treat 1.2.0 → 2.0.0rc1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +2 -2
- data/README.md +12 -21
- data/lib/treat/autoload.rb +44 -0
- data/lib/treat/config/config.rb +38 -0
- data/lib/treat/config/configurable.rb +51 -0
- data/lib/treat/config/data/config.rb +50 -0
- data/lib/treat/config/data/core.rb +52 -0
- data/lib/treat/config/data/databases.rb +10 -0
- data/lib/treat/config/data/entities.rb +15 -0
- data/lib/treat/config/data/languages/agnostic.rb +31 -0
- data/lib/treat/config/{languages → data/languages}/arabic.rb +0 -0
- data/lib/treat/config/{languages → data/languages}/chinese.rb +0 -0
- data/lib/treat/config/{languages → data/languages}/dutch.rb +1 -1
- data/lib/treat/config/data/languages/english.rb +95 -0
- data/lib/treat/config/data/languages/french.rb +148 -0
- data/lib/treat/config/data/languages/german.rb +135 -0
- data/lib/treat/config/{languages → data/languages}/greek.rb +1 -1
- data/lib/treat/config/data/languages/italian.rb +162 -0
- data/lib/treat/config/data/languages/polish.rb +11 -0
- data/lib/treat/config/{languages → data/languages}/portuguese.rb +1 -1
- data/lib/treat/config/{languages → data/languages}/russian.rb +1 -1
- data/lib/treat/config/data/languages/spanish.rb +291 -0
- data/lib/treat/config/data/languages/swedish.rb +289 -0
- data/lib/treat/config/data/libraries.rb +12 -0
- data/lib/treat/config/data/linguistics.rb +44 -0
- data/lib/treat/config/data/tags.rb +328 -0
- data/lib/treat/config/{workers → data/workers}/extractors.rb +2 -10
- data/lib/treat/config/{workers → data/workers}/formatters.rb +0 -0
- data/lib/treat/config/{workers → data/workers}/inflectors.rb +0 -0
- data/lib/treat/config/{workers → data/workers}/learners.rb +0 -0
- data/lib/treat/config/{workers → data/workers}/lexicalizers.rb +4 -3
- data/lib/treat/config/{workers → data/workers}/processors.rb +3 -3
- data/lib/treat/config/{workers → data/workers}/retrievers.rb +0 -0
- data/lib/treat/config/importable.rb +31 -0
- data/lib/treat/config/paths.rb +23 -0
- data/lib/treat/config/tags.rb +37 -0
- data/lib/treat/core/dsl.rb +55 -0
- data/lib/treat/{installer.rb → core/installer.rb} +10 -12
- data/lib/treat/core/server.rb +40 -0
- data/lib/treat/entities/entities.rb +101 -0
- data/lib/treat/entities/{abilities/doable.rb → entity/applicable.rb} +5 -3
- data/lib/treat/entities/{abilities → entity}/buildable.rb +118 -63
- data/lib/treat/entities/{abilities → entity}/checkable.rb +2 -2
- data/lib/treat/entities/{abilities → entity}/comparable.rb +6 -6
- data/lib/treat/entities/{abilities → entity}/countable.rb +2 -1
- data/lib/treat/entities/entity/debuggable.rb +86 -0
- data/lib/treat/entities/{abilities → entity}/delegatable.rb +16 -26
- data/lib/treat/entities/{abilities → entity}/exportable.rb +2 -2
- data/lib/treat/entities/{abilities → entity}/iterable.rb +4 -16
- data/lib/treat/entities/{abilities → entity}/magical.rb +22 -17
- data/lib/treat/entities/entity/registrable.rb +36 -0
- data/lib/treat/entities/{abilities → entity}/stringable.rb +18 -15
- data/lib/treat/entities/entity.rb +86 -77
- data/lib/treat/exception.rb +3 -0
- data/lib/treat/helpers/hash.rb +29 -0
- data/lib/treat/helpers/help.rb +35 -0
- data/lib/treat/helpers/object.rb +55 -0
- data/lib/treat/helpers/string.rb +124 -0
- data/lib/treat/{core → learning}/data_set.rb +11 -11
- data/lib/treat/{core → learning}/export.rb +3 -3
- data/lib/treat/{core → learning}/problem.rb +26 -16
- data/lib/treat/{core → learning}/question.rb +5 -9
- data/lib/treat/loaders/linguistics.rb +8 -9
- data/lib/treat/loaders/stanford.rb +5 -11
- data/lib/treat/modules.rb +33 -0
- data/lib/treat/proxies/array.rb +27 -0
- data/lib/treat/proxies/language.rb +47 -0
- data/lib/treat/proxies/number.rb +18 -0
- data/lib/treat/proxies/proxy.rb +25 -0
- data/lib/treat/proxies/string.rb +18 -0
- data/lib/treat/version.rb +10 -1
- data/lib/treat/{workers.rb → workers/categorizable.rb} +18 -19
- data/lib/treat/workers/extractors/keywords/tf_idf.rb +11 -11
- data/lib/treat/workers/extractors/language/what_language.rb +8 -6
- data/lib/treat/workers/extractors/name_tag/stanford.rb +10 -4
- data/lib/treat/workers/extractors/similarity/levenshtein.rb +36 -0
- data/lib/treat/workers/extractors/similarity/tf_idf.rb +27 -0
- data/lib/treat/workers/extractors/tf_idf/native.rb +4 -4
- data/lib/treat/workers/extractors/time/chronic.rb +2 -4
- data/lib/treat/workers/extractors/time/nickel.rb +19 -20
- data/lib/treat/workers/extractors/time/ruby.rb +2 -1
- data/lib/treat/workers/extractors/topic_words/lda.rb +12 -12
- data/lib/treat/workers/extractors/topics/reuters.rb +9 -13
- data/lib/treat/workers/formatters/readers/autoselect.rb +1 -1
- data/lib/treat/workers/formatters/readers/image.rb +19 -9
- data/lib/treat/workers/formatters/readers/odt.rb +2 -1
- data/lib/treat/workers/formatters/readers/pdf.rb +20 -3
- data/lib/treat/workers/formatters/readers/xml.rb +0 -1
- data/lib/treat/workers/formatters/serializers/mongo.rb +10 -20
- data/lib/treat/workers/formatters/serializers/xml.rb +17 -26
- data/lib/treat/workers/formatters/serializers/yaml.rb +5 -4
- data/lib/treat/workers/formatters/unserializers/mongo.rb +4 -4
- data/lib/treat/workers/formatters/unserializers/xml.rb +3 -4
- data/lib/treat/workers/formatters/unserializers/yaml.rb +3 -4
- data/lib/treat/workers/formatters/visualizers/dot.rb +1 -0
- data/lib/treat/workers/formatters/visualizers/standoff.rb +2 -3
- data/lib/treat/workers/formatters/visualizers/tree.rb +2 -3
- data/lib/treat/workers/{group.rb → groupable.rb} +9 -9
- data/lib/treat/workers/inflectors/cardinalizers/linguistics.rb +1 -3
- data/lib/treat/workers/inflectors/conjugators/linguistics.rb +5 -7
- data/lib/treat/workers/inflectors/declensors/english.rb +13 -20
- data/lib/treat/workers/inflectors/declensors/linguistics.rb +29 -28
- data/lib/treat/workers/inflectors/ordinalizers/linguistics.rb +0 -2
- data/lib/treat/workers/inflectors/stemmers/porter.rb +8 -10
- data/lib/treat/workers/inflectors/stemmers/porter_c.rb +7 -7
- data/lib/treat/workers/inflectors/stemmers/uea.rb +3 -8
- data/lib/treat/workers/learners/classifiers/id3.rb +17 -14
- data/lib/treat/workers/learners/classifiers/linear.rb +15 -27
- data/lib/treat/workers/learners/classifiers/mlp.rb +32 -19
- data/lib/treat/workers/learners/classifiers/svm.rb +28 -21
- data/lib/treat/workers/lexicalizers/categorizers/from_tag.rb +19 -3
- data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +15 -7
- data/lib/treat/workers/lexicalizers/taggers/brill/patch.rb +4 -1
- data/lib/treat/workers/lexicalizers/taggers/brill.rb +8 -19
- data/lib/treat/workers/lexicalizers/taggers/lingua.rb +4 -15
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +22 -13
- data/lib/treat/workers/processors/chunkers/autoselect.rb +2 -3
- data/lib/treat/workers/processors/chunkers/html.rb +1 -6
- data/lib/treat/workers/processors/parsers/enju.rb +2 -4
- data/lib/treat/workers/processors/parsers/stanford.rb +13 -7
- data/lib/treat/workers/processors/segmenters/punkt.rb +25 -11
- data/lib/treat/workers/processors/segmenters/scalpel.rb +20 -0
- data/lib/treat/workers/processors/segmenters/srx.rb +42 -0
- data/lib/treat/workers/processors/segmenters/stanford.rb +5 -5
- data/lib/treat/workers/processors/segmenters/tactful.rb +21 -11
- data/lib/treat/workers/processors/tokenizers/ptb.rb +40 -30
- data/lib/treat/workers/processors/tokenizers/punkt.rb +14 -19
- data/lib/treat/workers/processors/tokenizers/stanford.rb +38 -22
- data/lib/treat/workers/retrievers/indexers/ferret.rb +6 -3
- data/lib/treat/workers/retrievers/searchers/ferret.rb +2 -2
- data/lib/treat/workers/workers.rb +6 -0
- data/lib/treat.rb +18 -32
- data/models/MANIFEST +1 -0
- data/spec/core/data_set.rb +174 -0
- data/spec/core/export.rb +52 -0
- data/spec/core/problem.rb +144 -0
- data/spec/core/question.rb +52 -0
- data/spec/{collection.rb → entities/collection.rb} +20 -35
- data/spec/{document.rb → entities/document.rb} +3 -54
- data/spec/{entity.rb → entities/entity.rb} +10 -9
- data/spec/entities/phrase.rb +33 -0
- data/spec/{token.rb → entities/token.rb} +0 -57
- data/spec/entities/word.rb +3 -0
- data/spec/{zone.rb → entities/zone.rb} +0 -26
- data/spec/helper.rb +116 -32
- data/spec/sandbox.rb +258 -25
- data/spec/treat.rb +26 -34
- data/spec/workers/agnostic.rb +137 -0
- data/spec/workers/english.rb +194 -0
- data/spec/workers/examples/english/economist/hungarys_troubles.txt +46 -0
- data/spec/workers/examples/english/economist/saving_the_euro.odt +0 -0
- data/spec/{samples → workers/examples/english}/mathematicians/archimedes.abw +0 -0
- data/spec/{samples → workers/examples/english}/mathematicians/euler.html +0 -0
- data/spec/{samples → workers/examples/english}/mathematicians/gauss.pdf +0 -0
- data/spec/{samples → workers/examples/english}/mathematicians/leibniz.txt +0 -0
- data/spec/{samples → workers/examples/english}/mathematicians/newton.doc +0 -0
- data/spec/workers/examples/english/phrase.xml +5 -0
- data/spec/workers/examples/english/test.txt +1 -0
- data/spec/workers/language.rb +280 -0
- data/spec/workers.rb +28 -0
- metadata +122 -105
- data/lib/treat/config/core/acronyms.rb +0 -5
- data/lib/treat/config/core/encodings.rb +0 -8
- data/lib/treat/config/core/entities.rb +0 -2
- data/lib/treat/config/core/language.rb +0 -3
- data/lib/treat/config/core/paths.rb +0 -8
- data/lib/treat/config/core/syntax.rb +0 -1
- data/lib/treat/config/core/verbosity.rb +0 -1
- data/lib/treat/config/databases/default.rb +0 -1
- data/lib/treat/config/databases/mongo.rb +0 -1
- data/lib/treat/config/languages/agnostic.rb +0 -34
- data/lib/treat/config/languages/english.rb +0 -60
- data/lib/treat/config/languages/french.rb +0 -18
- data/lib/treat/config/languages/german.rb +0 -18
- data/lib/treat/config/languages/italian.rb +0 -12
- data/lib/treat/config/languages/polish.rb +0 -12
- data/lib/treat/config/languages/spanish.rb +0 -12
- data/lib/treat/config/languages/swedish.rb +0 -12
- data/lib/treat/config/libraries/punkt.rb +0 -1
- data/lib/treat/config/libraries/reuters.rb +0 -1
- data/lib/treat/config/libraries/stanford.rb +0 -1
- data/lib/treat/config/linguistics/categories.rb +0 -4
- data/lib/treat/config/linguistics/punctuation.rb +0 -33
- data/lib/treat/config/tags/aligned.rb +0 -221
- data/lib/treat/config/tags/enju.rb +0 -71
- data/lib/treat/config/tags/paris7.rb +0 -17
- data/lib/treat/config/tags/ptb.rb +0 -15
- data/lib/treat/config/workers/list.rb +0 -1
- data/lib/treat/config.rb +0 -135
- data/lib/treat/core.rb +0 -5
- data/lib/treat/entities/abilities/copyable.rb +0 -47
- data/lib/treat/entities/abilities/debuggable.rb +0 -83
- data/lib/treat/entities/abilities/registrable.rb +0 -46
- data/lib/treat/entities/collection.rb +0 -40
- data/lib/treat/entities/document.rb +0 -10
- data/lib/treat/entities/group.rb +0 -18
- data/lib/treat/entities/section.rb +0 -13
- data/lib/treat/entities/token.rb +0 -47
- data/lib/treat/entities/zone.rb +0 -12
- data/lib/treat/entities.rb +0 -6
- data/lib/treat/helpers/didyoumean.rb +0 -57
- data/lib/treat/helpers/escaping.rb +0 -15
- data/lib/treat/helpers/formatting.rb +0 -41
- data/lib/treat/helpers/objtohash.rb +0 -8
- data/lib/treat/helpers/platform.rb +0 -15
- data/lib/treat/helpers/reflection.rb +0 -17
- data/lib/treat/helpers/temporary.rb +0 -27
- data/lib/treat/helpers/verbosity.rb +0 -19
- data/lib/treat/helpers.rb +0 -5
- data/lib/treat/loaders.rb +0 -10
- data/lib/treat/proxies.rb +0 -106
- data/lib/treat/workers/formatters/unserializers/autoselect.rb +0 -17
- data/lib/treat/workers/inflectors/declensors/active_support.rb +0 -31
- data/lib/treat/workers/processors/tokenizers/tactful.rb +0 -68
- data/spec/core.rb +0 -441
- data/spec/phrase.rb +0 -112
- data/spec/word.rb +0 -111
@@ -1,221 +0,0 @@
|
|
1
|
-
# Aligned tag tables. Each of phrase_tags / word_tags is a flat list of
# (description, tags) pairs; the tags array has one column per entry of
# tag_sets, in the same order, with '' meaning "no tag in this tag set".
# Treat::Config.align_tags uses the FIRST WORD of each description
# (downcased) as the category, so the spelling of that word matters.
{tag_sets: [
  :claws_c5, :brown, :penn, :stutgart, :chinese, :paris7
],
phrase_tags: [
  'Adjectival phrase', ['', '', 'ADJP', '', '', 'AP'],
  'Adverbial phrase', ['', '', 'ADVP', '', '', 'AdP'],
  'Conjunction phrase', ['', '', 'CONJP', '', '', 'Ssub'],
  'Fragment', ['', '', 'FRAG', '', '', ''],
  'Interjectional phrase', ['', '', 'INTJ', '', '', ''],
  'List marker', ['', '', 'LST', '', '', ''],
  'Not a phrase', ['', '', 'NAC', '', '', ''],
  'Noun phrase', ['', '', 'NP', '', '', 'NP'],
  'Verbal nucleus', ['', '', '', '', '', 'VN'],
  'Head of noun phrase', ['', '', 'NX', '', '', ''],
  'Prepositional phrase', ['', '', 'PP', '', '', 'PP'],
  'Parenthetical', ['', '', 'PRN', '', '', ''],
  'Particle', ['', '', 'PRT', '', '', ''],
  'Participial phrase', ['', '', '', '', '', 'VPart'],
  'Quantifier phrase', ['', '', 'QP', '', '', ''],
  'Relative clause', ['', '', 'RRC', '', '', 'Srel'],
  'Coordinated phrase', ['', '', 'UCP', '', '', 'COORD'],
  'Infinitival phrase', ['', '', '', '', '', 'VPinf'],
  'Verb phrase', ['', '', 'VP', '', '', ''],
  'Wh adjective phrase', ['', '', 'WHADJP', '', '', ''],
  # The Penn Treebank tag is WHADVP (was misspelled 'WHAVP').
  'Wh adverb phrase', ['', '', 'WHADVP', '', '', ''],
  'Wh noun phrase', ['', '', 'WHNP', '', '', ''],
  'Wh prepositional phrase', ['', '', 'WHPP', '', '', ''],
  'Unknown', ['', '', 'X', '', '', ''],
  'Phrase', ['', '', 'P', '', '', 'Sint'],
  'Sentence', ['', '', 'S', '', '', 'SENT'],
  'Phrase', ['', '', 'SBAR', '', '', ''] # Fix
],
word_tags: [

  # Aligned tags for the Claws C5, Brown and Penn tag sets.
  # Adapted from Manning, Christopher and Schütze, Hinrich,
  # 1999. Foundations of Statistical Natural Language
  # Processing. MIT Press, p. 141-142;
  # http://www.isocat.org/rest/dcs/376;

  'Adjective', ['AJ0', 'JJ', 'JJ', '', 'JJ', 'A'],
  'Adjective', ['AJ0', 'JJ', 'JJ', '', 'JJ', 'ADJ'],
  'Adjective, adverbial or predicative', ['', '', '', 'ADJD', '', 'ADJ'],
  'Adjective, attribute', ['', '', '', 'ADJA', 'VA', 'ADJ'],
  'Adjective, ordinal number', ['ORD', 'OD', 'JJ', '', 'OD', 'ADJ'],
  'Adjective, comparative', ['AJC', 'JJR', 'JJR', 'KOKOM', '', 'ADJ'],
  'Adjective, superlative', ['AJS', 'JJT', 'JJS', '', 'JJ', 'ADJ'],
  'Adjective, superlative, semantically', ['AJ0', 'JJS', 'JJ', '', '', 'ADJ'],
  'Adjective, cardinal number', ['CRD', 'CD', 'CD', 'CARD', 'CD', 'ADJ'],
  'Adjective, cardinal number, one', ['PNI', 'CD', 'CD', 'CARD', 'CD', 'ADJ'],

  'Adverb', ['AV0', 'RB', 'RB', 'ADV', 'AD', 'ADV'],
  'Adverb, negative', ['XX0', '*', 'RB', 'PTKNEG', '', 'ADV'],
  'Adverb, comparative', ['AV0', 'RBR', 'RBR', '', 'AD', 'ADV'],
  'Adverb, superlative', ['AV0', 'RBT', 'RBS', '', 'AD', 'ADV'],
  'Adverb, particle', ['AVP', 'RP', 'RP', '', '', 'ADV'],
  'Adverb, question', ['AVQ', 'WRB', 'WRB', '', 'AD', 'ADV'],
  # Padded to six columns so 'ADV' lands in the :paris7 slot.
  'Adverb, degree & question', ['AVQ', 'WQL', 'WRB', '', '', 'ADV'],
  'Adverb, degree', ['AV0', 'QL', 'RB', '', '', 'ADV'],
  'Adverb, degree, postposed', ['AV0', 'QLP', 'RB', '', '', 'ADV'],
  'Adverb, nominal', ['AV0', 'RN', 'RB', 'PROP', '', 'ADV'],
  # Was seven columns wide; PROP is a German tag, so it belongs in
  # the :stutgart slot like on the row above.
  'Adverb, pronominal', ['', '', '', 'PROP', '', 'ADV'],

  'Conjunction, coordination', ['CJC', 'CC', 'CC', 'KON', 'CC', 'COOD'],
  'Conjunction, coordination, and', ['CJC', 'CC', 'CC', 'KON', 'CC', 'ET'],
  'Conjunction, subordination', ['CJS', 'CS', 'IN', 'KOUS', 'CS', 'CONJ'],
  'Conjunction, subordination with to and infinitive', ['', '', '', 'KOUI', '', ''],
  'Conjunction, complementizer, that', ['CJT', 'CS', 'IN', '', '', 'C'],

  'Determiner', ['DT0', 'DT', 'DT', '', 'DT', 'D'],
  'Determiner, pronoun', ['DT0', 'DTI', 'DT', '', '', 'D'],
  'Determiner, pronoun, plural', ['DT0', 'DTS', 'DT', '', '', 'D'],
  'Determiner, prequalifier', ['DT0', 'ABL', 'DT', '', '', 'D'],
  'Determiner, prequantifier', ['DT0', 'ABN', 'PDT', '', 'DT', 'D'],
  'Determiner, pronoun or double conjunction', ['DT0', 'ABX', 'PDT', '', '', 'D'],
  'Determiner, pronoun or double conjunction', ['DT0', 'DTX', 'DT', '', '', 'D'],
  'Determiner, article', ['AT0', 'AT', 'DT', 'ART', '', 'D'],
  'Determiner, postdeterminer', ['DT0', 'AP', 'DT', '', '', 'D'],
  'Determiner, possessive', ['DPS', 'PP$', 'PRP$', '', '', 'D'],
  'Determiner, possessive, second', ['DPS', 'PP$', 'PRPS', '', '', 'D'],
  'Determiner, question', ['DTQ', 'WDT', 'WDT', '', 'DT', 'D'],
  'Determiner, possessive & question', ['DTQ', 'WP$', 'WP$', '', '', 'D'],
  'Interjection', ['', '', '', '', '', 'I'],
  'Localizer', ['', '', '', '', 'LC'],

  'Measure word', ['', '', '', '', 'M'],

  'Noun, common', ['NN0', 'NN', 'NN', 'N', 'NN', 'NN'],
  'Noun, singular', ['NN1', 'NN', 'NN', 'NN', 'NN', 'N'],
  'Noun, plural', ['NN2', 'NNS', 'NNS', 'NN', 'NN', 'N'],
  'Noun, proper, singular', ['NP0', 'NP', 'NNP', 'NE', 'NR', 'N'],
  'Noun, proper, plural', ['NP0', 'NPS', 'NNPS', 'NE', 'NR', 'N'],
  'Noun, adverbial', ['NN0', 'NR', 'NN', 'NE', '', 'N'],
  # Padded to six columns so 'N' lands in the :paris7 slot.
  'Noun, adverbial, plural', ['NN2', 'NRS', 'NNS', '', '', 'N'],
  'Noun, temporal', ['', '', '', '', 'NT', 'N'],
  'Noun, verbal', ['', '', '', '', 'NN', 'N'],

  'Pronoun, nominal (indefinite)', ['PNI', 'PN', 'PRP', '', 'PN', 'CL'],
  'Pronoun, personal, subject', ['PNP', 'PPSS', 'PRP', 'PPER'],
  'Pronoun, personal, subject, 3SG', ['PNP', 'PPS', 'PRP', 'PPER'],
  'Pronoun, personal, object', ['PNP', 'PPO', 'PRP', 'PPER'],
  'Pronoun, reflexive', ['PNX', 'PPL', 'PRP', 'PRF'],
  'Pronoun, reflexive, plural', ['PNX', 'PPLS', 'PRP', 'PRF'],
  'Pronoun, question, subject', ['PNQ', 'WPS', 'WP', 'PWAV'],
  'Pronoun, question, subject', ['PNQ', 'WPS', 'WPS', 'PWAV'], # Hack
  'Pronoun, question, object', ['PNQ', 'WPO', 'WP', 'PWAV', 'PWAT'],
  'Pronoun, existential there', ['EX0', 'EX', 'EX'],
  'Pronoun, attributive demonstrative', ['', '', '', 'PDAT'],
  'Pronoun, attributive indefinite without determiner', ['', '', '', 'PIAT'],
  'Pronoun, attributive possessive', ['', '', '', 'PPOSAT', ''],
  'Pronoun, substituting demonstrative', ['', '', '', 'PDS'],
  'Pronoun, substituting possessive', ['', '', '', 'PPOSS', ''],
  'Pronoun, substituting indefinite', ['', '', '', 'PIS'],
  'Pronoun, attributive relative', ['', '', '', 'PRELAT', ''],
  'Pronoun, substituting relative', ['', '', '', 'PRELS', ''],
  'Pronoun, attributive interrogative', ['', '', '', 'PWAT'],
  'Pronoun, adverbial interrogative', ['', '', '', 'PWAV'],

  'Pronoun, substituting interrogative', ['', '', '', 'PWS'],
  'Verb, main, finite', ['', '', '', 'VVFIN', '', 'V'],
  'Verb, main, infinitive', ['', '', '', 'VVINF', '', 'V'],
  'Verb, main, imperative', ['', '', '', 'VVIMP', '', 'V'],
  'Verb, base present form (not infinitive)', ['VVB', 'VB', 'VBP', '', '', 'V'],
  'Verb, infinitive', ['VVI', 'VB', 'VB', 'V', '', 'V'],
  'Verb, past tense', ['VVD', 'VBD', 'VBD', '', '', 'V'],
  'Verb, present participle', ['VVG', 'VBG', 'VBG', 'VAPP', '', 'V'],
  'Verb, past/passive participle', ['VVN', 'VBN', 'VBN', 'VVPP', '', 'V'],
  'Verb, present, 3SG, -s form', ['VVZ', 'VBZ', 'VBZ', '', '', 'V'],
  'Verb, auxiliary', ['', '', '', 'VAFIN', '', 'V'],
  'Verb, imperative', ['', '', '', 'VAIMP', '', 'V'],
  'Verb, imperative infinitive', ['', '', '', 'VAINF', '', 'V'],
  'Verb, auxiliary do, base', ['VDB', 'DO', 'VBP', '', '', 'V'],
  'Verb, auxiliary do, infinitive', ['VDB', 'DO', 'VB', '', '', 'V'],
  'Verb, auxiliary do, past', ['VDD', 'DOD', 'VBD', '', '', 'V'],
  'Verb, auxiliary do, present participle', ['VDG', 'VBG', 'VBG', '', '', 'V'],
  'Verb, auxiliary do, past participle', ['VDN', 'VBN', 'VBN', '', '', 'V'],
  'Verb, auxiliary do, present 3SG', ['VDZ', 'DOZ', 'VBZ', '', '', 'V'],
  'Verb, auxiliary have, base', ['VHB', 'HV', 'VBP', 'VA', '', 'V'],
  'Verb, auxiliary have, infinitive', ['VHI', 'HV', 'VB', 'VAINF', '', 'V'],
  'Verb, auxiliary have, past', ['VHD', 'HVD', 'VBD', 'VA', '', 'V'],
  'Verb, auxiliary have, present participle', ['VHG', 'HVG', 'VBG', 'VA', '', 'V'],
  'Verb, auxiliary have, past participle', ['VHN', 'HVN', 'VBN', 'VAPP', '', 'V'],
  'Verb, auxiliary have, present 3SG', ['VHZ', 'HVZ', 'VBZ', 'VA', '', 'V'],
  'Verb, auxiliary be, infinitive', ['VBI', 'BE', 'VB', '', '', 'V'],
  'Verb, auxiliary be, past', ['VBD', 'BED', 'VBD', '', '', 'V'],
  'Verb, auxiliary be, past, 3SG', ['VBD', 'BEDZ', 'VBD', '', '', 'V'],
  'Verb, auxiliary be, present participle', ['VBG', 'BEG', 'VBG', '', '', 'V'],
  'Verb, auxiliary be, past participle', ['VBN', 'BEN', 'VBN', '', '', 'V'],
  'Verb, auxiliary be, present, 3SG', ['VBZ', 'BEZ', 'VBZ', '', '', 'V'],
  'Verb, auxiliary be, present, 1SG', ['VBB', 'BEM', 'VBP', '', '', 'V'],
  'Verb, auxiliary be, present', ['VBB', 'BER', 'VBP', '', '', 'V'],
  'Verb, modal', ['VM0', 'MD', 'MD', 'VMFIN', 'VV', 'V'],
  'Verb, modal', ['VM0', 'MD', 'MD', 'VMINF', 'VV', 'V'],
  'Verb, modal, finite', ['', '', '', '', 'VMFIN', 'V'],
  'Verb, modal, infinite', ['', '', '', '', 'VMINF', 'V'],
  'Verb, modal, past participle', ['', '', '', '', 'VMPP', 'V'],

  'Particle', ['', '', '', '', '', 'PRT'],
  'Particle, with adverb', ['', '', '', 'PTKA', '', 'PRT'],
  'Particle, answer', ['', '', '', 'PTKANT', '', 'PRT'],
  'Particle, negation', ['', '', '', 'PTKNEG', '', 'PRT'],
  'Particle, separated verb', ['', '', '', 'PTKVZ', '', 'PRT'],
  'Particle, to as infinitive marker', ['TO0', 'TO', 'TO', 'PTKZU', '', 'PRT'],

  'Preposition, comparative', ['', '', '', 'KOKOM', '', 'P'],
  'Preposition, to', ['PRP', 'IN', 'TO', '', '', 'P'],
  'Preposition', ['PRP', 'IN', 'IN', 'APPR', 'P', 'P'],
  'Preposition, with article', ['', '', '', 'APPART', '', 'P'],
  'Preposition, of', ['PRF', 'IN', 'IN', '', '', 'P'],

  'Possessive', ['POS', '$', 'POS'],

  'Postposition', ['', '', '', 'APPO'],

  'Circumposition, right', ['', '', '', 'APZR', ''],

  'Interjection, onomatopoeia or other isolate', ['ITJ', 'UH', 'UH', 'ITJ', 'IJ'],

  'Onomatopoeia', ['', '', '', '', 'ON'],

  'Punctuation', ['', '', '', '', 'PU', 'PN'],
  'Punctuation, sentence ender', ['PUN', '.', '.', '', '', 'PN'],

  'Punctuation, semicolon', ['PUN', '.', '.', '', '', 'PN'],
  'Punctuation, colon or ellipsis', ['PUN', ':', ':'],
  'Punctuation, comma', ['PUN', ',', ',', '$,'],
  'Punctuation, dash', ['PUN', '-', '-'],
  'Punctuation, dollar sign', ['PUN', '', '$'],
  'Punctuation, left bracket', ['PUL', '(', '(', '$('],
  'Punctuation, right bracket', ['PUR', ')', ')'],
  'Punctuation, quotation mark, left', ['PUQ', '', '``'],
  'Punctuation, quotation mark, right', ['PUQ', '', '"'],

  'Punctuation, left bracket', ['PUL', '(', 'PPL'],
  'Punctuation, right bracket', ['PUR', ')', 'PPR'],
  'Punctuation, left square bracket', ['PUL', '(', 'LSB'],
  'Punctuation, right square bracket', ['PUR', ')', 'RSB'],
  'Punctuation, left curly bracket', ['PUL', '(', 'LCB'],
  'Punctuation, right curly bracket', ['PUR', ')', 'RCB'],

  'Unknown, foreign words (not in lexicon)', ['UNZ', '(FW-)', 'FW', '', 'FW'],

  'Symbol', ['', '', 'SYM', 'XY'],
  'Symbol, alphabetical', ['ZZ0', '', ''],
  'Symbol, list item', ['', '', 'LS'],

  # Not sure about these tags from the Chinese PTB.
  # NOTE(review): the descriptions below do not start with a part of
  # speech, so the derived categories ('in', 'for', 'before', ...) are
  # not meaningful; some descriptions also lost their Chinese characters.
  'Aspect marker', ['', '', '', '', 'AS'], # ?
  'Ba-construction', ['', '', '', '', 'BA'], # ?
  'In relative', ['', '', '', '', 'DEC'], # ?
  # Associative marker is DEG in the Chinese Treebank (was a second DER).
  'Associative', ['', '', '', '', 'DEG'], # ?
  'In V-de or V-de-R construct', ['', '', '', '', 'DER'], # ?
  'For words ? ', ['', '', '', '', 'ETC'], # ?
  'In long bei-construct', ['', '', '', '', 'LB'], # ?
  'In short bei-construct', ['', '', '', '', 'SB'], # ?
  # Sentence-final particle is SP in the Chinese Treebank (was a second SB).
  'Sentence-final particle', ['', '', '', '', 'SP'], # ?
  'Particle, other', ['', '', '', '', 'MSP'], # ?
  'Before VP', ['', '', '', '', 'DEV'], # ?
  'Verb, ? as main verb', ['', '', '', '', 'VE'], # ?
  'Verb, ????', ['', '', '', '', 'VC'] # ?
]}
|
@@ -1,71 +0,0 @@
|
|
1
|
-
# Enju parser tag tables.
#   cat_to_category     - Enju "cat" value => word category name.
#   cat_to_description  - [cat, human-readable description] pairs.
#   xcat_to_description - [xcat, human-readable description] pairs.
#   xcat_to_ptb         - [cat, xcat, Penn Treebank phrase tag] triples.
{cat_to_category: {
  'ADJ' => 'adjective',
  'ADV' => 'adverb',
  'CONJ' => 'conjunction',
  'COOD' => 'conjunction',
  'C' => 'complementizer',
  'D' => 'determiner',
  'N' => 'noun',
  'P' => 'preposition',
  'PN' => 'punctuation',
  'SC' => 'conjunction',
  'V' => 'verb',
  'PRT' => 'particle'
},
cat_to_description: [
  ['ADJ', 'Adjective'],
  ['ADV', 'Adverb'],
  ['CONJ', 'Coordination conjunction'],
  ['C', 'Complementizer'],
  ['D', 'Determiner'],
  ['N', 'Noun'],
  ['P', 'Preposition'],
  ['SC', 'Subordination conjunction'],
  ['V', 'Verb'],
  ['COOD', 'Part of coordination'],
  ['PN', 'Punctuation'],
  ['PRT', 'Particle'],
  ['S', 'Sentence']
],
xcat_to_description: [
  ['COOD', 'Coordinated phrase/clause'],
  ['IMP', 'Imperative sentence'],
  ['INV', 'Subject-verb inversion'],
  ['Q', 'Interrogative sentence with subject-verb inversion'],
  ['REL', 'A relativizer included'],
  ['FREL', 'A free relative included'],
  ['TRACE', 'A trace included'],
  ['WH', 'A wh-question word included']
],
xcat_to_ptb: [
  ['ADJP', '', 'ADJP'],
  ['ADJP', 'REL', 'WHADJP'],
  ['ADJP', 'FREL', 'WHADJP'],
  ['ADJP', 'WH', 'WHADJP'],
  ['ADVP', '', 'ADVP'],
  ['ADVP', 'REL', 'WHADVP'],
  ['ADVP', 'FREL', 'WHADVP'],
  ['ADVP', 'WH', 'WHADVP'],
  ['CONJP', '', 'CONJP'],
  ['CP', '', 'SBAR'],
  ['DP', '', 'NP'],
  ['NP', '', 'NP'],
  ['NX', 'NX', 'NAC'],
  # The commas were missing on the next three rows, so Ruby glued the
  # adjacent string literals into a single element (e.g. 'NPRELWHNP').
  ['NP', 'REL', 'WHNP'],
  ['NP', 'FREL', 'WHNP'],
  ['NP', 'WH', 'WHNP'],
  ['PP', '', 'PP'],
  ['PP', 'REL', 'WHPP'],
  ['PP', 'WH', 'WHPP'],
  ['PRT', '', 'PRT'],
  ['S', '', 'S'],
  ['S', 'INV', 'SINV'],
  ['S', 'Q', 'SQ'],
  ['S', 'REL', 'SBAR'],
  ['S', 'FREL', 'SBAR'],
  ['S', 'WH', 'SBARQ'],
  ['SCP', '', 'SBAR'],
  ['VP', '', 'VP'],
  ['VP', '', 'VP'],
  ['', '', 'UK']
]}
|
@@ -1,17 +0,0 @@
|
|
1
|
-
# Paris7 Treebank overrides: word tag => category symbol.
{
  :tag_to_category => {
    'C'  => :complementizer,
    'PN' => :punctuation,
    'SC' => :conjunction
  }
  # Paris7 Treebank functional tags (listed for reference only,
  # they are not part of the returned hash):
  #
  #   SUJ (subject)
  #   OBJ (direct object)
  #   ATS (predicative complement of a subject)
  #   ATO (predicative complement of a direct object)
  #   MOD (modifier or adjunct)
  #   A-OBJ (indirect complement introduced by à)
  #   DE-OBJ (indirect complement introduced by de)
  #   P-OBJ (indirect complement introduced by another preposition)
}
|
@@ -1,15 +0,0 @@
|
|
1
|
-
# Penn Treebank conventions.
#   escape_characters         - bracket character => PTB escape token.
#   phrase_tag_to_description - [clause-level tag, description] pairs.
{escape_characters: {
  '(' => '-LRB-',
  ')' => '-RRB-',
  '[' => '-LSB-',
  ']' => '-RSB-',
  '{' => '-LCB-',
  '}' => '-RCB-'
},
phrase_tag_to_description: [
  # PTB defines S as the *simple* declarative clause; the text used to
  # read 'Paris7 declarative clause', which names a different treebank.
  ['S', 'Simple declarative clause'],
  ['SBAR', 'Clause introduced by a (possibly empty) subordinating conjunction'],
  ['SBARQ', 'Direct question introduced by a wh-word or a wh-phrase'],
  ['SINV', 'Inverted declarative sentence'],
  ['SQ', 'Inverted yes/no question']
]}
|
@@ -1 +0,0 @@
|
|
1
|
-
# Names of the worker categories, as symbols.
%w[
  extractors inflectors formatters learners
  lexicalizers processors retrievers
].map(&:intern)
|
data/lib/treat/config.rb
DELETED
@@ -1,135 +0,0 @@
|
|
1
|
-
module Treat::Config
|
2
|
-
|
3
|
-
Paths = [ :tmp, :lib, :bin,
|
4
|
-
:files, :data, :models, :spec ]
|
5
|
-
|
6
|
-
class << self
|
7
|
-
attr_accessor :config
|
8
|
-
end
|
9
|
-
|
10
|
-
Treat.module_eval do
|
11
|
-
# Handle all missing methods as conf options.
|
12
|
-
def self.method_missing(sym, *args, &block)
|
13
|
-
super(sym, *args, &block) if sym == :to_ary
|
14
|
-
Treat::Config.config[sym]
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def self.configure
|
19
|
-
# Temporary configuration hash.
|
20
|
-
config = { paths: {} }
|
21
|
-
confdir = get_full_path(:lib) + 'treat/config'
|
22
|
-
# Iterate over each directory in the config.
|
23
|
-
Dir[confdir + '/*'].each do |dir|
|
24
|
-
name = File.basename(dir, '.*').intern
|
25
|
-
config[name] = {}
|
26
|
-
# Iterate over each file in the directory.
|
27
|
-
Dir[confdir + "/#{name}/*.rb"].each do |file|
|
28
|
-
key = File.basename(file, '.*').intern
|
29
|
-
config[name][key] = eval(File.read(file))
|
30
|
-
end
|
31
|
-
end
|
32
|
-
# Get the path config.
|
33
|
-
Paths.each do |path|
|
34
|
-
config[:paths][path] = get_full_path(path)
|
35
|
-
end
|
36
|
-
# Get the tag alignments.
|
37
|
-
configure_tags!(config[:tags][:aligned])
|
38
|
-
# Convert hash to structs.
|
39
|
-
self.config = self.hash_to_struct(config)
|
40
|
-
end
|
41
|
-
|
42
|
-
def self.get_full_path(dir)
|
43
|
-
File.dirname(__FILE__) +
|
44
|
-
'/../../' + dir.to_s + "/"
|
45
|
-
end
|
46
|
-
|
47
|
-
def self.configure_tags!(config)
|
48
|
-
ts = config[:tag_sets]
|
49
|
-
config[:word_tags_to_category] =
|
50
|
-
align_tags(config[:word_tags], ts)
|
51
|
-
config[:phrase_tags_to_category] =
|
52
|
-
align_tags(config[:phrase_tags], ts)
|
53
|
-
end
|
54
|
-
|
55
|
-
# Align tag configuration.
|
56
|
-
def self.align_tags(tags, tag_sets)
|
57
|
-
wttc = {}
|
58
|
-
tags.each_slice(2) do |desc, tags|
|
59
|
-
category = desc.gsub(',', ' ,').
|
60
|
-
split(' ')[0].downcase
|
61
|
-
tag_sets.each_with_index do |tag_set, i|
|
62
|
-
next unless tags[i]
|
63
|
-
wttc[tags[i]] ||= {}
|
64
|
-
wttc[tags[i]][tag_set] = category
|
65
|
-
end
|
66
|
-
end
|
67
|
-
wttc
|
68
|
-
end
|
69
|
-
|
70
|
-
def self.hash_to_struct(hash)
|
71
|
-
return hash if hash.keys.
|
72
|
-
select { |k| !k.is_a?(Symbol) }.size > 0
|
73
|
-
struct = Struct.new(
|
74
|
-
*hash.keys).new(*hash.values)
|
75
|
-
hash.each do |key, value|
|
76
|
-
if value.is_a?(Hash)
|
77
|
-
struct[key] =
|
78
|
-
self.hash_to_struct(value)
|
79
|
-
end
|
80
|
-
end
|
81
|
-
struct
|
82
|
-
end
|
83
|
-
|
84
|
-
# Turn on syntactic sugar.
|
85
|
-
def self.sweeten!
|
86
|
-
|
87
|
-
# Undo this in unsweeten! - # Fix
|
88
|
-
Treat::Entities.module_eval do
|
89
|
-
self.constants.each do |type|
|
90
|
-
define_singleton_method(type) do |value='', id=nil|
|
91
|
-
const_get(type).build(value, id)
|
92
|
-
end
|
93
|
-
end
|
94
|
-
end
|
95
|
-
|
96
|
-
return if Treat.core.syntax.sweetened
|
97
|
-
Treat.core.syntax.sweetened = true
|
98
|
-
Treat.core.entities.list.each do |type|
|
99
|
-
next if type == :Symbol
|
100
|
-
kname = cc(type).intern
|
101
|
-
klass = Treat::Entities.const_get(kname)
|
102
|
-
Object.class_eval do
|
103
|
-
define_method(kname) do |val, opts={}|
|
104
|
-
klass.build(val, opts)
|
105
|
-
end
|
106
|
-
end
|
107
|
-
end
|
108
|
-
|
109
|
-
Treat::Core.constants.each do |kname|
|
110
|
-
Object.class_eval do
|
111
|
-
klass = Treat::Core.const_get(kname)
|
112
|
-
define_method(kname) do |*args|
|
113
|
-
klass.new(*args)
|
114
|
-
end
|
115
|
-
end
|
116
|
-
end
|
117
|
-
|
118
|
-
end
|
119
|
-
|
120
|
-
# Turn off syntactic sugar.
|
121
|
-
# Removes the Object-level builder methods that sweeten! installed
# and clears the Treat.core.syntax.sweetened flag. Does nothing
# when the flag is not set.
#
# NOTE(review): the singleton builders that sweeten! defines on
# Treat::Entities are still left in place; sweeten! carries a
# "Undo this in unsweeten!" reminder for them - confirm whether
# they should be removed here as well.
def self.unsweeten!
  return unless Treat.core.syntax.sweetened
  Treat.core.syntax.sweetened = false

  # Builders named after entity types.
  Treat.core.entities.list.each do |type|
    next if type == :Symbol
    name = cc(type).intern
    Object.class_eval { remove_method(name) }
  end

  # Builders named after Treat::Core constants. sweeten! defines
  # these too, so they have to be removed for the sugar to really
  # be off. The guard skips names that are not (or no longer)
  # defined directly on Object, since remove_method would raise.
  Treat::Core.constants.each do |kname|
    Object.class_eval do
      remove_method(kname) if instance_methods(false).include?(kname)
    end
  end

end
|
131
|
-
|
132
|
-
# Run all configuration.
|
133
|
-
self.configure
|
134
|
-
|
135
|
-
end
|
data/lib/treat/core.rb
DELETED
@@ -1,47 +0,0 @@
|
|
1
|
-
# Lets a document or collection entity duplicate itself, together
# with its backing file or folder, into another collection.
module Treat::Entities::Abilities::Copyable

  require 'fileutils'

  # Copy this entity into the supplied collection.
  #
  # collection - the destination Treat::Entities::Collection.
  #
  # Returns the duplicated entity.
  # Raises Treat::Exception if the destination is not a collection,
  # or if this entity is neither a document nor a collection.
  #
  # What happens when it is a database-stored
  # collection or document ?
  def copy_into(collection)
    unless collection.is_a?(Treat::Entities::Collection)
      raise Treat::Exception,
      "Cannot copy an entity into " +
      "something else than a collection."
    end
    case type
    when :document
      copy_document_into(collection)
    when :collection
      copy_collection_into(collection)
    else
      raise Treat::Exception,
      "Can only copy a document " +
      "or collection into a collection."
    end
  end

  # Duplicate this collection and copy its folder under the
  # destination collection's folder.
  #
  # Returns the duplicate, with its :folder set to the target path.
  def copy_collection_into(collection)
    copy = dup
    # NOTE(review): the target directory is named after the *parent*
    # directory of this collection's folder (File.dirname, then its
    # last path segment) - confirm this is intended rather than the
    # folder's own name.
    f = File.dirname(folder)
    f = f.split(File::SEPARATOR)[-1]
    f = File.join(collection.folder, f)
    # The predicate is FileTest.directory? (with the question mark);
    # without it this line raised NoMethodError on every call.
    FileUtils.mkdir(f) unless FileTest.directory?(f)
    FileUtils.cp_r(folder, f)
    copy.set :folder, f
    copy
  end

  # Duplicate this document and copy its file into the destination
  # collection's folder.
  #
  # Returns the duplicate; when the document has no file, nothing is
  # copied on disk and the plain duplicate is returned.
  def copy_document_into(collection)
    copy = dup
    return copy unless file
    f = File.basename(file)
    f = File.join(collection.folder, f)
    FileUtils.cp(file, f)
    copy.set :file, f
    copy
  end

end
|
@@ -1,83 +0,0 @@
|
|
1
|
-
# When Treat.debug is set to true, each call to
# #call_worker will result in a debug message being
# printed by the #print_debug function.
module Treat::Entities::Abilities::Debuggable

  # Message built by the previous call; it is printed only once a
  # different message comes in.
  @@prev = nil
  # Number of consecutive calls that repeated @@prev.
  @@i = 0

  # Explains what Treat is currently doing.
  #
  # entity  - unused by this method.
  # task    - task name; underscores are shown as spaces.
  # worker  - worker name, appended to the message.
  # group   - worker group; its targets, type and preset_option
  #           are read to phrase the message.
  # options - hash looked up with group.preset_option.
  #
  # Messages are printed one call late: the message for this call
  # is stored and the previous one is printed when it differs, so
  # the very last message is never shown.
  def print_debug(entity, task, worker, group, options)

    targs = group.targets.map(&:to_s)

    if targs.size == 1
      t = targs[0]
    else
      t = targs[0..-2].join(', ') + ' and/or ' + targs[-1]
    end

    genitive = targs.size > 1 ? 'their' : 'its'
    human_task = task.to_s.gsub('_', ' ')
    doing = ''

    case group.type
    when :transformer, :computer
      # Crude past tense: drop a final "e", then add "ed" unless
      # the word already ends in "d".
      tt = human_task
      tt = tt[0..-2] if tt[-1] == 'e'
      ed = tt[-1] == 'd' ? '' : 'ed'
      doing = "#{tt.capitalize}#{ed} #{t}"
    when :annotator
      if group.preset_option
        opt = options[group.preset_option]
        form = opt.to_s.gsub('_', ' ')
        # Drop the task's last character before prefixing the option.
        human_task = form + ' ' + human_task[0..-2]
      end
      doing = "Annotated #{t} with " +
        "#{genitive} #{human_task}"
    end

    if group.to_s.index('Formatters')
      curr = doing + ' in format ' + worker.to_s
    else
      curr = doing + ' using ' + worker.to_s.gsub('_', ' ')
    end

    # Collapse doubled "s" unless the message mentions "class".
    curr = curr.gsub('ss', 's') unless curr.index('class')
    curr += '.'

    if curr == @@prev
      @@i += 1
    else
      # NOTE(review): @@i counts repeats, not occurrences, so a
      # message seen three times in a row is reported with "2" and
      # one seen twice is printed without a count - confirm intent.
      if @@i > 1
        Treat.core.entities.list.each do |e|
          @@prev.gsub!(e.to_s, e.to_s + 's')
        end
        @@prev.gsub!('its', 'their')
        @@prev = @@prev.split(' ').
          insert(1, @@i.to_s).join(' ')
      end
      @@i = 0
      # Nothing is pending on the first call; skipping the print
      # avoids emitting a blank line for the nil message.
      puts @@prev if @@prev # Last call doesn't get shown.
    end

    @@prev = curr

  end

end
|
@@ -1,46 +0,0 @@
|
|
1
|
-
# Keeps a registry of the entities registered on this entity and,
# through the parent chain, on every ancestor. Useful for
# calculating frequencies.
module Treat::Entities::Abilities::Registrable

  # Records an entity in the @registry hash, creating the registry
  # on first use, then passes the same entity up to the parent.
  #
  # The registry has four tables:
  #   :value    - lowercased text => occurrence count
  #               (tokens and phrases only)
  #   :position - entity id => order of registration
  #   :type     - entity type => count
  #   :id       - entity id => true
  def register(entity)

    @count = 0 unless @registry
    @registry ||= {
      :value => {},
      :position => {},
      :type => {},
      :id => {}
    }

    if entity.is_a?(Treat::Entities::Token) ||
       entity.is_a?(Treat::Entities::Phrase)
      text = entity.to_s.downcase
      values = @registry[:value]
      values[text] = (values[text] || 0) + 1
    end

    @registry[:id][entity.id] = true

    types = @registry[:type]
    types[entity.type] = (types[entity.type] || 0) + 1

    @registry[:position][entity.id] = @count
    @count += 1

    @parent.register(entity) if has_parent?

  end

  # Returns this entity's own registry when it has no parent or
  # when its type equals the requested one; otherwise asks the
  # parent. With the default (nil) type the lookup therefore walks
  # up to the root.
  def registry(type = nil)
    return @registry unless has_parent? && type != self.type
    @parent.registry(type)
  end

end
|