treat 1.0.6 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +2 -4
- data/README.md +13 -12
- data/bin/MANIFEST +1 -0
- data/bin/stanford/bridge.jar +0 -0
- data/bin/stanford/joda-time.jar +0 -0
- data/bin/stanford/stanford-corenlp.jar +0 -0
- data/bin/stanford/stanford-parser.jar +0 -0
- data/bin/stanford/xom.jar +0 -0
- data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
- data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
- data/files/{INFO → MANIFEST} +0 -0
- data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
- data/files/weather-central-canada-heat-wave.html +1370 -0
- data/lib/treat/config/core/acronyms.rb +4 -0
- data/lib/treat/config/core/encodings.rb +8 -0
- data/lib/treat/config/core/entities.rb +2 -0
- data/lib/treat/config/core/language.rb +3 -0
- data/lib/treat/config/core/paths.rb +8 -0
- data/lib/treat/config/core/syntax.rb +1 -0
- data/lib/treat/config/core/verbosity.rb +1 -0
- data/lib/treat/config/databases/mongo.rb +3 -0
- data/lib/treat/config/languages/agnostic.rb +34 -0
- data/lib/treat/config/languages/arabic.rb +13 -0
- data/lib/treat/config/languages/chinese.rb +13 -0
- data/lib/treat/config/languages/dutch.rb +12 -0
- data/lib/treat/config/languages/english.rb +60 -0
- data/lib/treat/config/languages/french.rb +18 -0
- data/lib/treat/config/languages/german.rb +18 -0
- data/lib/treat/config/languages/greek.rb +12 -0
- data/lib/treat/config/languages/italian.rb +12 -0
- data/lib/treat/config/languages/polish.rb +12 -0
- data/lib/treat/config/languages/portuguese.rb +12 -0
- data/lib/treat/config/languages/russian.rb +12 -0
- data/lib/treat/config/languages/spanish.rb +12 -0
- data/lib/treat/config/languages/swedish.rb +12 -0
- data/lib/treat/config/libraries/stanford.rb +1 -0
- data/lib/treat/config/linguistics/categories.rb +4 -0
- data/lib/treat/config/linguistics/punctuation.rb +33 -0
- data/lib/treat/config/tags/aligned.rb +221 -0
- data/lib/treat/config/tags/enju.rb +71 -0
- data/lib/treat/config/tags/paris7.rb +17 -0
- data/lib/treat/config/tags/ptb.rb +15 -0
- data/lib/treat/config/workers/extractors.rb +39 -0
- data/lib/treat/config/workers/formatters.rb +20 -0
- data/lib/treat/config/workers/inflectors.rb +27 -0
- data/lib/treat/config/workers/learners.rb +6 -0
- data/lib/treat/config/workers/lexicalizers.rb +18 -0
- data/lib/treat/config/workers/list.rb +1 -0
- data/lib/treat/config/workers/processors.rb +19 -0
- data/lib/treat/config/workers/retrievers.rb +12 -0
- data/lib/treat/config.rb +125 -0
- data/lib/treat/{classification.rb → core/classification.rb} +1 -1
- data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
- data/lib/treat/{tree.rb → core/node.rb} +5 -5
- data/lib/treat/core/server.rb +3 -0
- data/lib/treat/core.rb +5 -0
- data/lib/treat/entities/abilities/buildable.rb +61 -56
- data/lib/treat/entities/abilities/checkable.rb +2 -2
- data/lib/treat/entities/abilities/comparable.rb +21 -0
- data/lib/treat/entities/abilities/copyable.rb +2 -0
- data/lib/treat/entities/abilities/countable.rb +1 -1
- data/lib/treat/entities/abilities/debuggable.rb +1 -1
- data/lib/treat/entities/abilities/delegatable.rb +42 -36
- data/lib/treat/entities/abilities/doable.rb +2 -2
- data/lib/treat/entities/abilities/exportable.rb +1 -1
- data/lib/treat/entities/abilities/iterable.rb +21 -33
- data/lib/treat/entities/abilities/magical.rb +8 -8
- data/lib/treat/entities/abilities/registrable.rb +0 -38
- data/lib/treat/entities/abilities/stringable.rb +19 -19
- data/lib/treat/entities/collection.rb +31 -0
- data/lib/treat/entities/document.rb +10 -0
- data/lib/treat/entities/entity.rb +18 -13
- data/lib/treat/entities/group.rb +15 -0
- data/lib/treat/entities/section.rb +13 -0
- data/lib/treat/entities/token.rb +35 -0
- data/lib/treat/entities/zone.rb +11 -0
- data/lib/treat/entities.rb +5 -75
- data/lib/treat/helpers/didyoumean.rb +57 -0
- data/lib/treat/helpers/escaping.rb +15 -0
- data/lib/treat/helpers/formatting.rb +41 -0
- data/lib/treat/helpers/platform.rb +15 -0
- data/lib/treat/helpers/reflection.rb +17 -0
- data/lib/treat/helpers/temporary.rb +27 -0
- data/lib/treat/helpers/verbosity.rb +19 -0
- data/lib/treat/helpers.rb +5 -0
- data/lib/treat/installer.rb +46 -165
- data/lib/treat/loaders/linguistics.rb +22 -27
- data/lib/treat/loaders/stanford.rb +23 -41
- data/lib/treat/loaders.rb +10 -0
- data/lib/treat/proxies.rb +73 -24
- data/lib/treat/version.rb +3 -0
- data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
- data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
- data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
- data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
- data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
- data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
- data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
- data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
- data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
- data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
- data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
- data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
- data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
- data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
- data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
- data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
- data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
- data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
- data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
- data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
- data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
- data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
- data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
- data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
- data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
- data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
- data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
- data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
- data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
- data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
- data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
- data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
- data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
- data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
- data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
- data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
- data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
- data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
- data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
- data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
- data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
- data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
- data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
- data/lib/treat/workers.rb +96 -0
- data/lib/treat.rb +23 -49
- data/spec/collection.rb +4 -4
- data/spec/document.rb +5 -5
- data/spec/entity.rb +33 -32
- data/spec/{tree.rb → node.rb} +5 -5
- data/spec/phrase.rb +5 -39
- data/spec/sandbox.rb +212 -6
- data/spec/token.rb +12 -9
- data/spec/treat.rb +12 -9
- data/spec/word.rb +10 -9
- data/spec/zone.rb +6 -2
- data/tmp/{INFO → MANIFEST} +0 -0
- data/tmp/english.yaml +10340 -0
- metadata +149 -139
- data/lib/treat/ai.rb +0 -12
- data/lib/treat/categories.rb +0 -90
- data/lib/treat/categorizable.rb +0 -44
- data/lib/treat/configurable.rb +0 -115
- data/lib/treat/dependencies.rb +0 -25
- data/lib/treat/downloader.rb +0 -87
- data/lib/treat/entities/abilities.rb +0 -10
- data/lib/treat/entities/entities.rb +0 -102
- data/lib/treat/exception.rb +0 -7
- data/lib/treat/extractors.rb +0 -79
- data/lib/treat/formatters/serializers/mongo.rb +0 -64
- data/lib/treat/formatters.rb +0 -41
- data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
- data/lib/treat/inflectors.rb +0 -52
- data/lib/treat/kernel.rb +0 -208
- data/lib/treat/languages/arabic.rb +0 -16
- data/lib/treat/languages/chinese.rb +0 -16
- data/lib/treat/languages/dutch.rb +0 -16
- data/lib/treat/languages/english.rb +0 -63
- data/lib/treat/languages/french.rb +0 -20
- data/lib/treat/languages/german.rb +0 -20
- data/lib/treat/languages/greek.rb +0 -16
- data/lib/treat/languages/italian.rb +0 -17
- data/lib/treat/languages/language.rb +0 -10
- data/lib/treat/languages/list.txt +0 -504
- data/lib/treat/languages/polish.rb +0 -16
- data/lib/treat/languages/portuguese.rb +0 -16
- data/lib/treat/languages/russian.rb +0 -16
- data/lib/treat/languages/spanish.rb +0 -16
- data/lib/treat/languages/swedish.rb +0 -16
- data/lib/treat/languages.rb +0 -132
- data/lib/treat/lexicalizers.rb +0 -37
- data/lib/treat/object.rb +0 -7
- data/lib/treat/processors/chunkers/autoselect.rb +0 -16
- data/lib/treat/processors/chunkers/txt.rb +0 -21
- data/lib/treat/processors.rb +0 -38
- data/lib/treat/retrievers.rb +0 -27
- data/lib/treat/server.rb +0 -26
- data/lib/treat/universalisation/encodings.rb +0 -12
- data/lib/treat/universalisation/tags.rb +0 -453
- data/lib/treat/universalisation.rb +0 -9
- data/spec/languages.rb +0 -25
@@ -0,0 +1,71 @@
|
|
1
|
+
# Tag configuration for the Enju parser, evaluated as one Hash literal.
#
# Keys:
#   cat_to_category     - Enju "cat" value => word category name.
#   cat_to_description  - [cat, description] pairs.
#   xcat_to_description - [xcat, description] pairs.
#   xcat_to_ptb         - [cat, xcat, tag] triples; the third element is
#                         presumably the matching Penn Treebank phrase
#                         tag -- TODO confirm against the consumer.
{cat_to_category: {
  'ADJ' => 'adjective',
  'ADV' => 'adverb',
  'CONJ' => 'conjunction',
  'COOD' => 'conjunction',
  'C' => 'complementizer',
  'D' => 'determiner',
  'N' => 'noun',
  'P' => 'preposition',
  'PN' => 'punctuation',
  'SC' => 'conjunction',
  'V' => 'verb',
  'PRT' => 'particle'
},
cat_to_description: [
  ['ADJ', 'Adjective'],
  ['ADV', 'Adverb'],
  ['CONJ', 'Coordination conjunction'],
  ['C', 'Complementizer'],
  ['D', 'Determiner'],
  ['N', 'Noun'],
  ['P', 'Preposition'],
  ['SC', 'Subordination conjunction'],
  ['V', 'Verb'],
  ['COOD', 'Part of coordination'],
  ['PN', 'Punctuation'],
  ['PRT', 'Particle'],
  ['S', 'Sentence']
],
xcat_to_description: [
  ['COOD', 'Coordinated phrase/clause'],
  ['IMP', 'Imperative sentence'],
  ['INV', 'Subject-verb inversion'],
  ['Q', 'Interrogative sentence with subject-verb inversion'],
  ['REL', 'A relativizer included'],
  ['FREL', 'A free relative included'],
  ['TRACE', 'A trace included'],
  ['WH', 'A wh-question word included']
],
xcat_to_ptb: [
  ['ADJP', '', 'ADJP'],
  ['ADJP', 'REL', 'WHADJP'],
  ['ADJP', 'FREL', 'WHADJP'],
  ['ADJP', 'WH', 'WHADJP'],
  ['ADVP', '', 'ADVP'],
  ['ADVP', 'REL', 'WHADVP'],
  ['ADVP', 'FREL', 'WHADVP'],
  ['ADVP', 'WH', 'WHADVP'],
  ['CONJP', '', 'CONJP'],
  ['CP', '', 'SBAR'],
  ['DP', '', 'NP'],
  ['NP', '', 'NP'],
  ['NX', 'NX', 'NAC'],
  # The three NP rows below need their commas: without them Ruby
  # concatenates the adjacent string literals into one element
  # (e.g. 'NPRELWHNP') instead of a [cat, xcat, tag] triple.
  ['NP', 'REL', 'WHNP'],
  ['NP', 'FREL', 'WHNP'],
  ['NP', 'WH', 'WHNP'],
  ['PP', '', 'PP'],
  ['PP', 'REL', 'WHPP'],
  ['PP', 'WH', 'WHPP'],
  ['PRT', '', 'PRT'],
  ['S', '', 'S'],
  ['S', 'INV', 'SINV'],
  ['S', 'Q', 'SQ'],
  ['S', 'REL', 'SBAR'],
  ['S', 'FREL', 'SBAR'],
  ['S', 'WH', 'SBARQ'],
  ['SCP', '', 'SBAR'],
  ['VP', '', 'VP'],
  # NOTE(review): this row duplicates the previous one; kept as-is in
  # case a different cat (not visible here) was intended -- confirm.
  ['VP', '', 'VP'],
  ['', '', 'UK']
]}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
# Paris7 tag configuration, evaluated as one Hash literal.
# :tag_to_category maps a tag string to a category symbol.
#
# Paris7 Treebank functional tags, kept here for reference only:
#   SUJ    (subject)
#   OBJ    (direct object)
#   ATS    (predicative complement of a subject)
#   ATO    (predicative complement of a direct object)
#   MOD    (modifier or adjunct)
#   A-OBJ  (indirect complement introduced by à)
#   DE-OBJ (indirect complement introduced by de)
#   P-OBJ  (indirect complement introduced by another preposition)
{
  :tag_to_category => {
    'C'  => :complementizer,
    'PN' => :punctuation,
    'SC' => :conjunction
  }
}
|
@@ -0,0 +1,15 @@
|
|
1
|
+
# Penn Treebank (PTB) tag configuration, evaluated as one Hash literal.
#
# Keys:
#   escape_characters         - bracket character => PTB escape token.
#   phrase_tag_to_description - [clause tag, description] pairs.
{escape_characters: {
  '(' => '-LRB-',
  ')' => '-RRB-',
  '[' => '-LSB-',
  ']' => '-RSB-',
  '{' => '-LCB-',
  '}' => '-RCB-'
},
phrase_tag_to_description: [
  # Was 'Paris7 declarative clause', which looks like a stray
  # search-and-replace; the PTB clause label S is the simple
  # declarative clause.
  ['S', 'Simple declarative clause'],
  ['SBAR', 'Clause introduced by a (possibly empty) subordinating conjunction'],
  ['SBARQ', 'Direct question introduced by a wh-word or a wh-phrase'],
  ['SINV', 'Inverted declarative sentence'],
  ['SQ', 'Inverted yes/no question']
]}
|
@@ -0,0 +1,39 @@
|
|
1
|
+
# Extractor worker groups, evaluated as one Hash literal.
# Every group declares its :type and the entity types it :targets;
# :language additionally names a :default worker.
{
  :language     => { :type => :annotator, :targets => [:entity],
                     :default => :what_language },
  :time         => { :type => :annotator, :targets => [:phrase] },
  :topics       => { :type => :annotator,
                     :targets => [:document, :section, :zone] },
  :keywords     => { :type => :annotator,
                     :targets => [:document, :section, :zone] },
  :topic_words  => { :type => :annotator, :targets => [:collection] },
  :name_tag     => { :type => :annotator, :targets => [:phrase, :word] },
  :coreferences => { :type => :annotator, :targets => [:zone] },
  :tf_idf       => { :type => :annotator, :targets => [:word] },
  :summary      => { :type => :annotator, :targets => [:document] }
}
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# Formatter worker groups, evaluated as one Hash literal.
# Every group declares its :type and the entity types it :targets;
# :serializers and :visualizers also name a :default worker.
{
  :readers       => { :type => :computer, :targets => [:document] },
  :unserializers => { :type => :computer, :targets => [:entity] },
  :serializers   => { :type => :computer, :targets => [:entity],
                      :default => :yaml },
  :visualizers   => { :type => :computer, :targets => [:entity],
                      :default => :tree }
}
|
@@ -0,0 +1,27 @@
|
|
1
|
+
# Inflector worker groups, evaluated as one Hash literal.
# Every group declares its :type and the entity types it :targets.
# :declensors and :conjugators also declare a :preset_option together
# with the list of :presets associated with it.
{
  :stemmers => { :type => :annotator, :targets => [:word] },
  :declensors => {
    :type => :annotator,
    :targets => [:word],
    :preset_option => :count,
    :presets => [:plural, :singular]
  },
  :conjugators => {
    :type => :annotator,
    :targets => [:word],
    :preset_option => :form,
    :presets => [
      :infinitive, :present_participle,
      :plural_verb, :singular_verb
    ]
  },
  :cardinalizers => { :type => :annotator, :targets => [:number] },
  :ordinalizers  => { :type => :annotator, :targets => [:number] }
}
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Lexicalizer worker groups, evaluated as one Hash literal.
# Every group declares its :type and the entity types it :targets.
# :categorizers is flagged :recursive; :sensers declares a
# :preset_option together with its list of :presets.
{
  :taggers => { :type => :annotator, :targets => [:phrase, :token] },
  :categorizers => {
    :type => :annotator,
    :targets => [:phrase, :token],
    :recursive => true
  },
  :sensers => {
    :type => :annotator,
    :targets => [:word],
    :preset_option => :nym,
    :presets => [
      :synonyms, :antonyms,
      :hyponyms, :hypernyms
    ]
  }
}
|
@@ -0,0 +1 @@
|
|
1
|
+
# Names of the worker categories, evaluated as one Array literal.
[
  :extractors,
  :inflectors,
  :formatters,
  :learners,
  :lexicalizers,
  :processors,
  :retrievers
]
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Processor worker groups, evaluated as one Hash literal.
# Every group declares its :type and the entity types it :targets;
# :chunkers additionally names a :default worker.
{
  :chunkers   => { :type => :transformer, :targets => [:document],
                   :default => :autoselect },
  :segmenters => { :type => :transformer, :targets => [:zone] },
  :tokenizers => { :type => :transformer,
                   :targets => [:sentence, :phrase] },
  :parsers    => { :type => :transformer,
                   :targets => [:sentence, :phrase] }
}
|
data/lib/treat/config.rb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
# Builds Treat's configuration from the Ruby files found under
# lib/treat/config/<group>/<key>.rb and exposes it through method
# calls on the Treat module (any unknown method on Treat is looked
# up as a top-level configuration key).
module Treat::Config

  # Directory names (relative to the gem root) whose full paths are
  # stored under the :paths configuration key.
  Paths = [ :tmp, :lib, :bin,
    :files, :data, :models, :spec ]

  class << self
    # The configuration produced by configure.
    attr_accessor :config
  end

  Treat.module_eval do
    # Handle all missing methods as conf options.
    def self.method_missing(sym, *args, &block)
      # :to_ary is never a configuration key; let the default
      # implementation raise NoMethodError for it.
      super(sym, *args, &block) if sym == :to_ary
      Treat::Config.config[sym]
    end

    # Keep respond_to? in line with method_missing: answer true
    # for the members of the top-level configuration struct.
    def self.respond_to_missing?(sym, include_private = false)
      conf = Treat::Config.config
      known = sym != :to_ary && conf.respond_to?(:members) &&
        conf.members.map { |m| m.to_sym }.include?(sym)
      known || super
    end
  end

  # Read every configuration file, add the path and tag alignment
  # entries, and store the result (converted to structs) in config.
  def self.configure
    # Temporary configuration hash.
    config = { paths: {} }
    confdir = get_full_path(:lib) + 'treat/config'
    # Iterate over each directory in the config.
    Dir[confdir + '/*'].each do |dir|
      name = File.basename(dir, '.*').intern
      config[name] = {}
      # Iterate over each file in the directory.
      Dir[confdir + "/#{name}/*.rb"].each do |file|
        key = File.basename(file, '.*').intern
        # Each file holds a single Ruby literal. The file name is
        # passed along so that an error inside a configuration
        # file is reported against that file.
        config[name][key] = eval(File.read(file), binding, file)
      end
    end
    # Get the path config.
    Paths.each do |path|
      config[:paths][path] = get_full_path(path)
    end
    # Get the tag alignments.
    configure_tags!(config[:tags][:aligned])
    # Convert hash to structs.
    self.config = self.hash_to_struct(config)
  end

  # Return the path of the given top-level directory, built relative
  # to this file's directory and ending with a slash.
  def self.get_full_path(dir)
    File.dirname(__FILE__) +
    '/../../' + dir.to_s + "/"
  end

  # Add the :word_tags_to_category and :phrase_tags_to_category
  # lookup tables to the aligned tag configuration (in place).
  def self.configure_tags!(config)
    ts = config[:tag_sets]
    config[:word_tags_to_category] =
    align_tags(config[:word_tags], ts)
    config[:phrase_tags_to_category] =
    align_tags(config[:phrase_tags], ts)
  end

  # Align tag configuration.
  #
  # tags is a flat list alternating a description and the list of
  # tags aligned with tag_sets by position. Returns a hash of the
  # form { tag => { tag_set => category } }, where the category is
  # the first word of the description, downcased.
  def self.align_tags(tags, tag_sets)
    wttc = {}
    # The block parameter used to be named `tags` as well, which
    # shadowed the method argument.
    tags.each_slice(2) do |desc, aligned|
      category = desc.gsub(',', ' ,').
      split(' ')[0].downcase
      tag_sets.each_with_index do |tag_set, i|
        tag = aligned[i]
        next unless tag
        wttc[tag] ||= {}
        wttc[tag][tag_set] = category
      end
    end
    wttc
  end

  # Recursively convert a hash with symbol keys into a struct.
  # A hash with at least one non-symbol key is returned untouched,
  # and so is an empty hash (a struct cannot be declared without
  # members on every Ruby version).
  def self.hash_to_struct(hash)
    return hash if hash.empty? ||
    hash.keys.any? { |k| !k.is_a?(Symbol) }
    struct = Struct.new(
    *hash.keys).new(*hash.values)
    hash.each do |key, value|
      if value.is_a?(Hash)
        struct[key] =
        self.hash_to_struct(value)
      end
    end
    struct
  end

  # Turn on syntactic sugar.
  def self.sweeten!

    # Builder shortcuts on Treat::Entities, one per constant.
    # TODO: these are not yet undone by unsweeten!.
    Treat::Entities.module_eval do
      self.constants.each do |type|
        define_singleton_method(type) do |value='', id=nil|
          const_get(type).build(value, id)
        end
      end
    end

    return if Treat.core.syntax.sweetened
    Treat.core.syntax.sweetened = true
    # Global builder methods on Object, one per listed entity type.
    Treat.core.entities.list.each do |type|
      next if type == :Symbol
      kname = cc(type).intern
      klass = Treat::Entities.const_get(kname)
      Object.class_eval do
        define_method(kname) do |val, opts={}|
          klass.build(val, opts)
        end
      end
    end
  end

  # Turn off syntactic sugar.
  def self.unsweeten!
    return unless Treat.core.syntax.sweetened
    Treat.core.syntax.sweetened = false
    Treat.core.entities.list.each do |type|
      next if type == :Symbol
      name = cc(type).intern
      Object.class_eval { remove_method(name) }
    end

  end

  # Run all configuration.
  self.configure

end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
# This module provides an abstract tree structure.
|
2
|
-
module Treat::
|
2
|
+
module Treat::Core
|
3
3
|
|
4
4
|
# This class is a node for an N-ary tree data structure
|
5
5
|
# with a unique identifier, text value, children, features
|
@@ -113,7 +113,7 @@ module Treat::Tree
|
|
113
113
|
# node from the children.
|
114
114
|
def remove!(ion)
|
115
115
|
return nil unless ion
|
116
|
-
if ion.is_a? Treat::
|
116
|
+
if ion.is_a? Treat::Core::Node
|
117
117
|
@children.delete(ion)
|
118
118
|
@children_hash.delete(ion.id)
|
119
119
|
ion.set_as_root!
|
@@ -203,7 +203,7 @@ module Treat::Tree
|
|
203
203
|
# the supplied dependency type.
|
204
204
|
def link(id_or_node, type = nil,
|
205
205
|
directed = true, direction = 1)
|
206
|
-
if id_or_node.is_a?(Treat::
|
206
|
+
if id_or_node.is_a?(Treat::Core::Node)
|
207
207
|
id = root.find(id_or_node).id
|
208
208
|
else
|
209
209
|
id = id_or_node
|
@@ -220,7 +220,7 @@ module Treat::Tree
|
|
220
220
|
|
221
221
|
# Find the node in the tree with the given id.
|
222
222
|
def find(id_or_node)
|
223
|
-
if id_or_node.is_a?(Treat::
|
223
|
+
if id_or_node.is_a?(Treat::Core::Node)
|
224
224
|
id = id_or_node.id
|
225
225
|
else
|
226
226
|
id = id_or_node
|
@@ -230,7 +230,7 @@ module Treat::Tree
|
|
230
230
|
end
|
231
231
|
self.each do |child|
|
232
232
|
r = child.find(id)
|
233
|
-
return r if r.is_a? Treat::
|
233
|
+
return r if r.is_a? Treat::Core::Node
|
234
234
|
end
|
235
235
|
nil
|
236
236
|
end
|
data/lib/treat/core.rb
ADDED
@@ -3,9 +3,11 @@
|
|
3
3
|
# a string or a numeric object. This class
|
4
4
|
# is pretty much self-explanatory.
|
5
5
|
module Treat::Entities::Abilities::Buildable
|
6
|
-
|
6
|
+
|
7
|
+
require 'schiphol'
|
7
8
|
require 'fileutils'
|
8
|
-
|
9
|
+
require 'uri'
|
10
|
+
|
9
11
|
# Simple regexps to match common entities.
|
10
12
|
WordRegexp = /^[[:alpha:]\-']+$/
|
11
13
|
NumberRegexp = /^#?([0-9]+)(\.[0-9]+)?$/
|
@@ -23,7 +25,9 @@ module Treat::Entities::Abilities::Buildable
|
|
23
25
|
def build(file_or_value, options = {})
|
24
26
|
|
25
27
|
fv = file_or_value.to_s
|
26
|
-
if self == Treat::Entities::Document
|
28
|
+
if self == Treat::Entities::Document ||
|
29
|
+
(fv.index('yml') || fv.index('yaml') ||
|
30
|
+
fv.index('xml') || fv.index('mongo'))
|
27
31
|
if fv =~ UriRegexp
|
28
32
|
from_url(fv, options)
|
29
33
|
else
|
@@ -82,22 +86,18 @@ module Treat::Entities::Abilities::Buildable
|
|
82
86
|
'Cannot create something ' +
|
83
87
|
'else than a document from a url.'
|
84
88
|
end
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
f = Treat::Downloader.download(
|
96
|
-
uri.scheme, uri.host, path, file)
|
97
|
-
options[:default_to] ||= :html
|
89
|
+
|
90
|
+
f = Schiphol.download(url,
|
91
|
+
:download_folder => Treat.paths.files,
|
92
|
+
:show_progress => Treat.core.verbosity.silence,
|
93
|
+
:rectify_extensions => true,
|
94
|
+
:max_tries => 3
|
95
|
+
)
|
96
|
+
|
97
|
+
options[:default_to] ||= 'html'
|
98
98
|
|
99
99
|
e = from_file(f, options)
|
100
|
-
e.set :url,
|
100
|
+
e.set :url, url.to_s
|
101
101
|
e
|
102
102
|
|
103
103
|
end
|
@@ -161,24 +161,17 @@ module Treat::Entities::Abilities::Buildable
|
|
161
161
|
|
162
162
|
# Build a document from a raw or serialized file.
|
163
163
|
def from_file(file, options)
|
164
|
-
|
165
|
-
unless File.readable?(file)
|
166
|
-
raise Treat::Exception,
|
167
|
-
"Path '#{file}' does not "+
|
168
|
-
"point to a readable file."
|
169
|
-
end
|
170
164
|
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
if fmt == :yaml || fmt == :yml ||
|
176
|
-
(fmt == :xml && is_treat_xml?(file))
|
177
|
-
f = from_serialized_file(file, options)
|
165
|
+
|
166
|
+
if file.index('yml') || file.index('yaml') || file.index('xml') || file.index('mongo')
|
167
|
+
from_serialized_file(file, options)
|
178
168
|
else
|
179
|
-
|
169
|
+
fmt = Treat::Workers::Formatters::Readers::Autoselect.
|
170
|
+
detect_format(file, options[:default_to])
|
171
|
+
options[:_format] = fmt
|
172
|
+
from_raw_file(file, options)
|
180
173
|
end
|
181
|
-
|
174
|
+
|
182
175
|
end
|
183
176
|
|
184
177
|
# Build a document from a raw file.
|
@@ -190,7 +183,13 @@ module Treat::Entities::Abilities::Buildable
|
|
190
183
|
"Cannot create something else than a " +
|
191
184
|
"document from raw file '#{file}'."
|
192
185
|
end
|
193
|
-
|
186
|
+
|
187
|
+
unless File.readable?(file)
|
188
|
+
raise Treat::Exception,
|
189
|
+
"Path '#{file}' does not "+
|
190
|
+
"point to a readable file."
|
191
|
+
end
|
192
|
+
|
194
193
|
d = Treat::Entities::Document.new(file)
|
195
194
|
|
196
195
|
d.read(:autoselect, options)
|
@@ -200,11 +199,29 @@ module Treat::Entities::Abilities::Buildable
|
|
200
199
|
# Build an entity from a serialized file.
|
201
200
|
def from_serialized_file(file, options)
|
202
201
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
202
|
+
if file.index('mongo')
|
203
|
+
options[:id] = file.scan( # Consolidate this
|
204
|
+
/([0-9]+)\.mongo/).first.first
|
205
|
+
from_db(:mongo, options)
|
206
|
+
else
|
207
|
+
unless File.readable?(file)
|
208
|
+
raise Treat::Exception,
|
209
|
+
"Path '#{file}' does not "+
|
210
|
+
"point to a readable file."
|
211
|
+
end
|
212
|
+
d = Treat::Entities::Document.new(file)
|
213
|
+
d.unserialize(:autoselect, options)
|
214
|
+
d.children[0].set_as_root! # Fix this
|
215
|
+
d.children[0]
|
216
|
+
end
|
217
|
+
|
218
|
+
end
|
219
|
+
|
220
|
+
def from_db(adapter, options)
|
221
|
+
id = options[:id]
|
222
|
+
e = self.new(nil, id)
|
223
|
+
e.unserialize(adapter, options)
|
224
|
+
e
|
208
225
|
end
|
209
226
|
|
210
227
|
# Build any kind of entity from a string.
|
@@ -217,7 +234,7 @@ module Treat::Entities::Abilities::Buildable
|
|
217
234
|
"collection from a string " +
|
218
235
|
"(need a readable file/folder)."
|
219
236
|
when :phrase
|
220
|
-
|
237
|
+
sentence_or_phrase_from_string(string)
|
221
238
|
when :token
|
222
239
|
token_from_string(string)
|
223
240
|
when :zone
|
@@ -229,7 +246,7 @@ module Treat::Entities::Abilities::Buildable
|
|
229
246
|
if string.gsub(/[\.\!\?]+/,
|
230
247
|
'.').count('.') <= 1 &&
|
231
248
|
string.count("\n") == 0
|
232
|
-
|
249
|
+
sentence_or_phrase_from_string(string)
|
233
250
|
else
|
234
251
|
zone_from_string(string)
|
235
252
|
end
|
@@ -245,11 +262,13 @@ module Treat::Entities::Abilities::Buildable
|
|
245
262
|
end
|
246
263
|
|
247
264
|
# Build a phrase from a string.
|
248
|
-
def
|
265
|
+
def sentence_or_phrase_from_string(string)
|
249
266
|
|
250
267
|
check_encoding(string)
|
251
268
|
|
252
|
-
if string
|
269
|
+
if !(string =~ /[a-zA-Z]+/)
|
270
|
+
Treat::Entities::Fragment.new(string)
|
271
|
+
elsif string.count('.!?') >= 1
|
253
272
|
Treat::Entities::Sentence.new(string)
|
254
273
|
else
|
255
274
|
Treat::Entities::Phrase.new(string)
|
@@ -300,20 +319,6 @@ module Treat::Entities::Abilities::Buildable
|
|
300
319
|
end
|
301
320
|
|
302
321
|
end
|
303
|
-
|
304
|
-
# Eventually find a better way.
|
305
|
-
def is_treat_xml?(file)
|
306
|
-
|
307
|
-
beginning = nil
|
308
|
-
|
309
|
-
File.open(file) do |w|
|
310
|
-
beginning = w.readlines(200)
|
311
|
-
end
|
312
|
-
|
313
|
-
beginning = beginning.join(' ')
|
314
|
-
beginning.count('<treat>') > 0
|
315
|
-
|
316
|
-
end
|
317
322
|
|
318
323
|
def create_collection(fv)
|
319
324
|
FileUtils.mkdir(fv)
|
@@ -11,8 +11,8 @@ module Treat::Entities::Abilities::Checkable
|
|
11
11
|
return @features[feature] if has?(feature)
|
12
12
|
return send(feature) if do_it
|
13
13
|
task = caller_method(2) # This is dangerous !
|
14
|
-
g1 = Treat::
|
15
|
-
g2 = Treat::
|
14
|
+
g1 = Treat::Workers.lookup(task)
|
15
|
+
g2 = Treat::Workers.lookup(feature)
|
16
16
|
|
17
17
|
raise Treat::Exception,
|
18
18
|
"#{g1.type.to_s.capitalize} #{task} " +
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# Lets entity classes be ordered according to the position of their
# type in Treat.core.entities.order.
module Treat::Entities::Abilities::Comparable

  # Compare the rank of the receiver with the rank of klass.
  #
  # The rank of a class is the index, in Treat.core.entities.order,
  # of a type whose class it equals or descends from. The receiver
  # is presumably an entity class (this module being extended onto
  # it) -- TODO confirm.
  #
  # Returns -1, 0 or 1 when the receiver ranks respectively lower
  # than, the same as, or higher than klass.
  # Raises Treat::Exception when either class has no rank.
  def compare_with(klass)

    rank_a = nil; rank_b = nil

    Treat.core.entities.order.each_with_index do |type, i|
      klass2 = Treat::Entities.const_get(cc(type))
      rank_a = i if self <= klass2
      rank_b = i if klass <= klass2
      # Stop as soon as both ranks are known. The previous `next`
      # kept iterating with a frozen index, so a later match could
      # overwrite a rank with the wrong position.
      break if rank_a && rank_b
    end

    unless rank_a && rank_b
      raise Treat::Exception,
      "Cannot compare #{self} with #{klass}: " +
      "no rank found in the entity order."
    end

    rank_a <=> rank_b

  end

end
|