treat 1.0.6 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (210) hide show
  1. data/LICENSE +2 -4
  2. data/README.md +13 -12
  3. data/bin/MANIFEST +1 -0
  4. data/bin/stanford/bridge.jar +0 -0
  5. data/bin/stanford/joda-time.jar +0 -0
  6. data/bin/stanford/stanford-corenlp.jar +0 -0
  7. data/bin/stanford/stanford-parser.jar +0 -0
  8. data/bin/stanford/xom.jar +0 -0
  9. data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
  10. data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
  11. data/files/{INFO → MANIFEST} +0 -0
  12. data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
  13. data/files/weather-central-canada-heat-wave.html +1370 -0
  14. data/lib/treat/config/core/acronyms.rb +4 -0
  15. data/lib/treat/config/core/encodings.rb +8 -0
  16. data/lib/treat/config/core/entities.rb +2 -0
  17. data/lib/treat/config/core/language.rb +3 -0
  18. data/lib/treat/config/core/paths.rb +8 -0
  19. data/lib/treat/config/core/syntax.rb +1 -0
  20. data/lib/treat/config/core/verbosity.rb +1 -0
  21. data/lib/treat/config/databases/mongo.rb +3 -0
  22. data/lib/treat/config/languages/agnostic.rb +34 -0
  23. data/lib/treat/config/languages/arabic.rb +13 -0
  24. data/lib/treat/config/languages/chinese.rb +13 -0
  25. data/lib/treat/config/languages/dutch.rb +12 -0
  26. data/lib/treat/config/languages/english.rb +60 -0
  27. data/lib/treat/config/languages/french.rb +18 -0
  28. data/lib/treat/config/languages/german.rb +18 -0
  29. data/lib/treat/config/languages/greek.rb +12 -0
  30. data/lib/treat/config/languages/italian.rb +12 -0
  31. data/lib/treat/config/languages/polish.rb +12 -0
  32. data/lib/treat/config/languages/portuguese.rb +12 -0
  33. data/lib/treat/config/languages/russian.rb +12 -0
  34. data/lib/treat/config/languages/spanish.rb +12 -0
  35. data/lib/treat/config/languages/swedish.rb +12 -0
  36. data/lib/treat/config/libraries/stanford.rb +1 -0
  37. data/lib/treat/config/linguistics/categories.rb +4 -0
  38. data/lib/treat/config/linguistics/punctuation.rb +33 -0
  39. data/lib/treat/config/tags/aligned.rb +221 -0
  40. data/lib/treat/config/tags/enju.rb +71 -0
  41. data/lib/treat/config/tags/paris7.rb +17 -0
  42. data/lib/treat/config/tags/ptb.rb +15 -0
  43. data/lib/treat/config/workers/extractors.rb +39 -0
  44. data/lib/treat/config/workers/formatters.rb +20 -0
  45. data/lib/treat/config/workers/inflectors.rb +27 -0
  46. data/lib/treat/config/workers/learners.rb +6 -0
  47. data/lib/treat/config/workers/lexicalizers.rb +18 -0
  48. data/lib/treat/config/workers/list.rb +1 -0
  49. data/lib/treat/config/workers/processors.rb +19 -0
  50. data/lib/treat/config/workers/retrievers.rb +12 -0
  51. data/lib/treat/config.rb +125 -0
  52. data/lib/treat/{classification.rb → core/classification.rb} +1 -1
  53. data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
  54. data/lib/treat/{tree.rb → core/node.rb} +5 -5
  55. data/lib/treat/core/server.rb +3 -0
  56. data/lib/treat/core.rb +5 -0
  57. data/lib/treat/entities/abilities/buildable.rb +61 -56
  58. data/lib/treat/entities/abilities/checkable.rb +2 -2
  59. data/lib/treat/entities/abilities/comparable.rb +21 -0
  60. data/lib/treat/entities/abilities/copyable.rb +2 -0
  61. data/lib/treat/entities/abilities/countable.rb +1 -1
  62. data/lib/treat/entities/abilities/debuggable.rb +1 -1
  63. data/lib/treat/entities/abilities/delegatable.rb +42 -36
  64. data/lib/treat/entities/abilities/doable.rb +2 -2
  65. data/lib/treat/entities/abilities/exportable.rb +1 -1
  66. data/lib/treat/entities/abilities/iterable.rb +21 -33
  67. data/lib/treat/entities/abilities/magical.rb +8 -8
  68. data/lib/treat/entities/abilities/registrable.rb +0 -38
  69. data/lib/treat/entities/abilities/stringable.rb +19 -19
  70. data/lib/treat/entities/collection.rb +31 -0
  71. data/lib/treat/entities/document.rb +10 -0
  72. data/lib/treat/entities/entity.rb +18 -13
  73. data/lib/treat/entities/group.rb +15 -0
  74. data/lib/treat/entities/section.rb +13 -0
  75. data/lib/treat/entities/token.rb +35 -0
  76. data/lib/treat/entities/zone.rb +11 -0
  77. data/lib/treat/entities.rb +5 -75
  78. data/lib/treat/helpers/didyoumean.rb +57 -0
  79. data/lib/treat/helpers/escaping.rb +15 -0
  80. data/lib/treat/helpers/formatting.rb +41 -0
  81. data/lib/treat/helpers/platform.rb +15 -0
  82. data/lib/treat/helpers/reflection.rb +17 -0
  83. data/lib/treat/helpers/temporary.rb +27 -0
  84. data/lib/treat/helpers/verbosity.rb +19 -0
  85. data/lib/treat/helpers.rb +5 -0
  86. data/lib/treat/installer.rb +46 -165
  87. data/lib/treat/loaders/linguistics.rb +22 -27
  88. data/lib/treat/loaders/stanford.rb +23 -41
  89. data/lib/treat/loaders.rb +10 -0
  90. data/lib/treat/proxies.rb +73 -24
  91. data/lib/treat/version.rb +3 -0
  92. data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
  93. data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
  94. data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
  95. data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
  96. data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
  97. data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
  98. data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
  99. data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
  100. data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
  101. data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
  102. data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
  103. data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
  104. data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
  105. data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
  106. data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
  107. data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
  108. data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
  109. data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
  110. data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
  111. data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
  112. data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
  113. data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
  114. data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
  115. data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
  116. data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
  117. data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
  118. data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
  119. data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
  120. data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
  121. data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
  122. data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
  123. data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
  124. data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
  125. data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
  126. data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
  127. data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
  128. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
  129. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
  130. data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
  131. data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
  132. data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
  133. data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
  134. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
  135. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
  136. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
  137. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
  138. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
  139. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
  140. data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
  141. data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
  142. data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
  143. data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
  144. data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
  145. data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
  146. data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
  147. data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
  148. data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
  149. data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
  150. data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
  151. data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
  152. data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
  153. data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
  154. data/lib/treat/workers.rb +96 -0
  155. data/lib/treat.rb +23 -49
  156. data/spec/collection.rb +4 -4
  157. data/spec/document.rb +5 -5
  158. data/spec/entity.rb +33 -32
  159. data/spec/{tree.rb → node.rb} +5 -5
  160. data/spec/phrase.rb +5 -39
  161. data/spec/sandbox.rb +212 -6
  162. data/spec/token.rb +12 -9
  163. data/spec/treat.rb +12 -9
  164. data/spec/word.rb +10 -9
  165. data/spec/zone.rb +6 -2
  166. data/tmp/{INFO → MANIFEST} +0 -0
  167. data/tmp/english.yaml +10340 -0
  168. metadata +149 -139
  169. data/lib/treat/ai.rb +0 -12
  170. data/lib/treat/categories.rb +0 -90
  171. data/lib/treat/categorizable.rb +0 -44
  172. data/lib/treat/configurable.rb +0 -115
  173. data/lib/treat/dependencies.rb +0 -25
  174. data/lib/treat/downloader.rb +0 -87
  175. data/lib/treat/entities/abilities.rb +0 -10
  176. data/lib/treat/entities/entities.rb +0 -102
  177. data/lib/treat/exception.rb +0 -7
  178. data/lib/treat/extractors.rb +0 -79
  179. data/lib/treat/formatters/serializers/mongo.rb +0 -64
  180. data/lib/treat/formatters.rb +0 -41
  181. data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
  182. data/lib/treat/inflectors.rb +0 -52
  183. data/lib/treat/kernel.rb +0 -208
  184. data/lib/treat/languages/arabic.rb +0 -16
  185. data/lib/treat/languages/chinese.rb +0 -16
  186. data/lib/treat/languages/dutch.rb +0 -16
  187. data/lib/treat/languages/english.rb +0 -63
  188. data/lib/treat/languages/french.rb +0 -20
  189. data/lib/treat/languages/german.rb +0 -20
  190. data/lib/treat/languages/greek.rb +0 -16
  191. data/lib/treat/languages/italian.rb +0 -17
  192. data/lib/treat/languages/language.rb +0 -10
  193. data/lib/treat/languages/list.txt +0 -504
  194. data/lib/treat/languages/polish.rb +0 -16
  195. data/lib/treat/languages/portuguese.rb +0 -16
  196. data/lib/treat/languages/russian.rb +0 -16
  197. data/lib/treat/languages/spanish.rb +0 -16
  198. data/lib/treat/languages/swedish.rb +0 -16
  199. data/lib/treat/languages.rb +0 -132
  200. data/lib/treat/lexicalizers.rb +0 -37
  201. data/lib/treat/object.rb +0 -7
  202. data/lib/treat/processors/chunkers/autoselect.rb +0 -16
  203. data/lib/treat/processors/chunkers/txt.rb +0 -21
  204. data/lib/treat/processors.rb +0 -38
  205. data/lib/treat/retrievers.rb +0 -27
  206. data/lib/treat/server.rb +0 -26
  207. data/lib/treat/universalisation/encodings.rb +0 -12
  208. data/lib/treat/universalisation/tags.rb +0 -453
  209. data/lib/treat/universalisation.rb +0 -9
  210. data/spec/languages.rb +0 -25
@@ -0,0 +1,71 @@
1
+ {cat_to_category: {
2
+ 'ADJ' => 'adjective',
3
+ 'ADV' => 'adverb',
4
+ 'CONJ' => 'conjunction',
5
+ 'COOD' => 'conjunction',
6
+ 'C' => 'complementizer',
7
+ 'D' => 'determiner',
8
+ 'N' => 'noun',
9
+ 'P' => 'preposition',
10
+ 'PN' => 'punctuation',
11
+ 'SC' => 'conjunction',
12
+ 'V' => 'verb',
13
+ 'PRT' => 'particle'
14
+ },
15
+ cat_to_description: [
16
+ ['ADJ', 'Adjective'],
17
+ ['ADV', 'Adverb'],
18
+ ['CONJ', 'Coordination conjunction'],
19
+ ['C', 'Complementizer'],
20
+ ['D', 'Determiner'],
21
+ ['N', 'Noun'],
22
+ ['P', 'Preposition'],
23
+ ['SC', 'Subordination conjunction'],
24
+ ['V', 'Verb'],
25
+ ['COOD', 'Part of coordination'],
26
+ ['PN', 'Punctuation'],
27
+ ['PRT', 'Particle'],
28
+ ['S', 'Sentence']
29
+ ],
30
+ xcat_to_description: [
31
+ ['COOD', 'Coordinated phrase/clause'],
32
+ ['IMP', 'Imperative sentence'],
33
+ ['INV', 'Subject-verb inversion'],
34
+ ['Q', 'Interrogative sentence with subject-verb inversion'],
35
+ ['REL', 'A relativizer included'],
36
+ ['FREL', 'A free relative included'],
37
+ ['TRACE', 'A trace included'],
38
+ ['WH', 'A wh-question word included']
39
+ ],
40
+ xcat_to_ptb: [
41
+ ['ADJP', '', 'ADJP'],
42
+ ['ADJP', 'REL', 'WHADJP'],
43
+ ['ADJP', 'FREL', 'WHADJP'],
44
+ ['ADJP', 'WH', 'WHADJP'],
45
+ ['ADVP', '', 'ADVP'],
46
+ ['ADVP', 'REL', 'WHADVP'],
47
+ ['ADVP', 'FREL', 'WHADVP'],
48
+ ['ADVP', 'WH', 'WHADVP'],
49
+ ['CONJP', '', 'CONJP'],
50
+ ['CP', '', 'SBAR'],
51
+ ['DP', '', 'NP'],
52
+ ['NP', '', 'NP'],
53
+ ['NX', 'NX', 'NAC'],
54
+ ['NP' 'REL' 'WHNP'],
55
+ ['NP' 'FREL' 'WHNP'],
56
+ ['NP' 'WH' 'WHNP'],
57
+ ['PP', '', 'PP'],
58
+ ['PP', 'REL', 'WHPP'],
59
+ ['PP', 'WH', 'WHPP'],
60
+ ['PRT', '', 'PRT'],
61
+ ['S', '', 'S'],
62
+ ['S', 'INV', 'SINV'],
63
+ ['S', 'Q', 'SQ'],
64
+ ['S', 'REL', 'SBAR'],
65
+ ['S', 'FREL', 'SBAR'],
66
+ ['S', 'WH', 'SBARQ'],
67
+ ['SCP', '', 'SBAR'],
68
+ ['VP', '', 'VP'],
69
+ ['VP', '', 'VP'],
70
+ ['', '', 'UK']
71
+ ]}
@@ -0,0 +1,17 @@
1
+ {tag_to_category: {
2
+ 'C' => :complementizer,
3
+ 'PN' => :punctuation,
4
+ 'SC' => :conjunction
5
+ }
6
+ # Paris7 Treebank functional tags
7
+ =begin
8
+ SUJ (subject)
9
+ OBJ (direct object)
10
+ ATS (predicative complement of a subject)
11
+ ATO (predicative complement of a direct object)
12
+ MOD (modifier or adjunct)
13
+ A-OBJ (indirect complement introduced by à)
14
+ DE-OBJ (indirect complement introduced by de)
15
+ P-OBJ (indirect complement introduced by another preposition)
16
+ =end
17
+ }
@@ -0,0 +1,15 @@
1
+ {escape_characters: {
2
+ '(' => '-LRB-',
3
+ ')' => '-RRB-',
4
+ '[' => '-LSB-',
5
+ ']' => '-RSB-',
6
+ '{' => '-LCB-',
7
+ '}' => '-RCB-'
8
+ },
9
+ phrase_tag_to_description: [
10
+ ['S', 'Paris7 declarative clause'],
11
+ ['SBAR', 'Clause introduced by a (possibly empty) subordinating conjunction'],
12
+ ['SBARQ', 'Direct question introduced by a wh-word or a wh-phrase'],
13
+ ['SINV', 'Inverted declarative sentence'],
14
+ ['SQ', 'Inverted yes/no question']
15
+ ]}
@@ -0,0 +1,39 @@
1
+ {
2
+ language: {
3
+ type: :annotator,
4
+ targets: [:entity],
5
+ default: :what_language
6
+ },
7
+ time: {
8
+ type: :annotator,
9
+ targets: [:phrase]
10
+ },
11
+ topics: {
12
+ type: :annotator,
13
+ targets: [:document, :section, :zone]
14
+ },
15
+ keywords: {
16
+ type: :annotator,
17
+ targets: [:document, :section, :zone]
18
+ },
19
+ topic_words: {
20
+ type: :annotator,
21
+ targets: [:collection]
22
+ },
23
+ name_tag: {
24
+ type: :annotator,
25
+ targets: [:phrase, :word]
26
+ },
27
+ coreferences: {
28
+ type: :annotator,
29
+ targets: [:zone]
30
+ },
31
+ tf_idf: {
32
+ type: :annotator,
33
+ targets: [:word]
34
+ },
35
+ summary: {
36
+ type: :annotator,
37
+ targets: [:document]
38
+ }
39
+ }
@@ -0,0 +1,20 @@
1
+ {
2
+ readers: {
3
+ type: :computer,
4
+ targets: [:document],
5
+ },
6
+ unserializers: {
7
+ type: :computer,
8
+ targets: [:entity],
9
+ },
10
+ serializers: {
11
+ type: :computer,
12
+ targets: [:entity],
13
+ default: :yaml,
14
+ },
15
+ visualizers: {
16
+ type: :computer,
17
+ targets: [:entity],
18
+ default: :tree
19
+ }
20
+ }
@@ -0,0 +1,27 @@
1
+ {
2
+ stemmers: {
3
+ type: :annotator,
4
+ targets: [:word]
5
+ },
6
+ declensors: {
7
+ type: :annotator,
8
+ targets: [:word],
9
+ preset_option: :count,
10
+ presets: [:plural, :singular]
11
+ },
12
+ conjugators: {
13
+ type: :annotator,
14
+ targets: [:word],
15
+ preset_option: :form,
16
+ presets: [:infinitive, :present_participle,
17
+ :plural_verb, :singular_verb]
18
+ },
19
+ cardinalizers: {
20
+ type: :annotator,
21
+ targets: [:number]
22
+ },
23
+ ordinalizers: {
24
+ type: :annotator,
25
+ targets: [:number]
26
+ }
27
+ }
@@ -0,0 +1,6 @@
1
+ {
2
+ classifiers: {
3
+ type: :computer,
4
+ targets: [:entity]
5
+ }
6
+ }
@@ -0,0 +1,18 @@
1
+ {
2
+ taggers: {
3
+ type: :annotator,
4
+ targets: [:phrase, :token]
5
+ },
6
+ categorizers: {
7
+ type: :annotator,
8
+ targets: [:phrase, :token],
9
+ recursive: true
10
+ },
11
+ sensers: {
12
+ type: :annotator,
13
+ targets: [:word],
14
+ preset_option: :nym,
15
+ presets: [:synonyms, :antonyms,
16
+ :hyponyms, :hypernyms],
17
+ }
18
+ }
@@ -0,0 +1 @@
1
+ [:extractors, :inflectors, :formatters, :learners, :lexicalizers, :processors, :retrievers]
@@ -0,0 +1,19 @@
1
+ {
2
+ chunkers: {
3
+ type: :transformer,
4
+ targets: [:document],
5
+ default: :autoselect
6
+ },
7
+ segmenters: {
8
+ type: :transformer,
9
+ targets: [:zone]
10
+ },
11
+ tokenizers: {
12
+ type: :transformer,
13
+ targets: [:sentence, :phrase]
14
+ },
15
+ parsers: {
16
+ type: :transformer,
17
+ targets: [:sentence, :phrase]
18
+ }
19
+ }
@@ -0,0 +1,12 @@
1
+ {
2
+ indexers: {
3
+ type: :annotator,
4
+ targets: [:collection],
5
+ default: :ferret
6
+ },
7
+ searchers: {
8
+ type: :computer,
9
+ targets: [:collection],
10
+ default: :ferret
11
+ }
12
+ }
@@ -0,0 +1,125 @@
1
+ module Treat::Config
2
+
3
+ Paths = [ :tmp, :lib, :bin,
4
+ :files, :data, :models, :spec ]
5
+
6
+ class << self
7
+ attr_accessor :config
8
+ end
9
+
10
+ Treat.module_eval do
11
+ # Handle all missing methods as conf options.
12
+ def self.method_missing(sym, *args, &block)
13
+ super(sym, *args, &block) if sym == :to_ary
14
+ Treat::Config.config[sym]
15
+ end
16
+ end
17
+
18
+ def self.configure
19
+ # Temporary configuration hash.
20
+ config = { paths: {} }
21
+ confdir = get_full_path(:lib) + 'treat/config'
22
+ # Iterate over each directory in the config.
23
+ Dir[confdir + '/*'].each do |dir|
24
+ name = File.basename(dir, '.*').intern
25
+ config[name] = {}
26
+ # Iterate over each file in the directory.
27
+ Dir[confdir + "/#{name}/*.rb"].each do |file|
28
+ key = File.basename(file, '.*').intern
29
+ config[name][key] = eval(File.read(file))
30
+ end
31
+ end
32
+ # Get the path config.
33
+ Paths.each do |path|
34
+ config[:paths][path] = get_full_path(path)
35
+ end
36
+ # Get the tag alignments.
37
+ configure_tags!(config[:tags][:aligned])
38
+ # Convert hash to structs.
39
+ self.config = self.hash_to_struct(config)
40
+ end
41
+
42
+ def self.get_full_path(dir)
43
+ File.dirname(__FILE__) +
44
+ '/../../' + dir.to_s + "/"
45
+ end
46
+
47
+ def self.configure_tags!(config)
48
+ ts = config[:tag_sets]
49
+ config[:word_tags_to_category] =
50
+ align_tags(config[:word_tags], ts)
51
+ config[:phrase_tags_to_category] =
52
+ align_tags(config[:phrase_tags], ts)
53
+ end
54
+
55
+ # Align tag configuration.
56
+ def self.align_tags(tags, tag_sets)
57
+ wttc = {}
58
+ tags.each_slice(2) do |desc, tags|
59
+ category = desc.gsub(',', ' ,').
60
+ split(' ')[0].downcase
61
+ tag_sets.each_with_index do |tag_set, i|
62
+ next unless tags[i]
63
+ wttc[tags[i]] ||= {}
64
+ wttc[tags[i]][tag_set] = category
65
+ end
66
+ end
67
+ wttc
68
+ end
69
+
70
+ def self.hash_to_struct(hash)
71
+ return hash if hash.keys.
72
+ select { |k| !k.is_a?(Symbol) }.size > 0
73
+ struct = Struct.new(
74
+ *hash.keys).new(*hash.values)
75
+ hash.each do |key, value|
76
+ if value.is_a?(Hash)
77
+ struct[key] =
78
+ self.hash_to_struct(value)
79
+ end
80
+ end
81
+ struct
82
+ end
83
+
84
+ # Turn on syntactic sugar.
85
+ def self.sweeten!
86
+
87
+ # Undo this in unsweeten! - # Fix
88
+ Treat::Entities.module_eval do
89
+ self.constants.each do |type|
90
+ define_singleton_method(type) do |value='', id=nil|
91
+ const_get(type).build(value, id)
92
+ end
93
+ end
94
+ end
95
+
96
+ return if Treat.core.syntax.sweetened
97
+ Treat.core.syntax.sweetened = true
98
+ Treat.core.entities.list.each do |type|
99
+ next if type == :Symbol
100
+ kname = cc(type).intern
101
+ klass = Treat::Entities.const_get(kname)
102
+ Object.class_eval do
103
+ define_method(kname) do |val, opts={}|
104
+ klass.build(val, opts)
105
+ end
106
+ end
107
+ end
108
+ end
109
+
110
+ # Turn off syntactic sugar.
111
+ def self.unsweeten!
112
+ return unless Treat.core.syntax.sweetened
113
+ Treat.core.syntax.sweetened = false
114
+ Treat.core.entities.list.each do |type|
115
+ name = cc(type).intern
116
+ next if type == :Symbol
117
+ Object.class_eval { remove_method(name) }
118
+ end
119
+
120
+ end
121
+
122
+ # Run all configuration.
123
+ self.configure
124
+
125
+ end
@@ -1,4 +1,4 @@
1
- class Treat::Classification
1
+ class Treat::Core::Classification
2
2
 
3
3
  attr_reader :types
4
4
  attr_reader :features
@@ -1,7 +1,4 @@
1
- class Treat::DataSet
2
-
3
- require 'psych'
4
- require 'treat/classification'
1
+ class Treat::Core::DataSet
5
2
 
6
3
  attr_reader :classification
7
4
  attr_reader :labels
@@ -1,5 +1,5 @@
1
1
  # This module provides an abstract tree structure.
2
- module Treat::Tree
2
+ module Treat::Core
3
3
 
4
4
  # This class is a node for an N-ary tree data structure
5
5
  # with a unique identifier, text value, children, features
@@ -113,7 +113,7 @@ module Treat::Tree
113
113
  # node from the children.
114
114
  def remove!(ion)
115
115
  return nil unless ion
116
- if ion.is_a? Treat::Tree::Node
116
+ if ion.is_a? Treat::Core::Node
117
117
  @children.delete(ion)
118
118
  @children_hash.delete(ion.id)
119
119
  ion.set_as_root!
@@ -203,7 +203,7 @@ module Treat::Tree
203
203
  # the supplied dependency type.
204
204
  def link(id_or_node, type = nil,
205
205
  directed = true, direction = 1)
206
- if id_or_node.is_a?(Treat::Tree::Node)
206
+ if id_or_node.is_a?(Treat::Core::Node)
207
207
  id = root.find(id_or_node).id
208
208
  else
209
209
  id = id_or_node
@@ -220,7 +220,7 @@ module Treat::Tree
220
220
 
221
221
  # Find the node in the tree with the given id.
222
222
  def find(id_or_node)
223
- if id_or_node.is_a?(Treat::Tree::Node)
223
+ if id_or_node.is_a?(Treat::Core::Node)
224
224
  id = id_or_node.id
225
225
  else
226
226
  id = id_or_node
@@ -230,7 +230,7 @@ module Treat::Tree
230
230
  end
231
231
  self.each do |child|
232
232
  r = child.find(id)
233
- return r if r.is_a? Treat::Tree::Node
233
+ return r if r.is_a? Treat::Core::Node
234
234
  end
235
235
  nil
236
236
  end
@@ -0,0 +1,3 @@
1
+ module Treat::Core::Server
2
+ # To implement.
3
+ end
data/lib/treat/core.rb ADDED
@@ -0,0 +1,5 @@
1
+ # Contains the core classes used by Treat.
2
+ module Treat::Core
3
+ p = Treat.paths.lib + 'treat/core/*.rb'
4
+ Dir.glob(p).each { |f| require f }
5
+ end
@@ -3,9 +3,11 @@
3
3
  # a string or a numeric object. This class
4
4
  # is pretty much self-explanatory.
5
5
  module Treat::Entities::Abilities::Buildable
6
-
6
+
7
+ require 'schiphol'
7
8
  require 'fileutils'
8
-
9
+ require 'uri'
10
+
9
11
  # Simple regexps to match common entities.
10
12
  WordRegexp = /^[[:alpha:]\-']+$/
11
13
  NumberRegexp = /^#?([0-9]+)(\.[0-9]+)?$/
@@ -23,7 +25,9 @@ module Treat::Entities::Abilities::Buildable
23
25
  def build(file_or_value, options = {})
24
26
 
25
27
  fv = file_or_value.to_s
26
- if self == Treat::Entities::Document
28
+ if self == Treat::Entities::Document ||
29
+ (fv.index('yml') || fv.index('yaml') ||
30
+ fv.index('xml') || fv.index('mongo'))
27
31
  if fv =~ UriRegexp
28
32
  from_url(fv, options)
29
33
  else
@@ -82,22 +86,18 @@ module Treat::Entities::Abilities::Buildable
82
86
  'Cannot create something ' +
83
87
  'else than a document from a url.'
84
88
  end
85
-
86
- uri = ::URI.parse(url)
87
-
88
- sp = uri.path.split('/')
89
- sp.shift if sp[0] == ''
90
-
91
- file = sp[-1]
92
- path = sp.size == 1 ?
93
- '/' : sp[0..-2].join('/')
94
-
95
- f = Treat::Downloader.download(
96
- uri.scheme, uri.host, path, file)
97
- options[:default_to] ||= :html
89
+
90
+ f = Schiphol.download(url,
91
+ :download_folder => Treat.paths.files,
92
+ :show_progress => Treat.core.verbosity.silence,
93
+ :rectify_extensions => true,
94
+ :max_tries => 3
95
+ )
96
+
97
+ options[:default_to] ||= 'html'
98
98
 
99
99
  e = from_file(f, options)
100
- e.set :url, uri.to_s
100
+ e.set :url, url.to_s
101
101
  e
102
102
 
103
103
  end
@@ -161,24 +161,17 @@ module Treat::Entities::Abilities::Buildable
161
161
 
162
162
  # Build a document from a raw or serialized file.
163
163
  def from_file(file, options)
164
-
165
- unless File.readable?(file)
166
- raise Treat::Exception,
167
- "Path '#{file}' does not "+
168
- "point to a readable file."
169
- end
170
164
 
171
- fmt = Treat::Formatters::Readers::Autoselect.
172
- detect_format(file, options[:default_to])
173
- options[:_format] = fmt
174
-
175
- if fmt == :yaml || fmt == :yml ||
176
- (fmt == :xml && is_treat_xml?(file))
177
- f = from_serialized_file(file, options)
165
+
166
+ if file.index('yml') || file.index('yaml') || file.index('xml') || file.index('mongo')
167
+ from_serialized_file(file, options)
178
168
  else
179
- f = from_raw_file(file, options)
169
+ fmt = Treat::Workers::Formatters::Readers::Autoselect.
170
+ detect_format(file, options[:default_to])
171
+ options[:_format] = fmt
172
+ from_raw_file(file, options)
180
173
  end
181
-
174
+
182
175
  end
183
176
 
184
177
  # Build a document from a raw file.
@@ -190,7 +183,13 @@ module Treat::Entities::Abilities::Buildable
190
183
  "Cannot create something else than a " +
191
184
  "document from raw file '#{file}'."
192
185
  end
193
-
186
+
187
+ unless File.readable?(file)
188
+ raise Treat::Exception,
189
+ "Path '#{file}' does not "+
190
+ "point to a readable file."
191
+ end
192
+
194
193
  d = Treat::Entities::Document.new(file)
195
194
 
196
195
  d.read(:autoselect, options)
@@ -200,11 +199,29 @@ module Treat::Entities::Abilities::Buildable
200
199
  # Build an entity from a serialized file.
201
200
  def from_serialized_file(file, options)
202
201
 
203
- d = Treat::Entities::Document.new(file)
204
- d.unserialize(:autoselect, options)
205
- d.children[0].set_as_root!
206
- d.children[0]
207
-
202
+ if file.index('mongo')
203
+ options[:id] = file.scan( # Consolidate this
204
+ /([0-9]+)\.mongo/).first.first
205
+ from_db(:mongo, options)
206
+ else
207
+ unless File.readable?(file)
208
+ raise Treat::Exception,
209
+ "Path '#{file}' does not "+
210
+ "point to a readable file."
211
+ end
212
+ d = Treat::Entities::Document.new(file)
213
+ d.unserialize(:autoselect, options)
214
+ d.children[0].set_as_root! # Fix this
215
+ d.children[0]
216
+ end
217
+
218
+ end
219
+
220
+ def from_db(adapter, options)
221
+ id = options[:id]
222
+ e = self.new(nil, id)
223
+ e.unserialize(adapter, options)
224
+ e
208
225
  end
209
226
 
210
227
  # Build any kind of entity from a string.
@@ -217,7 +234,7 @@ module Treat::Entities::Abilities::Buildable
217
234
  "collection from a string " +
218
235
  "(need a readable file/folder)."
219
236
  when :phrase
220
- phrase_from_string(string)
237
+ sentence_or_phrase_from_string(string)
221
238
  when :token
222
239
  token_from_string(string)
223
240
  when :zone
@@ -229,7 +246,7 @@ module Treat::Entities::Abilities::Buildable
229
246
  if string.gsub(/[\.\!\?]+/,
230
247
  '.').count('.') <= 1 &&
231
248
  string.count("\n") == 0
232
- phrase_from_string(string)
249
+ sentence_or_phrase_from_string(string)
233
250
  else
234
251
  zone_from_string(string)
235
252
  end
@@ -245,11 +262,13 @@ module Treat::Entities::Abilities::Buildable
245
262
  end
246
263
 
247
264
  # Build a phrase from a string.
248
- def phrase_from_string(string)
265
+ def sentence_or_phrase_from_string(string)
249
266
 
250
267
  check_encoding(string)
251
268
 
252
- if string.count('.!?') >= 1
269
+ if !(string =~ /[a-zA-Z]+/)
270
+ Treat::Entities::Fragment.new(string)
271
+ elsif string.count('.!?') >= 1
253
272
  Treat::Entities::Sentence.new(string)
254
273
  else
255
274
  Treat::Entities::Phrase.new(string)
@@ -300,20 +319,6 @@ module Treat::Entities::Abilities::Buildable
300
319
  end
301
320
 
302
321
  end
303
-
304
- # Eventually find a better way.
305
- def is_treat_xml?(file)
306
-
307
- beginning = nil
308
-
309
- File.open(file) do |w|
310
- beginning = w.readlines(200)
311
- end
312
-
313
- beginning = beginning.join(' ')
314
- beginning.count('<treat>') > 0
315
-
316
- end
317
322
 
318
323
  def create_collection(fv)
319
324
  FileUtils.mkdir(fv)
@@ -11,8 +11,8 @@ module Treat::Entities::Abilities::Checkable
11
11
  return @features[feature] if has?(feature)
12
12
  return send(feature) if do_it
13
13
  task = caller_method(2) # This is dangerous !
14
- g1 = Treat::Categories.lookup(task)
15
- g2 = Treat::Categories.lookup(feature)
14
+ g1 = Treat::Workers.lookup(task)
15
+ g2 = Treat::Workers.lookup(feature)
16
16
 
17
17
  raise Treat::Exception,
18
18
  "#{g1.type.to_s.capitalize} #{task} " +
@@ -0,0 +1,21 @@
1
+ module Treat::Entities::Abilities::Comparable
2
+
3
+ def compare_with(klass)
4
+
5
+ i = 0; rank_a = nil; rank_b = nil
6
+
7
+ Treat.core.entities.order.each do |type|
8
+ klass2 = Treat::Entities.const_get(cc(type))
9
+ rank_a = i if self <= klass2
10
+ rank_b = i if klass <= klass2
11
+ next if rank_a && rank_b
12
+ i += 1
13
+ end
14
+
15
+ return -1 if rank_a < rank_b
16
+ return 0 if rank_a == rank_b
17
+ return 1 if rank_a > rank_b
18
+
19
+ end
20
+
21
+ end