treat 1.0.6 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (210) hide show
  1. data/LICENSE +2 -4
  2. data/README.md +13 -12
  3. data/bin/MANIFEST +1 -0
  4. data/bin/stanford/bridge.jar +0 -0
  5. data/bin/stanford/joda-time.jar +0 -0
  6. data/bin/stanford/stanford-corenlp.jar +0 -0
  7. data/bin/stanford/stanford-parser.jar +0 -0
  8. data/bin/stanford/xom.jar +0 -0
  9. data/files/{www.economist.com/21552208 → 21552208.html} +86 -89
  10. data/files/{guides.rubyonrails.org/3_2_release_notes.html → 3_2_release_notes.html} +0 -0
  11. data/files/{INFO → MANIFEST} +0 -0
  12. data/files/{www.rubyinside.com/nethttp-cheat-sheet-2940.html → nethttp-cheat-sheet-2940.html} +12 -16
  13. data/files/weather-central-canada-heat-wave.html +1370 -0
  14. data/lib/treat/config/core/acronyms.rb +4 -0
  15. data/lib/treat/config/core/encodings.rb +8 -0
  16. data/lib/treat/config/core/entities.rb +2 -0
  17. data/lib/treat/config/core/language.rb +3 -0
  18. data/lib/treat/config/core/paths.rb +8 -0
  19. data/lib/treat/config/core/syntax.rb +1 -0
  20. data/lib/treat/config/core/verbosity.rb +1 -0
  21. data/lib/treat/config/databases/mongo.rb +3 -0
  22. data/lib/treat/config/languages/agnostic.rb +34 -0
  23. data/lib/treat/config/languages/arabic.rb +13 -0
  24. data/lib/treat/config/languages/chinese.rb +13 -0
  25. data/lib/treat/config/languages/dutch.rb +12 -0
  26. data/lib/treat/config/languages/english.rb +60 -0
  27. data/lib/treat/config/languages/french.rb +18 -0
  28. data/lib/treat/config/languages/german.rb +18 -0
  29. data/lib/treat/config/languages/greek.rb +12 -0
  30. data/lib/treat/config/languages/italian.rb +12 -0
  31. data/lib/treat/config/languages/polish.rb +12 -0
  32. data/lib/treat/config/languages/portuguese.rb +12 -0
  33. data/lib/treat/config/languages/russian.rb +12 -0
  34. data/lib/treat/config/languages/spanish.rb +12 -0
  35. data/lib/treat/config/languages/swedish.rb +12 -0
  36. data/lib/treat/config/libraries/stanford.rb +1 -0
  37. data/lib/treat/config/linguistics/categories.rb +4 -0
  38. data/lib/treat/config/linguistics/punctuation.rb +33 -0
  39. data/lib/treat/config/tags/aligned.rb +221 -0
  40. data/lib/treat/config/tags/enju.rb +71 -0
  41. data/lib/treat/config/tags/paris7.rb +17 -0
  42. data/lib/treat/config/tags/ptb.rb +15 -0
  43. data/lib/treat/config/workers/extractors.rb +39 -0
  44. data/lib/treat/config/workers/formatters.rb +20 -0
  45. data/lib/treat/config/workers/inflectors.rb +27 -0
  46. data/lib/treat/config/workers/learners.rb +6 -0
  47. data/lib/treat/config/workers/lexicalizers.rb +18 -0
  48. data/lib/treat/config/workers/list.rb +1 -0
  49. data/lib/treat/config/workers/processors.rb +19 -0
  50. data/lib/treat/config/workers/retrievers.rb +12 -0
  51. data/lib/treat/config.rb +125 -0
  52. data/lib/treat/{classification.rb → core/classification.rb} +1 -1
  53. data/lib/treat/{data_set.rb → core/data_set.rb} +1 -4
  54. data/lib/treat/{tree.rb → core/node.rb} +5 -5
  55. data/lib/treat/core/server.rb +3 -0
  56. data/lib/treat/core.rb +5 -0
  57. data/lib/treat/entities/abilities/buildable.rb +61 -56
  58. data/lib/treat/entities/abilities/checkable.rb +2 -2
  59. data/lib/treat/entities/abilities/comparable.rb +21 -0
  60. data/lib/treat/entities/abilities/copyable.rb +2 -0
  61. data/lib/treat/entities/abilities/countable.rb +1 -1
  62. data/lib/treat/entities/abilities/debuggable.rb +1 -1
  63. data/lib/treat/entities/abilities/delegatable.rb +42 -36
  64. data/lib/treat/entities/abilities/doable.rb +2 -2
  65. data/lib/treat/entities/abilities/exportable.rb +1 -1
  66. data/lib/treat/entities/abilities/iterable.rb +21 -33
  67. data/lib/treat/entities/abilities/magical.rb +8 -8
  68. data/lib/treat/entities/abilities/registrable.rb +0 -38
  69. data/lib/treat/entities/abilities/stringable.rb +19 -19
  70. data/lib/treat/entities/collection.rb +31 -0
  71. data/lib/treat/entities/document.rb +10 -0
  72. data/lib/treat/entities/entity.rb +18 -13
  73. data/lib/treat/entities/group.rb +15 -0
  74. data/lib/treat/entities/section.rb +13 -0
  75. data/lib/treat/entities/token.rb +35 -0
  76. data/lib/treat/entities/zone.rb +11 -0
  77. data/lib/treat/entities.rb +5 -75
  78. data/lib/treat/helpers/didyoumean.rb +57 -0
  79. data/lib/treat/helpers/escaping.rb +15 -0
  80. data/lib/treat/helpers/formatting.rb +41 -0
  81. data/lib/treat/helpers/platform.rb +15 -0
  82. data/lib/treat/helpers/reflection.rb +17 -0
  83. data/lib/treat/helpers/temporary.rb +27 -0
  84. data/lib/treat/helpers/verbosity.rb +19 -0
  85. data/lib/treat/helpers.rb +5 -0
  86. data/lib/treat/installer.rb +46 -165
  87. data/lib/treat/loaders/linguistics.rb +22 -27
  88. data/lib/treat/loaders/stanford.rb +23 -41
  89. data/lib/treat/loaders.rb +10 -0
  90. data/lib/treat/proxies.rb +73 -24
  91. data/lib/treat/version.rb +3 -0
  92. data/lib/treat/{extractors → workers/extractors}/keywords/tf_idf.rb +1 -1
  93. data/lib/treat/{extractors → workers/extractors}/language/what_language.rb +11 -4
  94. data/lib/treat/{extractors → workers/extractors}/name_tag/stanford.rb +3 -4
  95. data/lib/treat/{extractors → workers/extractors}/tf_idf/native.rb +4 -5
  96. data/lib/treat/{extractors → workers/extractors}/time/chronic.rb +1 -1
  97. data/lib/treat/{extractors → workers/extractors}/time/nickel.rb +1 -1
  98. data/lib/treat/{extractors → workers/extractors}/time/ruby.rb +1 -1
  99. data/lib/treat/{extractors → workers/extractors}/topic_words/lda.rb +1 -1
  100. data/lib/treat/{extractors → workers/extractors}/topics/reuters.rb +4 -4
  101. data/lib/treat/{formatters → workers/formatters}/readers/abw.rb +2 -2
  102. data/lib/treat/{formatters → workers/formatters}/readers/autoselect.rb +10 -3
  103. data/lib/treat/{formatters → workers/formatters}/readers/doc.rb +2 -2
  104. data/lib/treat/{formatters → workers/formatters}/readers/html.rb +4 -4
  105. data/lib/treat/{formatters → workers/formatters}/readers/image.rb +2 -2
  106. data/lib/treat/{formatters → workers/formatters}/readers/odt.rb +2 -2
  107. data/lib/treat/{formatters → workers/formatters}/readers/pdf.rb +2 -2
  108. data/lib/treat/{formatters → workers/formatters}/readers/txt.rb +2 -2
  109. data/lib/treat/{formatters → workers/formatters}/readers/xml.rb +2 -2
  110. data/lib/treat/workers/formatters/serializers/mongo.rb +60 -0
  111. data/lib/treat/{formatters → workers/formatters}/serializers/xml.rb +1 -2
  112. data/lib/treat/{formatters → workers/formatters}/serializers/yaml.rb +1 -1
  113. data/lib/treat/{formatters → workers/formatters}/unserializers/autoselect.rb +3 -1
  114. data/lib/treat/workers/formatters/unserializers/mongo.rb +80 -0
  115. data/lib/treat/{formatters → workers/formatters}/unserializers/xml.rb +2 -2
  116. data/lib/treat/{formatters → workers/formatters}/unserializers/yaml.rb +1 -1
  117. data/lib/treat/{formatters → workers/formatters}/visualizers/dot.rb +1 -1
  118. data/lib/treat/{formatters → workers/formatters}/visualizers/standoff.rb +2 -3
  119. data/lib/treat/{formatters → workers/formatters}/visualizers/tree.rb +1 -1
  120. data/lib/treat/{groupable.rb → workers/group.rb} +6 -12
  121. data/lib/treat/{inflectors → workers/inflectors}/cardinalizers/linguistics.rb +7 -2
  122. data/lib/treat/{inflectors → workers/inflectors}/conjugators/linguistics.rb +11 -11
  123. data/lib/treat/{inflectors → workers/inflectors}/declensors/active_support.rb +2 -2
  124. data/lib/treat/{inflectors → workers/inflectors}/declensors/english/inflect.rb +1 -1
  125. data/lib/treat/{inflectors → workers/inflectors}/declensors/english.rb +2 -2
  126. data/lib/treat/{inflectors → workers/inflectors}/declensors/linguistics.rb +4 -4
  127. data/lib/treat/{inflectors → workers/inflectors}/ordinalizers/linguistics.rb +8 -2
  128. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter.rb +2 -2
  129. data/lib/treat/{inflectors → workers/inflectors}/stemmers/porter_c.rb +1 -1
  130. data/lib/treat/{inflectors → workers/inflectors}/stemmers/uea.rb +1 -1
  131. data/lib/treat/{ai → workers/learners}/classifiers/id3.rb +1 -1
  132. data/lib/treat/{ai → workers/learners}/classifiers/mlp.rb +1 -1
  133. data/lib/treat/{lexicalizers → workers/lexicalizers}/categorizers/from_tag.rb +9 -9
  134. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet/synset.rb +2 -2
  135. data/lib/treat/{lexicalizers → workers/lexicalizers}/sensers/wordnet.rb +4 -4
  136. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill/patch.rb +2 -2
  137. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/brill.rb +2 -8
  138. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/lingua.rb +1 -6
  139. data/lib/treat/{lexicalizers → workers/lexicalizers}/taggers/stanford.rb +31 -42
  140. data/lib/treat/workers/processors/chunkers/autoselect.rb +19 -0
  141. data/lib/treat/{processors → workers/processors}/chunkers/html.rb +4 -3
  142. data/lib/treat/workers/processors/chunkers/txt.rb +32 -0
  143. data/lib/treat/{processors → workers/processors}/parsers/enju.rb +3 -3
  144. data/lib/treat/{processors → workers/processors}/parsers/stanford.rb +6 -8
  145. data/lib/treat/{processors → workers/processors}/segmenters/punkt.rb +6 -10
  146. data/lib/treat/{processors → workers/processors}/segmenters/stanford.rb +2 -2
  147. data/lib/treat/{processors → workers/processors}/segmenters/tactful.rb +3 -6
  148. data/lib/treat/{processors → workers/processors}/tokenizers/ptb.rb +6 -5
  149. data/lib/treat/{processors → workers/processors}/tokenizers/punkt.rb +1 -1
  150. data/lib/treat/{processors → workers/processors}/tokenizers/stanford.rb +1 -1
  151. data/lib/treat/{processors → workers/processors}/tokenizers/tactful.rb +3 -5
  152. data/lib/treat/{retrievers → workers/retrievers}/indexers/ferret.rb +1 -1
  153. data/lib/treat/{retrievers → workers/retrievers}/searchers/ferret.rb +1 -1
  154. data/lib/treat/workers.rb +96 -0
  155. data/lib/treat.rb +23 -49
  156. data/spec/collection.rb +4 -4
  157. data/spec/document.rb +5 -5
  158. data/spec/entity.rb +33 -32
  159. data/spec/{tree.rb → node.rb} +5 -5
  160. data/spec/phrase.rb +5 -39
  161. data/spec/sandbox.rb +212 -6
  162. data/spec/token.rb +12 -9
  163. data/spec/treat.rb +12 -9
  164. data/spec/word.rb +10 -9
  165. data/spec/zone.rb +6 -2
  166. data/tmp/{INFO → MANIFEST} +0 -0
  167. data/tmp/english.yaml +10340 -0
  168. metadata +149 -139
  169. data/lib/treat/ai.rb +0 -12
  170. data/lib/treat/categories.rb +0 -90
  171. data/lib/treat/categorizable.rb +0 -44
  172. data/lib/treat/configurable.rb +0 -115
  173. data/lib/treat/dependencies.rb +0 -25
  174. data/lib/treat/downloader.rb +0 -87
  175. data/lib/treat/entities/abilities.rb +0 -10
  176. data/lib/treat/entities/entities.rb +0 -102
  177. data/lib/treat/exception.rb +0 -7
  178. data/lib/treat/extractors.rb +0 -79
  179. data/lib/treat/formatters/serializers/mongo.rb +0 -64
  180. data/lib/treat/formatters.rb +0 -41
  181. data/lib/treat/helpers/decimal_point_escaper.rb +0 -22
  182. data/lib/treat/inflectors.rb +0 -52
  183. data/lib/treat/kernel.rb +0 -208
  184. data/lib/treat/languages/arabic.rb +0 -16
  185. data/lib/treat/languages/chinese.rb +0 -16
  186. data/lib/treat/languages/dutch.rb +0 -16
  187. data/lib/treat/languages/english.rb +0 -63
  188. data/lib/treat/languages/french.rb +0 -20
  189. data/lib/treat/languages/german.rb +0 -20
  190. data/lib/treat/languages/greek.rb +0 -16
  191. data/lib/treat/languages/italian.rb +0 -17
  192. data/lib/treat/languages/language.rb +0 -10
  193. data/lib/treat/languages/list.txt +0 -504
  194. data/lib/treat/languages/polish.rb +0 -16
  195. data/lib/treat/languages/portuguese.rb +0 -16
  196. data/lib/treat/languages/russian.rb +0 -16
  197. data/lib/treat/languages/spanish.rb +0 -16
  198. data/lib/treat/languages/swedish.rb +0 -16
  199. data/lib/treat/languages.rb +0 -132
  200. data/lib/treat/lexicalizers.rb +0 -37
  201. data/lib/treat/object.rb +0 -7
  202. data/lib/treat/processors/chunkers/autoselect.rb +0 -16
  203. data/lib/treat/processors/chunkers/txt.rb +0 -21
  204. data/lib/treat/processors.rb +0 -38
  205. data/lib/treat/retrievers.rb +0 -27
  206. data/lib/treat/server.rb +0 -26
  207. data/lib/treat/universalisation/encodings.rb +0 -12
  208. data/lib/treat/universalisation/tags.rb +0 -453
  209. data/lib/treat/universalisation.rb +0 -9
  210. data/spec/languages.rb +0 -25
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: treat
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.6
4
+ version: 1.1.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,16 +9,16 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-05-22 00:00:00.000000000 Z
12
+ date: 2012-07-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: rubyzip
15
+ name: schiphol
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
20
20
  - !ruby/object:Gem::Version
21
- version: 0.9.6.1
21
+ version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
24
  version_requirements: !ruby/object:Gem::Requirement
@@ -26,23 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ! '>='
28
28
  - !ruby/object:Gem::Version
29
- version: 0.9.6.1
30
- - !ruby/object:Gem::Dependency
31
- name: progressbar
32
- requirement: !ruby/object:Gem::Requirement
33
- none: false
34
- requirements:
35
- - - ! '>='
36
- - !ruby/object:Gem::Version
37
- version: 0.10.0
38
- type: :runtime
39
- prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ! '>='
44
- - !ruby/object:Gem::Version
45
- version: 0.10.0
29
+ version: '0'
46
30
  - !ruby/object:Gem::Dependency
47
31
  name: rspec
48
32
  requirement: !ruby/object:Gem::Requirement
@@ -50,7 +34,7 @@ dependencies:
50
34
  requirements:
51
35
  - - ! '>='
52
36
  - !ruby/object:Gem::Version
53
- version: 2.9.0
37
+ version: '0'
54
38
  type: :development
55
39
  prerelease: false
56
40
  version_requirements: !ruby/object:Gem::Requirement
@@ -58,7 +42,7 @@ dependencies:
58
42
  requirements:
59
43
  - - ! '>='
60
44
  - !ruby/object:Gem::Version
61
- version: 2.9.0
45
+ version: '0'
62
46
  - !ruby/object:Gem::Dependency
63
47
  name: rake
64
48
  requirement: !ruby/object:Gem::Requirement
@@ -66,7 +50,7 @@ dependencies:
66
50
  requirements:
67
51
  - - ! '>='
68
52
  - !ruby/object:Gem::Version
69
- version: 0.9.2
53
+ version: '0'
70
54
  type: :development
71
55
  prerelease: false
72
56
  version_requirements: !ruby/object:Gem::Requirement
@@ -74,26 +58,66 @@ dependencies:
74
58
  requirements:
75
59
  - - ! '>='
76
60
  - !ruby/object:Gem::Version
77
- version: 0.9.2
78
- description: ! ' Treat is a full-fledged natural language processing toolkit for Ruby. '
61
+ version: '0'
62
+ description: ! ' Treat is a natural language processing framework for Ruby. '
79
63
  email:
80
64
  - louis.mullie@gmail.com
81
65
  executables: []
82
66
  extensions: []
83
67
  extra_rdoc_files: []
84
68
  files:
85
- - lib/treat/ai/classifiers/id3.rb
86
- - lib/treat/ai/classifiers/mlp.rb
87
- - lib/treat/ai.rb
88
- - lib/treat/categories.rb
89
- - lib/treat/categorizable.rb
90
- - lib/treat/classification.rb
91
- - lib/treat/configurable.rb
92
- - lib/treat/data_set.rb
93
- - lib/treat/dependencies.rb
94
- - lib/treat/downloader.rb
69
+ - bin/MANIFEST
70
+ - bin/stanford/bridge.jar
71
+ - bin/stanford/joda-time.jar
72
+ - bin/stanford/stanford-corenlp.jar
73
+ - bin/stanford/stanford-parser.jar
74
+ - bin/stanford/xom.jar
75
+ - lib/treat/config/core/acronyms.rb
76
+ - lib/treat/config/core/encodings.rb
77
+ - lib/treat/config/core/entities.rb
78
+ - lib/treat/config/core/language.rb
79
+ - lib/treat/config/core/paths.rb
80
+ - lib/treat/config/core/syntax.rb
81
+ - lib/treat/config/core/verbosity.rb
82
+ - lib/treat/config/databases/mongo.rb
83
+ - lib/treat/config/languages/agnostic.rb
84
+ - lib/treat/config/languages/arabic.rb
85
+ - lib/treat/config/languages/chinese.rb
86
+ - lib/treat/config/languages/dutch.rb
87
+ - lib/treat/config/languages/english.rb
88
+ - lib/treat/config/languages/french.rb
89
+ - lib/treat/config/languages/german.rb
90
+ - lib/treat/config/languages/greek.rb
91
+ - lib/treat/config/languages/italian.rb
92
+ - lib/treat/config/languages/polish.rb
93
+ - lib/treat/config/languages/portuguese.rb
94
+ - lib/treat/config/languages/russian.rb
95
+ - lib/treat/config/languages/spanish.rb
96
+ - lib/treat/config/languages/swedish.rb
97
+ - lib/treat/config/libraries/stanford.rb
98
+ - lib/treat/config/linguistics/categories.rb
99
+ - lib/treat/config/linguistics/punctuation.rb
100
+ - lib/treat/config/tags/aligned.rb
101
+ - lib/treat/config/tags/enju.rb
102
+ - lib/treat/config/tags/paris7.rb
103
+ - lib/treat/config/tags/ptb.rb
104
+ - lib/treat/config/workers/extractors.rb
105
+ - lib/treat/config/workers/formatters.rb
106
+ - lib/treat/config/workers/inflectors.rb
107
+ - lib/treat/config/workers/learners.rb
108
+ - lib/treat/config/workers/lexicalizers.rb
109
+ - lib/treat/config/workers/list.rb
110
+ - lib/treat/config/workers/processors.rb
111
+ - lib/treat/config/workers/retrievers.rb
112
+ - lib/treat/config.rb
113
+ - lib/treat/core/classification.rb
114
+ - lib/treat/core/data_set.rb
115
+ - lib/treat/core/node.rb
116
+ - lib/treat/core/server.rb
117
+ - lib/treat/core.rb
95
118
  - lib/treat/entities/abilities/buildable.rb
96
119
  - lib/treat/entities/abilities/checkable.rb
120
+ - lib/treat/entities/abilities/comparable.rb
97
121
  - lib/treat/entities/abilities/copyable.rb
98
122
  - lib/treat/entities/abilities/countable.rb
99
123
  - lib/treat/entities/abilities/debuggable.rb
@@ -104,109 +128,96 @@ files:
104
128
  - lib/treat/entities/abilities/magical.rb
105
129
  - lib/treat/entities/abilities/registrable.rb
106
130
  - lib/treat/entities/abilities/stringable.rb
107
- - lib/treat/entities/abilities.rb
108
- - lib/treat/entities/entities.rb
131
+ - lib/treat/entities/collection.rb
132
+ - lib/treat/entities/document.rb
109
133
  - lib/treat/entities/entity.rb
134
+ - lib/treat/entities/group.rb
135
+ - lib/treat/entities/section.rb
136
+ - lib/treat/entities/token.rb
137
+ - lib/treat/entities/zone.rb
110
138
  - lib/treat/entities.rb
111
- - lib/treat/exception.rb
112
- - lib/treat/extractors/keywords/tf_idf.rb
113
- - lib/treat/extractors/language/what_language.rb
114
- - lib/treat/extractors/name_tag/stanford.rb
115
- - lib/treat/extractors/tf_idf/native.rb
116
- - lib/treat/extractors/time/chronic.rb
117
- - lib/treat/extractors/time/nickel.rb
118
- - lib/treat/extractors/time/ruby.rb
119
- - lib/treat/extractors/topic_words/lda.rb
120
- - lib/treat/extractors/topics/reuters.rb
121
- - lib/treat/extractors.rb
122
- - lib/treat/formatters/readers/abw.rb
123
- - lib/treat/formatters/readers/autoselect.rb
124
- - lib/treat/formatters/readers/doc.rb
125
- - lib/treat/formatters/readers/html.rb
126
- - lib/treat/formatters/readers/image.rb
127
- - lib/treat/formatters/readers/odt.rb
128
- - lib/treat/formatters/readers/pdf.rb
129
- - lib/treat/formatters/readers/txt.rb
130
- - lib/treat/formatters/readers/xml.rb
131
- - lib/treat/formatters/serializers/mongo.rb
132
- - lib/treat/formatters/serializers/xml.rb
133
- - lib/treat/formatters/serializers/yaml.rb
134
- - lib/treat/formatters/unserializers/autoselect.rb
135
- - lib/treat/formatters/unserializers/xml.rb
136
- - lib/treat/formatters/unserializers/yaml.rb
137
- - lib/treat/formatters/visualizers/dot.rb
138
- - lib/treat/formatters/visualizers/standoff.rb
139
- - lib/treat/formatters/visualizers/tree.rb
140
- - lib/treat/formatters.rb
141
- - lib/treat/groupable.rb
142
- - lib/treat/helpers/decimal_point_escaper.rb
143
- - lib/treat/inflectors/cardinalizers/linguistics.rb
144
- - lib/treat/inflectors/conjugators/linguistics.rb
145
- - lib/treat/inflectors/declensors/active_support.rb
146
- - lib/treat/inflectors/declensors/english/inflect.rb
147
- - lib/treat/inflectors/declensors/english.rb
148
- - lib/treat/inflectors/declensors/linguistics.rb
149
- - lib/treat/inflectors/ordinalizers/linguistics.rb
150
- - lib/treat/inflectors/stemmers/porter.rb
151
- - lib/treat/inflectors/stemmers/porter_c.rb
152
- - lib/treat/inflectors/stemmers/uea.rb
153
- - lib/treat/inflectors.rb
139
+ - lib/treat/helpers/didyoumean.rb
140
+ - lib/treat/helpers/escaping.rb
141
+ - lib/treat/helpers/formatting.rb
142
+ - lib/treat/helpers/platform.rb
143
+ - lib/treat/helpers/reflection.rb
144
+ - lib/treat/helpers/temporary.rb
145
+ - lib/treat/helpers/verbosity.rb
146
+ - lib/treat/helpers.rb
154
147
  - lib/treat/installer.rb
155
- - lib/treat/kernel.rb
156
- - lib/treat/languages/arabic.rb
157
- - lib/treat/languages/chinese.rb
158
- - lib/treat/languages/dutch.rb
159
- - lib/treat/languages/english.rb
160
- - lib/treat/languages/french.rb
161
- - lib/treat/languages/german.rb
162
- - lib/treat/languages/greek.rb
163
- - lib/treat/languages/italian.rb
164
- - lib/treat/languages/language.rb
165
- - lib/treat/languages/list.txt
166
- - lib/treat/languages/polish.rb
167
- - lib/treat/languages/portuguese.rb
168
- - lib/treat/languages/russian.rb
169
- - lib/treat/languages/spanish.rb
170
- - lib/treat/languages/swedish.rb
171
- - lib/treat/languages.rb
172
- - lib/treat/lexicalizers/categorizers/from_tag.rb
173
- - lib/treat/lexicalizers/sensers/wordnet/synset.rb
174
- - lib/treat/lexicalizers/sensers/wordnet.rb
175
- - lib/treat/lexicalizers/taggers/brill/patch.rb
176
- - lib/treat/lexicalizers/taggers/brill.rb
177
- - lib/treat/lexicalizers/taggers/lingua.rb
178
- - lib/treat/lexicalizers/taggers/stanford.rb
179
- - lib/treat/lexicalizers.rb
180
148
  - lib/treat/loaders/linguistics.rb
181
149
  - lib/treat/loaders/stanford.rb
182
- - lib/treat/object.rb
183
- - lib/treat/processors/chunkers/autoselect.rb
184
- - lib/treat/processors/chunkers/html.rb
185
- - lib/treat/processors/chunkers/txt.rb
186
- - lib/treat/processors/parsers/enju.rb
187
- - lib/treat/processors/parsers/stanford.rb
188
- - lib/treat/processors/segmenters/punkt.rb
189
- - lib/treat/processors/segmenters/stanford.rb
190
- - lib/treat/processors/segmenters/tactful.rb
191
- - lib/treat/processors/tokenizers/ptb.rb
192
- - lib/treat/processors/tokenizers/punkt.rb
193
- - lib/treat/processors/tokenizers/stanford.rb
194
- - lib/treat/processors/tokenizers/tactful.rb
195
- - lib/treat/processors.rb
150
+ - lib/treat/loaders.rb
196
151
  - lib/treat/proxies.rb
197
- - lib/treat/retrievers/indexers/ferret.rb
198
- - lib/treat/retrievers/searchers/ferret.rb
199
- - lib/treat/retrievers.rb
200
- - lib/treat/server.rb
201
- - lib/treat/tree.rb
202
- - lib/treat/universalisation/encodings.rb
203
- - lib/treat/universalisation/tags.rb
204
- - lib/treat/universalisation.rb
152
+ - lib/treat/version.rb
153
+ - lib/treat/workers/extractors/keywords/tf_idf.rb
154
+ - lib/treat/workers/extractors/language/what_language.rb
155
+ - lib/treat/workers/extractors/name_tag/stanford.rb
156
+ - lib/treat/workers/extractors/tf_idf/native.rb
157
+ - lib/treat/workers/extractors/time/chronic.rb
158
+ - lib/treat/workers/extractors/time/nickel.rb
159
+ - lib/treat/workers/extractors/time/ruby.rb
160
+ - lib/treat/workers/extractors/topic_words/lda.rb
161
+ - lib/treat/workers/extractors/topics/reuters.rb
162
+ - lib/treat/workers/formatters/readers/abw.rb
163
+ - lib/treat/workers/formatters/readers/autoselect.rb
164
+ - lib/treat/workers/formatters/readers/doc.rb
165
+ - lib/treat/workers/formatters/readers/html.rb
166
+ - lib/treat/workers/formatters/readers/image.rb
167
+ - lib/treat/workers/formatters/readers/odt.rb
168
+ - lib/treat/workers/formatters/readers/pdf.rb
169
+ - lib/treat/workers/formatters/readers/txt.rb
170
+ - lib/treat/workers/formatters/readers/xml.rb
171
+ - lib/treat/workers/formatters/serializers/mongo.rb
172
+ - lib/treat/workers/formatters/serializers/xml.rb
173
+ - lib/treat/workers/formatters/serializers/yaml.rb
174
+ - lib/treat/workers/formatters/unserializers/autoselect.rb
175
+ - lib/treat/workers/formatters/unserializers/mongo.rb
176
+ - lib/treat/workers/formatters/unserializers/xml.rb
177
+ - lib/treat/workers/formatters/unserializers/yaml.rb
178
+ - lib/treat/workers/formatters/visualizers/dot.rb
179
+ - lib/treat/workers/formatters/visualizers/standoff.rb
180
+ - lib/treat/workers/formatters/visualizers/tree.rb
181
+ - lib/treat/workers/group.rb
182
+ - lib/treat/workers/inflectors/cardinalizers/linguistics.rb
183
+ - lib/treat/workers/inflectors/conjugators/linguistics.rb
184
+ - lib/treat/workers/inflectors/declensors/active_support.rb
185
+ - lib/treat/workers/inflectors/declensors/english/inflect.rb
186
+ - lib/treat/workers/inflectors/declensors/english.rb
187
+ - lib/treat/workers/inflectors/declensors/linguistics.rb
188
+ - lib/treat/workers/inflectors/ordinalizers/linguistics.rb
189
+ - lib/treat/workers/inflectors/stemmers/porter.rb
190
+ - lib/treat/workers/inflectors/stemmers/porter_c.rb
191
+ - lib/treat/workers/inflectors/stemmers/uea.rb
192
+ - lib/treat/workers/learners/classifiers/id3.rb
193
+ - lib/treat/workers/learners/classifiers/mlp.rb
194
+ - lib/treat/workers/lexicalizers/categorizers/from_tag.rb
195
+ - lib/treat/workers/lexicalizers/sensers/wordnet/synset.rb
196
+ - lib/treat/workers/lexicalizers/sensers/wordnet.rb
197
+ - lib/treat/workers/lexicalizers/taggers/brill/patch.rb
198
+ - lib/treat/workers/lexicalizers/taggers/brill.rb
199
+ - lib/treat/workers/lexicalizers/taggers/lingua.rb
200
+ - lib/treat/workers/lexicalizers/taggers/stanford.rb
201
+ - lib/treat/workers/processors/chunkers/autoselect.rb
202
+ - lib/treat/workers/processors/chunkers/html.rb
203
+ - lib/treat/workers/processors/chunkers/txt.rb
204
+ - lib/treat/workers/processors/parsers/enju.rb
205
+ - lib/treat/workers/processors/parsers/stanford.rb
206
+ - lib/treat/workers/processors/segmenters/punkt.rb
207
+ - lib/treat/workers/processors/segmenters/stanford.rb
208
+ - lib/treat/workers/processors/segmenters/tactful.rb
209
+ - lib/treat/workers/processors/tokenizers/ptb.rb
210
+ - lib/treat/workers/processors/tokenizers/punkt.rb
211
+ - lib/treat/workers/processors/tokenizers/stanford.rb
212
+ - lib/treat/workers/processors/tokenizers/tactful.rb
213
+ - lib/treat/workers/retrievers/indexers/ferret.rb
214
+ - lib/treat/workers/retrievers/searchers/ferret.rb
215
+ - lib/treat/workers.rb
205
216
  - lib/treat.rb
206
217
  - spec/collection.rb
207
218
  - spec/document.rb
208
219
  - spec/entity.rb
209
- - spec/languages.rb
220
+ - spec/node.rb
210
221
  - spec/phrase.rb
211
222
  - spec/samples/mathematicians/archimedes.abw
212
223
  - spec/samples/mathematicians/euler.html
@@ -216,21 +227,20 @@ files:
216
227
  - spec/sandbox.rb
217
228
  - spec/token.rb
218
229
  - spec/treat.rb
219
- - spec/tree.rb
220
230
  - spec/word.rb
221
231
  - spec/zone.rb
222
- - tmp/INFO
223
- - files/guides.rubyonrails.org/3_2_release_notes.html
224
- - files/INFO
225
- - files/www.economist.com/21552208
226
- - files/www.rubyinside.com/nethttp-cheat-sheet-2940.html
232
+ - tmp/english.yaml
233
+ - tmp/MANIFEST
234
+ - files/21552208.html
235
+ - files/3_2_release_notes.html
236
+ - files/MANIFEST
237
+ - files/nethttp-cheat-sheet-2940.html
238
+ - files/weather-central-canada-heat-wave.html
227
239
  - README.md
228
240
  - LICENSE
229
241
  homepage: https://github.com/louismullie/treat
230
242
  licenses: []
231
- post_install_message: ! "********************************************************************************\n\nThank
232
- you for installing Treat!\n\nComplete the installation by running:\n\n require
233
- 'treat'\n Treat.install\n\ninside IRB or a Ruby script.\n\n********************************************************************************\n\n"
243
+ post_install_message:
234
244
  rdoc_options: []
235
245
  require_paths:
236
246
  - lib
@@ -248,7 +258,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
248
258
  version: '0'
249
259
  requirements: []
250
260
  rubyforge_project:
251
- rubygems_version: 1.8.21
261
+ rubygems_version: 1.8.24
252
262
  signing_key:
253
263
  specification_version: 3
254
264
  summary: Text Retrieval, Extraction and Annotation Toolkit.
data/lib/treat/ai.rb DELETED
@@ -1,12 +0,0 @@
1
- module Treat::AI
2
-
3
- module Classifiers
4
- extend Treat::Groupable
5
- self.type = :computer
6
- self.targets = [:entity]
7
- self.default = :id3
8
- end
9
-
10
- extend Treat::Categorizable
11
-
12
- end
@@ -1,90 +0,0 @@
1
- # This module keeps track of all the Treat::Categorizable
2
- # modules that exist and the methods they define.
3
- #
4
- #
5
- # - Processors perform the building of tree of
6
- # entities representing texts (chunking,
7
- # segmenting, tokenizing, parsing).
8
- # - Lexicalizers give lexical information about
9
- # words (synsets, semantic relationships,
10
- # tag, word category).
11
- # - Extractors extract semantic information about
12
- # an entity (language, topic, date, time, named
13
- # entity, coreferences).
14
- # - Inflectors allow to retrieve the different
15
- # inflections of a word (declensors, conjugators,
16
- # stemmers, lemmatizers).
17
- # - Formatters handle the conversion of entities to
18
- # and from different formats(readers, serializers,
19
- # unserializers, visualizers).
20
- # - Retrievers allow to index and search collections
21
- # of documents.
22
- module Treat::Categories
23
-
24
- class << self
25
- # A list of all categories.
26
- attr_accessor :list
27
- end
28
-
29
- # Array - list of all categories.
30
- self.list = []
31
- # A lookup table for entity types.
32
- @@lookup = {}
33
-
34
- # Require all categories.
35
- require 'treat/categorizable'
36
- require 'treat/formatters'
37
- require 'treat/processors'
38
- require 'treat/lexicalizers'
39
- require 'treat/inflectors'
40
- require 'treat/extractors'
41
- require 'treat/retrievers'
42
- require 'treat/ai'
43
-
44
- # Create the lookup table.
45
- self.list.each do |category|
46
- category.groups.each do |group|
47
- group = category.const_get(group)
48
- @@lookup[group.method] = group
49
- group.presets.each do |x,y|
50
- @@lookup[x] = group
51
- end if group.presets
52
- end
53
- end
54
-
55
- # Find the class of a group given its method.
56
- def self.lookup(method)
57
- @@lookup[method]
58
- end
59
-
60
- # Fix -- This must be moved urgently.
61
- Treat::Entities::Entity.class_eval do
62
-
63
- alias :true_language :language
64
-
65
- def language(extractor = nil, options = {})
66
-
67
- if is_a?(Treat::Entities::Symbol) ||
68
- is_a?(Treat::Entities::Number)
69
- return Treat.default_language
70
- end
71
-
72
- if !Treat.detect_language
73
- return Treat.default_language
74
- else
75
- dlvl = Treat.language_detection_level
76
- if (Treat::Entities.rank(type) <
77
- Treat::Entities.rank(dlvl)) &&
78
- has_parent?
79
- anc = ancestor_with_type(dlvl)
80
- return anc.language if anc
81
- end
82
- end
83
-
84
- true_language(extractor, options)
85
-
86
- end
87
-
88
- end
89
-
90
- end
@@ -1,44 +0,0 @@
1
- # A categorizable module brings together groups
2
- # of algorithms that perform similar functions.
3
- module Treat::Categorizable
4
-
5
- # The contents of each categorizable
6
- # module are groupable.
7
- require 'treat/groupable'
8
-
9
- # Add workers to the Entities based on the
10
- # configuration for a given category.
11
- def self.extended(category)
12
- Treat::Categories.list << category
13
- category.module_eval do
14
- groups.each do |group|
15
- group = const_get(group)
16
- group.targets.each do |entity_type|
17
- entity = Treat::Entities.
18
- const_get(cc(entity_type))
19
- entity.class_eval do
20
- add_workers group
21
- end
22
- end
23
- end
24
- end
25
- end
26
-
27
- # Get the list of groups defined
28
- # under this module.
29
- @@groups = self.constants
30
-
31
- # Populate a list of methods.
32
- @@methods = []
33
- @@groups.each do |group|
34
- @@methods << const_get(group).method
35
- end
36
-
37
- # Provide a list of methods implemented in
38
- # the groups contained within this category.
39
- def methods; @@methods; end
40
-
41
- # Provides a list of groups within this category.
42
- def groups; self.constants; end
43
-
44
- end
@@ -1,115 +0,0 @@
1
# This module provides configuration options for the Treat toolkit
# (enable/disable syntactic sugar, enable/disable language detection,
# and set the default language or the language detection level).
#
# It is meant to be mixed in with `extend`: the accessors are
# installed on the extending module by the `extended` hook, and every
# method below becomes a singleton method of that module.
module Treat::Configurable

  # Syntactic sugar is disabled by default.
  @@sweetened = false

  # Install the configuration accessors on the singleton class of the
  # base module (Treat) and set their default values.
  def self.extended(base)

    # Configuration options that are available on the base module.
    class << base
      # Symbol - default language to use when detect_language is false.
      attr_accessor :default_language
      # Boolean - detect language or use default?
      attr_accessor :detect_language
      # Symbol - the finest entity level at which to detect language.
      attr_accessor :language_detection_level
      # Boolean - whether to output debug information or not.
      attr_accessor :debug
      # Boolean - whether to silence the output of external programs.
      attr_accessor :silence
    end

    # Default language is English.
    base.default_language = :eng
    # Language detection is off by default.
    base.detect_language = false
    # Detect the language once per document by default.
    base.language_detection_level = :document
    # Debug output is off by default.
    base.debug = false
    # External programs are silenced by default.
    base.silence = true

  end

  # Turn on syntactic sugar for the creation of Entities.
  #
  # All entities found under Treat::Entities will be made
  # available within the global namespace. As an example,
  # 'Treat::Entities::Word' can then be referred to as 'Word'.
  #
  # There is one exception: the Symbol class is not sweetened
  # to avoid clashing with the Symbol class defined by Ruby.
  def sweeten!
    return if @@sweetened
    @@sweetened = true
    each_entity_class do |type, klass|
      next if type == :Symbol
      Object.class_eval do
        # Builder method named after the entity class.
        define_method(type) do |file_or_value, options = {}|
          klass.build(file_or_value, options)
        end
      end
    end
  end

  # Turn off syntactic sugar by removing the builder
  # methods that sweeten! defined on Object.
  def unsweeten!
    return unless @@sweetened
    @@sweetened = false
    each_entity_class do |type, _klass|
      next if type == :Symbol
      Object.class_eval { remove_method(type) }
    end
  end

  # Boolean - whether syntactic sugar is enabled or not.
  def sweetened?
    @@sweetened
  end

  # Turn on language detection, optionally setting
  # the language detection level (finest level at
  # which language detection is performed).
  #
  # Defined as an instance method (not `def self.`) so that it is
  # available on the extending module, which is where the
  # configuration accessors live.
  def detect!(level = nil)
    self.detect_language = true
    self.language_detection_level = level if level
  end

  # Turn off language detection, optionally setting
  # a new default language to use. Pass nil to leave
  # the current default language untouched.
  #
  # NOTE(review): the fallback here is :english whereas the initial
  # default language is :eng -- confirm which code is expected.
  def undetect!(default = :english)
    self.detect_language = false
    self.default_language = default if default
  end

  # Use the supplied language by default and
  # turn off language detection.
  def use(language)
    self.detect_language = false
    self.default_language = language
  end

  private

  # Helper method, yields each entity type (as a symbol)
  # together with the matching class under Treat::Entities.
  def each_entity_class
    Treat::Entities.list.each do |entity_type|
      type = cc(entity_type).intern
      # Look the constant up in Treat::Entities only, without
      # searching ancestors or the top-level namespace.
      klass = Treat::Entities.const_get(type, false)
      yield type, klass
    end
  end

end
@@ -1,25 +0,0 @@
1
# Tables describing the external dependencies of the toolkit.
class Treat::Dependencies

  # Gem dependencies; each entry is [name, version requirement, purpose].
  Gem = [
    [
      'psych',
      '>= 1.2.2',
      '(un)serialize annotated entities to YAML format'
    ],
    [
      'nokogiri',
      '>= 1.5.2',
      'read and parse XML and HTML formats'
    ],
    [
      'sdsykes-ferret',
      '>= 0.11.6.19',
      'perform full-text search in collections'
    ],
    [
      'lda-ruby',
      '>= 0.3.8',
      'extract topic words from documents and collections'
    ],
    [
      'ruby-readability',
      '>= 0.5.2',
      'extract the readable content from HTML pages'
    ],
    [
      'stanford-core-nlp',
      '>= 0.3.0',
      'tokenize, segment, parse texts and perform named entity recognition'
    ],
    [
      'whatlanguage',
      '>= 1.0.0',
      'detect the language of text'
    ],
    [
      'linguistics',
      '>= 1.0.9',
      'retrieve the inflection of nouns, verbs and numbers in English'
    ],
    [
      'punkt-segmenter',
      '>= 0.9.1',
      'segment texts into sentences'
    ],
    [
      'chronic',
      '>= 0.6.7',
      'detect date and time in text'
    ],
    [
      'decisiontree',
      '>= 0.3.0',
      'perform decision tree classification of text entities'
    ],
    [
      'ai4r',
      '>= 1.11',
      'perform different kinds of classification tasks on text entities'
    ]
  ]

  # Binary dependencies; each entry is [name, purpose].
  Binary = [
    [
      'ocropus',
      'recognize text in image files'
    ],
    [
      'antiword',
      'extract text from DOC files'
    ],
    [
      'poppler-utils',
      'extract text from PDF files'
    ],
    [
      'graphviz',
      'export and visualize directed graphs'
    ]
  ]

end