treat 0.2.5 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (242) hide show
  1. data/LICENSE +3 -3
  2. data/README.md +33 -0
  3. data/files/INFO +1 -0
  4. data/lib/treat.rb +40 -105
  5. data/lib/treat/ai.rb +12 -0
  6. data/lib/treat/ai/classifiers/id3.rb +27 -0
  7. data/lib/treat/categories.rb +82 -35
  8. data/lib/treat/categorizable.rb +44 -0
  9. data/lib/treat/classification.rb +61 -0
  10. data/lib/treat/configurable.rb +115 -0
  11. data/lib/treat/data_set.rb +42 -0
  12. data/lib/treat/dependencies.rb +24 -0
  13. data/lib/treat/downloader.rb +87 -0
  14. data/lib/treat/entities.rb +68 -66
  15. data/lib/treat/entities/abilities.rb +10 -0
  16. data/lib/treat/entities/abilities/buildable.rb +327 -0
  17. data/lib/treat/entities/abilities/checkable.rb +31 -0
  18. data/lib/treat/entities/abilities/copyable.rb +45 -0
  19. data/lib/treat/entities/abilities/countable.rb +51 -0
  20. data/lib/treat/entities/abilities/debuggable.rb +83 -0
  21. data/lib/treat/entities/abilities/delegatable.rb +123 -0
  22. data/lib/treat/entities/abilities/doable.rb +62 -0
  23. data/lib/treat/entities/abilities/exportable.rb +11 -0
  24. data/lib/treat/entities/abilities/iterable.rb +115 -0
  25. data/lib/treat/entities/abilities/magical.rb +83 -0
  26. data/lib/treat/entities/abilities/registrable.rb +74 -0
  27. data/lib/treat/entities/abilities/stringable.rb +91 -0
  28. data/lib/treat/entities/entities.rb +104 -0
  29. data/lib/treat/entities/entity.rb +122 -245
  30. data/lib/treat/exception.rb +4 -4
  31. data/lib/treat/extractors.rb +77 -80
  32. data/lib/treat/extractors/keywords/tf_idf.rb +56 -22
  33. data/lib/treat/extractors/language/what_language.rb +50 -45
  34. data/lib/treat/extractors/name_tag/stanford.rb +55 -0
  35. data/lib/treat/extractors/tf_idf/native.rb +87 -0
  36. data/lib/treat/extractors/time/chronic.rb +55 -0
  37. data/lib/treat/extractors/time/nickel.rb +86 -62
  38. data/lib/treat/extractors/time/ruby.rb +53 -0
  39. data/lib/treat/extractors/topic_words/lda.rb +67 -58
  40. data/lib/treat/extractors/topics/reuters.rb +100 -87
  41. data/lib/treat/formatters.rb +39 -35
  42. data/lib/treat/formatters/readers/abw.rb +49 -29
  43. data/lib/treat/formatters/readers/autoselect.rb +37 -33
  44. data/lib/treat/formatters/readers/doc.rb +19 -13
  45. data/lib/treat/formatters/readers/html.rb +52 -30
  46. data/lib/treat/formatters/readers/image.rb +41 -40
  47. data/lib/treat/formatters/readers/odt.rb +59 -45
  48. data/lib/treat/formatters/readers/pdf.rb +28 -25
  49. data/lib/treat/formatters/readers/txt.rb +12 -15
  50. data/lib/treat/formatters/readers/xml.rb +73 -36
  51. data/lib/treat/formatters/serializers/xml.rb +80 -79
  52. data/lib/treat/formatters/serializers/yaml.rb +19 -18
  53. data/lib/treat/formatters/unserializers/autoselect.rb +12 -22
  54. data/lib/treat/formatters/unserializers/xml.rb +94 -99
  55. data/lib/treat/formatters/unserializers/yaml.rb +20 -19
  56. data/lib/treat/formatters/visualizers/dot.rb +132 -132
  57. data/lib/treat/formatters/visualizers/standoff.rb +52 -44
  58. data/lib/treat/formatters/visualizers/tree.rb +26 -29
  59. data/lib/treat/groupable.rb +153 -0
  60. data/lib/treat/helpers/decimal_point_escaper.rb +22 -0
  61. data/lib/treat/inflectors.rb +50 -45
  62. data/lib/treat/inflectors/cardinalizers/linguistics.rb +40 -0
  63. data/lib/treat/inflectors/conjugators/linguistics.rb +55 -0
  64. data/lib/treat/inflectors/declensors/active_support.rb +31 -0
  65. data/lib/treat/inflectors/declensors/english.rb +38 -0
  66. data/lib/treat/inflectors/declensors/english/inflect.rb +288 -0
  67. data/lib/treat/inflectors/declensors/linguistics.rb +49 -0
  68. data/lib/treat/inflectors/ordinalizers/linguistics.rb +17 -0
  69. data/lib/treat/inflectors/stemmers/porter.rb +160 -0
  70. data/lib/treat/inflectors/stemmers/porter_c.rb +24 -0
  71. data/lib/treat/inflectors/stemmers/uea.rb +28 -0
  72. data/lib/treat/installer.rb +308 -0
  73. data/lib/treat/kernel.rb +105 -27
  74. data/lib/treat/languages.rb +122 -88
  75. data/lib/treat/languages/arabic.rb +15 -15
  76. data/lib/treat/languages/chinese.rb +15 -15
  77. data/lib/treat/languages/dutch.rb +15 -15
  78. data/lib/treat/languages/english.rb +61 -62
  79. data/lib/treat/languages/french.rb +19 -19
  80. data/lib/treat/languages/german.rb +20 -20
  81. data/lib/treat/languages/greek.rb +15 -15
  82. data/lib/treat/languages/italian.rb +16 -16
  83. data/lib/treat/languages/polish.rb +15 -15
  84. data/lib/treat/languages/portuguese.rb +15 -15
  85. data/lib/treat/languages/russian.rb +15 -15
  86. data/lib/treat/languages/spanish.rb +16 -16
  87. data/lib/treat/languages/swedish.rb +16 -16
  88. data/lib/treat/lexicalizers.rb +34 -55
  89. data/lib/treat/lexicalizers/categorizers/from_tag.rb +54 -0
  90. data/lib/treat/lexicalizers/sensers/wordnet.rb +57 -0
  91. data/lib/treat/lexicalizers/sensers/wordnet/synset.rb +71 -0
  92. data/lib/treat/lexicalizers/taggers/brill.rb +70 -0
  93. data/lib/treat/lexicalizers/taggers/brill/patch.rb +61 -0
  94. data/lib/treat/lexicalizers/taggers/lingua.rb +90 -0
  95. data/lib/treat/lexicalizers/taggers/stanford.rb +97 -0
  96. data/lib/treat/linguistics.rb +9 -0
  97. data/lib/treat/linguistics/categories.rb +11 -0
  98. data/lib/treat/linguistics/tags.rb +422 -0
  99. data/lib/treat/loaders/linguistics.rb +30 -0
  100. data/lib/treat/loaders/stanford.rb +27 -0
  101. data/lib/treat/object.rb +1 -0
  102. data/lib/treat/processors.rb +37 -44
  103. data/lib/treat/processors/chunkers/autoselect.rb +16 -0
  104. data/lib/treat/processors/chunkers/html.rb +71 -0
  105. data/lib/treat/processors/chunkers/txt.rb +18 -24
  106. data/lib/treat/processors/parsers/enju.rb +253 -208
  107. data/lib/treat/processors/parsers/stanford.rb +130 -131
  108. data/lib/treat/processors/segmenters/punkt.rb +79 -45
  109. data/lib/treat/processors/segmenters/stanford.rb +46 -48
  110. data/lib/treat/processors/segmenters/tactful.rb +43 -36
  111. data/lib/treat/processors/tokenizers/perl.rb +124 -92
  112. data/lib/treat/processors/tokenizers/ptb.rb +81 -0
  113. data/lib/treat/processors/tokenizers/punkt.rb +48 -42
  114. data/lib/treat/processors/tokenizers/stanford.rb +39 -38
  115. data/lib/treat/processors/tokenizers/tactful.rb +64 -55
  116. data/lib/treat/proxies.rb +52 -35
  117. data/lib/treat/retrievers.rb +26 -16
  118. data/lib/treat/retrievers/indexers/ferret.rb +47 -26
  119. data/lib/treat/retrievers/searchers/ferret.rb +69 -50
  120. data/lib/treat/tree.rb +241 -183
  121. data/spec/collection.rb +123 -0
  122. data/spec/document.rb +93 -0
  123. data/spec/entity.rb +408 -0
  124. data/spec/languages.rb +25 -0
  125. data/spec/phrase.rb +146 -0
  126. data/spec/samples/mathematicians/archimedes.abw +34 -0
  127. data/spec/samples/mathematicians/euler.html +21 -0
  128. data/spec/samples/mathematicians/gauss.pdf +0 -0
  129. data/spec/samples/mathematicians/leibniz.txt +13 -0
  130. data/spec/samples/mathematicians/newton.doc +0 -0
  131. data/spec/sandbox.rb +5 -0
  132. data/spec/token.rb +109 -0
  133. data/spec/treat.rb +52 -0
  134. data/spec/tree.rb +117 -0
  135. data/spec/word.rb +110 -0
  136. data/spec/zone.rb +66 -0
  137. data/tmp/INFO +1 -1
  138. metadata +100 -201
  139. data/INSTALL +0 -1
  140. data/README +0 -3
  141. data/TODO +0 -28
  142. data/lib/economist/half_cocked_basel.txt +0 -16
  143. data/lib/economist/hungarys_troubles.txt +0 -46
  144. data/lib/economist/indias_slowdown.txt +0 -15
  145. data/lib/economist/merkozy_rides_again.txt +0 -24
  146. data/lib/economist/prada_is_not_walmart.txt +0 -9
  147. data/lib/economist/to_infinity_and_beyond.txt +0 -15
  148. data/lib/ferret/_11.cfs +0 -0
  149. data/lib/ferret/_14.cfs +0 -0
  150. data/lib/ferret/_p.cfs +0 -0
  151. data/lib/ferret/_s.cfs +0 -0
  152. data/lib/ferret/_v.cfs +0 -0
  153. data/lib/ferret/_y.cfs +0 -0
  154. data/lib/ferret/segments +0 -0
  155. data/lib/ferret/segments_15 +0 -0
  156. data/lib/treat/buildable.rb +0 -157
  157. data/lib/treat/category.rb +0 -33
  158. data/lib/treat/delegatable.rb +0 -116
  159. data/lib/treat/doable.rb +0 -45
  160. data/lib/treat/entities/collection.rb +0 -14
  161. data/lib/treat/entities/document.rb +0 -12
  162. data/lib/treat/entities/phrases.rb +0 -17
  163. data/lib/treat/entities/tokens.rb +0 -61
  164. data/lib/treat/entities/zones.rb +0 -41
  165. data/lib/treat/extractors/coreferences/stanford.rb +0 -69
  166. data/lib/treat/extractors/date/chronic.rb +0 -32
  167. data/lib/treat/extractors/date/ruby.rb +0 -25
  168. data/lib/treat/extractors/keywords/topics_tf_idf.rb +0 -48
  169. data/lib/treat/extractors/language/language_extractor.rb +0 -27
  170. data/lib/treat/extractors/named_entity_tag/stanford.rb +0 -53
  171. data/lib/treat/extractors/roles/naive.rb +0 -73
  172. data/lib/treat/extractors/statistics/frequency_in.rb +0 -16
  173. data/lib/treat/extractors/statistics/position_in.rb +0 -14
  174. data/lib/treat/extractors/statistics/tf_idf.rb +0 -104
  175. data/lib/treat/extractors/statistics/transition_matrix.rb +0 -105
  176. data/lib/treat/extractors/statistics/transition_probability.rb +0 -57
  177. data/lib/treat/extractors/topic_words/lda/data.dat +0 -46
  178. data/lib/treat/extractors/topic_words/lda/wiki.yml +0 -121
  179. data/lib/treat/extractors/topics/reuters/industry.xml +0 -2717
  180. data/lib/treat/extractors/topics/reuters/region.xml +0 -13586
  181. data/lib/treat/extractors/topics/reuters/topics.xml +0 -17977
  182. data/lib/treat/feature.rb +0 -58
  183. data/lib/treat/features.rb +0 -7
  184. data/lib/treat/formatters/visualizers/short_value.rb +0 -29
  185. data/lib/treat/formatters/visualizers/txt.rb +0 -45
  186. data/lib/treat/group.rb +0 -106
  187. data/lib/treat/helpers/linguistics_loader.rb +0 -18
  188. data/lib/treat/inflectors/cardinal_words/linguistics.rb +0 -42
  189. data/lib/treat/inflectors/conjugations/linguistics.rb +0 -36
  190. data/lib/treat/inflectors/declensions/english.rb +0 -319
  191. data/lib/treat/inflectors/declensions/linguistics.rb +0 -42
  192. data/lib/treat/inflectors/ordinal_words/linguistics.rb +0 -20
  193. data/lib/treat/inflectors/stem/porter.rb +0 -162
  194. data/lib/treat/inflectors/stem/porter_c.rb +0 -26
  195. data/lib/treat/inflectors/stem/uea.rb +0 -30
  196. data/lib/treat/install.rb +0 -59
  197. data/lib/treat/languages/tags.rb +0 -377
  198. data/lib/treat/lexicalizers/category/from_tag.rb +0 -49
  199. data/lib/treat/lexicalizers/linkages/naive.rb +0 -63
  200. data/lib/treat/lexicalizers/synsets/wordnet.rb +0 -76
  201. data/lib/treat/lexicalizers/tag/brill.rb +0 -91
  202. data/lib/treat/lexicalizers/tag/lingua.rb +0 -123
  203. data/lib/treat/lexicalizers/tag/stanford.rb +0 -70
  204. data/lib/treat/processors/segmenters/punkt/dutch.yaml +0 -9716
  205. data/lib/treat/processors/segmenters/punkt/english.yaml +0 -10340
  206. data/lib/treat/processors/segmenters/punkt/french.yaml +0 -43159
  207. data/lib/treat/processors/segmenters/punkt/german.yaml +0 -9572
  208. data/lib/treat/processors/segmenters/punkt/greek.yaml +0 -6050
  209. data/lib/treat/processors/segmenters/punkt/italian.yaml +0 -14748
  210. data/lib/treat/processors/segmenters/punkt/polish.yaml +0 -9751
  211. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +0 -13662
  212. data/lib/treat/processors/segmenters/punkt/russian.yaml +0 -4237
  213. data/lib/treat/processors/segmenters/punkt/spanish.yaml +0 -24034
  214. data/lib/treat/processors/segmenters/punkt/swedish.yaml +0 -10001
  215. data/lib/treat/processors/tokenizers/macintyre.rb +0 -77
  216. data/lib/treat/processors/tokenizers/multilingual.rb +0 -30
  217. data/lib/treat/registrable.rb +0 -28
  218. data/lib/treat/sugar.rb +0 -50
  219. data/lib/treat/viewable.rb +0 -29
  220. data/lib/treat/visitable.rb +0 -28
  221. data/test/profile.rb +0 -2
  222. data/test/tc_entity.rb +0 -117
  223. data/test/tc_extractors.rb +0 -73
  224. data/test/tc_formatters.rb +0 -41
  225. data/test/tc_inflectors.rb +0 -34
  226. data/test/tc_lexicalizers.rb +0 -32
  227. data/test/tc_processors.rb +0 -50
  228. data/test/tc_resources.rb +0 -22
  229. data/test/tc_treat.rb +0 -60
  230. data/test/tc_tree.rb +0 -60
  231. data/test/tests.rb +0 -20
  232. data/test/texts.rb +0 -19
  233. data/test/texts/english/half_cocked_basel.txt +0 -16
  234. data/test/texts/english/hose_and_dry.doc +0 -0
  235. data/test/texts/english/hungarys_troubles.abw +0 -70
  236. data/test/texts/english/long.html +0 -24
  237. data/test/texts/english/long.txt +0 -22
  238. data/test/texts/english/medium.txt +0 -5
  239. data/test/texts/english/republican_nomination.pdf +0 -0
  240. data/test/texts/english/saving_the_euro.odt +0 -0
  241. data/test/texts/english/short.txt +0 -3
  242. data/test/texts/english/zero_sum.html +0 -111
data/spec/word.rb ADDED
@@ -0,0 +1,110 @@
1
+ require_relative '../lib/treat'
2
+
3
+ describe Treat::Entities::Word do
4
+
5
+ describe "Inflectors" do
6
+
7
+ before do
8
+ @inflectors = Treat::Languages::English::Inflectors
9
+ end
10
+
11
+ describe "#stem" do
12
+
13
+ it "returns the stem of the word" do
14
+ @inflectors[:stemmers].each do |s|
15
+ 'running'.stem(s).should eql 'run'
16
+ end
17
+ end
18
+
19
+ end
20
+
21
+ describe "#infinitive" do
22
+ it "returns the infinitive form of a verb" do
23
+ @inflectors[:conjugators].each do |c|
24
+ 'running'.infinitive(c).should eql 'run'
25
+ end
26
+ end
27
+ end
28
+
29
+ # Nil if not verb?
30
+ describe "#present_participle" do
31
+ it "returns the present participle form of a verb" do
32
+ @inflectors[:conjugators].each do |c|
33
+ 'running'.infinitive(c).should eql 'run'
34
+ end
35
+ end
36
+ end
37
+
38
+ describe "#plural" do
39
+ it "returns the plural form of the word" do
40
+ @inflectors[:declensors].each do |i|
41
+ # 'inflection'.plural(i).should eql 'inflections'
42
+ end
43
+ end
44
+ end
45
+
46
+ describe "#singular" do
47
+ it "returns the singular form of the word" do
48
+ @inflectors[:declensors].each do |i|
49
+ next if i == :linguistics # Fix this
50
+ # 'inflections'.singular(i).should eql 'inflections'
51
+ end
52
+ end
53
+ end
54
+
55
+ describe "#ordinal_form" do
56
+ it "returns the ordinal form of a number" do
57
+ @inflectors[:cardinalizers].each do |o|
58
+ 20.cardinal.should eql 'twenty'
59
+ end
60
+ @inflectors[:ordinalizers].each do |o|
61
+ 20.ordinal.should eql 'twentieth'
62
+ end
63
+ end
64
+ end
65
+
66
+ end
67
+
68
+ describe "Lexicalizable" do
69
+
70
+ describe "#synonyms" do
71
+
72
+ it "returns the synonyms of the word" do
73
+ # Should the word be included in synonyms?
74
+ 'glass'.synonyms[-1].should eql 'looking_glass'
75
+ end
76
+
77
+ end
78
+
79
+ describe "#antonyms" do
80
+ it "returns the antonyms of the word" do
81
+ 'glass'.antonyms.should eql []
82
+ end
83
+ end
84
+
85
+ describe "#hypernyms" do
86
+ it "returns the hypernyms of the word" do
87
+ 'glass'.hypernyms[-1].should eql 'glasswork'
88
+ end
89
+ end
90
+
91
+ describe "#hyponyms" do
92
+ it "returns the hyponyms of the word" do
93
+ 'glass'.hyponyms[-1].should eql 'wineglass'
94
+ end
95
+ end
96
+
97
+ end
98
+
99
+ describe "Extractable" do
100
+ describe "#tf_idf" do
101
+ it "returns the TF*IDF score of the word" do
102
+ #c = Treat::Entities::Collection.build(
103
+ #Treat.spec + 'samples/mathematicians')
104
+ #c.do(:chunk, :segment, :tokenize)
105
+ #c.words[30].tf_idf.should eql 0.2231
106
+ end
107
+ end
108
+ end
109
+
110
+ end
data/spec/zone.rb ADDED
@@ -0,0 +1,66 @@
1
+ require_relative '../lib/treat'
2
+
3
+ describe Treat::Entities::Zone do
4
+
5
+ describe "Processable" do
6
+
7
+ describe "#segment" do
8
+
9
+ it "splits a zone into phrases/sentences and adds them as children of the zone" do
10
+ Treat::Languages::English::Processors[:segmenters].each do |s|
11
+ paragraph = Treat::Entities::Paragraph.new(
12
+ "This is a first sentence inside the first paragraph. " +
13
+ "This is the second sentence that is inside the paragraph.")
14
+ paragraph.segment(s)
15
+ paragraph.children.should eql paragraph.phrases
16
+ paragraph.phrases.map { |t| t.to_s }.should
17
+ eql ["This is a first sentence inside the first paragraph.",
18
+ "This is the second sentence that is inside the paragraph."]
19
+ end
20
+ end
21
+
22
+ end
23
+
24
+ end
25
+
26
+ describe "Buildable" do
27
+
28
+ describe "#build" do
29
+
30
+ context "when called with a section of text" do
31
+
32
+ it "creates a section with the text" do
33
+
34
+ section = "A title\nFollowed by a fake sentence."
35
+ s = Treat::Entities::Zone.build(section)
36
+ s.should be_an_instance_of Treat::Entities::Section
37
+
38
+ end
39
+
40
+ end
41
+
42
+ context "when called with a paragraph of text" do
43
+
44
+ it "creates a paragraph with the text" do
45
+ paragraph = "Sentence 1. Sentence 2. Sentence 3."
46
+ p = Treat::Entities::Zone.build(paragraph)
47
+ p.should be_instance_of Treat::Entities::Paragraph
48
+ end
49
+
50
+ end
51
+
52
+ context "when called with a very short text" do
53
+
54
+ it "creates a title with the text" do
55
+ title = "A title!"
56
+ p = Treat::Entities::Zone.build(title)
57
+ p.should be_instance_of Treat::Entities::Title
58
+ end
59
+
60
+ end
61
+
62
+ end
63
+
64
+ end
65
+
66
+ end
data/tmp/INFO CHANGED
@@ -1 +1 @@
1
- This is a folder for temporary files created by Treat.
1
+ This is a folder containing the temporary files created by Treat.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: treat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 1.0.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,200 +9,116 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-02-07 00:00:00.000000000 Z
12
+ date: 2012-04-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
- name: rjb
16
- requirement: &70201865896920 !ruby/object:Gem::Requirement
15
+ name: rubyzip
16
+ requirement: !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
20
20
  - !ruby/object:Gem::Version
21
- version: '0'
21
+ version: 0.9.6.1
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70201865896920
25
- - !ruby/object:Gem::Dependency
26
- name: zip
27
- requirement: &70201865895740 !ruby/object:Gem::Requirement
28
- none: false
29
- requirements:
30
- - - ! '>='
31
- - !ruby/object:Gem::Version
32
- version: '0'
33
- type: :runtime
34
- prerelease: false
35
- version_requirements: *70201865895740
36
- - !ruby/object:Gem::Dependency
37
- name: hpricot
38
- requirement: &70201865893800 !ruby/object:Gem::Requirement
39
- none: false
40
- requirements:
41
- - - ! '>='
42
- - !ruby/object:Gem::Version
43
- version: '0'
44
- type: :runtime
45
- prerelease: false
46
- version_requirements: *70201865893800
47
- - !ruby/object:Gem::Dependency
48
- name: nokogiri
49
- requirement: &70201865892000 !ruby/object:Gem::Requirement
50
- none: false
51
- requirements:
52
- - - ! '>='
53
- - !ruby/object:Gem::Version
54
- version: '0'
55
- type: :runtime
56
- prerelease: false
57
- version_requirements: *70201865892000
58
- - !ruby/object:Gem::Dependency
59
- name: psych
60
- requirement: &70201865889880 !ruby/object:Gem::Requirement
24
+ version_requirements: !ruby/object:Gem::Requirement
61
25
  none: false
62
26
  requirements:
63
27
  - - ! '>='
64
28
  - !ruby/object:Gem::Version
65
- version: '0'
66
- type: :runtime
67
- prerelease: false
68
- version_requirements: *70201865889880
29
+ version: 0.9.6.1
69
30
  - !ruby/object:Gem::Dependency
70
- name: ferret
71
- requirement: &70201865888620 !ruby/object:Gem::Requirement
31
+ name: progressbar
32
+ requirement: !ruby/object:Gem::Requirement
72
33
  none: false
73
34
  requirements:
74
35
  - - ! '>='
75
36
  - !ruby/object:Gem::Version
76
- version: '0'
37
+ version: 0.10.0
77
38
  type: :runtime
78
39
  prerelease: false
79
- version_requirements: *70201865888620
80
- - !ruby/object:Gem::Dependency
81
- name: whatlanguage
82
- requirement: &70201865887460 !ruby/object:Gem::Requirement
40
+ version_requirements: !ruby/object:Gem::Requirement
83
41
  none: false
84
42
  requirements:
85
43
  - - ! '>='
86
44
  - !ruby/object:Gem::Version
87
- version: '0'
88
- type: :runtime
89
- prerelease: false
90
- version_requirements: *70201865887460
45
+ version: 0.10.0
91
46
  - !ruby/object:Gem::Dependency
92
- name: linguistics
93
- requirement: &70201865886480 !ruby/object:Gem::Requirement
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
94
49
  none: false
95
50
  requirements:
96
51
  - - ! '>='
97
52
  - !ruby/object:Gem::Version
98
- version: '0'
99
- type: :runtime
53
+ version: 2.9.0
54
+ type: :development
100
55
  prerelease: false
101
- version_requirements: *70201865886480
102
- - !ruby/object:Gem::Dependency
103
- name: stanford-core-nlp
104
- requirement: &70201865885300 !ruby/object:Gem::Requirement
56
+ version_requirements: !ruby/object:Gem::Requirement
105
57
  none: false
106
58
  requirements:
107
59
  - - ! '>='
108
60
  - !ruby/object:Gem::Version
109
- version: '0'
110
- type: :runtime
111
- prerelease: false
112
- version_requirements: *70201865885300
61
+ version: 2.9.0
113
62
  - !ruby/object:Gem::Dependency
114
- name: punkt-segmenter
115
- requirement: &70201865883560 !ruby/object:Gem::Requirement
63
+ name: rake
64
+ requirement: !ruby/object:Gem::Requirement
116
65
  none: false
117
66
  requirements:
118
67
  - - ! '>='
119
68
  - !ruby/object:Gem::Version
120
- version: '0'
121
- type: :runtime
69
+ version: 0.9.2
70
+ type: :development
122
71
  prerelease: false
123
- version_requirements: *70201865883560
124
- - !ruby/object:Gem::Dependency
125
- name: lda-ruby
126
- requirement: &70201865882040 !ruby/object:Gem::Requirement
127
- none: false
128
- requirements:
129
- - - ! '>='
130
- - !ruby/object:Gem::Version
131
- version: '0'
132
- type: :runtime
133
- prerelease: false
134
- version_requirements: *70201865882040
135
- - !ruby/object:Gem::Dependency
136
- name: chronic
137
- requirement: &70201865880540 !ruby/object:Gem::Requirement
72
+ version_requirements: !ruby/object:Gem::Requirement
138
73
  none: false
139
74
  requirements:
140
75
  - - ! '>='
141
76
  - !ruby/object:Gem::Version
142
- version: '0'
143
- type: :runtime
144
- prerelease: false
145
- version_requirements: *70201865880540
146
- description: ! ' Treat is a toolkit for text retrieval, information extraction and
147
- natural language processing. '
77
+ version: 0.9.2
78
+ description: ! ' Treat is a Ruby toolkit for text retrieval, information extraction
79
+ and natural language processing. '
148
80
  email:
149
81
  - louis.mullie@gmail.com
150
82
  executables: []
151
83
  extensions: []
152
84
  extra_rdoc_files: []
153
85
  files:
154
- - lib/economist/half_cocked_basel.txt
155
- - lib/economist/hungarys_troubles.txt
156
- - lib/economist/indias_slowdown.txt
157
- - lib/economist/merkozy_rides_again.txt
158
- - lib/economist/prada_is_not_walmart.txt
159
- - lib/economist/to_infinity_and_beyond.txt
160
- - lib/ferret/_11.cfs
161
- - lib/ferret/_14.cfs
162
- - lib/ferret/_p.cfs
163
- - lib/ferret/_s.cfs
164
- - lib/ferret/_v.cfs
165
- - lib/ferret/_y.cfs
166
- - lib/ferret/segments
167
- - lib/ferret/segments_15
168
- - lib/treat/buildable.rb
86
+ - lib/treat/ai/classifiers/id3.rb
87
+ - lib/treat/ai.rb
169
88
  - lib/treat/categories.rb
170
- - lib/treat/category.rb
171
- - lib/treat/delegatable.rb
172
- - lib/treat/doable.rb
173
- - lib/treat/entities/collection.rb
174
- - lib/treat/entities/document.rb
89
+ - lib/treat/categorizable.rb
90
+ - lib/treat/classification.rb
91
+ - lib/treat/configurable.rb
92
+ - lib/treat/data_set.rb
93
+ - lib/treat/dependencies.rb
94
+ - lib/treat/downloader.rb
95
+ - lib/treat/entities/abilities/buildable.rb
96
+ - lib/treat/entities/abilities/checkable.rb
97
+ - lib/treat/entities/abilities/copyable.rb
98
+ - lib/treat/entities/abilities/countable.rb
99
+ - lib/treat/entities/abilities/debuggable.rb
100
+ - lib/treat/entities/abilities/delegatable.rb
101
+ - lib/treat/entities/abilities/doable.rb
102
+ - lib/treat/entities/abilities/exportable.rb
103
+ - lib/treat/entities/abilities/iterable.rb
104
+ - lib/treat/entities/abilities/magical.rb
105
+ - lib/treat/entities/abilities/registrable.rb
106
+ - lib/treat/entities/abilities/stringable.rb
107
+ - lib/treat/entities/abilities.rb
108
+ - lib/treat/entities/entities.rb
175
109
  - lib/treat/entities/entity.rb
176
- - lib/treat/entities/phrases.rb
177
- - lib/treat/entities/tokens.rb
178
- - lib/treat/entities/zones.rb
179
110
  - lib/treat/entities.rb
180
111
  - lib/treat/exception.rb
181
- - lib/treat/extractors/coreferences/stanford.rb
182
- - lib/treat/extractors/date/chronic.rb
183
- - lib/treat/extractors/date/ruby.rb
184
112
  - lib/treat/extractors/keywords/tf_idf.rb
185
- - lib/treat/extractors/keywords/topics_tf_idf.rb
186
- - lib/treat/extractors/language/language_extractor.rb
187
113
  - lib/treat/extractors/language/what_language.rb
188
- - lib/treat/extractors/named_entity_tag/stanford.rb
189
- - lib/treat/extractors/roles/naive.rb
190
- - lib/treat/extractors/statistics/frequency_in.rb
191
- - lib/treat/extractors/statistics/position_in.rb
192
- - lib/treat/extractors/statistics/tf_idf.rb
193
- - lib/treat/extractors/statistics/transition_matrix.rb
194
- - lib/treat/extractors/statistics/transition_probability.rb
114
+ - lib/treat/extractors/name_tag/stanford.rb
115
+ - lib/treat/extractors/tf_idf/native.rb
116
+ - lib/treat/extractors/time/chronic.rb
195
117
  - lib/treat/extractors/time/nickel.rb
196
- - lib/treat/extractors/topic_words/lda/data.dat
197
- - lib/treat/extractors/topic_words/lda/wiki.yml
118
+ - lib/treat/extractors/time/ruby.rb
198
119
  - lib/treat/extractors/topic_words/lda.rb
199
- - lib/treat/extractors/topics/reuters/industry.xml
200
- - lib/treat/extractors/topics/reuters/region.xml
201
- - lib/treat/extractors/topics/reuters/topics.xml
202
120
  - lib/treat/extractors/topics/reuters.rb
203
121
  - lib/treat/extractors.rb
204
- - lib/treat/feature.rb
205
- - lib/treat/features.rb
206
122
  - lib/treat/formatters/readers/abw.rb
207
123
  - lib/treat/formatters/readers/autoselect.rb
208
124
  - lib/treat/formatters/readers/doc.rb
@@ -218,23 +134,23 @@ files:
218
134
  - lib/treat/formatters/unserializers/xml.rb
219
135
  - lib/treat/formatters/unserializers/yaml.rb
220
136
  - lib/treat/formatters/visualizers/dot.rb
221
- - lib/treat/formatters/visualizers/short_value.rb
222
137
  - lib/treat/formatters/visualizers/standoff.rb
223
138
  - lib/treat/formatters/visualizers/tree.rb
224
- - lib/treat/formatters/visualizers/txt.rb
225
139
  - lib/treat/formatters.rb
226
- - lib/treat/group.rb
227
- - lib/treat/helpers/linguistics_loader.rb
228
- - lib/treat/inflectors/cardinal_words/linguistics.rb
229
- - lib/treat/inflectors/conjugations/linguistics.rb
230
- - lib/treat/inflectors/declensions/english.rb
231
- - lib/treat/inflectors/declensions/linguistics.rb
232
- - lib/treat/inflectors/ordinal_words/linguistics.rb
233
- - lib/treat/inflectors/stem/porter.rb
234
- - lib/treat/inflectors/stem/porter_c.rb
235
- - lib/treat/inflectors/stem/uea.rb
140
+ - lib/treat/groupable.rb
141
+ - lib/treat/helpers/decimal_point_escaper.rb
142
+ - lib/treat/inflectors/cardinalizers/linguistics.rb
143
+ - lib/treat/inflectors/conjugators/linguistics.rb
144
+ - lib/treat/inflectors/declensors/active_support.rb
145
+ - lib/treat/inflectors/declensors/english/inflect.rb
146
+ - lib/treat/inflectors/declensors/english.rb
147
+ - lib/treat/inflectors/declensors/linguistics.rb
148
+ - lib/treat/inflectors/ordinalizers/linguistics.rb
149
+ - lib/treat/inflectors/stemmers/porter.rb
150
+ - lib/treat/inflectors/stemmers/porter_c.rb
151
+ - lib/treat/inflectors/stemmers/uea.rb
236
152
  - lib/treat/inflectors.rb
237
- - lib/treat/install.rb
153
+ - lib/treat/installer.rb
238
154
  - lib/treat/kernel.rb
239
155
  - lib/treat/languages/arabic.rb
240
156
  - lib/treat/languages/chinese.rb
@@ -250,77 +166,61 @@ files:
250
166
  - lib/treat/languages/russian.rb
251
167
  - lib/treat/languages/spanish.rb
252
168
  - lib/treat/languages/swedish.rb
253
- - lib/treat/languages/tags.rb
254
169
  - lib/treat/languages.rb
255
- - lib/treat/lexicalizers/category/from_tag.rb
256
- - lib/treat/lexicalizers/linkages/naive.rb
257
- - lib/treat/lexicalizers/synsets/wordnet.rb
258
- - lib/treat/lexicalizers/tag/brill.rb
259
- - lib/treat/lexicalizers/tag/lingua.rb
260
- - lib/treat/lexicalizers/tag/stanford.rb
170
+ - lib/treat/lexicalizers/categorizers/from_tag.rb
171
+ - lib/treat/lexicalizers/sensers/wordnet/synset.rb
172
+ - lib/treat/lexicalizers/sensers/wordnet.rb
173
+ - lib/treat/lexicalizers/taggers/brill/patch.rb
174
+ - lib/treat/lexicalizers/taggers/brill.rb
175
+ - lib/treat/lexicalizers/taggers/lingua.rb
176
+ - lib/treat/lexicalizers/taggers/stanford.rb
261
177
  - lib/treat/lexicalizers.rb
178
+ - lib/treat/linguistics/categories.rb
179
+ - lib/treat/linguistics/tags.rb
180
+ - lib/treat/linguistics.rb
181
+ - lib/treat/loaders/linguistics.rb
182
+ - lib/treat/loaders/stanford.rb
262
183
  - lib/treat/object.rb
184
+ - lib/treat/processors/chunkers/autoselect.rb
185
+ - lib/treat/processors/chunkers/html.rb
263
186
  - lib/treat/processors/chunkers/txt.rb
264
187
  - lib/treat/processors/parsers/enju.rb
265
188
  - lib/treat/processors/parsers/stanford.rb
266
- - lib/treat/processors/segmenters/punkt/dutch.yaml
267
- - lib/treat/processors/segmenters/punkt/english.yaml
268
- - lib/treat/processors/segmenters/punkt/french.yaml
269
- - lib/treat/processors/segmenters/punkt/german.yaml
270
- - lib/treat/processors/segmenters/punkt/greek.yaml
271
- - lib/treat/processors/segmenters/punkt/italian.yaml
272
- - lib/treat/processors/segmenters/punkt/polish.yaml
273
- - lib/treat/processors/segmenters/punkt/portuguese.yaml
274
- - lib/treat/processors/segmenters/punkt/russian.yaml
275
- - lib/treat/processors/segmenters/punkt/spanish.yaml
276
- - lib/treat/processors/segmenters/punkt/swedish.yaml
277
189
  - lib/treat/processors/segmenters/punkt.rb
278
190
  - lib/treat/processors/segmenters/stanford.rb
279
191
  - lib/treat/processors/segmenters/tactful.rb
280
- - lib/treat/processors/tokenizers/macintyre.rb
281
- - lib/treat/processors/tokenizers/multilingual.rb
282
192
  - lib/treat/processors/tokenizers/perl.rb
193
+ - lib/treat/processors/tokenizers/ptb.rb
283
194
  - lib/treat/processors/tokenizers/punkt.rb
284
195
  - lib/treat/processors/tokenizers/stanford.rb
285
196
  - lib/treat/processors/tokenizers/tactful.rb
286
197
  - lib/treat/processors.rb
287
198
  - lib/treat/proxies.rb
288
- - lib/treat/registrable.rb
289
199
  - lib/treat/retrievers/indexers/ferret.rb
290
200
  - lib/treat/retrievers/searchers/ferret.rb
291
201
  - lib/treat/retrievers.rb
292
- - lib/treat/sugar.rb
293
202
  - lib/treat/tree.rb
294
- - lib/treat/viewable.rb
295
- - lib/treat/visitable.rb
296
203
  - lib/treat.rb
297
- - test/profile.rb
298
- - test/tc_entity.rb
299
- - test/tc_extractors.rb
300
- - test/tc_formatters.rb
301
- - test/tc_inflectors.rb
302
- - test/tc_lexicalizers.rb
303
- - test/tc_processors.rb
304
- - test/tc_resources.rb
305
- - test/tc_treat.rb
306
- - test/tc_tree.rb
307
- - test/tests.rb
308
- - test/texts/english/half_cocked_basel.txt
309
- - test/texts/english/hose_and_dry.doc
310
- - test/texts/english/hungarys_troubles.abw
311
- - test/texts/english/long.html
312
- - test/texts/english/long.txt
313
- - test/texts/english/medium.txt
314
- - test/texts/english/republican_nomination.pdf
315
- - test/texts/english/saving_the_euro.odt
316
- - test/texts/english/short.txt
317
- - test/texts/english/zero_sum.html
318
- - test/texts.rb
204
+ - spec/collection.rb
205
+ - spec/document.rb
206
+ - spec/entity.rb
207
+ - spec/languages.rb
208
+ - spec/phrase.rb
209
+ - spec/samples/mathematicians/archimedes.abw
210
+ - spec/samples/mathematicians/euler.html
211
+ - spec/samples/mathematicians/gauss.pdf
212
+ - spec/samples/mathematicians/leibniz.txt
213
+ - spec/samples/mathematicians/newton.doc
214
+ - spec/sandbox.rb
215
+ - spec/token.rb
216
+ - spec/treat.rb
217
+ - spec/tree.rb
218
+ - spec/word.rb
219
+ - spec/zone.rb
319
220
  - tmp/INFO
320
- - README
321
- - TODO
221
+ - files/INFO
222
+ - README.md
322
223
  - LICENSE
323
- - INSTALL
324
224
  homepage: https://github.com/louismullie/treat
325
225
  licenses: []
326
226
  post_install_message:
@@ -341,9 +241,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
341
241
  version: '0'
342
242
  requirements: []
343
243
  rubyforge_project:
344
- rubygems_version: 1.8.15
244
+ rubygems_version: 1.8.21
345
245
  signing_key:
346
246
  specification_version: 3
347
- summary: Text retrieval, extraction and annotation toolkit
247
+ summary: A text retrieval, extraction and annotation toolkit for Ruby.
348
248
  test_files: []
349
- has_rdoc: