treat 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (160) hide show
  1. data/LICENSE +4 -4
  2. data/TODO +21 -54
  3. data/lib/economist/half_cocked_basel.txt +16 -0
  4. data/lib/economist/hose_and_dry.doc +0 -0
  5. data/lib/economist/hungarys_troubles.abw +70 -0
  6. data/lib/economist/republican_nomination.pdf +0 -0
  7. data/lib/economist/saving_the_euro.odt +0 -0
  8. data/lib/economist/to_infinity_and_beyond.txt +15 -0
  9. data/lib/economist/zero_sum.html +91 -0
  10. data/lib/treat.rb +58 -72
  11. data/lib/treat/buildable.rb +59 -15
  12. data/lib/treat/categories.rb +26 -14
  13. data/lib/treat/category.rb +2 -2
  14. data/lib/treat/delegatable.rb +65 -48
  15. data/lib/treat/doable.rb +44 -0
  16. data/lib/treat/entities.rb +34 -14
  17. data/lib/treat/entities/collection.rb +2 -0
  18. data/lib/treat/entities/document.rb +3 -2
  19. data/lib/treat/entities/entity.rb +105 -90
  20. data/lib/treat/entities/phrases.rb +17 -0
  21. data/lib/treat/entities/tokens.rb +28 -13
  22. data/lib/treat/entities/zones.rb +20 -0
  23. data/lib/treat/extractors.rb +49 -11
  24. data/lib/treat/extractors/coreferences/stanford.rb +68 -0
  25. data/lib/treat/extractors/date/chronic.rb +32 -0
  26. data/lib/treat/extractors/date/ruby.rb +25 -0
  27. data/lib/treat/extractors/keywords/tf_idf.rb +26 -0
  28. data/lib/treat/extractors/keywords/{topics_frequency.rb → topics_tf_idf.rb} +15 -7
  29. data/lib/treat/{detectors/language/language_detector.rb → extractors/language/language_extractor.rb} +5 -2
  30. data/lib/treat/extractors/language/what_language.rb +49 -0
  31. data/lib/treat/extractors/named_entity_tag/stanford.rb +53 -0
  32. data/lib/treat/extractors/roles/naive.rb +73 -0
  33. data/lib/treat/extractors/statistics/frequency_in.rb +6 -13
  34. data/lib/treat/extractors/statistics/{position_in_parent.rb → position_in.rb} +1 -1
  35. data/lib/treat/extractors/statistics/tf_idf.rb +89 -21
  36. data/lib/treat/extractors/statistics/transition_matrix.rb +11 -11
  37. data/lib/treat/extractors/statistics/transition_probability.rb +4 -4
  38. data/lib/treat/extractors/time/nickel.rb +30 -12
  39. data/lib/treat/extractors/topic_words/lda.rb +9 -9
  40. data/lib/treat/extractors/topics/reuters.rb +14 -15
  41. data/lib/treat/extractors/topics/reuters/region.xml +1 -0
  42. data/lib/treat/features.rb +7 -0
  43. data/lib/treat/formatters/readers/abw.rb +6 -1
  44. data/lib/treat/formatters/readers/autoselect.rb +5 -6
  45. data/lib/treat/formatters/readers/doc.rb +3 -1
  46. data/lib/treat/formatters/readers/html.rb +1 -1
  47. data/lib/treat/formatters/readers/image.rb +43 -0
  48. data/lib/treat/formatters/readers/odt.rb +1 -2
  49. data/lib/treat/formatters/readers/pdf.rb +9 -1
  50. data/lib/treat/formatters/readers/xml.rb +40 -0
  51. data/lib/treat/formatters/serializers/xml.rb +50 -14
  52. data/lib/treat/formatters/serializers/yaml.rb +7 -2
  53. data/lib/treat/formatters/unserializers/xml.rb +33 -7
  54. data/lib/treat/formatters/visualizers/dot.rb +90 -20
  55. data/lib/treat/formatters/visualizers/short_value.rb +2 -2
  56. data/lib/treat/formatters/visualizers/standoff.rb +2 -2
  57. data/lib/treat/formatters/visualizers/tree.rb +1 -1
  58. data/lib/treat/formatters/visualizers/txt.rb +13 -4
  59. data/lib/treat/group.rb +16 -10
  60. data/lib/treat/helpers/linguistics_loader.rb +18 -0
  61. data/lib/treat/inflectors.rb +10 -0
  62. data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
  63. data/lib/treat/inflectors/conjugations/linguistics.rb +5 -12
  64. data/lib/treat/inflectors/declensions/english.rb +319 -0
  65. data/lib/treat/inflectors/declensions/linguistics.rb +12 -11
  66. data/lib/treat/inflectors/ordinal_words/linguistics.rb +3 -3
  67. data/lib/treat/install.rb +59 -0
  68. data/lib/treat/kernel.rb +18 -8
  69. data/lib/treat/languages.rb +18 -11
  70. data/lib/treat/languages/arabic.rb +4 -2
  71. data/lib/treat/languages/chinese.rb +6 -2
  72. data/lib/treat/languages/dutch.rb +16 -0
  73. data/lib/treat/languages/english.rb +47 -19
  74. data/lib/treat/languages/french.rb +8 -5
  75. data/lib/treat/languages/german.rb +9 -6
  76. data/lib/treat/languages/greek.rb +16 -0
  77. data/lib/treat/languages/italian.rb +6 -3
  78. data/lib/treat/languages/polish.rb +16 -0
  79. data/lib/treat/languages/portuguese.rb +16 -0
  80. data/lib/treat/languages/russian.rb +16 -0
  81. data/lib/treat/languages/spanish.rb +16 -0
  82. data/lib/treat/languages/swedish.rb +16 -0
  83. data/lib/treat/languages/tags.rb +377 -0
  84. data/lib/treat/lexicalizers.rb +34 -23
  85. data/lib/treat/lexicalizers/category/from_tag.rb +17 -10
  86. data/lib/treat/lexicalizers/linkages/naive.rb +51 -51
  87. data/lib/treat/lexicalizers/synsets/wordnet.rb +5 -1
  88. data/lib/treat/lexicalizers/tag/brill.rb +35 -40
  89. data/lib/treat/lexicalizers/tag/lingua.rb +19 -14
  90. data/lib/treat/lexicalizers/tag/stanford.rb +59 -68
  91. data/lib/treat/lexicalizers/tag/tagger.rb +29 -0
  92. data/lib/treat/processors.rb +8 -8
  93. data/lib/treat/processors/chunkers/txt.rb +4 -4
  94. data/lib/treat/processors/parsers/enju.rb +114 -99
  95. data/lib/treat/processors/parsers/stanford.rb +109 -41
  96. data/lib/treat/processors/segmenters/punkt.rb +17 -18
  97. data/lib/treat/processors/segmenters/punkt/dutch.yaml +9716 -0
  98. data/lib/treat/processors/segmenters/punkt/english.yaml +10340 -0
  99. data/lib/treat/processors/segmenters/punkt/french.yaml +43159 -0
  100. data/lib/treat/processors/segmenters/punkt/german.yaml +9572 -0
  101. data/lib/treat/processors/segmenters/punkt/greek.yaml +6050 -0
  102. data/lib/treat/processors/segmenters/punkt/italian.yaml +14748 -0
  103. data/lib/treat/processors/segmenters/punkt/polish.yaml +9751 -0
  104. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +13662 -0
  105. data/lib/treat/processors/segmenters/punkt/russian.yaml +4237 -0
  106. data/lib/treat/processors/segmenters/punkt/spanish.yaml +24034 -0
  107. data/lib/treat/processors/segmenters/punkt/swedish.yaml +10001 -0
  108. data/lib/treat/processors/segmenters/stanford.rb +38 -37
  109. data/lib/treat/processors/segmenters/tactful.rb +5 -4
  110. data/lib/treat/processors/tokenizers/macintyre.rb +7 -6
  111. data/lib/treat/processors/tokenizers/multilingual.rb +2 -3
  112. data/lib/treat/processors/tokenizers/perl.rb +2 -2
  113. data/lib/treat/processors/tokenizers/punkt.rb +6 -2
  114. data/lib/treat/processors/tokenizers/stanford.rb +25 -24
  115. data/lib/treat/processors/tokenizers/tactful.rb +1 -2
  116. data/lib/treat/proxies.rb +2 -35
  117. data/lib/treat/registrable.rb +17 -22
  118. data/lib/treat/sugar.rb +11 -11
  119. data/lib/treat/tree.rb +27 -17
  120. data/lib/treat/viewable.rb +29 -0
  121. data/lib/treat/visitable.rb +1 -1
  122. data/test/tc_entity.rb +56 -49
  123. data/test/tc_extractors.rb +41 -18
  124. data/test/tc_formatters.rb +7 -8
  125. data/test/tc_inflectors.rb +19 -24
  126. data/test/tc_lexicalizers.rb +12 -19
  127. data/test/tc_processors.rb +26 -12
  128. data/test/tc_resources.rb +2 -7
  129. data/test/tc_treat.rb +20 -22
  130. data/test/tc_tree.rb +4 -4
  131. data/test/tests.rb +3 -5
  132. data/test/texts.rb +13 -14
  133. data/tmp/INFO +1 -0
  134. metadata +78 -158
  135. data/bin/INFO +0 -1
  136. data/examples/benchmark.rb +0 -81
  137. data/examples/keywords.rb +0 -148
  138. data/lib/treat/detectors.rb +0 -31
  139. data/lib/treat/detectors/encoding/r_chardet19.rb +0 -27
  140. data/lib/treat/detectors/format/file.rb +0 -36
  141. data/lib/treat/detectors/language/what_language.rb +0 -29
  142. data/lib/treat/entities/constituents.rb +0 -15
  143. data/lib/treat/entities/sentence.rb +0 -8
  144. data/lib/treat/extractors/named_entity/abner.rb +0 -20
  145. data/lib/treat/extractors/named_entity/stanford.rb +0 -174
  146. data/lib/treat/extractors/statistics/frequency_of.rb +0 -15
  147. data/lib/treat/extractors/time/chronic.rb +0 -20
  148. data/lib/treat/extractors/time/native.rb +0 -18
  149. data/lib/treat/formatters/readers/gocr.rb +0 -26
  150. data/lib/treat/formatters/readers/ocropus.rb +0 -31
  151. data/lib/treat/formatters/visualizers/html.rb +0 -13
  152. data/lib/treat/formatters/visualizers/inspect.rb +0 -20
  153. data/lib/treat/inflectors/declensions/en.rb +0 -18
  154. data/lib/treat/languages/categories.rb +0 -5
  155. data/lib/treat/languages/english/categories.rb +0 -23
  156. data/lib/treat/languages/english/tags.rb +0 -352
  157. data/lib/treat/languages/xinhua.rb +0 -12
  158. data/lib/treat/lexicalizers/synsets/rita_wn.rb +0 -23
  159. data/lib/treat/string.rb +0 -5
  160. data/test/tc_detectors.rb +0 -26
@@ -3,8 +3,8 @@ module Treat
3
3
  class TestLanguages < Test::Unit::TestCase
4
4
 
5
5
  def test_languages
6
- assert_equal :eng, Treat::Languages.find(:english, 2)
7
- assert_equal :en, Treat::Languages.find(:english, 1)
6
+ assert_equal :eng, Treat::Languages.code(:english, 2)
7
+ assert_equal :en, Treat::Languages.code(:english, 1)
8
8
  assert_equal :english, Treat::Languages.describe(:eng)
9
9
  assert_equal :english, Treat::Languages.describe(:en)
10
10
  end
@@ -17,11 +17,6 @@ module Treat
17
17
 
18
18
  end
19
19
 
20
- def test_edges
21
-
22
- end
23
-
24
20
  end
25
-
26
21
  end
27
22
  end
@@ -1,10 +1,10 @@
1
1
  module Treat
2
2
  module Tests
3
3
  class TestTreat < Test::Unit::TestCase
4
-
4
+
5
5
  def test_edulcoration
6
- Treat.edulcorate
7
- assert_equal true, Treat.edulcorated?
6
+ Treat.sweeten!
7
+ assert_equal true, Treat.sweetened?
8
8
  Treat::Entities.list.each do |klass|
9
9
  next if klass == :symbol
10
10
  assert_nothing_raised do
@@ -16,9 +16,9 @@ module Treat
16
16
  raise
17
17
  end
18
18
  end
19
- end
20
- Treat.unedulcorate
21
- assert_equal false, Treat.edulcorated?
19
+ end
20
+ Treat.unsweeten!
21
+ assert_equal false, Treat.sweetened?
22
22
  Treat::Entities.list.each do |klass|
23
23
  next if klass == :symbol
24
24
  assert_raise(NoMethodError) do
@@ -26,37 +26,35 @@ module Treat
26
26
  end
27
27
  end
28
28
  end
29
-
29
+
30
30
  def test_modules_loaded?
31
31
  ['exception',
32
- 'languages',
33
- 'entities',
34
- 'feature',
35
- 'category',
36
- 'group',
37
- 'detectors',
38
- 'formatters',
39
- 'processors',
40
- 'lexicalizers',
41
- 'extractors',
42
- 'inflectors',
32
+ 'languages',
33
+ 'entities',
34
+ 'feature',
35
+ 'category',
36
+ 'group',
37
+ 'formatters',
38
+ 'processors',
39
+ 'lexicalizers',
40
+ 'extractors',
41
+ 'inflectors',
43
42
  'proxies'].each do |klass|
44
43
  assert_nothing_raised do
45
44
  Treat.const_get klass.capitalize
46
45
  end
47
46
  end
48
47
  end
49
-
48
+
50
49
  def test_paths
51
50
  assert_not_nil Treat.lib
52
- assert_not_nil Treat.bin
53
51
  assert_not_nil Treat.test
54
52
  end
55
-
53
+
56
54
  def test_file_permissions
57
55
  assert_equal true, File.writable?(Treat.lib + '/../tmp')
58
56
  end
59
-
57
+
60
58
  end
61
59
  end
62
60
  end
@@ -8,7 +8,7 @@ module Treat
8
8
  @leaf = Treat::Tree::Node.new('leaf node', 'leaf')
9
9
  @root << @branch << @leaf
10
10
  @root << @sibling
11
- @leaf.associate(@sibling, 'some dependency')
11
+ @leaf.link(@sibling, 'some dependency')
12
12
  end
13
13
  def test_branching
14
14
  assert_equal 2, @root.children.size
@@ -50,9 +50,9 @@ module Treat
50
50
  assert_equal true, @branch.has_parent?
51
51
  assert_equal true, @leaf.has_parent?
52
52
 
53
- assert_equal false, @root.has_edges?
54
- assert_equal false, @branch.has_edges?
55
- assert_equal true, @leaf.has_edges?
53
+ assert_equal false, @root.has_dependencies?
54
+ assert_equal false, @branch.has_dependencies?
55
+ assert_equal true, @leaf.has_dependencies?
56
56
 
57
57
  end
58
58
  end
@@ -4,19 +4,17 @@ $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
4
4
 
5
5
  require 'treat'
6
6
 
7
- # $LOAD_PATH << '/ruby/treat/test' # Remove for release
8
- # Treat.bin = '/ruby/nat/bin' # Remove for release
7
+ #$LOAD_PATH << '/ruby/gems/treat/test' # Remove for release
9
8
 
10
9
  require 'texts'
11
10
 
11
+ # This is roughly in order of dependence.
12
12
  require 'tc_treat'
13
13
  require 'tc_tree'
14
14
  require 'tc_entity'
15
15
  require 'tc_resources'
16
-
17
- require 'tc_detectors'
18
16
  require 'tc_formatters'
19
17
  require 'tc_inflectors'
20
18
  require 'tc_lexicalizers'
21
19
  require 'tc_processors'
22
- require 'tc_extractors'
20
+ #require 'tc_extractors'
@@ -1,20 +1,19 @@
1
1
  module Treat
2
2
  module Tests
3
3
 
4
- EnglishHtmlDoc = Treat::Entities::Document("#{Treat.test}/texts/english/long.html")
5
- EnglishLongDoc = Treat::Entities::Document("#{Treat.test}/texts/english/long.txt")
6
- EnglishMediumDoc = Treat::Entities::Document("#{Treat.test}/texts/english/medium.txt")
7
- EnglishShortDoc = Treat::Entities::Document("#{Treat.test}/texts/english/short.txt")
8
-
9
- EnglishTime = Treat::Entities::Phrase('5 PM')
10
- EnglishDate = Treat::Entities::Phrase('this tuesday')
11
-
12
- EnglishSentence = Treat::Entities::Sentence('The quick brown fox jumped over the lazy dog.')
13
-
14
- EnglishVerb = Treat::Entities::Word('run'); EnglishVerb.set :category, :verb
15
- EnglishWord = Treat::Entities::Word('running')
16
- EnglishNoun = Treat::Entities::Word('captain')
17
- Number = Treat::Entities::Number(20)
4
+ module English
5
+ Collection = Treat::Entities::Collection "#{Treat.test}/texts/english"
6
+ LongDoc = Treat::Entities::Document "#{Treat.test}/texts/english/long.txt"
7
+ MediumDoc = Treat::Entities::Document "#{Treat.test}/texts/english/medium.txt"
8
+ ShortDoc = Treat::Entities::Document "#{Treat.test}/texts/english/short.txt"
9
+ Time = Treat::Entities::Phrase 'every Tuesday at 3:00'
10
+ Date = Treat::Entities::Phrase '2011/02/01'
11
+ Sentence = Treat::Entities::Sentence 'The quick brown fox jumped over the lazy dog.'
12
+ Verb = Treat::Entities::Word 'run'
13
+ Word = Treat::Entities::Word 'running'
14
+ Noun = Treat::Entities::Word 'captain'
15
+ Number = Treat::Entities::Number 20
16
+ end
18
17
 
19
18
  end
20
19
  end
@@ -0,0 +1 @@
1
+ This is a folder for temporary files created by Treat.
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: treat
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4
4
+ version: 0.2.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-01-26 00:00:00.000000000 Z
12
+ date: 2012-02-06 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rjb
16
- requirement: &70243259830940 !ruby/object:Gem::Requirement
16
+ requirement: &70310096172480 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,21 +21,10 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *70243259830940
24
+ version_requirements: *70310096172480
25
25
  - !ruby/object:Gem::Dependency
26
- name: nokogiri
27
- requirement: &70243259829420 !ruby/object:Gem::Requirement
28
- none: false
29
- requirements:
30
- - - ! '>='
31
- - !ruby/object:Gem::Version
32
- version: '0'
33
- type: :runtime
34
- prerelease: false
35
- version_requirements: *70243259829420
36
- - !ruby/object:Gem::Dependency
37
- name: chronic
38
- requirement: &70243259828180 !ruby/object:Gem::Requirement
26
+ name: zip
27
+ requirement: &70310096171240 !ruby/object:Gem::Requirement
39
28
  none: false
40
29
  requirements:
41
30
  - - ! '>='
@@ -43,10 +32,10 @@ dependencies:
43
32
  version: '0'
44
33
  type: :runtime
45
34
  prerelease: false
46
- version_requirements: *70243259828180
35
+ version_requirements: *70310096171240
47
36
  - !ruby/object:Gem::Dependency
48
37
  name: hpricot
49
- requirement: &70243259826580 !ruby/object:Gem::Requirement
38
+ requirement: &70310096170020 !ruby/object:Gem::Requirement
50
39
  none: false
51
40
  requirements:
52
41
  - - ! '>='
@@ -54,10 +43,10 @@ dependencies:
54
43
  version: '0'
55
44
  type: :runtime
56
45
  prerelease: false
57
- version_requirements: *70243259826580
46
+ version_requirements: *70310096170020
58
47
  - !ruby/object:Gem::Dependency
59
- name: psych
60
- requirement: &70243259824560 !ruby/object:Gem::Requirement
48
+ name: nokogiri
49
+ requirement: &70310096168860 !ruby/object:Gem::Requirement
61
50
  none: false
62
51
  requirements:
63
52
  - - ! '>='
@@ -65,10 +54,10 @@ dependencies:
65
54
  version: '0'
66
55
  type: :runtime
67
56
  prerelease: false
68
- version_requirements: *70243259824560
57
+ version_requirements: *70310096168860
69
58
  - !ruby/object:Gem::Dependency
70
- name: rchardet19
71
- requirement: &70243259839300 !ruby/object:Gem::Requirement
59
+ name: psych
60
+ requirement: &70310096166960 !ruby/object:Gem::Requirement
72
61
  none: false
73
62
  requirements:
74
63
  - - ! '>='
@@ -76,43 +65,10 @@ dependencies:
76
65
  version: '0'
77
66
  type: :runtime
78
67
  prerelease: false
79
- version_requirements: *70243259839300
68
+ version_requirements: *70310096166960
80
69
  - !ruby/object:Gem::Dependency
81
70
  name: whatlanguage
82
- requirement: &70243259837640 !ruby/object:Gem::Requirement
83
- none: false
84
- requirements:
85
- - - ! '>='
86
- - !ruby/object:Gem::Version
87
- version: '0'
88
- type: :runtime
89
- prerelease: false
90
- version_requirements: *70243259837640
91
- - !ruby/object:Gem::Dependency
92
- name: wordnet
93
- requirement: &70243259835240 !ruby/object:Gem::Requirement
94
- none: false
95
- requirements:
96
- - - ! '>='
97
- - !ruby/object:Gem::Version
98
- version: '0'
99
- type: :runtime
100
- prerelease: false
101
- version_requirements: *70243259835240
102
- - !ruby/object:Gem::Dependency
103
- name: rbtagger
104
- requirement: &70243259832320 !ruby/object:Gem::Requirement
105
- none: false
106
- requirements:
107
- - - ! '>='
108
- - !ruby/object:Gem::Version
109
- version: '0'
110
- type: :runtime
111
- prerelease: false
112
- version_requirements: *70243259832320
113
- - !ruby/object:Gem::Dependency
114
- name: engtagger
115
- requirement: &70243259839180 !ruby/object:Gem::Requirement
71
+ requirement: &70310096163460 !ruby/object:Gem::Requirement
116
72
  none: false
117
73
  requirements:
118
74
  - - ! '>='
@@ -120,54 +76,10 @@ dependencies:
120
76
  version: '0'
121
77
  type: :runtime
122
78
  prerelease: false
123
- version_requirements: *70243259839180
124
- - !ruby/object:Gem::Dependency
125
- name: punkt-segmenter
126
- requirement: &70243259836880 !ruby/object:Gem::Requirement
127
- none: false
128
- requirements:
129
- - - ! '>='
130
- - !ruby/object:Gem::Version
131
- version: '0'
132
- type: :runtime
133
- prerelease: false
134
- version_requirements: *70243259836880
135
- - !ruby/object:Gem::Dependency
136
- name: tokenizer
137
- requirement: &70243259834100 !ruby/object:Gem::Requirement
138
- none: false
139
- requirements:
140
- - - ! '>='
141
- - !ruby/object:Gem::Version
142
- version: '0'
143
- type: :runtime
144
- prerelease: false
145
- version_requirements: *70243259834100
146
- - !ruby/object:Gem::Dependency
147
- name: tactful_tokenizer
148
- requirement: &70243259830340 !ruby/object:Gem::Requirement
149
- none: false
150
- requirements:
151
- - - ! '>='
152
- - !ruby/object:Gem::Version
153
- version: '0'
154
- type: :runtime
155
- prerelease: false
156
- version_requirements: *70243259830340
157
- - !ruby/object:Gem::Dependency
158
- name: english
159
- requirement: &70243259828860 !ruby/object:Gem::Requirement
160
- none: false
161
- requirements:
162
- - - ! '>='
163
- - !ruby/object:Gem::Version
164
- version: '0'
165
- type: :runtime
166
- prerelease: false
167
- version_requirements: *70243259828860
79
+ version_requirements: *70310096163460
168
80
  - !ruby/object:Gem::Dependency
169
81
  name: linguistics
170
- requirement: &70243259827140 !ruby/object:Gem::Requirement
82
+ requirement: &70310096159580 !ruby/object:Gem::Requirement
171
83
  none: false
172
84
  requirements:
173
85
  - - ! '>='
@@ -175,10 +87,10 @@ dependencies:
175
87
  version: '0'
176
88
  type: :runtime
177
89
  prerelease: false
178
- version_requirements: *70243259827140
90
+ version_requirements: *70310096159580
179
91
  - !ruby/object:Gem::Dependency
180
- name: ruby-stemmer
181
- requirement: &70243259824840 !ruby/object:Gem::Requirement
92
+ name: stanford-core-nlp
93
+ requirement: &70310096158080 !ruby/object:Gem::Requirement
182
94
  none: false
183
95
  requirements:
184
96
  - - ! '>='
@@ -186,10 +98,10 @@ dependencies:
186
98
  version: '0'
187
99
  type: :runtime
188
100
  prerelease: false
189
- version_requirements: *70243259824840
101
+ version_requirements: *70310096158080
190
102
  - !ruby/object:Gem::Dependency
191
- name: uea-stemmer
192
- requirement: &70243259812140 !ruby/object:Gem::Requirement
103
+ name: punkt-segmenter
104
+ requirement: &70310096156900 !ruby/object:Gem::Requirement
193
105
  none: false
194
106
  requirements:
195
107
  - - ! '>='
@@ -197,10 +109,10 @@ dependencies:
197
109
  version: '0'
198
110
  type: :runtime
199
111
  prerelease: false
200
- version_requirements: *70243259812140
112
+ version_requirements: *70310096156900
201
113
  - !ruby/object:Gem::Dependency
202
114
  name: lda-ruby
203
- requirement: &70243259810000 !ruby/object:Gem::Requirement
115
+ requirement: &70310096155740 !ruby/object:Gem::Requirement
204
116
  none: false
205
117
  requirements:
206
118
  - - ! '>='
@@ -208,10 +120,10 @@ dependencies:
208
120
  version: '0'
209
121
  type: :runtime
210
122
  prerelease: false
211
- version_requirements: *70243259810000
123
+ version_requirements: *70310096155740
212
124
  - !ruby/object:Gem::Dependency
213
- name: nickel
214
- requirement: &70243259808300 !ruby/object:Gem::Requirement
125
+ name: chronic
126
+ requirement: &70310096154280 !ruby/object:Gem::Requirement
215
127
  none: false
216
128
  requirements:
217
129
  - - ! '>='
@@ -219,18 +131,7 @@ dependencies:
219
131
  version: '0'
220
132
  type: :runtime
221
133
  prerelease: false
222
- version_requirements: *70243259808300
223
- - !ruby/object:Gem::Dependency
224
- name: unprof
225
- requirement: &70243259805800 !ruby/object:Gem::Requirement
226
- none: false
227
- requirements:
228
- - - ! '>='
229
- - !ruby/object:Gem::Version
230
- version: '0'
231
- type: :development
232
- prerelease: false
233
- version_requirements: *70243259805800
134
+ version_requirements: *70310096154280
234
135
  description: ! ' Treat is a toolkit for text retrieval, information extraction and
235
136
  natural language processing. '
236
137
  email:
@@ -239,35 +140,40 @@ executables: []
239
140
  extensions: []
240
141
  extra_rdoc_files: []
241
142
  files:
143
+ - lib/economist/half_cocked_basel.txt
144
+ - lib/economist/hose_and_dry.doc
145
+ - lib/economist/hungarys_troubles.abw
146
+ - lib/economist/republican_nomination.pdf
147
+ - lib/economist/saving_the_euro.odt
148
+ - lib/economist/to_infinity_and_beyond.txt
149
+ - lib/economist/zero_sum.html
242
150
  - lib/treat/buildable.rb
243
151
  - lib/treat/categories.rb
244
152
  - lib/treat/category.rb
245
153
  - lib/treat/delegatable.rb
246
- - lib/treat/detectors/encoding/r_chardet19.rb
247
- - lib/treat/detectors/format/file.rb
248
- - lib/treat/detectors/language/language_detector.rb
249
- - lib/treat/detectors/language/what_language.rb
250
- - lib/treat/detectors.rb
154
+ - lib/treat/doable.rb
251
155
  - lib/treat/entities/collection.rb
252
- - lib/treat/entities/constituents.rb
253
156
  - lib/treat/entities/document.rb
254
157
  - lib/treat/entities/entity.rb
255
- - lib/treat/entities/sentence.rb
158
+ - lib/treat/entities/phrases.rb
256
159
  - lib/treat/entities/tokens.rb
257
160
  - lib/treat/entities/zones.rb
258
161
  - lib/treat/entities.rb
259
162
  - lib/treat/exception.rb
260
- - lib/treat/extractors/keywords/topics_frequency.rb
261
- - lib/treat/extractors/named_entity/abner.rb
262
- - lib/treat/extractors/named_entity/stanford.rb
163
+ - lib/treat/extractors/coreferences/stanford.rb
164
+ - lib/treat/extractors/date/chronic.rb
165
+ - lib/treat/extractors/date/ruby.rb
166
+ - lib/treat/extractors/keywords/tf_idf.rb
167
+ - lib/treat/extractors/keywords/topics_tf_idf.rb
168
+ - lib/treat/extractors/language/language_extractor.rb
169
+ - lib/treat/extractors/language/what_language.rb
170
+ - lib/treat/extractors/named_entity_tag/stanford.rb
171
+ - lib/treat/extractors/roles/naive.rb
263
172
  - lib/treat/extractors/statistics/frequency_in.rb
264
- - lib/treat/extractors/statistics/frequency_of.rb
265
- - lib/treat/extractors/statistics/position_in_parent.rb
173
+ - lib/treat/extractors/statistics/position_in.rb
266
174
  - lib/treat/extractors/statistics/tf_idf.rb
267
175
  - lib/treat/extractors/statistics/transition_matrix.rb
268
176
  - lib/treat/extractors/statistics/transition_probability.rb
269
- - lib/treat/extractors/time/chronic.rb
270
- - lib/treat/extractors/time/native.rb
271
177
  - lib/treat/extractors/time/nickel.rb
272
178
  - lib/treat/extractors/topic_words/lda/data.dat
273
179
  - lib/treat/extractors/topic_words/lda/wiki.yml
@@ -278,63 +184,79 @@ files:
278
184
  - lib/treat/extractors/topics/reuters.rb
279
185
  - lib/treat/extractors.rb
280
186
  - lib/treat/feature.rb
187
+ - lib/treat/features.rb
281
188
  - lib/treat/formatters/readers/abw.rb
282
189
  - lib/treat/formatters/readers/autoselect.rb
283
190
  - lib/treat/formatters/readers/doc.rb
284
- - lib/treat/formatters/readers/gocr.rb
285
191
  - lib/treat/formatters/readers/html.rb
286
- - lib/treat/formatters/readers/ocropus.rb
192
+ - lib/treat/formatters/readers/image.rb
287
193
  - lib/treat/formatters/readers/odt.rb
288
194
  - lib/treat/formatters/readers/pdf.rb
289
195
  - lib/treat/formatters/readers/txt.rb
196
+ - lib/treat/formatters/readers/xml.rb
290
197
  - lib/treat/formatters/serializers/xml.rb
291
198
  - lib/treat/formatters/serializers/yaml.rb
292
199
  - lib/treat/formatters/unserializers/autoselect.rb
293
200
  - lib/treat/formatters/unserializers/xml.rb
294
201
  - lib/treat/formatters/unserializers/yaml.rb
295
202
  - lib/treat/formatters/visualizers/dot.rb
296
- - lib/treat/formatters/visualizers/html.rb
297
- - lib/treat/formatters/visualizers/inspect.rb
298
203
  - lib/treat/formatters/visualizers/short_value.rb
299
204
  - lib/treat/formatters/visualizers/standoff.rb
300
205
  - lib/treat/formatters/visualizers/tree.rb
301
206
  - lib/treat/formatters/visualizers/txt.rb
302
207
  - lib/treat/formatters.rb
303
208
  - lib/treat/group.rb
209
+ - lib/treat/helpers/linguistics_loader.rb
304
210
  - lib/treat/inflectors/cardinal_words/linguistics.rb
305
211
  - lib/treat/inflectors/conjugations/linguistics.rb
306
- - lib/treat/inflectors/declensions/en.rb
212
+ - lib/treat/inflectors/declensions/english.rb
307
213
  - lib/treat/inflectors/declensions/linguistics.rb
308
214
  - lib/treat/inflectors/ordinal_words/linguistics.rb
309
215
  - lib/treat/inflectors/stem/porter.rb
310
216
  - lib/treat/inflectors/stem/porter_c.rb
311
217
  - lib/treat/inflectors/stem/uea.rb
312
218
  - lib/treat/inflectors.rb
219
+ - lib/treat/install.rb
313
220
  - lib/treat/kernel.rb
314
221
  - lib/treat/languages/arabic.rb
315
- - lib/treat/languages/categories.rb
316
222
  - lib/treat/languages/chinese.rb
317
- - lib/treat/languages/english/categories.rb
318
- - lib/treat/languages/english/tags.rb
223
+ - lib/treat/languages/dutch.rb
319
224
  - lib/treat/languages/english.rb
320
225
  - lib/treat/languages/french.rb
321
226
  - lib/treat/languages/german.rb
227
+ - lib/treat/languages/greek.rb
322
228
  - lib/treat/languages/italian.rb
323
229
  - lib/treat/languages/list.txt
324
- - lib/treat/languages/xinhua.rb
230
+ - lib/treat/languages/polish.rb
231
+ - lib/treat/languages/portuguese.rb
232
+ - lib/treat/languages/russian.rb
233
+ - lib/treat/languages/spanish.rb
234
+ - lib/treat/languages/swedish.rb
235
+ - lib/treat/languages/tags.rb
325
236
  - lib/treat/languages.rb
326
237
  - lib/treat/lexicalizers/category/from_tag.rb
327
238
  - lib/treat/lexicalizers/linkages/naive.rb
328
- - lib/treat/lexicalizers/synsets/rita_wn.rb
329
239
  - lib/treat/lexicalizers/synsets/wordnet.rb
330
240
  - lib/treat/lexicalizers/tag/brill.rb
331
241
  - lib/treat/lexicalizers/tag/lingua.rb
332
242
  - lib/treat/lexicalizers/tag/stanford.rb
243
+ - lib/treat/lexicalizers/tag/tagger.rb
333
244
  - lib/treat/lexicalizers.rb
334
245
  - lib/treat/object.rb
335
246
  - lib/treat/processors/chunkers/txt.rb
336
247
  - lib/treat/processors/parsers/enju.rb
337
248
  - lib/treat/processors/parsers/stanford.rb
249
+ - lib/treat/processors/segmenters/punkt/dutch.yaml
250
+ - lib/treat/processors/segmenters/punkt/english.yaml
251
+ - lib/treat/processors/segmenters/punkt/french.yaml
252
+ - lib/treat/processors/segmenters/punkt/german.yaml
253
+ - lib/treat/processors/segmenters/punkt/greek.yaml
254
+ - lib/treat/processors/segmenters/punkt/italian.yaml
255
+ - lib/treat/processors/segmenters/punkt/polish.yaml
256
+ - lib/treat/processors/segmenters/punkt/portuguese.yaml
257
+ - lib/treat/processors/segmenters/punkt/russian.yaml
258
+ - lib/treat/processors/segmenters/punkt/spanish.yaml
259
+ - lib/treat/processors/segmenters/punkt/swedish.yaml
338
260
  - lib/treat/processors/segmenters/punkt.rb
339
261
  - lib/treat/processors/segmenters/stanford.rb
340
262
  - lib/treat/processors/segmenters/tactful.rb
@@ -347,13 +269,12 @@ files:
347
269
  - lib/treat/processors.rb
348
270
  - lib/treat/proxies.rb
349
271
  - lib/treat/registrable.rb
350
- - lib/treat/string.rb
351
272
  - lib/treat/sugar.rb
352
273
  - lib/treat/tree.rb
274
+ - lib/treat/viewable.rb
353
275
  - lib/treat/visitable.rb
354
276
  - lib/treat.rb
355
277
  - test/profile.rb
356
- - test/tc_detectors.rb
357
278
  - test/tc_entity.rb
358
279
  - test/tc_extractors.rb
359
280
  - test/tc_formatters.rb
@@ -375,9 +296,7 @@ files:
375
296
  - test/texts/english/short.txt
376
297
  - test/texts/english/zero_sum.html
377
298
  - test/texts.rb
378
- - examples/benchmark.rb
379
- - examples/keywords.rb
380
- - bin/INFO
299
+ - tmp/INFO
381
300
  - README
382
301
  - TODO
383
302
  - LICENSE
@@ -407,3 +326,4 @@ signing_key:
407
326
  specification_version: 3
408
327
  summary: Text retrieval, extraction and annotation toolkit
409
328
  test_files: []
329
+ has_rdoc: