treat 0.1.4 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (160) hide show
  1. data/LICENSE +4 -4
  2. data/TODO +21 -54
  3. data/lib/economist/half_cocked_basel.txt +16 -0
  4. data/lib/economist/hose_and_dry.doc +0 -0
  5. data/lib/economist/hungarys_troubles.abw +70 -0
  6. data/lib/economist/republican_nomination.pdf +0 -0
  7. data/lib/economist/saving_the_euro.odt +0 -0
  8. data/lib/economist/to_infinity_and_beyond.txt +15 -0
  9. data/lib/economist/zero_sum.html +91 -0
  10. data/lib/treat.rb +58 -72
  11. data/lib/treat/buildable.rb +59 -15
  12. data/lib/treat/categories.rb +26 -14
  13. data/lib/treat/category.rb +2 -2
  14. data/lib/treat/delegatable.rb +65 -48
  15. data/lib/treat/doable.rb +44 -0
  16. data/lib/treat/entities.rb +34 -14
  17. data/lib/treat/entities/collection.rb +2 -0
  18. data/lib/treat/entities/document.rb +3 -2
  19. data/lib/treat/entities/entity.rb +105 -90
  20. data/lib/treat/entities/phrases.rb +17 -0
  21. data/lib/treat/entities/tokens.rb +28 -13
  22. data/lib/treat/entities/zones.rb +20 -0
  23. data/lib/treat/extractors.rb +49 -11
  24. data/lib/treat/extractors/coreferences/stanford.rb +68 -0
  25. data/lib/treat/extractors/date/chronic.rb +32 -0
  26. data/lib/treat/extractors/date/ruby.rb +25 -0
  27. data/lib/treat/extractors/keywords/tf_idf.rb +26 -0
  28. data/lib/treat/extractors/keywords/{topics_frequency.rb → topics_tf_idf.rb} +15 -7
  29. data/lib/treat/{detectors/language/language_detector.rb → extractors/language/language_extractor.rb} +5 -2
  30. data/lib/treat/extractors/language/what_language.rb +49 -0
  31. data/lib/treat/extractors/named_entity_tag/stanford.rb +53 -0
  32. data/lib/treat/extractors/roles/naive.rb +73 -0
  33. data/lib/treat/extractors/statistics/frequency_in.rb +6 -13
  34. data/lib/treat/extractors/statistics/{position_in_parent.rb → position_in.rb} +1 -1
  35. data/lib/treat/extractors/statistics/tf_idf.rb +89 -21
  36. data/lib/treat/extractors/statistics/transition_matrix.rb +11 -11
  37. data/lib/treat/extractors/statistics/transition_probability.rb +4 -4
  38. data/lib/treat/extractors/time/nickel.rb +30 -12
  39. data/lib/treat/extractors/topic_words/lda.rb +9 -9
  40. data/lib/treat/extractors/topics/reuters.rb +14 -15
  41. data/lib/treat/extractors/topics/reuters/region.xml +1 -0
  42. data/lib/treat/features.rb +7 -0
  43. data/lib/treat/formatters/readers/abw.rb +6 -1
  44. data/lib/treat/formatters/readers/autoselect.rb +5 -6
  45. data/lib/treat/formatters/readers/doc.rb +3 -1
  46. data/lib/treat/formatters/readers/html.rb +1 -1
  47. data/lib/treat/formatters/readers/image.rb +43 -0
  48. data/lib/treat/formatters/readers/odt.rb +1 -2
  49. data/lib/treat/formatters/readers/pdf.rb +9 -1
  50. data/lib/treat/formatters/readers/xml.rb +40 -0
  51. data/lib/treat/formatters/serializers/xml.rb +50 -14
  52. data/lib/treat/formatters/serializers/yaml.rb +7 -2
  53. data/lib/treat/formatters/unserializers/xml.rb +33 -7
  54. data/lib/treat/formatters/visualizers/dot.rb +90 -20
  55. data/lib/treat/formatters/visualizers/short_value.rb +2 -2
  56. data/lib/treat/formatters/visualizers/standoff.rb +2 -2
  57. data/lib/treat/formatters/visualizers/tree.rb +1 -1
  58. data/lib/treat/formatters/visualizers/txt.rb +13 -4
  59. data/lib/treat/group.rb +16 -10
  60. data/lib/treat/helpers/linguistics_loader.rb +18 -0
  61. data/lib/treat/inflectors.rb +10 -0
  62. data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
  63. data/lib/treat/inflectors/conjugations/linguistics.rb +5 -12
  64. data/lib/treat/inflectors/declensions/english.rb +319 -0
  65. data/lib/treat/inflectors/declensions/linguistics.rb +12 -11
  66. data/lib/treat/inflectors/ordinal_words/linguistics.rb +3 -3
  67. data/lib/treat/install.rb +59 -0
  68. data/lib/treat/kernel.rb +18 -8
  69. data/lib/treat/languages.rb +18 -11
  70. data/lib/treat/languages/arabic.rb +4 -2
  71. data/lib/treat/languages/chinese.rb +6 -2
  72. data/lib/treat/languages/dutch.rb +16 -0
  73. data/lib/treat/languages/english.rb +47 -19
  74. data/lib/treat/languages/french.rb +8 -5
  75. data/lib/treat/languages/german.rb +9 -6
  76. data/lib/treat/languages/greek.rb +16 -0
  77. data/lib/treat/languages/italian.rb +6 -3
  78. data/lib/treat/languages/polish.rb +16 -0
  79. data/lib/treat/languages/portuguese.rb +16 -0
  80. data/lib/treat/languages/russian.rb +16 -0
  81. data/lib/treat/languages/spanish.rb +16 -0
  82. data/lib/treat/languages/swedish.rb +16 -0
  83. data/lib/treat/languages/tags.rb +377 -0
  84. data/lib/treat/lexicalizers.rb +34 -23
  85. data/lib/treat/lexicalizers/category/from_tag.rb +17 -10
  86. data/lib/treat/lexicalizers/linkages/naive.rb +51 -51
  87. data/lib/treat/lexicalizers/synsets/wordnet.rb +5 -1
  88. data/lib/treat/lexicalizers/tag/brill.rb +35 -40
  89. data/lib/treat/lexicalizers/tag/lingua.rb +19 -14
  90. data/lib/treat/lexicalizers/tag/stanford.rb +59 -68
  91. data/lib/treat/lexicalizers/tag/tagger.rb +29 -0
  92. data/lib/treat/processors.rb +8 -8
  93. data/lib/treat/processors/chunkers/txt.rb +4 -4
  94. data/lib/treat/processors/parsers/enju.rb +114 -99
  95. data/lib/treat/processors/parsers/stanford.rb +109 -41
  96. data/lib/treat/processors/segmenters/punkt.rb +17 -18
  97. data/lib/treat/processors/segmenters/punkt/dutch.yaml +9716 -0
  98. data/lib/treat/processors/segmenters/punkt/english.yaml +10340 -0
  99. data/lib/treat/processors/segmenters/punkt/french.yaml +43159 -0
  100. data/lib/treat/processors/segmenters/punkt/german.yaml +9572 -0
  101. data/lib/treat/processors/segmenters/punkt/greek.yaml +6050 -0
  102. data/lib/treat/processors/segmenters/punkt/italian.yaml +14748 -0
  103. data/lib/treat/processors/segmenters/punkt/polish.yaml +9751 -0
  104. data/lib/treat/processors/segmenters/punkt/portuguese.yaml +13662 -0
  105. data/lib/treat/processors/segmenters/punkt/russian.yaml +4237 -0
  106. data/lib/treat/processors/segmenters/punkt/spanish.yaml +24034 -0
  107. data/lib/treat/processors/segmenters/punkt/swedish.yaml +10001 -0
  108. data/lib/treat/processors/segmenters/stanford.rb +38 -37
  109. data/lib/treat/processors/segmenters/tactful.rb +5 -4
  110. data/lib/treat/processors/tokenizers/macintyre.rb +7 -6
  111. data/lib/treat/processors/tokenizers/multilingual.rb +2 -3
  112. data/lib/treat/processors/tokenizers/perl.rb +2 -2
  113. data/lib/treat/processors/tokenizers/punkt.rb +6 -2
  114. data/lib/treat/processors/tokenizers/stanford.rb +25 -24
  115. data/lib/treat/processors/tokenizers/tactful.rb +1 -2
  116. data/lib/treat/proxies.rb +2 -35
  117. data/lib/treat/registrable.rb +17 -22
  118. data/lib/treat/sugar.rb +11 -11
  119. data/lib/treat/tree.rb +27 -17
  120. data/lib/treat/viewable.rb +29 -0
  121. data/lib/treat/visitable.rb +1 -1
  122. data/test/tc_entity.rb +56 -49
  123. data/test/tc_extractors.rb +41 -18
  124. data/test/tc_formatters.rb +7 -8
  125. data/test/tc_inflectors.rb +19 -24
  126. data/test/tc_lexicalizers.rb +12 -19
  127. data/test/tc_processors.rb +26 -12
  128. data/test/tc_resources.rb +2 -7
  129. data/test/tc_treat.rb +20 -22
  130. data/test/tc_tree.rb +4 -4
  131. data/test/tests.rb +3 -5
  132. data/test/texts.rb +13 -14
  133. data/tmp/INFO +1 -0
  134. metadata +78 -158
  135. data/bin/INFO +0 -1
  136. data/examples/benchmark.rb +0 -81
  137. data/examples/keywords.rb +0 -148
  138. data/lib/treat/detectors.rb +0 -31
  139. data/lib/treat/detectors/encoding/r_chardet19.rb +0 -27
  140. data/lib/treat/detectors/format/file.rb +0 -36
  141. data/lib/treat/detectors/language/what_language.rb +0 -29
  142. data/lib/treat/entities/constituents.rb +0 -15
  143. data/lib/treat/entities/sentence.rb +0 -8
  144. data/lib/treat/extractors/named_entity/abner.rb +0 -20
  145. data/lib/treat/extractors/named_entity/stanford.rb +0 -174
  146. data/lib/treat/extractors/statistics/frequency_of.rb +0 -15
  147. data/lib/treat/extractors/time/chronic.rb +0 -20
  148. data/lib/treat/extractors/time/native.rb +0 -18
  149. data/lib/treat/formatters/readers/gocr.rb +0 -26
  150. data/lib/treat/formatters/readers/ocropus.rb +0 -31
  151. data/lib/treat/formatters/visualizers/html.rb +0 -13
  152. data/lib/treat/formatters/visualizers/inspect.rb +0 -20
  153. data/lib/treat/inflectors/declensions/en.rb +0 -18
  154. data/lib/treat/languages/categories.rb +0 -5
  155. data/lib/treat/languages/english/categories.rb +0 -23
  156. data/lib/treat/languages/english/tags.rb +0 -352
  157. data/lib/treat/languages/xinhua.rb +0 -12
  158. data/lib/treat/lexicalizers/synsets/rita_wn.rb +0 -23
  159. data/lib/treat/string.rb +0 -5
  160. data/test/tc_detectors.rb +0 -26
@@ -0,0 +1,29 @@
1
+ module Treat
2
+ module Viewable
3
+ # Return the entity's string value in plain text format.
4
+ def to_string; @value; end
5
+ # An alias for #to_string.
6
+ def to_s; visualize(:txt); end
7
+ alias :to_str :to_s
8
+ # Return a shortened value of the entity's string value using [...].
9
+ def short_value(ml = 6); visualize(:short_value, :max_length => ml); end
10
+ # Return an informative string representation of the entity.
11
+ def inspect
12
+ s = "#{cl(self.class)} (#{@id.to_s})"
13
+ if caller_method(2) == :inspect
14
+ @id.to_s
15
+ else
16
+ dependencies = []
17
+ @dependencies.each do |dependency|
18
+ dependencies << "#{dependency.target}#{dependency.type}"
19
+ end
20
+ s += " | #{short_value.inspect}" +
21
+ " | #{@features.inspect}" +
22
+ " | { #{dependencies.join(', ')} }"
23
+ end
24
+ s
25
+ end
26
+ # Print out an ASCII representation of the tree.
27
+ def print_tree; puts visualize(:tree); end
28
+ end
29
+ end
@@ -8,7 +8,7 @@ module Treat
8
8
  if group.type == :transformer
9
9
  if has_children?
10
10
  @children.each do |entity|
11
- if group.has_target?(entity.class)
11
+ if group.has_target?(entity.class) && entity.id != id
12
12
  entity.accept(group, klass, method, options)
13
13
  end
14
14
  end
@@ -2,17 +2,14 @@ module Treat
2
2
  module Tests
3
3
  class TestEntity < Test::Unit::TestCase
4
4
  def setup
5
- @text = Treat::Entities::Section.new
6
-
5
+ @section = Treat::Entities::Section.new
7
6
  @sentence = Treat::Entities::Sentence.new
8
-
9
- @noun_phrase = Treat::Entities::Phrase.new
10
- @noun_phrase.set :tag, 'NP'
11
- @verb_phrase = Treat::Entities::Phrase.new
12
- @verb_phrase.set :tag, 'VP'
13
- @adj_phrase = Treat::Entities::Phrase.new
14
- @adj_phrase.set :tag, 'ADJP'
15
-
7
+ @noun_cons = Treat::Entities::Phrase.new
8
+ @noun_cons.set :tag, 'NP'
9
+ @verb_cons = Treat::Entities::Phrase.new
10
+ @verb_cons.set :tag, 'VP'
11
+ @adj_cons = Treat::Entities::Phrase.new
12
+ @adj_cons.set :tag, 'ADJP'
16
13
  @det = Treat::Entities::Word.new('The')
17
14
  @det.set :category, :determiner
18
15
  @det.set :tag, 'DT'
@@ -34,77 +31,87 @@ module Treat
34
31
  @verb.set :tag, 'VBG'
35
32
  @verb.set :tag_set, :penn
36
33
  @dot = Treat::Entities::Punctuation.new('.')
37
-
38
- @text << @sentence << [@noun_phrase, @verb_phrase, @dot]
39
- @noun_phrase << [@det, @adj_phrase, @noun]
40
- @adj_phrase << @adj
41
- @verb_phrase << [@aux, @verb]
34
+ @section << @sentence << [@noun_cons, @verb_cons, @dot]
35
+ @noun_cons << [@det, @adj_cons, @noun]
36
+ @adj_cons << @adj
37
+ @verb_cons << [@aux, @verb]
42
38
  end
43
39
 
44
- def test_respond_to_missing
45
-
40
+ def test_viewable
41
+ s = 'Happiness is not an ideal of reason, but of imagination.'.tokenize
42
+ assert_nothing_raised do
43
+ # Return the string value of the sentence.
44
+ s.to_s
45
+ # Return a debug description of the sentence.
46
+ s.inspect
47
+ # Return a shortened version of the Sentence with [...]
48
+ s.short_value
49
+ end
46
50
  end
47
-
51
+
48
52
  def test_registrable
49
- assert_equal @text.token_registry, @verb.token_registry
50
- assert_equal @noun, @text.token_registry[:id][@noun.id]
51
- assert_equal [@noun], @text.token_registry[:value][@noun.value]
53
+ assert_equal @section.token_registry, @verb.token_registry
54
+ assert_equal @noun, @section.token_registry[:id][@noun.id]
55
+ assert_equal [@noun], @section.token_registry[:value][@noun.value]
52
56
  end
53
57
 
54
-
55
58
  def test_delegatable_visitable
56
- assert_raise(Treat::Exception) do
57
- @text.encoding(:nonexistent)
59
+ assert_raise(Treat::Exception) do
60
+ @section.encoding(:nonexistent)
58
61
  end
59
62
  assert_nothing_raised do
60
- @text.language
63
+ @section.language
61
64
  end
62
65
  end
63
-
66
+
64
67
  def test_type
65
- assert_equal :section, @text.type
68
+ assert_equal :section, @section.type
66
69
  end
67
-
70
+
68
71
  def test_printers
69
72
  assert_nothing_raised do
70
- @text.to_s
71
- @text.to_string
72
- @text.short_value
73
- @text.inspect
73
+ @section.to_s
74
+ @section.to_string
75
+ @section.short_value
76
+ @section.inspect
74
77
  end
75
78
  end
76
79
 
77
80
  def test_magic_methods
78
- assert_equal @sentence, @text.sentence
79
- assert_equal [@sentence], @text.sentences
80
- assert_equal 1, @text.sentence_count
81
-
82
- assert_equal [@det], @text.words_with_value('The')
83
- assert_equal [@verb], @text.words_with_tag('VBG')
84
-
85
- assert_equal @noun, @text.noun
86
- assert_equal [@aux, @verb], @text.verbs
87
- assert_equal 6, @text.token_count
88
-
89
- @text.each_sentence do |s|
81
+
82
+ assert_equal true, @sentence.is_sentence?
83
+ assert_equal true, @noun.is_noun?
84
+
85
+ assert_equal @sentence, @section.sentence
86
+ assert_equal [@sentence], @section.sentences
87
+ assert_equal 1, @section.sentence_count
88
+
89
+ assert_equal [@det], @section.words_with_value('The')
90
+ assert_equal [@verb], @section.words_with_tag('VBG')
91
+
92
+ assert_equal @noun, @section.noun
93
+ assert_equal [@aux, @verb], @section.verbs
94
+ assert_equal 6, @section.token_count
95
+
96
+ @section.each_sentence do |s|
90
97
  assert_equal @sentence, s
91
98
  end
92
- @text.each_noun do |n|
99
+ @section.each_noun do |n|
93
100
  assert_equal @noun, n
94
101
  end
95
- @text.each_with_value('The') do |x|
102
+ @section.each_with_value('The') do |x|
96
103
  assert_equal @det, x
97
104
  end
98
-
105
+
99
106
  assert_equal @sentence, @noun.parent_sentence
100
107
  end
101
108
 
102
109
  def test_features
103
110
  @verb.set :test, :test
104
111
  assert_equal :test, @verb.test
105
- assert_raise(Treat::Exception) { @verb.nonexistent }
112
+ assert_raise(Treat::Exception) { @verb.nonexistent }
106
113
  end
107
-
114
+
108
115
  end
109
116
  end
110
117
  end
@@ -1,34 +1,38 @@
1
+ # encoding: utf-8
1
2
  module Treat
2
3
  module Tests
3
4
  class TestExtractors < Test::Unit::TestCase
4
5
 
5
6
  def setup
6
- @time = Treat::Tests::EnglishTime
7
- @date = Treat::Tests::EnglishDate
8
- @doc = Treat::Tests::EnglishLongDoc
9
- @word = Treat::Tests::EnglishWord
7
+ @time = Treat::Tests::English::Time
8
+ @date = Treat::Tests::English::Date
9
+ @doc = Treat::Tests::English::LongDoc
10
+ @word = Treat::Tests::English::Word
11
+ @col = Treat::Tests::English::Collection
10
12
  end
11
13
 
12
14
  def test_time
13
- assert_nothing_raised { @date.time(:chronic) }
14
- assert_nothing_raised { @date.time(:native) }
15
- assert_nothing_raised { @date.time(:nickel) }
15
+ assert_nothing_raised { @time.time(:nickel) }
16
+ end
17
+
18
+ def test_date
19
+ assert_equal 2011, @date.date(:chronic).year
20
+ assert_equal 2011, @date.date(:ruby).year
16
21
  end
17
22
 
18
23
  def test_topic_words
19
- assert_nothing_raised { @doc.topic_words(:lda) }
24
+ assert_nothing_raised { @col.topic_words(:lda) }
20
25
  end
21
-
22
-
26
+
23
27
  def test_named_entity
24
- # assert_nothing_raised { @doc.named_entity(:stanford) }
25
- # assert_nothing_raised { @doc.named_entity(:abner) }
28
+ p = 'Angela Merkel and Nicolas Sarkozy were the first ones to board the p'
29
+ assert_nothing_raised { @doc.named_entity(:stanford) }
26
30
  end
27
31
 
28
32
  def test_keywords
29
- assert_nothing_raised do
30
- topics = @doc.topic_words(:lda)
31
- @doc.keywords(:topics_frequency, topic_words: topics)
33
+ assert_nothing_raised do
34
+ topics = @col.topic_words(:lda)
35
+ @doc.keywords(:topics_frequency, :topic_words => topics)
32
36
  end
33
37
  end
34
38
 
@@ -38,13 +42,32 @@ module Treat
38
42
 
39
43
  def test_statistics
40
44
  @doc.chunk.segment(:tactful).tokenize
41
-
42
- assert_nothing_raised { @doc.statistics(:frequency_of, value: 'the') }
43
- assert_nothing_raised { @word.statistics(:frequency_in) }
45
+ assert_equal 1, @word.frequency_in(:document)
46
+ assert_nothing_raised { @word.tf_idf ; puts @word.tf_idf }
44
47
  # assert_nothing_raised { @doc.statistics(:position_in) }
45
48
  # assert_nothing_raised { @doc.statistics(:transition_matrix) }
46
49
  # assert_nothing_raised { @doc.statistics(:transition_probability) }
47
50
  end
51
+
52
+ def test_language
53
+ assert_equal Treat.default_language, @doc.language
54
+ Treat.detect_language = true
55
+ assert_equal :eng, @doc.language
56
+
57
+ a = 'I want to know God\'s thoughts; the rest are details. - Albert Einstein'
58
+ b = 'El mundo de hoy no tiene sentido, así que ¿por qué debería pintar cuadros que lo tuvieran? - Pablo Picasso'
59
+ c = 'Un bon Allemand ne peut souffrir les Français, mais il boit volontiers les vins de France. - Goethe'
60
+ d = 'Wir haben die Kunst, damit wir nicht an der Wahrheit zugrunde gehen. - Friedrich Nietzsche'
61
+
62
+ assert_equal :eng, a.language
63
+ assert_equal :spa, b.language
64
+ assert_equal :fre, c.language
65
+ assert_equal :ger, d.language
66
+
67
+ # Reset defaults
68
+ Treat.detect_language = false
69
+ end
70
+
48
71
  end
49
72
  end
50
73
  end
@@ -3,25 +3,25 @@ module Treat
3
3
  class TestFormatters < Test::Unit::TestCase
4
4
 
5
5
  def setup
6
- @doc = Treat::Tests::EnglishShortDoc
7
- @html_doc = Treat::Tests::EnglishHtmlDoc
8
- @sentence = Treat::Tests::EnglishSentence
6
+ @doc = Treat::Tests::English::ShortDoc
7
+ @sentence = Treat::Tests::English::Sentence
9
8
  end
10
9
 
11
10
  def test_readers
12
- # How should we test this?
11
+ # This is done by loading a collection with all types of texts.
13
12
  end
14
13
 
15
-
16
14
  def test_serializers_and_unserializers
15
+ # Test roundtrip Ruby -> YAML -> Ruby -> YAML
17
16
  create_temp_file('yml') do |tmp|
18
- @doc.serialize(:yaml).save(tmp)
17
+ @doc.serialize(:yaml, :file => tmp)
19
18
  doc = Treat::Entities::Document(tmp)
20
19
  assert_equal File.read(tmp).length,
21
20
  doc.serialize(:yaml).length
22
21
  end
22
+ # Test roundtrip Ruby -> XML -> Ruby -> XML.
23
23
  create_temp_file('xml') do |tmp|
24
- @doc.serialize(:xml).save(tmp)
24
+ @doc.serialize(:xml, :file => tmp)
25
25
  doc = Treat::Entities::Document(tmp)
26
26
  assert_equal File.read(tmp).length,
27
27
  doc.serialize(:xml).length
@@ -32,7 +32,6 @@ module Treat
32
32
  assert_nothing_raised { @doc.visualize(:tree) }
33
33
  # assert_nothing_raised { @doc.visualize(:html) }
34
34
  assert_nothing_raised { @doc.visualize(:dot) }
35
- assert_nothing_raised { @doc.visualize(:inspect) }
36
35
  assert_nothing_raised { @doc.visualize(:short_value) }
37
36
  assert_nothing_raised { @sentence.visualize(:standoff) }
38
37
  end
@@ -2,38 +2,33 @@ module Treat
2
2
  module Tests
3
3
  class TestInflectors < Test::Unit::TestCase
4
4
 
5
- def setup
6
- @word = Treat::Tests::EnglishWord
7
- @number = Treat::Tests::Number
8
- @verb = Treat::Tests::EnglishVerb
9
- @noun = Treat::Tests::EnglishNoun
10
- end
11
-
12
5
  def test_lemmatizers
13
6
  # Not implemented yet.
14
7
  end
15
8
 
16
9
  def test_stemmers
17
- assert_equal 'run', @word.stem(:porter)
18
- assert_equal 'run', @word.stem(:porter_c)
19
- assert_equal 'run', @word.stem(:uea)
10
+ assert_equal 'run', 'running'.stem(:porter)
11
+ assert_equal 'run', 'running'.stem(:porter_c)
12
+ assert_equal 'run', 'running'.stem(:uea)
20
13
  end
21
- end
22
14
 
23
- def test_conjugators
24
- assert_equal 'running', @verb.present_participle
25
- assert_equal 'run', @verb.infinitive
26
- assert_equal 'run', @verb.plural
27
- end
15
+ def test_conjugators
16
+ assert_equal 'run', 'running'.infinitive
17
+ assert_equal 'running', 'run'.present_participle
18
+ assert_equal 'run', 'runs'.plural_verb
19
+ end
28
20
 
29
- def test_declensors
30
- assert_equal 'geese', @noun.plural
31
- end
21
+ def test_declensors
22
+ assert_equal 'inflections', 'inflection'.plural(:linguistics)
23
+ assert_equal 'inflections', 'inflection'.plural(:english)
24
+ assert_equal 'inflection', 'inflections'.singular(:english)
25
+ end
32
26
 
33
- def test_ordinal_and_cardinal_words
34
- assert_equal 'twenty', @number.cardinal_words
35
- assert_equal 'twentieth', @number.ordinal_words
36
- end
27
+ def test_ordinal_and_cardinal_words
28
+ assert_equal 'twenty', 20.cardinal_words
29
+ assert_equal 'twentieth', 20.ordinal_words
30
+ end
37
31
 
32
+ end
38
33
  end
39
- end
34
+ end
@@ -2,35 +2,28 @@ module Treat
2
2
  module Tests
3
3
  class TestLexicalizers < Test::Unit::TestCase
4
4
 
5
- def setup
6
- @word = Treat::Tests::EnglishWord
7
- @sentence = Treat::Tests::EnglishSentence.parse
8
- end
9
-
10
5
  def test_category
11
- assert_equal :verb, @word.category(:from_tag)
6
+ assert_equal :verb, 'visualize'.category(:from_tag, :tagger => :stanford)
7
+ assert_equal :noun, 'inflection'.category(:from_tag, :tagger => :brill)
8
+ assert_equal :adjective, 'sweet'.category(:from_tag, :tagger => :lingua)
12
9
  end
13
10
 
14
11
  def test_synsets
15
- # assert_nothing_raised { @word.synsets(:rita_wn) }
16
- assert_nothing_raised { @word.synsets(:wordnet) }
17
- assert_nothing_raised { @word.synonyms(:wordnet) }
18
- assert_nothing_raised { @word.antonyms(:wordnet) }
19
- assert_nothing_raised { @word.hyponyms(:wordnet) }
20
- assert_nothing_raised { @word.hypernyms(:wordnet) }
12
+ assert_equal 'mature', 'ripe'.synonyms(:wordnet)[0]
13
+ # assert_equal 'green', ' ripe'.antonyms(:wordnet)[0]
14
+ assert_equal 'beverage', 'coffee'.hypernyms(:wordnet)[0]
15
+ assert_equal 'gravy', 'juice'.hyponyms(:wordnet)[0]
21
16
  end
22
17
 
23
18
  def test_linkages
24
- assert_nothing_raised { @sentence.linkages(:naive, :linkage => :main_verb) }
25
- assert_nothing_raised { @sentence.linkages(:naive, :linkage => :subject) }
26
- assert_nothing_raised { @sentence.linkages(:naive, :linkage => :object) }
27
- assert_nothing_raised { @sentence.linkages(:naive, :linkage => :patient) }
19
+ sentence = 'Good is bad, but bad is not good'
20
+ # assert_equal sentence.parse(:enju).linkages
28
21
  end
29
22
 
30
23
  def test_taggers
31
- assert_nothing_raised { @word.tag(:brill) }
32
- assert_nothing_raised { @word.tag(:lingua) }
33
- assert_nothing_raised { @word.tag(:stanford) }
24
+ assert_equal 'VBG', 'running'.tag(:stanford)
25
+ assert_equal 'VBG', 'running'.tag(:brill)
26
+ assert_equal 'VBG', 'running'.tag(:lingua)
34
27
  end
35
28
 
36
29
  end
@@ -3,31 +3,45 @@ module Treat
3
3
  class TestProcessors < Test::Unit::TestCase
4
4
 
5
5
  def setup
6
- @doc = Treat::Tests::EnglishShortDoc
6
+ @doc = Treat::Tests::English::ShortDoc
7
7
  end
8
8
 
9
9
  def test_tokenizers
10
- assert_nothing_raised { @doc.tokenize(:macintyre) }
11
- assert_nothing_raised { @doc.tokenize(:multilingual) }
12
- assert_nothing_raised { @doc.tokenize(:perl) }
13
- assert_nothing_raised { @doc.tokenize(:punkt) }
14
- assert_nothing_raised { @doc.tokenize(:stanford) }
15
- assert_nothing_raised { @doc.tokenize(:tactful) }
10
+ words = ['A', 'sentence', 'to', 'tokenize']
11
+ tokenize_map = lambda do |worker, o={}|
12
+ 'A sentence to tokenize'.
13
+ tokenize(worker, o).words.map { |w| w.value }
14
+ end
15
+ assert_equal words, tokenize_map.call(:macintyre)
16
+ assert_equal words, tokenize_map.call(:multilingual)
17
+ assert_equal words, tokenize_map.call(:perl)
18
+ assert_equal words, tokenize_map.call(:punkt)
19
+ assert_equal words, tokenize_map.call(:stanford, :silence => true)
20
+ assert_equal words, tokenize_map.call(:tactful)
16
21
  end
17
22
 
18
23
  def test_segmenters
19
- assert_nothing_raised { @doc.segment(:punkt) }
20
- assert_nothing_raised { @doc.segment(:stanford) }
21
- assert_nothing_raised { @doc.segment(:tactful) }
24
+ sentences = ['This is sentence 1.', 'This is sentence 2.']
25
+ segment_map = lambda do |worker,o={}|
26
+ 'This is sentence 1. This is sentence 2.'.
27
+ segment(worker, o).sentences.map { |s| s.value }
28
+ end
29
+ assert_equal sentences, segment_map.call(:punkt)
30
+ assert_equal sentences, segment_map.call(:stanford, :silence => true)
31
+ assert_equal sentences, segment_map.call(:tactful)
22
32
  end
23
33
 
24
34
  def test_chunkers
25
- assert_nothing_raised { @doc.chunk(:txt) }
35
+ title = 'This is a title!'
36
+ paragraph = 'This is sentence 1. This is a potential sentence inside a pargraph describing the wonders of the world.'
37
+ s = "This is a title!\nThis is sentence 1. This is a potential sentence inside a pargraph describing the wonders of the world.".chunk
38
+ assert_equal title, s.title.value
39
+ assert_equal paragraph, s.paragraph.value
26
40
  end
27
41
 
28
42
  def test_parsers
29
43
  assert_nothing_raised { @doc.segment.parse(:enju) }
30
- assert_nothing_raised { @doc.segment.parse(:stanford) }
44
+ assert_nothing_raised { @doc.segment.parse(:stanford, :silence => true) }
31
45
  end
32
46
 
33
47
  end