treat 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (147) hide show
  1. data/INSTALL +0 -0
  2. data/LICENSE +28 -0
  3. data/README +0 -0
  4. data/TODO +67 -0
  5. data/bin/INFO +1 -0
  6. data/examples/benchmark.rb +81 -0
  7. data/examples/keywords.rb +60 -0
  8. data/examples/texts/bugged_out.txt +26 -0
  9. data/examples/texts/half_cocked_basel.txt +16 -0
  10. data/examples/texts/hedge_funds.txt +24 -0
  11. data/examples/texts/hose_and_dry.txt +19 -0
  12. data/examples/texts/hungarys_troubles.txt +46 -0
  13. data/examples/texts/indias_slowdown.txt +15 -0
  14. data/examples/texts/merkozy_rides_again.txt +24 -0
  15. data/examples/texts/prada_is_not_walmart.txt +9 -0
  16. data/examples/texts/republican_nomination.txt +26 -0
  17. data/examples/texts/to_infinity_and_beyond.txt +15 -0
  18. data/lib/treat.rb +91 -0
  19. data/lib/treat/buildable.rb +115 -0
  20. data/lib/treat/categories.rb +29 -0
  21. data/lib/treat/category.rb +28 -0
  22. data/lib/treat/delegatable.rb +90 -0
  23. data/lib/treat/detectors.rb +28 -0
  24. data/lib/treat/detectors/encoding/native.rb +12 -0
  25. data/lib/treat/detectors/encoding/r_chardet19.rb +24 -0
  26. data/lib/treat/detectors/format/file.rb +36 -0
  27. data/lib/treat/detectors/language/language_detector.rb +19 -0
  28. data/lib/treat/detectors/language/what_language.rb +29 -0
  29. data/lib/treat/entities.rb +52 -0
  30. data/lib/treat/entities/collection.rb +19 -0
  31. data/lib/treat/entities/constituents.rb +15 -0
  32. data/lib/treat/entities/document.rb +11 -0
  33. data/lib/treat/entities/entity.rb +242 -0
  34. data/lib/treat/entities/sentence.rb +8 -0
  35. data/lib/treat/entities/text.rb +7 -0
  36. data/lib/treat/entities/tokens.rb +37 -0
  37. data/lib/treat/entities/zones.rb +17 -0
  38. data/lib/treat/exception.rb +5 -0
  39. data/lib/treat/extractors.rb +41 -0
  40. data/lib/treat/extractors/key_sentences/topics_frequency.rb +49 -0
  41. data/lib/treat/extractors/named_entity/abner.rb +20 -0
  42. data/lib/treat/extractors/named_entity/stanford.rb +174 -0
  43. data/lib/treat/extractors/statistics/frequency.rb +22 -0
  44. data/lib/treat/extractors/statistics/frequency_of.rb +17 -0
  45. data/lib/treat/extractors/statistics/position_in.rb +13 -0
  46. data/lib/treat/extractors/statistics/transition_matrix.rb +105 -0
  47. data/lib/treat/extractors/statistics/transition_probability.rb +53 -0
  48. data/lib/treat/extractors/time/chronic.rb +12 -0
  49. data/lib/treat/extractors/time/native.rb +12 -0
  50. data/lib/treat/extractors/time/nickel.rb +45 -0
  51. data/lib/treat/extractors/topic_words/lda.rb +71 -0
  52. data/lib/treat/extractors/topic_words/lda/data.dat +46 -0
  53. data/lib/treat/extractors/topic_words/lda/wiki.yml +121 -0
  54. data/lib/treat/extractors/topics/reuters.rb +91 -0
  55. data/lib/treat/extractors/topics/reuters/industry.xml +2717 -0
  56. data/lib/treat/extractors/topics/reuters/region.xml +13585 -0
  57. data/lib/treat/extractors/topics/reuters/topics.xml +17977 -0
  58. data/lib/treat/feature.rb +53 -0
  59. data/lib/treat/formatters.rb +44 -0
  60. data/lib/treat/formatters/cleaners/html.rb +17 -0
  61. data/lib/treat/formatters/readers/autoselect.rb +35 -0
  62. data/lib/treat/formatters/readers/gocr.rb +24 -0
  63. data/lib/treat/formatters/readers/html.rb +13 -0
  64. data/lib/treat/formatters/readers/ocropus.rb +31 -0
  65. data/lib/treat/formatters/readers/pdf.rb +17 -0
  66. data/lib/treat/formatters/readers/txt.rb +15 -0
  67. data/lib/treat/formatters/serializers/xml.rb +48 -0
  68. data/lib/treat/formatters/serializers/yaml.rb +15 -0
  69. data/lib/treat/formatters/serializers/yaml/helper.rb +96 -0
  70. data/lib/treat/formatters/unserializers/autoselect.rb +19 -0
  71. data/lib/treat/formatters/unserializers/xml.rb +79 -0
  72. data/lib/treat/formatters/unserializers/yaml.rb +15 -0
  73. data/lib/treat/formatters/visualizers/dot.rb +73 -0
  74. data/lib/treat/formatters/visualizers/html.rb +12 -0
  75. data/lib/treat/formatters/visualizers/inspect.rb +16 -0
  76. data/lib/treat/formatters/visualizers/short_value.rb +14 -0
  77. data/lib/treat/formatters/visualizers/standoff.rb +41 -0
  78. data/lib/treat/formatters/visualizers/tree.rb +28 -0
  79. data/lib/treat/formatters/visualizers/txt.rb +31 -0
  80. data/lib/treat/group.rb +96 -0
  81. data/lib/treat/inflectors.rb +50 -0
  82. data/lib/treat/inflectors/cardinal_words/linguistics.rb +45 -0
  83. data/lib/treat/inflectors/conjugators/linguistics.rb +30 -0
  84. data/lib/treat/inflectors/declensors/en.rb +18 -0
  85. data/lib/treat/inflectors/declensors/linguistics.rb +30 -0
  86. data/lib/treat/inflectors/lemmatizers/e_lemma.rb +12 -0
  87. data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +213 -0
  88. data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +68 -0
  89. data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +6 -0
  90. data/lib/treat/inflectors/ordinal_words/linguistics.rb +21 -0
  91. data/lib/treat/inflectors/stemmers/porter.rb +158 -0
  92. data/lib/treat/inflectors/stemmers/porter_c.rb +23 -0
  93. data/lib/treat/inflectors/stemmers/uea.rb +30 -0
  94. data/lib/treat/lexicalizers.rb +49 -0
  95. data/lib/treat/lexicalizers/category/from_tag.rb +30 -0
  96. data/lib/treat/lexicalizers/linkages/naive.rb +63 -0
  97. data/lib/treat/lexicalizers/synsets/rita_wn.rb +23 -0
  98. data/lib/treat/lexicalizers/synsets/wordnet.rb +72 -0
  99. data/lib/treat/lexicalizers/tag/brill.rb +101 -0
  100. data/lib/treat/lexicalizers/tag/lingua.rb +114 -0
  101. data/lib/treat/lexicalizers/tag/stanford.rb +86 -0
  102. data/lib/treat/processors.rb +45 -0
  103. data/lib/treat/processors/chunkers/txt.rb +27 -0
  104. data/lib/treat/processors/parsers/enju.rb +214 -0
  105. data/lib/treat/processors/parsers/stanford.rb +60 -0
  106. data/lib/treat/processors/segmenters/punkt.rb +48 -0
  107. data/lib/treat/processors/segmenters/stanford.rb +45 -0
  108. data/lib/treat/processors/segmenters/tactful.rb +34 -0
  109. data/lib/treat/processors/tokenizers/macintyre.rb +76 -0
  110. data/lib/treat/processors/tokenizers/multilingual.rb +31 -0
  111. data/lib/treat/processors/tokenizers/perl.rb +96 -0
  112. data/lib/treat/processors/tokenizers/punkt.rb +42 -0
  113. data/lib/treat/processors/tokenizers/stanford.rb +33 -0
  114. data/lib/treat/processors/tokenizers/tactful.rb +59 -0
  115. data/lib/treat/proxies.rb +66 -0
  116. data/lib/treat/registrable.rb +26 -0
  117. data/lib/treat/resources.rb +10 -0
  118. data/lib/treat/resources/categories.rb +18 -0
  119. data/lib/treat/resources/delegates.rb +96 -0
  120. data/lib/treat/resources/dependencies.rb +0 -0
  121. data/lib/treat/resources/edges.rb +8 -0
  122. data/lib/treat/resources/formats.rb +23 -0
  123. data/lib/treat/resources/languages.rb +86 -0
  124. data/lib/treat/resources/languages.txt +504 -0
  125. data/lib/treat/resources/tags.rb +393 -0
  126. data/lib/treat/sugar.rb +43 -0
  127. data/lib/treat/tree.rb +174 -0
  128. data/lib/treat/utilities.rb +127 -0
  129. data/lib/treat/visitable.rb +27 -0
  130. data/test/profile.rb +2 -0
  131. data/test/tc_detectors.rb +27 -0
  132. data/test/tc_entity.rb +105 -0
  133. data/test/tc_extractors.rb +48 -0
  134. data/test/tc_formatters.rb +46 -0
  135. data/test/tc_inflectors.rb +39 -0
  136. data/test/tc_lexicalizers.rb +39 -0
  137. data/test/tc_processors.rb +36 -0
  138. data/test/tc_resources.rb +27 -0
  139. data/test/tc_treat.rb +64 -0
  140. data/test/tc_tree.rb +60 -0
  141. data/test/tests.rb +19 -0
  142. data/test/texts.rb +20 -0
  143. data/test/texts/english/long.html +24 -0
  144. data/test/texts/english/long.txt +22 -0
  145. data/test/texts/english/medium.txt +5 -0
  146. data/test/texts/english/short.txt +3 -0
  147. metadata +412 -0
@@ -0,0 +1,127 @@
1
module Treat
  # Provides utility functions used across the library.
  module Utilities
    # Require file utilities.
    require 'fileutils'
    # Tempfile backs .create_temp_file.
    require 'tempfile'

    # Returns the platform we are running on, i.e. the second
    # dash-separated field of RUBY_PLATFORM (nil if there is none).
    def self.platform
      RUBY_PLATFORM.split("-")[1]
    end

    # Runs a block of code silently, i.e. without
    # expressing warnings even in verbose mode.
    # Returns the value of the block. The previous $VERBOSE
    # level is restored even when the block raises.
    def self.silently(&block)
      warn_level = $VERBOSE
      $VERBOSE = nil
      block.call
    ensure
      $VERBOSE = warn_level
    end

    # Placeholder: currently just yields to the block and ignores
    # the streams it is given.
    def self.silence_streams(*streams)
      yield
    end

    # Create a temporary file which is deleted
    # after execution of the block.
    #
    # ext   - extension (without the dot) given to the file.
    # value - optional content written to the file (via puts)
    #         before the block runs.
    #
    # Yields the path of the file and returns the block's value.
    # The file is created inside Treat.tmp.
    def self.create_temp_file(ext, value = nil, &block)
      tmp = Tempfile.new(['', ".#{ext}"], Treat.tmp)
      begin
        if value
          tmp.puts(value)
          # Flush so that code reading the path sees the content.
          tmp.flush
        end
        block.call(tmp.path)
      ensure
        # Close and unlink now rather than waiting for finalization.
        tmp.close!
      end
    end

    # A list of acronyms used in class names within
    # the program. These do not CamelCase; they
    # CAMELCASE.
    @@acronyms = ['XML', 'HTML', 'YAML', 'UEA', 'LDA', 'PDF', 'GOCR', 'Treat'].join('|')
    @@cc_cache = {}

    # Convert un_camel_case to CamelCase.
    # Results are memoized per argument; the cached String itself
    # is returned, so callers should not mutate it.
    def self.camel_case(o_phrase)
      cached = @@cc_cache[o_phrase]
      return cached if cached
      phrase = o_phrase.to_s.dup
      # NOTE(review): the alternation is not grouped, so the trailing
      # [^a-z]+ only applies to the last alternative ("treat").
      # Left as is because callers may depend on it.
      phrase.gsub!(/#{@@acronyms.downcase}[^a-z]+/) { |a| a.upcase }
      phrase.gsub!(/^[a-z]|_[a-z]/) { |a| a.upcase }
      phrase.gsub!('_', '')
      @@cc_cache[o_phrase] = phrase
    end

    @@ucc_cache = {}

    # Convert CamelCase to un_camel_case.
    # Results are memoized per argument; the cached String itself
    # is returned, so callers should not mutate it.
    def self.un_camel_case(o_phrase)
      cached = @@ucc_cache[o_phrase]
      return cached if cached
      phrase = o_phrase.to_s.dup
      # Collapse acronyms to a single capitalized word first so that
      # they yield one underscore-separated segment.
      phrase.gsub!(/#{@@acronyms}/) { |a| a.downcase.capitalize }
      phrase.gsub!(/[A-Z]/) { |p| '_' + p.downcase }
      phrase = phrase[1..-1] if phrase[0] == '_'
      @@ucc_cache[o_phrase] = phrase
    end

    # Return the Levenshtein distance between two strings,
    # taking into account the costs of insertion, deletion,
    # and substitution. Adapted from:
    # http://ruby-snippets.heroku.com/string/levenshtein-distance
    #
    # Returns nil if either string is nil. Empty strings are
    # supported (the distance is then the cost of building or
    # removing the other string).
    def self.levenshtein(first, other, ins=1, del=1, sub=1)
      return nil if first.nil? || other.nil?
      # dm[i][j] is the distance between the first i characters
      # of other and the first j characters of first.
      dm = []
      dm[0] = (0..first.length).map { |j| j * ins }
      (1..other.length).each do |i|
        dm[i] = [i * del]
        (1..first.length).each do |j|
          dm[i][j] = [
            dm[i-1][j-1] + (first[j-1] == other[i-1] ? 0 : sub),
            dm[i][j-1] + ins,
            dm[i-1][j] + del
          ].min
        end
      end
      dm[other.length][first.length]
    end

    # Search the list to see if there are words
    # similar to name (at Levenshtein distance 1).
    # If yes, return a string saying
    # " Perhaps you meant ... ?"; otherwise return ''.
    def self.did_you_mean?(list, name)
      sugg = list.select do |element|
        # nil distance means one of the operands was nil: skip it.
        levenshtein(element, name) == 1
      end
      return '' if sugg.empty?
      if sugg.size == 1
        " Perhaps you meant '#{sugg[0]}' ?"
      else
        sugg_quote = sugg[0..-2].map { |x| "'#{x}'" }
        " Perhaps you meant #{sugg_quote.join(', ')}," +
        " or '#{sugg[-1]}' ?"
      end
    end

    # Returns, as a Symbol, the method name found in the n-th
    # entry of the call stack (an empty Symbol when the entry
    # carries no "in `name'" part).
    def self.caller_method(n = 3)
      at = caller(n).first
      /^(.+?):(\d+)(?::in `(.*)')?/ =~ at
      :"#{Regexp.last_match[3]}"
    end
  end
end
110
+
111
+ # Make undefining constants publicly
112
+ # available on any object.
113
Object.module_eval do
  # Removes the constant named +const+ from Object and then
  # prints the constant's name to standard output.
  # Returns nil (the result of puts).
  def self.const_unset(const)
    Object.instance_eval do
      remove_const(const)
    end
    puts const
  end
end
116
+
117
+ # Make the most common utility functions available in the global scope.
118
# Global shortcut for Treat::Utilities.create_temp_file; the
# given block receives whatever that method yields.
def create_temp_file(ext, value = nil, &block)
  Treat::Utilities.create_temp_file(ext, value) do |f|
    block.call(f)
  end
end

# Global shortcut for Treat::Utilities.silence_streams.
def silence_streams(*streams)
  Treat::Utilities.silence_streams(*streams) do
    yield
  end
end

# Global shortcut for Treat::Utilities.silently.
def silently(&block)
  Treat::Utilities.silently do
    block.call
  end
end

# Global shortcut for Treat::Utilities.camel_case.
def cc(w)
  Treat::Utilities.camel_case(w)
end

# Global shortcut for Treat::Utilities.un_camel_case.
def ucc(w)
  Treat::Utilities.un_camel_case(w)
end

# Returns the last '::'-separated segment of n.to_s
# (nil when n.to_s is empty).
def cl(n)
  segments = n.to_s.split('::')
  segments[-1]
end

# Global shortcut for Treat::Utilities.did_you_mean?.
def did_you_mean?(l, e)
  Treat::Utilities.did_you_mean?(l, e)
end

# Global shortcut for Treat::Utilities.caller_method.
def caller_method(n = 3)
  Treat::Utilities.caller_method(n)
end
@@ -0,0 +1,27 @@
1
module Treat
  # Make a tree visitable by implementing the method #accept.
  module Visitable
    # Accept a visitor implemented by klass, which is
    # found in the supplied group, and call method on it.
    #
    # Raises a RuntimeError when the group does not target the
    # receiver's class.
    #
    # For a group whose type is :transformer, a receiver with
    # children forwards the visit to each child the group targets,
    # a receiver without children is visited directly, and self is
    # returned in both cases. For any other group type the result
    # of klass.send(method, self, options) is returned.
    def accept(group, klass, method, options)
      unless group.has_target?(self.class)
        raise "This type of visitor cannot visit a #{self.class}."
      end

      unless group.type == :transformer
        return klass.send(method, self, options)
      end

      if has_children?
        @children.each do |child|
          next unless group.has_target?(child.class)
          child.accept(group, klass, method, options)
        end
      else
        klass.send(method, self, options)
      end
      self
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'unprof'
2
+ require 'tests'
@@ -0,0 +1,27 @@
1
module Treat
  module Tests
    # Checks the format, encoding and language detectors against
    # the Treat::Tests::EnglishLongDoc fixture.
    class TestDetectors < Test::Unit::TestCase

      def setup
        @doc = Treat::Tests::EnglishLongDoc
      end

      def test_format_detectors
        assert_equal :txt, @doc.format
      end

      def test_encoding_detectors
        assert_equal :utf_8, @doc.encoding(:native)
        assert_equal :utf_8, @doc.encoding(:r_chardet19)
      end

      def test_language_detectors
        assert_equal Treat.default_language, @doc.language
        Treat.detect_language = true
        begin
          assert_equal :eng, @doc.language
        ensure
          # Reset the global switch even when the assertion fails,
          # so a failure here does not leak into other tests.
          Treat.detect_language = false
        end
      end
    end

  end
end
@@ -0,0 +1,105 @@
1
module Treat
  module Tests
    # Exercises the entity API on a small hand-built tree:
    # a text holding one sentence made of the tokens
    # "The", "lazy", "fox", "is", "running" and ".".
    class TestEntity < Test::Unit::TestCase
      def setup
        @text = Treat::Entities::Text.new

        @sentence = Treat::Entities::Sentence.new

        @noun_phrase = tagged_phrase('NP')
        @verb_phrase = tagged_phrase('VP')
        @adj_phrase = tagged_phrase('ADJP')

        @det = tagged_word('The', :determiner, 'DT')
        @adj = tagged_word('lazy', :adjective, 'JJ')
        @noun = tagged_word('fox', :noun, 'NN')
        @aux = tagged_word('is', :verb, 'VBZ')
        @verb = tagged_word('running', :verb, 'VBG')
        @dot = Treat::Entities::Punctuation.new('.')

        # Assemble the tree from the top down.
        @text << @sentence << [@noun_phrase, @verb_phrase, @dot]
        @noun_phrase << [@det, @adj_phrase, @noun]
        @adj_phrase << @adj
        @verb_phrase << [@aux, @verb]
      end

      # Placeholder: no assertions yet.
      def test_respond_to_missing

      end

      def test_registrable
        assert_equal @text.token_registry, @verb.token_registry
        assert_equal @noun, @text.token_registry[:id][@noun.id]
        assert_equal [@noun], @text.token_registry[:value][@noun.value]
      end


      def test_delegatable_visitable
        assert_raise(Treat::Exception) { @text.encoding(:nonexistent) }
        assert_nothing_raised { @text.format }
      end

      def test_type
        assert_equal :text, @text.type
      end

      def test_printers
        assert_nothing_raised do
          @text.to_s
          @text.to_string
          @text.short_value
          @text.inspect
        end
      end

      def test_magic_methods
        assert_equal @sentence, @text.sentence
        assert_equal [@sentence], @text.sentences
        assert_equal 1, @text.sentence_count

        assert_equal [@det], @text.words_with_value('The')
        assert_equal [@verb], @text.words_with_tag('VBG')

        assert_equal @noun, @text.noun
        assert_equal [@aux, @verb], @text.verbs
        assert_equal 6, @text.token_count

        @text.each_sentence { |s| assert_equal @sentence, s }
        @text.each_noun { |n| assert_equal @noun, n }
        @text.each_with_value('The') { |x| assert_equal @det, x }

        assert_equal @sentence, @noun.parent_sentence
      end

      def test_features
        @verb.set :test, :test
        assert_equal :test, @verb.test
        assert_raise(Treat::Exception) { @verb.nonexistent }
      end

      private

      # Builds a Phrase and sets its :tag feature.
      def tagged_phrase(tag)
        phrase = Treat::Entities::Phrase.new
        phrase.set :tag, tag
        phrase
      end

      # Builds a Word with the given value and sets its
      # :cat and :tag features, in that order.
      def tagged_word(value, cat, tag)
        word = Treat::Entities::Word.new(value)
        word.set :cat, cat
        word.set :tag, tag
        word
      end

    end
  end
end
@@ -0,0 +1,48 @@
1
module Treat
  module Tests
    # Smoke tests for the extractor workers: most cases only
    # check that the call does not raise.
    class TestExtractors < Test::Unit::TestCase

      def setup
        @time = Treat::Tests::EnglishTime
        @date = Treat::Tests::EnglishDate
        @doc = Treat::Tests::EnglishLongDoc
        @word = Treat::Tests::EnglishWord
      end

      def test_time
        [:chronic, :native, :nickel].each do |worker|
          assert_nothing_raised { @date.time(worker) }
        end
      end

      def test_topic_words
        assert_nothing_raised { @doc.topic_words(:lda) }
      end


      # Both named-entity checks are currently disabled.
      def test_named_entity
        # assert_nothing_raised { @doc.named_entity(:stanford) }
        # assert_nothing_raised { @doc.named_entity(:abner) }
      end

      def test_key_sentences
        lda_topics = @doc.topic_words(:lda)
        assert_nothing_raised do
          @doc.key_sentences(:topics_frequency, lda_topics)
        end
      end

      def test_topics
        assert_nothing_raised { @doc.topics(:reuters) }
      end

      def test_statistics
        # Chunk, segment and tokenize the document before
        # asking for statistics.
        @doc.chunk.segment(:tactful).tokenize

        assert_nothing_raised do
          @doc.statistics(:frequency_of, :value => 'the')
        end
        assert_nothing_raised do
          @word.statistics(:frequency)
        end
        # assert_nothing_raised { @doc.statistics(:position_in) }
        # assert_nothing_raised { @doc.statistics(:transition_matrix) }
        # assert_nothing_raised { @doc.statistics(:transition_probability) }
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
module Treat
  module Tests
    # Tests for the formatter workers: serializers and
    # unserializers, visualizers and cleaners.
    class TestFormatters < Test::Unit::TestCase

      def setup
        @doc = Treat::Tests::EnglishShortDoc
        @html_doc = Treat::Tests::EnglishHtmlDoc
        @sentence = Treat::Tests::EnglishSentence
      end

      def test_readers
        # How should we test this?
      end


      # Saves the document in each format, loads it back from the
      # temporary file and compares the length of the file with the
      # length of the reloaded document's serialization.
      def test_serializers_and_unserializers
        [['yml', :yaml], ['xml', :xml]].each do |extension, serializer|
          create_temp_file(extension) do |tmp|
            @doc.serialize(serializer).save(tmp)
            doc = Treat::Entities::Document(tmp)
            assert_equal File.read(tmp).length,
                         doc.serialize(serializer).length
          end
        end
      end

      def test_visualizers
        # The :html visualizer is currently not exercised.
        [:tree, :dot, :inspect, :short_value].each do |visualizer|
          assert_nothing_raised { @doc.visualize(visualizer) }
        end
        assert_nothing_raised { @sentence.visualize(:standoff) }
      end

      def test_cleaners
        assert_nothing_raised { @html_doc.clean(:html) }
      end

    end
  end
end
@@ -0,0 +1,39 @@
1
module Treat
  module Tests
    # Tests for the inflector workers: stemmers, conjugators,
    # declensors and number-to-words conversion.
    class TestInflectors < Test::Unit::TestCase

      def setup
        @word = Treat::Tests::EnglishWord
        @number = Treat::Tests::Number
        @verb = Treat::Tests::EnglishVerb
        @noun = Treat::Tests::EnglishNoun
      end

      def test_lemmatizers
        # Not implemented yet.
      end

      def test_stemmers
        assert_equal 'run', @word.stem(:porter)
        assert_equal 'run', @word.stem(:porter_c)
        assert_equal 'run', @word.stem(:uea)
      end

      # The tests below used to sit after a stray `end` that closed
      # the class early, which left them defined outside the test
      # case.
      def test_conjugators
        assert_equal 'running', @verb.present_participle
        assert_equal 'run', @verb.infinitive
        assert_equal 'run', @verb.plural
      end

      def test_declensors
        assert_equal 'geese', @noun.plural
      end

      def test_ordinal_and_cardinal_words
        assert_equal 'twenty', @number.cardinal_words
        assert_equal 'twentieth', @number.ordinal_words
      end

    end
  end
end
@@ -0,0 +1,39 @@
1
module Treat
  module Tests
    # Tests for the lexicalizer workers: category, synsets,
    # linkages and taggers.
    class TestLexicalizers < Test::Unit::TestCase

      def setup
        @word = Treat::Tests::EnglishWord
        # The sentence fixture is parsed before each test.
        @sentence = Treat::Tests::EnglishSentence.parse
      end

      def test_category
        assert_equal :verb, @word.category(:from_tag)
      end

      def test_synsets
        # assert_nothing_raised { @word.synsets(:rita_wn) }
        [:synsets, :synonyms, :antonyms, :hyponyms, :hypernyms].each do |relation|
          assert_nothing_raised { @word.public_send(relation, :wordnet) }
        end
      end

      def test_linkages
        [:main_verb, :subject, :object, :patient].each do |linkage|
          assert_nothing_raised do
            @sentence.linkages(:naive, :linkage => linkage)
          end
        end
      end

      def test_taggers
        [:brill, :lingua, :stanford].each do |tagger|
          assert_nothing_raised { @word.tag(tagger) }
        end
      end

    end

  end
end