treat 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (147) hide show
  1. data/INSTALL +0 -0
  2. data/LICENSE +28 -0
  3. data/README +0 -0
  4. data/TODO +67 -0
  5. data/bin/INFO +1 -0
  6. data/examples/benchmark.rb +81 -0
  7. data/examples/keywords.rb +60 -0
  8. data/examples/texts/bugged_out.txt +26 -0
  9. data/examples/texts/half_cocked_basel.txt +16 -0
  10. data/examples/texts/hedge_funds.txt +24 -0
  11. data/examples/texts/hose_and_dry.txt +19 -0
  12. data/examples/texts/hungarys_troubles.txt +46 -0
  13. data/examples/texts/indias_slowdown.txt +15 -0
  14. data/examples/texts/merkozy_rides_again.txt +24 -0
  15. data/examples/texts/prada_is_not_walmart.txt +9 -0
  16. data/examples/texts/republican_nomination.txt +26 -0
  17. data/examples/texts/to_infinity_and_beyond.txt +15 -0
  18. data/lib/treat.rb +91 -0
  19. data/lib/treat/buildable.rb +115 -0
  20. data/lib/treat/categories.rb +29 -0
  21. data/lib/treat/category.rb +28 -0
  22. data/lib/treat/delegatable.rb +90 -0
  23. data/lib/treat/detectors.rb +28 -0
  24. data/lib/treat/detectors/encoding/native.rb +12 -0
  25. data/lib/treat/detectors/encoding/r_chardet19.rb +24 -0
  26. data/lib/treat/detectors/format/file.rb +36 -0
  27. data/lib/treat/detectors/language/language_detector.rb +19 -0
  28. data/lib/treat/detectors/language/what_language.rb +29 -0
  29. data/lib/treat/entities.rb +52 -0
  30. data/lib/treat/entities/collection.rb +19 -0
  31. data/lib/treat/entities/constituents.rb +15 -0
  32. data/lib/treat/entities/document.rb +11 -0
  33. data/lib/treat/entities/entity.rb +242 -0
  34. data/lib/treat/entities/sentence.rb +8 -0
  35. data/lib/treat/entities/text.rb +7 -0
  36. data/lib/treat/entities/tokens.rb +37 -0
  37. data/lib/treat/entities/zones.rb +17 -0
  38. data/lib/treat/exception.rb +5 -0
  39. data/lib/treat/extractors.rb +41 -0
  40. data/lib/treat/extractors/key_sentences/topics_frequency.rb +49 -0
  41. data/lib/treat/extractors/named_entity/abner.rb +20 -0
  42. data/lib/treat/extractors/named_entity/stanford.rb +174 -0
  43. data/lib/treat/extractors/statistics/frequency.rb +22 -0
  44. data/lib/treat/extractors/statistics/frequency_of.rb +17 -0
  45. data/lib/treat/extractors/statistics/position_in.rb +13 -0
  46. data/lib/treat/extractors/statistics/transition_matrix.rb +105 -0
  47. data/lib/treat/extractors/statistics/transition_probability.rb +53 -0
  48. data/lib/treat/extractors/time/chronic.rb +12 -0
  49. data/lib/treat/extractors/time/native.rb +12 -0
  50. data/lib/treat/extractors/time/nickel.rb +45 -0
  51. data/lib/treat/extractors/topic_words/lda.rb +71 -0
  52. data/lib/treat/extractors/topic_words/lda/data.dat +46 -0
  53. data/lib/treat/extractors/topic_words/lda/wiki.yml +121 -0
  54. data/lib/treat/extractors/topics/reuters.rb +91 -0
  55. data/lib/treat/extractors/topics/reuters/industry.xml +2717 -0
  56. data/lib/treat/extractors/topics/reuters/region.xml +13585 -0
  57. data/lib/treat/extractors/topics/reuters/topics.xml +17977 -0
  58. data/lib/treat/feature.rb +53 -0
  59. data/lib/treat/formatters.rb +44 -0
  60. data/lib/treat/formatters/cleaners/html.rb +17 -0
  61. data/lib/treat/formatters/readers/autoselect.rb +35 -0
  62. data/lib/treat/formatters/readers/gocr.rb +24 -0
  63. data/lib/treat/formatters/readers/html.rb +13 -0
  64. data/lib/treat/formatters/readers/ocropus.rb +31 -0
  65. data/lib/treat/formatters/readers/pdf.rb +17 -0
  66. data/lib/treat/formatters/readers/txt.rb +15 -0
  67. data/lib/treat/formatters/serializers/xml.rb +48 -0
  68. data/lib/treat/formatters/serializers/yaml.rb +15 -0
  69. data/lib/treat/formatters/serializers/yaml/helper.rb +96 -0
  70. data/lib/treat/formatters/unserializers/autoselect.rb +19 -0
  71. data/lib/treat/formatters/unserializers/xml.rb +79 -0
  72. data/lib/treat/formatters/unserializers/yaml.rb +15 -0
  73. data/lib/treat/formatters/visualizers/dot.rb +73 -0
  74. data/lib/treat/formatters/visualizers/html.rb +12 -0
  75. data/lib/treat/formatters/visualizers/inspect.rb +16 -0
  76. data/lib/treat/formatters/visualizers/short_value.rb +14 -0
  77. data/lib/treat/formatters/visualizers/standoff.rb +41 -0
  78. data/lib/treat/formatters/visualizers/tree.rb +28 -0
  79. data/lib/treat/formatters/visualizers/txt.rb +31 -0
  80. data/lib/treat/group.rb +96 -0
  81. data/lib/treat/inflectors.rb +50 -0
  82. data/lib/treat/inflectors/cardinal_words/linguistics.rb +45 -0
  83. data/lib/treat/inflectors/conjugators/linguistics.rb +30 -0
  84. data/lib/treat/inflectors/declensors/en.rb +18 -0
  85. data/lib/treat/inflectors/declensors/linguistics.rb +30 -0
  86. data/lib/treat/inflectors/lemmatizers/e_lemma.rb +12 -0
  87. data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +213 -0
  88. data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +68 -0
  89. data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +6 -0
  90. data/lib/treat/inflectors/ordinal_words/linguistics.rb +21 -0
  91. data/lib/treat/inflectors/stemmers/porter.rb +158 -0
  92. data/lib/treat/inflectors/stemmers/porter_c.rb +23 -0
  93. data/lib/treat/inflectors/stemmers/uea.rb +30 -0
  94. data/lib/treat/lexicalizers.rb +49 -0
  95. data/lib/treat/lexicalizers/category/from_tag.rb +30 -0
  96. data/lib/treat/lexicalizers/linkages/naive.rb +63 -0
  97. data/lib/treat/lexicalizers/synsets/rita_wn.rb +23 -0
  98. data/lib/treat/lexicalizers/synsets/wordnet.rb +72 -0
  99. data/lib/treat/lexicalizers/tag/brill.rb +101 -0
  100. data/lib/treat/lexicalizers/tag/lingua.rb +114 -0
  101. data/lib/treat/lexicalizers/tag/stanford.rb +86 -0
  102. data/lib/treat/processors.rb +45 -0
  103. data/lib/treat/processors/chunkers/txt.rb +27 -0
  104. data/lib/treat/processors/parsers/enju.rb +214 -0
  105. data/lib/treat/processors/parsers/stanford.rb +60 -0
  106. data/lib/treat/processors/segmenters/punkt.rb +48 -0
  107. data/lib/treat/processors/segmenters/stanford.rb +45 -0
  108. data/lib/treat/processors/segmenters/tactful.rb +34 -0
  109. data/lib/treat/processors/tokenizers/macintyre.rb +76 -0
  110. data/lib/treat/processors/tokenizers/multilingual.rb +31 -0
  111. data/lib/treat/processors/tokenizers/perl.rb +96 -0
  112. data/lib/treat/processors/tokenizers/punkt.rb +42 -0
  113. data/lib/treat/processors/tokenizers/stanford.rb +33 -0
  114. data/lib/treat/processors/tokenizers/tactful.rb +59 -0
  115. data/lib/treat/proxies.rb +66 -0
  116. data/lib/treat/registrable.rb +26 -0
  117. data/lib/treat/resources.rb +10 -0
  118. data/lib/treat/resources/categories.rb +18 -0
  119. data/lib/treat/resources/delegates.rb +96 -0
  120. data/lib/treat/resources/dependencies.rb +0 -0
  121. data/lib/treat/resources/edges.rb +8 -0
  122. data/lib/treat/resources/formats.rb +23 -0
  123. data/lib/treat/resources/languages.rb +86 -0
  124. data/lib/treat/resources/languages.txt +504 -0
  125. data/lib/treat/resources/tags.rb +393 -0
  126. data/lib/treat/sugar.rb +43 -0
  127. data/lib/treat/tree.rb +174 -0
  128. data/lib/treat/utilities.rb +127 -0
  129. data/lib/treat/visitable.rb +27 -0
  130. data/test/profile.rb +2 -0
  131. data/test/tc_detectors.rb +27 -0
  132. data/test/tc_entity.rb +105 -0
  133. data/test/tc_extractors.rb +48 -0
  134. data/test/tc_formatters.rb +46 -0
  135. data/test/tc_inflectors.rb +39 -0
  136. data/test/tc_lexicalizers.rb +39 -0
  137. data/test/tc_processors.rb +36 -0
  138. data/test/tc_resources.rb +27 -0
  139. data/test/tc_treat.rb +64 -0
  140. data/test/tc_tree.rb +60 -0
  141. data/test/tests.rb +19 -0
  142. data/test/texts.rb +20 -0
  143. data/test/texts/english/long.html +24 -0
  144. data/test/texts/english/long.txt +22 -0
  145. data/test/texts/english/medium.txt +5 -0
  146. data/test/texts/english/short.txt +3 -0
  147. metadata +412 -0
@@ -0,0 +1,127 @@
1
module Treat
  # Provides utility functions used across the library.
  module Utilities
    # Require file utilities.
    require 'fileutils'
    # Tempfile backs create_temp_file below.
    require 'tempfile'

    # Returns the platform we are running on, taken as the
    # second dash-separated component of RUBY_PLATFORM.
    def self.platform
      RUBY_PLATFORM.split('-')[1]
    end

    # Runs a block of code silently, i.e. without
    # expressing warnings even in verbose mode.
    # Returns the value of the block. The previous
    # $VERBOSE level is restored even if the block raises.
    # TODO (original author's note, wording garbled in
    # the source): this method was meant to be renamed.
    def self.silently(&block)
      warn_level = $VERBOSE
      $VERBOSE = nil
      block.call
    ensure
      $VERBOSE = warn_level
    end

    # Currently a pass-through: the supplied streams are
    # ignored and the block is simply yielded to.
    def self.silence_streams(*streams)
      yield
    end

    # Create a temporary file which is deleted
    # after execution of the block.
    #
    # ext   - extension of the file (without the leading dot).
    # value - optional content, written with #puts.
    # block - receives the path of the temporary file.
    #
    # The file is created inside Treat.tmp. Returns the
    # value of the block.
    def self.create_temp_file(ext, value = nil, &block)
      tmp = Tempfile.new(['', ".#{ext.to_s}"], Treat.tmp)
      begin
        tmp.puts(value) if value
        # Flush so that readers opening the path see the content.
        tmp.flush
        block.call(tmp.path)
      ensure
        # Close and unlink now rather than waiting for the finalizer.
        tmp.close!
      end
    end

    # A list of acronyms used in class names within
    # the program. These do not CamelCase; they
    # CAMELCASE.
    @@acronyms = ['XML', 'HTML', 'YAML', 'UEA', 'LDA', 'PDF', 'GOCR', 'Treat'].join('|')

    # Memoized results of camel_case, keyed by the original argument.
    @@cc_cache = {}

    # Convert un_camel_case to CamelCase.
    def self.camel_case(o_phrase)
      cached = @@cc_cache[o_phrase]
      return cached if cached
      phrase = o_phrase.to_s.dup
      # NOTE: the trailing character class binds only to the last
      # alternative; the other acronyms match anywhere in the phrase.
      phrase.gsub!(/#{@@acronyms.downcase}[^a-z]+/) { |a| a.upcase }
      phrase.gsub!(/^[a-z]|_[a-z]/) { |a| a.upcase }
      phrase.gsub!('_', '')
      @@cc_cache[o_phrase] = phrase
    end

    # Memoized results of un_camel_case, keyed by the original argument.
    @@ucc_cache = {}

    # Convert CamelCase to un_camel_case.
    def self.un_camel_case(o_phrase)
      cached = @@ucc_cache[o_phrase]
      return cached if cached
      phrase = o_phrase.to_s.dup
      # Turn acronyms into ordinary capitalized words first, so
      # that they yield a single underscore-separated segment.
      phrase.gsub!(/#{@@acronyms}/) { |a| a.downcase.capitalize }
      phrase.gsub!(/[A-Z]/) { |p| '_' + p.downcase }
      phrase = phrase[1..-1] if phrase.start_with?('_')
      @@ucc_cache[o_phrase] = phrase
    end

    # Return the Levenshtein distance between two strings,
    # taking into account the costs of insertion, deletion,
    # and substitution. Adapted from:
    # http://ruby-snippets.heroku.com/string/levenshtein-distance
    #
    # Returns nil if either argument is nil.
    def self.levenshtein(first, other, ins=1, del=1, sub=1)
      return nil if first.nil? || other.nil?
      # dm[i][j] is the cost of aligning the first i characters
      # of +other+ with the first j characters of +first+.
      dm = [(0..first.length).map { |j| j * ins }]
      (1..other.length).each do |i|
        row = [i * del]
        (1..first.length).each do |j|
          # i indexes +other+ and j indexes +first+.
          substitution = first[j - 1] == other[i - 1] ? 0 : sub
          row << [
            dm[i - 1][j - 1] + substitution,
            row[j - 1] + ins,
            dm[i - 1][j] + del
          ].min
        end
        dm << row
      end
      dm[other.length][first.length]
    end

    # Search the list to see if there are words
    # similar to name (at a Levenshtein distance of
    # exactly one). If yes, return a string saying
    # "Perhaps you meant ... ?"; otherwise return an
    # empty string. Entries for which no distance can
    # be computed (nil) are skipped.
    def self.did_you_mean?(list, name)
      sugg = list.select { |element| levenshtein(element, name) == 1 }
      return '' if sugg.empty?
      return " Perhaps you meant '#{sugg[0]}' ?" if sugg.size == 1
      sugg_quote = sugg[0..-2].map { |x| "'#{x}'" }
      " Perhaps you meant #{sugg_quote.join(', ')}," +
      " or '#{sugg[-1]}' ?"
    end

    # Returns, as a symbol, the method name captured from the
    # first entry of caller(n).
    # NOTE(review): relies on the "file:line:in `name'" backtrace
    # format; confirm on the Ruby versions that must be supported.
    def self.caller_method(n = 3)
      at = caller(n).first
      /^(.+?):(\d+)(?::in `(.*)')?/ =~ at
      :"#{Regexp.last_match[3]}"
    end
  end
end
110
+
111
+ # Make undefining constants publicly
112
+ # available on any object.
113
Object.module_eval do
  # Removes the constant named +const+ from Object and
  # prints its name to standard output.
  def self.const_unset(const)
    Object.send(:remove_const, const)
    puts const
  end
end

# Make the most common utility functions available in the global scope.

# Shortcut for Treat::Utilities.create_temp_file.
def create_temp_file(ext, value = nil, &block)
  Treat::Utilities.create_temp_file(ext, value) do |path|
    block.call(path)
  end
end

# Shortcut for Treat::Utilities.silence_streams.
def silence_streams(*streams)
  Treat::Utilities.silence_streams(*streams) do
    yield
  end
end

# Shortcut for Treat::Utilities.silently.
def silently(&block)
  Treat::Utilities.silently do
    block.call
  end
end

# Shortcut for Treat::Utilities.camel_case.
def cc(w)
  Treat::Utilities.camel_case(w)
end

# Shortcut for Treat::Utilities.un_camel_case.
def ucc(w)
  Treat::Utilities.un_camel_case(w)
end

# Returns the last '::'-separated segment of the
# string representation of +n+.
def cl(n)
  segments = n.to_s.split('::')
  segments[-1]
end

# Shortcut for Treat::Utilities.did_you_mean?.
def did_you_mean?(l, e)
  Treat::Utilities.did_you_mean?(l, e)
end

# Shortcut for Treat::Utilities.caller_method.
def caller_method(n = 3)
  Treat::Utilities.caller_method(n)
end
@@ -0,0 +1,27 @@
1
module Treat
  # Make a tree visitable by implementing the method #accept.
  module Visitable
    # Accept a visitor implemented by klass, which is
    # found in the supplied group, and call method on it.
    #
    # Raises a RuntimeError if the group does not target
    # the class of the receiver.
    #
    # For groups of type :transformer, the visit is passed
    # down to the direct children that the group targets
    # (or applied to the receiver itself when it has no
    # children), and the receiver is returned. For any other
    # group type, the result of the visitor call is returned.
    def accept(group, klass, method, options)
      unless group.has_target?(self.class)
        raise "This type of visitor cannot visit a #{self.class}."
      end

      unless group.type == :transformer
        return klass.send(method, self, options)
      end

      if has_children?
        @children.each do |child|
          next unless group.has_target?(child.class)
          child.accept(group, klass, method, options)
        end
      else
        klass.send(method, self, options)
      end
      self
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require 'unprof'
2
+ require 'tests'
@@ -0,0 +1,27 @@
1
module Treat
  module Tests
    # Tests for the format, encoding and language detectors,
    # run against the long English sample document.
    class TestDetectors < Test::Unit::TestCase

      def setup
        @doc = Treat::Tests::EnglishLongDoc
      end

      def test_format_detectors
        assert_equal :txt, @doc.format
      end

      def test_encoding_detectors
        assert_equal :utf_8, @doc.encoding(:native)
        assert_equal :utf_8, @doc.encoding(:r_chardet19)
      end

      # Checks the language reported first with detection left
      # as configured, then with detection switched on.
      def test_language_detectors
        assert_equal Treat.default_language, @doc.language
        begin
          Treat.detect_language = true
          assert_equal :eng, @doc.language
        ensure
          # Always switch the global flag back off, so that a
          # failing assertion does not leak into other tests.
          Treat.detect_language = false
        end
      end
    end

  end
end
@@ -0,0 +1,105 @@
1
module Treat
  module Tests
    # Tests for the behaviour of entities, using a small
    # tree assembled by hand in #setup.
    class TestEntity < Test::Unit::TestCase

      # Assembles the tree for "The lazy fox is running.":
      # a text holding one sentence, made of a noun phrase
      # (with a nested adjective phrase), a verb phrase and
      # a final punctuation mark.
      def setup
        @text = Treat::Entities::Text.new

        @sentence = Treat::Entities::Sentence.new

        @noun_phrase = build_phrase('NP')
        @verb_phrase = build_phrase('VP')
        @adj_phrase = build_phrase('ADJP')

        @det = build_word('The', :determiner, 'DT')
        @adj = build_word('lazy', :adjective, 'JJ')
        @noun = build_word('fox', :noun, 'NN')
        @aux = build_word('is', :verb, 'VBZ')
        @verb = build_word('running', :verb, 'VBG')
        @dot = Treat::Entities::Punctuation.new('.')

        @text << @sentence << [@noun_phrase, @verb_phrase, @dot]
        @noun_phrase << [@det, @adj_phrase, @noun]
        @adj_phrase << @adj
        @verb_phrase << [@aux, @verb]
      end

      # Placeholder: no assertions yet.
      def test_respond_to_missing

      end

      def test_registrable
        registry = @text.token_registry
        assert_equal registry, @verb.token_registry
        assert_equal @noun, @text.token_registry[:id][@noun.id]
        assert_equal [@noun], @text.token_registry[:value][@noun.value]
      end

      def test_delegatable_visitable
        assert_raise(Treat::Exception) { @text.encoding(:nonexistent) }
        assert_nothing_raised { @text.format }
      end

      def test_type
        assert_equal :text, @text.type
      end

      def test_printers
        assert_nothing_raised do
          @text.to_s
          @text.to_string
          @text.short_value
          @text.inspect
        end
      end

      def test_magic_methods
        # Singular, plural and count accessors.
        assert_equal @sentence, @text.sentence
        assert_equal [@sentence], @text.sentences
        assert_equal 1, @text.sentence_count

        # Lookups by feature.
        assert_equal [@det], @text.words_with_value('The')
        assert_equal [@verb], @text.words_with_tag('VBG')

        # Lookups by category.
        assert_equal @noun, @text.noun
        assert_equal [@aux, @verb], @text.verbs
        assert_equal 6, @text.token_count

        # Iterators.
        @text.each_sentence { |s| assert_equal @sentence, s }
        @text.each_noun { |n| assert_equal @noun, n }
        @text.each_with_value('The') { |x| assert_equal @det, x }

        # Ancestor lookup.
        assert_equal @sentence, @noun.parent_sentence
      end

      def test_features
        @verb.set :test, :test
        assert_equal :test, @verb.test
        assert_raise(Treat::Exception) { @verb.nonexistent }
      end

      private

      # Creates a phrase carrying the given tag.
      def build_phrase(tag)
        phrase = Treat::Entities::Phrase.new
        phrase.set :tag, tag
        phrase
      end

      # Creates a word with the given value, then sets its
      # category and its tag, in that order.
      def build_word(value, cat, tag)
        word = Treat::Entities::Word.new(value)
        word.set :cat, cat
        word.set :tag, tag
        word
      end

    end
  end
end
@@ -0,0 +1,48 @@
1
module Treat
  module Tests
    # Smoke tests for the extractors: most of them only
    # check that a given extractor runs without raising.
    class TestExtractors < Test::Unit::TestCase

      def setup
        @time = Treat::Tests::EnglishTime
        @date = Treat::Tests::EnglishDate
        @doc = Treat::Tests::EnglishLongDoc
        @word = Treat::Tests::EnglishWord
      end

      def test_time
        [:chronic, :native, :nickel].each do |extractor|
          assert_nothing_raised { @date.time(extractor) }
        end
      end

      def test_topic_words
        assert_nothing_raised { @doc.topic_words(:lda) }
      end

      # All assertions are currently disabled.
      def test_named_entity
        # assert_nothing_raised { @doc.named_entity(:stanford) }
        # assert_nothing_raised { @doc.named_entity(:abner) }
      end

      def test_key_sentences
        topics = @doc.topic_words(:lda)
        assert_nothing_raised do
          @doc.key_sentences(:topics_frequency, topics)
        end
      end

      def test_topics
        assert_nothing_raised { @doc.topics(:reuters) }
      end

      def test_statistics
        # Chunk, segment and tokenize the document
        # before computing statistics on it.
        @doc.chunk.segment(:tactful).tokenize

        assert_nothing_raised do
          @doc.statistics(:frequency_of, :value => 'the')
        end
        assert_nothing_raised { @word.statistics(:frequency) }
        # assert_nothing_raised { @doc.statistics(:position_in) }
        # assert_nothing_raised { @doc.statistics(:transition_matrix) }
        # assert_nothing_raised { @doc.statistics(:transition_probability) }
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
module Treat
  module Tests
    # Tests for the formatters: serializers and
    # unserializers, visualizers and cleaners.
    class TestFormatters < Test::Unit::TestCase

      def setup
        @doc = Treat::Tests::EnglishShortDoc
        @html_doc = Treat::Tests::EnglishHtmlDoc
        @sentence = Treat::Tests::EnglishSentence
      end

      def test_readers
        # How should we test this?
      end

      # Round trip: serialize the document to a temporary
      # file, load it back, serialize it again and compare
      # the length of the output to that of the file.
      def test_serializers_and_unserializers
        [['yml', :yaml], ['xml', :xml]].each do |extension, serializer|
          create_temp_file(extension) do |tmp|
            @doc.serialize(serializer).save(tmp)
            doc = Treat::Entities::Document(tmp)
            expected = File.read(tmp).length
            assert_equal expected, doc.serialize(serializer).length
          end
        end
      end

      def test_visualizers
        assert_nothing_raised { @doc.visualize(:tree) }
        # assert_nothing_raised { @doc.visualize(:html) }
        [:dot, :inspect, :short_value].each do |visualizer|
          assert_nothing_raised { @doc.visualize(visualizer) }
        end
        assert_nothing_raised { @sentence.visualize(:standoff) }
      end

      def test_cleaners
        assert_nothing_raised { @html_doc.clean(:html) }
      end

    end
  end
end
@@ -0,0 +1,39 @@
1
module Treat
  module Tests
    # Tests for the inflectors: stemmers, conjugators,
    # declensors and ordinal/cardinal words.
    class TestInflectors < Test::Unit::TestCase

      def setup
        @word = Treat::Tests::EnglishWord
        @number = Treat::Tests::Number
        @verb = Treat::Tests::EnglishVerb
        @noun = Treat::Tests::EnglishNoun
      end

      def test_lemmatizers
        # Not implemented yet.
      end

      def test_stemmers
        assert_equal 'run', @word.stem(:porter)
        assert_equal 'run', @word.stem(:porter_c)
        assert_equal 'run', @word.stem(:uea)
      end

      # The tests below used to sit after a stray `end` that
      # closed the class early, so they were never picked up
      # by the test runner. They now belong to the test case.

      def test_conjugators
        assert_equal 'running', @verb.present_participle
        assert_equal 'run', @verb.infinitive
        assert_equal 'run', @verb.plural
      end

      def test_declensors
        assert_equal 'geese', @noun.plural
      end

      def test_ordinal_and_cardinal_words
        assert_equal 'twenty', @number.cardinal_words
        assert_equal 'twentieth', @number.ordinal_words
      end

    end
  end
end
@@ -0,0 +1,39 @@
1
module Treat
  module Tests
    # Tests for the lexicalizers: category, synsets,
    # linkages and taggers.
    class TestLexicalizers < Test::Unit::TestCase

      def setup
        @word = Treat::Tests::EnglishWord
        # The linkage tests work on the result of parsing the sentence.
        @sentence = Treat::Tests::EnglishSentence.parse
      end

      def test_category
        assert_equal :verb, @word.category(:from_tag)
      end

      def test_synsets
        # assert_nothing_raised { @word.synsets(:rita_wn) }
        assert_nothing_raised { @word.synsets(:wordnet) }
        assert_nothing_raised { @word.synonyms(:wordnet) }
        assert_nothing_raised { @word.antonyms(:wordnet) }
        assert_nothing_raised { @word.hyponyms(:wordnet) }
        assert_nothing_raised { @word.hypernyms(:wordnet) }
      end

      def test_linkages
        [:main_verb, :subject, :object, :patient].each do |linkage|
          assert_nothing_raised do
            @sentence.linkages(:naive, :linkage => linkage)
          end
        end
      end

      def test_taggers
        [:brill, :lingua, :stanford].each do |tagger|
          assert_nothing_raised { @word.tag(tagger) }
        end
      end

    end

  end
end