treat 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (81) hide show
  1. data/INSTALL +1 -0
  2. data/README +3 -0
  3. data/TODO +14 -26
  4. data/bin/INFO +1 -1
  5. data/lib/treat/buildable.rb +10 -11
  6. data/lib/treat/categories.rb +8 -6
  7. data/lib/treat/category.rb +7 -2
  8. data/lib/treat/delegatable.rb +64 -56
  9. data/lib/treat/detectors/encoding/r_chardet19.rb +1 -1
  10. data/lib/treat/detectors/language/language_detector.rb +2 -1
  11. data/lib/treat/detectors/language/what_language.rb +2 -2
  12. data/lib/treat/detectors.rb +3 -0
  13. data/lib/treat/entities/entity.rb +1 -1
  14. data/lib/treat/entities.rb +9 -10
  15. data/lib/treat/exception.rb +3 -1
  16. data/lib/treat/extractors/named_entity/abner.rb +1 -1
  17. data/lib/treat/extractors/named_entity/stanford.rb +2 -2
  18. data/lib/treat/extractors/time/chronic.rb +2 -2
  19. data/lib/treat/extractors/time/nickel.rb +2 -2
  20. data/lib/treat/extractors/topic_words/lda.rb +2 -2
  21. data/lib/treat/extractors.rb +12 -9
  22. data/lib/treat/feature.rb +6 -1
  23. data/lib/treat/formatters/cleaners/html.rb +1 -1
  24. data/lib/treat/formatters.rb +8 -8
  25. data/lib/treat/group.rb +11 -10
  26. data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
  27. data/lib/treat/inflectors/{conjugators → conjugations}/linguistics.rb +6 -6
  28. data/lib/treat/inflectors/{declensors → declensions}/en.rb +2 -2
  29. data/lib/treat/inflectors/{declensors → declensions}/linguistics.rb +5 -5
  30. data/lib/treat/inflectors/ordinal_words/linguistics.rb +4 -4
  31. data/lib/treat/inflectors/{stemmers → stem}/porter.rb +1 -1
  32. data/lib/treat/inflectors/{stemmers → stem}/porter_c.rb +3 -3
  33. data/lib/treat/inflectors/{stemmers → stem}/uea.rb +3 -3
  34. data/lib/treat/inflectors.rb +8 -21
  35. data/lib/treat/kernel.rb +120 -0
  36. data/lib/treat/languages/arabic.rb +14 -0
  37. data/lib/treat/languages/categories.rb +5 -0
  38. data/lib/treat/languages/chinese.rb +12 -0
  39. data/lib/treat/languages/english/categories.rb +23 -0
  40. data/lib/treat/{resources → languages/english}/tags.rb +127 -184
  41. data/lib/treat/languages/english.rb +33 -0
  42. data/lib/treat/languages/french.rb +17 -0
  43. data/lib/treat/languages/german.rb +17 -0
  44. data/lib/treat/languages/italian.rb +14 -0
  45. data/lib/treat/{resources/languages.txt → languages/list.txt} +0 -0
  46. data/lib/treat/languages/xinhua.rb +12 -0
  47. data/lib/treat/languages.rb +91 -0
  48. data/lib/treat/lexicalizers/category/from_tag.rb +20 -8
  49. data/lib/treat/lexicalizers/synsets/rita_wn.rb +1 -1
  50. data/lib/treat/lexicalizers/tag/brill.rb +2 -1
  51. data/lib/treat/lexicalizers/tag/lingua.rb +2 -1
  52. data/lib/treat/lexicalizers/tag/stanford.rb +16 -15
  53. data/lib/treat/lexicalizers.rb +1 -1
  54. data/lib/treat/object.rb +6 -0
  55. data/lib/treat/processors/parsers/enju.rb +3 -2
  56. data/lib/treat/processors/parsers/stanford.rb +15 -12
  57. data/lib/treat/processors/segmenters/punkt.rb +1 -1
  58. data/lib/treat/processors/segmenters/stanford.rb +7 -5
  59. data/lib/treat/processors/segmenters/tactful.rb +1 -1
  60. data/lib/treat/processors/tokenizers/multilingual.rb +2 -2
  61. data/lib/treat/processors/tokenizers/stanford.rb +7 -5
  62. data/lib/treat/visitable.rb +2 -1
  63. data/lib/treat.rb +105 -54
  64. data/test/tc_entity.rb +5 -0
  65. data/test/tc_resources.rb +5 -5
  66. data/test/tc_treat.rb +1 -2
  67. data/test/tests.rb +2 -1
  68. metadata +63 -64
  69. data/lib/treat/formatters/serializers/yaml/helper.rb +0 -96
  70. data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +0 -213
  71. data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +0 -68
  72. data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +0 -6
  73. data/lib/treat/inflectors/lemmatizers/e_lemma.rb +0 -12
  74. data/lib/treat/resources/categories.rb +0 -18
  75. data/lib/treat/resources/delegates.rb +0 -96
  76. data/lib/treat/resources/dependencies.rb +0 -0
  77. data/lib/treat/resources/edges.rb +0 -8
  78. data/lib/treat/resources/formats.rb +0 -23
  79. data/lib/treat/resources/languages.rb +0 -86
  80. data/lib/treat/resources.rb +0 -10
  81. data/lib/treat/utilities.rb +0 -127
@@ -3,17 +3,15 @@ module Treat
3
3
  module Tag
4
4
  class Stanford
5
5
  # Require the Ruby-Java bridge.
6
- silently do
6
+ silence_warnings do
7
7
  require 'rjb'
8
- jar = "#{Treat.bin}/stanford_tagger/stanford-postagger.jar"
9
- unless File.readable?(jar)
10
- raise "Could not find stanford tagger JAR file in #{jar}."+
11
- " You may need to set Treat.bin to a custom value."
8
+ jar = "#{Treat.bin}/stanford-tagger*/stanford-postagger*.jar"
9
+ jars = Dir.glob(jar)
10
+ if jars.empty? || !File.readable?(jars[0])
11
+ raise "Could not find stanford tagger JAR file (looking in #{jar})."+
12
+ " You may need to manually download the JAR files and/or set Treat.bin."
12
13
  end
13
- Rjb::load(
14
- "#{Treat.bin}/stanford_tagger/stanford-postagger.jar",
15
- ['-Xms256M', '-Xmx512M']
16
- )
14
+ Rjb::load(jars[0], ['-Xms256M', '-Xmx512M'])
17
15
  MaxentTagger = ::Rjb::import('edu.stanford.nlp.tagger.maxent.MaxentTagger')
18
16
  Word = ::Rjb::import('edu.stanford.nlp.ling.Word')
19
17
  List = ::Rjb::import('java.util.ArrayList')
@@ -43,8 +41,8 @@ module Treat
43
41
  else
44
42
  model = LanguageToModel[lang]
45
43
  if model.nil?
46
- raise Treat::Exception "There exists no Stanford" +
47
- "tagger model for language #{lang}."
44
+ raise Treat::Exception, "There exists no Stanford tagger model for " +
45
+ "the #{Treat::Languages.describe(lang)} language ."
48
46
  end
49
47
  end
50
48
  # Reinitialize the tagger if the options have changed.
@@ -53,15 +51,18 @@ module Treat
53
51
  @@taggers[lang] = nil # Reset the tagger
54
52
  end
55
53
  if @@taggers[lang].nil?
56
- model = "#{Treat.bin}/stanford_tagger/models/#{model}"
57
- unless File.readable?(model)
58
- raise "Could not find a tagger model for language #{lang}: looking in #{model}."
54
+ model = "#{Treat.bin}/stanford-tagger*/models/#{model}"
55
+ models = Dir.glob(model)
56
+ if models.empty? || !File.readable?(models[0])
57
+ raise "Could not find a tagger model for the " +
58
+ "#{Treat::Languages.describe(lang)}: looking in #{model}."
59
59
  end
60
60
  silence_streams(STDOUT, STDERR) do
61
61
  @@taggers[lang] =
62
- MaxentTagger.new(model)
62
+ MaxentTagger.new(models[0])
63
63
  end
64
64
  end
65
+ entity.set :tag_set, :penn
65
66
  list = List.new
66
67
  id_list = {}
67
68
  i = 0
@@ -7,7 +7,7 @@ module Treat
7
7
  module Tag
8
8
  extend Group
9
9
  self.type = :annotator
10
- self.targets = [:phrase, :word]
10
+ self.targets = [:word]
11
11
  end
12
12
  module Category
13
13
  extend Group
@@ -0,0 +1,6 @@
1
+ # Make undefining constants publicly available on any object.
2
+ Object.module_eval do
3
+ def self.const_unset(const)
4
+ Object.instance_eval { remove_const(const) }
5
+ end
6
+ end
@@ -55,7 +55,7 @@ module Treat
55
55
  text = entity.to_s + '.'
56
56
  else
57
57
  remove_last = false
58
- text = entity.to_s
58
+ text = entity.to_s.gsub('.', '') + '.' # Fix
59
59
  end
60
60
  stdin.puts(text + "\n")
61
61
  parsed = build(stdout.gets, remove_last)
@@ -120,7 +120,7 @@ module Treat
120
120
  new_attributes[:enju_cat] = value
121
121
  xcat = attributes['xcat'].split(' ')[0]
122
122
  xcat ||= ''
123
- tags = Treat::Resources::Tags::EnjuCatXcatToPTB.select do |m|
123
+ tags = Treat::Languages::English::EnjuCatXcatToPTB.select do |m|
124
124
  m[0] == value && m[1] == xcat
125
125
  end
126
126
  if tags.empty?
@@ -144,6 +144,7 @@ module Treat
144
144
  # Handle naming conventions.
145
145
  if attributes.has_key?('pos')
146
146
  new_attributes[:tag] = new_attributes[:pos]
147
+ new_attributes[:tag_set] = :penn
147
148
  new_attributes.delete :pos
148
149
  end
149
150
  # Create the appropriate entity for the
@@ -3,22 +3,24 @@ module Treat
3
3
  module Parsers
4
4
  class Stanford
5
5
  # Require the Ruby-Java bridge.
6
- silently { require 'rjb' }
7
- jar = "#{Treat.bin}/stanford_parser/stanford-parser.jar"
8
- unless File.readable?(jar)
9
- raise "Could not find stanford parser JAR file in #{jar}."+
10
- " You may need to set Treat.bin to a custom value."
6
+ silence_warnings { require 'rjb' }
7
+ jar = "#{Treat.bin}/stanford-parser*/stanford-parser*.jar"
8
+ jars = Dir.glob(jar)
9
+ if jars.empty? || !File.readable?(jars[0])
10
+ raise "Could not find stanford parser JAR file (looking in #{jar})"+
11
+ " You may need to manually download the JAR files and/or set Treat.bin."
11
12
  end
12
- Rjb::load(jar, ['-Xms256M', '-Xmx512M'])
13
+ Rjb::load(jars[0], ['-Xms256M', '-Xmx512M'])
13
14
  LexicalizedParser = ::Rjb::import('edu.stanford.nlp.parser.lexparser.LexicalizedParser')
14
15
  @@parsers = {}
15
16
  def self.parse(entity, options = {})
16
- lang = Treat::Resources::Languages.describe(entity.language).to_s
17
- pcfg = "#{Treat.bin}/stanford_parser/grammar/#{lang.upcase}PCFG.ser.gz"
18
- unless File.readable?(pcfg)
19
- raise "Could not find a language model for #{lang}: looking in #{pcfg}."
17
+ lang = Treat::Languages.describe(entity.language).to_s.upcase
18
+ pcfg = "#{Treat.bin}/stanford-parser*/grammar/#{lang.upcase}PCFG.ser.gz"
19
+ pcfgs = Dir.glob(pcfg)
20
+ if pcfgs.empty? || !File.readable?(pcfgs[0])
21
+ raise "Could not find a language model for #{lang.downcase} (looking in #{pcfg})."
20
22
  end
21
- @@parsers[lang] ||= LexicalizedParser.new(pcfg) # Fix - check that exists.
23
+ @@parsers[lang] ||= LexicalizedParser.new(pcfgs[0])
22
24
  parse = @@parsers[lang].apply(entity.to_s)
23
25
  entity.remove_all!
24
26
  recurse(parse, entity)
@@ -41,12 +43,13 @@ module Treat
41
43
  return recurse(java_node.children[0], ruby_node)
42
44
  end
43
45
  java_node.children.each do |java_child|
44
- dependencies = java_child.dependencies.iterator
46
+ # dependencies = java_child.dependencies.iterator
45
47
  # while dependencies.has_next
46
48
  #dependency = dependencies.next
47
49
  # end
48
50
  ruby_child = Treat::Entities::Phrase.new
49
51
  ruby_child.set :tag, java_child.value
52
+ ruby_child.set :tag_set, :penn
50
53
  ruby_node << ruby_child
51
54
  unless java_child.children.empty?
52
55
  recurse(java_child, ruby_child)
@@ -9,7 +9,7 @@ module Treat
9
9
  # Unsupervised Multilingual Sentence Boundary Detection.
10
10
  # Computational Linguistics 32: 485-525.
11
11
  class Punkt
12
- silently { require 'punkt-segmenter' }
12
+ silence_warnings { require 'punkt-segmenter' }
13
13
  # Hold one copy of the segmenter per language.
14
14
  @@segmenters = {}
15
15
  # Hold only one trainer per language.
@@ -3,13 +3,15 @@ module Treat
3
3
  module Segmenters
4
4
  class Stanford
5
5
  # Require the Ruby-Java bridge.
6
- silently do
6
+ silence_warnings do
7
7
  require 'rjb'
8
- jar = "#{Treat.bin}/stanford_parser/stanford-parser.jar"
9
- unless File.readable?(jar)
10
- raise "Could not find stanford parser JAR file in #{jar}."+
11
- " You may need to set Treat.bin to a custom value."
8
+ jar = "#{Treat.bin}/stanford-parser*/stanford-parser*.jar"
9
+ jars = Dir.glob(jar)
10
+ if jars.empty? || !File.readable?(jars[0])
11
+ raise "Could not find stanford parser JAR file (lookin in #{jar})."+
12
+ " You may need to manually download the JAR files and/or set Treat.bin."
12
13
  end
14
+ ::Rjb::load(jars[0])
13
15
  DocumentPreprocessor =
14
16
  ::Rjb::import('edu.stanford.nlp.process.DocumentPreprocessor')
15
17
  StringReader = ::Rjb::import('java.io.StringReader')
@@ -11,7 +11,7 @@ module Treat
11
11
  # Project website:
12
12
  class Tactful
13
13
  # Require the 'tactful_tokenizer' gem.
14
- silently { require 'tactful_tokenizer' }
14
+ silence_warnings { require 'tactful_tokenizer' }
15
15
  # Somewhere in the depths of the code this is defined...
16
16
  String.class_eval { undef :tokenize }
17
17
  # Keep only one copy of the segmenter.
@@ -8,13 +8,13 @@ module Treat
8
8
  # Hold one tokenizer per language.
9
9
  @@tokenizers = {}
10
10
  # Require the 'tokenizer' gem.
11
- silently { require 'tokenizer' }
11
+ silence_warnings { require 'tokenizer' }
12
12
  # Perform the tokenization of English, German or French text.
13
13
  # Options:
14
14
  # :language => (Symbol) Force a language for the tokenizer.
15
15
  def self.tokenize(entity, options = {})
16
16
  lang = options[:language] ? options[:language] : entity.language
17
- lang = Treat::Resources::Languages.find(lang, 1)
17
+ lang = Treat::Languages.find(lang, 1)
18
18
  if @@tokenizers[lang].nil?
19
19
  @@tokenizers[lang] = ::Tokenizer::Tokenizer.new(lang)
20
20
  end
@@ -3,14 +3,16 @@ module Treat
3
3
  module Tokenizers
4
4
  class Stanford
5
5
  # Require the Ruby-Java bridge.
6
- silently do
6
+ silence_warnings do
7
7
  require 'rjb'
8
8
  # Load the Stanford Parser Java files.
9
- jar = "#{Treat.bin}/stanford_parser/stanford-parser.jar"
10
- unless File.readable?(jar)
11
- raise "Could not find stanford parser JAR file in #{jar}."+
12
- " You may need to set Treat.bin to a custom value."
9
+ jar = "#{Treat.bin}/stanford-parser/stanford-parser.jar"
10
+ jars = Dir.glob(jar)
11
+ if jars.empty? || !File.readable?(jars[0])
12
+ raise "Could not find stanford parser JAR file (looking in #{jar})."+
13
+ " You may need to manually download the JAR files and/or set Treat.bin."
13
14
  end
15
+ ::Rjb::load(jars[0])
14
16
  # Load the Stanford Parser classes.
15
17
  PTBTokenizer = ::Rjb::import('edu.stanford.nlp.process.PTBTokenizer')
16
18
  CoreLabelTokenFactory = ::Rjb::import('edu.stanford.nlp.process.CoreLabelTokenFactory')
@@ -20,7 +20,8 @@ module Treat
20
20
  return klass.send(method, self, options)
21
21
  end
22
22
  else
23
- raise "This type of visitor cannot visit a #{self.class}."
23
+ raise NAT::Exception,
24
+ "This type of visitor cannot visit a #{self.class}."
24
25
  end
25
26
  end
26
27
  end
data/lib/treat.rb CHANGED
@@ -1,58 +1,93 @@
1
- # This file requires all source code files for the Treat module.
2
-
3
1
  #
4
- # Main Treat namespace.
2
+ # Main namespace for Treat modules.
5
3
  #
6
- # Textual model:
4
+ # 1. Entities
7
5
  #
8
- # - Tree - Contains abstract tree node and leaf structures.
9
- # - Entities - Contains concrtypee node and leaf structures
10
- # that represent textual entities.
6
+ # Entities are Tree structures that represent any textual
7
+ # entity (from a collection of texts down to an individual
8
+ # word) with a value, features, children and edges linking
9
+ # it to other textual entities. Sugar provides syntactic sugar
10
+ # for Entities and can be enabled by running Treat.edulcorate.
11
+ #
12
+ # Here are some examples of how to create entities:
11
13
  #
12
- # Algorithm namespaces:
14
+ # c = Collection 'folder_with_documents'
15
+ # d = Document 'filename.txt' # (or PDF, html, xml, png, jpg, gif).
16
+ # p = Paragraph 'A short story. The end.'
17
+ # s = Sentence 'That is not a sentence.'
18
+ # w = Word 'fox'
19
+ #
20
+ # Here's a full list of entities (subtypes in parentheses):
21
+ # Collection, Document, Zone (Section, Title, Paragraph or List),
22
+ # Sentence, Constituent (Phrase or Clause), Token (Word, Number,
23
+ # Symbol or Punctuation).
24
+ #
25
+ # 2. Proxies
26
+ #
27
+ # Proxies allow the Treat functions to be called on the core
28
+ # Ruby classes String, Numeric and Array. They build the entity
29
+ # corresponding to the supplied raw text and send the requested
30
+ # function to it.
31
+ #
32
+ # For example,
13
33
  #
14
- # - Dtypeectors - Namespace for language, encoding, and format
15
- # detectors.
16
- # - Extractors - Namespace for algorithms that extract
17
- # information from entities.
18
- # - Formatters - Namespace for algorithms that handle
19
- # conversion to and from different formats.
20
- # - Inflectors - Namespace for algorithms that supply
21
- # the base form, inflections and declensions of a word.
22
- # - Lexicalizers - Namespace for algorithms that supply
23
- # lexical information about a word (part of speech,
24
- # synstypes, klass.)
25
- # - Processors - Namespace for algorithms that process an
26
- # entity into a tree of sub-entities.
34
+ # 'fox'.tag
35
+ #
36
+ # Is equivalent to:
37
+ #
38
+ # w = Word 'fox'
39
+ # w.tag
40
+ #
41
+ # 3. Functions
27
42
  #
28
- # Other modules:
43
+ # A class is defined for each implemented algorithm performing a given
44
+ # task. These classes are clustered into groups of algorithms performing
45
+ # the same given task (Group), and the groups are clustered into Categories
46
+ # of groups performing related tasks.
29
47
  #
30
- # - Group - Creates functions for algorithm groups.
31
- # - Proxies - Provide proxies for Treat functions on String,
32
- # Numeric and Array classes.
33
- # - Utilities - Supply utility functions used across the library.
48
+ # Here are the different Categories:
49
+ #
50
+ # - Detectors - Category for language, encoding, and format
51
+ # detectors.
52
+ # - Extractors - Category for algorithms that extract information
53
+ # from entities.
54
+ # - Formatters - Category for algorithms that handle conversion
55
+ # to and from different formats.
56
+ # - Inflectors - Category for algorithms that supply the base
57
+ # form, inflections and declensions of a word.
58
+ # - Lexicalizers - Category for algorithms that supply lexical
59
+ # information about a word (part of speech, synsets, word categories).
60
+ # - Processors - Category for algorithms that process collections and
61
+ # documents into trees.
62
+ #
63
+ # 4. Linguistic resources
64
+ #
65
+ # The Languages module contains linguistic information about
66
+ # languages (full ISO-639-1 and 2 language list, tag alignments
67
+ # for three treebanks, word categories, etc.)
68
+ #
69
+ # 5. Mixins for entities.
70
+ #
71
+ # Buildable, Delegatable, Visitable and Registrable are included
72
+ # or extended by Entity and provide it with the ability to be built,
73
+ # to delegate function calls, to accept visitors and to maintain a
74
+ # token registry, respectively.
34
75
  #
76
+ # 6. Exception
77
+ #
78
+ # Exception defines a custom exception for the Treat module.
79
+ #
35
80
  module Treat
36
81
 
37
82
  # Make sure that we are running on Ruby 1.9 or higher.
38
83
  if RUBY_VERSION <= '1.9'
39
84
  raise 'Treat requires Ruby 1.9 or higher.'
40
85
  end
41
-
86
+
42
87
  # The current version of Treat.
43
- VERSION = "0.1.1"
88
+ VERSION = "0.1.2"
44
89
 
45
- # Require all files for the Treat library.
46
- require 'treat/exception'
47
- require 'treat/utilities'
48
- require 'treat/resources'
49
- require 'treat/entities'
50
- require 'treat/categories'
51
- require 'treat/proxies'
52
-
53
- # Provides syntactic sugar.
54
- require 'treat/sugar'
55
- extend Sugar
90
+ # $LOAD_PATH << '/ruby/treat/lib/' # Remove for release
56
91
 
57
92
  # Create class variables for the Treat module.
58
93
  class << self
@@ -67,25 +102,41 @@ module Treat
67
102
  attr_accessor :language_detection_level
68
103
  # String - main folder for executable files.
69
104
  attr_accessor :bin
105
+ # String - folder of this file.
106
+ attr_accessor :lib
107
+ # String - folder for tests.
108
+ attr_accessor :test
109
+ # String - folder for temp files.
110
+ attr_accessor :tmp
70
111
  end
71
-
72
- # Folder paths.
73
- @@lib = File.dirname(__FILE__)
74
- @@test = @@lib + '/../test/'
75
- @@tmp = @@lib + '/../tmp/'
76
- @@bin = @@lib + '/../bin'
77
- def self.lib; @@lib; end
78
- def self.test; @@test; end
79
- def self.tmp; @@tmp; end
80
-
81
- # Stype the default language to english.
112
+
113
+ # Set the default language to english.
82
114
  self.default_language = :eng
83
- # Stype the default encoding to utf-8.
115
+ # Set the default encoding to utf-8.
84
116
  self.default_encoding = :utf_8
85
117
  # Turn language detection off by default.
86
118
  self.detect_language = false
87
- # Dtypeect the language once per text by default.
119
+ # Detect the language once per text by default.
88
120
  self.language_detection_level = :text
89
- # Stype the bin path to the gem's bin folder by default.
90
- self.bin = @@bin
121
+ # Set the lib path to that of this file.
122
+ self.lib = File.dirname(__FILE__)
123
+ # Set the paths to the bin, test and tmp folders.
124
+ self.bin = self.lib + '/../bin/'
125
+ self.test = self.lib + '/../test/'
126
+ self.tmp = self.lib + '/../tmp/'
127
+
128
+ # Require modified core classes.
129
+ require 'treat/object'
130
+ require 'treat/kernel'
131
+
132
+ # Require all files for the Treat library.
133
+ require 'treat/exception'
134
+ require 'treat/languages'
135
+ require 'treat/entities'
136
+ require 'treat/categories'
137
+ require 'treat/proxies'
138
+ require 'treat/sugar'
139
+
140
+ extend Sugar
141
+
91
142
  end
data/test/tc_entity.rb CHANGED
@@ -16,18 +16,23 @@ module Treat
16
16
  @det = Treat::Entities::Word.new('The')
17
17
  @det.set :cat, :determiner
18
18
  @det.set :tag, 'DT'
19
+ @det.set :tag_set, :penn
19
20
  @adj = Treat::Entities::Word.new('lazy')
20
21
  @adj.set :cat, :adjective
21
22
  @adj.set :tag, 'JJ'
23
+ @adj.set :tag_set, :penn
22
24
  @noun = Treat::Entities::Word.new('fox')
23
25
  @noun.set :cat, :noun
24
26
  @noun.set :tag, 'NN'
27
+ @noun.set :tag_set, :penn
25
28
  @aux = Treat::Entities::Word.new('is')
26
29
  @aux.set :cat, :verb
27
30
  @aux.set :tag, 'VBZ'
31
+ @aux.set :tag_set, :penn
28
32
  @verb = Treat::Entities::Word.new('running')
29
33
  @verb.set :cat, :verb
30
34
  @verb.set :tag, 'VBG'
35
+ @verb.set :tag_set, :penn
31
36
  @dot = Treat::Entities::Punctuation.new('.')
32
37
 
33
38
  @text << @sentence << [@noun_phrase, @verb_phrase, @dot]
data/test/tc_resources.rb CHANGED
@@ -1,12 +1,12 @@
1
1
  module Treat
2
2
  module Tests
3
- class TestResources < Test::Unit::TestCase
3
+ class TestLanguages < Test::Unit::TestCase
4
4
 
5
5
  def test_languages
6
- assert_equal :eng, Treat::Resources::Languages.find(:english, 2)
7
- assert_equal :en, Treat::Resources::Languages.find(:english, 1)
8
- assert_equal :english, Treat::Resources::Languages.describe(:eng)
9
- assert_equal :english, Treat::Resources::Languages.describe(:en)
6
+ assert_equal :eng, Treat::Languages.find(:english, 2)
7
+ assert_equal :en, Treat::Languages.find(:english, 1)
8
+ assert_equal :english, Treat::Languages.describe(:eng)
9
+ assert_equal :english, Treat::Languages.describe(:en)
10
10
  end
11
11
 
12
12
  def test_tags
data/test/tc_treat.rb CHANGED
@@ -29,8 +29,7 @@ module Treat
29
29
 
30
30
  def test_modules_loaded?
31
31
  ['exception',
32
- 'utilities',
33
- 'resources',
32
+ 'languages',
34
33
  'entities',
35
34
  'feature',
36
35
  'category',
data/test/tests.rb CHANGED
@@ -1,11 +1,12 @@
1
1
  require 'test/unit'
2
2
 
3
- # $LOAD_PATH << '/ruby/treat/test/' # Remove for production
4
3
  $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
5
4
 
6
5
  require 'treat'
7
6
  require 'texts'
8
7
 
8
+ # Treat.bin = '/ruby/nat/bin' # Remove for release
9
+
9
10
  require 'tc_treat'
10
11
  require 'tc_tree'
11
12
  require 'tc_entity'