treat 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (81) hide show
  1. data/INSTALL +1 -0
  2. data/README +3 -0
  3. data/TODO +14 -26
  4. data/bin/INFO +1 -1
  5. data/lib/treat/buildable.rb +10 -11
  6. data/lib/treat/categories.rb +8 -6
  7. data/lib/treat/category.rb +7 -2
  8. data/lib/treat/delegatable.rb +64 -56
  9. data/lib/treat/detectors/encoding/r_chardet19.rb +1 -1
  10. data/lib/treat/detectors/language/language_detector.rb +2 -1
  11. data/lib/treat/detectors/language/what_language.rb +2 -2
  12. data/lib/treat/detectors.rb +3 -0
  13. data/lib/treat/entities/entity.rb +1 -1
  14. data/lib/treat/entities.rb +9 -10
  15. data/lib/treat/exception.rb +3 -1
  16. data/lib/treat/extractors/named_entity/abner.rb +1 -1
  17. data/lib/treat/extractors/named_entity/stanford.rb +2 -2
  18. data/lib/treat/extractors/time/chronic.rb +2 -2
  19. data/lib/treat/extractors/time/nickel.rb +2 -2
  20. data/lib/treat/extractors/topic_words/lda.rb +2 -2
  21. data/lib/treat/extractors.rb +12 -9
  22. data/lib/treat/feature.rb +6 -1
  23. data/lib/treat/formatters/cleaners/html.rb +1 -1
  24. data/lib/treat/formatters.rb +8 -8
  25. data/lib/treat/group.rb +11 -10
  26. data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
  27. data/lib/treat/inflectors/{conjugators → conjugations}/linguistics.rb +6 -6
  28. data/lib/treat/inflectors/{declensors → declensions}/en.rb +2 -2
  29. data/lib/treat/inflectors/{declensors → declensions}/linguistics.rb +5 -5
  30. data/lib/treat/inflectors/ordinal_words/linguistics.rb +4 -4
  31. data/lib/treat/inflectors/{stemmers → stem}/porter.rb +1 -1
  32. data/lib/treat/inflectors/{stemmers → stem}/porter_c.rb +3 -3
  33. data/lib/treat/inflectors/{stemmers → stem}/uea.rb +3 -3
  34. data/lib/treat/inflectors.rb +8 -21
  35. data/lib/treat/kernel.rb +120 -0
  36. data/lib/treat/languages/arabic.rb +14 -0
  37. data/lib/treat/languages/categories.rb +5 -0
  38. data/lib/treat/languages/chinese.rb +12 -0
  39. data/lib/treat/languages/english/categories.rb +23 -0
  40. data/lib/treat/{resources → languages/english}/tags.rb +127 -184
  41. data/lib/treat/languages/english.rb +33 -0
  42. data/lib/treat/languages/french.rb +17 -0
  43. data/lib/treat/languages/german.rb +17 -0
  44. data/lib/treat/languages/italian.rb +14 -0
  45. data/lib/treat/{resources/languages.txt → languages/list.txt} +0 -0
  46. data/lib/treat/languages/xinhua.rb +12 -0
  47. data/lib/treat/languages.rb +91 -0
  48. data/lib/treat/lexicalizers/category/from_tag.rb +20 -8
  49. data/lib/treat/lexicalizers/synsets/rita_wn.rb +1 -1
  50. data/lib/treat/lexicalizers/tag/brill.rb +2 -1
  51. data/lib/treat/lexicalizers/tag/lingua.rb +2 -1
  52. data/lib/treat/lexicalizers/tag/stanford.rb +16 -15
  53. data/lib/treat/lexicalizers.rb +1 -1
  54. data/lib/treat/object.rb +6 -0
  55. data/lib/treat/processors/parsers/enju.rb +3 -2
  56. data/lib/treat/processors/parsers/stanford.rb +15 -12
  57. data/lib/treat/processors/segmenters/punkt.rb +1 -1
  58. data/lib/treat/processors/segmenters/stanford.rb +7 -5
  59. data/lib/treat/processors/segmenters/tactful.rb +1 -1
  60. data/lib/treat/processors/tokenizers/multilingual.rb +2 -2
  61. data/lib/treat/processors/tokenizers/stanford.rb +7 -5
  62. data/lib/treat/visitable.rb +2 -1
  63. data/lib/treat.rb +105 -54
  64. data/test/tc_entity.rb +5 -0
  65. data/test/tc_resources.rb +5 -5
  66. data/test/tc_treat.rb +1 -2
  67. data/test/tests.rb +2 -1
  68. metadata +63 -64
  69. data/lib/treat/formatters/serializers/yaml/helper.rb +0 -96
  70. data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +0 -213
  71. data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +0 -68
  72. data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +0 -6
  73. data/lib/treat/inflectors/lemmatizers/e_lemma.rb +0 -12
  74. data/lib/treat/resources/categories.rb +0 -18
  75. data/lib/treat/resources/delegates.rb +0 -96
  76. data/lib/treat/resources/dependencies.rb +0 -0
  77. data/lib/treat/resources/edges.rb +0 -8
  78. data/lib/treat/resources/formats.rb +0 -23
  79. data/lib/treat/resources/languages.rb +0 -86
  80. data/lib/treat/resources.rb +0 -10
  81. data/lib/treat/utilities.rb +0 -127
@@ -3,17 +3,15 @@ module Treat
3
3
  module Tag
4
4
  class Stanford
5
5
  # Require the Ruby-Java bridge.
6
- silently do
6
+ silence_warnings do
7
7
  require 'rjb'
8
- jar = "#{Treat.bin}/stanford_tagger/stanford-postagger.jar"
9
- unless File.readable?(jar)
10
- raise "Could not find stanford tagger JAR file in #{jar}."+
11
- " You may need to set Treat.bin to a custom value."
8
+ jar = "#{Treat.bin}/stanford-tagger*/stanford-postagger*.jar"
9
+ jars = Dir.glob(jar)
10
+ if jars.empty? || !File.readable?(jars[0])
11
+ raise "Could not find stanford tagger JAR file (looking in #{jar})."+
12
+ " You may need to manually download the JAR files and/or set Treat.bin."
12
13
  end
13
- Rjb::load(
14
- "#{Treat.bin}/stanford_tagger/stanford-postagger.jar",
15
- ['-Xms256M', '-Xmx512M']
16
- )
14
+ Rjb::load(jars[0], ['-Xms256M', '-Xmx512M'])
17
15
  MaxentTagger = ::Rjb::import('edu.stanford.nlp.tagger.maxent.MaxentTagger')
18
16
  Word = ::Rjb::import('edu.stanford.nlp.ling.Word')
19
17
  List = ::Rjb::import('java.util.ArrayList')
@@ -43,8 +41,8 @@ module Treat
43
41
  else
44
42
  model = LanguageToModel[lang]
45
43
  if model.nil?
46
- raise Treat::Exception "There exists no Stanford" +
47
- "tagger model for language #{lang}."
44
+ raise Treat::Exception, "There exists no Stanford tagger model for " +
45
+ "the #{Treat::Languages.describe(lang)} language ."
48
46
  end
49
47
  end
50
48
  # Reinitialize the tagger if the options have changed.
@@ -53,15 +51,18 @@ module Treat
53
51
  @@taggers[lang] = nil # Reset the tagger
54
52
  end
55
53
  if @@taggers[lang].nil?
56
- model = "#{Treat.bin}/stanford_tagger/models/#{model}"
57
- unless File.readable?(model)
58
- raise "Could not find a tagger model for language #{lang}: looking in #{model}."
54
+ model = "#{Treat.bin}/stanford-tagger*/models/#{model}"
55
+ models = Dir.glob(model)
56
+ if models.empty? || !File.readable?(models[0])
57
+ raise "Could not find a tagger model for the " +
58
+ "#{Treat::Languages.describe(lang)}: looking in #{model}."
59
59
  end
60
60
  silence_streams(STDOUT, STDERR) do
61
61
  @@taggers[lang] =
62
- MaxentTagger.new(model)
62
+ MaxentTagger.new(models[0])
63
63
  end
64
64
  end
65
+ entity.set :tag_set, :penn
65
66
  list = List.new
66
67
  id_list = {}
67
68
  i = 0
@@ -7,7 +7,7 @@ module Treat
7
7
  module Tag
8
8
  extend Group
9
9
  self.type = :annotator
10
- self.targets = [:phrase, :word]
10
+ self.targets = [:word]
11
11
  end
12
12
  module Category
13
13
  extend Group
@@ -0,0 +1,6 @@
1
+ # Make undefining constants publicly available on any object.
2
+ Object.module_eval do
3
+ def self.const_unset(const)
4
+ Object.instance_eval { remove_const(const) }
5
+ end
6
+ end
@@ -55,7 +55,7 @@ module Treat
55
55
  text = entity.to_s + '.'
56
56
  else
57
57
  remove_last = false
58
- text = entity.to_s
58
+ text = entity.to_s.gsub('.', '') + '.' # Fix
59
59
  end
60
60
  stdin.puts(text + "\n")
61
61
  parsed = build(stdout.gets, remove_last)
@@ -120,7 +120,7 @@ module Treat
120
120
  new_attributes[:enju_cat] = value
121
121
  xcat = attributes['xcat'].split(' ')[0]
122
122
  xcat ||= ''
123
- tags = Treat::Resources::Tags::EnjuCatXcatToPTB.select do |m|
123
+ tags = Treat::Languages::English::EnjuCatXcatToPTB.select do |m|
124
124
  m[0] == value && m[1] == xcat
125
125
  end
126
126
  if tags.empty?
@@ -144,6 +144,7 @@ module Treat
144
144
  # Handle naming conventions.
145
145
  if attributes.has_key?('pos')
146
146
  new_attributes[:tag] = new_attributes[:pos]
147
+ new_attributes[:tag_set] = :penn
147
148
  new_attributes.delete :pos
148
149
  end
149
150
  # Create the appropriate entity for the
@@ -3,22 +3,24 @@ module Treat
3
3
  module Parsers
4
4
  class Stanford
5
5
  # Require the Ruby-Java bridge.
6
- silently { require 'rjb' }
7
- jar = "#{Treat.bin}/stanford_parser/stanford-parser.jar"
8
- unless File.readable?(jar)
9
- raise "Could not find stanford parser JAR file in #{jar}."+
10
- " You may need to set Treat.bin to a custom value."
6
+ silence_warnings { require 'rjb' }
7
+ jar = "#{Treat.bin}/stanford-parser*/stanford-parser*.jar"
8
+ jars = Dir.glob(jar)
9
+ if jars.empty? || !File.readable?(jars[0])
10
+ raise "Could not find stanford parser JAR file (looking in #{jar})"+
11
+ " You may need to manually download the JAR files and/or set Treat.bin."
11
12
  end
12
- Rjb::load(jar, ['-Xms256M', '-Xmx512M'])
13
+ Rjb::load(jars[0], ['-Xms256M', '-Xmx512M'])
13
14
  LexicalizedParser = ::Rjb::import('edu.stanford.nlp.parser.lexparser.LexicalizedParser')
14
15
  @@parsers = {}
15
16
  def self.parse(entity, options = {})
16
- lang = Treat::Resources::Languages.describe(entity.language).to_s
17
- pcfg = "#{Treat.bin}/stanford_parser/grammar/#{lang.upcase}PCFG.ser.gz"
18
- unless File.readable?(pcfg)
19
- raise "Could not find a language model for #{lang}: looking in #{pcfg}."
17
+ lang = Treat::Languages.describe(entity.language).to_s.upcase
18
+ pcfg = "#{Treat.bin}/stanford-parser*/grammar/#{lang.upcase}PCFG.ser.gz"
19
+ pcfgs = Dir.glob(pcfg)
20
+ if pcfgs.empty? || !File.readable?(pcfgs[0])
21
+ raise "Could not find a language model for #{lang.downcase} (looking in #{pcfg})."
20
22
  end
21
- @@parsers[lang] ||= LexicalizedParser.new(pcfg) # Fix - check that exists.
23
+ @@parsers[lang] ||= LexicalizedParser.new(pcfgs[0])
22
24
  parse = @@parsers[lang].apply(entity.to_s)
23
25
  entity.remove_all!
24
26
  recurse(parse, entity)
@@ -41,12 +43,13 @@ module Treat
41
43
  return recurse(java_node.children[0], ruby_node)
42
44
  end
43
45
  java_node.children.each do |java_child|
44
- dependencies = java_child.dependencies.iterator
46
+ # dependencies = java_child.dependencies.iterator
45
47
  # while dependencies.has_next
46
48
  #dependency = dependencies.next
47
49
  # end
48
50
  ruby_child = Treat::Entities::Phrase.new
49
51
  ruby_child.set :tag, java_child.value
52
+ ruby_child.set :tag_set, :penn
50
53
  ruby_node << ruby_child
51
54
  unless java_child.children.empty?
52
55
  recurse(java_child, ruby_child)
@@ -9,7 +9,7 @@ module Treat
9
9
  # Unsupervised Multilingual Sentence Boundary Detection.
10
10
  # Computational Linguistics 32: 485-525.
11
11
  class Punkt
12
- silently { require 'punkt-segmenter' }
12
+ silence_warnings { require 'punkt-segmenter' }
13
13
  # Hold one copy of the segmenter per language.
14
14
  @@segmenters = {}
15
15
  # Hold only one trainer per language.
@@ -3,13 +3,15 @@ module Treat
3
3
  module Segmenters
4
4
  class Stanford
5
5
  # Require the Ruby-Java bridge.
6
- silently do
6
+ silence_warnings do
7
7
  require 'rjb'
8
- jar = "#{Treat.bin}/stanford_parser/stanford-parser.jar"
9
- unless File.readable?(jar)
10
- raise "Could not find stanford parser JAR file in #{jar}."+
11
- " You may need to set Treat.bin to a custom value."
8
+ jar = "#{Treat.bin}/stanford-parser*/stanford-parser*.jar"
9
+ jars = Dir.glob(jar)
10
+ if jars.empty? || !File.readable?(jars[0])
11
+ raise "Could not find stanford parser JAR file (lookin in #{jar})."+
12
+ " You may need to manually download the JAR files and/or set Treat.bin."
12
13
  end
14
+ ::Rjb::load(jars[0])
13
15
  DocumentPreprocessor =
14
16
  ::Rjb::import('edu.stanford.nlp.process.DocumentPreprocessor')
15
17
  StringReader = ::Rjb::import('java.io.StringReader')
@@ -11,7 +11,7 @@ module Treat
11
11
  # Project website:
12
12
  class Tactful
13
13
  # Require the 'tactful_tokenizer' gem.
14
- silently { require 'tactful_tokenizer' }
14
+ silence_warnings { require 'tactful_tokenizer' }
15
15
  # Somewhere in the depths of the code this is defined...
16
16
  String.class_eval { undef :tokenize }
17
17
  # Keep only one copy of the segmenter.
@@ -8,13 +8,13 @@ module Treat
8
8
  # Hold one tokenizer per language.
9
9
  @@tokenizers = {}
10
10
  # Require the 'tokenizer' gem.
11
- silently { require 'tokenizer' }
11
+ silence_warnings { require 'tokenizer' }
12
12
  # Perform the tokenization of English, German or French text.
13
13
  # Options:
14
14
  # :language => (Symbol) Force a language for the tokenizer.
15
15
  def self.tokenize(entity, options = {})
16
16
  lang = options[:language] ? options[:language] : entity.language
17
- lang = Treat::Resources::Languages.find(lang, 1)
17
+ lang = Treat::Languages.find(lang, 1)
18
18
  if @@tokenizers[lang].nil?
19
19
  @@tokenizers[lang] = ::Tokenizer::Tokenizer.new(lang)
20
20
  end
@@ -3,14 +3,16 @@ module Treat
3
3
  module Tokenizers
4
4
  class Stanford
5
5
  # Require the Ruby-Java bridge.
6
- silently do
6
+ silence_warnings do
7
7
  require 'rjb'
8
8
  # Load the Stanford Parser Java files.
9
- jar = "#{Treat.bin}/stanford_parser/stanford-parser.jar"
10
- unless File.readable?(jar)
11
- raise "Could not find stanford parser JAR file in #{jar}."+
12
- " You may need to set Treat.bin to a custom value."
9
+ jar = "#{Treat.bin}/stanford-parser/stanford-parser.jar"
10
+ jars = Dir.glob(jar)
11
+ if jars.empty? || !File.readable?(jars[0])
12
+ raise "Could not find stanford parser JAR file (looking in #{jar})."+
13
+ " You may need to manually download the JAR files and/or set Treat.bin."
13
14
  end
15
+ ::Rjb::load(jars[0])
14
16
  # Load the Stanford Parser classes.
15
17
  PTBTokenizer = ::Rjb::import('edu.stanford.nlp.process.PTBTokenizer')
16
18
  CoreLabelTokenFactory = ::Rjb::import('edu.stanford.nlp.process.CoreLabelTokenFactory')
@@ -20,7 +20,8 @@ module Treat
20
20
  return klass.send(method, self, options)
21
21
  end
22
22
  else
23
- raise "This type of visitor cannot visit a #{self.class}."
23
+ raise NAT::Exception,
24
+ "This type of visitor cannot visit a #{self.class}."
24
25
  end
25
26
  end
26
27
  end
data/lib/treat.rb CHANGED
@@ -1,58 +1,93 @@
1
- # This file requires all source code files for the Treat module.
2
-
3
1
  #
4
- # Main Treat namespace.
2
+ # Main namespace for Treat modules.
5
3
  #
6
- # Textual model:
4
+ # 1. Entities
7
5
  #
8
- # - Tree - Contains abstract tree node and leaf structures.
9
- # - Entities - Contains concrtypee node and leaf structures
10
- # that represent textual entities.
6
+ # Entities are Tree structures that represent any textual
7
+ # entity (from a collection of texts down to an individual
8
+ # word) with a value, features, children and edges linking
9
+ # it to other textual entities. Sugar provides syntactic sugar
10
+ # for Entities and can be enabled by running Treat.edulcorate.
11
+ #
12
+ # Here are some examples of how to create entities:
11
13
  #
12
- # Algorithm namespaces:
14
+ # c = Collection 'folder_with_documents'
15
+ # d = Document 'filename.txt' # (or PDF, html, xml, png, jpg, gif).
16
+ # p = Paragraph 'A short story. The end.'
17
+ # s = Sentence 'That is not a sentence.'
18
+ # w = Word 'fox'
19
+ #
20
+ # Here's a full list of entities (subtypes in parentheses):
21
+ # Collection, Document, Zone (Section, Title, Paragraph or List),
22
+ # Sentence, Constituent (Phrase or Clause), Token (Word, Number,
23
+ # Symbol or Punctuation).
24
+ #
25
+ # 2. Proxies
26
+ #
27
+ # Proxies allow the Treat functions to be called on the core
28
+ # Ruby classes String, Numeric and Array. They build the entity
29
+ # corresponding to the supplied raw text and send the requested
30
+ # function to it.
31
+ #
32
+ # For example,
13
33
  #
14
- # - Dtypeectors - Namespace for language, encoding, and format
15
- # detectors.
16
- # - Extractors - Namespace for algorithms that extract
17
- # information from entities.
18
- # - Formatters - Namespace for algorithms that handle
19
- # conversion to and from different formats.
20
- # - Inflectors - Namespace for algorithms that supply
21
- # the base form, inflections and declensions of a word.
22
- # - Lexicalizers - Namespace for algorithms that supply
23
- # lexical information about a word (part of speech,
24
- # synstypes, klass.)
25
- # - Processors - Namespace for algorithms that process an
26
- # entity into a tree of sub-entities.
34
+ # 'fox'.tag
35
+ #
36
+ # Is equivalent to:
37
+ #
38
+ # w = Word 'fox'
39
+ # w.tag
40
+ #
41
+ # 3. Functions
27
42
  #
28
- # Other modules:
43
+ # A class is defined for each implemented algorithm performing a given
44
+ # task. These classes are clustered into groups of algorithms performing
45
+ # the same given task (Group), and the groups are clustered into Categories
46
+ # of groups performing related tasks.
29
47
  #
30
- # - Group - Creates functions for algorithm groups.
31
- # - Proxies - Provide proxies for Treat functions on String,
32
- # Numeric and Array classes.
33
- # - Utilities - Supply utility functions used across the library.
48
+ # Here are the different Categories:
49
+ #
50
+ # - Detectors - Category for language, encoding, and format
51
+ # detectors.
52
+ # - Extractors - Category for algorithms that extract information
53
+ # from entities.
54
+ # - Formatters - Category for algorithms that handle conversion
55
+ # to and from different formats.
56
+ # - Inflectors - Category for algorithms that supply the base
57
+ # form, inflections and declensions of a word.
58
+ # - Lexicalizers - Category for algorithms that supply lexical
59
+ # information about a word (part of speech, synsets, word categories).
60
+ # - Processors - Category for algorithms that process collections and
61
+ # documents into trees.
62
+ #
63
+ # 4. Linguistic resources
64
+ #
65
+ # The Languages module contains linguistic information about
66
+ # languages (full ISO-639-1 and 2 language list, tag alignments
67
+ # for three treebanks, word categories, etc.)
68
+ #
69
+ # 5. Mixins for entities.
70
+ #
71
+ # Buildable, Delegatable, Visitable and Registrable are included
72
+ # or extended by Entity and provide it with the ability to be built,
73
+ # to delegate function calls, to accept visitors and to maintain a
74
+ # token registry, respectively.
34
75
  #
76
+ # 6. Exception
77
+ #
78
+ # Exception defines a custom exception for the Treat module.
79
+ #
35
80
  module Treat
36
81
 
37
82
  # Make sure that we are running on Ruby 1.9 or higher.
38
83
  if RUBY_VERSION <= '1.9'
39
84
  raise 'Treat requires Ruby 1.9 or higher.'
40
85
  end
41
-
86
+
42
87
  # The current version of Treat.
43
- VERSION = "0.1.1"
88
+ VERSION = "0.1.2"
44
89
 
45
- # Require all files for the Treat library.
46
- require 'treat/exception'
47
- require 'treat/utilities'
48
- require 'treat/resources'
49
- require 'treat/entities'
50
- require 'treat/categories'
51
- require 'treat/proxies'
52
-
53
- # Provides syntactic sugar.
54
- require 'treat/sugar'
55
- extend Sugar
90
+ # $LOAD_PATH << '/ruby/treat/lib/' # Remove for release
56
91
 
57
92
  # Create class variables for the Treat module.
58
93
  class << self
@@ -67,25 +102,41 @@ module Treat
67
102
  attr_accessor :language_detection_level
68
103
  # String - main folder for executable files.
69
104
  attr_accessor :bin
105
+ # String - folder of this file.
106
+ attr_accessor :lib
107
+ # String - folder for tests.
108
+ attr_accessor :test
109
+ # String - folder for temp files.
110
+ attr_accessor :tmp
70
111
  end
71
-
72
- # Folder paths.
73
- @@lib = File.dirname(__FILE__)
74
- @@test = @@lib + '/../test/'
75
- @@tmp = @@lib + '/../tmp/'
76
- @@bin = @@lib + '/../bin'
77
- def self.lib; @@lib; end
78
- def self.test; @@test; end
79
- def self.tmp; @@tmp; end
80
-
81
- # Stype the default language to english.
112
+
113
+ # Set the default language to english.
82
114
  self.default_language = :eng
83
- # Stype the default encoding to utf-8.
115
+ # Set the default encoding to utf-8.
84
116
  self.default_encoding = :utf_8
85
117
  # Turn language detection off by default.
86
118
  self.detect_language = false
87
- # Dtypeect the language once per text by default.
119
+ # Detect the language once per text by default.
88
120
  self.language_detection_level = :text
89
- # Stype the bin path to the gem's bin folder by default.
90
- self.bin = @@bin
121
+ # Set the lib path to that of this file.
122
+ self.lib = File.dirname(__FILE__)
123
+ # Set the paths to the bin, test and tmp folders.
124
+ self.bin = self.lib + '/../bin/'
125
+ self.test = self.lib + '/../test/'
126
+ self.tmp = self.lib + '/../tmp/'
127
+
128
+ # Require modified core classes.
129
+ require 'treat/object'
130
+ require 'treat/kernel'
131
+
132
+ # Require all files for the Treat library.
133
+ require 'treat/exception'
134
+ require 'treat/languages'
135
+ require 'treat/entities'
136
+ require 'treat/categories'
137
+ require 'treat/proxies'
138
+ require 'treat/sugar'
139
+
140
+ extend Sugar
141
+
91
142
  end
data/test/tc_entity.rb CHANGED
@@ -16,18 +16,23 @@ module Treat
16
16
  @det = Treat::Entities::Word.new('The')
17
17
  @det.set :cat, :determiner
18
18
  @det.set :tag, 'DT'
19
+ @det.set :tag_set, :penn
19
20
  @adj = Treat::Entities::Word.new('lazy')
20
21
  @adj.set :cat, :adjective
21
22
  @adj.set :tag, 'JJ'
23
+ @adj.set :tag_set, :penn
22
24
  @noun = Treat::Entities::Word.new('fox')
23
25
  @noun.set :cat, :noun
24
26
  @noun.set :tag, 'NN'
27
+ @noun.set :tag_set, :penn
25
28
  @aux = Treat::Entities::Word.new('is')
26
29
  @aux.set :cat, :verb
27
30
  @aux.set :tag, 'VBZ'
31
+ @aux.set :tag_set, :penn
28
32
  @verb = Treat::Entities::Word.new('running')
29
33
  @verb.set :cat, :verb
30
34
  @verb.set :tag, 'VBG'
35
+ @verb.set :tag_set, :penn
31
36
  @dot = Treat::Entities::Punctuation.new('.')
32
37
 
33
38
  @text << @sentence << [@noun_phrase, @verb_phrase, @dot]
data/test/tc_resources.rb CHANGED
@@ -1,12 +1,12 @@
1
1
  module Treat
2
2
  module Tests
3
- class TestResources < Test::Unit::TestCase
3
+ class TestLanguages < Test::Unit::TestCase
4
4
 
5
5
  def test_languages
6
- assert_equal :eng, Treat::Resources::Languages.find(:english, 2)
7
- assert_equal :en, Treat::Resources::Languages.find(:english, 1)
8
- assert_equal :english, Treat::Resources::Languages.describe(:eng)
9
- assert_equal :english, Treat::Resources::Languages.describe(:en)
6
+ assert_equal :eng, Treat::Languages.find(:english, 2)
7
+ assert_equal :en, Treat::Languages.find(:english, 1)
8
+ assert_equal :english, Treat::Languages.describe(:eng)
9
+ assert_equal :english, Treat::Languages.describe(:en)
10
10
  end
11
11
 
12
12
  def test_tags
data/test/tc_treat.rb CHANGED
@@ -29,8 +29,7 @@ module Treat
29
29
 
30
30
  def test_modules_loaded?
31
31
  ['exception',
32
- 'utilities',
33
- 'resources',
32
+ 'languages',
34
33
  'entities',
35
34
  'feature',
36
35
  'category',
data/test/tests.rb CHANGED
@@ -1,11 +1,12 @@
1
1
  require 'test/unit'
2
2
 
3
- # $LOAD_PATH << '/ruby/treat/test/' # Remove for production
4
3
  $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
5
4
 
6
5
  require 'treat'
7
6
  require 'texts'
8
7
 
8
+ # Treat.bin = '/ruby/nat/bin' # Remove for release
9
+
9
10
  require 'tc_treat'
10
11
  require 'tc_tree'
11
12
  require 'tc_entity'