treat 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +1 -0
- data/README +3 -0
- data/TODO +14 -26
- data/bin/INFO +1 -1
- data/lib/treat/buildable.rb +10 -11
- data/lib/treat/categories.rb +8 -6
- data/lib/treat/category.rb +7 -2
- data/lib/treat/delegatable.rb +64 -56
- data/lib/treat/detectors/encoding/r_chardet19.rb +1 -1
- data/lib/treat/detectors/language/language_detector.rb +2 -1
- data/lib/treat/detectors/language/what_language.rb +2 -2
- data/lib/treat/detectors.rb +3 -0
- data/lib/treat/entities/entity.rb +1 -1
- data/lib/treat/entities.rb +9 -10
- data/lib/treat/exception.rb +3 -1
- data/lib/treat/extractors/named_entity/abner.rb +1 -1
- data/lib/treat/extractors/named_entity/stanford.rb +2 -2
- data/lib/treat/extractors/time/chronic.rb +2 -2
- data/lib/treat/extractors/time/nickel.rb +2 -2
- data/lib/treat/extractors/topic_words/lda.rb +2 -2
- data/lib/treat/extractors.rb +12 -9
- data/lib/treat/feature.rb +6 -1
- data/lib/treat/formatters/cleaners/html.rb +1 -1
- data/lib/treat/formatters.rb +8 -8
- data/lib/treat/group.rb +11 -10
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
- data/lib/treat/inflectors/{conjugators → conjugations}/linguistics.rb +6 -6
- data/lib/treat/inflectors/{declensors → declensions}/en.rb +2 -2
- data/lib/treat/inflectors/{declensors → declensions}/linguistics.rb +5 -5
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +4 -4
- data/lib/treat/inflectors/{stemmers → stem}/porter.rb +1 -1
- data/lib/treat/inflectors/{stemmers → stem}/porter_c.rb +3 -3
- data/lib/treat/inflectors/{stemmers → stem}/uea.rb +3 -3
- data/lib/treat/inflectors.rb +8 -21
- data/lib/treat/kernel.rb +120 -0
- data/lib/treat/languages/arabic.rb +14 -0
- data/lib/treat/languages/categories.rb +5 -0
- data/lib/treat/languages/chinese.rb +12 -0
- data/lib/treat/languages/english/categories.rb +23 -0
- data/lib/treat/{resources → languages/english}/tags.rb +127 -184
- data/lib/treat/languages/english.rb +33 -0
- data/lib/treat/languages/french.rb +17 -0
- data/lib/treat/languages/german.rb +17 -0
- data/lib/treat/languages/italian.rb +14 -0
- data/lib/treat/{resources/languages.txt → languages/list.txt} +0 -0
- data/lib/treat/languages/xinhua.rb +12 -0
- data/lib/treat/languages.rb +91 -0
- data/lib/treat/lexicalizers/category/from_tag.rb +20 -8
- data/lib/treat/lexicalizers/synsets/rita_wn.rb +1 -1
- data/lib/treat/lexicalizers/tag/brill.rb +2 -1
- data/lib/treat/lexicalizers/tag/lingua.rb +2 -1
- data/lib/treat/lexicalizers/tag/stanford.rb +16 -15
- data/lib/treat/lexicalizers.rb +1 -1
- data/lib/treat/object.rb +6 -0
- data/lib/treat/processors/parsers/enju.rb +3 -2
- data/lib/treat/processors/parsers/stanford.rb +15 -12
- data/lib/treat/processors/segmenters/punkt.rb +1 -1
- data/lib/treat/processors/segmenters/stanford.rb +7 -5
- data/lib/treat/processors/segmenters/tactful.rb +1 -1
- data/lib/treat/processors/tokenizers/multilingual.rb +2 -2
- data/lib/treat/processors/tokenizers/stanford.rb +7 -5
- data/lib/treat/visitable.rb +2 -1
- data/lib/treat.rb +105 -54
- data/test/tc_entity.rb +5 -0
- data/test/tc_resources.rb +5 -5
- data/test/tc_treat.rb +1 -2
- data/test/tests.rb +2 -1
- metadata +63 -64
- data/lib/treat/formatters/serializers/yaml/helper.rb +0 -96
- data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +0 -213
- data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +0 -68
- data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +0 -6
- data/lib/treat/inflectors/lemmatizers/e_lemma.rb +0 -12
- data/lib/treat/resources/categories.rb +0 -18
- data/lib/treat/resources/delegates.rb +0 -96
- data/lib/treat/resources/dependencies.rb +0 -0
- data/lib/treat/resources/edges.rb +0 -8
- data/lib/treat/resources/formats.rb +0 -23
- data/lib/treat/resources/languages.rb +0 -86
- data/lib/treat/resources.rb +0 -10
- data/lib/treat/utilities.rb +0 -127
@@ -3,17 +3,15 @@ module Treat
|
|
3
3
|
module Tag
|
4
4
|
class Stanford
|
5
5
|
# Require the Ruby-Java bridge.
|
6
|
-
|
6
|
+
silence_warnings do
|
7
7
|
require 'rjb'
|
8
|
-
jar = "#{Treat.bin}/
|
9
|
-
|
10
|
-
|
11
|
-
"
|
8
|
+
jar = "#{Treat.bin}/stanford-tagger*/stanford-postagger*.jar"
|
9
|
+
jars = Dir.glob(jar)
|
10
|
+
if jars.empty? || !File.readable?(jars[0])
|
11
|
+
raise "Could not find stanford tagger JAR file (looking in #{jar})."+
|
12
|
+
" You may need to manually download the JAR files and/or set Treat.bin."
|
12
13
|
end
|
13
|
-
Rjb::load(
|
14
|
-
"#{Treat.bin}/stanford_tagger/stanford-postagger.jar",
|
15
|
-
['-Xms256M', '-Xmx512M']
|
16
|
-
)
|
14
|
+
Rjb::load(jars[0], ['-Xms256M', '-Xmx512M'])
|
17
15
|
MaxentTagger = ::Rjb::import('edu.stanford.nlp.tagger.maxent.MaxentTagger')
|
18
16
|
Word = ::Rjb::import('edu.stanford.nlp.ling.Word')
|
19
17
|
List = ::Rjb::import('java.util.ArrayList')
|
@@ -43,8 +41,8 @@ module Treat
|
|
43
41
|
else
|
44
42
|
model = LanguageToModel[lang]
|
45
43
|
if model.nil?
|
46
|
-
raise Treat::Exception "There exists no Stanford" +
|
47
|
-
"
|
44
|
+
raise Treat::Exception, "There exists no Stanford tagger model for " +
|
45
|
+
"the #{Treat::Languages.describe(lang)} language ."
|
48
46
|
end
|
49
47
|
end
|
50
48
|
# Reinitialize the tagger if the options have changed.
|
@@ -53,15 +51,18 @@ module Treat
|
|
53
51
|
@@taggers[lang] = nil # Reset the tagger
|
54
52
|
end
|
55
53
|
if @@taggers[lang].nil?
|
56
|
-
model = "#{Treat.bin}/
|
57
|
-
|
58
|
-
|
54
|
+
model = "#{Treat.bin}/stanford-tagger*/models/#{model}"
|
55
|
+
models = Dir.glob(model)
|
56
|
+
if models.empty? || !File.readable?(models[0])
|
57
|
+
raise "Could not find a tagger model for the " +
|
58
|
+
"#{Treat::Languages.describe(lang)}: looking in #{model}."
|
59
59
|
end
|
60
60
|
silence_streams(STDOUT, STDERR) do
|
61
61
|
@@taggers[lang] =
|
62
|
-
MaxentTagger.new(
|
62
|
+
MaxentTagger.new(models[0])
|
63
63
|
end
|
64
64
|
end
|
65
|
+
entity.set :tag_set, :penn
|
65
66
|
list = List.new
|
66
67
|
id_list = {}
|
67
68
|
i = 0
|
data/lib/treat/lexicalizers.rb
CHANGED
data/lib/treat/object.rb
ADDED
@@ -55,7 +55,7 @@ module Treat
|
|
55
55
|
text = entity.to_s + '.'
|
56
56
|
else
|
57
57
|
remove_last = false
|
58
|
-
text = entity.to_s
|
58
|
+
text = entity.to_s.gsub('.', '') + '.' # Fix
|
59
59
|
end
|
60
60
|
stdin.puts(text + "\n")
|
61
61
|
parsed = build(stdout.gets, remove_last)
|
@@ -120,7 +120,7 @@ module Treat
|
|
120
120
|
new_attributes[:enju_cat] = value
|
121
121
|
xcat = attributes['xcat'].split(' ')[0]
|
122
122
|
xcat ||= ''
|
123
|
-
tags = Treat::
|
123
|
+
tags = Treat::Languages::English::EnjuCatXcatToPTB.select do |m|
|
124
124
|
m[0] == value && m[1] == xcat
|
125
125
|
end
|
126
126
|
if tags.empty?
|
@@ -144,6 +144,7 @@ module Treat
|
|
144
144
|
# Handle naming conventions.
|
145
145
|
if attributes.has_key?('pos')
|
146
146
|
new_attributes[:tag] = new_attributes[:pos]
|
147
|
+
new_attributes[:tag_set] = :penn
|
147
148
|
new_attributes.delete :pos
|
148
149
|
end
|
149
150
|
# Create the appropriate entity for the
|
@@ -3,22 +3,24 @@ module Treat
|
|
3
3
|
module Parsers
|
4
4
|
class Stanford
|
5
5
|
# Require the Ruby-Java bridge.
|
6
|
-
|
7
|
-
jar = "#{Treat.bin}/
|
8
|
-
|
9
|
-
|
10
|
-
"
|
6
|
+
silence_warnings { require 'rjb' }
|
7
|
+
jar = "#{Treat.bin}/stanford-parser*/stanford-parser*.jar"
|
8
|
+
jars = Dir.glob(jar)
|
9
|
+
if jars.empty? || !File.readable?(jars[0])
|
10
|
+
raise "Could not find stanford parser JAR file (looking in #{jar})"+
|
11
|
+
" You may need to manually download the JAR files and/or set Treat.bin."
|
11
12
|
end
|
12
|
-
Rjb::load(
|
13
|
+
Rjb::load(jars[0], ['-Xms256M', '-Xmx512M'])
|
13
14
|
LexicalizedParser = ::Rjb::import('edu.stanford.nlp.parser.lexparser.LexicalizedParser')
|
14
15
|
@@parsers = {}
|
15
16
|
def self.parse(entity, options = {})
|
16
|
-
lang = Treat::
|
17
|
-
pcfg = "#{Treat.bin}/
|
18
|
-
|
19
|
-
|
17
|
+
lang = Treat::Languages.describe(entity.language).to_s.upcase
|
18
|
+
pcfg = "#{Treat.bin}/stanford-parser*/grammar/#{lang.upcase}PCFG.ser.gz"
|
19
|
+
pcfgs = Dir.glob(pcfg)
|
20
|
+
if pcfgs.empty? || !File.readable?(pcfgs[0])
|
21
|
+
raise "Could not find a language model for #{lang.downcase} (looking in #{pcfg})."
|
20
22
|
end
|
21
|
-
@@parsers[lang] ||= LexicalizedParser.new(
|
23
|
+
@@parsers[lang] ||= LexicalizedParser.new(pcfgs[0])
|
22
24
|
parse = @@parsers[lang].apply(entity.to_s)
|
23
25
|
entity.remove_all!
|
24
26
|
recurse(parse, entity)
|
@@ -41,12 +43,13 @@ module Treat
|
|
41
43
|
return recurse(java_node.children[0], ruby_node)
|
42
44
|
end
|
43
45
|
java_node.children.each do |java_child|
|
44
|
-
dependencies = java_child.dependencies.iterator
|
46
|
+
# dependencies = java_child.dependencies.iterator
|
45
47
|
# while dependencies.has_next
|
46
48
|
#dependency = dependencies.next
|
47
49
|
# end
|
48
50
|
ruby_child = Treat::Entities::Phrase.new
|
49
51
|
ruby_child.set :tag, java_child.value
|
52
|
+
ruby_child.set :tag_set, :penn
|
50
53
|
ruby_node << ruby_child
|
51
54
|
unless java_child.children.empty?
|
52
55
|
recurse(java_child, ruby_child)
|
@@ -9,7 +9,7 @@ module Treat
|
|
9
9
|
# Unsupervised Multilingual Sentence Boundary Detection.
|
10
10
|
# Computational Linguistics 32: 485-525.
|
11
11
|
class Punkt
|
12
|
-
|
12
|
+
silence_warnings { require 'punkt-segmenter' }
|
13
13
|
# Hold one copy of the segmenter per language.
|
14
14
|
@@segmenters = {}
|
15
15
|
# Hold only one trainer per language.
|
@@ -3,13 +3,15 @@ module Treat
|
|
3
3
|
module Segmenters
|
4
4
|
class Stanford
|
5
5
|
# Require the Ruby-Java bridge.
|
6
|
-
|
6
|
+
silence_warnings do
|
7
7
|
require 'rjb'
|
8
|
-
jar = "#{Treat.bin}/
|
9
|
-
|
10
|
-
|
11
|
-
"
|
8
|
+
jar = "#{Treat.bin}/stanford-parser*/stanford-parser*.jar"
|
9
|
+
jars = Dir.glob(jar)
|
10
|
+
if jars.empty? || !File.readable?(jars[0])
|
11
|
+
raise "Could not find stanford parser JAR file (lookin in #{jar})."+
|
12
|
+
" You may need to manually download the JAR files and/or set Treat.bin."
|
12
13
|
end
|
14
|
+
::Rjb::load(jars[0])
|
13
15
|
DocumentPreprocessor =
|
14
16
|
::Rjb::import('edu.stanford.nlp.process.DocumentPreprocessor')
|
15
17
|
StringReader = ::Rjb::import('java.io.StringReader')
|
@@ -11,7 +11,7 @@ module Treat
|
|
11
11
|
# Project website:
|
12
12
|
class Tactful
|
13
13
|
# Require the 'tactful_tokenizer' gem.
|
14
|
-
|
14
|
+
silence_warnings { require 'tactful_tokenizer' }
|
15
15
|
# Somewhere in the depths of the code this is defined...
|
16
16
|
String.class_eval { undef :tokenize }
|
17
17
|
# Keep only one copy of the segmenter.
|
@@ -8,13 +8,13 @@ module Treat
|
|
8
8
|
# Hold one tokenizer per language.
|
9
9
|
@@tokenizers = {}
|
10
10
|
# Require the 'tokenizer' gem.
|
11
|
-
|
11
|
+
silence_warnings { require 'tokenizer' }
|
12
12
|
# Perform the tokenization of English, German or French text.
|
13
13
|
# Options:
|
14
14
|
# :language => (Symbol) Force a language for the tokenizer.
|
15
15
|
def self.tokenize(entity, options = {})
|
16
16
|
lang = options[:language] ? options[:language] : entity.language
|
17
|
-
lang = Treat::
|
17
|
+
lang = Treat::Languages.find(lang, 1)
|
18
18
|
if @@tokenizers[lang].nil?
|
19
19
|
@@tokenizers[lang] = ::Tokenizer::Tokenizer.new(lang)
|
20
20
|
end
|
@@ -3,14 +3,16 @@ module Treat
|
|
3
3
|
module Tokenizers
|
4
4
|
class Stanford
|
5
5
|
# Require the Ruby-Java bridge.
|
6
|
-
|
6
|
+
silence_warnings do
|
7
7
|
require 'rjb'
|
8
8
|
# Load the Stanford Parser Java files.
|
9
|
-
jar = "#{Treat.bin}/
|
10
|
-
|
11
|
-
|
12
|
-
"
|
9
|
+
jar = "#{Treat.bin}/stanford-parser/stanford-parser.jar"
|
10
|
+
jars = Dir.glob(jar)
|
11
|
+
if jars.empty? || !File.readable?(jars[0])
|
12
|
+
raise "Could not find stanford parser JAR file (looking in #{jar})."+
|
13
|
+
" You may need to manually download the JAR files and/or set Treat.bin."
|
13
14
|
end
|
15
|
+
::Rjb::load(jars[0])
|
14
16
|
# Load the Stanford Parser classes.
|
15
17
|
PTBTokenizer = ::Rjb::import('edu.stanford.nlp.process.PTBTokenizer')
|
16
18
|
CoreLabelTokenFactory = ::Rjb::import('edu.stanford.nlp.process.CoreLabelTokenFactory')
|
data/lib/treat/visitable.rb
CHANGED
data/lib/treat.rb
CHANGED
@@ -1,58 +1,93 @@
|
|
1
|
-
# This file requires all source code files for the Treat module.
|
2
|
-
|
3
1
|
#
|
4
|
-
# Main Treat
|
2
|
+
# Main namespace for Treat modules.
|
5
3
|
#
|
6
|
-
#
|
4
|
+
# 1. Entities
|
7
5
|
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
6
|
+
# Entities are Tree structures that represent any textual
|
7
|
+
# entity (from a collection of texts down to an individual
|
8
|
+
# word) with a value, features, children and edges linking
|
9
|
+
# it to other textual entities. Sugar provides syntactic sugar
|
10
|
+
# for Entities and can be enabled by running Treat.edulcorate.
|
11
|
+
#
|
12
|
+
# Here are some examples of how to create entities:
|
11
13
|
#
|
12
|
-
#
|
14
|
+
# c = Collection 'folder_with_documents'
|
15
|
+
# d = Document 'filename.txt' # (or PDF, html, xml, png, jpg, gif).
|
16
|
+
# p = Paragraph 'A short story. The end.'
|
17
|
+
# s = Sentence 'That is not a sentence.'
|
18
|
+
# w = Word 'fox'
|
19
|
+
#
|
20
|
+
# Here's a full list of entities (subtypes in parentheses):
|
21
|
+
# Collection, Document, Zone (Section, Title, Paragraph or List),
|
22
|
+
# Sentence, Constituent (Phrase or Clause), Token (Word, Number,
|
23
|
+
# Symbol or Punctuation).
|
24
|
+
#
|
25
|
+
# 2. Proxies
|
26
|
+
#
|
27
|
+
# Proxies allow the Treat functions to be called on the core
|
28
|
+
# Ruby classes String, Numeric and Array. They build the entity
|
29
|
+
# corresponding to the supplied raw text and send the requested
|
30
|
+
# function to it.
|
31
|
+
#
|
32
|
+
# For example,
|
13
33
|
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
# - Lexicalizers - Namespace for algorithms that supply
|
23
|
-
# lexical information about a word (part of speech,
|
24
|
-
# synstypes, klass.)
|
25
|
-
# - Processors - Namespace for algorithms that process an
|
26
|
-
# entity into a tree of sub-entities.
|
34
|
+
# 'fox'.tag
|
35
|
+
#
|
36
|
+
# Is equivalent to:
|
37
|
+
#
|
38
|
+
# w = Word 'fox'
|
39
|
+
# w.tag
|
40
|
+
#
|
41
|
+
# 3. Functions
|
27
42
|
#
|
28
|
-
#
|
43
|
+
# A class is defined for each implemented algorithm performing a given
|
44
|
+
# task. These classes are clustered into groups of algorithms performing
|
45
|
+
# the same given task (Group), and the groups are clustered into Categories
|
46
|
+
# of groups performing related tasks.
|
29
47
|
#
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
48
|
+
# Here are the different Categories:
|
49
|
+
#
|
50
|
+
# - Detectors - Category for language, encoding, and format
|
51
|
+
# detectors.
|
52
|
+
# - Extractors - Category for algorithms that extract information
|
53
|
+
# from entities.
|
54
|
+
# - Formatters - Category for algorithms that handle conversion
|
55
|
+
# to and from different formats.
|
56
|
+
# - Inflectors - Category for algorithms that supply the base
|
57
|
+
# form, inflections and declensions of a word.
|
58
|
+
# - Lexicalizers - Category for algorithms that supply lexical
|
59
|
+
# information about a word (part of speech, synsets, word categories).
|
60
|
+
# - Processors - Category for algorithms that process collections and
|
61
|
+
# documents into trees.
|
62
|
+
#
|
63
|
+
# 3. Linguistic resources
|
64
|
+
#
|
65
|
+
# The Languages module contains linguistic information about
|
66
|
+
# languages (full ISO-639-1 and 2 language list, tag alignments
|
67
|
+
# for three treebanks, word categories, etc.)
|
68
|
+
#
|
69
|
+
# 4. Mixins for entities.
|
70
|
+
#
|
71
|
+
# Buildable, Delegatable, Visitable and Registrable are included
|
72
|
+
# or extended by Entity and provide it with the ability to be built,
|
73
|
+
# to delegate function calls, to accept visitors and to maintain a
|
74
|
+
# token registry, respectively.
|
34
75
|
#
|
76
|
+
# 5. Exception
|
77
|
+
#
|
78
|
+
# Exception defines a custom exception for the Treat module.
|
79
|
+
#
|
35
80
|
module Treat
|
36
81
|
|
37
82
|
# Make sure that we are running on Ruby 1.9 or higher.
|
38
83
|
if RUBY_VERSION <= '1.9'
|
39
84
|
raise 'Treat requires Ruby 1.9 or higher.'
|
40
85
|
end
|
41
|
-
|
86
|
+
|
42
87
|
# The current version of Treat.
|
43
|
-
VERSION = "0.1.
|
88
|
+
VERSION = "0.1.2"
|
44
89
|
|
45
|
-
|
46
|
-
require 'treat/exception'
|
47
|
-
require 'treat/utilities'
|
48
|
-
require 'treat/resources'
|
49
|
-
require 'treat/entities'
|
50
|
-
require 'treat/categories'
|
51
|
-
require 'treat/proxies'
|
52
|
-
|
53
|
-
# Provides syntactic sugar.
|
54
|
-
require 'treat/sugar'
|
55
|
-
extend Sugar
|
90
|
+
# $LOAD_PATH << '/ruby/treat/lib/' # Remove for release
|
56
91
|
|
57
92
|
# Create class variables for the Treat module.
|
58
93
|
class << self
|
@@ -67,25 +102,41 @@ module Treat
|
|
67
102
|
attr_accessor :language_detection_level
|
68
103
|
# String - main folder for executable files.
|
69
104
|
attr_accessor :bin
|
105
|
+
# String - folder of this file.
|
106
|
+
attr_accessor :lib
|
107
|
+
# String - folder for tests.
|
108
|
+
attr_accessor :test
|
109
|
+
# String - folder for temp files.
|
110
|
+
attr_accessor :tmp
|
70
111
|
end
|
71
|
-
|
72
|
-
#
|
73
|
-
@@lib = File.dirname(__FILE__)
|
74
|
-
@@test = @@lib + '/../test/'
|
75
|
-
@@tmp = @@lib + '/../tmp/'
|
76
|
-
@@bin = @@lib + '/../bin'
|
77
|
-
def self.lib; @@lib; end
|
78
|
-
def self.test; @@test; end
|
79
|
-
def self.tmp; @@tmp; end
|
80
|
-
|
81
|
-
# Stype the default language to english.
|
112
|
+
|
113
|
+
# Set the default language to English.
|
82
114
|
self.default_language = :eng
|
83
|
-
#
|
115
|
+
# Set the default encoding to utf-8.
|
84
116
|
self.default_encoding = :utf_8
|
85
117
|
# Turn language detection off by default.
|
86
118
|
self.detect_language = false
|
87
|
-
#
|
119
|
+
# Detect the language once per text by default.
|
88
120
|
self.language_detection_level = :text
|
89
|
-
#
|
90
|
-
self.
|
121
|
+
# Set the lib path to that of this file.
|
122
|
+
self.lib = File.dirname(__FILE__)
|
123
|
+
# Set the paths to the bin, test and tmp folders.
|
124
|
+
self.bin = self.lib + '/../bin/'
|
125
|
+
self.test = self.lib + '/../test/'
|
126
|
+
self.tmp = self.lib + '/../tmp/'
|
127
|
+
|
128
|
+
# Require modified core classes.
|
129
|
+
require 'treat/object'
|
130
|
+
require 'treat/kernel'
|
131
|
+
|
132
|
+
# Require all files for the Treat library.
|
133
|
+
require 'treat/exception'
|
134
|
+
require 'treat/languages'
|
135
|
+
require 'treat/entities'
|
136
|
+
require 'treat/categories'
|
137
|
+
require 'treat/proxies'
|
138
|
+
require 'treat/sugar'
|
139
|
+
|
140
|
+
extend Sugar
|
141
|
+
|
91
142
|
end
|
data/test/tc_entity.rb
CHANGED
@@ -16,18 +16,23 @@ module Treat
|
|
16
16
|
@det = Treat::Entities::Word.new('The')
|
17
17
|
@det.set :cat, :determiner
|
18
18
|
@det.set :tag, 'DT'
|
19
|
+
@det.set :tag_set, :penn
|
19
20
|
@adj = Treat::Entities::Word.new('lazy')
|
20
21
|
@adj.set :cat, :adjective
|
21
22
|
@adj.set :tag, 'JJ'
|
23
|
+
@adj.set :tag_set, :penn
|
22
24
|
@noun = Treat::Entities::Word.new('fox')
|
23
25
|
@noun.set :cat, :noun
|
24
26
|
@noun.set :tag, 'NN'
|
27
|
+
@noun.set :tag_set, :penn
|
25
28
|
@aux = Treat::Entities::Word.new('is')
|
26
29
|
@aux.set :cat, :verb
|
27
30
|
@aux.set :tag, 'VBZ'
|
31
|
+
@aux.set :tag_set, :penn
|
28
32
|
@verb = Treat::Entities::Word.new('running')
|
29
33
|
@verb.set :cat, :verb
|
30
34
|
@verb.set :tag, 'VBG'
|
35
|
+
@verb.set :tag_set, :penn
|
31
36
|
@dot = Treat::Entities::Punctuation.new('.')
|
32
37
|
|
33
38
|
@text << @sentence << [@noun_phrase, @verb_phrase, @dot]
|
data/test/tc_resources.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module Treat
|
2
2
|
module Tests
|
3
|
-
class
|
3
|
+
class TestLanguages < Test::Unit::TestCase
|
4
4
|
|
5
5
|
def test_languages
|
6
|
-
assert_equal :eng, Treat::
|
7
|
-
assert_equal :en, Treat::
|
8
|
-
assert_equal :english, Treat::
|
9
|
-
assert_equal :english, Treat::
|
6
|
+
assert_equal :eng, Treat::Languages.find(:english, 2)
|
7
|
+
assert_equal :en, Treat::Languages.find(:english, 1)
|
8
|
+
assert_equal :english, Treat::Languages.describe(:eng)
|
9
|
+
assert_equal :english, Treat::Languages.describe(:en)
|
10
10
|
end
|
11
11
|
|
12
12
|
def test_tags
|
data/test/tc_treat.rb
CHANGED
data/test/tests.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
|
3
|
-
# $LOAD_PATH << '/ruby/treat/test/' # Remove for production
|
4
3
|
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
5
4
|
|
6
5
|
require 'treat'
|
7
6
|
require 'texts'
|
8
7
|
|
8
|
+
# Treat.bin = '/ruby/nat/bin' # Remove for release
|
9
|
+
|
9
10
|
require 'tc_treat'
|
10
11
|
require 'tc_tree'
|
11
12
|
require 'tc_entity'
|