treat 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +1 -0
- data/README +3 -0
- data/TODO +14 -26
- data/bin/INFO +1 -1
- data/lib/treat/buildable.rb +10 -11
- data/lib/treat/categories.rb +8 -6
- data/lib/treat/category.rb +7 -2
- data/lib/treat/delegatable.rb +64 -56
- data/lib/treat/detectors/encoding/r_chardet19.rb +1 -1
- data/lib/treat/detectors/language/language_detector.rb +2 -1
- data/lib/treat/detectors/language/what_language.rb +2 -2
- data/lib/treat/detectors.rb +3 -0
- data/lib/treat/entities/entity.rb +1 -1
- data/lib/treat/entities.rb +9 -10
- data/lib/treat/exception.rb +3 -1
- data/lib/treat/extractors/named_entity/abner.rb +1 -1
- data/lib/treat/extractors/named_entity/stanford.rb +2 -2
- data/lib/treat/extractors/time/chronic.rb +2 -2
- data/lib/treat/extractors/time/nickel.rb +2 -2
- data/lib/treat/extractors/topic_words/lda.rb +2 -2
- data/lib/treat/extractors.rb +12 -9
- data/lib/treat/feature.rb +6 -1
- data/lib/treat/formatters/cleaners/html.rb +1 -1
- data/lib/treat/formatters.rb +8 -8
- data/lib/treat/group.rb +11 -10
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
- data/lib/treat/inflectors/{conjugators → conjugations}/linguistics.rb +6 -6
- data/lib/treat/inflectors/{declensors → declensions}/en.rb +2 -2
- data/lib/treat/inflectors/{declensors → declensions}/linguistics.rb +5 -5
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +4 -4
- data/lib/treat/inflectors/{stemmers → stem}/porter.rb +1 -1
- data/lib/treat/inflectors/{stemmers → stem}/porter_c.rb +3 -3
- data/lib/treat/inflectors/{stemmers → stem}/uea.rb +3 -3
- data/lib/treat/inflectors.rb +8 -21
- data/lib/treat/kernel.rb +120 -0
- data/lib/treat/languages/arabic.rb +14 -0
- data/lib/treat/languages/categories.rb +5 -0
- data/lib/treat/languages/chinese.rb +12 -0
- data/lib/treat/languages/english/categories.rb +23 -0
- data/lib/treat/{resources → languages/english}/tags.rb +127 -184
- data/lib/treat/languages/english.rb +33 -0
- data/lib/treat/languages/french.rb +17 -0
- data/lib/treat/languages/german.rb +17 -0
- data/lib/treat/languages/italian.rb +14 -0
- data/lib/treat/{resources/languages.txt → languages/list.txt} +0 -0
- data/lib/treat/languages/xinhua.rb +12 -0
- data/lib/treat/languages.rb +91 -0
- data/lib/treat/lexicalizers/category/from_tag.rb +20 -8
- data/lib/treat/lexicalizers/synsets/rita_wn.rb +1 -1
- data/lib/treat/lexicalizers/tag/brill.rb +2 -1
- data/lib/treat/lexicalizers/tag/lingua.rb +2 -1
- data/lib/treat/lexicalizers/tag/stanford.rb +16 -15
- data/lib/treat/lexicalizers.rb +1 -1
- data/lib/treat/object.rb +6 -0
- data/lib/treat/processors/parsers/enju.rb +3 -2
- data/lib/treat/processors/parsers/stanford.rb +15 -12
- data/lib/treat/processors/segmenters/punkt.rb +1 -1
- data/lib/treat/processors/segmenters/stanford.rb +7 -5
- data/lib/treat/processors/segmenters/tactful.rb +1 -1
- data/lib/treat/processors/tokenizers/multilingual.rb +2 -2
- data/lib/treat/processors/tokenizers/stanford.rb +7 -5
- data/lib/treat/visitable.rb +2 -1
- data/lib/treat.rb +105 -54
- data/test/tc_entity.rb +5 -0
- data/test/tc_resources.rb +5 -5
- data/test/tc_treat.rb +1 -2
- data/test/tests.rb +2 -1
- metadata +63 -64
- data/lib/treat/formatters/serializers/yaml/helper.rb +0 -96
- data/lib/treat/inflectors/lemmatizers/e_lemma/Makefile +0 -213
- data/lib/treat/inflectors/lemmatizers/e_lemma/elemma.c +0 -68
- data/lib/treat/inflectors/lemmatizers/e_lemma/extconf.rb +0 -6
- data/lib/treat/inflectors/lemmatizers/e_lemma.rb +0 -12
- data/lib/treat/resources/categories.rb +0 -18
- data/lib/treat/resources/delegates.rb +0 -96
- data/lib/treat/resources/dependencies.rb +0 -0
- data/lib/treat/resources/edges.rb +0 -8
- data/lib/treat/resources/formats.rb +0 -23
- data/lib/treat/resources/languages.rb +0 -86
- data/lib/treat/resources.rb +0 -10
- data/lib/treat/utilities.rb +0 -127
@@ -3,17 +3,15 @@ module Treat
|
|
3
3
|
module Tag
|
4
4
|
class Stanford
|
5
5
|
# Require the Ruby-Java bridge.
|
6
|
-
|
6
|
+
silence_warnings do
|
7
7
|
require 'rjb'
|
8
|
-
jar = "#{Treat.bin}/
|
9
|
-
|
10
|
-
|
11
|
-
"
|
8
|
+
jar = "#{Treat.bin}/stanford-tagger*/stanford-postagger*.jar"
|
9
|
+
jars = Dir.glob(jar)
|
10
|
+
if jars.empty? || !File.readable?(jars[0])
|
11
|
+
raise "Could not find stanford tagger JAR file (looking in #{jar})."+
|
12
|
+
" You may need to manually download the JAR files and/or set Treat.bin."
|
12
13
|
end
|
13
|
-
Rjb::load(
|
14
|
-
"#{Treat.bin}/stanford_tagger/stanford-postagger.jar",
|
15
|
-
['-Xms256M', '-Xmx512M']
|
16
|
-
)
|
14
|
+
Rjb::load(jars[0], ['-Xms256M', '-Xmx512M'])
|
17
15
|
MaxentTagger = ::Rjb::import('edu.stanford.nlp.tagger.maxent.MaxentTagger')
|
18
16
|
Word = ::Rjb::import('edu.stanford.nlp.ling.Word')
|
19
17
|
List = ::Rjb::import('java.util.ArrayList')
|
@@ -43,8 +41,8 @@ module Treat
|
|
43
41
|
else
|
44
42
|
model = LanguageToModel[lang]
|
45
43
|
if model.nil?
|
46
|
-
raise Treat::Exception "There exists no Stanford" +
|
47
|
-
"
|
44
|
+
raise Treat::Exception, "There exists no Stanford tagger model for " +
|
45
|
+
"the #{Treat::Languages.describe(lang)} language ."
|
48
46
|
end
|
49
47
|
end
|
50
48
|
# Reinitialize the tagger if the options have changed.
|
@@ -53,15 +51,18 @@ module Treat
|
|
53
51
|
@@taggers[lang] = nil # Reset the tagger
|
54
52
|
end
|
55
53
|
if @@taggers[lang].nil?
|
56
|
-
model = "#{Treat.bin}/
|
57
|
-
|
58
|
-
|
54
|
+
model = "#{Treat.bin}/stanford-tagger*/models/#{model}"
|
55
|
+
models = Dir.glob(model)
|
56
|
+
if models.empty? || !File.readable?(models[0])
|
57
|
+
raise "Could not find a tagger model for the " +
|
58
|
+
"#{Treat::Languages.describe(lang)}: looking in #{model}."
|
59
59
|
end
|
60
60
|
silence_streams(STDOUT, STDERR) do
|
61
61
|
@@taggers[lang] =
|
62
|
-
MaxentTagger.new(
|
62
|
+
MaxentTagger.new(models[0])
|
63
63
|
end
|
64
64
|
end
|
65
|
+
entity.set :tag_set, :penn
|
65
66
|
list = List.new
|
66
67
|
id_list = {}
|
67
68
|
i = 0
|
data/lib/treat/lexicalizers.rb
CHANGED
data/lib/treat/object.rb
ADDED
@@ -55,7 +55,7 @@ module Treat
|
|
55
55
|
text = entity.to_s + '.'
|
56
56
|
else
|
57
57
|
remove_last = false
|
58
|
-
text = entity.to_s
|
58
|
+
text = entity.to_s.gsub('.', '') + '.' # Fix
|
59
59
|
end
|
60
60
|
stdin.puts(text + "\n")
|
61
61
|
parsed = build(stdout.gets, remove_last)
|
@@ -120,7 +120,7 @@ module Treat
|
|
120
120
|
new_attributes[:enju_cat] = value
|
121
121
|
xcat = attributes['xcat'].split(' ')[0]
|
122
122
|
xcat ||= ''
|
123
|
-
tags = Treat::
|
123
|
+
tags = Treat::Languages::English::EnjuCatXcatToPTB.select do |m|
|
124
124
|
m[0] == value && m[1] == xcat
|
125
125
|
end
|
126
126
|
if tags.empty?
|
@@ -144,6 +144,7 @@ module Treat
|
|
144
144
|
# Handle naming conventions.
|
145
145
|
if attributes.has_key?('pos')
|
146
146
|
new_attributes[:tag] = new_attributes[:pos]
|
147
|
+
new_attributes[:tag_set] = :penn
|
147
148
|
new_attributes.delete :pos
|
148
149
|
end
|
149
150
|
# Create the appropriate entity for the
|
@@ -3,22 +3,24 @@ module Treat
|
|
3
3
|
module Parsers
|
4
4
|
class Stanford
|
5
5
|
# Require the Ruby-Java bridge.
|
6
|
-
|
7
|
-
jar = "#{Treat.bin}/
|
8
|
-
|
9
|
-
|
10
|
-
"
|
6
|
+
silence_warnings { require 'rjb' }
|
7
|
+
jar = "#{Treat.bin}/stanford-parser*/stanford-parser*.jar"
|
8
|
+
jars = Dir.glob(jar)
|
9
|
+
if jars.empty? || !File.readable?(jars[0])
|
10
|
+
raise "Could not find stanford parser JAR file (looking in #{jar})"+
|
11
|
+
" You may need to manually download the JAR files and/or set Treat.bin."
|
11
12
|
end
|
12
|
-
Rjb::load(
|
13
|
+
Rjb::load(jars[0], ['-Xms256M', '-Xmx512M'])
|
13
14
|
LexicalizedParser = ::Rjb::import('edu.stanford.nlp.parser.lexparser.LexicalizedParser')
|
14
15
|
@@parsers = {}
|
15
16
|
def self.parse(entity, options = {})
|
16
|
-
lang = Treat::
|
17
|
-
pcfg = "#{Treat.bin}/
|
18
|
-
|
19
|
-
|
17
|
+
lang = Treat::Languages.describe(entity.language).to_s.upcase
|
18
|
+
pcfg = "#{Treat.bin}/stanford-parser*/grammar/#{lang.upcase}PCFG.ser.gz"
|
19
|
+
pcfgs = Dir.glob(pcfg)
|
20
|
+
if pcfgs.empty? || !File.readable?(pcfgs[0])
|
21
|
+
raise "Could not find a language model for #{lang.downcase} (looking in #{pcfg})."
|
20
22
|
end
|
21
|
-
@@parsers[lang] ||= LexicalizedParser.new(
|
23
|
+
@@parsers[lang] ||= LexicalizedParser.new(pcfgs[0])
|
22
24
|
parse = @@parsers[lang].apply(entity.to_s)
|
23
25
|
entity.remove_all!
|
24
26
|
recurse(parse, entity)
|
@@ -41,12 +43,13 @@ module Treat
|
|
41
43
|
return recurse(java_node.children[0], ruby_node)
|
42
44
|
end
|
43
45
|
java_node.children.each do |java_child|
|
44
|
-
dependencies = java_child.dependencies.iterator
|
46
|
+
# dependencies = java_child.dependencies.iterator
|
45
47
|
# while dependencies.has_next
|
46
48
|
#dependency = dependencies.next
|
47
49
|
# end
|
48
50
|
ruby_child = Treat::Entities::Phrase.new
|
49
51
|
ruby_child.set :tag, java_child.value
|
52
|
+
ruby_child.set :tag_set, :penn
|
50
53
|
ruby_node << ruby_child
|
51
54
|
unless java_child.children.empty?
|
52
55
|
recurse(java_child, ruby_child)
|
@@ -9,7 +9,7 @@ module Treat
|
|
9
9
|
# Unsupervised Multilingual Sentence Boundary Detection.
|
10
10
|
# Computational Linguistics 32: 485-525.
|
11
11
|
class Punkt
|
12
|
-
|
12
|
+
silence_warnings { require 'punkt-segmenter' }
|
13
13
|
# Hold one copy of the segmenter per language.
|
14
14
|
@@segmenters = {}
|
15
15
|
# Hold only one trainer per language.
|
@@ -3,13 +3,15 @@ module Treat
|
|
3
3
|
module Segmenters
|
4
4
|
class Stanford
|
5
5
|
# Require the Ruby-Java bridge.
|
6
|
-
|
6
|
+
silence_warnings do
|
7
7
|
require 'rjb'
|
8
|
-
jar = "#{Treat.bin}/
|
9
|
-
|
10
|
-
|
11
|
-
"
|
8
|
+
jar = "#{Treat.bin}/stanford-parser*/stanford-parser*.jar"
|
9
|
+
jars = Dir.glob(jar)
|
10
|
+
if jars.empty? || !File.readable?(jars[0])
|
11
|
+
raise "Could not find stanford parser JAR file (lookin in #{jar})."+
|
12
|
+
" You may need to manually download the JAR files and/or set Treat.bin."
|
12
13
|
end
|
14
|
+
::Rjb::load(jars[0])
|
13
15
|
DocumentPreprocessor =
|
14
16
|
::Rjb::import('edu.stanford.nlp.process.DocumentPreprocessor')
|
15
17
|
StringReader = ::Rjb::import('java.io.StringReader')
|
@@ -11,7 +11,7 @@ module Treat
|
|
11
11
|
# Project website:
|
12
12
|
class Tactful
|
13
13
|
# Require the 'tactful_tokenizer' gem.
|
14
|
-
|
14
|
+
silence_warnings { require 'tactful_tokenizer' }
|
15
15
|
# Somewhere in the depths of the code this is defined...
|
16
16
|
String.class_eval { undef :tokenize }
|
17
17
|
# Keep only one copy of the segmenter.
|
@@ -8,13 +8,13 @@ module Treat
|
|
8
8
|
# Hold one tokenizer per language.
|
9
9
|
@@tokenizers = {}
|
10
10
|
# Require the 'tokenizer' gem.
|
11
|
-
|
11
|
+
silence_warnings { require 'tokenizer' }
|
12
12
|
# Perform the tokenization of English, German or French text.
|
13
13
|
# Options:
|
14
14
|
# :language => (Symbol) Force a language for the tokenizer.
|
15
15
|
def self.tokenize(entity, options = {})
|
16
16
|
lang = options[:language] ? options[:language] : entity.language
|
17
|
-
lang = Treat::
|
17
|
+
lang = Treat::Languages.find(lang, 1)
|
18
18
|
if @@tokenizers[lang].nil?
|
19
19
|
@@tokenizers[lang] = ::Tokenizer::Tokenizer.new(lang)
|
20
20
|
end
|
@@ -3,14 +3,16 @@ module Treat
|
|
3
3
|
module Tokenizers
|
4
4
|
class Stanford
|
5
5
|
# Require the Ruby-Java bridge.
|
6
|
-
|
6
|
+
silence_warnings do
|
7
7
|
require 'rjb'
|
8
8
|
# Load the Stanford Parser Java files.
|
9
|
-
jar = "#{Treat.bin}/
|
10
|
-
|
11
|
-
|
12
|
-
"
|
9
|
+
jar = "#{Treat.bin}/stanford-parser/stanford-parser.jar"
|
10
|
+
jars = Dir.glob(jar)
|
11
|
+
if jars.empty? || !File.readable?(jars[0])
|
12
|
+
raise "Could not find stanford parser JAR file (looking in #{jar})."+
|
13
|
+
" You may need to manually download the JAR files and/or set Treat.bin."
|
13
14
|
end
|
15
|
+
::Rjb::load(jars[0])
|
14
16
|
# Load the Stanford Parser classes.
|
15
17
|
PTBTokenizer = ::Rjb::import('edu.stanford.nlp.process.PTBTokenizer')
|
16
18
|
CoreLabelTokenFactory = ::Rjb::import('edu.stanford.nlp.process.CoreLabelTokenFactory')
|
data/lib/treat/visitable.rb
CHANGED
data/lib/treat.rb
CHANGED
@@ -1,58 +1,93 @@
|
|
1
|
-
# This file requires all source code files for the Treat module.
|
2
|
-
|
3
1
|
#
|
4
|
-
# Main Treat
|
2
|
+
# Main namespace for Treat modules.
|
5
3
|
#
|
6
|
-
#
|
4
|
+
# 1. Entities
|
7
5
|
#
|
8
|
-
#
|
9
|
-
#
|
10
|
-
#
|
6
|
+
# Entities are Tree structures that represent any textual
|
7
|
+
# entity (from a collection of texts down to an individual
|
8
|
+
# word) with a value, features, children and edges linking
|
9
|
+
# it to other textual entities. Sugar provides syntactic sugar
|
10
|
+
# for Entities and can be enabled by running Treat.edulcorate.
|
11
|
+
#
|
12
|
+
# Here are some example of how to create entities:
|
11
13
|
#
|
12
|
-
#
|
14
|
+
# c = Collection 'folder_with_documents'
|
15
|
+
# d = Document 'filename.txt' # (or PDF, html, xml, png, jpg, gif).
|
16
|
+
# p = Paragraph 'A short story. The end.'
|
17
|
+
# s = Sentence 'That is not a sentence.'
|
18
|
+
# w = Word 'fox'
|
19
|
+
#
|
20
|
+
# Here's a full list of entities (subtypes in parentheses):
|
21
|
+
# Collection, Document, Zone (Section, Title, Paragraph or List),
|
22
|
+
# Sentence, Constituent (Phrase or Clause), Token (Word, Number,
|
23
|
+
# Symbol or Punctuation).
|
24
|
+
#
|
25
|
+
# 2. Proxies
|
26
|
+
#
|
27
|
+
# Proxies allow the Treat functions to be called on the core
|
28
|
+
# Ruby classes String, Numeric and Array. They build the entity
|
29
|
+
# corresponding to the supplied raw text and send the requested
|
30
|
+
# function to it.
|
31
|
+
#
|
32
|
+
# For example,
|
13
33
|
#
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
# - Lexicalizers - Namespace for algorithms that supply
|
23
|
-
# lexical information about a word (part of speech,
|
24
|
-
# synstypes, klass.)
|
25
|
-
# - Processors - Namespace for algorithms that process an
|
26
|
-
# entity into a tree of sub-entities.
|
34
|
+
# 'fox'.tag
|
35
|
+
#
|
36
|
+
# Is equivalent to:
|
37
|
+
#
|
38
|
+
# w = Word 'fox'
|
39
|
+
# w.tag
|
40
|
+
#
|
41
|
+
# 3. Functions
|
27
42
|
#
|
28
|
-
#
|
43
|
+
# A class is defined for each implemented algorithm performing a given
|
44
|
+
# task. These classes are clustered into groups of algorithms performing
|
45
|
+
# the same given task (Group), and the groups are clustered into Categories
|
46
|
+
# of groups performing related tasks.
|
29
47
|
#
|
30
|
-
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
48
|
+
# Here are the different Categories:
|
49
|
+
#
|
50
|
+
# - Detectors - Category for language, encoding, and format
|
51
|
+
# detectors.
|
52
|
+
# - Extractors - Category for algorithms that extract information
|
53
|
+
# from entities.
|
54
|
+
# - Formatters - Category for algorithms that handle conversion
|
55
|
+
# to and from different formats.
|
56
|
+
# - Inflectors - Category for algorithms that supply the base
|
57
|
+
# form, inflections and declensions of a word.
|
58
|
+
# - Lexicalizers - Category for algorithms that supply lexical
|
59
|
+
# information about a word (part of speech, synsets, word categories).
|
60
|
+
# - Processors - Namespace for algorithms that process collections and
|
61
|
+
# documents into trees.
|
62
|
+
#
|
63
|
+
# 3. Linguistic resources
|
64
|
+
#
|
65
|
+
# The Languages module contains linguistic information about
|
66
|
+
# languages (full ISO-639-1 and 2 language list, tag alignments
|
67
|
+
# for three treebanks, word categories, etc.)
|
68
|
+
#
|
69
|
+
# 4. Mixins for entities.
|
70
|
+
#
|
71
|
+
# Buildable, Delegatable, Visitable and Registrable are
|
72
|
+
# or extended by Entity and provide it with the ability to be built,
|
73
|
+
# to delegate function calls, to accept visitors and to maintain a
|
74
|
+
# token registry, respectively.
|
34
75
|
#
|
76
|
+
# 5. Exception
|
77
|
+
#
|
78
|
+
# Exception defines a custom exception for the Treat module.
|
79
|
+
#
|
35
80
|
module Treat
|
36
81
|
|
37
82
|
# Make sure that we are running on Ruby 1.9 or higher.
|
38
83
|
if RUBY_VERSION <= '1.9'
|
39
84
|
raise 'Treat requires Ruby 1.9 or higher.'
|
40
85
|
end
|
41
|
-
|
86
|
+
|
42
87
|
# The current version of Treat.
|
43
|
-
VERSION = "0.1.
|
88
|
+
VERSION = "0.1.2"
|
44
89
|
|
45
|
-
|
46
|
-
require 'treat/exception'
|
47
|
-
require 'treat/utilities'
|
48
|
-
require 'treat/resources'
|
49
|
-
require 'treat/entities'
|
50
|
-
require 'treat/categories'
|
51
|
-
require 'treat/proxies'
|
52
|
-
|
53
|
-
# Provides syntactic sugar.
|
54
|
-
require 'treat/sugar'
|
55
|
-
extend Sugar
|
90
|
+
# $LOAD_PATH << '/ruby/treat/lib/' # Remove for release
|
56
91
|
|
57
92
|
# Create class variables for the Treat module.
|
58
93
|
class << self
|
@@ -67,25 +102,41 @@ module Treat
|
|
67
102
|
attr_accessor :language_detection_level
|
68
103
|
# String - main folder for executable files.
|
69
104
|
attr_accessor :bin
|
105
|
+
# String - folder of this file.
|
106
|
+
attr_accessor :lib
|
107
|
+
# String - folder for tests.
|
108
|
+
attr_accessor :test
|
109
|
+
# String - folder for temp files.
|
110
|
+
attr_accessor :tmp
|
70
111
|
end
|
71
|
-
|
72
|
-
#
|
73
|
-
@@lib = File.dirname(__FILE__)
|
74
|
-
@@test = @@lib + '/../test/'
|
75
|
-
@@tmp = @@lib + '/../tmp/'
|
76
|
-
@@bin = @@lib + '/../bin'
|
77
|
-
def self.lib; @@lib; end
|
78
|
-
def self.test; @@test; end
|
79
|
-
def self.tmp; @@tmp; end
|
80
|
-
|
81
|
-
# Stype the default language to english.
|
112
|
+
|
113
|
+
# Set the default language to english.
|
82
114
|
self.default_language = :eng
|
83
|
-
#
|
115
|
+
# Set the default encoding to utf-8.
|
84
116
|
self.default_encoding = :utf_8
|
85
117
|
# Turn language detection off by default.
|
86
118
|
self.detect_language = false
|
87
|
-
#
|
119
|
+
# Detect the language once per text by default.
|
88
120
|
self.language_detection_level = :text
|
89
|
-
#
|
90
|
-
self.
|
121
|
+
# Set the lib path to that of this file.
|
122
|
+
self.lib = File.dirname(__FILE__)
|
123
|
+
# Set the paths to the bin, test and tmp folders.
|
124
|
+
self.bin = self.lib + '/../bin/'
|
125
|
+
self.test = self.lib + '/../test/'
|
126
|
+
self.tmp = self.lib + '/../tmp/'
|
127
|
+
|
128
|
+
# Require modified core classes.
|
129
|
+
require 'treat/object'
|
130
|
+
require 'treat/kernel'
|
131
|
+
|
132
|
+
# Require all files for the Treat library.
|
133
|
+
require 'treat/exception'
|
134
|
+
require 'treat/languages'
|
135
|
+
require 'treat/entities'
|
136
|
+
require 'treat/categories'
|
137
|
+
require 'treat/proxies'
|
138
|
+
require 'treat/sugar'
|
139
|
+
|
140
|
+
extend Sugar
|
141
|
+
|
91
142
|
end
|
data/test/tc_entity.rb
CHANGED
@@ -16,18 +16,23 @@ module Treat
|
|
16
16
|
@det = Treat::Entities::Word.new('The')
|
17
17
|
@det.set :cat, :determiner
|
18
18
|
@det.set :tag, 'DT'
|
19
|
+
@det.set :tag_set, :penn
|
19
20
|
@adj = Treat::Entities::Word.new('lazy')
|
20
21
|
@adj.set :cat, :adjective
|
21
22
|
@adj.set :tag, 'JJ'
|
23
|
+
@adj.set :tag_set, :penn
|
22
24
|
@noun = Treat::Entities::Word.new('fox')
|
23
25
|
@noun.set :cat, :noun
|
24
26
|
@noun.set :tag, 'NN'
|
27
|
+
@noun.set :tag_set, :penn
|
25
28
|
@aux = Treat::Entities::Word.new('is')
|
26
29
|
@aux.set :cat, :verb
|
27
30
|
@aux.set :tag, 'VBZ'
|
31
|
+
@aux.set :tag_set, :penn
|
28
32
|
@verb = Treat::Entities::Word.new('running')
|
29
33
|
@verb.set :cat, :verb
|
30
34
|
@verb.set :tag, 'VBG'
|
35
|
+
@verb.set :tag_set, :penn
|
31
36
|
@dot = Treat::Entities::Punctuation.new('.')
|
32
37
|
|
33
38
|
@text << @sentence << [@noun_phrase, @verb_phrase, @dot]
|
data/test/tc_resources.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module Treat
|
2
2
|
module Tests
|
3
|
-
class
|
3
|
+
class TestLanguages < Test::Unit::TestCase
|
4
4
|
|
5
5
|
def test_languages
|
6
|
-
assert_equal :eng, Treat::
|
7
|
-
assert_equal :en, Treat::
|
8
|
-
assert_equal :english, Treat::
|
9
|
-
assert_equal :english, Treat::
|
6
|
+
assert_equal :eng, Treat::Languages.find(:english, 2)
|
7
|
+
assert_equal :en, Treat::Languages.find(:english, 1)
|
8
|
+
assert_equal :english, Treat::Languages.describe(:eng)
|
9
|
+
assert_equal :english, Treat::Languages.describe(:en)
|
10
10
|
end
|
11
11
|
|
12
12
|
def test_tags
|
data/test/tc_treat.rb
CHANGED
data/test/tests.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
|
3
|
-
# $LOAD_PATH << '/ruby/treat/test/' # Remove for production
|
4
3
|
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
5
4
|
|
6
5
|
require 'treat'
|
7
6
|
require 'texts'
|
8
7
|
|
8
|
+
# Treat.bin = '/ruby/nat/bin' # Remove for release
|
9
|
+
|
9
10
|
require 'tc_treat'
|
10
11
|
require 'tc_tree'
|
11
12
|
require 'tc_entity'
|