treat 0.1.4 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +4 -4
- data/TODO +21 -54
- data/lib/economist/half_cocked_basel.txt +16 -0
- data/lib/economist/hose_and_dry.doc +0 -0
- data/lib/economist/hungarys_troubles.abw +70 -0
- data/lib/economist/republican_nomination.pdf +0 -0
- data/lib/economist/saving_the_euro.odt +0 -0
- data/lib/economist/to_infinity_and_beyond.txt +15 -0
- data/lib/economist/zero_sum.html +91 -0
- data/lib/treat.rb +58 -72
- data/lib/treat/buildable.rb +59 -15
- data/lib/treat/categories.rb +26 -14
- data/lib/treat/category.rb +2 -2
- data/lib/treat/delegatable.rb +65 -48
- data/lib/treat/doable.rb +44 -0
- data/lib/treat/entities.rb +34 -14
- data/lib/treat/entities/collection.rb +2 -0
- data/lib/treat/entities/document.rb +3 -2
- data/lib/treat/entities/entity.rb +105 -90
- data/lib/treat/entities/phrases.rb +17 -0
- data/lib/treat/entities/tokens.rb +28 -13
- data/lib/treat/entities/zones.rb +20 -0
- data/lib/treat/extractors.rb +49 -11
- data/lib/treat/extractors/coreferences/stanford.rb +68 -0
- data/lib/treat/extractors/date/chronic.rb +32 -0
- data/lib/treat/extractors/date/ruby.rb +25 -0
- data/lib/treat/extractors/keywords/tf_idf.rb +26 -0
- data/lib/treat/extractors/keywords/{topics_frequency.rb → topics_tf_idf.rb} +15 -7
- data/lib/treat/{detectors/language/language_detector.rb → extractors/language/language_extractor.rb} +5 -2
- data/lib/treat/extractors/language/what_language.rb +49 -0
- data/lib/treat/extractors/named_entity_tag/stanford.rb +53 -0
- data/lib/treat/extractors/roles/naive.rb +73 -0
- data/lib/treat/extractors/statistics/frequency_in.rb +6 -13
- data/lib/treat/extractors/statistics/{position_in_parent.rb → position_in.rb} +1 -1
- data/lib/treat/extractors/statistics/tf_idf.rb +89 -21
- data/lib/treat/extractors/statistics/transition_matrix.rb +11 -11
- data/lib/treat/extractors/statistics/transition_probability.rb +4 -4
- data/lib/treat/extractors/time/nickel.rb +30 -12
- data/lib/treat/extractors/topic_words/lda.rb +9 -9
- data/lib/treat/extractors/topics/reuters.rb +14 -15
- data/lib/treat/extractors/topics/reuters/region.xml +1 -0
- data/lib/treat/features.rb +7 -0
- data/lib/treat/formatters/readers/abw.rb +6 -1
- data/lib/treat/formatters/readers/autoselect.rb +5 -6
- data/lib/treat/formatters/readers/doc.rb +3 -1
- data/lib/treat/formatters/readers/html.rb +1 -1
- data/lib/treat/formatters/readers/image.rb +43 -0
- data/lib/treat/formatters/readers/odt.rb +1 -2
- data/lib/treat/formatters/readers/pdf.rb +9 -1
- data/lib/treat/formatters/readers/xml.rb +40 -0
- data/lib/treat/formatters/serializers/xml.rb +50 -14
- data/lib/treat/formatters/serializers/yaml.rb +7 -2
- data/lib/treat/formatters/unserializers/xml.rb +33 -7
- data/lib/treat/formatters/visualizers/dot.rb +90 -20
- data/lib/treat/formatters/visualizers/short_value.rb +2 -2
- data/lib/treat/formatters/visualizers/standoff.rb +2 -2
- data/lib/treat/formatters/visualizers/tree.rb +1 -1
- data/lib/treat/formatters/visualizers/txt.rb +13 -4
- data/lib/treat/group.rb +16 -10
- data/lib/treat/helpers/linguistics_loader.rb +18 -0
- data/lib/treat/inflectors.rb +10 -0
- data/lib/treat/inflectors/cardinal_words/linguistics.rb +3 -3
- data/lib/treat/inflectors/conjugations/linguistics.rb +5 -12
- data/lib/treat/inflectors/declensions/english.rb +319 -0
- data/lib/treat/inflectors/declensions/linguistics.rb +12 -11
- data/lib/treat/inflectors/ordinal_words/linguistics.rb +3 -3
- data/lib/treat/install.rb +59 -0
- data/lib/treat/kernel.rb +18 -8
- data/lib/treat/languages.rb +18 -11
- data/lib/treat/languages/arabic.rb +4 -2
- data/lib/treat/languages/chinese.rb +6 -2
- data/lib/treat/languages/dutch.rb +16 -0
- data/lib/treat/languages/english.rb +47 -19
- data/lib/treat/languages/french.rb +8 -5
- data/lib/treat/languages/german.rb +9 -6
- data/lib/treat/languages/greek.rb +16 -0
- data/lib/treat/languages/italian.rb +6 -3
- data/lib/treat/languages/polish.rb +16 -0
- data/lib/treat/languages/portuguese.rb +16 -0
- data/lib/treat/languages/russian.rb +16 -0
- data/lib/treat/languages/spanish.rb +16 -0
- data/lib/treat/languages/swedish.rb +16 -0
- data/lib/treat/languages/tags.rb +377 -0
- data/lib/treat/lexicalizers.rb +34 -23
- data/lib/treat/lexicalizers/category/from_tag.rb +17 -10
- data/lib/treat/lexicalizers/linkages/naive.rb +51 -51
- data/lib/treat/lexicalizers/synsets/wordnet.rb +5 -1
- data/lib/treat/lexicalizers/tag/brill.rb +35 -40
- data/lib/treat/lexicalizers/tag/lingua.rb +19 -14
- data/lib/treat/lexicalizers/tag/stanford.rb +59 -68
- data/lib/treat/lexicalizers/tag/tagger.rb +29 -0
- data/lib/treat/processors.rb +8 -8
- data/lib/treat/processors/chunkers/txt.rb +4 -4
- data/lib/treat/processors/parsers/enju.rb +114 -99
- data/lib/treat/processors/parsers/stanford.rb +109 -41
- data/lib/treat/processors/segmenters/punkt.rb +17 -18
- data/lib/treat/processors/segmenters/punkt/dutch.yaml +9716 -0
- data/lib/treat/processors/segmenters/punkt/english.yaml +10340 -0
- data/lib/treat/processors/segmenters/punkt/french.yaml +43159 -0
- data/lib/treat/processors/segmenters/punkt/german.yaml +9572 -0
- data/lib/treat/processors/segmenters/punkt/greek.yaml +6050 -0
- data/lib/treat/processors/segmenters/punkt/italian.yaml +14748 -0
- data/lib/treat/processors/segmenters/punkt/polish.yaml +9751 -0
- data/lib/treat/processors/segmenters/punkt/portuguese.yaml +13662 -0
- data/lib/treat/processors/segmenters/punkt/russian.yaml +4237 -0
- data/lib/treat/processors/segmenters/punkt/spanish.yaml +24034 -0
- data/lib/treat/processors/segmenters/punkt/swedish.yaml +10001 -0
- data/lib/treat/processors/segmenters/stanford.rb +38 -37
- data/lib/treat/processors/segmenters/tactful.rb +5 -4
- data/lib/treat/processors/tokenizers/macintyre.rb +7 -6
- data/lib/treat/processors/tokenizers/multilingual.rb +2 -3
- data/lib/treat/processors/tokenizers/perl.rb +2 -2
- data/lib/treat/processors/tokenizers/punkt.rb +6 -2
- data/lib/treat/processors/tokenizers/stanford.rb +25 -24
- data/lib/treat/processors/tokenizers/tactful.rb +1 -2
- data/lib/treat/proxies.rb +2 -35
- data/lib/treat/registrable.rb +17 -22
- data/lib/treat/sugar.rb +11 -11
- data/lib/treat/tree.rb +27 -17
- data/lib/treat/viewable.rb +29 -0
- data/lib/treat/visitable.rb +1 -1
- data/test/tc_entity.rb +56 -49
- data/test/tc_extractors.rb +41 -18
- data/test/tc_formatters.rb +7 -8
- data/test/tc_inflectors.rb +19 -24
- data/test/tc_lexicalizers.rb +12 -19
- data/test/tc_processors.rb +26 -12
- data/test/tc_resources.rb +2 -7
- data/test/tc_treat.rb +20 -22
- data/test/tc_tree.rb +4 -4
- data/test/tests.rb +3 -5
- data/test/texts.rb +13 -14
- data/tmp/INFO +1 -0
- metadata +78 -158
- data/bin/INFO +0 -1
- data/examples/benchmark.rb +0 -81
- data/examples/keywords.rb +0 -148
- data/lib/treat/detectors.rb +0 -31
- data/lib/treat/detectors/encoding/r_chardet19.rb +0 -27
- data/lib/treat/detectors/format/file.rb +0 -36
- data/lib/treat/detectors/language/what_language.rb +0 -29
- data/lib/treat/entities/constituents.rb +0 -15
- data/lib/treat/entities/sentence.rb +0 -8
- data/lib/treat/extractors/named_entity/abner.rb +0 -20
- data/lib/treat/extractors/named_entity/stanford.rb +0 -174
- data/lib/treat/extractors/statistics/frequency_of.rb +0 -15
- data/lib/treat/extractors/time/chronic.rb +0 -20
- data/lib/treat/extractors/time/native.rb +0 -18
- data/lib/treat/formatters/readers/gocr.rb +0 -26
- data/lib/treat/formatters/readers/ocropus.rb +0 -31
- data/lib/treat/formatters/visualizers/html.rb +0 -13
- data/lib/treat/formatters/visualizers/inspect.rb +0 -20
- data/lib/treat/inflectors/declensions/en.rb +0 -18
- data/lib/treat/languages/categories.rb +0 -5
- data/lib/treat/languages/english/categories.rb +0 -23
- data/lib/treat/languages/english/tags.rb +0 -352
- data/lib/treat/languages/xinhua.rb +0 -12
- data/lib/treat/lexicalizers/synsets/rita_wn.rb +0 -23
- data/lib/treat/string.rb +0 -5
- data/test/tc_detectors.rb +0 -26
@@ -1,49 +1,50 @@
|
|
1
1
|
module Treat
|
2
2
|
module Processors
|
3
3
|
module Segmenters
|
4
|
-
# A wrapper for the sentence splitter supplied by
|
4
|
+
# A wrapper for the sentence splitter supplied by
|
5
5
|
# the Stanford parser.
|
6
6
|
class Stanford
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
#
|
22
|
-
#
|
7
|
+
require 'stanford-core-nlp'
|
8
|
+
DefaultOptions = {
|
9
|
+
:silence => false,
|
10
|
+
:log_to_file => false,
|
11
|
+
:also_tokenize => false
|
12
|
+
}
|
13
|
+
# Segment sentences using the sentence splitter supplied by
|
14
|
+
# the Stanford parser. When the :also_tokenize option is set,
|
15
|
+
# this segmenter also adds the tokens as children of the sentences.
|
16
|
+
#
|
17
|
+
# Options:
|
18
|
+
# - (Boolean) :also_tokenize - Whether to also add the tokens
|
19
|
+
# as children of the sentence.
|
20
|
+
# - (String) :log_to_file => a filename to log output to
|
21
|
+
# instead of displaying it.
|
22
|
+
# - (Boolean) :silence => send the log output to /dev/null.
|
23
23
|
def self.segment(entity, options = {})
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
if options[:
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
24
|
+
options = DefaultOptions.merge(options)
|
25
|
+
options[:log_to_file] = '/dev/null' if options[:silence]
|
26
|
+
if options[:log_to_file]
|
27
|
+
::StanfordCoreNLP.log_file = options[:log_to_file]
|
28
|
+
end
|
29
|
+
options = DefaultOptions.merge(options)
|
30
|
+
pipeline = ::StanfordCoreNLP.load(:tokenize, :ssplit)
|
31
|
+
text = ::StanfordCoreNLP::Text.new(entity.to_s)
|
32
|
+
pipeline.annotate(text)
|
33
|
+
text.get(:sentences).each do |sentence|
|
34
|
+
s = Treat::Entities::Sentence.from_string(sentence.to_s, true)
|
35
|
+
entity << s
|
36
|
+
if options[:also_tokenize]
|
37
|
+
sentence.get(:tokens).each do |token|
|
38
|
+
t = Treat::Entities::Phrase.from_string(token.value)
|
39
|
+
s << t
|
40
|
+
t.set :character_offset_begin,
|
41
|
+
token.get(:character_offset_begin)
|
42
|
+
|
43
|
+
t.set :character_offset_end,
|
44
|
+
token.get(:character_offset_end)
|
42
45
|
end
|
43
46
|
end
|
44
|
-
entity << sentence
|
45
47
|
end
|
46
|
-
entity
|
47
48
|
end
|
48
49
|
end
|
49
50
|
end
|
@@ -15,7 +15,7 @@ module Treat
|
|
15
15
|
class Tactful
|
16
16
|
# Require the 'tactful_tokenizer' gem.
|
17
17
|
silence_warnings { require 'tactful_tokenizer' }
|
18
|
-
#
|
18
|
+
# Remove the String#tokenize method defined by the 'tactful_tokenizer' gem.
|
19
19
|
String.class_eval { undef :tokenize }
|
20
20
|
# Keep only one copy of the segmenter.
|
21
21
|
@@segmenter = nil
|
@@ -25,11 +25,12 @@ module Treat
|
|
25
25
|
# Options: none.
|
26
26
|
def self.segment(entity, options = {})
|
27
27
|
@@segmenter ||= TactfulTokenizer::Model.new
|
28
|
-
|
28
|
+
s = entity.to_s
|
29
|
+
s.gsub!(/([^\.\?!]\.|\!|\?)([^\s])/) { $1 + ' ' + $2 }
|
30
|
+
sentences = @@segmenter.tokenize_text(s)
|
29
31
|
sentences.each do |sentence|
|
30
|
-
entity << Entities::
|
32
|
+
entity << Entities::Phrase.from_string(sentence)
|
31
33
|
end
|
32
|
-
entity
|
33
34
|
end
|
34
35
|
end
|
35
36
|
end
|
@@ -16,13 +16,15 @@ module Treat
|
|
16
16
|
class Macintyre
|
17
17
|
# Tokenize the entity using a native rule-based algorithm.
|
18
18
|
def self.tokenize(entity, options = {})
|
19
|
-
|
20
|
-
|
19
|
+
if entity.has_children?
|
20
|
+
raise Treat::Exception,
|
21
|
+
'Cannot tokenize a Phrase that already has children.'
|
22
|
+
end
|
23
|
+
chunks = split(entity.to_s)
|
21
24
|
chunks.each do |chunk|
|
22
25
|
next if chunk =~ /([[:space:]]+)/
|
23
|
-
entity << Treat::Entities::
|
26
|
+
entity << Treat::Entities::Token.from_string(chunk)
|
24
27
|
end
|
25
|
-
entity
|
26
28
|
end
|
27
29
|
# Helper method to split the string into tokens.
|
28
30
|
def self.split(string)
|
@@ -63,8 +65,7 @@ module Treat
|
|
63
65
|
s.gsub!(/ '([Tt])is /,' \'\1 is ')
|
64
66
|
s.gsub!(/ '([Tt])was /,' \'\1 was ')
|
65
67
|
s.gsub!(/ ([Ww])anna /,' \1an na ')
|
66
|
-
while s.sub!(/(\s)([0-9]+) , ([0-9]+)(\s)/, '\1\2,\3\4')
|
67
|
-
end
|
68
|
+
while s.sub!(/(\s)([0-9]+) , ([0-9]+)(\s)/, '\1\2,\3\4'); end
|
68
69
|
s.gsub!(/\//, ' / ')
|
69
70
|
s.gsub!(/\s+/,' ')
|
70
71
|
s.strip!
|
@@ -14,16 +14,15 @@ module Treat
|
|
14
14
|
# :language => (Symbol) Force a language for the tokenizer.
|
15
15
|
def self.tokenize(entity, options = {})
|
16
16
|
lang = options[:language] ? options[:language] : entity.language
|
17
|
-
lang = Treat::Languages.
|
17
|
+
lang = Treat::Languages.code(lang, 1)
|
18
18
|
if @@tokenizers[lang].nil?
|
19
19
|
@@tokenizers[lang] = ::Tokenizer::Tokenizer.new(lang)
|
20
20
|
end
|
21
21
|
tokens = @@tokenizers[lang].tokenize(entity.to_s)
|
22
22
|
tokens.each do |token|
|
23
23
|
next if token =~ /([[:space:]]+)/
|
24
|
-
entity << Treat::Entities::
|
24
|
+
entity << Treat::Entities::Token.from_string(token)
|
25
25
|
end
|
26
|
-
entity
|
27
26
|
end
|
28
27
|
end
|
29
28
|
end
|
@@ -85,9 +85,9 @@ module Treat
|
|
85
85
|
#s/ (wan)(na) / $1 $2 /ig;
|
86
86
|
text.gsub!(/ (wan)(na) /i,' \1 \2 ')
|
87
87
|
tokens = text.split(/\s/)
|
88
|
-
tokens.each do |token|
|
88
|
+
tokens[1..-1].each do |token|
|
89
89
|
next if token =~ /([[:space:]]+)/
|
90
|
-
entity << Treat::Entities::
|
90
|
+
entity << Treat::Entities::Token.from_string(token)
|
91
91
|
end
|
92
92
|
end
|
93
93
|
end
|
@@ -31,9 +31,13 @@ module Treat
|
|
31
31
|
# Options: none.
|
32
32
|
def self.tokenize(entity, options = {})
|
33
33
|
entity.to_s.scan(ReWordTokenizer).each do |token|
|
34
|
-
|
34
|
+
if SentEndChars.include?(token[-1])
|
35
|
+
entity << Treat::Entities::Token.from_string(token[0..-2])
|
36
|
+
entity << Treat::Entities::Token.from_string(token[-1..-1])
|
37
|
+
else
|
38
|
+
entity << Treat::Entities::Token.from_string(token)
|
39
|
+
end
|
35
40
|
end
|
36
|
-
entity
|
37
41
|
end
|
38
42
|
end
|
39
43
|
end
|
@@ -4,34 +4,35 @@ module Treat
|
|
4
4
|
# A wrapper for the Stanford parser's Penn-Treebank
|
5
5
|
# style tokenizer.
|
6
6
|
class Stanford
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
if jars.empty? || !File.readable?(jars[0])
|
14
|
-
raise "Could not find stanford parser JAR file (looking in #{jar})."+
|
15
|
-
" You may need to manually download the JAR files and/or set Treat.bin."
|
16
|
-
end
|
17
|
-
::Rjb::load(jars[0])
|
18
|
-
# Load the Stanford Parser classes.
|
19
|
-
PTBTokenizer = ::Rjb::import('edu.stanford.nlp.process.PTBTokenizer')
|
20
|
-
CoreLabelTokenFactory = ::Rjb::import('edu.stanford.nlp.process.CoreLabelTokenFactory')
|
21
|
-
StringReader = ::Rjb::import('java.io.StringReader')
|
22
|
-
end
|
7
|
+
require 'stanford-core-nlp'
|
8
|
+
DefaultOptions = {
|
9
|
+
:silence => false,
|
10
|
+
:log_to_file => nil
|
11
|
+
}
|
12
|
+
@@tokenizer = nil
|
23
13
|
# Tokenize the entity using a Penn-Treebank style tokenizer
|
24
14
|
# included with the Stanford Parser.
|
15
|
+
#
|
16
|
+
# Options:
|
17
|
+
# - (String) :log_to_file => a filename to log output to
|
18
|
+
# instead of displaying it.
|
25
19
|
def self.tokenize(entity, options = {})
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
20
|
+
options = DefaultOptions.merge(options)
|
21
|
+
options[:log_to_file] = '/dev/null' if options[:silence]
|
22
|
+
if options[:log_to_file]
|
23
|
+
::StanfordCoreNLP.log_file = options[:log_to_file]
|
24
|
+
end
|
25
|
+
@@tokenizer ||= ::StanfordCoreNLP.load(:tokenize)
|
26
|
+
text = ::StanfordCoreNLP::Text.new(entity.to_s)
|
27
|
+
@@tokenizer.annotate(text)
|
28
|
+
text.get(:tokens).each do |token|
|
29
|
+
t = Treat::Entities::Token.from_string(token.value)
|
30
|
+
entity << t
|
31
|
+
t.set :character_offset_begin,
|
32
|
+
token.get(:character_offset_begin)
|
33
|
+
t.set :character_offset_end,
|
34
|
+
token.get(:character_offset_end)
|
33
35
|
end
|
34
|
-
entity
|
35
36
|
end
|
36
37
|
end
|
37
38
|
end
|
data/lib/treat/proxies.rb
CHANGED
@@ -7,7 +7,7 @@ module Treat
|
|
7
7
|
# Build the entity corresponding to the proxied
|
8
8
|
# object and send the method call to the entity.
|
9
9
|
def method_missing(sym, *args, &block)
|
10
|
-
if Treat::Categories.
|
10
|
+
if sym == :do || Treat::Categories.lookup(sym)
|
11
11
|
to_entity.send(sym, *args)
|
12
12
|
else
|
13
13
|
super(sym, *args, &block)
|
@@ -20,10 +20,6 @@ module Treat
|
|
20
20
|
# Install Treat functions on String objects.
|
21
21
|
module String
|
22
22
|
include Treat::Proxies::Proxy
|
23
|
-
# Save the string to the specified file.
|
24
|
-
def save(file)
|
25
|
-
File.open(file, 'w') { |f| f.write(self) }
|
26
|
-
end
|
27
23
|
# Return the entity corresponding to the string.
|
28
24
|
def to_entity
|
29
25
|
Treat::Entities::Entity.from_string(self.to_s)
|
@@ -34,40 +30,11 @@ module Treat
|
|
34
30
|
include Treat::Proxies::Proxy
|
35
31
|
# Return the entity corresponding to the number.
|
36
32
|
def to_entity(builder = nil)
|
37
|
-
Treat::Entities::
|
38
|
-
end
|
39
|
-
end
|
40
|
-
# Install Treat functions on Array objects.
|
41
|
-
module Array
|
42
|
-
include Treat::Proxies::Proxy
|
43
|
-
# The behaviour of this proxy is special:
|
44
|
-
# if a Treat function is called on an array,
|
45
|
-
# the function will be called on each element
|
46
|
-
# of the array and a new array with the
|
47
|
-
# results will be returned.
|
48
|
-
def method_missing(sym, *args, &block)
|
49
|
-
if Category.has_method?(sym)
|
50
|
-
array = []
|
51
|
-
each do |element|
|
52
|
-
if element.is_a? Treat::Entities::Entity
|
53
|
-
array << element.send(sym, *args)
|
54
|
-
else
|
55
|
-
unless [Numeric, String, Array].include?(element.class)
|
56
|
-
raise Treat::Exception "Cannot convert object with type " +
|
57
|
-
"#{element.class} into an entity."
|
58
|
-
end
|
59
|
-
array << element.to_entity.send(sym, *args)
|
60
|
-
end
|
61
|
-
end
|
62
|
-
array
|
63
|
-
else
|
64
|
-
super(sym, *args, &block)
|
65
|
-
end
|
33
|
+
Treat::Entities::Number.from_numeric(self)
|
66
34
|
end
|
67
35
|
end
|
68
36
|
# Include the proxies in the core classes.
|
69
37
|
::String.class_eval { include Treat::Proxies::String }
|
70
38
|
::Numeric.class_eval { include Treat::Proxies::Numeric }
|
71
|
-
::Array.class_eval { include Treat::Proxies::Array }
|
72
39
|
end
|
73
40
|
end
|
data/lib/treat/registrable.rb
CHANGED
@@ -1,32 +1,27 @@
|
|
1
1
|
module Treat
|
2
2
|
module Registrable
|
3
|
-
# Registers a token in the @token_registry
|
4
|
-
# hash in the root node.
|
3
|
+
# Registers a token in the @token_registry hash.
|
5
4
|
def register_token(token)
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
@parent.register_token(token)
|
13
|
-
end
|
14
|
-
else
|
15
|
-
@parent.register_token(token)
|
16
|
-
end
|
5
|
+
@token_registry ||= {:value => {}, :id => {}}
|
6
|
+
@token_registry[:id][token.id] = token
|
7
|
+
v = token.to_s.downcase
|
8
|
+
@token_registry[:value][v] ||= []
|
9
|
+
@token_registry[:value][v] << token
|
10
|
+
@parent.register_token(token) if has_parent?
|
17
11
|
end
|
18
|
-
# Find the token registry,
|
19
|
-
#
|
12
|
+
# Find the token registry, by default the one
|
13
|
+
# in the root node.
|
20
14
|
def token_registry(type = nil)
|
21
|
-
if
|
22
|
-
@token_registry ||= {value
|
15
|
+
if (type == nil && is_root?) || type == self.type
|
16
|
+
@token_registry ||= {:value => {}, :id => {}}
|
23
17
|
return @token_registry
|
24
|
-
end
|
25
|
-
if has_parent?
|
26
|
-
@parent.token_registry(type)
|
27
18
|
else
|
28
|
-
|
29
|
-
|
19
|
+
if has_parent?
|
20
|
+
@parent.token_registry(type)
|
21
|
+
else
|
22
|
+
@token_registry ||= {:value => {}, :id => {}}
|
23
|
+
@token_registry
|
24
|
+
end
|
30
25
|
end
|
31
26
|
end
|
32
27
|
end
|
data/lib/treat/sugar.rb
CHANGED
@@ -5,9 +5,9 @@ module Treat
|
|
5
5
|
# Treat::Entities::Word can now be referred to as simply 'Word'.
|
6
6
|
module Sugar
|
7
7
|
# Installs syntactic sugar.
|
8
|
-
def
|
9
|
-
return if @@
|
10
|
-
@@
|
8
|
+
def sweeten!
|
9
|
+
return if @@sweetened
|
10
|
+
@@sweetened = true
|
11
11
|
each_entity_class do |type, klass|
|
12
12
|
unless type == :Symbol
|
13
13
|
Object.class_eval do
|
@@ -18,11 +18,11 @@ module Treat
|
|
18
18
|
end
|
19
19
|
end
|
20
20
|
end
|
21
|
-
|
21
|
+
|
22
22
|
# Uninstalls syntactic sugar.
|
23
|
-
def
|
24
|
-
return unless @@
|
25
|
-
@@
|
23
|
+
def unsweeten!
|
24
|
+
return unless @@sweetened
|
25
|
+
@@sweetened = false
|
26
26
|
each_entity_class do |type, klass|
|
27
27
|
unless type == :Symbol
|
28
28
|
Object.class_eval do
|
@@ -31,17 +31,17 @@ module Treat
|
|
31
31
|
end
|
32
32
|
end
|
33
33
|
end
|
34
|
-
|
34
|
+
|
35
35
|
# Boolean - whether syntactic sugar is
|
36
36
|
# enabled or not.
|
37
|
-
def
|
37
|
+
def sweetened?; @@sweetened; end
|
38
38
|
# Syntactic sugar is disabled by default.
|
39
|
-
@@
|
39
|
+
@@sweetened = false
|
40
40
|
private
|
41
41
|
# Helper method, yields each entity type and class.
|
42
42
|
def each_entity_class
|
43
43
|
Treat::Entities.list.each do |entity_type|
|
44
|
-
type =
|
44
|
+
type = cc(entity_type).intern
|
45
45
|
klass = Treat::Entities.const_get(type, klass)
|
46
46
|
yield type, klass
|
47
47
|
end
|
data/lib/treat/tree.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
module Treat
|
2
2
|
# This module provides an abstract tree structure with
|
3
|
-
# nodes having an id, a value, children, features and
|
3
|
+
# nodes having an id, a value, children, features and dependencies.
|
4
4
|
module Tree
|
5
5
|
# This class models the nodes for an N-ary tree data structue
|
6
6
|
# with unique identifiers, text value, children, features
|
7
|
-
# (annotations) and
|
7
|
+
# (annotations) and dependencies.
|
8
8
|
#
|
9
9
|
# This class was tightly based on the 'rubytree' gem.
|
10
10
|
# RubyTree is licensed under the BSD license and can
|
@@ -24,28 +24,30 @@ module Treat
|
|
24
24
|
attr_reader :children
|
25
25
|
# A hash containing the features of this node.
|
26
26
|
attr_accessor :features
|
27
|
-
#
|
27
|
+
# An array containing the dependencies that link this
|
28
28
|
# node to other nodes.
|
29
|
-
attr_accessor :
|
29
|
+
attr_accessor :dependencies
|
30
|
+
# A struct for dependencies.
|
31
|
+
Struct.new('Dependency', :target, :type, :directed, :direction)
|
30
32
|
# The parent of the node.
|
31
33
|
attr_accessor :parent
|
32
34
|
# Initialize the node with its value and id.
|
33
35
|
# Setup containers for the children, features
|
34
|
-
# and
|
36
|
+
# and dependencies of this node.
|
35
37
|
def initialize(value, id = nil)
|
36
38
|
@parent = nil
|
37
39
|
@value, @id = value, id
|
38
40
|
@children = []
|
39
41
|
@children_hash = {}
|
40
42
|
@features = {}
|
41
|
-
@
|
43
|
+
@dependencies = []
|
42
44
|
end
|
43
|
-
# Boolean - does the node have
|
44
|
-
def
|
45
|
+
# Boolean - does the node have dependencies?
|
46
|
+
def has_dependencies?; !(@dependencies.size == 0); end
|
45
47
|
# Boolean - does the node have children?
|
46
|
-
def has_children?;
|
48
|
+
def has_children?; !(@children.size == 0); end
|
47
49
|
# Boolean - does the node have features?
|
48
|
-
def has_features?;
|
50
|
+
def has_features?; !(@features.size == 0); end
|
49
51
|
# Boolean - does the node have a parent?
|
50
52
|
def has_parent?; !@parent.nil?; end
|
51
53
|
# Boolean - does the node not have a parent?
|
@@ -132,6 +134,10 @@ module Treat
|
|
132
134
|
@features ||= {}
|
133
135
|
@features[feature] = value
|
134
136
|
end
|
137
|
+
# Unset a feature.
|
138
|
+
def unset(feature)
|
139
|
+
@features.delete(feature)
|
140
|
+
end
|
135
141
|
# Return the depth of this node in the tree.
|
136
142
|
def depth
|
137
143
|
return 0 if is_root?
|
@@ -139,23 +145,26 @@ module Treat
|
|
139
145
|
end
|
140
146
|
# Does the entity have a feature ?
|
141
147
|
def has_feature?(feature)
|
142
|
-
@features.has_key?(feature)
|
143
|
-
[
|
148
|
+
(@features.has_key?(feature) &&
|
149
|
+
!@features[feature].nil?) ||
|
150
|
+
[:id, :value, :children, :dependencies].include?(feature)
|
144
151
|
end
|
145
152
|
alias :has? :has_feature?
|
146
153
|
# Link this node to the target node with
|
147
|
-
# the supplied
|
148
|
-
def
|
149
|
-
if id_or_node.is_a?
|
154
|
+
# the supplied dependency type.
|
155
|
+
def link(id_or_node, type = nil, directed = true, direction = 1)
|
156
|
+
if id_or_node.is_a?(Treat::Tree::Node)
|
150
157
|
id = root.find(id_or_node).id
|
151
158
|
else
|
152
159
|
id = id_or_node
|
153
160
|
end
|
154
|
-
@
|
161
|
+
@dependencies.each { |d| return if d.target == id }
|
162
|
+
@dependencies <<
|
163
|
+
Struct::Dependency.new(id, type, directed, direction)
|
155
164
|
end
|
156
165
|
# Find the node in the tree with the given id.
|
157
166
|
def find(id_or_node)
|
158
|
-
if id_or_node.is_a?
|
167
|
+
if id_or_node.is_a?(Treat::Tree::Node)
|
159
168
|
id = id_or_node.id
|
160
169
|
else
|
161
170
|
id = id_or_node
|
@@ -165,6 +174,7 @@ module Treat
|
|
165
174
|
r = child.find(id)
|
166
175
|
return r if r.is_a? Tree::Node
|
167
176
|
end
|
177
|
+
nil
|
168
178
|
end
|
169
179
|
# Find the root of the tree within which
|
170
180
|
# this node is contained.
|