treat 2.0.2 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/files/21552208.html +786 -0
- data/files/nethttp-cheat-sheet-2940.html +393 -0
- data/lib/treat/builder.rb +6 -0
- data/lib/treat/config/data/languages/agnostic.rb +2 -2
- data/lib/treat/core/server.rb +1 -0
- data/lib/treat/entities/entity/buildable.rb +1 -1
- data/lib/treat/loaders/linguistics.rb +6 -7
- data/lib/treat/loaders/stanford.rb +45 -11
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/categorizable.rb +30 -32
- data/lib/treat/workers/extractors/name_tag/stanford.rb +8 -24
- data/lib/treat/workers/formatters/readers/html.rb +1 -1
- data/lib/treat/workers/formatters/readers/xml.rb +1 -1
- data/lib/treat/workers/formatters/unserializers/mongo.rb +1 -1
- data/lib/treat/workers/groupable.rb +2 -1
- data/lib/treat/workers/inflectors/cardinalizers/linguistics.rb +3 -3
- data/lib/treat/workers/inflectors/conjugators/linguistics.rb +6 -4
- data/lib/treat/workers/inflectors/declensors/linguistics.rb +11 -18
- data/lib/treat/workers/inflectors/ordinalizers/linguistics.rb +4 -4
- data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +1 -1
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +23 -21
- data/lib/treat/workers/processors/parsers/stanford.rb +10 -20
- data/lib/treat/workers/processors/segmenters/stanford.rb +1 -1
- data/lib/treat/workers/processors/tokenizers/maxent.rb +29 -0
- data/lib/treat/workers/processors/tokenizers/stanford.rb +2 -4
- data/lib/treat.rb +1 -0
- data/spec/helper.rb +8 -6
- data/spec/sandbox.rb +18 -6
- data/spec/workers/agnostic.rb +76 -29
- data/spec/workers/english.rb +23 -73
- data/spec/workers/examples/english/economist/saving_the_euro.odt +0 -0
- metadata +6 -18
data/lib/treat/workers/categorizable.rb
CHANGED
@@ -9,26 +9,24 @@ module Treat::Workers::Categorizable
   @@lookup = {}

   # Find a worker group based on method.
-  def lookup(method)
-    @@lookup[method]
-  end
+  def lookup(method); @@lookup[method]; end

   def categorize!
     Treat.workers.members.each do |cat|
-
-
-
+      name = cat.capitalize.intern
+      conf = load_category_conf(cat)
+      create_category(name, conf)
     end
   end

   def load_category_conf(name)
-
-    if config.nil?
+    if !Treat.workers.respond_to?(name)
       raise Treat::Exception,
         "The configuration file " +
         "for #{cat_sym} is missing."
+    else
+      Treat.workers[name]
     end
-    config
   end

   def create_category(name, conf)
@@ -37,11 +35,11 @@ module Treat::Workers::Categorizable
     conf.each_pair do |group, worker|
       name = group.to_s.cc.intern
       category.module_eval do
-        @@methods = []
-        @@methods; end
-        self.constants; end
+        @@methods = []
+        def methods; @@methods; end
+        def groups; self.constants; end
       end
-
+      create_group(name, worker, category)
     end
   end

@@ -54,24 +52,6 @@ module Treat::Workers::Categorizable
     @@lookup[group.method] = group
   end

-  def bind_group_targets(group)
-    group.targets.each do |entity_type|
-      entity = Treat::Entities.
-      const_get(entity_type.cc)
-      entity.class_eval do
-        add_workers group
-      end
-    end
-  end
-
-  def register_group_presets(group, conf)
-    return unless conf.respond_to? :presets
-    conf.presets.each do |m|
-      @@methods << m
-      @@lookup[m] = group
-    end
-  end
-
   def set_group_options(group, conf)
     group.module_eval do
       extend Treat::Workers::Groupable
@@ -91,5 +71,23 @@ module Treat::Workers::Categorizable
       end
     end
   end
-
+
+  def bind_group_targets(group)
+    group.targets.each do |entity_type|
+      entity = Treat::Entities.
+      const_get(entity_type.cc)
+      entity.class_eval do
+        add_workers group
+      end
+    end
+  end
+
+  def register_group_presets(group, conf)
+    return unless conf.respond_to?(:presets)
+    conf.presets.each do |method|
+      @@methods << method
+      @@lookup[method] = group
+    end
+  end
+
 end
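
The hunks above condense `lookup` to a one-liner over the `@@lookup` class variable, route category configuration through `Treat.workers`, and move `bind_group_targets` and `register_group_presets` below the other helpers. A minimal standalone sketch of the lookup-table pattern, with illustrative names only (not Treat's real configuration objects):

# Hypothetical registry mirroring the @@lookup pattern above.
module WorkerRegistry
  @@lookup = {}

  def self.register(method, group); @@lookup[method] = group; end
  def self.lookup(method); @@lookup[method]; end
end

WorkerRegistry.register(:name_tag, :NameTagGroup)
WorkerRegistry.lookup(:name_tag)   # => :NameTagGroup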
data/lib/treat/workers/extractors/name_tag/stanford.rb
CHANGED
@@ -16,32 +16,21 @@ class Treat::Workers::Extractors::NameTag::Stanford

   def self.name_tag(entity, options = {})

-    pp = nil
-
     language = entity.language
-
     Treat::Loaders::Stanford.load(language)

     isolated_token = entity.is_a?(Treat::Entities::Token)
     tokens = isolated_token ? [entity] : entity.tokens
-
-
-
-
-
-
-
-
-    @@classifiers[language] ||=
-    StanfordCoreNLP::CRFClassifier.
-    getClassifier(ms)
-
+
+    unless classifier = @@classifiers[language]
+      model = Treat::Loaders::Stanford.find_model(:ner, language)
+      classifier = StanfordCoreNLP::CRFClassifier.getClassifier(model)
+      @@classifiers[language] = classifier
+    end
+
     token_list = StanfordCoreNLP.get_list(tokens)
-    sentence =
-    classify_sentence(token_list)
-
+    sentence = classifier.classify_sentence(token_list)
     i = 0
-    n = 0

     sentence.each do |s_token|
       tag = s_token.get(:answer).to_s.downcase
@@ -49,14 +38,9 @@ class Treat::Workers::Extractors::NameTag::Stanford
       return tag if isolated_token
       if tag
         tokens[i].set :name_tag, tag
-        n += 1
       end
       i += 1
     end
-
-    entity.set :named_entity_count, n
-
-    nil

   end

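
The name tagger now builds one CRF classifier per language and caches it in `@@classifiers`, instead of memoizing a multi-line expression with `||=`; it also drops the unused `pp`/`n` locals and the `:named_entity_count` annotation. A standalone sketch of that per-language lazy cache, with a stand-in builder in place of `StanfordCoreNLP::CRFClassifier.getClassifier` (illustrative names, not the gem's API):

# Per-language lazy cache; build_for stands in for the expensive
# classifier construction done once per language in the diff above.
class LazyClassifiers
  @@classifiers = {}

  def self.for(language)
    unless classifier = @@classifiers[language]
      classifier = build_for(language)   # expensive, runs once per language
      @@classifiers[language] = classifier
    end
    classifier
  end

  def self.build_for(language)
    "classifier-for-#{language}"         # placeholder object
  end
end

LazyClassifiers.for(:eng)   # builds and caches
LazyClassifiers.for(:eng)   # returns the cached instance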
data/lib/treat/workers/formatters/readers/html.rb
CHANGED
@@ -6,7 +6,7 @@
 # https://github.com/iterationlabs/ruby-readability
 class Treat::Workers::Formatters::Readers::HTML

-  silence_warnings { require '
+  silence_warnings { require 'ruby-readability' }

   # By default, don't backup the original HTML
   DefaultOptions = {
data/lib/treat/workers/formatters/readers/xml.rb
CHANGED
@@ -30,7 +30,7 @@ class Treat::Workers::Formatters::Readers::XML
     @@xml_reader ||= StanfordCoreNLP.load(
       :tokenize, :ssplit, :cleanxml)

-    text = StanfordCoreNLP::
+    text = StanfordCoreNLP::Annotation.new(xml)
     @@xml_reader.annotate(text)

     text.get(:sentences).each do |sentence|
data/lib/treat/workers/formatters/unserializers/mongo.rb
CHANGED
@@ -17,7 +17,7 @@ class Treat::Workers::Formatters::Unserializers::Mongo

     @@database ||= Mongo::Connection.
     new(Treat.databases.mongo.host).
-    db(Treat.databases.mongo.db
+    db(db || Treat.databases.mongo.db)

     supertype = Treat::Entities.const_get(
     entity.type.to_s.capitalize.intern).superclass.mn.downcase
data/lib/treat/workers/groupable.rb
CHANGED
@@ -5,7 +5,8 @@ module Treat::Workers::Groupable
     bits = self.ancestors[0].to_s.split('::')
     bits.collect! { |bit| bit.ucc }
     file = bits.join('/') + "/#{const.ucc}"
-
+    path = Treat.paths.lib + "#{file}.rb"
+    if not File.readable?(path)
       raise Treat::Exception,
         "File '#{file}.rb' corresponding to " +
         "requested worker #{self}::#{const} " +
data/lib/treat/workers/inflectors/cardinalizers/linguistics.rb
CHANGED
@@ -35,9 +35,9 @@ class Treat::Workers::Inflectors::Cardinalizers::Linguistics
   # More specific options when using :type => :ordinal:
   def self.cardinal(entity, options = {})
     options = DefaultOptions.merge(options)
-
-    load(
-
+    lang = entity.language
+    code = Treat::Loaders::Linguistics.load(lang)
+    entity.to_s.send(code).numwords(options)
   end

 end
data/lib/treat/workers/inflectors/conjugators/linguistics.rb
CHANGED
@@ -35,13 +35,15 @@ module Treat::Workers::Inflectors::Conjugators::Linguistics

     options = Forms[options[:form].to_s] if options[:form]

-
+    code = Treat::Loaders::Linguistics.load(entity.language)
+    obj = entity.to_s.send(code)
+
     if options[:mode] == 'infinitive'
-
+      obj.infinitive
     elsif options[:mode] == 'participle' && options[:tense] == 'present'
-
+      obj.present_participle
     elsif options[:count] == 'plural' && options.size == 1
-
+      obj.plural_verb
     else
       raise Treat::Exception,
         'This combination of modes, tenses, persons ' +
data/lib/treat/workers/inflectors/declensors/linguistics.rb
CHANGED
@@ -17,34 +17,27 @@ class Treat::Workers::Inflectors::Declensors::Linguistics

     cat = entity.get(:category)
     return if cat && !POS.include?(cat)
+
     unless options[:count]
       raise Treat::Exception, 'Must supply ' +
       ':count option ("singular" or "plural").'
     end
-
-    klass = Treat::Loaders::
-    Linguistics.load(entity.language)
-    string = entity.to_s
-
-    if options[:count].to_s == 'plural'
-      if (entity.has?(:category))
-        result = ''
-        silence_warnings do
-          result = klass.send(
-          :"plural_#{entity.category}",
-          string)
-        end
-        return result
-      else
-        return klass.plural(string)
-      end

-
+    unless options[:count].to_s == 'plural'
       raise Treat::Exception,
         "Ruby Linguistics does not support " +
         "singularization of words."
     end

+    lang = entity.language
+    code = Treat::Loaders::Linguistics.load(lang)
+    obj = entity.to_s.send(code)
+
+    if cat = entity.get(:category)
+      method = "plural_#{cat}"
+      obj.send(method)
+    else; obj.plural; end
+
   end

 end
data/lib/treat/workers/inflectors/ordinalizers/linguistics.rb
CHANGED
@@ -11,11 +11,11 @@ class Treat::Workers::Inflectors::Ordinalizers::Linguistics

   # Desribe a number in words in ordinal form, using the
   # 'linguistics' gem.
-  def self.ordinal(
+  def self.ordinal(entity, options = {})
     options = DefaultOptions.merge(options)
-
-    Linguistics.load(
-
+    lang = entity.language
+    code = Treat::Loaders::Linguistics.load(lang)
+    entity.to_s.send(code).ordinate
   end

 end
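
The four inflector hunks above (cardinalizer, conjugator, declensor, ordinalizer) follow the same pattern: obtain a language code from Treat::Loaders::Linguistics, then call an inflection method that the linguistics gem mixes into the string. A hedged sketch of the underlying gem calls, assuming the linguistics gem (~> 2.x) and that the loader returns :en for English; the method names are the ones the hunks themselves invoke:

require 'linguistics'   # assumed gem; Treat::Loaders::Linguistics wraps this
Linguistics.use(:en)    # mixes an #en inflector into core classes

'cat'.en.plural         # plural form, as called by the declensor
'42'.en.numwords        # number spelled out, as called by the cardinalizer
'42'.en.ordinate        # ordinal form in words, as called by the ordinalizer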
data/lib/treat/workers/lexicalizers/taggers/stanford.rb
CHANGED
@@ -1,15 +1,15 @@
-# POS tagging using (i) explicit use of both preceding
-# and following tag contexts via a dependency network
-# representation, (ii) broad use of lexical features,
-# including jointly conditioning on multiple consecutive
-# words, (iii) effective use of priors in conditional
-# loglinear models, and (iv) fine-grained modeling of
+# POS tagging using (i) explicit use of both preceding
+# and following tag contexts via a dependency network
+# representation, (ii) broad use of lexical features,
+# including jointly conditioning on multiple consecutive
+# words, (iii) effective use of priors in conditional
+# loglinear models, and (iv) fine-grained modeling of
 # unknown word features.
 #
 # Original paper: Toutanova, Manning, Klein and Singer.
-# 2003. Feature-Rich Part-of-Speech Tagging with a
-# Cyclic Dependency Network. In Proceedings of the
-# Conference of the North American Chapter of the
+# 2003. Feature-Rich Part-of-Speech Tagging with a
+# Cyclic Dependency Network. In Proceedings of the
+# Conference of the North American Chapter of the
 # Association for Computational Linguistics.
 class Treat::Workers::Lexicalizers::Taggers::Stanford

@@ -21,6 +21,9 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
     :tagger_model => nil
   }

+  # Shortcut for gem config.
+  Config = StanfordCoreNLP::Config
+
   # Tag the word using one of the Stanford taggers.
   def self.tag(entity, options = {})

@@ -36,9 +39,9 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
     return 'P' if entity.is_a?(Treat::Entities::Phrase)
     return 'F' if entity.is_a?(Treat::Entities::Fragment)
     return 'G' if entity.is_a?(Treat::Entities::Group)
-
+
     # Handle options and initialize the tagger.
-    lang = entity.language
+    lang = entity.language.intern
     init_tagger(lang) unless @@taggers[lang]
     options = get_options(options, lang)
     tokens, t_list = get_token_list(entity)
@@ -46,7 +49,7 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
     # Do the tagging.
     i = 0
     isolated_token = entity.is_a?(Treat::Entities::Token)
-
+
     @@taggers[lang].apply(t_list).each do |tok|
       tokens[i].set(:tag, tok.tag)
       tokens[i].set(:tag_set,
@@ -59,21 +62,20 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford

   # Initialize the tagger for a language.
   def self.init_tagger(language)
-
-
-
-
-
-
-    @@taggers[language]
-    StanfordCoreNLP::MaxentTagger.new(model)
+    unless @@taggers[language]
+      Treat::Loaders::Stanford.load(language)
+      model = Treat::Loaders::Stanford.find_model(:pos,language)
+      tagger = StanfordCoreNLP::MaxentTagger.new(model)
+      @@taggers[language] = tagger
+    end
+    @@taggers[language]
   end

   # Handle the options for the tagger.
   def self.get_options(options, language)
     options = DefaultOptions.merge(options)
     if options[:tagger_model]
-
+      StanfordCoreNLP.set_model('pos.model',
       options[:tagger_model])
     end
     options[:tag_set] =
data/lib/treat/workers/processors/parsers/stanford.rb
CHANGED
@@ -29,15 +29,14 @@ class Treat::Workers::Processors::Parsers::Stanford
   # instead of displaying it.
   def self.parse(entity, options = {})

-    val = entity.to_s
-    lang
-    init(lang, options)
+    val, lang = entity.to_s, entity.language
+    init(lang, options) unless @@parsers[lang]

     entity.check_hasnt_children

     tag_set = StanfordCoreNLP::Config::TagSets[lang]

-    text = ::StanfordCoreNLP::
+    text = ::StanfordCoreNLP::Annotation.new(val)
     @@parsers[lang].annotate(text)

     text.get(:sentences).each do |s|
@@ -50,7 +49,7 @@ class Treat::Workers::Processors::Parsers::Stanford
         entity.set :tag, tag_s
         entity.set :tag_opt, tag_opt if tag_opt
         recurse(s.get(:tree).children[0], entity, tag_set)
-        break #######
+        break ####### ? FIX
       else
         recurse(s.get(:tree), entity, tag_set)
       end
@@ -62,26 +61,17 @@ class Treat::Workers::Processors::Parsers::Stanford
   end

   def self.init(lang, options)
-    return if @@parsers[lang]
-
     Treat::Loaders::Stanford.load(lang)
-
     options = DefaultOptions.merge(options)
-    StanfordCoreNLP.use(lang)
+    StanfordCoreNLP.use(lang.intern)
     if options[:tagger_model]
-
-      'pos.model', options[:tagger_model]
-      )
+      StanfordCoreNLP.set_model('pos.model', options[:tagger_model])
     end
     if options[:parser_model]
-
-      'parser.model', options[:parser_model]
-      )
+      StanfordCoreNLP.set_model('parser.model', options[:parser_model])
     end
-
-
-      :tokenize, :ssplit, :pos, :lemma, :parse
-    )
+    annotators = [:tokenize, :ssplit, :pos, :lemma, :parse]
+    @@parsers[lang] = StanfordCoreNLP.load(*annotators)
   end

   # Helper method which recurses the tree supplied by
@@ -128,7 +118,7 @@ class Treat::Workers::Processors::Parsers::Stanford
       l = java_child.children[0].to_s
       v = java_child.children[0].value.to_s.strip

-      # Mhmhmhmhmhm
+      # Mhmhmhmhmhm FIX!
       val = (l == v) ? v : l.split(' ')[-1].gsub(')', '')
       ruby_child = Treat::Entities::Token.from_string(val)
     end
data/lib/treat/workers/processors/segmenters/stanford.rb
CHANGED
@@ -32,7 +32,7 @@ class Treat::Workers::Processors::Segmenters::Stanford
       ::StanfordCoreNLP.load(:tokenize, :ssplit)

     s = entity.to_s
-    text = ::StanfordCoreNLP::
+    text = ::StanfordCoreNLP::Annotation.new(s)

     @@segmenter.annotate(text)
     text.get(:sentences).each do |sentence|
data/lib/treat/workers/processors/tokenizers/maxent.rb
ADDED
@@ -0,0 +1,29 @@
+# Maximum entropy tokenization supplied by OpenNLP.
+class Treat::Workers::Processors::Tokenizers::Maxent
+
+  require 'open-nlp'
+  OpenNLP.load
+
+  # Maximum entropy tokenization.
+  def self.tokenize(entity, options = {})
+
+    lang = entity.language
+    str = entity.to_s
+
+    unless @@tokenizers[lang]
+      OpenNLP.use(lang.intern)
+      @@tokenizers[lang] =
+      OpenNLP::TokenizerME.new
+    end
+
+    tokenizer = @@tokenizers[lang]
+    tokens = tokenizer.tokenize(str).to_a
+
+    tokens.each do |token|
+      entity << Treat::Entities
+      ::Token.from_string(chunk)
+    end
+
+  end
+
+end
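
As added, the new tokenizer references `chunk` inside a block whose variable is `token`, and `@@tokenizers` is read before any line initializes it. A hedged sketch of what the method presumably intends, keeping the same OpenNLP calls as the diff and changing only the cache initialization and the variable name (this is not the released code):

# Sketch only: mirrors the calls shown in the hunk above.
class Treat::Workers::Processors::Tokenizers::Maxent

  require 'open-nlp'
  OpenNLP.load

  @@tokenizers = {}   # assumed: per-language tokenizer cache

  def self.tokenize(entity, options = {})
    lang = entity.language
    str = entity.to_s

    unless @@tokenizers[lang]
      OpenNLP.use(lang.intern)
      @@tokenizers[lang] = OpenNLP::TokenizerME.new
    end

    @@tokenizers[lang].tokenize(str).to_a.each do |token|
      # 'token' (not 'chunk') is the string produced by the tokenizer
      entity << Treat::Entities::Token.from_string(token)
    end
  end

end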
data/lib/treat/workers/processors/tokenizers/stanford.rb
CHANGED
@@ -27,11 +27,9 @@ class Treat::Workers::Processors::Tokenizers::Stanford
   # replacing "..." by ``...''. Off by default.
   def self.tokenize(entity, options = {})
     options = DefaultOptions.merge(options)
-    @@tokenizer ||=
-    ::StanfordCoreNLP.load(:tokenize)
+    @@tokenizer ||= StanfordCoreNLP.load(:tokenize)
     entity.check_hasnt_children
-    text = ::StanfordCoreNLP::
-    Text.new(entity.to_s)
+    text = ::StanfordCoreNLP::Annotation.new(entity.to_s)
     @@tokenizer.annotate(text)
     add_tokens(entity, text.get(:tokens), options)
   end
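
The XML reader, parser, segmenter and tokenizer hunks above all make the same substitution: the object handed to the Stanford pipeline is now built with StanfordCoreNLP::Annotation.new instead of the previous StanfordCoreNLP::Text form, presumably tracking a rename in the stanford-core-nlp gem. The shared call pattern, assembled only from calls that appear verbatim in the hunks (assumes the stanford-core-nlp gem and its models are installed):

require 'stanford-core-nlp'   # assumed gem require

pipeline = StanfordCoreNLP.load(:tokenize, :ssplit)
text = StanfordCoreNLP::Annotation.new('A walk in the park. A trip on a boat.')
pipeline.annotate(text)
text.get(:sentences).each do |sentence|
  # each sentence is a CoreMap proxy; the workers above read it with further
  # get(...) calls, e.g. sentence.get(:tokens) or sentence.get(:tree)
end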
data/lib/treat.rb
CHANGED
data/spec/helper.rb
CHANGED
@@ -1,20 +1,23 @@
 require_relative '../lib/treat'

+include Treat::Core::DSL
+
 module Treat::Specs

   require 'rspec'

   # Some configuration options for devel.
+
   Treat.databases.mongo.db = 'treat_test'
   Treat.libraries.stanford.model_path =
-    '/ruby/stanford
+    '/ruby/stanford-core-nlp-minimal/models/'
   Treat.libraries.stanford.jar_path =
-    '/ruby/stanford
+    '/ruby/stanford-core-nlp-minimal/bin/'
   Treat.libraries.punkt.model_path =
-    '/ruby/punkt/'
+    '/ruby/punkt/models/'
   Treat.libraries.reuters.model_path =
-    '/ruby/reuters/'
-
+    '/ruby/reuters/models/'
+
   ModuleFiles = ['entities/*.rb', 'learning/*.rb']

   # Provide helper functions for running specs.
@@ -25,7 +28,6 @@ module Treat::Specs
     require 'simplecov'
     SimpleCov.start do
       add_filter '/spec/'
-      add_filter '/config/'
       add_group 'Core', 'treat/core'
       add_group 'Entities', 'treat/entities'
       add_group 'Helpers', 'treat/helpers'
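
With include Treat::Core::DSL now at the top of the spec helper, specs can build entities with the bare constructors that also appear in the sandbox diff below. A short usage sketch, assuming the treat gem and its configured models are available:

require 'treat'           # assumed; the helper itself uses require_relative
include Treat::Core::DSL

s = sentence 'Hello, world!'
s.print_tree
para = paragraph('A walk in the park. A trip on a boat.').segment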
data/spec/sandbox.rb
CHANGED
@@ -1,13 +1,25 @@
 # encoding: utf-8
 require_relative '../lib/treat'

-
-
-
-
-
-
+Treat.databases.mongo.db = 'treat_test'
+Treat.libraries.stanford.model_path =
+'/ruby/stanford-core-nlp-minimal/models/'
+Treat.libraries.stanford.jar_path =
+'/ruby/stanford-core-nlp-minimal/bin/'
+Treat.libraries.punkt.model_path =
+'/ruby/punkt/models/'
+Treat.libraries.reuters.model_path =
+'/ruby/reuters/models/'
+
+# include Treat::Core::DSL
+
+Treat::Builder.new do
+  s = sentence "Hello, world!"
+  s.print_tree
+end

+p = paragraph('A walk in the park. A trip on a boat.').segment
+p.visualize :dot, file: 'test.dot'
 =begin

 g = group("I was running")