treat 2.0.2 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/files/21552208.html +786 -0
- data/files/nethttp-cheat-sheet-2940.html +393 -0
- data/lib/treat/builder.rb +6 -0
- data/lib/treat/config/data/languages/agnostic.rb +2 -2
- data/lib/treat/core/server.rb +1 -0
- data/lib/treat/entities/entity/buildable.rb +1 -1
- data/lib/treat/loaders/linguistics.rb +6 -7
- data/lib/treat/loaders/stanford.rb +45 -11
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/categorizable.rb +30 -32
- data/lib/treat/workers/extractors/name_tag/stanford.rb +8 -24
- data/lib/treat/workers/formatters/readers/html.rb +1 -1
- data/lib/treat/workers/formatters/readers/xml.rb +1 -1
- data/lib/treat/workers/formatters/unserializers/mongo.rb +1 -1
- data/lib/treat/workers/groupable.rb +2 -1
- data/lib/treat/workers/inflectors/cardinalizers/linguistics.rb +3 -3
- data/lib/treat/workers/inflectors/conjugators/linguistics.rb +6 -4
- data/lib/treat/workers/inflectors/declensors/linguistics.rb +11 -18
- data/lib/treat/workers/inflectors/ordinalizers/linguistics.rb +4 -4
- data/lib/treat/workers/lexicalizers/sensers/wordnet.rb +1 -1
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +23 -21
- data/lib/treat/workers/processors/parsers/stanford.rb +10 -20
- data/lib/treat/workers/processors/segmenters/stanford.rb +1 -1
- data/lib/treat/workers/processors/tokenizers/maxent.rb +29 -0
- data/lib/treat/workers/processors/tokenizers/stanford.rb +2 -4
- data/lib/treat.rb +1 -0
- data/spec/helper.rb +8 -6
- data/spec/sandbox.rb +18 -6
- data/spec/workers/agnostic.rb +76 -29
- data/spec/workers/english.rb +23 -73
- data/spec/workers/examples/english/economist/saving_the_euro.odt +0 -0
- metadata +6 -18
@@ -9,26 +9,24 @@ module Treat::Workers::Categorizable
|
|
9
9
|
@@lookup = {}
|
10
10
|
|
11
11
|
# Find a worker group based on method.
|
12
|
-
def lookup(method)
|
13
|
-
@@lookup[method]
|
14
|
-
end
|
12
|
+
def lookup(method); @@lookup[method]; end
|
15
13
|
|
16
14
|
def categorize!
|
17
15
|
Treat.workers.members.each do |cat|
|
18
|
-
|
19
|
-
|
20
|
-
|
16
|
+
name = cat.capitalize.intern
|
17
|
+
conf = load_category_conf(cat)
|
18
|
+
create_category(name, conf)
|
21
19
|
end
|
22
20
|
end
|
23
21
|
|
24
22
|
def load_category_conf(name)
|
25
|
-
|
26
|
-
if config.nil?
|
23
|
+
if !Treat.workers.respond_to?(name)
|
27
24
|
raise Treat::Exception,
|
28
25
|
"The configuration file " +
|
29
26
|
"for #{cat_sym} is missing."
|
27
|
+
else
|
28
|
+
Treat.workers[name]
|
30
29
|
end
|
31
|
-
config
|
32
30
|
end
|
33
31
|
|
34
32
|
def create_category(name, conf)
|
@@ -37,11 +35,11 @@ module Treat::Workers::Categorizable
|
|
37
35
|
conf.each_pair do |group, worker|
|
38
36
|
name = group.to_s.cc.intern
|
39
37
|
category.module_eval do
|
40
|
-
@@methods = []
|
41
|
-
@@methods; end
|
42
|
-
self.constants; end
|
38
|
+
@@methods = []
|
39
|
+
def methods; @@methods; end
|
40
|
+
def groups; self.constants; end
|
43
41
|
end
|
44
|
-
|
42
|
+
create_group(name, worker, category)
|
45
43
|
end
|
46
44
|
end
|
47
45
|
|
@@ -54,24 +52,6 @@ module Treat::Workers::Categorizable
|
|
54
52
|
@@lookup[group.method] = group
|
55
53
|
end
|
56
54
|
|
57
|
-
def bind_group_targets(group)
|
58
|
-
group.targets.each do |entity_type|
|
59
|
-
entity = Treat::Entities.
|
60
|
-
const_get(entity_type.cc)
|
61
|
-
entity.class_eval do
|
62
|
-
add_workers group
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
def register_group_presets(group, conf)
|
68
|
-
return unless conf.respond_to? :presets
|
69
|
-
conf.presets.each do |m|
|
70
|
-
@@methods << m
|
71
|
-
@@lookup[m] = group
|
72
|
-
end
|
73
|
-
end
|
74
|
-
|
75
55
|
def set_group_options(group, conf)
|
76
56
|
group.module_eval do
|
77
57
|
extend Treat::Workers::Groupable
|
@@ -91,5 +71,23 @@ module Treat::Workers::Categorizable
|
|
91
71
|
end
|
92
72
|
end
|
93
73
|
end
|
94
|
-
|
74
|
+
|
75
|
+
def bind_group_targets(group)
|
76
|
+
group.targets.each do |entity_type|
|
77
|
+
entity = Treat::Entities.
|
78
|
+
const_get(entity_type.cc)
|
79
|
+
entity.class_eval do
|
80
|
+
add_workers group
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def register_group_presets(group, conf)
|
86
|
+
return unless conf.respond_to?(:presets)
|
87
|
+
conf.presets.each do |method|
|
88
|
+
@@methods << method
|
89
|
+
@@lookup[method] = group
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
95
93
|
end
|
@@ -16,32 +16,21 @@ class Treat::Workers::Extractors::NameTag::Stanford
|
|
16
16
|
|
17
17
|
def self.name_tag(entity, options = {})
|
18
18
|
|
19
|
-
pp = nil
|
20
|
-
|
21
19
|
language = entity.language
|
22
|
-
|
23
20
|
Treat::Loaders::Stanford.load(language)
|
24
21
|
|
25
22
|
isolated_token = entity.is_a?(Treat::Entities::Token)
|
26
23
|
tokens = isolated_token ? [entity] : entity.tokens
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
@@classifiers[language] ||=
|
36
|
-
StanfordCoreNLP::CRFClassifier.
|
37
|
-
getClassifier(ms)
|
38
|
-
|
24
|
+
|
25
|
+
unless classifier = @@classifiers[language]
|
26
|
+
model = Treat::Loaders::Stanford.find_model(:ner, language)
|
27
|
+
classifier = StanfordCoreNLP::CRFClassifier.getClassifier(model)
|
28
|
+
@@classifiers[language] = classifier
|
29
|
+
end
|
30
|
+
|
39
31
|
token_list = StanfordCoreNLP.get_list(tokens)
|
40
|
-
sentence =
|
41
|
-
classify_sentence(token_list)
|
42
|
-
|
32
|
+
sentence = classifier.classify_sentence(token_list)
|
43
33
|
i = 0
|
44
|
-
n = 0
|
45
34
|
|
46
35
|
sentence.each do |s_token|
|
47
36
|
tag = s_token.get(:answer).to_s.downcase
|
@@ -49,14 +38,9 @@ class Treat::Workers::Extractors::NameTag::Stanford
|
|
49
38
|
return tag if isolated_token
|
50
39
|
if tag
|
51
40
|
tokens[i].set :name_tag, tag
|
52
|
-
n += 1
|
53
41
|
end
|
54
42
|
i += 1
|
55
43
|
end
|
56
|
-
|
57
|
-
entity.set :named_entity_count, n
|
58
|
-
|
59
|
-
nil
|
60
44
|
|
61
45
|
end
|
62
46
|
|
@@ -6,7 +6,7 @@
|
|
6
6
|
# https://github.com/iterationlabs/ruby-readability
|
7
7
|
class Treat::Workers::Formatters::Readers::HTML
|
8
8
|
|
9
|
-
silence_warnings { require '
|
9
|
+
silence_warnings { require 'ruby-readability' }
|
10
10
|
|
11
11
|
# By default, don't backup the original HTML
|
12
12
|
DefaultOptions = {
|
@@ -30,7 +30,7 @@ class Treat::Workers::Formatters::Readers::XML
|
|
30
30
|
@@xml_reader ||= StanfordCoreNLP.load(
|
31
31
|
:tokenize, :ssplit, :cleanxml)
|
32
32
|
|
33
|
-
text = StanfordCoreNLP::
|
33
|
+
text = StanfordCoreNLP::Annotation.new(xml)
|
34
34
|
@@xml_reader.annotate(text)
|
35
35
|
|
36
36
|
text.get(:sentences).each do |sentence|
|
@@ -17,7 +17,7 @@ class Treat::Workers::Formatters::Unserializers::Mongo
|
|
17
17
|
|
18
18
|
@@database ||= Mongo::Connection.
|
19
19
|
new(Treat.databases.mongo.host).
|
20
|
-
db(Treat.databases.mongo.db
|
20
|
+
db(db || Treat.databases.mongo.db)
|
21
21
|
|
22
22
|
supertype = Treat::Entities.const_get(
|
23
23
|
entity.type.to_s.capitalize.intern).superclass.mn.downcase
|
@@ -5,7 +5,8 @@ module Treat::Workers::Groupable
|
|
5
5
|
bits = self.ancestors[0].to_s.split('::')
|
6
6
|
bits.collect! { |bit| bit.ucc }
|
7
7
|
file = bits.join('/') + "/#{const.ucc}"
|
8
|
-
|
8
|
+
path = Treat.paths.lib + "#{file}.rb"
|
9
|
+
if not File.readable?(path)
|
9
10
|
raise Treat::Exception,
|
10
11
|
"File '#{file}.rb' corresponding to " +
|
11
12
|
"requested worker #{self}::#{const} " +
|
@@ -35,9 +35,9 @@ class Treat::Workers::Inflectors::Cardinalizers::Linguistics
|
|
35
35
|
# More specific options when using :type => :ordinal:
|
36
36
|
def self.cardinal(entity, options = {})
|
37
37
|
options = DefaultOptions.merge(options)
|
38
|
-
|
39
|
-
load(
|
40
|
-
|
38
|
+
lang = entity.language
|
39
|
+
code = Treat::Loaders::Linguistics.load(lang)
|
40
|
+
entity.to_s.send(code).numwords(options)
|
41
41
|
end
|
42
42
|
|
43
43
|
end
|
@@ -35,13 +35,15 @@ module Treat::Workers::Inflectors::Conjugators::Linguistics
|
|
35
35
|
|
36
36
|
options = Forms[options[:form].to_s] if options[:form]
|
37
37
|
|
38
|
-
|
38
|
+
code = Treat::Loaders::Linguistics.load(entity.language)
|
39
|
+
obj = entity.to_s.send(code)
|
40
|
+
|
39
41
|
if options[:mode] == 'infinitive'
|
40
|
-
|
42
|
+
obj.infinitive
|
41
43
|
elsif options[:mode] == 'participle' && options[:tense] == 'present'
|
42
|
-
|
44
|
+
obj.present_participle
|
43
45
|
elsif options[:count] == 'plural' && options.size == 1
|
44
|
-
|
46
|
+
obj.plural_verb
|
45
47
|
else
|
46
48
|
raise Treat::Exception,
|
47
49
|
'This combination of modes, tenses, persons ' +
|
@@ -17,34 +17,27 @@ class Treat::Workers::Inflectors::Declensors::Linguistics
|
|
17
17
|
|
18
18
|
cat = entity.get(:category)
|
19
19
|
return if cat && !POS.include?(cat)
|
20
|
+
|
20
21
|
unless options[:count]
|
21
22
|
raise Treat::Exception, 'Must supply ' +
|
22
23
|
':count option ("singular" or "plural").'
|
23
24
|
end
|
24
|
-
|
25
|
-
klass = Treat::Loaders::
|
26
|
-
Linguistics.load(entity.language)
|
27
|
-
string = entity.to_s
|
28
|
-
|
29
|
-
if options[:count].to_s == 'plural'
|
30
|
-
if (entity.has?(:category))
|
31
|
-
result = ''
|
32
|
-
silence_warnings do
|
33
|
-
result = klass.send(
|
34
|
-
:"plural_#{entity.category}",
|
35
|
-
string)
|
36
|
-
end
|
37
|
-
return result
|
38
|
-
else
|
39
|
-
return klass.plural(string)
|
40
|
-
end
|
41
25
|
|
42
|
-
|
26
|
+
unless options[:count].to_s == 'plural'
|
43
27
|
raise Treat::Exception,
|
44
28
|
"Ruby Linguistics does not support " +
|
45
29
|
"singularization of words."
|
46
30
|
end
|
47
31
|
|
32
|
+
lang = entity.language
|
33
|
+
code = Treat::Loaders::Linguistics.load(lang)
|
34
|
+
obj = entity.to_s.send(code)
|
35
|
+
|
36
|
+
if cat = entity.get(:category)
|
37
|
+
method = "plural_#{cat}"
|
38
|
+
obj.send(method)
|
39
|
+
else; obj.plural; end
|
40
|
+
|
48
41
|
end
|
49
42
|
|
50
43
|
end
|
@@ -11,11 +11,11 @@ class Treat::Workers::Inflectors::Ordinalizers::Linguistics
|
|
11
11
|
|
12
12
|
# Desribe a number in words in ordinal form, using the
|
13
13
|
# 'linguistics' gem.
|
14
|
-
def self.ordinal(
|
14
|
+
def self.ordinal(entity, options = {})
|
15
15
|
options = DefaultOptions.merge(options)
|
16
|
-
|
17
|
-
Linguistics.load(
|
18
|
-
|
16
|
+
lang = entity.language
|
17
|
+
code = Treat::Loaders::Linguistics.load(lang)
|
18
|
+
entity.to_s.send(code).ordinate
|
19
19
|
end
|
20
20
|
|
21
21
|
end
|
@@ -1,15 +1,15 @@
|
|
1
|
-
# POS tagging using (i) explicit use of both preceding
|
2
|
-
# and following tag contexts via a dependency network
|
3
|
-
# representation, (ii) broad use of lexical features,
|
4
|
-
# including jointly conditioning on multiple consecutive
|
5
|
-
# words, (iii) effective use of priors in conditional
|
6
|
-
# loglinear models, and (iv) fine-grained modeling of
|
1
|
+
# POS tagging using (i) explicit use of both preceding
|
2
|
+
# and following tag contexts via a dependency network
|
3
|
+
# representation, (ii) broad use of lexical features,
|
4
|
+
# including jointly conditioning on multiple consecutive
|
5
|
+
# words, (iii) effective use of priors in conditional
|
6
|
+
# loglinear models, and (iv) fine-grained modeling of
|
7
7
|
# unknown word features.
|
8
8
|
#
|
9
9
|
# Original paper: Toutanova, Manning, Klein and Singer.
|
10
|
-
# 2003. Feature-Rich Part-of-Speech Tagging with a
|
11
|
-
# Cyclic Dependency Network. In Proceedings of the
|
12
|
-
# Conference of the North American Chapter of the
|
10
|
+
# 2003. Feature-Rich Part-of-Speech Tagging with a
|
11
|
+
# Cyclic Dependency Network. In Proceedings of the
|
12
|
+
# Conference of the North American Chapter of the
|
13
13
|
# Association for Computational Linguistics.
|
14
14
|
class Treat::Workers::Lexicalizers::Taggers::Stanford
|
15
15
|
|
@@ -21,6 +21,9 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
|
|
21
21
|
:tagger_model => nil
|
22
22
|
}
|
23
23
|
|
24
|
+
# Shortcut for gem config.
|
25
|
+
Config = StanfordCoreNLP::Config
|
26
|
+
|
24
27
|
# Tag the word using one of the Stanford taggers.
|
25
28
|
def self.tag(entity, options = {})
|
26
29
|
|
@@ -36,9 +39,9 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
|
|
36
39
|
return 'P' if entity.is_a?(Treat::Entities::Phrase)
|
37
40
|
return 'F' if entity.is_a?(Treat::Entities::Fragment)
|
38
41
|
return 'G' if entity.is_a?(Treat::Entities::Group)
|
39
|
-
|
42
|
+
|
40
43
|
# Handle options and initialize the tagger.
|
41
|
-
lang = entity.language
|
44
|
+
lang = entity.language.intern
|
42
45
|
init_tagger(lang) unless @@taggers[lang]
|
43
46
|
options = get_options(options, lang)
|
44
47
|
tokens, t_list = get_token_list(entity)
|
@@ -46,7 +49,7 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
|
|
46
49
|
# Do the tagging.
|
47
50
|
i = 0
|
48
51
|
isolated_token = entity.is_a?(Treat::Entities::Token)
|
49
|
-
|
52
|
+
|
50
53
|
@@taggers[lang].apply(t_list).each do |tok|
|
51
54
|
tokens[i].set(:tag, tok.tag)
|
52
55
|
tokens[i].set(:tag_set,
|
@@ -59,21 +62,20 @@ class Treat::Workers::Lexicalizers::Taggers::Stanford
|
|
59
62
|
|
60
63
|
# Initialize the tagger for a language.
|
61
64
|
def self.init_tagger(language)
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
@@taggers[language]
|
69
|
-
StanfordCoreNLP::MaxentTagger.new(model)
|
65
|
+
unless @@taggers[language]
|
66
|
+
Treat::Loaders::Stanford.load(language)
|
67
|
+
model = Treat::Loaders::Stanford.find_model(:pos,language)
|
68
|
+
tagger = StanfordCoreNLP::MaxentTagger.new(model)
|
69
|
+
@@taggers[language] = tagger
|
70
|
+
end
|
71
|
+
@@taggers[language]
|
70
72
|
end
|
71
73
|
|
72
74
|
# Handle the options for the tagger.
|
73
75
|
def self.get_options(options, language)
|
74
76
|
options = DefaultOptions.merge(options)
|
75
77
|
if options[:tagger_model]
|
76
|
-
|
78
|
+
StanfordCoreNLP.set_model('pos.model',
|
77
79
|
options[:tagger_model])
|
78
80
|
end
|
79
81
|
options[:tag_set] =
|
@@ -29,15 +29,14 @@ class Treat::Workers::Processors::Parsers::Stanford
|
|
29
29
|
# instead of displaying it.
|
30
30
|
def self.parse(entity, options = {})
|
31
31
|
|
32
|
-
val = entity.to_s
|
33
|
-
lang
|
34
|
-
init(lang, options)
|
32
|
+
val, lang = entity.to_s, entity.language
|
33
|
+
init(lang, options) unless @@parsers[lang]
|
35
34
|
|
36
35
|
entity.check_hasnt_children
|
37
36
|
|
38
37
|
tag_set = StanfordCoreNLP::Config::TagSets[lang]
|
39
38
|
|
40
|
-
text = ::StanfordCoreNLP::
|
39
|
+
text = ::StanfordCoreNLP::Annotation.new(val)
|
41
40
|
@@parsers[lang].annotate(text)
|
42
41
|
|
43
42
|
text.get(:sentences).each do |s|
|
@@ -50,7 +49,7 @@ class Treat::Workers::Processors::Parsers::Stanford
|
|
50
49
|
entity.set :tag, tag_s
|
51
50
|
entity.set :tag_opt, tag_opt if tag_opt
|
52
51
|
recurse(s.get(:tree).children[0], entity, tag_set)
|
53
|
-
break #######
|
52
|
+
break ####### ? FIX
|
54
53
|
else
|
55
54
|
recurse(s.get(:tree), entity, tag_set)
|
56
55
|
end
|
@@ -62,26 +61,17 @@ class Treat::Workers::Processors::Parsers::Stanford
|
|
62
61
|
end
|
63
62
|
|
64
63
|
def self.init(lang, options)
|
65
|
-
return if @@parsers[lang]
|
66
|
-
|
67
64
|
Treat::Loaders::Stanford.load(lang)
|
68
|
-
|
69
65
|
options = DefaultOptions.merge(options)
|
70
|
-
StanfordCoreNLP.use(lang)
|
66
|
+
StanfordCoreNLP.use(lang.intern)
|
71
67
|
if options[:tagger_model]
|
72
|
-
|
73
|
-
'pos.model', options[:tagger_model]
|
74
|
-
)
|
68
|
+
StanfordCoreNLP.set_model('pos.model', options[:tagger_model])
|
75
69
|
end
|
76
70
|
if options[:parser_model]
|
77
|
-
|
78
|
-
'parser.model', options[:parser_model]
|
79
|
-
)
|
71
|
+
StanfordCoreNLP.set_model('parser.model', options[:parser_model])
|
80
72
|
end
|
81
|
-
|
82
|
-
|
83
|
-
:tokenize, :ssplit, :pos, :lemma, :parse
|
84
|
-
)
|
73
|
+
annotators = [:tokenize, :ssplit, :pos, :lemma, :parse]
|
74
|
+
@@parsers[lang] = StanfordCoreNLP.load(*annotators)
|
85
75
|
end
|
86
76
|
|
87
77
|
# Helper method which recurses the tree supplied by
|
@@ -128,7 +118,7 @@ class Treat::Workers::Processors::Parsers::Stanford
|
|
128
118
|
l = java_child.children[0].to_s
|
129
119
|
v = java_child.children[0].value.to_s.strip
|
130
120
|
|
131
|
-
# Mhmhmhmhmhm
|
121
|
+
# Mhmhmhmhmhm FIX!
|
132
122
|
val = (l == v) ? v : l.split(' ')[-1].gsub(')', '')
|
133
123
|
ruby_child = Treat::Entities::Token.from_string(val)
|
134
124
|
end
|
@@ -32,7 +32,7 @@ class Treat::Workers::Processors::Segmenters::Stanford
|
|
32
32
|
::StanfordCoreNLP.load(:tokenize, :ssplit)
|
33
33
|
|
34
34
|
s = entity.to_s
|
35
|
-
text = ::StanfordCoreNLP::
|
35
|
+
text = ::StanfordCoreNLP::Annotation.new(s)
|
36
36
|
|
37
37
|
@@segmenter.annotate(text)
|
38
38
|
text.get(:sentences).each do |sentence|
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Maximum entropy tokenization supplied by OpenNLP.
|
2
|
+
class Treat::Workers::Processors::Tokenizers::Maxent
|
3
|
+
|
4
|
+
require 'open-nlp'
|
5
|
+
OpenNLP.load
|
6
|
+
|
7
|
+
# Maximum entropy tokenization.
|
8
|
+
def self.tokenize(entity, options = {})
|
9
|
+
|
10
|
+
lang = entity.language
|
11
|
+
str = entity.to_s
|
12
|
+
|
13
|
+
unless @@tokenizers[lang]
|
14
|
+
OpenNLP.use(lang.intern)
|
15
|
+
@@tokenizers[lang] =
|
16
|
+
OpenNLP::TokenizerME.new
|
17
|
+
end
|
18
|
+
|
19
|
+
tokenizer = @@tokenizers[lang]
|
20
|
+
tokens = tokenizer.tokenize(str).to_a
|
21
|
+
|
22
|
+
tokens.each do |token|
|
23
|
+
entity << Treat::Entities
|
24
|
+
::Token.from_string(chunk)
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
@@ -27,11 +27,9 @@ class Treat::Workers::Processors::Tokenizers::Stanford
|
|
27
27
|
# replacing "..." by ``...''. Off by default.
|
28
28
|
def self.tokenize(entity, options = {})
|
29
29
|
options = DefaultOptions.merge(options)
|
30
|
-
@@tokenizer ||=
|
31
|
-
::StanfordCoreNLP.load(:tokenize)
|
30
|
+
@@tokenizer ||= StanfordCoreNLP.load(:tokenize)
|
32
31
|
entity.check_hasnt_children
|
33
|
-
text = ::StanfordCoreNLP::
|
34
|
-
Text.new(entity.to_s)
|
32
|
+
text = ::StanfordCoreNLP::Annotation.new(entity.to_s)
|
35
33
|
@@tokenizer.annotate(text)
|
36
34
|
add_tokens(entity, text.get(:tokens), options)
|
37
35
|
end
|
data/lib/treat.rb
CHANGED
data/spec/helper.rb
CHANGED
@@ -1,20 +1,23 @@
|
|
1
1
|
require_relative '../lib/treat'
|
2
2
|
|
3
|
+
include Treat::Core::DSL
|
4
|
+
|
3
5
|
module Treat::Specs
|
4
6
|
|
5
7
|
require 'rspec'
|
6
8
|
|
7
9
|
# Some configuration options for devel.
|
10
|
+
|
8
11
|
Treat.databases.mongo.db = 'treat_test'
|
9
12
|
Treat.libraries.stanford.model_path =
|
10
|
-
'/ruby/stanford
|
13
|
+
'/ruby/stanford-core-nlp-minimal/models/'
|
11
14
|
Treat.libraries.stanford.jar_path =
|
12
|
-
'/ruby/stanford
|
15
|
+
'/ruby/stanford-core-nlp-minimal/bin/'
|
13
16
|
Treat.libraries.punkt.model_path =
|
14
|
-
'/ruby/punkt/'
|
17
|
+
'/ruby/punkt/models/'
|
15
18
|
Treat.libraries.reuters.model_path =
|
16
|
-
'/ruby/reuters/'
|
17
|
-
|
19
|
+
'/ruby/reuters/models/'
|
20
|
+
|
18
21
|
ModuleFiles = ['entities/*.rb', 'learning/*.rb']
|
19
22
|
|
20
23
|
# Provide helper functions for running specs.
|
@@ -25,7 +28,6 @@ module Treat::Specs
|
|
25
28
|
require 'simplecov'
|
26
29
|
SimpleCov.start do
|
27
30
|
add_filter '/spec/'
|
28
|
-
add_filter '/config/'
|
29
31
|
add_group 'Core', 'treat/core'
|
30
32
|
add_group 'Entities', 'treat/entities'
|
31
33
|
add_group 'Helpers', 'treat/helpers'
|
data/spec/sandbox.rb
CHANGED
@@ -1,13 +1,25 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
require_relative '../lib/treat'
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
4
|
+
Treat.databases.mongo.db = 'treat_test'
|
5
|
+
Treat.libraries.stanford.model_path =
|
6
|
+
'/ruby/stanford-core-nlp-minimal/models/'
|
7
|
+
Treat.libraries.stanford.jar_path =
|
8
|
+
'/ruby/stanford-core-nlp-minimal/bin/'
|
9
|
+
Treat.libraries.punkt.model_path =
|
10
|
+
'/ruby/punkt/models/'
|
11
|
+
Treat.libraries.reuters.model_path =
|
12
|
+
'/ruby/reuters/models/'
|
13
|
+
|
14
|
+
# include Treat::Core::DSL
|
15
|
+
|
16
|
+
Treat::Builder.new do
|
17
|
+
s = sentence "Hello, world!"
|
18
|
+
s.print_tree
|
19
|
+
end
|
10
20
|
|
21
|
+
p = paragraph('A walk in the park. A trip on a boat.').segment
|
22
|
+
p.visualize :dot, file: 'test.dot'
|
11
23
|
=begin
|
12
24
|
|
13
25
|
g = group("I was running")
|