treat 1.1.2 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +1 -1
- data/lib/treat/config/core/acronyms.rb +2 -1
- data/lib/treat/config/libraries/punkt.rb +1 -0
- data/lib/treat/config/libraries/reuters.rb +1 -0
- data/lib/treat/core/data_set.rb +125 -66
- data/lib/treat/core/export.rb +59 -0
- data/lib/treat/core/problem.rb +101 -18
- data/lib/treat/core/question.rb +23 -7
- data/lib/treat/entities/abilities/iterable.rb +7 -3
- data/lib/treat/entities/abilities/stringable.rb +5 -5
- data/lib/treat/entities/collection.rb +10 -6
- data/lib/treat/entities/entity.rb +1 -1
- data/lib/treat/helpers/objtohash.rb +8 -0
- data/lib/treat/loaders/stanford.rb +10 -8
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/formatters/serializers/mongo.rb +2 -2
- data/lib/treat/workers/formatters/serializers/xml.rb +7 -7
- data/lib/treat/workers/formatters/unserializers/mongo.rb +16 -8
- data/lib/treat/workers/formatters/unserializers/xml.rb +5 -5
- data/lib/treat/workers/formatters/visualizers/dot.rb +7 -7
- data/lib/treat/workers/learners/classifiers/id3.rb +4 -3
- data/lib/treat/workers/learners/classifiers/linear.rb +53 -0
- data/lib/treat/workers/learners/classifiers/mlp.rb +5 -5
- data/lib/treat/workers/learners/classifiers/svm.rb +31 -0
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -2
- data/lib/treat/workers/processors/parsers/enju.rb +17 -17
- data/lib/treat/workers/processors/segmenters/punkt.rb +3 -1
- data/spec/collection.rb +3 -3
- data/spec/core.rb +430 -21
- data/spec/document.rb +1 -1
- data/spec/entity.rb +2 -8
- data/spec/helper.rb +34 -0
- data/spec/phrase.rb +1 -1
- data/spec/sandbox.rb +31 -8
- data/spec/token.rb +1 -1
- data/spec/treat.rb +1 -1
- data/spec/word.rb +1 -1
- data/spec/zone.rb +1 -1
- metadata +9 -8
- data/files/3_2_release_notes.html +0 -766
- data/files/bc-monty-robinson-sentencing.html +0 -1569
- data/files/syria-aleppo-clashes.html +0 -1376
- data/lib/treat/core/feature.rb +0 -42
- data/lib/treat/core/node.rb +0 -251
- data/spec/node.rb +0 -117
data/LICENSE
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Treat - Text Retrieval, Extraction and Annotation Toolkit, v. 1.1.
|
1
|
+
Treat - Text Retrieval, Extraction and Annotation Toolkit, v. 1.1.2
|
2
2
|
|
3
3
|
This program is free software: you can redistribute it and/or modify
|
4
4
|
it under the terms of the GNU General Public License as published by
|
@@ -0,0 +1 @@
|
|
1
|
+
{model_path: nil}
|
@@ -0,0 +1 @@
|
|
1
|
+
{model_path: nil}
|
data/lib/treat/core/data_set.rb
CHANGED
@@ -3,11 +3,6 @@
|
|
3
3
|
# have already been classified, complete with
|
4
4
|
# references to these entities.
|
5
5
|
class Treat::Core::DataSet
|
6
|
-
|
7
|
-
# Used to serialize Procs.
|
8
|
-
silence_warnings do
|
9
|
-
require 'sourcify'
|
10
|
-
end
|
11
6
|
|
12
7
|
# The classification problem this
|
13
8
|
# data set holds data for.
|
@@ -15,68 +10,143 @@ class Treat::Core::DataSet
|
|
15
10
|
# Items that have been already
|
16
11
|
# classified (training data).
|
17
12
|
attr_accessor :items
|
18
|
-
# References to the IDs of the
|
19
|
-
# original entities contained
|
20
|
-
# in the data set.
|
21
|
-
attr_accessor :entities
|
22
13
|
|
23
|
-
# Initialize the DataSet.
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
14
|
+
# Create an empty data set bound to a classification problem.
# Raises Treat::Exception unless +problem+ is a Treat::Core::Problem.
def initialize(problem)
  if !problem.is_a?(Treat::Core::Problem)
    raise Treat::Exception,
      "The first argument to initialize should be " \
      "an instance of Treat::Core::Problem."
  end
  # Items start out empty; they are appended later.
  @problem, @items = problem, []
end
|
23
|
+
|
24
|
+
# Build a data set from a previously serialized source.
#
# from - either a Hash of options, which is handed to the unserializer
#        of the default database adapter, or a String path to a
#        serialized file. For a path, the file extension (without the
#        dot) names the handler, e.g. "set.marshal" is read through
#        from_marshal.
#
# Returns the result of the selected unserializer, or nil when +from+
# is neither a Hash nor a String.
# Raises Treat::Exception when the file is not readable or has no
# extension to pick a handler from.
def self.build(from)
  if from.is_a?(Hash)
    Treat::Core::DataSet.unserialize(
      Treat.databases.default.adapter, from)
  elsif from.is_a?(String)
    unless File.readable?(from)
      # The literals must be joined explicitly: adjacent literals on
      # separate lines are separate statements, not one message.
      raise Treat::Exception,
        "Attempting to initialize data set from " +
        "file #{from}, but it is not readable."
    end
    # The handler name is the extension without its leading dot.
    extension = File.extname(from).sub(/\A\./, '')
    if extension.empty?
      raise Treat::Exception,
        "Cannot determine the format of data set " +
        "file #{from}, because it has no extension."
    end
    Treat::Core::DataSet.unserialize(extension, file: from)
  end
end
|
41
38
|
|
42
|
-
# Add an entity to the data set.
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
# data set, along with the ID
|
49
|
-
# of the entity.
|
39
|
+
# Add an entity to the data set. The
|
40
|
+
# entity's relevant features are
|
41
|
+
# calculated based on the classification
|
42
|
+
# problem, and a line with the results
|
43
|
+
# of the calculation is added to the
|
44
|
+
# data set, along with the ID of the entity.
|
50
45
|
def <<(entity)
|
51
|
-
@items <<
|
52
|
-
|
53
|
-
@
|
46
|
+
@items << {
|
47
|
+
tags: (!@problem.tags.empty? ?
|
48
|
+
@problem.export_tags(entity) : []),
|
49
|
+
features: @problem.
|
50
|
+
export_features(entity),
|
51
|
+
id: entity.id }
|
52
|
+
end
|
53
|
+
|
54
|
+
# Dispatch serialization to the to_<handler> method of this data
# set (for example to_marshal or to_mongo), passing +options+
# through unchanged.
def serialize(handler, options = {})
  serializer = "to_#{handler}"
  send(serializer, options)
end
|
55
59
|
|
56
|
-
#
|
57
|
-
#
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
#
|
63
|
-
def
|
60
|
+
# Class-level counterpart of #serialize: dispatches to the
# from_<handler> class method, passing +options+ through unchanged.
def self.unserialize(handler, options)
  loader = "from_#{handler}"
  send(loader, options)
end
|
65
|
+
|
66
|
+
# Serialize the data set using Marshal.
|
67
|
+
def to_marshal(options)
|
68
|
+
file = options[:file]
|
64
69
|
problem = @problem.dup
|
65
70
|
problem.features.each do |feature|
|
66
|
-
|
67
|
-
|
71
|
+
feature.proc = nil
|
72
|
+
end
|
73
|
+
problem.tags.each do |tag|
|
74
|
+
tag.proc = nil
|
68
75
|
end
|
69
|
-
data = [problem, @items
|
76
|
+
data = [problem, @items]
|
70
77
|
File.open(file, 'w') do |f|
|
71
78
|
f.write(Marshal.dump(data))
|
72
79
|
end
|
80
|
+
end
|
81
|
+
|
82
|
+
# Rebuild a data set from a Marshal dump of [problem, items].
#
# options[:file] - path of the dump to read (read in binary mode).
#
# Every feature and tag that carries a proc_string gets its proc
# re-created by evaluating that string.
#
# SECURITY: Marshal.load and eval both execute whatever the file
# contains. Only load dumps that come from a trusted source.
def self.from_marshal(options)
  problem, items = *Marshal.load(File.binread(options[:file]))
  # Features first, then tags.
  (problem.features + problem.tags).each do |export|
    source = export.proc_string
    export.proc = eval(source) if source
  end
  Treat::Core::DataSet.new(problem).tap do |data_set|
    data_set.items = items
  end
end
|
99
|
+
|
100
|
+
# Store the data set in MongoDB.
#
# options[:host] - MongoDB host; falls back to Treat.databases.mongo.host.
# options[:db]   - database name; falls back to Treat.databases.mongo.db.
#
# The problem is upserted into the 'problems' collection, keyed by its
# id. One document per item is inserted into the 'data' collection,
# with features and tags stored as label => value hashes and the
# problem id stored under :problem.
#
# Neither the problem's label arrays nor the items of this data set
# are modified, so the method can be called repeatedly.
#
# Returns the (unmodified) items array.
def to_mongo(options)
  require 'mongo'
  host = options[:host] || Treat.databases.mongo.host
  db = options[:db] || Treat.databases.mongo.db
  # TODO: fail early when neither the options nor the configuration
  # supply a host or a database name.
  database = Mongo::Connection.new(host).db(db)
  database.collection('problems').update(
    {id: @problem.id}, @problem.to_hash, {upsert: true})
  # Build a new array: appending to @problem.feature_labels directly
  # would add the question name to the problem itself on every call.
  feature_labels = @problem.feature_labels + [@problem.question.name]
  tag_labels = @problem.tag_labels
  data = database.collection('data')
  pid = @problem.id
  @items.each do |item|
    # Insert a copy so that the in-memory items keep their array form.
    record = item.merge({
      features: Hash[feature_labels.zip(item[:features])],
      tags: Hash[tag_labels.zip(item[:tags])],
      problem: pid })
    data.insert(record)
  end
end
|
79
122
|
|
123
|
+
# Rebuild a data set from MongoDB.
#
# options[:host]    - MongoDB host; falls back to Treat.databases.mongo.host.
# options[:db]      - database name; falls back to Treat.databases.mongo.db.
# options[:problem] - id of the problem to load from 'problems'.
#
# Every option other than :host and :db is passed as the query to the
# 'data' collection. The caller's hash is left untouched.
#
# Returns a new Treat::Core::DataSet.
# Raises Treat::Exception when the problem record cannot be found.
def self.from_mongo(options)
  require 'mongo'
  # Work on a copy: the connection settings are removed from the
  # query, and the caller may want to reuse its options hash.
  query = options.dup
  host = query.delete(:host) || Treat.databases.mongo.host
  db = query.delete(:db) || Treat.databases.mongo.db
  database = Mongo::Connection.new(host).db(db)
  p_record = database.collection('problems').
    find_one(id: query[:problem])
  unless p_record
    raise Treat::Exception,
      "Couldn't retrieve problem ID #{query[:problem]}."
  end
  problem = Treat::Core::Problem.from_hash(p_record)
  # Stored features/tags are label => value hashes; keep the values only.
  items = database.collection('data').find(query).to_a.map do |datum|
    { features: datum['features'].values,
      tags: datum['tags'].values,
      id: datum['id'] }
  end
  data_set = Treat::Core::DataSet.new(problem)
  data_set.items = items
  data_set
end
|
149
|
+
|
80
150
|
# Merge another data set into this one.
|
81
151
|
def merge(data_set)
|
82
152
|
if data_set.problem != @problem
|
@@ -84,25 +154,14 @@ class Treat::Core::DataSet
|
|
84
154
|
"Cannot merge two data sets that " +
|
85
155
|
"don't reference the same problem."
|
86
156
|
else
|
87
|
-
@items
|
88
|
-
@entities << data_set.entities
|
157
|
+
@items += data_set.items
|
89
158
|
end
|
90
159
|
end
|
91
160
|
|
92
|
-
#
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
problem, items, entities = *data
|
97
|
-
problem.features.each do |feature|
|
98
|
-
next unless feature.proc
|
99
|
-
source = feature.proc[5..-1]
|
100
|
-
feature.proc = eval("Proc.new #{source}")
|
101
|
-
end
|
102
|
-
data_set = Treat::Core::DataSet.new(problem)
|
103
|
-
data_set.items = items
|
104
|
-
data_set.entities = entities
|
105
|
-
data_set
|
161
|
+
# Two data sets are equal when they reference equal problems
# and hold equal items.
def ==(data_set)
  same_problem = (@problem == data_set.problem)
  same_problem && @items == data_set.items
end
|
107
166
|
|
108
167
|
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Represents a value exported from an entity for use in a
# classification task. Treat::Core::Feature and Treat::Core::Tag,
# defined below, are its two subclasses.
class Treat::Core::Export

  # The name of the export. If no proc is supplied, this assumes
  # that the target of your classification problem responds to
  # the method corresponding to this name.
  attr_reader :name
  # The export's default value.
  attr_reader :default
  # A proc that can be used to perform
  # calculations before storing the export.
  attr_accessor :proc
  # The source of the proc, as a string.
  attr_accessor :proc_string

  # Initialize an export for a classification problem.
  #
  # name        - Symbol naming the export.
  # default     - default value for the export (nil if omitted).
  # proc_string - optional String of Ruby source that must
  #               evaluate to a Proc.
  #
  # Raises Treat::Exception when +name+ is not a Symbol, when
  # +proc_string+ is not a String, when evaluating it fails, or
  # when it evaluates to something other than a Proc.
  #
  # SECURITY: +proc_string+ is passed to eval; never build it
  # from untrusted input.
  def initialize(name, default = nil, proc_string = nil)
    unless name.is_a?(Symbol)
      raise Treat::Exception,
        "The first argument to initialize should " +
        "be a symbol representing the name of the export."
    end
    if proc_string && !proc_string.is_a?(String)
      raise Treat::Exception,
        "The third argument to initialize, " +
        "if supplied, should be a string that " +
        "can be evaluated to yield a Proc."
    end
    @name, @default, @proc_string =
      name, default, proc_string
    begin
      @proc = proc_string ? eval(proc_string) : nil
    rescue StandardError, ScriptError => e
      # ScriptError covers SyntaxError, which eval raises for
      # malformed source. Interrupt and SystemExit are deliberately
      # not caught, so they are no longer turned into a
      # Treat::Exception.
      raise Treat::Exception,
        "The third argument to initialize " +
        "did not evaluate without errors " +
        "(#{e.message})."
    end
    if @proc && !@proc.is_a?(Proc)
      raise Treat::Exception,
        "The third argument did not evaluate to a Proc."
    end
  end

  # Custom comparison operator: two exports are equal when their
  # name, default and proc_string are equal. The proc objects
  # themselves are not compared.
  def ==(feature)
    @name == feature.name &&
      @default == feature.default &&
      @proc_string == feature.proc_string
  end

end

# The two concrete kinds of export. Treat::Core::Problem sorts the
# exports it is given into features and tags by these classes.
class Treat::Core::Feature < Treat::Core::Export; end
class Treat::Core::Tag < Treat::Core::Export; end
|
data/lib/treat/core/problem.rb
CHANGED
@@ -4,27 +4,56 @@
|
|
4
4
|
# to attempt to answer that question?
|
5
5
|
class Treat::Core::Problem
|
6
6
|
|
7
|
+
# A unique identifier for the problem.
|
8
|
+
attr_accessor :id
|
7
9
|
# The question we are trying to answer.
|
8
10
|
attr_reader :question
|
9
11
|
# An array of features that will be
|
10
12
|
# looked at in trying to answer the
|
11
13
|
# problem's question.
|
12
14
|
attr_reader :features
|
15
|
+
attr_reader :tags
|
13
16
|
# Just the labels from the features.
|
14
|
-
attr_reader :
|
17
|
+
attr_reader :feature_labels
|
18
|
+
attr_reader :tag_labels
|
15
19
|
|
16
20
|
# Initialize the problem with a question
|
17
|
-
# and an arbitrary number of features.
|
18
|
-
def initialize(question, *
|
19
|
-
|
20
|
-
|
21
|
-
|
21
|
+
# and an arbitrary number of exports (Feature and Tag instances).
# At least one Feature is required. The id is set to object_id.
# Raises Treat::Exception on any invalid argument.
# FIXME: init with id!?
def initialize(question, *exports)
  if !question.is_a?(Treat::Core::Question)
    raise Treat::Exception,
      "The first argument to initialize should be " \
      "an instance of Treat::Core::Question."
  end
  unless exports.all? { |export| export.is_a?(Treat::Core::Export) }
    raise Treat::Exception,
      "The second argument and all subsequent ones to initialize " \
      "should be instances of subclasses of Treat::Core::Export."
  end
  @question = question
  @id = object_id
  # grep matches with ===, i.e. the same is_a? test per export.
  @features = exports.grep(Treat::Core::Feature)
  if @features.empty?
    raise Treat::Exception,
      "Problem should be supplied with at least " \
      "one feature to work with."
  end
  @tags = exports.grep(Treat::Core::Tag)
  @feature_labels = @features.map(&:name)
  @tag_labels = @tags.map(&:name)
end
|
23
50
|
|
24
51
|
# Custom comparison for problems.
|
52
|
+
# Should we check for ID here ? FIXME
|
25
53
|
def ==(problem)
|
26
54
|
@question == problem.question &&
|
27
|
-
@features == problem.features
|
55
|
+
@features == problem.features &&
|
56
|
+
@tags == problem.tags
|
28
57
|
end
|
29
58
|
|
30
59
|
# Return an array of all the entity's
|
@@ -32,18 +61,72 @@ class Treat::Core::Problem
|
|
32
61
|
# If include_answer is set to true, will
|
33
62
|
# append the answer to the problem after
|
34
63
|
# all of the features.
|
35
|
-
def
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
feature.proc.call(e) :
|
40
|
-
e.send(feature.name)
|
41
|
-
line << (r || feature.default)
|
42
|
-
end
|
43
|
-
return line unless include_answer
|
44
|
-
line << (e.has?(@question.name) ?
|
64
|
+
def export_features(e, include_answer = true)
|
65
|
+
features = export(e, @features)
|
66
|
+
return features unless include_answer
|
67
|
+
features << (e.has?(@question.name) ?
|
45
68
|
e.get(@question.name) : @question.default)
|
46
|
-
|
69
|
+
features
|
70
|
+
end
|
71
|
+
|
72
|
+
# Compute the value of every tag of this problem for +entity+.
# Raises Treat::Exception when the problem has no tags.
def export_tags(entity)
  unless !@tags.empty?
    raise Treat::Exception,
      "Cannot export the tags, because this problem doesn't have any."
  end
  export(entity, @tags)
end
|
80
|
+
|
81
|
+
# Compute the value of each of the given exports for an entity.
#
# entity  - the entity to read values from; its type must match
#           the target of this problem's question.
# exports - the exports (features or tags) to evaluate.
#
# For each export, the value is the result of calling its proc with
# the entity or, without a proc, of sending the export's name to the
# entity. A nil value is replaced by the export's default; any other
# value, including false, is kept as is.
#
# Returns an array with one value per export, in the same order.
# Raises Treat::Exception when the entity type does not match.
def export(entity, exports)
  unless @question.target == entity.type
    raise Treat::Exception,
      "This classification problem targets #{@question.target}s, " +
      "but a(n) #{entity.type} was passed to export instead."
  end
  exports.map do |exp|
    value = exp.proc ? exp.proc.call(entity) : entity.send(exp.name)
    # Only a missing (nil) value falls back to the default; using ||
    # here would also discard a legitimate false.
    value.nil? ? exp.default : value
  end
end
|
96
|
+
|
97
|
+
# Convert the problem to a hash with the string keys 'question',
# 'features', 'tags' and 'id'. The question and each export are
# converted with object_to_hash.
#
# Exports are converted with their proc set to nil. This is done on
# a copy of each export, so the problem's own features and tags
# keep their procs.
def to_hash
  without_proc = lambda do |export|
    object_to_hash(export.dup.tap { |copy| copy.proc = nil })
  end
  {'question' => object_to_hash(@question),
   'features' => @features.map(&without_proc),
   'tags' => @tags.map(&without_proc),
   'id' => @id }
end
|
105
|
+
|
106
|
+
# Rebuild a Problem from a hash with the string keys 'question',
# 'features', 'tags' and 'id' (presumably the output of #to_hash or
# a stored copy of it; confirm against the callers).
#
# The question is built from its 'name', 'target', 'type', 'default'
# and 'labels' entries; each feature and tag from its 'name',
# 'default' and 'proc_string' entries. The resulting problem gets
# the id stored in the hash.
def self.from_hash(hash)
  q = hash['question']
  question = Treat::Core::Question.new(
    q['name'], q['target'], q['type'], q['default'], q['labels'])
  features = hash['features'].map do |f|
    Treat::Core::Feature.new(f['name'], f['default'], f['proc_string'])
  end
  tags = hash['tags'].map do |t|
    Treat::Core::Tag.new(t['name'], t['default'], t['proc_string'])
  end
  problem = Treat::Core::Problem.new(question, *(features + tags))
  problem.id = hash['id']
  problem
end
|
48
131
|
|
49
132
|
end
|
data/lib/treat/core/question.rb
CHANGED
@@ -8,20 +8,35 @@ class Treat::Core::Question
|
|
8
8
|
# also be used as the annotation name
|
9
9
|
# for the answer to the question.
|
10
10
|
attr_reader :name
|
11
|
-
# Can be :continuous or :discrete,
|
12
|
-
# depending on the features used.
|
13
|
-
attr_reader :type
|
14
11
|
# Defines the target of the question
|
15
12
|
# (e.g. :sentence, :paragraph, etc.)
|
16
13
|
attr_reader :target
|
14
|
+
# Can be :continuous or :discrete,
|
15
|
+
# depending on the features used.
|
16
|
+
attr_reader :type
|
17
17
|
# Default for the answer to the question.
|
18
18
|
attr_reader :default
|
19
|
+
# A list of possible answers to the question.
|
20
|
+
attr_reader :labels
|
19
21
|
|
20
22
|
# Initialize the question.
|
21
23
|
def initialize(name, target,
|
22
|
-
type = :continuous, default = nil)
|
23
|
-
|
24
|
-
|
24
|
+
type = :continuous, default = nil, labels = [])
|
25
|
+
unless name.is_a?(Symbol)
|
26
|
+
raise Treat::Exception,
|
27
|
+
"Question name should be a symbol."
|
28
|
+
end
|
29
|
+
unless Treat.core.entities.list.include?(target)
|
30
|
+
raise Treat::Exception, "Target type should be " +
|
31
|
+
"a symbol and should be one of the following: " +
|
32
|
+
Treat.core.entities.list.inspect
|
33
|
+
end
|
34
|
+
unless [:continuous, :discrete].include?(type)
|
35
|
+
raise Treat::Exception, "Type should be " +
|
36
|
+
"continuous or discrete."
|
37
|
+
end
|
38
|
+
@name, @target, @type, @default, @labels =
|
39
|
+
name, target, type, default, labels
|
25
40
|
end
|
26
41
|
|
27
42
|
# Custom comparison operator for questions.
|
@@ -29,7 +44,8 @@ class Treat::Core::Question
|
|
29
44
|
@name == question.name &&
|
30
45
|
@type == question.type &&
|
31
46
|
@target == question.target &&
|
32
|
-
@default == question.default
|
47
|
+
@default == question.default &&
|
48
|
+
@labels = question.labels
|
33
49
|
end
|
34
50
|
|
35
51
|
end
|
@@ -94,10 +94,14 @@ module Treat::Entities::Abilities::Iterable
|
|
94
94
|
end
|
95
95
|
|
96
96
|
# Number of children that have a given feature.
|
97
|
-
|
97
|
+
# Second variable to allow for passing value to check for.
|
98
|
+
def num_children_with_feature(feature, value = nil, recursive = true)
|
98
99
|
i = 0
|
99
|
-
|
100
|
-
|
100
|
+
m = method(recursive ? :each_entity : :each)
|
101
|
+
m.call do |c|
|
102
|
+
next unless c.has?(feature)
|
103
|
+
i += (value == nil ? 1 :
|
104
|
+
(c.get(feature) == value ? 1 : 0))
|
101
105
|
end
|
102
106
|
i
|
103
107
|
end
|
@@ -42,14 +42,14 @@ module Treat::Entities::Abilities::Stringable
|
|
42
42
|
if caller_method(2) == :inspect
|
43
43
|
@id.to_s
|
44
44
|
else
|
45
|
-
|
46
|
-
@
|
47
|
-
|
48
|
-
"#{
|
45
|
+
edges = []
|
46
|
+
@edges.each do |edge|
|
47
|
+
edges <<
|
48
|
+
"#{edge.target}#{edge.type}"
|
49
49
|
end
|
50
50
|
s += " --- #{short_value.inspect}" +
|
51
51
|
" --- #{@features.inspect} " +
|
52
|
-
" --- #{
|
52
|
+
" --- #{edges.inspect} "
|
53
53
|
end
|
54
54
|
s
|
55
55
|
end
|
@@ -6,12 +6,16 @@ module Treat::Entities
|
|
6
6
|
# containing the texts of the collection.
|
7
7
|
def initialize(folder = nil, id = nil)
|
8
8
|
super('', id)
|
9
|
-
if folder
|
10
|
-
|
9
|
+
if folder
|
10
|
+
if !FileTest.directory?(folder)
|
11
|
+
FileUtils.mkdir(folder)
|
12
|
+
end
|
13
|
+
set :folder, folder if folder
|
14
|
+
i = folder + '/.index'
|
15
|
+
if FileTest.directory?(i)
|
16
|
+
set :index, i
|
17
|
+
end
|
11
18
|
end
|
12
|
-
set :folder, folder if folder
|
13
|
-
i = folder + '/.index'
|
14
|
-
set :index, i if FileTest.directory?(i)
|
15
19
|
end
|
16
20
|
|
17
21
|
# Works like the default <<, but if the
|
@@ -19,7 +23,7 @@ module Treat::Entities
|
|
19
23
|
# document, then copy that collection or
|
20
24
|
# document into this collection's folder.
|
21
25
|
def <<(entities, copy = true)
|
22
|
-
unless entities.is_a?
|
26
|
+
unless entities.is_a?(Array)
|
23
27
|
entities = [entities]
|
24
28
|
end
|
25
29
|
entities.each do |entity|
|
@@ -9,18 +9,20 @@ class Treat::Loaders::Stanford
|
|
9
9
|
def self.load(language = nil)
|
10
10
|
return if @@loaded
|
11
11
|
language ||= Treat.core.language.default
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
12
|
+
|
13
|
+
StanfordCoreNLP.jar_path =
|
14
|
+
Treat.libraries.stanford.jar_path ||
|
15
|
+
Treat.paths.bin + 'stanford/'
|
16
|
+
|
17
|
+
StanfordCoreNLP.model_path =
|
18
|
+
Treat.libraries.stanford.model_path ||
|
19
|
+
Treat.paths.models + 'stanford/'
|
20
|
+
|
20
21
|
StanfordCoreNLP.use(language)
|
21
22
|
if Treat.core.verbosity.silence
|
22
23
|
StanfordCoreNLP.log_file = NULL_DEVICE
|
23
24
|
end
|
25
|
+
|
24
26
|
StanfordCoreNLP.bind
|
25
27
|
@@loaded = true
|
26
28
|
end
|
data/lib/treat/version.rb
CHANGED
@@ -25,7 +25,7 @@ class Treat::Workers::Formatters::Serializers::Mongo
|
|
25
25
|
|
26
26
|
@@database ||= Mongo::Connection.
|
27
27
|
new(Treat.databases.mongo.host).
|
28
|
-
db(Treat.databases.mongo.db
|
28
|
+
db(options[:db] || Treat.databases.mongo.db)
|
29
29
|
|
30
30
|
supertype = cl(Treat::Entities.const_get(
|
31
31
|
entity.type.to_s.capitalize.intern).superclass).downcase
|
@@ -35,7 +35,7 @@ class Treat::Workers::Formatters::Serializers::Mongo
|
|
35
35
|
coll = @@database.collection(supertypes)
|
36
36
|
|
37
37
|
if entity.type == :collection
|
38
|
-
docs = @@database.collection('documents')
|
38
|
+
docs = @@database.collection('documents') # Take a design decision here.
|
39
39
|
coll.update(
|
40
40
|
{id: entity.id}, self.do_serialize(entity,
|
41
41
|
options.merge({:stop_at => Treat::Entities::Document})),
|