treat 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +1 -1
- data/lib/treat/config/core/acronyms.rb +2 -1
- data/lib/treat/config/libraries/punkt.rb +1 -0
- data/lib/treat/config/libraries/reuters.rb +1 -0
- data/lib/treat/core/data_set.rb +125 -66
- data/lib/treat/core/export.rb +59 -0
- data/lib/treat/core/problem.rb +101 -18
- data/lib/treat/core/question.rb +23 -7
- data/lib/treat/entities/abilities/iterable.rb +7 -3
- data/lib/treat/entities/abilities/stringable.rb +5 -5
- data/lib/treat/entities/collection.rb +10 -6
- data/lib/treat/entities/entity.rb +1 -1
- data/lib/treat/helpers/objtohash.rb +8 -0
- data/lib/treat/loaders/stanford.rb +10 -8
- data/lib/treat/version.rb +1 -1
- data/lib/treat/workers/formatters/serializers/mongo.rb +2 -2
- data/lib/treat/workers/formatters/serializers/xml.rb +7 -7
- data/lib/treat/workers/formatters/unserializers/mongo.rb +16 -8
- data/lib/treat/workers/formatters/unserializers/xml.rb +5 -5
- data/lib/treat/workers/formatters/visualizers/dot.rb +7 -7
- data/lib/treat/workers/learners/classifiers/id3.rb +4 -3
- data/lib/treat/workers/learners/classifiers/linear.rb +53 -0
- data/lib/treat/workers/learners/classifiers/mlp.rb +5 -5
- data/lib/treat/workers/learners/classifiers/svm.rb +31 -0
- data/lib/treat/workers/lexicalizers/taggers/stanford.rb +4 -2
- data/lib/treat/workers/processors/parsers/enju.rb +17 -17
- data/lib/treat/workers/processors/segmenters/punkt.rb +3 -1
- data/spec/collection.rb +3 -3
- data/spec/core.rb +430 -21
- data/spec/document.rb +1 -1
- data/spec/entity.rb +2 -8
- data/spec/helper.rb +34 -0
- data/spec/phrase.rb +1 -1
- data/spec/sandbox.rb +31 -8
- data/spec/token.rb +1 -1
- data/spec/treat.rb +1 -1
- data/spec/word.rb +1 -1
- data/spec/zone.rb +1 -1
- metadata +9 -8
- data/files/3_2_release_notes.html +0 -766
- data/files/bc-monty-robinson-sentencing.html +0 -1569
- data/files/syria-aleppo-clashes.html +0 -1376
- data/lib/treat/core/feature.rb +0 -42
- data/lib/treat/core/node.rb +0 -251
- data/spec/node.rb +0 -117
data/LICENSE
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
Treat - Text Retrieval, Extraction and Annotation Toolkit, v. 1.1.
|
1
|
+
Treat - Text Retrieval, Extraction and Annotation Toolkit, v. 1.1.2
|
2
2
|
|
3
3
|
This program is free software: you can redistribute it and/or modify
|
4
4
|
it under the terms of the GNU General Public License as published by
|
@@ -0,0 +1 @@
|
|
1
|
+
{model_path: nil}
|
@@ -0,0 +1 @@
|
|
1
|
+
{model_path: nil}
|
data/lib/treat/core/data_set.rb
CHANGED
@@ -3,11 +3,6 @@
|
|
3
3
|
# have already been classified, complete with
|
4
4
|
# references to these entities.
|
5
5
|
class Treat::Core::DataSet
|
6
|
-
|
7
|
-
# Used to serialize Procs.
|
8
|
-
silence_warnings do
|
9
|
-
require 'sourcify'
|
10
|
-
end
|
11
6
|
|
12
7
|
# The classification problem this
|
13
8
|
# data set holds data for.
|
@@ -15,68 +10,143 @@ class Treat::Core::DataSet
|
|
15
10
|
# Items that have been already
|
16
11
|
# classified (training data).
|
17
12
|
attr_accessor :items
|
18
|
-
# References to the IDs of the
|
19
|
-
# original entities contained
|
20
|
-
# in the data set.
|
21
|
-
attr_accessor :entities
|
22
13
|
|
23
|
-
# Initialize the DataSet.
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
14
|
+
# Initialize the DataSet.
|
15
|
+
def initialize(problem)
|
16
|
+
unless problem.is_a?(Treat::Core::Problem)
|
17
|
+
raise Treat::Exception, "The first argument " +
|
18
|
+
"to initialize should be an instance of " +
|
19
|
+
"Treat::Core::Problem."
|
20
|
+
end
|
21
|
+
@problem, @items = problem, []
|
22
|
+
end
|
23
|
+
|
24
|
+
def self.build(from)
|
25
|
+
if from.is_a?(Hash)
|
26
|
+
Treat::Core::DataSet.unserialize(
|
27
|
+
Treat.databases.default.adapter, from)
|
28
|
+
elsif from.is_a?(String)
|
29
|
+
unless File.readable?(from)
|
30
|
+
raise Treat::Exception,
|
31
|
+
"Attempting to initialize data set from "
|
32
|
+
"file #{from}, but it is not readable."
|
33
|
+
end
|
34
|
+
Treat::Core::DataSet.unserialize(
|
35
|
+
extension, file: from)
|
39
36
|
end
|
40
37
|
end
|
41
38
|
|
42
|
-
# Add an entity to the data set.
|
43
|
-
#
|
44
|
-
#
|
45
|
-
#
|
46
|
-
#
|
47
|
-
#
|
48
|
-
# data set, along with the ID
|
49
|
-
# of the entity.
|
39
|
+
# Add an entity to the data set. The
|
40
|
+
# entity's relevant features are
|
41
|
+
# calculated based on the classification
|
42
|
+
# problem, and a line with the results
|
43
|
+
# of the calculation is added to the
|
44
|
+
# data set, along with the ID of the entity.
|
50
45
|
def <<(entity)
|
51
|
-
@items <<
|
52
|
-
|
53
|
-
@
|
46
|
+
@items << {
|
47
|
+
tags: (!@problem.tags.empty? ?
|
48
|
+
@problem.export_tags(entity) : []),
|
49
|
+
features: @problem.
|
50
|
+
export_features(entity),
|
51
|
+
id: entity.id }
|
52
|
+
end
|
53
|
+
|
54
|
+
# Serialize the data set to a file,
|
55
|
+
# or store it inside the database.
|
56
|
+
def serialize(handler, options = {})
|
57
|
+
send("to_#{handler}", options)
|
54
58
|
end
|
55
59
|
|
56
|
-
#
|
57
|
-
#
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
#
|
63
|
-
def
|
60
|
+
# Unserialize a data set file created
|
61
|
+
# by using the #serialize method.
|
62
|
+
def self.unserialize(handler, options)
|
63
|
+
self.send("from_#{handler}", options)
|
64
|
+
end
|
65
|
+
|
66
|
+
# Serialize the data set using Marshal.
|
67
|
+
def to_marshal(options)
|
68
|
+
file = options[:file]
|
64
69
|
problem = @problem.dup
|
65
70
|
problem.features.each do |feature|
|
66
|
-
|
67
|
-
|
71
|
+
feature.proc = nil
|
72
|
+
end
|
73
|
+
problem.tags.each do |tag|
|
74
|
+
tag.proc = nil
|
68
75
|
end
|
69
|
-
data = [problem, @items
|
76
|
+
data = [problem, @items]
|
70
77
|
File.open(file, 'w') do |f|
|
71
78
|
f.write(Marshal.dump(data))
|
72
79
|
end
|
80
|
+
end
|
81
|
+
|
82
|
+
# Unserialize the data using Marshal.
|
83
|
+
def self.from_marshal(options)
|
84
|
+
file = options[:file]
|
85
|
+
data = Marshal.load(File.binread(file))
|
86
|
+
problem, items = *data
|
73
87
|
problem.features.each do |feature|
|
74
|
-
next unless feature.
|
75
|
-
|
76
|
-
|
88
|
+
next unless feature.proc_string
|
89
|
+
feature.proc = eval(feature.proc_string)
|
90
|
+
end
|
91
|
+
problem.tags.each do |tag|
|
92
|
+
next unless tag.proc_string
|
93
|
+
tag.proc = eval(tag.proc_string)
|
94
|
+
end
|
95
|
+
data_set = Treat::Core::DataSet.new(problem)
|
96
|
+
data_set.items = items
|
97
|
+
data_set
|
98
|
+
end
|
99
|
+
|
100
|
+
# Serialize the data set to a MongoDB record.
|
101
|
+
def to_mongo(options)
|
102
|
+
require 'mongo'
|
103
|
+
host = options[:host] || Treat.databases.mongo.host
|
104
|
+
db = options[:db] || Treat.databases.mongo.db
|
105
|
+
# UNLESS HOST, UNLESS DB
|
106
|
+
database = Mongo::Connection.new(host).db(db)
|
107
|
+
database.collection('problems').update(
|
108
|
+
{id: @problem.id}, @problem.to_hash, {upsert: true})
|
109
|
+
feature_labels = @problem.feature_labels
|
110
|
+
feature_labels << @problem.question.name
|
111
|
+
tag_labels = @problem.tag_labels
|
112
|
+
tags = @problem.tags.map { |t| t.name }
|
113
|
+
data = database.collection('data')
|
114
|
+
pid = @problem.id
|
115
|
+
@items.each do |item|
|
116
|
+
item[:features] = Hash[feature_labels.zip(item[:features])]
|
117
|
+
item[:tags] = Hash[tag_labels.zip(item[:tags])]
|
118
|
+
item[:problem] = pid
|
119
|
+
data.insert(item)
|
77
120
|
end
|
78
121
|
end
|
79
122
|
|
123
|
+
def self.from_mongo(options)
|
124
|
+
require 'mongo'
|
125
|
+
host = options.delete(:host) || Treat.databases.mongo.host
|
126
|
+
db = options.delete(:db) || Treat.databases.mongo.db
|
127
|
+
database = Mongo::Connection.new(host).db(db)
|
128
|
+
p_record = database.collection('problems').
|
129
|
+
find_one(id: options[:problem])
|
130
|
+
unless p_record
|
131
|
+
raise Treat::Exception,
|
132
|
+
"Couldn't retrieve problem ID #{options[:problem]}."
|
133
|
+
end
|
134
|
+
problem = Treat::Core::Problem.from_hash(p_record)
|
135
|
+
data = database.collection('data').find(options).to_a
|
136
|
+
items = []
|
137
|
+
data.each do |datum|
|
138
|
+
datum.delete("_id"); datum.delete('problem')
|
139
|
+
item = {}
|
140
|
+
item[:features] = datum['features'].values
|
141
|
+
item[:tags] = datum['tags'].values
|
142
|
+
item[:id] = datum['id']
|
143
|
+
items << item
|
144
|
+
end
|
145
|
+
data_set = Treat::Core::DataSet.new(problem)
|
146
|
+
data_set.items = items
|
147
|
+
data_set
|
148
|
+
end
|
149
|
+
|
80
150
|
# Merge another data set into this one.
|
81
151
|
def merge(data_set)
|
82
152
|
if data_set.problem != @problem
|
@@ -84,25 +154,14 @@ class Treat::Core::DataSet
|
|
84
154
|
"Cannot merge two data sets that " +
|
85
155
|
"don't reference the same problem."
|
86
156
|
else
|
87
|
-
@items
|
88
|
-
@entities << data_set.entities
|
157
|
+
@items += data_set.items
|
89
158
|
end
|
90
159
|
end
|
91
160
|
|
92
|
-
#
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
problem, items, entities = *data
|
97
|
-
problem.features.each do |feature|
|
98
|
-
next unless feature.proc
|
99
|
-
source = feature.proc[5..-1]
|
100
|
-
feature.proc = eval("Proc.new #{source}")
|
101
|
-
end
|
102
|
-
data_set = Treat::Core::DataSet.new(problem)
|
103
|
-
data_set.items = items
|
104
|
-
data_set.entities = entities
|
105
|
-
data_set
|
161
|
+
# Compare with other data set.
|
162
|
+
def ==(data_set)
|
163
|
+
@problem == data_set.problem &&
|
164
|
+
@items == data_set.items
|
106
165
|
end
|
107
166
|
|
108
167
|
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# Represents a feature to be used
|
2
|
+
# in a classification task.
|
3
|
+
class Treat::Core::Export
|
4
|
+
|
5
|
+
# The name of the feature. If no
|
6
|
+
# proc is supplied, this assumes
|
7
|
+
# that the target of your classification
|
8
|
+
# problem responds to the method
|
9
|
+
# corresponding to this name.
|
10
|
+
attr_reader :name
|
11
|
+
# The feature's default value, if nil.
|
12
|
+
attr_reader :default
|
13
|
+
# A proc that can be used to perform
|
14
|
+
# calculations before storing a feature.
|
15
|
+
attr_accessor :proc
|
16
|
+
# The proc as a string value.
|
17
|
+
attr_accessor :proc_string
|
18
|
+
|
19
|
+
# Initialize a feature for a classification problem.
|
20
|
+
def initialize(name, default = nil, proc_string = nil)
|
21
|
+
unless name.is_a?(Symbol)
|
22
|
+
raise Treat::Exception,
|
23
|
+
"The first argument to initialize should "+
|
24
|
+
"be a symbol representing the name of the export."
|
25
|
+
end
|
26
|
+
if proc_string && !proc_string.is_a?(String)
|
27
|
+
raise Treat::Exception,
|
28
|
+
"The third argument to initialize, " +
|
29
|
+
"if supplied, should be a string that " +
|
30
|
+
"can be evaluated to yield a Proc."
|
31
|
+
end
|
32
|
+
@name, @default, @proc_string =
|
33
|
+
name, default, proc_string
|
34
|
+
begin
|
35
|
+
@proc = proc_string ?
|
36
|
+
eval(proc_string) : nil
|
37
|
+
rescue Exception => e
|
38
|
+
raise Treat::Exception,
|
39
|
+
"The third argument to initialize " +
|
40
|
+
"did not evaluate without errors " +
|
41
|
+
"(#{e.message})."
|
42
|
+
end
|
43
|
+
if @proc && !@proc.is_a?(Proc)
|
44
|
+
raise Treat::Exception,
|
45
|
+
"The third argument did not evaluate to a Proc."
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
# Custom comparison operator for features.
|
50
|
+
def ==(feature)
|
51
|
+
@name == feature.name &&
|
52
|
+
@default == feature.default &&
|
53
|
+
@proc_string == feature.proc_string
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
class Treat::Core::Feature < Treat::Core::Export; end
|
59
|
+
class Treat::Core::Tag < Treat::Core::Export; end
|
data/lib/treat/core/problem.rb
CHANGED
@@ -4,27 +4,56 @@
|
|
4
4
|
# to attempt to answer that question?
|
5
5
|
class Treat::Core::Problem
|
6
6
|
|
7
|
+
# A unique identifier for the problem.
|
8
|
+
attr_accessor :id
|
7
9
|
# The question we are trying to answer.
|
8
10
|
attr_reader :question
|
9
11
|
# An array of features that will be
|
10
12
|
# looked at in trying to answer the
|
11
13
|
# problem's question.
|
12
14
|
attr_reader :features
|
15
|
+
attr_reader :tags
|
13
16
|
# Just the labels from the features.
|
14
|
-
attr_reader :
|
17
|
+
attr_reader :feature_labels
|
18
|
+
attr_reader :tag_labels
|
15
19
|
|
16
20
|
# Initialize the problem with a question
|
17
|
-
# and an arbitrary number of features.
|
18
|
-
def initialize(question, *
|
19
|
-
|
20
|
-
|
21
|
-
|
21
|
+
# and an arbitrary number of features. # FIXME: init with id!?
|
22
|
+
def initialize(question, *exports)
|
23
|
+
unless question.is_a?(Treat::Core::Question)
|
24
|
+
raise Treat::Exception,
|
25
|
+
"The first argument to initialize " +
|
26
|
+
"should be an instance of " +
|
27
|
+
"Treat::Core::Question."
|
28
|
+
end
|
29
|
+
if exports.any? { |f| !f.is_a?(Treat::Core::Export) }
|
30
|
+
raise Treat::Exception,
|
31
|
+
"The second argument and all subsequent ones " +
|
32
|
+
"to initialize should be instances of subclasses " +
|
33
|
+
"of Treat::Core::Export."
|
34
|
+
end
|
35
|
+
@question, @id = question, object_id
|
36
|
+
@features = exports.select do |exp|
|
37
|
+
exp.is_a?(Treat::Core::Feature)
|
38
|
+
end
|
39
|
+
if @features.size == 0
|
40
|
+
raise Treat::Exception,
|
41
|
+
"Problem should be supplied with at least "+
|
42
|
+
"one feature to work with."
|
43
|
+
end
|
44
|
+
@tags = exports.select do |exp|
|
45
|
+
exp.is_a?(Treat::Core::Tag)
|
46
|
+
end
|
47
|
+
@feature_labels = @features.map { |f| f.name }
|
48
|
+
@tag_labels = @tags.map { |t| t.name }
|
22
49
|
end
|
23
50
|
|
24
51
|
# Custom comparison for problems.
|
52
|
+
# Should we check for ID here ? FIXME
|
25
53
|
def ==(problem)
|
26
54
|
@question == problem.question &&
|
27
|
-
@features == problem.features
|
55
|
+
@features == problem.features &&
|
56
|
+
@tags == problem.tags
|
28
57
|
end
|
29
58
|
|
30
59
|
# Return an array of all the entity's
|
@@ -32,18 +61,72 @@ class Treat::Core::Problem
|
|
32
61
|
# If include_answer is set to true, will
|
33
62
|
# append the answer to the problem after
|
34
63
|
# all of the features.
|
35
|
-
def
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
feature.proc.call(e) :
|
40
|
-
e.send(feature.name)
|
41
|
-
line << (r || feature.default)
|
42
|
-
end
|
43
|
-
return line unless include_answer
|
44
|
-
line << (e.has?(@question.name) ?
|
64
|
+
def export_features(e, include_answer = true)
|
65
|
+
features = export(e, @features)
|
66
|
+
return features unless include_answer
|
67
|
+
features << (e.has?(@question.name) ?
|
45
68
|
e.get(@question.name) : @question.default)
|
46
|
-
|
69
|
+
features
|
70
|
+
end
|
71
|
+
|
72
|
+
def export_tags(entity)
|
73
|
+
if @tags.empty?
|
74
|
+
raise Treat::Exception,
|
75
|
+
"Cannot export the tags, because " +
|
76
|
+
"this problem doesn't have any."
|
77
|
+
end
|
78
|
+
export(entity, @tags)
|
79
|
+
end
|
80
|
+
|
81
|
+
def export(entity, exports)
|
82
|
+
unless @question.target == entity.type
|
83
|
+
raise Treat::Exception,
|
84
|
+
"This classification problem targets #{@question.target}s, " +
|
85
|
+
"but a(n) #{entity.type} was passed to export instead."
|
86
|
+
end
|
87
|
+
ret = []
|
88
|
+
exports.each do |export|
|
89
|
+
r = export.proc ?
|
90
|
+
export.proc.call(entity) :
|
91
|
+
entity.send(export.name)
|
92
|
+
ret << (r || export.default)
|
93
|
+
end
|
94
|
+
ret
|
95
|
+
end
|
96
|
+
|
97
|
+
def to_hash
|
98
|
+
{'question' => object_to_hash(@question),
|
99
|
+
'features' => @features.map { |f|
|
100
|
+
object_to_hash(f.tap { |f| f.proc = nil }) },
|
101
|
+
'tags' => @tags.map { |t|
|
102
|
+
object_to_hash(t.tap { |t| t.proc = nil }) },
|
103
|
+
'id' => @id }
|
104
|
+
end
|
105
|
+
|
106
|
+
def self.from_hash(hash)
|
107
|
+
question = Treat::Core::Question.new(
|
108
|
+
hash['question']['name'],
|
109
|
+
hash['question']['target'],
|
110
|
+
hash['question']['type'],
|
111
|
+
hash['question']['default'],
|
112
|
+
hash['question']['labels']
|
113
|
+
)
|
114
|
+
features = []
|
115
|
+
hash['features'].each do |feature|
|
116
|
+
features << Treat::Core::Feature.new(
|
117
|
+
feature['name'], feature['default'],
|
118
|
+
feature['proc_string'])
|
119
|
+
end
|
120
|
+
tags = []
|
121
|
+
hash['tags'].each do |tag|
|
122
|
+
tags << Treat::Core::Tag.new(
|
123
|
+
tag['name'], tag['default'],
|
124
|
+
tag['proc_string'])
|
125
|
+
end
|
126
|
+
features_and_tags = features + tags
|
127
|
+
p = Treat::Core::Problem.new(question, *features_and_tags)
|
128
|
+
p.id = hash['id']
|
129
|
+
p
|
47
130
|
end
|
48
131
|
|
49
132
|
end
|
data/lib/treat/core/question.rb
CHANGED
@@ -8,20 +8,35 @@ class Treat::Core::Question
|
|
8
8
|
# also be used as the annotation name
|
9
9
|
# for the answer to the question.
|
10
10
|
attr_reader :name
|
11
|
-
# Can be :continuous or :discrete,
|
12
|
-
# depending on the features used.
|
13
|
-
attr_reader :type
|
14
11
|
# Defines the target of the question
|
15
12
|
# (e.g. :sentence, :paragraph, etc.)
|
16
13
|
attr_reader :target
|
14
|
+
# Can be :continuous or :discrete,
|
15
|
+
# depending on the features used.
|
16
|
+
attr_reader :type
|
17
17
|
# Default for the answer to the question.
|
18
18
|
attr_reader :default
|
19
|
+
# A list of possible answers to the question.
|
20
|
+
attr_reader :labels
|
19
21
|
|
20
22
|
# Initialize the question.
|
21
23
|
def initialize(name, target,
|
22
|
-
type = :continuous, default = nil)
|
23
|
-
|
24
|
-
|
24
|
+
type = :continuous, default = nil, labels = [])
|
25
|
+
unless name.is_a?(Symbol)
|
26
|
+
raise Treat::Exception,
|
27
|
+
"Question name should be a symbol."
|
28
|
+
end
|
29
|
+
unless Treat.core.entities.list.include?(target)
|
30
|
+
raise Treat::Exception, "Target type should be " +
|
31
|
+
"a symbol and should be one of the following: " +
|
32
|
+
Treat.core.entities.list.inspect
|
33
|
+
end
|
34
|
+
unless [:continuous, :discrete].include?(type)
|
35
|
+
raise Treat::Exception, "Type should be " +
|
36
|
+
"continuous or discrete."
|
37
|
+
end
|
38
|
+
@name, @target, @type, @default, @labels =
|
39
|
+
name, target, type, default, labels
|
25
40
|
end
|
26
41
|
|
27
42
|
# Custom comparison operator for questions.
|
@@ -29,7 +44,8 @@ class Treat::Core::Question
|
|
29
44
|
@name == question.name &&
|
30
45
|
@type == question.type &&
|
31
46
|
@target == question.target &&
|
32
|
-
@default == question.default
|
47
|
+
@default == question.default &&
|
48
|
+
@labels = question.labels
|
33
49
|
end
|
34
50
|
|
35
51
|
end
|
@@ -94,10 +94,14 @@ module Treat::Entities::Abilities::Iterable
|
|
94
94
|
end
|
95
95
|
|
96
96
|
# Number of children that have a given feature.
|
97
|
-
|
97
|
+
# Second variable to allow for passing value to check for.
|
98
|
+
def num_children_with_feature(feature, value = nil, recursive = true)
|
98
99
|
i = 0
|
99
|
-
|
100
|
-
|
100
|
+
m = method(recursive ? :each_entity : :each)
|
101
|
+
m.call do |c|
|
102
|
+
next unless c.has?(feature)
|
103
|
+
i += (value == nil ? 1 :
|
104
|
+
(c.get(feature) == value ? 1 : 0))
|
101
105
|
end
|
102
106
|
i
|
103
107
|
end
|
@@ -42,14 +42,14 @@ module Treat::Entities::Abilities::Stringable
|
|
42
42
|
if caller_method(2) == :inspect
|
43
43
|
@id.to_s
|
44
44
|
else
|
45
|
-
|
46
|
-
@
|
47
|
-
|
48
|
-
"#{
|
45
|
+
edges = []
|
46
|
+
@edges.each do |edge|
|
47
|
+
edges <<
|
48
|
+
"#{edge.target}#{edge.type}"
|
49
49
|
end
|
50
50
|
s += " --- #{short_value.inspect}" +
|
51
51
|
" --- #{@features.inspect} " +
|
52
|
-
" --- #{
|
52
|
+
" --- #{edges.inspect} "
|
53
53
|
end
|
54
54
|
s
|
55
55
|
end
|
@@ -6,12 +6,16 @@ module Treat::Entities
|
|
6
6
|
# containing the texts of the collection.
|
7
7
|
def initialize(folder = nil, id = nil)
|
8
8
|
super('', id)
|
9
|
-
if folder
|
10
|
-
|
9
|
+
if folder
|
10
|
+
if !FileTest.directory?(folder)
|
11
|
+
FileUtils.mkdir(folder)
|
12
|
+
end
|
13
|
+
set :folder, folder if folder
|
14
|
+
i = folder + '/.index'
|
15
|
+
if FileTest.directory?(i)
|
16
|
+
set :index, i
|
17
|
+
end
|
11
18
|
end
|
12
|
-
set :folder, folder if folder
|
13
|
-
i = folder + '/.index'
|
14
|
-
set :index, i if FileTest.directory?(i)
|
15
19
|
end
|
16
20
|
|
17
21
|
# Works like the default <<, but if the
|
@@ -19,7 +23,7 @@ module Treat::Entities
|
|
19
23
|
# document, then copy that collection or
|
20
24
|
# document into this collection's folder.
|
21
25
|
def <<(entities, copy = true)
|
22
|
-
unless entities.is_a?
|
26
|
+
unless entities.is_a?(Array)
|
23
27
|
entities = [entities]
|
24
28
|
end
|
25
29
|
entities.each do |entity|
|
@@ -9,18 +9,20 @@ class Treat::Loaders::Stanford
|
|
9
9
|
def self.load(language = nil)
|
10
10
|
return if @@loaded
|
11
11
|
language ||= Treat.core.language.default
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
12
|
+
|
13
|
+
StanfordCoreNLP.jar_path =
|
14
|
+
Treat.libraries.stanford.jar_path ||
|
15
|
+
Treat.paths.bin + 'stanford/'
|
16
|
+
|
17
|
+
StanfordCoreNLP.model_path =
|
18
|
+
Treat.libraries.stanford.model_path ||
|
19
|
+
Treat.paths.models + 'stanford/'
|
20
|
+
|
20
21
|
StanfordCoreNLP.use(language)
|
21
22
|
if Treat.core.verbosity.silence
|
22
23
|
StanfordCoreNLP.log_file = NULL_DEVICE
|
23
24
|
end
|
25
|
+
|
24
26
|
StanfordCoreNLP.bind
|
25
27
|
@@loaded = true
|
26
28
|
end
|
data/lib/treat/version.rb
CHANGED
@@ -25,7 +25,7 @@ class Treat::Workers::Formatters::Serializers::Mongo
|
|
25
25
|
|
26
26
|
@@database ||= Mongo::Connection.
|
27
27
|
new(Treat.databases.mongo.host).
|
28
|
-
db(Treat.databases.mongo.db
|
28
|
+
db(options[:db] || Treat.databases.mongo.db)
|
29
29
|
|
30
30
|
supertype = cl(Treat::Entities.const_get(
|
31
31
|
entity.type.to_s.capitalize.intern).superclass).downcase
|
@@ -35,7 +35,7 @@ class Treat::Workers::Formatters::Serializers::Mongo
|
|
35
35
|
coll = @@database.collection(supertypes)
|
36
36
|
|
37
37
|
if entity.type == :collection
|
38
|
-
docs = @@database.collection('documents')
|
38
|
+
docs = @@database.collection('documents') # Take a design decision here.
|
39
39
|
coll.update(
|
40
40
|
{id: entity.id}, self.do_serialize(entity,
|
41
41
|
options.merge({:stop_at => Treat::Entities::Document})),
|