scout-gear 10.7.2 → 10.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +46 -32
- data/VERSION +1 -1
- data/lib/scout/association/index.rb +4 -0
- data/lib/scout/association/item.rb +1 -1
- data/lib/scout/association.rb +28 -9
- data/lib/scout/entity/identifiers.rb +2 -2
- data/lib/scout/entity/property.rb +1 -0
- data/lib/scout/knowledge_base/enrichment.rb +9 -0
- data/lib/scout/knowledge_base/entity.rb +143 -0
- data/lib/scout/knowledge_base/list.rb +95 -0
- data/lib/scout/knowledge_base/query.rb +96 -0
- data/lib/scout/knowledge_base/registry.rb +173 -0
- data/lib/scout/knowledge_base/traverse.rb +329 -0
- data/lib/scout/knowledge_base.rb +91 -0
- data/lib/scout/tsv/annotation.rb +4 -4
- data/lib/scout/tsv/index.rb +0 -2
- data/lib/scout/tsv/parser.rb +1 -1
- data/lib/scout/tsv/stream.rb +3 -3
- data/lib/scout/tsv.rb +2 -0
- data/lib/scout/workflow/step/info.rb +10 -1
- data/scout-gear.gemspec +24 -6
- data/scout_commands/kb/config +33 -0
- data/scout_commands/kb/entities +35 -0
- data/scout_commands/kb/list +39 -0
- data/scout_commands/{db → kb}/query +6 -11
- data/scout_commands/{db → kb}/register +9 -8
- data/scout_commands/{db → kb}/show +6 -16
- data/scout_commands/kb/traverse +66 -0
- data/test/data/person/brothers +1 -1
- data/test/scout/entity/test_identifiers.rb +3 -3
- data/test/scout/knowledge_base/test_enrichment.rb +0 -0
- data/test/scout/knowledge_base/test_entity.rb +38 -0
- data/test/scout/knowledge_base/test_list.rb +40 -0
- data/test/scout/knowledge_base/test_query.rb +39 -0
- data/test/scout/knowledge_base/test_registry.rb +16 -0
- data/test/scout/knowledge_base/test_traverse.rb +245 -0
- data/test/scout/test_association.rb +17 -3
- data/test/scout/test_entity.rb +0 -15
- data/test/scout/test_knowledge_base.rb +27 -0
- data/test/test_helper.rb +17 -0
- metadata +23 -5
@@ -0,0 +1,173 @@
|
|
1
|
+
require 'scout/association'
|
2
|
+
require 'scout/association/item'
|
3
|
+
|
4
|
+
# Registry side of KnowledgeBase: remembers which association sources are
# registered under which name, and lazily opens their indices and databases.
#
# Relies on accessors defined in the main KnowledgeBase file (dir, namespace,
# registry, entity_options).
class KnowledgeBase
  # Registers a database under +name+.
  #
  # name    - identifier used later by get_index / get_database.
  # file    - source of the associations; a Path is resolved with #find.
  #           Not stored when a block is given.
  # options - stored next to the source and used as defaults when the
  #           database is opened.
  # block   - optional producer of the source; it is called the first time the
  #           index/database needs to be built.
  def register(name, file = nil, options = {}, &block)
    file = file.find if Path === file
    @registry ||= IndiferentHash.setup({})
    if block_given?
      # Give the block a filename so it can be reported like a file source
      block.define_singleton_method(:filename) do name.to_s end
      Log.debug("Registering #{ name } from code block")
      @registry[name] = [block, options]
    else
      Log.debug("Registering #{ name }: #{ Log.fingerprint file } #{Log.fingerprint options}")
      @registry[name] = [file, options]
    end
  end

  # Names of all registered databases; empty when nothing was registered.
  def all_databases
    return [] unless @registry
    @registry.keys
  end

  # True when a database is registered under +name+. Strings and symbols are
  # treated alike, matching how get_index stringifies names before looking
  # them up in the registry.
  def include?(name)
    wanted = name.to_s
    all_databases.any? { |db| db.to_s == wanted }
  end

  # Fields of the index of database +name+ (memoized per name).
  def fields(name)
    @fields ||= {}
    @fields[name] ||= get_index(name).fields
  end

  # The key field of the index split on "~" (memoized per name). The accessors
  # below read positions 0, 1 and 2 as source, target and undirected marker.
  def description(name)
    @descriptions ||= {}
    @descriptions[name] ||= get_index(name).key_field.split("~")
  end

  # First component of the description of +name+.
  def source(name)
    description(name)[0]
  end

  # Second component of the description of +name+.
  def target(name)
    description(name)[1]
  end

  # Third component of the description of +name+ (nil when absent).
  def undirected(name)
    description(name)[2]
  end

  # Opens (or re-opens from its persisted copy) the association index for
  # +name+. Results are memoized per [name, options].
  #
  # NOTE: +options+ receives a :namespace entry when the knowledge base has a
  # namespace, so a hash passed by the caller is modified.
  #
  # Raises a RuntimeError when the index is not persisted and +name+ is not
  # registered.
  def get_index(name, options = {})
    name = name.to_s
    options[:namespace] ||= self.namespace unless self.namespace.nil?
    @indices ||= IndiferentHash.setup({})
    @indices[[name, options]] ||=
      begin
        # Persistence key: plain name, explicit :key option, or name + digest
        if options.empty?
          key = name.to_s
        elsif options[:key]
          key = options[:key]
          key = name if key == :name
        else
          fp = Misc.digest(options)
          key = name.to_s + "_" + fp
        end

        Persist.memory("Index:" << [key, dir] * "@") do
          options = options.dup

          persist_dir = dir
          persist_path = persist_dir[key].find
          file, registered_options = registry[name]

          options = IndiferentHash.add_defaults options, registered_options if registered_options and registered_options.any?
          options = IndiferentHash.add_defaults options, :persist_path => persist_path, :persist_dir => persist_dir, :persist => true

          # Knowledge-base wide entity options act as defaults per entity type
          if entity_options
            options[:entity_options] ||= {}
            entity_options.each do |type, info|
              options[:entity_options][type] ||= {}
              options[:entity_options][type] = IndiferentHash.add_defaults options[:entity_options][type], info
            end
          end

          persist_options = IndiferentHash.pull_keys options, :persist
          persist_options = IndiferentHash.add_defaults persist_options

          index = if persist_path.exists? and persist_options[:persist] and not persist_options[:update]
                    Log.low "Re-opening index #{ name } from #{ Log.fingerprint persist_path }. #{options}"
                    Association.index(file, **options, persist_options: persist_options.dup)
                  else
                    options = IndiferentHash.add_defaults options, registered_options if registered_options
                    raise "Repo #{ name } not found and not registered" if file.nil?
                    Log.medium "Opening index #{ name } from #{ Log.fingerprint file }. #{options}"
                    file = file.call if Proc === file
                    Association.index(file, **options, persist_options: persist_options.dup)
                  end

          # Only tag the index when there is a namespace to tag it with
          # (same guard as get_database)
          index.namespace = self.namespace if self.namespace

          index
        end
      end
  end

  # Opens (or re-opens from its persisted copy) the association database for
  # +name+. Results are memoized per [name, options]; +options+ is copied, so
  # the caller's hash is left untouched.
  #
  # Raises a RuntimeError when the database is not persisted and +name+ is not
  # registered.
  def get_database(name, options = {})
    options = options.dup
    # The knowledge base's own namespace is implicit; drop it so it does not
    # alter the memoization/persistence key
    if self.namespace == options[:namespace]
      options.delete(:namespace)
    end
    @databases ||= IndiferentHash.setup({})
    @databases[[name, options]] ||=
      begin
        if options.empty?
          key = name.to_s
        else
          fp = Misc.digest(options)
          key = name.to_s + "_" + fp
        end

        options[:namespace] ||= self.namespace unless self.namespace.nil?

        key += '.database'
        Persist.memory("Database:" << [key, dir] * "@") do
          options = options.dup

          persist_dir = dir
          persist_path = persist_dir[key].find
          file, registered_options = registry[name]

          options = IndiferentHash.add_defaults options, registered_options if registered_options and registered_options.any?
          options = IndiferentHash.add_defaults options, :persist_path => persist_path, :persist => true

          # Knowledge-base wide entity options act as defaults per entity type
          if entity_options
            options[:entity_options] ||= {}
            entity_options.each do |type, info|
              options[:entity_options][type] ||= {}
              options[:entity_options][type] = IndiferentHash.add_defaults options[:entity_options][type], info
            end
          end

          persist_options = IndiferentHash.pull_keys options, :persist

          database = if persist_path.exists? and persist_options[:persist] and not persist_options[:update]
                       Log.low "Re-opening database #{ name } from #{ Log.fingerprint persist_path }. #{options}"
                       Association.database(file, **options.merge(persist_options: persist_options))
                     else
                       options = IndiferentHash.add_defaults options, registered_options if registered_options
                       # NOTE(review): the value is unused; the call is kept because it
                       # presumably removes :undirected from options -- confirm
                       IndiferentHash.process_options options, :undirected
                       raise "Repo #{ name } not found and not registered" if file.nil?
                       Log.medium "Opening database #{ name } from #{ Log.fingerprint file }. #{options}"
                       file = file.call if Proc === file
                       Association.database(file, **options.merge(persist_options: persist_options))
                     end

          database.namespace = self.namespace if self.namespace

          database
        end
      end
  end

  # Fields of the index of +name+ (not memoized, unlike #fields).
  def index_fields(name)
    get_index(name).fields
  end

  # Registers +name+ (same arguments as #register) and immediately opens its
  # index, which is returned.
  def produce(name, *rest,&block)
    register(name, *rest, &block)
    get_index(name)
  end
end
|
@@ -0,0 +1,329 @@
|
|
1
|
+
class KnowledgeBase

  # Evaluates a list of textual traversal rules against a knowledge base.
  #
  # Rule forms understood by #traverse:
  #   "<source> <db> <target> [- <conditions>]"  match associations in <db>
  #   "<var> =<db> <v1>,<v2>,..."                assign values to <var>
  #   "?var {" ... "}"                           block that keeps only ?var
  #
  # Names starting with "?" are wildcards (variables) and names starting with
  # ":" are lists loaded through kb.load_list.
  class Traverser
    attr_accessor :rules, :assignments, :matches, :kb

    # kb    - knowledge base the rules are evaluated against.
    # rules - array of rule strings.
    def initialize(kb, rules = [])
      @kb = kb
      @rules = rules
      @assignments = {}
      @matches = {}
    end

    # Current assignment of wildcard +name+, or +name+ itself when it is not
    # a wildcard or has no assignment yet.
    def wildcard(name)
      return name unless is_wildcard?(name)
      assignments[name] || name
    end

    # True when +name+ starts with "?".
    def is_wildcard?(name)
      name[0] == '?'
    end

    # True when +name+ starts with ":".
    def is_list?(name)
      name[0] == ':'
    end

    # Resolves the source and target of a rule into entities for +db+.
    #
    # Wildcards resolve to their current assignment (or :all when unassigned),
    # lists are loaded from the knowledge base, anything else is identified
    # through the knowledge base. Returns [source_entities, target_entities],
    # each either an Array or :all.
    def identify(db, source, target)
      source_entities = if is_wildcard? source
                          assignments[source] || :all
                        elsif is_list? source
                          kb.load_list(source[1..-1])
                        else
                          kb.identify_source db, source
                        end

      target_entities = if is_wildcard? target
                          assignments[target] || :all
                        elsif is_list? target
                          kb.load_list(target[1..-1])
                        else
                          kb.identify_target db, target
                        end

      source_entities = [source_entities] unless Array === source_entities or source_entities == :all
      target_entities = [target_entities] unless Array === target_entities or target_entities == :all

      [source_entities, target_entities]
    end

    # Narrows the wildcards of a rule to the entities seen in +matches+
    # (nil when there are no matches).
    def reassign(matches, source, target)
      assignments[source] = (matches.any? ? matches.source_entity.uniq : nil) if is_wildcard? source
      assignments[target] = (matches.any? ? matches.target_entity.uniq : nil) if is_wildcard? target
    end

    # Filters the matches of every rule down to those compatible with the
    # final +assignments+. Returns a hash rule => matches; rules whose matches
    # are nil get no entry.
    def clean_matches(rules, all_matches, assignments)
      paths = {}

      rules.zip(all_matches).each do |rule, matches|
        source, _db, target = rule.split(/\s+/)
        next if matches.nil?

        if is_wildcard? source
          assigned = assignments[source] || []
          matches = matches.select{|m| assigned.include? m.partition("~").first }
        end

        if is_wildcard? target
          assigned = assignments[target] || []
          matches = matches.select{|m| assigned.include? m.partition("~").last }
        end

        paths[rule] = matches
      end

      paths
    end

    # Recursively builds the tree of consistent match combinations.
    #
    # Returns true when there are no rules left, false when no match of the
    # first rule leads to a complete path, and otherwise a hash
    # match => subtree for the remaining rules.
    def _fp(rules, clean_matches, assignments)
      return true if rules.empty?

      rule, *rest = rules
      source, _db, target = rule.split(/\s+/)

      wildcard_source = is_wildcard? source
      wildcard_target = is_wildcard? target

      paths = {}
      # A rule that produced no matches has no entry; treat it as empty so the
      # traversal simply yields no paths
      matches = clean_matches[rule] || []
      matches.each do |match|
        new_assignments = nil
        match_source, _sep, match_target = match.partition "~"

        if wildcard_source
          next if assignments[source] and assignments[source] != match_source
          new_assignments ||= assignments.dup
          new_assignments[source] = match_source
        end

        if wildcard_target
          next if assignments[target] and assignments[target] != match_target
          new_assignments ||= assignments.dup
          new_assignments[target] = match_target
        end

        # Rules without wildcards bind nothing: carry the current bindings
        # forward instead of passing nil down the recursion
        new_paths = _fp(rest, clean_matches, new_assignments || assignments)
        next unless new_paths
        paths[match] = new_paths
      end

      return false if paths.empty?

      paths
    end

    # Flattens the tree built by _fp into a list of paths, each path being
    # the list of matches from the first rule to the last.
    def _ep(paths)
      found = []
      paths.each do |match,_next|
        case _next
        when TrueClass
          found << [match]
        when FalseClass
          next
        else
          _ep(_next).each do |_n|
            found << [match] + _n
          end
        end
      end
      found
    end

    # Lists every combination of matches (one per rule) that is consistent
    # with a single binding of the wildcards. Returns an array of paths; empty
    # when there are no rules or no consistent combination.
    def find_paths(rules, all_matches, assignments)
      clean_matches = clean_matches(rules, all_matches, assignments)

      path_hash = _fp(rules, clean_matches, {})

      # _fp answers false (no path) or true (no rules); only a Hash holds paths
      return [] unless Hash === path_hash
      _ep(path_hash).collect do |path|
        path.zip(clean_matches.values_at(*rules)).collect do |item, matches|
          matches.select{|m| m == item }.first
        end
      end
    end

    # Matches of one rule in one database, optionally filtered by conditions.
    #
    # conditions - string tokenized with Misc.tokenize; "key=value" tokens are
    #              checked with Misc.match_value against the match info, bare
    #              tokens require the info field to contain "true".
    def traverse_db(db, source, target, conditions)
      source_entities, target_entities = identify db, source, target

      options = {:source => source_entities, :target => target_entities}
      Log.debug "Traversing #{ db }: #{Log.fingerprint options}"
      matches = kb.subset(db, options)

      if conditions
        Misc.tokenize(conditions).each do |condition|
          if condition.index "="
            key, value = condition.split("=")
            matches = matches.select{|m| Misc.match_value(m.info[key.strip], value)}
          else
            matches = matches.select{|m| m.info[condition.strip].to_s =~ /\btrue\b/}
          end
        end
      end

      matches
    end

    # Expands a database reference into the list of registered databases it
    # stands for. A reference without "?" stands for itself.
    def id_dbs(db)
      # ToDo: Revise this, I'm not sure what id does anymore
      # I think it deals with syndication
      return [db] unless db.include? '?'

      all_dbs = kb.registry.keys.collect{|k| k.to_s }
      _name, _sep, _kb = db.partition("@")
      case
      when _name[0] == '?'
        all_dbs.select{|_db|
          n,_s,d=_db.partition("@");
          d.nil? or d.empty? or (d == _kb and assignments[_name].include?(n))
        }
      when _kb[0] == '?'
        all_dbs.select{|_db| n,_s,d=_db.partition("@"); n == _name and assignments[_kb].include?(d) }
      end
    end

    # Processes all rules in order.
    #
    # nopaths - when true only the assignments are computed.
    #
    # Returns [assignments, paths]; paths is nil when +nopaths+ is set.
    # Raises a RuntimeError for a rule that matches none of the known forms.
    def traverse(nopaths = false)
      all_matches = []
      path_rules = []
      acc_var = nil
      pre_acc_var_assignments = nil
      rules.each do |rule|
        rule = rule.strip
        next if rule.empty?

        if m = rule.match(/([^\s]+)\s+([^\s=]+)\s+([^\s]+)(?:\s+-\s+(.*))?/)
          Log.debug "Traverse rule: #{rule}"
          path_rules << rule

          source, db, target, conditions = m.captures

          dbs = id_dbs(db)

          rule_matches = nil
          dbs.each do |_db|
            matches = traverse_db(_db, source, target, conditions)

            next if matches.nil? or matches.empty?

            # ToDo: Revise this, I'm not sure what id does anymore.
            # Accumulating the matched database name / knowledge base into a
            # wildcard of the database reference is currently disabled.

            if rule_matches.nil?
              rule_matches = matches
            else
              matches.each do |item|
                rule_matches << item
              end
            end

            assignments.each{|k,v| v.uniq! if v}
          end

          reassign rule_matches, source, target if rule_matches

          # nil is recorded for rules without matches; find_paths copes with it
          all_matches << rule_matches

        elsif m = rule.match(/([^\s=]+)\s*=([^\s]*)\s*(.*)/)
          Log.debug "Assign rule: #{rule}"
          var, db, value_str = m.captures
          names = value_str.split(",").collect{|v| v.strip}
          if db.empty?
            ids = names
          else
            dbs = id_dbs(db)
            # Values may themselves be assigned variables
            names = names.collect{|name| assignments.include?(name) ? assignments[name] : name}.flatten
            ids = names.collect{|name|
              id = nil
              # First database able to identify the name wins
              dbs.each do |candidate|
                sid, tid = identify candidate, name, name
                id = (sid + tid).compact.first
                break if id
              end
              id
            }
          end
          assignments[var] = ids

        elsif m = rule.match(/(\?[^\s{]+)\s*{/)
          acc_var = m.captures.first
          pre_acc_var_assignments = assignments.dup
          Log.debug "Start assign block: #{acc_var}"
        elsif m = rule.match(/^\s*}\s*$/)
          Log.debug "Close assign block: #{acc_var}"
          # Restore the assignments from before the block, keeping only the
          # block's variable, and forget the matches gathered inside it
          saved_assign = assignments[acc_var]
          assignments.clear
          assignments.merge!(pre_acc_var_assignments)
          pre_acc_var_assignments = nil
          assignments[acc_var] = saved_assign
          all_matches = []
          path_rules = []
        else
          raise "Rule not understood: #{rule}"
        end
      end

      return [assignments, nil] if nopaths

      Log.debug "Finding paths: #{all_matches.length}"
      paths = find_paths path_rules, all_matches, assignments
      Log.debug "Found paths: #{paths.length}"

      [assignments, paths]
    end

  end

  # Runs +rules+ against this knowledge base.
  # Returns [assignments, paths] (see Traverser#traverse).
  def traverse(rules, nopaths=false)
    traverser = KnowledgeBase::Traverser.new self, rules
    traverser.traverse nopaths
  end

end
|
@@ -0,0 +1,91 @@
|
|
1
|
+
require_relative 'association'
|
2
|
+
require_relative 'association/item'
|
3
|
+
require_relative 'knowledge_base/registry'
|
4
|
+
require_relative 'knowledge_base/entity'
|
5
|
+
require_relative 'knowledge_base/query'
|
6
|
+
require_relative 'knowledge_base/traverse'
|
7
|
+
require_relative 'knowledge_base/list'
|
8
|
+
#require 'scout/knowledge_base/query'
|
9
|
+
#require 'scout/knowledge_base/syndicate'
|
10
|
+
|
11
|
+
# A knowledge base rooted at a directory. Part of its state (namespace,
# registry, entity options and identifier files) can be saved to and loaded
# from YAML files kept under the directory's config location.
class KnowledgeBase

  attr_accessor :dir, :namespace, :registry, :entity_options, :format, :identifier_files

  # dir       - directory of the knowledge base; a copy is set up as a Path.
  # namespace - optional namespace.
  def initialize(dir, namespace = nil)
    @dir = Path.setup(dir.dup)
    @namespace = namespace
    @identifier_files = []

    @registry       ||= IndiferentHash.setup({})
    @entity_options ||= IndiferentHash.setup({})
    @format         ||= IndiferentHash.setup({})
    @descriptions   ||= IndiferentHash.setup({})
    @indices        ||= IndiferentHash.setup({})
  end

  # Location of the config entry named +name+ inside the directory.
  def config_file(name)
    @dir.config[name.to_s]
  end

  # Writes the instance variable @<name> as YAML into its config file.
  def save_variable(name)
    target = config_file(name)
    ivar = "@#{name}".to_sym
    Open.write(target, instance_variable_get(ivar).to_yaml)
  end

  # Sets the instance variable @<name> from its config file; does nothing
  # (and returns nil) when the file does not exist.
  def load_variable(name)
    origin = config_file(name)
    ivar = "@#{name}".to_sym
    return nil unless origin.exists?
    instance_variable_set(ivar, YAML.load(Open.read(origin)))
  end

  # Saves namespace, registry, entity_options and identifier_files.
  def save
    save_variable(:namespace)
    save_variable(:registry)
    save_variable(:entity_options)
    save_variable(:identifier_files)
  end

  # Loads namespace, registry, entity_options and identifier_files.
  def load
    load_variable(:namespace)
    load_variable(:registry)
    load_variable(:entity_options)
    load_variable(:identifier_files)
  end

  # Builds a knowledge base for +dir+ and loads its saved state. A Symbol is
  # resolved to a directory of that name under var/knowledge_base.
  def self.load(dir)
    dir = Path.setup("var").knowledge_base[dir.to_s] if Symbol === dir
    KnowledgeBase.new(dir).tap do |kb|
      kb.load
    end
  end

  # Summary of database +name+: source and target, their types and entity
  # options, the fields, and whether the database is marked 'undirected'.
  def info(name)
    src = self.source(name)
    tgt = self.target(name)
    src_type = self.source_type(name)
    tgt_type = self.target_type(name)
    db_fields = self.fields(name)
    src_options = self.entity_options_for src_type, name
    tgt_options = self.entity_options_for tgt_type, name
    is_undirected = self.undirected(name) == 'undirected'

    {
      :source => src,
      :target => tgt,
      :source_type => src_type,
      :target_type => tgt_type,
      :source_entity_options => src_options,
      :target_entity_options => tgt_options,
      :fields => db_fields,
      :undirected => is_undirected,
    }
  end
end
|
data/lib/scout/tsv/annotation.rb
CHANGED
@@ -48,12 +48,12 @@ module Annotation
|
|
48
48
|
|
49
49
|
fields = fields.flatten.compact.uniq
|
50
50
|
|
51
|
-
annotations = if Annotation.is_annotated?(objs)
|
52
|
-
objs.
|
51
|
+
annotations = if Annotation.is_annotated?(objs)
|
52
|
+
objs.annotation_hash.keys
|
53
53
|
elsif (Array === objs && objs.any?)
|
54
54
|
first = objs.compact.first
|
55
55
|
if Annotation.is_annotated?(first)
|
56
|
-
objs.compact.first.
|
56
|
+
objs.compact.first.annotation_hash.keys
|
57
57
|
else
|
58
58
|
raise "Objects didn't have annotations"
|
59
59
|
end
|
@@ -158,7 +158,7 @@ module Annotation
|
|
158
158
|
Annotation.load_tsv_values(id, values, tsv.fields)
|
159
159
|
end
|
160
160
|
|
161
|
-
case tsv.key_field
|
161
|
+
case tsv.key_field
|
162
162
|
when "List"
|
163
163
|
annotated_objects.first
|
164
164
|
else
|
data/lib/scout/tsv/index.rb
CHANGED
@@ -66,8 +66,6 @@ module TSV
|
|
66
66
|
index = TSV.setup({}, :type => :single)
|
67
67
|
end
|
68
68
|
|
69
|
-
tsv_file = TSV.open(tsv_file, **data_options) if ! TSV === tsv_file
|
70
|
-
|
71
69
|
log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}"
|
72
70
|
Log.low log_msg
|
73
71
|
bar = log_msg if TrueClass === bar
|
data/lib/scout/tsv/parser.rb
CHANGED
data/lib/scout/tsv/stream.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module TSV
|
2
|
-
def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil)
|
2
|
+
def self.paste_streams(streams, type: nil, sort: nil, sort_cmd_args: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil)
|
3
3
|
sep = "\t" if sep.nil?
|
4
4
|
|
5
5
|
streams = streams.collect do |stream|
|
@@ -20,7 +20,7 @@ module TSV
|
|
20
20
|
num_streams = streams.length
|
21
21
|
|
22
22
|
streams = streams.collect do |stream|
|
23
|
-
Open.sort_stream(stream, memory: sort_memory)
|
23
|
+
Open.sort_stream(stream, memory: sort_memory, cmd_args: sort_cmd_args)
|
24
24
|
end if sort
|
25
25
|
|
26
26
|
begin
|
@@ -193,7 +193,7 @@ module TSV
|
|
193
193
|
dumper.close
|
194
194
|
|
195
195
|
streams.each do |stream|
|
196
|
-
stream.close if stream.respond_to?(:close)
|
196
|
+
stream.close if stream.respond_to?(:close)
|
197
197
|
stream.join if stream.respond_to?(:join)
|
198
198
|
end
|
199
199
|
end
|
data/lib/scout/tsv.rb
CHANGED
@@ -50,6 +50,15 @@ class Step
|
|
50
50
|
@info
|
51
51
|
end
|
52
52
|
|
53
|
+
# Value stored under :pid in the step info.
def pid
  info[:pid]
end
|
56
|
+
|
57
|
+
# Stores +pid+ under :pid in the step info through set_info.
def pid=(pid)
  set_info(:pid, pid)
end
|
60
|
+
|
61
|
+
|
53
62
|
def merge_info(new_info)
|
54
63
|
info = self.info
|
55
64
|
new_info.each do |key,value|
|
@@ -121,7 +130,7 @@ class Step
|
|
121
130
|
if message.nil?
|
122
131
|
Log.info [Log.color(:status, status, true), Log.color(:task, task_name, true), Log.color(:path, path)] * " "
|
123
132
|
else
|
124
|
-
message = Log.fingerprint(message).sub(/^'/,'').sub(/'$/,'')
|
133
|
+
message = Log.fingerprint(message.split("\n").first).sub(/^'/,'').sub(/'$/,'')
|
125
134
|
Log.info [Log.color(:status, status, true), Log.color(:task, task_name, true), message, Log.color(:path, path)] * " "
|
126
135
|
end
|
127
136
|
end
|