scout-gear 10.7.1 → 10.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +50 -30
- data/VERSION +1 -1
- data/lib/scout/association/index.rb +5 -1
- data/lib/scout/association/item.rb +1 -1
- data/lib/scout/association.rb +46 -11
- data/lib/scout/entity/format.rb +9 -4
- data/lib/scout/entity/identifiers.rb +4 -4
- data/lib/scout/entity/named_array.rb +13 -0
- data/lib/scout/entity/property.rb +3 -1
- data/lib/scout/entity.rb +7 -4
- data/lib/scout/knowledge_base/enrichment.rb +9 -0
- data/lib/scout/knowledge_base/entity.rb +143 -0
- data/lib/scout/knowledge_base/list.rb +95 -0
- data/lib/scout/knowledge_base/query.rb +96 -0
- data/lib/scout/knowledge_base/registry.rb +173 -0
- data/lib/scout/knowledge_base/traverse.rb +329 -0
- data/lib/scout/knowledge_base.rb +91 -0
- data/lib/scout/persist/tsv/adapter/base.rb +13 -1
- data/lib/scout/persist/tsv.rb +2 -1
- data/lib/scout/tsv/annotation.rb +4 -4
- data/lib/scout/tsv/attach.rb +10 -2
- data/lib/scout/tsv/change_id.rb +3 -0
- data/lib/scout/tsv/dumper.rb +34 -30
- data/lib/scout/tsv/index.rb +0 -2
- data/lib/scout/tsv/open.rb +1 -0
- data/lib/scout/tsv/parser.rb +21 -10
- data/lib/scout/tsv/path.rb +8 -0
- data/lib/scout/tsv/stream.rb +17 -10
- data/lib/scout/tsv/traverse.rb +12 -2
- data/lib/scout/tsv/util/process.rb +4 -1
- data/lib/scout/tsv/util/select.rb +8 -2
- data/lib/scout/tsv/util/sort.rb +23 -15
- data/lib/scout/tsv/util.rb +11 -2
- data/lib/scout/tsv.rb +25 -11
- data/lib/scout/workflow/definition.rb +3 -3
- data/lib/scout/workflow/deployment/orchestrator.rb +8 -5
- data/lib/scout/workflow/step/dependencies.rb +35 -11
- data/lib/scout/workflow/step/file.rb +2 -1
- data/lib/scout/workflow/step/info.rb +23 -2
- data/lib/scout/workflow/step/load.rb +5 -3
- data/lib/scout/workflow/step/progress.rb +6 -0
- data/lib/scout/workflow/step/provenance.rb +1 -1
- data/lib/scout/workflow/step/status.rb +10 -4
- data/lib/scout/workflow/step.rb +32 -12
- data/lib/scout/workflow/task/dependencies.rb +33 -24
- data/lib/scout/workflow/task/inputs.rb +40 -12
- data/lib/scout/workflow/task.rb +22 -10
- data/lib/scout/workflow/usage.rb +2 -2
- data/lib/scout/workflow.rb +1 -1
- data/scout-gear.gemspec +28 -4
- data/scout_commands/kb/config +33 -0
- data/scout_commands/kb/entities +35 -0
- data/scout_commands/kb/list +39 -0
- data/scout_commands/kb/query +78 -0
- data/scout_commands/kb/register +44 -0
- data/scout_commands/kb/show +37 -0
- data/scout_commands/kb/traverse +66 -0
- data/test/data/person/brothers +1 -1
- data/test/scout/entity/test_identifiers.rb +3 -3
- data/test/scout/entity/test_named_array.rb +21 -0
- data/test/scout/knowledge_base/test_enrichment.rb +0 -0
- data/test/scout/knowledge_base/test_entity.rb +38 -0
- data/test/scout/knowledge_base/test_list.rb +40 -0
- data/test/scout/knowledge_base/test_query.rb +39 -0
- data/test/scout/knowledge_base/test_registry.rb +16 -0
- data/test/scout/knowledge_base/test_traverse.rb +245 -0
- data/test/scout/persist/test_tsv.rb +20 -0
- data/test/scout/persist/tsv/adapter/test_base.rb +20 -0
- data/test/scout/test_association.rb +17 -3
- data/test/scout/test_entity.rb +0 -15
- data/test/scout/test_knowledge_base.rb +27 -0
- data/test/scout/test_tsv.rb +40 -0
- data/test/scout/tsv/test_dumper.rb +24 -0
- data/test/scout/tsv/test_path.rb +24 -0
- data/test/scout/tsv/test_stream.rb +93 -0
- data/test/scout/tsv/test_traverse.rb +99 -0
- data/test/scout/tsv/test_util.rb +2 -0
- data/test/scout/tsv/util/test_select.rb +22 -0
- data/test/scout/tsv/util/test_sort.rb +24 -0
- data/test/scout/workflow/step/test_dependencies.rb +26 -0
- data/test/scout/workflow/step/test_info.rb +35 -0
- data/test/scout/workflow/task/test_dependencies.rb +67 -1
- data/test/scout/workflow/task/test_inputs.rb +24 -7
- data/test/scout/workflow/test_task.rb +36 -0
- data/test/scout/workflow/test_usage.rb +0 -1
- data/test/test_helper.rb +17 -0
- metadata +27 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 469b6e128a39e5612de5698005de8f4859c015bf3d65a5490a58f2f9f0312a39
|
4
|
+
data.tar.gz: 0a76edd86052c35af31f6de227c0bc43c71bb6b64fdc757e1179bf88a4ce8701
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c8753c031fda58c461afdefa3412a0d7996c6d5139a30653b1e6fcfaeba7fdf31b4d300bb2cb07060e7eb12d63891fc7a157cec3964c59d5cc68584ffc8823c8
|
7
|
+
data.tar.gz: 05cbb772edf0384f2889ad1be2cc597425d071da8c488364a8c72b23e6b7098aea0089247c180da09165f3eb7d899de494dca9ce2a00f285dcfaa564c8dc3e68
|
data/.vimproject
CHANGED
@@ -38,38 +38,11 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
38
38
|
orchestrator.rb
|
39
39
|
}
|
40
40
|
}
|
41
|
-
semaphore.rb
|
42
41
|
work_queue.rb
|
43
42
|
work_queue=work_queue{
|
44
43
|
socket.rb
|
45
44
|
worker.rb
|
46
45
|
}
|
47
|
-
|
48
|
-
persist=persist{
|
49
|
-
engine.rb
|
50
|
-
engine=engine{
|
51
|
-
tokyocabinet.rb
|
52
|
-
fix_width_table.rb
|
53
|
-
tkrzw.rb
|
54
|
-
packed_index.rb
|
55
|
-
sharder.rb
|
56
|
-
}
|
57
|
-
tsv.rb
|
58
|
-
tsv=tsv{
|
59
|
-
adapter.rb
|
60
|
-
serialize.rb
|
61
|
-
adapter=adapter{
|
62
|
-
base.rb
|
63
|
-
|
64
|
-
fix_width_table.rb
|
65
|
-
packed_index.rb
|
66
|
-
tkrzw.rb
|
67
|
-
tokyocabinet.rb
|
68
|
-
sharder.rb
|
69
|
-
}
|
70
|
-
}
|
71
|
-
}
|
72
|
-
|
73
46
|
tsv.rb
|
74
47
|
tsv=tsv{
|
75
48
|
util.rb
|
@@ -101,26 +74,60 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
101
74
|
open.rb
|
102
75
|
csv.rb
|
103
76
|
}
|
77
|
+
persist=persist{
|
78
|
+
engine.rb
|
79
|
+
engine=engine{
|
80
|
+
tokyocabinet.rb
|
81
|
+
fix_width_table.rb
|
82
|
+
tkrzw.rb
|
83
|
+
packed_index.rb
|
84
|
+
sharder.rb
|
85
|
+
}
|
86
|
+
tsv.rb
|
87
|
+
tsv=tsv{
|
88
|
+
adapter.rb
|
89
|
+
serialize.rb
|
90
|
+
adapter=adapter{
|
91
|
+
base.rb
|
92
|
+
|
93
|
+
fix_width_table.rb
|
94
|
+
packed_index.rb
|
95
|
+
tkrzw.rb
|
96
|
+
tokyocabinet.rb
|
97
|
+
sharder.rb
|
98
|
+
}
|
99
|
+
}
|
100
|
+
}
|
104
101
|
entity.rb
|
105
102
|
entity=entity{
|
106
103
|
property.rb
|
107
104
|
object.rb
|
108
105
|
format.rb
|
109
106
|
identifiers.rb
|
107
|
+
named_array.rb
|
110
108
|
}
|
111
109
|
association.rb
|
112
110
|
association=association{
|
113
|
-
fields.rb
|
114
111
|
index.rb
|
112
|
+
fields.rb
|
115
113
|
item.rb
|
116
114
|
}
|
117
|
-
|
115
|
+
knowledge_base.rb
|
116
|
+
knowledge_base=knowledge_base{
|
117
|
+
registry.rb
|
118
|
+
entity.rb
|
119
|
+
query.rb
|
120
|
+
traverse.rb
|
121
|
+
enrichment.rb
|
122
|
+
list.rb
|
123
|
+
}
|
118
124
|
offsite.rb
|
119
125
|
offsite=offsite{
|
120
126
|
ssh.rb
|
121
127
|
sync.rb
|
122
128
|
step.rb
|
123
129
|
}
|
130
|
+
semaphore.rb
|
124
131
|
}
|
125
132
|
scout-gear.rb
|
126
133
|
workflow-scout.rb
|
@@ -137,6 +144,15 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
137
144
|
update
|
138
145
|
template
|
139
146
|
offsite
|
147
|
+
kb=kb{
|
148
|
+
config
|
149
|
+
entities
|
150
|
+
register
|
151
|
+
show
|
152
|
+
query
|
153
|
+
traverse
|
154
|
+
list
|
155
|
+
}
|
140
156
|
workflow=workflow{
|
141
157
|
task
|
142
158
|
list
|
@@ -158,8 +174,12 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
158
174
|
test_helper.rb
|
159
175
|
test_scout-gear.rb
|
160
176
|
test_scout.rb
|
161
|
-
data=data{
|
177
|
+
data=data filter="*"{
|
162
178
|
person=person{
|
179
|
+
brothers
|
180
|
+
identifiers
|
181
|
+
marriages
|
182
|
+
parents
|
163
183
|
}
|
164
184
|
}
|
165
185
|
scout=scout{
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
10.7.
|
1
|
+
10.7.3
|
@@ -2,6 +2,10 @@ require 'scout/annotation'
|
|
2
2
|
module Association
|
3
3
|
|
4
4
|
def self.index(file, source: nil, target: nil, source_format: nil, target_format: nil, format: nil, **kwargs)
|
5
|
+
IndiferentHash.setup(kwargs)
|
6
|
+
source = kwargs.delete :source if kwargs.include?(:source)
|
7
|
+
target = kwargs.delete :target if kwargs.include?(:target)
|
8
|
+
|
5
9
|
persist_options = IndiferentHash.pull_keys kwargs, :persist
|
6
10
|
index_persist_options = IndiferentHash.add_defaults persist_options.dup, persist: true,
|
7
11
|
prefix: "Association::Index",
|
@@ -157,7 +161,7 @@ module Association
|
|
157
161
|
new.read
|
158
162
|
end
|
159
163
|
|
160
|
-
new.unnamed =
|
164
|
+
new.unnamed = self.unnamed
|
161
165
|
|
162
166
|
new.undirected = undirected
|
163
167
|
|
@@ -102,7 +102,7 @@ module AssociationItem
|
|
102
102
|
property :info => :array2single do
|
103
103
|
fields = self.info_fields
|
104
104
|
|
105
|
-
|
105
|
+
next [{}] * self.length if fields.nil? or fields.empty?
|
106
106
|
|
107
107
|
value = self.value
|
108
108
|
value.collect{|v|
|
data/lib/scout/association.rb
CHANGED
@@ -6,17 +6,37 @@ require_relative 'association/item'
|
|
6
6
|
|
7
7
|
module Association
|
8
8
|
def self.open(obj, source: nil, target: nil, fields: nil, source_format: nil, target_format: nil, format: nil, **kwargs)
|
9
|
+
IndiferentHash.setup(kwargs)
|
10
|
+
source = kwargs.delete :source if kwargs.include?(:source)
|
11
|
+
target = kwargs.delete :target if kwargs.include?(:target)
|
12
|
+
|
13
|
+
if Path.is_filename?(obj)
|
14
|
+
options = TSV.parse_options(obj).merge(kwargs)
|
15
|
+
else
|
16
|
+
options = kwargs.dup
|
17
|
+
end
|
18
|
+
|
19
|
+
if String === obj && options[:namespace] && obj.include?("NAMESPACE")
|
20
|
+
new_obj = obj.gsub(/\[?NAMESPACE\]?/, options[:namespace])
|
21
|
+
obj.annotate(new_obj)
|
22
|
+
obj = new_obj
|
23
|
+
end
|
24
|
+
|
9
25
|
all_fields = TSV.all_fields(obj)
|
10
|
-
source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields,
|
26
|
+
source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields, options.merge(source: source, target: target, source_format: source_format, target_format: target_format, format: format))
|
11
27
|
|
12
28
|
original_source_header = all_fields[source_pos]
|
13
29
|
original_field_headers = all_fields.values_at(*field_pos)
|
14
30
|
original_target_header = all_fields[field_pos.first]
|
15
31
|
|
16
|
-
type, identifiers = IndiferentHash.process_options
|
32
|
+
type, identifiers = IndiferentHash.process_options options, :type, :identifiers
|
17
33
|
|
18
|
-
if source_format
|
34
|
+
if source_format || target_format
|
19
35
|
translation_files = [TSV.identifier_files(obj), Entity.identifier_files(source_format), identifiers].flatten.compact
|
36
|
+
translation_files.collect!{|f| (Path.is_filename?(f, false) && options[:namespace]) ? Path.setup(f.gsub(/\[?NAMESPACE\]?/, options[:namespace])) : f }
|
37
|
+
end
|
38
|
+
|
39
|
+
if source_format
|
20
40
|
source_index = begin
|
21
41
|
TSV.translation_index(translation_files, source_header, source_format)
|
22
42
|
rescue
|
@@ -25,7 +45,6 @@ module Association
|
|
25
45
|
end
|
26
46
|
|
27
47
|
if target_format
|
28
|
-
translation_files = [TSV.identifier_files(obj), Entity.identifier_files(target_format), identifiers].flatten.compact
|
29
48
|
target_index = begin
|
30
49
|
TSV.translation_index(translation_files, field_headers.first, target_format)
|
31
50
|
rescue
|
@@ -66,11 +85,11 @@ module Association
|
|
66
85
|
|
67
86
|
if source_index.nil? && target_index.nil?
|
68
87
|
if TSV === obj
|
69
|
-
IndiferentHash.pull_keys
|
70
|
-
type =
|
71
|
-
res = obj.reorder original_source_header, all_fields.values_at(*field_pos), **
|
88
|
+
IndiferentHash.pull_keys options, :persist
|
89
|
+
type = options[:type] || obj.type
|
90
|
+
res = obj.reorder original_source_header, all_fields.values_at(*field_pos), **options.merge(type: type, merge: true)
|
72
91
|
else
|
73
|
-
res = TSV.open(obj, key_field: original_source_header, fields: all_fields.values_at(*field_pos), **
|
92
|
+
res = TSV.open(obj, key_field: original_source_header, fields: all_fields.values_at(*field_pos), **options.merge(type: type))
|
74
93
|
end
|
75
94
|
res.key_field = final_key_field
|
76
95
|
res.fields = final_fields
|
@@ -93,8 +112,24 @@ module Association
|
|
93
112
|
transformer
|
94
113
|
end
|
95
114
|
|
96
|
-
def self.database(*args, **kwargs)
|
97
|
-
|
98
|
-
|
115
|
+
def self.database(file, *args, **kwargs)
|
116
|
+
persist_options = IndiferentHash.pull_keys kwargs, :persist
|
117
|
+
|
118
|
+
database_persist_options = IndiferentHash.add_defaults persist_options.dup, persist: true,
|
119
|
+
prefix: "Association::Index", serializer: :list,
|
120
|
+
other_options: kwargs
|
121
|
+
|
122
|
+
Persist.tsv(file, kwargs, engine: "BDB", persist_options: database_persist_options) do |data|
|
123
|
+
tsv = open(file, *args, **kwargs)
|
124
|
+
if TSV::Transformer === tsv
|
125
|
+
tsv.tsv(merge: true, data: data)
|
126
|
+
elsif data.respond_to?(:persistence_path)
|
127
|
+
data.merge!(tsv)
|
128
|
+
tsv.annotate(data)
|
129
|
+
data
|
130
|
+
else
|
131
|
+
tsv
|
132
|
+
end
|
133
|
+
end
|
99
134
|
end
|
100
135
|
end
|
data/lib/scout/entity/format.rb
CHANGED
@@ -17,16 +17,20 @@ module Entity
|
|
17
17
|
def find(value)
|
18
18
|
@find_cache ||= {}
|
19
19
|
|
20
|
-
@find_cache
|
20
|
+
if @find_cache.include?(value)
|
21
|
+
@find_cache[value]
|
22
|
+
else
|
23
|
+
@find_cache[value] = begin
|
21
24
|
if orig_include? value
|
22
|
-
|
25
|
+
value
|
23
26
|
else
|
27
|
+
value = value.to_s
|
24
28
|
found = nil
|
25
29
|
each do |k,v|
|
26
|
-
if value
|
30
|
+
if value == k.to_s
|
27
31
|
found = k
|
28
32
|
break
|
29
|
-
elsif value
|
33
|
+
elsif value =~ /\(#{Regexp.quote k.to_s}\)/
|
30
34
|
found = k
|
31
35
|
break
|
32
36
|
end
|
@@ -34,6 +38,7 @@ module Entity
|
|
34
38
|
found
|
35
39
|
end
|
36
40
|
end
|
41
|
+
end
|
37
42
|
end
|
38
43
|
|
39
44
|
def [](value)
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Entity
|
2
2
|
def self.identifier_files(field)
|
3
|
-
entity_type = Entity.formats[field]
|
3
|
+
entity_type = Entity.formats[Entity.formats.find(field)]
|
4
4
|
return [] unless entity_type and entity_type.include? Entity::Identified
|
5
5
|
entity_type.identifier_files
|
6
6
|
end
|
@@ -48,7 +48,7 @@ module Entity
|
|
48
48
|
def identifier_files
|
49
49
|
files = identity_type.identifier_files.dup
|
50
50
|
return [] if files.nil?
|
51
|
-
files.collect!{|f| f.annotate f.gsub(/\b#{NAMESPACE_TAG}\b/, namespace.to_s) } if
|
51
|
+
files.collect!{|f| f.annotate f.gsub(/\b#{NAMESPACE_TAG}\b/, namespace.to_s) } if annotation_hash.include? :namespace and self.namespace
|
52
52
|
if files.select{|f| f =~ /\b#{NAMESPACE_TAG}\b/ }.any?
|
53
53
|
Log.warn "Rejecting some identifier files for lack of 'namespace': " << files.select{|f| f =~ /\b#{NAMESPACE_TAG}\b/ } * ", "
|
54
54
|
end
|
@@ -91,6 +91,8 @@ module Entity
|
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
|
+
name = default if name.nil?
|
95
|
+
|
94
96
|
self.send(:include, Entity::Identified) unless Entity::Identified === self
|
95
97
|
|
96
98
|
self.format = all_fields
|
@@ -106,6 +108,4 @@ module Entity
|
|
106
108
|
@identifier_files << file
|
107
109
|
@identifier_files.uniq!
|
108
110
|
end
|
109
|
-
|
110
|
-
|
111
111
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'scout/named_array'
|
2
|
+
require 'scout/entity'
|
3
|
+
|
4
|
+
module NamedArray
|
5
|
+
|
6
|
+
def [](key)
|
7
|
+
pos = NamedArray.identify_name(@fields, key)
|
8
|
+
return nil if pos.nil?
|
9
|
+
v = super(pos)
|
10
|
+
field = @fields && Integer === key && ! @fields.include?(key) ? @fields[key] : key
|
11
|
+
Entity.prepare_entity(v, field)
|
12
|
+
end
|
13
|
+
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
require 'scout/exceptions'
|
1
2
|
module Entity
|
2
3
|
class << self
|
3
4
|
attr_accessor :entity_property_cache
|
@@ -20,7 +21,8 @@ module Entity
|
|
20
21
|
Persist.annotation_repo_persist(repo, [name, obj.id] * ":", &block)
|
21
22
|
else
|
22
23
|
|
23
|
-
|
24
|
+
_id = obj.nil? ? 'nil' : obj.id
|
25
|
+
Persist.persist([name, _id] * ":", type, options.dup, &block)
|
24
26
|
end
|
25
27
|
end
|
26
28
|
|
data/lib/scout/entity.rb
CHANGED
@@ -3,6 +3,7 @@ require_relative 'entity/format'
|
|
3
3
|
require_relative 'entity/property'
|
4
4
|
require_relative 'entity/object'
|
5
5
|
require_relative 'entity/identifiers'
|
6
|
+
require_relative 'entity/named_array'
|
6
7
|
|
7
8
|
module Entity
|
8
9
|
def self.extended(base)
|
@@ -12,17 +13,19 @@ module Entity
|
|
12
13
|
base.instance_variable_set(:@persisted_methods, {})
|
13
14
|
base.include Entity::Object
|
14
15
|
base.include AnnotatedArray
|
16
|
+
base.format = base.to_s
|
15
17
|
base
|
16
18
|
end
|
17
19
|
|
18
20
|
def self.prepare_entity(entity, field, options = {})
|
19
21
|
return entity unless defined? Entity
|
20
|
-
return entity unless String === entity or Array === entity
|
21
|
-
options ||= {}
|
22
|
-
|
23
|
-
dup_array = options.delete :dup_array
|
22
|
+
return entity unless String === entity or Array === entity or Numeric === entity
|
24
23
|
|
25
24
|
if Entity === field or (Entity.respond_to?(:formats) and (_format = Entity.formats.find(field)))
|
25
|
+
options ||= {}
|
26
|
+
|
27
|
+
dup_array = options.delete :dup_array
|
28
|
+
|
26
29
|
params = options.dup
|
27
30
|
|
28
31
|
params[:format] ||= params.delete "format"
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'rbbt/knowledge_base/registry'
|
2
|
+
class KnowledgeBase
|
3
|
+
def enrichment(name, entities, options = {})
|
4
|
+
require 'rbbt/statistics/hypergeometric'
|
5
|
+
database = get_database(name, options)
|
6
|
+
entities = identify_source name, entities
|
7
|
+
database.enrichment entities, database.fields.first, :persist => false
|
8
|
+
end
|
9
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
require_relative '../entity'
|
2
|
+
|
3
|
+
class KnowledgeBase
|
4
|
+
|
5
|
+
def select_entities(name, entities, options = {})
|
6
|
+
index = get_index(name, options)
|
7
|
+
|
8
|
+
source_field = index.source_field
|
9
|
+
target_field = index.target_field
|
10
|
+
|
11
|
+
source_type = source_type name
|
12
|
+
target_type = target_type name
|
13
|
+
|
14
|
+
source_entities = entities[:source] || entities[source_field] || entities[Entity.formats[source_field].to_s] || entities[:both]
|
15
|
+
target_entities = entities[:target] || entities[target_field] || entities[Entity.formats[target_field].to_s] || entities[:both]
|
16
|
+
|
17
|
+
[source_entities, target_entities]
|
18
|
+
end
|
19
|
+
|
20
|
+
def entity_options_for(type, database_name = nil)
|
21
|
+
entity_options = self.entity_options
|
22
|
+
IndiferentHash.setup entity_options if entity_options and not IndiferentHash === entity_options
|
23
|
+
options = entity_options[type.to_s] || entity_options[Entity.formats[type].to_s] || {}
|
24
|
+
options[:format] = @format[type] if Hash === @format && @format.include?(type)
|
25
|
+
namespace = self.namespace
|
26
|
+
namespace = db_namespace(database_name) if namespace.nil? and database_name
|
27
|
+
if database_name
|
28
|
+
database = get_database(database_name)
|
29
|
+
if database.entity_options and (database.entity_options[type] or database.entity_options[Entity.formats[type.to_s].to_s])
|
30
|
+
options = options.merge(database.entity_options[type] || database.entity_options[Entity.formats[type.to_s].to_s])
|
31
|
+
end
|
32
|
+
end
|
33
|
+
options
|
34
|
+
end
|
35
|
+
|
36
|
+
def annotate(entities, type, database = nil)
|
37
|
+
format = @format[type] || type
|
38
|
+
entity_options = entity_options_for(type, database)
|
39
|
+
Entity.prepare_entity(entities, format, entity_options)
|
40
|
+
end
|
41
|
+
|
42
|
+
def translate(entities, type)
|
43
|
+
if format = @format[type] and (entities.respond_to? :format and format != entities.format)
|
44
|
+
entities.to format
|
45
|
+
else
|
46
|
+
entities
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def source_type(name)
|
51
|
+
Entity.formats[Entity.formats.find(source(name))]
|
52
|
+
end
|
53
|
+
|
54
|
+
def target_type(name)
|
55
|
+
Entity.formats[Entity.formats.find(target(name))]
|
56
|
+
end
|
57
|
+
|
58
|
+
def entities
|
59
|
+
all_databases.inject([]){|acc,name| acc << source(name); acc << target(name)}.uniq
|
60
|
+
end
|
61
|
+
|
62
|
+
def entity_types
|
63
|
+
entities.collect{|entity| Entity.formats[entity] }.uniq
|
64
|
+
end
|
65
|
+
|
66
|
+
def identifier_files(name)
|
67
|
+
get_database(name).identifier_files.dup + self.identifier_files
|
68
|
+
end
|
69
|
+
|
70
|
+
def db_namespace(name)
|
71
|
+
get_database(name).namespace
|
72
|
+
end
|
73
|
+
|
74
|
+
def source_index(name)
|
75
|
+
Persist.memory("Source index #{name}: KB directory #{dir}") do
|
76
|
+
identifier_files = identifier_files(name)
|
77
|
+
identifier_files.concat Entity.identifier_files(source(name)) if defined? Entity
|
78
|
+
identifier_files.uniq!
|
79
|
+
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
|
80
|
+
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if not namespace and db_namespace(name)
|
81
|
+
identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
|
82
|
+
TSV.translation_index identifier_files, nil, source(name), :persist => true
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
def target_index(name)
|
87
|
+
Persist.memory("Target index #{name}: KB directory #{dir}") do
|
88
|
+
identifier_files = identifier_files(name)
|
89
|
+
identifier_files.concat Entity.identifier_files(target(name)) if defined? Entity
|
90
|
+
identifier_files.uniq!
|
91
|
+
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if self.namespace
|
92
|
+
identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, db_namespace(name)))} if namespace.nil? and db_namespace(name)
|
93
|
+
identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
|
94
|
+
TSV.translation_index identifier_files, nil, target(name), :persist => true
|
95
|
+
end
|
96
|
+
end
|
97
|
+
|
98
|
+
def identify_source(name, entity)
|
99
|
+
return :all if entity == :all
|
100
|
+
index = begin source_index(name) rescue nil end
|
101
|
+
return entity if index.nil?
|
102
|
+
if Array === entity
|
103
|
+
entity.collect{|e| index[e] || e }
|
104
|
+
else
|
105
|
+
index[entity] || entity
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
def identify_target(name, entity)
|
110
|
+
return :all if entity == :all
|
111
|
+
index = begin target_index(name) rescue nil end
|
112
|
+
return entity if index.nil?
|
113
|
+
if Array === entity
|
114
|
+
entity.collect{|e| index[e] || e }
|
115
|
+
else
|
116
|
+
index[entity] || entity
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def identify(name, entity)
|
121
|
+
identify_source(name, entity) || identify_target(name, entity)
|
122
|
+
end
|
123
|
+
|
124
|
+
def define_entity_modules
|
125
|
+
entity_options.each do |entity,options|
|
126
|
+
next unless options[:identifiers]
|
127
|
+
identifiers = options[:identifiers]
|
128
|
+
identifiers = identifiers.split(",") unless Array === identifiers
|
129
|
+
m = begin
|
130
|
+
Object.const_get entity
|
131
|
+
rescue
|
132
|
+
m = Module.new
|
133
|
+
m.extend Entity
|
134
|
+
m.include Entity::Identified
|
135
|
+
Object.const_set entity, m
|
136
|
+
end
|
137
|
+
|
138
|
+
identifiers.each do |file|
|
139
|
+
m.add_identifiers Path.setup(file), self.format
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'scout/annotation'
|
2
|
+
class KnowledgeBase
|
3
|
+
|
4
|
+
def list_file(id, entity_type = nil)
|
5
|
+
id = Path.sanitize_filename(id)
|
6
|
+
|
7
|
+
entity_type = entity_type.to_s.split(":").last
|
8
|
+
|
9
|
+
raise "Ilegal list id: #{ id }" unless Misc.path_relative_to(dir, File.join(dir, id))
|
10
|
+
|
11
|
+
if entity_type
|
12
|
+
if entity_type.to_s == "simple"
|
13
|
+
path = dir.lists[entity_type.to_s][id]
|
14
|
+
else
|
15
|
+
path = dir.lists[entity_type.to_s][id + ".tsv"]
|
16
|
+
end
|
17
|
+
else
|
18
|
+
path = dir.lists.glob("*/#{id}").first
|
19
|
+
path ||= dir.lists.glob("*/#{id}.tsv").first
|
20
|
+
raise "List not found #{id}" if path.nil?
|
21
|
+
end
|
22
|
+
|
23
|
+
path.find
|
24
|
+
end
|
25
|
+
|
26
|
+
def save_list(id, list)
|
27
|
+
if AnnotatedArray === list
|
28
|
+
path = list_file(id, list.base_entity)
|
29
|
+
else
|
30
|
+
path = list_file(id, :simple)
|
31
|
+
end
|
32
|
+
|
33
|
+
Open.lock path do
|
34
|
+
begin
|
35
|
+
if AnnotatedArray === list
|
36
|
+
Open.write(path, Annotation.tsv(list, :all).to_s)
|
37
|
+
else
|
38
|
+
Open.write(path, list * "\n")
|
39
|
+
end
|
40
|
+
rescue
|
41
|
+
FileUtils.rm(path) if File.exist?(path)
|
42
|
+
raise $!
|
43
|
+
end
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def load_list(id, entity_type = nil)
|
48
|
+
if entity_type
|
49
|
+
path = list_file(id, entity_type)
|
50
|
+
path = list_file(id) unless path.exists?
|
51
|
+
else
|
52
|
+
path = list_file(id)
|
53
|
+
end
|
54
|
+
|
55
|
+
raise "List not found: #{ id }" unless path and path.exists?
|
56
|
+
|
57
|
+
begin
|
58
|
+
if path.get_extension == 'tsv'
|
59
|
+
list = Annotation.load_tsv path.tsv
|
60
|
+
list.extend AnnotatedArray
|
61
|
+
list
|
62
|
+
else
|
63
|
+
path.list
|
64
|
+
end
|
65
|
+
rescue
|
66
|
+
Log.exception $!
|
67
|
+
nil
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
def lists
|
72
|
+
lists = {}
|
73
|
+
dir.lists.glob("*").each do |list_dir|
|
74
|
+
lists[list_dir.basename] = list_dir.glob("*").
|
75
|
+
collect(&:unset_extension).
|
76
|
+
collect(&:basename)
|
77
|
+
end
|
78
|
+
lists
|
79
|
+
end
|
80
|
+
|
81
|
+
def delete_list(id, entity_type = nil)
|
82
|
+
path = list_file(id, entity_type)
|
83
|
+
path = list_file(id) unless path.exists?
|
84
|
+
|
85
|
+
"This list does not belong to #{ user }: #{[entity_type, id] * ": "}" unless File.exist? path
|
86
|
+
|
87
|
+
Open.lock path do
|
88
|
+
begin
|
89
|
+
FileUtils.rm path if File.exist? path
|
90
|
+
rescue
|
91
|
+
raise $!
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|