rbbt-util 5.14.33 → 5.14.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/rbbt +2 -0
- data/lib/rbbt/association/database.rb +153 -0
- data/lib/rbbt/association/index.rb +89 -20
- data/lib/rbbt/association/open.rb +37 -0
- data/lib/rbbt/association/util.rb +133 -0
- data/lib/rbbt/association.rb +1 -380
- data/lib/rbbt/entity/identifiers.rb +106 -0
- data/lib/rbbt/entity.rb +1 -0
- data/lib/rbbt/knowledge_base/entity.rb +107 -0
- data/lib/rbbt/knowledge_base/query.rb +83 -0
- data/lib/rbbt/knowledge_base/registry.rb +106 -0
- data/lib/rbbt/knowledge_base/syndicate.rb +22 -0
- data/lib/rbbt/knowledge_base.rb +6 -359
- data/lib/rbbt/tsv/accessor.rb +4 -0
- data/lib/rbbt/tsv/change_id.rb +119 -0
- data/lib/rbbt/tsv/index.rb +6 -2
- data/lib/rbbt/tsv/parser.rb +7 -5
- data/lib/rbbt/tsv/util.rb +1 -1
- data/lib/rbbt/tsv.rb +2 -1
- data/lib/rbbt/util/R/model.rb +1 -1
- data/lib/rbbt/util/log.rb +2 -2
- data/lib/rbbt/util/misc/bgzf.rb +2 -0
- data/lib/rbbt/util/misc/inspect.rb +1 -1
- data/lib/rbbt-util.rb +11 -7
- data/lib/rbbt.rb +0 -1
- data/share/rbbt_commands/app/start +1 -1
- data/share/rbbt_commands/tsv/change_id +2 -2
- data/test/rbbt/association/test_database.rb +61 -0
- data/test/rbbt/association/test_index.rb +67 -22
- data/test/rbbt/association/test_open.rb +68 -0
- data/test/rbbt/association/test_util.rb +108 -0
- data/test/rbbt/entity/test_identifiers.rb +40 -0
- data/test/rbbt/knowledge_base/test_entity.rb +0 -0
- data/test/rbbt/knowledge_base/test_query.rb +45 -0
- data/test/rbbt/knowledge_base/test_registry.rb +52 -0
- data/test/rbbt/test_association.rb +3 -3
- data/test/rbbt/test_knowledge_base.rb +79 -51
- data/test/rbbt/test_monitor.rb +0 -2
- data/test/rbbt/test_packed_index.rb +1 -1
- data/test/rbbt/test_resource.rb +6 -6
- data/test/rbbt/test_tsv.rb +34 -44
- data/test/rbbt/tsv/parallel/test_through.rb +2 -4
- data/test/rbbt/tsv/parallel/test_traverse.rb +30 -28
- data/test/rbbt/tsv/test_change_id.rb +10 -0
- data/test/rbbt/util/R/test_model.rb +9 -10
- data/test/rbbt/util/test_misc.rb +1 -1
- data/test/test_helper.rb +4 -1
- metadata +24 -2
data/lib/rbbt/association.rb
CHANGED
@@ -1,383 +1,4 @@
|
|
1
|
-
require 'rbbt
|
2
|
-
require 'rbbt/tsv/change_id'
|
1
|
+
require 'rbbt/association/open'
|
3
2
|
require 'rbbt/association/index'
|
4
|
-
|
5
3
|
module Association
|
6
|
-
class << self
|
7
|
-
attr_accessor :databases
|
8
|
-
def databases
|
9
|
-
@databases ||= {}
|
10
|
-
end
|
11
|
-
end
|
12
|
-
|
13
|
-
def self.add_reciprocal(tsv)
|
14
|
-
tsv = tsv.type == :double ? tsv : tsv.to_double
|
15
|
-
new = TSV.open(tsv.dumper_stream)
|
16
|
-
tsv.with_unnamed do
|
17
|
-
tsv.through do |source, values|
|
18
|
-
next if values.flatten.compact.empty?
|
19
|
-
if values.length > 1
|
20
|
-
Misc.zip_fields(values).each do |_target_values|
|
21
|
-
target, *target_values = _target_values
|
22
|
-
if new[target].nil?
|
23
|
-
new_values = [[source]] + target_values.collect{|v| [v] }
|
24
|
-
new[target] = new_values
|
25
|
-
else
|
26
|
-
new_values = new[target].collect{|l| l.dup }
|
27
|
-
targets = new_values.shift
|
28
|
-
targets << source
|
29
|
-
rest = new_values.zip(target_values).collect do |o,n|
|
30
|
-
o << n
|
31
|
-
o
|
32
|
-
end
|
33
|
-
new_values = [targets] + rest
|
34
|
-
new[target] = new_values
|
35
|
-
end
|
36
|
-
end
|
37
|
-
else
|
38
|
-
values.first.each do |target|
|
39
|
-
if new[target].nil?
|
40
|
-
new[target] = [[source]]
|
41
|
-
else
|
42
|
-
new[target] = [new[target][0] + [source]]
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
tsv.annotate(new)
|
50
|
-
|
51
|
-
new
|
52
|
-
end
|
53
|
-
|
54
|
-
def self.resolve_field(name, fields)
|
55
|
-
entity_type = Entity.formats[name]
|
56
|
-
return "Field #{ name } could not be resolved: #{fields}" if entity_type.nil?
|
57
|
-
field = fields.select{|f| Entity.formats[f] == entity_type}.first
|
58
|
-
[field, nil, name]
|
59
|
-
end
|
60
|
-
|
61
|
-
def self.parse_field_specification(spec)
|
62
|
-
return [2,nil,nil] if Fixnum === spec
|
63
|
-
spec = spec.split "=>" unless Array === spec
|
64
|
-
field_part, final_format = spec
|
65
|
-
|
66
|
-
field, format = field_part.split "=~"
|
67
|
-
|
68
|
-
[field, format, final_format]
|
69
|
-
end
|
70
|
-
|
71
|
-
def self.calculate_headers(key_field, fields, spec)
|
72
|
-
all_fields = [key_field].concat fields if fields and key_field
|
73
|
-
field, header, format = parse_field_specification spec if spec
|
74
|
-
|
75
|
-
if field and key_field == field and not all_fields.include? field
|
76
|
-
field, header, format = resolve_field field, all_fields
|
77
|
-
end
|
78
|
-
|
79
|
-
[field, header, format]
|
80
|
-
end
|
81
|
-
|
82
|
-
#{{{ Open
|
83
|
-
|
84
|
-
def self.open_tsv(file, source, source_header, target, target_header, all_fields, options)
|
85
|
-
fields = Misc.process_options options, :fields
|
86
|
-
fields ||= all_fields.dup
|
87
|
-
|
88
|
-
fields.delete source
|
89
|
-
fields.delete target
|
90
|
-
fields.unshift target
|
91
|
-
|
92
|
-
open_options = options.merge({
|
93
|
-
:persist => false,
|
94
|
-
:key_field => all_fields.index(source),
|
95
|
-
:fields => fields.collect{|f| String === f ? all_fields.index(f): f },
|
96
|
-
:unnamed => true,
|
97
|
-
:merge => (options[:merge] or (options[:type] and options[:type].to_sym == :flat) ? false : true)
|
98
|
-
})
|
99
|
-
|
100
|
-
open_options["header_hash"] = "#" if options["header_hash"] == ""
|
101
|
-
|
102
|
-
field_headers = all_fields.values_at *open_options[:fields]
|
103
|
-
|
104
|
-
tsv = case file
|
105
|
-
when TSV
|
106
|
-
if file.fields == field_headers
|
107
|
-
file
|
108
|
-
else
|
109
|
-
file.reorder(source, field_headers)
|
110
|
-
end
|
111
|
-
else
|
112
|
-
TSV.open(file, open_options)
|
113
|
-
end
|
114
|
-
|
115
|
-
tsv.fields = field_headers
|
116
|
-
tsv.key_field = source
|
117
|
-
|
118
|
-
# Fix source header
|
119
|
-
if source_header and tsv.key_field != source_header
|
120
|
-
tsv.key_field = source_header
|
121
|
-
end
|
122
|
-
|
123
|
-
# Fix target header
|
124
|
-
if target_header and tsv.fields.first != target_header
|
125
|
-
tsv.fields = tsv.fields.collect{|f| f == target ? target_header : f }
|
126
|
-
end
|
127
|
-
|
128
|
-
tsv
|
129
|
-
end
|
130
|
-
|
131
|
-
def self.translate_tsv(tsv, source_final_format, target_final_format)
|
132
|
-
source_field = tsv.key_field
|
133
|
-
target_field = tsv.fields.first
|
134
|
-
|
135
|
-
if source_final_format and source_field != source_final_format and
|
136
|
-
Entity.formats[source_field]
|
137
|
-
|
138
|
-
Log.debug("Changing source format from #{tsv.key_field} to #{source_final_format}")
|
139
|
-
|
140
|
-
tsv.with_unnamed do
|
141
|
-
identifiers = tsv.identifiers || Organism.identifiers(tsv.namespace)
|
142
|
-
tsv = tsv.change_key source_final_format, :identifiers => identifiers, :persist => true
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
|
147
|
-
# Translate target
|
148
|
-
if target_final_format and target_field != target_final_format and
|
149
|
-
Entity.formats[target_field] and
|
150
|
-
Entity.formats[target_field] == Entity.formats[target_final_format]
|
151
|
-
|
152
|
-
Log.debug("Changing target format from #{tsv.fields.first} to #{target_final_format}")
|
153
|
-
|
154
|
-
save_key_field = tsv.key_field
|
155
|
-
tsv.key_field = "MASKED"
|
156
|
-
|
157
|
-
tsv.with_unnamed do
|
158
|
-
tsv = tsv.swap_id tsv.fields.first, target_final_format, :identifiers => Organism.identifiers(tsv.namespace), :persist => true, :compact => true
|
159
|
-
end
|
160
|
-
|
161
|
-
tsv.key_field = save_key_field
|
162
|
-
end
|
163
|
-
|
164
|
-
tsv
|
165
|
-
end
|
166
|
-
|
167
|
-
def self.specs(all_fields, options = {})
|
168
|
-
source_spec, source_format, target_spec, target_format, format, key_field, fields = Misc.process_options options, :source, :source_format, :target, :target_format, :format, :key_field, :fields
|
169
|
-
|
170
|
-
if key_field and all_fields
|
171
|
-
key_pos = (Fixnum === key_field ? key_field : all_fields.index(key_field) )
|
172
|
-
key_field = all_fields[key_pos]
|
173
|
-
else
|
174
|
-
key_field = all_fields.first if all_fields
|
175
|
-
end
|
176
|
-
|
177
|
-
if fields and all_fields
|
178
|
-
field_pos = fields.collect{|f| Fixnum === f ? f : all_fields.index(f) }
|
179
|
-
fields = all_fields.values_at *field_pos
|
180
|
-
else
|
181
|
-
#fields = all_fields[1..-1] if all_fields
|
182
|
-
end
|
183
|
-
|
184
|
-
source, source_header, orig_source_format = calculate_headers(key_field, fields || all_fields, source_spec)
|
185
|
-
source_format ||= orig_source_format
|
186
|
-
source = key_field if source.nil?
|
187
|
-
source = key_field if source == :key
|
188
|
-
source_header ||= source
|
189
|
-
|
190
|
-
target, target_header, orig_target_format = calculate_headers(key_field, fields || all_fields, target_spec)
|
191
|
-
target_format ||= orig_target_format
|
192
|
-
if target.nil?
|
193
|
-
target = case
|
194
|
-
when fields
|
195
|
-
fields.first
|
196
|
-
when key_field == source
|
197
|
-
all_fields[1]
|
198
|
-
else
|
199
|
-
(([key_field] + all_fields) - [source]).first
|
200
|
-
end
|
201
|
-
end
|
202
|
-
|
203
|
-
target = key_field if target == :key
|
204
|
-
target_header ||= target
|
205
|
-
|
206
|
-
case format
|
207
|
-
when String
|
208
|
-
source_format ||= format if Entity.formats[source_header] == Entity.formats[format]
|
209
|
-
target_format ||= format if Entity.formats[target_header] == Entity.formats[format]
|
210
|
-
when Hash
|
211
|
-
_type = Entity.formats[source_header].to_s
|
212
|
-
source_format ||= format[_type] if format.include? _type
|
213
|
-
_type = Entity.formats[target_header].to_s
|
214
|
-
target_format ||= format[_type] if format.include? _type
|
215
|
-
end
|
216
|
-
|
217
|
-
[source, source_header, source_format, target, target_header, target_format, fields || all_fields]
|
218
|
-
end
|
219
|
-
|
220
|
-
def self.load_tsv(file, options)
|
221
|
-
undirected = options[:undirected]
|
222
|
-
|
223
|
-
case file
|
224
|
-
when Proc
|
225
|
-
res = file.call
|
226
|
-
tsv = case res
|
227
|
-
when TSV, Path
|
228
|
-
return load_tsv(res, options)
|
229
|
-
else
|
230
|
-
tsv = TSV.open(res, :unnamed => true)
|
231
|
-
return load_tsv(tsv, options)
|
232
|
-
end
|
233
|
-
when TSV
|
234
|
-
key_field, *fields = all_fields = file.all_fields
|
235
|
-
else
|
236
|
-
key_field, *fields = all_fields = TSV.parse_header(file, options.merge(:fields => nil, :key_field => nil)).all_fields
|
237
|
-
end
|
238
|
-
|
239
|
-
source, source_header, source_format, target, target_header, target_format, fields = specs(all_fields, options)
|
240
|
-
|
241
|
-
Log.low("Loading associations from: #{ Misc.fingerprint file }")
|
242
|
-
Log.low("sources: #{ [source, source_header, source_format].join(", ") }")
|
243
|
-
Log.low("targets: #{ [target, target_header, target_format].join(", ") }")
|
244
|
-
|
245
|
-
tsv = open_tsv(file, source, source_header, target, target_header, all_fields, options.merge(:fields => fields.dup))
|
246
|
-
|
247
|
-
tsv = translate_tsv(tsv, source_format, target_format)
|
248
|
-
|
249
|
-
tsv = add_reciprocal(tsv) if undirected
|
250
|
-
|
251
|
-
tsv
|
252
|
-
end
|
253
|
-
|
254
|
-
def self.open(file, options = {}, persist_options = {})
|
255
|
-
options = {} if options.nil?
|
256
|
-
persist_options = {} if persist_options.nil?
|
257
|
-
|
258
|
-
namespace = options[:namespace]
|
259
|
-
old_file, file = file, file.sub('NAMESPACE', namespace) if namespace and String === file
|
260
|
-
old_file.annotate file if Path === old_file
|
261
|
-
|
262
|
-
Persist.persist_tsv(file, nil, options, {:persist => true, :prefix => "Association"}.merge(persist_options)) do |data|
|
263
|
-
options = options.clone
|
264
|
-
|
265
|
-
tsv = load_tsv(file, options)
|
266
|
-
|
267
|
-
tsv.annotate(data)
|
268
|
-
|
269
|
-
data.serializer = tsv.type
|
270
|
-
|
271
|
-
tsv.with_unnamed do
|
272
|
-
tsv.each do |k,v|
|
273
|
-
next if v.nil?
|
274
|
-
data[k] = v
|
275
|
-
end
|
276
|
-
end
|
277
|
-
|
278
|
-
data
|
279
|
-
end
|
280
|
-
end
|
281
|
-
|
282
|
-
#{{{ Index
|
283
|
-
|
284
|
-
#def self.get_index(index_file, write = false)
|
285
|
-
# Persist.open_tokyocabinet(index_file, write, :list, TokyoCabinet::BDB).tap{|r| r.unnamed = true; Association::Index.setup r }
|
286
|
-
#end
|
287
|
-
|
288
|
-
def self.index(file, options = {}, persist_options = nil)
|
289
|
-
options = {} if options.nil?
|
290
|
-
options = Misc.add_defaults options, :persist => true, :undirected => false
|
291
|
-
persist_options = Misc.pull_keys options, :persist if persist_options.nil?
|
292
|
-
|
293
|
-
expanded_persist_options = {:persist => true, :prefix => "Association Index"}.
|
294
|
-
merge(persist_options).
|
295
|
-
merge(:engine => TokyoCabinet::BDB, :serializer => :clean)
|
296
|
-
|
297
|
-
Persist.persist_tsv(file, nil, options, expanded_persist_options) do |assocs|
|
298
|
-
undirected = options[:undirected]
|
299
|
-
recycle = options[:recycle]
|
300
|
-
if file
|
301
|
-
tsv = TSV === file ? file : Association.open(file, options, persist_options.merge(:persist => false))
|
302
|
-
|
303
|
-
fields = tsv.fields
|
304
|
-
source_field = tsv.key_field
|
305
|
-
target_field = fields.first.split(":").last
|
306
|
-
|
307
|
-
key_field = [source_field, target_field, undirected ? "undirected" : nil].compact * "~"
|
308
|
-
|
309
|
-
TSV.setup(assocs, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)
|
310
|
-
|
311
|
-
tsv.with_unnamed do
|
312
|
-
tsv.with_monitor :desc => "Extracting associations" do
|
313
|
-
case tsv.type
|
314
|
-
when :list
|
315
|
-
tsv.through do |source, values|
|
316
|
-
target, *rest = values
|
317
|
-
next if source.nil? or source.empty? or target.nil? or target.empty?
|
318
|
-
|
319
|
-
key = [source, target] * "~"
|
320
|
-
assocs[key] = rest
|
321
|
-
end
|
322
|
-
when :flat
|
323
|
-
tsv.through do |source, targets|
|
324
|
-
next if source.nil? or source.empty? or targets.nil? or targets.empty?
|
325
|
-
|
326
|
-
targets.each do |target|
|
327
|
-
next if target.nil? or target.empty?
|
328
|
-
key = [source, target] * "~"
|
329
|
-
assocs[key] = nil
|
330
|
-
end
|
331
|
-
end
|
332
|
-
|
333
|
-
when :double
|
334
|
-
tsv.through do |source, values|
|
335
|
-
next if values.empty?
|
336
|
-
next if source.nil? or source.empty?
|
337
|
-
next if values.empty?
|
338
|
-
|
339
|
-
targets = values.first
|
340
|
-
|
341
|
-
rest = values[1..-1]
|
342
|
-
|
343
|
-
size = values.first ? values.first.length : 0
|
344
|
-
|
345
|
-
rest.each_with_index do |list,i|
|
346
|
-
list.replace [list.first] * size if list.length == 1
|
347
|
-
end if recycle and size > 1
|
348
|
-
|
349
|
-
rest = Misc.zip_fields rest
|
350
|
-
|
351
|
-
annotations = rest.length > 1 ?
|
352
|
-
targets.zip(rest) :
|
353
|
-
targets.zip(rest * targets.length)
|
354
|
-
|
355
|
-
annotations.each do |target, info|
|
356
|
-
next if target.nil? or target.empty?
|
357
|
-
key = [source, target] * "~"
|
358
|
-
if assocs[key].nil? or info.nil?
|
359
|
-
assocs[key] = info
|
360
|
-
else
|
361
|
-
old_info = assocs[key]
|
362
|
-
info = old_info.zip(info).collect{|p| p * ";;" }
|
363
|
-
assocs[key] = info
|
364
|
-
end
|
365
|
-
end
|
366
|
-
end
|
367
|
-
else
|
368
|
-
raise "Type not supported: #{tsv.type}"
|
369
|
-
end
|
370
|
-
end
|
371
|
-
end
|
372
|
-
else
|
373
|
-
key_field, fields = options.values_at :key_field, :fields
|
374
|
-
TSV.setup(assocs, :key_field => key_field, :fields => fields[1..-1], :type => :list, :serializer => :list)
|
375
|
-
end
|
376
|
-
assocs.close
|
377
|
-
|
378
|
-
assocs
|
379
|
-
end.tap do |assocs|
|
380
|
-
Association::Index.setup assocs
|
381
|
-
end
|
382
|
-
end
|
383
4
|
end
|
@@ -0,0 +1,106 @@
|
|
1
|
+
# Identifier support for entity types: records the identifier files and
# formats registered for a type and gives its entities format-translation
# properties (+to+, +name+, +default+, +ensembl+).
module Entity

  # Identifier files registered for the entity type that owns +field+.
  #
  # field - format name, looked up in Entity.formats.
  #
  # Returns the type's identifier files, or [] when +field+ maps to no entity
  # type or that type does not include Entity::Identified.
  def self.identifier_files(field)
    entity_type = Entity.formats[field]
    return [] unless entity_type && entity_type.include?(Entity::Identified)
    entity_type.identifier_files
  end

  # Behaviour included into an entity type by #add_identifiers.
  module Identified

    # Hook run when an entity type includes this module: declares the +format+
    # and +organism+ annotations, defines the identifier helpers on the type
    # and registers the translation properties.
    def self.included(base)
      base.annotation :format
      base.annotation :organism

      base.module_eval do
        # Last of this entity's annotation types that includes
        # Entity::Identified.
        def identity_type
          self.annotation_types.select{|m| m.include? Entity::Identified }.last
        end

        # Identifier files of the identity type. NAMESPACE is replaced by the
        # entity's organism when it has one; files that still contain
        # NAMESPACE afterwards are dropped.
        def identifier_files
          files = identity_type.identifier_files.dup
          files.collect!{|f| f.annotate f.gsub(/\bNAMESPACE\b/, organism) } if annotations.include?(:organism) && self.organism
          files.reject!{|f| f =~ /\bNAMESPACE\b/ }
          files
        end

        # Translation index into +format+, memoized through Persist.memory.
        #
        # format - target format (passed through to TSV.translation_index).
        # source - source format; defaults to the entity's own format when the
        #          entity responds to +format+.
        def identifier_index(format = nil, source = nil)
          # The cache key is built from the +source+ argument as given, before
          # it is defaulted inside the block.
          Persist.memory("Entity index #{identity_type}: #{format} (from #{source || "All"})", :format => format, :source => source) do
            source ||= self.respond_to?(:format) ? self.format : nil

            index = TSV.translation_index(identifier_files, format, source, :persist => true)
            index.unnamed = true
            index
          end
        end
      end

      # Translate the entity (or array of entities) to +target_format+.
      # The shortcuts :name, :default and :ensembl are resolved through the
      # identity type; anything else is used as the format name itself.
      base.property :to => :both do |target_format|

        target_format = case target_format
                        when :name
                          identity_type.name_format
                        when :default
                          identity_type.default_format
                        when :ensembl
                          identity_type.formats.select{|f| f =~ /Ensembl/}.first
                        else
                          target_format
                        end

        # Already in the requested format: nothing to translate.
        return self if target_format == format
        if Array === self
          self.annotate(identifier_index(target_format, self.format).values_at(*self))
        else
          self.annotate(identifier_index(target_format, self.format)[self])
        end.tap{|o| o.format = target_format unless o.nil? }
      end

      base.property :name => :both do
        to(:name)
      end

      # Fixed: this property previously delegated to to(:name), so it returned
      # the name format instead of the identity type's default format.
      base.property :default => :both do
        to(:default)
      end

      base.property :ensembl => :both do
        to(:ensembl)
      end

    end

  end

  attr_accessor :identifier_files, :formats, :default_format, :name_format, :description_format

  # Register an identifier file for this entity type.
  #
  # file        - a TSV, or a file whose header TSV.parse_header can read. When
  #               it matches /NAMESPACE/ the placeholder is replaced by '**',
  #               the result is globbed, and the fields of every match are
  #               pooled.
  # default     - format to record as default_format (optional).
  # name        - format to record as name_format (optional).
  # description - format to record as description_format (optional).
  #
  # All fields found are assigned through +format=+ and accumulated, without
  # duplicates, in +formats+. The file is appended to +identifier_files+ and
  # Entity::Identified is included into the type if it is not already.
  def add_identifiers(file, default = nil, name = nil, description = nil)
    if TSV === file
      all_fields = file.all_fields
    else
      if file =~ /NAMESPACE/
        all_fields = file.sub(/NAMESPACE/,'**').glob.collect do |f|
          TSV.parse_header(f).all_fields
        end.flatten.compact.uniq
      else
        all_fields = TSV.parse_header(file).all_fields
      end
    end

    self.format = all_fields
    @formats ||= []
    @formats.concat all_fields
    @formats.uniq!

    @default_format = default if default
    @name_format = name if name
    @description_format = description if description

    @identifier_files ||= []
    @identifier_files << file

    # Fixed: `Entity::Identified === self` asks whether this module object is
    # an instance of Identified, which is never the case, so the mixin (and
    # its `included` hook) was re-applied on every call. `include?` is the
    # membership test, as already used by Entity.identifier_files.
    self.include Entity::Identified unless include?(Entity::Identified)
  end

end
|
data/lib/rbbt/entity.rb
CHANGED
@@ -0,0 +1,107 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/knowledge_base/registry'
|
3
|
+
|
4
|
+
# Entity-related helpers of KnowledgeBase: choosing entities for a database,
# building entity options, and translating identifiers through the source and
# target indices of a database.
class KnowledgeBase

  # Pick the source and target entities for database +name+ out of +entities+.
  #
  # entities - Hash-like; each side is looked up under :source / :target, then
  #            under the index's field name, then under the string form of the
  #            entity type Entity.formats gives for that field.
  #
  # Returns [source_entities, target_entities]; either may be nil.
  def select_entities(name, entities, options = {})
    index = get_index(name, options)
    source_field = index.source_field
    target_field = index.target_field

    source_type = Entity.formats[source_field]
    target_type = Entity.formats[target_field]

    source_entities = entities[:source] || entities[source_field] || entities[source_type.to_s]
    target_entities = entities[:target] || entities[target_field] || entities[target_type.to_s]

    [source_entities, target_entities]
  end


  # Options used to set up entities of +type+.
  #
  # Starts from the knowledge base entity options for the type, adds the
  # format configured for +type+ in @format, defaults :organism to the
  # namespace, and finally merges the options database +database_name+
  # declares for +type+, if any.
  def entity_options_for(type, database_name = nil)
    options = entity_options[Entity.formats[type]] || {}
    # Fixed: the check used the literal symbol :type instead of the +type+
    # argument, so the configured format was never applied.
    # Note: as before, this writes into the stored options hash when one
    # exists for the type.
    options[:format] = @format[type] if @format.include? type
    options = {:organism => namespace}.merge(options)
    if database_name
      # Look the database up once (it was fetched twice in the condition).
      database_options = get_database(database_name).entity_options
      if database_options && database_options[type]
        options = options.merge database_options[type]
      end
    end
    options
  end

  # Convert +entities+ to the format configured for +type+, when one is
  # configured and the entities report a different format; otherwise return
  # them unchanged.
  def translate(entities, type)
    format = @format[type]
    if format && entities.respond_to?(:format) && format != entities.format
      entities.to format
    else
      entities
    end
  end

  # Set +entities+ up through Misc.prepare_entity, using the format configured
  # for +type+ (or +type+ itself) and the options from #entity_options_for.
  def annotate(entities, type, database = nil)
    format = @format[type] || type
    Misc.prepare_entity(entities, format, entity_options_for(type, database))
  end

  # Entity type (per Entity.formats) of the source field of database +name+.
  def source_type(name)
    Entity.formats[source(name)]
  end

  # Entity type (per Entity.formats) of the target field of database +name+.
  def target_type(name)
    Entity.formats[target(name)]
  end

  # Distinct source and target fields across all databases.
  def entities
    all_databases.flat_map{|name| [source(name), target(name)] }.uniq
  end

  # Distinct entity types of the fields returned by #entities.
  def entity_types
    entities.collect{|entity| Entity.formats[entity] }.uniq
  end

  # Copy of the identifier files declared by database +name+.
  def identifier_files(name)
    get_database(name).identifier_files.dup
  end

  # Translation index into the source format of database +name+, memoized
  # through Persist.memory. Combines the database's identifier files with the
  # ones registered for the source field's entity type; NAMESPACE is replaced
  # by the namespace when there is one, and files still containing NAMESPACE
  # are dropped.
  def source_index(name)
    Persist.memory("Source index #{name}: KB directory #{dir}") do
      identifier_files = identifier_files(name)
      identifier_files.concat Entity.identifier_files(source(name)) if defined? Entity
      identifier_files.uniq!
      identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
      identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
      TSV.translation_index identifier_files, source(name), nil, :persist => true
    end
  end

  # Translation index into the target format of database +name+; the target
  # counterpart of #source_index.
  def target_index(name)
    Persist.memory("Target index #{name}: KB directory #{dir}") do
      identifier_files = identifier_files(name)
      # Fixed: this used source(name), so the identifier files registered for
      # the *source* entity type were used to build the target index.
      identifier_files.concat Entity.identifier_files(target(name)) if defined? Entity
      identifier_files.uniq!
      identifier_files.collect!{|f| f.annotate(f.gsub(/\bNAMESPACE\b/, namespace))} if namespace
      identifier_files.reject!{|f| f.match(/\bNAMESPACE\b/)}
      TSV.translation_index identifier_files, target(name), nil, :persist => true
    end
  end

  # Translate +entity+ (one or several) through the source index of +name+.
  # :all is passed through; returns nil when there is no index.
  def identify_source(name, entity)
    return :all if entity == :all
    index = source_index(name)
    return nil if index.nil?
    index.values_at(*entity)
  end


  # Translate +entity+ (one or several) through the target index of +name+.
  # :all is passed through; returns nil when there is no index.
  def identify_target(name, entity)
    return :all if entity == :all
    index = target_index(name)
    return nil if index.nil?
    index.values_at(*entity)
  end

  # Identify +entity+ as a source of +name+, falling back to target.
  # NOTE(review): the fallback only triggers when #identify_source returns
  # nil/false; presumably values_at returns an Array (truthy even when no
  # entity matched) - confirm whether that is the intended behaviour.
  def identify(name, entity)
    identify_source(name, entity) || identify_target(name, entity)
  end
end
|
107
|
+
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
require 'rbbt/knowledge_base/registry'
|
3
|
+
|
4
|
+
# Query interface of KnowledgeBase: subsets of a database's associations and
# the children / parents / neighbours of an entity.
class KnowledgeBase

  # Raw subset of the index for +name+ between +source+ and +target+.
  def _subset(name, source = :all, target = :all, options = {})
    get_index(name, options).subset(source, target)
  end

  # Associations in +name+ restricted to +entities+.
  #
  # entities - :all, an AnnotatedArray (keyed by its format, or by its base
  #            entity when it has no format) or a Hash as accepted by
  #            select_entities. May be nil when the Hash is passed in the
  #            +options+ position.
  # block    - optional filter applied to the matches.
  #
  # Returns [] when either side is missing or is an empty Array.
  def subset(name, entities, options = {}, &block)
    # Allow subset(name, nil, entities_hash).
    entities, options = options, entities if entities.nil? && Hash === options

    entities =
      case entities
      when :all
        {:target => :all, :source => :all}
      when AnnotatedArray
        key = entities.respond_to?(:format) ? entities.format : nil
        key ||= entities.base_entity.to_s
        {key => entities.clean_annotations}
      when Hash
        entities
      else
        raise "Entities are not a Hash or an AnnotatedArray: #{Misc.fingerprint entities}"
      end

    source, target = select_entities(name, entities, options)

    return [] if source.nil? || target.nil?
    return [] if [target, source].any? { |side| Array === side && side.empty? }

    found = _subset(name, source, target, options)

    setup(name, found)

    block_given? ? found.select(&block) : found
  end

  # Every association key in the index for +name+, set up as matches.
  def all(name, options={})
    setup name, get_index(name, options).keys
  end

  # Raw index matches for +entity+ as source.
  def _children(name, entity)
    get_index(name).match(entity)
  end

  # Matches for +entity+ as source, after identifying it in the source format.
  def children(name, entity)
    identified = identify_source(name, entity)
    setup(name, _children(name, identified))
  end

  # Raw matches for +entity+ as target, taken from the reversed index.
  def _parents(name, entity)
    get_index(name).reverse.match(entity)
  end

  # Matches for +entity+ as target, after identifying it in the target format.
  def parents(name, entity)
    identified = identify_target(name, entity)
    setup(name, _parents(name, identified), true)
  end

  # Raw neighbours of +entity+: only :children when the database is
  # undirected and its source and target coincide, otherwise both :parents
  # and :children.
  def _neighbours(name, entity)
    symmetric = undirected(name) && source(name) == target(name)
    return {:children => _children(name, entity)} if symmetric

    {:parents => _parents(name, entity), :children => _children(name, entity)}
  end

  # Neighbours of +entity+ as an IndiferentHash whose lists are set up as
  # matches (parents flagged with +true+).
  def neighbours(name, entity)
    related = _neighbours(name, entity)
    IndiferentHash.setup(related)

    downstream = related[:children]
    setup(name, downstream) if downstream

    upstream = related[:parents]
    setup(name, upstream, true) if upstream

    related
  end
end
|
83
|
+
|