rbbt-util 5.14.33 → 5.14.34
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/rbbt +2 -0
- data/lib/rbbt/association/database.rb +153 -0
- data/lib/rbbt/association/index.rb +89 -20
- data/lib/rbbt/association/open.rb +37 -0
- data/lib/rbbt/association/util.rb +133 -0
- data/lib/rbbt/association.rb +1 -380
- data/lib/rbbt/entity/identifiers.rb +106 -0
- data/lib/rbbt/entity.rb +1 -0
- data/lib/rbbt/knowledge_base/entity.rb +107 -0
- data/lib/rbbt/knowledge_base/query.rb +83 -0
- data/lib/rbbt/knowledge_base/registry.rb +106 -0
- data/lib/rbbt/knowledge_base/syndicate.rb +22 -0
- data/lib/rbbt/knowledge_base.rb +6 -359
- data/lib/rbbt/tsv/accessor.rb +4 -0
- data/lib/rbbt/tsv/change_id.rb +119 -0
- data/lib/rbbt/tsv/index.rb +6 -2
- data/lib/rbbt/tsv/parser.rb +7 -5
- data/lib/rbbt/tsv/util.rb +1 -1
- data/lib/rbbt/tsv.rb +2 -1
- data/lib/rbbt/util/R/model.rb +1 -1
- data/lib/rbbt/util/log.rb +2 -2
- data/lib/rbbt/util/misc/bgzf.rb +2 -0
- data/lib/rbbt/util/misc/inspect.rb +1 -1
- data/lib/rbbt-util.rb +11 -7
- data/lib/rbbt.rb +0 -1
- data/share/rbbt_commands/app/start +1 -1
- data/share/rbbt_commands/tsv/change_id +2 -2
- data/test/rbbt/association/test_database.rb +61 -0
- data/test/rbbt/association/test_index.rb +67 -22
- data/test/rbbt/association/test_open.rb +68 -0
- data/test/rbbt/association/test_util.rb +108 -0
- data/test/rbbt/entity/test_identifiers.rb +40 -0
- data/test/rbbt/knowledge_base/test_entity.rb +0 -0
- data/test/rbbt/knowledge_base/test_query.rb +45 -0
- data/test/rbbt/knowledge_base/test_registry.rb +52 -0
- data/test/rbbt/test_association.rb +3 -3
- data/test/rbbt/test_knowledge_base.rb +79 -51
- data/test/rbbt/test_monitor.rb +0 -2
- data/test/rbbt/test_packed_index.rb +1 -1
- data/test/rbbt/test_resource.rb +6 -6
- data/test/rbbt/test_tsv.rb +34 -44
- data/test/rbbt/tsv/parallel/test_through.rb +2 -4
- data/test/rbbt/tsv/parallel/test_traverse.rb +30 -28
- data/test/rbbt/tsv/test_change_id.rb +10 -0
- data/test/rbbt/util/R/test_model.rb +9 -10
- data/test/rbbt/util/test_misc.rb +1 -1
- data/test/test_helper.rb +4 -1
- metadata +24 -2
@@ -0,0 +1,106 @@
|
|
1
|
+
require 'rbbt/association'
|
2
|
+
require 'rbbt/association/item'
|
3
|
+
|
4
|
+
# Registry side of KnowledgeBase: stores the association sources registered
# under a name and opens them on demand, either as indices (get_index) or as
# databases (get_database), memoizing the opened objects.
class KnowledgeBase

  # Registers an association source under +name+.
  #
  # name    - key under which the entry is stored in @registry.
  # file    - source stored for the entry when no block is given.
  # options - options stored next to the source; get_index and get_database
  #           use them as defaults when opening the entry.
  # block   - optional code block stored instead of +file+. It receives a
  #           +filename+ singleton method that returns name.to_s.
  #
  # Returns the [source, options] pair that was stored.
  def register(name, file = nil, options = {}, &block)
    if block_given?
      block.define_singleton_method(:filename) do name.to_s end
      Log.debug("Registering #{ name } from code block")
      @registry[name] = [block, options]
    else
      Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
      @registry[name] = [file, options]
    end
  end

  # Returns the names of all the registered entries.
  def all_databases
    @registry.keys
  end

  # Returns the key field of the index for +name+ split on "~". The result
  # is cached per name in @descriptions.
  def description(name)
    @descriptions[name] ||= get_index(name).key_field.split("~")
  end

  # First component of the description of +name+.
  def source(name)
    description(name)[0]
  end

  # Second component of the description of +name+.
  def target(name)
    description(name)[1]
  end

  # Third component of the description of +name+ (nil when the key field has
  # only two components).
  def undirected(name)
    description(name)[2]
  end

  # Opens, or returns the memoized, association index for +name+.
  #
  # name    - registered name; converted to String before use.
  # options - options for Association.index; they also take part in the
  #           cache key, so different options yield different indices.
  #
  # Raises RuntimeError when there is no reusable persistence file and
  # +name+ is not registered.
  def get_index(name, options = {})
    name = name.to_s
    key = name + "_" + Misc.digest(Misc.fingerprint([name, options]))
    @indices[key] ||=
      begin
        Persist.memory("Index:" + [key, dir] * "@") do
          options = options.dup
          persist_dir = dir
          persist_file = persist_dir[key]
          file, registered_options = registry[name]

          options = Misc.add_defaults options, :persist_file => persist_file, :persist_dir => persist_dir, :namespace => namespace, :format => format, :persist => true
          options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?

          # presumably separates the persist-related keys from the rest;
          # verify against Misc.pull_keys
          persist_options = Misc.pull_keys options, :persist

          index = if persist_file.exists? and persist_options[:persist] and not persist_options[:update]
                    Log.low "Re-opening index #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
                    Association.index(nil, options, persist_options.dup)
                  else
                    options = Misc.add_defaults options, registered_options if registered_options
                    raise "Repo #{ name } not found and not registered" if file.nil?
                    Log.medium "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
                    Association.index(file, options, persist_options.dup)
                  end

          index.namespace = self.namespace

          index
        end
      end
  end

  # Opens, or returns the memoized, association database for +name+.
  #
  # Opened databases are memoized in @databases so that they are not mixed
  # with the indices kept in @indices.
  #
  # name    - registered name; converted to String before use.
  # options - options for Association.open; they also take part in the
  #           cache key.
  #
  # Raises RuntimeError when no persistence file exists and +name+ is not
  # registered.
  def get_database(name, options = {})
    name = name.to_s
    # The "Index:" prefix and the dir.indices location below are kept as they
    # were so that the persistence file path does not change.
    key = "Index:" + name + "_" + Misc.digest(Misc.fingerprint([name, options.dup]))
    @databases ||= {}
    @databases[key] ||=
      begin
        Persist.memory("Database:" + [key, dir] * "@") do
          options = options.dup
          persist_file = dir.indices[key]
          file, registered_options = registry[name]

          options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format, :persist => true
          options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?

          persist_options = Misc.pull_keys options, :persist

          database = if persist_file.exists?
                       Log.low "Re-opening database #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
                       Association.open(nil, options, persist_options)
                     else
                       options = Misc.add_defaults options, registered_options if registered_options
                       raise "Repo #{ name } not found and not registered" if file.nil?
                       Log.medium "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
                       Association.open(file, options, persist_options)
                     end

          database.namespace = self.namespace

          database
        end
      end
  end

  # Returns the fields of the index for +name+.
  def index_fields(name)
    get_index(name).fields
  end

end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
# Syndication: exposes the databases of another knowledge base through this one.
class KnowledgeBase

  # Registers every database of +kb+ in this knowledge base under the name
  # "<database>@<name>".
  #
  # name - suffix appended, after "@", to each database name of +kb+.
  # kb   - knowledge base to syndicate; must respond to all_databases,
  #        registry, entity_options and get_database.
  #
  # Each registration carries the :entity_options of the original entry
  # (merged over kb.entity_options when present) and :undirected when the
  # original entry sets it. The registered block opens the database through
  # +kb+ when called.
  #
  # Returns the list of database names of +kb+.
  def syndicate(name, kb)
    kb.all_databases.each do |database|
      db_name = [database, name] * "@"
      _file, kb_options = kb.registry[database]

      # An entry may have been registered without options; treat that like an
      # empty options hash instead of failing on nil.
      options = { :entity_options => (kb_options ? kb_options[:entity_options] : nil) }
      options[:undirected] = true if kb_options and kb_options[:undirected]

      if kb.entity_options
        # Entry-specific entity options take precedence over the kb-wide ones
        options[:entity_options] = kb.entity_options.merge(options[:entity_options] || {})
      end

      register(db_name, nil, options) do
        kb.get_database(database)
      end
    end
  end

  # Returns the names of all the registered entries.
  def all_databases
    @registry.keys
  end
end
|
data/lib/rbbt/knowledge_base.rb
CHANGED
@@ -1,23 +1,11 @@
|
|
1
|
+
|
1
2
|
require 'rbbt/association'
|
2
3
|
require 'rbbt/association/item'
|
3
|
-
require 'rbbt/entity'
|
4
|
+
require 'rbbt/knowledge_base/entity'
|
5
|
+
require 'rbbt/knowledge_base/query'
|
6
|
+
require 'rbbt/knowledge_base/syndicate'
|
4
7
|
|
5
8
|
class KnowledgeBase
|
6
|
-
class << self
|
7
|
-
attr_accessor :knowledge_base_dir, :registry
|
8
|
-
|
9
|
-
def registry
|
10
|
-
@registry ||= IndiferentHash.setup({})
|
11
|
-
end
|
12
|
-
|
13
|
-
def knowledge_base_dir
|
14
|
-
@knowledge_base_dir ||= Rbbt.var.knowledge_base
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def setup(name, matches, reverse = false)
|
19
|
-
AssociationItem.setup matches, self, name, reverse
|
20
|
-
end
|
21
9
|
|
22
10
|
attr_accessor :namespace, :dir, :indices, :registry, :format, :databases, :entity_options
|
23
11
|
def initialize(dir, namespace = nil)
|
@@ -36,348 +24,7 @@ class KnowledgeBase
|
|
36
24
|
@databases = {}
|
37
25
|
end
|
38
26
|
|
39
|
-
def
|
40
|
-
|
41
|
-
new_kb = KnowledgeBase.new dir[new_namespace], new_namespace
|
42
|
-
new_kb.format.merge! self.format
|
43
|
-
new_kb.entity_options.merge! self.entity_options
|
44
|
-
new_kb.registry = self.registry
|
45
|
-
new_kb
|
46
|
-
end
|
47
|
-
|
48
|
-
#{{{ Descriptions
|
49
|
-
|
50
|
-
def register(name, file = nil, options = {}, &block)
|
51
|
-
if block_given?
|
52
|
-
block.define_singleton_method(:filename) do name.to_s end
|
53
|
-
Log.debug("Registering #{ name } from code block")
|
54
|
-
@registry[name] = [block, options]
|
55
|
-
else
|
56
|
-
Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
|
57
|
-
@registry[name] = [file, options]
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
def syndicate(name, kb)
|
62
|
-
kb.all_databases.each do |database|
|
63
|
-
db_name = [database, name] * "@"
|
64
|
-
file, kb_options = kb.registry[database]
|
65
|
-
options = {}
|
66
|
-
options[:entity_options] = kb_options[:entity_options]
|
67
|
-
options[:undirected] = true if kb_options and kb_options[:undirected]
|
68
|
-
if kb.entity_options
|
69
|
-
options[:entity_options] = kb.entity_options.merge(options[:entity_options] || {})
|
70
|
-
end
|
71
|
-
|
72
|
-
register(db_name, nil, options) do
|
73
|
-
kb.get_database(database)
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
def all_databases
|
79
|
-
@registry.keys
|
80
|
-
end
|
81
|
-
|
82
|
-
def description(name)
|
83
|
-
@descriptions[name] ||= get_index(name).key_field.split("~")
|
84
|
-
end
|
85
|
-
|
86
|
-
def source(name)
|
87
|
-
description(name)[0]
|
88
|
-
end
|
89
|
-
|
90
|
-
def target(name)
|
91
|
-
description(name)[1]
|
92
|
-
end
|
93
|
-
|
94
|
-
def undirected(name)
|
95
|
-
description(name)[2]
|
96
|
-
end
|
97
|
-
|
98
|
-
def source_type(name)
|
99
|
-
Entity.formats[source(name)]
|
100
|
-
end
|
101
|
-
|
102
|
-
def target_type(name)
|
103
|
-
Entity.formats[target(name)]
|
104
|
-
end
|
105
|
-
|
106
|
-
def index_fields(name)
|
107
|
-
get_index(name).fields
|
108
|
-
end
|
109
|
-
|
110
|
-
def entities
|
111
|
-
all_databases.inject([]){|acc,name| acc << source(name); acc << target(name)}.uniq
|
112
|
-
end
|
113
|
-
|
114
|
-
def entity_types
|
115
|
-
entities.collect{|entity| Entity.formats[entity] }.uniq
|
116
|
-
end
|
117
|
-
|
118
|
-
#{{{ Open and get
|
119
|
-
|
120
|
-
def open_options
|
121
|
-
{:namespace => namespace, :format => @format}
|
122
|
-
end
|
123
|
-
|
124
|
-
def get_database(name, options = {})
|
125
|
-
key = name.to_s + "_" + Misc.digest(Misc.fingerprint([name,options,format,namespace]))
|
126
|
-
@databases[key] ||=
|
127
|
-
begin
|
128
|
-
file, registered_options = registry[name]
|
129
|
-
database = Persist.memory("Database:" << [key, dir] * "@") do
|
130
|
-
persist_file = dir.databases[key]
|
131
|
-
|
132
|
-
options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format
|
133
|
-
options = Misc.add_defaults options, registered_options if registered_options
|
134
|
-
|
135
|
-
persist_options = Misc.pull_keys options, :persist
|
136
|
-
|
137
|
-
database = if persist_file.exists?
|
138
|
-
Log.low "Re-opening database #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
|
139
|
-
Association.open(file, options, persist_options)
|
140
|
-
else
|
141
|
-
raise "Repo #{ name } not found and not registered" if file.nil?
|
142
|
-
Log.low "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
|
143
|
-
Association.open(file, options, persist_options)
|
144
|
-
end
|
145
|
-
|
146
|
-
database.namespace = self.namespace
|
147
|
-
|
148
|
-
database
|
149
|
-
end
|
150
|
-
|
151
|
-
database.entity_options ||= {}
|
152
|
-
database.entity_options.merge! registered_options[:entity_options] if registered_options.include? :entity_options
|
153
|
-
database
|
154
|
-
end
|
155
|
-
end
|
156
|
-
|
157
|
-
def get_index(name, options = {})
|
158
|
-
name = name.to_s
|
159
|
-
key = name.to_s + "_" + Misc.digest(Misc.fingerprint([name,options]))
|
160
|
-
@indices[key] ||=
|
161
|
-
begin
|
162
|
-
Persist.memory("Index:" << [key, dir] * "@") do
|
163
|
-
persist_file = dir.indices[key]
|
164
|
-
file, registered_options = registry[name]
|
165
|
-
|
166
|
-
options = Misc.add_defaults options, :persist_file => persist_file, :namespace => namespace, :format => format
|
167
|
-
options = Misc.add_defaults options, registered_options if registered_options and registered_options.any?
|
168
|
-
|
169
|
-
persist_options = Misc.pull_keys options, :persist
|
170
|
-
|
171
|
-
index = if persist_file.exists?
|
172
|
-
Log.low "Re-opening index #{ name } from #{ Misc.fingerprint persist_file }. #{options}"
|
173
|
-
Association.index(nil, options, persist_options)
|
174
|
-
else
|
175
|
-
options = Misc.add_defaults options, registered_options if registered_options
|
176
|
-
raise "Repo #{ name } not found and not registered" if file.nil?
|
177
|
-
Log.low "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
|
178
|
-
Association.index(file, options, persist_options)
|
179
|
-
end
|
180
|
-
|
181
|
-
index.namespace = self.namespace
|
182
|
-
|
183
|
-
index
|
184
|
-
end
|
185
|
-
end
|
186
|
-
end
|
187
|
-
|
188
|
-
|
189
|
-
#{{{ Add manual database
|
190
|
-
|
191
|
-
def add_index(name, source_type, target_type, *fields)
|
192
|
-
options = fields.pop if Hash === fields.last
|
193
|
-
options ||= {}
|
194
|
-
undirected = Misc.process_options options, :undirected
|
195
|
-
|
196
|
-
undirected = nil unless undirected
|
197
|
-
|
198
|
-
repo_file = dir[name].find
|
199
|
-
index = Association.index(nil, {:namespace => namespace, :key_field => [source_type, target_type, undirected].compact * "~", :fields => fields}.merge(options), :file => repo_file, :update => true)
|
200
|
-
@indices[name] = index
|
201
|
-
end
|
202
|
-
|
203
|
-
def add(name, source, target, *rest)
|
204
|
-
code = [source, target] * "~"
|
205
|
-
repo = @indices[name]
|
206
|
-
repo[code] = rest
|
207
|
-
end
|
208
|
-
|
209
|
-
def write(name)
|
210
|
-
repo = @indices[name]
|
211
|
-
repo.write_and_read do
|
212
|
-
yield
|
213
|
-
end
|
214
|
-
end
|
215
|
-
|
216
|
-
#{{{ Annotate
|
217
|
-
|
218
|
-
def entity_options_for(type, database_name = nil)
|
219
|
-
options = entity_options[Entity.formats[type]] || {}
|
220
|
-
options[:format] = @format[type] if @format.include? :type
|
221
|
-
options = {:organism => namespace}.merge(options)
|
222
|
-
if database_name and
|
223
|
-
(database = get_database(database_name)).entity_options and
|
224
|
-
(database = get_database(database_name)).entity_options[type]
|
225
|
-
options = options.merge database.entity_options[type]
|
226
|
-
end
|
227
|
-
options
|
228
|
-
end
|
229
|
-
|
230
|
-
def annotate(entities, type, database = nil)
|
231
|
-
format = @format[type] || type
|
232
|
-
Misc.prepare_entity(entities, format, entity_options_for(type, database))
|
233
|
-
end
|
234
|
-
|
235
|
-
#{{{ Identify
|
236
|
-
|
237
|
-
|
238
|
-
def database_identify_index(database, target)
|
239
|
-
if database.identifier_files.any?
|
240
|
-
id_file = database.identifier_files.first
|
241
|
-
identifier_fields = TSV.parse_header(id_file).all_fields
|
242
|
-
if identifier_fields.include? target
|
243
|
-
TSV.index(database.identifiers, :target => target, :persist => true, :order => true)
|
244
|
-
else
|
245
|
-
{}
|
246
|
-
end
|
247
|
-
else
|
248
|
-
if TSV.parse_header(Organism.identifiers(namespace)).all_fields.include? target
|
249
|
-
Organism.identifiers(namespace).index(:target => target, :persist => true, :order => true)
|
250
|
-
else
|
251
|
-
{}
|
252
|
-
end
|
253
|
-
end
|
254
|
-
end
|
255
|
-
|
256
|
-
def identify_source(name, entity)
|
257
|
-
database = get_database(name, :persist => true)
|
258
|
-
return entity if Symbol === entity or (String === entity and database.include? entity)
|
259
|
-
source = source(name)
|
260
|
-
@identifiers[name] ||= {}
|
261
|
-
@identifiers[name]['source'] ||= begin
|
262
|
-
database_identify_index(database, source)
|
263
|
-
end
|
264
|
-
|
265
|
-
if Array === entity
|
266
|
-
@identifiers[name]['source'].chunked_values_at(entity).zip(entity).collect{|p|
|
267
|
-
p.compact.first
|
268
|
-
}
|
269
|
-
else
|
270
|
-
@identifiers[name]['source'][entity]
|
271
|
-
end
|
272
|
-
end
|
273
|
-
|
274
|
-
def identify_target(name, entity)
|
275
|
-
database = get_database(name, :persist => true)
|
276
|
-
return entity if Symbol === entity #or (String === entity) # and database.values.collect{|v| v.first}.compact.flatten.include?(entity))
|
277
|
-
target = target(name)
|
278
|
-
|
279
|
-
@identifiers[name] ||= {}
|
280
|
-
@identifiers[name]['target'] ||= begin
|
281
|
-
database_identify_index(database, target)
|
282
|
-
end
|
283
|
-
if Array === entity
|
284
|
-
@identifiers[name]['target'].chunked_values_at(entity).zip(entity).collect{|p|
|
285
|
-
p.compact.first
|
286
|
-
}
|
287
|
-
else
|
288
|
-
@identifiers[name]['target'][entity]
|
289
|
-
end
|
290
|
-
end
|
291
|
-
|
292
|
-
def identify(name, entity)
|
293
|
-
identify_source(name, entity) || identify_target(name, entity)
|
294
|
-
end
|
295
|
-
|
296
|
-
def normalize(entity)
|
297
|
-
source_matches = all_databases.collect{|d|
|
298
|
-
identify_source(d, entity)
|
299
|
-
}.flatten.compact.uniq
|
300
|
-
return entity if source_matches.include? entity
|
301
|
-
|
302
|
-
target_matches = all_databases.collect{|d|
|
303
|
-
identify_target(d, entity)
|
304
|
-
}.flatten.compact.uniq
|
305
|
-
return entity if target_matches.include? entity
|
306
|
-
|
307
|
-
(source_matches + target_matches).first
|
308
|
-
end
|
309
|
-
|
310
|
-
#{{{ Query
|
311
|
-
|
312
|
-
def all(name, options={})
|
313
|
-
repo = get_index name, options
|
314
|
-
setup name, repo.keys
|
315
|
-
end
|
316
|
-
|
317
|
-
def children(name, entity)
|
318
|
-
repo = get_index name
|
319
|
-
setup(name, repo.match(entity))
|
320
|
-
end
|
321
|
-
|
322
|
-
def parents(name, entity)
|
323
|
-
repo = get_index name
|
324
|
-
setup(name, repo.reverse.match(entity), true)
|
325
|
-
end
|
326
|
-
|
327
|
-
def neighbours(name, entity)
|
328
|
-
if undirected(name) and source(name) == target(name)
|
329
|
-
IndiferentHash.setup({:children => children(name, entity)})
|
330
|
-
else
|
331
|
-
IndiferentHash.setup({:parents => parents(name, entity), :children => children(name, entity)})
|
332
|
-
end
|
333
|
-
end
|
334
|
-
|
335
|
-
def subset(name, entities, &block)
|
336
|
-
entities = case entities
|
337
|
-
when :all
|
338
|
-
{:target => :all, :source => :all}
|
339
|
-
when AnnotatedArray
|
340
|
-
format = entities.format if entities.respond_to? :format
|
341
|
-
format ||= entities.base_entity.to_s
|
342
|
-
{format => entities.clean_annotations}
|
343
|
-
when Hash
|
344
|
-
entities
|
345
|
-
else
|
346
|
-
raise "Entities are not a Hash or an AnnotatedArray: #{Misc.fingerprint entities}"
|
347
|
-
end
|
348
|
-
|
349
|
-
repo = get_index name
|
350
|
-
|
351
|
-
matches = setup(name, repo.subset_entities(entities))
|
352
|
-
|
353
|
-
block_given? ? matches.select(&block) : matches
|
354
|
-
end
|
355
|
-
|
356
|
-
def translate(entities, type)
|
357
|
-
if format = @format[type] and (entities.respond_to? :format and format != entities.format)
|
358
|
-
entities.to format
|
359
|
-
else
|
360
|
-
entities
|
361
|
-
end
|
362
|
-
end
|
363
|
-
|
364
|
-
def pair_matches(source, target, undirected = nil)
|
365
|
-
all_databases.inject([]){|acc,database|
|
366
|
-
match = [source, target] * "~"
|
367
|
-
index = get_index(database)
|
368
|
-
|
369
|
-
if index.include? match
|
370
|
-
acc << setup(database, match)
|
371
|
-
|
372
|
-
elsif undirected or undirected(database)
|
373
|
-
inv = [target, source] * "~"
|
374
|
-
if index.include? inv
|
375
|
-
setup(database, inv)
|
376
|
-
acc << inv
|
377
|
-
end
|
378
|
-
end
|
379
|
-
|
380
|
-
acc
|
381
|
-
}
|
27
|
+
def setup(name, matches, reverse = false)
|
28
|
+
AssociationItem.setup matches, self, name, reverse
|
382
29
|
end
|
383
30
|
end
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
require 'yaml'
|
2
2
|
require 'rbbt/annotations'
|
3
3
|
require 'rbbt/tsv/dumper'
|
4
|
+
require 'set'
|
5
|
+
|
4
6
|
module TSV
|
5
7
|
|
6
8
|
TSV_SERIALIZER = YAML
|
@@ -601,6 +603,7 @@ module TSV
|
|
601
603
|
end
|
602
604
|
|
603
605
|
def summary
|
606
|
+
|
604
607
|
key = nil
|
605
608
|
values = nil
|
606
609
|
self.each do |k, v|
|
@@ -608,6 +611,7 @@ module TSV
|
|
608
611
|
values = v
|
609
612
|
break
|
610
613
|
end
|
614
|
+
|
611
615
|
with_unnamed do
|
612
616
|
<<-EOF
|
613
617
|
Filename = #{Path === filename ? filename.find : (filename || "No filename")}
|
data/lib/rbbt/tsv/change_id.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
1
|
require 'rbbt/tsv'
|
2
|
+
require 'rbbt/persist'
|
3
|
+
|
2
4
|
|
3
5
|
module TSV
|
4
6
|
def self.change_key(tsv, format, options = {}, &block)
|
@@ -84,5 +86,122 @@ module TSV
|
|
84
86
|
TSV.swap_id(self, *args)
|
85
87
|
end
|
86
88
|
|
89
|
+
# Builds an index that translates values of +source+ into +target+ using the
# identifier +files+.
#
# files   - list of identifier sources; each one is either a TSV object or
#           something TSV.parse_header and TSV.index accept.
# target  - field to translate into. When nil it is taken from the first file
#           examined: its first field for a TSV object, the second entry of
#           all_fields otherwise.
# source  - field to translate from; nil means any field.
# options - options for the index calls; :persist defaults to true.
#
# A single file containing both fields is preferred. Otherwise two files
# sharing a common field are chained: source -> common field in the first,
# common field -> target in the second.
#
# Returns the index, or nil when +source+ equals +target+ or when no file or
# pair of files can provide the translation.
def self.translation_index(files, target = nil, source = nil, options = {})
  return nil if source == target
  options = Misc.add_defaults options.dup, :persist => true
  fields = source ? [source] : nil

  # First choice: a single file that holds both the source and the target
  files.each do |file|
    if TSV === file
      all_fields = file.all_fields
      target = file.fields.first if target.nil?
      if (source.nil? or all_fields.include? source) and all_fields.include? target
        return file.index(options.merge(:target => target, :fields => fields, :order => true))
      end
    else
      all_fields = TSV.parse_header(file).all_fields
      target = all_fields[1] if target.nil?
      if (source.nil? or all_fields.include? source) and all_fields.include? target
        return TSV.index(file, options.merge(:target => target, :fields => fields, :order => true))
      end
    end
  end

  # Second choice: chain two files through a field they have in common
  files.each do |file|
    all_fields = TSV === file ? file.all_fields : TSV.parse_header(file).all_fields

    # The first file of the chain must actually contain the source field
    next unless source.nil? or all_fields.include? source

    files.each do |other_file|
      next if file == other_file

      other_all_fields = TSV === other_file ? other_file.all_fields : TSV.parse_header(other_file).all_fields

      # Taken from the intersection, so both files are known to contain it
      common_field = (all_fields & other_all_fields).first

      next unless common_field and other_all_fields.include? target

      return Persist.persist_tsv(nil, Misc.fingerprint(files), {:files => files, :source => source, :target => target}, :prefix => "Translation index", :persist => options[:persist]) do |data|
        index = TSV === file ?
          file.index(options.merge(:target => common_field, :fields => fields)) :
          TSV.index(file, options.merge(:target => common_field, :fields => fields))

        other_index = TSV === other_file ?
          other_file.index(options.merge(:target => target, :fields => [common_field])) :
          TSV.index(other_file, options.merge(:target => target, :fields => [common_field]))

        data.serializer = :clean
        data.merge! index.to_list.attach(other_index.to_list).slice([target]).to_single

        data
      end
    end
  end

  nil
end
|
137
|
+
|
138
|
+
# Opens the stream produced by translate_stream as a new TSV and gives it the
# identifiers of the original +tsv+.
#
# tsv  - TSV to translate.
# args - remaining arguments, passed untouched to translate_stream.
#
# Returns the new TSV.
def self.translate(tsv, *args)
  TSV.open(translate_stream(tsv, *args)).tap do |translated|
    translated.identifiers = tsv.identifiers
  end
end
|
143
|
+
|
144
|
+
# Translates the values of +field+ in +tsv+ into +format+ and returns the
# result as a dumper stream.
#
# tsv     - TSV to translate.
# field   - name of the field (or key field) whose values are translated.
# format  - target format; it replaces +field+ in the header of the result.
# options - :identifier_files (defaults to tsv.identifier_files) and
#           :identifiers select where translations come from; the remaining
#           options are passed to translation_index.
# block   - accepted but not used by this method.
#
# Raises RuntimeError when no translation index can be built.
def self.translate_stream(tsv, field, format, options = {}, &block)
  options = Misc.add_defaults options, :persist => false, :identifier_files => tsv.identifier_files, :compact => true

  # :persist and :compact are taken out of the options but not used further
  identifier_files, identifiers, _persist_input, _compact = Misc.process_options options, :identifier_files, :identifiers, :persist, :compact
  identifier_files = [tsv, identifiers].compact if identifier_files.nil? or identifier_files.empty?

  # Non-destructive: the list may be the very array the caller passed in
  identifier_files = identifier_files.uniq

  index = translation_index identifier_files, format, field, options.dup
  raise "No index: #{Misc.fingerprint([identifier_files, field, format])}" if index.nil?

  tsv = tsv.to_double if tsv.type != :double

  pos = tsv.identify_field field

  new_options = tsv.options
  new_options[:identifiers] = tsv.identifiers.find if tsv.identifiers

  case pos
  when :key
    new_options[:key_field] = format if tsv.key_field == field
    dumper = TSV::Dumper.new new_options
    dumper.init
    TSV.traverse tsv, :into => dumper do |key,values|
      new_key = index[key]
      [new_key, values]
    end
  else
    new_options[:fields] = tsv.fields.collect{|f| f == field ? format : f }
    dumper = TSV::Dumper.new new_options
    dumper.init

    # NOTE(review): tsv went through to_double above, so presumably only the
    # :double branch is reached; confirm before removing the others.
    case tsv.type
    when :double
      TSV.traverse tsv, :into => dumper do |key,values|
        original = values[pos]
        values[pos] = index.values_at(*original)
        [key, values]
      end
    when :list
      TSV.traverse tsv, :into => dumper do |key,values|
        original = values[pos]
        values[pos] = index[original]
        [key, values]
      end
    when :flat
      TSV.traverse tsv, :into => dumper do |key,values|
        [key, index.values_at(*values)]
      end
    when :single
      TSV.traverse tsv, :into => dumper do |key,original|
        [key, index[original]]
      end
    end
  end

  dumper.stream
end
|
88
207
|
end
|
data/lib/rbbt/tsv/index.rb
CHANGED
@@ -32,12 +32,16 @@ module TSV
|
|
32
32
|
|
33
33
|
new_key_field, new_fields = through target, fields, true do |key, values|
|
34
34
|
next if key.empty?
|
35
|
-
|
35
|
+
case type
|
36
|
+
when :single
|
36
37
|
values = [values]
|
37
38
|
values.unshift key
|
38
|
-
|
39
|
+
when :double
|
39
40
|
values = values.dup
|
40
41
|
values.unshift [key]
|
42
|
+
when :list, :flat
|
43
|
+
values = values.dup
|
44
|
+
values.unshift key
|
41
45
|
end
|
42
46
|
|
43
47
|
values.each_with_index do |list, i|
|
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -343,6 +343,8 @@ module TSV
|
|
343
343
|
when String === key_field
|
344
344
|
@key_position = @fields.dup.unshift(@key_field).index key_field
|
345
345
|
raise "Key field #{ key_field } was not found" if @key_position.nil?
|
346
|
+
when :key == key_field
|
347
|
+
@key_position = 0
|
346
348
|
else
|
347
349
|
raise "Format of key_field not understood: #{key_field.inspect}"
|
348
350
|
end
|
@@ -503,11 +505,11 @@ module TSV
|
|
503
505
|
# get parser
|
504
506
|
|
505
507
|
# grep
|
506
|
-
if grep
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
end
|
508
|
+
#if grep and false
|
509
|
+
# stream.rewind if stream.eof?
|
510
|
+
# stream = Open.grep(stream, grep, invert_grep)
|
511
|
+
# self.first_line = stream.gets
|
512
|
+
#end
|
511
513
|
|
512
514
|
# first line
|
513
515
|
line = self.rescue_first_line
|