rbbt-util 5.4.1 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/bin/rbbt_monitor.rb +8 -4
- data/lib/rbbt.rb +4 -11
- data/lib/rbbt/annotations.rb +4 -1
- data/lib/rbbt/association.rb +218 -157
- data/lib/rbbt/association/index.rb +92 -0
- data/lib/rbbt/association/item.rb +44 -0
- data/lib/rbbt/entity.rb +4 -0
- data/lib/rbbt/fix_width_table.rb +14 -9
- data/lib/rbbt/knowledge_base.rb +269 -0
- data/lib/rbbt/persist.rb +1 -1
- data/lib/rbbt/persist/tsv.rb +22 -2
- data/lib/rbbt/resource.rb +0 -1
- data/lib/rbbt/resource/path.rb +1 -1
- data/lib/rbbt/resource/util.rb +0 -1
- data/lib/rbbt/tsv.rb +15 -14
- data/lib/rbbt/tsv/accessor.rb +21 -16
- data/lib/rbbt/tsv/attach.rb +5 -5
- data/lib/rbbt/tsv/attach/util.rb +4 -2
- data/lib/rbbt/tsv/change_id.rb +67 -0
- data/lib/rbbt/tsv/index.rb +5 -3
- data/lib/rbbt/tsv/manipulate.rb +83 -37
- data/lib/rbbt/tsv/parser.rb +2 -1
- data/lib/rbbt/tsv/util.rb +2 -0
- data/lib/rbbt/util/cmd.rb +1 -2
- data/lib/rbbt/util/log.rb +42 -38
- data/lib/rbbt/util/misc.rb +134 -46
- data/lib/rbbt/util/open.rb +3 -17
- data/lib/rbbt/util/semaphore.rb +8 -2
- data/lib/rbbt/workflow.rb +31 -46
- data/lib/rbbt/workflow/accessor.rb +1 -1
- data/lib/rbbt/workflow/step.rb +5 -3
- data/share/rbbt_commands/workflow/server +1 -0
- data/share/rbbt_commands/workflow/task +12 -2
- data/test/rbbt/association/test_index.rb +36 -0
- data/test/rbbt/test_annotations.rb +5 -4
- data/test/rbbt/test_association.rb +40 -13
- data/test/rbbt/test_knowledge_base.rb +103 -0
- data/test/rbbt/test_workflow.rb +4 -2
- data/test/rbbt/tsv/test_change_id.rb +43 -0
- data/test/rbbt/tsv/test_index.rb +2 -1
- data/test/rbbt/tsv/test_manipulate.rb +51 -0
- data/test/rbbt/util/test_misc.rb +21 -1
- data/test/test_helper.rb +8 -4
- metadata +12 -86
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'rbbt/tsv'
|
2
|
+
module Association
|
3
|
+
module Index
|
4
|
+
|
5
|
+
attr_accessor :source_field, :target_field, :undirected
|
6
|
+
# Splits +key_field+ on "~" and stores the pieces as the source field, the
# target field and the optional undirected marker (nil when absent).
# Returns the array of pieces.
def parse_key_field
  parts = key_field.split("~")
  @source_field = parts[0]
  @target_field = parts[1]
  @undirected = parts[2]
  parts
end
|
9
|
+
|
10
|
+
# Turns +repo+ into an association index: mixes in Association::Index,
# parses its key field into source/target/undirected, and marks it unnamed.
def self.setup(repo)
  repo.extend(Association::Index).parse_key_field
  repo.unnamed = true
end
|
15
|
+
|
16
|
+
# Returns (and memoizes) the reversed index, stored next to this one in
# "<persistence_path>.reverse".
#
# If the file already exists it is opened read-only. Otherwise it is created
# by walking every entry of this index and storing the same value under the
# key with its "~"-separated parts reversed; the key_field is rewritten with
# source and target swapped (the optional third part stays last).
def reverse
  @reverse ||= begin
                 reverse_filename = persistence_path + '.reverse'

                 # File.exists? was removed in Ruby 3.2; File.exist? works everywhere.
                 if File.exist?(reverse_filename)
                   reversed = Persist.open_tokyocabinet(reverse_filename, false, serializer, TokyoCabinet::BDB)
                 else
                   reversed = Persist.open_tokyocabinet(reverse_filename, true, serializer, TokyoCabinet::BDB)
                   reversed.write
                   through do |key, value|
                     reversed_key = key.split("~").reverse.join("~")
                     reversed[reversed_key] = value
                   end
                   annotate(reversed)
                   reversed.key_field = key_field.split("~").values_at(1, 0, 2).compact * "~"
                   reversed.close
                 end

                 reversed.unnamed = true

                 Association::Index.setup reversed
                 reversed
               end
end
|
40
|
+
|
41
|
+
# Returns the index keys that start with "<entity>~", i.e. the pairs whose
# source is +entity+. A nil entity yields an empty list.
def match(entity)
  if entity.nil?
    []
  else
    prefix(entity + "~")
  end
end
|
45
|
+
|
46
|
+
# Concatenates the +match+ results of every entity, in order.
# Returns nil when +entities+ is empty. While the accumulated list is still
# nil or empty it is replaced by the latest match list; afterwards matches
# are appended in place.
def matches(entities)
  collected = nil
  entities.each do |entity|
    found = match(entity)
    if collected.nil? || collected.empty?
      collected = found
    else
      collected.concat(found)
    end
  end
  collected
end
|
57
|
+
|
58
|
+
#{{{ Subset
|
59
|
+
|
60
|
+
# Picks from the +entities+ hash the lists for this index's source and target
# fields. Each list is looked up first under the field name itself and then
# under the string form of the entity type that Entity.formats gives for it.
# Returns [source_entities, target_entities]; either may be nil.
def select_entities(entities)
  [source_field, target_field].map do |field|
    entity_type = Entity.formats[field]
    entities[field] || entities[entity_type.to_s]
  end
end
|
69
|
+
|
70
|
+
# Returns the pair codes ("source~target") whose source is in +source+ and
# whose target is in +target+. Results are ordered by the (unique) targets
# given. Returns [] when either list is nil or empty.
def subset(source, target)
  return [] if source.nil? || source.empty? || target.nil? || target.empty?

  codes = source.uniq.inject([]) { |all, entity| all.concat(match(entity)) }

  by_target = {}
  codes.each do |code|
    origin, _separator, destination = code.partition("~")
    # With an undirected marker set, pairs whose target sorts after the
    # source are dropped.
    next if undirected && destination > origin
    (by_target[destination] ||= []) << code
  end

  target.uniq.map { |entity| by_target[entity] }.flatten.compact
end
|
86
|
+
|
87
|
+
# Convenience wrapper: extracts the source and target lists from the
# +entities+ hash with select_entities and passes them to subset.
def subset_entities(entities)
  sources, targets = select_entities(entities)
  subset(sources, targets)
end
|
91
|
+
end
|
92
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
|
3
|
+
module AssociationItem
|
4
|
+
extend Entity
|
5
|
+
|
6
|
+
annotation :knowledge_base
|
7
|
+
annotation :database
|
8
|
+
annotation :reverse
|
9
|
+
|
10
|
+
# For every pair code, the three-element result of partitioning it on the
# first "~": [source, "~", target].
property :part => :array2single do
  codes = self.clean_annotations
  codes.map { |code| code.partition("~") }
end
|
13
|
+
|
14
|
+
# Target half of every pair code: the third element of its partition.
property :target => :array2single do
  self.part.map { |_source, _separator, target| target }
end
|
17
|
+
|
18
|
+
# Source half of every pair code: the first run of characters other than "~".
property :source => :array2single do
  codes = self.clean_annotations
  codes.map { |code| code[/[^~]+/] }
end
|
21
|
+
|
22
|
+
# Targets annotated as entities by the knowledge base. When the item comes
# from a reversed lookup the target entities are of the database's source
# type. Returns nil when no target is present.
property :target_entity => :array2single do
  type = if reverse
           knowledge_base.source(database)
         else
           knowledge_base.target(database)
         end
  if self.target.any?
    knowledge_base.annotate(self.target, type)
  end
end
|
26
|
+
|
27
|
+
# Sources annotated as entities by the knowledge base. When the item comes
# from a reversed lookup the source entities are of the database's target
# type. Returns nil when no source is present.
property :source_entity => :array2single do
  type = reverse ? knowledge_base.target(database) : knowledge_base.source(database)
  # Guard on the list that is actually annotated (the original checked
  # self.target, a copy-paste from target_entity).
  sources = self.source
  knowledge_base.annotate sources, type if sources.any?
end
|
31
|
+
|
32
|
+
# Values stored in the database's index for every pair code, each wrapped as
# a NamedArray carrying the index fields.
property :value => :array2single do
  rows = knowledge_base.get_index(database).chunked_values_at self
  rows.map do |row|
    NamedArray.setup(row, knowledge_base.get_index(database).fields)
  end
end
|
36
|
+
|
37
|
+
# One hash per pair mapping each index field name to the pair's value for
# that field. When the index has no fields, every pair gets an empty hash.
property :info => :array2single do
  fields = knowledge_base.index_fields(database)
  if fields.nil? or fields.empty?
    # A separate hash per item, so mutating one does not affect the others.
    Array.new(self.length) { {} }
  else
    # Pair field => value (the original zipped the other way round, keying
    # by value); no flatten, so array-valued cells are kept intact.
    self.value.collect { |v| Hash[fields.zip(v)] }
  end
end
|
44
|
+
end
|
data/lib/rbbt/entity.rb
CHANGED
data/lib/rbbt/fix_width_table.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
class FixWidthTable
|
2
2
|
|
3
3
|
attr_accessor :filename, :file, :value_size, :record_size, :range, :size
|
4
|
-
def initialize(filename, value_size = nil, range = nil, update = false)
|
4
|
+
def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
|
5
5
|
@filename = filename
|
6
6
|
|
7
7
|
if update or %w(memory stringio).include?(filename.to_s.downcase) or not File.exists?(filename)
|
@@ -23,7 +23,11 @@ class FixWidthTable
|
|
23
23
|
@size = 0
|
24
24
|
else
|
25
25
|
Log.debug "FixWidthTable up-to-date: #{ filename }"
|
26
|
-
|
26
|
+
if in_memory
|
27
|
+
@file = StringIO.new(Open.read(@filename, :mode => 'rb'), 'r')
|
28
|
+
else
|
29
|
+
@file = File.open(@filename, 'r')
|
30
|
+
end
|
27
31
|
@value_size = @file.read(4).unpack("L").first
|
28
32
|
@range = @file.read(1).unpack("C").first == 1
|
29
33
|
@record_size = @value_size + (@range ? 12 : 4)
|
@@ -145,18 +149,19 @@ class FixWidthTable
|
|
145
149
|
|
146
150
|
while(upper >= lower) do
|
147
151
|
idx = lower + (upper - lower) / 2
|
148
|
-
|
152
|
+
pos_idx = pos(idx)
|
149
153
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
else
|
154
|
+
case pos <=> pos_idx
|
155
|
+
when 0
|
156
|
+
break
|
157
|
+
when -1
|
155
158
|
upper = idx - 1
|
159
|
+
when 1
|
160
|
+
lower = idx + 1
|
156
161
|
end
|
157
162
|
end
|
158
163
|
|
159
|
-
if
|
164
|
+
if pos_idx > pos
|
160
165
|
idx = idx - 1
|
161
166
|
end
|
162
167
|
|
@@ -0,0 +1,269 @@
|
|
1
|
+
require 'rbbt/association'
|
2
|
+
require 'rbbt/association/item'
|
3
|
+
require 'rbbt/entity'
|
4
|
+
|
5
|
+
class KnowledgeBase
|
6
|
+
class << self
  # Writers only: the readers below supply lazily-built defaults.
  attr_writer :knowledge_base_dir, :registry

  # Class-level registry; an empty IndiferentHash until assigned.
  def registry
    @registry ||= IndiferentHash.setup({})
  end

  # Base directory for knowledge bases; defaults to Rbbt.var.knowledge_base.
  def knowledge_base_dir
    @knowledge_base_dir ||= Rbbt.var.knowledge_base
  end
end
|
17
|
+
|
18
|
+
attr_accessor :namespace, :dir, :indices, :registry, :format, :databases, :entity_options
|
19
|
+
# Creates a knowledge base rooted at +dir+ (turned into a Path) for the given
# +namespace+. All name-keyed stores start as empty IndiferentHash instances;
# @descriptions is a plain hash.
def initialize(dir, namespace = nil)
  @dir = Path.setup dir

  @namespace = namespace
  @format = IndiferentHash.setup({})

  @registry = IndiferentHash.setup({})
  @entity_options = IndiferentHash.setup({})

  @indices = IndiferentHash.setup({})
  # Assigned once only: the original re-assigned @databases to a plain {}
  # right afterwards, discarding the IndiferentHash.
  @databases = IndiferentHash.setup({})
  @identifiers = IndiferentHash.setup({})
  @descriptions = {}
end
|
34
|
+
|
35
|
+
# Returns a knowledge base for +new_namespace+, located in the sub-path of
# +dir+ with that name. The new instance receives a merged copy of this one's
# format and entity_options and shares the same registry object.
# Returns self when the namespace is unchanged, unless +force+ is set.
def version(new_namespace, force = false)
  return self if new_namespace == namespace && !force

  KnowledgeBase.new(dir[new_namespace], new_namespace).tap do |new_kb|
    new_kb.format.merge!(self.format)
    new_kb.entity_options.merge!(self.entity_options)
    new_kb.registry = self.registry
  end
end
|
43
|
+
|
44
|
+
#{{{ Descriptions
|
45
|
+
|
46
|
+
# Registers a database under +name+. The source is the given block when one
# is passed, otherwise +file+; it is stored together with +options+ as
# [source, options] in the registry.
def register(name, file = nil, options = {}, &block)
  origin = block
  if origin
    Log.debug("Registering #{ name } from code block")
  else
    Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
    origin = file
  end
  @registry[name] = [origin, options]
end
|
55
|
+
|
56
|
+
# Names of every known database: those with an opened index plus those in
# the registry, without duplicates.
def all_databases
  @indices.keys | @registry.keys
end
|
59
|
+
|
60
|
+
# Memoized "~"-split of the key field of the index for +name+:
# [source, target] plus an optional third element.
def description(name)
  cached = @descriptions[name]
  return cached if cached

  @descriptions[name] = get_index(name).key_field.split("~")
end
|
63
|
+
|
64
|
+
# Source field of database +name+: first element of its description.
def source(name)
  description(name).first
end
|
67
|
+
|
68
|
+
# Target field of database +name+: second element of its description.
def target(name)
  _source_field, target_field = description(name)
  target_field
end
|
71
|
+
|
72
|
+
# Third element of the description of database +name+ (the undirected
# marker), or nil when the key field has only two parts.
def undirected(name)
  description(name).at(2)
end
|
75
|
+
|
76
|
+
# Entity type that Entity.formats associates with the source field of +name+.
def source_type(name)
  field = source(name)
  Entity.formats[field]
end
|
79
|
+
|
80
|
+
# Entity type that Entity.formats associates with the target field of +name+.
def target_type(name)
  field = target(name)
  Entity.formats[field]
end
|
83
|
+
|
84
|
+
# Fields of the index opened for database +name+.
def index_fields(name)
  index = get_index(name)
  index.fields
end
|
87
|
+
|
88
|
+
# Unique source and target field names across all databases, in the order
# they are first seen (source before target for each database).
def entities
  all_databases.flat_map { |name| [source(name), target(name)] }.uniq
end
|
91
|
+
|
92
|
+
# Unique entity types that Entity.formats gives for the fields in +entities+.
def entity_types
  entities.map { |field| Entity.formats[field] }.uniq
end
|
95
|
+
|
96
|
+
#{{{ Open and get
|
97
|
+
|
98
|
+
# Base options used when opening databases and indices: the knowledge base
# namespace and its format hash.
def open_options
  base = {}
  base[:namespace] = namespace
  base[:format] = @format
  base
end
|
101
|
+
|
102
|
+
# Returns the database for +name+, opening it on first use and caching it in
# @databases.
#
# Options are built from open_options, then the options stored at
# registration, then +options+ (later ones win). Persistence options are
# extracted from +options+ with Misc.pull_keys.
#
# Raises a RuntimeError only when the database is neither cached nor
# registered (the original raised for any unregistered name, even if an
# instance was already cached).
def get_database(name, options = {})
  persist_options = Misc.pull_keys options, :persist

  @databases[name] ||= begin
                         file, registered_options = registry[name]
                         raise "Repo #{ name } not found and not registered" if file.nil?

                         options = open_options.merge(registered_options || {}).merge(options)
                         Log.debug "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
                         Association.open(file, options, persist_options).
                           tap{|tsv| tsv.namespace = self.namespace}
                       end
end
|
115
|
+
|
116
|
+
|
117
|
+
# Returns the association index for +name+, building it on first use and
# caching it in @indices.
#
# Indices stored directly in @indices (by +index+ or +add_index+) are never
# registered, so the cache is consulted before the registry. The original
# raised "not found and not registered" for them.
#
# Options are built from open_options, then the options stored at
# registration, then +options+ (later ones win). Persistence options are
# extracted from +options+ with Misc.pull_keys.
#
# Raises a RuntimeError when the index is neither cached nor registered.
def get_index(name, options = {})
  persist_options = Misc.pull_keys options, :persist

  @indices[name] ||= begin
                       file, registered_options = registry[name]
                       raise "Repo #{ name } not found and not registered" if file.nil?

                       options = open_options.merge(registered_options || {}).merge(options)
                       Log.debug "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
                       Association.index(file, options, persist_options).
                         tap{|tsv| tsv.namespace = self.namespace}
                     end
end
|
130
|
+
|
131
|
+
# Builds an association index from +file+ (open_options overridden by
# +options+) and stores it under +name+, replacing any previous index.
def index(name, file, options = {}, persist_options = {})
  index_options = open_options.merge(options)
  @indices[name] = Association.index(file, index_options, persist_options)
end
|
134
|
+
|
135
|
+
#{{{ Add manual database
|
136
|
+
|
137
|
+
# Creates an empty, manually-filled index under +name+, stored in the file
# found at dir[name]. The key field is "source_type~target_type", followed by
# "~<undirected>" when the :undirected option is truthy. A trailing Hash in
# +fields+ is taken as options; the remaining options are merged into the
# ones passed to Association.index.
def add_index(name, source_type, target_type, *fields)
  options = fields.last.is_a?(Hash) ? fields.pop : {}
  # A false :undirected is normalized to nil so that compact drops it.
  undirected = Misc.process_options(options, :undirected) || nil

  repo_file = dir[name].find
  key_field = [source_type, target_type, undirected].compact.join("~")
  index_options = {:namespace => namespace, :key_field => key_field, :fields => fields}.merge(options)

  @indices[name] = Association.index(nil, index_options, :file => repo_file, :update => true)
end
|
148
|
+
|
149
|
+
# Stores +rest+ as the value of the pair "source~target" in the index
# already held under +name+.
def add(name, source, target, *rest)
  pair_code = [source, target].join("~")
  @indices[name][pair_code] = rest
end
|
154
|
+
|
155
|
+
# Runs the given block inside write_and_read of the index held under +name+
# and returns whatever write_and_read returns.
def write(name)
  @indices[name].write_and_read { yield }
end
|
161
|
+
|
162
|
+
#{{{ Annotate
|
163
|
+
|
164
|
+
# Options used to annotate entities of the given +type+ (a field name).
#
# Starts from {:organism => namespace}, overlays the options stored in
# entity_options for the entity type Entity.formats gives for +type+, and
# finally sets :format from @format when an entry exists for +type+.
#
# Two fixes over the original: the @format check used the literal symbol
# :type instead of the +type+ argument, and the stored options hash was
# mutated in place; it is now left untouched.
def entity_options_for(type)
  stored = entity_options[Entity.formats[type]] || {}
  options = {:organism => namespace}.merge(stored)
  options[:format] = @format[type] if @format.include? type
  options
end
|
170
|
+
|
171
|
+
# Hands +entities+ to Misc.prepare_entity with the given +type+ and the
# options that entity_options_for computes for it.
def annotate(entities, type)
  options = entity_options_for(type)
  Misc.prepare_entity(entities, type, options)
end
|
174
|
+
|
175
|
+
#{{{ Identify
|
176
|
+
|
177
|
+
# Translates +entity+ into the source format of database +name+.
#
# Returns +entity+ unchanged when the database already includes it.
# Otherwise looks it up in a per-database translation index, built once and
# cached in @identifiers[name]['source']:
# * from the database's own identifiers when it has identifier files and the
#   first one lists the source field;
# * else from Organism.identifiers(namespace) when that lists the field;
# * else an empty hash, so every lookup yields nil.
def identify_source(name, entity)
  database = get_database(name, :persist => true)
  return entity if database.include?(entity)

  source_field = source(name)
  translations = (@identifiers[name] ||= {})

  translations['source'] ||=
    if database.identifier_files.any?
      header = TSV.parse_header(database.identifier_files.first)
      if header.all_fields.include?(source_field)
        TSV.index(database.identifiers, :target => source_field, :persist => true)
      else
        {}
      end
    else
      header = TSV.parse_header(Organism.identifiers(namespace))
      if header.all_fields.include?(source_field)
        Organism.identifiers(namespace).index(:target => source_field, :persist => true)
      else
        {}
      end
    end

  translations['source'][entity]
end
|
200
|
+
|
201
|
+
# Translates +entity+ into the target format of database +name+ using a
# per-database translation index, built once and cached in
# @identifiers[name]['target']:
# * from the database's own identifiers when it has identifier files and the
#   first one lists the target field (an empty hash when it does not);
# * else from Organism.identifiers(namespace) when that lists the field;
# * else from an index over the database's first field.
def identify_target(name, entity)
  database = get_database(name, :persist => true)
  target_field = target(name)
  translations = (@identifiers[name] ||= {})

  translations['target'] ||=
    if database.identifier_files.any?
      header = TSV.parse_header(database.identifier_files.first)
      if header.all_fields.include?(target_field)
        TSV.index(database.identifiers, :target => target_field, :persist => true)
      else
        {}
      end
    else
      header = TSV.parse_header(Organism.identifiers(namespace))
      if header.all_fields.include?(target_field)
        Organism.identifiers(namespace).index(:target => target_field, :persist => true)
      else
        database.index(:target => database.fields.first, :fields => [database.fields.first], :persist => true)
      end
    end

  translations['target'][entity]
end
|
223
|
+
|
224
|
+
# Translates +entity+ for database +name+: tries the source format first and
# falls back to the target format when that gives nothing.
def identify(name, entity)
  as_source = identify_source(name, entity)
  as_source ? as_source : identify_target(name, entity)
end
|
227
|
+
|
228
|
+
#{{{ Query
|
229
|
+
|
230
|
+
# Pairs of database +name+ whose source is +entity+, set up as
# AssociationItem (not reversed).
def children(name, entity)
  pairs = get_index(name).match(entity)
  AssociationItem.setup(pairs, self, name, false)
end
|
234
|
+
|
235
|
+
# Pairs of the reversed index of database +name+ that start with +entity+,
# set up as AssociationItem flagged as reversed.
def parents(name, entity)
  pairs = get_index(name).reverse.match(entity)
  AssociationItem.setup(pairs, self, name, true)
end
|
239
|
+
|
240
|
+
# Neighbours of +entity+ in database +name+ as an IndiferentHash. Undirected
# databases only report :children; directed ones report :parents and
# :children.
def neighbours(name, entity)
  links = {}
  links[:parents] = parents(name, entity) unless undirected(name)
  links[:children] = children(name, entity)
  IndiferentHash.setup(links)
end
|
247
|
+
|
248
|
+
# Pairs of database +name+ connecting the given entities, set up as
# AssociationItem.
#
# +entities+ is either a Hash of format/type => list of ids, or an
# AnnotatedArray, which is converted to such a Hash keyed by its format
# (or by its base entity name when it has no format).
#
# Raises a RuntimeError for any other kind of argument.
def subset(name, entities)
  # The result of the case must be assigned: the original discarded it, so an
  # AnnotatedArray reached subset_entities unconverted.
  entities = case entities
             when AnnotatedArray
               format = entities.format if entities.respond_to? :format
               format ||= entities.base_entity.to_s
               {format => entities.clean_annotations}
             when Hash
               entities
             else
               raise "Entities are not a Hash or an AnnotatedArray: #{Misc.fingerprint entities}"
             end

  repo = get_index name
  AssociationItem.setup repo.subset_entities(entities), self, name, false
end
|
261
|
+
|
262
|
+
# Converts +entities+ to the format configured in @format for +type+.
# Returns +entities+ unchanged when no format is configured or when they are
# already in that format.
def translate(entities, type)
  wanted = @format[type]
  if wanted && wanted != entities.format
    entities.to(wanted)
  else
    entities
  end
end
|
269
|
+
end
|