rbbt-util 5.4.1 → 5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/bin/rbbt_monitor.rb +8 -4
- data/lib/rbbt.rb +4 -11
- data/lib/rbbt/annotations.rb +4 -1
- data/lib/rbbt/association.rb +218 -157
- data/lib/rbbt/association/index.rb +92 -0
- data/lib/rbbt/association/item.rb +44 -0
- data/lib/rbbt/entity.rb +4 -0
- data/lib/rbbt/fix_width_table.rb +14 -9
- data/lib/rbbt/knowledge_base.rb +269 -0
- data/lib/rbbt/persist.rb +1 -1
- data/lib/rbbt/persist/tsv.rb +22 -2
- data/lib/rbbt/resource.rb +0 -1
- data/lib/rbbt/resource/path.rb +1 -1
- data/lib/rbbt/resource/util.rb +0 -1
- data/lib/rbbt/tsv.rb +15 -14
- data/lib/rbbt/tsv/accessor.rb +21 -16
- data/lib/rbbt/tsv/attach.rb +5 -5
- data/lib/rbbt/tsv/attach/util.rb +4 -2
- data/lib/rbbt/tsv/change_id.rb +67 -0
- data/lib/rbbt/tsv/index.rb +5 -3
- data/lib/rbbt/tsv/manipulate.rb +83 -37
- data/lib/rbbt/tsv/parser.rb +2 -1
- data/lib/rbbt/tsv/util.rb +2 -0
- data/lib/rbbt/util/cmd.rb +1 -2
- data/lib/rbbt/util/log.rb +42 -38
- data/lib/rbbt/util/misc.rb +134 -46
- data/lib/rbbt/util/open.rb +3 -17
- data/lib/rbbt/util/semaphore.rb +8 -2
- data/lib/rbbt/workflow.rb +31 -46
- data/lib/rbbt/workflow/accessor.rb +1 -1
- data/lib/rbbt/workflow/step.rb +5 -3
- data/share/rbbt_commands/workflow/server +1 -0
- data/share/rbbt_commands/workflow/task +12 -2
- data/test/rbbt/association/test_index.rb +36 -0
- data/test/rbbt/test_annotations.rb +5 -4
- data/test/rbbt/test_association.rb +40 -13
- data/test/rbbt/test_knowledge_base.rb +103 -0
- data/test/rbbt/test_workflow.rb +4 -2
- data/test/rbbt/tsv/test_change_id.rb +43 -0
- data/test/rbbt/tsv/test_index.rb +2 -1
- data/test/rbbt/tsv/test_manipulate.rb +51 -0
- data/test/rbbt/util/test_misc.rb +21 -1
- data/test/test_helper.rb +8 -4
- metadata +12 -86
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'rbbt/tsv'
|
2
|
+
module Association
  # Mixin for an association index whose keys look like "source~target" and
  # whose +key_field+ looks like "SourceField~TargetField[~undirected]".
  #
  # The extended object must respond to +key_field+, +prefix+, +through+,
  # +annotate+, +persistence_path+, +serializer+ and +unnamed=+.
  # NOTE(review): presumably a persisted TSV produced by Association.index;
  # confirm against the caller.
  module Index

    attr_accessor :source_field, :target_field, :undirected

    # Splits +key_field+ on "~" into the source field, the target field and
    # the optional third component (left nil when absent).
    def parse_key_field
      @source_field, @target_field, @undirected = key_field.split("~")
    end

    # Extends +repo+ with this module, parses its key field and flags it as
    # unnamed.
    def self.setup(repo)
      repo.extend Association::Index
      repo.parse_key_field
      repo.unnamed = true
    end

    # Returns (and memoizes) the index with every "a~b" key stored as "b~a".
    #
    # The reversed index lives next to this one at
    # <tt>persistence_path + '.reverse'</tt>. When that file already exists
    # it is opened without the write flag; otherwise it is created, filled by
    # walking this index with +through+, annotated, given a key field with
    # source and target swapped, and closed.
    def reverse
      @reverse ||= begin
        reverse_filename = persistence_path + '.reverse'

        # File.exist? rather than File.exists?: the latter was deprecated
        # and is no longer available in Ruby 3.2+.
        if File.exist?(reverse_filename)
          reversed = Persist.open_tokyocabinet(reverse_filename, false, serializer, TokyoCabinet::BDB)
        else
          reversed = Persist.open_tokyocabinet(reverse_filename, true, serializer, TokyoCabinet::BDB)
          reversed.write
          through do |key, value|
            reversed[key.split("~").reverse.join("~")] = value
          end
          annotate(reversed)
          # Swap the first two components; keep the third if there is one.
          reversed.key_field = key_field.split("~").values_at(1,0,2).compact * "~"
          reversed.close
        end

        reversed.unnamed = true

        Association::Index.setup reversed
        reversed
      end
    end

    # Keys whose source is +entity+, obtained with <tt>prefix("entity~")</tt>.
    # Returns [] for a nil entity.
    def match(entity)
      return [] if entity.nil?
      prefix(entity + "~")
    end

    # Concatenation of +match+ over every entity, in order.
    #
    # Always returns a fresh Array: an empty list yields [] (not nil) and the
    # arrays handed back by +match+ are not modified.
    def matches(entities)
      entities.inject([]) { |acc, entity| acc.concat(match(entity)) }
    end

    #{{{ Subset

    # Picks the source and target entity lists out of +entities+, a Hash
    # looked up first by field name and then by the string form of
    # <tt>Entity.formats[field]</tt>.
    #
    # Returns <tt>[source_entities, target_entities]</tt>; either may be nil.
    def select_entities(entities)
      source_type = Entity.formats[source_field]
      target_type = Entity.formats[target_field]

      source_entities = entities[source_field] || entities[source_type.to_s]
      target_entities = entities[target_field] || entities[target_type.to_s]

      [source_entities, target_entities]
    end

    # Keys linking any entity in +source+ to any entity in +target+.
    #
    # Returns [] when either list is nil or empty. The result is grouped by
    # target, following the order of <tt>target.uniq</tt>.
    def subset(source, target)
      return [] if source.nil? or source.empty? or target.nil? or target.empty?

      codes = source.uniq.inject([]) { |acc, entity| acc.concat(match(entity)) }

      by_target = {}
      codes.each do |code|
        s, _sep, t = code.partition "~"
        # When the undirected flag is set, keys whose target sorts after
        # their source are skipped.
        next if undirected and t > s
        (by_target[t] ||= []) << code
      end

      # Targets without any key come back as nil from values_at; drop them.
      by_target.values_at(*target.uniq).flatten.compact
    end

    # Convenience wrapper: +select_entities+ followed by +subset+.
    def subset_entities(entities)
      source, target = select_entities(entities)
      subset source, target
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
require 'rbbt/entity'
|
2
|
+
|
3
|
+
# Entity wrapper for association keys of the form "source~target".
#
# Each item carries the knowledge base it came from, the database name and
# whether it was obtained from the reversed index.
# NOTE(review): +annotation+ and +property+ come from Entity; with
# :array2single each block presumably runs once over the whole array and
# returns one result per element. Confirm against rbbt/entity.
module AssociationItem
  extend Entity

  annotation :knowledge_base
  annotation :database
  annotation :reverse

  # [source, "~", target] triples, one per key.
  property :part => :array2single do
    self.clean_annotations.collect{|code| code.partition("~") }
  end

  # Text after the first "~" of each key.
  property :target => :array2single do
    self.part.collect{|parts| parts[2] }
  end

  # Leading run of non-"~" characters of each key.
  property :source => :array2single do
    self.clean_annotations.collect{|code| code[/[^~]+/] }
  end

  # Targets annotated as entities. The type is the database's target field,
  # or its source field when the items come from the reversed index.
  # Yields nil when there are no targets.
  property :target_entity => :array2single do
    type = reverse ? knowledge_base.source(database) : knowledge_base.target(database)
    knowledge_base.annotate self.target, type if self.target.any?
  end

  # Sources annotated as entities; mirror image of +target_entity+.
  # The guard checks the sources themselves, since they are what gets
  # annotated.
  property :source_entity => :array2single do
    type = reverse ? knowledge_base.target(database) : knowledge_base.source(database)
    knowledge_base.annotate self.source, type if self.source.any?
  end

  # Index values for each key, each set up as a NamedArray over the index
  # fields.
  property :value => :array2single do
    index = knowledge_base.get_index(database)
    index.chunked_values_at(self).collect{|v| NamedArray.setup(v, index.fields) }
  end

  # One Hash per key mapping each index field name to its value.
  # When the index declares no fields, every key gets an empty Hash.
  property :info => :array2single do
    fields = knowledge_base.index_fields(database)
    if fields.nil? or fields.empty?
      [{}] * self.length
    else
      # Pair field => value; building from pairs (rather than a flattened
      # list) also keeps values that are themselves arrays intact.
      value.collect{|v| Hash[fields.zip(v)] }
    end
  end
end
|
data/lib/rbbt/entity.rb
CHANGED
data/lib/rbbt/fix_width_table.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
class FixWidthTable
|
2
2
|
|
3
3
|
attr_accessor :filename, :file, :value_size, :record_size, :range, :size
|
4
|
-
def initialize(filename, value_size = nil, range = nil, update = false)
|
4
|
+
def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
|
5
5
|
@filename = filename
|
6
6
|
|
7
7
|
if update or %w(memory stringio).include?(filename.to_s.downcase) or not File.exists?(filename)
|
@@ -23,7 +23,11 @@ class FixWidthTable
|
|
23
23
|
@size = 0
|
24
24
|
else
|
25
25
|
Log.debug "FixWidthTable up-to-date: #{ filename }"
|
26
|
-
|
26
|
+
if in_memory
|
27
|
+
@file = StringIO.new(Open.read(@filename, :mode => 'rb'), 'r')
|
28
|
+
else
|
29
|
+
@file = File.open(@filename, 'r')
|
30
|
+
end
|
27
31
|
@value_size = @file.read(4).unpack("L").first
|
28
32
|
@range = @file.read(1).unpack("C").first == 1
|
29
33
|
@record_size = @value_size + (@range ? 12 : 4)
|
@@ -145,18 +149,19 @@ class FixWidthTable
|
|
145
149
|
|
146
150
|
while(upper >= lower) do
|
147
151
|
idx = lower + (upper - lower) / 2
|
148
|
-
|
152
|
+
pos_idx = pos(idx)
|
149
153
|
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
else
|
154
|
+
case pos <=> pos_idx
|
155
|
+
when 0
|
156
|
+
break
|
157
|
+
when -1
|
155
158
|
upper = idx - 1
|
159
|
+
when 1
|
160
|
+
lower = idx + 1
|
156
161
|
end
|
157
162
|
end
|
158
163
|
|
159
|
-
if
|
164
|
+
if pos_idx > pos
|
160
165
|
idx = idx - 1
|
161
166
|
end
|
162
167
|
|
@@ -0,0 +1,269 @@
|
|
1
|
+
require 'rbbt/association'
|
2
|
+
require 'rbbt/association/item'
|
3
|
+
require 'rbbt/entity'
|
4
|
+
|
5
|
+
# A named collection of association databases and indices that live under a
# common directory and share a namespace, preferred formats and entity
# options.
#
# Databases are either registered lazily (+register+) and opened on demand
# through Association.open / Association.index, or built by hand with
# +add_index+ / +add+.
class KnowledgeBase
  class << self
    # Only writers are generated; the readers below supply lazy defaults.
    attr_writer :knowledge_base_dir, :registry

    # Class-level registry, created on first use.
    def registry
      @registry ||= IndiferentHash.setup({})
    end

    # Class-level base directory; defaults to Rbbt.var.knowledge_base.
    def knowledge_base_dir
      @knowledge_base_dir ||= Rbbt.var.knowledge_base
    end
  end

  attr_accessor :namespace, :dir, :indices, :registry, :format, :databases, :entity_options

  # dir::       directory holding this knowledge base (set up as a Path)
  # namespace:: namespace passed to every database that gets opened
  def initialize(dir, namespace = nil)
    @dir = Path.setup dir

    @namespace = namespace
    @format = IndiferentHash.setup({})

    @registry = IndiferentHash.setup({})
    @entity_options = IndiferentHash.setup({})

    # Caches of opened resources, keyed by database name. @databases stays
    # an IndiferentHash like its siblings.
    @indices = IndiferentHash.setup({})
    @databases = IndiferentHash.setup({})
    @identifiers = IndiferentHash.setup({})
    @descriptions = {}
  end

  # Returns a knowledge base for +new_namespace+.
  #
  # Returns self when the namespace is unchanged and +force+ is false.
  # Otherwise builds a new KnowledgeBase under <tt>dir[new_namespace]</tt>
  # that copies the format and entity options and shares this registry
  # object.
  def version(new_namespace, force = false)
    return self if new_namespace == namespace and not force
    new_kb = KnowledgeBase.new dir[new_namespace], new_namespace
    new_kb.format.merge! self.format
    new_kb.entity_options.merge! self.entity_options
    new_kb.registry = self.registry
    new_kb
  end

  #{{{ Descriptions

  # Registers +name+ so it can be opened later. The registry entry is
  # <tt>[block, options]</tt> when a block is given and
  # <tt>[file, options]</tt> otherwise.
  def register(name, file = nil, options = {}, &block)
    if block_given?
      Log.debug("Registering #{ name } from code block")
      @registry[name] = [block, options]
    else
      Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
      @registry[name] = [file, options]
    end
  end

  # Names of every opened index plus every registered database.
  def all_databases
    (@indices.keys + @registry.keys).uniq
  end

  # Memoized components of the index key field for +name+:
  # <tt>[source, target, undirected]</tt> (the last may be missing).
  def description(name)
    @descriptions[name] ||= get_index(name).key_field.split("~")
  end

  # Source field of +name+.
  def source(name)
    description(name)[0]
  end

  # Target field of +name+.
  def target(name)
    description(name)[1]
  end

  # Third key-field component of +name+; nil when absent.
  def undirected(name)
    description(name)[2]
  end

  # Entity.formats lookup for the source field of +name+.
  def source_type(name)
    Entity.formats[source(name)]
  end

  # Entity.formats lookup for the target field of +name+.
  def target_type(name)
    Entity.formats[target(name)]
  end

  # Field names of the index for +name+.
  def index_fields(name)
    get_index(name).fields
  end

  # Unique source and target fields across all databases.
  def entities
    all_databases.flat_map { |name| [source(name), target(name)] }.uniq
  end

  # Unique Entity.formats lookups for every field in +entities+.
  def entity_types
    entities.collect{|entity| Entity.formats[entity] }.uniq
  end

  #{{{ Open and get

  # Options applied to every database or index opened by this knowledge base.
  def open_options
    {:namespace => namespace, :format => @format}
  end

  # Opens (and caches) the registered database +name+ via Association.open.
  #
  # Options are layered: +open_options+, then the registered options, then
  # +options+. Persistence options are extracted from +options+ with
  # <tt>Misc.pull_keys options, :persist</tt>.
  # Raises RuntimeError when +name+ is not registered.
  def get_database(name, options = {})
    persist_options = Misc.pull_keys options, :persist

    file, registered_options = registry[name]
    raise "Repo #{ name } not found and not registered" if file.nil?
    options = open_options.merge(registered_options || {}).merge(options)

    @databases[name] ||= begin
                           Log.debug "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
                           Association.open(file, options, persist_options).
                             tap{|tsv| tsv.namespace = self.namespace}
                         end
  end

  # Opens (and caches) the index for the registered database +name+ via
  # Association.index. Option handling and errors match +get_database+.
  def get_index(name, options = {})
    persist_options = Misc.pull_keys options, :persist

    file, registered_options = registry[name]
    raise "Repo #{ name } not found and not registered" if file.nil?
    options = open_options.merge(registered_options || {}).merge(options)

    @indices[name] ||= begin
                         Log.debug "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
                         Association.index(file, options, persist_options).
                           tap{|tsv| tsv.namespace = self.namespace}
                       end
  end

  # Builds an index for +file+ and stores it under +name+, replacing any
  # index already cached there.
  def index(name, file, options = {}, persist_options = {})
    @indices[name] = Association.index(file, open_options.merge(options), persist_options)
  end

  #{{{ Add manual database

  # Creates an empty index under +name+, stored at <tt>dir[name].find</tt>.
  #
  # +fields+ are the value field names; a trailing Hash is taken as options.
  # A truthy :undirected option is appended as the third key-field component.
  def add_index(name, source_type, target_type, *fields)
    options = fields.pop if Hash === fields.last
    options ||= {}
    undirected = Misc.process_options options, :undirected

    # Normalize false to nil so compact drops it from the key field.
    undirected = nil unless undirected

    repo_file = dir[name].find
    index = Association.index(nil, {:namespace => namespace, :key_field => [source_type, target_type, undirected].compact * "~", :fields => fields}.merge(options), :file => repo_file, :update => true)
    @indices[name] = index
  end

  # Stores +rest+ as the values for the "source~target" key in the index
  # cached under +name+.
  def add(name, source, target, *rest)
    code = [source, target] * "~"
    repo = @indices[name]
    repo[code] = rest
  end

  # Runs the given block inside +write_and_read+ on the index cached under
  # +name+.
  def write(name)
    repo = @indices[name]
    repo.write_and_read do
      yield
    end
  end

  #{{{ Annotate

  # Options used to annotate entities of +type+.
  #
  # Starts from <tt>{:organism => namespace}</tt>, overlays the options
  # stored for <tt>Entity.formats[type]</tt>, and sets :format when a
  # preferred format is configured for +type+. A new Hash is returned; the
  # stored entity options are left untouched.
  def entity_options_for(type)
    options = {:organism => namespace}.merge(entity_options[Entity.formats[type]] || {})
    options[:format] = @format[type] if @format.include? type
    options
  end

  # Annotates +entities+ as +type+ using +entity_options_for+.
  def annotate(entities, type)
    Misc.prepare_entity(entities, type, entity_options_for(type))
  end

  #{{{ Identify

  # Translates +entity+ into the source field of database +name+.
  #
  # Returns +entity+ unchanged when the database already includes it.
  # Otherwise it is looked up in a cached identifier index, built from the
  # database's own identifier files when it has any, or from
  # <tt>Organism.identifiers(namespace)</tt> when it has none; an empty Hash
  # is used if the chosen file lacks the source field.
  def identify_source(name, entity)
    database = get_database(name, :persist => true)
    return entity if database.include? entity
    source = source(name)
    @identifiers[name] ||= {}
    @identifiers[name]['source'] ||= begin
                                       if database.identifier_files.any?
                                         if TSV.parse_header(database.identifier_files.first).all_fields.include? source
                                           TSV.index(database.identifiers, :target => source, :persist => true)
                                         else
                                           {}
                                         end
                                       else
                                         if TSV.parse_header(Organism.identifiers(namespace)).all_fields.include? source
                                           Organism.identifiers(namespace).index(:target => source, :persist => true)
                                         else
                                           {}
                                         end
                                       end
                                     end

    @identifiers[name]['source'][entity]
  end

  # Translates +entity+ into the target field of database +name+.
  #
  # Uses a cached identifier index, chosen as in +identify_source+. The
  # fallbacks differ: with no database identifier files and no target column
  # in the organism identifiers, an index over the database's first field is
  # used.
  def identify_target(name, entity)
    database = get_database(name, :persist => true)
    target = target(name)

    @identifiers[name] ||= {}
    @identifiers[name]['target'] ||= begin
                                       if database.identifier_files.any?
                                         if TSV.parse_header(database.identifier_files.first).all_fields.include? target
                                           TSV.index(database.identifiers, :target => target, :persist => true)
                                         else
                                           {}
                                         end
                                       else
                                         if TSV.parse_header(Organism.identifiers(namespace)).all_fields.include? target
                                           Organism.identifiers(namespace).index(:target => target, :persist => true)
                                         else
                                           database.index(:target => database.fields.first, :fields => [database.fields.first], :persist => true)
                                         end
                                       end
                                     end
    @identifiers[name]['target'][entity]
  end

  # Tries +identify_source+ first and falls back to +identify_target+.
  def identify(name, entity)
    identify_source(name, entity) || identify_target(name, entity)
  end

  #{{{ Query

  # Associations in +name+ whose source is +entity+.
  def children(name, entity)
    repo = get_index name
    AssociationItem.setup repo.match(entity), self, name, false
  end

  # Associations in +name+ whose target is +entity+, found through the
  # reversed index and flagged as reversed.
  def parents(name, entity)
    repo = get_index name
    AssociationItem.setup repo.reverse.match(entity), self, name, true
  end

  # Hash with :children, plus :parents unless the database is undirected.
  def neighbours(name, entity)
    if undirected(name)
      IndiferentHash.setup({:children => children(name, entity)})
    else
      IndiferentHash.setup({:parents => parents(name, entity), :children => children(name, entity)})
    end
  end

  # Associations in +name+ connecting the given entities.
  #
  # +entities+ is either a Hash (handed to the index as is) or an
  # AnnotatedArray, which is converted to
  # <tt>{format => entities.clean_annotations}</tt>, where format is
  # <tt>entities.format</tt> when available and the string form of
  # <tt>entities.base_entity</tt> otherwise.
  # Raises RuntimeError for any other argument.
  def subset(name, entities)
    # The converted value must be assigned: the index expects a Hash.
    entities = case entities
               when AnnotatedArray
                 format = entities.format if entities.respond_to? :format
                 format ||= entities.base_entity.to_s
                 {format => entities.clean_annotations}
               when Hash
                 entities
               else
                 raise "Entities are not a Hash or an AnnotatedArray: #{Misc.fingerprint entities}"
               end
    repo = get_index name
    AssociationItem.setup repo.subset_entities(entities), self, name, false
  end

  # Converts +entities+ to the preferred format configured for +type+, when
  # one is set and differs from their current format; otherwise returns them
  # unchanged.
  def translate(entities, type)
    format = @format[type]
    if format and format != entities.format
      entities.to format
    else
      entities
    end
  end
end
|