rbbt-util 5.4.1 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +8 -8
  2. data/bin/rbbt_monitor.rb +8 -4
  3. data/lib/rbbt.rb +4 -11
  4. data/lib/rbbt/annotations.rb +4 -1
  5. data/lib/rbbt/association.rb +218 -157
  6. data/lib/rbbt/association/index.rb +92 -0
  7. data/lib/rbbt/association/item.rb +44 -0
  8. data/lib/rbbt/entity.rb +4 -0
  9. data/lib/rbbt/fix_width_table.rb +14 -9
  10. data/lib/rbbt/knowledge_base.rb +269 -0
  11. data/lib/rbbt/persist.rb +1 -1
  12. data/lib/rbbt/persist/tsv.rb +22 -2
  13. data/lib/rbbt/resource.rb +0 -1
  14. data/lib/rbbt/resource/path.rb +1 -1
  15. data/lib/rbbt/resource/util.rb +0 -1
  16. data/lib/rbbt/tsv.rb +15 -14
  17. data/lib/rbbt/tsv/accessor.rb +21 -16
  18. data/lib/rbbt/tsv/attach.rb +5 -5
  19. data/lib/rbbt/tsv/attach/util.rb +4 -2
  20. data/lib/rbbt/tsv/change_id.rb +67 -0
  21. data/lib/rbbt/tsv/index.rb +5 -3
  22. data/lib/rbbt/tsv/manipulate.rb +83 -37
  23. data/lib/rbbt/tsv/parser.rb +2 -1
  24. data/lib/rbbt/tsv/util.rb +2 -0
  25. data/lib/rbbt/util/cmd.rb +1 -2
  26. data/lib/rbbt/util/log.rb +42 -38
  27. data/lib/rbbt/util/misc.rb +134 -46
  28. data/lib/rbbt/util/open.rb +3 -17
  29. data/lib/rbbt/util/semaphore.rb +8 -2
  30. data/lib/rbbt/workflow.rb +31 -46
  31. data/lib/rbbt/workflow/accessor.rb +1 -1
  32. data/lib/rbbt/workflow/step.rb +5 -3
  33. data/share/rbbt_commands/workflow/server +1 -0
  34. data/share/rbbt_commands/workflow/task +12 -2
  35. data/test/rbbt/association/test_index.rb +36 -0
  36. data/test/rbbt/test_annotations.rb +5 -4
  37. data/test/rbbt/test_association.rb +40 -13
  38. data/test/rbbt/test_knowledge_base.rb +103 -0
  39. data/test/rbbt/test_workflow.rb +4 -2
  40. data/test/rbbt/tsv/test_change_id.rb +43 -0
  41. data/test/rbbt/tsv/test_index.rb +2 -1
  42. data/test/rbbt/tsv/test_manipulate.rb +51 -0
  43. data/test/rbbt/util/test_misc.rb +21 -1
  44. data/test/test_helper.rb +8 -4
  45. metadata +12 -86
data/lib/rbbt/association/index.rb ADDED
@@ -0,0 +1,92 @@
1
+ require 'rbbt/tsv'
2
module Association
  # Mixin for an index whose keys are pair codes of the form "source~target".
  #
  # The object it is mixed into must provide the methods called below:
  # +key_field+, +persistence_path+, +serializer+, +through+, +annotate+,
  # +prefix+ and +unnamed=+.
  module Index

    # The three "~"-separated components of the host's key field.
    attr_accessor :source_field, :target_field, :undirected

    # Split +key_field+ on "~" into source field, target field and the
    # optional third component (nil when the key field has only two parts).
    def parse_key_field
      @source_field, @target_field, @undirected = key_field.split("~")
    end

    # Extend +repo+ with this module, parse its key field and mark it unnamed.
    def self.setup(repo)
      repo.extend Association::Index
      repo.parse_key_field
      repo.unnamed = true
    end

    # Index with every key flipped ("a~b" becomes "b~a"), memoized per object.
    #
    # The flipped index lives in a TokyoCabinet BDB file next to the original
    # (+persistence_path+ plus ".reverse"). It is built on first use and simply
    # reopened when the file already exists.
    def reverse
      @reverse ||= begin
                     reverse_filename = persistence_path + '.reverse'

                     # File.exist? rather than File.exists?, which was
                     # deprecated and then removed in Ruby 3.2.
                     if File.exist?(reverse_filename)
                       reversed = Persist.open_tokyocabinet(reverse_filename, false, serializer, TokyoCabinet::BDB)
                     else
                       reversed = Persist.open_tokyocabinet(reverse_filename, true, serializer, TokyoCabinet::BDB)
                       reversed.write
                       through do |key, value|
                         new_key = key.split("~").reverse.join("~")
                         reversed[new_key] = value
                       end
                       annotate(reversed)
                       # Swap source and target in the key field, keeping the
                       # optional third component in place.
                       reversed.key_field = key_field.split("~").values_at(1,0,2).compact * "~"
                       reversed.close
                     end

                     reversed.unnamed = true

                     Association::Index.setup reversed
                     reversed
                   end
    end

    # Pair codes whose source is +entity+ (a prefix lookup on "entity~").
    # Returns an empty array for a nil entity.
    def match(entity)
      return [] if entity.nil?
      prefix(entity + "~")
    end

    # Concatenated pair codes for every entity in +entities+.
    #
    # Always returns a fresh array (empty for empty input) and never mutates
    # the arrays handed back by +match+.
    def matches(entities)
      entities.flat_map { |entity| match(entity) }
    end

    #{{{ Subset

    # Pick source and target entity lists out of the +entities+ hash.
    #
    # Each list is looked up first under the field name itself and then under
    # the string form of Entity.formats[field].
    #
    # Returns [source_entities, target_entities]; either may be nil.
    def select_entities(entities)
      source_type = Entity.formats[source_field]
      target_type = Entity.formats[target_field]

      source_entities = entities[source_field] || entities[source_type.to_s]
      target_entities = entities[target_field] || entities[target_type.to_s]

      [source_entities, target_entities]
    end

    # Pair codes going from any entity in +source+ to any entity in +target+.
    #
    # Returns [] when either list is nil or empty. When +undirected+ is set,
    # codes whose target sorts after their source are dropped. Results are
    # ordered by the order of +target+.
    def subset(source, target)
      return [] if source.nil? or source.empty? or target.nil? or target.empty?

      codes = source.uniq.flat_map { |entity| match(entity) }

      codes_by_target = {}
      codes.each do |code|
        s, _sep, t = code.partition "~"
        next if undirected and t > s
        (codes_by_target[t] ||= []) << code
      end

      codes_by_target.values_at(*target.uniq).flatten.compact
    end

    # Same as +subset+, taking the entities as a hash (see +select_entities+).
    def subset_entities(entities)
      source, target = select_entities(entities)
      subset source, target
    end
  end
end
data/lib/rbbt/association/item.rb ADDED
@@ -0,0 +1,44 @@
1
+ require 'rbbt/entity'
2
+
3
# Entity for pair codes of the form "source~target".
#
# Each item is annotated with the knowledge base it came from, the database
# name within it, and a +reverse+ flag that swaps which side is treated as
# source and which as target when entities are typed.
module AssociationItem
  extend Entity

  annotation :knowledge_base
  annotation :database
  annotation :reverse

  # [source, "~", target] triplet for each code.
  property :part => :array2single do
    self.clean_annotations.collect{|p| p.partition("~") }
  end

  # Text after the first "~" of each code.
  property :target => :array2single do
    self.part.collect{|p| p[2]}
  end

  # First run of non-"~" characters of each code.
  property :source => :array2single do
    self.clean_annotations.collect{|p| p[/[^~]+/] }
  end

  # Targets annotated with the knowledge base's type for this database
  # (the source type when +reverse+ is set). Returns nil when there are none.
  property :target_entity => :array2single do
    type = reverse ? knowledge_base.source(database) : knowledge_base.target(database)
    targets = self.target
    knowledge_base.annotate targets, type if targets.any?
  end

  # Sources annotated with the knowledge base's type for this database
  # (the target type when +reverse+ is set). Returns nil when there are none.
  property :source_entity => :array2single do
    type = reverse ? knowledge_base.target(database) : knowledge_base.source(database)
    sources = self.source
    # Guard on the sources themselves; the original checked the targets here.
    knowledge_base.annotate sources, type if sources.any?
  end

  # Index values for each code, each wrapped as a NamedArray over the
  # index's fields.
  property :value => :array2single do
    index = knowledge_base.get_index(database)
    index.chunked_values_at(self).collect{|v| NamedArray.setup(v, index.fields) }
  end

  # One Hash per code, mapping each index field name to its value.
  # When the index has no fields, every code gets its own empty Hash.
  property :info => :array2single do
    fields = knowledge_base.index_fields(database)
    # `next` leaves the block whether it is run as a proc or as a method body;
    # Array.new with a block avoids sharing one Hash between all items.
    next Array.new(self.length) { {} } if fields.nil? or fields.empty?
    value.collect{|v|
      # Field names are the keys; the original built value => field hashes.
      Hash[fields.zip(v)]
    }
  end
end
+ end
data/lib/rbbt/entity.rb CHANGED
@@ -32,6 +32,10 @@ module Entity
32
32
  end
33
33
  end
34
34
 
35
# Last of this object's annotation types that is itself an Entity,
# or nil when none is.
def base_entity
  entity_types = self.annotation_types.select { |mod| Entity === mod }
  entity_types.last
end
38
+
35
39
  def property(*args, &block)
36
40
  class << self; self; end.property(*args,&block)
37
41
  end
data/lib/rbbt/fix_width_table.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  class FixWidthTable
2
2
 
3
3
  attr_accessor :filename, :file, :value_size, :record_size, :range, :size
4
- def initialize(filename, value_size = nil, range = nil, update = false)
4
+ def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
5
5
  @filename = filename
6
6
 
7
7
  if update or %w(memory stringio).include?(filename.to_s.downcase) or not File.exists?(filename)
@@ -23,7 +23,11 @@ class FixWidthTable
23
23
  @size = 0
24
24
  else
25
25
  Log.debug "FixWidthTable up-to-date: #{ filename }"
26
- @file = File.open(@filename, 'r')
26
+ if in_memory
27
+ @file = StringIO.new(Open.read(@filename, :mode => 'rb'), 'r')
28
+ else
29
+ @file = File.open(@filename, 'r')
30
+ end
27
31
  @value_size = @file.read(4).unpack("L").first
28
32
  @range = @file.read(1).unpack("C").first == 1
29
33
  @record_size = @value_size + (@range ? 12 : 4)
@@ -145,18 +149,19 @@ class FixWidthTable
145
149
 
146
150
  while(upper >= lower) do
147
151
  idx = lower + (upper - lower) / 2
148
- comp = pos <=> pos(idx)
152
+ pos_idx = pos(idx)
149
153
 
150
- if comp == 0
151
- break
152
- elsif comp > 0
153
- lower = idx + 1
154
- else
154
+ case pos <=> pos_idx
155
+ when 0
156
+ break
157
+ when -1
155
158
  upper = idx - 1
159
+ when 1
160
+ lower = idx + 1
156
161
  end
157
162
  end
158
163
 
159
- if pos(idx) > pos
164
+ if pos_idx > pos
160
165
  idx = idx - 1
161
166
  end
162
167
 
data/lib/rbbt/knowledge_base.rb ADDED
@@ -0,0 +1,269 @@
1
+ require 'rbbt/association'
2
+ require 'rbbt/association/item'
3
+ require 'rbbt/entity'
4
+
5
# A named collection of association databases and their "source~target"
# indices, opened lazily from registered files or blocks and cached per
# instance.
class KnowledgeBase
  class << self
    # Only writers are generated; the readers below supply lazy defaults.
    attr_writer :knowledge_base_dir, :registry

    # Class-level registry, created on first access.
    def registry
      @registry ||= IndiferentHash.setup({})
    end

    # Class-level base directory, defaulting to Rbbt.var.knowledge_base.
    def knowledge_base_dir
      @knowledge_base_dir ||= Rbbt.var.knowledge_base
    end
  end

  attr_accessor :namespace, :dir, :indices, :registry, :format, :databases, :entity_options

  # dir::       directory for this knowledge base (set up as a Path)
  # namespace:: optional namespace passed on to opened databases and indices
  def initialize(dir, namespace = nil)
    @dir = Path.setup dir

    @namespace = namespace
    @format = IndiferentHash.setup({})

    @registry = IndiferentHash.setup({})
    @entity_options = IndiferentHash.setup({})

    @indices = IndiferentHash.setup({})
    # Assigned once: the original re-assigned a plain Hash further down,
    # discarding this IndiferentHash.
    @databases = IndiferentHash.setup({})
    @identifiers = IndiferentHash.setup({})
    @descriptions = {}
  end

  # Knowledge base for +new_namespace+ under dir[new_namespace], carrying over
  # formats and entity options and sharing the registry. Returns self when
  # the namespace is unchanged, unless +force+ is set.
  def version(new_namespace, force = false)
    return self if new_namespace == namespace and not force
    new_kb = KnowledgeBase.new dir[new_namespace], new_namespace
    new_kb.format.merge! self.format
    new_kb.entity_options.merge! self.entity_options
    new_kb.registry = self.registry
    new_kb
  end

  #{{{ Descriptions

  # Register database +name+ as [file, options], or as [block, options] when
  # a block is given (the block then takes the place of the file).
  def register(name, file = nil, options = {}, &block)
    if block_given?
      Log.debug("Registering #{ name } from code block")
      @registry[name] = [block, options]
    else
      Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
      @registry[name] = [file, options]
    end
  end

  # Names of every opened index and every registered database.
  def all_databases
    (@indices.keys + @registry.keys).uniq
  end

  # The index key field of +name+ split on "~", memoized.
  def description(name)
    @descriptions[name] ||= get_index(name).key_field.split("~")
  end

  # First component of the description.
  def source(name)
    description(name)[0]
  end

  # Second component of the description.
  def target(name)
    description(name)[1]
  end

  # Third component of the description (nil when absent).
  def undirected(name)
    description(name)[2]
  end

  def source_type(name)
    Entity.formats[source(name)]
  end

  def target_type(name)
    Entity.formats[target(name)]
  end

  def index_fields(name)
    get_index(name).fields
  end

  # Unique source and target fields across all databases.
  def entities
    all_databases.flat_map { |name| [source(name), target(name)] }.uniq
  end

  # Unique Entity.formats lookups for every field in +entities+.
  def entity_types
    entities.collect{|entity| Entity.formats[entity] }.uniq
  end

  #{{{ Open and get

  # Default options merged into every open call.
  def open_options
    {:namespace => namespace, :format => @format}
  end

  # Open (and cache) the association database registered as +name+.
  #
  # Keys pulled from +options+ by Misc.pull_keys(options, :persist) are passed
  # as persistence options. Raises if +name+ is neither cached nor registered.
  def get_database(name, options = {})
    persist_options = Misc.pull_keys options, :persist

    cached = @databases[name]
    return cached if cached

    file, registered_options = registry[name]
    raise "Repo #{ name } not found and not registered" if file.nil?
    options = open_options.merge(registered_options || {}).merge(options)

    Log.debug "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
    @databases[name] = Association.open(file, options, persist_options).
      tap{|tsv| tsv.namespace = self.namespace}
  end

  # Open (and cache) the association index registered as +name+.
  #
  # An index already present in the cache (for instance one created through
  # +index+ or +add_index+) is returned even when +name+ was never registered;
  # the original raised in that case. Raises if +name+ is neither cached nor
  # registered.
  def get_index(name, options = {})
    persist_options = Misc.pull_keys options, :persist

    cached = @indices[name]
    return cached if cached

    file, registered_options = registry[name]
    raise "Repo #{ name } not found and not registered" if file.nil?
    options = open_options.merge(registered_options || {}).merge(options)

    Log.debug "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
    @indices[name] = Association.index(file, options, persist_options).
      tap{|tsv| tsv.namespace = self.namespace}
  end

  # Build an index from +file+ and cache it under +name+, replacing any
  # previous entry.
  def index(name, file, options = {}, persist_options = {})
    @indices[name] = Association.index(file, open_options.merge(options), persist_options)
  end

  #{{{ Add manual database

  # Create an index with no source file, stored at dir[name], whose key field
  # is "source_type~target_type" plus the :undirected option when truthy.
  # A trailing Hash in +fields+ is taken as options.
  def add_index(name, source_type, target_type, *fields)
    options = fields.pop if Hash === fields.last
    options ||= {}
    undirected = Misc.process_options options, :undirected

    # Normalise false to nil so that compact drops it from the key field.
    undirected = nil unless undirected

    repo_file = dir[name].find
    index = Association.index(nil, {:namespace => namespace, :key_field => [source_type, target_type, undirected].compact * "~", :fields => fields}.merge(options), :file => repo_file, :update => true)
    @indices[name] = index
  end

  # Store +rest+ as the value of the pair "source~target" in index +name+.
  def add(name, source, target, *rest)
    code = [source, target] * "~"
    repo = @indices[name]
    repo[code] = rest
  end

  # Run the given block inside the index's write_and_read.
  def write(name)
    repo = @indices[name]
    repo.write_and_read do
      yield
    end
  end

  #{{{ Annotate

  # Options used to set up entities of +type+: the configured entity options
  # for Entity.formats[type], a :format entry when one is configured for
  # +type+, and :organism defaulting to the namespace.
  #
  # Returns a new Hash; the stored entity options are left untouched.
  def entity_options_for(type)
    options = entity_options[Entity.formats[type]] || {}
    # Look up the actual type; the original tested the literal symbol :type.
    options = options.merge(:format => @format[type]) if @format.include? type
    {:organism => namespace}.merge(options)
  end

  # Prepare +entities+ as entities of +type+ using +entity_options_for+.
  def annotate(entities, type)
    Misc.prepare_entity(entities, type, entity_options_for(type))
  end

  #{{{ Identify

  # Translate +entity+ into the source format of database +name+.
  #
  # Returns +entity+ unchanged when the database includes it. Otherwise looks
  # it up in an identifier index, built once per database: from the database's
  # identifier files when it has any, else from the organism identifiers.
  # When the chosen identifiers do not list the source field an empty Hash is
  # used, so the lookup returns nil.
  def identify_source(name, entity)
    database = get_database(name, :persist => true)
    return entity if database.include? entity
    source_field = source(name)
    @identifiers[name] ||= {}
    @identifiers[name]['source'] ||= begin
                                       if database.identifier_files.any?
                                         if TSV.parse_header(database.identifier_files.first).all_fields.include? source_field
                                           TSV.index(database.identifiers, :target => source_field, :persist => true)
                                         else
                                           {}
                                         end
                                       else
                                         if TSV.parse_header(Organism.identifiers(namespace)).all_fields.include? source_field
                                           Organism.identifiers(namespace).index(:target => source_field, :persist => true)
                                         else
                                           {}
                                         end
                                       end
                                     end

    @identifiers[name]['source'][entity]
  end

  # Translate +entity+ into the target format of database +name+.
  #
  # Works like +identify_source+, without the early return for entities the
  # database already includes. When there are no identifier files and the
  # organism identifiers lack the target field, it falls back to an index over
  # the database's own first field.
  def identify_target(name, entity)
    database = get_database(name, :persist => true)
    target_field = target(name)

    @identifiers[name] ||= {}
    @identifiers[name]['target'] ||= begin
                                       if database.identifier_files.any?
                                         if TSV.parse_header(database.identifier_files.first).all_fields.include? target_field
                                           TSV.index(database.identifiers, :target => target_field, :persist => true)
                                         else
                                           {}
                                         end
                                       else
                                         if TSV.parse_header(Organism.identifiers(namespace)).all_fields.include? target_field
                                           Organism.identifiers(namespace).index(:target => target_field, :persist => true)
                                         else
                                           database.index(:target => database.fields.first, :fields => [database.fields.first], :persist => true)
                                         end
                                       end
                                     end
    @identifiers[name]['target'][entity]
  end

  # Identify +entity+ as a source and, failing that, as a target.
  def identify(name, entity)
    identify_source(name, entity) || identify_target(name, entity)
  end

  #{{{ Query

  # Pairs in +name+ whose source is +entity+.
  def children(name, entity)
    repo = get_index name
    AssociationItem.setup repo.match(entity), self, name, false
  end

  # Pairs in the reversed index of +name+ whose source is +entity+,
  # flagged as reverse.
  def parents(name, entity)
    repo = get_index name
    AssociationItem.setup repo.reverse.match(entity), self, name, true
  end

  # Hash with :children, plus :parents when the database is directed.
  def neighbours(name, entity)
    if undirected(name)
      IndiferentHash.setup({:children => children(name, entity)})
    else
      IndiferentHash.setup({:parents => parents(name, entity), :children => children(name, entity)})
    end
  end

  # Pairs in +name+ connecting the given entities.
  #
  # +entities+ is either a Hash (passed through as is) or an AnnotatedArray,
  # which is turned into {format => plain values}, using its +format+ when it
  # responds to it and its +base_entity+ otherwise. Anything else raises.
  def subset(name, entities)
    # Keep the result of the conversion: the original computed the Hash for an
    # AnnotatedArray and then threw it away.
    entities = case entities
               when AnnotatedArray
                 format = entities.format if entities.respond_to? :format
                 format ||= entities.base_entity.to_s
                 {format => entities.clean_annotations}
               when Hash
                 entities
               else
                 raise "Entities are not a Hash or an AnnotatedArray: #{Misc.fingerprint entities}"
               end
    repo = get_index name
    AssociationItem.setup repo.subset_entities(entities), self, name, false
  end

  # Convert +entities+ to the format configured for +type+, when one is
  # configured and differs from their current format.
  def translate(entities, type)
    if format = @format[type] and format != entities.format
      entities.to format
    else
      entities
    end
  end
end