rbbt-util 5.4.1 → 5.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (45) hide show
  1. checksums.yaml +8 -8
  2. data/bin/rbbt_monitor.rb +8 -4
  3. data/lib/rbbt.rb +4 -11
  4. data/lib/rbbt/annotations.rb +4 -1
  5. data/lib/rbbt/association.rb +218 -157
  6. data/lib/rbbt/association/index.rb +92 -0
  7. data/lib/rbbt/association/item.rb +44 -0
  8. data/lib/rbbt/entity.rb +4 -0
  9. data/lib/rbbt/fix_width_table.rb +14 -9
  10. data/lib/rbbt/knowledge_base.rb +269 -0
  11. data/lib/rbbt/persist.rb +1 -1
  12. data/lib/rbbt/persist/tsv.rb +22 -2
  13. data/lib/rbbt/resource.rb +0 -1
  14. data/lib/rbbt/resource/path.rb +1 -1
  15. data/lib/rbbt/resource/util.rb +0 -1
  16. data/lib/rbbt/tsv.rb +15 -14
  17. data/lib/rbbt/tsv/accessor.rb +21 -16
  18. data/lib/rbbt/tsv/attach.rb +5 -5
  19. data/lib/rbbt/tsv/attach/util.rb +4 -2
  20. data/lib/rbbt/tsv/change_id.rb +67 -0
  21. data/lib/rbbt/tsv/index.rb +5 -3
  22. data/lib/rbbt/tsv/manipulate.rb +83 -37
  23. data/lib/rbbt/tsv/parser.rb +2 -1
  24. data/lib/rbbt/tsv/util.rb +2 -0
  25. data/lib/rbbt/util/cmd.rb +1 -2
  26. data/lib/rbbt/util/log.rb +42 -38
  27. data/lib/rbbt/util/misc.rb +134 -46
  28. data/lib/rbbt/util/open.rb +3 -17
  29. data/lib/rbbt/util/semaphore.rb +8 -2
  30. data/lib/rbbt/workflow.rb +31 -46
  31. data/lib/rbbt/workflow/accessor.rb +1 -1
  32. data/lib/rbbt/workflow/step.rb +5 -3
  33. data/share/rbbt_commands/workflow/server +1 -0
  34. data/share/rbbt_commands/workflow/task +12 -2
  35. data/test/rbbt/association/test_index.rb +36 -0
  36. data/test/rbbt/test_annotations.rb +5 -4
  37. data/test/rbbt/test_association.rb +40 -13
  38. data/test/rbbt/test_knowledge_base.rb +103 -0
  39. data/test/rbbt/test_workflow.rb +4 -2
  40. data/test/rbbt/tsv/test_change_id.rb +43 -0
  41. data/test/rbbt/tsv/test_index.rb +2 -1
  42. data/test/rbbt/tsv/test_manipulate.rb +51 -0
  43. data/test/rbbt/util/test_misc.rb +21 -1
  44. data/test/test_helper.rb +8 -4
  45. metadata +12 -86
@@ -0,0 +1,92 @@
1
+ require 'rbbt/tsv'
2
module Association
  # Mixin for key-value repositories whose keys are pair codes of the form
  # "source~target".
  #
  # The extended object is expected to provide +key_field+, +prefix+,
  # +through+, +annotate+, +persistence_path+, +serializer+ and +unnamed=+;
  # all of them are called below but defined elsewhere.
  module Index

    attr_accessor :source_field, :target_field, :undirected

    # Splits +key_field+ on "~" into source field, target field and an
    # optional third component stored as +undirected+ (nil when absent).
    def parse_key_field
      @source_field, @target_field, @undirected = key_field.split("~")
    end

    # Extends +repo+ with this module, parses its key field and marks it
    # as unnamed.
    def self.setup(repo)
      repo.extend Association::Index
      repo.parse_key_field
      repo.unnamed = true
    end

    # Returns (and memoizes) the companion index whose keys are
    # "target~source". It is stored next to this index, in a file with the
    # '.reverse' suffix; when that file does not exist yet it is built by
    # iterating over every entry of this index.
    def reverse
      @reverse ||= begin
                     reverse_filename = persistence_path + '.reverse'

                     # File.exist? instead of File.exists?: the latter was
                     # deprecated for years and removed in Ruby 3.2.
                     if File.exist?(reverse_filename)
                       reversed = Persist.open_tokyocabinet(reverse_filename, false, serializer, TokyoCabinet::BDB)
                     else
                       reversed = Persist.open_tokyocabinet(reverse_filename, true, serializer, TokyoCabinet::BDB)
                       reversed.write
                       through do |key, value|
                         reversed[key.split("~").reverse.join("~")] = value
                       end
                       annotate(reversed)
                       # Swap source and target, keeping the optional third
                       # (undirected) component in place.
                       reversed.key_field = key_field.split("~").values_at(1,0,2).compact * "~"
                       reversed.close
                     end

                     reversed.unnamed = true

                     Association::Index.setup reversed
                     reversed
                   end
    end

    # Pair codes whose source is +entity+ (looked up by the "entity~"
    # key prefix). Returns [] for a nil entity.
    def match(entity)
      return [] if entity.nil?
      prefix(entity + "~")
    end

    # Concatenated pair codes for every entity in +entities+.
    # Always returns an Array, including [] for an empty list.
    def matches(entities)
      entities.inject([]){|acc,e| acc.concat(match(e)) }
    end

    #{{{ Subset

    # Picks the source and target entity lists out of the +entities+ hash.
    # Each list is looked up by field name first and then by the name of
    # the entity type registered for that field in Entity.formats.
    # Returns [source_entities, target_entities]; either may be nil.
    def select_entities(entities)
      source_type = Entity.formats[source_field]
      target_type = Entity.formats[target_field]

      source_entities = entities[source_field] || entities[source_type.to_s]
      target_entities = entities[target_field] || entities[target_type.to_s]

      [source_entities, target_entities]
    end

    # Pair codes going from any entity in +source+ to any entity in
    # +target+, grouped in the order of +target+. Returns [] when either
    # list is nil or empty.
    #
    # For undirected indexes a pair is reported only once: the code with
    # target > source is skipped, but only when its mirror image
    # ("target~source") is going to be reported as well, i.e. when the
    # target is also among the sources and the source among the targets.
    # NOTE(review): this assumes undirected indexes store both directions
    # of every pair -- confirm against Association.index.
    def subset(source, target)
      return [] if source.nil? or source.empty? or target.nil? or target.empty?

      sources = source.uniq
      targets = target.uniq

      codes = sources.inject([]){|acc,e| acc.concat(match(e)) }

      # Hash lookups keep the mirror check O(1) per code.
      is_source = {}
      sources.each{|e| is_source[e] = true }
      is_target = {}
      targets.each{|e| is_target[e] = true }

      target_matches = {}

      codes.each{|code|
        s, _sep, t = code.partition "~"
        next if undirected and t > s and is_source[t] and is_target[s]
        target_matches[t] ||= []
        target_matches[t] << code
      }

      targets.collect{|t| target_matches[t] }.compact.flatten
    end

    # Same as #subset, taking the lists from an +entities+ hash
    # (see #select_entities).
    def subset_entities(entities)
      source, target = select_entities(entities)
      subset source, target
    end
  end
end
@@ -0,0 +1,44 @@
1
+ require 'rbbt/entity'
2
+
3
# Entity representing "source~target" pair codes coming from a database of a
# knowledge base.
#
# Annotations:
#   knowledge_base - object the properties below query (+source+, +target+,
#                    +annotate+, +get_index+ and +index_fields+ are called on it)
#   database       - name of the database inside that knowledge base
#   reverse        - truthy when the codes come from the reversed index, so
#                    the source and target types are swapped
module AssociationItem
  extend Entity

  annotation :knowledge_base
  annotation :database
  annotation :reverse

  # [before, "~", after] for each code, split on the first "~".
  property :part => :array2single do
    self.clean_annotations.collect{|p| p.partition("~") }
  end

  # Text after the first "~" of each code.
  property :target => :array2single do
    self.part.collect{|p| p[2]}
  end

  # Text before the first "~" of each code.
  property :source => :array2single do
    self.clean_annotations.collect{|p| p[/[^~]+/] }
  end

  # Targets annotated by the knowledge base with the matching entity type;
  # nil when there are no targets.
  property :target_entity => :array2single do
    type = reverse ? knowledge_base.source(database) : knowledge_base.target(database)
    knowledge_base.annotate self.target, type if self.target.any?
  end

  # Sources annotated by the knowledge base with the matching entity type;
  # nil when there are no sources. The guard checks the sources themselves
  # (it used to check the targets, copied from target_entity).
  property :source_entity => :array2single do
    type = reverse ? knowledge_base.target(database) : knowledge_base.source(database)
    knowledge_base.annotate self.source, type if self.source.any?
  end

  # Index values for each code, wrapped as NamedArray using the index fields.
  property :value => :array2single do
    index = knowledge_base.get_index(database)
    fields = index.fields
    index.chunked_values_at(self).collect{|v| NamedArray.setup(v, fields)}
  end

  # One Hash per code mapping each index field to its value. When the index
  # has no fields every code gets its own empty Hash.
  property :info => :array2single do
    fields = knowledge_base.index_fields(database)
    if fields.nil? or fields.empty?
      # A fresh Hash per slot, so changing one result does not change the rest.
      Array.new(self.length){ {} }
    else
      # Hash[pairs] keeps field => value orientation and leaves values that
      # are themselves arrays intact.
      value.collect{|v| Hash[fields.zip(v)] }
    end
  end
end
+ end
data/lib/rbbt/entity.rb CHANGED
@@ -32,6 +32,10 @@ module Entity
32
32
  end
33
33
  end
34
34
 
35
# Last of this object's annotation types that matches Entity
# (tested with Entity ===), or nil when none does.
def base_entity
  annotation_types.grep(Entity).last
end
38
+
35
39
  def property(*args, &block)
36
40
  class << self; self; end.property(*args,&block)
37
41
  end
@@ -1,7 +1,7 @@
1
1
  class FixWidthTable
2
2
 
3
3
  attr_accessor :filename, :file, :value_size, :record_size, :range, :size
4
- def initialize(filename, value_size = nil, range = nil, update = false)
4
+ def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
5
5
  @filename = filename
6
6
 
7
7
  if update or %w(memory stringio).include?(filename.to_s.downcase) or not File.exists?(filename)
@@ -23,7 +23,11 @@ class FixWidthTable
23
23
  @size = 0
24
24
  else
25
25
  Log.debug "FixWidthTable up-to-date: #{ filename }"
26
- @file = File.open(@filename, 'r')
26
+ if in_memory
27
+ @file = StringIO.new(Open.read(@filename, :mode => 'rb'), 'r')
28
+ else
29
+ @file = File.open(@filename, 'r')
30
+ end
27
31
  @value_size = @file.read(4).unpack("L").first
28
32
  @range = @file.read(1).unpack("C").first == 1
29
33
  @record_size = @value_size + (@range ? 12 : 4)
@@ -145,18 +149,19 @@ class FixWidthTable
145
149
 
146
150
  while(upper >= lower) do
147
151
  idx = lower + (upper - lower) / 2
148
- comp = pos <=> pos(idx)
152
+ pos_idx = pos(idx)
149
153
 
150
- if comp == 0
151
- break
152
- elsif comp > 0
153
- lower = idx + 1
154
- else
154
+ case pos <=> pos_idx
155
+ when 0
156
+ break
157
+ when -1
155
158
  upper = idx - 1
159
+ when 1
160
+ lower = idx + 1
156
161
  end
157
162
  end
158
163
 
159
- if pos(idx) > pos
164
+ if pos_idx > pos
160
165
  idx = idx - 1
161
166
  end
162
167
 
@@ -0,0 +1,269 @@
1
+ require 'rbbt/association'
2
+ require 'rbbt/association/item'
3
+ require 'rbbt/entity'
4
+
5
# A collection of named association databases and their pair indexes,
# kept under a directory and optionally tied to a namespace.
#
# Databases are either registered (a file or a block plus options, opened
# lazily through Association.open / Association.index) or created by hand
# with #index / #add_index.
class KnowledgeBase
  class << self
    attr_writer :knowledge_base_dir, :registry

    # Class-level registry, created on first use.
    def registry
      @registry ||= IndiferentHash.setup({})
    end

    # Class-level directory; defaults to Rbbt.var.knowledge_base.
    def knowledge_base_dir
      @knowledge_base_dir ||= Rbbt.var.knowledge_base
    end
  end

  attr_accessor :namespace, :dir, :indices, :registry, :format, :databases, :entity_options

  # dir       - directory of the knowledge base (passed through Path.setup)
  # namespace - optional namespace handed to the databases that are opened
  def initialize(dir, namespace = nil)
    @dir = Path.setup dir

    @namespace = namespace
    @format = IndiferentHash.setup({})

    @registry = IndiferentHash.setup({})
    @entity_options = IndiferentHash.setup({})

    @indices = IndiferentHash.setup({})
    # Assigned only once: a second plain-Hash assignment used to discard
    # the IndiferentHash set up here.
    @databases = IndiferentHash.setup({})
    @identifiers = IndiferentHash.setup({})
    @descriptions = {}
  end

  # Returns a knowledge base for +new_namespace+ under dir[new_namespace],
  # copying format and entity options and sharing the registry. Returns self
  # when the namespace is unchanged, unless +force+ is set.
  def version(new_namespace, force = false)
    return self if new_namespace == namespace and not force
    new_kb = KnowledgeBase.new dir[new_namespace], new_namespace
    new_kb.format.merge! self.format
    new_kb.entity_options.merge! self.entity_options
    new_kb.registry = self.registry
    new_kb
  end

  #{{{ Descriptions

  # Registers database +name+ as [file, options], or as [block, options]
  # when a block is given (the file argument is then ignored).
  def register(name, file = nil, options = {}, &block)
    if block_given?
      Log.debug("Registering #{ name } from code block")
      @registry[name] = [block, options]
    else
      Log.debug("Registering #{ name }: #{ Misc.fingerprint file }")
      @registry[name] = [file, options]
    end
  end

  # Names of all opened indexes and registered databases, without repeats.
  def all_databases
    (@indices.keys + @registry.keys).uniq
  end

  # Components of the index key field split on "~":
  # [source, target] plus an optional third (undirected) component.
  def description(name)
    @descriptions[name] ||= get_index(name).key_field.split("~")
  end

  def source(name)
    description(name)[0]
  end

  def target(name)
    description(name)[1]
  end

  def undirected(name)
    description(name)[2]
  end

  # Entity type registered in Entity.formats for the source field.
  def source_type(name)
    Entity.formats[source(name)]
  end

  # Entity type registered in Entity.formats for the target field.
  def target_type(name)
    Entity.formats[target(name)]
  end

  def index_fields(name)
    get_index(name).fields
  end

  # Source and target fields of every database, without repeats.
  def entities
    all_databases.inject([]){|acc,name| acc << source(name); acc << target(name)}.uniq
  end

  # Entity types (per Entity.formats) of all #entities, without repeats.
  def entity_types
    entities.collect{|entity| Entity.formats[entity] }.uniq
  end

  #{{{ Open and get

  # Base options for opening any database of this knowledge base.
  def open_options
    {:namespace => namespace, :format => @format}
  end

  # Returns database +name+, opening it with Association.open on first use.
  # Entries already cached are returned as they are; otherwise the database
  # must be registered.
  #
  # Raises RuntimeError when +name+ is neither cached nor registered.
  def get_database(name, options = {})
    cached = @databases[name]
    return cached if cached

    file, options, persist_options = registered_source(name, options)
    Log.debug "Opening database #{ name } from #{ Misc.fingerprint file }. #{options}"
    @databases[name] = Association.open(file, options, persist_options).
      tap{|tsv| tsv.namespace = self.namespace}
  end


  # Returns the index for +name+, building it with Association.index on
  # first use. Indexes already cached -- including those created with #index
  # or #add_index, which are not in the registry -- are returned as they are.
  #
  # Raises RuntimeError when +name+ is neither cached nor registered.
  def get_index(name, options = {})
    cached = @indices[name]
    return cached if cached

    file, options, persist_options = registered_source(name, options)
    Log.debug "Opening index #{ name } from #{ Misc.fingerprint file }. #{options}"
    @indices[name] = Association.index(file, options, persist_options).
      tap{|tsv| tsv.namespace = self.namespace}
  end

  # Builds the index for +name+ from +file+ right away and caches it.
  def index(name, file, options = {}, persist_options = {})
    @indices[name] = Association.index(file, open_options.merge(options), persist_options)
  end

  #{{{ Add manual database

  # Creates an empty index called +name+ stored at dir[name], with key field
  # "source_type~target_type" (plus the :undirected option when truthy) and
  # the given value +fields+. A trailing Hash in +fields+ is taken as options.
  def add_index(name, source_type, target_type, *fields)
    options = fields.pop if Hash === fields.last
    options ||= {}
    undirected = Misc.process_options options, :undirected

    # Normalize false to nil so that compact drops it from the key field.
    undirected = nil unless undirected

    repo_file = dir[name].find
    index = Association.index(nil, {:namespace => namespace, :key_field => [source_type, target_type, undirected].compact * "~", :fields => fields}.merge(options), :file => repo_file, :update => true)
    @indices[name] = index
  end

  # Stores +rest+ as the values of the pair "source~target" in index +name+.
  def add(name, source, target, *rest)
    code = [source, target] * "~"
    repo = @indices[name]
    repo[code] = rest
  end

  # Runs the given block inside +write_and_read+ of index +name+.
  def write(name)
    repo = @indices[name]
    repo.write_and_read do
      yield
    end
  end

  #{{{ Annotate

  # Options used to annotate entities of +type+: the options configured for
  # its entity type, the format configured for +type+ (if any), and
  # :organism defaulting to the namespace. The configured options are not
  # modified.
  def entity_options_for(type)
    options = (entity_options[Entity.formats[type]] || {}).dup
    # Look up the actual type, not the literal symbol :type.
    options[:format] = @format[type] if @format.include? type
    {:organism => namespace}.merge(options)
  end

  def annotate(entities, type)
    Misc.prepare_entity(entities, type, entity_options_for(type))
  end

  #{{{ Identify

  # Translates +entity+ into the source field of database +name+. Entities
  # that are already keys of the database are returned unchanged; when no
  # identifier file lists the source field nothing can be translated and
  # the result is nil.
  def identify_source(name, entity)
    database = get_database(name, :persist => true)
    return entity if database.include? entity
    source_field = source(name)
    @identifiers[name] ||= {}
    @identifiers[name]['source'] ||= identifier_index(database, source_field){ {} }

    @identifiers[name]['source'][entity]
  end

  # Translates +entity+ into the target field of database +name+. When
  # the database has no identifier files and the organism identifiers do not
  # list the target field, an index over the first field of the database is
  # used instead.
  def identify_target(name, entity)
    database = get_database(name, :persist => true)
    target_field = target(name)

    @identifiers[name] ||= {}
    @identifiers[name]['target'] ||= identifier_index(database, target_field){
      database.index(:target => database.fields.first, :fields => [database.fields.first], :persist => true)
    }
    @identifiers[name]['target'][entity]
  end

  # Tries #identify_source first and then #identify_target.
  def identify(name, entity)
    identify_source(name, entity) || identify_target(name, entity)
  end

  #{{{ Query

  # Associations in +name+ that have +entity+ as source.
  def children(name, entity)
    repo = get_index name
    AssociationItem.setup repo.match(entity), self, name, false
  end

  # Associations in +name+ that have +entity+ as target, taken from the
  # reversed index.
  def parents(name, entity)
    repo = get_index name
    AssociationItem.setup repo.reverse.match(entity), self, name, true
  end

  # Hash with :children and, for directed databases only, :parents.
  def neighbours(name, entity)
    if undirected(name)
      IndiferentHash.setup({:children => children(name, entity)})
    else
      IndiferentHash.setup({:parents => parents(name, entity), :children => children(name, entity)})
    end
  end

  # Associations in +name+ between the given entities.
  #
  # entities - a Hash handed to the index as is, or an AnnotatedArray, which
  #            is converted to {format => plain entities} using its format
  #            or, failing that, its base entity name.
  #
  # Raises RuntimeError for any other kind of argument.
  def subset(name, entities)
    entities = case entities
               when AnnotatedArray
                 format = entities.format if entities.respond_to? :format
                 format ||= entities.base_entity.to_s
                 {format => entities.clean_annotations}
               when Hash
                 entities
               else
                 raise "Entities are not a Hash or an AnnotatedArray: #{Misc.fingerprint entities}"
               end
    repo = get_index name
    AssociationItem.setup repo.subset_entities(entities), self, name, false
  end

  # Converts +entities+ to the format configured for +type+, when there is
  # one and it differs from their current format.
  def translate(entities, type)
    if format = @format[type] and format != entities.format
      entities.to format
    else
      entities
    end
  end

  private

  # Looks up the registered source for +name+ and returns
  # [file, merged open options, persist options]. Options are merged as
  # open_options < registered options < +options+.
  def registered_source(name, options)
    persist_options = Misc.pull_keys options, :persist

    file, registered_options = registry[name]
    raise "Repo #{ name } not found and not registered" if file.nil?

    [file, open_options.merge(registered_options || {}).merge(options), persist_options]
  end

  # Builds an identifier index targeting +field+: from the identifier files
  # of +database+ when it has any, otherwise from the organism identifiers of
  # the namespace. Yields to obtain a fallback when there are no identifier
  # files and the organism identifiers lack +field+; returns {} when the
  # database has identifier files but they lack +field+.
  def identifier_index(database, field)
    if database.identifier_files.any?
      if TSV.parse_header(database.identifier_files.first).all_fields.include? field
        TSV.index(database.identifiers, :target => field, :persist => true)
      else
        {}
      end
    else
      if TSV.parse_header(Organism.identifiers(namespace)).all_fields.include? field
        Organism.identifiers(namespace).index(:target => field, :persist => true)
      else
        yield
      end
    end
  end
end