rbbt-util 5.27.1 → 5.27.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/annotations/util.rb +1 -2
  3. data/lib/rbbt/entity.rb +8 -5
  4. data/lib/rbbt/fix_width_table.rb +6 -5
  5. data/lib/rbbt/knowledge_base/entity.rb +3 -2
  6. data/lib/rbbt/knowledge_base/query.rb +9 -0
  7. data/lib/rbbt/persist.rb +2 -3
  8. data/lib/rbbt/persist/tsv.rb +5 -5
  9. data/lib/rbbt/persist/tsv/adapter.rb +136 -43
  10. data/lib/rbbt/persist/tsv/tokyocabinet.rb +5 -3
  11. data/lib/rbbt/resource.rb +8 -5
  12. data/lib/rbbt/resource/path.rb +3 -3
  13. data/lib/rbbt/tsv/accessor.rb +6 -6
  14. data/lib/rbbt/tsv/change_id.rb +3 -1
  15. data/lib/rbbt/tsv/parallel/traverse.rb +1 -1
  16. data/lib/rbbt/tsv/util.rb +1 -0
  17. data/lib/rbbt/util/misc/exceptions.rb +8 -0
  18. data/lib/rbbt/workflow.rb +1 -1
  19. data/lib/rbbt/workflow/accessor.rb +3 -1
  20. data/lib/rbbt/workflow/definition.rb +6 -0
  21. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +9 -3
  22. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +9 -3
  23. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +7 -1
  24. data/lib/rbbt/workflow/step/accessor.rb +4 -0
  25. data/lib/rbbt/workflow/step/dependencies.rb +10 -6
  26. data/lib/rbbt/workflow/step/run.rb +2 -2
  27. data/lib/rbbt/workflow/task.rb +1 -1
  28. data/lib/rbbt/workflow/usage.rb +25 -8
  29. data/lib/rbbt/workflow/util/archive.rb +30 -4
  30. data/share/install/software/lib/install_helpers +9 -3
  31. data/share/rbbt_commands/migrate_job +3 -1
  32. data/share/rbbt_commands/system/status +3 -2
  33. data/share/rbbt_commands/tsv/get +33 -7
  34. data/share/rbbt_commands/workflow/monitor +3 -3
  35. data/share/rbbt_commands/workflow/task +1 -1
  36. data/test/rbbt/knowledge_base/test_query.rb +1 -1
  37. data/test/rbbt/test_entity.rb +14 -5
  38. data/test/rbbt/test_knowledge_base.rb +3 -3
  39. data/test/rbbt/workflow/step/test_dependencies.rb +40 -8
  40. data/test/rbbt/workflow/test_remote_workflow.rb +13 -1
  41. metadata +2 -2
@@ -170,10 +170,12 @@ module Resource
170
170
  if type and not File.exist?(final_path) or force
171
171
  Log.medium "Producing: #{ final_path }"
172
172
  lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
173
+
173
174
  Misc.lock lock_filename do
174
175
  FileUtils.rm_rf final_path if force and File.exist? final_path
175
- if not File.exist?(final_path) or force
176
- (remote_server and get_from_server(path, final_path)) or
176
+
177
+ if ! File.exist?(final_path) || force
178
+
177
179
  begin
178
180
  case type
179
181
  when :string
@@ -291,7 +293,7 @@ url='#{url}'
291
293
  rescue
292
294
  FileUtils.rm_rf final_path if File.exist? final_path
293
295
  raise $!
294
- end
296
+ end unless (remote_server && get_from_server(path, final_path))
295
297
  end
296
298
  end
297
299
  end
@@ -304,11 +306,12 @@ url='#{url}'
304
306
  resource ||= Rbbt
305
307
  (Path::STANDARD_SEARCH + resource.search_order + resource.search_paths.keys).uniq.each do |name|
306
308
  pattern = resource.search_paths[name]
307
- next if patterns.nil?
309
+ next if pattern.nil?
310
+ pattern = pattern.sub('{PWD}', Dir.pwd)
308
311
  if String === pattern and pattern.include?('{')
309
312
  regexp = "^" + pattern.gsub(/{([^}]+)}/,'(?<\1>[^/]+)') + "(?:/(?<REST>.*))?/?$"
310
313
  if m = path.match(regexp)
311
- if m["PKGDIR"] == resource.pkgdir
314
+ if ! m.named_captures.include?("PKGDIR") || m["PKGDIR"] == resource.pkgdir
312
315
  return self[m["TOPLEVEL"]][m["SUBPATH"]][m["REST"]]
313
316
  end
314
317
  end
@@ -199,7 +199,7 @@ module Path
199
199
  next if res
200
200
  next unless paths.include? w
201
201
  path = find(w, caller_lib, paths)
202
- res = path if File.exist? path
202
+ res = path if File.exist?(path)
203
203
  end if res.nil?
204
204
 
205
205
  (paths.keys - STANDARD_SEARCH - search_order).each do |w|
@@ -241,8 +241,8 @@ module Path
241
241
  sub('{REMOVE}/', '').
242
242
  sub('{REMOVE}', '')
243
243
 
244
- path = path + '.gz' if File.exist? path + '.gz'
245
- path = path + '.bgz' if File.exist? path + '.bgz'
244
+ path = path + '.gz' if File.exist?(path + '.gz')
245
+ path = path + '.bgz' if File.exist?(path + '.bgz')
246
246
 
247
247
  self.annotate path
248
248
 
@@ -243,7 +243,7 @@ module TSV
243
243
  end
244
244
 
245
245
  def []=(key, value, clean = false)
246
- return super(key, value) if clean or value.nil? or TSV::CleanSerializer == self.serializer_module
246
+ return super(key, value) if clean || value.nil? || TSV::CleanSerializer == self.serializer_module
247
247
  super(key, @serializer_module.dump(value))
248
248
  end
249
249
 
@@ -366,11 +366,11 @@ module TSV
366
366
  keys.length
367
367
  end
368
368
 
369
- def values_at(*keys)
370
- keys.collect do |key|
371
- self[key]
372
- end
373
- end
369
+ #def _values_at(*keys)
370
+ # keys.collect do |key|
371
+ # self[key]
372
+ # end
373
+ #end
374
374
 
375
375
  def chunked_values_at(keys, max = 5000)
376
376
  Misc.ordered_divide(keys, max).inject([]) do |acc,c|
@@ -10,7 +10,8 @@ module TSV
10
10
 
11
11
  identifiers = Organism.identifiers(tsv.namespace) if identifiers.nil? and tsv.namespace
12
12
 
13
- if not tsv.fields.include? format
13
+
14
+ if ! tsv.fields.include?(format)
14
15
  new = {}
15
16
  tsv.each do |k,v|
16
17
  if v === String or v === Array
@@ -32,6 +33,7 @@ module TSV
32
33
  tsv = tsv.attach identifiers, :fields => [format], :persist_input => true
33
34
  end
34
35
 
36
+
35
37
  tsv = tsv.reorder(format, tsv.fields[0..-2])
36
38
 
37
39
  tsv = tsv.to_flat if orig_type == :flat
@@ -374,7 +374,7 @@ module TSV
374
374
  when (defined? Step and Step)
375
375
 
376
376
  obj.clean if obj.aborted? or obj.recoverable_error?
377
- obj.run(true) unless obj.done? or obj.started?
377
+ obj.run(true) unless obj.done? || obj.started? || obj.result
378
378
 
379
379
  stream = obj.get_stream
380
380
 
@@ -122,6 +122,7 @@ module TSV
122
122
  end
123
123
  else
124
124
  file.grace
125
+
125
126
  stream = file.get_stream
126
127
  if stream && ! stream.closed?
127
128
  stream
@@ -15,6 +15,14 @@ end
15
15
  class Aborted < StandardError; end
16
16
 
17
17
  class TryAgain < StandardError; end
18
+
19
+ class TryThis < StandardError
20
+ attr_accessor :payload
21
+ def initialize(payload = nil)
22
+ @payload = payload
23
+ end
24
+ end
25
+
18
26
  class SemaphoreInterrupted < TryAgain; end
19
27
  class LockInterrupted < TryAgain; end
20
28
 
@@ -607,7 +607,7 @@ module Workflow
607
607
  end
608
608
 
609
609
  def self.load_step(path)
610
- Path.setup(path) unless Path === path
610
+ path = Path.setup(path.dup) unless Path === path
611
611
  path = path.find
612
612
 
613
613
  begin
@@ -35,6 +35,7 @@ module Workflow
35
35
  input_use = rec_input_use(name)
36
36
  input_defaults = rec_input_defaults(name)
37
37
  input_options = rec_input_options(name)
38
+ extension = task.extension
38
39
  export = case
39
40
  when (synchronous_exports.include?(name.to_sym) or synchronous_exports.include?(name.to_s))
40
41
  :synchronous
@@ -60,7 +61,8 @@ module Workflow
60
61
  :input_use => input_use,
61
62
  :result_type => result_type,
62
63
  :result_description => result_description,
63
- :dependencies => dependencies
64
+ :dependencies => dependencies,
65
+ :extension => extension
64
66
  }
65
67
  end
66
68
  end
@@ -18,6 +18,7 @@ module Workflow
18
18
  :description => "",
19
19
  :result_type => nil,
20
20
  :result_description => "",
21
+ :resumable => false,
21
22
  :extension => nil)
22
23
 
23
24
 
@@ -33,6 +34,10 @@ module Workflow
33
34
  @extension = extension
34
35
  end
35
36
 
37
+ def resumable
38
+ @resumable = true
39
+ end
40
+
36
41
  def returns(description)
37
42
  @result_description = description
38
43
  end
@@ -118,6 +123,7 @@ module Workflow
118
123
  :input_descriptions => consume_input_descriptions,
119
124
  :required_inputs => consume_required_inputs,
120
125
  :extension => consume_extension,
126
+ :resumable => consume_resumable,
121
127
  :input_options => consume_input_options
122
128
  }
123
129
 
@@ -137,13 +137,15 @@ class RemoteWorkflow
137
137
 
138
138
  post_thread = Thread.new(Thread.current) do |parent|
139
139
  bl = lambda do |rok|
140
- if Net::HTTPOK === rok
140
+ if Net::HTTPOK === rok
141
141
  _url = rok["RBBT-STREAMING-JOB-URL"]
142
142
  @url = File.join(task_url, File.basename(_url)) if _url
143
143
  rok.read_body do |c,_a, _b|
144
144
  sin.write c
145
145
  end
146
146
  sin.close
147
+ elsif Net::HTTPSeeOther === rok
148
+ raise TryThis.new(rok)
147
149
  else
148
150
  err = StringIO.new
149
151
  rok.read_body do |c,_a, _b|
@@ -156,7 +158,7 @@ class RemoteWorkflow
156
158
  err.rewind
157
159
  err.read
158
160
  end
159
- ne = @adaptor.parse_exception text
161
+ ne = RemoteWorkflow.parse_exception text
160
162
  case ne
161
163
  when String
162
164
  parent.raise e.class, ne
@@ -173,7 +175,11 @@ class RemoteWorkflow
173
175
  end
174
176
 
175
177
  Log.debug{ "RestClient execute: #{ task_url } - #{Misc.fingerprint task_params}" }
176
- RestClient::Request.execute(:method => :post, :url => task_url, :payload => task_params, :block_response => bl)
178
+ begin
179
+ RestClient::Request.execute(:method => :post, :url => task_url, :payload => task_params, :block_response => bl)
180
+ rescue TryThis
181
+ RestClient::Request.execute(:method => :get, :url => $!.payload.header[:location], :block_response => bl)
182
+ end
177
183
  end
178
184
 
179
185
  # It seems like now response body are now decoded by Net::HTTP after 2.1
@@ -25,8 +25,13 @@ class RemoteStep < Step
25
25
  end
26
26
 
27
27
  def cache_file
28
- digest = Misc.obj2digest([base_url, task, base_name, inputs])
29
- Rbbt.var.cache.REST[[task, clean_name, digest] * "."].find
28
+ begin
29
+ digest = Misc.obj2digest([base_url, task, base_name, inputs])
30
+ Rbbt.var.cache.REST[[task, clean_name, digest].compact * "."].find
31
+ rescue
32
+ Log.exception $!
33
+ raise $!
34
+ end
30
35
  end
31
36
 
32
37
  def cache_files
@@ -62,7 +67,6 @@ class RemoteStep < Step
62
67
  no_load ? Misc.add_GET_param(path, "_format", "raw") : @result
63
68
  end
64
69
 
65
-
66
70
  def self.get_streams(inputs, stream_input = nil)
67
71
  new_inputs = {}
68
72
  inputs.each do |k,v|
@@ -240,6 +244,7 @@ class RemoteStep < Step
240
244
  return true if cache_files.any?
241
245
  init_job unless @url
242
246
  Log.debug{ "Joining RemoteStep: #{path}" }
247
+
243
248
  if IO === @result
244
249
  res = @result
245
250
  @result = nil
@@ -253,6 +258,7 @@ class RemoteStep < Step
253
258
  sleep 1 unless self.done? || self.aborted? || self.error?
254
259
  while not (self.done? || self.aborted? || self.error?)
255
260
  sleep 3
261
+ iif [self.done?, self.status, self.info]
256
262
  end
257
263
  end
258
264
 
@@ -129,7 +129,7 @@ class RemoteStep
129
129
  end
130
130
  end
131
131
 
132
- def _run_job(cache_type = :async)
132
+ def _run_job(cache_type = :asynchronous)
133
133
  get_streams
134
134
 
135
135
  task_url = URI.encode(File.join(base_url, task.to_s))
@@ -142,7 +142,13 @@ class RemoteStep
142
142
  else
143
143
  @adaptor.execute_job(base_url, task, task_params, cache_type)
144
144
  end
145
+ end
146
+
145
147
 
148
+ def produce(*args)
149
+ @started = true
150
+ _run_job
146
151
  end
152
+
147
153
  end
148
154
  end
@@ -612,6 +612,10 @@ class Step
612
612
  provenance
613
613
  end
614
614
 
615
+ def resumable?
616
+ task && task.resumable
617
+ end
618
+
615
619
  def config(key, *tokens)
616
620
  options = tokens.pop if Hash === tokens.last
617
621
  options ||= {}
@@ -92,7 +92,7 @@ class Step
92
92
  (job.done? && job.dirty?) || (job.error? && job.dirty?) ||
93
93
  (!(job.noinfo? || job.done? || job.error? || job.aborted? || job.running?))
94
94
 
95
- job.clean
95
+ job.clean unless job.resumable? && (job.updated? && ! job.dirty?)
96
96
  job.set_info :status, :cleaned
97
97
  end
98
98
 
@@ -144,14 +144,18 @@ class Step
144
144
 
145
145
  dependency.status_lock.synchronize do
146
146
  if dependency.aborted? || (dependency.error? && dependency.recoverable_error? && ! canfail_paths.include?(dependency.path) && ! already_failed.include?(dependency.path)) || (!Open.remote?(dependency.path) && dependency.missing?)
147
- Log.warn "Cleaning dep. on exec #{Log.color :blue, dependency.path} (missing: #{dependency.missing?}; error #{dependency.error?})"
148
- dependency.clean
149
- already_failed << dependency.path
150
- raise TryAgain
147
+ if dependency.resumable?
148
+ dependency.status = :resume
149
+ else
150
+ Log.warn "Cleaning dep. on exec #{Log.color :blue, dependency.path} (missing: #{dependency.missing?}; error #{dependency.error?})"
151
+ dependency.clean
152
+ already_failed << dependency.path
153
+ raise TryAgain
154
+ end
151
155
  end
152
156
  end
153
157
 
154
- if ! (dependency.started? || dependency.error?)
158
+ if dependency.status == :resume || ! (dependency.started? || dependency.error?)
155
159
  log_dependency_exec(dependency, :starting)
156
160
  dependency.run(true)
157
161
  raise TryAgain
@@ -108,7 +108,7 @@ class Step
108
108
  @result = self._exec
109
109
  @result = @result.stream if TSV::Dumper === @result
110
110
  end
111
- (no_load or ENV["RBBT_NO_STREAM"]) ? @result : prepare_result(@result, @task.result_description)
111
+ (no_load || ENV["RBBT_NO_STREAM"]) ? @result : prepare_result(@result, @task.result_description)
112
112
  end
113
113
 
114
114
  def updatable?
@@ -610,7 +610,7 @@ class Step
610
610
  end
611
611
 
612
612
  def _clean_finished
613
- if Open.exists? path and not status == :done
613
+ if Open.exists?(path) && status != :done
614
614
  Log.warn "Aborted job had finished. Removing result -- #{ path }"
615
615
  begin
616
616
  Open.rm path
@@ -2,7 +2,7 @@ require 'rbbt/util/misc'
2
2
  require 'rbbt/persist'
3
3
 
4
4
  module Task
5
- attr_accessor :inputs, :input_types, :result_type, :input_defaults, :input_descriptions, :input_options, :required_inputs, :description, :name, :result_description, :extension, :workflow
5
+ attr_accessor :inputs, :input_types, :result_type, :input_defaults, :input_descriptions, :input_options, :required_inputs, :description, :name, :result_description, :extension, :workflow, :resumable
6
6
 
7
7
  def self.setup(options = {}, &block)
8
8
  block.extend Task
@@ -71,7 +71,7 @@ module Workflow
71
71
  @dep_tree ||= {}
72
72
  @dep_tree[name] ||= begin
73
73
  dep_tree = {}
74
- self.task_dependencies[name].reverse.each do |dep|
74
+ self.task_dependencies[name.to_sym].reverse.each do |dep|
75
75
  dep = dep.first if Array === dep && dep.length == 1
76
76
  dep = dep.dependency if DependencyBlock === dep
77
77
 
@@ -88,23 +88,38 @@ module Workflow
88
88
  key = [workflow, task]
89
89
 
90
90
  dep_tree[key] = workflow.dep_tree(task)
91
- end
91
+ end if self.task_dependencies[name.to_sym]
92
92
  dep_tree
93
93
  end
94
94
  end
95
95
 
96
+ def _prov_tasks(tree)
97
+ tasks = []
98
+ heap = [tree]
99
+ while heap.any?
100
+ t = heap.pop
101
+ t.each do |k,v|
102
+ tasks << k
103
+ heap << v
104
+ end
105
+ end
106
+ tasks
107
+ end
108
+
96
109
  def prov_string(tree)
97
110
  description = ""
98
111
 
99
112
  last = nil
100
113
  seen = Set.new
101
- tree.collect.to_a.flatten.select{|e| Symbol === e }.each do |task_name|
102
114
 
103
- child = last && last.include?(task_name)
104
- first = last.nil?
105
- last = dep_tree(task_name).collect.to_a.flatten.select{|e| Symbol === e}
115
+ tasks = _prov_tasks(tree)
116
+ tasks.each do |workflow,task_name|
117
+
118
+ next if seen.include?([workflow,task_name])
106
119
 
107
- next if seen.include?(task_name)
120
+ child = last && last.include?([workflow, task_name])
121
+ first = last.nil?
122
+ last = _prov_tasks(workflow.dep_tree(task_name))
108
123
 
109
124
  if child
110
125
  description << "->" << task_name.to_s
@@ -113,6 +128,8 @@ module Workflow
113
128
  else
114
129
  description << ";" << task_name.to_s
115
130
  end
131
+
132
+ seen << [workflow, task_name]
116
133
  end
117
134
  description
118
135
  end
@@ -177,7 +194,7 @@ module Workflow
177
194
  description = task.description || ""
178
195
  description = description.split("\n\n").first
179
196
 
180
- next if abridge and ! final.include?(name)
197
+ next if abridge && ! final.include?(name)
181
198
  puts Misc.format_definition_list_item(name.to_s, description, Log.terminal_width, 20, :yellow)
182
199
 
183
200
  prov_string = prov_string(dep_tree(name))
@@ -125,6 +125,7 @@ class Step
125
125
  def self.migrate(path, search_path, options = {})
126
126
  resource=Rbbt
127
127
 
128
+ orig_path = path
128
129
  other_rsync_args = options[:rsync]
129
130
 
130
131
  recursive = options[:recursive]
@@ -154,6 +155,7 @@ puts files * "\n"
154
155
  else
155
156
  if File.exists?(path)
156
157
  path = resource.identify(path)
158
+ raise "Resource #{resource} could not identify #{orig_path}" if path.nil?
157
159
  else
158
160
  path = Path.setup(path)
159
161
  end
@@ -178,7 +180,7 @@ puts resource[path].find(search_path)
178
180
  subpath_files = {}
179
181
  paths.sort.each do |path|
180
182
  parts = path.split("/")
181
- subpath = parts[0..-4] * "/"
183
+ subpath = parts[0..-4] * "/" + "/"
182
184
 
183
185
  if subpath_files.keys.any? && subpath.start_with?(subpath_files.keys.last)
184
186
  subpath = subpath_files.keys.last
@@ -190,6 +192,7 @@ puts resource[path].find(search_path)
190
192
  subpath_files[subpath] << source
191
193
  end
192
194
 
195
+ synced_files = []
193
196
  subpath_files.each do |subpath, files|
194
197
  if options[:target]
195
198
  CMD.cmd("ssh #{options[:target]} mkdir -p '#{File.dirname(target)}'")
@@ -204,11 +207,15 @@ puts resource[path].find(search_path)
204
207
  end
205
208
  target = [options[:target], target] * ":" if options[:target]
206
209
 
210
+ next if File.exists?(source) && File.exists?(target) && File.expand_path(source) == File.expand_path(target)
211
+
207
212
  files_and_dirs = Set.new( files )
208
213
  files.each do |file|
214
+ synced_files << File.join(subpath, file)
215
+
209
216
  parts = file.split("/")[0..-2].reject{|p| p.empty?}
210
217
  while parts.any?
211
- files_and_dirs << parts * "/"
218
+ files_and_dirs << parts * "/"
212
219
  parts.pop
213
220
  end
214
221
  end
@@ -218,8 +225,7 @@ puts resource[path].find(search_path)
218
225
 
219
226
  cmd = "rsync #{MAIN_RSYNC_ARGS} --progress #{test_str} --files-from='#{tmp_include_file}' #{source}/ #{target}/ #{other_rsync_args}"
220
227
 
221
- cmd << " && rm -Rf #{source}" if options[:delete]
222
-
228
+ #cmd << " && rm -Rf #{source}" if options[:delete]
223
229
  if options[:print]
224
230
  ppp Open.read(tmp_include_file)
225
231
  puts cmd
@@ -228,6 +234,26 @@ puts resource[path].find(search_path)
228
234
  end
229
235
  end
230
236
  end
237
+
238
+ if options[:delete] && synced_files.any?
239
+ puts Log.color :magenta, "About to erase these files:"
240
+ synced_files.each do |p|
241
+ puts Log.color :red, p
242
+ end
243
+
244
+ if options[:non_interactive]
245
+ response = 'yes'
246
+ else
247
+ puts Log.color :magenta, "Type 'yes' if you are sure:"
248
+ response = STDIN.gets.chomp
249
+ end
250
+
251
+ if response == 'yes'
252
+ synced_files.each do |p|
253
+ Open.rm p
254
+ end
255
+ end
256
+ end
231
257
  end
232
258
 
233
259
  def self.purge(path, recursive = false)