rbbt-util 5.27.1 → 5.27.6

Sign up to get free protection for your applications and to get access to all the features.
Files changed (41) hide show
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/annotations/util.rb +1 -2
  3. data/lib/rbbt/entity.rb +8 -5
  4. data/lib/rbbt/fix_width_table.rb +6 -5
  5. data/lib/rbbt/knowledge_base/entity.rb +3 -2
  6. data/lib/rbbt/knowledge_base/query.rb +9 -0
  7. data/lib/rbbt/persist.rb +2 -3
  8. data/lib/rbbt/persist/tsv.rb +5 -5
  9. data/lib/rbbt/persist/tsv/adapter.rb +136 -43
  10. data/lib/rbbt/persist/tsv/tokyocabinet.rb +5 -3
  11. data/lib/rbbt/resource.rb +8 -5
  12. data/lib/rbbt/resource/path.rb +3 -3
  13. data/lib/rbbt/tsv/accessor.rb +6 -6
  14. data/lib/rbbt/tsv/change_id.rb +3 -1
  15. data/lib/rbbt/tsv/parallel/traverse.rb +1 -1
  16. data/lib/rbbt/tsv/util.rb +1 -0
  17. data/lib/rbbt/util/misc/exceptions.rb +8 -0
  18. data/lib/rbbt/workflow.rb +1 -1
  19. data/lib/rbbt/workflow/accessor.rb +3 -1
  20. data/lib/rbbt/workflow/definition.rb +6 -0
  21. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +9 -3
  22. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +9 -3
  23. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +7 -1
  24. data/lib/rbbt/workflow/step/accessor.rb +4 -0
  25. data/lib/rbbt/workflow/step/dependencies.rb +10 -6
  26. data/lib/rbbt/workflow/step/run.rb +2 -2
  27. data/lib/rbbt/workflow/task.rb +1 -1
  28. data/lib/rbbt/workflow/usage.rb +25 -8
  29. data/lib/rbbt/workflow/util/archive.rb +30 -4
  30. data/share/install/software/lib/install_helpers +9 -3
  31. data/share/rbbt_commands/migrate_job +3 -1
  32. data/share/rbbt_commands/system/status +3 -2
  33. data/share/rbbt_commands/tsv/get +33 -7
  34. data/share/rbbt_commands/workflow/monitor +3 -3
  35. data/share/rbbt_commands/workflow/task +1 -1
  36. data/test/rbbt/knowledge_base/test_query.rb +1 -1
  37. data/test/rbbt/test_entity.rb +14 -5
  38. data/test/rbbt/test_knowledge_base.rb +3 -3
  39. data/test/rbbt/workflow/step/test_dependencies.rb +40 -8
  40. data/test/rbbt/workflow/test_remote_workflow.rb +13 -1
  41. metadata +2 -2
@@ -170,10 +170,12 @@ module Resource
170
170
  if type and not File.exist?(final_path) or force
171
171
  Log.medium "Producing: #{ final_path }"
172
172
  lock_filename = Persist.persistence_path(final_path, {:dir => Resource.lock_dir})
173
+
173
174
  Misc.lock lock_filename do
174
175
  FileUtils.rm_rf final_path if force and File.exist? final_path
175
- if not File.exist?(final_path) or force
176
- (remote_server and get_from_server(path, final_path)) or
176
+
177
+ if ! File.exist?(final_path) || force
178
+
177
179
  begin
178
180
  case type
179
181
  when :string
@@ -291,7 +293,7 @@ url='#{url}'
291
293
  rescue
292
294
  FileUtils.rm_rf final_path if File.exist? final_path
293
295
  raise $!
294
- end
296
+ end unless (remote_server && get_from_server(path, final_path))
295
297
  end
296
298
  end
297
299
  end
@@ -304,11 +306,12 @@ url='#{url}'
304
306
  resource ||= Rbbt
305
307
  (Path::STANDARD_SEARCH + resource.search_order + resource.search_paths.keys).uniq.each do |name|
306
308
  pattern = resource.search_paths[name]
307
- next if patterns.nil?
309
+ next if pattern.nil?
310
+ pattern = pattern.sub('{PWD}', Dir.pwd)
308
311
  if String === pattern and pattern.include?('{')
309
312
  regexp = "^" + pattern.gsub(/{([^}]+)}/,'(?<\1>[^/]+)') + "(?:/(?<REST>.*))?/?$"
310
313
  if m = path.match(regexp)
311
- if m["PKGDIR"] == resource.pkgdir
314
+ if ! m.named_captures.include?("PKGDIR") || m["PKGDIR"] == resource.pkgdir
312
315
  return self[m["TOPLEVEL"]][m["SUBPATH"]][m["REST"]]
313
316
  end
314
317
  end
@@ -199,7 +199,7 @@ module Path
199
199
  next if res
200
200
  next unless paths.include? w
201
201
  path = find(w, caller_lib, paths)
202
- res = path if File.exist? path
202
+ res = path if File.exist?(path)
203
203
  end if res.nil?
204
204
 
205
205
  (paths.keys - STANDARD_SEARCH - search_order).each do |w|
@@ -241,8 +241,8 @@ module Path
241
241
  sub('{REMOVE}/', '').
242
242
  sub('{REMOVE}', '')
243
243
 
244
- path = path + '.gz' if File.exist? path + '.gz'
245
- path = path + '.bgz' if File.exist? path + '.bgz'
244
+ path = path + '.gz' if File.exist?(path + '.gz')
245
+ path = path + '.bgz' if File.exist?(path + '.bgz')
246
246
 
247
247
  self.annotate path
248
248
 
@@ -243,7 +243,7 @@ module TSV
243
243
  end
244
244
 
245
245
  def []=(key, value, clean = false)
246
- return super(key, value) if clean or value.nil? or TSV::CleanSerializer == self.serializer_module
246
+ return super(key, value) if clean || value.nil? || TSV::CleanSerializer == self.serializer_module
247
247
  super(key, @serializer_module.dump(value))
248
248
  end
249
249
 
@@ -366,11 +366,11 @@ module TSV
366
366
  keys.length
367
367
  end
368
368
 
369
- def values_at(*keys)
370
- keys.collect do |key|
371
- self[key]
372
- end
373
- end
369
+ #def _values_at(*keys)
370
+ # keys.collect do |key|
371
+ # self[key]
372
+ # end
373
+ #end
374
374
 
375
375
  def chunked_values_at(keys, max = 5000)
376
376
  Misc.ordered_divide(keys, max).inject([]) do |acc,c|
@@ -10,7 +10,8 @@ module TSV
10
10
 
11
11
  identifiers = Organism.identifiers(tsv.namespace) if identifiers.nil? and tsv.namespace
12
12
 
13
- if not tsv.fields.include? format
13
+
14
+ if ! tsv.fields.include?(format)
14
15
  new = {}
15
16
  tsv.each do |k,v|
16
17
  if v === String or v === Array
@@ -32,6 +33,7 @@ module TSV
32
33
  tsv = tsv.attach identifiers, :fields => [format], :persist_input => true
33
34
  end
34
35
 
36
+
35
37
  tsv = tsv.reorder(format, tsv.fields[0..-2])
36
38
 
37
39
  tsv = tsv.to_flat if orig_type == :flat
@@ -374,7 +374,7 @@ module TSV
374
374
  when (defined? Step and Step)
375
375
 
376
376
  obj.clean if obj.aborted? or obj.recoverable_error?
377
- obj.run(true) unless obj.done? or obj.started?
377
+ obj.run(true) unless obj.done? || obj.started? || obj.result
378
378
 
379
379
  stream = obj.get_stream
380
380
 
@@ -122,6 +122,7 @@ module TSV
122
122
  end
123
123
  else
124
124
  file.grace
125
+
125
126
  stream = file.get_stream
126
127
  if stream && ! stream.closed?
127
128
  stream
@@ -15,6 +15,14 @@ end
15
15
  class Aborted < StandardError; end
16
16
 
17
17
  class TryAgain < StandardError; end
18
+
19
+ class TryThis < StandardError
20
+ attr_accessor :payload
21
+ def initialize(payload = nil)
22
+ @payload = payload
23
+ end
24
+ end
25
+
18
26
  class SemaphoreInterrupted < TryAgain; end
19
27
  class LockInterrupted < TryAgain; end
20
28
 
@@ -607,7 +607,7 @@ module Workflow
607
607
  end
608
608
 
609
609
  def self.load_step(path)
610
- Path.setup(path) unless Path === path
610
+ path = Path.setup(path.dup) unless Path === path
611
611
  path = path.find
612
612
 
613
613
  begin
@@ -35,6 +35,7 @@ module Workflow
35
35
  input_use = rec_input_use(name)
36
36
  input_defaults = rec_input_defaults(name)
37
37
  input_options = rec_input_options(name)
38
+ extension = task.extension
38
39
  export = case
39
40
  when (synchronous_exports.include?(name.to_sym) or synchronous_exports.include?(name.to_s))
40
41
  :synchronous
@@ -60,7 +61,8 @@ module Workflow
60
61
  :input_use => input_use,
61
62
  :result_type => result_type,
62
63
  :result_description => result_description,
63
- :dependencies => dependencies
64
+ :dependencies => dependencies,
65
+ :extension => extension
64
66
  }
65
67
  end
66
68
  end
@@ -18,6 +18,7 @@ module Workflow
18
18
  :description => "",
19
19
  :result_type => nil,
20
20
  :result_description => "",
21
+ :resumable => false,
21
22
  :extension => nil)
22
23
 
23
24
 
@@ -33,6 +34,10 @@ module Workflow
33
34
  @extension = extension
34
35
  end
35
36
 
37
+ def resumable
38
+ @resumable = true
39
+ end
40
+
36
41
  def returns(description)
37
42
  @result_description = description
38
43
  end
@@ -118,6 +123,7 @@ module Workflow
118
123
  :input_descriptions => consume_input_descriptions,
119
124
  :required_inputs => consume_required_inputs,
120
125
  :extension => consume_extension,
126
+ :resumable => consume_resumable,
121
127
  :input_options => consume_input_options
122
128
  }
123
129
 
@@ -137,13 +137,15 @@ class RemoteWorkflow
137
137
 
138
138
  post_thread = Thread.new(Thread.current) do |parent|
139
139
  bl = lambda do |rok|
140
- if Net::HTTPOK === rok
140
+ if Net::HTTPOK === rok
141
141
  _url = rok["RBBT-STREAMING-JOB-URL"]
142
142
  @url = File.join(task_url, File.basename(_url)) if _url
143
143
  rok.read_body do |c,_a, _b|
144
144
  sin.write c
145
145
  end
146
146
  sin.close
147
+ elsif Net::HTTPSeeOther === rok
148
+ raise TryThis.new(rok)
147
149
  else
148
150
  err = StringIO.new
149
151
  rok.read_body do |c,_a, _b|
@@ -156,7 +158,7 @@ class RemoteWorkflow
156
158
  err.rewind
157
159
  err.read
158
160
  end
159
- ne = @adaptor.parse_exception text
161
+ ne = RemoteWorkflow.parse_exception text
160
162
  case ne
161
163
  when String
162
164
  parent.raise e.class, ne
@@ -173,7 +175,11 @@ class RemoteWorkflow
173
175
  end
174
176
 
175
177
  Log.debug{ "RestClient execute: #{ task_url } - #{Misc.fingerprint task_params}" }
176
- RestClient::Request.execute(:method => :post, :url => task_url, :payload => task_params, :block_response => bl)
178
+ begin
179
+ RestClient::Request.execute(:method => :post, :url => task_url, :payload => task_params, :block_response => bl)
180
+ rescue TryThis
181
+ RestClient::Request.execute(:method => :get, :url => $!.payload.header[:location], :block_response => bl)
182
+ end
177
183
  end
178
184
 
179
185
  # It seems like now response body are now decoded by Net::HTTP after 2.1
@@ -25,8 +25,13 @@ class RemoteStep < Step
25
25
  end
26
26
 
27
27
  def cache_file
28
- digest = Misc.obj2digest([base_url, task, base_name, inputs])
29
- Rbbt.var.cache.REST[[task, clean_name, digest] * "."].find
28
+ begin
29
+ digest = Misc.obj2digest([base_url, task, base_name, inputs])
30
+ Rbbt.var.cache.REST[[task, clean_name, digest].compact * "."].find
31
+ rescue
32
+ Log.exception $!
33
+ raise $!
34
+ end
30
35
  end
31
36
 
32
37
  def cache_files
@@ -62,7 +67,6 @@ class RemoteStep < Step
62
67
  no_load ? Misc.add_GET_param(path, "_format", "raw") : @result
63
68
  end
64
69
 
65
-
66
70
  def self.get_streams(inputs, stream_input = nil)
67
71
  new_inputs = {}
68
72
  inputs.each do |k,v|
@@ -240,6 +244,7 @@ class RemoteStep < Step
240
244
  return true if cache_files.any?
241
245
  init_job unless @url
242
246
  Log.debug{ "Joining RemoteStep: #{path}" }
247
+
243
248
  if IO === @result
244
249
  res = @result
245
250
  @result = nil
@@ -253,6 +258,7 @@ class RemoteStep < Step
253
258
  sleep 1 unless self.done? || self.aborted? || self.error?
254
259
  while not (self.done? || self.aborted? || self.error?)
255
260
  sleep 3
261
+ iif [self.done?, self.status, self.info]
256
262
  end
257
263
  end
258
264
 
@@ -129,7 +129,7 @@ class RemoteStep
129
129
  end
130
130
  end
131
131
 
132
- def _run_job(cache_type = :async)
132
+ def _run_job(cache_type = :asynchronous)
133
133
  get_streams
134
134
 
135
135
  task_url = URI.encode(File.join(base_url, task.to_s))
@@ -142,7 +142,13 @@ class RemoteStep
142
142
  else
143
143
  @adaptor.execute_job(base_url, task, task_params, cache_type)
144
144
  end
145
+ end
146
+
145
147
 
148
+ def produce(*args)
149
+ @started = true
150
+ _run_job
146
151
  end
152
+
147
153
  end
148
154
  end
@@ -612,6 +612,10 @@ class Step
612
612
  provenance
613
613
  end
614
614
 
615
+ def resumable?
616
+ task && task.resumable
617
+ end
618
+
615
619
  def config(key, *tokens)
616
620
  options = tokens.pop if Hash === tokens.last
617
621
  options ||= {}
@@ -92,7 +92,7 @@ class Step
92
92
  (job.done? && job.dirty?) || (job.error? && job.dirty?) ||
93
93
  (!(job.noinfo? || job.done? || job.error? || job.aborted? || job.running?))
94
94
 
95
- job.clean
95
+ job.clean unless job.resumable? && (job.updated? && ! job.dirty?)
96
96
  job.set_info :status, :cleaned
97
97
  end
98
98
 
@@ -144,14 +144,18 @@ class Step
144
144
 
145
145
  dependency.status_lock.synchronize do
146
146
  if dependency.aborted? || (dependency.error? && dependency.recoverable_error? && ! canfail_paths.include?(dependency.path) && ! already_failed.include?(dependency.path)) || (!Open.remote?(dependency.path) && dependency.missing?)
147
- Log.warn "Cleaning dep. on exec #{Log.color :blue, dependency.path} (missing: #{dependency.missing?}; error #{dependency.error?})"
148
- dependency.clean
149
- already_failed << dependency.path
150
- raise TryAgain
147
+ if dependency.resumable?
148
+ dependency.status = :resume
149
+ else
150
+ Log.warn "Cleaning dep. on exec #{Log.color :blue, dependency.path} (missing: #{dependency.missing?}; error #{dependency.error?})"
151
+ dependency.clean
152
+ already_failed << dependency.path
153
+ raise TryAgain
154
+ end
151
155
  end
152
156
  end
153
157
 
154
- if ! (dependency.started? || dependency.error?)
158
+ if dependency.status == :resume || ! (dependency.started? || dependency.error?)
155
159
  log_dependency_exec(dependency, :starting)
156
160
  dependency.run(true)
157
161
  raise TryAgain
@@ -108,7 +108,7 @@ class Step
108
108
  @result = self._exec
109
109
  @result = @result.stream if TSV::Dumper === @result
110
110
  end
111
- (no_load or ENV["RBBT_NO_STREAM"]) ? @result : prepare_result(@result, @task.result_description)
111
+ (no_load || ENV["RBBT_NO_STREAM"]) ? @result : prepare_result(@result, @task.result_description)
112
112
  end
113
113
 
114
114
  def updatable?
@@ -610,7 +610,7 @@ class Step
610
610
  end
611
611
 
612
612
  def _clean_finished
613
- if Open.exists? path and not status == :done
613
+ if Open.exists?(path) && status != :done
614
614
  Log.warn "Aborted job had finished. Removing result -- #{ path }"
615
615
  begin
616
616
  Open.rm path
@@ -2,7 +2,7 @@ require 'rbbt/util/misc'
2
2
  require 'rbbt/persist'
3
3
 
4
4
  module Task
5
- attr_accessor :inputs, :input_types, :result_type, :input_defaults, :input_descriptions, :input_options, :required_inputs, :description, :name, :result_description, :extension, :workflow
5
+ attr_accessor :inputs, :input_types, :result_type, :input_defaults, :input_descriptions, :input_options, :required_inputs, :description, :name, :result_description, :extension, :workflow, :resumable
6
6
 
7
7
  def self.setup(options = {}, &block)
8
8
  block.extend Task
@@ -71,7 +71,7 @@ module Workflow
71
71
  @dep_tree ||= {}
72
72
  @dep_tree[name] ||= begin
73
73
  dep_tree = {}
74
- self.task_dependencies[name].reverse.each do |dep|
74
+ self.task_dependencies[name.to_sym].reverse.each do |dep|
75
75
  dep = dep.first if Array === dep && dep.length == 1
76
76
  dep = dep.dependency if DependencyBlock === dep
77
77
 
@@ -88,23 +88,38 @@ module Workflow
88
88
  key = [workflow, task]
89
89
 
90
90
  dep_tree[key] = workflow.dep_tree(task)
91
- end
91
+ end if self.task_dependencies[name.to_sym]
92
92
  dep_tree
93
93
  end
94
94
  end
95
95
 
96
+ def _prov_tasks(tree)
97
+ tasks = []
98
+ heap = [tree]
99
+ while heap.any?
100
+ t = heap.pop
101
+ t.each do |k,v|
102
+ tasks << k
103
+ heap << v
104
+ end
105
+ end
106
+ tasks
107
+ end
108
+
96
109
  def prov_string(tree)
97
110
  description = ""
98
111
 
99
112
  last = nil
100
113
  seen = Set.new
101
- tree.collect.to_a.flatten.select{|e| Symbol === e }.each do |task_name|
102
114
 
103
- child = last && last.include?(task_name)
104
- first = last.nil?
105
- last = dep_tree(task_name).collect.to_a.flatten.select{|e| Symbol === e}
115
+ tasks = _prov_tasks(tree)
116
+ tasks.each do |workflow,task_name|
117
+
118
+ next if seen.include?([workflow,task_name])
106
119
 
107
- next if seen.include?(task_name)
120
+ child = last && last.include?([workflow, task_name])
121
+ first = last.nil?
122
+ last = _prov_tasks(workflow.dep_tree(task_name))
108
123
 
109
124
  if child
110
125
  description << "->" << task_name.to_s
@@ -113,6 +128,8 @@ module Workflow
113
128
  else
114
129
  description << ";" << task_name.to_s
115
130
  end
131
+
132
+ seen << [workflow, task_name]
116
133
  end
117
134
  description
118
135
  end
@@ -177,7 +194,7 @@ module Workflow
177
194
  description = task.description || ""
178
195
  description = description.split("\n\n").first
179
196
 
180
- next if abridge and ! final.include?(name)
197
+ next if abridge && ! final.include?(name)
181
198
  puts Misc.format_definition_list_item(name.to_s, description, Log.terminal_width, 20, :yellow)
182
199
 
183
200
  prov_string = prov_string(dep_tree(name))
@@ -125,6 +125,7 @@ class Step
125
125
  def self.migrate(path, search_path, options = {})
126
126
  resource=Rbbt
127
127
 
128
+ orig_path = path
128
129
  other_rsync_args = options[:rsync]
129
130
 
130
131
  recursive = options[:recursive]
@@ -154,6 +155,7 @@ puts files * "\n"
154
155
  else
155
156
  if File.exists?(path)
156
157
  path = resource.identify(path)
158
+ raise "Resource #{resource} could not identify #{orig_path}" if path.nil?
157
159
  else
158
160
  path = Path.setup(path)
159
161
  end
@@ -178,7 +180,7 @@ puts resource[path].find(search_path)
178
180
  subpath_files = {}
179
181
  paths.sort.each do |path|
180
182
  parts = path.split("/")
181
- subpath = parts[0..-4] * "/"
183
+ subpath = parts[0..-4] * "/" + "/"
182
184
 
183
185
  if subpath_files.keys.any? && subpath.start_with?(subpath_files.keys.last)
184
186
  subpath = subpath_files.keys.last
@@ -190,6 +192,7 @@ puts resource[path].find(search_path)
190
192
  subpath_files[subpath] << source
191
193
  end
192
194
 
195
+ synced_files = []
193
196
  subpath_files.each do |subpath, files|
194
197
  if options[:target]
195
198
  CMD.cmd("ssh #{options[:target]} mkdir -p '#{File.dirname(target)}'")
@@ -204,11 +207,15 @@ puts resource[path].find(search_path)
204
207
  end
205
208
  target = [options[:target], target] * ":" if options[:target]
206
209
 
210
+ next if File.exists?(source) && File.exists?(target) && File.expand_path(source) == File.expand_path(target)
211
+
207
212
  files_and_dirs = Set.new( files )
208
213
  files.each do |file|
214
+ synced_files << File.join(subpath, file)
215
+
209
216
  parts = file.split("/")[0..-2].reject{|p| p.empty?}
210
217
  while parts.any?
211
- files_and_dirs << parts * "/"
218
+ files_and_dirs << parts * "/"
212
219
  parts.pop
213
220
  end
214
221
  end
@@ -218,8 +225,7 @@ puts resource[path].find(search_path)
218
225
 
219
226
  cmd = "rsync #{MAIN_RSYNC_ARGS} --progress #{test_str} --files-from='#{tmp_include_file}' #{source}/ #{target}/ #{other_rsync_args}"
220
227
 
221
- cmd << " && rm -Rf #{source}" if options[:delete]
222
-
228
+ #cmd << " && rm -Rf #{source}" if options[:delete]
223
229
  if options[:print]
224
230
  ppp Open.read(tmp_include_file)
225
231
  puts cmd
@@ -228,6 +234,26 @@ puts resource[path].find(search_path)
228
234
  end
229
235
  end
230
236
  end
237
+
238
+ if options[:delete] && synced_files.any?
239
+ puts Log.color :magenta, "About to erase these files:"
240
+ synced_files.each do |p|
241
+ puts Log.color :red, p
242
+ end
243
+
244
+ if options[:non_interactive]
245
+ response = 'yes'
246
+ else
247
+ puts Log.color :magenta, "Type 'yes' if you are sure:"
248
+ response = STDIN.gets.chomp
249
+ end
250
+
251
+ if response == 'yes'
252
+ synced_files.each do |p|
253
+ Open.rm p
254
+ end
255
+ end
256
+ end
231
257
  end
232
258
 
233
259
  def self.purge(path, recursive = false)