rbbt-util 5.27.7 → 5.27.12

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6551d632ae1f2289805a53ceaf9db01d0c0a029ea6c76176aed419e4d6a2a485
4
- data.tar.gz: 72562729535554e718451adb87338503ff40a81874a9e0fed6f8eaf4e6aa0edc
3
+ metadata.gz: 96eabb01327f6e3f5facc629140375ee06c16d160f77788cf3cee63df1fe0a55
4
+ data.tar.gz: 3a30f619c73fb04003c7b6dd7d1a2d2a7f8f0fed2e503892c691280a02055af9
5
5
  SHA512:
6
- metadata.gz: 0d8c3ff2861dd4bbd835e3e41826f5fd6452bdd0b40fbd8378737effdffa6da051ce221e24f4bbacaf952725ccf7da09b533c8c414a7fd10480369ae528a4259
7
- data.tar.gz: 1790b8f38f91dfbb582a394d86953156d90351a8d724fb9535a3bc72ebac5fbbfe24e88fe10cf943e2ed91ff059af68cf581bac333f5480096f85cf6e29b8018
6
+ metadata.gz: ab7cd7746f19395bbc430cfb7b90bd3c6727d9a72069828c338f9abcb69c9776a52f8a6ceeb9244764ed55d9b1ad039739d32e3de27107a5edf7f01e22d10f34
7
+ data.tar.gz: a9f620db6b823d54560b201fe6d706be0c6db502d1cae49100322926011702ad59e7fcdece25dcc2df8ac7cb49cfde67fd432a392b4416e492a0b4eadbeda47c
@@ -261,7 +261,7 @@ module Entity
261
261
 
262
262
  define_method method_name do |*args|
263
263
  id = self.id
264
- persist_name = orig_method_name.to_s << ":" << (Array === id ? Misc.obj2digest(id) : id)
264
+ persist_name = orig_method_name.to_s + ":" << (Array === id ? Misc.obj2digest(id) : id)
265
265
 
266
266
  persist_options = options
267
267
  persist_options = persist_options.merge(:other => {:args => args}) if args and args.any?
@@ -28,7 +28,7 @@ class FixWidthTable
28
28
  else
29
29
  Log.debug "FixWidthTable up-to-date: #{ filename } - (in_memory:#{in_memory})"
30
30
  if in_memory
31
- @file = Open.open(@filename, :mode => 'r:ASCII-ASCII'){|f| StringIO.new f.read}
31
+ @file = Open.open(@filename, :mode => 'r:ASCII-8BIT'){|f| StringIO.new f.read}
32
32
  else
33
33
  @file = File.open(@filename, 'r:ASCII-8BIT')
34
34
  end
@@ -365,12 +365,12 @@ module Persist
365
365
  type ||= :marshal
366
366
 
367
367
  persist_options ||= {}
368
- if type == :memory and persist_options[:file] and persist_options[:persist]
368
+ if type == :memory && persist_options[:file] && persist_options[:persist]
369
369
  repo = persist_options[:repo] || Persist::MEMORY
370
370
  if persist_options[:persist] == :update || persist_options[:update]
371
371
  repo.delete persist_options[:file]
372
372
  end
373
- return repo[persist_options[:file]] ||= yield
373
+ return repo[persist_options[:file]] ||= yield
374
374
  end
375
375
 
376
376
  if FalseClass === persist_options[:persist]
@@ -32,7 +32,8 @@ module TSV
32
32
  done = false
33
33
  Open.write(output) do |os|
34
34
  options.delete :sep if options[:sep] == "\t"
35
- os.puts TSV.header_lines(key_field, fields, options)
35
+ header_lines = TSV.header_lines(key_field, fields, options)
36
+ os.puts header_lines unless header_lines.empty?
36
37
 
37
38
  while line
38
39
  key, *parts = line.sub("\n",'').split(sep, -1)
@@ -212,6 +213,35 @@ module TSV
212
213
  other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
213
214
  Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
214
215
 
216
+ same_key = true
217
+ begin
218
+ case
219
+ when (Misc.match_fields(key_field, other.key_field) and same_key)
220
+ Log.debug "Attachment with same key: #{other.key_field}"
221
+ attach_same_key other, fields
222
+ when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
223
+ Log.debug "Found other key field: #{other.key_field}"
224
+ attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
225
+ when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
226
+ Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
227
+ attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
228
+ else
229
+ index = TSV.find_traversal(self, other, options)
230
+ raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
231
+ Log.debug "Attachment with index: #{other.key_field}"
232
+ attach_index other, index, fields
233
+ end
234
+ rescue Exception
235
+ if same_key
236
+ Log.warn "Could not translate identifiers with same_key"
237
+ same_key = false
238
+ retry
239
+ else
240
+ raise $!
241
+ end
242
+ end
243
+ Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
244
+
215
245
  if complete
216
246
  fill = TrueClass === complete ? nil : complete
217
247
  field_length = self.fields.length
@@ -219,6 +249,9 @@ module TSV
219
249
  other_common_pos = common_fields.collect{|f| other.fields.index f}
220
250
  this_common_pos = common_fields.collect{|f| self.fields.index f}
221
251
  missing = other.keys - self.keys
252
+
253
+ other = other.to_list if other.type == :single
254
+
222
255
  case type
223
256
  when :single
224
257
  missing.each do |k|
@@ -249,35 +282,6 @@ module TSV
249
282
  end
250
283
  end
251
284
 
252
- same_key = true
253
- begin
254
- case
255
- when (Misc.match_fields(key_field, other.key_field) and same_key)
256
- Log.debug "Attachment with same key: #{other.key_field}"
257
- attach_same_key other, fields
258
- when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
259
- Log.debug "Found other key field: #{other.key_field}"
260
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
261
- when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
262
- Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
263
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
264
- else
265
- index = TSV.find_traversal(self, other, options)
266
- raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
267
- Log.debug "Attachment with index: #{other.key_field}"
268
- attach_index other, index, fields
269
- end
270
- rescue Exception
271
- if same_key
272
- Log.warn "Could not translate identifiers with same_key"
273
- same_key = false
274
- retry
275
- else
276
- raise $!
277
- end
278
- end
279
- Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
280
-
281
285
  self
282
286
  end
283
287
 
@@ -4,9 +4,20 @@ module TSV
4
4
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
5
5
 
6
6
  fields = [fields].compact unless Array === fields
7
+
8
+ common_fields = self.fields & fields
9
+
10
+ fields = fields - common_fields
11
+
7
12
  num_fields = fields.length
8
13
 
9
14
  field_positions = fields.collect{|field| other.identify_field field}
15
+
16
+ if common_fields.any?
17
+ common_field_positions = common_fields.collect{|field| self.identify_field field}
18
+ common_field_positions_other = common_fields.collect{|field| other.identify_field field}
19
+ end
20
+
10
21
  other.with_unnamed do
11
22
  with_unnamed do
12
23
  through do |key, values|
@@ -28,6 +39,12 @@ module TSV
28
39
  new_values = field_positions.collect do |pos|
29
40
  pos == :key ? key : other_values[pos]
30
41
  end
42
+
43
+ if common_fields.any?
44
+ common_field_positions.zip(common_field_positions_other).each do |p1,p2|
45
+ current[p1] += other_values[p2]
46
+ end
47
+ end
31
48
  end
32
49
 
33
50
  new_values.collect!{|v| [v]} if type == :double and not (other.type == :double or other.type == :flat)
@@ -292,14 +292,16 @@ def self.add_libdir(dir=nil)
292
292
  end
293
293
  end
294
294
 
295
- def self.bootstrap(elems, num = :current, options = {}, &block)
295
+ def self.bootstrap(elems, num = nil, options = {}, &block)
296
296
  IndiferentHash.setup options
297
+
298
+ num = Rbbt::Config.get :cpus, :default_bootstrap_cpus, :bootstrap_cpus if num == :current || num == nil
297
299
  num = :current if num.nil?
298
300
  cpus = case num
299
301
  when :current
300
- n = 10
301
- n = elems.length / 2 if n > elems.length/2
302
- n
302
+ n = Etc.nprocessors
303
+ n = elems.length / 2 if n > elems.length/2
304
+ n
303
305
  when String
304
306
  num.to_i
305
307
  when Integer
@@ -163,7 +163,7 @@ module Misc
163
163
 
164
164
  def self.fixutf8(string)
165
165
  return nil if string.nil?
166
- return string if string.respond_to?(:encoding) && string.encoding == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
166
+ return string if string.respond_to?(:encoding) && string.encoding.to_s == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
167
167
  (string.respond_to?(:valid_encoding) && string.valid_encoding)
168
168
 
169
169
  if string.respond_to?(:encode)
@@ -17,14 +17,14 @@ module TmpFile
17
17
 
18
18
  # Creates a random file name, with the given suffix and a random number
19
19
  # up to +max+
20
- def self.random_name(s = "tmp-", max = 10000000)
20
+ def self.random_name(s = "tmp-", max = 1_000_000_000)
21
21
  n = rand(max)
22
22
  s + n.to_s
23
23
  end
24
24
 
25
25
  # Creates a random filename in the temporary directory
26
- def self.tmp_file(s = "tmp-", max=10000000, dir = TMPDIR)
27
- File.expand_path(File.join(dir, random_name(s,max)))
26
+ def self.tmp_file(s = "tmp-", max=1_000_000_000, dir = TMPDIR)
27
+ File.expand_path(File.join(dir, random_name(s, max)))
28
28
  end
29
29
 
30
30
  def self.with_file(content = nil, erase = true, options = {})
@@ -33,7 +33,7 @@ module TmpFile
33
33
 
34
34
  prefix = options[:prefix] || "tmp-"
35
35
  tmpdir = options[:tmpdir] || TMPDIR
36
- max = options[:max] || 10000000
36
+ max = options[:max] || 1_000_000_000
37
37
  tmpfile = tmp_file prefix, max, tmpdir
38
38
  if options[:extension]
39
39
  tmpfile += ".#{options[:extension]}"
@@ -10,9 +10,6 @@ require 'rbbt/workflow/util/provenance'
10
10
 
11
11
  module Workflow
12
12
 
13
- STEP_CACHE = {}
14
- LOAD_STEP_CACHE = {}
15
-
16
13
  class TaskNotFoundException < Exception
17
14
  def initialize(workflow, task = nil)
18
15
  if task
@@ -264,11 +261,11 @@ module Workflow
264
261
  end
265
262
 
266
263
  def step_cache
267
- @step_cache ||= Workflow::STEP_CACHE
264
+ Thread.current[:step_cache] ||= {}
268
265
  end
269
266
 
270
267
  def self.load_step_cache
271
- @load_step_cache ||= Workflow::LOAD_STEP_CACHE
268
+ Thread.current[:load_step_cache] ||= {}
272
269
  end
273
270
 
274
271
 
@@ -418,7 +418,7 @@ module Workflow
418
418
  when :hash
419
419
  clean_inputs = Annotated.purge(inputs)
420
420
  clean_inputs = clean_inputs.collect{|i| Symbol === i ? i.to_s : i }
421
- deps_str = dependencies.collect{|d| Step === d ? d.short_path : d }
421
+ deps_str = dependencies.collect{|d| (Step === d || (defined?(RemoteStep) && RemoteStep === Step)) ? "Step: " << d.short_path : d }
422
422
  key_obj = {:inputs => clean_inputs, :dependencies => deps_str }
423
423
  key_str = Misc.obj2str(key_obj)
424
424
  hash_str = Misc.digest(key_str)
@@ -137,14 +137,16 @@ class RemoteWorkflow
137
137
 
138
138
  post_thread = Thread.new(Thread.current) do |parent|
139
139
  bl = lambda do |rok|
140
- if Net::HTTPOK === rok
140
+ case rok
141
+ when Net::HTTPOK
141
142
  _url = rok["RBBT-STREAMING-JOB-URL"]
142
143
  @url = File.join(task_url, File.basename(_url)) if _url
143
144
  rok.read_body do |c,_a, _b|
144
145
  sin.write c
145
146
  end
146
147
  sin.close
147
- elsif Net::HTTPSeeOther === rok
148
+ when Net::HTTPRedirection, Net::HTTPAccepted
149
+ Thread.current.report_on_exception = false
148
150
  raise TryThis.new(rok)
149
151
  else
150
152
  err = StringIO.new
@@ -178,7 +180,8 @@ class RemoteWorkflow
178
180
  begin
179
181
  RestClient::Request.execute(:method => :post, :url => task_url, :payload => task_params, :block_response => bl)
180
182
  rescue TryThis
181
- RestClient::Request.execute(:method => :get, :url => $!.payload.header[:location], :block_response => bl)
183
+ url = $!.payload["location"]
184
+ RestClient::Request.execute(:method => :get, :url => url, :block_response => bl)
182
185
  end
183
186
  end
184
187
 
@@ -26,8 +26,8 @@ class RemoteStep < Step
26
26
 
27
27
  def cache_file
28
28
  begin
29
- digest = Misc.obj2digest([base_url, task, base_name, inputs])
30
- Rbbt.var.cache.REST[[task, clean_name, digest].compact * "."].find
29
+ digest = Misc.obj2digest([base_url, task.to_s, base_name, inputs])
30
+ Rbbt.var.cache.REST[task.to_s][[clean_name, digest].compact * "."].find
31
31
  rescue
32
32
  Log.exception $!
33
33
  raise $!
@@ -143,7 +143,11 @@ class RemoteStep < Step
143
143
  @info = Persist.memory("RemoteSteps Info", :url => @url, :persist => true, :update => update) do
144
144
  @last_info_time = Time.now
145
145
  init_job unless @url
146
- info = @adaptor.get_json(File.join(@url, 'info'))
146
+ info = begin
147
+ @adaptor.get_json(File.join(@url, 'info'))
148
+ rescue
149
+ {:status => :noinfo}
150
+ end
147
151
  info = RemoteWorkflow.fix_hash(info)
148
152
  info[:status] = info[:status].to_sym if String === info[:status]
149
153
  info
@@ -243,6 +247,7 @@ class RemoteStep < Step
243
247
  def join
244
248
  return true if cache_files.any?
245
249
  init_job unless @url
250
+ produce unless @started
246
251
  Log.debug{ "Joining RemoteStep: #{path}" }
247
252
 
248
253
  if IO === @result
@@ -258,7 +263,6 @@ class RemoteStep < Step
258
263
  sleep 1 unless self.done? || self.aborted? || self.error?
259
264
  while not (self.done? || self.aborted? || self.error?)
260
265
  sleep 3
261
- iif [self.done?, self.status, self.info]
262
266
  end
263
267
  end
264
268
 
@@ -311,6 +315,10 @@ class RemoteStep < Step
311
315
  end
312
316
  end
313
317
 
318
+ def short_path
319
+ init_job unless @url
320
+ [@task, @name] * "/"
321
+ end
314
322
 
315
323
  def input_checks
316
324
  []
@@ -88,7 +88,7 @@ module Workflow
88
88
  key = [workflow, task]
89
89
 
90
90
  dep_tree[key] = workflow.dep_tree(task)
91
- end if self.task_dependencies[name.to_sym]
91
+ end if name && self.task_dependencies[name.to_sym]
92
92
  dep_tree
93
93
  end
94
94
  end
@@ -15,6 +15,7 @@ $ rbbt app start [options]
15
15
  -Ho--Host* Host name
16
16
  -B--Bind* Bind IP
17
17
  -p--port* TCP port
18
+ -w--workers* Number of workers for cluster mode (puma)
18
19
  -s--server* Server type: thin, webrick, unicorn, etc
19
20
  -f--finder Start server with finder functionality
20
21
  -fs--file_server Activate file serving for resources
@@ -84,6 +85,12 @@ Misc.in_dir(app_dir) do
84
85
  end
85
86
  end
86
87
 
88
+ fixed_options = {}
89
+ options.each do |k,v| fixed_options[k.to_sym] = v end
90
+ options = fixed_options
91
+
92
+ options[:workers] = options[:workers].to_i if options[:workers]
93
+
87
94
  case server
88
95
  when 'passenger'
89
96
  system ENV, "env RBBT_LOG=0 passenger start -R '#{config_ru_file}' -p #{options[:Port] || "2887"}"
@@ -91,6 +98,7 @@ Misc.in_dir(app_dir) do
91
98
  system ENV, "puma '#{config_ru_file}' -p #{options[:Port] || "2887"} -w 3 -t 8:32 --preload"
92
99
  else
93
100
  options[:config] = config_ru_file
101
+ options[:threads] = "8:8"
94
102
  Rack::Server.start(options)
95
103
  end
96
104
  end
@@ -97,7 +97,7 @@ TSV.traverse jobs do |file,i|
97
97
  else
98
98
  info = begin
99
99
  Open.open(i[:info_file]) do |f|
100
- Step::INFO_SERIALIAZER.load(f)
100
+ Step::INFO_SERIALIZER.load(f)
101
101
  end
102
102
  rescue
103
103
  {:status => :noinfo}
@@ -134,7 +134,7 @@ TSV.traverse jobs do |file,i|
134
134
  end
135
135
  end
136
136
 
137
- if (force and status !~ /done/) or
137
+ if (force && status !~ /done/) or
138
138
  status =~ /\b(old|dirty|nopid|error|missing|aborted|dead|sync)$/ or
139
139
  (status == "noinfo" and not done) or
140
140
  status == ""
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ options = SOPT.setup <<EOF
7
+ Read a TSV file and dump it
8
+
9
+ $ rbbt tsv read [options] <filename.tsv|->
10
+
11
+ Use - to read from STDIN
12
+
13
+ -k--key_field* Key field
14
+ -f--fields* Fields
15
+ -t--type* Type
16
+ -m--merge* Merge from multiple rows
17
+ -h--help Print this help
18
+
19
+ EOF
20
+ rbbt_usage and exit 0 if options[:help]
21
+
22
+ file = ARGV.shift
23
+
24
+ file = STDIN if file == '-'
25
+
26
+ tsv = TSV.open file, :merge => true, :type => :double
27
+
28
+
29
+ puts tsv.to_unmerged_expanded_s
@@ -153,7 +153,7 @@ if recursive
153
153
  dep = deps.shift
154
154
  inputs = {} if inputs.nil?
155
155
  inputs = inputs.merge(dep.info[:inputs] || {})
156
- deps = deps.concat dep.dependencies
156
+ deps = deps.concat dep.dependencies if dep.dependencies
157
157
  end
158
158
 
159
159
  inputs = inputs.merge step.archived_inputs
@@ -123,15 +123,11 @@ TmpFile.with_file do |app_dir|
123
123
  else
124
124
  options[:config] = config_ru_file
125
125
 
126
- clean_options = {}
127
- options.each do |k,v|
128
- begin
129
- clean_options[k.to_sym] = v
130
- rescue
131
- end
132
- end
126
+ fixed_options = {}
127
+ options.each do |k,v| fixed_options[k.to_sym] = v end
128
+ options = fixed_options
133
129
 
134
- Rack::Server.start(clean_options)
130
+ Rack::Server.start(options)
135
131
  end
136
132
  end
137
133
  end
@@ -337,6 +337,8 @@ if clean_task
337
337
  dep.clean
338
338
  dep.set_info :status, :cleaned
339
339
  end
340
+
341
+ job.clean if job.task_name.to_s == clean_task.to_s
340
342
  end
341
343
  end
342
344
 
@@ -213,8 +213,9 @@ row2 E
213
213
 
214
214
  tsv1 = tsv2 = nil
215
215
 
216
- tsv1 = Rbbt.tmp.test.test1.data.tsv :double, :sep => /\s+/
217
- tsv2 = Rbbt.tmp.test.test2.data.tsv :double, :sep => /\s+/
216
+ tsv1 = Rbbt.tmp.test.test1.data.produce(true).tsv :double, :sep => /\s+/
217
+ tsv2 = Rbbt.tmp.test.test2.data.produce(true).tsv :double, :sep => /\s+/
218
+ Log.tsv tsv2
218
219
 
219
220
  tsv2.identifiers = Rbbt.tmp.test.test2.identifiers.produce.find #.to_s
220
221
 
@@ -0,0 +1,10 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/util/misc/format'
3
+
4
+ class TestClass < Test::Unit::TestCase
5
+ def test_fixutf8
6
+ a = "Camión"
7
+ assert_equal a, Misc.fixutf8(a)
8
+ end
9
+ end
10
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.27.7
4
+ version: 5.27.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-04 00:00:00.000000000 Z
11
+ date: 2020-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -403,6 +403,7 @@ files:
403
403
  - share/rbbt_commands/tsv/sort
404
404
  - share/rbbt_commands/tsv/subset
405
405
  - share/rbbt_commands/tsv/transpose
406
+ - share/rbbt_commands/tsv/uncollapse
406
407
  - share/rbbt_commands/tsv/unzip
407
408
  - share/rbbt_commands/tsv/values
408
409
  - share/rbbt_commands/tsv/write_excel
@@ -487,6 +488,7 @@ files:
487
488
  - test/rbbt/util/concurrency/test_threads.rb
488
489
  - test/rbbt/util/log/test_progress.rb
489
490
  - test/rbbt/util/misc/test_bgzf.rb
491
+ - test/rbbt/util/misc/test_format.rb
490
492
  - test/rbbt/util/misc/test_lock.rb
491
493
  - test/rbbt/util/misc/test_multipart_payload.rb
492
494
  - test/rbbt/util/misc/test_omics.rb
@@ -552,6 +554,7 @@ test_files:
552
554
  - test/rbbt/util/test_colorize.rb
553
555
  - test/rbbt/util/misc/test_omics.rb
554
556
  - test/rbbt/util/misc/test_pipes.rb
557
+ - test/rbbt/util/misc/test_format.rb
555
558
  - test/rbbt/util/misc/test_lock.rb
556
559
  - test/rbbt/util/misc/test_multipart_payload.rb
557
560
  - test/rbbt/util/misc/test_bgzf.rb