rbbt-util 5.27.7 → 5.27.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6551d632ae1f2289805a53ceaf9db01d0c0a029ea6c76176aed419e4d6a2a485
4
- data.tar.gz: 72562729535554e718451adb87338503ff40a81874a9e0fed6f8eaf4e6aa0edc
3
+ metadata.gz: 96eabb01327f6e3f5facc629140375ee06c16d160f77788cf3cee63df1fe0a55
4
+ data.tar.gz: 3a30f619c73fb04003c7b6dd7d1a2d2a7f8f0fed2e503892c691280a02055af9
5
5
  SHA512:
6
- metadata.gz: 0d8c3ff2861dd4bbd835e3e41826f5fd6452bdd0b40fbd8378737effdffa6da051ce221e24f4bbacaf952725ccf7da09b533c8c414a7fd10480369ae528a4259
7
- data.tar.gz: 1790b8f38f91dfbb582a394d86953156d90351a8d724fb9535a3bc72ebac5fbbfe24e88fe10cf943e2ed91ff059af68cf581bac333f5480096f85cf6e29b8018
6
+ metadata.gz: ab7cd7746f19395bbc430cfb7b90bd3c6727d9a72069828c338f9abcb69c9776a52f8a6ceeb9244764ed55d9b1ad039739d32e3de27107a5edf7f01e22d10f34
7
+ data.tar.gz: a9f620db6b823d54560b201fe6d706be0c6db502d1cae49100322926011702ad59e7fcdece25dcc2df8ac7cb49cfde67fd432a392b4416e492a0b4eadbeda47c
@@ -261,7 +261,7 @@ module Entity
261
261
 
262
262
  define_method method_name do |*args|
263
263
  id = self.id
264
- persist_name = orig_method_name.to_s << ":" << (Array === id ? Misc.obj2digest(id) : id)
264
+ persist_name = orig_method_name.to_s + ":" << (Array === id ? Misc.obj2digest(id) : id)
265
265
 
266
266
  persist_options = options
267
267
  persist_options = persist_options.merge(:other => {:args => args}) if args and args.any?
@@ -28,7 +28,7 @@ class FixWidthTable
28
28
  else
29
29
  Log.debug "FixWidthTable up-to-date: #{ filename } - (in_memory:#{in_memory})"
30
30
  if in_memory
31
- @file = Open.open(@filename, :mode => 'r:ASCII-ASCII'){|f| StringIO.new f.read}
31
+ @file = Open.open(@filename, :mode => 'r:ASCII-8BIT'){|f| StringIO.new f.read}
32
32
  else
33
33
  @file = File.open(@filename, 'r:ASCII-8BIT')
34
34
  end
@@ -365,12 +365,12 @@ module Persist
365
365
  type ||= :marshal
366
366
 
367
367
  persist_options ||= {}
368
- if type == :memory and persist_options[:file] and persist_options[:persist]
368
+ if type == :memory && persist_options[:file] && persist_options[:persist]
369
369
  repo = persist_options[:repo] || Persist::MEMORY
370
370
  if persist_options[:persist] == :update || persist_options[:update]
371
371
  repo.delete persist_options[:file]
372
372
  end
373
- return repo[persist_options[:file]] ||= yield
373
+ return repo[persist_options[:file]] ||= yield
374
374
  end
375
375
 
376
376
  if FalseClass === persist_options[:persist]
@@ -32,7 +32,8 @@ module TSV
32
32
  done = false
33
33
  Open.write(output) do |os|
34
34
  options.delete :sep if options[:sep] == "\t"
35
- os.puts TSV.header_lines(key_field, fields, options)
35
+ header_lines = TSV.header_lines(key_field, fields, options)
36
+ os.puts header_lines unless header_lines.empty?
36
37
 
37
38
  while line
38
39
  key, *parts = line.sub("\n",'').split(sep, -1)
@@ -212,6 +213,35 @@ module TSV
212
213
  other_filename = other.respond_to?(:filename) ? other.filename : other.inspect
213
214
  Log.low("Attaching fields:#{Misc.fingerprint fields } from #{other_filename}.")
214
215
 
216
+ same_key = true
217
+ begin
218
+ case
219
+ when (Misc.match_fields(key_field, other.key_field) and same_key)
220
+ Log.debug "Attachment with same key: #{other.key_field}"
221
+ attach_same_key other, fields
222
+ when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
223
+ Log.debug "Found other key field: #{other.key_field}"
224
+ attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
225
+ when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
226
+ Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
227
+ attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
228
+ else
229
+ index = TSV.find_traversal(self, other, options)
230
+ raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
231
+ Log.debug "Attachment with index: #{other.key_field}"
232
+ attach_index other, index, fields
233
+ end
234
+ rescue Exception
235
+ if same_key
236
+ Log.warn "Could not translate identifiers with same_key"
237
+ same_key = false
238
+ retry
239
+ else
240
+ raise $!
241
+ end
242
+ end
243
+ Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
244
+
215
245
  if complete
216
246
  fill = TrueClass === complete ? nil : complete
217
247
  field_length = self.fields.length
@@ -219,6 +249,9 @@ module TSV
219
249
  other_common_pos = common_fields.collect{|f| other.fields.index f}
220
250
  this_common_pos = common_fields.collect{|f| self.fields.index f}
221
251
  missing = other.keys - self.keys
252
+
253
+ other = other.to_list if other.type == :single
254
+
222
255
  case type
223
256
  when :single
224
257
  missing.each do |k|
@@ -249,35 +282,6 @@ module TSV
249
282
  end
250
283
  end
251
284
 
252
- same_key = true
253
- begin
254
- case
255
- when (Misc.match_fields(key_field, other.key_field) and same_key)
256
- Log.debug "Attachment with same key: #{other.key_field}"
257
- attach_same_key other, fields
258
- when (not in_namespace and self.fields.select{|f| Misc.match_fields(f, other.key_field)}.any?)
259
- Log.debug "Found other key field: #{other.key_field}"
260
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
261
- when (in_namespace and self.fields_in_namespace.select{|f| Misc.match_fields(f, other.key_field)}.any?)
262
- Log.debug "Found other key field in #{in_namespace}: #{other.key_field}"
263
- attach_source_key other, other.key_field, :fields => fields, :one2one => one2one
264
- else
265
- index = TSV.find_traversal(self, other, options)
266
- raise FieldNotFoundError, "Cannot traverse identifiers" if index.nil?
267
- Log.debug "Attachment with index: #{other.key_field}"
268
- attach_index other, index, fields
269
- end
270
- rescue Exception
271
- if same_key
272
- Log.warn "Could not translate identifiers with same_key"
273
- same_key = false
274
- retry
275
- else
276
- raise $!
277
- end
278
- end
279
- Log.debug("Attachment of fields:#{Misc.fingerprint fields } from #{other.filename.inspect} finished.")
280
-
281
285
  self
282
286
  end
283
287
 
@@ -4,9 +4,20 @@ module TSV
4
4
  fields = other.fields - [key_field].concat(self.fields) if fields.nil?
5
5
 
6
6
  fields = [fields].compact unless Array === fields
7
+
8
+ common_fields = self.fields & fields
9
+
10
+ fields = fields - common_fields
11
+
7
12
  num_fields = fields.length
8
13
 
9
14
  field_positions = fields.collect{|field| other.identify_field field}
15
+
16
+ if common_fields.any?
17
+ common_field_positions = common_fields.collect{|field| self.identify_field field}
18
+ common_field_positions_other = common_fields.collect{|field| other.identify_field field}
19
+ end
20
+
10
21
  other.with_unnamed do
11
22
  with_unnamed do
12
23
  through do |key, values|
@@ -28,6 +39,12 @@ module TSV
28
39
  new_values = field_positions.collect do |pos|
29
40
  pos == :key ? key : other_values[pos]
30
41
  end
42
+
43
+ if common_fields.any?
44
+ common_field_positions.zip(common_field_positions_other).each do |p1,p2|
45
+ current[p1] += other_values[p2]
46
+ end
47
+ end
31
48
  end
32
49
 
33
50
  new_values.collect!{|v| [v]} if type == :double and not (other.type == :double or other.type == :flat)
@@ -292,14 +292,16 @@ def self.add_libdir(dir=nil)
292
292
  end
293
293
  end
294
294
 
295
- def self.bootstrap(elems, num = :current, options = {}, &block)
295
+ def self.bootstrap(elems, num = nil, options = {}, &block)
296
296
  IndiferentHash.setup options
297
+
298
+ num = Rbbt::Config.get :cpus, :default_bootstrap_cpus, :bootstrap_cpus if num == :current || num == nil
297
299
  num = :current if num.nil?
298
300
  cpus = case num
299
301
  when :current
300
- n = 10
301
- n = elems.length / 2 if n > elems.length/2
302
- n
302
+ n = Etc.nprocessors
303
+ n = elems.length / 2 if n > elems.length/2
304
+ n
303
305
  when String
304
306
  num.to_i
305
307
  when Integer
@@ -163,7 +163,7 @@ module Misc
163
163
 
164
164
  def self.fixutf8(string)
165
165
  return nil if string.nil?
166
- return string if string.respond_to?(:encoding) && string.encoding == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
166
+ return string if string.respond_to?(:encoding) && string.encoding.to_s == "UTF-8" && (string.respond_to?(:valid_encoding?) && string.valid_encoding?) ||
167
167
  (string.respond_to?(:valid_encoding) && string.valid_encoding)
168
168
 
169
169
  if string.respond_to?(:encode)
@@ -17,14 +17,14 @@ module TmpFile
17
17
 
18
18
  # Creates a random file name, with the given suffix and a random number
19
19
  # up to +max+
20
- def self.random_name(s = "tmp-", max = 10000000)
20
+ def self.random_name(s = "tmp-", max = 1_000_000_000)
21
21
  n = rand(max)
22
22
  s + n.to_s
23
23
  end
24
24
 
25
25
  # Creates a random filename in the temporary directory
26
- def self.tmp_file(s = "tmp-", max=10000000, dir = TMPDIR)
27
- File.expand_path(File.join(dir, random_name(s,max)))
26
+ def self.tmp_file(s = "tmp-", max=1_000_000_000, dir = TMPDIR)
27
+ File.expand_path(File.join(dir, random_name(s, max)))
28
28
  end
29
29
 
30
30
  def self.with_file(content = nil, erase = true, options = {})
@@ -33,7 +33,7 @@ module TmpFile
33
33
 
34
34
  prefix = options[:prefix] || "tmp-"
35
35
  tmpdir = options[:tmpdir] || TMPDIR
36
- max = options[:max] || 10000000
36
+ max = options[:max] || 1_000_000_000
37
37
  tmpfile = tmp_file prefix, max, tmpdir
38
38
  if options[:extension]
39
39
  tmpfile += ".#{options[:extension]}"
@@ -10,9 +10,6 @@ require 'rbbt/workflow/util/provenance'
10
10
 
11
11
  module Workflow
12
12
 
13
- STEP_CACHE = {}
14
- LOAD_STEP_CACHE = {}
15
-
16
13
  class TaskNotFoundException < Exception
17
14
  def initialize(workflow, task = nil)
18
15
  if task
@@ -264,11 +261,11 @@ module Workflow
264
261
  end
265
262
 
266
263
  def step_cache
267
- @step_cache ||= Workflow::STEP_CACHE
264
+ Thread.current[:step_cache] ||= {}
268
265
  end
269
266
 
270
267
  def self.load_step_cache
271
- @load_step_cache ||= Workflow::LOAD_STEP_CACHE
268
+ Thread.current[:load_step_cache] ||= {}
272
269
  end
273
270
 
274
271
 
@@ -418,7 +418,7 @@ module Workflow
418
418
  when :hash
419
419
  clean_inputs = Annotated.purge(inputs)
420
420
  clean_inputs = clean_inputs.collect{|i| Symbol === i ? i.to_s : i }
421
- deps_str = dependencies.collect{|d| Step === d ? d.short_path : d }
421
+ deps_str = dependencies.collect{|d| (Step === d || (defined?(RemoteStep) && RemoteStep === Step)) ? "Step: " << d.short_path : d }
422
422
  key_obj = {:inputs => clean_inputs, :dependencies => deps_str }
423
423
  key_str = Misc.obj2str(key_obj)
424
424
  hash_str = Misc.digest(key_str)
@@ -137,14 +137,16 @@ class RemoteWorkflow
137
137
 
138
138
  post_thread = Thread.new(Thread.current) do |parent|
139
139
  bl = lambda do |rok|
140
- if Net::HTTPOK === rok
140
+ case rok
141
+ when Net::HTTPOK
141
142
  _url = rok["RBBT-STREAMING-JOB-URL"]
142
143
  @url = File.join(task_url, File.basename(_url)) if _url
143
144
  rok.read_body do |c,_a, _b|
144
145
  sin.write c
145
146
  end
146
147
  sin.close
147
- elsif Net::HTTPSeeOther === rok
148
+ when Net::HTTPRedirection, Net::HTTPAccepted
149
+ Thread.current.report_on_exception = false
148
150
  raise TryThis.new(rok)
149
151
  else
150
152
  err = StringIO.new
@@ -178,7 +180,8 @@ class RemoteWorkflow
178
180
  begin
179
181
  RestClient::Request.execute(:method => :post, :url => task_url, :payload => task_params, :block_response => bl)
180
182
  rescue TryThis
181
- RestClient::Request.execute(:method => :get, :url => $!.payload.header[:location], :block_response => bl)
183
+ url = $!.payload["location"]
184
+ RestClient::Request.execute(:method => :get, :url => url, :block_response => bl)
182
185
  end
183
186
  end
184
187
 
@@ -26,8 +26,8 @@ class RemoteStep < Step
26
26
 
27
27
  def cache_file
28
28
  begin
29
- digest = Misc.obj2digest([base_url, task, base_name, inputs])
30
- Rbbt.var.cache.REST[[task, clean_name, digest].compact * "."].find
29
+ digest = Misc.obj2digest([base_url, task.to_s, base_name, inputs])
30
+ Rbbt.var.cache.REST[task.to_s][[clean_name, digest].compact * "."].find
31
31
  rescue
32
32
  Log.exception $!
33
33
  raise $!
@@ -143,7 +143,11 @@ class RemoteStep < Step
143
143
  @info = Persist.memory("RemoteSteps Info", :url => @url, :persist => true, :update => update) do
144
144
  @last_info_time = Time.now
145
145
  init_job unless @url
146
- info = @adaptor.get_json(File.join(@url, 'info'))
146
+ info = begin
147
+ @adaptor.get_json(File.join(@url, 'info'))
148
+ rescue
149
+ {:status => :noinfo}
150
+ end
147
151
  info = RemoteWorkflow.fix_hash(info)
148
152
  info[:status] = info[:status].to_sym if String === info[:status]
149
153
  info
@@ -243,6 +247,7 @@ class RemoteStep < Step
243
247
  def join
244
248
  return true if cache_files.any?
245
249
  init_job unless @url
250
+ produce unless @started
246
251
  Log.debug{ "Joining RemoteStep: #{path}" }
247
252
 
248
253
  if IO === @result
@@ -258,7 +263,6 @@ class RemoteStep < Step
258
263
  sleep 1 unless self.done? || self.aborted? || self.error?
259
264
  while not (self.done? || self.aborted? || self.error?)
260
265
  sleep 3
261
- iif [self.done?, self.status, self.info]
262
266
  end
263
267
  end
264
268
 
@@ -311,6 +315,10 @@ class RemoteStep < Step
311
315
  end
312
316
  end
313
317
 
318
+ def short_path
319
+ init_job unless @url
320
+ [@task, @name] * "/"
321
+ end
314
322
 
315
323
  def input_checks
316
324
  []
@@ -88,7 +88,7 @@ module Workflow
88
88
  key = [workflow, task]
89
89
 
90
90
  dep_tree[key] = workflow.dep_tree(task)
91
- end if self.task_dependencies[name.to_sym]
91
+ end if name && self.task_dependencies[name.to_sym]
92
92
  dep_tree
93
93
  end
94
94
  end
@@ -15,6 +15,7 @@ $ rbbt app start [options]
15
15
  -Ho--Host* Host name
16
16
  -B--Bind* Bind IP
17
17
  -p--port* TCP port
18
+ -w--workers* Number of workers for cluster mode (puma)
18
19
  -s--server* Server type: thin, webrick, unicorn, etc
19
20
  -f--finder Start server with finder functionality
20
21
  -fs--file_server Activate file serving for resources
@@ -84,6 +85,12 @@ Misc.in_dir(app_dir) do
84
85
  end
85
86
  end
86
87
 
88
+ fixed_options = {}
89
+ options.each do |k,v| fixed_options[k.to_sym] = v end
90
+ options = fixed_options
91
+
92
+ options[:workers] = options[:workers].to_i if options[:workers]
93
+
87
94
  case server
88
95
  when 'passenger'
89
96
  system ENV, "env RBBT_LOG=0 passenger start -R '#{config_ru_file}' -p #{options[:Port] || "2887"}"
@@ -91,6 +98,7 @@ Misc.in_dir(app_dir) do
91
98
  system ENV, "puma '#{config_ru_file}' -p #{options[:Port] || "2887"} -w 3 -t 8:32 --preload"
92
99
  else
93
100
  options[:config] = config_ru_file
101
+ options[:threads] = "8:8"
94
102
  Rack::Server.start(options)
95
103
  end
96
104
  end
@@ -97,7 +97,7 @@ TSV.traverse jobs do |file,i|
97
97
  else
98
98
  info = begin
99
99
  Open.open(i[:info_file]) do |f|
100
- Step::INFO_SERIALIAZER.load(f)
100
+ Step::INFO_SERIALIZER.load(f)
101
101
  end
102
102
  rescue
103
103
  {:status => :noinfo}
@@ -134,7 +134,7 @@ TSV.traverse jobs do |file,i|
134
134
  end
135
135
  end
136
136
 
137
- if (force and status !~ /done/) or
137
+ if (force && status !~ /done/) or
138
138
  status =~ /\b(old|dirty|nopid|error|missing|aborted|dead|sync)$/ or
139
139
  (status == "noinfo" and not done) or
140
140
  status == ""
@@ -0,0 +1,29 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'rbbt-util'
4
+ require 'rbbt/util/simpleopt'
5
+
6
+ options = SOPT.setup <<EOF
7
+ Read a TSV file and dump it
8
+
9
+ $ rbbt tsv read [options] <filename.tsv|->
10
+
11
+ Use - to read from STDIN
12
+
13
+ -k--key_field* Key field
14
+ -f--fields* Fields
15
+ -t--type* Type
16
+ -m--merge* Merge from multiple rows
17
+ -h--help Print this help
18
+
19
+ EOF
20
+ rbbt_usage and exit 0 if options[:help]
21
+
22
+ file = ARGV.shift
23
+
24
+ file = STDIN if file == '-'
25
+
26
+ tsv = TSV.open file, :merge => true, :type => :double
27
+
28
+
29
+ puts tsv.to_unmerged_expanded_s
@@ -153,7 +153,7 @@ if recursive
153
153
  dep = deps.shift
154
154
  inputs = {} if inputs.nil?
155
155
  inputs = inputs.merge(dep.info[:inputs] || {})
156
- deps = deps.concat dep.dependencies
156
+ deps = deps.concat dep.dependencies if dep.dependencies
157
157
  end
158
158
 
159
159
  inputs = inputs.merge step.archived_inputs
@@ -123,15 +123,11 @@ TmpFile.with_file do |app_dir|
123
123
  else
124
124
  options[:config] = config_ru_file
125
125
 
126
- clean_options = {}
127
- options.each do |k,v|
128
- begin
129
- clean_options[k.to_sym] = v
130
- rescue
131
- end
132
- end
126
+ fixed_options = {}
127
+ options.each do |k,v| fixed_options[k.to_sym] = v end
128
+ options = fixed_options
133
129
 
134
- Rack::Server.start(clean_options)
130
+ Rack::Server.start(options)
135
131
  end
136
132
  end
137
133
  end
@@ -337,6 +337,8 @@ if clean_task
337
337
  dep.clean
338
338
  dep.set_info :status, :cleaned
339
339
  end
340
+
341
+ job.clean if job.task_name.to_s == clean_task.to_s
340
342
  end
341
343
  end
342
344
 
@@ -213,8 +213,9 @@ row2 E
213
213
 
214
214
  tsv1 = tsv2 = nil
215
215
 
216
- tsv1 = Rbbt.tmp.test.test1.data.tsv :double, :sep => /\s+/
217
- tsv2 = Rbbt.tmp.test.test2.data.tsv :double, :sep => /\s+/
216
+ tsv1 = Rbbt.tmp.test.test1.data.produce(true).tsv :double, :sep => /\s+/
217
+ tsv2 = Rbbt.tmp.test.test2.data.produce(true).tsv :double, :sep => /\s+/
218
+ Log.tsv tsv2
218
219
 
219
220
  tsv2.identifiers = Rbbt.tmp.test.test2.identifiers.produce.find #.to_s
220
221
 
@@ -0,0 +1,10 @@
1
+ require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
2
+ require 'rbbt/util/misc/format'
3
+
4
+ class TestClass < Test::Unit::TestCase
5
+ def test_fixutf8
6
+ a = "Camión"
7
+ assert_equal a, Misc.fixutf8(a)
8
+ end
9
+ end
10
+
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.27.7
4
+ version: 5.27.12
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-06-04 00:00:00.000000000 Z
11
+ date: 2020-07-14 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -403,6 +403,7 @@ files:
403
403
  - share/rbbt_commands/tsv/sort
404
404
  - share/rbbt_commands/tsv/subset
405
405
  - share/rbbt_commands/tsv/transpose
406
+ - share/rbbt_commands/tsv/uncollapse
406
407
  - share/rbbt_commands/tsv/unzip
407
408
  - share/rbbt_commands/tsv/values
408
409
  - share/rbbt_commands/tsv/write_excel
@@ -487,6 +488,7 @@ files:
487
488
  - test/rbbt/util/concurrency/test_threads.rb
488
489
  - test/rbbt/util/log/test_progress.rb
489
490
  - test/rbbt/util/misc/test_bgzf.rb
491
+ - test/rbbt/util/misc/test_format.rb
490
492
  - test/rbbt/util/misc/test_lock.rb
491
493
  - test/rbbt/util/misc/test_multipart_payload.rb
492
494
  - test/rbbt/util/misc/test_omics.rb
@@ -552,6 +554,7 @@ test_files:
552
554
  - test/rbbt/util/test_colorize.rb
553
555
  - test/rbbt/util/misc/test_omics.rb
554
556
  - test/rbbt/util/misc/test_pipes.rb
557
+ - test/rbbt/util/misc/test_format.rb
555
558
  - test/rbbt/util/misc/test_lock.rb
556
559
  - test/rbbt/util/misc/test_multipart_payload.rb
557
560
  - test/rbbt/util/misc/test_bgzf.rb