rbbt-util 5.21.101 → 5.21.102

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 404f1b0b70c29b749187e586ad6e213e45c672b1
4
- data.tar.gz: 50248d41198d8dcd8a691a663e574a91f720a96a
3
+ metadata.gz: 552cc55adf8fe2641d93a6c2dd9c7c8894e53a64
4
+ data.tar.gz: f762f96ae17cf69f718c5eb15da13683fda1f99d
5
5
  SHA512:
6
- metadata.gz: 6c91ac17825af87d3ec62e432738b494c602063c6c2a9b871d1e3a952a7b3f87b6177392899aa3f925fbb8f6687853084496bfb7c880413df0dfe68ed43eeedd
7
- data.tar.gz: 3b3097806074852df41f79bbb13a7a593842be215c7f7f81373e7f78791b4512cfcf31ac5f72d3ada3f57879d7eccadc1841eedd05f8d43c62bf4948b782039d
6
+ metadata.gz: 53039d9cb4b69653def0d272fe3e8a6d45b058fc7123e1f79c58279d5b217e2de454c49cacee8353b8d262a603507012b7f8b4fc75e716c805df1a14155dfdf6
7
+ data.tar.gz: 38473fb2bd31541c0c745886c710bc484ce9b2ccd38f92af5bb1b8a85ea78a190401e046df9bf8c7f0cd6526de06aa1683dc1d2f2e9a2e82e5de4d5bd4ca76e6
@@ -84,25 +84,25 @@ class FixWidthTable
84
84
  pos(size - 1)
85
85
  end
86
86
 
87
- def pos(index)
87
+ def idx_pos(index)
88
88
  return nil if index < 0 or index >= size
89
89
  @file.seek(5 + (record_size) * index, IO::SEEK_SET)
90
90
  @file.read(4).unpack("l").first
91
91
  end
92
92
 
93
- def pos_end(index)
93
+ def idx_pos_end(index)
94
94
  return nil if index < 0 or index >= size
95
95
  @file.seek(9 + (record_size) * index, IO::SEEK_SET)
96
96
  @file.read(4).unpack("l").first
97
97
  end
98
98
 
99
- def overlap(index)
99
+ def idx_overlap(index)
100
100
  return nil if index < 0 or index >= size
101
101
  @file.seek(13 + (record_size) * index, IO::SEEK_SET)
102
102
  @file.read(4).unpack("l").first
103
103
  end
104
104
 
105
- def value(index)
105
+ def idx_value(index)
106
106
  return nil if index < 0 or index >= size
107
107
  @file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
108
108
  padding = @file.read(4).unpack("l").first+1
@@ -164,7 +164,7 @@ class FixWidthTable
164
164
 
165
165
  while(upper >= lower) do
166
166
  idx = lower + (upper - lower) / 2
167
- pos_idx = pos(idx)
167
+ pos_idx = idx_pos(idx)
168
168
 
169
169
  case pos <=> pos_idx
170
170
  when 0
@@ -183,7 +183,7 @@ class FixWidthTable
183
183
  idx.to_i
184
184
  end
185
185
 
186
- def get_range(pos)
186
+ def get_range(pos, return_idx = false)
187
187
  case pos
188
188
  when Range
189
189
  r_start = pos.begin
@@ -197,29 +197,40 @@ class FixWidthTable
197
197
  idx = closest(r_start)
198
198
 
199
199
  return [] if idx >= size
200
- return [] if idx <0 and r_start == r_end
200
+ return [] if idx < 0 and r_start == r_end
201
201
 
202
202
  idx = 0 if idx < 0
203
203
 
204
- overlap = overlap(idx)
204
+ overlap = idx_overlap(idx)
205
205
 
206
206
  idx -= overlap unless overlap.nil?
207
207
 
208
208
  values = []
209
- l_start = pos(idx)
210
- l_end = pos_end(idx)
211
- while l_start <= r_end
212
- values << value(idx) if l_end >= r_start
213
- idx += 1
214
- break if idx >= size
215
- l_start = pos(idx)
216
- l_end = pos_end(idx)
209
+ l_start = idx_pos(idx)
210
+ l_end = idx_pos_end(idx)
211
+
212
+ if return_idx
213
+ while l_start <= r_end
214
+ values << idx if l_end >= r_start
215
+ idx += 1
216
+ break if idx >= size
217
+ l_start = idx_pos(idx)
218
+ l_end = idx_pos_end(idx)
219
+ end
220
+ else
221
+ while l_start <= r_end
222
+ values << idx_value(idx) if l_end >= r_start
223
+ idx += 1
224
+ break if idx >= size
225
+ l_start = idx_pos(idx)
226
+ l_end = idx_pos_end(idx)
227
+ end
217
228
  end
218
229
 
219
230
  values
220
231
  end
221
232
 
222
- def get_point(pos)
233
+ def get_point(pos, return_idx = false)
223
234
  if Range === pos
224
235
  r_start = pos.begin
225
236
  r_end = pos.end
@@ -231,23 +242,33 @@ class FixWidthTable
231
242
  idx = closest(r_start)
232
243
 
233
244
  return [] if idx >= size
234
- return [] if idx <0 and r_start == r_end
245
+ return [] if idx < 0 and r_start == r_end
235
246
 
236
247
  idx = 0 if idx < 0
237
248
 
238
- idx += 1 unless pos(idx) >= r_start
249
+ idx += 1 unless idx_pos(idx) >= r_start
239
250
 
240
251
  return [] if idx >= size
241
252
 
242
253
  values = []
243
- l_start = pos(idx)
244
- l_end = pos_end(idx)
245
- while l_start <= r_end
246
- values << value(idx)
247
- idx += 1
248
- break if idx >= size
249
- l_start = pos(idx)
250
- l_end = pos_end(idx)
254
+ l_start = idx_pos(idx)
255
+ l_end = idx_pos_end(idx)
256
+ if return_idx
257
+ while l_start <= r_end
258
+ values << idx
259
+ idx += 1
260
+ break if idx >= size
261
+ l_start = idx_pos(idx)
262
+ l_end = idx_pos_end(idx)
263
+ end
264
+ else
265
+ while l_start <= r_end
266
+ values << idx_value(idx)
267
+ idx += 1
268
+ break if idx >= size
269
+ l_start = idx_pos(idx)
270
+ l_end = idx_pos_end(idx)
271
+ end
251
272
  end
252
273
 
253
274
  values
@@ -261,6 +282,20 @@ class FixWidthTable
261
282
  get_point(pos)
262
283
  end
263
284
  end
285
+
286
+ def overlaps(pos, value = false)
287
+ return [] if size == 0
288
+ idxs = if range
289
+ get_range(pos, true)
290
+ else
291
+ get_point(pos, true)
292
+ end
293
+ if value
294
+ idxs.collect{|idx| [idx_pos(idx), idx_pos_end(idx), idx_value(idx)] * ":"}
295
+ else
296
+ idxs.collect{|idx| [idx_pos(idx), idx_pos_end(idx)] * ":"}
297
+ end
298
+ end
264
299
 
265
300
 
266
301
  def values_at(*list)
data/lib/rbbt/persist.rb CHANGED
@@ -116,7 +116,7 @@ module Persist
116
116
  res
117
117
  when :array
118
118
  res = Open.read(path).split("\n", -1)
119
- res.pop if res.last.empty?
119
+ res.pop if res.last and res.last.empty?
120
120
  res
121
121
  when :marshal
122
122
  Open.open(path) do |stream|
@@ -134,13 +134,19 @@ def self.add_libdir(dir=nil)
134
134
  $__did_once = false
135
135
  end
136
136
 
137
- def self.insist(times = 3, sleep = nil, msg = nil)
137
+ def self.insist(times = 4, sleep = nil, msg = nil)
138
138
  if Array === times
139
139
  sleep_array = times
140
140
  times = sleep_array.length
141
141
  sleep = sleep_array.shift
142
142
  end
143
143
  try = 0
144
+
145
+ if sleep.nil?
146
+ sleep_array = ([0] + [0.001, 0.01, 0.1] * (times / 3)).sort[0..times-1]
147
+ sleep = sleep_array.shift
148
+ end
149
+
144
150
  begin
145
151
  yield
146
152
  rescue TryAgain
@@ -166,7 +172,7 @@ def self.add_libdir(dir=nil)
166
172
 
167
173
  if sleep and try > 0
168
174
  sleep sleep
169
- sleep = sleep_array.shift if sleep_array
175
+ sleep = sleep_array.shift || sleep if sleep_array
170
176
  else
171
177
  Thread.pass
172
178
  end
@@ -388,4 +388,48 @@ module Misc
388
388
  intersect_streams(stream1, stream2,sin, sep)
389
389
  end
390
390
  end
391
+
392
+ def self.index_BED(source, destination, sorted = false)
393
+
394
+ pos_function = Proc.new do |k|
395
+ k.split(":").values_at(1, 2).collect{|i| i.to_i}
396
+ end
397
+ if Open.exists? destination
398
+ Persist::Sharder.new destination, false, "fwt", :pos_function => pos_function do |key|
399
+ key.split(":")[0]
400
+ end
401
+ else
402
+ io = IO === io ? io : Open.open(source)
403
+
404
+ max_size = 0
405
+ nio = Misc.open_pipe do |sin|
406
+ while line = io.gets
407
+ chr, start, eend, id, *rest = line.split("\t")
408
+ l = id.length
409
+ max_size = l if max_size < l
410
+ chr = chr.sub('chr','')
411
+ sin << [chr, start, eend, id] * "\t" << "\n"
412
+ end
413
+ end
414
+
415
+ TmpFile.with_file do |tmpfile|
416
+ Misc.consume_stream(nio, false, tmpfile)
417
+
418
+ value_size = max_size
419
+ destination = destination.find if Path === destination
420
+ sharder = Persist::Sharder.new destination, true, "fwt", :value_size => value_size, :range => true, :pos_function => pos_function do |key|
421
+ key.split(":")[0]
422
+ end
423
+
424
+ TSV.traverse tmpfile, :type => :array, :bar => "Creating BED index for #{Misc.fingerprint source}" do |line|
425
+ chr, start, eend, id, *rest = line.split("\t")
426
+ key = [chr, start, eend] * ":"
427
+ sharder[key] = id
428
+ end
429
+ sharder.read
430
+
431
+ sharder
432
+ end
433
+ end
434
+ end
391
435
  end
@@ -242,7 +242,10 @@ module Open
242
242
 
243
243
  if dir_sub_path_source.nil? and dir_sub_path_target.nil?
244
244
  FileUtils.mkdir_p File.dirname(target) unless File.exist? File.dirname(target)
245
- return FileUtils.mv source, target
245
+ tmp_target = File.join(File.dirname(target), '.tmp_mv.' + File.basename(target))
246
+ FileUtils.mv source, tmp_target
247
+ FileUtils.mv tmp_target, target
248
+ return
246
249
  end
247
250
 
248
251
  if dir_sub_path_source.nil?
@@ -7,6 +7,10 @@ module ComputeDependency
7
7
  dep.extend ComputeDependency
8
8
  dep.compute = value
9
9
  end
10
+
11
+ def canfail?
12
+ compute == :canfail || (Array === compute && compute.include?(:canfail))
13
+ end
10
14
  end
11
15
 
12
16
  class Step
@@ -157,6 +161,12 @@ class Step
157
161
  i[key] = value
158
162
  @info_cache = i
159
163
  Misc.sensiblewrite(info_file, INFO_SERIALIAZER.dump(i), :force => true, :lock => false)
164
+ #Misc.insist(([0.01,0.1,1] * 3).sort) do
165
+ Misc.insist do
166
+ Open.open(info_file) do |file|
167
+ INFO_SERIALIAZER.load(file)
168
+ end
169
+ end
160
170
  @info_cache_time = Time.now
161
171
  value
162
172
  end
@@ -357,9 +367,18 @@ class Step
357
367
 
358
368
  def dirty?
359
369
  status = self.status
360
- return true if done? and not status == :done and not status == :noinfo
361
- return true if status == :done and not done?
362
- dirty_files = rec_dependencies.collect{|dependency| dependency.path unless dependency.error? and not dependency.recoverable_error? }.compact.uniq.reject{|path| ! (Path === path) || path.exists?}
370
+
371
+ if done? and not status == :done and not status == :noinfo
372
+ return true
373
+ end
374
+ if status == :done and not done?
375
+ return true
376
+ end
377
+
378
+ dirty_files = rec_dependencies.reject{|dep|
379
+ (dep.path && Open.exists?(dep.path)) || (dep.error? && ! dep.recoverable_error?)
380
+ }
381
+
363
382
  if dirty_files.any?
364
383
  true
365
384
  else
@@ -368,7 +387,7 @@ class Step
368
387
  end
369
388
 
370
389
  def done?
371
- path and File.exist? path
390
+ path and Open.exists? path
372
391
  end
373
392
 
374
393
  def streaming?
@@ -283,7 +283,7 @@ class Step
283
283
 
284
284
  def clean
285
285
  status = []
286
- status << "dirty" if dirty?
286
+ status << "dirty" if done? and dirty?
287
287
  status << "not running" if not done? and not running?
288
288
  status.unshift " " if status.any?
289
289
  Log.medium "Cleaning step: #{path}#{status * " "}"
@@ -80,13 +80,17 @@ class Step
80
80
  return if status == 'streaming' and job.running?
81
81
  end
82
82
 
83
- if ((status == 'error' || job.aborted?) && job.recoverable_error?) || job.dirty?
83
+ if (status == 'error' && (job.recoverable_error? || job.dirty?)) ||
84
+ job.aborted? ||
85
+ (job.done? && job.dirty?) ||
86
+ (status == 'waiting' && ! job.running?)
87
+
84
88
  job.clean
85
89
  end
86
90
 
87
91
  (job.init_info and job.dup_inputs) unless status == 'done' or job.started?
88
92
 
89
- canfail = ComputeDependency === job and Array === job.compute and job.compute.include? :canfail
93
+ canfail = ComputeDependency === job && job.canfail?
90
94
  raise DependencyError, job if job.error? and not canfail
91
95
  end
92
96
 
@@ -192,13 +196,18 @@ class Step
192
196
  canfail = rest && rest.include?(:canfail)
193
197
 
194
198
  case type
199
+ when :canfail
200
+ list.each do |step|
201
+ step.produce
202
+ nil
203
+ end
195
204
  when :produce, :no_dup
196
205
  list.each do |step|
197
206
  Misc.insist do
198
207
  begin
199
208
  step.produce
200
209
  rescue RbbtException
201
- raise $! unless canfail
210
+ raise $! unless canfail || step.canfail?
202
211
  rescue Exception
203
212
  step.exception $!
204
213
  if step.recoverable_error?
@@ -225,23 +234,23 @@ class Step
225
234
  begin
226
235
  dep.produce
227
236
  Log.warn "Error in bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
228
- rescue Exception
229
- if canfail
230
- Log.warn "Allowing failing of #{dep.path}: #{dep.messages.last}"
231
- else
232
- raise $!
233
- end
237
+
234
238
  rescue Aborted
235
239
  dep.abort
236
240
  Log.warn "Aborted bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
237
241
  raise $!
242
+
238
243
  rescue Exception
239
- dep.exception $!
240
- dep.exception $!
241
- if dep.recoverable_error?
242
- raise $!
244
+ if canfail || dep.canfail?
245
+ Log.warn "Allowing failing of #{dep.path}: #{dep.messages.last}"
243
246
  else
244
- raise StopInsist.new($!)
247
+ Log.warn "NOT Allowing failing of #{dep.path}: #{dep.messages.last}"
248
+ dep.exception $!
249
+ if dep.recoverable_error?
250
+ raise $!
251
+ else
252
+ raise StopInsist.new($!)
253
+ end
245
254
  end
246
255
  end
247
256
  end
@@ -281,13 +290,15 @@ class Step
281
290
  end
282
291
  end
283
292
 
293
+ produced = []
284
294
  dependencies.each do |dep|
285
295
  next unless ComputeDependency === dep
286
296
  if dep.compute == :produce
287
297
  dep.produce
298
+ produced << dep.path
288
299
  end
289
300
  end
290
-
301
+
291
302
  self.dup_inputs
292
303
 
293
304
  required_dep_paths = []
@@ -311,6 +322,7 @@ class Step
311
322
  next unless required_dep_paths.include? step.path
312
323
  if dependencies.include?(step) and step.inputs.flatten.select{|i| Step === i}.any?
313
324
  if ComputeDependency === step
325
+ next if produced.include? step.path
314
326
  compute_last_deps[step.compute] ||= []
315
327
  compute_last_deps[step.compute] << step
316
328
  else
@@ -318,6 +330,7 @@ class Step
318
330
  end
319
331
  else
320
332
  if ComputeDependency === step
333
+ next if produced.include? step.path
321
334
  compute_pre_deps[step.compute] ||= []
322
335
  compute_pre_deps[step.compute] << step
323
336
  else
@@ -352,7 +352,7 @@ class Step
352
352
  end
353
353
  end
354
354
 
355
- update
355
+ update if done?
356
356
 
357
357
  if dofork
358
358
  fork(true) unless started?
@@ -53,9 +53,9 @@ def report_msg(status, name, path)
53
53
  workflow = Log.color(:magenta, parts.pop)
54
54
 
55
55
  if not Open.remote?(path) and (File.exists?(path) and $main_mtime and ($main_mtime - File.mtime(path)) < 0)
56
- status_msg(status) << " " << [workflow, task, path] * " " << " (#{Log.color(:red, "Mtime out of sync") })\n"
56
+ status_msg(status.to_s) << " " << [workflow, task, path] * " " << " (#{Log.color(:red, "Mtime out of sync") })\n"
57
57
  else
58
- status_msg(status) << " " << [workflow, task, path] * " " << "\n"
58
+ status_msg(status.to_s) << " " << [workflow, task, path] * " " << "\n"
59
59
  end
60
60
  end
61
61
 
@@ -36,14 +36,14 @@ class TestFixWidthTable < Test::Unit::TestCase
36
36
  f.add [3,4,0], "test2"
37
37
  f.read
38
38
 
39
- assert_equal 1, f.pos(0)
40
- assert_equal 3, f.pos(1)
41
- assert_equal 2, f.pos_end(0)
42
- assert_equal 4, f.pos_end(1)
43
- assert_equal 0, f.overlap(0)
44
- assert_equal 0, f.overlap(1)
45
- assert_equal "test1", f.value(0)
46
- assert_equal "test2", f.value(1)
39
+ assert_equal 1, f.idx_pos(0)
40
+ assert_equal 3, f.idx_pos(1)
41
+ assert_equal 2, f.idx_pos_end(0)
42
+ assert_equal 4, f.idx_pos_end(1)
43
+ assert_equal 0, f.idx_overlap(0)
44
+ assert_equal 0, f.idx_overlap(1)
45
+ assert_equal "test1", f.idx_value(0)
46
+ assert_equal "test2", f.idx_value(1)
47
47
 
48
48
  end
49
49
 
@@ -104,5 +104,32 @@ g: ____
104
104
  end
105
105
  end
106
106
  end
107
+
108
+
109
+ def test_range_pos
110
+ data =<<-EOF
111
+ ##012345678901234567890
112
+ #ID:Range
113
+ a: ______
114
+ b: ______
115
+ c: _______
116
+ d: ____
117
+ e: ______
118
+ f: ___
119
+ g: ____
120
+ EOF
121
+ TmpFile.with_file(data) do |datafile|
122
+ tsv = load_data(datafile)
123
+ TmpFile.with_file do |filename|
124
+ f = FixWidthTable.new filename, 100, true
125
+ f.add_range tsv
126
+ f.read
127
+
128
+ assert_equal %w(), f.overlaps(0).sort
129
+ assert_equal %w(1:6), f.overlaps(1).sort
130
+ assert_equal %w(1:6:b), f.overlaps(1, true).sort
131
+ end
132
+ end
133
+ end
107
134
  end
108
135
 
@@ -65,4 +65,32 @@ class TestMiscOmics < Test::Unit::TestCase
65
65
  assert_equal Misc.translate_prot_mutation_hgvs2rbbt("p.(A775)ins?"), nil
66
66
  assert_equal Misc.translate_prot_mutation_hgvs2rbbt("p.?del"), nil
67
67
  end
68
+
69
+ def test_index_BED
70
+ text= ""
71
+
72
+ %w(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y MT).each do |chr|
73
+ %w(1 2 3 4 5 6 7 8).each do |i|
74
+ start = i.to_i * 100
75
+ eend = start + 50
76
+ id = [chr, i] * ":"
77
+ text << [chr, start.to_s, eend.to_s, id] * "\t" + "\n"
78
+ end
79
+ end
80
+
81
+
82
+ io = Misc.open_pipe do |sin|
83
+ sin.write text
84
+ end
85
+
86
+ TmpFile.with_file do |dir|
87
+ index = Misc.index_BED(io, dir)
88
+ assert_equal ["1:1"], index["1:120:130"]
89
+ index = Misc.index_BED(io, dir)
90
+ assert_equal ["2:2"], index["2:220:230"]
91
+ end
92
+
93
+
94
+
95
+ end
68
96
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.21.101
4
+ version: 5.21.102
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-29 00:00:00.000000000 Z
11
+ date: 2017-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake