rbbt-util 5.21.101 → 5.21.102

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 404f1b0b70c29b749187e586ad6e213e45c672b1
4
- data.tar.gz: 50248d41198d8dcd8a691a663e574a91f720a96a
3
+ metadata.gz: 552cc55adf8fe2641d93a6c2dd9c7c8894e53a64
4
+ data.tar.gz: f762f96ae17cf69f718c5eb15da13683fda1f99d
5
5
  SHA512:
6
- metadata.gz: 6c91ac17825af87d3ec62e432738b494c602063c6c2a9b871d1e3a952a7b3f87b6177392899aa3f925fbb8f6687853084496bfb7c880413df0dfe68ed43eeedd
7
- data.tar.gz: 3b3097806074852df41f79bbb13a7a593842be215c7f7f81373e7f78791b4512cfcf31ac5f72d3ada3f57879d7eccadc1841eedd05f8d43c62bf4948b782039d
6
+ metadata.gz: 53039d9cb4b69653def0d272fe3e8a6d45b058fc7123e1f79c58279d5b217e2de454c49cacee8353b8d262a603507012b7f8b4fc75e716c805df1a14155dfdf6
7
+ data.tar.gz: 38473fb2bd31541c0c745886c710bc484ce9b2ccd38f92af5bb1b8a85ea78a190401e046df9bf8c7f0cd6526de06aa1683dc1d2f2e9a2e82e5de4d5bd4ca76e6
@@ -84,25 +84,25 @@ class FixWidthTable
84
84
  pos(size - 1)
85
85
  end
86
86
 
87
- def pos(index)
87
+ def idx_pos(index)
88
88
  return nil if index < 0 or index >= size
89
89
  @file.seek(5 + (record_size) * index, IO::SEEK_SET)
90
90
  @file.read(4).unpack("l").first
91
91
  end
92
92
 
93
- def pos_end(index)
93
+ def idx_pos_end(index)
94
94
  return nil if index < 0 or index >= size
95
95
  @file.seek(9 + (record_size) * index, IO::SEEK_SET)
96
96
  @file.read(4).unpack("l").first
97
97
  end
98
98
 
99
- def overlap(index)
99
+ def idx_overlap(index)
100
100
  return nil if index < 0 or index >= size
101
101
  @file.seek(13 + (record_size) * index, IO::SEEK_SET)
102
102
  @file.read(4).unpack("l").first
103
103
  end
104
104
 
105
- def value(index)
105
+ def idx_value(index)
106
106
  return nil if index < 0 or index >= size
107
107
  @file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
108
108
  padding = @file.read(4).unpack("l").first+1
@@ -164,7 +164,7 @@ class FixWidthTable
164
164
 
165
165
  while(upper >= lower) do
166
166
  idx = lower + (upper - lower) / 2
167
- pos_idx = pos(idx)
167
+ pos_idx = idx_pos(idx)
168
168
 
169
169
  case pos <=> pos_idx
170
170
  when 0
@@ -183,7 +183,7 @@ class FixWidthTable
183
183
  idx.to_i
184
184
  end
185
185
 
186
- def get_range(pos)
186
+ def get_range(pos, return_idx = false)
187
187
  case pos
188
188
  when Range
189
189
  r_start = pos.begin
@@ -197,29 +197,40 @@ class FixWidthTable
197
197
  idx = closest(r_start)
198
198
 
199
199
  return [] if idx >= size
200
- return [] if idx <0 and r_start == r_end
200
+ return [] if idx < 0 and r_start == r_end
201
201
 
202
202
  idx = 0 if idx < 0
203
203
 
204
- overlap = overlap(idx)
204
+ overlap = idx_overlap(idx)
205
205
 
206
206
  idx -= overlap unless overlap.nil?
207
207
 
208
208
  values = []
209
- l_start = pos(idx)
210
- l_end = pos_end(idx)
211
- while l_start <= r_end
212
- values << value(idx) if l_end >= r_start
213
- idx += 1
214
- break if idx >= size
215
- l_start = pos(idx)
216
- l_end = pos_end(idx)
209
+ l_start = idx_pos(idx)
210
+ l_end = idx_pos_end(idx)
211
+
212
+ if return_idx
213
+ while l_start <= r_end
214
+ values << idx if l_end >= r_start
215
+ idx += 1
216
+ break if idx >= size
217
+ l_start = idx_pos(idx)
218
+ l_end = idx_pos_end(idx)
219
+ end
220
+ else
221
+ while l_start <= r_end
222
+ values << idx_value(idx) if l_end >= r_start
223
+ idx += 1
224
+ break if idx >= size
225
+ l_start = idx_pos(idx)
226
+ l_end = idx_pos_end(idx)
227
+ end
217
228
  end
218
229
 
219
230
  values
220
231
  end
221
232
 
222
- def get_point(pos)
233
+ def get_point(pos, return_idx = false)
223
234
  if Range === pos
224
235
  r_start = pos.begin
225
236
  r_end = pos.end
@@ -231,23 +242,33 @@ class FixWidthTable
231
242
  idx = closest(r_start)
232
243
 
233
244
  return [] if idx >= size
234
- return [] if idx <0 and r_start == r_end
245
+ return [] if idx < 0 and r_start == r_end
235
246
 
236
247
  idx = 0 if idx < 0
237
248
 
238
- idx += 1 unless pos(idx) >= r_start
249
+ idx += 1 unless idx_pos(idx) >= r_start
239
250
 
240
251
  return [] if idx >= size
241
252
 
242
253
  values = []
243
- l_start = pos(idx)
244
- l_end = pos_end(idx)
245
- while l_start <= r_end
246
- values << value(idx)
247
- idx += 1
248
- break if idx >= size
249
- l_start = pos(idx)
250
- l_end = pos_end(idx)
254
+ l_start = idx_pos(idx)
255
+ l_end = idx_pos_end(idx)
256
+ if return_idx
257
+ while l_start <= r_end
258
+ values << idx
259
+ idx += 1
260
+ break if idx >= size
261
+ l_start = idx_pos(idx)
262
+ l_end = idx_pos_end(idx)
263
+ end
264
+ else
265
+ while l_start <= r_end
266
+ values << idx_value(idx)
267
+ idx += 1
268
+ break if idx >= size
269
+ l_start = idx_pos(idx)
270
+ l_end = idx_pos_end(idx)
271
+ end
251
272
  end
252
273
 
253
274
  values
@@ -261,6 +282,20 @@ class FixWidthTable
261
282
  get_point(pos)
262
283
  end
263
284
  end
285
+
286
+ def overlaps(pos, value = false)
287
+ return [] if size == 0
288
+ idxs = if range
289
+ get_range(pos, true)
290
+ else
291
+ get_point(pos, true)
292
+ end
293
+ if value
294
+ idxs.collect{|idx| [idx_pos(idx), idx_pos_end(idx), idx_value(idx)] * ":"}
295
+ else
296
+ idxs.collect{|idx| [idx_pos(idx), idx_pos_end(idx)] * ":"}
297
+ end
298
+ end
264
299
 
265
300
 
266
301
  def values_at(*list)
data/lib/rbbt/persist.rb CHANGED
@@ -116,7 +116,7 @@ module Persist
116
116
  res
117
117
  when :array
118
118
  res = Open.read(path).split("\n", -1)
119
- res.pop if res.last.empty?
119
+ res.pop if res.last and res.last.empty?
120
120
  res
121
121
  when :marshal
122
122
  Open.open(path) do |stream|
@@ -134,13 +134,19 @@ def self.add_libdir(dir=nil)
134
134
  $__did_once = false
135
135
  end
136
136
 
137
- def self.insist(times = 3, sleep = nil, msg = nil)
137
+ def self.insist(times = 4, sleep = nil, msg = nil)
138
138
  if Array === times
139
139
  sleep_array = times
140
140
  times = sleep_array.length
141
141
  sleep = sleep_array.shift
142
142
  end
143
143
  try = 0
144
+
145
+ if sleep.nil?
146
+ sleep_array = ([0] + [0.001, 0.01, 0.1] * (times / 3)).sort[0..times-1]
147
+ sleep = sleep_array.shift
148
+ end
149
+
144
150
  begin
145
151
  yield
146
152
  rescue TryAgain
@@ -166,7 +172,7 @@ def self.add_libdir(dir=nil)
166
172
 
167
173
  if sleep and try > 0
168
174
  sleep sleep
169
- sleep = sleep_array.shift if sleep_array
175
+ sleep = sleep_array.shift || sleep if sleep_array
170
176
  else
171
177
  Thread.pass
172
178
  end
@@ -388,4 +388,48 @@ module Misc
388
388
  intersect_streams(stream1, stream2,sin, sep)
389
389
  end
390
390
  end
391
+
392
+ def self.index_BED(source, destination, sorted = false)
393
+
394
+ pos_function = Proc.new do |k|
395
+ k.split(":").values_at(1, 2).collect{|i| i.to_i}
396
+ end
397
+ if Open.exists? destination
398
+ Persist::Sharder.new destination, false, "fwt", :pos_function => pos_function do |key|
399
+ key.split(":")[0]
400
+ end
401
+ else
402
+ io = IO === io ? io : Open.open(source)
403
+
404
+ max_size = 0
405
+ nio = Misc.open_pipe do |sin|
406
+ while line = io.gets
407
+ chr, start, eend, id, *rest = line.split("\t")
408
+ l = id.length
409
+ max_size = l if max_size < l
410
+ chr = chr.sub('chr','')
411
+ sin << [chr, start, eend, id] * "\t" << "\n"
412
+ end
413
+ end
414
+
415
+ TmpFile.with_file do |tmpfile|
416
+ Misc.consume_stream(nio, false, tmpfile)
417
+
418
+ value_size = max_size
419
+ destination = destination.find if Path === destination
420
+ sharder = Persist::Sharder.new destination, true, "fwt", :value_size => value_size, :range => true, :pos_function => pos_function do |key|
421
+ key.split(":")[0]
422
+ end
423
+
424
+ TSV.traverse tmpfile, :type => :array, :bar => "Creating BED index for #{Misc.fingerprint source}" do |line|
425
+ chr, start, eend, id, *rest = line.split("\t")
426
+ key = [chr, start, eend] * ":"
427
+ sharder[key] = id
428
+ end
429
+ sharder.read
430
+
431
+ sharder
432
+ end
433
+ end
434
+ end
391
435
  end
@@ -242,7 +242,10 @@ module Open
242
242
 
243
243
  if dir_sub_path_source.nil? and dir_sub_path_target.nil?
244
244
  FileUtils.mkdir_p File.dirname(target) unless File.exist? File.dirname(target)
245
- return FileUtils.mv source, target
245
+ tmp_target = File.join(File.dirname(target), '.tmp_mv.' + File.basename(target))
246
+ FileUtils.mv source, tmp_target
247
+ FileUtils.mv tmp_target, target
248
+ return
246
249
  end
247
250
 
248
251
  if dir_sub_path_source.nil?
@@ -7,6 +7,10 @@ module ComputeDependency
7
7
  dep.extend ComputeDependency
8
8
  dep.compute = value
9
9
  end
10
+
11
+ def canfail?
12
+ compute == :canfail || (Array === compute && compute.include?(:canfail))
13
+ end
10
14
  end
11
15
 
12
16
  class Step
@@ -157,6 +161,12 @@ class Step
157
161
  i[key] = value
158
162
  @info_cache = i
159
163
  Misc.sensiblewrite(info_file, INFO_SERIALIAZER.dump(i), :force => true, :lock => false)
164
+ #Misc.insist(([0.01,0.1,1] * 3).sort) do
165
+ Misc.insist do
166
+ Open.open(info_file) do |file|
167
+ INFO_SERIALIAZER.load(file)
168
+ end
169
+ end
160
170
  @info_cache_time = Time.now
161
171
  value
162
172
  end
@@ -357,9 +367,18 @@ class Step
357
367
 
358
368
  def dirty?
359
369
  status = self.status
360
- return true if done? and not status == :done and not status == :noinfo
361
- return true if status == :done and not done?
362
- dirty_files = rec_dependencies.collect{|dependency| dependency.path unless dependency.error? and not dependency.recoverable_error? }.compact.uniq.reject{|path| ! (Path === path) || path.exists?}
370
+
371
+ if done? and not status == :done and not status == :noinfo
372
+ return true
373
+ end
374
+ if status == :done and not done?
375
+ return true
376
+ end
377
+
378
+ dirty_files = rec_dependencies.reject{|dep|
379
+ (dep.path && Open.exists?(dep.path)) || (dep.error? && ! dep.recoverable_error?)
380
+ }
381
+
363
382
  if dirty_files.any?
364
383
  true
365
384
  else
@@ -368,7 +387,7 @@ class Step
368
387
  end
369
388
 
370
389
  def done?
371
- path and File.exist? path
390
+ path and Open.exists? path
372
391
  end
373
392
 
374
393
  def streaming?
@@ -283,7 +283,7 @@ class Step
283
283
 
284
284
  def clean
285
285
  status = []
286
- status << "dirty" if dirty?
286
+ status << "dirty" if done? and dirty?
287
287
  status << "not running" if not done? and not running?
288
288
  status.unshift " " if status.any?
289
289
  Log.medium "Cleaning step: #{path}#{status * " "}"
@@ -80,13 +80,17 @@ class Step
80
80
  return if status == 'streaming' and job.running?
81
81
  end
82
82
 
83
- if ((status == 'error' || job.aborted?) && job.recoverable_error?) || job.dirty?
83
+ if (status == 'error' && (job.recoverable_error? || job.dirty?)) ||
84
+ job.aborted? ||
85
+ (job.done? && job.dirty?) ||
86
+ (status == 'waiting' && ! job.running?)
87
+
84
88
  job.clean
85
89
  end
86
90
 
87
91
  (job.init_info and job.dup_inputs) unless status == 'done' or job.started?
88
92
 
89
- canfail = ComputeDependency === job and Array === job.compute and job.compute.include? :canfail
93
+ canfail = ComputeDependency === job && job.canfail?
90
94
  raise DependencyError, job if job.error? and not canfail
91
95
  end
92
96
 
@@ -192,13 +196,18 @@ class Step
192
196
  canfail = rest && rest.include?(:canfail)
193
197
 
194
198
  case type
199
+ when :canfail
200
+ list.each do |step|
201
+ step.produce
202
+ nil
203
+ end
195
204
  when :produce, :no_dup
196
205
  list.each do |step|
197
206
  Misc.insist do
198
207
  begin
199
208
  step.produce
200
209
  rescue RbbtException
201
- raise $! unless canfail
210
+ raise $! unless canfail || step.canfail?
202
211
  rescue Exception
203
212
  step.exception $!
204
213
  if step.recoverable_error?
@@ -225,23 +234,23 @@ class Step
225
234
  begin
226
235
  dep.produce
227
236
  Log.warn "Error in bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
228
- rescue Exception
229
- if canfail
230
- Log.warn "Allowing failing of #{dep.path}: #{dep.messages.last}"
231
- else
232
- raise $!
233
- end
237
+
234
238
  rescue Aborted
235
239
  dep.abort
236
240
  Log.warn "Aborted bootstrap dependency #{dep.path}: #{dep.messages.last}" if dep.error? or dep.aborted?
237
241
  raise $!
242
+
238
243
  rescue Exception
239
- dep.exception $!
240
- dep.exception $!
241
- if dep.recoverable_error?
242
- raise $!
244
+ if canfail || dep.canfail?
245
+ Log.warn "Allowing failing of #{dep.path}: #{dep.messages.last}"
243
246
  else
244
- raise StopInsist.new($!)
247
+ Log.warn "NOT Allowing failing of #{dep.path}: #{dep.messages.last}"
248
+ dep.exception $!
249
+ if dep.recoverable_error?
250
+ raise $!
251
+ else
252
+ raise StopInsist.new($!)
253
+ end
245
254
  end
246
255
  end
247
256
  end
@@ -281,13 +290,15 @@ class Step
281
290
  end
282
291
  end
283
292
 
293
+ produced = []
284
294
  dependencies.each do |dep|
285
295
  next unless ComputeDependency === dep
286
296
  if dep.compute == :produce
287
297
  dep.produce
298
+ produced << dep.path
288
299
  end
289
300
  end
290
-
301
+
291
302
  self.dup_inputs
292
303
 
293
304
  required_dep_paths = []
@@ -311,6 +322,7 @@ class Step
311
322
  next unless required_dep_paths.include? step.path
312
323
  if dependencies.include?(step) and step.inputs.flatten.select{|i| Step === i}.any?
313
324
  if ComputeDependency === step
325
+ next if produced.include? step.path
314
326
  compute_last_deps[step.compute] ||= []
315
327
  compute_last_deps[step.compute] << step
316
328
  else
@@ -318,6 +330,7 @@ class Step
318
330
  end
319
331
  else
320
332
  if ComputeDependency === step
333
+ next if produced.include? step.path
321
334
  compute_pre_deps[step.compute] ||= []
322
335
  compute_pre_deps[step.compute] << step
323
336
  else
@@ -352,7 +352,7 @@ class Step
352
352
  end
353
353
  end
354
354
 
355
- update
355
+ update if done?
356
356
 
357
357
  if dofork
358
358
  fork(true) unless started?
@@ -53,9 +53,9 @@ def report_msg(status, name, path)
53
53
  workflow = Log.color(:magenta, parts.pop)
54
54
 
55
55
  if not Open.remote?(path) and (File.exists?(path) and $main_mtime and ($main_mtime - File.mtime(path)) < 0)
56
- status_msg(status) << " " << [workflow, task, path] * " " << " (#{Log.color(:red, "Mtime out of sync") })\n"
56
+ status_msg(status.to_s) << " " << [workflow, task, path] * " " << " (#{Log.color(:red, "Mtime out of sync") })\n"
57
57
  else
58
- status_msg(status) << " " << [workflow, task, path] * " " << "\n"
58
+ status_msg(status.to_s) << " " << [workflow, task, path] * " " << "\n"
59
59
  end
60
60
  end
61
61
 
@@ -36,14 +36,14 @@ class TestFixWidthTable < Test::Unit::TestCase
36
36
  f.add [3,4,0], "test2"
37
37
  f.read
38
38
 
39
- assert_equal 1, f.pos(0)
40
- assert_equal 3, f.pos(1)
41
- assert_equal 2, f.pos_end(0)
42
- assert_equal 4, f.pos_end(1)
43
- assert_equal 0, f.overlap(0)
44
- assert_equal 0, f.overlap(1)
45
- assert_equal "test1", f.value(0)
46
- assert_equal "test2", f.value(1)
39
+ assert_equal 1, f.idx_pos(0)
40
+ assert_equal 3, f.idx_pos(1)
41
+ assert_equal 2, f.idx_pos_end(0)
42
+ assert_equal 4, f.idx_pos_end(1)
43
+ assert_equal 0, f.idx_overlap(0)
44
+ assert_equal 0, f.idx_overlap(1)
45
+ assert_equal "test1", f.idx_value(0)
46
+ assert_equal "test2", f.idx_value(1)
47
47
 
48
48
  end
49
49
 
@@ -104,5 +104,32 @@ g: ____
104
104
  end
105
105
  end
106
106
  end
107
+
108
+
109
+ def test_range_pos
110
+ data =<<-EOF
111
+ ##012345678901234567890
112
+ #ID:Range
113
+ a: ______
114
+ b: ______
115
+ c: _______
116
+ d: ____
117
+ e: ______
118
+ f: ___
119
+ g: ____
120
+ EOF
121
+ TmpFile.with_file(data) do |datafile|
122
+ tsv = load_data(datafile)
123
+ TmpFile.with_file do |filename|
124
+ f = FixWidthTable.new filename, 100, true
125
+ f.add_range tsv
126
+ f.read
127
+
128
+ assert_equal %w(), f.overlaps(0).sort
129
+ assert_equal %w(1:6), f.overlaps(1).sort
130
+ assert_equal %w(1:6:b), f.overlaps(1, true).sort
131
+ end
132
+ end
133
+ end
107
134
  end
108
135
 
@@ -65,4 +65,32 @@ class TestMiscOmics < Test::Unit::TestCase
65
65
  assert_equal Misc.translate_prot_mutation_hgvs2rbbt("p.(A775)ins?"), nil
66
66
  assert_equal Misc.translate_prot_mutation_hgvs2rbbt("p.?del"), nil
67
67
  end
68
+
69
+ def test_index_BED
70
+ text= ""
71
+
72
+ %w(1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 X Y MT).each do |chr|
73
+ %w(1 2 3 4 5 6 7 8).each do |i|
74
+ start = i.to_i * 100
75
+ eend = start + 50
76
+ id = [chr, i] * ":"
77
+ text << [chr, start.to_s, eend.to_s, id] * "\t" + "\n"
78
+ end
79
+ end
80
+
81
+
82
+ io = Misc.open_pipe do |sin|
83
+ sin.write text
84
+ end
85
+
86
+ TmpFile.with_file do |dir|
87
+ index = Misc.index_BED(io, dir)
88
+ assert_equal ["1:1"], index["1:120:130"]
89
+ index = Misc.index_BED(io, dir)
90
+ assert_equal ["2:2"], index["2:220:230"]
91
+ end
92
+
93
+
94
+
95
+ end
68
96
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.21.101
4
+ version: 5.21.102
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-10-29 00:00:00.000000000 Z
11
+ date: 2017-10-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake