rbbt-util 5.14.3 → 5.14.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/persist.rb +10 -3
- data/lib/rbbt/persist/tsv/adapter.rb +1 -1
- data/lib/rbbt/persist/tsv/cdb.rb +10 -7
- data/lib/rbbt/persist/tsv/sharder.rb +7 -0
- data/lib/rbbt/tsv/manipulate.rb +1 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +13 -2
- data/lib/rbbt/tsv/parser.rb +0 -1
- data/lib/rbbt/tsv/stream.rb +2 -0
- data/lib/rbbt/tsv/util.rb +4 -0
- data/lib/rbbt/util/R.rb +4 -0
- data/lib/rbbt/util/log/progress.rb +1 -0
- data/lib/rbbt/util/misc/bgzf.rb +154 -0
- data/lib/rbbt/util/misc/pipes.rb +12 -2
- data/lib/rbbt/util/open.rb +12 -5
- data/lib/rbbt/util/semaphore.rb +2 -2
- data/lib/rbbt/workflow/accessor.rb +2 -1
- data/test/rbbt/persist/test_tsv.rb +8 -6
- data/test/rbbt/test_packed_index.rb +42 -2
- data/test/rbbt/util/misc/test_bgzf.rb +39 -0
- data/test/rbbt/util/test_open.rb +11 -10
- data/test/test_helper.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: de7c9170a1eadbc1ee1117b6f1930ad96d94b63b
|
4
|
+
data.tar.gz: 7763cbe7760e56fc1faeba8d117ec7deddd25f88
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e082be066b8392739566438b2c8ce7c8064aae3a29622702af4e34980fafeb53f566c6f80472d2d892862f84cef03b6138462ebe4f33aa97a9941734b97bbf75
|
7
|
+
data.tar.gz: 71e7400483d8ba1bc92bc804ccd96f9bd2ea0ddd906fa63e7599bfa2cdea3ec433a4a5ca95ccb147d4616e5693c7032d2708dc174ea54f0bee286dbf6ab618a7
|
data/lib/rbbt/persist.rb
CHANGED
@@ -132,7 +132,10 @@ module Persist
|
|
132
132
|
end
|
133
133
|
|
134
134
|
def self.save_file(path, type, content, lockfile = nil)
|
135
|
-
|
135
|
+
if content.nil?
|
136
|
+
lockfile.unlock if lockfile and lockfile.locked?
|
137
|
+
return
|
138
|
+
end
|
136
139
|
|
137
140
|
case (type || :marshal).to_sym
|
138
141
|
when :nil
|
@@ -292,8 +295,12 @@ module Persist
|
|
292
295
|
retry
|
293
296
|
rescue Exception
|
294
297
|
Log.medium "Error in persist: #{path}#{Open.exists?(path) ? Log.color(:red, " Erasing") : ""}"
|
295
|
-
|
296
|
-
|
298
|
+
|
299
|
+
begin
|
300
|
+
FileUtils.rm path
|
301
|
+
rescue
|
302
|
+
end if Open.exists? path
|
303
|
+
|
297
304
|
raise $!
|
298
305
|
end
|
299
306
|
end
|
@@ -55,7 +55,7 @@ module Persist
|
|
55
55
|
def write_and_read
|
56
56
|
lock_filename = Persist.persistence_path(persistence_path + '.write', {:dir => TSV.lock_dir})
|
57
57
|
#mutex.synchronize do
|
58
|
-
Misc.lock(lock_filename
|
58
|
+
Misc.lock(lock_filename) do
|
59
59
|
write if closed? or not write?
|
60
60
|
res = begin
|
61
61
|
yield
|
data/lib/rbbt/persist/tsv/cdb.rb
CHANGED
@@ -3,7 +3,7 @@ require 'libcdb'
|
|
3
3
|
module Persist
|
4
4
|
|
5
5
|
module CDBAdapter
|
6
|
-
|
6
|
+
include Persist::TSVAdapter
|
7
7
|
|
8
8
|
def self.open(path, write)
|
9
9
|
write = true unless File.exists? path
|
@@ -19,6 +19,10 @@ module Persist
|
|
19
19
|
database
|
20
20
|
end
|
21
21
|
|
22
|
+
def serializer
|
23
|
+
:clean
|
24
|
+
end
|
25
|
+
|
22
26
|
def include?(k)
|
23
27
|
not write? and super(k)
|
24
28
|
end
|
@@ -38,9 +42,8 @@ module Persist
|
|
38
42
|
end
|
39
43
|
|
40
44
|
def fix_io
|
45
|
+
ddd instance_variable_get(:@io)
|
41
46
|
if instance_variable_get(:@io) != persistence_path
|
42
|
-
#close_read if read?
|
43
|
-
#close_write if write?
|
44
47
|
instance_variable_set(:@io, File.open(persistence_path))
|
45
48
|
end
|
46
49
|
end
|
@@ -129,10 +132,10 @@ module Persist
|
|
129
132
|
|
130
133
|
database = Persist::CDBAdapter.open(path, write)
|
131
134
|
|
132
|
-
unless serializer == :clean
|
133
|
-
|
134
|
-
|
135
|
-
end
|
135
|
+
#unless serializer == :clean
|
136
|
+
# TSV.setup database
|
137
|
+
# database.serializer = serializer if serializer
|
138
|
+
#end
|
136
139
|
|
137
140
|
database
|
138
141
|
end
|
@@ -124,6 +124,13 @@ module Persist
|
|
124
124
|
! write?
|
125
125
|
end
|
126
126
|
|
127
|
+
def range(*args)
|
128
|
+
databases.values.inject([]) do |acc,database|
|
129
|
+
acc.concat database.range(*args) if TokyoCabinet::BDB === database
|
130
|
+
acc
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
127
134
|
def each
|
128
135
|
databases.values.each do |database|
|
129
136
|
database.each do |k,v|
|
data/lib/rbbt/tsv/manipulate.rb
CHANGED
@@ -417,7 +417,7 @@ module TSV
|
|
417
417
|
case
|
418
418
|
when (Array === method and (key == :key or key_field == key))
|
419
419
|
with_unnamed do
|
420
|
-
|
420
|
+
Annotated.purge(method).each{|key|
|
421
421
|
new[key] = self[key] if invert ^ (self.include? key)
|
422
422
|
}
|
423
423
|
end
|
@@ -6,7 +6,7 @@ module TSV
|
|
6
6
|
nil
|
7
7
|
when (defined? Step and Step)
|
8
8
|
obj.result
|
9
|
-
when IO, File
|
9
|
+
when IO, File, Zlib::GzipReader, Bgzf
|
10
10
|
obj
|
11
11
|
when TSV::Dumper
|
12
12
|
obj.stream
|
@@ -24,14 +24,19 @@ module TSV
|
|
24
24
|
else
|
25
25
|
nil
|
26
26
|
end
|
27
|
+
when TSV
|
28
|
+
obj.length
|
27
29
|
when Array, Hash
|
28
30
|
obj.size
|
29
31
|
when File
|
32
|
+
return nil if Misc.gzip?(file) or Misc.bgzip?(file)
|
30
33
|
CMD.cmd("wc -l '#{obj.filename}'").read.to_i
|
31
34
|
when Path
|
35
|
+
return nil if Misc.gzip?(file) or Misc.bgzip?(file)
|
32
36
|
CMD.cmd("wc -l '#{obj.find}'").read.to_i
|
33
37
|
when String
|
34
38
|
if File.exists? obj
|
39
|
+
return nil if Misc.gzip?(file) or Misc.bgzip?(file)
|
35
40
|
CMD.cmd("wc -l '#{obj}'").read.to_i
|
36
41
|
else
|
37
42
|
nil
|
@@ -144,6 +149,12 @@ module TSV
|
|
144
149
|
|
145
150
|
if callback
|
146
151
|
while line = io.gets
|
152
|
+
if line[-1] != "\n"
|
153
|
+
while c = io.getc
|
154
|
+
line << c
|
155
|
+
break if c=="\n"
|
156
|
+
end
|
157
|
+
end
|
147
158
|
begin
|
148
159
|
callback.call yield line.strip
|
149
160
|
ensure
|
@@ -213,7 +224,7 @@ module TSV
|
|
213
224
|
else
|
214
225
|
obj.traverse(options, &block)
|
215
226
|
end
|
216
|
-
when IO, File, StringIO
|
227
|
+
when IO, File, Zlib::GzipReader, Bgzf, StringIO
|
217
228
|
begin
|
218
229
|
if options[:type] == :array
|
219
230
|
traverse_io_array(obj, options, &block)
|
data/lib/rbbt/tsv/parser.rb
CHANGED
data/lib/rbbt/tsv/stream.rb
CHANGED
data/lib/rbbt/tsv/util.rb
CHANGED
data/lib/rbbt/util/R.rb
CHANGED
@@ -0,0 +1,154 @@
|
|
1
|
+
|
2
|
+
# Random-access reading over a block-compressed (BGZF) stream.
#
# The module is meant to be mixed into a block reader object (see
# Bgzf.setup, which extends a Bio::BGZF::Reader). The host object must
# provide +tell+, +read_block+ and +read_block_at+. On top of those, this
# module keeps a logical position (+data_offset+) counted over the
# concatenated contents of the blocks, and offers +seek+, +read+ and +gets+
# against that position, with a small cache of recently used blocks.
module Bgzf
  attr_accessor :data_offset, :compressed_stream, :block_cache_size

  # Wraps +compressed_stream+ in a Bio::BGZF::Reader extended with this
  # module, positioned at data offset 0.
  #
  # The gem is required here rather than at load time, so it is only needed
  # when a bgzip stream is actually opened.
  def self.setup(compressed_stream)
    require 'bio-bgzf'
    reader = Bio::BGZF::Reader.new(compressed_stream)
    reader.extend Bgzf
    reader.compressed_stream = compressed_stream
    reader.data_offset = 0
    reader
  end

  # Filename of the underlying stream, or nil when it does not expose one.
  def filename
    @filename ||= begin
                    compressed_stream.respond_to?(:filename) ? compressed_stream.filename : nil
                  end
  end

  # True when the underlying compressed stream is closed.
  def closed?
    @compressed_stream.closed?
  end

  # Closes the underlying stream (if still open) and drops the block cache.
  # Safe to call before anything has been read.
  def close
    @compressed_stream.close unless @compressed_stream.closed?
    @access.clear if @access
    @blocks.clear if @blocks
  end

  # Moves the logical read position to +off+. Nothing is read until the next
  # call to +read+ or +gets+.
  def seek(off)
    @data_offset = off
  end

  # Lazily built table of [data_position, block_address] pairs, one per
  # block, obtained by reading every block in sequence from the current
  # stream position. Building the index also sizes the block cache.
  def _index
    @_index ||= begin
                  index = []
                  pos = 0
                  loop do
                    blockdata_offset = tell
                    block = read_block
                    break unless block
                    index << [pos, blockdata_offset]
                    pos += block.length
                  end
                  # Math.log(0) is -Infinity and cannot be converted to an
                  # Integer, so an empty stream gets the minimum cache size.
                  @block_cache_size = index.empty? ? 1 : Math.log(index.length).to_i + 1
                  index
                end
  end

  # Forces the block index to be built.
  def init
    _index
  end

  # Index (into +_index+) of the block containing data position +pos+:
  # the last block whose starting position is <= +pos+. Returns -1 when the
  # index is empty or +pos+ lies before the first block.
  #
  # The search is a bisection, except that the first probe reuses the block
  # found by the previous call, which makes sequential access cheap.
  def closest_page(pos)
    @_index_pos ||= _index.collect { |entry| entry.first }
    lower = 0
    upper = @_index_pos.size - 1
    return -1 if upper < lower

    idx = nil
    pos_idx = nil
    while upper >= lower
      idx = (idx.nil? && @last_idx) ? @last_idx : lower + (upper - lower) / 2
      pos_idx = @_index_pos[idx]

      case pos <=> pos_idx
      when 0
        break
      when -1
        upper = idx - 1
      when 1
        lower = idx + 1
      end
    end

    @last_idx = idx

    # When the last probe overshot, the containing block is the previous one.
    idx -= 1 if pos_idx > pos

    idx
  end

  # Translates the current +data_offset+ into [block_address, offset inside
  # that block]. Returns nil when no block covers the position (empty stream
  # or negative offset).
  def block_offset
    pos = data_offset
    i = closest_page(pos)
    return nil if i < 0
    start, page = _index[i]
    [page, pos - start]
  end

  # Evicts least-recently-used blocks while the cache holds more than
  # +block_cache_size+ entries. +@access+ lists block addresses from least
  # to most recently used.
  def _purge_cache
    @blocks ||= {}
    @access ||= []
    limit = @block_cache_size || 1
    while @blocks.length > limit
      oldest = @access.find { |vo| @blocks.include? vo }
      break if oldest.nil?
      @access.delete oldest
      @blocks.delete oldest
    end
  end

  # Returns the contents of the block at address +vo+, from the cache when
  # possible; otherwise it is read with +read_block_at+ and cached.
  def _get_block(vo)
    @blocks ||= {}
    @access ||= []

    # Move vo to the most-recently-used end of the list.
    @access.delete vo
    @access << vo

    return @blocks[vo] if @blocks.include? vo

    _purge_cache
    @blocks[vo] = read_block_at vo
  end

  # Remainder of the current block, starting at the current +data_offset+.
  # Returns nil or an empty string when there is nothing left to read.
  def get_block
    location = block_offset
    return nil if location.nil?
    block_vo, offset = location
    block = _get_block block_vo
    block && block[offset..-1]
  end

  # Reads up to +size+ characters starting at +data_offset+, crossing block
  # boundaries as needed, and advances +data_offset+ by what was read.
  # With no +size+ (or nil) reads up to the end of the data.
  # Returns an empty string at the end of the data or when +size+ is not
  # positive.
  def read(size = nil)
    result = nil
    remaining = size

    loop do
      break if remaining && remaining <= 0
      chunk = get_block
      break if chunk.nil? || chunk.empty?

      chunk = chunk[0, remaining] if remaining && chunk.length > remaining
      @data_offset += chunk.length
      remaining -= chunk.length if remaining

      if result
        result << chunk
      else
        result = chunk
      end
    end

    result || ""
  end

  # Reads the next line, including its trailing newline when present.
  # Returns nil at the end of the data.
  def gets
    str = nil
    current = @data_offset

    loop do
      block = read(1024)
      break if block.empty?
      str = "".dup if str.nil?
      if (i = block.index("\n"))
        str << block[0..i]
        break
      else
        str << block
      end
    end
    return nil if str.nil?

    # read may have gone past the end of the line; rewind to just after it.
    @data_offset = current + str.length

    str
  end
end
|
data/lib/rbbt/util/misc/pipes.rb
CHANGED
@@ -214,13 +214,23 @@ module Misc
|
|
214
214
|
|
215
215
|
def self.sensiblewrite(path, content = nil, options = {}, &block)
|
216
216
|
force = Misc.process_options options, :force
|
217
|
-
|
217
|
+
|
218
|
+
if Open.exists? path and not force
|
219
|
+
Misc.consume_stream content
|
220
|
+
return
|
221
|
+
end
|
222
|
+
|
218
223
|
lock_options = Misc.pull_keys options.dup, :lock
|
219
224
|
lock_options = lock_options[:lock] if Hash === lock_options[:lock]
|
220
225
|
tmp_path = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_dir})
|
221
226
|
tmp_path_lock = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_lock_dir})
|
222
227
|
Misc.lock tmp_path_lock, lock_options do
|
223
|
-
|
228
|
+
|
229
|
+
if Open.exists? path and not force
|
230
|
+
Misc.consume_stream content
|
231
|
+
return
|
232
|
+
end
|
233
|
+
|
224
234
|
FileUtils.mkdir_p File.dirname(tmp_path) unless File.directory? File.dirname(tmp_path)
|
225
235
|
FileUtils.rm_f tmp_path if File.exists? tmp_path
|
226
236
|
begin
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'rbbt/util/cmd'
|
2
2
|
require 'rbbt/util/tmpfile'
|
3
3
|
require 'rbbt/util/misc'
|
4
|
+
require 'rbbt/util/misc/bgzf'
|
4
5
|
|
5
6
|
require 'zlib'
|
6
7
|
|
@@ -285,13 +286,14 @@ module Open
|
|
285
286
|
|
286
287
|
|
287
288
|
# Decompression
|
289
|
+
|
290
|
+
def self.bgunzip(stream)
|
291
|
+
Bgzf.setup stream
|
292
|
+
end
|
288
293
|
|
289
294
|
def self.gunzip(stream)
|
290
|
-
|
291
|
-
|
292
|
-
else
|
293
|
-
CMD.cmd("gunzip", :pipe => true, :in => stream, :post => proc{stream.force_close if stream.respond_to? :force_close})
|
294
|
-
end
|
295
|
+
#Zlib::GzipReader.new(stream)
|
296
|
+
CMD.cmd('zcat', :in => stream, :pipe => true)
|
295
297
|
end
|
296
298
|
|
297
299
|
def self.unzip(stream)
|
@@ -310,6 +312,10 @@ module Open
|
|
310
312
|
!! (file =~ /\.gz$/)
|
311
313
|
end
|
312
314
|
|
315
|
+
def self.bgzip?(file)
|
316
|
+
!! (file =~ /\.bgz$/)
|
317
|
+
end
|
318
|
+
|
313
319
|
def self.zip?(file)
|
314
320
|
!! (file =~ /\.zip/)
|
315
321
|
end
|
@@ -375,6 +381,7 @@ module Open
|
|
375
381
|
end
|
376
382
|
io = unzip(io) if ((String === url and zip?(url)) and not options[:noz]) or options[:zip]
|
377
383
|
io = gunzip(io) if ((String === url and gzip?(url)) and not options[:noz]) or options[:gzip]
|
384
|
+
io = bgunzip(io) if ((String === url and bgzip?(url)) and not options[:noz]) or options[:bgzip]
|
378
385
|
|
379
386
|
if block_given?
|
380
387
|
begin
|
data/lib/rbbt/util/semaphore.rb
CHANGED
@@ -68,11 +68,11 @@ void post_semaphore(char* name){
|
|
68
68
|
end
|
69
69
|
|
70
70
|
begin
|
71
|
-
Log.
|
71
|
+
Log.low "Creating semaphore (#{ size }): #{file}"
|
72
72
|
RbbtSemaphore.create_semaphore(file, size)
|
73
73
|
yield file
|
74
74
|
ensure
|
75
|
-
Log.
|
75
|
+
Log.low "Removing semaphore #{ file }"
|
76
76
|
RbbtSemaphore.delete_semaphore(file)
|
77
77
|
end
|
78
78
|
end
|
@@ -386,6 +386,7 @@ module Workflow
|
|
386
386
|
def task_info(name)
|
387
387
|
name = name.to_sym
|
388
388
|
task = tasks[name]
|
389
|
+
raise "No '#{name}' task in '#{self.to_s}' Workflow" if task.nil?
|
389
390
|
description = task.description
|
390
391
|
result_description = task.result_description
|
391
392
|
result_type = task.result_type
|
@@ -503,7 +504,7 @@ module Workflow
|
|
503
504
|
when Symbol
|
504
505
|
job(dependency, jobname, inputs)
|
505
506
|
when Proc
|
506
|
-
dependency.call jobname, inputs
|
507
|
+
dependency.call jobname, inputs, real_dependencies
|
507
508
|
end
|
508
509
|
end
|
509
510
|
real_dependencies.flatten.compact
|
@@ -21,9 +21,11 @@ class TestPersistTSV < Test::Unit::TestCase
|
|
21
21
|
Log.info "Testing #{ Term::ANSIColor.red(engine) }"
|
22
22
|
TmpFile.with_file nil, false do |tmp_file|
|
23
23
|
db= nil
|
24
|
+
|
24
25
|
Misc.benchmark(1, "Build database with #{MAX - 2} entries") do
|
25
26
|
db = TSV.open(file, :fields => [1], :persist => true, :persist_engine => engine, :persist_dir => tmp_file, :type => :single, :unnamed => true)
|
26
27
|
end
|
28
|
+
|
27
29
|
_test = db.keys.sort{rand}[1..100000]
|
28
30
|
Misc.benchmark(5, "Access #{test.length} random entries") do
|
29
31
|
_test.each do |k| db[k] end
|
@@ -36,32 +38,32 @@ class TestPersistTSV < Test::Unit::TestCase
|
|
36
38
|
end
|
37
39
|
end
|
38
40
|
|
39
|
-
def
|
41
|
+
def __test_benchmark_tch
|
40
42
|
engine = "HDB"
|
41
43
|
run_bechmark(tsv_path, engine)
|
42
44
|
end
|
43
45
|
|
44
|
-
def
|
46
|
+
def __test_benchmark_tcb
|
45
47
|
engine = "BDB"
|
46
48
|
run_bechmark(tsv_path, engine)
|
47
49
|
end
|
48
50
|
|
49
|
-
def
|
51
|
+
def __test_benchmark_kch
|
50
52
|
engine = "kch"
|
51
53
|
run_bechmark(tsv_path, engine)
|
52
54
|
end
|
53
55
|
|
54
|
-
def
|
56
|
+
def __test_benchmark_kcb
|
55
57
|
engine = "kct"
|
56
58
|
run_bechmark(tsv_path, engine)
|
57
59
|
end
|
58
60
|
|
59
|
-
def
|
61
|
+
def __test_benchmark_cdb
|
60
62
|
engine = "CDB"
|
61
63
|
run_bechmark(tsv_path, engine)
|
62
64
|
end
|
63
65
|
|
64
|
-
def
|
66
|
+
def __test_benchmark_leveldb
|
65
67
|
engine = "LevelDB"
|
66
68
|
run_bechmark(tsv_path, engine)
|
67
69
|
end
|
@@ -2,7 +2,7 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.r
|
|
2
2
|
require 'rbbt/packed_index'
|
3
3
|
|
4
4
|
class TestPackedIndex < Test::Unit::TestCase
|
5
|
-
def
|
5
|
+
def _test_index
|
6
6
|
|
7
7
|
TmpFile.with_file do |tmpfile|
|
8
8
|
pi = PackedIndex.new tmpfile, true, %w(i i 23s f f f f f)
|
@@ -12,7 +12,7 @@ class TestPackedIndex < Test::Unit::TestCase
|
|
12
12
|
pi << nil
|
13
13
|
pi << nil
|
14
14
|
pi.close
|
15
|
-
pi = PackedIndex.new
|
15
|
+
pi = PackedIndex.new(tmpfile, false)
|
16
16
|
Misc.benchmark(1000) do
|
17
17
|
100.times do |i|
|
18
18
|
assert_equal i, pi[i][0]
|
@@ -21,7 +21,47 @@ class TestPackedIndex < Test::Unit::TestCase
|
|
21
21
|
end
|
22
22
|
assert_equal nil, pi[100]
|
23
23
|
assert_equal nil, pi[101]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_bgzip
|
28
|
+
|
29
|
+
size = 1000000
|
30
|
+
density = 0.1
|
31
|
+
|
32
|
+
access = []
|
33
|
+
(size * density).to_i.times do
|
34
|
+
access << rand(size-1) + 1
|
35
|
+
end
|
36
|
+
access.sort!
|
37
|
+
access.uniq!
|
38
|
+
|
39
|
+
TmpFile.with_file do |tmpfile|
|
40
|
+
pi = PackedIndex.new tmpfile, true, %w(i i 23s f f f f f)
|
41
|
+
size.times do |i|
|
42
|
+
pi << [i, i+2, i.to_s * 10, rand, rand, rand, rand, rand]
|
43
|
+
end
|
44
|
+
pi << nil
|
45
|
+
pi << nil
|
46
|
+
pi.close
|
24
47
|
|
48
|
+
pi = PackedIndex.new(tmpfile, false)
|
49
|
+
Misc.benchmark do
|
50
|
+
access.each do |point|
|
51
|
+
assert_equal point+2, pi[point][1]
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
`bgzip #{tmpfile} `
|
56
|
+
`mv #{tmpfile}.gz #{tmpfile}.bgz`
|
57
|
+
|
58
|
+
pi = PackedIndex.new(tmpfile + '.bgz', false)
|
59
|
+
pi[0]
|
60
|
+
Misc.benchmark do
|
61
|
+
access.each do |point|
|
62
|
+
assert_equal point+2, pi[point][1]
|
63
|
+
end
|
64
|
+
end
|
25
65
|
end
|
26
66
|
end
|
27
67
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/util/misc/bgzf'
|
3
|
+
|
4
|
+
class TestBgzf < Test::Unit::TestCase
|
5
|
+
def _test_Bgzf
|
6
|
+
content = "1234567890" * 1000000
|
7
|
+
TmpFile.with_file(content) do |file|
|
8
|
+
compressed = file + '.gz'
|
9
|
+
`bgzip #{file} -c > #{compressed}`
|
10
|
+
stream = Bgzf.setup File.open(compressed)
|
11
|
+
stream.seek 500003
|
12
|
+
assert_equal "4567", stream.read(4)
|
13
|
+
assert_equal "89", stream.read(2)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_tsv
|
18
|
+
require 'rbbt/tsv'
|
19
|
+
TmpFile.with_file(datafile_test(:identifiers).read) do |file|
|
20
|
+
Misc.benchmark do
|
21
|
+
tsv = TSV.open(Open.open(file))
|
22
|
+
end
|
23
|
+
compressed = file + '.bgz'
|
24
|
+
|
25
|
+
`bgzip #{file} -c > #{compressed}`
|
26
|
+
stream = Bgzf.setup File.open(compressed)
|
27
|
+
Misc.benchmark do
|
28
|
+
tsv = TSV.open(stream)
|
29
|
+
end
|
30
|
+
|
31
|
+
`gzip #{file}`
|
32
|
+
stream = Open.open(file + '.gz')
|
33
|
+
Misc.benchmark do
|
34
|
+
tsv = TSV.open(stream)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
data/test/rbbt/util/test_open.rb
CHANGED
@@ -7,11 +7,11 @@ require 'iconv'
|
|
7
7
|
|
8
8
|
class TestOpen < Test::Unit::TestCase
|
9
9
|
|
10
|
-
def
|
10
|
+
def _test_wget
|
11
11
|
assert(Misc.fixutf8(Open.wget('http://google.com', :quiet => true).read) =~ /html/)
|
12
12
|
end
|
13
13
|
|
14
|
-
def
|
14
|
+
def _test_nice
|
15
15
|
nice = 0.5
|
16
16
|
|
17
17
|
t = Time.now
|
@@ -24,16 +24,16 @@ class TestOpen < Test::Unit::TestCase
|
|
24
24
|
assert(Time.now - t + 0.5 >= nice)
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
27
|
+
def _test_remote?
|
28
28
|
assert(Open.remote?('http://google.com'))
|
29
29
|
assert(! Open.remote?('~/.bashrc'))
|
30
30
|
end
|
31
31
|
|
32
|
-
def
|
32
|
+
def _test_open
|
33
33
|
assert(Open.read('http://google.com', :quiet => true) =~ /html/)
|
34
34
|
end
|
35
35
|
|
36
|
-
def
|
36
|
+
def _test_read
|
37
37
|
content =<<-EOF
|
38
38
|
1
|
39
39
|
2
|
@@ -48,7 +48,7 @@ class TestOpen < Test::Unit::TestCase
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
def
|
51
|
+
def _test_read_grep
|
52
52
|
content =<<-EOF
|
53
53
|
1
|
54
54
|
2
|
@@ -68,7 +68,7 @@ class TestOpen < Test::Unit::TestCase
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
-
def
|
71
|
+
def _test_read_grep_invert
|
72
72
|
content =<<-EOF
|
73
73
|
1
|
74
74
|
2
|
@@ -98,13 +98,14 @@ class TestOpen < Test::Unit::TestCase
|
|
98
98
|
4
|
99
99
|
EOF
|
100
100
|
TmpFile.with_file(content) do |file|
|
101
|
-
`
|
101
|
+
`bgzip #{file}`
|
102
102
|
assert_equal(content, Open.read(file + '.gz'))
|
103
|
+
puts content
|
103
104
|
FileUtils.rm file + '.gz'
|
104
105
|
end
|
105
106
|
end
|
106
107
|
|
107
|
-
def
|
108
|
+
def _test_repo_dir
|
108
109
|
file1 = "TEST"
|
109
110
|
file2 = "TEST" * 1000
|
110
111
|
TmpFile.with_file do |tmpdir|
|
@@ -132,7 +133,7 @@ class TestOpen < Test::Unit::TestCase
|
|
132
133
|
end
|
133
134
|
end
|
134
135
|
|
135
|
-
def
|
136
|
+
def _test_repo_dir
|
136
137
|
TmpFile.with_file do |tmpdir|
|
137
138
|
tmpdir = "/home/mvazquezg/tmp/repo_dir"
|
138
139
|
repo = File.join(tmpdir, 'repo')
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.14.3
|
4
|
+
version: 5.14.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -185,6 +185,7 @@ files:
|
|
185
185
|
- lib/rbbt/util/log/progress/report.rb
|
186
186
|
- lib/rbbt/util/log/progress/util.rb
|
187
187
|
- lib/rbbt/util/misc.rb
|
188
|
+
- lib/rbbt/util/misc/bgzf.rb
|
188
189
|
- lib/rbbt/util/misc/concurrent_stream.rb
|
189
190
|
- lib/rbbt/util/misc/development.rb
|
190
191
|
- lib/rbbt/util/misc/exceptions.rb
|
@@ -320,6 +321,7 @@ files:
|
|
320
321
|
- test/rbbt/util/concurrency/test_processes.rb
|
321
322
|
- test/rbbt/util/concurrency/test_threads.rb
|
322
323
|
- test/rbbt/util/log/test_progress.rb
|
324
|
+
- test/rbbt/util/misc/test_bgzf.rb
|
323
325
|
- test/rbbt/util/misc/test_lock.rb
|
324
326
|
- test/rbbt/util/misc/test_pipes.rb
|
325
327
|
- test/rbbt/util/simpleopt/test_get.rb
|
@@ -379,6 +381,7 @@ test_files:
|
|
379
381
|
- test/rbbt/util/test_log.rb
|
380
382
|
- test/rbbt/util/test_open.rb
|
381
383
|
- test/rbbt/util/misc/test_lock.rb
|
384
|
+
- test/rbbt/util/misc/test_bgzf.rb
|
382
385
|
- test/rbbt/util/misc/test_pipes.rb
|
383
386
|
- test/rbbt/util/test_concurrency.rb
|
384
387
|
- test/rbbt/util/test_R.rb
|