rbbt-util 5.14.3 → 5.14.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/rbbt/persist.rb +10 -3
- data/lib/rbbt/persist/tsv/adapter.rb +1 -1
- data/lib/rbbt/persist/tsv/cdb.rb +10 -7
- data/lib/rbbt/persist/tsv/sharder.rb +7 -0
- data/lib/rbbt/tsv/manipulate.rb +1 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +13 -2
- data/lib/rbbt/tsv/parser.rb +0 -1
- data/lib/rbbt/tsv/stream.rb +2 -0
- data/lib/rbbt/tsv/util.rb +4 -0
- data/lib/rbbt/util/R.rb +4 -0
- data/lib/rbbt/util/log/progress.rb +1 -0
- data/lib/rbbt/util/misc/bgzf.rb +154 -0
- data/lib/rbbt/util/misc/pipes.rb +12 -2
- data/lib/rbbt/util/open.rb +12 -5
- data/lib/rbbt/util/semaphore.rb +2 -2
- data/lib/rbbt/workflow/accessor.rb +2 -1
- data/test/rbbt/persist/test_tsv.rb +8 -6
- data/test/rbbt/test_packed_index.rb +42 -2
- data/test/rbbt/util/misc/test_bgzf.rb +39 -0
- data/test/rbbt/util/test_open.rb +11 -10
- data/test/test_helper.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: de7c9170a1eadbc1ee1117b6f1930ad96d94b63b
|
4
|
+
data.tar.gz: 7763cbe7760e56fc1faeba8d117ec7deddd25f88
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e082be066b8392739566438b2c8ce7c8064aae3a29622702af4e34980fafeb53f566c6f80472d2d892862f84cef03b6138462ebe4f33aa97a9941734b97bbf75
|
7
|
+
data.tar.gz: 71e7400483d8ba1bc92bc804ccd96f9bd2ea0ddd906fa63e7599bfa2cdea3ec433a4a5ca95ccb147d4616e5693c7032d2708dc174ea54f0bee286dbf6ab618a7
|
data/lib/rbbt/persist.rb
CHANGED
@@ -132,7 +132,10 @@ module Persist
|
|
132
132
|
end
|
133
133
|
|
134
134
|
def self.save_file(path, type, content, lockfile = nil)
|
135
|
-
|
135
|
+
if content.nil?
|
136
|
+
lockfile.unlock if lockfile and lockfile.locked?
|
137
|
+
return
|
138
|
+
end
|
136
139
|
|
137
140
|
case (type || :marshal).to_sym
|
138
141
|
when :nil
|
@@ -292,8 +295,12 @@ module Persist
|
|
292
295
|
retry
|
293
296
|
rescue Exception
|
294
297
|
Log.medium "Error in persist: #{path}#{Open.exists?(path) ? Log.color(:red, " Erasing") : ""}"
|
295
|
-
|
296
|
-
|
298
|
+
|
299
|
+
begin
|
300
|
+
FileUtils.rm path
|
301
|
+
rescue
|
302
|
+
end if Open.exists? path
|
303
|
+
|
297
304
|
raise $!
|
298
305
|
end
|
299
306
|
end
|
@@ -55,7 +55,7 @@ module Persist
|
|
55
55
|
def write_and_read
|
56
56
|
lock_filename = Persist.persistence_path(persistence_path + '.write', {:dir => TSV.lock_dir})
|
57
57
|
#mutex.synchronize do
|
58
|
-
Misc.lock(lock_filename
|
58
|
+
Misc.lock(lock_filename) do
|
59
59
|
write if closed? or not write?
|
60
60
|
res = begin
|
61
61
|
yield
|
data/lib/rbbt/persist/tsv/cdb.rb
CHANGED
@@ -3,7 +3,7 @@ require 'libcdb'
|
|
3
3
|
module Persist
|
4
4
|
|
5
5
|
module CDBAdapter
|
6
|
-
|
6
|
+
include Persist::TSVAdapter
|
7
7
|
|
8
8
|
def self.open(path, write)
|
9
9
|
write = true unless File.exists? path
|
@@ -19,6 +19,10 @@ module Persist
|
|
19
19
|
database
|
20
20
|
end
|
21
21
|
|
22
|
+
def serializer
|
23
|
+
:clean
|
24
|
+
end
|
25
|
+
|
22
26
|
def include?(k)
|
23
27
|
not write? and super(k)
|
24
28
|
end
|
@@ -38,9 +42,8 @@ module Persist
|
|
38
42
|
end
|
39
43
|
|
40
44
|
def fix_io
|
45
|
+
ddd instance_variable_get(:@io)
|
41
46
|
if instance_variable_get(:@io) != persistence_path
|
42
|
-
#close_read if read?
|
43
|
-
#close_write if write?
|
44
47
|
instance_variable_set(:@io, File.open(persistence_path))
|
45
48
|
end
|
46
49
|
end
|
@@ -129,10 +132,10 @@ module Persist
|
|
129
132
|
|
130
133
|
database = Persist::CDBAdapter.open(path, write)
|
131
134
|
|
132
|
-
unless serializer == :clean
|
133
|
-
|
134
|
-
|
135
|
-
end
|
135
|
+
#unless serializer == :clean
|
136
|
+
# TSV.setup database
|
137
|
+
# database.serializer = serializer if serializer
|
138
|
+
#end
|
136
139
|
|
137
140
|
database
|
138
141
|
end
|
@@ -124,6 +124,13 @@ module Persist
|
|
124
124
|
! write?
|
125
125
|
end
|
126
126
|
|
127
|
+
def range(*args)
|
128
|
+
databases.values.inject([]) do |acc,database|
|
129
|
+
acc.concat database.range(*args) if TokyoCabinet::BDB === database
|
130
|
+
acc
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
127
134
|
def each
|
128
135
|
databases.values.each do |database|
|
129
136
|
database.each do |k,v|
|
data/lib/rbbt/tsv/manipulate.rb
CHANGED
@@ -417,7 +417,7 @@ module TSV
|
|
417
417
|
case
|
418
418
|
when (Array === method and (key == :key or key_field == key))
|
419
419
|
with_unnamed do
|
420
|
-
|
420
|
+
Annotated.purge(method).each{|key|
|
421
421
|
new[key] = self[key] if invert ^ (self.include? key)
|
422
422
|
}
|
423
423
|
end
|
@@ -6,7 +6,7 @@ module TSV
|
|
6
6
|
nil
|
7
7
|
when (defined? Step and Step)
|
8
8
|
obj.result
|
9
|
-
when IO, File
|
9
|
+
when IO, File, Zlib::GzipReader, Bgzf
|
10
10
|
obj
|
11
11
|
when TSV::Dumper
|
12
12
|
obj.stream
|
@@ -24,14 +24,19 @@ module TSV
|
|
24
24
|
else
|
25
25
|
nil
|
26
26
|
end
|
27
|
+
when TSV
|
28
|
+
obj.length
|
27
29
|
when Array, Hash
|
28
30
|
obj.size
|
29
31
|
when File
|
32
|
+
return nil if Misc.gzip?(file) or Misc.bgzip?(file)
|
30
33
|
CMD.cmd("wc -l '#{obj.filename}'").read.to_i
|
31
34
|
when Path
|
35
|
+
return nil if Misc.gzip?(file) or Misc.bgzip?(file)
|
32
36
|
CMD.cmd("wc -l '#{obj.find}'").read.to_i
|
33
37
|
when String
|
34
38
|
if File.exists? obj
|
39
|
+
return nil if Misc.gzip?(file) or Misc.bgzip?(file)
|
35
40
|
CMD.cmd("wc -l '#{obj}'").read.to_i
|
36
41
|
else
|
37
42
|
nil
|
@@ -144,6 +149,12 @@ module TSV
|
|
144
149
|
|
145
150
|
if callback
|
146
151
|
while line = io.gets
|
152
|
+
if line[-1] != "\n"
|
153
|
+
while c = io.getc
|
154
|
+
line << c
|
155
|
+
break if c=="\n"
|
156
|
+
end
|
157
|
+
end
|
147
158
|
begin
|
148
159
|
callback.call yield line.strip
|
149
160
|
ensure
|
@@ -213,7 +224,7 @@ module TSV
|
|
213
224
|
else
|
214
225
|
obj.traverse(options, &block)
|
215
226
|
end
|
216
|
-
when IO, File, StringIO
|
227
|
+
when IO, File, Zlib::GzipReader, Bgzf, StringIO
|
217
228
|
begin
|
218
229
|
if options[:type] == :array
|
219
230
|
traverse_io_array(obj, options, &block)
|
data/lib/rbbt/tsv/parser.rb
CHANGED
data/lib/rbbt/tsv/stream.rb
CHANGED
data/lib/rbbt/tsv/util.rb
CHANGED
data/lib/rbbt/util/R.rb
CHANGED
@@ -0,0 +1,154 @@
|
|
1
|
+
|
2
|
+
module Bgzf
|
3
|
+
attr_accessor :data_offset, :compressed_stream, :block_cache_size
|
4
|
+
|
5
|
+
def self.setup(compressed_stream)
|
6
|
+
require 'bio-bgzf'
|
7
|
+
reader = Bio::BGZF::Reader.new(compressed_stream)
|
8
|
+
reader.extend Bgzf
|
9
|
+
reader.compressed_stream = compressed_stream
|
10
|
+
reader.data_offset = 0
|
11
|
+
reader
|
12
|
+
end
|
13
|
+
|
14
|
+
def filename
|
15
|
+
@filename ||= begin
|
16
|
+
compressed_stream.respond_to?(:filename) ? compressed_stream.filename : nil
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
def closed?
|
21
|
+
@compressed_stream.closed?
|
22
|
+
end
|
23
|
+
|
24
|
+
def close
|
25
|
+
@compressed_stream.close unless @compressed_stream.closed?
|
26
|
+
@access.clear
|
27
|
+
@blocks.clear
|
28
|
+
end
|
29
|
+
|
30
|
+
def seek(off)
|
31
|
+
@data_offset = off
|
32
|
+
end
|
33
|
+
|
34
|
+
def _index
|
35
|
+
@_index ||= begin
|
36
|
+
index = []
|
37
|
+
pos = 0
|
38
|
+
while true do
|
39
|
+
blockdata_offset = tell
|
40
|
+
block = read_block
|
41
|
+
break unless block
|
42
|
+
index << [pos, blockdata_offset]
|
43
|
+
pos += block.length
|
44
|
+
end
|
45
|
+
@block_cache_size = Math.log(index.length).to_i + 1
|
46
|
+
index
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def init
|
51
|
+
_index
|
52
|
+
end
|
53
|
+
|
54
|
+
def closest_page(pos)
|
55
|
+
upper = _index.size - 1
|
56
|
+
lower = 0
|
57
|
+
@_index_pos ||= _index.collect{|v| v.first }
|
58
|
+
|
59
|
+
return -1 if upper < lower
|
60
|
+
|
61
|
+
while(upper >= lower) do
|
62
|
+
idx = (idx.nil? and @last_idx) ? @last_idx : (lower + (upper - lower) / 2)
|
63
|
+
pos_idx = @_index_pos[idx]
|
64
|
+
|
65
|
+
case pos <=> pos_idx
|
66
|
+
when 0
|
67
|
+
break
|
68
|
+
when -1
|
69
|
+
upper = idx - 1
|
70
|
+
when 1
|
71
|
+
lower = idx + 1
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
75
|
+
@last_idx = idx
|
76
|
+
|
77
|
+
if pos_idx > pos
|
78
|
+
idx = idx - 1
|
79
|
+
end
|
80
|
+
|
81
|
+
|
82
|
+
idx.to_i
|
83
|
+
end
|
84
|
+
|
85
|
+
def block_offset
|
86
|
+
pos = data_offset
|
87
|
+
i = closest_page(data_offset)
|
88
|
+
page = _index[i][1]
|
89
|
+
offset = pos - _index[i][0]
|
90
|
+
[page, offset]
|
91
|
+
end
|
92
|
+
|
93
|
+
def _purge_cache
|
94
|
+
if @blocks.length > @block_cache_size
|
95
|
+
@access.uniq!
|
96
|
+
oldest = @access.last
|
97
|
+
@blocks.delete oldest
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
def _get_block(vo)
|
102
|
+
@blocks ||= {}
|
103
|
+
@access ||= []
|
104
|
+
@access << vo
|
105
|
+
if @blocks.include? vo
|
106
|
+
@blocks[vo]
|
107
|
+
else
|
108
|
+
_purge_cache
|
109
|
+
@blocks[vo] ||= read_block_at vo
|
110
|
+
end
|
111
|
+
end
|
112
|
+
|
113
|
+
def get_block
|
114
|
+
block_vo, offset = block_offset
|
115
|
+
block = _get_block block_vo
|
116
|
+
block[offset..-1]
|
117
|
+
end
|
118
|
+
|
119
|
+
def read(size)
|
120
|
+
block = get_block
|
121
|
+
return "" if block.nil? or block.empty?
|
122
|
+
len = block.length
|
123
|
+
if len >= size
|
124
|
+
@data_offset += size
|
125
|
+
return block[0..size-1]
|
126
|
+
else
|
127
|
+
@data_offset += len
|
128
|
+
str = block
|
129
|
+
str << read(size - len)
|
130
|
+
str
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
def gets
|
135
|
+
str = nil
|
136
|
+
current = @data_offset
|
137
|
+
while true
|
138
|
+
block = read(1024)
|
139
|
+
break if block.empty?
|
140
|
+
str = "" if str.nil?
|
141
|
+
if i = block.index("\n")
|
142
|
+
str << block[0..i]
|
143
|
+
break
|
144
|
+
else
|
145
|
+
str << block
|
146
|
+
end
|
147
|
+
end
|
148
|
+
return nil if str.nil?
|
149
|
+
|
150
|
+
@data_offset = current + str.length
|
151
|
+
|
152
|
+
str
|
153
|
+
end
|
154
|
+
end
|
data/lib/rbbt/util/misc/pipes.rb
CHANGED
@@ -214,13 +214,23 @@ module Misc
|
|
214
214
|
|
215
215
|
def self.sensiblewrite(path, content = nil, options = {}, &block)
|
216
216
|
force = Misc.process_options options, :force
|
217
|
-
|
217
|
+
|
218
|
+
if Open.exists? path and not force
|
219
|
+
Misc.consume_stream content
|
220
|
+
return
|
221
|
+
end
|
222
|
+
|
218
223
|
lock_options = Misc.pull_keys options.dup, :lock
|
219
224
|
lock_options = lock_options[:lock] if Hash === lock_options[:lock]
|
220
225
|
tmp_path = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_dir})
|
221
226
|
tmp_path_lock = Persist.persistence_path(path, {:dir => Misc.sensiblewrite_lock_dir})
|
222
227
|
Misc.lock tmp_path_lock, lock_options do
|
223
|
-
|
228
|
+
|
229
|
+
if Open.exists? path and not force
|
230
|
+
Misc.consume_stream content
|
231
|
+
return
|
232
|
+
end
|
233
|
+
|
224
234
|
FileUtils.mkdir_p File.dirname(tmp_path) unless File.directory? File.dirname(tmp_path)
|
225
235
|
FileUtils.rm_f tmp_path if File.exists? tmp_path
|
226
236
|
begin
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'rbbt/util/cmd'
|
2
2
|
require 'rbbt/util/tmpfile'
|
3
3
|
require 'rbbt/util/misc'
|
4
|
+
require 'rbbt/util/misc/bgzf'
|
4
5
|
|
5
6
|
require 'zlib'
|
6
7
|
|
@@ -285,13 +286,14 @@ module Open
|
|
285
286
|
|
286
287
|
|
287
288
|
# Decompression
|
289
|
+
|
290
|
+
def self.bgunzip(stream)
|
291
|
+
Bgzf.setup stream
|
292
|
+
end
|
288
293
|
|
289
294
|
def self.gunzip(stream)
|
290
|
-
|
291
|
-
|
292
|
-
else
|
293
|
-
CMD.cmd("gunzip", :pipe => true, :in => stream, :post => proc{stream.force_close if stream.respond_to? :force_close})
|
294
|
-
end
|
295
|
+
#Zlib::GzipReader.new(stream)
|
296
|
+
CMD.cmd('zcat', :in => stream, :pipe => true)
|
295
297
|
end
|
296
298
|
|
297
299
|
def self.unzip(stream)
|
@@ -310,6 +312,10 @@ module Open
|
|
310
312
|
!! (file =~ /\.gz$/)
|
311
313
|
end
|
312
314
|
|
315
|
+
def self.bgzip?(file)
|
316
|
+
!! (file =~ /\.bgz$/)
|
317
|
+
end
|
318
|
+
|
313
319
|
def self.zip?(file)
|
314
320
|
!! (file =~ /\.zip/)
|
315
321
|
end
|
@@ -375,6 +381,7 @@ module Open
|
|
375
381
|
end
|
376
382
|
io = unzip(io) if ((String === url and zip?(url)) and not options[:noz]) or options[:zip]
|
377
383
|
io = gunzip(io) if ((String === url and gzip?(url)) and not options[:noz]) or options[:gzip]
|
384
|
+
io = bgunzip(io) if ((String === url and bgzip?(url)) and not options[:noz]) or options[:bgzip]
|
378
385
|
|
379
386
|
if block_given?
|
380
387
|
begin
|
data/lib/rbbt/util/semaphore.rb
CHANGED
@@ -68,11 +68,11 @@ void post_semaphore(char* name){
|
|
68
68
|
end
|
69
69
|
|
70
70
|
begin
|
71
|
-
Log.
|
71
|
+
Log.low "Creating semaphore (#{ size }): #{file}"
|
72
72
|
RbbtSemaphore.create_semaphore(file, size)
|
73
73
|
yield file
|
74
74
|
ensure
|
75
|
-
Log.
|
75
|
+
Log.low "Removing semaphore #{ file }"
|
76
76
|
RbbtSemaphore.delete_semaphore(file)
|
77
77
|
end
|
78
78
|
end
|
@@ -386,6 +386,7 @@ module Workflow
|
|
386
386
|
def task_info(name)
|
387
387
|
name = name.to_sym
|
388
388
|
task = tasks[name]
|
389
|
+
raise "No '#{name}' task in '#{self.to_s}' Workflow" if task.nil?
|
389
390
|
description = task.description
|
390
391
|
result_description = task.result_description
|
391
392
|
result_type = task.result_type
|
@@ -503,7 +504,7 @@ module Workflow
|
|
503
504
|
when Symbol
|
504
505
|
job(dependency, jobname, inputs)
|
505
506
|
when Proc
|
506
|
-
dependency.call jobname, inputs
|
507
|
+
dependency.call jobname, inputs, real_dependencies
|
507
508
|
end
|
508
509
|
end
|
509
510
|
real_dependencies.flatten.compact
|
@@ -21,9 +21,11 @@ class TestPersistTSV < Test::Unit::TestCase
|
|
21
21
|
Log.info "Testing #{ Term::ANSIColor.red(engine) }"
|
22
22
|
TmpFile.with_file nil, false do |tmp_file|
|
23
23
|
db= nil
|
24
|
+
|
24
25
|
Misc.benchmark(1, "Build database with #{MAX - 2} entries") do
|
25
26
|
db = TSV.open(file, :fields => [1], :persist => true, :persist_engine => engine, :persist_dir => tmp_file, :type => :single, :unnamed => true)
|
26
27
|
end
|
28
|
+
|
27
29
|
_test = db.keys.sort{rand}[1..100000]
|
28
30
|
Misc.benchmark(5, "Access #{test.length} random entries") do
|
29
31
|
_test.each do |k| db[k] end
|
@@ -36,32 +38,32 @@ class TestPersistTSV < Test::Unit::TestCase
|
|
36
38
|
end
|
37
39
|
end
|
38
40
|
|
39
|
-
def
|
41
|
+
def __test_benchmark_tch
|
40
42
|
engine = "HDB"
|
41
43
|
run_bechmark(tsv_path, engine)
|
42
44
|
end
|
43
45
|
|
44
|
-
def
|
46
|
+
def __test_benchmark_tcb
|
45
47
|
engine = "BDB"
|
46
48
|
run_bechmark(tsv_path, engine)
|
47
49
|
end
|
48
50
|
|
49
|
-
def
|
51
|
+
def __test_benchmark_kch
|
50
52
|
engine = "kch"
|
51
53
|
run_bechmark(tsv_path, engine)
|
52
54
|
end
|
53
55
|
|
54
|
-
def
|
56
|
+
def __test_benchmark_kcb
|
55
57
|
engine = "kct"
|
56
58
|
run_bechmark(tsv_path, engine)
|
57
59
|
end
|
58
60
|
|
59
|
-
def
|
61
|
+
def __test_benchmark_cdb
|
60
62
|
engine = "CDB"
|
61
63
|
run_bechmark(tsv_path, engine)
|
62
64
|
end
|
63
65
|
|
64
|
-
def
|
66
|
+
def __test_benchmark_leveldb
|
65
67
|
engine = "LevelDB"
|
66
68
|
run_bechmark(tsv_path, engine)
|
67
69
|
end
|
@@ -2,7 +2,7 @@ require File.join(File.expand_path(File.dirname(__FILE__)), '..', 'test_helper.r
|
|
2
2
|
require 'rbbt/packed_index'
|
3
3
|
|
4
4
|
class TestPackedIndex < Test::Unit::TestCase
|
5
|
-
def
|
5
|
+
def _test_index
|
6
6
|
|
7
7
|
TmpFile.with_file do |tmpfile|
|
8
8
|
pi = PackedIndex.new tmpfile, true, %w(i i 23s f f f f f)
|
@@ -12,7 +12,7 @@ class TestPackedIndex < Test::Unit::TestCase
|
|
12
12
|
pi << nil
|
13
13
|
pi << nil
|
14
14
|
pi.close
|
15
|
-
pi = PackedIndex.new
|
15
|
+
pi = PackedIndex.new(tmpfile, false)
|
16
16
|
Misc.benchmark(1000) do
|
17
17
|
100.times do |i|
|
18
18
|
assert_equal i, pi[i][0]
|
@@ -21,7 +21,47 @@ class TestPackedIndex < Test::Unit::TestCase
|
|
21
21
|
end
|
22
22
|
assert_equal nil, pi[100]
|
23
23
|
assert_equal nil, pi[101]
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def test_bgzip
|
28
|
+
|
29
|
+
size = 1000000
|
30
|
+
density = 0.1
|
31
|
+
|
32
|
+
access = []
|
33
|
+
(size * density).to_i.times do
|
34
|
+
access << rand(size-1) + 1
|
35
|
+
end
|
36
|
+
access.sort!
|
37
|
+
access.uniq!
|
38
|
+
|
39
|
+
TmpFile.with_file do |tmpfile|
|
40
|
+
pi = PackedIndex.new tmpfile, true, %w(i i 23s f f f f f)
|
41
|
+
size.times do |i|
|
42
|
+
pi << [i, i+2, i.to_s * 10, rand, rand, rand, rand, rand]
|
43
|
+
end
|
44
|
+
pi << nil
|
45
|
+
pi << nil
|
46
|
+
pi.close
|
24
47
|
|
48
|
+
pi = PackedIndex.new(tmpfile, false)
|
49
|
+
Misc.benchmark do
|
50
|
+
access.each do |point|
|
51
|
+
assert_equal point+2, pi[point][1]
|
52
|
+
end
|
53
|
+
end
|
54
|
+
|
55
|
+
`bgzip #{tmpfile} `
|
56
|
+
`mv #{tmpfile}.gz #{tmpfile}.bgz`
|
57
|
+
|
58
|
+
pi = PackedIndex.new(tmpfile + '.bgz', false)
|
59
|
+
pi[0]
|
60
|
+
Misc.benchmark do
|
61
|
+
access.each do |point|
|
62
|
+
assert_equal point+2, pi[point][1]
|
63
|
+
end
|
64
|
+
end
|
25
65
|
end
|
26
66
|
end
|
27
67
|
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require File.join(File.expand_path(File.dirname(__FILE__)), '../../..', 'test_helper.rb')
|
2
|
+
require 'rbbt/util/misc/bgzf'
|
3
|
+
|
4
|
+
class TestBgzf < Test::Unit::TestCase
|
5
|
+
def _test_Bgzf
|
6
|
+
content = "1234567890" * 1000000
|
7
|
+
TmpFile.with_file(content) do |file|
|
8
|
+
compressed = file + '.gz'
|
9
|
+
`bgzip #{file} -c > #{compressed}`
|
10
|
+
stream = Bgzf.setup File.open(compressed)
|
11
|
+
stream.seek 500003
|
12
|
+
assert_equal "4567", stream.read(4)
|
13
|
+
assert_equal "89", stream.read(2)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def test_tsv
|
18
|
+
require 'rbbt/tsv'
|
19
|
+
TmpFile.with_file(datafile_test(:identifiers).read) do |file|
|
20
|
+
Misc.benchmark do
|
21
|
+
tsv = TSV.open(Open.open(file))
|
22
|
+
end
|
23
|
+
compressed = file + '.bgz'
|
24
|
+
|
25
|
+
`bgzip #{file} -c > #{compressed}`
|
26
|
+
stream = Bgzf.setup File.open(compressed)
|
27
|
+
Misc.benchmark do
|
28
|
+
tsv = TSV.open(stream)
|
29
|
+
end
|
30
|
+
|
31
|
+
`gzip #{file}`
|
32
|
+
stream = Open.open(file + '.gz')
|
33
|
+
Misc.benchmark do
|
34
|
+
tsv = TSV.open(stream)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
data/test/rbbt/util/test_open.rb
CHANGED
@@ -7,11 +7,11 @@ require 'iconv'
|
|
7
7
|
|
8
8
|
class TestOpen < Test::Unit::TestCase
|
9
9
|
|
10
|
-
def
|
10
|
+
def _test_wget
|
11
11
|
assert(Misc.fixutf8(Open.wget('http://google.com', :quiet => true).read) =~ /html/)
|
12
12
|
end
|
13
13
|
|
14
|
-
def
|
14
|
+
def _test_nice
|
15
15
|
nice = 0.5
|
16
16
|
|
17
17
|
t = Time.now
|
@@ -24,16 +24,16 @@ class TestOpen < Test::Unit::TestCase
|
|
24
24
|
assert(Time.now - t + 0.5 >= nice)
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
27
|
+
def _test_remote?
|
28
28
|
assert(Open.remote?('http://google.com'))
|
29
29
|
assert(! Open.remote?('~/.bashrc'))
|
30
30
|
end
|
31
31
|
|
32
|
-
def
|
32
|
+
def _test_open
|
33
33
|
assert(Open.read('http://google.com', :quiet => true) =~ /html/)
|
34
34
|
end
|
35
35
|
|
36
|
-
def
|
36
|
+
def _test_read
|
37
37
|
content =<<-EOF
|
38
38
|
1
|
39
39
|
2
|
@@ -48,7 +48,7 @@ class TestOpen < Test::Unit::TestCase
|
|
48
48
|
end
|
49
49
|
end
|
50
50
|
|
51
|
-
def
|
51
|
+
def _test_read_grep
|
52
52
|
content =<<-EOF
|
53
53
|
1
|
54
54
|
2
|
@@ -68,7 +68,7 @@ class TestOpen < Test::Unit::TestCase
|
|
68
68
|
end
|
69
69
|
end
|
70
70
|
|
71
|
-
def
|
71
|
+
def _test_read_grep_invert
|
72
72
|
content =<<-EOF
|
73
73
|
1
|
74
74
|
2
|
@@ -98,13 +98,14 @@ class TestOpen < Test::Unit::TestCase
|
|
98
98
|
4
|
99
99
|
EOF
|
100
100
|
TmpFile.with_file(content) do |file|
|
101
|
-
`
|
101
|
+
`bgzip #{file}`
|
102
102
|
assert_equal(content, Open.read(file + '.gz'))
|
103
|
+
puts content
|
103
104
|
FileUtils.rm file + '.gz'
|
104
105
|
end
|
105
106
|
end
|
106
107
|
|
107
|
-
def
|
108
|
+
def _test_repo_dir
|
108
109
|
file1 = "TEST"
|
109
110
|
file2 = "TEST" * 1000
|
110
111
|
TmpFile.with_file do |tmpdir|
|
@@ -132,7 +133,7 @@ class TestOpen < Test::Unit::TestCase
|
|
132
133
|
end
|
133
134
|
end
|
134
135
|
|
135
|
-
def
|
136
|
+
def _test_repo_dir
|
136
137
|
TmpFile.with_file do |tmpdir|
|
137
138
|
tmpdir = "/home/mvazquezg/tmp/repo_dir"
|
138
139
|
repo = File.join(tmpdir, 'repo')
|
data/test/test_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.14.
|
4
|
+
version: 5.14.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -185,6 +185,7 @@ files:
|
|
185
185
|
- lib/rbbt/util/log/progress/report.rb
|
186
186
|
- lib/rbbt/util/log/progress/util.rb
|
187
187
|
- lib/rbbt/util/misc.rb
|
188
|
+
- lib/rbbt/util/misc/bgzf.rb
|
188
189
|
- lib/rbbt/util/misc/concurrent_stream.rb
|
189
190
|
- lib/rbbt/util/misc/development.rb
|
190
191
|
- lib/rbbt/util/misc/exceptions.rb
|
@@ -320,6 +321,7 @@ files:
|
|
320
321
|
- test/rbbt/util/concurrency/test_processes.rb
|
321
322
|
- test/rbbt/util/concurrency/test_threads.rb
|
322
323
|
- test/rbbt/util/log/test_progress.rb
|
324
|
+
- test/rbbt/util/misc/test_bgzf.rb
|
323
325
|
- test/rbbt/util/misc/test_lock.rb
|
324
326
|
- test/rbbt/util/misc/test_pipes.rb
|
325
327
|
- test/rbbt/util/simpleopt/test_get.rb
|
@@ -379,6 +381,7 @@ test_files:
|
|
379
381
|
- test/rbbt/util/test_log.rb
|
380
382
|
- test/rbbt/util/test_open.rb
|
381
383
|
- test/rbbt/util/misc/test_lock.rb
|
384
|
+
- test/rbbt/util/misc/test_bgzf.rb
|
382
385
|
- test/rbbt/util/misc/test_pipes.rb
|
383
386
|
- test/rbbt/util/test_concurrency.rb
|
384
387
|
- test/rbbt/util/test_R.rb
|