rbbt-util 5.13.37 → 5.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +6 -1
- data/lib/rbbt/fix_width_table.rb +21 -9
- data/lib/rbbt/monitor.rb +1 -1
- data/lib/rbbt/packed_index.rb +19 -5
- data/lib/rbbt/persist/tsv.rb +9 -1
- data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
- data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
- data/lib/rbbt/persist/tsv/sharder.rb +11 -3
- data/lib/rbbt/resource/path.rb +1 -1
- data/lib/rbbt/resource/rake.rb +1 -0
- data/lib/rbbt/tsv/accessor.rb +18 -13
- data/lib/rbbt/tsv/dumper.rb +2 -6
- data/lib/rbbt/tsv/manipulate.rb +6 -4
- data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
- data/lib/rbbt/tsv/parser.rb +20 -16
- data/lib/rbbt/tsv/stream.rb +87 -76
- data/lib/rbbt/tsv/util.rb +8 -3
- data/lib/rbbt/util/R.rb +1 -1
- data/lib/rbbt/util/cmd.rb +0 -3
- data/lib/rbbt/util/concurrency/processes.rb +3 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
- data/lib/rbbt/util/log.rb +45 -18
- data/lib/rbbt/util/log/progress/report.rb +3 -2
- data/lib/rbbt/util/log/progress/util.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
- data/lib/rbbt/util/misc/development.rb +10 -4
- data/lib/rbbt/util/misc/lock.rb +1 -1
- data/lib/rbbt/util/misc/omics.rb +2 -0
- data/lib/rbbt/util/misc/pipes.rb +90 -87
- data/lib/rbbt/workflow.rb +6 -2
- data/lib/rbbt/workflow/accessor.rb +70 -40
- data/lib/rbbt/workflow/definition.rb +23 -0
- data/lib/rbbt/workflow/step.rb +15 -3
- data/lib/rbbt/workflow/step/run.rb +18 -13
- data/lib/rbbt/workflow/usage.rb +3 -0
- data/share/Rlib/util.R +1 -1
- data/share/rbbt_commands/tsv/get +0 -2
- data/share/rbbt_commands/tsv/info +13 -5
- data/share/rbbt_commands/tsv/subset +1 -1
- data/share/rbbt_commands/workflow/info +32 -0
- data/share/rbbt_commands/workflow/task +0 -2
- data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
- data/test/rbbt/test_fix_width_table.rb +1 -0
- data/test/rbbt/test_packed_index.rb +3 -0
- data/test/rbbt/tsv/test_stream.rb +55 -2
- data/test/rbbt/util/misc/test_pipes.rb +8 -6
- data/test/rbbt/workflow/test_step.rb +7 -6
- metadata +3 -2
@@ -75,5 +75,49 @@ class TestSharder < Test::Unit::TestCase
|
|
75
75
|
assert_equal size, sharder.size
|
76
76
|
end
|
77
77
|
end
|
78
|
+
|
79
|
+
def test_shard_pki
|
80
|
+
TmpFile.with_file do |dir|
|
81
|
+
shard_function = Proc.new do |key|
|
82
|
+
key[0..(key.index(":")-1)]
|
83
|
+
end
|
84
|
+
|
85
|
+
pos_function = Proc.new do |key|
|
86
|
+
key.split(":").last.to_i
|
87
|
+
end
|
88
|
+
|
89
|
+
size = 10
|
90
|
+
chrs = (1..10).to_a
|
91
|
+
sharder = Persist.persist_tsv(nil, "ShardTest", {}, :pattern => %w(f), :update => true, :range => false, :value_size => 64, :engine => 'pki', :file => dir, :shard_function => shard_function, :pos_function => pos_function, :persist => true, :serializer => :clean) do |db|
|
92
|
+
chrs.each do |c|
|
93
|
+
size.times do |v|
|
94
|
+
v = v
|
95
|
+
chr = "chr" << c.to_s
|
96
|
+
key = chr + ":" << v.to_s
|
97
|
+
iii [key, v]
|
98
|
+
db << [key, [v*2]]
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
sharder.read
|
103
|
+
|
104
|
+
assert_equal dir, sharder.persistence_path
|
105
|
+
assert_equal size*chrs.length, sharder.size
|
106
|
+
|
107
|
+
assert_equal [4.0], sharder["chr2:2"]
|
108
|
+
|
109
|
+
count = 0
|
110
|
+
sharder.through do |k,v|
|
111
|
+
count += 1
|
112
|
+
end
|
113
|
+
assert_equal count, size*chrs.length
|
114
|
+
|
115
|
+
sharder = Persist.open_sharder(dir, false, :float, 'fwt', {:range => false, :value_size => 64, :pos_function => pos_function}, &shard_function)
|
116
|
+
|
117
|
+
assert_equal [4.0], sharder["chr2:2"]
|
118
|
+
|
119
|
+
assert_equal chrs.length*size, sharder.size
|
120
|
+
end
|
121
|
+
end
|
78
122
|
end
|
79
123
|
|
@@ -13,12 +13,15 @@ class TestPackedIndex < Test::Unit::TestCase
|
|
13
13
|
pi << nil
|
14
14
|
pi.close
|
15
15
|
pi = PackedIndex.new tmpfile, false
|
16
|
+
Misc.benchmark(1000) do
|
16
17
|
100.times do |i|
|
17
18
|
assert_equal i, pi[i][0]
|
18
19
|
assert_equal i+2, pi[i][1]
|
19
20
|
end
|
21
|
+
end
|
20
22
|
assert_equal nil, pi[100]
|
21
23
|
assert_equal nil, pi[101]
|
24
|
+
|
22
25
|
end
|
23
26
|
end
|
24
27
|
end
|
@@ -90,6 +90,38 @@ row2 cc
|
|
90
90
|
assert_equal ["AAA", "BBB", "CCC", "aaa", "bbb", "ccc"], tsv["row3"]
|
91
91
|
end
|
92
92
|
|
93
|
+
def test_paste_stream_missing_2
|
94
|
+
text1=<<-EOF
|
95
|
+
#: :sep=" "
|
96
|
+
#Row LabelA LabelB LabelC
|
97
|
+
row2 AA BB CC
|
98
|
+
row1 A B C
|
99
|
+
EOF
|
100
|
+
|
101
|
+
text2=<<-EOF
|
102
|
+
#: :sep=" "
|
103
|
+
#Row Labela Labelb
|
104
|
+
row2 aa bb
|
105
|
+
EOF
|
106
|
+
|
107
|
+
text3=<<-EOF
|
108
|
+
#: :sep=" "
|
109
|
+
#Row Labelc
|
110
|
+
row3 ccc
|
111
|
+
row2 cc
|
112
|
+
EOF
|
113
|
+
|
114
|
+
s1 = StringIO.new text1
|
115
|
+
s2 = StringIO.new text2
|
116
|
+
s3 = StringIO.new text3
|
117
|
+
tsv = TSV.open TSV.paste_streams([s1,s2,s3], :sep => " ", :type => :list, :sort => true)
|
118
|
+
assert_equal "Row", tsv.key_field
|
119
|
+
assert_equal %w(LabelA LabelB LabelC Labela Labelb Labelc), tsv.fields
|
120
|
+
assert_equal ["A", "B", "C", "", "", ""], tsv["row1"]
|
121
|
+
assert_equal ["AA", "BB", "CC", "aa", "bb", "cc"], tsv["row2"]
|
122
|
+
assert_equal ["", "", "", "", "", "ccc"], tsv["row3"]
|
123
|
+
end
|
124
|
+
|
93
125
|
def test_paste_stream_missing
|
94
126
|
text1=<<-EOF
|
95
127
|
#: :sep=" "
|
@@ -101,7 +133,6 @@ row1 A B C
|
|
101
133
|
text2=<<-EOF
|
102
134
|
#: :sep=" "
|
103
135
|
#Row Labela Labelb
|
104
|
-
row1 a b
|
105
136
|
row2 aa bb
|
106
137
|
EOF
|
107
138
|
|
@@ -118,8 +149,30 @@ row2 cc
|
|
118
149
|
tsv = TSV.open TSV.paste_streams([s1,s2,s3], :sep => " ", :type => :list, :sort => true)
|
119
150
|
assert_equal "Row", tsv.key_field
|
120
151
|
assert_equal %w(LabelA LabelB LabelC Labela Labelb Labelc), tsv.fields
|
121
|
-
assert_equal ["A", "B", "C", "a", "b", ""], tsv["row1"]
|
152
|
+
assert_equal ["A", "B", "C", "", "", ""], tsv["row1"]
|
122
153
|
assert_equal ["AA", "BB", "CC", "aa", "bb", "cc"], tsv["row2"]
|
123
154
|
assert_equal ["", "", "", "", "", "ccc"], tsv["row3"]
|
124
155
|
end
|
156
|
+
|
157
|
+
def test_paste_stream_missing_3
|
158
|
+
text1=<<-EOF
|
159
|
+
#: :sep=" "
|
160
|
+
#Row LabelA LabelB LabelC
|
161
|
+
row2 AA BB CC
|
162
|
+
row1 A B C
|
163
|
+
EOF
|
164
|
+
|
165
|
+
text2=<<-EOF
|
166
|
+
#: :sep=" "
|
167
|
+
#Row Labelc
|
168
|
+
EOF
|
169
|
+
|
170
|
+
s1 = StringIO.new text1
|
171
|
+
s2 = StringIO.new text2
|
172
|
+
tsv = TSV.open TSV.paste_streams([s1,s2], :sep => " ", :type => :list, :sort => true)
|
173
|
+
assert_equal "Row", tsv.key_field
|
174
|
+
assert_equal %w(LabelA LabelB LabelC Labelc), tsv.fields
|
175
|
+
assert_equal ["A", "B", "C", ""], tsv["row1"]
|
176
|
+
assert_equal ["AA", "BB", "CC", ""], tsv["row2"]
|
177
|
+
end
|
125
178
|
end
|
@@ -5,7 +5,7 @@ require 'rbbt/util/misc'
|
|
5
5
|
|
6
6
|
class TestMiscPipes < Test::Unit::TestCase
|
7
7
|
|
8
|
-
def
|
8
|
+
def test_collapse_stream
|
9
9
|
text=<<-EOF
|
10
10
|
row1 A B C
|
11
11
|
row1 a b c
|
@@ -19,7 +19,7 @@ row2 aa bb cc
|
|
19
19
|
assert_equal ["BB", "bb"], tsv["row2"][1]
|
20
20
|
end
|
21
21
|
|
22
|
-
def
|
22
|
+
def test_paste_stream
|
23
23
|
text1=<<-EOF
|
24
24
|
row1 A B C
|
25
25
|
row2 AA BB CC
|
@@ -46,9 +46,11 @@ row3 ccc
|
|
46
46
|
assert_equal ["AAA", "BBB", "CCC", "", "", "ccc"], tsv["row3"]
|
47
47
|
end
|
48
48
|
|
49
|
-
def
|
49
|
+
def test_sort_stream
|
50
50
|
text =<<-EOF
|
51
|
-
|
51
|
+
##
|
52
|
+
##
|
53
|
+
##
|
52
54
|
#Row LabelA LabelB LabelC
|
53
55
|
row2 AA BB CC
|
54
56
|
row3 AAA BBB CCC
|
@@ -56,8 +58,8 @@ row1 A B C
|
|
56
58
|
EOF
|
57
59
|
s = StringIO.new text
|
58
60
|
sorted = Misc.sort_stream(s)
|
59
|
-
assert_equal %w(
|
60
|
-
assert_equal %w(
|
61
|
+
assert_equal %w(## ## ## #Row row2 row3 row1), text.split("\n").collect{|l| l.split(" ").first}
|
62
|
+
assert_equal %w(## ## ## #Row row1 row2 row3), sorted.read.split("\n").collect{|l| l.split(" ").first}
|
61
63
|
end
|
62
64
|
|
63
65
|
def test_dup_stream
|
@@ -192,7 +192,7 @@ class TestStep < Test::Unit::TestCase
|
|
192
192
|
task = Task.setup do
|
193
193
|
5.times do
|
194
194
|
puts "Process: #{Process.pid}"
|
195
|
-
sleep rand
|
195
|
+
sleep rand
|
196
196
|
end
|
197
197
|
end
|
198
198
|
|
@@ -203,11 +203,12 @@ class TestStep < Test::Unit::TestCase
|
|
203
203
|
jobs << step.fork(semaphore)
|
204
204
|
end
|
205
205
|
end
|
206
|
-
jobs
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
end
|
206
|
+
Step.wait_for_jobs(jobs)
|
207
|
+
#jobs.each do |job|
|
208
|
+
# while not job.done?
|
209
|
+
# sleep 1
|
210
|
+
# end
|
211
|
+
#end
|
211
212
|
ensure
|
212
213
|
RbbtSemaphore.delete_semaphore(semaphore)
|
213
214
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.13.37
|
4
|
+
version: 5.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -140,6 +140,7 @@ files:
|
|
140
140
|
- lib/rbbt/persist/tsv/kyotocabinet.rb
|
141
141
|
- lib/rbbt/persist/tsv/leveldb.rb
|
142
142
|
- lib/rbbt/persist/tsv/lmdb.rb
|
143
|
+
- lib/rbbt/persist/tsv/packed_index.rb
|
143
144
|
- lib/rbbt/persist/tsv/sharder.rb
|
144
145
|
- lib/rbbt/persist/tsv/tokyocabinet.rb
|
145
146
|
- lib/rbbt/resource.rb
|