rbbt-util 5.13.37 → 5.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/rbbt +6 -1
- data/lib/rbbt/fix_width_table.rb +21 -9
- data/lib/rbbt/monitor.rb +1 -1
- data/lib/rbbt/packed_index.rb +19 -5
- data/lib/rbbt/persist/tsv.rb +9 -1
- data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
- data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
- data/lib/rbbt/persist/tsv/sharder.rb +11 -3
- data/lib/rbbt/resource/path.rb +1 -1
- data/lib/rbbt/resource/rake.rb +1 -0
- data/lib/rbbt/tsv/accessor.rb +18 -13
- data/lib/rbbt/tsv/dumper.rb +2 -6
- data/lib/rbbt/tsv/manipulate.rb +6 -4
- data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
- data/lib/rbbt/tsv/parser.rb +20 -16
- data/lib/rbbt/tsv/stream.rb +87 -76
- data/lib/rbbt/tsv/util.rb +8 -3
- data/lib/rbbt/util/R.rb +1 -1
- data/lib/rbbt/util/cmd.rb +0 -3
- data/lib/rbbt/util/concurrency/processes.rb +3 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
- data/lib/rbbt/util/log.rb +45 -18
- data/lib/rbbt/util/log/progress/report.rb +3 -2
- data/lib/rbbt/util/log/progress/util.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
- data/lib/rbbt/util/misc/development.rb +10 -4
- data/lib/rbbt/util/misc/lock.rb +1 -1
- data/lib/rbbt/util/misc/omics.rb +2 -0
- data/lib/rbbt/util/misc/pipes.rb +90 -87
- data/lib/rbbt/workflow.rb +6 -2
- data/lib/rbbt/workflow/accessor.rb +70 -40
- data/lib/rbbt/workflow/definition.rb +23 -0
- data/lib/rbbt/workflow/step.rb +15 -3
- data/lib/rbbt/workflow/step/run.rb +18 -13
- data/lib/rbbt/workflow/usage.rb +3 -0
- data/share/Rlib/util.R +1 -1
- data/share/rbbt_commands/tsv/get +0 -2
- data/share/rbbt_commands/tsv/info +13 -5
- data/share/rbbt_commands/tsv/subset +1 -1
- data/share/rbbt_commands/workflow/info +32 -0
- data/share/rbbt_commands/workflow/task +0 -2
- data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
- data/test/rbbt/test_fix_width_table.rb +1 -0
- data/test/rbbt/test_packed_index.rb +3 -0
- data/test/rbbt/tsv/test_stream.rb +55 -2
- data/test/rbbt/util/misc/test_pipes.rb +8 -6
- data/test/rbbt/workflow/test_step.rb +7 -6
- metadata +3 -2
@@ -75,5 +75,49 @@ class TestSharder < Test::Unit::TestCase
|
|
75
75
|
assert_equal size, sharder.size
|
76
76
|
end
|
77
77
|
end
|
78
|
+
|
79
|
+
def test_shard_pki
|
80
|
+
TmpFile.with_file do |dir|
|
81
|
+
shard_function = Proc.new do |key|
|
82
|
+
key[0..(key.index(":")-1)]
|
83
|
+
end
|
84
|
+
|
85
|
+
pos_function = Proc.new do |key|
|
86
|
+
key.split(":").last.to_i
|
87
|
+
end
|
88
|
+
|
89
|
+
size = 10
|
90
|
+
chrs = (1..10).to_a
|
91
|
+
sharder = Persist.persist_tsv(nil, "ShardTest", {}, :pattern => %w(f), :update => true, :range => false, :value_size => 64, :engine => 'pki', :file => dir, :shard_function => shard_function, :pos_function => pos_function, :persist => true, :serializer => :clean) do |db|
|
92
|
+
chrs.each do |c|
|
93
|
+
size.times do |v|
|
94
|
+
v = v
|
95
|
+
chr = "chr" << c.to_s
|
96
|
+
key = chr + ":" << v.to_s
|
97
|
+
iii [key, v]
|
98
|
+
db << [key, [v*2]]
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
sharder.read
|
103
|
+
|
104
|
+
assert_equal dir, sharder.persistence_path
|
105
|
+
assert_equal size*chrs.length, sharder.size
|
106
|
+
|
107
|
+
assert_equal [4.0], sharder["chr2:2"]
|
108
|
+
|
109
|
+
count = 0
|
110
|
+
sharder.through do |k,v|
|
111
|
+
count += 1
|
112
|
+
end
|
113
|
+
assert_equal count, size*chrs.length
|
114
|
+
|
115
|
+
sharder = Persist.open_sharder(dir, false, :float, 'fwt', {:range => false, :value_size => 64, :pos_function => pos_function}, &shard_function)
|
116
|
+
|
117
|
+
assert_equal [4.0], sharder["chr2:2"]
|
118
|
+
|
119
|
+
assert_equal chrs.length*size, sharder.size
|
120
|
+
end
|
121
|
+
end
|
78
122
|
end
|
79
123
|
|
@@ -13,12 +13,15 @@ class TestPackedIndex < Test::Unit::TestCase
|
|
13
13
|
pi << nil
|
14
14
|
pi.close
|
15
15
|
pi = PackedIndex.new tmpfile, false
|
16
|
+
Misc.benchmark(1000) do
|
16
17
|
100.times do |i|
|
17
18
|
assert_equal i, pi[i][0]
|
18
19
|
assert_equal i+2, pi[i][1]
|
19
20
|
end
|
21
|
+
end
|
20
22
|
assert_equal nil, pi[100]
|
21
23
|
assert_equal nil, pi[101]
|
24
|
+
|
22
25
|
end
|
23
26
|
end
|
24
27
|
end
|
@@ -90,6 +90,38 @@ row2 cc
|
|
90
90
|
assert_equal ["AAA", "BBB", "CCC", "aaa", "bbb", "ccc"], tsv["row3"]
|
91
91
|
end
|
92
92
|
|
93
|
+
def test_paste_stream_missing_2
|
94
|
+
text1=<<-EOF
|
95
|
+
#: :sep=" "
|
96
|
+
#Row LabelA LabelB LabelC
|
97
|
+
row2 AA BB CC
|
98
|
+
row1 A B C
|
99
|
+
EOF
|
100
|
+
|
101
|
+
text2=<<-EOF
|
102
|
+
#: :sep=" "
|
103
|
+
#Row Labela Labelb
|
104
|
+
row2 aa bb
|
105
|
+
EOF
|
106
|
+
|
107
|
+
text3=<<-EOF
|
108
|
+
#: :sep=" "
|
109
|
+
#Row Labelc
|
110
|
+
row3 ccc
|
111
|
+
row2 cc
|
112
|
+
EOF
|
113
|
+
|
114
|
+
s1 = StringIO.new text1
|
115
|
+
s2 = StringIO.new text2
|
116
|
+
s3 = StringIO.new text3
|
117
|
+
tsv = TSV.open TSV.paste_streams([s1,s2,s3], :sep => " ", :type => :list, :sort => true)
|
118
|
+
assert_equal "Row", tsv.key_field
|
119
|
+
assert_equal %w(LabelA LabelB LabelC Labela Labelb Labelc), tsv.fields
|
120
|
+
assert_equal ["A", "B", "C", "", "", ""], tsv["row1"]
|
121
|
+
assert_equal ["AA", "BB", "CC", "aa", "bb", "cc"], tsv["row2"]
|
122
|
+
assert_equal ["", "", "", "", "", "ccc"], tsv["row3"]
|
123
|
+
end
|
124
|
+
|
93
125
|
def test_paste_stream_missing
|
94
126
|
text1=<<-EOF
|
95
127
|
#: :sep=" "
|
@@ -101,7 +133,6 @@ row1 A B C
|
|
101
133
|
text2=<<-EOF
|
102
134
|
#: :sep=" "
|
103
135
|
#Row Labela Labelb
|
104
|
-
row1 a b
|
105
136
|
row2 aa bb
|
106
137
|
EOF
|
107
138
|
|
@@ -118,8 +149,30 @@ row2 cc
|
|
118
149
|
tsv = TSV.open TSV.paste_streams([s1,s2,s3], :sep => " ", :type => :list, :sort => true)
|
119
150
|
assert_equal "Row", tsv.key_field
|
120
151
|
assert_equal %w(LabelA LabelB LabelC Labela Labelb Labelc), tsv.fields
|
121
|
-
assert_equal ["A", "B", "C", "a", "b", ""], tsv["row1"]
|
152
|
+
assert_equal ["A", "B", "C", "", "", ""], tsv["row1"]
|
122
153
|
assert_equal ["AA", "BB", "CC", "aa", "bb", "cc"], tsv["row2"]
|
123
154
|
assert_equal ["", "", "", "", "", "ccc"], tsv["row3"]
|
124
155
|
end
|
156
|
+
|
157
|
+
def test_paste_stream_missing_3
|
158
|
+
text1=<<-EOF
|
159
|
+
#: :sep=" "
|
160
|
+
#Row LabelA LabelB LabelC
|
161
|
+
row2 AA BB CC
|
162
|
+
row1 A B C
|
163
|
+
EOF
|
164
|
+
|
165
|
+
text2=<<-EOF
|
166
|
+
#: :sep=" "
|
167
|
+
#Row Labelc
|
168
|
+
EOF
|
169
|
+
|
170
|
+
s1 = StringIO.new text1
|
171
|
+
s2 = StringIO.new text2
|
172
|
+
tsv = TSV.open TSV.paste_streams([s1,s2], :sep => " ", :type => :list, :sort => true)
|
173
|
+
assert_equal "Row", tsv.key_field
|
174
|
+
assert_equal %w(LabelA LabelB LabelC Labelc), tsv.fields
|
175
|
+
assert_equal ["A", "B", "C", ""], tsv["row1"]
|
176
|
+
assert_equal ["AA", "BB", "CC", ""], tsv["row2"]
|
177
|
+
end
|
125
178
|
end
|
@@ -5,7 +5,7 @@ require 'rbbt/util/misc'
|
|
5
5
|
|
6
6
|
class TestMiscPipes < Test::Unit::TestCase
|
7
7
|
|
8
|
-
def
|
8
|
+
def test_collapse_stream
|
9
9
|
text=<<-EOF
|
10
10
|
row1 A B C
|
11
11
|
row1 a b c
|
@@ -19,7 +19,7 @@ row2 aa bb cc
|
|
19
19
|
assert_equal ["BB", "bb"], tsv["row2"][1]
|
20
20
|
end
|
21
21
|
|
22
|
-
def
|
22
|
+
def test_paste_stream
|
23
23
|
text1=<<-EOF
|
24
24
|
row1 A B C
|
25
25
|
row2 AA BB CC
|
@@ -46,9 +46,11 @@ row3 ccc
|
|
46
46
|
assert_equal ["AAA", "BBB", "CCC", "", "", "ccc"], tsv["row3"]
|
47
47
|
end
|
48
48
|
|
49
|
-
def
|
49
|
+
def test_sort_stream
|
50
50
|
text =<<-EOF
|
51
|
-
|
51
|
+
##
|
52
|
+
##
|
53
|
+
##
|
52
54
|
#Row LabelA LabelB LabelC
|
53
55
|
row2 AA BB CC
|
54
56
|
row3 AAA BBB CCC
|
@@ -56,8 +58,8 @@ row1 A B C
|
|
56
58
|
EOF
|
57
59
|
s = StringIO.new text
|
58
60
|
sorted = Misc.sort_stream(s)
|
59
|
-
assert_equal %w(
|
60
|
-
assert_equal %w(
|
61
|
+
assert_equal %w(## ## ## #Row row2 row3 row1), text.split("\n").collect{|l| l.split(" ").first}
|
62
|
+
assert_equal %w(## ## ## #Row row1 row2 row3), sorted.read.split("\n").collect{|l| l.split(" ").first}
|
61
63
|
end
|
62
64
|
|
63
65
|
def test_dup_stream
|
@@ -192,7 +192,7 @@ class TestStep < Test::Unit::TestCase
|
|
192
192
|
task = Task.setup do
|
193
193
|
5.times do
|
194
194
|
puts "Process: #{Process.pid}"
|
195
|
-
sleep rand
|
195
|
+
sleep rand
|
196
196
|
end
|
197
197
|
end
|
198
198
|
|
@@ -203,11 +203,12 @@ class TestStep < Test::Unit::TestCase
|
|
203
203
|
jobs << step.fork(semaphore)
|
204
204
|
end
|
205
205
|
end
|
206
|
-
jobs
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
end
|
206
|
+
Step.wait_for_jobs(jobs)
|
207
|
+
#jobs.each do |job|
|
208
|
+
# while not job.done?
|
209
|
+
# sleep 1
|
210
|
+
# end
|
211
|
+
#end
|
211
212
|
ensure
|
212
213
|
RbbtSemaphore.delete_semaphore(semaphore)
|
213
214
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbbt-util
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.13.37
|
4
|
+
version: 5.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Miguel Vazquez
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -140,6 +140,7 @@ files:
|
|
140
140
|
- lib/rbbt/persist/tsv/kyotocabinet.rb
|
141
141
|
- lib/rbbt/persist/tsv/leveldb.rb
|
142
142
|
- lib/rbbt/persist/tsv/lmdb.rb
|
143
|
+
- lib/rbbt/persist/tsv/packed_index.rb
|
143
144
|
- lib/rbbt/persist/tsv/sharder.rb
|
144
145
|
- lib/rbbt/persist/tsv/tokyocabinet.rb
|
145
146
|
- lib/rbbt/resource.rb
|