rbbt-util 5.13.37 → 5.14.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +6 -1
  3. data/lib/rbbt/fix_width_table.rb +21 -9
  4. data/lib/rbbt/monitor.rb +1 -1
  5. data/lib/rbbt/packed_index.rb +19 -5
  6. data/lib/rbbt/persist/tsv.rb +9 -1
  7. data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
  8. data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
  9. data/lib/rbbt/persist/tsv/sharder.rb +11 -3
  10. data/lib/rbbt/resource/path.rb +1 -1
  11. data/lib/rbbt/resource/rake.rb +1 -0
  12. data/lib/rbbt/tsv/accessor.rb +18 -13
  13. data/lib/rbbt/tsv/dumper.rb +2 -6
  14. data/lib/rbbt/tsv/manipulate.rb +6 -4
  15. data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
  16. data/lib/rbbt/tsv/parser.rb +20 -16
  17. data/lib/rbbt/tsv/stream.rb +87 -76
  18. data/lib/rbbt/tsv/util.rb +8 -3
  19. data/lib/rbbt/util/R.rb +1 -1
  20. data/lib/rbbt/util/cmd.rb +0 -3
  21. data/lib/rbbt/util/concurrency/processes.rb +3 -0
  22. data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
  23. data/lib/rbbt/util/log.rb +45 -18
  24. data/lib/rbbt/util/log/progress/report.rb +3 -2
  25. data/lib/rbbt/util/log/progress/util.rb +1 -1
  26. data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
  27. data/lib/rbbt/util/misc/development.rb +10 -4
  28. data/lib/rbbt/util/misc/lock.rb +1 -1
  29. data/lib/rbbt/util/misc/omics.rb +2 -0
  30. data/lib/rbbt/util/misc/pipes.rb +90 -87
  31. data/lib/rbbt/workflow.rb +6 -2
  32. data/lib/rbbt/workflow/accessor.rb +70 -40
  33. data/lib/rbbt/workflow/definition.rb +23 -0
  34. data/lib/rbbt/workflow/step.rb +15 -3
  35. data/lib/rbbt/workflow/step/run.rb +18 -13
  36. data/lib/rbbt/workflow/usage.rb +3 -0
  37. data/share/Rlib/util.R +1 -1
  38. data/share/rbbt_commands/tsv/get +0 -2
  39. data/share/rbbt_commands/tsv/info +13 -5
  40. data/share/rbbt_commands/tsv/subset +1 -1
  41. data/share/rbbt_commands/workflow/info +32 -0
  42. data/share/rbbt_commands/workflow/task +0 -2
  43. data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
  44. data/test/rbbt/test_fix_width_table.rb +1 -0
  45. data/test/rbbt/test_packed_index.rb +3 -0
  46. data/test/rbbt/tsv/test_stream.rb +55 -2
  47. data/test/rbbt/util/misc/test_pipes.rb +8 -6
  48. data/test/rbbt/workflow/test_step.rb +7 -6
  49. metadata +3 -2
@@ -269,8 +269,6 @@ end
269
269
 
270
270
  if recursive_clean
271
271
  job.recursive_clean
272
- sleep 1
273
- job = workflow.job(task.name, name, job_options)
274
272
  end
275
273
 
276
274
  require 'pp'
@@ -75,5 +75,49 @@ class TestSharder < Test::Unit::TestCase
75
75
  assert_equal size, sharder.size
76
76
  end
77
77
  end
78
+
79
+ def test_shard_pki
80
+ TmpFile.with_file do |dir|
81
+ shard_function = Proc.new do |key|
82
+ key[0..(key.index(":")-1)]
83
+ end
84
+
85
+ pos_function = Proc.new do |key|
86
+ key.split(":").last.to_i
87
+ end
88
+
89
+ size = 10
90
+ chrs = (1..10).to_a
91
+ sharder = Persist.persist_tsv(nil, "ShardTest", {}, :pattern => %w(f), :update => true, :range => false, :value_size => 64, :engine => 'pki', :file => dir, :shard_function => shard_function, :pos_function => pos_function, :persist => true, :serializer => :clean) do |db|
92
+ chrs.each do |c|
93
+ size.times do |v|
94
+ v = v
95
+ chr = "chr" << c.to_s
96
+ key = chr + ":" << v.to_s
97
+ iii [key, v]
98
+ db << [key, [v*2]]
99
+ end
100
+ end
101
+ end
102
+ sharder.read
103
+
104
+ assert_equal dir, sharder.persistence_path
105
+ assert_equal size*chrs.length, sharder.size
106
+
107
+ assert_equal [4.0], sharder["chr2:2"]
108
+
109
+ count = 0
110
+ sharder.through do |k,v|
111
+ count += 1
112
+ end
113
+ assert_equal count, size*chrs.length
114
+
115
+ sharder = Persist.open_sharder(dir, false, :float, 'fwt', {:range => false, :value_size => 64, :pos_function => pos_function}, &shard_function)
116
+
117
+ assert_equal [4.0], sharder["chr2:2"]
118
+
119
+ assert_equal chrs.length*size, sharder.size
120
+ end
121
+ end
78
122
  end
79
123
 
@@ -44,6 +44,7 @@ class TestFixWidthTable < Test::Unit::TestCase
44
44
  assert_equal 0, f.overlap(1)
45
45
  assert_equal "test1", f.value(0)
46
46
  assert_equal "test2", f.value(1)
47
+
47
48
  end
48
49
 
49
50
  end
@@ -13,12 +13,15 @@ class TestPackedIndex < Test::Unit::TestCase
13
13
  pi << nil
14
14
  pi.close
15
15
  pi = PackedIndex.new tmpfile, false
16
+ Misc.benchmark(1000) do
16
17
  100.times do |i|
17
18
  assert_equal i, pi[i][0]
18
19
  assert_equal i+2, pi[i][1]
19
20
  end
21
+ end
20
22
  assert_equal nil, pi[100]
21
23
  assert_equal nil, pi[101]
24
+
22
25
  end
23
26
  end
24
27
  end
@@ -90,6 +90,38 @@ row2 cc
90
90
  assert_equal ["AAA", "BBB", "CCC", "aaa", "bbb", "ccc"], tsv["row3"]
91
91
  end
92
92
 
93
+ def test_paste_stream_missing_2
94
+ text1=<<-EOF
95
+ #: :sep=" "
96
+ #Row LabelA LabelB LabelC
97
+ row2 AA BB CC
98
+ row1 A B C
99
+ EOF
100
+
101
+ text2=<<-EOF
102
+ #: :sep=" "
103
+ #Row Labela Labelb
104
+ row2 aa bb
105
+ EOF
106
+
107
+ text3=<<-EOF
108
+ #: :sep=" "
109
+ #Row Labelc
110
+ row3 ccc
111
+ row2 cc
112
+ EOF
113
+
114
+ s1 = StringIO.new text1
115
+ s2 = StringIO.new text2
116
+ s3 = StringIO.new text3
117
+ tsv = TSV.open TSV.paste_streams([s1,s2,s3], :sep => " ", :type => :list, :sort => true)
118
+ assert_equal "Row", tsv.key_field
119
+ assert_equal %w(LabelA LabelB LabelC Labela Labelb Labelc), tsv.fields
120
+ assert_equal ["A", "B", "C", "", "", ""], tsv["row1"]
121
+ assert_equal ["AA", "BB", "CC", "aa", "bb", "cc"], tsv["row2"]
122
+ assert_equal ["", "", "", "", "", "ccc"], tsv["row3"]
123
+ end
124
+
93
125
  def test_paste_stream_missing
94
126
  text1=<<-EOF
95
127
  #: :sep=" "
@@ -101,7 +133,6 @@ row1 A B C
101
133
  text2=<<-EOF
102
134
  #: :sep=" "
103
135
  #Row Labela Labelb
104
- row1 a b
105
136
  row2 aa bb
106
137
  EOF
107
138
 
@@ -118,8 +149,30 @@ row2 cc
118
149
  tsv = TSV.open TSV.paste_streams([s1,s2,s3], :sep => " ", :type => :list, :sort => true)
119
150
  assert_equal "Row", tsv.key_field
120
151
  assert_equal %w(LabelA LabelB LabelC Labela Labelb Labelc), tsv.fields
121
- assert_equal ["A", "B", "C", "a", "b", ""], tsv["row1"]
152
+ assert_equal ["A", "B", "C", "", "", ""], tsv["row1"]
122
153
  assert_equal ["AA", "BB", "CC", "aa", "bb", "cc"], tsv["row2"]
123
154
  assert_equal ["", "", "", "", "", "ccc"], tsv["row3"]
124
155
  end
156
+
157
+ def test_paste_stream_missing_3
158
+ text1=<<-EOF
159
+ #: :sep=" "
160
+ #Row LabelA LabelB LabelC
161
+ row2 AA BB CC
162
+ row1 A B C
163
+ EOF
164
+
165
+ text2=<<-EOF
166
+ #: :sep=" "
167
+ #Row Labelc
168
+ EOF
169
+
170
+ s1 = StringIO.new text1
171
+ s2 = StringIO.new text2
172
+ tsv = TSV.open TSV.paste_streams([s1,s2], :sep => " ", :type => :list, :sort => true)
173
+ assert_equal "Row", tsv.key_field
174
+ assert_equal %w(LabelA LabelB LabelC Labelc), tsv.fields
175
+ assert_equal ["A", "B", "C", ""], tsv["row1"]
176
+ assert_equal ["AA", "BB", "CC", ""], tsv["row2"]
177
+ end
125
178
  end
@@ -5,7 +5,7 @@ require 'rbbt/util/misc'
5
5
 
6
6
  class TestMiscPipes < Test::Unit::TestCase
7
7
 
8
- def _test_collapse_stream
8
+ def test_collapse_stream
9
9
  text=<<-EOF
10
10
  row1 A B C
11
11
  row1 a b c
@@ -19,7 +19,7 @@ row2 aa bb cc
19
19
  assert_equal ["BB", "bb"], tsv["row2"][1]
20
20
  end
21
21
 
22
- def _test_paste_stream
22
+ def test_paste_stream
23
23
  text1=<<-EOF
24
24
  row1 A B C
25
25
  row2 AA BB CC
@@ -46,9 +46,11 @@ row3 ccc
46
46
  assert_equal ["AAA", "BBB", "CCC", "", "", "ccc"], tsv["row3"]
47
47
  end
48
48
 
49
- def _test_sort_stream
49
+ def test_sort_stream
50
50
  text =<<-EOF
51
- #: :sep=" "
51
+ ##
52
+ ##
53
+ ##
52
54
  #Row LabelA LabelB LabelC
53
55
  row2 AA BB CC
54
56
  row3 AAA BBB CCC
@@ -56,8 +58,8 @@ row1 A B C
56
58
  EOF
57
59
  s = StringIO.new text
58
60
  sorted = Misc.sort_stream(s)
59
- assert_equal %w(#: #Row row2 row3 row1), text.split("\n").collect{|l| l.split(" ").first}
60
- assert_equal %w(#: #Row row1 row2 row3), sorted.read.split("\n").collect{|l| l.split(" ").first}
61
+ assert_equal %w(## ## ## #Row row2 row3 row1), text.split("\n").collect{|l| l.split(" ").first}
62
+ assert_equal %w(## ## ## #Row row1 row2 row3), sorted.read.split("\n").collect{|l| l.split(" ").first}
61
63
  end
62
64
 
63
65
  def test_dup_stream
@@ -192,7 +192,7 @@ class TestStep < Test::Unit::TestCase
192
192
  task = Task.setup do
193
193
  5.times do
194
194
  puts "Process: #{Process.pid}"
195
- sleep rand * 2
195
+ sleep rand
196
196
  end
197
197
  end
198
198
 
@@ -203,11 +203,12 @@ class TestStep < Test::Unit::TestCase
203
203
  jobs << step.fork(semaphore)
204
204
  end
205
205
  end
206
- jobs.each do |job|
207
- while not job.done?
208
- sleep 1
209
- end
210
- end
206
+ Step.wait_for_jobs(jobs)
207
+ #jobs.each do |job|
208
+ # while not job.done?
209
+ # sleep 1
210
+ # end
211
+ #end
211
212
  ensure
212
213
  RbbtSemaphore.delete_semaphore(semaphore)
213
214
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.13.37
4
+ version: 5.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-12 00:00:00.000000000 Z
11
+ date: 2014-06-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -140,6 +140,7 @@ files:
140
140
  - lib/rbbt/persist/tsv/kyotocabinet.rb
141
141
  - lib/rbbt/persist/tsv/leveldb.rb
142
142
  - lib/rbbt/persist/tsv/lmdb.rb
143
+ - lib/rbbt/persist/tsv/packed_index.rb
143
144
  - lib/rbbt/persist/tsv/sharder.rb
144
145
  - lib/rbbt/persist/tsv/tokyocabinet.rb
145
146
  - lib/rbbt/resource.rb