rbbt-util 5.13.37 → 5.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +6 -1
  3. data/lib/rbbt/fix_width_table.rb +21 -9
  4. data/lib/rbbt/monitor.rb +1 -1
  5. data/lib/rbbt/packed_index.rb +19 -5
  6. data/lib/rbbt/persist/tsv.rb +9 -1
  7. data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
  8. data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
  9. data/lib/rbbt/persist/tsv/sharder.rb +11 -3
  10. data/lib/rbbt/resource/path.rb +1 -1
  11. data/lib/rbbt/resource/rake.rb +1 -0
  12. data/lib/rbbt/tsv/accessor.rb +18 -13
  13. data/lib/rbbt/tsv/dumper.rb +2 -6
  14. data/lib/rbbt/tsv/manipulate.rb +6 -4
  15. data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
  16. data/lib/rbbt/tsv/parser.rb +20 -16
  17. data/lib/rbbt/tsv/stream.rb +87 -76
  18. data/lib/rbbt/tsv/util.rb +8 -3
  19. data/lib/rbbt/util/R.rb +1 -1
  20. data/lib/rbbt/util/cmd.rb +0 -3
  21. data/lib/rbbt/util/concurrency/processes.rb +3 -0
  22. data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
  23. data/lib/rbbt/util/log.rb +45 -18
  24. data/lib/rbbt/util/log/progress/report.rb +3 -2
  25. data/lib/rbbt/util/log/progress/util.rb +1 -1
  26. data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
  27. data/lib/rbbt/util/misc/development.rb +10 -4
  28. data/lib/rbbt/util/misc/lock.rb +1 -1
  29. data/lib/rbbt/util/misc/omics.rb +2 -0
  30. data/lib/rbbt/util/misc/pipes.rb +90 -87
  31. data/lib/rbbt/workflow.rb +6 -2
  32. data/lib/rbbt/workflow/accessor.rb +70 -40
  33. data/lib/rbbt/workflow/definition.rb +23 -0
  34. data/lib/rbbt/workflow/step.rb +15 -3
  35. data/lib/rbbt/workflow/step/run.rb +18 -13
  36. data/lib/rbbt/workflow/usage.rb +3 -0
  37. data/share/Rlib/util.R +1 -1
  38. data/share/rbbt_commands/tsv/get +0 -2
  39. data/share/rbbt_commands/tsv/info +13 -5
  40. data/share/rbbt_commands/tsv/subset +1 -1
  41. data/share/rbbt_commands/workflow/info +32 -0
  42. data/share/rbbt_commands/workflow/task +0 -2
  43. data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
  44. data/test/rbbt/test_fix_width_table.rb +1 -0
  45. data/test/rbbt/test_packed_index.rb +3 -0
  46. data/test/rbbt/tsv/test_stream.rb +55 -2
  47. data/test/rbbt/util/misc/test_pipes.rb +8 -6
  48. data/test/rbbt/workflow/test_step.rb +7 -6
  49. metadata +3 -2
@@ -269,8 +269,6 @@ end
269
269
 
270
270
  if recursive_clean
271
271
  job.recursive_clean
272
- sleep 1
273
- job = workflow.job(task.name, name, job_options)
274
272
  end
275
273
 
276
274
  require 'pp'
@@ -75,5 +75,49 @@ class TestSharder < Test::Unit::TestCase
75
75
  assert_equal size, sharder.size
76
76
  end
77
77
  end
78
+
79
+ def test_shard_pki
80
+ TmpFile.with_file do |dir|
81
+ shard_function = Proc.new do |key|
82
+ key[0..(key.index(":")-1)]
83
+ end
84
+
85
+ pos_function = Proc.new do |key|
86
+ key.split(":").last.to_i
87
+ end
88
+
89
+ size = 10
90
+ chrs = (1..10).to_a
91
+ sharder = Persist.persist_tsv(nil, "ShardTest", {}, :pattern => %w(f), :update => true, :range => false, :value_size => 64, :engine => 'pki', :file => dir, :shard_function => shard_function, :pos_function => pos_function, :persist => true, :serializer => :clean) do |db|
92
+ chrs.each do |c|
93
+ size.times do |v|
94
+ v = v
95
+ chr = "chr" << c.to_s
96
+ key = chr + ":" << v.to_s
97
+ iii [key, v]
98
+ db << [key, [v*2]]
99
+ end
100
+ end
101
+ end
102
+ sharder.read
103
+
104
+ assert_equal dir, sharder.persistence_path
105
+ assert_equal size*chrs.length, sharder.size
106
+
107
+ assert_equal [4.0], sharder["chr2:2"]
108
+
109
+ count = 0
110
+ sharder.through do |k,v|
111
+ count += 1
112
+ end
113
+ assert_equal count, size*chrs.length
114
+
115
+ sharder = Persist.open_sharder(dir, false, :float, 'fwt', {:range => false, :value_size => 64, :pos_function => pos_function}, &shard_function)
116
+
117
+ assert_equal [4.0], sharder["chr2:2"]
118
+
119
+ assert_equal chrs.length*size, sharder.size
120
+ end
121
+ end
78
122
  end
79
123
 
@@ -44,6 +44,7 @@ class TestFixWidthTable < Test::Unit::TestCase
44
44
  assert_equal 0, f.overlap(1)
45
45
  assert_equal "test1", f.value(0)
46
46
  assert_equal "test2", f.value(1)
47
+
47
48
  end
48
49
 
49
50
  end
@@ -13,12 +13,15 @@ class TestPackedIndex < Test::Unit::TestCase
13
13
  pi << nil
14
14
  pi.close
15
15
  pi = PackedIndex.new tmpfile, false
16
+ Misc.benchmark(1000) do
16
17
  100.times do |i|
17
18
  assert_equal i, pi[i][0]
18
19
  assert_equal i+2, pi[i][1]
19
20
  end
21
+ end
20
22
  assert_equal nil, pi[100]
21
23
  assert_equal nil, pi[101]
24
+
22
25
  end
23
26
  end
24
27
  end
@@ -90,6 +90,38 @@ row2 cc
90
90
  assert_equal ["AAA", "BBB", "CCC", "aaa", "bbb", "ccc"], tsv["row3"]
91
91
  end
92
92
 
93
+ def test_paste_stream_missing_2
94
+ text1=<<-EOF
95
+ #: :sep=" "
96
+ #Row LabelA LabelB LabelC
97
+ row2 AA BB CC
98
+ row1 A B C
99
+ EOF
100
+
101
+ text2=<<-EOF
102
+ #: :sep=" "
103
+ #Row Labela Labelb
104
+ row2 aa bb
105
+ EOF
106
+
107
+ text3=<<-EOF
108
+ #: :sep=" "
109
+ #Row Labelc
110
+ row3 ccc
111
+ row2 cc
112
+ EOF
113
+
114
+ s1 = StringIO.new text1
115
+ s2 = StringIO.new text2
116
+ s3 = StringIO.new text3
117
+ tsv = TSV.open TSV.paste_streams([s1,s2,s3], :sep => " ", :type => :list, :sort => true)
118
+ assert_equal "Row", tsv.key_field
119
+ assert_equal %w(LabelA LabelB LabelC Labela Labelb Labelc), tsv.fields
120
+ assert_equal ["A", "B", "C", "", "", ""], tsv["row1"]
121
+ assert_equal ["AA", "BB", "CC", "aa", "bb", "cc"], tsv["row2"]
122
+ assert_equal ["", "", "", "", "", "ccc"], tsv["row3"]
123
+ end
124
+
93
125
  def test_paste_stream_missing
94
126
  text1=<<-EOF
95
127
  #: :sep=" "
@@ -101,7 +133,6 @@ row1 A B C
101
133
  text2=<<-EOF
102
134
  #: :sep=" "
103
135
  #Row Labela Labelb
104
- row1 a b
105
136
  row2 aa bb
106
137
  EOF
107
138
 
@@ -118,8 +149,30 @@ row2 cc
118
149
  tsv = TSV.open TSV.paste_streams([s1,s2,s3], :sep => " ", :type => :list, :sort => true)
119
150
  assert_equal "Row", tsv.key_field
120
151
  assert_equal %w(LabelA LabelB LabelC Labela Labelb Labelc), tsv.fields
121
- assert_equal ["A", "B", "C", "a", "b", ""], tsv["row1"]
152
+ assert_equal ["A", "B", "C", "", "", ""], tsv["row1"]
122
153
  assert_equal ["AA", "BB", "CC", "aa", "bb", "cc"], tsv["row2"]
123
154
  assert_equal ["", "", "", "", "", "ccc"], tsv["row3"]
124
155
  end
156
+
157
+ def test_paste_stream_missing_3
158
+ text1=<<-EOF
159
+ #: :sep=" "
160
+ #Row LabelA LabelB LabelC
161
+ row2 AA BB CC
162
+ row1 A B C
163
+ EOF
164
+
165
+ text2=<<-EOF
166
+ #: :sep=" "
167
+ #Row Labelc
168
+ EOF
169
+
170
+ s1 = StringIO.new text1
171
+ s2 = StringIO.new text2
172
+ tsv = TSV.open TSV.paste_streams([s1,s2], :sep => " ", :type => :list, :sort => true)
173
+ assert_equal "Row", tsv.key_field
174
+ assert_equal %w(LabelA LabelB LabelC Labelc), tsv.fields
175
+ assert_equal ["A", "B", "C", ""], tsv["row1"]
176
+ assert_equal ["AA", "BB", "CC", ""], tsv["row2"]
177
+ end
125
178
  end
@@ -5,7 +5,7 @@ require 'rbbt/util/misc'
5
5
 
6
6
  class TestMiscPipes < Test::Unit::TestCase
7
7
 
8
- def _test_collapse_stream
8
+ def test_collapse_stream
9
9
  text=<<-EOF
10
10
  row1 A B C
11
11
  row1 a b c
@@ -19,7 +19,7 @@ row2 aa bb cc
19
19
  assert_equal ["BB", "bb"], tsv["row2"][1]
20
20
  end
21
21
 
22
- def _test_paste_stream
22
+ def test_paste_stream
23
23
  text1=<<-EOF
24
24
  row1 A B C
25
25
  row2 AA BB CC
@@ -46,9 +46,11 @@ row3 ccc
46
46
  assert_equal ["AAA", "BBB", "CCC", "", "", "ccc"], tsv["row3"]
47
47
  end
48
48
 
49
- def _test_sort_stream
49
+ def test_sort_stream
50
50
  text =<<-EOF
51
- #: :sep=" "
51
+ ##
52
+ ##
53
+ ##
52
54
  #Row LabelA LabelB LabelC
53
55
  row2 AA BB CC
54
56
  row3 AAA BBB CCC
@@ -56,8 +58,8 @@ row1 A B C
56
58
  EOF
57
59
  s = StringIO.new text
58
60
  sorted = Misc.sort_stream(s)
59
- assert_equal %w(#: #Row row2 row3 row1), text.split("\n").collect{|l| l.split(" ").first}
60
- assert_equal %w(#: #Row row1 row2 row3), sorted.read.split("\n").collect{|l| l.split(" ").first}
61
+ assert_equal %w(## ## ## #Row row2 row3 row1), text.split("\n").collect{|l| l.split(" ").first}
62
+ assert_equal %w(## ## ## #Row row1 row2 row3), sorted.read.split("\n").collect{|l| l.split(" ").first}
61
63
  end
62
64
 
63
65
  def test_dup_stream
@@ -192,7 +192,7 @@ class TestStep < Test::Unit::TestCase
192
192
  task = Task.setup do
193
193
  5.times do
194
194
  puts "Process: #{Process.pid}"
195
- sleep rand * 2
195
+ sleep rand
196
196
  end
197
197
  end
198
198
 
@@ -203,11 +203,12 @@ class TestStep < Test::Unit::TestCase
203
203
  jobs << step.fork(semaphore)
204
204
  end
205
205
  end
206
- jobs.each do |job|
207
- while not job.done?
208
- sleep 1
209
- end
210
- end
206
+ Step.wait_for_jobs(jobs)
207
+ #jobs.each do |job|
208
+ # while not job.done?
209
+ # sleep 1
210
+ # end
211
+ #end
211
212
  ensure
212
213
  RbbtSemaphore.delete_semaphore(semaphore)
213
214
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbbt-util
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.13.37
4
+ version: 5.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Miguel Vazquez
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-12 00:00:00.000000000 Z
11
+ date: 2014-06-19 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -140,6 +140,7 @@ files:
140
140
  - lib/rbbt/persist/tsv/kyotocabinet.rb
141
141
  - lib/rbbt/persist/tsv/leveldb.rb
142
142
  - lib/rbbt/persist/tsv/lmdb.rb
143
+ - lib/rbbt/persist/tsv/packed_index.rb
143
144
  - lib/rbbt/persist/tsv/sharder.rb
144
145
  - lib/rbbt/persist/tsv/tokyocabinet.rb
145
146
  - lib/rbbt/resource.rb