rbbt-util 5.13.37 → 5.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +6 -1
  3. data/lib/rbbt/fix_width_table.rb +21 -9
  4. data/lib/rbbt/monitor.rb +1 -1
  5. data/lib/rbbt/packed_index.rb +19 -5
  6. data/lib/rbbt/persist/tsv.rb +9 -1
  7. data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
  8. data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
  9. data/lib/rbbt/persist/tsv/sharder.rb +11 -3
  10. data/lib/rbbt/resource/path.rb +1 -1
  11. data/lib/rbbt/resource/rake.rb +1 -0
  12. data/lib/rbbt/tsv/accessor.rb +18 -13
  13. data/lib/rbbt/tsv/dumper.rb +2 -6
  14. data/lib/rbbt/tsv/manipulate.rb +6 -4
  15. data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
  16. data/lib/rbbt/tsv/parser.rb +20 -16
  17. data/lib/rbbt/tsv/stream.rb +87 -76
  18. data/lib/rbbt/tsv/util.rb +8 -3
  19. data/lib/rbbt/util/R.rb +1 -1
  20. data/lib/rbbt/util/cmd.rb +0 -3
  21. data/lib/rbbt/util/concurrency/processes.rb +3 -0
  22. data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
  23. data/lib/rbbt/util/log.rb +45 -18
  24. data/lib/rbbt/util/log/progress/report.rb +3 -2
  25. data/lib/rbbt/util/log/progress/util.rb +1 -1
  26. data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
  27. data/lib/rbbt/util/misc/development.rb +10 -4
  28. data/lib/rbbt/util/misc/lock.rb +1 -1
  29. data/lib/rbbt/util/misc/omics.rb +2 -0
  30. data/lib/rbbt/util/misc/pipes.rb +90 -87
  31. data/lib/rbbt/workflow.rb +6 -2
  32. data/lib/rbbt/workflow/accessor.rb +70 -40
  33. data/lib/rbbt/workflow/definition.rb +23 -0
  34. data/lib/rbbt/workflow/step.rb +15 -3
  35. data/lib/rbbt/workflow/step/run.rb +18 -13
  36. data/lib/rbbt/workflow/usage.rb +3 -0
  37. data/share/Rlib/util.R +1 -1
  38. data/share/rbbt_commands/tsv/get +0 -2
  39. data/share/rbbt_commands/tsv/info +13 -5
  40. data/share/rbbt_commands/tsv/subset +1 -1
  41. data/share/rbbt_commands/workflow/info +32 -0
  42. data/share/rbbt_commands/workflow/task +0 -2
  43. data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
  44. data/test/rbbt/test_fix_width_table.rb +1 -0
  45. data/test/rbbt/test_packed_index.rb +3 -0
  46. data/test/rbbt/tsv/test_stream.rb +55 -2
  47. data/test/rbbt/util/misc/test_pipes.rb +8 -6
  48. data/test/rbbt/workflow/test_step.rb +7 -6
  49. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 26df367cce98f5b14b985bd311d4b6594dcb62a9
4
- data.tar.gz: 1931a495ac7eb8eb17dcfa0f154be8c9d61e8b7f
3
+ metadata.gz: 2c1f84fdf126a66b0e8cf03cd96237dd4625c5d7
4
+ data.tar.gz: 6e9b9ed9bc339fedaa99f1a96d8f537692ebfbb6
5
5
  SHA512:
6
- metadata.gz: e71fee09c427fdf3faf326b75897dc6d070ec605bc94b3785d8cacf88f2bd3fa1fdd8d017e5e06d5e48cff48960cb8e7debb337b3a4eeb3361d2fd3735b2b1f3
7
- data.tar.gz: 14a1b01824edd6bfa2a6ead19828454ea57b1c3846eb1f332a0b15faee9bf5019fdda715732ef57b1326ce3870d8c2f8d8c6c9f2faf6e70218ceea76f373436d
6
+ metadata.gz: 7879d8241fc10774522d28484ae1cf37bd935067038b1f9e370a2ff4af51adb9f7f183b43c7852694ce4e8b44282ab937b3744b7d8d35072a86e3ccbfb51c48e
7
+ data.tar.gz: 12ecd4f1f116f4125bb9f4159ab4556910b6e31d0fc3a351c94667758507296914445091575fc8946c975cc305da957949fa79c25913c990e477aed56506e0a0
data/bin/rbbt CHANGED
@@ -41,9 +41,10 @@ $ rbbt <command> <subcommand> ... -a --arg1 --arg2='value' --arg3 'another-value
41
41
  --profile #{Log.color :yellow, "Profile execution"}
42
42
  --nocolor #{Log.color :yellow, "Disable colored output"}
43
43
  --nobar #{Log.color :yellow, "Disable progress report"}
44
+ --nostream #{Log.color :yellow, "Disable persistance/job streaming"}
44
45
  --locate_file #{Log.color :yellow, "Report the location of the script instead of executing it"}
45
46
  --dump_mem* #{Log.color :yellow, "Dump strings in memory each second into file"}
46
- --no_lock_id #{Log.color :yellow, "Do not track lockfiles with ids (for high-througput and high-concurrency"}
47
+ --no_lock_id #{Log.color :yellow, "Do not track lockfiles with ids (for high-througput and high-concurrency"}
47
48
  EOF
48
49
 
49
50
 
@@ -70,6 +71,10 @@ if mem_dump = options.delete(:dump_mem)
70
71
  Rbbt.dump_memory(mem_dump, Symbol)
71
72
  end
72
73
 
74
+ if options.delete :stream
75
+ ENV["RBBT_NO_STREAM"] = "true"
76
+ end
77
+
73
78
  if options.delete :nobar
74
79
  ENV["RBBT_NO_PROGRESS"] = "true"
75
80
  end
data/lib/rbbt/fix_width_table.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  class FixWidthTable
2
2
 
3
- attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask
3
+ attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask, :write
4
4
  def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
5
5
  @filename = filename
6
6
 
@@ -9,6 +9,7 @@ class FixWidthTable
9
9
  @value_size = value_size
10
10
  @range = range
11
11
  @record_size = @value_size + (@range ? 16 : 8)
12
+ @write = true
12
13
 
13
14
  if %w(memory stringio).include? filename.to_s.downcase
14
15
  @filename = :memory
@@ -21,6 +22,7 @@ class FixWidthTable
21
22
 
22
23
  @file.write [value_size].pack("L")
23
24
  @file.write [@range ? 1 : 0 ].pack("C")
25
+
24
26
  @size = 0
25
27
  else
26
28
  Log.debug "FixWidthTable up-to-date: #{ filename }"
@@ -31,11 +33,17 @@ class FixWidthTable
31
33
  end
32
34
  @value_size = @file.read(4).unpack("L").first
33
35
  @range = @file.read(1).unpack("C").first == 1
34
- @record_size = @value_size + (@range ? 12 : 4)
36
+ @record_size = @value_size + (@range ? 16 : 8)
37
+ @write = false
38
+
35
39
  @size = (File.size(@filename) - 5) / (@record_size)
36
40
  end
37
41
 
38
- @mask = "a#{value_size}"
42
+ @mask = "a#{@value_size}"
43
+ end
44
+
45
+ def write?
46
+ @write
39
47
  end
40
48
 
41
49
  def persistence_path
@@ -46,15 +54,14 @@ class FixWidthTable
46
54
  @filename=value
47
55
  end
48
56
 
49
- CONNECTIONS = {} unless defined? CONNECTIONS
50
57
  def self.get(filename, value_size = nil, range = nil, update = false)
51
58
  return self.new(filename, value_size, range, update) if filename == :memory
52
59
  case
53
- when (!File.exists?(filename) or update or not CONNECTIONS.include?(filename))
54
- CONNECTIONS[filename] = self.new(filename, value_size, range, update)
60
+ when (!File.exists?(filename) or update or not Persist::CONNECTIONS.include?(filename))
61
+ Persist::CONNECTIONS[filename] = self.new(filename, value_size, range, update)
55
62
  end
56
63
 
57
- CONNECTIONS[filename]
64
+ Persist::CONNECTIONS[filename]
58
65
  end
59
66
 
60
67
  def format(pos, value)
@@ -100,16 +107,19 @@ class FixWidthTable
100
107
  @file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
101
108
  padding = @file.read(4).unpack("l").first+1
102
109
  txt = @file.read(value_size)
103
- txt.unpack(mask).first[0..-padding]
110
+ str = txt.unpack(mask).first
111
+ padding > 1 ? str[0..-padding] : str
104
112
  end
105
113
 
106
114
  def read(force = false)
107
115
  return if @filename == :memory
116
+ @write = false
108
117
  @file.close unless @file.closed?
109
118
  @file = File.open(filename, 'r:ASCII-8BIT')
110
119
  end
111
120
 
112
121
  def close
122
+ @write = false
113
123
  @file.close
114
124
  end
115
125
 
@@ -191,7 +201,9 @@ class FixWidthTable
191
201
 
192
202
  idx = 0 if idx < 0
193
203
 
194
- idx -= overlap(idx) unless overlap(idx).nil?
204
+ overlap = overlap(idx)
205
+
206
+ idx -= overlap unless overlap.nil?
195
207
 
196
208
  values = []
197
209
  l_start = pos(idx)
data/lib/rbbt/monitor.rb CHANGED
@@ -4,7 +4,7 @@ module Rbbt
4
4
 
5
5
  LOCK_DIRS = Rbbt.share.find_all + Rbbt.var.cache.persistence.find_all + Rbbt.var.jobs.find_all +
6
6
  Rbbt.tmp.tsv_open_locks.find_all + Rbbt.tmp.persist_locks.find_all + Rbbt.tmp.sensiblewrite_lock_dir.find_all +
7
- Rbbt.tmp.produce_locks.find_all
7
+ Rbbt.tmp.produce_locks.find_all + Rbbt.tmp.step_info_locks.find_all
8
8
 
9
9
  SENSIBLE_WRITE_DIRS = Misc.sensiblewrite_dir.find_all
10
10
 
data/lib/rbbt/packed_index.rb CHANGED
@@ -2,10 +2,10 @@ class PackedIndex
2
2
  attr_accessor :file, :mask, :mask_length, :offset, :item_size, :stream, :nil_string
3
3
 
4
4
  ELEMS = {
5
- "I" => ["q", 8],
6
5
  "i" => ["l", 4],
6
+ "I" => ["q", 8],
7
7
  "f" => ["f", 4],
8
- "F" => ["D", 8],
8
+ "F" => ["d", 8],
9
9
  }
10
10
 
11
11
  def self.process_mask(mask)
@@ -28,6 +28,12 @@ class PackedIndex
28
28
  [str, size]
29
29
  end
30
30
 
31
+ def size
32
+ @size ||= begin
33
+ (File.size(file) - offset) / item_size
34
+ end
35
+ end
36
+
31
37
  def initialize(file, write = false, pattern = nil)
32
38
  @file = file
33
39
  if write
@@ -44,10 +50,18 @@ class PackedIndex
44
50
  @mask = @stream.read(mask_length)
45
51
  @offset = @mask.length + 8
46
52
  end
47
- @nil_string = "[NIL]" + "-" * (@item_size - 5)
53
+ @nil_string = "NIL" << ("-" * (@item_size - 3))
54
+ end
55
+
56
+ def persistence_path
57
+ @file
58
+ end
59
+
60
+ def persistence_path=(value)
61
+ @file=value
48
62
  end
49
63
 
50
- def read
64
+ def read(force = false)
51
65
  close
52
66
  @stream = Open.open(file, :mode => 'rb')
53
67
  end
@@ -63,7 +77,7 @@ class PackedIndex
63
77
  def [](position)
64
78
  @stream.seek(position * item_size + offset)
65
79
  encoded = @stream.read(item_size)
66
- return nil if encoded == nil_string
80
+ return nil if encoded.nil? or encoded == nil_string
67
81
  encoded.unpack mask
68
82
  end
69
83
 
data/lib/rbbt/persist/tsv.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'rbbt/persist/tsv/adapter'
2
2
 
3
3
  require 'rbbt/persist/tsv/fix_width_table'
4
+ require 'rbbt/persist/tsv/packed_index'
4
5
 
5
6
  begin
6
7
  require 'rbbt/persist/tsv/tokyocabinet'
@@ -65,6 +66,13 @@ module Persist
65
66
  else
66
67
  Persist.open_fwt(path, value_size, range, serializer, update, in_memory)
67
68
  end
69
+ when 'pki'
70
+ pattern, pos_function = Misc.process_options options.dup, :pattern, :pos_function
71
+ if pos_function
72
+ Persist.open_pki(path, write, pattern, &pos_function)
73
+ else
74
+ Persist.open_pki(path, write, pattern)
75
+ end
68
76
  else
69
77
  Persist.open_tokyocabinet(path, write, serializer, type)
70
78
  end
@@ -113,7 +121,7 @@ module Persist
113
121
  end
114
122
  end
115
123
 
116
- FileUtils.rm path if File.exists? path
124
+ FileUtils.rm_rf path if File.exists? path
117
125
 
118
126
  Log.medium "TSV persistence creating: #{ path }"
119
127
 
data/lib/rbbt/persist/tsv/fix_width_table.rb CHANGED
@@ -81,7 +81,7 @@ module Persist
81
81
  end
82
82
 
83
83
  def size
84
- @size #+ metadata.keys.length
84
+ @size
85
85
  end
86
86
 
87
87
  def each
data/lib/rbbt/persist/tsv/packed_index.rb ADDED
@@ -0,0 +1,101 @@
1
+ require 'rbbt/packed_index'
2
+
3
+ module Persist
4
+
5
+ module PKIAdapter
6
+ include Persist::TSVAdapter
7
+
8
+ attr_accessor :pos_function
9
+
10
+ def self.open(path, write, pattern, &pos_function)
11
+ db = CONNECTIONS[path] ||= PackedIndex.new(path, write, pattern)
12
+ db.extend Persist::PKIAdapter
13
+ db.persistence_path = path
14
+ db.pos_function = pos_function
15
+ db
16
+ end
17
+
18
+ def persistence_path=(value)
19
+ @persistence_path = value
20
+ @file = value
21
+ end
22
+
23
+ def metadata_file
24
+ @metadata_file ||= self.persistence_path + '.metadata'
25
+ end
26
+
27
+ def metadata
28
+ return {} unless File.exists? metadata_file
29
+ Open.open(metadata_file, :mode => "rb") do |f|
30
+ Marshal.load(f)
31
+ end
32
+ end
33
+
34
+ def set_metadata(k,v)
35
+ metadata = self.metadata
36
+ metadata[k] = v
37
+ Misc.sensiblewrite(metadata_file, Marshal.dump(metadata))
38
+ end
39
+
40
+ def [](key, clean = false)
41
+ if TSV::ENTRY_KEYS.include? key
42
+ metadata[key]
43
+ else
44
+ key = pos_function.call(key) if pos_function and not clean
45
+ res = super(key)
46
+ res.extend MultipleResult unless res.nil?
47
+ res
48
+ end
49
+ end
50
+
51
+ def value(pos)
52
+ self.send(:[], pos, true)
53
+ end
54
+
55
+ def []=(key, value)
56
+ if TSV::ENTRY_KEYS.include? key
57
+ set_metadata(key, value)
58
+ else
59
+ add key, value
60
+ end
61
+ end
62
+
63
+ def add(key, value)
64
+ key = pos_function.call(key) if pos_function
65
+ self.send(:<<, value)
66
+ end
67
+
68
+ def add_range_point(key, value)
69
+ key = pos_function.call(key) if pos_function
70
+ super(key, value)
71
+ end
72
+
73
+ def include?(i)
74
+ return true if Fixnum === i and i < size
75
+ return true if metadata.include? i
76
+ false
77
+ end
78
+
79
+ def each
80
+ size.times do |i|
81
+ yield i, value(i)
82
+ end
83
+ end
84
+
85
+ def keys
86
+ []
87
+ end
88
+ end
89
+
90
+ def self.open_pki(path, write, pattern, &pos_function)
91
+ FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
92
+
93
+ database = Persist::PKIAdapter.open(path, write, pattern, &pos_function)
94
+
95
+ #TSV.setup database
96
+
97
+ #database.serializer = :clean
98
+
99
+ database
100
+ end
101
+ end
data/lib/rbbt/persist/tsv/sharder.rb CHANGED
@@ -205,7 +205,9 @@ module Persist
205
205
  end
206
206
 
207
207
  def [](key, clean=false)
208
- v = database(key).send(:[], key)
208
+ database = database(key)
209
+ return nil if database.nil?
210
+ v = database.send(:[], key)
209
211
  end
210
212
 
211
213
  def <<(p)
@@ -240,9 +242,15 @@ module Persist
240
242
 
241
243
  database = Persist::SharderAdapter.open(path, write, type, options, &shard_function)
242
244
 
243
- unless serializer == :clean #or type.to_s == 'fwt'
245
+ if type.to_s == 'pki'
244
246
  TSV.setup database
245
- database.serializer = serializer if serializer
247
+ database.type = :list
248
+ database.serializer = :clean
249
+ else
250
+ if serializer != :clean
251
+ TSV.setup database
252
+ database.serializer = serializer if serializer
253
+ end
246
254
  end
247
255
 
248
256
  database
data/lib/rbbt/resource/path.rb CHANGED
@@ -154,7 +154,7 @@ module Path
154
154
  begin
155
155
  self.produce
156
156
  File.exists? self.find
157
- rescue
157
+ rescue Exception
158
158
  false
159
159
  end
160
160
  end
data/lib/rbbt/resource/rake.rb CHANGED
@@ -49,6 +49,7 @@ module Rake
49
49
  end
50
50
  rescue
51
51
  Log.error "Error in rake: #{$!.message}"
52
+ Log.exception $!
52
53
  raise $!
53
54
  end
54
55
  }
data/lib/rbbt/tsv/accessor.rb CHANGED
@@ -524,19 +524,8 @@ module TSV
524
524
  end
525
525
  end
526
526
 
527
- def to_s(keys = nil, no_options = false)
528
- if FalseClass === keys or TrueClass === keys
529
- no_options = keys
530
- keys = nil
531
- end
532
-
533
- if keys == :sort
534
- with_unnamed do
535
- keys = self.keys.sort
536
- end
537
- end
538
-
539
- io = TSV::Dumper.stream self do |dumper|
527
+ def dumper_stream(keys = nil, no_options = false)
528
+ TSV::Dumper.stream self do |dumper|
540
529
  dumper.init unless no_options
541
530
  begin
542
531
  if keys
@@ -554,8 +543,24 @@ module TSV
554
543
  Log.exception $!
555
544
  raise $!
556
545
  end
546
+ dumper.close
547
+ end
548
+ end
549
+
550
+ def to_s(keys = nil, no_options = false)
551
+ if FalseClass === keys or TrueClass === keys
552
+ no_options = keys
553
+ keys = nil
554
+ end
555
+
556
+ if keys == :sort
557
+ with_unnamed do
558
+ keys = self.keys.sort
559
+ end
557
560
  end
558
561
 
562
+ io = dumper_stream(keys, no_options)
563
+
559
564
  str = ''
560
565
  while block = io.read(2048)
561
566
  str << block
data/lib/rbbt/tsv/dumper.rb CHANGED
@@ -4,12 +4,8 @@ module TSV
4
4
  def self.stream(options = {}, filename = nil, &block)
5
5
  dumper = TSV::Dumper.new options, filename
6
6
  Thread.new(Thread.current) do |parent|
7
- begin
8
- yield dumper
9
- dumper.close
10
- rescue Exception
11
- raise $!
12
- end
7
+ yield dumper
8
+ dumper.close
13
9
  end
14
10
  dumper.stream
15
11
  end