rbbt-util 5.13.37 → 5.14.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +6 -1
  3. data/lib/rbbt/fix_width_table.rb +21 -9
  4. data/lib/rbbt/monitor.rb +1 -1
  5. data/lib/rbbt/packed_index.rb +19 -5
  6. data/lib/rbbt/persist/tsv.rb +9 -1
  7. data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
  8. data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
  9. data/lib/rbbt/persist/tsv/sharder.rb +11 -3
  10. data/lib/rbbt/resource/path.rb +1 -1
  11. data/lib/rbbt/resource/rake.rb +1 -0
  12. data/lib/rbbt/tsv/accessor.rb +18 -13
  13. data/lib/rbbt/tsv/dumper.rb +2 -6
  14. data/lib/rbbt/tsv/manipulate.rb +6 -4
  15. data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
  16. data/lib/rbbt/tsv/parser.rb +20 -16
  17. data/lib/rbbt/tsv/stream.rb +87 -76
  18. data/lib/rbbt/tsv/util.rb +8 -3
  19. data/lib/rbbt/util/R.rb +1 -1
  20. data/lib/rbbt/util/cmd.rb +0 -3
  21. data/lib/rbbt/util/concurrency/processes.rb +3 -0
  22. data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
  23. data/lib/rbbt/util/log.rb +45 -18
  24. data/lib/rbbt/util/log/progress/report.rb +3 -2
  25. data/lib/rbbt/util/log/progress/util.rb +1 -1
  26. data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
  27. data/lib/rbbt/util/misc/development.rb +10 -4
  28. data/lib/rbbt/util/misc/lock.rb +1 -1
  29. data/lib/rbbt/util/misc/omics.rb +2 -0
  30. data/lib/rbbt/util/misc/pipes.rb +90 -87
  31. data/lib/rbbt/workflow.rb +6 -2
  32. data/lib/rbbt/workflow/accessor.rb +70 -40
  33. data/lib/rbbt/workflow/definition.rb +23 -0
  34. data/lib/rbbt/workflow/step.rb +15 -3
  35. data/lib/rbbt/workflow/step/run.rb +18 -13
  36. data/lib/rbbt/workflow/usage.rb +3 -0
  37. data/share/Rlib/util.R +1 -1
  38. data/share/rbbt_commands/tsv/get +0 -2
  39. data/share/rbbt_commands/tsv/info +13 -5
  40. data/share/rbbt_commands/tsv/subset +1 -1
  41. data/share/rbbt_commands/workflow/info +32 -0
  42. data/share/rbbt_commands/workflow/task +0 -2
  43. data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
  44. data/test/rbbt/test_fix_width_table.rb +1 -0
  45. data/test/rbbt/test_packed_index.rb +3 -0
  46. data/test/rbbt/tsv/test_stream.rb +55 -2
  47. data/test/rbbt/util/misc/test_pipes.rb +8 -6
  48. data/test/rbbt/workflow/test_step.rb +7 -6
  49. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 26df367cce98f5b14b985bd311d4b6594dcb62a9
4
- data.tar.gz: 1931a495ac7eb8eb17dcfa0f154be8c9d61e8b7f
3
+ metadata.gz: 2c1f84fdf126a66b0e8cf03cd96237dd4625c5d7
4
+ data.tar.gz: 6e9b9ed9bc339fedaa99f1a96d8f537692ebfbb6
5
5
  SHA512:
6
- metadata.gz: e71fee09c427fdf3faf326b75897dc6d070ec605bc94b3785d8cacf88f2bd3fa1fdd8d017e5e06d5e48cff48960cb8e7debb337b3a4eeb3361d2fd3735b2b1f3
7
- data.tar.gz: 14a1b01824edd6bfa2a6ead19828454ea57b1c3846eb1f332a0b15faee9bf5019fdda715732ef57b1326ce3870d8c2f8d8c6c9f2faf6e70218ceea76f373436d
6
+ metadata.gz: 7879d8241fc10774522d28484ae1cf37bd935067038b1f9e370a2ff4af51adb9f7f183b43c7852694ce4e8b44282ab937b3744b7d8d35072a86e3ccbfb51c48e
7
+ data.tar.gz: 12ecd4f1f116f4125bb9f4159ab4556910b6e31d0fc3a351c94667758507296914445091575fc8946c975cc305da957949fa79c25913c990e477aed56506e0a0
data/bin/rbbt CHANGED
@@ -41,9 +41,10 @@ $ rbbt <command> <subcommand> ... -a --arg1 --arg2='value' --arg3 'another-value
41
41
  --profile #{Log.color :yellow, "Profile execution"}
42
42
  --nocolor #{Log.color :yellow, "Disable colored output"}
43
43
  --nobar #{Log.color :yellow, "Disable progress report"}
44
+ --nostream #{Log.color :yellow, "Disable persistance/job streaming"}
44
45
  --locate_file #{Log.color :yellow, "Report the location of the script instead of executing it"}
45
46
  --dump_mem* #{Log.color :yellow, "Dump strings in memory each second into file"}
46
- --no_lock_id #{Log.color :yellow, "Do not track lockfiles with ids (for high-througput and high-concurrency"}
47
+ --no_lock_id #{Log.color :yellow, "Do not track lockfiles with ids (for high-througput and high-concurrency"}
47
48
  EOF
48
49
 
49
50
 
@@ -70,6 +71,10 @@ if mem_dump = options.delete(:dump_mem)
70
71
  Rbbt.dump_memory(mem_dump, Symbol)
71
72
  end
72
73
 
74
+ if options.delete :stream
75
+ ENV["RBBT_NO_STREAM"] = "true"
76
+ end
77
+
73
78
  if options.delete :nobar
74
79
  ENV["RBBT_NO_PROGRESS"] = "true"
75
80
  end
data/lib/rbbt/fix_width_table.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  class FixWidthTable
2
2
 
3
- attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask
3
+ attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask, :write
4
4
  def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
5
5
  @filename = filename
6
6
 
@@ -9,6 +9,7 @@ class FixWidthTable
9
9
  @value_size = value_size
10
10
  @range = range
11
11
  @record_size = @value_size + (@range ? 16 : 8)
12
+ @write = true
12
13
 
13
14
  if %w(memory stringio).include? filename.to_s.downcase
14
15
  @filename = :memory
@@ -21,6 +22,7 @@ class FixWidthTable
21
22
 
22
23
  @file.write [value_size].pack("L")
23
24
  @file.write [@range ? 1 : 0 ].pack("C")
25
+
24
26
  @size = 0
25
27
  else
26
28
  Log.debug "FixWidthTable up-to-date: #{ filename }"
@@ -31,11 +33,17 @@ class FixWidthTable
31
33
  end
32
34
  @value_size = @file.read(4).unpack("L").first
33
35
  @range = @file.read(1).unpack("C").first == 1
34
- @record_size = @value_size + (@range ? 12 : 4)
36
+ @record_size = @value_size + (@range ? 16 : 8)
37
+ @write = false
38
+
35
39
  @size = (File.size(@filename) - 5) / (@record_size)
36
40
  end
37
41
 
38
- @mask = "a#{value_size}"
42
+ @mask = "a#{@value_size}"
43
+ end
44
+
45
+ def write?
46
+ @write
39
47
  end
40
48
 
41
49
  def persistence_path
@@ -46,15 +54,14 @@ class FixWidthTable
46
54
  @filename=value
47
55
  end
48
56
 
49
- CONNECTIONS = {} unless defined? CONNECTIONS
50
57
  def self.get(filename, value_size = nil, range = nil, update = false)
51
58
  return self.new(filename, value_size, range, update) if filename == :memory
52
59
  case
53
- when (!File.exists?(filename) or update or not CONNECTIONS.include?(filename))
54
- CONNECTIONS[filename] = self.new(filename, value_size, range, update)
60
+ when (!File.exists?(filename) or update or not Persist::CONNECTIONS.include?(filename))
61
+ Persist::CONNECTIONS[filename] = self.new(filename, value_size, range, update)
55
62
  end
56
63
 
57
- CONNECTIONS[filename]
64
+ Persist::CONNECTIONS[filename]
58
65
  end
59
66
 
60
67
  def format(pos, value)
@@ -100,16 +107,19 @@ class FixWidthTable
100
107
  @file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
101
108
  padding = @file.read(4).unpack("l").first+1
102
109
  txt = @file.read(value_size)
103
- txt.unpack(mask).first[0..-padding]
110
+ str = txt.unpack(mask).first
111
+ padding > 1 ? str[0..-padding] : str
104
112
  end
105
113
 
106
114
  def read(force = false)
107
115
  return if @filename == :memory
116
+ @write = false
108
117
  @file.close unless @file.closed?
109
118
  @file = File.open(filename, 'r:ASCII-8BIT')
110
119
  end
111
120
 
112
121
  def close
122
+ @write = false
113
123
  @file.close
114
124
  end
115
125
 
@@ -191,7 +201,9 @@ class FixWidthTable
191
201
 
192
202
  idx = 0 if idx < 0
193
203
 
194
- idx -= overlap(idx) unless overlap(idx).nil?
204
+ overlap = overlap(idx)
205
+
206
+ idx -= overlap unless overlap.nil?
195
207
 
196
208
  values = []
197
209
  l_start = pos(idx)
data/lib/rbbt/monitor.rb CHANGED
@@ -4,7 +4,7 @@ module Rbbt
4
4
 
5
5
  LOCK_DIRS = Rbbt.share.find_all + Rbbt.var.cache.persistence.find_all + Rbbt.var.jobs.find_all +
6
6
  Rbbt.tmp.tsv_open_locks.find_all + Rbbt.tmp.persist_locks.find_all + Rbbt.tmp.sensiblewrite_lock_dir.find_all +
7
- Rbbt.tmp.produce_locks.find_all
7
+ Rbbt.tmp.produce_locks.find_all + Rbbt.tmp.step_info_locks.find_all
8
8
 
9
9
  SENSIBLE_WRITE_DIRS = Misc.sensiblewrite_dir.find_all
10
10
 
data/lib/rbbt/packed_index.rb CHANGED
@@ -2,10 +2,10 @@ class PackedIndex
2
2
  attr_accessor :file, :mask, :mask_length, :offset, :item_size, :stream, :nil_string
3
3
 
4
4
  ELEMS = {
5
- "I" => ["q", 8],
6
5
  "i" => ["l", 4],
6
+ "I" => ["q", 8],
7
7
  "f" => ["f", 4],
8
- "F" => ["D", 8],
8
+ "F" => ["d", 8],
9
9
  }
10
10
 
11
11
  def self.process_mask(mask)
@@ -28,6 +28,12 @@ class PackedIndex
28
28
  [str, size]
29
29
  end
30
30
 
31
+ def size
32
+ @size ||= begin
33
+ (File.size(file) - offset) / item_size
34
+ end
35
+ end
36
+
31
37
  def initialize(file, write = false, pattern = nil)
32
38
  @file = file
33
39
  if write
@@ -44,10 +50,18 @@ class PackedIndex
44
50
  @mask = @stream.read(mask_length)
45
51
  @offset = @mask.length + 8
46
52
  end
47
- @nil_string = "[NIL]" + "-" * (@item_size - 5)
53
+ @nil_string = "NIL" << ("-" * (@item_size - 3))
54
+ end
55
+
56
+ def persistence_path
57
+ @file
58
+ end
59
+
60
+ def persistence_path=(value)
61
+ @file=value
48
62
  end
49
63
 
50
- def read
64
+ def read(force = false)
51
65
  close
52
66
  @stream = Open.open(file, :mode => 'rb')
53
67
  end
@@ -63,7 +77,7 @@ class PackedIndex
63
77
  def [](position)
64
78
  @stream.seek(position * item_size + offset)
65
79
  encoded = @stream.read(item_size)
66
- return nil if encoded == nil_string
80
+ return nil if encoded.nil? or encoded == nil_string
67
81
  encoded.unpack mask
68
82
  end
69
83
 
data/lib/rbbt/persist/tsv.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  require 'rbbt/persist/tsv/adapter'
2
2
 
3
3
  require 'rbbt/persist/tsv/fix_width_table'
4
+ require 'rbbt/persist/tsv/packed_index'
4
5
 
5
6
  begin
6
7
  require 'rbbt/persist/tsv/tokyocabinet'
@@ -65,6 +66,13 @@ module Persist
65
66
  else
66
67
  Persist.open_fwt(path, value_size, range, serializer, update, in_memory)
67
68
  end
69
+ when 'pki'
70
+ pattern, pos_function = Misc.process_options options.dup, :pattern, :pos_function
71
+ if pos_function
72
+ Persist.open_pki(path, write, pattern, &pos_function)
73
+ else
74
+ Persist.open_pki(path, write, pattern)
75
+ end
68
76
  else
69
77
  Persist.open_tokyocabinet(path, write, serializer, type)
70
78
  end
@@ -113,7 +121,7 @@ module Persist
113
121
  end
114
122
  end
115
123
 
116
- FileUtils.rm path if File.exists? path
124
+ FileUtils.rm_rf path if File.exists? path
117
125
 
118
126
  Log.medium "TSV persistence creating: #{ path }"
119
127
 
data/lib/rbbt/persist/tsv/fix_width_table.rb CHANGED
@@ -81,7 +81,7 @@ module Persist
81
81
  end
82
82
 
83
83
  def size
84
- @size #+ metadata.keys.length
84
+ @size
85
85
  end
86
86
 
87
87
  def each
data/lib/rbbt/persist/tsv/packed_index.rb ADDED
@@ -0,0 +1,101 @@
1
+ require 'rbbt/packed_index'
2
+
3
+ module Persist
4
+
5
+ module PKIAdapter
6
+ include Persist::TSVAdapter
7
+
8
+ attr_accessor :pos_function
9
+
10
+ def self.open(path, write, pattern, &pos_function)
11
+ db = CONNECTIONS[path] ||= PackedIndex.new(path, write, pattern)
12
+ db.extend Persist::PKIAdapter
13
+ db.persistence_path = path
14
+ db.pos_function = pos_function
15
+ db
16
+ end
17
+
18
+ def persistence_path=(value)
19
+ @persistence_path = value
20
+ @file = value
21
+ end
22
+
23
+ def metadata_file
24
+ @metadata_file ||= self.persistence_path + '.metadata'
25
+ end
26
+
27
+ def metadata
28
+ return {} unless File.exists? metadata_file
29
+ Open.open(metadata_file, :mode => "rb") do |f|
30
+ Marshal.load(f)
31
+ end
32
+ end
33
+
34
+ def set_metadata(k,v)
35
+ metadata = self.metadata
36
+ metadata[k] = v
37
+ Misc.sensiblewrite(metadata_file, Marshal.dump(metadata))
38
+ end
39
+
40
+ def [](key, clean = false)
41
+ if TSV::ENTRY_KEYS.include? key
42
+ metadata[key]
43
+ else
44
+ key = pos_function.call(key) if pos_function and not clean
45
+ res = super(key)
46
+ res.extend MultipleResult unless res.nil?
47
+ res
48
+ end
49
+ end
50
+
51
+ def value(pos)
52
+ self.send(:[], pos, true)
53
+ end
54
+
55
+ def []=(key, value)
56
+ if TSV::ENTRY_KEYS.include? key
57
+ set_metadata(key, value)
58
+ else
59
+ add key, value
60
+ end
61
+ end
62
+
63
+ def add(key, value)
64
+ key = pos_function.call(key) if pos_function
65
+ self.send(:<<, value)
66
+ end
67
+
68
+ def add_range_point(key, value)
69
+ key = pos_function.call(key) if pos_function
70
+ super(key, value)
71
+ end
72
+
73
+ def include?(i)
74
+ return true if Fixnum === i and i < size
75
+ return true if metadata.include? i
76
+ false
77
+ end
78
+
79
+ def each
80
+ size.times do |i|
81
+ yield i, value(i)
82
+ end
83
+ end
84
+
85
+ def keys
86
+ []
87
+ end
88
+ end
89
+
90
+ def self.open_pki(path, write, pattern, &pos_function)
91
+ FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
92
+
93
+ database = Persist::PKIAdapter.open(path, write, pattern, &pos_function)
94
+
95
+ #TSV.setup database
96
+
97
+ #database.serializer = :clean
98
+
99
+ database
100
+ end
101
+ end
data/lib/rbbt/persist/tsv/sharder.rb CHANGED
@@ -205,7 +205,9 @@ module Persist
205
205
  end
206
206
 
207
207
  def [](key, clean=false)
208
- v = database(key).send(:[], key)
208
+ database = database(key)
209
+ return nil if database.nil?
210
+ v = database.send(:[], key)
209
211
  end
210
212
 
211
213
  def <<(p)
@@ -240,9 +242,15 @@ module Persist
240
242
 
241
243
  database = Persist::SharderAdapter.open(path, write, type, options, &shard_function)
242
244
 
243
- unless serializer == :clean #or type.to_s == 'fwt'
245
+ if type.to_s == 'pki'
244
246
  TSV.setup database
245
- database.serializer = serializer if serializer
247
+ database.type = :list
248
+ database.serializer = :clean
249
+ else
250
+ if serializer != :clean
251
+ TSV.setup database
252
+ database.serializer = serializer if serializer
253
+ end
246
254
  end
247
255
 
248
256
  database
data/lib/rbbt/resource/path.rb CHANGED
@@ -154,7 +154,7 @@ module Path
154
154
  begin
155
155
  self.produce
156
156
  File.exists? self.find
157
- rescue
157
+ rescue Exception
158
158
  false
159
159
  end
160
160
  end
data/lib/rbbt/resource/rake.rb CHANGED
@@ -49,6 +49,7 @@ module Rake
49
49
  end
50
50
  rescue
51
51
  Log.error "Error in rake: #{$!.message}"
52
+ Log.exception $!
52
53
  raise $!
53
54
  end
54
55
  }
data/lib/rbbt/tsv/accessor.rb CHANGED
@@ -524,19 +524,8 @@ module TSV
524
524
  end
525
525
  end
526
526
 
527
- def to_s(keys = nil, no_options = false)
528
- if FalseClass === keys or TrueClass === keys
529
- no_options = keys
530
- keys = nil
531
- end
532
-
533
- if keys == :sort
534
- with_unnamed do
535
- keys = self.keys.sort
536
- end
537
- end
538
-
539
- io = TSV::Dumper.stream self do |dumper|
527
+ def dumper_stream(keys = nil, no_options = false)
528
+ TSV::Dumper.stream self do |dumper|
540
529
  dumper.init unless no_options
541
530
  begin
542
531
  if keys
@@ -554,8 +543,24 @@ module TSV
554
543
  Log.exception $!
555
544
  raise $!
556
545
  end
546
+ dumper.close
547
+ end
548
+ end
549
+
550
+ def to_s(keys = nil, no_options = false)
551
+ if FalseClass === keys or TrueClass === keys
552
+ no_options = keys
553
+ keys = nil
554
+ end
555
+
556
+ if keys == :sort
557
+ with_unnamed do
558
+ keys = self.keys.sort
559
+ end
557
560
  end
558
561
 
562
+ io = dumper_stream(keys, no_options)
563
+
559
564
  str = ''
560
565
  while block = io.read(2048)
561
566
  str << block
data/lib/rbbt/tsv/dumper.rb CHANGED
@@ -4,12 +4,8 @@ module TSV
4
4
  def self.stream(options = {}, filename = nil, &block)
5
5
  dumper = TSV::Dumper.new options, filename
6
6
  Thread.new(Thread.current) do |parent|
7
- begin
8
- yield dumper
9
- dumper.close
10
- rescue Exception
11
- raise $!
12
- end
7
+ yield dumper
8
+ dumper.close
13
9
  end
14
10
  dumper.stream
15
11
  end