rbbt-util 5.13.37 → 5.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/rbbt +6 -1
- data/lib/rbbt/fix_width_table.rb +21 -9
- data/lib/rbbt/monitor.rb +1 -1
- data/lib/rbbt/packed_index.rb +19 -5
- data/lib/rbbt/persist/tsv.rb +9 -1
- data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
- data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
- data/lib/rbbt/persist/tsv/sharder.rb +11 -3
- data/lib/rbbt/resource/path.rb +1 -1
- data/lib/rbbt/resource/rake.rb +1 -0
- data/lib/rbbt/tsv/accessor.rb +18 -13
- data/lib/rbbt/tsv/dumper.rb +2 -6
- data/lib/rbbt/tsv/manipulate.rb +6 -4
- data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
- data/lib/rbbt/tsv/parser.rb +20 -16
- data/lib/rbbt/tsv/stream.rb +87 -76
- data/lib/rbbt/tsv/util.rb +8 -3
- data/lib/rbbt/util/R.rb +1 -1
- data/lib/rbbt/util/cmd.rb +0 -3
- data/lib/rbbt/util/concurrency/processes.rb +3 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
- data/lib/rbbt/util/log.rb +45 -18
- data/lib/rbbt/util/log/progress/report.rb +3 -2
- data/lib/rbbt/util/log/progress/util.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
- data/lib/rbbt/util/misc/development.rb +10 -4
- data/lib/rbbt/util/misc/lock.rb +1 -1
- data/lib/rbbt/util/misc/omics.rb +2 -0
- data/lib/rbbt/util/misc/pipes.rb +90 -87
- data/lib/rbbt/workflow.rb +6 -2
- data/lib/rbbt/workflow/accessor.rb +70 -40
- data/lib/rbbt/workflow/definition.rb +23 -0
- data/lib/rbbt/workflow/step.rb +15 -3
- data/lib/rbbt/workflow/step/run.rb +18 -13
- data/lib/rbbt/workflow/usage.rb +3 -0
- data/share/Rlib/util.R +1 -1
- data/share/rbbt_commands/tsv/get +0 -2
- data/share/rbbt_commands/tsv/info +13 -5
- data/share/rbbt_commands/tsv/subset +1 -1
- data/share/rbbt_commands/workflow/info +32 -0
- data/share/rbbt_commands/workflow/task +0 -2
- data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
- data/test/rbbt/test_fix_width_table.rb +1 -0
- data/test/rbbt/test_packed_index.rb +3 -0
- data/test/rbbt/tsv/test_stream.rb +55 -2
- data/test/rbbt/util/misc/test_pipes.rb +8 -6
- data/test/rbbt/workflow/test_step.rb +7 -6
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c1f84fdf126a66b0e8cf03cd96237dd4625c5d7
|
4
|
+
data.tar.gz: 6e9b9ed9bc339fedaa99f1a96d8f537692ebfbb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7879d8241fc10774522d28484ae1cf37bd935067038b1f9e370a2ff4af51adb9f7f183b43c7852694ce4e8b44282ab937b3744b7d8d35072a86e3ccbfb51c48e
|
7
|
+
data.tar.gz: 12ecd4f1f116f4125bb9f4159ab4556910b6e31d0fc3a351c94667758507296914445091575fc8946c975cc305da957949fa79c25913c990e477aed56506e0a0
|
data/bin/rbbt
CHANGED
@@ -41,9 +41,10 @@ $ rbbt <command> <subcommand> ... -a --arg1 --arg2='value' --arg3 'another-value
|
|
41
41
|
--profile #{Log.color :yellow, "Profile execution"}
|
42
42
|
--nocolor #{Log.color :yellow, "Disable colored output"}
|
43
43
|
--nobar #{Log.color :yellow, "Disable progress report"}
|
44
|
+
--nostream #{Log.color :yellow, "Disable persistance/job streaming"}
|
44
45
|
--locate_file #{Log.color :yellow, "Report the location of the script instead of executing it"}
|
45
46
|
--dump_mem* #{Log.color :yellow, "Dump strings in memory each second into file"}
|
46
|
-
--no_lock_id
|
47
|
+
--no_lock_id #{Log.color :yellow, "Do not track lockfiles with ids (for high-througput and high-concurrency"}
|
47
48
|
EOF
|
48
49
|
|
49
50
|
|
@@ -70,6 +71,10 @@ if mem_dump = options.delete(:dump_mem)
|
|
70
71
|
Rbbt.dump_memory(mem_dump, Symbol)
|
71
72
|
end
|
72
73
|
|
74
|
+
if options.delete :stream
|
75
|
+
ENV["RBBT_NO_STREAM"] = "true"
|
76
|
+
end
|
77
|
+
|
73
78
|
if options.delete :nobar
|
74
79
|
ENV["RBBT_NO_PROGRESS"] = "true"
|
75
80
|
end
|
data/lib/rbbt/fix_width_table.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
class FixWidthTable
|
2
2
|
|
3
|
-
attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask
|
3
|
+
attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask, :write
|
4
4
|
def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
|
5
5
|
@filename = filename
|
6
6
|
|
@@ -9,6 +9,7 @@ class FixWidthTable
|
|
9
9
|
@value_size = value_size
|
10
10
|
@range = range
|
11
11
|
@record_size = @value_size + (@range ? 16 : 8)
|
12
|
+
@write = true
|
12
13
|
|
13
14
|
if %w(memory stringio).include? filename.to_s.downcase
|
14
15
|
@filename = :memory
|
@@ -21,6 +22,7 @@ class FixWidthTable
|
|
21
22
|
|
22
23
|
@file.write [value_size].pack("L")
|
23
24
|
@file.write [@range ? 1 : 0 ].pack("C")
|
25
|
+
|
24
26
|
@size = 0
|
25
27
|
else
|
26
28
|
Log.debug "FixWidthTable up-to-date: #{ filename }"
|
@@ -31,11 +33,17 @@ class FixWidthTable
|
|
31
33
|
end
|
32
34
|
@value_size = @file.read(4).unpack("L").first
|
33
35
|
@range = @file.read(1).unpack("C").first == 1
|
34
|
-
@record_size = @value_size + (@range ?
|
36
|
+
@record_size = @value_size + (@range ? 16 : 8)
|
37
|
+
@write = false
|
38
|
+
|
35
39
|
@size = (File.size(@filename) - 5) / (@record_size)
|
36
40
|
end
|
37
41
|
|
38
|
-
@mask = "a#{value_size}"
|
42
|
+
@mask = "a#{@value_size}"
|
43
|
+
end
|
44
|
+
|
45
|
+
def write?
|
46
|
+
@write
|
39
47
|
end
|
40
48
|
|
41
49
|
def persistence_path
|
@@ -46,15 +54,14 @@ class FixWidthTable
|
|
46
54
|
@filename=value
|
47
55
|
end
|
48
56
|
|
49
|
-
CONNECTIONS = {} unless defined? CONNECTIONS
|
50
57
|
def self.get(filename, value_size = nil, range = nil, update = false)
|
51
58
|
return self.new(filename, value_size, range, update) if filename == :memory
|
52
59
|
case
|
53
|
-
when (!File.exists?(filename) or update or not CONNECTIONS.include?(filename))
|
54
|
-
CONNECTIONS[filename] = self.new(filename, value_size, range, update)
|
60
|
+
when (!File.exists?(filename) or update or not Persist::CONNECTIONS.include?(filename))
|
61
|
+
Persist::CONNECTIONS[filename] = self.new(filename, value_size, range, update)
|
55
62
|
end
|
56
63
|
|
57
|
-
CONNECTIONS[filename]
|
64
|
+
Persist::CONNECTIONS[filename]
|
58
65
|
end
|
59
66
|
|
60
67
|
def format(pos, value)
|
@@ -100,16 +107,19 @@ class FixWidthTable
|
|
100
107
|
@file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
|
101
108
|
padding = @file.read(4).unpack("l").first+1
|
102
109
|
txt = @file.read(value_size)
|
103
|
-
txt.unpack(mask).first
|
110
|
+
str = txt.unpack(mask).first
|
111
|
+
padding > 1 ? str[0..-padding] : str
|
104
112
|
end
|
105
113
|
|
106
114
|
def read(force = false)
|
107
115
|
return if @filename == :memory
|
116
|
+
@write = false
|
108
117
|
@file.close unless @file.closed?
|
109
118
|
@file = File.open(filename, 'r:ASCII-8BIT')
|
110
119
|
end
|
111
120
|
|
112
121
|
def close
|
122
|
+
@write = false
|
113
123
|
@file.close
|
114
124
|
end
|
115
125
|
|
@@ -191,7 +201,9 @@ class FixWidthTable
|
|
191
201
|
|
192
202
|
idx = 0 if idx < 0
|
193
203
|
|
194
|
-
|
204
|
+
overlap = overlap(idx)
|
205
|
+
|
206
|
+
idx -= overlap unless overlap.nil?
|
195
207
|
|
196
208
|
values = []
|
197
209
|
l_start = pos(idx)
|
data/lib/rbbt/monitor.rb
CHANGED
@@ -4,7 +4,7 @@ module Rbbt
|
|
4
4
|
|
5
5
|
LOCK_DIRS = Rbbt.share.find_all + Rbbt.var.cache.persistence.find_all + Rbbt.var.jobs.find_all +
|
6
6
|
Rbbt.tmp.tsv_open_locks.find_all + Rbbt.tmp.persist_locks.find_all + Rbbt.tmp.sensiblewrite_lock_dir.find_all +
|
7
|
-
Rbbt.tmp.produce_locks.find_all
|
7
|
+
Rbbt.tmp.produce_locks.find_all + Rbbt.tmp.step_info_locks.find_all
|
8
8
|
|
9
9
|
SENSIBLE_WRITE_DIRS = Misc.sensiblewrite_dir.find_all
|
10
10
|
|
data/lib/rbbt/packed_index.rb
CHANGED
@@ -2,10 +2,10 @@ class PackedIndex
|
|
2
2
|
attr_accessor :file, :mask, :mask_length, :offset, :item_size, :stream, :nil_string
|
3
3
|
|
4
4
|
ELEMS = {
|
5
|
-
"I" => ["q", 8],
|
6
5
|
"i" => ["l", 4],
|
6
|
+
"I" => ["q", 8],
|
7
7
|
"f" => ["f", 4],
|
8
|
-
"F" => ["
|
8
|
+
"F" => ["d", 8],
|
9
9
|
}
|
10
10
|
|
11
11
|
def self.process_mask(mask)
|
@@ -28,6 +28,12 @@ class PackedIndex
|
|
28
28
|
[str, size]
|
29
29
|
end
|
30
30
|
|
31
|
+
def size
|
32
|
+
@size ||= begin
|
33
|
+
(File.size(file) - offset) / item_size
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
31
37
|
def initialize(file, write = false, pattern = nil)
|
32
38
|
@file = file
|
33
39
|
if write
|
@@ -44,10 +50,18 @@ class PackedIndex
|
|
44
50
|
@mask = @stream.read(mask_length)
|
45
51
|
@offset = @mask.length + 8
|
46
52
|
end
|
47
|
-
@nil_string = "
|
53
|
+
@nil_string = "NIL" << ("-" * (@item_size - 3))
|
54
|
+
end
|
55
|
+
|
56
|
+
def persistence_path
|
57
|
+
@file
|
58
|
+
end
|
59
|
+
|
60
|
+
def persistence_path=(value)
|
61
|
+
@file=value
|
48
62
|
end
|
49
63
|
|
50
|
-
def read
|
64
|
+
def read(force = false)
|
51
65
|
close
|
52
66
|
@stream = Open.open(file, :mode => 'rb')
|
53
67
|
end
|
@@ -63,7 +77,7 @@ class PackedIndex
|
|
63
77
|
def [](position)
|
64
78
|
@stream.seek(position * item_size + offset)
|
65
79
|
encoded = @stream.read(item_size)
|
66
|
-
return nil if encoded == nil_string
|
80
|
+
return nil if encoded.nil? or encoded == nil_string
|
67
81
|
encoded.unpack mask
|
68
82
|
end
|
69
83
|
|
data/lib/rbbt/persist/tsv.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'rbbt/persist/tsv/adapter'
|
2
2
|
|
3
3
|
require 'rbbt/persist/tsv/fix_width_table'
|
4
|
+
require 'rbbt/persist/tsv/packed_index'
|
4
5
|
|
5
6
|
begin
|
6
7
|
require 'rbbt/persist/tsv/tokyocabinet'
|
@@ -65,6 +66,13 @@ module Persist
|
|
65
66
|
else
|
66
67
|
Persist.open_fwt(path, value_size, range, serializer, update, in_memory)
|
67
68
|
end
|
69
|
+
when 'pki'
|
70
|
+
pattern, pos_function = Misc.process_options options.dup, :pattern, :pos_function
|
71
|
+
if pos_function
|
72
|
+
Persist.open_pki(path, write, pattern, &pos_function)
|
73
|
+
else
|
74
|
+
Persist.open_pki(path, write, pattern)
|
75
|
+
end
|
68
76
|
else
|
69
77
|
Persist.open_tokyocabinet(path, write, serializer, type)
|
70
78
|
end
|
@@ -113,7 +121,7 @@ module Persist
|
|
113
121
|
end
|
114
122
|
end
|
115
123
|
|
116
|
-
FileUtils.
|
124
|
+
FileUtils.rm_rf path if File.exists? path
|
117
125
|
|
118
126
|
Log.medium "TSV persistence creating: #{ path }"
|
119
127
|
|
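data/lib/rbbt/persist/tsv/fix_width_table.rb
CHANGED
data/lib/rbbt/persist/tsv/packed_index.rb
ADDED
@@ -0,0 +1,101 @@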
|
|
1
|
+
require 'rbbt/packed_index'
|
2
|
+
|
3
|
+
module Persist
|
4
|
+
|
5
|
+
module PKIAdapter
|
6
|
+
include Persist::TSVAdapter
|
7
|
+
|
8
|
+
attr_accessor :pos_function
|
9
|
+
|
10
|
+
def self.open(path, write, pattern, &pos_function)
|
11
|
+
db = CONNECTIONS[path] ||= PackedIndex.new(path, write, pattern)
|
12
|
+
db.extend Persist::PKIAdapter
|
13
|
+
db.persistence_path = path
|
14
|
+
db.pos_function = pos_function
|
15
|
+
db
|
16
|
+
end
|
17
|
+
|
18
|
+
def persistence_path=(value)
|
19
|
+
@persistence_path = value
|
20
|
+
@file = value
|
21
|
+
end
|
22
|
+
|
23
|
+
def metadata_file
|
24
|
+
@metadata_file ||= self.persistence_path + '.metadata'
|
25
|
+
end
|
26
|
+
|
27
|
+
def metadata
|
28
|
+
return {} unless File.exists? metadata_file
|
29
|
+
Open.open(metadata_file, :mode => "rb") do |f|
|
30
|
+
Marshal.load(f)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def set_metadata(k,v)
|
35
|
+
metadata = self.metadata
|
36
|
+
metadata[k] = v
|
37
|
+
Misc.sensiblewrite(metadata_file, Marshal.dump(metadata))
|
38
|
+
end
|
39
|
+
|
40
|
+
def [](key, clean = false)
|
41
|
+
if TSV::ENTRY_KEYS.include? key
|
42
|
+
metadata[key]
|
43
|
+
else
|
44
|
+
key = pos_function.call(key) if pos_function and not clean
|
45
|
+
res = super(key)
|
46
|
+
res.extend MultipleResult unless res.nil?
|
47
|
+
res
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def value(pos)
|
52
|
+
self.send(:[], pos, true)
|
53
|
+
end
|
54
|
+
|
55
|
+
def []=(key, value)
|
56
|
+
if TSV::ENTRY_KEYS.include? key
|
57
|
+
set_metadata(key, value)
|
58
|
+
else
|
59
|
+
add key, value
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def add(key, value)
|
64
|
+
key = pos_function.call(key) if pos_function
|
65
|
+
self.send(:<<, value)
|
66
|
+
end
|
67
|
+
|
68
|
+
def add_range_point(key, value)
|
69
|
+
key = pos_function.call(key) if pos_function
|
70
|
+
super(key, value)
|
71
|
+
end
|
72
|
+
|
73
|
+
def include?(i)
|
74
|
+
return true if Fixnum === i and i < size
|
75
|
+
return true if metadata.include? i
|
76
|
+
false
|
77
|
+
end
|
78
|
+
|
79
|
+
def each
|
80
|
+
size.times do |i|
|
81
|
+
yield i, value(i)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def keys
|
86
|
+
[]
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def self.open_pki(path, write, pattern, &pos_function)
|
91
|
+
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
92
|
+
|
93
|
+
database = Persist::PKIAdapter.open(path, write, pattern, &pos_function)
|
94
|
+
|
95
|
+
#TSV.setup database
|
96
|
+
|
97
|
+
#database.serializer = :clean
|
98
|
+
|
99
|
+
database
|
100
|
+
end
|
101
|
+
end
|
data/lib/rbbt/persist/tsv/sharder.rb
CHANGED
@@ -205,7 +205,9 @@ module Persist
|
|
205
205
|
end
|
206
206
|
|
207
207
|
def [](key, clean=false)
|
208
|
-
|
208
|
+
database = database(key)
|
209
|
+
return nil if database.nil?
|
210
|
+
v = database.send(:[], key)
|
209
211
|
end
|
210
212
|
|
211
213
|
def <<(p)
|
@@ -240,9 +242,15 @@ module Persist
|
|
240
242
|
|
241
243
|
database = Persist::SharderAdapter.open(path, write, type, options, &shard_function)
|
242
244
|
|
243
|
-
|
245
|
+
if type.to_s == 'pki'
|
244
246
|
TSV.setup database
|
245
|
-
database.
|
247
|
+
database.type = :list
|
248
|
+
database.serializer = :clean
|
249
|
+
else
|
250
|
+
if serializer != :clean
|
251
|
+
TSV.setup database
|
252
|
+
database.serializer = serializer if serializer
|
253
|
+
end
|
246
254
|
end
|
247
255
|
|
248
256
|
database
|
data/lib/rbbt/resource/path.rb
CHANGED
data/lib/rbbt/resource/rake.rb
CHANGED
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -524,19 +524,8 @@ module TSV
|
|
524
524
|
end
|
525
525
|
end
|
526
526
|
|
527
|
-
def
|
528
|
-
|
529
|
-
no_options = keys
|
530
|
-
keys = nil
|
531
|
-
end
|
532
|
-
|
533
|
-
if keys == :sort
|
534
|
-
with_unnamed do
|
535
|
-
keys = self.keys.sort
|
536
|
-
end
|
537
|
-
end
|
538
|
-
|
539
|
-
io = TSV::Dumper.stream self do |dumper|
|
527
|
+
def dumper_stream(keys = nil, no_options = false)
|
528
|
+
TSV::Dumper.stream self do |dumper|
|
540
529
|
dumper.init unless no_options
|
541
530
|
begin
|
542
531
|
if keys
|
@@ -554,8 +543,24 @@ module TSV
|
|
554
543
|
Log.exception $!
|
555
544
|
raise $!
|
556
545
|
end
|
546
|
+
dumper.close
|
547
|
+
end
|
548
|
+
end
|
549
|
+
|
550
|
+
def to_s(keys = nil, no_options = false)
|
551
|
+
if FalseClass === keys or TrueClass === keys
|
552
|
+
no_options = keys
|
553
|
+
keys = nil
|
554
|
+
end
|
555
|
+
|
556
|
+
if keys == :sort
|
557
|
+
with_unnamed do
|
558
|
+
keys = self.keys.sort
|
559
|
+
end
|
557
560
|
end
|
558
561
|
|
562
|
+
io = dumper_stream(keys, no_options)
|
563
|
+
|
559
564
|
str = ''
|
560
565
|
while block = io.read(2048)
|
561
566
|
str << block
|
data/lib/rbbt/tsv/dumper.rb
CHANGED
@@ -4,12 +4,8 @@ module TSV
|
|
4
4
|
def self.stream(options = {}, filename = nil, &block)
|
5
5
|
dumper = TSV::Dumper.new options, filename
|
6
6
|
Thread.new(Thread.current) do |parent|
|
7
|
-
|
8
|
-
|
9
|
-
dumper.close
|
10
|
-
rescue Exception
|
11
|
-
raise $!
|
12
|
-
end
|
7
|
+
yield dumper
|
8
|
+
dumper.close
|
13
9
|
end
|
14
10
|
dumper.stream
|
15
11
|
end
|