rbbt-util 5.13.37 → 5.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +6 -1
- data/lib/rbbt/fix_width_table.rb +21 -9
- data/lib/rbbt/monitor.rb +1 -1
- data/lib/rbbt/packed_index.rb +19 -5
- data/lib/rbbt/persist/tsv.rb +9 -1
- data/lib/rbbt/persist/tsv/fix_width_table.rb +1 -1
- data/lib/rbbt/persist/tsv/packed_index.rb +101 -0
- data/lib/rbbt/persist/tsv/sharder.rb +11 -3
- data/lib/rbbt/resource/path.rb +1 -1
- data/lib/rbbt/resource/rake.rb +1 -0
- data/lib/rbbt/tsv/accessor.rb +18 -13
- data/lib/rbbt/tsv/dumper.rb +2 -6
- data/lib/rbbt/tsv/manipulate.rb +6 -4
- data/lib/rbbt/tsv/parallel/traverse.rb +7 -6
- data/lib/rbbt/tsv/parser.rb +20 -16
- data/lib/rbbt/tsv/stream.rb +87 -76
- data/lib/rbbt/tsv/util.rb +8 -3
- data/lib/rbbt/util/R.rb +1 -1
- data/lib/rbbt/util/cmd.rb +0 -3
- data/lib/rbbt/util/concurrency/processes.rb +3 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +0 -1
- data/lib/rbbt/util/log.rb +45 -18
- data/lib/rbbt/util/log/progress/report.rb +3 -2
- data/lib/rbbt/util/log/progress/util.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +12 -6
- data/lib/rbbt/util/misc/development.rb +10 -4
- data/lib/rbbt/util/misc/lock.rb +1 -1
- data/lib/rbbt/util/misc/omics.rb +2 -0
- data/lib/rbbt/util/misc/pipes.rb +90 -87
- data/lib/rbbt/workflow.rb +6 -2
- data/lib/rbbt/workflow/accessor.rb +70 -40
- data/lib/rbbt/workflow/definition.rb +23 -0
- data/lib/rbbt/workflow/step.rb +15 -3
- data/lib/rbbt/workflow/step/run.rb +18 -13
- data/lib/rbbt/workflow/usage.rb +3 -0
- data/share/Rlib/util.R +1 -1
- data/share/rbbt_commands/tsv/get +0 -2
- data/share/rbbt_commands/tsv/info +13 -5
- data/share/rbbt_commands/tsv/subset +1 -1
- data/share/rbbt_commands/workflow/info +32 -0
- data/share/rbbt_commands/workflow/task +0 -2
- data/test/rbbt/persist/tsv/test_sharder.rb +44 -0
- data/test/rbbt/test_fix_width_table.rb +1 -0
- data/test/rbbt/test_packed_index.rb +3 -0
- data/test/rbbt/tsv/test_stream.rb +55 -2
- data/test/rbbt/util/misc/test_pipes.rb +8 -6
- data/test/rbbt/workflow/test_step.rb +7 -6
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2c1f84fdf126a66b0e8cf03cd96237dd4625c5d7
|
4
|
+
data.tar.gz: 6e9b9ed9bc339fedaa99f1a96d8f537692ebfbb6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7879d8241fc10774522d28484ae1cf37bd935067038b1f9e370a2ff4af51adb9f7f183b43c7852694ce4e8b44282ab937b3744b7d8d35072a86e3ccbfb51c48e
|
7
|
+
data.tar.gz: 12ecd4f1f116f4125bb9f4159ab4556910b6e31d0fc3a351c94667758507296914445091575fc8946c975cc305da957949fa79c25913c990e477aed56506e0a0
|
data/bin/rbbt
CHANGED
@@ -41,9 +41,10 @@ $ rbbt <command> <subcommand> ... -a --arg1 --arg2='value' --arg3 'another-value
|
|
41
41
|
--profile #{Log.color :yellow, "Profile execution"}
|
42
42
|
--nocolor #{Log.color :yellow, "Disable colored output"}
|
43
43
|
--nobar #{Log.color :yellow, "Disable progress report"}
|
44
|
+
--nostream #{Log.color :yellow, "Disable persistance/job streaming"}
|
44
45
|
--locate_file #{Log.color :yellow, "Report the location of the script instead of executing it"}
|
45
46
|
--dump_mem* #{Log.color :yellow, "Dump strings in memory each second into file"}
|
46
|
-
--no_lock_id
|
47
|
+
--no_lock_id #{Log.color :yellow, "Do not track lockfiles with ids (for high-througput and high-concurrency"}
|
47
48
|
EOF
|
48
49
|
|
49
50
|
|
@@ -70,6 +71,10 @@ if mem_dump = options.delete(:dump_mem)
|
|
70
71
|
Rbbt.dump_memory(mem_dump, Symbol)
|
71
72
|
end
|
72
73
|
|
74
|
+
if options.delete :stream
|
75
|
+
ENV["RBBT_NO_STREAM"] = "true"
|
76
|
+
end
|
77
|
+
|
73
78
|
if options.delete :nobar
|
74
79
|
ENV["RBBT_NO_PROGRESS"] = "true"
|
75
80
|
end
|
data/lib/rbbt/fix_width_table.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
class FixWidthTable
|
2
2
|
|
3
|
-
attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask
|
3
|
+
attr_accessor :filename, :file, :value_size, :record_size, :range, :size, :mask, :write
|
4
4
|
def initialize(filename, value_size = nil, range = nil, update = false, in_memory = true)
|
5
5
|
@filename = filename
|
6
6
|
|
@@ -9,6 +9,7 @@ class FixWidthTable
|
|
9
9
|
@value_size = value_size
|
10
10
|
@range = range
|
11
11
|
@record_size = @value_size + (@range ? 16 : 8)
|
12
|
+
@write = true
|
12
13
|
|
13
14
|
if %w(memory stringio).include? filename.to_s.downcase
|
14
15
|
@filename = :memory
|
@@ -21,6 +22,7 @@ class FixWidthTable
|
|
21
22
|
|
22
23
|
@file.write [value_size].pack("L")
|
23
24
|
@file.write [@range ? 1 : 0 ].pack("C")
|
25
|
+
|
24
26
|
@size = 0
|
25
27
|
else
|
26
28
|
Log.debug "FixWidthTable up-to-date: #{ filename }"
|
@@ -31,11 +33,17 @@ class FixWidthTable
|
|
31
33
|
end
|
32
34
|
@value_size = @file.read(4).unpack("L").first
|
33
35
|
@range = @file.read(1).unpack("C").first == 1
|
34
|
-
@record_size = @value_size + (@range ?
|
36
|
+
@record_size = @value_size + (@range ? 16 : 8)
|
37
|
+
@write = false
|
38
|
+
|
35
39
|
@size = (File.size(@filename) - 5) / (@record_size)
|
36
40
|
end
|
37
41
|
|
38
|
-
@mask = "a#{value_size}"
|
42
|
+
@mask = "a#{@value_size}"
|
43
|
+
end
|
44
|
+
|
45
|
+
def write?
|
46
|
+
@write
|
39
47
|
end
|
40
48
|
|
41
49
|
def persistence_path
|
@@ -46,15 +54,14 @@ class FixWidthTable
|
|
46
54
|
@filename=value
|
47
55
|
end
|
48
56
|
|
49
|
-
CONNECTIONS = {} unless defined? CONNECTIONS
|
50
57
|
def self.get(filename, value_size = nil, range = nil, update = false)
|
51
58
|
return self.new(filename, value_size, range, update) if filename == :memory
|
52
59
|
case
|
53
|
-
when (!File.exists?(filename) or update or not CONNECTIONS.include?(filename))
|
54
|
-
CONNECTIONS[filename] = self.new(filename, value_size, range, update)
|
60
|
+
when (!File.exists?(filename) or update or not Persist::CONNECTIONS.include?(filename))
|
61
|
+
Persist::CONNECTIONS[filename] = self.new(filename, value_size, range, update)
|
55
62
|
end
|
56
63
|
|
57
|
-
CONNECTIONS[filename]
|
64
|
+
Persist::CONNECTIONS[filename]
|
58
65
|
end
|
59
66
|
|
60
67
|
def format(pos, value)
|
@@ -100,16 +107,19 @@ class FixWidthTable
|
|
100
107
|
@file.seek((range ? 17 : 9 ) + (record_size) * index, IO::SEEK_SET)
|
101
108
|
padding = @file.read(4).unpack("l").first+1
|
102
109
|
txt = @file.read(value_size)
|
103
|
-
txt.unpack(mask).first
|
110
|
+
str = txt.unpack(mask).first
|
111
|
+
padding > 1 ? str[0..-padding] : str
|
104
112
|
end
|
105
113
|
|
106
114
|
def read(force = false)
|
107
115
|
return if @filename == :memory
|
116
|
+
@write = false
|
108
117
|
@file.close unless @file.closed?
|
109
118
|
@file = File.open(filename, 'r:ASCII-8BIT')
|
110
119
|
end
|
111
120
|
|
112
121
|
def close
|
122
|
+
@write = false
|
113
123
|
@file.close
|
114
124
|
end
|
115
125
|
|
@@ -191,7 +201,9 @@ class FixWidthTable
|
|
191
201
|
|
192
202
|
idx = 0 if idx < 0
|
193
203
|
|
194
|
-
|
204
|
+
overlap = overlap(idx)
|
205
|
+
|
206
|
+
idx -= overlap unless overlap.nil?
|
195
207
|
|
196
208
|
values = []
|
197
209
|
l_start = pos(idx)
|
data/lib/rbbt/monitor.rb
CHANGED
@@ -4,7 +4,7 @@ module Rbbt
|
|
4
4
|
|
5
5
|
LOCK_DIRS = Rbbt.share.find_all + Rbbt.var.cache.persistence.find_all + Rbbt.var.jobs.find_all +
|
6
6
|
Rbbt.tmp.tsv_open_locks.find_all + Rbbt.tmp.persist_locks.find_all + Rbbt.tmp.sensiblewrite_lock_dir.find_all +
|
7
|
-
Rbbt.tmp.produce_locks.find_all
|
7
|
+
Rbbt.tmp.produce_locks.find_all + Rbbt.tmp.step_info_locks.find_all
|
8
8
|
|
9
9
|
SENSIBLE_WRITE_DIRS = Misc.sensiblewrite_dir.find_all
|
10
10
|
|
data/lib/rbbt/packed_index.rb
CHANGED
@@ -2,10 +2,10 @@ class PackedIndex
|
|
2
2
|
attr_accessor :file, :mask, :mask_length, :offset, :item_size, :stream, :nil_string
|
3
3
|
|
4
4
|
ELEMS = {
|
5
|
-
"I" => ["q", 8],
|
6
5
|
"i" => ["l", 4],
|
6
|
+
"I" => ["q", 8],
|
7
7
|
"f" => ["f", 4],
|
8
|
-
"F" => ["
|
8
|
+
"F" => ["d", 8],
|
9
9
|
}
|
10
10
|
|
11
11
|
def self.process_mask(mask)
|
@@ -28,6 +28,12 @@ class PackedIndex
|
|
28
28
|
[str, size]
|
29
29
|
end
|
30
30
|
|
31
|
+
def size
|
32
|
+
@size ||= begin
|
33
|
+
(File.size(file) - offset) / item_size
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
31
37
|
def initialize(file, write = false, pattern = nil)
|
32
38
|
@file = file
|
33
39
|
if write
|
@@ -44,10 +50,18 @@ class PackedIndex
|
|
44
50
|
@mask = @stream.read(mask_length)
|
45
51
|
@offset = @mask.length + 8
|
46
52
|
end
|
47
|
-
@nil_string = "
|
53
|
+
@nil_string = "NIL" << ("-" * (@item_size - 3))
|
54
|
+
end
|
55
|
+
|
56
|
+
def persistence_path
|
57
|
+
@file
|
58
|
+
end
|
59
|
+
|
60
|
+
def persistence_path=(value)
|
61
|
+
@file=value
|
48
62
|
end
|
49
63
|
|
50
|
-
def read
|
64
|
+
def read(force = false)
|
51
65
|
close
|
52
66
|
@stream = Open.open(file, :mode => 'rb')
|
53
67
|
end
|
@@ -63,7 +77,7 @@ class PackedIndex
|
|
63
77
|
def [](position)
|
64
78
|
@stream.seek(position * item_size + offset)
|
65
79
|
encoded = @stream.read(item_size)
|
66
|
-
return nil if encoded == nil_string
|
80
|
+
return nil if encoded.nil? or encoded == nil_string
|
67
81
|
encoded.unpack mask
|
68
82
|
end
|
69
83
|
|
data/lib/rbbt/persist/tsv.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require 'rbbt/persist/tsv/adapter'
|
2
2
|
|
3
3
|
require 'rbbt/persist/tsv/fix_width_table'
|
4
|
+
require 'rbbt/persist/tsv/packed_index'
|
4
5
|
|
5
6
|
begin
|
6
7
|
require 'rbbt/persist/tsv/tokyocabinet'
|
@@ -65,6 +66,13 @@ module Persist
|
|
65
66
|
else
|
66
67
|
Persist.open_fwt(path, value_size, range, serializer, update, in_memory)
|
67
68
|
end
|
69
|
+
when 'pki'
|
70
|
+
pattern, pos_function = Misc.process_options options.dup, :pattern, :pos_function
|
71
|
+
if pos_function
|
72
|
+
Persist.open_pki(path, write, pattern, &pos_function)
|
73
|
+
else
|
74
|
+
Persist.open_pki(path, write, pattern)
|
75
|
+
end
|
68
76
|
else
|
69
77
|
Persist.open_tokyocabinet(path, write, serializer, type)
|
70
78
|
end
|
@@ -113,7 +121,7 @@ module Persist
|
|
113
121
|
end
|
114
122
|
end
|
115
123
|
|
116
|
-
FileUtils.
|
124
|
+
FileUtils.rm_rf path if File.exists? path
|
117
125
|
|
118
126
|
Log.medium "TSV persistence creating: #{ path }"
|
119
127
|
|
@@ -0,0 +1,101 @@
|
|
1
|
+
require 'rbbt/packed_index'
|
2
|
+
|
3
|
+
module Persist
|
4
|
+
|
5
|
+
module PKIAdapter
|
6
|
+
include Persist::TSVAdapter
|
7
|
+
|
8
|
+
attr_accessor :pos_function
|
9
|
+
|
10
|
+
def self.open(path, write, pattern, &pos_function)
|
11
|
+
db = CONNECTIONS[path] ||= PackedIndex.new(path, write, pattern)
|
12
|
+
db.extend Persist::PKIAdapter
|
13
|
+
db.persistence_path = path
|
14
|
+
db.pos_function = pos_function
|
15
|
+
db
|
16
|
+
end
|
17
|
+
|
18
|
+
def persistence_path=(value)
|
19
|
+
@persistence_path = value
|
20
|
+
@file = value
|
21
|
+
end
|
22
|
+
|
23
|
+
def metadata_file
|
24
|
+
@metadata_file ||= self.persistence_path + '.metadata'
|
25
|
+
end
|
26
|
+
|
27
|
+
def metadata
|
28
|
+
return {} unless File.exists? metadata_file
|
29
|
+
Open.open(metadata_file, :mode => "rb") do |f|
|
30
|
+
Marshal.load(f)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def set_metadata(k,v)
|
35
|
+
metadata = self.metadata
|
36
|
+
metadata[k] = v
|
37
|
+
Misc.sensiblewrite(metadata_file, Marshal.dump(metadata))
|
38
|
+
end
|
39
|
+
|
40
|
+
def [](key, clean = false)
|
41
|
+
if TSV::ENTRY_KEYS.include? key
|
42
|
+
metadata[key]
|
43
|
+
else
|
44
|
+
key = pos_function.call(key) if pos_function and not clean
|
45
|
+
res = super(key)
|
46
|
+
res.extend MultipleResult unless res.nil?
|
47
|
+
res
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
def value(pos)
|
52
|
+
self.send(:[], pos, true)
|
53
|
+
end
|
54
|
+
|
55
|
+
def []=(key, value)
|
56
|
+
if TSV::ENTRY_KEYS.include? key
|
57
|
+
set_metadata(key, value)
|
58
|
+
else
|
59
|
+
add key, value
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def add(key, value)
|
64
|
+
key = pos_function.call(key) if pos_function
|
65
|
+
self.send(:<<, value)
|
66
|
+
end
|
67
|
+
|
68
|
+
def add_range_point(key, value)
|
69
|
+
key = pos_function.call(key) if pos_function
|
70
|
+
super(key, value)
|
71
|
+
end
|
72
|
+
|
73
|
+
def include?(i)
|
74
|
+
return true if Fixnum === i and i < size
|
75
|
+
return true if metadata.include? i
|
76
|
+
false
|
77
|
+
end
|
78
|
+
|
79
|
+
def each
|
80
|
+
size.times do |i|
|
81
|
+
yield i, value(i)
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def keys
|
86
|
+
[]
|
87
|
+
end
|
88
|
+
end
|
89
|
+
|
90
|
+
def self.open_pki(path, write, pattern, &pos_function)
|
91
|
+
FileUtils.mkdir_p File.dirname(path) unless File.exists?(File.dirname(path))
|
92
|
+
|
93
|
+
database = Persist::PKIAdapter.open(path, write, pattern, &pos_function)
|
94
|
+
|
95
|
+
#TSV.setup database
|
96
|
+
|
97
|
+
#database.serializer = :clean
|
98
|
+
|
99
|
+
database
|
100
|
+
end
|
101
|
+
end
|
@@ -205,7 +205,9 @@ module Persist
|
|
205
205
|
end
|
206
206
|
|
207
207
|
def [](key, clean=false)
|
208
|
-
|
208
|
+
database = database(key)
|
209
|
+
return nil if database.nil?
|
210
|
+
v = database.send(:[], key)
|
209
211
|
end
|
210
212
|
|
211
213
|
def <<(p)
|
@@ -240,9 +242,15 @@ module Persist
|
|
240
242
|
|
241
243
|
database = Persist::SharderAdapter.open(path, write, type, options, &shard_function)
|
242
244
|
|
243
|
-
|
245
|
+
if type.to_s == 'pki'
|
244
246
|
TSV.setup database
|
245
|
-
database.
|
247
|
+
database.type = :list
|
248
|
+
database.serializer = :clean
|
249
|
+
else
|
250
|
+
if serializer != :clean
|
251
|
+
TSV.setup database
|
252
|
+
database.serializer = serializer if serializer
|
253
|
+
end
|
246
254
|
end
|
247
255
|
|
248
256
|
database
|
data/lib/rbbt/resource/path.rb
CHANGED
data/lib/rbbt/resource/rake.rb
CHANGED
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -524,19 +524,8 @@ module TSV
|
|
524
524
|
end
|
525
525
|
end
|
526
526
|
|
527
|
-
def
|
528
|
-
|
529
|
-
no_options = keys
|
530
|
-
keys = nil
|
531
|
-
end
|
532
|
-
|
533
|
-
if keys == :sort
|
534
|
-
with_unnamed do
|
535
|
-
keys = self.keys.sort
|
536
|
-
end
|
537
|
-
end
|
538
|
-
|
539
|
-
io = TSV::Dumper.stream self do |dumper|
|
527
|
+
def dumper_stream(keys = nil, no_options = false)
|
528
|
+
TSV::Dumper.stream self do |dumper|
|
540
529
|
dumper.init unless no_options
|
541
530
|
begin
|
542
531
|
if keys
|
@@ -554,8 +543,24 @@ module TSV
|
|
554
543
|
Log.exception $!
|
555
544
|
raise $!
|
556
545
|
end
|
546
|
+
dumper.close
|
547
|
+
end
|
548
|
+
end
|
549
|
+
|
550
|
+
def to_s(keys = nil, no_options = false)
|
551
|
+
if FalseClass === keys or TrueClass === keys
|
552
|
+
no_options = keys
|
553
|
+
keys = nil
|
554
|
+
end
|
555
|
+
|
556
|
+
if keys == :sort
|
557
|
+
with_unnamed do
|
558
|
+
keys = self.keys.sort
|
559
|
+
end
|
557
560
|
end
|
558
561
|
|
562
|
+
io = dumper_stream(keys, no_options)
|
563
|
+
|
559
564
|
str = ''
|
560
565
|
while block = io.read(2048)
|
561
566
|
str << block
|
data/lib/rbbt/tsv/dumper.rb
CHANGED
@@ -4,12 +4,8 @@ module TSV
|
|
4
4
|
def self.stream(options = {}, filename = nil, &block)
|
5
5
|
dumper = TSV::Dumper.new options, filename
|
6
6
|
Thread.new(Thread.current) do |parent|
|
7
|
-
|
8
|
-
|
9
|
-
dumper.close
|
10
|
-
rescue Exception
|
11
|
-
raise $!
|
12
|
-
end
|
7
|
+
yield dumper
|
8
|
+
dumper.close
|
13
9
|
end
|
14
10
|
dumper.stream
|
15
11
|
end
|