scout-gear 7.3.0 → 8.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +44 -16
- data/Rakefile +6 -1
- data/VERSION +1 -1
- data/bin/scout +21 -7
- data/doc/lib/scout/path.md +35 -0
- data/doc/lib/scout/workflow/task.md +13 -0
- data/lib/rbbt-scout.rb +1 -0
- data/lib/scout/cmd.rb +24 -25
- data/lib/scout/concurrent_stream.rb +59 -39
- data/lib/scout/config.rb +1 -1
- data/lib/scout/exceptions.rb +10 -0
- data/lib/scout/log/color.rb +15 -12
- data/lib/scout/log/progress/report.rb +8 -6
- data/lib/scout/log/progress/util.rb +61 -54
- data/lib/scout/log/progress.rb +1 -1
- data/lib/scout/log/trap.rb +107 -0
- data/lib/scout/log.rb +115 -52
- data/lib/scout/meta_extension.rb +47 -6
- data/lib/scout/misc/digest.rb +12 -3
- data/lib/scout/misc/format.rb +24 -7
- data/lib/scout/misc/insist.rb +1 -1
- data/lib/scout/misc/monitor.rb +22 -0
- data/lib/scout/misc/system.rb +58 -0
- data/lib/scout/named_array.rb +73 -3
- data/lib/scout/offsite/ssh.rb +171 -0
- data/lib/scout/offsite/step.rb +83 -0
- data/lib/scout/offsite/sync.rb +55 -0
- data/lib/scout/offsite.rb +3 -0
- data/lib/scout/open/lock/lockfile.rb +587 -0
- data/lib/scout/open/lock.rb +9 -2
- data/lib/scout/open/remote.rb +16 -1
- data/lib/scout/open/stream.rb +146 -83
- data/lib/scout/open/util.rb +22 -3
- data/lib/scout/open.rb +5 -4
- data/lib/scout/path/find.rb +24 -11
- data/lib/scout/path/util.rb +40 -0
- data/lib/scout/persist/serialize.rb +19 -6
- data/lib/scout/persist.rb +29 -13
- data/lib/scout/resource/path.rb +57 -0
- data/lib/scout/resource/produce.rb +0 -8
- data/lib/scout/resource/util.rb +12 -5
- data/lib/scout/tmpfile.rb +7 -8
- data/lib/scout/tsv/attach.rb +177 -0
- data/lib/scout/tsv/change_id.rb +40 -0
- data/lib/scout/tsv/dumper.rb +74 -46
- data/lib/scout/tsv/index.rb +85 -87
- data/lib/scout/tsv/open.rb +160 -85
- data/lib/scout/tsv/parser.rb +142 -80
- data/lib/scout/tsv/path.rb +1 -2
- data/lib/scout/tsv/persist/adapter.rb +15 -45
- data/lib/scout/tsv/persist/fix_width_table.rb +3 -0
- data/lib/scout/tsv/persist/tokyocabinet.rb +6 -1
- data/lib/scout/tsv/persist.rb +4 -0
- data/lib/scout/tsv/stream.rb +204 -0
- data/lib/scout/tsv/transformer.rb +152 -0
- data/lib/scout/tsv/traverse.rb +96 -92
- data/lib/scout/tsv/util/filter.rb +9 -0
- data/lib/scout/tsv/util/reorder.rb +81 -0
- data/lib/scout/tsv/util/select.rb +78 -33
- data/lib/scout/tsv/util/unzip.rb +86 -0
- data/lib/scout/tsv/util.rb +60 -11
- data/lib/scout/tsv.rb +34 -4
- data/lib/scout/work_queue/socket.rb +6 -1
- data/lib/scout/work_queue/worker.rb +5 -2
- data/lib/scout/work_queue.rb +51 -20
- data/lib/scout/workflow/definition.rb +23 -3
- data/lib/scout/workflow/deployment/orchestrator.rb +245 -0
- data/lib/scout/workflow/deployment.rb +1 -0
- data/lib/scout/workflow/step/dependencies.rb +56 -10
- data/lib/scout/workflow/step/file.rb +5 -0
- data/lib/scout/workflow/step/info.rb +40 -7
- data/lib/scout/workflow/step/load.rb +1 -1
- data/lib/scout/workflow/step/provenance.rb +9 -7
- data/lib/scout/workflow/step/status.rb +43 -0
- data/lib/scout/workflow/step.rb +160 -49
- data/lib/scout/workflow/task/dependencies.rb +114 -0
- data/lib/scout/workflow/task/inputs.rb +40 -32
- data/lib/scout/workflow/task.rb +38 -102
- data/lib/scout/workflow/usage.rb +48 -18
- data/lib/scout/workflow.rb +4 -2
- data/lib/scout-gear.rb +2 -0
- data/lib/scout.rb +6 -0
- data/scout-gear.gemspec +52 -23
- data/scout_commands/doc +37 -0
- data/scout_commands/find +1 -0
- data/scout_commands/offsite +30 -0
- data/scout_commands/update +29 -0
- data/scout_commands/workflow/info +15 -3
- data/scout_commands/workflow/install +102 -0
- data/scout_commands/workflow/task +57 -9
- data/test/scout/offsite/test_ssh.rb +15 -0
- data/test/scout/offsite/test_step.rb +33 -0
- data/test/scout/offsite/test_sync.rb +36 -0
- data/test/scout/offsite/test_task.rb +0 -0
- data/test/scout/open/test_stream.rb +60 -58
- data/test/scout/path/test_find.rb +10 -1
- data/test/scout/resource/test_path.rb +6 -0
- data/test/scout/resource/test_produce.rb +15 -0
- data/test/scout/test_meta_extension.rb +25 -0
- data/test/scout/test_named_array.rb +24 -0
- data/test/scout/test_persist.rb +9 -2
- data/test/scout/test_tsv.rb +229 -2
- data/test/scout/test_work_queue.rb +65 -41
- data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
- data/test/scout/tsv/test_attach.rb +227 -0
- data/test/scout/tsv/test_change_id.rb +98 -0
- data/test/scout/tsv/test_dumper.rb +1 -1
- data/test/scout/tsv/test_index.rb +49 -3
- data/test/scout/tsv/test_open.rb +160 -2
- data/test/scout/tsv/test_parser.rb +33 -2
- data/test/scout/tsv/test_persist.rb +2 -0
- data/test/scout/tsv/test_stream.rb +200 -0
- data/test/scout/tsv/test_transformer.rb +120 -0
- data/test/scout/tsv/test_traverse.rb +88 -3
- data/test/scout/tsv/test_util.rb +1 -0
- data/test/scout/tsv/util/test_reorder.rb +94 -0
- data/test/scout/tsv/util/test_select.rb +25 -11
- data/test/scout/tsv/util/test_unzip.rb +112 -0
- data/test/scout/work_queue/test_socket.rb +0 -1
- data/test/scout/workflow/deployment/test_orchestrator.rb +272 -0
- data/test/scout/workflow/step/test_dependencies.rb +68 -0
- data/test/scout/workflow/step/test_info.rb +18 -0
- data/test/scout/workflow/step/test_status.rb +30 -0
- data/test/scout/workflow/task/test_dependencies.rb +355 -0
- data/test/scout/workflow/task/test_inputs.rb +67 -14
- data/test/scout/workflow/test_definition.rb +18 -0
- data/test/scout/workflow/test_documentation.rb +24 -0
- data/test/scout/workflow/test_step.rb +112 -3
- data/test/scout/workflow/test_task.rb +0 -151
- data/test/scout/workflow/test_usage.rb +33 -6
- data/test/test_scout.rb +9 -0
- metadata +100 -8
- data/scout_commands/workflow/task_old +0 -706
data/lib/scout/tsv/dumper.rb
CHANGED
@@ -1,38 +1,14 @@
|
|
1
1
|
module TSV
|
2
2
|
class Dumper
|
3
|
-
def self.header_lines(key_field, fields, entry_hash = nil)
|
4
|
-
if Hash === entry_hash
|
5
|
-
sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
6
|
-
preamble = entry_hash[:preamble]
|
7
|
-
header_hash = entry_hash[:header_hash]
|
8
|
-
end
|
9
|
-
|
10
|
-
header_hash = "#" if header_hash.nil?
|
11
|
-
|
12
|
-
preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
13
|
-
|
14
|
-
str = ""
|
15
|
-
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
16
|
-
if fields
|
17
|
-
if fields.empty?
|
18
|
-
str << header_hash << (key_field || "ID").to_s << "\n"
|
19
|
-
else
|
20
|
-
str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
str
|
25
|
-
end
|
26
|
-
|
27
3
|
def self.header(options={})
|
28
|
-
key_field, fields, sep, header_hash, preamble = IndiferentHash.process_options options,
|
29
|
-
:key_field, :fields, :sep, :header_hash, :preamble,
|
4
|
+
key_field, fields, sep, header_hash, preamble, unnamed = IndiferentHash.process_options options,
|
5
|
+
:key_field, :fields, :sep, :header_hash, :preamble, :unnamed,
|
30
6
|
:sep => "\t", :header_hash => "#", :preamble => true
|
31
7
|
|
32
|
-
if fields.nil?
|
8
|
+
if fields.nil?
|
33
9
|
fields_str = nil
|
34
10
|
else
|
35
|
-
fields_str = "#{header_hash}#{key_field}#{sep}#{fields*sep}"
|
11
|
+
fields_str = "#{header_hash}#{key_field || "Id"}#{sep}#{fields*sep}"
|
36
12
|
end
|
37
13
|
|
38
14
|
if preamble && options.values.compact.any?
|
@@ -45,31 +21,64 @@ module TSV
|
|
45
21
|
end
|
46
22
|
|
47
23
|
|
48
|
-
attr_accessor :options
|
24
|
+
attr_accessor :options, :initialized, :type, :sep
|
49
25
|
def initialize(options = {})
|
26
|
+
options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
|
50
27
|
@sep, @type = IndiferentHash.process_options options,
|
51
28
|
:sep, :type,
|
52
29
|
:sep => "\t", :type => :double
|
53
30
|
@options = options
|
31
|
+
@options[:type] = @type
|
54
32
|
@sout, @sin = Open.pipe
|
33
|
+
Log.low{"Dumper pipe #{[Log.fingerprint(@sin), Log.fingerprint(@sout)] * " -> "}"}
|
34
|
+
@initialized = false
|
35
|
+
@mutex = Mutex.new
|
55
36
|
ConcurrentStream.setup(@sin, pair: @sout)
|
56
37
|
ConcurrentStream.setup(@sout, pair: @sin)
|
57
38
|
end
|
58
39
|
|
59
|
-
def
|
60
|
-
|
61
|
-
|
40
|
+
def key_field
|
41
|
+
@options[:key_field]
|
42
|
+
end
|
43
|
+
|
44
|
+
def fields
|
45
|
+
@options[:fields]
|
62
46
|
end
|
63
47
|
|
64
|
-
def
|
48
|
+
def key_field=(key_field)
|
49
|
+
@options[:key_field] = key_field
|
50
|
+
end
|
51
|
+
|
52
|
+
def fields=(fields)
|
53
|
+
@options[:fields] = fields
|
54
|
+
end
|
55
|
+
|
56
|
+
def all_fields
|
57
|
+
return nil if fields.nil?
|
58
|
+
[key_field] + fields
|
59
|
+
end
|
65
60
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
@
|
71
|
-
|
72
|
-
|
61
|
+
|
62
|
+
def init(preamble: true)
|
63
|
+
header = Dumper.header(@options.merge(type: @type, sep: @sep, preamble: preamble))
|
64
|
+
@mutex.synchronize do
|
65
|
+
@initialized = true
|
66
|
+
@sin.puts header if header and ! header.empty?
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
def add(key, value)
|
71
|
+
@mutex.synchronize do
|
72
|
+
|
73
|
+
key = key.to_s unless String === key
|
74
|
+
case @type
|
75
|
+
when :single
|
76
|
+
@sin.puts key + @sep + value.to_s
|
77
|
+
when :list, :flat
|
78
|
+
@sin.puts key + @sep + value * @sep
|
79
|
+
when :double
|
80
|
+
@sin.puts key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep
|
81
|
+
end
|
73
82
|
end
|
74
83
|
end
|
75
84
|
|
@@ -85,15 +94,32 @@ module TSV
|
|
85
94
|
def abort(exception=nil)
|
86
95
|
@sin.abort(exception)
|
87
96
|
end
|
97
|
+
|
98
|
+
def tsv(*args)
|
99
|
+
TSV.open(stream, *args)
|
100
|
+
end
|
101
|
+
|
102
|
+
def fingerprint
|
103
|
+
"Dumper:{"<< Log.fingerprint(self.all_fields|| []) << "}"
|
104
|
+
end
|
105
|
+
|
106
|
+
def digest_str
|
107
|
+
fingerprint
|
108
|
+
end
|
109
|
+
|
110
|
+
def inspect
|
111
|
+
fingerprint
|
112
|
+
end
|
88
113
|
end
|
89
114
|
|
90
|
-
def
|
91
|
-
|
92
|
-
dumper.
|
115
|
+
def dumper_stream(options = {})
|
116
|
+
preamble = IndiferentHash.process_options options, :preamble, :preamble => true
|
117
|
+
dumper = TSV::Dumper.new self.extension_attr_hash.merge(options)
|
93
118
|
t = Thread.new do
|
94
119
|
begin
|
95
120
|
Thread.current.report_on_exception = true
|
96
121
|
Thread.current["name"] = "Dumper thread"
|
122
|
+
dumper.init(preamble: preamble)
|
97
123
|
self.each do |k,v|
|
98
124
|
dumper.add k, v
|
99
125
|
end
|
@@ -103,10 +129,12 @@ module TSV
|
|
103
129
|
end
|
104
130
|
end
|
105
131
|
Thread.pass until t["name"]
|
106
|
-
dumper.stream
|
132
|
+
s = dumper.stream
|
133
|
+
ConcurrentStream.setup(s, :threads => [t])
|
134
|
+
s
|
107
135
|
end
|
108
136
|
|
109
|
-
def to_s
|
110
|
-
|
137
|
+
def to_s(options = {})
|
138
|
+
dumper_stream(options).read
|
111
139
|
end
|
112
140
|
end
|
data/lib/scout/tsv/index.rb
CHANGED
@@ -1,13 +1,16 @@
|
|
1
1
|
require_relative 'parser'
|
2
|
+
require_relative 'transformer'
|
2
3
|
require_relative 'persist/fix_width_table'
|
3
4
|
module TSV
|
4
|
-
def self.index(tsv_file, target: 0, fields: nil, order: true, **kwargs)
|
5
|
-
persist, type = IndiferentHash.process_options kwargs,
|
6
|
-
:persist, :persist_type,
|
5
|
+
def self.index(tsv_file, target: 0, fields: nil, order: true, bar: nil, **kwargs)
|
6
|
+
persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
|
7
|
+
:persist, :persist_type, :persist_update, :data_persist,
|
7
8
|
:persist => false, :persist_type => "HDB"
|
8
9
|
kwargs.delete :type
|
9
10
|
|
10
|
-
|
11
|
+
fields = :all if fields.nil?
|
12
|
+
|
13
|
+
Persist.persist(tsv_file, type, kwargs.merge(target: target, fields: fields, persist: persist, update: persist_update, :prefix => "Index", :other_options => kwargs)) do |filename|
|
11
14
|
if filename
|
12
15
|
index = ScoutCabinet.open(filename, true, type)
|
13
16
|
TSV.setup(index, :type => :single)
|
@@ -16,11 +19,19 @@ module TSV
|
|
16
19
|
index = TSV.setup({}, :type => :single)
|
17
20
|
end
|
18
21
|
|
19
|
-
|
22
|
+
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
23
|
+
|
24
|
+
log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}"
|
25
|
+
Log.low log_msg
|
26
|
+
bar = log_msg if TrueClass === bar
|
27
|
+
|
20
28
|
if order
|
21
29
|
tmp_index = {}
|
22
|
-
|
30
|
+
include_self = fields == :all || (Array === fields) && fields.include?(target)
|
31
|
+
target_key_field, source_field_names = Open.traverse tsv_file, type: :double, key_field: target, fields: fields, unnamed: true, bar: bar, **kwargs do |k,values|
|
32
|
+
tmp_index[k] ||= [[k]] if include_self
|
23
33
|
values.each_with_index do |list,i|
|
34
|
+
i += 1 if include_self
|
24
35
|
list.each do |e|
|
25
36
|
tmp_index[e] ||= []
|
26
37
|
tmp_index[e][i] ||= []
|
@@ -31,16 +42,24 @@ module TSV
|
|
31
42
|
tmp_index.each do |e,list|
|
32
43
|
index[e] = list.flatten.compact.uniq.first
|
33
44
|
end
|
45
|
+
|
46
|
+
index.key_field = source_field_names * ","
|
47
|
+
index.fields = [target_key_field]
|
48
|
+
|
49
|
+
tmp_index = {}
|
50
|
+
|
34
51
|
else
|
35
|
-
|
52
|
+
target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :flat, unnamed: true, bar: bar, **kwargs do |k,values|
|
36
53
|
values.each do |e|
|
37
54
|
index[e] = k unless index.include?(e)
|
38
55
|
end
|
39
56
|
end
|
57
|
+
|
58
|
+
index.key_field = source_field_names * ","
|
59
|
+
index.fields = [target_key_field]
|
40
60
|
end
|
41
61
|
|
42
|
-
|
43
|
-
index.fields = [dummy_data.key_field]
|
62
|
+
|
44
63
|
index
|
45
64
|
end
|
46
65
|
end
|
@@ -49,17 +68,26 @@ module TSV
|
|
49
68
|
TSV.index(self, *args, **kwargs, &block)
|
50
69
|
end
|
51
70
|
|
52
|
-
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
|
53
|
-
persist, type = IndiferentHash.process_options kwargs,
|
54
|
-
:persist, :persist_type,
|
71
|
+
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs)
|
72
|
+
persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
|
73
|
+
:persist, :persist_type, :persist_update, :data_persist,
|
55
74
|
:persist => false, :persist_type => :fwt
|
56
75
|
kwargs.delete :type
|
76
|
+
kwargs[:unnamed] = true
|
57
77
|
|
58
|
-
Persist.persist(tsv_file, type,
|
78
|
+
Persist.persist(tsv_file, type,
|
79
|
+
:persist => persist, :prefix => "RangeIndex[#{[start_field, end_field]*"-"}]", update: persist_update,
|
80
|
+
:other_options => kwargs) do |filename|
|
81
|
+
|
82
|
+
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
83
|
+
|
84
|
+
log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{[start_field, end_field]*"-"}"
|
85
|
+
Log.low log_msg
|
86
|
+
bar = log_msg if TrueClass === bar
|
59
87
|
|
60
88
|
max_key_size = 0
|
61
89
|
index_data = []
|
62
|
-
TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field] do |key, values|
|
90
|
+
TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], bar: bar, **kwargs do |key, values|
|
63
91
|
key_size = key.length
|
64
92
|
max_key_size = key_size if key_size > max_key_size
|
65
93
|
|
@@ -81,81 +109,51 @@ module TSV
|
|
81
109
|
end
|
82
110
|
end
|
83
111
|
|
112
|
+
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs)
|
113
|
+
persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
|
114
|
+
:persist, :persist_type, :persist_update, :data_persist,
|
115
|
+
:persist => false, :persist_type => :fwt
|
116
|
+
kwargs.delete :type
|
117
|
+
kwargs[:unnamed] = true
|
118
|
+
|
119
|
+
Persist.persist(tsv_file, type,
|
120
|
+
:persist => persist, :prefix => "RangeIndex[#{pos_field}]", update: persist_update,
|
121
|
+
:other_options => kwargs) do |filename|
|
122
|
+
|
123
|
+
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
124
|
+
|
125
|
+
log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{pos_field}"
|
126
|
+
Log.low log_msg
|
127
|
+
bar = log_msg if TrueClass === bar
|
128
|
+
|
129
|
+
max_key_size = 0
|
130
|
+
index_data = []
|
131
|
+
TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, bar: bar, **kwargs do |key, pos|
|
132
|
+
key_size = key.length
|
133
|
+
max_key_size = key_size if key_size > max_key_size
|
134
|
+
|
135
|
+
if Array === pos
|
136
|
+
pos.zip(end_pos).each do |p|
|
137
|
+
index_pos << [key, p]
|
138
|
+
end
|
139
|
+
else
|
140
|
+
index_data << [key, pos]
|
141
|
+
end
|
142
|
+
end
|
143
|
+
|
144
|
+
filename = :memory if filename.nil?
|
145
|
+
index = FixWidthTable.get(filename, max_key_size, false)
|
146
|
+
index.add_point index_data
|
147
|
+
index.read
|
148
|
+
index
|
149
|
+
end
|
150
|
+
end
|
151
|
+
|
84
152
|
def range_index(*args, **kwargs, &block)
|
85
153
|
TSV.range_index(self, *args, **kwargs, &block)
|
86
154
|
end
|
87
155
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
# end_field ||= "End"
|
92
|
-
|
93
|
-
# options = Misc.add_defaults options,
|
94
|
-
# :persist => false, :persist_file => nil, :persist_update => false
|
95
|
-
|
96
|
-
# persist_options = Misc.pull_keys options, :persist
|
97
|
-
# persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"
|
98
|
-
|
99
|
-
# Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
|
100
|
-
# max_key_size = 0
|
101
|
-
# index_data = []
|
102
|
-
# with_unnamed do
|
103
|
-
# with_monitor :desc => "Creating Index Data", :step => 10000 do
|
104
|
-
# through :key, [start_field, end_field] do |key, values|
|
105
|
-
# key_size = key.length
|
106
|
-
# max_key_size = key_size if key_size > max_key_size
|
107
|
-
|
108
|
-
# start_pos, end_pos = values
|
109
|
-
# if Array === start_pos
|
110
|
-
# start_pos.zip(end_pos).each do |s,e|
|
111
|
-
# index_data << [key, [s.to_i, e.to_i]]
|
112
|
-
# end
|
113
|
-
# else
|
114
|
-
# index_data << [key, [start_pos.to_i, end_pos.to_i]]
|
115
|
-
# end
|
116
|
-
# end
|
117
|
-
# end
|
118
|
-
# end
|
119
|
-
|
120
|
-
# index = FixWidthTable.get(:memory, max_key_size, true)
|
121
|
-
# index.add_range index_data
|
122
|
-
# index.read
|
123
|
-
# index
|
124
|
-
# end
|
125
|
-
#end
|
126
|
-
|
127
|
-
#def self.range_index(file, start_field = nil, end_field = nil, options = {})
|
128
|
-
# start_field ||= "Start"
|
129
|
-
# end_field ||= "End"
|
130
|
-
|
131
|
-
# data_options = Misc.pull_keys options, :data
|
132
|
-
# filename = case
|
133
|
-
# when (String === file or Path === file)
|
134
|
-
# file
|
135
|
-
# when file.respond_to?(:filename)
|
136
|
-
# file.filename
|
137
|
-
# else
|
138
|
-
# file.object_id.to_s
|
139
|
-
# end
|
140
|
-
# persist_options = Misc.pull_keys options, :persist
|
141
|
-
# persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
|
142
|
-
|
143
|
-
# filters = Misc.process_options options, :filters
|
144
|
-
|
145
|
-
# if filters
|
146
|
-
# filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]"
|
147
|
-
# end
|
148
|
-
|
149
|
-
# Persist.persist(filename, :fwt, persist_options) do
|
150
|
-
# tsv = TSV.open(file, data_options)
|
151
|
-
# if filters
|
152
|
-
# tsv.filter
|
153
|
-
# filters.each do |match, value|
|
154
|
-
# tsv.add_filter match, value
|
155
|
-
# end
|
156
|
-
# end
|
157
|
-
|
158
|
-
# tsv.range_index(start_field, end_field, options)
|
159
|
-
# end
|
160
|
-
#end
|
156
|
+
def pos_index(*args, **kwargs, &block)
|
157
|
+
TSV.pos_index(self, *args, **kwargs, &block)
|
158
|
+
end
|
161
159
|
end
|
data/lib/scout/tsv/open.rb
CHANGED
@@ -1,61 +1,56 @@
|
|
1
1
|
require_relative '../open'
|
2
|
+
require_relative '../work_queue'
|
3
|
+
|
4
|
+
module MultipleResult
|
5
|
+
def self.setup(obj)
|
6
|
+
obj.extend MultipleResult
|
7
|
+
obj
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
2
11
|
module Open
|
3
12
|
def self.traverse_add(into, res)
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
into
|
13
|
+
if Array === res && MultipleResult === res
|
14
|
+
res.each do |_res|
|
15
|
+
traverse_add into, _res
|
16
|
+
end
|
17
|
+
else
|
18
|
+
case into
|
19
|
+
when defined?(TSV::Dumper) && TSV::Dumper
|
20
|
+
into.add *res
|
21
|
+
when TSV, Hash
|
22
|
+
key, value = res
|
23
|
+
if into.type == :double
|
24
|
+
into.zip_new key, value, insitu: false
|
25
|
+
else
|
26
|
+
into[key] = value
|
27
|
+
end
|
28
|
+
when Array, Set
|
29
|
+
into << res
|
30
|
+
when IO, StringIO
|
31
|
+
into.puts res
|
32
|
+
end
|
10
33
|
end
|
11
34
|
end
|
12
35
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
# self.traverse(obj.stream, into: into, cpus: cpus, bar: bar, **options, &block)
|
24
|
-
# when IO
|
25
|
-
# if into && (IO === into || into.respond_to?(:stream) )
|
26
|
-
# into_thread = Thread.new do
|
27
|
-
# Thread.current.report_on_exception = false
|
28
|
-
# Thread.current["name"] = "Traverse into"
|
29
|
-
# TSV.parse obj, **options do |k,v|
|
30
|
-
# begin
|
31
|
-
# res = block.call k, v
|
32
|
-
# traverse_add into, res
|
33
|
-
# rescue
|
34
|
-
# into.abort $!
|
35
|
-
# end
|
36
|
-
# nil
|
37
|
-
# end
|
38
|
-
# into.close if into.respond_to?(:close)
|
39
|
-
# end
|
40
|
-
# Thread.pass until into_thread
|
41
|
-
# into
|
42
|
-
# else
|
43
|
-
# TSV.parse obj, **options do |k,v|
|
44
|
-
# block.call k, v
|
45
|
-
# nil
|
46
|
-
# end
|
47
|
-
# end
|
48
|
-
# end
|
49
|
-
#end
|
50
|
-
|
51
|
-
def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, **options, &block)
|
36
|
+
def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, keep_open: false, **options, &block)
|
37
|
+
cpus = nil if cpus.to_i == 1
|
38
|
+
|
39
|
+
if into == :stream
|
40
|
+
sout, sin = Open.pipe
|
41
|
+
ConcurrentStream.setup(sout, :pair => sin)
|
42
|
+
ConcurrentStream.setup(sin, :pair => sout)
|
43
|
+
self.traverse(obj, into: sin, cpus: cpus, bar: bar, callback: callback, unnamed: unnamed, **options, &block)
|
44
|
+
return sout
|
45
|
+
end
|
52
46
|
|
53
47
|
if into || bar
|
54
48
|
orig_callback = callback if callback
|
55
|
-
bar = Log::ProgressBar.get_obj_bar(
|
49
|
+
bar = Log::ProgressBar.get_obj_bar(obj, bar) if bar
|
50
|
+
bar.init if bar
|
56
51
|
callback = proc do |res|
|
57
52
|
bar.tick if bar
|
58
|
-
traverse_add into, res if into
|
53
|
+
traverse_add into, res if into && ! res.nil?
|
59
54
|
orig_callback.call res if orig_callback
|
60
55
|
end
|
61
56
|
|
@@ -65,59 +60,115 @@ module Open
|
|
65
60
|
Thread.current["name"] = "Traverse into"
|
66
61
|
error = false
|
67
62
|
begin
|
68
|
-
self.traverse(obj, callback: callback, **options, &block)
|
69
|
-
into.close if into.respond_to?(:close)
|
63
|
+
self.traverse(obj, callback: callback, cpus: cpus, unnamed: unnamed, **options, &block)
|
64
|
+
into.close if ! keep_open && into.respond_to?(:close)
|
70
65
|
bar.remove if bar
|
71
66
|
rescue Exception
|
72
67
|
into.abort($!) if into.respond_to?(:abort)
|
73
68
|
bar.remove($!) if bar
|
74
69
|
end
|
75
70
|
end
|
76
|
-
|
71
|
+
|
72
|
+
Thread.pass until into_thread["name"]
|
73
|
+
|
74
|
+
case into
|
75
|
+
when IO
|
76
|
+
ConcurrentStream.setup into, :threads => into_thread
|
77
|
+
when TSV::Dumper
|
78
|
+
ConcurrentStream.setup into.stream, :threads => into_thread
|
79
|
+
end
|
77
80
|
return into
|
78
81
|
end
|
79
82
|
end
|
80
83
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
nil
|
94
|
-
end
|
95
|
-
when String
|
96
|
-
f = Open.open(obj)
|
97
|
-
self.traverse(f, cpus: cpus, callback: callback, **options, &block)
|
98
|
-
when Step
|
99
|
-
raise obj.exception if obj.error?
|
100
|
-
self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
|
101
|
-
when IO
|
102
|
-
TSV.parse obj, **options do |k,v|
|
103
|
-
res = block.call k, v
|
104
|
-
callback.call res if callback
|
105
|
-
nil
|
106
|
-
end
|
107
|
-
else
|
108
|
-
TSV.parse obj, **options do |k,v|
|
109
|
-
res = block.call k, v
|
110
|
-
callback.call res if callback
|
111
|
-
nil
|
84
|
+
if cpus
|
85
|
+
queue = WorkQueue.new cpus do |args|
|
86
|
+
block.call *args
|
87
|
+
end
|
88
|
+
|
89
|
+
queue.process do |res|
|
90
|
+
callback.call res
|
91
|
+
end
|
92
|
+
|
93
|
+
begin
|
94
|
+
self.traverse(obj, **options) do |*args|
|
95
|
+
queue.write args
|
112
96
|
end
|
97
|
+
|
98
|
+
queue.close
|
99
|
+
|
100
|
+
queue.join(false)
|
101
|
+
|
102
|
+
bar.remove if bar
|
103
|
+
return into
|
104
|
+
rescue Exception
|
105
|
+
bar.remove($!) if bar
|
106
|
+
queue.abort
|
107
|
+
raise $!
|
108
|
+
ensure
|
109
|
+
queue.clean
|
113
110
|
end
|
111
|
+
end
|
112
|
+
|
113
|
+
begin
|
114
|
+
res = case obj
|
115
|
+
when TSV
|
116
|
+
obj.traverse unnamed: unnamed, **options do |k,v,f|
|
117
|
+
res = block.call(k, v, f)
|
118
|
+
callback.call res if callback
|
119
|
+
nil
|
120
|
+
end
|
121
|
+
when Array
|
122
|
+
obj.each do |line|
|
123
|
+
res = block.call(line)
|
124
|
+
callback.call res if callback
|
125
|
+
nil
|
126
|
+
end
|
127
|
+
when String
|
128
|
+
obj = obj.produce_and_find if Path === obj
|
129
|
+
f = Open.open(obj)
|
130
|
+
self.traverse(f, cpus: cpus, callback: callback, **options, &block)
|
131
|
+
when Step
|
132
|
+
raise obj.exception if obj.error?
|
133
|
+
self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
|
134
|
+
when IO
|
135
|
+
if options[:type] == :array || options[:type] == :line
|
136
|
+
Log.low "Traverse stream by lines #{Log.fingerprint obj}"
|
137
|
+
while line = obj.gets
|
138
|
+
line.strip!
|
139
|
+
res = block.call(line)
|
140
|
+
callback.call res if callback
|
141
|
+
end
|
142
|
+
else
|
143
|
+
Log.low "Traverse stream with parser #{Log.fingerprint obj}"
|
144
|
+
parser = TSV::Parser.new obj
|
145
|
+
parser.traverse **options do |k,v,f|
|
146
|
+
res = block.call k,v,f
|
147
|
+
callback.call res if callback
|
148
|
+
nil
|
149
|
+
end
|
150
|
+
end
|
151
|
+
when TSV::Parser
|
152
|
+
obj.traverse **options do |k,v,f|
|
153
|
+
res = block.call k, v, f
|
154
|
+
callback.call res if callback
|
155
|
+
nil
|
156
|
+
end
|
157
|
+
else
|
158
|
+
TSV.parse obj, **options do |k,v|
|
159
|
+
res = block.call k, v
|
160
|
+
callback.call res if callback
|
161
|
+
nil
|
162
|
+
end
|
163
|
+
end
|
114
164
|
bar.remove if bar
|
115
|
-
rescue
|
116
|
-
|
117
|
-
|
165
|
+
rescue Exception => exception
|
166
|
+
exception = obj.stream_exception if (ConcurrentStream === obj) && obj.stream_exception
|
167
|
+
bar.error if bar
|
168
|
+
raise exception
|
118
169
|
end
|
119
170
|
|
120
|
-
into
|
171
|
+
into || res
|
121
172
|
end
|
122
173
|
end
|
123
174
|
|
@@ -125,4 +176,28 @@ module TSV
|
|
125
176
|
def self.traverse(*args, **kwargs, &block)
|
126
177
|
Open.traverse(*args, **kwargs, &block)
|
127
178
|
end
|
179
|
+
|
180
|
+
def self.process_stream(stream, header_hash: "#", &block)
|
181
|
+
sout = Open.open_pipe do |sin|
|
182
|
+
while line = stream.gets
|
183
|
+
break unless line.start_with?(header_hash)
|
184
|
+
sin.puts line
|
185
|
+
end
|
186
|
+
yield sin, line
|
187
|
+
end
|
188
|
+
end
|
189
|
+
|
190
|
+
def self.collapse_stream(stream, *args, **kwargs, &block)
|
191
|
+
stream = stream.stream if stream.respond_to?(:stream)
|
192
|
+
self.process_stream(stream) do |sin, line|
|
193
|
+
collapsed = Open.collapse_stream(stream, line: line)
|
194
|
+
Open.consume_stream(collapsed, false, sin)
|
195
|
+
end
|
196
|
+
end
|
197
|
+
|
198
|
+
def collapse_stream(*args, **kwargs, &block)
|
199
|
+
TSV.collapse_stream(self.dumper_stream, *args, **kwargs, &block)
|
200
|
+
end
|
201
|
+
|
202
|
+
|
128
203
|
end
|