scout-gear 8.0.0 → 8.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +26 -9
- data/Rakefile +6 -1
- data/VERSION +1 -1
- data/bin/scout +15 -4
- data/doc/lib/scout/path.md +35 -0
- data/doc/lib/scout/workflow/task.md +13 -0
- data/lib/scout/cmd.rb +23 -24
- data/lib/scout/concurrent_stream.rb +36 -19
- data/lib/scout/exceptions.rb +10 -0
- data/lib/scout/log/color.rb +11 -11
- data/lib/scout/log/progress/report.rb +7 -5
- data/lib/scout/log/progress/util.rb +3 -0
- data/lib/scout/log/trap.rb +3 -3
- data/lib/scout/log.rb +64 -36
- data/lib/scout/meta_extension.rb +34 -0
- data/lib/scout/misc/digest.rb +11 -2
- data/lib/scout/misc/format.rb +12 -7
- data/lib/scout/misc/monitor.rb +11 -0
- data/lib/scout/misc/system.rb +48 -0
- data/lib/scout/named_array.rb +8 -0
- data/lib/scout/offsite/ssh.rb +171 -0
- data/lib/scout/offsite/step.rb +83 -0
- data/lib/scout/offsite/sync.rb +55 -0
- data/lib/scout/offsite.rb +3 -0
- data/lib/scout/open/lock.rb +5 -24
- data/lib/scout/open/remote.rb +12 -1
- data/lib/scout/open/stream.rb +110 -122
- data/lib/scout/open/util.rb +9 -0
- data/lib/scout/open.rb +5 -4
- data/lib/scout/path/find.rb +15 -10
- data/lib/scout/path/util.rb +5 -0
- data/lib/scout/persist/serialize.rb +3 -3
- data/lib/scout/persist.rb +1 -1
- data/lib/scout/resource/path.rb +4 -0
- data/lib/scout/resource/util.rb +10 -4
- data/lib/scout/tsv/dumper.rb +2 -0
- data/lib/scout/tsv/index.rb +28 -86
- data/lib/scout/tsv/open.rb +35 -14
- data/lib/scout/tsv/parser.rb +9 -2
- data/lib/scout/tsv/persist/tokyocabinet.rb +2 -0
- data/lib/scout/tsv/stream.rb +204 -0
- data/lib/scout/tsv/transformer.rb +11 -0
- data/lib/scout/tsv.rb +9 -2
- data/lib/scout/work_queue/worker.rb +2 -2
- data/lib/scout/work_queue.rb +36 -12
- data/lib/scout/workflow/definition.rb +2 -1
- data/lib/scout/workflow/deployment/orchestrator.rb +245 -0
- data/lib/scout/workflow/deployment.rb +1 -0
- data/lib/scout/workflow/step/dependencies.rb +37 -11
- data/lib/scout/workflow/step/file.rb +5 -0
- data/lib/scout/workflow/step/info.rb +5 -3
- data/lib/scout/workflow/step/load.rb +1 -1
- data/lib/scout/workflow/step/provenance.rb +1 -0
- data/lib/scout/workflow/step/status.rb +6 -8
- data/lib/scout/workflow/step.rb +75 -30
- data/lib/scout/workflow/task/dependencies.rb +114 -0
- data/lib/scout/workflow/task/inputs.rb +27 -13
- data/lib/scout/workflow/task.rb +9 -108
- data/lib/scout/workflow/usage.rb +40 -12
- data/lib/scout/workflow.rb +4 -2
- data/lib/scout-gear.rb +2 -0
- data/lib/scout.rb +6 -0
- data/scout-gear.gemspec +32 -7
- data/scout_commands/doc +37 -0
- data/scout_commands/find +1 -0
- data/scout_commands/offsite +30 -0
- data/scout_commands/update +29 -0
- data/scout_commands/workflow/info +15 -3
- data/scout_commands/workflow/install +102 -0
- data/scout_commands/workflow/task +26 -5
- data/test/scout/offsite/test_ssh.rb +15 -0
- data/test/scout/offsite/test_step.rb +33 -0
- data/test/scout/offsite/test_sync.rb +36 -0
- data/test/scout/offsite/test_task.rb +0 -0
- data/test/scout/resource/test_path.rb +6 -0
- data/test/scout/test_named_array.rb +6 -0
- data/test/scout/test_persist.rb +3 -2
- data/test/scout/test_tsv.rb +17 -0
- data/test/scout/test_work_queue.rb +63 -41
- data/test/scout/tsv/persist/test_adapter.rb +1 -1
- data/test/scout/tsv/test_index.rb +14 -0
- data/test/scout/tsv/test_parser.rb +14 -0
- data/test/scout/tsv/test_stream.rb +200 -0
- data/test/scout/tsv/test_transformer.rb +12 -0
- data/test/scout/workflow/deployment/test_orchestrator.rb +272 -0
- data/test/scout/workflow/step/test_dependencies.rb +68 -0
- data/test/scout/workflow/step/test_info.rb +18 -0
- data/test/scout/workflow/step/test_status.rb +0 -1
- data/test/scout/workflow/task/test_dependencies.rb +355 -0
- data/test/scout/workflow/task/test_inputs.rb +53 -0
- data/test/scout/workflow/test_definition.rb +18 -0
- data/test/scout/workflow/test_documentation.rb +24 -0
- data/test/scout/workflow/test_step.rb +109 -0
- data/test/scout/workflow/test_task.rb +0 -287
- data/test/test_scout.rb +9 -0
- metadata +83 -5
- data/scout_commands/workflow/task_old +0 -706
data/lib/scout/resource/util.rb
CHANGED
@@ -8,7 +8,6 @@ module Resource
|
|
8
8
|
|
9
9
|
map_order ||= (path_maps.keys & Path.basic_map_order) + (path_maps.keys - Path.basic_map_order)
|
10
10
|
map_order -= [:current, "current"]
|
11
|
-
map_order << :current
|
12
11
|
|
13
12
|
choices = []
|
14
13
|
map_order.uniq.each do |name|
|
@@ -17,6 +16,7 @@ module Resource
|
|
17
16
|
next if pattern.nil?
|
18
17
|
|
19
18
|
pattern = pattern.sub('{PWD}', Dir.pwd)
|
19
|
+
pattern = pattern.sub('{HOME}', ENV["HOME"])
|
20
20
|
if String === pattern and pattern.include?('{')
|
21
21
|
regexp = "^" + pattern
|
22
22
|
.gsub(/{(TOPLEVEL)}/,'(?<\1>[^/]+)')
|
@@ -35,14 +35,20 @@ module Resource
|
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
|
-
|
38
|
+
identified = choices.sort_by{|s| s.length }.first
|
39
|
+
|
40
|
+
Path.setup(identified || path, self, nil, path_maps)
|
39
41
|
end
|
40
42
|
|
41
|
-
def self.relocate(path)
|
42
|
-
return path if Open.exists?(path)
|
43
|
+
def self.identify(path)
|
43
44
|
resource = path.pkgdir if Path === path
|
44
45
|
resource = Scout unless Resource === resource
|
45
46
|
unlocated = resource.identify path
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.relocate(path)
|
50
|
+
return path if Open.exists?(path)
|
51
|
+
unlocated = identify(path)
|
46
52
|
unlocated.find
|
47
53
|
end
|
48
54
|
end
|
data/lib/scout/tsv/dumper.rb
CHANGED
@@ -28,7 +28,9 @@ module TSV
|
|
28
28
|
:sep, :type,
|
29
29
|
:sep => "\t", :type => :double
|
30
30
|
@options = options
|
31
|
+
@options[:type] = @type
|
31
32
|
@sout, @sin = Open.pipe
|
33
|
+
Log.low{"Dumper pipe #{[Log.fingerprint(@sin), Log.fingerprint(@sout)] * " -> "}"}
|
32
34
|
@initialized = false
|
33
35
|
@mutex = Mutex.new
|
34
36
|
ConcurrentStream.setup(@sin, pair: @sout)
|
data/lib/scout/tsv/index.rb
CHANGED
@@ -21,12 +21,14 @@ module TSV
|
|
21
21
|
|
22
22
|
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
23
23
|
|
24
|
-
|
24
|
+
log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}"
|
25
|
+
Log.low log_msg
|
26
|
+
bar = log_msg if TrueClass === bar
|
25
27
|
|
26
28
|
if order
|
27
29
|
tmp_index = {}
|
28
30
|
include_self = fields == :all || (Array === fields) && fields.include?(target)
|
29
|
-
target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields,
|
31
|
+
target_key_field, source_field_names = Open.traverse tsv_file, type: :double, key_field: target, fields: fields, unnamed: true, bar: bar, **kwargs do |k,values|
|
30
32
|
tmp_index[k] ||= [[k]] if include_self
|
31
33
|
values.each_with_index do |list,i|
|
32
34
|
i += 1 if include_self
|
@@ -66,19 +68,26 @@ module TSV
|
|
66
68
|
TSV.index(self, *args, **kwargs, &block)
|
67
69
|
end
|
68
70
|
|
69
|
-
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
|
71
|
+
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs)
|
70
72
|
persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
|
71
73
|
:persist, :persist_type, :persist_update, :data_persist,
|
72
74
|
:persist => false, :persist_type => :fwt
|
73
75
|
kwargs.delete :type
|
76
|
+
kwargs[:unnamed] = true
|
74
77
|
|
75
|
-
Persist.persist(tsv_file, type,
|
78
|
+
Persist.persist(tsv_file, type,
|
79
|
+
:persist => persist, :prefix => "RangeIndex[#{[start_field, end_field]*"-"}]", update: persist_update,
|
80
|
+
:other_options => kwargs) do |filename|
|
76
81
|
|
77
82
|
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
78
83
|
|
84
|
+
log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{[start_field, end_field]*"-"}"
|
85
|
+
Log.low log_msg
|
86
|
+
bar = log_msg if TrueClass === bar
|
87
|
+
|
79
88
|
max_key_size = 0
|
80
89
|
index_data = []
|
81
|
-
TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], **kwargs do |key, values|
|
90
|
+
TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], bar: bar, **kwargs do |key, values|
|
82
91
|
key_size = key.length
|
83
92
|
max_key_size = key_size if key_size > max_key_size
|
84
93
|
|
@@ -100,23 +109,26 @@ module TSV
|
|
100
109
|
end
|
101
110
|
end
|
102
111
|
|
103
|
-
def range_index(*args, **kwargs, &block)
|
104
|
-
TSV.range_index(self, *args, **kwargs, &block)
|
105
|
-
end
|
106
|
-
|
107
|
-
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, **kwargs)
|
112
|
+
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs)
|
108
113
|
persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
|
109
114
|
:persist, :persist_type, :persist_update, :data_persist,
|
110
115
|
:persist => false, :persist_type => :fwt
|
111
116
|
kwargs.delete :type
|
117
|
+
kwargs[:unnamed] = true
|
112
118
|
|
113
|
-
Persist.persist(tsv_file, type,
|
119
|
+
Persist.persist(tsv_file, type,
|
120
|
+
:persist => persist, :prefix => "RangeIndex[#{pos_field}]", update: persist_update,
|
121
|
+
:other_options => kwargs) do |filename|
|
114
122
|
|
115
123
|
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
116
124
|
|
125
|
+
log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{pos_field}"
|
126
|
+
Log.low log_msg
|
127
|
+
bar = log_msg if TrueClass === bar
|
128
|
+
|
117
129
|
max_key_size = 0
|
118
130
|
index_data = []
|
119
|
-
TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, **kwargs do |key, pos|
|
131
|
+
TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, bar: bar, **kwargs do |key, pos|
|
120
132
|
key_size = key.length
|
121
133
|
max_key_size = key_size if key_size > max_key_size
|
122
134
|
|
@@ -137,81 +149,11 @@ module TSV
|
|
137
149
|
end
|
138
150
|
end
|
139
151
|
|
152
|
+
def range_index(*args, **kwargs, &block)
|
153
|
+
TSV.range_index(self, *args, **kwargs, &block)
|
154
|
+
end
|
155
|
+
|
140
156
|
def pos_index(*args, **kwargs, &block)
|
141
157
|
TSV.pos_index(self, *args, **kwargs, &block)
|
142
158
|
end
|
143
|
-
|
144
|
-
|
145
|
-
#def range_index(start_field = nil, end_field = nil, options = {})
|
146
|
-
# start_field ||= "Start"
|
147
|
-
# end_field ||= "End"
|
148
|
-
|
149
|
-
# options = Misc.add_defaults options,
|
150
|
-
# :persist => false, :persist_file => nil, :persist_update => false
|
151
|
-
|
152
|
-
# persist_options = Misc.pull_keys options, :persist
|
153
|
-
# persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"
|
154
|
-
|
155
|
-
# Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
|
156
|
-
# max_key_size = 0
|
157
|
-
# index_data = []
|
158
|
-
# with_unnamed do
|
159
|
-
# with_monitor :desc => "Creating Index Data", :step => 10000 do
|
160
|
-
# through :key, [start_field, end_field] do |key, values|
|
161
|
-
# key_size = key.length
|
162
|
-
# max_key_size = key_size if key_size > max_key_size
|
163
|
-
|
164
|
-
# start_pos, end_pos = values
|
165
|
-
# if Array === start_pos
|
166
|
-
# start_pos.zip(end_pos).each do |s,e|
|
167
|
-
# index_data << [key, [s.to_i, e.to_i]]
|
168
|
-
# end
|
169
|
-
# else
|
170
|
-
# index_data << [key, [start_pos.to_i, end_pos.to_i]]
|
171
|
-
# end
|
172
|
-
# end
|
173
|
-
# end
|
174
|
-
# end
|
175
|
-
|
176
|
-
# index = FixWidthTable.get(:memory, max_key_size, true)
|
177
|
-
# index.add_range index_data
|
178
|
-
# index.read
|
179
|
-
# index
|
180
|
-
# end
|
181
|
-
#end
|
182
|
-
|
183
|
-
#def self.range_index(file, start_field = nil, end_field = nil, options = {})
|
184
|
-
# start_field ||= "Start"
|
185
|
-
# end_field ||= "End"
|
186
|
-
|
187
|
-
# data_options = Misc.pull_keys options, :data
|
188
|
-
# filename = case
|
189
|
-
# when (String === file or Path === file)
|
190
|
-
# file
|
191
|
-
# when file.respond_to?(:filename)
|
192
|
-
# file.filename
|
193
|
-
# else
|
194
|
-
# file.object_id.to_s
|
195
|
-
# end
|
196
|
-
# persist_options = Misc.pull_keys options, :persist
|
197
|
-
# persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
|
198
|
-
|
199
|
-
# filters = Misc.process_options options, :filters
|
200
|
-
|
201
|
-
# if filters
|
202
|
-
# filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]"
|
203
|
-
# end
|
204
|
-
|
205
|
-
# Persist.persist(filename, :fwt, persist_options) do
|
206
|
-
# tsv = TSV.open(file, data_options)
|
207
|
-
# if filters
|
208
|
-
# tsv.filter
|
209
|
-
# filters.each do |match, value|
|
210
|
-
# tsv.add_filter match, value
|
211
|
-
# end
|
212
|
-
# end
|
213
|
-
|
214
|
-
# tsv.range_index(start_field, end_field, options)
|
215
|
-
# end
|
216
|
-
#end
|
217
159
|
end
|
data/lib/scout/tsv/open.rb
CHANGED
@@ -34,7 +34,7 @@ module Open
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, keep_open: false, **options, &block)
|
37
|
-
cpus = nil if cpus == 1
|
37
|
+
cpus = nil if cpus.to_i == 1
|
38
38
|
|
39
39
|
if into == :stream
|
40
40
|
sout, sin = Open.pipe
|
@@ -68,7 +68,15 @@ module Open
|
|
68
68
|
bar.remove($!) if bar
|
69
69
|
end
|
70
70
|
end
|
71
|
+
|
71
72
|
Thread.pass until into_thread["name"]
|
73
|
+
|
74
|
+
case into
|
75
|
+
when IO
|
76
|
+
ConcurrentStream.setup into, :threads => into_thread
|
77
|
+
when TSV::Dumper
|
78
|
+
ConcurrentStream.setup into.stream, :threads => into_thread
|
79
|
+
end
|
72
80
|
return into
|
73
81
|
end
|
74
82
|
end
|
@@ -82,27 +90,29 @@ module Open
|
|
82
90
|
callback.call res
|
83
91
|
end
|
84
92
|
|
85
|
-
self.traverse(obj, **options) do |*args|
|
86
|
-
queue.write args
|
87
|
-
end
|
88
|
-
|
89
93
|
begin
|
94
|
+
self.traverse(obj, **options) do |*args|
|
95
|
+
queue.write args
|
96
|
+
end
|
97
|
+
|
90
98
|
queue.close
|
91
99
|
|
92
|
-
queue.join
|
100
|
+
queue.join(false)
|
93
101
|
|
94
102
|
bar.remove if bar
|
95
103
|
return into
|
96
104
|
rescue Exception
|
97
105
|
bar.remove($!) if bar
|
106
|
+
queue.abort
|
98
107
|
raise $!
|
108
|
+
ensure
|
109
|
+
queue.clean
|
99
110
|
end
|
100
111
|
end
|
101
112
|
|
102
113
|
begin
|
103
114
|
res = case obj
|
104
115
|
when TSV
|
105
|
-
#obj.traverse options[:key_field], options[:fields], unnamed: unnamed, **options do |k,v,f|
|
106
116
|
obj.traverse unnamed: unnamed, **options do |k,v,f|
|
107
117
|
res = block.call(k, v, f)
|
108
118
|
callback.call res if callback
|
@@ -122,11 +132,21 @@ module Open
|
|
122
132
|
raise obj.exception if obj.error?
|
123
133
|
self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
|
124
134
|
when IO
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
135
|
+
if options[:type] == :array || options[:type] == :line
|
136
|
+
Log.low "Traverse stream by lines #{Log.fingerprint obj}"
|
137
|
+
while line = obj.gets
|
138
|
+
line.strip!
|
139
|
+
res = block.call(line)
|
140
|
+
callback.call res if callback
|
141
|
+
end
|
142
|
+
else
|
143
|
+
Log.low "Traverse stream with parser #{Log.fingerprint obj}"
|
144
|
+
parser = TSV::Parser.new obj
|
145
|
+
parser.traverse **options do |k,v,f|
|
146
|
+
res = block.call k,v,f
|
147
|
+
callback.call res if callback
|
148
|
+
nil
|
149
|
+
end
|
130
150
|
end
|
131
151
|
when TSV::Parser
|
132
152
|
obj.traverse **options do |k,v,f|
|
@@ -142,9 +162,10 @@ module Open
|
|
142
162
|
end
|
143
163
|
end
|
144
164
|
bar.remove if bar
|
145
|
-
rescue
|
165
|
+
rescue Exception => exception
|
166
|
+
exception = obj.stream_exception if (ConcurrentStream === obj) && obj.stream_exception
|
146
167
|
bar.error if bar
|
147
|
-
raise
|
168
|
+
raise exception
|
148
169
|
end
|
149
170
|
|
150
171
|
into || res
|
data/lib/scout/tsv/parser.rb
CHANGED
@@ -190,6 +190,11 @@ module TSV
|
|
190
190
|
end
|
191
191
|
|
192
192
|
def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
|
193
|
+
if (Path === stream) || ((String === stream) && Path.is_filename?(stream))
|
194
|
+
Open.open(stream) do |f|
|
195
|
+
return parse_header(f, fix: fix, header_hash: header_hash, sep: sep)
|
196
|
+
end
|
197
|
+
end
|
193
198
|
raise "Closed stream" if IO === stream && stream.closed?
|
194
199
|
|
195
200
|
opts = {}
|
@@ -240,7 +245,7 @@ module TSV
|
|
240
245
|
opts[:type] = opts[:type].to_sym if opts[:type]
|
241
246
|
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
242
247
|
|
243
|
-
[opts, key_field, fields, first_line, preamble]
|
248
|
+
NamedArray.setup([opts, key_field, fields, first_line, preamble], %w(options key_field fields first_line preamble))
|
244
249
|
end
|
245
250
|
|
246
251
|
KEY_PARAMETERS = begin
|
@@ -263,7 +268,7 @@ module TSV
|
|
263
268
|
@options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
|
264
269
|
@options[:sep] = sep if @options[:sep].nil?
|
265
270
|
@options.merge!(:key_field => @key_field, :fields => @fields)
|
266
|
-
@type = type
|
271
|
+
@type = @options[:type] || type
|
267
272
|
end
|
268
273
|
|
269
274
|
def all_fields
|
@@ -304,6 +309,8 @@ module TSV
|
|
304
309
|
field_names = @fields
|
305
310
|
end
|
306
311
|
|
312
|
+
kwargs[:positions] = nil if @type == :flat
|
313
|
+
|
307
314
|
if key_field
|
308
315
|
if @fields
|
309
316
|
all_field_names ||= [@key_field] + @fields
|
data/lib/scout/tsv/stream.rb
ADDED
@@ -0,0 +1,204 @@
|
|
1
|
+
module TSV
|
2
|
+
def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, field_prefix: nil)
|
3
|
+
|
4
|
+
streams = streams.collect do |stream|
|
5
|
+
case stream
|
6
|
+
when(defined? Step and Step)
|
7
|
+
stream.stream
|
8
|
+
when Path
|
9
|
+
stream.open
|
10
|
+
when TSV::Dumper
|
11
|
+
stream.stream
|
12
|
+
else
|
13
|
+
stream
|
14
|
+
end
|
15
|
+
end.compact
|
16
|
+
|
17
|
+
num_streams = streams.length
|
18
|
+
|
19
|
+
streams = streams.collect do |stream|
|
20
|
+
Open.sort_stream(stream, memory: sort_memory)
|
21
|
+
end if sort
|
22
|
+
|
23
|
+
begin
|
24
|
+
|
25
|
+
lines =[]
|
26
|
+
fields =[]
|
27
|
+
sizes =[]
|
28
|
+
key_fields =[]
|
29
|
+
input_options =[]
|
30
|
+
empty =[]
|
31
|
+
preambles =[]
|
32
|
+
parser_types =[]
|
33
|
+
|
34
|
+
type ||= :double
|
35
|
+
|
36
|
+
streams = streams.collect do |stream|
|
37
|
+
|
38
|
+
parser = TSV::Parser.new stream, type: type
|
39
|
+
|
40
|
+
sfields = parser.fields
|
41
|
+
|
42
|
+
if field_prefix
|
43
|
+
index = streams.index stream
|
44
|
+
prefix = field_prefix[index]
|
45
|
+
|
46
|
+
sfields = sfields.collect{|f|[prefix, f]* ":"}
|
47
|
+
end
|
48
|
+
|
49
|
+
first_line = parser.first_line
|
50
|
+
first_line = nil if first_line == ""
|
51
|
+
|
52
|
+
lines << first_line
|
53
|
+
key_fields << parser.key_field
|
54
|
+
fields << sfields
|
55
|
+
sizes << sfields.length if sfields
|
56
|
+
input_options << parser.options
|
57
|
+
preambles << parser.preamble if preamble and not parser.preamble.empty?
|
58
|
+
parser_types << parser.type
|
59
|
+
|
60
|
+
empty << stream if parser.first_line.nil? || parser.first_line.empty?
|
61
|
+
|
62
|
+
stream
|
63
|
+
end
|
64
|
+
|
65
|
+
|
66
|
+
all_fields = fields.dup
|
67
|
+
|
68
|
+
key_field = key_fields.compact.first
|
69
|
+
|
70
|
+
if same_fields
|
71
|
+
fields = fields.first
|
72
|
+
else
|
73
|
+
fields = fields.compact.flatten
|
74
|
+
end
|
75
|
+
|
76
|
+
options = input_options.first
|
77
|
+
type ||= options[:type]
|
78
|
+
type ||= :list if type == :single
|
79
|
+
type ||= :double if type == :flat
|
80
|
+
|
81
|
+
preamble_txt = case preamble
|
82
|
+
when TrueClass
|
83
|
+
preambles * "\n"
|
84
|
+
when String
|
85
|
+
if preamble[0]== '+'
|
86
|
+
preambles * "\n" + "\n" + preamble[1..-1]
|
87
|
+
else
|
88
|
+
preamble
|
89
|
+
end
|
90
|
+
else
|
91
|
+
nil
|
92
|
+
end
|
93
|
+
|
94
|
+
empty_pos = empty.collect{|stream| streams.index stream}
|
95
|
+
|
96
|
+
keys =[]
|
97
|
+
parts =[]
|
98
|
+
lines.each_with_index do |line,i|
|
99
|
+
if line.nil? || line.empty?
|
100
|
+
keys[i]= nil
|
101
|
+
parts[i]= nil
|
102
|
+
else
|
103
|
+
vs = line.chomp.split(sep, -1)
|
104
|
+
key, *p = vs
|
105
|
+
keys[i]= key
|
106
|
+
parts[i]= p
|
107
|
+
end
|
108
|
+
sizes[i]||= parts[i].length-1 unless parts[i].nil?
|
109
|
+
end
|
110
|
+
done_streams =[]
|
111
|
+
|
112
|
+
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type
|
113
|
+
dumper.init
|
114
|
+
|
115
|
+
t = Thread.new do
|
116
|
+
Thread.report_on_exception = false
|
117
|
+
Thread.current["name"] = "Paste streams"
|
118
|
+
|
119
|
+
last_min = nil
|
120
|
+
while lines.reject{|line| line.nil?}.any?
|
121
|
+
min = keys.compact.sort.first
|
122
|
+
break if min.nil?
|
123
|
+
new_values =[]
|
124
|
+
|
125
|
+
skip = all_match && keys.uniq !=[min]
|
126
|
+
|
127
|
+
keys.each_with_index do |key,i|
|
128
|
+
case key
|
129
|
+
when min
|
130
|
+
new_values << parts[i]
|
131
|
+
|
132
|
+
begin
|
133
|
+
line = lines[i]= begin
|
134
|
+
streams[i].gets
|
135
|
+
rescue
|
136
|
+
Log.exception $!
|
137
|
+
nil
|
138
|
+
end
|
139
|
+
if line.nil?
|
140
|
+
keys[i]= nil
|
141
|
+
parts[i]= nil
|
142
|
+
else
|
143
|
+
k, *p = line.chomp.split(sep, -1)
|
144
|
+
raise TryAgain if k == keys[i]
|
145
|
+
keys[i]= k
|
146
|
+
parts[i]= p.collect{|e| e.nil? ? "" : e}
|
147
|
+
end
|
148
|
+
rescue TryAgain
|
149
|
+
keys[i]= nil
|
150
|
+
parts[i]= nil
|
151
|
+
Log.debug "Skipping repeated key in stream #{i}: #{keys[i]}"
|
152
|
+
retry
|
153
|
+
end
|
154
|
+
else
|
155
|
+
p = [nil] * sizes[i]
|
156
|
+
new_values << p
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
next if skip
|
161
|
+
|
162
|
+
if same_fields
|
163
|
+
new_values_same = []
|
164
|
+
new_values.each do |list|
|
165
|
+
list.each_with_index do |l,i|
|
166
|
+
new_values_same[i] ||= []
|
167
|
+
new_values_same[i] << l
|
168
|
+
end
|
169
|
+
end
|
170
|
+
new_values = new_values_same
|
171
|
+
else
|
172
|
+
new_values = new_values.inject([]){|acc,l| acc.concat l }
|
173
|
+
end
|
174
|
+
|
175
|
+
dumper.add min, new_values
|
176
|
+
end
|
177
|
+
|
178
|
+
dumper.close
|
179
|
+
|
180
|
+
streams.each do |stream|
|
181
|
+
stream.close if stream.respond_to?(:close) && ! stream.closed?
|
182
|
+
stream.join if stream.respond_to? :join
|
183
|
+
end
|
184
|
+
end
|
185
|
+
rescue Aborted
|
186
|
+
Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
|
187
|
+
streams.each do |stream|
|
188
|
+
stream.abort if stream.respond_to? :abort
|
189
|
+
end
|
190
|
+
raise $!
|
191
|
+
rescue Exception
|
192
|
+
Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
|
193
|
+
streams.each do |stream|
|
194
|
+
stream.abort if stream.respond_to? :abort
|
195
|
+
end
|
196
|
+
raise $!
|
197
|
+
end
|
198
|
+
|
199
|
+
Thread.pass until t["name"]
|
200
|
+
|
201
|
+
ConcurrentStream.setup(dumper.stream, threads: [t])
|
202
|
+
end
|
203
|
+
|
204
|
+
end
|
data/lib/scout/tsv/transformer.rb
CHANGED
@@ -137,5 +137,16 @@ module TSV
|
|
137
137
|
end
|
138
138
|
res
|
139
139
|
end
|
140
|
+
|
141
|
+
def to_flat
|
142
|
+
res = self.annotate({})
|
143
|
+
transformer = Transformer.new self, res
|
144
|
+
transformer.type = :flat
|
145
|
+
transformer.traverse do |k,v|
|
146
|
+
v = Array === v ? v.flatten : [v]
|
147
|
+
[k, v]
|
148
|
+
end
|
149
|
+
res
|
150
|
+
end
|
140
151
|
end
|
141
152
|
|
data/lib/scout/tsv.rb
CHANGED
@@ -10,10 +10,11 @@ require_relative 'tsv/traverse'
|
|
10
10
|
require_relative 'tsv/open'
|
11
11
|
require_relative 'tsv/attach'
|
12
12
|
require_relative 'tsv/change_id'
|
13
|
+
require_relative 'tsv/stream'
|
13
14
|
|
14
15
|
module TSV
|
15
16
|
extend MetaExtension
|
16
|
-
extension_attr :key_field, :fields, :type, :filename, :namespace, :unnamed, :identifiers
|
17
|
+
extension_attr :key_field, :fields, :type, :cast, :filename, :namespace, :unnamed, :identifiers
|
17
18
|
|
18
19
|
def self.str2options(str)
|
19
20
|
field_options,_sep, rest = str.partition("#")
|
@@ -40,7 +41,13 @@ module TSV
|
|
40
41
|
data = filename ? ScoutCabinet.open(filename, true, type) : nil
|
41
42
|
options[:data] = data if data
|
42
43
|
options[:filename] = file
|
43
|
-
|
44
|
+
|
45
|
+
if data
|
46
|
+
Log.debug "TSV open #{Log.fingerprint file} into #{Log.fingerprint data}"
|
47
|
+
else
|
48
|
+
Log.debug "TSV open #{Log.fingerprint file}"
|
49
|
+
end
|
50
|
+
|
44
51
|
Open.open(file, grep: grep, invert_grep: invert_grep) do |f|
|
45
52
|
TSV.parse(f, **options)
|
46
53
|
end
|