scout-gear 8.0.0 → 8.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +26 -9
- data/Rakefile +6 -1
- data/VERSION +1 -1
- data/bin/scout +15 -4
- data/doc/lib/scout/path.md +35 -0
- data/doc/lib/scout/workflow/task.md +13 -0
- data/lib/scout/cmd.rb +23 -24
- data/lib/scout/concurrent_stream.rb +36 -19
- data/lib/scout/exceptions.rb +10 -0
- data/lib/scout/log/color.rb +11 -11
- data/lib/scout/log/progress/report.rb +7 -5
- data/lib/scout/log/progress/util.rb +3 -0
- data/lib/scout/log/trap.rb +3 -3
- data/lib/scout/log.rb +64 -36
- data/lib/scout/meta_extension.rb +34 -0
- data/lib/scout/misc/digest.rb +11 -2
- data/lib/scout/misc/format.rb +12 -7
- data/lib/scout/misc/monitor.rb +11 -0
- data/lib/scout/misc/system.rb +48 -0
- data/lib/scout/named_array.rb +8 -0
- data/lib/scout/offsite/ssh.rb +171 -0
- data/lib/scout/offsite/step.rb +83 -0
- data/lib/scout/offsite/sync.rb +55 -0
- data/lib/scout/offsite.rb +3 -0
- data/lib/scout/open/lock.rb +5 -24
- data/lib/scout/open/remote.rb +12 -1
- data/lib/scout/open/stream.rb +110 -122
- data/lib/scout/open/util.rb +9 -0
- data/lib/scout/open.rb +5 -4
- data/lib/scout/path/find.rb +15 -10
- data/lib/scout/path/util.rb +5 -0
- data/lib/scout/persist/serialize.rb +3 -3
- data/lib/scout/persist.rb +1 -1
- data/lib/scout/resource/path.rb +4 -0
- data/lib/scout/resource/util.rb +10 -4
- data/lib/scout/tsv/dumper.rb +2 -0
- data/lib/scout/tsv/index.rb +28 -86
- data/lib/scout/tsv/open.rb +35 -14
- data/lib/scout/tsv/parser.rb +9 -2
- data/lib/scout/tsv/persist/tokyocabinet.rb +2 -0
- data/lib/scout/tsv/stream.rb +204 -0
- data/lib/scout/tsv/transformer.rb +11 -0
- data/lib/scout/tsv.rb +9 -2
- data/lib/scout/work_queue/worker.rb +2 -2
- data/lib/scout/work_queue.rb +36 -12
- data/lib/scout/workflow/definition.rb +2 -1
- data/lib/scout/workflow/deployment/orchestrator.rb +245 -0
- data/lib/scout/workflow/deployment.rb +1 -0
- data/lib/scout/workflow/step/dependencies.rb +37 -11
- data/lib/scout/workflow/step/file.rb +5 -0
- data/lib/scout/workflow/step/info.rb +5 -3
- data/lib/scout/workflow/step/load.rb +1 -1
- data/lib/scout/workflow/step/provenance.rb +1 -0
- data/lib/scout/workflow/step/status.rb +6 -8
- data/lib/scout/workflow/step.rb +75 -30
- data/lib/scout/workflow/task/dependencies.rb +114 -0
- data/lib/scout/workflow/task/inputs.rb +27 -13
- data/lib/scout/workflow/task.rb +9 -108
- data/lib/scout/workflow/usage.rb +40 -12
- data/lib/scout/workflow.rb +4 -2
- data/lib/scout-gear.rb +2 -0
- data/lib/scout.rb +6 -0
- data/scout-gear.gemspec +32 -7
- data/scout_commands/doc +37 -0
- data/scout_commands/find +1 -0
- data/scout_commands/offsite +30 -0
- data/scout_commands/update +29 -0
- data/scout_commands/workflow/info +15 -3
- data/scout_commands/workflow/install +102 -0
- data/scout_commands/workflow/task +26 -5
- data/test/scout/offsite/test_ssh.rb +15 -0
- data/test/scout/offsite/test_step.rb +33 -0
- data/test/scout/offsite/test_sync.rb +36 -0
- data/test/scout/offsite/test_task.rb +0 -0
- data/test/scout/resource/test_path.rb +6 -0
- data/test/scout/test_named_array.rb +6 -0
- data/test/scout/test_persist.rb +3 -2
- data/test/scout/test_tsv.rb +17 -0
- data/test/scout/test_work_queue.rb +63 -41
- data/test/scout/tsv/persist/test_adapter.rb +1 -1
- data/test/scout/tsv/test_index.rb +14 -0
- data/test/scout/tsv/test_parser.rb +14 -0
- data/test/scout/tsv/test_stream.rb +200 -0
- data/test/scout/tsv/test_transformer.rb +12 -0
- data/test/scout/workflow/deployment/test_orchestrator.rb +272 -0
- data/test/scout/workflow/step/test_dependencies.rb +68 -0
- data/test/scout/workflow/step/test_info.rb +18 -0
- data/test/scout/workflow/step/test_status.rb +0 -1
- data/test/scout/workflow/task/test_dependencies.rb +355 -0
- data/test/scout/workflow/task/test_inputs.rb +53 -0
- data/test/scout/workflow/test_definition.rb +18 -0
- data/test/scout/workflow/test_documentation.rb +24 -0
- data/test/scout/workflow/test_step.rb +109 -0
- data/test/scout/workflow/test_task.rb +0 -287
- data/test/test_scout.rb +9 -0
- metadata +83 -5
- data/scout_commands/workflow/task_old +0 -706
data/lib/scout/resource/util.rb
CHANGED
@@ -8,7 +8,6 @@ module Resource
|
|
8
8
|
|
9
9
|
map_order ||= (path_maps.keys & Path.basic_map_order) + (path_maps.keys - Path.basic_map_order)
|
10
10
|
map_order -= [:current, "current"]
|
11
|
-
map_order << :current
|
12
11
|
|
13
12
|
choices = []
|
14
13
|
map_order.uniq.each do |name|
|
@@ -17,6 +16,7 @@ module Resource
|
|
17
16
|
next if pattern.nil?
|
18
17
|
|
19
18
|
pattern = pattern.sub('{PWD}', Dir.pwd)
|
19
|
+
pattern = pattern.sub('{HOME}', ENV["HOME"])
|
20
20
|
if String === pattern and pattern.include?('{')
|
21
21
|
regexp = "^" + pattern
|
22
22
|
.gsub(/{(TOPLEVEL)}/,'(?<\1>[^/]+)')
|
@@ -35,14 +35,20 @@ module Resource
|
|
35
35
|
end
|
36
36
|
end
|
37
37
|
|
38
|
-
|
38
|
+
identified = choices.sort_by{|s| s.length }.first
|
39
|
+
|
40
|
+
Path.setup(identified || path, self, nil, path_maps)
|
39
41
|
end
|
40
42
|
|
41
|
-
def self.relocate(path)
|
42
|
-
return path if Open.exists?(path)
|
43
|
+
def self.identify(path)
|
43
44
|
resource = path.pkgdir if Path === path
|
44
45
|
resource = Scout unless Resource === resource
|
45
46
|
unlocated = resource.identify path
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.relocate(path)
|
50
|
+
return path if Open.exists?(path)
|
51
|
+
unlocated = identify(path)
|
46
52
|
unlocated.find
|
47
53
|
end
|
48
54
|
end
|
data/lib/scout/tsv/dumper.rb
CHANGED
@@ -28,7 +28,9 @@ module TSV
|
|
28
28
|
:sep, :type,
|
29
29
|
:sep => "\t", :type => :double
|
30
30
|
@options = options
|
31
|
+
@options[:type] = @type
|
31
32
|
@sout, @sin = Open.pipe
|
33
|
+
Log.low{"Dumper pipe #{[Log.fingerprint(@sin), Log.fingerprint(@sout)] * " -> "}"}
|
32
34
|
@initialized = false
|
33
35
|
@mutex = Mutex.new
|
34
36
|
ConcurrentStream.setup(@sin, pair: @sout)
|
data/lib/scout/tsv/index.rb
CHANGED
@@ -21,12 +21,14 @@ module TSV
|
|
21
21
|
|
22
22
|
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
23
23
|
|
24
|
-
|
24
|
+
log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}"
|
25
|
+
Log.low log_msg
|
26
|
+
bar = log_msg if TrueClass === bar
|
25
27
|
|
26
28
|
if order
|
27
29
|
tmp_index = {}
|
28
30
|
include_self = fields == :all || (Array === fields) && fields.include?(target)
|
29
|
-
target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields,
|
31
|
+
target_key_field, source_field_names = Open.traverse tsv_file, type: :double, key_field: target, fields: fields, unnamed: true, bar: bar, **kwargs do |k,values|
|
30
32
|
tmp_index[k] ||= [[k]] if include_self
|
31
33
|
values.each_with_index do |list,i|
|
32
34
|
i += 1 if include_self
|
@@ -66,19 +68,26 @@ module TSV
|
|
66
68
|
TSV.index(self, *args, **kwargs, &block)
|
67
69
|
end
|
68
70
|
|
69
|
-
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
|
71
|
+
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs)
|
70
72
|
persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
|
71
73
|
:persist, :persist_type, :persist_update, :data_persist,
|
72
74
|
:persist => false, :persist_type => :fwt
|
73
75
|
kwargs.delete :type
|
76
|
+
kwargs[:unnamed] = true
|
74
77
|
|
75
|
-
Persist.persist(tsv_file, type,
|
78
|
+
Persist.persist(tsv_file, type,
|
79
|
+
:persist => persist, :prefix => "RangeIndex[#{[start_field, end_field]*"-"}]", update: persist_update,
|
80
|
+
:other_options => kwargs) do |filename|
|
76
81
|
|
77
82
|
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
78
83
|
|
84
|
+
log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{[start_field, end_field]*"-"}"
|
85
|
+
Log.low log_msg
|
86
|
+
bar = log_msg if TrueClass === bar
|
87
|
+
|
79
88
|
max_key_size = 0
|
80
89
|
index_data = []
|
81
|
-
TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], **kwargs do |key, values|
|
90
|
+
TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], bar: bar, **kwargs do |key, values|
|
82
91
|
key_size = key.length
|
83
92
|
max_key_size = key_size if key_size > max_key_size
|
84
93
|
|
@@ -100,23 +109,26 @@ module TSV
|
|
100
109
|
end
|
101
110
|
end
|
102
111
|
|
103
|
-
def range_index(*args, **kwargs, &block)
|
104
|
-
TSV.range_index(self, *args, **kwargs, &block)
|
105
|
-
end
|
106
|
-
|
107
|
-
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, **kwargs)
|
112
|
+
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs)
|
108
113
|
persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
|
109
114
|
:persist, :persist_type, :persist_update, :data_persist,
|
110
115
|
:persist => false, :persist_type => :fwt
|
111
116
|
kwargs.delete :type
|
117
|
+
kwargs[:unnamed] = true
|
112
118
|
|
113
|
-
Persist.persist(tsv_file, type,
|
119
|
+
Persist.persist(tsv_file, type,
|
120
|
+
:persist => persist, :prefix => "RangeIndex[#{pos_field}]", update: persist_update,
|
121
|
+
:other_options => kwargs) do |filename|
|
114
122
|
|
115
123
|
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
116
124
|
|
125
|
+
log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{pos_field}"
|
126
|
+
Log.low log_msg
|
127
|
+
bar = log_msg if TrueClass === bar
|
128
|
+
|
117
129
|
max_key_size = 0
|
118
130
|
index_data = []
|
119
|
-
TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, **kwargs do |key, pos|
|
131
|
+
TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, bar: bar, **kwargs do |key, pos|
|
120
132
|
key_size = key.length
|
121
133
|
max_key_size = key_size if key_size > max_key_size
|
122
134
|
|
@@ -137,81 +149,11 @@ module TSV
|
|
137
149
|
end
|
138
150
|
end
|
139
151
|
|
152
|
+
def range_index(*args, **kwargs, &block)
|
153
|
+
TSV.range_index(self, *args, **kwargs, &block)
|
154
|
+
end
|
155
|
+
|
140
156
|
def pos_index(*args, **kwargs, &block)
|
141
157
|
TSV.pos_index(self, *args, **kwargs, &block)
|
142
158
|
end
|
143
|
-
|
144
|
-
|
145
|
-
#def range_index(start_field = nil, end_field = nil, options = {})
|
146
|
-
# start_field ||= "Start"
|
147
|
-
# end_field ||= "End"
|
148
|
-
|
149
|
-
# options = Misc.add_defaults options,
|
150
|
-
# :persist => false, :persist_file => nil, :persist_update => false
|
151
|
-
|
152
|
-
# persist_options = Misc.pull_keys options, :persist
|
153
|
-
# persist_options[:prefix] ||= "RangeIndex[#{start_field}-#{end_field}]"
|
154
|
-
|
155
|
-
# Persist.persist(filename || self.object_id.to_s, :fwt, persist_options) do
|
156
|
-
# max_key_size = 0
|
157
|
-
# index_data = []
|
158
|
-
# with_unnamed do
|
159
|
-
# with_monitor :desc => "Creating Index Data", :step => 10000 do
|
160
|
-
# through :key, [start_field, end_field] do |key, values|
|
161
|
-
# key_size = key.length
|
162
|
-
# max_key_size = key_size if key_size > max_key_size
|
163
|
-
|
164
|
-
# start_pos, end_pos = values
|
165
|
-
# if Array === start_pos
|
166
|
-
# start_pos.zip(end_pos).each do |s,e|
|
167
|
-
# index_data << [key, [s.to_i, e.to_i]]
|
168
|
-
# end
|
169
|
-
# else
|
170
|
-
# index_data << [key, [start_pos.to_i, end_pos.to_i]]
|
171
|
-
# end
|
172
|
-
# end
|
173
|
-
# end
|
174
|
-
# end
|
175
|
-
|
176
|
-
# index = FixWidthTable.get(:memory, max_key_size, true)
|
177
|
-
# index.add_range index_data
|
178
|
-
# index.read
|
179
|
-
# index
|
180
|
-
# end
|
181
|
-
#end
|
182
|
-
|
183
|
-
#def self.range_index(file, start_field = nil, end_field = nil, options = {})
|
184
|
-
# start_field ||= "Start"
|
185
|
-
# end_field ||= "End"
|
186
|
-
|
187
|
-
# data_options = Misc.pull_keys options, :data
|
188
|
-
# filename = case
|
189
|
-
# when (String === file or Path === file)
|
190
|
-
# file
|
191
|
-
# when file.respond_to?(:filename)
|
192
|
-
# file.filename
|
193
|
-
# else
|
194
|
-
# file.object_id.to_s
|
195
|
-
# end
|
196
|
-
# persist_options = Misc.pull_keys options, :persist
|
197
|
-
# persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
|
198
|
-
|
199
|
-
# filters = Misc.process_options options, :filters
|
200
|
-
|
201
|
-
# if filters
|
202
|
-
# filename += ":Filtered[#{filters.collect{|f| f * "="} * ", "}]"
|
203
|
-
# end
|
204
|
-
|
205
|
-
# Persist.persist(filename, :fwt, persist_options) do
|
206
|
-
# tsv = TSV.open(file, data_options)
|
207
|
-
# if filters
|
208
|
-
# tsv.filter
|
209
|
-
# filters.each do |match, value|
|
210
|
-
# tsv.add_filter match, value
|
211
|
-
# end
|
212
|
-
# end
|
213
|
-
|
214
|
-
# tsv.range_index(start_field, end_field, options)
|
215
|
-
# end
|
216
|
-
#end
|
217
159
|
end
|
data/lib/scout/tsv/open.rb
CHANGED
@@ -34,7 +34,7 @@ module Open
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, keep_open: false, **options, &block)
|
37
|
-
cpus = nil if cpus == 1
|
37
|
+
cpus = nil if cpus.to_i == 1
|
38
38
|
|
39
39
|
if into == :stream
|
40
40
|
sout, sin = Open.pipe
|
@@ -68,7 +68,15 @@ module Open
|
|
68
68
|
bar.remove($!) if bar
|
69
69
|
end
|
70
70
|
end
|
71
|
+
|
71
72
|
Thread.pass until into_thread["name"]
|
73
|
+
|
74
|
+
case into
|
75
|
+
when IO
|
76
|
+
ConcurrentStream.setup into, :threads => into_thread
|
77
|
+
when TSV::Dumper
|
78
|
+
ConcurrentStream.setup into.stream, :threads => into_thread
|
79
|
+
end
|
72
80
|
return into
|
73
81
|
end
|
74
82
|
end
|
@@ -82,27 +90,29 @@ module Open
|
|
82
90
|
callback.call res
|
83
91
|
end
|
84
92
|
|
85
|
-
self.traverse(obj, **options) do |*args|
|
86
|
-
queue.write args
|
87
|
-
end
|
88
|
-
|
89
93
|
begin
|
94
|
+
self.traverse(obj, **options) do |*args|
|
95
|
+
queue.write args
|
96
|
+
end
|
97
|
+
|
90
98
|
queue.close
|
91
99
|
|
92
|
-
queue.join
|
100
|
+
queue.join(false)
|
93
101
|
|
94
102
|
bar.remove if bar
|
95
103
|
return into
|
96
104
|
rescue Exception
|
97
105
|
bar.remove($!) if bar
|
106
|
+
queue.abort
|
98
107
|
raise $!
|
108
|
+
ensure
|
109
|
+
queue.clean
|
99
110
|
end
|
100
111
|
end
|
101
112
|
|
102
113
|
begin
|
103
114
|
res = case obj
|
104
115
|
when TSV
|
105
|
-
#obj.traverse options[:key_field], options[:fields], unnamed: unnamed, **options do |k,v,f|
|
106
116
|
obj.traverse unnamed: unnamed, **options do |k,v,f|
|
107
117
|
res = block.call(k, v, f)
|
108
118
|
callback.call res if callback
|
@@ -122,11 +132,21 @@ module Open
|
|
122
132
|
raise obj.exception if obj.error?
|
123
133
|
self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
|
124
134
|
when IO
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
135
|
+
if options[:type] == :array || options[:type] == :line
|
136
|
+
Log.low "Traverse stream by lines #{Log.fingerprint obj}"
|
137
|
+
while line = obj.gets
|
138
|
+
line.strip!
|
139
|
+
res = block.call(line)
|
140
|
+
callback.call res if callback
|
141
|
+
end
|
142
|
+
else
|
143
|
+
Log.low "Traverse stream with parser #{Log.fingerprint obj}"
|
144
|
+
parser = TSV::Parser.new obj
|
145
|
+
parser.traverse **options do |k,v,f|
|
146
|
+
res = block.call k,v,f
|
147
|
+
callback.call res if callback
|
148
|
+
nil
|
149
|
+
end
|
130
150
|
end
|
131
151
|
when TSV::Parser
|
132
152
|
obj.traverse **options do |k,v,f|
|
@@ -142,9 +162,10 @@ module Open
|
|
142
162
|
end
|
143
163
|
end
|
144
164
|
bar.remove if bar
|
145
|
-
rescue
|
165
|
+
rescue Exception => exception
|
166
|
+
exception = obj.stream_exception if (ConcurrentStream === obj) && obj.stream_exception
|
146
167
|
bar.error if bar
|
147
|
-
raise
|
168
|
+
raise exception
|
148
169
|
end
|
149
170
|
|
150
171
|
into || res
|
data/lib/scout/tsv/parser.rb
CHANGED
@@ -190,6 +190,11 @@ module TSV
|
|
190
190
|
end
|
191
191
|
|
192
192
|
def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
|
193
|
+
if (Path === stream) || ((String === stream) && Path.is_filename?(stream))
|
194
|
+
Open.open(stream) do |f|
|
195
|
+
return parse_header(f, fix: fix, header_hash: header_hash, sep: sep)
|
196
|
+
end
|
197
|
+
end
|
193
198
|
raise "Closed stream" if IO === stream && stream.closed?
|
194
199
|
|
195
200
|
opts = {}
|
@@ -240,7 +245,7 @@ module TSV
|
|
240
245
|
opts[:type] = opts[:type].to_sym if opts[:type]
|
241
246
|
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
242
247
|
|
243
|
-
[opts, key_field, fields, first_line, preamble]
|
248
|
+
NamedArray.setup([opts, key_field, fields, first_line, preamble], %w(options key_field fields first_line preamble))
|
244
249
|
end
|
245
250
|
|
246
251
|
KEY_PARAMETERS = begin
|
@@ -263,7 +268,7 @@ module TSV
|
|
263
268
|
@options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
|
264
269
|
@options[:sep] = sep if @options[:sep].nil?
|
265
270
|
@options.merge!(:key_field => @key_field, :fields => @fields)
|
266
|
-
@type = type
|
271
|
+
@type = @options[:type] || type
|
267
272
|
end
|
268
273
|
|
269
274
|
def all_fields
|
@@ -304,6 +309,8 @@ module TSV
|
|
304
309
|
field_names = @fields
|
305
310
|
end
|
306
311
|
|
312
|
+
kwargs[:positions] = nil if @type == :flat
|
313
|
+
|
307
314
|
if key_field
|
308
315
|
if @fields
|
309
316
|
all_field_names ||= [@key_field] + @fields
|
@@ -0,0 +1,204 @@
|
|
1
|
+
module TSV
|
2
|
+
def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, field_prefix: nil)
|
3
|
+
|
4
|
+
streams = streams.collect do |stream|
|
5
|
+
case stream
|
6
|
+
when(defined? Step and Step)
|
7
|
+
stream.stream
|
8
|
+
when Path
|
9
|
+
stream.open
|
10
|
+
when TSV::Dumper
|
11
|
+
stream.stream
|
12
|
+
else
|
13
|
+
stream
|
14
|
+
end
|
15
|
+
end.compact
|
16
|
+
|
17
|
+
num_streams = streams.length
|
18
|
+
|
19
|
+
streams = streams.collect do |stream|
|
20
|
+
Open.sort_stream(stream, memory: sort_memory)
|
21
|
+
end if sort
|
22
|
+
|
23
|
+
begin
|
24
|
+
|
25
|
+
lines =[]
|
26
|
+
fields =[]
|
27
|
+
sizes =[]
|
28
|
+
key_fields =[]
|
29
|
+
input_options =[]
|
30
|
+
empty =[]
|
31
|
+
preambles =[]
|
32
|
+
parser_types =[]
|
33
|
+
|
34
|
+
type ||= :double
|
35
|
+
|
36
|
+
streams = streams.collect do |stream|
|
37
|
+
|
38
|
+
parser = TSV::Parser.new stream, type: type
|
39
|
+
|
40
|
+
sfields = parser.fields
|
41
|
+
|
42
|
+
if field_prefix
|
43
|
+
index = streams.index stream
|
44
|
+
prefix = field_prefix[index]
|
45
|
+
|
46
|
+
sfields = sfields.collect{|f|[prefix, f]* ":"}
|
47
|
+
end
|
48
|
+
|
49
|
+
first_line = parser.first_line
|
50
|
+
first_line = nil if first_line == ""
|
51
|
+
|
52
|
+
lines << first_line
|
53
|
+
key_fields << parser.key_field
|
54
|
+
fields << sfields
|
55
|
+
sizes << sfields.length if sfields
|
56
|
+
input_options << parser.options
|
57
|
+
preambles << parser.preamble if preamble and not parser.preamble.empty?
|
58
|
+
parser_types << parser.type
|
59
|
+
|
60
|
+
empty << stream if parser.first_line.nil? || parser.first_line.empty?
|
61
|
+
|
62
|
+
stream
|
63
|
+
end
|
64
|
+
|
65
|
+
|
66
|
+
all_fields = fields.dup
|
67
|
+
|
68
|
+
key_field = key_fields.compact.first
|
69
|
+
|
70
|
+
if same_fields
|
71
|
+
fields = fields.first
|
72
|
+
else
|
73
|
+
fields = fields.compact.flatten
|
74
|
+
end
|
75
|
+
|
76
|
+
options = input_options.first
|
77
|
+
type ||= options[:type]
|
78
|
+
type ||= :list if type == :single
|
79
|
+
type ||= :double if type == :flat
|
80
|
+
|
81
|
+
preamble_txt = case preamble
|
82
|
+
when TrueClass
|
83
|
+
preambles * "\n"
|
84
|
+
when String
|
85
|
+
if preamble[0]== '+'
|
86
|
+
preambles * "\n" + "\n" + preamble[1..-1]
|
87
|
+
else
|
88
|
+
preamble
|
89
|
+
end
|
90
|
+
else
|
91
|
+
nil
|
92
|
+
end
|
93
|
+
|
94
|
+
empty_pos = empty.collect{|stream| streams.index stream}
|
95
|
+
|
96
|
+
keys =[]
|
97
|
+
parts =[]
|
98
|
+
lines.each_with_index do |line,i|
|
99
|
+
if line.nil? || line.empty?
|
100
|
+
keys[i]= nil
|
101
|
+
parts[i]= nil
|
102
|
+
else
|
103
|
+
vs = line.chomp.split(sep, -1)
|
104
|
+
key, *p = vs
|
105
|
+
keys[i]= key
|
106
|
+
parts[i]= p
|
107
|
+
end
|
108
|
+
sizes[i]||= parts[i].length-1 unless parts[i].nil?
|
109
|
+
end
|
110
|
+
done_streams =[]
|
111
|
+
|
112
|
+
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type
|
113
|
+
dumper.init
|
114
|
+
|
115
|
+
t = Thread.new do
|
116
|
+
Thread.report_on_exception = false
|
117
|
+
Thread.current["name"] = "Paste streams"
|
118
|
+
|
119
|
+
last_min = nil
|
120
|
+
while lines.reject{|line| line.nil?}.any?
|
121
|
+
min = keys.compact.sort.first
|
122
|
+
break if min.nil?
|
123
|
+
new_values =[]
|
124
|
+
|
125
|
+
skip = all_match && keys.uniq !=[min]
|
126
|
+
|
127
|
+
keys.each_with_index do |key,i|
|
128
|
+
case key
|
129
|
+
when min
|
130
|
+
new_values << parts[i]
|
131
|
+
|
132
|
+
begin
|
133
|
+
line = lines[i]= begin
|
134
|
+
streams[i].gets
|
135
|
+
rescue
|
136
|
+
Log.exception $!
|
137
|
+
nil
|
138
|
+
end
|
139
|
+
if line.nil?
|
140
|
+
keys[i]= nil
|
141
|
+
parts[i]= nil
|
142
|
+
else
|
143
|
+
k, *p = line.chomp.split(sep, -1)
|
144
|
+
raise TryAgain if k == keys[i]
|
145
|
+
keys[i]= k
|
146
|
+
parts[i]= p.collect{|e| e.nil? ? "" : e}
|
147
|
+
end
|
148
|
+
rescue TryAgain
|
149
|
+
keys[i]= nil
|
150
|
+
parts[i]= nil
|
151
|
+
Log.debug "Skipping repeated key in stream #{i}: #{keys[i]}"
|
152
|
+
retry
|
153
|
+
end
|
154
|
+
else
|
155
|
+
p = [nil] * sizes[i]
|
156
|
+
new_values << p
|
157
|
+
end
|
158
|
+
end
|
159
|
+
|
160
|
+
next if skip
|
161
|
+
|
162
|
+
if same_fields
|
163
|
+
new_values_same = []
|
164
|
+
new_values.each do |list|
|
165
|
+
list.each_with_index do |l,i|
|
166
|
+
new_values_same[i] ||= []
|
167
|
+
new_values_same[i] << l
|
168
|
+
end
|
169
|
+
end
|
170
|
+
new_values = new_values_same
|
171
|
+
else
|
172
|
+
new_values = new_values.inject([]){|acc,l| acc.concat l }
|
173
|
+
end
|
174
|
+
|
175
|
+
dumper.add min, new_values
|
176
|
+
end
|
177
|
+
|
178
|
+
dumper.close
|
179
|
+
|
180
|
+
streams.each do |stream|
|
181
|
+
stream.close if stream.respond_to?(:close) && ! stream.closed?
|
182
|
+
stream.join if stream.respond_to? :join
|
183
|
+
end
|
184
|
+
end
|
185
|
+
rescue Aborted
|
186
|
+
Log.error "Aborted pasting streams #{streams.inspect}: #{$!.message}"
|
187
|
+
streams.each do |stream|
|
188
|
+
stream.abort if stream.respond_to? :abort
|
189
|
+
end
|
190
|
+
raise $!
|
191
|
+
rescue Exception
|
192
|
+
Log.error "Exception pasting streams #{streams.inspect}: #{$!.message}"
|
193
|
+
streams.each do |stream|
|
194
|
+
stream.abort if stream.respond_to? :abort
|
195
|
+
end
|
196
|
+
raise $!
|
197
|
+
end
|
198
|
+
|
199
|
+
Thread.pass until t["name"]
|
200
|
+
|
201
|
+
ConcurrentStream.setup(dumper.stream, threads: [t])
|
202
|
+
end
|
203
|
+
|
204
|
+
end
|
@@ -137,5 +137,16 @@ module TSV
|
|
137
137
|
end
|
138
138
|
res
|
139
139
|
end
|
140
|
+
|
141
|
+
def to_flat
|
142
|
+
res = self.annotate({})
|
143
|
+
transformer = Transformer.new self, res
|
144
|
+
transformer.type = :flat
|
145
|
+
transformer.traverse do |k,v|
|
146
|
+
v = Array === v ? v.flatten : [v]
|
147
|
+
[k, v]
|
148
|
+
end
|
149
|
+
res
|
150
|
+
end
|
140
151
|
end
|
141
152
|
|
data/lib/scout/tsv.rb
CHANGED
@@ -10,10 +10,11 @@ require_relative 'tsv/traverse'
|
|
10
10
|
require_relative 'tsv/open'
|
11
11
|
require_relative 'tsv/attach'
|
12
12
|
require_relative 'tsv/change_id'
|
13
|
+
require_relative 'tsv/stream'
|
13
14
|
|
14
15
|
module TSV
|
15
16
|
extend MetaExtension
|
16
|
-
extension_attr :key_field, :fields, :type, :filename, :namespace, :unnamed, :identifiers
|
17
|
+
extension_attr :key_field, :fields, :type, :cast, :filename, :namespace, :unnamed, :identifiers
|
17
18
|
|
18
19
|
def self.str2options(str)
|
19
20
|
field_options,_sep, rest = str.partition("#")
|
@@ -40,7 +41,13 @@ module TSV
|
|
40
41
|
data = filename ? ScoutCabinet.open(filename, true, type) : nil
|
41
42
|
options[:data] = data if data
|
42
43
|
options[:filename] = file
|
43
|
-
|
44
|
+
|
45
|
+
if data
|
46
|
+
Log.debug "TSV open #{Log.fingerprint file} into #{Log.fingerprint data}"
|
47
|
+
else
|
48
|
+
Log.debug "TSV open #{Log.fingerprint file}"
|
49
|
+
end
|
50
|
+
|
44
51
|
Open.open(file, grep: grep, invert_grep: invert_grep) do |f|
|
45
52
|
TSV.parse(f, **options)
|
46
53
|
end
|