scout-gear 10.4.0 → 10.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +100 -656
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bin/scout +1 -3
- data/lib/scout/association/fields.rb +170 -0
- data/lib/scout/association/index.rb +229 -0
- data/lib/scout/association/item.rb +227 -0
- data/lib/scout/association/util.rb +7 -0
- data/lib/scout/association.rb +100 -0
- data/lib/scout/entity/format.rb +62 -0
- data/lib/scout/entity/identifiers.rb +111 -0
- data/lib/scout/entity/object.rb +20 -0
- data/lib/scout/entity/property.rb +165 -0
- data/lib/scout/entity.rb +40 -0
- data/lib/scout/offsite/step.rb +2 -2
- data/lib/scout/{tsv/persist → persist/engine}/fix_width_table.rb +25 -33
- data/lib/scout/persist/engine/packed_index.rb +100 -0
- data/lib/scout/persist/engine/sharder.rb +219 -0
- data/lib/scout/{tsv/persist → persist/engine}/tkrzw.rb +0 -17
- data/lib/scout/{tsv/persist → persist/engine}/tokyocabinet.rb +55 -31
- data/lib/scout/persist/engine.rb +4 -0
- data/lib/scout/{tsv/persist/adapter.rb → persist/tsv/adapter/base.rb} +80 -51
- data/lib/scout/persist/tsv/adapter/fix_width_table.rb +106 -0
- data/lib/scout/persist/tsv/adapter/packed_index.rb +95 -0
- data/lib/scout/persist/tsv/adapter/sharder.rb +54 -0
- data/lib/scout/persist/tsv/adapter/tkrzw.rb +18 -0
- data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +65 -0
- data/lib/scout/persist/tsv/adapter.rb +6 -0
- data/lib/scout/{tsv/persist → persist/tsv}/serialize.rb +5 -0
- data/lib/scout/persist/tsv.rb +107 -0
- data/lib/scout/tsv/annotation/repo.rb +83 -0
- data/lib/scout/tsv/annotation.rb +169 -0
- data/lib/scout/tsv/attach.rb +95 -19
- data/lib/scout/tsv/change_id/translate.rb +148 -0
- data/lib/scout/tsv/change_id.rb +3 -0
- data/lib/scout/tsv/csv.rb +85 -0
- data/lib/scout/tsv/dumper.rb +113 -25
- data/lib/scout/tsv/entity.rb +5 -0
- data/lib/scout/tsv/index.rb +88 -36
- data/lib/scout/tsv/open.rb +21 -8
- data/lib/scout/tsv/parser.rb +153 -90
- data/lib/scout/tsv/path.rb +7 -2
- data/lib/scout/tsv/stream.rb +48 -6
- data/lib/scout/tsv/transformer.rb +4 -3
- data/lib/scout/tsv/traverse.rb +26 -18
- data/lib/scout/tsv/util/process.rb +7 -0
- data/lib/scout/tsv/util/reorder.rb +25 -15
- data/lib/scout/tsv/util/select.rb +9 -1
- data/lib/scout/tsv/util/sort.rb +90 -2
- data/lib/scout/tsv/util/unzip.rb +56 -0
- data/lib/scout/tsv/util.rb +52 -5
- data/lib/scout/tsv.rb +45 -27
- data/lib/scout/work_queue/socket.rb +8 -0
- data/lib/scout/work_queue/worker.rb +22 -5
- data/lib/scout/work_queue.rb +38 -24
- data/lib/scout/workflow/definition.rb +11 -10
- data/lib/scout/workflow/deployment/orchestrator.rb +20 -3
- data/lib/scout/workflow/deployment/trace.rb +205 -0
- data/lib/scout/workflow/deployment.rb +1 -0
- data/lib/scout/workflow/documentation.rb +1 -1
- data/lib/scout/workflow/step/archive.rb +42 -0
- data/lib/scout/workflow/step/children.rb +51 -0
- data/lib/scout/workflow/step/config.rb +1 -1
- data/lib/scout/workflow/step/dependencies.rb +24 -7
- data/lib/scout/workflow/step/file.rb +19 -0
- data/lib/scout/workflow/step/info.rb +37 -9
- data/lib/scout/workflow/step/progress.rb +11 -2
- data/lib/scout/workflow/step/status.rb +8 -1
- data/lib/scout/workflow/step.rb +80 -25
- data/lib/scout/workflow/task/dependencies.rb +4 -1
- data/lib/scout/workflow/task/inputs.rb +91 -41
- data/lib/scout/workflow/task.rb +54 -57
- data/lib/scout/workflow/usage.rb +1 -1
- data/lib/scout/workflow/util.rb +4 -0
- data/lib/scout/workflow.rb +110 -13
- data/lib/scout-gear.rb +2 -0
- data/lib/scout.rb +0 -1
- data/scout-gear.gemspec +80 -23
- data/scout_commands/rbbt +2 -0
- data/test/data/person/brothers +4 -0
- data/test/data/person/identifiers +10 -0
- data/test/data/person/marriages +3 -0
- data/test/data/person/parents +6 -0
- data/test/scout/association/test_fields.rb +105 -0
- data/test/scout/association/test_index.rb +70 -0
- data/test/scout/association/test_item.rb +21 -0
- data/test/scout/entity/test_format.rb +19 -0
- data/test/scout/entity/test_identifiers.rb +58 -0
- data/test/scout/entity/test_object.rb +0 -0
- data/test/scout/entity/test_property.rb +345 -0
- data/test/scout/{tsv/persist → persist/engine}/test_fix_width_table.rb +0 -1
- data/test/scout/persist/engine/test_packed_index.rb +99 -0
- data/test/scout/persist/engine/test_sharder.rb +31 -0
- data/test/scout/persist/engine/test_tkrzw.rb +0 -0
- data/test/scout/persist/engine/test_tokyocabinet.rb +17 -0
- data/test/scout/persist/test_tsv.rb +146 -0
- data/test/scout/{tsv/persist/test_adapter.rb → persist/tsv/adapter/test_base.rb} +3 -4
- data/test/scout/persist/tsv/adapter/test_fix_width_table.rb +46 -0
- data/test/scout/persist/tsv/adapter/test_packed_index.rb +37 -0
- data/test/scout/persist/tsv/adapter/test_serialize.rb +0 -0
- data/test/scout/persist/tsv/adapter/test_sharder.rb +290 -0
- data/test/scout/{tsv/persist → persist/tsv/adapter}/test_tkrzw.rb +3 -6
- data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +282 -0
- data/test/scout/persist/tsv/test_serialize.rb +12 -0
- data/test/scout/test_association.rb +51 -0
- data/test/scout/test_entity.rb +40 -0
- data/test/scout/test_tsv.rb +33 -4
- data/test/scout/test_work_queue.rb +3 -2
- data/test/scout/test_workflow.rb +16 -15
- data/test/scout/tsv/annotation/test_repo.rb +150 -0
- data/test/scout/tsv/change_id/test_translate.rb +178 -0
- data/test/scout/tsv/test_annotation.rb +52 -0
- data/test/scout/tsv/test_attach.rb +226 -1
- data/test/scout/tsv/test_change_id.rb +25 -0
- data/test/scout/tsv/test_csv.rb +50 -0
- data/test/scout/tsv/test_dumper.rb +38 -0
- data/test/scout/tsv/test_entity.rb +0 -0
- data/test/scout/tsv/test_index.rb +82 -0
- data/test/scout/tsv/test_open.rb +44 -0
- data/test/scout/tsv/test_parser.rb +70 -0
- data/test/scout/tsv/test_stream.rb +22 -0
- data/test/scout/tsv/test_transformer.rb +27 -3
- data/test/scout/tsv/test_traverse.rb +78 -0
- data/test/scout/tsv/util/test_process.rb +16 -0
- data/test/scout/tsv/util/test_reorder.rb +67 -0
- data/test/scout/tsv/util/test_sort.rb +28 -1
- data/test/scout/tsv/util/test_unzip.rb +32 -0
- data/test/scout/work_queue/test_socket.rb +4 -1
- data/test/scout/workflow/deployment/test_orchestrator.rb +17 -26
- data/test/scout/workflow/deployment/test_trace.rb +25 -0
- data/test/scout/workflow/step/test_archive.rb +28 -0
- data/test/scout/workflow/step/test_children.rb +25 -0
- data/test/scout/workflow/step/test_info.rb +16 -0
- data/test/scout/workflow/task/test_dependencies.rb +16 -16
- data/test/scout/workflow/task/test_inputs.rb +45 -1
- data/test/scout/workflow/test_definition.rb +52 -0
- data/test/scout/workflow/test_step.rb +57 -0
- data/test/scout/workflow/test_task.rb +26 -1
- data/test/scout/workflow/test_usage.rb +4 -4
- data/test/test_helper.rb +23 -1
- metadata +71 -14
- data/lib/scout/tsv/persist.rb +0 -27
- data/test/scout/tsv/persist/test_tokyocabinet.rb +0 -120
- data/test/scout/tsv/test_persist.rb +0 -45
data/lib/scout/tsv/parser.rb
CHANGED
@@ -51,7 +51,7 @@ module TSV
|
|
51
51
|
[key, items]
|
52
52
|
end
|
53
53
|
|
54
|
-
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **
|
54
|
+
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kwargs, &block)
|
55
55
|
begin
|
56
56
|
bar = "Parsing #{Log.fingerprint stream}" if TrueClass === bar
|
57
57
|
bar = Log::ProgressBar.get_obj_bar(stream, bar) if bar
|
@@ -59,6 +59,32 @@ module TSV
|
|
59
59
|
|
60
60
|
source_type = type if source_type.nil?
|
61
61
|
|
62
|
+
type_swap_key = [source_type.to_s, type.to_s] * "_"
|
63
|
+
|
64
|
+
same_type = source_type.to_s == type.to_s
|
65
|
+
|
66
|
+
if data && data.respond_to?(:load_stream) &&
|
67
|
+
data.serializer.to_s.include?("String") &&
|
68
|
+
same_type &&
|
69
|
+
! (head || kwargs[:cast] || kwargs[:positions] || (kwargs[:key] && kwargs[:key] != 0) || Proc === fix ) &&
|
70
|
+
(kwargs[:sep].nil? || kwargs[:sep] == "\t")
|
71
|
+
|
72
|
+
|
73
|
+
Log.debug "Loading #{Log.fingerprint stream} directly into #{Log.fingerprint data}"
|
74
|
+
if first_line
|
75
|
+
full_stream = Open.open_pipe do |sin|
|
76
|
+
sin.puts first_line
|
77
|
+
Open.consume_stream(stream, false, sin)
|
78
|
+
end
|
79
|
+
data.load_stream(full_stream)
|
80
|
+
else
|
81
|
+
data.load_stream(stream)
|
82
|
+
end
|
83
|
+
|
84
|
+
return data
|
85
|
+
end
|
86
|
+
|
87
|
+
|
62
88
|
data = {} if data.nil?
|
63
89
|
merge = false if type != :double && type != :flat
|
64
90
|
line = first_line || stream.gets
|
@@ -77,7 +103,7 @@ module TSV
|
|
77
103
|
next
|
78
104
|
end
|
79
105
|
|
80
|
-
key, items = parse_line(line, type: source_type, field_names: field_names, **
|
106
|
+
key, items = parse_line(line, type: source_type, field_names: field_names, **kwargs)
|
81
107
|
|
82
108
|
next if key.nil?
|
83
109
|
|
@@ -101,38 +127,38 @@ module TSV
|
|
101
127
|
end
|
102
128
|
|
103
129
|
these_items =
|
104
|
-
case
|
105
|
-
when
|
130
|
+
case type_swap_key
|
131
|
+
when "single_single"
|
106
132
|
these_items
|
107
|
-
when
|
133
|
+
when "list_single"
|
108
134
|
these_items.first
|
109
|
-
when
|
135
|
+
when "flat_single"
|
110
136
|
these_items.first
|
111
|
-
when
|
137
|
+
when "double_single"
|
112
138
|
these_items.first.first
|
113
|
-
when
|
139
|
+
when "single_list"
|
114
140
|
[these_items]
|
115
|
-
when
|
141
|
+
when "list_list"
|
116
142
|
these_items
|
117
|
-
when
|
143
|
+
when "flat_list"
|
118
144
|
these_items
|
119
|
-
when
|
145
|
+
when "double_list"
|
120
146
|
these_items.collect{|l| l.first }
|
121
|
-
when
|
147
|
+
when "single_flat"
|
122
148
|
[these_items]
|
123
|
-
when
|
149
|
+
when "list_flat"
|
124
150
|
these_items
|
125
|
-
when
|
151
|
+
when "flat_flat"
|
126
152
|
these_items
|
127
|
-
when
|
153
|
+
when "double_flat"
|
128
154
|
these_items.flatten
|
129
|
-
when
|
155
|
+
when "single_double"
|
130
156
|
[[these_items]]
|
131
|
-
when
|
132
|
-
these_items.collect{|l| [l] }
|
133
|
-
when
|
157
|
+
when "list_double"
|
158
|
+
these_items.collect{|l| l.nil? ? [] : [l] }
|
159
|
+
when "flat_double"
|
134
160
|
[these_items]
|
135
|
-
when
|
161
|
+
when "double_double"
|
136
162
|
these_items
|
137
163
|
end
|
138
164
|
|
@@ -143,6 +169,7 @@ module TSV
|
|
143
169
|
end
|
144
170
|
|
145
171
|
if ! merge || ! data.include?(key)
|
172
|
+
these_items = these_items.collect{|i| i.empty? ? [nil] : i } if type == :double && one2one
|
146
173
|
data[key] = these_items
|
147
174
|
elsif type == :double
|
148
175
|
current = data[key]
|
@@ -169,6 +196,7 @@ module TSV
|
|
169
196
|
end
|
170
197
|
end
|
171
198
|
rescue Exception
|
199
|
+
raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
|
172
200
|
stream.abort($!) if stream.respond_to?(:abort)
|
173
201
|
raise $!
|
174
202
|
ensure
|
@@ -182,7 +210,7 @@ module TSV
|
|
182
210
|
end
|
183
211
|
data
|
184
212
|
ensure
|
185
|
-
if stream.stream_exception
|
213
|
+
if stream.respond_to?(:stream_exception) && stream.stream_exception
|
186
214
|
bar.remove(stream.stream_exception)
|
187
215
|
else
|
188
216
|
bar.remove
|
@@ -200,62 +228,74 @@ module TSV
|
|
200
228
|
end
|
201
229
|
|
202
230
|
def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
|
231
|
+
sep = "\t" if sep.nil?
|
203
232
|
if (Path === stream) || ((String === stream) && Path.is_filename?(stream))
|
204
233
|
Open.open(stream) do |f|
|
205
234
|
return parse_header(f, fix: fix, header_hash: header_hash, sep: sep)
|
206
235
|
end
|
207
236
|
end
|
208
|
-
|
237
|
+
|
238
|
+
if IO === stream && stream.closed?
|
239
|
+
stream.join if stream.respond_to?(:join)
|
240
|
+
raise "Closed stream"
|
241
|
+
end
|
209
242
|
|
210
243
|
opts = {}
|
211
244
|
preamble = []
|
212
245
|
|
213
246
|
# Get line
|
214
247
|
|
215
|
-
|
216
|
-
|
217
|
-
return {} if line.nil?
|
218
|
-
line = Misc.fixutf8 line.chomp if fix
|
219
|
-
|
220
|
-
# Process options line
|
221
|
-
if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/))
|
222
|
-
opts = IndiferentHash.string2hash m.captures.first.chomp
|
248
|
+
begin
|
249
|
+
#Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
223
250
|
line = stream.gets
|
224
|
-
if line
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
251
|
+
return {} if line.nil?
|
252
|
+
line = Misc.fixutf8 line.chomp if fix
|
253
|
+
|
254
|
+
# Process options line
|
255
|
+
if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/))
|
256
|
+
opts = IndiferentHash.string2hash m.captures.first.chomp
|
257
|
+
line = stream.gets
|
258
|
+
if line && fix
|
259
|
+
if Proc === fix
|
260
|
+
line = fix.call line
|
261
|
+
else
|
262
|
+
line = Misc.fixutf8 line.chomp if line && fix
|
263
|
+
end
|
229
264
|
end
|
230
265
|
end
|
231
|
-
end
|
232
|
-
|
233
|
-
# Determine separator
|
234
|
-
sep = opts[:sep] if opts[:sep]
|
235
266
|
|
236
|
-
|
237
|
-
|
238
|
-
while line && (TrueClass === header_hash || (String === header_hash && line.start_with?(header_hash)))
|
239
|
-
fields = line.split(sep, -1)
|
240
|
-
key_field = fields.shift
|
241
|
-
key_field = key_field.sub(header_hash, '') if String === header_hash && ! header_hash.empty?
|
267
|
+
# Determine separator
|
268
|
+
sep = opts[:sep] if opts[:sep]
|
242
269
|
|
243
|
-
|
244
|
-
line = Misc.fixutf8 line.chomp if line
|
270
|
+
# Process fields line
|
245
271
|
preamble << line if line
|
246
|
-
|
247
|
-
|
272
|
+
while line && (TrueClass === header_hash || (String === header_hash && line.start_with?(header_hash)))
|
273
|
+
fields = line.split(sep, -1)
|
274
|
+
key_field = fields.shift
|
275
|
+
key_field = key_field.sub(header_hash, '') if String === header_hash && ! header_hash.empty?
|
276
|
+
|
277
|
+
line = (header_hash != "" ? stream.gets : nil)
|
278
|
+
line = Misc.fixutf8 line.chomp if line
|
279
|
+
preamble << line if line
|
280
|
+
break if TrueClass === header_hash || header_hash == ""
|
281
|
+
end
|
248
282
|
|
249
|
-
|
283
|
+
preamble = preamble[0..-3] * "\n"
|
250
284
|
|
251
|
-
|
285
|
+
line ||= stream.gets
|
252
286
|
|
253
|
-
|
287
|
+
first_line = line
|
254
288
|
|
255
|
-
|
256
|
-
|
289
|
+
opts[:type] = opts[:type].to_sym if opts[:type]
|
290
|
+
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
257
291
|
|
258
|
-
|
292
|
+
all_fields = [key_field] + fields if key_field && fields
|
293
|
+
NamedArray.setup([opts, key_field, fields, first_line, preamble, all_fields], %w(options key_field fields first_line preamble all_fields))
|
294
|
+
rescue Exception
|
295
|
+
raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
|
296
|
+
stream.abort($!) if stream.respond_to?(:abort)
|
297
|
+
raise $!
|
298
|
+
end
|
259
299
|
end
|
260
300
|
|
261
301
|
KEY_PARAMETERS = begin
|
@@ -267,7 +307,7 @@ module TSV
|
|
267
307
|
end
|
268
308
|
|
269
309
|
class Parser
|
270
|
-
attr_accessor :stream, :
|
310
|
+
attr_accessor :stream, :source_options, :key_field, :fields, :type, :first_line, :preamble
|
271
311
|
def initialize(file, fix: true, header_hash: "#", sep: "\t", type: :double)
|
272
312
|
if IO === file
|
273
313
|
@stream = file
|
@@ -275,11 +315,15 @@ module TSV
|
|
275
315
|
@stream = Open.open(file)
|
276
316
|
end
|
277
317
|
@fix = fix
|
278
|
-
@
|
279
|
-
@
|
280
|
-
@
|
281
|
-
@
|
282
|
-
@type = @
|
318
|
+
@source_options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
|
319
|
+
@source_options[:filename] = file if Path.is_filename?(file)
|
320
|
+
@source_options[:sep] = sep if @source_options[:sep].nil?
|
321
|
+
@source_options.merge!(:key_field => @key_field, :fields => @fields)
|
322
|
+
@type = @source_options[:type] || type
|
323
|
+
end
|
324
|
+
|
325
|
+
def options
|
326
|
+
IndiferentHash.add_defaults @source_options.dup, type: type, key_field: key_field, fields: fields
|
283
327
|
end
|
284
328
|
|
285
329
|
def all_fields
|
@@ -288,11 +332,11 @@ module TSV
|
|
288
332
|
end
|
289
333
|
|
290
334
|
def key_field=(key_field)
|
291
|
-
@
|
335
|
+
@source_options[:key_field] = @key_field = key_field
|
292
336
|
end
|
293
337
|
|
294
338
|
def fields=(fields)
|
295
|
-
@
|
339
|
+
@source_options[:fields] = @fields = fields
|
296
340
|
end
|
297
341
|
|
298
342
|
def identify_field(name)
|
@@ -300,7 +344,7 @@ module TSV
|
|
300
344
|
end
|
301
345
|
|
302
346
|
def traverse(key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
|
303
|
-
kwargs[:type] ||= self.
|
347
|
+
kwargs[:type] ||= self.source_options[:type] ||= @type
|
304
348
|
kwargs[:type] = kwargs[:type].to_sym if kwargs[:type]
|
305
349
|
|
306
350
|
if fields
|
@@ -308,6 +352,7 @@ module TSV
|
|
308
352
|
all_field_names ||= [@key_field] + @fields
|
309
353
|
fields = all_field_names if fields == :all
|
310
354
|
positions = NamedArray.identify_name(all_field_names, fields)
|
355
|
+
raise "Not all fields (#{Log.fingerprint fields}) identified in #{Log.fingerprint all_field_names}" if positions.include?(nil)
|
311
356
|
kwargs[:positions] = positions
|
312
357
|
field_names = all_field_names.values_at *positions
|
313
358
|
elsif fields.reject{|f| Numeric === f}.empty?
|
@@ -327,7 +372,7 @@ module TSV
|
|
327
372
|
all_field_names ||= [@key_field] + @fields
|
328
373
|
key = NamedArray.identify_name(all_field_names, key_field)
|
329
374
|
kwargs[:key] = key == :key ? 0 : key
|
330
|
-
key_field_name = key
|
375
|
+
key_field_name = (key.nil? || key == :key) ? @key_field : all_field_names[key]
|
331
376
|
if fields.nil?
|
332
377
|
field_names = all_field_names - [key_field_name]
|
333
378
|
end
|
@@ -343,21 +388,28 @@ module TSV
|
|
343
388
|
field_names = field_names.slice(0,1)
|
344
389
|
end
|
345
390
|
|
346
|
-
@
|
391
|
+
@source_options.each do |option,value|
|
347
392
|
option = option.to_sym
|
348
393
|
next unless KEY_PARAMETERS.include? option
|
349
394
|
kwargs[option] = value unless kwargs.include?(option)
|
350
395
|
end
|
351
396
|
|
352
|
-
kwargs[:source_type] = @
|
397
|
+
kwargs[:source_type] = @source_options[:type]
|
353
398
|
kwargs[:data] = false if kwargs[:data].nil?
|
354
399
|
|
355
|
-
|
400
|
+
if kwargs[:tsv_grep]
|
401
|
+
data = with_stream do |stream|
|
402
|
+
grep_stream = Open.grep(stream, kwargs.delete(:tsv_grep), kwargs.delete(:tsv_invert_grep))
|
403
|
+
TSV.parse_stream(grep_stream, first_line: nil, fix: @fix, field_names: @fields, **kwargs, &block)
|
404
|
+
end
|
405
|
+
else
|
406
|
+
data = TSV.parse_stream(@stream, first_line: @first_line, fix: @fix, field_names: @fields, **kwargs, &block)
|
407
|
+
end
|
356
408
|
|
357
409
|
if data
|
358
410
|
TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
|
359
411
|
else
|
360
|
-
[self.key_field, self.fields]
|
412
|
+
[key_field || self.key_field, fields || self.fields]
|
361
413
|
end
|
362
414
|
end
|
363
415
|
|
@@ -372,6 +424,14 @@ module TSV
|
|
372
424
|
def inspect
|
373
425
|
fingerprint
|
374
426
|
end
|
427
|
+
|
428
|
+
def with_stream
|
429
|
+
sout = Open.open_pipe do |sin|
|
430
|
+
sin.puts @first_line
|
431
|
+
Open.consume_stream(@stream, false, sin)
|
432
|
+
end
|
433
|
+
yield sout
|
434
|
+
end
|
375
435
|
end
|
376
436
|
|
377
437
|
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block)
|
@@ -379,40 +439,43 @@ module TSV
|
|
379
439
|
|
380
440
|
cast = kwargs[:cast]
|
381
441
|
cast = parser.options[:cast] if cast.nil?
|
442
|
+
identifiers = kwargs.delete(:identifiers)
|
382
443
|
type = kwargs[:type] ||= parser.options[:type] ||= :double
|
444
|
+
|
383
445
|
if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
|
384
446
|
TSV.setup(data, type: type)
|
385
447
|
data.extend TSVAdapter
|
386
|
-
if
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
|
448
|
+
serializer ||= if cast
|
449
|
+
case [cast, type]
|
450
|
+
when [:to_i, :single]
|
451
|
+
:integer
|
452
|
+
when [:to_i, :list], [:to_i, :flat]
|
453
|
+
:integer_array
|
454
|
+
when [:to_f, :single]
|
455
|
+
:float
|
456
|
+
when [:to_f, :list], [:to_f, :flat]
|
457
|
+
:float_array
|
458
|
+
when [:to_f, :double], [:to_i, :double]
|
459
|
+
:marshal
|
460
|
+
else
|
461
|
+
type
|
462
|
+
end
|
463
|
+
else
|
464
|
+
type
|
465
|
+
end
|
466
|
+
data.serializer = TSVAdapter::SERIALIZER_ALIAS[serializer] || serializer
|
405
467
|
end
|
406
468
|
|
407
469
|
kwargs[:data] = {} if kwargs[:data].nil?
|
408
470
|
|
409
471
|
data = parser.traverse **kwargs, &block
|
410
472
|
data.type = type
|
473
|
+
data.cast = cast
|
411
474
|
data.filename = filename || parser.options[:filename]
|
412
475
|
data.namespace = namespace || parser.options[:namespace]
|
413
|
-
data.identifiers =
|
476
|
+
data.identifiers = identifiers
|
414
477
|
data.unnamed = unnamed
|
415
|
-
data.
|
478
|
+
data.save_annotation_hash if data.respond_to?(:save_annotation_hash)
|
416
479
|
data
|
417
480
|
end
|
418
481
|
end
|
data/lib/scout/tsv/path.rb
CHANGED
@@ -4,9 +4,14 @@ module Path
|
|
4
4
|
TSV.open(found, *args, **kwargs, &block)
|
5
5
|
end
|
6
6
|
|
7
|
+
def tsv_options(options = {})
|
8
|
+
self.open do |stream|
|
9
|
+
TSV::Parser.new(stream, **options).options
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
7
13
|
def index(*args, **kwargs, &block)
|
8
|
-
found =
|
9
|
-
found = self.set_extension('tsv').find unless found.exists?
|
14
|
+
found = produce_and_find('tsv')
|
10
15
|
TSV.index(found, *args, **kwargs, &block)
|
11
16
|
end
|
12
17
|
end
|
data/lib/scout/tsv/stream.rb
CHANGED
@@ -9,6 +9,8 @@ module TSV
|
|
9
9
|
stream.open
|
10
10
|
when TSV::Dumper
|
11
11
|
stream.stream
|
12
|
+
when TSV
|
13
|
+
stream.dumper_stream
|
12
14
|
else
|
13
15
|
stream
|
14
16
|
end
|
@@ -35,7 +37,7 @@ module TSV
|
|
35
37
|
|
36
38
|
streams = streams.collect do |stream|
|
37
39
|
|
38
|
-
parser = TSV::Parser.new stream, type: type
|
40
|
+
parser = TSV::Parser.new stream, type: type, sep: sep
|
39
41
|
|
40
42
|
sfields = parser.fields
|
41
43
|
|
@@ -105,12 +107,13 @@ module TSV
|
|
105
107
|
keys[i]= key
|
106
108
|
parts[i]= p
|
107
109
|
end
|
108
|
-
sizes[i]||= parts[i].length
|
110
|
+
sizes[i] ||= parts[i].length unless parts[i].nil?
|
109
111
|
end
|
110
112
|
done_streams =[]
|
111
113
|
|
114
|
+
fields = nil if fields && fields.empty?
|
112
115
|
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type
|
113
|
-
dumper.init
|
116
|
+
dumper.init(preamble: preamble_txt || !!key_field)
|
114
117
|
|
115
118
|
t = Thread.new do
|
116
119
|
Thread.report_on_exception = false
|
@@ -141,14 +144,19 @@ module TSV
|
|
141
144
|
parts[i]= nil
|
142
145
|
else
|
143
146
|
k, *p = line.chomp.split(sep, -1)
|
144
|
-
|
147
|
+
p = p.collect{|e| e.nil? ? "" : e }
|
148
|
+
|
149
|
+
if k == keys[i]
|
150
|
+
new_values = NamedArray.zip_fields(new_values).zip(p).collect{|p| [p.flatten * "|"] }
|
151
|
+
raise TryAgain
|
152
|
+
end
|
145
153
|
keys[i]= k
|
146
|
-
parts[i]= p
|
154
|
+
parts[i]= p
|
147
155
|
end
|
148
156
|
rescue TryAgain
|
149
157
|
keys[i]= nil
|
150
158
|
parts[i]= nil
|
151
|
-
Log.debug "Skipping repeated key in stream #{i}: #{
|
159
|
+
Log.debug "Skipping repeated key in stream #{i}: #{key} - #{min}"
|
152
160
|
retry
|
153
161
|
end
|
154
162
|
else
|
@@ -201,4 +209,38 @@ module TSV
|
|
201
209
|
ConcurrentStream.setup(dumper.stream, threads: [t])
|
202
210
|
end
|
203
211
|
|
212
|
+
def self.concat_streams(streams)
|
213
|
+
|
214
|
+
streams = streams.collect do |stream|
|
215
|
+
case stream
|
216
|
+
when(defined? Step and Step)
|
217
|
+
stream.stream
|
218
|
+
when Path
|
219
|
+
stream.open
|
220
|
+
when TSV::Dumper
|
221
|
+
stream.stream
|
222
|
+
when TSV
|
223
|
+
stream.dumper_stream
|
224
|
+
else
|
225
|
+
stream
|
226
|
+
end
|
227
|
+
end.compact
|
228
|
+
|
229
|
+
done_streams = []
|
230
|
+
Open.open_pipe do |sin|
|
231
|
+
first_stream = streams.first
|
232
|
+
while line = first_stream.gets
|
233
|
+
sin.write line
|
234
|
+
break unless line[0] == "#"
|
235
|
+
end
|
236
|
+
|
237
|
+
while streams.any?
|
238
|
+
streams.each do |stream|
|
239
|
+
line = stream.gets
|
240
|
+
sin.write line unless line[0] == "#"
|
241
|
+
end
|
242
|
+
streams.delete_if{|stream| stream.eof? }
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
204
246
|
end
|
data/lib/scout/tsv/transformer.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
module TSV
|
2
2
|
class Transformer
|
3
|
-
attr_accessor :unnamed, :parser, :dumper
|
3
|
+
attr_accessor :unnamed, :parser, :dumper, :namespace
|
4
4
|
|
5
|
-
def initialize(parser, dumper = nil, unnamed: nil)
|
5
|
+
def initialize(parser, dumper = nil, unnamed: nil, namespace: nil)
|
6
6
|
if TSV::Parser === parser
|
7
7
|
@parser = parser
|
8
8
|
elsif TSV === parser
|
@@ -68,6 +68,7 @@ module TSV
|
|
68
68
|
def traverse(*args, **kwargs, &block)
|
69
69
|
kwargs[:into] = @dumper
|
70
70
|
kwargs[:bar] = "Transform #{Log.fingerprint @parser} into #{Log.fingerprint @target}" if TrueClass === kwargs[:bar]
|
71
|
+
@dumper.namespace ||= @namespace
|
71
72
|
@dumper.init if @dumper.respond_to?(:init) && ! @dumper.initialized
|
72
73
|
Log.debug "Transform #{Log.fingerprint @parser} into #{Log.fingerprint @dumper}"
|
73
74
|
Open.traverse(@parser, *args, **kwargs) do |k,v|
|
@@ -107,7 +108,7 @@ module TSV
|
|
107
108
|
end
|
108
109
|
|
109
110
|
def tsv(*args)
|
110
|
-
TSV === @dumper ? @dumper : TSV.open(
|
111
|
+
TSV === @dumper ? @dumper : TSV.open(@dumper, *args)
|
111
112
|
end
|
112
113
|
end
|
113
114
|
|
data/lib/scout/tsv/traverse.rb
CHANGED
@@ -7,8 +7,8 @@ module TSV
|
|
7
7
|
key_pos = self.identify_field(key_field)
|
8
8
|
fields = self.all_fields if fields == :all
|
9
9
|
fields = [fields] unless fields.nil? || Array === fields
|
10
|
-
positions = fields.nil? || fields == :all ? nil : self.identify_field(fields)
|
11
|
-
|
10
|
+
positions = (fields.nil? || fields == :all) ? nil : self.identify_field(fields)
|
11
|
+
positions = nil if fields == self.fields
|
12
12
|
|
13
13
|
if key_pos == :key
|
14
14
|
key_name = @key_field
|
@@ -21,8 +21,10 @@ module TSV
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
+
fields = positions.collect{|p| p == :key ? self.key_field : self.fields[p] } if positions
|
25
|
+
|
24
26
|
if positions.nil? && key_pos == :key
|
25
|
-
field_names = @fields
|
27
|
+
field_names = @fields.dup
|
26
28
|
elsif positions.nil? && key_pos != :key
|
27
29
|
field_names = @fields.dup
|
28
30
|
field_names.delete_at key_pos unless fields == :all
|
@@ -39,6 +41,7 @@ module TSV
|
|
39
41
|
Log.debug log_message
|
40
42
|
bar = log_message if TrueClass === bar
|
41
43
|
|
44
|
+
type_swap_tag = [type.to_s, @type.to_s] * "_"
|
42
45
|
Log::ProgressBar.with_obj_bar(self, bar) do |bar|
|
43
46
|
with_unnamed unnamed do
|
44
47
|
each do |key,values|
|
@@ -47,13 +50,18 @@ module TSV
|
|
47
50
|
if positions.nil?
|
48
51
|
if key_pos != :key
|
49
52
|
values = values.dup
|
50
|
-
|
53
|
+
if @type == :flat
|
54
|
+
key = values
|
55
|
+
else
|
56
|
+
key = values.delete_at(key_pos)
|
57
|
+
end
|
51
58
|
end
|
52
59
|
else
|
53
60
|
orig_key = key
|
54
|
-
key = values[key_pos] if key_pos != :key
|
61
|
+
key = @type == :flat ? values : values[key_pos] if key_pos != :key
|
55
62
|
|
56
63
|
values = values.values_at(*positions)
|
64
|
+
NamedArray.setup(values, fields)
|
57
65
|
if key_index
|
58
66
|
if @type == :double
|
59
67
|
values.insert key_index, [orig_key]
|
@@ -107,30 +115,30 @@ module TSV
|
|
107
115
|
yield key, values
|
108
116
|
end
|
109
117
|
else
|
110
|
-
case
|
111
|
-
when
|
118
|
+
case type_swap_tag
|
119
|
+
when "double_list"
|
112
120
|
yield key, values.collect{|v| [v] }
|
113
|
-
when
|
121
|
+
when "double_flat"
|
114
122
|
yield key, [values]
|
115
|
-
when
|
123
|
+
when "double_single"
|
116
124
|
yield key, [values]
|
117
|
-
when
|
125
|
+
when "list_double"
|
118
126
|
yield key, values.collect{|v| v.first }
|
119
|
-
when
|
127
|
+
when "list_flat"
|
120
128
|
yield key, [values.first]
|
121
|
-
when
|
129
|
+
when "list_single"
|
122
130
|
yield key, values
|
123
|
-
when
|
131
|
+
when "flat_double"
|
124
132
|
yield key, values.flatten
|
125
|
-
when
|
133
|
+
when "flat_list"
|
126
134
|
yield key, values.flatten
|
127
|
-
when
|
135
|
+
when "flat_single"
|
128
136
|
yield key, values
|
129
|
-
when
|
137
|
+
when "single_double"
|
130
138
|
yield key, values.flatten.first
|
131
|
-
when
|
139
|
+
when "single_list"
|
132
140
|
yield key, values.first
|
133
|
-
when
|
141
|
+
when "single_flat"
|
134
142
|
yield key, values.first
|
135
143
|
end
|
136
144
|
end
|