scout-gear 10.4.0 → 10.7.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +100 -656
  3. data/Rakefile +1 -0
  4. data/VERSION +1 -1
  5. data/bin/scout +1 -3
  6. data/lib/scout/association/fields.rb +170 -0
  7. data/lib/scout/association/index.rb +229 -0
  8. data/lib/scout/association/item.rb +227 -0
  9. data/lib/scout/association/util.rb +7 -0
  10. data/lib/scout/association.rb +100 -0
  11. data/lib/scout/entity/format.rb +62 -0
  12. data/lib/scout/entity/identifiers.rb +111 -0
  13. data/lib/scout/entity/object.rb +20 -0
  14. data/lib/scout/entity/property.rb +165 -0
  15. data/lib/scout/entity.rb +41 -0
  16. data/lib/scout/offsite/step.rb +2 -2
  17. data/lib/scout/{tsv/persist → persist/engine}/fix_width_table.rb +25 -33
  18. data/lib/scout/persist/engine/packed_index.rb +100 -0
  19. data/lib/scout/persist/engine/sharder.rb +219 -0
  20. data/lib/scout/{tsv/persist → persist/engine}/tkrzw.rb +0 -17
  21. data/lib/scout/{tsv/persist → persist/engine}/tokyocabinet.rb +55 -31
  22. data/lib/scout/persist/engine.rb +4 -0
  23. data/lib/scout/{tsv/persist/adapter.rb → persist/tsv/adapter/base.rb} +80 -51
  24. data/lib/scout/persist/tsv/adapter/fix_width_table.rb +106 -0
  25. data/lib/scout/persist/tsv/adapter/packed_index.rb +95 -0
  26. data/lib/scout/persist/tsv/adapter/sharder.rb +54 -0
  27. data/lib/scout/persist/tsv/adapter/tkrzw.rb +18 -0
  28. data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +65 -0
  29. data/lib/scout/persist/tsv/adapter.rb +6 -0
  30. data/lib/scout/{tsv/persist → persist/tsv}/serialize.rb +5 -0
  31. data/lib/scout/persist/tsv.rb +107 -0
  32. data/lib/scout/tsv/annotation/repo.rb +87 -0
  33. data/lib/scout/tsv/annotation.rb +169 -0
  34. data/lib/scout/tsv/attach.rb +97 -21
  35. data/lib/scout/tsv/change_id/translate.rb +148 -0
  36. data/lib/scout/tsv/change_id.rb +3 -0
  37. data/lib/scout/tsv/csv.rb +85 -0
  38. data/lib/scout/tsv/dumper.rb +113 -25
  39. data/lib/scout/tsv/index.rb +88 -36
  40. data/lib/scout/tsv/open.rb +21 -8
  41. data/lib/scout/tsv/parser.rb +153 -90
  42. data/lib/scout/tsv/path.rb +7 -2
  43. data/lib/scout/tsv/stream.rb +48 -6
  44. data/lib/scout/tsv/transformer.rb +5 -3
  45. data/lib/scout/tsv/traverse.rb +28 -19
  46. data/lib/scout/tsv/util/process.rb +7 -0
  47. data/lib/scout/tsv/util/reorder.rb +25 -15
  48. data/lib/scout/tsv/util/select.rb +9 -1
  49. data/lib/scout/tsv/util/sort.rb +90 -2
  50. data/lib/scout/tsv/util/unzip.rb +56 -0
  51. data/lib/scout/tsv/util.rb +52 -5
  52. data/lib/scout/tsv.rb +42 -27
  53. data/lib/scout/work_queue/socket.rb +8 -0
  54. data/lib/scout/work_queue/worker.rb +22 -5
  55. data/lib/scout/work_queue.rb +41 -24
  56. data/lib/scout/workflow/definition.rb +15 -12
  57. data/lib/scout/workflow/deployment/orchestrator.rb +21 -3
  58. data/lib/scout/workflow/deployment/trace.rb +205 -0
  59. data/lib/scout/workflow/deployment.rb +1 -0
  60. data/lib/scout/workflow/documentation.rb +1 -1
  61. data/lib/scout/workflow/step/archive.rb +42 -0
  62. data/lib/scout/workflow/step/children.rb +51 -0
  63. data/lib/scout/workflow/step/config.rb +1 -1
  64. data/lib/scout/workflow/step/dependencies.rb +25 -8
  65. data/lib/scout/workflow/step/file.rb +19 -0
  66. data/lib/scout/workflow/step/info.rb +37 -9
  67. data/lib/scout/workflow/step/progress.rb +11 -2
  68. data/lib/scout/workflow/step/status.rb +9 -1
  69. data/lib/scout/workflow/step.rb +80 -25
  70. data/lib/scout/workflow/task/dependencies.rb +5 -2
  71. data/lib/scout/workflow/task/inputs.rb +91 -41
  72. data/lib/scout/workflow/task.rb +54 -57
  73. data/lib/scout/workflow/usage.rb +1 -1
  74. data/lib/scout/workflow/util.rb +4 -0
  75. data/lib/scout/workflow.rb +110 -13
  76. data/lib/scout-gear.rb +2 -0
  77. data/lib/scout.rb +0 -1
  78. data/scout-gear.gemspec +78 -23
  79. data/scout_commands/rbbt +2 -0
  80. data/test/data/person/brothers +4 -0
  81. data/test/data/person/identifiers +10 -0
  82. data/test/data/person/marriages +3 -0
  83. data/test/data/person/parents +6 -0
  84. data/test/scout/association/test_fields.rb +105 -0
  85. data/test/scout/association/test_index.rb +70 -0
  86. data/test/scout/association/test_item.rb +21 -0
  87. data/test/scout/entity/test_format.rb +19 -0
  88. data/test/scout/entity/test_identifiers.rb +58 -0
  89. data/test/scout/entity/test_object.rb +0 -0
  90. data/test/scout/entity/test_property.rb +345 -0
  91. data/test/scout/{tsv/persist → persist/engine}/test_fix_width_table.rb +0 -1
  92. data/test/scout/persist/engine/test_packed_index.rb +99 -0
  93. data/test/scout/persist/engine/test_sharder.rb +31 -0
  94. data/test/scout/persist/engine/test_tkrzw.rb +0 -0
  95. data/test/scout/persist/engine/test_tokyocabinet.rb +17 -0
  96. data/test/scout/persist/test_tsv.rb +146 -0
  97. data/test/scout/{tsv/persist/test_adapter.rb → persist/tsv/adapter/test_base.rb} +3 -4
  98. data/test/scout/persist/tsv/adapter/test_fix_width_table.rb +46 -0
  99. data/test/scout/persist/tsv/adapter/test_packed_index.rb +37 -0
  100. data/test/scout/persist/tsv/adapter/test_serialize.rb +0 -0
  101. data/test/scout/persist/tsv/adapter/test_sharder.rb +290 -0
  102. data/test/scout/{tsv/persist → persist/tsv/adapter}/test_tkrzw.rb +3 -6
  103. data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +282 -0
  104. data/test/scout/persist/tsv/test_serialize.rb +12 -0
  105. data/test/scout/test_association.rb +51 -0
  106. data/test/scout/test_entity.rb +40 -0
  107. data/test/scout/test_tsv.rb +33 -4
  108. data/test/scout/test_work_queue.rb +5 -2
  109. data/test/scout/test_workflow.rb +31 -14
  110. data/test/scout/tsv/annotation/test_repo.rb +150 -0
  111. data/test/scout/tsv/change_id/test_translate.rb +178 -0
  112. data/test/scout/tsv/test_annotation.rb +52 -0
  113. data/test/scout/tsv/test_attach.rb +255 -1
  114. data/test/scout/tsv/test_change_id.rb +25 -0
  115. data/test/scout/tsv/test_csv.rb +50 -0
  116. data/test/scout/tsv/test_dumper.rb +38 -0
  117. data/test/scout/tsv/test_index.rb +82 -0
  118. data/test/scout/tsv/test_open.rb +44 -0
  119. data/test/scout/tsv/test_parser.rb +70 -0
  120. data/test/scout/tsv/test_stream.rb +22 -0
  121. data/test/scout/tsv/test_transformer.rb +27 -3
  122. data/test/scout/tsv/test_traverse.rb +78 -0
  123. data/test/scout/tsv/util/test_process.rb +16 -0
  124. data/test/scout/tsv/util/test_reorder.rb +67 -0
  125. data/test/scout/tsv/util/test_sort.rb +28 -1
  126. data/test/scout/tsv/util/test_unzip.rb +32 -0
  127. data/test/scout/work_queue/test_socket.rb +4 -1
  128. data/test/scout/workflow/deployment/test_orchestrator.rb +17 -26
  129. data/test/scout/workflow/deployment/test_trace.rb +25 -0
  130. data/test/scout/workflow/step/test_archive.rb +28 -0
  131. data/test/scout/workflow/step/test_children.rb +25 -0
  132. data/test/scout/workflow/step/test_info.rb +16 -0
  133. data/test/scout/workflow/task/test_dependencies.rb +16 -16
  134. data/test/scout/workflow/task/test_inputs.rb +45 -1
  135. data/test/scout/workflow/test_definition.rb +52 -0
  136. data/test/scout/workflow/test_step.rb +57 -0
  137. data/test/scout/workflow/test_task.rb +26 -1
  138. data/test/scout/workflow/test_usage.rb +4 -4
  139. data/test/test_helper.rb +23 -1
  140. metadata +69 -14
  141. data/lib/scout/tsv/persist.rb +0 -27
  142. data/test/scout/tsv/persist/test_tokyocabinet.rb +0 -120
  143. data/test/scout/tsv/test_persist.rb +0 -45
@@ -51,7 +51,7 @@ module TSV
51
51
  [key, items]
52
52
  end
53
53
 
54
- def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kargs, &block)
54
+ def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, head: nil, **kwargs, &block)
55
55
  begin
56
56
  bar = "Parsing #{Log.fingerprint stream}" if TrueClass === bar
57
57
  bar = Log::ProgressBar.get_obj_bar(stream, bar) if bar
@@ -59,6 +59,32 @@ module TSV
59
59
 
60
60
  source_type = type if source_type.nil?
61
61
 
62
+ type_swap_key = [source_type.to_s, type.to_s] * "_"
63
+
64
+ same_type = source_type.to_s == type.to_s
65
+
66
+ if data && data.respond_to?(:load_stream) &&
67
+ data.serializer.to_s.include?("String") &&
68
+ same_type &&
69
+ ! (head || kwargs[:cast] || kwargs[:positions] || (kwargs[:key] && kwargs[:key] != 0) || Proc === fix ) &&
70
+ (kwargs[:sep].nil? || kwargs[:sep] == "\t")
71
+
72
+
73
+ Log.debug "Loading #{Log.fingerprint stream} directly into #{Log.fingerprint data}"
74
+ if first_line
75
+ full_stream = Open.open_pipe do |sin|
76
+ sin.puts first_line
77
+ Open.consume_stream(stream, false, sin)
78
+ end
79
+ data.load_stream(full_stream)
80
+ else
81
+ data.load_stream(stream)
82
+ end
83
+
84
+ return data
85
+ end
86
+
87
+
62
88
  data = {} if data.nil?
63
89
  merge = false if type != :double && type != :flat
64
90
  line = first_line || stream.gets
@@ -77,7 +103,7 @@ module TSV
77
103
  next
78
104
  end
79
105
 
80
- key, items = parse_line(line, type: source_type, field_names: field_names, **kargs)
106
+ key, items = parse_line(line, type: source_type, field_names: field_names, **kwargs)
81
107
 
82
108
  next if key.nil?
83
109
 
@@ -101,38 +127,38 @@ module TSV
101
127
  end
102
128
 
103
129
  these_items =
104
- case [source_type, type]
105
- when [:single, :single]
130
+ case type_swap_key
131
+ when "single_single"
106
132
  these_items
107
- when [:list, :single]
133
+ when "list_single"
108
134
  these_items.first
109
- when [:flat, :single]
135
+ when "flat_single"
110
136
  these_items.first
111
- when [:double, :single]
137
+ when "double_single"
112
138
  these_items.first.first
113
- when [:single, :list]
139
+ when "single_list"
114
140
  [these_items]
115
- when [:list, :list]
141
+ when "list_list"
116
142
  these_items
117
- when [:flat, :list]
143
+ when "flat_list"
118
144
  these_items
119
- when [:double, :list]
145
+ when "double_list"
120
146
  these_items.collect{|l| l.first }
121
- when [:single, :flat]
147
+ when "single_flat"
122
148
  [these_items]
123
- when [:list, :flat]
149
+ when "list_flat"
124
150
  these_items
125
- when [:flat, :flat]
151
+ when "flat_flat"
126
152
  these_items
127
- when [:double, :flat]
153
+ when "double_flat"
128
154
  these_items.flatten
129
- when [:single, :double]
155
+ when "single_double"
130
156
  [[these_items]]
131
- when [:list, :double]
132
- these_items.collect{|l| [l] }
133
- when [:flat, :double]
157
+ when "list_double"
158
+ these_items.collect{|l| l.nil? ? [] : [l] }
159
+ when "flat_double"
134
160
  [these_items]
135
- when [:double, :double]
161
+ when "double_double"
136
162
  these_items
137
163
  end
138
164
 
@@ -143,6 +169,7 @@ module TSV
143
169
  end
144
170
 
145
171
  if ! merge || ! data.include?(key)
172
+ these_items = these_items.collect{|i| i.empty? ? [nil] : i } if type == :double && one2one
146
173
  data[key] = these_items
147
174
  elsif type == :double
148
175
  current = data[key]
@@ -169,6 +196,7 @@ module TSV
169
196
  end
170
197
  end
171
198
  rescue Exception
199
+ raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
172
200
  stream.abort($!) if stream.respond_to?(:abort)
173
201
  raise $!
174
202
  ensure
@@ -182,7 +210,7 @@ module TSV
182
210
  end
183
211
  data
184
212
  ensure
185
- if stream.stream_exception
213
+ if stream.respond_to?(:stream_exception) && stream.stream_exception
186
214
  bar.remove(stream.stream_exception)
187
215
  else
188
216
  bar.remove
@@ -200,62 +228,74 @@ module TSV
200
228
  end
201
229
 
202
230
  def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
231
+ sep = "\t" if sep.nil?
203
232
  if (Path === stream) || ((String === stream) && Path.is_filename?(stream))
204
233
  Open.open(stream) do |f|
205
234
  return parse_header(f, fix: fix, header_hash: header_hash, sep: sep)
206
235
  end
207
236
  end
208
- raise "Closed stream" if IO === stream && stream.closed?
237
+
238
+ if IO === stream && stream.closed?
239
+ stream.join if stream.respond_to?(:join)
240
+ raise "Closed stream"
241
+ end
209
242
 
210
243
  opts = {}
211
244
  preamble = []
212
245
 
213
246
  # Get line
214
247
 
215
- #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
216
- line = stream.gets
217
- return {} if line.nil?
218
- line = Misc.fixutf8 line.chomp if fix
219
-
220
- # Process options line
221
- if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/))
222
- opts = IndiferentHash.string2hash m.captures.first.chomp
248
+ begin
249
+ #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
223
250
  line = stream.gets
224
- if line && fix
225
- if Proc === fix
226
- line = fix.call line
227
- else
228
- line = Misc.fixutf8 line.chomp if line && fix
251
+ return {} if line.nil?
252
+ line = Misc.fixutf8 line.chomp if fix
253
+
254
+ # Process options line
255
+ if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/))
256
+ opts = IndiferentHash.string2hash m.captures.first.chomp
257
+ line = stream.gets
258
+ if line && fix
259
+ if Proc === fix
260
+ line = fix.call line
261
+ else
262
+ line = Misc.fixutf8 line.chomp if line && fix
263
+ end
229
264
  end
230
265
  end
231
- end
232
-
233
- # Determine separator
234
- sep = opts[:sep] if opts[:sep]
235
266
 
236
- # Process fields line
237
- preamble << line if line
238
- while line && (TrueClass === header_hash || (String === header_hash && line.start_with?(header_hash)))
239
- fields = line.split(sep, -1)
240
- key_field = fields.shift
241
- key_field = key_field.sub(header_hash, '') if String === header_hash && ! header_hash.empty?
267
+ # Determine separator
268
+ sep = opts[:sep] if opts[:sep]
242
269
 
243
- line = (header_hash != "" ? stream.gets : nil)
244
- line = Misc.fixutf8 line.chomp if line
270
+ # Process fields line
245
271
  preamble << line if line
246
- break if TrueClass === header_hash || header_hash == ""
247
- end
272
+ while line && (TrueClass === header_hash || (String === header_hash && line.start_with?(header_hash)))
273
+ fields = line.split(sep, -1)
274
+ key_field = fields.shift
275
+ key_field = key_field.sub(header_hash, '') if String === header_hash && ! header_hash.empty?
276
+
277
+ line = (header_hash != "" ? stream.gets : nil)
278
+ line = Misc.fixutf8 line.chomp if line
279
+ preamble << line if line
280
+ break if TrueClass === header_hash || header_hash == ""
281
+ end
248
282
 
249
- preamble = preamble[0..-3] * "\n"
283
+ preamble = preamble[0..-3] * "\n"
250
284
 
251
- line ||= stream.gets
285
+ line ||= stream.gets
252
286
 
253
- first_line = line
287
+ first_line = line
254
288
 
255
- opts[:type] = opts[:type].to_sym if opts[:type]
256
- opts[:cast] = opts[:cast].to_sym if opts[:cast]
289
+ opts[:type] = opts[:type].to_sym if opts[:type]
290
+ opts[:cast] = opts[:cast].to_sym if opts[:cast]
257
291
 
258
- NamedArray.setup([opts, key_field, fields, first_line, preamble], %w(options key_field fields first_line preamble))
292
+ all_fields = [key_field] + fields if key_field && fields
293
+ NamedArray.setup([opts, key_field, fields, first_line, preamble, all_fields], %w(options key_field fields first_line preamble all_fields))
294
+ rescue Exception
295
+ raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
296
+ stream.abort($!) if stream.respond_to?(:abort)
297
+ raise $!
298
+ end
259
299
  end
260
300
 
261
301
  KEY_PARAMETERS = begin
@@ -267,7 +307,7 @@ module TSV
267
307
  end
268
308
 
269
309
  class Parser
270
- attr_accessor :stream, :options, :key_field, :fields, :type, :first_line, :preamble
310
+ attr_accessor :stream, :source_options, :key_field, :fields, :type, :first_line, :preamble
271
311
  def initialize(file, fix: true, header_hash: "#", sep: "\t", type: :double)
272
312
  if IO === file
273
313
  @stream = file
@@ -275,11 +315,15 @@ module TSV
275
315
  @stream = Open.open(file)
276
316
  end
277
317
  @fix = fix
278
- @options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
279
- @options[:filename] = file if Path.is_filename?(file)
280
- @options[:sep] = sep if @options[:sep].nil?
281
- @options.merge!(:key_field => @key_field, :fields => @fields)
282
- @type = @options[:type] || type
318
+ @source_options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
319
+ @source_options[:filename] = file if Path.is_filename?(file)
320
+ @source_options[:sep] = sep if @source_options[:sep].nil?
321
+ @source_options.merge!(:key_field => @key_field, :fields => @fields)
322
+ @type = @source_options[:type] || type
323
+ end
324
+
325
+ def options
326
+ IndiferentHash.add_defaults @source_options.dup, type: type, key_field: key_field, fields: fields
283
327
  end
284
328
 
285
329
  def all_fields
@@ -288,11 +332,11 @@ module TSV
288
332
  end
289
333
 
290
334
  def key_field=(key_field)
291
- @options[:key_field] = @key_field = key_field
335
+ @source_options[:key_field] = @key_field = key_field
292
336
  end
293
337
 
294
338
  def fields=(fields)
295
- @options[:fields] = @fields = fields
339
+ @source_options[:fields] = @fields = fields
296
340
  end
297
341
 
298
342
  def identify_field(name)
@@ -300,7 +344,7 @@ module TSV
300
344
  end
301
345
 
302
346
  def traverse(key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
303
- kwargs[:type] ||= self.options[:type] ||= @type
347
+ kwargs[:type] ||= self.source_options[:type] ||= @type
304
348
  kwargs[:type] = kwargs[:type].to_sym if kwargs[:type]
305
349
 
306
350
  if fields
@@ -308,6 +352,7 @@ module TSV
308
352
  all_field_names ||= [@key_field] + @fields
309
353
  fields = all_field_names if fields == :all
310
354
  positions = NamedArray.identify_name(all_field_names, fields)
355
+ raise "Not all fields (#{Log.fingerprint fields}) identified in #{Log.fingerprint all_field_names}" if positions.include?(nil)
311
356
  kwargs[:positions] = positions
312
357
  field_names = all_field_names.values_at *positions
313
358
  elsif fields.reject{|f| Numeric === f}.empty?
@@ -327,7 +372,7 @@ module TSV
327
372
  all_field_names ||= [@key_field] + @fields
328
373
  key = NamedArray.identify_name(all_field_names, key_field)
329
374
  kwargs[:key] = key == :key ? 0 : key
330
- key_field_name = key === :key ? @key_field : all_field_names[key]
375
+ key_field_name = (key.nil? || key == :key) ? @key_field : all_field_names[key]
331
376
  if fields.nil?
332
377
  field_names = all_field_names - [key_field_name]
333
378
  end
@@ -343,21 +388,28 @@ module TSV
343
388
  field_names = field_names.slice(0,1)
344
389
  end
345
390
 
346
- @options.each do |option,value|
391
+ @source_options.each do |option,value|
347
392
  option = option.to_sym
348
393
  next unless KEY_PARAMETERS.include? option
349
394
  kwargs[option] = value unless kwargs.include?(option)
350
395
  end
351
396
 
352
- kwargs[:source_type] = @options[:type]
397
+ kwargs[:source_type] = @source_options[:type]
353
398
  kwargs[:data] = false if kwargs[:data].nil?
354
399
 
355
- data = TSV.parse_stream(@stream, first_line: @first_line, fix: @fix, field_names: @fields, **kwargs, &block)
400
+ if kwargs[:tsv_grep]
401
+ data = with_stream do |stream|
402
+ grep_stream = Open.grep(stream, kwargs.delete(:tsv_grep), kwargs.delete(:tsv_invert_grep))
403
+ TSV.parse_stream(grep_stream, first_line: nil, fix: @fix, field_names: @fields, **kwargs, &block)
404
+ end
405
+ else
406
+ data = TSV.parse_stream(@stream, first_line: @first_line, fix: @fix, field_names: @fields, **kwargs, &block)
407
+ end
356
408
 
357
409
  if data
358
410
  TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
359
411
  else
360
- [self.key_field, self.fields]
412
+ [key_field || self.key_field, fields || self.fields]
361
413
  end
362
414
  end
363
415
 
@@ -372,6 +424,14 @@ module TSV
372
424
  def inspect
373
425
  fingerprint
374
426
  end
427
+
428
+ def with_stream
429
+ sout = Open.open_pipe do |sin|
430
+ sin.puts @first_line
431
+ Open.consume_stream(@stream, false, sin)
432
+ end
433
+ yield sout
434
+ end
375
435
  end
376
436
 
377
437
  def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block)
@@ -379,40 +439,43 @@ module TSV
379
439
 
380
440
  cast = kwargs[:cast]
381
441
  cast = parser.options[:cast] if cast.nil?
442
+ identifiers = kwargs.delete(:identifiers)
382
443
  type = kwargs[:type] ||= parser.options[:type] ||= :double
444
+
383
445
  if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
384
446
  TSV.setup(data, type: type)
385
447
  data.extend TSVAdapter
386
- if serializer
387
- data.serializer = serializer
388
- elsif cast
389
- data.serializer =
390
- case [cast, type]
391
- when [:to_i, :single]
392
- :integer
393
- when [:to_i, :list], [:to_i, :flat]
394
- :integer_array
395
- when [:to_f, :single]
396
- :float
397
- when [:to_f, :list], [:to_f, :flat]
398
- :float_array
399
- else
400
- type
401
- end
402
- else
403
- data.serializer = type
404
- end
448
+ serializer ||= if cast
449
+ case [cast, type]
450
+ when [:to_i, :single]
451
+ :integer
452
+ when [:to_i, :list], [:to_i, :flat]
453
+ :integer_array
454
+ when [:to_f, :single]
455
+ :float
456
+ when [:to_f, :list], [:to_f, :flat]
457
+ :float_array
458
+ when [:to_f, :double], [:to_i, :double]
459
+ :marshal
460
+ else
461
+ type
462
+ end
463
+ else
464
+ type
465
+ end
466
+ data.serializer = TSVAdapter::SERIALIZER_ALIAS[serializer] || serializer
405
467
  end
406
468
 
407
469
  kwargs[:data] = {} if kwargs[:data].nil?
408
470
 
409
471
  data = parser.traverse **kwargs, &block
410
472
  data.type = type
473
+ data.cast = cast
411
474
  data.filename = filename || parser.options[:filename]
412
475
  data.namespace = namespace || parser.options[:namespace]
413
- data.identifiers = parser.options[:identifiers]
476
+ data.identifiers = identifiers
414
477
  data.unnamed = unnamed
415
- data.save_extension_attr_hash if data.respond_to?(:save_extension_attr_hash)
478
+ data.save_annotation_hash if data.respond_to?(:save_annotation_hash)
416
479
  data
417
480
  end
418
481
  end
@@ -4,9 +4,14 @@ module Path
4
4
  TSV.open(found, *args, **kwargs, &block)
5
5
  end
6
6
 
7
+ def tsv_options(options = {})
8
+ self.open do |stream|
9
+ TSV::Parser.new(stream, **options).options
10
+ end
11
+ end
12
+
7
13
  def index(*args, **kwargs, &block)
8
- found = self.find
9
- found = self.set_extension('tsv').find unless found.exists?
14
+ found = produce_and_find('tsv')
10
15
  TSV.index(found, *args, **kwargs, &block)
11
16
  end
12
17
  end
@@ -9,6 +9,8 @@ module TSV
9
9
  stream.open
10
10
  when TSV::Dumper
11
11
  stream.stream
12
+ when TSV
13
+ stream.dumper_stream
12
14
  else
13
15
  stream
14
16
  end
@@ -35,7 +37,7 @@ module TSV
35
37
 
36
38
  streams = streams.collect do |stream|
37
39
 
38
- parser = TSV::Parser.new stream, type: type
40
+ parser = TSV::Parser.new stream, type: type, sep: sep
39
41
 
40
42
  sfields = parser.fields
41
43
 
@@ -105,12 +107,13 @@ module TSV
105
107
  keys[i]= key
106
108
  parts[i]= p
107
109
  end
108
- sizes[i]||= parts[i].length-1 unless parts[i].nil?
110
+ sizes[i] ||= parts[i].length unless parts[i].nil?
109
111
  end
110
112
  done_streams =[]
111
113
 
114
+ fields = nil if fields && fields.empty?
112
115
  dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type
113
- dumper.init
116
+ dumper.init(preamble: preamble_txt || !!key_field)
114
117
 
115
118
  t = Thread.new do
116
119
  Thread.report_on_exception = false
@@ -141,14 +144,19 @@ module TSV
141
144
  parts[i]= nil
142
145
  else
143
146
  k, *p = line.chomp.split(sep, -1)
144
- raise TryAgain if k == keys[i]
147
+ p = p.collect{|e| e.nil? ? "" : e }
148
+
149
+ if k == keys[i]
150
+ new_values = NamedArray.zip_fields(new_values).zip(p).collect{|p| [p.flatten * "|"] }
151
+ raise TryAgain
152
+ end
145
153
  keys[i]= k
146
- parts[i]= p.collect{|e| e.nil? ? "" : e}
154
+ parts[i]= p
147
155
  end
148
156
  rescue TryAgain
149
157
  keys[i]= nil
150
158
  parts[i]= nil
151
- Log.debug "Skipping repeated key in stream #{i}: #{keys[i]}"
159
+ Log.debug "Skipping repeated key in stream #{i}: #{key} - #{min}"
152
160
  retry
153
161
  end
154
162
  else
@@ -201,4 +209,38 @@ module TSV
201
209
  ConcurrentStream.setup(dumper.stream, threads: [t])
202
210
  end
203
211
 
212
+ def self.concat_streams(streams)
213
+
214
+ streams = streams.collect do |stream|
215
+ case stream
216
+ when(defined? Step and Step)
217
+ stream.stream
218
+ when Path
219
+ stream.open
220
+ when TSV::Dumper
221
+ stream.stream
222
+ when TSV
223
+ stream.dumper_stream
224
+ else
225
+ stream
226
+ end
227
+ end.compact
228
+
229
+ done_streams = []
230
+ Open.open_pipe do |sin|
231
+ first_stream = streams.first
232
+ while line = first_stream.gets
233
+ sin.write line
234
+ break unless line[0] == "#"
235
+ end
236
+
237
+ while streams.any?
238
+ streams.each do |stream|
239
+ line = stream.gets
240
+ sin.write line unless line[0] == "#"
241
+ end
242
+ streams.delete_if{|stream| stream.eof? }
243
+ end
244
+ end
245
+ end
204
246
  end
@@ -1,8 +1,8 @@
1
1
  module TSV
2
2
  class Transformer
3
- attr_accessor :unnamed, :parser, :dumper
3
+ attr_accessor :unnamed, :parser, :dumper, :namespace
4
4
 
5
- def initialize(parser, dumper = nil, unnamed: nil)
5
+ def initialize(parser, dumper = nil, unnamed: nil, namespace: nil)
6
6
  if TSV::Parser === parser
7
7
  @parser = parser
8
8
  elsif TSV === parser
@@ -68,6 +68,7 @@ module TSV
68
68
  def traverse(*args, **kwargs, &block)
69
69
  kwargs[:into] = @dumper
70
70
  kwargs[:bar] = "Transform #{Log.fingerprint @parser} into #{Log.fingerprint @target}" if TrueClass === kwargs[:bar]
71
+ @dumper.namespace ||= @namespace
71
72
  @dumper.init if @dumper.respond_to?(:init) && ! @dumper.initialized
72
73
  Log.debug "Transform #{Log.fingerprint @parser} into #{Log.fingerprint @dumper}"
73
74
  Open.traverse(@parser, *args, **kwargs) do |k,v|
@@ -79,6 +80,7 @@ module TSV
79
80
  def each(*args, **kwargs, &block)
80
81
  kwargs[:into] = @dumper
81
82
  kwargs[:bar] = "Transform #{Log.fingerprint @parser} into #{Log.fingerprint @target}" if TrueClass === kwargs[:bar]
83
+ @dumper.namespace ||= @namespace
82
84
  @dumper.init if @dumper.respond_to?(:init) && ! @dumper.initialized
83
85
  Open.traverse(@parser, *args, **kwargs) do |k,v|
84
86
  NamedArray.setup(v, @parser.fields, k) unless @unnamed
@@ -107,7 +109,7 @@ module TSV
107
109
  end
108
110
 
109
111
  def tsv(*args)
110
- TSV === @dumper ? @dumper : TSV.open(stream, *args)
112
+ TSV === @dumper ? @dumper : TSV.open(@dumper, *args)
111
113
  end
112
114
  end
113
115
 
@@ -1,14 +1,14 @@
1
1
  require_relative 'parser'
2
2
  module TSV
3
- def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: false, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, &block)
3
+ def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: false, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block)
4
4
  key_field = key_field_pos if key_field.nil?
5
5
  fields = fields_pos.dup if fields.nil?
6
6
  type = @type if type.nil?
7
7
  key_pos = self.identify_field(key_field)
8
8
  fields = self.all_fields if fields == :all
9
9
  fields = [fields] unless fields.nil? || Array === fields
10
- positions = fields.nil? || fields == :all ? nil : self.identify_field(fields)
11
-
10
+ positions = (fields.nil? || fields == :all) ? nil : self.identify_field(fields)
11
+ positions = nil if fields == self.fields
12
12
 
13
13
  if key_pos == :key
14
14
  key_name = @key_field
@@ -21,8 +21,10 @@ module TSV
21
21
  end
22
22
  end
23
23
 
24
+ fields = positions.collect{|p| p == :key ? self.key_field : self.fields[p] } if positions
25
+
24
26
  if positions.nil? && key_pos == :key
25
- field_names = @fields
27
+ field_names = @fields.dup
26
28
  elsif positions.nil? && key_pos != :key
27
29
  field_names = @fields.dup
28
30
  field_names.delete_at key_pos unless fields == :all
@@ -39,6 +41,7 @@ module TSV
39
41
  Log.debug log_message
40
42
  bar = log_message if TrueClass === bar
41
43
 
44
+ type_swap_tag = [type.to_s, @type.to_s] * "_"
42
45
  Log::ProgressBar.with_obj_bar(self, bar) do |bar|
43
46
  with_unnamed unnamed do
44
47
  each do |key,values|
@@ -47,13 +50,18 @@ module TSV
47
50
  if positions.nil?
48
51
  if key_pos != :key
49
52
  values = values.dup
50
- key = values.delete_at(key_pos)
53
+ if @type == :flat
54
+ key = values
55
+ else
56
+ key = values.delete_at(key_pos)
57
+ end
51
58
  end
52
59
  else
53
60
  orig_key = key
54
- key = values[key_pos] if key_pos != :key
61
+ key = @type == :flat ? values : values[key_pos] if key_pos != :key
55
62
 
56
63
  values = values.values_at(*positions)
64
+ NamedArray.setup(values, fields)
57
65
  if key_index
58
66
  if @type == :double
59
67
  values.insert key_index, [orig_key]
@@ -66,6 +74,7 @@ module TSV
66
74
  values = TSV.cast_value(values, cast) if cast
67
75
 
68
76
  if Array === key
77
+ key = key.uniq if uniq
69
78
  if @type == :double && one2one
70
79
  if one2one == :strict
71
80
  key.each_with_index do |key_i,i|
@@ -107,30 +116,30 @@ module TSV
107
116
  yield key, values
108
117
  end
109
118
  else
110
- case [type, @type]
111
- when [:double, :list]
119
+ case type_swap_tag
120
+ when "double_list"
112
121
  yield key, values.collect{|v| [v] }
113
- when [:double, :flat]
122
+ when "double_flat"
114
123
  yield key, [values]
115
- when [:double, :single]
124
+ when "double_single"
116
125
  yield key, [values]
117
- when [:list, :double]
126
+ when "list_double"
118
127
  yield key, values.collect{|v| v.first }
119
- when [:list, :flat]
128
+ when "list_flat"
120
129
  yield key, [values.first]
121
- when [:list, :single]
130
+ when "list_single"
122
131
  yield key, values
123
- when [:flat, :double]
132
+ when "flat_double"
124
133
  yield key, values.flatten
125
- when [:flat, :list]
134
+ when "flat_list"
126
135
  yield key, values.flatten
127
- when [:flat, :single]
136
+ when "flat_single"
128
137
  yield key, values
129
- when [:single, :double]
138
+ when "single_double"
130
139
  yield key, values.flatten.first
131
- when [:single, :list]
140
+ when "single_list"
132
141
  yield key, values.first
133
- when [:single, :flat]
142
+ when "single_flat"
134
143
  yield key, values.first
135
144
  end
136
145
  end