scout-gear 7.2.0 → 8.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (112) hide show
  1. checksums.yaml +4 -4
  2. data/.vimproject +51 -6
  3. data/VERSION +1 -1
  4. data/bin/scout +6 -3
  5. data/lib/rbbt-scout.rb +1 -0
  6. data/lib/scout/cmd.rb +1 -1
  7. data/lib/scout/concurrent_stream.rb +33 -29
  8. data/lib/scout/config.rb +1 -1
  9. data/lib/scout/exceptions.rb +1 -0
  10. data/lib/scout/log/color.rb +4 -2
  11. data/lib/scout/log/progress/report.rb +1 -1
  12. data/lib/scout/log/progress/util.rb +71 -2
  13. data/lib/scout/log/progress.rb +1 -1
  14. data/lib/scout/log/trap.rb +107 -0
  15. data/lib/scout/log.rb +56 -21
  16. data/lib/scout/meta_extension.rb +13 -6
  17. data/lib/scout/misc/digest.rb +1 -1
  18. data/lib/scout/misc/format.rb +12 -0
  19. data/lib/scout/misc/helper.rb +31 -0
  20. data/lib/scout/misc/insist.rb +1 -1
  21. data/lib/scout/misc/monitor.rb +12 -1
  22. data/lib/scout/misc/system.rb +10 -0
  23. data/lib/scout/misc.rb +1 -0
  24. data/lib/scout/named_array.rb +65 -3
  25. data/lib/scout/open/lock/lockfile.rb +587 -0
  26. data/lib/scout/open/lock.rb +28 -2
  27. data/lib/scout/open/remote.rb +4 -0
  28. data/lib/scout/open/stream.rb +111 -42
  29. data/lib/scout/open/util.rb +13 -3
  30. data/lib/scout/path/find.rb +9 -1
  31. data/lib/scout/path/util.rb +35 -0
  32. data/lib/scout/persist/serialize.rb +18 -5
  33. data/lib/scout/persist.rb +60 -30
  34. data/lib/scout/resource/path.rb +53 -0
  35. data/lib/scout/resource/produce.rb +0 -8
  36. data/lib/scout/resource/util.rb +2 -1
  37. data/lib/scout/semaphore.rb +8 -1
  38. data/lib/scout/tmpfile.rb +7 -8
  39. data/lib/scout/tsv/attach.rb +177 -0
  40. data/lib/scout/tsv/change_id.rb +40 -0
  41. data/lib/scout/tsv/dumper.rb +85 -54
  42. data/lib/scout/tsv/index.rb +188 -20
  43. data/lib/scout/tsv/open.rb +182 -0
  44. data/lib/scout/tsv/parser.rb +200 -118
  45. data/lib/scout/tsv/path.rb +5 -6
  46. data/lib/scout/tsv/persist/adapter.rb +26 -37
  47. data/lib/scout/tsv/persist/fix_width_table.rb +327 -0
  48. data/lib/scout/tsv/persist/serialize.rb +117 -0
  49. data/lib/scout/tsv/persist/tokyocabinet.rb +6 -3
  50. data/lib/scout/tsv/persist.rb +4 -2
  51. data/lib/scout/tsv/transformer.rb +141 -0
  52. data/lib/scout/tsv/traverse.rb +136 -37
  53. data/lib/scout/tsv/util/filter.rb +312 -0
  54. data/lib/scout/tsv/util/process.rb +73 -0
  55. data/lib/scout/tsv/util/reorder.rb +81 -0
  56. data/lib/scout/tsv/util/select.rb +265 -0
  57. data/lib/scout/tsv/util/unzip.rb +86 -0
  58. data/lib/scout/tsv/util.rb +126 -19
  59. data/lib/scout/tsv.rb +28 -5
  60. data/lib/scout/work_queue/socket.rb +6 -1
  61. data/lib/scout/work_queue/worker.rb +5 -2
  62. data/lib/scout/work_queue.rb +15 -8
  63. data/lib/scout/workflow/definition.rb +29 -2
  64. data/lib/scout/workflow/step/dependencies.rb +24 -4
  65. data/lib/scout/workflow/step/info.rb +40 -5
  66. data/lib/scout/workflow/step/progress.rb +14 -0
  67. data/lib/scout/workflow/step/provenance.rb +8 -7
  68. data/lib/scout/workflow/step/status.rb +45 -0
  69. data/lib/scout/workflow/step.rb +104 -33
  70. data/lib/scout/workflow/task/inputs.rb +14 -20
  71. data/lib/scout/workflow/task.rb +86 -47
  72. data/lib/scout/workflow/usage.rb +10 -6
  73. data/scout-gear.gemspec +30 -3
  74. data/scout_commands/workflow/task +37 -9
  75. data/scout_commands/workflow/task_old +2 -2
  76. data/test/scout/open/test_stream.rb +61 -59
  77. data/test/scout/path/test_find.rb +10 -1
  78. data/test/scout/resource/test_produce.rb +15 -0
  79. data/test/scout/test_meta_extension.rb +25 -0
  80. data/test/scout/test_named_array.rb +18 -0
  81. data/test/scout/test_persist.rb +67 -0
  82. data/test/scout/test_tmpfile.rb +1 -1
  83. data/test/scout/test_tsv.rb +222 -3
  84. data/test/scout/test_work_queue.rb +21 -18
  85. data/test/scout/tsv/persist/test_adapter.rb +11 -1
  86. data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
  87. data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
  88. data/test/scout/tsv/test_attach.rb +227 -0
  89. data/test/scout/tsv/test_change_id.rb +98 -0
  90. data/test/scout/tsv/test_dumper.rb +1 -1
  91. data/test/scout/tsv/test_index.rb +127 -3
  92. data/test/scout/tsv/test_open.rb +167 -0
  93. data/test/scout/tsv/test_parser.rb +45 -3
  94. data/test/scout/tsv/test_persist.rb +9 -0
  95. data/test/scout/tsv/test_transformer.rb +108 -0
  96. data/test/scout/tsv/test_traverse.rb +195 -3
  97. data/test/scout/tsv/test_util.rb +24 -0
  98. data/test/scout/tsv/util/test_filter.rb +188 -0
  99. data/test/scout/tsv/util/test_process.rb +47 -0
  100. data/test/scout/tsv/util/test_reorder.rb +94 -0
  101. data/test/scout/tsv/util/test_select.rb +58 -0
  102. data/test/scout/tsv/util/test_unzip.rb +112 -0
  103. data/test/scout/work_queue/test_socket.rb +0 -1
  104. data/test/scout/work_queue/test_worker.rb +63 -6
  105. data/test/scout/workflow/step/test_load.rb +3 -3
  106. data/test/scout/workflow/step/test_status.rb +31 -0
  107. data/test/scout/workflow/task/test_inputs.rb +14 -14
  108. data/test/scout/workflow/test_step.rb +13 -13
  109. data/test/scout/workflow/test_task.rb +168 -32
  110. data/test/scout/workflow/test_usage.rb +33 -6
  111. data/test/test_helper.rb +3 -1
  112. metadata +29 -2
@@ -0,0 +1,182 @@
1
+ require_relative '../open'
2
+ require_relative '../work_queue'
3
+
4
# Marker module: an object extended with MultipleResult is recognised
# through `MultipleResult === obj`.
module MultipleResult
  class << self
    # Tags +obj+ with this module and returns that same object.
    def setup(obj)
      # Object#extend returns its receiver, so no explicit return is needed.
      obj.extend(self)
    end
  end
end
10
+
11
module Open
  # Adds one traversal result +res+ to the collector +into+.
  #
  # An Array that has been extended with MultipleResult is unpacked and each
  # of its elements is added separately. Otherwise the action depends on the
  # class of +into+:
  #
  # * TSV::Dumper (only checked when that constant is defined): <tt>into.add(*res)</tt>
  # * TSV or Hash: +res+ is destructured as <tt>key, value</tt>; collectors
  #   that report <tt>type == :double</tt> get <tt>zip_new</tt>, all others
  #   get a plain <tt>into[key] = value</tt>
  # * Array or Set: <tt>into << res</tt>
  # * IO or StringIO: <tt>into.puts res</tt>
  #
  # Any other collector is silently ignored.
  def self.traverse_add(into, res)
    if Array === res && MultipleResult === res
      res.each do |_res|
        traverse_add into, _res
      end
    else
      case into
      when defined?(TSV::Dumper) && TSV::Dumper
        into.add(*res)
      when TSV, Hash
        key, value = res
        # A plain Hash has no #type; only ask collectors that expose it.
        if into.respond_to?(:type) && into.type == :double
          into.zip_new key, value, insitu: false
        else
          into[key] = value
        end
      when Array, Set
        into << res
      when IO, StringIO
        into.puts res
      end
    end
  end

  # Iterates over +obj+, calling +block+ for every entry and optionally
  # collecting the block results.
  #
  # obj::       a TSV, Array, String (opened with Open.open; a Path is first
  #             resolved with +produce_and_find+), Step (its +stream+ is
  #             traversed), IO, TSV::Parser, or anything TSV.parse accepts.
  # into::      collector for non-nil block results (see traverse_add). The
  #             symbol +:stream+ creates a pipe, writes results into it and
  #             returns the reading end.
  # cpus::      when given and not 1, block calls are dispatched through a
  #             WorkQueue built with that value.
  # bar::       progress bar specification handed to
  #             Log::ProgressBar.get_obj_bar; the bar is ticked once per result.
  # callback::  called with every block result (after collection into +into+).
  # unnamed::   forwarded to TSV#traverse and to the nested traversal that
  #             runs in the collector thread.
  # keep_open:: when true a closable +into+ is not closed after traversal.
  # options::   forwarded to the underlying traverse / parse call.
  #
  # When +into+ responds to +close+ the traversal runs in a background thread
  # and +into+ is returned immediately; errors in that thread are reported
  # through <tt>into.abort</tt> when available. Otherwise returns +into+, or
  # the value of the underlying iteration when no collector was given.
  def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, keep_open: false, **options, &block)
    cpus = nil if cpus == 1

    if into == :stream
      sout, sin = Open.pipe
      ConcurrentStream.setup(sout, :pair => sin)
      ConcurrentStream.setup(sin, :pair => sout)
      self.traverse(obj, into: sin, cpus: cpus, bar: bar, callback: callback, unnamed: unnamed, **options, &block)
      return sout
    end

    if into || bar
      orig_callback = callback
      bar = Log::ProgressBar.get_obj_bar(obj, bar) if bar
      bar.init if bar
      # Wrap the user callback so that ticking and collecting happen per result.
      callback = proc do |res|
        bar.tick if bar
        traverse_add into, res if into && ! res.nil?
        orig_callback.call res if orig_callback
      end

      if into.respond_to?(:close)
        into_thread = Thread.new do
          Thread.current.report_on_exception = false
          Thread.current["name"] = "Traverse into"
          begin
            self.traverse(obj, callback: callback, cpus: cpus, unnamed: unnamed, **options, &block)
            into.close if ! keep_open && into.respond_to?(:close)
            bar.remove if bar
          rescue Exception
            into.abort($!) if into.respond_to?(:abort)
            bar.remove($!) if bar
          end
        end
        # Do not return before the thread has started and named itself.
        Thread.pass until into_thread["name"]
        return into
      end
    end

    if cpus
      queue = WorkQueue.new cpus do |args|
        block.call(*args)
      end

      queue.process do |res|
        # No callback exists when neither into, bar nor callback was given.
        callback.call res if callback
      end

      begin
        # Feeding the queue is inside the rescue so that the bar is also
        # cleaned up when reading the source fails.
        self.traverse(obj, **options) do |*args|
          queue.write args
        end

        queue.close

        queue.join

        bar.remove if bar
        return into
      rescue Exception
        bar.remove($!) if bar
        raise $!
      end
    end

    begin
      res = case obj
            when TSV
              obj.traverse unnamed: unnamed, **options do |k,v,f|
                value = block.call(k, v, f)
                callback.call value if callback
                nil
              end
            when Array
              obj.each do |line|
                value = block.call(line)
                callback.call value if callback
                nil
              end
            when String
              obj = obj.produce_and_find if Path === obj
              # NOTE(review): the handle returned by Open.open is not closed
              # here; confirm whether the parser is expected to release it.
              f = Open.open(obj)
              self.traverse(f, cpus: cpus, callback: callback, **options, &block)
            when Step
              raise obj.exception if obj.error?
              self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
            when IO
              parser = TSV::Parser.new obj
              parser.traverse(**options) do |k,v,f|
                value = block.call k,v,f
                callback.call value if callback
                nil
              end
            when TSV::Parser
              obj.traverse(**options) do |k,v,f|
                value = block.call k, v, f
                callback.call value if callback
                nil
              end
            else
              TSV.parse obj, **options do |k,v|
                value = block.call k, v
                callback.call value if callback
                nil
              end
            end
      bar.remove if bar
    rescue
      bar.error if bar
      raise $!
    end

    into || res
  end
end
153
+
154
module TSV
  class << self
    # Thin delegate: hands every argument and the block to Open.traverse.
    def traverse(*args, **kwargs, &block)
      Open.traverse(*args, **kwargs, &block)
    end

    # Runs inside Open.open_pipe: copies the leading lines of +stream+ that
    # start with +header_hash+ to the pipe input, then yields the pipe input
    # together with the first line that did not match (nil when the stream
    # ran out). Returns whatever Open.open_pipe returns.
    def process_stream(stream, header_hash: "#", &block)
      Open.open_pipe do |sin|
        line = stream.gets
        while line && line.start_with?(header_hash)
          sin.puts line
          line = stream.gets
        end
        yield sin, line
      end
    end

    # Copies the header lines of +stream+ unchanged and pipes the remainder
    # through Open.collapse_stream. An object responding to +stream+ is
    # replaced by that stream first. The extra positional, keyword and block
    # arguments are accepted but not used.
    def collapse_stream(stream, *args, **kwargs, &block)
      source = stream.respond_to?(:stream) ? stream.stream : stream
      process_stream(source) do |sin, first_line|
        Open.consume_stream(Open.collapse_stream(source, line: first_line), false, sin)
      end
    end
  end

  # Instance flavour: collapses this object's +dumper_stream+.
  def collapse_stream(*args, **kwargs, &block)
    TSV.collapse_stream(dumper_stream, *args, **kwargs, &block)
  end
end
@@ -4,21 +4,32 @@ module TSV
4
4
  if Array === value
5
5
  value.collect{|e| cast_value(e, cast) }
6
6
  else
7
- value.send(cast)
7
+ if Proc === cast
8
+ cast.call value
9
+ else
10
+ value.send(cast)
11
+ end
8
12
  end
9
13
  end
10
14
 
11
- def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil)
15
+ def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil)
12
16
  items = line.split(sep, -1)
13
17
 
18
+ return nil if select && ! TSV.select(items[0], items[1..-1], select, fields: field_names, type: type, sep: sep2)
19
+
14
20
  if positions.nil? && key == 0
15
21
  key = items.shift
16
- elsif positions.nil?
17
- key = items.delete_at(key)
22
+ elsif positions.nil?
23
+ if type == :flat
24
+ key = items[1..-1].collect{|e| e.split(sep2, -1) }.flatten
25
+ items = items.slice(0,1)
26
+ else
27
+ key = items.delete_at(key)
28
+ end
18
29
  key = key.split(sep2) if type == :double
19
30
  else
20
31
  key, items = items[key], items.values_at(*positions)
21
- key = key.split(sep2) if type == :double
32
+ key = key.split(sep2) if type == :double || type == :flat
22
33
  end
23
34
 
24
35
  items = case type
@@ -27,9 +38,9 @@ module TSV
27
38
  when :single
28
39
  items.first
29
40
  when :flat
30
- [items]
41
+ items.collect{|i| i.split(sep2, -1) }.flatten
31
42
  when :double
32
- items.collect{|i| i.split(sep2, -1) }
43
+ items.collect{|i| i.nil? ? [] : i.split(sep2, -1) }
33
44
  end
34
45
 
35
46
 
@@ -40,21 +51,34 @@ module TSV
40
51
  [key, items]
41
52
  end
42
53
 
43
- def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, **kargs, &block)
54
+ def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, **kargs, &block)
44
55
  begin
45
- bar = Log::ProgressBar.new_bar(bar) if bar
56
+ bar = "Parsing #{Log.fingerprint stream}" if TrueClass === bar
57
+ bar = Log::ProgressBar.get_obj_bar(stream, bar) if bar
58
+ bar.init if bar
46
59
 
47
60
  source_type = type if source_type.nil?
48
61
 
49
62
  data = {} if data.nil?
50
- merge = false if type != :double
63
+ merge = false if type != :double && type != :flat
51
64
  line = first_line || stream.gets
52
65
  while line
53
66
  begin
54
- line.strip!
55
- line = Misc.fixutf8(line) if fix
67
+ line.chomp!
68
+ if Proc === fix
69
+ line = fix.call line
70
+ elsif fix
71
+ line = Misc.fixutf8(line)
72
+ end
56
73
  bar.tick if bar
57
- key, items = parse_line(line, type: source_type, **kargs)
74
+ if type == :array || type == :line
75
+ block.call line
76
+ next
77
+ end
78
+
79
+ key, items = parse_line(line, type: source_type, field_names: field_names, **kargs)
80
+
81
+ next if key.nil?
58
82
 
59
83
  if Array === key
60
84
  keys = key
@@ -75,80 +99,100 @@ module TSV
75
99
  these_items = items
76
100
  end
77
101
 
78
- these_items = case [source_type, type]
79
- when [:single, :single]
80
- these_items
81
- when [:list, :single]
82
- these_items.first
83
- when [:flat, :single]
84
- these_items.first
85
- when [:double, :single]
86
- these_items.first.first
87
- when [:single, :list]
88
- [these_items]
89
- when [:list, :list]
90
- these_items
91
- when [:flat, :list]
92
- these_items
93
- when [:double, :list]
94
- these_items.collect{|l| l.first }
95
- when [:single, :flat]
96
- [these_items]
97
- when [:list, :flat]
98
- these_items
99
- when [:flat, :flat]
100
- these_items
101
- when [:double, :flat]
102
- these_items.flatten
103
- when [:single, :double]
104
- [[these_items]]
105
- when [:list, :double]
106
- these_items.collect{|l| [l] }
107
- when [:flat, :double]
108
- [these_items]
109
- when [:double, :double]
110
- these_items
111
- end
102
+ these_items =
103
+ case [source_type, type]
104
+ when [:single, :single]
105
+ these_items
106
+ when [:list, :single]
107
+ these_items.first
108
+ when [:flat, :single]
109
+ these_items.first
110
+ when [:double, :single]
111
+ these_items.first.first
112
+ when [:single, :list]
113
+ [these_items]
114
+ when [:list, :list]
115
+ these_items
116
+ when [:flat, :list]
117
+ these_items
118
+ when [:double, :list]
119
+ these_items.collect{|l| l.first }
120
+ when [:single, :flat]
121
+ [these_items]
122
+ when [:list, :flat]
123
+ these_items
124
+ when [:flat, :flat]
125
+ these_items
126
+ when [:double, :flat]
127
+ these_items.flatten
128
+ when [:single, :double]
129
+ [[these_items]]
130
+ when [:list, :double]
131
+ these_items.collect{|l| [l] }
132
+ when [:flat, :double]
133
+ [these_items]
134
+ when [:double, :double]
135
+ these_items
136
+ end
112
137
 
113
138
  if block_given?
114
- res = block.call(key, these_items)
139
+ res = block.call(key, these_items, field_names)
115
140
  data[key] = res unless res.nil? || FalseClass === data
116
141
  next
117
142
  end
118
143
 
119
144
  if ! merge || ! data.include?(key)
120
145
  data[key] = these_items
121
- else
146
+ elsif type == :double
122
147
  current = data[key]
123
148
  if merge == :concat
124
149
  these_items.each_with_index do |new,i|
125
- next if new.empty?
150
+ new = [nil] if new.empty?
126
151
  current[i].concat(new)
127
152
  end
128
153
  else
129
154
  merged = []
130
155
  these_items.each_with_index do |new,i|
131
- next if new.empty?
156
+ new = [nil] if new.empty?
132
157
  merged[i] = current[i] + new
133
158
  end
134
159
  data[key] = merged
135
160
  end
161
+ elsif type == :flat
162
+ current = data[key]
163
+ if merge == :concat
164
+ current[i].concat these_items
165
+ else
166
+ data[key] = current + these_items
167
+ end
136
168
  end
137
169
  end
170
+ rescue Exception
171
+ stream.abort($!) if stream.respond_to?(:abort)
172
+ raise $!
138
173
  ensure
139
- line = stream.gets
174
+ if stream.closed?
175
+ line = nil
176
+ else
177
+ line = stream.gets
178
+ end
140
179
  end
141
180
  end
142
181
  data
143
182
  ensure
144
- Log::ProgressBar.remove_bar(bar) if bar
183
+ if stream.stream_exception
184
+ bar.remove(stream.stream_exception)
185
+ else
186
+ bar.remove
187
+ end if bar
188
+ stream.join if stream.respond_to?(:join)
145
189
  end
146
190
  end
147
191
 
148
192
  def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
149
193
  raise "Closed stream" if IO === stream && stream.closed?
150
194
 
151
- options = {}
195
+ opts = {}
152
196
  preamble = []
153
197
 
154
198
  # Get line
@@ -160,13 +204,19 @@ module TSV
160
204
 
161
205
  # Process options line
162
206
  if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/))
163
- options = IndiferentHash.string2hash m.captures.first.chomp
207
+ opts = IndiferentHash.string2hash m.captures.first.chomp
164
208
  line = stream.gets
165
- line = Misc.fixutf8 line.chomp if line && fix
209
+ if line && fix
210
+ if Proc === fix
211
+ line = fix.call line
212
+ else
213
+ line = Misc.fixutf8 line.chomp if line && fix
214
+ end
215
+ end
166
216
  end
167
217
 
168
218
  # Determine separator
169
- sep = options[:sep] if options[:sep]
219
+ sep = opts[:sep] if opts[:sep]
170
220
 
171
221
  # Process fields line
172
222
  preamble << line if line
@@ -187,7 +237,10 @@ module TSV
187
237
 
188
238
  first_line = line
189
239
 
190
- [options, key_field, fields, first_line, preamble]
240
+ opts[:type] = opts[:type].to_sym if opts[:type]
241
+ opts[:cast] = opts[:cast].to_sym if opts[:cast]
242
+
243
+ [opts, key_field, fields, first_line, preamble]
191
244
  end
192
245
 
193
246
  KEY_PARAMETERS = begin
@@ -199,43 +252,79 @@ module TSV
199
252
  end
200
253
 
201
254
  class Parser
202
- attr_accessor :stream, :options, :key_field, :fields, :first_line, :preamble
203
- def initialize(file, fix: true, header_hash: "#", sep: "\t")
255
+ attr_accessor :stream, :options, :key_field, :fields, :type, :first_line, :preamble
256
+ def initialize(file, fix: true, header_hash: "#", sep: "\t", type: :double)
204
257
  if IO === file
205
258
  @stream = file
206
259
  else
207
260
  @stream = Open.open(file)
208
261
  end
262
+ @fix = fix
209
263
  @options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
210
264
  @options[:sep] = sep if @options[:sep].nil?
265
+ @options.merge!(:key_field => @key_field, :fields => @fields)
266
+ @type = type
211
267
  end
212
268
 
213
269
  def all_fields
270
+ return nil if @fields.nil?
214
271
  [@key_field] + @fields
215
272
  end
216
273
 
274
+ def key_field=(key_field)
275
+ @options[:key_field] = @key_field = key_field
276
+ end
277
+
278
+ def fields=(fields)
279
+ @options[:fields] = @fields = fields
280
+ end
281
+
282
+ def identify_field(name)
283
+ TSV.identify_field(@key_field, @fields, name)
284
+ end
285
+
217
286
  def traverse(key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
287
+ kwargs[:type] ||= self.options[:type] ||= @type
288
+ kwargs[:type] = kwargs[:type].to_sym if kwargs[:type]
289
+
218
290
  if fields
219
- all_field_names ||= [@key_field] + @fields
220
- positions = NamedArray.identify_name(all_field_names, fields)
221
- kwargs[:positions] = positions
222
- field_names = all_field_names.values_at *positions
291
+ if @fields
292
+ all_field_names ||= [@key_field] + @fields
293
+ fields = all_field_names if fields == :all
294
+ positions = NamedArray.identify_name(all_field_names, fields)
295
+ kwargs[:positions] = positions
296
+ field_names = all_field_names.values_at *positions
297
+ elsif fields.reject{|f| Numeric === f}.empty?
298
+ positions = fields
299
+ kwargs[:positions] = positions
300
+ else
301
+ raise "Non-numeric fields specified, but no field names available"
302
+ end
223
303
  else
224
304
  field_names = @fields
225
305
  end
226
306
 
227
307
  if key_field
228
- all_field_names ||= [@key_field] + @fields
229
- key = NamedArray.identify_name(all_field_names, key_field)
230
- kwargs[:key] = key
231
- key_field_name = all_field_names[key]
232
- if fields.nil?
233
- field_names = all_field_names - [@key_field]
308
+ if @fields
309
+ all_field_names ||= [@key_field] + @fields
310
+ key = NamedArray.identify_name(all_field_names, key_field)
311
+ kwargs[:key] = key == :key ? 0 : key
312
+ key_field_name = key === :key ? @key_field : all_field_names[key]
313
+ if fields.nil?
314
+ field_names = all_field_names - [key_field_name]
315
+ end
316
+ else
317
+ kwargs[:key] = key_field == :key ? 0 : key_field
318
+ key = key_field
234
319
  end
235
320
  else
236
321
  key_field_name = @key_field
237
322
  end
238
323
 
324
+ if field_names && (kwargs[:type] == :single || kwargs[:type] == :flat)
325
+ field_names = field_names.slice(0,1)
326
+ end
327
+
239
328
  @options.each do |option,value|
240
329
  option = option.to_sym
241
330
  next unless KEY_PARAMETERS.include? option
@@ -245,23 +334,56 @@ module TSV
245
334
  kwargs[:source_type] = @options[:type]
246
335
  kwargs[:data] = false if kwargs[:data].nil?
247
336
 
248
- data = TSV.parse_stream(@stream, first_line: @first_line, **kwargs, &block)
337
+ data = TSV.parse_stream(@stream, first_line: @first_line, fix: @fix, field_names: @fields, **kwargs, &block)
249
338
 
250
- TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type) if data
339
+ if data
340
+ TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
341
+ else
342
+ [self.key_field, self.fields]
343
+ end
344
+ end
345
+
346
+ def fingerprint
347
+ "Parser:{"<< Log.fingerprint(self.all_fields|| []) << "}"
348
+ end
251
349
 
252
- data || self
350
+ def digest_str
351
+ fingerprint
253
352
  end
254
353
 
354
+ def inspect
355
+ fingerprint
356
+ end
255
357
  end
256
358
 
257
- def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, **kwargs, &block)
359
+ def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block)
258
360
  parser = TSV::Parser.new stream, fix: fix, header_hash: header_hash, sep: sep
259
- kwargs = parser.options.merge(kwargs)
260
361
 
261
- type = kwargs[:type] ||= :double
362
+ cast = kwargs[:cast]
363
+ cast = parser.options[:cast] if cast.nil?
364
+ type = kwargs[:type] ||= parser.options[:type] ||= :double
262
365
  if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
263
366
  TSV.setup(data, type: type)
264
367
  data.extend TSVAdapter
368
+ if serializer
369
+ data.serializer = serializer
370
+ elsif cast
371
+ data.serializer =
372
+ case [cast, type]
373
+ when [:to_i, :single]
374
+ :integer
375
+ when [:to_i, :list], [:to_i, :flat]
376
+ :integer_array
377
+ when [:to_f, :single]
378
+ :float
379
+ when [:to_f, :list], [:to_f, :flat]
380
+ :float_array
381
+ else
382
+ type
383
+ end
384
+ else
385
+ data.serializer = type
386
+ end
265
387
  end
266
388
 
267
389
  kwargs[:data] = {} if kwargs[:data].nil?
@@ -270,48 +392,8 @@ module TSV
270
392
  data.type = type
271
393
  data.filename = filename
272
394
  data.namespace = namespace
395
+ data.unnamed = unnamed
396
+ data.save_extension_attr_hash if data.respond_to?(:save_extension_attr_hash)
273
397
  data
274
398
  end
275
-
276
- #def self.parse_alt(stream, key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
277
- # options, key_field_name, field_names, first_line, preamble = parse_header(stream)
278
-
279
- # if fields
280
- # all_field_names ||= [key_field_name] + field_names
281
- # positions = NamedArray.identify_name(all_field_names, fields)
282
- # kwargs[:positions] = positions
283
- # field_names = all_field_names.values_at *positions
284
- # end
285
-
286
- # if key_field
287
- # all_field_names ||= [key_field_name] + field_names
288
- # key = NamedArray.identify_name(all_field_names, key_field)
289
- # kwargs[:key] = key
290
- # key_field_name = all_field_names[key]
291
- # if fields.nil?
292
- # field_names = all_field_names - [key_field_name]
293
- # end
294
- # end
295
-
296
- # options.each do |option,value|
297
- # option = option.to_sym
298
- # next unless KEY_PARAMETERS.include? option
299
- # kwargs[option] = value unless kwargs.include?(option)
300
- # end
301
-
302
- # kwargs[:source_type] = options[:type]
303
-
304
- # type = kwargs[:type] ||= :double
305
- # if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
306
- # TSV.setup(data, type: type, key_field: key_field_name, fields: field_names)
307
- # data.extend TSVAdapter
308
- # end
309
-
310
- # data = parse_stream(stream, first_line: first_line, **kwargs, &block)
311
-
312
- # TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => type, filename: filename, namespace: namespace)
313
-
314
- # data
315
- #end
316
-
317
399
  end
@@ -1,13 +1,12 @@
1
1
  module Path
2
- def tsv(...)
3
- found = self.find
4
- found = self.set_extension('tsv').find unless found.exists?
5
- TSV.open(found, ...)
2
+ def tsv(*args, **kwargs, &block)
3
+ found = produce_and_find('tsv')
4
+ TSV.open(found, *args, **kwargs, &block)
6
5
  end
7
6
 
8
- def index(...)
7
+ def index(*args, **kwargs, &block)
9
8
  found = self.find
10
9
  found = self.set_extension('tsv').find unless found.exists?
11
- TSV.index(found, ...)
10
+ TSV.index(found, *args, **kwargs, &block)
12
11
  end
13
12
  end