scout-gear 7.3.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +20 -9
- data/VERSION +1 -1
- data/bin/scout +6 -3
- data/lib/rbbt-scout.rb +1 -0
- data/lib/scout/cmd.rb +1 -1
- data/lib/scout/concurrent_stream.rb +26 -23
- data/lib/scout/config.rb +1 -1
- data/lib/scout/log/color.rb +4 -1
- data/lib/scout/log/progress/report.rb +1 -1
- data/lib/scout/log/progress/util.rb +58 -54
- data/lib/scout/log/progress.rb +1 -1
- data/lib/scout/log/trap.rb +107 -0
- data/lib/scout/log.rb +56 -21
- data/lib/scout/meta_extension.rb +13 -6
- data/lib/scout/misc/digest.rb +1 -1
- data/lib/scout/misc/format.rb +12 -0
- data/lib/scout/misc/insist.rb +1 -1
- data/lib/scout/misc/monitor.rb +11 -0
- data/lib/scout/misc/system.rb +10 -0
- data/lib/scout/named_array.rb +65 -3
- data/lib/scout/open/lock/lockfile.rb +587 -0
- data/lib/scout/open/lock.rb +28 -2
- data/lib/scout/open/remote.rb +4 -0
- data/lib/scout/open/stream.rb +90 -15
- data/lib/scout/open/util.rb +13 -3
- data/lib/scout/path/find.rb +9 -1
- data/lib/scout/path/util.rb +35 -0
- data/lib/scout/persist/serialize.rb +18 -5
- data/lib/scout/persist.rb +28 -12
- data/lib/scout/resource/path.rb +53 -0
- data/lib/scout/resource/produce.rb +0 -8
- data/lib/scout/resource/util.rb +2 -1
- data/lib/scout/tmpfile.rb +7 -8
- data/lib/scout/tsv/attach.rb +177 -0
- data/lib/scout/tsv/change_id.rb +40 -0
- data/lib/scout/tsv/dumper.rb +72 -46
- data/lib/scout/tsv/index.rb +69 -13
- data/lib/scout/tsv/open.rb +138 -84
- data/lib/scout/tsv/parser.rb +135 -80
- data/lib/scout/tsv/path.rb +1 -2
- data/lib/scout/tsv/persist/adapter.rb +15 -45
- data/lib/scout/tsv/persist/fix_width_table.rb +3 -0
- data/lib/scout/tsv/persist/tokyocabinet.rb +4 -1
- data/lib/scout/tsv/persist.rb +4 -0
- data/lib/scout/tsv/transformer.rb +141 -0
- data/lib/scout/tsv/traverse.rb +96 -92
- data/lib/scout/tsv/util/filter.rb +9 -0
- data/lib/scout/tsv/util/reorder.rb +81 -0
- data/lib/scout/tsv/util/select.rb +78 -33
- data/lib/scout/tsv/util/unzip.rb +86 -0
- data/lib/scout/tsv/util.rb +60 -11
- data/lib/scout/tsv.rb +26 -3
- data/lib/scout/work_queue/socket.rb +6 -1
- data/lib/scout/work_queue/worker.rb +5 -2
- data/lib/scout/work_queue.rb +15 -8
- data/lib/scout/workflow/definition.rb +21 -2
- data/lib/scout/workflow/step/dependencies.rb +24 -4
- data/lib/scout/workflow/step/info.rb +36 -5
- data/lib/scout/workflow/step/provenance.rb +8 -7
- data/lib/scout/workflow/step/status.rb +45 -0
- data/lib/scout/workflow/step.rb +100 -34
- data/lib/scout/workflow/task/inputs.rb +14 -20
- data/lib/scout/workflow/task.rb +81 -46
- data/lib/scout/workflow/usage.rb +8 -6
- data/scout-gear.gemspec +24 -20
- data/scout_commands/workflow/task +34 -7
- data/test/scout/open/test_stream.rb +60 -58
- data/test/scout/path/test_find.rb +10 -1
- data/test/scout/resource/test_produce.rb +15 -0
- data/test/scout/test_meta_extension.rb +25 -0
- data/test/scout/test_named_array.rb +18 -0
- data/test/scout/test_persist.rb +6 -0
- data/test/scout/test_tsv.rb +212 -2
- data/test/scout/test_work_queue.rb +21 -19
- data/test/scout/tsv/persist/test_adapter.rb +1 -1
- data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
- data/test/scout/tsv/test_attach.rb +227 -0
- data/test/scout/tsv/test_change_id.rb +98 -0
- data/test/scout/tsv/test_dumper.rb +1 -1
- data/test/scout/tsv/test_index.rb +35 -3
- data/test/scout/tsv/test_open.rb +160 -2
- data/test/scout/tsv/test_parser.rb +19 -2
- data/test/scout/tsv/test_persist.rb +2 -0
- data/test/scout/tsv/test_transformer.rb +108 -0
- data/test/scout/tsv/test_traverse.rb +88 -3
- data/test/scout/tsv/test_util.rb +1 -0
- data/test/scout/tsv/util/test_reorder.rb +94 -0
- data/test/scout/tsv/util/test_select.rb +25 -11
- data/test/scout/tsv/util/test_unzip.rb +112 -0
- data/test/scout/work_queue/test_socket.rb +0 -1
- data/test/scout/workflow/step/test_status.rb +31 -0
- data/test/scout/workflow/task/test_inputs.rb +14 -14
- data/test/scout/workflow/test_step.rb +3 -3
- data/test/scout/workflow/test_task.rb +168 -32
- data/test/scout/workflow/test_usage.rb +33 -6
- metadata +20 -6
data/lib/scout/tsv/open.rb
CHANGED
@@ -1,61 +1,56 @@
|
|
1
1
|
require_relative '../open'
|
2
|
+
require_relative '../work_queue'
|
3
|
+
|
4
|
+
module MultipleResult
|
5
|
+
def self.setup(obj)
|
6
|
+
obj.extend MultipleResult
|
7
|
+
obj
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
2
11
|
module Open
|
3
12
|
def self.traverse_add(into, res)
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
into
|
13
|
+
if Array === res && MultipleResult === res
|
14
|
+
res.each do |_res|
|
15
|
+
traverse_add into, _res
|
16
|
+
end
|
17
|
+
else
|
18
|
+
case into
|
19
|
+
when defined?(TSV::Dumper) && TSV::Dumper
|
20
|
+
into.add *res
|
21
|
+
when TSV, Hash
|
22
|
+
key, value = res
|
23
|
+
if into.type == :double
|
24
|
+
into.zip_new key, value, insitu: false
|
25
|
+
else
|
26
|
+
into[key] = value
|
27
|
+
end
|
28
|
+
when Array, Set
|
29
|
+
into << res
|
30
|
+
when IO, StringIO
|
31
|
+
into.puts res
|
32
|
+
end
|
10
33
|
end
|
11
34
|
end
|
12
35
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
# self.traverse(obj.stream, into: into, cpus: cpus, bar: bar, **options, &block)
|
24
|
-
# when IO
|
25
|
-
# if into && (IO === into || into.respond_to?(:stream) )
|
26
|
-
# into_thread = Thread.new do
|
27
|
-
# Thread.current.report_on_exception = false
|
28
|
-
# Thread.current["name"] = "Traverse into"
|
29
|
-
# TSV.parse obj, **options do |k,v|
|
30
|
-
# begin
|
31
|
-
# res = block.call k, v
|
32
|
-
# traverse_add into, res
|
33
|
-
# rescue
|
34
|
-
# into.abort $!
|
35
|
-
# end
|
36
|
-
# nil
|
37
|
-
# end
|
38
|
-
# into.close if into.respond_to?(:close)
|
39
|
-
# end
|
40
|
-
# Thread.pass until into_thread
|
41
|
-
# into
|
42
|
-
# else
|
43
|
-
# TSV.parse obj, **options do |k,v|
|
44
|
-
# block.call k, v
|
45
|
-
# nil
|
46
|
-
# end
|
47
|
-
# end
|
48
|
-
# end
|
49
|
-
#end
|
50
|
-
|
51
|
-
def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, **options, &block)
|
36
|
+
def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, keep_open: false, **options, &block)
|
37
|
+
cpus = nil if cpus == 1
|
38
|
+
|
39
|
+
if into == :stream
|
40
|
+
sout, sin = Open.pipe
|
41
|
+
ConcurrentStream.setup(sout, :pair => sin)
|
42
|
+
ConcurrentStream.setup(sin, :pair => sout)
|
43
|
+
self.traverse(obj, into: sin, cpus: cpus, bar: bar, callback: callback, unnamed: unnamed, **options, &block)
|
44
|
+
return sout
|
45
|
+
end
|
52
46
|
|
53
47
|
if into || bar
|
54
48
|
orig_callback = callback if callback
|
55
|
-
bar = Log::ProgressBar.get_obj_bar(
|
49
|
+
bar = Log::ProgressBar.get_obj_bar(obj, bar) if bar
|
50
|
+
bar.init if bar
|
56
51
|
callback = proc do |res|
|
57
52
|
bar.tick if bar
|
58
|
-
traverse_add into, res if into
|
53
|
+
traverse_add into, res if into && ! res.nil?
|
59
54
|
orig_callback.call res if orig_callback
|
60
55
|
end
|
61
56
|
|
@@ -65,59 +60,94 @@ module Open
|
|
65
60
|
Thread.current["name"] = "Traverse into"
|
66
61
|
error = false
|
67
62
|
begin
|
68
|
-
self.traverse(obj, callback: callback, **options, &block)
|
69
|
-
into.close if into.respond_to?(:close)
|
63
|
+
self.traverse(obj, callback: callback, cpus: cpus, unnamed: unnamed, **options, &block)
|
64
|
+
into.close if ! keep_open && into.respond_to?(:close)
|
70
65
|
bar.remove if bar
|
71
66
|
rescue Exception
|
72
67
|
into.abort($!) if into.respond_to?(:abort)
|
73
68
|
bar.remove($!) if bar
|
74
69
|
end
|
75
70
|
end
|
76
|
-
Thread.pass until into_thread
|
71
|
+
Thread.pass until into_thread["name"]
|
77
72
|
return into
|
78
73
|
end
|
79
74
|
end
|
80
75
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
callback.call res if callback
|
105
|
-
nil
|
106
|
-
end
|
107
|
-
else
|
108
|
-
TSV.parse obj, **options do |k,v|
|
109
|
-
res = block.call k, v
|
110
|
-
callback.call res if callback
|
111
|
-
nil
|
112
|
-
end
|
76
|
+
if cpus
|
77
|
+
queue = WorkQueue.new cpus do |args|
|
78
|
+
block.call *args
|
79
|
+
end
|
80
|
+
|
81
|
+
queue.process do |res|
|
82
|
+
callback.call res
|
83
|
+
end
|
84
|
+
|
85
|
+
self.traverse(obj, **options) do |*args|
|
86
|
+
queue.write args
|
87
|
+
end
|
88
|
+
|
89
|
+
begin
|
90
|
+
queue.close
|
91
|
+
|
92
|
+
queue.join
|
93
|
+
|
94
|
+
bar.remove if bar
|
95
|
+
return into
|
96
|
+
rescue Exception
|
97
|
+
bar.remove($!) if bar
|
98
|
+
raise $!
|
113
99
|
end
|
100
|
+
end
|
101
|
+
|
102
|
+
begin
|
103
|
+
res = case obj
|
104
|
+
when TSV
|
105
|
+
#obj.traverse options[:key_field], options[:fields], unnamed: unnamed, **options do |k,v,f|
|
106
|
+
obj.traverse unnamed: unnamed, **options do |k,v,f|
|
107
|
+
res = block.call(k, v, f)
|
108
|
+
callback.call res if callback
|
109
|
+
nil
|
110
|
+
end
|
111
|
+
when Array
|
112
|
+
obj.each do |line|
|
113
|
+
res = block.call(line)
|
114
|
+
callback.call res if callback
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
when String
|
118
|
+
obj = obj.produce_and_find if Path === obj
|
119
|
+
f = Open.open(obj)
|
120
|
+
self.traverse(f, cpus: cpus, callback: callback, **options, &block)
|
121
|
+
when Step
|
122
|
+
raise obj.exception if obj.error?
|
123
|
+
self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
|
124
|
+
when IO
|
125
|
+
parser = TSV::Parser.new obj
|
126
|
+
parser.traverse **options do |k,v,f|
|
127
|
+
res = block.call k,v,f
|
128
|
+
callback.call res if callback
|
129
|
+
nil
|
130
|
+
end
|
131
|
+
when TSV::Parser
|
132
|
+
obj.traverse **options do |k,v,f|
|
133
|
+
res = block.call k, v, f
|
134
|
+
callback.call res if callback
|
135
|
+
nil
|
136
|
+
end
|
137
|
+
else
|
138
|
+
TSV.parse obj, **options do |k,v|
|
139
|
+
res = block.call k, v
|
140
|
+
callback.call res if callback
|
141
|
+
nil
|
142
|
+
end
|
143
|
+
end
|
114
144
|
bar.remove if bar
|
115
145
|
rescue
|
116
|
-
bar.
|
146
|
+
bar.error if bar
|
117
147
|
raise $!
|
118
148
|
end
|
119
149
|
|
120
|
-
into
|
150
|
+
into || res
|
121
151
|
end
|
122
152
|
end
|
123
153
|
|
@@ -125,4 +155,28 @@ module TSV
|
|
125
155
|
def self.traverse(*args, **kwargs, &block)
|
126
156
|
Open.traverse(*args, **kwargs, &block)
|
127
157
|
end
|
158
|
+
|
159
|
+
def self.process_stream(stream, header_hash: "#", &block)
|
160
|
+
sout = Open.open_pipe do |sin|
|
161
|
+
while line = stream.gets
|
162
|
+
break unless line.start_with?(header_hash)
|
163
|
+
sin.puts line
|
164
|
+
end
|
165
|
+
yield sin, line
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
def self.collapse_stream(stream, *args, **kwargs, &block)
|
170
|
+
stream = stream.stream if stream.respond_to?(:stream)
|
171
|
+
self.process_stream(stream) do |sin, line|
|
172
|
+
collapsed = Open.collapse_stream(stream, line: line)
|
173
|
+
Open.consume_stream(collapsed, false, sin)
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def collapse_stream(*args, **kwargs, &block)
|
178
|
+
TSV.collapse_stream(self.dumper_stream, *args, **kwargs, &block)
|
179
|
+
end
|
180
|
+
|
181
|
+
|
128
182
|
end
|
data/lib/scout/tsv/parser.rb
CHANGED
@@ -12,17 +12,24 @@ module TSV
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
-
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil)
|
15
|
+
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil)
|
16
16
|
items = line.split(sep, -1)
|
17
17
|
|
18
|
+
return nil if select && ! TSV.select(items[0], items[1..-1], select, fields: field_names, type: type, sep: sep2)
|
19
|
+
|
18
20
|
if positions.nil? && key == 0
|
19
21
|
key = items.shift
|
20
|
-
elsif positions.nil?
|
21
|
-
|
22
|
+
elsif positions.nil?
|
23
|
+
if type == :flat
|
24
|
+
key = items[1..-1].collect{|e| e.split(sep2, -1) }.flatten
|
25
|
+
items = items.slice(0,1)
|
26
|
+
else
|
27
|
+
key = items.delete_at(key)
|
28
|
+
end
|
22
29
|
key = key.split(sep2) if type == :double
|
23
30
|
else
|
24
31
|
key, items = items[key], items.values_at(*positions)
|
25
|
-
key = key.split(sep2) if type == :double
|
32
|
+
key = key.split(sep2) if type == :double || type == :flat
|
26
33
|
end
|
27
34
|
|
28
35
|
items = case type
|
@@ -31,9 +38,9 @@ module TSV
|
|
31
38
|
when :single
|
32
39
|
items.first
|
33
40
|
when :flat
|
34
|
-
|
41
|
+
items.collect{|i| i.split(sep2, -1) }.flatten
|
35
42
|
when :double
|
36
|
-
items.collect{|i| i.split(sep2, -1) }
|
43
|
+
items.collect{|i| i.nil? ? [] : i.split(sep2, -1) }
|
37
44
|
end
|
38
45
|
|
39
46
|
|
@@ -44,21 +51,34 @@ module TSV
|
|
44
51
|
[key, items]
|
45
52
|
end
|
46
53
|
|
47
|
-
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, **kargs, &block)
|
54
|
+
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, **kargs, &block)
|
48
55
|
begin
|
49
|
-
bar = Log
|
56
|
+
bar = "Parsing #{Log.fingerprint stream}" if TrueClass === bar
|
57
|
+
bar = Log::ProgressBar.get_obj_bar(stream, bar) if bar
|
58
|
+
bar.init if bar
|
50
59
|
|
51
60
|
source_type = type if source_type.nil?
|
52
61
|
|
53
62
|
data = {} if data.nil?
|
54
|
-
merge = false if type != :double
|
63
|
+
merge = false if type != :double && type != :flat
|
55
64
|
line = first_line || stream.gets
|
56
65
|
while line
|
57
66
|
begin
|
58
|
-
line.
|
59
|
-
|
67
|
+
line.chomp!
|
68
|
+
if Proc === fix
|
69
|
+
line = fix.call line
|
70
|
+
elsif fix
|
71
|
+
line = Misc.fixutf8(line)
|
72
|
+
end
|
60
73
|
bar.tick if bar
|
61
|
-
|
74
|
+
if type == :array || type == :line
|
75
|
+
block.call line
|
76
|
+
next
|
77
|
+
end
|
78
|
+
|
79
|
+
key, items = parse_line(line, type: source_type, field_names: field_names, **kargs)
|
80
|
+
|
81
|
+
next if key.nil?
|
62
82
|
|
63
83
|
if Array === key
|
64
84
|
keys = key
|
@@ -116,44 +136,63 @@ module TSV
|
|
116
136
|
end
|
117
137
|
|
118
138
|
if block_given?
|
119
|
-
res = block.call(key, these_items)
|
139
|
+
res = block.call(key, these_items, field_names)
|
120
140
|
data[key] = res unless res.nil? || FalseClass === data
|
121
141
|
next
|
122
142
|
end
|
123
143
|
|
124
144
|
if ! merge || ! data.include?(key)
|
125
145
|
data[key] = these_items
|
126
|
-
|
146
|
+
elsif type == :double
|
127
147
|
current = data[key]
|
128
148
|
if merge == :concat
|
129
149
|
these_items.each_with_index do |new,i|
|
130
|
-
|
150
|
+
new = [nil] if new.empty?
|
131
151
|
current[i].concat(new)
|
132
152
|
end
|
133
153
|
else
|
134
154
|
merged = []
|
135
155
|
these_items.each_with_index do |new,i|
|
136
|
-
|
156
|
+
new = [nil] if new.empty?
|
137
157
|
merged[i] = current[i] + new
|
138
158
|
end
|
139
159
|
data[key] = merged
|
140
160
|
end
|
161
|
+
elsif type == :flat
|
162
|
+
current = data[key]
|
163
|
+
if merge == :concat
|
164
|
+
current[i].concat these_items
|
165
|
+
else
|
166
|
+
data[key] = current + these_items
|
167
|
+
end
|
141
168
|
end
|
142
169
|
end
|
170
|
+
rescue Exception
|
171
|
+
stream.abort($!) if stream.respond_to?(:abort)
|
172
|
+
raise $!
|
143
173
|
ensure
|
144
|
-
|
174
|
+
if stream.closed?
|
175
|
+
line = nil
|
176
|
+
else
|
177
|
+
line = stream.gets
|
178
|
+
end
|
145
179
|
end
|
146
180
|
end
|
147
181
|
data
|
148
182
|
ensure
|
149
|
-
|
183
|
+
if stream.stream_exception
|
184
|
+
bar.remove(stream.stream_exception)
|
185
|
+
else
|
186
|
+
bar.remove
|
187
|
+
end if bar
|
188
|
+
stream.join if stream.respond_to?(:join)
|
150
189
|
end
|
151
190
|
end
|
152
191
|
|
153
192
|
def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
|
154
193
|
raise "Closed stream" if IO === stream && stream.closed?
|
155
194
|
|
156
|
-
|
195
|
+
opts = {}
|
157
196
|
preamble = []
|
158
197
|
|
159
198
|
# Get line
|
@@ -165,13 +204,19 @@ module TSV
|
|
165
204
|
|
166
205
|
# Process options line
|
167
206
|
if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/))
|
168
|
-
|
207
|
+
opts = IndiferentHash.string2hash m.captures.first.chomp
|
169
208
|
line = stream.gets
|
170
|
-
|
209
|
+
if line && fix
|
210
|
+
if Proc === fix
|
211
|
+
line = fix.call line
|
212
|
+
else
|
213
|
+
line = Misc.fixutf8 line.chomp if line && fix
|
214
|
+
end
|
215
|
+
end
|
171
216
|
end
|
172
217
|
|
173
218
|
# Determine separator
|
174
|
-
sep =
|
219
|
+
sep = opts[:sep] if opts[:sep]
|
175
220
|
|
176
221
|
# Process fields line
|
177
222
|
preamble << line if line
|
@@ -192,7 +237,10 @@ module TSV
|
|
192
237
|
|
193
238
|
first_line = line
|
194
239
|
|
195
|
-
[
|
240
|
+
opts[:type] = opts[:type].to_sym if opts[:type]
|
241
|
+
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
242
|
+
|
243
|
+
[opts, key_field, fields, first_line, preamble]
|
196
244
|
end
|
197
245
|
|
198
246
|
KEY_PARAMETERS = begin
|
@@ -204,43 +252,79 @@ module TSV
|
|
204
252
|
end
|
205
253
|
|
206
254
|
class Parser
|
207
|
-
attr_accessor :stream, :options, :key_field, :fields, :first_line, :preamble
|
208
|
-
def initialize(file, fix: true, header_hash: "#", sep: "\t")
|
255
|
+
attr_accessor :stream, :options, :key_field, :fields, :type, :first_line, :preamble
|
256
|
+
def initialize(file, fix: true, header_hash: "#", sep: "\t", type: :double)
|
209
257
|
if IO === file
|
210
258
|
@stream = file
|
211
259
|
else
|
212
260
|
@stream = Open.open(file)
|
213
261
|
end
|
262
|
+
@fix = fix
|
214
263
|
@options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
|
215
264
|
@options[:sep] = sep if @options[:sep].nil?
|
265
|
+
@options.merge!(:key_field => @key_field, :fields => @fields)
|
266
|
+
@type = type
|
216
267
|
end
|
217
268
|
|
218
269
|
def all_fields
|
270
|
+
return nil if @fields.nil?
|
219
271
|
[@key_field] + @fields
|
220
272
|
end
|
221
273
|
|
274
|
+
def key_field=(key_field)
|
275
|
+
@options[:key_field] = @key_field = key_field
|
276
|
+
end
|
277
|
+
|
278
|
+
def fields=(fields)
|
279
|
+
@options[:fields] = @fields = fields
|
280
|
+
end
|
281
|
+
|
282
|
+
def identify_field(name)
|
283
|
+
TSV.identify_field(@key_field, @fields, name)
|
284
|
+
end
|
285
|
+
|
222
286
|
def traverse(key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
|
287
|
+
kwargs[:type] ||= self.options[:type] ||= @type
|
288
|
+
kwargs[:type] = kwargs[:type].to_sym if kwargs[:type]
|
289
|
+
|
223
290
|
if fields
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
291
|
+
if @fields
|
292
|
+
all_field_names ||= [@key_field] + @fields
|
293
|
+
fields = all_field_names if fields == :all
|
294
|
+
positions = NamedArray.identify_name(all_field_names, fields)
|
295
|
+
kwargs[:positions] = positions
|
296
|
+
field_names = all_field_names.values_at *positions
|
297
|
+
elsif fields.reject{|f| Numeric === f}.empty?
|
298
|
+
positions = fields
|
299
|
+
kwargs[:positions] = positions
|
300
|
+
else
|
301
|
+
raise "Non-numeric fields specified, but no field names available"
|
302
|
+
end
|
228
303
|
else
|
229
304
|
field_names = @fields
|
230
305
|
end
|
231
306
|
|
232
307
|
if key_field
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
308
|
+
if @fields
|
309
|
+
all_field_names ||= [@key_field] + @fields
|
310
|
+
key = NamedArray.identify_name(all_field_names, key_field)
|
311
|
+
kwargs[:key] = key == :key ? 0 : key
|
312
|
+
key_field_name = key === :key ? @key_field : all_field_names[key]
|
313
|
+
if fields.nil?
|
314
|
+
field_names = all_field_names - [key_field_name]
|
315
|
+
end
|
316
|
+
else
|
317
|
+
kwargs[:key] = key_field == :key ? 0 : key_field
|
318
|
+
key = key_field
|
239
319
|
end
|
240
320
|
else
|
241
321
|
key_field_name = @key_field
|
242
322
|
end
|
243
323
|
|
324
|
+
if field_names && (kwargs[:type] == :single || kwargs[:type] == :flat)
|
325
|
+
field_names = field_names.slice(0,1)
|
326
|
+
end
|
327
|
+
|
244
328
|
@options.each do |option,value|
|
245
329
|
option = option.to_sym
|
246
330
|
next unless KEY_PARAMETERS.include? option
|
@@ -250,21 +334,33 @@ module TSV
|
|
250
334
|
kwargs[:source_type] = @options[:type]
|
251
335
|
kwargs[:data] = false if kwargs[:data].nil?
|
252
336
|
|
253
|
-
data = TSV.parse_stream(@stream, first_line: @first_line, **kwargs, &block)
|
337
|
+
data = TSV.parse_stream(@stream, first_line: @first_line, fix: @fix, field_names: @fields, **kwargs, &block)
|
254
338
|
|
255
339
|
if data
|
256
340
|
TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
|
257
341
|
else
|
258
|
-
self
|
342
|
+
[self.key_field, self.fields]
|
259
343
|
end
|
260
344
|
end
|
261
345
|
|
346
|
+
def fingerprint
|
347
|
+
"Parser:{"<< Log.fingerprint(self.all_fields|| []) << "}"
|
348
|
+
end
|
349
|
+
|
350
|
+
def digest_str
|
351
|
+
fingerprint
|
352
|
+
end
|
353
|
+
|
354
|
+
def inspect
|
355
|
+
fingerprint
|
356
|
+
end
|
262
357
|
end
|
263
358
|
|
264
359
|
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block)
|
265
360
|
parser = TSV::Parser.new stream, fix: fix, header_hash: header_hash, sep: sep
|
266
361
|
|
267
|
-
cast =
|
362
|
+
cast = kwargs[:cast]
|
363
|
+
cast = parser.options[:cast] if cast.nil?
|
268
364
|
type = kwargs[:type] ||= parser.options[:type] ||= :double
|
269
365
|
if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
|
270
366
|
TSV.setup(data, type: type)
|
@@ -297,48 +393,7 @@ module TSV
|
|
297
393
|
data.filename = filename
|
298
394
|
data.namespace = namespace
|
299
395
|
data.unnamed = unnamed
|
396
|
+
data.save_extension_attr_hash if data.respond_to?(:save_extension_attr_hash)
|
300
397
|
data
|
301
398
|
end
|
302
|
-
|
303
|
-
#def self.parse_alt(stream, key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
|
304
|
-
# options, key_field_name, field_names, first_line, preamble = parse_header(stream)
|
305
|
-
|
306
|
-
# if fields
|
307
|
-
# all_field_names ||= [key_field_name] + field_names
|
308
|
-
# positions = NamedArray.identify_name(all_field_names, fields)
|
309
|
-
# kwargs[:positions] = positions
|
310
|
-
# field_names = all_field_names.values_at *positions
|
311
|
-
# end
|
312
|
-
|
313
|
-
# if key_field
|
314
|
-
# all_field_names ||= [key_field_name] + field_names
|
315
|
-
# key = NamedArray.identify_name(all_field_names, key_field)
|
316
|
-
# kwargs[:key] = key
|
317
|
-
# key_field_name = all_field_names[key]
|
318
|
-
# if fields.nil?
|
319
|
-
# field_names = all_field_names - [key_field_name]
|
320
|
-
# end
|
321
|
-
# end
|
322
|
-
|
323
|
-
# options.each do |option,value|
|
324
|
-
# option = option.to_sym
|
325
|
-
# next unless KEY_PARAMETERS.include? option
|
326
|
-
# kwargs[option] = value unless kwargs.include?(option)
|
327
|
-
# end
|
328
|
-
|
329
|
-
# kwargs[:source_type] = options[:type]
|
330
|
-
|
331
|
-
# type = kwargs[:type] ||= :double
|
332
|
-
# if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
|
333
|
-
# TSV.setup(data, type: type, key_field: key_field_name, fields: field_names)
|
334
|
-
# data.extend TSVAdapter
|
335
|
-
# end
|
336
|
-
|
337
|
-
# data = parse_stream(stream, first_line: first_line, **kwargs, &block)
|
338
|
-
|
339
|
-
# TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => type, filename: filename, namespace: namespace)
|
340
|
-
|
341
|
-
# data
|
342
|
-
#end
|
343
|
-
|
344
399
|
end
|