scout-gear 7.3.0 → 8.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +20 -9
- data/VERSION +1 -1
- data/bin/scout +6 -3
- data/lib/rbbt-scout.rb +1 -0
- data/lib/scout/cmd.rb +1 -1
- data/lib/scout/concurrent_stream.rb +26 -23
- data/lib/scout/config.rb +1 -1
- data/lib/scout/log/color.rb +4 -1
- data/lib/scout/log/progress/report.rb +1 -1
- data/lib/scout/log/progress/util.rb +58 -54
- data/lib/scout/log/progress.rb +1 -1
- data/lib/scout/log/trap.rb +107 -0
- data/lib/scout/log.rb +56 -21
- data/lib/scout/meta_extension.rb +13 -6
- data/lib/scout/misc/digest.rb +1 -1
- data/lib/scout/misc/format.rb +12 -0
- data/lib/scout/misc/insist.rb +1 -1
- data/lib/scout/misc/monitor.rb +11 -0
- data/lib/scout/misc/system.rb +10 -0
- data/lib/scout/named_array.rb +65 -3
- data/lib/scout/open/lock/lockfile.rb +587 -0
- data/lib/scout/open/lock.rb +28 -2
- data/lib/scout/open/remote.rb +4 -0
- data/lib/scout/open/stream.rb +90 -15
- data/lib/scout/open/util.rb +13 -3
- data/lib/scout/path/find.rb +9 -1
- data/lib/scout/path/util.rb +35 -0
- data/lib/scout/persist/serialize.rb +18 -5
- data/lib/scout/persist.rb +28 -12
- data/lib/scout/resource/path.rb +53 -0
- data/lib/scout/resource/produce.rb +0 -8
- data/lib/scout/resource/util.rb +2 -1
- data/lib/scout/tmpfile.rb +7 -8
- data/lib/scout/tsv/attach.rb +177 -0
- data/lib/scout/tsv/change_id.rb +40 -0
- data/lib/scout/tsv/dumper.rb +72 -46
- data/lib/scout/tsv/index.rb +69 -13
- data/lib/scout/tsv/open.rb +138 -84
- data/lib/scout/tsv/parser.rb +135 -80
- data/lib/scout/tsv/path.rb +1 -2
- data/lib/scout/tsv/persist/adapter.rb +15 -45
- data/lib/scout/tsv/persist/fix_width_table.rb +3 -0
- data/lib/scout/tsv/persist/tokyocabinet.rb +4 -1
- data/lib/scout/tsv/persist.rb +4 -0
- data/lib/scout/tsv/transformer.rb +141 -0
- data/lib/scout/tsv/traverse.rb +96 -92
- data/lib/scout/tsv/util/filter.rb +9 -0
- data/lib/scout/tsv/util/reorder.rb +81 -0
- data/lib/scout/tsv/util/select.rb +78 -33
- data/lib/scout/tsv/util/unzip.rb +86 -0
- data/lib/scout/tsv/util.rb +60 -11
- data/lib/scout/tsv.rb +26 -3
- data/lib/scout/work_queue/socket.rb +6 -1
- data/lib/scout/work_queue/worker.rb +5 -2
- data/lib/scout/work_queue.rb +15 -8
- data/lib/scout/workflow/definition.rb +21 -2
- data/lib/scout/workflow/step/dependencies.rb +24 -4
- data/lib/scout/workflow/step/info.rb +36 -5
- data/lib/scout/workflow/step/provenance.rb +8 -7
- data/lib/scout/workflow/step/status.rb +45 -0
- data/lib/scout/workflow/step.rb +100 -34
- data/lib/scout/workflow/task/inputs.rb +14 -20
- data/lib/scout/workflow/task.rb +81 -46
- data/lib/scout/workflow/usage.rb +8 -6
- data/scout-gear.gemspec +24 -20
- data/scout_commands/workflow/task +34 -7
- data/test/scout/open/test_stream.rb +60 -58
- data/test/scout/path/test_find.rb +10 -1
- data/test/scout/resource/test_produce.rb +15 -0
- data/test/scout/test_meta_extension.rb +25 -0
- data/test/scout/test_named_array.rb +18 -0
- data/test/scout/test_persist.rb +6 -0
- data/test/scout/test_tsv.rb +212 -2
- data/test/scout/test_work_queue.rb +21 -19
- data/test/scout/tsv/persist/test_adapter.rb +1 -1
- data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
- data/test/scout/tsv/test_attach.rb +227 -0
- data/test/scout/tsv/test_change_id.rb +98 -0
- data/test/scout/tsv/test_dumper.rb +1 -1
- data/test/scout/tsv/test_index.rb +35 -3
- data/test/scout/tsv/test_open.rb +160 -2
- data/test/scout/tsv/test_parser.rb +19 -2
- data/test/scout/tsv/test_persist.rb +2 -0
- data/test/scout/tsv/test_transformer.rb +108 -0
- data/test/scout/tsv/test_traverse.rb +88 -3
- data/test/scout/tsv/test_util.rb +1 -0
- data/test/scout/tsv/util/test_reorder.rb +94 -0
- data/test/scout/tsv/util/test_select.rb +25 -11
- data/test/scout/tsv/util/test_unzip.rb +112 -0
- data/test/scout/work_queue/test_socket.rb +0 -1
- data/test/scout/workflow/step/test_status.rb +31 -0
- data/test/scout/workflow/task/test_inputs.rb +14 -14
- data/test/scout/workflow/test_step.rb +3 -3
- data/test/scout/workflow/test_task.rb +168 -32
- data/test/scout/workflow/test_usage.rb +33 -6
- metadata +20 -6
data/lib/scout/tsv/open.rb
CHANGED
@@ -1,61 +1,56 @@
|
|
1
1
|
require_relative '../open'
|
2
|
+
require_relative '../work_queue'
|
3
|
+
|
4
|
+
module MultipleResult
|
5
|
+
def self.setup(obj)
|
6
|
+
obj.extend MultipleResult
|
7
|
+
obj
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
2
11
|
module Open
|
3
12
|
def self.traverse_add(into, res)
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
into
|
13
|
+
if Array === res && MultipleResult === res
|
14
|
+
res.each do |_res|
|
15
|
+
traverse_add into, _res
|
16
|
+
end
|
17
|
+
else
|
18
|
+
case into
|
19
|
+
when defined?(TSV::Dumper) && TSV::Dumper
|
20
|
+
into.add *res
|
21
|
+
when TSV, Hash
|
22
|
+
key, value = res
|
23
|
+
if into.type == :double
|
24
|
+
into.zip_new key, value, insitu: false
|
25
|
+
else
|
26
|
+
into[key] = value
|
27
|
+
end
|
28
|
+
when Array, Set
|
29
|
+
into << res
|
30
|
+
when IO, StringIO
|
31
|
+
into.puts res
|
32
|
+
end
|
10
33
|
end
|
11
34
|
end
|
12
35
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
# self.traverse(obj.stream, into: into, cpus: cpus, bar: bar, **options, &block)
|
24
|
-
# when IO
|
25
|
-
# if into && (IO === into || into.respond_to?(:stream) )
|
26
|
-
# into_thread = Thread.new do
|
27
|
-
# Thread.current.report_on_exception = false
|
28
|
-
# Thread.current["name"] = "Traverse into"
|
29
|
-
# TSV.parse obj, **options do |k,v|
|
30
|
-
# begin
|
31
|
-
# res = block.call k, v
|
32
|
-
# traverse_add into, res
|
33
|
-
# rescue
|
34
|
-
# into.abort $!
|
35
|
-
# end
|
36
|
-
# nil
|
37
|
-
# end
|
38
|
-
# into.close if into.respond_to?(:close)
|
39
|
-
# end
|
40
|
-
# Thread.pass until into_thread
|
41
|
-
# into
|
42
|
-
# else
|
43
|
-
# TSV.parse obj, **options do |k,v|
|
44
|
-
# block.call k, v
|
45
|
-
# nil
|
46
|
-
# end
|
47
|
-
# end
|
48
|
-
# end
|
49
|
-
#end
|
50
|
-
|
51
|
-
def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, **options, &block)
|
36
|
+
def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, keep_open: false, **options, &block)
|
37
|
+
cpus = nil if cpus == 1
|
38
|
+
|
39
|
+
if into == :stream
|
40
|
+
sout, sin = Open.pipe
|
41
|
+
ConcurrentStream.setup(sout, :pair => sin)
|
42
|
+
ConcurrentStream.setup(sin, :pair => sout)
|
43
|
+
self.traverse(obj, into: sin, cpus: cpus, bar: bar, callback: callback, unnamed: unnamed, **options, &block)
|
44
|
+
return sout
|
45
|
+
end
|
52
46
|
|
53
47
|
if into || bar
|
54
48
|
orig_callback = callback if callback
|
55
|
-
bar = Log::ProgressBar.get_obj_bar(
|
49
|
+
bar = Log::ProgressBar.get_obj_bar(obj, bar) if bar
|
50
|
+
bar.init if bar
|
56
51
|
callback = proc do |res|
|
57
52
|
bar.tick if bar
|
58
|
-
traverse_add into, res if into
|
53
|
+
traverse_add into, res if into && ! res.nil?
|
59
54
|
orig_callback.call res if orig_callback
|
60
55
|
end
|
61
56
|
|
@@ -65,59 +60,94 @@ module Open
|
|
65
60
|
Thread.current["name"] = "Traverse into"
|
66
61
|
error = false
|
67
62
|
begin
|
68
|
-
self.traverse(obj, callback: callback, **options, &block)
|
69
|
-
into.close if into.respond_to?(:close)
|
63
|
+
self.traverse(obj, callback: callback, cpus: cpus, unnamed: unnamed, **options, &block)
|
64
|
+
into.close if ! keep_open && into.respond_to?(:close)
|
70
65
|
bar.remove if bar
|
71
66
|
rescue Exception
|
72
67
|
into.abort($!) if into.respond_to?(:abort)
|
73
68
|
bar.remove($!) if bar
|
74
69
|
end
|
75
70
|
end
|
76
|
-
Thread.pass until into_thread
|
71
|
+
Thread.pass until into_thread["name"]
|
77
72
|
return into
|
78
73
|
end
|
79
74
|
end
|
80
75
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
callback.call res if callback
|
105
|
-
nil
|
106
|
-
end
|
107
|
-
else
|
108
|
-
TSV.parse obj, **options do |k,v|
|
109
|
-
res = block.call k, v
|
110
|
-
callback.call res if callback
|
111
|
-
nil
|
112
|
-
end
|
76
|
+
if cpus
|
77
|
+
queue = WorkQueue.new cpus do |args|
|
78
|
+
block.call *args
|
79
|
+
end
|
80
|
+
|
81
|
+
queue.process do |res|
|
82
|
+
callback.call res
|
83
|
+
end
|
84
|
+
|
85
|
+
self.traverse(obj, **options) do |*args|
|
86
|
+
queue.write args
|
87
|
+
end
|
88
|
+
|
89
|
+
begin
|
90
|
+
queue.close
|
91
|
+
|
92
|
+
queue.join
|
93
|
+
|
94
|
+
bar.remove if bar
|
95
|
+
return into
|
96
|
+
rescue Exception
|
97
|
+
bar.remove($!) if bar
|
98
|
+
raise $!
|
113
99
|
end
|
100
|
+
end
|
101
|
+
|
102
|
+
begin
|
103
|
+
res = case obj
|
104
|
+
when TSV
|
105
|
+
#obj.traverse options[:key_field], options[:fields], unnamed: unnamed, **options do |k,v,f|
|
106
|
+
obj.traverse unnamed: unnamed, **options do |k,v,f|
|
107
|
+
res = block.call(k, v, f)
|
108
|
+
callback.call res if callback
|
109
|
+
nil
|
110
|
+
end
|
111
|
+
when Array
|
112
|
+
obj.each do |line|
|
113
|
+
res = block.call(line)
|
114
|
+
callback.call res if callback
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
when String
|
118
|
+
obj = obj.produce_and_find if Path === obj
|
119
|
+
f = Open.open(obj)
|
120
|
+
self.traverse(f, cpus: cpus, callback: callback, **options, &block)
|
121
|
+
when Step
|
122
|
+
raise obj.exception if obj.error?
|
123
|
+
self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
|
124
|
+
when IO
|
125
|
+
parser = TSV::Parser.new obj
|
126
|
+
parser.traverse **options do |k,v,f|
|
127
|
+
res = block.call k,v,f
|
128
|
+
callback.call res if callback
|
129
|
+
nil
|
130
|
+
end
|
131
|
+
when TSV::Parser
|
132
|
+
obj.traverse **options do |k,v,f|
|
133
|
+
res = block.call k, v, f
|
134
|
+
callback.call res if callback
|
135
|
+
nil
|
136
|
+
end
|
137
|
+
else
|
138
|
+
TSV.parse obj, **options do |k,v|
|
139
|
+
res = block.call k, v
|
140
|
+
callback.call res if callback
|
141
|
+
nil
|
142
|
+
end
|
143
|
+
end
|
114
144
|
bar.remove if bar
|
115
145
|
rescue
|
116
|
-
bar.
|
146
|
+
bar.error if bar
|
117
147
|
raise $!
|
118
148
|
end
|
119
149
|
|
120
|
-
into
|
150
|
+
into || res
|
121
151
|
end
|
122
152
|
end
|
123
153
|
|
@@ -125,4 +155,28 @@ module TSV
|
|
125
155
|
def self.traverse(*args, **kwargs, &block)
|
126
156
|
Open.traverse(*args, **kwargs, &block)
|
127
157
|
end
|
158
|
+
|
159
|
+
def self.process_stream(stream, header_hash: "#", &block)
|
160
|
+
sout = Open.open_pipe do |sin|
|
161
|
+
while line = stream.gets
|
162
|
+
break unless line.start_with?(header_hash)
|
163
|
+
sin.puts line
|
164
|
+
end
|
165
|
+
yield sin, line
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
def self.collapse_stream(stream, *args, **kwargs, &block)
|
170
|
+
stream = stream.stream if stream.respond_to?(:stream)
|
171
|
+
self.process_stream(stream) do |sin, line|
|
172
|
+
collapsed = Open.collapse_stream(stream, line: line)
|
173
|
+
Open.consume_stream(collapsed, false, sin)
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def collapse_stream(*args, **kwargs, &block)
|
178
|
+
TSV.collapse_stream(self.dumper_stream, *args, **kwargs, &block)
|
179
|
+
end
|
180
|
+
|
181
|
+
|
128
182
|
end
|
data/lib/scout/tsv/parser.rb
CHANGED
@@ -12,17 +12,24 @@ module TSV
|
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
|
-
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil)
|
15
|
+
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil)
|
16
16
|
items = line.split(sep, -1)
|
17
17
|
|
18
|
+
return nil if select && ! TSV.select(items[0], items[1..-1], select, fields: field_names, type: type, sep: sep2)
|
19
|
+
|
18
20
|
if positions.nil? && key == 0
|
19
21
|
key = items.shift
|
20
|
-
elsif positions.nil?
|
21
|
-
|
22
|
+
elsif positions.nil?
|
23
|
+
if type == :flat
|
24
|
+
key = items[1..-1].collect{|e| e.split(sep2, -1) }.flatten
|
25
|
+
items = items.slice(0,1)
|
26
|
+
else
|
27
|
+
key = items.delete_at(key)
|
28
|
+
end
|
22
29
|
key = key.split(sep2) if type == :double
|
23
30
|
else
|
24
31
|
key, items = items[key], items.values_at(*positions)
|
25
|
-
key = key.split(sep2) if type == :double
|
32
|
+
key = key.split(sep2) if type == :double || type == :flat
|
26
33
|
end
|
27
34
|
|
28
35
|
items = case type
|
@@ -31,9 +38,9 @@ module TSV
|
|
31
38
|
when :single
|
32
39
|
items.first
|
33
40
|
when :flat
|
34
|
-
|
41
|
+
items.collect{|i| i.split(sep2, -1) }.flatten
|
35
42
|
when :double
|
36
|
-
items.collect{|i| i.split(sep2, -1) }
|
43
|
+
items.collect{|i| i.nil? ? [] : i.split(sep2, -1) }
|
37
44
|
end
|
38
45
|
|
39
46
|
|
@@ -44,21 +51,34 @@ module TSV
|
|
44
51
|
[key, items]
|
45
52
|
end
|
46
53
|
|
47
|
-
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, **kargs, &block)
|
54
|
+
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, **kargs, &block)
|
48
55
|
begin
|
49
|
-
bar = Log
|
56
|
+
bar = "Parsing #{Log.fingerprint stream}" if TrueClass === bar
|
57
|
+
bar = Log::ProgressBar.get_obj_bar(stream, bar) if bar
|
58
|
+
bar.init if bar
|
50
59
|
|
51
60
|
source_type = type if source_type.nil?
|
52
61
|
|
53
62
|
data = {} if data.nil?
|
54
|
-
merge = false if type != :double
|
63
|
+
merge = false if type != :double && type != :flat
|
55
64
|
line = first_line || stream.gets
|
56
65
|
while line
|
57
66
|
begin
|
58
|
-
line.
|
59
|
-
|
67
|
+
line.chomp!
|
68
|
+
if Proc === fix
|
69
|
+
line = fix.call line
|
70
|
+
elsif fix
|
71
|
+
line = Misc.fixutf8(line)
|
72
|
+
end
|
60
73
|
bar.tick if bar
|
61
|
-
|
74
|
+
if type == :array || type == :line
|
75
|
+
block.call line
|
76
|
+
next
|
77
|
+
end
|
78
|
+
|
79
|
+
key, items = parse_line(line, type: source_type, field_names: field_names, **kargs)
|
80
|
+
|
81
|
+
next if key.nil?
|
62
82
|
|
63
83
|
if Array === key
|
64
84
|
keys = key
|
@@ -116,44 +136,63 @@ module TSV
|
|
116
136
|
end
|
117
137
|
|
118
138
|
if block_given?
|
119
|
-
res = block.call(key, these_items)
|
139
|
+
res = block.call(key, these_items, field_names)
|
120
140
|
data[key] = res unless res.nil? || FalseClass === data
|
121
141
|
next
|
122
142
|
end
|
123
143
|
|
124
144
|
if ! merge || ! data.include?(key)
|
125
145
|
data[key] = these_items
|
126
|
-
|
146
|
+
elsif type == :double
|
127
147
|
current = data[key]
|
128
148
|
if merge == :concat
|
129
149
|
these_items.each_with_index do |new,i|
|
130
|
-
|
150
|
+
new = [nil] if new.empty?
|
131
151
|
current[i].concat(new)
|
132
152
|
end
|
133
153
|
else
|
134
154
|
merged = []
|
135
155
|
these_items.each_with_index do |new,i|
|
136
|
-
|
156
|
+
new = [nil] if new.empty?
|
137
157
|
merged[i] = current[i] + new
|
138
158
|
end
|
139
159
|
data[key] = merged
|
140
160
|
end
|
161
|
+
elsif type == :flat
|
162
|
+
current = data[key]
|
163
|
+
if merge == :concat
|
164
|
+
current[i].concat these_items
|
165
|
+
else
|
166
|
+
data[key] = current + these_items
|
167
|
+
end
|
141
168
|
end
|
142
169
|
end
|
170
|
+
rescue Exception
|
171
|
+
stream.abort($!) if stream.respond_to?(:abort)
|
172
|
+
raise $!
|
143
173
|
ensure
|
144
|
-
|
174
|
+
if stream.closed?
|
175
|
+
line = nil
|
176
|
+
else
|
177
|
+
line = stream.gets
|
178
|
+
end
|
145
179
|
end
|
146
180
|
end
|
147
181
|
data
|
148
182
|
ensure
|
149
|
-
|
183
|
+
if stream.stream_exception
|
184
|
+
bar.remove(stream.stream_exception)
|
185
|
+
else
|
186
|
+
bar.remove
|
187
|
+
end if bar
|
188
|
+
stream.join if stream.respond_to?(:join)
|
150
189
|
end
|
151
190
|
end
|
152
191
|
|
153
192
|
def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
|
154
193
|
raise "Closed stream" if IO === stream && stream.closed?
|
155
194
|
|
156
|
-
|
195
|
+
opts = {}
|
157
196
|
preamble = []
|
158
197
|
|
159
198
|
# Get line
|
@@ -165,13 +204,19 @@ module TSV
|
|
165
204
|
|
166
205
|
# Process options line
|
167
206
|
if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/))
|
168
|
-
|
207
|
+
opts = IndiferentHash.string2hash m.captures.first.chomp
|
169
208
|
line = stream.gets
|
170
|
-
|
209
|
+
if line && fix
|
210
|
+
if Proc === fix
|
211
|
+
line = fix.call line
|
212
|
+
else
|
213
|
+
line = Misc.fixutf8 line.chomp if line && fix
|
214
|
+
end
|
215
|
+
end
|
171
216
|
end
|
172
217
|
|
173
218
|
# Determine separator
|
174
|
-
sep =
|
219
|
+
sep = opts[:sep] if opts[:sep]
|
175
220
|
|
176
221
|
# Process fields line
|
177
222
|
preamble << line if line
|
@@ -192,7 +237,10 @@ module TSV
|
|
192
237
|
|
193
238
|
first_line = line
|
194
239
|
|
195
|
-
[
|
240
|
+
opts[:type] = opts[:type].to_sym if opts[:type]
|
241
|
+
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
242
|
+
|
243
|
+
[opts, key_field, fields, first_line, preamble]
|
196
244
|
end
|
197
245
|
|
198
246
|
KEY_PARAMETERS = begin
|
@@ -204,43 +252,79 @@ module TSV
|
|
204
252
|
end
|
205
253
|
|
206
254
|
class Parser
|
207
|
-
attr_accessor :stream, :options, :key_field, :fields, :first_line, :preamble
|
208
|
-
def initialize(file, fix: true, header_hash: "#", sep: "\t")
|
255
|
+
attr_accessor :stream, :options, :key_field, :fields, :type, :first_line, :preamble
|
256
|
+
def initialize(file, fix: true, header_hash: "#", sep: "\t", type: :double)
|
209
257
|
if IO === file
|
210
258
|
@stream = file
|
211
259
|
else
|
212
260
|
@stream = Open.open(file)
|
213
261
|
end
|
262
|
+
@fix = fix
|
214
263
|
@options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
|
215
264
|
@options[:sep] = sep if @options[:sep].nil?
|
265
|
+
@options.merge!(:key_field => @key_field, :fields => @fields)
|
266
|
+
@type = type
|
216
267
|
end
|
217
268
|
|
218
269
|
def all_fields
|
270
|
+
return nil if @fields.nil?
|
219
271
|
[@key_field] + @fields
|
220
272
|
end
|
221
273
|
|
274
|
+
def key_field=(key_field)
|
275
|
+
@options[:key_field] = @key_field = key_field
|
276
|
+
end
|
277
|
+
|
278
|
+
def fields=(fields)
|
279
|
+
@options[:fields] = @fields = fields
|
280
|
+
end
|
281
|
+
|
282
|
+
def identify_field(name)
|
283
|
+
TSV.identify_field(@key_field, @fields, name)
|
284
|
+
end
|
285
|
+
|
222
286
|
def traverse(key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
|
287
|
+
kwargs[:type] ||= self.options[:type] ||= @type
|
288
|
+
kwargs[:type] = kwargs[:type].to_sym if kwargs[:type]
|
289
|
+
|
223
290
|
if fields
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
291
|
+
if @fields
|
292
|
+
all_field_names ||= [@key_field] + @fields
|
293
|
+
fields = all_field_names if fields == :all
|
294
|
+
positions = NamedArray.identify_name(all_field_names, fields)
|
295
|
+
kwargs[:positions] = positions
|
296
|
+
field_names = all_field_names.values_at *positions
|
297
|
+
elsif fields.reject{|f| Numeric === f}.empty?
|
298
|
+
positions = fields
|
299
|
+
kwargs[:positions] = positions
|
300
|
+
else
|
301
|
+
raise "Non-numeric fields specified, but no field names available"
|
302
|
+
end
|
228
303
|
else
|
229
304
|
field_names = @fields
|
230
305
|
end
|
231
306
|
|
232
307
|
if key_field
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
308
|
+
if @fields
|
309
|
+
all_field_names ||= [@key_field] + @fields
|
310
|
+
key = NamedArray.identify_name(all_field_names, key_field)
|
311
|
+
kwargs[:key] = key == :key ? 0 : key
|
312
|
+
key_field_name = key === :key ? @key_field : all_field_names[key]
|
313
|
+
if fields.nil?
|
314
|
+
field_names = all_field_names - [key_field_name]
|
315
|
+
end
|
316
|
+
else
|
317
|
+
kwargs[:key] = key_field == :key ? 0 : key_field
|
318
|
+
key = key_field
|
239
319
|
end
|
240
320
|
else
|
241
321
|
key_field_name = @key_field
|
242
322
|
end
|
243
323
|
|
324
|
+
if field_names && (kwargs[:type] == :single || kwargs[:type] == :flat)
|
325
|
+
field_names = field_names.slice(0,1)
|
326
|
+
end
|
327
|
+
|
244
328
|
@options.each do |option,value|
|
245
329
|
option = option.to_sym
|
246
330
|
next unless KEY_PARAMETERS.include? option
|
@@ -250,21 +334,33 @@ module TSV
|
|
250
334
|
kwargs[:source_type] = @options[:type]
|
251
335
|
kwargs[:data] = false if kwargs[:data].nil?
|
252
336
|
|
253
|
-
data = TSV.parse_stream(@stream, first_line: @first_line, **kwargs, &block)
|
337
|
+
data = TSV.parse_stream(@stream, first_line: @first_line, fix: @fix, field_names: @fields, **kwargs, &block)
|
254
338
|
|
255
339
|
if data
|
256
340
|
TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
|
257
341
|
else
|
258
|
-
self
|
342
|
+
[self.key_field, self.fields]
|
259
343
|
end
|
260
344
|
end
|
261
345
|
|
346
|
+
def fingerprint
|
347
|
+
"Parser:{"<< Log.fingerprint(self.all_fields|| []) << "}"
|
348
|
+
end
|
349
|
+
|
350
|
+
def digest_str
|
351
|
+
fingerprint
|
352
|
+
end
|
353
|
+
|
354
|
+
def inspect
|
355
|
+
fingerprint
|
356
|
+
end
|
262
357
|
end
|
263
358
|
|
264
359
|
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block)
|
265
360
|
parser = TSV::Parser.new stream, fix: fix, header_hash: header_hash, sep: sep
|
266
361
|
|
267
|
-
cast =
|
362
|
+
cast = kwargs[:cast]
|
363
|
+
cast = parser.options[:cast] if cast.nil?
|
268
364
|
type = kwargs[:type] ||= parser.options[:type] ||= :double
|
269
365
|
if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
|
270
366
|
TSV.setup(data, type: type)
|
@@ -297,48 +393,7 @@ module TSV
|
|
297
393
|
data.filename = filename
|
298
394
|
data.namespace = namespace
|
299
395
|
data.unnamed = unnamed
|
396
|
+
data.save_extension_attr_hash if data.respond_to?(:save_extension_attr_hash)
|
300
397
|
data
|
301
398
|
end
|
302
|
-
|
303
|
-
#def self.parse_alt(stream, key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
|
304
|
-
# options, key_field_name, field_names, first_line, preamble = parse_header(stream)
|
305
|
-
|
306
|
-
# if fields
|
307
|
-
# all_field_names ||= [key_field_name] + field_names
|
308
|
-
# positions = NamedArray.identify_name(all_field_names, fields)
|
309
|
-
# kwargs[:positions] = positions
|
310
|
-
# field_names = all_field_names.values_at *positions
|
311
|
-
# end
|
312
|
-
|
313
|
-
# if key_field
|
314
|
-
# all_field_names ||= [key_field_name] + field_names
|
315
|
-
# key = NamedArray.identify_name(all_field_names, key_field)
|
316
|
-
# kwargs[:key] = key
|
317
|
-
# key_field_name = all_field_names[key]
|
318
|
-
# if fields.nil?
|
319
|
-
# field_names = all_field_names - [key_field_name]
|
320
|
-
# end
|
321
|
-
# end
|
322
|
-
|
323
|
-
# options.each do |option,value|
|
324
|
-
# option = option.to_sym
|
325
|
-
# next unless KEY_PARAMETERS.include? option
|
326
|
-
# kwargs[option] = value unless kwargs.include?(option)
|
327
|
-
# end
|
328
|
-
|
329
|
-
# kwargs[:source_type] = options[:type]
|
330
|
-
|
331
|
-
# type = kwargs[:type] ||= :double
|
332
|
-
# if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
|
333
|
-
# TSV.setup(data, type: type, key_field: key_field_name, fields: field_names)
|
334
|
-
# data.extend TSVAdapter
|
335
|
-
# end
|
336
|
-
|
337
|
-
# data = parse_stream(stream, first_line: first_line, **kwargs, &block)
|
338
|
-
|
339
|
-
# TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => type, filename: filename, namespace: namespace)
|
340
|
-
|
341
|
-
# data
|
342
|
-
#end
|
343
|
-
|
344
399
|
end
|