scout-gear 7.2.0 → 8.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +51 -6
- data/VERSION +1 -1
- data/bin/scout +6 -3
- data/lib/rbbt-scout.rb +1 -0
- data/lib/scout/cmd.rb +1 -1
- data/lib/scout/concurrent_stream.rb +33 -29
- data/lib/scout/config.rb +1 -1
- data/lib/scout/exceptions.rb +1 -0
- data/lib/scout/log/color.rb +4 -2
- data/lib/scout/log/progress/report.rb +1 -1
- data/lib/scout/log/progress/util.rb +71 -2
- data/lib/scout/log/progress.rb +1 -1
- data/lib/scout/log/trap.rb +107 -0
- data/lib/scout/log.rb +56 -21
- data/lib/scout/meta_extension.rb +13 -6
- data/lib/scout/misc/digest.rb +1 -1
- data/lib/scout/misc/format.rb +12 -0
- data/lib/scout/misc/helper.rb +31 -0
- data/lib/scout/misc/insist.rb +1 -1
- data/lib/scout/misc/monitor.rb +12 -1
- data/lib/scout/misc/system.rb +10 -0
- data/lib/scout/misc.rb +1 -0
- data/lib/scout/named_array.rb +65 -3
- data/lib/scout/open/lock/lockfile.rb +587 -0
- data/lib/scout/open/lock.rb +28 -2
- data/lib/scout/open/remote.rb +4 -0
- data/lib/scout/open/stream.rb +111 -42
- data/lib/scout/open/util.rb +13 -3
- data/lib/scout/path/find.rb +9 -1
- data/lib/scout/path/util.rb +35 -0
- data/lib/scout/persist/serialize.rb +18 -5
- data/lib/scout/persist.rb +60 -30
- data/lib/scout/resource/path.rb +53 -0
- data/lib/scout/resource/produce.rb +0 -8
- data/lib/scout/resource/util.rb +2 -1
- data/lib/scout/semaphore.rb +8 -1
- data/lib/scout/tmpfile.rb +7 -8
- data/lib/scout/tsv/attach.rb +177 -0
- data/lib/scout/tsv/change_id.rb +40 -0
- data/lib/scout/tsv/dumper.rb +85 -54
- data/lib/scout/tsv/index.rb +188 -20
- data/lib/scout/tsv/open.rb +182 -0
- data/lib/scout/tsv/parser.rb +200 -118
- data/lib/scout/tsv/path.rb +5 -6
- data/lib/scout/tsv/persist/adapter.rb +26 -37
- data/lib/scout/tsv/persist/fix_width_table.rb +327 -0
- data/lib/scout/tsv/persist/serialize.rb +117 -0
- data/lib/scout/tsv/persist/tokyocabinet.rb +6 -3
- data/lib/scout/tsv/persist.rb +4 -2
- data/lib/scout/tsv/transformer.rb +141 -0
- data/lib/scout/tsv/traverse.rb +136 -37
- data/lib/scout/tsv/util/filter.rb +312 -0
- data/lib/scout/tsv/util/process.rb +73 -0
- data/lib/scout/tsv/util/reorder.rb +81 -0
- data/lib/scout/tsv/util/select.rb +265 -0
- data/lib/scout/tsv/util/unzip.rb +86 -0
- data/lib/scout/tsv/util.rb +126 -19
- data/lib/scout/tsv.rb +28 -5
- data/lib/scout/work_queue/socket.rb +6 -1
- data/lib/scout/work_queue/worker.rb +5 -2
- data/lib/scout/work_queue.rb +15 -8
- data/lib/scout/workflow/definition.rb +29 -2
- data/lib/scout/workflow/step/dependencies.rb +24 -4
- data/lib/scout/workflow/step/info.rb +40 -5
- data/lib/scout/workflow/step/progress.rb +14 -0
- data/lib/scout/workflow/step/provenance.rb +8 -7
- data/lib/scout/workflow/step/status.rb +45 -0
- data/lib/scout/workflow/step.rb +104 -33
- data/lib/scout/workflow/task/inputs.rb +14 -20
- data/lib/scout/workflow/task.rb +86 -47
- data/lib/scout/workflow/usage.rb +10 -6
- data/scout-gear.gemspec +30 -3
- data/scout_commands/workflow/task +37 -9
- data/scout_commands/workflow/task_old +2 -2
- data/test/scout/open/test_stream.rb +61 -59
- data/test/scout/path/test_find.rb +10 -1
- data/test/scout/resource/test_produce.rb +15 -0
- data/test/scout/test_meta_extension.rb +25 -0
- data/test/scout/test_named_array.rb +18 -0
- data/test/scout/test_persist.rb +67 -0
- data/test/scout/test_tmpfile.rb +1 -1
- data/test/scout/test_tsv.rb +222 -3
- data/test/scout/test_work_queue.rb +21 -18
- data/test/scout/tsv/persist/test_adapter.rb +11 -1
- data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
- data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
- data/test/scout/tsv/test_attach.rb +227 -0
- data/test/scout/tsv/test_change_id.rb +98 -0
- data/test/scout/tsv/test_dumper.rb +1 -1
- data/test/scout/tsv/test_index.rb +127 -3
- data/test/scout/tsv/test_open.rb +167 -0
- data/test/scout/tsv/test_parser.rb +45 -3
- data/test/scout/tsv/test_persist.rb +9 -0
- data/test/scout/tsv/test_transformer.rb +108 -0
- data/test/scout/tsv/test_traverse.rb +195 -3
- data/test/scout/tsv/test_util.rb +24 -0
- data/test/scout/tsv/util/test_filter.rb +188 -0
- data/test/scout/tsv/util/test_process.rb +47 -0
- data/test/scout/tsv/util/test_reorder.rb +94 -0
- data/test/scout/tsv/util/test_select.rb +58 -0
- data/test/scout/tsv/util/test_unzip.rb +112 -0
- data/test/scout/work_queue/test_socket.rb +0 -1
- data/test/scout/work_queue/test_worker.rb +63 -6
- data/test/scout/workflow/step/test_load.rb +3 -3
- data/test/scout/workflow/step/test_status.rb +31 -0
- data/test/scout/workflow/task/test_inputs.rb +14 -14
- data/test/scout/workflow/test_step.rb +13 -13
- data/test/scout/workflow/test_task.rb +168 -32
- data/test/scout/workflow/test_usage.rb +33 -6
- data/test/test_helper.rb +3 -1
- metadata +29 -2
@@ -0,0 +1,182 @@
|
|
1
|
+
require_relative '../open'
|
2
|
+
require_relative '../work_queue'
|
3
|
+
|
4
|
+
# Marker mixin for result objects. Open.traverse_add checks
# `Array === res && MultipleResult === res` and, when both hold, adds every
# element of the array as a separate result instead of adding the array itself.
module MultipleResult
  class << self
    # Tags +obj+ with MultipleResult and returns that same object.
    def setup(obj)
      # Object#extend returns its receiver, so this is the tagged +obj+.
      obj.extend(self)
    end
  end
end
|
10
|
+
|
11
|
+
module Open
  # Adds one traversal result +res+ to the collector +into+.
  #
  # An Array tagged with MultipleResult is unpacked and each element is added
  # on its own. Otherwise the action depends on the class of +into+:
  #
  # * TSV::Dumper (only when that constant is loaded): <tt>into.add(*res)</tt>
  # * TSV or Hash: +res+ is destructured as <tt>key, value</tt>; a collector
  #   that reports <tt>type == :double</tt> receives
  #   <tt>zip_new key, value, insitu: false</tt>, any other one a plain
  #   <tt>into[key] = value</tt>
  # * Array or Set: <tt>into << res</tt>
  # * IO or StringIO: <tt>into.puts res</tt>
  #
  # Collectors of any other class are left untouched.
  def self.traverse_add(into, res)
    if Array === res && MultipleResult === res
      res.each do |item|
        traverse_add into, item
      end
    else
      case into
      when defined?(TSV::Dumper) && TSV::Dumper
        into.add(*res)
      when TSV, Hash
        key, value = res
        # A plain Hash has no #type; asking it unconditionally raised
        # NoMethodError, so only objects that expose #type are queried.
        if into.respond_to?(:type) && into.type == :double
          into.zip_new key, value, insitu: false
        else
          into[key] = value
        end
      when Array, Set
        into << res
      when IO, StringIO
        into.puts res
      end
    end
  end

  # Iterates over +obj+, calling +block+ for every entry, and optionally
  # collects the block results.
  #
  # Supported +obj+ values and the arguments the block receives:
  #
  # * TSV          - <tt>obj.traverse</tt>, block gets <tt>(key, value, fields)</tt>
  # * Array        - block gets each element
  # * String       - a Path is first resolved with +produce_and_find+; the
  #   result is opened with Open.open and traversed as a stream
  # * Step         - raises <tt>obj.exception</tt> if <tt>obj.error?</tt>,
  #   otherwise traverses <tt>obj.stream</tt>
  # * IO           - wrapped in a TSV::Parser, block gets <tt>(key, value, fields)</tt>
  # * TSV::Parser  - block gets <tt>(key, value, fields)</tt>
  # * anything else is handed to TSV.parse, block gets <tt>(key, value)</tt>
  #
  # Keyword arguments:
  #
  # into::      collector for the non-nil block results (see traverse_add).
  #             +:stream+ creates a pipe with Open.pipe, traverses into its
  #             write end and returns the read end.
  # cpus::      when set (and not 1) the block is run through a WorkQueue
  #             with that many workers.
  # bar::       progress bar specification passed to
  #             Log::ProgressBar.get_obj_bar; ticked once per result.
  # callback::  called with every block result (after collection, if any).
  # unnamed::   forwarded to <tt>TSV#traverse</tt>.
  # keep_open:: when true a closable +into+ is not closed once done.
  # options::   forwarded to the underlying traverse/parse call.
  #
  # When +into+ responds to +close+ the traversal runs in a background
  # thread and +into+ is returned immediately; errors raised in that thread
  # are passed to <tt>into.abort</tt> when it is available. Otherwise returns
  # +into+ if given, else the value of the underlying traversal.
  def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, keep_open: false, **options, &block)
    cpus = nil if cpus == 1

    if into == :stream
      sout, sin = Open.pipe
      ConcurrentStream.setup(sout, :pair => sin)
      ConcurrentStream.setup(sin, :pair => sout)
      self.traverse(obj, into: sin, cpus: cpus, bar: bar, callback: callback, unnamed: unnamed, **options, &block)
      return sout
    end

    if into || bar
      orig_callback = callback if callback
      bar = Log::ProgressBar.get_obj_bar(obj, bar) if bar
      bar.init if bar
      # Wrap the user callback so that progress and collection happen for
      # every result regardless of which branch below produces it.
      callback = proc do |res|
        bar.tick if bar
        traverse_add into, res if into && ! res.nil?
        orig_callback.call res if orig_callback
      end

      if into.respond_to?(:close)
        into_thread = Thread.new do
          Thread.current.report_on_exception = false
          Thread.current["name"] = "Traverse into"
          begin
            self.traverse(obj, callback: callback, cpus: cpus, unnamed: unnamed, **options, &block)
            into.close if ! keep_open && into.respond_to?(:close)
            bar.remove if bar
          rescue Exception => e
            # Deliberately broad: the failure is handed to the collector so
            # whoever reads from it learns about it.
            into.abort(e) if into.respond_to?(:abort)
            bar.remove(e) if bar
          end
        end
        # Wait until the thread has started (it sets its "name" first thing).
        Thread.pass until into_thread["name"]
        return into
      end
    end

    if cpus
      queue = WorkQueue.new cpus do |args|
        block.call(*args)
      end

      queue.process do |res|
        # callback is nil when neither into, bar nor callback were given.
        callback.call res if callback
      end

      # Feed the queue; the caller's unnamed setting is kept for this pass.
      self.traverse(obj, unnamed: unnamed, **options) do |*args|
        queue.write args
      end

      begin
        queue.close
        queue.join
        bar.remove if bar
        return into
      rescue Exception => e
        bar.remove(e) if bar
        raise
      end
    end

    begin
      res = case obj
            when TSV
              obj.traverse unnamed: unnamed, **options do |k,v,f|
                value = block.call(k, v, f)
                callback.call value if callback
                nil
              end
            when Array
              obj.each do |line|
                value = block.call(line)
                callback.call value if callback
                nil
              end
            when String
              obj = obj.produce_and_find if Path === obj
              f = Open.open(obj)
              self.traverse(f, cpus: cpus, callback: callback, **options, &block)
            when Step
              raise obj.exception if obj.error?
              self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
            when IO
              parser = TSV::Parser.new obj
              parser.traverse(**options) do |k,v,f|
                value = block.call k, v, f
                callback.call value if callback
                nil
              end
            when TSV::Parser
              obj.traverse(**options) do |k,v,f|
                value = block.call k, v, f
                callback.call value if callback
                nil
              end
            else
              TSV.parse obj, **options do |k,v|
                value = block.call k, v
                callback.call value if callback
                nil
              end
            end
      bar.remove if bar
    rescue
      bar.error if bar
      raise
    end

    into || res
  end
end
|
153
|
+
|
154
|
+
module TSV
  class << self
    # Convenience alias: hands every argument and the block to Open.traverse.
    def traverse(*args, **kwargs, &block)
      Open.traverse(*args, **kwargs, &block)
    end

    # Opens a pipe with Open.open_pipe, copies the leading lines of +stream+
    # that start with +header_hash+ into it, then yields the pipe input and
    # the first line that did not match (nil once +stream+ is exhausted).
    # Returns whatever Open.open_pipe returns.
    def process_stream(stream, header_hash: "#", &block)
      Open.open_pipe do |sin|
        line = stream.gets
        while line && line.start_with?(header_hash)
          sin.puts line
          line = stream.gets
        end
        yield sin, line
      end
    end

    # Keeps the header of +stream+ as is and pipes the remainder through
    # Open.collapse_stream. Objects responding to +stream+ are unwrapped
    # first. The extra positional/keyword arguments and the block are
    # accepted but not used here.
    def collapse_stream(stream, *args, **kwargs, &block)
      source = stream.respond_to?(:stream) ? stream.stream : stream
      process_stream(source) do |sin, first_line|
        collapsed = Open.collapse_stream(source, line: first_line)
        Open.consume_stream(collapsed, false, sin)
      end
    end
  end

  # Instance flavour: collapses this object's own +dumper_stream+.
  def collapse_stream(*args, **kwargs, &block)
    TSV.collapse_stream(dumper_stream, *args, **kwargs, &block)
  end
end
|
data/lib/scout/tsv/parser.rb
CHANGED
@@ -4,21 +4,32 @@ module TSV
|
|
4
4
|
if Array === value
|
5
5
|
value.collect{|e| cast_value(e, cast) }
|
6
6
|
else
|
7
|
-
|
7
|
+
if Proc === cast
|
8
|
+
cast.call value
|
9
|
+
else
|
10
|
+
value.send(cast)
|
11
|
+
end
|
8
12
|
end
|
9
13
|
end
|
10
14
|
|
11
|
-
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil)
|
15
|
+
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil)
|
12
16
|
items = line.split(sep, -1)
|
13
17
|
|
18
|
+
return nil if select && ! TSV.select(items[0], items[1..-1], select, fields: field_names, type: type, sep: sep2)
|
19
|
+
|
14
20
|
if positions.nil? && key == 0
|
15
21
|
key = items.shift
|
16
|
-
elsif positions.nil?
|
17
|
-
|
22
|
+
elsif positions.nil?
|
23
|
+
if type == :flat
|
24
|
+
key = items[1..-1].collect{|e| e.split(sep2, -1) }.flatten
|
25
|
+
items = items.slice(0,1)
|
26
|
+
else
|
27
|
+
key = items.delete_at(key)
|
28
|
+
end
|
18
29
|
key = key.split(sep2) if type == :double
|
19
30
|
else
|
20
31
|
key, items = items[key], items.values_at(*positions)
|
21
|
-
key = key.split(sep2) if type == :double
|
32
|
+
key = key.split(sep2) if type == :double || type == :flat
|
22
33
|
end
|
23
34
|
|
24
35
|
items = case type
|
@@ -27,9 +38,9 @@ module TSV
|
|
27
38
|
when :single
|
28
39
|
items.first
|
29
40
|
when :flat
|
30
|
-
|
41
|
+
items.collect{|i| i.split(sep2, -1) }.flatten
|
31
42
|
when :double
|
32
|
-
items.collect{|i| i.split(sep2, -1) }
|
43
|
+
items.collect{|i| i.nil? ? [] : i.split(sep2, -1) }
|
33
44
|
end
|
34
45
|
|
35
46
|
|
@@ -40,21 +51,34 @@ module TSV
|
|
40
51
|
[key, items]
|
41
52
|
end
|
42
53
|
|
43
|
-
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, **kargs, &block)
|
54
|
+
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, **kargs, &block)
|
44
55
|
begin
|
45
|
-
bar = Log
|
56
|
+
bar = "Parsing #{Log.fingerprint stream}" if TrueClass === bar
|
57
|
+
bar = Log::ProgressBar.get_obj_bar(stream, bar) if bar
|
58
|
+
bar.init if bar
|
46
59
|
|
47
60
|
source_type = type if source_type.nil?
|
48
61
|
|
49
62
|
data = {} if data.nil?
|
50
|
-
merge = false if type != :double
|
63
|
+
merge = false if type != :double && type != :flat
|
51
64
|
line = first_line || stream.gets
|
52
65
|
while line
|
53
66
|
begin
|
54
|
-
line.
|
55
|
-
|
67
|
+
line.chomp!
|
68
|
+
if Proc === fix
|
69
|
+
line = fix.call line
|
70
|
+
elsif fix
|
71
|
+
line = Misc.fixutf8(line)
|
72
|
+
end
|
56
73
|
bar.tick if bar
|
57
|
-
|
74
|
+
if type == :array || type == :line
|
75
|
+
block.call line
|
76
|
+
next
|
77
|
+
end
|
78
|
+
|
79
|
+
key, items = parse_line(line, type: source_type, field_names: field_names, **kargs)
|
80
|
+
|
81
|
+
next if key.nil?
|
58
82
|
|
59
83
|
if Array === key
|
60
84
|
keys = key
|
@@ -75,80 +99,100 @@ module TSV
|
|
75
99
|
these_items = items
|
76
100
|
end
|
77
101
|
|
78
|
-
these_items =
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
102
|
+
these_items =
|
103
|
+
case [source_type, type]
|
104
|
+
when [:single, :single]
|
105
|
+
these_items
|
106
|
+
when [:list, :single]
|
107
|
+
these_items.first
|
108
|
+
when [:flat, :single]
|
109
|
+
these_items.first
|
110
|
+
when [:double, :single]
|
111
|
+
these_items.first.first
|
112
|
+
when [:single, :list]
|
113
|
+
[these_items]
|
114
|
+
when [:list, :list]
|
115
|
+
these_items
|
116
|
+
when [:flat, :list]
|
117
|
+
these_items
|
118
|
+
when [:double, :list]
|
119
|
+
these_items.collect{|l| l.first }
|
120
|
+
when [:single, :flat]
|
121
|
+
[these_items]
|
122
|
+
when [:list, :flat]
|
123
|
+
these_items
|
124
|
+
when [:flat, :flat]
|
125
|
+
these_items
|
126
|
+
when [:double, :flat]
|
127
|
+
these_items.flatten
|
128
|
+
when [:single, :double]
|
129
|
+
[[these_items]]
|
130
|
+
when [:list, :double]
|
131
|
+
these_items.collect{|l| [l] }
|
132
|
+
when [:flat, :double]
|
133
|
+
[these_items]
|
134
|
+
when [:double, :double]
|
135
|
+
these_items
|
136
|
+
end
|
112
137
|
|
113
138
|
if block_given?
|
114
|
-
res = block.call(key, these_items)
|
139
|
+
res = block.call(key, these_items, field_names)
|
115
140
|
data[key] = res unless res.nil? || FalseClass === data
|
116
141
|
next
|
117
142
|
end
|
118
143
|
|
119
144
|
if ! merge || ! data.include?(key)
|
120
145
|
data[key] = these_items
|
121
|
-
|
146
|
+
elsif type == :double
|
122
147
|
current = data[key]
|
123
148
|
if merge == :concat
|
124
149
|
these_items.each_with_index do |new,i|
|
125
|
-
|
150
|
+
new = [nil] if new.empty?
|
126
151
|
current[i].concat(new)
|
127
152
|
end
|
128
153
|
else
|
129
154
|
merged = []
|
130
155
|
these_items.each_with_index do |new,i|
|
131
|
-
|
156
|
+
new = [nil] if new.empty?
|
132
157
|
merged[i] = current[i] + new
|
133
158
|
end
|
134
159
|
data[key] = merged
|
135
160
|
end
|
161
|
+
elsif type == :flat
|
162
|
+
current = data[key]
|
163
|
+
if merge == :concat
|
164
|
+
current[i].concat these_items
|
165
|
+
else
|
166
|
+
data[key] = current + these_items
|
167
|
+
end
|
136
168
|
end
|
137
169
|
end
|
170
|
+
rescue Exception
|
171
|
+
stream.abort($!) if stream.respond_to?(:abort)
|
172
|
+
raise $!
|
138
173
|
ensure
|
139
|
-
|
174
|
+
if stream.closed?
|
175
|
+
line = nil
|
176
|
+
else
|
177
|
+
line = stream.gets
|
178
|
+
end
|
140
179
|
end
|
141
180
|
end
|
142
181
|
data
|
143
182
|
ensure
|
144
|
-
|
183
|
+
if stream.stream_exception
|
184
|
+
bar.remove(stream.stream_exception)
|
185
|
+
else
|
186
|
+
bar.remove
|
187
|
+
end if bar
|
188
|
+
stream.join if stream.respond_to?(:join)
|
145
189
|
end
|
146
190
|
end
|
147
191
|
|
148
192
|
def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
|
149
193
|
raise "Closed stream" if IO === stream && stream.closed?
|
150
194
|
|
151
|
-
|
195
|
+
opts = {}
|
152
196
|
preamble = []
|
153
197
|
|
154
198
|
# Get line
|
@@ -160,13 +204,19 @@ module TSV
|
|
160
204
|
|
161
205
|
# Process options line
|
162
206
|
if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/))
|
163
|
-
|
207
|
+
opts = IndiferentHash.string2hash m.captures.first.chomp
|
164
208
|
line = stream.gets
|
165
|
-
|
209
|
+
if line && fix
|
210
|
+
if Proc === fix
|
211
|
+
line = fix.call line
|
212
|
+
else
|
213
|
+
line = Misc.fixutf8 line.chomp if line && fix
|
214
|
+
end
|
215
|
+
end
|
166
216
|
end
|
167
217
|
|
168
218
|
# Determine separator
|
169
|
-
sep =
|
219
|
+
sep = opts[:sep] if opts[:sep]
|
170
220
|
|
171
221
|
# Process fields line
|
172
222
|
preamble << line if line
|
@@ -187,7 +237,10 @@ module TSV
|
|
187
237
|
|
188
238
|
first_line = line
|
189
239
|
|
190
|
-
[
|
240
|
+
opts[:type] = opts[:type].to_sym if opts[:type]
|
241
|
+
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
242
|
+
|
243
|
+
[opts, key_field, fields, first_line, preamble]
|
191
244
|
end
|
192
245
|
|
193
246
|
KEY_PARAMETERS = begin
|
@@ -199,43 +252,79 @@ module TSV
|
|
199
252
|
end
|
200
253
|
|
201
254
|
class Parser
|
202
|
-
attr_accessor :stream, :options, :key_field, :fields, :first_line, :preamble
|
203
|
-
def initialize(file, fix: true, header_hash: "#", sep: "\t")
|
255
|
+
attr_accessor :stream, :options, :key_field, :fields, :type, :first_line, :preamble
|
256
|
+
def initialize(file, fix: true, header_hash: "#", sep: "\t", type: :double)
|
204
257
|
if IO === file
|
205
258
|
@stream = file
|
206
259
|
else
|
207
260
|
@stream = Open.open(file)
|
208
261
|
end
|
262
|
+
@fix = fix
|
209
263
|
@options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
|
210
264
|
@options[:sep] = sep if @options[:sep].nil?
|
265
|
+
@options.merge!(:key_field => @key_field, :fields => @fields)
|
266
|
+
@type = type
|
211
267
|
end
|
212
268
|
|
213
269
|
def all_fields
|
270
|
+
return nil if @fields.nil?
|
214
271
|
[@key_field] + @fields
|
215
272
|
end
|
216
273
|
|
274
|
+
def key_field=(key_field)
|
275
|
+
@options[:key_field] = @key_field = key_field
|
276
|
+
end
|
277
|
+
|
278
|
+
def fields=(fields)
|
279
|
+
@options[:fields] = @fields = fields
|
280
|
+
end
|
281
|
+
|
282
|
+
def identify_field(name)
|
283
|
+
TSV.identify_field(@key_field, @fields, name)
|
284
|
+
end
|
285
|
+
|
217
286
|
def traverse(key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
|
287
|
+
kwargs[:type] ||= self.options[:type] ||= @type
|
288
|
+
kwargs[:type] = kwargs[:type].to_sym if kwargs[:type]
|
289
|
+
|
218
290
|
if fields
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
291
|
+
if @fields
|
292
|
+
all_field_names ||= [@key_field] + @fields
|
293
|
+
fields = all_field_names if fields == :all
|
294
|
+
positions = NamedArray.identify_name(all_field_names, fields)
|
295
|
+
kwargs[:positions] = positions
|
296
|
+
field_names = all_field_names.values_at *positions
|
297
|
+
elsif fields.reject{|f| Numeric === f}.empty?
|
298
|
+
positions = fields
|
299
|
+
kwargs[:positions] = positions
|
300
|
+
else
|
301
|
+
raise "Non-numeric fields specified, but no field names available"
|
302
|
+
end
|
223
303
|
else
|
224
304
|
field_names = @fields
|
225
305
|
end
|
226
306
|
|
227
307
|
if key_field
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
308
|
+
if @fields
|
309
|
+
all_field_names ||= [@key_field] + @fields
|
310
|
+
key = NamedArray.identify_name(all_field_names, key_field)
|
311
|
+
kwargs[:key] = key == :key ? 0 : key
|
312
|
+
key_field_name = key === :key ? @key_field : all_field_names[key]
|
313
|
+
if fields.nil?
|
314
|
+
field_names = all_field_names - [key_field_name]
|
315
|
+
end
|
316
|
+
else
|
317
|
+
kwargs[:key] = key_field == :key ? 0 : key_field
|
318
|
+
key = key_field
|
234
319
|
end
|
235
320
|
else
|
236
321
|
key_field_name = @key_field
|
237
322
|
end
|
238
323
|
|
324
|
+
if field_names && (kwargs[:type] == :single || kwargs[:type] == :flat)
|
325
|
+
field_names = field_names.slice(0,1)
|
326
|
+
end
|
327
|
+
|
239
328
|
@options.each do |option,value|
|
240
329
|
option = option.to_sym
|
241
330
|
next unless KEY_PARAMETERS.include? option
|
@@ -245,23 +334,56 @@ module TSV
|
|
245
334
|
kwargs[:source_type] = @options[:type]
|
246
335
|
kwargs[:data] = false if kwargs[:data].nil?
|
247
336
|
|
248
|
-
data = TSV.parse_stream(@stream, first_line: @first_line, **kwargs, &block)
|
337
|
+
data = TSV.parse_stream(@stream, first_line: @first_line, fix: @fix, field_names: @fields, **kwargs, &block)
|
249
338
|
|
250
|
-
|
339
|
+
if data
|
340
|
+
TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
|
341
|
+
else
|
342
|
+
[self.key_field, self.fields]
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
346
|
+
def fingerprint
|
347
|
+
"Parser:{"<< Log.fingerprint(self.all_fields|| []) << "}"
|
348
|
+
end
|
251
349
|
|
252
|
-
|
350
|
+
def digest_str
|
351
|
+
fingerprint
|
253
352
|
end
|
254
353
|
|
354
|
+
def inspect
|
355
|
+
fingerprint
|
356
|
+
end
|
255
357
|
end
|
256
358
|
|
257
|
-
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil,
|
359
|
+
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block)
|
258
360
|
parser = TSV::Parser.new stream, fix: fix, header_hash: header_hash, sep: sep
|
259
|
-
kwargs = parser.options.merge(kwargs)
|
260
361
|
|
261
|
-
|
362
|
+
cast = kwargs[:cast]
|
363
|
+
cast = parser.options[:cast] if cast.nil?
|
364
|
+
type = kwargs[:type] ||= parser.options[:type] ||= :double
|
262
365
|
if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
|
263
366
|
TSV.setup(data, type: type)
|
264
367
|
data.extend TSVAdapter
|
368
|
+
if serializer
|
369
|
+
data.serializer = serializer
|
370
|
+
elsif cast
|
371
|
+
data.serializer =
|
372
|
+
case [cast, type]
|
373
|
+
when [:to_i, :single]
|
374
|
+
:integer
|
375
|
+
when [:to_i, :list], [:to_i, :flat]
|
376
|
+
:integer_array
|
377
|
+
when [:to_f, :single]
|
378
|
+
:float
|
379
|
+
when [:to_f, :list], [:to_f, :flat]
|
380
|
+
:float_array
|
381
|
+
else
|
382
|
+
type
|
383
|
+
end
|
384
|
+
else
|
385
|
+
data.serializer = type
|
386
|
+
end
|
265
387
|
end
|
266
388
|
|
267
389
|
kwargs[:data] = {} if kwargs[:data].nil?
|
@@ -270,48 +392,8 @@ module TSV
|
|
270
392
|
data.type = type
|
271
393
|
data.filename = filename
|
272
394
|
data.namespace = namespace
|
395
|
+
data.unnamed = unnamed
|
396
|
+
data.save_extension_attr_hash if data.respond_to?(:save_extension_attr_hash)
|
273
397
|
data
|
274
398
|
end
|
275
|
-
|
276
|
-
#def self.parse_alt(stream, key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
|
277
|
-
# options, key_field_name, field_names, first_line, preamble = parse_header(stream)
|
278
|
-
|
279
|
-
# if fields
|
280
|
-
# all_field_names ||= [key_field_name] + field_names
|
281
|
-
# positions = NamedArray.identify_name(all_field_names, fields)
|
282
|
-
# kwargs[:positions] = positions
|
283
|
-
# field_names = all_field_names.values_at *positions
|
284
|
-
# end
|
285
|
-
|
286
|
-
# if key_field
|
287
|
-
# all_field_names ||= [key_field_name] + field_names
|
288
|
-
# key = NamedArray.identify_name(all_field_names, key_field)
|
289
|
-
# kwargs[:key] = key
|
290
|
-
# key_field_name = all_field_names[key]
|
291
|
-
# if fields.nil?
|
292
|
-
# field_names = all_field_names - [key_field_name]
|
293
|
-
# end
|
294
|
-
# end
|
295
|
-
|
296
|
-
# options.each do |option,value|
|
297
|
-
# option = option.to_sym
|
298
|
-
# next unless KEY_PARAMETERS.include? option
|
299
|
-
# kwargs[option] = value unless kwargs.include?(option)
|
300
|
-
# end
|
301
|
-
|
302
|
-
# kwargs[:source_type] = options[:type]
|
303
|
-
|
304
|
-
# type = kwargs[:type] ||= :double
|
305
|
-
# if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
|
306
|
-
# TSV.setup(data, type: type, key_field: key_field_name, fields: field_names)
|
307
|
-
# data.extend TSVAdapter
|
308
|
-
# end
|
309
|
-
|
310
|
-
# data = parse_stream(stream, first_line: first_line, **kwargs, &block)
|
311
|
-
|
312
|
-
# TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => type, filename: filename, namespace: namespace)
|
313
|
-
|
314
|
-
# data
|
315
|
-
#end
|
316
|
-
|
317
399
|
end
|
data/lib/scout/tsv/path.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
module Path
|
2
|
-
def tsv(
|
3
|
-
found =
|
4
|
-
found
|
5
|
-
TSV.open(found, ...)
|
2
|
+
def tsv(*args, **kwargs, &block)
|
3
|
+
found = produce_and_find('tsv')
|
4
|
+
TSV.open(found, *args, **kwargs, &block)
|
6
5
|
end
|
7
6
|
|
8
|
-
def index(
|
7
|
+
def index(*args, **kwargs, &block)
|
9
8
|
found = self.find
|
10
9
|
found = self.set_extension('tsv').find unless found.exists?
|
11
|
-
TSV.index(found,
|
10
|
+
TSV.index(found, *args, **kwargs, &block)
|
12
11
|
end
|
13
12
|
end
|