scout-gear 7.2.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +51 -6
- data/VERSION +1 -1
- data/bin/scout +6 -3
- data/lib/rbbt-scout.rb +1 -0
- data/lib/scout/cmd.rb +1 -1
- data/lib/scout/concurrent_stream.rb +33 -29
- data/lib/scout/config.rb +1 -1
- data/lib/scout/exceptions.rb +1 -0
- data/lib/scout/log/color.rb +4 -2
- data/lib/scout/log/progress/report.rb +1 -1
- data/lib/scout/log/progress/util.rb +71 -2
- data/lib/scout/log/progress.rb +1 -1
- data/lib/scout/log/trap.rb +107 -0
- data/lib/scout/log.rb +56 -21
- data/lib/scout/meta_extension.rb +13 -6
- data/lib/scout/misc/digest.rb +1 -1
- data/lib/scout/misc/format.rb +12 -0
- data/lib/scout/misc/helper.rb +31 -0
- data/lib/scout/misc/insist.rb +1 -1
- data/lib/scout/misc/monitor.rb +12 -1
- data/lib/scout/misc/system.rb +10 -0
- data/lib/scout/misc.rb +1 -0
- data/lib/scout/named_array.rb +65 -3
- data/lib/scout/open/lock/lockfile.rb +587 -0
- data/lib/scout/open/lock.rb +28 -2
- data/lib/scout/open/remote.rb +4 -0
- data/lib/scout/open/stream.rb +111 -42
- data/lib/scout/open/util.rb +13 -3
- data/lib/scout/path/find.rb +9 -1
- data/lib/scout/path/util.rb +35 -0
- data/lib/scout/persist/serialize.rb +18 -5
- data/lib/scout/persist.rb +60 -30
- data/lib/scout/resource/path.rb +53 -0
- data/lib/scout/resource/produce.rb +0 -8
- data/lib/scout/resource/util.rb +2 -1
- data/lib/scout/semaphore.rb +8 -1
- data/lib/scout/tmpfile.rb +7 -8
- data/lib/scout/tsv/attach.rb +177 -0
- data/lib/scout/tsv/change_id.rb +40 -0
- data/lib/scout/tsv/dumper.rb +85 -54
- data/lib/scout/tsv/index.rb +188 -20
- data/lib/scout/tsv/open.rb +182 -0
- data/lib/scout/tsv/parser.rb +200 -118
- data/lib/scout/tsv/path.rb +5 -6
- data/lib/scout/tsv/persist/adapter.rb +26 -37
- data/lib/scout/tsv/persist/fix_width_table.rb +327 -0
- data/lib/scout/tsv/persist/serialize.rb +117 -0
- data/lib/scout/tsv/persist/tokyocabinet.rb +6 -3
- data/lib/scout/tsv/persist.rb +4 -2
- data/lib/scout/tsv/transformer.rb +141 -0
- data/lib/scout/tsv/traverse.rb +136 -37
- data/lib/scout/tsv/util/filter.rb +312 -0
- data/lib/scout/tsv/util/process.rb +73 -0
- data/lib/scout/tsv/util/reorder.rb +81 -0
- data/lib/scout/tsv/util/select.rb +265 -0
- data/lib/scout/tsv/util/unzip.rb +86 -0
- data/lib/scout/tsv/util.rb +126 -19
- data/lib/scout/tsv.rb +28 -5
- data/lib/scout/work_queue/socket.rb +6 -1
- data/lib/scout/work_queue/worker.rb +5 -2
- data/lib/scout/work_queue.rb +15 -8
- data/lib/scout/workflow/definition.rb +29 -2
- data/lib/scout/workflow/step/dependencies.rb +24 -4
- data/lib/scout/workflow/step/info.rb +40 -5
- data/lib/scout/workflow/step/progress.rb +14 -0
- data/lib/scout/workflow/step/provenance.rb +8 -7
- data/lib/scout/workflow/step/status.rb +45 -0
- data/lib/scout/workflow/step.rb +104 -33
- data/lib/scout/workflow/task/inputs.rb +14 -20
- data/lib/scout/workflow/task.rb +86 -47
- data/lib/scout/workflow/usage.rb +10 -6
- data/scout-gear.gemspec +30 -3
- data/scout_commands/workflow/task +37 -9
- data/scout_commands/workflow/task_old +2 -2
- data/test/scout/open/test_stream.rb +61 -59
- data/test/scout/path/test_find.rb +10 -1
- data/test/scout/resource/test_produce.rb +15 -0
- data/test/scout/test_meta_extension.rb +25 -0
- data/test/scout/test_named_array.rb +18 -0
- data/test/scout/test_persist.rb +67 -0
- data/test/scout/test_tmpfile.rb +1 -1
- data/test/scout/test_tsv.rb +222 -3
- data/test/scout/test_work_queue.rb +21 -18
- data/test/scout/tsv/persist/test_adapter.rb +11 -1
- data/test/scout/tsv/persist/test_fix_width_table.rb +134 -0
- data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
- data/test/scout/tsv/test_attach.rb +227 -0
- data/test/scout/tsv/test_change_id.rb +98 -0
- data/test/scout/tsv/test_dumper.rb +1 -1
- data/test/scout/tsv/test_index.rb +127 -3
- data/test/scout/tsv/test_open.rb +167 -0
- data/test/scout/tsv/test_parser.rb +45 -3
- data/test/scout/tsv/test_persist.rb +9 -0
- data/test/scout/tsv/test_transformer.rb +108 -0
- data/test/scout/tsv/test_traverse.rb +195 -3
- data/test/scout/tsv/test_util.rb +24 -0
- data/test/scout/tsv/util/test_filter.rb +188 -0
- data/test/scout/tsv/util/test_process.rb +47 -0
- data/test/scout/tsv/util/test_reorder.rb +94 -0
- data/test/scout/tsv/util/test_select.rb +58 -0
- data/test/scout/tsv/util/test_unzip.rb +112 -0
- data/test/scout/work_queue/test_socket.rb +0 -1
- data/test/scout/work_queue/test_worker.rb +63 -6
- data/test/scout/workflow/step/test_load.rb +3 -3
- data/test/scout/workflow/step/test_status.rb +31 -0
- data/test/scout/workflow/task/test_inputs.rb +14 -14
- data/test/scout/workflow/test_step.rb +13 -13
- data/test/scout/workflow/test_task.rb +168 -32
- data/test/scout/workflow/test_usage.rb +33 -6
- data/test/test_helper.rb +3 -1
- metadata +29 -2
@@ -0,0 +1,182 @@
|
|
1
|
+
require_relative '../open'
|
2
|
+
require_relative '../work_queue'
|
3
|
+
|
4
|
+
module MultipleResult
|
5
|
+
def self.setup(obj)
|
6
|
+
obj.extend MultipleResult
|
7
|
+
obj
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
module Open
|
12
|
+
def self.traverse_add(into, res)
|
13
|
+
if Array === res && MultipleResult === res
|
14
|
+
res.each do |_res|
|
15
|
+
traverse_add into, _res
|
16
|
+
end
|
17
|
+
else
|
18
|
+
case into
|
19
|
+
when defined?(TSV::Dumper) && TSV::Dumper
|
20
|
+
into.add *res
|
21
|
+
when TSV, Hash
|
22
|
+
key, value = res
|
23
|
+
if into.type == :double
|
24
|
+
into.zip_new key, value, insitu: false
|
25
|
+
else
|
26
|
+
into[key] = value
|
27
|
+
end
|
28
|
+
when Array, Set
|
29
|
+
into << res
|
30
|
+
when IO, StringIO
|
31
|
+
into.puts res
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
36
|
+
def self.traverse(obj, into: nil, cpus: nil, bar: nil, callback: nil, unnamed: true, keep_open: false, **options, &block)
|
37
|
+
cpus = nil if cpus == 1
|
38
|
+
|
39
|
+
if into == :stream
|
40
|
+
sout, sin = Open.pipe
|
41
|
+
ConcurrentStream.setup(sout, :pair => sin)
|
42
|
+
ConcurrentStream.setup(sin, :pair => sout)
|
43
|
+
self.traverse(obj, into: sin, cpus: cpus, bar: bar, callback: callback, unnamed: unnamed, **options, &block)
|
44
|
+
return sout
|
45
|
+
end
|
46
|
+
|
47
|
+
if into || bar
|
48
|
+
orig_callback = callback if callback
|
49
|
+
bar = Log::ProgressBar.get_obj_bar(obj, bar) if bar
|
50
|
+
bar.init if bar
|
51
|
+
callback = proc do |res|
|
52
|
+
bar.tick if bar
|
53
|
+
traverse_add into, res if into && ! res.nil?
|
54
|
+
orig_callback.call res if orig_callback
|
55
|
+
end
|
56
|
+
|
57
|
+
if into.respond_to?(:close)
|
58
|
+
into_thread = Thread.new do
|
59
|
+
Thread.current.report_on_exception = false
|
60
|
+
Thread.current["name"] = "Traverse into"
|
61
|
+
error = false
|
62
|
+
begin
|
63
|
+
self.traverse(obj, callback: callback, cpus: cpus, unnamed: unnamed, **options, &block)
|
64
|
+
into.close if ! keep_open && into.respond_to?(:close)
|
65
|
+
bar.remove if bar
|
66
|
+
rescue Exception
|
67
|
+
into.abort($!) if into.respond_to?(:abort)
|
68
|
+
bar.remove($!) if bar
|
69
|
+
end
|
70
|
+
end
|
71
|
+
Thread.pass until into_thread["name"]
|
72
|
+
return into
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
if cpus
|
77
|
+
queue = WorkQueue.new cpus do |args|
|
78
|
+
block.call *args
|
79
|
+
end
|
80
|
+
|
81
|
+
queue.process do |res|
|
82
|
+
callback.call res
|
83
|
+
end
|
84
|
+
|
85
|
+
self.traverse(obj, **options) do |*args|
|
86
|
+
queue.write args
|
87
|
+
end
|
88
|
+
|
89
|
+
begin
|
90
|
+
queue.close
|
91
|
+
|
92
|
+
queue.join
|
93
|
+
|
94
|
+
bar.remove if bar
|
95
|
+
return into
|
96
|
+
rescue Exception
|
97
|
+
bar.remove($!) if bar
|
98
|
+
raise $!
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
begin
|
103
|
+
res = case obj
|
104
|
+
when TSV
|
105
|
+
#obj.traverse options[:key_field], options[:fields], unnamed: unnamed, **options do |k,v,f|
|
106
|
+
obj.traverse unnamed: unnamed, **options do |k,v,f|
|
107
|
+
res = block.call(k, v, f)
|
108
|
+
callback.call res if callback
|
109
|
+
nil
|
110
|
+
end
|
111
|
+
when Array
|
112
|
+
obj.each do |line|
|
113
|
+
res = block.call(line)
|
114
|
+
callback.call res if callback
|
115
|
+
nil
|
116
|
+
end
|
117
|
+
when String
|
118
|
+
obj = obj.produce_and_find if Path === obj
|
119
|
+
f = Open.open(obj)
|
120
|
+
self.traverse(f, cpus: cpus, callback: callback, **options, &block)
|
121
|
+
when Step
|
122
|
+
raise obj.exception if obj.error?
|
123
|
+
self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
|
124
|
+
when IO
|
125
|
+
parser = TSV::Parser.new obj
|
126
|
+
parser.traverse **options do |k,v,f|
|
127
|
+
res = block.call k,v,f
|
128
|
+
callback.call res if callback
|
129
|
+
nil
|
130
|
+
end
|
131
|
+
when TSV::Parser
|
132
|
+
obj.traverse **options do |k,v,f|
|
133
|
+
res = block.call k, v, f
|
134
|
+
callback.call res if callback
|
135
|
+
nil
|
136
|
+
end
|
137
|
+
else
|
138
|
+
TSV.parse obj, **options do |k,v|
|
139
|
+
res = block.call k, v
|
140
|
+
callback.call res if callback
|
141
|
+
nil
|
142
|
+
end
|
143
|
+
end
|
144
|
+
bar.remove if bar
|
145
|
+
rescue
|
146
|
+
bar.error if bar
|
147
|
+
raise $!
|
148
|
+
end
|
149
|
+
|
150
|
+
into || res
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
module TSV
|
155
|
+
def self.traverse(*args, **kwargs, &block)
|
156
|
+
Open.traverse(*args, **kwargs, &block)
|
157
|
+
end
|
158
|
+
|
159
|
+
def self.process_stream(stream, header_hash: "#", &block)
|
160
|
+
sout = Open.open_pipe do |sin|
|
161
|
+
while line = stream.gets
|
162
|
+
break unless line.start_with?(header_hash)
|
163
|
+
sin.puts line
|
164
|
+
end
|
165
|
+
yield sin, line
|
166
|
+
end
|
167
|
+
end
|
168
|
+
|
169
|
+
def self.collapse_stream(stream, *args, **kwargs, &block)
|
170
|
+
stream = stream.stream if stream.respond_to?(:stream)
|
171
|
+
self.process_stream(stream) do |sin, line|
|
172
|
+
collapsed = Open.collapse_stream(stream, line: line)
|
173
|
+
Open.consume_stream(collapsed, false, sin)
|
174
|
+
end
|
175
|
+
end
|
176
|
+
|
177
|
+
def collapse_stream(*args, **kwargs, &block)
|
178
|
+
TSV.collapse_stream(self.dumper_stream, *args, **kwargs, &block)
|
179
|
+
end
|
180
|
+
|
181
|
+
|
182
|
+
end
|
data/lib/scout/tsv/parser.rb
CHANGED
@@ -4,21 +4,32 @@ module TSV
|
|
4
4
|
if Array === value
|
5
5
|
value.collect{|e| cast_value(e, cast) }
|
6
6
|
else
|
7
|
-
|
7
|
+
if Proc === cast
|
8
|
+
cast.call value
|
9
|
+
else
|
10
|
+
value.send(cast)
|
11
|
+
end
|
8
12
|
end
|
9
13
|
end
|
10
14
|
|
11
|
-
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil)
|
15
|
+
def self.parse_line(line, type: :list, key: 0, positions: nil, sep: "\t", sep2: "|", cast: nil, select: nil, field_names: nil)
|
12
16
|
items = line.split(sep, -1)
|
13
17
|
|
18
|
+
return nil if select && ! TSV.select(items[0], items[1..-1], select, fields: field_names, type: type, sep: sep2)
|
19
|
+
|
14
20
|
if positions.nil? && key == 0
|
15
21
|
key = items.shift
|
16
|
-
elsif positions.nil?
|
17
|
-
|
22
|
+
elsif positions.nil?
|
23
|
+
if type == :flat
|
24
|
+
key = items[1..-1].collect{|e| e.split(sep2, -1) }.flatten
|
25
|
+
items = items.slice(0,1)
|
26
|
+
else
|
27
|
+
key = items.delete_at(key)
|
28
|
+
end
|
18
29
|
key = key.split(sep2) if type == :double
|
19
30
|
else
|
20
31
|
key, items = items[key], items.values_at(*positions)
|
21
|
-
key = key.split(sep2) if type == :double
|
32
|
+
key = key.split(sep2) if type == :double || type == :flat
|
22
33
|
end
|
23
34
|
|
24
35
|
items = case type
|
@@ -27,9 +38,9 @@ module TSV
|
|
27
38
|
when :single
|
28
39
|
items.first
|
29
40
|
when :flat
|
30
|
-
|
41
|
+
items.collect{|i| i.split(sep2, -1) }.flatten
|
31
42
|
when :double
|
32
|
-
items.collect{|i| i.split(sep2, -1) }
|
43
|
+
items.collect{|i| i.nil? ? [] : i.split(sep2, -1) }
|
33
44
|
end
|
34
45
|
|
35
46
|
|
@@ -40,21 +51,34 @@ module TSV
|
|
40
51
|
[key, items]
|
41
52
|
end
|
42
53
|
|
43
|
-
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, **kargs, &block)
|
54
|
+
def self.parse_stream(stream, data: nil, source_type: nil, type: :list, merge: true, one2one: false, fix: true, bar: false, first_line: nil, field_names: nil, **kargs, &block)
|
44
55
|
begin
|
45
|
-
bar = Log
|
56
|
+
bar = "Parsing #{Log.fingerprint stream}" if TrueClass === bar
|
57
|
+
bar = Log::ProgressBar.get_obj_bar(stream, bar) if bar
|
58
|
+
bar.init if bar
|
46
59
|
|
47
60
|
source_type = type if source_type.nil?
|
48
61
|
|
49
62
|
data = {} if data.nil?
|
50
|
-
merge = false if type != :double
|
63
|
+
merge = false if type != :double && type != :flat
|
51
64
|
line = first_line || stream.gets
|
52
65
|
while line
|
53
66
|
begin
|
54
|
-
line.
|
55
|
-
|
67
|
+
line.chomp!
|
68
|
+
if Proc === fix
|
69
|
+
line = fix.call line
|
70
|
+
elsif fix
|
71
|
+
line = Misc.fixutf8(line)
|
72
|
+
end
|
56
73
|
bar.tick if bar
|
57
|
-
|
74
|
+
if type == :array || type == :line
|
75
|
+
block.call line
|
76
|
+
next
|
77
|
+
end
|
78
|
+
|
79
|
+
key, items = parse_line(line, type: source_type, field_names: field_names, **kargs)
|
80
|
+
|
81
|
+
next if key.nil?
|
58
82
|
|
59
83
|
if Array === key
|
60
84
|
keys = key
|
@@ -75,80 +99,100 @@ module TSV
|
|
75
99
|
these_items = items
|
76
100
|
end
|
77
101
|
|
78
|
-
these_items =
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
102
|
+
these_items =
|
103
|
+
case [source_type, type]
|
104
|
+
when [:single, :single]
|
105
|
+
these_items
|
106
|
+
when [:list, :single]
|
107
|
+
these_items.first
|
108
|
+
when [:flat, :single]
|
109
|
+
these_items.first
|
110
|
+
when [:double, :single]
|
111
|
+
these_items.first.first
|
112
|
+
when [:single, :list]
|
113
|
+
[these_items]
|
114
|
+
when [:list, :list]
|
115
|
+
these_items
|
116
|
+
when [:flat, :list]
|
117
|
+
these_items
|
118
|
+
when [:double, :list]
|
119
|
+
these_items.collect{|l| l.first }
|
120
|
+
when [:single, :flat]
|
121
|
+
[these_items]
|
122
|
+
when [:list, :flat]
|
123
|
+
these_items
|
124
|
+
when [:flat, :flat]
|
125
|
+
these_items
|
126
|
+
when [:double, :flat]
|
127
|
+
these_items.flatten
|
128
|
+
when [:single, :double]
|
129
|
+
[[these_items]]
|
130
|
+
when [:list, :double]
|
131
|
+
these_items.collect{|l| [l] }
|
132
|
+
when [:flat, :double]
|
133
|
+
[these_items]
|
134
|
+
when [:double, :double]
|
135
|
+
these_items
|
136
|
+
end
|
112
137
|
|
113
138
|
if block_given?
|
114
|
-
res = block.call(key, these_items)
|
139
|
+
res = block.call(key, these_items, field_names)
|
115
140
|
data[key] = res unless res.nil? || FalseClass === data
|
116
141
|
next
|
117
142
|
end
|
118
143
|
|
119
144
|
if ! merge || ! data.include?(key)
|
120
145
|
data[key] = these_items
|
121
|
-
|
146
|
+
elsif type == :double
|
122
147
|
current = data[key]
|
123
148
|
if merge == :concat
|
124
149
|
these_items.each_with_index do |new,i|
|
125
|
-
|
150
|
+
new = [nil] if new.empty?
|
126
151
|
current[i].concat(new)
|
127
152
|
end
|
128
153
|
else
|
129
154
|
merged = []
|
130
155
|
these_items.each_with_index do |new,i|
|
131
|
-
|
156
|
+
new = [nil] if new.empty?
|
132
157
|
merged[i] = current[i] + new
|
133
158
|
end
|
134
159
|
data[key] = merged
|
135
160
|
end
|
161
|
+
elsif type == :flat
|
162
|
+
current = data[key]
|
163
|
+
if merge == :concat
|
164
|
+
current[i].concat these_items
|
165
|
+
else
|
166
|
+
data[key] = current + these_items
|
167
|
+
end
|
136
168
|
end
|
137
169
|
end
|
170
|
+
rescue Exception
|
171
|
+
stream.abort($!) if stream.respond_to?(:abort)
|
172
|
+
raise $!
|
138
173
|
ensure
|
139
|
-
|
174
|
+
if stream.closed?
|
175
|
+
line = nil
|
176
|
+
else
|
177
|
+
line = stream.gets
|
178
|
+
end
|
140
179
|
end
|
141
180
|
end
|
142
181
|
data
|
143
182
|
ensure
|
144
|
-
|
183
|
+
if stream.stream_exception
|
184
|
+
bar.remove(stream.stream_exception)
|
185
|
+
else
|
186
|
+
bar.remove
|
187
|
+
end if bar
|
188
|
+
stream.join if stream.respond_to?(:join)
|
145
189
|
end
|
146
190
|
end
|
147
191
|
|
148
192
|
def self.parse_header(stream, fix: true, header_hash: '#', sep: "\t")
|
149
193
|
raise "Closed stream" if IO === stream && stream.closed?
|
150
194
|
|
151
|
-
|
195
|
+
opts = {}
|
152
196
|
preamble = []
|
153
197
|
|
154
198
|
# Get line
|
@@ -160,13 +204,19 @@ module TSV
|
|
160
204
|
|
161
205
|
# Process options line
|
162
206
|
if line and (String === header_hash && m = line.match(/^#{header_hash}: (.*)/))
|
163
|
-
|
207
|
+
opts = IndiferentHash.string2hash m.captures.first.chomp
|
164
208
|
line = stream.gets
|
165
|
-
|
209
|
+
if line && fix
|
210
|
+
if Proc === fix
|
211
|
+
line = fix.call line
|
212
|
+
else
|
213
|
+
line = Misc.fixutf8 line.chomp if line && fix
|
214
|
+
end
|
215
|
+
end
|
166
216
|
end
|
167
217
|
|
168
218
|
# Determine separator
|
169
|
-
sep =
|
219
|
+
sep = opts[:sep] if opts[:sep]
|
170
220
|
|
171
221
|
# Process fields line
|
172
222
|
preamble << line if line
|
@@ -187,7 +237,10 @@ module TSV
|
|
187
237
|
|
188
238
|
first_line = line
|
189
239
|
|
190
|
-
[
|
240
|
+
opts[:type] = opts[:type].to_sym if opts[:type]
|
241
|
+
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
242
|
+
|
243
|
+
[opts, key_field, fields, first_line, preamble]
|
191
244
|
end
|
192
245
|
|
193
246
|
KEY_PARAMETERS = begin
|
@@ -199,43 +252,79 @@ module TSV
|
|
199
252
|
end
|
200
253
|
|
201
254
|
class Parser
|
202
|
-
attr_accessor :stream, :options, :key_field, :fields, :first_line, :preamble
|
203
|
-
def initialize(file, fix: true, header_hash: "#", sep: "\t")
|
255
|
+
attr_accessor :stream, :options, :key_field, :fields, :type, :first_line, :preamble
|
256
|
+
def initialize(file, fix: true, header_hash: "#", sep: "\t", type: :double)
|
204
257
|
if IO === file
|
205
258
|
@stream = file
|
206
259
|
else
|
207
260
|
@stream = Open.open(file)
|
208
261
|
end
|
262
|
+
@fix = fix
|
209
263
|
@options, @key_field, @fields, @first_line, @preamble = TSV.parse_header(@stream, fix:fix, header_hash:header_hash, sep:sep)
|
210
264
|
@options[:sep] = sep if @options[:sep].nil?
|
265
|
+
@options.merge!(:key_field => @key_field, :fields => @fields)
|
266
|
+
@type = type
|
211
267
|
end
|
212
268
|
|
213
269
|
def all_fields
|
270
|
+
return nil if @fields.nil?
|
214
271
|
[@key_field] + @fields
|
215
272
|
end
|
216
273
|
|
274
|
+
def key_field=(key_field)
|
275
|
+
@options[:key_field] = @key_field = key_field
|
276
|
+
end
|
277
|
+
|
278
|
+
def fields=(fields)
|
279
|
+
@options[:fields] = @fields = fields
|
280
|
+
end
|
281
|
+
|
282
|
+
def identify_field(name)
|
283
|
+
TSV.identify_field(@key_field, @fields, name)
|
284
|
+
end
|
285
|
+
|
217
286
|
def traverse(key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
|
287
|
+
kwargs[:type] ||= self.options[:type] ||= @type
|
288
|
+
kwargs[:type] = kwargs[:type].to_sym if kwargs[:type]
|
289
|
+
|
218
290
|
if fields
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
291
|
+
if @fields
|
292
|
+
all_field_names ||= [@key_field] + @fields
|
293
|
+
fields = all_field_names if fields == :all
|
294
|
+
positions = NamedArray.identify_name(all_field_names, fields)
|
295
|
+
kwargs[:positions] = positions
|
296
|
+
field_names = all_field_names.values_at *positions
|
297
|
+
elsif fields.reject{|f| Numeric === f}.empty?
|
298
|
+
positions = fields
|
299
|
+
kwargs[:positions] = positions
|
300
|
+
else
|
301
|
+
raise "Non-numeric fields specified, but no field names available"
|
302
|
+
end
|
223
303
|
else
|
224
304
|
field_names = @fields
|
225
305
|
end
|
226
306
|
|
227
307
|
if key_field
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
308
|
+
if @fields
|
309
|
+
all_field_names ||= [@key_field] + @fields
|
310
|
+
key = NamedArray.identify_name(all_field_names, key_field)
|
311
|
+
kwargs[:key] = key == :key ? 0 : key
|
312
|
+
key_field_name = key === :key ? @key_field : all_field_names[key]
|
313
|
+
if fields.nil?
|
314
|
+
field_names = all_field_names - [key_field_name]
|
315
|
+
end
|
316
|
+
else
|
317
|
+
kwargs[:key] = key_field == :key ? 0 : key_field
|
318
|
+
key = key_field
|
234
319
|
end
|
235
320
|
else
|
236
321
|
key_field_name = @key_field
|
237
322
|
end
|
238
323
|
|
324
|
+
if field_names && (kwargs[:type] == :single || kwargs[:type] == :flat)
|
325
|
+
field_names = field_names.slice(0,1)
|
326
|
+
end
|
327
|
+
|
239
328
|
@options.each do |option,value|
|
240
329
|
option = option.to_sym
|
241
330
|
next unless KEY_PARAMETERS.include? option
|
@@ -245,23 +334,56 @@ module TSV
|
|
245
334
|
kwargs[:source_type] = @options[:type]
|
246
335
|
kwargs[:data] = false if kwargs[:data].nil?
|
247
336
|
|
248
|
-
data = TSV.parse_stream(@stream, first_line: @first_line, **kwargs, &block)
|
337
|
+
data = TSV.parse_stream(@stream, first_line: @first_line, fix: @fix, field_names: @fields, **kwargs, &block)
|
249
338
|
|
250
|
-
|
339
|
+
if data
|
340
|
+
TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
|
341
|
+
else
|
342
|
+
[self.key_field, self.fields]
|
343
|
+
end
|
344
|
+
end
|
345
|
+
|
346
|
+
def fingerprint
|
347
|
+
"Parser:{"<< Log.fingerprint(self.all_fields|| []) << "}"
|
348
|
+
end
|
251
349
|
|
252
|
-
|
350
|
+
def digest_str
|
351
|
+
fingerprint
|
253
352
|
end
|
254
353
|
|
354
|
+
def inspect
|
355
|
+
fingerprint
|
356
|
+
end
|
255
357
|
end
|
256
358
|
|
257
|
-
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil,
|
359
|
+
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: false, serializer: nil, **kwargs, &block)
|
258
360
|
parser = TSV::Parser.new stream, fix: fix, header_hash: header_hash, sep: sep
|
259
|
-
kwargs = parser.options.merge(kwargs)
|
260
361
|
|
261
|
-
|
362
|
+
cast = kwargs[:cast]
|
363
|
+
cast = parser.options[:cast] if cast.nil?
|
364
|
+
type = kwargs[:type] ||= parser.options[:type] ||= :double
|
262
365
|
if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
|
263
366
|
TSV.setup(data, type: type)
|
264
367
|
data.extend TSVAdapter
|
368
|
+
if serializer
|
369
|
+
data.serializer = serializer
|
370
|
+
elsif cast
|
371
|
+
data.serializer =
|
372
|
+
case [cast, type]
|
373
|
+
when [:to_i, :single]
|
374
|
+
:integer
|
375
|
+
when [:to_i, :list], [:to_i, :flat]
|
376
|
+
:integer_array
|
377
|
+
when [:to_f, :single]
|
378
|
+
:float
|
379
|
+
when [:to_f, :list], [:to_f, :flat]
|
380
|
+
:float_array
|
381
|
+
else
|
382
|
+
type
|
383
|
+
end
|
384
|
+
else
|
385
|
+
data.serializer = type
|
386
|
+
end
|
265
387
|
end
|
266
388
|
|
267
389
|
kwargs[:data] = {} if kwargs[:data].nil?
|
@@ -270,48 +392,8 @@ module TSV
|
|
270
392
|
data.type = type
|
271
393
|
data.filename = filename
|
272
394
|
data.namespace = namespace
|
395
|
+
data.unnamed = unnamed
|
396
|
+
data.save_extension_attr_hash if data.respond_to?(:save_extension_attr_hash)
|
273
397
|
data
|
274
398
|
end
|
275
|
-
|
276
|
-
#def self.parse_alt(stream, key_field: nil, fields: nil, filename: nil, namespace: nil, **kwargs, &block)
|
277
|
-
# options, key_field_name, field_names, first_line, preamble = parse_header(stream)
|
278
|
-
|
279
|
-
# if fields
|
280
|
-
# all_field_names ||= [key_field_name] + field_names
|
281
|
-
# positions = NamedArray.identify_name(all_field_names, fields)
|
282
|
-
# kwargs[:positions] = positions
|
283
|
-
# field_names = all_field_names.values_at *positions
|
284
|
-
# end
|
285
|
-
|
286
|
-
# if key_field
|
287
|
-
# all_field_names ||= [key_field_name] + field_names
|
288
|
-
# key = NamedArray.identify_name(all_field_names, key_field)
|
289
|
-
# kwargs[:key] = key
|
290
|
-
# key_field_name = all_field_names[key]
|
291
|
-
# if fields.nil?
|
292
|
-
# field_names = all_field_names - [key_field_name]
|
293
|
-
# end
|
294
|
-
# end
|
295
|
-
|
296
|
-
# options.each do |option,value|
|
297
|
-
# option = option.to_sym
|
298
|
-
# next unless KEY_PARAMETERS.include? option
|
299
|
-
# kwargs[option] = value unless kwargs.include?(option)
|
300
|
-
# end
|
301
|
-
|
302
|
-
# kwargs[:source_type] = options[:type]
|
303
|
-
|
304
|
-
# type = kwargs[:type] ||= :double
|
305
|
-
# if (data = kwargs[:data]) && data.respond_to?(:persistence_class)
|
306
|
-
# TSV.setup(data, type: type, key_field: key_field_name, fields: field_names)
|
307
|
-
# data.extend TSVAdapter
|
308
|
-
# end
|
309
|
-
|
310
|
-
# data = parse_stream(stream, first_line: first_line, **kwargs, &block)
|
311
|
-
|
312
|
-
# TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => type, filename: filename, namespace: namespace)
|
313
|
-
|
314
|
-
# data
|
315
|
-
#end
|
316
|
-
|
317
399
|
end
|
data/lib/scout/tsv/path.rb
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
module Path
|
2
|
-
def tsv(
|
3
|
-
found =
|
4
|
-
found
|
5
|
-
TSV.open(found, ...)
|
2
|
+
def tsv(*args, **kwargs, &block)
|
3
|
+
found = produce_and_find('tsv')
|
4
|
+
TSV.open(found, *args, **kwargs, &block)
|
6
5
|
end
|
7
6
|
|
8
|
-
def index(
|
7
|
+
def index(*args, **kwargs, &block)
|
9
8
|
found = self.find
|
10
9
|
found = self.set_extension('tsv').find unless found.exists?
|
11
|
-
TSV.index(found,
|
10
|
+
TSV.index(found, *args, **kwargs, &block)
|
12
11
|
end
|
13
12
|
end
|