scout-gear 10.7.1 → 10.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +50 -30
- data/VERSION +1 -1
- data/lib/scout/association/index.rb +5 -1
- data/lib/scout/association/item.rb +1 -1
- data/lib/scout/association.rb +46 -11
- data/lib/scout/entity/format.rb +9 -4
- data/lib/scout/entity/identifiers.rb +4 -4
- data/lib/scout/entity/named_array.rb +13 -0
- data/lib/scout/entity/property.rb +3 -1
- data/lib/scout/entity.rb +7 -4
- data/lib/scout/knowledge_base/enrichment.rb +9 -0
- data/lib/scout/knowledge_base/entity.rb +143 -0
- data/lib/scout/knowledge_base/list.rb +95 -0
- data/lib/scout/knowledge_base/query.rb +96 -0
- data/lib/scout/knowledge_base/registry.rb +173 -0
- data/lib/scout/knowledge_base/traverse.rb +329 -0
- data/lib/scout/knowledge_base.rb +91 -0
- data/lib/scout/persist/tsv/adapter/base.rb +13 -1
- data/lib/scout/persist/tsv.rb +2 -1
- data/lib/scout/tsv/annotation.rb +4 -4
- data/lib/scout/tsv/attach.rb +10 -2
- data/lib/scout/tsv/change_id.rb +3 -0
- data/lib/scout/tsv/dumper.rb +34 -30
- data/lib/scout/tsv/index.rb +0 -2
- data/lib/scout/tsv/open.rb +1 -0
- data/lib/scout/tsv/parser.rb +21 -10
- data/lib/scout/tsv/path.rb +8 -0
- data/lib/scout/tsv/stream.rb +17 -10
- data/lib/scout/tsv/traverse.rb +12 -2
- data/lib/scout/tsv/util/process.rb +4 -1
- data/lib/scout/tsv/util/select.rb +8 -2
- data/lib/scout/tsv/util/sort.rb +23 -15
- data/lib/scout/tsv/util.rb +11 -2
- data/lib/scout/tsv.rb +25 -11
- data/lib/scout/workflow/definition.rb +3 -3
- data/lib/scout/workflow/deployment/orchestrator.rb +8 -5
- data/lib/scout/workflow/step/dependencies.rb +35 -11
- data/lib/scout/workflow/step/file.rb +2 -1
- data/lib/scout/workflow/step/info.rb +23 -2
- data/lib/scout/workflow/step/load.rb +5 -3
- data/lib/scout/workflow/step/progress.rb +6 -0
- data/lib/scout/workflow/step/provenance.rb +1 -1
- data/lib/scout/workflow/step/status.rb +10 -4
- data/lib/scout/workflow/step.rb +32 -12
- data/lib/scout/workflow/task/dependencies.rb +33 -24
- data/lib/scout/workflow/task/inputs.rb +40 -12
- data/lib/scout/workflow/task.rb +22 -10
- data/lib/scout/workflow/usage.rb +2 -2
- data/lib/scout/workflow.rb +1 -1
- data/scout-gear.gemspec +28 -4
- data/scout_commands/kb/config +33 -0
- data/scout_commands/kb/entities +35 -0
- data/scout_commands/kb/list +39 -0
- data/scout_commands/kb/query +78 -0
- data/scout_commands/kb/register +44 -0
- data/scout_commands/kb/show +37 -0
- data/scout_commands/kb/traverse +66 -0
- data/test/data/person/brothers +1 -1
- data/test/scout/entity/test_identifiers.rb +3 -3
- data/test/scout/entity/test_named_array.rb +21 -0
- data/test/scout/knowledge_base/test_enrichment.rb +0 -0
- data/test/scout/knowledge_base/test_entity.rb +38 -0
- data/test/scout/knowledge_base/test_list.rb +40 -0
- data/test/scout/knowledge_base/test_query.rb +39 -0
- data/test/scout/knowledge_base/test_registry.rb +16 -0
- data/test/scout/knowledge_base/test_traverse.rb +245 -0
- data/test/scout/persist/test_tsv.rb +20 -0
- data/test/scout/persist/tsv/adapter/test_base.rb +20 -0
- data/test/scout/test_association.rb +17 -3
- data/test/scout/test_entity.rb +0 -15
- data/test/scout/test_knowledge_base.rb +27 -0
- data/test/scout/test_tsv.rb +40 -0
- data/test/scout/tsv/test_dumper.rb +24 -0
- data/test/scout/tsv/test_path.rb +24 -0
- data/test/scout/tsv/test_stream.rb +93 -0
- data/test/scout/tsv/test_traverse.rb +99 -0
- data/test/scout/tsv/test_util.rb +2 -0
- data/test/scout/tsv/util/test_select.rb +22 -0
- data/test/scout/tsv/util/test_sort.rb +24 -0
- data/test/scout/workflow/step/test_dependencies.rb +26 -0
- data/test/scout/workflow/step/test_info.rb +35 -0
- data/test/scout/workflow/task/test_dependencies.rb +67 -1
- data/test/scout/workflow/task/test_inputs.rb +24 -7
- data/test/scout/workflow/test_task.rb +36 -0
- data/test/scout/workflow/test_usage.rb +0 -1
- data/test/test_helper.rb +17 -0
- metadata +27 -3
@@ -23,7 +23,9 @@ module TSVAdapter
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def save_annotation_hash
|
26
|
-
self.
|
26
|
+
self.with_write do
|
27
|
+
self.orig_set(ANNOTATION_ATTR_HASH_KEY, ANNOTATION_ATTR_HASH_SERIALIZER.dump(self.annotation_hash))
|
28
|
+
end
|
27
29
|
end
|
28
30
|
|
29
31
|
def self.extended(base)
|
@@ -163,6 +165,16 @@ module TSVAdapter
|
|
163
165
|
end
|
164
166
|
end
|
165
167
|
|
168
|
+
def with_write(*args, &block)
|
169
|
+
if @write
|
170
|
+
yield
|
171
|
+
elsif @closed
|
172
|
+
write_and_close &block
|
173
|
+
else
|
174
|
+
write_and_read &block
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
166
178
|
def close(*args)
|
167
179
|
begin
|
168
180
|
super(*args)
|
data/lib/scout/persist/tsv.rb
CHANGED
@@ -48,7 +48,8 @@ module Persist
|
|
48
48
|
|
49
49
|
def self.tsv(id, options = {}, engine: nil, persist_options: {})
|
50
50
|
engine ||= persist_options[:engine] || :HDB
|
51
|
-
|
51
|
+
persist_options[:other_options] = options unless persist_options.include?(:other_options)
|
52
|
+
Persist.persist(id, engine, persist_options) do |filename|
|
52
53
|
if filename.nil?
|
53
54
|
yield(persist_options[:data] || {})
|
54
55
|
else
|
data/lib/scout/tsv/annotation.rb
CHANGED
@@ -48,12 +48,12 @@ module Annotation
|
|
48
48
|
|
49
49
|
fields = fields.flatten.compact.uniq
|
50
50
|
|
51
|
-
annotations = if Annotation.is_annotated?(objs)
|
52
|
-
objs.
|
51
|
+
annotations = if Annotation.is_annotated?(objs)
|
52
|
+
objs.annotation_hash.keys
|
53
53
|
elsif (Array === objs && objs.any?)
|
54
54
|
first = objs.compact.first
|
55
55
|
if Annotation.is_annotated?(first)
|
56
|
-
objs.compact.first.
|
56
|
+
objs.compact.first.annotation_hash.keys
|
57
57
|
else
|
58
58
|
raise "Objects didn't have annotations"
|
59
59
|
end
|
@@ -158,7 +158,7 @@ module Annotation
|
|
158
158
|
Annotation.load_tsv_values(id, values, tsv.fields)
|
159
159
|
end
|
160
160
|
|
161
|
-
case tsv.key_field
|
161
|
+
case tsv.key_field
|
162
162
|
when "List"
|
163
163
|
annotated_objects.first
|
164
164
|
else
|
data/lib/scout/tsv/attach.rb
CHANGED
@@ -92,6 +92,9 @@ module TSV
|
|
92
92
|
end
|
93
93
|
|
94
94
|
other_field_positions = other.identify_field(fields.dup)
|
95
|
+
fields.zip(other_field_positions) do |o,n|
|
96
|
+
raise "Field #{o} not found. Options: #{Log.fingerprint other.fields}" if n.nil?
|
97
|
+
end
|
95
98
|
|
96
99
|
log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
|
97
100
|
Log.debug log_message
|
@@ -252,8 +255,13 @@ module TSV
|
|
252
255
|
def self.identifier_files(obj)
|
253
256
|
if TSV === obj
|
254
257
|
obj.identifier_files
|
255
|
-
elsif Path
|
256
|
-
obj.
|
258
|
+
elsif Path.is_filename?(obj)
|
259
|
+
path = Path === obj ? obj : Path.setup(obj)
|
260
|
+
if obj.dirname.identifiers.exists?
|
261
|
+
obj.dirname.identifiers
|
262
|
+
else
|
263
|
+
[TSV.parse_options(obj)[:identifiers]]
|
264
|
+
end
|
257
265
|
else
|
258
266
|
nil
|
259
267
|
end
|
data/lib/scout/tsv/change_id.rb
CHANGED
@@ -6,6 +6,9 @@ module TSV
|
|
6
6
|
identifiers = source.identifiers if identifiers.nil? and source.respond_to?(:identifiers)
|
7
7
|
if identifiers && source.identify_field(new_key_field, strict: true).nil?
|
8
8
|
identifiers = identifiers.nil? ? source.identifiers : identifiers
|
9
|
+
if Array === identifiers
|
10
|
+
identifiers = identifiers.select{|f| f.identify_field(new_key_field) }.last
|
11
|
+
end
|
9
12
|
new = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
|
10
13
|
new = new.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one, merge: merge)
|
11
14
|
return new
|
data/lib/scout/tsv/dumper.rb
CHANGED
@@ -26,12 +26,13 @@ module TSV
|
|
26
26
|
end
|
27
27
|
|
28
28
|
|
29
|
-
attr_accessor :options, :initialized, :type, :sep, :filename, :namespace
|
29
|
+
attr_accessor :options, :initialized, :type, :sep, :compact, :filename, :namespace
|
30
30
|
def initialize(options = {})
|
31
31
|
options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
|
32
32
|
@sep, @type = IndiferentHash.process_options options,
|
33
33
|
:sep, :type,
|
34
34
|
:sep => "\t", :type => :double
|
35
|
+
@compact = options[:compact]
|
35
36
|
@options = options
|
36
37
|
@options[:type] = @type
|
37
38
|
@sout, @sin = Open.pipe
|
@@ -39,8 +40,9 @@ module TSV
|
|
39
40
|
@initialized = false
|
40
41
|
@filename = options[:filename]
|
41
42
|
@mutex = Mutex.new
|
43
|
+
@namespace = options[:namespace]
|
42
44
|
ConcurrentStream.setup(@sin, pair: @sout)
|
43
|
-
ConcurrentStream.setup(@sout, pair: @sin)
|
45
|
+
ConcurrentStream.setup(@sout, pair: @sin, filename: filename)
|
44
46
|
end
|
45
47
|
|
46
48
|
def set_stream(stream)
|
@@ -96,11 +98,11 @@ module TSV
|
|
96
98
|
when :list, :flat
|
97
99
|
@sin << key + @sep + value * @sep << "\n"
|
98
100
|
when :double
|
99
|
-
@sin << key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep << "\n"
|
101
|
+
@sin << key + @sep + value.collect{|v| Array === v ? (@compact ? v.compact : v) * "|" : v } * @sep << "\n"
|
100
102
|
else
|
101
103
|
if Array === value
|
102
104
|
if Array === value.first
|
103
|
-
@sin << key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep << "\n"
|
105
|
+
@sin << key + @sep + value.collect{|v| Array === v ? (@compact ? v.compact : v) * "|" : v } * @sep << "\n"
|
104
106
|
else
|
105
107
|
@sin << key + @sep + value * @sep << "\n"
|
106
108
|
end
|
@@ -173,13 +175,35 @@ module TSV
|
|
173
175
|
end
|
174
176
|
end
|
175
177
|
|
176
|
-
|
177
|
-
|
178
|
+
self.with_unnamed do
|
179
|
+
if stream.nil?
|
180
|
+
t = Thread.new do
|
181
|
+
begin
|
182
|
+
Thread.current.report_on_exception = true
|
183
|
+
Thread.current["name"] = "Dumper thread"
|
184
|
+
dumper.init(preamble: preamble)
|
185
|
+
|
186
|
+
if keys
|
187
|
+
keys.each do |k|
|
188
|
+
dump_entry.call k, self[k]
|
189
|
+
end
|
190
|
+
else
|
191
|
+
self.each &dump_entry
|
192
|
+
end
|
193
|
+
|
194
|
+
dumper.close
|
195
|
+
rescue
|
196
|
+
dumper.abort($!)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
Thread.pass until t["name"]
|
200
|
+
stream = dumper.stream
|
201
|
+
ConcurrentStream.setup(stream, :threads => [t])
|
202
|
+
stream
|
203
|
+
else
|
204
|
+
dumper.set_stream stream
|
178
205
|
begin
|
179
|
-
Thread.current.report_on_exception = true
|
180
|
-
Thread.current["name"] = "Dumper thread"
|
181
206
|
dumper.init(preamble: preamble)
|
182
|
-
|
183
207
|
if keys
|
184
208
|
keys.each do |k|
|
185
209
|
dump_entry.call k, self[k]
|
@@ -192,28 +216,8 @@ module TSV
|
|
192
216
|
rescue
|
193
217
|
dumper.abort($!)
|
194
218
|
end
|
219
|
+
stream
|
195
220
|
end
|
196
|
-
Thread.pass until t["name"]
|
197
|
-
stream = dumper.stream
|
198
|
-
ConcurrentStream.setup(stream, :threads => [t])
|
199
|
-
stream
|
200
|
-
else
|
201
|
-
dumper.set_stream stream
|
202
|
-
begin
|
203
|
-
dumper.init(preamble: preamble)
|
204
|
-
if keys
|
205
|
-
keys.each do |k|
|
206
|
-
dump_entry.call k, self[k]
|
207
|
-
end
|
208
|
-
else
|
209
|
-
self.each &dump_entry
|
210
|
-
end
|
211
|
-
|
212
|
-
dumper.close
|
213
|
-
rescue
|
214
|
-
dumper.abort($!)
|
215
|
-
end
|
216
|
-
stream
|
217
221
|
end
|
218
222
|
end
|
219
223
|
|
data/lib/scout/tsv/index.rb
CHANGED
@@ -66,8 +66,6 @@ module TSV
|
|
66
66
|
index = TSV.setup({}, :type => :single)
|
67
67
|
end
|
68
68
|
|
69
|
-
tsv_file = TSV.open(tsv_file, **data_options) if ! TSV === tsv_file
|
70
|
-
|
71
69
|
log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}"
|
72
70
|
Log.low log_msg
|
73
71
|
bar = log_msg if TrueClass === bar
|
data/lib/scout/tsv/open.rb
CHANGED
data/lib/scout/tsv/parser.rb
CHANGED
@@ -7,7 +7,11 @@ module TSV
|
|
7
7
|
if Proc === cast
|
8
8
|
cast.call value
|
9
9
|
else
|
10
|
-
value.
|
10
|
+
if value.nil? || value == ""
|
11
|
+
nil
|
12
|
+
else
|
13
|
+
value.send(cast)
|
14
|
+
end
|
11
15
|
end
|
12
16
|
end
|
13
17
|
end
|
@@ -94,6 +98,8 @@ module TSV
|
|
94
98
|
line.chomp!
|
95
99
|
if Proc === fix
|
96
100
|
line = fix.call line
|
101
|
+
break if (FalseClass === line) || :break == line
|
102
|
+
next if line.nil?
|
97
103
|
elsif fix
|
98
104
|
line = Misc.fixutf8(line)
|
99
105
|
end
|
@@ -175,14 +181,14 @@ module TSV
|
|
175
181
|
current = data[key]
|
176
182
|
if merge == :concat
|
177
183
|
these_items.each_with_index do |new,i|
|
178
|
-
new = [nil] if new.empty?
|
184
|
+
new = one2one ? [nil] : [] if new.empty?
|
179
185
|
current[i].concat(new)
|
180
186
|
end
|
181
187
|
else
|
182
188
|
merged = []
|
183
189
|
these_items.each_with_index do |new,i|
|
184
|
-
new = [nil] if new.empty?
|
185
|
-
merged[i] = current[i] + new
|
190
|
+
new = one2one ? [nil] : [] if new.empty?
|
191
|
+
merged[i] = (current[i] || []) + new
|
186
192
|
end
|
187
193
|
data[key] = merged
|
188
194
|
end
|
@@ -290,7 +296,8 @@ module TSV
|
|
290
296
|
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
291
297
|
|
292
298
|
all_fields = [key_field] + fields if key_field && fields
|
293
|
-
|
299
|
+
namespace = opts[:namespace]
|
300
|
+
NamedArray.setup([opts, key_field, fields, first_line, preamble, all_fields, namespace], %w(options key_field fields first_line preamble all_fields namespace))
|
294
301
|
rescue Exception
|
295
302
|
raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
|
296
303
|
stream.abort($!) if stream.respond_to?(:abort)
|
@@ -298,6 +305,10 @@ module TSV
|
|
298
305
|
end
|
299
306
|
end
|
300
307
|
|
308
|
+
def self.parse_options(...)
|
309
|
+
parse_header(...)[:options]
|
310
|
+
end
|
311
|
+
|
301
312
|
KEY_PARAMETERS = begin
|
302
313
|
params = []
|
303
314
|
(method(:parse_line).parameters + method(:parse_stream).parameters).each do |type, name|
|
@@ -408,7 +419,7 @@ module TSV
|
|
408
419
|
end
|
409
420
|
|
410
421
|
if data
|
411
|
-
TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
|
422
|
+
TSV.setup(data, @source_options.merge(:key_field => key_field_name, :fields => field_names, :type => @type))
|
412
423
|
else
|
413
424
|
[key_field || self.key_field, fields || self.fields]
|
414
425
|
end
|
@@ -435,7 +446,7 @@ module TSV
|
|
435
446
|
end
|
436
447
|
end
|
437
448
|
|
438
|
-
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed:
|
449
|
+
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: nil, serializer: nil, **kwargs, &block)
|
439
450
|
parser = TSV::Parser === stream ? stream : TSV::Parser.new(stream, fix: fix, header_hash: header_hash, sep: sep)
|
440
451
|
|
441
452
|
cast = kwargs[:cast]
|
@@ -472,9 +483,9 @@ module TSV
|
|
472
483
|
data = parser.traverse **kwargs, &block
|
473
484
|
data.type = type
|
474
485
|
data.cast = cast
|
475
|
-
data.filename = filename || parser.options[:filename]
|
476
|
-
data.namespace = namespace || parser.options[:namespace]
|
477
|
-
data.identifiers = identifiers
|
486
|
+
data.filename = filename || parser.options[:filename] if data.filename.nil?
|
487
|
+
data.namespace = namespace || parser.options[:namespace] if data.namespace.nil?
|
488
|
+
data.identifiers = identifiers || parser.options[:identifiers] if data.identifiers.nil?
|
478
489
|
data.unnamed = unnamed
|
479
490
|
data.save_annotation_hash if data.respond_to?(:save_annotation_hash)
|
480
491
|
data
|
data/lib/scout/tsv/path.rb
CHANGED
data/lib/scout/tsv/stream.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
module TSV
|
2
|
-
def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, field_prefix: nil)
|
2
|
+
def self.paste_streams(streams, type: nil, sort: nil, sort_cmd_args: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil)
|
3
|
+
sep = "\t" if sep.nil?
|
3
4
|
|
4
5
|
streams = streams.collect do |stream|
|
5
6
|
case stream
|
@@ -19,7 +20,7 @@ module TSV
|
|
19
20
|
num_streams = streams.length
|
20
21
|
|
21
22
|
streams = streams.collect do |stream|
|
22
|
-
Open.sort_stream(stream, memory: sort_memory)
|
23
|
+
Open.sort_stream(stream, memory: sort_memory, cmd_args: sort_cmd_args)
|
23
24
|
end if sort
|
24
25
|
|
25
26
|
begin
|
@@ -37,7 +38,8 @@ module TSV
|
|
37
38
|
|
38
39
|
streams = streams.collect do |stream|
|
39
40
|
|
40
|
-
parser = TSV::Parser.new stream,
|
41
|
+
parser = TSV::Parser.new stream, sep: sep
|
42
|
+
#parser.type = type
|
41
43
|
|
42
44
|
sfields = parser.fields
|
43
45
|
|
@@ -102,8 +104,9 @@ module TSV
|
|
102
104
|
keys[i]= nil
|
103
105
|
parts[i]= nil
|
104
106
|
else
|
105
|
-
vs = line.
|
107
|
+
vs = line.split(sep, -1)
|
106
108
|
key, *p = vs
|
109
|
+
p = [p] if parser_types[i] == :flat
|
107
110
|
keys[i]= key
|
108
111
|
parts[i]= p
|
109
112
|
end
|
@@ -112,7 +115,7 @@ module TSV
|
|
112
115
|
done_streams =[]
|
113
116
|
|
114
117
|
fields = nil if fields && fields.empty?
|
115
|
-
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type
|
118
|
+
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type, compact: !one2one
|
116
119
|
dumper.init(preamble: preamble_txt || !!key_field)
|
117
120
|
|
118
121
|
t = Thread.new do
|
@@ -130,7 +133,7 @@ module TSV
|
|
130
133
|
keys.each_with_index do |key,i|
|
131
134
|
case key
|
132
135
|
when min
|
133
|
-
|
136
|
+
new_parts = parts[i]
|
134
137
|
|
135
138
|
begin
|
136
139
|
line = lines[i]= begin
|
@@ -145,18 +148,20 @@ module TSV
|
|
145
148
|
else
|
146
149
|
k, *p = line.chomp.split(sep, -1)
|
147
150
|
p = p.collect{|e| e.nil? ? "" : e }
|
151
|
+
p = [p] if parser_types[i] == :flat
|
148
152
|
|
149
153
|
if k == keys[i]
|
150
|
-
|
154
|
+
new_parts = NamedArray.zip_fields([new_parts]).zip(p).collect{|p| [p.flatten * "|"] }
|
151
155
|
raise TryAgain
|
152
156
|
end
|
153
157
|
keys[i]= k
|
154
158
|
parts[i]= p
|
155
159
|
end
|
160
|
+
|
161
|
+
new_values << new_parts
|
156
162
|
rescue TryAgain
|
157
163
|
keys[i]= nil
|
158
164
|
parts[i]= nil
|
159
|
-
Log.debug "Skipping repeated key in stream #{i}: #{key} - #{min}"
|
160
165
|
retry
|
161
166
|
end
|
162
167
|
else
|
@@ -180,14 +185,16 @@ module TSV
|
|
180
185
|
new_values = new_values.inject([]){|acc,l| acc.concat l }
|
181
186
|
end
|
182
187
|
|
188
|
+
new_values = new_values.collect{|l| Array === l ? l.compact : l } unless one2one
|
189
|
+
|
183
190
|
dumper.add min, new_values
|
184
191
|
end
|
185
192
|
|
186
193
|
dumper.close
|
187
194
|
|
188
195
|
streams.each do |stream|
|
189
|
-
stream.close if stream.respond_to?(:close)
|
190
|
-
stream.join if stream.respond_to?
|
196
|
+
stream.close if stream.respond_to?(:close)
|
197
|
+
stream.join if stream.respond_to?(:join)
|
191
198
|
end
|
192
199
|
end
|
193
200
|
rescue Aborted
|
data/lib/scout/tsv/traverse.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require_relative 'parser'
|
2
2
|
module TSV
|
3
|
-
def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed:
|
3
|
+
def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: nil, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block)
|
4
4
|
key_field = key_field_pos if key_field.nil?
|
5
5
|
fields = fields_pos.dup if fields.nil?
|
6
6
|
type = @type if type.nil?
|
@@ -9,6 +9,8 @@ module TSV
|
|
9
9
|
fields = [fields] unless fields.nil? || Array === fields
|
10
10
|
positions = (fields.nil? || fields == :all) ? nil : self.identify_field(fields)
|
11
11
|
positions = nil if fields == self.fields
|
12
|
+
unnamed = @unnamed if unnamed.nil?
|
13
|
+
unnamed = false if unnamed.nil?
|
12
14
|
|
13
15
|
if key_pos == :key
|
14
16
|
key_name = @key_field
|
@@ -61,7 +63,6 @@ module TSV
|
|
61
63
|
key = @type == :flat ? values : values[key_pos] if key_pos != :key
|
62
64
|
|
63
65
|
values = values.values_at(*positions)
|
64
|
-
NamedArray.setup(values, fields)
|
65
66
|
if key_index
|
66
67
|
if @type == :double
|
67
68
|
values.insert key_index, [orig_key]
|
@@ -71,6 +72,15 @@ module TSV
|
|
71
72
|
end
|
72
73
|
end
|
73
74
|
|
75
|
+
if ! unnamed && fields
|
76
|
+
case @type
|
77
|
+
when :flat, :single
|
78
|
+
values = Entity.prepare_entity(values, fields.first)
|
79
|
+
else
|
80
|
+
values = NamedArray.setup(values, fields, entity_options)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
74
84
|
values = TSV.cast_value(values, cast) if cast
|
75
85
|
|
76
86
|
if Array === key
|
@@ -44,11 +44,14 @@ module TSV
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def add_field(name = nil)
|
47
|
-
|
47
|
+
keys.each do |key|
|
48
|
+
values = self[key]
|
48
49
|
new_values = yield(key, values)
|
49
50
|
new_values = [new_values].compact if type == :double and not Array === new_values
|
50
51
|
|
51
52
|
case
|
53
|
+
when type == :single
|
54
|
+
values = new_values
|
52
55
|
when (values.nil? and (fields.nil? or fields.empty?))
|
53
56
|
values = [new_values]
|
54
57
|
when values.nil?
|
@@ -156,8 +156,14 @@ module TSV
|
|
156
156
|
case
|
157
157
|
when ((Array === method) and (key == :key or key_field == key))
|
158
158
|
with_unnamed do
|
159
|
-
|
160
|
-
|
159
|
+
if invert
|
160
|
+
keys.each do |key|
|
161
|
+
new[key] = self[key] unless method.include?(key)
|
162
|
+
end
|
163
|
+
else
|
164
|
+
method.each do |key|
|
165
|
+
new[key] = self[key] if self.include?(key)
|
166
|
+
end
|
161
167
|
end
|
162
168
|
end
|
163
169
|
when Array === method
|
data/lib/scout/tsv/util/sort.rb
CHANGED
@@ -1,4 +1,10 @@
|
|
1
|
+
require 'scout/entity'
|
2
|
+
|
1
3
|
module TSV
|
4
|
+
def prepare_entity(...)
|
5
|
+
Entity.prepare_entity(...)
|
6
|
+
end
|
7
|
+
|
2
8
|
def sort_by(field = nil, just_keys = false, &block)
|
3
9
|
field = :all if field.nil?
|
4
10
|
|
@@ -8,16 +14,18 @@ module TSV
|
|
8
14
|
elems = []
|
9
15
|
case type
|
10
16
|
when :single
|
11
|
-
through :key, field do |key,
|
12
|
-
elems << [key,
|
17
|
+
through :key, field do |key, value|
|
18
|
+
elems << [key, value]
|
13
19
|
end
|
14
20
|
when :list, :flat
|
15
|
-
through :key, field do |key,
|
16
|
-
|
21
|
+
through :key, field do |key, value|
|
22
|
+
v = field == :key ? key : value[0]
|
23
|
+
elems << [key, v]
|
17
24
|
end
|
18
25
|
when :double
|
19
|
-
through :key, field do |key,
|
20
|
-
|
26
|
+
through :key, field do |key, value|
|
27
|
+
v = field == :key ? key : value[0]
|
28
|
+
elems << [key, v]
|
21
29
|
end
|
22
30
|
end
|
23
31
|
end
|
@@ -81,16 +89,16 @@ module TSV
|
|
81
89
|
elems = []
|
82
90
|
case type
|
83
91
|
when :single
|
84
|
-
through :key, field do |key,
|
85
|
-
elems << [key,
|
92
|
+
through :key, field do |key, value|
|
93
|
+
elems << [key, value]
|
86
94
|
end
|
87
95
|
when :list, :flat
|
88
|
-
through :key, field do |key,
|
89
|
-
elems << [key,
|
96
|
+
through :key, field do |key, values|
|
97
|
+
elems << [key, values[0]]
|
90
98
|
end
|
91
99
|
when :double
|
92
|
-
through :key, field do |key,
|
93
|
-
elems << [key,
|
100
|
+
through :key, field do |key, values|
|
101
|
+
elems << [key, values[0]]
|
94
102
|
end
|
95
103
|
end
|
96
104
|
end
|
@@ -99,7 +107,7 @@ module TSV
|
|
99
107
|
if fields == :all
|
100
108
|
if just_keys
|
101
109
|
keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
|
102
|
-
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
|
110
|
+
keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true))
|
103
111
|
else
|
104
112
|
elems.sort_by{|key, value| key }
|
105
113
|
end
|
@@ -128,7 +136,7 @@ module TSV
|
|
128
136
|
end
|
129
137
|
if just_keys
|
130
138
|
keys = sorted.collect{|key, value| key}
|
131
|
-
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
|
139
|
+
keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
|
132
140
|
keys
|
133
141
|
else
|
134
142
|
sorted.collect{|key, value| [key, self[key]]}
|
@@ -137,7 +145,7 @@ module TSV
|
|
137
145
|
else
|
138
146
|
if just_keys
|
139
147
|
keys = elems.sort(&block).collect{|key, value| key}
|
140
|
-
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
|
148
|
+
keys = prepare_entity(keys, key_field, (entity_options || {}).merge(:dup_array => true)) unless @unnamed
|
141
149
|
keys
|
142
150
|
else
|
143
151
|
elems.sort(&block).collect{|key, value| [key, self[key]]}
|
data/lib/scout/tsv/util.rb
CHANGED
@@ -32,6 +32,7 @@ module TSV
|
|
32
32
|
text
|
33
33
|
end
|
34
34
|
|
35
|
+
path = path.find if Path === path
|
35
36
|
TmpFile.with_file(values.uniq * "\n", false) do |value_file|
|
36
37
|
cmd = "cat '#{ path }' | sed 's/\\t/\\tHEADERNOMATCH/' | grep -w -F -f '#{ value_file }' | sed 's/HEADERNOMATCH//' |sort -u|cut -f 2 |sort|uniq -c|sed 's/^ *//;s/ /\t/'"
|
37
38
|
begin
|
@@ -84,8 +85,9 @@ module TSV
|
|
84
85
|
|
85
86
|
def each(*args, &block)
|
86
87
|
if block_given?
|
88
|
+
actual_unnamed = @unnamed.nil? ? true : @unnamed
|
87
89
|
super(*args) do |k,v|
|
88
|
-
NamedArray.setup(v, @fields) unless
|
90
|
+
NamedArray.setup(v, @fields) unless actual_unnamed || @type == :flat || ! (Array === v)
|
89
91
|
block.call(k, v)
|
90
92
|
end
|
91
93
|
else
|
@@ -105,7 +107,8 @@ module TSV
|
|
105
107
|
end
|
106
108
|
end
|
107
109
|
|
108
|
-
def with_unnamed(unnamed =
|
110
|
+
def with_unnamed(unnamed = nil)
|
111
|
+
unnamed = true if unnamed.nil?
|
109
112
|
begin
|
110
113
|
old_unnamed = @unnamed
|
111
114
|
@unnamed = unnamed
|
@@ -177,4 +180,10 @@ Example:
|
|
177
180
|
self.annotate(super(other))
|
178
181
|
end
|
179
182
|
|
183
|
+
def merge_zip(other)
|
184
|
+
other.each do |k,v|
|
185
|
+
self.zip_new k, v
|
186
|
+
end
|
187
|
+
end
|
188
|
+
|
180
189
|
end
|