scout-gear 10.7.0 → 10.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +8 -1
- data/VERSION +1 -1
- data/lib/scout/association/index.rb +1 -1
- data/lib/scout/association.rb +21 -5
- data/lib/scout/entity/format.rb +9 -4
- data/lib/scout/entity/identifiers.rb +2 -2
- data/lib/scout/entity/named_array.rb +13 -0
- data/lib/scout/entity/property.rb +2 -1
- data/lib/scout/entity.rb +9 -4
- data/lib/scout/persist/tsv/adapter/base.rb +13 -1
- data/lib/scout/persist/tsv.rb +2 -1
- data/lib/scout/tsv/attach.rb +10 -2
- data/lib/scout/tsv/change_id.rb +3 -0
- data/lib/scout/tsv/dumper.rb +34 -30
- data/lib/scout/tsv/open.rb +1 -0
- data/lib/scout/tsv/parser.rb +22 -10
- data/lib/scout/tsv/path.rb +8 -0
- data/lib/scout/tsv/stream.rb +15 -8
- data/lib/scout/tsv/traverse.rb +12 -2
- data/lib/scout/tsv/util/process.rb +4 -1
- data/lib/scout/tsv/util/select.rb +8 -2
- data/lib/scout/tsv/util/sort.rb +23 -15
- data/lib/scout/tsv/util.rb +11 -2
- data/lib/scout/tsv.rb +23 -11
- data/lib/scout/workflow/definition.rb +24 -9
- data/lib/scout/workflow/deployment/orchestrator.rb +10 -7
- data/lib/scout/workflow/exceptions.rb +1 -0
- data/lib/scout/workflow/path.rb +40 -0
- data/lib/scout/workflow/step/dependencies.rb +36 -12
- data/lib/scout/workflow/step/file.rb +2 -1
- data/lib/scout/workflow/step/info.rb +20 -4
- data/lib/scout/workflow/step/load.rb +5 -3
- data/lib/scout/workflow/step/progress.rb +6 -0
- data/lib/scout/workflow/step/provenance.rb +1 -1
- data/lib/scout/workflow/step/status.rb +11 -4
- data/lib/scout/workflow/step.rb +33 -12
- data/lib/scout/workflow/task/dependencies.rb +33 -24
- data/lib/scout/workflow/task/inputs.rb +42 -12
- data/lib/scout/workflow/task.rb +22 -11
- data/lib/scout/workflow/usage.rb +3 -3
- data/lib/scout/workflow.rb +3 -0
- data/scout-gear.gemspec +13 -4
- data/scout_commands/db/query +83 -0
- data/scout_commands/db/register +43 -0
- data/scout_commands/db/show +47 -0
- data/test/scout/entity/test_named_array.rb +21 -0
- data/test/scout/persist/test_tsv.rb +20 -0
- data/test/scout/persist/tsv/adapter/test_base.rb +20 -0
- data/test/scout/test_tsv.rb +40 -0
- data/test/scout/tsv/test_dumper.rb +24 -0
- data/test/scout/tsv/test_path.rb +24 -0
- data/test/scout/tsv/test_stream.rb +93 -0
- data/test/scout/tsv/test_traverse.rb +99 -0
- data/test/scout/tsv/test_util.rb +2 -0
- data/test/scout/tsv/util/test_select.rb +22 -0
- data/test/scout/tsv/util/test_sort.rb +24 -0
- data/test/scout/workflow/step/test_dependencies.rb +26 -0
- data/test/scout/workflow/step/test_info.rb +35 -0
- data/test/scout/workflow/task/test_dependencies.rb +67 -1
- data/test/scout/workflow/task/test_inputs.rb +24 -7
- data/test/scout/workflow/test_definition.rb +31 -0
- data/test/scout/workflow/test_path.rb +9 -0
- data/test/scout/workflow/test_task.rb +36 -0
- data/test/scout/workflow/test_usage.rb +0 -1
- metadata +12 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1182999631b54e94f842c90017e1f97dde904ddafe47129cd80b51097714a1c4
|
4
|
+
data.tar.gz: ec736a5762fc2e58450a7955dffffe61eb73584f0bdc53bf3cd368e194ae7cbf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66a5c6ab90c2aa047cfbd000731eaec3913994880adddc9ac2daac9802ee2ebef14a054c1e292200624a6411ac2244d5f915de8d3ed9113d1237edc3795ab027
|
7
|
+
data.tar.gz: 27a0e87ac1360420bffc87b34b01eb6f21631fdf3f6ef95983fb46152f6e0e390200518dd2bebcc95c880e74d6f6f8a98a76577be2fbad0aea7d1fbe9c0720b1
|
data/.vimproject
CHANGED
@@ -7,10 +7,12 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
7
7
|
scout=scout{
|
8
8
|
workflow.rb
|
9
9
|
workflow=workflow{
|
10
|
+
exceptions.rb
|
10
11
|
definition.rb
|
11
12
|
documentation.rb
|
12
13
|
usage.rb
|
13
14
|
util.rb
|
15
|
+
path.rb
|
14
16
|
step.rb
|
15
17
|
step=step{
|
16
18
|
config.rb
|
@@ -80,7 +82,6 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
80
82
|
sort.rb
|
81
83
|
unzip.rb
|
82
84
|
}
|
83
|
-
entity.rb
|
84
85
|
parser.rb
|
85
86
|
dumper.rb
|
86
87
|
transformer.rb
|
@@ -106,6 +107,7 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
106
107
|
object.rb
|
107
108
|
format.rb
|
108
109
|
identifiers.rb
|
110
|
+
named_array.rb
|
109
111
|
}
|
110
112
|
association.rb
|
111
113
|
association=association{
|
@@ -136,6 +138,11 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
136
138
|
update
|
137
139
|
template
|
138
140
|
offsite
|
141
|
+
db=db{
|
142
|
+
register
|
143
|
+
query
|
144
|
+
show
|
145
|
+
}
|
139
146
|
workflow=workflow{
|
140
147
|
task
|
141
148
|
list
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
10.7.
|
1
|
+
10.7.2
|
data/lib/scout/association.rb
CHANGED
@@ -6,17 +6,32 @@ require_relative 'association/item'
|
|
6
6
|
|
7
7
|
module Association
|
8
8
|
def self.open(obj, source: nil, target: nil, fields: nil, source_format: nil, target_format: nil, format: nil, **kwargs)
|
9
|
+
|
10
|
+
|
11
|
+
if Path.is_filename?(obj)
|
12
|
+
options = TSV.parse_options(obj).merge(kwargs)
|
13
|
+
else
|
14
|
+
options = kwargs.dup
|
15
|
+
end
|
16
|
+
|
17
|
+
if String === obj && options[:namespace] && obj.include?("NAMESPACE")
|
18
|
+
new_obj = obj.gsub(/\[?NAMESPACE\]?/, options[:namespace])
|
19
|
+
obj.annotate(new_obj)
|
20
|
+
obj = new_obj
|
21
|
+
end
|
22
|
+
|
9
23
|
all_fields = TSV.all_fields(obj)
|
10
|
-
source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields,
|
24
|
+
source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields, options.merge(source: source, target: target, source_format: source_format, target_format: target_format, format: format))
|
11
25
|
|
12
26
|
original_source_header = all_fields[source_pos]
|
13
27
|
original_field_headers = all_fields.values_at(*field_pos)
|
14
28
|
original_target_header = all_fields[field_pos.first]
|
15
29
|
|
16
|
-
type, identifiers = IndiferentHash.process_options
|
30
|
+
type, identifiers = IndiferentHash.process_options options, :type, :identifiers
|
17
31
|
|
18
32
|
if source_format
|
19
33
|
translation_files = [TSV.identifier_files(obj), Entity.identifier_files(source_format), identifiers].flatten.compact
|
34
|
+
translation_files.collect!{|f| Path.is_filename?(f, false) ? Path.setup(f.gsub(/\[?NAMESPACE\]?/, options[:namespace])) : f }
|
20
35
|
source_index = begin
|
21
36
|
TSV.translation_index(translation_files, source_header, source_format)
|
22
37
|
rescue
|
@@ -26,6 +41,7 @@ module Association
|
|
26
41
|
|
27
42
|
if target_format
|
28
43
|
translation_files = [TSV.identifier_files(obj), Entity.identifier_files(target_format), identifiers].flatten.compact
|
44
|
+
translation_files.collect!{|f| Path.is_filename?(f, false) ? Path.setup(f.gsub(/\[?NAMESPACE\]?/, options[:namespace])) : f }
|
29
45
|
target_index = begin
|
30
46
|
TSV.translation_index(translation_files, field_headers.first, target_format)
|
31
47
|
rescue
|
@@ -67,10 +83,10 @@ module Association
|
|
67
83
|
if source_index.nil? && target_index.nil?
|
68
84
|
if TSV === obj
|
69
85
|
IndiferentHash.pull_keys kwargs, :persist
|
70
|
-
type =
|
71
|
-
res = obj.reorder original_source_header, all_fields.values_at(*field_pos), **
|
86
|
+
type = options[:type] || obj.type
|
87
|
+
res = obj.reorder original_source_header, all_fields.values_at(*field_pos), **options.merge(type: type, merge: true)
|
72
88
|
else
|
73
|
-
res = TSV.open(obj, key_field: original_source_header, fields: all_fields.values_at(*field_pos), **
|
89
|
+
res = TSV.open(obj, key_field: original_source_header, fields: all_fields.values_at(*field_pos), **options.merge(type: type))
|
74
90
|
end
|
75
91
|
res.key_field = final_key_field
|
76
92
|
res.fields = final_fields
|
data/lib/scout/entity/format.rb
CHANGED
@@ -17,16 +17,20 @@ module Entity
|
|
17
17
|
def find(value)
|
18
18
|
@find_cache ||= {}
|
19
19
|
|
20
|
-
@find_cache
|
20
|
+
if @find_cache.include?(value)
|
21
|
+
@find_cache[value]
|
22
|
+
else
|
23
|
+
@find_cache[value] = begin
|
21
24
|
if orig_include? value
|
22
|
-
|
25
|
+
value
|
23
26
|
else
|
27
|
+
value = value.to_s
|
24
28
|
found = nil
|
25
29
|
each do |k,v|
|
26
|
-
if value
|
30
|
+
if value == k.to_s
|
27
31
|
found = k
|
28
32
|
break
|
29
|
-
elsif value
|
33
|
+
elsif value =~ /\(#{Regexp.quote k.to_s}\)/
|
30
34
|
found = k
|
31
35
|
break
|
32
36
|
end
|
@@ -34,6 +38,7 @@ module Entity
|
|
34
38
|
found
|
35
39
|
end
|
36
40
|
end
|
41
|
+
end
|
37
42
|
end
|
38
43
|
|
39
44
|
def [](value)
|
@@ -91,6 +91,8 @@ module Entity
|
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
|
+
name = default if name.nil?
|
95
|
+
|
94
96
|
self.send(:include, Entity::Identified) unless Entity::Identified === self
|
95
97
|
|
96
98
|
self.format = all_fields
|
@@ -106,6 +108,4 @@ module Entity
|
|
106
108
|
@identifier_files << file
|
107
109
|
@identifier_files.uniq!
|
108
110
|
end
|
109
|
-
|
110
|
-
|
111
111
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'scout/named_array'
|
2
|
+
require 'scout/entity'
|
3
|
+
|
4
|
+
module NamedArray
|
5
|
+
|
6
|
+
def [](key)
|
7
|
+
pos = NamedArray.identify_name(@fields, key)
|
8
|
+
return nil if pos.nil?
|
9
|
+
v = super(pos)
|
10
|
+
field = @fields && Integer === key && ! @fields.include?(key) ? @fields[key] : key
|
11
|
+
Entity.prepare_entity(v, field)
|
12
|
+
end
|
13
|
+
end
|
@@ -20,7 +20,8 @@ module Entity
|
|
20
20
|
Persist.annotation_repo_persist(repo, [name, obj.id] * ":", &block)
|
21
21
|
else
|
22
22
|
|
23
|
-
|
23
|
+
_id = obj.nil? ? 'nil' : obj.id
|
24
|
+
Persist.persist([name, _id] * ":", type, options.dup, &block)
|
24
25
|
end
|
25
26
|
end
|
26
27
|
|
data/lib/scout/entity.rb
CHANGED
@@ -3,6 +3,7 @@ require_relative 'entity/format'
|
|
3
3
|
require_relative 'entity/property'
|
4
4
|
require_relative 'entity/object'
|
5
5
|
require_relative 'entity/identifiers'
|
6
|
+
require_relative 'entity/named_array'
|
6
7
|
|
7
8
|
module Entity
|
8
9
|
def self.extended(base)
|
@@ -12,17 +13,19 @@ module Entity
|
|
12
13
|
base.instance_variable_set(:@persisted_methods, {})
|
13
14
|
base.include Entity::Object
|
14
15
|
base.include AnnotatedArray
|
16
|
+
base.format = base.to_s
|
15
17
|
base
|
16
18
|
end
|
17
19
|
|
18
20
|
def self.prepare_entity(entity, field, options = {})
|
19
21
|
return entity unless defined? Entity
|
20
|
-
return entity unless String === entity or Array === entity
|
21
|
-
options ||= {}
|
22
|
-
|
23
|
-
dup_array = options.delete :dup_array
|
22
|
+
return entity unless String === entity or Array === entity or Numeric === entity
|
24
23
|
|
25
24
|
if Entity === field or (Entity.respond_to?(:formats) and (_format = Entity.formats.find(field)))
|
25
|
+
options ||= {}
|
26
|
+
|
27
|
+
dup_array = options.delete :dup_array
|
28
|
+
|
26
29
|
params = options.dup
|
27
30
|
|
28
31
|
params[:format] ||= params.delete "format"
|
@@ -34,6 +37,8 @@ module Entity
|
|
34
37
|
entity = (entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity)
|
35
38
|
|
36
39
|
entity = mod.setup(entity, params)
|
40
|
+
|
41
|
+
entity.extend AnnotatedArray if Array === entity && ! options[:annotated_array] == FalseClass
|
37
42
|
end
|
38
43
|
|
39
44
|
entity
|
@@ -23,7 +23,9 @@ module TSVAdapter
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def save_annotation_hash
|
26
|
-
self.
|
26
|
+
self.with_write do
|
27
|
+
self.orig_set(ANNOTATION_ATTR_HASH_KEY, ANNOTATION_ATTR_HASH_SERIALIZER.dump(self.annotation_hash))
|
28
|
+
end
|
27
29
|
end
|
28
30
|
|
29
31
|
def self.extended(base)
|
@@ -163,6 +165,16 @@ module TSVAdapter
|
|
163
165
|
end
|
164
166
|
end
|
165
167
|
|
168
|
+
def with_write(*args, &block)
|
169
|
+
if @write
|
170
|
+
yield
|
171
|
+
elsif @closed
|
172
|
+
write_and_close &block
|
173
|
+
else
|
174
|
+
write_and_read &block
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
166
178
|
def close(*args)
|
167
179
|
begin
|
168
180
|
super(*args)
|
data/lib/scout/persist/tsv.rb
CHANGED
@@ -48,7 +48,8 @@ module Persist
|
|
48
48
|
|
49
49
|
def self.tsv(id, options = {}, engine: nil, persist_options: {})
|
50
50
|
engine ||= persist_options[:engine] || :HDB
|
51
|
-
|
51
|
+
persist_options[:other_options] = options unless persist_options.include?(:other_options)
|
52
|
+
Persist.persist(id, engine, persist_options) do |filename|
|
52
53
|
if filename.nil?
|
53
54
|
yield(persist_options[:data] || {})
|
54
55
|
else
|
data/lib/scout/tsv/attach.rb
CHANGED
@@ -92,6 +92,9 @@ module TSV
|
|
92
92
|
end
|
93
93
|
|
94
94
|
other_field_positions = other.identify_field(fields.dup)
|
95
|
+
fields.zip(other_field_positions) do |o,n|
|
96
|
+
raise "Field #{o} not found. Options: #{Log.fingerprint other.fields}" if n.nil?
|
97
|
+
end
|
95
98
|
|
96
99
|
log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
|
97
100
|
Log.debug log_message
|
@@ -252,8 +255,13 @@ module TSV
|
|
252
255
|
def self.identifier_files(obj)
|
253
256
|
if TSV === obj
|
254
257
|
obj.identifier_files
|
255
|
-
elsif Path
|
256
|
-
obj.
|
258
|
+
elsif Path.is_filename?(obj)
|
259
|
+
path = Path === obj ? obj : Path.setup(obj)
|
260
|
+
if obj.dirname.identifiers.exists?
|
261
|
+
obj.dirname.identifiers
|
262
|
+
else
|
263
|
+
[TSV.parse_options(obj)[:identifiers]]
|
264
|
+
end
|
257
265
|
else
|
258
266
|
nil
|
259
267
|
end
|
data/lib/scout/tsv/change_id.rb
CHANGED
@@ -6,6 +6,9 @@ module TSV
|
|
6
6
|
identifiers = source.identifiers if identifiers.nil? and source.respond_to?(:identifiers)
|
7
7
|
if identifiers && source.identify_field(new_key_field, strict: true).nil?
|
8
8
|
identifiers = identifiers.nil? ? source.identifiers : identifiers
|
9
|
+
if Array === identifiers
|
10
|
+
identifiers = identifiers.select{|f| f.identify_field(new_key_field) }.last
|
11
|
+
end
|
9
12
|
new = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
|
10
13
|
new = new.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one, merge: merge)
|
11
14
|
return new
|
data/lib/scout/tsv/dumper.rb
CHANGED
@@ -26,12 +26,13 @@ module TSV
|
|
26
26
|
end
|
27
27
|
|
28
28
|
|
29
|
-
attr_accessor :options, :initialized, :type, :sep, :filename, :namespace
|
29
|
+
attr_accessor :options, :initialized, :type, :sep, :compact, :filename, :namespace
|
30
30
|
def initialize(options = {})
|
31
31
|
options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
|
32
32
|
@sep, @type = IndiferentHash.process_options options,
|
33
33
|
:sep, :type,
|
34
34
|
:sep => "\t", :type => :double
|
35
|
+
@compact = options[:compact]
|
35
36
|
@options = options
|
36
37
|
@options[:type] = @type
|
37
38
|
@sout, @sin = Open.pipe
|
@@ -39,8 +40,9 @@ module TSV
|
|
39
40
|
@initialized = false
|
40
41
|
@filename = options[:filename]
|
41
42
|
@mutex = Mutex.new
|
43
|
+
@namespace = options[:namespace]
|
42
44
|
ConcurrentStream.setup(@sin, pair: @sout)
|
43
|
-
ConcurrentStream.setup(@sout, pair: @sin)
|
45
|
+
ConcurrentStream.setup(@sout, pair: @sin, filename: filename)
|
44
46
|
end
|
45
47
|
|
46
48
|
def set_stream(stream)
|
@@ -96,11 +98,11 @@ module TSV
|
|
96
98
|
when :list, :flat
|
97
99
|
@sin << key + @sep + value * @sep << "\n"
|
98
100
|
when :double
|
99
|
-
@sin << key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep << "\n"
|
101
|
+
@sin << key + @sep + value.collect{|v| Array === v ? (@compact ? v.compact : v) * "|" : v } * @sep << "\n"
|
100
102
|
else
|
101
103
|
if Array === value
|
102
104
|
if Array === value.first
|
103
|
-
@sin << key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep << "\n"
|
105
|
+
@sin << key + @sep + value.collect{|v| Array === v ? (@compact ? v.compact : v) * "|" : v } * @sep << "\n"
|
104
106
|
else
|
105
107
|
@sin << key + @sep + value * @sep << "\n"
|
106
108
|
end
|
@@ -173,13 +175,35 @@ module TSV
|
|
173
175
|
end
|
174
176
|
end
|
175
177
|
|
176
|
-
|
177
|
-
|
178
|
+
self.with_unnamed do
|
179
|
+
if stream.nil?
|
180
|
+
t = Thread.new do
|
181
|
+
begin
|
182
|
+
Thread.current.report_on_exception = true
|
183
|
+
Thread.current["name"] = "Dumper thread"
|
184
|
+
dumper.init(preamble: preamble)
|
185
|
+
|
186
|
+
if keys
|
187
|
+
keys.each do |k|
|
188
|
+
dump_entry.call k, self[k]
|
189
|
+
end
|
190
|
+
else
|
191
|
+
self.each &dump_entry
|
192
|
+
end
|
193
|
+
|
194
|
+
dumper.close
|
195
|
+
rescue
|
196
|
+
dumper.abort($!)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
Thread.pass until t["name"]
|
200
|
+
stream = dumper.stream
|
201
|
+
ConcurrentStream.setup(stream, :threads => [t])
|
202
|
+
stream
|
203
|
+
else
|
204
|
+
dumper.set_stream stream
|
178
205
|
begin
|
179
|
-
Thread.current.report_on_exception = true
|
180
|
-
Thread.current["name"] = "Dumper thread"
|
181
206
|
dumper.init(preamble: preamble)
|
182
|
-
|
183
207
|
if keys
|
184
208
|
keys.each do |k|
|
185
209
|
dump_entry.call k, self[k]
|
@@ -192,28 +216,8 @@ module TSV
|
|
192
216
|
rescue
|
193
217
|
dumper.abort($!)
|
194
218
|
end
|
219
|
+
stream
|
195
220
|
end
|
196
|
-
Thread.pass until t["name"]
|
197
|
-
stream = dumper.stream
|
198
|
-
ConcurrentStream.setup(stream, :threads => [t])
|
199
|
-
stream
|
200
|
-
else
|
201
|
-
dumper.set_stream stream
|
202
|
-
begin
|
203
|
-
dumper.init(preamble: preamble)
|
204
|
-
if keys
|
205
|
-
keys.each do |k|
|
206
|
-
dump_entry.call k, self[k]
|
207
|
-
end
|
208
|
-
else
|
209
|
-
self.each &dump_entry
|
210
|
-
end
|
211
|
-
|
212
|
-
dumper.close
|
213
|
-
rescue
|
214
|
-
dumper.abort($!)
|
215
|
-
end
|
216
|
-
stream
|
217
221
|
end
|
218
222
|
end
|
219
223
|
|
data/lib/scout/tsv/open.rb
CHANGED
data/lib/scout/tsv/parser.rb
CHANGED
@@ -7,7 +7,11 @@ module TSV
|
|
7
7
|
if Proc === cast
|
8
8
|
cast.call value
|
9
9
|
else
|
10
|
-
value.
|
10
|
+
if value.nil? || value.empty?
|
11
|
+
nil
|
12
|
+
else
|
13
|
+
value.send(cast)
|
14
|
+
end
|
11
15
|
end
|
12
16
|
end
|
13
17
|
end
|
@@ -94,6 +98,8 @@ module TSV
|
|
94
98
|
line.chomp!
|
95
99
|
if Proc === fix
|
96
100
|
line = fix.call line
|
101
|
+
break if (FalseClass === line) || :break == line
|
102
|
+
next if line.nil?
|
97
103
|
elsif fix
|
98
104
|
line = Misc.fixutf8(line)
|
99
105
|
end
|
@@ -175,14 +181,14 @@ module TSV
|
|
175
181
|
current = data[key]
|
176
182
|
if merge == :concat
|
177
183
|
these_items.each_with_index do |new,i|
|
178
|
-
new = [nil] if new.empty?
|
184
|
+
new = one2one ? [nil] : [] if new.empty?
|
179
185
|
current[i].concat(new)
|
180
186
|
end
|
181
187
|
else
|
182
188
|
merged = []
|
183
189
|
these_items.each_with_index do |new,i|
|
184
|
-
new = [nil] if new.empty?
|
185
|
-
merged[i] = current[i] + new
|
190
|
+
new = one2one ? [nil] : [] if new.empty?
|
191
|
+
merged[i] = (current[i] || []) + new
|
186
192
|
end
|
187
193
|
data[key] = merged
|
188
194
|
end
|
@@ -290,7 +296,8 @@ module TSV
|
|
290
296
|
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
291
297
|
|
292
298
|
all_fields = [key_field] + fields if key_field && fields
|
293
|
-
|
299
|
+
namespace = opts[:namespace]
|
300
|
+
NamedArray.setup([opts, key_field, fields, first_line, preamble, all_fields, namespace], %w(options key_field fields first_line preamble all_fields namespace))
|
294
301
|
rescue Exception
|
295
302
|
raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
|
296
303
|
stream.abort($!) if stream.respond_to?(:abort)
|
@@ -298,6 +305,10 @@ module TSV
|
|
298
305
|
end
|
299
306
|
end
|
300
307
|
|
308
|
+
def self.parse_options(...)
|
309
|
+
parse_header(...)[:options]
|
310
|
+
end
|
311
|
+
|
301
312
|
KEY_PARAMETERS = begin
|
302
313
|
params = []
|
303
314
|
(method(:parse_line).parameters + method(:parse_stream).parameters).each do |type, name|
|
@@ -352,6 +363,7 @@ module TSV
|
|
352
363
|
all_field_names ||= [@key_field] + @fields
|
353
364
|
fields = all_field_names if fields == :all
|
354
365
|
positions = NamedArray.identify_name(all_field_names, fields)
|
366
|
+
|
355
367
|
raise "Not all fields (#{Log.fingerprint fields}) identified in #{Log.fingerprint all_field_names}" if positions.include?(nil)
|
356
368
|
kwargs[:positions] = positions
|
357
369
|
field_names = all_field_names.values_at *positions
|
@@ -407,7 +419,7 @@ module TSV
|
|
407
419
|
end
|
408
420
|
|
409
421
|
if data
|
410
|
-
TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
|
422
|
+
TSV.setup(data, @source_options.merge(:key_field => key_field_name, :fields => field_names, :type => @type))
|
411
423
|
else
|
412
424
|
[key_field || self.key_field, fields || self.fields]
|
413
425
|
end
|
@@ -434,7 +446,7 @@ module TSV
|
|
434
446
|
end
|
435
447
|
end
|
436
448
|
|
437
|
-
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed:
|
449
|
+
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: nil, serializer: nil, **kwargs, &block)
|
438
450
|
parser = TSV::Parser === stream ? stream : TSV::Parser.new(stream, fix: fix, header_hash: header_hash, sep: sep)
|
439
451
|
|
440
452
|
cast = kwargs[:cast]
|
@@ -471,9 +483,9 @@ module TSV
|
|
471
483
|
data = parser.traverse **kwargs, &block
|
472
484
|
data.type = type
|
473
485
|
data.cast = cast
|
474
|
-
data.filename = filename || parser.options[:filename]
|
475
|
-
data.namespace = namespace || parser.options[:namespace]
|
476
|
-
data.identifiers = identifiers
|
486
|
+
data.filename = filename || parser.options[:filename] if data.filename.nil?
|
487
|
+
data.namespace = namespace || parser.options[:namespace] if data.namespace.nil?
|
488
|
+
data.identifiers = identifiers || parser.options[:identifiers] if data.identifiers.nil?
|
477
489
|
data.unnamed = unnamed
|
478
490
|
data.save_annotation_hash if data.respond_to?(:save_annotation_hash)
|
479
491
|
data
|
data/lib/scout/tsv/path.rb
CHANGED
data/lib/scout/tsv/stream.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
module TSV
|
2
|
-
def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, field_prefix: nil)
|
2
|
+
def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil)
|
3
|
+
sep = "\t" if sep.nil?
|
3
4
|
|
4
5
|
streams = streams.collect do |stream|
|
5
6
|
case stream
|
@@ -37,7 +38,8 @@ module TSV
|
|
37
38
|
|
38
39
|
streams = streams.collect do |stream|
|
39
40
|
|
40
|
-
parser = TSV::Parser.new stream,
|
41
|
+
parser = TSV::Parser.new stream, sep: sep
|
42
|
+
#parser.type = type
|
41
43
|
|
42
44
|
sfields = parser.fields
|
43
45
|
|
@@ -102,8 +104,9 @@ module TSV
|
|
102
104
|
keys[i]= nil
|
103
105
|
parts[i]= nil
|
104
106
|
else
|
105
|
-
vs = line.
|
107
|
+
vs = line.split(sep, -1)
|
106
108
|
key, *p = vs
|
109
|
+
p = [p] if parser_types[i] == :flat
|
107
110
|
keys[i]= key
|
108
111
|
parts[i]= p
|
109
112
|
end
|
@@ -112,7 +115,7 @@ module TSV
|
|
112
115
|
done_streams =[]
|
113
116
|
|
114
117
|
fields = nil if fields && fields.empty?
|
115
|
-
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type
|
118
|
+
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type, compact: !one2one
|
116
119
|
dumper.init(preamble: preamble_txt || !!key_field)
|
117
120
|
|
118
121
|
t = Thread.new do
|
@@ -130,7 +133,7 @@ module TSV
|
|
130
133
|
keys.each_with_index do |key,i|
|
131
134
|
case key
|
132
135
|
when min
|
133
|
-
|
136
|
+
new_parts = parts[i]
|
134
137
|
|
135
138
|
begin
|
136
139
|
line = lines[i]= begin
|
@@ -145,18 +148,20 @@ module TSV
|
|
145
148
|
else
|
146
149
|
k, *p = line.chomp.split(sep, -1)
|
147
150
|
p = p.collect{|e| e.nil? ? "" : e }
|
151
|
+
p = [p] if parser_types[i] == :flat
|
148
152
|
|
149
153
|
if k == keys[i]
|
150
|
-
|
154
|
+
new_parts = NamedArray.zip_fields([new_parts]).zip(p).collect{|p| [p.flatten * "|"] }
|
151
155
|
raise TryAgain
|
152
156
|
end
|
153
157
|
keys[i]= k
|
154
158
|
parts[i]= p
|
155
159
|
end
|
160
|
+
|
161
|
+
new_values << new_parts
|
156
162
|
rescue TryAgain
|
157
163
|
keys[i]= nil
|
158
164
|
parts[i]= nil
|
159
|
-
Log.debug "Skipping repeated key in stream #{i}: #{key} - #{min}"
|
160
165
|
retry
|
161
166
|
end
|
162
167
|
else
|
@@ -180,6 +185,8 @@ module TSV
|
|
180
185
|
new_values = new_values.inject([]){|acc,l| acc.concat l }
|
181
186
|
end
|
182
187
|
|
188
|
+
new_values = new_values.collect{|l| Array === l ? l.compact : l } unless one2one
|
189
|
+
|
183
190
|
dumper.add min, new_values
|
184
191
|
end
|
185
192
|
|
@@ -187,7 +194,7 @@ module TSV
|
|
187
194
|
|
188
195
|
streams.each do |stream|
|
189
196
|
stream.close if stream.respond_to?(:close) && ! stream.closed?
|
190
|
-
stream.join if stream.respond_to?
|
197
|
+
stream.join if stream.respond_to?(:join)
|
191
198
|
end
|
192
199
|
end
|
193
200
|
rescue Aborted
|
data/lib/scout/tsv/traverse.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require_relative 'parser'
|
2
2
|
module TSV
|
3
|
-
def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed:
|
3
|
+
def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: nil, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block)
|
4
4
|
key_field = key_field_pos if key_field.nil?
|
5
5
|
fields = fields_pos.dup if fields.nil?
|
6
6
|
type = @type if type.nil?
|
@@ -9,6 +9,8 @@ module TSV
|
|
9
9
|
fields = [fields] unless fields.nil? || Array === fields
|
10
10
|
positions = (fields.nil? || fields == :all) ? nil : self.identify_field(fields)
|
11
11
|
positions = nil if fields == self.fields
|
12
|
+
unnamed = @unnamed if unnamed.nil?
|
13
|
+
unnamed = false if unnamed.nil?
|
12
14
|
|
13
15
|
if key_pos == :key
|
14
16
|
key_name = @key_field
|
@@ -61,7 +63,6 @@ module TSV
|
|
61
63
|
key = @type == :flat ? values : values[key_pos] if key_pos != :key
|
62
64
|
|
63
65
|
values = values.values_at(*positions)
|
64
|
-
NamedArray.setup(values, fields)
|
65
66
|
if key_index
|
66
67
|
if @type == :double
|
67
68
|
values.insert key_index, [orig_key]
|
@@ -71,6 +72,15 @@ module TSV
|
|
71
72
|
end
|
72
73
|
end
|
73
74
|
|
75
|
+
if ! unnamed && fields
|
76
|
+
case @type
|
77
|
+
when :flat, :single
|
78
|
+
values = Entity.prepare_entity(values, fields.first)
|
79
|
+
else
|
80
|
+
values = NamedArray.setup(values, fields, entity_options)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
74
84
|
values = TSV.cast_value(values, cast) if cast
|
75
85
|
|
76
86
|
if Array === key
|