scout-gear 10.7.0 → 10.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +8 -1
- data/VERSION +1 -1
- data/lib/scout/association/index.rb +1 -1
- data/lib/scout/association.rb +21 -5
- data/lib/scout/entity/format.rb +9 -4
- data/lib/scout/entity/identifiers.rb +2 -2
- data/lib/scout/entity/named_array.rb +13 -0
- data/lib/scout/entity/property.rb +2 -1
- data/lib/scout/entity.rb +9 -4
- data/lib/scout/persist/tsv/adapter/base.rb +13 -1
- data/lib/scout/persist/tsv.rb +2 -1
- data/lib/scout/tsv/attach.rb +10 -2
- data/lib/scout/tsv/change_id.rb +3 -0
- data/lib/scout/tsv/dumper.rb +34 -30
- data/lib/scout/tsv/open.rb +1 -0
- data/lib/scout/tsv/parser.rb +22 -10
- data/lib/scout/tsv/path.rb +8 -0
- data/lib/scout/tsv/stream.rb +15 -8
- data/lib/scout/tsv/traverse.rb +12 -2
- data/lib/scout/tsv/util/process.rb +4 -1
- data/lib/scout/tsv/util/select.rb +8 -2
- data/lib/scout/tsv/util/sort.rb +23 -15
- data/lib/scout/tsv/util.rb +11 -2
- data/lib/scout/tsv.rb +23 -11
- data/lib/scout/workflow/definition.rb +24 -9
- data/lib/scout/workflow/deployment/orchestrator.rb +10 -7
- data/lib/scout/workflow/exceptions.rb +1 -0
- data/lib/scout/workflow/path.rb +40 -0
- data/lib/scout/workflow/step/dependencies.rb +36 -12
- data/lib/scout/workflow/step/file.rb +2 -1
- data/lib/scout/workflow/step/info.rb +20 -4
- data/lib/scout/workflow/step/load.rb +5 -3
- data/lib/scout/workflow/step/progress.rb +6 -0
- data/lib/scout/workflow/step/provenance.rb +1 -1
- data/lib/scout/workflow/step/status.rb +11 -4
- data/lib/scout/workflow/step.rb +33 -12
- data/lib/scout/workflow/task/dependencies.rb +33 -24
- data/lib/scout/workflow/task/inputs.rb +42 -12
- data/lib/scout/workflow/task.rb +22 -11
- data/lib/scout/workflow/usage.rb +3 -3
- data/lib/scout/workflow.rb +3 -0
- data/scout-gear.gemspec +13 -4
- data/scout_commands/db/query +83 -0
- data/scout_commands/db/register +43 -0
- data/scout_commands/db/show +47 -0
- data/test/scout/entity/test_named_array.rb +21 -0
- data/test/scout/persist/test_tsv.rb +20 -0
- data/test/scout/persist/tsv/adapter/test_base.rb +20 -0
- data/test/scout/test_tsv.rb +40 -0
- data/test/scout/tsv/test_dumper.rb +24 -0
- data/test/scout/tsv/test_path.rb +24 -0
- data/test/scout/tsv/test_stream.rb +93 -0
- data/test/scout/tsv/test_traverse.rb +99 -0
- data/test/scout/tsv/test_util.rb +2 -0
- data/test/scout/tsv/util/test_select.rb +22 -0
- data/test/scout/tsv/util/test_sort.rb +24 -0
- data/test/scout/workflow/step/test_dependencies.rb +26 -0
- data/test/scout/workflow/step/test_info.rb +35 -0
- data/test/scout/workflow/task/test_dependencies.rb +67 -1
- data/test/scout/workflow/task/test_inputs.rb +24 -7
- data/test/scout/workflow/test_definition.rb +31 -0
- data/test/scout/workflow/test_path.rb +9 -0
- data/test/scout/workflow/test_task.rb +36 -0
- data/test/scout/workflow/test_usage.rb +0 -1
- metadata +12 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1182999631b54e94f842c90017e1f97dde904ddafe47129cd80b51097714a1c4
|
4
|
+
data.tar.gz: ec736a5762fc2e58450a7955dffffe61eb73584f0bdc53bf3cd368e194ae7cbf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66a5c6ab90c2aa047cfbd000731eaec3913994880adddc9ac2daac9802ee2ebef14a054c1e292200624a6411ac2244d5f915de8d3ed9113d1237edc3795ab027
|
7
|
+
data.tar.gz: 27a0e87ac1360420bffc87b34b01eb6f21631fdf3f6ef95983fb46152f6e0e390200518dd2bebcc95c880e74d6f6f8a98a76577be2fbad0aea7d1fbe9c0720b1
|
data/.vimproject
CHANGED
@@ -7,10 +7,12 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
7
7
|
scout=scout{
|
8
8
|
workflow.rb
|
9
9
|
workflow=workflow{
|
10
|
+
exceptions.rb
|
10
11
|
definition.rb
|
11
12
|
documentation.rb
|
12
13
|
usage.rb
|
13
14
|
util.rb
|
15
|
+
path.rb
|
14
16
|
step.rb
|
15
17
|
step=step{
|
16
18
|
config.rb
|
@@ -80,7 +82,6 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
80
82
|
sort.rb
|
81
83
|
unzip.rb
|
82
84
|
}
|
83
|
-
entity.rb
|
84
85
|
parser.rb
|
85
86
|
dumper.rb
|
86
87
|
transformer.rb
|
@@ -106,6 +107,7 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
106
107
|
object.rb
|
107
108
|
format.rb
|
108
109
|
identifiers.rb
|
110
|
+
named_array.rb
|
109
111
|
}
|
110
112
|
association.rb
|
111
113
|
association=association{
|
@@ -136,6 +138,11 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
136
138
|
update
|
137
139
|
template
|
138
140
|
offsite
|
141
|
+
db=db{
|
142
|
+
register
|
143
|
+
query
|
144
|
+
show
|
145
|
+
}
|
139
146
|
workflow=workflow{
|
140
147
|
task
|
141
148
|
list
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
10.7.0
|
1
|
+
10.7.2
|
data/lib/scout/association.rb
CHANGED
@@ -6,17 +6,32 @@ require_relative 'association/item'
|
|
6
6
|
|
7
7
|
module Association
|
8
8
|
def self.open(obj, source: nil, target: nil, fields: nil, source_format: nil, target_format: nil, format: nil, **kwargs)
|
9
|
+
|
10
|
+
|
11
|
+
if Path.is_filename?(obj)
|
12
|
+
options = TSV.parse_options(obj).merge(kwargs)
|
13
|
+
else
|
14
|
+
options = kwargs.dup
|
15
|
+
end
|
16
|
+
|
17
|
+
if String === obj && options[:namespace] && obj.include?("NAMESPACE")
|
18
|
+
new_obj = obj.gsub(/\[?NAMESPACE\]?/, options[:namespace])
|
19
|
+
obj.annotate(new_obj)
|
20
|
+
obj = new_obj
|
21
|
+
end
|
22
|
+
|
9
23
|
all_fields = TSV.all_fields(obj)
|
10
|
-
source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields,
|
24
|
+
source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields, options.merge(source: source, target: target, source_format: source_format, target_format: target_format, format: format))
|
11
25
|
|
12
26
|
original_source_header = all_fields[source_pos]
|
13
27
|
original_field_headers = all_fields.values_at(*field_pos)
|
14
28
|
original_target_header = all_fields[field_pos.first]
|
15
29
|
|
16
|
-
type, identifiers = IndiferentHash.process_options
|
30
|
+
type, identifiers = IndiferentHash.process_options options, :type, :identifiers
|
17
31
|
|
18
32
|
if source_format
|
19
33
|
translation_files = [TSV.identifier_files(obj), Entity.identifier_files(source_format), identifiers].flatten.compact
|
34
|
+
translation_files.collect!{|f| Path.is_filename?(f, false) ? Path.setup(f.gsub(/\[?NAMESPACE\]?/, options[:namespace])) : f }
|
20
35
|
source_index = begin
|
21
36
|
TSV.translation_index(translation_files, source_header, source_format)
|
22
37
|
rescue
|
@@ -26,6 +41,7 @@ module Association
|
|
26
41
|
|
27
42
|
if target_format
|
28
43
|
translation_files = [TSV.identifier_files(obj), Entity.identifier_files(target_format), identifiers].flatten.compact
|
44
|
+
translation_files.collect!{|f| Path.is_filename?(f, false) ? Path.setup(f.gsub(/\[?NAMESPACE\]?/, options[:namespace])) : f }
|
29
45
|
target_index = begin
|
30
46
|
TSV.translation_index(translation_files, field_headers.first, target_format)
|
31
47
|
rescue
|
@@ -67,10 +83,10 @@ module Association
|
|
67
83
|
if source_index.nil? && target_index.nil?
|
68
84
|
if TSV === obj
|
69
85
|
IndiferentHash.pull_keys kwargs, :persist
|
70
|
-
type =
|
71
|
-
res = obj.reorder original_source_header, all_fields.values_at(*field_pos), **
|
86
|
+
type = options[:type] || obj.type
|
87
|
+
res = obj.reorder original_source_header, all_fields.values_at(*field_pos), **options.merge(type: type, merge: true)
|
72
88
|
else
|
73
|
-
res = TSV.open(obj, key_field: original_source_header, fields: all_fields.values_at(*field_pos), **
|
89
|
+
res = TSV.open(obj, key_field: original_source_header, fields: all_fields.values_at(*field_pos), **options.merge(type: type))
|
74
90
|
end
|
75
91
|
res.key_field = final_key_field
|
76
92
|
res.fields = final_fields
|
data/lib/scout/entity/format.rb
CHANGED
@@ -17,16 +17,20 @@ module Entity
|
|
17
17
|
def find(value)
|
18
18
|
@find_cache ||= {}
|
19
19
|
|
20
|
-
@find_cache
|
20
|
+
if @find_cache.include?(value)
|
21
|
+
@find_cache[value]
|
22
|
+
else
|
23
|
+
@find_cache[value] = begin
|
21
24
|
if orig_include? value
|
22
|
-
|
25
|
+
value
|
23
26
|
else
|
27
|
+
value = value.to_s
|
24
28
|
found = nil
|
25
29
|
each do |k,v|
|
26
|
-
if value
|
30
|
+
if value == k.to_s
|
27
31
|
found = k
|
28
32
|
break
|
29
|
-
elsif value
|
33
|
+
elsif value =~ /\(#{Regexp.quote k.to_s}\)/
|
30
34
|
found = k
|
31
35
|
break
|
32
36
|
end
|
@@ -34,6 +38,7 @@ module Entity
|
|
34
38
|
found
|
35
39
|
end
|
36
40
|
end
|
41
|
+
end
|
37
42
|
end
|
38
43
|
|
39
44
|
def [](value)
|
data/lib/scout/entity/identifiers.rb
CHANGED
@@ -91,6 +91,8 @@ module Entity
|
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
|
+
name = default if name.nil?
|
95
|
+
|
94
96
|
self.send(:include, Entity::Identified) unless Entity::Identified === self
|
95
97
|
|
96
98
|
self.format = all_fields
|
@@ -106,6 +108,4 @@ module Entity
|
|
106
108
|
@identifier_files << file
|
107
109
|
@identifier_files.uniq!
|
108
110
|
end
|
109
|
-
|
110
|
-
|
111
111
|
end
|
data/lib/scout/entity/named_array.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'scout/named_array'
|
2
|
+
require 'scout/entity'
|
3
|
+
|
4
|
+
module NamedArray
|
5
|
+
|
6
|
+
def [](key)
|
7
|
+
pos = NamedArray.identify_name(@fields, key)
|
8
|
+
return nil if pos.nil?
|
9
|
+
v = super(pos)
|
10
|
+
field = @fields && Integer === key && ! @fields.include?(key) ? @fields[key] : key
|
11
|
+
Entity.prepare_entity(v, field)
|
12
|
+
end
|
13
|
+
end
|
data/lib/scout/entity/property.rb
CHANGED
@@ -20,7 +20,8 @@ module Entity
|
|
20
20
|
Persist.annotation_repo_persist(repo, [name, obj.id] * ":", &block)
|
21
21
|
else
|
22
22
|
|
23
|
-
|
23
|
+
_id = obj.nil? ? 'nil' : obj.id
|
24
|
+
Persist.persist([name, _id] * ":", type, options.dup, &block)
|
24
25
|
end
|
25
26
|
end
|
26
27
|
|
data/lib/scout/entity.rb
CHANGED
@@ -3,6 +3,7 @@ require_relative 'entity/format'
|
|
3
3
|
require_relative 'entity/property'
|
4
4
|
require_relative 'entity/object'
|
5
5
|
require_relative 'entity/identifiers'
|
6
|
+
require_relative 'entity/named_array'
|
6
7
|
|
7
8
|
module Entity
|
8
9
|
def self.extended(base)
|
@@ -12,17 +13,19 @@ module Entity
|
|
12
13
|
base.instance_variable_set(:@persisted_methods, {})
|
13
14
|
base.include Entity::Object
|
14
15
|
base.include AnnotatedArray
|
16
|
+
base.format = base.to_s
|
15
17
|
base
|
16
18
|
end
|
17
19
|
|
18
20
|
def self.prepare_entity(entity, field, options = {})
|
19
21
|
return entity unless defined? Entity
|
20
|
-
return entity unless String === entity or Array === entity
|
21
|
-
options ||= {}
|
22
|
-
|
23
|
-
dup_array = options.delete :dup_array
|
22
|
+
return entity unless String === entity or Array === entity or Numeric === entity
|
24
23
|
|
25
24
|
if Entity === field or (Entity.respond_to?(:formats) and (_format = Entity.formats.find(field)))
|
25
|
+
options ||= {}
|
26
|
+
|
27
|
+
dup_array = options.delete :dup_array
|
28
|
+
|
26
29
|
params = options.dup
|
27
30
|
|
28
31
|
params[:format] ||= params.delete "format"
|
@@ -34,6 +37,8 @@ module Entity
|
|
34
37
|
entity = (entity.frozen? and not entity.nil?) ? entity.dup : ((Array === entity and dup_array) ? entity.collect{|e| e.nil? ? e : e.dup} : entity)
|
35
38
|
|
36
39
|
entity = mod.setup(entity, params)
|
40
|
+
|
41
|
+
entity.extend AnnotatedArray if Array === entity && ! options[:annotated_array] == FalseClass
|
37
42
|
end
|
38
43
|
|
39
44
|
entity
|
data/lib/scout/persist/tsv/adapter/base.rb
CHANGED
@@ -23,7 +23,9 @@ module TSVAdapter
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def save_annotation_hash
|
26
|
-
self.
|
26
|
+
self.with_write do
|
27
|
+
self.orig_set(ANNOTATION_ATTR_HASH_KEY, ANNOTATION_ATTR_HASH_SERIALIZER.dump(self.annotation_hash))
|
28
|
+
end
|
27
29
|
end
|
28
30
|
|
29
31
|
def self.extended(base)
|
@@ -163,6 +165,16 @@ module TSVAdapter
|
|
163
165
|
end
|
164
166
|
end
|
165
167
|
|
168
|
+
def with_write(*args, &block)
|
169
|
+
if @write
|
170
|
+
yield
|
171
|
+
elsif @closed
|
172
|
+
write_and_close &block
|
173
|
+
else
|
174
|
+
write_and_read &block
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
166
178
|
def close(*args)
|
167
179
|
begin
|
168
180
|
super(*args)
|
data/lib/scout/persist/tsv.rb
CHANGED
@@ -48,7 +48,8 @@ module Persist
|
|
48
48
|
|
49
49
|
def self.tsv(id, options = {}, engine: nil, persist_options: {})
|
50
50
|
engine ||= persist_options[:engine] || :HDB
|
51
|
-
|
51
|
+
persist_options[:other_options] = options unless persist_options.include?(:other_options)
|
52
|
+
Persist.persist(id, engine, persist_options) do |filename|
|
52
53
|
if filename.nil?
|
53
54
|
yield(persist_options[:data] || {})
|
54
55
|
else
|
data/lib/scout/tsv/attach.rb
CHANGED
@@ -92,6 +92,9 @@ module TSV
|
|
92
92
|
end
|
93
93
|
|
94
94
|
other_field_positions = other.identify_field(fields.dup)
|
95
|
+
fields.zip(other_field_positions) do |o,n|
|
96
|
+
raise "Field #{o} not found. Options: #{Log.fingerprint other.fields}" if n.nil?
|
97
|
+
end
|
95
98
|
|
96
99
|
log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
|
97
100
|
Log.debug log_message
|
@@ -252,8 +255,13 @@ module TSV
|
|
252
255
|
def self.identifier_files(obj)
|
253
256
|
if TSV === obj
|
254
257
|
obj.identifier_files
|
255
|
-
elsif Path
|
256
|
-
obj.
|
258
|
+
elsif Path.is_filename?(obj)
|
259
|
+
path = Path === obj ? obj : Path.setup(obj)
|
260
|
+
if obj.dirname.identifiers.exists?
|
261
|
+
obj.dirname.identifiers
|
262
|
+
else
|
263
|
+
[TSV.parse_options(obj)[:identifiers]]
|
264
|
+
end
|
257
265
|
else
|
258
266
|
nil
|
259
267
|
end
|
data/lib/scout/tsv/change_id.rb
CHANGED
@@ -6,6 +6,9 @@ module TSV
|
|
6
6
|
identifiers = source.identifiers if identifiers.nil? and source.respond_to?(:identifiers)
|
7
7
|
if identifiers && source.identify_field(new_key_field, strict: true).nil?
|
8
8
|
identifiers = identifiers.nil? ? source.identifiers : identifiers
|
9
|
+
if Array === identifiers
|
10
|
+
identifiers = identifiers.select{|f| f.identify_field(new_key_field) }.last
|
11
|
+
end
|
9
12
|
new = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
|
10
13
|
new = new.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one, merge: merge)
|
11
14
|
return new
|
data/lib/scout/tsv/dumper.rb
CHANGED
@@ -26,12 +26,13 @@ module TSV
|
|
26
26
|
end
|
27
27
|
|
28
28
|
|
29
|
-
attr_accessor :options, :initialized, :type, :sep, :filename, :namespace
|
29
|
+
attr_accessor :options, :initialized, :type, :sep, :compact, :filename, :namespace
|
30
30
|
def initialize(options = {})
|
31
31
|
options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
|
32
32
|
@sep, @type = IndiferentHash.process_options options,
|
33
33
|
:sep, :type,
|
34
34
|
:sep => "\t", :type => :double
|
35
|
+
@compact = options[:compact]
|
35
36
|
@options = options
|
36
37
|
@options[:type] = @type
|
37
38
|
@sout, @sin = Open.pipe
|
@@ -39,8 +40,9 @@ module TSV
|
|
39
40
|
@initialized = false
|
40
41
|
@filename = options[:filename]
|
41
42
|
@mutex = Mutex.new
|
43
|
+
@namespace = options[:namespace]
|
42
44
|
ConcurrentStream.setup(@sin, pair: @sout)
|
43
|
-
ConcurrentStream.setup(@sout, pair: @sin)
|
45
|
+
ConcurrentStream.setup(@sout, pair: @sin, filename: filename)
|
44
46
|
end
|
45
47
|
|
46
48
|
def set_stream(stream)
|
@@ -96,11 +98,11 @@ module TSV
|
|
96
98
|
when :list, :flat
|
97
99
|
@sin << key + @sep + value * @sep << "\n"
|
98
100
|
when :double
|
99
|
-
@sin << key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep << "\n"
|
101
|
+
@sin << key + @sep + value.collect{|v| Array === v ? (@compact ? v.compact : v) * "|" : v } * @sep << "\n"
|
100
102
|
else
|
101
103
|
if Array === value
|
102
104
|
if Array === value.first
|
103
|
-
@sin << key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep << "\n"
|
105
|
+
@sin << key + @sep + value.collect{|v| Array === v ? (@compact ? v.compact : v) * "|" : v } * @sep << "\n"
|
104
106
|
else
|
105
107
|
@sin << key + @sep + value * @sep << "\n"
|
106
108
|
end
|
@@ -173,13 +175,35 @@ module TSV
|
|
173
175
|
end
|
174
176
|
end
|
175
177
|
|
176
|
-
|
177
|
-
|
178
|
+
self.with_unnamed do
|
179
|
+
if stream.nil?
|
180
|
+
t = Thread.new do
|
181
|
+
begin
|
182
|
+
Thread.current.report_on_exception = true
|
183
|
+
Thread.current["name"] = "Dumper thread"
|
184
|
+
dumper.init(preamble: preamble)
|
185
|
+
|
186
|
+
if keys
|
187
|
+
keys.each do |k|
|
188
|
+
dump_entry.call k, self[k]
|
189
|
+
end
|
190
|
+
else
|
191
|
+
self.each &dump_entry
|
192
|
+
end
|
193
|
+
|
194
|
+
dumper.close
|
195
|
+
rescue
|
196
|
+
dumper.abort($!)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
Thread.pass until t["name"]
|
200
|
+
stream = dumper.stream
|
201
|
+
ConcurrentStream.setup(stream, :threads => [t])
|
202
|
+
stream
|
203
|
+
else
|
204
|
+
dumper.set_stream stream
|
178
205
|
begin
|
179
|
-
Thread.current.report_on_exception = true
|
180
|
-
Thread.current["name"] = "Dumper thread"
|
181
206
|
dumper.init(preamble: preamble)
|
182
|
-
|
183
207
|
if keys
|
184
208
|
keys.each do |k|
|
185
209
|
dump_entry.call k, self[k]
|
@@ -192,28 +216,8 @@ module TSV
|
|
192
216
|
rescue
|
193
217
|
dumper.abort($!)
|
194
218
|
end
|
219
|
+
stream
|
195
220
|
end
|
196
|
-
Thread.pass until t["name"]
|
197
|
-
stream = dumper.stream
|
198
|
-
ConcurrentStream.setup(stream, :threads => [t])
|
199
|
-
stream
|
200
|
-
else
|
201
|
-
dumper.set_stream stream
|
202
|
-
begin
|
203
|
-
dumper.init(preamble: preamble)
|
204
|
-
if keys
|
205
|
-
keys.each do |k|
|
206
|
-
dump_entry.call k, self[k]
|
207
|
-
end
|
208
|
-
else
|
209
|
-
self.each &dump_entry
|
210
|
-
end
|
211
|
-
|
212
|
-
dumper.close
|
213
|
-
rescue
|
214
|
-
dumper.abort($!)
|
215
|
-
end
|
216
|
-
stream
|
217
221
|
end
|
218
222
|
end
|
219
223
|
|
data/lib/scout/tsv/open.rb
CHANGED
data/lib/scout/tsv/parser.rb
CHANGED
@@ -7,7 +7,11 @@ module TSV
|
|
7
7
|
if Proc === cast
|
8
8
|
cast.call value
|
9
9
|
else
|
10
|
-
value.
|
10
|
+
if value.nil? || value.empty?
|
11
|
+
nil
|
12
|
+
else
|
13
|
+
value.send(cast)
|
14
|
+
end
|
11
15
|
end
|
12
16
|
end
|
13
17
|
end
|
@@ -94,6 +98,8 @@ module TSV
|
|
94
98
|
line.chomp!
|
95
99
|
if Proc === fix
|
96
100
|
line = fix.call line
|
101
|
+
break if (FalseClass === line) || :break == line
|
102
|
+
next if line.nil?
|
97
103
|
elsif fix
|
98
104
|
line = Misc.fixutf8(line)
|
99
105
|
end
|
@@ -175,14 +181,14 @@ module TSV
|
|
175
181
|
current = data[key]
|
176
182
|
if merge == :concat
|
177
183
|
these_items.each_with_index do |new,i|
|
178
|
-
new = [nil] if new.empty?
|
184
|
+
new = one2one ? [nil] : [] if new.empty?
|
179
185
|
current[i].concat(new)
|
180
186
|
end
|
181
187
|
else
|
182
188
|
merged = []
|
183
189
|
these_items.each_with_index do |new,i|
|
184
|
-
new = [nil] if new.empty?
|
185
|
-
merged[i] = current[i] + new
|
190
|
+
new = one2one ? [nil] : [] if new.empty?
|
191
|
+
merged[i] = (current[i] || []) + new
|
186
192
|
end
|
187
193
|
data[key] = merged
|
188
194
|
end
|
@@ -290,7 +296,8 @@ module TSV
|
|
290
296
|
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
291
297
|
|
292
298
|
all_fields = [key_field] + fields if key_field && fields
|
293
|
-
|
299
|
+
namespace = opts[:namespace]
|
300
|
+
NamedArray.setup([opts, key_field, fields, first_line, preamble, all_fields, namespace], %w(options key_field fields first_line preamble all_fields namespace))
|
294
301
|
rescue Exception
|
295
302
|
raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
|
296
303
|
stream.abort($!) if stream.respond_to?(:abort)
|
@@ -298,6 +305,10 @@ module TSV
|
|
298
305
|
end
|
299
306
|
end
|
300
307
|
|
308
|
+
def self.parse_options(...)
|
309
|
+
parse_header(...)[:options]
|
310
|
+
end
|
311
|
+
|
301
312
|
KEY_PARAMETERS = begin
|
302
313
|
params = []
|
303
314
|
(method(:parse_line).parameters + method(:parse_stream).parameters).each do |type, name|
|
@@ -352,6 +363,7 @@ module TSV
|
|
352
363
|
all_field_names ||= [@key_field] + @fields
|
353
364
|
fields = all_field_names if fields == :all
|
354
365
|
positions = NamedArray.identify_name(all_field_names, fields)
|
366
|
+
|
355
367
|
raise "Not all fields (#{Log.fingerprint fields}) identified in #{Log.fingerprint all_field_names}" if positions.include?(nil)
|
356
368
|
kwargs[:positions] = positions
|
357
369
|
field_names = all_field_names.values_at *positions
|
@@ -407,7 +419,7 @@ module TSV
|
|
407
419
|
end
|
408
420
|
|
409
421
|
if data
|
410
|
-
TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
|
422
|
+
TSV.setup(data, @source_options.merge(:key_field => key_field_name, :fields => field_names, :type => @type))
|
411
423
|
else
|
412
424
|
[key_field || self.key_field, fields || self.fields]
|
413
425
|
end
|
@@ -434,7 +446,7 @@ module TSV
|
|
434
446
|
end
|
435
447
|
end
|
436
448
|
|
437
|
-
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed:
|
449
|
+
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: nil, serializer: nil, **kwargs, &block)
|
438
450
|
parser = TSV::Parser === stream ? stream : TSV::Parser.new(stream, fix: fix, header_hash: header_hash, sep: sep)
|
439
451
|
|
440
452
|
cast = kwargs[:cast]
|
@@ -471,9 +483,9 @@ module TSV
|
|
471
483
|
data = parser.traverse **kwargs, &block
|
472
484
|
data.type = type
|
473
485
|
data.cast = cast
|
474
|
-
data.filename = filename || parser.options[:filename]
|
475
|
-
data.namespace = namespace || parser.options[:namespace]
|
476
|
-
data.identifiers = identifiers
|
486
|
+
data.filename = filename || parser.options[:filename] if data.filename.nil?
|
487
|
+
data.namespace = namespace || parser.options[:namespace] if data.namespace.nil?
|
488
|
+
data.identifiers = identifiers || parser.options[:identifiers] if data.identifiers.nil?
|
477
489
|
data.unnamed = unnamed
|
478
490
|
data.save_annotation_hash if data.respond_to?(:save_annotation_hash)
|
479
491
|
data
|
data/lib/scout/tsv/path.rb
CHANGED
data/lib/scout/tsv/stream.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
module TSV
|
2
|
-
def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, field_prefix: nil)
|
2
|
+
def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil)
|
3
|
+
sep = "\t" if sep.nil?
|
3
4
|
|
4
5
|
streams = streams.collect do |stream|
|
5
6
|
case stream
|
@@ -37,7 +38,8 @@ module TSV
|
|
37
38
|
|
38
39
|
streams = streams.collect do |stream|
|
39
40
|
|
40
|
-
parser = TSV::Parser.new stream,
|
41
|
+
parser = TSV::Parser.new stream, sep: sep
|
42
|
+
#parser.type = type
|
41
43
|
|
42
44
|
sfields = parser.fields
|
43
45
|
|
@@ -102,8 +104,9 @@ module TSV
|
|
102
104
|
keys[i]= nil
|
103
105
|
parts[i]= nil
|
104
106
|
else
|
105
|
-
vs = line.
|
107
|
+
vs = line.split(sep, -1)
|
106
108
|
key, *p = vs
|
109
|
+
p = [p] if parser_types[i] == :flat
|
107
110
|
keys[i]= key
|
108
111
|
parts[i]= p
|
109
112
|
end
|
@@ -112,7 +115,7 @@ module TSV
|
|
112
115
|
done_streams =[]
|
113
116
|
|
114
117
|
fields = nil if fields && fields.empty?
|
115
|
-
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type
|
118
|
+
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type, compact: !one2one
|
116
119
|
dumper.init(preamble: preamble_txt || !!key_field)
|
117
120
|
|
118
121
|
t = Thread.new do
|
@@ -130,7 +133,7 @@ module TSV
|
|
130
133
|
keys.each_with_index do |key,i|
|
131
134
|
case key
|
132
135
|
when min
|
133
|
-
|
136
|
+
new_parts = parts[i]
|
134
137
|
|
135
138
|
begin
|
136
139
|
line = lines[i]= begin
|
@@ -145,18 +148,20 @@ module TSV
|
|
145
148
|
else
|
146
149
|
k, *p = line.chomp.split(sep, -1)
|
147
150
|
p = p.collect{|e| e.nil? ? "" : e }
|
151
|
+
p = [p] if parser_types[i] == :flat
|
148
152
|
|
149
153
|
if k == keys[i]
|
150
|
-
|
154
|
+
new_parts = NamedArray.zip_fields([new_parts]).zip(p).collect{|p| [p.flatten * "|"] }
|
151
155
|
raise TryAgain
|
152
156
|
end
|
153
157
|
keys[i]= k
|
154
158
|
parts[i]= p
|
155
159
|
end
|
160
|
+
|
161
|
+
new_values << new_parts
|
156
162
|
rescue TryAgain
|
157
163
|
keys[i]= nil
|
158
164
|
parts[i]= nil
|
159
|
-
Log.debug "Skipping repeated key in stream #{i}: #{key} - #{min}"
|
160
165
|
retry
|
161
166
|
end
|
162
167
|
else
|
@@ -180,6 +185,8 @@ module TSV
|
|
180
185
|
new_values = new_values.inject([]){|acc,l| acc.concat l }
|
181
186
|
end
|
182
187
|
|
188
|
+
new_values = new_values.collect{|l| Array === l ? l.compact : l } unless one2one
|
189
|
+
|
183
190
|
dumper.add min, new_values
|
184
191
|
end
|
185
192
|
|
@@ -187,7 +194,7 @@ module TSV
|
|
187
194
|
|
188
195
|
streams.each do |stream|
|
189
196
|
stream.close if stream.respond_to?(:close) && ! stream.closed?
|
190
|
-
stream.join if stream.respond_to?
|
197
|
+
stream.join if stream.respond_to?(:join)
|
191
198
|
end
|
192
199
|
end
|
193
200
|
rescue Aborted
|
data/lib/scout/tsv/traverse.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require_relative 'parser'
|
2
2
|
module TSV
|
3
|
-
def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed:
|
3
|
+
def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: nil, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block)
|
4
4
|
key_field = key_field_pos if key_field.nil?
|
5
5
|
fields = fields_pos.dup if fields.nil?
|
6
6
|
type = @type if type.nil?
|
@@ -9,6 +9,8 @@ module TSV
|
|
9
9
|
fields = [fields] unless fields.nil? || Array === fields
|
10
10
|
positions = (fields.nil? || fields == :all) ? nil : self.identify_field(fields)
|
11
11
|
positions = nil if fields == self.fields
|
12
|
+
unnamed = @unnamed if unnamed.nil?
|
13
|
+
unnamed = false if unnamed.nil?
|
12
14
|
|
13
15
|
if key_pos == :key
|
14
16
|
key_name = @key_field
|
@@ -61,7 +63,6 @@ module TSV
|
|
61
63
|
key = @type == :flat ? values : values[key_pos] if key_pos != :key
|
62
64
|
|
63
65
|
values = values.values_at(*positions)
|
64
|
-
NamedArray.setup(values, fields)
|
65
66
|
if key_index
|
66
67
|
if @type == :double
|
67
68
|
values.insert key_index, [orig_key]
|
@@ -71,6 +72,15 @@ module TSV
|
|
71
72
|
end
|
72
73
|
end
|
73
74
|
|
75
|
+
if ! unnamed && fields
|
76
|
+
case @type
|
77
|
+
when :flat, :single
|
78
|
+
values = Entity.prepare_entity(values, fields.first)
|
79
|
+
else
|
80
|
+
values = NamedArray.setup(values, fields, entity_options)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
74
84
|
values = TSV.cast_value(values, cast) if cast
|
75
85
|
|
76
86
|
if Array === key
|