scout-gear 10.7.1 → 10.7.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +6 -0
- data/VERSION +1 -1
- data/lib/scout/association/index.rb +1 -1
- data/lib/scout/association.rb +21 -5
- data/lib/scout/entity/format.rb +9 -4
- data/lib/scout/entity/identifiers.rb +2 -2
- data/lib/scout/entity/named_array.rb +13 -0
- data/lib/scout/entity/property.rb +2 -1
- data/lib/scout/entity.rb +7 -4
- data/lib/scout/persist/tsv/adapter/base.rb +13 -1
- data/lib/scout/persist/tsv.rb +2 -1
- data/lib/scout/tsv/attach.rb +10 -2
- data/lib/scout/tsv/change_id.rb +3 -0
- data/lib/scout/tsv/dumper.rb +34 -30
- data/lib/scout/tsv/open.rb +1 -0
- data/lib/scout/tsv/parser.rb +21 -10
- data/lib/scout/tsv/path.rb +8 -0
- data/lib/scout/tsv/stream.rb +15 -8
- data/lib/scout/tsv/traverse.rb +12 -2
- data/lib/scout/tsv/util/process.rb +4 -1
- data/lib/scout/tsv/util/select.rb +8 -2
- data/lib/scout/tsv/util/sort.rb +23 -15
- data/lib/scout/tsv/util.rb +11 -2
- data/lib/scout/tsv.rb +23 -11
- data/lib/scout/workflow/definition.rb +3 -3
- data/lib/scout/workflow/deployment/orchestrator.rb +8 -5
- data/lib/scout/workflow/step/dependencies.rb +35 -11
- data/lib/scout/workflow/step/file.rb +2 -1
- data/lib/scout/workflow/step/info.rb +14 -2
- data/lib/scout/workflow/step/load.rb +5 -3
- data/lib/scout/workflow/step/progress.rb +6 -0
- data/lib/scout/workflow/step/provenance.rb +1 -1
- data/lib/scout/workflow/step/status.rb +10 -4
- data/lib/scout/workflow/step.rb +32 -12
- data/lib/scout/workflow/task/dependencies.rb +33 -24
- data/lib/scout/workflow/task/inputs.rb +40 -12
- data/lib/scout/workflow/task.rb +22 -10
- data/lib/scout/workflow/usage.rb +2 -2
- data/lib/scout/workflow.rb +1 -1
- data/scout-gear.gemspec +10 -4
- data/scout_commands/db/query +83 -0
- data/scout_commands/db/register +43 -0
- data/scout_commands/db/show +47 -0
- data/test/scout/entity/test_named_array.rb +21 -0
- data/test/scout/persist/test_tsv.rb +20 -0
- data/test/scout/persist/tsv/adapter/test_base.rb +20 -0
- data/test/scout/test_tsv.rb +40 -0
- data/test/scout/tsv/test_dumper.rb +24 -0
- data/test/scout/tsv/test_path.rb +24 -0
- data/test/scout/tsv/test_stream.rb +93 -0
- data/test/scout/tsv/test_traverse.rb +99 -0
- data/test/scout/tsv/test_util.rb +2 -0
- data/test/scout/tsv/util/test_select.rb +22 -0
- data/test/scout/tsv/util/test_sort.rb +24 -0
- data/test/scout/workflow/step/test_dependencies.rb +26 -0
- data/test/scout/workflow/step/test_info.rb +35 -0
- data/test/scout/workflow/task/test_dependencies.rb +67 -1
- data/test/scout/workflow/task/test_inputs.rb +24 -7
- data/test/scout/workflow/test_task.rb +36 -0
- data/test/scout/workflow/test_usage.rb +0 -1
- metadata +9 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1182999631b54e94f842c90017e1f97dde904ddafe47129cd80b51097714a1c4
|
4
|
+
data.tar.gz: ec736a5762fc2e58450a7955dffffe61eb73584f0bdc53bf3cd368e194ae7cbf
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 66a5c6ab90c2aa047cfbd000731eaec3913994880adddc9ac2daac9802ee2ebef14a054c1e292200624a6411ac2244d5f915de8d3ed9113d1237edc3795ab027
|
7
|
+
data.tar.gz: 27a0e87ac1360420bffc87b34b01eb6f21631fdf3f6ef95983fb46152f6e0e390200518dd2bebcc95c880e74d6f6f8a98a76577be2fbad0aea7d1fbe9c0720b1
|
data/.vimproject
CHANGED
@@ -107,6 +107,7 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
107
107
|
object.rb
|
108
108
|
format.rb
|
109
109
|
identifiers.rb
|
110
|
+
named_array.rb
|
110
111
|
}
|
111
112
|
association.rb
|
112
113
|
association=association{
|
@@ -137,6 +138,11 @@ scout-gear=/$PWD filter="*.rb *.yaml" {
|
|
137
138
|
update
|
138
139
|
template
|
139
140
|
offsite
|
141
|
+
db=db{
|
142
|
+
register
|
143
|
+
query
|
144
|
+
show
|
145
|
+
}
|
140
146
|
workflow=workflow{
|
141
147
|
task
|
142
148
|
list
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
10.7.1
|
1
|
+
10.7.2
|
data/lib/scout/association.rb
CHANGED
@@ -6,17 +6,32 @@ require_relative 'association/item'
|
|
6
6
|
|
7
7
|
module Association
|
8
8
|
def self.open(obj, source: nil, target: nil, fields: nil, source_format: nil, target_format: nil, format: nil, **kwargs)
|
9
|
+
|
10
|
+
|
11
|
+
if Path.is_filename?(obj)
|
12
|
+
options = TSV.parse_options(obj).merge(kwargs)
|
13
|
+
else
|
14
|
+
options = kwargs.dup
|
15
|
+
end
|
16
|
+
|
17
|
+
if String === obj && options[:namespace] && obj.include?("NAMESPACE")
|
18
|
+
new_obj = obj.gsub(/\[?NAMESPACE\]?/, options[:namespace])
|
19
|
+
obj.annotate(new_obj)
|
20
|
+
obj = new_obj
|
21
|
+
end
|
22
|
+
|
9
23
|
all_fields = TSV.all_fields(obj)
|
10
|
-
source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields,
|
24
|
+
source_pos, field_pos, source_header, field_headers, source_format, target_format = headers(all_fields, fields, options.merge(source: source, target: target, source_format: source_format, target_format: target_format, format: format))
|
11
25
|
|
12
26
|
original_source_header = all_fields[source_pos]
|
13
27
|
original_field_headers = all_fields.values_at(*field_pos)
|
14
28
|
original_target_header = all_fields[field_pos.first]
|
15
29
|
|
16
|
-
type, identifiers = IndiferentHash.process_options
|
30
|
+
type, identifiers = IndiferentHash.process_options options, :type, :identifiers
|
17
31
|
|
18
32
|
if source_format
|
19
33
|
translation_files = [TSV.identifier_files(obj), Entity.identifier_files(source_format), identifiers].flatten.compact
|
34
|
+
translation_files.collect!{|f| Path.is_filename?(f, false) ? Path.setup(f.gsub(/\[?NAMESPACE\]?/, options[:namespace])) : f }
|
20
35
|
source_index = begin
|
21
36
|
TSV.translation_index(translation_files, source_header, source_format)
|
22
37
|
rescue
|
@@ -26,6 +41,7 @@ module Association
|
|
26
41
|
|
27
42
|
if target_format
|
28
43
|
translation_files = [TSV.identifier_files(obj), Entity.identifier_files(target_format), identifiers].flatten.compact
|
44
|
+
translation_files.collect!{|f| Path.is_filename?(f, false) ? Path.setup(f.gsub(/\[?NAMESPACE\]?/, options[:namespace])) : f }
|
29
45
|
target_index = begin
|
30
46
|
TSV.translation_index(translation_files, field_headers.first, target_format)
|
31
47
|
rescue
|
@@ -67,10 +83,10 @@ module Association
|
|
67
83
|
if source_index.nil? && target_index.nil?
|
68
84
|
if TSV === obj
|
69
85
|
IndiferentHash.pull_keys kwargs, :persist
|
70
|
-
type =
|
71
|
-
res = obj.reorder original_source_header, all_fields.values_at(*field_pos), **
|
86
|
+
type = options[:type] || obj.type
|
87
|
+
res = obj.reorder original_source_header, all_fields.values_at(*field_pos), **options.merge(type: type, merge: true)
|
72
88
|
else
|
73
|
-
res = TSV.open(obj, key_field: original_source_header, fields: all_fields.values_at(*field_pos), **
|
89
|
+
res = TSV.open(obj, key_field: original_source_header, fields: all_fields.values_at(*field_pos), **options.merge(type: type))
|
74
90
|
end
|
75
91
|
res.key_field = final_key_field
|
76
92
|
res.fields = final_fields
|
data/lib/scout/entity/format.rb
CHANGED
@@ -17,16 +17,20 @@ module Entity
|
|
17
17
|
def find(value)
|
18
18
|
@find_cache ||= {}
|
19
19
|
|
20
|
-
@find_cache
|
20
|
+
if @find_cache.include?(value)
|
21
|
+
@find_cache[value]
|
22
|
+
else
|
23
|
+
@find_cache[value] = begin
|
21
24
|
if orig_include? value
|
22
|
-
|
25
|
+
value
|
23
26
|
else
|
27
|
+
value = value.to_s
|
24
28
|
found = nil
|
25
29
|
each do |k,v|
|
26
|
-
if value
|
30
|
+
if value == k.to_s
|
27
31
|
found = k
|
28
32
|
break
|
29
|
-
elsif value
|
33
|
+
elsif value =~ /\(#{Regexp.quote k.to_s}\)/
|
30
34
|
found = k
|
31
35
|
break
|
32
36
|
end
|
@@ -34,6 +38,7 @@ module Entity
|
|
34
38
|
found
|
35
39
|
end
|
36
40
|
end
|
41
|
+
end
|
37
42
|
end
|
38
43
|
|
39
44
|
def [](value)
|
@@ -91,6 +91,8 @@ module Entity
|
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
|
+
name = default if name.nil?
|
95
|
+
|
94
96
|
self.send(:include, Entity::Identified) unless Entity::Identified === self
|
95
97
|
|
96
98
|
self.format = all_fields
|
@@ -106,6 +108,4 @@ module Entity
|
|
106
108
|
@identifier_files << file
|
107
109
|
@identifier_files.uniq!
|
108
110
|
end
|
109
|
-
|
110
|
-
|
111
111
|
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'scout/named_array'
|
2
|
+
require 'scout/entity'
|
3
|
+
|
4
|
+
module NamedArray
|
5
|
+
|
6
|
+
def [](key)
|
7
|
+
pos = NamedArray.identify_name(@fields, key)
|
8
|
+
return nil if pos.nil?
|
9
|
+
v = super(pos)
|
10
|
+
field = @fields && Integer === key && ! @fields.include?(key) ? @fields[key] : key
|
11
|
+
Entity.prepare_entity(v, field)
|
12
|
+
end
|
13
|
+
end
|
@@ -20,7 +20,8 @@ module Entity
|
|
20
20
|
Persist.annotation_repo_persist(repo, [name, obj.id] * ":", &block)
|
21
21
|
else
|
22
22
|
|
23
|
-
|
23
|
+
_id = obj.nil? ? 'nil' : obj.id
|
24
|
+
Persist.persist([name, _id] * ":", type, options.dup, &block)
|
24
25
|
end
|
25
26
|
end
|
26
27
|
|
data/lib/scout/entity.rb
CHANGED
@@ -3,6 +3,7 @@ require_relative 'entity/format'
|
|
3
3
|
require_relative 'entity/property'
|
4
4
|
require_relative 'entity/object'
|
5
5
|
require_relative 'entity/identifiers'
|
6
|
+
require_relative 'entity/named_array'
|
6
7
|
|
7
8
|
module Entity
|
8
9
|
def self.extended(base)
|
@@ -12,17 +13,19 @@ module Entity
|
|
12
13
|
base.instance_variable_set(:@persisted_methods, {})
|
13
14
|
base.include Entity::Object
|
14
15
|
base.include AnnotatedArray
|
16
|
+
base.format = base.to_s
|
15
17
|
base
|
16
18
|
end
|
17
19
|
|
18
20
|
def self.prepare_entity(entity, field, options = {})
|
19
21
|
return entity unless defined? Entity
|
20
|
-
return entity unless String === entity or Array === entity
|
21
|
-
options ||= {}
|
22
|
-
|
23
|
-
dup_array = options.delete :dup_array
|
22
|
+
return entity unless String === entity or Array === entity or Numeric === entity
|
24
23
|
|
25
24
|
if Entity === field or (Entity.respond_to?(:formats) and (_format = Entity.formats.find(field)))
|
25
|
+
options ||= {}
|
26
|
+
|
27
|
+
dup_array = options.delete :dup_array
|
28
|
+
|
26
29
|
params = options.dup
|
27
30
|
|
28
31
|
params[:format] ||= params.delete "format"
|
@@ -23,7 +23,9 @@ module TSVAdapter
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def save_annotation_hash
|
26
|
-
self.
|
26
|
+
self.with_write do
|
27
|
+
self.orig_set(ANNOTATION_ATTR_HASH_KEY, ANNOTATION_ATTR_HASH_SERIALIZER.dump(self.annotation_hash))
|
28
|
+
end
|
27
29
|
end
|
28
30
|
|
29
31
|
def self.extended(base)
|
@@ -163,6 +165,16 @@ module TSVAdapter
|
|
163
165
|
end
|
164
166
|
end
|
165
167
|
|
168
|
+
def with_write(*args, &block)
|
169
|
+
if @write
|
170
|
+
yield
|
171
|
+
elsif @closed
|
172
|
+
write_and_close &block
|
173
|
+
else
|
174
|
+
write_and_read &block
|
175
|
+
end
|
176
|
+
end
|
177
|
+
|
166
178
|
def close(*args)
|
167
179
|
begin
|
168
180
|
super(*args)
|
data/lib/scout/persist/tsv.rb
CHANGED
@@ -48,7 +48,8 @@ module Persist
|
|
48
48
|
|
49
49
|
def self.tsv(id, options = {}, engine: nil, persist_options: {})
|
50
50
|
engine ||= persist_options[:engine] || :HDB
|
51
|
-
|
51
|
+
persist_options[:other_options] = options unless persist_options.include?(:other_options)
|
52
|
+
Persist.persist(id, engine, persist_options) do |filename|
|
52
53
|
if filename.nil?
|
53
54
|
yield(persist_options[:data] || {})
|
54
55
|
else
|
data/lib/scout/tsv/attach.rb
CHANGED
@@ -92,6 +92,9 @@ module TSV
|
|
92
92
|
end
|
93
93
|
|
94
94
|
other_field_positions = other.identify_field(fields.dup)
|
95
|
+
fields.zip(other_field_positions) do |o,n|
|
96
|
+
raise "Field #{o} not found. Options: #{Log.fingerprint other.fields}" if n.nil?
|
97
|
+
end
|
95
98
|
|
96
99
|
log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
|
97
100
|
Log.debug log_message
|
@@ -252,8 +255,13 @@ module TSV
|
|
252
255
|
def self.identifier_files(obj)
|
253
256
|
if TSV === obj
|
254
257
|
obj.identifier_files
|
255
|
-
elsif Path
|
256
|
-
obj.
|
258
|
+
elsif Path.is_filename?(obj)
|
259
|
+
path = Path === obj ? obj : Path.setup(obj)
|
260
|
+
if obj.dirname.identifiers.exists?
|
261
|
+
obj.dirname.identifiers
|
262
|
+
else
|
263
|
+
[TSV.parse_options(obj)[:identifiers]]
|
264
|
+
end
|
257
265
|
else
|
258
266
|
nil
|
259
267
|
end
|
data/lib/scout/tsv/change_id.rb
CHANGED
@@ -6,6 +6,9 @@ module TSV
|
|
6
6
|
identifiers = source.identifiers if identifiers.nil? and source.respond_to?(:identifiers)
|
7
7
|
if identifiers && source.identify_field(new_key_field, strict: true).nil?
|
8
8
|
identifiers = identifiers.nil? ? source.identifiers : identifiers
|
9
|
+
if Array === identifiers
|
10
|
+
identifiers = identifiers.select{|f| f.identify_field(new_key_field) }.last
|
11
|
+
end
|
9
12
|
new = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
|
10
13
|
new = new.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one, merge: merge)
|
11
14
|
return new
|
data/lib/scout/tsv/dumper.rb
CHANGED
@@ -26,12 +26,13 @@ module TSV
|
|
26
26
|
end
|
27
27
|
|
28
28
|
|
29
|
-
attr_accessor :options, :initialized, :type, :sep, :filename, :namespace
|
29
|
+
attr_accessor :options, :initialized, :type, :sep, :compact, :filename, :namespace
|
30
30
|
def initialize(options = {})
|
31
31
|
options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
|
32
32
|
@sep, @type = IndiferentHash.process_options options,
|
33
33
|
:sep, :type,
|
34
34
|
:sep => "\t", :type => :double
|
35
|
+
@compact = options[:compact]
|
35
36
|
@options = options
|
36
37
|
@options[:type] = @type
|
37
38
|
@sout, @sin = Open.pipe
|
@@ -39,8 +40,9 @@ module TSV
|
|
39
40
|
@initialized = false
|
40
41
|
@filename = options[:filename]
|
41
42
|
@mutex = Mutex.new
|
43
|
+
@namespace = options[:namespace]
|
42
44
|
ConcurrentStream.setup(@sin, pair: @sout)
|
43
|
-
ConcurrentStream.setup(@sout, pair: @sin)
|
45
|
+
ConcurrentStream.setup(@sout, pair: @sin, filename: filename)
|
44
46
|
end
|
45
47
|
|
46
48
|
def set_stream(stream)
|
@@ -96,11 +98,11 @@ module TSV
|
|
96
98
|
when :list, :flat
|
97
99
|
@sin << key + @sep + value * @sep << "\n"
|
98
100
|
when :double
|
99
|
-
@sin << key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep << "\n"
|
101
|
+
@sin << key + @sep + value.collect{|v| Array === v ? (@compact ? v.compact : v) * "|" : v } * @sep << "\n"
|
100
102
|
else
|
101
103
|
if Array === value
|
102
104
|
if Array === value.first
|
103
|
-
@sin << key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep << "\n"
|
105
|
+
@sin << key + @sep + value.collect{|v| Array === v ? (@compact ? v.compact : v) * "|" : v } * @sep << "\n"
|
104
106
|
else
|
105
107
|
@sin << key + @sep + value * @sep << "\n"
|
106
108
|
end
|
@@ -173,13 +175,35 @@ module TSV
|
|
173
175
|
end
|
174
176
|
end
|
175
177
|
|
176
|
-
|
177
|
-
|
178
|
+
self.with_unnamed do
|
179
|
+
if stream.nil?
|
180
|
+
t = Thread.new do
|
181
|
+
begin
|
182
|
+
Thread.current.report_on_exception = true
|
183
|
+
Thread.current["name"] = "Dumper thread"
|
184
|
+
dumper.init(preamble: preamble)
|
185
|
+
|
186
|
+
if keys
|
187
|
+
keys.each do |k|
|
188
|
+
dump_entry.call k, self[k]
|
189
|
+
end
|
190
|
+
else
|
191
|
+
self.each &dump_entry
|
192
|
+
end
|
193
|
+
|
194
|
+
dumper.close
|
195
|
+
rescue
|
196
|
+
dumper.abort($!)
|
197
|
+
end
|
198
|
+
end
|
199
|
+
Thread.pass until t["name"]
|
200
|
+
stream = dumper.stream
|
201
|
+
ConcurrentStream.setup(stream, :threads => [t])
|
202
|
+
stream
|
203
|
+
else
|
204
|
+
dumper.set_stream stream
|
178
205
|
begin
|
179
|
-
Thread.current.report_on_exception = true
|
180
|
-
Thread.current["name"] = "Dumper thread"
|
181
206
|
dumper.init(preamble: preamble)
|
182
|
-
|
183
207
|
if keys
|
184
208
|
keys.each do |k|
|
185
209
|
dump_entry.call k, self[k]
|
@@ -192,28 +216,8 @@ module TSV
|
|
192
216
|
rescue
|
193
217
|
dumper.abort($!)
|
194
218
|
end
|
219
|
+
stream
|
195
220
|
end
|
196
|
-
Thread.pass until t["name"]
|
197
|
-
stream = dumper.stream
|
198
|
-
ConcurrentStream.setup(stream, :threads => [t])
|
199
|
-
stream
|
200
|
-
else
|
201
|
-
dumper.set_stream stream
|
202
|
-
begin
|
203
|
-
dumper.init(preamble: preamble)
|
204
|
-
if keys
|
205
|
-
keys.each do |k|
|
206
|
-
dump_entry.call k, self[k]
|
207
|
-
end
|
208
|
-
else
|
209
|
-
self.each &dump_entry
|
210
|
-
end
|
211
|
-
|
212
|
-
dumper.close
|
213
|
-
rescue
|
214
|
-
dumper.abort($!)
|
215
|
-
end
|
216
|
-
stream
|
217
221
|
end
|
218
222
|
end
|
219
223
|
|
data/lib/scout/tsv/open.rb
CHANGED
data/lib/scout/tsv/parser.rb
CHANGED
@@ -7,7 +7,11 @@ module TSV
|
|
7
7
|
if Proc === cast
|
8
8
|
cast.call value
|
9
9
|
else
|
10
|
-
value.
|
10
|
+
if value.nil? || value.empty?
|
11
|
+
nil
|
12
|
+
else
|
13
|
+
value.send(cast)
|
14
|
+
end
|
11
15
|
end
|
12
16
|
end
|
13
17
|
end
|
@@ -94,6 +98,8 @@ module TSV
|
|
94
98
|
line.chomp!
|
95
99
|
if Proc === fix
|
96
100
|
line = fix.call line
|
101
|
+
break if (FalseClass === line) || :break == line
|
102
|
+
next if line.nil?
|
97
103
|
elsif fix
|
98
104
|
line = Misc.fixutf8(line)
|
99
105
|
end
|
@@ -175,14 +181,14 @@ module TSV
|
|
175
181
|
current = data[key]
|
176
182
|
if merge == :concat
|
177
183
|
these_items.each_with_index do |new,i|
|
178
|
-
new = [nil] if new.empty?
|
184
|
+
new = one2one ? [nil] : [] if new.empty?
|
179
185
|
current[i].concat(new)
|
180
186
|
end
|
181
187
|
else
|
182
188
|
merged = []
|
183
189
|
these_items.each_with_index do |new,i|
|
184
|
-
new = [nil] if new.empty?
|
185
|
-
merged[i] = current[i] + new
|
190
|
+
new = one2one ? [nil] : [] if new.empty?
|
191
|
+
merged[i] = (current[i] || []) + new
|
186
192
|
end
|
187
193
|
data[key] = merged
|
188
194
|
end
|
@@ -290,7 +296,8 @@ module TSV
|
|
290
296
|
opts[:cast] = opts[:cast].to_sym if opts[:cast]
|
291
297
|
|
292
298
|
all_fields = [key_field] + fields if key_field && fields
|
293
|
-
|
299
|
+
namespace = opts[:namespace]
|
300
|
+
NamedArray.setup([opts, key_field, fields, first_line, preamble, all_fields, namespace], %w(options key_field fields first_line preamble all_fields namespace))
|
294
301
|
rescue Exception
|
295
302
|
raise stream.stream_exception if stream.respond_to?(:stream_exception) && stream.stream_exception
|
296
303
|
stream.abort($!) if stream.respond_to?(:abort)
|
@@ -298,6 +305,10 @@ module TSV
|
|
298
305
|
end
|
299
306
|
end
|
300
307
|
|
308
|
+
def self.parse_options(...)
|
309
|
+
parse_header(...)[:options]
|
310
|
+
end
|
311
|
+
|
301
312
|
KEY_PARAMETERS = begin
|
302
313
|
params = []
|
303
314
|
(method(:parse_line).parameters + method(:parse_stream).parameters).each do |type, name|
|
@@ -408,7 +419,7 @@ module TSV
|
|
408
419
|
end
|
409
420
|
|
410
421
|
if data
|
411
|
-
TSV.setup(data, :key_field => key_field_name, :fields => field_names, :type => @type)
|
422
|
+
TSV.setup(data, @source_options.merge(:key_field => key_field_name, :fields => field_names, :type => @type))
|
412
423
|
else
|
413
424
|
[key_field || self.key_field, fields || self.fields]
|
414
425
|
end
|
@@ -435,7 +446,7 @@ module TSV
|
|
435
446
|
end
|
436
447
|
end
|
437
448
|
|
438
|
-
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed:
|
449
|
+
def self.parse(stream, fix: true, header_hash: "#", sep: "\t", filename: nil, namespace: nil, unnamed: nil, serializer: nil, **kwargs, &block)
|
439
450
|
parser = TSV::Parser === stream ? stream : TSV::Parser.new(stream, fix: fix, header_hash: header_hash, sep: sep)
|
440
451
|
|
441
452
|
cast = kwargs[:cast]
|
@@ -472,9 +483,9 @@ module TSV
|
|
472
483
|
data = parser.traverse **kwargs, &block
|
473
484
|
data.type = type
|
474
485
|
data.cast = cast
|
475
|
-
data.filename = filename || parser.options[:filename]
|
476
|
-
data.namespace = namespace || parser.options[:namespace]
|
477
|
-
data.identifiers = identifiers
|
486
|
+
data.filename = filename || parser.options[:filename] if data.filename.nil?
|
487
|
+
data.namespace = namespace || parser.options[:namespace] if data.namespace.nil?
|
488
|
+
data.identifiers = identifiers || parser.options[:identifiers] if data.identifiers.nil?
|
478
489
|
data.unnamed = unnamed
|
479
490
|
data.save_annotation_hash if data.respond_to?(:save_annotation_hash)
|
480
491
|
data
|
data/lib/scout/tsv/path.rb
CHANGED
data/lib/scout/tsv/stream.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
module TSV
|
2
|
-
def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, field_prefix: nil)
|
2
|
+
def self.paste_streams(streams, type: nil, sort: nil, sort_memory: nil, sep: nil, preamble: nil, header: nil, same_fields: nil, fix_flat: nil, all_match: nil, one2one: true, field_prefix: nil)
|
3
|
+
sep = "\t" if sep.nil?
|
3
4
|
|
4
5
|
streams = streams.collect do |stream|
|
5
6
|
case stream
|
@@ -37,7 +38,8 @@ module TSV
|
|
37
38
|
|
38
39
|
streams = streams.collect do |stream|
|
39
40
|
|
40
|
-
parser = TSV::Parser.new stream,
|
41
|
+
parser = TSV::Parser.new stream, sep: sep
|
42
|
+
#parser.type = type
|
41
43
|
|
42
44
|
sfields = parser.fields
|
43
45
|
|
@@ -102,8 +104,9 @@ module TSV
|
|
102
104
|
keys[i]= nil
|
103
105
|
parts[i]= nil
|
104
106
|
else
|
105
|
-
vs = line.
|
107
|
+
vs = line.split(sep, -1)
|
106
108
|
key, *p = vs
|
109
|
+
p = [p] if parser_types[i] == :flat
|
107
110
|
keys[i]= key
|
108
111
|
parts[i]= p
|
109
112
|
end
|
@@ -112,7 +115,7 @@ module TSV
|
|
112
115
|
done_streams =[]
|
113
116
|
|
114
117
|
fields = nil if fields && fields.empty?
|
115
|
-
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type
|
118
|
+
dumper = TSV::Dumper.new key_field: key_field, fields: fields, type: type, compact: !one2one
|
116
119
|
dumper.init(preamble: preamble_txt || !!key_field)
|
117
120
|
|
118
121
|
t = Thread.new do
|
@@ -130,7 +133,7 @@ module TSV
|
|
130
133
|
keys.each_with_index do |key,i|
|
131
134
|
case key
|
132
135
|
when min
|
133
|
-
|
136
|
+
new_parts = parts[i]
|
134
137
|
|
135
138
|
begin
|
136
139
|
line = lines[i]= begin
|
@@ -145,18 +148,20 @@ module TSV
|
|
145
148
|
else
|
146
149
|
k, *p = line.chomp.split(sep, -1)
|
147
150
|
p = p.collect{|e| e.nil? ? "" : e }
|
151
|
+
p = [p] if parser_types[i] == :flat
|
148
152
|
|
149
153
|
if k == keys[i]
|
150
|
-
|
154
|
+
new_parts = NamedArray.zip_fields([new_parts]).zip(p).collect{|p| [p.flatten * "|"] }
|
151
155
|
raise TryAgain
|
152
156
|
end
|
153
157
|
keys[i]= k
|
154
158
|
parts[i]= p
|
155
159
|
end
|
160
|
+
|
161
|
+
new_values << new_parts
|
156
162
|
rescue TryAgain
|
157
163
|
keys[i]= nil
|
158
164
|
parts[i]= nil
|
159
|
-
Log.debug "Skipping repeated key in stream #{i}: #{key} - #{min}"
|
160
165
|
retry
|
161
166
|
end
|
162
167
|
else
|
@@ -180,6 +185,8 @@ module TSV
|
|
180
185
|
new_values = new_values.inject([]){|acc,l| acc.concat l }
|
181
186
|
end
|
182
187
|
|
188
|
+
new_values = new_values.collect{|l| Array === l ? l.compact : l } unless one2one
|
189
|
+
|
183
190
|
dumper.add min, new_values
|
184
191
|
end
|
185
192
|
|
@@ -187,7 +194,7 @@ module TSV
|
|
187
194
|
|
188
195
|
streams.each do |stream|
|
189
196
|
stream.close if stream.respond_to?(:close) && ! stream.closed?
|
190
|
-
stream.join if stream.respond_to?
|
197
|
+
stream.join if stream.respond_to?(:join)
|
191
198
|
end
|
192
199
|
end
|
193
200
|
rescue Aborted
|
data/lib/scout/tsv/traverse.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require_relative 'parser'
|
2
2
|
module TSV
|
3
|
-
def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed:
|
3
|
+
def traverse(key_field_pos = :key, fields_pos = nil, type: nil, one2one: false, unnamed: nil, key_field: nil, fields: nil, bar: false, cast: nil, select: nil, uniq: false, &block)
|
4
4
|
key_field = key_field_pos if key_field.nil?
|
5
5
|
fields = fields_pos.dup if fields.nil?
|
6
6
|
type = @type if type.nil?
|
@@ -9,6 +9,8 @@ module TSV
|
|
9
9
|
fields = [fields] unless fields.nil? || Array === fields
|
10
10
|
positions = (fields.nil? || fields == :all) ? nil : self.identify_field(fields)
|
11
11
|
positions = nil if fields == self.fields
|
12
|
+
unnamed = @unnamed if unnamed.nil?
|
13
|
+
unnamed = false if unnamed.nil?
|
12
14
|
|
13
15
|
if key_pos == :key
|
14
16
|
key_name = @key_field
|
@@ -61,7 +63,6 @@ module TSV
|
|
61
63
|
key = @type == :flat ? values : values[key_pos] if key_pos != :key
|
62
64
|
|
63
65
|
values = values.values_at(*positions)
|
64
|
-
NamedArray.setup(values, fields)
|
65
66
|
if key_index
|
66
67
|
if @type == :double
|
67
68
|
values.insert key_index, [orig_key]
|
@@ -71,6 +72,15 @@ module TSV
|
|
71
72
|
end
|
72
73
|
end
|
73
74
|
|
75
|
+
if ! unnamed && fields
|
76
|
+
case @type
|
77
|
+
when :flat, :single
|
78
|
+
values = Entity.prepare_entity(values, fields.first)
|
79
|
+
else
|
80
|
+
values = NamedArray.setup(values, fields, entity_options)
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
74
84
|
values = TSV.cast_value(values, cast) if cast
|
75
85
|
|
76
86
|
if Array === key
|
@@ -44,11 +44,14 @@ module TSV
|
|
44
44
|
end
|
45
45
|
|
46
46
|
def add_field(name = nil)
|
47
|
-
|
47
|
+
keys.each do |key|
|
48
|
+
values = self[key]
|
48
49
|
new_values = yield(key, values)
|
49
50
|
new_values = [new_values].compact if type == :double and not Array === new_values
|
50
51
|
|
51
52
|
case
|
53
|
+
when type == :single
|
54
|
+
values = new_values
|
52
55
|
when (values.nil? and (fields.nil? or fields.empty?))
|
53
56
|
values = [new_values]
|
54
57
|
when values.nil?
|
@@ -156,8 +156,14 @@ module TSV
|
|
156
156
|
case
|
157
157
|
when ((Array === method) and (key == :key or key_field == key))
|
158
158
|
with_unnamed do
|
159
|
-
|
160
|
-
|
159
|
+
if invert
|
160
|
+
keys.each do |key|
|
161
|
+
new[key] = self[key] unless method.include?(key)
|
162
|
+
end
|
163
|
+
else
|
164
|
+
method.each do |key|
|
165
|
+
new[key] = self[key] if self.include?(key)
|
166
|
+
end
|
161
167
|
end
|
162
168
|
end
|
163
169
|
when Array === method
|