scout-gear 10.4.0 → 10.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.vimproject +100 -656
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/bin/scout +1 -3
- data/lib/scout/association/fields.rb +170 -0
- data/lib/scout/association/index.rb +229 -0
- data/lib/scout/association/item.rb +227 -0
- data/lib/scout/association/util.rb +7 -0
- data/lib/scout/association.rb +100 -0
- data/lib/scout/entity/format.rb +62 -0
- data/lib/scout/entity/identifiers.rb +111 -0
- data/lib/scout/entity/object.rb +20 -0
- data/lib/scout/entity/property.rb +165 -0
- data/lib/scout/entity.rb +40 -0
- data/lib/scout/offsite/step.rb +2 -2
- data/lib/scout/{tsv/persist → persist/engine}/fix_width_table.rb +25 -33
- data/lib/scout/persist/engine/packed_index.rb +100 -0
- data/lib/scout/persist/engine/sharder.rb +219 -0
- data/lib/scout/{tsv/persist → persist/engine}/tkrzw.rb +0 -17
- data/lib/scout/{tsv/persist → persist/engine}/tokyocabinet.rb +55 -31
- data/lib/scout/persist/engine.rb +4 -0
- data/lib/scout/{tsv/persist/adapter.rb → persist/tsv/adapter/base.rb} +80 -51
- data/lib/scout/persist/tsv/adapter/fix_width_table.rb +106 -0
- data/lib/scout/persist/tsv/adapter/packed_index.rb +95 -0
- data/lib/scout/persist/tsv/adapter/sharder.rb +54 -0
- data/lib/scout/persist/tsv/adapter/tkrzw.rb +18 -0
- data/lib/scout/persist/tsv/adapter/tokyocabinet.rb +65 -0
- data/lib/scout/persist/tsv/adapter.rb +6 -0
- data/lib/scout/{tsv/persist → persist/tsv}/serialize.rb +5 -0
- data/lib/scout/persist/tsv.rb +107 -0
- data/lib/scout/tsv/annotation/repo.rb +83 -0
- data/lib/scout/tsv/annotation.rb +169 -0
- data/lib/scout/tsv/attach.rb +95 -19
- data/lib/scout/tsv/change_id/translate.rb +148 -0
- data/lib/scout/tsv/change_id.rb +3 -0
- data/lib/scout/tsv/csv.rb +85 -0
- data/lib/scout/tsv/dumper.rb +113 -25
- data/lib/scout/tsv/entity.rb +5 -0
- data/lib/scout/tsv/index.rb +88 -36
- data/lib/scout/tsv/open.rb +21 -8
- data/lib/scout/tsv/parser.rb +153 -90
- data/lib/scout/tsv/path.rb +7 -2
- data/lib/scout/tsv/stream.rb +48 -6
- data/lib/scout/tsv/transformer.rb +4 -3
- data/lib/scout/tsv/traverse.rb +26 -18
- data/lib/scout/tsv/util/process.rb +7 -0
- data/lib/scout/tsv/util/reorder.rb +25 -15
- data/lib/scout/tsv/util/select.rb +9 -1
- data/lib/scout/tsv/util/sort.rb +90 -2
- data/lib/scout/tsv/util/unzip.rb +56 -0
- data/lib/scout/tsv/util.rb +52 -5
- data/lib/scout/tsv.rb +45 -27
- data/lib/scout/work_queue/socket.rb +8 -0
- data/lib/scout/work_queue/worker.rb +22 -5
- data/lib/scout/work_queue.rb +38 -24
- data/lib/scout/workflow/definition.rb +11 -10
- data/lib/scout/workflow/deployment/orchestrator.rb +20 -3
- data/lib/scout/workflow/deployment/trace.rb +205 -0
- data/lib/scout/workflow/deployment.rb +1 -0
- data/lib/scout/workflow/documentation.rb +1 -1
- data/lib/scout/workflow/step/archive.rb +42 -0
- data/lib/scout/workflow/step/children.rb +51 -0
- data/lib/scout/workflow/step/config.rb +1 -1
- data/lib/scout/workflow/step/dependencies.rb +24 -7
- data/lib/scout/workflow/step/file.rb +19 -0
- data/lib/scout/workflow/step/info.rb +37 -9
- data/lib/scout/workflow/step/progress.rb +11 -2
- data/lib/scout/workflow/step/status.rb +8 -1
- data/lib/scout/workflow/step.rb +80 -25
- data/lib/scout/workflow/task/dependencies.rb +4 -1
- data/lib/scout/workflow/task/inputs.rb +91 -41
- data/lib/scout/workflow/task.rb +54 -57
- data/lib/scout/workflow/usage.rb +1 -1
- data/lib/scout/workflow/util.rb +4 -0
- data/lib/scout/workflow.rb +110 -13
- data/lib/scout-gear.rb +2 -0
- data/lib/scout.rb +0 -1
- data/scout-gear.gemspec +80 -23
- data/scout_commands/rbbt +2 -0
- data/test/data/person/brothers +4 -0
- data/test/data/person/identifiers +10 -0
- data/test/data/person/marriages +3 -0
- data/test/data/person/parents +6 -0
- data/test/scout/association/test_fields.rb +105 -0
- data/test/scout/association/test_index.rb +70 -0
- data/test/scout/association/test_item.rb +21 -0
- data/test/scout/entity/test_format.rb +19 -0
- data/test/scout/entity/test_identifiers.rb +58 -0
- data/test/scout/entity/test_object.rb +0 -0
- data/test/scout/entity/test_property.rb +345 -0
- data/test/scout/{tsv/persist → persist/engine}/test_fix_width_table.rb +0 -1
- data/test/scout/persist/engine/test_packed_index.rb +99 -0
- data/test/scout/persist/engine/test_sharder.rb +31 -0
- data/test/scout/persist/engine/test_tkrzw.rb +0 -0
- data/test/scout/persist/engine/test_tokyocabinet.rb +17 -0
- data/test/scout/persist/test_tsv.rb +146 -0
- data/test/scout/{tsv/persist/test_adapter.rb → persist/tsv/adapter/test_base.rb} +3 -4
- data/test/scout/persist/tsv/adapter/test_fix_width_table.rb +46 -0
- data/test/scout/persist/tsv/adapter/test_packed_index.rb +37 -0
- data/test/scout/persist/tsv/adapter/test_serialize.rb +0 -0
- data/test/scout/persist/tsv/adapter/test_sharder.rb +290 -0
- data/test/scout/{tsv/persist → persist/tsv/adapter}/test_tkrzw.rb +3 -6
- data/test/scout/persist/tsv/adapter/test_tokyocabinet.rb +282 -0
- data/test/scout/persist/tsv/test_serialize.rb +12 -0
- data/test/scout/test_association.rb +51 -0
- data/test/scout/test_entity.rb +40 -0
- data/test/scout/test_tsv.rb +33 -4
- data/test/scout/test_work_queue.rb +3 -2
- data/test/scout/test_workflow.rb +16 -15
- data/test/scout/tsv/annotation/test_repo.rb +150 -0
- data/test/scout/tsv/change_id/test_translate.rb +178 -0
- data/test/scout/tsv/test_annotation.rb +52 -0
- data/test/scout/tsv/test_attach.rb +226 -1
- data/test/scout/tsv/test_change_id.rb +25 -0
- data/test/scout/tsv/test_csv.rb +50 -0
- data/test/scout/tsv/test_dumper.rb +38 -0
- data/test/scout/tsv/test_entity.rb +0 -0
- data/test/scout/tsv/test_index.rb +82 -0
- data/test/scout/tsv/test_open.rb +44 -0
- data/test/scout/tsv/test_parser.rb +70 -0
- data/test/scout/tsv/test_stream.rb +22 -0
- data/test/scout/tsv/test_transformer.rb +27 -3
- data/test/scout/tsv/test_traverse.rb +78 -0
- data/test/scout/tsv/util/test_process.rb +16 -0
- data/test/scout/tsv/util/test_reorder.rb +67 -0
- data/test/scout/tsv/util/test_sort.rb +28 -1
- data/test/scout/tsv/util/test_unzip.rb +32 -0
- data/test/scout/work_queue/test_socket.rb +4 -1
- data/test/scout/workflow/deployment/test_orchestrator.rb +17 -26
- data/test/scout/workflow/deployment/test_trace.rb +25 -0
- data/test/scout/workflow/step/test_archive.rb +28 -0
- data/test/scout/workflow/step/test_children.rb +25 -0
- data/test/scout/workflow/step/test_info.rb +16 -0
- data/test/scout/workflow/task/test_dependencies.rb +16 -16
- data/test/scout/workflow/task/test_inputs.rb +45 -1
- data/test/scout/workflow/test_definition.rb +52 -0
- data/test/scout/workflow/test_step.rb +57 -0
- data/test/scout/workflow/test_task.rb +26 -1
- data/test/scout/workflow/test_usage.rb +4 -4
- data/test/test_helper.rb +23 -1
- metadata +71 -14
- data/lib/scout/tsv/persist.rb +0 -27
- data/test/scout/tsv/persist/test_tokyocabinet.rb +0 -120
- data/test/scout/tsv/test_persist.rb +0 -45
data/lib/scout/tsv/dumper.rb
CHANGED
@@ -7,6 +7,8 @@ module TSV
|
|
7
7
|
|
8
8
|
if fields.nil?
|
9
9
|
fields_str = nil
|
10
|
+
elsif fields.empty?
|
11
|
+
fields_str = "#{header_hash}#{key_field || "Id"}"
|
10
12
|
else
|
11
13
|
fields_str = "#{header_hash}#{key_field || "Id"}#{sep}#{fields*sep}"
|
12
14
|
end
|
@@ -14,7 +16,7 @@ module TSV
|
|
14
16
|
if String === preamble
|
15
17
|
preamble_str = preamble
|
16
18
|
elsif preamble && options.values.compact.any?
|
17
|
-
preamble_str = "#: " << IndiferentHash.hash2string(options)
|
19
|
+
preamble_str = "#: " << IndiferentHash.hash2string(options.merge(serializer: nil))
|
18
20
|
else
|
19
21
|
preamble_str = nil
|
20
22
|
end
|
@@ -24,7 +26,7 @@ module TSV
|
|
24
26
|
end
|
25
27
|
|
26
28
|
|
27
|
-
attr_accessor :options, :initialized, :type, :sep
|
29
|
+
attr_accessor :options, :initialized, :type, :sep, :filename, :namespace
|
28
30
|
def initialize(options = {})
|
29
31
|
options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
|
30
32
|
@sep, @type = IndiferentHash.process_options options,
|
@@ -35,11 +37,18 @@ module TSV
|
|
35
37
|
@sout, @sin = Open.pipe
|
36
38
|
Log.low{"Dumper pipe #{[Log.fingerprint(@sin), Log.fingerprint(@sout)] * " -> "}"}
|
37
39
|
@initialized = false
|
40
|
+
@filename = options[:filename]
|
38
41
|
@mutex = Mutex.new
|
39
42
|
ConcurrentStream.setup(@sin, pair: @sout)
|
40
43
|
ConcurrentStream.setup(@sout, pair: @sin)
|
41
44
|
end
|
42
45
|
|
46
|
+
def set_stream(stream)
|
47
|
+
@sin.close
|
48
|
+
@sout.close
|
49
|
+
@sout = @sin = stream
|
50
|
+
end
|
51
|
+
|
43
52
|
def key_field
|
44
53
|
@options[:key_field]
|
45
54
|
end
|
@@ -56,6 +65,10 @@ module TSV
|
|
56
65
|
@options[:fields] = fields
|
57
66
|
end
|
58
67
|
|
68
|
+
def namespace=(namespace)
|
69
|
+
@options[:namespace] = namespace
|
70
|
+
end
|
71
|
+
|
59
72
|
def all_fields
|
60
73
|
return nil if fields.nil?
|
61
74
|
[key_field] + fields
|
@@ -66,7 +79,7 @@ module TSV
|
|
66
79
|
header = Dumper.header(@options.merge(type: @type, sep: @sep, preamble: preamble))
|
67
80
|
@mutex.synchronize do
|
68
81
|
@initialized = true
|
69
|
-
@sin
|
82
|
+
@sin << header << "\n" if header and ! header.empty?
|
70
83
|
end
|
71
84
|
end
|
72
85
|
|
@@ -74,20 +87,36 @@ module TSV
|
|
74
87
|
@mutex.synchronize do
|
75
88
|
|
76
89
|
key = key.to_s unless String === key
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
90
|
+
if value.nil? || (Array === value && value.empty?)
|
91
|
+
@sin << key << "\n"
|
92
|
+
else
|
93
|
+
case @type
|
94
|
+
when :single
|
95
|
+
@sin << key + @sep + value.to_s << "\n"
|
96
|
+
when :list, :flat
|
97
|
+
@sin << key + @sep + value * @sep << "\n"
|
98
|
+
when :double
|
99
|
+
@sin << key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep << "\n"
|
100
|
+
else
|
101
|
+
if Array === value
|
102
|
+
if Array === value.first
|
103
|
+
@sin << key + @sep + value.collect{|v| Array === v ? v * "|" : v } * @sep << "\n"
|
104
|
+
else
|
105
|
+
@sin << key + @sep + value * @sep << "\n"
|
106
|
+
end
|
107
|
+
else
|
108
|
+
@sin << key + @sep + value.to_s << "\n"
|
109
|
+
end
|
110
|
+
end
|
84
111
|
end
|
85
112
|
end
|
86
113
|
end
|
87
114
|
|
88
115
|
def close
|
89
|
-
@sin
|
90
|
-
|
116
|
+
if @sin != @sout
|
117
|
+
@sin.close if @sin.respond_to?(:close) && ! @sin.closed?
|
118
|
+
@sin.join if @sin.respond_to?(:join) && ! @sin.joined?
|
119
|
+
end
|
91
120
|
end
|
92
121
|
|
93
122
|
def stream
|
@@ -95,7 +124,7 @@ module TSV
|
|
95
124
|
end
|
96
125
|
|
97
126
|
def abort(exception=nil)
|
98
|
-
@sin.abort(exception)
|
127
|
+
@sin.abort(exception) if @sin.respond_to?(:abort)
|
99
128
|
end
|
100
129
|
|
101
130
|
def tsv(*args)
|
@@ -116,28 +145,87 @@ module TSV
|
|
116
145
|
end
|
117
146
|
|
118
147
|
def dumper_stream(options = {})
|
119
|
-
preamble = IndiferentHash.process_options options,
|
120
|
-
|
121
|
-
|
148
|
+
preamble, unmerge, keys, stream = IndiferentHash.process_options options,
|
149
|
+
:preamble, :unmerge, :keys, :stream,
|
150
|
+
:preamble => true, :unmerge => false
|
151
|
+
unmerge = false unless @type === :double
|
152
|
+
dumper = TSV::Dumper.new self.annotation_hash.merge(options)
|
153
|
+
|
154
|
+
dump_entry = Proc.new do |k,value_list|
|
155
|
+
if unmerge
|
156
|
+
max = value_list.collect{|v| v.length}.max
|
157
|
+
|
158
|
+
if unmerge == :expand and max > 1
|
159
|
+
value_list = value_list.collect do |values|
|
160
|
+
if values.length == 1
|
161
|
+
[values.first] * max
|
162
|
+
else
|
163
|
+
values
|
164
|
+
end
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
NamedArray.zip_fields(value_list).each do |values|
|
169
|
+
dumper.add k, values
|
170
|
+
end
|
171
|
+
else
|
172
|
+
dumper.add k, value_list
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
if stream.nil?
|
177
|
+
t = Thread.new do
|
178
|
+
begin
|
179
|
+
Thread.current.report_on_exception = true
|
180
|
+
Thread.current["name"] = "Dumper thread"
|
181
|
+
dumper.init(preamble: preamble)
|
182
|
+
|
183
|
+
if keys
|
184
|
+
keys.each do |k|
|
185
|
+
dump_entry.call k, self[k]
|
186
|
+
end
|
187
|
+
else
|
188
|
+
self.each &dump_entry
|
189
|
+
end
|
190
|
+
|
191
|
+
dumper.close
|
192
|
+
rescue
|
193
|
+
dumper.abort($!)
|
194
|
+
end
|
195
|
+
end
|
196
|
+
Thread.pass until t["name"]
|
197
|
+
stream = dumper.stream
|
198
|
+
ConcurrentStream.setup(stream, :threads => [t])
|
199
|
+
stream
|
200
|
+
else
|
201
|
+
dumper.set_stream stream
|
122
202
|
begin
|
123
|
-
Thread.current.report_on_exception = true
|
124
|
-
Thread.current["name"] = "Dumper thread"
|
125
203
|
dumper.init(preamble: preamble)
|
126
|
-
|
127
|
-
|
204
|
+
if keys
|
205
|
+
keys.each do |k|
|
206
|
+
dump_entry.call k, self[k]
|
207
|
+
end
|
208
|
+
else
|
209
|
+
self.each &dump_entry
|
128
210
|
end
|
211
|
+
|
129
212
|
dumper.close
|
130
213
|
rescue
|
131
214
|
dumper.abort($!)
|
132
215
|
end
|
216
|
+
stream
|
133
217
|
end
|
134
|
-
Thread.pass until t["name"]
|
135
|
-
s = dumper.stream
|
136
|
-
ConcurrentStream.setup(s, :threads => [t])
|
137
|
-
s
|
138
218
|
end
|
139
219
|
|
140
220
|
def to_s(options = {})
|
141
|
-
dumper_stream(options)
|
221
|
+
dumper_stream({stream: ''}.merge(options))
|
222
|
+
end
|
223
|
+
|
224
|
+
alias stream dumper_stream
|
225
|
+
|
226
|
+
def write_file(file)
|
227
|
+
Open.open(file, mode: 'w') do |f|
|
228
|
+
dumper_stream(stream: f)
|
229
|
+
end
|
142
230
|
end
|
143
231
|
end
|
data/lib/scout/tsv/index.rb
CHANGED
@@ -1,25 +1,72 @@
|
|
1
1
|
require_relative 'parser'
|
2
2
|
require_relative 'transformer'
|
3
|
-
require_relative 'persist/
|
3
|
+
require_relative '../persist/tsv'
|
4
4
|
module TSV
|
5
|
+
|
6
|
+
def self.select_prefix_str(select)
|
7
|
+
str = begin
|
8
|
+
case select
|
9
|
+
when nil
|
10
|
+
nil
|
11
|
+
when Array
|
12
|
+
case select.first
|
13
|
+
when nil
|
14
|
+
nil
|
15
|
+
when Array
|
16
|
+
select.collect{|p| p * "="}*","
|
17
|
+
else
|
18
|
+
select.collect{|p| p.to_s }*"="
|
19
|
+
end
|
20
|
+
when Hash
|
21
|
+
if select.empty?
|
22
|
+
nil
|
23
|
+
else
|
24
|
+
select.collect do |key,value|
|
25
|
+
[key.to_s, value.to_s] * "="
|
26
|
+
end * ","
|
27
|
+
end
|
28
|
+
end
|
29
|
+
rescue
|
30
|
+
Log.warn "Error in select_prefix_str: #{Log.fingerprint(select)}: #{$!.message}"
|
31
|
+
str = nil
|
32
|
+
end
|
33
|
+
if str.nil?
|
34
|
+
""
|
35
|
+
else
|
36
|
+
"[select:#{str}]"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
5
40
|
def self.index(tsv_file, target: :key, fields: nil, order: true, bar: nil, **kwargs)
|
6
|
-
|
7
|
-
|
8
|
-
:persist => false, :persist_type => "HDB"
|
9
|
-
kwargs.delete :type
|
41
|
+
kwargs = IndiferentHash.add_defaults kwargs, unnamed: true
|
42
|
+
engine = IndiferentHash.process_options kwargs, :engine
|
10
43
|
|
11
44
|
fields = :all if fields.nil?
|
12
45
|
|
13
|
-
|
46
|
+
prefix = case fields
|
47
|
+
when :all
|
48
|
+
"Index[#{target}]"
|
49
|
+
else
|
50
|
+
"Index[#{Log.fingerprint(fields)}->#{target}]"
|
51
|
+
end
|
52
|
+
|
53
|
+
prefix += select_prefix_str(kwargs[:select])
|
54
|
+
|
55
|
+
persist_options = IndiferentHash.pull_keys kwargs, :persist
|
56
|
+
persist_options = IndiferentHash.add_defaults persist_options, :prefix => prefix, :engine => :HDB, :persist => false
|
57
|
+
|
58
|
+
data_options = IndiferentHash.pull_keys kwargs, :data
|
59
|
+
|
60
|
+
Persist.persist(tsv_file, persist_options[:engine], persist_options.merge(other_options: kwargs.merge(target: target, fields: fields, order: order, data_options: data_options))) do |filename|
|
14
61
|
if filename
|
15
|
-
index = ScoutCabinet.open(filename, true,
|
62
|
+
index = ScoutCabinet.open(filename, true, engine)
|
16
63
|
TSV.setup(index, :type => :single)
|
17
64
|
index.extend TSVAdapter
|
18
65
|
else
|
19
66
|
index = TSV.setup({}, :type => :single)
|
20
67
|
end
|
21
68
|
|
22
|
-
tsv_file = TSV.open(tsv_file,
|
69
|
+
tsv_file = TSV.open(tsv_file, **data_options) if ! TSV === tsv_file
|
23
70
|
|
24
71
|
log_msg = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}"
|
25
72
|
Log.low log_msg
|
@@ -28,7 +75,7 @@ module TSV
|
|
28
75
|
if order
|
29
76
|
tmp_index = {}
|
30
77
|
include_self = fields == :all || (Array === fields) && fields.include?(target)
|
31
|
-
target_key_field, source_field_names = Open.traverse tsv_file, type: :double, key_field: target, fields: fields,
|
78
|
+
target_key_field, source_field_names = Open.traverse tsv_file, type: :double, key_field: target, fields: fields, bar: bar, **kwargs do |k,values|
|
32
79
|
tmp_index[k] ||= [[k]] if include_self
|
33
80
|
values.each_with_index do |list,i|
|
34
81
|
i += 1 if include_self
|
@@ -54,11 +101,10 @@ module TSV
|
|
54
101
|
index[e] = k unless index.include?(e)
|
55
102
|
end
|
56
103
|
end
|
57
|
-
|
58
|
-
index.key_field = source_field_names * ","
|
59
|
-
index.fields = [target_key_field]
|
60
104
|
end
|
61
105
|
|
106
|
+
index.key_field = source_field_names * ","
|
107
|
+
index.fields = [target_key_field]
|
62
108
|
|
63
109
|
index
|
64
110
|
end
|
@@ -66,20 +112,23 @@ module TSV
|
|
66
112
|
|
67
113
|
def index(*args, **kwargs, &block)
|
68
114
|
TSV.index(self, *args, **kwargs, &block)
|
69
|
-
|
115
|
+
end
|
70
116
|
|
71
117
|
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, bar: nil, **kwargs)
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
kwargs[:unnamed] = true
|
118
|
+
kwargs = IndiferentHash.add_defaults kwargs, unnamed: true
|
119
|
+
type, data_persist = IndiferentHash.process_options kwargs, :type, :data_persist
|
120
|
+
|
121
|
+
prefix = "RangeIndex[#{start_field}-#{end_field}]"
|
77
122
|
|
78
|
-
|
79
|
-
:persist => persist, :prefix => "RangeIndex[#{[start_field, end_field]*"-"}]", update: persist_update,
|
80
|
-
:other_options => kwargs) do |filename|
|
123
|
+
prefix += select_prefix_str(kwargs[:select])
|
81
124
|
|
82
|
-
|
125
|
+
persist_options = IndiferentHash.pull_keys kwargs, :persist
|
126
|
+
persist_options = IndiferentHash.add_defaults persist_options, :prefix => prefix, :type => :fwt, :persist => true
|
127
|
+
|
128
|
+
data_options = IndiferentHash.pull_keys kwargs, :data
|
129
|
+
|
130
|
+
Persist.persist(tsv_file, persist_options[:type], persist_options.merge(other_options: kwargs.merge(start_field: start_field, end_field: end_field, key_field: key_field))) do |filename|
|
131
|
+
tsv_file = TSV.open(tsv_file, *data_options) if data_options[:persist] && ! TSV === tsv_file
|
83
132
|
|
84
133
|
log_msg = "RangeIndex #{Log.fingerprint tsv_file} #{[start_field, end_field]*"-"}"
|
85
134
|
Log.low log_msg
|
@@ -87,7 +136,7 @@ module TSV
|
|
87
136
|
|
88
137
|
max_key_size = 0
|
89
138
|
index_data = []
|
90
|
-
TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], bar: bar, **kwargs do |key, values|
|
139
|
+
TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], bar: bar, unnamed: true, **kwargs do |key, values|
|
91
140
|
key_size = key.length
|
92
141
|
max_key_size = key_size if key_size > max_key_size
|
93
142
|
|
@@ -110,31 +159,34 @@ module TSV
|
|
110
159
|
end
|
111
160
|
|
112
161
|
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, bar: nil, **kwargs)
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
162
|
+
kwargs = IndiferentHash.add_defaults kwargs, unnamed: true
|
163
|
+
type, data_persist = IndiferentHash.process_options kwargs, :type
|
164
|
+
|
165
|
+
prefix = "PositionIndex[#{pos_field}]"
|
166
|
+
|
167
|
+
prefix += select_prefix_str(kwargs[:select])
|
168
|
+
|
169
|
+
persist_options = IndiferentHash.pull_keys kwargs, :persist
|
170
|
+
persist_options = IndiferentHash.add_defaults persist_options, :prefix => prefix, :type => :fwt, :persist => true
|
118
171
|
|
119
|
-
|
120
|
-
:persist => persist, :prefix => "RangeIndex[#{pos_field}]", update: persist_update,
|
121
|
-
:other_options => kwargs) do |filename|
|
172
|
+
data_options = IndiferentHash.pull_keys kwargs, :data
|
122
173
|
|
123
|
-
|
174
|
+
Persist.persist(tsv_file, persist_options[:type], persist_options.merge(other_options: kwargs.merge(pos_field: pos_field, key_field: key_field))) do |filename|
|
175
|
+
tsv_file = TSV.open(tsv_file, *data_options) if data_options[:persist] && ! TSV === tsv_file
|
124
176
|
|
125
|
-
log_msg = "
|
177
|
+
log_msg = "PositionIndex #{Log.fingerprint tsv_file} #{pos_field}"
|
126
178
|
Log.low log_msg
|
127
179
|
bar = log_msg if TrueClass === bar
|
128
180
|
|
129
181
|
max_key_size = 0
|
130
182
|
index_data = []
|
131
|
-
TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :
|
183
|
+
TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :flat, cast: :to_i, bar: bar, **kwargs do |key, pos|
|
132
184
|
key_size = key.length
|
133
185
|
max_key_size = key_size if key_size > max_key_size
|
134
186
|
|
135
187
|
if Array === pos
|
136
|
-
pos.
|
137
|
-
|
188
|
+
pos.each do |p|
|
189
|
+
index_data << [key, p]
|
138
190
|
end
|
139
191
|
else
|
140
192
|
index_data << [key, pos]
|
data/lib/scout/tsv/open.rb
CHANGED
@@ -42,6 +42,11 @@ module Open
|
|
42
42
|
ConcurrentStream.setup(sin, :pair => sout)
|
43
43
|
self.traverse(obj, into: sin, cpus: cpus, bar: bar, callback: callback, unnamed: unnamed, **options, &block)
|
44
44
|
return sout
|
45
|
+
elsif Path === into
|
46
|
+
Open.write(into) do |io|
|
47
|
+
self.traverse(obj, into: io, cpus: cpus, bar: bar, callback: callback, unnamed: unnamed, **options, &block)
|
48
|
+
end
|
49
|
+
return into
|
45
50
|
end
|
46
51
|
|
47
52
|
if into || bar
|
@@ -64,8 +69,8 @@ module Open
|
|
64
69
|
into.close if ! keep_open && into.respond_to?(:close)
|
65
70
|
bar.remove if bar
|
66
71
|
rescue Exception
|
67
|
-
into.abort($!) if into.respond_to?(:abort)
|
68
72
|
bar.remove($!) if bar
|
73
|
+
into.abort($!) if into.respond_to?(:abort)
|
69
74
|
end
|
70
75
|
end
|
71
76
|
|
@@ -87,7 +92,7 @@ module Open
|
|
87
92
|
end
|
88
93
|
|
89
94
|
queue.process do |res|
|
90
|
-
callback.call res
|
95
|
+
callback.call res if callback
|
91
96
|
end
|
92
97
|
|
93
98
|
begin
|
@@ -134,7 +139,7 @@ module Open
|
|
134
139
|
obj = obj.produce_and_find if Path === obj
|
135
140
|
f = Open.open(obj)
|
136
141
|
self.traverse(f, cpus: cpus, callback: callback, **options, &block)
|
137
|
-
when Step
|
142
|
+
when (defined?(Step) && Step)
|
138
143
|
raise obj.exception if obj.error?
|
139
144
|
self.traverse(obj.stream, cpus: cpus, callback: callback, **options, &block)
|
140
145
|
when IO
|
@@ -147,7 +152,7 @@ module Open
|
|
147
152
|
end
|
148
153
|
else
|
149
154
|
Log.low "Traverse stream with parser #{Log.fingerprint obj}"
|
150
|
-
parser = TSV::Parser.new obj
|
155
|
+
parser = options[:sep].nil? ? TSV::Parser.new(obj) : TSV::Parser.new(obj, sep: options[:sep])
|
151
156
|
parser.traverse **options do |k,v,f|
|
152
157
|
res = block.call k,v,f
|
153
158
|
callback.call res if callback
|
@@ -161,10 +166,18 @@ module Open
|
|
161
166
|
nil
|
162
167
|
end
|
163
168
|
else
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
169
|
+
if obj.respond_to?(:pop)
|
170
|
+
while elem = obj.pop
|
171
|
+
res = block.call elem
|
172
|
+
callback.call res if callback
|
173
|
+
break unless obj.any?
|
174
|
+
end
|
175
|
+
else
|
176
|
+
TSV.parse obj, **options do |k,v|
|
177
|
+
res = block.call k, v
|
178
|
+
callback.call res if callback
|
179
|
+
nil
|
180
|
+
end
|
168
181
|
end
|
169
182
|
end
|
170
183
|
bar.remove if bar
|