scout-gear 7.3.0 → 8.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.vimproject +20 -9
- data/VERSION +1 -1
- data/bin/scout +6 -3
- data/lib/rbbt-scout.rb +1 -0
- data/lib/scout/cmd.rb +1 -1
- data/lib/scout/concurrent_stream.rb +26 -23
- data/lib/scout/config.rb +1 -1
- data/lib/scout/log/color.rb +4 -1
- data/lib/scout/log/progress/report.rb +1 -1
- data/lib/scout/log/progress/util.rb +58 -54
- data/lib/scout/log/progress.rb +1 -1
- data/lib/scout/log/trap.rb +107 -0
- data/lib/scout/log.rb +56 -21
- data/lib/scout/meta_extension.rb +13 -6
- data/lib/scout/misc/digest.rb +1 -1
- data/lib/scout/misc/format.rb +12 -0
- data/lib/scout/misc/insist.rb +1 -1
- data/lib/scout/misc/monitor.rb +11 -0
- data/lib/scout/misc/system.rb +10 -0
- data/lib/scout/named_array.rb +65 -3
- data/lib/scout/open/lock/lockfile.rb +587 -0
- data/lib/scout/open/lock.rb +28 -2
- data/lib/scout/open/remote.rb +4 -0
- data/lib/scout/open/stream.rb +90 -15
- data/lib/scout/open/util.rb +13 -3
- data/lib/scout/path/find.rb +9 -1
- data/lib/scout/path/util.rb +35 -0
- data/lib/scout/persist/serialize.rb +18 -5
- data/lib/scout/persist.rb +28 -12
- data/lib/scout/resource/path.rb +53 -0
- data/lib/scout/resource/produce.rb +0 -8
- data/lib/scout/resource/util.rb +2 -1
- data/lib/scout/tmpfile.rb +7 -8
- data/lib/scout/tsv/attach.rb +177 -0
- data/lib/scout/tsv/change_id.rb +40 -0
- data/lib/scout/tsv/dumper.rb +72 -46
- data/lib/scout/tsv/index.rb +69 -13
- data/lib/scout/tsv/open.rb +138 -84
- data/lib/scout/tsv/parser.rb +135 -80
- data/lib/scout/tsv/path.rb +1 -2
- data/lib/scout/tsv/persist/adapter.rb +15 -45
- data/lib/scout/tsv/persist/fix_width_table.rb +3 -0
- data/lib/scout/tsv/persist/tokyocabinet.rb +4 -1
- data/lib/scout/tsv/persist.rb +4 -0
- data/lib/scout/tsv/transformer.rb +141 -0
- data/lib/scout/tsv/traverse.rb +96 -92
- data/lib/scout/tsv/util/filter.rb +9 -0
- data/lib/scout/tsv/util/reorder.rb +81 -0
- data/lib/scout/tsv/util/select.rb +78 -33
- data/lib/scout/tsv/util/unzip.rb +86 -0
- data/lib/scout/tsv/util.rb +60 -11
- data/lib/scout/tsv.rb +26 -3
- data/lib/scout/work_queue/socket.rb +6 -1
- data/lib/scout/work_queue/worker.rb +5 -2
- data/lib/scout/work_queue.rb +15 -8
- data/lib/scout/workflow/definition.rb +21 -2
- data/lib/scout/workflow/step/dependencies.rb +24 -4
- data/lib/scout/workflow/step/info.rb +36 -5
- data/lib/scout/workflow/step/provenance.rb +8 -7
- data/lib/scout/workflow/step/status.rb +45 -0
- data/lib/scout/workflow/step.rb +100 -34
- data/lib/scout/workflow/task/inputs.rb +14 -20
- data/lib/scout/workflow/task.rb +81 -46
- data/lib/scout/workflow/usage.rb +8 -6
- data/scout-gear.gemspec +24 -20
- data/scout_commands/workflow/task +34 -7
- data/test/scout/open/test_stream.rb +60 -58
- data/test/scout/path/test_find.rb +10 -1
- data/test/scout/resource/test_produce.rb +15 -0
- data/test/scout/test_meta_extension.rb +25 -0
- data/test/scout/test_named_array.rb +18 -0
- data/test/scout/test_persist.rb +6 -0
- data/test/scout/test_tsv.rb +212 -2
- data/test/scout/test_work_queue.rb +21 -19
- data/test/scout/tsv/persist/test_adapter.rb +1 -1
- data/test/scout/tsv/persist/test_tokyocabinet.rb +29 -1
- data/test/scout/tsv/test_attach.rb +227 -0
- data/test/scout/tsv/test_change_id.rb +98 -0
- data/test/scout/tsv/test_dumper.rb +1 -1
- data/test/scout/tsv/test_index.rb +35 -3
- data/test/scout/tsv/test_open.rb +160 -2
- data/test/scout/tsv/test_parser.rb +19 -2
- data/test/scout/tsv/test_persist.rb +2 -0
- data/test/scout/tsv/test_transformer.rb +108 -0
- data/test/scout/tsv/test_traverse.rb +88 -3
- data/test/scout/tsv/test_util.rb +1 -0
- data/test/scout/tsv/util/test_reorder.rb +94 -0
- data/test/scout/tsv/util/test_select.rb +25 -11
- data/test/scout/tsv/util/test_unzip.rb +112 -0
- data/test/scout/work_queue/test_socket.rb +0 -1
- data/test/scout/workflow/step/test_status.rb +31 -0
- data/test/scout/workflow/task/test_inputs.rb +14 -14
- data/test/scout/workflow/test_step.rb +3 -3
- data/test/scout/workflow/test_task.rb +168 -32
- data/test/scout/workflow/test_usage.rb +33 -6
- metadata +20 -6
@@ -0,0 +1,177 @@
|
|
1
|
+
module TSV
|
2
|
+
|
3
|
+
# Decides which field of +source+ and which field of +other+ should be used
# to pair up their entries.
#
# Resolution order when +match_key+ is not given:
#   1. the first field name present verbatim in both tables,
#   2. the first field of +source+ that +other+ can identify,
#   3. the first field of +other+ that +source+ can identify,
#   4. the key field of +source+.
#
# Returns a two-element array [match_key, other_key]; an entry equal to the
# corresponding table's key field is reported as :key.
def self.match_keys(source, other, match_key: nil, other_key: nil)
  if match_key.nil?
    shared_fields = source.all_fields & other.all_fields
    match_key = shared_fields.first
  end

  if match_key.nil?
    source.all_fields.each do |field|
      # other_key is overwritten on every iteration, also on a miss
      other_key = other.identify_field(field)
      next unless other_key

      other_key = other.key_field if other_key == :key
      match_key = field
      break
    end
  end

  if match_key.nil?
    other.all_fields.each do |field|
      match_key = source.identify_field(field)
      next unless match_key

      other_key = field
      break
    end
  end

  match_key = source.key_field if match_key.nil?
  other_key = other.identify_field(match_key) if other_key.nil?
  other_key = other.key_field if other_key.nil?

  [
    match_key == source.key_field ? :key : match_key,
    other_key == other.key_field ? :key : other_key
  ]
end
|
40
|
+
|
41
|
+
# Adds fields from +other+ to the entries of +source+, pairing entries
# through the fields chosen by TSV.match_keys.
#
# source        - a TSV or TSV::Parser; anything else is wrapped in a
#                 TSV::Transformer.
# other         - a TSV; anything else is opened with TSV.open.
# target        - only used when +source+ is a Transformer: :stream builds a
#                 TSV::Dumper, nil builds an empty TSV, any other value is
#                 used as the dumper as given.
# fields        - field(s) of +other+ to attach; defaults to all fields of
#                 +other+ except its matching key and the key of +source+.
# match_key /
# other_key     - explicit matching fields, forwarded to TSV.match_keys.
# one2one       - forwarded to +other.reorder+ when +other+ must be re-keyed.
# complete      - when true and matching is on the source key, entries that
#                 exist only in +other+ are added to +source+.
# insitu        - update the traversed values in place; defaults to true only
#                 when +source+ is a TSV.
# persist_input - forwarded as +persist+ when opening +other+.
# bar           - progress bar setting; +true+ is replaced by the log message.
#
# Returns +source+ (possibly the Transformer wrapping the original input).
def self.attach(source, other, target: nil, fields: nil, match_key: nil, other_key: nil, one2one: true, complete: false, insitu: nil, persist_input: false, bar: nil)
  source = TSV::Transformer.new source unless TSV === source || TSV::Parser === source
  other = TSV.open other, persist: persist_input unless TSV === other

  fields = [fields] if String === fields

  match_key, other_key = TSV.match_keys(source, other, match_key: match_key, other_key: other_key)

  if TSV::Transformer === source
    source.dumper = case target
                    when :stream
                      TSV::Dumper.new(source.options.merge(sep: "\t"))
                    when nil
                      TSV.setup({}, **source.options.dup)
                    else
                      target
                    end
  end

  other.with_unnamed do
    source.with_unnamed do

      other_key_name = other_key == :key ? other.key_field : other_key
      other_key_name = other.fields[other_key_name] if Integer === other_key
      fields = other.all_fields - [other_key_name, source.key_field] if fields.nil?

      # Re-key +other+ so it can be looked up directly by the matching value
      if other_key != :key
        other = other.reorder other_key, fields, one2one: one2one
      end

      other_field_positions = other.identify_field(fields)

      log_message = "Attach #{Log.fingerprint fields - source.fields} to #{Log.fingerprint source} (#{[match_key, other_key] * "=~"})"
      Log.debug log_message
      bar = log_message if TrueClass === bar

      source.fields = (source.fields + fields).uniq

      # Position in +source+ of every attached field, in the order of +fields+
      overlaps = source.identify_field(fields)

      empty_other_values = case source.type
                           when :list
                             [nil] * other.fields.length
                           when :flat
                             []
                           when :double
                             [[]] * other.fields.length
                           end

      insitu = TSV === source ? true : false if insitu.nil?

      match_key_pos = source.identify_field(match_key)
      source.traverse bar: bar, unnamed: true do |orig_key,current_values|
        keys = (match_key == :key || match_key_pos == :key) ? [orig_key] : current_values[match_key_pos]
        keys = [keys] unless Array === keys

        current_values = current_values.dup unless insitu
        keys.each do |current_key|
          other_values = other[current_key]

          # Bring the values of +other+ to the shape of the source type
          if other_values.nil?
            other_values = empty_other_values
          elsif other.type == :flat
            other_values = [other_values]
          elsif other.type == :list && source.type == :double
            other_values = other_values.collect{|v| [v] }
          elsif other.type == :double && source.type == :list
            other_values = other_values.collect{|v| v.first }
          end

          other_values = other_values.values_at(*other_field_positions)

          other_values.zip(overlaps).each do |v,overlap|
            if source.type == :list
              # Only fill positions that are still missing or empty
              current_values[overlap] = v if current_values[overlap].nil? || String === current_values[overlap] && current_values[overlap].empty?
            else
              current_values[overlap] ||= []
              current_values[overlap].concat(v - current_values[overlap])
            end
          end
        end
        source[orig_key] = current_values unless insitu
        nil
      end

      if complete && match_key == :key
        other.each do |new_key,other_values|
          next if source.include?(new_key)

          if other.type == :flat
            other_values = [other_values]
          elsif other.type == :list && source.type == :double
            other_values = other_values.collect{|v| [v] }
          elsif other.type == :double && source.type == :list
            other_values = other_values.collect{|v| v.first }
          end

          # Same projection as in the main loop: keep only the attached
          # fields, in the order that lines up with +overlaps+
          other_values = other_values.values_at(*other_field_positions)

          new_values = case source.type
                       when :list
                         [nil] * source.fields.length
                       when :flat
                         []
                       when :double
                         source.fields.length.times.collect{ [] }
                       end

          other_values.zip(overlaps).each do |v,overlap|
            next if v.nil?
            if source.type == :list
              # Test the slot being filled, not the incoming value
              new_values[overlap] = v if new_values[overlap].nil? || (String === new_values[overlap] && new_values[overlap].empty?)
            else
              new_values[overlap].concat v
            end
          end
          source[new_key] = new_values
        end
      end
    end
  end

  source
end
|
173
|
+
|
174
|
+
# Instance-level shortcut: forwards every argument to TSV.attach with this
# object as the source.
def attach(*args, **kwargs)
|
175
|
+
TSV.attach(self, *args, **kwargs)
|
176
|
+
end
|
177
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
module TSV
|
2
|
+
# Re-keys +source+ by +new_key_field+.
#
# source              - a TSV-like object; a String is parsed with TSV::Parser.
# new_key_field       - field that becomes the key of the result.
# identifiers         - table used to translate into +new_key_field+ when the
#                       source does not contain that field; defaults to the
#                       identifiers declared by the source, when it has any.
# one2one             - forwarded to the traversal that re-keys the table.
# stream              - return the TSV::Transformer instead of its +tsv+.
# keep                - keep the old key as the first field of the result.
# persist_identifiers - forwarded as +persist_input+ when attaching.
#
# Returns the transformer when +stream+ is true, otherwise +transformer.tsv+.
def self.change_key(source, new_key_field, identifiers: nil, one2one: false, stream: false, keep: false, persist_identifiers: nil)
  source = TSV::Parser.new source if String === source

  # The fallback has to run before the check below; placed inside the branch
  # it could never trigger, as the branch already required identifiers
  identifiers = source.identifiers if identifiers.nil? && source.respond_to?(:identifiers)

  if identifiers && source.identify_field(new_key_field, strict: true).nil?
    # The new key is not in the source: attach it first, then re-key the result
    new = source.attach(identifiers, fields: [new_key_field], insitu: false, one2one: true, persist_input: persist_identifiers)
    return new.change_key(new_key_field, keep: keep, stream: stream, one2one: one2one)
  end

  fields = source.fields.dup - [new_key_field]
  fields.unshift source.key_field if keep
  transformer = TSV::Transformer.new source
  transformer.key_field = new_key_field
  transformer.fields = fields
  transformer.traverse key_field: new_key_field, fields: fields, one2one: one2one, unnamed: true do |k,v|
    [k, v]
  end

  stream ? transformer : transformer.tsv
end
|
22
|
+
|
23
|
+
# Instance-level shortcut: forwards every argument to TSV.change_key with
# this object as the source.
def change_key(*args, **kwargs)
|
24
|
+
TSV.change_key(self, *args, **kwargs)
|
25
|
+
end
|
26
|
+
|
27
|
+
# Replaces the field +source_id+ of +source+ with +new_id+: +new_id+ is
# attached from +identifiers+ and the result is sliced to the original field
# list with +source_id+ swapped for +new_id+.
#
# A String +source+ is parsed with TSV::Parser. When +identifiers+ is nil the
# identifiers of the source are used. +one2one+ is accepted but not used here.
def self.change_id(source, source_id, new_id, identifiers: nil, one2one: false, insitu: false)
  source = TSV::Parser.new(source) if String === source
  identifiers = source.identifiers if identifiers.nil?

  # Same field layout as the source, with the old id replaced at its position
  renamed_fields = source.fields.dup
  position = renamed_fields.index(source_id)
  renamed_fields[position] = new_id

  attached = source.attach(identifiers, fields: [new_id], insitu: insitu)
  attached.slice(renamed_fields)
end
|
36
|
+
|
37
|
+
# Instance-level shortcut: forwards every argument to TSV.change_id with
# this object as the source.
def change_id(*args, **kwargs)
|
38
|
+
TSV.change_id(self, *args, **kwargs)
|
39
|
+
end
|
40
|
+
end
|
data/lib/scout/tsv/dumper.rb
CHANGED
@@ -1,38 +1,14 @@
|
|
1
1
|
module TSV
|
2
2
|
class Dumper
|
3
|
-
def self.header_lines(key_field, fields, entry_hash = nil)
|
4
|
-
if Hash === entry_hash
|
5
|
-
sep = entry_hash[:sep] ? entry_hash[:sep] : "\t"
|
6
|
-
preamble = entry_hash[:preamble]
|
7
|
-
header_hash = entry_hash[:header_hash]
|
8
|
-
end
|
9
|
-
|
10
|
-
header_hash = "#" if header_hash.nil?
|
11
|
-
|
12
|
-
preamble = "#: " << Misc.hash2string(entry_hash.merge(:key_field => nil, :fields => nil)) << "\n" if preamble.nil? and entry_hash and entry_hash.values.compact.any?
|
13
|
-
|
14
|
-
str = ""
|
15
|
-
str << preamble.strip << "\n" if preamble and not preamble.empty?
|
16
|
-
if fields
|
17
|
-
if fields.empty?
|
18
|
-
str << header_hash << (key_field || "ID").to_s << "\n"
|
19
|
-
else
|
20
|
-
str << header_hash << (key_field || "ID").to_s << sep << (fields * sep) << "\n"
|
21
|
-
end
|
22
|
-
end
|
23
|
-
|
24
|
-
str
|
25
|
-
end
|
26
|
-
|
27
3
|
def self.header(options={})
|
28
|
-
key_field, fields, sep, header_hash, preamble = IndiferentHash.process_options options,
|
29
|
-
:key_field, :fields, :sep, :header_hash, :preamble,
|
4
|
+
key_field, fields, sep, header_hash, preamble, unnamed = IndiferentHash.process_options options,
|
5
|
+
:key_field, :fields, :sep, :header_hash, :preamble, :unnamed,
|
30
6
|
:sep => "\t", :header_hash => "#", :preamble => true
|
31
7
|
|
32
|
-
if fields.nil?
|
8
|
+
if fields.nil?
|
33
9
|
fields_str = nil
|
34
10
|
else
|
35
|
-
fields_str = "#{header_hash}#{key_field}#{sep}#{fields*sep}"
|
11
|
+
fields_str = "#{header_hash}#{key_field || "Id"}#{sep}#{fields*sep}"
|
36
12
|
end
|
37
13
|
|
38
14
|
if preamble && options.values.compact.any?
|
@@ -45,31 +21,62 @@ module TSV
|
|
45
21
|
end
|
46
22
|
|
47
23
|
|
48
|
-
attr_accessor :options
|
24
|
+
attr_accessor :options, :initialized, :type, :sep
|
49
25
|
# Sets up a dumper that writes into a pipe.
#
# options - a Hash of options, or a TSV::Parser / TSV, in which case its
#           +options+ merged with sep: nil are used instead.
def initialize(options = {})
|
26
|
+
options = options.options.merge(sep: nil) if TSV::Parser === options || TSV === options
|
50
27
|
# :sep and :type are read through IndiferentHash.process_options with the
# defaults "\t" and :double -- presumably a nil :sep also falls back to the
# default; confirm against process_options
@sep, @type = IndiferentHash.process_options options,
|
51
28
|
:sep, :type,
|
52
29
|
:sep => "\t", :type => :double
|
53
30
|
@options = options
|
54
31
|
@sout, @sin = Open.pipe
|
32
|
+
# Starts as false; set to true by #init
@initialized = false
|
33
|
+
# Guards the writes to @sin done by #init and #add
@mutex = Mutex.new
|
55
34
|
# Each end of the pipe is registered as the pair of the other
ConcurrentStream.setup(@sin, pair: @sout)
|
56
35
|
ConcurrentStream.setup(@sout, pair: @sin)
|
57
36
|
end
|
58
37
|
|
59
|
-
def
|
60
|
-
|
61
|
-
|
38
|
+
# Returns the :key_field entry of the dumper options.
def key_field
|
39
|
+
@options[:key_field]
|
40
|
+
end
|
41
|
+
|
42
|
+
# Returns the :fields entry of the dumper options.
def fields
|
43
|
+
@options[:fields]
|
62
44
|
end
|
63
45
|
|
64
|
-
def
|
46
|
+
# Stores +key_field+ as the :key_field entry of the dumper options.
def key_field=(key_field)
|
47
|
+
@options[:key_field] = key_field
|
48
|
+
end
|
49
|
+
|
50
|
+
# Stores +fields+ as the :fields entry of the dumper options.
def fields=(fields)
|
51
|
+
@options[:fields] = fields
|
52
|
+
end
|
53
|
+
|
54
|
+
# Returns the key field followed by the value fields, or nil when no fields
# have been set.
def all_fields
  field_names = fields
  field_names.nil? ? nil : [key_field] + field_names
end
|
65
58
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
@
|
71
|
-
|
72
|
-
|
59
|
+
|
60
|
+
# Marks the dumper as initialized and writes the header, when there is one,
# to the input end of the pipe.
#
# preamble - forwarded to Dumper.header together with the dumper options,
#            type and separator.
def init(preamble: true)
  header_options = @options.merge(type: @type, sep: @sep, preamble: preamble)
  header = Dumper.header(header_options)

  @mutex.synchronize do
    @initialized = true
    @sin.puts header unless !header || header.empty?
  end
end
|
67
|
+
|
68
|
+
# Writes one entry to the input end of the pipe, formatted by the dumper type:
#   :single       - key, separator, value.to_s
#   :list, :flat  - key, separator, values joined by the separator
#   :double       - as :list, with each Array value first joined by "|"
# Any other type writes nothing. A non-String key is converted with to_s.
def add(key, value)
  @mutex.synchronize do
    key = key.to_s unless String === key

    rendered = case @type
               when :single
                 value.to_s
               when :list, :flat
                 value * @sep
               when :double
                 value.collect { |v| Array === v ? v * "|" : v } * @sep
               end

    @sin.puts key + @sep + rendered unless rendered.nil?
  end
end
|
75
82
|
|
@@ -85,15 +92,32 @@ module TSV
|
|
85
92
|
# Forwards the abort, with the optional exception, to the input end of the pipe.
def abort(exception=nil)
|
86
93
|
@sin.abort(exception)
|
87
94
|
end
|
95
|
+
|
96
|
+
# Opens the dumper output with TSV.open, passing along any extra arguments.
# `stream` is defined outside this excerpt.
def tsv(*args)
|
97
|
+
TSV.open(stream, *args)
|
98
|
+
end
|
99
|
+
|
100
|
+
# Short description of the dumper: the Log fingerprint of its fields (an
# empty list when no fields are set) wrapped as "Dumper:{...}".
def fingerprint
  "Dumper:{#{Log.fingerprint(all_fields || [])}}"
end
|
103
|
+
|
104
|
+
# Returns the same string as #fingerprint.
def digest_str
|
105
|
+
fingerprint
|
106
|
+
end
|
107
|
+
|
108
|
+
# Returns the same string as #fingerprint instead of the default object dump.
def inspect
|
109
|
+
fingerprint
|
110
|
+
end
|
88
111
|
end
|
89
112
|
|
90
|
-
def
|
91
|
-
|
92
|
-
dumper.
|
113
|
+
def dumper_stream(options = {})
|
114
|
+
preamble = IndiferentHash.process_options options, :preamble, :preamble => true
|
115
|
+
dumper = TSV::Dumper.new self.extension_attr_hash.merge(options)
|
93
116
|
t = Thread.new do
|
94
117
|
begin
|
95
118
|
Thread.current.report_on_exception = true
|
96
119
|
Thread.current["name"] = "Dumper thread"
|
120
|
+
dumper.init(preamble: preamble)
|
97
121
|
self.each do |k,v|
|
98
122
|
dumper.add k, v
|
99
123
|
end
|
@@ -103,10 +127,12 @@ module TSV
|
|
103
127
|
end
|
104
128
|
end
|
105
129
|
Thread.pass until t["name"]
|
106
|
-
dumper.stream
|
130
|
+
s = dumper.stream
|
131
|
+
ConcurrentStream.setup(s, :threads => [t])
|
132
|
+
s
|
107
133
|
end
|
108
134
|
|
109
|
-
def to_s
|
110
|
-
|
135
|
+
# Builds the dumper stream with +options+ and returns the result of reading it.
def to_s(options = {})
|
136
|
+
dumper_stream(options).read
|
111
137
|
end
|
112
138
|
end
|
data/lib/scout/tsv/index.rb
CHANGED
@@ -1,13 +1,16 @@
|
|
1
1
|
require_relative 'parser'
|
2
|
+
require_relative 'transformer'
|
2
3
|
require_relative 'persist/fix_width_table'
|
3
4
|
module TSV
|
4
|
-
def self.index(tsv_file, target: 0, fields: nil, order: true, **kwargs)
|
5
|
-
persist, type = IndiferentHash.process_options kwargs,
|
6
|
-
:persist, :persist_type,
|
5
|
+
def self.index(tsv_file, target: 0, fields: nil, order: true, bar: nil, **kwargs)
|
6
|
+
persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
|
7
|
+
:persist, :persist_type, :persist_update, :data_persist,
|
7
8
|
:persist => false, :persist_type => "HDB"
|
8
9
|
kwargs.delete :type
|
9
10
|
|
10
|
-
|
11
|
+
fields = :all if fields.nil?
|
12
|
+
|
13
|
+
Persist.persist(tsv_file, type, kwargs.merge(target: target, fields: fields, persist: persist, update: persist_update, :prefix => "Index", :other_options => kwargs)) do |filename|
|
11
14
|
if filename
|
12
15
|
index = ScoutCabinet.open(filename, true, type)
|
13
16
|
TSV.setup(index, :type => :single)
|
@@ -16,11 +19,17 @@ module TSV
|
|
16
19
|
index = TSV.setup({}, :type => :single)
|
17
20
|
end
|
18
21
|
|
19
|
-
|
22
|
+
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
23
|
+
|
24
|
+
bar = "Index #{Log.fingerprint tsv_file} target #{Log.fingerprint target}" if TrueClass === bar
|
25
|
+
|
20
26
|
if order
|
21
27
|
tmp_index = {}
|
22
|
-
|
28
|
+
include_self = fields == :all || (Array === fields) && fields.include?(target)
|
29
|
+
target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :double, unnamed: true, bar: bar, **kwargs do |k,values|
|
30
|
+
tmp_index[k] ||= [[k]] if include_self
|
23
31
|
values.each_with_index do |list,i|
|
32
|
+
i += 1 if include_self
|
24
33
|
list.each do |e|
|
25
34
|
tmp_index[e] ||= []
|
26
35
|
tmp_index[e][i] ||= []
|
@@ -31,16 +40,24 @@ module TSV
|
|
31
40
|
tmp_index.each do |e,list|
|
32
41
|
index[e] = list.flatten.compact.uniq.first
|
33
42
|
end
|
43
|
+
|
44
|
+
index.key_field = source_field_names * ","
|
45
|
+
index.fields = [target_key_field]
|
46
|
+
|
47
|
+
tmp_index = {}
|
48
|
+
|
34
49
|
else
|
35
|
-
|
50
|
+
target_key_field, source_field_names = Open.traverse tsv_file, key_field: target, fields: fields, type: :flat, unnamed: true, bar: bar, **kwargs do |k,values|
|
36
51
|
values.each do |e|
|
37
52
|
index[e] = k unless index.include?(e)
|
38
53
|
end
|
39
54
|
end
|
55
|
+
|
56
|
+
index.key_field = source_field_names * ","
|
57
|
+
index.fields = [target_key_field]
|
40
58
|
end
|
41
59
|
|
42
|
-
|
43
|
-
index.fields = [dummy_data.key_field]
|
60
|
+
|
44
61
|
index
|
45
62
|
end
|
46
63
|
end
|
@@ -50,16 +67,18 @@ module TSV
|
|
50
67
|
end
|
51
68
|
|
52
69
|
def self.range_index(tsv_file, start_field = nil, end_field = nil, key_field: :key, **kwargs)
|
53
|
-
persist, type = IndiferentHash.process_options kwargs,
|
54
|
-
:persist, :persist_type,
|
70
|
+
persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
|
71
|
+
:persist, :persist_type, :persist_update, :data_persist,
|
55
72
|
:persist => false, :persist_type => :fwt
|
56
73
|
kwargs.delete :type
|
57
74
|
|
58
|
-
Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :
|
75
|
+
Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, :prefix => "RangeIndex", :other_options => kwargs, update: persist_update)) do |filename|
|
76
|
+
|
77
|
+
tsv_file = TSV.open(tsv_file, persist: true) if data_persist && ! TSV === tsv_file
|
59
78
|
|
60
79
|
max_key_size = 0
|
61
80
|
index_data = []
|
62
|
-
TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field] do |key, values|
|
81
|
+
TSV.traverse tsv_file, key_field: key_field, fields: [start_field, end_field], **kwargs do |key, values|
|
63
82
|
key_size = key.length
|
64
83
|
max_key_size = key_size if key_size > max_key_size
|
65
84
|
|
@@ -85,6 +104,43 @@ module TSV
|
|
85
104
|
TSV.range_index(self, *args, **kwargs, &block)
|
86
105
|
end
|
87
106
|
|
107
|
+
# Builds a FixWidthTable holding one [key, position] point per entry of
# +tsv_file+.
#
# tsv_file  - anything TSV.traverse accepts.
# pos_field - field holding the position; read with type: :single, cast: :to_i.
# key_field - field used as the key of each point (default :key).
# kwargs    - :persist, :persist_type (default :fwt), :persist_update and
#             :data_persist are consumed here; :type is discarded; the rest is
#             forwarded to Persist.persist and TSV.traverse.
#
# Returns the table after add_point and read have been called on it.
#
# NOTE(review): the persistence prefix is "RangeIndex", the same one used by
# range_index, and +pos_field+ is not part of the persist options -- confirm
# whether both indexes can end up sharing a persistence file.
def self.pos_index(tsv_file, pos_field = nil, key_field: :key, **kwargs)
  persist, type, persist_update, data_persist = IndiferentHash.process_options kwargs,
    :persist, :persist_type, :persist_update, :data_persist,
    :persist => false, :persist_type => :fwt
  kwargs.delete :type

  Persist.persist(tsv_file, type, kwargs.merge(:persist => persist, update: persist_update, :prefix => "RangeIndex", :other_options => kwargs)) do |filename|

    # Parenthesised on purpose: `! TSV === x` parses as `(!TSV) === x`,
    # which is never true for a file or TSV, so the data was never persisted
    tsv_file = TSV.open(tsv_file, persist: true) if data_persist && !(TSV === tsv_file)

    max_key_size = 0
    index_data = []
    TSV.traverse tsv_file, key_field: key_field, fields: [pos_field], type: :single, cast: :to_i, **kwargs do |key, pos|
      key_size = key.length
      max_key_size = key_size if key_size > max_key_size

      if Array === pos
        # One point per position when several are reported for the key
        pos.each do |p|
          index_data << [key, p]
        end
      else
        index_data << [key, pos]
      end
    end

    filename = :memory if filename.nil?
    index = FixWidthTable.get(filename, max_key_size, false)
    index.add_point index_data
    index.read
    index
  end
end
|
139
|
+
|
140
|
+
# Instance-level shortcut: forwards every argument and the block to
# TSV.pos_index with this object as the data source.
def pos_index(*args, **kwargs, &block)
|
141
|
+
TSV.pos_index(self, *args, **kwargs, &block)
|
142
|
+
end
|
143
|
+
|
88
144
|
|
89
145
|
#def range_index(start_field = nil, end_field = nil, options = {})
|
90
146
|
# start_field ||= "Start"
|