rbbt-util 5.44.1 → 6.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/bin/rbbt +67 -90
- data/bin/rbbt_exec.rb +2 -2
- data/etc/app.d/base.rb +2 -2
- data/etc/app.d/semaphores.rb +3 -3
- data/lib/rbbt/annotations/annotated_array.rb +207 -207
- data/lib/rbbt/annotations/refactor.rb +27 -0
- data/lib/rbbt/annotations/util.rb +282 -282
- data/lib/rbbt/annotations.rb +343 -320
- data/lib/rbbt/association/database.rb +200 -225
- data/lib/rbbt/association/index.rb +294 -291
- data/lib/rbbt/association/item.rb +227 -227
- data/lib/rbbt/association/open.rb +35 -34
- data/lib/rbbt/association/util.rb +0 -169
- data/lib/rbbt/association.rb +2 -4
- data/lib/rbbt/entity/identifiers.rb +119 -118
- data/lib/rbbt/entity/refactor.rb +12 -0
- data/lib/rbbt/entity.rb +319 -315
- data/lib/rbbt/hpc/batch.rb +72 -53
- data/lib/rbbt/hpc/lsf.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
- data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
- data/lib/rbbt/hpc/orchestrate.rb +19 -13
- data/lib/rbbt/hpc/slurm.rb +18 -18
- data/lib/rbbt/knowledge_base/entity.rb +13 -5
- data/lib/rbbt/knowledge_base/query.rb +2 -2
- data/lib/rbbt/knowledge_base/registry.rb +32 -31
- data/lib/rbbt/knowledge_base/traverse.rb +1 -1
- data/lib/rbbt/knowledge_base.rb +1 -1
- data/lib/rbbt/monitor.rb +36 -25
- data/lib/rbbt/persist/refactor.rb +166 -0
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
- data/lib/rbbt/persist/tsv.rb +187 -185
- data/lib/rbbt/persist.rb +556 -551
- data/lib/rbbt/refactor.rb +20 -0
- data/lib/rbbt/resource/path/refactor.rb +178 -0
- data/lib/rbbt/resource/path.rb +317 -497
- data/lib/rbbt/resource/util.rb +0 -48
- data/lib/rbbt/resource.rb +3 -390
- data/lib/rbbt/tsv/accessor.rb +2 -838
- data/lib/rbbt/tsv/attach.rb +303 -299
- data/lib/rbbt/tsv/change_id.rb +244 -245
- data/lib/rbbt/tsv/csv.rb +87 -85
- data/lib/rbbt/tsv/dumper.rb +2 -100
- data/lib/rbbt/tsv/excel.rb +26 -24
- data/lib/rbbt/tsv/field_index.rb +4 -1
- data/lib/rbbt/tsv/filter.rb +3 -2
- data/lib/rbbt/tsv/index.rb +2 -284
- data/lib/rbbt/tsv/manipulate.rb +750 -747
- data/lib/rbbt/tsv/marshal.rb +3 -3
- data/lib/rbbt/tsv/matrix.rb +2 -2
- data/lib/rbbt/tsv/parallel/through.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
- data/lib/rbbt/tsv/parser.rb +678 -678
- data/lib/rbbt/tsv/refactor.rb +195 -0
- data/lib/rbbt/tsv/stream.rb +253 -251
- data/lib/rbbt/tsv/util.rb +420 -420
- data/lib/rbbt/tsv.rb +210 -208
- data/lib/rbbt/util/R/eval.rb +4 -4
- data/lib/rbbt/util/R/plot.rb +62 -166
- data/lib/rbbt/util/R.rb +21 -18
- data/lib/rbbt/util/cmd.rb +2 -318
- data/lib/rbbt/util/color.rb +269 -269
- data/lib/rbbt/util/colorize.rb +89 -89
- data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
- data/lib/rbbt/util/concurrency/processes.rb +389 -386
- data/lib/rbbt/util/config.rb +169 -167
- data/lib/rbbt/util/filecache.rb +1 -1
- data/lib/rbbt/util/iruby.rb +20 -0
- data/lib/rbbt/util/log/progress/report.rb +241 -241
- data/lib/rbbt/util/log/progress/util.rb +99 -99
- data/lib/rbbt/util/log/progress.rb +102 -102
- data/lib/rbbt/util/log/refactor.rb +49 -0
- data/lib/rbbt/util/log.rb +486 -532
- data/lib/rbbt/util/migrate.rb +2 -2
- data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
- data/lib/rbbt/util/misc/development.rb +12 -11
- data/lib/rbbt/util/misc/exceptions.rb +117 -112
- data/lib/rbbt/util/misc/format.rb +2 -230
- data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
- data/lib/rbbt/util/misc/inspect.rb +2 -476
- data/lib/rbbt/util/misc/lock.rb +109 -106
- data/lib/rbbt/util/misc/omics.rb +9 -1
- data/lib/rbbt/util/misc/pipes.rb +765 -793
- data/lib/rbbt/util/misc/refactor.rb +20 -0
- data/lib/rbbt/util/misc/ssw.rb +27 -17
- data/lib/rbbt/util/misc/system.rb +92 -105
- data/lib/rbbt/util/misc.rb +39 -20
- data/lib/rbbt/util/named_array/refactor.rb +4 -0
- data/lib/rbbt/util/named_array.rb +3 -220
- data/lib/rbbt/util/open/refactor.rb +7 -0
- data/lib/rbbt/util/open.rb +3 -857
- data/lib/rbbt/util/procpath.rb +6 -6
- data/lib/rbbt/util/python/paths.rb +27 -0
- data/lib/rbbt/util/python/run.rb +115 -0
- data/lib/rbbt/util/python/script.rb +110 -0
- data/lib/rbbt/util/python/util.rb +3 -3
- data/lib/rbbt/util/python.rb +22 -81
- data/lib/rbbt/util/semaphore.rb +152 -148
- data/lib/rbbt/util/simpleopt.rb +9 -8
- data/lib/rbbt/util/ssh/refactor.rb +19 -0
- data/lib/rbbt/util/ssh.rb +122 -118
- data/lib/rbbt/util/tar.rb +117 -115
- data/lib/rbbt/util/tmpfile.rb +69 -67
- data/lib/rbbt/util/version.rb +2 -0
- data/lib/rbbt/workflow/refactor/entity.rb +11 -0
- data/lib/rbbt/workflow/refactor/export.rb +66 -0
- data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
- data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
- data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
- data/lib/rbbt/workflow/refactor.rb +150 -0
- data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
- data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
- data/lib/rbbt/workflow/remote_workflow.rb +6 -1
- data/lib/rbbt/workflow/step/run.rb +766 -766
- data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
- data/lib/rbbt/workflow/step.rb +2 -362
- data/lib/rbbt/workflow/task.rb +118 -118
- data/lib/rbbt/workflow/usage.rb +289 -287
- data/lib/rbbt/workflow/util/archive.rb +6 -5
- data/lib/rbbt/workflow/util/data.rb +1 -1
- data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
- data/lib/rbbt/workflow/util/trace.rb +79 -44
- data/lib/rbbt/workflow.rb +4 -882
- data/lib/rbbt-util.rb +21 -13
- data/lib/rbbt.rb +16 -3
- data/python/rbbt/__init__.py +96 -4
- data/python/rbbt/workflow/remote.py +104 -0
- data/python/rbbt/workflow.py +64 -0
- data/python/test.py +10 -0
- data/share/Rlib/plot.R +37 -37
- data/share/Rlib/svg.R +22 -5
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +2 -3
- data/share/rbbt_commands/hpc/orchestrate +4 -4
- data/share/rbbt_commands/hpc/tail +2 -0
- data/share/rbbt_commands/hpc/task +10 -7
- data/share/rbbt_commands/lsf/list +2 -3
- data/share/rbbt_commands/lsf/orchestrate +4 -4
- data/share/rbbt_commands/lsf/tail +2 -0
- data/share/rbbt_commands/lsf/task +10 -7
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/pbs/list +2 -3
- data/share/rbbt_commands/pbs/orchestrate +4 -4
- data/share/rbbt_commands/pbs/tail +2 -0
- data/share/rbbt_commands/pbs/task +10 -7
- data/share/rbbt_commands/resource/produce +8 -1
- data/share/rbbt_commands/slurm/list +2 -3
- data/share/rbbt_commands/slurm/orchestrate +4 -4
- data/share/rbbt_commands/slurm/tail +2 -0
- data/share/rbbt_commands/slurm/task +10 -7
- data/share/rbbt_commands/system/clean +5 -5
- data/share/rbbt_commands/system/status +5 -5
- data/share/rbbt_commands/tsv/get +2 -3
- data/share/rbbt_commands/tsv/info +10 -13
- data/share/rbbt_commands/tsv/keys +18 -14
- data/share/rbbt_commands/tsv/slice +2 -2
- data/share/rbbt_commands/tsv/transpose +6 -2
- data/share/rbbt_commands/workflow/info +20 -24
- data/share/rbbt_commands/workflow/list +1 -1
- data/share/rbbt_commands/workflow/prov +20 -13
- data/share/rbbt_commands/workflow/retry +43 -0
- data/share/rbbt_commands/workflow/server +12 -2
- data/share/rbbt_commands/workflow/task +80 -73
- data/share/rbbt_commands/workflow/write_info +26 -9
- data/share/software/opt/ssw/ssw.c +861 -0
- data/share/software/opt/ssw/ssw.h +130 -0
- data/share/workflow_config.ru +3 -3
- metadata +45 -6
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -1,678 +1,678 @@
|
|
1
|
-
require 'rbbt/util/cmd'
|
2
|
-
module TSV
|
3
|
-
class Parser
|
4
|
-
attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble, :identifiers, :header_options
|
5
|
-
|
6
|
-
class SKIP_LINE < Exception; end
|
7
|
-
class END_PARSING < Exception; end
|
8
|
-
|
9
|
-
def all_fields
|
10
|
-
all = [key_field] + fields
|
11
|
-
# ToDo: What was this for?
|
12
|
-
#NamedArray.setup all, all
|
13
|
-
all
|
14
|
-
end
|
15
|
-
|
16
|
-
def parse_header(stream)
|
17
|
-
raise "Closed stream" if IO === stream && stream.closed?
|
18
|
-
|
19
|
-
options = {}
|
20
|
-
@preamble = []
|
21
|
-
|
22
|
-
# Get line
|
23
|
-
|
24
|
-
#Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
25
|
-
line = stream.gets
|
26
|
-
return {} if line.nil?
|
27
|
-
#raise "Empty content: #{ stream.inspect }" if line.nil?
|
28
|
-
line = Misc.fixutf8 line.chomp
|
29
|
-
|
30
|
-
# Process options line
|
31
|
-
|
32
|
-
if line and (String === @header_hash and line =~ /^#{@header_hash}: (.*)/)
|
33
|
-
options = Misc.string2hash $1.chomp
|
34
|
-
line = stream.gets
|
35
|
-
line = Misc.fixutf8 line.chomp if line
|
36
|
-
end
|
37
|
-
|
38
|
-
# Determine separator
|
39
|
-
|
40
|
-
@sep = options[:sep] if options[:sep]
|
41
|
-
|
42
|
-
# Process fields line
|
43
|
-
|
44
|
-
preamble << line if line
|
45
|
-
while line && (TrueClass === @header_hash || (String === @header_hash && Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
|
46
|
-
@fields = line.split(@sep, -1)
|
47
|
-
@key_field = @fields.shift
|
48
|
-
@key_field = @key_field[(0 + header_hash.length)..-1] if String === @header_hash
|
49
|
-
|
50
|
-
#Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
51
|
-
line = (@header_hash != "" ? stream.gets : nil)
|
52
|
-
line = Misc.fixutf8 line.chomp if line
|
53
|
-
preamble << line if line
|
54
|
-
@header_hash = false if TrueClass === @header_hash || @header_hash == ""
|
55
|
-
end
|
56
|
-
|
57
|
-
@preamble = preamble[0..-3] * "\n"
|
58
|
-
|
59
|
-
line ||= stream.gets
|
60
|
-
|
61
|
-
@first_line = line
|
62
|
-
|
63
|
-
options
|
64
|
-
end
|
65
|
-
|
66
|
-
def process(line)
|
67
|
-
l = line.chomp
|
68
|
-
raise SKIP_LINE if l[0] == "#"[0] or (Proc === @select and not @select.call l)
|
69
|
-
l = @fix.call l if Proc === @fix
|
70
|
-
raise END_PARSING unless l
|
71
|
-
l
|
72
|
-
end
|
73
|
-
|
74
|
-
def cast?
|
75
|
-
!! @cast
|
76
|
-
end
|
77
|
-
|
78
|
-
def chop_line(line)
|
79
|
-
@sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
|
80
|
-
end
|
81
|
-
|
82
|
-
def get_values_single_from_flat(parts)
|
83
|
-
return parts.shift, parts.first if field_positions.nil? and key_position.nil?
|
84
|
-
if key_position == 0
|
85
|
-
[parts.shift, parts.first]
|
86
|
-
else
|
87
|
-
key = parts.shift
|
88
|
-
[parts, key]
|
89
|
-
end
|
90
|
-
|
91
|
-
end
|
92
|
-
|
93
|
-
def get_values_double_from_flat(parts)
|
94
|
-
return [parts.shift], [parts.flatten] if field_positions.nil? and key_position.nil?
|
95
|
-
if key_position == 0
|
96
|
-
[parts.shift, [parts.flatten]]
|
97
|
-
else
|
98
|
-
value = parts.shift
|
99
|
-
keys = parts.flatten
|
100
|
-
[keys, [[value]]]
|
101
|
-
end
|
102
|
-
|
103
|
-
end
|
104
|
-
|
105
|
-
def get_values_single(parts)
|
106
|
-
return parts.shift, parts.first if field_positions.nil? and key_position.nil?
|
107
|
-
key = parts[key_position]
|
108
|
-
value = parts[(field_positions.nil? or field_positions.empty?) ? 0 : field_positions.first]
|
109
|
-
[key, value]
|
110
|
-
end
|
111
|
-
|
112
|
-
def get_values_list(parts)
|
113
|
-
return parts.shift, parts if field_positions.nil? and key_position.nil?
|
114
|
-
key = parts[key_position]
|
115
|
-
|
116
|
-
values = case
|
117
|
-
when field_positions.nil?
|
118
|
-
parts.tap{|o| o.delete_at key_position}
|
119
|
-
when field_positions.empty?
|
120
|
-
[]
|
121
|
-
else
|
122
|
-
parts.values_at *field_positions
|
123
|
-
end
|
124
|
-
|
125
|
-
[key, values]
|
126
|
-
end
|
127
|
-
|
128
|
-
def get_values_double(parts)
|
129
|
-
return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil? and key_position.nil?
|
130
|
-
keys = parts[key_position].split(@sep2, -1)
|
131
|
-
values = case
|
132
|
-
when field_positions.nil?
|
133
|
-
parts.tap{|o| o.delete_at key_position}
|
134
|
-
when field_positions.empty?
|
135
|
-
[]
|
136
|
-
else
|
137
|
-
parts.values_at *field_positions
|
138
|
-
end.collect{|value| (value.nil? || value.empty?) ? [""] : value.split(@sep2, -1) }
|
139
|
-
[keys, values]
|
140
|
-
end
|
141
|
-
|
142
|
-
def get_values_flat_inverse(parts)
|
143
|
-
value = parts.shift
|
144
|
-
keys = parts
|
145
|
-
[keys, [value]]
|
146
|
-
end
|
147
|
-
|
148
|
-
|
149
|
-
def get_values_flat_merge(parts)
|
150
|
-
begin
|
151
|
-
orig = parts
|
152
|
-
|
153
|
-
if key_position and key_position != 0 and field_positions.nil?
|
154
|
-
value = parts.shift.split(@sep2, -1)
|
155
|
-
keys = parts.collect{|p| p.split(@sep2, -1) }.flatten
|
156
|
-
return [keys, value]
|
157
|
-
end
|
158
|
-
|
159
|
-
return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)}.flatten if
|
160
|
-
field_positions.nil? and (key_position.nil? or key_position == 0)
|
161
|
-
rescue
|
162
|
-
raise $!
|
163
|
-
end
|
164
|
-
|
165
|
-
if key_position and key_position != 0 and @header_options[:type] == :flat
|
166
|
-
keys = parts[1..-1]
|
167
|
-
else
|
168
|
-
str = parts[key_position]
|
169
|
-
keys = str.split(@sep2, -1)
|
170
|
-
end
|
171
|
-
|
172
|
-
if @take_all
|
173
|
-
values = parts.collect{|e| e.split(@sep2, -1) }.flatten
|
174
|
-
else
|
175
|
-
if field_positions.nil?
|
176
|
-
parts.delete_at key_position
|
177
|
-
values = parts.first
|
178
|
-
else
|
179
|
-
values = parts[field_positions.first]
|
180
|
-
end
|
181
|
-
|
182
|
-
values = values.split(@sep2, -1)
|
183
|
-
end
|
184
|
-
|
185
|
-
[keys, values]
|
186
|
-
end
|
187
|
-
|
188
|
-
def get_values_flat(parts)
|
189
|
-
keys, values = get_values_flat_merge(parts)
|
190
|
-
[keys.first, values]
|
191
|
-
end
|
192
|
-
|
193
|
-
|
194
|
-
def add_to_data_no_merge_list(data, key, values)
|
195
|
-
data[key] = values unless data.include? key
|
196
|
-
nil
|
197
|
-
end
|
198
|
-
|
199
|
-
def add_to_data_flat_keys(data, key, values)
|
200
|
-
data[key] = values unless data.include? key
|
201
|
-
nil
|
202
|
-
end
|
203
|
-
|
204
|
-
def add_to_data_flat(data, key, values)
|
205
|
-
data[key] = values unless data.include? key
|
206
|
-
nil
|
207
|
-
end
|
208
|
-
|
209
|
-
def add_to_data_flat_merge(data, key, values)
|
210
|
-
if data.include? key
|
211
|
-
data[key] = data[key].concat values
|
212
|
-
else
|
213
|
-
data[key] = values
|
214
|
-
end
|
215
|
-
nil
|
216
|
-
end
|
217
|
-
|
218
|
-
def add_to_data_flat_merge_double(data, keys, values)
|
219
|
-
data.write
|
220
|
-
keys.each do |key|
|
221
|
-
if data.include? key
|
222
|
-
data[key] = data[key].concat values
|
223
|
-
else
|
224
|
-
data[key] = values
|
225
|
-
end
|
226
|
-
end
|
227
|
-
nil
|
228
|
-
end
|
229
|
-
|
230
|
-
def add_to_data_flat_merge_keys(data, keys, values)
|
231
|
-
keys.each do |key|
|
232
|
-
if data.include? key
|
233
|
-
data[key] = data[key].concat values
|
234
|
-
else
|
235
|
-
data[key] = values.dup
|
236
|
-
end
|
237
|
-
end
|
238
|
-
nil
|
239
|
-
end
|
240
|
-
|
241
|
-
def add_to_data_no_merge_double(data, keys, values)
|
242
|
-
keys = [keys] unless Array === keys
|
243
|
-
keys.each do |key|
|
244
|
-
next if data.include? key
|
245
|
-
data[key] = values
|
246
|
-
end
|
247
|
-
nil
|
248
|
-
end
|
249
|
-
|
250
|
-
def add_to_data_merge(data, keys, values)
|
251
|
-
keys.uniq.each do |key|
|
252
|
-
if data.include? key
|
253
|
-
new = data[key]
|
254
|
-
new.each_with_index do |old, i|
|
255
|
-
next if values[i].nil?
|
256
|
-
if old.nil?
|
257
|
-
new[i] = values[i]
|
258
|
-
else
|
259
|
-
old.concat values[i]
|
260
|
-
end
|
261
|
-
end
|
262
|
-
data[key] = new
|
263
|
-
else
|
264
|
-
data[key] = values
|
265
|
-
end
|
266
|
-
end
|
267
|
-
nil
|
268
|
-
end
|
269
|
-
|
270
|
-
def add_to_data_merge_zipped(data, keys, values)
|
271
|
-
keys = [keys] unless Array === keys
|
272
|
-
num = keys.length
|
273
|
-
|
274
|
-
values = values.collect do |v|
|
275
|
-
(v.nil? || v.empty?) ? [""] : v
|
276
|
-
end
|
277
|
-
|
278
|
-
if values.first.length > 1 and num == 1
|
279
|
-
keys = keys * values.first.length
|
280
|
-
num = keys.length
|
281
|
-
end
|
282
|
-
|
283
|
-
values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
284
|
-
|
285
|
-
all = values
|
286
|
-
all.unshift keys
|
287
|
-
Misc.zip_fields(all).each do |vs|
|
288
|
-
key = vs.shift
|
289
|
-
if data.include? key
|
290
|
-
data[key] = data[key].zip(vs).collect do |old, new|
|
291
|
-
old + [new]
|
292
|
-
end
|
293
|
-
else
|
294
|
-
data[key] = vs.collect{|v| [v] }
|
295
|
-
end
|
296
|
-
end
|
297
|
-
|
298
|
-
nil
|
299
|
-
end
|
300
|
-
|
301
|
-
def add_to_data_zipped(data, keys, values)
|
302
|
-
num = keys.length
|
303
|
-
|
304
|
-
if values.first.length > 1 and num == 1
|
305
|
-
keys = keys * values.first.length
|
306
|
-
num = keys.length
|
307
|
-
end
|
308
|
-
|
309
|
-
values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
310
|
-
all = values.unshift keys
|
311
|
-
Misc.zip_fields(all).each do |values|
|
312
|
-
key = values.shift
|
313
|
-
next if data.include? key
|
314
|
-
data[key] = values.collect{|v| [v]}
|
315
|
-
end
|
316
|
-
nil
|
317
|
-
end
|
318
|
-
|
319
|
-
|
320
|
-
def cast_values_single(value)
|
321
|
-
case
|
322
|
-
when (value.nil? or value.empty?)
|
323
|
-
nil
|
324
|
-
when Symbol === cast
|
325
|
-
value.send(cast)
|
326
|
-
when Proc === cast
|
327
|
-
cast.call value
|
328
|
-
end
|
329
|
-
end
|
330
|
-
|
331
|
-
def cast_values_list(values)
|
332
|
-
case
|
333
|
-
when Symbol === cast
|
334
|
-
values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
|
335
|
-
when Proc === cast
|
336
|
-
values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v)}
|
337
|
-
end
|
338
|
-
end
|
339
|
-
|
340
|
-
def cast_values_flat(values)
|
341
|
-
case
|
342
|
-
when Symbol === cast
|
343
|
-
values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
|
344
|
-
when Proc === cast
|
345
|
-
values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }
|
346
|
-
end
|
347
|
-
end
|
348
|
-
|
349
|
-
def cast_values_double(values)
|
350
|
-
case
|
351
|
-
when Symbol === cast
|
352
|
-
values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}}
|
353
|
-
when Proc === cast
|
354
|
-
values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }}
|
355
|
-
end
|
356
|
-
end
|
357
|
-
|
358
|
-
def rescue_first_line
|
359
|
-
@first_line
|
360
|
-
end
|
361
|
-
|
362
|
-
def fix_fields(options)
|
363
|
-
key_field = Misc.process_options options, :key_field
|
364
|
-
fields = Misc.process_options options, :fields
|
365
|
-
|
366
|
-
if (key_field.nil? or key_field == 0 or key_field == :key) and
|
367
|
-
(fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
|
368
|
-
|
369
|
-
@straight = true
|
370
|
-
return
|
371
|
-
else
|
372
|
-
@straight = false
|
373
|
-
|
374
|
-
case
|
375
|
-
when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
|
376
|
-
@key_position = 0
|
377
|
-
when Integer === key_field
|
378
|
-
@key_position = key_field
|
379
|
-
when String === key_field
|
380
|
-
@key_position = @fields.dup.unshift(@key_field).index key_field
|
381
|
-
raise "Key field #{ key_field } was not found" if @key_position.nil?
|
382
|
-
when :key == key_field
|
383
|
-
@key_position = 0
|
384
|
-
else
|
385
|
-
raise "Format of key_field not understood: #{key_field.inspect}"
|
386
|
-
end
|
387
|
-
|
388
|
-
if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
|
389
|
-
if not @fields.nil? and type != :flat
|
390
|
-
@field_positions = (0..@fields.length).to_a
|
391
|
-
@field_positions.delete @key_position
|
392
|
-
end
|
393
|
-
else
|
394
|
-
fields = [fields] if not Array === fields
|
395
|
-
@field_positions = fields.collect{|field|
|
396
|
-
case
|
397
|
-
when Integer === field
|
398
|
-
field
|
399
|
-
when String === field
|
400
|
-
pos = @fields.dup.unshift(@key_field).index field
|
401
|
-
raise "Field not identified: #{ field }" if pos.nil?
|
402
|
-
pos
|
403
|
-
else
|
404
|
-
raise "Format of fields not understood: #{field.inspect}"
|
405
|
-
end
|
406
|
-
}
|
407
|
-
end
|
408
|
-
|
409
|
-
new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
|
410
|
-
@fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil? and not @field_positions.nil?
|
411
|
-
@fields ||= fields if Array === fields and String === fields.first
|
412
|
-
@fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
|
413
|
-
@key_field = new_key_field
|
414
|
-
@key_field ||= key_field if String === key_field
|
415
|
-
|
416
|
-
end
|
417
|
-
end
|
418
|
-
|
419
|
-
def initialize(stream = nil, options = {})
|
420
|
-
@header_hash = Misc.process_options(options, :header_hash) || "#"
|
421
|
-
@sep = Misc.process_options(options, :sep) || "\t"
|
422
|
-
@tsv_grep = Misc.process_options(options, :tsv_grep)
|
423
|
-
stream = TSV.get_stream stream
|
424
|
-
@stream = stream
|
425
|
-
|
426
|
-
|
427
|
-
@header_options = parse_header(stream)
|
428
|
-
|
429
|
-
options = @header_options.merge options
|
430
|
-
options = Misc.add_defaults options, :fields => [1] if options[:type] == :single and options[:fields].nil?
|
431
|
-
|
432
|
-
@type ||= Misc.process_options(options, :type) || :double
|
433
|
-
@type ||= :double
|
434
|
-
|
435
|
-
@identifiers = Misc.process_options(options, :identifiers)
|
436
|
-
|
437
|
-
@filename = Misc.process_options(options, :filename)
|
438
|
-
@filename ||= stream.filename if stream.respond_to? :filename
|
439
|
-
|
440
|
-
@sep2 = Misc.process_options(options, :sep2) || "|"
|
441
|
-
@cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
|
442
|
-
@type ||= Misc.process_options options, :type
|
443
|
-
@fix = Misc.process_options(options, :fix)
|
444
|
-
@select= Misc.process_options options, :select
|
445
|
-
@zipped = Misc.process_options options, :zipped
|
446
|
-
@namespace = Misc.process_options options, :namespace
|
447
|
-
merge = Misc.process_options(options, :merge)
|
448
|
-
merge = @zipped if merge.nil?
|
449
|
-
merge = false if merge.nil?
|
450
|
-
|
451
|
-
fields = options[:fields]
|
452
|
-
fix_fields(options)
|
453
|
-
|
454
|
-
@type = @type.strip.to_sym if String === @type
|
455
|
-
#@type ||= :double if merge == true
|
456
|
-
|
457
|
-
case @type
|
458
|
-
when :double
|
459
|
-
if @header_options[:type] == :flat
|
460
|
-
self.instance_eval do alias get_values get_values_double_from_flat end
|
461
|
-
else
|
462
|
-
self.instance_eval do alias get_values get_values_double end
|
463
|
-
end
|
464
|
-
self.instance_eval do alias cast_values cast_values_double end
|
465
|
-
case
|
466
|
-
when (merge and not zipped)
|
467
|
-
self.instance_eval do alias add_to_data add_to_data_merge end
|
468
|
-
when (merge and zipped)
|
469
|
-
self.instance_eval do alias add_to_data add_to_data_merge_zipped end
|
470
|
-
when zipped
|
471
|
-
self.instance_eval do alias add_to_data add_to_data_zipped end
|
472
|
-
else
|
473
|
-
self.instance_eval do alias add_to_data add_to_data_no_merge_double end
|
474
|
-
end
|
475
|
-
when :single
|
476
|
-
if @header_options[:type] == :flat
|
477
|
-
self.instance_eval do alias get_values get_values_single_from_flat end
|
478
|
-
self.instance_eval do alias cast_values cast_values_single end
|
479
|
-
self.instance_eval do alias add_to_data add_to_data_no_merge_double end
|
480
|
-
else
|
481
|
-
self.instance_eval do alias get_values get_values_single end
|
482
|
-
self.instance_eval do alias cast_values cast_values_single end
|
483
|
-
self.instance_eval do alias add_to_data add_to_data_no_merge_list end
|
484
|
-
end
|
485
|
-
when :list
|
486
|
-
self.instance_eval do alias get_values get_values_list end
|
487
|
-
self.instance_eval do alias cast_values cast_values_list end
|
488
|
-
self.instance_eval do alias add_to_data add_to_data_no_merge_list end
|
489
|
-
|
490
|
-
when :flat
|
491
|
-
@take_all = true if field_positions.nil?
|
492
|
-
self.instance_eval do alias cast_values cast_values_flat end
|
493
|
-
merge = true if key_position and key_position != 0 and field_positions.nil?
|
494
|
-
if merge
|
495
|
-
self.instance_eval do alias get_values get_values_flat_merge end
|
496
|
-
if key_position and key_position != 0 and field_positions.nil?
|
497
|
-
self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
|
498
|
-
else
|
499
|
-
self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
|
500
|
-
end
|
501
|
-
else
|
502
|
-
self.instance_eval do alias get_values get_values_flat_merge end
|
503
|
-
if key_position and key_position != 0 and field_positions.nil?
|
504
|
-
self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
|
505
|
-
else
|
506
|
-
self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
|
507
|
-
end
|
508
|
-
end
|
509
|
-
else
|
510
|
-
raise "Unknown TSV type: #{@type.inspect}"
|
511
|
-
end
|
512
|
-
|
513
|
-
@straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
|
514
|
-
end
|
515
|
-
|
516
|
-
def setup(data)
|
517
|
-
data.extend TSV unless TSV === data
|
518
|
-
data.type = @type
|
519
|
-
data.key_field = @key_field
|
520
|
-
data.fields = @fields.nil? ? nil : @fields.dup
|
521
|
-
data.namespace = @namespace
|
522
|
-
data.filename = @filename
|
523
|
-
data.identifiers = @identifiers
|
524
|
-
data.cast = @cast if Symbol === @cast
|
525
|
-
data
|
526
|
-
end
|
527
|
-
|
528
|
-
def annotate(data)
|
529
|
-
setup(data)
|
530
|
-
end
|
531
|
-
|
532
|
-
def options
|
533
|
-
options = {}
|
534
|
-
TSV::ENTRIES.each do |entry|
|
535
|
-
if self.respond_to? entry
|
536
|
-
value = self.send(entry)
|
537
|
-
options[entry.to_sym] = value unless value.nil?
|
538
|
-
end
|
539
|
-
end
|
540
|
-
options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
|
541
|
-
IndiferentHash.setup options
|
542
|
-
end
|
543
|
-
|
544
|
-
def traverse(options = {})
|
545
|
-
monitor, bar, grep, invert_grep, head, fixed_grep = Misc.process_options options, :monitor, :bar, :grep, :invert_grep, :head, :fixed_grep
|
546
|
-
monitor = bar if bar and monitor.nil?
|
547
|
-
raise "No block given in TSV::Parser#traverse" unless block_given?
|
548
|
-
|
549
|
-
stream = @stream
|
550
|
-
|
551
|
-
|
552
|
-
# first line
|
553
|
-
line = self.rescue_first_line
|
554
|
-
line = stream.gets if line.nil?
|
555
|
-
|
556
|
-
if @tsv_grep || grep
|
557
|
-
|
558
|
-
stream = Open.grep(stream, @tsv_grep || grep, invert_grep, fixed_grep)
|
559
|
-
stream.no_fail = true
|
560
|
-
begin
|
561
|
-
match = Open.grep(StringIO.new(line), @tsv_grep || grep, invert_grep, fixed_grep).read
|
562
|
-
line = stream.gets if match.empty?
|
563
|
-
rescue Exception
|
564
|
-
Log.exception $!
|
565
|
-
line = stream.gets
|
566
|
-
end
|
567
|
-
end
|
568
|
-
|
569
|
-
progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
|
570
|
-
|
571
|
-
# setup monitor
|
572
|
-
if monitor && (stream.respond_to?(:size) || (stream.respond_to?(:stat) && stream.stat.file? && stream.stat.respond_to?(:size) && stream.respond_to?(:pos)))
|
573
|
-
size = case
|
574
|
-
when stream.respond_to?(:size)
|
575
|
-
stream.size
|
576
|
-
else
|
577
|
-
stream.stat.size
|
578
|
-
end
|
579
|
-
size = nil if size.to_i == 0
|
580
|
-
desc = "Parsing Stream"
|
581
|
-
step = 100
|
582
|
-
if Hash === monitor
|
583
|
-
desc = monitor[:desc] if monitor.include? :desc
|
584
|
-
step = monitor[:step] if monitor.include? :step
|
585
|
-
end
|
586
|
-
progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
|
587
|
-
elsif progress_monitor
|
588
|
-
|
589
|
-
size = case
|
590
|
-
when stream.respond_to?(:size)
|
591
|
-
stream.size
|
592
|
-
else
|
593
|
-
stream.stat.size
|
594
|
-
end
|
595
|
-
|
596
|
-
progress_monitor.bytes = true
|
597
|
-
progress_monitor.max = size unless size.to_i == 0
|
598
|
-
elsif monitor
|
599
|
-
desc = "Parsing Stream"
|
600
|
-
step = 100
|
601
|
-
size = nil
|
602
|
-
if Hash === monitor
|
603
|
-
desc = monitor[:desc] if monitor.include? :desc
|
604
|
-
step = monitor[:step] if monitor.include? :step
|
605
|
-
end
|
606
|
-
progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
|
607
|
-
end
|
608
|
-
|
609
|
-
# parser
|
610
|
-
line_num = 1
|
611
|
-
begin
|
612
|
-
|
613
|
-
while not line.nil?
|
614
|
-
begin
|
615
|
-
if progress_monitor
|
616
|
-
progress_monitor.tick(line.bytesize)
|
617
|
-
end
|
618
|
-
|
619
|
-
raise SKIP_LINE if line.empty?
|
620
|
-
|
621
|
-
line = Misc.fixutf8(line)
|
622
|
-
line = self.process line
|
623
|
-
raise SKIP_LINE if line.empty?
|
624
|
-
parts = self.chop_line line
|
625
|
-
key, values = self.get_values parts
|
626
|
-
values = self.cast_values values if self.cast?
|
627
|
-
|
628
|
-
yield key, values, fields
|
629
|
-
|
630
|
-
line = stream.gets
|
631
|
-
|
632
|
-
line_num += 1
|
633
|
-
raise END_PARSING if head and line_num > head.to_i
|
634
|
-
rescue SKIP_LINE
|
635
|
-
begin
|
636
|
-
line = stream.gets
|
637
|
-
next
|
638
|
-
rescue IOError
|
639
|
-
break
|
640
|
-
end
|
641
|
-
rescue END_PARSING
|
642
|
-
stream.close unless stream.closed?
|
643
|
-
begin stream.join; rescue Exception; end if stream.respond_to? :join and not stream.joined?
|
644
|
-
break
|
645
|
-
rescue Errno::EPIPE
|
646
|
-
Log.error "Pipe closed while parsing #{Misc.fingerprint stream}: #{$!.message}"
|
647
|
-
stream.abort if stream.respond_to? :abort
|
648
|
-
raise $!
|
649
|
-
rescue Exception
|
650
|
-
Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
|
651
|
-
stream.abort $! if stream.respond_to? :abort
|
652
|
-
raise $!
|
653
|
-
end
|
654
|
-
end
|
655
|
-
ensure
|
656
|
-
Log::ProgressBar.remove_bar(progress_monitor) if progress_monitor
|
657
|
-
stream.close unless stream.closed?
|
658
|
-
stream.join if stream.respond_to? :join and not stream.joined?
|
659
|
-
end
|
660
|
-
|
661
|
-
self
|
662
|
-
end
|
663
|
-
|
664
|
-
def identify_field(field)
|
665
|
-
TSV.identify_field(key_field, fields, field)
|
666
|
-
end
|
667
|
-
|
668
|
-
def rewind
|
669
|
-
stream.reopen(filename, "r") if stream.closed? and filename
|
670
|
-
stream.rewind
|
671
|
-
end
|
672
|
-
|
673
|
-
def self.traverse(stream, options = {}, &block)
|
674
|
-
parser = Parser.new(stream, options)
|
675
|
-
parser.traverse(options, &block)
|
676
|
-
end
|
677
|
-
end
|
678
|
-
end
|
1
|
+
#require 'rbbt/util/cmd'
|
2
|
+
#module TSV
|
3
|
+
# class Parser
|
4
|
+
# attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble, :identifiers, :header_options
|
5
|
+
#
|
6
|
+
# class SKIP_LINE < Exception; end
|
7
|
+
# class END_PARSING < Exception; end
|
8
|
+
#
|
9
|
+
# def all_fields
|
10
|
+
# all = [key_field] + fields
|
11
|
+
# # ToDo: What was this for?
|
12
|
+
# #NamedArray.setup all, all
|
13
|
+
# all
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# def parse_header(stream)
|
17
|
+
# raise "Closed stream" if IO === stream && stream.closed?
|
18
|
+
#
|
19
|
+
# options = {}
|
20
|
+
# @preamble = []
|
21
|
+
#
|
22
|
+
# # Get line
|
23
|
+
#
|
24
|
+
# #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
25
|
+
# line = stream.gets
|
26
|
+
# return {} if line.nil?
|
27
|
+
# #raise "Empty content: #{ stream.inspect }" if line.nil?
|
28
|
+
# line = Misc.fixutf8 line.chomp
|
29
|
+
#
|
30
|
+
# # Process options line
|
31
|
+
#
|
32
|
+
# if line and (String === @header_hash and line =~ /^#{@header_hash}: (.*)/)
|
33
|
+
# options = Misc.string2hash $1.chomp
|
34
|
+
# line = stream.gets
|
35
|
+
# line = Misc.fixutf8 line.chomp if line
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
# # Determine separator
|
39
|
+
#
|
40
|
+
# @sep = options[:sep] if options[:sep]
|
41
|
+
#
|
42
|
+
# # Process fields line
|
43
|
+
#
|
44
|
+
# preamble << line if line
|
45
|
+
# while line && (TrueClass === @header_hash || (String === @header_hash && Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
|
46
|
+
# @fields = line.split(@sep, -1)
|
47
|
+
# @key_field = @fields.shift
|
48
|
+
# @key_field = @key_field[(0 + header_hash.length)..-1] if String === @header_hash
|
49
|
+
#
|
50
|
+
# #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
51
|
+
# line = (@header_hash != "" ? stream.gets : nil)
|
52
|
+
# line = Misc.fixutf8 line.chomp if line
|
53
|
+
# preamble << line if line
|
54
|
+
# @header_hash = false if TrueClass === @header_hash || @header_hash == ""
|
55
|
+
# end
|
56
|
+
#
|
57
|
+
# @preamble = preamble[0..-3] * "\n"
|
58
|
+
#
|
59
|
+
# line ||= stream.gets
|
60
|
+
#
|
61
|
+
# @first_line = line
|
62
|
+
#
|
63
|
+
# options
|
64
|
+
# end
|
65
|
+
#
|
66
|
+
# def process(line)
|
67
|
+
# l = line.chomp
|
68
|
+
# raise SKIP_LINE if l[0] == "#"[0] or (Proc === @select and not @select.call l)
|
69
|
+
# l = @fix.call l if Proc === @fix
|
70
|
+
# raise END_PARSING unless l
|
71
|
+
# l
|
72
|
+
# end
|
73
|
+
#
|
74
|
+
# def cast?
|
75
|
+
# !! @cast
|
76
|
+
# end
|
77
|
+
#
|
78
|
+
# def chop_line(line)
|
79
|
+
# @sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
|
80
|
+
# end
|
81
|
+
#
|
82
|
+
# def get_values_single_from_flat(parts)
|
83
|
+
# return parts.shift, parts.first if field_positions.nil? and key_position.nil?
|
84
|
+
# if key_position == 0
|
85
|
+
# [parts.shift, parts.first]
|
86
|
+
# else
|
87
|
+
# key = parts.shift
|
88
|
+
# [parts, key]
|
89
|
+
# end
|
90
|
+
#
|
91
|
+
# end
|
92
|
+
#
|
93
|
+
# def get_values_double_from_flat(parts)
|
94
|
+
# return [parts.shift], [parts.flatten] if field_positions.nil? and key_position.nil?
|
95
|
+
# if key_position == 0
|
96
|
+
# [parts.shift, [parts.flatten]]
|
97
|
+
# else
|
98
|
+
# value = parts.shift
|
99
|
+
# keys = parts.flatten
|
100
|
+
# [keys, [[value]]]
|
101
|
+
# end
|
102
|
+
#
|
103
|
+
# end
|
104
|
+
#
|
105
|
+
# def get_values_single(parts)
|
106
|
+
# return parts.shift, parts.first if field_positions.nil? and key_position.nil?
|
107
|
+
# key = parts[key_position]
|
108
|
+
# value = parts[(field_positions.nil? or field_positions.empty?) ? 0 : field_positions.first]
|
109
|
+
# [key, value]
|
110
|
+
# end
|
111
|
+
#
|
112
|
+
# def get_values_list(parts)
|
113
|
+
# return parts.shift, parts if field_positions.nil? and key_position.nil?
|
114
|
+
# key = parts[key_position]
|
115
|
+
#
|
116
|
+
# values = case
|
117
|
+
# when field_positions.nil?
|
118
|
+
# parts.tap{|o| o.delete_at key_position}
|
119
|
+
# when field_positions.empty?
|
120
|
+
# []
|
121
|
+
# else
|
122
|
+
# parts.values_at *field_positions
|
123
|
+
# end
|
124
|
+
#
|
125
|
+
# [key, values]
|
126
|
+
# end
|
127
|
+
#
|
128
|
+
# def get_values_double(parts)
|
129
|
+
# return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil? and key_position.nil?
|
130
|
+
# keys = parts[key_position].split(@sep2, -1)
|
131
|
+
# values = case
|
132
|
+
# when field_positions.nil?
|
133
|
+
# parts.tap{|o| o.delete_at key_position}
|
134
|
+
# when field_positions.empty?
|
135
|
+
# []
|
136
|
+
# else
|
137
|
+
# parts.values_at *field_positions
|
138
|
+
# end.collect{|value| (value.nil? || value.empty?) ? [""] : value.split(@sep2, -1) }
|
139
|
+
# [keys, values]
|
140
|
+
# end
|
141
|
+
#
|
142
|
+
# def get_values_flat_inverse(parts)
|
143
|
+
# value = parts.shift
|
144
|
+
# keys = parts
|
145
|
+
# [keys, [value]]
|
146
|
+
# end
|
147
|
+
#
|
148
|
+
#
|
149
|
+
# def get_values_flat_merge(parts)
|
150
|
+
# begin
|
151
|
+
# orig = parts
|
152
|
+
#
|
153
|
+
# if key_position and key_position != 0 and field_positions.nil?
|
154
|
+
# value = parts.shift.split(@sep2, -1)
|
155
|
+
# keys = parts.collect{|p| p.split(@sep2, -1) }.flatten
|
156
|
+
# return [keys, value]
|
157
|
+
# end
|
158
|
+
#
|
159
|
+
# return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)}.flatten if
|
160
|
+
# field_positions.nil? and (key_position.nil? or key_position == 0)
|
161
|
+
# rescue
|
162
|
+
# raise $!
|
163
|
+
# end
|
164
|
+
#
|
165
|
+
# if key_position and key_position != 0 and @header_options[:type] == :flat
|
166
|
+
# keys = parts[1..-1]
|
167
|
+
# else
|
168
|
+
# str = parts[key_position]
|
169
|
+
# keys = str.split(@sep2, -1)
|
170
|
+
# end
|
171
|
+
#
|
172
|
+
# if @take_all
|
173
|
+
# values = parts.collect{|e| e.split(@sep2, -1) }.flatten
|
174
|
+
# else
|
175
|
+
# if field_positions.nil?
|
176
|
+
# parts.delete_at key_position
|
177
|
+
# values = parts.first
|
178
|
+
# else
|
179
|
+
# values = parts[field_positions.first]
|
180
|
+
# end
|
181
|
+
#
|
182
|
+
# values = values.split(@sep2, -1)
|
183
|
+
# end
|
184
|
+
#
|
185
|
+
# [keys, values]
|
186
|
+
# end
|
187
|
+
#
|
188
|
+
# def get_values_flat(parts)
|
189
|
+
# keys, values = get_values_flat_merge(parts)
|
190
|
+
# [keys.first, values]
|
191
|
+
# end
|
192
|
+
#
|
193
|
+
#
|
194
|
+
# def add_to_data_no_merge_list(data, key, values)
|
195
|
+
# data[key] = values unless data.include? key
|
196
|
+
# nil
|
197
|
+
# end
|
198
|
+
#
|
199
|
+
# def add_to_data_flat_keys(data, key, values)
|
200
|
+
# data[key] = values unless data.include? key
|
201
|
+
# nil
|
202
|
+
# end
|
203
|
+
#
|
204
|
+
# def add_to_data_flat(data, key, values)
|
205
|
+
# data[key] = values unless data.include? key
|
206
|
+
# nil
|
207
|
+
# end
|
208
|
+
#
|
209
|
+
# def add_to_data_flat_merge(data, key, values)
|
210
|
+
# if data.include? key
|
211
|
+
# data[key] = data[key].concat values
|
212
|
+
# else
|
213
|
+
# data[key] = values
|
214
|
+
# end
|
215
|
+
# nil
|
216
|
+
# end
|
217
|
+
#
|
218
|
+
# def add_to_data_flat_merge_double(data, keys, values)
|
219
|
+
# data.write
|
220
|
+
# keys.each do |key|
|
221
|
+
# if data.include? key
|
222
|
+
# data[key] = data[key].concat values
|
223
|
+
# else
|
224
|
+
# data[key] = values
|
225
|
+
# end
|
226
|
+
# end
|
227
|
+
# nil
|
228
|
+
# end
|
229
|
+
#
|
230
|
+
# def add_to_data_flat_merge_keys(data, keys, values)
|
231
|
+
# keys.each do |key|
|
232
|
+
# if data.include? key
|
233
|
+
# data[key] = data[key].concat values
|
234
|
+
# else
|
235
|
+
# data[key] = values.dup
|
236
|
+
# end
|
237
|
+
# end
|
238
|
+
# nil
|
239
|
+
# end
|
240
|
+
#
|
241
|
+
# def add_to_data_no_merge_double(data, keys, values)
|
242
|
+
# keys = [keys] unless Array === keys
|
243
|
+
# keys.each do |key|
|
244
|
+
# next if data.include? key
|
245
|
+
# data[key] = values
|
246
|
+
# end
|
247
|
+
# nil
|
248
|
+
# end
|
249
|
+
#
|
250
|
+
# def add_to_data_merge(data, keys, values)
|
251
|
+
# keys.uniq.each do |key|
|
252
|
+
# if data.include? key
|
253
|
+
# new = data[key]
|
254
|
+
# new.each_with_index do |old, i|
|
255
|
+
# next if values[i].nil?
|
256
|
+
# if old.nil?
|
257
|
+
# new[i] = values[i]
|
258
|
+
# else
|
259
|
+
# old.concat values[i]
|
260
|
+
# end
|
261
|
+
# end
|
262
|
+
# data[key] = new
|
263
|
+
# else
|
264
|
+
# data[key] = values
|
265
|
+
# end
|
266
|
+
# end
|
267
|
+
# nil
|
268
|
+
# end
|
269
|
+
#
|
270
|
+
# def add_to_data_merge_zipped(data, keys, values)
|
271
|
+
# keys = [keys] unless Array === keys
|
272
|
+
# num = keys.length
|
273
|
+
#
|
274
|
+
# values = values.collect do |v|
|
275
|
+
# (v.nil? || v.empty?) ? [""] : v
|
276
|
+
# end
|
277
|
+
#
|
278
|
+
# if values.first.length > 1 and num == 1
|
279
|
+
# keys = keys * values.first.length
|
280
|
+
# num = keys.length
|
281
|
+
# end
|
282
|
+
#
|
283
|
+
# values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
284
|
+
#
|
285
|
+
# all = values
|
286
|
+
# all.unshift keys
|
287
|
+
# Misc.zip_fields(all).each do |vs|
|
288
|
+
# key = vs.shift
|
289
|
+
# if data.include? key
|
290
|
+
# data[key] = data[key].zip(vs).collect do |old, new|
|
291
|
+
# old + [new]
|
292
|
+
# end
|
293
|
+
# else
|
294
|
+
# data[key] = vs.collect{|v| [v] }
|
295
|
+
# end
|
296
|
+
# end
|
297
|
+
#
|
298
|
+
# nil
|
299
|
+
# end
|
300
|
+
#
|
301
|
+
# def add_to_data_zipped(data, keys, values)
|
302
|
+
# num = keys.length
|
303
|
+
#
|
304
|
+
# if values.first.length > 1 and num == 1
|
305
|
+
# keys = keys * values.first.length
|
306
|
+
# num = keys.length
|
307
|
+
# end
|
308
|
+
#
|
309
|
+
# values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
310
|
+
# all = values.unshift keys
|
311
|
+
# Misc.zip_fields(all).each do |values|
|
312
|
+
# key = values.shift
|
313
|
+
# next if data.include? key
|
314
|
+
# data[key] = values.collect{|v| [v]}
|
315
|
+
# end
|
316
|
+
# nil
|
317
|
+
# end
|
318
|
+
#
|
319
|
+
#
|
320
|
+
# def cast_values_single(value)
|
321
|
+
# case
|
322
|
+
# when (value.nil? or value.empty?)
|
323
|
+
# nil
|
324
|
+
# when Symbol === cast
|
325
|
+
# value.send(cast)
|
326
|
+
# when Proc === cast
|
327
|
+
# cast.call value
|
328
|
+
# end
|
329
|
+
# end
|
330
|
+
#
|
331
|
+
# def cast_values_list(values)
|
332
|
+
# case
|
333
|
+
# when Symbol === cast
|
334
|
+
# values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
|
335
|
+
# when Proc === cast
|
336
|
+
# values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v)}
|
337
|
+
# end
|
338
|
+
# end
|
339
|
+
#
|
340
|
+
# def cast_values_flat(values)
|
341
|
+
# case
|
342
|
+
# when Symbol === cast
|
343
|
+
# values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
|
344
|
+
# when Proc === cast
|
345
|
+
# values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }
|
346
|
+
# end
|
347
|
+
# end
|
348
|
+
#
|
349
|
+
# def cast_values_double(values)
|
350
|
+
# case
|
351
|
+
# when Symbol === cast
|
352
|
+
# values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}}
|
353
|
+
# when Proc === cast
|
354
|
+
# values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }}
|
355
|
+
# end
|
356
|
+
# end
|
357
|
+
#
|
358
|
+
# def rescue_first_line
|
359
|
+
# @first_line
|
360
|
+
# end
|
361
|
+
#
|
362
|
+
# def fix_fields(options)
|
363
|
+
# key_field = Misc.process_options options, :key_field
|
364
|
+
# fields = Misc.process_options options, :fields
|
365
|
+
#
|
366
|
+
# if (key_field.nil? or key_field == 0 or key_field == :key) and
|
367
|
+
# (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
|
368
|
+
#
|
369
|
+
# @straight = true
|
370
|
+
# return
|
371
|
+
# else
|
372
|
+
# @straight = false
|
373
|
+
#
|
374
|
+
# case
|
375
|
+
# when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
|
376
|
+
# @key_position = 0
|
377
|
+
# when Integer === key_field
|
378
|
+
# @key_position = key_field
|
379
|
+
# when String === key_field
|
380
|
+
# @key_position = @fields.dup.unshift(@key_field).index key_field
|
381
|
+
# raise "Key field #{ key_field } was not found" if @key_position.nil?
|
382
|
+
# when :key == key_field
|
383
|
+
# @key_position = 0
|
384
|
+
# else
|
385
|
+
# raise "Format of key_field not understood: #{key_field.inspect}"
|
386
|
+
# end
|
387
|
+
#
|
388
|
+
# if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
|
389
|
+
# if not @fields.nil? and type != :flat
|
390
|
+
# @field_positions = (0..@fields.length).to_a
|
391
|
+
# @field_positions.delete @key_position
|
392
|
+
# end
|
393
|
+
# else
|
394
|
+
# fields = [fields] if not Array === fields
|
395
|
+
# @field_positions = fields.collect{|field|
|
396
|
+
# case
|
397
|
+
# when Integer === field
|
398
|
+
# field
|
399
|
+
# when String === field
|
400
|
+
# pos = @fields.dup.unshift(@key_field).index field
|
401
|
+
# raise "Field not identified: #{ field }" if pos.nil?
|
402
|
+
# pos
|
403
|
+
# else
|
404
|
+
# raise "Format of fields not understood: #{field.inspect}"
|
405
|
+
# end
|
406
|
+
# }
|
407
|
+
# end
|
408
|
+
#
|
409
|
+
# new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
|
410
|
+
# @fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil? and not @field_positions.nil?
|
411
|
+
# @fields ||= fields if Array === fields and String === fields.first
|
412
|
+
# @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
|
413
|
+
# @key_field = new_key_field
|
414
|
+
# @key_field ||= key_field if String === key_field
|
415
|
+
#
|
416
|
+
# end
|
417
|
+
# end
|
418
|
+
#
|
419
|
+
# def initialize(stream = nil, options = {})
|
420
|
+
# @header_hash = Misc.process_options(options, :header_hash) || "#"
|
421
|
+
# @sep = Misc.process_options(options, :sep) || "\t"
|
422
|
+
# @tsv_grep = Misc.process_options(options, :tsv_grep)
|
423
|
+
# stream = TSV.get_stream stream
|
424
|
+
# @stream = stream
|
425
|
+
#
|
426
|
+
#
|
427
|
+
# @header_options = parse_header(stream)
|
428
|
+
#
|
429
|
+
# options = @header_options.merge options
|
430
|
+
# options = Misc.add_defaults options, :fields => [1] if options[:type] == :single and options[:fields].nil?
|
431
|
+
#
|
432
|
+
# @type ||= Misc.process_options(options, :type) || :double
|
433
|
+
# @type ||= :double
|
434
|
+
#
|
435
|
+
# @identifiers = Misc.process_options(options, :identifiers)
|
436
|
+
#
|
437
|
+
# @filename = Misc.process_options(options, :filename)
|
438
|
+
# @filename ||= stream.filename if stream.respond_to? :filename
|
439
|
+
#
|
440
|
+
# @sep2 = Misc.process_options(options, :sep2) || "|"
|
441
|
+
# @cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
|
442
|
+
# @type ||= Misc.process_options options, :type
|
443
|
+
# @fix = Misc.process_options(options, :fix)
|
444
|
+
# @select= Misc.process_options options, :select
|
445
|
+
# @zipped = Misc.process_options options, :zipped
|
446
|
+
# @namespace = Misc.process_options options, :namespace
|
447
|
+
# merge = Misc.process_options(options, :merge)
|
448
|
+
# merge = @zipped if merge.nil?
|
449
|
+
# merge = false if merge.nil?
|
450
|
+
#
|
451
|
+
# fields = options[:fields]
|
452
|
+
# fix_fields(options)
|
453
|
+
#
|
454
|
+
# @type = @type.strip.to_sym if String === @type
|
455
|
+
# #@type ||= :double if merge == true
|
456
|
+
#
|
457
|
+
# case @type
|
458
|
+
# when :double
|
459
|
+
# if @header_options[:type] == :flat
|
460
|
+
# self.instance_eval do alias get_values get_values_double_from_flat end
|
461
|
+
# else
|
462
|
+
# self.instance_eval do alias get_values get_values_double end
|
463
|
+
# end
|
464
|
+
# self.instance_eval do alias cast_values cast_values_double end
|
465
|
+
# case
|
466
|
+
# when (merge and not zipped)
|
467
|
+
# self.instance_eval do alias add_to_data add_to_data_merge end
|
468
|
+
# when (merge and zipped)
|
469
|
+
# self.instance_eval do alias add_to_data add_to_data_merge_zipped end
|
470
|
+
# when zipped
|
471
|
+
# self.instance_eval do alias add_to_data add_to_data_zipped end
|
472
|
+
# else
|
473
|
+
# self.instance_eval do alias add_to_data add_to_data_no_merge_double end
|
474
|
+
# end
|
475
|
+
# when :single
|
476
|
+
# if @header_options[:type] == :flat
|
477
|
+
# self.instance_eval do alias get_values get_values_single_from_flat end
|
478
|
+
# self.instance_eval do alias cast_values cast_values_single end
|
479
|
+
# self.instance_eval do alias add_to_data add_to_data_no_merge_double end
|
480
|
+
# else
|
481
|
+
# self.instance_eval do alias get_values get_values_single end
|
482
|
+
# self.instance_eval do alias cast_values cast_values_single end
|
483
|
+
# self.instance_eval do alias add_to_data add_to_data_no_merge_list end
|
484
|
+
# end
|
485
|
+
# when :list
|
486
|
+
# self.instance_eval do alias get_values get_values_list end
|
487
|
+
# self.instance_eval do alias cast_values cast_values_list end
|
488
|
+
# self.instance_eval do alias add_to_data add_to_data_no_merge_list end
|
489
|
+
#
|
490
|
+
# when :flat
|
491
|
+
# @take_all = true if field_positions.nil?
|
492
|
+
# self.instance_eval do alias cast_values cast_values_flat end
|
493
|
+
# merge = true if key_position and key_position != 0 and field_positions.nil?
|
494
|
+
# if merge
|
495
|
+
# self.instance_eval do alias get_values get_values_flat_merge end
|
496
|
+
# if key_position and key_position != 0 and field_positions.nil?
|
497
|
+
# self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
|
498
|
+
# else
|
499
|
+
# self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
|
500
|
+
# end
|
501
|
+
# else
|
502
|
+
# self.instance_eval do alias get_values get_values_flat_merge end
|
503
|
+
# if key_position and key_position != 0 and field_positions.nil?
|
504
|
+
# self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
|
505
|
+
# else
|
506
|
+
# self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
|
507
|
+
# end
|
508
|
+
# end
|
509
|
+
# else
|
510
|
+
# raise "Unknown TSV type: #{@type.inspect}"
|
511
|
+
# end
|
512
|
+
#
|
513
|
+
# @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
|
514
|
+
# end
|
515
|
+
#
|
516
|
+
# def setup(data)
|
517
|
+
# data.extend TSV unless TSV === data
|
518
|
+
# data.type = @type
|
519
|
+
# data.key_field = @key_field
|
520
|
+
# data.fields = @fields.nil? ? nil : @fields.dup
|
521
|
+
# data.namespace = @namespace
|
522
|
+
# data.filename = @filename
|
523
|
+
# data.identifiers = @identifiers
|
524
|
+
# data.cast = @cast if Symbol === @cast
|
525
|
+
# data
|
526
|
+
# end
|
527
|
+
#
|
528
|
+
# def annotate(data)
|
529
|
+
# setup(data)
|
530
|
+
# end
|
531
|
+
#
|
532
|
+
# def options
|
533
|
+
# options = {}
|
534
|
+
# TSV::ENTRIES.each do |entry|
|
535
|
+
# if self.respond_to? entry
|
536
|
+
# value = self.send(entry)
|
537
|
+
# options[entry.to_sym] = value unless value.nil?
|
538
|
+
# end
|
539
|
+
# end
|
540
|
+
# options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
|
541
|
+
# IndiferentHash.setup options
|
542
|
+
# end
|
543
|
+
#
|
544
|
+
# def traverse(options = {})
|
545
|
+
# monitor, bar, grep, invert_grep, head, fixed_grep = Misc.process_options options, :monitor, :bar, :grep, :invert_grep, :head, :fixed_grep
|
546
|
+
# monitor = bar if bar and monitor.nil?
|
547
|
+
# raise "No block given in TSV::Parser#traverse" unless block_given?
|
548
|
+
#
|
549
|
+
# stream = @stream
|
550
|
+
#
|
551
|
+
#
|
552
|
+
# # first line
|
553
|
+
# line = self.rescue_first_line
|
554
|
+
# line = stream.gets if line.nil?
|
555
|
+
#
|
556
|
+
# if @tsv_grep || grep
|
557
|
+
#
|
558
|
+
# stream = Open.grep(stream, @tsv_grep || grep, invert_grep, fixed_grep)
|
559
|
+
# stream.no_fail = true
|
560
|
+
# begin
|
561
|
+
# match = Open.grep(StringIO.new(line), @tsv_grep || grep, invert_grep, fixed_grep).read
|
562
|
+
# line = stream.gets if match.empty?
|
563
|
+
# rescue Exception
|
564
|
+
# Log.exception $!
|
565
|
+
# line = stream.gets
|
566
|
+
# end
|
567
|
+
# end
|
568
|
+
#
|
569
|
+
# progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
|
570
|
+
#
|
571
|
+
# # setup monitor
|
572
|
+
# if monitor && (stream.respond_to?(:size) || (stream.respond_to?(:stat) && stream.stat.file? && stream.stat.respond_to?(:size) && stream.respond_to?(:pos)))
|
573
|
+
# size = case
|
574
|
+
# when stream.respond_to?(:size)
|
575
|
+
# stream.size
|
576
|
+
# else
|
577
|
+
# stream.stat.size
|
578
|
+
# end
|
579
|
+
# size = nil if size.to_i == 0
|
580
|
+
# desc = "Parsing Stream"
|
581
|
+
# step = 100
|
582
|
+
# if Hash === monitor
|
583
|
+
# desc = monitor[:desc] if monitor.include? :desc
|
584
|
+
# step = monitor[:step] if monitor.include? :step
|
585
|
+
# end
|
586
|
+
# progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
|
587
|
+
# elsif progress_monitor
|
588
|
+
#
|
589
|
+
# size = case
|
590
|
+
# when stream.respond_to?(:size)
|
591
|
+
# stream.size
|
592
|
+
# else
|
593
|
+
# stream.stat.size
|
594
|
+
# end
|
595
|
+
#
|
596
|
+
# progress_monitor.bytes = true
|
597
|
+
# progress_monitor.max = size unless size.to_i == 0
|
598
|
+
# elsif monitor
|
599
|
+
# desc = "Parsing Stream"
|
600
|
+
# step = 100
|
601
|
+
# size = nil
|
602
|
+
# if Hash === monitor
|
603
|
+
# desc = monitor[:desc] if monitor.include? :desc
|
604
|
+
# step = monitor[:step] if monitor.include? :step
|
605
|
+
# end
|
606
|
+
# progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
|
607
|
+
# end
|
608
|
+
#
|
609
|
+
# # parser
|
610
|
+
# line_num = 1
|
611
|
+
# begin
|
612
|
+
#
|
613
|
+
# while not line.nil?
|
614
|
+
# begin
|
615
|
+
# if progress_monitor
|
616
|
+
# progress_monitor.tick(line.bytesize)
|
617
|
+
# end
|
618
|
+
#
|
619
|
+
# raise SKIP_LINE if line.empty?
|
620
|
+
#
|
621
|
+
# line = Misc.fixutf8(line)
|
622
|
+
# line = self.process line
|
623
|
+
# raise SKIP_LINE if line.empty?
|
624
|
+
# parts = self.chop_line line
|
625
|
+
# key, values = self.get_values parts
|
626
|
+
# values = self.cast_values values if self.cast?
|
627
|
+
#
|
628
|
+
# yield key, values, fields
|
629
|
+
#
|
630
|
+
# line = stream.gets
|
631
|
+
#
|
632
|
+
# line_num += 1
|
633
|
+
# raise END_PARSING if head and line_num > head.to_i
|
634
|
+
# rescue SKIP_LINE
|
635
|
+
# begin
|
636
|
+
# line = stream.gets
|
637
|
+
# next
|
638
|
+
# rescue IOError
|
639
|
+
# break
|
640
|
+
# end
|
641
|
+
# rescue END_PARSING
|
642
|
+
# stream.close unless stream.closed?
|
643
|
+
# begin stream.join; rescue Exception; end if stream.respond_to? :join and not stream.joined?
|
644
|
+
# break
|
645
|
+
# rescue Errno::EPIPE
|
646
|
+
# Log.error "Pipe closed while parsing #{Misc.fingerprint stream}: #{$!.message}"
|
647
|
+
# stream.abort if stream.respond_to? :abort
|
648
|
+
# raise $!
|
649
|
+
# rescue Exception
|
650
|
+
# Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
|
651
|
+
# stream.abort $! if stream.respond_to? :abort
|
652
|
+
# raise $!
|
653
|
+
# end
|
654
|
+
# end
|
655
|
+
# ensure
|
656
|
+
# Log::ProgressBar.remove_bar(progress_monitor) if progress_monitor
|
657
|
+
# stream.close unless stream.closed?
|
658
|
+
# stream.join if stream.respond_to? :join and not stream.joined?
|
659
|
+
# end
|
660
|
+
#
|
661
|
+
# self
|
662
|
+
# end
|
663
|
+
#
|
664
|
+
# def identify_field(field)
|
665
|
+
# TSV.identify_field(key_field, fields, field)
|
666
|
+
# end
|
667
|
+
#
|
668
|
+
# def rewind
|
669
|
+
# stream.reopen(filename, "r") if stream.closed? and filename
|
670
|
+
# stream.rewind
|
671
|
+
# end
|
672
|
+
#
|
673
|
+
# def self.traverse(stream, options = {}, &block)
|
674
|
+
# parser = Parser.new(stream, options)
|
675
|
+
# parser.traverse(options, &block)
|
676
|
+
# end
|
677
|
+
# end
|
678
|
+
#end
|