rbbt-util 5.44.1 → 6.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +67 -90
- data/etc/app.d/base.rb +2 -2
- data/etc/app.d/semaphores.rb +3 -3
- data/lib/rbbt/annotations/annotated_array.rb +207 -207
- data/lib/rbbt/annotations/refactor.rb +27 -0
- data/lib/rbbt/annotations/util.rb +282 -282
- data/lib/rbbt/annotations.rb +343 -320
- data/lib/rbbt/association/database.rb +200 -225
- data/lib/rbbt/association/index.rb +294 -291
- data/lib/rbbt/association/item.rb +227 -227
- data/lib/rbbt/association/open.rb +35 -34
- data/lib/rbbt/association/util.rb +0 -169
- data/lib/rbbt/association.rb +2 -4
- data/lib/rbbt/entity/identifiers.rb +119 -118
- data/lib/rbbt/entity/refactor.rb +12 -0
- data/lib/rbbt/entity.rb +319 -315
- data/lib/rbbt/hpc/batch.rb +72 -53
- data/lib/rbbt/hpc/lsf.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
- data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
- data/lib/rbbt/hpc/orchestrate.rb +19 -13
- data/lib/rbbt/hpc/slurm.rb +18 -18
- data/lib/rbbt/knowledge_base/entity.rb +13 -5
- data/lib/rbbt/knowledge_base/query.rb +2 -2
- data/lib/rbbt/knowledge_base/registry.rb +32 -31
- data/lib/rbbt/knowledge_base/traverse.rb +1 -1
- data/lib/rbbt/knowledge_base.rb +1 -1
- data/lib/rbbt/monitor.rb +36 -25
- data/lib/rbbt/persist/refactor.rb +166 -0
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
- data/lib/rbbt/persist/tsv.rb +187 -185
- data/lib/rbbt/persist.rb +556 -551
- data/lib/rbbt/refactor.rb +20 -0
- data/lib/rbbt/resource/path/refactor.rb +178 -0
- data/lib/rbbt/resource/path.rb +317 -497
- data/lib/rbbt/resource/util.rb +0 -48
- data/lib/rbbt/resource.rb +3 -390
- data/lib/rbbt/tsv/accessor.rb +2 -838
- data/lib/rbbt/tsv/attach.rb +303 -299
- data/lib/rbbt/tsv/change_id.rb +244 -245
- data/lib/rbbt/tsv/csv.rb +87 -85
- data/lib/rbbt/tsv/dumper.rb +2 -100
- data/lib/rbbt/tsv/excel.rb +26 -24
- data/lib/rbbt/tsv/field_index.rb +4 -1
- data/lib/rbbt/tsv/filter.rb +3 -2
- data/lib/rbbt/tsv/index.rb +2 -284
- data/lib/rbbt/tsv/manipulate.rb +750 -747
- data/lib/rbbt/tsv/marshal.rb +3 -3
- data/lib/rbbt/tsv/matrix.rb +2 -2
- data/lib/rbbt/tsv/parallel/through.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
- data/lib/rbbt/tsv/parser.rb +678 -678
- data/lib/rbbt/tsv/refactor.rb +195 -0
- data/lib/rbbt/tsv/stream.rb +253 -251
- data/lib/rbbt/tsv/util.rb +420 -420
- data/lib/rbbt/tsv.rb +210 -208
- data/lib/rbbt/util/R/eval.rb +4 -4
- data/lib/rbbt/util/R/plot.rb +62 -166
- data/lib/rbbt/util/R.rb +21 -18
- data/lib/rbbt/util/cmd.rb +2 -318
- data/lib/rbbt/util/color.rb +269 -269
- data/lib/rbbt/util/colorize.rb +89 -89
- data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
- data/lib/rbbt/util/concurrency/processes.rb +389 -386
- data/lib/rbbt/util/config.rb +169 -167
- data/lib/rbbt/util/iruby.rb +20 -0
- data/lib/rbbt/util/log/progress/report.rb +241 -241
- data/lib/rbbt/util/log/progress/util.rb +99 -99
- data/lib/rbbt/util/log/progress.rb +102 -102
- data/lib/rbbt/util/log/refactor.rb +49 -0
- data/lib/rbbt/util/log.rb +486 -532
- data/lib/rbbt/util/migrate.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
- data/lib/rbbt/util/misc/development.rb +12 -11
- data/lib/rbbt/util/misc/exceptions.rb +117 -112
- data/lib/rbbt/util/misc/format.rb +2 -230
- data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
- data/lib/rbbt/util/misc/inspect.rb +2 -476
- data/lib/rbbt/util/misc/lock.rb +109 -106
- data/lib/rbbt/util/misc/omics.rb +9 -1
- data/lib/rbbt/util/misc/pipes.rb +765 -793
- data/lib/rbbt/util/misc/refactor.rb +20 -0
- data/lib/rbbt/util/misc/ssw.rb +27 -17
- data/lib/rbbt/util/misc/system.rb +0 -15
- data/lib/rbbt/util/misc.rb +39 -20
- data/lib/rbbt/util/named_array/refactor.rb +4 -0
- data/lib/rbbt/util/named_array.rb +3 -220
- data/lib/rbbt/util/open/refactor.rb +7 -0
- data/lib/rbbt/util/open.rb +3 -857
- data/lib/rbbt/util/procpath.rb +6 -6
- data/lib/rbbt/util/python/paths.rb +27 -0
- data/lib/rbbt/util/python/run.rb +115 -0
- data/lib/rbbt/util/python/script.rb +110 -0
- data/lib/rbbt/util/python/util.rb +3 -3
- data/lib/rbbt/util/python.rb +22 -81
- data/lib/rbbt/util/semaphore.rb +152 -148
- data/lib/rbbt/util/simpleopt.rb +9 -8
- data/lib/rbbt/util/ssh/refactor.rb +19 -0
- data/lib/rbbt/util/ssh.rb +122 -118
- data/lib/rbbt/util/tar.rb +117 -115
- data/lib/rbbt/util/tmpfile.rb +69 -67
- data/lib/rbbt/util/version.rb +2 -0
- data/lib/rbbt/workflow/refactor/entity.rb +11 -0
- data/lib/rbbt/workflow/refactor/export.rb +66 -0
- data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
- data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
- data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
- data/lib/rbbt/workflow/refactor.rb +153 -0
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
- data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
- data/lib/rbbt/workflow/remote_workflow.rb +6 -1
- data/lib/rbbt/workflow/step/run.rb +766 -766
- data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
- data/lib/rbbt/workflow/step.rb +2 -362
- data/lib/rbbt/workflow/task.rb +118 -118
- data/lib/rbbt/workflow/usage.rb +289 -287
- data/lib/rbbt/workflow/util/archive.rb +6 -5
- data/lib/rbbt/workflow/util/data.rb +1 -1
- data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
- data/lib/rbbt/workflow/util/trace.rb +79 -44
- data/lib/rbbt/workflow.rb +4 -882
- data/lib/rbbt-util.rb +21 -13
- data/lib/rbbt.rb +16 -3
- data/python/rbbt/__init__.py +19 -1
- data/share/Rlib/plot.R +37 -37
- data/share/Rlib/svg.R +22 -5
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +2 -3
- data/share/rbbt_commands/hpc/orchestrate +4 -4
- data/share/rbbt_commands/hpc/tail +2 -0
- data/share/rbbt_commands/hpc/task +10 -7
- data/share/rbbt_commands/lsf/list +2 -3
- data/share/rbbt_commands/lsf/orchestrate +4 -4
- data/share/rbbt_commands/lsf/tail +2 -0
- data/share/rbbt_commands/lsf/task +10 -7
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/pbs/list +2 -3
- data/share/rbbt_commands/pbs/orchestrate +4 -4
- data/share/rbbt_commands/pbs/tail +2 -0
- data/share/rbbt_commands/pbs/task +10 -7
- data/share/rbbt_commands/resource/produce +8 -1
- data/share/rbbt_commands/slurm/list +2 -3
- data/share/rbbt_commands/slurm/orchestrate +4 -4
- data/share/rbbt_commands/slurm/tail +2 -0
- data/share/rbbt_commands/slurm/task +10 -7
- data/share/rbbt_commands/system/clean +5 -5
- data/share/rbbt_commands/system/status +5 -5
- data/share/rbbt_commands/tsv/get +2 -3
- data/share/rbbt_commands/tsv/info +10 -13
- data/share/rbbt_commands/tsv/keys +18 -14
- data/share/rbbt_commands/tsv/slice +2 -2
- data/share/rbbt_commands/tsv/transpose +6 -2
- data/share/rbbt_commands/workflow/info +20 -24
- data/share/rbbt_commands/workflow/list +1 -1
- data/share/rbbt_commands/workflow/prov +20 -13
- data/share/rbbt_commands/workflow/server +11 -1
- data/share/rbbt_commands/workflow/task +76 -71
- data/share/rbbt_commands/workflow/write_info +26 -9
- data/share/software/opt/ssw/ssw.c +861 -0
- data/share/software/opt/ssw/ssw.h +130 -0
- data/share/workflow_config.ru +3 -3
- metadata +40 -2
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -1,678 +1,678 @@
|
|
1
|
-
require 'rbbt/util/cmd'
|
2
|
-
module TSV
  # Line-oriented parser for TSV streams.
  #
  # On construction the parser reads the header of the stream (an optional
  # options line plus the field-names line), resolves which column is the key
  # and which columns are values, and aliases `get_values`, `cast_values` and
  # `add_to_data` on the instance to the variants that match the requested
  # TSV type (:double, :single, :list or :flat). `traverse` then yields one
  # `key, values, fields` triple per data line.
  class Parser
    attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :preamble, :identifiers, :header_options

    # Control-flow signals used inside #traverse. They derive from Exception
    # (not StandardError) so a bare `rescue` in user blocks will not catch them.
    class SKIP_LINE < Exception; end
    class END_PARSING < Exception; end

    # Returns the key field name followed by all value field names.
    def all_fields
      all = [key_field] + fields
      # ToDo: What was this for?
      #NamedArray.setup all, all
      all
    end

    # Reads the header of +stream+.
    #
    # Sets @preamble, @sep (when the options line declares one), @fields,
    # @key_field and @first_line (the first line that is not header).
    # Returns the options hash parsed from the options line, or an empty hash
    # when there is none or the stream is empty.
    #
    # Raises RuntimeError ("Closed stream") when +stream+ is a closed IO.
    def parse_header(stream)
      raise "Closed stream" if IO === stream && stream.closed?

      options = {}
      @preamble = []

      # Get line

      #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
      line = stream.gets
      return {} if line.nil?
      #raise "Empty content: #{ stream.inspect }" if line.nil?
      line = Misc.fixutf8 line.chomp

      # Process options line

      if line and (String === @header_hash and line =~ /^#{@header_hash}: (.*)/)
        options = Misc.string2hash $1.chomp
        line = stream.gets
        line = Misc.fixutf8 line.chomp if line
      end

      # Determine separator

      @sep = options[:sep] if options[:sep]

      # Process fields line

      preamble << line if line
      while line && (TrueClass === @header_hash || (String === @header_hash && Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
        @fields = line.split(@sep, -1)
        @key_field = @fields.shift
        # Strip the header marker from the front of the key field name
        @key_field = @key_field[(0 + header_hash.length)..-1] if String === @header_hash

        #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
        line = (@header_hash != "" ? stream.gets : nil)
        line = Misc.fixutf8 line.chomp if line
        preamble << line if line
        # A `true` or empty header marker means a single header line: stop looping
        @header_hash = false if TrueClass === @header_hash || @header_hash == ""
      end

      # Drop the last two collected lines and join the rest
      @preamble = preamble[0..-3] * "\n"

      line ||= stream.gets

      @first_line = line

      options
    end

    # Prepares a raw line for splitting: chomps it, applies @select and @fix.
    #
    # Raises SKIP_LINE for lines starting with "#" or rejected by @select, and
    # END_PARSING when @fix returns nil/false.
    def process(line)
      l = line.chomp
      raise SKIP_LINE if l[0] == "#"[0] or (Proc === @select and not @select.call l)
      l = @fix.call l if Proc === @fix
      raise END_PARSING unless l
      l
    end

    # True when a cast has been configured.
    def cast?
      !! @cast
    end

    # Splits +line+ on @sep keeping trailing empty fields. A single-space
    # separator is turned into a regexp because String#split(" ") would
    # collapse runs of whitespace.
    def chop_line(line)
      @sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
    end

    # get_values for :single type when the file header declares :flat.
    def get_values_single_from_flat(parts)
      return parts.shift, parts.first if field_positions.nil? and key_position.nil?
      if key_position == 0
        [parts.shift, parts.first]
      else
        key = parts.shift
        [parts, key]
      end
    end

    # get_values for :double type when the file header declares :flat.
    def get_values_double_from_flat(parts)
      return [parts.shift], [parts.flatten] if field_positions.nil? and key_position.nil?
      if key_position == 0
        [parts.shift, [parts.flatten]]
      else
        value = parts.shift
        keys = parts.flatten
        [keys, [[value]]]
      end
    end

    # get_values for :single type: returns [key, value].
    def get_values_single(parts)
      return parts.shift, parts.first if field_positions.nil? and key_position.nil?
      key = parts[key_position]
      value = parts[(field_positions.nil? or field_positions.empty?) ? 0 : field_positions.first]
      [key, value]
    end

    # get_values for :list type: returns [key, values]. Mutates +parts+ when
    # no explicit field positions are configured.
    def get_values_list(parts)
      return parts.shift, parts if field_positions.nil? and key_position.nil?
      key = parts[key_position]

      values = case
               when field_positions.nil?
                 parts.tap{|o| o.delete_at key_position}
               when field_positions.empty?
                 []
               else
                 parts.values_at(*field_positions)
               end

      [key, values]
    end

    # get_values for :double type: returns [keys, values] where every cell has
    # been split on @sep2. Empty or missing cells become [""].
    def get_values_double(parts)
      return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil? and key_position.nil?
      keys = parts[key_position].split(@sep2, -1)
      values = case
               when field_positions.nil?
                 parts.tap{|o| o.delete_at key_position}
               when field_positions.empty?
                 []
               else
                 parts.values_at(*field_positions)
               end.collect{|value| (value.nil? || value.empty?) ? [""] : value.split(@sep2, -1) }
      [keys, values]
    end

    # Treats the first part as the value and the remaining parts as keys.
    def get_values_flat_inverse(parts)
      value = parts.shift
      keys = parts
      [keys, [value]]
    end

    # get_values for :flat type: returns [keys, values] with all cells split
    # on @sep2 and flattened.
    def get_values_flat_merge(parts)
      # Key is not the first column and no fields selected: the first column
      # holds the values and every other column holds keys.
      if key_position and key_position != 0 and field_positions.nil?
        value = parts.shift.split(@sep2, -1)
        keys = parts.collect{|p| p.split(@sep2, -1) }.flatten
        return [keys, value]
      end

      if field_positions.nil? and (key_position.nil? or key_position == 0)
        return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)}.flatten
      end

      if key_position and key_position != 0 and @header_options[:type] == :flat
        keys = parts[1..-1]
      else
        str = parts[key_position]
        keys = str.split(@sep2, -1)
      end

      if @take_all
        values = parts.collect{|e| e.split(@sep2, -1) }.flatten
      else
        if field_positions.nil?
          parts.delete_at key_position
          values = parts.first
        else
          values = parts[field_positions.first]
        end

        values = values.split(@sep2, -1)
      end

      [keys, values]
    end

    # Like #get_values_flat_merge but returns only the first key.
    def get_values_flat(parts)
      keys, values = get_values_flat_merge(parts)
      [keys.first, values]
    end

    # Stores +values+ under +key+ unless the key is already present.
    def add_to_data_no_merge_list(data, key, values)
      data[key] = values unless data.include? key
      nil
    end

    # Stores +values+ under +key+ unless the key is already present.
    def add_to_data_flat_keys(data, key, values)
      data[key] = values unless data.include? key
      nil
    end

    # Stores +values+ under +key+ unless the key is already present.
    def add_to_data_flat(data, key, values)
      data[key] = values unless data.include? key
      nil
    end

    # Appends +values+ to the entry for +key+, creating it when missing.
    def add_to_data_flat_merge(data, key, values)
      if data.include? key
        data[key] = data[key].concat values
      else
        data[key] = values
      end
      nil
    end

    # Appends +values+ to the entry of each key in +keys+. Calls `data.write`
    # first, so +data+ must respond to it.
    def add_to_data_flat_merge_double(data, keys, values)
      data.write
      keys.each do |key|
        if data.include? key
          data[key] = data[key].concat values
        else
          # Store a copy: the same array must not be shared between keys,
          # otherwise a later concat for one key would change the others.
          data[key] = values.dup
        end
      end
      nil
    end

    # Appends +values+ to the entry of each key in +keys+.
    def add_to_data_flat_merge_keys(data, keys, values)
      keys.each do |key|
        if data.include? key
          data[key] = data[key].concat values
        else
          data[key] = values.dup
        end
      end
      nil
    end

    # Stores +values+ under each key that is not already present.
    def add_to_data_no_merge_double(data, keys, values)
      keys = [keys] unless Array === keys
      keys.each do |key|
        next if data.include? key
        data[key] = values
      end
      nil
    end

    # Merges +values+ (a list of lists) position by position into the entry of
    # each distinct key in +keys+.
    def add_to_data_merge(data, keys, values)
      keys.uniq.each do |key|
        if data.include? key
          new = data[key]
          new.each_with_index do |old, i|
            next if values[i].nil?
            if old.nil?
              # Copy so that several keys never share one inner list
              new[i] = values[i].dup
            else
              old.concat values[i]
            end
          end
          data[key] = new
        else
          # Per-key copy of the outer and inner lists; sharing them would let
          # a later merge into one key leak into every other key of this line.
          data[key] = values.collect{|v| v.nil? ? nil : v.dup }
        end
      end
      nil
    end

    # Zipped merge: pairs the i-th key with the i-th element of every value
    # list and appends those elements to the existing entry of that key.
    def add_to_data_merge_zipped(data, keys, values)
      keys = [keys] unless Array === keys
      num = keys.length

      values = values.collect do |v|
        (v.nil? || v.empty?) ? [""] : v
      end

      # A single key with several values per field is repeated for each value
      if values.first.length > 1 and num == 1
        keys = keys * values.first.length
        num = keys.length
      end

      # Fields whose length does not match get their first value repeated
      values = values.collect{|v| v.length != num ? [v.first] * num : v}

      all = values
      all.unshift keys
      Misc.zip_fields(all).each do |vs|
        key = vs.shift
        if data.include? key
          data[key] = data[key].zip(vs).collect do |old, new|
            old + [new]
          end
        else
          data[key] = vs.collect{|v| [v] }
        end
      end

      nil
    end

    # Zipped store without merge: like #add_to_data_merge_zipped but keys that
    # are already present are left untouched.
    def add_to_data_zipped(data, keys, values)
      num = keys.length

      if values.first.length > 1 and num == 1
        keys = keys * values.first.length
        num = keys.length
      end

      values = values.collect{|v| v.length != num ? [v.first] * num : v}
      all = values.unshift keys
      Misc.zip_fields(all).each do |vs|
        key = vs.shift
        next if data.include? key
        data[key] = vs.collect{|v| [v]}
      end
      nil
    end

    # Casts a single value; nil and empty strings become nil. Returns nil when
    # the cast is neither a Symbol nor a Proc.
    def cast_values_single(value)
      case
      when (value.nil? or value.empty?)
        nil
      when Symbol === cast
        value.send(cast)
      when Proc === cast
        cast.call value
      end
    end

    # Casts every value of a list; nil and empty strings become nil.
    # The emptiness test is parenthesized: `a or b ? x : y` parses as
    # `a or (b ? x : y)`, which turned nil values into `true`.
    def cast_values_list(values)
      case
      when Symbol === cast
        values.collect{|v| (v.nil? || v.empty?) ? nil : v.send(cast)}
      when Proc === cast
        values.collect{|v| (v.nil? || v.empty?) ? nil : cast.call(v)}
      end
    end

    # Casts every value of a flat list; nil and empty strings become nil.
    def cast_values_flat(values)
      case
      when Symbol === cast
        values.collect{|v| (v.nil? || v.empty?) ? nil : v.send(cast)}
      when Proc === cast
        values.collect{|v| (v.nil? || v.empty?) ? nil : cast.call(v) }
      end
    end

    # Casts every value of a list of lists; nil and empty strings become nil.
    def cast_values_double(values)
      case
      when Symbol === cast
        values.collect{|list| list.collect{|v| (v.nil? || v.empty?) ? nil : v.send(cast)}}
      when Proc === cast
        values.collect{|list| list.collect{|v| (v.nil? || v.empty?) ? nil : cast.call(v) }}
      end
    end

    # Returns the first non-header line captured by #parse_header.
    def rescue_first_line
      @first_line
    end

    # Resolves the :key_field and :fields options (removed from +options+ via
    # Misc.process_options) into @key_position and @field_positions, and
    # updates @key_field and @fields to match. Sets @straight to true when the
    # natural column order is requested.
    #
    # Raises RuntimeError when a named field cannot be found or when the
    # format of an option is not understood.
    def fix_fields(options)
      key_field = Misc.process_options options, :key_field
      fields    = Misc.process_options options, :fields

      if (key_field.nil? or key_field == 0 or key_field == :key) and
          (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))

        @straight = true
        return
      end

      @straight = false

      case
      when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
        @key_position = 0
      when Integer === key_field
        @key_position = key_field
      when String === key_field
        @key_position = @fields.dup.unshift(@key_field).index key_field
        raise "Key field #{ key_field } was not found" if @key_position.nil?
      when :key == key_field
        @key_position = 0
      else
        raise "Format of key_field not understood: #{key_field.inspect}"
      end

      if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
        if not @fields.nil? and type != :flat
          # All columns except the one used as key
          @field_positions = (0..@fields.length).to_a
          @field_positions.delete @key_position
        end
      else
        fields = [fields] if not Array === fields
        @field_positions = fields.collect{|field|
          case
          when Integer === field
            field
          when String === field
            pos = @fields.dup.unshift(@key_field).index field
            raise "Field not identified: #{ field }" if pos.nil?
            pos
          else
            raise "Format of fields not understood: #{field.inspect}"
          end
        }
      end

      new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
      @fields = @fields.dup.unshift(@key_field).values_at(*@field_positions) if not @fields.nil? and not @field_positions.nil?
      @fields ||= fields if Array === fields and String === fields.first
      @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
      @key_field = new_key_field
      @key_field ||= key_field if String === key_field
    end

    # Builds a parser over +stream+ (resolved through TSV.get_stream).
    #
    # Options given explicitly take precedence over those found in the header
    # of the stream. Raises RuntimeError for an unknown TSV type.
    def initialize(stream = nil, options = {})
      @header_hash = Misc.process_options(options, :header_hash) || "#"
      @sep = Misc.process_options(options, :sep) || "\t"
      @tsv_grep = Misc.process_options(options, :tsv_grep)
      stream = TSV.get_stream stream
      @stream = stream

      @header_options = parse_header(stream)

      options = @header_options.merge options
      options = Misc.add_defaults options, :fields => [1] if options[:type] == :single and options[:fields].nil?

      @type ||= Misc.process_options(options, :type) || :double

      @identifiers = Misc.process_options(options, :identifiers)

      @filename = Misc.process_options(options, :filename)
      @filename ||= stream.filename if stream.respond_to? :filename

      @sep2 = Misc.process_options(options, :sep2) || "|"
      @cast = Misc.process_options options, :cast
      @cast = @cast.to_sym if String === @cast
      @fix = Misc.process_options(options, :fix)
      @select = Misc.process_options options, :select
      @zipped = Misc.process_options options, :zipped
      @namespace = Misc.process_options options, :namespace
      merge = Misc.process_options(options, :merge)
      merge = @zipped if merge.nil?
      merge = false if merge.nil?

      # Captured before fix_fields removes :fields from options
      fields = options[:fields]
      fix_fields(options)

      @type = @type.strip.to_sym if String === @type
      #@type ||= :double if merge == true

      # Bind get_values / cast_values / add_to_data on this instance only
      case @type
      when :double
        if @header_options[:type] == :flat
          self.instance_eval do alias get_values get_values_double_from_flat end
        else
          self.instance_eval do alias get_values get_values_double end
        end
        self.instance_eval do alias cast_values cast_values_double end
        case
        when (merge and not zipped)
          self.instance_eval do alias add_to_data add_to_data_merge end
        when (merge and zipped)
          self.instance_eval do alias add_to_data add_to_data_merge_zipped end
        when zipped
          self.instance_eval do alias add_to_data add_to_data_zipped end
        else
          self.instance_eval do alias add_to_data add_to_data_no_merge_double end
        end
      when :single
        if @header_options[:type] == :flat
          self.instance_eval do alias get_values get_values_single_from_flat end
          self.instance_eval do alias cast_values cast_values_single end
          self.instance_eval do alias add_to_data add_to_data_no_merge_double end
        else
          self.instance_eval do alias get_values get_values_single end
          self.instance_eval do alias cast_values cast_values_single end
          self.instance_eval do alias add_to_data add_to_data_no_merge_list end
        end
      when :list
        self.instance_eval do alias get_values get_values_list end
        self.instance_eval do alias cast_values cast_values_list end
        self.instance_eval do alias add_to_data add_to_data_no_merge_list end

      when :flat
        @take_all = true if field_positions.nil?
        self.instance_eval do alias cast_values cast_values_flat end
        merge = true if key_position and key_position != 0 and field_positions.nil?
        # The same methods are bound whether or not merge is set
        self.instance_eval do alias get_values get_values_flat_merge end
        if key_position and key_position != 0 and field_positions.nil?
          self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
        else
          self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
        end
      else
        raise "Unknown TSV type: #{@type.inspect}"
      end

      @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
    end

    # Extends +data+ with TSV (if needed) and copies the parser's type, key
    # field, fields, namespace, filename, identifiers and symbolic cast onto
    # it. Returns +data+.
    def setup(data)
      data.extend TSV unless TSV === data
      data.type = @type
      data.key_field = @key_field
      data.fields = @fields.nil? ? nil : @fields.dup
      data.namespace = @namespace
      data.filename = @filename
      data.identifiers = @identifiers
      data.cast = @cast if Symbol === @cast
      data
    end

    # Alias-style wrapper around #setup.
    def annotate(data)
      setup(data)
    end

    # Collects the non-nil values of every TSV::ENTRIES attribute this parser
    # responds to, plus :sep when it is not a tab, into an IndiferentHash.
    def options
      options = {}
      TSV::ENTRIES.each do |entry|
        if self.respond_to? entry
          value = self.send(entry)
          options[entry.to_sym] = value unless value.nil?
        end
      end
      options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
      IndiferentHash.setup options
    end

    # Iterates over the data lines of the stream yielding `key, values, fields`.
    #
    # Recognized options (removed from +options+): :monitor / :bar (progress
    # reporting: truthy, a Hash with :desc, or a Log::ProgressBar), :grep,
    # :invert_grep, :fixed_grep (line filtering through Open.grep) and :head
    # (stop once the line counter exceeds it).
    #
    # The stream is closed (and joined when it supports it) when the traversal
    # finishes or fails. Returns self. Raises RuntimeError when no block is given.
    def traverse(options = {})
      monitor, bar, grep, invert_grep, head, fixed_grep = Misc.process_options options, :monitor, :bar, :grep, :invert_grep, :head, :fixed_grep
      monitor = bar if bar and monitor.nil?
      raise "No block given in TSV::Parser#traverse" unless block_given?

      stream = @stream

      # first line
      line = self.rescue_first_line
      line = stream.gets if line.nil?

      if @tsv_grep || grep

        stream = Open.grep(stream, @tsv_grep || grep, invert_grep, fixed_grep)
        stream.no_fail = true
        # The first line was read before the filter was installed, so it is
        # checked against the filter separately.
        begin
          match = Open.grep(StringIO.new(line), @tsv_grep || grep, invert_grep, fixed_grep).read
          line = stream.gets if match.empty?
        rescue Exception
          Log.exception $!
          line = stream.gets
        end
      end

      progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor

      # setup monitor
      if monitor && (stream.respond_to?(:size) || (stream.respond_to?(:stat) && stream.stat.file? && stream.stat.respond_to?(:size) && stream.respond_to?(:pos)))
        size = case
               when stream.respond_to?(:size)
                 stream.size
               else
                 stream.stat.size
               end
        size = nil if size.to_i == 0
        desc = "Parsing Stream"
        if Hash === monitor
          desc = monitor[:desc] if monitor.include? :desc
        end
        progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
      elsif progress_monitor

        # Streams exposing neither #size nor #stat leave the bar without a max
        size = case
               when stream.respond_to?(:size)
                 stream.size
               when stream.respond_to?(:stat)
                 stream.stat.size
               end

        progress_monitor.bytes = true
        progress_monitor.max = size unless size.to_i == 0
      elsif monitor
        desc = "Parsing Stream"
        size = nil
        if Hash === monitor
          desc = monitor[:desc] if monitor.include? :desc
        end
        progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
      end

      # parser
      line_num = 1
      begin

        while not line.nil?
          begin
            if progress_monitor
              progress_monitor.tick(line.bytesize)
            end

            raise SKIP_LINE if line.empty?

            line = Misc.fixutf8(line)
            line = self.process line
            raise SKIP_LINE if line.empty?
            parts = self.chop_line line
            key, values = self.get_values parts
            values = self.cast_values values if self.cast?

            yield key, values, fields

            line = stream.gets

            line_num += 1
            raise END_PARSING if head and line_num > head.to_i
          rescue SKIP_LINE
            begin
              line = stream.gets
              next
            rescue IOError
              break
            end
          rescue END_PARSING
            stream.close unless stream.closed?
            begin stream.join; rescue Exception; end if stream.respond_to? :join and not stream.joined?
            break
          rescue Errno::EPIPE
            Log.error "Pipe closed while parsing #{Misc.fingerprint stream}: #{$!.message}"
            stream.abort if stream.respond_to? :abort
            raise $!
          rescue Exception
            Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
            stream.abort $! if stream.respond_to? :abort
            raise $!
          end
        end
      ensure
        Log::ProgressBar.remove_bar(progress_monitor) if progress_monitor
        stream.close unless stream.closed?
        stream.join if stream.respond_to? :join and not stream.joined?
      end

      self
    end

    # Delegates to TSV.identify_field with this parser's key field and fields.
    def identify_field(field)
      TSV.identify_field(key_field, fields, field)
    end

    # Rewinds the stream, reopening it from +filename+ first when it is closed.
    def rewind
      stream.reopen(filename, "r") if stream.closed? and filename
      stream.rewind
    end

    # Convenience: builds a parser over +stream+ and traverses it with +block+.
    def self.traverse(stream, options = {}, &block)
      parser = Parser.new(stream, options)
      parser.traverse(options, &block)
    end
  end
end
|
1
|
+
#require 'rbbt/util/cmd'
|
2
|
+
#module TSV
|
3
|
+
# class Parser
|
4
|
+
# attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble, :identifiers, :header_options
|
5
|
+
#
|
6
|
+
# class SKIP_LINE < Exception; end
|
7
|
+
# class END_PARSING < Exception; end
|
8
|
+
#
|
9
|
+
# def all_fields
|
10
|
+
# all = [key_field] + fields
|
11
|
+
# # ToDo: What was this for?
|
12
|
+
# #NamedArray.setup all, all
|
13
|
+
# all
|
14
|
+
# end
|
15
|
+
#
|
16
|
+
# def parse_header(stream)
|
17
|
+
# raise "Closed stream" if IO === stream && stream.closed?
|
18
|
+
#
|
19
|
+
# options = {}
|
20
|
+
# @preamble = []
|
21
|
+
#
|
22
|
+
# # Get line
|
23
|
+
#
|
24
|
+
# #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
25
|
+
# line = stream.gets
|
26
|
+
# return {} if line.nil?
|
27
|
+
# #raise "Empty content: #{ stream.inspect }" if line.nil?
|
28
|
+
# line = Misc.fixutf8 line.chomp
|
29
|
+
#
|
30
|
+
# # Process options line
|
31
|
+
#
|
32
|
+
# if line and (String === @header_hash and line =~ /^#{@header_hash}: (.*)/)
|
33
|
+
# options = Misc.string2hash $1.chomp
|
34
|
+
# line = stream.gets
|
35
|
+
# line = Misc.fixutf8 line.chomp if line
|
36
|
+
# end
|
37
|
+
#
|
38
|
+
# # Determine separator
|
39
|
+
#
|
40
|
+
# @sep = options[:sep] if options[:sep]
|
41
|
+
#
|
42
|
+
# # Process fields line
|
43
|
+
#
|
44
|
+
# preamble << line if line
|
45
|
+
# while line && (TrueClass === @header_hash || (String === @header_hash && Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
|
46
|
+
# @fields = line.split(@sep, -1)
|
47
|
+
# @key_field = @fields.shift
|
48
|
+
# @key_field = @key_field[(0 + header_hash.length)..-1] if String === @header_hash
|
49
|
+
#
|
50
|
+
# #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
|
51
|
+
# line = (@header_hash != "" ? stream.gets : nil)
|
52
|
+
# line = Misc.fixutf8 line.chomp if line
|
53
|
+
# preamble << line if line
|
54
|
+
# @header_hash = false if TrueClass === @header_hash || @header_hash == ""
|
55
|
+
# end
|
56
|
+
#
|
57
|
+
# @preamble = preamble[0..-3] * "\n"
|
58
|
+
#
|
59
|
+
# line ||= stream.gets
|
60
|
+
#
|
61
|
+
# @first_line = line
|
62
|
+
#
|
63
|
+
# options
|
64
|
+
# end
|
65
|
+
#
|
66
|
+
# def process(line)
|
67
|
+
# l = line.chomp
|
68
|
+
# raise SKIP_LINE if l[0] == "#"[0] or (Proc === @select and not @select.call l)
|
69
|
+
# l = @fix.call l if Proc === @fix
|
70
|
+
# raise END_PARSING unless l
|
71
|
+
# l
|
72
|
+
# end
|
73
|
+
#
|
74
|
+
# def cast?
|
75
|
+
# !! @cast
|
76
|
+
# end
|
77
|
+
#
|
78
|
+
# def chop_line(line)
|
79
|
+
# @sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
|
80
|
+
# end
|
81
|
+
#
|
82
|
+
# def get_values_single_from_flat(parts)
|
83
|
+
# return parts.shift, parts.first if field_positions.nil? and key_position.nil?
|
84
|
+
# if key_position == 0
|
85
|
+
# [parts.shift, parts.first]
|
86
|
+
# else
|
87
|
+
# key = parts.shift
|
88
|
+
# [parts, key]
|
89
|
+
# end
|
90
|
+
#
|
91
|
+
# end
|
92
|
+
#
|
93
|
+
# def get_values_double_from_flat(parts)
|
94
|
+
# return [parts.shift], [parts.flatten] if field_positions.nil? and key_position.nil?
|
95
|
+
# if key_position == 0
|
96
|
+
# [parts.shift, [parts.flatten]]
|
97
|
+
# else
|
98
|
+
# value = parts.shift
|
99
|
+
# keys = parts.flatten
|
100
|
+
# [keys, [[value]]]
|
101
|
+
# end
|
102
|
+
#
|
103
|
+
# end
|
104
|
+
#
|
105
|
+
# def get_values_single(parts)
|
106
|
+
# return parts.shift, parts.first if field_positions.nil? and key_position.nil?
|
107
|
+
# key = parts[key_position]
|
108
|
+
# value = parts[(field_positions.nil? or field_positions.empty?) ? 0 : field_positions.first]
|
109
|
+
# [key, value]
|
110
|
+
# end
|
111
|
+
#
|
112
|
+
# def get_values_list(parts)
|
113
|
+
# return parts.shift, parts if field_positions.nil? and key_position.nil?
|
114
|
+
# key = parts[key_position]
|
115
|
+
#
|
116
|
+
# values = case
|
117
|
+
# when field_positions.nil?
|
118
|
+
# parts.tap{|o| o.delete_at key_position}
|
119
|
+
# when field_positions.empty?
|
120
|
+
# []
|
121
|
+
# else
|
122
|
+
# parts.values_at *field_positions
|
123
|
+
# end
|
124
|
+
#
|
125
|
+
# [key, values]
|
126
|
+
# end
|
127
|
+
#
|
128
|
+
# def get_values_double(parts)
|
129
|
+
# return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil? and key_position.nil?
|
130
|
+
# keys = parts[key_position].split(@sep2, -1)
|
131
|
+
# values = case
|
132
|
+
# when field_positions.nil?
|
133
|
+
# parts.tap{|o| o.delete_at key_position}
|
134
|
+
# when field_positions.empty?
|
135
|
+
# []
|
136
|
+
# else
|
137
|
+
# parts.values_at *field_positions
|
138
|
+
# end.collect{|value| (value.nil? || value.empty?) ? [""] : value.split(@sep2, -1) }
|
139
|
+
# [keys, values]
|
140
|
+
# end
|
141
|
+
#
|
142
|
+
# def get_values_flat_inverse(parts)
|
143
|
+
# value = parts.shift
|
144
|
+
# keys = parts
|
145
|
+
# [keys, [value]]
|
146
|
+
# end
|
147
|
+
#
|
148
|
+
#
|
149
|
+
# def get_values_flat_merge(parts)
|
150
|
+
# begin
|
151
|
+
# orig = parts
|
152
|
+
#
|
153
|
+
# if key_position and key_position != 0 and field_positions.nil?
|
154
|
+
# value = parts.shift.split(@sep2, -1)
|
155
|
+
# keys = parts.collect{|p| p.split(@sep2, -1) }.flatten
|
156
|
+
# return [keys, value]
|
157
|
+
# end
|
158
|
+
#
|
159
|
+
# return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)}.flatten if
|
160
|
+
# field_positions.nil? and (key_position.nil? or key_position == 0)
|
161
|
+
# rescue
|
162
|
+
# raise $!
|
163
|
+
# end
|
164
|
+
#
|
165
|
+
# if key_position and key_position != 0 and @header_options[:type] == :flat
|
166
|
+
# keys = parts[1..-1]
|
167
|
+
# else
|
168
|
+
# str = parts[key_position]
|
169
|
+
# keys = str.split(@sep2, -1)
|
170
|
+
# end
|
171
|
+
#
|
172
|
+
# if @take_all
|
173
|
+
# values = parts.collect{|e| e.split(@sep2, -1) }.flatten
|
174
|
+
# else
|
175
|
+
# if field_positions.nil?
|
176
|
+
# parts.delete_at key_position
|
177
|
+
# values = parts.first
|
178
|
+
# else
|
179
|
+
# values = parts[field_positions.first]
|
180
|
+
# end
|
181
|
+
#
|
182
|
+
# values = values.split(@sep2, -1)
|
183
|
+
# end
|
184
|
+
#
|
185
|
+
# [keys, values]
|
186
|
+
# end
|
187
|
+
#
|
188
|
+
# def get_values_flat(parts)
|
189
|
+
# keys, values = get_values_flat_merge(parts)
|
190
|
+
# [keys.first, values]
|
191
|
+
# end
|
192
|
+
#
|
193
|
+
#
|
194
|
+
# def add_to_data_no_merge_list(data, key, values)
|
195
|
+
# data[key] = values unless data.include? key
|
196
|
+
# nil
|
197
|
+
# end
|
198
|
+
#
|
199
|
+
# def add_to_data_flat_keys(data, key, values)
|
200
|
+
# data[key] = values unless data.include? key
|
201
|
+
# nil
|
202
|
+
# end
|
203
|
+
#
|
204
|
+
# def add_to_data_flat(data, key, values)
|
205
|
+
# data[key] = values unless data.include? key
|
206
|
+
# nil
|
207
|
+
# end
|
208
|
+
#
|
209
|
+
# def add_to_data_flat_merge(data, key, values)
|
210
|
+
# if data.include? key
|
211
|
+
# data[key] = data[key].concat values
|
212
|
+
# else
|
213
|
+
# data[key] = values
|
214
|
+
# end
|
215
|
+
# nil
|
216
|
+
# end
|
217
|
+
#
|
218
|
+
# def add_to_data_flat_merge_double(data, keys, values)
|
219
|
+
# data.write
|
220
|
+
# keys.each do |key|
|
221
|
+
# if data.include? key
|
222
|
+
# data[key] = data[key].concat values
|
223
|
+
# else
|
224
|
+
# data[key] = values
|
225
|
+
# end
|
226
|
+
# end
|
227
|
+
# nil
|
228
|
+
# end
|
229
|
+
#
|
230
|
+
# def add_to_data_flat_merge_keys(data, keys, values)
|
231
|
+
# keys.each do |key|
|
232
|
+
# if data.include? key
|
233
|
+
# data[key] = data[key].concat values
|
234
|
+
# else
|
235
|
+
# data[key] = values.dup
|
236
|
+
# end
|
237
|
+
# end
|
238
|
+
# nil
|
239
|
+
# end
|
240
|
+
#
|
241
|
+
# def add_to_data_no_merge_double(data, keys, values)
|
242
|
+
# keys = [keys] unless Array === keys
|
243
|
+
# keys.each do |key|
|
244
|
+
# next if data.include? key
|
245
|
+
# data[key] = values
|
246
|
+
# end
|
247
|
+
# nil
|
248
|
+
# end
|
249
|
+
#
|
250
|
+
# def add_to_data_merge(data, keys, values)
|
251
|
+
# keys.uniq.each do |key|
|
252
|
+
# if data.include? key
|
253
|
+
# new = data[key]
|
254
|
+
# new.each_with_index do |old, i|
|
255
|
+
# next if values[i].nil?
|
256
|
+
# if old.nil?
|
257
|
+
# new[i] = values[i]
|
258
|
+
# else
|
259
|
+
# old.concat values[i]
|
260
|
+
# end
|
261
|
+
# end
|
262
|
+
# data[key] = new
|
263
|
+
# else
|
264
|
+
# data[key] = values
|
265
|
+
# end
|
266
|
+
# end
|
267
|
+
# nil
|
268
|
+
# end
|
269
|
+
#
|
270
|
+
# def add_to_data_merge_zipped(data, keys, values)
|
271
|
+
# keys = [keys] unless Array === keys
|
272
|
+
# num = keys.length
|
273
|
+
#
|
274
|
+
# values = values.collect do |v|
|
275
|
+
# (v.nil? || v.empty?) ? [""] : v
|
276
|
+
# end
|
277
|
+
#
|
278
|
+
# if values.first.length > 1 and num == 1
|
279
|
+
# keys = keys * values.first.length
|
280
|
+
# num = keys.length
|
281
|
+
# end
|
282
|
+
#
|
283
|
+
# values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
284
|
+
#
|
285
|
+
# all = values
|
286
|
+
# all.unshift keys
|
287
|
+
# Misc.zip_fields(all).each do |vs|
|
288
|
+
# key = vs.shift
|
289
|
+
# if data.include? key
|
290
|
+
# data[key] = data[key].zip(vs).collect do |old, new|
|
291
|
+
# old + [new]
|
292
|
+
# end
|
293
|
+
# else
|
294
|
+
# data[key] = vs.collect{|v| [v] }
|
295
|
+
# end
|
296
|
+
# end
|
297
|
+
#
|
298
|
+
# nil
|
299
|
+
# end
|
300
|
+
#
|
301
|
+
# def add_to_data_zipped(data, keys, values)
|
302
|
+
# num = keys.length
|
303
|
+
#
|
304
|
+
# if values.first.length > 1 and num == 1
|
305
|
+
# keys = keys * values.first.length
|
306
|
+
# num = keys.length
|
307
|
+
# end
|
308
|
+
#
|
309
|
+
# values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
310
|
+
# all = values.unshift keys
|
311
|
+
# Misc.zip_fields(all).each do |values|
|
312
|
+
# key = values.shift
|
313
|
+
# next if data.include? key
|
314
|
+
# data[key] = values.collect{|v| [v]}
|
315
|
+
# end
|
316
|
+
# nil
|
317
|
+
# end
|
318
|
+
#
|
319
|
+
#
|
320
|
+
# def cast_values_single(value)
|
321
|
+
# case
|
322
|
+
# when (value.nil? or value.empty?)
|
323
|
+
# nil
|
324
|
+
# when Symbol === cast
|
325
|
+
# value.send(cast)
|
326
|
+
# when Proc === cast
|
327
|
+
# cast.call value
|
328
|
+
# end
|
329
|
+
# end
|
330
|
+
#
|
331
|
+
# def cast_values_list(values)
|
332
|
+
# case
|
333
|
+
# when Symbol === cast
|
334
|
+
# values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
|
335
|
+
# when Proc === cast
|
336
|
+
# values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v)}
|
337
|
+
# end
|
338
|
+
# end
|
339
|
+
#
|
340
|
+
# def cast_values_flat(values)
|
341
|
+
# case
|
342
|
+
# when Symbol === cast
|
343
|
+
# values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
|
344
|
+
# when Proc === cast
|
345
|
+
# values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }
|
346
|
+
# end
|
347
|
+
# end
|
348
|
+
#
|
349
|
+
# def cast_values_double(values)
|
350
|
+
# case
|
351
|
+
# when Symbol === cast
|
352
|
+
# values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}}
|
353
|
+
# when Proc === cast
|
354
|
+
# values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }}
|
355
|
+
# end
|
356
|
+
# end
|
357
|
+
#
|
358
|
+
# def rescue_first_line
|
359
|
+
# @first_line
|
360
|
+
# end
|
361
|
+
#
|
362
|
+
# def fix_fields(options)
|
363
|
+
# key_field = Misc.process_options options, :key_field
|
364
|
+
# fields = Misc.process_options options, :fields
|
365
|
+
#
|
366
|
+
# if (key_field.nil? or key_field == 0 or key_field == :key) and
|
367
|
+
# (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
|
368
|
+
#
|
369
|
+
# @straight = true
|
370
|
+
# return
|
371
|
+
# else
|
372
|
+
# @straight = false
|
373
|
+
#
|
374
|
+
# case
|
375
|
+
# when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
|
376
|
+
# @key_position = 0
|
377
|
+
# when Integer === key_field
|
378
|
+
# @key_position = key_field
|
379
|
+
# when String === key_field
|
380
|
+
# @key_position = @fields.dup.unshift(@key_field).index key_field
|
381
|
+
# raise "Key field #{ key_field } was not found" if @key_position.nil?
|
382
|
+
# when :key == key_field
|
383
|
+
# @key_position = 0
|
384
|
+
# else
|
385
|
+
# raise "Format of key_field not understood: #{key_field.inspect}"
|
386
|
+
# end
|
387
|
+
#
|
388
|
+
# if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
|
389
|
+
# if not @fields.nil? and type != :flat
|
390
|
+
# @field_positions = (0..@fields.length).to_a
|
391
|
+
# @field_positions.delete @key_position
|
392
|
+
# end
|
393
|
+
# else
|
394
|
+
# fields = [fields] if not Array === fields
|
395
|
+
# @field_positions = fields.collect{|field|
|
396
|
+
# case
|
397
|
+
# when Integer === field
|
398
|
+
# field
|
399
|
+
# when String === field
|
400
|
+
# pos = @fields.dup.unshift(@key_field).index field
|
401
|
+
# raise "Field not identified: #{ field }" if pos.nil?
|
402
|
+
# pos
|
403
|
+
# else
|
404
|
+
# raise "Format of fields not understood: #{field.inspect}"
|
405
|
+
# end
|
406
|
+
# }
|
407
|
+
# end
|
408
|
+
#
|
409
|
+
# new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
|
410
|
+
# @fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil? and not @field_positions.nil?
|
411
|
+
# @fields ||= fields if Array === fields and String === fields.first
|
412
|
+
# @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
|
413
|
+
# @key_field = new_key_field
|
414
|
+
# @key_field ||= key_field if String === key_field
|
415
|
+
#
|
416
|
+
# end
|
417
|
+
# end
|
418
|
+
#
|
419
|
+
# def initialize(stream = nil, options = {})
|
420
|
+
# @header_hash = Misc.process_options(options, :header_hash) || "#"
|
421
|
+
# @sep = Misc.process_options(options, :sep) || "\t"
|
422
|
+
# @tsv_grep = Misc.process_options(options, :tsv_grep)
|
423
|
+
# stream = TSV.get_stream stream
|
424
|
+
# @stream = stream
|
425
|
+
#
|
426
|
+
#
|
427
|
+
# @header_options = parse_header(stream)
|
428
|
+
#
|
429
|
+
# options = @header_options.merge options
|
430
|
+
# options = Misc.add_defaults options, :fields => [1] if options[:type] == :single and options[:fields].nil?
|
431
|
+
#
|
432
|
+
# @type ||= Misc.process_options(options, :type) || :double
|
433
|
+
# @type ||= :double
|
434
|
+
#
|
435
|
+
# @identifiers = Misc.process_options(options, :identifiers)
|
436
|
+
#
|
437
|
+
# @filename = Misc.process_options(options, :filename)
|
438
|
+
# @filename ||= stream.filename if stream.respond_to? :filename
|
439
|
+
#
|
440
|
+
# @sep2 = Misc.process_options(options, :sep2) || "|"
|
441
|
+
# @cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
|
442
|
+
# @type ||= Misc.process_options options, :type
|
443
|
+
# @fix = Misc.process_options(options, :fix)
|
444
|
+
# @select= Misc.process_options options, :select
|
445
|
+
# @zipped = Misc.process_options options, :zipped
|
446
|
+
# @namespace = Misc.process_options options, :namespace
|
447
|
+
# merge = Misc.process_options(options, :merge)
|
448
|
+
# merge = @zipped if merge.nil?
|
449
|
+
# merge = false if merge.nil?
|
450
|
+
#
|
451
|
+
# fields = options[:fields]
|
452
|
+
# fix_fields(options)
|
453
|
+
#
|
454
|
+
# @type = @type.strip.to_sym if String === @type
|
455
|
+
# #@type ||= :double if merge == true
|
456
|
+
#
|
457
|
+
# case @type
|
458
|
+
# when :double
|
459
|
+
# if @header_options[:type] == :flat
|
460
|
+
# self.instance_eval do alias get_values get_values_double_from_flat end
|
461
|
+
# else
|
462
|
+
# self.instance_eval do alias get_values get_values_double end
|
463
|
+
# end
|
464
|
+
# self.instance_eval do alias cast_values cast_values_double end
|
465
|
+
# case
|
466
|
+
# when (merge and not zipped)
|
467
|
+
# self.instance_eval do alias add_to_data add_to_data_merge end
|
468
|
+
# when (merge and zipped)
|
469
|
+
# self.instance_eval do alias add_to_data add_to_data_merge_zipped end
|
470
|
+
# when zipped
|
471
|
+
# self.instance_eval do alias add_to_data add_to_data_zipped end
|
472
|
+
# else
|
473
|
+
# self.instance_eval do alias add_to_data add_to_data_no_merge_double end
|
474
|
+
# end
|
475
|
+
# when :single
|
476
|
+
# if @header_options[:type] == :flat
|
477
|
+
# self.instance_eval do alias get_values get_values_single_from_flat end
|
478
|
+
# self.instance_eval do alias cast_values cast_values_single end
|
479
|
+
# self.instance_eval do alias add_to_data add_to_data_no_merge_double end
|
480
|
+
# else
|
481
|
+
# self.instance_eval do alias get_values get_values_single end
|
482
|
+
# self.instance_eval do alias cast_values cast_values_single end
|
483
|
+
# self.instance_eval do alias add_to_data add_to_data_no_merge_list end
|
484
|
+
# end
|
485
|
+
# when :list
|
486
|
+
# self.instance_eval do alias get_values get_values_list end
|
487
|
+
# self.instance_eval do alias cast_values cast_values_list end
|
488
|
+
# self.instance_eval do alias add_to_data add_to_data_no_merge_list end
|
489
|
+
#
|
490
|
+
# when :flat
|
491
|
+
# @take_all = true if field_positions.nil?
|
492
|
+
# self.instance_eval do alias cast_values cast_values_flat end
|
493
|
+
# merge = true if key_position and key_position != 0 and field_positions.nil?
|
494
|
+
# if merge
|
495
|
+
# self.instance_eval do alias get_values get_values_flat_merge end
|
496
|
+
# if key_position and key_position != 0 and field_positions.nil?
|
497
|
+
# self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
|
498
|
+
# else
|
499
|
+
# self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
|
500
|
+
# end
|
501
|
+
# else
|
502
|
+
# self.instance_eval do alias get_values get_values_flat_merge end
|
503
|
+
# if key_position and key_position != 0 and field_positions.nil?
|
504
|
+
# self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
|
505
|
+
# else
|
506
|
+
# self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
|
507
|
+
# end
|
508
|
+
# end
|
509
|
+
# else
|
510
|
+
# raise "Unknown TSV type: #{@type.inspect}"
|
511
|
+
# end
|
512
|
+
#
|
513
|
+
# @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
|
514
|
+
# end
|
515
|
+
#
|
516
|
+
# def setup(data)
|
517
|
+
# data.extend TSV unless TSV === data
|
518
|
+
# data.type = @type
|
519
|
+
# data.key_field = @key_field
|
520
|
+
# data.fields = @fields.nil? ? nil : @fields.dup
|
521
|
+
# data.namespace = @namespace
|
522
|
+
# data.filename = @filename
|
523
|
+
# data.identifiers = @identifiers
|
524
|
+
# data.cast = @cast if Symbol === @cast
|
525
|
+
# data
|
526
|
+
# end
|
527
|
+
#
|
528
|
+
# def annotate(data)
|
529
|
+
# setup(data)
|
530
|
+
# end
|
531
|
+
#
|
532
|
+
# def options
|
533
|
+
# options = {}
|
534
|
+
# TSV::ENTRIES.each do |entry|
|
535
|
+
# if self.respond_to? entry
|
536
|
+
# value = self.send(entry)
|
537
|
+
# options[entry.to_sym] = value unless value.nil?
|
538
|
+
# end
|
539
|
+
# end
|
540
|
+
# options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
|
541
|
+
# IndiferentHash.setup options
|
542
|
+
# end
|
543
|
+
#
|
544
|
+
# def traverse(options = {})
|
545
|
+
# monitor, bar, grep, invert_grep, head, fixed_grep = Misc.process_options options, :monitor, :bar, :grep, :invert_grep, :head, :fixed_grep
|
546
|
+
# monitor = bar if bar and monitor.nil?
|
547
|
+
# raise "No block given in TSV::Parser#traverse" unless block_given?
|
548
|
+
#
|
549
|
+
# stream = @stream
|
550
|
+
#
|
551
|
+
#
|
552
|
+
# # first line
|
553
|
+
# line = self.rescue_first_line
|
554
|
+
# line = stream.gets if line.nil?
|
555
|
+
#
|
556
|
+
# if @tsv_grep || grep
|
557
|
+
#
|
558
|
+
# stream = Open.grep(stream, @tsv_grep || grep, invert_grep, fixed_grep)
|
559
|
+
# stream.no_fail = true
|
560
|
+
# begin
|
561
|
+
# match = Open.grep(StringIO.new(line), @tsv_grep || grep, invert_grep, fixed_grep).read
|
562
|
+
# line = stream.gets if match.empty?
|
563
|
+
# rescue Exception
|
564
|
+
# Log.exception $!
|
565
|
+
# line = stream.gets
|
566
|
+
# end
|
567
|
+
# end
|
568
|
+
#
|
569
|
+
# progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
|
570
|
+
#
|
571
|
+
# # setup monitor
|
572
|
+
# if monitor && (stream.respond_to?(:size) || (stream.respond_to?(:stat) && stream.stat.file? && stream.stat.respond_to?(:size) && stream.respond_to?(:pos)))
|
573
|
+
# size = case
|
574
|
+
# when stream.respond_to?(:size)
|
575
|
+
# stream.size
|
576
|
+
# else
|
577
|
+
# stream.stat.size
|
578
|
+
# end
|
579
|
+
# size = nil if size.to_i == 0
|
580
|
+
# desc = "Parsing Stream"
|
581
|
+
# step = 100
|
582
|
+
# if Hash === monitor
|
583
|
+
# desc = monitor[:desc] if monitor.include? :desc
|
584
|
+
# step = monitor[:step] if monitor.include? :step
|
585
|
+
# end
|
586
|
+
# progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
|
587
|
+
# elsif progress_monitor
|
588
|
+
#
|
589
|
+
# size = case
|
590
|
+
# when stream.respond_to?(:size)
|
591
|
+
# stream.size
|
592
|
+
# else
|
593
|
+
# stream.stat.size
|
594
|
+
# end
|
595
|
+
#
|
596
|
+
# progress_monitor.bytes = true
|
597
|
+
# progress_monitor.max = size unless size.to_i == 0
|
598
|
+
# elsif monitor
|
599
|
+
# desc = "Parsing Stream"
|
600
|
+
# step = 100
|
601
|
+
# size = nil
|
602
|
+
# if Hash === monitor
|
603
|
+
# desc = monitor[:desc] if monitor.include? :desc
|
604
|
+
# step = monitor[:step] if monitor.include? :step
|
605
|
+
# end
|
606
|
+
# progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
|
607
|
+
# end
|
608
|
+
#
|
609
|
+
# # parser
|
610
|
+
# line_num = 1
|
611
|
+
# begin
|
612
|
+
#
|
613
|
+
# while not line.nil?
|
614
|
+
# begin
|
615
|
+
# if progress_monitor
|
616
|
+
# progress_monitor.tick(line.bytesize)
|
617
|
+
# end
|
618
|
+
#
|
619
|
+
# raise SKIP_LINE if line.empty?
|
620
|
+
#
|
621
|
+
# line = Misc.fixutf8(line)
|
622
|
+
# line = self.process line
|
623
|
+
# raise SKIP_LINE if line.empty?
|
624
|
+
# parts = self.chop_line line
|
625
|
+
# key, values = self.get_values parts
|
626
|
+
# values = self.cast_values values if self.cast?
|
627
|
+
#
|
628
|
+
# yield key, values, fields
|
629
|
+
#
|
630
|
+
# line = stream.gets
|
631
|
+
#
|
632
|
+
# line_num += 1
|
633
|
+
# raise END_PARSING if head and line_num > head.to_i
|
634
|
+
# rescue SKIP_LINE
|
635
|
+
# begin
|
636
|
+
# line = stream.gets
|
637
|
+
# next
|
638
|
+
# rescue IOError
|
639
|
+
# break
|
640
|
+
# end
|
641
|
+
# rescue END_PARSING
|
642
|
+
# stream.close unless stream.closed?
|
643
|
+
# begin stream.join; rescue Exception; end if stream.respond_to? :join and not stream.joined?
|
644
|
+
# break
|
645
|
+
# rescue Errno::EPIPE
|
646
|
+
# Log.error "Pipe closed while parsing #{Misc.fingerprint stream}: #{$!.message}"
|
647
|
+
# stream.abort if stream.respond_to? :abort
|
648
|
+
# raise $!
|
649
|
+
# rescue Exception
|
650
|
+
# Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
|
651
|
+
# stream.abort $! if stream.respond_to? :abort
|
652
|
+
# raise $!
|
653
|
+
# end
|
654
|
+
# end
|
655
|
+
# ensure
|
656
|
+
# Log::ProgressBar.remove_bar(progress_monitor) if progress_monitor
|
657
|
+
# stream.close unless stream.closed?
|
658
|
+
# stream.join if stream.respond_to? :join and not stream.joined?
|
659
|
+
# end
|
660
|
+
#
|
661
|
+
# self
|
662
|
+
# end
|
663
|
+
#
|
664
|
+
# def identify_field(field)
|
665
|
+
# TSV.identify_field(key_field, fields, field)
|
666
|
+
# end
|
667
|
+
#
|
668
|
+
# def rewind
|
669
|
+
# stream.reopen(filename, "r") if stream.closed? and filename
|
670
|
+
# stream.rewind
|
671
|
+
# end
|
672
|
+
#
|
673
|
+
# def self.traverse(stream, options = {}, &block)
|
674
|
+
# parser = Parser.new(stream, options)
|
675
|
+
# parser.traverse(options, &block)
|
676
|
+
# end
|
677
|
+
# end
|
678
|
+
#end
|