rbbt-util 5.44.1 → 6.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +67 -90
  3. data/etc/app.d/base.rb +2 -2
  4. data/etc/app.d/semaphores.rb +3 -3
  5. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  6. data/lib/rbbt/annotations/refactor.rb +27 -0
  7. data/lib/rbbt/annotations/util.rb +282 -282
  8. data/lib/rbbt/annotations.rb +343 -320
  9. data/lib/rbbt/association/database.rb +200 -225
  10. data/lib/rbbt/association/index.rb +294 -291
  11. data/lib/rbbt/association/item.rb +227 -227
  12. data/lib/rbbt/association/open.rb +35 -34
  13. data/lib/rbbt/association/util.rb +0 -169
  14. data/lib/rbbt/association.rb +2 -4
  15. data/lib/rbbt/entity/identifiers.rb +119 -118
  16. data/lib/rbbt/entity/refactor.rb +12 -0
  17. data/lib/rbbt/entity.rb +319 -315
  18. data/lib/rbbt/hpc/batch.rb +72 -53
  19. data/lib/rbbt/hpc/lsf.rb +2 -2
  20. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  21. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  22. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  24. data/lib/rbbt/hpc/slurm.rb +18 -18
  25. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  26. data/lib/rbbt/knowledge_base/query.rb +2 -2
  27. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  28. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  29. data/lib/rbbt/knowledge_base.rb +1 -1
  30. data/lib/rbbt/monitor.rb +36 -25
  31. data/lib/rbbt/persist/refactor.rb +166 -0
  32. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  33. data/lib/rbbt/persist/tsv.rb +187 -185
  34. data/lib/rbbt/persist.rb +556 -551
  35. data/lib/rbbt/refactor.rb +20 -0
  36. data/lib/rbbt/resource/path/refactor.rb +178 -0
  37. data/lib/rbbt/resource/path.rb +317 -497
  38. data/lib/rbbt/resource/util.rb +0 -48
  39. data/lib/rbbt/resource.rb +3 -390
  40. data/lib/rbbt/tsv/accessor.rb +2 -838
  41. data/lib/rbbt/tsv/attach.rb +303 -299
  42. data/lib/rbbt/tsv/change_id.rb +244 -245
  43. data/lib/rbbt/tsv/csv.rb +87 -85
  44. data/lib/rbbt/tsv/dumper.rb +2 -100
  45. data/lib/rbbt/tsv/excel.rb +26 -24
  46. data/lib/rbbt/tsv/field_index.rb +4 -1
  47. data/lib/rbbt/tsv/filter.rb +3 -2
  48. data/lib/rbbt/tsv/index.rb +2 -284
  49. data/lib/rbbt/tsv/manipulate.rb +750 -747
  50. data/lib/rbbt/tsv/marshal.rb +3 -3
  51. data/lib/rbbt/tsv/matrix.rb +2 -2
  52. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  53. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  54. data/lib/rbbt/tsv/parser.rb +678 -678
  55. data/lib/rbbt/tsv/refactor.rb +195 -0
  56. data/lib/rbbt/tsv/stream.rb +253 -251
  57. data/lib/rbbt/tsv/util.rb +420 -420
  58. data/lib/rbbt/tsv.rb +210 -208
  59. data/lib/rbbt/util/R/eval.rb +4 -4
  60. data/lib/rbbt/util/R/plot.rb +62 -166
  61. data/lib/rbbt/util/R.rb +21 -18
  62. data/lib/rbbt/util/cmd.rb +2 -318
  63. data/lib/rbbt/util/color.rb +269 -269
  64. data/lib/rbbt/util/colorize.rb +89 -89
  65. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  66. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  67. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  68. data/lib/rbbt/util/config.rb +169 -167
  69. data/lib/rbbt/util/iruby.rb +20 -0
  70. data/lib/rbbt/util/log/progress/report.rb +241 -241
  71. data/lib/rbbt/util/log/progress/util.rb +99 -99
  72. data/lib/rbbt/util/log/progress.rb +102 -102
  73. data/lib/rbbt/util/log/refactor.rb +49 -0
  74. data/lib/rbbt/util/log.rb +486 -532
  75. data/lib/rbbt/util/migrate.rb +1 -1
  76. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  77. data/lib/rbbt/util/misc/development.rb +12 -11
  78. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  79. data/lib/rbbt/util/misc/format.rb +2 -230
  80. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  81. data/lib/rbbt/util/misc/inspect.rb +2 -476
  82. data/lib/rbbt/util/misc/lock.rb +109 -106
  83. data/lib/rbbt/util/misc/omics.rb +9 -1
  84. data/lib/rbbt/util/misc/pipes.rb +765 -793
  85. data/lib/rbbt/util/misc/refactor.rb +20 -0
  86. data/lib/rbbt/util/misc/ssw.rb +27 -17
  87. data/lib/rbbt/util/misc/system.rb +0 -15
  88. data/lib/rbbt/util/misc.rb +39 -20
  89. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  90. data/lib/rbbt/util/named_array.rb +3 -220
  91. data/lib/rbbt/util/open/refactor.rb +7 -0
  92. data/lib/rbbt/util/open.rb +3 -857
  93. data/lib/rbbt/util/procpath.rb +6 -6
  94. data/lib/rbbt/util/python/paths.rb +27 -0
  95. data/lib/rbbt/util/python/run.rb +115 -0
  96. data/lib/rbbt/util/python/script.rb +110 -0
  97. data/lib/rbbt/util/python/util.rb +3 -3
  98. data/lib/rbbt/util/python.rb +22 -81
  99. data/lib/rbbt/util/semaphore.rb +152 -148
  100. data/lib/rbbt/util/simpleopt.rb +9 -8
  101. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  102. data/lib/rbbt/util/ssh.rb +122 -118
  103. data/lib/rbbt/util/tar.rb +117 -115
  104. data/lib/rbbt/util/tmpfile.rb +69 -67
  105. data/lib/rbbt/util/version.rb +2 -0
  106. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  107. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  108. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  109. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  110. data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
  111. data/lib/rbbt/workflow/refactor.rb +153 -0
  112. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  113. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  114. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  115. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  116. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  117. data/lib/rbbt/workflow/step/run.rb +766 -766
  118. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  119. data/lib/rbbt/workflow/step.rb +2 -362
  120. data/lib/rbbt/workflow/task.rb +118 -118
  121. data/lib/rbbt/workflow/usage.rb +289 -287
  122. data/lib/rbbt/workflow/util/archive.rb +6 -5
  123. data/lib/rbbt/workflow/util/data.rb +1 -1
  124. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  125. data/lib/rbbt/workflow/util/trace.rb +79 -44
  126. data/lib/rbbt/workflow.rb +4 -882
  127. data/lib/rbbt-util.rb +21 -13
  128. data/lib/rbbt.rb +16 -3
  129. data/python/rbbt/__init__.py +19 -1
  130. data/share/Rlib/plot.R +37 -37
  131. data/share/Rlib/svg.R +22 -5
  132. data/share/install/software/lib/install_helpers +1 -1
  133. data/share/rbbt_commands/hpc/list +2 -3
  134. data/share/rbbt_commands/hpc/orchestrate +4 -4
  135. data/share/rbbt_commands/hpc/tail +2 -0
  136. data/share/rbbt_commands/hpc/task +10 -7
  137. data/share/rbbt_commands/lsf/list +2 -3
  138. data/share/rbbt_commands/lsf/orchestrate +4 -4
  139. data/share/rbbt_commands/lsf/tail +2 -0
  140. data/share/rbbt_commands/lsf/task +10 -7
  141. data/share/rbbt_commands/migrate +1 -1
  142. data/share/rbbt_commands/pbs/list +2 -3
  143. data/share/rbbt_commands/pbs/orchestrate +4 -4
  144. data/share/rbbt_commands/pbs/tail +2 -0
  145. data/share/rbbt_commands/pbs/task +10 -7
  146. data/share/rbbt_commands/resource/produce +8 -1
  147. data/share/rbbt_commands/slurm/list +2 -3
  148. data/share/rbbt_commands/slurm/orchestrate +4 -4
  149. data/share/rbbt_commands/slurm/tail +2 -0
  150. data/share/rbbt_commands/slurm/task +10 -7
  151. data/share/rbbt_commands/system/clean +5 -5
  152. data/share/rbbt_commands/system/status +5 -5
  153. data/share/rbbt_commands/tsv/get +2 -3
  154. data/share/rbbt_commands/tsv/info +10 -13
  155. data/share/rbbt_commands/tsv/keys +18 -14
  156. data/share/rbbt_commands/tsv/slice +2 -2
  157. data/share/rbbt_commands/tsv/transpose +6 -2
  158. data/share/rbbt_commands/workflow/info +20 -24
  159. data/share/rbbt_commands/workflow/list +1 -1
  160. data/share/rbbt_commands/workflow/prov +20 -13
  161. data/share/rbbt_commands/workflow/server +11 -1
  162. data/share/rbbt_commands/workflow/task +76 -71
  163. data/share/rbbt_commands/workflow/write_info +26 -9
  164. data/share/software/opt/ssw/ssw.c +861 -0
  165. data/share/software/opt/ssw/ssw.h +130 -0
  166. data/share/workflow_config.ru +3 -3
  167. metadata +40 -2
@@ -1,678 +1,678 @@
1
- require 'rbbt/util/cmd'
2
- module TSV
3
- class Parser
4
- attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble, :identifiers, :header_options
5
-
6
- class SKIP_LINE < Exception; end
7
- class END_PARSING < Exception; end
8
-
9
- def all_fields
10
- all = [key_field] + fields
11
- # ToDo: What was this for?
12
- #NamedArray.setup all, all
13
- all
14
- end
15
-
16
- def parse_header(stream)
17
- raise "Closed stream" if IO === stream && stream.closed?
18
-
19
- options = {}
20
- @preamble = []
21
-
22
- # Get line
23
-
24
- #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
25
- line = stream.gets
26
- return {} if line.nil?
27
- #raise "Empty content: #{ stream.inspect }" if line.nil?
28
- line = Misc.fixutf8 line.chomp
29
-
30
- # Process options line
31
-
32
- if line and (String === @header_hash and line =~ /^#{@header_hash}: (.*)/)
33
- options = Misc.string2hash $1.chomp
34
- line = stream.gets
35
- line = Misc.fixutf8 line.chomp if line
36
- end
37
-
38
- # Determine separator
39
-
40
- @sep = options[:sep] if options[:sep]
41
-
42
- # Process fields line
43
-
44
- preamble << line if line
45
- while line && (TrueClass === @header_hash || (String === @header_hash && Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
46
- @fields = line.split(@sep, -1)
47
- @key_field = @fields.shift
48
- @key_field = @key_field[(0 + header_hash.length)..-1] if String === @header_hash
49
-
50
- #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
51
- line = (@header_hash != "" ? stream.gets : nil)
52
- line = Misc.fixutf8 line.chomp if line
53
- preamble << line if line
54
- @header_hash = false if TrueClass === @header_hash || @header_hash == ""
55
- end
56
-
57
- @preamble = preamble[0..-3] * "\n"
58
-
59
- line ||= stream.gets
60
-
61
- @first_line = line
62
-
63
- options
64
- end
65
-
66
- def process(line)
67
- l = line.chomp
68
- raise SKIP_LINE if l[0] == "#"[0] or (Proc === @select and not @select.call l)
69
- l = @fix.call l if Proc === @fix
70
- raise END_PARSING unless l
71
- l
72
- end
73
-
74
- def cast?
75
- !! @cast
76
- end
77
-
78
- def chop_line(line)
79
- @sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
80
- end
81
-
82
- def get_values_single_from_flat(parts)
83
- return parts.shift, parts.first if field_positions.nil? and key_position.nil?
84
- if key_position == 0
85
- [parts.shift, parts.first]
86
- else
87
- key = parts.shift
88
- [parts, key]
89
- end
90
-
91
- end
92
-
93
- def get_values_double_from_flat(parts)
94
- return [parts.shift], [parts.flatten] if field_positions.nil? and key_position.nil?
95
- if key_position == 0
96
- [parts.shift, [parts.flatten]]
97
- else
98
- value = parts.shift
99
- keys = parts.flatten
100
- [keys, [[value]]]
101
- end
102
-
103
- end
104
-
105
- def get_values_single(parts)
106
- return parts.shift, parts.first if field_positions.nil? and key_position.nil?
107
- key = parts[key_position]
108
- value = parts[(field_positions.nil? or field_positions.empty?) ? 0 : field_positions.first]
109
- [key, value]
110
- end
111
-
112
- def get_values_list(parts)
113
- return parts.shift, parts if field_positions.nil? and key_position.nil?
114
- key = parts[key_position]
115
-
116
- values = case
117
- when field_positions.nil?
118
- parts.tap{|o| o.delete_at key_position}
119
- when field_positions.empty?
120
- []
121
- else
122
- parts.values_at *field_positions
123
- end
124
-
125
- [key, values]
126
- end
127
-
128
- def get_values_double(parts)
129
- return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil? and key_position.nil?
130
- keys = parts[key_position].split(@sep2, -1)
131
- values = case
132
- when field_positions.nil?
133
- parts.tap{|o| o.delete_at key_position}
134
- when field_positions.empty?
135
- []
136
- else
137
- parts.values_at *field_positions
138
- end.collect{|value| (value.nil? || value.empty?) ? [""] : value.split(@sep2, -1) }
139
- [keys, values]
140
- end
141
-
142
- def get_values_flat_inverse(parts)
143
- value = parts.shift
144
- keys = parts
145
- [keys, [value]]
146
- end
147
-
148
-
149
- def get_values_flat_merge(parts)
150
- begin
151
- orig = parts
152
-
153
- if key_position and key_position != 0 and field_positions.nil?
154
- value = parts.shift.split(@sep2, -1)
155
- keys = parts.collect{|p| p.split(@sep2, -1) }.flatten
156
- return [keys, value]
157
- end
158
-
159
- return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)}.flatten if
160
- field_positions.nil? and (key_position.nil? or key_position == 0)
161
- rescue
162
- raise $!
163
- end
164
-
165
- if key_position and key_position != 0 and @header_options[:type] == :flat
166
- keys = parts[1..-1]
167
- else
168
- str = parts[key_position]
169
- keys = str.split(@sep2, -1)
170
- end
171
-
172
- if @take_all
173
- values = parts.collect{|e| e.split(@sep2, -1) }.flatten
174
- else
175
- if field_positions.nil?
176
- parts.delete_at key_position
177
- values = parts.first
178
- else
179
- values = parts[field_positions.first]
180
- end
181
-
182
- values = values.split(@sep2, -1)
183
- end
184
-
185
- [keys, values]
186
- end
187
-
188
- def get_values_flat(parts)
189
- keys, values = get_values_flat_merge(parts)
190
- [keys.first, values]
191
- end
192
-
193
-
194
- def add_to_data_no_merge_list(data, key, values)
195
- data[key] = values unless data.include? key
196
- nil
197
- end
198
-
199
- def add_to_data_flat_keys(data, key, values)
200
- data[key] = values unless data.include? key
201
- nil
202
- end
203
-
204
- def add_to_data_flat(data, key, values)
205
- data[key] = values unless data.include? key
206
- nil
207
- end
208
-
209
- def add_to_data_flat_merge(data, key, values)
210
- if data.include? key
211
- data[key] = data[key].concat values
212
- else
213
- data[key] = values
214
- end
215
- nil
216
- end
217
-
218
- def add_to_data_flat_merge_double(data, keys, values)
219
- data.write
220
- keys.each do |key|
221
- if data.include? key
222
- data[key] = data[key].concat values
223
- else
224
- data[key] = values
225
- end
226
- end
227
- nil
228
- end
229
-
230
- def add_to_data_flat_merge_keys(data, keys, values)
231
- keys.each do |key|
232
- if data.include? key
233
- data[key] = data[key].concat values
234
- else
235
- data[key] = values.dup
236
- end
237
- end
238
- nil
239
- end
240
-
241
- def add_to_data_no_merge_double(data, keys, values)
242
- keys = [keys] unless Array === keys
243
- keys.each do |key|
244
- next if data.include? key
245
- data[key] = values
246
- end
247
- nil
248
- end
249
-
250
- def add_to_data_merge(data, keys, values)
251
- keys.uniq.each do |key|
252
- if data.include? key
253
- new = data[key]
254
- new.each_with_index do |old, i|
255
- next if values[i].nil?
256
- if old.nil?
257
- new[i] = values[i]
258
- else
259
- old.concat values[i]
260
- end
261
- end
262
- data[key] = new
263
- else
264
- data[key] = values
265
- end
266
- end
267
- nil
268
- end
269
-
270
- def add_to_data_merge_zipped(data, keys, values)
271
- keys = [keys] unless Array === keys
272
- num = keys.length
273
-
274
- values = values.collect do |v|
275
- (v.nil? || v.empty?) ? [""] : v
276
- end
277
-
278
- if values.first.length > 1 and num == 1
279
- keys = keys * values.first.length
280
- num = keys.length
281
- end
282
-
283
- values = values.collect{|v| v.length != num ? [v.first] * num : v}
284
-
285
- all = values
286
- all.unshift keys
287
- Misc.zip_fields(all).each do |vs|
288
- key = vs.shift
289
- if data.include? key
290
- data[key] = data[key].zip(vs).collect do |old, new|
291
- old + [new]
292
- end
293
- else
294
- data[key] = vs.collect{|v| [v] }
295
- end
296
- end
297
-
298
- nil
299
- end
300
-
301
- def add_to_data_zipped(data, keys, values)
302
- num = keys.length
303
-
304
- if values.first.length > 1 and num == 1
305
- keys = keys * values.first.length
306
- num = keys.length
307
- end
308
-
309
- values = values.collect{|v| v.length != num ? [v.first] * num : v}
310
- all = values.unshift keys
311
- Misc.zip_fields(all).each do |values|
312
- key = values.shift
313
- next if data.include? key
314
- data[key] = values.collect{|v| [v]}
315
- end
316
- nil
317
- end
318
-
319
-
320
- def cast_values_single(value)
321
- case
322
- when (value.nil? or value.empty?)
323
- nil
324
- when Symbol === cast
325
- value.send(cast)
326
- when Proc === cast
327
- cast.call value
328
- end
329
- end
330
-
331
- def cast_values_list(values)
332
- case
333
- when Symbol === cast
334
- values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
335
- when Proc === cast
336
- values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v)}
337
- end
338
- end
339
-
340
- def cast_values_flat(values)
341
- case
342
- when Symbol === cast
343
- values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
344
- when Proc === cast
345
- values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }
346
- end
347
- end
348
-
349
- def cast_values_double(values)
350
- case
351
- when Symbol === cast
352
- values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}}
353
- when Proc === cast
354
- values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }}
355
- end
356
- end
357
-
358
- def rescue_first_line
359
- @first_line
360
- end
361
-
362
- def fix_fields(options)
363
- key_field = Misc.process_options options, :key_field
364
- fields = Misc.process_options options, :fields
365
-
366
- if (key_field.nil? or key_field == 0 or key_field == :key) and
367
- (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
368
-
369
- @straight = true
370
- return
371
- else
372
- @straight = false
373
-
374
- case
375
- when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
376
- @key_position = 0
377
- when Integer === key_field
378
- @key_position = key_field
379
- when String === key_field
380
- @key_position = @fields.dup.unshift(@key_field).index key_field
381
- raise "Key field #{ key_field } was not found" if @key_position.nil?
382
- when :key == key_field
383
- @key_position = 0
384
- else
385
- raise "Format of key_field not understood: #{key_field.inspect}"
386
- end
387
-
388
- if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
389
- if not @fields.nil? and type != :flat
390
- @field_positions = (0..@fields.length).to_a
391
- @field_positions.delete @key_position
392
- end
393
- else
394
- fields = [fields] if not Array === fields
395
- @field_positions = fields.collect{|field|
396
- case
397
- when Integer === field
398
- field
399
- when String === field
400
- pos = @fields.dup.unshift(@key_field).index field
401
- raise "Field not identified: #{ field }" if pos.nil?
402
- pos
403
- else
404
- raise "Format of fields not understood: #{field.inspect}"
405
- end
406
- }
407
- end
408
-
409
- new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
410
- @fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil? and not @field_positions.nil?
411
- @fields ||= fields if Array === fields and String === fields.first
412
- @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
413
- @key_field = new_key_field
414
- @key_field ||= key_field if String === key_field
415
-
416
- end
417
- end
418
-
419
- def initialize(stream = nil, options = {})
420
- @header_hash = Misc.process_options(options, :header_hash) || "#"
421
- @sep = Misc.process_options(options, :sep) || "\t"
422
- @tsv_grep = Misc.process_options(options, :tsv_grep)
423
- stream = TSV.get_stream stream
424
- @stream = stream
425
-
426
-
427
- @header_options = parse_header(stream)
428
-
429
- options = @header_options.merge options
430
- options = Misc.add_defaults options, :fields => [1] if options[:type] == :single and options[:fields].nil?
431
-
432
- @type ||= Misc.process_options(options, :type) || :double
433
- @type ||= :double
434
-
435
- @identifiers = Misc.process_options(options, :identifiers)
436
-
437
- @filename = Misc.process_options(options, :filename)
438
- @filename ||= stream.filename if stream.respond_to? :filename
439
-
440
- @sep2 = Misc.process_options(options, :sep2) || "|"
441
- @cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
442
- @type ||= Misc.process_options options, :type
443
- @fix = Misc.process_options(options, :fix)
444
- @select= Misc.process_options options, :select
445
- @zipped = Misc.process_options options, :zipped
446
- @namespace = Misc.process_options options, :namespace
447
- merge = Misc.process_options(options, :merge)
448
- merge = @zipped if merge.nil?
449
- merge = false if merge.nil?
450
-
451
- fields = options[:fields]
452
- fix_fields(options)
453
-
454
- @type = @type.strip.to_sym if String === @type
455
- #@type ||= :double if merge == true
456
-
457
- case @type
458
- when :double
459
- if @header_options[:type] == :flat
460
- self.instance_eval do alias get_values get_values_double_from_flat end
461
- else
462
- self.instance_eval do alias get_values get_values_double end
463
- end
464
- self.instance_eval do alias cast_values cast_values_double end
465
- case
466
- when (merge and not zipped)
467
- self.instance_eval do alias add_to_data add_to_data_merge end
468
- when (merge and zipped)
469
- self.instance_eval do alias add_to_data add_to_data_merge_zipped end
470
- when zipped
471
- self.instance_eval do alias add_to_data add_to_data_zipped end
472
- else
473
- self.instance_eval do alias add_to_data add_to_data_no_merge_double end
474
- end
475
- when :single
476
- if @header_options[:type] == :flat
477
- self.instance_eval do alias get_values get_values_single_from_flat end
478
- self.instance_eval do alias cast_values cast_values_single end
479
- self.instance_eval do alias add_to_data add_to_data_no_merge_double end
480
- else
481
- self.instance_eval do alias get_values get_values_single end
482
- self.instance_eval do alias cast_values cast_values_single end
483
- self.instance_eval do alias add_to_data add_to_data_no_merge_list end
484
- end
485
- when :list
486
- self.instance_eval do alias get_values get_values_list end
487
- self.instance_eval do alias cast_values cast_values_list end
488
- self.instance_eval do alias add_to_data add_to_data_no_merge_list end
489
-
490
- when :flat
491
- @take_all = true if field_positions.nil?
492
- self.instance_eval do alias cast_values cast_values_flat end
493
- merge = true if key_position and key_position != 0 and field_positions.nil?
494
- if merge
495
- self.instance_eval do alias get_values get_values_flat_merge end
496
- if key_position and key_position != 0 and field_positions.nil?
497
- self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
498
- else
499
- self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
500
- end
501
- else
502
- self.instance_eval do alias get_values get_values_flat_merge end
503
- if key_position and key_position != 0 and field_positions.nil?
504
- self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
505
- else
506
- self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
507
- end
508
- end
509
- else
510
- raise "Unknown TSV type: #{@type.inspect}"
511
- end
512
-
513
- @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
514
- end
515
-
516
- def setup(data)
517
- data.extend TSV unless TSV === data
518
- data.type = @type
519
- data.key_field = @key_field
520
- data.fields = @fields.nil? ? nil : @fields.dup
521
- data.namespace = @namespace
522
- data.filename = @filename
523
- data.identifiers = @identifiers
524
- data.cast = @cast if Symbol === @cast
525
- data
526
- end
527
-
528
- def annotate(data)
529
- setup(data)
530
- end
531
-
532
- def options
533
- options = {}
534
- TSV::ENTRIES.each do |entry|
535
- if self.respond_to? entry
536
- value = self.send(entry)
537
- options[entry.to_sym] = value unless value.nil?
538
- end
539
- end
540
- options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
541
- IndiferentHash.setup options
542
- end
543
-
544
- def traverse(options = {})
545
- monitor, bar, grep, invert_grep, head, fixed_grep = Misc.process_options options, :monitor, :bar, :grep, :invert_grep, :head, :fixed_grep
546
- monitor = bar if bar and monitor.nil?
547
- raise "No block given in TSV::Parser#traverse" unless block_given?
548
-
549
- stream = @stream
550
-
551
-
552
- # first line
553
- line = self.rescue_first_line
554
- line = stream.gets if line.nil?
555
-
556
- if @tsv_grep || grep
557
-
558
- stream = Open.grep(stream, @tsv_grep || grep, invert_grep, fixed_grep)
559
- stream.no_fail = true
560
- begin
561
- match = Open.grep(StringIO.new(line), @tsv_grep || grep, invert_grep, fixed_grep).read
562
- line = stream.gets if match.empty?
563
- rescue Exception
564
- Log.exception $!
565
- line = stream.gets
566
- end
567
- end
568
-
569
- progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
570
-
571
- # setup monitor
572
- if monitor && (stream.respond_to?(:size) || (stream.respond_to?(:stat) && stream.stat.file? && stream.stat.respond_to?(:size) && stream.respond_to?(:pos)))
573
- size = case
574
- when stream.respond_to?(:size)
575
- stream.size
576
- else
577
- stream.stat.size
578
- end
579
- size = nil if size.to_i == 0
580
- desc = "Parsing Stream"
581
- step = 100
582
- if Hash === monitor
583
- desc = monitor[:desc] if monitor.include? :desc
584
- step = monitor[:step] if monitor.include? :step
585
- end
586
- progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
587
- elsif progress_monitor
588
-
589
- size = case
590
- when stream.respond_to?(:size)
591
- stream.size
592
- else
593
- stream.stat.size
594
- end
595
-
596
- progress_monitor.bytes = true
597
- progress_monitor.max = size unless size.to_i == 0
598
- elsif monitor
599
- desc = "Parsing Stream"
600
- step = 100
601
- size = nil
602
- if Hash === monitor
603
- desc = monitor[:desc] if monitor.include? :desc
604
- step = monitor[:step] if monitor.include? :step
605
- end
606
- progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
607
- end
608
-
609
- # parser
610
- line_num = 1
611
- begin
612
-
613
- while not line.nil?
614
- begin
615
- if progress_monitor
616
- progress_monitor.tick(line.bytesize)
617
- end
618
-
619
- raise SKIP_LINE if line.empty?
620
-
621
- line = Misc.fixutf8(line)
622
- line = self.process line
623
- raise SKIP_LINE if line.empty?
624
- parts = self.chop_line line
625
- key, values = self.get_values parts
626
- values = self.cast_values values if self.cast?
627
-
628
- yield key, values, fields
629
-
630
- line = stream.gets
631
-
632
- line_num += 1
633
- raise END_PARSING if head and line_num > head.to_i
634
- rescue SKIP_LINE
635
- begin
636
- line = stream.gets
637
- next
638
- rescue IOError
639
- break
640
- end
641
- rescue END_PARSING
642
- stream.close unless stream.closed?
643
- begin stream.join; rescue Exception; end if stream.respond_to? :join and not stream.joined?
644
- break
645
- rescue Errno::EPIPE
646
- Log.error "Pipe closed while parsing #{Misc.fingerprint stream}: #{$!.message}"
647
- stream.abort if stream.respond_to? :abort
648
- raise $!
649
- rescue Exception
650
- Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
651
- stream.abort $! if stream.respond_to? :abort
652
- raise $!
653
- end
654
- end
655
- ensure
656
- Log::ProgressBar.remove_bar(progress_monitor) if progress_monitor
657
- stream.close unless stream.closed?
658
- stream.join if stream.respond_to? :join and not stream.joined?
659
- end
660
-
661
- self
662
- end
663
-
664
- def identify_field(field)
665
- TSV.identify_field(key_field, fields, field)
666
- end
667
-
668
- def rewind
669
- stream.reopen(filename, "r") if stream.closed? and filename
670
- stream.rewind
671
- end
672
-
673
- def self.traverse(stream, options = {}, &block)
674
- parser = Parser.new(stream, options)
675
- parser.traverse(options, &block)
676
- end
677
- end
678
- end
1
+ #require 'rbbt/util/cmd'
2
+ #module TSV
3
+ # class Parser
4
+ # attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble, :identifiers, :header_options
5
+ #
6
+ # class SKIP_LINE < Exception; end
7
+ # class END_PARSING < Exception; end
8
+ #
9
+ # def all_fields
10
+ # all = [key_field] + fields
11
+ # # ToDo: What was this for?
12
+ # #NamedArray.setup all, all
13
+ # all
14
+ # end
15
+ #
16
+ # def parse_header(stream)
17
+ # raise "Closed stream" if IO === stream && stream.closed?
18
+ #
19
+ # options = {}
20
+ # @preamble = []
21
+ #
22
+ # # Get line
23
+ #
24
+ # #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
25
+ # line = stream.gets
26
+ # return {} if line.nil?
27
+ # #raise "Empty content: #{ stream.inspect }" if line.nil?
28
+ # line = Misc.fixutf8 line.chomp
29
+ #
30
+ # # Process options line
31
+ #
32
+ # if line and (String === @header_hash and line =~ /^#{@header_hash}: (.*)/)
33
+ # options = Misc.string2hash $1.chomp
34
+ # line = stream.gets
35
+ # line = Misc.fixutf8 line.chomp if line
36
+ # end
37
+ #
38
+ # # Determine separator
39
+ #
40
+ # @sep = options[:sep] if options[:sep]
41
+ #
42
+ # # Process fields line
43
+ #
44
+ # preamble << line if line
45
+ # while line && (TrueClass === @header_hash || (String === @header_hash && Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
46
+ # @fields = line.split(@sep, -1)
47
+ # @key_field = @fields.shift
48
+ # @key_field = @key_field[(0 + header_hash.length)..-1] if String === @header_hash
49
+ #
50
+ # #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
51
+ # line = (@header_hash != "" ? stream.gets : nil)
52
+ # line = Misc.fixutf8 line.chomp if line
53
+ # preamble << line if line
54
+ # @header_hash = false if TrueClass === @header_hash || @header_hash == ""
55
+ # end
56
+ #
57
+ # @preamble = preamble[0..-3] * "\n"
58
+ #
59
+ # line ||= stream.gets
60
+ #
61
+ # @first_line = line
62
+ #
63
+ # options
64
+ # end
65
+ #
66
+ # def process(line)
67
+ # l = line.chomp
68
+ # raise SKIP_LINE if l[0] == "#"[0] or (Proc === @select and not @select.call l)
69
+ # l = @fix.call l if Proc === @fix
70
+ # raise END_PARSING unless l
71
+ # l
72
+ # end
73
+ #
74
+ # def cast?
75
+ # !! @cast
76
+ # end
77
+ #
78
+ # def chop_line(line)
79
+ # @sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
80
+ # end
81
+ #
82
+ # def get_values_single_from_flat(parts)
83
+ # return parts.shift, parts.first if field_positions.nil? and key_position.nil?
84
+ # if key_position == 0
85
+ # [parts.shift, parts.first]
86
+ # else
87
+ # key = parts.shift
88
+ # [parts, key]
89
+ # end
90
+ #
91
+ # end
92
+ #
93
+ # def get_values_double_from_flat(parts)
94
+ # return [parts.shift], [parts.flatten] if field_positions.nil? and key_position.nil?
95
+ # if key_position == 0
96
+ # [parts.shift, [parts.flatten]]
97
+ # else
98
+ # value = parts.shift
99
+ # keys = parts.flatten
100
+ # [keys, [[value]]]
101
+ # end
102
+ #
103
+ # end
104
+ #
105
+ # def get_values_single(parts)
106
+ # return parts.shift, parts.first if field_positions.nil? and key_position.nil?
107
+ # key = parts[key_position]
108
+ # value = parts[(field_positions.nil? or field_positions.empty?) ? 0 : field_positions.first]
109
+ # [key, value]
110
+ # end
111
+ #
112
+ # def get_values_list(parts)
113
+ # return parts.shift, parts if field_positions.nil? and key_position.nil?
114
+ # key = parts[key_position]
115
+ #
116
+ # values = case
117
+ # when field_positions.nil?
118
+ # parts.tap{|o| o.delete_at key_position}
119
+ # when field_positions.empty?
120
+ # []
121
+ # else
122
+ # parts.values_at *field_positions
123
+ # end
124
+ #
125
+ # [key, values]
126
+ # end
127
+ #
128
+ # def get_values_double(parts)
129
+ # return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil? and key_position.nil?
130
+ # keys = parts[key_position].split(@sep2, -1)
131
+ # values = case
132
+ # when field_positions.nil?
133
+ # parts.tap{|o| o.delete_at key_position}
134
+ # when field_positions.empty?
135
+ # []
136
+ # else
137
+ # parts.values_at *field_positions
138
+ # end.collect{|value| (value.nil? || value.empty?) ? [""] : value.split(@sep2, -1) }
139
+ # [keys, values]
140
+ # end
141
+ #
142
+ # def get_values_flat_inverse(parts)
143
+ # value = parts.shift
144
+ # keys = parts
145
+ # [keys, [value]]
146
+ # end
147
+ #
148
+ #
149
+ # def get_values_flat_merge(parts)
150
+ # begin
151
+ # orig = parts
152
+ #
153
+ # if key_position and key_position != 0 and field_positions.nil?
154
+ # value = parts.shift.split(@sep2, -1)
155
+ # keys = parts.collect{|p| p.split(@sep2, -1) }.flatten
156
+ # return [keys, value]
157
+ # end
158
+ #
159
+ # return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)}.flatten if
160
+ # field_positions.nil? and (key_position.nil? or key_position == 0)
161
+ # rescue
162
+ # raise $!
163
+ # end
164
+ #
165
+ # if key_position and key_position != 0 and @header_options[:type] == :flat
166
+ # keys = parts[1..-1]
167
+ # else
168
+ # str = parts[key_position]
169
+ # keys = str.split(@sep2, -1)
170
+ # end
171
+ #
172
+ # if @take_all
173
+ # values = parts.collect{|e| e.split(@sep2, -1) }.flatten
174
+ # else
175
+ # if field_positions.nil?
176
+ # parts.delete_at key_position
177
+ # values = parts.first
178
+ # else
179
+ # values = parts[field_positions.first]
180
+ # end
181
+ #
182
+ # values = values.split(@sep2, -1)
183
+ # end
184
+ #
185
+ # [keys, values]
186
+ # end
187
+ #
188
+ # def get_values_flat(parts)
189
+ # keys, values = get_values_flat_merge(parts)
190
+ # [keys.first, values]
191
+ # end
192
+ #
193
+ #
194
+ # def add_to_data_no_merge_list(data, key, values)
195
+ # data[key] = values unless data.include? key
196
+ # nil
197
+ # end
198
+ #
199
+ # def add_to_data_flat_keys(data, key, values)
200
+ # data[key] = values unless data.include? key
201
+ # nil
202
+ # end
203
+ #
204
+ # def add_to_data_flat(data, key, values)
205
+ # data[key] = values unless data.include? key
206
+ # nil
207
+ # end
208
+ #
209
+ # def add_to_data_flat_merge(data, key, values)
210
+ # if data.include? key
211
+ # data[key] = data[key].concat values
212
+ # else
213
+ # data[key] = values
214
+ # end
215
+ # nil
216
+ # end
217
+ #
218
+ # def add_to_data_flat_merge_double(data, keys, values)
219
+ # data.write
220
+ # keys.each do |key|
221
+ # if data.include? key
222
+ # data[key] = data[key].concat values
223
+ # else
224
+ # data[key] = values
225
+ # end
226
+ # end
227
+ # nil
228
+ # end
229
+ #
230
+ # def add_to_data_flat_merge_keys(data, keys, values)
231
+ # keys.each do |key|
232
+ # if data.include? key
233
+ # data[key] = data[key].concat values
234
+ # else
235
+ # data[key] = values.dup
236
+ # end
237
+ # end
238
+ # nil
239
+ # end
240
+ #
241
+ # def add_to_data_no_merge_double(data, keys, values)
242
+ # keys = [keys] unless Array === keys
243
+ # keys.each do |key|
244
+ # next if data.include? key
245
+ # data[key] = values
246
+ # end
247
+ # nil
248
+ # end
249
+ #
250
+ # def add_to_data_merge(data, keys, values)
251
+ # keys.uniq.each do |key|
252
+ # if data.include? key
253
+ # new = data[key]
254
+ # new.each_with_index do |old, i|
255
+ # next if values[i].nil?
256
+ # if old.nil?
257
+ # new[i] = values[i]
258
+ # else
259
+ # old.concat values[i]
260
+ # end
261
+ # end
262
+ # data[key] = new
263
+ # else
264
+ # data[key] = values
265
+ # end
266
+ # end
267
+ # nil
268
+ # end
269
+ #
270
+ # def add_to_data_merge_zipped(data, keys, values)
271
+ # keys = [keys] unless Array === keys
272
+ # num = keys.length
273
+ #
274
+ # values = values.collect do |v|
275
+ # (v.nil? || v.empty?) ? [""] : v
276
+ # end
277
+ #
278
+ # if values.first.length > 1 and num == 1
279
+ # keys = keys * values.first.length
280
+ # num = keys.length
281
+ # end
282
+ #
283
+ # values = values.collect{|v| v.length != num ? [v.first] * num : v}
284
+ #
285
+ # all = values
286
+ # all.unshift keys
287
+ # Misc.zip_fields(all).each do |vs|
288
+ # key = vs.shift
289
+ # if data.include? key
290
+ # data[key] = data[key].zip(vs).collect do |old, new|
291
+ # old + [new]
292
+ # end
293
+ # else
294
+ # data[key] = vs.collect{|v| [v] }
295
+ # end
296
+ # end
297
+ #
298
+ # nil
299
+ # end
300
+ #
301
+ # def add_to_data_zipped(data, keys, values)
302
+ # num = keys.length
303
+ #
304
+ # if values.first.length > 1 and num == 1
305
+ # keys = keys * values.first.length
306
+ # num = keys.length
307
+ # end
308
+ #
309
+ # values = values.collect{|v| v.length != num ? [v.first] * num : v}
310
+ # all = values.unshift keys
311
+ # Misc.zip_fields(all).each do |values|
312
+ # key = values.shift
313
+ # next if data.include? key
314
+ # data[key] = values.collect{|v| [v]}
315
+ # end
316
+ # nil
317
+ # end
318
+ #
319
+ #
320
+ # def cast_values_single(value)
321
+ # case
322
+ # when (value.nil? or value.empty?)
323
+ # nil
324
+ # when Symbol === cast
325
+ # value.send(cast)
326
+ # when Proc === cast
327
+ # cast.call value
328
+ # end
329
+ # end
330
+ #
331
+ # def cast_values_list(values)
332
+ # case
333
+ # when Symbol === cast
334
+ # values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
335
+ # when Proc === cast
336
+ # values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v)}
337
+ # end
338
+ # end
339
+ #
340
+ # def cast_values_flat(values)
341
+ # case
342
+ # when Symbol === cast
343
+ # values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
344
+ # when Proc === cast
345
+ # values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }
346
+ # end
347
+ # end
348
+ #
349
+ # def cast_values_double(values)
350
+ # case
351
+ # when Symbol === cast
352
+ # values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}}
353
+ # when Proc === cast
354
+ # values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }}
355
+ # end
356
+ # end
357
+ #
358
+ # def rescue_first_line
359
+ # @first_line
360
+ # end
361
+ #
362
+ # def fix_fields(options)
363
+ # key_field = Misc.process_options options, :key_field
364
+ # fields = Misc.process_options options, :fields
365
+ #
366
+ # if (key_field.nil? or key_field == 0 or key_field == :key) and
367
+ # (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
368
+ #
369
+ # @straight = true
370
+ # return
371
+ # else
372
+ # @straight = false
373
+ #
374
+ # case
375
+ # when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
376
+ # @key_position = 0
377
+ # when Integer === key_field
378
+ # @key_position = key_field
379
+ # when String === key_field
380
+ # @key_position = @fields.dup.unshift(@key_field).index key_field
381
+ # raise "Key field #{ key_field } was not found" if @key_position.nil?
382
+ # when :key == key_field
383
+ # @key_position = 0
384
+ # else
385
+ # raise "Format of key_field not understood: #{key_field.inspect}"
386
+ # end
387
+ #
388
+ # if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
389
+ # if not @fields.nil? and type != :flat
390
+ # @field_positions = (0..@fields.length).to_a
391
+ # @field_positions.delete @key_position
392
+ # end
393
+ # else
394
+ # fields = [fields] if not Array === fields
395
+ # @field_positions = fields.collect{|field|
396
+ # case
397
+ # when Integer === field
398
+ # field
399
+ # when String === field
400
+ # pos = @fields.dup.unshift(@key_field).index field
401
+ # raise "Field not identified: #{ field }" if pos.nil?
402
+ # pos
403
+ # else
404
+ # raise "Format of fields not understood: #{field.inspect}"
405
+ # end
406
+ # }
407
+ # end
408
+ #
409
+ # new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
410
+ # @fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil? and not @field_positions.nil?
411
+ # @fields ||= fields if Array === fields and String === fields.first
412
+ # @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
413
+ # @key_field = new_key_field
414
+ # @key_field ||= key_field if String === key_field
415
+ #
416
+ # end
417
+ # end
418
+ #
419
+ # def initialize(stream = nil, options = {})
420
+ # @header_hash = Misc.process_options(options, :header_hash) || "#"
421
+ # @sep = Misc.process_options(options, :sep) || "\t"
422
+ # @tsv_grep = Misc.process_options(options, :tsv_grep)
423
+ # stream = TSV.get_stream stream
424
+ # @stream = stream
425
+ #
426
+ #
427
+ # @header_options = parse_header(stream)
428
+ #
429
+ # options = @header_options.merge options
430
+ # options = Misc.add_defaults options, :fields => [1] if options[:type] == :single and options[:fields].nil?
431
+ #
432
+ # @type ||= Misc.process_options(options, :type) || :double
433
+ # @type ||= :double
434
+ #
435
+ # @identifiers = Misc.process_options(options, :identifiers)
436
+ #
437
+ # @filename = Misc.process_options(options, :filename)
438
+ # @filename ||= stream.filename if stream.respond_to? :filename
439
+ #
440
+ # @sep2 = Misc.process_options(options, :sep2) || "|"
441
+ # @cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
442
+ # @type ||= Misc.process_options options, :type
443
+ # @fix = Misc.process_options(options, :fix)
444
+ # @select= Misc.process_options options, :select
445
+ # @zipped = Misc.process_options options, :zipped
446
+ # @namespace = Misc.process_options options, :namespace
447
+ # merge = Misc.process_options(options, :merge)
448
+ # merge = @zipped if merge.nil?
449
+ # merge = false if merge.nil?
450
+ #
451
+ # fields = options[:fields]
452
+ # fix_fields(options)
453
+ #
454
+ # @type = @type.strip.to_sym if String === @type
455
+ # #@type ||= :double if merge == true
456
+ #
457
+ # case @type
458
+ # when :double
459
+ # if @header_options[:type] == :flat
460
+ # self.instance_eval do alias get_values get_values_double_from_flat end
461
+ # else
462
+ # self.instance_eval do alias get_values get_values_double end
463
+ # end
464
+ # self.instance_eval do alias cast_values cast_values_double end
465
+ # case
466
+ # when (merge and not zipped)
467
+ # self.instance_eval do alias add_to_data add_to_data_merge end
468
+ # when (merge and zipped)
469
+ # self.instance_eval do alias add_to_data add_to_data_merge_zipped end
470
+ # when zipped
471
+ # self.instance_eval do alias add_to_data add_to_data_zipped end
472
+ # else
473
+ # self.instance_eval do alias add_to_data add_to_data_no_merge_double end
474
+ # end
475
+ # when :single
476
+ # if @header_options[:type] == :flat
477
+ # self.instance_eval do alias get_values get_values_single_from_flat end
478
+ # self.instance_eval do alias cast_values cast_values_single end
479
+ # self.instance_eval do alias add_to_data add_to_data_no_merge_double end
480
+ # else
481
+ # self.instance_eval do alias get_values get_values_single end
482
+ # self.instance_eval do alias cast_values cast_values_single end
483
+ # self.instance_eval do alias add_to_data add_to_data_no_merge_list end
484
+ # end
485
+ # when :list
486
+ # self.instance_eval do alias get_values get_values_list end
487
+ # self.instance_eval do alias cast_values cast_values_list end
488
+ # self.instance_eval do alias add_to_data add_to_data_no_merge_list end
489
+ #
490
+ # when :flat
491
+ # @take_all = true if field_positions.nil?
492
+ # self.instance_eval do alias cast_values cast_values_flat end
493
+ # merge = true if key_position and key_position != 0 and field_positions.nil?
494
+ # if merge
495
+ # self.instance_eval do alias get_values get_values_flat_merge end
496
+ # if key_position and key_position != 0 and field_positions.nil?
497
+ # self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
498
+ # else
499
+ # self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
500
+ # end
501
+ # else
502
+ # self.instance_eval do alias get_values get_values_flat_merge end
503
+ # if key_position and key_position != 0 and field_positions.nil?
504
+ # self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
505
+ # else
506
+ # self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
507
+ # end
508
+ # end
509
+ # else
510
+ # raise "Unknown TSV type: #{@type.inspect}"
511
+ # end
512
+ #
513
+ # @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
514
+ # end
515
+ #
516
+ # def setup(data)
517
+ # data.extend TSV unless TSV === data
518
+ # data.type = @type
519
+ # data.key_field = @key_field
520
+ # data.fields = @fields.nil? ? nil : @fields.dup
521
+ # data.namespace = @namespace
522
+ # data.filename = @filename
523
+ # data.identifiers = @identifiers
524
+ # data.cast = @cast if Symbol === @cast
525
+ # data
526
+ # end
527
+ #
528
+ # def annotate(data)
529
+ # setup(data)
530
+ # end
531
+ #
532
+ # def options
533
+ # options = {}
534
+ # TSV::ENTRIES.each do |entry|
535
+ # if self.respond_to? entry
536
+ # value = self.send(entry)
537
+ # options[entry.to_sym] = value unless value.nil?
538
+ # end
539
+ # end
540
+ # options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
541
+ # IndiferentHash.setup options
542
+ # end
543
+ #
544
+ # def traverse(options = {})
545
+ # monitor, bar, grep, invert_grep, head, fixed_grep = Misc.process_options options, :monitor, :bar, :grep, :invert_grep, :head, :fixed_grep
546
+ # monitor = bar if bar and monitor.nil?
547
+ # raise "No block given in TSV::Parser#traverse" unless block_given?
548
+ #
549
+ # stream = @stream
550
+ #
551
+ #
552
+ # # first line
553
+ # line = self.rescue_first_line
554
+ # line = stream.gets if line.nil?
555
+ #
556
+ # if @tsv_grep || grep
557
+ #
558
+ # stream = Open.grep(stream, @tsv_grep || grep, invert_grep, fixed_grep)
559
+ # stream.no_fail = true
560
+ # begin
561
+ # match = Open.grep(StringIO.new(line), @tsv_grep || grep, invert_grep, fixed_grep).read
562
+ # line = stream.gets if match.empty?
563
+ # rescue Exception
564
+ # Log.exception $!
565
+ # line = stream.gets
566
+ # end
567
+ # end
568
+ #
569
+ # progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
570
+ #
571
+ # # setup monitor
572
+ # if monitor && (stream.respond_to?(:size) || (stream.respond_to?(:stat) && stream.stat.file? && stream.stat.respond_to?(:size) && stream.respond_to?(:pos)))
573
+ # size = case
574
+ # when stream.respond_to?(:size)
575
+ # stream.size
576
+ # else
577
+ # stream.stat.size
578
+ # end
579
+ # size = nil if size.to_i == 0
580
+ # desc = "Parsing Stream"
581
+ # step = 100
582
+ # if Hash === monitor
583
+ # desc = monitor[:desc] if monitor.include? :desc
584
+ # step = monitor[:step] if monitor.include? :step
585
+ # end
586
+ # progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
587
+ # elsif progress_monitor
588
+ #
589
+ # size = case
590
+ # when stream.respond_to?(:size)
591
+ # stream.size
592
+ # else
593
+ # stream.stat.size
594
+ # end
595
+ #
596
+ # progress_monitor.bytes = true
597
+ # progress_monitor.max = size unless size.to_i == 0
598
+ # elsif monitor
599
+ # desc = "Parsing Stream"
600
+ # step = 100
601
+ # size = nil
602
+ # if Hash === monitor
603
+ # desc = monitor[:desc] if monitor.include? :desc
604
+ # step = monitor[:step] if monitor.include? :step
605
+ # end
606
+ # progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
607
+ # end
608
+ #
609
+ # # parser
610
+ # line_num = 1
611
+ # begin
612
+ #
613
+ # while not line.nil?
614
+ # begin
615
+ # if progress_monitor
616
+ # progress_monitor.tick(line.bytesize)
617
+ # end
618
+ #
619
+ # raise SKIP_LINE if line.empty?
620
+ #
621
+ # line = Misc.fixutf8(line)
622
+ # line = self.process line
623
+ # raise SKIP_LINE if line.empty?
624
+ # parts = self.chop_line line
625
+ # key, values = self.get_values parts
626
+ # values = self.cast_values values if self.cast?
627
+ #
628
+ # yield key, values, fields
629
+ #
630
+ # line = stream.gets
631
+ #
632
+ # line_num += 1
633
+ # raise END_PARSING if head and line_num > head.to_i
634
+ # rescue SKIP_LINE
635
+ # begin
636
+ # line = stream.gets
637
+ # next
638
+ # rescue IOError
639
+ # break
640
+ # end
641
+ # rescue END_PARSING
642
+ # stream.close unless stream.closed?
643
+ # begin stream.join; rescue Exception; end if stream.respond_to? :join and not stream.joined?
644
+ # break
645
+ # rescue Errno::EPIPE
646
+ # Log.error "Pipe closed while parsing #{Misc.fingerprint stream}: #{$!.message}"
647
+ # stream.abort if stream.respond_to? :abort
648
+ # raise $!
649
+ # rescue Exception
650
+ # Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
651
+ # stream.abort $! if stream.respond_to? :abort
652
+ # raise $!
653
+ # end
654
+ # end
655
+ # ensure
656
+ # Log::ProgressBar.remove_bar(progress_monitor) if progress_monitor
657
+ # stream.close unless stream.closed?
658
+ # stream.join if stream.respond_to? :join and not stream.joined?
659
+ # end
660
+ #
661
+ # self
662
+ # end
663
+ #
664
+ # def identify_field(field)
665
+ # TSV.identify_field(key_field, fields, field)
666
+ # end
667
+ #
668
+ # def rewind
669
+ # stream.reopen(filename, "r") if stream.closed? and filename
670
+ # stream.rewind
671
+ # end
672
+ #
673
+ # def self.traverse(stream, options = {}, &block)
674
+ # parser = Parser.new(stream, options)
675
+ # parser.traverse(options, &block)
676
+ # end
677
+ # end
678
+ #end