rbbt-util 5.44.1 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/bin/rbbt +67 -90
  4. data/bin/rbbt_exec.rb +2 -2
  5. data/etc/app.d/base.rb +2 -2
  6. data/etc/app.d/semaphores.rb +3 -3
  7. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  8. data/lib/rbbt/annotations/refactor.rb +27 -0
  9. data/lib/rbbt/annotations/util.rb +282 -282
  10. data/lib/rbbt/annotations.rb +343 -320
  11. data/lib/rbbt/association/database.rb +200 -225
  12. data/lib/rbbt/association/index.rb +294 -291
  13. data/lib/rbbt/association/item.rb +227 -227
  14. data/lib/rbbt/association/open.rb +35 -34
  15. data/lib/rbbt/association/util.rb +0 -169
  16. data/lib/rbbt/association.rb +2 -4
  17. data/lib/rbbt/entity/identifiers.rb +119 -118
  18. data/lib/rbbt/entity/refactor.rb +12 -0
  19. data/lib/rbbt/entity.rb +319 -315
  20. data/lib/rbbt/hpc/batch.rb +72 -53
  21. data/lib/rbbt/hpc/lsf.rb +2 -2
  22. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  24. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  25. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  26. data/lib/rbbt/hpc/slurm.rb +18 -18
  27. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  28. data/lib/rbbt/knowledge_base/query.rb +2 -2
  29. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  30. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  31. data/lib/rbbt/knowledge_base.rb +1 -1
  32. data/lib/rbbt/monitor.rb +36 -25
  33. data/lib/rbbt/persist/refactor.rb +166 -0
  34. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  35. data/lib/rbbt/persist/tsv.rb +187 -185
  36. data/lib/rbbt/persist.rb +556 -551
  37. data/lib/rbbt/refactor.rb +20 -0
  38. data/lib/rbbt/resource/path/refactor.rb +178 -0
  39. data/lib/rbbt/resource/path.rb +317 -497
  40. data/lib/rbbt/resource/util.rb +0 -48
  41. data/lib/rbbt/resource.rb +3 -390
  42. data/lib/rbbt/tsv/accessor.rb +2 -838
  43. data/lib/rbbt/tsv/attach.rb +303 -299
  44. data/lib/rbbt/tsv/change_id.rb +244 -245
  45. data/lib/rbbt/tsv/csv.rb +87 -85
  46. data/lib/rbbt/tsv/dumper.rb +2 -100
  47. data/lib/rbbt/tsv/excel.rb +26 -24
  48. data/lib/rbbt/tsv/field_index.rb +4 -1
  49. data/lib/rbbt/tsv/filter.rb +3 -2
  50. data/lib/rbbt/tsv/index.rb +2 -284
  51. data/lib/rbbt/tsv/manipulate.rb +750 -747
  52. data/lib/rbbt/tsv/marshal.rb +3 -3
  53. data/lib/rbbt/tsv/matrix.rb +2 -2
  54. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  55. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  56. data/lib/rbbt/tsv/parser.rb +678 -678
  57. data/lib/rbbt/tsv/refactor.rb +195 -0
  58. data/lib/rbbt/tsv/stream.rb +253 -251
  59. data/lib/rbbt/tsv/util.rb +420 -420
  60. data/lib/rbbt/tsv.rb +210 -208
  61. data/lib/rbbt/util/R/eval.rb +4 -4
  62. data/lib/rbbt/util/R/plot.rb +62 -166
  63. data/lib/rbbt/util/R.rb +21 -18
  64. data/lib/rbbt/util/cmd.rb +2 -318
  65. data/lib/rbbt/util/color.rb +269 -269
  66. data/lib/rbbt/util/colorize.rb +89 -89
  67. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  68. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  69. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  70. data/lib/rbbt/util/config.rb +169 -167
  71. data/lib/rbbt/util/filecache.rb +1 -1
  72. data/lib/rbbt/util/iruby.rb +20 -0
  73. data/lib/rbbt/util/log/progress/report.rb +241 -241
  74. data/lib/rbbt/util/log/progress/util.rb +99 -99
  75. data/lib/rbbt/util/log/progress.rb +102 -102
  76. data/lib/rbbt/util/log/refactor.rb +49 -0
  77. data/lib/rbbt/util/log.rb +486 -532
  78. data/lib/rbbt/util/migrate.rb +2 -2
  79. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  80. data/lib/rbbt/util/misc/development.rb +12 -11
  81. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  82. data/lib/rbbt/util/misc/format.rb +2 -230
  83. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  84. data/lib/rbbt/util/misc/inspect.rb +2 -476
  85. data/lib/rbbt/util/misc/lock.rb +109 -106
  86. data/lib/rbbt/util/misc/omics.rb +9 -1
  87. data/lib/rbbt/util/misc/pipes.rb +765 -793
  88. data/lib/rbbt/util/misc/refactor.rb +20 -0
  89. data/lib/rbbt/util/misc/ssw.rb +27 -17
  90. data/lib/rbbt/util/misc/system.rb +92 -105
  91. data/lib/rbbt/util/misc.rb +39 -20
  92. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  93. data/lib/rbbt/util/named_array.rb +3 -220
  94. data/lib/rbbt/util/open/refactor.rb +7 -0
  95. data/lib/rbbt/util/open.rb +3 -857
  96. data/lib/rbbt/util/procpath.rb +6 -6
  97. data/lib/rbbt/util/python/paths.rb +27 -0
  98. data/lib/rbbt/util/python/run.rb +115 -0
  99. data/lib/rbbt/util/python/script.rb +110 -0
  100. data/lib/rbbt/util/python/util.rb +3 -3
  101. data/lib/rbbt/util/python.rb +22 -81
  102. data/lib/rbbt/util/semaphore.rb +152 -148
  103. data/lib/rbbt/util/simpleopt.rb +9 -8
  104. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  105. data/lib/rbbt/util/ssh.rb +122 -118
  106. data/lib/rbbt/util/tar.rb +117 -115
  107. data/lib/rbbt/util/tmpfile.rb +69 -67
  108. data/lib/rbbt/util/version.rb +2 -0
  109. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  110. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  111. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  112. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  113. data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
  114. data/lib/rbbt/workflow/refactor.rb +150 -0
  115. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
  116. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  117. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  118. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  119. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  120. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  121. data/lib/rbbt/workflow/step/run.rb +766 -766
  122. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  123. data/lib/rbbt/workflow/step.rb +2 -362
  124. data/lib/rbbt/workflow/task.rb +118 -118
  125. data/lib/rbbt/workflow/usage.rb +289 -287
  126. data/lib/rbbt/workflow/util/archive.rb +6 -5
  127. data/lib/rbbt/workflow/util/data.rb +1 -1
  128. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  129. data/lib/rbbt/workflow/util/trace.rb +79 -44
  130. data/lib/rbbt/workflow.rb +4 -882
  131. data/lib/rbbt-util.rb +21 -13
  132. data/lib/rbbt.rb +16 -3
  133. data/python/rbbt/__init__.py +96 -4
  134. data/python/rbbt/workflow/remote.py +104 -0
  135. data/python/rbbt/workflow.py +64 -0
  136. data/python/test.py +10 -0
  137. data/share/Rlib/plot.R +37 -37
  138. data/share/Rlib/svg.R +22 -5
  139. data/share/install/software/lib/install_helpers +1 -1
  140. data/share/rbbt_commands/hpc/list +2 -3
  141. data/share/rbbt_commands/hpc/orchestrate +4 -4
  142. data/share/rbbt_commands/hpc/tail +2 -0
  143. data/share/rbbt_commands/hpc/task +10 -7
  144. data/share/rbbt_commands/lsf/list +2 -3
  145. data/share/rbbt_commands/lsf/orchestrate +4 -4
  146. data/share/rbbt_commands/lsf/tail +2 -0
  147. data/share/rbbt_commands/lsf/task +10 -7
  148. data/share/rbbt_commands/migrate +1 -1
  149. data/share/rbbt_commands/pbs/list +2 -3
  150. data/share/rbbt_commands/pbs/orchestrate +4 -4
  151. data/share/rbbt_commands/pbs/tail +2 -0
  152. data/share/rbbt_commands/pbs/task +10 -7
  153. data/share/rbbt_commands/resource/produce +8 -1
  154. data/share/rbbt_commands/slurm/list +2 -3
  155. data/share/rbbt_commands/slurm/orchestrate +4 -4
  156. data/share/rbbt_commands/slurm/tail +2 -0
  157. data/share/rbbt_commands/slurm/task +10 -7
  158. data/share/rbbt_commands/system/clean +5 -5
  159. data/share/rbbt_commands/system/status +5 -5
  160. data/share/rbbt_commands/tsv/get +2 -3
  161. data/share/rbbt_commands/tsv/info +10 -13
  162. data/share/rbbt_commands/tsv/keys +18 -14
  163. data/share/rbbt_commands/tsv/slice +2 -2
  164. data/share/rbbt_commands/tsv/transpose +6 -2
  165. data/share/rbbt_commands/workflow/info +20 -24
  166. data/share/rbbt_commands/workflow/list +1 -1
  167. data/share/rbbt_commands/workflow/prov +20 -13
  168. data/share/rbbt_commands/workflow/retry +43 -0
  169. data/share/rbbt_commands/workflow/server +12 -2
  170. data/share/rbbt_commands/workflow/task +80 -73
  171. data/share/rbbt_commands/workflow/write_info +26 -9
  172. data/share/software/opt/ssw/ssw.c +861 -0
  173. data/share/software/opt/ssw/ssw.h +130 -0
  174. data/share/workflow_config.ru +3 -3
  175. metadata +45 -6
@@ -1,678 +1,678 @@
1
- require 'rbbt/util/cmd'
2
- module TSV
3
- class Parser
4
- attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble, :identifiers, :header_options
5
-
6
- class SKIP_LINE < Exception; end
7
- class END_PARSING < Exception; end
8
-
9
- def all_fields
10
- all = [key_field] + fields
11
- # ToDo: What was this for?
12
- #NamedArray.setup all, all
13
- all
14
- end
15
-
16
- def parse_header(stream)
17
- raise "Closed stream" if IO === stream && stream.closed?
18
-
19
- options = {}
20
- @preamble = []
21
-
22
- # Get line
23
-
24
- #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
25
- line = stream.gets
26
- return {} if line.nil?
27
- #raise "Empty content: #{ stream.inspect }" if line.nil?
28
- line = Misc.fixutf8 line.chomp
29
-
30
- # Process options line
31
-
32
- if line and (String === @header_hash and line =~ /^#{@header_hash}: (.*)/)
33
- options = Misc.string2hash $1.chomp
34
- line = stream.gets
35
- line = Misc.fixutf8 line.chomp if line
36
- end
37
-
38
- # Determine separator
39
-
40
- @sep = options[:sep] if options[:sep]
41
-
42
- # Process fields line
43
-
44
- preamble << line if line
45
- while line && (TrueClass === @header_hash || (String === @header_hash && Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
46
- @fields = line.split(@sep, -1)
47
- @key_field = @fields.shift
48
- @key_field = @key_field[(0 + header_hash.length)..-1] if String === @header_hash
49
-
50
- #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
51
- line = (@header_hash != "" ? stream.gets : nil)
52
- line = Misc.fixutf8 line.chomp if line
53
- preamble << line if line
54
- @header_hash = false if TrueClass === @header_hash || @header_hash == ""
55
- end
56
-
57
- @preamble = preamble[0..-3] * "\n"
58
-
59
- line ||= stream.gets
60
-
61
- @first_line = line
62
-
63
- options
64
- end
65
-
66
- def process(line)
67
- l = line.chomp
68
- raise SKIP_LINE if l[0] == "#"[0] or (Proc === @select and not @select.call l)
69
- l = @fix.call l if Proc === @fix
70
- raise END_PARSING unless l
71
- l
72
- end
73
-
74
- def cast?
75
- !! @cast
76
- end
77
-
78
- def chop_line(line)
79
- @sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
80
- end
81
-
82
- def get_values_single_from_flat(parts)
83
- return parts.shift, parts.first if field_positions.nil? and key_position.nil?
84
- if key_position == 0
85
- [parts.shift, parts.first]
86
- else
87
- key = parts.shift
88
- [parts, key]
89
- end
90
-
91
- end
92
-
93
- def get_values_double_from_flat(parts)
94
- return [parts.shift], [parts.flatten] if field_positions.nil? and key_position.nil?
95
- if key_position == 0
96
- [parts.shift, [parts.flatten]]
97
- else
98
- value = parts.shift
99
- keys = parts.flatten
100
- [keys, [[value]]]
101
- end
102
-
103
- end
104
-
105
- def get_values_single(parts)
106
- return parts.shift, parts.first if field_positions.nil? and key_position.nil?
107
- key = parts[key_position]
108
- value = parts[(field_positions.nil? or field_positions.empty?) ? 0 : field_positions.first]
109
- [key, value]
110
- end
111
-
112
- def get_values_list(parts)
113
- return parts.shift, parts if field_positions.nil? and key_position.nil?
114
- key = parts[key_position]
115
-
116
- values = case
117
- when field_positions.nil?
118
- parts.tap{|o| o.delete_at key_position}
119
- when field_positions.empty?
120
- []
121
- else
122
- parts.values_at *field_positions
123
- end
124
-
125
- [key, values]
126
- end
127
-
128
- def get_values_double(parts)
129
- return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil? and key_position.nil?
130
- keys = parts[key_position].split(@sep2, -1)
131
- values = case
132
- when field_positions.nil?
133
- parts.tap{|o| o.delete_at key_position}
134
- when field_positions.empty?
135
- []
136
- else
137
- parts.values_at *field_positions
138
- end.collect{|value| (value.nil? || value.empty?) ? [""] : value.split(@sep2, -1) }
139
- [keys, values]
140
- end
141
-
142
- def get_values_flat_inverse(parts)
143
- value = parts.shift
144
- keys = parts
145
- [keys, [value]]
146
- end
147
-
148
-
149
- def get_values_flat_merge(parts)
150
- begin
151
- orig = parts
152
-
153
- if key_position and key_position != 0 and field_positions.nil?
154
- value = parts.shift.split(@sep2, -1)
155
- keys = parts.collect{|p| p.split(@sep2, -1) }.flatten
156
- return [keys, value]
157
- end
158
-
159
- return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)}.flatten if
160
- field_positions.nil? and (key_position.nil? or key_position == 0)
161
- rescue
162
- raise $!
163
- end
164
-
165
- if key_position and key_position != 0 and @header_options[:type] == :flat
166
- keys = parts[1..-1]
167
- else
168
- str = parts[key_position]
169
- keys = str.split(@sep2, -1)
170
- end
171
-
172
- if @take_all
173
- values = parts.collect{|e| e.split(@sep2, -1) }.flatten
174
- else
175
- if field_positions.nil?
176
- parts.delete_at key_position
177
- values = parts.first
178
- else
179
- values = parts[field_positions.first]
180
- end
181
-
182
- values = values.split(@sep2, -1)
183
- end
184
-
185
- [keys, values]
186
- end
187
-
188
- def get_values_flat(parts)
189
- keys, values = get_values_flat_merge(parts)
190
- [keys.first, values]
191
- end
192
-
193
-
194
- def add_to_data_no_merge_list(data, key, values)
195
- data[key] = values unless data.include? key
196
- nil
197
- end
198
-
199
- def add_to_data_flat_keys(data, key, values)
200
- data[key] = values unless data.include? key
201
- nil
202
- end
203
-
204
- def add_to_data_flat(data, key, values)
205
- data[key] = values unless data.include? key
206
- nil
207
- end
208
-
209
- def add_to_data_flat_merge(data, key, values)
210
- if data.include? key
211
- data[key] = data[key].concat values
212
- else
213
- data[key] = values
214
- end
215
- nil
216
- end
217
-
218
- def add_to_data_flat_merge_double(data, keys, values)
219
- data.write
220
- keys.each do |key|
221
- if data.include? key
222
- data[key] = data[key].concat values
223
- else
224
- data[key] = values
225
- end
226
- end
227
- nil
228
- end
229
-
230
- def add_to_data_flat_merge_keys(data, keys, values)
231
- keys.each do |key|
232
- if data.include? key
233
- data[key] = data[key].concat values
234
- else
235
- data[key] = values.dup
236
- end
237
- end
238
- nil
239
- end
240
-
241
- def add_to_data_no_merge_double(data, keys, values)
242
- keys = [keys] unless Array === keys
243
- keys.each do |key|
244
- next if data.include? key
245
- data[key] = values
246
- end
247
- nil
248
- end
249
-
250
- def add_to_data_merge(data, keys, values)
251
- keys.uniq.each do |key|
252
- if data.include? key
253
- new = data[key]
254
- new.each_with_index do |old, i|
255
- next if values[i].nil?
256
- if old.nil?
257
- new[i] = values[i]
258
- else
259
- old.concat values[i]
260
- end
261
- end
262
- data[key] = new
263
- else
264
- data[key] = values
265
- end
266
- end
267
- nil
268
- end
269
-
270
- def add_to_data_merge_zipped(data, keys, values)
271
- keys = [keys] unless Array === keys
272
- num = keys.length
273
-
274
- values = values.collect do |v|
275
- (v.nil? || v.empty?) ? [""] : v
276
- end
277
-
278
- if values.first.length > 1 and num == 1
279
- keys = keys * values.first.length
280
- num = keys.length
281
- end
282
-
283
- values = values.collect{|v| v.length != num ? [v.first] * num : v}
284
-
285
- all = values
286
- all.unshift keys
287
- Misc.zip_fields(all).each do |vs|
288
- key = vs.shift
289
- if data.include? key
290
- data[key] = data[key].zip(vs).collect do |old, new|
291
- old + [new]
292
- end
293
- else
294
- data[key] = vs.collect{|v| [v] }
295
- end
296
- end
297
-
298
- nil
299
- end
300
-
301
- def add_to_data_zipped(data, keys, values)
302
- num = keys.length
303
-
304
- if values.first.length > 1 and num == 1
305
- keys = keys * values.first.length
306
- num = keys.length
307
- end
308
-
309
- values = values.collect{|v| v.length != num ? [v.first] * num : v}
310
- all = values.unshift keys
311
- Misc.zip_fields(all).each do |values|
312
- key = values.shift
313
- next if data.include? key
314
- data[key] = values.collect{|v| [v]}
315
- end
316
- nil
317
- end
318
-
319
-
320
- def cast_values_single(value)
321
- case
322
- when (value.nil? or value.empty?)
323
- nil
324
- when Symbol === cast
325
- value.send(cast)
326
- when Proc === cast
327
- cast.call value
328
- end
329
- end
330
-
331
- def cast_values_list(values)
332
- case
333
- when Symbol === cast
334
- values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
335
- when Proc === cast
336
- values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v)}
337
- end
338
- end
339
-
340
- def cast_values_flat(values)
341
- case
342
- when Symbol === cast
343
- values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
344
- when Proc === cast
345
- values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }
346
- end
347
- end
348
-
349
- def cast_values_double(values)
350
- case
351
- when Symbol === cast
352
- values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}}
353
- when Proc === cast
354
- values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }}
355
- end
356
- end
357
-
358
- def rescue_first_line
359
- @first_line
360
- end
361
-
362
- def fix_fields(options)
363
- key_field = Misc.process_options options, :key_field
364
- fields = Misc.process_options options, :fields
365
-
366
- if (key_field.nil? or key_field == 0 or key_field == :key) and
367
- (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
368
-
369
- @straight = true
370
- return
371
- else
372
- @straight = false
373
-
374
- case
375
- when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
376
- @key_position = 0
377
- when Integer === key_field
378
- @key_position = key_field
379
- when String === key_field
380
- @key_position = @fields.dup.unshift(@key_field).index key_field
381
- raise "Key field #{ key_field } was not found" if @key_position.nil?
382
- when :key == key_field
383
- @key_position = 0
384
- else
385
- raise "Format of key_field not understood: #{key_field.inspect}"
386
- end
387
-
388
- if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
389
- if not @fields.nil? and type != :flat
390
- @field_positions = (0..@fields.length).to_a
391
- @field_positions.delete @key_position
392
- end
393
- else
394
- fields = [fields] if not Array === fields
395
- @field_positions = fields.collect{|field|
396
- case
397
- when Integer === field
398
- field
399
- when String === field
400
- pos = @fields.dup.unshift(@key_field).index field
401
- raise "Field not identified: #{ field }" if pos.nil?
402
- pos
403
- else
404
- raise "Format of fields not understood: #{field.inspect}"
405
- end
406
- }
407
- end
408
-
409
- new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
410
- @fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil? and not @field_positions.nil?
411
- @fields ||= fields if Array === fields and String === fields.first
412
- @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
413
- @key_field = new_key_field
414
- @key_field ||= key_field if String === key_field
415
-
416
- end
417
- end
418
-
419
- def initialize(stream = nil, options = {})
420
- @header_hash = Misc.process_options(options, :header_hash) || "#"
421
- @sep = Misc.process_options(options, :sep) || "\t"
422
- @tsv_grep = Misc.process_options(options, :tsv_grep)
423
- stream = TSV.get_stream stream
424
- @stream = stream
425
-
426
-
427
- @header_options = parse_header(stream)
428
-
429
- options = @header_options.merge options
430
- options = Misc.add_defaults options, :fields => [1] if options[:type] == :single and options[:fields].nil?
431
-
432
- @type ||= Misc.process_options(options, :type) || :double
433
- @type ||= :double
434
-
435
- @identifiers = Misc.process_options(options, :identifiers)
436
-
437
- @filename = Misc.process_options(options, :filename)
438
- @filename ||= stream.filename if stream.respond_to? :filename
439
-
440
- @sep2 = Misc.process_options(options, :sep2) || "|"
441
- @cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
442
- @type ||= Misc.process_options options, :type
443
- @fix = Misc.process_options(options, :fix)
444
- @select= Misc.process_options options, :select
445
- @zipped = Misc.process_options options, :zipped
446
- @namespace = Misc.process_options options, :namespace
447
- merge = Misc.process_options(options, :merge)
448
- merge = @zipped if merge.nil?
449
- merge = false if merge.nil?
450
-
451
- fields = options[:fields]
452
- fix_fields(options)
453
-
454
- @type = @type.strip.to_sym if String === @type
455
- #@type ||= :double if merge == true
456
-
457
- case @type
458
- when :double
459
- if @header_options[:type] == :flat
460
- self.instance_eval do alias get_values get_values_double_from_flat end
461
- else
462
- self.instance_eval do alias get_values get_values_double end
463
- end
464
- self.instance_eval do alias cast_values cast_values_double end
465
- case
466
- when (merge and not zipped)
467
- self.instance_eval do alias add_to_data add_to_data_merge end
468
- when (merge and zipped)
469
- self.instance_eval do alias add_to_data add_to_data_merge_zipped end
470
- when zipped
471
- self.instance_eval do alias add_to_data add_to_data_zipped end
472
- else
473
- self.instance_eval do alias add_to_data add_to_data_no_merge_double end
474
- end
475
- when :single
476
- if @header_options[:type] == :flat
477
- self.instance_eval do alias get_values get_values_single_from_flat end
478
- self.instance_eval do alias cast_values cast_values_single end
479
- self.instance_eval do alias add_to_data add_to_data_no_merge_double end
480
- else
481
- self.instance_eval do alias get_values get_values_single end
482
- self.instance_eval do alias cast_values cast_values_single end
483
- self.instance_eval do alias add_to_data add_to_data_no_merge_list end
484
- end
485
- when :list
486
- self.instance_eval do alias get_values get_values_list end
487
- self.instance_eval do alias cast_values cast_values_list end
488
- self.instance_eval do alias add_to_data add_to_data_no_merge_list end
489
-
490
- when :flat
491
- @take_all = true if field_positions.nil?
492
- self.instance_eval do alias cast_values cast_values_flat end
493
- merge = true if key_position and key_position != 0 and field_positions.nil?
494
- if merge
495
- self.instance_eval do alias get_values get_values_flat_merge end
496
- if key_position and key_position != 0 and field_positions.nil?
497
- self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
498
- else
499
- self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
500
- end
501
- else
502
- self.instance_eval do alias get_values get_values_flat_merge end
503
- if key_position and key_position != 0 and field_positions.nil?
504
- self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
505
- else
506
- self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
507
- end
508
- end
509
- else
510
- raise "Unknown TSV type: #{@type.inspect}"
511
- end
512
-
513
- @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
514
- end
515
-
516
- def setup(data)
517
- data.extend TSV unless TSV === data
518
- data.type = @type
519
- data.key_field = @key_field
520
- data.fields = @fields.nil? ? nil : @fields.dup
521
- data.namespace = @namespace
522
- data.filename = @filename
523
- data.identifiers = @identifiers
524
- data.cast = @cast if Symbol === @cast
525
- data
526
- end
527
-
528
- def annotate(data)
529
- setup(data)
530
- end
531
-
532
- def options
533
- options = {}
534
- TSV::ENTRIES.each do |entry|
535
- if self.respond_to? entry
536
- value = self.send(entry)
537
- options[entry.to_sym] = value unless value.nil?
538
- end
539
- end
540
- options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
541
- IndiferentHash.setup options
542
- end
543
-
544
- def traverse(options = {})
545
- monitor, bar, grep, invert_grep, head, fixed_grep = Misc.process_options options, :monitor, :bar, :grep, :invert_grep, :head, :fixed_grep
546
- monitor = bar if bar and monitor.nil?
547
- raise "No block given in TSV::Parser#traverse" unless block_given?
548
-
549
- stream = @stream
550
-
551
-
552
- # first line
553
- line = self.rescue_first_line
554
- line = stream.gets if line.nil?
555
-
556
- if @tsv_grep || grep
557
-
558
- stream = Open.grep(stream, @tsv_grep || grep, invert_grep, fixed_grep)
559
- stream.no_fail = true
560
- begin
561
- match = Open.grep(StringIO.new(line), @tsv_grep || grep, invert_grep, fixed_grep).read
562
- line = stream.gets if match.empty?
563
- rescue Exception
564
- Log.exception $!
565
- line = stream.gets
566
- end
567
- end
568
-
569
- progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
570
-
571
- # setup monitor
572
- if monitor && (stream.respond_to?(:size) || (stream.respond_to?(:stat) && stream.stat.file? && stream.stat.respond_to?(:size) && stream.respond_to?(:pos)))
573
- size = case
574
- when stream.respond_to?(:size)
575
- stream.size
576
- else
577
- stream.stat.size
578
- end
579
- size = nil if size.to_i == 0
580
- desc = "Parsing Stream"
581
- step = 100
582
- if Hash === monitor
583
- desc = monitor[:desc] if monitor.include? :desc
584
- step = monitor[:step] if monitor.include? :step
585
- end
586
- progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
587
- elsif progress_monitor
588
-
589
- size = case
590
- when stream.respond_to?(:size)
591
- stream.size
592
- else
593
- stream.stat.size
594
- end
595
-
596
- progress_monitor.bytes = true
597
- progress_monitor.max = size unless size.to_i == 0
598
- elsif monitor
599
- desc = "Parsing Stream"
600
- step = 100
601
- size = nil
602
- if Hash === monitor
603
- desc = monitor[:desc] if monitor.include? :desc
604
- step = monitor[:step] if monitor.include? :step
605
- end
606
- progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
607
- end
608
-
609
- # parser
610
- line_num = 1
611
- begin
612
-
613
- while not line.nil?
614
- begin
615
- if progress_monitor
616
- progress_monitor.tick(line.bytesize)
617
- end
618
-
619
- raise SKIP_LINE if line.empty?
620
-
621
- line = Misc.fixutf8(line)
622
- line = self.process line
623
- raise SKIP_LINE if line.empty?
624
- parts = self.chop_line line
625
- key, values = self.get_values parts
626
- values = self.cast_values values if self.cast?
627
-
628
- yield key, values, fields
629
-
630
- line = stream.gets
631
-
632
- line_num += 1
633
- raise END_PARSING if head and line_num > head.to_i
634
- rescue SKIP_LINE
635
- begin
636
- line = stream.gets
637
- next
638
- rescue IOError
639
- break
640
- end
641
- rescue END_PARSING
642
- stream.close unless stream.closed?
643
- begin stream.join; rescue Exception; end if stream.respond_to? :join and not stream.joined?
644
- break
645
- rescue Errno::EPIPE
646
- Log.error "Pipe closed while parsing #{Misc.fingerprint stream}: #{$!.message}"
647
- stream.abort if stream.respond_to? :abort
648
- raise $!
649
- rescue Exception
650
- Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
651
- stream.abort $! if stream.respond_to? :abort
652
- raise $!
653
- end
654
- end
655
- ensure
656
- Log::ProgressBar.remove_bar(progress_monitor) if progress_monitor
657
- stream.close unless stream.closed?
658
- stream.join if stream.respond_to? :join and not stream.joined?
659
- end
660
-
661
- self
662
- end
663
-
664
- def identify_field(field)
665
- TSV.identify_field(key_field, fields, field)
666
- end
667
-
668
- def rewind
669
- stream.reopen(filename, "r") if stream.closed? and filename
670
- stream.rewind
671
- end
672
-
673
- def self.traverse(stream, options = {}, &block)
674
- parser = Parser.new(stream, options)
675
- parser.traverse(options, &block)
676
- end
677
- end
678
- end
1
+ #require 'rbbt/util/cmd'
2
+ #module TSV
3
+ # class Parser
4
+ # attr_accessor :stream, :filename, :header_hash, :sep, :sep2, :type, :key_position, :field_positions, :cast, :key_field, :fields, :fix, :select, :serializer, :straight, :take_all, :zipped, :namespace, :first_line, :stream, :preamble, :identifiers, :header_options
5
+ #
6
+ # class SKIP_LINE < Exception; end
7
+ # class END_PARSING < Exception; end
8
+ #
9
+ # def all_fields
10
+ # all = [key_field] + fields
11
+ # # ToDo: What was this for?
12
+ # #NamedArray.setup all, all
13
+ # all
14
+ # end
15
+ #
16
+ # def parse_header(stream)
17
+ # raise "Closed stream" if IO === stream && stream.closed?
18
+ #
19
+ # options = {}
20
+ # @preamble = []
21
+ #
22
+ # # Get line
23
+ #
24
+ # #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
25
+ # line = stream.gets
26
+ # return {} if line.nil?
27
+ # #raise "Empty content: #{ stream.inspect }" if line.nil?
28
+ # line = Misc.fixutf8 line.chomp
29
+ #
30
+ # # Process options line
31
+ #
32
+ # if line and (String === @header_hash and line =~ /^#{@header_hash}: (.*)/)
33
+ # options = Misc.string2hash $1.chomp
34
+ # line = stream.gets
35
+ # line = Misc.fixutf8 line.chomp if line
36
+ # end
37
+ #
38
+ # # Determine separator
39
+ #
40
+ # @sep = options[:sep] if options[:sep]
41
+ #
42
+ # # Process fields line
43
+ #
44
+ # preamble << line if line
45
+ # while line && (TrueClass === @header_hash || (String === @header_hash && Misc.fixutf8(line) =~ /^#{@header_hash}/ ))
46
+ # @fields = line.split(@sep, -1)
47
+ # @key_field = @fields.shift
48
+ # @key_field = @key_field[(0 + header_hash.length)..-1] if String === @header_hash
49
+ #
50
+ # #Thread.pass while IO.select([stream], nil, nil, 1).nil? if IO === stream
51
+ # line = (@header_hash != "" ? stream.gets : nil)
52
+ # line = Misc.fixutf8 line.chomp if line
53
+ # preamble << line if line
54
+ # @header_hash = false if TrueClass === @header_hash || @header_hash == ""
55
+ # end
56
+ #
57
+ # @preamble = preamble[0..-3] * "\n"
58
+ #
59
+ # line ||= stream.gets
60
+ #
61
+ # @first_line = line
62
+ #
63
+ # options
64
+ # end
65
+ #
66
+ # def process(line)
67
+ # l = line.chomp
68
+ # raise SKIP_LINE if l[0] == "#"[0] or (Proc === @select and not @select.call l)
69
+ # l = @fix.call l if Proc === @fix
70
+ # raise END_PARSING unless l
71
+ # l
72
+ # end
73
+ #
74
+ # def cast?
75
+ # !! @cast
76
+ # end
77
+ #
78
+ # def chop_line(line)
79
+ # @sep == " " ? line.split(/ /, -1) : line.split(@sep, -1)
80
+ # end
81
+ #
82
+ # def get_values_single_from_flat(parts)
83
+ # return parts.shift, parts.first if field_positions.nil? and key_position.nil?
84
+ # if key_position == 0
85
+ # [parts.shift, parts.first]
86
+ # else
87
+ # key = parts.shift
88
+ # [parts, key]
89
+ # end
90
+ #
91
+ # end
92
+ #
93
+ # def get_values_double_from_flat(parts)
94
+ # return [parts.shift], [parts.flatten] if field_positions.nil? and key_position.nil?
95
+ # if key_position == 0
96
+ # [parts.shift, [parts.flatten]]
97
+ # else
98
+ # value = parts.shift
99
+ # keys = parts.flatten
100
+ # [keys, [[value]]]
101
+ # end
102
+ #
103
+ # end
104
+ #
105
+ # def get_values_single(parts)
106
+ # return parts.shift, parts.first if field_positions.nil? and key_position.nil?
107
+ # key = parts[key_position]
108
+ # value = parts[(field_positions.nil? or field_positions.empty?) ? 0 : field_positions.first]
109
+ # [key, value]
110
+ # end
111
+ #
112
+ # def get_values_list(parts)
113
+ # return parts.shift, parts if field_positions.nil? and key_position.nil?
114
+ # key = parts[key_position]
115
+ #
116
+ # values = case
117
+ # when field_positions.nil?
118
+ # parts.tap{|o| o.delete_at key_position}
119
+ # when field_positions.empty?
120
+ # []
121
+ # else
122
+ # parts.values_at *field_positions
123
+ # end
124
+ #
125
+ # [key, values]
126
+ # end
127
+ #
128
+ # def get_values_double(parts)
129
+ # return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)} if field_positions.nil? and key_position.nil?
130
+ # keys = parts[key_position].split(@sep2, -1)
131
+ # values = case
132
+ # when field_positions.nil?
133
+ # parts.tap{|o| o.delete_at key_position}
134
+ # when field_positions.empty?
135
+ # []
136
+ # else
137
+ # parts.values_at *field_positions
138
+ # end.collect{|value| (value.nil? || value.empty?) ? [""] : value.split(@sep2, -1) }
139
+ # [keys, values]
140
+ # end
141
+ #
142
+ # def get_values_flat_inverse(parts)
143
+ # value = parts.shift
144
+ # keys = parts
145
+ # [keys, [value]]
146
+ # end
147
+ #
148
+ #
149
+ # def get_values_flat_merge(parts)
150
+ # begin
151
+ # orig = parts
152
+ #
153
+ # if key_position and key_position != 0 and field_positions.nil?
154
+ # value = parts.shift.split(@sep2, -1)
155
+ # keys = parts.collect{|p| p.split(@sep2, -1) }.flatten
156
+ # return [keys, value]
157
+ # end
158
+ #
159
+ # return parts.shift.split(@sep2, -1), parts.collect{|value| value.split(@sep2, -1)}.flatten if
160
+ # field_positions.nil? and (key_position.nil? or key_position == 0)
161
+ # rescue
162
+ # raise $!
163
+ # end
164
+ #
165
+ # if key_position and key_position != 0 and @header_options[:type] == :flat
166
+ # keys = parts[1..-1]
167
+ # else
168
+ # str = parts[key_position]
169
+ # keys = str.split(@sep2, -1)
170
+ # end
171
+ #
172
+ # if @take_all
173
+ # values = parts.collect{|e| e.split(@sep2, -1) }.flatten
174
+ # else
175
+ # if field_positions.nil?
176
+ # parts.delete_at key_position
177
+ # values = parts.first
178
+ # else
179
+ # values = parts[field_positions.first]
180
+ # end
181
+ #
182
+ # values = values.split(@sep2, -1)
183
+ # end
184
+ #
185
+ # [keys, values]
186
+ # end
187
+ #
188
+ # def get_values_flat(parts)
189
+ # keys, values = get_values_flat_merge(parts)
190
+ # [keys.first, values]
191
+ # end
192
+ #
193
+ #
194
+ # def add_to_data_no_merge_list(data, key, values)
195
+ # data[key] = values unless data.include? key
196
+ # nil
197
+ # end
198
+ #
199
+ # def add_to_data_flat_keys(data, key, values)
200
+ # data[key] = values unless data.include? key
201
+ # nil
202
+ # end
203
+ #
204
+ # def add_to_data_flat(data, key, values)
205
+ # data[key] = values unless data.include? key
206
+ # nil
207
+ # end
208
+ #
209
+ # def add_to_data_flat_merge(data, key, values)
210
+ # if data.include? key
211
+ # data[key] = data[key].concat values
212
+ # else
213
+ # data[key] = values
214
+ # end
215
+ # nil
216
+ # end
217
+ #
218
+ # def add_to_data_flat_merge_double(data, keys, values)
219
+ # data.write
220
+ # keys.each do |key|
221
+ # if data.include? key
222
+ # data[key] = data[key].concat values
223
+ # else
224
+ # data[key] = values
225
+ # end
226
+ # end
227
+ # nil
228
+ # end
229
+ #
230
+ # def add_to_data_flat_merge_keys(data, keys, values)
231
+ # keys.each do |key|
232
+ # if data.include? key
233
+ # data[key] = data[key].concat values
234
+ # else
235
+ # data[key] = values.dup
236
+ # end
237
+ # end
238
+ # nil
239
+ # end
240
+ #
241
+ # def add_to_data_no_merge_double(data, keys, values)
242
+ # keys = [keys] unless Array === keys
243
+ # keys.each do |key|
244
+ # next if data.include? key
245
+ # data[key] = values
246
+ # end
247
+ # nil
248
+ # end
249
+ #
250
+ # def add_to_data_merge(data, keys, values)
251
+ # keys.uniq.each do |key|
252
+ # if data.include? key
253
+ # new = data[key]
254
+ # new.each_with_index do |old, i|
255
+ # next if values[i].nil?
256
+ # if old.nil?
257
+ # new[i] = values[i]
258
+ # else
259
+ # old.concat values[i]
260
+ # end
261
+ # end
262
+ # data[key] = new
263
+ # else
264
+ # data[key] = values
265
+ # end
266
+ # end
267
+ # nil
268
+ # end
269
+ #
270
+ # def add_to_data_merge_zipped(data, keys, values)
271
+ # keys = [keys] unless Array === keys
272
+ # num = keys.length
273
+ #
274
+ # values = values.collect do |v|
275
+ # (v.nil? || v.empty?) ? [""] : v
276
+ # end
277
+ #
278
+ # if values.first.length > 1 and num == 1
279
+ # keys = keys * values.first.length
280
+ # num = keys.length
281
+ # end
282
+ #
283
+ # values = values.collect{|v| v.length != num ? [v.first] * num : v}
284
+ #
285
+ # all = values
286
+ # all.unshift keys
287
+ # Misc.zip_fields(all).each do |vs|
288
+ # key = vs.shift
289
+ # if data.include? key
290
+ # data[key] = data[key].zip(vs).collect do |old, new|
291
+ # old + [new]
292
+ # end
293
+ # else
294
+ # data[key] = vs.collect{|v| [v] }
295
+ # end
296
+ # end
297
+ #
298
+ # nil
299
+ # end
300
+ #
301
+ # def add_to_data_zipped(data, keys, values)
302
+ # num = keys.length
303
+ #
304
+ # if values.first.length > 1 and num == 1
305
+ # keys = keys * values.first.length
306
+ # num = keys.length
307
+ # end
308
+ #
309
+ # values = values.collect{|v| v.length != num ? [v.first] * num : v}
310
+ # all = values.unshift keys
311
+ # Misc.zip_fields(all).each do |values|
312
+ # key = values.shift
313
+ # next if data.include? key
314
+ # data[key] = values.collect{|v| [v]}
315
+ # end
316
+ # nil
317
+ # end
318
+ #
319
+ #
320
+ # def cast_values_single(value)
321
+ # case
322
+ # when (value.nil? or value.empty?)
323
+ # nil
324
+ # when Symbol === cast
325
+ # value.send(cast)
326
+ # when Proc === cast
327
+ # cast.call value
328
+ # end
329
+ # end
330
+ #
331
+ # def cast_values_list(values)
332
+ # case
333
+ # when Symbol === cast
334
+ # values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
335
+ # when Proc === cast
336
+ # values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v)}
337
+ # end
338
+ # end
339
+ #
340
+ # def cast_values_flat(values)
341
+ # case
342
+ # when Symbol === cast
343
+ # values.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}
344
+ # when Proc === cast
345
+ # values.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }
346
+ # end
347
+ # end
348
+ #
349
+ # def cast_values_double(values)
350
+ # case
351
+ # when Symbol === cast
352
+ # values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : v.send(cast)}}
353
+ # when Proc === cast
354
+ # values.collect{|list| list.collect{|v| v.nil? or v.empty? ? nil : cast.call(v) }}
355
+ # end
356
+ # end
357
+ #
358
+ # def rescue_first_line
359
+ # @first_line
360
+ # end
361
+ #
362
+ # def fix_fields(options)
363
+ # key_field = Misc.process_options options, :key_field
364
+ # fields = Misc.process_options options, :fields
365
+ #
366
+ # if (key_field.nil? or key_field == 0 or key_field == :key) and
367
+ # (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
368
+ #
369
+ # @straight = true
370
+ # return
371
+ # else
372
+ # @straight = false
373
+ #
374
+ # case
375
+ # when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
376
+ # @key_position = 0
377
+ # when Integer === key_field
378
+ # @key_position = key_field
379
+ # when String === key_field
380
+ # @key_position = @fields.dup.unshift(@key_field).index key_field
381
+ # raise "Key field #{ key_field } was not found" if @key_position.nil?
382
+ # when :key == key_field
383
+ # @key_position = 0
384
+ # else
385
+ # raise "Format of key_field not understood: #{key_field.inspect}"
386
+ # end
387
+ #
388
+ # if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
389
+ # if not @fields.nil? and type != :flat
390
+ # @field_positions = (0..@fields.length).to_a
391
+ # @field_positions.delete @key_position
392
+ # end
393
+ # else
394
+ # fields = [fields] if not Array === fields
395
+ # @field_positions = fields.collect{|field|
396
+ # case
397
+ # when Integer === field
398
+ # field
399
+ # when String === field
400
+ # pos = @fields.dup.unshift(@key_field).index field
401
+ # raise "Field not identified: #{ field }" if pos.nil?
402
+ # pos
403
+ # else
404
+ # raise "Format of fields not understood: #{field.inspect}"
405
+ # end
406
+ # }
407
+ # end
408
+ #
409
+ # new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
410
+ # @fields = @fields.dup.unshift(@key_field).values_at *@field_positions if not @fields.nil? and not @field_positions.nil?
411
+ # @fields ||= fields if Array === fields and String === fields.first
412
+ # @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
413
+ # @key_field = new_key_field
414
+ # @key_field ||= key_field if String === key_field
415
+ #
416
+ # end
417
+ # end
418
+ #
419
+ # def initialize(stream = nil, options = {})
420
+ # @header_hash = Misc.process_options(options, :header_hash) || "#"
421
+ # @sep = Misc.process_options(options, :sep) || "\t"
422
+ # @tsv_grep = Misc.process_options(options, :tsv_grep)
423
+ # stream = TSV.get_stream stream
424
+ # @stream = stream
425
+ #
426
+ #
427
+ # @header_options = parse_header(stream)
428
+ #
429
+ # options = @header_options.merge options
430
+ # options = Misc.add_defaults options, :fields => [1] if options[:type] == :single and options[:fields].nil?
431
+ #
432
+ # @type ||= Misc.process_options(options, :type) || :double
433
+ # @type ||= :double
434
+ #
435
+ # @identifiers = Misc.process_options(options, :identifiers)
436
+ #
437
+ # @filename = Misc.process_options(options, :filename)
438
+ # @filename ||= stream.filename if stream.respond_to? :filename
439
+ #
440
+ # @sep2 = Misc.process_options(options, :sep2) || "|"
441
+ # @cast = Misc.process_options options, :cast; @cast = @cast.to_sym if String === @cast
442
+ # @type ||= Misc.process_options options, :type
443
+ # @fix = Misc.process_options(options, :fix)
444
+ # @select= Misc.process_options options, :select
445
+ # @zipped = Misc.process_options options, :zipped
446
+ # @namespace = Misc.process_options options, :namespace
447
+ # merge = Misc.process_options(options, :merge)
448
+ # merge = @zipped if merge.nil?
449
+ # merge = false if merge.nil?
450
+ #
451
+ # fields = options[:fields]
452
+ # fix_fields(options)
453
+ #
454
+ # @type = @type.strip.to_sym if String === @type
455
+ # #@type ||= :double if merge == true
456
+ #
457
+ # case @type
458
+ # when :double
459
+ # if @header_options[:type] == :flat
460
+ # self.instance_eval do alias get_values get_values_double_from_flat end
461
+ # else
462
+ # self.instance_eval do alias get_values get_values_double end
463
+ # end
464
+ # self.instance_eval do alias cast_values cast_values_double end
465
+ # case
466
+ # when (merge and not zipped)
467
+ # self.instance_eval do alias add_to_data add_to_data_merge end
468
+ # when (merge and zipped)
469
+ # self.instance_eval do alias add_to_data add_to_data_merge_zipped end
470
+ # when zipped
471
+ # self.instance_eval do alias add_to_data add_to_data_zipped end
472
+ # else
473
+ # self.instance_eval do alias add_to_data add_to_data_no_merge_double end
474
+ # end
475
+ # when :single
476
+ # if @header_options[:type] == :flat
477
+ # self.instance_eval do alias get_values get_values_single_from_flat end
478
+ # self.instance_eval do alias cast_values cast_values_single end
479
+ # self.instance_eval do alias add_to_data add_to_data_no_merge_double end
480
+ # else
481
+ # self.instance_eval do alias get_values get_values_single end
482
+ # self.instance_eval do alias cast_values cast_values_single end
483
+ # self.instance_eval do alias add_to_data add_to_data_no_merge_list end
484
+ # end
485
+ # when :list
486
+ # self.instance_eval do alias get_values get_values_list end
487
+ # self.instance_eval do alias cast_values cast_values_list end
488
+ # self.instance_eval do alias add_to_data add_to_data_no_merge_list end
489
+ #
490
+ # when :flat
491
+ # @take_all = true if field_positions.nil?
492
+ # self.instance_eval do alias cast_values cast_values_flat end
493
+ # merge = true if key_position and key_position != 0 and field_positions.nil?
494
+ # if merge
495
+ # self.instance_eval do alias get_values get_values_flat_merge end
496
+ # if key_position and key_position != 0 and field_positions.nil?
497
+ # self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
498
+ # else
499
+ # self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
500
+ # end
501
+ # else
502
+ # self.instance_eval do alias get_values get_values_flat_merge end
503
+ # if key_position and key_position != 0 and field_positions.nil?
504
+ # self.instance_eval do alias add_to_data add_to_data_flat_merge_keys end
505
+ # else
506
+ # self.instance_eval do alias add_to_data add_to_data_flat_merge_double end
507
+ # end
508
+ # end
509
+ # else
510
+ # raise "Unknown TSV type: #{@type.inspect}"
511
+ # end
512
+ #
513
+ # @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
514
+ # end
515
+ #
516
+ # def setup(data)
517
+ # data.extend TSV unless TSV === data
518
+ # data.type = @type
519
+ # data.key_field = @key_field
520
+ # data.fields = @fields.nil? ? nil : @fields.dup
521
+ # data.namespace = @namespace
522
+ # data.filename = @filename
523
+ # data.identifiers = @identifiers
524
+ # data.cast = @cast if Symbol === @cast
525
+ # data
526
+ # end
527
+ #
528
+ # def annotate(data)
529
+ # setup(data)
530
+ # end
531
+ #
532
+ # def options
533
+ # options = {}
534
+ # TSV::ENTRIES.each do |entry|
535
+ # if self.respond_to? entry
536
+ # value = self.send(entry)
537
+ # options[entry.to_sym] = value unless value.nil?
538
+ # end
539
+ # end
540
+ # options[:sep] = @sep if @sep and @sep != "\t" and @sep != /\t/
541
+ # IndiferentHash.setup options
542
+ # end
543
+ #
544
+ # def traverse(options = {})
545
+ # monitor, bar, grep, invert_grep, head, fixed_grep = Misc.process_options options, :monitor, :bar, :grep, :invert_grep, :head, :fixed_grep
546
+ # monitor = bar if bar and monitor.nil?
547
+ # raise "No block given in TSV::Parser#traverse" unless block_given?
548
+ #
549
+ # stream = @stream
550
+ #
551
+ #
552
+ # # first line
553
+ # line = self.rescue_first_line
554
+ # line = stream.gets if line.nil?
555
+ #
556
+ # if @tsv_grep || grep
557
+ #
558
+ # stream = Open.grep(stream, @tsv_grep || grep, invert_grep, fixed_grep)
559
+ # stream.no_fail = true
560
+ # begin
561
+ # match = Open.grep(StringIO.new(line), @tsv_grep || grep, invert_grep, fixed_grep).read
562
+ # line = stream.gets if match.empty?
563
+ # rescue Exception
564
+ # Log.exception $!
565
+ # line = stream.gets
566
+ # end
567
+ # end
568
+ #
569
+ # progress_monitor, monitor = monitor, nil if Log::ProgressBar === monitor
570
+ #
571
+ # # setup monitor
572
+ # if monitor && (stream.respond_to?(:size) || (stream.respond_to?(:stat) && stream.stat.file? && stream.stat.respond_to?(:size) && stream.respond_to?(:pos)))
573
+ # size = case
574
+ # when stream.respond_to?(:size)
575
+ # stream.size
576
+ # else
577
+ # stream.stat.size
578
+ # end
579
+ # size = nil if size.to_i == 0
580
+ # desc = "Parsing Stream"
581
+ # step = 100
582
+ # if Hash === monitor
583
+ # desc = monitor[:desc] if monitor.include? :desc
584
+ # step = monitor[:step] if monitor.include? :step
585
+ # end
586
+ # progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
587
+ # elsif progress_monitor
588
+ #
589
+ # size = case
590
+ # when stream.respond_to?(:size)
591
+ # stream.size
592
+ # else
593
+ # stream.stat.size
594
+ # end
595
+ #
596
+ # progress_monitor.bytes = true
597
+ # progress_monitor.max = size unless size.to_i == 0
598
+ # elsif monitor
599
+ # desc = "Parsing Stream"
600
+ # step = 100
601
+ # size = nil
602
+ # if Hash === monitor
603
+ # desc = monitor[:desc] if monitor.include? :desc
604
+ # step = monitor[:step] if monitor.include? :step
605
+ # end
606
+ # progress_monitor = Log::ProgressBar.new_bar(size, :desc => desc, :bytes => true)
607
+ # end
608
+ #
609
+ # # parser
610
+ # line_num = 1
611
+ # begin
612
+ #
613
+ # while not line.nil?
614
+ # begin
615
+ # if progress_monitor
616
+ # progress_monitor.tick(line.bytesize)
617
+ # end
618
+ #
619
+ # raise SKIP_LINE if line.empty?
620
+ #
621
+ # line = Misc.fixutf8(line)
622
+ # line = self.process line
623
+ # raise SKIP_LINE if line.empty?
624
+ # parts = self.chop_line line
625
+ # key, values = self.get_values parts
626
+ # values = self.cast_values values if self.cast?
627
+ #
628
+ # yield key, values, fields
629
+ #
630
+ # line = stream.gets
631
+ #
632
+ # line_num += 1
633
+ # raise END_PARSING if head and line_num > head.to_i
634
+ # rescue SKIP_LINE
635
+ # begin
636
+ # line = stream.gets
637
+ # next
638
+ # rescue IOError
639
+ # break
640
+ # end
641
+ # rescue END_PARSING
642
+ # stream.close unless stream.closed?
643
+ # begin stream.join; rescue Exception; end if stream.respond_to? :join and not stream.joined?
644
+ # break
645
+ # rescue Errno::EPIPE
646
+ # Log.error "Pipe closed while parsing #{Misc.fingerprint stream}: #{$!.message}"
647
+ # stream.abort if stream.respond_to? :abort
648
+ # raise $!
649
+ # rescue Exception
650
+ # Log.error "Exception parsing #{Misc.fingerprint stream}: #{$!.message}"
651
+ # stream.abort $! if stream.respond_to? :abort
652
+ # raise $!
653
+ # end
654
+ # end
655
+ # ensure
656
+ # Log::ProgressBar.remove_bar(progress_monitor) if progress_monitor
657
+ # stream.close unless stream.closed?
658
+ # stream.join if stream.respond_to? :join and not stream.joined?
659
+ # end
660
+ #
661
+ # self
662
+ # end
663
+ #
664
+ # def identify_field(field)
665
+ # TSV.identify_field(key_field, fields, field)
666
+ # end
667
+ #
668
+ # def rewind
669
+ # stream.reopen(filename, "r") if stream.closed? and filename
670
+ # stream.rewind
671
+ # end
672
+ #
673
+ # def self.traverse(stream, options = {}, &block)
674
+ # parser = Parser.new(stream, options)
675
+ # parser.traverse(options, &block)
676
+ # end
677
+ # end
678
+ #end