rbbt-util 5.44.1 → 6.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +67 -90
  3. data/etc/app.d/base.rb +2 -2
  4. data/etc/app.d/semaphores.rb +3 -3
  5. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  6. data/lib/rbbt/annotations/refactor.rb +27 -0
  7. data/lib/rbbt/annotations/util.rb +282 -282
  8. data/lib/rbbt/annotations.rb +343 -320
  9. data/lib/rbbt/association/database.rb +200 -225
  10. data/lib/rbbt/association/index.rb +294 -291
  11. data/lib/rbbt/association/item.rb +227 -227
  12. data/lib/rbbt/association/open.rb +35 -34
  13. data/lib/rbbt/association/util.rb +0 -169
  14. data/lib/rbbt/association.rb +2 -4
  15. data/lib/rbbt/entity/identifiers.rb +119 -118
  16. data/lib/rbbt/entity/refactor.rb +12 -0
  17. data/lib/rbbt/entity.rb +319 -315
  18. data/lib/rbbt/hpc/batch.rb +72 -53
  19. data/lib/rbbt/hpc/lsf.rb +2 -2
  20. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  21. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  22. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  24. data/lib/rbbt/hpc/slurm.rb +18 -18
  25. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  26. data/lib/rbbt/knowledge_base/query.rb +2 -2
  27. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  28. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  29. data/lib/rbbt/knowledge_base.rb +1 -1
  30. data/lib/rbbt/monitor.rb +36 -25
  31. data/lib/rbbt/persist/refactor.rb +166 -0
  32. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  33. data/lib/rbbt/persist/tsv.rb +187 -185
  34. data/lib/rbbt/persist.rb +556 -551
  35. data/lib/rbbt/refactor.rb +20 -0
  36. data/lib/rbbt/resource/path/refactor.rb +178 -0
  37. data/lib/rbbt/resource/path.rb +317 -497
  38. data/lib/rbbt/resource/util.rb +0 -48
  39. data/lib/rbbt/resource.rb +3 -390
  40. data/lib/rbbt/tsv/accessor.rb +2 -838
  41. data/lib/rbbt/tsv/attach.rb +303 -299
  42. data/lib/rbbt/tsv/change_id.rb +244 -245
  43. data/lib/rbbt/tsv/csv.rb +87 -85
  44. data/lib/rbbt/tsv/dumper.rb +2 -100
  45. data/lib/rbbt/tsv/excel.rb +26 -24
  46. data/lib/rbbt/tsv/field_index.rb +4 -1
  47. data/lib/rbbt/tsv/filter.rb +3 -2
  48. data/lib/rbbt/tsv/index.rb +2 -284
  49. data/lib/rbbt/tsv/manipulate.rb +750 -747
  50. data/lib/rbbt/tsv/marshal.rb +3 -3
  51. data/lib/rbbt/tsv/matrix.rb +2 -2
  52. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  53. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  54. data/lib/rbbt/tsv/parser.rb +678 -678
  55. data/lib/rbbt/tsv/refactor.rb +195 -0
  56. data/lib/rbbt/tsv/stream.rb +253 -251
  57. data/lib/rbbt/tsv/util.rb +420 -420
  58. data/lib/rbbt/tsv.rb +210 -208
  59. data/lib/rbbt/util/R/eval.rb +4 -4
  60. data/lib/rbbt/util/R/plot.rb +62 -166
  61. data/lib/rbbt/util/R.rb +21 -18
  62. data/lib/rbbt/util/cmd.rb +2 -318
  63. data/lib/rbbt/util/color.rb +269 -269
  64. data/lib/rbbt/util/colorize.rb +89 -89
  65. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  66. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  67. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  68. data/lib/rbbt/util/config.rb +169 -167
  69. data/lib/rbbt/util/iruby.rb +20 -0
  70. data/lib/rbbt/util/log/progress/report.rb +241 -241
  71. data/lib/rbbt/util/log/progress/util.rb +99 -99
  72. data/lib/rbbt/util/log/progress.rb +102 -102
  73. data/lib/rbbt/util/log/refactor.rb +49 -0
  74. data/lib/rbbt/util/log.rb +486 -532
  75. data/lib/rbbt/util/migrate.rb +1 -1
  76. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  77. data/lib/rbbt/util/misc/development.rb +12 -11
  78. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  79. data/lib/rbbt/util/misc/format.rb +2 -230
  80. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  81. data/lib/rbbt/util/misc/inspect.rb +2 -476
  82. data/lib/rbbt/util/misc/lock.rb +109 -106
  83. data/lib/rbbt/util/misc/omics.rb +9 -1
  84. data/lib/rbbt/util/misc/pipes.rb +765 -793
  85. data/lib/rbbt/util/misc/refactor.rb +20 -0
  86. data/lib/rbbt/util/misc/ssw.rb +27 -17
  87. data/lib/rbbt/util/misc/system.rb +0 -15
  88. data/lib/rbbt/util/misc.rb +39 -20
  89. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  90. data/lib/rbbt/util/named_array.rb +3 -220
  91. data/lib/rbbt/util/open/refactor.rb +7 -0
  92. data/lib/rbbt/util/open.rb +3 -857
  93. data/lib/rbbt/util/procpath.rb +6 -6
  94. data/lib/rbbt/util/python/paths.rb +27 -0
  95. data/lib/rbbt/util/python/run.rb +115 -0
  96. data/lib/rbbt/util/python/script.rb +110 -0
  97. data/lib/rbbt/util/python/util.rb +3 -3
  98. data/lib/rbbt/util/python.rb +22 -81
  99. data/lib/rbbt/util/semaphore.rb +152 -148
  100. data/lib/rbbt/util/simpleopt.rb +9 -8
  101. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  102. data/lib/rbbt/util/ssh.rb +122 -118
  103. data/lib/rbbt/util/tar.rb +117 -115
  104. data/lib/rbbt/util/tmpfile.rb +69 -67
  105. data/lib/rbbt/util/version.rb +2 -0
  106. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  107. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  108. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  109. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  110. data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
  111. data/lib/rbbt/workflow/refactor.rb +153 -0
  112. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  113. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  114. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  115. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  116. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  117. data/lib/rbbt/workflow/step/run.rb +766 -766
  118. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  119. data/lib/rbbt/workflow/step.rb +2 -362
  120. data/lib/rbbt/workflow/task.rb +118 -118
  121. data/lib/rbbt/workflow/usage.rb +289 -287
  122. data/lib/rbbt/workflow/util/archive.rb +6 -5
  123. data/lib/rbbt/workflow/util/data.rb +1 -1
  124. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  125. data/lib/rbbt/workflow/util/trace.rb +79 -44
  126. data/lib/rbbt/workflow.rb +4 -882
  127. data/lib/rbbt-util.rb +21 -13
  128. data/lib/rbbt.rb +16 -3
  129. data/python/rbbt/__init__.py +19 -1
  130. data/share/Rlib/plot.R +37 -37
  131. data/share/Rlib/svg.R +22 -5
  132. data/share/install/software/lib/install_helpers +1 -1
  133. data/share/rbbt_commands/hpc/list +2 -3
  134. data/share/rbbt_commands/hpc/orchestrate +4 -4
  135. data/share/rbbt_commands/hpc/tail +2 -0
  136. data/share/rbbt_commands/hpc/task +10 -7
  137. data/share/rbbt_commands/lsf/list +2 -3
  138. data/share/rbbt_commands/lsf/orchestrate +4 -4
  139. data/share/rbbt_commands/lsf/tail +2 -0
  140. data/share/rbbt_commands/lsf/task +10 -7
  141. data/share/rbbt_commands/migrate +1 -1
  142. data/share/rbbt_commands/pbs/list +2 -3
  143. data/share/rbbt_commands/pbs/orchestrate +4 -4
  144. data/share/rbbt_commands/pbs/tail +2 -0
  145. data/share/rbbt_commands/pbs/task +10 -7
  146. data/share/rbbt_commands/resource/produce +8 -1
  147. data/share/rbbt_commands/slurm/list +2 -3
  148. data/share/rbbt_commands/slurm/orchestrate +4 -4
  149. data/share/rbbt_commands/slurm/tail +2 -0
  150. data/share/rbbt_commands/slurm/task +10 -7
  151. data/share/rbbt_commands/system/clean +5 -5
  152. data/share/rbbt_commands/system/status +5 -5
  153. data/share/rbbt_commands/tsv/get +2 -3
  154. data/share/rbbt_commands/tsv/info +10 -13
  155. data/share/rbbt_commands/tsv/keys +18 -14
  156. data/share/rbbt_commands/tsv/slice +2 -2
  157. data/share/rbbt_commands/tsv/transpose +6 -2
  158. data/share/rbbt_commands/workflow/info +20 -24
  159. data/share/rbbt_commands/workflow/list +1 -1
  160. data/share/rbbt_commands/workflow/prov +20 -13
  161. data/share/rbbt_commands/workflow/server +11 -1
  162. data/share/rbbt_commands/workflow/task +76 -71
  163. data/share/rbbt_commands/workflow/write_info +26 -9
  164. data/share/software/opt/ssw/ssw.c +861 -0
  165. data/share/software/opt/ssw/ssw.h +130 -0
  166. data/share/workflow_config.ru +3 -3
  167. metadata +40 -2
@@ -1,838 +1,2 @@
1
- require 'yaml'
2
- require 'rbbt/annotations'
3
- require 'rbbt/tsv/dumper'
4
- require 'set'
5
-
6
- module TSV
7
-
8
- TSV_SERIALIZER = YAML
9
- SERIALIZED_NIL = TSV_SERIALIZER.dump nil
10
-
11
- attr_accessor :unnamed, :serializer_module, :entity_options, :entity_templates
12
-
13
- def info
14
- {:key_field => key_field, :fields => fields.dup, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed, :cast => cast}.delete_if{|k,v| v.nil? }
15
- end
16
-
17
- def annotate(tsv)
18
- TSV.setup(tsv, info)
19
- tsv.entity_options = self.entity_options
20
- tsv.entity_templates = self.entity_templates
21
- tsv
22
- end
23
-
24
- def entity_options
25
- @entity_options ||= nil
26
- if @entity_options.nil?
27
- @entity_options = namespace ? {:namespace => namespace, :organism => namespace} : {}
28
- @entity_templates = nil
29
- end
30
- @entity_options
31
- end
32
-
33
- def entity_options=(options)
34
- @entity_options = options || {}
35
- if namespace
36
- @entity_options[:organism] ||= namespace
37
- @entity_options[:namespace] ||= namespace
38
- end
39
- @entity_templates = nil
40
- end
41
-
42
-
43
- def entity_templates
44
- @entity_templates ||= {}
45
- end
46
-
47
- def prepare_entity(entity, field, options = {})
48
- return entity if entity.nil?
49
- return entity unless defined? Entity
50
- entity = entity if options.delete :dup_array
51
- if (template = entity_templates[field]) and template.respond_to?(:annotate)
52
- if String === entity or Array === entity
53
- entity = entity.dup if entity.frozen?
54
- template.annotate entity
55
- entity.extend AnnotatedArray if Array === entity
56
- end
57
- entity
58
- else
59
- if entity_templates.include? field
60
- entity
61
- else
62
- template = Misc.prepare_entity("TEMPLATE", field, options)
63
- if template.respond_to?(:annotate)
64
- entity_templates[field] = template
65
- if String === entity or Array === entity
66
- entity = entity.dup if entity.frozen?
67
- template.annotate entity
68
- entity.extend AnnotatedArray if Array === entity
69
- end
70
- entity
71
- else
72
- entity_templates[field] = nil
73
- entity
74
- end
75
- end
76
- end
77
- end
78
-
79
- def setup_array(*args)
80
- res = NamedArray.setup(*args)
81
- return res if res.nil?
82
- res.instance_variable_set(:@entity_templates, entity_templates)
83
- res
84
- end
85
-
86
- def with_unnamed
87
- saved_unnamed = @unnamed
88
- @unnamed = true
89
- res = yield
90
- @unnamed = saved_unnamed
91
- res
92
- end
93
-
94
- def with_monitor(value = true)
95
- saved_monitor = @monitor
96
- @monitor = value.nil? ? false : value
97
- res = yield
98
- @monitor = saved_monitor
99
- res
100
- end
101
-
102
- def close
103
- begin
104
- super
105
- rescue Exception
106
- self
107
- end
108
- end
109
-
110
- def read(force = false)
111
- begin
112
- super
113
- rescue Exception
114
- Log.exception $!
115
- @writable = false
116
- self
117
- end
118
- end
119
-
120
- def write(force = false)
121
- begin
122
- super
123
- rescue Exception
124
- @writable = true
125
- self
126
- end
127
- end
128
-
129
- def write?
130
- @writable ||= false
131
- end
132
-
133
- def self._extended(data)
134
- if not data.respond_to? :write
135
- class << data
136
- attr_accessor :writable
137
-
138
- end
139
- end
140
- end
141
-
142
- #{{{ TSV ENTRIES and ENTRY_KEYS
143
-
144
- KEY_PREFIX = "__tsv_hash_"
145
- ENTRIES = []
146
- ENTRY_KEYS = Set.new
147
- NIL_VALUE = "NIL_VALUE"
148
-
149
- def load_entry_value(value)
150
- return value unless respond_to? :persistence_path
151
- (value.nil? or value == SERIALIZED_NIL) ? nil : TSV_SERIALIZER.load(value)
152
- end
153
-
154
- def dump_entry_value(value)
155
- return value unless respond_to? :persistence_path
156
- (value.nil? or value == SERIALIZED_NIL) ? SERIALIZED_NIL : TSV_SERIALIZER.dump(value)
157
- end
158
-
159
- def self.entry(*entries)
160
- entries = entries.collect{|entry| entry.to_s}
161
- ENTRIES.concat entries
162
- entries.each do |entry|
163
- key = KEY_PREFIX + entry
164
- ENTRY_KEYS << key
165
- var_name = ("@" << entry).to_sym
166
-
167
- TSV.send(:define_method, entry) do
168
- return instance_variable_get(var_name) if instance_variables.include? var_name
169
- svalue = self.send(:[], key, :entry_key)
170
- value = load_entry_value(svalue)
171
- instance_variable_set(var_name, value)
172
- value
173
- end
174
-
175
- TSV.send(:define_method, entry + "=") do |value|
176
- instance_variable_set(var_name, value)
177
- value = value.to_s if Path === value
178
- self.send(:[]=, key, dump_entry_value(value), :entry_key)
179
- value
180
- end
181
-
182
- end
183
- end
184
-
185
- entry :key_field,
186
- :type,
187
- :fields,
188
- :cast,
189
- :identifiers,
190
- :namespace,
191
- :filename,
192
- :serializer
193
-
194
- attr_reader :serializer_module
195
-
196
- def serializer=(serializer)
197
- @serializer = serializer
198
- self.send(:[]=, KEY_PREFIX + 'serializer', dump_entry_value(serializer), :entry_key)
199
- @serializar_module = serializer.nil? ? TSV::CleanSerializer : (Module === serializer ? serializer : SERIALIZER_ALIAS[serializer.to_sym])
200
- end
201
-
202
-
203
- def serializer_module
204
- @serializer_module ||= begin
205
- serializer = self.serializer
206
- mod = serializer.nil? ? TSV::CleanSerializer : (Module === serializer ? serializer : SERIALIZER_ALIAS[serializer.to_sym])
207
- raise "No serializer_module for: #{ serializer.inspect }" if mod.nil?
208
- mod
209
- end
210
- end
211
-
212
- def empty?
213
- length == 0
214
- end
215
-
216
- #{{{ GETTERS AND SETTERS
217
-
218
- def prepare_value(key, value)
219
- value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
220
-
221
- return value if @unnamed or fields.nil?
222
-
223
- case type
224
- when :double, :list
225
- setup_array value, fields, key, entity_options, entity_templates
226
- when :flat, :single
227
- begin value = value.dup; rescue; end if value.frozen?
228
-
229
- value = prepare_entity(value, fields.first, entity_options)
230
- end
231
- value
232
- end
233
-
234
- def [](key, clean = false)
235
- value = super(key)
236
- return value if clean or value.nil?
237
- @serializer_module ||= self.serializer_module
238
-
239
- if MultipleResult === value
240
- res = value.collect{|v| prepare_value key, v }
241
- res.extend MultipleResult
242
- res
243
- else
244
- prepare_value key, value
245
- end
246
- end
247
-
248
- def []=(key, value, clean = false)
249
- return super(key, value) if clean || value.nil? || TSV::CleanSerializer == self.serializer_module
250
- super(key, @serializer_module.dump(value))
251
- end
252
-
253
- def zip_new(key, values)
254
- values = [values] unless Array === values
255
- case type
256
- when :double
257
- if self.include? key
258
- new = []
259
- self[key, true].each_with_index do |v,i|
260
- _v = values[i]
261
- case _v
262
- when Array
263
- _n = v + _v
264
- else
265
- _n = v << _v
266
- end
267
- new << _n
268
- end
269
- self[key] = new
270
- else
271
- self[key] = Array === values.first ? values.dup : values.collect{|v| [v] }
272
- end
273
- when :flat
274
- if self.include? key
275
- self[key] = (self[key] + values).uniq
276
- else
277
- self[key] = values
278
- end
279
- else
280
- raise "Cannot zip_new for type: #{type}"
281
- end
282
- end
283
-
284
- def keys
285
- keys = super - ENTRY_KEYS.to_a
286
- return keys if @unnamed or key_field.nil?
287
-
288
- prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
289
- end
290
-
291
- def values
292
- values = chunked_values_at(keys)
293
- return values if @unnamed or fields.nil?
294
-
295
- case type
296
- when :double, :list
297
- values.each{|value| setup_array value, fields, nil, entity_options}
298
- when :single
299
- values = prepare_entity(values, fields.first, entity_options)
300
- when :flat
301
- values = values.collect{|v| prepare_entity(v, fields.first, entity_options)}
302
- end
303
-
304
- values
305
- end
306
-
307
- def each
308
- fields = self.fields
309
-
310
- serializer_module = self.serializer_module
311
- super do |key, value|
312
- next if ENTRY_KEYS.include? key
313
-
314
- # TODO Update this to be more efficient
315
- value = serializer_module.load(value) unless value.nil? or serializer_module.nil? or TSV::CleanSerializer == serializer_module
316
-
317
- # Annotated with Entity and NamedArray
318
- if not @unnamed
319
- if not fields.nil?
320
- case type
321
- when :double, :list
322
- setup_array value, fields, key, entity_options, entity_templates if Array == value
323
- when :flat, :single
324
- prepare_entity(value, fields.first, entity_options)
325
- end
326
- end
327
- key = prepare_entity(key, key_field, entity_options)
328
- end
329
-
330
- yield key, value if block_given?
331
- [key, value]
332
- end
333
- end
334
-
335
- def collect
336
- serializer_module = self.serializer_module
337
- super do |key, value|
338
- next if ENTRY_KEYS.include? key
339
-
340
- # TODO Update this to be more efficient
341
- value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
342
-
343
- # Annotated with Entity and NamedArray
344
- if not @unnamed
345
- if not fields.nil?
346
- case type
347
- when :double, :list
348
- setup_array value, fields, key, entity_options if Array === value
349
- when :flat, :single
350
- value = prepare_entity(value, fields.first, entity_options)
351
- end
352
- end
353
- key = prepare_entity(key, key_field, entity_options)
354
- end
355
-
356
- if block_given?
357
- yield key, value
358
- else
359
- [key, value]
360
- end
361
- end
362
- end
363
-
364
- def size
365
- super - ENTRY_KEYS.select{|k| self.include? k}.length
366
- end
367
-
368
- def length
369
- keys.length
370
- end
371
-
372
- #def _values_at(*keys)
373
- # keys.collect do |key|
374
- # self[key]
375
- # end
376
- #end
377
-
378
- def chunked_values_at(keys, max = 5000)
379
- Misc.ordered_divide(keys, max).inject([]) do |acc,c|
380
- new = self.values_at(*c)
381
- new.annotate acc if new.respond_to? :annotate and acc.empty?
382
- acc.concat(new)
383
- end
384
- end
385
-
386
- #{{{ Sorting
387
-
388
- def sort_by(field = nil, just_keys = false, &block)
389
- field = :all if field.nil?
390
-
391
- if field == :all
392
- elems = collect
393
- else
394
- elems = []
395
- case type
396
- when :single
397
- through :key, field do |key, field|
398
- elems << [key, field]
399
- end
400
- when :list, :flat
401
- through :key, field do |key, fields|
402
- elems << [key, fields.first]
403
- end
404
- when :double
405
- through :key, field do |key, fields|
406
- elems << [key, fields.first]
407
- end
408
- end
409
- end
410
-
411
- if not block_given?
412
- if fields == :all
413
- if just_keys
414
- keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
415
- keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
416
- else
417
- elems.sort_by{|key, value| key }
418
- end
419
- else
420
- sorted = elems.sort do |a, b|
421
- a_value = a.last
422
- b_value = b.last
423
- a_empty = a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?)
424
- b_empty = b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?)
425
- case
426
- when (a_empty and b_empty)
427
- 0
428
- when a_empty
429
- -1
430
- when b_empty
431
- 1
432
- when Array === a_value
433
- if a_value.length == 1 and b_value.length == 1
434
- a_value.first <=> b_value.first
435
- else
436
- a_value.length <=> b_value.length
437
- end
438
- else
439
- a_value <=> b_value
440
- end
441
- end
442
- if just_keys
443
- keys = sorted.collect{|key, value| key}
444
- keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
445
- keys
446
- else
447
- sorted.collect{|key, value| [key, self[key]]}
448
- end
449
- end
450
- else
451
- if just_keys
452
- keys = elems.sort_by(&block).collect{|key, value| key}
453
- keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
454
- keys
455
- else
456
- elems.sort_by(&block).collect{|key, value| [key, self[key]]}
457
- end
458
- end
459
- end
460
-
461
- def tsv_sort(&block)
462
- collect.sort &block
463
- end
464
-
465
- # Starts in page 1
466
- def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
467
- pstart = psize * (pnum - 1)
468
- pend = psize * pnum - 1
469
- field = :key if field == "key"
470
- keys = sort_by(field || :key, true, &block)
471
- keys.reverse! if reverse
472
-
473
- if just_keys
474
- keys[pstart..pend]
475
- else
476
- select :key => keys[pstart..pend]
477
- end
478
- end
479
-
480
-
481
- def fields
482
- #@fields ||= TSV_SERIALIZER.load(self.send(:[], "__tsv_hash_fields", :entry_key) || SERIALIZED_NIL)
483
- @fields ||= load_entry_value(self.send(:[], "__tsv_hash_fields", :entry_key))
484
- if true or @fields.nil? or @unnamed
485
- @fields
486
- else
487
- @named_fields ||= NamedArray.setup @fields, @fields, nil, entity_options, entity_templates
488
- end
489
- end
490
-
491
- def namespace=(value)
492
- self.send(:[]=, "__tsv_hash_namespace", dump_entry_value(value), true)
493
- @namespace = value
494
- end
495
-
496
- def fields=(value)
497
- clean = true
498
- self.send(:[]=, "__tsv_hash_fields", dump_entry_value(value), clean)
499
- @fields = value
500
- @named_fields = nil
501
- end
502
-
503
- def self.zip_fields(list, fields = nil)
504
- return [] if list.nil? || list.empty?
505
- fields ||= list.fields if list.respond_to? :fields
506
- zipped = list[0].zip(*list[1..-1])
507
- zipped = zipped.collect{|v| setup_array(v, fields)} if fields
508
- zipped
509
- end
510
-
511
- def identifier_files
512
- case
513
- when (identifiers and TSV === identifiers)
514
- [identifiers]
515
- when (identifiers and Array === identifiers)
516
- case
517
- when (TSV === identifiers.first or identifiers.empty?)
518
- identifiers
519
- else
520
- identifiers.collect{|f| Path === f ? f : Path.setup(f)}
521
- end
522
- when identifiers
523
- [ Path === identifiers ? identifiers : Path.setup(identifiers) ]
524
- when Path === filename
525
- filename.identifier_files
526
- when filename
527
- Path.setup(filename.dup).identifier_files
528
- else
529
- []
530
- end
531
- end
532
-
533
- def options
534
- options = {}
535
- ENTRIES.each do |entry|
536
- options[entry.to_sym] = self.send(entry)
537
- end
538
- IndiferentHash.setup options
539
- end
540
-
541
-
542
- def all_fields
543
- return nil if key_field.nil? or fields.nil?
544
- [key_field] + fields
545
- end
546
-
547
- def values_to_s(values)
548
- case values
549
- when nil
550
- if fields.nil? or fields.empty?
551
- "\n"
552
- else
553
- "\t" << ([""] * fields.length) * "\t" << "\n"
554
- end
555
- when Array
556
- if fields.nil? or fields.empty?
557
- "\n"
558
- else
559
- "\t" << values.collect{|v| Array === v ? v * "|" : v} * "\t" << "\n"
560
- end
561
- else
562
- if fields.nil? or fields.empty?
563
- "\n"
564
- else
565
- "\t" << values.to_s << "\n"
566
- end
567
- end
568
- end
569
-
570
- def dumper_stream(keys = nil, no_options = false, unmerge = false, stream = nil)
571
- unmerge = false unless type == :double
572
-
573
- options = self.options
574
- options[:type] = :list if unmerge
575
-
576
- TSV::Dumper.stream options, filename, stream do |dumper|
577
- case no_options
578
- when FalseClass, nil
579
- dumper.init
580
- when Hash
581
- dumper.init(no_options)
582
- end
583
-
584
- begin
585
- if keys
586
- keys.each do |key|
587
- if unmerge
588
- value_list = self[key]
589
- max = value_list.collect{|v| v.length}.max
590
-
591
- if unmerge == :expand and max > 1
592
- value_list = value_list.collect do |values|
593
- if values.length == 1
594
- [values.first] * max
595
- else
596
- values
597
- end
598
- end
599
- end
600
-
601
- Misc.zip_fields(value_list).each do |values|
602
- dumper.add key, values
603
- end
604
- else
605
- dumper.add key, self[key]
606
- end
607
- end
608
- else
609
- with_unnamed do
610
- each do |k,value_list|
611
-
612
- if unmerge
613
- max = value_list.collect{|v| v.length}.max
614
-
615
- if unmerge == :expand and max > 1
616
- value_list = value_list.collect do |values|
617
- if values.length == 1
618
- [values.first] * max
619
- else
620
- values
621
- end
622
- end
623
- end
624
-
625
- Misc.zip_fields(value_list).each do |values|
626
- dumper.add k, values
627
- end
628
- else
629
- dumper.add k, value_list
630
- end
631
- end
632
- end
633
- end
634
- dumper.close
635
- rescue Exception
636
- Log.exception $!
637
- raise $!
638
- end
639
- end
640
- end
641
-
642
- def to_s(keys = nil, no_options = false, unmerge = false)
643
- if FalseClass === keys or TrueClass === keys or Hash === keys
644
- no_options = keys
645
- keys = nil
646
- end
647
-
648
- if keys == :sort
649
- with_unnamed do
650
- keys = self.keys.sort
651
- end
652
- end
653
-
654
- io = dumper_stream(keys, no_options, unmerge, StringIO.new)
655
- io.rewind
656
- io.read
657
- end
658
-
659
- def to_unmerged_s(keys = nil, no_options = false)
660
- to_s keys, no_options, true
661
- end
662
-
663
- def to_unmerged_expanded_s(keys = nil, no_options = false)
664
- to_s keys, no_options, :expand
665
- end
666
-
667
- def value_peek
668
- peek = {}
669
- i = 0
670
- begin
671
- through do |k,v|
672
- peek[k] = v
673
- i += 1
674
- raise "STOP" if i > 10
675
- end
676
- rescue
677
- end
678
- peek
679
- end
680
-
681
- def head_str(times=10)
682
- stream = dumper_stream
683
- str = ""
684
- times.times do |i|
685
- break if stream.eof?
686
- str << stream.gets
687
- end
688
- str
689
- end
690
-
691
- def head_tsv(times = 10)
692
- new = self.annotate({})
693
- i = 0
694
- self.each do |k,v|
695
- return new if i == times
696
- new[k] = v
697
- i += 1
698
- end
699
- new
700
- end
701
-
702
- alias head head_tsv
703
-
704
- def summary
705
-
706
- key = nil
707
- values = nil
708
- self.each do |k, v|
709
- key = k
710
- values = v
711
- break
712
- end
713
-
714
- filename = @filename
715
- filename = "No filename" if filename.nil? || filename.empty?
716
- filename.find if Path === filename
717
- filename = File.basename(filename) + " [" + File.basename(persistence_path) + "]" if respond_to?(:persistence_path) and persistence_path
718
-
719
- with_unnamed do
720
- <<-EOF
721
- Filename = #{filename}
722
- Key field = #{key_field || "*No key field*"}
723
- Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}
724
- Type = #{type}
725
- Serializer = #{serializer.inspect}
726
- Size = #{size}
727
- namespace = #{Misc.fingerprint namespace}
728
- identifiers = #{Misc.fingerprint identifiers}
729
- Example:
730
- - #{key} -- #{Misc.fingerprint values }
731
- EOF
732
- end
733
- end
734
-
735
- def to_hash
736
- new = self.dup
737
- ENTRY_KEYS.each{|entry| new.delete entry}
738
- new
739
- end
740
-
741
- def unzip(field = 0, merge = false, sep = ":", delete = true)
742
- new = {}
743
- self.annotate new
744
-
745
- field_pos = self.identify_field field
746
- new.with_unnamed do
747
- if merge
748
- self.through do |key,values|
749
- field_values = values[field_pos]
750
- if delete
751
- values = values.dup
752
- values.delete_at(field_pos)
753
- end
754
- next if field_values.nil?
755
- zipped = Misc.zip_fields(values)
756
- field_values.zip(zipped).each do |field_value,rest|
757
- rest = [nil] * values.length if rest.nil?
758
- k = [key,field_value]*sep
759
- if new.include? k
760
- new[k] = Misc.zip_fields(Misc.zip_fields(new[k]) << rest)
761
- else
762
- new[k] = rest.nil? ? nil : rest.collect{|v| [v]}
763
- end
764
- end
765
- end
766
- new.type = :double
767
- else
768
- self.through do |key,values|
769
- field_values = values[field_pos]
770
- values.delete_at(field_pos) if delete
771
- next if field_values.nil?
772
- zipped = Misc.zip_fields(values)
773
- field_values.zip(zipped).each do |field_value,rest|
774
- rest = [nil] * values.length if rest.nil?
775
- k = [key,field_value]*sep
776
- new[k] = rest
777
- end
778
- end
779
- new.type = :list
780
- end
781
- end
782
-
783
- if self.key_field and self.fields
784
- new.key_field = [self.key_field, self.fields[field_pos]] * sep
785
- new_fields = self.fields.dup
786
- new_fields.delete_at(field_pos) if delete
787
- new.fields = new_fields
788
- end
789
-
790
- new
791
- end
792
-
793
- def zip(merge = false, field = "New Field", sep = ":")
794
- new = {}
795
- self.annotate new
796
-
797
- new.type = :double if merge
798
-
799
- new.with_unnamed do
800
- if merge
801
- self.through do |key,values|
802
- new_key, new_value = key.split(sep)
803
- new_values = values + [[new_value] * values.first.length]
804
- if new.include? new_key
805
- current = new[new_key]
806
- current.each_with_index do |v,i|
807
- v.concat(new_values[i])
808
- end
809
- else
810
- new[new_key] = new_values
811
- end
812
- end
813
- else
814
- self.through do |key,values|
815
- new_key, new_value = key.split(sep)
816
- new_values = values + [new_value]
817
- new[new_key] = new_values
818
- end
819
- end
820
- end
821
-
822
- if self.key_field and self.fields
823
- new.key_field = self.key_field.partition(sep).first
824
- new.fields = new.fields + [field]
825
- end
826
-
827
- new
828
- end
829
-
830
- def remove_duplicates(pivot = 0)
831
- new = self.annotate({})
832
- self.through do |k,values|
833
- new[k] = Misc.zip_fields(Misc.zip_fields(values).uniq)
834
- end
835
- new
836
- end
837
- end
838
-
1
+ require_relative '../refactor'
2
+ Rbbt.require_instead 'scout/tsv'