rbbt-util 5.44.1 → 6.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175) hide show
  1. checksums.yaml +4 -4
  2. data/LICENSE +1 -1
  3. data/bin/rbbt +67 -90
  4. data/bin/rbbt_exec.rb +2 -2
  5. data/etc/app.d/base.rb +2 -2
  6. data/etc/app.d/semaphores.rb +3 -3
  7. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  8. data/lib/rbbt/annotations/refactor.rb +27 -0
  9. data/lib/rbbt/annotations/util.rb +282 -282
  10. data/lib/rbbt/annotations.rb +343 -320
  11. data/lib/rbbt/association/database.rb +200 -225
  12. data/lib/rbbt/association/index.rb +294 -291
  13. data/lib/rbbt/association/item.rb +227 -227
  14. data/lib/rbbt/association/open.rb +35 -34
  15. data/lib/rbbt/association/util.rb +0 -169
  16. data/lib/rbbt/association.rb +2 -4
  17. data/lib/rbbt/entity/identifiers.rb +119 -118
  18. data/lib/rbbt/entity/refactor.rb +12 -0
  19. data/lib/rbbt/entity.rb +319 -315
  20. data/lib/rbbt/hpc/batch.rb +72 -53
  21. data/lib/rbbt/hpc/lsf.rb +2 -2
  22. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  24. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  25. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  26. data/lib/rbbt/hpc/slurm.rb +18 -18
  27. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  28. data/lib/rbbt/knowledge_base/query.rb +2 -2
  29. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  30. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  31. data/lib/rbbt/knowledge_base.rb +1 -1
  32. data/lib/rbbt/monitor.rb +36 -25
  33. data/lib/rbbt/persist/refactor.rb +166 -0
  34. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  35. data/lib/rbbt/persist/tsv.rb +187 -185
  36. data/lib/rbbt/persist.rb +556 -551
  37. data/lib/rbbt/refactor.rb +20 -0
  38. data/lib/rbbt/resource/path/refactor.rb +178 -0
  39. data/lib/rbbt/resource/path.rb +317 -497
  40. data/lib/rbbt/resource/util.rb +0 -48
  41. data/lib/rbbt/resource.rb +3 -390
  42. data/lib/rbbt/tsv/accessor.rb +2 -838
  43. data/lib/rbbt/tsv/attach.rb +303 -299
  44. data/lib/rbbt/tsv/change_id.rb +244 -245
  45. data/lib/rbbt/tsv/csv.rb +87 -85
  46. data/lib/rbbt/tsv/dumper.rb +2 -100
  47. data/lib/rbbt/tsv/excel.rb +26 -24
  48. data/lib/rbbt/tsv/field_index.rb +4 -1
  49. data/lib/rbbt/tsv/filter.rb +3 -2
  50. data/lib/rbbt/tsv/index.rb +2 -284
  51. data/lib/rbbt/tsv/manipulate.rb +750 -747
  52. data/lib/rbbt/tsv/marshal.rb +3 -3
  53. data/lib/rbbt/tsv/matrix.rb +2 -2
  54. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  55. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  56. data/lib/rbbt/tsv/parser.rb +678 -678
  57. data/lib/rbbt/tsv/refactor.rb +195 -0
  58. data/lib/rbbt/tsv/stream.rb +253 -251
  59. data/lib/rbbt/tsv/util.rb +420 -420
  60. data/lib/rbbt/tsv.rb +210 -208
  61. data/lib/rbbt/util/R/eval.rb +4 -4
  62. data/lib/rbbt/util/R/plot.rb +62 -166
  63. data/lib/rbbt/util/R.rb +21 -18
  64. data/lib/rbbt/util/cmd.rb +2 -318
  65. data/lib/rbbt/util/color.rb +269 -269
  66. data/lib/rbbt/util/colorize.rb +89 -89
  67. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  68. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  69. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  70. data/lib/rbbt/util/config.rb +169 -167
  71. data/lib/rbbt/util/filecache.rb +1 -1
  72. data/lib/rbbt/util/iruby.rb +20 -0
  73. data/lib/rbbt/util/log/progress/report.rb +241 -241
  74. data/lib/rbbt/util/log/progress/util.rb +99 -99
  75. data/lib/rbbt/util/log/progress.rb +102 -102
  76. data/lib/rbbt/util/log/refactor.rb +49 -0
  77. data/lib/rbbt/util/log.rb +486 -532
  78. data/lib/rbbt/util/migrate.rb +2 -2
  79. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  80. data/lib/rbbt/util/misc/development.rb +12 -11
  81. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  82. data/lib/rbbt/util/misc/format.rb +2 -230
  83. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  84. data/lib/rbbt/util/misc/inspect.rb +2 -476
  85. data/lib/rbbt/util/misc/lock.rb +109 -106
  86. data/lib/rbbt/util/misc/omics.rb +9 -1
  87. data/lib/rbbt/util/misc/pipes.rb +765 -793
  88. data/lib/rbbt/util/misc/refactor.rb +20 -0
  89. data/lib/rbbt/util/misc/ssw.rb +27 -17
  90. data/lib/rbbt/util/misc/system.rb +92 -105
  91. data/lib/rbbt/util/misc.rb +39 -20
  92. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  93. data/lib/rbbt/util/named_array.rb +3 -220
  94. data/lib/rbbt/util/open/refactor.rb +7 -0
  95. data/lib/rbbt/util/open.rb +3 -857
  96. data/lib/rbbt/util/procpath.rb +6 -6
  97. data/lib/rbbt/util/python/paths.rb +27 -0
  98. data/lib/rbbt/util/python/run.rb +115 -0
  99. data/lib/rbbt/util/python/script.rb +110 -0
  100. data/lib/rbbt/util/python/util.rb +3 -3
  101. data/lib/rbbt/util/python.rb +22 -81
  102. data/lib/rbbt/util/semaphore.rb +152 -148
  103. data/lib/rbbt/util/simpleopt.rb +9 -8
  104. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  105. data/lib/rbbt/util/ssh.rb +122 -118
  106. data/lib/rbbt/util/tar.rb +117 -115
  107. data/lib/rbbt/util/tmpfile.rb +69 -67
  108. data/lib/rbbt/util/version.rb +2 -0
  109. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  110. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  111. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  112. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  113. data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
  114. data/lib/rbbt/workflow/refactor.rb +150 -0
  115. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
  116. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  117. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  118. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  119. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  120. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  121. data/lib/rbbt/workflow/step/run.rb +766 -766
  122. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  123. data/lib/rbbt/workflow/step.rb +2 -362
  124. data/lib/rbbt/workflow/task.rb +118 -118
  125. data/lib/rbbt/workflow/usage.rb +289 -287
  126. data/lib/rbbt/workflow/util/archive.rb +6 -5
  127. data/lib/rbbt/workflow/util/data.rb +1 -1
  128. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  129. data/lib/rbbt/workflow/util/trace.rb +79 -44
  130. data/lib/rbbt/workflow.rb +4 -882
  131. data/lib/rbbt-util.rb +21 -13
  132. data/lib/rbbt.rb +16 -3
  133. data/python/rbbt/__init__.py +96 -4
  134. data/python/rbbt/workflow/remote.py +104 -0
  135. data/python/rbbt/workflow.py +64 -0
  136. data/python/test.py +10 -0
  137. data/share/Rlib/plot.R +37 -37
  138. data/share/Rlib/svg.R +22 -5
  139. data/share/install/software/lib/install_helpers +1 -1
  140. data/share/rbbt_commands/hpc/list +2 -3
  141. data/share/rbbt_commands/hpc/orchestrate +4 -4
  142. data/share/rbbt_commands/hpc/tail +2 -0
  143. data/share/rbbt_commands/hpc/task +10 -7
  144. data/share/rbbt_commands/lsf/list +2 -3
  145. data/share/rbbt_commands/lsf/orchestrate +4 -4
  146. data/share/rbbt_commands/lsf/tail +2 -0
  147. data/share/rbbt_commands/lsf/task +10 -7
  148. data/share/rbbt_commands/migrate +1 -1
  149. data/share/rbbt_commands/pbs/list +2 -3
  150. data/share/rbbt_commands/pbs/orchestrate +4 -4
  151. data/share/rbbt_commands/pbs/tail +2 -0
  152. data/share/rbbt_commands/pbs/task +10 -7
  153. data/share/rbbt_commands/resource/produce +8 -1
  154. data/share/rbbt_commands/slurm/list +2 -3
  155. data/share/rbbt_commands/slurm/orchestrate +4 -4
  156. data/share/rbbt_commands/slurm/tail +2 -0
  157. data/share/rbbt_commands/slurm/task +10 -7
  158. data/share/rbbt_commands/system/clean +5 -5
  159. data/share/rbbt_commands/system/status +5 -5
  160. data/share/rbbt_commands/tsv/get +2 -3
  161. data/share/rbbt_commands/tsv/info +10 -13
  162. data/share/rbbt_commands/tsv/keys +18 -14
  163. data/share/rbbt_commands/tsv/slice +2 -2
  164. data/share/rbbt_commands/tsv/transpose +6 -2
  165. data/share/rbbt_commands/workflow/info +20 -24
  166. data/share/rbbt_commands/workflow/list +1 -1
  167. data/share/rbbt_commands/workflow/prov +20 -13
  168. data/share/rbbt_commands/workflow/retry +43 -0
  169. data/share/rbbt_commands/workflow/server +12 -2
  170. data/share/rbbt_commands/workflow/task +80 -73
  171. data/share/rbbt_commands/workflow/write_info +26 -9
  172. data/share/software/opt/ssw/ssw.c +861 -0
  173. data/share/software/opt/ssw/ssw.h +130 -0
  174. data/share/workflow_config.ru +3 -3
  175. metadata +45 -6
@@ -1,838 +1,2 @@
1
- require 'yaml'
2
- require 'rbbt/annotations'
3
- require 'rbbt/tsv/dumper'
4
- require 'set'
5
-
6
- module TSV
7
-
8
- TSV_SERIALIZER = YAML
9
- SERIALIZED_NIL = TSV_SERIALIZER.dump nil
10
-
11
- attr_accessor :unnamed, :serializer_module, :entity_options, :entity_templates
12
-
13
- def info
14
- {:key_field => key_field, :fields => fields.dup, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed, :cast => cast}.delete_if{|k,v| v.nil? }
15
- end
16
-
17
- def annotate(tsv)
18
- TSV.setup(tsv, info)
19
- tsv.entity_options = self.entity_options
20
- tsv.entity_templates = self.entity_templates
21
- tsv
22
- end
23
-
24
- def entity_options
25
- @entity_options ||= nil
26
- if @entity_options.nil?
27
- @entity_options = namespace ? {:namespace => namespace, :organism => namespace} : {}
28
- @entity_templates = nil
29
- end
30
- @entity_options
31
- end
32
-
33
- def entity_options=(options)
34
- @entity_options = options || {}
35
- if namespace
36
- @entity_options[:organism] ||= namespace
37
- @entity_options[:namespace] ||= namespace
38
- end
39
- @entity_templates = nil
40
- end
41
-
42
-
43
- def entity_templates
44
- @entity_templates ||= {}
45
- end
46
-
47
- def prepare_entity(entity, field, options = {})
48
- return entity if entity.nil?
49
- return entity unless defined? Entity
50
- entity = entity if options.delete :dup_array
51
- if (template = entity_templates[field]) and template.respond_to?(:annotate)
52
- if String === entity or Array === entity
53
- entity = entity.dup if entity.frozen?
54
- template.annotate entity
55
- entity.extend AnnotatedArray if Array === entity
56
- end
57
- entity
58
- else
59
- if entity_templates.include? field
60
- entity
61
- else
62
- template = Misc.prepare_entity("TEMPLATE", field, options)
63
- if template.respond_to?(:annotate)
64
- entity_templates[field] = template
65
- if String === entity or Array === entity
66
- entity = entity.dup if entity.frozen?
67
- template.annotate entity
68
- entity.extend AnnotatedArray if Array === entity
69
- end
70
- entity
71
- else
72
- entity_templates[field] = nil
73
- entity
74
- end
75
- end
76
- end
77
- end
78
-
79
- def setup_array(*args)
80
- res = NamedArray.setup(*args)
81
- return res if res.nil?
82
- res.instance_variable_set(:@entity_templates, entity_templates)
83
- res
84
- end
85
-
86
- def with_unnamed
87
- saved_unnamed = @unnamed
88
- @unnamed = true
89
- res = yield
90
- @unnamed = saved_unnamed
91
- res
92
- end
93
-
94
- def with_monitor(value = true)
95
- saved_monitor = @monitor
96
- @monitor = value.nil? ? false : value
97
- res = yield
98
- @monitor = saved_monitor
99
- res
100
- end
101
-
102
- def close
103
- begin
104
- super
105
- rescue Exception
106
- self
107
- end
108
- end
109
-
110
- def read(force = false)
111
- begin
112
- super
113
- rescue Exception
114
- Log.exception $!
115
- @writable = false
116
- self
117
- end
118
- end
119
-
120
- def write(force = false)
121
- begin
122
- super
123
- rescue Exception
124
- @writable = true
125
- self
126
- end
127
- end
128
-
129
- def write?
130
- @writable ||= false
131
- end
132
-
133
- def self._extended(data)
134
- if not data.respond_to? :write
135
- class << data
136
- attr_accessor :writable
137
-
138
- end
139
- end
140
- end
141
-
142
- #{{{ TSV ENTRIES and ENTRY_KEYS
143
-
144
- KEY_PREFIX = "__tsv_hash_"
145
- ENTRIES = []
146
- ENTRY_KEYS = Set.new
147
- NIL_VALUE = "NIL_VALUE"
148
-
149
- def load_entry_value(value)
150
- return value unless respond_to? :persistence_path
151
- (value.nil? or value == SERIALIZED_NIL) ? nil : TSV_SERIALIZER.load(value)
152
- end
153
-
154
- def dump_entry_value(value)
155
- return value unless respond_to? :persistence_path
156
- (value.nil? or value == SERIALIZED_NIL) ? SERIALIZED_NIL : TSV_SERIALIZER.dump(value)
157
- end
158
-
159
- def self.entry(*entries)
160
- entries = entries.collect{|entry| entry.to_s}
161
- ENTRIES.concat entries
162
- entries.each do |entry|
163
- key = KEY_PREFIX + entry
164
- ENTRY_KEYS << key
165
- var_name = ("@" << entry).to_sym
166
-
167
- TSV.send(:define_method, entry) do
168
- return instance_variable_get(var_name) if instance_variables.include? var_name
169
- svalue = self.send(:[], key, :entry_key)
170
- value = load_entry_value(svalue)
171
- instance_variable_set(var_name, value)
172
- value
173
- end
174
-
175
- TSV.send(:define_method, entry + "=") do |value|
176
- instance_variable_set(var_name, value)
177
- value = value.to_s if Path === value
178
- self.send(:[]=, key, dump_entry_value(value), :entry_key)
179
- value
180
- end
181
-
182
- end
183
- end
184
-
185
- entry :key_field,
186
- :type,
187
- :fields,
188
- :cast,
189
- :identifiers,
190
- :namespace,
191
- :filename,
192
- :serializer
193
-
194
- attr_reader :serializer_module
195
-
196
- def serializer=(serializer)
197
- @serializer = serializer
198
- self.send(:[]=, KEY_PREFIX + 'serializer', dump_entry_value(serializer), :entry_key)
199
- @serializar_module = serializer.nil? ? TSV::CleanSerializer : (Module === serializer ? serializer : SERIALIZER_ALIAS[serializer.to_sym])
200
- end
201
-
202
-
203
- def serializer_module
204
- @serializer_module ||= begin
205
- serializer = self.serializer
206
- mod = serializer.nil? ? TSV::CleanSerializer : (Module === serializer ? serializer : SERIALIZER_ALIAS[serializer.to_sym])
207
- raise "No serializer_module for: #{ serializer.inspect }" if mod.nil?
208
- mod
209
- end
210
- end
211
-
212
- def empty?
213
- length == 0
214
- end
215
-
216
- #{{{ GETTERS AND SETTERS
217
-
218
- def prepare_value(key, value)
219
- value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
220
-
221
- return value if @unnamed or fields.nil?
222
-
223
- case type
224
- when :double, :list
225
- setup_array value, fields, key, entity_options, entity_templates
226
- when :flat, :single
227
- begin value = value.dup; rescue; end if value.frozen?
228
-
229
- value = prepare_entity(value, fields.first, entity_options)
230
- end
231
- value
232
- end
233
-
234
- def [](key, clean = false)
235
- value = super(key)
236
- return value if clean or value.nil?
237
- @serializer_module ||= self.serializer_module
238
-
239
- if MultipleResult === value
240
- res = value.collect{|v| prepare_value key, v }
241
- res.extend MultipleResult
242
- res
243
- else
244
- prepare_value key, value
245
- end
246
- end
247
-
248
- def []=(key, value, clean = false)
249
- return super(key, value) if clean || value.nil? || TSV::CleanSerializer == self.serializer_module
250
- super(key, @serializer_module.dump(value))
251
- end
252
-
253
- def zip_new(key, values)
254
- values = [values] unless Array === values
255
- case type
256
- when :double
257
- if self.include? key
258
- new = []
259
- self[key, true].each_with_index do |v,i|
260
- _v = values[i]
261
- case _v
262
- when Array
263
- _n = v + _v
264
- else
265
- _n = v << _v
266
- end
267
- new << _n
268
- end
269
- self[key] = new
270
- else
271
- self[key] = Array === values.first ? values.dup : values.collect{|v| [v] }
272
- end
273
- when :flat
274
- if self.include? key
275
- self[key] = (self[key] + values).uniq
276
- else
277
- self[key] = values
278
- end
279
- else
280
- raise "Cannot zip_new for type: #{type}"
281
- end
282
- end
283
-
284
- def keys
285
- keys = super - ENTRY_KEYS.to_a
286
- return keys if @unnamed or key_field.nil?
287
-
288
- prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
289
- end
290
-
291
- def values
292
- values = chunked_values_at(keys)
293
- return values if @unnamed or fields.nil?
294
-
295
- case type
296
- when :double, :list
297
- values.each{|value| setup_array value, fields, nil, entity_options}
298
- when :single
299
- values = prepare_entity(values, fields.first, entity_options)
300
- when :flat
301
- values = values.collect{|v| prepare_entity(v, fields.first, entity_options)}
302
- end
303
-
304
- values
305
- end
306
-
307
- def each
308
- fields = self.fields
309
-
310
- serializer_module = self.serializer_module
311
- super do |key, value|
312
- next if ENTRY_KEYS.include? key
313
-
314
- # TODO Update this to be more efficient
315
- value = serializer_module.load(value) unless value.nil? or serializer_module.nil? or TSV::CleanSerializer == serializer_module
316
-
317
- # Annotated with Entity and NamedArray
318
- if not @unnamed
319
- if not fields.nil?
320
- case type
321
- when :double, :list
322
- setup_array value, fields, key, entity_options, entity_templates if Array == value
323
- when :flat, :single
324
- prepare_entity(value, fields.first, entity_options)
325
- end
326
- end
327
- key = prepare_entity(key, key_field, entity_options)
328
- end
329
-
330
- yield key, value if block_given?
331
- [key, value]
332
- end
333
- end
334
-
335
- def collect
336
- serializer_module = self.serializer_module
337
- super do |key, value|
338
- next if ENTRY_KEYS.include? key
339
-
340
- # TODO Update this to be more efficient
341
- value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
342
-
343
- # Annotated with Entity and NamedArray
344
- if not @unnamed
345
- if not fields.nil?
346
- case type
347
- when :double, :list
348
- setup_array value, fields, key, entity_options if Array === value
349
- when :flat, :single
350
- value = prepare_entity(value, fields.first, entity_options)
351
- end
352
- end
353
- key = prepare_entity(key, key_field, entity_options)
354
- end
355
-
356
- if block_given?
357
- yield key, value
358
- else
359
- [key, value]
360
- end
361
- end
362
- end
363
-
364
- def size
365
- super - ENTRY_KEYS.select{|k| self.include? k}.length
366
- end
367
-
368
- def length
369
- keys.length
370
- end
371
-
372
- #def _values_at(*keys)
373
- # keys.collect do |key|
374
- # self[key]
375
- # end
376
- #end
377
-
378
- def chunked_values_at(keys, max = 5000)
379
- Misc.ordered_divide(keys, max).inject([]) do |acc,c|
380
- new = self.values_at(*c)
381
- new.annotate acc if new.respond_to? :annotate and acc.empty?
382
- acc.concat(new)
383
- end
384
- end
385
-
386
- #{{{ Sorting
387
-
388
- def sort_by(field = nil, just_keys = false, &block)
389
- field = :all if field.nil?
390
-
391
- if field == :all
392
- elems = collect
393
- else
394
- elems = []
395
- case type
396
- when :single
397
- through :key, field do |key, field|
398
- elems << [key, field]
399
- end
400
- when :list, :flat
401
- through :key, field do |key, fields|
402
- elems << [key, fields.first]
403
- end
404
- when :double
405
- through :key, field do |key, fields|
406
- elems << [key, fields.first]
407
- end
408
- end
409
- end
410
-
411
- if not block_given?
412
- if fields == :all
413
- if just_keys
414
- keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
415
- keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
416
- else
417
- elems.sort_by{|key, value| key }
418
- end
419
- else
420
- sorted = elems.sort do |a, b|
421
- a_value = a.last
422
- b_value = b.last
423
- a_empty = a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?)
424
- b_empty = b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?)
425
- case
426
- when (a_empty and b_empty)
427
- 0
428
- when a_empty
429
- -1
430
- when b_empty
431
- 1
432
- when Array === a_value
433
- if a_value.length == 1 and b_value.length == 1
434
- a_value.first <=> b_value.first
435
- else
436
- a_value.length <=> b_value.length
437
- end
438
- else
439
- a_value <=> b_value
440
- end
441
- end
442
- if just_keys
443
- keys = sorted.collect{|key, value| key}
444
- keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
445
- keys
446
- else
447
- sorted.collect{|key, value| [key, self[key]]}
448
- end
449
- end
450
- else
451
- if just_keys
452
- keys = elems.sort_by(&block).collect{|key, value| key}
453
- keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
454
- keys
455
- else
456
- elems.sort_by(&block).collect{|key, value| [key, self[key]]}
457
- end
458
- end
459
- end
460
-
461
- def tsv_sort(&block)
462
- collect.sort &block
463
- end
464
-
465
- # Starts in page 1
466
- def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
467
- pstart = psize * (pnum - 1)
468
- pend = psize * pnum - 1
469
- field = :key if field == "key"
470
- keys = sort_by(field || :key, true, &block)
471
- keys.reverse! if reverse
472
-
473
- if just_keys
474
- keys[pstart..pend]
475
- else
476
- select :key => keys[pstart..pend]
477
- end
478
- end
479
-
480
-
481
- def fields
482
- #@fields ||= TSV_SERIALIZER.load(self.send(:[], "__tsv_hash_fields", :entry_key) || SERIALIZED_NIL)
483
- @fields ||= load_entry_value(self.send(:[], "__tsv_hash_fields", :entry_key))
484
- if true or @fields.nil? or @unnamed
485
- @fields
486
- else
487
- @named_fields ||= NamedArray.setup @fields, @fields, nil, entity_options, entity_templates
488
- end
489
- end
490
-
491
- def namespace=(value)
492
- self.send(:[]=, "__tsv_hash_namespace", dump_entry_value(value), true)
493
- @namespace = value
494
- end
495
-
496
- def fields=(value)
497
- clean = true
498
- self.send(:[]=, "__tsv_hash_fields", dump_entry_value(value), clean)
499
- @fields = value
500
- @named_fields = nil
501
- end
502
-
503
- def self.zip_fields(list, fields = nil)
504
- return [] if list.nil? || list.empty?
505
- fields ||= list.fields if list.respond_to? :fields
506
- zipped = list[0].zip(*list[1..-1])
507
- zipped = zipped.collect{|v| setup_array(v, fields)} if fields
508
- zipped
509
- end
510
-
511
- def identifier_files
512
- case
513
- when (identifiers and TSV === identifiers)
514
- [identifiers]
515
- when (identifiers and Array === identifiers)
516
- case
517
- when (TSV === identifiers.first or identifiers.empty?)
518
- identifiers
519
- else
520
- identifiers.collect{|f| Path === f ? f : Path.setup(f)}
521
- end
522
- when identifiers
523
- [ Path === identifiers ? identifiers : Path.setup(identifiers) ]
524
- when Path === filename
525
- filename.identifier_files
526
- when filename
527
- Path.setup(filename.dup).identifier_files
528
- else
529
- []
530
- end
531
- end
532
-
533
- def options
534
- options = {}
535
- ENTRIES.each do |entry|
536
- options[entry.to_sym] = self.send(entry)
537
- end
538
- IndiferentHash.setup options
539
- end
540
-
541
-
542
- def all_fields
543
- return nil if key_field.nil? or fields.nil?
544
- [key_field] + fields
545
- end
546
-
547
- def values_to_s(values)
548
- case values
549
- when nil
550
- if fields.nil? or fields.empty?
551
- "\n"
552
- else
553
- "\t" << ([""] * fields.length) * "\t" << "\n"
554
- end
555
- when Array
556
- if fields.nil? or fields.empty?
557
- "\n"
558
- else
559
- "\t" << values.collect{|v| Array === v ? v * "|" : v} * "\t" << "\n"
560
- end
561
- else
562
- if fields.nil? or fields.empty?
563
- "\n"
564
- else
565
- "\t" << values.to_s << "\n"
566
- end
567
- end
568
- end
569
-
570
- def dumper_stream(keys = nil, no_options = false, unmerge = false, stream = nil)
571
- unmerge = false unless type == :double
572
-
573
- options = self.options
574
- options[:type] = :list if unmerge
575
-
576
- TSV::Dumper.stream options, filename, stream do |dumper|
577
- case no_options
578
- when FalseClass, nil
579
- dumper.init
580
- when Hash
581
- dumper.init(no_options)
582
- end
583
-
584
- begin
585
- if keys
586
- keys.each do |key|
587
- if unmerge
588
- value_list = self[key]
589
- max = value_list.collect{|v| v.length}.max
590
-
591
- if unmerge == :expand and max > 1
592
- value_list = value_list.collect do |values|
593
- if values.length == 1
594
- [values.first] * max
595
- else
596
- values
597
- end
598
- end
599
- end
600
-
601
- Misc.zip_fields(value_list).each do |values|
602
- dumper.add key, values
603
- end
604
- else
605
- dumper.add key, self[key]
606
- end
607
- end
608
- else
609
- with_unnamed do
610
- each do |k,value_list|
611
-
612
- if unmerge
613
- max = value_list.collect{|v| v.length}.max
614
-
615
- if unmerge == :expand and max > 1
616
- value_list = value_list.collect do |values|
617
- if values.length == 1
618
- [values.first] * max
619
- else
620
- values
621
- end
622
- end
623
- end
624
-
625
- Misc.zip_fields(value_list).each do |values|
626
- dumper.add k, values
627
- end
628
- else
629
- dumper.add k, value_list
630
- end
631
- end
632
- end
633
- end
634
- dumper.close
635
- rescue Exception
636
- Log.exception $!
637
- raise $!
638
- end
639
- end
640
- end
641
-
642
- def to_s(keys = nil, no_options = false, unmerge = false)
643
- if FalseClass === keys or TrueClass === keys or Hash === keys
644
- no_options = keys
645
- keys = nil
646
- end
647
-
648
- if keys == :sort
649
- with_unnamed do
650
- keys = self.keys.sort
651
- end
652
- end
653
-
654
- io = dumper_stream(keys, no_options, unmerge, StringIO.new)
655
- io.rewind
656
- io.read
657
- end
658
-
659
- def to_unmerged_s(keys = nil, no_options = false)
660
- to_s keys, no_options, true
661
- end
662
-
663
- def to_unmerged_expanded_s(keys = nil, no_options = false)
664
- to_s keys, no_options, :expand
665
- end
666
-
667
- def value_peek
668
- peek = {}
669
- i = 0
670
- begin
671
- through do |k,v|
672
- peek[k] = v
673
- i += 1
674
- raise "STOP" if i > 10
675
- end
676
- rescue
677
- end
678
- peek
679
- end
680
-
681
- def head_str(times=10)
682
- stream = dumper_stream
683
- str = ""
684
- times.times do |i|
685
- break if stream.eof?
686
- str << stream.gets
687
- end
688
- str
689
- end
690
-
691
- def head_tsv(times = 10)
692
- new = self.annotate({})
693
- i = 0
694
- self.each do |k,v|
695
- return new if i == times
696
- new[k] = v
697
- i += 1
698
- end
699
- new
700
- end
701
-
702
- alias head head_tsv
703
-
704
- def summary
705
-
706
- key = nil
707
- values = nil
708
- self.each do |k, v|
709
- key = k
710
- values = v
711
- break
712
- end
713
-
714
- filename = @filename
715
- filename = "No filename" if filename.nil? || filename.empty?
716
- filename.find if Path === filename
717
- filename = File.basename(filename) + " [" + File.basename(persistence_path) + "]" if respond_to?(:persistence_path) and persistence_path
718
-
719
- with_unnamed do
720
- <<-EOF
721
- Filename = #{filename}
722
- Key field = #{key_field || "*No key field*"}
723
- Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}
724
- Type = #{type}
725
- Serializer = #{serializer.inspect}
726
- Size = #{size}
727
- namespace = #{Misc.fingerprint namespace}
728
- identifiers = #{Misc.fingerprint identifiers}
729
- Example:
730
- - #{key} -- #{Misc.fingerprint values }
731
- EOF
732
- end
733
- end
734
-
735
- def to_hash
736
- new = self.dup
737
- ENTRY_KEYS.each{|entry| new.delete entry}
738
- new
739
- end
740
-
741
- def unzip(field = 0, merge = false, sep = ":", delete = true)
742
- new = {}
743
- self.annotate new
744
-
745
- field_pos = self.identify_field field
746
- new.with_unnamed do
747
- if merge
748
- self.through do |key,values|
749
- field_values = values[field_pos]
750
- if delete
751
- values = values.dup
752
- values.delete_at(field_pos)
753
- end
754
- next if field_values.nil?
755
- zipped = Misc.zip_fields(values)
756
- field_values.zip(zipped).each do |field_value,rest|
757
- rest = [nil] * values.length if rest.nil?
758
- k = [key,field_value]*sep
759
- if new.include? k
760
- new[k] = Misc.zip_fields(Misc.zip_fields(new[k]) << rest)
761
- else
762
- new[k] = rest.nil? ? nil : rest.collect{|v| [v]}
763
- end
764
- end
765
- end
766
- new.type = :double
767
- else
768
- self.through do |key,values|
769
- field_values = values[field_pos]
770
- values.delete_at(field_pos) if delete
771
- next if field_values.nil?
772
- zipped = Misc.zip_fields(values)
773
- field_values.zip(zipped).each do |field_value,rest|
774
- rest = [nil] * values.length if rest.nil?
775
- k = [key,field_value]*sep
776
- new[k] = rest
777
- end
778
- end
779
- new.type = :list
780
- end
781
- end
782
-
783
- if self.key_field and self.fields
784
- new.key_field = [self.key_field, self.fields[field_pos]] * sep
785
- new_fields = self.fields.dup
786
- new_fields.delete_at(field_pos) if delete
787
- new.fields = new_fields
788
- end
789
-
790
- new
791
- end
792
-
793
- def zip(merge = false, field = "New Field", sep = ":")
794
- new = {}
795
- self.annotate new
796
-
797
- new.type = :double if merge
798
-
799
- new.with_unnamed do
800
- if merge
801
- self.through do |key,values|
802
- new_key, new_value = key.split(sep)
803
- new_values = values + [[new_value] * values.first.length]
804
- if new.include? new_key
805
- current = new[new_key]
806
- current.each_with_index do |v,i|
807
- v.concat(new_values[i])
808
- end
809
- else
810
- new[new_key] = new_values
811
- end
812
- end
813
- else
814
- self.through do |key,values|
815
- new_key, new_value = key.split(sep)
816
- new_values = values + [new_value]
817
- new[new_key] = new_values
818
- end
819
- end
820
- end
821
-
822
- if self.key_field and self.fields
823
- new.key_field = self.key_field.partition(sep).first
824
- new.fields = new.fields + [field]
825
- end
826
-
827
- new
828
- end
829
-
830
- def remove_duplicates(pivot = 0)
831
- new = self.annotate({})
832
- self.through do |k,values|
833
- new[k] = Misc.zip_fields(Misc.zip_fields(values).uniq)
834
- end
835
- new
836
- end
837
- end
838
-
1
+ require_relative '../refactor'
2
+ Rbbt.require_instead 'scout/tsv'