rbbt-util 5.44.1 → 6.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/LICENSE +1 -1
- data/bin/rbbt +67 -90
- data/bin/rbbt_exec.rb +2 -2
- data/etc/app.d/base.rb +2 -2
- data/etc/app.d/semaphores.rb +3 -3
- data/lib/rbbt/annotations/annotated_array.rb +207 -207
- data/lib/rbbt/annotations/refactor.rb +27 -0
- data/lib/rbbt/annotations/util.rb +282 -282
- data/lib/rbbt/annotations.rb +343 -320
- data/lib/rbbt/association/database.rb +200 -225
- data/lib/rbbt/association/index.rb +294 -291
- data/lib/rbbt/association/item.rb +227 -227
- data/lib/rbbt/association/open.rb +35 -34
- data/lib/rbbt/association/util.rb +0 -169
- data/lib/rbbt/association.rb +2 -4
- data/lib/rbbt/entity/identifiers.rb +119 -118
- data/lib/rbbt/entity/refactor.rb +12 -0
- data/lib/rbbt/entity.rb +319 -315
- data/lib/rbbt/hpc/batch.rb +72 -53
- data/lib/rbbt/hpc/lsf.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
- data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
- data/lib/rbbt/hpc/orchestrate.rb +19 -13
- data/lib/rbbt/hpc/slurm.rb +18 -18
- data/lib/rbbt/knowledge_base/entity.rb +13 -5
- data/lib/rbbt/knowledge_base/query.rb +2 -2
- data/lib/rbbt/knowledge_base/registry.rb +32 -31
- data/lib/rbbt/knowledge_base/traverse.rb +1 -1
- data/lib/rbbt/knowledge_base.rb +1 -1
- data/lib/rbbt/monitor.rb +36 -25
- data/lib/rbbt/persist/refactor.rb +166 -0
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
- data/lib/rbbt/persist/tsv.rb +187 -185
- data/lib/rbbt/persist.rb +556 -551
- data/lib/rbbt/refactor.rb +20 -0
- data/lib/rbbt/resource/path/refactor.rb +178 -0
- data/lib/rbbt/resource/path.rb +317 -497
- data/lib/rbbt/resource/util.rb +0 -48
- data/lib/rbbt/resource.rb +3 -390
- data/lib/rbbt/tsv/accessor.rb +2 -838
- data/lib/rbbt/tsv/attach.rb +303 -299
- data/lib/rbbt/tsv/change_id.rb +244 -245
- data/lib/rbbt/tsv/csv.rb +87 -85
- data/lib/rbbt/tsv/dumper.rb +2 -100
- data/lib/rbbt/tsv/excel.rb +26 -24
- data/lib/rbbt/tsv/field_index.rb +4 -1
- data/lib/rbbt/tsv/filter.rb +3 -2
- data/lib/rbbt/tsv/index.rb +2 -284
- data/lib/rbbt/tsv/manipulate.rb +750 -747
- data/lib/rbbt/tsv/marshal.rb +3 -3
- data/lib/rbbt/tsv/matrix.rb +2 -2
- data/lib/rbbt/tsv/parallel/through.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
- data/lib/rbbt/tsv/parser.rb +678 -678
- data/lib/rbbt/tsv/refactor.rb +195 -0
- data/lib/rbbt/tsv/stream.rb +253 -251
- data/lib/rbbt/tsv/util.rb +420 -420
- data/lib/rbbt/tsv.rb +210 -208
- data/lib/rbbt/util/R/eval.rb +4 -4
- data/lib/rbbt/util/R/plot.rb +62 -166
- data/lib/rbbt/util/R.rb +21 -18
- data/lib/rbbt/util/cmd.rb +2 -318
- data/lib/rbbt/util/color.rb +269 -269
- data/lib/rbbt/util/colorize.rb +89 -89
- data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
- data/lib/rbbt/util/concurrency/processes.rb +389 -386
- data/lib/rbbt/util/config.rb +169 -167
- data/lib/rbbt/util/filecache.rb +1 -1
- data/lib/rbbt/util/iruby.rb +20 -0
- data/lib/rbbt/util/log/progress/report.rb +241 -241
- data/lib/rbbt/util/log/progress/util.rb +99 -99
- data/lib/rbbt/util/log/progress.rb +102 -102
- data/lib/rbbt/util/log/refactor.rb +49 -0
- data/lib/rbbt/util/log.rb +486 -532
- data/lib/rbbt/util/migrate.rb +2 -2
- data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
- data/lib/rbbt/util/misc/development.rb +12 -11
- data/lib/rbbt/util/misc/exceptions.rb +117 -112
- data/lib/rbbt/util/misc/format.rb +2 -230
- data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
- data/lib/rbbt/util/misc/inspect.rb +2 -476
- data/lib/rbbt/util/misc/lock.rb +109 -106
- data/lib/rbbt/util/misc/omics.rb +9 -1
- data/lib/rbbt/util/misc/pipes.rb +765 -793
- data/lib/rbbt/util/misc/refactor.rb +20 -0
- data/lib/rbbt/util/misc/ssw.rb +27 -17
- data/lib/rbbt/util/misc/system.rb +92 -105
- data/lib/rbbt/util/misc.rb +39 -20
- data/lib/rbbt/util/named_array/refactor.rb +4 -0
- data/lib/rbbt/util/named_array.rb +3 -220
- data/lib/rbbt/util/open/refactor.rb +7 -0
- data/lib/rbbt/util/open.rb +3 -857
- data/lib/rbbt/util/procpath.rb +6 -6
- data/lib/rbbt/util/python/paths.rb +27 -0
- data/lib/rbbt/util/python/run.rb +115 -0
- data/lib/rbbt/util/python/script.rb +110 -0
- data/lib/rbbt/util/python/util.rb +3 -3
- data/lib/rbbt/util/python.rb +22 -81
- data/lib/rbbt/util/semaphore.rb +152 -148
- data/lib/rbbt/util/simpleopt.rb +9 -8
- data/lib/rbbt/util/ssh/refactor.rb +19 -0
- data/lib/rbbt/util/ssh.rb +122 -118
- data/lib/rbbt/util/tar.rb +117 -115
- data/lib/rbbt/util/tmpfile.rb +69 -67
- data/lib/rbbt/util/version.rb +2 -0
- data/lib/rbbt/workflow/refactor/entity.rb +11 -0
- data/lib/rbbt/workflow/refactor/export.rb +66 -0
- data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
- data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
- data/lib/rbbt/workflow/refactor/task_info.rb +66 -0
- data/lib/rbbt/workflow/refactor.rb +150 -0
- data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +1 -2
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
- data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
- data/lib/rbbt/workflow/remote_workflow.rb +6 -1
- data/lib/rbbt/workflow/step/run.rb +766 -766
- data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
- data/lib/rbbt/workflow/step.rb +2 -362
- data/lib/rbbt/workflow/task.rb +118 -118
- data/lib/rbbt/workflow/usage.rb +289 -287
- data/lib/rbbt/workflow/util/archive.rb +6 -5
- data/lib/rbbt/workflow/util/data.rb +1 -1
- data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
- data/lib/rbbt/workflow/util/trace.rb +79 -44
- data/lib/rbbt/workflow.rb +4 -882
- data/lib/rbbt-util.rb +21 -13
- data/lib/rbbt.rb +16 -3
- data/python/rbbt/__init__.py +96 -4
- data/python/rbbt/workflow/remote.py +104 -0
- data/python/rbbt/workflow.py +64 -0
- data/python/test.py +10 -0
- data/share/Rlib/plot.R +37 -37
- data/share/Rlib/svg.R +22 -5
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +2 -3
- data/share/rbbt_commands/hpc/orchestrate +4 -4
- data/share/rbbt_commands/hpc/tail +2 -0
- data/share/rbbt_commands/hpc/task +10 -7
- data/share/rbbt_commands/lsf/list +2 -3
- data/share/rbbt_commands/lsf/orchestrate +4 -4
- data/share/rbbt_commands/lsf/tail +2 -0
- data/share/rbbt_commands/lsf/task +10 -7
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/pbs/list +2 -3
- data/share/rbbt_commands/pbs/orchestrate +4 -4
- data/share/rbbt_commands/pbs/tail +2 -0
- data/share/rbbt_commands/pbs/task +10 -7
- data/share/rbbt_commands/resource/produce +8 -1
- data/share/rbbt_commands/slurm/list +2 -3
- data/share/rbbt_commands/slurm/orchestrate +4 -4
- data/share/rbbt_commands/slurm/tail +2 -0
- data/share/rbbt_commands/slurm/task +10 -7
- data/share/rbbt_commands/system/clean +5 -5
- data/share/rbbt_commands/system/status +5 -5
- data/share/rbbt_commands/tsv/get +2 -3
- data/share/rbbt_commands/tsv/info +10 -13
- data/share/rbbt_commands/tsv/keys +18 -14
- data/share/rbbt_commands/tsv/slice +2 -2
- data/share/rbbt_commands/tsv/transpose +6 -2
- data/share/rbbt_commands/workflow/info +20 -24
- data/share/rbbt_commands/workflow/list +1 -1
- data/share/rbbt_commands/workflow/prov +20 -13
- data/share/rbbt_commands/workflow/retry +43 -0
- data/share/rbbt_commands/workflow/server +12 -2
- data/share/rbbt_commands/workflow/task +80 -73
- data/share/rbbt_commands/workflow/write_info +26 -9
- data/share/software/opt/ssw/ssw.c +861 -0
- data/share/software/opt/ssw/ssw.h +130 -0
- data/share/workflow_config.ru +3 -3
- metadata +45 -6
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -1,838 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'rbbt/tsv/dumper'
|
4
|
-
require 'set'
|
5
|
-
|
6
|
-
module TSV
|
7
|
-
|
8
|
-
TSV_SERIALIZER = YAML
|
9
|
-
SERIALIZED_NIL = TSV_SERIALIZER.dump nil
|
10
|
-
|
11
|
-
attr_accessor :unnamed, :serializer_module, :entity_options, :entity_templates
|
12
|
-
|
13
|
-
def info
|
14
|
-
{:key_field => key_field, :fields => fields.dup, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed, :cast => cast}.delete_if{|k,v| v.nil? }
|
15
|
-
end
|
16
|
-
|
17
|
-
def annotate(tsv)
|
18
|
-
TSV.setup(tsv, info)
|
19
|
-
tsv.entity_options = self.entity_options
|
20
|
-
tsv.entity_templates = self.entity_templates
|
21
|
-
tsv
|
22
|
-
end
|
23
|
-
|
24
|
-
def entity_options
|
25
|
-
@entity_options ||= nil
|
26
|
-
if @entity_options.nil?
|
27
|
-
@entity_options = namespace ? {:namespace => namespace, :organism => namespace} : {}
|
28
|
-
@entity_templates = nil
|
29
|
-
end
|
30
|
-
@entity_options
|
31
|
-
end
|
32
|
-
|
33
|
-
def entity_options=(options)
|
34
|
-
@entity_options = options || {}
|
35
|
-
if namespace
|
36
|
-
@entity_options[:organism] ||= namespace
|
37
|
-
@entity_options[:namespace] ||= namespace
|
38
|
-
end
|
39
|
-
@entity_templates = nil
|
40
|
-
end
|
41
|
-
|
42
|
-
|
43
|
-
def entity_templates
|
44
|
-
@entity_templates ||= {}
|
45
|
-
end
|
46
|
-
|
47
|
-
def prepare_entity(entity, field, options = {})
|
48
|
-
return entity if entity.nil?
|
49
|
-
return entity unless defined? Entity
|
50
|
-
entity = entity if options.delete :dup_array
|
51
|
-
if (template = entity_templates[field]) and template.respond_to?(:annotate)
|
52
|
-
if String === entity or Array === entity
|
53
|
-
entity = entity.dup if entity.frozen?
|
54
|
-
template.annotate entity
|
55
|
-
entity.extend AnnotatedArray if Array === entity
|
56
|
-
end
|
57
|
-
entity
|
58
|
-
else
|
59
|
-
if entity_templates.include? field
|
60
|
-
entity
|
61
|
-
else
|
62
|
-
template = Misc.prepare_entity("TEMPLATE", field, options)
|
63
|
-
if template.respond_to?(:annotate)
|
64
|
-
entity_templates[field] = template
|
65
|
-
if String === entity or Array === entity
|
66
|
-
entity = entity.dup if entity.frozen?
|
67
|
-
template.annotate entity
|
68
|
-
entity.extend AnnotatedArray if Array === entity
|
69
|
-
end
|
70
|
-
entity
|
71
|
-
else
|
72
|
-
entity_templates[field] = nil
|
73
|
-
entity
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
def setup_array(*args)
|
80
|
-
res = NamedArray.setup(*args)
|
81
|
-
return res if res.nil?
|
82
|
-
res.instance_variable_set(:@entity_templates, entity_templates)
|
83
|
-
res
|
84
|
-
end
|
85
|
-
|
86
|
-
def with_unnamed
|
87
|
-
saved_unnamed = @unnamed
|
88
|
-
@unnamed = true
|
89
|
-
res = yield
|
90
|
-
@unnamed = saved_unnamed
|
91
|
-
res
|
92
|
-
end
|
93
|
-
|
94
|
-
def with_monitor(value = true)
|
95
|
-
saved_monitor = @monitor
|
96
|
-
@monitor = value.nil? ? false : value
|
97
|
-
res = yield
|
98
|
-
@monitor = saved_monitor
|
99
|
-
res
|
100
|
-
end
|
101
|
-
|
102
|
-
def close
|
103
|
-
begin
|
104
|
-
super
|
105
|
-
rescue Exception
|
106
|
-
self
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
def read(force = false)
|
111
|
-
begin
|
112
|
-
super
|
113
|
-
rescue Exception
|
114
|
-
Log.exception $!
|
115
|
-
@writable = false
|
116
|
-
self
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
def write(force = false)
|
121
|
-
begin
|
122
|
-
super
|
123
|
-
rescue Exception
|
124
|
-
@writable = true
|
125
|
-
self
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
def write?
|
130
|
-
@writable ||= false
|
131
|
-
end
|
132
|
-
|
133
|
-
def self._extended(data)
|
134
|
-
if not data.respond_to? :write
|
135
|
-
class << data
|
136
|
-
attr_accessor :writable
|
137
|
-
|
138
|
-
end
|
139
|
-
end
|
140
|
-
end
|
141
|
-
|
142
|
-
#{{{ TSV ENTRIES and ENTRY_KEYS
|
143
|
-
|
144
|
-
KEY_PREFIX = "__tsv_hash_"
|
145
|
-
ENTRIES = []
|
146
|
-
ENTRY_KEYS = Set.new
|
147
|
-
NIL_VALUE = "NIL_VALUE"
|
148
|
-
|
149
|
-
def load_entry_value(value)
|
150
|
-
return value unless respond_to? :persistence_path
|
151
|
-
(value.nil? or value == SERIALIZED_NIL) ? nil : TSV_SERIALIZER.load(value)
|
152
|
-
end
|
153
|
-
|
154
|
-
def dump_entry_value(value)
|
155
|
-
return value unless respond_to? :persistence_path
|
156
|
-
(value.nil? or value == SERIALIZED_NIL) ? SERIALIZED_NIL : TSV_SERIALIZER.dump(value)
|
157
|
-
end
|
158
|
-
|
159
|
-
def self.entry(*entries)
|
160
|
-
entries = entries.collect{|entry| entry.to_s}
|
161
|
-
ENTRIES.concat entries
|
162
|
-
entries.each do |entry|
|
163
|
-
key = KEY_PREFIX + entry
|
164
|
-
ENTRY_KEYS << key
|
165
|
-
var_name = ("@" << entry).to_sym
|
166
|
-
|
167
|
-
TSV.send(:define_method, entry) do
|
168
|
-
return instance_variable_get(var_name) if instance_variables.include? var_name
|
169
|
-
svalue = self.send(:[], key, :entry_key)
|
170
|
-
value = load_entry_value(svalue)
|
171
|
-
instance_variable_set(var_name, value)
|
172
|
-
value
|
173
|
-
end
|
174
|
-
|
175
|
-
TSV.send(:define_method, entry + "=") do |value|
|
176
|
-
instance_variable_set(var_name, value)
|
177
|
-
value = value.to_s if Path === value
|
178
|
-
self.send(:[]=, key, dump_entry_value(value), :entry_key)
|
179
|
-
value
|
180
|
-
end
|
181
|
-
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
entry :key_field,
|
186
|
-
:type,
|
187
|
-
:fields,
|
188
|
-
:cast,
|
189
|
-
:identifiers,
|
190
|
-
:namespace,
|
191
|
-
:filename,
|
192
|
-
:serializer
|
193
|
-
|
194
|
-
attr_reader :serializer_module
|
195
|
-
|
196
|
-
def serializer=(serializer)
|
197
|
-
@serializer = serializer
|
198
|
-
self.send(:[]=, KEY_PREFIX + 'serializer', dump_entry_value(serializer), :entry_key)
|
199
|
-
@serializar_module = serializer.nil? ? TSV::CleanSerializer : (Module === serializer ? serializer : SERIALIZER_ALIAS[serializer.to_sym])
|
200
|
-
end
|
201
|
-
|
202
|
-
|
203
|
-
def serializer_module
|
204
|
-
@serializer_module ||= begin
|
205
|
-
serializer = self.serializer
|
206
|
-
mod = serializer.nil? ? TSV::CleanSerializer : (Module === serializer ? serializer : SERIALIZER_ALIAS[serializer.to_sym])
|
207
|
-
raise "No serializer_module for: #{ serializer.inspect }" if mod.nil?
|
208
|
-
mod
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
def empty?
|
213
|
-
length == 0
|
214
|
-
end
|
215
|
-
|
216
|
-
#{{{ GETTERS AND SETTERS
|
217
|
-
|
218
|
-
def prepare_value(key, value)
|
219
|
-
value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
|
220
|
-
|
221
|
-
return value if @unnamed or fields.nil?
|
222
|
-
|
223
|
-
case type
|
224
|
-
when :double, :list
|
225
|
-
setup_array value, fields, key, entity_options, entity_templates
|
226
|
-
when :flat, :single
|
227
|
-
begin value = value.dup; rescue; end if value.frozen?
|
228
|
-
|
229
|
-
value = prepare_entity(value, fields.first, entity_options)
|
230
|
-
end
|
231
|
-
value
|
232
|
-
end
|
233
|
-
|
234
|
-
def [](key, clean = false)
|
235
|
-
value = super(key)
|
236
|
-
return value if clean or value.nil?
|
237
|
-
@serializer_module ||= self.serializer_module
|
238
|
-
|
239
|
-
if MultipleResult === value
|
240
|
-
res = value.collect{|v| prepare_value key, v }
|
241
|
-
res.extend MultipleResult
|
242
|
-
res
|
243
|
-
else
|
244
|
-
prepare_value key, value
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
def []=(key, value, clean = false)
|
249
|
-
return super(key, value) if clean || value.nil? || TSV::CleanSerializer == self.serializer_module
|
250
|
-
super(key, @serializer_module.dump(value))
|
251
|
-
end
|
252
|
-
|
253
|
-
def zip_new(key, values)
|
254
|
-
values = [values] unless Array === values
|
255
|
-
case type
|
256
|
-
when :double
|
257
|
-
if self.include? key
|
258
|
-
new = []
|
259
|
-
self[key, true].each_with_index do |v,i|
|
260
|
-
_v = values[i]
|
261
|
-
case _v
|
262
|
-
when Array
|
263
|
-
_n = v + _v
|
264
|
-
else
|
265
|
-
_n = v << _v
|
266
|
-
end
|
267
|
-
new << _n
|
268
|
-
end
|
269
|
-
self[key] = new
|
270
|
-
else
|
271
|
-
self[key] = Array === values.first ? values.dup : values.collect{|v| [v] }
|
272
|
-
end
|
273
|
-
when :flat
|
274
|
-
if self.include? key
|
275
|
-
self[key] = (self[key] + values).uniq
|
276
|
-
else
|
277
|
-
self[key] = values
|
278
|
-
end
|
279
|
-
else
|
280
|
-
raise "Cannot zip_new for type: #{type}"
|
281
|
-
end
|
282
|
-
end
|
283
|
-
|
284
|
-
def keys
|
285
|
-
keys = super - ENTRY_KEYS.to_a
|
286
|
-
return keys if @unnamed or key_field.nil?
|
287
|
-
|
288
|
-
prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
|
289
|
-
end
|
290
|
-
|
291
|
-
def values
|
292
|
-
values = chunked_values_at(keys)
|
293
|
-
return values if @unnamed or fields.nil?
|
294
|
-
|
295
|
-
case type
|
296
|
-
when :double, :list
|
297
|
-
values.each{|value| setup_array value, fields, nil, entity_options}
|
298
|
-
when :single
|
299
|
-
values = prepare_entity(values, fields.first, entity_options)
|
300
|
-
when :flat
|
301
|
-
values = values.collect{|v| prepare_entity(v, fields.first, entity_options)}
|
302
|
-
end
|
303
|
-
|
304
|
-
values
|
305
|
-
end
|
306
|
-
|
307
|
-
def each
|
308
|
-
fields = self.fields
|
309
|
-
|
310
|
-
serializer_module = self.serializer_module
|
311
|
-
super do |key, value|
|
312
|
-
next if ENTRY_KEYS.include? key
|
313
|
-
|
314
|
-
# TODO Update this to be more efficient
|
315
|
-
value = serializer_module.load(value) unless value.nil? or serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
316
|
-
|
317
|
-
# Annotated with Entity and NamedArray
|
318
|
-
if not @unnamed
|
319
|
-
if not fields.nil?
|
320
|
-
case type
|
321
|
-
when :double, :list
|
322
|
-
setup_array value, fields, key, entity_options, entity_templates if Array == value
|
323
|
-
when :flat, :single
|
324
|
-
prepare_entity(value, fields.first, entity_options)
|
325
|
-
end
|
326
|
-
end
|
327
|
-
key = prepare_entity(key, key_field, entity_options)
|
328
|
-
end
|
329
|
-
|
330
|
-
yield key, value if block_given?
|
331
|
-
[key, value]
|
332
|
-
end
|
333
|
-
end
|
334
|
-
|
335
|
-
def collect
|
336
|
-
serializer_module = self.serializer_module
|
337
|
-
super do |key, value|
|
338
|
-
next if ENTRY_KEYS.include? key
|
339
|
-
|
340
|
-
# TODO Update this to be more efficient
|
341
|
-
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
342
|
-
|
343
|
-
# Annotated with Entity and NamedArray
|
344
|
-
if not @unnamed
|
345
|
-
if not fields.nil?
|
346
|
-
case type
|
347
|
-
when :double, :list
|
348
|
-
setup_array value, fields, key, entity_options if Array === value
|
349
|
-
when :flat, :single
|
350
|
-
value = prepare_entity(value, fields.first, entity_options)
|
351
|
-
end
|
352
|
-
end
|
353
|
-
key = prepare_entity(key, key_field, entity_options)
|
354
|
-
end
|
355
|
-
|
356
|
-
if block_given?
|
357
|
-
yield key, value
|
358
|
-
else
|
359
|
-
[key, value]
|
360
|
-
end
|
361
|
-
end
|
362
|
-
end
|
363
|
-
|
364
|
-
def size
|
365
|
-
super - ENTRY_KEYS.select{|k| self.include? k}.length
|
366
|
-
end
|
367
|
-
|
368
|
-
def length
|
369
|
-
keys.length
|
370
|
-
end
|
371
|
-
|
372
|
-
#def _values_at(*keys)
|
373
|
-
# keys.collect do |key|
|
374
|
-
# self[key]
|
375
|
-
# end
|
376
|
-
#end
|
377
|
-
|
378
|
-
def chunked_values_at(keys, max = 5000)
|
379
|
-
Misc.ordered_divide(keys, max).inject([]) do |acc,c|
|
380
|
-
new = self.values_at(*c)
|
381
|
-
new.annotate acc if new.respond_to? :annotate and acc.empty?
|
382
|
-
acc.concat(new)
|
383
|
-
end
|
384
|
-
end
|
385
|
-
|
386
|
-
#{{{ Sorting
|
387
|
-
|
388
|
-
def sort_by(field = nil, just_keys = false, &block)
|
389
|
-
field = :all if field.nil?
|
390
|
-
|
391
|
-
if field == :all
|
392
|
-
elems = collect
|
393
|
-
else
|
394
|
-
elems = []
|
395
|
-
case type
|
396
|
-
when :single
|
397
|
-
through :key, field do |key, field|
|
398
|
-
elems << [key, field]
|
399
|
-
end
|
400
|
-
when :list, :flat
|
401
|
-
through :key, field do |key, fields|
|
402
|
-
elems << [key, fields.first]
|
403
|
-
end
|
404
|
-
when :double
|
405
|
-
through :key, field do |key, fields|
|
406
|
-
elems << [key, fields.first]
|
407
|
-
end
|
408
|
-
end
|
409
|
-
end
|
410
|
-
|
411
|
-
if not block_given?
|
412
|
-
if fields == :all
|
413
|
-
if just_keys
|
414
|
-
keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
|
415
|
-
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
|
416
|
-
else
|
417
|
-
elems.sort_by{|key, value| key }
|
418
|
-
end
|
419
|
-
else
|
420
|
-
sorted = elems.sort do |a, b|
|
421
|
-
a_value = a.last
|
422
|
-
b_value = b.last
|
423
|
-
a_empty = a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?)
|
424
|
-
b_empty = b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?)
|
425
|
-
case
|
426
|
-
when (a_empty and b_empty)
|
427
|
-
0
|
428
|
-
when a_empty
|
429
|
-
-1
|
430
|
-
when b_empty
|
431
|
-
1
|
432
|
-
when Array === a_value
|
433
|
-
if a_value.length == 1 and b_value.length == 1
|
434
|
-
a_value.first <=> b_value.first
|
435
|
-
else
|
436
|
-
a_value.length <=> b_value.length
|
437
|
-
end
|
438
|
-
else
|
439
|
-
a_value <=> b_value
|
440
|
-
end
|
441
|
-
end
|
442
|
-
if just_keys
|
443
|
-
keys = sorted.collect{|key, value| key}
|
444
|
-
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
|
445
|
-
keys
|
446
|
-
else
|
447
|
-
sorted.collect{|key, value| [key, self[key]]}
|
448
|
-
end
|
449
|
-
end
|
450
|
-
else
|
451
|
-
if just_keys
|
452
|
-
keys = elems.sort_by(&block).collect{|key, value| key}
|
453
|
-
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
|
454
|
-
keys
|
455
|
-
else
|
456
|
-
elems.sort_by(&block).collect{|key, value| [key, self[key]]}
|
457
|
-
end
|
458
|
-
end
|
459
|
-
end
|
460
|
-
|
461
|
-
def tsv_sort(&block)
|
462
|
-
collect.sort &block
|
463
|
-
end
|
464
|
-
|
465
|
-
# Starts in page 1
|
466
|
-
def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
|
467
|
-
pstart = psize * (pnum - 1)
|
468
|
-
pend = psize * pnum - 1
|
469
|
-
field = :key if field == "key"
|
470
|
-
keys = sort_by(field || :key, true, &block)
|
471
|
-
keys.reverse! if reverse
|
472
|
-
|
473
|
-
if just_keys
|
474
|
-
keys[pstart..pend]
|
475
|
-
else
|
476
|
-
select :key => keys[pstart..pend]
|
477
|
-
end
|
478
|
-
end
|
479
|
-
|
480
|
-
|
481
|
-
def fields
|
482
|
-
#@fields ||= TSV_SERIALIZER.load(self.send(:[], "__tsv_hash_fields", :entry_key) || SERIALIZED_NIL)
|
483
|
-
@fields ||= load_entry_value(self.send(:[], "__tsv_hash_fields", :entry_key))
|
484
|
-
if true or @fields.nil? or @unnamed
|
485
|
-
@fields
|
486
|
-
else
|
487
|
-
@named_fields ||= NamedArray.setup @fields, @fields, nil, entity_options, entity_templates
|
488
|
-
end
|
489
|
-
end
|
490
|
-
|
491
|
-
def namespace=(value)
|
492
|
-
self.send(:[]=, "__tsv_hash_namespace", dump_entry_value(value), true)
|
493
|
-
@namespace = value
|
494
|
-
end
|
495
|
-
|
496
|
-
def fields=(value)
|
497
|
-
clean = true
|
498
|
-
self.send(:[]=, "__tsv_hash_fields", dump_entry_value(value), clean)
|
499
|
-
@fields = value
|
500
|
-
@named_fields = nil
|
501
|
-
end
|
502
|
-
|
503
|
-
def self.zip_fields(list, fields = nil)
|
504
|
-
return [] if list.nil? || list.empty?
|
505
|
-
fields ||= list.fields if list.respond_to? :fields
|
506
|
-
zipped = list[0].zip(*list[1..-1])
|
507
|
-
zipped = zipped.collect{|v| setup_array(v, fields)} if fields
|
508
|
-
zipped
|
509
|
-
end
|
510
|
-
|
511
|
-
def identifier_files
|
512
|
-
case
|
513
|
-
when (identifiers and TSV === identifiers)
|
514
|
-
[identifiers]
|
515
|
-
when (identifiers and Array === identifiers)
|
516
|
-
case
|
517
|
-
when (TSV === identifiers.first or identifiers.empty?)
|
518
|
-
identifiers
|
519
|
-
else
|
520
|
-
identifiers.collect{|f| Path === f ? f : Path.setup(f)}
|
521
|
-
end
|
522
|
-
when identifiers
|
523
|
-
[ Path === identifiers ? identifiers : Path.setup(identifiers) ]
|
524
|
-
when Path === filename
|
525
|
-
filename.identifier_files
|
526
|
-
when filename
|
527
|
-
Path.setup(filename.dup).identifier_files
|
528
|
-
else
|
529
|
-
[]
|
530
|
-
end
|
531
|
-
end
|
532
|
-
|
533
|
-
def options
|
534
|
-
options = {}
|
535
|
-
ENTRIES.each do |entry|
|
536
|
-
options[entry.to_sym] = self.send(entry)
|
537
|
-
end
|
538
|
-
IndiferentHash.setup options
|
539
|
-
end
|
540
|
-
|
541
|
-
|
542
|
-
def all_fields
|
543
|
-
return nil if key_field.nil? or fields.nil?
|
544
|
-
[key_field] + fields
|
545
|
-
end
|
546
|
-
|
547
|
-
def values_to_s(values)
|
548
|
-
case values
|
549
|
-
when nil
|
550
|
-
if fields.nil? or fields.empty?
|
551
|
-
"\n"
|
552
|
-
else
|
553
|
-
"\t" << ([""] * fields.length) * "\t" << "\n"
|
554
|
-
end
|
555
|
-
when Array
|
556
|
-
if fields.nil? or fields.empty?
|
557
|
-
"\n"
|
558
|
-
else
|
559
|
-
"\t" << values.collect{|v| Array === v ? v * "|" : v} * "\t" << "\n"
|
560
|
-
end
|
561
|
-
else
|
562
|
-
if fields.nil? or fields.empty?
|
563
|
-
"\n"
|
564
|
-
else
|
565
|
-
"\t" << values.to_s << "\n"
|
566
|
-
end
|
567
|
-
end
|
568
|
-
end
|
569
|
-
|
570
|
-
def dumper_stream(keys = nil, no_options = false, unmerge = false, stream = nil)
|
571
|
-
unmerge = false unless type == :double
|
572
|
-
|
573
|
-
options = self.options
|
574
|
-
options[:type] = :list if unmerge
|
575
|
-
|
576
|
-
TSV::Dumper.stream options, filename, stream do |dumper|
|
577
|
-
case no_options
|
578
|
-
when FalseClass, nil
|
579
|
-
dumper.init
|
580
|
-
when Hash
|
581
|
-
dumper.init(no_options)
|
582
|
-
end
|
583
|
-
|
584
|
-
begin
|
585
|
-
if keys
|
586
|
-
keys.each do |key|
|
587
|
-
if unmerge
|
588
|
-
value_list = self[key]
|
589
|
-
max = value_list.collect{|v| v.length}.max
|
590
|
-
|
591
|
-
if unmerge == :expand and max > 1
|
592
|
-
value_list = value_list.collect do |values|
|
593
|
-
if values.length == 1
|
594
|
-
[values.first] * max
|
595
|
-
else
|
596
|
-
values
|
597
|
-
end
|
598
|
-
end
|
599
|
-
end
|
600
|
-
|
601
|
-
Misc.zip_fields(value_list).each do |values|
|
602
|
-
dumper.add key, values
|
603
|
-
end
|
604
|
-
else
|
605
|
-
dumper.add key, self[key]
|
606
|
-
end
|
607
|
-
end
|
608
|
-
else
|
609
|
-
with_unnamed do
|
610
|
-
each do |k,value_list|
|
611
|
-
|
612
|
-
if unmerge
|
613
|
-
max = value_list.collect{|v| v.length}.max
|
614
|
-
|
615
|
-
if unmerge == :expand and max > 1
|
616
|
-
value_list = value_list.collect do |values|
|
617
|
-
if values.length == 1
|
618
|
-
[values.first] * max
|
619
|
-
else
|
620
|
-
values
|
621
|
-
end
|
622
|
-
end
|
623
|
-
end
|
624
|
-
|
625
|
-
Misc.zip_fields(value_list).each do |values|
|
626
|
-
dumper.add k, values
|
627
|
-
end
|
628
|
-
else
|
629
|
-
dumper.add k, value_list
|
630
|
-
end
|
631
|
-
end
|
632
|
-
end
|
633
|
-
end
|
634
|
-
dumper.close
|
635
|
-
rescue Exception
|
636
|
-
Log.exception $!
|
637
|
-
raise $!
|
638
|
-
end
|
639
|
-
end
|
640
|
-
end
|
641
|
-
|
642
|
-
def to_s(keys = nil, no_options = false, unmerge = false)
|
643
|
-
if FalseClass === keys or TrueClass === keys or Hash === keys
|
644
|
-
no_options = keys
|
645
|
-
keys = nil
|
646
|
-
end
|
647
|
-
|
648
|
-
if keys == :sort
|
649
|
-
with_unnamed do
|
650
|
-
keys = self.keys.sort
|
651
|
-
end
|
652
|
-
end
|
653
|
-
|
654
|
-
io = dumper_stream(keys, no_options, unmerge, StringIO.new)
|
655
|
-
io.rewind
|
656
|
-
io.read
|
657
|
-
end
|
658
|
-
|
659
|
-
def to_unmerged_s(keys = nil, no_options = false)
|
660
|
-
to_s keys, no_options, true
|
661
|
-
end
|
662
|
-
|
663
|
-
def to_unmerged_expanded_s(keys = nil, no_options = false)
|
664
|
-
to_s keys, no_options, :expand
|
665
|
-
end
|
666
|
-
|
667
|
-
def value_peek
|
668
|
-
peek = {}
|
669
|
-
i = 0
|
670
|
-
begin
|
671
|
-
through do |k,v|
|
672
|
-
peek[k] = v
|
673
|
-
i += 1
|
674
|
-
raise "STOP" if i > 10
|
675
|
-
end
|
676
|
-
rescue
|
677
|
-
end
|
678
|
-
peek
|
679
|
-
end
|
680
|
-
|
681
|
-
def head_str(times=10)
|
682
|
-
stream = dumper_stream
|
683
|
-
str = ""
|
684
|
-
times.times do |i|
|
685
|
-
break if stream.eof?
|
686
|
-
str << stream.gets
|
687
|
-
end
|
688
|
-
str
|
689
|
-
end
|
690
|
-
|
691
|
-
def head_tsv(times = 10)
|
692
|
-
new = self.annotate({})
|
693
|
-
i = 0
|
694
|
-
self.each do |k,v|
|
695
|
-
return new if i == times
|
696
|
-
new[k] = v
|
697
|
-
i += 1
|
698
|
-
end
|
699
|
-
new
|
700
|
-
end
|
701
|
-
|
702
|
-
alias head head_tsv
|
703
|
-
|
704
|
-
def summary
|
705
|
-
|
706
|
-
key = nil
|
707
|
-
values = nil
|
708
|
-
self.each do |k, v|
|
709
|
-
key = k
|
710
|
-
values = v
|
711
|
-
break
|
712
|
-
end
|
713
|
-
|
714
|
-
filename = @filename
|
715
|
-
filename = "No filename" if filename.nil? || filename.empty?
|
716
|
-
filename.find if Path === filename
|
717
|
-
filename = File.basename(filename) + " [" + File.basename(persistence_path) + "]" if respond_to?(:persistence_path) and persistence_path
|
718
|
-
|
719
|
-
with_unnamed do
|
720
|
-
<<-EOF
|
721
|
-
Filename = #{filename}
|
722
|
-
Key field = #{key_field || "*No key field*"}
|
723
|
-
Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}
|
724
|
-
Type = #{type}
|
725
|
-
Serializer = #{serializer.inspect}
|
726
|
-
Size = #{size}
|
727
|
-
namespace = #{Misc.fingerprint namespace}
|
728
|
-
identifiers = #{Misc.fingerprint identifiers}
|
729
|
-
Example:
|
730
|
-
- #{key} -- #{Misc.fingerprint values }
|
731
|
-
EOF
|
732
|
-
end
|
733
|
-
end
|
734
|
-
|
735
|
-
def to_hash
|
736
|
-
new = self.dup
|
737
|
-
ENTRY_KEYS.each{|entry| new.delete entry}
|
738
|
-
new
|
739
|
-
end
|
740
|
-
|
741
|
-
def unzip(field = 0, merge = false, sep = ":", delete = true)
|
742
|
-
new = {}
|
743
|
-
self.annotate new
|
744
|
-
|
745
|
-
field_pos = self.identify_field field
|
746
|
-
new.with_unnamed do
|
747
|
-
if merge
|
748
|
-
self.through do |key,values|
|
749
|
-
field_values = values[field_pos]
|
750
|
-
if delete
|
751
|
-
values = values.dup
|
752
|
-
values.delete_at(field_pos)
|
753
|
-
end
|
754
|
-
next if field_values.nil?
|
755
|
-
zipped = Misc.zip_fields(values)
|
756
|
-
field_values.zip(zipped).each do |field_value,rest|
|
757
|
-
rest = [nil] * values.length if rest.nil?
|
758
|
-
k = [key,field_value]*sep
|
759
|
-
if new.include? k
|
760
|
-
new[k] = Misc.zip_fields(Misc.zip_fields(new[k]) << rest)
|
761
|
-
else
|
762
|
-
new[k] = rest.nil? ? nil : rest.collect{|v| [v]}
|
763
|
-
end
|
764
|
-
end
|
765
|
-
end
|
766
|
-
new.type = :double
|
767
|
-
else
|
768
|
-
self.through do |key,values|
|
769
|
-
field_values = values[field_pos]
|
770
|
-
values.delete_at(field_pos) if delete
|
771
|
-
next if field_values.nil?
|
772
|
-
zipped = Misc.zip_fields(values)
|
773
|
-
field_values.zip(zipped).each do |field_value,rest|
|
774
|
-
rest = [nil] * values.length if rest.nil?
|
775
|
-
k = [key,field_value]*sep
|
776
|
-
new[k] = rest
|
777
|
-
end
|
778
|
-
end
|
779
|
-
new.type = :list
|
780
|
-
end
|
781
|
-
end
|
782
|
-
|
783
|
-
if self.key_field and self.fields
|
784
|
-
new.key_field = [self.key_field, self.fields[field_pos]] * sep
|
785
|
-
new_fields = self.fields.dup
|
786
|
-
new_fields.delete_at(field_pos) if delete
|
787
|
-
new.fields = new_fields
|
788
|
-
end
|
789
|
-
|
790
|
-
new
|
791
|
-
end
|
792
|
-
|
793
|
-
def zip(merge = false, field = "New Field", sep = ":")
|
794
|
-
new = {}
|
795
|
-
self.annotate new
|
796
|
-
|
797
|
-
new.type = :double if merge
|
798
|
-
|
799
|
-
new.with_unnamed do
|
800
|
-
if merge
|
801
|
-
self.through do |key,values|
|
802
|
-
new_key, new_value = key.split(sep)
|
803
|
-
new_values = values + [[new_value] * values.first.length]
|
804
|
-
if new.include? new_key
|
805
|
-
current = new[new_key]
|
806
|
-
current.each_with_index do |v,i|
|
807
|
-
v.concat(new_values[i])
|
808
|
-
end
|
809
|
-
else
|
810
|
-
new[new_key] = new_values
|
811
|
-
end
|
812
|
-
end
|
813
|
-
else
|
814
|
-
self.through do |key,values|
|
815
|
-
new_key, new_value = key.split(sep)
|
816
|
-
new_values = values + [new_value]
|
817
|
-
new[new_key] = new_values
|
818
|
-
end
|
819
|
-
end
|
820
|
-
end
|
821
|
-
|
822
|
-
if self.key_field and self.fields
|
823
|
-
new.key_field = self.key_field.partition(sep).first
|
824
|
-
new.fields = new.fields + [field]
|
825
|
-
end
|
826
|
-
|
827
|
-
new
|
828
|
-
end
|
829
|
-
|
830
|
-
def remove_duplicates(pivot = 0)
|
831
|
-
new = self.annotate({})
|
832
|
-
self.through do |k,values|
|
833
|
-
new[k] = Misc.zip_fields(Misc.zip_fields(values).uniq)
|
834
|
-
end
|
835
|
-
new
|
836
|
-
end
|
837
|
-
end
|
838
|
-
|
1
|
+
require_relative '../refactor'
|
2
|
+
Rbbt.require_instead 'scout/tsv'
|