rbbt-util 5.44.1 → 6.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +67 -90
- data/etc/app.d/base.rb +2 -2
- data/etc/app.d/semaphores.rb +3 -3
- data/lib/rbbt/annotations/annotated_array.rb +207 -207
- data/lib/rbbt/annotations/refactor.rb +27 -0
- data/lib/rbbt/annotations/util.rb +282 -282
- data/lib/rbbt/annotations.rb +343 -320
- data/lib/rbbt/association/database.rb +200 -225
- data/lib/rbbt/association/index.rb +294 -291
- data/lib/rbbt/association/item.rb +227 -227
- data/lib/rbbt/association/open.rb +35 -34
- data/lib/rbbt/association/util.rb +0 -169
- data/lib/rbbt/association.rb +2 -4
- data/lib/rbbt/entity/identifiers.rb +119 -118
- data/lib/rbbt/entity/refactor.rb +12 -0
- data/lib/rbbt/entity.rb +319 -315
- data/lib/rbbt/hpc/batch.rb +72 -53
- data/lib/rbbt/hpc/lsf.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
- data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
- data/lib/rbbt/hpc/orchestrate.rb +19 -13
- data/lib/rbbt/hpc/slurm.rb +18 -18
- data/lib/rbbt/knowledge_base/entity.rb +13 -5
- data/lib/rbbt/knowledge_base/query.rb +2 -2
- data/lib/rbbt/knowledge_base/registry.rb +32 -31
- data/lib/rbbt/knowledge_base/traverse.rb +1 -1
- data/lib/rbbt/knowledge_base.rb +1 -1
- data/lib/rbbt/monitor.rb +36 -25
- data/lib/rbbt/persist/refactor.rb +166 -0
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
- data/lib/rbbt/persist/tsv.rb +187 -185
- data/lib/rbbt/persist.rb +556 -551
- data/lib/rbbt/refactor.rb +20 -0
- data/lib/rbbt/resource/path/refactor.rb +178 -0
- data/lib/rbbt/resource/path.rb +317 -497
- data/lib/rbbt/resource/util.rb +0 -48
- data/lib/rbbt/resource.rb +3 -390
- data/lib/rbbt/tsv/accessor.rb +2 -838
- data/lib/rbbt/tsv/attach.rb +303 -299
- data/lib/rbbt/tsv/change_id.rb +244 -245
- data/lib/rbbt/tsv/csv.rb +87 -85
- data/lib/rbbt/tsv/dumper.rb +2 -100
- data/lib/rbbt/tsv/excel.rb +26 -24
- data/lib/rbbt/tsv/field_index.rb +4 -1
- data/lib/rbbt/tsv/filter.rb +3 -2
- data/lib/rbbt/tsv/index.rb +2 -284
- data/lib/rbbt/tsv/manipulate.rb +750 -747
- data/lib/rbbt/tsv/marshal.rb +3 -3
- data/lib/rbbt/tsv/matrix.rb +2 -2
- data/lib/rbbt/tsv/parallel/through.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
- data/lib/rbbt/tsv/parser.rb +678 -678
- data/lib/rbbt/tsv/refactor.rb +195 -0
- data/lib/rbbt/tsv/stream.rb +253 -251
- data/lib/rbbt/tsv/util.rb +420 -420
- data/lib/rbbt/tsv.rb +210 -208
- data/lib/rbbt/util/R/eval.rb +4 -4
- data/lib/rbbt/util/R/plot.rb +62 -166
- data/lib/rbbt/util/R.rb +21 -18
- data/lib/rbbt/util/cmd.rb +2 -318
- data/lib/rbbt/util/color.rb +269 -269
- data/lib/rbbt/util/colorize.rb +89 -89
- data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
- data/lib/rbbt/util/concurrency/processes.rb +389 -386
- data/lib/rbbt/util/config.rb +169 -167
- data/lib/rbbt/util/iruby.rb +20 -0
- data/lib/rbbt/util/log/progress/report.rb +241 -241
- data/lib/rbbt/util/log/progress/util.rb +99 -99
- data/lib/rbbt/util/log/progress.rb +102 -102
- data/lib/rbbt/util/log/refactor.rb +49 -0
- data/lib/rbbt/util/log.rb +486 -532
- data/lib/rbbt/util/migrate.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
- data/lib/rbbt/util/misc/development.rb +12 -11
- data/lib/rbbt/util/misc/exceptions.rb +117 -112
- data/lib/rbbt/util/misc/format.rb +2 -230
- data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
- data/lib/rbbt/util/misc/inspect.rb +2 -476
- data/lib/rbbt/util/misc/lock.rb +109 -106
- data/lib/rbbt/util/misc/omics.rb +9 -1
- data/lib/rbbt/util/misc/pipes.rb +765 -793
- data/lib/rbbt/util/misc/refactor.rb +20 -0
- data/lib/rbbt/util/misc/ssw.rb +27 -17
- data/lib/rbbt/util/misc/system.rb +0 -15
- data/lib/rbbt/util/misc.rb +39 -20
- data/lib/rbbt/util/named_array/refactor.rb +4 -0
- data/lib/rbbt/util/named_array.rb +3 -220
- data/lib/rbbt/util/open/refactor.rb +7 -0
- data/lib/rbbt/util/open.rb +3 -857
- data/lib/rbbt/util/procpath.rb +6 -6
- data/lib/rbbt/util/python/paths.rb +27 -0
- data/lib/rbbt/util/python/run.rb +115 -0
- data/lib/rbbt/util/python/script.rb +110 -0
- data/lib/rbbt/util/python/util.rb +3 -3
- data/lib/rbbt/util/python.rb +22 -81
- data/lib/rbbt/util/semaphore.rb +152 -148
- data/lib/rbbt/util/simpleopt.rb +9 -8
- data/lib/rbbt/util/ssh/refactor.rb +19 -0
- data/lib/rbbt/util/ssh.rb +122 -118
- data/lib/rbbt/util/tar.rb +117 -115
- data/lib/rbbt/util/tmpfile.rb +69 -67
- data/lib/rbbt/util/version.rb +2 -0
- data/lib/rbbt/workflow/refactor/entity.rb +11 -0
- data/lib/rbbt/workflow/refactor/export.rb +66 -0
- data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
- data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
- data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
- data/lib/rbbt/workflow/refactor.rb +153 -0
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
- data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
- data/lib/rbbt/workflow/remote_workflow.rb +6 -1
- data/lib/rbbt/workflow/step/run.rb +766 -766
- data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
- data/lib/rbbt/workflow/step.rb +2 -362
- data/lib/rbbt/workflow/task.rb +118 -118
- data/lib/rbbt/workflow/usage.rb +289 -287
- data/lib/rbbt/workflow/util/archive.rb +6 -5
- data/lib/rbbt/workflow/util/data.rb +1 -1
- data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
- data/lib/rbbt/workflow/util/trace.rb +79 -44
- data/lib/rbbt/workflow.rb +4 -882
- data/lib/rbbt-util.rb +21 -13
- data/lib/rbbt.rb +16 -3
- data/python/rbbt/__init__.py +19 -1
- data/share/Rlib/plot.R +37 -37
- data/share/Rlib/svg.R +22 -5
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +2 -3
- data/share/rbbt_commands/hpc/orchestrate +4 -4
- data/share/rbbt_commands/hpc/tail +2 -0
- data/share/rbbt_commands/hpc/task +10 -7
- data/share/rbbt_commands/lsf/list +2 -3
- data/share/rbbt_commands/lsf/orchestrate +4 -4
- data/share/rbbt_commands/lsf/tail +2 -0
- data/share/rbbt_commands/lsf/task +10 -7
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/pbs/list +2 -3
- data/share/rbbt_commands/pbs/orchestrate +4 -4
- data/share/rbbt_commands/pbs/tail +2 -0
- data/share/rbbt_commands/pbs/task +10 -7
- data/share/rbbt_commands/resource/produce +8 -1
- data/share/rbbt_commands/slurm/list +2 -3
- data/share/rbbt_commands/slurm/orchestrate +4 -4
- data/share/rbbt_commands/slurm/tail +2 -0
- data/share/rbbt_commands/slurm/task +10 -7
- data/share/rbbt_commands/system/clean +5 -5
- data/share/rbbt_commands/system/status +5 -5
- data/share/rbbt_commands/tsv/get +2 -3
- data/share/rbbt_commands/tsv/info +10 -13
- data/share/rbbt_commands/tsv/keys +18 -14
- data/share/rbbt_commands/tsv/slice +2 -2
- data/share/rbbt_commands/tsv/transpose +6 -2
- data/share/rbbt_commands/workflow/info +20 -24
- data/share/rbbt_commands/workflow/list +1 -1
- data/share/rbbt_commands/workflow/prov +20 -13
- data/share/rbbt_commands/workflow/server +11 -1
- data/share/rbbt_commands/workflow/task +76 -71
- data/share/rbbt_commands/workflow/write_info +26 -9
- data/share/software/opt/ssw/ssw.c +861 -0
- data/share/software/opt/ssw/ssw.h +130 -0
- data/share/workflow_config.ru +3 -3
- metadata +40 -2
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -1,838 +1,2 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
require 'rbbt/tsv/dumper'
|
4
|
-
require 'set'
|
5
|
-
|
6
|
-
module TSV
|
7
|
-
|
8
|
-
TSV_SERIALIZER = YAML
|
9
|
-
SERIALIZED_NIL = TSV_SERIALIZER.dump nil
|
10
|
-
|
11
|
-
attr_accessor :unnamed, :serializer_module, :entity_options, :entity_templates
|
12
|
-
|
13
|
-
def info
|
14
|
-
{:key_field => key_field, :fields => fields.dup, :namespace => namespace, :entity_options => entity_options, :type => type, :filename => filename, :identifiers => identifiers, :unnamed => unnamed, :cast => cast}.delete_if{|k,v| v.nil? }
|
15
|
-
end
|
16
|
-
|
17
|
-
def annotate(tsv)
|
18
|
-
TSV.setup(tsv, info)
|
19
|
-
tsv.entity_options = self.entity_options
|
20
|
-
tsv.entity_templates = self.entity_templates
|
21
|
-
tsv
|
22
|
-
end
|
23
|
-
|
24
|
-
def entity_options
|
25
|
-
@entity_options ||= nil
|
26
|
-
if @entity_options.nil?
|
27
|
-
@entity_options = namespace ? {:namespace => namespace, :organism => namespace} : {}
|
28
|
-
@entity_templates = nil
|
29
|
-
end
|
30
|
-
@entity_options
|
31
|
-
end
|
32
|
-
|
33
|
-
def entity_options=(options)
|
34
|
-
@entity_options = options || {}
|
35
|
-
if namespace
|
36
|
-
@entity_options[:organism] ||= namespace
|
37
|
-
@entity_options[:namespace] ||= namespace
|
38
|
-
end
|
39
|
-
@entity_templates = nil
|
40
|
-
end
|
41
|
-
|
42
|
-
|
43
|
-
def entity_templates
|
44
|
-
@entity_templates ||= {}
|
45
|
-
end
|
46
|
-
|
47
|
-
def prepare_entity(entity, field, options = {})
|
48
|
-
return entity if entity.nil?
|
49
|
-
return entity unless defined? Entity
|
50
|
-
entity = entity if options.delete :dup_array
|
51
|
-
if (template = entity_templates[field]) and template.respond_to?(:annotate)
|
52
|
-
if String === entity or Array === entity
|
53
|
-
entity = entity.dup if entity.frozen?
|
54
|
-
template.annotate entity
|
55
|
-
entity.extend AnnotatedArray if Array === entity
|
56
|
-
end
|
57
|
-
entity
|
58
|
-
else
|
59
|
-
if entity_templates.include? field
|
60
|
-
entity
|
61
|
-
else
|
62
|
-
template = Misc.prepare_entity("TEMPLATE", field, options)
|
63
|
-
if template.respond_to?(:annotate)
|
64
|
-
entity_templates[field] = template
|
65
|
-
if String === entity or Array === entity
|
66
|
-
entity = entity.dup if entity.frozen?
|
67
|
-
template.annotate entity
|
68
|
-
entity.extend AnnotatedArray if Array === entity
|
69
|
-
end
|
70
|
-
entity
|
71
|
-
else
|
72
|
-
entity_templates[field] = nil
|
73
|
-
entity
|
74
|
-
end
|
75
|
-
end
|
76
|
-
end
|
77
|
-
end
|
78
|
-
|
79
|
-
def setup_array(*args)
|
80
|
-
res = NamedArray.setup(*args)
|
81
|
-
return res if res.nil?
|
82
|
-
res.instance_variable_set(:@entity_templates, entity_templates)
|
83
|
-
res
|
84
|
-
end
|
85
|
-
|
86
|
-
def with_unnamed
|
87
|
-
saved_unnamed = @unnamed
|
88
|
-
@unnamed = true
|
89
|
-
res = yield
|
90
|
-
@unnamed = saved_unnamed
|
91
|
-
res
|
92
|
-
end
|
93
|
-
|
94
|
-
def with_monitor(value = true)
|
95
|
-
saved_monitor = @monitor
|
96
|
-
@monitor = value.nil? ? false : value
|
97
|
-
res = yield
|
98
|
-
@monitor = saved_monitor
|
99
|
-
res
|
100
|
-
end
|
101
|
-
|
102
|
-
def close
|
103
|
-
begin
|
104
|
-
super
|
105
|
-
rescue Exception
|
106
|
-
self
|
107
|
-
end
|
108
|
-
end
|
109
|
-
|
110
|
-
def read(force = false)
|
111
|
-
begin
|
112
|
-
super
|
113
|
-
rescue Exception
|
114
|
-
Log.exception $!
|
115
|
-
@writable = false
|
116
|
-
self
|
117
|
-
end
|
118
|
-
end
|
119
|
-
|
120
|
-
def write(force = false)
|
121
|
-
begin
|
122
|
-
super
|
123
|
-
rescue Exception
|
124
|
-
@writable = true
|
125
|
-
self
|
126
|
-
end
|
127
|
-
end
|
128
|
-
|
129
|
-
def write?
|
130
|
-
@writable ||= false
|
131
|
-
end
|
132
|
-
|
133
|
-
def self._extended(data)
|
134
|
-
if not data.respond_to? :write
|
135
|
-
class << data
|
136
|
-
attr_accessor :writable
|
137
|
-
|
138
|
-
end
|
139
|
-
end
|
140
|
-
end
|
141
|
-
|
142
|
-
#{{{ TSV ENTRIES and ENTRY_KEYS
|
143
|
-
|
144
|
-
KEY_PREFIX = "__tsv_hash_"
|
145
|
-
ENTRIES = []
|
146
|
-
ENTRY_KEYS = Set.new
|
147
|
-
NIL_VALUE = "NIL_VALUE"
|
148
|
-
|
149
|
-
def load_entry_value(value)
|
150
|
-
return value unless respond_to? :persistence_path
|
151
|
-
(value.nil? or value == SERIALIZED_NIL) ? nil : TSV_SERIALIZER.load(value)
|
152
|
-
end
|
153
|
-
|
154
|
-
def dump_entry_value(value)
|
155
|
-
return value unless respond_to? :persistence_path
|
156
|
-
(value.nil? or value == SERIALIZED_NIL) ? SERIALIZED_NIL : TSV_SERIALIZER.dump(value)
|
157
|
-
end
|
158
|
-
|
159
|
-
def self.entry(*entries)
|
160
|
-
entries = entries.collect{|entry| entry.to_s}
|
161
|
-
ENTRIES.concat entries
|
162
|
-
entries.each do |entry|
|
163
|
-
key = KEY_PREFIX + entry
|
164
|
-
ENTRY_KEYS << key
|
165
|
-
var_name = ("@" << entry).to_sym
|
166
|
-
|
167
|
-
TSV.send(:define_method, entry) do
|
168
|
-
return instance_variable_get(var_name) if instance_variables.include? var_name
|
169
|
-
svalue = self.send(:[], key, :entry_key)
|
170
|
-
value = load_entry_value(svalue)
|
171
|
-
instance_variable_set(var_name, value)
|
172
|
-
value
|
173
|
-
end
|
174
|
-
|
175
|
-
TSV.send(:define_method, entry + "=") do |value|
|
176
|
-
instance_variable_set(var_name, value)
|
177
|
-
value = value.to_s if Path === value
|
178
|
-
self.send(:[]=, key, dump_entry_value(value), :entry_key)
|
179
|
-
value
|
180
|
-
end
|
181
|
-
|
182
|
-
end
|
183
|
-
end
|
184
|
-
|
185
|
-
entry :key_field,
|
186
|
-
:type,
|
187
|
-
:fields,
|
188
|
-
:cast,
|
189
|
-
:identifiers,
|
190
|
-
:namespace,
|
191
|
-
:filename,
|
192
|
-
:serializer
|
193
|
-
|
194
|
-
attr_reader :serializer_module
|
195
|
-
|
196
|
-
def serializer=(serializer)
|
197
|
-
@serializer = serializer
|
198
|
-
self.send(:[]=, KEY_PREFIX + 'serializer', dump_entry_value(serializer), :entry_key)
|
199
|
-
@serializar_module = serializer.nil? ? TSV::CleanSerializer : (Module === serializer ? serializer : SERIALIZER_ALIAS[serializer.to_sym])
|
200
|
-
end
|
201
|
-
|
202
|
-
|
203
|
-
def serializer_module
|
204
|
-
@serializer_module ||= begin
|
205
|
-
serializer = self.serializer
|
206
|
-
mod = serializer.nil? ? TSV::CleanSerializer : (Module === serializer ? serializer : SERIALIZER_ALIAS[serializer.to_sym])
|
207
|
-
raise "No serializer_module for: #{ serializer.inspect }" if mod.nil?
|
208
|
-
mod
|
209
|
-
end
|
210
|
-
end
|
211
|
-
|
212
|
-
def empty?
|
213
|
-
length == 0
|
214
|
-
end
|
215
|
-
|
216
|
-
#{{{ GETTERS AND SETTERS
|
217
|
-
|
218
|
-
def prepare_value(key, value)
|
219
|
-
value = @serializer_module.load(value) if @serializer_module and not TSV::CleanSerializer == @serializer_module
|
220
|
-
|
221
|
-
return value if @unnamed or fields.nil?
|
222
|
-
|
223
|
-
case type
|
224
|
-
when :double, :list
|
225
|
-
setup_array value, fields, key, entity_options, entity_templates
|
226
|
-
when :flat, :single
|
227
|
-
begin value = value.dup; rescue; end if value.frozen?
|
228
|
-
|
229
|
-
value = prepare_entity(value, fields.first, entity_options)
|
230
|
-
end
|
231
|
-
value
|
232
|
-
end
|
233
|
-
|
234
|
-
def [](key, clean = false)
|
235
|
-
value = super(key)
|
236
|
-
return value if clean or value.nil?
|
237
|
-
@serializer_module ||= self.serializer_module
|
238
|
-
|
239
|
-
if MultipleResult === value
|
240
|
-
res = value.collect{|v| prepare_value key, v }
|
241
|
-
res.extend MultipleResult
|
242
|
-
res
|
243
|
-
else
|
244
|
-
prepare_value key, value
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
def []=(key, value, clean = false)
|
249
|
-
return super(key, value) if clean || value.nil? || TSV::CleanSerializer == self.serializer_module
|
250
|
-
super(key, @serializer_module.dump(value))
|
251
|
-
end
|
252
|
-
|
253
|
-
def zip_new(key, values)
|
254
|
-
values = [values] unless Array === values
|
255
|
-
case type
|
256
|
-
when :double
|
257
|
-
if self.include? key
|
258
|
-
new = []
|
259
|
-
self[key, true].each_with_index do |v,i|
|
260
|
-
_v = values[i]
|
261
|
-
case _v
|
262
|
-
when Array
|
263
|
-
_n = v + _v
|
264
|
-
else
|
265
|
-
_n = v << _v
|
266
|
-
end
|
267
|
-
new << _n
|
268
|
-
end
|
269
|
-
self[key] = new
|
270
|
-
else
|
271
|
-
self[key] = Array === values.first ? values.dup : values.collect{|v| [v] }
|
272
|
-
end
|
273
|
-
when :flat
|
274
|
-
if self.include? key
|
275
|
-
self[key] = (self[key] + values).uniq
|
276
|
-
else
|
277
|
-
self[key] = values
|
278
|
-
end
|
279
|
-
else
|
280
|
-
raise "Cannot zip_new for type: #{type}"
|
281
|
-
end
|
282
|
-
end
|
283
|
-
|
284
|
-
def keys
|
285
|
-
keys = super - ENTRY_KEYS.to_a
|
286
|
-
return keys if @unnamed or key_field.nil?
|
287
|
-
|
288
|
-
prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
|
289
|
-
end
|
290
|
-
|
291
|
-
def values
|
292
|
-
values = chunked_values_at(keys)
|
293
|
-
return values if @unnamed or fields.nil?
|
294
|
-
|
295
|
-
case type
|
296
|
-
when :double, :list
|
297
|
-
values.each{|value| setup_array value, fields, nil, entity_options}
|
298
|
-
when :single
|
299
|
-
values = prepare_entity(values, fields.first, entity_options)
|
300
|
-
when :flat
|
301
|
-
values = values.collect{|v| prepare_entity(v, fields.first, entity_options)}
|
302
|
-
end
|
303
|
-
|
304
|
-
values
|
305
|
-
end
|
306
|
-
|
307
|
-
def each
|
308
|
-
fields = self.fields
|
309
|
-
|
310
|
-
serializer_module = self.serializer_module
|
311
|
-
super do |key, value|
|
312
|
-
next if ENTRY_KEYS.include? key
|
313
|
-
|
314
|
-
# TODO Update this to be more efficient
|
315
|
-
value = serializer_module.load(value) unless value.nil? or serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
316
|
-
|
317
|
-
# Annotated with Entity and NamedArray
|
318
|
-
if not @unnamed
|
319
|
-
if not fields.nil?
|
320
|
-
case type
|
321
|
-
when :double, :list
|
322
|
-
setup_array value, fields, key, entity_options, entity_templates if Array == value
|
323
|
-
when :flat, :single
|
324
|
-
prepare_entity(value, fields.first, entity_options)
|
325
|
-
end
|
326
|
-
end
|
327
|
-
key = prepare_entity(key, key_field, entity_options)
|
328
|
-
end
|
329
|
-
|
330
|
-
yield key, value if block_given?
|
331
|
-
[key, value]
|
332
|
-
end
|
333
|
-
end
|
334
|
-
|
335
|
-
def collect
|
336
|
-
serializer_module = self.serializer_module
|
337
|
-
super do |key, value|
|
338
|
-
next if ENTRY_KEYS.include? key
|
339
|
-
|
340
|
-
# TODO Update this to be more efficient
|
341
|
-
value = serializer_module.load(value) unless serializer_module.nil? or TSV::CleanSerializer == serializer_module
|
342
|
-
|
343
|
-
# Annotated with Entity and NamedArray
|
344
|
-
if not @unnamed
|
345
|
-
if not fields.nil?
|
346
|
-
case type
|
347
|
-
when :double, :list
|
348
|
-
setup_array value, fields, key, entity_options if Array === value
|
349
|
-
when :flat, :single
|
350
|
-
value = prepare_entity(value, fields.first, entity_options)
|
351
|
-
end
|
352
|
-
end
|
353
|
-
key = prepare_entity(key, key_field, entity_options)
|
354
|
-
end
|
355
|
-
|
356
|
-
if block_given?
|
357
|
-
yield key, value
|
358
|
-
else
|
359
|
-
[key, value]
|
360
|
-
end
|
361
|
-
end
|
362
|
-
end
|
363
|
-
|
364
|
-
def size
|
365
|
-
super - ENTRY_KEYS.select{|k| self.include? k}.length
|
366
|
-
end
|
367
|
-
|
368
|
-
def length
|
369
|
-
keys.length
|
370
|
-
end
|
371
|
-
|
372
|
-
#def _values_at(*keys)
|
373
|
-
# keys.collect do |key|
|
374
|
-
# self[key]
|
375
|
-
# end
|
376
|
-
#end
|
377
|
-
|
378
|
-
def chunked_values_at(keys, max = 5000)
|
379
|
-
Misc.ordered_divide(keys, max).inject([]) do |acc,c|
|
380
|
-
new = self.values_at(*c)
|
381
|
-
new.annotate acc if new.respond_to? :annotate and acc.empty?
|
382
|
-
acc.concat(new)
|
383
|
-
end
|
384
|
-
end
|
385
|
-
|
386
|
-
#{{{ Sorting
|
387
|
-
|
388
|
-
def sort_by(field = nil, just_keys = false, &block)
|
389
|
-
field = :all if field.nil?
|
390
|
-
|
391
|
-
if field == :all
|
392
|
-
elems = collect
|
393
|
-
else
|
394
|
-
elems = []
|
395
|
-
case type
|
396
|
-
when :single
|
397
|
-
through :key, field do |key, field|
|
398
|
-
elems << [key, field]
|
399
|
-
end
|
400
|
-
when :list, :flat
|
401
|
-
through :key, field do |key, fields|
|
402
|
-
elems << [key, fields.first]
|
403
|
-
end
|
404
|
-
when :double
|
405
|
-
through :key, field do |key, fields|
|
406
|
-
elems << [key, fields.first]
|
407
|
-
end
|
408
|
-
end
|
409
|
-
end
|
410
|
-
|
411
|
-
if not block_given?
|
412
|
-
if fields == :all
|
413
|
-
if just_keys
|
414
|
-
keys = elems.sort_by{|key, value| key }.collect{|key, values| key}
|
415
|
-
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true))
|
416
|
-
else
|
417
|
-
elems.sort_by{|key, value| key }
|
418
|
-
end
|
419
|
-
else
|
420
|
-
sorted = elems.sort do |a, b|
|
421
|
-
a_value = a.last
|
422
|
-
b_value = b.last
|
423
|
-
a_empty = a_value.nil? or (a_value.respond_to?(:empty?) and a_value.empty?)
|
424
|
-
b_empty = b_value.nil? or (b_value.respond_to?(:empty?) and b_value.empty?)
|
425
|
-
case
|
426
|
-
when (a_empty and b_empty)
|
427
|
-
0
|
428
|
-
when a_empty
|
429
|
-
-1
|
430
|
-
when b_empty
|
431
|
-
1
|
432
|
-
when Array === a_value
|
433
|
-
if a_value.length == 1 and b_value.length == 1
|
434
|
-
a_value.first <=> b_value.first
|
435
|
-
else
|
436
|
-
a_value.length <=> b_value.length
|
437
|
-
end
|
438
|
-
else
|
439
|
-
a_value <=> b_value
|
440
|
-
end
|
441
|
-
end
|
442
|
-
if just_keys
|
443
|
-
keys = sorted.collect{|key, value| key}
|
444
|
-
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
|
445
|
-
keys
|
446
|
-
else
|
447
|
-
sorted.collect{|key, value| [key, self[key]]}
|
448
|
-
end
|
449
|
-
end
|
450
|
-
else
|
451
|
-
if just_keys
|
452
|
-
keys = elems.sort_by(&block).collect{|key, value| key}
|
453
|
-
keys = prepare_entity(keys, key_field, entity_options.merge(:dup_array => true)) unless @unnamed
|
454
|
-
keys
|
455
|
-
else
|
456
|
-
elems.sort_by(&block).collect{|key, value| [key, self[key]]}
|
457
|
-
end
|
458
|
-
end
|
459
|
-
end
|
460
|
-
|
461
|
-
def tsv_sort(&block)
|
462
|
-
collect.sort &block
|
463
|
-
end
|
464
|
-
|
465
|
-
# Starts in page 1
|
466
|
-
def page(pnum, psize, field = nil, just_keys = false, reverse = false, &block)
|
467
|
-
pstart = psize * (pnum - 1)
|
468
|
-
pend = psize * pnum - 1
|
469
|
-
field = :key if field == "key"
|
470
|
-
keys = sort_by(field || :key, true, &block)
|
471
|
-
keys.reverse! if reverse
|
472
|
-
|
473
|
-
if just_keys
|
474
|
-
keys[pstart..pend]
|
475
|
-
else
|
476
|
-
select :key => keys[pstart..pend]
|
477
|
-
end
|
478
|
-
end
|
479
|
-
|
480
|
-
|
481
|
-
def fields
|
482
|
-
#@fields ||= TSV_SERIALIZER.load(self.send(:[], "__tsv_hash_fields", :entry_key) || SERIALIZED_NIL)
|
483
|
-
@fields ||= load_entry_value(self.send(:[], "__tsv_hash_fields", :entry_key))
|
484
|
-
if true or @fields.nil? or @unnamed
|
485
|
-
@fields
|
486
|
-
else
|
487
|
-
@named_fields ||= NamedArray.setup @fields, @fields, nil, entity_options, entity_templates
|
488
|
-
end
|
489
|
-
end
|
490
|
-
|
491
|
-
def namespace=(value)
|
492
|
-
self.send(:[]=, "__tsv_hash_namespace", dump_entry_value(value), true)
|
493
|
-
@namespace = value
|
494
|
-
end
|
495
|
-
|
496
|
-
def fields=(value)
|
497
|
-
clean = true
|
498
|
-
self.send(:[]=, "__tsv_hash_fields", dump_entry_value(value), clean)
|
499
|
-
@fields = value
|
500
|
-
@named_fields = nil
|
501
|
-
end
|
502
|
-
|
503
|
-
def self.zip_fields(list, fields = nil)
|
504
|
-
return [] if list.nil? || list.empty?
|
505
|
-
fields ||= list.fields if list.respond_to? :fields
|
506
|
-
zipped = list[0].zip(*list[1..-1])
|
507
|
-
zipped = zipped.collect{|v| setup_array(v, fields)} if fields
|
508
|
-
zipped
|
509
|
-
end
|
510
|
-
|
511
|
-
def identifier_files
|
512
|
-
case
|
513
|
-
when (identifiers and TSV === identifiers)
|
514
|
-
[identifiers]
|
515
|
-
when (identifiers and Array === identifiers)
|
516
|
-
case
|
517
|
-
when (TSV === identifiers.first or identifiers.empty?)
|
518
|
-
identifiers
|
519
|
-
else
|
520
|
-
identifiers.collect{|f| Path === f ? f : Path.setup(f)}
|
521
|
-
end
|
522
|
-
when identifiers
|
523
|
-
[ Path === identifiers ? identifiers : Path.setup(identifiers) ]
|
524
|
-
when Path === filename
|
525
|
-
filename.identifier_files
|
526
|
-
when filename
|
527
|
-
Path.setup(filename.dup).identifier_files
|
528
|
-
else
|
529
|
-
[]
|
530
|
-
end
|
531
|
-
end
|
532
|
-
|
533
|
-
def options
|
534
|
-
options = {}
|
535
|
-
ENTRIES.each do |entry|
|
536
|
-
options[entry.to_sym] = self.send(entry)
|
537
|
-
end
|
538
|
-
IndiferentHash.setup options
|
539
|
-
end
|
540
|
-
|
541
|
-
|
542
|
-
def all_fields
|
543
|
-
return nil if key_field.nil? or fields.nil?
|
544
|
-
[key_field] + fields
|
545
|
-
end
|
546
|
-
|
547
|
-
def values_to_s(values)
|
548
|
-
case values
|
549
|
-
when nil
|
550
|
-
if fields.nil? or fields.empty?
|
551
|
-
"\n"
|
552
|
-
else
|
553
|
-
"\t" << ([""] * fields.length) * "\t" << "\n"
|
554
|
-
end
|
555
|
-
when Array
|
556
|
-
if fields.nil? or fields.empty?
|
557
|
-
"\n"
|
558
|
-
else
|
559
|
-
"\t" << values.collect{|v| Array === v ? v * "|" : v} * "\t" << "\n"
|
560
|
-
end
|
561
|
-
else
|
562
|
-
if fields.nil? or fields.empty?
|
563
|
-
"\n"
|
564
|
-
else
|
565
|
-
"\t" << values.to_s << "\n"
|
566
|
-
end
|
567
|
-
end
|
568
|
-
end
|
569
|
-
|
570
|
-
def dumper_stream(keys = nil, no_options = false, unmerge = false, stream = nil)
|
571
|
-
unmerge = false unless type == :double
|
572
|
-
|
573
|
-
options = self.options
|
574
|
-
options[:type] = :list if unmerge
|
575
|
-
|
576
|
-
TSV::Dumper.stream options, filename, stream do |dumper|
|
577
|
-
case no_options
|
578
|
-
when FalseClass, nil
|
579
|
-
dumper.init
|
580
|
-
when Hash
|
581
|
-
dumper.init(no_options)
|
582
|
-
end
|
583
|
-
|
584
|
-
begin
|
585
|
-
if keys
|
586
|
-
keys.each do |key|
|
587
|
-
if unmerge
|
588
|
-
value_list = self[key]
|
589
|
-
max = value_list.collect{|v| v.length}.max
|
590
|
-
|
591
|
-
if unmerge == :expand and max > 1
|
592
|
-
value_list = value_list.collect do |values|
|
593
|
-
if values.length == 1
|
594
|
-
[values.first] * max
|
595
|
-
else
|
596
|
-
values
|
597
|
-
end
|
598
|
-
end
|
599
|
-
end
|
600
|
-
|
601
|
-
Misc.zip_fields(value_list).each do |values|
|
602
|
-
dumper.add key, values
|
603
|
-
end
|
604
|
-
else
|
605
|
-
dumper.add key, self[key]
|
606
|
-
end
|
607
|
-
end
|
608
|
-
else
|
609
|
-
with_unnamed do
|
610
|
-
each do |k,value_list|
|
611
|
-
|
612
|
-
if unmerge
|
613
|
-
max = value_list.collect{|v| v.length}.max
|
614
|
-
|
615
|
-
if unmerge == :expand and max > 1
|
616
|
-
value_list = value_list.collect do |values|
|
617
|
-
if values.length == 1
|
618
|
-
[values.first] * max
|
619
|
-
else
|
620
|
-
values
|
621
|
-
end
|
622
|
-
end
|
623
|
-
end
|
624
|
-
|
625
|
-
Misc.zip_fields(value_list).each do |values|
|
626
|
-
dumper.add k, values
|
627
|
-
end
|
628
|
-
else
|
629
|
-
dumper.add k, value_list
|
630
|
-
end
|
631
|
-
end
|
632
|
-
end
|
633
|
-
end
|
634
|
-
dumper.close
|
635
|
-
rescue Exception
|
636
|
-
Log.exception $!
|
637
|
-
raise $!
|
638
|
-
end
|
639
|
-
end
|
640
|
-
end
|
641
|
-
|
642
|
-
def to_s(keys = nil, no_options = false, unmerge = false)
|
643
|
-
if FalseClass === keys or TrueClass === keys or Hash === keys
|
644
|
-
no_options = keys
|
645
|
-
keys = nil
|
646
|
-
end
|
647
|
-
|
648
|
-
if keys == :sort
|
649
|
-
with_unnamed do
|
650
|
-
keys = self.keys.sort
|
651
|
-
end
|
652
|
-
end
|
653
|
-
|
654
|
-
io = dumper_stream(keys, no_options, unmerge, StringIO.new)
|
655
|
-
io.rewind
|
656
|
-
io.read
|
657
|
-
end
|
658
|
-
|
659
|
-
def to_unmerged_s(keys = nil, no_options = false)
|
660
|
-
to_s keys, no_options, true
|
661
|
-
end
|
662
|
-
|
663
|
-
def to_unmerged_expanded_s(keys = nil, no_options = false)
|
664
|
-
to_s keys, no_options, :expand
|
665
|
-
end
|
666
|
-
|
667
|
-
def value_peek
|
668
|
-
peek = {}
|
669
|
-
i = 0
|
670
|
-
begin
|
671
|
-
through do |k,v|
|
672
|
-
peek[k] = v
|
673
|
-
i += 1
|
674
|
-
raise "STOP" if i > 10
|
675
|
-
end
|
676
|
-
rescue
|
677
|
-
end
|
678
|
-
peek
|
679
|
-
end
|
680
|
-
|
681
|
-
def head_str(times=10)
|
682
|
-
stream = dumper_stream
|
683
|
-
str = ""
|
684
|
-
times.times do |i|
|
685
|
-
break if stream.eof?
|
686
|
-
str << stream.gets
|
687
|
-
end
|
688
|
-
str
|
689
|
-
end
|
690
|
-
|
691
|
-
def head_tsv(times = 10)
|
692
|
-
new = self.annotate({})
|
693
|
-
i = 0
|
694
|
-
self.each do |k,v|
|
695
|
-
return new if i == times
|
696
|
-
new[k] = v
|
697
|
-
i += 1
|
698
|
-
end
|
699
|
-
new
|
700
|
-
end
|
701
|
-
|
702
|
-
alias head head_tsv
|
703
|
-
|
704
|
-
def summary
|
705
|
-
|
706
|
-
key = nil
|
707
|
-
values = nil
|
708
|
-
self.each do |k, v|
|
709
|
-
key = k
|
710
|
-
values = v
|
711
|
-
break
|
712
|
-
end
|
713
|
-
|
714
|
-
filename = @filename
|
715
|
-
filename = "No filename" if filename.nil? || filename.empty?
|
716
|
-
filename.find if Path === filename
|
717
|
-
filename = File.basename(filename) + " [" + File.basename(persistence_path) + "]" if respond_to?(:persistence_path) and persistence_path
|
718
|
-
|
719
|
-
with_unnamed do
|
720
|
-
<<-EOF
|
721
|
-
Filename = #{filename}
|
722
|
-
Key field = #{key_field || "*No key field*"}
|
723
|
-
Fields = #{fields ? Misc.fingerprint(fields) : "*No field info*"}
|
724
|
-
Type = #{type}
|
725
|
-
Serializer = #{serializer.inspect}
|
726
|
-
Size = #{size}
|
727
|
-
namespace = #{Misc.fingerprint namespace}
|
728
|
-
identifiers = #{Misc.fingerprint identifiers}
|
729
|
-
Example:
|
730
|
-
- #{key} -- #{Misc.fingerprint values }
|
731
|
-
EOF
|
732
|
-
end
|
733
|
-
end
|
734
|
-
|
735
|
-
def to_hash
|
736
|
-
new = self.dup
|
737
|
-
ENTRY_KEYS.each{|entry| new.delete entry}
|
738
|
-
new
|
739
|
-
end
|
740
|
-
|
741
|
-
def unzip(field = 0, merge = false, sep = ":", delete = true)
|
742
|
-
new = {}
|
743
|
-
self.annotate new
|
744
|
-
|
745
|
-
field_pos = self.identify_field field
|
746
|
-
new.with_unnamed do
|
747
|
-
if merge
|
748
|
-
self.through do |key,values|
|
749
|
-
field_values = values[field_pos]
|
750
|
-
if delete
|
751
|
-
values = values.dup
|
752
|
-
values.delete_at(field_pos)
|
753
|
-
end
|
754
|
-
next if field_values.nil?
|
755
|
-
zipped = Misc.zip_fields(values)
|
756
|
-
field_values.zip(zipped).each do |field_value,rest|
|
757
|
-
rest = [nil] * values.length if rest.nil?
|
758
|
-
k = [key,field_value]*sep
|
759
|
-
if new.include? k
|
760
|
-
new[k] = Misc.zip_fields(Misc.zip_fields(new[k]) << rest)
|
761
|
-
else
|
762
|
-
new[k] = rest.nil? ? nil : rest.collect{|v| [v]}
|
763
|
-
end
|
764
|
-
end
|
765
|
-
end
|
766
|
-
new.type = :double
|
767
|
-
else
|
768
|
-
self.through do |key,values|
|
769
|
-
field_values = values[field_pos]
|
770
|
-
values.delete_at(field_pos) if delete
|
771
|
-
next if field_values.nil?
|
772
|
-
zipped = Misc.zip_fields(values)
|
773
|
-
field_values.zip(zipped).each do |field_value,rest|
|
774
|
-
rest = [nil] * values.length if rest.nil?
|
775
|
-
k = [key,field_value]*sep
|
776
|
-
new[k] = rest
|
777
|
-
end
|
778
|
-
end
|
779
|
-
new.type = :list
|
780
|
-
end
|
781
|
-
end
|
782
|
-
|
783
|
-
if self.key_field and self.fields
|
784
|
-
new.key_field = [self.key_field, self.fields[field_pos]] * sep
|
785
|
-
new_fields = self.fields.dup
|
786
|
-
new_fields.delete_at(field_pos) if delete
|
787
|
-
new.fields = new_fields
|
788
|
-
end
|
789
|
-
|
790
|
-
new
|
791
|
-
end
|
792
|
-
|
793
|
-
def zip(merge = false, field = "New Field", sep = ":")
|
794
|
-
new = {}
|
795
|
-
self.annotate new
|
796
|
-
|
797
|
-
new.type = :double if merge
|
798
|
-
|
799
|
-
new.with_unnamed do
|
800
|
-
if merge
|
801
|
-
self.through do |key,values|
|
802
|
-
new_key, new_value = key.split(sep)
|
803
|
-
new_values = values + [[new_value] * values.first.length]
|
804
|
-
if new.include? new_key
|
805
|
-
current = new[new_key]
|
806
|
-
current.each_with_index do |v,i|
|
807
|
-
v.concat(new_values[i])
|
808
|
-
end
|
809
|
-
else
|
810
|
-
new[new_key] = new_values
|
811
|
-
end
|
812
|
-
end
|
813
|
-
else
|
814
|
-
self.through do |key,values|
|
815
|
-
new_key, new_value = key.split(sep)
|
816
|
-
new_values = values + [new_value]
|
817
|
-
new[new_key] = new_values
|
818
|
-
end
|
819
|
-
end
|
820
|
-
end
|
821
|
-
|
822
|
-
if self.key_field and self.fields
|
823
|
-
new.key_field = self.key_field.partition(sep).first
|
824
|
-
new.fields = new.fields + [field]
|
825
|
-
end
|
826
|
-
|
827
|
-
new
|
828
|
-
end
|
829
|
-
|
830
|
-
def remove_duplicates(pivot = 0)
|
831
|
-
new = self.annotate({})
|
832
|
-
self.through do |k,values|
|
833
|
-
new[k] = Misc.zip_fields(Misc.zip_fields(values).uniq)
|
834
|
-
end
|
835
|
-
new
|
836
|
-
end
|
837
|
-
end
|
838
|
-
|
1
|
+
require_relative '../refactor'
|
2
|
+
Rbbt.require_instead 'scout/tsv'
|