rbbt-util 5.42.0 → 5.44.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/rbbt/annotations/util.rb +1 -1
- data/lib/rbbt/association/database.rb +2 -1
- data/lib/rbbt/association/index.rb +1 -0
- data/lib/rbbt/association/util.rb +14 -4
- data/lib/rbbt/entity.rb +6 -5
- data/lib/rbbt/hpc/batch.rb +19 -17
- data/lib/rbbt/hpc/orchestrate/batches.rb +1 -1
- data/lib/rbbt/hpc/orchestrate/chains.rb +3 -4
- data/lib/rbbt/knowledge_base/traverse.rb +24 -16
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +1 -1
- data/lib/rbbt/resource/path.rb +7 -2
- data/lib/rbbt/resource.rb +1 -1
- data/lib/rbbt/tsv/accessor.rb +6 -11
- data/lib/rbbt/tsv/dumper.rb +21 -10
- data/lib/rbbt/tsv/index.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +44 -0
- data/lib/rbbt/tsv/parser.rb +7 -6
- data/lib/rbbt/tsv/serializers.rb +1 -1
- data/lib/rbbt/tsv/util.rb +12 -0
- data/lib/rbbt/util/log/progress.rb +1 -1
- data/lib/rbbt/util/misc/bgzf.rb +5 -4
- data/lib/rbbt/util/misc/development.rb +4 -4
- data/lib/rbbt/util/misc/math.rb +0 -1
- data/lib/rbbt/util/misc.rb +1 -1
- data/lib/rbbt/util/open.rb +1 -1
- data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +8 -3
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +4 -0
- data/lib/rbbt/workflow/step/info.rb +1 -0
- data/lib/rbbt/workflow/step.rb +2 -1
- data/lib/rbbt/workflow/util/trace.rb +2 -2
- data/lib/rbbt/workflow.rb +2 -2
- data/python/rbbt/__init__.py +2 -2
- data/share/rbbt_commands/workflow/task +1 -1
- metadata +4 -220
- data/test/rbbt/annotations/test_util.rb +0 -43
- data/test/rbbt/association/test_database.rb +0 -89
- data/test/rbbt/association/test_index.rb +0 -127
- data/test/rbbt/association/test_item.rb +0 -15
- data/test/rbbt/association/test_open.rb +0 -63
- data/test/rbbt/association/test_util.rb +0 -108
- data/test/rbbt/entity/test_identifiers.rb +0 -74
- data/test/rbbt/hpc/orchestrate/test_batches.rb +0 -113
- data/test/rbbt/hpc/orchestrate/test_chains.rb +0 -139
- data/test/rbbt/hpc/orchestrate/test_rules.rb +0 -92
- data/test/rbbt/hpc/test_batch.rb +0 -68
- data/test/rbbt/hpc/test_orchestrate.rb +0 -144
- data/test/rbbt/hpc/test_pbs.rb +0 -43
- data/test/rbbt/hpc/test_slurm.rb +0 -32
- data/test/rbbt/knowledge_base/test_enrichment.rb +0 -48
- data/test/rbbt/knowledge_base/test_entity.rb +0 -42
- data/test/rbbt/knowledge_base/test_query.rb +0 -46
- data/test/rbbt/knowledge_base/test_registry.rb +0 -81
- data/test/rbbt/knowledge_base/test_syndicate.rb +0 -48
- data/test/rbbt/knowledge_base/test_traverse.rb +0 -85
- data/test/rbbt/persist/test_tsv.rb +0 -88
- data/test/rbbt/persist/tsv/test_cdb.rb +0 -18
- data/test/rbbt/persist/tsv/test_kyotocabinet.rb +0 -27
- data/test/rbbt/persist/tsv/test_leveldb.rb +0 -18
- data/test/rbbt/persist/tsv/test_lmdb.rb +0 -18
- data/test/rbbt/persist/tsv/test_sharder.rb +0 -164
- data/test/rbbt/persist/tsv/test_tokyocabinet.rb +0 -262
- data/test/rbbt/resource/test_path.rb +0 -49
- data/test/rbbt/test_annotations.rb +0 -167
- data/test/rbbt/test_association.rb +0 -103
- data/test/rbbt/test_entity.rb +0 -249
- data/test/rbbt/test_fix_width_table.rb +0 -135
- data/test/rbbt/test_hpc.rb +0 -16
- data/test/rbbt/test_knowledge_base.rb +0 -149
- data/test/rbbt/test_monitor.rb +0 -11
- data/test/rbbt/test_packed_index.rb +0 -68
- data/test/rbbt/test_persist.rb +0 -85
- data/test/rbbt/test_resource.rb +0 -110
- data/test/rbbt/test_tsv.rb +0 -669
- data/test/rbbt/test_workflow.rb +0 -604
- data/test/rbbt/tsv/parallel/test_through.rb +0 -40
- data/test/rbbt/tsv/parallel/test_traverse.rb +0 -456
- data/test/rbbt/tsv/test_accessor.rb +0 -315
- data/test/rbbt/tsv/test_attach.rb +0 -718
- data/test/rbbt/tsv/test_change_id.rb +0 -61
- data/test/rbbt/tsv/test_csv.rb +0 -49
- data/test/rbbt/tsv/test_excel.rb +0 -171
- data/test/rbbt/tsv/test_field_index.rb +0 -19
- data/test/rbbt/tsv/test_filter.rb +0 -187
- data/test/rbbt/tsv/test_index.rb +0 -302
- data/test/rbbt/tsv/test_manipulate.rb +0 -337
- data/test/rbbt/tsv/test_marshal.rb +0 -24
- data/test/rbbt/tsv/test_matrix.rb +0 -20
- data/test/rbbt/tsv/test_parallel.rb +0 -7
- data/test/rbbt/tsv/test_parser.rb +0 -101
- data/test/rbbt/tsv/test_stream.rb +0 -253
- data/test/rbbt/tsv/test_util.rb +0 -52
- data/test/rbbt/util/R/test_eval.rb +0 -43
- data/test/rbbt/util/R/test_model.rb +0 -130
- data/test/rbbt/util/R/test_plot.rb +0 -38
- data/test/rbbt/util/concurrency/processes/test_socket.rb +0 -70
- data/test/rbbt/util/concurrency/test_processes.rb +0 -192
- data/test/rbbt/util/concurrency/test_threads.rb +0 -40
- data/test/rbbt/util/log/test_progress.rb +0 -111
- data/test/rbbt/util/misc/test_bgzf.rb +0 -48
- data/test/rbbt/util/misc/test_communication.rb +0 -11
- data/test/rbbt/util/misc/test_development.rb +0 -26
- data/test/rbbt/util/misc/test_format.rb +0 -10
- data/test/rbbt/util/misc/test_indiferent_hash.rb +0 -14
- data/test/rbbt/util/misc/test_lock.rb +0 -77
- data/test/rbbt/util/misc/test_multipart_payload.rb +0 -202
- data/test/rbbt/util/misc/test_omics.rb +0 -116
- data/test/rbbt/util/misc/test_pipes.rb +0 -343
- data/test/rbbt/util/misc/test_serialize.rb +0 -24
- data/test/rbbt/util/python/test_util.rb +0 -25
- data/test/rbbt/util/simpleopt/test_get.rb +0 -12
- data/test/rbbt/util/simpleopt/test_parse.rb +0 -10
- data/test/rbbt/util/simpleopt/test_setup.rb +0 -76
- data/test/rbbt/util/test_R.rb +0 -37
- data/test/rbbt/util/test_chain_methods.rb +0 -22
- data/test/rbbt/util/test_cmd.rb +0 -87
- data/test/rbbt/util/test_colorize.rb +0 -22
- data/test/rbbt/util/test_concurrency.rb +0 -6
- data/test/rbbt/util/test_config.rb +0 -69
- data/test/rbbt/util/test_excel2tsv.rb +0 -10
- data/test/rbbt/util/test_filecache.rb +0 -36
- data/test/rbbt/util/test_log.rb +0 -52
- data/test/rbbt/util/test_migrate.rb +0 -36
- data/test/rbbt/util/test_misc.rb +0 -723
- data/test/rbbt/util/test_open.rb +0 -200
- data/test/rbbt/util/test_procpath.rb +0 -23
- data/test/rbbt/util/test_python.rb +0 -140
- data/test/rbbt/util/test_semaphore.rb +0 -36
- data/test/rbbt/util/test_simpleDSL.rb +0 -55
- data/test/rbbt/util/test_simpleopt.rb +0 -11
- data/test/rbbt/util/test_ssh.rb +0 -10
- data/test/rbbt/util/test_tmpfile.rb +0 -32
- data/test/rbbt/workflow/step/test_dependencies.rb +0 -299
- data/test/rbbt/workflow/step/test_save_load_inputs.rb +0 -136
- data/test/rbbt/workflow/test_doc.rb +0 -30
- data/test/rbbt/workflow/test_remote_workflow.rb +0 -98
- data/test/rbbt/workflow/test_schedule.rb +0 -0
- data/test/rbbt/workflow/test_step.rb +0 -231
- data/test/rbbt/workflow/test_task.rb +0 -85
- data/test/rbbt/workflow/util/test_archive.rb +0 -31
- data/test/rbbt/workflow/util/test_data.rb +0 -67
- data/test/rbbt/workflow/util/test_orchestrator.rb +0 -273
- data/test/test_helper.rb +0 -90
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1c39baa8ee0c2633bea3878720d29aa0f49a057db7ec6700187685c7a07f7eae
|
4
|
+
data.tar.gz: ad0f8a09cb34faa3d0aa3388c26733d4849d307a02df02989656c0aced5ea72a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dcfcf7f6ea2b45dd4ca5e1257fb22cec0f06280be2b6c9aac3b118e1e649a8f5948db0162f62735b9c113ee9a878cb89e8f98a99fd7026a7f58f061531b93f89
|
7
|
+
data.tar.gz: 59e94fecbb50bf9d7635545aab508ee6b986e44f81acac59aac7e888f6c9c5748fb2d0324a0e83d784c78b249e67a50baaaba1d40b58612f194de111fd4c6579
|
@@ -93,7 +93,7 @@ module Association
|
|
93
93
|
tmp_data = Persist.open_database(tmpfile, true, :double, "HDB")
|
94
94
|
|
95
95
|
tsv.with_monitor(options[:monitor]) do
|
96
|
-
tsv = tsv.reorder source_field,
|
96
|
+
tsv = tsv.reorder source_field, tsv.all_fields.values_at(*field_pos), :persist => persist, :persist_data => tmp_data if true or source_field != tsv.key_field or (fields and tsv.fields != fields)
|
97
97
|
end
|
98
98
|
|
99
99
|
tsv.key_field = source_header
|
@@ -216,6 +216,7 @@ module Association
|
|
216
216
|
open_stream(stream, options.dup)
|
217
217
|
end
|
218
218
|
|
219
|
+
database.filename = file if Path === file && file.identifier_files.any?
|
219
220
|
database.entity_options = options[:entity_options] if options[:entity_options]
|
220
221
|
|
221
222
|
database
|
@@ -10,7 +10,7 @@ module Association
|
|
10
10
|
end
|
11
11
|
|
12
12
|
def self.parse_field_specification(spec)
|
13
|
-
return [
|
13
|
+
return [spec,nil,nil] if Numeric === spec
|
14
14
|
spec = spec.split "=>" unless Array === spec
|
15
15
|
field_part, final_format = spec
|
16
16
|
|
@@ -115,8 +115,14 @@ module Association
|
|
115
115
|
source_field = specs[:source][0]
|
116
116
|
target_field = specs[:target][0]
|
117
117
|
|
118
|
-
source_pos = all_fields.index source_field
|
119
|
-
target_pos = all_fields.index target_field
|
118
|
+
#source_pos = all_fields.index source_field
|
119
|
+
#target_pos = all_fields.index target_field
|
120
|
+
|
121
|
+
source_pos = TSV.identify_field all_fields.first, all_fields[1..-1], source_field
|
122
|
+
target_pos = TSV.identify_field all_fields.first, all_fields[1..-1], target_field
|
123
|
+
|
124
|
+
source_pos = source_pos == :key ? 0 : source_pos + 1
|
125
|
+
target_pos = target_pos == :key ? 0 : target_pos + 1
|
120
126
|
|
121
127
|
source_header = specs[:source][1] || specs[:source][0]
|
122
128
|
target_header = specs[:target][1] || specs[:target][0]
|
@@ -140,7 +146,11 @@ module Association
|
|
140
146
|
field_headers << header
|
141
147
|
end
|
142
148
|
|
143
|
-
field_pos = info_fields.collect
|
149
|
+
field_pos = info_fields.collect do |f|
|
150
|
+
p = TSV.identify_field all_fields.first, all_fields[1..-1], f
|
151
|
+
p == :key ? 0 : p + 1
|
152
|
+
end
|
153
|
+
|
144
154
|
field_pos.delete source_pos
|
145
155
|
|
146
156
|
source_format = specs[:source][2]
|
data/lib/rbbt/entity.rb
CHANGED
@@ -289,16 +289,17 @@ module Entity
|
|
289
289
|
end
|
290
290
|
|
291
291
|
def self.unpersist(method_name)
|
292
|
-
return unless persisted?
|
293
|
-
orig_name = UNPERSISTED_PREFIX + method_name.to_s
|
294
|
-
|
292
|
+
return unless orig_name = persisted?(method_name)
|
295
293
|
alias_method method_name, orig_name
|
296
294
|
remove_method orig_name
|
297
295
|
end
|
298
296
|
|
299
297
|
def self.persisted?(method_name)
|
300
|
-
|
301
|
-
|
298
|
+
["", "_multiple_", "_single_"].each do |type|
|
299
|
+
orig_name = (UNPERSISTED_PREFIX + type + method_name.to_s).to_sym
|
300
|
+
return orig_name if instance_methods.include?(orig_name)
|
301
|
+
end
|
302
|
+
return false
|
302
303
|
end
|
303
304
|
|
304
305
|
def self.with_persisted(method_name)
|
data/lib/rbbt/hpc/batch.rb
CHANGED
@@ -110,23 +110,25 @@ module HPC
|
|
110
110
|
|
111
111
|
task = Symbol === job.overriden ? job.overriden : job.task_name
|
112
112
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
113
|
+
override_deps = job.overriden_deps.collect do |dep|
|
114
|
+
name = [dep.workflow.to_s, dep.task_name] * "#"
|
115
|
+
[name, dep.path] * "="
|
116
|
+
end.uniq * ","
|
117
|
+
|
118
|
+
options[:override_deps] = override_deps unless override_deps.empty?
|
119
|
+
|
120
|
+
#if job.overriden?
|
121
|
+
# #override_deps = job.rec_dependencies.
|
122
|
+
# # select{|dep| Symbol === dep.overriden }.
|
123
|
+
#
|
124
|
+
# override_deps = job.overriden_deps.
|
125
|
+
# collect do |dep|
|
126
|
+
# name = [dep.workflow.to_s, dep.task_name] * "#"
|
127
|
+
# [name, dep.path] * "="
|
128
|
+
# end.uniq * ","
|
129
|
+
|
130
|
+
# options[:override_deps] = override_deps unless override_deps.empty?
|
131
|
+
#end
|
130
132
|
|
131
133
|
# Save inputs into inputs_dir
|
132
134
|
inputs_dir = Misc.process_options options, :inputs_dir
|
@@ -39,9 +39,8 @@ module HPC
|
|
39
39
|
(job.dependencies + job.input_dependencies).uniq.select{|d| ! d.done? || d.dirty? }
|
40
40
|
end
|
41
41
|
|
42
|
-
def self.job_chains(rules, job)
|
43
|
-
|
44
|
-
@@job_chains[Misc.digest([rules, job.path].inspect)] ||=
|
42
|
+
def self.job_chains(rules, job, computed = {})
|
43
|
+
computed[Misc.fingerprint([rules, job.path, job.object_id])] ||=
|
45
44
|
begin
|
46
45
|
chains = self.parse_chains(rules)
|
47
46
|
|
@@ -55,7 +54,7 @@ module HPC
|
|
55
54
|
dep_matches = check_chains(chains, dep)
|
56
55
|
common = matches & dep_matches
|
57
56
|
|
58
|
-
dep_chains = job_chains(rules, dep)
|
57
|
+
dep_chains = job_chains(rules, dep, computed)
|
59
58
|
found = []
|
60
59
|
dep_chains.each do |match,info|
|
61
60
|
if common.include?(match)
|
@@ -146,7 +146,7 @@ class KnowledgeBase
|
|
146
146
|
if conditions
|
147
147
|
Misc.tokenize(conditions).each do |condition|
|
148
148
|
if condition.index "="
|
149
|
-
key, value =
|
149
|
+
key, value = condition.split("=")
|
150
150
|
matches = matches.select{|m| Misc.match_value(m.info[key.strip], value)}
|
151
151
|
else
|
152
152
|
matches = matches.select{|m| m.info[condition.strip].to_s =~ /\btrue\b/}
|
@@ -158,17 +158,19 @@ class KnowledgeBase
|
|
158
158
|
end
|
159
159
|
|
160
160
|
def id_dbs(db)
|
161
|
+
# ToDo: Revise this, I'm not sure what id does anymore
|
162
|
+
# I think it deals with syndication
|
161
163
|
if db.include? '?'
|
162
|
-
all_dbs = kb.registry.keys
|
164
|
+
all_dbs = kb.registry.keys.collect{|k| k.to_s }
|
163
165
|
_name, _sep, _kb = db.partition("@")
|
164
166
|
case
|
165
167
|
when _name[0] == '?'
|
166
168
|
dbs = all_dbs.select{|_db|
|
167
169
|
n,_s,d=_db.partition("@");
|
168
|
-
d.nil? or d.empty? or (d ==
|
170
|
+
d.nil? or d.empty? or (d == _kb and assignments[_name].include?(n))
|
169
171
|
}
|
170
172
|
when _kb[0] == '?'
|
171
|
-
dbs = all_dbs.select{|_db| n,_s,d=_db.partition("@"); n == _name and assignments[_kb].include?(d)}
|
173
|
+
dbs = all_dbs.select{|_db| n,_s,d=_db.partition("@"); n == _name and assignments[_kb].include?(d) }
|
172
174
|
end
|
173
175
|
else
|
174
176
|
dbs = [db]
|
@@ -181,11 +183,12 @@ class KnowledgeBase
|
|
181
183
|
all_matches = []
|
182
184
|
path_rules = []
|
183
185
|
acc_var = nil
|
186
|
+
pre_acc_var_assignments = nil
|
184
187
|
rules.each do |rule|
|
185
188
|
rule = rule.strip
|
186
189
|
next if rule.empty?
|
187
190
|
|
188
|
-
if m = rule.match(/([^\s]+)\s+([^\s=]+)\s+([^\s]+)(?:\s+-\s+(
|
191
|
+
if m = rule.match(/([^\s]+)\s+([^\s=]+)\s+([^\s]+)(?:\s+-\s+(.*))?/)
|
189
192
|
Log.debug "Traverse rule: #{rule}"
|
190
193
|
path_rules << rule
|
191
194
|
|
@@ -199,17 +202,19 @@ class KnowledgeBase
|
|
199
202
|
|
200
203
|
next if matches.nil? or matches.empty?
|
201
204
|
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
205
|
+
# ToDo: Revise this, I'm not sure what id does anymore
|
206
|
+
#
|
207
|
+
#if db.include? '?'
|
208
|
+
# _name, _sep, _kb = db.partition("@")
|
209
|
+
# case
|
210
|
+
# when _kb[0] == '?'
|
211
|
+
# assignments[_kb] ||= []
|
212
|
+
# assignments[_kb] << _db.partition("@").reject{|p| p.empty?}.last
|
213
|
+
# when _name[0] == '?'
|
214
|
+
# assignments[_name] ||= []
|
215
|
+
# assignments[_name] << _db.partition("@").first
|
216
|
+
# end
|
217
|
+
#end
|
213
218
|
|
214
219
|
matches.each do |m|
|
215
220
|
rule_matches << m
|
@@ -244,11 +249,14 @@ class KnowledgeBase
|
|
244
249
|
|
245
250
|
elsif m = rule.match(/(\?[^\s{]+)\s*{/)
|
246
251
|
acc_var = m.captures.first
|
252
|
+
pre_acc_var_assignments = assignments.dup
|
247
253
|
Log.debug "Start assign block: #{acc_var}"
|
248
254
|
elsif m = rule.match(/^\s*}\s*$/)
|
249
255
|
Log.debug "Close assign block: #{acc_var}"
|
250
256
|
saved_assign = assignments[acc_var]
|
251
257
|
assignments.clear
|
258
|
+
assignments.merge!(pre_acc_var_assignments)
|
259
|
+
pre_acc_var_assignments = nil
|
252
260
|
assignments[acc_var] = saved_assign
|
253
261
|
all_matches = []
|
254
262
|
path_rules = []
|
@@ -21,7 +21,7 @@ module Persist
|
|
21
21
|
tokyocabinet_class = TokyoCabinet::HDB if tokyocabinet_class == "HDB" or tokyocabinet_class.nil?
|
22
22
|
tokyocabinet_class = TokyoCabinet::BDB if tokyocabinet_class == "BDB"
|
23
23
|
|
24
|
-
database = CONNECTIONS[path] ||= tokyocabinet_class.new
|
24
|
+
database = CONNECTIONS[path] ||= Log.ignore_stderr do tokyocabinet_class.new end
|
25
25
|
|
26
26
|
if big and not Open.exists?(path)
|
27
27
|
database.tune(nil,nil,nil,tokyocabinet_class::TLARGE | tokyocabinet_class::TDEFLATE)
|
data/lib/rbbt/resource/path.rb
CHANGED
@@ -161,8 +161,13 @@ module Path
|
|
161
161
|
def find(where = nil, caller_lib = nil, paths = nil)
|
162
162
|
|
163
163
|
if located?
|
164
|
-
|
165
|
-
|
164
|
+
path = self
|
165
|
+
path = path + '.gz' if File.exist?(path + '.gz')
|
166
|
+
path = path + '.bgz' if File.exist?(path + '.bgz')
|
167
|
+
self.annotate(path)
|
168
|
+
|
169
|
+
path.original = self
|
170
|
+
return path
|
166
171
|
end
|
167
172
|
|
168
173
|
if where == :all || where == 'all'
|
data/lib/rbbt/resource.rb
CHANGED
@@ -374,7 +374,7 @@ url='#{url}'
|
|
374
374
|
if ! m.named_captures.include?("PKGDIR") || m["PKGDIR"] == resource.pkgdir
|
375
375
|
unlocated = ([m["TOPLEVEL"],m["SUBPATH"],m["REST"]] * "/")
|
376
376
|
unlocated.gsub!(/\/+/,'/')
|
377
|
-
if self.subdir && ! self.subdir.empty?
|
377
|
+
if self.subdir && ! self.subdir.empty? && unlocated.include?(subdir)
|
378
378
|
subdir = self.subdir
|
379
379
|
subdir += "/" unless subdir.end_with?("/")
|
380
380
|
unlocated[subdir] = ""
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -567,13 +567,13 @@ module TSV
|
|
567
567
|
end
|
568
568
|
end
|
569
569
|
|
570
|
-
def dumper_stream(keys = nil, no_options = false, unmerge = false)
|
570
|
+
def dumper_stream(keys = nil, no_options = false, unmerge = false, stream = nil)
|
571
571
|
unmerge = false unless type == :double
|
572
572
|
|
573
573
|
options = self.options
|
574
574
|
options[:type] = :list if unmerge
|
575
575
|
|
576
|
-
TSV::Dumper.stream options do |dumper|
|
576
|
+
TSV::Dumper.stream options, filename, stream do |dumper|
|
577
577
|
case no_options
|
578
578
|
when FalseClass, nil
|
579
579
|
dumper.init
|
@@ -631,11 +631,11 @@ module TSV
|
|
631
631
|
end
|
632
632
|
end
|
633
633
|
end
|
634
|
+
dumper.close
|
634
635
|
rescue Exception
|
635
636
|
Log.exception $!
|
636
637
|
raise $!
|
637
638
|
end
|
638
|
-
dumper.close
|
639
639
|
end
|
640
640
|
end
|
641
641
|
|
@@ -651,14 +651,9 @@ module TSV
|
|
651
651
|
end
|
652
652
|
end
|
653
653
|
|
654
|
-
io = dumper_stream(keys, no_options, unmerge)
|
655
|
-
|
656
|
-
|
657
|
-
while block = io.read(Misc::BLOCK_SIZE)
|
658
|
-
str << block
|
659
|
-
end
|
660
|
-
|
661
|
-
str
|
654
|
+
io = dumper_stream(keys, no_options, unmerge, StringIO.new)
|
655
|
+
io.rewind
|
656
|
+
io.read
|
662
657
|
end
|
663
658
|
|
664
659
|
def to_unmerged_s(keys = nil, no_options = false)
|
data/lib/rbbt/tsv/dumper.rb
CHANGED
@@ -1,13 +1,24 @@
|
|
1
1
|
module TSV
|
2
2
|
class Dumper
|
3
3
|
attr_accessor :in_stream, :stream, :options, :filename, :sep
|
4
|
-
def self.stream(options = {}, filename = nil, &block)
|
4
|
+
def self.stream(options = {}, filename = nil, stream = nil, &block)
|
5
5
|
dumper = TSV::Dumper.new options, filename
|
6
|
-
|
6
|
+
if stream
|
7
|
+
dumper.set_stream stream if stream
|
7
8
|
yield dumper
|
8
|
-
|
9
|
+
stream
|
10
|
+
else
|
11
|
+
thread = Thread.new(Thread.current) do |parent|
|
12
|
+
yield dumper
|
13
|
+
end
|
14
|
+
ConcurrentStream.setup(dumper.stream, threads: thread)
|
9
15
|
end
|
10
|
-
|
16
|
+
end
|
17
|
+
|
18
|
+
def set_stream(stream)
|
19
|
+
@stream.close
|
20
|
+
@in_stream.close
|
21
|
+
@in_stream = @stream = stream
|
11
22
|
end
|
12
23
|
|
13
24
|
def initialize(options, filename = nil)
|
@@ -56,16 +67,16 @@ module TSV
|
|
56
67
|
|
57
68
|
str = TSV.header_lines(key_field, fields, options.merge(init_options || {}))
|
58
69
|
|
59
|
-
Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
|
70
|
+
Thread.pass while IO.select(nil, [@in_stream],nil,1).nil? if IO === @in_stream
|
60
71
|
|
61
|
-
@in_stream
|
72
|
+
@in_stream << str
|
62
73
|
end
|
63
74
|
|
64
75
|
def add(k,v)
|
65
76
|
@fields ||= @options[:fields]
|
66
77
|
@sep ||= @options[:sep]
|
67
78
|
begin
|
68
|
-
Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
|
79
|
+
Thread.pass while IO.select(nil, [@in_stream],nil,1).nil? if IO === @in_stream
|
69
80
|
@in_stream << k << TSV::Dumper.values_to_s(v, @fields, @sep)
|
70
81
|
rescue IOError
|
71
82
|
rescue Exception
|
@@ -74,16 +85,16 @@ module TSV
|
|
74
85
|
end
|
75
86
|
|
76
87
|
def close_out
|
77
|
-
@stream.close unless @stream.closed?
|
88
|
+
@stream.close unless StringIO === @stream || @stream.closed?
|
78
89
|
end
|
79
90
|
|
80
91
|
def close_in
|
81
92
|
@in_stream.join if @in_stream.respond_to?(:join) && ! @in_stream.joined?
|
82
|
-
@in_stream.close
|
93
|
+
@in_stream.close if @in_stream.respond_to?(:close) && ! @in_stream.closed?
|
83
94
|
end
|
84
95
|
|
85
96
|
def close
|
86
|
-
close_in
|
97
|
+
close_in unless @in_stream == @stream
|
87
98
|
end
|
88
99
|
end
|
89
100
|
end
|
data/lib/rbbt/tsv/index.rb
CHANGED
@@ -125,7 +125,7 @@ module TSV
|
|
125
125
|
def self.index(file, options = {})
|
126
126
|
persist_options = Misc.pull_keys options, :persist
|
127
127
|
persist_options[:prefix] ||= "StaticIndex[#{options[:target] || :key}]"
|
128
|
-
|
128
|
+
|
129
129
|
Log.debug "Static Index: #{ file } - #{Misc.fingerprint options}"
|
130
130
|
Persist.persist_tsv nil, file, options, persist_options do |data|
|
131
131
|
data_options = Misc.pull_keys options, :data
|
@@ -258,6 +258,7 @@ module TSV
|
|
258
258
|
else
|
259
259
|
file.object_id.to_s
|
260
260
|
end
|
261
|
+
|
261
262
|
persist_options = Misc.pull_keys options, :persist
|
262
263
|
persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
|
263
264
|
|
@@ -181,6 +181,48 @@ module TSV
|
|
181
181
|
end
|
182
182
|
end
|
183
183
|
|
184
|
+
def self.traverse_enumerable(enum, options = {}, &block)
|
185
|
+
callback, bar, join = Misc.process_options options, :callback, :bar, :join
|
186
|
+
|
187
|
+
begin
|
188
|
+
error = false
|
189
|
+
if callback
|
190
|
+
bar.init if bar
|
191
|
+
while enum.any?
|
192
|
+
e = enum.pop
|
193
|
+
begin
|
194
|
+
callback.call yield(e)
|
195
|
+
rescue Exception
|
196
|
+
Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
|
197
|
+
raise $!
|
198
|
+
ensure
|
199
|
+
bar.tick if bar
|
200
|
+
end
|
201
|
+
end
|
202
|
+
else
|
203
|
+
bar.init if bar
|
204
|
+
while enum.any?
|
205
|
+
e = enum.pop
|
206
|
+
begin
|
207
|
+
yield e
|
208
|
+
rescue Exception
|
209
|
+
Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
|
210
|
+
raise $!
|
211
|
+
ensure
|
212
|
+
bar.tick if bar
|
213
|
+
end
|
214
|
+
end
|
215
|
+
end
|
216
|
+
|
217
|
+
rescue
|
218
|
+
error = true
|
219
|
+
raise $!
|
220
|
+
ensure
|
221
|
+
join.call(error) if join
|
222
|
+
Log::ProgressBar.remove_bar(bar, error) if bar
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
184
226
|
def self.traverse_priority_queue(queue, options = {}, &block)
|
185
227
|
callback, bar, join = Misc.process_options options, :callback, :bar, :join
|
186
228
|
|
@@ -396,6 +438,8 @@ module TSV
|
|
396
438
|
else
|
397
439
|
raise "Can not open obj for traversal #{Misc.fingerprint obj}"
|
398
440
|
end
|
441
|
+
when Enumerable
|
442
|
+
traverse_enumerable(obj, options, &block)
|
399
443
|
when nil
|
400
444
|
raise "Can not traverse nil object into #{stream_name(options[:into])}"
|
401
445
|
else
|
data/lib/rbbt/tsv/parser.rb
CHANGED
@@ -281,19 +281,20 @@ module TSV
|
|
281
281
|
end
|
282
282
|
|
283
283
|
values = values.collect{|v| v.length != num ? [v.first] * num : v}
|
284
|
+
|
284
285
|
all = values
|
285
286
|
all.unshift keys
|
286
|
-
Misc.zip_fields(all).each do |
|
287
|
-
key =
|
287
|
+
Misc.zip_fields(all).each do |vs|
|
288
|
+
key = vs.shift
|
288
289
|
if data.include? key
|
289
|
-
data[key] = data[key].zip(
|
290
|
-
old
|
291
|
-
old
|
290
|
+
data[key] = data[key].zip(vs).collect do |old, new|
|
291
|
+
old + [new]
|
292
292
|
end
|
293
293
|
else
|
294
|
-
data[key] =
|
294
|
+
data[key] = vs.collect{|v| [v] }
|
295
295
|
end
|
296
296
|
end
|
297
|
+
|
297
298
|
nil
|
298
299
|
end
|
299
300
|
|
data/lib/rbbt/tsv/serializers.rb
CHANGED
@@ -63,7 +63,7 @@ module TSV
|
|
63
63
|
class StringDoubleArraySerializer
|
64
64
|
def self.dump(array)
|
65
65
|
begin
|
66
|
-
|
66
|
+
array.collect{|a| a.collect{|a| a.to_s } * "|"} * "\t"
|
67
67
|
rescue Encoding::CompatibilityError
|
68
68
|
array.collect{|a| a.collect{|a| a.to_s.force_encoding('UTF-8')} * "|"} * "\t"
|
69
69
|
end
|
data/lib/rbbt/tsv/util.rb
CHANGED
@@ -168,6 +168,8 @@ module TSV
|
|
168
168
|
end
|
169
169
|
when Set
|
170
170
|
get_stream(file.to_a, open_options)
|
171
|
+
when Enumerable
|
172
|
+
file
|
171
173
|
else
|
172
174
|
raise "Cannot get stream from: #{file.inspect}"
|
173
175
|
end
|
@@ -189,6 +191,16 @@ module TSV
|
|
189
191
|
pos = fields.index field
|
190
192
|
return pos if pos
|
191
193
|
return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/
|
194
|
+
if fields.select{|f| f.include?("(") }.any?
|
195
|
+
simplify_fields = fields.collect do |f|
|
196
|
+
if m = f.match(/(.*)\s+\(.*\)/)
|
197
|
+
m[1]
|
198
|
+
else
|
199
|
+
f
|
200
|
+
end
|
201
|
+
end
|
202
|
+
return identify_field(key_field, simplify_fields, field)
|
203
|
+
end
|
192
204
|
raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}" if pos.nil?
|
193
205
|
else
|
194
206
|
raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
|
data/lib/rbbt/util/misc/bgzf.rb
CHANGED
@@ -29,8 +29,8 @@ module Bgzf
|
|
29
29
|
|
30
30
|
def filename
|
31
31
|
@filename ||= begin
|
32
|
-
|
33
|
-
|
32
|
+
compressed_stream.respond_to?(:filename) ? compressed_stream.filename : rand(1000000000).to_s
|
33
|
+
end
|
34
34
|
end
|
35
35
|
|
36
36
|
def closed?
|
@@ -49,7 +49,8 @@ module Bgzf
|
|
49
49
|
|
50
50
|
def _index
|
51
51
|
@_index ||= begin
|
52
|
-
|
52
|
+
prefix_code = "BGZF index" + (filename || "").sub(/.bgz$/,'')
|
53
|
+
index = Persist.persist(prefix_code, :marshal) do
|
53
54
|
index = []
|
54
55
|
pos = 0
|
55
56
|
while true do
|
@@ -66,7 +67,7 @@ module Bgzf
|
|
66
67
|
end
|
67
68
|
index
|
68
69
|
end
|
69
|
-
@block_cache_size = Math.log(index.length).to_i + 1
|
70
|
+
@block_cache_size = Math.log(index.length + 1).to_i + 1
|
70
71
|
index
|
71
72
|
end
|
72
73
|
end
|
@@ -39,13 +39,13 @@ module Misc
|
|
39
39
|
end
|
40
40
|
end
|
41
41
|
if message
|
42
|
-
|
42
|
+
STDERR.puts "#{message }: #{ repeats } repeats"
|
43
43
|
else
|
44
|
-
puts "Benchmark for #{ repeats } repeats"
|
44
|
+
STDERR.puts "Benchmark for #{ repeats } repeats"
|
45
45
|
end
|
46
|
-
puts measure
|
46
|
+
STDERR.puts measure
|
47
47
|
rescue Exception
|
48
|
-
puts "Benchmark aborted"
|
48
|
+
STDERR.puts "Benchmark aborted"
|
49
49
|
raise $!
|
50
50
|
end
|
51
51
|
res
|
data/lib/rbbt/util/misc/math.rb
CHANGED
data/lib/rbbt/util/misc.rb
CHANGED
data/lib/rbbt/util/open.rb
CHANGED