rbbt-util 5.42.0 → 5.44.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (143)
  1. checksums.yaml +4 -4
  2. data/lib/rbbt/annotations/util.rb +1 -1
  3. data/lib/rbbt/association/database.rb +2 -1
  4. data/lib/rbbt/association/index.rb +1 -0
  5. data/lib/rbbt/association/util.rb +14 -4
  6. data/lib/rbbt/entity.rb +6 -5
  7. data/lib/rbbt/hpc/batch.rb +19 -17
  8. data/lib/rbbt/hpc/orchestrate/batches.rb +1 -1
  9. data/lib/rbbt/hpc/orchestrate/chains.rb +3 -4
  10. data/lib/rbbt/knowledge_base/traverse.rb +24 -16
  11. data/lib/rbbt/persist/tsv/tokyocabinet.rb +1 -1
  12. data/lib/rbbt/resource/path.rb +7 -2
  13. data/lib/rbbt/resource.rb +1 -1
  14. data/lib/rbbt/tsv/accessor.rb +6 -11
  15. data/lib/rbbt/tsv/dumper.rb +21 -10
  16. data/lib/rbbt/tsv/index.rb +2 -1
  17. data/lib/rbbt/tsv/parallel/traverse.rb +44 -0
  18. data/lib/rbbt/tsv/parser.rb +7 -6
  19. data/lib/rbbt/tsv/serializers.rb +1 -1
  20. data/lib/rbbt/tsv/util.rb +12 -0
  21. data/lib/rbbt/util/log/progress.rb +1 -1
  22. data/lib/rbbt/util/misc/bgzf.rb +5 -4
  23. data/lib/rbbt/util/misc/development.rb +4 -4
  24. data/lib/rbbt/util/misc/math.rb +0 -1
  25. data/lib/rbbt/util/misc.rb +1 -1
  26. data/lib/rbbt/util/open.rb +1 -1
  27. data/lib/rbbt/workflow/remote_workflow/driver/rest.rb +8 -3
  28. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +4 -0
  29. data/lib/rbbt/workflow/step/info.rb +1 -0
  30. data/lib/rbbt/workflow/step.rb +2 -1
  31. data/lib/rbbt/workflow/util/trace.rb +2 -2
  32. data/lib/rbbt/workflow.rb +2 -2
  33. data/python/rbbt/__init__.py +2 -2
  34. data/share/rbbt_commands/workflow/task +1 -1
  35. metadata +4 -220
  36. data/test/rbbt/annotations/test_util.rb +0 -43
  37. data/test/rbbt/association/test_database.rb +0 -89
  38. data/test/rbbt/association/test_index.rb +0 -127
  39. data/test/rbbt/association/test_item.rb +0 -15
  40. data/test/rbbt/association/test_open.rb +0 -63
  41. data/test/rbbt/association/test_util.rb +0 -108
  42. data/test/rbbt/entity/test_identifiers.rb +0 -74
  43. data/test/rbbt/hpc/orchestrate/test_batches.rb +0 -113
  44. data/test/rbbt/hpc/orchestrate/test_chains.rb +0 -139
  45. data/test/rbbt/hpc/orchestrate/test_rules.rb +0 -92
  46. data/test/rbbt/hpc/test_batch.rb +0 -68
  47. data/test/rbbt/hpc/test_orchestrate.rb +0 -144
  48. data/test/rbbt/hpc/test_pbs.rb +0 -43
  49. data/test/rbbt/hpc/test_slurm.rb +0 -32
  50. data/test/rbbt/knowledge_base/test_enrichment.rb +0 -48
  51. data/test/rbbt/knowledge_base/test_entity.rb +0 -42
  52. data/test/rbbt/knowledge_base/test_query.rb +0 -46
  53. data/test/rbbt/knowledge_base/test_registry.rb +0 -81
  54. data/test/rbbt/knowledge_base/test_syndicate.rb +0 -48
  55. data/test/rbbt/knowledge_base/test_traverse.rb +0 -85
  56. data/test/rbbt/persist/test_tsv.rb +0 -88
  57. data/test/rbbt/persist/tsv/test_cdb.rb +0 -18
  58. data/test/rbbt/persist/tsv/test_kyotocabinet.rb +0 -27
  59. data/test/rbbt/persist/tsv/test_leveldb.rb +0 -18
  60. data/test/rbbt/persist/tsv/test_lmdb.rb +0 -18
  61. data/test/rbbt/persist/tsv/test_sharder.rb +0 -164
  62. data/test/rbbt/persist/tsv/test_tokyocabinet.rb +0 -262
  63. data/test/rbbt/resource/test_path.rb +0 -49
  64. data/test/rbbt/test_annotations.rb +0 -167
  65. data/test/rbbt/test_association.rb +0 -103
  66. data/test/rbbt/test_entity.rb +0 -249
  67. data/test/rbbt/test_fix_width_table.rb +0 -135
  68. data/test/rbbt/test_hpc.rb +0 -16
  69. data/test/rbbt/test_knowledge_base.rb +0 -149
  70. data/test/rbbt/test_monitor.rb +0 -11
  71. data/test/rbbt/test_packed_index.rb +0 -68
  72. data/test/rbbt/test_persist.rb +0 -85
  73. data/test/rbbt/test_resource.rb +0 -110
  74. data/test/rbbt/test_tsv.rb +0 -669
  75. data/test/rbbt/test_workflow.rb +0 -604
  76. data/test/rbbt/tsv/parallel/test_through.rb +0 -40
  77. data/test/rbbt/tsv/parallel/test_traverse.rb +0 -456
  78. data/test/rbbt/tsv/test_accessor.rb +0 -315
  79. data/test/rbbt/tsv/test_attach.rb +0 -718
  80. data/test/rbbt/tsv/test_change_id.rb +0 -61
  81. data/test/rbbt/tsv/test_csv.rb +0 -49
  82. data/test/rbbt/tsv/test_excel.rb +0 -171
  83. data/test/rbbt/tsv/test_field_index.rb +0 -19
  84. data/test/rbbt/tsv/test_filter.rb +0 -187
  85. data/test/rbbt/tsv/test_index.rb +0 -302
  86. data/test/rbbt/tsv/test_manipulate.rb +0 -337
  87. data/test/rbbt/tsv/test_marshal.rb +0 -24
  88. data/test/rbbt/tsv/test_matrix.rb +0 -20
  89. data/test/rbbt/tsv/test_parallel.rb +0 -7
  90. data/test/rbbt/tsv/test_parser.rb +0 -101
  91. data/test/rbbt/tsv/test_stream.rb +0 -253
  92. data/test/rbbt/tsv/test_util.rb +0 -52
  93. data/test/rbbt/util/R/test_eval.rb +0 -43
  94. data/test/rbbt/util/R/test_model.rb +0 -130
  95. data/test/rbbt/util/R/test_plot.rb +0 -38
  96. data/test/rbbt/util/concurrency/processes/test_socket.rb +0 -70
  97. data/test/rbbt/util/concurrency/test_processes.rb +0 -192
  98. data/test/rbbt/util/concurrency/test_threads.rb +0 -40
  99. data/test/rbbt/util/log/test_progress.rb +0 -111
  100. data/test/rbbt/util/misc/test_bgzf.rb +0 -48
  101. data/test/rbbt/util/misc/test_communication.rb +0 -11
  102. data/test/rbbt/util/misc/test_development.rb +0 -26
  103. data/test/rbbt/util/misc/test_format.rb +0 -10
  104. data/test/rbbt/util/misc/test_indiferent_hash.rb +0 -14
  105. data/test/rbbt/util/misc/test_lock.rb +0 -77
  106. data/test/rbbt/util/misc/test_multipart_payload.rb +0 -202
  107. data/test/rbbt/util/misc/test_omics.rb +0 -116
  108. data/test/rbbt/util/misc/test_pipes.rb +0 -343
  109. data/test/rbbt/util/misc/test_serialize.rb +0 -24
  110. data/test/rbbt/util/python/test_util.rb +0 -25
  111. data/test/rbbt/util/simpleopt/test_get.rb +0 -12
  112. data/test/rbbt/util/simpleopt/test_parse.rb +0 -10
  113. data/test/rbbt/util/simpleopt/test_setup.rb +0 -76
  114. data/test/rbbt/util/test_R.rb +0 -37
  115. data/test/rbbt/util/test_chain_methods.rb +0 -22
  116. data/test/rbbt/util/test_cmd.rb +0 -87
  117. data/test/rbbt/util/test_colorize.rb +0 -22
  118. data/test/rbbt/util/test_concurrency.rb +0 -6
  119. data/test/rbbt/util/test_config.rb +0 -69
  120. data/test/rbbt/util/test_excel2tsv.rb +0 -10
  121. data/test/rbbt/util/test_filecache.rb +0 -36
  122. data/test/rbbt/util/test_log.rb +0 -52
  123. data/test/rbbt/util/test_migrate.rb +0 -36
  124. data/test/rbbt/util/test_misc.rb +0 -723
  125. data/test/rbbt/util/test_open.rb +0 -200
  126. data/test/rbbt/util/test_procpath.rb +0 -23
  127. data/test/rbbt/util/test_python.rb +0 -140
  128. data/test/rbbt/util/test_semaphore.rb +0 -36
  129. data/test/rbbt/util/test_simpleDSL.rb +0 -55
  130. data/test/rbbt/util/test_simpleopt.rb +0 -11
  131. data/test/rbbt/util/test_ssh.rb +0 -10
  132. data/test/rbbt/util/test_tmpfile.rb +0 -32
  133. data/test/rbbt/workflow/step/test_dependencies.rb +0 -299
  134. data/test/rbbt/workflow/step/test_save_load_inputs.rb +0 -136
  135. data/test/rbbt/workflow/test_doc.rb +0 -30
  136. data/test/rbbt/workflow/test_remote_workflow.rb +0 -98
  137. data/test/rbbt/workflow/test_schedule.rb +0 -0
  138. data/test/rbbt/workflow/test_step.rb +0 -231
  139. data/test/rbbt/workflow/test_task.rb +0 -85
  140. data/test/rbbt/workflow/util/test_archive.rb +0 -31
  141. data/test/rbbt/workflow/util/test_data.rb +0 -67
  142. data/test/rbbt/workflow/util/test_orchestrator.rb +0 -273
  143. data/test/test_helper.rb +0 -90
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8b23de77e3900d1a81e8ffa349ab9f88a4283f30bb2cec62c04bff4539243170
4
- data.tar.gz: 494b18d5e2ba4802225617a03b2f69255ec9deab173e8d18e06654a7b415325c
3
+ metadata.gz: 1c39baa8ee0c2633bea3878720d29aa0f49a057db7ec6700187685c7a07f7eae
4
+ data.tar.gz: ad0f8a09cb34faa3d0aa3388c26733d4849d307a02df02989656c0aced5ea72a
5
5
  SHA512:
6
- metadata.gz: d57bd3bc7ff84a4e8101dd907f98ef20923dbcab95c3187523e6b84eba6774d900f269ed2457fd6e1a81821edf3e2d1f11fcbdd5863f791282d549bcf83a48a5
7
- data.tar.gz: a4857d82292ab4234cf10fbf2fcb242b466e6400f9476cb4de4bc625ff808542a690f27fbd9dede4ff692a82644561ebfc086ccdf953102cb4f7a0020b094849
6
+ metadata.gz: dcfcf7f6ea2b45dd4ca5e1257fb22cec0f06280be2b6c9aac3b118e1e649a8f5948db0162f62735b9c113ee9a878cb89e8f98a99fd7026a7f58f061531b93f89
7
+ data.tar.gz: 59e94fecbb50bf9d7635545aab508ee6b986e44f81acac59aac7e888f6c9c5748fb2d0324a0e83d784c78b249e67a50baaaba1d40b58612f194de111fd4c6579
@@ -43,7 +43,7 @@ module Annotated
43
43
  end
44
44
 
45
45
  def self.resolve_array(entry)
46
- if entry =~ /^Array:/
46
+ if String === entry && entry =~ /^Array:/
47
47
  entry["Array:".length..-1].split("|")
48
48
  else
49
49
  entry
@@ -93,7 +93,7 @@ module Association
93
93
  tmp_data = Persist.open_database(tmpfile, true, :double, "HDB")
94
94
 
95
95
  tsv.with_monitor(options[:monitor]) do
96
- tsv = tsv.reorder source_field, fields, :persist => persist, :persist_data => tmp_data if true or source_field != tsv.key_field or (fields and tsv.fields != fields)
96
+ tsv = tsv.reorder source_field, tsv.all_fields.values_at(*field_pos), :persist => persist, :persist_data => tmp_data if true or source_field != tsv.key_field or (fields and tsv.fields != fields)
97
97
  end
98
98
 
99
99
  tsv.key_field = source_header
@@ -216,6 +216,7 @@ module Association
216
216
  open_stream(stream, options.dup)
217
217
  end
218
218
 
219
+ database.filename = file if Path === file && file.identifier_files.any?
219
220
  database.entity_options = options[:entity_options] if options[:entity_options]
220
221
 
221
222
  database
@@ -38,6 +38,7 @@ module Association
38
38
  data.fields = fields[1..-1]
39
39
  data.type = :list
40
40
  data.serializer ||= serializer
41
+ data.filename ||= file if String === file
41
42
 
42
43
  database.with_unnamed do
43
44
  database.with_monitor(options[:monitor]) do
@@ -10,7 +10,7 @@ module Association
10
10
  end
11
11
 
12
12
  def self.parse_field_specification(spec)
13
- return [2,nil,nil] if Numeric === spec
13
+ return [spec,nil,nil] if Numeric === spec
14
14
  spec = spec.split "=>" unless Array === spec
15
15
  field_part, final_format = spec
16
16
 
@@ -115,8 +115,14 @@ module Association
115
115
  source_field = specs[:source][0]
116
116
  target_field = specs[:target][0]
117
117
 
118
- source_pos = all_fields.index source_field
119
- target_pos = all_fields.index target_field
118
+ #source_pos = all_fields.index source_field
119
+ #target_pos = all_fields.index target_field
120
+
121
+ source_pos = TSV.identify_field all_fields.first, all_fields[1..-1], source_field
122
+ target_pos = TSV.identify_field all_fields.first, all_fields[1..-1], target_field
123
+
124
+ source_pos = source_pos == :key ? 0 : source_pos + 1
125
+ target_pos = target_pos == :key ? 0 : target_pos + 1
120
126
 
121
127
  source_header = specs[:source][1] || specs[:source][0]
122
128
  target_header = specs[:target][1] || specs[:target][0]
@@ -140,7 +146,11 @@ module Association
140
146
  field_headers << header
141
147
  end
142
148
 
143
- field_pos = info_fields.collect{|f| raise "Field #{f} not found. Options: #{all_fields * ", "}" unless all_fields.include?(f); f == :key ? 0 : all_fields.index(f); }
149
+ field_pos = info_fields.collect do |f|
150
+ p = TSV.identify_field all_fields.first, all_fields[1..-1], f
151
+ p == :key ? 0 : p + 1
152
+ end
153
+
144
154
  field_pos.delete source_pos
145
155
 
146
156
  source_format = specs[:source][2]
data/lib/rbbt/entity.rb CHANGED
@@ -289,16 +289,17 @@ module Entity
289
289
  end
290
290
 
291
291
  def self.unpersist(method_name)
292
- return unless persisted? method_name
293
- orig_name = UNPERSISTED_PREFIX + method_name.to_s
294
-
292
+ return unless orig_name = persisted?(method_name)
295
293
  alias_method method_name, orig_name
296
294
  remove_method orig_name
297
295
  end
298
296
 
299
297
  def self.persisted?(method_name)
300
- orig_name = UNPERSISTED_PREFIX + method_name.to_s
301
- instance_methods.include? orig_name.to_sym
298
+ ["", "_multiple_", "_single_"].each do |type|
299
+ orig_name = (UNPERSISTED_PREFIX + type + method_name.to_s).to_sym
300
+ return orig_name if instance_methods.include?(orig_name)
301
+ end
302
+ return false
302
303
  end
303
304
 
304
305
  def self.with_persisted(method_name)
@@ -110,23 +110,25 @@ module HPC
110
110
 
111
111
  task = Symbol === job.overriden ? job.overriden : job.task_name
112
112
 
113
- #override_deps = job.overriden_deps.collect do |dep|
114
- # name = [dep.workflow.to_s, dep.task_name] * "#"
115
- # [name, dep.path] * "="
116
- #end.uniq * ","
117
-
118
- if job.overriden?
119
- #override_deps = job.rec_dependencies.
120
- # select{|dep| Symbol === dep.overriden }.
121
-
122
- override_deps = job.overriden_deps.
123
- collect do |dep|
124
- name = [dep.workflow.to_s, dep.task_name] * "#"
125
- [name, dep.path] * "="
126
- end.uniq * ","
127
-
128
- options[:override_deps] = override_deps unless override_deps.empty?
129
- end
113
+ override_deps = job.overriden_deps.collect do |dep|
114
+ name = [dep.workflow.to_s, dep.task_name] * "#"
115
+ [name, dep.path] * "="
116
+ end.uniq * ","
117
+
118
+ options[:override_deps] = override_deps unless override_deps.empty?
119
+
120
+ #if job.overriden?
121
+ # #override_deps = job.rec_dependencies.
122
+ # # select{|dep| Symbol === dep.overriden }.
123
+ #
124
+ # override_deps = job.overriden_deps.
125
+ # collect do |dep|
126
+ # name = [dep.workflow.to_s, dep.task_name] * "#"
127
+ # [name, dep.path] * "="
128
+ # end.uniq * ","
129
+
130
+ # options[:override_deps] = override_deps unless override_deps.empty?
131
+ #end
130
132
 
131
133
  # Save inputs into inputs_dir
132
134
  inputs_dir = Misc.process_options options, :inputs_dir
@@ -152,7 +152,7 @@ module HPC
152
152
  end
153
153
 
154
154
  def self.job_batches(rules, job)
155
- job_chains = self.job_chains(rules, job)
155
+ job_chains = self.job_chains(rules, job).dup
156
156
 
157
157
  workload = job_workload(job).uniq
158
158
 
@@ -39,9 +39,8 @@ module HPC
39
39
  (job.dependencies + job.input_dependencies).uniq.select{|d| ! d.done? || d.dirty? }
40
40
  end
41
41
 
42
- def self.job_chains(rules, job)
43
- @@job_chains ||= {}
44
- @@job_chains[Misc.digest([rules, job.path].inspect)] ||=
42
+ def self.job_chains(rules, job, computed = {})
43
+ computed[Misc.fingerprint([rules, job.path, job.object_id])] ||=
45
44
  begin
46
45
  chains = self.parse_chains(rules)
47
46
 
@@ -55,7 +54,7 @@ module HPC
55
54
  dep_matches = check_chains(chains, dep)
56
55
  common = matches & dep_matches
57
56
 
58
- dep_chains = job_chains(rules, dep)
57
+ dep_chains = job_chains(rules, dep, computed)
59
58
  found = []
60
59
  dep_chains.each do |match,info|
61
60
  if common.include?(match)
@@ -146,7 +146,7 @@ class KnowledgeBase
146
146
  if conditions
147
147
  Misc.tokenize(conditions).each do |condition|
148
148
  if condition.index "="
149
- key, value = conditions.split("=")
149
+ key, value = condition.split("=")
150
150
  matches = matches.select{|m| Misc.match_value(m.info[key.strip], value)}
151
151
  else
152
152
  matches = matches.select{|m| m.info[condition.strip].to_s =~ /\btrue\b/}
@@ -158,17 +158,19 @@ class KnowledgeBase
158
158
  end
159
159
 
160
160
  def id_dbs(db)
161
+ # ToDo: Revise this, I'm not sure what it does anymore
162
+ # I think it deals with syndication
161
163
  if db.include? '?'
162
- all_dbs = kb.registry.keys
164
+ all_dbs = kb.registry.keys.collect{|k| k.to_s }
163
165
  _name, _sep, _kb = db.partition("@")
164
166
  case
165
167
  when _name[0] == '?'
166
168
  dbs = all_dbs.select{|_db|
167
169
  n,_s,d=_db.partition("@");
168
- d.nil? or d.empty? or (d == _kd and assignments[_name].include?(n))
170
+ d.nil? or d.empty? or (d == _kb and assignments[_name].include?(n))
169
171
  }
170
172
  when _kb[0] == '?'
171
- dbs = all_dbs.select{|_db| n,_s,d=_db.partition("@"); n == _name and assignments[_kb].include?(d)}
173
+ dbs = all_dbs.select{|_db| n,_s,d=_db.partition("@"); n == _name and assignments[_kb].include?(d) }
172
174
  end
173
175
  else
174
176
  dbs = [db]
@@ -181,11 +183,12 @@ class KnowledgeBase
181
183
  all_matches = []
182
184
  path_rules = []
183
185
  acc_var = nil
186
+ pre_acc_var_assignments = nil
184
187
  rules.each do |rule|
185
188
  rule = rule.strip
186
189
  next if rule.empty?
187
190
 
188
- if m = rule.match(/([^\s]+)\s+([^\s=]+)\s+([^\s]+)(?:\s+-\s+([^\s]+))?/)
191
+ if m = rule.match(/([^\s]+)\s+([^\s=]+)\s+([^\s]+)(?:\s+-\s+(.*))?/)
189
192
  Log.debug "Traverse rule: #{rule}"
190
193
  path_rules << rule
191
194
 
@@ -199,17 +202,19 @@ class KnowledgeBase
199
202
 
200
203
  next if matches.nil? or matches.empty?
201
204
 
202
- if db.include? '?'
203
- _name, _sep, _kb = db.partition("@")
204
- case
205
- when _kb[0] == '?'
206
- assignments[_kb] ||= []
207
- assignments[_kb] << _db.partition("@").reject{|p| p.empty?}.last
208
- when _name[0] == '?'
209
- assignments[_name] ||= []
210
- assignments[_name] << _db.partition("@").first
211
- end
212
- end
205
+ # ToDo: Revise this, I'm not sure what it does anymore
206
+ #
207
+ #if db.include? '?'
208
+ # _name, _sep, _kb = db.partition("@")
209
+ # case
210
+ # when _kb[0] == '?'
211
+ # assignments[_kb] ||= []
212
+ # assignments[_kb] << _db.partition("@").reject{|p| p.empty?}.last
213
+ # when _name[0] == '?'
214
+ # assignments[_name] ||= []
215
+ # assignments[_name] << _db.partition("@").first
216
+ # end
217
+ #end
213
218
 
214
219
  matches.each do |m|
215
220
  rule_matches << m
@@ -244,11 +249,14 @@ class KnowledgeBase
244
249
 
245
250
  elsif m = rule.match(/(\?[^\s{]+)\s*{/)
246
251
  acc_var = m.captures.first
252
+ pre_acc_var_assignments = assignments.dup
247
253
  Log.debug "Start assign block: #{acc_var}"
248
254
  elsif m = rule.match(/^\s*}\s*$/)
249
255
  Log.debug "Close assign block: #{acc_var}"
250
256
  saved_assign = assignments[acc_var]
251
257
  assignments.clear
258
+ assignments.merge!(pre_acc_var_assignments)
259
+ pre_acc_var_assignments = nil
252
260
  assignments[acc_var] = saved_assign
253
261
  all_matches = []
254
262
  path_rules = []
@@ -21,7 +21,7 @@ module Persist
21
21
  tokyocabinet_class = TokyoCabinet::HDB if tokyocabinet_class == "HDB" or tokyocabinet_class.nil?
22
22
  tokyocabinet_class = TokyoCabinet::BDB if tokyocabinet_class == "BDB"
23
23
 
24
- database = CONNECTIONS[path] ||= tokyocabinet_class.new
24
+ database = CONNECTIONS[path] ||= Log.ignore_stderr do tokyocabinet_class.new end
25
25
 
26
26
  if big and not Open.exists?(path)
27
27
  database.tune(nil,nil,nil,tokyocabinet_class::TLARGE | tokyocabinet_class::TDEFLATE)
@@ -161,8 +161,13 @@ module Path
161
161
  def find(where = nil, caller_lib = nil, paths = nil)
162
162
 
163
163
  if located?
164
- self.original ||= self
165
- return self
164
+ path = self
165
+ path = path + '.gz' if File.exist?(path + '.gz')
166
+ path = path + '.bgz' if File.exist?(path + '.bgz')
167
+ self.annotate(path)
168
+
169
+ path.original = self
170
+ return path
166
171
  end
167
172
 
168
173
  if where == :all || where == 'all'
data/lib/rbbt/resource.rb CHANGED
@@ -374,7 +374,7 @@ url='#{url}'
374
374
  if ! m.named_captures.include?("PKGDIR") || m["PKGDIR"] == resource.pkgdir
375
375
  unlocated = ([m["TOPLEVEL"],m["SUBPATH"],m["REST"]] * "/")
376
376
  unlocated.gsub!(/\/+/,'/')
377
- if self.subdir && ! self.subdir.empty?
377
+ if self.subdir && ! self.subdir.empty? && unlocated.include?(subdir)
378
378
  subdir = self.subdir
379
379
  subdir += "/" unless subdir.end_with?("/")
380
380
  unlocated[subdir] = ""
@@ -567,13 +567,13 @@ module TSV
567
567
  end
568
568
  end
569
569
 
570
- def dumper_stream(keys = nil, no_options = false, unmerge = false)
570
+ def dumper_stream(keys = nil, no_options = false, unmerge = false, stream = nil)
571
571
  unmerge = false unless type == :double
572
572
 
573
573
  options = self.options
574
574
  options[:type] = :list if unmerge
575
575
 
576
- TSV::Dumper.stream options do |dumper|
576
+ TSV::Dumper.stream options, filename, stream do |dumper|
577
577
  case no_options
578
578
  when FalseClass, nil
579
579
  dumper.init
@@ -631,11 +631,11 @@ module TSV
631
631
  end
632
632
  end
633
633
  end
634
+ dumper.close
634
635
  rescue Exception
635
636
  Log.exception $!
636
637
  raise $!
637
638
  end
638
- dumper.close
639
639
  end
640
640
  end
641
641
 
@@ -651,14 +651,9 @@ module TSV
651
651
  end
652
652
  end
653
653
 
654
- io = dumper_stream(keys, no_options, unmerge)
655
-
656
- str = ''
657
- while block = io.read(Misc::BLOCK_SIZE)
658
- str << block
659
- end
660
-
661
- str
654
+ io = dumper_stream(keys, no_options, unmerge, StringIO.new)
655
+ io.rewind
656
+ io.read
662
657
  end
663
658
 
664
659
  def to_unmerged_s(keys = nil, no_options = false)
@@ -1,13 +1,24 @@
1
1
  module TSV
2
2
  class Dumper
3
3
  attr_accessor :in_stream, :stream, :options, :filename, :sep
4
- def self.stream(options = {}, filename = nil, &block)
4
+ def self.stream(options = {}, filename = nil, stream = nil, &block)
5
5
  dumper = TSV::Dumper.new options, filename
6
- Thread.new(Thread.current) do |parent|
6
+ if stream
7
+ dumper.set_stream stream if stream
7
8
  yield dumper
8
- dumper.close
9
+ stream
10
+ else
11
+ thread = Thread.new(Thread.current) do |parent|
12
+ yield dumper
13
+ end
14
+ ConcurrentStream.setup(dumper.stream, threads: thread)
9
15
  end
10
- dumper.stream
16
+ end
17
+
18
+ def set_stream(stream)
19
+ @stream.close
20
+ @in_stream.close
21
+ @in_stream = @stream = stream
11
22
  end
12
23
 
13
24
  def initialize(options, filename = nil)
@@ -56,16 +67,16 @@ module TSV
56
67
 
57
68
  str = TSV.header_lines(key_field, fields, options.merge(init_options || {}))
58
69
 
59
- Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
70
+ Thread.pass while IO.select(nil, [@in_stream],nil,1).nil? if IO === @in_stream
60
71
 
61
- @in_stream.puts str
72
+ @in_stream << str
62
73
  end
63
74
 
64
75
  def add(k,v)
65
76
  @fields ||= @options[:fields]
66
77
  @sep ||= @options[:sep]
67
78
  begin
68
- Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
79
+ Thread.pass while IO.select(nil, [@in_stream],nil,1).nil? if IO === @in_stream
69
80
  @in_stream << k << TSV::Dumper.values_to_s(v, @fields, @sep)
70
81
  rescue IOError
71
82
  rescue Exception
@@ -74,16 +85,16 @@ module TSV
74
85
  end
75
86
 
76
87
  def close_out
77
- @stream.close unless @stream.closed?
88
+ @stream.close unless StringIO === @stream || @stream.closed?
78
89
  end
79
90
 
80
91
  def close_in
81
92
  @in_stream.join if @in_stream.respond_to?(:join) && ! @in_stream.joined?
82
- @in_stream.close unless @in_stream.closed?
93
+ @in_stream.close if @in_stream.respond_to?(:close) && ! @in_stream.closed?
83
94
  end
84
95
 
85
96
  def close
86
- close_in
97
+ close_in unless @in_stream == @stream
87
98
  end
88
99
  end
89
100
  end
@@ -125,7 +125,7 @@ module TSV
125
125
  def self.index(file, options = {})
126
126
  persist_options = Misc.pull_keys options, :persist
127
127
  persist_options[:prefix] ||= "StaticIndex[#{options[:target] || :key}]"
128
-
128
+
129
129
  Log.debug "Static Index: #{ file } - #{Misc.fingerprint options}"
130
130
  Persist.persist_tsv nil, file, options, persist_options do |data|
131
131
  data_options = Misc.pull_keys options, :data
@@ -258,6 +258,7 @@ module TSV
258
258
  else
259
259
  file.object_id.to_s
260
260
  end
261
+
261
262
  persist_options = Misc.pull_keys options, :persist
262
263
  persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
263
264
 
@@ -181,6 +181,48 @@ module TSV
181
181
  end
182
182
  end
183
183
 
184
+ def self.traverse_enumerable(enum, options = {}, &block)
185
+ callback, bar, join = Misc.process_options options, :callback, :bar, :join
186
+
187
+ begin
188
+ error = false
189
+ if callback
190
+ bar.init if bar
191
+ while enum.any?
192
+ e = enum.pop
193
+ begin
194
+ callback.call yield(e)
195
+ rescue Exception
196
+ Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
197
+ raise $!
198
+ ensure
199
+ bar.tick if bar
200
+ end
201
+ end
202
+ else
203
+ bar.init if bar
204
+ while enum.any?
205
+ e = enum.pop
206
+ begin
207
+ yield e
208
+ rescue Exception
209
+ Log.warn "Traverse exception on element: #{Misc.fingerprint(e)}"
210
+ raise $!
211
+ ensure
212
+ bar.tick if bar
213
+ end
214
+ end
215
+ end
216
+
217
+ rescue
218
+ error = true
219
+ raise $!
220
+ ensure
221
+ join.call(error) if join
222
+ Log::ProgressBar.remove_bar(bar, error) if bar
223
+ end
224
+ end
225
+
184
226
  def self.traverse_priority_queue(queue, options = {}, &block)
185
227
  callback, bar, join = Misc.process_options options, :callback, :bar, :join
186
228
 
@@ -396,6 +438,8 @@ module TSV
396
438
  else
397
439
  raise "Can not open obj for traversal #{Misc.fingerprint obj}"
398
440
  end
441
+ when Enumerable
442
+ traverse_enumerable(obj, options, &block)
399
443
  when nil
400
444
  raise "Can not traverse nil object into #{stream_name(options[:into])}"
401
445
  else
@@ -281,19 +281,20 @@ module TSV
281
281
  end
282
282
 
283
283
  values = values.collect{|v| v.length != num ? [v.first] * num : v}
284
+
284
285
  all = values
285
286
  all.unshift keys
286
- Misc.zip_fields(all).each do |values|
287
- key = values.shift
287
+ Misc.zip_fields(all).each do |vs|
288
+ key = vs.shift
288
289
  if data.include? key
289
- data[key] = data[key].zip(values).collect do |old, new|
290
- old.push new
291
- old
290
+ data[key] = data[key].zip(vs).collect do |old, new|
291
+ old + [new]
292
292
  end
293
293
  else
294
- data[key] = values.collect{|v| [v]}
294
+ data[key] = vs.collect{|v| [v] }
295
295
  end
296
296
  end
297
+
297
298
  nil
298
299
  end
299
300
 
@@ -63,7 +63,7 @@ module TSV
63
63
  class StringDoubleArraySerializer
64
64
  def self.dump(array)
65
65
  begin
66
- array.collect{|a| a.collect{|a| a.to_s } * "|"} * "\t"
66
+ array.collect{|a| a.collect{|a| a.to_s } * "|"} * "\t"
67
67
  rescue Encoding::CompatibilityError
68
68
  array.collect{|a| a.collect{|a| a.to_s.force_encoding('UTF-8')} * "|"} * "\t"
69
69
  end
data/lib/rbbt/tsv/util.rb CHANGED
@@ -168,6 +168,8 @@ module TSV
168
168
  end
169
169
  when Set
170
170
  get_stream(file.to_a, open_options)
171
+ when Enumerable
172
+ file
171
173
  else
172
174
  raise "Cannot get stream from: #{file.inspect}"
173
175
  end
@@ -189,6 +191,16 @@ module TSV
189
191
  pos = fields.index field
190
192
  return pos if pos
191
193
  return identify_field(key_field, fields, field.to_i) if field =~ /^\d+$/
194
+ if fields.select{|f| f.include?("(") }.any?
195
+ simplify_fields = fields.collect do |f|
196
+ if m = f.match(/(.*)\s+\(.*\)/)
197
+ m[1]
198
+ else
199
+ f
200
+ end
201
+ end
202
+ return identify_field(key_field, simplify_fields, field)
203
+ end
192
204
  raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}" if pos.nil?
193
205
  else
194
206
  raise "Field '#{ field }' was not found. Options: (#{key_field || "NO_KEY_FIELD"}), #{(fields || ["NO_FIELDS"]) * ", "}"
@@ -9,7 +9,7 @@ module Log
9
9
 
10
10
  def self.no_bar
11
11
  @@no_bar = false unless defined?(@@no_bar)
12
- @@no_bar || ENV["RBBT_NO_PROGRESS"] == "true"
12
+ (@@no_bar || ENV["RBBT_NO_PROGRESS"]).to_s == "true"
13
13
  end
14
14
 
15
15
  class ProgressBar
@@ -29,8 +29,8 @@ module Bgzf
29
29
 
30
30
  def filename
31
31
  @filename ||= begin
32
- compressed_stream.respond_to?(:filename) ? compressed_stream.filename : nil
33
- end
32
+ compressed_stream.respond_to?(:filename) ? compressed_stream.filename : rand(1000000000).to_s
33
+ end
34
34
  end
35
35
 
36
36
  def closed?
@@ -49,7 +49,8 @@ module Bgzf
49
49
 
50
50
  def _index
51
51
  @_index ||= begin
52
- index = Persist.persist("BGZF index" + (filename || "").sub(/.bgz$/,''), :marshal, :dir => Rbbt.var.bgzf_index) do
52
+ prefix_code = "BGZF index" + (filename || "").sub(/.bgz$/,'')
53
+ index = Persist.persist(prefix_code, :marshal) do
53
54
  index = []
54
55
  pos = 0
55
56
  while true do
@@ -66,7 +67,7 @@ module Bgzf
66
67
  end
67
68
  index
68
69
  end
69
- @block_cache_size = Math.log(index.length).to_i + 1
70
+ @block_cache_size = Math.log(index.length + 1).to_i + 1
70
71
  index
71
72
  end
72
73
  end
@@ -39,13 +39,13 @@ module Misc
39
39
  end
40
40
  end
41
41
  if message
42
- puts "#{message }: #{ repeats } repeats"
42
+ STDERR.pust "#{message }: #{ repeats } repeats"
43
43
  else
44
- puts "Benchmark for #{ repeats } repeats"
44
+ STDERR.puts "Benchmark for #{ repeats } repeats"
45
45
  end
46
- puts measure
46
+ STDERR.puts measure
47
47
  rescue Exception
48
- puts "Benchmark aborted"
48
+ STDERR.puts "Benchmark aborted"
49
49
  raise $!
50
50
  end
51
51
  res
@@ -1,4 +1,3 @@
1
-
2
1
  module Misc
3
2
 
4
3
  Log2Multiplier = 1.0 / Math.log(2.0)
@@ -69,7 +69,7 @@ module Misc
69
69
  end
70
70
 
71
71
  def self.tokenize(str)
72
- str.scan(/"[^"]*"|'[^']*'|[^"'\s]+/)
72
+ str.scan(/"([^"]*)"|'([^']*)'|([^"'\s]+)/).flatten.compact
73
73
  end
74
74
 
75
75
  def self.timespan(str, default = "s")
@@ -729,7 +729,7 @@ module Open
729
729
  dir_sub_path.push content
730
730
  save_content_in_repo(*dir_sub_path)
731
731
  else
732
- FileUtils.mkdir_p File.dirname(file)
732
+ FileUtils.mkdir_p File.dirname(file) unless File.directory?(file)
733
733
  case
734
734
  when block_given?
735
735
  begin