rbbt-util 5.43.0 → 5.44.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (126) hide show
  1. checksums.yaml +4 -4
  2. data/etc/app.d/base.rb +1 -1
  3. data/lib/rbbt/association/util.rb +1 -1
  4. data/lib/rbbt/hpc/batch.rb +19 -17
  5. data/lib/rbbt/persist/tsv/sharder.rb +1 -1
  6. data/lib/rbbt/tsv/accessor.rb +6 -11
  7. data/lib/rbbt/tsv/dumper.rb +21 -10
  8. data/lib/rbbt/tsv/index.rb +2 -1
  9. data/lib/rbbt/util/misc/math.rb +0 -1
  10. data/lib/rbbt/util/misc/omics.rb +2 -1
  11. data/lib/rbbt/util/misc/ssw.rb +298 -0
  12. data/lib/rbbt/util/open.rb +1 -1
  13. data/lib/rbbt/workflow/step/info.rb +3 -2
  14. data/lib/rbbt/workflow/step.rb +2 -1
  15. data/python/rbbt/__init__.py +2 -2
  16. data/share/rbbt_commands/workflow/server +6 -1
  17. data/share/rbbt_commands/workflow/task +1 -1
  18. metadata +5 -220
  19. data/test/rbbt/annotations/test_util.rb +0 -43
  20. data/test/rbbt/association/test_database.rb +0 -87
  21. data/test/rbbt/association/test_index.rb +0 -127
  22. data/test/rbbt/association/test_item.rb +0 -15
  23. data/test/rbbt/association/test_open.rb +0 -63
  24. data/test/rbbt/association/test_util.rb +0 -108
  25. data/test/rbbt/entity/test_identifiers.rb +0 -34
  26. data/test/rbbt/hpc/orchestrate/test_batches.rb +0 -70
  27. data/test/rbbt/hpc/orchestrate/test_chains.rb +0 -108
  28. data/test/rbbt/hpc/orchestrate/test_rules.rb +0 -59
  29. data/test/rbbt/hpc/test_batch.rb +0 -64
  30. data/test/rbbt/hpc/test_hpc_test_workflows.rb +0 -0
  31. data/test/rbbt/hpc/test_orchestrate.rb +0 -144
  32. data/test/rbbt/hpc/test_pbs.rb +0 -43
  33. data/test/rbbt/hpc/test_slurm.rb +0 -28
  34. data/test/rbbt/knowledge_base/test_enrichment.rb +0 -50
  35. data/test/rbbt/knowledge_base/test_entity.rb +0 -62
  36. data/test/rbbt/knowledge_base/test_query.rb +0 -46
  37. data/test/rbbt/knowledge_base/test_registry.rb +0 -74
  38. data/test/rbbt/knowledge_base/test_syndicate.rb +0 -48
  39. data/test/rbbt/knowledge_base/test_traverse.rb +0 -133
  40. data/test/rbbt/persist/test_tsv.rb +0 -88
  41. data/test/rbbt/persist/tsv/test_cdb.rb +0 -18
  42. data/test/rbbt/persist/tsv/test_kyotocabinet.rb +0 -27
  43. data/test/rbbt/persist/tsv/test_leveldb.rb +0 -18
  44. data/test/rbbt/persist/tsv/test_lmdb.rb +0 -20
  45. data/test/rbbt/persist/tsv/test_sharder.rb +0 -164
  46. data/test/rbbt/persist/tsv/test_tokyocabinet.rb +0 -262
  47. data/test/rbbt/resource/test_path.rb +0 -49
  48. data/test/rbbt/test_annotations.rb +0 -167
  49. data/test/rbbt/test_association.rb +0 -103
  50. data/test/rbbt/test_entity.rb +0 -252
  51. data/test/rbbt/test_fix_width_table.rb +0 -135
  52. data/test/rbbt/test_knowledge_base.rb +0 -226
  53. data/test/rbbt/test_monitor.rb +0 -11
  54. data/test/rbbt/test_packed_index.rb +0 -68
  55. data/test/rbbt/test_persist.rb +0 -85
  56. data/test/rbbt/test_resource.rb +0 -110
  57. data/test/rbbt/test_tsv.rb +0 -669
  58. data/test/rbbt/test_workflow.rb +0 -609
  59. data/test/rbbt/tsv/parallel/test_through.rb +0 -40
  60. data/test/rbbt/tsv/parallel/test_traverse.rb +0 -456
  61. data/test/rbbt/tsv/test_accessor.rb +0 -319
  62. data/test/rbbt/tsv/test_attach.rb +0 -715
  63. data/test/rbbt/tsv/test_change_id.rb +0 -61
  64. data/test/rbbt/tsv/test_csv.rb +0 -49
  65. data/test/rbbt/tsv/test_excel.rb +0 -171
  66. data/test/rbbt/tsv/test_field_index.rb +0 -19
  67. data/test/rbbt/tsv/test_filter.rb +0 -187
  68. data/test/rbbt/tsv/test_index.rb +0 -308
  69. data/test/rbbt/tsv/test_manipulate.rb +0 -334
  70. data/test/rbbt/tsv/test_marshal.rb +0 -24
  71. data/test/rbbt/tsv/test_matrix.rb +0 -20
  72. data/test/rbbt/tsv/test_parallel.rb +0 -7
  73. data/test/rbbt/tsv/test_parser.rb +0 -101
  74. data/test/rbbt/tsv/test_stream.rb +0 -253
  75. data/test/rbbt/tsv/test_util.rb +0 -52
  76. data/test/rbbt/util/R/test_eval.rb +0 -43
  77. data/test/rbbt/util/R/test_model.rb +0 -128
  78. data/test/rbbt/util/R/test_plot.rb +0 -38
  79. data/test/rbbt/util/concurrency/processes/test_socket.rb +0 -70
  80. data/test/rbbt/util/concurrency/test_processes.rb +0 -192
  81. data/test/rbbt/util/concurrency/test_threads.rb +0 -40
  82. data/test/rbbt/util/log/test_progress.rb +0 -111
  83. data/test/rbbt/util/misc/test_bgzf.rb +0 -48
  84. data/test/rbbt/util/misc/test_communication.rb +0 -13
  85. data/test/rbbt/util/misc/test_development.rb +0 -26
  86. data/test/rbbt/util/misc/test_format.rb +0 -10
  87. data/test/rbbt/util/misc/test_indiferent_hash.rb +0 -14
  88. data/test/rbbt/util/misc/test_lock.rb +0 -77
  89. data/test/rbbt/util/misc/test_multipart_payload.rb +0 -202
  90. data/test/rbbt/util/misc/test_omics.rb +0 -116
  91. data/test/rbbt/util/misc/test_pipes.rb +0 -343
  92. data/test/rbbt/util/misc/test_serialize.rb +0 -24
  93. data/test/rbbt/util/python/test_util.rb +0 -25
  94. data/test/rbbt/util/simpleopt/test_get.rb +0 -12
  95. data/test/rbbt/util/simpleopt/test_parse.rb +0 -10
  96. data/test/rbbt/util/simpleopt/test_setup.rb +0 -76
  97. data/test/rbbt/util/test_R.rb +0 -37
  98. data/test/rbbt/util/test_chain_methods.rb +0 -22
  99. data/test/rbbt/util/test_cmd.rb +0 -87
  100. data/test/rbbt/util/test_colorize.rb +0 -22
  101. data/test/rbbt/util/test_concurrency.rb +0 -6
  102. data/test/rbbt/util/test_config.rb +0 -69
  103. data/test/rbbt/util/test_excel2tsv.rb +0 -10
  104. data/test/rbbt/util/test_filecache.rb +0 -36
  105. data/test/rbbt/util/test_log.rb +0 -52
  106. data/test/rbbt/util/test_migrate.rb +0 -34
  107. data/test/rbbt/util/test_misc.rb +0 -728
  108. data/test/rbbt/util/test_open.rb +0 -200
  109. data/test/rbbt/util/test_procpath.rb +0 -23
  110. data/test/rbbt/util/test_python.rb +0 -144
  111. data/test/rbbt/util/test_semaphore.rb +0 -36
  112. data/test/rbbt/util/test_simpleDSL.rb +0 -55
  113. data/test/rbbt/util/test_simpleopt.rb +0 -11
  114. data/test/rbbt/util/test_ssh.rb +0 -10
  115. data/test/rbbt/util/test_tmpfile.rb +0 -32
  116. data/test/rbbt/workflow/step/test_dependencies.rb +0 -295
  117. data/test/rbbt/workflow/step/test_save_load_inputs.rb +0 -136
  118. data/test/rbbt/workflow/test_doc.rb +0 -30
  119. data/test/rbbt/workflow/test_remote_workflow.rb +0 -99
  120. data/test/rbbt/workflow/test_schedule.rb +0 -0
  121. data/test/rbbt/workflow/test_step.rb +0 -231
  122. data/test/rbbt/workflow/test_task.rb +0 -85
  123. data/test/rbbt/workflow/util/test_archive.rb +0 -27
  124. data/test/rbbt/workflow/util/test_data.rb +0 -67
  125. data/test/rbbt/workflow/util/test_orchestrator.rb +0 -263
  126. data/test/test_helper.rb +0 -114
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d1bbf2cefca44eecad7465a8d4dc1b88f53f18d1e63e310353061b43ad46bed9
4
- data.tar.gz: f54f659da9eca312d116430d88019f067ce5ccf519724a2a448e3b4d31dfb767
3
+ metadata.gz: a9269fd6f3ed8e0b03f575c59f50d42c45a11bc07e138f4377e5069f3050fb29
4
+ data.tar.gz: 488a57247f23d41f8c94aa64cb38cf666c99ee43c4fa14399ed77b8bb1854519
5
5
  SHA512:
6
- metadata.gz: eaad34515129ffd123064e9af920b5f83f3897c957de45e3bef04a1101d961d419863729f21665773b81f535f3fdba45d490ce41870f9db87f0e0e5d5f987233
7
- data.tar.gz: d48552f30e6434b0e27e173eb62f41a3126ecb9065cba58f2f5737f8e3aef0e2183503076a40cc6586832d90458ae5e572a55171758452f3699a4225704bc25e
6
+ metadata.gz: 3a288fcd2e11209621bc61f911097c104e75c22c759ca7ac6ca0a1f908bf1ea1711587bf776b262f082a60c0296a9004cfc8fad85de738dabd6bad0bc45f6fed
7
+ data.tar.gz: 91e775b843358c29df1d87e1d9d5db8a92c37d842fd05757cc810a5f4c77a38cfe154a0a368429abb921cf43014333361426546a43aac09eabe81f07ca4315a5
data/etc/app.d/base.rb CHANGED
@@ -19,7 +19,7 @@ end
19
19
  use Rack::Session::Cookie, :key => 'rack.session',
20
20
  :path => '/',
21
21
  :expire_after => 2592000,
22
- :secret => "#{self.to_s} secret!!"
22
+ :secret => Misc.digest("#{self.to_s} secret!!") * 4
23
23
 
24
24
  #{{{ DIRECTORIES
25
25
  global_var = Rbbt.var.sinatra
@@ -10,7 +10,7 @@ module Association
10
10
  end
11
11
 
12
12
  def self.parse_field_specification(spec)
13
- return [2,nil,nil] if Numeric === spec
13
+ return [spec,nil,nil] if Numeric === spec
14
14
  spec = spec.split "=>" unless Array === spec
15
15
  field_part, final_format = spec
16
16
 
@@ -110,23 +110,25 @@ module HPC
110
110
 
111
111
  task = Symbol === job.overriden ? job.overriden : job.task_name
112
112
 
113
- #override_deps = job.overriden_deps.collect do |dep|
114
- # name = [dep.workflow.to_s, dep.task_name] * "#"
115
- # [name, dep.path] * "="
116
- #end.uniq * ","
117
-
118
- if job.overriden?
119
- #override_deps = job.rec_dependencies.
120
- # select{|dep| Symbol === dep.overriden }.
121
-
122
- override_deps = job.overriden_deps.
123
- collect do |dep|
124
- name = [dep.workflow.to_s, dep.task_name] * "#"
125
- [name, dep.path] * "="
126
- end.uniq * ","
127
-
128
- options[:override_deps] = override_deps unless override_deps.empty?
129
- end
113
+ override_deps = job.overriden_deps.collect do |dep|
114
+ name = [dep.workflow.to_s, dep.task_name] * "#"
115
+ [name, dep.path] * "="
116
+ end.uniq * ","
117
+
118
+ options[:override_deps] = override_deps unless override_deps.empty?
119
+
120
+ #if job.overriden?
121
+ # #override_deps = job.rec_dependencies.
122
+ # # select{|dep| Symbol === dep.overriden }.
123
+ #
124
+ # override_deps = job.overriden_deps.
125
+ # collect do |dep|
126
+ # name = [dep.workflow.to_s, dep.task_name] * "#"
127
+ # [name, dep.path] * "="
128
+ # end.uniq * ","
129
+
130
+ # options[:override_deps] = override_deps unless override_deps.empty?
131
+ #end
130
132
 
131
133
  # Save inputs into inputs_dir
132
134
  inputs_dir = Misc.process_options options, :inputs_dir
@@ -214,7 +214,7 @@ module Persist
214
214
  def [](key, clean=false)
215
215
  database = database(key)
216
216
  return nil if database.nil?
217
- v = database.send(:[], key)
217
+ database.send(:[], key)
218
218
  end
219
219
 
220
220
  def <<(p)
@@ -567,13 +567,13 @@ module TSV
567
567
  end
568
568
  end
569
569
 
570
- def dumper_stream(keys = nil, no_options = false, unmerge = false)
570
+ def dumper_stream(keys = nil, no_options = false, unmerge = false, stream = nil)
571
571
  unmerge = false unless type == :double
572
572
 
573
573
  options = self.options
574
574
  options[:type] = :list if unmerge
575
575
 
576
- TSV::Dumper.stream options do |dumper|
576
+ TSV::Dumper.stream options, filename, stream do |dumper|
577
577
  case no_options
578
578
  when FalseClass, nil
579
579
  dumper.init
@@ -631,11 +631,11 @@ module TSV
631
631
  end
632
632
  end
633
633
  end
634
+ dumper.close
634
635
  rescue Exception
635
636
  Log.exception $!
636
637
  raise $!
637
638
  end
638
- dumper.close
639
639
  end
640
640
  end
641
641
 
@@ -651,14 +651,9 @@ module TSV
651
651
  end
652
652
  end
653
653
 
654
- io = dumper_stream(keys, no_options, unmerge)
655
-
656
- str = ''
657
- while block = io.read(Misc::BLOCK_SIZE)
658
- str << block
659
- end
660
-
661
- str
654
+ io = dumper_stream(keys, no_options, unmerge, StringIO.new)
655
+ io.rewind
656
+ io.read
662
657
  end
663
658
 
664
659
  def to_unmerged_s(keys = nil, no_options = false)
@@ -1,13 +1,24 @@
1
1
  module TSV
2
2
  class Dumper
3
3
  attr_accessor :in_stream, :stream, :options, :filename, :sep
4
- def self.stream(options = {}, filename = nil, &block)
4
+ def self.stream(options = {}, filename = nil, stream = nil, &block)
5
5
  dumper = TSV::Dumper.new options, filename
6
- Thread.new(Thread.current) do |parent|
6
+ if stream
7
+ dumper.set_stream stream if stream
7
8
  yield dumper
8
- dumper.close
9
+ stream
10
+ else
11
+ thread = Thread.new(Thread.current) do |parent|
12
+ yield dumper
13
+ end
14
+ ConcurrentStream.setup(dumper.stream, threads: thread)
9
15
  end
10
- dumper.stream
16
+ end
17
+
18
+ def set_stream(stream)
19
+ @stream.close
20
+ @in_stream.close
21
+ @in_stream = @stream = stream
11
22
  end
12
23
 
13
24
  def initialize(options, filename = nil)
@@ -56,16 +67,16 @@ module TSV
56
67
 
57
68
  str = TSV.header_lines(key_field, fields, options.merge(init_options || {}))
58
69
 
59
- Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
70
+ Thread.pass while IO.select(nil, [@in_stream],nil,1).nil? if IO === @in_stream
60
71
 
61
- @in_stream.puts str
72
+ @in_stream << str
62
73
  end
63
74
 
64
75
  def add(k,v)
65
76
  @fields ||= @options[:fields]
66
77
  @sep ||= @options[:sep]
67
78
  begin
68
- Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
79
+ Thread.pass while IO.select(nil, [@in_stream],nil,1).nil? if IO === @in_stream
69
80
  @in_stream << k << TSV::Dumper.values_to_s(v, @fields, @sep)
70
81
  rescue IOError
71
82
  rescue Exception
@@ -74,16 +85,16 @@ module TSV
74
85
  end
75
86
 
76
87
  def close_out
77
- @stream.close unless @stream.closed?
88
+ @stream.close unless StringIO === @stream || @stream.closed?
78
89
  end
79
90
 
80
91
  def close_in
81
92
  @in_stream.join if @in_stream.respond_to?(:join) && ! @in_stream.joined?
82
- @in_stream.close unless @in_stream.closed?
93
+ @in_stream.close if @in_stream.respond_to?(:close) && ! @in_stream.closed?
83
94
  end
84
95
 
85
96
  def close
86
- close_in
97
+ close_in unless @in_stream == @stream
87
98
  end
88
99
  end
89
100
  end
@@ -125,7 +125,7 @@ module TSV
125
125
  def self.index(file, options = {})
126
126
  persist_options = Misc.pull_keys options, :persist
127
127
  persist_options[:prefix] ||= "StaticIndex[#{options[:target] || :key}]"
128
-
128
+
129
129
  Log.debug "Static Index: #{ file } - #{Misc.fingerprint options}"
130
130
  Persist.persist_tsv nil, file, options, persist_options do |data|
131
131
  data_options = Misc.pull_keys options, :data
@@ -258,6 +258,7 @@ module TSV
258
258
  else
259
259
  file.object_id.to_s
260
260
  end
261
+
261
262
  persist_options = Misc.pull_keys options, :persist
262
263
  persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
263
264
 
@@ -1,4 +1,3 @@
1
-
2
1
  module Misc
3
2
 
4
3
  Log2Multiplier = 1.0 / Math.log(2.0)
@@ -256,8 +256,9 @@ module Misc
256
256
  ref = m[1]
257
257
  num = m[2]
258
258
  alt = m[3]
259
+ alt = "*" if alt == "Ter"
259
260
  ref = THREE_TO_ONE_AA_CODE[ref.downcase]
260
- alt = THREE_TO_ONE_AA_CODE[alt.downcase]
261
+ alt = THREE_TO_ONE_AA_CODE[alt.downcase] unless alt == "*"
261
262
  mutation = [ref, num, alt] * ""
262
263
  end
263
264
  one_aa_code = THREE_TO_ONE_AA_CODE.values
@@ -0,0 +1,298 @@
1
+ require 'rbbt'
2
+
3
+ require 'inline'
4
+
5
+ # From: https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library
6
+ # Citation: SSW Library: An SIMD Smith-Waterman C/C++ Library for Use in Genomic Applications
7
+ # Mengyao Zhao, Wan-Ping Lee, Gabor T. Marth
8
+ # http://arxiv.org/abs/1208.6350
9
+ module SmithWaterman
10
+
11
+ inline(:C) do |builder|
12
+ prefix =<<-EOF
13
+ #include <stdlib.h>
14
+ #include <stdio.h>
15
+ #include <stdint.h>
16
+ #include #{'"' + Rbbt.share.software.opt.ssw["ssw.h"].find + '"'}
17
+ #include #{'"' + Rbbt.share.software.opt.ssw["ssw.c"].find + '"'}
18
+ EOF
19
+
20
+ prefix +=<<-'EOF'
21
+ void ssw_write (s_align* a,
22
+ char* ref_seq,
23
+ char* read_seq,
24
+ int8_t* table,
25
+ int fd) {
26
+
27
+ int max_length = 1000000;
28
+ dprintf(fd, "optimal_alignment_score: %d\tsub-optimal_alignment_score: %d\t", a->score1, a->score2);
29
+ if (a->ref_begin1 + 1) dprintf(fd, "target_begin: %d\t", a->ref_begin1 + 1);
30
+ dprintf(fd, "target_end: %d\t", a->ref_end1 + 1);
31
+ if (a->read_begin1 + 1) dprintf(fd, "query_begin: %d\t", a->read_begin1 + 1);
32
+ dprintf(fd, "query_end: %d\n\n", a->read_end1 + 1);
33
+ if (a->cigar) {
34
+ int32_t i, c = 0, left = 0, e = 0, qb = a->ref_begin1, pb = a->read_begin1;
35
+ while (e < a->cigarLen || left > 0) {
36
+ int32_t count = 0;
37
+ int32_t q = qb;
38
+ int32_t p = pb;
39
+ dprintf(fd, "Target: %8d ", q + 1);
40
+ for (c = e; c < a->cigarLen; ++c) {
41
+ int32_t letter = 0xf&*(a->cigar + c);
42
+ int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
43
+ int32_t l = (count == 0 && left > 0) ? left: length;
44
+ for (i = 0; i < l; ++i) {
45
+ if (letter == 1) dprintf(fd, "-");
46
+ else {
47
+ dprintf(fd, "%c", *(ref_seq + q));
48
+ ++ q;
49
+ }
50
+ ++ count;
51
+ if (count == max_length) goto step2;
52
+ }
53
+ }
54
+ step2:
55
+ dprintf(fd, " %d\n ", q);
56
+ q = qb;
57
+ count = 0;
58
+ for (c = e; c < a->cigarLen; ++c) {
59
+ int32_t letter = 0xf&*(a->cigar + c);
60
+ int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
61
+ int32_t l = (count == 0 && left > 0) ? left: length;
62
+ for (i = 0; i < l; ++i){
63
+ if (letter == 0) {
64
+ if (table[(int)*(ref_seq + q)] == table[(int)*(read_seq + p)])dprintf(fd, "|");
65
+ else dprintf(fd, "*");
66
+ ++q;
67
+ ++p;
68
+ } else {
69
+ dprintf(fd, "*");
70
+ if (letter == 1) ++p;
71
+ else ++q;
72
+ }
73
+ ++ count;
74
+ if (count == max_length) {
75
+ qb = q;
76
+ goto step3;
77
+ }
78
+ }
79
+ }
80
+ step3:
81
+ p = pb;
82
+ dprintf(fd, "\nQuery: %8d ", p + 1);
83
+ count = 0;
84
+ for (c = e; c < a->cigarLen; ++c) {
85
+ int32_t letter = 0xf&*(a->cigar + c);
86
+ int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
87
+ int32_t l = (count == 0 && left > 0) ? left: length;
88
+ for (i = 0; i < l; ++i) {
89
+ if (letter == 2) dprintf(fd, "-");
90
+ else {
91
+ dprintf(fd, "%c", *(read_seq + p));
92
+ ++p;
93
+ }
94
+ ++ count;
95
+ if (count == max_length) {
96
+ pb = p;
97
+ left = l - i - 1;
98
+ e = (left == 0) ? (c + 1) : c;
99
+ goto end;
100
+ }
101
+ }
102
+ }
103
+ e = c;
104
+ left = 0;
105
+ end:
106
+ dprintf(fd, " %d\n\n", p);
107
+ }
108
+ }
109
+ }
110
+
111
+ EOF
112
+
113
+ builder.prefix prefix
114
+
115
+
116
+ script = <<-EOF
117
+ int ssw_nt(char * read_seq, char * ref_seq){
118
+ int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1; // default parameters for genome sequence alignment
119
+ // reference sequence
120
+ //char ref_seq[40] = {'C', 'A', 'G', 'C', 'C', 'T', 'T', 'T', 'C', 'T', 'G', 'A', 'C', 'C', 'C', 'G', 'G', 'A', 'A', 'A', 'T',
121
+ // 'C', 'A', 'A', 'A', 'A', 'T', 'A', 'G', 'G', 'C', 'A', 'C', 'A', 'A', 'C', 'A', 'A', 'A', '\0'};
122
+ //char read_seq[16] = {'C', 'T', 'G', 'A', 'G', 'C', 'C', 'G', 'G', 'T', 'A', 'A', 'A', 'T', 'C', '\0'}; // read sequence
123
+
124
+ s_profile* profile;
125
+ int8_t* num = (int8_t*)malloc(16); // the read sequence represented in numbers
126
+ int8_t* ref_num = (int8_t*)malloc(64); // the reference sequence represented in numbers
127
+ s_align* result;
128
+
129
+ /* This table is used to transform nucleotide letters into numbers. */
130
+ int8_t nt_table[128] = {
131
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
132
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
133
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
134
+ 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
135
+ 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
136
+ 4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
137
+ 4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
138
+ 4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
139
+ };
140
+
141
+ // initialize scoring matrix for genome sequences
142
+ // A C G T N (or other ambiguous code)
143
+ // 2 -2 -2 -2 0 A
144
+ // -2 2 -2 -2 0 C
145
+ // -2 -2 2 -2 0 G
146
+ // -2 -2 -2 2 0 T
147
+ // 0 0 0 0 0 N (or other ambiguous code)
148
+ int8_t* mat = (int8_t*)calloc(25, sizeof(int8_t));
149
+ for (l = k = 0; l < 4; ++l) {
150
+ for (m = 0; m < 4; ++m) mat[k++] = l == m ? match : - mismatch; /* weight_match : -weight_mismatch */
151
+ mat[k++] = 0; // ambiguous base: no penalty
152
+ }
153
+ for (m = 0; m < 5; ++m) mat[k++] = 0;
154
+
155
+ for (m = 0; m < 15; ++m) num[m] = nt_table[(int)read_seq[m]];
156
+ profile = ssw_init(num, 15, mat, 5, 2);
157
+ for (m = 0; m < 39; ++m) ref_num[m] = nt_table[(int)ref_seq[m]];
158
+
159
+ // Only the 8th bit of the flag is set. ssw_align will always return the best alignment beginning position and cigar.
160
+ result = ssw_align (profile, ref_num, 39, gap_open, gap_extension, 1, 0, 0, 15);
161
+ //ssw_write(result, ref_seq, read_seq, nt_tablte);
162
+
163
+ free(mat);
164
+ free(ref_num);
165
+ free(num);
166
+ return(0);
167
+ }
168
+
169
+ EOF
170
+ builder.c_singleton script
171
+
172
+ script = <<-EOF
173
+ int ssw_aa(char * read_seq, char * ref_seq, int read_seq_len, int ref_seq_len, int fd){
174
+
175
+ int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1; // default parameters for genome sequence alignment
176
+
177
+ s_profile* profile;
178
+ int8_t* num = (int8_t*)malloc(read_seq_len); // the read sequence represented in numbers
179
+ int8_t* ref_num = (int8_t*)malloc(ref_seq_len); // the reference sequence represented in numbers
180
+ s_align* result;
181
+
182
+ /* This table is used to transform amino acid letters into numbers. */
183
+ int8_t aa_table[128] = {
184
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
185
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
186
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
187
+ 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
188
+ 23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 23, 11, 10, 12, 2, 23,
189
+ 14, 5, 1, 15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23,
190
+ 23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 23, 11, 10, 12, 2, 23,
191
+ 14, 5, 1, 15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23
192
+ };
193
+
194
+ int8_t mat[] = {
195
+ // A R N D C Q E G H I L K M F P S T W Y V B Z X *
196
+ 5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -5, // A
197
+ -2, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5, // R
198
+ -1, -1, 7, 2, -2, 0, 0, 0, 1, -3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 5, 0, -1, -5, // N
199
+ -2, -2, 2, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 6, 1, -1, -5, // D
200
+ -1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5, // C
201
+ -1, 1, 0, 0, -3, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5, // Q
202
+ -1, 0, 0, 2, -3, 2, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5, // E
203
+ 0, -3, 0, -1, -3, -2, -3, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -1, -5, // G
204
+ -2, 0, 1, -1, -3, 1, 0, -2, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5, // H
205
+ -1, -4, -3, -4, -2, -3, -4, -4, -4, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5, // I
206
+ -2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5, // L
207
+ -1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5, // K
208
+ -1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5, // M
209
+ -3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -4, -3, -2, 1, 4, -1, -4, -4, -1, -5, // F
210
+ -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5, // P
211
+ 1, -1, 1, 0, -1, 0, -1, 0, -1, -3, -3, 0, -2, -3, -1, 5, 2, -4, -2, -2, 0, 0, -1, -5, // S
212
+ 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -2, 0, 0, -1, -1, -5, // T
213
+ -3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, 2, -3, -5, -2, -1, -5, // W
214
+ -2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, -1, -3, -2, -1, -5, // Y
215
+ 0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, -3, -3, -1, -5, // V
216
+ -2, -1, 5, 6, -3, 0, 1, -1, 0, -4, -4, 0, -3, -4, -2, 0, 0, -5, -3, -3, 6, 1, -1, -5, // B
217
+ -1, 0, 0, 1, -3, 4, 5, -2, 0, -3, -3, 1, -1, -4, -1, 0, -1, -2, -2, -3, 1, 5, -1, -5, // Z
218
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, // X
219
+ -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1 // *
220
+ };
221
+
222
+
223
+ for (m = 0; m < read_seq_len; ++m) num[m] = aa_table[(int)read_seq[m]];
224
+ profile = ssw_init(num, read_seq_len, mat, 24, 2);
225
+ for (m = 0; m < ref_seq_len; ++m) ref_num[m] = aa_table[(int)ref_seq[m]];
226
+
227
+ // Only the 8th bit of the flag is set. ssw_align will always return the best alignment beginning position and cigar.
228
+ result = ssw_align(profile, ref_num, ref_seq_len, gap_open, gap_extension, 1, 0, 0, read_seq_len );
229
+ ssw_write(result, ref_seq, read_seq, aa_table, fd);
230
+
231
+ free(num);
232
+ free(ref_num);
233
+ return(0);
234
+ }
235
+
236
+ EOF
237
+ builder.c_singleton script
238
+
239
+ end
240
+
241
+ def self.align(query_sequence, target_sequence)
242
+ Log.low { "Aligning #{ Misc.fingerprint query_sequence } to #{ Misc.fingerprint target_sequence }" }
243
+
244
+ begin
245
+ raise "No query sequence" if query_sequence.nil?
246
+ raise "No target sequence" if target_sequence.nil?
247
+
248
+ s_out = Misc.open_pipe do |s_in|
249
+ SmithWaterman.ssw_aa(query_sequence, target_sequence, query_sequence.length, target_sequence.length, s_in.fileno)
250
+ end
251
+
252
+ txt = s_out.read
253
+ s_out.close
254
+ s_out.join
255
+ txt
256
+
257
+ target_start, target, target_end = txt.match(/Target:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
258
+
259
+ query_start, query, query_end = txt.match(/Query:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
260
+
261
+ txt.replace ""
262
+ [("_" * (query_start.to_i - 1)) + query, ("_" * (target_start.to_i - 1)) + target]
263
+ rescue
264
+ Log.warn("Error in aligmnent: #{$!.message}")
265
+ return ["-", "-"]
266
+ end
267
+ end
268
+
269
+ def self.alignment_map(source, target)
270
+ alignment_source, alignment_target = SmithWaterman.align(source, target)
271
+ map = {}
272
+
273
+ offset_source, alignment_source = alignment_source.match(/^(_*)(.*)/).values_at( 1, 2)
274
+ offset_target, alignment_target = alignment_target.match(/^(_*)(.*)/).values_at( 1, 2)
275
+
276
+ gaps_source = 0
277
+ gaps_target = 0
278
+ miss_match = 0
279
+ alignment_source.chars.zip(alignment_target.chars).each_with_index do |p,i|
280
+ char_source, char_target = p
281
+ gaps_source += 1 if char_source == '-'
282
+ gaps_target += 1 if char_target == '-'
283
+ source_pos = i + 1 + offset_source.length - gaps_source
284
+ target_pos = i + 1 + offset_target.length - gaps_target
285
+ if char_source != char_target or char_source == "-"
286
+ miss_match += 1
287
+ else
288
+ map[source_pos] = target_pos
289
+ end
290
+ end
291
+
292
+ if miss_match + gaps_source > alignment_source.length.to_f / 2
293
+ {}
294
+ else
295
+ map
296
+ end
297
+ end
298
+ end
@@ -729,7 +729,7 @@ module Open
729
729
  dir_sub_path.push content
730
730
  save_content_in_repo(*dir_sub_path)
731
731
  else
732
- FileUtils.mkdir_p File.dirname(file)
732
+ FileUtils.mkdir_p File.dirname(file) unless File.directory?(file)
733
733
  case
734
734
  when block_given?
735
735
  begin
@@ -35,8 +35,8 @@ class Step
35
35
  rescue Exception
36
36
  Log.debug{"Error loading info file: " + info_file}
37
37
  Log.exception $!
38
- Open.rm info_file
39
- Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
38
+ #Open.rm info_file
39
+ #Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
40
40
  raise $!
41
41
  end
42
42
  end
@@ -62,6 +62,7 @@ class Step
62
62
  def load_dependencies_from_info
63
63
  relocated = nil
64
64
  @dependencies = (self.info[:dependencies] || []).collect do |task,name,dep_path|
65
+ dep_path = task if dep_path.nil?
65
66
  if Open.exists?(dep_path) || Open.exists?(dep_path + '.info')
66
67
  Workflow._load_step dep_path
67
68
  else
@@ -303,7 +303,8 @@ class Step
303
303
 
304
304
  if result_description
305
305
  entity_info = info.dup
306
- entity_info.merge! info[:inputs] if info[:inputs]
306
+ # Also load entity_info in rbbt6
307
+ entity_info.merge! info[:inputs] if Hash === info[:inputs]
307
308
  res = prepare_result res, result_description, entity_info
308
309
  end
309
310
 
@@ -67,10 +67,10 @@ def tsv_preamble(line, comment_char="#"):
67
67
  return header
68
68
 
69
69
 
70
- def tsv_header(filename, sep="\t", comment_char="#"):
70
+ def tsv_header(filename, sep="\t", comment_char="#", encoding='utf8'):
71
71
  import re
72
72
 
73
- f = open(filename)
73
+ f = open(filename, encoding=encoding)
74
74
  line = f.readline().strip()
75
75
 
76
76
  if (not line.startswith(comment_char)):
@@ -134,7 +134,12 @@ TmpFile.with_file do |app_dir|
134
134
  options.each do |k,v| fixed_options[k.to_sym] = v end
135
135
  options = fixed_options
136
136
 
137
- Rack::Server.start(options)
137
+ begin
138
+ Rack::Server.start(options)
139
+ rescue LoadError
140
+ require 'rackup'
141
+ Rackup::Server.start(options)
142
+ end
138
143
  end
139
144
  end
140
145
  end
@@ -484,7 +484,7 @@ begin
484
484
 
485
485
  if server = options.delete(:relay)
486
486
  require 'rbbt/workflow/remote_workflow'
487
- relay_dependencies = options.delete(:relay_dependencies).split(",")
487
+ relay_dependencies = options.delete(:relay_dependencies).split(",") if options[:relay_dependencies]
488
488
  produce_dependencies_for_relay = options.delete(:produce_dependencies_for_relay)
489
489
 
490
490
  jobs_to_relay = relay_dependencies ? match_dependencies(relay_dependencies, job.rec_dependencies) : [job]