rbbt-util 5.43.0 → 5.44.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/etc/app.d/base.rb +1 -1
- data/lib/rbbt/association/util.rb +1 -1
- data/lib/rbbt/hpc/batch.rb +19 -17
- data/lib/rbbt/persist/tsv/sharder.rb +1 -1
- data/lib/rbbt/tsv/accessor.rb +6 -11
- data/lib/rbbt/tsv/dumper.rb +21 -10
- data/lib/rbbt/tsv/index.rb +2 -1
- data/lib/rbbt/util/misc/math.rb +0 -1
- data/lib/rbbt/util/misc/omics.rb +2 -1
- data/lib/rbbt/util/misc/ssw.rb +298 -0
- data/lib/rbbt/util/open.rb +1 -1
- data/lib/rbbt/workflow/step/info.rb +3 -2
- data/lib/rbbt/workflow/step.rb +2 -1
- data/python/rbbt/__init__.py +2 -2
- data/share/rbbt_commands/workflow/server +6 -1
- data/share/rbbt_commands/workflow/task +1 -1
- metadata +5 -220
- data/test/rbbt/annotations/test_util.rb +0 -43
- data/test/rbbt/association/test_database.rb +0 -87
- data/test/rbbt/association/test_index.rb +0 -127
- data/test/rbbt/association/test_item.rb +0 -15
- data/test/rbbt/association/test_open.rb +0 -63
- data/test/rbbt/association/test_util.rb +0 -108
- data/test/rbbt/entity/test_identifiers.rb +0 -34
- data/test/rbbt/hpc/orchestrate/test_batches.rb +0 -70
- data/test/rbbt/hpc/orchestrate/test_chains.rb +0 -108
- data/test/rbbt/hpc/orchestrate/test_rules.rb +0 -59
- data/test/rbbt/hpc/test_batch.rb +0 -64
- data/test/rbbt/hpc/test_hpc_test_workflows.rb +0 -0
- data/test/rbbt/hpc/test_orchestrate.rb +0 -144
- data/test/rbbt/hpc/test_pbs.rb +0 -43
- data/test/rbbt/hpc/test_slurm.rb +0 -28
- data/test/rbbt/knowledge_base/test_enrichment.rb +0 -50
- data/test/rbbt/knowledge_base/test_entity.rb +0 -62
- data/test/rbbt/knowledge_base/test_query.rb +0 -46
- data/test/rbbt/knowledge_base/test_registry.rb +0 -74
- data/test/rbbt/knowledge_base/test_syndicate.rb +0 -48
- data/test/rbbt/knowledge_base/test_traverse.rb +0 -133
- data/test/rbbt/persist/test_tsv.rb +0 -88
- data/test/rbbt/persist/tsv/test_cdb.rb +0 -18
- data/test/rbbt/persist/tsv/test_kyotocabinet.rb +0 -27
- data/test/rbbt/persist/tsv/test_leveldb.rb +0 -18
- data/test/rbbt/persist/tsv/test_lmdb.rb +0 -20
- data/test/rbbt/persist/tsv/test_sharder.rb +0 -164
- data/test/rbbt/persist/tsv/test_tokyocabinet.rb +0 -262
- data/test/rbbt/resource/test_path.rb +0 -49
- data/test/rbbt/test_annotations.rb +0 -167
- data/test/rbbt/test_association.rb +0 -103
- data/test/rbbt/test_entity.rb +0 -252
- data/test/rbbt/test_fix_width_table.rb +0 -135
- data/test/rbbt/test_knowledge_base.rb +0 -226
- data/test/rbbt/test_monitor.rb +0 -11
- data/test/rbbt/test_packed_index.rb +0 -68
- data/test/rbbt/test_persist.rb +0 -85
- data/test/rbbt/test_resource.rb +0 -110
- data/test/rbbt/test_tsv.rb +0 -669
- data/test/rbbt/test_workflow.rb +0 -609
- data/test/rbbt/tsv/parallel/test_through.rb +0 -40
- data/test/rbbt/tsv/parallel/test_traverse.rb +0 -456
- data/test/rbbt/tsv/test_accessor.rb +0 -319
- data/test/rbbt/tsv/test_attach.rb +0 -715
- data/test/rbbt/tsv/test_change_id.rb +0 -61
- data/test/rbbt/tsv/test_csv.rb +0 -49
- data/test/rbbt/tsv/test_excel.rb +0 -171
- data/test/rbbt/tsv/test_field_index.rb +0 -19
- data/test/rbbt/tsv/test_filter.rb +0 -187
- data/test/rbbt/tsv/test_index.rb +0 -308
- data/test/rbbt/tsv/test_manipulate.rb +0 -334
- data/test/rbbt/tsv/test_marshal.rb +0 -24
- data/test/rbbt/tsv/test_matrix.rb +0 -20
- data/test/rbbt/tsv/test_parallel.rb +0 -7
- data/test/rbbt/tsv/test_parser.rb +0 -101
- data/test/rbbt/tsv/test_stream.rb +0 -253
- data/test/rbbt/tsv/test_util.rb +0 -52
- data/test/rbbt/util/R/test_eval.rb +0 -43
- data/test/rbbt/util/R/test_model.rb +0 -128
- data/test/rbbt/util/R/test_plot.rb +0 -38
- data/test/rbbt/util/concurrency/processes/test_socket.rb +0 -70
- data/test/rbbt/util/concurrency/test_processes.rb +0 -192
- data/test/rbbt/util/concurrency/test_threads.rb +0 -40
- data/test/rbbt/util/log/test_progress.rb +0 -111
- data/test/rbbt/util/misc/test_bgzf.rb +0 -48
- data/test/rbbt/util/misc/test_communication.rb +0 -13
- data/test/rbbt/util/misc/test_development.rb +0 -26
- data/test/rbbt/util/misc/test_format.rb +0 -10
- data/test/rbbt/util/misc/test_indiferent_hash.rb +0 -14
- data/test/rbbt/util/misc/test_lock.rb +0 -77
- data/test/rbbt/util/misc/test_multipart_payload.rb +0 -202
- data/test/rbbt/util/misc/test_omics.rb +0 -116
- data/test/rbbt/util/misc/test_pipes.rb +0 -343
- data/test/rbbt/util/misc/test_serialize.rb +0 -24
- data/test/rbbt/util/python/test_util.rb +0 -25
- data/test/rbbt/util/simpleopt/test_get.rb +0 -12
- data/test/rbbt/util/simpleopt/test_parse.rb +0 -10
- data/test/rbbt/util/simpleopt/test_setup.rb +0 -76
- data/test/rbbt/util/test_R.rb +0 -37
- data/test/rbbt/util/test_chain_methods.rb +0 -22
- data/test/rbbt/util/test_cmd.rb +0 -87
- data/test/rbbt/util/test_colorize.rb +0 -22
- data/test/rbbt/util/test_concurrency.rb +0 -6
- data/test/rbbt/util/test_config.rb +0 -69
- data/test/rbbt/util/test_excel2tsv.rb +0 -10
- data/test/rbbt/util/test_filecache.rb +0 -36
- data/test/rbbt/util/test_log.rb +0 -52
- data/test/rbbt/util/test_migrate.rb +0 -34
- data/test/rbbt/util/test_misc.rb +0 -728
- data/test/rbbt/util/test_open.rb +0 -200
- data/test/rbbt/util/test_procpath.rb +0 -23
- data/test/rbbt/util/test_python.rb +0 -144
- data/test/rbbt/util/test_semaphore.rb +0 -36
- data/test/rbbt/util/test_simpleDSL.rb +0 -55
- data/test/rbbt/util/test_simpleopt.rb +0 -11
- data/test/rbbt/util/test_ssh.rb +0 -10
- data/test/rbbt/util/test_tmpfile.rb +0 -32
- data/test/rbbt/workflow/step/test_dependencies.rb +0 -295
- data/test/rbbt/workflow/step/test_save_load_inputs.rb +0 -136
- data/test/rbbt/workflow/test_doc.rb +0 -30
- data/test/rbbt/workflow/test_remote_workflow.rb +0 -99
- data/test/rbbt/workflow/test_schedule.rb +0 -0
- data/test/rbbt/workflow/test_step.rb +0 -231
- data/test/rbbt/workflow/test_task.rb +0 -85
- data/test/rbbt/workflow/util/test_archive.rb +0 -27
- data/test/rbbt/workflow/util/test_data.rb +0 -67
- data/test/rbbt/workflow/util/test_orchestrator.rb +0 -263
- data/test/test_helper.rb +0 -114
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9269fd6f3ed8e0b03f575c59f50d42c45a11bc07e138f4377e5069f3050fb29
|
4
|
+
data.tar.gz: 488a57247f23d41f8c94aa64cb38cf666c99ee43c4fa14399ed77b8bb1854519
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3a288fcd2e11209621bc61f911097c104e75c22c759ca7ac6ca0a1f908bf1ea1711587bf776b262f082a60c0296a9004cfc8fad85de738dabd6bad0bc45f6fed
|
7
|
+
data.tar.gz: 91e775b843358c29df1d87e1d9d5db8a92c37d842fd05757cc810a5f4c77a38cfe154a0a368429abb921cf43014333361426546a43aac09eabe81f07ca4315a5
|
data/etc/app.d/base.rb
CHANGED
data/lib/rbbt/hpc/batch.rb
CHANGED
@@ -110,23 +110,25 @@ module HPC
|
|
110
110
|
|
111
111
|
task = Symbol === job.overriden ? job.overriden : job.task_name
|
112
112
|
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
113
|
+
override_deps = job.overriden_deps.collect do |dep|
|
114
|
+
name = [dep.workflow.to_s, dep.task_name] * "#"
|
115
|
+
[name, dep.path] * "="
|
116
|
+
end.uniq * ","
|
117
|
+
|
118
|
+
options[:override_deps] = override_deps unless override_deps.empty?
|
119
|
+
|
120
|
+
#if job.overriden?
|
121
|
+
# #override_deps = job.rec_dependencies.
|
122
|
+
# # select{|dep| Symbol === dep.overriden }.
|
123
|
+
#
|
124
|
+
# override_deps = job.overriden_deps.
|
125
|
+
# collect do |dep|
|
126
|
+
# name = [dep.workflow.to_s, dep.task_name] * "#"
|
127
|
+
# [name, dep.path] * "="
|
128
|
+
# end.uniq * ","
|
129
|
+
|
130
|
+
# options[:override_deps] = override_deps unless override_deps.empty?
|
131
|
+
#end
|
130
132
|
|
131
133
|
# Save inputs into inputs_dir
|
132
134
|
inputs_dir = Misc.process_options options, :inputs_dir
|
data/lib/rbbt/tsv/accessor.rb
CHANGED
@@ -567,13 +567,13 @@ module TSV
|
|
567
567
|
end
|
568
568
|
end
|
569
569
|
|
570
|
-
def dumper_stream(keys = nil, no_options = false, unmerge = false)
|
570
|
+
def dumper_stream(keys = nil, no_options = false, unmerge = false, stream = nil)
|
571
571
|
unmerge = false unless type == :double
|
572
572
|
|
573
573
|
options = self.options
|
574
574
|
options[:type] = :list if unmerge
|
575
575
|
|
576
|
-
TSV::Dumper.stream options do |dumper|
|
576
|
+
TSV::Dumper.stream options, filename, stream do |dumper|
|
577
577
|
case no_options
|
578
578
|
when FalseClass, nil
|
579
579
|
dumper.init
|
@@ -631,11 +631,11 @@ module TSV
|
|
631
631
|
end
|
632
632
|
end
|
633
633
|
end
|
634
|
+
dumper.close
|
634
635
|
rescue Exception
|
635
636
|
Log.exception $!
|
636
637
|
raise $!
|
637
638
|
end
|
638
|
-
dumper.close
|
639
639
|
end
|
640
640
|
end
|
641
641
|
|
@@ -651,14 +651,9 @@ module TSV
|
|
651
651
|
end
|
652
652
|
end
|
653
653
|
|
654
|
-
io = dumper_stream(keys, no_options, unmerge)
|
655
|
-
|
656
|
-
|
657
|
-
while block = io.read(Misc::BLOCK_SIZE)
|
658
|
-
str << block
|
659
|
-
end
|
660
|
-
|
661
|
-
str
|
654
|
+
io = dumper_stream(keys, no_options, unmerge, StringIO.new)
|
655
|
+
io.rewind
|
656
|
+
io.read
|
662
657
|
end
|
663
658
|
|
664
659
|
def to_unmerged_s(keys = nil, no_options = false)
|
data/lib/rbbt/tsv/dumper.rb
CHANGED
@@ -1,13 +1,24 @@
|
|
1
1
|
module TSV
|
2
2
|
class Dumper
|
3
3
|
attr_accessor :in_stream, :stream, :options, :filename, :sep
|
4
|
-
def self.stream(options = {}, filename = nil, &block)
|
4
|
+
def self.stream(options = {}, filename = nil, stream = nil, &block)
|
5
5
|
dumper = TSV::Dumper.new options, filename
|
6
|
-
|
6
|
+
if stream
|
7
|
+
dumper.set_stream stream if stream
|
7
8
|
yield dumper
|
8
|
-
|
9
|
+
stream
|
10
|
+
else
|
11
|
+
thread = Thread.new(Thread.current) do |parent|
|
12
|
+
yield dumper
|
13
|
+
end
|
14
|
+
ConcurrentStream.setup(dumper.stream, threads: thread)
|
9
15
|
end
|
10
|
-
|
16
|
+
end
|
17
|
+
|
18
|
+
def set_stream(stream)
|
19
|
+
@stream.close
|
20
|
+
@in_stream.close
|
21
|
+
@in_stream = @stream = stream
|
11
22
|
end
|
12
23
|
|
13
24
|
def initialize(options, filename = nil)
|
@@ -56,16 +67,16 @@ module TSV
|
|
56
67
|
|
57
68
|
str = TSV.header_lines(key_field, fields, options.merge(init_options || {}))
|
58
69
|
|
59
|
-
Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
|
70
|
+
Thread.pass while IO.select(nil, [@in_stream],nil,1).nil? if IO === @in_stream
|
60
71
|
|
61
|
-
@in_stream
|
72
|
+
@in_stream << str
|
62
73
|
end
|
63
74
|
|
64
75
|
def add(k,v)
|
65
76
|
@fields ||= @options[:fields]
|
66
77
|
@sep ||= @options[:sep]
|
67
78
|
begin
|
68
|
-
Thread.pass while IO.select(nil, [@in_stream],nil,1).nil?
|
79
|
+
Thread.pass while IO.select(nil, [@in_stream],nil,1).nil? if IO === @in_stream
|
69
80
|
@in_stream << k << TSV::Dumper.values_to_s(v, @fields, @sep)
|
70
81
|
rescue IOError
|
71
82
|
rescue Exception
|
@@ -74,16 +85,16 @@ module TSV
|
|
74
85
|
end
|
75
86
|
|
76
87
|
def close_out
|
77
|
-
@stream.close unless @stream.closed?
|
88
|
+
@stream.close unless StringIO === @stream || @stream.closed?
|
78
89
|
end
|
79
90
|
|
80
91
|
def close_in
|
81
92
|
@in_stream.join if @in_stream.respond_to?(:join) && ! @in_stream.joined?
|
82
|
-
@in_stream.close
|
93
|
+
@in_stream.close if @in_stream.respond_to?(:close) && ! @in_stream.closed?
|
83
94
|
end
|
84
95
|
|
85
96
|
def close
|
86
|
-
close_in
|
97
|
+
close_in unless @in_stream == @stream
|
87
98
|
end
|
88
99
|
end
|
89
100
|
end
|
data/lib/rbbt/tsv/index.rb
CHANGED
@@ -125,7 +125,7 @@ module TSV
|
|
125
125
|
def self.index(file, options = {})
|
126
126
|
persist_options = Misc.pull_keys options, :persist
|
127
127
|
persist_options[:prefix] ||= "StaticIndex[#{options[:target] || :key}]"
|
128
|
-
|
128
|
+
|
129
129
|
Log.debug "Static Index: #{ file } - #{Misc.fingerprint options}"
|
130
130
|
Persist.persist_tsv nil, file, options, persist_options do |data|
|
131
131
|
data_options = Misc.pull_keys options, :data
|
@@ -258,6 +258,7 @@ module TSV
|
|
258
258
|
else
|
259
259
|
file.object_id.to_s
|
260
260
|
end
|
261
|
+
|
261
262
|
persist_options = Misc.pull_keys options, :persist
|
262
263
|
persist_options[:prefix] ||= "StaticRangeIndex[#{start_field}-#{end_field}]"
|
263
264
|
|
data/lib/rbbt/util/misc/math.rb
CHANGED
data/lib/rbbt/util/misc/omics.rb
CHANGED
@@ -256,8 +256,9 @@ module Misc
|
|
256
256
|
ref = m[1]
|
257
257
|
num = m[2]
|
258
258
|
alt = m[3]
|
259
|
+
alt = "*" if alt == "Ter"
|
259
260
|
ref = THREE_TO_ONE_AA_CODE[ref.downcase]
|
260
|
-
alt = THREE_TO_ONE_AA_CODE[alt.downcase]
|
261
|
+
alt = THREE_TO_ONE_AA_CODE[alt.downcase] unless alt == "*"
|
261
262
|
mutation = [ref, num, alt] * ""
|
262
263
|
end
|
263
264
|
one_aa_code = THREE_TO_ONE_AA_CODE.values
|
@@ -0,0 +1,298 @@
|
|
1
|
+
require 'rbbt'
|
2
|
+
|
3
|
+
require 'inline'
|
4
|
+
|
5
|
+
# From: https://github.com/mengyao/Complete-Striped-Smith-Waterman-Library
|
6
|
+
# Citation: SSW Library: An SIMD Smith-Waterman C/C++ Library for Use in Genomic Applications
|
7
|
+
# Mengyao Zhao, Wan-Ping Lee, Gabor T. Marth
|
8
|
+
# http://arxiv.org/abs/1208.6350
|
9
|
+
module SmithWaterman
|
10
|
+
|
11
|
+
inline(:C) do |builder|
|
12
|
+
prefix =<<-EOF
|
13
|
+
#include <stdlib.h>
|
14
|
+
#include <stdio.h>
|
15
|
+
#include <stdint.h>
|
16
|
+
#include #{'"' + Rbbt.share.software.opt.ssw["ssw.h"].find + '"'}
|
17
|
+
#include #{'"' + Rbbt.share.software.opt.ssw["ssw.c"].find + '"'}
|
18
|
+
EOF
|
19
|
+
|
20
|
+
prefix +=<<-'EOF'
|
21
|
+
void ssw_write (s_align* a,
|
22
|
+
char* ref_seq,
|
23
|
+
char* read_seq,
|
24
|
+
int8_t* table,
|
25
|
+
int fd) {
|
26
|
+
|
27
|
+
int max_length = 1000000;
|
28
|
+
dprintf(fd, "optimal_alignment_score: %d\tsub-optimal_alignment_score: %d\t", a->score1, a->score2);
|
29
|
+
if (a->ref_begin1 + 1) dprintf(fd, "target_begin: %d\t", a->ref_begin1 + 1);
|
30
|
+
dprintf(fd, "target_end: %d\t", a->ref_end1 + 1);
|
31
|
+
if (a->read_begin1 + 1) dprintf(fd, "query_begin: %d\t", a->read_begin1 + 1);
|
32
|
+
dprintf(fd, "query_end: %d\n\n", a->read_end1 + 1);
|
33
|
+
if (a->cigar) {
|
34
|
+
int32_t i, c = 0, left = 0, e = 0, qb = a->ref_begin1, pb = a->read_begin1;
|
35
|
+
while (e < a->cigarLen || left > 0) {
|
36
|
+
int32_t count = 0;
|
37
|
+
int32_t q = qb;
|
38
|
+
int32_t p = pb;
|
39
|
+
dprintf(fd, "Target: %8d ", q + 1);
|
40
|
+
for (c = e; c < a->cigarLen; ++c) {
|
41
|
+
int32_t letter = 0xf&*(a->cigar + c);
|
42
|
+
int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
|
43
|
+
int32_t l = (count == 0 && left > 0) ? left: length;
|
44
|
+
for (i = 0; i < l; ++i) {
|
45
|
+
if (letter == 1) dprintf(fd, "-");
|
46
|
+
else {
|
47
|
+
dprintf(fd, "%c", *(ref_seq + q));
|
48
|
+
++ q;
|
49
|
+
}
|
50
|
+
++ count;
|
51
|
+
if (count == max_length) goto step2;
|
52
|
+
}
|
53
|
+
}
|
54
|
+
step2:
|
55
|
+
dprintf(fd, " %d\n ", q);
|
56
|
+
q = qb;
|
57
|
+
count = 0;
|
58
|
+
for (c = e; c < a->cigarLen; ++c) {
|
59
|
+
int32_t letter = 0xf&*(a->cigar + c);
|
60
|
+
int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
|
61
|
+
int32_t l = (count == 0 && left > 0) ? left: length;
|
62
|
+
for (i = 0; i < l; ++i){
|
63
|
+
if (letter == 0) {
|
64
|
+
if (table[(int)*(ref_seq + q)] == table[(int)*(read_seq + p)])dprintf(fd, "|");
|
65
|
+
else dprintf(fd, "*");
|
66
|
+
++q;
|
67
|
+
++p;
|
68
|
+
} else {
|
69
|
+
dprintf(fd, "*");
|
70
|
+
if (letter == 1) ++p;
|
71
|
+
else ++q;
|
72
|
+
}
|
73
|
+
++ count;
|
74
|
+
if (count == max_length) {
|
75
|
+
qb = q;
|
76
|
+
goto step3;
|
77
|
+
}
|
78
|
+
}
|
79
|
+
}
|
80
|
+
step3:
|
81
|
+
p = pb;
|
82
|
+
dprintf(fd, "\nQuery: %8d ", p + 1);
|
83
|
+
count = 0;
|
84
|
+
for (c = e; c < a->cigarLen; ++c) {
|
85
|
+
int32_t letter = 0xf&*(a->cigar + c);
|
86
|
+
int32_t length = (0xfffffff0&*(a->cigar + c))>>4;
|
87
|
+
int32_t l = (count == 0 && left > 0) ? left: length;
|
88
|
+
for (i = 0; i < l; ++i) {
|
89
|
+
if (letter == 2) dprintf(fd, "-");
|
90
|
+
else {
|
91
|
+
dprintf(fd, "%c", *(read_seq + p));
|
92
|
+
++p;
|
93
|
+
}
|
94
|
+
++ count;
|
95
|
+
if (count == max_length) {
|
96
|
+
pb = p;
|
97
|
+
left = l - i - 1;
|
98
|
+
e = (left == 0) ? (c + 1) : c;
|
99
|
+
goto end;
|
100
|
+
}
|
101
|
+
}
|
102
|
+
}
|
103
|
+
e = c;
|
104
|
+
left = 0;
|
105
|
+
end:
|
106
|
+
dprintf(fd, " %d\n\n", p);
|
107
|
+
}
|
108
|
+
}
|
109
|
+
}
|
110
|
+
|
111
|
+
EOF
|
112
|
+
|
113
|
+
builder.prefix prefix
|
114
|
+
|
115
|
+
|
116
|
+
script = <<-EOF
|
117
|
+
int ssw_nt(char * read_seq, char * ref_seq){
|
118
|
+
int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1; // default parameters for genome sequence alignment
|
119
|
+
// reference sequence
|
120
|
+
//char ref_seq[40] = {'C', 'A', 'G', 'C', 'C', 'T', 'T', 'T', 'C', 'T', 'G', 'A', 'C', 'C', 'C', 'G', 'G', 'A', 'A', 'A', 'T',
|
121
|
+
// 'C', 'A', 'A', 'A', 'A', 'T', 'A', 'G', 'G', 'C', 'A', 'C', 'A', 'A', 'C', 'A', 'A', 'A', '\0'};
|
122
|
+
//char read_seq[16] = {'C', 'T', 'G', 'A', 'G', 'C', 'C', 'G', 'G', 'T', 'A', 'A', 'A', 'T', 'C', '\0'}; // read sequence
|
123
|
+
|
124
|
+
s_profile* profile;
|
125
|
+
int8_t* num = (int8_t*)malloc(16); // the read sequence represented in numbers
|
126
|
+
int8_t* ref_num = (int8_t*)malloc(64); // the reference sequence represented in numbers
|
127
|
+
s_align* result;
|
128
|
+
|
129
|
+
/* This table is used to transform nucleotide letters into numbers. */
|
130
|
+
int8_t nt_table[128] = {
|
131
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
132
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
133
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
134
|
+
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
135
|
+
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
|
136
|
+
4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
|
137
|
+
4, 0, 4, 1, 4, 4, 4, 2, 4, 4, 4, 4, 4, 4, 4, 4,
|
138
|
+
4, 4, 4, 4, 3, 0, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
|
139
|
+
};
|
140
|
+
|
141
|
+
// initialize scoring matrix for genome sequences
|
142
|
+
// A C G T N (or other ambiguous code)
|
143
|
+
// 2 -2 -2 -2 0 A
|
144
|
+
// -2 2 -2 -2 0 C
|
145
|
+
// -2 -2 2 -2 0 G
|
146
|
+
// -2 -2 -2 2 0 T
|
147
|
+
// 0 0 0 0 0 N (or other ambiguous code)
|
148
|
+
int8_t* mat = (int8_t*)calloc(25, sizeof(int8_t));
|
149
|
+
for (l = k = 0; l < 4; ++l) {
|
150
|
+
for (m = 0; m < 4; ++m) mat[k++] = l == m ? match : - mismatch; /* weight_match : -weight_mismatch */
|
151
|
+
mat[k++] = 0; // ambiguous base: no penalty
|
152
|
+
}
|
153
|
+
for (m = 0; m < 5; ++m) mat[k++] = 0;
|
154
|
+
|
155
|
+
for (m = 0; m < 15; ++m) num[m] = nt_table[(int)read_seq[m]];
|
156
|
+
profile = ssw_init(num, 15, mat, 5, 2);
|
157
|
+
for (m = 0; m < 39; ++m) ref_num[m] = nt_table[(int)ref_seq[m]];
|
158
|
+
|
159
|
+
// Only the 8th bit of the flag is set. ssw_align will always return the best alignment beginning position and cigar.
|
160
|
+
result = ssw_align (profile, ref_num, 39, gap_open, gap_extension, 1, 0, 0, 15);
|
161
|
+
//ssw_write(result, ref_seq, read_seq, nt_tablte);
|
162
|
+
|
163
|
+
free(mat);
|
164
|
+
free(ref_num);
|
165
|
+
free(num);
|
166
|
+
return(0);
|
167
|
+
}
|
168
|
+
|
169
|
+
EOF
|
170
|
+
builder.c_singleton script
|
171
|
+
|
172
|
+
script = <<-EOF
|
173
|
+
int ssw_aa(char * read_seq, char * ref_seq, int read_seq_len, int ref_seq_len, int fd){
|
174
|
+
|
175
|
+
int32_t l, m, k, match = 2, mismatch = 2, gap_open = 3, gap_extension = 1; // default parameters for genome sequence alignment
|
176
|
+
|
177
|
+
s_profile* profile;
|
178
|
+
int8_t* num = (int8_t*)malloc(read_seq_len); // the read sequence represented in numbers
|
179
|
+
int8_t* ref_num = (int8_t*)malloc(ref_seq_len); // the reference sequence represented in numbers
|
180
|
+
s_align* result;
|
181
|
+
|
182
|
+
/* This table is used to transform amino acid letters into numbers. */
|
183
|
+
int8_t aa_table[128] = {
|
184
|
+
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
|
185
|
+
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
|
186
|
+
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
|
187
|
+
23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23, 23,
|
188
|
+
23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 23, 11, 10, 12, 2, 23,
|
189
|
+
14, 5, 1, 15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23,
|
190
|
+
23, 0, 20, 4, 3, 6, 13, 7, 8, 9, 23, 11, 10, 12, 2, 23,
|
191
|
+
14, 5, 1, 15, 16, 23, 19, 17, 22, 18, 21, 23, 23, 23, 23, 23
|
192
|
+
};
|
193
|
+
|
194
|
+
int8_t mat[] = {
|
195
|
+
// A R N D C Q E G H I L K M F P S T W Y V B Z X *
|
196
|
+
5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -5, // A
|
197
|
+
-2, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5, // R
|
198
|
+
-1, -1, 7, 2, -2, 0, 0, 0, 1, -3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 5, 0, -1, -5, // N
|
199
|
+
-2, -2, 2, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 6, 1, -1, -5, // D
|
200
|
+
-1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5, // C
|
201
|
+
-1, 1, 0, 0, -3, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5, // Q
|
202
|
+
-1, 0, 0, 2, -3, 2, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5, // E
|
203
|
+
0, -3, 0, -1, -3, -2, -3, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -1, -5, // G
|
204
|
+
-2, 0, 1, -1, -3, 1, 0, -2, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5, // H
|
205
|
+
-1, -4, -3, -4, -2, -3, -4, -4, -4, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5, // I
|
206
|
+
-2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5, // L
|
207
|
+
-1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5, // K
|
208
|
+
-1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5, // M
|
209
|
+
-3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -4, -3, -2, 1, 4, -1, -4, -4, -1, -5, // F
|
210
|
+
-1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5, // P
|
211
|
+
1, -1, 1, 0, -1, 0, -1, 0, -1, -3, -3, 0, -2, -3, -1, 5, 2, -4, -2, -2, 0, 0, -1, -5, // S
|
212
|
+
0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -2, 0, 0, -1, -1, -5, // T
|
213
|
+
-3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, 2, -3, -5, -2, -1, -5, // W
|
214
|
+
-2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, -1, -3, -2, -1, -5, // Y
|
215
|
+
0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, -3, -3, -1, -5, // V
|
216
|
+
-2, -1, 5, 6, -3, 0, 1, -1, 0, -4, -4, 0, -3, -4, -2, 0, 0, -5, -3, -3, 6, 1, -1, -5, // B
|
217
|
+
-1, 0, 0, 1, -3, 4, 5, -2, 0, -3, -3, 1, -1, -4, -1, 0, -1, -2, -2, -3, 1, 5, -1, -5, // Z
|
218
|
+
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, // X
|
219
|
+
-5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1 // *
|
220
|
+
};
|
221
|
+
|
222
|
+
|
223
|
+
for (m = 0; m < read_seq_len; ++m) num[m] = aa_table[(int)read_seq[m]];
|
224
|
+
profile = ssw_init(num, read_seq_len, mat, 24, 2);
|
225
|
+
for (m = 0; m < ref_seq_len; ++m) ref_num[m] = aa_table[(int)ref_seq[m]];
|
226
|
+
|
227
|
+
// Only the 8th bit of the flag is set. ssw_align will always return the best alignment beginning position and cigar.
|
228
|
+
result = ssw_align(profile, ref_num, ref_seq_len, gap_open, gap_extension, 1, 0, 0, read_seq_len );
|
229
|
+
ssw_write(result, ref_seq, read_seq, aa_table, fd);
|
230
|
+
|
231
|
+
free(num);
|
232
|
+
free(ref_num);
|
233
|
+
return(0);
|
234
|
+
}
|
235
|
+
|
236
|
+
EOF
|
237
|
+
builder.c_singleton script
|
238
|
+
|
239
|
+
end
|
240
|
+
|
241
|
+
def self.align(query_sequence, target_sequence)
|
242
|
+
Log.low { "Aligning #{ Misc.fingerprint query_sequence } to #{ Misc.fingerprint target_sequence }" }
|
243
|
+
|
244
|
+
begin
|
245
|
+
raise "No query sequence" if query_sequence.nil?
|
246
|
+
raise "No target sequence" if target_sequence.nil?
|
247
|
+
|
248
|
+
s_out = Misc.open_pipe do |s_in|
|
249
|
+
SmithWaterman.ssw_aa(query_sequence, target_sequence, query_sequence.length, target_sequence.length, s_in.fileno)
|
250
|
+
end
|
251
|
+
|
252
|
+
txt = s_out.read
|
253
|
+
s_out.close
|
254
|
+
s_out.join
|
255
|
+
txt
|
256
|
+
|
257
|
+
target_start, target, target_end = txt.match(/Target:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
|
258
|
+
|
259
|
+
query_start, query, query_end = txt.match(/Query:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/).values_at 1, 2, 3
|
260
|
+
|
261
|
+
txt.replace ""
|
262
|
+
[("_" * (query_start.to_i - 1)) + query, ("_" * (target_start.to_i - 1)) + target]
|
263
|
+
rescue
|
264
|
+
Log.warn("Error in aligmnent: #{$!.message}")
|
265
|
+
return ["-", "-"]
|
266
|
+
end
|
267
|
+
end
|
268
|
+
|
269
|
+
def self.alignment_map(source, target)
|
270
|
+
alignment_source, alignment_target = SmithWaterman.align(source, target)
|
271
|
+
map = {}
|
272
|
+
|
273
|
+
offset_source, alignment_source = alignment_source.match(/^(_*)(.*)/).values_at( 1, 2)
|
274
|
+
offset_target, alignment_target = alignment_target.match(/^(_*)(.*)/).values_at( 1, 2)
|
275
|
+
|
276
|
+
gaps_source = 0
|
277
|
+
gaps_target = 0
|
278
|
+
miss_match = 0
|
279
|
+
alignment_source.chars.zip(alignment_target.chars).each_with_index do |p,i|
|
280
|
+
char_source, char_target = p
|
281
|
+
gaps_source += 1 if char_source == '-'
|
282
|
+
gaps_target += 1 if char_target == '-'
|
283
|
+
source_pos = i + 1 + offset_source.length - gaps_source
|
284
|
+
target_pos = i + 1 + offset_target.length - gaps_target
|
285
|
+
if char_source != char_target or char_source == "-"
|
286
|
+
miss_match += 1
|
287
|
+
else
|
288
|
+
map[source_pos] = target_pos
|
289
|
+
end
|
290
|
+
end
|
291
|
+
|
292
|
+
if miss_match + gaps_source > alignment_source.length.to_f / 2
|
293
|
+
{}
|
294
|
+
else
|
295
|
+
map
|
296
|
+
end
|
297
|
+
end
|
298
|
+
end
|
data/lib/rbbt/util/open.rb
CHANGED
@@ -35,8 +35,8 @@ class Step
|
|
35
35
|
rescue Exception
|
36
36
|
Log.debug{"Error loading info file: " + info_file}
|
37
37
|
Log.exception $!
|
38
|
-
Open.rm info_file
|
39
|
-
Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
|
38
|
+
#Open.rm info_file
|
39
|
+
#Misc.sensiblewrite(info_file, Step.serialize_info({:status => :error, :messages => ["Info file lost"]}))
|
40
40
|
raise $!
|
41
41
|
end
|
42
42
|
end
|
@@ -62,6 +62,7 @@ class Step
|
|
62
62
|
def load_dependencies_from_info
|
63
63
|
relocated = nil
|
64
64
|
@dependencies = (self.info[:dependencies] || []).collect do |task,name,dep_path|
|
65
|
+
dep_path = task if dep_path.nil?
|
65
66
|
if Open.exists?(dep_path) || Open.exists?(dep_path + '.info')
|
66
67
|
Workflow._load_step dep_path
|
67
68
|
else
|
data/lib/rbbt/workflow/step.rb
CHANGED
@@ -303,7 +303,8 @@ class Step
|
|
303
303
|
|
304
304
|
if result_description
|
305
305
|
entity_info = info.dup
|
306
|
-
entity_info
|
306
|
+
# Also load entity_info in rbbt6
|
307
|
+
entity_info.merge! info[:inputs] if Hash === info[:inputs]
|
307
308
|
res = prepare_result res, result_description, entity_info
|
308
309
|
end
|
309
310
|
|
data/python/rbbt/__init__.py
CHANGED
@@ -67,10 +67,10 @@ def tsv_preamble(line, comment_char="#"):
|
|
67
67
|
return header
|
68
68
|
|
69
69
|
|
70
|
-
def tsv_header(filename, sep="\t", comment_char="#"):
|
70
|
+
def tsv_header(filename, sep="\t", comment_char="#", encoding='utf8'):
|
71
71
|
import re
|
72
72
|
|
73
|
-
f = open(filename)
|
73
|
+
f = open(filename, encoding=encoding)
|
74
74
|
line = f.readline().strip()
|
75
75
|
|
76
76
|
if (not line.startswith(comment_char)):
|
@@ -134,7 +134,12 @@ TmpFile.with_file do |app_dir|
|
|
134
134
|
options.each do |k,v| fixed_options[k.to_sym] = v end
|
135
135
|
options = fixed_options
|
136
136
|
|
137
|
-
|
137
|
+
begin
|
138
|
+
Rack::Server.start(options)
|
139
|
+
rescue LoadError
|
140
|
+
require 'rackup'
|
141
|
+
Rackup::Server.start(options)
|
142
|
+
end
|
138
143
|
end
|
139
144
|
end
|
140
145
|
end
|
@@ -484,7 +484,7 @@ begin
|
|
484
484
|
|
485
485
|
if server = options.delete(:relay)
|
486
486
|
require 'rbbt/workflow/remote_workflow'
|
487
|
-
relay_dependencies = options.delete(:relay_dependencies).split(",")
|
487
|
+
relay_dependencies = options.delete(:relay_dependencies).split(",") if options[:relay_dependencies]
|
488
488
|
produce_dependencies_for_relay = options.delete(:produce_dependencies_for_relay)
|
489
489
|
|
490
490
|
jobs_to_relay = relay_dependencies ? match_dependencies(relay_dependencies, job.rec_dependencies) : [job]
|