rbbt-util 5.44.1 → 6.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/rbbt +67 -90
- data/etc/app.d/base.rb +2 -2
- data/etc/app.d/semaphores.rb +3 -3
- data/lib/rbbt/annotations/annotated_array.rb +207 -207
- data/lib/rbbt/annotations/refactor.rb +27 -0
- data/lib/rbbt/annotations/util.rb +282 -282
- data/lib/rbbt/annotations.rb +343 -320
- data/lib/rbbt/association/database.rb +200 -225
- data/lib/rbbt/association/index.rb +294 -291
- data/lib/rbbt/association/item.rb +227 -227
- data/lib/rbbt/association/open.rb +35 -34
- data/lib/rbbt/association/util.rb +0 -169
- data/lib/rbbt/association.rb +2 -4
- data/lib/rbbt/entity/identifiers.rb +119 -118
- data/lib/rbbt/entity/refactor.rb +12 -0
- data/lib/rbbt/entity.rb +319 -315
- data/lib/rbbt/hpc/batch.rb +72 -53
- data/lib/rbbt/hpc/lsf.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
- data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
- data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
- data/lib/rbbt/hpc/orchestrate.rb +19 -13
- data/lib/rbbt/hpc/slurm.rb +18 -18
- data/lib/rbbt/knowledge_base/entity.rb +13 -5
- data/lib/rbbt/knowledge_base/query.rb +2 -2
- data/lib/rbbt/knowledge_base/registry.rb +32 -31
- data/lib/rbbt/knowledge_base/traverse.rb +1 -1
- data/lib/rbbt/knowledge_base.rb +1 -1
- data/lib/rbbt/monitor.rb +36 -25
- data/lib/rbbt/persist/refactor.rb +166 -0
- data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
- data/lib/rbbt/persist/tsv.rb +187 -185
- data/lib/rbbt/persist.rb +556 -551
- data/lib/rbbt/refactor.rb +20 -0
- data/lib/rbbt/resource/path/refactor.rb +178 -0
- data/lib/rbbt/resource/path.rb +317 -497
- data/lib/rbbt/resource/util.rb +0 -48
- data/lib/rbbt/resource.rb +3 -390
- data/lib/rbbt/tsv/accessor.rb +2 -838
- data/lib/rbbt/tsv/attach.rb +303 -299
- data/lib/rbbt/tsv/change_id.rb +244 -245
- data/lib/rbbt/tsv/csv.rb +87 -85
- data/lib/rbbt/tsv/dumper.rb +2 -100
- data/lib/rbbt/tsv/excel.rb +26 -24
- data/lib/rbbt/tsv/field_index.rb +4 -1
- data/lib/rbbt/tsv/filter.rb +3 -2
- data/lib/rbbt/tsv/index.rb +2 -284
- data/lib/rbbt/tsv/manipulate.rb +750 -747
- data/lib/rbbt/tsv/marshal.rb +3 -3
- data/lib/rbbt/tsv/matrix.rb +2 -2
- data/lib/rbbt/tsv/parallel/through.rb +2 -1
- data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
- data/lib/rbbt/tsv/parser.rb +678 -678
- data/lib/rbbt/tsv/refactor.rb +195 -0
- data/lib/rbbt/tsv/stream.rb +253 -251
- data/lib/rbbt/tsv/util.rb +420 -420
- data/lib/rbbt/tsv.rb +210 -208
- data/lib/rbbt/util/R/eval.rb +4 -4
- data/lib/rbbt/util/R/plot.rb +62 -166
- data/lib/rbbt/util/R.rb +21 -18
- data/lib/rbbt/util/cmd.rb +2 -318
- data/lib/rbbt/util/color.rb +269 -269
- data/lib/rbbt/util/colorize.rb +89 -89
- data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
- data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
- data/lib/rbbt/util/concurrency/processes.rb +389 -386
- data/lib/rbbt/util/config.rb +169 -167
- data/lib/rbbt/util/iruby.rb +20 -0
- data/lib/rbbt/util/log/progress/report.rb +241 -241
- data/lib/rbbt/util/log/progress/util.rb +99 -99
- data/lib/rbbt/util/log/progress.rb +102 -102
- data/lib/rbbt/util/log/refactor.rb +49 -0
- data/lib/rbbt/util/log.rb +486 -532
- data/lib/rbbt/util/migrate.rb +1 -1
- data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
- data/lib/rbbt/util/misc/development.rb +12 -11
- data/lib/rbbt/util/misc/exceptions.rb +117 -112
- data/lib/rbbt/util/misc/format.rb +2 -230
- data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
- data/lib/rbbt/util/misc/inspect.rb +2 -476
- data/lib/rbbt/util/misc/lock.rb +109 -106
- data/lib/rbbt/util/misc/omics.rb +9 -1
- data/lib/rbbt/util/misc/pipes.rb +765 -793
- data/lib/rbbt/util/misc/refactor.rb +20 -0
- data/lib/rbbt/util/misc/ssw.rb +27 -17
- data/lib/rbbt/util/misc/system.rb +0 -15
- data/lib/rbbt/util/misc.rb +39 -20
- data/lib/rbbt/util/named_array/refactor.rb +4 -0
- data/lib/rbbt/util/named_array.rb +3 -220
- data/lib/rbbt/util/open/refactor.rb +7 -0
- data/lib/rbbt/util/open.rb +3 -857
- data/lib/rbbt/util/procpath.rb +6 -6
- data/lib/rbbt/util/python/paths.rb +27 -0
- data/lib/rbbt/util/python/run.rb +115 -0
- data/lib/rbbt/util/python/script.rb +110 -0
- data/lib/rbbt/util/python/util.rb +3 -3
- data/lib/rbbt/util/python.rb +22 -81
- data/lib/rbbt/util/semaphore.rb +152 -148
- data/lib/rbbt/util/simpleopt.rb +9 -8
- data/lib/rbbt/util/ssh/refactor.rb +19 -0
- data/lib/rbbt/util/ssh.rb +122 -118
- data/lib/rbbt/util/tar.rb +117 -115
- data/lib/rbbt/util/tmpfile.rb +69 -67
- data/lib/rbbt/util/version.rb +2 -0
- data/lib/rbbt/workflow/refactor/entity.rb +11 -0
- data/lib/rbbt/workflow/refactor/export.rb +66 -0
- data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
- data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
- data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
- data/lib/rbbt/workflow/refactor.rb +153 -0
- data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
- data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
- data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
- data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
- data/lib/rbbt/workflow/remote_workflow.rb +6 -1
- data/lib/rbbt/workflow/step/run.rb +766 -766
- data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
- data/lib/rbbt/workflow/step.rb +2 -362
- data/lib/rbbt/workflow/task.rb +118 -118
- data/lib/rbbt/workflow/usage.rb +289 -287
- data/lib/rbbt/workflow/util/archive.rb +6 -5
- data/lib/rbbt/workflow/util/data.rb +1 -1
- data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
- data/lib/rbbt/workflow/util/trace.rb +79 -44
- data/lib/rbbt/workflow.rb +4 -882
- data/lib/rbbt-util.rb +21 -13
- data/lib/rbbt.rb +16 -3
- data/python/rbbt/__init__.py +19 -1
- data/share/Rlib/plot.R +37 -37
- data/share/Rlib/svg.R +22 -5
- data/share/install/software/lib/install_helpers +1 -1
- data/share/rbbt_commands/hpc/list +2 -3
- data/share/rbbt_commands/hpc/orchestrate +4 -4
- data/share/rbbt_commands/hpc/tail +2 -0
- data/share/rbbt_commands/hpc/task +10 -7
- data/share/rbbt_commands/lsf/list +2 -3
- data/share/rbbt_commands/lsf/orchestrate +4 -4
- data/share/rbbt_commands/lsf/tail +2 -0
- data/share/rbbt_commands/lsf/task +10 -7
- data/share/rbbt_commands/migrate +1 -1
- data/share/rbbt_commands/pbs/list +2 -3
- data/share/rbbt_commands/pbs/orchestrate +4 -4
- data/share/rbbt_commands/pbs/tail +2 -0
- data/share/rbbt_commands/pbs/task +10 -7
- data/share/rbbt_commands/resource/produce +8 -1
- data/share/rbbt_commands/slurm/list +2 -3
- data/share/rbbt_commands/slurm/orchestrate +4 -4
- data/share/rbbt_commands/slurm/tail +2 -0
- data/share/rbbt_commands/slurm/task +10 -7
- data/share/rbbt_commands/system/clean +5 -5
- data/share/rbbt_commands/system/status +5 -5
- data/share/rbbt_commands/tsv/get +2 -3
- data/share/rbbt_commands/tsv/info +10 -13
- data/share/rbbt_commands/tsv/keys +18 -14
- data/share/rbbt_commands/tsv/slice +2 -2
- data/share/rbbt_commands/tsv/transpose +6 -2
- data/share/rbbt_commands/workflow/info +20 -24
- data/share/rbbt_commands/workflow/list +1 -1
- data/share/rbbt_commands/workflow/prov +20 -13
- data/share/rbbt_commands/workflow/server +11 -1
- data/share/rbbt_commands/workflow/task +76 -71
- data/share/rbbt_commands/workflow/write_info +26 -9
- data/share/software/opt/ssw/ssw.c +861 -0
- data/share/software/opt/ssw/ssw.h +130 -0
- data/share/workflow_config.ru +3 -3
- metadata +40 -2
@@ -0,0 +1,861 @@
|
|
1
|
+
/* The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2012-2015 Boston College.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
|
20
|
+
BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
|
21
|
+
ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
22
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
23
|
+
SOFTWARE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
/* Contact: Mengyao Zhao <zhangmp@bc.edu> */
|
27
|
+
|
28
|
+
/*
|
29
|
+
* ssw.c
|
30
|
+
*
|
31
|
+
* Created by Mengyao Zhao on 6/22/10.
|
32
|
+
* Copyright 2010 Boston College. All rights reserved.
|
33
|
+
* Version 0.1.4
|
34
|
+
* Last revision by Mengyao Zhao on 12/07/12.
|
35
|
+
*
|
36
|
+
*/
|
37
|
+
|
38
|
+
#include <emmintrin.h>
|
39
|
+
#include <stdint.h>
|
40
|
+
#include <stdlib.h>
|
41
|
+
#include <stdio.h>
|
42
|
+
#include <string.h>
|
43
|
+
#include <math.h>
|
44
|
+
#include "ssw.h"
|
45
|
+
|
46
|
+
/* Branch-prediction hints. With GCC-compatible compilers the condition is passed
   through __builtin_expect; everywhere else the macros expand to the bare expression. */
#if !defined(__GNUC__)
#define LIKELY(x) (x)
#define UNLIKELY(x) (x)
#else
#define LIKELY(x) __builtin_expect((x),1)
#define UNLIKELY(x) __builtin_expect((x),0)
#endif
|
53
|
+
|
54
|
+
/* Convert the coordinate in the scoring matrix into the coordinate in one line of the band.
   Stores (j) - max((i) - (w), 0) + 1 into u.
   The temporary carries a macro-private name so that an argument spelled `x` is not
   captured, and the do/while(0) wrapper lets the macro be used as a single statement
   (e.g. as the body of an unbraced if/else). */
#define set_u(u, w, i, j) do { \
    int set_u_off_ = (i) - (w); \
    set_u_off_ = set_u_off_ > 0 ? set_u_off_ : 0; \
    (u) = (j) - set_u_off_ + 1; \
} while (0)
|
56
|
+
|
57
|
+
/* Convert the coordinate in the direction matrix into the coordinate in one line of the band.
   Stores ((j) - max((i) - (w), 0)) * 3 + (p) into u; p selects one of the three
   direction slots kept per cell.
   p is parenthesised so that expression arguments (e.g. a ?: expression) keep their
   meaning, the temporary has a macro-private name so an argument spelled `x` is not
   captured, and do/while(0) makes the macro a single statement. */
#define set_d(u, w, i, j, p) do { \
    int set_d_off_ = (i) - (w); \
    set_d_off_ = set_d_off_ > 0 ? set_d_off_ : 0; \
    set_d_off_ = (j) - set_d_off_; \
    (u) = set_d_off_ * 3 + (p); \
} while (0)
|
59
|
+
|
60
|
+
/*! @function
  @abstract     Round an integer up to the next power of two (a power of two is left as is).
  @param  x     integer to be rounded (in place)
  @discussion   x is modified: it is decremented, its highest set bit is smeared into
                every lower bit position, and it is incremented again. The whole macro
                is a comma expression whose value is the rounded x.
 */
#define kroundup32(x) \
    (--(x), \
     (x) |= (x) >> 1, (x) |= (x) >> 2, (x) |= (x) >> 4, \
     (x) |= (x) >> 8, (x) |= (x) >> 16, \
     ++(x))
|
66
|
+
|
67
|
+
/* End point of one alignment as reported by the SSE2 kernels in this file. */
typedef struct {
    uint16_t score;   /* alignment score */
    int32_t ref;      /* 0-based position */
    int32_t read;     /* alignment ending position on read, 0-based */
} alignment_end;
|
72
|
+
|
73
|
+
/* A sequence of packed 32-bit cigar operations together with its element count.
   NOTE(review): banded_sw in this file builds entries as (run length << 4) | op code;
   presumably seq holds values of that form - confirm against the code that fills it. */
typedef struct {
    uint32_t* seq;    /* array of packed operations */
    int32_t length;   /* number of entries in seq */
} cigar;
|
77
|
+
|
78
|
+
/* Query profile bundle: the striped 8-bit and 16-bit profiles plus the inputs they
   were derived from.
   NOTE(review): the code that fills this struct is outside this chunk; the field notes
   below follow the parameter names of qP_byte/qP_word - confirm against the initialiser. */
struct _profile{
    __m128i* profile_byte;  /* 0: none */
    __m128i* profile_word;  /* 0: none */
    const int8_t* read;     /* presumably the numeric query sequence */
    const int8_t* mat;      /* presumably the n x n scoring matrix */
    int32_t readLen;        /* presumably the number of entries in read */
    int32_t n;              /* presumably the edge length of mat */
    uint8_t bias;           /* presumably the offset added to the 8-bit profile scores */
};
|
87
|
+
|
88
|
+
/* Generate the 8-bit query profile: rearrange the query sequence into the striped
   layout and pre-compute the (biased) match/mismatch weight of every query position
   against every one of the n reference symbols.

   read_num  query sequence as indices into mat (readLen entries)
   mat       n x n scoring matrix, row = reference symbol, column = query symbol
   readLen   query length
   n         the edge length of the square matrix mat
   bias      value added to every score; padding lanes beyond readLen hold bias itself

   Returns a malloc'ed array of n * segLen vectors (segLen = ceil(readLen / 16)) that
   the caller must free(), or NULL if the allocation fails. */
__m128i* qP_byte (const int8_t* read_num,
                  const int8_t* mat,
                  const int32_t readLen,
                  const int32_t n,    /* the edge length of the square matrix mat */
                  uint8_t bias) {

    /* Split the 128 bit register into 16 pieces. Each piece is 8 bit.
       Split the read into 16 segments. Calculate 16 segments in parallel. */
    const int32_t segLen = (readLen + 15) / 16;

    /* Widen before multiplying so that n * segLen cannot overflow int32_t. */
    __m128i* vProfile = (__m128i*)malloc((size_t)n * (size_t)segLen * sizeof(__m128i));
    int8_t* t = (int8_t*)vProfile;
    int32_t nt, i, j, segNum;

    /* Do not write through a failed allocation. */
    if (vProfile == NULL) return NULL;

    for (nt = 0; nt < n; nt ++) {
        for (i = 0; i < segLen; i ++) {
            /* Lane segNum of vector i describes query position i + segNum * segLen. */
            j = i;
            for (segNum = 0; segNum < 16; segNum ++) {
                *t++ = j >= readLen ? bias : mat[nt * n + read_num[j]] + bias;
                j += segLen;
            }
        }
    }
    return vProfile;
}
|
115
|
+
|
116
|
+
/* Striped Smith-Waterman
|
117
|
+
Record the highest score of each reference position.
|
118
|
+
Return the alignment score and ending position of the best alignment, 2nd best alignment, etc.
|
119
|
+
Gap begin and gap extension are different.
|
120
|
+
wight_match > 0, all other weights < 0.
|
121
|
+
The returned positions are 0-based.
|
122
|
+
*/
|
123
|
+
alignment_end* sw_sse2_byte (const int8_t* ref,
|
124
|
+
int8_t ref_dir, // 0: forward ref; 1: reverse ref
|
125
|
+
int32_t refLen,
|
126
|
+
int32_t readLen,
|
127
|
+
const uint8_t weight_gapO, /* will be used as - */
|
128
|
+
const uint8_t weight_gapE, /* will be used as - */
|
129
|
+
__m128i* vProfile,
|
130
|
+
uint8_t terminate, /* the best alignment score: used to terminate
|
131
|
+
the matrix calculation when locating the
|
132
|
+
alignment beginning point. If this score
|
133
|
+
is set to 0, it will not be used */
|
134
|
+
uint8_t bias, /* Shift 0 point to a positive value. */
|
135
|
+
int32_t maskLen) {
|
136
|
+
|
137
|
+
#define max16(m, vm) (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 8)); \
|
138
|
+
(vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 4)); \
|
139
|
+
(vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 2)); \
|
140
|
+
(vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 1)); \
|
141
|
+
(m) = _mm_extract_epi16((vm), 0)
|
142
|
+
|
143
|
+
uint8_t max = 0; /* the max alignment score */
|
144
|
+
int32_t end_read = readLen - 1;
|
145
|
+
int32_t end_ref = -1; /* 0_based best alignment ending point; Initialized as isn't aligned -1. */
|
146
|
+
int32_t segLen = (readLen + 15) / 16; /* number of segment */
|
147
|
+
|
148
|
+
/* array to record the largest score of each reference position */
|
149
|
+
uint8_t* maxColumn = (uint8_t*) calloc(refLen, 1);
|
150
|
+
|
151
|
+
/* array to record the alignment read ending position of the largest score of each reference position */
|
152
|
+
int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
|
153
|
+
|
154
|
+
/* Define 16 byte 0 vector. */
|
155
|
+
__m128i vZero = _mm_set1_epi32(0);
|
156
|
+
|
157
|
+
__m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
|
158
|
+
__m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
|
159
|
+
__m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
|
160
|
+
__m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
|
161
|
+
|
162
|
+
int32_t i, j;
|
163
|
+
/* 16 byte insertion begin vector */
|
164
|
+
__m128i vGapO = _mm_set1_epi8(weight_gapO);
|
165
|
+
|
166
|
+
/* 16 byte insertion extension vector */
|
167
|
+
__m128i vGapE = _mm_set1_epi8(weight_gapE);
|
168
|
+
|
169
|
+
/* 16 byte bias vector */
|
170
|
+
__m128i vBias = _mm_set1_epi8(bias);
|
171
|
+
|
172
|
+
__m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
|
173
|
+
__m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
|
174
|
+
__m128i vTemp;
|
175
|
+
int32_t edge, begin = 0, end = refLen, step = 1;
|
176
|
+
// int32_t distance = readLen * 2 / 3;
|
177
|
+
// int32_t distance = readLen / 2;
|
178
|
+
// int32_t distance = readLen;
|
179
|
+
|
180
|
+
/* outer loop to process the reference sequence */
|
181
|
+
if (ref_dir == 1) {
|
182
|
+
begin = refLen - 1;
|
183
|
+
end = -1;
|
184
|
+
step = -1;
|
185
|
+
}
|
186
|
+
for (i = begin; LIKELY(i != end); i += step) {
|
187
|
+
int32_t cmp;
|
188
|
+
__m128i e = vZero, vF = vZero, vMaxColumn = vZero; /* Initialize F value to 0.
|
189
|
+
Any errors to vH values will be corrected in the Lazy_F loop.
|
190
|
+
*/
|
191
|
+
// max16(maxColumn[i], vMaxColumn);
|
192
|
+
// fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]);
|
193
|
+
|
194
|
+
__m128i vH = pvHStore[segLen - 1];
|
195
|
+
vH = _mm_slli_si128 (vH, 1); /* Shift the 128-bit value in vH left by 1 byte. */
|
196
|
+
__m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
|
197
|
+
|
198
|
+
/* Swap the 2 H buffers. */
|
199
|
+
__m128i* pv = pvHLoad;
|
200
|
+
pvHLoad = pvHStore;
|
201
|
+
pvHStore = pv;
|
202
|
+
|
203
|
+
/* inner loop to process the query sequence */
|
204
|
+
for (j = 0; LIKELY(j < segLen); ++j) {
|
205
|
+
vH = _mm_adds_epu8(vH, _mm_load_si128(vP + j));
|
206
|
+
vH = _mm_subs_epu8(vH, vBias); /* vH will be always > 0 */
|
207
|
+
// max16(maxColumn[i], vH);
|
208
|
+
// fprintf(stderr, "H[%d]: %d\n", i, maxColumn[i]);
|
209
|
+
// int8_t* t;
|
210
|
+
// int32_t ti;
|
211
|
+
//for (t = (int8_t*)&vH, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++);
|
212
|
+
|
213
|
+
/* Get max from vH, vE and vF. */
|
214
|
+
e = _mm_load_si128(pvE + j);
|
215
|
+
vH = _mm_max_epu8(vH, e);
|
216
|
+
vH = _mm_max_epu8(vH, vF);
|
217
|
+
vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
|
218
|
+
|
219
|
+
// max16(maxColumn[i], vMaxColumn);
|
220
|
+
// fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]);
|
221
|
+
// for (t = (int8_t*)&vMaxColumn, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++);
|
222
|
+
|
223
|
+
/* Save vH values. */
|
224
|
+
_mm_store_si128(pvHStore + j, vH);
|
225
|
+
|
226
|
+
/* Update vE value. */
|
227
|
+
vH = _mm_subs_epu8(vH, vGapO); /* saturation arithmetic, result >= 0 */
|
228
|
+
e = _mm_subs_epu8(e, vGapE);
|
229
|
+
e = _mm_max_epu8(e, vH);
|
230
|
+
_mm_store_si128(pvE + j, e);
|
231
|
+
|
232
|
+
/* Update vF value. */
|
233
|
+
vF = _mm_subs_epu8(vF, vGapE);
|
234
|
+
vF = _mm_max_epu8(vF, vH);
|
235
|
+
|
236
|
+
/* Load the next vH. */
|
237
|
+
vH = _mm_load_si128(pvHLoad + j);
|
238
|
+
}
|
239
|
+
|
240
|
+
/* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
|
241
|
+
/* reset pointers to the start of the saved data */
|
242
|
+
j = 0;
|
243
|
+
vH = _mm_load_si128 (pvHStore + j);
|
244
|
+
|
245
|
+
/* the computed vF value is for the given column. since */
|
246
|
+
/* we are at the end, we need to shift the vF value over */
|
247
|
+
/* to the next column. */
|
248
|
+
vF = _mm_slli_si128 (vF, 1);
|
249
|
+
vTemp = _mm_subs_epu8 (vH, vGapO);
|
250
|
+
vTemp = _mm_subs_epu8 (vF, vTemp);
|
251
|
+
vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
|
252
|
+
cmp = _mm_movemask_epi8 (vTemp);
|
253
|
+
|
254
|
+
while (cmp != 0xffff)
|
255
|
+
{
|
256
|
+
vH = _mm_max_epu8 (vH, vF);
|
257
|
+
vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
|
258
|
+
_mm_store_si128 (pvHStore + j, vH);
|
259
|
+
vF = _mm_subs_epu8 (vF, vGapE);
|
260
|
+
j++;
|
261
|
+
if (j >= segLen)
|
262
|
+
{
|
263
|
+
j = 0;
|
264
|
+
vF = _mm_slli_si128 (vF, 1);
|
265
|
+
}
|
266
|
+
vH = _mm_load_si128 (pvHStore + j);
|
267
|
+
|
268
|
+
vTemp = _mm_subs_epu8 (vH, vGapO);
|
269
|
+
vTemp = _mm_subs_epu8 (vF, vTemp);
|
270
|
+
vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
|
271
|
+
cmp = _mm_movemask_epi8 (vTemp);
|
272
|
+
}
|
273
|
+
|
274
|
+
vMaxScore = _mm_max_epu8(vMaxScore, vMaxColumn);
|
275
|
+
vTemp = _mm_cmpeq_epi8(vMaxMark, vMaxScore);
|
276
|
+
cmp = _mm_movemask_epi8(vTemp);
|
277
|
+
if (cmp != 0xffff) {
|
278
|
+
uint8_t temp;
|
279
|
+
vMaxMark = vMaxScore;
|
280
|
+
max16(temp, vMaxScore);
|
281
|
+
vMaxScore = vMaxMark;
|
282
|
+
|
283
|
+
if (LIKELY(temp > max)) {
|
284
|
+
max = temp;
|
285
|
+
if (max + bias >= 255) break; //overflow
|
286
|
+
end_ref = i;
|
287
|
+
|
288
|
+
/* Store the column with the highest alignment score in order to trace the alignment ending position on read. */
|
289
|
+
for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j];
|
290
|
+
}
|
291
|
+
}
|
292
|
+
|
293
|
+
/* Record the max score of current column. */
|
294
|
+
max16(maxColumn[i], vMaxColumn);
|
295
|
+
// fprintf(stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]);
|
296
|
+
if (maxColumn[i] == terminate) break;
|
297
|
+
}
|
298
|
+
|
299
|
+
/* Trace the alignment ending position on read. */
|
300
|
+
uint8_t *t = (uint8_t*)pvHmax;
|
301
|
+
int32_t column_len = segLen * 16;
|
302
|
+
for (i = 0; LIKELY(i < column_len); ++i, ++t) {
|
303
|
+
int32_t temp;
|
304
|
+
if (*t == max) {
|
305
|
+
temp = i / 16 + i % 16 * segLen;
|
306
|
+
if (temp < end_read) end_read = temp;
|
307
|
+
}
|
308
|
+
}
|
309
|
+
|
310
|
+
free(pvHmax);
|
311
|
+
free(pvE);
|
312
|
+
free(pvHLoad);
|
313
|
+
free(pvHStore);
|
314
|
+
|
315
|
+
/* Find the most possible 2nd best alignment. */
|
316
|
+
alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end));
|
317
|
+
bests[0].score = max + bias >= 255 ? 255 : max;
|
318
|
+
bests[0].ref = end_ref;
|
319
|
+
bests[0].read = end_read;
|
320
|
+
|
321
|
+
bests[1].score = 0;
|
322
|
+
bests[1].ref = 0;
|
323
|
+
bests[1].read = 0;
|
324
|
+
|
325
|
+
edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0;
|
326
|
+
for (i = 0; i < edge; i ++) {
|
327
|
+
// fprintf (stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]);
|
328
|
+
if (maxColumn[i] > bests[1].score) {
|
329
|
+
bests[1].score = maxColumn[i];
|
330
|
+
bests[1].ref = i;
|
331
|
+
}
|
332
|
+
}
|
333
|
+
edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen);
|
334
|
+
for (i = edge + 1; i < refLen; i ++) {
|
335
|
+
// fprintf (stderr, "refLen: %d\tmaxColumn[%d]: %d\n", refLen, i, maxColumn[i]);
|
336
|
+
if (maxColumn[i] > bests[1].score) {
|
337
|
+
bests[1].score = maxColumn[i];
|
338
|
+
bests[1].ref = i;
|
339
|
+
}
|
340
|
+
}
|
341
|
+
|
342
|
+
free(maxColumn);
|
343
|
+
free(end_read_column);
|
344
|
+
return bests;
|
345
|
+
}
|
346
|
+
|
347
|
+
/* Generate the 16-bit query profile: rearrange the query sequence into the striped
   layout and pre-compute the match/mismatch weight of every query position against
   every one of the n reference symbols (no bias is applied in the 16-bit profile).

   read_num  query sequence as indices into mat (readLen entries)
   mat       n x n scoring matrix, row = reference symbol, column = query symbol
   readLen   query length
   n         the edge length of the square matrix mat

   Returns a malloc'ed array of n * segLen vectors (segLen = ceil(readLen / 8)) that
   the caller must free(), or NULL if the allocation fails. Padding lanes beyond
   readLen hold 0. */
__m128i* qP_word (const int8_t* read_num,
                  const int8_t* mat,
                  const int32_t readLen,
                  const int32_t n) {

    /* Eight 16-bit lanes per register, so the read is split into 8 segments. */
    const int32_t segLen = (readLen + 7) / 8;

    /* Widen before multiplying so that n * segLen cannot overflow int32_t. */
    __m128i* vProfile = (__m128i*)malloc((size_t)n * (size_t)segLen * sizeof(__m128i));
    int16_t* t = (int16_t*)vProfile;
    int32_t nt, i, j;
    int32_t segNum;

    /* Do not write through a failed allocation. */
    if (vProfile == NULL) return NULL;

    for (nt = 0; nt < n; nt ++) {
        for (i = 0; i < segLen; i ++) {
            /* Lane segNum of vector i describes query position i + segNum * segLen. */
            j = i;
            for (segNum = 0; segNum < 8; segNum ++) {
                *t++ = j >= readLen ? 0 : mat[nt * n + read_num[j]];
                j += segLen;
            }
        }
    }
    return vProfile;
}
|
370
|
+
|
371
|
+
alignment_end* sw_sse2_word (const int8_t* ref,
|
372
|
+
int8_t ref_dir, // 0: forward ref; 1: reverse ref
|
373
|
+
int32_t refLen,
|
374
|
+
int32_t readLen,
|
375
|
+
const uint8_t weight_gapO, /* will be used as - */
|
376
|
+
const uint8_t weight_gapE, /* will be used as - */
|
377
|
+
__m128i* vProfile,
|
378
|
+
uint16_t terminate,
|
379
|
+
int32_t maskLen) {
|
380
|
+
|
381
|
+
#define max8(m, vm) (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 8)); \
|
382
|
+
(vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 4)); \
|
383
|
+
(vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 2)); \
|
384
|
+
(m) = _mm_extract_epi16((vm), 0)
|
385
|
+
|
386
|
+
uint16_t max = 0; /* the max alignment score */
|
387
|
+
int32_t end_read = readLen - 1;
|
388
|
+
int32_t end_ref = 0; /* 1_based best alignment ending point; Initialized as isn't aligned - 0. */
|
389
|
+
int32_t segLen = (readLen + 7) / 8; /* number of segment */
|
390
|
+
|
391
|
+
/* array to record the largest score of each reference position */
|
392
|
+
uint16_t* maxColumn = (uint16_t*) calloc(refLen, 2);
|
393
|
+
|
394
|
+
/* array to record the alignment read ending position of the largest score of each reference position */
|
395
|
+
int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
|
396
|
+
|
397
|
+
/* Define 16 byte 0 vector. */
|
398
|
+
__m128i vZero = _mm_set1_epi32(0);
|
399
|
+
|
400
|
+
__m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
|
401
|
+
__m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
|
402
|
+
__m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
|
403
|
+
__m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
|
404
|
+
|
405
|
+
int32_t i, j, k;
|
406
|
+
/* 16 byte insertion begin vector */
|
407
|
+
__m128i vGapO = _mm_set1_epi16(weight_gapO);
|
408
|
+
|
409
|
+
/* 16 byte insertion extension vector */
|
410
|
+
__m128i vGapE = _mm_set1_epi16(weight_gapE);
|
411
|
+
|
412
|
+
/* 16 byte bias vector */
|
413
|
+
__m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
|
414
|
+
__m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
|
415
|
+
__m128i vTemp;
|
416
|
+
int32_t edge, begin = 0, end = refLen, step = 1;
|
417
|
+
|
418
|
+
/* outer loop to process the reference sequence */
|
419
|
+
if (ref_dir == 1) {
|
420
|
+
begin = refLen - 1;
|
421
|
+
end = -1;
|
422
|
+
step = -1;
|
423
|
+
}
|
424
|
+
for (i = begin; LIKELY(i != end); i += step) {
|
425
|
+
int32_t cmp;
|
426
|
+
__m128i e = vZero, vF = vZero; /* Initialize F value to 0.
|
427
|
+
Any errors to vH values will be corrected in the Lazy_F loop.
|
428
|
+
*/
|
429
|
+
__m128i vH = pvHStore[segLen - 1];
|
430
|
+
vH = _mm_slli_si128 (vH, 2); /* Shift the 128-bit value in vH left by 2 byte. */
|
431
|
+
|
432
|
+
/* Swap the 2 H buffers. */
|
433
|
+
__m128i* pv = pvHLoad;
|
434
|
+
|
435
|
+
__m128i vMaxColumn = vZero; /* vMaxColumn is used to record the max values of column i. */
|
436
|
+
|
437
|
+
__m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
|
438
|
+
pvHLoad = pvHStore;
|
439
|
+
pvHStore = pv;
|
440
|
+
|
441
|
+
/* inner loop to process the query sequence */
|
442
|
+
for (j = 0; LIKELY(j < segLen); j ++) {
|
443
|
+
vH = _mm_adds_epi16(vH, _mm_load_si128(vP + j));
|
444
|
+
|
445
|
+
/* Get max from vH, vE and vF. */
|
446
|
+
e = _mm_load_si128(pvE + j);
|
447
|
+
vH = _mm_max_epi16(vH, e);
|
448
|
+
vH = _mm_max_epi16(vH, vF);
|
449
|
+
vMaxColumn = _mm_max_epi16(vMaxColumn, vH);
|
450
|
+
|
451
|
+
/* Save vH values. */
|
452
|
+
_mm_store_si128(pvHStore + j, vH);
|
453
|
+
|
454
|
+
/* Update vE value. */
|
455
|
+
vH = _mm_subs_epu16(vH, vGapO); /* saturation arithmetic, result >= 0 */
|
456
|
+
e = _mm_subs_epu16(e, vGapE);
|
457
|
+
e = _mm_max_epi16(e, vH);
|
458
|
+
_mm_store_si128(pvE + j, e);
|
459
|
+
|
460
|
+
/* Update vF value. */
|
461
|
+
vF = _mm_subs_epu16(vF, vGapE);
|
462
|
+
vF = _mm_max_epi16(vF, vH);
|
463
|
+
|
464
|
+
/* Load the next vH. */
|
465
|
+
vH = _mm_load_si128(pvHLoad + j);
|
466
|
+
}
|
467
|
+
|
468
|
+
/* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
|
469
|
+
for (k = 0; LIKELY(k < 8); ++k) {
|
470
|
+
vF = _mm_slli_si128 (vF, 2);
|
471
|
+
for (j = 0; LIKELY(j < segLen); ++j) {
|
472
|
+
vH = _mm_load_si128(pvHStore + j);
|
473
|
+
vH = _mm_max_epi16(vH, vF);
|
474
|
+
_mm_store_si128(pvHStore + j, vH);
|
475
|
+
vH = _mm_subs_epu16(vH, vGapO);
|
476
|
+
vF = _mm_subs_epu16(vF, vGapE);
|
477
|
+
if (UNLIKELY(! _mm_movemask_epi8(_mm_cmpgt_epi16(vF, vH)))) goto end;
|
478
|
+
}
|
479
|
+
}
|
480
|
+
|
481
|
+
end:
|
482
|
+
vMaxScore = _mm_max_epi16(vMaxScore, vMaxColumn);
|
483
|
+
vTemp = _mm_cmpeq_epi16(vMaxMark, vMaxScore);
|
484
|
+
cmp = _mm_movemask_epi8(vTemp);
|
485
|
+
if (cmp != 0xffff) {
|
486
|
+
uint16_t temp;
|
487
|
+
vMaxMark = vMaxScore;
|
488
|
+
max8(temp, vMaxScore);
|
489
|
+
vMaxScore = vMaxMark;
|
490
|
+
|
491
|
+
if (LIKELY(temp > max)) {
|
492
|
+
max = temp;
|
493
|
+
end_ref = i;
|
494
|
+
for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j];
|
495
|
+
}
|
496
|
+
}
|
497
|
+
|
498
|
+
/* Record the max score of current column. */
|
499
|
+
max8(maxColumn[i], vMaxColumn);
|
500
|
+
if (maxColumn[i] == terminate) break;
|
501
|
+
}
|
502
|
+
|
503
|
+
/* Trace the alignment ending position on read. */
|
504
|
+
uint16_t *t = (uint16_t*)pvHmax;
|
505
|
+
int32_t column_len = segLen * 8;
|
506
|
+
for (i = 0; LIKELY(i < column_len); ++i, ++t) {
|
507
|
+
int32_t temp;
|
508
|
+
if (*t == max) {
|
509
|
+
temp = i / 8 + i % 8 * segLen;
|
510
|
+
if (temp < end_read) end_read = temp;
|
511
|
+
}
|
512
|
+
}
|
513
|
+
|
514
|
+
free(pvHmax);
|
515
|
+
free(pvE);
|
516
|
+
free(pvHLoad);
|
517
|
+
free(pvHStore);
|
518
|
+
|
519
|
+
/* Find the most possible 2nd best alignment. */
|
520
|
+
alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end));
|
521
|
+
bests[0].score = max;
|
522
|
+
bests[0].ref = end_ref;
|
523
|
+
bests[0].read = end_read;
|
524
|
+
|
525
|
+
bests[1].score = 0;
|
526
|
+
bests[1].ref = 0;
|
527
|
+
bests[1].read = 0;
|
528
|
+
|
529
|
+
edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0;
|
530
|
+
for (i = 0; i < edge; i ++) {
|
531
|
+
if (maxColumn[i] > bests[1].score) {
|
532
|
+
bests[1].score = maxColumn[i];
|
533
|
+
bests[1].ref = i;
|
534
|
+
}
|
535
|
+
}
|
536
|
+
edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen);
|
537
|
+
for (i = edge; i < refLen; i ++) {
|
538
|
+
if (maxColumn[i] > bests[1].score) {
|
539
|
+
bests[1].score = maxColumn[i];
|
540
|
+
bests[1].ref = i;
|
541
|
+
}
|
542
|
+
}
|
543
|
+
|
544
|
+
free(maxColumn);
|
545
|
+
free(end_read_column);
|
546
|
+
return bests;
|
547
|
+
}
|
548
|
+
|
549
|
+
cigar* banded_sw (const int8_t* ref,
|
550
|
+
const int8_t* read,
|
551
|
+
int32_t refLen,
|
552
|
+
int32_t readLen,
|
553
|
+
int32_t score,
|
554
|
+
const uint32_t weight_gapO, /* will be used as - */
|
555
|
+
const uint32_t weight_gapE, /* will be used as - */
|
556
|
+
int32_t band_width,
|
557
|
+
const int8_t* mat, /* pointer to the weight matrix */
|
558
|
+
int32_t n) {
|
559
|
+
|
560
|
+
uint32_t *c = (uint32_t*)malloc(16 * sizeof(uint32_t)), *c1;
|
561
|
+
int32_t i, j, e, f, temp1, temp2, s = 16, s1 = 8, s2 = 1024, l, max = 0;
|
562
|
+
int32_t width, width_d, *h_b, *e_b, *h_c;
|
563
|
+
int8_t *direction, *direction_line;
|
564
|
+
cigar* result = (cigar*)malloc(sizeof(cigar));
|
565
|
+
h_b = (int32_t*)malloc(s1 * sizeof(int32_t));
|
566
|
+
e_b = (int32_t*)malloc(s1 * sizeof(int32_t));
|
567
|
+
h_c = (int32_t*)malloc(s1 * sizeof(int32_t));
|
568
|
+
direction = (int8_t*)malloc(s2 * sizeof(int8_t));
|
569
|
+
|
570
|
+
do {
|
571
|
+
width = band_width * 2 + 3, width_d = band_width * 2 + 1;
|
572
|
+
while (width >= s1) {
|
573
|
+
++s1;
|
574
|
+
kroundup32(s1);
|
575
|
+
h_b = (int32_t*)realloc(h_b, s1 * sizeof(int32_t));
|
576
|
+
e_b = (int32_t*)realloc(e_b, s1 * sizeof(int32_t));
|
577
|
+
h_c = (int32_t*)realloc(h_c, s1 * sizeof(int32_t));
|
578
|
+
}
|
579
|
+
while (width_d * readLen * 3 >= s2) {
|
580
|
+
++s2;
|
581
|
+
kroundup32(s2);
|
582
|
+
if (s2 < 0) {
|
583
|
+
fprintf(stderr, "Alignment score and position are not consensus.\n");
|
584
|
+
exit(1);
|
585
|
+
}
|
586
|
+
direction = (int8_t*)realloc(direction, s2 * sizeof(int8_t));
|
587
|
+
}
|
588
|
+
direction_line = direction;
|
589
|
+
for (j = 1; LIKELY(j < width - 1); j ++) h_b[j] = 0;
|
590
|
+
for (i = 0; LIKELY(i < readLen); i ++) {
|
591
|
+
int32_t beg = 0, end = refLen - 1, u = 0, edge;
|
592
|
+
j = i - band_width; beg = beg > j ? beg : j; // band start
|
593
|
+
j = i + band_width; end = end < j ? end : j; // band end
|
594
|
+
edge = end + 1 < width - 1 ? end + 1 : width - 1;
|
595
|
+
f = h_b[0] = e_b[0] = h_b[edge] = e_b[edge] = h_c[0] = 0;
|
596
|
+
direction_line = direction + width_d * i * 3;
|
597
|
+
|
598
|
+
for (j = beg; LIKELY(j <= end); j ++) {
|
599
|
+
int32_t b, e1, f1, d, de, df, dh;
|
600
|
+
set_u(u, band_width, i, j); set_u(e, band_width, i - 1, j);
|
601
|
+
set_u(b, band_width, i, j - 1); set_u(d, band_width, i - 1, j - 1);
|
602
|
+
set_d(de, band_width, i, j, 0);
|
603
|
+
set_d(df, band_width, i, j, 1);
|
604
|
+
set_d(dh, band_width, i, j, 2);
|
605
|
+
|
606
|
+
temp1 = i == 0 ? -weight_gapO : h_b[e] - weight_gapO;
|
607
|
+
temp2 = i == 0 ? -weight_gapE : e_b[e] - weight_gapE;
|
608
|
+
e_b[u] = temp1 > temp2 ? temp1 : temp2;
|
609
|
+
direction_line[de] = temp1 > temp2 ? 3 : 2;
|
610
|
+
|
611
|
+
temp1 = h_c[b] - weight_gapO;
|
612
|
+
temp2 = f - weight_gapE;
|
613
|
+
f = temp1 > temp2 ? temp1 : temp2;
|
614
|
+
direction_line[df] = temp1 > temp2 ? 5 : 4;
|
615
|
+
|
616
|
+
e1 = e_b[u] > 0 ? e_b[u] : 0;
|
617
|
+
f1 = f > 0 ? f : 0;
|
618
|
+
temp1 = e1 > f1 ? e1 : f1;
|
619
|
+
temp2 = h_b[d] + mat[ref[j] * n + read[i]];
|
620
|
+
h_c[u] = temp1 > temp2 ? temp1 : temp2;
|
621
|
+
|
622
|
+
if (h_c[u] > max) max = h_c[u];
|
623
|
+
|
624
|
+
if (temp1 <= temp2) direction_line[dh] = 1;
|
625
|
+
else direction_line[dh] = e1 > f1 ? direction_line[de] : direction_line[df];
|
626
|
+
}
|
627
|
+
for (j = 1; j <= u; j ++) h_b[j] = h_c[j];
|
628
|
+
}
|
629
|
+
band_width *= 2;
|
630
|
+
} while (LIKELY(max < score));
|
631
|
+
band_width /= 2;
|
632
|
+
|
633
|
+
// trace back
|
634
|
+
i = readLen - 1;
|
635
|
+
j = refLen - 1;
|
636
|
+
e = 0; // Count the number of M, D or I.
|
637
|
+
l = 0; // record length of current cigar
|
638
|
+
f = max = 0; // M
|
639
|
+
temp2 = 2; // h
|
640
|
+
while (LIKELY(i > 0)) {
|
641
|
+
set_d(temp1, band_width, i, j, temp2);
|
642
|
+
switch (direction_line[temp1]) {
|
643
|
+
case 1:
|
644
|
+
--i;
|
645
|
+
--j;
|
646
|
+
temp2 = 2;
|
647
|
+
direction_line -= width_d * 3;
|
648
|
+
f = 0; // M
|
649
|
+
break;
|
650
|
+
case 2:
|
651
|
+
--i;
|
652
|
+
temp2 = 0; // e
|
653
|
+
direction_line -= width_d * 3;
|
654
|
+
f = 1; // I
|
655
|
+
break;
|
656
|
+
case 3:
|
657
|
+
--i;
|
658
|
+
temp2 = 2;
|
659
|
+
direction_line -= width_d * 3;
|
660
|
+
f = 1; // I
|
661
|
+
break;
|
662
|
+
case 4:
|
663
|
+
--j;
|
664
|
+
temp2 = 1;
|
665
|
+
f = 2; // D
|
666
|
+
break;
|
667
|
+
case 5:
|
668
|
+
--j;
|
669
|
+
temp2 = 2;
|
670
|
+
f = 2; // D
|
671
|
+
break;
|
672
|
+
default:
|
673
|
+
fprintf(stderr, "Trace back error: %d.\n", direction_line[temp1 - 1]);
|
674
|
+
return 0;
|
675
|
+
}
|
676
|
+
if (f == max) ++e;
|
677
|
+
else {
|
678
|
+
++l;
|
679
|
+
while (l >= s) {
|
680
|
+
++s;
|
681
|
+
kroundup32(s);
|
682
|
+
c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
|
683
|
+
}
|
684
|
+
c[l - 1] = e<<4|max;
|
685
|
+
max = f;
|
686
|
+
e = 1;
|
687
|
+
}
|
688
|
+
}
|
689
|
+
if (f == 0) {
|
690
|
+
++l;
|
691
|
+
while (l >= s) {
|
692
|
+
++s;
|
693
|
+
kroundup32(s);
|
694
|
+
c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
|
695
|
+
}
|
696
|
+
c[l - 1] = (e+1)<<4;
|
697
|
+
}else {
|
698
|
+
l += 2;
|
699
|
+
while (l >= s) {
|
700
|
+
++s;
|
701
|
+
kroundup32(s);
|
702
|
+
c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
|
703
|
+
}
|
704
|
+
c[l - 2] = e<<4|f;
|
705
|
+
c[l - 1] = 16; // 1M
|
706
|
+
}
|
707
|
+
|
708
|
+
// reverse cigar
|
709
|
+
c1 = (uint32_t*)malloc(l * sizeof(uint32_t));
|
710
|
+
s = 0;
|
711
|
+
e = l - 1;
|
712
|
+
while (LIKELY(s <= e)) {
|
713
|
+
c1[s] = c[e];
|
714
|
+
c1[e] = c[s];
|
715
|
+
++ s;
|
716
|
+
-- e;
|
717
|
+
}
|
718
|
+
result->seq = c1;
|
719
|
+
result->length = l;
|
720
|
+
|
721
|
+
free(direction);
|
722
|
+
free(h_c);
|
723
|
+
free(e_b);
|
724
|
+
free(h_b);
|
725
|
+
free(c);
|
726
|
+
return result;
|
727
|
+
}
|
728
|
+
|
729
|
+
/*
 * Return a newly allocated copy of seq[0..end] with the element order reversed.
 *
 * seq  source sequence; elements 0..end are read, nothing is written to it.
 * end  0-based index of the last element to include (alignment ending position).
 *
 * The caller owns the returned buffer and must free() it.  Returns 0 when the
 * buffer cannot be allocated instead of writing through a null pointer.
 */
int8_t* seq_reverse(const int8_t* seq, int32_t end)
{
	int32_t lo, hi;
	/* Widen before adding 1 so the element count is never computed with
	 * signed int32_t arithmetic. */
	int8_t* reverse = (int8_t*)calloc((size_t)end + 1, sizeof(int8_t));

	if (!reverse) return 0;

	/* Walk inwards from both ends; each step fills two mirrored slots. */
	for (lo = 0, hi = end; lo <= hi; ++lo, --hi) {
		reverse[lo] = seq[hi];
		reverse[hi] = seq[lo];
	}
	return reverse;
}
|
741
|
+
|
742
|
+
s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size) {
|
743
|
+
s_profile* p = (s_profile*)calloc(1, sizeof(struct _profile));
|
744
|
+
p->profile_byte = 0;
|
745
|
+
p->profile_word = 0;
|
746
|
+
p->bias = 0;
|
747
|
+
|
748
|
+
if (score_size == 0 || score_size == 2) {
|
749
|
+
/* Find the bias to use in the substitution matrix */
|
750
|
+
int32_t bias = 0, i;
|
751
|
+
for (i = 0; i < n*n; i++) if (mat[i] < bias) bias = mat[i];
|
752
|
+
bias = abs(bias);
|
753
|
+
|
754
|
+
p->bias = bias;
|
755
|
+
p->profile_byte = qP_byte (read, mat, readLen, n, bias);
|
756
|
+
}
|
757
|
+
if (score_size == 1 || score_size == 2) p->profile_word = qP_word (read, mat, readLen, n);
|
758
|
+
p->read = read;
|
759
|
+
p->mat = mat;
|
760
|
+
p->readLen = readLen;
|
761
|
+
p->n = n;
|
762
|
+
return p;
|
763
|
+
}
|
764
|
+
|
765
|
+
/*
 * Release a profile: frees the byte profile, the word profile and the profile
 * structure itself.  The read and matrix pointers stored in the profile are
 * not freed here.  Passing a null pointer is a no-op, mirroring free().
 */
void init_destroy (s_profile* p) {
	if (!p) return;
	free(p->profile_byte);
	free(p->profile_word);
	free(p);
}
|
770
|
+
|
771
|
+
s_align* ssw_align (const s_profile* prof,
|
772
|
+
const int8_t* ref,
|
773
|
+
int32_t refLen,
|
774
|
+
const uint8_t weight_gapO,
|
775
|
+
const uint8_t weight_gapE,
|
776
|
+
const uint8_t flag, // (from high to low) bit 5: return the best alignment beginning position; 6: if (ref_end1 - ref_begin1 <= filterd) && (read_end1 - read_begin1 <= filterd), return cigar; 7: if max score >= filters, return cigar; 8: always return cigar; if 6 & 7 are both setted, only return cigar when both filter fulfilled
|
777
|
+
const uint16_t filters,
|
778
|
+
const int32_t filterd,
|
779
|
+
const int32_t maskLen) {
|
780
|
+
|
781
|
+
alignment_end* bests = 0, *bests_reverse = 0;
|
782
|
+
__m128i* vP = 0;
|
783
|
+
int32_t word = 0, band_width = 0, readLen = prof->readLen;
|
784
|
+
int8_t* read_reverse = 0;
|
785
|
+
cigar* path;
|
786
|
+
s_align* r = (s_align*)calloc(1, sizeof(s_align));
|
787
|
+
r->ref_begin1 = -1;
|
788
|
+
r->read_begin1 = -1;
|
789
|
+
r->cigar = 0;
|
790
|
+
r->cigarLen = 0;
|
791
|
+
if (maskLen < 15) {
|
792
|
+
//fprintf(stderr, "When maskLen < 15, the function ssw_align doesn't return 2nd best alignment information.\n");
|
793
|
+
}
|
794
|
+
|
795
|
+
// Find the alignment scores and ending positions
|
796
|
+
if (prof->profile_byte) {
|
797
|
+
bests = sw_sse2_byte(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_byte, -1, prof->bias, maskLen);
|
798
|
+
if (prof->profile_word && bests[0].score == 255) {
|
799
|
+
free(bests);
|
800
|
+
bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
|
801
|
+
word = 1;
|
802
|
+
} else if (bests[0].score == 255) {
|
803
|
+
fprintf(stderr, "Please set 2 to the score_size parameter of the function ssw_init, otherwise the alignment results will be incorrect.\n");
|
804
|
+
return 0;
|
805
|
+
}
|
806
|
+
}else if (prof->profile_word) {
|
807
|
+
bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
|
808
|
+
word = 1;
|
809
|
+
}else {
|
810
|
+
fprintf(stderr, "Please call the function ssw_init before ssw_align.\n");
|
811
|
+
return 0;
|
812
|
+
}
|
813
|
+
r->score1 = bests[0].score;
|
814
|
+
r->ref_end1 = bests[0].ref;
|
815
|
+
r->read_end1 = bests[0].read;
|
816
|
+
if (maskLen >= 15) {
|
817
|
+
r->score2 = bests[1].score;
|
818
|
+
r->ref_end2 = bests[1].ref;
|
819
|
+
} else {
|
820
|
+
r->score2 = 0;
|
821
|
+
r->ref_end2 = -1;
|
822
|
+
}
|
823
|
+
free(bests);
|
824
|
+
if (flag == 0 || (flag == 2 && r->score1 < filters)) goto end;
|
825
|
+
|
826
|
+
// Find the beginning position of the best alignment.
|
827
|
+
read_reverse = seq_reverse(prof->read, r->read_end1);
|
828
|
+
if (word == 0) {
|
829
|
+
vP = qP_byte(read_reverse, prof->mat, r->read_end1 + 1, prof->n, prof->bias);
|
830
|
+
bests_reverse = sw_sse2_byte(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, prof->bias, maskLen);
|
831
|
+
} else {
|
832
|
+
vP = qP_word(read_reverse, prof->mat, r->read_end1 + 1, prof->n);
|
833
|
+
bests_reverse = sw_sse2_word(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, maskLen);
|
834
|
+
}
|
835
|
+
free(vP);
|
836
|
+
free(read_reverse);
|
837
|
+
r->ref_begin1 = bests_reverse[0].ref;
|
838
|
+
r->read_begin1 = r->read_end1 - bests_reverse[0].read;
|
839
|
+
free(bests_reverse);
|
840
|
+
if ((7&flag) == 0 || ((2&flag) != 0 && r->score1 < filters) || ((4&flag) != 0 && (r->ref_end1 - r->ref_begin1 > filterd || r->read_end1 - r->read_begin1 > filterd))) goto end;
|
841
|
+
|
842
|
+
// Generate cigar.
|
843
|
+
refLen = r->ref_end1 - r->ref_begin1 + 1;
|
844
|
+
readLen = r->read_end1 - r->read_begin1 + 1;
|
845
|
+
band_width = abs(refLen - readLen) + 1;
|
846
|
+
path = banded_sw(ref + r->ref_begin1, prof->read + r->read_begin1, refLen, readLen, r->score1, weight_gapO, weight_gapE, band_width, prof->mat, prof->n);
|
847
|
+
if (path == 0) r = 0;
|
848
|
+
else {
|
849
|
+
r->cigar = path->seq;
|
850
|
+
r->cigarLen = path->length;
|
851
|
+
free(path);
|
852
|
+
}
|
853
|
+
|
854
|
+
end:
|
855
|
+
return r;
|
856
|
+
}
|
857
|
+
|
858
|
+
/*
 * Release an alignment result: frees its cigar array and then the structure.
 * A null pointer is accepted and ignored, so the result of a failed ssw_align
 * call (which returns 0) can be passed in safely.
 */
void align_destroy (s_align* a) {
	if (!a) return;
	free(a->cigar);
	free(a);
}
|