rbbt-util 5.44.1 → 6.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/bin/rbbt +67 -90
  3. data/etc/app.d/base.rb +2 -2
  4. data/etc/app.d/semaphores.rb +3 -3
  5. data/lib/rbbt/annotations/annotated_array.rb +207 -207
  6. data/lib/rbbt/annotations/refactor.rb +27 -0
  7. data/lib/rbbt/annotations/util.rb +282 -282
  8. data/lib/rbbt/annotations.rb +343 -320
  9. data/lib/rbbt/association/database.rb +200 -225
  10. data/lib/rbbt/association/index.rb +294 -291
  11. data/lib/rbbt/association/item.rb +227 -227
  12. data/lib/rbbt/association/open.rb +35 -34
  13. data/lib/rbbt/association/util.rb +0 -169
  14. data/lib/rbbt/association.rb +2 -4
  15. data/lib/rbbt/entity/identifiers.rb +119 -118
  16. data/lib/rbbt/entity/refactor.rb +12 -0
  17. data/lib/rbbt/entity.rb +319 -315
  18. data/lib/rbbt/hpc/batch.rb +72 -53
  19. data/lib/rbbt/hpc/lsf.rb +2 -2
  20. data/lib/rbbt/hpc/orchestrate/batches.rb +2 -2
  21. data/lib/rbbt/hpc/orchestrate/chains.rb +25 -5
  22. data/lib/rbbt/hpc/orchestrate/rules.rb +2 -2
  23. data/lib/rbbt/hpc/orchestrate.rb +19 -13
  24. data/lib/rbbt/hpc/slurm.rb +18 -18
  25. data/lib/rbbt/knowledge_base/entity.rb +13 -5
  26. data/lib/rbbt/knowledge_base/query.rb +2 -2
  27. data/lib/rbbt/knowledge_base/registry.rb +32 -31
  28. data/lib/rbbt/knowledge_base/traverse.rb +1 -1
  29. data/lib/rbbt/knowledge_base.rb +1 -1
  30. data/lib/rbbt/monitor.rb +36 -25
  31. data/lib/rbbt/persist/refactor.rb +166 -0
  32. data/lib/rbbt/persist/tsv/tokyocabinet.rb +105 -105
  33. data/lib/rbbt/persist/tsv.rb +187 -185
  34. data/lib/rbbt/persist.rb +556 -551
  35. data/lib/rbbt/refactor.rb +20 -0
  36. data/lib/rbbt/resource/path/refactor.rb +178 -0
  37. data/lib/rbbt/resource/path.rb +317 -497
  38. data/lib/rbbt/resource/util.rb +0 -48
  39. data/lib/rbbt/resource.rb +3 -390
  40. data/lib/rbbt/tsv/accessor.rb +2 -838
  41. data/lib/rbbt/tsv/attach.rb +303 -299
  42. data/lib/rbbt/tsv/change_id.rb +244 -245
  43. data/lib/rbbt/tsv/csv.rb +87 -85
  44. data/lib/rbbt/tsv/dumper.rb +2 -100
  45. data/lib/rbbt/tsv/excel.rb +26 -24
  46. data/lib/rbbt/tsv/field_index.rb +4 -1
  47. data/lib/rbbt/tsv/filter.rb +3 -2
  48. data/lib/rbbt/tsv/index.rb +2 -284
  49. data/lib/rbbt/tsv/manipulate.rb +750 -747
  50. data/lib/rbbt/tsv/marshal.rb +3 -3
  51. data/lib/rbbt/tsv/matrix.rb +2 -2
  52. data/lib/rbbt/tsv/parallel/through.rb +2 -1
  53. data/lib/rbbt/tsv/parallel/traverse.rb +783 -781
  54. data/lib/rbbt/tsv/parser.rb +678 -678
  55. data/lib/rbbt/tsv/refactor.rb +195 -0
  56. data/lib/rbbt/tsv/stream.rb +253 -251
  57. data/lib/rbbt/tsv/util.rb +420 -420
  58. data/lib/rbbt/tsv.rb +210 -208
  59. data/lib/rbbt/util/R/eval.rb +4 -4
  60. data/lib/rbbt/util/R/plot.rb +62 -166
  61. data/lib/rbbt/util/R.rb +21 -18
  62. data/lib/rbbt/util/cmd.rb +2 -318
  63. data/lib/rbbt/util/color.rb +269 -269
  64. data/lib/rbbt/util/colorize.rb +89 -89
  65. data/lib/rbbt/util/concurrency/processes/refactor.rb +22 -0
  66. data/lib/rbbt/util/concurrency/processes/worker.rb +2 -2
  67. data/lib/rbbt/util/concurrency/processes.rb +389 -386
  68. data/lib/rbbt/util/config.rb +169 -167
  69. data/lib/rbbt/util/iruby.rb +20 -0
  70. data/lib/rbbt/util/log/progress/report.rb +241 -241
  71. data/lib/rbbt/util/log/progress/util.rb +99 -99
  72. data/lib/rbbt/util/log/progress.rb +102 -102
  73. data/lib/rbbt/util/log/refactor.rb +49 -0
  74. data/lib/rbbt/util/log.rb +486 -532
  75. data/lib/rbbt/util/migrate.rb +1 -1
  76. data/lib/rbbt/util/misc/concurrent_stream.rb +248 -246
  77. data/lib/rbbt/util/misc/development.rb +12 -11
  78. data/lib/rbbt/util/misc/exceptions.rb +117 -112
  79. data/lib/rbbt/util/misc/format.rb +2 -230
  80. data/lib/rbbt/util/misc/indiferent_hash.rb +2 -107
  81. data/lib/rbbt/util/misc/inspect.rb +2 -476
  82. data/lib/rbbt/util/misc/lock.rb +109 -106
  83. data/lib/rbbt/util/misc/omics.rb +9 -1
  84. data/lib/rbbt/util/misc/pipes.rb +765 -793
  85. data/lib/rbbt/util/misc/refactor.rb +20 -0
  86. data/lib/rbbt/util/misc/ssw.rb +27 -17
  87. data/lib/rbbt/util/misc/system.rb +0 -15
  88. data/lib/rbbt/util/misc.rb +39 -20
  89. data/lib/rbbt/util/named_array/refactor.rb +4 -0
  90. data/lib/rbbt/util/named_array.rb +3 -220
  91. data/lib/rbbt/util/open/refactor.rb +7 -0
  92. data/lib/rbbt/util/open.rb +3 -857
  93. data/lib/rbbt/util/procpath.rb +6 -6
  94. data/lib/rbbt/util/python/paths.rb +27 -0
  95. data/lib/rbbt/util/python/run.rb +115 -0
  96. data/lib/rbbt/util/python/script.rb +110 -0
  97. data/lib/rbbt/util/python/util.rb +3 -3
  98. data/lib/rbbt/util/python.rb +22 -81
  99. data/lib/rbbt/util/semaphore.rb +152 -148
  100. data/lib/rbbt/util/simpleopt.rb +9 -8
  101. data/lib/rbbt/util/ssh/refactor.rb +19 -0
  102. data/lib/rbbt/util/ssh.rb +122 -118
  103. data/lib/rbbt/util/tar.rb +117 -115
  104. data/lib/rbbt/util/tmpfile.rb +69 -67
  105. data/lib/rbbt/util/version.rb +2 -0
  106. data/lib/rbbt/workflow/refactor/entity.rb +11 -0
  107. data/lib/rbbt/workflow/refactor/export.rb +66 -0
  108. data/lib/rbbt/workflow/refactor/inputs.rb +24 -0
  109. data/lib/rbbt/workflow/refactor/recursive.rb +64 -0
  110. data/lib/rbbt/workflow/refactor/task_info.rb +65 -0
  111. data/lib/rbbt/workflow/refactor.rb +153 -0
  112. data/lib/rbbt/workflow/remote_workflow/driver/ssh.rb +55 -32
  113. data/lib/rbbt/workflow/remote_workflow/remote_step/rest.rb +3 -1
  114. data/lib/rbbt/workflow/remote_workflow/remote_step/ssh.rb +14 -5
  115. data/lib/rbbt/workflow/remote_workflow/remote_step.rb +19 -7
  116. data/lib/rbbt/workflow/remote_workflow.rb +6 -1
  117. data/lib/rbbt/workflow/step/run.rb +766 -766
  118. data/lib/rbbt/workflow/step/save_load_inputs.rb +254 -254
  119. data/lib/rbbt/workflow/step.rb +2 -362
  120. data/lib/rbbt/workflow/task.rb +118 -118
  121. data/lib/rbbt/workflow/usage.rb +289 -287
  122. data/lib/rbbt/workflow/util/archive.rb +6 -5
  123. data/lib/rbbt/workflow/util/data.rb +1 -1
  124. data/lib/rbbt/workflow/util/orchestrator.rb +249 -246
  125. data/lib/rbbt/workflow/util/trace.rb +79 -44
  126. data/lib/rbbt/workflow.rb +4 -882
  127. data/lib/rbbt-util.rb +21 -13
  128. data/lib/rbbt.rb +16 -3
  129. data/python/rbbt/__init__.py +19 -1
  130. data/share/Rlib/plot.R +37 -37
  131. data/share/Rlib/svg.R +22 -5
  132. data/share/install/software/lib/install_helpers +1 -1
  133. data/share/rbbt_commands/hpc/list +2 -3
  134. data/share/rbbt_commands/hpc/orchestrate +4 -4
  135. data/share/rbbt_commands/hpc/tail +2 -0
  136. data/share/rbbt_commands/hpc/task +10 -7
  137. data/share/rbbt_commands/lsf/list +2 -3
  138. data/share/rbbt_commands/lsf/orchestrate +4 -4
  139. data/share/rbbt_commands/lsf/tail +2 -0
  140. data/share/rbbt_commands/lsf/task +10 -7
  141. data/share/rbbt_commands/migrate +1 -1
  142. data/share/rbbt_commands/pbs/list +2 -3
  143. data/share/rbbt_commands/pbs/orchestrate +4 -4
  144. data/share/rbbt_commands/pbs/tail +2 -0
  145. data/share/rbbt_commands/pbs/task +10 -7
  146. data/share/rbbt_commands/resource/produce +8 -1
  147. data/share/rbbt_commands/slurm/list +2 -3
  148. data/share/rbbt_commands/slurm/orchestrate +4 -4
  149. data/share/rbbt_commands/slurm/tail +2 -0
  150. data/share/rbbt_commands/slurm/task +10 -7
  151. data/share/rbbt_commands/system/clean +5 -5
  152. data/share/rbbt_commands/system/status +5 -5
  153. data/share/rbbt_commands/tsv/get +2 -3
  154. data/share/rbbt_commands/tsv/info +10 -13
  155. data/share/rbbt_commands/tsv/keys +18 -14
  156. data/share/rbbt_commands/tsv/slice +2 -2
  157. data/share/rbbt_commands/tsv/transpose +6 -2
  158. data/share/rbbt_commands/workflow/info +20 -24
  159. data/share/rbbt_commands/workflow/list +1 -1
  160. data/share/rbbt_commands/workflow/prov +20 -13
  161. data/share/rbbt_commands/workflow/server +11 -1
  162. data/share/rbbt_commands/workflow/task +76 -71
  163. data/share/rbbt_commands/workflow/write_info +26 -9
  164. data/share/software/opt/ssw/ssw.c +861 -0
  165. data/share/software/opt/ssw/ssw.h +130 -0
  166. data/share/workflow_config.ru +3 -3
  167. metadata +40 -2
@@ -0,0 +1,861 @@
1
+ /* The MIT License
2
+
3
+ Copyright (c) 2012-2015 Boston College.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20
+ BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
+ SOFTWARE.
24
+ */
25
+
26
+ /* Contact: Mengyao Zhao <zhangmp@bc.edu> */
27
+
28
+ /*
29
+ * ssw.c
30
+ *
31
+ * Created by Mengyao Zhao on 6/22/10.
32
+ * Copyright 2010 Boston College. All rights reserved.
33
+ * Version 0.1.4
34
+ * Last revision by Mengyao Zhao on 12/07/12.
35
+ *
36
+ */
37
+
38
+ #include <emmintrin.h>
39
+ #include <stdint.h>
40
+ #include <stdlib.h>
41
+ #include <stdio.h>
42
+ #include <string.h>
43
+ #include <math.h>
44
+ #include "ssw.h"
45
+
46
/* Branch-prediction hints.  GNU-compatible compilers get __builtin_expect;
   every other compiler sees the bare expression. */
#ifndef __GNUC__
#  define LIKELY(x)   (x)
#  define UNLIKELY(x) (x)
#else
#  define LIKELY(x)   __builtin_expect((x),1)
#  define UNLIKELY(x) __builtin_expect((x),0)
#endif
53
+
54
/* Convert the coordinate in the scoring matrix into the coordinate in one line of the band.
   u receives (j) - max((i) - (w), 0) + 1.
   Wrapped in do/while(0) so the macro behaves as a single statement (safe in if/else);
   the temporary has a macro-specific name so it cannot capture a caller's variable. */
#define set_u(u, w, i, j) do { \
		int set_u_shift_ = (i) - (w); \
		if (set_u_shift_ < 0) set_u_shift_ = 0; \
		(u) = (j) - set_u_shift_ + 1; \
	} while (0)

/* Convert the coordinate in the direction matrix into the coordinate in one line of the band.
   u receives ((j) - max((i) - (w), 0)) * 3 + (p); p selects one of the three
   direction cells stored per band position. p is parenthesized so that an
   expression argument is added as a whole. */
#define set_d(u, w, i, j, p) do { \
		int set_d_shift_ = (i) - (w); \
		if (set_d_shift_ < 0) set_d_shift_ = 0; \
		set_d_shift_ = (j) - set_d_shift_; \
		(u) = set_d_shift_ * 3 + (p); \
	} while (0)
59
+
60
/*! @function
  @abstract     Round a 32-bit integer up to a power of two, in place.
  @param  x     integer lvalue to be rounded; it is modified
  @discussion   x is decremented, OR-ed with itself shifted right by 1, 2, 4,
                8 and 16 bits (smearing the highest set bit into every lower
                bit) and then incremented.  A value that already is a power
                of two is left unchanged.  x is evaluated several times, so
                it must be a plain lvalue without side effects.
 */
#define kroundup32(x) \
	(--(x), \
	 (x)|=(x)>>1, \
	 (x)|=(x)>>2, \
	 (x)|=(x)>>4, \
	 (x)|=(x)>>8, \
	 (x)|=(x)>>16, \
	 ++(x))
66
+
67
/* Score and end coordinates reported for one alignment. */
typedef struct {
	uint16_t score;	/* alignment score */
	int32_t  ref;	/* 0-based position on the reference */
	int32_t  read;	/* 0-based alignment ending position on the read */
} alignment_end;
72
+
73
/* A CIGAR held as an array of packed 32-bit entries plus its entry count. */
typedef struct {
	uint32_t *seq;		/* packed CIGAR entries */
	int32_t   length;	/* number of entries in seq */
} cigar;
77
+
78
/* Query profile bundle: the pre-computed SSE2 profiles together with the
   read, the scoring matrix and the sizes they were built from. */
struct _profile {
	__m128i      *profile_byte;	/* 8-bit lane profile;  0: none */
	__m128i      *profile_word;	/* 16-bit lane profile; 0: none */
	const int8_t *read;		/* the read the profiles were built from */
	const int8_t *mat;		/* scoring matrix */
	int32_t       readLen;		/* length of read */
	int32_t       n;		/* edge length of the square matrix mat */
	uint8_t       bias;		/* offset added to the byte profile entries */
};
87
+
88
/* Generate the 8-bit query profile: rearrange the query sequence into the
   striped layout and pre-compute the (biased) match/mismatch weight of every
   read position against each of the n reference symbols.

   read_num  read as symbol indices into mat
   mat       n x n scoring matrix, row-major
   readLen   length of read_num
   n         the edge length of the square matrix mat
   bias      value added to every weight; padding lanes past the end of the
             read hold bias alone

   Returns a malloc'ed block of n * segLen vectors, or NULL when the
   allocation fails (nothing is written in that case). */
__m128i* qP_byte (const int8_t* read_num,
				  const int8_t* mat,
				  const int32_t readLen,
				  const int32_t n,
				  uint8_t bias) {

	/* Split the 128 bit register into 16 pieces. Each piece is 8 bit.
	   Split the read into 16 segments. Calculate 16 segments in parallel. */
	int32_t segLen = (readLen + 15) / 16;
	/* Widen before multiplying so the byte count is not computed in int. */
	__m128i* vProfile = (__m128i*)malloc((size_t)n * (size_t)segLen * sizeof(__m128i));
	int8_t* t = (int8_t*)vProfile;
	int32_t nt, i, j, segNum;

	if (vProfile == NULL) return NULL;

	for (nt = 0; nt < n; nt ++) {
		for (i = 0; i < segLen; i ++) {
			/* Lane segNum of vector i holds read position i + segNum * segLen. */
			j = i;
			for (segNum = 0; segNum < 16; segNum ++) {
				*t++ = j >= readLen ? bias : mat[nt * n + read_num[j]] + bias;
				j += segLen;
			}
		}
	}
	return vProfile;
}
115
+
116
+ /* Striped Smith-Waterman
117
+ Record the highest score of each reference position.
118
+ Return the alignment score and ending position of the best alignment, 2nd best alignment, etc.
119
+ Gap begin and gap extension are different.
120
+ wight_match > 0, all other weights < 0.
121
+ The returned positions are 0-based.
122
+ */
123
+ alignment_end* sw_sse2_byte (const int8_t* ref,
124
+ int8_t ref_dir, // 0: forward ref; 1: reverse ref
125
+ int32_t refLen,
126
+ int32_t readLen,
127
+ const uint8_t weight_gapO, /* will be used as - */
128
+ const uint8_t weight_gapE, /* will be used as - */
129
+ __m128i* vProfile,
130
+ uint8_t terminate, /* the best alignment score: used to terminate
131
+ the matrix calculation when locating the
132
+ alignment beginning point. If this score
133
+ is set to 0, it will not be used */
134
+ uint8_t bias, /* Shift 0 point to a positive value. */
135
+ int32_t maskLen) {
136
+
137
+ #define max16(m, vm) (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 8)); \
138
+ (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 4)); \
139
+ (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 2)); \
140
+ (vm) = _mm_max_epu8((vm), _mm_srli_si128((vm), 1)); \
141
+ (m) = _mm_extract_epi16((vm), 0)
142
+
143
+ uint8_t max = 0; /* the max alignment score */
144
+ int32_t end_read = readLen - 1;
145
+ int32_t end_ref = -1; /* 0_based best alignment ending point; Initialized as isn't aligned -1. */
146
+ int32_t segLen = (readLen + 15) / 16; /* number of segment */
147
+
148
+ /* array to record the largest score of each reference position */
149
+ uint8_t* maxColumn = (uint8_t*) calloc(refLen, 1);
150
+
151
+ /* array to record the alignment read ending position of the largest score of each reference position */
152
+ int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
153
+
154
+ /* Define 16 byte 0 vector. */
155
+ __m128i vZero = _mm_set1_epi32(0);
156
+
157
+ __m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
158
+ __m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
159
+ __m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
160
+ __m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
161
+
162
+ int32_t i, j;
163
+ /* 16 byte insertion begin vector */
164
+ __m128i vGapO = _mm_set1_epi8(weight_gapO);
165
+
166
+ /* 16 byte insertion extension vector */
167
+ __m128i vGapE = _mm_set1_epi8(weight_gapE);
168
+
169
+ /* 16 byte bias vector */
170
+ __m128i vBias = _mm_set1_epi8(bias);
171
+
172
+ __m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
173
+ __m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
174
+ __m128i vTemp;
175
+ int32_t edge, begin = 0, end = refLen, step = 1;
176
+ // int32_t distance = readLen * 2 / 3;
177
+ // int32_t distance = readLen / 2;
178
+ // int32_t distance = readLen;
179
+
180
+ /* outer loop to process the reference sequence */
181
+ if (ref_dir == 1) {
182
+ begin = refLen - 1;
183
+ end = -1;
184
+ step = -1;
185
+ }
186
+ for (i = begin; LIKELY(i != end); i += step) {
187
+ int32_t cmp;
188
+ __m128i e = vZero, vF = vZero, vMaxColumn = vZero; /* Initialize F value to 0.
189
+ Any errors to vH values will be corrected in the Lazy_F loop.
190
+ */
191
+ // max16(maxColumn[i], vMaxColumn);
192
+ // fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]);
193
+
194
+ __m128i vH = pvHStore[segLen - 1];
195
+ vH = _mm_slli_si128 (vH, 1); /* Shift the 128-bit value in vH left by 1 byte. */
196
+ __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
197
+
198
+ /* Swap the 2 H buffers. */
199
+ __m128i* pv = pvHLoad;
200
+ pvHLoad = pvHStore;
201
+ pvHStore = pv;
202
+
203
+ /* inner loop to process the query sequence */
204
+ for (j = 0; LIKELY(j < segLen); ++j) {
205
+ vH = _mm_adds_epu8(vH, _mm_load_si128(vP + j));
206
+ vH = _mm_subs_epu8(vH, vBias); /* vH will be always > 0 */
207
+ // max16(maxColumn[i], vH);
208
+ // fprintf(stderr, "H[%d]: %d\n", i, maxColumn[i]);
209
+ // int8_t* t;
210
+ // int32_t ti;
211
+ //for (t = (int8_t*)&vH, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++);
212
+
213
+ /* Get max from vH, vE and vF. */
214
+ e = _mm_load_si128(pvE + j);
215
+ vH = _mm_max_epu8(vH, e);
216
+ vH = _mm_max_epu8(vH, vF);
217
+ vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
218
+
219
+ // max16(maxColumn[i], vMaxColumn);
220
+ // fprintf(stderr, "middle[%d]: %d\n", i, maxColumn[i]);
221
+ // for (t = (int8_t*)&vMaxColumn, ti = 0; ti < 16; ++ti) fprintf(stderr, "%d\t", *t++);
222
+
223
+ /* Save vH values. */
224
+ _mm_store_si128(pvHStore + j, vH);
225
+
226
+ /* Update vE value. */
227
+ vH = _mm_subs_epu8(vH, vGapO); /* saturation arithmetic, result >= 0 */
228
+ e = _mm_subs_epu8(e, vGapE);
229
+ e = _mm_max_epu8(e, vH);
230
+ _mm_store_si128(pvE + j, e);
231
+
232
+ /* Update vF value. */
233
+ vF = _mm_subs_epu8(vF, vGapE);
234
+ vF = _mm_max_epu8(vF, vH);
235
+
236
+ /* Load the next vH. */
237
+ vH = _mm_load_si128(pvHLoad + j);
238
+ }
239
+
240
+ /* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
241
+ /* reset pointers to the start of the saved data */
242
+ j = 0;
243
+ vH = _mm_load_si128 (pvHStore + j);
244
+
245
+ /* the computed vF value is for the given column. since */
246
+ /* we are at the end, we need to shift the vF value over */
247
+ /* to the next column. */
248
+ vF = _mm_slli_si128 (vF, 1);
249
+ vTemp = _mm_subs_epu8 (vH, vGapO);
250
+ vTemp = _mm_subs_epu8 (vF, vTemp);
251
+ vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
252
+ cmp = _mm_movemask_epi8 (vTemp);
253
+
254
+ while (cmp != 0xffff)
255
+ {
256
+ vH = _mm_max_epu8 (vH, vF);
257
+ vMaxColumn = _mm_max_epu8(vMaxColumn, vH);
258
+ _mm_store_si128 (pvHStore + j, vH);
259
+ vF = _mm_subs_epu8 (vF, vGapE);
260
+ j++;
261
+ if (j >= segLen)
262
+ {
263
+ j = 0;
264
+ vF = _mm_slli_si128 (vF, 1);
265
+ }
266
+ vH = _mm_load_si128 (pvHStore + j);
267
+
268
+ vTemp = _mm_subs_epu8 (vH, vGapO);
269
+ vTemp = _mm_subs_epu8 (vF, vTemp);
270
+ vTemp = _mm_cmpeq_epi8 (vTemp, vZero);
271
+ cmp = _mm_movemask_epi8 (vTemp);
272
+ }
273
+
274
+ vMaxScore = _mm_max_epu8(vMaxScore, vMaxColumn);
275
+ vTemp = _mm_cmpeq_epi8(vMaxMark, vMaxScore);
276
+ cmp = _mm_movemask_epi8(vTemp);
277
+ if (cmp != 0xffff) {
278
+ uint8_t temp;
279
+ vMaxMark = vMaxScore;
280
+ max16(temp, vMaxScore);
281
+ vMaxScore = vMaxMark;
282
+
283
+ if (LIKELY(temp > max)) {
284
+ max = temp;
285
+ if (max + bias >= 255) break; //overflow
286
+ end_ref = i;
287
+
288
+ /* Store the column with the highest alignment score in order to trace the alignment ending position on read. */
289
+ for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j];
290
+ }
291
+ }
292
+
293
+ /* Record the max score of current column. */
294
+ max16(maxColumn[i], vMaxColumn);
295
+ // fprintf(stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]);
296
+ if (maxColumn[i] == terminate) break;
297
+ }
298
+
299
+ /* Trace the alignment ending position on read. */
300
+ uint8_t *t = (uint8_t*)pvHmax;
301
+ int32_t column_len = segLen * 16;
302
+ for (i = 0; LIKELY(i < column_len); ++i, ++t) {
303
+ int32_t temp;
304
+ if (*t == max) {
305
+ temp = i / 16 + i % 16 * segLen;
306
+ if (temp < end_read) end_read = temp;
307
+ }
308
+ }
309
+
310
+ free(pvHmax);
311
+ free(pvE);
312
+ free(pvHLoad);
313
+ free(pvHStore);
314
+
315
+ /* Find the most possible 2nd best alignment. */
316
+ alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end));
317
+ bests[0].score = max + bias >= 255 ? 255 : max;
318
+ bests[0].ref = end_ref;
319
+ bests[0].read = end_read;
320
+
321
+ bests[1].score = 0;
322
+ bests[1].ref = 0;
323
+ bests[1].read = 0;
324
+
325
+ edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0;
326
+ for (i = 0; i < edge; i ++) {
327
+ // fprintf (stderr, "maxColumn[%d]: %d\n", i, maxColumn[i]);
328
+ if (maxColumn[i] > bests[1].score) {
329
+ bests[1].score = maxColumn[i];
330
+ bests[1].ref = i;
331
+ }
332
+ }
333
+ edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen);
334
+ for (i = edge + 1; i < refLen; i ++) {
335
+ // fprintf (stderr, "refLen: %d\tmaxColumn[%d]: %d\n", refLen, i, maxColumn[i]);
336
+ if (maxColumn[i] > bests[1].score) {
337
+ bests[1].score = maxColumn[i];
338
+ bests[1].ref = i;
339
+ }
340
+ }
341
+
342
+ free(maxColumn);
343
+ free(end_read_column);
344
+ return bests;
345
+ }
346
+
347
/* Generate the 16-bit query profile: rearrange the query sequence into the
   striped layout and pre-compute the match/mismatch weight of every read
   position against each of the n reference symbols.

   read_num  read as symbol indices into mat
   mat       n x n scoring matrix, row-major
   readLen   length of read_num
   n         the edge length of the square matrix mat

   Padding lanes past the end of the read hold 0.  Returns a malloc'ed block
   of n * segLen vectors, or NULL when the allocation fails (nothing is
   written in that case). */
__m128i* qP_word (const int8_t* read_num,
				  const int8_t* mat,
				  const int32_t readLen,
				  const int32_t n) {

	/* Each 128 bit register holds 8 lanes of 16 bit. */
	int32_t segLen = (readLen + 7) / 8;
	/* Widen before multiplying so the byte count is not computed in int. */
	__m128i* vProfile = (__m128i*)malloc((size_t)n * (size_t)segLen * sizeof(__m128i));
	int16_t* t = (int16_t*)vProfile;
	int32_t nt, i, j;
	int32_t segNum;

	if (vProfile == NULL) return NULL;

	for (nt = 0; nt < n; nt ++) {
		for (i = 0; i < segLen; i ++) {
			/* Lane segNum of vector i holds read position i + segNum * segLen. */
			j = i;
			for (segNum = 0; segNum < 8; segNum ++) {
				*t++ = j >= readLen ? 0 : mat[nt * n + read_num[j]];
				j += segLen;
			}
		}
	}
	return vProfile;
}
370
+
371
+ alignment_end* sw_sse2_word (const int8_t* ref,
372
+ int8_t ref_dir, // 0: forward ref; 1: reverse ref
373
+ int32_t refLen,
374
+ int32_t readLen,
375
+ const uint8_t weight_gapO, /* will be used as - */
376
+ const uint8_t weight_gapE, /* will be used as - */
377
+ __m128i* vProfile,
378
+ uint16_t terminate,
379
+ int32_t maskLen) {
380
+
381
+ #define max8(m, vm) (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 8)); \
382
+ (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 4)); \
383
+ (vm) = _mm_max_epi16((vm), _mm_srli_si128((vm), 2)); \
384
+ (m) = _mm_extract_epi16((vm), 0)
385
+
386
+ uint16_t max = 0; /* the max alignment score */
387
+ int32_t end_read = readLen - 1;
388
+ int32_t end_ref = 0; /* 1_based best alignment ending point; Initialized as isn't aligned - 0. */
389
+ int32_t segLen = (readLen + 7) / 8; /* number of segment */
390
+
391
+ /* array to record the largest score of each reference position */
392
+ uint16_t* maxColumn = (uint16_t*) calloc(refLen, 2);
393
+
394
+ /* array to record the alignment read ending position of the largest score of each reference position */
395
+ int32_t* end_read_column = (int32_t*) calloc(refLen, sizeof(int32_t));
396
+
397
+ /* Define 16 byte 0 vector. */
398
+ __m128i vZero = _mm_set1_epi32(0);
399
+
400
+ __m128i* pvHStore = (__m128i*) calloc(segLen, sizeof(__m128i));
401
+ __m128i* pvHLoad = (__m128i*) calloc(segLen, sizeof(__m128i));
402
+ __m128i* pvE = (__m128i*) calloc(segLen, sizeof(__m128i));
403
+ __m128i* pvHmax = (__m128i*) calloc(segLen, sizeof(__m128i));
404
+
405
+ int32_t i, j, k;
406
+ /* 16 byte insertion begin vector */
407
+ __m128i vGapO = _mm_set1_epi16(weight_gapO);
408
+
409
+ /* 16 byte insertion extension vector */
410
+ __m128i vGapE = _mm_set1_epi16(weight_gapE);
411
+
412
+ /* 16 byte bias vector */
413
+ __m128i vMaxScore = vZero; /* Trace the highest score of the whole SW matrix. */
414
+ __m128i vMaxMark = vZero; /* Trace the highest score till the previous column. */
415
+ __m128i vTemp;
416
+ int32_t edge, begin = 0, end = refLen, step = 1;
417
+
418
+ /* outer loop to process the reference sequence */
419
+ if (ref_dir == 1) {
420
+ begin = refLen - 1;
421
+ end = -1;
422
+ step = -1;
423
+ }
424
+ for (i = begin; LIKELY(i != end); i += step) {
425
+ int32_t cmp;
426
+ __m128i e = vZero, vF = vZero; /* Initialize F value to 0.
427
+ Any errors to vH values will be corrected in the Lazy_F loop.
428
+ */
429
+ __m128i vH = pvHStore[segLen - 1];
430
+ vH = _mm_slli_si128 (vH, 2); /* Shift the 128-bit value in vH left by 2 byte. */
431
+
432
+ /* Swap the 2 H buffers. */
433
+ __m128i* pv = pvHLoad;
434
+
435
+ __m128i vMaxColumn = vZero; /* vMaxColumn is used to record the max values of column i. */
436
+
437
+ __m128i* vP = vProfile + ref[i] * segLen; /* Right part of the vProfile */
438
+ pvHLoad = pvHStore;
439
+ pvHStore = pv;
440
+
441
+ /* inner loop to process the query sequence */
442
+ for (j = 0; LIKELY(j < segLen); j ++) {
443
+ vH = _mm_adds_epi16(vH, _mm_load_si128(vP + j));
444
+
445
+ /* Get max from vH, vE and vF. */
446
+ e = _mm_load_si128(pvE + j);
447
+ vH = _mm_max_epi16(vH, e);
448
+ vH = _mm_max_epi16(vH, vF);
449
+ vMaxColumn = _mm_max_epi16(vMaxColumn, vH);
450
+
451
+ /* Save vH values. */
452
+ _mm_store_si128(pvHStore + j, vH);
453
+
454
+ /* Update vE value. */
455
+ vH = _mm_subs_epu16(vH, vGapO); /* saturation arithmetic, result >= 0 */
456
+ e = _mm_subs_epu16(e, vGapE);
457
+ e = _mm_max_epi16(e, vH);
458
+ _mm_store_si128(pvE + j, e);
459
+
460
+ /* Update vF value. */
461
+ vF = _mm_subs_epu16(vF, vGapE);
462
+ vF = _mm_max_epi16(vF, vH);
463
+
464
+ /* Load the next vH. */
465
+ vH = _mm_load_si128(pvHLoad + j);
466
+ }
467
+
468
+ /* Lazy_F loop: has been revised to disallow adjecent insertion and then deletion, so don't update E(i, j), learn from SWPS3 */
469
+ for (k = 0; LIKELY(k < 8); ++k) {
470
+ vF = _mm_slli_si128 (vF, 2);
471
+ for (j = 0; LIKELY(j < segLen); ++j) {
472
+ vH = _mm_load_si128(pvHStore + j);
473
+ vH = _mm_max_epi16(vH, vF);
474
+ _mm_store_si128(pvHStore + j, vH);
475
+ vH = _mm_subs_epu16(vH, vGapO);
476
+ vF = _mm_subs_epu16(vF, vGapE);
477
+ if (UNLIKELY(! _mm_movemask_epi8(_mm_cmpgt_epi16(vF, vH)))) goto end;
478
+ }
479
+ }
480
+
481
+ end:
482
+ vMaxScore = _mm_max_epi16(vMaxScore, vMaxColumn);
483
+ vTemp = _mm_cmpeq_epi16(vMaxMark, vMaxScore);
484
+ cmp = _mm_movemask_epi8(vTemp);
485
+ if (cmp != 0xffff) {
486
+ uint16_t temp;
487
+ vMaxMark = vMaxScore;
488
+ max8(temp, vMaxScore);
489
+ vMaxScore = vMaxMark;
490
+
491
+ if (LIKELY(temp > max)) {
492
+ max = temp;
493
+ end_ref = i;
494
+ for (j = 0; LIKELY(j < segLen); ++j) pvHmax[j] = pvHStore[j];
495
+ }
496
+ }
497
+
498
+ /* Record the max score of current column. */
499
+ max8(maxColumn[i], vMaxColumn);
500
+ if (maxColumn[i] == terminate) break;
501
+ }
502
+
503
+ /* Trace the alignment ending position on read. */
504
+ uint16_t *t = (uint16_t*)pvHmax;
505
+ int32_t column_len = segLen * 8;
506
+ for (i = 0; LIKELY(i < column_len); ++i, ++t) {
507
+ int32_t temp;
508
+ if (*t == max) {
509
+ temp = i / 8 + i % 8 * segLen;
510
+ if (temp < end_read) end_read = temp;
511
+ }
512
+ }
513
+
514
+ free(pvHmax);
515
+ free(pvE);
516
+ free(pvHLoad);
517
+ free(pvHStore);
518
+
519
+ /* Find the most possible 2nd best alignment. */
520
+ alignment_end* bests = (alignment_end*) calloc(2, sizeof(alignment_end));
521
+ bests[0].score = max;
522
+ bests[0].ref = end_ref;
523
+ bests[0].read = end_read;
524
+
525
+ bests[1].score = 0;
526
+ bests[1].ref = 0;
527
+ bests[1].read = 0;
528
+
529
+ edge = (end_ref - maskLen) > 0 ? (end_ref - maskLen) : 0;
530
+ for (i = 0; i < edge; i ++) {
531
+ if (maxColumn[i] > bests[1].score) {
532
+ bests[1].score = maxColumn[i];
533
+ bests[1].ref = i;
534
+ }
535
+ }
536
+ edge = (end_ref + maskLen) > refLen ? refLen : (end_ref + maskLen);
537
+ for (i = edge; i < refLen; i ++) {
538
+ if (maxColumn[i] > bests[1].score) {
539
+ bests[1].score = maxColumn[i];
540
+ bests[1].ref = i;
541
+ }
542
+ }
543
+
544
+ free(maxColumn);
545
+ free(end_read_column);
546
+ return bests;
547
+ }
548
+
549
+ cigar* banded_sw (const int8_t* ref,
550
+ const int8_t* read,
551
+ int32_t refLen,
552
+ int32_t readLen,
553
+ int32_t score,
554
+ const uint32_t weight_gapO, /* will be used as - */
555
+ const uint32_t weight_gapE, /* will be used as - */
556
+ int32_t band_width,
557
+ const int8_t* mat, /* pointer to the weight matrix */
558
+ int32_t n) {
559
+
560
+ uint32_t *c = (uint32_t*)malloc(16 * sizeof(uint32_t)), *c1;
561
+ int32_t i, j, e, f, temp1, temp2, s = 16, s1 = 8, s2 = 1024, l, max = 0;
562
+ int32_t width, width_d, *h_b, *e_b, *h_c;
563
+ int8_t *direction, *direction_line;
564
+ cigar* result = (cigar*)malloc(sizeof(cigar));
565
+ h_b = (int32_t*)malloc(s1 * sizeof(int32_t));
566
+ e_b = (int32_t*)malloc(s1 * sizeof(int32_t));
567
+ h_c = (int32_t*)malloc(s1 * sizeof(int32_t));
568
+ direction = (int8_t*)malloc(s2 * sizeof(int8_t));
569
+
570
+ do {
571
+ width = band_width * 2 + 3, width_d = band_width * 2 + 1;
572
+ while (width >= s1) {
573
+ ++s1;
574
+ kroundup32(s1);
575
+ h_b = (int32_t*)realloc(h_b, s1 * sizeof(int32_t));
576
+ e_b = (int32_t*)realloc(e_b, s1 * sizeof(int32_t));
577
+ h_c = (int32_t*)realloc(h_c, s1 * sizeof(int32_t));
578
+ }
579
+ while (width_d * readLen * 3 >= s2) {
580
+ ++s2;
581
+ kroundup32(s2);
582
+ if (s2 < 0) {
583
+ fprintf(stderr, "Alignment score and position are not consensus.\n");
584
+ exit(1);
585
+ }
586
+ direction = (int8_t*)realloc(direction, s2 * sizeof(int8_t));
587
+ }
588
+ direction_line = direction;
589
+ for (j = 1; LIKELY(j < width - 1); j ++) h_b[j] = 0;
590
+ for (i = 0; LIKELY(i < readLen); i ++) {
591
+ int32_t beg = 0, end = refLen - 1, u = 0, edge;
592
+ j = i - band_width; beg = beg > j ? beg : j; // band start
593
+ j = i + band_width; end = end < j ? end : j; // band end
594
+ edge = end + 1 < width - 1 ? end + 1 : width - 1;
595
+ f = h_b[0] = e_b[0] = h_b[edge] = e_b[edge] = h_c[0] = 0;
596
+ direction_line = direction + width_d * i * 3;
597
+
598
+ for (j = beg; LIKELY(j <= end); j ++) {
599
+ int32_t b, e1, f1, d, de, df, dh;
600
+ set_u(u, band_width, i, j); set_u(e, band_width, i - 1, j);
601
+ set_u(b, band_width, i, j - 1); set_u(d, band_width, i - 1, j - 1);
602
+ set_d(de, band_width, i, j, 0);
603
+ set_d(df, band_width, i, j, 1);
604
+ set_d(dh, band_width, i, j, 2);
605
+
606
+ temp1 = i == 0 ? -weight_gapO : h_b[e] - weight_gapO;
607
+ temp2 = i == 0 ? -weight_gapE : e_b[e] - weight_gapE;
608
+ e_b[u] = temp1 > temp2 ? temp1 : temp2;
609
+ direction_line[de] = temp1 > temp2 ? 3 : 2;
610
+
611
+ temp1 = h_c[b] - weight_gapO;
612
+ temp2 = f - weight_gapE;
613
+ f = temp1 > temp2 ? temp1 : temp2;
614
+ direction_line[df] = temp1 > temp2 ? 5 : 4;
615
+
616
+ e1 = e_b[u] > 0 ? e_b[u] : 0;
617
+ f1 = f > 0 ? f : 0;
618
+ temp1 = e1 > f1 ? e1 : f1;
619
+ temp2 = h_b[d] + mat[ref[j] * n + read[i]];
620
+ h_c[u] = temp1 > temp2 ? temp1 : temp2;
621
+
622
+ if (h_c[u] > max) max = h_c[u];
623
+
624
+ if (temp1 <= temp2) direction_line[dh] = 1;
625
+ else direction_line[dh] = e1 > f1 ? direction_line[de] : direction_line[df];
626
+ }
627
+ for (j = 1; j <= u; j ++) h_b[j] = h_c[j];
628
+ }
629
+ band_width *= 2;
630
+ } while (LIKELY(max < score));
631
+ band_width /= 2;
632
+
633
+ // trace back
634
+ i = readLen - 1;
635
+ j = refLen - 1;
636
+ e = 0; // Count the number of M, D or I.
637
+ l = 0; // record length of current cigar
638
+ f = max = 0; // M
639
+ temp2 = 2; // h
640
+ while (LIKELY(i > 0)) {
641
+ set_d(temp1, band_width, i, j, temp2);
642
+ switch (direction_line[temp1]) {
643
+ case 1:
644
+ --i;
645
+ --j;
646
+ temp2 = 2;
647
+ direction_line -= width_d * 3;
648
+ f = 0; // M
649
+ break;
650
+ case 2:
651
+ --i;
652
+ temp2 = 0; // e
653
+ direction_line -= width_d * 3;
654
+ f = 1; // I
655
+ break;
656
+ case 3:
657
+ --i;
658
+ temp2 = 2;
659
+ direction_line -= width_d * 3;
660
+ f = 1; // I
661
+ break;
662
+ case 4:
663
+ --j;
664
+ temp2 = 1;
665
+ f = 2; // D
666
+ break;
667
+ case 5:
668
+ --j;
669
+ temp2 = 2;
670
+ f = 2; // D
671
+ break;
672
+ default:
673
+ fprintf(stderr, "Trace back error: %d.\n", direction_line[temp1 - 1]);
674
+ return 0;
675
+ }
676
+ if (f == max) ++e;
677
+ else {
678
+ ++l;
679
+ while (l >= s) {
680
+ ++s;
681
+ kroundup32(s);
682
+ c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
683
+ }
684
+ c[l - 1] = e<<4|max;
685
+ max = f;
686
+ e = 1;
687
+ }
688
+ }
689
+ if (f == 0) {
690
+ ++l;
691
+ while (l >= s) {
692
+ ++s;
693
+ kroundup32(s);
694
+ c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
695
+ }
696
+ c[l - 1] = (e+1)<<4;
697
+ }else {
698
+ l += 2;
699
+ while (l >= s) {
700
+ ++s;
701
+ kroundup32(s);
702
+ c = (uint32_t*)realloc(c, s * sizeof(uint32_t));
703
+ }
704
+ c[l - 2] = e<<4|f;
705
+ c[l - 1] = 16; // 1M
706
+ }
707
+
708
+ // reverse cigar
709
+ c1 = (uint32_t*)malloc(l * sizeof(uint32_t));
710
+ s = 0;
711
+ e = l - 1;
712
+ while (LIKELY(s <= e)) {
713
+ c1[s] = c[e];
714
+ c1[e] = c[s];
715
+ ++ s;
716
+ -- e;
717
+ }
718
+ result->seq = c1;
719
+ result->length = l;
720
+
721
+ free(direction);
722
+ free(h_c);
723
+ free(e_b);
724
+ free(h_b);
725
+ free(c);
726
+ return result;
727
+ }
728
+
729
/*
 * Return a newly allocated, reversed copy of seq[0..end].
 *
 * seq: source sequence; must contain at least end + 1 elements.
 * end: 0-based alignment ending position, i.e. the index of the last
 *      element of seq that takes part in the reversal.
 *
 * Returns a heap buffer of end + 1 elements where element i holds
 * seq[end - i]. The caller owns the buffer and must free() it.
 * Returns NULL when end is negative (there is nothing to reverse) or when
 * the allocation fails.
 */
int8_t* seq_reverse(const int8_t* seq, int32_t end)
{
    int8_t* reverse;
    size_t len, i;

    /* A negative end would otherwise be converted to a huge element count. */
    if (end < 0) return NULL;

    len = (size_t)end + 1;
    reverse = (int8_t*)calloc(len, sizeof(int8_t));
    if (reverse == NULL) return NULL;

    for (i = 0; i < len; ++i) reverse[i] = seq[len - 1 - i];
    return reverse;
}
741
+
742
+ s_profile* ssw_init (const int8_t* read, const int32_t readLen, const int8_t* mat, const int32_t n, const int8_t score_size) {
743
+ s_profile* p = (s_profile*)calloc(1, sizeof(struct _profile));
744
+ p->profile_byte = 0;
745
+ p->profile_word = 0;
746
+ p->bias = 0;
747
+
748
+ if (score_size == 0 || score_size == 2) {
749
+ /* Find the bias to use in the substitution matrix */
750
+ int32_t bias = 0, i;
751
+ for (i = 0; i < n*n; i++) if (mat[i] < bias) bias = mat[i];
752
+ bias = abs(bias);
753
+
754
+ p->bias = bias;
755
+ p->profile_byte = qP_byte (read, mat, readLen, n, bias);
756
+ }
757
+ if (score_size == 1 || score_size == 2) p->profile_word = qP_word (read, mat, readLen, n);
758
+ p->read = read;
759
+ p->mat = mat;
760
+ p->readLen = readLen;
761
+ p->n = n;
762
+ return p;
763
+ }
764
+
765
+ void init_destroy (s_profile* p) {
766
+ free(p->profile_byte);
767
+ free(p->profile_word);
768
+ free(p);
769
+ }
770
+
771
+ s_align* ssw_align (const s_profile* prof,
772
+ const int8_t* ref,
773
+ int32_t refLen,
774
+ const uint8_t weight_gapO,
775
+ const uint8_t weight_gapE,
776
+ const uint8_t flag, // (from high to low) bit 5: return the best alignment beginning position; 6: if (ref_end1 - ref_begin1 <= filterd) && (read_end1 - read_begin1 <= filterd), return cigar; 7: if max score >= filters, return cigar; 8: always return cigar; if 6 & 7 are both setted, only return cigar when both filter fulfilled
777
+ const uint16_t filters,
778
+ const int32_t filterd,
779
+ const int32_t maskLen) {
780
+
781
+ alignment_end* bests = 0, *bests_reverse = 0;
782
+ __m128i* vP = 0;
783
+ int32_t word = 0, band_width = 0, readLen = prof->readLen;
784
+ int8_t* read_reverse = 0;
785
+ cigar* path;
786
+ s_align* r = (s_align*)calloc(1, sizeof(s_align));
787
+ r->ref_begin1 = -1;
788
+ r->read_begin1 = -1;
789
+ r->cigar = 0;
790
+ r->cigarLen = 0;
791
+ if (maskLen < 15) {
792
+ //fprintf(stderr, "When maskLen < 15, the function ssw_align doesn't return 2nd best alignment information.\n");
793
+ }
794
+
795
+ // Find the alignment scores and ending positions
796
+ if (prof->profile_byte) {
797
+ bests = sw_sse2_byte(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_byte, -1, prof->bias, maskLen);
798
+ if (prof->profile_word && bests[0].score == 255) {
799
+ free(bests);
800
+ bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
801
+ word = 1;
802
+ } else if (bests[0].score == 255) {
803
+ fprintf(stderr, "Please set 2 to the score_size parameter of the function ssw_init, otherwise the alignment results will be incorrect.\n");
804
+ return 0;
805
+ }
806
+ }else if (prof->profile_word) {
807
+ bests = sw_sse2_word(ref, 0, refLen, readLen, weight_gapO, weight_gapE, prof->profile_word, -1, maskLen);
808
+ word = 1;
809
+ }else {
810
+ fprintf(stderr, "Please call the function ssw_init before ssw_align.\n");
811
+ return 0;
812
+ }
813
+ r->score1 = bests[0].score;
814
+ r->ref_end1 = bests[0].ref;
815
+ r->read_end1 = bests[0].read;
816
+ if (maskLen >= 15) {
817
+ r->score2 = bests[1].score;
818
+ r->ref_end2 = bests[1].ref;
819
+ } else {
820
+ r->score2 = 0;
821
+ r->ref_end2 = -1;
822
+ }
823
+ free(bests);
824
+ if (flag == 0 || (flag == 2 && r->score1 < filters)) goto end;
825
+
826
+ // Find the beginning position of the best alignment.
827
+ read_reverse = seq_reverse(prof->read, r->read_end1);
828
+ if (word == 0) {
829
+ vP = qP_byte(read_reverse, prof->mat, r->read_end1 + 1, prof->n, prof->bias);
830
+ bests_reverse = sw_sse2_byte(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, prof->bias, maskLen);
831
+ } else {
832
+ vP = qP_word(read_reverse, prof->mat, r->read_end1 + 1, prof->n);
833
+ bests_reverse = sw_sse2_word(ref, 1, r->ref_end1 + 1, r->read_end1 + 1, weight_gapO, weight_gapE, vP, r->score1, maskLen);
834
+ }
835
+ free(vP);
836
+ free(read_reverse);
837
+ r->ref_begin1 = bests_reverse[0].ref;
838
+ r->read_begin1 = r->read_end1 - bests_reverse[0].read;
839
+ free(bests_reverse);
840
+ if ((7&flag) == 0 || ((2&flag) != 0 && r->score1 < filters) || ((4&flag) != 0 && (r->ref_end1 - r->ref_begin1 > filterd || r->read_end1 - r->read_begin1 > filterd))) goto end;
841
+
842
+ // Generate cigar.
843
+ refLen = r->ref_end1 - r->ref_begin1 + 1;
844
+ readLen = r->read_end1 - r->read_begin1 + 1;
845
+ band_width = abs(refLen - readLen) + 1;
846
+ path = banded_sw(ref + r->ref_begin1, prof->read + r->read_begin1, refLen, readLen, r->score1, weight_gapO, weight_gapE, band_width, prof->mat, prof->n);
847
+ if (path == 0) r = 0;
848
+ else {
849
+ r->cigar = path->seq;
850
+ r->cigarLen = path->length;
851
+ free(path);
852
+ }
853
+
854
+ end:
855
+ return r;
856
+ }
857
+
858
+ void align_destroy (s_align* a) {
859
+ free(a->cigar);
860
+ free(a);
861
+ }