minimap2 0.2.25.0 → 0.2.25.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (123) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/minimap2/Makefile +6 -2
  4. data/ext/minimap2/NEWS.md +38 -0
  5. data/ext/minimap2/README.md +9 -3
  6. data/ext/minimap2/align.c +5 -3
  7. data/ext/minimap2/cookbook.md +2 -2
  8. data/ext/minimap2/format.c +7 -4
  9. data/ext/minimap2/kalloc.c +20 -1
  10. data/ext/minimap2/kalloc.h +13 -2
  11. data/ext/minimap2/ksw2.h +1 -0
  12. data/ext/minimap2/ksw2_extd2_sse.c +1 -1
  13. data/ext/minimap2/ksw2_exts2_sse.c +79 -40
  14. data/ext/minimap2/ksw2_extz2_sse.c +1 -1
  15. data/ext/minimap2/lchain.c +15 -16
  16. data/ext/minimap2/lib/simde/CONTRIBUTING.md +114 -0
  17. data/ext/minimap2/lib/simde/COPYING +20 -0
  18. data/ext/minimap2/lib/simde/README.md +333 -0
  19. data/ext/minimap2/lib/simde/amalgamate.py +58 -0
  20. data/ext/minimap2/lib/simde/meson.build +33 -0
  21. data/ext/minimap2/lib/simde/netlify.toml +20 -0
  22. data/ext/minimap2/lib/simde/simde/arm/neon/float32x2.h +140 -0
  23. data/ext/minimap2/lib/simde/simde/arm/neon/float32x4.h +137 -0
  24. data/ext/minimap2/lib/simde/simde/arm/neon/float64x1.h +142 -0
  25. data/ext/minimap2/lib/simde/simde/arm/neon/float64x2.h +145 -0
  26. data/ext/minimap2/lib/simde/simde/arm/neon/int16x4.h +140 -0
  27. data/ext/minimap2/lib/simde/simde/arm/neon/int16x8.h +145 -0
  28. data/ext/minimap2/lib/simde/simde/arm/neon/int32x2.h +140 -0
  29. data/ext/minimap2/lib/simde/simde/arm/neon/int32x4.h +143 -0
  30. data/ext/minimap2/lib/simde/simde/arm/neon/int64x1.h +137 -0
  31. data/ext/minimap2/lib/simde/simde/arm/neon/int64x2.h +141 -0
  32. data/ext/minimap2/lib/simde/simde/arm/neon/int8x16.h +147 -0
  33. data/ext/minimap2/lib/simde/simde/arm/neon/int8x8.h +141 -0
  34. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x4.h +134 -0
  35. data/ext/minimap2/lib/simde/simde/arm/neon/uint16x8.h +138 -0
  36. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x2.h +134 -0
  37. data/ext/minimap2/lib/simde/simde/arm/neon/uint32x4.h +137 -0
  38. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x1.h +131 -0
  39. data/ext/minimap2/lib/simde/simde/arm/neon/uint64x2.h +135 -0
  40. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x16.h +141 -0
  41. data/ext/minimap2/lib/simde/simde/arm/neon/uint8x8.h +135 -0
  42. data/ext/minimap2/lib/simde/simde/arm/neon.h +97 -0
  43. data/ext/minimap2/lib/simde/simde/check.h +267 -0
  44. data/ext/minimap2/lib/simde/simde/debug-trap.h +83 -0
  45. data/ext/minimap2/lib/simde/simde/hedley.h +1899 -0
  46. data/ext/minimap2/lib/simde/simde/simde-arch.h +445 -0
  47. data/ext/minimap2/lib/simde/simde/simde-common.h +697 -0
  48. data/ext/minimap2/lib/simde/simde/x86/avx.h +5385 -0
  49. data/ext/minimap2/lib/simde/simde/x86/avx2.h +2402 -0
  50. data/ext/minimap2/lib/simde/simde/x86/avx512bw.h +391 -0
  51. data/ext/minimap2/lib/simde/simde/x86/avx512f.h +3389 -0
  52. data/ext/minimap2/lib/simde/simde/x86/avx512vl.h +112 -0
  53. data/ext/minimap2/lib/simde/simde/x86/fma.h +659 -0
  54. data/ext/minimap2/lib/simde/simde/x86/mmx.h +2210 -0
  55. data/ext/minimap2/lib/simde/simde/x86/sse.h +3696 -0
  56. data/ext/minimap2/lib/simde/simde/x86/sse2.h +5991 -0
  57. data/ext/minimap2/lib/simde/simde/x86/sse3.h +343 -0
  58. data/ext/minimap2/lib/simde/simde/x86/sse4.1.h +1783 -0
  59. data/ext/minimap2/lib/simde/simde/x86/sse4.2.h +105 -0
  60. data/ext/minimap2/lib/simde/simde/x86/ssse3.h +1053 -0
  61. data/ext/minimap2/lib/simde/simde/x86/svml.h +543 -0
  62. data/ext/minimap2/lib/simde/test/CMakeLists.txt +166 -0
  63. data/ext/minimap2/lib/simde/test/arm/meson.build +4 -0
  64. data/ext/minimap2/lib/simde/test/arm/neon/meson.build +23 -0
  65. data/ext/minimap2/lib/simde/test/arm/neon/skel.c +871 -0
  66. data/ext/minimap2/lib/simde/test/arm/neon/test-neon-internal.h +134 -0
  67. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.c +39 -0
  68. data/ext/minimap2/lib/simde/test/arm/neon/test-neon.h +10 -0
  69. data/ext/minimap2/lib/simde/test/arm/neon/vadd.c +1260 -0
  70. data/ext/minimap2/lib/simde/test/arm/neon/vdup_n.c +873 -0
  71. data/ext/minimap2/lib/simde/test/arm/neon/vmul.c +1084 -0
  72. data/ext/minimap2/lib/simde/test/arm/neon/vsub.c +1260 -0
  73. data/ext/minimap2/lib/simde/test/arm/test-arm-internal.h +18 -0
  74. data/ext/minimap2/lib/simde/test/arm/test-arm.c +20 -0
  75. data/ext/minimap2/lib/simde/test/arm/test-arm.h +8 -0
  76. data/ext/minimap2/lib/simde/test/cmake/AddCompilerFlags.cmake +171 -0
  77. data/ext/minimap2/lib/simde/test/cmake/ExtraWarningFlags.cmake +68 -0
  78. data/ext/minimap2/lib/simde/test/meson.build +64 -0
  79. data/ext/minimap2/lib/simde/test/munit/COPYING +21 -0
  80. data/ext/minimap2/lib/simde/test/munit/Makefile +55 -0
  81. data/ext/minimap2/lib/simde/test/munit/README.md +54 -0
  82. data/ext/minimap2/lib/simde/test/munit/example.c +351 -0
  83. data/ext/minimap2/lib/simde/test/munit/meson.build +37 -0
  84. data/ext/minimap2/lib/simde/test/munit/munit.c +2055 -0
  85. data/ext/minimap2/lib/simde/test/munit/munit.h +535 -0
  86. data/ext/minimap2/lib/simde/test/run-tests.c +20 -0
  87. data/ext/minimap2/lib/simde/test/run-tests.h +260 -0
  88. data/ext/minimap2/lib/simde/test/x86/avx.c +13752 -0
  89. data/ext/minimap2/lib/simde/test/x86/avx2.c +9977 -0
  90. data/ext/minimap2/lib/simde/test/x86/avx512bw.c +2664 -0
  91. data/ext/minimap2/lib/simde/test/x86/avx512f.c +10416 -0
  92. data/ext/minimap2/lib/simde/test/x86/avx512vl.c +210 -0
  93. data/ext/minimap2/lib/simde/test/x86/fma.c +2557 -0
  94. data/ext/minimap2/lib/simde/test/x86/meson.build +33 -0
  95. data/ext/minimap2/lib/simde/test/x86/mmx.c +2878 -0
  96. data/ext/minimap2/lib/simde/test/x86/skel.c +2984 -0
  97. data/ext/minimap2/lib/simde/test/x86/sse.c +5121 -0
  98. data/ext/minimap2/lib/simde/test/x86/sse2.c +9860 -0
  99. data/ext/minimap2/lib/simde/test/x86/sse3.c +486 -0
  100. data/ext/minimap2/lib/simde/test/x86/sse4.1.c +3446 -0
  101. data/ext/minimap2/lib/simde/test/x86/sse4.2.c +101 -0
  102. data/ext/minimap2/lib/simde/test/x86/ssse3.c +2084 -0
  103. data/ext/minimap2/lib/simde/test/x86/svml.c +1545 -0
  104. data/ext/minimap2/lib/simde/test/x86/test-avx.h +16 -0
  105. data/ext/minimap2/lib/simde/test/x86/test-avx512.h +25 -0
  106. data/ext/minimap2/lib/simde/test/x86/test-mmx.h +13 -0
  107. data/ext/minimap2/lib/simde/test/x86/test-sse.h +13 -0
  108. data/ext/minimap2/lib/simde/test/x86/test-sse2.h +13 -0
  109. data/ext/minimap2/lib/simde/test/x86/test-x86-internal.h +196 -0
  110. data/ext/minimap2/lib/simde/test/x86/test-x86.c +48 -0
  111. data/ext/minimap2/lib/simde/test/x86/test-x86.h +8 -0
  112. data/ext/minimap2/main.c +13 -6
  113. data/ext/minimap2/map.c +0 -5
  114. data/ext/minimap2/minimap.h +40 -31
  115. data/ext/minimap2/minimap2.1 +19 -5
  116. data/ext/minimap2/misc/paftools.js +545 -24
  117. data/ext/minimap2/options.c +1 -1
  118. data/ext/minimap2/pyproject.toml +2 -0
  119. data/ext/minimap2/python/mappy.pyx +3 -1
  120. data/ext/minimap2/seed.c +1 -1
  121. data/ext/minimap2/setup.py +32 -22
  122. data/lib/minimap2/version.rb +1 -1
  123. metadata +100 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3c31d89fb8741da359e9fe0002b19af7b02a21aff94d9b5a03d7a112cb98f771
4
- data.tar.gz: c5fbb3f83361bd5b76f0742c5ab592aecd13b8ef0decfbdab6d536ee191351b9
3
+ metadata.gz: f58943e39da8f734af4ee4d16b5d335825ee7cf94cc45d9cf88a41d7adfe6afe
4
+ data.tar.gz: 2cb372b02bcb2cc763fb3a9fcd82219c653e61ac7f28a039e0a4d03b054aab35
5
5
  SHA512:
6
- metadata.gz: 4d25538bbf117a6ece697f28660e58caaac1ea57a09fb33d9017b683655ee08a776a061d95c327d15cbb8868672ab3ee383ac503263804529ec67a3bf255159b
7
- data.tar.gz: 70462c3229d0335ae5d1542a359c074d139d6ca4953de5cc3e7032115ac92d8eef0a93409256e85a689476bb55177bed16d3877c052a58e738a4ab876f32aa3f
6
+ metadata.gz: 52c827db192ac69bf99cfa1e8ff701a4d87965b7f443a82cdd10d109eebf31a6ac2ca966cc5b4a9d3bdcf1fdb4180f51a0f1c78f1bc6c5e937419ad3a78fa5d3
7
+ data.tar.gz: 95fd63410ffc2aa088877c9545566aa32326899658ff5fb9ec92ac9cc1c8d89c28c6a849e50bd50d5fce4bc33c6af8b3f40a65c0270f7a6ccec181375c549fb1
data/README.md CHANGED
@@ -175,9 +175,8 @@ ruby-minimap2 is a library under development and there are many points to be imp
175
175
 
176
176
  Please feel free to report [bugs](https://github.com/kojix2/ruby-minimap2/issues) and submit [pull requests](https://github.com/kojix2/ruby-minimap2/pulls)!
177
177
 
178
- Do you need commit rights to my repository?
179
- Do you want to get admin rights and take over the project?
180
- If so, please feel free to contact me @kojix2.
178
+ Many OSS projects become abandoned because only the founder has commit rights to the original repository.
179
+ If you need commit rights to the ruby-minimap2 repository or want to get admin rights and take over the project, please feel free to contact me @kojix2.
181
180
 
182
181
  ## License
183
182
 
@@ -8,6 +8,10 @@ PROG= minimap2
8
8
  PROG_EXTRA= sdust minimap2-lite
9
9
  LIBS= -lm -lz -lpthread
10
10
 
11
+ ifneq ($(aarch64),)
12
+ arm_neon=1
13
+ endif
14
+
11
15
  ifeq ($(arm_neon),) # if arm_neon is not defined
12
16
  ifeq ($(sse2only),) # if sse2only is not defined
13
17
  OBJS+=ksw2_extz2_sse41.o ksw2_extd2_sse41.o ksw2_exts2_sse41.o ksw2_extz2_sse2.o ksw2_extd2_sse2.o ksw2_exts2_sse2.o ksw2_dispatch.o
@@ -26,12 +30,12 @@ endif
26
30
 
27
31
  ifneq ($(asan),)
28
32
  CFLAGS+=-fsanitize=address
29
- LIBS+=-fsanitize=address
33
+ LIBS+=-fsanitize=address -ldl
30
34
  endif
31
35
 
32
36
  ifneq ($(tsan),)
33
37
  CFLAGS+=-fsanitize=thread
34
- LIBS+=-fsanitize=thread
38
+ LIBS+=-fsanitize=thread -ldl
35
39
  endif
36
40
 
37
41
  .PHONY:all extra clean depend
data/ext/minimap2/NEWS.md CHANGED
@@ -1,3 +1,41 @@
1
+ Release 2.25-r1173 (25 April 2023)
2
+ ----------------------------------
3
+
4
+ Notable changes:
5
+
6
+ * Improvement: use the miniprot splice model for RNA-seq alignment by default.
7
+ This model considers non-GT-AG splice sites and leads to slightly higher
8
+ (<0.1%) accuracy and sensitivity on real human data.
9
+
10
+ * Change: increased the default `-I` to `8G` such that minimap2 would create a
11
+ uni-part index for a pair of mammalian genomes. This change may increase the
12
+ memory for all-vs-all read overlap alignment given large datasets.
13
+
14
+ * New feature: output the sequences in secondary alignments with option
15
+ `--secondary-seq` (#687).
16
+
17
+ * Bugfix: --rmq was not parsed correctly (#1010)
18
+
19
+ * Bugfix: possibly incorrect coordinate when applying end bonus to the target
20
+ sequence (#1025). This is a ksw2 bug. It does not affect minimap2 as
21
+ minimap2 is not using the affected feature.
22
+
23
+ * Improvement: incorporated several changes for better compatibility with
24
+ Windows (#1051) and for minimap2 integration at Oxford Nanopore Technologies
25
+ (#1048 and #1033).
26
+
27
+ * Improvement: output the HD-line in SAM output (#1019).
28
+
29
+ * Improvement: check minimap2 index file in mappy to prevent segmentation
30
+ fault for certain indices (#1008).
31
+
32
+ For genomic sequences, minimap2 should give identical output to v2.24.
33
+ Long-read RNA-seq alignment may occasionally differ from previous versions.
34
+
35
+ (2.25: 25 April 2023, r1173)
36
+
37
+
38
+
1
39
  Release 2.24-r1122 (26 December 2021)
2
40
  -------------------------------------
3
41
 
@@ -74,8 +74,8 @@ Detailed evaluations are available from the [minimap2 paper][doi] or the
74
74
  Minimap2 is optimized for x86-64 CPUs. You can acquire precompiled binaries from
75
75
  the [release page][release] with:
76
76
  ```sh
77
- curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar -jxvf -
78
- ./minimap2-2.24_x64-linux/minimap2
77
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.25/minimap2-2.25_x64-linux.tar.bz2 | tar -jxvf -
78
+ ./minimap2-2.25_x64-linux/minimap2
79
79
  ```
80
80
  If you want to compile from the source, you need to have a C compiler, GNU make
81
81
  and zlib development files installed. Then type `make` in the source code
@@ -350,6 +350,11 @@ If you use minimap2 in your work, please cite:
350
350
  > Li, H. (2018). Minimap2: pairwise alignment for nucleotide sequences.
351
351
  > *Bioinformatics*, **34**:3094-3100. [doi:10.1093/bioinformatics/bty191][doi]
352
352
 
353
+ and/or:
354
+
355
+ > Li, H. (2021). New strategies to improve minimap2 alignment accuracy.
356
+ > *Bioinformatics*, **37**:4572-4574. [doi:10.1093/bioinformatics/btab705][doi2]
357
+
353
358
  ## <a name="dguide"></a>Developers' Guide
354
359
 
355
360
  Minimap2 is not only a command line tool, but also a programming library.
@@ -399,5 +404,6 @@ mappy` or [from BioConda][mappyconda] via `conda install -c bioconda mappy`.
399
404
  [manpage]: https://lh3.github.io/minimap2/minimap2.html
400
405
  [manpage-cs]: https://lh3.github.io/minimap2/minimap2.html#10
401
406
  [doi]: https://doi.org/10.1093/bioinformatics/bty191
402
- [smide]: https://github.com/nemequ/simde
407
+ [doi2]: https://doi.org/10.1093/bioinformatics/btab705
408
+ [simde]: https://github.com/nemequ/simde
403
409
  [unimap]: https://github.com/lh3/unimap
data/ext/minimap2/align.c CHANGED
@@ -326,9 +326,11 @@ static void mm_align_pair(void *km, const mm_mapopt_t *opt, int qlen, const uint
326
326
  if (opt->max_sw_mat > 0 && (int64_t)tlen * qlen > opt->max_sw_mat) {
327
327
  ksw_reset_extz(ez);
328
328
  ez->zdropped = 1;
329
- } else if (opt->flag & MM_F_SPLICE)
330
- ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, opt->junc_bonus, flag, junc, ez);
331
- else if (opt->q == opt->q2 && opt->e == opt->e2)
329
+ } else if (opt->flag & MM_F_SPLICE) {
330
+ int flag_tmp = flag;
331
+ if (!(opt->flag & MM_F_SPLICE_OLD)) flag_tmp |= KSW_EZ_SPLICE_CMPLX;
332
+ ksw_exts2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->noncan, zdrop, opt->junc_bonus, flag_tmp, junc, ez);
333
+ } else if (opt->q == opt->q2 && opt->e == opt->e2)
332
334
  ksw_extz2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, w, zdrop, end_bonus, flag, ez);
333
335
  else
334
336
  ksw_extd2_sse(km, qlen, qseq, tlen, tseq, 5, mat, opt->q, opt->e, opt->q2, opt->e2, w, zdrop, end_bonus, flag, ez);
@@ -31,8 +31,8 @@ To acquire the data used in this cookbook and to install minimap2 and paftools,
31
31
  please follow the command lines below:
32
32
  ```sh
33
33
  # install minimap2 executables
34
- curl -L https://github.com/lh3/minimap2/releases/download/v2.24/minimap2-2.24_x64-linux.tar.bz2 | tar jxf -
35
- cp minimap2-2.24_x64-linux/{minimap2,k8,paftools.js} . # copy executables
34
+ curl -L https://github.com/lh3/minimap2/releases/download/v2.25/minimap2-2.25_x64-linux.tar.bz2 | tar jxf -
35
+ cp minimap2-2.25_x64-linux/{minimap2,k8,paftools.js} . # copy executables
36
36
  export PATH="$PATH:"`pwd` # put the current directory on PATH
37
37
  # download example datasets
38
38
  curl -L https://github.com/lh3/minimap2/releases/download/v2.10/cookbook-data.tgz | tar zxf -
@@ -119,6 +119,7 @@ int mm_write_sam_hdr(const mm_idx_t *idx, const char *rg, const char *ver, int a
119
119
  {
120
120
  kstring_t str = {0,0,0};
121
121
  int ret = 0;
122
+ mm_sprintf_lite(&str, "@HD\tVN:1.6\tSO:unsorted\tGO:query\n");
122
123
  if (idx) {
123
124
  uint32_t i;
124
125
  for (i = 0; i < idx->n_seq; ++i)
@@ -369,14 +370,16 @@ static void write_sam_cigar(kstring_t *s, int sam_flag, int in_tag, int qlen, co
369
370
  clip_len[0] = r->rev? qlen - r->qe : r->qs;
370
371
  clip_len[1] = r->rev? r->qs : qlen - r->qe;
371
372
  if (in_tag) {
372
- int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 5 : 4;
373
+ int clip_char = (((sam_flag&0x800) || ((sam_flag&0x100) && (opt_flag&MM_F_SECONDARY_SEQ))) &&
374
+ !(opt_flag&MM_F_SOFTCLIP)) ? 5 : 4;
373
375
  mm_sprintf_lite(s, "\tCG:B:I");
374
376
  if (clip_len[0]) mm_sprintf_lite(s, ",%u", clip_len[0]<<4|clip_char);
375
377
  for (k = 0; k < r->p->n_cigar; ++k)
376
378
  mm_sprintf_lite(s, ",%u", r->p->cigar[k]);
377
379
  if (clip_len[1]) mm_sprintf_lite(s, ",%u", clip_len[1]<<4|clip_char);
378
380
  } else {
379
- int clip_char = (sam_flag&0x800) && !(opt_flag&MM_F_SOFTCLIP)? 'H' : 'S';
381
+ int clip_char = (((sam_flag&0x800) || ((sam_flag&0x100) && (opt_flag&MM_F_SECONDARY_SEQ))) &&
382
+ !(opt_flag&MM_F_SOFTCLIP)) ? 'H' : 'S';
380
383
  assert(clip_len[0] < qlen && clip_len[1] < qlen);
381
384
  if (clip_len[0]) mm_sprintf_lite(s, "%d%c", clip_len[0], clip_char);
382
385
  for (k = 0; k < r->p->n_cigar; ++k)
@@ -451,7 +454,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
451
454
  if (cigar_in_tag) {
452
455
  int slen;
453
456
  if ((flag & 0x900) == 0 || (opt_flag & MM_F_SOFTCLIP)) slen = t->l_seq;
454
- else if (flag & 0x100) slen = 0;
457
+ else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)) slen = 0;
455
458
  else slen = r->qe - r->qs;
456
459
  mm_sprintf_lite(s, "%dS%dN", slen, r->re - r->rs);
457
460
  } else write_sam_cigar(s, flag, 0, t->l_seq, r, opt_flag);
@@ -492,7 +495,7 @@ void mm_write_sam3(kstring_t *s, const mm_idx_t *mi, const mm_bseq1_t *t, int se
492
495
  mm_sprintf_lite(s, "\t");
493
496
  if (t->qual) sam_write_sq(s, t->qual, t->l_seq, r->rev, 0);
494
497
  else mm_sprintf_lite(s, "*");
495
- } else if (flag & 0x100) {
498
+ } else if ((flag & 0x100) && !(opt_flag & MM_F_SECONDARY_SEQ)){
496
499
  mm_sprintf_lite(s, "*\t*");
497
500
  } else {
498
501
  sam_write_sq(s, t->seq + r->qs, r->qe - r->qs, r->rev, r->rev);
@@ -40,7 +40,8 @@ void *km_init2(void *km_par, size_t min_core_size)
40
40
  kmem_t *km;
41
41
  km = (kmem_t*)kcalloc(km_par, 1, sizeof(kmem_t));
42
42
  km->par = km_par;
43
- km->min_core_size = min_core_size > 0? min_core_size : 0x80000;
43
+ if (km_par) km->min_core_size = min_core_size > 0? min_core_size : ((kmem_t*)km_par)->min_core_size - 2;
44
+ else km->min_core_size = min_core_size > 0? min_core_size : 0x80000;
44
45
  return (void*)km;
45
46
  }
46
47
 
@@ -183,6 +184,16 @@ void *krealloc(void *_km, void *ap, size_t n_bytes) // TODO: this can be made mo
183
184
  return q;
184
185
  }
185
186
 
187
+ void *krelocate(void *km, void *ap, size_t n_bytes)
188
+ {
189
+ void *p;
190
+ if (km == 0 || ap == 0) return ap;
191
+ p = kmalloc(km, n_bytes);
192
+ memcpy(p, ap, n_bytes);
193
+ kfree(km, ap);
194
+ return p;
195
+ }
196
+
186
197
  void km_stat(const void *_km, km_stat_t *s)
187
198
  {
188
199
  kmem_t *km = (kmem_t*)_km;
@@ -203,3 +214,11 @@ void km_stat(const void *_km, km_stat_t *s)
203
214
  s->largest = s->largest > size? s->largest : size;
204
215
  }
205
216
  }
217
+
218
+ void km_stat_print(const void *km)
219
+ {
220
+ km_stat_t st;
221
+ km_stat(km, &st);
222
+ fprintf(stderr, "[km_stat] cap=%ld, avail=%ld, largest=%ld, n_core=%ld, n_block=%ld\n",
223
+ st.capacity, st.available, st.largest, st.n_blocks, st.n_cores);
224
+ }
@@ -13,6 +13,7 @@ typedef struct {
13
13
 
14
14
  void *kmalloc(void *km, size_t size);
15
15
  void *krealloc(void *km, void *ptr, size_t size);
16
+ void *krelocate(void *km, void *ap, size_t n_bytes);
16
17
  void *kcalloc(void *km, size_t count, size_t size);
17
18
  void kfree(void *km, void *ptr);
18
19
 
@@ -20,11 +21,21 @@ void *km_init(void);
20
21
  void *km_init2(void *km_par, size_t min_core_size);
21
22
  void km_destroy(void *km);
22
23
  void km_stat(const void *_km, km_stat_t *s);
24
+ void km_stat_print(const void *km);
23
25
 
24
26
  #ifdef __cplusplus
25
27
  }
26
28
  #endif
27
29
 
30
+ #define Kmalloc(km, type, cnt) ((type*)kmalloc((km), (cnt) * sizeof(type)))
31
+ #define Kcalloc(km, type, cnt) ((type*)kcalloc((km), (cnt), sizeof(type)))
32
+ #define Krealloc(km, type, ptr, cnt) ((type*)krealloc((km), (ptr), (cnt) * sizeof(type)))
33
+
34
+ #define Kexpand(km, type, a, m) do { \
35
+ (m) = (m) >= 4? (m) + ((m)>>1) : 16; \
36
+ (a) = Krealloc(km, type, (a), (m)); \
37
+ } while (0)
38
+
28
39
  #define KMALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kmalloc((km), (len) * sizeof(*(ptr))))
29
40
  #define KCALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))kcalloc((km), (len), sizeof(*(ptr))))
30
41
  #define KREALLOC(km, ptr, len) ((ptr) = (__typeof__(ptr))krealloc((km), (ptr), (len) * sizeof(*(ptr))))
@@ -50,7 +61,7 @@ void km_stat(const void *_km, km_stat_t *s);
50
61
  } kmp_##name##_t; \
51
62
  SCOPE kmp_##name##_t *kmp_init_##name(void *km) { \
52
63
  kmp_##name##_t *mp; \
53
- KCALLOC(km, mp, 1); \
64
+ mp = Kcalloc(km, kmp_##name##_t, 1); \
54
65
  mp->km = km; \
55
66
  return mp; \
56
67
  } \
@@ -66,7 +77,7 @@ void km_stat(const void *_km, km_stat_t *s);
66
77
  } \
67
78
  SCOPE void kmp_free_##name(kmp_##name##_t *mp, kmptype_t *p) { \
68
79
  --mp->cnt; \
69
- if (mp->n == mp->max) KEXPAND(mp->km, mp->buf, mp->max); \
80
+ if (mp->n == mp->max) Kexpand(mp->km, kmptype_t*, mp->buf, mp->max); \
70
81
  mp->buf[mp->n++] = p; \
71
82
  }
72
83
 
data/ext/minimap2/ksw2.h CHANGED
@@ -15,6 +15,7 @@
15
15
  #define KSW_EZ_SPLICE_FOR 0x100
16
16
  #define KSW_EZ_SPLICE_REV 0x200
17
17
  #define KSW_EZ_SPLICE_FLANK 0x400
18
+ #define KSW_EZ_SPLICE_CMPLX 0x800
18
19
 
19
20
  // The subset of CIGAR operators used by ksw code.
20
21
  // Use MM_CIGAR_* from minimap.h if you need the full list.
@@ -358,7 +358,7 @@ void ksw_extd2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
358
358
  } else H[0] = v8[0] - qe, max_H = H[0], max_t = 0; // special casing r==0
359
359
  // update ez
360
360
  if (en0 == tlen - 1 && H[en0] > ez->mte)
361
- ez->mte = H[en0], ez->mte_q = r - en;
361
+ ez->mte = H[en0], ez->mte_q = r - en0;
362
362
  if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
363
363
  ez->mqe = H[st0], ez->mqe_t = st0;
364
364
  if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, e2)) break;
@@ -71,6 +71,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
71
71
 
72
72
  ksw_reset_extz(ez);
73
73
  if (m <= 1 || qlen <= 0 || tlen <= 0 || q2 <= q + e) return;
74
+ assert((flag & KSW_EZ_SPLICE_FOR) == 0 || (flag & KSW_EZ_SPLICE_REV) == 0); // can't be both set
74
75
 
75
76
  zero_ = _mm_set1_epi8(0);
76
77
  q_ = _mm_set1_epi8(q);
@@ -118,55 +119,93 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
118
119
 
119
120
  // set the donor and acceptor arrays. TODO: this assumes 0/1/2/3 encoding!
120
121
  if (flag & (KSW_EZ_SPLICE_FOR|KSW_EZ_SPLICE_REV)) {
121
- int semi_cost = flag&KSW_EZ_SPLICE_FLANK? -noncan/2 : 0; // GTr or yAG is worth 0.5 bit; see PMID:18688272
122
- memset(donor, -noncan, tlen_ * 16);
123
- memset(acceptor, -noncan, tlen_ * 16);
122
+ const int sp0[4] = { 8, 15, 21, 30 };
123
+ int sp[4];
124
+ if (flag & KSW_EZ_SPLICE_CMPLX) {
125
+ for (t = 0; t < 4; ++t)
126
+ sp[t] = (int)((double)sp0[t] / 3. + .499);
127
+ } else {
128
+ sp[0] = flag&KSW_EZ_SPLICE_FLANK? noncan / 2 : 0;
129
+ sp[1] = sp[2] = sp[3] = noncan;
130
+ }
131
+ memset(donor, -sp[3], tlen_ * 16);
132
+ memset(acceptor, -sp[3], tlen_ * 16);
124
133
  if (!(flag & KSW_EZ_REV_CIGAR)) {
125
134
  for (t = 0; t < tlen - 4; ++t) {
126
- int can_type = 0; // type of canonical site: 0=none, 1=GT/AG only, 2=GTr/yAG
127
- if ((flag & KSW_EZ_SPLICE_FOR) && target[t+1] == 2 && target[t+2] == 3) can_type = 1; // GTr...
128
- if ((flag & KSW_EZ_SPLICE_REV) && target[t+1] == 1 && target[t+2] == 3) can_type = 1; // CTr...
129
- if (can_type && (target[t+3] == 0 || target[t+3] == 2)) can_type = 2;
130
- if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost;
135
+ int z = 3;
136
+ if (flag & KSW_EZ_SPLICE_FOR) {
137
+ if (target[t+1] == 2 && target[t+2] == 3) // |GT.
138
+ z = target[t+3] == 0 || target[t+3] == 2? -1 : 0; // |GTr or not
139
+ else if (target[t+1] == 2 && target[t+2] == 1) z = 1; // |GC.
140
+ else if (target[t+1] == 0 && target[t+2] == 3) z = 2; // |AT.
141
+ } else if (flag & KSW_EZ_SPLICE_REV) {
142
+ if (target[t+1] == 1 && target[t+2] == 3) // |CT. (revcomp of .AG|)
143
+ z = target[t+3] == 0 || target[t+3] == 2? -1 : 0;
144
+ else if (target[t+1] == 2 && target[t+2] == 3) z = 2; // |GT. (revcomp of .AC|)
145
+ }
146
+ ((int8_t*)donor)[t] = z < 0? 0 : -sp[z];
131
147
  }
132
- if (junc)
133
- for (t = 0; t < tlen - 1; ++t)
134
- if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
135
- ((int8_t*)donor)[t] += junc_bonus;
136
148
  for (t = 2; t < tlen; ++t) {
137
- int can_type = 0;
138
- if ((flag & KSW_EZ_SPLICE_FOR) && target[t-1] == 0 && target[t] == 2) can_type = 1; // ...yAG
139
- if ((flag & KSW_EZ_SPLICE_REV) && target[t-1] == 0 && target[t] == 1) can_type = 1; // ...yAC
140
- if (can_type && (target[t-2] == 1 || target[t-2] == 3)) can_type = 2;
141
- if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost;
149
+ int z = 3;
150
+ if (flag & KSW_EZ_SPLICE_FOR) {
151
+ if (target[t-1] == 0 && target[t] == 2) // .AG|
152
+ z = target[t-2] == 1 || target[t-2] == 3? -1 : 0; // yAG| or not
153
+ else if (target[t-1] == 0 && target[t] == 1) z = 2; // .AC|
154
+ } else if (flag & KSW_EZ_SPLICE_REV) {
155
+ if (target[t-1] == 0 && target[t] == 1) // .AC| (revcomp of |GT.)
156
+ z = target[t-2] == 1 || target[t-2] == 3? -1 : 0; // yAC| or not
157
+ else if (target[t-1] == 2 && target[t] == 1) z = 1; // .GC| (revcomp of |GC.)
158
+ else if (target[t-1] == 0 && target[t] == 3) z = 2; // .AT| (revcomp of |AT.)
159
+ }
160
+ ((int8_t*)acceptor)[t] = z < 0? 0 : -sp[z];
142
161
  }
143
- if (junc)
144
- for (t = 0; t < tlen; ++t)
145
- if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&4)))
146
- ((int8_t*)acceptor)[t] += junc_bonus;
147
162
  } else {
148
163
  for (t = 0; t < tlen - 4; ++t) {
149
- int can_type = 0; // type of canonical site: 0=none, 1=GT/AG only, 2=GTr/yAG
150
- if ((flag & KSW_EZ_SPLICE_FOR) && target[t+1] == 2 && target[t+2] == 0) can_type = 1; // GAy...
151
- if ((flag & KSW_EZ_SPLICE_REV) && target[t+1] == 1 && target[t+2] == 0) can_type = 1; // CAy...
152
- if (can_type && (target[t+3] == 1 || target[t+3] == 3)) can_type = 2;
153
- if (can_type) ((int8_t*)donor)[t] = can_type == 2? 0 : semi_cost;
164
+ int z = 3;
165
+ if (flag & KSW_EZ_SPLICE_FOR) {
166
+ if (target[t+1] == 2 && target[t+2] == 0) // |GA. (rev of .AG|)
167
+ z = target[t+3] == 1 || target[t+3] == 3? -1 : 0;
168
+ else if (target[t+1] == 1 && target[t+2] == 0) z = 2; // |CA. (rev of .AC|)
169
+ } else if (flag & KSW_EZ_SPLICE_REV) {
170
+ if (target[t+1] == 1 && target[t+2] == 0) // |CA. (comp of |GT.)
171
+ z = target[t+3] == 1 || target[t+3] == 3? -1 : 0;
172
+ else if (target[t+1] == 1 && target[t+2] == 2) z = 1; // |CG. (comp of |GC.)
173
+ else if (target[t+1] == 3 && target[t+2] == 0) z = 2; // |TA. (comp of |AT.)
174
+ }
175
+ ((int8_t*)donor)[t] = z < 0? 0 : -sp[z];
154
176
  }
155
- if (junc)
156
- for (t = 0; t < tlen - 1; ++t)
157
- if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&4)))
158
- ((int8_t*)donor)[t] += junc_bonus;
159
177
  for (t = 2; t < tlen; ++t) {
160
- int can_type = 0;
161
- if ((flag & KSW_EZ_SPLICE_FOR) && target[t-1] == 3 && target[t] == 2) can_type = 1; // ...rTG
162
- if ((flag & KSW_EZ_SPLICE_REV) && target[t-1] == 3 && target[t] == 1) can_type = 1; // ...rTC
163
- if (can_type && (target[t-2] == 0 || target[t-2] == 2)) can_type = 2;
164
- if (can_type) ((int8_t*)acceptor)[t] = can_type == 2? 0 : semi_cost;
178
+ int z = 3;
179
+ if (flag & KSW_EZ_SPLICE_FOR) {
180
+ if (target[t-1] == 3 && target[t] == 2) // .TG| (rev of |GT.)
181
+ z = target[t-2] == 0 || target[t-2] == 2? -1 : 0;
182
+ else if (target[t-1] == 1 && target[t] == 2) z = 1; // .CG| (rev of |GC.)
183
+ else if (target[t-1] == 3 && target[t] == 0) z = 2; // .TA| (rev of |AT.)
184
+ } else if (flag & KSW_EZ_SPLICE_REV) {
185
+ if (target[t-1] == 3 && target[t] == 1) // .TC| (comp of .AG|)
186
+ z = target[t-2] == 0 || target[t-2] == 2? -1 : 0;
187
+ else if (target[t-1] == 3 && target[t] == 2) z = 2; // .TG| (comp of .AC|)
188
+ }
189
+ ((int8_t*)acceptor)[t] = z < 0? 0 : -sp[z];
165
190
  }
166
- if (junc)
167
- for (t = 0; t < tlen; ++t)
168
- if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&8)))
169
- ((int8_t*)acceptor)[t] += junc_bonus;
191
+ }
192
+ }
193
+
194
+ if (junc) {
195
+ if (!(flag & KSW_EZ_REV_CIGAR)) {
196
+ for (t = 0; t < tlen - 1; ++t)
197
+ if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&8)))
198
+ ((int8_t*)donor)[t] += junc_bonus;
199
+ for (t = 0; t < tlen; ++t)
200
+ if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&4)))
201
+ ((int8_t*)acceptor)[t] += junc_bonus;
202
+ } else {
203
+ for (t = 0; t < tlen - 1; ++t)
204
+ if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t+1]&2)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t+1]&4)))
205
+ ((int8_t*)donor)[t] += junc_bonus;
206
+ for (t = 0; t < tlen; ++t)
207
+ if (((flag & KSW_EZ_SPLICE_FOR) && (junc[t]&1)) || ((flag & KSW_EZ_SPLICE_REV) && (junc[t]&8)))
208
+ ((int8_t*)acceptor)[t] += junc_bonus;
170
209
  }
171
210
  }
172
211
 
@@ -376,7 +415,7 @@ void ksw_exts2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
376
415
  } else H[0] = v8[0] - qe, max_H = H[0], max_t = 0; // special casing r==0
377
416
  // update ez
378
417
  if (en0 == tlen - 1 && H[en0] > ez->mte)
379
- ez->mte = H[en0], ez->mte_q = r - en;
418
+ ez->mte = H[en0], ez->mte_q = r - en0;
380
419
  if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
381
420
  ez->mqe = H[st0], ez->mqe_t = st0;
382
421
  if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, 0)) break;
@@ -269,7 +269,7 @@ void ksw_extz2_sse(void *km, int qlen, const uint8_t *query, int tlen, const uin
269
269
  } else H[0] = v8[0] - qe - qe, max_H = H[0], max_t = 0; // special casing r==0
270
270
  // update ez
271
271
  if (en0 == tlen - 1 && H[en0] > ez->mte)
272
- ez->mte = H[en0], ez->mte_q = r - en;
272
+ ez->mte = H[en0], ez->mte_q = r - en0;
273
273
  if (r - st0 == qlen - 1 && H[st0] > ez->mqe)
274
274
  ez->mqe = H[st0], ez->mqe_t = st0;
275
275
  if (ksw_apply_zdrop(ez, 1, max_H, r, max_t, zdrop, e)) break;
@@ -35,7 +35,7 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
35
35
  for (i = 0, n_z = 0; i < n; ++i) // precompute n_z
36
36
  if (f[i] >= min_sc) ++n_z;
37
37
  if (n_z == 0) return 0;
38
- KMALLOC(km, z, n_z);
38
+ z = Kmalloc(km, mm128_t, n_z);
39
39
  for (i = 0, k = 0; i < n; ++i) // populate z[]
40
40
  if (f[i] >= min_sc) z[k].x = f[i], z[k++].y = i;
41
41
  radix_sort_128x(z, z + n_z);
@@ -54,7 +54,7 @@ uint64_t *mg_chain_backtrack(void *km, int64_t n, const int32_t *f, const int64_
54
54
  else n_v = n_v0;
55
55
  }
56
56
  }
57
- KMALLOC(km, u, n_u);
57
+ u = Kmalloc(km, uint64_t, n_u);
58
58
  memset(t, 0, n * 4);
59
59
  for (k = n_z - 1, n_v = n_u = 0; k >= 0; --k) { // populate u[]
60
60
  if (t[z[k].y] == 0) {
@@ -82,7 +82,7 @@ static mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32
82
82
  int64_t i, j, k;
83
83
 
84
84
  // write the result to b[]
85
- KMALLOC(km, b, n_v);
85
+ b = Kmalloc(km, mm128_t, n_v);
86
86
  for (i = 0, k = 0; i < n_u; ++i) {
87
87
  int32_t k0 = k, ni = (int32_t)u[i];
88
88
  for (j = 0; j < ni; ++j)
@@ -91,13 +91,13 @@ static mm128_t *compact_a(void *km, int32_t n_u, uint64_t *u, int32_t n_v, int32
91
91
  kfree(km, v);
92
92
 
93
93
  // sort u[] and a[] by the target position, such that adjacent chains may be joined
94
- KMALLOC(km, w, n_u);
94
+ w = Kmalloc(km, mm128_t, n_u);
95
95
  for (i = k = 0; i < n_u; ++i) {
96
96
  w[i].x = b[k].x, w[i].y = (uint64_t)k<<32|i;
97
97
  k += (int32_t)u[i];
98
98
  }
99
99
  radix_sort_128x(w, w + n_u);
100
- KMALLOC(km, u2, n_u);
100
+ u2 = Kmalloc(km, uint64_t, n_u);
101
101
  for (i = k = 0; i < n_u; ++i) {
102
102
  int32_t j = (int32_t)w[i].y, n = (int32_t)u[j];
103
103
  u2[i] = u[j];
@@ -138,7 +138,7 @@ static inline int32_t comput_sc(const mm128_t *ai, const mm128_t *aj, int32_t ma
138
138
  }
139
139
 
140
140
  /* Input:
141
- * a[].x: tid<<33 | rev<<32 | tpos
141
+ * a[].x: rev<<63 | tid<<32 | tpos
142
142
  * a[].y: flags<<40 | q_span<<32 | q_pos
143
143
  * Output:
144
144
  * n_u: #chains
@@ -160,10 +160,10 @@ mm128_t *mg_lchain_dp(int max_dist_x, int max_dist_y, int bw, int max_skip, int
160
160
  if (max_dist_x < bw) max_dist_x = bw;
161
161
  if (max_dist_y < bw && !is_cdna) max_dist_y = bw;
162
162
  if (is_cdna) max_drop = INT32_MAX;
163
- KMALLOC(km, p, n);
164
- KMALLOC(km, f, n);
165
- KMALLOC(km, v, n);
166
- KCALLOC(km, t, n);
163
+ p = Kmalloc(km, int64_t, n);
164
+ f = Kmalloc(km, int32_t, n);
165
+ v = Kmalloc(km, int32_t, n);
166
+ t = Kcalloc(km, int32_t, n);
167
167
 
168
168
  // fill the score and backtrack arrays
169
169
  for (i = 0, max_ii = -1; i < n; ++i) {
@@ -251,7 +251,7 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
251
251
  int64_t n, mm128_t *a, int *n_u_, uint64_t **_u, void *km)
252
252
  {
253
253
  int32_t *f,*t, *v, n_u, n_v, mmax_f = 0, max_rmq_size = 0, max_drop = bw;
254
- int64_t *p, i, i0, st = 0, st_inner = 0, n_iter = 0;
254
+ int64_t *p, i, i0, st = 0, st_inner = 0;
255
255
  uint64_t *u;
256
256
  lc_elem_t *root = 0, *root_inner = 0;
257
257
  void *mem_mp = 0;
@@ -264,10 +264,10 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
264
264
  }
265
265
  if (max_dist < bw) max_dist = bw;
266
266
  if (max_dist_inner <= 0 || max_dist_inner >= max_dist) max_dist_inner = 0;
267
- KMALLOC(km, p, n);
268
- KMALLOC(km, f, n);
269
- KCALLOC(km, t, n);
270
- KMALLOC(km, v, n);
267
+ p = Kmalloc(km, int64_t, n);
268
+ f = Kmalloc(km, int32_t, n);
269
+ t = Kcalloc(km, int32_t, n);
270
+ v = Kmalloc(km, int32_t, n);
271
271
  mem_mp = km_init2(km, 0x10000);
272
272
  mp = kmp_init_rmq(mem_mp);
273
273
 
@@ -345,7 +345,6 @@ mm128_t *mg_lchain_rmq(int max_dist, int max_dist_inner, int bw, int max_chn_ski
345
345
  }
346
346
  if (!krmq_itr_prev(lc_elem, &itr)) break;
347
347
  }
348
- n_iter += n_rmq_iter;
349
348
  }
350
349
  }
351
350
  }