minimap2 0.2.24.6 → 0.2.25.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -8,7 +8,7 @@ void mm_idxopt_init(mm_idxopt_t *opt)
8
8
  opt->k = 15, opt->w = 10, opt->flag = 0;
9
9
  opt->bucket_bits = 14;
10
10
  opt->mini_batch_size = 50000000;
11
- opt->batch_size = 4000000000ULL;
11
+ opt->batch_size = 8000000000ULL;
12
12
  }
13
13
 
14
14
  void mm_mapopt_init(mm_mapopt_t *opt)
@@ -0,0 +1,2 @@
1
+ [build-system]
2
+ requires = ["setuptools", "wheel", "Cython"]
@@ -3,7 +3,7 @@ from libc.stdlib cimport free
3
3
  cimport cmappy
4
4
  import sys
5
5
 
6
- __version__ = '2.24'
6
+ __version__ = '2.25'
7
7
 
8
8
  cmappy.mm_reset_timer()
9
9
 
@@ -172,6 +172,7 @@ cdef class Aligner:
172
172
  cdef cmappy.mm_mapopt_t map_opt
173
173
 
174
174
  if self._idx == NULL: return
175
+ if ((self.map_opt.flag & 4) and (self._idx.flag & 2)): return
175
176
  map_opt = self.map_opt
176
177
  if max_frag_len is not None: map_opt.max_frag_len = max_frag_len
177
178
  if extra_flags is not None: map_opt.flag |= extra_flags
@@ -217,6 +218,7 @@ cdef class Aligner:
217
218
  cdef int l
218
219
  cdef char *s
219
220
  if self._idx == NULL: return
221
+ if ((self.map_opt.flag & 4) and (self._idx.flag & 2)): return
220
222
  s = cmappy.mappy_fetch_seq(self._idx, name.encode(), start, end, &l)
221
223
  if l == 0: return None
222
224
  r = s[:l] if isinstance(s, str) else s[:l].decode()
data/ext/minimap2/seed.c CHANGED
@@ -7,7 +7,7 @@ void mm_seed_mz_flt(void *km, mm128_v *mv, int32_t q_occ_max, float q_occ_frac)
7
7
  mm128_t *a;
8
8
  size_t i, j, st;
9
9
  if (mv->n <= q_occ_max || q_occ_frac <= 0.0f || q_occ_max <= 0) return;
10
- KMALLOC(km, a, mv->n);
10
+ a = Kmalloc(km, mm128_t, mv->n);
11
11
  for (i = 0; i < mv->n; ++i)
12
12
  a[i].x = mv->a[i].x, a[i].y = i;
13
13
  radix_sort_128x(a, a + mv->n);
@@ -1,29 +1,40 @@
1
1
  try:
2
2
  from setuptools import setup, Extension
3
+ from setuptools.command.build_ext import build_ext
3
4
  except ImportError:
4
5
  from distutils.core import setup
5
6
  from distutils.extension import Extension
7
+ from distutils.command.build_ext import build_ext
6
8
 
7
- import sys, platform
9
+ import sys, platform, subprocess
8
10
 
9
- sys.path.append('python')
10
-
11
- extra_compile_args = ['-DHAVE_KALLOC']
12
- include_dirs = ["."]
13
-
14
- if platform.machine() in ["aarch64", "arm64"]:
15
- include_dirs.append("sse2neon/")
16
- extra_compile_args.extend(['-ftree-vectorize', '-DKSW_SSE2_ONLY', '-D__SSE2__'])
17
- else:
18
- extra_compile_args.append('-msse4.1') # WARNING: ancient x86_64 CPUs don't have SSE4
19
11
 
20
12
  def readme():
21
13
  with open('python/README.rst') as f:
22
14
  return f.read()
23
15
 
16
+
17
+ class LibMM2Build(build_ext):
18
+ # Uses Makefile to build library, avoids duplicating logic
19
+ # determining which objects to compile but does require
20
+ # end users to have Make (since precompiled wheels are not
21
+ # distributed on PyPI).
22
+ def run(self):
23
+ def compile_libminimap2(*args, **kwargs):
24
+ cmd = ['make', 'libminimap2.a'] + list(args)
25
+ subprocess.check_call(cmd)
26
+ options = []
27
+ if platform.machine() in ["aarch64", "arm64"]:
28
+ options = ["arm_neon=1", "aarch64=1"]
29
+ self.execute(
30
+ compile_libminimap2, options,
31
+ 'Compiling libminimap2 using Makefile')
32
+ build_ext.run(self)
33
+
34
+
24
35
  setup(
25
36
  name = 'mappy',
26
- version = '2.24',
37
+ version = '2.25',
27
38
  url = 'https://github.com/lh3/minimap2',
28
39
  description = 'Minimap2 python binding',
29
40
  long_description = readme(),
@@ -32,16 +43,15 @@ setup(
32
43
  license = 'MIT',
33
44
  keywords = 'sequence-alignment',
34
45
  scripts = ['python/minimap2.py'],
35
- ext_modules = [Extension('mappy',
36
- sources = ['python/mappy.pyx', 'align.c', 'bseq.c', 'lchain.c', 'seed.c', 'format.c', 'hit.c', 'index.c', 'pe.c', 'options.c',
37
- 'ksw2_extd2_sse.c', 'ksw2_exts2_sse.c', 'ksw2_extz2_sse.c', 'ksw2_ll_sse.c',
38
- 'kalloc.c', 'kthread.c', 'map.c', 'misc.c', 'sdust.c', 'sketch.c', 'esterr.c', 'splitidx.c'],
39
- depends = ['minimap.h', 'bseq.h', 'kalloc.h', 'kdq.h', 'khash.h', 'kseq.h', 'ksort.h',
40
- 'ksw2.h', 'kthread.h', 'kvec.h', 'mmpriv.h', 'sdust.h',
41
- 'python/cmappy.h', 'python/cmappy.pxd'],
42
- extra_compile_args = extra_compile_args,
43
- include_dirs = include_dirs,
44
- libraries = ['z', 'm', 'pthread'])],
46
+ cmdclass = {'build_ext': LibMM2Build},
47
+ ext_modules = [
48
+ Extension(
49
+ 'mappy',
50
+ sources = ['python/mappy.pyx'],
51
+ depends = ['python/cmappy.h', 'python/cmappy.pxd'],
52
+ include_dirs = ['.'],
53
+ extra_objects = ['libminimap2.a'],
54
+ libraries = ['z', 'm', 'pthread'])],
45
55
  classifiers = [
46
56
  'Development Status :: 5 - Production/Stable',
47
57
  'License :: OSI Approved :: MIT License',
data/ext/minimap2.patch CHANGED
@@ -1,5 +1,5 @@
1
- --- Makefile.org 2021-05-27 15:45:11.993128205 +0900
2
- +++ Makefile 2021-05-27 15:46:02.320569154 +0900
1
+ --- a/Makefile
2
+ +++ b/Makefile
3
3
  @@ -1,9 +1,9 @@
4
4
  -CFLAGS= -g -Wall -O2 -Wc++-compat #-Wextra
5
5
  +CFLAGS= -g -Wall -O2 -Wc++-compat -fPIC #-Wextra
@@ -12,7 +12,7 @@
12
12
  PROG= minimap2
13
13
  PROG_EXTRA= sdust minimap2-lite
14
14
  LIBS= -lm -lz -lpthread
15
- @@ -130,3 +130,4 @@ sdust.o: kalloc.h kdq.h kvec.h sdust.h
15
+ @@ -134,3 +134,4 @@ sdust.o: kalloc.h kdq.h kvec.h sdust.h
16
16
  seed.o: mmpriv.h minimap.h bseq.h kseq.h kalloc.h ksort.h
17
17
  sketch.o: kvec.h kalloc.h mmpriv.h minimap.h bseq.h kseq.h
18
18
  splitidx.o: mmpriv.h minimap.h bseq.h kseq.h
@@ -139,6 +139,7 @@ module Minimap2
139
139
  )
140
140
 
141
141
  return if index.null?
142
+ return if (map_opt[:flag] & 4).zero? && (index[:flag] & 2).zero?
142
143
 
143
144
  map_opt[:max_frag_len] = max_frag_len if max_frag_len
144
145
  map_opt[:flag] |= extra_flags if extra_flags
@@ -201,6 +202,9 @@ module Minimap2
201
202
  # @param stop
202
203
 
203
204
  def seq(name, start = 0, stop = 0x7fffffff)
205
+ return if index.null?
206
+ return if (map_opt[:flag] & 4).zero? && (index[:flag] & 2).zero?
207
+
204
208
  lp = ::FFI::MemoryPointer.new(:int)
205
209
  s = FFI.mappy_fetch_seq(index, name, start, stop, lp)
206
210
  l = lp.read_int
@@ -34,10 +34,12 @@ module Minimap2
34
34
  NO_END_FLT = 0x10000000
35
35
  HARD_MLEVEL = 0x20000000
36
36
  SAM_HIT_ONLY = 0x40000000
37
- RMQ = 0x80000000 # LL
38
- QSTRAND = 0x100000000 # LL
39
- NO_INV = 0x200000000 # LL
40
- NO_HASH_NAME = 0x400000000 # LL
37
+ RMQ = 0x80000000
38
+ QSTRAND = 0x100000000
39
+ NO_INV = 0x200000000
40
+ NO_HASH_NAME = 0x400000000
41
+ SPLICE_OLD = 0x800000000
42
+ SECONDARY_SEQ = 0x1000000000 # output SEQ field for secondary alignments using hard clipping
41
43
 
42
44
  HPC = 0x1
43
45
  NO_SEQ = 0x2
@@ -74,79 +76,6 @@ module Minimap2
74
76
  :a, MM128.ptr
75
77
  end
76
78
 
77
- # indexing option
78
- class IdxOpt < ::FFI::Struct
79
- layout \
80
- :k, :short,
81
- :w, :short,
82
- :flag, :short,
83
- :bucket_bits, :short,
84
- :mini_batch_size, :int64_t,
85
- :batch_size, :uint64_t
86
- end
87
-
88
- # mapping option
89
- class MapOpt < ::FFI::Struct
90
- layout \
91
- :flag, :int64_t, # see MM_F_* macros
92
- :seed, :int,
93
- :sdust_thres, :int, # score threshold for SDUST; 0 to disable
94
- :max_qlen, :int, # max query length
95
- :bw, :int, # bandwidth
96
- :bw_long, :int,
97
- :max_gap, :int, # break a chain if there are no minimizers in a max_gap window
98
- :max_gap_ref, :int,
99
- :max_frag_len, :int,
100
- :max_chain_skip, :int,
101
- :max_chain_iter, :int,
102
- :min_cnt, :int, # min number of minimizers on each chain
103
- :min_chain_score, :int, # min chaining score
104
- :chain_gap_scale, :float,
105
- :chain_skip_scale, :float,
106
- :rmq_size_cap, :int,
107
- :rmq_inner_dist, :int,
108
- :rmq_rescue_size, :int,
109
- :rmq_rescue_ratio, :float,
110
- :mask_level, :float,
111
- :mask_len, :int,
112
- :pri_ratio, :float,
113
- :best_n, :int, # top best_n chains are subjected to DP alignment
114
- :alt_drop, :float,
115
- :a, :int, # matching score
116
- :b, :int, # mismatch
117
- :q, :int, # gap-open
118
- :e, :int, # gap-ext
119
- :q2, :int, # gap-open
120
- :e2, :int, # gap-ext
121
- :sc_ambi, :int, # score when one or both bases are "N"
122
- :noncan, :int, # cost of non-canonical splicing sites
123
- :junc_bonus, :int,
124
- :zdrop, :int, # break alignment if alignment score drops too fast along the diagonal
125
- :zdrop_inv, :int,
126
- :end_bonus, :int,
127
- :min_dp_max, :int, # drop an alignment if the score of the max scoring segment is below this threshold
128
- :min_ksw_len, :int,
129
- :anchor_ext_len, :int,
130
- :anchor_ext_shift, :int,
131
- :max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
132
- :rank_min_len, :int,
133
- :rank_frac, :float,
134
- :pe_ori, :int,
135
- :pe_bonus, :int,
136
- :mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
137
- :q_occ_frac, :float,
138
- :min_mid_occ, :int32,
139
- :max_mid_occ, :int32,
140
- :mid_occ, :int32, # ignore seeds with occurrences above this threshold
141
- :max_occ, :int32,
142
- :max_max_occ, :int32,
143
- :occ_dist, :int32,
144
- :mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
145
- :max_sw_mat, :int64_t,
146
- :cap_kalloc, :int64_t,
147
- :split_prefix, :string
148
- end
149
-
150
79
  # minimap2 index
151
80
  class IdxSeq < ::FFI::Struct
152
81
  layout \
@@ -173,17 +102,6 @@ module Minimap2
173
102
  :h, :pointer
174
103
  end
175
104
 
176
- # index reader
177
- class IdxReader < ::FFI::Struct
178
- layout \
179
- :is_idx, :int,
180
- :n_parts, :int,
181
- :idx_size, :int64_t,
182
- :opt, IdxOpt,
183
- :fp_out, :pointer, # FILE
184
- :seq_or_idx, :pointer # FIXME: Union mm_bseq_files or FILE
185
- end
186
-
187
105
  # minimap2 alignment
188
106
  class Extra < ::FFI::BitStruct
189
107
  layout \
@@ -242,6 +160,90 @@ module Minimap2
242
160
  :dummy, 5
243
161
  end
244
162
 
163
+ # indexing option
164
+ class IdxOpt < ::FFI::Struct
165
+ layout \
166
+ :k, :short,
167
+ :w, :short,
168
+ :flag, :short,
169
+ :bucket_bits, :short,
170
+ :mini_batch_size, :int64_t,
171
+ :batch_size, :uint64_t
172
+ end
173
+
174
+ # mapping option
175
+ class MapOpt < ::FFI::Struct
176
+ layout \
177
+ :flag, :int64_t, # see MM_F_* macros
178
+ :seed, :int,
179
+ :sdust_thres, :int, # score threshold for SDUST; 0 to disable
180
+ :max_qlen, :int, # max query length
181
+ :bw, :int, # bandwidth
182
+ :bw_long, :int,
183
+ :max_gap, :int, # break a chain if there are no minimizers in a max_gap window
184
+ :max_gap_ref, :int,
185
+ :max_frag_len, :int,
186
+ :max_chain_skip, :int,
187
+ :max_chain_iter, :int,
188
+ :min_cnt, :int, # min number of minimizers on each chain
189
+ :min_chain_score, :int, # min chaining score
190
+ :chain_gap_scale, :float,
191
+ :chain_skip_scale, :float,
192
+ :rmq_size_cap, :int,
193
+ :rmq_inner_dist, :int,
194
+ :rmq_rescue_size, :int,
195
+ :rmq_rescue_ratio, :float,
196
+ :mask_level, :float,
197
+ :mask_len, :int,
198
+ :pri_ratio, :float,
199
+ :best_n, :int, # top best_n chains are subjected to DP alignment
200
+ :alt_drop, :float,
201
+ :a, :int, # matching score
202
+ :b, :int, # mismatch
203
+ :q, :int, # gap-open
204
+ :e, :int, # gap-ext
205
+ :q2, :int, # gap-open
206
+ :e2, :int, # gap-ext
207
+ :sc_ambi, :int, # score when one or both bases are "N"
208
+ :noncan, :int, # cost of non-canonical splicing sites
209
+ :junc_bonus, :int,
210
+ :zdrop, :int, # break alignment if alignment score drops too fast along the diagonal
211
+ :zdrop_inv, :int,
212
+ :end_bonus, :int,
213
+ :min_dp_max, :int, # drop an alignment if the score of the max scoring segment is below this threshold
214
+ :min_ksw_len, :int,
215
+ :anchor_ext_len, :int,
216
+ :anchor_ext_shift, :int,
217
+ :max_clip_ratio, :float, # drop an alignment if BOTH ends are clipped above this ratio
218
+ :rank_min_len, :int,
219
+ :rank_frac, :float,
220
+ :pe_ori, :int,
221
+ :pe_bonus, :int,
222
+ :mid_occ_frac, :float, # only used by mm_mapopt_update(); see below
223
+ :q_occ_frac, :float,
224
+ :min_mid_occ, :int32,
225
+ :max_mid_occ, :int32,
226
+ :mid_occ, :int32, # ignore seeds with occurrences above this threshold
227
+ :max_occ, :int32,
228
+ :max_max_occ, :int32,
229
+ :occ_dist, :int32,
230
+ :mini_batch_size, :int64_t, # size of a batch of query bases to process in parallel
231
+ :max_sw_mat, :int64_t,
232
+ :cap_kalloc, :int64_t,
233
+ :split_prefix, :string
234
+ end
235
+
236
+ # index reader
237
+ class IdxReader < ::FFI::Struct
238
+ layout \
239
+ :is_idx, :int,
240
+ :n_parts, :int,
241
+ :idx_size, :int64_t,
242
+ :opt, IdxOpt,
243
+ :fp_out, :pointer, # FILE
244
+ :seq_or_idx, :pointer # FIXME: Union mm_bseq_files or FILE
245
+ end
246
+
245
247
  # memory buffer for thread-local storage during mapping
246
248
  class TBuf < ::FFI::Struct
247
249
  layout \
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Minimap2
4
- # Minimap2-2.24 (r1122)
5
- VERSION = "0.2.24.6"
4
+ # Minimap2-2.25 (r1173)
5
+ VERSION = "0.2.25.1"
6
6
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: minimap2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.24.6
4
+ version: 0.2.25.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - kojix2
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-04-27 00:00:00.000000000 Z
11
+ date: 2023-05-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ffi
@@ -194,6 +194,7 @@ files:
194
194
  - ext/minimap2/mmpriv.h
195
195
  - ext/minimap2/options.c
196
196
  - ext/minimap2/pe.c
197
+ - ext/minimap2/pyproject.toml
197
198
  - ext/minimap2/python/README.rst
198
199
  - ext/minimap2/python/cmappy.h
199
200
  - ext/minimap2/python/cmappy.pxd
@@ -260,7 +261,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
260
261
  - !ruby/object:Gem::Version
261
262
  version: '0'
262
263
  requirements: []
263
- rubygems_version: 3.4.1
264
+ rubygems_version: 3.4.6
264
265
  signing_key:
265
266
  specification_version: 4
266
267
  summary: minimap2