pyaragorn 0.1.0__cp38-cp38-win_amd64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pyaragorn might be problematic. Click here for more details.

pyaragorn/lib.pyx ADDED
@@ -0,0 +1,715 @@
1
+ # coding: utf-8
2
+ # cython: language_level=3, linetrace=True, binding=True
3
+
4
+ """Bindings to ARAGORN, a (t|mt|tm)RNA gene finder.
5
+
6
+ Attributes:
7
+ ARAGORN_VERSION (`str`): The version of ARAGORN currently wrapped
8
+ in PyARAGORN.
9
+ TRANSLATION_TABLES (`set` of `int`): A set containing all the
10
+ translation tables supported by PyARAGORN.
11
+
12
+ Example:
13
+ PyARAGORN can work on any DNA sequence stored in either a text or a
14
+ byte array. To load a sequence from one of the common sequence formats,
15
+ you can use an external dedicated library such as
16
+ `Biopython <https://github.com/biopython/biopython>`_::
17
+
18
+ >>> import gzip
19
+ >>> import Bio.SeqIO
20
+ >>> with gzip.open("CP001621.fna.gz", "rt") as f:
21
+ ... record = Bio.SeqIO.read(f, "fasta")
22
+
23
+ Then use PyARAGORN to find the tRNA genes using the
24
+ bacterial genetic code (translation table 11):
25
+
26
+ >>> import pyaragorn
27
+ >>> rna_finder = pyaragorn.RNAFinder(11, trna=True, tmrna=False)
28
+ >>> for gene in rna_finder.find_rna(record.seq.encode()):
29
+ ... print(gene.anticodon, gene.amino_acid, gene.begin, gene.end)
30
+ tag Leu 87124 87207
31
+ ttt Lys 87210 87285
32
+ ...
33
+
34
+ The gene coordinates are 1-indexed, inclusive, similarly to
35
+ `Pyrodigal <https://pyrodigal.readthedocs.io>`_ genes.
36
+
37
+ References:
38
+ - Laslett, Dean, and Björn Canback.
39
+ “ARAGORN, a program to detect tRNA genes and tmRNA genes in nucleotide
40
+ sequences.” Nucleic acids research vol. 32,1 11-6. 2 Jan. 2004,
41
+ :doi:`10.1093/nar/gkh152`. :pmid:`14704338`. :pmcid:`PMC373265`.
42
+ - Laslett, Dean, and Björn Canbäck.
43
+ “ARWEN: a program to detect tRNA genes in metazoan mitochondrial
44
+ nucleotide sequences.” Bioinformatics (Oxford, England) vol. 24,2
45
+ (2008): 172-5. :doi:`10.1093/bioinformatics/btm573`. :pmid:`18033792`.
46
+
47
+ """
48
+
49
+ from cython.operator cimport postincrement, dereference
50
+ from cpython.bytes cimport PyBytes_FromStringAndSize
51
+ from cpython.exc cimport PyErr_CheckSignals
52
+ from cpython.unicode cimport (
53
+ PyUnicode_KIND,
54
+ PyUnicode_DATA,
55
+ PyUnicode_1BYTE_KIND,
56
+ PyUnicode_GET_LENGTH,
57
+ # PyUnicode_READ,
58
+ )
59
+
60
+ from libc.stdio cimport FILE, fopen, fdopen, fclose, fprintf, fputc, stdout, stderr
61
+ from libc.stdlib cimport calloc, free
62
+ from libc.string cimport memcpy
63
+ from libc.stdint cimport intptr_t
64
+
65
+ cimport aragorn
66
+ from aragorn cimport csw, data_set, gene
67
+
68
+ # --- Helpers ------------------------------------------------------------------
69
+
70
# Local `nogil` declaration of the CPython `PyUnicode_READ` macro, used by the
# `nogil` methods of `Cursor` to read one code point from a string buffer.
# NOTE(review): presumably redeclared here (instead of being cimported from
# `cpython.unicode`, where it is commented out) so that it can be called
# without the GIL -- confirm.
cdef extern from * nogil:
    Py_UCS4 PyUnicode_READ(int kind, const void* data, size_t pos)
72
+
73
# Verbatim C helper `default_sw`: builds a `csw` parameter block from a
# positional initializer and copies it into the caller-provided `sw`.
#
# The initializer is positional, so the order of the values must match the
# field order of the `csw` struct declared by the wrapped ARAGORN sources;
# do not reorder or insert values without checking that declaration.
#
# The final integer array is a sequence of ASCII codes terminated by `TERM`;
# decoded, it is a banner made of a dashed line, the text
# "ARAGORN v1.2.41   Dean Laslett", and another dashed line.
cdef extern from * nogil:
    """
    void default_sw(csw* sw) {
        csw x = {
            {"tRNA", "tmRNA", "", "", "CDS", "overall"},
            NULL, NULL, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, STANDARD, 0,
            {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
            0, METAZOAN_MT, 1, 0, 5, 5, 1, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0,
            3, 0, 2, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
            {0, 0, 0, 0, 0, 0}, 0, 0, 0, 0, NTAG, 10, 30,
            {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0}, {0, 0, 0, 0, 0, 0},
            0, 0, 0, 0, 0L, 100.0, 1.0, tRNAthresh, 4.0, 29.0, 26.0, 7.5, 8.0,
            mtRNAtthresh, mtRNAdthresh, mtRNAdtthresh, -7.9, -6.0, tmRNAthresh,
            14.0, 10.0, 25.0, 9.0, srpRNAthresh, CDSthresh,
            {tRNAthresh, tmRNAthresh, srpRNAthresh, 0.0, CDSthresh},
            {
                45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
                45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 10, 65,
                82, 65, 71, 79, 82, 78, 32, 118, 49, 46, 50, 46, 52, 49, 32,
                32, 32, 68, 101, 97, 110, 32, 76, 97, 115, 108, 101, 116, 116,
                10, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45,
                45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, 10,
                TERM
            }
        };
        memcpy(sw, &x, sizeof(csw));
    }
    """
    # Fill `sw` with the default parameters defined in the C block above.
    void default_sw(csw* sw)
102
+
103
cdef inline long int sq(data_set* d, long int pos) nogil:
    # Wrap the 1-based coordinate `pos` around a sequence of `d.psmax`
    # positions, keeping the result 1-based.
    cdef long int shifted = pos + d.psmax - 1
    return shifted % d.psmax + 1
105
+
106
+
107
+ # --- Constants ----------------------------------------------------------------
108
+
109
# Identifiers of the accepted translation tables:
# 1-6, 9-16, 21-26, 29-30 and 32-33.
cdef set _TRANSLATION_TABLES = {
    *range(1, 7),
    *range(9, 17),
    *range(21, 27),
    29, 30,
    32, 33,
}

# NOTE(review): `PROJECT_VERSION` and `PROJECT_ARAGORN_VERSION` are not
# defined in this file; presumably they are injected by the build -- confirm.
__version__ = PROJECT_VERSION

# Public aliases documented in the module docstring.
ARAGORN_VERSION = PROJECT_ARAGORN_VERSION
TRANSLATION_TABLES = _TRANSLATION_TABLES
115
+
116
+
117
+ # --- Classes ------------------------------------------------------------------
118
+
119
cdef class Gene:
    """A gene identified by ARAGORN.

    Instances wrap a private copy of the ARAGORN ``gene`` record together
    with the genetic code that was used to detect it.
    """

    cdef gene _gene
    cdef int _genetic_code

    @staticmethod
    cdef Gene _new_gene(gene* _gene, int _genetic_code):
        # Build the `Gene` subclass matching the record type, then copy the
        # record into it so the wrapper does not alias the caller's array.
        cdef Gene wrapper

        if _gene.genetype != aragorn.tRNA and _gene.genetype != aragorn.tmRNA:
            raise NotImplementedError

        if _gene.genetype == aragorn.tRNA:
            wrapper = TRNAGene.__new__(TRNAGene)
        else:
            wrapper = TMRNAGene.__new__(TMRNAGene)

        wrapper._genetic_code = _genetic_code
        memcpy(&wrapper._gene, _gene, sizeof(gene))
        return wrapper

    def __sizeof__(self):
        # NOTE(review): `sizeof` is applied to the object reference here,
        # which is presumably pointer-sized rather than the size of the
        # wrapped record -- confirm this is intended.
        return sizeof(self)

    @property
    def type(self):
        """`str`: The label of the gene type (e.g. ``"tRNA"`` or ``"tmRNA"``).
        """
        labels = ("tRNA", "tmRNA", "", "", "CDS")
        return labels[<int> self._gene.genetype]

    @property
    def begin(self):
        """`int`: The sequence coordinate at which the gene begins.

        Hint:
            This coordinate is 1-based, inclusive. To use it to index
            a Python array or string, subtract one.

        """
        return self._gene.start

    @property
    def end(self):
        """`int`: The sequence coordinate at which the gene ends.

        Hint:
            This coordinate is 1-based, inclusive. To use it to index
            a Python array or string, subtract one.

        """
        return self._gene.stop

    @property
    def length(self):
        """`int`: The length of the RNA gene, as reported by ARAGORN.
        """
        return aragorn.seqlen(&self._gene)

    @property
    def strand(self):
        """`int`: *-1* if the gene is on the reverse strand, *+1* otherwise.
        """
        if self._gene.comp:
            return -1
        return +1

    @property
    def energy(self):
        """`float`: The approximated energy of the RNA structure.

        The value is computed with the default ARAGORN parameters.
        """
        cdef csw params
        default_sw(&params)  # FIXME?
        return aragorn.nenergy(&self._gene, &params)

    def sequence(self):
        """Retrieve the full sequence of the RNA gene.

        Returns:
            `str`: The nucleotides of the gene, one character per base.

        """
        cdef int n = aragorn.seqlen(&self._gene)
        cdef int k = 0
        cdef bytearray letters = bytearray()
        while k < n:
            letters.append(aragorn.cpbase(self._gene.seq[k]))
            k += 1
        return letters.decode('ascii')
199
+
200
+
201
cdef class TRNAGene(Gene):
    """A transfer RNA (tRNA) gene.
    """

    @property
    def amino_acid(self):
        """`str` or (`str`, `str`): The 3-letter amino-acid(s) for this gene.

        Hint:
            A pair of amino-acids is returned when the anticodon loop was
            recorded with 6 or 8 nucleotides; in every other case a
            single string is returned.

        """
        cdef csw params
        cdef int* codon = self._gene.seq + self._gene.anticodon

        # only the genetic code is set on the parameter block handed to `aa`
        (<int*> &params.geneticcode)[0] = self._genetic_code

        if self._gene.cloop != 6 and self._gene.cloop != 8:
            return aragorn.aa(codon, &params).decode('ascii')

        if self._gene.cloop == 6:
            # short loop: translate the frames starting one base before
            # and at the anticodon position
            first = aragorn.aa(codon - 1, &params).decode('ascii')
            second = aragorn.aa(codon, &params).decode('ascii')
        else:
            # long loop: translate the frames starting at and one base
            # after the anticodon position
            first = aragorn.aa(codon, &params).decode('ascii')
            second = aragorn.aa(codon + 1, &params).decode('ascii')
        return (first, second)

    @property
    def anticodon(self):
        """`str`: The anticodon of the tRNA gene.

        Two, three or four nucleotides are returned depending on the
        recorded anticodon loop size (see `anticodon_length`).
        """
        cdef int k
        cdef int width
        cdef int* bases = self._gene.seq + self._gene.anticodon
        cdef list letters = []

        if self._gene.cloop == 6:
            width = 2
        elif self._gene.cloop == 8:
            width = 4
        else:
            width = 3

        for k in range(width):
            letters.append(chr(aragorn.cbase(bases[k])))
        return ''.join(letters)

    @property
    def anticodon_offset(self):
        """`int`: The offset in the gene at which the anticodon starts.

        The length of the intron is added when the gene has an intron
        located at or before the anticodon.
        """
        cdef int offset = self._gene.anticodon + 1
        if self._gene.nintron <= 0:
            return offset
        if self._gene.intron > self._gene.anticodon:
            return offset
        return offset + self._gene.nintron

    @property
    def anticodon_length(self):
        """`int`: The length of the anticodon (in nucleotides).
        """
        if self._gene.cloop == 8:
            return 4
        return 2 if self._gene.cloop == 6 else 3
264
+
265
+
266
cdef class TMRNAGene(Gene):
    """A transfer-messenger RNA (tmRNA) gene.

    Example:
        >>> rna_finder = pyaragorn.RNAFinder(11, trna=False, tmrna=True)
        >>> tmrna = rna_finder.find_rna(str(record.seq))[0]
        >>> tmrna.begin, tmrna.end
        (198037, 198447)
        >>> tmrna.peptide()
        'AEKNEENFEMPAFMINNASAGANYMFA**'

    """

    cdef int* _cds_end(self, csw* sw, bint include_stop) noexcept:
        # Locate the (exclusive) end of the coding sequence in `eseq`.
        #
        # The end is first extended over every STOP codon that follows the
        # recorded tag peptide. When `include_stop` is false, the trailing
        # STOP codons are then trimmed, without ever removing a sense codon
        # or moving before the start of the coding sequence.
        cdef int* sb = self._gene.eseq + self._gene.tps
        cdef int* se = (self._gene.eseq + self._gene.tpe) + 1

        while aragorn.ltranslate(se, &self._gene, sw) == ord('*'):
            se += 3

        if not include_stop:
            while se - 3 >= sb and aragorn.ltranslate(se - 3, &self._gene, sw) == ord('*'):
                se -= 3

        return se

    @property
    def cds_offset(self):
        """`int`: The offset in the gene at which the coding sequence starts.
        """
        return self._gene.tps + 1

    @property
    def cds_length(self):
        """`int`: The length of the coding sequence (in nucleotides).

        Trailing STOP codons are counted, so this is the length of the
        string returned by `cds` with its default arguments.
        """
        cdef csw sw
        cdef int* sb = self._gene.eseq + self._gene.tps
        cdef int* se

        (<int*> &sw.geneticcode)[0] = self._genetic_code
        se = self._cds_end(&sw, True)

        # `se` is exclusive, so the pointer difference is the exact length
        # (the previous `tpe - tps` was one nucleotide short of `cds()`).
        return <int> (se - sb)

    def cds(self, include_stop=True):
        """Retrieve the coding sequence of the mRNA-like region.

        Arguments:
            include_stop (`bool`): Whether or not to include the STOP codons
                in the returned nucleotide sequence. Defaults to `True`.

        Returns:
            `str`: The sequence of the mRNA-like region in the tmRNA
            gene, optionally without STOP codons.

        """
        cdef csw sw
        cdef int* sb = self._gene.eseq + self._gene.tps
        cdef int* se
        cdef bytearray nucleotides = bytearray()

        (<int*> &sw.geneticcode)[0] = self._genetic_code
        se = self._cds_end(&sw, include_stop)

        while sb < se:
            nucleotides.append(aragorn.cpbase(sb[0]))
            sb += 1

        return nucleotides.decode('ascii')

    def peptide(self, include_stop=True):
        """Retrieve the peptide sequence of the mRNA-like region.

        Arguments:
            include_stop (`bool`): Whether or not to include the STOP codons
                in the returned peptide sequence. Defaults to `True`.

        Returns:
            `str`: The translation of the mRNA-like region of the tmRNA
            gene, optionally without STOP codons.

        """
        cdef csw sw
        cdef int* sb = self._gene.eseq + self._gene.tps
        cdef int* se
        cdef bytearray residues = bytearray()

        (<int*> &sw.geneticcode)[0] = self._genetic_code
        se = self._cds_end(&sw, include_stop)

        # translate codon by codon
        while sb < se:
            residues.append(aragorn.ltranslate(sb, &self._gene, &sw))
            sb += 3

        return residues.decode('ascii')
364
+
365
+
366
cdef class Cursor:
    """A reader exposing a Python sequence as an ARAGORN ``data_set``.

    The cursor accepts either a `str` or an object implementing the buffer
    protocol, and yields ARAGORN base codes one position at a time.
    """

    cdef object obj          # the data source, kept alive for `data`
    cdef const void* data    # raw pointer to the characters of `obj`
    cdef int kind            # `PyUnicode_KIND`-style width of each character
    cdef size_t length       # number of characters in `obj`
    cdef data_set ds         # ARAGORN book-keeping (`ps` is the read position)

    def __init__(self, obj):
        """Create a cursor over ``obj``.

        Arguments:
            obj (`str` or buffer): The nucleotide sequence to read. Buffers
                must be contiguous arrays of bytes.

        """
        cdef const unsigned char[::1] view
        if isinstance(obj, str):
            self.kind = PyUnicode_KIND(obj)
            self.data = PyUnicode_DATA(obj)
            self.length = PyUnicode_GET_LENGTH(obj)
        else:
            # a byte buffer is read like a 1-byte-per-character string
            view = obj
            self.kind = PyUnicode_1BYTE_KIND
            self.data = &view[0]
            self.length = view.shape[0]

        # keep a reference to the data source
        self.obj = obj

        # reinitialize dataset book-keeping
        self.ds.filepointer = 0
        self.ds.ns = 0
        self.ds.nf = 0
        self.ds.nextseq = 0L
        self.ds.nextseqoff = 0L
        self.ds.seqstart = 0
        self.ds.seqstartoff = 0
        self.ds.ps = 0
        self.ds.psmax = self.length

        # record the GC fraction of the sequence
        self.ds.gc = self._gc()

    cdef int _forward(self) noexcept nogil:
        # Read the base at the current position and move to the next one.
        #
        # Returns `TERM` once the end of the sequence is reached. A character
        # that is not ASCII, or that `aragorn.map` does not map to a base
        # code, is reported as `NOBASE`.
        #
        # The position is advanced for every character, recognized or not:
        # leaving it in place on an unrecognized character made every later
        # call return `NOBASE` without ever reaching the end of the sequence,
        # so callers looping until `ps >= psmax` never terminated.
        cdef Py_UCS4 x
        cdef int base

        if self.ds.ps >= self.ds.psmax:
            return <int> aragorn.base.TERM

        x = PyUnicode_READ(self.kind, self.data, self.ds.ps)
        self.ds.ps += 1

        if x >= 128:
            return <int> aragorn.base.NOBASE

        base = aragorn.map[x]
        if base >= <int> aragorn.base.Adenine:
            return base
        return <int> aragorn.base.NOBASE

    cdef double _gc(self) noexcept nogil:
        # Compute the fraction of cytosine and guanine over the positions
        # read, stopping at the first character mapped to -1.
        #
        # Returns 0.0 when no position was read (e.g. an empty sequence)
        # instead of dividing by zero.
        cdef long i
        cdef Py_UCS4 x
        cdef int base
        cdef long ngc = 0
        cdef long ps = 0

        for i in range(self.length):
            x = PyUnicode_READ(self.kind, self.data, i)
            # same guard as `_forward`: code points >= 128 are never used to
            # index `aragorn.map`; they count as a non-GC position
            if x < 128:
                base = aragorn.map[x]
                if base == -1:
                    break
                ngc += (base == <int> aragorn.base.Cytosine) or (base == <int> aragorn.base.Guanine)
            ps += 1

        if ps == 0:
            return 0.0
        return <double> ngc / <double> ps
436
+
437
+
438
cdef class RNAFinder:
    """A configurable RNA gene finder.
    """
    cdef csw _sw

    def __init__(
        self,
        int translation_table = 1,
        *,
        bint trna = True,
        bint tmrna = True,
        bint linear = False,
    ):
        """__init__(self, translation_table=1, *, trna=True, tmrna=True, linear=False)\n--\n

        Create a new RNA finder.

        Arguments:
            translation_table (`int`, optional): The translation table to
                use. Check the :wiki:`List of genetic codes` page
                listing all genetic codes for the available values, or
                the :attr:`pyaragorn.TRANSLATION_TABLES` constant for allowed
                values.

        Keyword Arguments:
            trna (`bool`): Whether to search for tRNA genes.
                Defaults to `True`.
            tmrna (`bool`): Whether to search for tmRNA genes.
                Defaults to `True`.
            linear (`bool`): Whether to process sequences as linear. When
                `False` (the default), the beginning of the sequence is
                wrapped around after its end while searching.

        Raises:
            `ValueError`: When ``translation_table`` is not one of the
                supported translation tables.

        """
        default_sw(&self._sw)
        self._sw.trna = trna
        self._sw.tmrna = tmrna
        self._sw.linear = linear
        self._sw.f = stdout
        self._sw.verbose = False #True

        if translation_table not in _TRANSLATION_TABLES:
            raise ValueError(f"invalid translation table: {translation_table!r}")
        self._sw.geneticcode = translation_table

    def find_rna(self, object sequence):
        """Find RNA genes in the input DNA sequence.

        Arguments:
            sequence (`str` or buffer): The nucleotide sequence to process,
                either as a string of nucleotides (upper- or lowercase), or
                as an object implementing the buffer protocol.

        Returns:
            `list` of `~pyaragorn.Gene`: A list of `~pyaragorn.Gene` (either
            `~pyaragorn.TRNAGene` or `~pyaragorn.TMRNAGene`) corresponding
            to RNA genes detected in the sequence according to the `RNAFinder`
            parameters.

        Raises:
            `MemoryError`: When the memory for the results cannot be
                allocated.

        """
        cdef int i
        cdef int n
        cdef int nt
        cdef csw sw
        cdef list genes
        cdef int* vsort = NULL
        cdef Cursor cursor = Cursor(sequence)

        # copy parameters to ensure the `find_rna` method is re-entrant
        memcpy(&sw, &self._sw, sizeof(csw))

        try:
            with nogil:
                # allocate memory for the result genes
                sw.genespace = aragorn.NT
                sw.genes = <gene*> calloc(sw.genespace, sizeof(gene))
                if sw.genes is NULL:
                    raise MemoryError("failed to allocate memory")
                # detect RNA genes with the "batched" algorithm
                nt = self._bopt(cursor, &sw)
                # allocate array for sorting genes; always request at least
                # one element, since `calloc` may return NULL for a zero-sized
                # request and that must not be reported as a failure when no
                # gene was found
                vsort = <int*> calloc(nt if nt > 0 else 1, sizeof(int))
                if vsort is NULL:
                    raise MemoryError("failed to allocate memory")
                # sort and threshold genes
                n = aragorn.gene_sort(&cursor.ds, nt, vsort, &sw)
            # recover genes: each wrapper copies its record, so the C arrays
            # can be released afterwards
            genes = []
            for i in range(n):
                genes.append(Gene._new_gene(&sw.genes[vsort[i]], sw.geneticcode))
        finally:
            free(vsort)
            free(sw.genes)

        return genes

    cdef int _bopt(
        self,
        Cursor cursor,
        csw* sw
    ) except -1 nogil:
        # Scan the whole sequence behind `cursor` with overlapping windows,
        # storing candidate genes in `sw.genes`, and return the gene count
        # reported by `tmioptimise`.
        #
        # adapted from bopt_fastafile to use with our own `Cursor` dataset
        cdef int nt
        cdef int seq[((2 * aragorn.LSEQ) + aragorn.WRAP) + 1]   # forward window
        cdef int cseq[((2 * aragorn.LSEQ) + aragorn.WRAP) + 1]  # window filled by `sense_switch`
        cdef int wseq[(2 * aragorn.WRAP) + 1]                   # copy of the sequence start
        cdef long i
        cdef long rewind
        cdef long drewind
        cdef long tmaxlen
        cdef bint flag
        cdef int length
        cdef int *s
        cdef int *sf
        cdef int *se
        cdef int *sc
        cdef int *swrap
        cdef long gap
        cdef long start
        cdef bint loop
        cdef bint NX
        cdef bint SH

        # compute width of sliding windows
        rewind = aragorn.MAXTAGDIST + 20
        if sw.trna or sw.mtrna:
            tmaxlen = aragorn.MAXTRNALEN + sw.maxintronlen
            if rewind < tmaxlen:
                rewind = tmaxlen
        if sw.tmrna:
            if rewind < aragorn.MAXTMRNALEN:
                rewind = aragorn.MAXTMRNALEN
        if sw.peptide:
            if sw.tagthresh >= 5 and rewind < aragorn.TSWEEP:
                rewind = aragorn.TSWEEP

        sw.loffset = rewind
        sw.roffset = rewind
        drewind = 2 * rewind

        # cleanly initialize gene array
        aragorn.init_gene(sw.genes, 0, aragorn.NT)

        nt = 0
        flag = 0        # set once the end of the sequence has been buffered
        start = 1L

        # state of the emulated `goto`: which labelled sections run next
        loop = True
        NX = True
        SH = True

        se = seq
        if sw.linear:
            # linear sequence: pad the left side of the window with NOBASE
            for i in range(rewind):
                postincrement(se)[0] = aragorn.NOBASE
            start -= rewind
        else:
            if cursor.ds.psmax <= drewind:
                # sequence not longer than the overlap: pad it, then append
                # wrapped copies so that the whole search fits in one window
                gap = drewind - cursor.ds.psmax
                sc = se + gap
                while se < sc:
                    postincrement(se)[0] = aragorn.NOBASE

                swrap = wseq
                sc = se + cursor.ds.psmax
                while se < sc:
                    se[0] = cursor._forward()
                    postincrement(swrap)[0] = postincrement(se)[0]

                sc = swrap + gap
                while swrap < sc:
                    postincrement(swrap)[0] = aragorn.NOBASE

                swrap = wseq
                sc = swrap + cursor.ds.psmax
                while swrap < sc:
                    postincrement(se)[0] = postincrement(swrap)[0]

                swrap = wseq
                sc = swrap + drewind
                while swrap < sc:
                    postincrement(se)[0] = postincrement(swrap)[0]

                sw.loffset = drewind
                sw.roffset = drewind
                start -= drewind
                flag = 1
                # goto SH
                loop = True
                SH = True
                NX = False

            else:
                # remember the first `drewind` bases so they can be appended
                # after the end of the sequence
                swrap = wseq
                sc = seq + drewind
                while se < sc:
                    se[0] = cursor._forward()
                    postincrement(swrap)[0] = postincrement(se)[0]

        # loop emulating the `goto NX` / `goto SH` jumps of the C code
        while loop:

            # label NX: next
            sc = seq + aragorn.LSEQ
            if NX:
                while (se < sc):
                    postincrement(se)[0] = cursor._forward()
                    if cursor.ds.ps >= cursor.ds.psmax:
                        # end of sequence: pad (linear) or wrap (otherwise)
                        if sw.linear:
                            for i in range(rewind):
                                postincrement(se)[0] = aragorn.NOBASE
                        else:
                            sc = wseq + drewind
                            swrap = wseq
                            while (swrap < sc):
                                postincrement(se)[0] = postincrement(swrap)[0]
                        flag = 1
                        SH = True
                        break

            # label SH: search
            if SH:
                length = <int> (se - seq)

                # let pending signals (e.g. an interrupt) abort the search
                with gil:
                    PyErr_CheckSignals()

                # if (sw.verbose):
                #     vstart = sq(d, start + sw.loffset)
                #     vstop = sq(d, ((start + length) - sw.roffset) - 1)
                #     if (vstop < vstart):
                #         fprintf(stderr, "Searching from %ld to %ld\n", vstart, d.psmax)
                #         fprintf(stderr, "Searching from 1 to %ld\n", vstop)
                #     else:
                #         fprintf(stderr, "Searching from %ld to %ld\n", vstart, vstop)

                if (sw.both != 1):
                    sw.start = start
                    sw.comp = 0
                    nt = aragorn.tmioptimise(&cursor.ds, seq, length, nt, sw)

                if (sw.both > 0):
                    aragorn.sense_switch(seq, cseq, length)
                    sw.start = start + length
                    sw.comp = 1
                    nt = aragorn.tmioptimise(&cursor.ds, cseq, length, nt, sw)

                if not flag:
                    # more input left: move the last `drewind` bases to the
                    # front of the window and refill the rest
                    s = seq
                    sf = se - drewind
                    se = seq + drewind
                    while (s < se):
                        postincrement(s)[0] = postincrement(sf)[0]
                    start += length - drewind
                    # goto NX
                    NX = SH = loop = True
                    continue

                if nt < 1:
                    cursor.ds.nf += 1
                if sw.maxintronlen > 0:
                    aragorn.remove_overlapping_trna(&cursor.ds, nt, sw)
                if sw.updatetmrnatags:
                    aragorn.update_tmrna_tag_database(sw.genes, nt, sw)

                # FIXME: here should sort genes and filter them with `gene_sort`
                # aragorn.batch_gene_set(d, nt, sw)

                # if sw.verbose:
                #     fprintf(stderr, "%s\nSearch Finished\n\n", d.seqname)

                cursor.ds.ns += 1
                # exit loop
                loop = False

        return nt
703
+
704
+ # if (d.ns > 1) and (sw.batch < 2):
705
+ # fprintf(f, ">end \t%d sequences", d.ns)
706
+ # if sw.trna or sw.mtrna:
707
+ # fprintf(f, " %d tRNA genes", sw.ngene[<int> aragorn.tRNA])
708
+ # if sw.tmrna:
709
+ # fprintf(f, " %d tmRNA genes", sw.ngene[<int> aragorn.tmRNA])
710
+ # if d.nf > 0:
711
+ # sens = (100.0 * (d.ns - d.nf)) / d.ns
712
+ # fprintf(f, ", nothing found in %d sequences, (%.2lf%% sensitivity)", d.nf, sens)
713
+ # fputc('\n', f)
714
+ # if sw.updatetmrnatags:
715
+ # aragorn.report_new_tmrna_tags(sw)