htslib 0.0.8 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,594 +2,99 @@
2
2
 
3
3
  module HTS
4
4
  module LibHTS
5
- # Generates a new unpopulated header structure.
6
- attach_function \
7
- :sam_hdr_init,
8
- [],
9
- SamHdr.by_ref
10
-
11
- # Read the header from a BAM compressed file.
12
- attach_function \
13
- :bam_hdr_read,
14
- [BGZF],
15
- SamHdr.by_ref
16
-
17
- # Writes the header to a BAM file.
18
- attach_function \
19
- :bam_hdr_write,
20
- [BGZF, SamHdr],
21
- :int
22
-
23
- # Frees the resources associated with a header.
24
- attach_function \
25
- :sam_hdr_destroy,
26
- [SamHdr],
27
- :void
28
-
29
- # Duplicate a header structure.
30
- attach_function \
31
- :sam_hdr_dup,
32
- [SamHdr],
33
- SamHdr.by_ref
34
-
35
- # Create a header from existing text.
36
- attach_function \
37
- :sam_hdr_parse,
38
- %i[size_t string],
39
- SamHdr.by_ref
40
-
41
- # Read a header from a SAM, BAM or CRAM file.
42
- attach_function \
43
- :sam_hdr_read,
44
- [SamFile],
45
- SamHdr.by_ref
46
-
47
- # Write a header to a SAM, BAM or CRAM file.
48
- attach_function \
49
- :sam_hdr_write,
50
- [SamFile, SamHdr],
51
- :int
52
-
53
- # Returns the current length of the header text.
54
- attach_function \
55
- :sam_hdr_length,
56
- [SamHdr],
57
- :size_t
58
-
59
- # Returns the text representation of the header.
60
- attach_function \
61
- :sam_hdr_str,
62
- [SamHdr],
63
- :string
64
-
65
- # Returns the number of references in the header.
66
- attach_function \
67
- :sam_hdr_nref,
68
- [SamHdr],
69
- :int
70
-
71
- # Add formatted lines to an existing header.
72
- attach_function \
73
- :sam_hdr_add_lines,
74
- [SamHdr, :string, :size_t],
75
- :int
76
-
77
- # Adds a single line to an existing header.
78
- attach_function \
79
- :sam_hdr_add_line,
80
- [SamHdr, :string, :varargs],
81
- :int
82
-
83
- # Returns a complete line of formatted text for a given type and ID.
84
- attach_function \
85
- :sam_hdr_find_line_id,
86
- [SamHdr, :string, :string, :string, KString],
87
- :int
88
-
89
- # Returns a complete line of formatted text for a given type and index.
90
- attach_function \
91
- :sam_hdr_find_line_pos,
92
- [SamHdr, :string, :int, KString],
93
- :int
94
-
95
- # Remove a line with given type / id from a header
96
- attach_function \
97
- :sam_hdr_remove_line_id,
98
- [SamHdr, :string, :string, :string],
99
- :int
100
-
101
- # Remove nth line of a given type from a header
102
- attach_function \
103
- :sam_hdr_remove_line_pos,
104
- [SamHdr, :string, :int],
105
- :int
106
-
107
- # Add or update tag key,value pairs in a header line.
108
- attach_function \
109
- :sam_hdr_update_line,
110
- [SamHdr, :string, :string, :string, :varargs],
111
- :int
112
-
113
- # Remove all lines of a given type from a header, except the one matching an ID
114
- attach_function \
115
- :sam_hdr_remove_except,
116
- [SamHdr, :string, :string, :string],
117
- :int
118
-
119
- # Remove header lines of a given type, except those in a given ID set
120
- attach_function \
121
- :sam_hdr_remove_lines,
122
- [SamHdr, :string, :string, :pointer],
123
- :int
124
-
125
- # Count the number of lines for a given header type
126
- attach_function \
127
- :sam_hdr_count_lines,
128
- [SamHdr, :string],
129
- :int
130
-
131
- # Index of the line for the types that have dedicated look-up tables (SQ, RG, PG)
132
- attach_function \
133
- :sam_hdr_line_index,
134
- [SamHdr, :string, :string],
135
- :int
136
-
137
- # Id key of the line for the types that have dedicated look-up tables (SQ, RG, PG)
138
- attach_function \
139
- :sam_hdr_line_name,
140
- [SamHdr, :string, :int],
141
- :string
142
-
143
- # Return the value associated with a key for a header line identified by ID_key:ID_val
144
- attach_function \
145
- :sam_hdr_find_tag_id,
146
- [SamHdr, :string, :string, :string, :string, KString],
147
- :int
148
-
149
- # Return the value associated with a key for a header line identified by position
150
- attach_function \
151
- :sam_hdr_find_tag_pos,
152
- [SamHdr, :string, :int, :string, KString],
153
- :int
154
-
155
- # Remove the key from the line identified by type, ID_key and ID_value.
156
- attach_function \
157
- :sam_hdr_remove_tag_id,
158
- [SamHdr, :string, :string, :string, :string],
159
- :int
160
-
161
- # Get the target id for a given reference sequence name
162
- attach_function \
163
- :sam_hdr_name2tid,
164
- [SamHdr, :string],
165
- :int
166
-
167
- # Get the reference sequence name from a target index
168
- attach_function \
169
- :sam_hdr_tid2name,
170
- [SamHdr, :int],
171
- :string
172
-
173
- # Get the reference sequence length from a target index
174
- attach_function \
175
- :sam_hdr_tid2len,
176
- [SamHdr, :int],
177
- :hts_pos_t
178
-
179
- # Generate a unique \@PG ID: value
180
- attach_function \
181
- :sam_hdr_pg_id,
182
- [SamHdr, :string],
183
- :string
184
-
185
- # Add an \@PG line.
186
- attach_function \
187
- :sam_hdr_add_pg,
188
- [SamHdr, :string, :varargs],
189
- :int
190
-
191
- # A function to help with construction of CL tags in @PG records.
192
- attach_function \
193
- :stringify_argv,
194
- %i[int pointer],
195
- :string
196
-
197
- # Increments the reference count on a header
198
- attach_function \
199
- :sam_hdr_incr_ref,
200
- [SamHdr],
201
- :void
202
-
203
- # Create a new bam1_t alignment structure
204
- attach_function \
205
- :bam_init1,
206
- [],
207
- Bam1.by_ref
208
-
209
- # Destroy a bam1_t structure
210
- attach_function \
211
- :bam_destroy1,
212
- [Bam1],
213
- :void
214
-
215
- # Read a BAM format alignment record
216
- attach_function \
217
- :bam_read1,
218
- [BGZF, Bam1],
219
- :int
220
-
221
- # Write a BAM format alignment record
222
- attach_function \
223
- :bam_write1,
224
- [BGZF, Bam1],
225
- :int
226
-
227
- # Copy alignment record data
228
- attach_function \
229
- :bam_copy1,
230
- [Bam1, Bam1],
231
- Bam1.by_ref
232
-
233
- # Create a duplicate alignment record
234
- attach_function \
235
- :bam_dup1,
236
- [Bam1],
237
- Bam1.by_ref
238
-
239
- # Calculate query length from CIGAR data
240
- attach_function \
241
- :bam_cigar2qlen,
242
- %i[int pointer],
243
- :int64
244
-
245
- # Calculate reference length from CIGAR data
246
- attach_function \
247
- :bam_cigar2rlen,
248
- %i[int pointer],
249
- :hts_pos_t
250
-
251
- # Calculate the rightmost base position of an alignment on the reference genome.
252
- attach_function \
253
- :bam_endpos,
254
- [Bam1],
255
- :hts_pos_t
256
-
257
- attach_function \
258
- :bam_str2flag,
259
- [:string],
260
- :int
261
-
262
- attach_function \
263
- :bam_flag2str,
264
- [:int],
265
- :string
266
-
267
- # Set the name of the query
268
- attach_function \
269
- :bam_set_qname,
270
- [Bam1, :string],
271
- :int
272
-
273
- # Initialise fp->idx for the current format type, for SAM, BAM and CRAM types.
274
- attach_function \
275
- :sam_idx_init,
276
- [HtsFile, SamHdr, :int, :string],
277
- :int
278
-
279
- # Writes the index initialised with sam_idx_init to disk.
280
- attach_function \
281
- :sam_idx_save,
282
- [HtsFile],
283
- :int
284
-
285
- # Load a BAM (.csi or .bai) or CRAM (.crai) index file
286
- attach_function \
287
- :sam_index_load,
288
- [HtsFile, :string],
289
- HtsIdx.by_ref
290
-
291
- # Load a specific BAM (.csi or .bai) or CRAM (.crai) index file
292
- attach_function \
293
- :sam_index_load2,
294
- [HtsFile, :string, :string],
295
- HtsIdx.by_ref
296
-
297
- # Load or stream a BAM (.csi or .bai) or CRAM (.crai) index file
298
- attach_function \
299
- :sam_index_load3,
300
- [HtsFile, :string, :string, :int],
301
- HtsIdx.by_ref
302
-
303
- # Generate and save an index file
304
- attach_function \
305
- :sam_index_build,
306
- %i[string int],
307
- :int
308
-
309
- # Generate and save an index to a specific file
310
- attach_function \
311
- :sam_index_build2,
312
- %i[string string int],
313
- :int
314
-
315
- # Generate and save an index to a specific file
316
- attach_function \
317
- :sam_index_build3,
318
- %i[string string int int],
319
- :int
320
-
321
- # Create a BAM/CRAM iterator
322
- attach_function \
323
- :sam_itr_queryi,
324
- [HtsIdx, :int, :hts_pos_t, :hts_pos_t],
325
- HtsItr.by_ref
326
-
327
- # Create a SAM/BAM/CRAM iterator
328
- attach_function \
329
- :sam_itr_querys,
330
- [HtsIdx, SamHdr, :string],
331
- HtsItr.by_ref
332
-
333
- # Create a multi-region iterator
334
- attach_function \
335
- :sam_itr_regions,
336
- [HtsIdx, SamHdr, :pointer, :uint],
337
- HtsItr.by_ref
338
-
339
- # Create a multi-region iterator
340
- attach_function \
341
- :sam_itr_regarray,
342
- [HtsIdx, SamHdr, :pointer, :uint],
343
- HtsItr.by_ref
344
-
345
- # Get the next read from a SAM/BAM/CRAM iterator
346
- def self.sam_itr_next(htsfp, itr, r)
347
- # FIXME: check if htsfp is compressed BGZF
348
- hts_log_error("Null iterator") if itr.null?
349
- # FIXME: check multi
350
- hts_itr_next(htsfp[:fp][:bgzf], itr, r, htsfp)
5
+ # constants
6
+ BAM_CMATCH = 0
7
+ BAM_CINS = 1
8
+ BAM_CDEL = 2
9
+ BAM_CREF_SKIP = 3
10
+ BAM_CSOFT_CLIP = 4
11
+ BAM_CHARD_CLIP = 5
12
+ BAM_CPAD = 6
13
+ BAM_CEQUAL = 7
14
+ BAM_CDIFF = 8
15
+ BAM_CBACK = 9
16
+
17
+ BAM_CIGAR_STR = "MIDNSHP=XB"
18
+ BAM_CIGAR_SHIFT = 4
19
+ BAM_CIGAR_MASK = 0xf
20
+ BAM_CIGAR_TYPE = 0x3C1A7
21
+
22
+ # macros
23
+ class << self
24
+ def bam_cigar_op(c)
25
+ c & BAM_CIGAR_MASK
26
+ end
27
+
28
+ def bam_cigar_oplen(c)
29
+ c >> BAM_CIGAR_SHIFT
30
+ end
31
+
32
+ def bam_cigar_opchr(c)
33
+ ("#{BAM_CIGAR_STR}??????")[bam_cigar_op(c)]
34
+ end
35
+
36
+ def bam_cigar_gen(l, o)
37
+ l << BAM_CIGAR_SHIFT | o
38
+ end
39
+
40
+ def bam_cigar_type(o)
41
+ BAM_CIGAR_TYPE >> (o << 1) & 3
42
+ end
351
43
  end
352
44
 
353
- attach_function \
354
- :sam_parse_region,
355
- [SamHdr, :string, :pointer, :pointer, :pointer, :int],
356
- :string
357
-
358
- # SAM I/O
359
-
360
- # macros (or alias)
361
- # sam_open
362
- # sam_open_format
363
- # sam_close
364
-
365
- attach_function \
366
- :sam_open_mode,
367
- %i[string string string],
368
- :int
369
-
370
- # A version of sam_open_mode that can handle ,key=value options.
371
- attach_function \
372
- :sam_open_mode_opts,
373
- %i[string string string],
374
- :string
375
-
376
- attach_function \
377
- :sam_hdr_change_HD,
378
- [SamHdr, :string, :string],
379
- :int
380
-
381
- attach_function \
382
- :sam_parse1,
383
- [KString, SamHdr, Bam1],
384
- :int
385
-
386
- attach_function \
387
- :sam_format1,
388
- [SamHdr, Bam1, KString],
389
- :int
390
-
391
- # Read a record from a file
392
- attach_function \
393
- :sam_read1,
394
- [HtsFile, SamHdr, Bam1],
395
- :int
396
-
397
- # Write a record to a file
398
- attach_function \
399
- :sam_write1,
400
- [HtsFile, SamHdr, Bam1],
401
- :int
402
-
403
- # Return a pointer to an aux record
404
- attach_function \
405
- :bam_aux_get,
406
- [Bam1, :string], # FIXME
407
- :pointer
408
-
409
- # Get an integer aux value
410
- attach_function \
411
- :bam_aux2i,
412
- [:pointer],
413
- :int64
414
-
415
- # Get a floating-point aux value
416
- attach_function \
417
- :bam_aux2f,
418
- [:pointer],
419
- :double
420
-
421
- # Get a character aux value
422
- attach_function \
423
- :bam_aux2A,
424
- [:pointer],
425
- :char
426
-
427
- # Get a string aux value
428
- attach_function \
429
- :bam_aux2Z,
430
- [:pointer],
431
- :string
432
-
433
- # Get the length of an array-type ('B') tag
434
- attach_function \
435
- :bam_auxB_len,
436
- [:pointer],
437
- :uint
438
-
439
- # Get an integer value from an array-type tag
440
- attach_function \
441
- :bam_auxB2i,
442
- %i[pointer uint],
443
- :int64
444
-
445
- # Get a floating-point value from an array-type tag
446
- attach_function \
447
- :bam_auxB2f,
448
- %i[pointer uint],
449
- :double
450
-
451
- # Append tag data to a bam record
452
- attach_function \
453
- :bam_aux_append,
454
- [Bam1, :string, :string, :int, :pointer],
455
- :int
456
-
457
- # Delete tag data from a bam record
458
- attach_function \
459
- :bam_aux_del,
460
- [Bam1, :pointer],
461
- :int
462
-
463
- # Update or add a string-type tag
464
- attach_function \
465
- :bam_aux_update_str,
466
- [Bam1, :string, :int, :string],
467
- :int
468
-
469
- # Update or add an integer tag
470
- attach_function \
471
- :bam_aux_update_int,
472
- [Bam1, :string, :int64],
473
- :int
474
-
475
- # Update or add a floating-point tag
476
- attach_function \
477
- :bam_aux_update_float,
478
- [Bam1, :string, :float],
479
- :int
480
-
481
- # Update or add an array tag
482
- attach_function \
483
- :bam_aux_update_array,
484
- [Bam1, :string, :uint8, :uint32, :pointer],
485
- :int
486
-
487
- # Initialises a pileup iterator (bam_plp) from a callback and a user-data pointer
488
- attach_function \
489
- :bam_plp_init,
490
- %i[bam_plp_auto_f pointer],
491
- :bam_plp
492
-
493
- attach_function \
494
- :bam_plp_destroy,
495
- [:bam_plp],
496
- :void
497
-
498
- attach_function \
499
- :bam_plp_push,
500
- [:bam_plp, Bam1],
501
- :int
502
-
503
- attach_function \
504
- :bam_plp_next,
505
- %i[bam_plp pointer pointer pointer],
506
- BamPileup1.by_ref
507
-
508
- attach_function \
509
- :bam_plp_auto,
510
- %i[bam_plp pointer pointer pointer],
511
- BamPileup1.by_ref
512
-
513
- attach_function \
514
- :bam_plp64_next,
515
- %i[bam_plp pointer pointer pointer],
516
- BamPileup1.by_ref
517
-
518
- attach_function \
519
- :bam_plp64_auto,
520
- %i[bam_plp pointer pointer pointer],
521
- BamPileup1.by_ref
522
-
523
- attach_function \
524
- :bam_plp_set_maxcnt,
525
- %i[bam_plp int],
526
- :void
527
-
528
- attach_function \
529
- :bam_plp_reset,
530
- [:bam_plp],
531
- :void
532
-
533
- # Get pileup padded insertion sequence
534
- attach_function \
535
- :bam_plp_insertion,
536
- [BamPileup1, KString, :pointer],
537
- :int
538
-
539
- # sets a callback to initialise any per-pileup1_t fields.
540
- # bam_plp_constructor
541
-
542
- # bam_plp_destructor
543
-
544
- # Get pileup padded insertion sequence
545
- # bam_plp_insertion
546
-
547
- attach_function \
548
- :bam_mplp_init,
549
- %i[int bam_plp_auto_f pointer],
550
- :bam_mplp
551
-
552
- attach_function \
553
- :bam_mplp_init_overlaps,
554
- [:bam_mplp],
555
- :int
556
-
557
- attach_function \
558
- :bam_mplp_destroy,
559
- [:bam_mplp],
560
- :void
561
-
562
- attach_function \
563
- :bam_mplp_set_maxcnt,
564
- %i[bam_mplp int],
565
- :void
566
-
567
- attach_function \
568
- :bam_mplp_auto,
569
- %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
570
- :int
571
-
572
- attach_function \
573
- :bam_mplp64_auto,
574
- %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
575
- :int
576
-
577
- attach_function \
578
- :bam_mplp_reset,
579
- [:bam_mplp],
580
- :void
581
-
582
- # bam_mplp_constructor
583
- # bam_mplp_destructor
584
-
585
- attach_function \
586
- :sam_cap_mapq,
587
- [Bam1, :string, :hts_pos_t, :int],
588
- :int
589
-
590
- attach_function \
591
- :sam_prob_realn,
592
- [Bam1, :string, :hts_pos_t, :int],
593
- :int
45
+ BAM_FPAIRED = 1
46
+ BAM_FPROPER_PAIR = 2
47
+ BAM_FUNMAP = 4
48
+ BAM_FMUNMAP = 8
49
+ BAM_FREVERSE = 16
50
+ BAM_FMREVERSE = 32
51
+ BAM_FREAD1 = 64
52
+ BAM_FREAD2 = 128
53
+ BAM_FSECONDARY = 256
54
+ BAM_FQCFAIL = 512
55
+ BAM_FDUP = 1024
56
+ BAM_FSUPPLEMENTARY = 2048
57
+
58
+ # macros
59
+ # function-like macros
60
+ class << self
61
+ def bam_is_rev(b)
62
+ b[:core][:flag] & BAM_FREVERSE != 0
63
+ end
64
+
65
+ def bam_is_mrev(b)
66
+ b[:core][:flag] & BAM_FMREVERSE != 0
67
+ end
68
+
69
+ def bam_get_qname(b)
70
+ b[:data]
71
+ end
72
+
73
+ def bam_get_cigar(b)
74
+ b[:data] + b[:core][:l_qname]
75
+ end
76
+
77
+ def bam_get_seq(b)
78
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname]
79
+ end
80
+
81
+ def bam_get_qual(b)
82
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1)
83
+ end
84
+
85
+ def bam_get_aux(b)
86
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1) + b[:core][:l_qseq]
87
+ end
88
+
89
+ def bam_get_l_aux(b)
90
+ b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
91
+ end
92
+
93
+ def bam_seqi(s, i)
94
+ s[(i) >> 1].read_uint8 >> ((~i & 1) << 2) & 0xf
95
+ end
96
+
97
+ # def bam_set_seqi(s, i, b)
98
+ end
594
99
  end
595
100
  end