htslib 0.0.10 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,595 +2,138 @@
2
2
 
3
3
  module HTS
4
4
  module LibHTS
5
- # Generates a new unpopulated header structure.
6
- attach_function \
7
- :sam_hdr_init,
8
- [],
9
- SamHdr.by_ref
10
-
11
- # Read the header from a BAM compressed file.
12
- attach_function \
13
- :bam_hdr_read,
14
- [BGZF],
15
- SamHdr.by_ref
16
-
17
- # Writes the header to a BAM file.
18
- attach_function \
19
- :bam_hdr_write,
20
- [BGZF, SamHdr],
21
- :int
22
-
23
- # Frees the resources associated with a header.
24
- attach_function \
25
- :sam_hdr_destroy,
26
- [SamHdr],
27
- :void
28
-
29
- # Duplicate a header structure.
30
- attach_function \
31
- :sam_hdr_dup,
32
- [SamHdr],
33
- SamHdr.by_ref
34
-
35
- # Create a header from existing text.
36
- attach_function \
37
- :sam_hdr_parse,
38
- %i[size_t string],
39
- SamHdr.by_ref
40
-
41
- # Read a header from a SAM, BAM or CRAM file.
42
- attach_function \
43
- :sam_hdr_read,
44
- [SamFile],
45
- SamHdr.by_ref
46
-
47
- # Write a header to a SAM, BAM or CRAM file.
48
- attach_function \
49
- :sam_hdr_write,
50
- [SamFile, SamHdr],
51
- :int
52
-
53
- # Returns the current length of the header text.
54
- attach_function \
55
- :sam_hdr_length,
56
- [SamHdr],
57
- :size_t
58
-
59
- # Returns the text representation of the header.
60
- attach_function \
61
- :sam_hdr_str,
62
- [SamHdr],
63
- :string
64
-
65
- # Returns the number of references in the header.
66
- attach_function \
67
- :sam_hdr_nref,
68
- [SamHdr],
69
- :int
70
-
71
- # Add formatted lines to an existing header.
72
- attach_function \
73
- :sam_hdr_add_lines,
74
- [SamHdr, :string, :size_t],
75
- :int
76
-
77
- # Adds a single line to an existing header.
78
- attach_function \
79
- :sam_hdr_add_line,
80
- [SamHdr, :string, :varargs],
81
- :int
82
-
83
- # Returns a complete line of formatted text for a given type and ID.
84
- attach_function \
85
- :sam_hdr_find_line_id,
86
- [SamHdr, :string, :string, :string, KString],
87
- :int
88
-
89
- # Returns a complete line of formatted text for a given type and index.
90
- attach_function \
91
- :sam_hdr_find_line_pos,
92
- [SamHdr, :string, :int, KString],
93
- :int
94
-
95
- # Remove a line with given type / id from a header
96
- attach_function \
97
- :sam_hdr_remove_line_id,
98
- [SamHdr, :string, :string, :string],
99
- :int
100
-
101
- # Remove nth line of a given type from a header
102
- attach_function \
103
- :sam_hdr_remove_line_pos,
104
- [SamHdr, :string, :int],
105
- :int
106
-
107
- # Add or update tag key,value pairs in a header line.
108
- attach_function \
109
- :sam_hdr_update_line,
110
- [SamHdr, :string, :string, :string, :varargs],
111
- :int
112
-
113
- # Remove all lines of a given type from a header, except the one matching an ID
114
- attach_function \
115
- :sam_hdr_remove_except,
116
- [SamHdr, :string, :string, :string],
117
- :int
118
-
119
- # Remove header lines of a given type, except those in a given ID set
120
- attach_function \
121
- :sam_hdr_remove_lines,
122
- [SamHdr, :string, :string, :pointer],
123
- :int
124
-
125
- # Count the number of lines for a given header type
126
- attach_function \
127
- :sam_hdr_count_lines,
128
- [SamHdr, :string],
129
- :int
130
-
131
- # Index of the line for the types that have dedicated look-up tables (SQ, RG, PG)
132
- attach_function \
133
- :sam_hdr_line_index,
134
- [SamHdr, :string, :string],
135
- :int
136
-
137
- # Id key of the line for the types that have dedicated look-up tables (SQ, RG, PG)
138
- attach_function \
139
- :sam_hdr_line_name,
140
- [SamHdr, :string, :int],
141
- :string
142
-
143
- # Return the value associated with a key for a header line identified by ID_key:ID_val
144
- attach_function \
145
- :sam_hdr_find_tag_id,
146
- [SamHdr, :string, :string, :string, :string, KString],
147
- :int
148
-
149
- # Return the value associated with a key for a header line identified by position
150
- attach_function \
151
- :sam_hdr_find_tag_pos,
152
- [SamHdr, :string, :int, :string, KString],
153
- :int
154
-
155
- # Remove the key from the line identified by type, ID_key and ID_value.
156
- attach_function \
157
- :sam_hdr_remove_tag_id,
158
- [SamHdr, :string, :string, :string, :string],
159
- :int
160
-
161
- # Get the target id for a given reference sequence name
162
- attach_function \
163
- :sam_hdr_name2tid,
164
- [SamHdr, :string],
165
- :int
166
-
167
- # Get the reference sequence name from a target index
168
- attach_function \
169
- :sam_hdr_tid2name,
170
- [SamHdr, :int],
171
- :string
172
-
173
- # Get the reference sequence length from a target index
174
- attach_function \
175
- :sam_hdr_tid2len,
176
- [SamHdr, :int],
177
- :hts_pos_t
178
-
179
- # Generate a unique \@PG ID: value
180
- attach_function \
181
- :sam_hdr_pg_id,
182
- [SamHdr, :string],
183
- :string
184
-
185
- # Add an \@PG line.
186
- attach_function \
187
- :sam_hdr_add_pg,
188
- [SamHdr, :string, :varargs],
189
- :int
190
-
191
- # A function to help with construction of CL tags in @PG records.
192
- attach_function \
193
- :stringify_argv,
194
- %i[int pointer],
195
- :string
196
-
197
- # Increments the reference count on a header
198
- attach_function \
199
- :sam_hdr_incr_ref,
200
- [SamHdr],
201
- :void
202
-
203
- # Create a new bam1_t alignment structure
204
- attach_function \
205
- :bam_init1,
206
- [],
207
- Bam1.by_ref
208
-
209
- # Destroy a bam1_t structure
210
- attach_function \
211
- :bam_destroy1,
212
- [Bam1],
213
- :void
214
-
215
- # Read a BAM format alignment record
216
- attach_function \
217
- :bam_read1,
218
- [BGZF, Bam1],
219
- :int
220
-
221
- # Write a BAM format alignment record
222
- attach_function \
223
- :bam_write1,
224
- [BGZF, Bam1],
225
- :int
226
-
227
- # Copy alignment record data
228
- attach_function \
229
- :bam_copy1,
230
- [Bam1, Bam1],
231
- Bam1.by_ref
232
-
233
- # Create a duplicate alignment record
234
- attach_function \
235
- :bam_dup1,
236
- [Bam1],
237
- Bam1.by_ref
238
-
239
- # Calculate query length from CIGAR data
240
- attach_function \
241
- :bam_cigar2qlen,
242
- %i[int pointer],
243
- :int64
244
-
245
- # Calculate reference length from CIGAR data
246
- attach_function \
247
- :bam_cigar2rlen,
248
- %i[int pointer],
249
- :hts_pos_t
250
-
251
- # Calculate the rightmost base position of an alignment on the reference genome.
252
- attach_function \
253
- :bam_endpos,
254
- [Bam1],
255
- :hts_pos_t
256
-
257
- attach_function \
258
- :bam_str2flag,
259
- [:string],
260
- :int
261
-
262
- attach_function \
263
- :bam_flag2str,
264
- [:int],
265
- :string
266
-
267
- # Set the name of the query
268
- attach_function \
269
- :bam_set_qname,
270
- [Bam1, :string],
271
- :int
272
-
273
- # Initialise fp->idx for the current format type for SAM, BAM and CRAM types .
274
- attach_function \
275
- :sam_idx_init,
276
- [HtsFile, SamHdr, :int, :string],
277
- :int
278
-
279
- # Writes the index initialised with sam_idx_init to disk.
280
- attach_function \
281
- :sam_idx_save,
282
- [HtsFile],
283
- :int
284
-
285
- # Load a BAM (.csi or .bai) or CRAM (.crai) index file
286
- attach_function \
287
- :sam_index_load,
288
- [HtsFile, :string],
289
- HtsIdx.by_ref
290
-
291
- # Load a specific BAM (.csi or .bai) or CRAM (.crai) index file
292
- attach_function \
293
- :sam_index_load2,
294
- [HtsFile, :string, :string],
295
- HtsIdx.by_ref
296
-
297
- # Load or stream a BAM (.csi or .bai) or CRAM (.crai) index file
298
- attach_function \
299
- :sam_index_load3,
300
- [HtsFile, :string, :string, :int],
301
- HtsIdx.by_ref
302
-
303
- # Generate and save an index file
304
- attach_function \
305
- :sam_index_build,
306
- %i[string int],
307
- :int
308
-
309
- # Generate and save an index to a specific file
310
- attach_function \
311
- :sam_index_build2,
312
- %i[string string int],
313
- :int
314
-
315
- # Generate and save an index to a specific file
316
- attach_function \
317
- :sam_index_build3,
318
- %i[string string int int],
319
- :int
320
-
321
- # Create a BAM/CRAM iterator
322
- attach_function \
323
- :sam_itr_queryi,
324
- [HtsIdx, :int, :hts_pos_t, :hts_pos_t],
325
- HtsItr.by_ref
326
-
327
- # Create a SAM/BAM/CRAM iterator
328
- attach_function \
329
- :sam_itr_querys,
330
- [HtsIdx, SamHdr, :string],
331
- HtsItr.by_ref
332
-
333
- # Create a multi-region iterator
334
- attach_function \
335
- :sam_itr_regions,
336
- [HtsIdx, SamHdr, :pointer, :uint],
337
- HtsItr.by_ref
338
-
339
- # Create a multi-region iterator
340
- attach_function \
341
- :sam_itr_regarray,
342
- [HtsIdx, SamHdr, :pointer, :uint],
343
- HtsItr.by_ref
344
-
345
- # Get the next read from a SAM/BAM/CRAM iterator
346
- def self.sam_itr_next(htsfp, itr, r)
347
- # FIXME: check if htsfp is compressed BGZF
348
- raise("Null iterator") if itr.null?
349
-
350
- # FIXME: check multi
351
- hts_itr_next(htsfp[:fp][:bgzf], itr, r, htsfp)
5
+ # constants
6
+ BAM_CMATCH = 0 # CIGAR op codes; each value is also the op's index into BAM_CIGAR_STR ("M")
7
+ BAM_CINS = 1 # "I"
8
+ BAM_CDEL = 2 # "D"
9
+ BAM_CREF_SKIP = 3 # "N"
10
+ BAM_CSOFT_CLIP = 4 # "S"
11
+ BAM_CHARD_CLIP = 5 # "H"
12
+ BAM_CPAD = 6 # "P"
13
+ BAM_CEQUAL = 7 # "="
14
+ BAM_CDIFF = 8 # "X"
15
+ BAM_CBACK = 9 # "B"
16
+
17
+ BAM_CIGAR_STR = "MIDNSHP=XB" # one character per op code, indexed by bam_cigar_opchr
18
+ BAM_CIGAR_SHIFT = 4 # the op length is stored above the low 4 bits (see bam_cigar_oplen)
19
+ BAM_CIGAR_MASK = 0xf # the low 4 bits hold the op code (see bam_cigar_op)
20
+ BAM_CIGAR_TYPE = 0x3C1A7 # packed table of 2-bit entries, one per op code, read by bam_cigar_type
21
+
22
+ # macros
23
+ class << self
24
+ def bam_cigar_op(c) # op code of packed CIGAR value c
25
+ c & BAM_CIGAR_MASK # keep only the bits selected by BAM_CIGAR_MASK
26
+ end
27
+
28
+ def bam_cigar_oplen(c) # op length of packed CIGAR value c
29
+ c >> BAM_CIGAR_SHIFT # drop the op-code bits, leaving the length
30
+ end
31
+
32
+ def bam_cigar_opchr(c) # one-character string for c's op code; codes 10..15 fall in the six "?" padding characters
33
+ "#{BAM_CIGAR_STR}??????"[bam_cigar_op(c)]
34
+ end
35
+
36
+ def bam_cigar_gen(l, o) # pack length l and op code o into one CIGAR value
37
+ l << BAM_CIGAR_SHIFT | o # << binds tighter than |: (l << shift) | o
38
+ end
39
+
40
+ def bam_cigar_type(o) # 2-bit BAM_CIGAR_TYPE table entry for op code o
41
+ BAM_CIGAR_TYPE >> (o << 1) & 3 # >> binds tighter than &: (table >> 2*o) & 3
42
+ end
352
43
  end
353
44
 
354
- attach_function \
355
- :sam_parse_region,
356
- [SamHdr, :string, :pointer, :pointer, :pointer, :int],
357
- :string
358
-
359
- # SAM I/O
360
-
361
- # macros (or alias)
362
- # sam_open
363
- # sam_open_format
364
- # sam_close
365
-
366
- attach_function \
367
- :sam_open_mode,
368
- %i[string string string],
369
- :int
370
-
371
- # A version of sam_open_mode that can handle ,key=value options.
372
- attach_function \
373
- :sam_open_mode_opts,
374
- %i[string string string],
375
- :string
376
-
377
- attach_function \
378
- :sam_hdr_change_HD,
379
- [SamHdr, :string, :string],
380
- :int
381
-
382
- attach_function \
383
- :sam_parse1,
384
- [KString, SamHdr, Bam1],
385
- :int
386
-
387
- attach_function \
388
- :sam_format1,
389
- [SamHdr, Bam1, KString],
390
- :int
391
-
392
- # Read a record from a file
393
- attach_function \
394
- :sam_read1,
395
- [HtsFile, SamHdr, Bam1],
396
- :int
397
-
398
- # Write a record to a file
399
- attach_function \
400
- :sam_write1,
401
- [HtsFile, SamHdr, Bam1],
402
- :int
403
-
404
- # Return a pointer to an aux record
405
- attach_function \
406
- :bam_aux_get,
407
- [Bam1, :string], # FIXME
408
- :pointer
409
-
410
- # Get an integer aux value
411
- attach_function \
412
- :bam_aux2i,
413
- [:pointer],
414
- :int64
415
-
416
- # Get an integer aux value
417
- attach_function \
418
- :bam_aux2f,
419
- [:pointer],
420
- :double
421
-
422
- # Get a character aux value
423
- attach_function \
424
- :bam_aux2A,
425
- [:pointer],
426
- :char
427
-
428
- # Get a string aux value
429
- attach_function \
430
- :bam_aux2Z,
431
- [:pointer],
432
- :string
433
-
434
- # Get the length of an array-type ('B') tag
435
- attach_function \
436
- :bam_auxB_len,
437
- [:pointer],
438
- :uint
439
-
440
- # Get an integer value from an array-type tag
441
- attach_function \
442
- :bam_auxB2i,
443
- %i[pointer uint],
444
- :int64
445
-
446
- # Get a floating-point value from an array-type tag
447
- attach_function \
448
- :bam_auxB2f,
449
- %i[pointer uint],
450
- :double
451
-
452
- # Append tag data to a bam record
453
- attach_function \
454
- :bam_aux_append,
455
- [Bam1, :string, :string, :int, :pointer],
456
- :int
457
-
458
- # Delete tag data from a bam record
459
- attach_function \
460
- :bam_aux_del,
461
- [Bam1, :pointer],
462
- :int
463
-
464
- # Update or add a string-type tag
465
- attach_function \
466
- :bam_aux_update_str,
467
- [Bam1, :string, :int, :string],
468
- :int
469
-
470
- # Update or add an integer tag
471
- attach_function \
472
- :bam_aux_update_int,
473
- [Bam1, :string, :int64],
474
- :int
475
-
476
- # Update or add a floating-point tag
477
- attach_function \
478
- :bam_aux_update_float,
479
- [Bam1, :string, :float],
480
- :int
481
-
482
- # Update or add an array tag
483
- attach_function \
484
- :bam_aux_update_array,
485
- [Bam1, :string, :uint8, :uint32, :pointer],
486
- :int
487
-
488
- # sets an iterator over multiple
489
- attach_function \
490
- :bam_plp_init,
491
- %i[bam_plp_auto_f pointer],
492
- :bam_plp
493
-
494
- attach_function \
495
- :bam_plp_destroy,
496
- [:bam_plp],
497
- :void
498
-
499
- attach_function \
500
- :bam_plp_push,
501
- [:bam_plp, Bam1],
502
- :int
503
-
504
- attach_function \
505
- :bam_plp_next,
506
- %i[bam_plp pointer pointer pointer],
507
- BamPileup1.by_ref
508
-
509
- attach_function \
510
- :bam_plp_auto,
511
- %i[bam_plp pointer pointer pointer],
512
- BamPileup1.by_ref
513
-
514
- attach_function \
515
- :bam_plp64_next,
516
- %i[bam_plp pointer pointer pointer],
517
- BamPileup1.by_ref
518
-
519
- attach_function \
520
- :bam_plp64_auto,
521
- %i[bam_plp pointer pointer pointer],
522
- BamPileup1.by_ref
523
-
524
- attach_function \
525
- :bam_plp_set_maxcnt,
526
- %i[bam_plp int],
527
- :void
528
-
529
- attach_function \
530
- :bam_plp_reset,
531
- [:bam_plp],
532
- :void
533
-
534
- # sets a callback to initialise any per-pileup1_t fields.
535
- attach_function \
536
- :bam_plp_insertion,
537
- [BamPileup1, KString, :pointer],
538
- :int
539
-
540
- # sets a callback to initialise any per-pileup1_t fields.
541
- # bam_plp_constructor
542
-
543
- # bam_plp_destructor
544
-
545
- # Get pileup padded insertion sequence
546
- # bam_plp_insertion
547
-
548
- attach_function \
549
- :bam_mplp_init,
550
- %i[int bam_plp_auto_f pointer],
551
- :bam_mplp
552
-
553
- attach_function \
554
- :bam_mplp_init_overlaps,
555
- [:bam_mplp],
556
- :int
557
-
558
- attach_function \
559
- :bam_mplp_destroy,
560
- [:bam_mplp],
561
- :void
562
-
563
- attach_function \
564
- :bam_mplp_set_maxcnt,
565
- %i[bam_mplp int],
566
- :void
567
-
568
- attach_function \
569
- :bam_mplp_auto,
570
- %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
571
- :int
572
-
573
- attach_function \
574
- :bam_mplp64_auto,
575
- %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
576
- :int
577
-
578
- attach_function \
579
- :bam_mplp_reset,
580
- [:bam_mplp],
581
- :void
582
-
583
- # bam_mplp_constructor
584
- # bam_mplp_destructor
585
-
586
- attach_function \
587
- :sam_cap_mapq,
588
- [Bam1, :string, :hts_pos_t, :int],
589
- :int
590
-
591
- attach_function \
592
- :sam_prob_realn,
593
- [Bam1, :string, :hts_pos_t, :int],
594
- :int
45
+ BAM_FPAIRED = 1 # 0x1; each BAM_F* value is a distinct single-bit mask
46
+ BAM_FPROPER_PAIR = 2 # 0x2
47
+ BAM_FUNMAP = 4 # 0x4
48
+ BAM_FMUNMAP = 8 # 0x8
49
+ BAM_FREVERSE = 16 # 0x10; tested against b[:core][:flag] by bam_is_rev
50
+ BAM_FMREVERSE = 32 # 0x20; tested against b[:core][:flag] by bam_is_mrev
51
+ BAM_FREAD1 = 64 # 0x40
52
+ BAM_FREAD2 = 128 # 0x80
53
+ BAM_FSECONDARY = 256 # 0x100
54
+ BAM_FQCFAIL = 512 # 0x200
55
+ BAM_FDUP = 1024 # 0x400
56
+ BAM_FSUPPLEMENTARY = 2048 # 0x800
57
+
58
+ # macros
59
+ # function-like macros
60
+ class << self
61
+ def bam_is_rev(b) # true when the BAM_FREVERSE bit is set in the record's flag field
62
+ b[:core][:flag] & BAM_FREVERSE != 0 # Ruby parses this as (flag & mask) != 0
63
+ end
64
+
65
+ def bam_is_mrev(b) # true when the BAM_FMREVERSE bit is set in the record's flag field
66
+ b[:core][:flag] & BAM_FMREVERSE != 0 # Ruby parses this as (flag & mask) != 0
67
+ end
68
+
69
+ def bam_get_qname(b) # start of the record's data buffer; bam_get_cigar places the CIGAR l_qname bytes later
70
+ b[:data]
71
+ end
72
+
73
+ def bam_get_cigar(b) # data offset just past the l_qname bytes at the front of the buffer
74
+ b[:data] + b[:core][:l_qname]
75
+ end
76
+
77
+ def bam_get_seq(b) # data offset past the query name and the n_cigar 4-byte CIGAR entries
78
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] # n_cigar << 2 == n_cigar * 4 bytes
79
+ end
80
+
81
+ def bam_get_qual(b) # data offset past query name, CIGAR and the packed sequence
82
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1) # (l_qseq + 1) >> 1: two bases per byte, rounded up
83
+ end
84
+
85
+ def bam_get_aux(b) # data offset past query name, CIGAR, packed sequence and a further l_qseq bytes (presumably the qualities)
86
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1) + b[:core][:l_qseq]
87
+ end
88
+
89
+ def bam_get_l_aux(b) # bytes left in l_data after the sections skipped by bam_get_aux
90
+ b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
91
+ end
92
+
93
+ def bam_seqi(s, i) # 4-bit code at position i of packed sequence s (s must respond to [] and read_uint8)
94
+ s[i >> 1].read_uint8 >> ((~i & 1) << 2) & 0xf # shift is 4 for even i (high nibble), 0 for odd i (low nibble)
95
+ end
96
+
97
+ def bam_set_seqi(s, i, b)
98
+ s[i >> 1] = (s[i >> 1] & (0xf0 >> ((~i & 1) << 2))) | ((b) << ((~i & 1) << 2))
99
+ end
100
+
101
+ def sam_hdr_find_hd(h, ks) # shorthand: look up the "HD" line of header h, passing ks through as the last argument
102
+ sam_hdr_find_line_id(h, "HD", nil, nil, ks) # nil ID key/value: no ID is given for the HD lookup
103
+ end
104
+
105
+ def sam_hdr_find_tag_hd(h, key, ks) # shorthand: look up tag key on the "HD" line of header h, passing ks through
106
+ sam_hdr_find_tag_id(h, "HD", nil, nil, key, ks) # nil ID key/value: no ID is given for the HD lookup
107
+ end
108
+
109
+ def sam_hdr_update_hd(h, *args)
110
+ sam_hdr_update_line(h, "HD", nil, nil, *args, nil)
111
+ end
112
+
113
+ def sam_hdr_remove_tag_hd(h, key) # shorthand: remove tag key from the "HD" line of header h
114
+ sam_hdr_remove_tag_id(h, "HD", nil, nil, key) # nil ID key/value: no ID is given for the HD line
115
+ end
116
+
117
+ BAM_USER_OWNS_STRUCT = 1 # NOTE(review): assigned inside `class << self`, so this constant lands on the singleton class, not on LibHTS itself — confirm intended
118
+ BAM_USER_OWNS_DATA = 2 # NOTE(review): same scoping caveat as BAM_USER_OWNS_STRUCT
119
+
120
+ alias bam_itr_destroy hts_itr_destroy # bam_* names forward to the existing hts_*/sam_* singleton methods
121
+ alias bam_itr_queryi sam_itr_queryi # alias needs the target method to be defined before this line runs
122
+ alias bam_itr_querys sam_itr_querys
123
+ alias bam_itr_next sam_itr_next
124
+
125
+ def bam_index_load(fn) # load the index for file name fn
126
+ hts_idx_load(fn, HTS_FMT_BAI) # delegates to hts_idx_load with the HTS_FMT_BAI format constant
127
+ end
128
+
129
+ alias bam_index_build sam_index_build # bam_* spelling of sam_index_build
130
+
131
+ alias sam_itr_destroy hts_itr_destroy # sam_* spelling of hts_itr_destroy
132
+
133
+ alias sam_open hts_open # sam_* I/O names forward to the generic hts_* methods
134
+ alias sam_open_format hts_open_format
135
+ alias sam_flush hts_flush
136
+ alias sam_close hts_close
137
+ end
595
138
  end
596
139
  end