htslib 0.0.8 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,594 +2,99 @@
2
2
 
3
3
  module HTS
4
4
  module LibHTS
5
- # Generates a new unpopulated header structure.
6
- attach_function \
7
- :sam_hdr_init,
8
- [],
9
- SamHdr.by_ref
10
-
11
- # Read the header from a BAM compressed file.
12
- attach_function \
13
- :bam_hdr_read,
14
- [BGZF],
15
- SamHdr.by_ref
16
-
17
- # Writes the header to a BAM file.
18
- attach_function \
19
- :bam_hdr_write,
20
- [BGZF, SamHdr],
21
- :int
22
-
23
- # Frees the resources associated with a header.
24
- attach_function \
25
- :sam_hdr_destroy,
26
- [SamHdr],
27
- :void
28
-
29
- # Duplicate a header structure.
30
- attach_function \
31
- :sam_hdr_dup,
32
- [SamHdr],
33
- SamHdr.by_ref
34
-
35
- # Create a header from existing text.
36
- attach_function \
37
- :sam_hdr_parse,
38
- %i[size_t string],
39
- SamHdr.by_ref
40
-
41
- # Read a header from a SAM, BAM or CRAM file.
42
- attach_function \
43
- :sam_hdr_read,
44
- [SamFile],
45
- SamHdr.by_ref
46
-
47
- # Write a header to a SAM, BAM or CRAM file.
48
- attach_function \
49
- :sam_hdr_write,
50
- [SamFile, SamHdr],
51
- :int
52
-
53
- # Returns the current length of the header text.
54
- attach_function \
55
- :sam_hdr_length,
56
- [SamHdr],
57
- :size_t
58
-
59
- # Returns the text representation of the header.
60
- attach_function \
61
- :sam_hdr_str,
62
- [SamHdr],
63
- :string
64
-
65
- # Returns the number of references in the header.
66
- attach_function \
67
- :sam_hdr_nref,
68
- [SamHdr],
69
- :int
70
-
71
- # Add formatted lines to an existing header.
72
- attach_function \
73
- :sam_hdr_add_lines,
74
- [SamHdr, :string, :size_t],
75
- :int
76
-
77
- # Adds a single line to an existing header.
78
- attach_function \
79
- :sam_hdr_add_line,
80
- [SamHdr, :string, :varargs],
81
- :int
82
-
83
- # Returns a complete line of formatted text for a given type and ID.
84
- attach_function \
85
- :sam_hdr_find_line_id,
86
- [SamHdr, :string, :string, :string, KString],
87
- :int
88
-
89
- # Returns a complete line of formatted text for a given type and index.
90
- attach_function \
91
- :sam_hdr_find_line_pos,
92
- [SamHdr, :string, :int, KString],
93
- :int
94
-
95
- # Remove a line with given type / id from a header
96
- attach_function \
97
- :sam_hdr_remove_line_id,
98
- [SamHdr, :string, :string, :string],
99
- :int
100
-
101
- # Remove nth line of a given type from a header
102
- attach_function \
103
- :sam_hdr_remove_line_pos,
104
- [SamHdr, :string, :int],
105
- :int
106
-
107
- # Add or update tag key,value pairs in a header line.
108
- attach_function \
109
- :sam_hdr_update_line,
110
- [SamHdr, :string, :string, :string, :varargs],
111
- :int
112
-
113
- # Remove all lines of a given type from a header, except the one matching an ID
114
- attach_function \
115
- :sam_hdr_remove_except,
116
- [SamHdr, :string, :string, :string],
117
- :int
118
-
119
- # Remove header lines of a given type, except those in a given ID set
120
- attach_function \
121
- :sam_hdr_remove_lines,
122
- [SamHdr, :string, :string, :pointer],
123
- :int
124
-
125
- # Count the number of lines for a given header type
126
- attach_function \
127
- :sam_hdr_count_lines,
128
- [SamHdr, :string],
129
- :int
130
-
131
- # Index of the line for the types that have dedicated look-up tables (SQ, RG, PG)
132
- attach_function \
133
- :sam_hdr_line_index,
134
- [SamHdr, :string, :string],
135
- :int
136
-
137
- # Id key of the line for the types that have dedicated look-up tables (SQ, RG, PG)
138
- attach_function \
139
- :sam_hdr_line_name,
140
- [SamHdr, :string, :int],
141
- :string
142
-
143
- # Return the value associated with a key for a header line identified by ID_key:ID_val
144
- attach_function \
145
- :sam_hdr_find_tag_id,
146
- [SamHdr, :string, :string, :string, :string, KString],
147
- :int
148
-
149
- # Return the value associated with a key for a header line identified by position
150
- attach_function \
151
- :sam_hdr_find_tag_pos,
152
- [SamHdr, :string, :int, :string, KString],
153
- :int
154
-
155
- # Remove the key from the line identified by type, ID_key and ID_value.
156
- attach_function \
157
- :sam_hdr_remove_tag_id,
158
- [SamHdr, :string, :string, :string, :string],
159
- :int
160
-
161
- # Get the target id for a given reference sequence name
162
- attach_function \
163
- :sam_hdr_name2tid,
164
- [SamHdr, :string],
165
- :int
166
-
167
- # Get the reference sequence name from a target index
168
- attach_function \
169
- :sam_hdr_tid2name,
170
- [SamHdr, :int],
171
- :string
172
-
173
- # Get the reference sequence length from a target index
174
- attach_function \
175
- :sam_hdr_tid2len,
176
- [SamHdr, :int],
177
- :hts_pos_t
178
-
179
- # Generate a unique \@PG ID: value
180
- attach_function \
181
- :sam_hdr_pg_id,
182
- [SamHdr, :string],
183
- :string
184
-
185
- # Add an \@PG line.
186
- attach_function \
187
- :sam_hdr_add_pg,
188
- [SamHdr, :string, :varargs],
189
- :int
190
-
191
- # A function to help with construction of CL tags in @PG records.
192
- attach_function \
193
- :stringify_argv,
194
- %i[int pointer],
195
- :string
196
-
197
- # Increments the reference count on a header
198
- attach_function \
199
- :sam_hdr_incr_ref,
200
- [SamHdr],
201
- :void
202
-
203
- # Create a new bam1_t alignment structure
204
- attach_function \
205
- :bam_init1,
206
- [],
207
- Bam1.by_ref
208
-
209
- # Destroy a bam1_t structure
210
- attach_function \
211
- :bam_destroy1,
212
- [Bam1],
213
- :void
214
-
215
- # Read a BAM format alignment record
216
- attach_function \
217
- :bam_read1,
218
- [BGZF, Bam1],
219
- :int
220
-
221
- # Write a BAM format alignment record
222
- attach_function \
223
- :bam_write1,
224
- [BGZF, Bam1],
225
- :int
226
-
227
- # Copy alignment record data
228
- attach_function \
229
- :bam_copy1,
230
- [Bam1, Bam1],
231
- Bam1.by_ref
232
-
233
- # Create a duplicate alignment record
234
- attach_function \
235
- :bam_dup1,
236
- [Bam1],
237
- Bam1.by_ref
238
-
239
- # Calculate query length from CIGAR data
240
- attach_function \
241
- :bam_cigar2qlen,
242
- %i[int pointer],
243
- :int64
244
-
245
- # Calculate reference length from CIGAR data
246
- attach_function \
247
- :bam_cigar2rlen,
248
- %i[int pointer],
249
- :hts_pos_t
250
-
251
- # Calculate the rightmost base position of an alignment on the reference genome.
252
- attach_function \
253
- :bam_endpos,
254
- [Bam1],
255
- :hts_pos_t
256
-
257
- attach_function \
258
- :bam_str2flag,
259
- [:string],
260
- :int
261
-
262
- attach_function \
263
- :bam_flag2str,
264
- [:int],
265
- :string
266
-
267
- # Set the name of the query
268
- attach_function \
269
- :bam_set_qname,
270
- [Bam1, :string],
271
- :int
272
-
273
- # Initialise fp->idx for the current format type for SAM, BAM and CRAM types.
274
- attach_function \
275
- :sam_idx_init,
276
- [HtsFile, SamHdr, :int, :string],
277
- :int
278
-
279
- # Writes the index initialised with sam_idx_init to disk.
280
- attach_function \
281
- :sam_idx_save,
282
- [HtsFile],
283
- :int
284
-
285
- # Load a BAM (.csi or .bai) or CRAM (.crai) index file
286
- attach_function \
287
- :sam_index_load,
288
- [HtsFile, :string],
289
- HtsIdx.by_ref
290
-
291
- # Load a specific BAM (.csi or .bai) or CRAM (.crai) index file
292
- attach_function \
293
- :sam_index_load2,
294
- [HtsFile, :string, :string],
295
- HtsIdx.by_ref
296
-
297
- # Load or stream a BAM (.csi or .bai) or CRAM (.crai) index file
298
- attach_function \
299
- :sam_index_load3,
300
- [HtsFile, :string, :string, :int],
301
- HtsIdx.by_ref
302
-
303
- # Generate and save an index file
304
- attach_function \
305
- :sam_index_build,
306
- %i[string int],
307
- :int
308
-
309
- # Generate and save an index to a specific file
310
- attach_function \
311
- :sam_index_build2,
312
- %i[string string int],
313
- :int
314
-
315
- # Generate and save an index to a specific file
316
- attach_function \
317
- :sam_index_build3,
318
- %i[string string int int],
319
- :int
320
-
321
- # Create a BAM/CRAM iterator
322
- attach_function \
323
- :sam_itr_queryi,
324
- [HtsIdx, :int, :hts_pos_t, :hts_pos_t],
325
- HtsItr.by_ref
326
-
327
- # Create a SAM/BAM/CRAM iterator
328
- attach_function \
329
- :sam_itr_querys,
330
- [HtsIdx, SamHdr, :string],
331
- HtsItr.by_ref
332
-
333
- # Create a multi-region iterator
334
- attach_function \
335
- :sam_itr_regions,
336
- [HtsIdx, SamHdr, :pointer, :uint],
337
- HtsItr.by_ref
338
-
339
- # Create a multi-region iterator
340
- attach_function \
341
- :sam_itr_regarray,
342
- [HtsIdx, SamHdr, :pointer, :uint],
343
- HtsItr.by_ref
344
-
345
- # Get the next read from a SAM/BAM/CRAM iterator
346
- def self.sam_itr_next(htsfp, itr, r)
347
- # FIXME: check if htsfp is compressed BGZF
348
- hts_log_error("Null iterator") if itr.null?
349
- # FIXME: check multi
350
- hts_itr_next(htsfp[:fp][:bgzf], itr, r, htsfp)
5
+ # constants
6
+ BAM_CMATCH = 0
7
+ BAM_CINS = 1
8
+ BAM_CDEL = 2
9
+ BAM_CREF_SKIP = 3
10
+ BAM_CSOFT_CLIP = 4
11
+ BAM_CHARD_CLIP = 5
12
+ BAM_CPAD = 6
13
+ BAM_CEQUAL = 7
14
+ BAM_CDIFF = 8
15
+ BAM_CBACK = 9
16
+
17
+ BAM_CIGAR_STR = "MIDNSHP=XB"
18
+ BAM_CIGAR_SHIFT = 4
19
+ BAM_CIGAR_MASK = 0xf
20
+ BAM_CIGAR_TYPE = 0x3C1A7
21
+
22
+ # macros
23
+ class << self
24
+ def bam_cigar_op(c)
25
+ c & BAM_CIGAR_MASK
26
+ end
27
+
28
+ def bam_cigar_oplen(c)
29
+ c >> BAM_CIGAR_SHIFT
30
+ end
31
+
32
+ def bam_cigar_opchr(c)
33
+ ("#{BAM_CIGAR_STR}??????")[bam_cigar_op(c)]
34
+ end
35
+
36
+ def bam_cigar_gen(l, o)
37
+ l << BAM_CIGAR_SHIFT | o
38
+ end
39
+
40
+ def bam_cigar_type(o)
41
+ BAM_CIGAR_TYPE >> (o << 1) & 3
42
+ end
351
43
  end
352
44
 
353
- attach_function \
354
- :sam_parse_region,
355
- [SamHdr, :string, :pointer, :pointer, :pointer, :int],
356
- :string
357
-
358
- # SAM I/O
359
-
360
- # macros (or alias)
361
- # sam_open
362
- # sam_open_format
363
- # sam_close
364
-
365
- attach_function \
366
- :sam_open_mode,
367
- %i[string string string],
368
- :int
369
-
370
- # A version of sam_open_mode that can handle ,key=value options.
371
- attach_function \
372
- :sam_open_mode_opts,
373
- %i[string string string],
374
- :string
375
-
376
- attach_function \
377
- :sam_hdr_change_HD,
378
- [SamHdr, :string, :string],
379
- :int
380
-
381
- attach_function \
382
- :sam_parse1,
383
- [KString, SamHdr, Bam1],
384
- :int
385
-
386
- attach_function \
387
- :sam_format1,
388
- [SamHdr, Bam1, KString],
389
- :int
390
-
391
- # Read a record from a file
392
- attach_function \
393
- :sam_read1,
394
- [HtsFile, SamHdr, Bam1],
395
- :int
396
-
397
- # Write a record to a file
398
- attach_function \
399
- :sam_write1,
400
- [HtsFile, SamHdr, Bam1],
401
- :int
402
-
403
- # Return a pointer to an aux record
404
- attach_function \
405
- :bam_aux_get,
406
- [Bam1, :string], # FIXME
407
- :pointer
408
-
409
- # Get an integer aux value
410
- attach_function \
411
- :bam_aux2i,
412
- [:pointer],
413
- :int64
414
-
415
- # Get a floating-point aux value
416
- attach_function \
417
- :bam_aux2f,
418
- [:pointer],
419
- :double
420
-
421
- # Get a character aux value
422
- attach_function \
423
- :bam_aux2A,
424
- [:pointer],
425
- :char
426
-
427
- # Get a string aux value
428
- attach_function \
429
- :bam_aux2Z,
430
- [:pointer],
431
- :string
432
-
433
- # Get the length of an array-type ('B') tag
434
- attach_function \
435
- :bam_auxB_len,
436
- [:pointer],
437
- :uint
438
-
439
- # Get an integer value from an array-type tag
440
- attach_function \
441
- :bam_auxB2i,
442
- %i[pointer uint],
443
- :int64
444
-
445
- # Get a floating-point value from an array-type tag
446
- attach_function \
447
- :bam_auxB2f,
448
- %i[pointer uint],
449
- :double
450
-
451
- # Append tag data to a bam record
452
- attach_function \
453
- :bam_aux_append,
454
- [Bam1, :string, :string, :int, :pointer],
455
- :int
456
-
457
- # Delete tag data from a bam record
458
- attach_function \
459
- :bam_aux_del,
460
- [Bam1, :pointer],
461
- :int
462
-
463
- # Update or add a string-type tag
464
- attach_function \
465
- :bam_aux_update_str,
466
- [Bam1, :string, :int, :string],
467
- :int
468
-
469
- # Update or add an integer tag
470
- attach_function \
471
- :bam_aux_update_int,
472
- [Bam1, :string, :int64],
473
- :int
474
-
475
- # Update or add a floating-point tag
476
- attach_function \
477
- :bam_aux_update_float,
478
- [Bam1, :string, :float],
479
- :int
480
-
481
- # Update or add an array tag
482
- attach_function \
483
- :bam_aux_update_array,
484
- [Bam1, :string, :uint8, :uint32, :pointer],
485
- :int
486
-
487
- # sets an iterator over multiple
488
- attach_function \
489
- :bam_plp_init,
490
- %i[bam_plp_auto_f pointer],
491
- :bam_plp
492
-
493
- attach_function \
494
- :bam_plp_destroy,
495
- [:bam_plp],
496
- :void
497
-
498
- attach_function \
499
- :bam_plp_push,
500
- [:bam_plp, Bam1],
501
- :int
502
-
503
- attach_function \
504
- :bam_plp_next,
505
- %i[bam_plp pointer pointer pointer],
506
- BamPileup1.by_ref
507
-
508
- attach_function \
509
- :bam_plp_auto,
510
- %i[bam_plp pointer pointer pointer],
511
- BamPileup1.by_ref
512
-
513
- attach_function \
514
- :bam_plp64_next,
515
- %i[bam_plp pointer pointer pointer],
516
- BamPileup1.by_ref
517
-
518
- attach_function \
519
- :bam_plp64_auto,
520
- %i[bam_plp pointer pointer pointer],
521
- BamPileup1.by_ref
522
-
523
- attach_function \
524
- :bam_plp_set_maxcnt,
525
- %i[bam_plp int],
526
- :void
527
-
528
- attach_function \
529
- :bam_plp_reset,
530
- [:bam_plp],
531
- :void
532
-
533
- # Get pileup padded insertion sequence
534
- attach_function \
535
- :bam_plp_insertion,
536
- [BamPileup1, KString, :pointer],
537
- :int
538
-
539
- # sets a callback to initialise any per-pileup1_t fields.
540
- # bam_plp_constructor
541
-
542
- # bam_plp_destructor
543
-
544
- # Get pileup padded insertion sequence
545
- # bam_plp_insertion
546
-
547
- attach_function \
548
- :bam_mplp_init,
549
- %i[int bam_plp_auto_f pointer],
550
- :bam_mplp
551
-
552
- attach_function \
553
- :bam_mplp_init_overlaps,
554
- [:bam_mplp],
555
- :int
556
-
557
- attach_function \
558
- :bam_mplp_destroy,
559
- [:bam_mplp],
560
- :void
561
-
562
- attach_function \
563
- :bam_mplp_set_maxcnt,
564
- %i[bam_mplp int],
565
- :void
566
-
567
- attach_function \
568
- :bam_mplp_auto,
569
- %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
570
- :int
571
-
572
- attach_function \
573
- :bam_mplp64_auto,
574
- %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
575
- :int
576
-
577
- attach_function \
578
- :bam_mplp_reset,
579
- [:bam_mplp],
580
- :void
581
-
582
- # bam_mplp_constructor
583
- # bam_mplp_destructor
584
-
585
- attach_function \
586
- :sam_cap_mapq,
587
- [Bam1, :string, :hts_pos_t, :int],
588
- :int
589
-
590
- attach_function \
591
- :sam_prob_realn,
592
- [Bam1, :string, :hts_pos_t, :int],
593
- :int
45
+ BAM_FPAIRED = 1
46
+ BAM_FPROPER_PAIR = 2
47
+ BAM_FUNMAP = 4
48
+ BAM_FMUNMAP = 8
49
+ BAM_FREVERSE = 16
50
+ BAM_FMREVERSE = 32
51
+ BAM_FREAD1 = 64
52
+ BAM_FREAD2 = 128
53
+ BAM_FSECONDARY = 256
54
+ BAM_FQCFAIL = 512
55
+ BAM_FDUP = 1024
56
+ BAM_FSUPPLEMENTARY = 2048
57
+
58
+ # macros
59
+ # function-like macros
60
+ class << self
61
+ def bam_is_rev(b)
62
+ b[:core][:flag] & BAM_FREVERSE != 0
63
+ end
64
+
65
+ def bam_is_mrev(b)
66
+ b[:core][:flag] & BAM_FMREVERSE != 0
67
+ end
68
+
69
+ def bam_get_qname(b)
70
+ b[:data]
71
+ end
72
+
73
+ def bam_get_cigar(b)
74
+ b[:data] + b[:core][:l_qname]
75
+ end
76
+
77
+ def bam_get_seq(b)
78
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname]
79
+ end
80
+
81
+ def bam_get_qual(b)
82
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1)
83
+ end
84
+
85
+ def bam_get_aux(b)
86
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1) + b[:core][:l_qseq]
87
+ end
88
+
89
+ def bam_get_l_aux(b)
90
+ b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
91
+ end
92
+
93
+ def bam_seqi(s, i)
94
+ s[(i) >> 1].read_uint8 >> ((~i & 1) << 2) & 0xf
95
+ end
96
+
97
+ # def bam_set_seqi(s, i, b)
98
+ end
594
99
  end
595
100
  end