htslib 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,595 +2,99 @@
2
2
 
3
3
  module HTS
4
4
  module LibHTS
5
- # Generates a new unpopulated header structure.
6
- attach_function \
7
- :sam_hdr_init,
8
- [],
9
- SamHdr.by_ref
10
-
11
- # Read the header from a BAM compressed file.
12
- attach_function \
13
- :bam_hdr_read,
14
- [BGZF],
15
- SamHdr.by_ref
16
-
17
- # Writes the header to a BAM file.
18
- attach_function \
19
- :bam_hdr_write,
20
- [BGZF, SamHdr],
21
- :int
22
-
23
- # Frees the resources associated with a header.
24
- attach_function \
25
- :sam_hdr_destroy,
26
- [SamHdr],
27
- :void
28
-
29
- # Duplicate a header structure.
30
- attach_function \
31
- :sam_hdr_dup,
32
- [SamHdr],
33
- SamHdr.by_ref
34
-
35
- # Create a header from existing text.
36
- attach_function \
37
- :sam_hdr_parse,
38
- %i[size_t string],
39
- SamHdr.by_ref
40
-
41
- # Read a header from a SAM, BAM or CRAM file.
42
- attach_function \
43
- :sam_hdr_read,
44
- [SamFile],
45
- SamHdr.by_ref
46
-
47
- # Write a header to a SAM, BAM or CRAM file.
48
- attach_function \
49
- :sam_hdr_write,
50
- [SamFile, SamHdr],
51
- :int
52
-
53
- # Returns the current length of the header text.
54
- attach_function \
55
- :sam_hdr_length,
56
- [SamHdr],
57
- :size_t
58
-
59
- # Returns the text representation of the header.
60
- attach_function \
61
- :sam_hdr_str,
62
- [SamHdr],
63
- :string
64
-
65
- # Returns the number of references in the header.
66
- attach_function \
67
- :sam_hdr_nref,
68
- [SamHdr],
69
- :int
70
-
71
- # Add formatted lines to an existing header.
72
- attach_function \
73
- :sam_hdr_add_lines,
74
- [SamHdr, :string, :size_t],
75
- :int
76
-
77
- # Adds a single line to an existing header.
78
- attach_function \
79
- :sam_hdr_add_line,
80
- [SamHdr, :string, :varargs],
81
- :int
82
-
83
- # Returns a complete line of formatted text for a given type and ID.
84
- attach_function \
85
- :sam_hdr_find_line_id,
86
- [SamHdr, :string, :string, :string, KString],
87
- :int
88
-
89
- # Returns a complete line of formatted text for a given type and index.
90
- attach_function \
91
- :sam_hdr_find_line_pos,
92
- [SamHdr, :string, :int, KString],
93
- :int
94
-
95
- # Remove a line with given type / id from a header
96
- attach_function \
97
- :sam_hdr_remove_line_id,
98
- [SamHdr, :string, :string, :string],
99
- :int
100
-
101
- # Remove nth line of a given type from a header
102
- attach_function \
103
- :sam_hdr_remove_line_pos,
104
- [SamHdr, :string, :int],
105
- :int
106
-
107
- # Add or update tag key,value pairs in a header line.
108
- attach_function \
109
- :sam_hdr_update_line,
110
- [SamHdr, :string, :string, :string, :varargs],
111
- :int
112
-
113
- # Remove all lines of a given type from a header, except the one matching an ID
114
- attach_function \
115
- :sam_hdr_remove_except,
116
- [SamHdr, :string, :string, :string],
117
- :int
118
-
119
- # Remove header lines of a given type, except those in a given ID set
120
- attach_function \
121
- :sam_hdr_remove_lines,
122
- [SamHdr, :string, :string, :pointer],
123
- :int
124
-
125
- # Count the number of lines for a given header type
126
- attach_function \
127
- :sam_hdr_count_lines,
128
- [SamHdr, :string],
129
- :int
130
-
131
- # Index of the line for the types that have dedicated look-up tables (SQ, RG, PG)
132
- attach_function \
133
- :sam_hdr_line_index,
134
- [SamHdr, :string, :string],
135
- :int
136
-
137
- # Id key of the line for the types that have dedicated look-up tables (SQ, RG, PG)
138
- attach_function \
139
- :sam_hdr_line_name,
140
- [SamHdr, :string, :int],
141
- :string
142
-
143
- # Return the value associated with a key for a header line identified by ID_key:ID_val
144
- attach_function \
145
- :sam_hdr_find_tag_id,
146
- [SamHdr, :string, :string, :string, :string, KString],
147
- :int
148
-
149
- # Return the value associated with a key for a header line identified by position
150
- attach_function \
151
- :sam_hdr_find_tag_pos,
152
- [SamHdr, :string, :int, :string, KString],
153
- :int
154
-
155
- # Remove the key from the line identified by type, ID_key and ID_value.
156
- attach_function \
157
- :sam_hdr_remove_tag_id,
158
- [SamHdr, :string, :string, :string, :string],
159
- :int
160
-
161
- # Get the target id for a given reference sequence name
162
- attach_function \
163
- :sam_hdr_name2tid,
164
- [SamHdr, :string],
165
- :int
166
-
167
- # Get the reference sequence name from a target index
168
- attach_function \
169
- :sam_hdr_tid2name,
170
- [SamHdr, :int],
171
- :string
172
-
173
- # Get the reference sequence length from a target index
174
- attach_function \
175
- :sam_hdr_tid2len,
176
- [SamHdr, :int],
177
- :hts_pos_t
178
-
179
- # Generate a unique \@PG ID: value
180
- attach_function \
181
- :sam_hdr_pg_id,
182
- [SamHdr, :string],
183
- :string
184
-
185
- # Add an \@PG line.
186
- attach_function \
187
- :sam_hdr_add_pg,
188
- [SamHdr, :string, :varargs],
189
- :int
190
-
191
- # A function to help with construction of CL tags in @PG records.
192
- attach_function \
193
- :stringify_argv,
194
- %i[int pointer],
195
- :string
196
-
197
- # Increments the reference count on a header
198
- attach_function \
199
- :sam_hdr_incr_ref,
200
- [SamHdr],
201
- :void
202
-
203
- # Create a new bam1_t alignment structure
204
- attach_function \
205
- :bam_init1,
206
- [],
207
- Bam1.by_ref
208
-
209
- # Destroy a bam1_t structure
210
- attach_function \
211
- :bam_destroy1,
212
- [Bam1],
213
- :void
214
-
215
- # Read a BAM format alignment record
216
- attach_function \
217
- :bam_read1,
218
- [BGZF, Bam1],
219
- :int
220
-
221
- # Write a BAM format alignment record
222
- attach_function \
223
- :bam_write1,
224
- [BGZF, Bam1],
225
- :int
226
-
227
- # Copy alignment record data
228
- attach_function \
229
- :bam_copy1,
230
- [Bam1, Bam1],
231
- Bam1.by_ref
232
-
233
- # Create a duplicate alignment record
234
- attach_function \
235
- :bam_dup1,
236
- [Bam1],
237
- Bam1.by_ref
238
-
239
- # Calculate query length from CIGAR data
240
- attach_function \
241
- :bam_cigar2qlen,
242
- %i[int pointer],
243
- :int64
244
-
245
- # Calculate reference length from CIGAR data
246
- attach_function \
247
- :bam_cigar2rlen,
248
- %i[int pointer],
249
- :hts_pos_t
250
-
251
- # Calculate the rightmost base position of an alignment on the reference genome.
252
- attach_function \
253
- :bam_endpos,
254
- [Bam1],
255
- :hts_pos_t
256
-
257
- attach_function \
258
- :bam_str2flag,
259
- [:string],
260
- :int
261
-
262
- attach_function \
263
- :bam_flag2str,
264
- [:int],
265
- :string
266
-
267
- # Set the name of the query
268
- attach_function \
269
- :bam_set_qname,
270
- [Bam1, :string],
271
- :int
272
-
273
- # Initialise fp->idx for the current format type for SAM, BAM and CRAM types .
274
- attach_function \
275
- :sam_idx_init,
276
- [HtsFile, SamHdr, :int, :string],
277
- :int
278
-
279
- # Writes the index initialised with sam_idx_init to disk.
280
- attach_function \
281
- :sam_idx_save,
282
- [HtsFile],
283
- :int
284
-
285
- # Load a BAM (.csi or .bai) or CRAM (.crai) index file
286
- attach_function \
287
- :sam_index_load,
288
- [HtsFile, :string],
289
- HtsIdx.by_ref
290
-
291
- # Load a specific BAM (.csi or .bai) or CRAM (.crai) index file
292
- attach_function \
293
- :sam_index_load2,
294
- [HtsFile, :string, :string],
295
- HtsIdx.by_ref
296
-
297
- # Load or stream a BAM (.csi or .bai) or CRAM (.crai) index file
298
- attach_function \
299
- :sam_index_load3,
300
- [HtsFile, :string, :string, :int],
301
- HtsIdx.by_ref
302
-
303
- # Generate and save an index file
304
- attach_function \
305
- :sam_index_build,
306
- %i[string int],
307
- :int
308
-
309
- # Generate and save an index to a specific file
310
- attach_function \
311
- :sam_index_build2,
312
- %i[string string int],
313
- :int
314
-
315
- # Generate and save an index to a specific file
316
- attach_function \
317
- :sam_index_build3,
318
- %i[string string int int],
319
- :int
320
-
321
- # Create a BAM/CRAM iterator
322
- attach_function \
323
- :sam_itr_queryi,
324
- [HtsIdx, :int, :hts_pos_t, :hts_pos_t],
325
- HtsItr.by_ref
326
-
327
- # Create a SAM/BAM/CRAM iterator
328
- attach_function \
329
- :sam_itr_querys,
330
- [HtsIdx, SamHdr, :string],
331
- HtsItr.by_ref
332
-
333
- # Create a multi-region iterator
334
- attach_function \
335
- :sam_itr_regions,
336
- [HtsIdx, SamHdr, :pointer, :uint],
337
- HtsItr.by_ref
338
-
339
- # Create a multi-region iterator
340
- attach_function \
341
- :sam_itr_regarray,
342
- [HtsIdx, SamHdr, :pointer, :uint],
343
- HtsItr.by_ref
344
-
345
- # Get the next read from a SAM/BAM/CRAM iterator
346
- def self.sam_itr_next(htsfp, itr, r)
347
- # FIXME: check if htsfp is compressed BGZF
348
- raise("Null iterator") if itr.null?
349
-
350
- # FIXME: check multi
351
- hts_itr_next(htsfp[:fp][:bgzf], itr, r, htsfp)
5
+ # constants
6
+ BAM_CMATCH = 0
7
+ BAM_CINS = 1
8
+ BAM_CDEL = 2
9
+ BAM_CREF_SKIP = 3
10
+ BAM_CSOFT_CLIP = 4
11
+ BAM_CHARD_CLIP = 5
12
+ BAM_CPAD = 6
13
+ BAM_CEQUAL = 7
14
+ BAM_CDIFF = 8
15
+ BAM_CBACK = 9
16
+
17
+ BAM_CIGAR_STR = "MIDNSHP=XB"
18
+ BAM_CIGAR_SHIFT = 4
19
+ BAM_CIGAR_MASK = 0xf
20
+ BAM_CIGAR_TYPE = 0x3C1A7
21
+
22
+ # macros
23
+ class << self
24
+ def bam_cigar_op(c)
25
+ c & BAM_CIGAR_MASK
26
+ end
27
+
28
+ def bam_cigar_oplen(c)
29
+ c >> BAM_CIGAR_SHIFT
30
+ end
31
+
32
+ def bam_cigar_opchr(c)
33
+ ("#{BAM_CIGAR_STR}??????")[bam_cigar_op(c)]
34
+ end
35
+
36
+ def bam_cigar_gen(l, o)
37
+ l << BAM_CIGAR_SHIFT | o
38
+ end
39
+
40
+ def bam_cigar_type(o)
41
+ BAM_CIGAR_TYPE >> (o << 1) & 3
42
+ end
352
43
  end
353
44
 
354
- attach_function \
355
- :sam_parse_region,
356
- [SamHdr, :string, :pointer, :pointer, :pointer, :int],
357
- :string
358
-
359
- # SAM I/O
360
-
361
- # macros (or alias)
362
- # sam_open
363
- # sam_open_format
364
- # sam_close
365
-
366
- attach_function \
367
- :sam_open_mode,
368
- %i[string string string],
369
- :int
370
-
371
- # A version of sam_open_mode that can handle ,key=value options.
372
- attach_function \
373
- :sam_open_mode_opts,
374
- %i[string string string],
375
- :string
376
-
377
- attach_function \
378
- :sam_hdr_change_HD,
379
- [SamHdr, :string, :string],
380
- :int
381
-
382
- attach_function \
383
- :sam_parse1,
384
- [KString, SamHdr, Bam1],
385
- :int
386
-
387
- attach_function \
388
- :sam_format1,
389
- [SamHdr, Bam1, KString],
390
- :int
391
-
392
- # Read a record from a file
393
- attach_function \
394
- :sam_read1,
395
- [HtsFile, SamHdr, Bam1],
396
- :int
397
-
398
- # Write a record to a file
399
- attach_function \
400
- :sam_write1,
401
- [HtsFile, SamHdr, Bam1],
402
- :int
403
-
404
- # Return a pointer to an aux record
405
- attach_function \
406
- :bam_aux_get,
407
- [Bam1, :string], # FIXME
408
- :pointer
409
-
410
- # Get an integer aux value
411
- attach_function \
412
- :bam_aux2i,
413
- [:pointer],
414
- :int64
415
-
416
- # Get an integer aux value
417
- attach_function \
418
- :bam_aux2f,
419
- [:pointer],
420
- :double
421
-
422
- # Get a character aux value
423
- attach_function \
424
- :bam_aux2A,
425
- [:pointer],
426
- :char
427
-
428
- # Get a string aux value
429
- attach_function \
430
- :bam_aux2Z,
431
- [:pointer],
432
- :string
433
-
434
- # Get the length of an array-type ('B') tag
435
- attach_function \
436
- :bam_auxB_len,
437
- [:pointer],
438
- :uint
439
-
440
- # Get an integer value from an array-type tag
441
- attach_function \
442
- :bam_auxB2i,
443
- %i[pointer uint],
444
- :int64
445
-
446
- # Get a floating-point value from an array-type tag
447
- attach_function \
448
- :bam_auxB2f,
449
- %i[pointer uint],
450
- :double
451
-
452
- # Append tag data to a bam record
453
- attach_function \
454
- :bam_aux_append,
455
- [Bam1, :string, :string, :int, :pointer],
456
- :int
457
-
458
- # Delete tag data from a bam record
459
- attach_function \
460
- :bam_aux_del,
461
- [Bam1, :pointer],
462
- :int
463
-
464
- # Update or add a string-type tag
465
- attach_function \
466
- :bam_aux_update_str,
467
- [Bam1, :string, :int, :string],
468
- :int
469
-
470
- # Update or add an integer tag
471
- attach_function \
472
- :bam_aux_update_int,
473
- [Bam1, :string, :int64],
474
- :int
475
-
476
- # Update or add a floating-point tag
477
- attach_function \
478
- :bam_aux_update_float,
479
- [Bam1, :string, :float],
480
- :int
481
-
482
- # Update or add an array tag
483
- attach_function \
484
- :bam_aux_update_array,
485
- [Bam1, :string, :uint8, :uint32, :pointer],
486
- :int
487
-
488
- # sets an iterator over multiple
489
- attach_function \
490
- :bam_plp_init,
491
- %i[bam_plp_auto_f pointer],
492
- :bam_plp
493
-
494
- attach_function \
495
- :bam_plp_destroy,
496
- [:bam_plp],
497
- :void
498
-
499
- attach_function \
500
- :bam_plp_push,
501
- [:bam_plp, Bam1],
502
- :int
503
-
504
- attach_function \
505
- :bam_plp_next,
506
- %i[bam_plp pointer pointer pointer],
507
- BamPileup1.by_ref
508
-
509
- attach_function \
510
- :bam_plp_auto,
511
- %i[bam_plp pointer pointer pointer],
512
- BamPileup1.by_ref
513
-
514
- attach_function \
515
- :bam_plp64_next,
516
- %i[bam_plp pointer pointer pointer],
517
- BamPileup1.by_ref
518
-
519
- attach_function \
520
- :bam_plp64_auto,
521
- %i[bam_plp pointer pointer pointer],
522
- BamPileup1.by_ref
523
-
524
- attach_function \
525
- :bam_plp_set_maxcnt,
526
- %i[bam_plp int],
527
- :void
528
-
529
- attach_function \
530
- :bam_plp_reset,
531
- [:bam_plp],
532
- :void
533
-
534
- # sets a callback to initialise any per-pileup1_t fields.
535
- attach_function \
536
- :bam_plp_insertion,
537
- [BamPileup1, KString, :pointer],
538
- :int
539
-
540
- # sets a callback to initialise any per-pileup1_t fields.
541
- # bam_plp_constructor
542
-
543
- # bam_plp_destructor
544
-
545
- # Get pileup padded insertion sequence
546
- # bam_plp_insertion
547
-
548
- attach_function \
549
- :bam_mplp_init,
550
- %i[int bam_plp_auto_f pointer],
551
- :bam_mplp
552
-
553
- attach_function \
554
- :bam_mplp_init_overlaps,
555
- [:bam_mplp],
556
- :int
557
-
558
- attach_function \
559
- :bam_mplp_destroy,
560
- [:bam_mplp],
561
- :void
562
-
563
- attach_function \
564
- :bam_mplp_set_maxcnt,
565
- %i[bam_mplp int],
566
- :void
567
-
568
- attach_function \
569
- :bam_mplp_auto,
570
- %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
571
- :int
572
-
573
- attach_function \
574
- :bam_mplp64_auto,
575
- %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
576
- :int
577
-
578
- attach_function \
579
- :bam_mplp_reset,
580
- [:bam_mplp],
581
- :void
582
-
583
- # bam_mplp_constructor
584
- # bam_mplp_destructor
585
-
586
- attach_function \
587
- :sam_cap_mapq,
588
- [Bam1, :string, :hts_pos_t, :int],
589
- :int
590
-
591
- attach_function \
592
- :sam_prob_realn,
593
- [Bam1, :string, :hts_pos_t, :int],
594
- :int
45
+ BAM_FPAIRED = 1
46
+ BAM_FPROPER_PAIR = 2
47
+ BAM_FUNMAP = 4
48
+ BAM_FMUNMAP = 8
49
+ BAM_FREVERSE = 16
50
+ BAM_FMREVERSE = 32
51
+ BAM_FREAD1 = 64
52
+ BAM_FREAD2 = 128
53
+ BAM_FSECONDARY = 256
54
+ BAM_FQCFAIL = 512
55
+ BAM_FDUP = 1024
56
+ BAM_FSUPPLEMENTARY = 2048
57
+
58
+ # macros
59
+ # function-like macros
60
+ class << self
61
+ def bam_is_rev(b)
62
+ b[:core][:flag] & BAM_FREVERSE != 0
63
+ end
64
+
65
+ def bam_is_mrev(b)
66
+ b[:core][:flag] & BAM_FMREVERSE != 0
67
+ end
68
+
69
+ def bam_get_qname(b)
70
+ b[:data]
71
+ end
72
+
73
+ def bam_get_cigar(b)
74
+ b[:data] + b[:core][:l_qname]
75
+ end
76
+
77
+ def bam_get_seq(b)
78
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname]
79
+ end
80
+
81
+ def bam_get_qual(b)
82
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1)
83
+ end
84
+
85
+ def bam_get_aux(b)
86
+ b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1) + b[:core][:l_qseq]
87
+ end
88
+
89
+ def bam_get_l_aux(b)
90
+ b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
91
+ end
92
+
93
+ def bam_seqi(s, i)
94
+ s[(i) >> 1].read_uint8 >> ((~i & 1) << 2) & 0xf
95
+ end
96
+
97
+ # def bam_set_seqi(s, i, b)
98
+ end
595
99
  end
596
100
  end