htslib 0.0.2 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +50 -22
- data/lib/hts/bam/cigar.rb +11 -6
- data/lib/hts/bam/flag.rb +97 -0
- data/lib/hts/bam/header.rb +17 -7
- data/lib/hts/bam/record.rb +199 -0
- data/lib/hts/bam.rb +67 -32
- data/lib/hts/bcf/format.rb +53 -0
- data/lib/hts/bcf/header.rb +26 -0
- data/lib/hts/bcf/info.rb +94 -0
- data/lib/hts/bcf/record.rb +113 -0
- data/lib/hts/bcf.rb +73 -0
- data/lib/hts/faidx.rb +59 -0
- data/lib/hts/ffi_ext/README.md +8 -0
- data/lib/hts/ffi_ext/struct.rb +45 -0
- data/lib/hts/{ffi → libhts}/bgzf.rb +1 -1
- data/lib/hts/{ffi → libhts}/constants.rb +126 -47
- data/lib/hts/{ffi → libhts}/faidx.rb +1 -1
- data/lib/hts/{ffi → libhts}/hfile.rb +1 -1
- data/lib/hts/{ffi → libhts}/hts.rb +13 -1
- data/lib/hts/{ffi → libhts}/kfunc.rb +1 -1
- data/lib/hts/libhts/sam.rb +102 -0
- data/lib/hts/{ffi/sam.rb → libhts/sam_funcs.rb} +24 -120
- data/lib/hts/{ffi → libhts}/tbx.rb +1 -1
- data/lib/hts/libhts/vcf.rb +226 -0
- data/lib/hts/{ffi/vcf.rb → libhts/vcf_funcs.rb} +1 -70
- data/lib/hts/libhts.rb +33 -0
- data/lib/hts/tabix.rb +28 -0
- data/lib/hts/version.rb +1 -1
- data/lib/htslib.rb +16 -19
- metadata +48 -27
- data/lib/hts/bam/alignment.rb +0 -155
- data/lib/hts/fai.rb +0 -57
- data/lib/hts/ffi.rb +0 -85
- data/lib/hts/tbx.rb +0 -16
- data/lib/hts/vcf/header.rb +0 -24
- data/lib/hts/vcf/variant.rb +0 -43
- data/lib/hts/vcf.rb +0 -42
@@ -1,103 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
5
|
-
# constants
|
6
|
-
BAM_CMATCH = 0
|
7
|
-
BAM_CINS = 1
|
8
|
-
BAM_CDEL = 2
|
9
|
-
BAM_CREF_SKIP = 3
|
10
|
-
BAM_CSOFT_CLIP = 4
|
11
|
-
BAM_CHARD_CLIP = 5
|
12
|
-
BAM_CPAD = 6
|
13
|
-
BAM_CEQUAL = 7
|
14
|
-
BAM_CDIFF = 8
|
15
|
-
BAM_CBACK = 9
|
16
|
-
|
17
|
-
BAM_CIGAR_STR = 'MIDNSHP=XB'
|
18
|
-
BAM_CIGAR_STR_PADDED = 'MIDNSHP=XB??????'
|
19
|
-
BAM_CIGAR_SHIFT = 4
|
20
|
-
BAM_CIGAR_MASK = 0xf
|
21
|
-
BAM_CIGAR_TYPE = 0x3C1A7
|
22
|
-
|
23
|
-
# macros
|
24
|
-
class << self
|
25
|
-
def bam_cigar_op(c)
|
26
|
-
c & BAM_CIGAR_MASK
|
27
|
-
end
|
28
|
-
|
29
|
-
def bam_cigar_oplen(c)
|
30
|
-
c >> BAM_CIGAR_SHIFT
|
31
|
-
end
|
32
|
-
|
33
|
-
def bam_cigar_opchr(c)
|
34
|
-
("#{BAM_CIGAR_STR}??????")[bam_cigar_op(c)]
|
35
|
-
end
|
36
|
-
|
37
|
-
def bam_cigar_gen(l, o)
|
38
|
-
l << BAM_CIGAR_SHIFT | o
|
39
|
-
end
|
40
|
-
|
41
|
-
def bam_cigar_type(o)
|
42
|
-
BAM_CIGAR_TYPE >> (o << 1) & 3
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
BAM_FPAIRED = 1
|
47
|
-
BAM_FPROPER_PAIR = 2
|
48
|
-
BAM_FUNMAP = 4
|
49
|
-
BAM_FMUNMAP = 8
|
50
|
-
BAM_FREVERSE = 16
|
51
|
-
BAM_FMREVERSE = 32
|
52
|
-
BAM_FREAD1 = 64
|
53
|
-
BAM_FREAD2 = 128
|
54
|
-
BAM_FSECONDARY = 256
|
55
|
-
BAM_FQCFAIL = 512
|
56
|
-
BAM_FDUP = 1024
|
57
|
-
BAM_FSUPPLEMENTARY = 2048
|
58
|
-
|
59
|
-
# macros
|
60
|
-
# function-like macros
|
61
|
-
class << self
|
62
|
-
def bam_is_rev(b)
|
63
|
-
b[:core][:flag] & BAM_FREVERSE != 0
|
64
|
-
end
|
65
|
-
|
66
|
-
def bam_is_mrev(b)
|
67
|
-
b[:core][:flag] & BAM_FMREVERSE != 0
|
68
|
-
end
|
69
|
-
|
70
|
-
def bam_get_qname(b)
|
71
|
-
b[:data]
|
72
|
-
end
|
73
|
-
|
74
|
-
def bam_get_cigar(b)
|
75
|
-
b[:data] + b[:core][:l_qname]
|
76
|
-
end
|
77
|
-
|
78
|
-
def bam_get_seq(b)
|
79
|
-
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname]
|
80
|
-
end
|
81
|
-
|
82
|
-
def bam_get_qual(b)
|
83
|
-
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1)
|
84
|
-
end
|
85
|
-
|
86
|
-
def bam_get_aux(b)
|
87
|
-
b[:data] + (b[:core][:n_cigar] << 2) + b[:core][:l_qname] + ((b[:core][:l_qseq] + 1) >> 1) + b[:core][:l_qseq]
|
88
|
-
end
|
89
|
-
|
90
|
-
def bam_get_l_aux(b)
|
91
|
-
b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
|
92
|
-
end
|
93
|
-
|
94
|
-
def bam_seqi(s, i)
|
95
|
-
s[(i) >> 1].read_uint8 >> ((~i & 1) << 2) & 0xf
|
96
|
-
end
|
97
|
-
|
98
|
-
# def bam_set_seqi(s, i, b)
|
99
|
-
end
|
100
|
-
|
4
|
+
module LibHTS
|
101
5
|
# Generates a new unpopulated header structure.
|
102
6
|
attach_function \
|
103
7
|
:sam_hdr_init,
|
@@ -441,7 +345,7 @@ module HTS
|
|
441
345
|
# Get the next read from a SAM/BAM/CRAM iterator
|
442
346
|
def self.sam_itr_next(htsfp, itr, r)
|
443
347
|
# FIXME: check if htsfp is compressed BGZF
|
444
|
-
hts_log_error(
|
348
|
+
hts_log_error("Null iterator") if itr.null?
|
445
349
|
# FIXME: check multi
|
446
350
|
hts_itr_next(htsfp[:fp][:bgzf], itr, r, htsfp)
|
447
351
|
end
|
@@ -518,7 +422,7 @@ module HTS
|
|
518
422
|
attach_function \
|
519
423
|
:bam_aux2A,
|
520
424
|
[:pointer],
|
521
|
-
:
|
425
|
+
:char
|
522
426
|
|
523
427
|
# Get a string aux value
|
524
428
|
attach_function \
|
@@ -584,52 +488,52 @@ module HTS
|
|
584
488
|
attach_function \
|
585
489
|
:bam_plp_init,
|
586
490
|
%i[bam_plp_auto_f pointer],
|
587
|
-
|
491
|
+
:bam_plp
|
588
492
|
|
589
493
|
attach_function \
|
590
494
|
:bam_plp_destroy,
|
591
|
-
[
|
495
|
+
[:bam_plp],
|
592
496
|
:void
|
593
497
|
|
594
498
|
attach_function \
|
595
499
|
:bam_plp_push,
|
596
|
-
[
|
500
|
+
[:bam_plp, Bam1],
|
597
501
|
:int
|
598
502
|
|
599
503
|
attach_function \
|
600
504
|
:bam_plp_next,
|
601
|
-
[
|
602
|
-
|
505
|
+
%i[bam_plp pointer pointer pointer],
|
506
|
+
BamPileup1.by_ref
|
603
507
|
|
604
508
|
attach_function \
|
605
509
|
:bam_plp_auto,
|
606
|
-
[
|
607
|
-
|
510
|
+
%i[bam_plp pointer pointer pointer],
|
511
|
+
BamPileup1.by_ref
|
608
512
|
|
609
513
|
attach_function \
|
610
514
|
:bam_plp64_next,
|
611
|
-
[
|
612
|
-
|
515
|
+
%i[bam_plp pointer pointer pointer],
|
516
|
+
BamPileup1.by_ref
|
613
517
|
|
614
518
|
attach_function \
|
615
519
|
:bam_plp64_auto,
|
616
|
-
[
|
617
|
-
|
520
|
+
%i[bam_plp pointer pointer pointer],
|
521
|
+
BamPileup1.by_ref
|
618
522
|
|
619
523
|
attach_function \
|
620
524
|
:bam_plp_set_maxcnt,
|
621
|
-
[
|
525
|
+
%i[bam_plp int],
|
622
526
|
:void
|
623
527
|
|
624
528
|
attach_function \
|
625
529
|
:bam_plp_reset,
|
626
|
-
[
|
530
|
+
[:bam_plp],
|
627
531
|
:void
|
628
532
|
|
629
533
|
# sets a callback to initialise any per-pileup1_t fields.
|
630
534
|
attach_function \
|
631
535
|
:bam_plp_insertion,
|
632
|
-
[
|
536
|
+
[BamPileup1, KString, :pointer],
|
633
537
|
:int
|
634
538
|
|
635
539
|
# sets a callback to initialise any per-pileup1_t fields.
|
@@ -643,36 +547,36 @@ module HTS
|
|
643
547
|
attach_function \
|
644
548
|
:bam_mplp_init,
|
645
549
|
%i[int bam_plp_auto_f pointer],
|
646
|
-
|
550
|
+
:bam_mplp
|
647
551
|
|
648
552
|
attach_function \
|
649
553
|
:bam_mplp_init_overlaps,
|
650
|
-
[
|
554
|
+
[:bam_mplp],
|
651
555
|
:int
|
652
556
|
|
653
557
|
attach_function \
|
654
558
|
:bam_mplp_destroy,
|
655
|
-
[
|
559
|
+
[:bam_mplp],
|
656
560
|
:void
|
657
561
|
|
658
562
|
attach_function \
|
659
563
|
:bam_mplp_set_maxcnt,
|
660
|
-
[
|
564
|
+
%i[bam_mplp int],
|
661
565
|
:void
|
662
566
|
|
663
567
|
attach_function \
|
664
568
|
:bam_mplp_auto,
|
665
|
-
[
|
569
|
+
%i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
|
666
570
|
:int
|
667
571
|
|
668
572
|
attach_function \
|
669
573
|
:bam_mplp64_auto,
|
670
|
-
[
|
574
|
+
%i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
|
671
575
|
:int
|
672
576
|
|
673
577
|
attach_function \
|
674
578
|
:bam_mplp_reset,
|
675
|
-
[
|
579
|
+
[:bam_mplp],
|
676
580
|
:void
|
677
581
|
|
678
582
|
# bam_mplp_constructor
|
@@ -0,0 +1,226 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "vcf_funcs"
|
4
|
+
|
5
|
+
module HTS
|
6
|
+
module LibHTS
|
7
|
+
# constants
|
8
|
+
BCF_HL_FLT = 0 # header line
|
9
|
+
BCF_HL_INFO = 1
|
10
|
+
BCF_HL_FMT = 2
|
11
|
+
BCF_HL_CTG = 3
|
12
|
+
BCF_HL_STR = 4 # structured header line TAG=<A=..,B=..>
|
13
|
+
BCF_HL_GEN = 5 # generic header line
|
14
|
+
|
15
|
+
BCF_HT_FLAG = 0 # header type
|
16
|
+
BCF_HT_INT = 1
|
17
|
+
BCF_HT_REAL = 2
|
18
|
+
BCF_HT_STR = 3
|
19
|
+
BCF_HT_LONG = (BCF_HT_INT | 0x100) # BCF_HT_INT, but for int64_t values; VCF only!
|
20
|
+
|
21
|
+
BCF_VL_FIXED = 0 # variable length
|
22
|
+
BCF_VL_VAR = 1
|
23
|
+
BCF_VL_A = 2
|
24
|
+
BCF_VL_G = 3
|
25
|
+
BCF_VL_R = 4
|
26
|
+
|
27
|
+
BCF_DT_ID = 0 # dictionary type
|
28
|
+
BCF_DT_CTG = 1
|
29
|
+
BCF_DT_SAMPLE = 2
|
30
|
+
|
31
|
+
BCF_BT_NULL = 0
|
32
|
+
BCF_BT_INT8 = 1
|
33
|
+
BCF_BT_INT16 = 2
|
34
|
+
BCF_BT_INT32 = 3
|
35
|
+
BCF_BT_INT64 = 4 # Unofficial, for internal use only.
|
36
|
+
BCF_BT_FLOAT = 5
|
37
|
+
BCF_BT_CHAR = 7
|
38
|
+
|
39
|
+
VCF_REF = 0
|
40
|
+
VCF_SNP = 1
|
41
|
+
VCF_MNP = 2
|
42
|
+
VCF_INDEL = 4
|
43
|
+
VCF_OTHER = 8
|
44
|
+
VCF_BND = 16 # breakend
|
45
|
+
VCF_OVERLAP = 32 # overlapping deletion, ALT=*
|
46
|
+
|
47
|
+
BCF1_DIRTY_ID = 1
|
48
|
+
BCF1_DIRTY_ALS = 2
|
49
|
+
BCF1_DIRTY_FLT = 4
|
50
|
+
BCF1_DIRTY_INF = 8
|
51
|
+
|
52
|
+
BCF_ERR_CTG_UNDEF = 1
|
53
|
+
BCF_ERR_TAG_UNDEF = 2
|
54
|
+
BCF_ERR_NCOLS = 4
|
55
|
+
BCF_ERR_LIMITS = 8
|
56
|
+
BCF_ERR_CHAR = 16
|
57
|
+
BCF_ERR_CTG_INVALID = 32
|
58
|
+
BCF_ERR_TAG_INVALID = 64
|
59
|
+
|
60
|
+
# macros
|
61
|
+
class << self
  # Ruby counterparts of the "...1" convenience names: each is simply
  # another name for the already-attached function.
  alias bcf_init1 bcf_init
  alias bcf_read1 bcf_read
  alias vcf_read1 vcf_read
  alias bcf_write1 bcf_write
  alias vcf_write1 vcf_write
  alias bcf_destroy1 bcf_destroy
  alias bcf_empty1 bcf_empty
  alias vcf_parse1 vcf_parse
  alias bcf_clear1 bcf_clear
  alias vcf_format1 vcf_format

  alias bcf_open hts_open
  alias vcf_open hts_open
  # hts_flush may not have been attached. Inside `class << self`, `self` is
  # the singleton class, so `respond_to?` would ask whether the singleton
  # class object itself answers to hts_flush (it never does).
  # `method_defined?` asks whether the module-level method exists, which is
  # the question that matters before aliasing it.
  if method_defined?(:hts_flush)
    alias bcf_flush hts_flush
    alias vcf_flush hts_flush
  end
  alias bcf_close hts_close
  alias vcf_close hts_close
end
|
82
|
+
|
83
|
+
BCF_UN_STR = 1 # up to ALT inclusive
|
84
|
+
BCF_UN_FLT = 2 # up to FILTER
|
85
|
+
BCF_UN_INFO = 4 # up to INFO
|
86
|
+
BCF_UN_SHR = (BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO) # all shared information
|
87
|
+
BCF_UN_FMT = 8 # unpack format and each sample
|
88
|
+
BCF_UN_IND = BCF_UN_FMT # a synonym of BCF_UN_FMT
|
89
|
+
BCF_UN_ALL = (BCF_UN_SHR | BCF_UN_FMT) # everything
|
90
|
+
|
91
|
+
class << self
|
92
|
+
# Reads the BCF_DT_SAMPLE slot of the header's :n array.
def bcf_hdr_nsamples(hdr)
  counts = hdr[:n]
  counts[BCF_DT_SAMPLE]
end
|
95
|
+
|
96
|
+
# Delegates to bcf_update_info with the BCF_HT_INT type tag.
def bcf_update_info_int32(hdr, line, key, values, n)
  type_tag = BCF_HT_INT
  bcf_update_info(hdr, line, key, values, n, type_tag)
end
|
99
|
+
|
100
|
+
# Delegates to bcf_update_info with the BCF_HT_REAL type tag.
def bcf_update_info_float(hdr, line, key, values, n)
  type_tag = BCF_HT_REAL
  bcf_update_info(hdr, line, key, values, n, type_tag)
end
|
103
|
+
|
104
|
+
# Delegates to bcf_update_info with the BCF_HT_FLAG type tag.
def bcf_update_info_flag(hdr, line, key, string, n)
  type_tag = BCF_HT_FLAG
  bcf_update_info(hdr, line, key, string, n, type_tag)
end
|
107
|
+
|
108
|
+
# Delegates to bcf_update_info with the BCF_HT_STR type tag; the count
# argument is always 1 for strings.
def bcf_update_info_string(hdr, line, key, string)
  count = 1
  bcf_update_info(hdr, line, key, string, count, BCF_HT_STR)
end
|
111
|
+
|
112
|
+
# Delegates to bcf_update_format with the BCF_HT_INT type tag.
def bcf_update_format_int32(hdr, line, key, values, n)
  type_tag = BCF_HT_INT
  bcf_update_format(hdr, line, key, values, n, type_tag)
end
|
116
|
+
|
117
|
+
# Delegates to bcf_update_format with the BCF_HT_REAL type tag.
def bcf_update_format_float(hdr, line, key, values, n)
  type_tag = BCF_HT_REAL
  bcf_update_format(hdr, line, key, values, n, type_tag)
end
|
121
|
+
|
122
|
+
# Delegates to bcf_update_format with the BCF_HT_STR type tag.
def bcf_update_format_char(hdr, line, key, values, n)
  type_tag = BCF_HT_STR
  bcf_update_format(hdr, line, key, values, n, type_tag)
end
|
126
|
+
|
127
|
+
# Delegates to bcf_update_format for the fixed "GT" key, using the
# BCF_HT_INT type tag.
def bcf_update_genotypes(hdr, line, gts, n)
  key = "GT"
  bcf_update_format(hdr, line, key, gts, n, BCF_HT_INT)
end
|
130
|
+
|
131
|
+
# Encodes an allele index as a genotype value with the low (phase) bit
# set: (idx + 1) shifted left by one, OR-ed with 1.
def bcf_gt_phased(idx)
  shifted = (idx + 1) << 1
  shifted | 1
end
|
134
|
+
|
135
|
+
# Encodes an allele index as a genotype value with the low (phase) bit
# clear: (idx + 1) shifted left by one.
def bcf_gt_unphased(idx)
  next_index = idx + 1
  next_index << 1
end
|
138
|
+
|
139
|
+
# The genotype value used for a missing allele: always 0, which is what
# bcf_gt_unphased(-1) evaluates to.
def bcf_gt_missing
  missing_value = 0
  missing_value
end
|
142
|
+
|
143
|
+
# Tells whether an encoded genotype value carries the "missing" allele.
#
# Returns 1 when the allele part (everything above the phase bit) is zero,
# otherwise 0.
#
# The previous body, `(val >> 1 ? 0 : 1)`, was a literal port of a C
# ternary. In Ruby every Integer (including 0) is truthy, so it returned 0
# for all inputs. The zero test is now explicit.
def bcf_gt_is_missing(val)
  (val >> 1).zero? ? 1 : 0
end
|
146
|
+
|
147
|
+
# Extracts the low (phase) bit of an encoded genotype value.
#
# Returns the Integer 1 or 0, not true/false. Because 0 is truthy in Ruby,
# callers should compare the result with 1 rather than use it directly in
# a condition.
def bcf_gt_is_phased(idx)
  phase_bit = idx & 1
  phase_bit
end
|
150
|
+
|
151
|
+
# Decodes the allele index from an encoded genotype value: drops the low
# (phase) bit, then subtracts 1. A value whose allele part is zero
# therefore decodes to -1.
def bcf_gt_allele(val)
  allele_part = val >> 1
  allele_part - 1
end
|
154
|
+
|
155
|
+
# Maps an unordered pair of allele indices to a single index:
# hi * (hi + 1) / 2 + lo, where hi is the larger of the two and lo the
# smaller. The argument order does not affect the result.
def bcf_alleles2gt(a, b)
  hi, lo = a > b ? [a, b] : [b, a]
  hi * (hi + 1) / 2 + lo
end
|
158
|
+
|
159
|
+
# Delegates to bcf_get_info_values with the BCF_HT_INT type tag.
def bcf_get_info_int32(hdr, line, tag, dst, ndst)
  type_tag = BCF_HT_INT
  bcf_get_info_values(hdr, line, tag, dst, ndst, type_tag)
end
|
162
|
+
|
163
|
+
# Delegates to bcf_get_info_values with the BCF_HT_REAL type tag.
def bcf_get_info_float(hdr, line, tag, dst, ndst)
  type_tag = BCF_HT_REAL
  bcf_get_info_values(hdr, line, tag, dst, ndst, type_tag)
end
|
166
|
+
|
167
|
+
# Delegates to bcf_get_info_values with the BCF_HT_STR type tag.
def bcf_get_info_string(hdr, line, tag, dst, ndst)
  type_tag = BCF_HT_STR
  bcf_get_info_values(hdr, line, tag, dst, ndst, type_tag)
end
|
170
|
+
|
171
|
+
# Delegates to bcf_get_info_values with the BCF_HT_FLAG type tag.
def bcf_get_info_flag(hdr, line, tag, dst, ndst)
  type_tag = BCF_HT_FLAG
  bcf_get_info_values(hdr, line, tag, dst, ndst, type_tag)
end
|
174
|
+
|
175
|
+
# Delegates to bcf_get_format_values with the BCF_HT_INT type tag.
def bcf_get_format_int32(hdr, line, tag, dst, ndst)
  type_tag = BCF_HT_INT
  bcf_get_format_values(hdr, line, tag, dst, ndst, type_tag)
end
|
178
|
+
|
179
|
+
# Delegates to bcf_get_format_values with the BCF_HT_REAL type tag.
def bcf_get_format_float(hdr, line, tag, dst, ndst)
  type_tag = BCF_HT_REAL
  bcf_get_format_values(hdr, line, tag, dst, ndst, type_tag)
end
|
182
|
+
|
183
|
+
# Delegates to bcf_get_format_values with the BCF_HT_STR type tag.
def bcf_get_format_char(hdr, line, tag, dst, ndst)
  type_tag = BCF_HT_STR
  bcf_get_format_values(hdr, line, tag, dst, ndst, type_tag)
end
|
186
|
+
|
187
|
+
# Delegates to bcf_get_format_values for the fixed "GT" tag, using the
# BCF_HT_INT type tag.
def bcf_get_genotypes(hdr, line, dst, ndst)
  tag = "GT"
  bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
end
|
190
|
+
|
191
|
+
# Returns the :key of the int_id-th BcfIdpair in the header's id table for
# the given dictionary type. The entry is located by pointer arithmetic:
# table base + BcfIdpair.size * int_id.
def bcf_hdr_int2id(hdr, type, int_id)
  table_base = hdr[:id][type].to_ptr
  entry = LibHTS::BcfIdpair.new(table_base + LibHTS::BcfIdpair.size * int_id)
  entry[:key]
end
|
197
|
+
|
198
|
+
# Delegates to bcf_hdr_id2int using the BCF_DT_CTG dictionary.
def bcf_hdr_name2id(hdr, id)
  dictionary = BCF_DT_CTG
  bcf_hdr_id2int(hdr, dictionary, id)
end
|
201
|
+
|
202
|
+
# Returns the :key of the rid-th BcfIdpair in the header's BCF_DT_CTG id
# table, or nil when the header is null or rid falls outside
# 0...hdr[:n][BCF_DT_CTG].
def bcf_hdr_id2name(hdr, rid)
  return nil if hdr.null?
  return nil if rid < 0 || rid >= hdr[:n][LibHTS::BCF_DT_CTG]

  table_base = hdr[:id][LibHTS::BCF_DT_CTG].to_ptr
  entry = LibHTS::BcfIdpair.new(table_base + LibHTS::BcfIdpair.size * rid)
  entry[:key]
end
|
210
|
+
|
211
|
+
# Looks up the int_id-th BcfIdpair in the header's BCF_DT_ID table and
# returns bits 8..11 of its [:val][:info][type] word
# (shift right by 8, then mask with 0xf).
def bcf_hdr_id2length(hdr, type, int_id)
  table_base = hdr[:id][LibHTS::BCF_DT_ID].to_ptr
  entry = LibHTS::BcfIdpair.new(table_base + LibHTS::BcfIdpair.size * int_id)
  info_word = entry[:val][:info][type]
  (info_word >> 8) & 0xf
end
|
217
|
+
|
218
|
+
# Looks up the int_id-th BcfIdpair in the header's BCF_DT_ID table and
# returns its [:val][:info][type] word shifted right by 12 bits.
def bcf_hdr_id2number(hdr, type, int_id)
  table_base = hdr[:id][LibHTS::BCF_DT_ID].to_ptr
  entry = LibHTS::BcfIdpair.new(table_base + LibHTS::BcfIdpair.size * int_id)
  info_word = entry[:val][:info][type]
  info_word >> 12
end
|
224
|
+
end
|
225
|
+
end
|
226
|
+
end
|
@@ -1,76 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
module HTS
|
4
|
-
module
|
5
|
-
# constants
|
6
|
-
BCF_HL_FLT = 0 # header line
|
7
|
-
BCF_HL_INFO = 1
|
8
|
-
BCF_HL_FMT = 2
|
9
|
-
BCF_HL_CTG = 3
|
10
|
-
BCF_HL_STR = 4 # structured header line TAG=<A=..,B=..>
|
11
|
-
BCF_HL_GEN = 5 # generic header line
|
12
|
-
BCF_HT_FLAG = 0 # header type
|
13
|
-
|
14
|
-
BCF_HT_INT = 1
|
15
|
-
BCF_HT_REAL = 2
|
16
|
-
BCF_HT_STR = 3
|
17
|
-
BCF_HT_LONG = (BCF_HT_INT | 0x100) # BCF_HT_INT, but for int64_t values; VCF only!
|
18
|
-
|
19
|
-
BCF_VL_FIXED = 0 # variable length
|
20
|
-
BCF_VL_VAR = 1
|
21
|
-
BCF_VL_A = 2
|
22
|
-
BCF_VL_G = 3
|
23
|
-
BCF_VL_R = 4
|
24
|
-
|
25
|
-
BCF_DT_ID = 0 # dictionary type
|
26
|
-
BCF_DT_CTG = 1
|
27
|
-
BCF_DT_SAMPLE = 2
|
28
|
-
|
29
|
-
BCF_BT_NULL = 0
|
30
|
-
BCF_BT_INT8 = 1
|
31
|
-
BCF_BT_INT16 = 2
|
32
|
-
BCF_BT_INT32 = 3
|
33
|
-
BCF_BT_INT64 = 4 # Unofficial, for internal use only.
|
34
|
-
BCF_BT_FLOAT = 5
|
35
|
-
BCF_BT_CHAR = 7
|
36
|
-
|
37
|
-
VCF_REF = 0
|
38
|
-
VCF_SNP = 1
|
39
|
-
VCF_MNP = 2
|
40
|
-
VCF_INDEL = 4
|
41
|
-
VCF_OTHER = 8
|
42
|
-
VCF_BND = 16 # breakend
|
43
|
-
VCF_OVERLAP = 32 # overlapping deletion, ALT=*
|
44
|
-
|
45
|
-
BCF1_DIRTY_ID = 1
|
46
|
-
BCF1_DIRTY_ALS = 2
|
47
|
-
BCF1_DIRTY_FLT = 4
|
48
|
-
BCF1_DIRTY_INF = 8
|
49
|
-
|
50
|
-
BCF_ERR_CTG_UNDEF = 1
|
51
|
-
BCF_ERR_TAG_UNDEF = 2
|
52
|
-
BCF_ERR_NCOLS = 4
|
53
|
-
BCF_ERR_LIMITS = 8
|
54
|
-
BCF_ERR_CHAR = 16
|
55
|
-
BCF_ERR_CTG_INVALID = 32
|
56
|
-
BCF_ERR_TAG_INVALID = 64
|
57
|
-
|
58
|
-
# macros
|
59
|
-
class << self
|
60
|
-
def bcf_hdr_nsamples(hdr)
|
61
|
-
hdr[:n][BCF_DT_SAMPLE]
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
# constants
|
66
|
-
BCF_UN_STR = 1 # up to ALT inclusive
|
67
|
-
BCF_UN_FLT = 2 # up to FILTER
|
68
|
-
BCF_UN_INFO = 4 # up to INFO
|
69
|
-
BCF_UN_SHR = (BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO) # all shared information
|
70
|
-
BCF_UN_FMT = 8 # unpack format and each sample
|
71
|
-
BCF_UN_IND = BCF_UN_FMT # a synonym of BCF_UN_FMT
|
72
|
-
BCF_UN_ALL = (BCF_UN_SHR | BCF_UN_FMT) # everything
|
73
|
-
|
4
|
+
module LibHTS
|
74
5
|
attach_function \
|
75
6
|
:bcf_hdr_init,
|
76
7
|
[:string],
|
data/lib/hts/libhts.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "ffi_ext/struct"
|
4
|
+
|
5
|
+
module HTS
  # FFI binding module: loads the shared library found at HTS.lib_path and
  # hosts the attached functions.
  module LibHTS
    extend FFI::Library

    # Load the shared library; on failure re-raise with the path that was
    # tried appended to the original message.
    begin
      ffi_lib HTS.lib_path
    rescue LoadError => e
      raise LoadError, "#{e}\nCould not find #{HTS.lib_path}"
    end

    class << self
      # Same as FFI::Library#attach_function, except that a symbol missing
      # from the loaded library produces a warning instead of an exception,
      # so binding can continue with the remaining functions.
      def attach_function(*)
        super
      rescue FFI::NotFoundError => e
        warn e.message
      end
    end
  end
end
|
22
|
+
|
23
|
+
require_relative "libhts/constants"
|
24
|
+
|
25
|
+
# Loaded in roughly alphabetical order (note: sam is required before kfunc).
|
26
|
+
require_relative "libhts/bgzf"
|
27
|
+
require_relative "libhts/faidx"
|
28
|
+
require_relative "libhts/hfile"
|
29
|
+
require_relative "libhts/hts"
|
30
|
+
require_relative "libhts/sam"
|
31
|
+
require_relative "libhts/kfunc"
|
32
|
+
require_relative "libhts/tbx"
|
33
|
+
require_relative "libhts/vcf"
|
data/lib/hts/tabix.rb
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Based on hts-python
|
4
|
+
# https://github.com/quinlan-lab/hts-python
|
5
|
+
|
6
|
+
module HTS
|
7
|
+
# Skeleton of an IO-like wrapper; most methods are still empty placeholders.
class Tabix
  class << self
    # `Tabix.open` is another name for `Tabix.new`; a block given to it is
    # forwarded to #initialize.
    alias open new
  end

  # IO-like API: when a block is given, yields the new instance and calls
  # #close afterwards, even if the block raises.
  def initialize
    return unless block_given?

    begin
      yield self
    ensure
      close
    end
  end

  # Placeholder; not implemented yet.
  def build; end

  # Placeholder; not implemented yet.
  def sequences; end

  # Placeholder so that the block form of .open/.new can run its `ensure`
  # step. Previously no #close existed and the block form raised NameError.
  # NOTE(review): nothing is opened by this class yet, so there is nothing
  # to release here; fill this in once the class holds a real handle.
  def close; end

  # def __call__\
end
|
28
|
+
end
|
data/lib/hts/version.rb
CHANGED
data/lib/htslib.rb
CHANGED
@@ -1,27 +1,27 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require
|
3
|
+
require "ffi"
|
4
4
|
|
5
|
-
require
|
5
|
+
require "hts/version"
|
6
6
|
|
7
7
|
module HTS
|
8
8
|
class Error < StandardError; end
|
9
9
|
|
10
10
|
class << self
|
11
|
-
attr_accessor :
|
11
|
+
attr_accessor :lib_path
|
12
12
|
|
13
13
|
def search_htslib(name = nil)
|
14
|
-
name ||= "libhts.#{
|
15
|
-
lib_path = if ENV[
|
16
|
-
File.expand_path(name, ENV[
|
14
|
+
name ||= "libhts.#{FFI::Platform::LIBSUFFIX}"
|
15
|
+
lib_path = if ENV["HTSLIBDIR"]
|
16
|
+
File.expand_path(name, ENV["HTSLIBDIR"])
|
17
17
|
else
|
18
18
|
File.expand_path("../vendor/#{name}", __dir__)
|
19
19
|
end
|
20
20
|
return lib_path if File.exist?(lib_path)
|
21
21
|
|
22
22
|
begin
|
23
|
-
require
|
24
|
-
lib_dir = PKGConfig.variable(
|
23
|
+
require "pkg-config"
|
24
|
+
lib_dir = PKGConfig.variable("htslib", "libdir")
|
25
25
|
lib_path = File.expand_path(name, lib_dir)
|
26
26
|
rescue PackageConfig::NotFoundError
|
27
27
|
warn "htslib.pc was not found in the pkg-config search path."
|
@@ -32,17 +32,14 @@ module HTS
|
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
|
-
self.
|
35
|
+
self.lib_path = search_htslib
|
36
36
|
|
37
|
-
# You can change the path of the shared library with `HTS.
|
38
|
-
# before calling the
|
39
|
-
autoload :
|
37
|
+
# You can change the path of the shared library with `HTS.lib_path=`
|
38
|
+
# before calling the LibHTS module.
|
39
|
+
autoload :LibHTS, "hts/libhts"
|
40
40
|
end
|
41
41
|
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
require_relative
|
46
|
-
require_relative 'hts/fai'
|
47
|
-
require_relative 'hts/tbx'
|
48
|
-
require_relative 'hts/vcf'
|
42
|
+
require_relative "hts/bam"
|
43
|
+
require_relative "hts/faidx"
|
44
|
+
require_relative "hts/tabix"
|
45
|
+
require_relative "hts/bcf"
|