htslib 0.0.0 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,7 +1,24 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- module FFI
4
+ module LibHTS
5
+ # constants
6
+ BAM_CMATCH = 0
7
+ BAM_CINS = 1
8
+ BAM_CDEL = 2
9
+ BAM_CREF_SKIP = 3
10
+ BAM_CSOFT_CLIP = 4
11
+ BAM_CHARD_CLIP = 5
12
+ BAM_CPAD = 6
13
+ BAM_CEQUAL = 7
14
+ BAM_CDIFF = 8
15
+ BAM_CBACK = 9
16
+
17
+ BAM_CIGAR_STR = "MIDNSHP=XB"
18
+ BAM_CIGAR_SHIFT = 4
19
+ BAM_CIGAR_MASK = 0xf
20
+ BAM_CIGAR_TYPE = 0x3C1A7
21
+
5
22
  # macros
6
23
  class << self
7
24
  def bam_cigar_op(c)
@@ -12,8 +29,8 @@ module HTS
12
29
  c >> BAM_CIGAR_SHIFT
13
30
  end
14
31
 
15
- def bam_cigar_opchar(c)
16
- _BAM_CIGAR_STR_PADDED[bam_cigar_op(c)]
32
# Return the operation character for a CIGAR value.
#
# The operation code comes from bam_cigar_op(c) and is used as an index into
# BAM_CIGAR_STR padded with six "?" characters, so codes beyond the known
# operations map to "?".
def bam_cigar_opchr(c)
  padded_ops = "#{BAM_CIGAR_STR}??????"
  padded_ops[bam_cigar_op(c)]
end
18
35
 
19
36
  def bam_cigar_gen(l, o)
@@ -25,6 +42,19 @@ module HTS
25
42
  end
26
43
  end
27
44
 
45
+ BAM_FPAIRED = 1
46
+ BAM_FPROPER_PAIR = 2
47
+ BAM_FUNMAP = 4
48
+ BAM_FMUNMAP = 8
49
+ BAM_FREVERSE = 16
50
+ BAM_FMREVERSE = 32
51
+ BAM_FREAD1 = 64
52
+ BAM_FREAD2 = 128
53
+ BAM_FSECONDARY = 256
54
+ BAM_FQCFAIL = 512
55
+ BAM_FDUP = 1024
56
+ BAM_FSUPPLEMENTARY = 2048
57
+
28
58
  # macros
29
59
  # function-like macros
30
60
  class << self
@@ -60,7 +90,9 @@ module HTS
60
90
  b[:l_data] - (b[:core][:n_cigar] << 2) - b[:core][:l_qname] - b[:core][:l_qseq] - ((b[:core][:l_qseq] + 1) >> 1)
61
91
  end
62
92
 
63
- # def bam_seqi(s, i)
93
# Return the 4-bit code stored for base +i+ in the packed sequence +s+.
#
# Two codes share one byte: byte i >> 1 holds base i, in the high nibble when
# i is even and in the low nibble when i is odd.
# s must support s[index].read_uint8 (presumably an FFI pointer - confirm).
def bam_seqi(s, i)
  packed_byte = s[i >> 1].read_uint8
  nibble_shift = (~i & 1) << 2 # 4 for even indexes, 0 for odd indexes
  (packed_byte >> nibble_shift) & 0xf
end
64
96
 
65
97
  # def bam_set_seqi(s, i, b)
66
98
  end
@@ -146,13 +178,13 @@ module HTS
146
178
  # Returns a complete line of formatted text for a given type and ID.
147
179
  attach_function \
148
180
  :sam_hdr_find_line_id,
149
- [SamHdr, :string, :string, :string, Kstring],
181
+ [SamHdr, :string, :string, :string, KString],
150
182
  :int
151
183
 
152
184
  # Returns a complete line of formatted text for a given type and index.
153
185
  attach_function \
154
186
  :sam_hdr_find_line_pos,
155
- [SamHdr, :string, :int, Kstring],
187
+ [SamHdr, :string, :int, KString],
156
188
  :int
157
189
 
158
190
  # Remove a line with given type / id from a header
@@ -206,13 +238,13 @@ module HTS
206
238
  # Return the value associated with a key for a header line identified by ID_key:ID_val
207
239
  attach_function \
208
240
  :sam_hdr_find_tag_id,
209
- [SamHdr, :string, :string, :string, :string, Kstring],
241
+ [SamHdr, :string, :string, :string, :string, KString],
210
242
  :int
211
243
 
212
244
  # Return the value associated with a key for a header line identified by position
213
245
  attach_function \
214
246
  :sam_hdr_find_tag_pos,
215
- [SamHdr, :string, :int, :string, Kstring],
247
+ [SamHdr, :string, :int, :string, KString],
216
248
  :int
217
249
 
218
250
  # Remove the key from the line identified by type, ID_key and ID_value.
@@ -405,6 +437,14 @@ module HTS
405
437
  [HtsIdx, SamHdr, :pointer, :uint],
406
438
  HtsItr.by_ref
407
439
 
440
# Get the next read from a SAM/BAM/CRAM iterator.
#
# Ruby port of htslib's inline sam_itr_next.
#
# htsfp - file handle; htsfp[:fp][:bgzf] is handed to hts_itr_next
# itr   - iterator; must respond to null?
# r     - record passed through to hts_itr_next
#
# Returns -2 when itr is null (the value htslib's inline function returns for
# that case), otherwise whatever hts_itr_next returns.
def self.sam_itr_next(htsfp, itr, r)
  # FIXME: check if htsfp is compressed BGZF
  if itr.null?
    hts_log_error("Null iterator")
    # Stop here: a null iterator must never reach hts_itr_next.
    return -2
  end
  # FIXME: check multi
  hts_itr_next(htsfp[:fp][:bgzf], itr, r, htsfp)
end
447
+
408
448
  attach_function \
409
449
  :sam_parse_region,
410
450
  [SamHdr, :string, :pointer, :pointer, :pointer, :int],
@@ -435,12 +475,12 @@ module HTS
435
475
 
436
476
  attach_function \
437
477
  :sam_parse1,
438
- [Kstring, SamHdr, Bam1],
478
+ [KString, SamHdr, Bam1],
439
479
  :int
440
480
 
441
481
  attach_function \
442
482
  :sam_format1,
443
- [SamHdr, Bam1, Kstring],
483
+ [SamHdr, Bam1, KString],
444
484
  :int
445
485
 
446
486
  # Read a record from a file
@@ -543,52 +583,52 @@ module HTS
543
583
  attach_function \
544
584
  :bam_plp_init,
545
585
  %i[bam_plp_auto_f pointer],
546
- BamPlp
586
+ :bam_plp
547
587
 
548
588
  attach_function \
549
589
  :bam_plp_destroy,
550
- [BamPlp],
590
+ [:bam_plp],
551
591
  :void
552
592
 
553
593
  attach_function \
554
594
  :bam_plp_push,
555
- [BamPlp, Bam1],
595
+ [:bam_plp, Bam1],
556
596
  :int
557
597
 
558
598
  attach_function \
559
599
  :bam_plp_next,
560
- [BamPlp, :pointer, :pointer, :pointer],
561
- :pointer
600
+ %i[bam_plp pointer pointer pointer],
601
+ BamPileup1.by_ref
562
602
 
563
603
  attach_function \
564
604
  :bam_plp_auto,
565
- [BamPlp, :pointer, :pointer, :pointer],
566
- :pointer
605
+ %i[bam_plp pointer pointer pointer],
606
+ BamPileup1.by_ref
567
607
 
568
608
  attach_function \
569
609
  :bam_plp64_next,
570
- [BamPlp, :pointer, :pointer, :pointer],
571
- :pointer
610
+ %i[bam_plp pointer pointer pointer],
611
+ BamPileup1.by_ref
572
612
 
573
613
  attach_function \
574
614
  :bam_plp64_auto,
575
- [BamPlp, :pointer, :pointer, :pointer],
576
- :pointer
615
+ %i[bam_plp pointer pointer pointer],
616
+ BamPileup1.by_ref
577
617
 
578
618
  attach_function \
579
619
  :bam_plp_set_maxcnt,
580
- [BamPlp, :int],
620
+ %i[bam_plp int],
581
621
  :void
582
622
 
583
623
  attach_function \
584
624
  :bam_plp_reset,
585
- [BamPlp],
625
+ [:bam_plp],
586
626
  :void
587
627
 
588
628
  # Get the padded insertion sequence for a pileup entry.
589
629
  attach_function \
590
630
  :bam_plp_insertion,
591
- [:pointer, Kstring, :pointer],
631
+ [BamPileup1, KString, :pointer],
592
632
  :int
593
633
 
594
634
  # sets a callback to initialise any per-pileup1_t fields.
@@ -602,36 +642,36 @@ module HTS
602
642
  attach_function \
603
643
  :bam_mplp_init,
604
644
  %i[int bam_plp_auto_f pointer],
605
- BamMplp.by_ref
645
+ :bam_mplp
606
646
 
607
647
  attach_function \
608
648
  :bam_mplp_init_overlaps,
609
- [BamMplp],
649
+ [:bam_mplp],
610
650
  :int
611
651
 
612
652
  attach_function \
613
653
  :bam_mplp_destroy,
614
- [BamMplp],
654
+ [:bam_mplp],
615
655
  :void
616
656
 
617
657
  attach_function \
618
658
  :bam_mplp_set_maxcnt,
619
- [BamMplp, :int],
659
+ %i[bam_mplp int],
620
660
  :void
621
661
 
622
662
  attach_function \
623
663
  :bam_mplp_auto,
624
- [BamMplp, :pointer, :pointer, :pointer, :pointer],
664
+ %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
625
665
  :int
626
666
 
627
667
  attach_function \
628
668
  :bam_mplp64_auto,
629
- [BamMplp, :pointer, :pointer, :pointer, :pointer],
669
+ %i[bam_mplp pointer pointer pointer pointer], # BamPileup1T
630
670
  :int
631
671
 
632
672
  attach_function \
633
673
  :bam_mplp_reset,
634
- [BamMplp],
674
+ [:bam_mplp],
635
675
  :void
636
676
 
637
677
  # bam_mplp_constructor
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- module FFI
4
+ module LibHTS
5
5
  attach_function \
6
6
  :tbx_name2id,
7
7
  [Tbx, :string],
@@ -1,7 +1,183 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- module FFI
4
+ module LibHTS
5
+ # constants
6
+ BCF_HL_FLT = 0 # header line
7
+ BCF_HL_INFO = 1
8
+ BCF_HL_FMT = 2
9
+ BCF_HL_CTG = 3
10
+ BCF_HL_STR = 4 # structured header line TAG=<A=..,B=..>
11
+ BCF_HL_GEN = 5 # generic header line
12
+ BCF_HT_FLAG = 0 # header type
13
+
14
+ BCF_HT_INT = 1
15
+ BCF_HT_REAL = 2
16
+ BCF_HT_STR = 3
17
+ BCF_HT_LONG = (BCF_HT_INT | 0x100) # BCF_HT_INT, but for int64_t values; VCF only!
18
+
19
+ BCF_VL_FIXED = 0 # variable length
20
+ BCF_VL_VAR = 1
21
+ BCF_VL_A = 2
22
+ BCF_VL_G = 3
23
+ BCF_VL_R = 4
24
+
25
+ BCF_DT_ID = 0 # dictionary type
26
+ BCF_DT_CTG = 1
27
+ BCF_DT_SAMPLE = 2
28
+
29
+ BCF_BT_NULL = 0
30
+ BCF_BT_INT8 = 1
31
+ BCF_BT_INT16 = 2
32
+ BCF_BT_INT32 = 3
33
+ BCF_BT_INT64 = 4 # Unofficial, for internal use only.
34
+ BCF_BT_FLOAT = 5
35
+ BCF_BT_CHAR = 7
36
+
37
+ VCF_REF = 0
38
+ VCF_SNP = 1
39
+ VCF_MNP = 2
40
+ VCF_INDEL = 4
41
+ VCF_OTHER = 8
42
+ VCF_BND = 16 # breakend
43
+ VCF_OVERLAP = 32 # overlapping deletion, ALT=*
44
+
45
+ BCF1_DIRTY_ID = 1
46
+ BCF1_DIRTY_ALS = 2
47
+ BCF1_DIRTY_FLT = 4
48
+ BCF1_DIRTY_INF = 8
49
+
50
+ BCF_ERR_CTG_UNDEF = 1
51
+ BCF_ERR_TAG_UNDEF = 2
52
+ BCF_ERR_NCOLS = 4
53
+ BCF_ERR_LIMITS = 8
54
+ BCF_ERR_CHAR = 16
55
+ BCF_ERR_CTG_INVALID = 32
56
+ BCF_ERR_TAG_INVALID = 64
57
+
58
+ # macros
59
# Ruby ports of the function-like macros and inline helpers from htslib's vcf.h.
class << self
  # The bcf_*/vcf_* open and close calls are plain aliases of hts_open and
  # hts_close, which must already be attached when this file is loaded.
  alias bcf_open hts_open
  alias vcf_open hts_open
  alias bcf_close hts_close
  alias vcf_close hts_close

  # Number of samples: the BCF_DT_SAMPLE entry of the header's :n member.
  def bcf_hdr_nsamples(hdr)
    hdr[:n][BCF_DT_SAMPLE]
  end

  # --- INFO setters: each forwards to bcf_update_info with a fixed type ---

  # Update an INFO tag with n integer values (BCF_HT_INT).
  def bcf_update_info_int32(hdr, line, key, values, n)
    bcf_update_info(hdr, line, key, values, n, BCF_HT_INT)
  end

  # Update an INFO tag with n float values (BCF_HT_REAL).
  def bcf_update_info_float(hdr, line, key, values, n)
    bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL)
  end

  # Update an INFO flag (BCF_HT_FLAG).
  def bcf_update_info_flag(hdr, line, key, string, n)
    bcf_update_info(hdr, line, key, string, n, BCF_HT_FLAG)
  end

  # Update an INFO string tag (BCF_HT_STR); the count is always 1.
  def bcf_update_info_string(hdr, line, key, string)
    bcf_update_info(hdr, line, key, string, 1, BCF_HT_STR)
  end

  # --- FORMAT setters: each forwards to bcf_update_format with a fixed type ---

  # Update a FORMAT tag with n integer values (BCF_HT_INT).
  def bcf_update_format_int32(hdr, line, key, values, n)
    bcf_update_format(hdr, line, key, values, n, BCF_HT_INT)
  end

  # Update a FORMAT tag with n float values (BCF_HT_REAL).
  def bcf_update_format_float(hdr, line, key, values, n)
    bcf_update_format(hdr, line, key, values, n, BCF_HT_REAL)
  end

  # Update a FORMAT tag with n characters (BCF_HT_STR).
  def bcf_update_format_char(hdr, line, key, values, n)
    bcf_update_format(hdr, line, key, values, n, BCF_HT_STR)
  end

  # Update the "GT" FORMAT tag with n encoded genotype values (BCF_HT_INT).
  def bcf_update_genotypes(hdr, line, gts, n)
    bcf_update_format(hdr, line, "GT", gts, n, BCF_HT_INT)
  end

  # --- Genotype encoding: value = (allele index + 1) << 1, bit 0 = phased ---

  # Encode allele index idx as a phased genotype value.
  def bcf_gt_phased(idx)
    ((idx + 1) << 1) | 1
  end

  # Encode allele index idx as an unphased genotype value.
  def bcf_gt_unphased(idx)
    (idx + 1) << 1
  end

  # Encoded value of a missing allele.
  def bcf_gt_missing
    0
  end

  # Return 1 when val encodes a missing allele, 0 otherwise.
  #
  # The C macro relies on 0 being false. In Ruby every Integer is truthy, so
  # the shifted value has to be compared with zero explicitly.
  # Callers must compare the result with 1; the returned 0 is truthy in Ruby.
  def bcf_gt_is_missing(val)
    (val >> 1).zero? ? 1 : 0
  end

  # Return the phase bit (0 or 1) of an encoded genotype value.
  def bcf_gt_is_phased(idx)
    idx & 1
  end

  # Decode the allele index from an encoded genotype value (-1 for a missing one).
  def bcf_gt_allele(val)
    (val >> 1) - 1
  end

  # Combine two allele indexes into one index that does not depend on their
  # order: larger * (larger + 1) / 2 + smaller.
  def bcf_alleles2gt(a, b)
    if a > b
      a * (a + 1) / 2 + b
    else
      b * (b + 1) / 2 + a
    end
  end

  # --- INFO getters: each forwards to bcf_get_info_values with a fixed type ---

  # Read an integer INFO tag (BCF_HT_INT).
  def bcf_get_info_int32(hdr, line, tag, dst, ndst)
    bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
  end

  # Read a float INFO tag (BCF_HT_REAL).
  def bcf_get_info_float(hdr, line, tag, dst, ndst)
    bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
  end

  # Read a string INFO tag (BCF_HT_STR).
  def bcf_get_info_string(hdr, line, tag, dst, ndst)
    bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_STR)
  end

  # Read an INFO flag (BCF_HT_FLAG).
  def bcf_get_info_flag(hdr, line, tag, dst, ndst)
    bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_FLAG)
  end

  # --- FORMAT getters: each forwards to bcf_get_format_values with a fixed type ---

  # Read an integer FORMAT tag (BCF_HT_INT).
  def bcf_get_format_int32(hdr, line, tag, dst, ndst)
    bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_INT)
  end

  # Read a float FORMAT tag (BCF_HT_REAL).
  def bcf_get_format_float(hdr, line, tag, dst, ndst)
    bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_REAL)
  end

  # Read a character FORMAT tag (BCF_HT_STR).
  def bcf_get_format_char(hdr, line, tag, dst, ndst)
    bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_STR)
  end

  # Read the "GT" FORMAT tag as encoded genotype values (BCF_HT_INT).
  def bcf_get_genotypes(hdr, line, dst, ndst)
    bcf_get_format_values(hdr, line, "GT", dst, ndst, BCF_HT_INT)
  end

  # Return the :key of entry int_id in the header dictionary selected by type.
  #
  # hdr[:id][type] is treated as the start of an array of BcfIdpair structs;
  # the wanted entry sits int_id struct sizes past that address.
  def bcf_hdr_int2id(hdr, type, int_id)
    LibHTS::BcfIdpair.new(
      hdr[:id][type].to_ptr +
      LibHTS::BcfIdpair.size * int_id # offsets
    )[:key]
  end
end
171
+
172
+ # constants
173
+ BCF_UN_STR = 1 # up to ALT inclusive
174
+ BCF_UN_FLT = 2 # up to FILTER
175
+ BCF_UN_INFO = 4 # up to INFO
176
+ BCF_UN_SHR = (BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO) # all shared information
177
+ BCF_UN_FMT = 8 # unpack format and each sample
178
+ BCF_UN_IND = BCF_UN_FMT # a synonym of BCF_UN_FMT
179
+ BCF_UN_ALL = (BCF_UN_SHR | BCF_UN_FMT) # everything
180
+
5
181
  attach_function \
6
182
  :bcf_hdr_init,
7
183
  [:string],
@@ -14,7 +190,7 @@ module HTS
14
190
 
15
191
  attach_function \
16
192
  :bcf_init,
17
- [:void],
193
+ [],
18
194
  Bcf1.by_ref
19
195
 
20
196
  attach_function \
@@ -54,7 +230,7 @@ module HTS
54
230
 
55
231
  attach_function \
56
232
  :vcf_parse,
57
- [Kstring, BcfHdr, Bcf1],
233
+ [KString, BcfHdr, Bcf1],
58
234
  :int
59
235
 
60
236
  attach_function \
@@ -64,7 +240,7 @@ module HTS
64
240
 
65
241
  attach_function \
66
242
  :vcf_format,
67
- [BcfHdr, Bcf1, Kstring],
243
+ [BcfHdr, Bcf1, KString],
68
244
  :int
69
245
 
70
246
  attach_function \
@@ -119,7 +295,7 @@ module HTS
119
295
 
120
296
  attach_function \
121
297
  :vcf_write_line,
122
- [HtsFile, Kstring],
298
+ [HtsFile, KString],
123
299
  :int
124
300
 
125
301
  attach_function \
@@ -144,7 +320,7 @@ module HTS
144
320
 
145
321
  attach_function \
146
322
  :bcf_hdr_format,
147
- [BcfHdr, :int, Kstring],
323
+ [BcfHdr, :int, KString],
148
324
  :int
149
325
 
150
326
  attach_function \
@@ -204,7 +380,7 @@ module HTS
204
380
 
205
381
  attach_function \
206
382
  :bcf_hrec_format,
207
- [BcfHrec, Kstring],
383
+ [BcfHrec, KString],
208
384
  :int
209
385
 
210
386
  attach_function \
@@ -369,27 +545,27 @@ module HTS
369
545
 
370
546
  attach_function \
371
547
  :bcf_fmt_array,
372
- [Kstring, :int, :int, :pointer],
548
+ [KString, :int, :int, :pointer],
373
549
  :int
374
550
 
375
551
  attach_function \
376
552
  :bcf_fmt_sized_array,
377
- [Kstring, :pointer],
553
+ [KString, :pointer],
378
554
  :uint8_t
379
555
 
380
556
  attach_function \
381
557
  :bcf_enc_vchar,
382
- [Kstring, :int, :string],
558
+ [KString, :int, :string],
383
559
  :int
384
560
 
385
561
  attach_function \
386
562
  :bcf_enc_vint,
387
- [Kstring, :int, :pointer, :int],
563
+ [KString, :int, :pointer, :int],
388
564
  :int
389
565
 
390
566
  attach_function \
391
567
  :bcf_enc_vfloat,
392
- [Kstring, :int, :pointer],
568
+ [KString, :int, :pointer],
393
569
  :int
394
570
 
395
571
  attach_function \
data/lib/hts/libhts.rb ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "ffi_ext/struct"
4
+
5
module HTS
  # FFI binding module: loads the shared library found at HTS.lib_path and
  # attaches functions defensively.
  module LibHTS
    extend FFI::Library

    begin
      ffi_lib HTS.lib_path
    rescue LoadError => load_error
      # Re-raise with the path that was tried appended to the original message.
      raise LoadError, "#{load_error}\nCould not find #{HTS.lib_path}"
    end

    class << self
      # Same as FFI::Library#attach_function, except that a symbol missing from
      # the loaded library produces a warning instead of raising
      # FFI::NotFoundError.
      def attach_function(*)
        super
      rescue FFI::NotFoundError => missing
        warn missing.message
      end
    end
  end
end
22
+
23
+ require_relative "libhts/constants"
24
+
25
+ # Loaded in alphabetical order, except that kfunc comes after sam.
26
+ require_relative "libhts/bgzf"
27
+ require_relative "libhts/faidx"
28
+ require_relative "libhts/hfile"
29
+ require_relative "libhts/hts"
30
+ require_relative "libhts/sam"
31
+ require_relative "libhts/kfunc"
32
+ require_relative "libhts/tbx"
33
+ require_relative "libhts/vcf"
data/lib/hts/tabix.rb ADDED
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-python
4
+ # https://github.com/quinlan-lab/hts-python
5
+
6
+ require_relative "utils/open_method"
7
+
8
module HTS
  # Skeleton of a Tabix reader; every method body is still empty.
  #
  # NOTE(review): Utils::OpenMethod#open calls new(path), but #initialize takes
  # no arguments, so Tabix.open(path) would raise ArgumentError as written.
  class Tabix
    extend Utils::OpenMethod

    # Not implemented yet.
    def initialize
    end

    # Not implemented yet.
    def build
    end

    # Not implemented yet.
    def sequences
    end

    # def __call__\
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
module HTS
  module Utils
    # Adds a File.open-style +open+ class method to any class that extends
    # this module. The class must accept a path in +new+ and its instances
    # must respond to +close+.
    module OpenMethod
      # Create an instance for +path+.
      #
      # Without a block the new instance is returned and the caller is
      # responsible for closing it.
      #
      # With a block the instance is yielded and then closed. The close now
      # also happens when the block raises or exits early with +break+, so the
      # underlying handle is not leaked. On normal completion the return value
      # is still the result of +close+.
      def open(path)
        object = new(path)
        return object unless block_given?

        begin
          yield(object)
        ensure
          # Runs on every exit path out of the block.
          close_result = object.close
        end
        close_result
      end
    end
  end
end
data/lib/hts/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module HTS
4
- VERSION = '0.0.0'
4
+ VERSION = "0.0.4"
5
5
  end
data/lib/htslib.rb CHANGED
@@ -1,18 +1,45 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'ffi'
3
+ require "ffi"
4
4
 
5
- require 'hts/version'
5
+ require "hts/version"
6
6
 
7
7
  module HTS
8
8
  class Error < StandardError; end
9
9
 
10
10
  class << self
11
- attr_accessor :ffi_lib
11
+ attr_accessor :lib_path
12
+
13
+ def search_htslib(name = nil)
14
+ name ||= "libhts.#{FFI::Platform::LIBSUFFIX}"
15
+ lib_path = if ENV["HTSLIBDIR"]
16
+ File.expand_path(name, ENV["HTSLIBDIR"])
17
+ else
18
+ File.expand_path("../vendor/#{name}", __dir__)
19
+ end
20
+ return lib_path if File.exist?(lib_path)
21
+
22
+ begin
23
+ require "pkg-config"
24
+ lib_dir = PKGConfig.variable("htslib", "libdir")
25
+ lib_path = File.expand_path(name, lib_dir)
26
+ rescue PackageConfig::NotFoundError
27
+ warn "htslib.pc was not found in the pkg-config search path."
28
+ end
29
+ return lib_path if File.exist?(lib_path)
30
+
31
+ warn "htslib shared library '#{name}' not found."
32
+ end
12
33
  end
13
- self.ffi_lib = File.expand_path("libhts.#{FFI::Platform::LIBSUFFIX}", ENV['HTSLIBDIR'])
14
- autoload :FFI, 'hts/ffi'
34
+
35
+ self.lib_path = search_htslib
36
+
37
+ # You can change the path of the shared library with `HTS.lib_path=`
38
+ # before calling the LibHTS module.
39
+ autoload :LibHTS, "hts/libhts"
15
40
  end
16
41
 
17
- # alias
18
- HTSlib = HTS
42
+ require_relative "hts/bam"
43
+ require_relative "hts/faidx"
44
+ require_relative "hts/tabix"
45
+ require_relative "hts/bcf"