htslib 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d4fe8c4a8f710ee3b35793e997f394ba8f80e0fe5b507768de2af2c5ab1297a0
4
- data.tar.gz: 1bb06ced745342f4de8665f046304364c9d419ae05eee355c3cd852d6b2b454a
3
+ metadata.gz: 30f42b474bc317136d665b00781fbfcb11caaf588b091e76bc86bf9cdf8d5e3f
4
+ data.tar.gz: d48e5f74fb0efed4de5af2955b0093d1eb7ef5ee7767bc7ee50bf3e296e7ce28
5
5
  SHA512:
6
- metadata.gz: 8c7fb677d2462a1ddf4cd146d9f9962b570f39762e98d893d77f89bbbf61c92329ed8391030f772bf4f98fb3dc874edc3c365dd2d85266e1e966b24effc1715c
7
- data.tar.gz: fee9ec647e3ef51e83a1aec1c62ac02c796dcfefe4d5fd6c901e4b04e4253619e7337cee651df1ece9ca58c74a7a50620befd5cd8ab823696d0f893a437bd562
6
+ metadata.gz: 6c1bf27a8fdc04a4a9ba678923df5bb579439c286802a5d1f2a4e6f11d7102217eafa0e4e42c2fa853e9ee82c706756315a0a1d6f97c5b5fab58ee909add4eb0
7
+ data.tar.gz: 59219371057e45cf31951eda2dae250acaedd12c593c09fb08f19720818be514797c57e9b45be8e1f6ea60f2fbc79fe6038a1aced6ba66d8f39a544eb6516a0a
data/README.md CHANGED
@@ -1,4 +1,4 @@
1
- # HTSlib
1
+ # ruby-htslib
2
2
 
3
3
  [![Gem Version](https://badge.fury.io/rb/htslib.svg)](https://badge.fury.io/rb/htslib)
4
4
  ![CI](https://github.com/kojix2/ruby-htslib/workflows/CI/badge.svg)
@@ -10,7 +10,7 @@
10
10
 
11
11
  :apple: Feel free to fork it out if you can develop it!
12
12
 
13
- :bowtie: Just a prototype. Pre-alpha stage.
13
+ :bowtie: alpha stage.
14
14
 
15
15
  ## Requirements
16
16
 
@@ -24,7 +24,7 @@
24
24
  gem install htslib
25
25
  ```
26
26
 
27
- If you installed htslib with Ubuntu/apt or Mac/homebrew, pkg-config will automatically detect the location of the shared library.
27
+ If you installed htslib with Ubuntu/apt or Mac/homebrew, [pkg-config](https://github.com/ruby-gnome/pkg-config) will automatically detect the location of the shared library.
28
28
 
29
29
  Or you can set the environment variable `HTSLIBDIR`.
30
30
 
@@ -34,33 +34,39 @@ export HTSLIBDIR="/your/path/to/htslib" # libhts.so
34
34
 
35
35
  ## Usage
36
36
 
37
- HTS::FFI - Low-level API
37
+ ### Low level API
38
+
39
+ HTS::LibHTS
38
40
 
39
41
  ```ruby
40
42
  require 'htslib'
41
43
 
42
- a = HTS::FFI.hts_open("a.bam", "r")
43
- b = HTS::FFI.hts_get_format(a)
44
+ a = HTS::LibHTS.hts_open("a.bam", "r")
45
+ b = HTS::LibHTS.hts_get_format(a)
44
46
  p b[:category]
45
47
  p b[:format]
46
48
  ```
47
49
 
48
- A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python) is under development.
50
+ Note: ruby-htslib does not use managed structs, so you may need to free the memory yourself.
51
+
52
+ ### High level API
53
+
54
+ A high-level API based on [hts-python](https://github.com/quinlan-lab/hts-python) or [hts-nim](https://github.com/brentp/hts-nim) is under development. We will change and improve the API to make it better.
49
55
 
50
56
  ```ruby
51
57
  require 'htslib'
52
58
 
53
59
  bam = HTS::Bam.new("a.bam")
54
60
 
55
- bam.each do |aln|
56
- p name: aln.qname,
57
- flag: aln.flag,
58
- start: aln.start + 1,
59
- mpos: aln.mate_pos + 1,
60
- mqual: aln.mapping_quality,
61
- seq: aln.sequence,
62
- cigar: aln.cigar.to_s,
63
- qual: aln.base_qualities.map { |i| (i + 33).chr }.join
61
+ bam.each do |r|
62
+ p name: r.qname,
63
+ flag: r.flag,
64
+ start: r.start + 1,
65
+ mpos: r.mate_pos + 1,
66
+ mqual: r.mapping_quality,
67
+ seq: r.sequence,
68
+ cigar: r.cigar.to_s,
69
+ qual: r.base_qualities.map { |i| (i + 33).chr }.join
64
70
  end
65
71
  ```
66
72
 
@@ -80,6 +86,9 @@ bundle exec rake htslib:build
80
86
  bundle exec rake test
81
87
  ```
82
88
 
89
+ [c2ffi](https://github.com/rpav/c2ffi) :
90
+ I am trying to find a way to automatically generate a low-level API using c2ffi.
91
+
83
92
  ## Contributing
84
93
 
85
94
  Ruby-htslib is a library under development, so even small improvements like typo fixes are welcome! Please feel free to send us your pull requests.
data/lib/hts/bam.rb CHANGED
@@ -3,76 +3,82 @@
3
3
  # Based on hts-python
4
4
  # https://github.com/quinlan-lab/hts-python
5
5
 
6
- require_relative 'bam/header'
7
- require_relative 'bam/cigar'
8
- require_relative 'bam/alignment'
6
+ require_relative "bam/header"
7
+ require_relative "bam/cigar"
8
+ require_relative "bam/flag"
9
+ require_relative "bam/record"
9
10
 
10
11
  module HTS
11
12
  class Bam
12
13
  include Enumerable
13
- attr_reader :file_path, :mode, :header, :htf
14
+ attr_reader :file_path, :mode, :htf, :header
14
15
 
15
- def initialize(file_path, mode = 'r', create_index: nil, header: nil, fasta: nil)
16
- @file_path = File.expand_path(file_path)
17
- File.exist?(@file_path) || raise("No such SAM/BAM file - #{@file_path}")
16
+ def initialize(file_path, mode = "r", create_index: nil)
17
+ file_path = File.expand_path(file_path)
18
18
 
19
- @mode = mode
20
- @htf = FFI.hts_open(@file_path, mode)
19
+ raise("No such SAM/BAM file - #{file_path}") unless File.exist?(file_path)
21
20
 
22
- if mode[0] == 'r'
23
- @idx = FFI.sam_index_load(@htf, @file_path)
24
- if (@idx.null? && create_index.nil?) || create_index
25
- FFI.sam_index_build(file_path, -1)
26
- @idx = FFI.sam_index_load(@htf, @file_path)
27
- warn 'NO querying'
28
- end
29
- @header = Bam::Header.new(FFI.sam_hdr_read(@htf))
30
- @b = FFI.bam_init1
21
+ @file_path = file_path
22
+ @mode = mode
23
+ @htf = LibHTS.hts_open(@file_path, mode)
24
+ @header = Bam::Header.new(LibHTS.sam_hdr_read(htf))
25
+ # FIXME: should be defined here?
26
+ @b = LibHTS.bam_init1
31
27
 
28
+ # read
29
+ if mode[0] == "r"
30
+ # load index
31
+ @idx = LibHTS.sam_index_load(htf, file_path)
32
+ # create index
33
+ if create_index || (@idx.null? && create_index.nil?)
34
+ warn "Create index for #{file_path}"
35
+ LibHTS.sam_index_build(file_path, -1)
36
+ @idx = LibHTS.sam_index_load(@htf, @file_path)
37
+ end
32
38
  else
33
- # FIXME
34
- raise 'not implemented yet.'
35
-
39
+ # FIXME: implement
40
+ raise "not implemented yet."
36
41
  end
37
42
  end
38
43
 
39
- def self.header_from_fasta; end
40
-
41
44
  def write(alns)
42
45
  alns.each do
43
- FFI.sam_write1(@htf, @header, alns.b) > 0 || raise
46
+ LibHTS.sam_write1(htf, header, alns.b) > 0 || raise
44
47
  end
45
48
  end
46
49
 
47
50
  # Close the current file.
48
51
  def close
49
- FFI.hts_close(@htf)
52
+ LibHTS.hts_close(htf)
50
53
  end
51
54
 
52
55
  # Flush the current file.
53
56
  def flush
54
57
  raise
55
- # FFI.bgzf_flush(@htf.fp.bgzf)
58
+ # LibHTS.bgzf_flush(@htf.fp.bgzf)
56
59
  end
57
60
 
58
61
  def each(&block)
59
62
  # Each does not always start at the beginning of the file.
60
63
  # This is the common behavior of IO objects in Ruby.
61
64
  # This may change in the future.
62
- block.call(Alignment.new(@b, @header.h)) while FFI.sam_read1(@htf, @header.h, @b) > 0
65
+ while LibHTS.sam_read1(htf, header.h, @b) > 0
66
+ record = Record.new(@b, header.h)
67
+ block.call(record)
68
+ end
63
69
  end
64
70
 
65
71
  # query [WIP]
66
72
  def query(region)
67
- qiter = FFI.sam_itr_querys(@idx, @header.h, region)
73
+ qiter = LibHTS.sam_itr_querys(@idx, header.h, region)
68
74
  begin
69
- slen = FFI.sam_itr_next(@htf, qiter, @b)
75
+ slen = LibHTS.sam_itr_next(htf, qiter, @b)
70
76
  while slen > 0
71
- yield Alignment.new(@b, @header.h)
72
- slen = FFI.sam_itr_next(@htf, qiter, @b)
77
+ yield Record.new(@b, header.h)
78
+ slen = LibHTS.sam_itr_next(htf, qiter, @b)
73
79
  end
74
80
  ensure
75
- FFI.hts_itr_destroy(qiter)
81
+ LibHTS.hts_itr_destroy(qiter)
76
82
  end
77
83
  end
78
84
  end
data/lib/hts/bam/cigar.rb CHANGED
@@ -7,7 +7,7 @@ module HTS
7
7
  class Bam
8
8
  class Cigar
9
9
  include Enumerable
10
- OPS = 'MIDNSHP=XB'
10
+ OPS = "MIDNSHP=XB"
11
11
 
12
12
  def initialize(cigar, n_cigar)
13
13
  @c = cigar
@@ -21,8 +21,8 @@ module HTS
21
21
  def each
22
22
  @n_cigar.times do |i|
23
23
  c = @c[i].read_uint32
24
- yield [FFI.bam_cigar_oplen(c),
25
- FFI.bam_cigar_opchr(c)]
24
+ yield [LibHTS.bam_cigar_oplen(c),
25
+ LibHTS.bam_cigar_opchr(c)]
26
26
  end
27
27
  end
28
28
  end
@@ -0,0 +1,91 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Based on hts-nim
4
+ # https://github.com/brentp/hts-nim/blob/master/src/hts/bam/flag.nim
5
+
6
+ module HTS
7
+ class Bam
8
+ class Flag
9
+ def initialize(flag_value)
10
+ @value = flag_value # type check?
11
+ end
12
+
13
+ attr_accessor :value
14
+
15
+ # BAM_FPAIRED = 1
16
+ # BAM_FPROPER_PAIR = 2
17
+ # BAM_FUNMAP = 4
18
+ # BAM_FMUNMAP = 8
19
+ # BAM_FREVERSE = 16
20
+ # BAM_FMREVERSE = 32
21
+ # BAM_FREAD1 = 64
22
+ # BAM_FREAD2 = 128
23
+ # BAM_FSECONDARY = 256
24
+ # BAM_FQCFAIL = 512
25
+ # BAM_FDUP = 1024
26
+ # BAM_FSUPPLEMENTARY = 2048
27
+
28
+ # TODO: Enabling bitwise operations
29
+ # hts-nim
30
+ # proc `and`*(f: Flag, o: uint16): uint16 {. borrow, inline .}
31
+ # proc `and`*(f: Flag, o: Flag): uint16 {. borrow, inline .}
32
+ # proc `or`*(f: Flag, o: uint16): uint16 {. borrow .}
33
+ # proc `or`*(o: uint16, f: Flag): uint16 {. borrow .}
34
+ # proc `==`*(f: Flag, o: Flag): bool {. borrow, inline .}
35
+ # proc `==`*(f: Flag, o: uint16): bool {. borrow, inline .}
36
+ # proc `==`*(o: uint16, f: Flag): bool {. borrow, inline .}
37
+
38
+ def paired?
39
+ has_flag? LibHTS::BAM_FPAIRED
40
+ end
41
+
42
+ def proper_pair?
43
+ has_flag? LibHTS::BAM_FPROPER_PAIR
44
+ end
45
+
46
+ def unmapped?
47
+ has_flag? LibHTS::BAM_FUNMAP
48
+ end
49
+
50
+ def mate_unmapped?
51
+ has_flag? LibHTS::BAM_FMUNMAP
52
+ end
53
+
54
+ def reverse?
55
+ has_flag? LibHTS::BAM_FREVERSE
56
+ end
57
+
58
+ def mate_reverse?
59
+ has_flag? LibHTS::BAM_FMREVERSE
60
+ end
61
+
62
+ def read1?
63
+ has_flag? LibHTS::BAM_FREAD1
64
+ end
65
+
66
+ def read2?
67
+ has_flag? LibHTS::BAM_FREAD2
68
+ end
69
+
70
+ def secondary?
71
+ has_flag? LibHTS::BAM_FSECONDARY
72
+ end
73
+
74
+ def qcfail?
75
+ has_flag? LibHTS::BAM_FQCFAIL
76
+ end
77
+
78
+ def dup?
79
+ has_flag? LibHTS::BAM_FDUP
80
+ end
81
+
82
+ def supplementary?
83
+ has_flag? LibHTS::BAM_FSUPPLEMENTARY
84
+ end
85
+
86
+ def has_flag?(o)
87
+ (@value & o) != 0
88
+ end
89
+ end
90
+ end
91
+ end
@@ -15,12 +15,12 @@ module HTS
15
15
  # FIXME: better name?
16
16
  def seqs
17
17
  Array.new(@h[:n_targets]) do |i|
18
- FFI.sam_hdr_tid2name(@h, i)
18
+ LibHTS.sam_hdr_tid2name(@h, i)
19
19
  end
20
20
  end
21
21
 
22
22
  def text
23
- FFI.sam_hdr_str(@h)
23
+ LibHTS.sam_hdr_str(@h)
24
24
  end
25
25
  end
26
26
  end
@@ -5,7 +5,9 @@
5
5
 
6
6
  module HTS
7
7
  class Bam
8
- class Alignment
8
+ class Record
9
+ SEQ_NT16_STR = "=ACMGRSVTWYHKDBN"
10
+
9
11
  def initialize(bam1_t, bam_hdr_t)
10
12
  @b = bam1_t
11
13
  @h = bam_hdr_t
@@ -19,9 +21,9 @@ module HTS
19
21
 
20
22
  def tags; end
21
23
 
22
- # Read (query) name.
24
+ # returns the query name.
23
25
  def qname
24
- FFI.bam_get_qname(@b).read_string
26
+ LibHTS.bam_get_qname(@b).read_string
25
27
  end
26
28
 
27
29
  # Set (query) name.
@@ -29,12 +31,9 @@ module HTS
29
31
  # raise 'Not Implemented'
30
32
  # end
31
33
 
32
- # returns the chromosome of the mate or '' if not mapped.
33
- def mate_chrom
34
- tid = @b[:core][:mtid]
35
- return '' if tid == -1
36
-
37
- FFI.sam_hdr_tid2name(@h, tid)
34
+ # returns the tid of the record or -1 if not mapped.
35
+ def tid
36
+ @b[:core][:tid]
38
37
  end
39
38
 
40
39
  # returns the tid of the mate or -1 if not mapped.
@@ -42,16 +41,6 @@ module HTS
42
41
  @b[:core][:mtid]
43
42
  end
44
43
 
45
- # returns the tid of the alignment or -1 if not mapped.
46
- def tid
47
- @b[:core][:tid]
48
- end
49
-
50
- # mate position
51
- def mate_pos
52
- @b[:core][:mpos]
53
- end
54
-
55
44
  # returns 0-based start position.
56
45
  def start
57
46
  @b[:core][:pos]
@@ -59,19 +48,33 @@ module HTS
59
48
 
60
49
  # returns end position of the read.
61
50
  def stop
62
- FFI.bam_endpos @b
51
+ LibHTS.bam_endpos @b
52
+ end
53
+
54
+ # returns 0-based mate position
55
+ def mate_start
56
+ @b[:core][:mpos]
63
57
  end
58
+ alias mate_pos mate_start
64
59
 
65
60
  # returns the chromosome or '' if not mapped.
66
61
  def chrom
67
62
  tid = @b[:core][:tid]
68
- return '' if tid == -1
63
+ return "" if tid == -1
64
+
65
+ LibHTS.sam_hdr_tid2name(@h, tid)
66
+ end
67
+
68
+ # returns the chromosome of the mate or '' if not mapped.
69
+ def mate_chrom
70
+ tid = @b[:core][:mtid]
71
+ return "" if tid == -1
69
72
 
70
- FFI.sam_hdr_tid2name(@h, tid)
73
+ LibHTS.sam_hdr_tid2name(@h, tid)
71
74
  end
72
75
 
73
76
  def strand
74
- FFI.bam_is_rev(@b) ? '-' : '+'
77
+ LibHTS.bam_is_rev(@b) ? "-" : "+"
75
78
  end
76
79
 
77
80
  # def start=(v)
@@ -90,61 +93,64 @@ module HTS
90
93
 
91
94
  # returns a `Cigar` object.
92
95
  def cigar
93
- Cigar.new(FFI.bam_get_cigar(@b), @b[:core][:n_cigar])
96
+ Cigar.new(LibHTS.bam_get_cigar(@b), @b[:core][:n_cigar])
94
97
  end
95
98
 
96
99
  def qlen
97
- FFI.bam_cigar2qlen(
100
+ LibHTS.bam_cigar2qlen(
98
101
  @b[:core][:n_cigar],
99
- FFI.bam_get_cigar(@b)
102
+ LibHTS.bam_get_cigar(@b)
100
103
  )
101
104
  end
102
105
 
103
106
  def rlen
104
- FFI.bam_cigar2rlen(
107
+ LibHTS.bam_cigar2rlen(
105
108
  @b[:core][:n_cigar],
106
- FFI.bam_get_cigar(@b)
109
+ LibHTS.bam_get_cigar(@b)
107
110
  )
108
111
  end
109
112
 
110
113
  # return the read sequence
111
114
  def sequence
112
- seq_nt16_str = '=ACMGRSVTWYHKDBN'
113
- r = FFI.bam_get_seq(@b)
114
- Array.new(@b[:core][:l_qseq]) do |i|
115
- seq_nt16_str[FFI.bam_seqi(r, i)]
116
- end.join
115
+ r = LibHTS.bam_get_seq(@b)
116
+ seq = String.new
117
+ (@b[:core][:l_qseq]).times do |i|
118
+ seq << SEQ_NT16_STR[LibHTS.bam_seqi(r, i)]
119
+ end
120
+ seq
117
121
  end
118
122
 
123
+ # return only the base at the requested index "n" of the query sequence.
119
124
  def base_at(n)
120
125
  n += @b[:core][:l_qseq] if n < 0
121
- seq_nt16_str = '=ACMGRSVTWYHKDBN'
122
- return '.' if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_at(-1000)
126
+ return "." if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_at(-1000)
123
127
 
124
- r = FFI.bam_get_seq(@b)
125
- seq_nt16_str[FFI.bam_seqi(r, n)]
128
+ r = LibHTS.bam_get_seq(@b)
129
+ SEQ_NT16_STR[LibHTS.bam_seqi(r, n)]
126
130
  end
127
131
 
132
+ # return the base qualities
128
133
  def base_qualities
129
- q_ptr = FFI.bam_get_qual(@b)
134
+ q_ptr = LibHTS.bam_get_qual(@b)
130
135
  q_ptr.read_array_of_uint8(@b[:core][:l_qseq])
131
136
  end
132
137
 
138
+ # return only the base quality at the requested index "n" of the query sequence.
133
139
  def base_quality_at(n)
134
- n += @b[:core][:l_qseq] if n < 0 # eg. base_quality_at(-1000)
135
- return 0 if (n >= @b[:core][:l_qseq]) || (n < 0)
140
+ n += @b[:core][:l_qseq] if n < 0
141
+ return 0 if (n >= @b[:core][:l_qseq]) || (n < 0) # eg. base_quality_at(-1000)
136
142
 
137
- q_ptr = FFI.bam_get_qual(@b)
143
+ q_ptr = LibHTS.bam_get_qual(@b)
138
144
  q_ptr.get_uint8(n)
139
145
  end
140
146
 
141
147
  def flag_str
142
- FFI.bam_flag2str(flag)
148
+ LibHTS.bam_flag2str(@b[:core][:flag])
143
149
  end
144
150
 
145
151
  # returns a `Flag` object.
146
152
  def flag
147
- @b[:core][:flag]
153
+ Flag.new(@b[:core][:flag])
148
154
  end
149
155
 
150
156
  # TODO: