bix 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/bix.rb +7 -0
- data/lib/bix/blast.rb +87 -0
- data/lib/bix/fasta.rb +29 -0
- data/lib/bix/fastq.rb +22 -21
- metadata +40 -6
data/lib/bix.rb
CHANGED
data/lib/bix/blast.rb
ADDED
@@ -0,0 +1,87 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'swak'
|
4
|
+
|
5
|
+
module Bix
|
6
|
+
module Blast
|
7
|
+
# One BLAST hit parsed from a single tab-separated line of exactly 12 fields.
# NOTE(review): the column order looks like BLAST tabular output; confirm
# against the producer of the input files.
class Hit
  # Attribute names, in the column order expected on an input line.
  Attribs = [:query, :subject, :perc_id, :align_len, :mismatches,
             :gap_opens, :query_start, :query_end, :subject_start,
             :subject_end, :e_value, :bit_score].freeze

  attr_accessor(*Attribs)

  # Builds a hit from +line+.
  #
  # line - a tab-separated String with 12 fields, or nil. When nil, or when
  #        the line starts with '#', nothing is parsed and every attribute
  #        stays nil.
  #
  # Raises RuntimeError when the line does not have exactly 12 fields.
  # Numeric conversion is delegated to String#to_f_strict / #to_i_strict,
  # which this file gets from the 'swak' library.
  def initialize(line=nil)
    # start_with? rather than line[0] == '#': indexing a String with an
    # Integer returns a character code on Ruby 1.8, so the comparison with
    # '#' would never match there.
    return if line.nil? || line.start_with?('#')

    # Limit -1 keeps trailing empty fields so the count check is exact.
    f = line.chomp.split("\t", -1)

    raise "Illegal number of fields for blast hit" unless f.size == 12

    @query         = f[0]
    @subject       = f[1]
    @perc_id       = f[2].to_f_strict
    @align_len     = f[3].to_i_strict
    @mismatches    = f[4].to_i_strict
    @gap_opens     = f[5].to_i_strict
    @query_start   = f[6].to_i_strict
    @query_end     = f[7].to_i_strict
    @subject_start = f[8].to_i_strict
    @subject_end   = f[9].to_i_strict
    @e_value       = f[10].to_f_strict
    @bit_score     = f[11].to_f_strict
  end

  # Returns the attribute values as an Array, in Attribs order.
  def to_arr
    Attribs.map { |a| send(a) }
  end
end
|
39
|
+
|
40
|
+
# All hits that share one query, plus the preferred ("best") hit among them.
class QueryHits
  attr_accessor :query, :hits, :best_hit, :pref_min_aln_len

  # Groups +hits+ and picks +best_hit+.
  #
  # best_hit is the hit with the lowest e_value among the hits whose
  # align_len is >= pref_min_aln_len. If no hit reaches that length,
  # best_hit is the hit with the lowest e_value overall. Ties keep the
  # earliest hit in +hits+.
  #
  # hits             - non-empty collection of objects responding to
  #                    query, align_len and e_value.
  # pref_min_aln_len - preferred minimum alignment length (default 0).
  #
  # Raises RuntimeError when +hits+ is empty or the hits do not all share
  # the same query.
  def initialize(hits, pref_min_aln_len=0)
    raise "QueryHits error: must have > 0 hits" unless hits.size > 0

    @pref_min_aln_len = pref_min_aln_len
    @hits = hits
    @query = hits.first.query

    unless hits.all? { |h| h.query == @query }
      raise "QueryHits error: not all hits have same query!"
    end

    # Restrict to hits meeting the preferred length first. A short hit must
    # never displace a long-enough one just because its e-value is lower.
    long_enough = hits.select { |h| h.align_len >= pref_min_aln_len }
    candidates = long_enough.empty? ? hits : long_enough

    # Strict < keeps the earliest hit when e-values tie.
    @best_hit = candidates.inject { |best, h| h.e_value < best.e_value ? h : best }
  end
end
|
65
|
+
|
66
|
+
# Reads every hit from +io+ into memory and groups the hits by query name.
#
# io               - any object whose #each yields one line (String) at a
#                    time, e.g. an open File.
# pref_min_aln_len - passed through to QueryHits.new (default 0).
#
# Lines starting with '#' and blank lines are skipped. Every other line is
# handed to Hit.new, which raises on a malformed line.
#
# Returns an Array of QueryHits, one per distinct query, in the order each
# query is first seen (on Rubies whose Hash preserves insertion order).
def self.get_all_query_hits(io, pref_min_aln_len=0)
  hits_by_query = {}

  io.each do |line|
    # A blank line (e.g. a trailing newline at end of file) is not a hit;
    # skip it instead of letting it fail the field-count check.
    next if line.strip.empty? || line.start_with?('#')

    hit = Hit.new(line)
    (hits_by_query[hit.query] ||= []) << hit
  end

  hits_by_query.values.map { |hits| QueryHits.new(hits, pref_min_aln_len) }
end
|
86
|
+
end
|
87
|
+
end
|
data/lib/bix/fasta.rb
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
module Bix
  # A single FASTA record: the header line and the concatenated sequence.
  class Fasta
    attr_accessor :header, :seq
  end

  # Reads every FASTA record from +io+ into memory.
  #
  # io - any object whose #each yields one line (String) at a time.
  #
  # A line starting with '>' begins a new record; the whole line, including
  # the '>', is stored as the header with the line ending removed. Every
  # following line up to the next header is appended to that record's seq
  # with the line ending removed. Blank lines before the first header are
  # ignored.
  #
  # Returns an Array of Bix::Fasta in input order (empty for empty input).
  # Raises RuntimeError if non-blank data appears before the first header.
  def self.read_fastas(io)
    fastas = []
    fa = nil

    io.each do |raw|
      # Non-destructive chomp: do not mutate (possibly frozen) caller strings.
      line = raw.chomp

      if line.start_with?('>')
        fastas << fa if fa

        fa = Bix::Fasta.new
        fa.header = line
        fa.seq = ""
      elsif fa
        fa.seq << line
      elsif !line.empty?
        # Sequence data with no record to attach it to.
        raise "FASTA error: sequence data found before first header"
      end
    end

    fastas << fa if fa

    fastas
  end
end
|
data/lib/bix/fastq.rb
CHANGED
@@ -1,28 +1,29 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
module Bix
|
2
|
+
# A single FASTQ record: header, sequence and quality strings.
class Fastq
  attr_accessor :header, :seq, :qual, :invalid

  # header, seq, qual - record fields, without line endings (default "").
  def initialize(header="", seq="", qual="")
    @header = header
    @seq = seq
    @qual = qual
  end

  # Reads the next four lines from +io+ into this record: header, sequence,
  # a separator line that is read and discarded, and quality.
  #
  # Line endings are stripped from the stored fields so that to_lines and
  # qual_val operate on the record content only.
  #
  # Returns true when header, seq and qual were all read, false when +io+
  # ran out before a full record was available (the missing fields are nil).
  def from_io(io)
    @header = read_field(io)
    @seq = read_field(io)
    io.gets # plus line
    @qual = read_field(io)

    !(@header.nil? || @seq.nil? || @qual.nil?)
  end

  # Returns the character code of the quality character at index +i+.
  # No quality-score offset is subtracted.
  def qual_val(i)
    @qual[i].ord
  end

  # Returns the record as four newline-joined lines, with no trailing newline.
  def to_lines
    "#{@header}\n#{@seq}\n+\n#{@qual}"
  end

  private

  # Reads one line from +io+; returns it without its line ending, or nil at
  # end of input.
  def read_field(io)
    line = io.gets
    line.nil? ? nil : line.chomp
  end
end
|
27
|
-
|
28
29
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: bix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.1
|
5
|
+
version: 0.0.2
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Jesse Rodriguez
|
@@ -10,9 +10,41 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-
|
14
|
-
dependencies:
|
15
|
-
|
13
|
+
date: 2011-09-01 00:00:00 Z
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: swak
|
17
|
+
prerelease: false
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
19
|
+
none: false
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
type: :runtime
|
25
|
+
version_requirements: *id001
|
26
|
+
- !ruby/object:Gem::Dependency
|
27
|
+
name: vcf
|
28
|
+
prerelease: false
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
30
|
+
none: false
|
31
|
+
requirements:
|
32
|
+
- - ">="
|
33
|
+
- !ruby/object:Gem::Version
|
34
|
+
version: "0"
|
35
|
+
type: :runtime
|
36
|
+
version_requirements: *id002
|
37
|
+
- !ruby/object:Gem::Dependency
|
38
|
+
name: sam
|
39
|
+
prerelease: false
|
40
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
type: :runtime
|
47
|
+
version_requirements: *id003
|
16
48
|
description: Lightweight bioinformatics tools for Ruby, with a focus on next-gen sequencing
|
17
49
|
email: jesserod@cs.stanford.edu
|
18
50
|
executables: []
|
@@ -23,8 +55,10 @@ extra_rdoc_files: []
|
|
23
55
|
|
24
56
|
files:
|
25
57
|
- lib/bix.rb
|
58
|
+
- lib/bix/fasta.rb
|
26
59
|
- lib/bix/fastq.rb
|
27
|
-
|
60
|
+
- lib/bix/blast.rb
|
61
|
+
homepage: https://github.com/jesserod/bixruby
|
28
62
|
licenses: []
|
29
63
|
|
30
64
|
post_install_message:
|
@@ -47,7 +81,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
47
81
|
requirements: []
|
48
82
|
|
49
83
|
rubyforge_project:
|
50
|
-
rubygems_version: 1.8.
|
84
|
+
rubygems_version: 1.8.10
|
51
85
|
signing_key:
|
52
86
|
specification_version: 3
|
53
87
|
summary: Lightweight bioinformatics tools for Ruby
|