bix 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/bix.rb CHANGED
@@ -1 +1,8 @@
1
+ # Define the Bix namespace here; the files required below reopen it
2
+ module Bix
3
+ end
4
+
1
5
  require "#{File.dirname(__FILE__)}/bix/fastq"
6
+ require "#{File.dirname(__FILE__)}/bix/fasta"
7
+ require "#{File.dirname(__FILE__)}/bix/blast"
8
+
@@ -0,0 +1,87 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'swak'
4
+
5
+ module Bix
6
+ module Blast
7
+ class Hit
8
+ Attribs = [:query, :subject, :perc_id, :align_len, :mismatches,
9
+ :gap_opens, :query_start, :query_end, :subject_start,
10
+ :subject_end, :e_value, :bit_score]
11
+
12
+ attr_accessor *Attribs
13
+
14
+ def initialize(line=nil)
15
+ if line != nil && line[0] != '#'
16
+ f = line.chomp.split("\t", -1)
17
+
18
+ raise "Illegal number of fields for blast hit" unless f.size == 12
19
+
20
+ @query = f[0]
21
+ @subject = f[1]
22
+ @perc_id = f[2].to_f_strict
23
+ @align_len = f[3].to_i_strict
24
+ @mismatches = f[4].to_i_strict
25
+ @gap_opens = f[5].to_i_strict
26
+ @query_start = f[6].to_i_strict
27
+ @query_end = f[7].to_i_strict
28
+ @subject_start = f[8].to_i_strict
29
+ @subject_end = f[9].to_i_strict
30
+ @e_value = f[10].to_f_strict
31
+ @bit_score = f[11].to_f_strict
32
+ end
33
+ end
34
+
35
+ def to_arr
36
+ return Attribs.map{|a| self.send(a)}
37
+ end
38
+ end
39
+
40
+ class QueryHits
41
+ attr_accessor :query, :hits, :best_hit, :pref_min_aln_len
42
+
43
+ # We want best_hit to be the hit with the best (lowest) e-value among all hits whose alignment length is at least pref_min_aln_len.
44
+ # If no hit reaches that length, we want the hit with the best e-value overall.
45
+ def initialize(hits, pref_min_aln_len=0)
46
+ @pref_min_aln_len = pref_min_aln_len
47
+ @hits = hits
48
+
49
+ raise "QueryHits error: must have > 0 hits" unless hits.size > 0
50
+
51
+ @query = hits.first.query
52
+ @best_hit = hits.first
53
+
54
+ for h in hits
55
+ raise "QueryHits error: not all hits have same query!" if h.query != query
56
+
57
+ if @best_hit.align_len < pref_min_aln_len && h.align_len >= pref_min_aln_len
58
+ @best_hit = h
59
+ elsif h.e_value < @best_hit.e_value
60
+ @best_hit = h
61
+ end
62
+ end
63
+ end
64
+ end
65
+
66
+ def self.get_all_query_hits(io, pref_min_aln_len=0)
67
+ hits_by_query = {}
68
+
69
+ # Read all BLAST hits into memory and group them by query name
70
+ for line in io
71
+ next if line[0] == '#'
72
+ h = Hit.new(line)
73
+ hits_by_query[h.query] ||= []
74
+ hits_by_query[h.query] << h
75
+ end
76
+
77
+ # Build an array of QueryHits objects, one per query
78
+ query_hits = []
79
+
80
+ for query, hits in hits_by_query
81
+ query_hits << QueryHits.new(hits, pref_min_aln_len)
82
+ end
83
+
84
+ return query_hits
85
+ end
86
+ end
87
+ end
@@ -0,0 +1,29 @@
1
+ module Bix
2
+ class Fasta
3
+ attr_accessor :header, :seq
4
+ end
5
+
6
+ def self.read_fastas(io)
7
+ fastas = []
8
+
9
+ fa = nil
10
+
11
+ for line in io
12
+ line.chomp!
13
+ if line[0] == '>'
14
+ fastas << fa if fa != nil
15
+
16
+ fa = Bix::Fasta.new
17
+ fa.header = line
18
+ fa.seq = ""
19
+ else
20
+ fa.seq << line
21
+ end
22
+
23
+ end
24
+
25
+ fastas << fa if fa != nil
26
+
27
+ return fastas
28
+ end
29
+ end
@@ -1,28 +1,29 @@
1
- class Fastq
2
- attr_accessor :header, :seq, :qual, :invalid
1
+ module Bix
2
+ class Fastq
3
+ attr_accessor :header, :seq, :qual, :invalid
3
4
 
4
- def initialize(header="", seq="", qual="")
5
- @header = header
6
- @seq = seq
7
- @qual = qual
8
- end
5
+ def initialize(header="", seq="", qual="")
6
+ @header = header
7
+ @seq = seq
8
+ @qual = qual
9
+ end
9
10
 
10
- def from_io(io)
11
- @header = io.gets
12
- @seq = io.gets
13
- io.gets #plus line
14
- @qual = io.gets
11
+ def from_io(io)
12
+ @header = io.gets
13
+ @seq = io.gets
14
+ io.gets #plus line
15
+ @qual = io.gets
15
16
 
16
- return false if @header == nil || @seq == nil || @qual == nil
17
- return true
18
- end
17
+ return false if @header == nil || @seq == nil || @qual == nil
18
+ return true
19
+ end
19
20
 
20
- def qual_val(i)
21
- return @qual[i].ord
22
- end
21
+ def qual_val(i)
22
+ return @qual[i].ord
23
+ end
23
24
 
24
- def to_lines
25
- return "#{@header}\n#{@seq}\n+\n#{@qual}"
25
+ def to_lines
26
+ return "#{@header}\n#{@seq}\n+\n#{@qual}"
27
+ end
26
28
  end
27
-
28
29
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: bix
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.1
5
+ version: 0.0.2
6
6
  platform: ruby
7
7
  authors:
8
8
  - Jesse Rodriguez
@@ -10,9 +10,41 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-08-09 00:00:00 Z
14
- dependencies: []
15
-
13
+ date: 2011-09-01 00:00:00 Z
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: swak
17
+ prerelease: false
18
+ requirement: &id001 !ruby/object:Gem::Requirement
19
+ none: false
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ type: :runtime
25
+ version_requirements: *id001
26
+ - !ruby/object:Gem::Dependency
27
+ name: vcf
28
+ prerelease: false
29
+ requirement: &id002 !ruby/object:Gem::Requirement
30
+ none: false
31
+ requirements:
32
+ - - ">="
33
+ - !ruby/object:Gem::Version
34
+ version: "0"
35
+ type: :runtime
36
+ version_requirements: *id002
37
+ - !ruby/object:Gem::Dependency
38
+ name: sam
39
+ prerelease: false
40
+ requirement: &id003 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ type: :runtime
47
+ version_requirements: *id003
16
48
  description: Lightweight bioinformatics tools for Ruby, with a focus on next-gen sequencing
17
49
  email: jesserod@cs.stanford.edu
18
50
  executables: []
@@ -23,8 +55,10 @@ extra_rdoc_files: []
23
55
 
24
56
  files:
25
57
  - lib/bix.rb
58
+ - lib/bix/fasta.rb
26
59
  - lib/bix/fastq.rb
27
- homepage: http://rubygems.org/gems/bix
60
+ - lib/bix/blast.rb
61
+ homepage: https://github.com/jesserod/bixruby
28
62
  licenses: []
29
63
 
30
64
  post_install_message:
@@ -47,7 +81,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
47
81
  requirements: []
48
82
 
49
83
  rubyforge_project:
50
- rubygems_version: 1.8.5
84
+ rubygems_version: 1.8.10
51
85
  signing_key:
52
86
  specification_version: 3
53
87
  summary: Lightweight bioinformatics tools for Ruby