parse_fasta 1.7.1 → 1.7.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/README.md +11 -1
- data/lib/parse_fasta/quality.rb +11 -0
- data/lib/parse_fasta/sequence.rb +14 -4
- data/lib/parse_fasta/version.rb +1 -1
- data/spec/lib/quality_spec.rb +10 -2
- data/spec/lib/sequence_spec.rb +9 -1
- data/test_files/test.fa +2 -2
- data/test_files/test.fa.gz +0 -0
- data/test_files/test.fq +3 -3
- data/test_files/test.fq.gz +0 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NzY3MWVlYjVmYzM4NzhjZjIzNDUyYjFlNWZhMjMxMGE4NWQ3YTI4Yw==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
OTVjMThlYzg0NzUwZTdmMzIyZmJlYjc5NjBiNDUzMmE2NjJjMGVkOA==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZTVkOTQ4MGMzMTQ4ZDg5MzczMDllMzgwNDY3Y2NmNzU4NDc5MzMwOGIwYzA3
|
10
|
+
Y2FjMzQ1YzA4NDhiMjY0OWMyYjMyMWFhOWM5MTI5OGFiMTJiMDFhYmJlNTI3
|
11
|
+
NjBkZDU5MzMzOTJlOGY0NzdiZWRiYWI5ZWE2YTBiZjJkYTk1ZTU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
MWJkZjY5ZWY5MmYyNTJjMjIyODE0ZmQ4YWU4OTZlMTY5MzIyNGNlZGJkNjU2
|
14
|
+
YjBjNmI5YTIyYTdmMGJhZmVlNTExZWJkN2U4OWRlMDE0NzM3MGI4ZGE4M2Rj
|
15
|
+
MzNjODU3YzYzMDFmZDg1MjU1YzQ2MDgxODIwNmQ3ZDJkZDU3MWQ=
|
data/README.md
CHANGED
@@ -27,7 +27,7 @@ lightweight than BioRuby. And more fun! ;)
|
|
27
27
|
## Documentation ##
|
28
28
|
|
29
29
|
Checkout
|
30
|
-
[parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.1/frames)
|
30
|
+
[parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.2/frames)
|
31
31
|
for the full api documentation.
|
32
32
|
|
33
33
|
## Usage ##
|
@@ -70,6 +70,16 @@ Read fasta file into a hash.
|
|
70
70
|
|
71
71
|
Add `SeqFile#to_hash`, `FastaFile#to_hash` and `FastqFile#to_hash`.
|
72
72
|
|
73
|
+
#### 1.7.2 ####
|
74
|
+
|
75
|
+
Strip spaces (not all whitespace) from `Sequence` and `Quality` strings.
|
76
|
+
|
77
|
+
Some alignment fastas have spaces for easier reading. Strip these
|
78
|
+
out. For consistency, also strips spaces from `Quality` strings. If
|
79
|
+
there are spaces that don't match in the quality and sequence in a
|
80
|
+
fastQ file, then things will get messed up in the FastQ file. FastQ
|
81
|
+
shouldn't have spaces though.
|
82
|
+
|
73
83
|
### 1.6 ###
|
74
84
|
|
75
85
|
Added `SeqFile` class, which accepts either fastA or fastQ files. It
|
data/lib/parse_fasta/quality.rb
CHANGED
@@ -19,6 +19,17 @@
|
|
19
19
|
# Provide some methods for dealing with common tasks regarding
|
20
20
|
# quality strings.
|
21
21
|
class Quality < String
|
22
|
+
|
23
|
+
# Strips spaces (not all whitespace) from the str argument before calling super
|
24
|
+
#
|
25
|
+
# @return [Quality] A Quality string
|
26
|
+
#
|
27
|
+
# @example Removes spaces
|
28
|
+
# Quality.new "I I 2 ! " #=> "II2!"
|
29
|
+
def initialize(str)
|
30
|
+
super(str.gsub(/ +/, ""))
|
31
|
+
end
|
32
|
+
|
22
33
|
# Returns the mean quality for the record. This will be a good deal
|
23
34
|
# faster than getting the average with `qual_scores` and reduce.
|
24
35
|
#
|
data/lib/parse_fasta/sequence.rb
CHANGED
@@ -20,6 +20,16 @@
|
|
20
20
|
# nucleotide sequences.
|
21
21
|
class Sequence < String
|
22
22
|
|
23
|
+
# Strips spaces (not all whitespace) from the str argument before calling super
|
24
|
+
#
|
25
|
+
# @return [Sequence] A Sequence string
|
26
|
+
#
|
27
|
+
# @example Removes spaces
|
28
|
+
# Sequence.new "AA CC TT" #=> "AACCTT"
|
29
|
+
def initialize(str)
|
30
|
+
super(str.gsub(/ +/, ""))
|
31
|
+
end
|
32
|
+
|
23
33
|
# Calculates GC content
|
24
34
|
#
|
25
35
|
# Calculates GC content by dividing count of G + C divided by count
|
@@ -45,7 +55,7 @@ class Sequence < String
|
|
45
55
|
t = s.count('t')
|
46
56
|
a = s.count('a')
|
47
57
|
u = s.count('u')
|
48
|
-
|
58
|
+
|
49
59
|
return 0 if c + g + t + a + u == 0
|
50
60
|
return (c + g) / (c + g + t + a + u).to_f
|
51
61
|
end
|
@@ -87,9 +97,9 @@ class Sequence < String
|
|
87
97
|
warn('ERROR: A sequence contains both T and U')
|
88
98
|
counts[:t], counts[:u] = t, u
|
89
99
|
end
|
90
|
-
|
100
|
+
|
91
101
|
counts[:n] = s.count('n') if count_ambiguous_bases
|
92
|
-
|
102
|
+
|
93
103
|
counts
|
94
104
|
end
|
95
105
|
|
@@ -116,7 +126,7 @@ class Sequence < String
|
|
116
126
|
def base_frequencies(count_ambiguous_bases=nil)
|
117
127
|
base_counts = self.base_counts(count_ambiguous_bases)
|
118
128
|
total_bases = base_counts.values.reduce(:+).to_f
|
119
|
-
base_freqs =
|
129
|
+
base_freqs =
|
120
130
|
base_counts.map { |base, count| [base, count/total_bases] }.flatten
|
121
131
|
Hash[*base_freqs]
|
122
132
|
end
|
data/lib/parse_fasta/version.rb
CHANGED
data/spec/lib/quality_spec.rb
CHANGED
@@ -21,10 +21,18 @@ require 'bio'
|
|
21
21
|
|
22
22
|
describe Quality do
|
23
23
|
let(:qual_string) { qual_string = Quality.new('ab%63:K') }
|
24
|
-
let(:bioruby_qual_scores) do
|
24
|
+
let(:bioruby_qual_scores) do
|
25
25
|
Bio::Fastq.new("@seq1\nACTGACT\n+\n#{qual_string}").quality_scores
|
26
26
|
end
|
27
27
|
|
28
|
+
describe "::new" do
|
29
|
+
it "removes any spaces in the quality string" do
|
30
|
+
q = " ab # :m, ! "
|
31
|
+
q_no_spaces = "ab#:m,!"
|
32
|
+
expect(Quality.new(q)).to eq q_no_spaces
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
28
36
|
describe "#qual_scores" do
|
29
37
|
context "with illumina style quality scores" do
|
30
38
|
it "returns an array of quality scores" do
|
@@ -39,5 +47,5 @@ describe Quality do
|
|
39
47
|
mean_quality = qual_string.qual_scores.reduce(:+) / len
|
40
48
|
expect(qual_string.mean_qual).to eq mean_quality
|
41
49
|
end
|
42
|
-
end
|
50
|
+
end
|
43
51
|
end
|
data/spec/lib/sequence_spec.rb
CHANGED
@@ -25,6 +25,14 @@ describe Sequence do
|
|
25
25
|
expect(Sequence.new('ACTG')).to be_a String
|
26
26
|
end
|
27
27
|
|
28
|
+
describe "::new" do
|
29
|
+
it "removes any spaces in the sequence" do
|
30
|
+
s = "ACT ACT ACT GCT "
|
31
|
+
s_no_spaces = "ACTACTACTGCT"
|
32
|
+
expect(Sequence.new(s)).to eq s_no_spaces
|
33
|
+
end
|
34
|
+
end
|
35
|
+
|
28
36
|
describe "#gc" do
|
29
37
|
it "gives the same answer as BioRuby" do
|
30
38
|
s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
|
@@ -75,7 +83,7 @@ describe Sequence do
|
|
75
83
|
expect(s.base_counts).to eq({ a: 2, c: 2, u: 2, g: 2 })
|
76
84
|
end
|
77
85
|
end
|
78
|
-
|
86
|
+
|
79
87
|
context "for an RNA sequence with truthy argument" do
|
80
88
|
it "returns a map of A, C, U, G and N counts" do
|
81
89
|
s = Sequence.new('ACUGNacugn')
|
data/test_files/test.fa
CHANGED
data/test_files/test.fa.gz
CHANGED
Binary file
|
data/test_files/test.fq
CHANGED
data/test_files/test.fq.gz
CHANGED
Binary file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.1
|
4
|
+
version: 1.7.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-10-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|