parse_fasta 1.6.2 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 704dd834a4d948441636422507ca52a9fd44141b
4
- data.tar.gz: 83d5170a4636337ba9dff578c0de90683f610172
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YjUzNjQwMGYzNDQzNWNiYzczOTFmYmE3OGY2YWI0MmJkNjhkNmE0ZA==
5
+ data.tar.gz: !binary |-
6
+ ODFjMmIxNWE5MDc1ODE0MGUyOWRmYTg5YmFkNWJkMjE1MzUxMjg1Yg==
5
7
  SHA512:
6
- metadata.gz: ec6f5c7bca2f75e2ec82d18efe31f08e72ca5e3a67f7a7c2c61f5cc5392813396246d601b95c34697dd8a3b133f9655608733ff05e36912f33a6858fe2815c37
7
- data.tar.gz: ba8e6522891519acf8a22d019500cd4ec46e01fa39b954f52f3b4c2c68586ae6e431a6eb2375799ab52fed0ada667c731be0b34a54ccce732d0f06f1d8dfa7f6
8
+ metadata.gz: !binary |-
9
+ NmFmNjcwODhhM2E0NmM2NzE1OTA0NmYwOGVmZTM0NjA3Mzg4ZmUyNWUwYjNm
10
+ ZTRjNmE0NWI2MTc1NWM5NTMwM2I5YjEzYzQxMjcwYTJhMjdiMmVkZmJmNWVj
11
+ ZTU1ODY1OGE1ZGRiOGQ3ZGU1NTIyYWI0ZTJlN2I4YTBiMGIzOTg=
12
+ data.tar.gz: !binary |-
13
+ ZTM2YTBhNGU4N2E4ZDJkYTI1OWNiMTI5YjQ5Njc1ZTlmYTI4MzMxYWRmZDNk
14
+ N2U0Zjk1N2M1M2YzZWMxNjk0NDZlNmIzYjFjYTAzNWQ2YjllYWQxYjk2ZDAx
15
+ NDRiYTc3ZjFiNzIxM2VmNjQxODlkMjgxZWQ4MTRlY2M0NjRkOWQ=
data/.yardopts ADDED
@@ -0,0 +1 @@
1
+ --markup-provider rdiscount
data/README.md CHANGED
@@ -27,7 +27,7 @@ lightweight than BioRuby. And more fun! ;)
27
27
  ## Documentation ##
28
28
 
29
29
  Check out
30
- [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.6.2/frames)
30
+ [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.0/frames)
31
31
  for the full api documentation.
32
32
 
33
33
  ## Usage ##
@@ -60,8 +60,16 @@ What if you don't care if the input is a fastA or a fastQ? No problem!
60
60
  puts [header, seq].join "\t"
61
61
  end
62
62
 
63
+ Read fasta file into a hash.
64
+
65
+ seqs = FastaFile.open(ARGV[0]).to_hash
66
+
63
67
  ## Versions ##
64
68
 
69
+ ### 1.7 ###
70
+
71
+ Add `SeqFile#to_hash`, `FastaFile#to_hash` and `FastqFile#to_hash`.
72
+
65
73
  ### 1.6 ###
66
74
 
67
75
  Added `SeqFile` class, which accepts either fastA or fastQ files. It
@@ -43,6 +43,23 @@ class FastaFile < File
43
43
  super
44
44
  end
45
45
 
46
+ # Returns the records in the fasta file as a hash map with the
47
+ # headers as keys and the Sequences as values.
48
+ #
49
+ # @example Read a fastA into a hash table.
50
+ # seqs = FastaFile.open('reads.fa').to_hash
51
+ #
52
+ # @return [Hash] A hash with headers as keys, sequences as the
53
+ # values (Sequence objects)
54
+ def to_hash
55
+ hash = {}
56
+ self.each_record do |head, seq|
57
+ hash[head] = seq
58
+ end
59
+
60
+ hash
61
+ end
62
+
46
63
  # Analogous to IO#each_line, #each_record is used to go through a
47
64
  # fasta file record by record. It will accept gzipped files as well.
48
65
  #
@@ -22,6 +22,25 @@ require 'zlib'
22
22
  # format files. Gzipped files are no problem.
23
23
  class FastqFile < File
24
24
 
25
+ # Returns the records in the fastq file as a hash map with the
26
+ # headers as keys pointing to a hash map like so
27
+ # { "seq1" => { head: "seq1", seq: "ACTG", desc: "", qual: "II3*"} }
28
+ #
29
+ # @example Read a fastQ into a hash table.
30
+ # seqs = FastqFile.open('reads.fq.gz').to_hash
31
+ #
32
+ # @return [Hash] A hash with headers as keys, and a hash map as the
33
+ # value with keys :head, :seq, :desc, :qual, for header, sequence,
34
+ # description, and quality.
35
+ def to_hash
36
+ hash = {}
37
+ self.each_record do |head, seq, desc, qual|
38
+ hash[head] = { head: head, seq: seq, desc: desc, qual: qual }
39
+ end
40
+
41
+ hash
42
+ end
43
+
25
44
  # Analogous to IO#each_line, #each_record is used to go through a
26
45
  # fastq file record by record. It will accept gzipped files as well.
27
46
  #
@@ -33,7 +52,7 @@ class FastqFile < File
33
52
  # FastqFile.open('reads.fq.gz').each_record do |head, seq, desc, qual|
34
53
  # # do some fun stuff here!
35
54
  # end
36
- #
55
+ #
37
56
  # @yield The header, sequence, description and quality string for
38
57
  # each record in the fastq file to the block
39
58
  # @yieldparam header [String] The header of the fastq record without
@@ -54,8 +73,8 @@ class FastqFile < File
54
73
  f = Zlib::GzipReader.open(self)
55
74
  rescue Zlib::GzipFile::Error => e
56
75
  f = self
57
- end
58
-
76
+ end
77
+
59
78
  f.each_line do |line|
60
79
  line.chomp!
61
80
 
@@ -70,10 +89,10 @@ class FastqFile < File
70
89
  quality = Quality.new(line)
71
90
  yield(header, sequence, description, quality)
72
91
  end
73
-
92
+
74
93
  count += 1
75
94
  end
76
-
95
+
77
96
  f.close if f.instance_of?(Zlib::GzipReader)
78
97
  return f
79
98
  end
@@ -20,6 +20,27 @@
20
20
  # depending on what the user provides. Handles gzipped files.
21
21
  class SeqFile < File
22
22
 
23
+ # Returns the records in the sequence file as a hash map with the
24
+ # headers as keys and the Sequences as values. For a fastq file,
25
+ # acts the same as `FastqFile#to_hash`
26
+ #
27
+ # @example Read a fastA into a hash table.
28
+ # seqs = SeqFile.open('reads.fa').to_hash
29
+ #
30
+ # @return [Hash] A hash with headers as keys, sequences as the
31
+ # values (Sequence objects)
32
+ def to_hash
33
+ first_char = get_first_char(self)
34
+
35
+ if first_char == '>'
36
+ FastaFile.open(self).to_hash
37
+ elsif first_char == '@'
38
+ FastqFile.open(self).to_hash
39
+ else
40
+ raise ArgumentError, "Input does not look like FASTA or FASTQ"
41
+ end
42
+ end
43
+
23
44
  # Analogous to IO#each_line, #each_record will go through a fastA or
24
45
  # fastQ file record by record.
25
46
  #
@@ -54,7 +75,7 @@ class SeqFile < File
54
75
  # @yieldparam sequence [Sequence] The sequence of the record.
55
76
  def each_record
56
77
  first_char = get_first_char(self)
57
-
78
+
58
79
  if first_char == '>'
59
80
  FastaFile.open(self).each_record do |header, sequence|
60
81
  yield(header, sequence)
@@ -65,7 +86,7 @@ class SeqFile < File
65
86
  end
66
87
  else
67
88
  raise ArgumentError, "Input does not look like FASTA or FASTQ"
68
- end
89
+ end
69
90
  end
70
91
 
71
92
  private
@@ -75,7 +96,7 @@ class SeqFile < File
75
96
  handle = Zlib::GzipReader.open(f)
76
97
  rescue Zlib::GzipFile::Error => e
77
98
  handle = f
78
- end
99
+ end
79
100
 
80
101
  handle.each_line.peek[0]
81
102
  end
@@ -17,5 +17,5 @@
17
17
  # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
19
  module ParseFasta
20
- VERSION = "1.6.2"
20
+ VERSION = "1.7.0"
21
21
  end
data/parse_fasta.gemspec CHANGED
@@ -28,5 +28,6 @@ Gem::Specification.new do |spec|
28
28
  spec.add_development_dependency "rspec", "~> 2.14"
29
29
  spec.add_development_dependency "bio", "~> 1.4"
30
30
  spec.add_development_dependency "yard", "~> 0.8"
31
+ spec.add_development_dependency "rdiscount"
31
32
  spec.add_development_dependency "coveralls", "~> 0.7"
32
33
  end
@@ -44,6 +44,22 @@ describe FastaFile do
44
44
  end
45
45
  end
46
46
 
47
+ describe "#to_hash" do
48
+ let(:records) { Helpers::RECORDS_MAP }
49
+ let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz" }
50
+ let(:fasta) { FastaFile.open(fname) }
51
+
52
+ it "reads the records into a hash: header as key and seq as val" do
53
+ expect(fasta.to_hash).to eq records
54
+ end
55
+
56
+ it "passes the values as Sequence objects" do
57
+ expect(
58
+ fasta.to_hash.values.all? { |val| val.instance_of? Sequence }
59
+ ).to eq true
60
+ end
61
+ end
62
+
47
63
  describe "#each_record" do
48
64
  let(:records) { Helpers::RECORDS }
49
65
 
@@ -43,9 +43,40 @@ describe FastqFile do
43
43
  expect(qual).to be_an_instance_of Quality
44
44
  end
45
45
  end
46
+ end
47
+
48
+ describe "#to_hash" do
49
+ let(:records) {
50
+ { "seq1" => { head: "seq1",
51
+ seq: "AACCTTGG",
52
+ desc: "",
53
+ qual: ")#3gTqN8" },
54
+ "seq2 apples" => { head: "seq2 apples",
55
+ seq: "ACTG",
56
+ desc: "seq2 apples",
57
+ qual: "*ujM" }
58
+ }
59
+ }
60
+ let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz" }
61
+ let(:fastq) { FastqFile.open(fname) }
62
+
63
+ it "reads the records into a hash: header as key and seq as val" do
64
+ expect(fastq.to_hash).to eq records
65
+ end
66
+
67
+ it "passes the seqs as Sequence objects" do
68
+ expect(
69
+ fastq.to_hash.values.all? { |val| val[:seq].instance_of? Sequence }
70
+ ).to eq true
71
+ end
46
72
 
73
+ it "passes the quals as Quality objects" do
74
+ expect(
75
+ fastq.to_hash.values.all? { |val| val[:qual].instance_of? Quality }
76
+ ).to eq true
77
+ end
47
78
  end
48
-
79
+
49
80
  context "with a 4 line per record fastq file" do
50
81
  describe "#each_record" do
51
82
  context "with a gzipped file" do
@@ -80,6 +111,5 @@ describe FastqFile do
80
111
  end
81
112
  end
82
113
  end
83
- end
114
+ end
84
115
  end
85
-
@@ -19,6 +19,57 @@
19
19
  require 'spec_helper'
20
20
 
21
21
  describe SeqFile do
22
+
23
+ describe "#to_hash" do
24
+ context "when input is a fasta file" do
25
+ let(:records) { Helpers::RECORDS_MAP }
26
+ let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz" }
27
+ let(:fasta) { SeqFile.open(fname) }
28
+
29
+ it "reads the records into a hash: header as key and seq as val" do
30
+ expect(fasta.to_hash).to eq records
31
+ end
32
+
33
+ it "passes the values as Sequence objects" do
34
+ expect(
35
+ fasta.to_hash.values.all? { |val| val.instance_of? Sequence }
36
+ ).to eq true
37
+ end
38
+ end
39
+
40
+ context "when input is a fastq file" do
41
+ let(:records) {
42
+ { "seq1" => { head: "seq1",
43
+ seq: "AACCTTGG",
44
+ desc: "",
45
+ qual: ")#3gTqN8" },
46
+ "seq2 apples" => { head: "seq2 apples",
47
+ seq: "ACTG",
48
+ desc: "seq2 apples",
49
+ qual: "*ujM" }
50
+ }
51
+ }
52
+ let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz" }
53
+ let(:fastq) { SeqFile.open(fname) }
54
+
55
+ it "reads the records into a hash: header as key and seq as val" do
56
+ expect(fastq.to_hash).to eq records
57
+ end
58
+
59
+ it "passes the seqs as Sequence objects" do
60
+ expect(
61
+ fastq.to_hash.values.all? { |val| val[:seq].instance_of? Sequence }
62
+ ).to eq true
63
+ end
64
+
65
+ it "passes the quals as Quality objects" do
66
+ expect(
67
+ fastq.to_hash.values.all? { |val| val[:qual].instance_of? Quality }
68
+ ).to eq true
69
+ end
70
+ end
71
+ end
72
+
22
73
  describe "#each_record" do
23
74
 
24
75
  context "when input is a fasta file" do
@@ -138,8 +189,8 @@ describe SeqFile do
138
189
  err_msg = "Input does not look like FASTA or FASTQ"
139
190
 
140
191
  expect { SeqFile.open(fname).each_record do |h, s|
141
- puts [h, s].join ' '
142
- end
192
+ puts [h, s].join ' '
193
+ end
143
194
  }.to raise_error(ArgumentError, err_msg)
144
195
  end
145
196
  end
data/spec/spec_helper.rb CHANGED
@@ -31,6 +31,17 @@ module Helpers
31
31
  ["seq3", "yyyyyyyyyyyyyyyNNN"],
32
32
  ["empty seq at end", ""]]
33
33
 
34
+ RECORDS_MAP = {
35
+ "empty seq at beginning" => "",
36
+ "seq1 is fun" => "AACTGGNNN",
37
+ "seq2" => "AATCCTGNNN",
38
+ "empty seq 1" => "",
39
+ "empty seq 2" => "",
40
+ "seq3" => "yyyyyyyyyyyyyyyNNN",
41
+ "empty seq at end" => ""
42
+ }
43
+
44
+
34
45
  TRUTHY_RECORDS = [["empty seq at beginning", []],
35
46
  ["seq1 is fun", ["AACTGGNNN"]],
36
47
  ["seq2", ["AAT", "CCTGNNN"]],
metadata CHANGED
@@ -1,97 +1,111 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.2
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-22 00:00:00.000000000 Z
11
+ date: 2015-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.6'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.6'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: '10.3'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ~>
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.3'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ~>
46
46
  - !ruby/object:Gem::Version
47
47
  version: '2.14'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
54
  version: '2.14'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: bio
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ~>
60
60
  - !ruby/object:Gem::Version
61
61
  version: '1.4'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ~>
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.4'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: yard
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - "~>"
73
+ - - ~>
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0.8'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - "~>"
80
+ - - ~>
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.8'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rdiscount
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: coveralls
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
- - - "~>"
101
+ - - ~>
88
102
  - !ruby/object:Gem::Version
89
103
  version: '0.7'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
- - - "~>"
108
+ - - ~>
95
109
  - !ruby/object:Gem::Version
96
110
  version: '0.7'
97
111
  description: Provides nice, programmatic access to fasta and fastq files, as well
@@ -102,9 +116,10 @@ executables: []
102
116
  extensions: []
103
117
  extra_rdoc_files: []
104
118
  files:
105
- - ".coveralls.yml"
106
- - ".gitignore"
107
- - ".travis.yml"
119
+ - .coveralls.yml
120
+ - .gitignore
121
+ - .travis.yml
122
+ - .yardopts
108
123
  - COPYING
109
124
  - Gemfile
110
125
  - README.md
@@ -131,7 +146,7 @@ files:
131
146
  - test_files/test.fq.gz
132
147
  homepage: https://github.com/mooreryan/parse_fasta
133
148
  licenses:
134
- - 'GPLv3: http://www.gnu.org/licenses/gpl.txt'
149
+ - ! 'GPLv3: http://www.gnu.org/licenses/gpl.txt'
135
150
  metadata: {}
136
151
  post_install_message:
137
152
  rdoc_options: []
@@ -139,17 +154,17 @@ require_paths:
139
154
  - lib
140
155
  required_ruby_version: !ruby/object:Gem::Requirement
141
156
  requirements:
142
- - - ">="
157
+ - - ! '>='
143
158
  - !ruby/object:Gem::Version
144
159
  version: 1.9.3
145
160
  required_rubygems_version: !ruby/object:Gem::Requirement
146
161
  requirements:
147
- - - ">="
162
+ - - ! '>='
148
163
  - !ruby/object:Gem::Version
149
164
  version: '0'
150
165
  requirements: []
151
166
  rubyforge_project:
152
- rubygems_version: 2.4.6
167
+ rubygems_version: 2.4.8
153
168
  signing_key:
154
169
  specification_version: 4
155
170
  summary: Easy-peasy parsing of fasta & fastq files!