parse_fasta 1.6.2 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,15 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 704dd834a4d948441636422507ca52a9fd44141b
4
- data.tar.gz: 83d5170a4636337ba9dff578c0de90683f610172
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ YjUzNjQwMGYzNDQzNWNiYzczOTFmYmE3OGY2YWI0MmJkNjhkNmE0ZA==
5
+ data.tar.gz: !binary |-
6
+ ODFjMmIxNWE5MDc1ODE0MGUyOWRmYTg5YmFkNWJkMjE1MzUxMjg1Yg==
5
7
  SHA512:
6
- metadata.gz: ec6f5c7bca2f75e2ec82d18efe31f08e72ca5e3a67f7a7c2c61f5cc5392813396246d601b95c34697dd8a3b133f9655608733ff05e36912f33a6858fe2815c37
7
- data.tar.gz: ba8e6522891519acf8a22d019500cd4ec46e01fa39b954f52f3b4c2c68586ae6e431a6eb2375799ab52fed0ada667c731be0b34a54ccce732d0f06f1d8dfa7f6
8
+ metadata.gz: !binary |-
9
+ NmFmNjcwODhhM2E0NmM2NzE1OTA0NmYwOGVmZTM0NjA3Mzg4ZmUyNWUwYjNm
10
+ ZTRjNmE0NWI2MTc1NWM5NTMwM2I5YjEzYzQxMjcwYTJhMjdiMmVkZmJmNWVj
11
+ ZTU1ODY1OGE1ZGRiOGQ3ZGU1NTIyYWI0ZTJlN2I4YTBiMGIzOTg=
12
+ data.tar.gz: !binary |-
13
+ ZTM2YTBhNGU4N2E4ZDJkYTI1OWNiMTI5YjQ5Njc1ZTlmYTI4MzMxYWRmZDNk
14
+ N2U0Zjk1N2M1M2YzZWMxNjk0NDZlNmIzYjFjYTAzNWQ2YjllYWQxYjk2ZDAx
15
+ NDRiYTc3ZjFiNzIxM2VmNjQxODlkMjgxZWQ4MTRlY2M0NjRkOWQ=
data/.yardopts ADDED
@@ -0,0 +1 @@
1
+ --markup-provider rdiscount
data/README.md CHANGED
@@ -27,7 +27,7 @@ lightweight than BioRuby. And more fun! ;)
27
27
  ## Documentation ##
28
28
 
29
29
  Check out
30
- [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.6.2/frames)
30
+ [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.7.0/frames)
31
31
  for the full api documentation.
32
32
 
33
33
  ## Usage ##
@@ -60,8 +60,16 @@ What if you don't care if the input is a fastA or a fastQ? No problem!
60
60
  puts [header, seq].join "\t"
61
61
  end
62
62
 
63
+ Read fasta file into a hash.
64
+
65
+ seqs = FastaFile.open(ARGV[0]).to_hash
66
+
63
67
  ## Versions ##
64
68
 
69
+ ### 1.7 ###
70
+
71
+ Add `SeqFile#to_hash`, `FastaFile#to_hash` and `FastqFile#to_hash`.
72
+
65
73
  ### 1.6 ###
66
74
 
67
75
  Added `SeqFile` class, which accepts either fastA or fastQ files. It
@@ -43,6 +43,23 @@ class FastaFile < File
43
43
  super
44
44
  end
45
45
 
46
+ # Returns the records in the fasta file as a hash map with the
47
+ # headers as keys and the Sequences as values.
48
+ #
49
+ # @example Read a fastA into a hash table.
50
+ # seqs = FastaFile.open('reads.fa').to_hash
51
+ #
52
+ # @return [Hash] A hash with headers as keys, sequences as the
53
+ # values (Sequence objects)
54
+ def to_hash
55
+ hash = {}
56
+ self.each_record do |head, seq|
57
+ hash[head] = seq
58
+ end
59
+
60
+ hash
61
+ end
62
+
46
63
  # Analogous to IO#each_line, #each_record is used to go through a
47
64
  # fasta file record by record. It will accept gzipped files as well.
48
65
  #
@@ -22,6 +22,25 @@ require 'zlib'
22
22
  # format files. Gzipped files are no problem.
23
23
  class FastqFile < File
24
24
 
25
+ # Returns the records in the fastq file as a hash map with the
26
+ # headers as keys pointing to a hash map like so
27
+ # { "seq1" => { head: "seq1", seq: "ACTG", desc: "", qual: "II3*"} }
28
+ #
29
+ # @example Read a fastQ into a hash table.
30
+ # seqs = FastqFile.open('reads.fq.gz').to_hash
31
+ #
32
+ # @return [Hash] A hash with headers as keys, and a hash map as the
33
+ # value with keys :head, :seq, :desc, :qual, for header, sequence,
34
+ # description, and quality.
35
+ def to_hash
36
+ hash = {}
37
+ self.each_record do |head, seq, desc, qual|
38
+ hash[head] = { head: head, seq: seq, desc: desc, qual: qual }
39
+ end
40
+
41
+ hash
42
+ end
43
+
25
44
  # Analogous to IO#each_line, #each_record is used to go through a
26
45
  # fastq file record by record. It will accept gzipped files as well.
27
46
  #
@@ -33,7 +52,7 @@ class FastqFile < File
33
52
  # FastqFile.open('reads.fq.gz').each_record do |head, seq, desc, qual|
34
53
  # # do some fun stuff here!
35
54
  # end
36
- #
55
+ #
37
56
  # @yield The header, sequence, description and quality string for
38
57
  # each record in the fastq file to the block
39
58
  # @yieldparam header [String] The header of the fastq record without
@@ -54,8 +73,8 @@ class FastqFile < File
54
73
  f = Zlib::GzipReader.open(self)
55
74
  rescue Zlib::GzipFile::Error => e
56
75
  f = self
57
- end
58
-
76
+ end
77
+
59
78
  f.each_line do |line|
60
79
  line.chomp!
61
80
 
@@ -70,10 +89,10 @@ class FastqFile < File
70
89
  quality = Quality.new(line)
71
90
  yield(header, sequence, description, quality)
72
91
  end
73
-
92
+
74
93
  count += 1
75
94
  end
76
-
95
+
77
96
  f.close if f.instance_of?(Zlib::GzipReader)
78
97
  return f
79
98
  end
@@ -20,6 +20,27 @@
20
20
  # depending on what the user provides. Handles gzipped files.
21
21
  class SeqFile < File
22
22
 
23
+ # Returns the records in the sequence file as a hash map with the
24
+ # headers as keys and the Sequences as values. For a fastq file,
25
+ # acts the same as `FastqFile#to_hash`
26
+ #
27
+ # @example Read a fastA into a hash table.
28
+ # seqs = SeqFile.open('reads.fa').to_hash
29
+ #
30
+ # @return [Hash] A hash with headers as keys, sequences as the
31
+ # values (Sequence objects for fastA input; per-record hashes for fastQ input)
32
+ def to_hash
33
+ first_char = get_first_char(self)
34
+
35
+ if first_char == '>'
36
+ FastaFile.open(self).to_hash
37
+ elsif first_char == '@'
38
+ FastqFile.open(self).to_hash
39
+ else
40
+ raise ArgumentError, "Input does not look like FASTA or FASTQ"
41
+ end
42
+ end
43
+
23
44
  # Analogous to IO#each_line, #each_record will go through a fastA or
24
45
  # fastQ file record by record.
25
46
  #
@@ -54,7 +75,7 @@ class SeqFile < File
54
75
  # @yieldparam sequence [Sequence] The sequence of the record.
55
76
  def each_record
56
77
  first_char = get_first_char(self)
57
-
78
+
58
79
  if first_char == '>'
59
80
  FastaFile.open(self).each_record do |header, sequence|
60
81
  yield(header, sequence)
@@ -65,7 +86,7 @@ class SeqFile < File
65
86
  end
66
87
  else
67
88
  raise ArgumentError, "Input does not look like FASTA or FASTQ"
68
- end
89
+ end
69
90
  end
70
91
 
71
92
  private
@@ -75,7 +96,7 @@ class SeqFile < File
75
96
  handle = Zlib::GzipReader.open(f)
76
97
  rescue Zlib::GzipFile::Error => e
77
98
  handle = f
78
- end
99
+ end
79
100
 
80
101
  handle.each_line.peek[0]
81
102
  end
@@ -17,5 +17,5 @@
17
17
  # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
19
  module ParseFasta
20
- VERSION = "1.6.2"
20
+ VERSION = "1.7.0"
21
21
  end
data/parse_fasta.gemspec CHANGED
@@ -28,5 +28,6 @@ Gem::Specification.new do |spec|
28
28
  spec.add_development_dependency "rspec", "~> 2.14"
29
29
  spec.add_development_dependency "bio", "~> 1.4"
30
30
  spec.add_development_dependency "yard", "~> 0.8"
31
+ spec.add_development_dependency "rdiscount"
31
32
  spec.add_development_dependency "coveralls", "~> 0.7"
32
33
  end
@@ -44,6 +44,22 @@ describe FastaFile do
44
44
  end
45
45
  end
46
46
 
47
+ describe "#to_hash" do
48
+ let(:records) { Helpers::RECORDS_MAP }
49
+ let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz" }
50
+ let(:fasta) { FastaFile.open(fname) }
51
+
52
+ it "reads the records into a hash: header as key and seq as val" do
53
+ expect(fasta.to_hash).to eq records
54
+ end
55
+
56
+ it "passes the values as Sequence objects" do
57
+ expect(
58
+ fasta.to_hash.values.all? { |val| val.instance_of? Sequence }
59
+ ).to eq true
60
+ end
61
+ end
62
+
47
63
  describe "#each_record" do
48
64
  let(:records) { Helpers::RECORDS }
49
65
 
@@ -43,9 +43,40 @@ describe FastqFile do
43
43
  expect(qual).to be_an_instance_of Quality
44
44
  end
45
45
  end
46
+ end
47
+
48
+ describe "#to_hash" do
49
+ let(:records) {
50
+ { "seq1" => { head: "seq1",
51
+ seq: "AACCTTGG",
52
+ desc: "",
53
+ qual: ")#3gTqN8" },
54
+ "seq2 apples" => { head: "seq2 apples",
55
+ seq: "ACTG",
56
+ desc: "seq2 apples",
57
+ qual: "*ujM" }
58
+ }
59
+ }
60
+ let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz" }
61
+ let(:fastq) { FastqFile.open(fname) }
62
+
63
+ it "reads the records into a hash: header as key and seq as val" do
64
+ expect(fastq.to_hash).to eq records
65
+ end
66
+
67
+ it "passes the seqs as Sequence objects" do
68
+ expect(
69
+ fastq.to_hash.values.all? { |val| val[:seq].instance_of? Sequence }
70
+ ).to eq true
71
+ end
46
72
 
73
+ it "passes the quals as Quality objects" do
74
+ expect(
75
+ fastq.to_hash.values.all? { |val| val[:qual].instance_of? Quality }
76
+ ).to eq true
77
+ end
47
78
  end
48
-
79
+
49
80
  context "with a 4 line per record fastq file" do
50
81
  describe "#each_record" do
51
82
  context "with a gzipped file" do
@@ -80,6 +111,5 @@ describe FastqFile do
80
111
  end
81
112
  end
82
113
  end
83
- end
114
+ end
84
115
  end
85
-
@@ -19,6 +19,57 @@
19
19
  require 'spec_helper'
20
20
 
21
21
  describe SeqFile do
22
+
23
+ describe "#to_hash" do
24
+ context "when input is a fasta file" do
25
+ let(:records) { Helpers::RECORDS_MAP }
26
+ let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa.gz" }
27
+ let(:fasta) { SeqFile.open(fname) }
28
+
29
+ it "reads the records into a hash: header as key and seq as val" do
30
+ expect(fasta.to_hash).to eq records
31
+ end
32
+
33
+ it "passes the values as Sequence objects" do
34
+ expect(
35
+ fasta.to_hash.values.all? { |val| val.instance_of? Sequence }
36
+ ).to eq true
37
+ end
38
+ end
39
+
40
+ context "when input is a fastq file" do
41
+ let(:records) {
42
+ { "seq1" => { head: "seq1",
43
+ seq: "AACCTTGG",
44
+ desc: "",
45
+ qual: ")#3gTqN8" },
46
+ "seq2 apples" => { head: "seq2 apples",
47
+ seq: "ACTG",
48
+ desc: "seq2 apples",
49
+ qual: "*ujM" }
50
+ }
51
+ }
52
+ let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fq.gz" }
53
+ let(:fastq) { SeqFile.open(fname) }
54
+
55
+ it "reads the records into a hash: header as key and seq as val" do
56
+ expect(fastq.to_hash).to eq records
57
+ end
58
+
59
+ it "passes the seqs as Sequence objects" do
60
+ expect(
61
+ fastq.to_hash.values.all? { |val| val[:seq].instance_of? Sequence }
62
+ ).to eq true
63
+ end
64
+
65
+ it "passes the quals as Quality objects" do
66
+ expect(
67
+ fastq.to_hash.values.all? { |val| val[:qual].instance_of? Quality }
68
+ ).to eq true
69
+ end
70
+ end
71
+ end
72
+
22
73
  describe "#each_record" do
23
74
 
24
75
  context "when input is a fasta file" do
@@ -138,8 +189,8 @@ describe SeqFile do
138
189
  err_msg = "Input does not look like FASTA or FASTQ"
139
190
 
140
191
  expect { SeqFile.open(fname).each_record do |h, s|
141
- puts [h, s].join ' '
142
- end
192
+ puts [h, s].join ' '
193
+ end
143
194
  }.to raise_error(ArgumentError, err_msg)
144
195
  end
145
196
  end
data/spec/spec_helper.rb CHANGED
@@ -31,6 +31,17 @@ module Helpers
31
31
  ["seq3", "yyyyyyyyyyyyyyyNNN"],
32
32
  ["empty seq at end", ""]]
33
33
 
34
+ RECORDS_MAP = {
35
+ "empty seq at beginning" => "",
36
+ "seq1 is fun" => "AACTGGNNN",
37
+ "seq2" => "AATCCTGNNN",
38
+ "empty seq 1" => "",
39
+ "empty seq 2" => "",
40
+ "seq3" => "yyyyyyyyyyyyyyyNNN",
41
+ "empty seq at end" => ""
42
+ }
43
+
44
+
34
45
  TRUTHY_RECORDS = [["empty seq at beginning", []],
35
46
  ["seq1 is fun", ["AACTGGNNN"]],
36
47
  ["seq2", ["AAT", "CCTGNNN"]],
metadata CHANGED
@@ -1,97 +1,111 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.2
4
+ version: 1.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-05-22 00:00:00.000000000 Z
11
+ date: 2015-07-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.6'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.6'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: '10.3'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ~>
39
39
  - !ruby/object:Gem::Version
40
40
  version: '10.3'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rspec
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ~>
46
46
  - !ruby/object:Gem::Version
47
47
  version: '2.14'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
54
  version: '2.14'
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: bio
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
- - - "~>"
59
+ - - ~>
60
60
  - !ruby/object:Gem::Version
61
61
  version: '1.4'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
- - - "~>"
66
+ - - ~>
67
67
  - !ruby/object:Gem::Version
68
68
  version: '1.4'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: yard
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - "~>"
73
+ - - ~>
74
74
  - !ruby/object:Gem::Version
75
75
  version: '0.8'
76
76
  type: :development
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - "~>"
80
+ - - ~>
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.8'
83
+ - !ruby/object:Gem::Dependency
84
+ name: rdiscount
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ! '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: coveralls
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
- - - "~>"
101
+ - - ~>
88
102
  - !ruby/object:Gem::Version
89
103
  version: '0.7'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
- - - "~>"
108
+ - - ~>
95
109
  - !ruby/object:Gem::Version
96
110
  version: '0.7'
97
111
  description: Provides nice, programmatic access to fasta and fastq files, as well
@@ -102,9 +116,10 @@ executables: []
102
116
  extensions: []
103
117
  extra_rdoc_files: []
104
118
  files:
105
- - ".coveralls.yml"
106
- - ".gitignore"
107
- - ".travis.yml"
119
+ - .coveralls.yml
120
+ - .gitignore
121
+ - .travis.yml
122
+ - .yardopts
108
123
  - COPYING
109
124
  - Gemfile
110
125
  - README.md
@@ -131,7 +146,7 @@ files:
131
146
  - test_files/test.fq.gz
132
147
  homepage: https://github.com/mooreryan/parse_fasta
133
148
  licenses:
134
- - 'GPLv3: http://www.gnu.org/licenses/gpl.txt'
149
+ - ! 'GPLv3: http://www.gnu.org/licenses/gpl.txt'
135
150
  metadata: {}
136
151
  post_install_message:
137
152
  rdoc_options: []
@@ -139,17 +154,17 @@ require_paths:
139
154
  - lib
140
155
  required_ruby_version: !ruby/object:Gem::Requirement
141
156
  requirements:
142
- - - ">="
157
+ - - ! '>='
143
158
  - !ruby/object:Gem::Version
144
159
  version: 1.9.3
145
160
  required_rubygems_version: !ruby/object:Gem::Requirement
146
161
  requirements:
147
- - - ">="
162
+ - - ! '>='
148
163
  - !ruby/object:Gem::Version
149
164
  version: '0'
150
165
  requirements: []
151
166
  rubyforge_project:
152
- rubygems_version: 2.4.6
167
+ rubygems_version: 2.4.8
153
168
  signing_key:
154
169
  specification_version: 4
155
170
  summary: Easy-peasy parsing of fasta & fastq files!