parse_fasta 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b140f0d7d54cdb4f0938bbcd5aa2484f069fcb50
4
- data.tar.gz: 9ff3a6ace4f834da41b298075aa470cb730bd4f6
3
+ metadata.gz: ae03847d56379d572d6118a176876a1e11a21a49
4
+ data.tar.gz: ffb84a464f8f7057f3a363b2b872f4b60fcca9c2
5
5
  SHA512:
6
- metadata.gz: f9b9dd477b595a28970f2f299503f360c7fb353a62013b2e848f353a296ddebf1def65f5f6a5f02ea046d20041f28cfe0c32581cdc10793cd6ac75daec0576ec
7
- data.tar.gz: aa1e8e8aff902c6a16315fa6513f755f92887ae79c0c4cc8f42d7ae7c530e8885250885c7397cf1861d47136ceb6012deed7fab9be3d98da0d3d4c4d11720f60
6
+ metadata.gz: 0b6b3694de307b868df3b1d5b38b08e539e545827d8d5f47a8b41a550f60d6354164b0e563b6eacf39ab63bd844c2ab68578fc0bfbc5fd5a1f3a63b31a09cfc9
7
+ data.tar.gz: 719f6fb5c112b06ecf969662617de3bdc02156a09fcb542ff0a9553de55f2d7d6400fc2d6384be27c780517b1ab95887991a8a22c9d05ab3070e7d0e2b05962f
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # parse_fasta #
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/parse_fasta.svg)](http://badge.fury.io/rb/parse_fasta)
4
+
3
5
  So you want to parse a fasta file...
4
6
 
5
7
  ## Installation ##
@@ -27,7 +29,7 @@ and over.
27
29
  ## Documentation ##
28
30
 
29
31
  Check out
30
- [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.1.0/frames)
32
+ [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.3.0/frames)
31
33
  to see the full documentation.
32
34
 
33
35
  ## Usage ##
@@ -56,6 +58,45 @@ Now we can parse fastq files as well!
56
58
 
57
59
  ## Versions ##
58
60
 
61
+ ### 1.3.0 ###
62
+
63
+ Add additional functionality to `each_record` method.
64
+
65
+ #### Info ####
66
+
67
+ I often like to use the fasta format for other things like so
68
+
69
+ >fruits
70
+ pineapple
71
+ pear
72
+ peach
73
+ >veggies
74
+ peppers
75
+ parsnip
76
+ peas
77
+
78
+ rather than having this in a two column file like this
79
+
80
+ fruit,pineapple
81
+ fruit,pear
82
+ fruit,peach
83
+ veggie,peppers
84
+ veggie,parsnip
85
+ veggie,peas
86
+
87
+ So I added functionality to `each_record` to keep each line of a record
88
+ separate in an array. Here's an example using the above file.
89
+
90
+ info = []
91
+ FastaFile.open(f, 'r').each_record(1) do |header, lines|
92
+ info << [header, lines]
93
+ end
94
+
95
+ Then info will contain the following arrays
96
+
97
+ ['fruits', ['pineapple', 'pear', 'peach']],
98
+ ['veggies', ['peppers', 'parsnip', 'peas']]
99
+
59
100
  ### 1.2.0 ###
60
101
 
61
102
  Added `mean_qual` method to the `Quality` class.
@@ -22,20 +22,42 @@ class FastaFile < File
22
22
  # Analogous to File#each_line, #each_record is used to go through a
23
23
  # fasta file record by record.
24
24
  #
25
- # @example Parsing a fasta file
25
+ # @param separate_lines [Object] If truthy, separate lines of record
26
+ # into an array, but if falsy, yield a Sequence object for the
27
+ # sequence instead.
28
+ #
29
+ # @example Parsing a fasta file (default behavior)
26
30
  # FastaFile.open('reads.fna', 'r').each_record do |header, sequence|
27
31
  # puts [header, sequence.gc].join("\t")
28
32
  # end
29
33
  #
34
+ # @example Parsing a fasta file (with truthy value param)
35
+ # FastaFile.open('reads.fna','r').each_record(1) do |header, sequence|
36
+ # # header => 'sequence_1'
37
+ # # sequence => ['AACTG', 'AGTCGT', ... ]
38
+ # end
39
+ #
30
40
  # @yield The header and sequence for each record in the fasta
31
41
  # file to the block
42
+ #
32
43
  # @yieldparam header [String] The header of the fasta record without
33
44
  # the leading '>'
34
- # @yieldparam sequence [Sequence] The sequence of the fasta record
35
- def each_record
36
- self.each("\n>") do |line|
37
- header, sequence = parse_line(line)
38
- yield(header.strip, Sequence.new(sequence))
45
+ #
46
+ # @yieldparam sequence [Sequence, Array<String>] The sequence of the
47
+ # fasta record. If `separate_lines` is falsy (the default
48
+ # behavior), will be Sequence, but if truthy will be
49
+ # Array<String>.
50
+ def each_record(separate_lines=nil)
51
+ if separate_lines
52
+ self.each("\n>") do |line|
53
+ header, sequence = parse_line_separately(line)
54
+ yield(header.strip, sequence)
55
+ end
56
+ else
57
+ self.each("\n>") do |line|
58
+ header, sequence = parse_line(line)
59
+ yield(header.strip, Sequence.new(sequence))
60
+ end
39
61
  end
40
62
  end
41
63
 
@@ -43,4 +65,13 @@ class FastaFile < File
43
65
  def parse_line(line)
44
66
  line.chomp.split("\n", 2).map { |s| s.gsub(/\n|>/, '') }
45
67
  end
68
+
69
+ def parse_line_separately(line)
70
+ #line.chomp.split("\n", 2).map { |s| s.gsub(/>/, '') }
71
+ header, sequence =
72
+ line.chomp.split("\n", 2).map { |s| s.gsub(/>/, '') }
73
+ sequences = sequence.split("\n").reject { |s| s.empty? }
74
+
75
+ [header, sequences]
76
+ end
46
77
  end
@@ -17,5 +17,5 @@
17
17
  # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
19
  module ParseFasta
20
- VERSION = "1.2.0"
20
+ VERSION = "1.3.0"
21
21
  end
@@ -22,25 +22,39 @@ describe FastaFile do
22
22
  describe "#each_record" do
23
23
 
24
24
  let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa" }
25
- it "yields a block with header and sequence for each record in a fasta file" do
26
- seqs = []
27
- FastaFile.open(fname, 'r').each_record do |header, sequence|
28
- seqs << [header, sequence]
25
+
26
+ context "with no arguments" do
27
+ it "yields header and sequence for each record in a fasta file" do
28
+ seqs = []
29
+ FastaFile.open(fname, 'r').each_record do |header, sequence|
30
+ seqs << [header, sequence]
31
+ end
32
+
33
+ expect(seqs).to eq([["seq1 is fun", "AACTGGNNN"],
34
+ ["seq2", "AATCCTGNNN"],
35
+ ["seq3", "yyyyyyyyyyyyyyyNNN"]])
36
+
29
37
  end
30
-
31
- expect(seqs).to eq([["seq1 is fun", "AACTGGend"],
32
- ["seq2", "AATCCTGend"],
33
- ["seq3", "yyyyyyyyyyyyyyyend"]])
34
38
 
39
+ it "yields sequence of type Sequence as second parameter" do
40
+ FastaFile.open(fname, 'r').each_record do |header, sequence|
41
+ expect(sequence).to be_an_instance_of Sequence
42
+ break
43
+ end
44
+ end
35
45
  end
36
46
 
37
- it "passes header of type string as first parameter" do
38
- sequence_class = nil
39
- FastaFile.open(fname, 'r').each_record do |header, sequence|
40
- sequence_class = sequence.class
41
- break
47
+ context "with a truthy argument" do
48
+ it "yields header and array of lines for each record" do
49
+ seqs = []
50
+ FastaFile.open(fname, 'r').each_record(1) do |header, sequence|
51
+ seqs << [header, sequence]
52
+ end
53
+
54
+ expect(seqs).to eq([["seq1 is fun", ["AACTGGNNN"]],
55
+ ["seq2", ["AAT", "CCTGNNN"]],
56
+ ["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]]])
42
57
  end
43
- expect(sequence_class).to be Sequence
44
- end
58
+ end
45
59
  end
46
60
  end
@@ -20,8 +20,12 @@ require 'spec_helper'
20
20
  require 'bio'
21
21
 
22
22
  describe Sequence do
23
- describe "#gc" do
24
23
 
24
+ it "inherits from String" do
25
+ expect(Sequence.new('ACTG')).to be_a String
26
+ end
27
+
28
+ describe "#gc" do
25
29
  it "gives the same answer as BioRuby" do
26
30
  s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
27
31
  bioruby_gc = Bio::Sequence::NA.new(s).gc_content
data/test_files/test.fa CHANGED
@@ -1,9 +1,10 @@
1
1
  >seq1 is fun
2
- AACTGGend
2
+ AACTGGNNN
3
3
  >seq2
4
4
  AAT
5
- CCTGend
5
+ CCTGNNN
6
6
  >seq3
7
7
  yyyyyyyyyy
8
+
8
9
  yyyyy
9
- end
10
+ NNN
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-03 00:00:00.000000000 Z
11
+ date: 2014-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler