parse_fasta 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b140f0d7d54cdb4f0938bbcd5aa2484f069fcb50
4
- data.tar.gz: 9ff3a6ace4f834da41b298075aa470cb730bd4f6
3
+ metadata.gz: ae03847d56379d572d6118a176876a1e11a21a49
4
+ data.tar.gz: ffb84a464f8f7057f3a363b2b872f4b60fcca9c2
5
5
  SHA512:
6
- metadata.gz: f9b9dd477b595a28970f2f299503f360c7fb353a62013b2e848f353a296ddebf1def65f5f6a5f02ea046d20041f28cfe0c32581cdc10793cd6ac75daec0576ec
7
- data.tar.gz: aa1e8e8aff902c6a16315fa6513f755f92887ae79c0c4cc8f42d7ae7c530e8885250885c7397cf1861d47136ceb6012deed7fab9be3d98da0d3d4c4d11720f60
6
+ metadata.gz: 0b6b3694de307b868df3b1d5b38b08e539e545827d8d5f47a8b41a550f60d6354164b0e563b6eacf39ab63bd844c2ab68578fc0bfbc5fd5a1f3a63b31a09cfc9
7
+ data.tar.gz: 719f6fb5c112b06ecf969662617de3bdc02156a09fcb542ff0a9553de55f2d7d6400fc2d6384be27c780517b1ab95887991a8a22c9d05ab3070e7d0e2b05962f
data/README.md CHANGED
@@ -1,5 +1,7 @@
1
1
  # parse_fasta #
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/parse_fasta.svg)](http://badge.fury.io/rb/parse_fasta)
4
+
3
5
  So you want to parse a fasta file...
4
6
 
5
7
  ## Installation ##
@@ -27,7 +29,7 @@ and over.
27
29
  ## Documentation ##
28
30
 
29
31
  Check out
30
- [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.1.0/frames)
32
+ [parse_fasta docs](http://rubydoc.info/gems/parse_fasta/1.3.0/frames)
31
33
  to see the full documentation.
32
34
 
33
35
  ## Usage ##
@@ -56,6 +58,45 @@ Now we can parse fastq files as well!
56
58
 
57
59
  ## Versions ##
58
60
 
61
+ ### 1.3.0 ###
62
+
63
+ Add additional functionality to `each_record` method.
64
+
65
+ #### Info ####
66
+
67
+ I often like to use the fasta format for other things like so
68
+
69
+ >fruits
70
+ pineapple
71
+ pear
72
+ peach
73
+ >veggies
74
+ peppers
75
+ parsnip
76
+ peas
77
+
78
+ rather than having this in a two-column file like this
79
+
80
+ fruit,pineapple
81
+ fruit,pear
82
+ fruit,peach
83
+ veggie,peppers
84
+ veggie,parsnip
85
+ veggie,peas
86
+
87
+ So I added functionality to `each_record` to keep each line of a record
88
+ separate in an array. Here's an example using the above file.
89
+
90
+ info = []
91
+ FastaFile.open(f, 'r').each_record(1) do |header, lines|
92
+ info << [header, lines]
93
+ end
94
+
95
+ Then info will contain the following arrays
96
+
97
+ ['fruits', ['pineapple', 'pear', 'peach']],
98
+ ['veggies', ['peppers', 'parsnip', 'peas']]
99
+
59
100
  ### 1.2.0 ###
60
101
 
61
102
  Added `mean_qual` method to the `Quality` class.
@@ -22,20 +22,42 @@ class FastaFile < File
22
22
  # Analogous to File#each_line, #each_record is used to go through a
23
23
  # fasta file record by record.
24
24
  #
25
- # @example Parsing a fasta file
25
+ # @param separate_lines [Object] If truthy, separate lines of record
26
+ # into an array, but if falsy, yield a Sequence object for the
27
+ # sequence instead.
28
+ #
29
+ # @example Parsing a fasta file (default behavior)
26
30
  # FastaFile.open('reads.fna', 'r').each_record do |header, sequence|
27
31
  # puts [header, sequence.gc].join("\t")
28
32
  # end
29
33
  #
34
+ # @example Parsing a fasta file (with truthy value param)
35
+ # FastaFile.open('reads.fna','r').each_record(1) do |header, sequence|
36
+ # # header => 'sequence_1'
37
+ # # sequence => ['AACTG', 'AGTCGT', ... ]
38
+ # end
39
+ #
30
40
  # @yield The header and sequence for each record in the fasta
31
41
  # file to the block
42
+ #
32
43
  # @yieldparam header [String] The header of the fasta record without
33
44
  # the leading '>'
34
- # @yieldparam sequence [Sequence] The sequence of the fasta record
35
- def each_record
36
- self.each("\n>") do |line|
37
- header, sequence = parse_line(line)
38
- yield(header.strip, Sequence.new(sequence))
45
+ #
46
+ # @yieldparam sequence [Sequence, Array<String>] The sequence of the
47
+ # fasta record. If `separate_lines` is falsy (the default
48
+ # behavior), will be Sequence, but if truthy will be
49
+ # Array<String>.
50
+ def each_record(separate_lines=nil)
51
+ if separate_lines
52
+ self.each("\n>") do |line|
53
+ header, sequence = parse_line_separately(line)
54
+ yield(header.strip, sequence)
55
+ end
56
+ else
57
+ self.each("\n>") do |line|
58
+ header, sequence = parse_line(line)
59
+ yield(header.strip, Sequence.new(sequence))
60
+ end
39
61
  end
40
62
  end
41
63
 
@@ -43,4 +65,13 @@ class FastaFile < File
43
65
  def parse_line(line)
44
66
  line.chomp.split("\n", 2).map { |s| s.gsub(/\n|>/, '') }
45
67
  end
68
+
69
+ def parse_line_separately(line)
70
+ #line.chomp.split("\n", 2).map { |s| s.gsub(/>/, '') }
71
+ header, sequence =
72
+ line.chomp.split("\n", 2).map { |s| s.gsub(/>/, '') }
73
+ sequences = sequence.split("\n").reject { |s| s.empty? }
74
+
75
+ [header, sequences]
76
+ end
46
77
  end
@@ -17,5 +17,5 @@
17
17
  # along with parse_fasta. If not, see <http://www.gnu.org/licenses/>.
18
18
 
19
19
  module ParseFasta
20
- VERSION = "1.2.0"
20
+ VERSION = "1.3.0"
21
21
  end
@@ -22,25 +22,39 @@ describe FastaFile do
22
22
  describe "#each_record" do
23
23
 
24
24
  let(:fname) { "#{File.dirname(__FILE__)}/../../test_files/test.fa" }
25
- it "yields a block with header and sequence for each record in a fasta file" do
26
- seqs = []
27
- FastaFile.open(fname, 'r').each_record do |header, sequence|
28
- seqs << [header, sequence]
25
+
26
+ context "with no arguments" do
27
+ it "yields header and sequence for each record in a fasta file" do
28
+ seqs = []
29
+ FastaFile.open(fname, 'r').each_record do |header, sequence|
30
+ seqs << [header, sequence]
31
+ end
32
+
33
+ expect(seqs).to eq([["seq1 is fun", "AACTGGNNN"],
34
+ ["seq2", "AATCCTGNNN"],
35
+ ["seq3", "yyyyyyyyyyyyyyyNNN"]])
36
+
29
37
  end
30
-
31
- expect(seqs).to eq([["seq1 is fun", "AACTGGend"],
32
- ["seq2", "AATCCTGend"],
33
- ["seq3", "yyyyyyyyyyyyyyyend"]])
34
38
 
39
+ it "yields sequence of type Sequence as second parameter" do
40
+ FastaFile.open(fname, 'r').each_record do |header, sequence|
41
+ expect(sequence).to be_an_instance_of Sequence
42
+ break
43
+ end
44
+ end
35
45
  end
36
46
 
37
- it "passes header of type string as first parameter" do
38
- sequence_class = nil
39
- FastaFile.open(fname, 'r').each_record do |header, sequence|
40
- sequence_class = sequence.class
41
- break
47
+ context "with a truthy argument" do
48
+ it "yields header and array of lines for each record" do
49
+ seqs = []
50
+ FastaFile.open(fname, 'r').each_record(1) do |header, sequence|
51
+ seqs << [header, sequence]
52
+ end
53
+
54
+ expect(seqs).to eq([["seq1 is fun", ["AACTGGNNN"]],
55
+ ["seq2", ["AAT", "CCTGNNN"]],
56
+ ["seq3", ["yyyyyyyyyy", "yyyyy", "NNN"]]])
42
57
  end
43
- expect(sequence_class).to be Sequence
44
- end
58
+ end
45
59
  end
46
60
  end
@@ -20,8 +20,12 @@ require 'spec_helper'
20
20
  require 'bio'
21
21
 
22
22
  describe Sequence do
23
- describe "#gc" do
24
23
 
24
+ it "inherits from String" do
25
+ expect(Sequence.new('ACTG')).to be_a String
26
+ end
27
+
28
+ describe "#gc" do
25
29
  it "gives the same answer as BioRuby" do
26
30
  s = 'ACtgcGAtcgCgAaTtGgCcnNuU'
27
31
  bioruby_gc = Bio::Sequence::NA.new(s).gc_content
data/test_files/test.fa CHANGED
@@ -1,9 +1,10 @@
1
1
  >seq1 is fun
2
- AACTGGend
2
+ AACTGGNNN
3
3
  >seq2
4
4
  AAT
5
- CCTGend
5
+ CCTGNNN
6
6
  >seq3
7
7
  yyyyyyyyyy
8
+
8
9
  yyyyy
9
- end
10
+ NNN
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parse_fasta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ryan Moore
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-07-03 00:00:00.000000000 Z
11
+ date: 2014-07-24 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler