parse_fasta 2.4.2 → 2.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +3 -0
- data/.travis.yml +32 -3
- data/README.md +4 -0
- data/lib/parse_fasta/seq_file.rb +5 -1
- data/lib/parse_fasta/version.rb +1 -1
- data/parse_fasta.gemspec +18 -20
- metadata +29 -32
- data/.yardopts +0 -1
- data/CHANGELOG.md +0 -194
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b58d57b0b3d32e90276f0a95e1bede38e0573439
|
4
|
+
data.tar.gz: 6577ae5b5793f164526b24c2f748a9d782150653
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c4d717b98e4f8263b9e233bbf398cc09813a34c22dd5899fc4281909869f10776f3298412d546e9546acb423747b5e065d636e1ebbcdb3cb2e3d20bdc5bf2530
|
7
|
+
data.tar.gz: 222bb97de866d13b3e8eeac9208f0f3c838e6e00f8833962bae299de59045b7a9c2a9f58f4a95b81777a1fe0161c4646322dfb0d69e214ed4a7218d7bcfe60f0
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -7,12 +7,41 @@ rvm:
|
|
7
7
|
- 2.3
|
8
8
|
- 2.4
|
9
9
|
- 2.5
|
10
|
-
|
10
|
+
- ruby-head
|
11
|
+
- jruby-head
|
12
|
+
|
13
|
+
jdk:
|
14
|
+
- oraclejdk8
|
15
|
+
- oraclejdk9
|
16
|
+
|
17
|
+
matrix:
|
18
|
+
exclude:
|
19
|
+
- rvm: 2.1
|
20
|
+
jdk: oraclejdk8
|
21
|
+
- rvm: 2.1
|
22
|
+
jdk: oraclejdk9
|
23
|
+
- rvm: 2.2
|
24
|
+
jdk: oraclejdk8
|
25
|
+
- rvm: 2.2
|
26
|
+
jdk: oraclejdk9
|
27
|
+
- rvm: 2.3
|
28
|
+
jdk: oraclejdk8
|
29
|
+
- rvm: 2.3
|
30
|
+
jdk: oraclejdk9
|
31
|
+
- rvm: 2.4
|
32
|
+
jdk: oraclejdk8
|
33
|
+
- rvm: 2.4
|
34
|
+
jdk: oraclejdk9
|
35
|
+
- rvm: 2.5
|
36
|
+
jdk: oraclejdk8
|
37
|
+
- rvm: 2.5
|
38
|
+
jdk: oraclejdk9
|
39
|
+
|
11
40
|
script: 'bundle exec rake'
|
12
41
|
|
13
42
|
notifications:
|
14
43
|
email:
|
15
44
|
recipients:
|
16
45
|
- moorer@udel.edu
|
17
|
-
on_failure:
|
18
|
-
on_success:
|
46
|
+
on_failure: always
|
47
|
+
on_success: always
|
data/README.md
CHANGED
data/lib/parse_fasta/seq_file.rb
CHANGED
@@ -209,6 +209,8 @@ module ParseFasta
|
|
209
209
|
line_reader
|
210
210
|
end
|
211
211
|
|
212
|
+
# Get the first char of the file whether it is gzip'd or not. No
|
213
|
+
# need to rewind the stream afterwards.
|
212
214
|
def get_first_char fname
|
213
215
|
if File.exists? fname
|
214
216
|
begin
|
@@ -217,8 +219,10 @@ module ParseFasta
|
|
217
219
|
f = File.open fname
|
218
220
|
end
|
219
221
|
|
222
|
+
|
220
223
|
begin
|
221
|
-
first_char = f.
|
224
|
+
first_char = f.each.peek[0]
|
225
|
+
|
222
226
|
return first_char
|
223
227
|
ensure
|
224
228
|
f.close
|
data/lib/parse_fasta/version.rb
CHANGED
data/parse_fasta.gemspec
CHANGED
@@ -1,34 +1,32 @@
|
|
1
1
|
# coding: utf-8
|
2
|
-
lib = File.expand_path(
|
2
|
+
lib = File.expand_path("../lib", __FILE__)
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
-
require
|
4
|
+
require "parse_fasta/version"
|
5
5
|
|
6
6
|
Gem::Specification.new do |spec|
|
7
|
-
spec.name
|
8
|
-
spec.version
|
9
|
-
spec.authors
|
10
|
-
spec.email
|
11
|
-
spec.summary
|
12
|
-
spec.description
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
spec.
|
17
|
-
spec.license = "GPLv3: http://www.gnu.org/licenses/gpl.txt"
|
7
|
+
spec.name = "parse_fasta"
|
8
|
+
spec.version = ParseFasta::VERSION
|
9
|
+
spec.authors = ["Ryan Moore"]
|
10
|
+
spec.email = ["moorer@udel.edu"]
|
11
|
+
spec.summary = %q{Easy-peasy parsing of fasta & fastq files!}
|
12
|
+
spec.description = <<-EOF
|
13
|
+
Provides nice, programmatic access to fasta and fastq files, as well as providing Sequence and Quality helper classes. No need for BioRuby ;)
|
14
|
+
EOF
|
15
|
+
spec.homepage = "https://github.com/mooreryan/parse_fasta"
|
16
|
+
spec.license = "MIT"
|
18
17
|
|
19
18
|
spec.files = `git ls-files -z`.split("\x0")
|
20
|
-
spec.executables = spec.files.grep(%r{^bin/}) {
|
19
|
+
spec.executables = spec.files.grep(%r{^bin/}) {|f| File.basename(f)}
|
21
20
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
22
21
|
spec.require_paths = ["lib"]
|
23
22
|
|
24
23
|
spec.required_ruby_version = ">= 2.1"
|
25
24
|
|
26
|
-
spec.add_development_dependency "bundler", "~> 1.
|
27
|
-
spec.add_development_dependency "rake", "~>
|
28
|
-
spec.add_development_dependency "rspec", "~>
|
25
|
+
spec.add_development_dependency "bundler", "~> 1.16", ">= 1.16.1"
|
26
|
+
spec.add_development_dependency "rake", "~> 12.3", ">= 12.3.1"
|
27
|
+
spec.add_development_dependency "rspec", "~> 3.7"
|
29
28
|
spec.add_development_dependency "bio", "~> 1.4"
|
30
|
-
spec.add_development_dependency "yard", "~> 0.
|
31
|
-
spec.add_development_dependency "
|
32
|
-
spec.add_development_dependency "coveralls", "~> 0.7"
|
29
|
+
spec.add_development_dependency "yard", "~> 0.9.12"
|
30
|
+
spec.add_development_dependency "coveralls", "~> 0.8.21"
|
33
31
|
spec.add_development_dependency "benchmark-ips", "~> 2.7", ">= 2.7.2"
|
34
32
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parse_fasta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ryan Moore
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-04-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -16,42 +16,54 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.16'
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 1.16.1
|
20
23
|
type: :development
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
27
|
- - "~>"
|
25
28
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
29
|
+
version: '1.16'
|
30
|
+
- - ">="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 1.16.1
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
34
|
name: rake
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
37
|
- - "~>"
|
32
38
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
39
|
+
version: '12.3'
|
40
|
+
- - ">="
|
41
|
+
- !ruby/object:Gem::Version
|
42
|
+
version: 12.3.1
|
34
43
|
type: :development
|
35
44
|
prerelease: false
|
36
45
|
version_requirements: !ruby/object:Gem::Requirement
|
37
46
|
requirements:
|
38
47
|
- - "~>"
|
39
48
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
49
|
+
version: '12.3'
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: 12.3.1
|
41
53
|
- !ruby/object:Gem::Dependency
|
42
54
|
name: rspec
|
43
55
|
requirement: !ruby/object:Gem::Requirement
|
44
56
|
requirements:
|
45
57
|
- - "~>"
|
46
58
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
59
|
+
version: '3.7'
|
48
60
|
type: :development
|
49
61
|
prerelease: false
|
50
62
|
version_requirements: !ruby/object:Gem::Requirement
|
51
63
|
requirements:
|
52
64
|
- - "~>"
|
53
65
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
66
|
+
version: '3.7'
|
55
67
|
- !ruby/object:Gem::Dependency
|
56
68
|
name: bio
|
57
69
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,42 +84,28 @@ dependencies:
|
|
72
84
|
requirements:
|
73
85
|
- - "~>"
|
74
86
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
87
|
+
version: 0.9.12
|
76
88
|
type: :development
|
77
89
|
prerelease: false
|
78
90
|
version_requirements: !ruby/object:Gem::Requirement
|
79
91
|
requirements:
|
80
92
|
- - "~>"
|
81
93
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: rdiscount
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
94
|
+
version: 0.9.12
|
97
95
|
- !ruby/object:Gem::Dependency
|
98
96
|
name: coveralls
|
99
97
|
requirement: !ruby/object:Gem::Requirement
|
100
98
|
requirements:
|
101
99
|
- - "~>"
|
102
100
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
101
|
+
version: 0.8.21
|
104
102
|
type: :development
|
105
103
|
prerelease: false
|
106
104
|
version_requirements: !ruby/object:Gem::Requirement
|
107
105
|
requirements:
|
108
106
|
- - "~>"
|
109
107
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
108
|
+
version: 0.8.21
|
111
109
|
- !ruby/object:Gem::Dependency
|
112
110
|
name: benchmark-ips
|
113
111
|
requirement: !ruby/object:Gem::Requirement
|
@@ -128,8 +126,10 @@ dependencies:
|
|
128
126
|
- - ">="
|
129
127
|
- !ruby/object:Gem::Version
|
130
128
|
version: 2.7.2
|
131
|
-
description: Provides nice, programmatic access to fasta and fastq files, as well
|
129
|
+
description: 'Provides nice, programmatic access to fasta and fastq files, as well
|
132
130
|
as providing Sequence and Quality helper classes. No need for BioRuby ;)
|
131
|
+
|
132
|
+
'
|
133
133
|
email:
|
134
134
|
- moorer@udel.edu
|
135
135
|
executables:
|
@@ -142,8 +142,6 @@ files:
|
|
142
142
|
- ".gitignore"
|
143
143
|
- ".rspec"
|
144
144
|
- ".travis.yml"
|
145
|
-
- ".yardopts"
|
146
|
-
- CHANGELOG.md
|
147
145
|
- Gemfile
|
148
146
|
- LICENSE
|
149
147
|
- README.md
|
@@ -182,7 +180,7 @@ files:
|
|
182
180
|
- spec/test_files/with_rec_sep_in_seq.fa
|
183
181
|
homepage: https://github.com/mooreryan/parse_fasta
|
184
182
|
licenses:
|
185
|
-
-
|
183
|
+
- MIT
|
186
184
|
metadata: {}
|
187
185
|
post_install_message:
|
188
186
|
rdoc_options: []
|
@@ -200,7 +198,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
200
198
|
version: '0'
|
201
199
|
requirements: []
|
202
200
|
rubyforge_project:
|
203
|
-
rubygems_version: 2.
|
201
|
+
rubygems_version: 2.6.14
|
204
202
|
signing_key:
|
205
203
|
specification_version: 4
|
206
204
|
summary: Easy-peasy parsing of fasta & fastq files!
|
@@ -228,4 +226,3 @@ test_files:
|
|
228
226
|
- spec/test_files/seqs.fq.gz
|
229
227
|
- spec/test_files/test.rb
|
230
228
|
- spec/test_files/with_rec_sep_in_seq.fa
|
231
|
-
has_rdoc:
|
data/.yardopts
DELETED
@@ -1 +0,0 @@
|
|
1
|
-
--markup-provider rdiscount
|
data/CHANGELOG.md
DELETED
@@ -1,194 +0,0 @@
|
|
1
|
-
## Versions ##
|
2
|
-
|
3
|
-
### 2.3.0 ###
|
4
|
-
|
5
|
-
Allow parsing of fastA files with `>` characters in the sequence with the `check_fasta_seq: false` option.
|
6
|
-
|
7
|
-
### 2.2.0 ###
|
8
|
-
|
9
|
-
Add `id` attribute to `Record`.
|
10
|
-
|
11
|
-
### 2.1.1 ###
|
12
|
-
|
13
|
-
Speed up `Record.new`
|
14
|
-
|
15
|
-
### 2.1.0 ###
|
16
|
-
|
17
|
-
Add `#to_s`, `#to_fasta`, and `#to_fastq` to `Record`.
|
18
|
-
|
19
|
-
### 2.0.0 ###
|
20
|
-
|
21
|
-
A weird feature of `Zlib::GzipReader` made it so that if a gzipped file was created like this:
|
22
|
-
|
23
|
-
```bash
|
24
|
-
gzip -c a.fa > z.fa.gz
|
25
|
-
gzip -c b.fa >> z.fa.gz
|
26
|
-
```
|
27
|
-
|
28
|
-
Then the gzip reader would only read the lines from `a.fa` without some fiddling around. Since this was a pretty low level thing, I just decided to make a bunch of under the hood changes that I've been meaning to get to.
|
29
|
-
|
30
|
-
#### Other things
|
31
|
-
|
32
|
-
- Everything is namespaced under `ParseFasta` module
|
33
|
-
- Removed `FastaFile` and `FastqFile` classes, `SeqFile` only remains
|
34
|
-
- Removed `Sequence` and `Quality` classes. These might get put back in at some point, but I almost never used them anyway
|
35
|
-
- `SeqFile#each_record` yields a `Record` object so you can use the same code to parse fastA and fastQ files
|
36
|
-
- Other stuff that I'm forgetting!
|
37
|
-
|
38
|
-
|
39
|
-
### 1.9.2 ###
|
40
|
-
|
41
|
-
Speed up fastA `each_record` and `each_record_fast`.
|
42
|
-
|
43
|
-
### 1.9.1 ###
|
44
|
-
|
45
|
-
Speed up fastQ `each_record` and `each_record_fast`. Courtesy of
|
46
|
-
[Matthew Ralston](https://github.com/MatthewRalston).
|
47
|
-
|
48
|
-
### 1.9.0 ###
|
49
|
-
|
50
|
-
Added "fast" versions of `each_record` methods
|
51
|
-
(`each_record_fast`). Basically, they return sequences and quality
|
52
|
-
strings as Ruby `String` objects instead of a `Sequence` or `Quality`
|
53
|
-
objects. Also, if the sequence or quality string has spaces, they will
|
54
|
-
be retained. If this is a problem, use the original `each_record`
|
55
|
-
methods.
|
56
|
-
|
57
|
-
### 1.8.2 ###
|
58
|
-
|
59
|
-
Speed up `FastqFile#each_record`.
|
60
|
-
|
61
|
-
### 1.8.1 ###
|
62
|
-
|
63
|
-
An error will be raised if a fasta file has a `>` in the
|
64
|
-
sequence. Sometimes files are not terminated with a newline
|
65
|
-
character. If this is the case, then catting two fasta files will
|
66
|
-
smush the first header of the second file right in with the last
|
67
|
-
sequence of the first file. This is bad, raise an error! ;)
|
68
|
-
|
69
|
-
Example
|
70
|
-
|
71
|
-
>seq1
|
72
|
-
ACTG>seq2
|
73
|
-
ACTG
|
74
|
-
>seq3
|
75
|
-
ACTG
|
76
|
-
|
77
|
-
This will raise `ParseFasta::SequenceFormatError`.
|
78
|
-
|
79
|
-
Also, headers with lots of `>` within are fine now.
|
80
|
-
|
81
|
-
### 1.8 ###
|
82
|
-
|
83
|
-
Add `Sequence#rev_comp`. It can handle IUPAC characters. Since
|
84
|
-
`parse_fasta` doesn't check whether the seq is AA or NA, if called on
|
85
|
-
an amino acid string, things will get weird as it will complement the
|
86
|
-
IUPAC characters in the AA string and leave others.
|
87
|
-
|
88
|
-
### 1.7.2 ###
|
89
|
-
|
90
|
-
Strip spaces (not all whitespace) from `Sequence` and `Quality` strings.
|
91
|
-
|
92
|
-
Some alignment fastas have spaces for easier reading. Strip these
|
93
|
-
out. For consistency, also strips spaces from `Quality` strings. If
|
94
|
-
there are spaces that don't match in the quality and sequence in a
|
95
|
-
fastQ file, then things will get messed up in the FastQ file. FastQ
|
96
|
-
shouldn't have spaces though.
|
97
|
-
|
98
|
-
### 1.7 ###
|
99
|
-
|
100
|
-
Add `SeqFile#to_hash`, `FastaFile#to_hash` and `FastqFile#to_hash`.
|
101
|
-
|
102
|
-
### 1.6.2 ###
|
103
|
-
|
104
|
-
`FastaFile::open` now raises a `ParseFasta::DataFormatError` when passed files
|
105
|
-
that don't begin with a `>`.
|
106
|
-
|
107
|
-
### 1.6.1 ###
|
108
|
-
|
109
|
-
Better internal handling of empty sequences -- instead of raising
|
110
|
-
errors, pass empty sequences.
|
111
|
-
|
112
|
-
### 1.6 ###
|
113
|
-
|
114
|
-
Added `SeqFile` class, which accepts either fastA or fastQ files. It
|
115
|
-
uses FastaFile and FastqFile internally. You can use this class if you
|
116
|
-
want your scripts to accept either fastA or fastQ files.
|
117
|
-
|
118
|
-
If you need the description and quality string, you should use
|
119
|
-
FastqFile instead.
|
120
|
-
|
121
|
-
### 1.5 ###
|
122
|
-
|
123
|
-
Now accepts gzipped files. Huzzah!
|
124
|
-
|
125
|
-
### 1.4 ###
|
126
|
-
|
127
|
-
Added methods:
|
128
|
-
|
129
|
-
Sequence.base_counts
|
130
|
-
Sequence.base_frequencies
|
131
|
-
|
132
|
-
### 1.3 ###
|
133
|
-
|
134
|
-
Add additional functionality to `each_record` method.
|
135
|
-
|
136
|
-
#### Info ####
|
137
|
-
|
138
|
-
I often like to use the fasta format for other things like so
|
139
|
-
|
140
|
-
>fruits
|
141
|
-
pineapple
|
142
|
-
pear
|
143
|
-
peach
|
144
|
-
>veggies
|
145
|
-
peppers
|
146
|
-
parsnip
|
147
|
-
peas
|
148
|
-
|
149
|
-
rather than having this in a two column file like this
|
150
|
-
|
151
|
-
fruit,pineapple
|
152
|
-
fruit,pear
|
153
|
-
fruit,peach
|
154
|
-
veggie,peppers
|
155
|
-
veggie,parsnip
|
156
|
-
veggie,peas
|
157
|
-
|
158
|
-
So I added functionality to `each_record` to keep each line of a record
|
159
|
-
separate in an array. Here's an example using the above file.
|
160
|
-
|
161
|
-
info = []
|
162
|
-
FastaFile.open(f, 'r').each_record(1) do |header, lines|
|
163
|
-
info << [header, lines]
|
164
|
-
end
|
165
|
-
|
166
|
-
Then info will contain the following arrays
|
167
|
-
|
168
|
-
['fruits', ['pineapple', 'pear', 'peach']],
|
169
|
-
['veggies', ['peppers', 'parsnip', 'peas']]
|
170
|
-
|
171
|
-
### 1.2 ###
|
172
|
-
|
173
|
-
Added `mean_qual` method to the `Quality` class.
|
174
|
-
|
175
|
-
### 1.1.2 ###
|
176
|
-
|
177
|
-
Dropped Ruby requirement to 1.9.3
|
178
|
-
|
179
|
-
(Note, if you want to build the docs with yard and you're using
|
180
|
-
Ruby 1.9.3, you may have to install the redcarpet gem.)
|
181
|
-
|
182
|
-
### 1.1 ###
|
183
|
-
|
184
|
-
Added: Fastq and Quality classes
|
185
|
-
|
186
|
-
### 1.0 ###
|
187
|
-
|
188
|
-
Added: Fasta and Sequence classes
|
189
|
-
|
190
|
-
Removed: File monkey patch
|
191
|
-
|
192
|
-
### 0.0.5 ###
|
193
|
-
|
194
|
-
Last version with File monkey patch.
|