bio-gff3 0.8.6 → 0.8.7
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +13 -12
- data/README.rdoc +72 -32
- data/Rakefile +2 -2
- data/VERSION +1 -1
- data/bio-gff3.gemspec +9 -11
- data/lib/bio/db/gff/gff3parsefile.rb +1 -1
- metadata +78 -118
data/Gemfile.lock
CHANGED
@@ -2,8 +2,8 @@ GEM
|
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
4
|
bio (1.4.1)
|
5
|
-
bio-logger (0.
|
6
|
-
log4r (
|
5
|
+
bio-logger (1.0.0)
|
6
|
+
log4r (>= 1.1.9)
|
7
7
|
diff-lcs (1.1.2)
|
8
8
|
git (1.2.5)
|
9
9
|
jeweler (1.5.2)
|
@@ -11,16 +11,16 @@ GEM
|
|
11
11
|
git (>= 1.2.5)
|
12
12
|
rake
|
13
13
|
log4r (1.1.9)
|
14
|
-
rake (0.
|
14
|
+
rake (0.9.2)
|
15
15
|
rcov (0.9.9)
|
16
|
-
rspec (2.
|
17
|
-
rspec-core (~> 2.
|
18
|
-
rspec-expectations (~> 2.
|
19
|
-
rspec-mocks (~> 2.
|
20
|
-
rspec-core (2.
|
21
|
-
rspec-expectations (2.
|
16
|
+
rspec (2.6.0)
|
17
|
+
rspec-core (~> 2.6.0)
|
18
|
+
rspec-expectations (~> 2.6.0)
|
19
|
+
rspec-mocks (~> 2.6.0)
|
20
|
+
rspec-core (2.6.4)
|
21
|
+
rspec-expectations (2.6.0)
|
22
22
|
diff-lcs (~> 1.1.2)
|
23
|
-
rspec-mocks (2.
|
23
|
+
rspec-mocks (2.6.0)
|
24
24
|
shoulda (2.11.3)
|
25
25
|
|
26
26
|
PLATFORMS
|
@@ -28,9 +28,10 @@ PLATFORMS
|
|
28
28
|
|
29
29
|
DEPENDENCIES
|
30
30
|
bio (>= 1.3.1)
|
31
|
-
bio-logger
|
31
|
+
bio-logger (> 0.8.0)
|
32
32
|
bundler (~> 1.0.0)
|
33
33
|
jeweler (~> 1.5.2)
|
34
|
+
log4r (> 1.1.6)
|
34
35
|
rcov
|
35
|
-
rspec (>= 2.
|
36
|
+
rspec (>= 2.3.0)
|
36
37
|
shoulda
|
data/README.rdoc
CHANGED
@@ -5,15 +5,17 @@ including assembled mRNA, protein and CDS sequences.
|
|
5
5
|
|
6
6
|
Features:
|
7
7
|
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
8
|
+
* Take GFF3 (genome browser) information of any type, and assemble sequences, e.g. mRNA and CDS
|
9
|
+
* Options for low memory use and caching of records
|
10
|
+
* Support for external FASTA input files
|
11
|
+
* Use of multi-cores (NYI)
|
12
12
|
|
13
13
|
Currently the output is a FASTA file.
|
14
14
|
|
15
|
-
You can use this plugin in two ways. First as a standalone program,
|
16
|
-
plugin library to BioRuby.
|
15
|
+
You can use this plugin in two ways. First as a standalone program,
|
16
|
+
second as a plugin library to BioRuby.
|
17
|
+
|
18
|
+
Note: a really fast GFF3 parser is in the works at https://github.com/pjotrp/biolib_hpc/tree/master/modules/gff3
|
17
19
|
|
18
20
|
== Install and run gff3-fetch
|
19
21
|
|
@@ -61,32 +63,35 @@ Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
|
|
61
63
|
|
62
64
|
Fetch and assemble GFF3 types (e.g. ORF, mRNA, CDS) + print in FASTA format.
|
63
65
|
|
64
|
-
gff3-fetch [
|
65
|
-
|
66
|
-
Where (NYI == Not Yet Implemented):
|
66
|
+
gff3-fetch [options] type [filename.fa] filename.gff3
|
67
67
|
|
68
|
-
--translate
|
69
|
-
--validate
|
70
|
-
--fix
|
71
|
-
--fix-wormbase
|
72
|
-
--no-assemble
|
73
|
-
--
|
68
|
+
--translate : output as amino acid sequence
|
69
|
+
--validate : validate GFF3 file by translating
|
70
|
+
--fix : check 3-frame translation and fix, if possible
|
71
|
+
--fix-wormbase : fix 3-frame translation on ORFs named 'gene1'
|
72
|
+
--no-assemble : output each record as a sequence
|
73
|
+
--phase : output records using phase (useful w. no-assemble CDS to AA)
|
74
74
|
|
75
75
|
type is any valid type in the GFF3 definition. For example:
|
76
76
|
|
77
|
-
mRNA
|
78
|
-
CDS
|
79
|
-
exon
|
80
|
-
gene|ORF
|
81
|
-
other
|
77
|
+
mRNA : assemble mRNA
|
78
|
+
CDS : assemble CDS
|
79
|
+
exon : list all exons
|
80
|
+
gene|ORF : list gene ORFs
|
81
|
+
other : use any type from GFF3 definition, e.g. 'Terminate'
|
82
82
|
|
83
83
|
and the following performance options:
|
84
84
|
|
85
|
-
|
86
|
-
--
|
87
|
-
--
|
88
|
-
--
|
89
|
-
--
|
85
|
+
--parser bioruby : use BioRuby GFF3 parser (slow)
|
86
|
+
--parser line : use GFF3 line parser (faster, default)
|
87
|
+
--block : parse GFF3 by block (optimistic) -- NYI
|
88
|
+
--cache full : load all in RAM (fast, default)
|
89
|
+
--cache none : do not load anything in memory (slow)
|
90
|
+
--cache lru : use least recently used cache (limit RAM use, fast) -- NYI
|
91
|
+
--max-cpus num : use num threads -- NYI
|
92
|
+
--emboss : use EMBOSS translation (fast) -- NYI
|
93
|
+
|
94
|
+
Where (NYI == Not Yet Implemented):
|
90
95
|
|
91
96
|
Multiple GFF3 files can be used. With external FASTA files, always the last
|
92
97
|
one before the GFF3 filename is matched.
|
@@ -103,7 +108,7 @@ Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
|
|
103
108
|
|
104
109
|
Find CDS records from external FASTA file, adding phase and translate to protein sequence
|
105
110
|
|
106
|
-
gff3-fetch --no-assemble --
|
111
|
+
gff3-fetch --no-assemble --phase --translate CDS test/data/gff/MhA1_Contig1133.fa test/data/gff/MhA1_Contig1133.gff3
|
107
112
|
|
108
113
|
Find mRNA from external FASTA file, without loading everything in RAM
|
109
114
|
|
@@ -118,15 +123,50 @@ Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
|
|
118
123
|
|
119
124
|
gff3-fetch terminal chromosome1.fa geneid.gff3
|
120
125
|
|
126
|
+
Fine tuning output - show errors only
|
127
|
+
|
128
|
+
gff3-fetch mRNA test/data/gff/test.gff3 --trace ERROR
|
129
|
+
|
130
|
+
Fine tuning output - show messages matching regex
|
131
|
+
|
132
|
+
gff3-fetch mRNA test/data/gff/test.gff3 --trace '=msg =~ /component/'
|
133
|
+
|
134
|
+
Fine tuning output - write log messages to file.log
|
135
|
+
|
136
|
+
gff3-fetch mRNA test/data/gff/test.gff3 --trace ERROR --logger file.log
|
137
|
+
|
138
|
+
For more information on output, see the bioruby-logger plugin.
|
139
|
+
|
121
140
|
== Performance
|
122
141
|
|
123
|
-
time gff3-fetch cds m_hapla.WS217.dna.fa m_hapla.WS217.gff3 > test.fa
|
142
|
+
time gff3-fetch cds m_hapla.WS217.dna.fa m_hapla.WS217.gff3 2> /dev/null > test.fa
|
143
|
+
|
144
|
+
Digesting parser:
|
145
|
+
|
146
|
+
Cache real user sys version RAM
|
147
|
+
------------------------------------------------------------
|
148
|
+
full,bioruby 12m41 12m28 0m09 (0.8.0)
|
149
|
+
full,line 12m13 12m06 0m07 (0.8.5)
|
150
|
+
full,line,lazy 11m51 11m43 0m07 (0.8.6) 6,600M
|
151
|
+
|
152
|
+
none,bioruby 504m 477m 26m50 (0.8.0)
|
153
|
+
none,line 297m 267m 28m36 (0.8.5)
|
154
|
+
none,line,lazy 132m 106m 26m01 (0.8.6) 650M
|
155
|
+
|
156
|
+
lru,bioruby 533m 510m 22m47 (0.8.5)
|
157
|
+
lru,line 353m 326m 26m44 (0.8.5) 1K
|
158
|
+
lru,line 305m 281m 22m30 (0.8.5) 10K
|
159
|
+
lru,line,lazy 182m 161m 21m10 (0.8.6) 10K
|
160
|
+
lru,line,lazy 75m 75m 0m17 (0.8.6) 50K 730M
|
161
|
+
------------------------------------------------------------
|
162
|
+
|
163
|
+
Block parser:
|
124
164
|
|
125
|
-
Cache
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
165
|
+
Cache real user sys gff3 version
|
166
|
+
------------------------------------------------------------
|
167
|
+
in preparation; see also biolib/HPC:
|
168
|
+
https://github.com/pjotrp/biolib_hpc/tree/master/modules/gff3
|
169
|
+
------------------------------------------------------------
|
130
170
|
|
131
171
|
where
|
132
172
|
|
data/Rakefile
CHANGED
@@ -13,9 +13,9 @@ require 'jeweler'
|
|
13
13
|
Jeweler::Tasks.new do |gem|
|
14
14
|
# gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
|
15
15
|
gem.name = "bio-gff3"
|
16
|
-
gem.homepage = "
|
16
|
+
gem.homepage = "https://github.com/pjotrp/bioruby-gff3-plugin"
|
17
17
|
gem.license = "MIT"
|
18
|
-
gem.summary = %Q{
|
18
|
+
gem.summary = %Q{GFF3 parser for big data}
|
19
19
|
gem.description = %Q{GFF3 (genome browser) information and digest mRNA and CDS sequences.
|
20
20
|
Options for low memory use and caching of records.
|
21
21
|
Support for external FASTA files.
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.8.
|
1
|
+
0.8.7
|
data/bio-gff3.gemspec
CHANGED
@@ -5,18 +5,17 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{bio-gff3}
|
8
|
-
s.version = "0.8.
|
8
|
+
s.version = "0.8.7"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
-
s.authors = [
|
12
|
-
s.date = %q{2011-
|
13
|
-
s.default_executable = %q{gff3-fetch}
|
11
|
+
s.authors = [%q{Pjotr Prins}]
|
12
|
+
s.date = %q{2011-07-31}
|
14
13
|
s.description = %q{GFF3 (genome browser) information and digest mRNA and CDS sequences.
|
15
14
|
Options for low memory use and caching of records.
|
16
15
|
Support for external FASTA files.
|
17
16
|
}
|
18
17
|
s.email = %q{pjotr.prins@thebird.nl}
|
19
|
-
s.executables = [
|
18
|
+
s.executables = [%q{gff3-fetch}]
|
20
19
|
s.extra_rdoc_files = [
|
21
20
|
"LICENSE.txt",
|
22
21
|
"README.rdoc"
|
@@ -75,11 +74,11 @@ Support for external FASTA files.
|
|
75
74
|
"test/regressiontest.rb",
|
76
75
|
"test/test_bio-gff3.rb"
|
77
76
|
]
|
78
|
-
s.homepage = %q{
|
79
|
-
s.licenses = [
|
80
|
-
s.require_paths = [
|
81
|
-
s.rubygems_version = %q{1.
|
82
|
-
s.summary = %q{
|
77
|
+
s.homepage = %q{https://github.com/pjotrp/bioruby-gff3-plugin}
|
78
|
+
s.licenses = [%q{MIT}]
|
79
|
+
s.require_paths = [%q{lib}]
|
80
|
+
s.rubygems_version = %q{1.8.6}
|
81
|
+
s.summary = %q{GFF3 parser for big data}
|
83
82
|
s.test_files = [
|
84
83
|
"spec/gff3_assemble2_spec.rb",
|
85
84
|
"spec/gff3_assemble3_spec.rb",
|
@@ -93,7 +92,6 @@ Support for external FASTA files.
|
|
93
92
|
]
|
94
93
|
|
95
94
|
if s.respond_to? :specification_version then
|
96
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
97
95
|
s.specification_version = 3
|
98
96
|
|
99
97
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
metadata
CHANGED
@@ -1,152 +1,119 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: bio-gff3
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
6
|
-
- 0
|
7
|
-
- 8
|
8
|
-
- 6
|
9
|
-
version: 0.8.6
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.8.7
|
5
|
+
prerelease:
|
10
6
|
platform: ruby
|
11
|
-
authors:
|
7
|
+
authors:
|
12
8
|
- Pjotr Prins
|
13
9
|
autorequire:
|
14
10
|
bindir: bin
|
15
11
|
cert_chain: []
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
dependencies:
|
20
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2011-07-31 00:00:00.000000000Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
21
15
|
name: bio
|
22
|
-
requirement: &
|
16
|
+
requirement: &76668620 !ruby/object:Gem::Requirement
|
23
17
|
none: false
|
24
|
-
requirements:
|
25
|
-
- -
|
26
|
-
- !ruby/object:Gem::Version
|
27
|
-
segments:
|
28
|
-
- 1
|
29
|
-
- 3
|
30
|
-
- 1
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
31
21
|
version: 1.3.1
|
32
22
|
type: :runtime
|
33
23
|
prerelease: false
|
34
|
-
version_requirements: *
|
35
|
-
- !ruby/object:Gem::Dependency
|
24
|
+
version_requirements: *76668620
|
25
|
+
- !ruby/object:Gem::Dependency
|
36
26
|
name: log4r
|
37
|
-
requirement: &
|
27
|
+
requirement: &76668160 !ruby/object:Gem::Requirement
|
38
28
|
none: false
|
39
|
-
requirements:
|
40
|
-
- -
|
41
|
-
- !ruby/object:Gem::Version
|
42
|
-
segments:
|
43
|
-
- 1
|
44
|
-
- 1
|
45
|
-
- 6
|
29
|
+
requirements:
|
30
|
+
- - ! '>'
|
31
|
+
- !ruby/object:Gem::Version
|
46
32
|
version: 1.1.6
|
47
33
|
type: :runtime
|
48
34
|
prerelease: false
|
49
|
-
version_requirements: *
|
50
|
-
- !ruby/object:Gem::Dependency
|
35
|
+
version_requirements: *76668160
|
36
|
+
- !ruby/object:Gem::Dependency
|
51
37
|
name: bio-logger
|
52
|
-
requirement: &
|
38
|
+
requirement: &76667770 !ruby/object:Gem::Requirement
|
53
39
|
none: false
|
54
|
-
requirements:
|
55
|
-
- -
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
segments:
|
58
|
-
- 0
|
59
|
-
- 8
|
60
|
-
- 0
|
40
|
+
requirements:
|
41
|
+
- - ! '>'
|
42
|
+
- !ruby/object:Gem::Version
|
61
43
|
version: 0.8.0
|
62
44
|
type: :runtime
|
63
45
|
prerelease: false
|
64
|
-
version_requirements: *
|
65
|
-
- !ruby/object:Gem::Dependency
|
46
|
+
version_requirements: *76667770
|
47
|
+
- !ruby/object:Gem::Dependency
|
66
48
|
name: shoulda
|
67
|
-
requirement: &
|
49
|
+
requirement: &76667270 !ruby/object:Gem::Requirement
|
68
50
|
none: false
|
69
|
-
requirements:
|
70
|
-
- -
|
71
|
-
- !ruby/object:Gem::Version
|
72
|
-
|
73
|
-
- 0
|
74
|
-
version: "0"
|
51
|
+
requirements:
|
52
|
+
- - ! '>='
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
75
55
|
type: :development
|
76
56
|
prerelease: false
|
77
|
-
version_requirements: *
|
78
|
-
- !ruby/object:Gem::Dependency
|
57
|
+
version_requirements: *76667270
|
58
|
+
- !ruby/object:Gem::Dependency
|
79
59
|
name: bundler
|
80
|
-
requirement: &
|
60
|
+
requirement: &76666730 !ruby/object:Gem::Requirement
|
81
61
|
none: false
|
82
|
-
requirements:
|
62
|
+
requirements:
|
83
63
|
- - ~>
|
84
|
-
- !ruby/object:Gem::Version
|
85
|
-
segments:
|
86
|
-
- 1
|
87
|
-
- 0
|
88
|
-
- 0
|
64
|
+
- !ruby/object:Gem::Version
|
89
65
|
version: 1.0.0
|
90
66
|
type: :development
|
91
67
|
prerelease: false
|
92
|
-
version_requirements: *
|
93
|
-
- !ruby/object:Gem::Dependency
|
68
|
+
version_requirements: *76666730
|
69
|
+
- !ruby/object:Gem::Dependency
|
94
70
|
name: jeweler
|
95
|
-
requirement: &
|
71
|
+
requirement: &76662470 !ruby/object:Gem::Requirement
|
96
72
|
none: false
|
97
|
-
requirements:
|
73
|
+
requirements:
|
98
74
|
- - ~>
|
99
|
-
- !ruby/object:Gem::Version
|
100
|
-
segments:
|
101
|
-
- 1
|
102
|
-
- 5
|
103
|
-
- 2
|
75
|
+
- !ruby/object:Gem::Version
|
104
76
|
version: 1.5.2
|
105
77
|
type: :development
|
106
78
|
prerelease: false
|
107
|
-
version_requirements: *
|
108
|
-
- !ruby/object:Gem::Dependency
|
79
|
+
version_requirements: *76662470
|
80
|
+
- !ruby/object:Gem::Dependency
|
109
81
|
name: rcov
|
110
|
-
requirement: &
|
82
|
+
requirement: &76662040 !ruby/object:Gem::Requirement
|
111
83
|
none: false
|
112
|
-
requirements:
|
113
|
-
- -
|
114
|
-
- !ruby/object:Gem::Version
|
115
|
-
|
116
|
-
- 0
|
117
|
-
version: "0"
|
84
|
+
requirements:
|
85
|
+
- - ! '>='
|
86
|
+
- !ruby/object:Gem::Version
|
87
|
+
version: '0'
|
118
88
|
type: :development
|
119
89
|
prerelease: false
|
120
|
-
version_requirements: *
|
121
|
-
- !ruby/object:Gem::Dependency
|
90
|
+
version_requirements: *76662040
|
91
|
+
- !ruby/object:Gem::Dependency
|
122
92
|
name: rspec
|
123
|
-
requirement: &
|
93
|
+
requirement: &76661550 !ruby/object:Gem::Requirement
|
124
94
|
none: false
|
125
|
-
requirements:
|
126
|
-
- -
|
127
|
-
- !ruby/object:Gem::Version
|
128
|
-
segments:
|
129
|
-
- 2
|
130
|
-
- 3
|
131
|
-
- 0
|
95
|
+
requirements:
|
96
|
+
- - ! '>='
|
97
|
+
- !ruby/object:Gem::Version
|
132
98
|
version: 2.3.0
|
133
99
|
type: :development
|
134
100
|
prerelease: false
|
135
|
-
version_requirements: *
|
136
|
-
description:
|
137
|
-
|
101
|
+
version_requirements: *76661550
|
102
|
+
description: ! 'GFF3 (genome browser) information and digest mRNA and CDS sequences.
|
103
|
+
|
138
104
|
Options for low memory use and caching of records.
|
105
|
+
|
139
106
|
Support for external FASTA files.
|
140
107
|
|
108
|
+
'
|
141
109
|
email: pjotr.prins@thebird.nl
|
142
|
-
executables:
|
110
|
+
executables:
|
143
111
|
- gff3-fetch
|
144
112
|
extensions: []
|
145
|
-
|
146
|
-
extra_rdoc_files:
|
113
|
+
extra_rdoc_files:
|
147
114
|
- LICENSE.txt
|
148
115
|
- README.rdoc
|
149
|
-
files:
|
116
|
+
files:
|
150
117
|
- Gemfile
|
151
118
|
- Gemfile.lock
|
152
119
|
- LICENSE.txt
|
@@ -199,39 +166,32 @@ files:
|
|
199
166
|
- test/helper.rb
|
200
167
|
- test/regressiontest.rb
|
201
168
|
- test/test_bio-gff3.rb
|
202
|
-
|
203
|
-
|
204
|
-
licenses:
|
169
|
+
homepage: https://github.com/pjotrp/bioruby-gff3-plugin
|
170
|
+
licenses:
|
205
171
|
- MIT
|
206
172
|
post_install_message:
|
207
173
|
rdoc_options: []
|
208
|
-
|
209
|
-
require_paths:
|
174
|
+
require_paths:
|
210
175
|
- lib
|
211
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
176
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
212
177
|
none: false
|
213
|
-
requirements:
|
214
|
-
- -
|
215
|
-
- !ruby/object:Gem::Version
|
216
|
-
|
217
|
-
|
218
|
-
version: "0"
|
219
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
178
|
+
requirements:
|
179
|
+
- - ! '>='
|
180
|
+
- !ruby/object:Gem::Version
|
181
|
+
version: '0'
|
182
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
220
183
|
none: false
|
221
|
-
requirements:
|
222
|
-
- -
|
223
|
-
- !ruby/object:Gem::Version
|
224
|
-
|
225
|
-
- 0
|
226
|
-
version: "0"
|
184
|
+
requirements:
|
185
|
+
- - ! '>='
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0'
|
227
188
|
requirements: []
|
228
|
-
|
229
189
|
rubyforge_project:
|
230
|
-
rubygems_version: 1.
|
190
|
+
rubygems_version: 1.8.6
|
231
191
|
signing_key:
|
232
192
|
specification_version: 3
|
233
|
-
summary:
|
234
|
-
test_files:
|
193
|
+
summary: GFF3 parser for big data
|
194
|
+
test_files:
|
235
195
|
- spec/gff3_assemble2_spec.rb
|
236
196
|
- spec/gff3_assemble3_spec.rb
|
237
197
|
- spec/gff3_assemble_spec.rb
|