bio-gff3 0.8.6 → 0.8.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,8 +2,8 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  bio (1.4.1)
5
- bio-logger (0.6.1)
6
- log4r (> 1.1.6)
5
+ bio-logger (1.0.0)
6
+ log4r (>= 1.1.9)
7
7
  diff-lcs (1.1.2)
8
8
  git (1.2.5)
9
9
  jeweler (1.5.2)
@@ -11,16 +11,16 @@ GEM
11
11
  git (>= 1.2.5)
12
12
  rake
13
13
  log4r (1.1.9)
14
- rake (0.8.7)
14
+ rake (0.9.2)
15
15
  rcov (0.9.9)
16
- rspec (2.3.0)
17
- rspec-core (~> 2.3.0)
18
- rspec-expectations (~> 2.3.0)
19
- rspec-mocks (~> 2.3.0)
20
- rspec-core (2.3.1)
21
- rspec-expectations (2.3.0)
16
+ rspec (2.6.0)
17
+ rspec-core (~> 2.6.0)
18
+ rspec-expectations (~> 2.6.0)
19
+ rspec-mocks (~> 2.6.0)
20
+ rspec-core (2.6.4)
21
+ rspec-expectations (2.6.0)
22
22
  diff-lcs (~> 1.1.2)
23
- rspec-mocks (2.3.0)
23
+ rspec-mocks (2.6.0)
24
24
  shoulda (2.11.3)
25
25
 
26
26
  PLATFORMS
@@ -28,9 +28,10 @@ PLATFORMS
28
28
 
29
29
  DEPENDENCIES
30
30
  bio (>= 1.3.1)
31
- bio-logger
31
+ bio-logger (> 0.8.0)
32
32
  bundler (~> 1.0.0)
33
33
  jeweler (~> 1.5.2)
34
+ log4r (> 1.1.6)
34
35
  rcov
35
- rspec (>= 2.0.0)
36
+ rspec (>= 2.3.0)
36
37
  shoulda
@@ -5,15 +5,17 @@ including assembled mRNA, protein and CDS sequences.
5
5
 
6
6
  Features:
7
7
 
8
- # Take GFF3 (genome browser) information of any type, and assemble sequences, e.g. mRNA and CDS
9
- # Options for low memory use and caching of records
10
- # Support for external FASTA input files
11
- # Use of multi-cores (NYI)
8
+ * Take GFF3 (genome browser) information of any type, and assemble sequences, e.g. mRNA and CDS
9
+ * Options for low memory use and caching of records
10
+ * Support for external FASTA input files
11
+ * Use of multi-cores (NYI)
12
12
 
13
13
  Currently the output is a FASTA file.
14
14
 
15
- You can use this plugin in two ways. First as a standalone program, next as a
16
- plugin library to BioRuby.
15
+ You can use this plugin in two ways. First as a standalone program,
16
+ second as a plugin library to BioRuby.
17
+
18
+ Note: a really fast GFF3 parser is in the works at https://github.com/pjotrp/biolib_hpc/tree/master/modules/gff3
17
19
 
18
20
  == Install and run gff3-fetch
19
21
 
@@ -61,32 +63,35 @@ Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
61
63
 
62
64
  Fetch and assemble GFF3 types (e.g. ORF, mRNA, CDS) + print in FASTA format.
63
65
 
64
- gff3-fetch [--low-mem] [--validate] type [filename.fa] filename.gff3
65
-
66
- Where (NYI == Not Yet Implemented):
66
+ gff3-fetch [options] type [filename.fa] filename.gff3
67
67
 
68
- --translate : output as amino acid sequence
69
- --validate : validate GFF3 file by translating
70
- --fix : check 3-frame translation and fix, if possible
71
- --fix-wormbase : fix 3-frame translation on ORFs named 'gene1'
72
- --no-assemble : output each record as a sequence -- NYI
73
- --add-phase : output records using phase (useful w. no-assemble CDS to AA) --NYI
68
+ --translate : output as amino acid sequence
69
+ --validate : validate GFF3 file by translating
70
+ --fix : check 3-frame translation and fix, if possible
71
+ --fix-wormbase : fix 3-frame translation on ORFs named 'gene1'
72
+ --no-assemble : output each record as a sequence
73
+ --phase : output records using phase (useful w. no-assemble CDS to AA)
74
74
 
75
75
  type is any valid type in the GFF3 definition. For example:
76
76
 
77
- mRNA : assemble mRNA
78
- CDS : assemble CDS
79
- exon : list all exons
80
- gene|ORF : list gene ORFs
81
- other : use any type from GFF3 definition, e.g. 'Terminate' -- NYI
77
+ mRNA : assemble mRNA
78
+ CDS : assemble CDS
79
+ exon : list all exons
80
+ gene|ORF : list gene ORFs
81
+ other : use any type from GFF3 definition, e.g. 'Terminate'
82
82
 
83
83
  and the following performance options:
84
84
 
85
- --cache full : load all in RAM (fast)
86
- --cache none : do not load anything in memory (slow)
87
- --low-mem : use LRU cache (limit RAM use, fast) -- NYI
88
- --max-cpus num : use num threads -- NYI
89
- --emboss : use EMBOSS translation (fast) -- NYI
85
+ --parser bioruby : use BioRuby GFF3 parser (slow)
86
+ --parser line : use GFF3 line parser (faster, default)
87
+ --block : parse GFF3 by block (optimistic) -- NYI
88
+ --cache full : load all in RAM (fast, default)
89
+ --cache none : do not load anything in memory (slow)
90
+ --cache lru : use least recently used cache (limit RAM use, fast) -- NYI
91
+ --max-cpus num : use num threads -- NYI
92
+ --emboss : use EMBOSS translation (fast) -- NYI
93
+
94
+ Note: NYI == Not Yet Implemented.
90
95
 
91
96
  Multiple GFF3 files can be used. With external FASTA files, always the last
92
97
  one before the GFF3 filename is matched.
@@ -103,7 +108,7 @@ Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
103
108
 
104
109
  Find CDS records from external FASTA file, adding phase and translating to protein sequence
105
110
 
106
- gff3-fetch --no-assemble --add-phase --translate CDS test/data/gff/MhA1_Contig1133.fa test/data/gff/MhA1_Contig1133.gff3
111
+ gff3-fetch --no-assemble --phase --translate CDS test/data/gff/MhA1_Contig1133.fa test/data/gff/MhA1_Contig1133.gff3
107
112
 
108
113
  Find mRNA from external FASTA file, without loading everything in RAM
109
114
 
@@ -118,15 +123,50 @@ Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
118
123
 
119
124
  gff3-fetch terminal chromosome1.fa geneid.gff3
120
125
 
126
+ Fine tuning output - show errors only
127
+
128
+ gff3-fetch mRNA test/data/gff/test.gff3 --trace ERROR
129
+
130
+ Fine tuning output - show messages matching regex
131
+
132
+ gff3-fetch mRNA test/data/gff/test.gff3 --trace '=msg =~ /component/'
133
+
134
+ Fine tuning output - write log messages to file.log
135
+
136
+ gff3-fetch mRNA test/data/gff/test.gff3 --trace ERROR --logger file.log
137
+
138
+ For more information on output, see the bioruby-logger plugin.
139
+
121
140
  == Performance
122
141
 
123
- time gff3-fetch cds m_hapla.WS217.dna.fa m_hapla.WS217.gff3 > test.fa
142
+ time gff3-fetch cds m_hapla.WS217.dna.fa m_hapla.WS217.gff3 2> /dev/null > test.fa
143
+
144
+ Digesting parser:
145
+
146
+ Cache real user sys version RAM
147
+ ------------------------------------------------------------
148
+ full,bioruby 12m41 12m28 0m09 (0.8.0)
149
+ full,line 12m13 12m06 0m07 (0.8.5)
150
+ full,line,lazy 11m51 11m43 0m07 (0.8.6) 6,600M
151
+
152
+ none,bioruby 504m 477m 26m50 (0.8.0)
153
+ none,line 297m 267m 28m36 (0.8.5)
154
+ none,line,lazy 132m 106m 26m01 (0.8.6) 650M
155
+
156
+ lru,bioruby 533m 510m 22m47 (0.8.5)
157
+ lru,line 353m 326m 26m44 (0.8.5) 1K
158
+ lru,line 305m 281m 22m30 (0.8.5) 10K
159
+ lru,line,lazy 182m 161m 21m10 (0.8.6) 10K
160
+ lru,line,lazy 75m 75m 0m17 (0.8.6) 50K 730M
161
+ ------------------------------------------------------------
162
+
163
+ Block parser:
124
164
 
125
- Cache real user sys
126
- ----------------------------------------------------
127
- full 12m41s 12m28s 0m09s (0.8.0 Jan. 2011)
128
- none 504m39s 477m49s 26m50s (0.8.0 Jan. 2011)
129
- ----------------------------------------------------
165
+ Cache real user sys gff3 version
166
+ ------------------------------------------------------------
167
+ in preparation; see also biolib/HPC:
168
+ https://github.com/pjotrp/biolib_hpc/tree/master/modules/gff3
169
+ ------------------------------------------------------------
130
170
 
131
171
  where
132
172
 
data/Rakefile CHANGED
@@ -13,9 +13,9 @@ require 'jeweler'
13
13
  Jeweler::Tasks.new do |gem|
14
14
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
15
15
  gem.name = "bio-gff3"
16
- gem.homepage = "http://github.com/pjotrp/bioruby-gff3"
16
+ gem.homepage = "https://github.com/pjotrp/bioruby-gff3-plugin"
17
17
  gem.license = "MIT"
18
- gem.summary = %Q{BioRuby GFF3 plugin for big data}
18
+ gem.summary = %Q{GFF3 parser for big data}
19
19
  gem.description = %Q{GFF3 (genome browser) information and digest mRNA and CDS sequences.
20
20
  Options for low memory use and caching of records.
21
21
  Support for external FASTA files.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.6
1
+ 0.8.7
@@ -5,18 +5,17 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{bio-gff3}
8
- s.version = "0.8.6"
8
+ s.version = "0.8.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Pjotr Prins"]
12
- s.date = %q{2011-01-17}
13
- s.default_executable = %q{gff3-fetch}
11
+ s.authors = [%q{Pjotr Prins}]
12
+ s.date = %q{2011-07-31}
14
13
  s.description = %q{GFF3 (genome browser) information and digest mRNA and CDS sequences.
15
14
  Options for low memory use and caching of records.
16
15
  Support for external FASTA files.
17
16
  }
18
17
  s.email = %q{pjotr.prins@thebird.nl}
19
- s.executables = ["gff3-fetch"]
18
+ s.executables = [%q{gff3-fetch}]
20
19
  s.extra_rdoc_files = [
21
20
  "LICENSE.txt",
22
21
  "README.rdoc"
@@ -75,11 +74,11 @@ Support for external FASTA files.
75
74
  "test/regressiontest.rb",
76
75
  "test/test_bio-gff3.rb"
77
76
  ]
78
- s.homepage = %q{http://github.com/pjotrp/bioruby-gff3}
79
- s.licenses = ["MIT"]
80
- s.require_paths = ["lib"]
81
- s.rubygems_version = %q{1.3.7}
82
- s.summary = %q{BioRuby GFF3 plugin for big data}
77
+ s.homepage = %q{https://github.com/pjotrp/bioruby-gff3-plugin}
78
+ s.licenses = [%q{MIT}]
79
+ s.require_paths = [%q{lib}]
80
+ s.rubygems_version = %q{1.8.6}
81
+ s.summary = %q{GFF3 parser for big data}
83
82
  s.test_files = [
84
83
  "spec/gff3_assemble2_spec.rb",
85
84
  "spec/gff3_assemble3_spec.rb",
@@ -93,7 +92,6 @@ Support for external FASTA files.
93
92
  ]
94
93
 
95
94
  if s.respond_to? :specification_version then
96
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
97
95
  s.specification_version = 3
98
96
 
99
97
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
@@ -18,7 +18,7 @@ module Bio
18
18
  if s == '##FASTA'
19
19
  break
20
20
  end
21
- next if s.length == 0 or s[0] == '#'
21
+ next if s.length == 0 or s =~ /^#/
22
22
  @records.push FastLineRecord.new(parse_line_fast(s))
23
23
  end
24
24
  fasta = Bio::GFF::FastaReader.new(fh)
metadata CHANGED
@@ -1,152 +1,119 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bio-gff3
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 8
8
- - 6
9
- version: 0.8.6
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.8.7
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Pjotr Prins
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-01-17 00:00:00 +01:00
18
- default_executable: gff3-fetch
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2011-07-31 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: bio
22
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &76668620 !ruby/object:Gem::Requirement
23
17
  none: false
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- segments:
28
- - 1
29
- - 3
30
- - 1
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
31
21
  version: 1.3.1
32
22
  type: :runtime
33
23
  prerelease: false
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
24
+ version_requirements: *76668620
25
+ - !ruby/object:Gem::Dependency
36
26
  name: log4r
37
- requirement: &id002 !ruby/object:Gem::Requirement
27
+ requirement: &76668160 !ruby/object:Gem::Requirement
38
28
  none: false
39
- requirements:
40
- - - ">"
41
- - !ruby/object:Gem::Version
42
- segments:
43
- - 1
44
- - 1
45
- - 6
29
+ requirements:
30
+ - - ! '>'
31
+ - !ruby/object:Gem::Version
46
32
  version: 1.1.6
47
33
  type: :runtime
48
34
  prerelease: false
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
35
+ version_requirements: *76668160
36
+ - !ruby/object:Gem::Dependency
51
37
  name: bio-logger
52
- requirement: &id003 !ruby/object:Gem::Requirement
38
+ requirement: &76667770 !ruby/object:Gem::Requirement
53
39
  none: false
54
- requirements:
55
- - - ">"
56
- - !ruby/object:Gem::Version
57
- segments:
58
- - 0
59
- - 8
60
- - 0
40
+ requirements:
41
+ - - ! '>'
42
+ - !ruby/object:Gem::Version
61
43
  version: 0.8.0
62
44
  type: :runtime
63
45
  prerelease: false
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
46
+ version_requirements: *76667770
47
+ - !ruby/object:Gem::Dependency
66
48
  name: shoulda
67
- requirement: &id004 !ruby/object:Gem::Requirement
49
+ requirement: &76667270 !ruby/object:Gem::Requirement
68
50
  none: false
69
- requirements:
70
- - - ">="
71
- - !ruby/object:Gem::Version
72
- segments:
73
- - 0
74
- version: "0"
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
75
55
  type: :development
76
56
  prerelease: false
77
- version_requirements: *id004
78
- - !ruby/object:Gem::Dependency
57
+ version_requirements: *76667270
58
+ - !ruby/object:Gem::Dependency
79
59
  name: bundler
80
- requirement: &id005 !ruby/object:Gem::Requirement
60
+ requirement: &76666730 !ruby/object:Gem::Requirement
81
61
  none: false
82
- requirements:
62
+ requirements:
83
63
  - - ~>
84
- - !ruby/object:Gem::Version
85
- segments:
86
- - 1
87
- - 0
88
- - 0
64
+ - !ruby/object:Gem::Version
89
65
  version: 1.0.0
90
66
  type: :development
91
67
  prerelease: false
92
- version_requirements: *id005
93
- - !ruby/object:Gem::Dependency
68
+ version_requirements: *76666730
69
+ - !ruby/object:Gem::Dependency
94
70
  name: jeweler
95
- requirement: &id006 !ruby/object:Gem::Requirement
71
+ requirement: &76662470 !ruby/object:Gem::Requirement
96
72
  none: false
97
- requirements:
73
+ requirements:
98
74
  - - ~>
99
- - !ruby/object:Gem::Version
100
- segments:
101
- - 1
102
- - 5
103
- - 2
75
+ - !ruby/object:Gem::Version
104
76
  version: 1.5.2
105
77
  type: :development
106
78
  prerelease: false
107
- version_requirements: *id006
108
- - !ruby/object:Gem::Dependency
79
+ version_requirements: *76662470
80
+ - !ruby/object:Gem::Dependency
109
81
  name: rcov
110
- requirement: &id007 !ruby/object:Gem::Requirement
82
+ requirement: &76662040 !ruby/object:Gem::Requirement
111
83
  none: false
112
- requirements:
113
- - - ">="
114
- - !ruby/object:Gem::Version
115
- segments:
116
- - 0
117
- version: "0"
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
118
88
  type: :development
119
89
  prerelease: false
120
- version_requirements: *id007
121
- - !ruby/object:Gem::Dependency
90
+ version_requirements: *76662040
91
+ - !ruby/object:Gem::Dependency
122
92
  name: rspec
123
- requirement: &id008 !ruby/object:Gem::Requirement
93
+ requirement: &76661550 !ruby/object:Gem::Requirement
124
94
  none: false
125
- requirements:
126
- - - ">="
127
- - !ruby/object:Gem::Version
128
- segments:
129
- - 2
130
- - 3
131
- - 0
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
132
98
  version: 2.3.0
133
99
  type: :development
134
100
  prerelease: false
135
- version_requirements: *id008
136
- description: |
137
- GFF3 (genome browser) information and digest mRNA and CDS sequences.
101
+ version_requirements: *76661550
102
+ description: ! 'GFF3 (genome browser) information and digest mRNA and CDS sequences.
103
+
138
104
  Options for low memory use and caching of records.
105
+
139
106
  Support for external FASTA files.
140
107
 
108
+ '
141
109
  email: pjotr.prins@thebird.nl
142
- executables:
110
+ executables:
143
111
  - gff3-fetch
144
112
  extensions: []
145
-
146
- extra_rdoc_files:
113
+ extra_rdoc_files:
147
114
  - LICENSE.txt
148
115
  - README.rdoc
149
- files:
116
+ files:
150
117
  - Gemfile
151
118
  - Gemfile.lock
152
119
  - LICENSE.txt
@@ -199,39 +166,32 @@ files:
199
166
  - test/helper.rb
200
167
  - test/regressiontest.rb
201
168
  - test/test_bio-gff3.rb
202
- has_rdoc: true
203
- homepage: http://github.com/pjotrp/bioruby-gff3
204
- licenses:
169
+ homepage: https://github.com/pjotrp/bioruby-gff3-plugin
170
+ licenses:
205
171
  - MIT
206
172
  post_install_message:
207
173
  rdoc_options: []
208
-
209
- require_paths:
174
+ require_paths:
210
175
  - lib
211
- required_ruby_version: !ruby/object:Gem::Requirement
176
+ required_ruby_version: !ruby/object:Gem::Requirement
212
177
  none: false
213
- requirements:
214
- - - ">="
215
- - !ruby/object:Gem::Version
216
- segments:
217
- - 0
218
- version: "0"
219
- required_rubygems_version: !ruby/object:Gem::Requirement
178
+ requirements:
179
+ - - ! '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
182
+ required_rubygems_version: !ruby/object:Gem::Requirement
220
183
  none: false
221
- requirements:
222
- - - ">="
223
- - !ruby/object:Gem::Version
224
- segments:
225
- - 0
226
- version: "0"
184
+ requirements:
185
+ - - ! '>='
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
227
188
  requirements: []
228
-
229
189
  rubyforge_project:
230
- rubygems_version: 1.3.7
190
+ rubygems_version: 1.8.6
231
191
  signing_key:
232
192
  specification_version: 3
233
- summary: BioRuby GFF3 plugin for big data
234
- test_files:
193
+ summary: GFF3 parser for big data
194
+ test_files:
235
195
  - spec/gff3_assemble2_spec.rb
236
196
  - spec/gff3_assemble3_spec.rb
237
197
  - spec/gff3_assemble_spec.rb