bio-gff3 0.8.6 → 0.8.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,8 +2,8 @@ GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
4
  bio (1.4.1)
5
- bio-logger (0.6.1)
6
- log4r (> 1.1.6)
5
+ bio-logger (1.0.0)
6
+ log4r (>= 1.1.9)
7
7
  diff-lcs (1.1.2)
8
8
  git (1.2.5)
9
9
  jeweler (1.5.2)
@@ -11,16 +11,16 @@ GEM
11
11
  git (>= 1.2.5)
12
12
  rake
13
13
  log4r (1.1.9)
14
- rake (0.8.7)
14
+ rake (0.9.2)
15
15
  rcov (0.9.9)
16
- rspec (2.3.0)
17
- rspec-core (~> 2.3.0)
18
- rspec-expectations (~> 2.3.0)
19
- rspec-mocks (~> 2.3.0)
20
- rspec-core (2.3.1)
21
- rspec-expectations (2.3.0)
16
+ rspec (2.6.0)
17
+ rspec-core (~> 2.6.0)
18
+ rspec-expectations (~> 2.6.0)
19
+ rspec-mocks (~> 2.6.0)
20
+ rspec-core (2.6.4)
21
+ rspec-expectations (2.6.0)
22
22
  diff-lcs (~> 1.1.2)
23
- rspec-mocks (2.3.0)
23
+ rspec-mocks (2.6.0)
24
24
  shoulda (2.11.3)
25
25
 
26
26
  PLATFORMS
@@ -28,9 +28,10 @@ PLATFORMS
28
28
 
29
29
  DEPENDENCIES
30
30
  bio (>= 1.3.1)
31
- bio-logger
31
+ bio-logger (> 0.8.0)
32
32
  bundler (~> 1.0.0)
33
33
  jeweler (~> 1.5.2)
34
+ log4r (> 1.1.6)
34
35
  rcov
35
- rspec (>= 2.0.0)
36
+ rspec (>= 2.3.0)
36
37
  shoulda
@@ -5,15 +5,17 @@ including assembled mRNA, protein and CDS sequences.
5
5
 
6
6
  Features:
7
7
 
8
- # Take GFF3 (genome browser) information of any type, and assemble sequences, e.g. mRNA and CDS
9
- # Options for low memory use and caching of records
10
- # Support for external FASTA input files
11
- # Use of multi-cores (NYI)
8
+ * Take GFF3 (genome browser) information of any type, and assemble sequences, e.g. mRNA and CDS
9
+ * Options for low memory use and caching of records
10
+ * Support for external FASTA input files
11
+ * Use of multi-cores (NYI)
12
12
 
13
13
  Currently the output is a FASTA file.
14
14
 
15
- You can use this plugin in two ways. First as a standalone program, next as a
16
- plugin library to BioRuby.
15
+ You can use this plugin in two ways. First as a standalone program,
16
+ second as a plugin library to BioRuby.
17
+
18
+ Note: a really fast GFF3 parser is in the works at https://github.com/pjotrp/biolib_hpc/tree/master/modules/gff3
17
19
 
18
20
  == Install and run gff3-fetch
19
21
 
@@ -61,32 +63,35 @@ Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
61
63
 
62
64
  Fetch and assemble GFF3 types (e.g. ORF, mRNA, CDS) + print in FASTA format.
63
65
 
64
- gff3-fetch [--low-mem] [--validate] type [filename.fa] filename.gff3
65
-
66
- Where (NYI == Not Yet Implemented):
66
+ gff3-fetch [options] type [filename.fa] filename.gff3
67
67
 
68
- --translate : output as amino acid sequence
69
- --validate : validate GFF3 file by translating
70
- --fix : check 3-frame translation and fix, if possible
71
- --fix-wormbase : fix 3-frame translation on ORFs named 'gene1'
72
- --no-assemble : output each record as a sequence -- NYI
73
- --add-phase : output records using phase (useful w. no-assemble CDS to AA) --NYI
68
+ --translate : output as amino acid sequence
69
+ --validate : validate GFF3 file by translating
70
+ --fix : check 3-frame translation and fix, if possible
71
+ --fix-wormbase : fix 3-frame translation on ORFs named 'gene1'
72
+ --no-assemble : output each record as a sequence
73
+ --phase : output records using phase (useful w. no-assemble CDS to AA)
74
74
 
75
75
  type is any valid type in the GFF3 definition. For example:
76
76
 
77
- mRNA : assemble mRNA
78
- CDS : assemble CDS
79
- exon : list all exons
80
- gene|ORF : list gene ORFs
81
- other : use any type from GFF3 definition, e.g. 'Terminate' -- NYI
77
+ mRNA : assemble mRNA
78
+ CDS : assemble CDS
79
+ exon : list all exons
80
+ gene|ORF : list gene ORFs
81
+ other : use any type from GFF3 definition, e.g. 'Terminate'
82
82
 
83
83
  and the following performance options:
84
84
 
85
- --cache full : load all in RAM (fast)
86
- --cache none : do not load anything in memory (slow)
87
- --low-mem : use LRU cache (limit RAM use, fast) -- NYI
88
- --max-cpus num : use num threads -- NYI
89
- --emboss : use EMBOSS translation (fast) -- NYI
85
+ --parser bioruby : use BioRuby GFF3 parser (slow)
86
+ --parser line : use GFF3 line parser (faster, default)
87
+ --block : parse GFF3 by block (optimistic) -- NYI
88
+ --cache full : load all in RAM (fast, default)
89
+ --cache none : do not load anything in memory (slow)
90
+ --cache lru : use least recently used cache (limit RAM use, fast) -- NYI
91
+ --max-cpus num : use num threads -- NYI
92
+ --emboss : use EMBOSS translation (fast) -- NYI
93
+
94
+ (NYI == Not Yet Implemented)
90
95
 
91
96
  Multiple GFF3 files can be used. With external FASTA files, always the last
92
97
  one before the GFF3 filename is matched.
@@ -103,7 +108,7 @@ Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
103
108
 
104
109
  Find CDS records from external FASTA file, adding phase and translating to protein sequence
105
110
 
106
- gff3-fetch --no-assemble --add-phase --translate CDS test/data/gff/MhA1_Contig1133.fa test/data/gff/MhA1_Contig1133.gff3
111
+ gff3-fetch --no-assemble --phase --translate CDS test/data/gff/MhA1_Contig1133.fa test/data/gff/MhA1_Contig1133.gff3
107
112
 
108
113
  Find mRNA from external FASTA file, without loading everything in RAM
109
114
 
@@ -118,15 +123,50 @@ Copyright (C) 2010,2011 Pjotr Prins <pjotr.prins@thebird.nl>
118
123
 
119
124
  gff3-fetch terminal chromosome1.fa geneid.gff3
120
125
 
126
+ Fine tuning output - show errors only
127
+
128
+ gff3-fetch mRNA test/data/gff/test.gff3 --trace ERROR
129
+
130
+ Fine tuning output - show messages matching regex
131
+
132
+ gff3-fetch mRNA test/data/gff/test.gff3 --trace '=msg =~ /component/'
133
+
134
+ Fine tuning output - write log messages to file.log
135
+
136
+ gff3-fetch mRNA test/data/gff/test.gff3 --trace ERROR --logger file.log
137
+
138
+ For more information on output, see the bioruby-logger plugin.
139
+
121
140
  == Performance
122
141
 
123
- time gff3-fetch cds m_hapla.WS217.dna.fa m_hapla.WS217.gff3 > test.fa
142
+ time gff3-fetch cds m_hapla.WS217.dna.fa m_hapla.WS217.gff3 2> /dev/null > test.fa
143
+
144
+ Digesting parser:
145
+
146
+ Cache real user sys version RAM
147
+ ------------------------------------------------------------
148
+ full,bioruby 12m41 12m28 0m09 (0.8.0)
149
+ full,line 12m13 12m06 0m07 (0.8.5)
150
+ full,line,lazy 11m51 11m43 0m07 (0.8.6) 6,600M
151
+
152
+ none,bioruby 504m 477m 26m50 (0.8.0)
153
+ none,line 297m 267m 28m36 (0.8.5)
154
+ none,line,lazy 132m 106m 26m01 (0.8.6) 650M
155
+
156
+ lru,bioruby 533m 510m 22m47 (0.8.5)
157
+ lru,line 353m 326m 26m44 (0.8.5) 1K
158
+ lru,line 305m 281m 22m30 (0.8.5) 10K
159
+ lru,line,lazy 182m 161m 21m10 (0.8.6) 10K
160
+ lru,line,lazy 75m 75m 0m17 (0.8.6) 50K 730M
161
+ ------------------------------------------------------------
162
+
163
+ Block parser:
124
164
 
125
- Cache real user sys
126
- ----------------------------------------------------
127
- full 12m41s 12m28s 0m09s (0.8.0 Jan. 2011)
128
- none 504m39s 477m49s 26m50s (0.8.0 Jan. 2011)
129
- ----------------------------------------------------
165
+ Cache real user sys gff3 version
166
+ ------------------------------------------------------------
167
+ in preparation; see also biolib/HPC:
168
+ https://github.com/pjotrp/biolib_hpc/tree/master/modules/gff3
169
+ ------------------------------------------------------------
130
170
 
131
171
  where
132
172
 
data/Rakefile CHANGED
@@ -13,9 +13,9 @@ require 'jeweler'
13
13
  Jeweler::Tasks.new do |gem|
14
14
  # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
15
15
  gem.name = "bio-gff3"
16
- gem.homepage = "http://github.com/pjotrp/bioruby-gff3"
16
+ gem.homepage = "https://github.com/pjotrp/bioruby-gff3-plugin"
17
17
  gem.license = "MIT"
18
- gem.summary = %Q{BioRuby GFF3 plugin for big data}
18
+ gem.summary = %Q{GFF3 parser for big data}
19
19
  gem.description = %Q{GFF3 (genome browser) information and digest mRNA and CDS sequences.
20
20
  Options for low memory use and caching of records.
21
21
  Support for external FASTA files.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.8.6
1
+ 0.8.7
@@ -5,18 +5,17 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{bio-gff3}
8
- s.version = "0.8.6"
8
+ s.version = "0.8.7"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
- s.authors = ["Pjotr Prins"]
12
- s.date = %q{2011-01-17}
13
- s.default_executable = %q{gff3-fetch}
11
+ s.authors = [%q{Pjotr Prins}]
12
+ s.date = %q{2011-07-31}
14
13
  s.description = %q{GFF3 (genome browser) information and digest mRNA and CDS sequences.
15
14
  Options for low memory use and caching of records.
16
15
  Support for external FASTA files.
17
16
  }
18
17
  s.email = %q{pjotr.prins@thebird.nl}
19
- s.executables = ["gff3-fetch"]
18
+ s.executables = [%q{gff3-fetch}]
20
19
  s.extra_rdoc_files = [
21
20
  "LICENSE.txt",
22
21
  "README.rdoc"
@@ -75,11 +74,11 @@ Support for external FASTA files.
75
74
  "test/regressiontest.rb",
76
75
  "test/test_bio-gff3.rb"
77
76
  ]
78
- s.homepage = %q{http://github.com/pjotrp/bioruby-gff3}
79
- s.licenses = ["MIT"]
80
- s.require_paths = ["lib"]
81
- s.rubygems_version = %q{1.3.7}
82
- s.summary = %q{BioRuby GFF3 plugin for big data}
77
+ s.homepage = %q{https://github.com/pjotrp/bioruby-gff3-plugin}
78
+ s.licenses = [%q{MIT}]
79
+ s.require_paths = [%q{lib}]
80
+ s.rubygems_version = %q{1.8.6}
81
+ s.summary = %q{GFF3 parser for big data}
83
82
  s.test_files = [
84
83
  "spec/gff3_assemble2_spec.rb",
85
84
  "spec/gff3_assemble3_spec.rb",
@@ -93,7 +92,6 @@ Support for external FASTA files.
93
92
  ]
94
93
 
95
94
  if s.respond_to? :specification_version then
96
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
97
95
  s.specification_version = 3
98
96
 
99
97
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
@@ -18,7 +18,7 @@ module Bio
18
18
  if s == '##FASTA'
19
19
  break
20
20
  end
21
- next if s.length == 0 or s[0] == '#'
21
+ next if s.length == 0 or s =~ /^#/
22
22
  @records.push FastLineRecord.new(parse_line_fast(s))
23
23
  end
24
24
  fasta = Bio::GFF::FastaReader.new(fh)
metadata CHANGED
@@ -1,152 +1,119 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: bio-gff3
3
- version: !ruby/object:Gem::Version
4
- prerelease: false
5
- segments:
6
- - 0
7
- - 8
8
- - 6
9
- version: 0.8.6
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.8.7
5
+ prerelease:
10
6
  platform: ruby
11
- authors:
7
+ authors:
12
8
  - Pjotr Prins
13
9
  autorequire:
14
10
  bindir: bin
15
11
  cert_chain: []
16
-
17
- date: 2011-01-17 00:00:00 +01:00
18
- default_executable: gff3-fetch
19
- dependencies:
20
- - !ruby/object:Gem::Dependency
12
+ date: 2011-07-31 00:00:00.000000000Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
21
15
  name: bio
22
- requirement: &id001 !ruby/object:Gem::Requirement
16
+ requirement: &76668620 !ruby/object:Gem::Requirement
23
17
  none: false
24
- requirements:
25
- - - ">="
26
- - !ruby/object:Gem::Version
27
- segments:
28
- - 1
29
- - 3
30
- - 1
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
31
21
  version: 1.3.1
32
22
  type: :runtime
33
23
  prerelease: false
34
- version_requirements: *id001
35
- - !ruby/object:Gem::Dependency
24
+ version_requirements: *76668620
25
+ - !ruby/object:Gem::Dependency
36
26
  name: log4r
37
- requirement: &id002 !ruby/object:Gem::Requirement
27
+ requirement: &76668160 !ruby/object:Gem::Requirement
38
28
  none: false
39
- requirements:
40
- - - ">"
41
- - !ruby/object:Gem::Version
42
- segments:
43
- - 1
44
- - 1
45
- - 6
29
+ requirements:
30
+ - - ! '>'
31
+ - !ruby/object:Gem::Version
46
32
  version: 1.1.6
47
33
  type: :runtime
48
34
  prerelease: false
49
- version_requirements: *id002
50
- - !ruby/object:Gem::Dependency
35
+ version_requirements: *76668160
36
+ - !ruby/object:Gem::Dependency
51
37
  name: bio-logger
52
- requirement: &id003 !ruby/object:Gem::Requirement
38
+ requirement: &76667770 !ruby/object:Gem::Requirement
53
39
  none: false
54
- requirements:
55
- - - ">"
56
- - !ruby/object:Gem::Version
57
- segments:
58
- - 0
59
- - 8
60
- - 0
40
+ requirements:
41
+ - - ! '>'
42
+ - !ruby/object:Gem::Version
61
43
  version: 0.8.0
62
44
  type: :runtime
63
45
  prerelease: false
64
- version_requirements: *id003
65
- - !ruby/object:Gem::Dependency
46
+ version_requirements: *76667770
47
+ - !ruby/object:Gem::Dependency
66
48
  name: shoulda
67
- requirement: &id004 !ruby/object:Gem::Requirement
49
+ requirement: &76667270 !ruby/object:Gem::Requirement
68
50
  none: false
69
- requirements:
70
- - - ">="
71
- - !ruby/object:Gem::Version
72
- segments:
73
- - 0
74
- version: "0"
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
75
55
  type: :development
76
56
  prerelease: false
77
- version_requirements: *id004
78
- - !ruby/object:Gem::Dependency
57
+ version_requirements: *76667270
58
+ - !ruby/object:Gem::Dependency
79
59
  name: bundler
80
- requirement: &id005 !ruby/object:Gem::Requirement
60
+ requirement: &76666730 !ruby/object:Gem::Requirement
81
61
  none: false
82
- requirements:
62
+ requirements:
83
63
  - - ~>
84
- - !ruby/object:Gem::Version
85
- segments:
86
- - 1
87
- - 0
88
- - 0
64
+ - !ruby/object:Gem::Version
89
65
  version: 1.0.0
90
66
  type: :development
91
67
  prerelease: false
92
- version_requirements: *id005
93
- - !ruby/object:Gem::Dependency
68
+ version_requirements: *76666730
69
+ - !ruby/object:Gem::Dependency
94
70
  name: jeweler
95
- requirement: &id006 !ruby/object:Gem::Requirement
71
+ requirement: &76662470 !ruby/object:Gem::Requirement
96
72
  none: false
97
- requirements:
73
+ requirements:
98
74
  - - ~>
99
- - !ruby/object:Gem::Version
100
- segments:
101
- - 1
102
- - 5
103
- - 2
75
+ - !ruby/object:Gem::Version
104
76
  version: 1.5.2
105
77
  type: :development
106
78
  prerelease: false
107
- version_requirements: *id006
108
- - !ruby/object:Gem::Dependency
79
+ version_requirements: *76662470
80
+ - !ruby/object:Gem::Dependency
109
81
  name: rcov
110
- requirement: &id007 !ruby/object:Gem::Requirement
82
+ requirement: &76662040 !ruby/object:Gem::Requirement
111
83
  none: false
112
- requirements:
113
- - - ">="
114
- - !ruby/object:Gem::Version
115
- segments:
116
- - 0
117
- version: "0"
84
+ requirements:
85
+ - - ! '>='
86
+ - !ruby/object:Gem::Version
87
+ version: '0'
118
88
  type: :development
119
89
  prerelease: false
120
- version_requirements: *id007
121
- - !ruby/object:Gem::Dependency
90
+ version_requirements: *76662040
91
+ - !ruby/object:Gem::Dependency
122
92
  name: rspec
123
- requirement: &id008 !ruby/object:Gem::Requirement
93
+ requirement: &76661550 !ruby/object:Gem::Requirement
124
94
  none: false
125
- requirements:
126
- - - ">="
127
- - !ruby/object:Gem::Version
128
- segments:
129
- - 2
130
- - 3
131
- - 0
95
+ requirements:
96
+ - - ! '>='
97
+ - !ruby/object:Gem::Version
132
98
  version: 2.3.0
133
99
  type: :development
134
100
  prerelease: false
135
- version_requirements: *id008
136
- description: |
137
- GFF3 (genome browser) information and digest mRNA and CDS sequences.
101
+ version_requirements: *76661550
102
+ description: ! 'GFF3 (genome browser) information and digest mRNA and CDS sequences.
103
+
138
104
  Options for low memory use and caching of records.
105
+
139
106
  Support for external FASTA files.
140
107
 
108
+ '
141
109
  email: pjotr.prins@thebird.nl
142
- executables:
110
+ executables:
143
111
  - gff3-fetch
144
112
  extensions: []
145
-
146
- extra_rdoc_files:
113
+ extra_rdoc_files:
147
114
  - LICENSE.txt
148
115
  - README.rdoc
149
- files:
116
+ files:
150
117
  - Gemfile
151
118
  - Gemfile.lock
152
119
  - LICENSE.txt
@@ -199,39 +166,32 @@ files:
199
166
  - test/helper.rb
200
167
  - test/regressiontest.rb
201
168
  - test/test_bio-gff3.rb
202
- has_rdoc: true
203
- homepage: http://github.com/pjotrp/bioruby-gff3
204
- licenses:
169
+ homepage: https://github.com/pjotrp/bioruby-gff3-plugin
170
+ licenses:
205
171
  - MIT
206
172
  post_install_message:
207
173
  rdoc_options: []
208
-
209
- require_paths:
174
+ require_paths:
210
175
  - lib
211
- required_ruby_version: !ruby/object:Gem::Requirement
176
+ required_ruby_version: !ruby/object:Gem::Requirement
212
177
  none: false
213
- requirements:
214
- - - ">="
215
- - !ruby/object:Gem::Version
216
- segments:
217
- - 0
218
- version: "0"
219
- required_rubygems_version: !ruby/object:Gem::Requirement
178
+ requirements:
179
+ - - ! '>='
180
+ - !ruby/object:Gem::Version
181
+ version: '0'
182
+ required_rubygems_version: !ruby/object:Gem::Requirement
220
183
  none: false
221
- requirements:
222
- - - ">="
223
- - !ruby/object:Gem::Version
224
- segments:
225
- - 0
226
- version: "0"
184
+ requirements:
185
+ - - ! '>='
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
227
188
  requirements: []
228
-
229
189
  rubyforge_project:
230
- rubygems_version: 1.3.7
190
+ rubygems_version: 1.8.6
231
191
  signing_key:
232
192
  specification_version: 3
233
- summary: BioRuby GFF3 plugin for big data
234
- test_files:
193
+ summary: GFF3 parser for big data
194
+ test_files:
235
195
  - spec/gff3_assemble2_spec.rb
236
196
  - spec/gff3_assemble3_spec.rb
237
197
  - spec/gff3_assemble_spec.rb