swissparser 0.11.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swissparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.1
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 0
8
+ - 0
9
+ version: 1.0.0
5
10
  platform: ruby
6
11
  authors:
7
12
  - paradigmatic
@@ -9,30 +14,46 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2009-11-16 00:00:00 +01:00
17
+ date: 2010-12-18 00:00:00 +01:00
13
18
  default_executable:
14
19
  dependencies:
15
20
  - !ruby/object:Gem::Dependency
16
21
  name: cucumber
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
20
25
  requirements:
21
26
  - - ">="
22
27
  - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 4
23
31
  version: "0.4"
24
- version:
32
+ type: :development
33
+ version_requirements: *id001
25
34
  - !ruby/object:Gem::Dependency
26
35
  name: bones
27
- type: :development
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
30
39
  requirements:
31
40
  - - ">="
32
41
  - !ruby/object:Gem::Version
33
- version: 3.0.1
34
- version:
35
- description: Simple DSL to define parser for flat files formats common in biofinformatics.
42
+ segments:
43
+ - 3
44
+ - 5
45
+ - 4
46
+ version: 3.5.4
47
+ type: :development
48
+ version_requirements: *id002
49
+ description: |-
50
+ Simple DSL to define parser for flat files formats common in
51
+ biofinformatics, such as Swissprot, Uniprot, KEGG, TREMBL, etc.
52
+
53
+ SwissParser API was changed in its version 1.0.0 to simplify parser
54
+ definition. The code was tested on entire Uniprot and KEGG releases
55
+ and functional testing guarantees that existing features will not
56
+ break after an update.
36
57
  email: paradigmatic@streum.org
37
58
  executables: []
38
59
 
@@ -41,33 +62,31 @@ extensions: []
41
62
  extra_rdoc_files:
42
63
  - CHANGELOG.rdoc
43
64
  - README.rdoc
44
- - benchmarks/whole_uniprot.txt
65
+ - lib/swissparser.rbc
66
+ - lib/swissparser/entries.rbc
67
+ - lib/swissparser/rules.rbc
45
68
  files:
69
+ - .gitignore
46
70
  - CHANGELOG.rdoc
47
71
  - LICENSE
48
72
  - README.rdoc
49
73
  - Rakefile
50
- - benchmarks/whole_uniprot.txt
74
+ - Rakefile.compiled.rbc
51
75
  - examples/data/EColPositives_noTAT.bas
52
76
  - examples/data/kegg_enzyme_short.txt
53
77
  - examples/data/uniprot.txt
54
78
  - examples/kegg_demo.rb
55
- - examples/parse_from_uri.rb
56
- - examples/signal_demo.rb
57
- - examples/tutorial_1.rb
58
- - examples/tutorial_2.rb
59
- - examples/uniprot_param_demo.rb
79
+ - examples/uniprot.rb
60
80
  - features/basic_parsing.feature
61
- - features/parser_extension.feature
62
- - features/parsing_context.feature
63
- - features/polite.feature
64
- - features/step_definitions/core.rb
65
- - features/step_definitions/definitions.rb
66
- - features/step_definitions/extra.rb
67
- - lib/swiss_parser.rb
81
+ - features/extra.feature
82
+ - features/step_definitions/basic_steps.rb
83
+ - features/step_definitions/sugar_steps.rb
68
84
  - lib/swissparser.rb
69
- - lib/swissparser/parsing_context.rb
70
- - lib/swissparser/parsing_rules.rb
85
+ - lib/swissparser.rbc
86
+ - lib/swissparser/entries.rb
87
+ - lib/swissparser/entries.rbc
88
+ - lib/swissparser/rules.rb
89
+ - lib/swissparser/rules.rbc
71
90
  has_rdoc: true
72
91
  homepage: http://github.com/paradigmatic/SwissParser
73
92
  licenses: []
@@ -79,23 +98,27 @@ rdoc_options:
79
98
  require_paths:
80
99
  - lib
81
100
  required_ruby_version: !ruby/object:Gem::Requirement
101
+ none: false
82
102
  requirements:
83
103
  - - ">="
84
104
  - !ruby/object:Gem::Version
105
+ segments:
106
+ - 0
85
107
  version: "0"
86
- version:
87
108
  required_rubygems_version: !ruby/object:Gem::Requirement
109
+ none: false
88
110
  requirements:
89
111
  - - ">="
90
112
  - !ruby/object:Gem::Version
113
+ segments:
114
+ - 0
91
115
  version: "0"
92
- version:
93
116
  requirements: []
94
117
 
95
118
  rubyforge_project: swissparser
96
- rubygems_version: 1.3.5
119
+ rubygems_version: 1.3.7
97
120
  signing_key:
98
121
  specification_version: 3
99
- summary: Simple DSL to define parser for flat files formats common in biofinformatics
122
+ summary: Simple DSL to define parser for flat files formats common in biofinformatics, such as Swissprot, Uniprot, KEGG, TREMBL, etc.
100
123
  test_files: []
101
124
 
@@ -1,7 +0,0 @@
1
- $ time ruby tutorial_1.rb uniprot_sprot.dat
2
- Parsed: 512205
3
- real 3313.672 user 3239.700 sys 66.360 pcpu 99.77
4
-
5
- $ time ruby bioruby.rb uniprot_sprot.dat
6
- 512205
7
- real 1054.322 user 998.170 sys 55.580 pcpu 99.94
@@ -1,88 +0,0 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
3
-
4
- This file is part of SwissParser.
5
-
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
10
-
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
15
-
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
-
20
- #!/usr/bin/ruby -w
21
-
22
- require 'yaml'
23
- require 'swissparser.rb'
24
-
25
- class Protein
26
-
27
- attr_accessor :id, :size, :species, :taxonomy, :sequence
28
-
29
- def initialize
30
- @taxonomy = []
31
- @sequence = ""
32
- end
33
-
34
- end
35
-
36
- module Uniprot
37
-
38
- Parser = Swiss::Parser.define do
39
-
40
- # Each entry must be stored in a Protein instance
41
- new_entry do
42
- Protein.new
43
- end
44
-
45
- rules do
46
-
47
- # Parse the uniprot id
48
- with("ID") do |content,protein|
49
- content =~ /([A-Z]\w+)\D+(\d+)/
50
- protein.id = $1
51
- protein.size = $2.to_i
52
- end
53
-
54
- # Parse the organism
55
- with("OS") do |content,protein|
56
- content =~ /(\w+ \w+)/
57
- protein.species = $1
58
- end
59
-
60
- # Parse the complete taxonomy
61
- with("OC") do |content,protein|
62
- ary = content.gsub(".","").split("; ")
63
- protein.taxonomy += ary
64
- end
65
-
66
- # Parse the Sequence
67
- with_text_after("SQ") do |content,protein|
68
- seq = content.strip.gsub(" ","")
69
- protein.sequence += seq
70
- end
71
-
72
- end
73
-
74
- end
75
-
76
- end
77
-
78
- if $0 == __FILE__
79
-
80
- uri = ARGV.shift
81
-
82
- entries = Uniprot::Parser.parse_URI( uri )
83
-
84
- entries.each do |e|
85
- puts e.to_yaml
86
- end
87
-
88
- end
@@ -1,100 +0,0 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
3
-
4
- This file is part of SwissParser.
5
-
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
10
-
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
15
-
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
-
20
- require 'swissparser.rb'
21
- require 'yaml'
22
-
23
- class Protein
24
- attr_accessor :name, :sequence, :size
25
- end
26
-
27
- parser = Swiss::Parser.define do
28
-
29
- new_entry do
30
- Protein.new
31
- end
32
-
33
- rules do
34
-
35
- set_separator '/'
36
-
37
- with('N') do |content,entry|
38
- entry.name = content
39
- end
40
-
41
- with('C') do |content,entry|
42
- entry.size = content.to_i
43
- end
44
-
45
- with('S') do |content,entry|
46
- entry.sequence = content
47
- end
48
-
49
- end
50
-
51
- end
52
-
53
-
54
- stat_parser = parser.extend do
55
-
56
- before do
57
- { :min => 1_000, :max => 0, :sum => 0, :n => 0 }
58
- end
59
-
60
- finish_entry do |entry,h|
61
- if entry.size < h[:min]
62
- h[:min] = entry.size
63
- end
64
- if entry.size > h[:max]
65
- h[:max] = entry.size
66
- end
67
- h[:sum] += entry.size
68
- h[:n] += 1
69
- end
70
-
71
- after do |h|
72
- h[:average] = h[:sum].to_f / h[:n]
73
- h
74
- end
75
-
76
- end
77
-
78
-
79
- if $0 == __FILE__
80
-
81
- filename = ARGV.shift
82
-
83
- entries = parser.parse_file( filename )
84
-
85
- entries.each do |e|
86
- puts e.to_yaml
87
- end
88
-
89
- puts
90
-
91
- results = stat_parser.parse_file( filename )
92
-
93
- puts "Min: #{results[:min]}"
94
- puts "Max: #{results[:max]}"
95
- puts "Average: #{results[:average]}"
96
- puts "Size: #{results[:n]}"
97
-
98
- end
99
-
100
-
@@ -1,88 +0,0 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
3
-
4
- This file is part of SwissParser.
5
-
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
10
-
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
15
-
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
-
20
- #!/usr/bin/ruby -w
21
-
22
- require 'yaml'
23
- require 'swissparser'
24
-
25
- class Protein
26
-
27
- attr_accessor :id, :size, :species, :taxonomy, :sequence
28
-
29
- def initialize
30
- @taxonomy = []
31
- @sequence = ""
32
- end
33
-
34
- end
35
-
36
- module Uniprot
37
-
38
- Parser = Swiss::Parser.define do
39
-
40
- # Each entry must be stored in a Protein instance
41
- new_entry do
42
- Protein.new
43
- end
44
-
45
- rules do
46
-
47
- # Parse the uniprot id
48
- with("ID") do |content,protein|
49
- content =~ /([A-Z]\w+)\D+(\d+)/
50
- protein.id = $1
51
- protein.size = $2.to_i
52
- end
53
-
54
- # Parse the organism
55
- with("OS") do |content,protein|
56
- content =~ /(\w+ \w+)/
57
- protein.species = $1
58
- end
59
-
60
- # Parse the complete taxonomy
61
- with("OC") do |content,protein|
62
- ary = content.gsub(".","").split("; ")
63
- protein.taxonomy += ary
64
- end
65
-
66
- # Parse the Sequence
67
- with_text_after("SQ") do |content,protein|
68
- seq = content.strip.gsub(" ","")
69
- protein.sequence += seq
70
- end
71
-
72
- end
73
-
74
- end
75
-
76
- end
77
-
78
- if $0 == __FILE__
79
-
80
- filename = ARGV.shift
81
-
82
- entries = Uniprot::Parser.parse_file( filename )
83
-
84
- entries.each do |e|
85
- puts e.to_yaml
86
- end
87
-
88
- end
@@ -1,65 +0,0 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
3
-
4
- This file is part of SwissParser.
5
-
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
10
-
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
15
-
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
-
20
- #!/usr/bin/ruby -w
21
-
22
- require 'swissparser'
23
- require 'examples/tutorial_1'
24
-
25
- class Protein
26
-
27
- attr_accessor :id, :size, :species, :taxonomy, :sequence
28
-
29
- def initialize
30
- @taxonomy = []
31
- @sequence = ""
32
- end
33
-
34
- end
35
-
36
- module Uniprot
37
-
38
- SpeciesParser = Uniprot::Parser.extend do
39
-
40
- before do
41
- {}
42
- end
43
-
44
- finish_entry do |protein, container|
45
- if container[protein.species].nil?
46
- container[protein.species] = []
47
- end
48
- container[protein.species] << protein
49
- end
50
-
51
- end
52
-
53
- end
54
-
55
- if $0 == __FILE__
56
-
57
- filename = ARGV.shift
58
-
59
- result = Uniprot::SpeciesParser.parse_file( filename )
60
-
61
- result.each do |species, ary|
62
- puts "#{species} => #{ary.map{ |p| p.id }.join(', ')}"
63
- end
64
-
65
- end