swissparser 0.11.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
metadata CHANGED
@@ -1,7 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swissparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.1
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 0
8
+ - 0
9
+ version: 1.0.0
5
10
  platform: ruby
6
11
  authors:
7
12
  - paradigmatic
@@ -9,30 +14,46 @@ autorequire:
9
14
  bindir: bin
10
15
  cert_chain: []
11
16
 
12
- date: 2009-11-16 00:00:00 +01:00
17
+ date: 2010-12-18 00:00:00 +01:00
13
18
  default_executable:
14
19
  dependencies:
15
20
  - !ruby/object:Gem::Dependency
16
21
  name: cucumber
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ none: false
20
25
  requirements:
21
26
  - - ">="
22
27
  - !ruby/object:Gem::Version
28
+ segments:
29
+ - 0
30
+ - 4
23
31
  version: "0.4"
24
- version:
32
+ type: :development
33
+ version_requirements: *id001
25
34
  - !ruby/object:Gem::Dependency
26
35
  name: bones
27
- type: :development
28
- version_requirement:
29
- version_requirements: !ruby/object:Gem::Requirement
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ none: false
30
39
  requirements:
31
40
  - - ">="
32
41
  - !ruby/object:Gem::Version
33
- version: 3.0.1
34
- version:
35
- description: Simple DSL to define parser for flat files formats common in biofinformatics.
42
+ segments:
43
+ - 3
44
+ - 5
45
+ - 4
46
+ version: 3.5.4
47
+ type: :development
48
+ version_requirements: *id002
49
+ description: |-
50
+ Simple DSL to define parser for flat files formats common in
51
+ biofinformatics, such as Swissprot, Uniprot, KEGG, TREMBL, etc.
52
+
53
+ SwissParser API was changed in its version 1.0.0 to simplify parser
54
+ definition. The code was tested on entire Uniprot and KEGG releases
55
+ and functional testing guarantees that existing features will not
56
+ break after an update.
36
57
  email: paradigmatic@streum.org
37
58
  executables: []
38
59
 
@@ -41,33 +62,31 @@ extensions: []
41
62
  extra_rdoc_files:
42
63
  - CHANGELOG.rdoc
43
64
  - README.rdoc
44
- - benchmarks/whole_uniprot.txt
65
+ - lib/swissparser.rbc
66
+ - lib/swissparser/entries.rbc
67
+ - lib/swissparser/rules.rbc
45
68
  files:
69
+ - .gitignore
46
70
  - CHANGELOG.rdoc
47
71
  - LICENSE
48
72
  - README.rdoc
49
73
  - Rakefile
50
- - benchmarks/whole_uniprot.txt
74
+ - Rakefile.compiled.rbc
51
75
  - examples/data/EColPositives_noTAT.bas
52
76
  - examples/data/kegg_enzyme_short.txt
53
77
  - examples/data/uniprot.txt
54
78
  - examples/kegg_demo.rb
55
- - examples/parse_from_uri.rb
56
- - examples/signal_demo.rb
57
- - examples/tutorial_1.rb
58
- - examples/tutorial_2.rb
59
- - examples/uniprot_param_demo.rb
79
+ - examples/uniprot.rb
60
80
  - features/basic_parsing.feature
61
- - features/parser_extension.feature
62
- - features/parsing_context.feature
63
- - features/polite.feature
64
- - features/step_definitions/core.rb
65
- - features/step_definitions/definitions.rb
66
- - features/step_definitions/extra.rb
67
- - lib/swiss_parser.rb
81
+ - features/extra.feature
82
+ - features/step_definitions/basic_steps.rb
83
+ - features/step_definitions/sugar_steps.rb
68
84
  - lib/swissparser.rb
69
- - lib/swissparser/parsing_context.rb
70
- - lib/swissparser/parsing_rules.rb
85
+ - lib/swissparser.rbc
86
+ - lib/swissparser/entries.rb
87
+ - lib/swissparser/entries.rbc
88
+ - lib/swissparser/rules.rb
89
+ - lib/swissparser/rules.rbc
71
90
  has_rdoc: true
72
91
  homepage: http://github.com/paradigmatic/SwissParser
73
92
  licenses: []
@@ -79,23 +98,27 @@ rdoc_options:
79
98
  require_paths:
80
99
  - lib
81
100
  required_ruby_version: !ruby/object:Gem::Requirement
101
+ none: false
82
102
  requirements:
83
103
  - - ">="
84
104
  - !ruby/object:Gem::Version
105
+ segments:
106
+ - 0
85
107
  version: "0"
86
- version:
87
108
  required_rubygems_version: !ruby/object:Gem::Requirement
109
+ none: false
88
110
  requirements:
89
111
  - - ">="
90
112
  - !ruby/object:Gem::Version
113
+ segments:
114
+ - 0
91
115
  version: "0"
92
- version:
93
116
  requirements: []
94
117
 
95
118
  rubyforge_project: swissparser
96
- rubygems_version: 1.3.5
119
+ rubygems_version: 1.3.7
97
120
  signing_key:
98
121
  specification_version: 3
99
- summary: Simple DSL to define parser for flat files formats common in biofinformatics
122
+ summary: Simple DSL to define parser for flat files formats common in biofinformatics, such as Swissprot, Uniprot, KEGG, TREMBL, etc.
100
123
  test_files: []
101
124
 
@@ -1,7 +0,0 @@
1
- $ time ruby tutorial_1.rb uniprot_sprot.dat
2
- Parsed: 512205
3
- real 3313.672 user 3239.700 sys 66.360 pcpu 99.77
4
-
5
- $ time ruby bioruby.rb uniprot_sprot.dat
6
- 512205
7
- real 1054.322 user 998.170 sys 55.580 pcpu 99.94
@@ -1,88 +0,0 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
3
-
4
- This file is part of SwissParser.
5
-
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
10
-
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
15
-
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
-
20
- #!/usr/bin/ruby -w
21
-
22
- require 'yaml'
23
- require 'swissparser.rb'
24
-
25
- class Protein
26
-
27
- attr_accessor :id, :size, :species, :taxonomy, :sequence
28
-
29
- def initialize
30
- @taxonomy = []
31
- @sequence = ""
32
- end
33
-
34
- end
35
-
36
- module Uniprot
37
-
38
- Parser = Swiss::Parser.define do
39
-
40
- # Each entry must be stored in a Protein instance
41
- new_entry do
42
- Protein.new
43
- end
44
-
45
- rules do
46
-
47
- # Parse the uniprot id
48
- with("ID") do |content,protein|
49
- content =~ /([A-Z]\w+)\D+(\d+)/
50
- protein.id = $1
51
- protein.size = $2.to_i
52
- end
53
-
54
- # Parse the organism
55
- with("OS") do |content,protein|
56
- content =~ /(\w+ \w+)/
57
- protein.species = $1
58
- end
59
-
60
- # Parse the complete taxonomy
61
- with("OC") do |content,protein|
62
- ary = content.gsub(".","").split("; ")
63
- protein.taxonomy += ary
64
- end
65
-
66
- # Parse the Sequence
67
- with_text_after("SQ") do |content,protein|
68
- seq = content.strip.gsub(" ","")
69
- protein.sequence += seq
70
- end
71
-
72
- end
73
-
74
- end
75
-
76
- end
77
-
78
- if $0 == __FILE__
79
-
80
- uri = ARGV.shift
81
-
82
- entries = Uniprot::Parser.parse_URI( uri )
83
-
84
- entries.each do |e|
85
- puts e.to_yaml
86
- end
87
-
88
- end
@@ -1,100 +0,0 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
3
-
4
- This file is part of SwissParser.
5
-
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
10
-
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
15
-
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
-
20
- require 'swissparser.rb'
21
- require 'yaml'
22
-
23
- class Protein
24
- attr_accessor :name, :sequence, :size
25
- end
26
-
27
- parser = Swiss::Parser.define do
28
-
29
- new_entry do
30
- Protein.new
31
- end
32
-
33
- rules do
34
-
35
- set_separator '/'
36
-
37
- with('N') do |content,entry|
38
- entry.name = content
39
- end
40
-
41
- with('C') do |content,entry|
42
- entry.size = content.to_i
43
- end
44
-
45
- with('S') do |content,entry|
46
- entry.sequence = content
47
- end
48
-
49
- end
50
-
51
- end
52
-
53
-
54
- stat_parser = parser.extend do
55
-
56
- before do
57
- { :min => 1_000, :max => 0, :sum => 0, :n => 0 }
58
- end
59
-
60
- finish_entry do |entry,h|
61
- if entry.size < h[:min]
62
- h[:min] = entry.size
63
- end
64
- if entry.size > h[:max]
65
- h[:max] = entry.size
66
- end
67
- h[:sum] += entry.size
68
- h[:n] += 1
69
- end
70
-
71
- after do |h|
72
- h[:average] = h[:sum].to_f / h[:n]
73
- h
74
- end
75
-
76
- end
77
-
78
-
79
- if $0 == __FILE__
80
-
81
- filename = ARGV.shift
82
-
83
- entries = parser.parse_file( filename )
84
-
85
- entries.each do |e|
86
- puts e.to_yaml
87
- end
88
-
89
- puts
90
-
91
- results = stat_parser.parse_file( filename )
92
-
93
- puts "Min: #{results[:min]}"
94
- puts "Max: #{results[:max]}"
95
- puts "Average: #{results[:average]}"
96
- puts "Size: #{results[:n]}"
97
-
98
- end
99
-
100
-
@@ -1,88 +0,0 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
3
-
4
- This file is part of SwissParser.
5
-
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
10
-
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
15
-
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
-
20
- #!/usr/bin/ruby -w
21
-
22
- require 'yaml'
23
- require 'swissparser'
24
-
25
- class Protein
26
-
27
- attr_accessor :id, :size, :species, :taxonomy, :sequence
28
-
29
- def initialize
30
- @taxonomy = []
31
- @sequence = ""
32
- end
33
-
34
- end
35
-
36
- module Uniprot
37
-
38
- Parser = Swiss::Parser.define do
39
-
40
- # Each entry must be stored in a Protein instance
41
- new_entry do
42
- Protein.new
43
- end
44
-
45
- rules do
46
-
47
- # Parse the uniprot id
48
- with("ID") do |content,protein|
49
- content =~ /([A-Z]\w+)\D+(\d+)/
50
- protein.id = $1
51
- protein.size = $2.to_i
52
- end
53
-
54
- # Parse the organism
55
- with("OS") do |content,protein|
56
- content =~ /(\w+ \w+)/
57
- protein.species = $1
58
- end
59
-
60
- # Parse the complete taxonomy
61
- with("OC") do |content,protein|
62
- ary = content.gsub(".","").split("; ")
63
- protein.taxonomy += ary
64
- end
65
-
66
- # Parse the Sequence
67
- with_text_after("SQ") do |content,protein|
68
- seq = content.strip.gsub(" ","")
69
- protein.sequence += seq
70
- end
71
-
72
- end
73
-
74
- end
75
-
76
- end
77
-
78
- if $0 == __FILE__
79
-
80
- filename = ARGV.shift
81
-
82
- entries = Uniprot::Parser.parse_file( filename )
83
-
84
- entries.each do |e|
85
- puts e.to_yaml
86
- end
87
-
88
- end
@@ -1,65 +0,0 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
3
-
4
- This file is part of SwissParser.
5
-
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
10
-
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
15
-
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
-
20
- #!/usr/bin/ruby -w
21
-
22
- require 'swissparser'
23
- require 'examples/tutorial_1'
24
-
25
- class Protein
26
-
27
- attr_accessor :id, :size, :species, :taxonomy, :sequence
28
-
29
- def initialize
30
- @taxonomy = []
31
- @sequence = ""
32
- end
33
-
34
- end
35
-
36
- module Uniprot
37
-
38
- SpeciesParser = Uniprot::Parser.extend do
39
-
40
- before do
41
- {}
42
- end
43
-
44
- finish_entry do |protein, container|
45
- if container[protein.species].nil?
46
- container[protein.species] = []
47
- end
48
- container[protein.species] << protein
49
- end
50
-
51
- end
52
-
53
- end
54
-
55
- if $0 == __FILE__
56
-
57
- filename = ARGV.shift
58
-
59
- result = Uniprot::SpeciesParser.parse_file( filename )
60
-
61
- result.each do |species, ary|
62
- puts "#{species} => #{ary.map{ |p| p.id }.join(', ')}"
63
- end
64
-
65
- end