swissparser 0.11.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +9 -0
- data/CHANGELOG.rdoc +9 -0
- data/README.rdoc +28 -17
- data/Rakefile +2 -2
- data/Rakefile.compiled.rbc +622 -0
- data/examples/kegg_demo.rb +39 -63
- data/examples/uniprot.rb +85 -0
- data/features/basic_parsing.feature +79 -30
- data/features/extra.feature +52 -0
- data/features/step_definitions/basic_steps.rb +84 -0
- data/features/step_definitions/sugar_steps.rb +71 -0
- data/lib/swissparser.rb +39 -194
- data/lib/swissparser.rbc +928 -0
- data/lib/swissparser/entries.rb +137 -0
- data/lib/swissparser/entries.rbc +2360 -0
- data/lib/swissparser/rules.rb +112 -0
- data/lib/swissparser/rules.rbc +1699 -0
- metadata +55 -32
- data/benchmarks/whole_uniprot.txt +0 -7
- data/examples/parse_from_uri.rb +0 -88
- data/examples/signal_demo.rb +0 -100
- data/examples/tutorial_1.rb +0 -88
- data/examples/tutorial_2.rb +0 -65
- data/examples/uniprot_param_demo.rb +0 -85
- data/features/parser_extension.feature +0 -83
- data/features/parsing_context.feature +0 -48
- data/features/polite.feature +0 -16
- data/features/step_definitions/core.rb +0 -71
- data/features/step_definitions/definitions.rb +0 -68
- data/features/step_definitions/extra.rb +0 -56
- data/lib/swiss_parser.rb +0 -13
- data/lib/swissparser/parsing_context.rb +0 -60
- data/lib/swissparser/parsing_rules.rb +0 -39
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swissparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 1.0.0
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- paradigmatic
|
@@ -9,30 +14,46 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date:
|
17
|
+
date: 2010-12-18 00:00:00 +01:00
|
13
18
|
default_executable:
|
14
19
|
dependencies:
|
15
20
|
- !ruby/object:Gem::Dependency
|
16
21
|
name: cucumber
|
17
|
-
|
18
|
-
|
19
|
-
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
20
25
|
requirements:
|
21
26
|
- - ">="
|
22
27
|
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
- 4
|
23
31
|
version: "0.4"
|
24
|
-
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
25
34
|
- !ruby/object:Gem::Dependency
|
26
35
|
name: bones
|
27
|
-
|
28
|
-
|
29
|
-
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
30
39
|
requirements:
|
31
40
|
- - ">="
|
32
41
|
- !ruby/object:Gem::Version
|
33
|
-
|
34
|
-
|
35
|
-
|
42
|
+
segments:
|
43
|
+
- 3
|
44
|
+
- 5
|
45
|
+
- 4
|
46
|
+
version: 3.5.4
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
49
|
+
description: |-
|
50
|
+
Simple DSL to define parser for flat files formats common in
|
51
|
+
biofinformatics, such as Swissprot, Uniprot, KEGG, TREMBL, etc.
|
52
|
+
|
53
|
+
SwissParser API was changed in its version 1.0.0 to simplify parser
|
54
|
+
definition. The code was tested on entire Uniprot and KEGG releases
|
55
|
+
and functional testing guarantees that existing features will not
|
56
|
+
break after an update.
|
36
57
|
email: paradigmatic@streum.org
|
37
58
|
executables: []
|
38
59
|
|
@@ -41,33 +62,31 @@ extensions: []
|
|
41
62
|
extra_rdoc_files:
|
42
63
|
- CHANGELOG.rdoc
|
43
64
|
- README.rdoc
|
44
|
-
-
|
65
|
+
- lib/swissparser.rbc
|
66
|
+
- lib/swissparser/entries.rbc
|
67
|
+
- lib/swissparser/rules.rbc
|
45
68
|
files:
|
69
|
+
- .gitignore
|
46
70
|
- CHANGELOG.rdoc
|
47
71
|
- LICENSE
|
48
72
|
- README.rdoc
|
49
73
|
- Rakefile
|
50
|
-
-
|
74
|
+
- Rakefile.compiled.rbc
|
51
75
|
- examples/data/EColPositives_noTAT.bas
|
52
76
|
- examples/data/kegg_enzyme_short.txt
|
53
77
|
- examples/data/uniprot.txt
|
54
78
|
- examples/kegg_demo.rb
|
55
|
-
- examples/
|
56
|
-
- examples/signal_demo.rb
|
57
|
-
- examples/tutorial_1.rb
|
58
|
-
- examples/tutorial_2.rb
|
59
|
-
- examples/uniprot_param_demo.rb
|
79
|
+
- examples/uniprot.rb
|
60
80
|
- features/basic_parsing.feature
|
61
|
-
- features/
|
62
|
-
- features/
|
63
|
-
- features/
|
64
|
-
- features/step_definitions/core.rb
|
65
|
-
- features/step_definitions/definitions.rb
|
66
|
-
- features/step_definitions/extra.rb
|
67
|
-
- lib/swiss_parser.rb
|
81
|
+
- features/extra.feature
|
82
|
+
- features/step_definitions/basic_steps.rb
|
83
|
+
- features/step_definitions/sugar_steps.rb
|
68
84
|
- lib/swissparser.rb
|
69
|
-
- lib/swissparser
|
70
|
-
- lib/swissparser/
|
85
|
+
- lib/swissparser.rbc
|
86
|
+
- lib/swissparser/entries.rb
|
87
|
+
- lib/swissparser/entries.rbc
|
88
|
+
- lib/swissparser/rules.rb
|
89
|
+
- lib/swissparser/rules.rbc
|
71
90
|
has_rdoc: true
|
72
91
|
homepage: http://github.com/paradigmatic/SwissParser
|
73
92
|
licenses: []
|
@@ -79,23 +98,27 @@ rdoc_options:
|
|
79
98
|
require_paths:
|
80
99
|
- lib
|
81
100
|
required_ruby_version: !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
82
102
|
requirements:
|
83
103
|
- - ">="
|
84
104
|
- !ruby/object:Gem::Version
|
105
|
+
segments:
|
106
|
+
- 0
|
85
107
|
version: "0"
|
86
|
-
version:
|
87
108
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
|
+
none: false
|
88
110
|
requirements:
|
89
111
|
- - ">="
|
90
112
|
- !ruby/object:Gem::Version
|
113
|
+
segments:
|
114
|
+
- 0
|
91
115
|
version: "0"
|
92
|
-
version:
|
93
116
|
requirements: []
|
94
117
|
|
95
118
|
rubyforge_project: swissparser
|
96
|
-
rubygems_version: 1.3.
|
119
|
+
rubygems_version: 1.3.7
|
97
120
|
signing_key:
|
98
121
|
specification_version: 3
|
99
|
-
summary: Simple DSL to define parser for flat files formats common in biofinformatics
|
122
|
+
summary: Simple DSL to define parser for flat files formats common in biofinformatics, such as Swissprot, Uniprot, KEGG, TREMBL, etc.
|
100
123
|
test_files: []
|
101
124
|
|
data/examples/parse_from_uri.rb
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
|
-
#!/usr/bin/ruby -w
|
21
|
-
|
22
|
-
require 'yaml'
|
23
|
-
require 'swissparser.rb'
|
24
|
-
|
25
|
-
class Protein
|
26
|
-
|
27
|
-
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
-
|
29
|
-
def initialize
|
30
|
-
@taxonomy = []
|
31
|
-
@sequence = ""
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
module Uniprot
|
37
|
-
|
38
|
-
Parser = Swiss::Parser.define do
|
39
|
-
|
40
|
-
# Each entry must be stored in a Protein instance
|
41
|
-
new_entry do
|
42
|
-
Protein.new
|
43
|
-
end
|
44
|
-
|
45
|
-
rules do
|
46
|
-
|
47
|
-
# Parse the uniprot id
|
48
|
-
with("ID") do |content,protein|
|
49
|
-
content =~ /([A-Z]\w+)\D+(\d+)/
|
50
|
-
protein.id = $1
|
51
|
-
protein.size = $2.to_i
|
52
|
-
end
|
53
|
-
|
54
|
-
# Parse the organism
|
55
|
-
with("OS") do |content,protein|
|
56
|
-
content =~ /(\w+ \w+)/
|
57
|
-
protein.species = $1
|
58
|
-
end
|
59
|
-
|
60
|
-
# Parse the complete taxonomy
|
61
|
-
with("OC") do |content,protein|
|
62
|
-
ary = content.gsub(".","").split("; ")
|
63
|
-
protein.taxonomy += ary
|
64
|
-
end
|
65
|
-
|
66
|
-
# Parse the Sequence
|
67
|
-
with_text_after("SQ") do |content,protein|
|
68
|
-
seq = content.strip.gsub(" ","")
|
69
|
-
protein.sequence += seq
|
70
|
-
end
|
71
|
-
|
72
|
-
end
|
73
|
-
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
if $0 == __FILE__
|
79
|
-
|
80
|
-
uri = ARGV.shift
|
81
|
-
|
82
|
-
entries = Uniprot::Parser.parse_URI( uri )
|
83
|
-
|
84
|
-
entries.each do |e|
|
85
|
-
puts e.to_yaml
|
86
|
-
end
|
87
|
-
|
88
|
-
end
|
data/examples/signal_demo.rb
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
|
-
require 'swissparser.rb'
|
21
|
-
require 'yaml'
|
22
|
-
|
23
|
-
class Protein
|
24
|
-
attr_accessor :name, :sequence, :size
|
25
|
-
end
|
26
|
-
|
27
|
-
parser = Swiss::Parser.define do
|
28
|
-
|
29
|
-
new_entry do
|
30
|
-
Protein.new
|
31
|
-
end
|
32
|
-
|
33
|
-
rules do
|
34
|
-
|
35
|
-
set_separator '/'
|
36
|
-
|
37
|
-
with('N') do |content,entry|
|
38
|
-
entry.name = content
|
39
|
-
end
|
40
|
-
|
41
|
-
with('C') do |content,entry|
|
42
|
-
entry.size = content.to_i
|
43
|
-
end
|
44
|
-
|
45
|
-
with('S') do |content,entry|
|
46
|
-
entry.sequence = content
|
47
|
-
end
|
48
|
-
|
49
|
-
end
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
|
54
|
-
stat_parser = parser.extend do
|
55
|
-
|
56
|
-
before do
|
57
|
-
{ :min => 1_000, :max => 0, :sum => 0, :n => 0 }
|
58
|
-
end
|
59
|
-
|
60
|
-
finish_entry do |entry,h|
|
61
|
-
if entry.size < h[:min]
|
62
|
-
h[:min] = entry.size
|
63
|
-
end
|
64
|
-
if entry.size > h[:max]
|
65
|
-
h[:max] = entry.size
|
66
|
-
end
|
67
|
-
h[:sum] += entry.size
|
68
|
-
h[:n] += 1
|
69
|
-
end
|
70
|
-
|
71
|
-
after do |h|
|
72
|
-
h[:average] = h[:sum].to_f / h[:n]
|
73
|
-
h
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
|
79
|
-
if $0 == __FILE__
|
80
|
-
|
81
|
-
filename = ARGV.shift
|
82
|
-
|
83
|
-
entries = parser.parse_file( filename )
|
84
|
-
|
85
|
-
entries.each do |e|
|
86
|
-
puts e.to_yaml
|
87
|
-
end
|
88
|
-
|
89
|
-
puts
|
90
|
-
|
91
|
-
results = stat_parser.parse_file( filename )
|
92
|
-
|
93
|
-
puts "Min: #{results[:min]}"
|
94
|
-
puts "Max: #{results[:max]}"
|
95
|
-
puts "Average: #{results[:average]}"
|
96
|
-
puts "Size: #{results[:n]}"
|
97
|
-
|
98
|
-
end
|
99
|
-
|
100
|
-
|
data/examples/tutorial_1.rb
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
|
-
#!/usr/bin/ruby -w
|
21
|
-
|
22
|
-
require 'yaml'
|
23
|
-
require 'swissparser'
|
24
|
-
|
25
|
-
class Protein
|
26
|
-
|
27
|
-
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
-
|
29
|
-
def initialize
|
30
|
-
@taxonomy = []
|
31
|
-
@sequence = ""
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
module Uniprot
|
37
|
-
|
38
|
-
Parser = Swiss::Parser.define do
|
39
|
-
|
40
|
-
# Each entry must be stored in a Protein instance
|
41
|
-
new_entry do
|
42
|
-
Protein.new
|
43
|
-
end
|
44
|
-
|
45
|
-
rules do
|
46
|
-
|
47
|
-
# Parse the uniprot id
|
48
|
-
with("ID") do |content,protein|
|
49
|
-
content =~ /([A-Z]\w+)\D+(\d+)/
|
50
|
-
protein.id = $1
|
51
|
-
protein.size = $2.to_i
|
52
|
-
end
|
53
|
-
|
54
|
-
# Parse the organism
|
55
|
-
with("OS") do |content,protein|
|
56
|
-
content =~ /(\w+ \w+)/
|
57
|
-
protein.species = $1
|
58
|
-
end
|
59
|
-
|
60
|
-
# Parse the complete taxonomy
|
61
|
-
with("OC") do |content,protein|
|
62
|
-
ary = content.gsub(".","").split("; ")
|
63
|
-
protein.taxonomy += ary
|
64
|
-
end
|
65
|
-
|
66
|
-
# Parse the Sequence
|
67
|
-
with_text_after("SQ") do |content,protein|
|
68
|
-
seq = content.strip.gsub(" ","")
|
69
|
-
protein.sequence += seq
|
70
|
-
end
|
71
|
-
|
72
|
-
end
|
73
|
-
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
if $0 == __FILE__
|
79
|
-
|
80
|
-
filename = ARGV.shift
|
81
|
-
|
82
|
-
entries = Uniprot::Parser.parse_file( filename )
|
83
|
-
|
84
|
-
entries.each do |e|
|
85
|
-
puts e.to_yaml
|
86
|
-
end
|
87
|
-
|
88
|
-
end
|
data/examples/tutorial_2.rb
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
|
-
#!/usr/bin/ruby -w
|
21
|
-
|
22
|
-
require 'swissparser'
|
23
|
-
require 'examples/tutorial_1'
|
24
|
-
|
25
|
-
class Protein
|
26
|
-
|
27
|
-
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
-
|
29
|
-
def initialize
|
30
|
-
@taxonomy = []
|
31
|
-
@sequence = ""
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
module Uniprot
|
37
|
-
|
38
|
-
SpeciesParser = Uniprot::Parser.extend do
|
39
|
-
|
40
|
-
before do
|
41
|
-
{}
|
42
|
-
end
|
43
|
-
|
44
|
-
finish_entry do |protein, container|
|
45
|
-
if container[protein.species].nil?
|
46
|
-
container[protein.species] = []
|
47
|
-
end
|
48
|
-
container[protein.species] << protein
|
49
|
-
end
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
end
|
54
|
-
|
55
|
-
if $0 == __FILE__
|
56
|
-
|
57
|
-
filename = ARGV.shift
|
58
|
-
|
59
|
-
result = Uniprot::SpeciesParser.parse_file( filename )
|
60
|
-
|
61
|
-
result.each do |species, ary|
|
62
|
-
puts "#{species} => #{ary.map{ |p| p.id }.join(', ')}"
|
63
|
-
end
|
64
|
-
|
65
|
-
end
|