swissparser 0.11.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +9 -0
- data/CHANGELOG.rdoc +9 -0
- data/README.rdoc +28 -17
- data/Rakefile +2 -2
- data/Rakefile.compiled.rbc +622 -0
- data/examples/kegg_demo.rb +39 -63
- data/examples/uniprot.rb +85 -0
- data/features/basic_parsing.feature +79 -30
- data/features/extra.feature +52 -0
- data/features/step_definitions/basic_steps.rb +84 -0
- data/features/step_definitions/sugar_steps.rb +71 -0
- data/lib/swissparser.rb +39 -194
- data/lib/swissparser.rbc +928 -0
- data/lib/swissparser/entries.rb +137 -0
- data/lib/swissparser/entries.rbc +2360 -0
- data/lib/swissparser/rules.rb +112 -0
- data/lib/swissparser/rules.rbc +1699 -0
- metadata +55 -32
- data/benchmarks/whole_uniprot.txt +0 -7
- data/examples/parse_from_uri.rb +0 -88
- data/examples/signal_demo.rb +0 -100
- data/examples/tutorial_1.rb +0 -88
- data/examples/tutorial_2.rb +0 -65
- data/examples/uniprot_param_demo.rb +0 -85
- data/features/parser_extension.feature +0 -83
- data/features/parsing_context.feature +0 -48
- data/features/polite.feature +0 -16
- data/features/step_definitions/core.rb +0 -71
- data/features/step_definitions/definitions.rb +0 -68
- data/features/step_definitions/extra.rb +0 -56
- data/lib/swiss_parser.rb +0 -13
- data/lib/swissparser/parsing_context.rb +0 -60
- data/lib/swissparser/parsing_rules.rb +0 -39
metadata
CHANGED
@@ -1,7 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swissparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 1
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 1.0.0
|
5
10
|
platform: ruby
|
6
11
|
authors:
|
7
12
|
- paradigmatic
|
@@ -9,30 +14,46 @@ autorequire:
|
|
9
14
|
bindir: bin
|
10
15
|
cert_chain: []
|
11
16
|
|
12
|
-
date:
|
17
|
+
date: 2010-12-18 00:00:00 +01:00
|
13
18
|
default_executable:
|
14
19
|
dependencies:
|
15
20
|
- !ruby/object:Gem::Dependency
|
16
21
|
name: cucumber
|
17
|
-
|
18
|
-
|
19
|
-
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
none: false
|
20
25
|
requirements:
|
21
26
|
- - ">="
|
22
27
|
- !ruby/object:Gem::Version
|
28
|
+
segments:
|
29
|
+
- 0
|
30
|
+
- 4
|
23
31
|
version: "0.4"
|
24
|
-
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
25
34
|
- !ruby/object:Gem::Dependency
|
26
35
|
name: bones
|
27
|
-
|
28
|
-
|
29
|
-
|
36
|
+
prerelease: false
|
37
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
38
|
+
none: false
|
30
39
|
requirements:
|
31
40
|
- - ">="
|
32
41
|
- !ruby/object:Gem::Version
|
33
|
-
|
34
|
-
|
35
|
-
|
42
|
+
segments:
|
43
|
+
- 3
|
44
|
+
- 5
|
45
|
+
- 4
|
46
|
+
version: 3.5.4
|
47
|
+
type: :development
|
48
|
+
version_requirements: *id002
|
49
|
+
description: |-
|
50
|
+
Simple DSL to define parser for flat files formats common in
|
51
|
+
biofinformatics, such as Swissprot, Uniprot, KEGG, TREMBL, etc.
|
52
|
+
|
53
|
+
SwissParser API was changed in its version 1.0.0 to simplify parser
|
54
|
+
definition. The code was tested on entire Uniprot and KEGG releases
|
55
|
+
and functional testing guarantees that existing features will not
|
56
|
+
break after an update.
|
36
57
|
email: paradigmatic@streum.org
|
37
58
|
executables: []
|
38
59
|
|
@@ -41,33 +62,31 @@ extensions: []
|
|
41
62
|
extra_rdoc_files:
|
42
63
|
- CHANGELOG.rdoc
|
43
64
|
- README.rdoc
|
44
|
-
-
|
65
|
+
- lib/swissparser.rbc
|
66
|
+
- lib/swissparser/entries.rbc
|
67
|
+
- lib/swissparser/rules.rbc
|
45
68
|
files:
|
69
|
+
- .gitignore
|
46
70
|
- CHANGELOG.rdoc
|
47
71
|
- LICENSE
|
48
72
|
- README.rdoc
|
49
73
|
- Rakefile
|
50
|
-
- benchmarks/whole_uniprot.txt
|
74
|
+
- Rakefile.compiled.rbc
|
51
75
|
- examples/data/EColPositives_noTAT.bas
|
52
76
|
- examples/data/kegg_enzyme_short.txt
|
53
77
|
- examples/data/uniprot.txt
|
54
78
|
- examples/kegg_demo.rb
|
55
|
-
- examples/parse_from_uri.rb
|
56
|
-
- examples/signal_demo.rb
|
57
|
-
- examples/tutorial_1.rb
|
58
|
-
- examples/tutorial_2.rb
|
59
|
-
- examples/uniprot_param_demo.rb
|
79
|
+
- examples/uniprot.rb
|
60
80
|
- features/basic_parsing.feature
|
61
|
-
- features/parser_extension.feature
|
62
|
-
- features/parsing_context.feature
|
63
|
-
- features/polite.feature
|
64
|
-
- features/step_definitions/core.rb
|
65
|
-
- features/step_definitions/definitions.rb
|
66
|
-
- features/step_definitions/extra.rb
|
67
|
-
- lib/swiss_parser.rb
|
81
|
+
- features/extra.feature
|
82
|
+
- features/step_definitions/basic_steps.rb
|
83
|
+
- features/step_definitions/sugar_steps.rb
|
68
84
|
- lib/swissparser.rb
|
69
|
-
- lib/swissparser/parsing_context.rb
|
70
|
-
- lib/swissparser/parsing_rules.rb
|
85
|
+
- lib/swissparser.rbc
|
86
|
+
- lib/swissparser/entries.rb
|
87
|
+
- lib/swissparser/entries.rbc
|
88
|
+
- lib/swissparser/rules.rb
|
89
|
+
- lib/swissparser/rules.rbc
|
71
90
|
has_rdoc: true
|
72
91
|
homepage: http://github.com/paradigmatic/SwissParser
|
73
92
|
licenses: []
|
@@ -79,23 +98,27 @@ rdoc_options:
|
|
79
98
|
require_paths:
|
80
99
|
- lib
|
81
100
|
required_ruby_version: !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
82
102
|
requirements:
|
83
103
|
- - ">="
|
84
104
|
- !ruby/object:Gem::Version
|
105
|
+
segments:
|
106
|
+
- 0
|
85
107
|
version: "0"
|
86
|
-
version:
|
87
108
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
109
|
+
none: false
|
88
110
|
requirements:
|
89
111
|
- - ">="
|
90
112
|
- !ruby/object:Gem::Version
|
113
|
+
segments:
|
114
|
+
- 0
|
91
115
|
version: "0"
|
92
|
-
version:
|
93
116
|
requirements: []
|
94
117
|
|
95
118
|
rubyforge_project: swissparser
|
96
|
-
rubygems_version: 1.3.
|
119
|
+
rubygems_version: 1.3.7
|
97
120
|
signing_key:
|
98
121
|
specification_version: 3
|
99
|
-
summary: Simple DSL to define parser for flat files formats common in biofinformatics
|
122
|
+
summary: Simple DSL to define parser for flat files formats common in biofinformatics, such as Swissprot, Uniprot, KEGG, TREMBL, etc.
|
100
123
|
test_files: []
|
101
124
|
|
data/examples/parse_from_uri.rb
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
|
-
#!/usr/bin/ruby -w
|
21
|
-
|
22
|
-
require 'yaml'
|
23
|
-
require 'swissparser.rb'
|
24
|
-
|
25
|
-
class Protein
|
26
|
-
|
27
|
-
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
-
|
29
|
-
def initialize
|
30
|
-
@taxonomy = []
|
31
|
-
@sequence = ""
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
module Uniprot
|
37
|
-
|
38
|
-
Parser = Swiss::Parser.define do
|
39
|
-
|
40
|
-
# Each entry must be stored in a Protein instance
|
41
|
-
new_entry do
|
42
|
-
Protein.new
|
43
|
-
end
|
44
|
-
|
45
|
-
rules do
|
46
|
-
|
47
|
-
# Parse the uniprot id
|
48
|
-
with("ID") do |content,protein|
|
49
|
-
content =~ /([A-Z]\w+)\D+(\d+)/
|
50
|
-
protein.id = $1
|
51
|
-
protein.size = $2.to_i
|
52
|
-
end
|
53
|
-
|
54
|
-
# Parse the organism
|
55
|
-
with("OS") do |content,protein|
|
56
|
-
content =~ /(\w+ \w+)/
|
57
|
-
protein.species = $1
|
58
|
-
end
|
59
|
-
|
60
|
-
# Parse the complete taxonomy
|
61
|
-
with("OC") do |content,protein|
|
62
|
-
ary = content.gsub(".","").split("; ")
|
63
|
-
protein.taxonomy += ary
|
64
|
-
end
|
65
|
-
|
66
|
-
# Parse the Sequence
|
67
|
-
with_text_after("SQ") do |content,protein|
|
68
|
-
seq = content.strip.gsub(" ","")
|
69
|
-
protein.sequence += seq
|
70
|
-
end
|
71
|
-
|
72
|
-
end
|
73
|
-
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
if $0 == __FILE__
|
79
|
-
|
80
|
-
uri = ARGV.shift
|
81
|
-
|
82
|
-
entries = Uniprot::Parser.parse_URI( uri )
|
83
|
-
|
84
|
-
entries.each do |e|
|
85
|
-
puts e.to_yaml
|
86
|
-
end
|
87
|
-
|
88
|
-
end
|
data/examples/signal_demo.rb
DELETED
@@ -1,100 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
|
-
require 'swissparser.rb'
|
21
|
-
require 'yaml'
|
22
|
-
|
23
|
-
class Protein
|
24
|
-
attr_accessor :name, :sequence, :size
|
25
|
-
end
|
26
|
-
|
27
|
-
parser = Swiss::Parser.define do
|
28
|
-
|
29
|
-
new_entry do
|
30
|
-
Protein.new
|
31
|
-
end
|
32
|
-
|
33
|
-
rules do
|
34
|
-
|
35
|
-
set_separator '/'
|
36
|
-
|
37
|
-
with('N') do |content,entry|
|
38
|
-
entry.name = content
|
39
|
-
end
|
40
|
-
|
41
|
-
with('C') do |content,entry|
|
42
|
-
entry.size = content.to_i
|
43
|
-
end
|
44
|
-
|
45
|
-
with('S') do |content,entry|
|
46
|
-
entry.sequence = content
|
47
|
-
end
|
48
|
-
|
49
|
-
end
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
|
54
|
-
stat_parser = parser.extend do
|
55
|
-
|
56
|
-
before do
|
57
|
-
{ :min => 1_000, :max => 0, :sum => 0, :n => 0 }
|
58
|
-
end
|
59
|
-
|
60
|
-
finish_entry do |entry,h|
|
61
|
-
if entry.size < h[:min]
|
62
|
-
h[:min] = entry.size
|
63
|
-
end
|
64
|
-
if entry.size > h[:max]
|
65
|
-
h[:max] = entry.size
|
66
|
-
end
|
67
|
-
h[:sum] += entry.size
|
68
|
-
h[:n] += 1
|
69
|
-
end
|
70
|
-
|
71
|
-
after do |h|
|
72
|
-
h[:average] = h[:sum].to_f / h[:n]
|
73
|
-
h
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
|
79
|
-
if $0 == __FILE__
|
80
|
-
|
81
|
-
filename = ARGV.shift
|
82
|
-
|
83
|
-
entries = parser.parse_file( filename )
|
84
|
-
|
85
|
-
entries.each do |e|
|
86
|
-
puts e.to_yaml
|
87
|
-
end
|
88
|
-
|
89
|
-
puts
|
90
|
-
|
91
|
-
results = stat_parser.parse_file( filename )
|
92
|
-
|
93
|
-
puts "Min: #{results[:min]}"
|
94
|
-
puts "Max: #{results[:max]}"
|
95
|
-
puts "Average: #{results[:average]}"
|
96
|
-
puts "Size: #{results[:n]}"
|
97
|
-
|
98
|
-
end
|
99
|
-
|
100
|
-
|
data/examples/tutorial_1.rb
DELETED
@@ -1,88 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
|
-
#!/usr/bin/ruby -w
|
21
|
-
|
22
|
-
require 'yaml'
|
23
|
-
require 'swissparser'
|
24
|
-
|
25
|
-
class Protein
|
26
|
-
|
27
|
-
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
-
|
29
|
-
def initialize
|
30
|
-
@taxonomy = []
|
31
|
-
@sequence = ""
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
module Uniprot
|
37
|
-
|
38
|
-
Parser = Swiss::Parser.define do
|
39
|
-
|
40
|
-
# Each entry must be stored in a Protein instance
|
41
|
-
new_entry do
|
42
|
-
Protein.new
|
43
|
-
end
|
44
|
-
|
45
|
-
rules do
|
46
|
-
|
47
|
-
# Parse the uniprot id
|
48
|
-
with("ID") do |content,protein|
|
49
|
-
content =~ /([A-Z]\w+)\D+(\d+)/
|
50
|
-
protein.id = $1
|
51
|
-
protein.size = $2.to_i
|
52
|
-
end
|
53
|
-
|
54
|
-
# Parse the organism
|
55
|
-
with("OS") do |content,protein|
|
56
|
-
content =~ /(\w+ \w+)/
|
57
|
-
protein.species = $1
|
58
|
-
end
|
59
|
-
|
60
|
-
# Parse the complete taxonomy
|
61
|
-
with("OC") do |content,protein|
|
62
|
-
ary = content.gsub(".","").split("; ")
|
63
|
-
protein.taxonomy += ary
|
64
|
-
end
|
65
|
-
|
66
|
-
# Parse the Sequence
|
67
|
-
with_text_after("SQ") do |content,protein|
|
68
|
-
seq = content.strip.gsub(" ","")
|
69
|
-
protein.sequence += seq
|
70
|
-
end
|
71
|
-
|
72
|
-
end
|
73
|
-
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
if $0 == __FILE__
|
79
|
-
|
80
|
-
filename = ARGV.shift
|
81
|
-
|
82
|
-
entries = Uniprot::Parser.parse_file( filename )
|
83
|
-
|
84
|
-
entries.each do |e|
|
85
|
-
puts e.to_yaml
|
86
|
-
end
|
87
|
-
|
88
|
-
end
|
data/examples/tutorial_2.rb
DELETED
@@ -1,65 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
|
-
#!/usr/bin/ruby -w
|
21
|
-
|
22
|
-
require 'swissparser'
|
23
|
-
require 'examples/tutorial_1'
|
24
|
-
|
25
|
-
class Protein
|
26
|
-
|
27
|
-
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
-
|
29
|
-
def initialize
|
30
|
-
@taxonomy = []
|
31
|
-
@sequence = ""
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
module Uniprot
|
37
|
-
|
38
|
-
SpeciesParser = Uniprot::Parser.extend do
|
39
|
-
|
40
|
-
before do
|
41
|
-
{}
|
42
|
-
end
|
43
|
-
|
44
|
-
finish_entry do |protein, container|
|
45
|
-
if container[protein.species].nil?
|
46
|
-
container[protein.species] = []
|
47
|
-
end
|
48
|
-
container[protein.species] << protein
|
49
|
-
end
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
end
|
54
|
-
|
55
|
-
if $0 == __FILE__
|
56
|
-
|
57
|
-
filename = ARGV.shift
|
58
|
-
|
59
|
-
result = Uniprot::SpeciesParser.parse_file( filename )
|
60
|
-
|
61
|
-
result.each do |species, ary|
|
62
|
-
puts "#{species} => #{ary.map{ |p| p.id }.join(', ')}"
|
63
|
-
end
|
64
|
-
|
65
|
-
end
|