swissparser 0.11.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +9 -0
- data/CHANGELOG.rdoc +9 -0
- data/README.rdoc +28 -17
- data/Rakefile +2 -2
- data/Rakefile.compiled.rbc +622 -0
- data/examples/kegg_demo.rb +39 -63
- data/examples/uniprot.rb +85 -0
- data/features/basic_parsing.feature +79 -30
- data/features/extra.feature +52 -0
- data/features/step_definitions/basic_steps.rb +84 -0
- data/features/step_definitions/sugar_steps.rb +71 -0
- data/lib/swissparser.rb +39 -194
- data/lib/swissparser.rbc +928 -0
- data/lib/swissparser/entries.rb +137 -0
- data/lib/swissparser/entries.rbc +2360 -0
- data/lib/swissparser/rules.rb +112 -0
- data/lib/swissparser/rules.rbc +1699 -0
- metadata +55 -32
- data/benchmarks/whole_uniprot.txt +0 -7
- data/examples/parse_from_uri.rb +0 -88
- data/examples/signal_demo.rb +0 -100
- data/examples/tutorial_1.rb +0 -88
- data/examples/tutorial_2.rb +0 -65
- data/examples/uniprot_param_demo.rb +0 -85
- data/features/parser_extension.feature +0 -83
- data/features/parsing_context.feature +0 -48
- data/features/polite.feature +0 -16
- data/features/step_definitions/core.rb +0 -71
- data/features/step_definitions/definitions.rb +0 -68
- data/features/step_definitions/extra.rb +0 -56
- data/lib/swiss_parser.rb +0 -13
- data/lib/swissparser/parsing_context.rb +0 -60
- data/lib/swissparser/parsing_rules.rb +0 -39
data/examples/kegg_demo.rb
CHANGED
@@ -1,104 +1,80 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
1
|
require 'swissparser.rb'
|
21
2
|
require 'yaml'
|
22
|
-
|
3
|
+
|
23
4
|
class Enzyme
|
24
5
|
|
25
6
|
attr_accessor :id, :genes
|
26
7
|
|
27
8
|
end
|
28
9
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
10
|
+
module Kegg
|
11
|
+
|
12
|
+
Parser = Swiss::Rules.define do
|
13
|
+
|
14
|
+
helpers do
|
15
|
+
def parse_gene_ids(string)
|
16
|
+
string.split(" ").each do |item|
|
17
|
+
if item =~ /(\d+)\(\w+\)/
|
18
|
+
unless @genes
|
19
|
+
@genes = []
|
20
|
+
end
|
21
|
+
@genes << $1
|
22
|
+
end
|
23
|
+
end
|
42
24
|
end
|
43
25
|
end
|
44
|
-
end
|
45
|
-
|
46
|
-
rules do
|
47
26
|
|
48
27
|
human = "HSA"
|
49
28
|
|
50
29
|
set_separator( "///" )
|
51
30
|
|
52
|
-
with("ENTRY") do |content
|
31
|
+
with("ENTRY") do |content|
|
53
32
|
content =~ /((\d+|-)\.(\d+|-)\.(\d+|-)\.(\d+|-))/
|
54
|
-
|
33
|
+
@id = $1
|
55
34
|
end
|
56
35
|
|
57
|
-
with("GENES") do |content
|
58
|
-
content =~ /^([A-Z]+): (.*)/
|
36
|
+
with("GENES") do |content|
|
37
|
+
content =~ /^([A-Z]+): (.*)/
|
59
38
|
org,genes = $1,$2
|
60
|
-
|
39
|
+
@last_organism = org
|
61
40
|
if org == human
|
62
|
-
parse_gene_ids( genes
|
41
|
+
parse_gene_ids( genes )
|
63
42
|
end
|
64
43
|
end
|
65
44
|
|
66
|
-
with_text_after("GENES") do |content
|
45
|
+
with_text_after("GENES") do |content|
|
67
46
|
if content =~ /([A-Z]+): (.*)/
|
68
47
|
org,genes = $1,$2
|
69
|
-
|
48
|
+
@last_organism = org
|
70
49
|
if org == human
|
71
|
-
parse_gene_ids( genes
|
50
|
+
parse_gene_ids( genes )
|
72
51
|
end
|
73
|
-
elsif
|
74
|
-
parse_gene_ids( content
|
75
|
-
end
|
52
|
+
elsif @last_organism == human
|
53
|
+
parse_gene_ids( content )
|
54
|
+
end
|
76
55
|
end
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
finish_entry do |entry,container|
|
81
|
-
if entry[:genes].size > 0
|
56
|
+
end.make_parser do |entries|
|
57
|
+
results = []
|
58
|
+
entries.each do |entry|
|
82
59
|
e = Enzyme.new
|
83
|
-
e.id = entry
|
84
|
-
e.genes = entry
|
85
|
-
|
60
|
+
e.id = entry.id
|
61
|
+
e.genes = entry.genes
|
62
|
+
results << e
|
86
63
|
end
|
64
|
+
results
|
87
65
|
end
|
88
|
-
|
89
66
|
end
|
90
67
|
|
91
|
-
|
68
|
+
|
92
69
|
if $0 == __FILE__
|
93
|
-
|
70
|
+
|
94
71
|
filename = ARGV.shift
|
95
|
-
|
96
|
-
enzymes =
|
72
|
+
|
73
|
+
enzymes = Kegg::Parser.parse_file( filename )
|
97
74
|
|
98
75
|
enzymes.each do |e|
|
99
76
|
puts e.to_yaml
|
100
77
|
end
|
101
|
-
|
78
|
+
|
102
79
|
end
|
103
80
|
|
104
|
-
|
data/examples/uniprot.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require 'swissparser'
|
5
|
+
|
6
|
+
class Protein
|
7
|
+
|
8
|
+
attr_accessor :swiss_id, :size, :species, :taxonomy, :sequence
|
9
|
+
|
10
|
+
def initialize
|
11
|
+
@taxonomy = []
|
12
|
+
@sequence = ""
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
|
17
|
+
module Uniprot
|
18
|
+
|
19
|
+
Rules = Swiss::Rules.define do
|
20
|
+
|
21
|
+
# Parse the uniprot id
|
22
|
+
with("ID") do |content|
|
23
|
+
content =~ /([A-Z]\w+)\D+(\d+)/
|
24
|
+
@swiss_id = $1
|
25
|
+
@size = $2.to_i
|
26
|
+
end
|
27
|
+
|
28
|
+
# Parse the organism
|
29
|
+
with("OS") do |content|
|
30
|
+
content =~ /(\w+ \w+)/
|
31
|
+
@species = $1
|
32
|
+
end
|
33
|
+
|
34
|
+
# Parse the complete taxonomy
|
35
|
+
with("OC") do |content|
|
36
|
+
ary = content.gsub(".","").split("; ")
|
37
|
+
if @taxonomy.nil?
|
38
|
+
@taxonomy = []
|
39
|
+
end
|
40
|
+
@taxonomy += ary
|
41
|
+
end
|
42
|
+
|
43
|
+
# Parse the Sequence
|
44
|
+
with_text_after("SQ") do |content|
|
45
|
+
seq = content.strip.gsub(" ","")
|
46
|
+
if @seq.nil?
|
47
|
+
@seq = ""
|
48
|
+
end
|
49
|
+
@seq += seq
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
# With the rules defined above, create a parser
|
55
|
+
# which returns an array of Protein instances.
|
56
|
+
Parser = Rules.make_parser do |entries|
|
57
|
+
results = []
|
58
|
+
entries.each do |e|
|
59
|
+
p = Protein.new
|
60
|
+
p.swiss_id = e.swiss_id
|
61
|
+
p.species = e.species
|
62
|
+
p.taxonomy = e.taxonomy
|
63
|
+
p.sequence = e.seq
|
64
|
+
p.size = e.size
|
65
|
+
results << p
|
66
|
+
end
|
67
|
+
results
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
if $0 == __FILE__
|
74
|
+
|
75
|
+
puts Swiss::VERSION
|
76
|
+
|
77
|
+
filename = ARGV.shift
|
78
|
+
|
79
|
+
proteins = Uniprot::Parser.parse_file( filename )
|
80
|
+
|
81
|
+
proteins.each do |e|
|
82
|
+
puts e.to_yaml
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
@@ -1,30 +1,79 @@
|
|
1
|
-
Feature:
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
1
|
+
Feature:
|
2
|
+
I want to parse a flat-file on my disk.
|
3
|
+
|
4
|
+
Background:
|
5
|
+
Given sample data:
|
6
|
+
"""
|
7
|
+
AA x1
|
8
|
+
BB y1
|
9
|
+
CC z1
|
10
|
+
abcd
|
11
|
+
//
|
12
|
+
AA x2
|
13
|
+
BB y2
|
14
|
+
CC z2
|
15
|
+
efgh
|
16
|
+
//
|
17
|
+
AA x3
|
18
|
+
BB y3
|
19
|
+
CC z3
|
20
|
+
ijkl
|
21
|
+
//
|
22
|
+
"""
|
23
|
+
|
24
|
+
Scenario: By default the separator is "//"
|
25
|
+
Given the default rules
|
26
|
+
And I define a parser which counts entry
|
27
|
+
And I run the parser on sample data
|
28
|
+
Then the result is "3"
|
29
|
+
|
30
|
+
Scenario: I can change the separator
|
31
|
+
Given the default rules
|
32
|
+
And I set the separator to "%%"
|
33
|
+
And I define a parser which counts entry
|
34
|
+
And sample data:
|
35
|
+
"""
|
36
|
+
//
|
37
|
+
jdjdj
|
38
|
+
//
|
39
|
+
%%
|
40
|
+
//
|
41
|
+
jjdhhd
|
42
|
+
//
|
43
|
+
%%
|
44
|
+
"""
|
45
|
+
And I run the parser on sample data
|
46
|
+
Then the result is "2"
|
47
|
+
|
48
|
+
Scenario: I can define a simple 'with' rule
|
49
|
+
Given the default rules
|
50
|
+
And I define a simple rule to extract "BB"
|
51
|
+
And I define a simple parser which returns an array
|
52
|
+
And I run the parser on sample data
|
53
|
+
Then the result evals to "%w{ y1 y2 y3}"
|
54
|
+
|
55
|
+
Scenario: I can define a simple 'with_text_after' rule
|
56
|
+
Given the default rules
|
57
|
+
And I define a simple rule to extract text after "CC"
|
58
|
+
And I define a simple parser which returns an array
|
59
|
+
And I run the parser on sample data
|
60
|
+
Then the result evals to "%w{ abcd efgh ijkl }"
|
61
|
+
|
62
|
+
|
63
|
+
Scenario: I can define several rules
|
64
|
+
Given the default rules
|
65
|
+
And I define a simple rule to add "BB" to an array
|
66
|
+
And I define a simple rule to add "CC" to an array
|
67
|
+
And I define a simple parser which returns an array
|
68
|
+
And I run the parser on sample data
|
69
|
+
Then the result evals to "[ %w{y1 z1}, %w{y2 z2}, %w{y3 z3}]"
|
70
|
+
|
71
|
+
Scenario: I can redefine rules
|
72
|
+
Given the default rules
|
73
|
+
And I define a simple rule to extract "CC"
|
74
|
+
And I define a simple rule to return "foo" with "CC"
|
75
|
+
And I define a simple parser which returns an array
|
76
|
+
And I run the parser on sample data
|
77
|
+
Then the result evals to "%w{foo foo foo}"
|
78
|
+
|
79
|
+
|
@@ -0,0 +1,52 @@
|
|
1
|
+
Feature:
|
2
|
+
SwissParser comes with user-friendly features.
|
3
|
+
|
4
|
+
Background:
|
5
|
+
Given sample data:
|
6
|
+
"""
|
7
|
+
AA x1
|
8
|
+
BB y1
|
9
|
+
CC z1
|
10
|
+
abcd
|
11
|
+
//
|
12
|
+
AA x2
|
13
|
+
BB y2
|
14
|
+
CC z2
|
15
|
+
efgh
|
16
|
+
//
|
17
|
+
AA x3
|
18
|
+
BB y3
|
19
|
+
CC z3
|
20
|
+
ijkl
|
21
|
+
//
|
22
|
+
"""
|
23
|
+
|
24
|
+
Scenario: Parsing options
|
25
|
+
Given the default rules
|
26
|
+
And I define a simple rule to return option "foo" with "BB"
|
27
|
+
And I define a simple parser which returns an array
|
28
|
+
And I set option "foo" = "bar"
|
29
|
+
And I run the parser on sample data
|
30
|
+
Then the result evals to "%w{ bar bar bar}"
|
31
|
+
|
32
|
+
@skip
|
33
|
+
Scenario: Parsing from file
|
34
|
+
Given the default rules
|
35
|
+
And I define a simple parser which returns an array
|
36
|
+
When I run the parser on file "input.txt"
|
37
|
+
Then File.open should be called with "input.txt"
|
38
|
+
|
39
|
+
@skip
|
40
|
+
Scenario: Parsing from URI
|
41
|
+
Given the default rules
|
42
|
+
And I define a simple parser which returns an array
|
43
|
+
When I run it on remote file "http://www.example.com/input.txt"
|
44
|
+
Then OpenUri.open should be called with "http://www.example.com/input.txt"
|
45
|
+
|
46
|
+
Scenario: Helper Methods
|
47
|
+
Given the default rules
|
48
|
+
And I define a simple rule to return "bar" via helper with "BB"
|
49
|
+
And I define a simple parser which returns an array
|
50
|
+
And I run the parser on sample data
|
51
|
+
Then the result evals to "%w{ bar bar bar}"
|
52
|
+
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'swissparser'
|
2
|
+
require 'rspec'
|
3
|
+
|
4
|
+
Given /^sample data:$/ do |string|
|
5
|
+
@data = string
|
6
|
+
end
|
7
|
+
|
8
|
+
Given /^the default rules$/ do
|
9
|
+
@rules = Swiss::DefaultRules
|
10
|
+
end
|
11
|
+
|
12
|
+
Given /^I set the separator to "([^\"]*)"$/ do |sep|
|
13
|
+
@rules = @rules.refine do
|
14
|
+
set_separator( sep )
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
Given /^I define a simple rule to extract "([^\"]*)"$/ do |key|
|
19
|
+
@rules = @rules.refine do
|
20
|
+
with( key ) do |content|
|
21
|
+
@text = content
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
Given /^I define a simple rule to extract text after "([^\"]*)"$/ do |key|
|
27
|
+
@rules = @rules.refine do
|
28
|
+
with_text_after( key ) do |content|
|
29
|
+
@text = "" if @text.nil?
|
30
|
+
@text << content
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
Given /^I define a simple rule to add "([^\"]*)" to an array$/ do |key|
|
36
|
+
@rules = @rules.refine do
|
37
|
+
with( key ) do |content|
|
38
|
+
@text = [] if @text.nil?
|
39
|
+
@text << content
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
Given /^I define a simple rule to return "([^\"]*)" with "([^\"]*)"$/ do |val, key|
|
45
|
+
@rules = @rules.refine do
|
46
|
+
with( key ) do |content|
|
47
|
+
@text = val
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
Given /^I define a parser which counts entry$/ do
|
53
|
+
@parser = Swiss::Parser.new(@rules) do |entries|
|
54
|
+
entries.size
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
Given /^I define a simple parser which returns an array$/ do
|
59
|
+
@parser = Swiss::Parser.new(@rules) do |entries|
|
60
|
+
result = []
|
61
|
+
entries.each do |entry|
|
62
|
+
result << entry.text
|
63
|
+
end
|
64
|
+
result
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
Given /^I run the parser on sample data$/ do
|
69
|
+
@result = if @opt.nil?
|
70
|
+
@parser.parse( @data )
|
71
|
+
else
|
72
|
+
@parser.parse( @data, @opt )
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
Then /^the result evals to "([^\"]*)"$/ do |expected|
|
78
|
+
obj = eval( expected )
|
79
|
+
@result.should == obj
|
80
|
+
end
|
81
|
+
|
82
|
+
Then /^the result is "([^\"]*)"$/ do |expected|
|
83
|
+
@result.to_s.should == expected
|
84
|
+
end
|