swissparser 0.11.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +9 -0
- data/CHANGELOG.rdoc +9 -0
- data/README.rdoc +28 -17
- data/Rakefile +2 -2
- data/Rakefile.compiled.rbc +622 -0
- data/examples/kegg_demo.rb +39 -63
- data/examples/uniprot.rb +85 -0
- data/features/basic_parsing.feature +79 -30
- data/features/extra.feature +52 -0
- data/features/step_definitions/basic_steps.rb +84 -0
- data/features/step_definitions/sugar_steps.rb +71 -0
- data/lib/swissparser.rb +39 -194
- data/lib/swissparser.rbc +928 -0
- data/lib/swissparser/entries.rb +137 -0
- data/lib/swissparser/entries.rbc +2360 -0
- data/lib/swissparser/rules.rb +112 -0
- data/lib/swissparser/rules.rbc +1699 -0
- metadata +55 -32
- data/benchmarks/whole_uniprot.txt +0 -7
- data/examples/parse_from_uri.rb +0 -88
- data/examples/signal_demo.rb +0 -100
- data/examples/tutorial_1.rb +0 -88
- data/examples/tutorial_2.rb +0 -65
- data/examples/uniprot_param_demo.rb +0 -85
- data/features/parser_extension.feature +0 -83
- data/features/parsing_context.feature +0 -48
- data/features/polite.feature +0 -16
- data/features/step_definitions/core.rb +0 -71
- data/features/step_definitions/definitions.rb +0 -68
- data/features/step_definitions/extra.rb +0 -56
- data/lib/swiss_parser.rb +0 -13
- data/lib/swissparser/parsing_context.rb +0 -60
- data/lib/swissparser/parsing_rules.rb +0 -39
data/examples/kegg_demo.rb
CHANGED
@@ -1,104 +1,80 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
1
|
require 'swissparser.rb'
|
21
2
|
require 'yaml'
|
22
|
-
|
3
|
+
|
23
4
|
class Enzyme
|
24
5
|
|
25
6
|
attr_accessor :id, :genes
|
26
7
|
|
27
8
|
end
|
28
9
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
10
|
+
module Kegg
|
11
|
+
|
12
|
+
Parser = Swiss::Rules.define do
|
13
|
+
|
14
|
+
helpers do
|
15
|
+
def parse_gene_ids(string)
|
16
|
+
string.split(" ").each do |item|
|
17
|
+
if item =~ /(\d+)\(\w+\)/
|
18
|
+
unless @genes
|
19
|
+
@genes = []
|
20
|
+
end
|
21
|
+
@genes << $1
|
22
|
+
end
|
23
|
+
end
|
42
24
|
end
|
43
25
|
end
|
44
|
-
end
|
45
|
-
|
46
|
-
rules do
|
47
26
|
|
48
27
|
human = "HSA"
|
49
28
|
|
50
29
|
set_separator( "///" )
|
51
30
|
|
52
|
-
with("ENTRY") do |content
|
31
|
+
with("ENTRY") do |content|
|
53
32
|
content =~ /((\d+|-)\.(\d+|-)\.(\d+|-)\.(\d+|-))/
|
54
|
-
|
33
|
+
@id = $1
|
55
34
|
end
|
56
35
|
|
57
|
-
with("GENES") do |content
|
58
|
-
content =~ /^([A-Z]+): (.*)/
|
36
|
+
with("GENES") do |content|
|
37
|
+
content =~ /^([A-Z]+): (.*)/
|
59
38
|
org,genes = $1,$2
|
60
|
-
|
39
|
+
@last_organism = org
|
61
40
|
if org == human
|
62
|
-
parse_gene_ids( genes
|
41
|
+
parse_gene_ids( genes )
|
63
42
|
end
|
64
43
|
end
|
65
44
|
|
66
|
-
with_text_after("GENES") do |content
|
45
|
+
with_text_after("GENES") do |content|
|
67
46
|
if content =~ /([A-Z]+): (.*)/
|
68
47
|
org,genes = $1,$2
|
69
|
-
|
48
|
+
@last_organism = org
|
70
49
|
if org == human
|
71
|
-
parse_gene_ids( genes
|
50
|
+
parse_gene_ids( genes )
|
72
51
|
end
|
73
|
-
elsif
|
74
|
-
parse_gene_ids( content
|
75
|
-
end
|
52
|
+
elsif @last_organism == human
|
53
|
+
parse_gene_ids( content )
|
54
|
+
end
|
76
55
|
end
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
finish_entry do |entry,container|
|
81
|
-
if entry[:genes].size > 0
|
56
|
+
end.make_parser do |entries|
|
57
|
+
results = []
|
58
|
+
entries.each do |entry|
|
82
59
|
e = Enzyme.new
|
83
|
-
e.id = entry
|
84
|
-
e.genes = entry
|
85
|
-
|
60
|
+
e.id = entry.id
|
61
|
+
e.genes = entry.genes
|
62
|
+
results << e
|
86
63
|
end
|
64
|
+
results
|
87
65
|
end
|
88
|
-
|
89
66
|
end
|
90
67
|
|
91
|
-
|
68
|
+
|
92
69
|
if $0 == __FILE__
|
93
|
-
|
70
|
+
|
94
71
|
filename = ARGV.shift
|
95
|
-
|
96
|
-
enzymes =
|
72
|
+
|
73
|
+
enzymes = Kegg::Parser.parse_file( filename )
|
97
74
|
|
98
75
|
enzymes.each do |e|
|
99
76
|
puts e.to_yaml
|
100
77
|
end
|
101
|
-
|
78
|
+
|
102
79
|
end
|
103
80
|
|
104
|
-
|
data/examples/uniprot.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
#!/usr/bin/ruby -w
|
2
|
+
|
3
|
+
require 'yaml'
|
4
|
+
require 'swissparser'
|
5
|
+
|
6
|
+
class Protein
|
7
|
+
|
8
|
+
attr_accessor :swiss_id, :size, :species, :taxonomy, :sequence
|
9
|
+
|
10
|
+
def initialize
|
11
|
+
@taxonomy = []
|
12
|
+
@sequence = ""
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
16
|
+
|
17
|
+
module Uniprot
|
18
|
+
|
19
|
+
Rules = Swiss::Rules.define do
|
20
|
+
|
21
|
+
# Parse the uniprot id
|
22
|
+
with("ID") do |content|
|
23
|
+
content =~ /([A-Z]\w+)\D+(\d+)/
|
24
|
+
@swiss_id = $1
|
25
|
+
@size = $2.to_i
|
26
|
+
end
|
27
|
+
|
28
|
+
# Parse the organism
|
29
|
+
with("OS") do |content|
|
30
|
+
content =~ /(\w+ \w+)/
|
31
|
+
@species = $1
|
32
|
+
end
|
33
|
+
|
34
|
+
# Parse the complete taxonomy
|
35
|
+
with("OC") do |content|
|
36
|
+
ary = content.gsub(".","").split("; ")
|
37
|
+
if @taxonomy.nil?
|
38
|
+
@taxonomy = []
|
39
|
+
end
|
40
|
+
@taxonomy += ary
|
41
|
+
end
|
42
|
+
|
43
|
+
# Parse the Sequence
|
44
|
+
with_text_after("SQ") do |content|
|
45
|
+
seq = content.strip.gsub(" ","")
|
46
|
+
if @seq.nil?
|
47
|
+
@seq = ""
|
48
|
+
end
|
49
|
+
@seq += seq
|
50
|
+
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
#With the rules defined above, creates a parser
|
55
|
+
# which returns an array of Protein instances.
|
56
|
+
Parser = Rules.make_parser do |entries|
|
57
|
+
results = []
|
58
|
+
entries.each do |e|
|
59
|
+
p = Protein.new
|
60
|
+
p.swiss_id = e.swiss_id
|
61
|
+
p.species = e.species
|
62
|
+
p.taxonomy = e.taxonomy
|
63
|
+
p.sequence = e.seq
|
64
|
+
p.size = e.size
|
65
|
+
results << p
|
66
|
+
end
|
67
|
+
results
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
if $0 == __FILE__
|
74
|
+
|
75
|
+
puts Swiss::VERSION
|
76
|
+
|
77
|
+
filename = ARGV.shift
|
78
|
+
|
79
|
+
proteins = Uniprot::Parser.parse_file( filename )
|
80
|
+
|
81
|
+
proteins.each do |e|
|
82
|
+
puts e.to_yaml
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
@@ -1,30 +1,79 @@
|
|
1
|
-
Feature:
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
1
|
+
Feature:
|
2
|
+
I want to parse a flat-file on my disk.
|
3
|
+
|
4
|
+
Background:
|
5
|
+
Given sample data:
|
6
|
+
"""
|
7
|
+
AA x1
|
8
|
+
BB y1
|
9
|
+
CC z1
|
10
|
+
abcd
|
11
|
+
//
|
12
|
+
AA x2
|
13
|
+
BB y2
|
14
|
+
CC z2
|
15
|
+
efgh
|
16
|
+
//
|
17
|
+
AA x3
|
18
|
+
BB y3
|
19
|
+
CC z3
|
20
|
+
ijkl
|
21
|
+
//
|
22
|
+
"""
|
23
|
+
|
24
|
+
Scenario: By default the separator is "//"
|
25
|
+
Given the default rules
|
26
|
+
And I define a parser which counts entry
|
27
|
+
And I run the parser on sample data
|
28
|
+
Then the result is "3"
|
29
|
+
|
30
|
+
Scenario: I can change the separator
|
31
|
+
Given the default rules
|
32
|
+
And I set the separator to "%%"
|
33
|
+
And I define a parser which counts entry
|
34
|
+
And sample data:
|
35
|
+
"""
|
36
|
+
//
|
37
|
+
jdjdj
|
38
|
+
//
|
39
|
+
%%
|
40
|
+
//
|
41
|
+
jjdhhd
|
42
|
+
//
|
43
|
+
%%
|
44
|
+
"""
|
45
|
+
And I run the parser on sample data
|
46
|
+
Then the result is "2"
|
47
|
+
|
48
|
+
Scenario: I can define a simple 'with' rule
|
49
|
+
Given the default rules
|
50
|
+
And I define a simple rule to extract "BB"
|
51
|
+
And I define a simple parser which returns an array
|
52
|
+
And I run the parser on sample data
|
53
|
+
Then the result evals to "%w{ y1 y2 y3}"
|
54
|
+
|
55
|
+
Scenario: I can define a simple 'with_text_after' rule
|
56
|
+
Given the default rules
|
57
|
+
And I define a simple rule to extract text after "CC"
|
58
|
+
And I define a simple parser which returns an array
|
59
|
+
And I run the parser on sample data
|
60
|
+
Then the result evals to "%w{ abcd efgh ijkl }"
|
61
|
+
|
62
|
+
|
63
|
+
Scenario: I can define several rules
|
64
|
+
Given the default rules
|
65
|
+
And I define a simple rule to add "BB" to an array
|
66
|
+
And I define a simple rule to add "CC" to an array
|
67
|
+
And I define a simple parser which returns an array
|
68
|
+
And I run the parser on sample data
|
69
|
+
Then the result evals to "[ %w{y1 z1}, %w{y2 z2}, %w{y3 z3}]"
|
70
|
+
|
71
|
+
Scenario: I can redefine rules
|
72
|
+
Given the default rules
|
73
|
+
And I define a simple rule to extract "CC"
|
74
|
+
And I define a simple rule to return "foo" with "CC"
|
75
|
+
And I define a simple parser which returns an array
|
76
|
+
And I run the parser on sample data
|
77
|
+
Then the result evals to "%w{foo foo foo}"
|
78
|
+
|
79
|
+
|
@@ -0,0 +1,52 @@
|
|
1
|
+
Feature:
|
2
|
+
SwissParsers comes with user friendly features.
|
3
|
+
|
4
|
+
Background:
|
5
|
+
Given sample data:
|
6
|
+
"""
|
7
|
+
AA x1
|
8
|
+
BB y1
|
9
|
+
CC z1
|
10
|
+
abcd
|
11
|
+
//
|
12
|
+
AA x2
|
13
|
+
BB y2
|
14
|
+
CC z2
|
15
|
+
efgh
|
16
|
+
//
|
17
|
+
AA x3
|
18
|
+
BB y3
|
19
|
+
CC z3
|
20
|
+
ijkl
|
21
|
+
//
|
22
|
+
"""
|
23
|
+
|
24
|
+
Scenario: Parsing options
|
25
|
+
Given the default rules
|
26
|
+
And I define a simple rule to return option "foo" with "BB"
|
27
|
+
And I define a simple parser which returns an array
|
28
|
+
And I set option "foo" = "bar"
|
29
|
+
And I run the parser on sample data
|
30
|
+
Then the result evals to "%w{ bar bar bar}"
|
31
|
+
|
32
|
+
@skip
|
33
|
+
Scenario: Parsing from file
|
34
|
+
Given the default rules
|
35
|
+
And I define a simple parser which returns an array
|
36
|
+
When I run the parser on file "input.txt"
|
37
|
+
Then File.open should be called with "input.txt"
|
38
|
+
|
39
|
+
@skip
|
40
|
+
Scenario: Parsing from URI
|
41
|
+
Given the default rules
|
42
|
+
And I define a simple parser which returns an array
|
43
|
+
When I run it on remote file "http://www.example.com/input.txt"
|
44
|
+
Then OpenUri.open should be called with "http://www.example.com/input.txt"
|
45
|
+
|
46
|
+
Scenario: Helper Methods
|
47
|
+
Given the default rules
|
48
|
+
And I define a simple rule to return "bar" via helper with "BB"
|
49
|
+
And I define a simple parser which returns an array
|
50
|
+
And I run the parser on sample data
|
51
|
+
Then the result evals to "%w{ bar bar bar}"
|
52
|
+
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require 'swissparser'
|
2
|
+
require 'rspec'
|
3
|
+
|
4
|
+
Given /^sample data:$/ do |string|
|
5
|
+
@data = string
|
6
|
+
end
|
7
|
+
|
8
|
+
Given /^the default rules$/ do
|
9
|
+
@rules = Swiss::DefaultRules
|
10
|
+
end
|
11
|
+
|
12
|
+
Given /^I set the separator to "([^\"]*)"$/ do |sep|
|
13
|
+
@rules = @rules.refine do
|
14
|
+
set_separator( sep )
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
Given /^I define a simple rule to extract "([^\"]*)"$/ do |key|
|
19
|
+
@rules = @rules.refine do
|
20
|
+
with( key ) do |content|
|
21
|
+
@text = content
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
Given /^I define a simple rule to extract text after "([^\"]*)"$/ do |key|
|
27
|
+
@rules = @rules.refine do
|
28
|
+
with_text_after( key ) do |content|
|
29
|
+
@text = "" if @text.nil?
|
30
|
+
@text << content
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
Given /^I define a simple rule to add "([^\"]*)" to an array$/ do |key|
|
36
|
+
@rules = @rules.refine do
|
37
|
+
with( key ) do |content|
|
38
|
+
@text = [] if @text.nil?
|
39
|
+
@text << content
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
Given /^I define a simple rule to return "([^\"]*)" with "([^\"]*)"$/ do |val, key|
|
45
|
+
@rules = @rules.refine do
|
46
|
+
with( key ) do |content|
|
47
|
+
@text = val
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
52
|
+
Given /^I define a parser which counts entry$/ do
|
53
|
+
@parser = Swiss::Parser.new(@rules) do |entries|
|
54
|
+
entries.size
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
Given /^I define a simple parser which returns an array$/ do
|
59
|
+
@parser = Swiss::Parser.new(@rules) do |entries|
|
60
|
+
result = []
|
61
|
+
entries.each do |entry|
|
62
|
+
result << entry.text
|
63
|
+
end
|
64
|
+
result
|
65
|
+
end
|
66
|
+
end
|
67
|
+
|
68
|
+
Given /^I run the parser on sample data$/ do
|
69
|
+
@result = if @opt.nil?
|
70
|
+
@parser.parse( @data )
|
71
|
+
else
|
72
|
+
@parser.parse( @data, @opt )
|
73
|
+
end
|
74
|
+
end
|
75
|
+
|
76
|
+
|
77
|
+
Then /^the result evals to "([^\"]*)"$/ do |expected|
|
78
|
+
obj = eval( expected )
|
79
|
+
@result.should == obj
|
80
|
+
end
|
81
|
+
|
82
|
+
Then /^the result is "([^\"]*)"$/ do |expected|
|
83
|
+
@result.to_s.should == expected
|
84
|
+
end
|