swissparser 0.11.1 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +9 -0
- data/CHANGELOG.rdoc +9 -0
- data/README.rdoc +28 -17
- data/Rakefile +2 -2
- data/Rakefile.compiled.rbc +622 -0
- data/examples/kegg_demo.rb +39 -63
- data/examples/uniprot.rb +85 -0
- data/features/basic_parsing.feature +79 -30
- data/features/extra.feature +52 -0
- data/features/step_definitions/basic_steps.rb +84 -0
- data/features/step_definitions/sugar_steps.rb +71 -0
- data/lib/swissparser.rb +39 -194
- data/lib/swissparser.rbc +928 -0
- data/lib/swissparser/entries.rb +137 -0
- data/lib/swissparser/entries.rbc +2360 -0
- data/lib/swissparser/rules.rb +112 -0
- data/lib/swissparser/rules.rbc +1699 -0
- metadata +55 -32
- data/benchmarks/whole_uniprot.txt +0 -7
- data/examples/parse_from_uri.rb +0 -88
- data/examples/signal_demo.rb +0 -100
- data/examples/tutorial_1.rb +0 -88
- data/examples/tutorial_2.rb +0 -65
- data/examples/uniprot_param_demo.rb +0 -85
- data/features/parser_extension.feature +0 -83
- data/features/parsing_context.feature +0 -48
- data/features/polite.feature +0 -16
- data/features/step_definitions/core.rb +0 -71
- data/features/step_definitions/definitions.rb +0 -68
- data/features/step_definitions/extra.rb +0 -56
- data/lib/swiss_parser.rb +0 -13
- data/lib/swissparser/parsing_context.rb +0 -60
- data/lib/swissparser/parsing_rules.rb +0 -39
@@ -1,85 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
|
-
#!/usr/bin/ruby -w
|
21
|
-
|
22
|
-
require 'yaml'
|
23
|
-
require 'swissparser.rb'
|
24
|
-
|
25
|
-
class Protein
|
26
|
-
|
27
|
-
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
-
|
29
|
-
def initialize
|
30
|
-
@taxonomy = []
|
31
|
-
@sequence = ""
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
|
37
|
-
uniprot_parser = Swiss::Parser.define do
|
38
|
-
|
39
|
-
new_entry do
|
40
|
-
puts param(:msg)
|
41
|
-
Protein.new
|
42
|
-
end
|
43
|
-
|
44
|
-
rules do
|
45
|
-
|
46
|
-
with("ID") do |content,protein|
|
47
|
-
content =~ /([A-Z]\w+)\D+(\d+)/
|
48
|
-
protein.id = $1
|
49
|
-
protein.size = $2.to_i
|
50
|
-
end
|
51
|
-
|
52
|
-
with("OS") do |content,protein|
|
53
|
-
content =~ /(\w+ \w+)/
|
54
|
-
protein.species = $1
|
55
|
-
end
|
56
|
-
|
57
|
-
with("OC") do |content,protein|
|
58
|
-
ary = content.gsub(".","").split("; ")
|
59
|
-
protein.taxonomy += ary
|
60
|
-
end
|
61
|
-
|
62
|
-
with_text_after("SQ") do |content,protein|
|
63
|
-
puts param(:found_seq)
|
64
|
-
seq = content.strip.gsub(" ","")
|
65
|
-
protein.sequence += seq
|
66
|
-
end
|
67
|
-
|
68
|
-
end
|
69
|
-
|
70
|
-
end
|
71
|
-
|
72
|
-
|
73
|
-
if $0 == __FILE__
|
74
|
-
|
75
|
-
filename = ARGV.shift
|
76
|
-
|
77
|
-
entries = uniprot_parser.parse_file( filename, :msg => "Hello", :found_seq => "Youpie" )
|
78
|
-
|
79
|
-
puts entries.size
|
80
|
-
|
81
|
-
entries.each do |e|
|
82
|
-
puts e.to_yaml
|
83
|
-
end
|
84
|
-
|
85
|
-
end
|
@@ -1,83 +0,0 @@
|
|
1
|
-
Feature: Parser Extension
|
2
|
-
I can extend an existing parser
|
3
|
-
And replace existing rules
|
4
|
-
|
5
|
-
Background:
|
6
|
-
Given input data
|
7
|
-
"""
|
8
|
-
XX a1
|
9
|
-
YY b1
|
10
|
-
c1
|
11
|
-
//
|
12
|
-
XX a1
|
13
|
-
YY b2
|
14
|
-
c2
|
15
|
-
//
|
16
|
-
"""
|
17
|
-
|
18
|
-
Scenario: Extension without redefinition
|
19
|
-
Given a simple parser
|
20
|
-
When I extend it
|
21
|
-
Then the extended parser should parse it as the original one
|
22
|
-
|
23
|
-
Scenario: With replacing separator
|
24
|
-
Given a simple parser
|
25
|
-
When I extend it
|
26
|
-
And I replace with("XX") to return always 'foo'
|
27
|
-
And I replace with("YY") to do nothing
|
28
|
-
And I run the extended parser on data
|
29
|
-
Then the result should be "[{ 'XX' => 'foo'}, { 'XX' => 'foo'}]"
|
30
|
-
|
31
|
-
Scenario: Text after replacing
|
32
|
-
Given a simple parser
|
33
|
-
When I extend it
|
34
|
-
And I replace with("XX") to do nothing
|
35
|
-
And I replace with("YY") to return always 'bar'
|
36
|
-
And I replace with_text_after("YY") to return always 'foo'
|
37
|
-
And I run the extended parser on data
|
38
|
-
Then the result should be "[{ 'YY' => 'bar', 'txt-YY' => 'foo'}, { 'YY' => 'bar', 'txt-YY' => 'foo'}]"
|
39
|
-
|
40
|
-
Scenario: Separator replacement
|
41
|
-
Given a simple parser
|
42
|
-
And input data
|
43
|
-
"""
|
44
|
-
XX a1
|
45
|
-
YY b1
|
46
|
-
c1
|
47
|
-
%
|
48
|
-
XX a1
|
49
|
-
YY b2
|
50
|
-
c2
|
51
|
-
%
|
52
|
-
"""
|
53
|
-
When I extend it
|
54
|
-
And I set the separator to '%'
|
55
|
-
And I run the extended parser on data
|
56
|
-
Then the result should contain '2' entries
|
57
|
-
|
58
|
-
Scenario: Using custom entries objects
|
59
|
-
Given a simple parser
|
60
|
-
When I extend it
|
61
|
-
And I replace with("XX") to do nothing
|
62
|
-
And I replace with("YY") to do nothing
|
63
|
-
And I return "foo" in new entry
|
64
|
-
And I run the extended parser on data
|
65
|
-
Then the result should be "['foo','foo']"
|
66
|
-
|
67
|
-
Scenario: Changing the container
|
68
|
-
Given a simple parser
|
69
|
-
When I extend it
|
70
|
-
And I replace the container with a counter
|
71
|
-
And I run the extended parser on data
|
72
|
-
Then the result should be "2"
|
73
|
-
|
74
|
-
Scenario: Changing the entry finalization
|
75
|
-
Given a simple parser
|
76
|
-
When I extend it
|
77
|
-
And entry finalize always returns "foo"
|
78
|
-
And I run the extended parser on data
|
79
|
-
Then the result should be "['foo', 'foo']"
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
@@ -1,48 +0,0 @@
|
|
1
|
-
Feature: Sharing context
|
2
|
-
During parsing, rules share a context object.
|
3
|
-
|
4
|
-
Background:
|
5
|
-
Given input data
|
6
|
-
"""
|
7
|
-
XX a1
|
8
|
-
YY b1
|
9
|
-
c1
|
10
|
-
//
|
11
|
-
XX a2
|
12
|
-
YY b2
|
13
|
-
c2
|
14
|
-
//
|
15
|
-
"""
|
16
|
-
|
17
|
-
Scenario: Helper Method
|
18
|
-
Given a simple parser
|
19
|
-
When I extend it
|
20
|
-
And I define 'foo' helper
|
21
|
-
And I call 'foo' helper in after action
|
22
|
-
And I run the extended parser on data
|
23
|
-
Then the result should be "'foo'"
|
24
|
-
|
25
|
-
Scenario: Parsing Parameters
|
26
|
-
Given a simple parser
|
27
|
-
When I extend it
|
28
|
-
And I return param "foo" in after action
|
29
|
-
And I run the extended parser on data with param "foo" = "bar"
|
30
|
-
Then the result should be "'bar'"
|
31
|
-
|
32
|
-
Scenario: Instance variables
|
33
|
-
Given a simple parser
|
34
|
-
When I extend it
|
35
|
-
And the before action sets @foo="bar"
|
36
|
-
And the after action returns @foo
|
37
|
-
And I run the extended parser on data
|
38
|
-
Then the result should be "'bar'"
|
39
|
-
|
40
|
-
Scenario: Skipping entries
|
41
|
-
Given a simple parser
|
42
|
-
When I extend it
|
43
|
-
And I set it to skip entries with("XX") containing "a1"
|
44
|
-
And I run the extended parser on data
|
45
|
-
Then the result should contain '1' entries
|
46
|
-
|
47
|
-
|
48
|
-
|
data/features/polite.feature
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
Feature: Politeness
|
2
|
-
SwissParser is polite and reports errors
|
3
|
-
|
4
|
-
Scenario: Missing
|
5
|
-
Given input data
|
6
|
-
"""
|
7
|
-
XX a1
|
8
|
-
YY b1
|
9
|
-
c1
|
10
|
-
//
|
11
|
-
XX a2
|
12
|
-
YY b2
|
13
|
-
c2
|
14
|
-
"""
|
15
|
-
And a simple parser
|
16
|
-
Then the simple parser should raise an error when parsing data
|
@@ -1,71 +0,0 @@
|
|
1
|
-
require 'lib/swissparser'
|
2
|
-
require 'spec/expectations'
|
3
|
-
require 'spec/mocks'
|
4
|
-
|
5
|
-
|
6
|
-
Given /^a simple parser$/ do
|
7
|
-
@simple_parser = Swiss::Parser.define do
|
8
|
-
rules do
|
9
|
-
with("XX") {|c,e| e["XX"] = c}
|
10
|
-
with("YY") {|c,e| e["YY"] = c}
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
Given /^input data$/ do |string|
|
16
|
-
@data = string
|
17
|
-
end
|
18
|
-
|
19
|
-
When /^I extend it$/ do
|
20
|
-
@ext_parser = @simple_parser.extend {}
|
21
|
-
end
|
22
|
-
|
23
|
-
When /^I run the simple parser on data$/ do
|
24
|
-
@result = @simple_parser.parse(@data)
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
When /^I run the extended parser on data$/ do
|
29
|
-
@result = @ext_parser.parse(@data)
|
30
|
-
end
|
31
|
-
|
32
|
-
When /^I run the extended parser on data with param "([^\"]*)" = "([^\"]*)"$/ do |key, val|
|
33
|
-
@result = @ext_parser.parse(@data, key => val)
|
34
|
-
end
|
35
|
-
|
36
|
-
|
37
|
-
When /^I run it on file "([^\"]*)"$/ do |filename|
|
38
|
-
File.stub!(:open).and_return(@data)
|
39
|
-
@result = @simple_parser.parse_file( filename )
|
40
|
-
end
|
41
|
-
|
42
|
-
|
43
|
-
When /^I run it on a remote file "([^\"]*)"$/ do |arg1|
|
44
|
-
OpenURI.stub!(:open).and_return(@data)
|
45
|
-
end
|
46
|
-
|
47
|
-
Then /^the extended parser should parse it as the original one$/ do
|
48
|
-
@simple_parser.parse( @data ).should == @ext_parser.parse( @data )
|
49
|
-
end
|
50
|
-
|
51
|
-
Then /^the result should be "([^\"]*)"$/ do |ruby_exp|
|
52
|
-
result = eval(ruby_exp)
|
53
|
-
@result.should == result
|
54
|
-
end
|
55
|
-
|
56
|
-
Then /^the result should contain '([^\']*)' entries$/ do |n|
|
57
|
-
@result.size.should == n.to_i
|
58
|
-
end
|
59
|
-
|
60
|
-
|
61
|
-
Then /^File\.open should be called with "([^\"]*)"$/ do |filename|
|
62
|
-
File.should_receive(:open).with(filename,'w')
|
63
|
-
end
|
64
|
-
|
65
|
-
Then /^OpenUri\.open should be called with "([^\"]*)"$/ do |filename|
|
66
|
-
OpenURI.should_receive(:open).with(filename)
|
67
|
-
end
|
68
|
-
|
69
|
-
Then /^the simple parser should raise an error when parsing data$/ do
|
70
|
-
lambda{@simple_parser.parse(@data)}.should raise_error
|
71
|
-
end
|
@@ -1,68 +0,0 @@
|
|
1
|
-
require 'lib/swissparser'
|
2
|
-
require 'spec/expectations'
|
3
|
-
|
4
|
-
When /^I replace with\("([^\"]*)"\) to return always '([^\']*)'$/ do |key,out|
|
5
|
-
@ext_parser = @ext_parser.extend do
|
6
|
-
rules do
|
7
|
-
with( key ) {|c,e| e[key] = out }
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
When /^I replace with\("([^\"]*)"\) to do nothing$/ do |key|
|
13
|
-
@ext_parser = @ext_parser.extend do
|
14
|
-
rules do
|
15
|
-
with( key ) {|c,e| }
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
When /^I replace with_text_after\("([^\"]*)"\) to return always '([^\']*)'$/ do |key,out|
|
21
|
-
text_key = "txt-#{key}"
|
22
|
-
@ext_parser = @ext_parser.extend do
|
23
|
-
rules do
|
24
|
-
with_text_after( key ) {|c,e| e[text_key] = out }
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
When /^I set the separator to '([^\']*)'$/ do |sep|
|
30
|
-
@ext_parser = @ext_parser.extend do
|
31
|
-
rules do
|
32
|
-
set_separator( sep )
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
When /^I return "([^\"]*)" in new entry$/ do |value|
|
38
|
-
@ext_parser = @ext_parser.extend do
|
39
|
-
new_entry { value }
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
When /^I replace the container with a counter$/ do
|
44
|
-
class Counter
|
45
|
-
def initialize
|
46
|
-
@n = 0
|
47
|
-
end
|
48
|
-
def <<(i)
|
49
|
-
@n += 1
|
50
|
-
end
|
51
|
-
def count
|
52
|
-
@n
|
53
|
-
end
|
54
|
-
end
|
55
|
-
@ext_parser = @ext_parser.extend do
|
56
|
-
before { Counter.new }
|
57
|
-
after {|c| c.count }
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
When /^entry finalize always returns "([^\"]*)"$/ do |val|
|
62
|
-
@ext_parser = @ext_parser.extend do
|
63
|
-
finish_entry {|e,c| c << val }
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
|
68
|
-
|
@@ -1,56 +0,0 @@
|
|
1
|
-
require 'lib/swissparser'
|
2
|
-
require 'spec/expectations'
|
3
|
-
|
4
|
-
When /^I define '([^\']*)' helper$/ do |name|
|
5
|
-
@ext_parser = @ext_parser.extend do
|
6
|
-
helper(name.to_sym) do
|
7
|
-
name
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
When /^I call '([^\']*)' helper in after action$/ do |name|
|
13
|
-
l = eval("lambda { |x| #{name} }")
|
14
|
-
@ext_parser = @ext_parser.extend do
|
15
|
-
after(&l)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
When /^I return param "([^\"]*)" in after action$/ do |name|
|
20
|
-
l = eval("lambda { |x| param(#{name}) }")
|
21
|
-
@ext_parser = @ext_parser.extend do
|
22
|
-
after(&l)
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
26
|
-
|
27
|
-
When /^the before action sets @foo="([^\"]*)"$/ do |val|
|
28
|
-
@ext_parser = @ext_parser.extend do
|
29
|
-
before { @foo=val; [] }
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
When /^the after action returns @foo$/ do
|
34
|
-
@ext_parser = @ext_parser.extend do
|
35
|
-
after { @foo }
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
When /^set with\("([^\"]*)"\) to skip the entry$/ do |key|
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
When /^I set it to skip entries with\("([^\"]*)"\) containing "([^\"]*)"$/ do |key, val|
|
47
|
-
@ext_parser = @ext_parser.extend do
|
48
|
-
rules do
|
49
|
-
with(key) do |c,e|
|
50
|
-
if c.include?(val)
|
51
|
-
skip_entry!
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
data/lib/swiss_parser.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
Msg = <<HERE
|
2
|
-
|
3
|
-
=====================================================
|
4
|
-
Since version 0.7, SwissParser is now required with:
|
5
|
-
|
6
|
-
require 'swissparser'
|
7
|
-
|
8
|
-
Please update your code.
|
9
|
-
=====================================================
|
10
|
-
HERE
|
11
|
-
|
12
|
-
|
13
|
-
fail(Msg)
|