swissparser 0.11.1 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +9 -0
- data/CHANGELOG.rdoc +9 -0
- data/README.rdoc +28 -17
- data/Rakefile +2 -2
- data/Rakefile.compiled.rbc +622 -0
- data/examples/kegg_demo.rb +39 -63
- data/examples/uniprot.rb +85 -0
- data/features/basic_parsing.feature +79 -30
- data/features/extra.feature +52 -0
- data/features/step_definitions/basic_steps.rb +84 -0
- data/features/step_definitions/sugar_steps.rb +71 -0
- data/lib/swissparser.rb +39 -194
- data/lib/swissparser.rbc +928 -0
- data/lib/swissparser/entries.rb +137 -0
- data/lib/swissparser/entries.rbc +2360 -0
- data/lib/swissparser/rules.rb +112 -0
- data/lib/swissparser/rules.rbc +1699 -0
- metadata +55 -32
- data/benchmarks/whole_uniprot.txt +0 -7
- data/examples/parse_from_uri.rb +0 -88
- data/examples/signal_demo.rb +0 -100
- data/examples/tutorial_1.rb +0 -88
- data/examples/tutorial_2.rb +0 -65
- data/examples/uniprot_param_demo.rb +0 -85
- data/features/parser_extension.feature +0 -83
- data/features/parsing_context.feature +0 -48
- data/features/polite.feature +0 -16
- data/features/step_definitions/core.rb +0 -71
- data/features/step_definitions/definitions.rb +0 -68
- data/features/step_definitions/extra.rb +0 -56
- data/lib/swiss_parser.rb +0 -13
- data/lib/swissparser/parsing_context.rb +0 -60
- data/lib/swissparser/parsing_rules.rb +0 -39
@@ -1,85 +0,0 @@
|
|
1
|
-
=begin
|
2
|
-
Copyright (C) 2009 Paradigmatic
|
3
|
-
|
4
|
-
This file is part of SwissParser.
|
5
|
-
|
6
|
-
SwissParser is free software: you can redistribute it and/or modify
|
7
|
-
it under the terms of the GNU General Public License as published by
|
8
|
-
the Free Software Foundation, either version 3 of the License, or
|
9
|
-
(at your option) any later version.
|
10
|
-
|
11
|
-
SwissParser is distributed in the hope that it will be useful,
|
12
|
-
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
-
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
-
GNU General Public License for more details.
|
15
|
-
|
16
|
-
You should have received a copy of the GNU General Public License
|
17
|
-
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
-
=end
|
19
|
-
|
20
|
-
#!/usr/bin/ruby -w
|
21
|
-
|
22
|
-
require 'yaml'
|
23
|
-
require 'swissparser.rb'
|
24
|
-
|
25
|
-
class Protein
|
26
|
-
|
27
|
-
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
-
|
29
|
-
def initialize
|
30
|
-
@taxonomy = []
|
31
|
-
@sequence = ""
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
35
|
-
|
36
|
-
|
37
|
-
uniprot_parser = Swiss::Parser.define do
|
38
|
-
|
39
|
-
new_entry do
|
40
|
-
puts param(:msg)
|
41
|
-
Protein.new
|
42
|
-
end
|
43
|
-
|
44
|
-
rules do
|
45
|
-
|
46
|
-
with("ID") do |content,protein|
|
47
|
-
content =~ /([A-Z]\w+)\D+(\d+)/
|
48
|
-
protein.id = $1
|
49
|
-
protein.size = $2.to_i
|
50
|
-
end
|
51
|
-
|
52
|
-
with("OS") do |content,protein|
|
53
|
-
content =~ /(\w+ \w+)/
|
54
|
-
protein.species = $1
|
55
|
-
end
|
56
|
-
|
57
|
-
with("OC") do |content,protein|
|
58
|
-
ary = content.gsub(".","").split("; ")
|
59
|
-
protein.taxonomy += ary
|
60
|
-
end
|
61
|
-
|
62
|
-
with_text_after("SQ") do |content,protein|
|
63
|
-
puts param(:found_seq)
|
64
|
-
seq = content.strip.gsub(" ","")
|
65
|
-
protein.sequence += seq
|
66
|
-
end
|
67
|
-
|
68
|
-
end
|
69
|
-
|
70
|
-
end
|
71
|
-
|
72
|
-
|
73
|
-
if $0 == __FILE__
|
74
|
-
|
75
|
-
filename = ARGV.shift
|
76
|
-
|
77
|
-
entries = uniprot_parser.parse_file( filename, :msg => "Hello", :found_seq => "Youpie" )
|
78
|
-
|
79
|
-
puts entries.size
|
80
|
-
|
81
|
-
entries.each do |e|
|
82
|
-
puts e.to_yaml
|
83
|
-
end
|
84
|
-
|
85
|
-
end
|
@@ -1,83 +0,0 @@
|
|
1
|
-
Feature: Parser Extension
|
2
|
-
I can extend existing parser
|
3
|
-
And replace existing rules
|
4
|
-
|
5
|
-
Background:
|
6
|
-
Given input data
|
7
|
-
"""
|
8
|
-
XX a1
|
9
|
-
YY b1
|
10
|
-
c1
|
11
|
-
//
|
12
|
-
XX a1
|
13
|
-
YY b2
|
14
|
-
c2
|
15
|
-
//
|
16
|
-
"""
|
17
|
-
|
18
|
-
Scenario: Extension without redefinition
|
19
|
-
Given a simple parser
|
20
|
-
When I extend it
|
21
|
-
Then the extended parser should parse it as the original one
|
22
|
-
|
23
|
-
Scenario: With replacing separator
|
24
|
-
Given a simple parser
|
25
|
-
When I extend it
|
26
|
-
And I replace with("XX") to return always 'foo'
|
27
|
-
And I replace with("YY") to do nothing
|
28
|
-
And I run the extended parser on data
|
29
|
-
Then the result should be "[{ 'XX' => 'foo'}, { 'XX' => 'foo'}]"
|
30
|
-
|
31
|
-
Scenario: Text after replacing
|
32
|
-
Given a simple parser
|
33
|
-
When I extend it
|
34
|
-
And I replace with("XX") to do nothing
|
35
|
-
And I replace with("YY") to return always 'bar'
|
36
|
-
And I replace with_text_after("YY") to return always 'foo'
|
37
|
-
And I run the extended parser on data
|
38
|
-
Then the result should be "[{ 'YY' => 'bar', 'txt-YY' => 'foo'}, { 'YY' => 'bar', 'txt-YY' => 'foo'}]"
|
39
|
-
|
40
|
-
Scenario: Separator replacement
|
41
|
-
Given a simple parser
|
42
|
-
And input data
|
43
|
-
"""
|
44
|
-
XX a1
|
45
|
-
YY b1
|
46
|
-
c1
|
47
|
-
%
|
48
|
-
XX a1
|
49
|
-
YY b2
|
50
|
-
c2
|
51
|
-
%
|
52
|
-
"""
|
53
|
-
When I extend it
|
54
|
-
And I set the separator to '%'
|
55
|
-
And I run the extended parser on data
|
56
|
-
Then the result should contain '2' entries
|
57
|
-
|
58
|
-
Scenario: Using custom entries objects
|
59
|
-
Given a simple parser
|
60
|
-
When I extend it
|
61
|
-
And I replace with("XX") to do nothing
|
62
|
-
And I replace with("YY") to do nothing
|
63
|
-
And I return "foo" in new entry
|
64
|
-
And I run the extended parser on data
|
65
|
-
Then the result should be "['foo','foo']"
|
66
|
-
|
67
|
-
Scenario: Changing the container
|
68
|
-
Given a simple parser
|
69
|
-
When I extend it
|
70
|
-
And I replace the container with a counter
|
71
|
-
And I run the extended parser on data
|
72
|
-
Then the result should be "2"
|
73
|
-
|
74
|
-
Scenario: Changing the entry finalization
|
75
|
-
Given a simple parser
|
76
|
-
When I extend it
|
77
|
-
And entry finalize always returns "foo"
|
78
|
-
And I run the extended parser on data
|
79
|
-
Then the result should be "['foo', 'foo']"
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
@@ -1,48 +0,0 @@
|
|
1
|
-
Feature: Sharing context
|
2
|
-
During parsing, rules share a context object.
|
3
|
-
|
4
|
-
Background:
|
5
|
-
Given input data
|
6
|
-
"""
|
7
|
-
XX a1
|
8
|
-
YY b1
|
9
|
-
c1
|
10
|
-
//
|
11
|
-
XX a2
|
12
|
-
YY b2
|
13
|
-
c2
|
14
|
-
//
|
15
|
-
"""
|
16
|
-
|
17
|
-
Scenario: Helper Method
|
18
|
-
Given a simple parser
|
19
|
-
When I extend it
|
20
|
-
And I define 'foo' helper
|
21
|
-
And I call 'foo' helper in after action
|
22
|
-
And I run the extended parser on data
|
23
|
-
Then the result should be "'foo'"
|
24
|
-
|
25
|
-
Scenario: Parsing Parameters
|
26
|
-
Given a simple parser
|
27
|
-
When I extend it
|
28
|
-
And I return param "foo" in after action
|
29
|
-
And I run the extended parser on data with param "foo" = "bar"
|
30
|
-
Then the result should be "'bar'"
|
31
|
-
|
32
|
-
Scenario: Instance variables
|
33
|
-
Given a simple parser
|
34
|
-
When I extend it
|
35
|
-
And the before action sets @foo="bar"
|
36
|
-
And the after action returns @foo
|
37
|
-
And I run the extended parser on data
|
38
|
-
Then the result should be "'bar'"
|
39
|
-
|
40
|
-
Scenario: Skipping entries
|
41
|
-
Given a simple parser
|
42
|
-
When I extend it
|
43
|
-
And I set it to skip entries with("XX") containing "a1"
|
44
|
-
And I run the extended parser on data
|
45
|
-
Then the result should contain '1' entries
|
46
|
-
|
47
|
-
|
48
|
-
|
data/features/polite.feature
DELETED
@@ -1,16 +0,0 @@
|
|
1
|
-
Feature: Politeness
|
2
|
-
SwissParser is polite and reports errors
|
3
|
-
|
4
|
-
Scenario: Missing
|
5
|
-
Given input data
|
6
|
-
"""
|
7
|
-
XX a1
|
8
|
-
YY b1
|
9
|
-
c1
|
10
|
-
//
|
11
|
-
XX a2
|
12
|
-
YY b2
|
13
|
-
c2
|
14
|
-
"""
|
15
|
-
And a simple parser
|
16
|
-
Then the simple parser should raise an error when parsing data
|
@@ -1,71 +0,0 @@
|
|
1
|
-
require 'lib/swissparser'
|
2
|
-
require 'spec/expectations'
|
3
|
-
require 'spec/mocks'
|
4
|
-
|
5
|
-
|
6
|
-
Given /^a simple parser$/ do
|
7
|
-
@simple_parser = Swiss::Parser.define do
|
8
|
-
rules do
|
9
|
-
with("XX") {|c,e| e["XX"] = c}
|
10
|
-
with("YY") {|c,e| e["YY"] = c}
|
11
|
-
end
|
12
|
-
end
|
13
|
-
end
|
14
|
-
|
15
|
-
Given /^input data$/ do |string|
|
16
|
-
@data = string
|
17
|
-
end
|
18
|
-
|
19
|
-
When /^I extend it$/ do
|
20
|
-
@ext_parser = @simple_parser.extend {}
|
21
|
-
end
|
22
|
-
|
23
|
-
When /^I run the simple parser on data$/ do
|
24
|
-
@result = @simple_parser.parse(@data)
|
25
|
-
end
|
26
|
-
|
27
|
-
|
28
|
-
When /^I run the extended parser on data$/ do
|
29
|
-
@result = @ext_parser.parse(@data)
|
30
|
-
end
|
31
|
-
|
32
|
-
When /^I run the extended parser on data with param "([^\"]*)" = "([^\"]*)"$/ do |key, val|
|
33
|
-
@result = @ext_parser.parse(@data, key => val)
|
34
|
-
end
|
35
|
-
|
36
|
-
|
37
|
-
When /^I run it on file "([^\"]*)"$/ do |filename|
|
38
|
-
File.stub!(:open).and_return(@data)
|
39
|
-
@result = @simple_parser.parse_file( filename )
|
40
|
-
end
|
41
|
-
|
42
|
-
|
43
|
-
When /^I run it on a remote file "([^\"]*)"$/ do |arg1|
|
44
|
-
OpenURI.stub!(:open).and_return(@data)
|
45
|
-
end
|
46
|
-
|
47
|
-
Then /^the extended parser should parse it as the original one$/ do
|
48
|
-
@simple_parser.parse( @data ).should == @ext_parser.parse( @data )
|
49
|
-
end
|
50
|
-
|
51
|
-
Then /^the result should be "([^\"]*)"$/ do |ruby_exp|
|
52
|
-
result = eval(ruby_exp)
|
53
|
-
@result.should == result
|
54
|
-
end
|
55
|
-
|
56
|
-
Then /^the result should contain '([^\']*)' entries$/ do |n|
|
57
|
-
@result.size.should == n.to_i
|
58
|
-
end
|
59
|
-
|
60
|
-
|
61
|
-
Then /^File\.open should be called with "([^\"]*)"$/ do |filename|
|
62
|
-
File.should_receive(:open).with(filename,'w')
|
63
|
-
end
|
64
|
-
|
65
|
-
Then /^OpenUri\.open should be called with "([^\"]*)"$/ do |filename|
|
66
|
-
OpenURI.should_receive(:open).with(filename)
|
67
|
-
end
|
68
|
-
|
69
|
-
Then /^the simple parser should raise an error when parsing data$/ do
|
70
|
-
lambda{@simple_parser.parse(@data)}.should raise_error
|
71
|
-
end
|
@@ -1,68 +0,0 @@
|
|
1
|
-
require 'lib/swissparser'
|
2
|
-
require 'spec/expectations'
|
3
|
-
|
4
|
-
When /^I replace with\("([^\"]*)"\) to return always '([^\']*)'$/ do |key,out|
|
5
|
-
@ext_parser = @ext_parser.extend do
|
6
|
-
rules do
|
7
|
-
with( key ) {|c,e| e[key] = out }
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
When /^I replace with\("([^\"]*)"\) to do nothing$/ do |key|
|
13
|
-
@ext_parser = @ext_parser.extend do
|
14
|
-
rules do
|
15
|
-
with( key ) {|c,e| }
|
16
|
-
end
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
When /^I replace with_text_after\("([^\"]*)"\) to return always '([^\']*)'$/ do |key,out|
|
21
|
-
text_key = "txt-#{key}"
|
22
|
-
@ext_parser = @ext_parser.extend do
|
23
|
-
rules do
|
24
|
-
with_text_after( key ) {|c,e| e[text_key] = out }
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
When /^I set the separator to '([^\']*)'$/ do |sep|
|
30
|
-
@ext_parser = @ext_parser.extend do
|
31
|
-
rules do
|
32
|
-
set_separator( sep )
|
33
|
-
end
|
34
|
-
end
|
35
|
-
end
|
36
|
-
|
37
|
-
When /^I return "([^\"]*)" in new entry$/ do |value|
|
38
|
-
@ext_parser = @ext_parser.extend do
|
39
|
-
new_entry { value }
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
When /^I replace the container with a counter$/ do
|
44
|
-
class Counter
|
45
|
-
def initialize
|
46
|
-
@n = 0
|
47
|
-
end
|
48
|
-
def <<(i)
|
49
|
-
@n += 1
|
50
|
-
end
|
51
|
-
def count
|
52
|
-
@n
|
53
|
-
end
|
54
|
-
end
|
55
|
-
@ext_parser = @ext_parser.extend do
|
56
|
-
before { Counter.new }
|
57
|
-
after {|c| c.count }
|
58
|
-
end
|
59
|
-
end
|
60
|
-
|
61
|
-
When /^entry finalize always returns "([^\"]*)"$/ do |val|
|
62
|
-
@ext_parser = @ext_parser.extend do
|
63
|
-
finish_entry {|e,c| c << val }
|
64
|
-
end
|
65
|
-
end
|
66
|
-
|
67
|
-
|
68
|
-
|
@@ -1,56 +0,0 @@
|
|
1
|
-
require 'lib/swissparser'
|
2
|
-
require 'spec/expectations'
|
3
|
-
|
4
|
-
When /^I define '([^\']*)' helper$/ do |name|
|
5
|
-
@ext_parser = @ext_parser.extend do
|
6
|
-
helper(name.to_sym) do
|
7
|
-
name
|
8
|
-
end
|
9
|
-
end
|
10
|
-
end
|
11
|
-
|
12
|
-
When /^I call '([^\']*)' helper in after action$/ do |name|
|
13
|
-
l = eval("lambda { |x| #{name} }")
|
14
|
-
@ext_parser = @ext_parser.extend do
|
15
|
-
after(&l)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
When /^I return param "([^\"]*)" in after action$/ do |name|
|
20
|
-
l = eval("lambda { |x| param(#{name}) }")
|
21
|
-
@ext_parser = @ext_parser.extend do
|
22
|
-
after(&l)
|
23
|
-
end
|
24
|
-
|
25
|
-
end
|
26
|
-
|
27
|
-
When /^the before action sets @foo="([^\"]*)"$/ do |val|
|
28
|
-
@ext_parser = @ext_parser.extend do
|
29
|
-
before { @foo=val; [] }
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
When /^the after action returns @foo$/ do
|
34
|
-
@ext_parser = @ext_parser.extend do
|
35
|
-
after { @foo }
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
When /^set with\("([^\"]*)"\) to skip the entry$/ do |key|
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
When /^I set it to skip entries with\("([^\"]*)"\) containing "([^\"]*)"$/ do |key, val|
|
47
|
-
@ext_parser = @ext_parser.extend do
|
48
|
-
rules do
|
49
|
-
with(key) do |c,e|
|
50
|
-
if c.include?(val)
|
51
|
-
skip_entry!
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
end
|
56
|
-
end
|
data/lib/swiss_parser.rb
DELETED
@@ -1,13 +0,0 @@
|
|
1
|
-
Msg = <<HERE
|
2
|
-
|
3
|
-
=====================================================
|
4
|
-
Since version 0.7, SwissParser is now required with:
|
5
|
-
|
6
|
-
require 'swissparser'
|
7
|
-
|
8
|
-
Please update your code.
|
9
|
-
=====================================================
|
10
|
-
HERE
|
11
|
-
|
12
|
-
|
13
|
-
fail(Msg)
|