swissparser 0.11.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,104 +1,80 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
3
-
4
- This file is part of SwissParser.
5
-
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
10
-
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
15
-
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
-
20
1
  require 'swissparser.rb'
21
2
  require 'yaml'
22
-
3
+
23
4
  class Enzyme
24
5
 
25
6
  attr_accessor :id, :genes
26
7
 
27
8
  end
28
9
 
29
-
30
- enzyme_parser = Swiss::Parser.define do
31
-
32
-
33
- new_entry do
34
- { :genes => [] }
35
- end
36
-
37
-
38
- helper :parse_gene_ids do |string, entry|
39
- string.split(" ").each do |item|
40
- if item =~ /(\d+)\(\w+\)/
41
- entry[:genes] << $1
10
+ module Kegg
11
+
12
+ Parser = Swiss::Rules.define do
13
+
14
+ helpers do
15
+ def parse_gene_ids(string)
16
+ string.split(" ").each do |item|
17
+ if item =~ /(\d+)\(\w+\)/
18
+ unless @genes
19
+ @genes = []
20
+ end
21
+ @genes << $1
22
+ end
23
+ end
42
24
  end
43
25
  end
44
- end
45
-
46
- rules do
47
26
 
48
27
  human = "HSA"
49
28
 
50
29
  set_separator( "///" )
51
30
 
52
- with("ENTRY") do |content,entry|
31
+ with("ENTRY") do |content|
53
32
  content =~ /((\d+|-)\.(\d+|-)\.(\d+|-)\.(\d+|-))/
54
- entry[:id] = $1
33
+ @id = $1
55
34
  end
56
35
 
57
- with("GENES") do |content,entry|
58
- content =~ /^([A-Z]+): (.*)/
36
+ with("GENES") do |content|
37
+ content =~ /^([A-Z]+): (.*)/
59
38
  org,genes = $1,$2
60
- entry[:last_organism] = org
39
+ @last_organism = org
61
40
  if org == human
62
- parse_gene_ids( genes, entry )
41
+ parse_gene_ids( genes )
63
42
  end
64
43
  end
65
44
 
66
- with_text_after("GENES") do |content,entry|
45
+ with_text_after("GENES") do |content|
67
46
  if content =~ /([A-Z]+): (.*)/
68
47
  org,genes = $1,$2
69
- entry[:last_organism] = org
48
+ @last_organism = org
70
49
  if org == human
71
- parse_gene_ids( genes, entry )
50
+ parse_gene_ids( genes )
72
51
  end
73
- elsif entry[:last_organism] == human
74
- parse_gene_ids( content, entry )
75
- end
52
+ elsif @last_organism == human
53
+ parse_gene_ids( content )
54
+ end
76
55
  end
77
-
78
- end
79
-
80
- finish_entry do |entry,container|
81
- if entry[:genes].size > 0
56
+ end.make_parser do |entries|
57
+ results = []
58
+ entries.each do |entry|
82
59
  e = Enzyme.new
83
- e.id = entry[:id]
84
- e.genes = entry[:genes]
85
- container << e
60
+ e.id = entry.id
61
+ e.genes = entry.genes
62
+ results << e
86
63
  end
64
+ results
87
65
  end
88
-
89
66
  end
90
67
 
91
-
68
+
92
69
  if $0 == __FILE__
93
-
70
+
94
71
  filename = ARGV.shift
95
-
96
- enzymes = enzyme_parser.parse_file( filename )
72
+
73
+ enzymes = Kegg::Parser.parse_file( filename )
97
74
 
98
75
  enzymes.each do |e|
99
76
  puts e.to_yaml
100
77
  end
101
-
78
+
102
79
  end
103
80
 
104
-
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'yaml'
4
+ require 'swissparser'
5
+
6
+ class Protein
7
+
8
+ attr_accessor :swiss_id, :size, :species, :taxonomy, :sequence
9
+
10
+ def initialize
11
+ @taxonomy = []
12
+ @sequence = ""
13
+ end
14
+
15
+ end
16
+
17
+ module Uniprot
18
+
19
+ Rules = Swiss::Rules.define do
20
+
21
+ # Parse the uniprot id
22
+ with("ID") do |content|
23
+ content =~ /([A-Z]\w+)\D+(\d+)/
24
+ @swiss_id = $1
25
+ @size = $2.to_i
26
+ end
27
+
28
+ # Parse the organism
29
+ with("OS") do |content|
30
+ content =~ /(\w+ \w+)/
31
+ @species = $1
32
+ end
33
+
34
+ # Parse the complete taxonomy
35
+ with("OC") do |content|
36
+ ary = content.gsub(".","").split("; ")
37
+ if @taxonomy.nil?
38
+ @taxonomy = []
39
+ end
40
+ @taxonomy += ary
41
+ end
42
+
43
+ # Parse the Sequence
44
+ with_text_after("SQ") do |content|
45
+ seq = content.strip.gsub(" ","")
46
+ if @seq.nil?
47
+ @seq = ""
48
+ end
49
+ @seq += seq
50
+ end
51
+
52
+ end
53
+
54
+ # With the rules defined above, creates a parser
55
+ # which returns an array of Protein instances.
56
+ Parser = Rules.make_parser do |entries|
57
+ results = []
58
+ entries.each do |e|
59
+ p = Protein.new
60
+ p.swiss_id = e.swiss_id
61
+ p.species = e.species
62
+ p.taxonomy = e.taxonomy
63
+ p.sequence = e.seq
64
+ p.size = e.size
65
+ results << p
66
+ end
67
+ results
68
+ end
69
+
70
+ end
71
+
72
+
73
+ if $0 == __FILE__
74
+
75
+ puts Swiss::VERSION
76
+
77
+ filename = ARGV.shift
78
+
79
+ proteins = Uniprot::Parser.parse_file( filename )
80
+
81
+ proteins.each do |e|
82
+ puts e.to_yaml
83
+ end
84
+
85
+ end
@@ -1,30 +1,79 @@
1
- Feature: Basic Parsing
2
- I can parse from different sources
3
-
4
- Background:
5
- Given input data
6
- """
7
- XX a1
8
- YY b1
9
- c1
10
- //
11
- XX a2
12
- YY b2
13
- c2
14
- //
15
- """
16
-
17
- Scenario: Extension without redefinition
18
- Given a simple parser
19
- When I run the simple parser on data
20
- Then the result should be "[{'XX'=>'a1','YY'=>'b1'},{'XX'=>'a2','YY'=>'b2'}]"
21
-
22
- Scenario: Parsing from file
23
- Given a simple parser
24
- When I run it on file "input.txt"
25
- Then File.open should be called with "input.txt"
26
-
27
- Scenario: Parsing from URI
28
- Given a simple parser
29
- When I run it on a remote file "http://www.example.com/input.txt"
30
- Then OpenUri.open should be called with "http://www.example.com/input.txt"
1
+ Feature:
2
+ I want to parse a flat-file on my disk.
3
+
4
+ Background:
5
+ Given sample data:
6
+ """
7
+ AA x1
8
+ BB y1
9
+ CC z1
10
+ abcd
11
+ //
12
+ AA x2
13
+ BB y2
14
+ CC z2
15
+ efgh
16
+ //
17
+ AA x3
18
+ BB y3
19
+ CC z3
20
+ ijkl
21
+ //
22
+ """
23
+
24
+ Scenario: By default the separator is "//"
25
+ Given the default rules
26
+ And I define a parser which counts entry
27
+ And I run the parser on sample data
28
+ Then the result is "3"
29
+
30
+ Scenario: I can change the separator
31
+ Given the default rules
32
+ And I set the separator to "%%"
33
+ And I define a parser which counts entry
34
+ And sample data:
35
+ """
36
+ //
37
+ jdjdj
38
+ //
39
+ %%
40
+ //
41
+ jjdhhd
42
+ //
43
+ %%
44
+ """
45
+ And I run the parser on sample data
46
+ Then the result is "2"
47
+
48
+ Scenario: I can define a simple 'with' rule
49
+ Given the default rules
50
+ And I define a simple rule to extract "BB"
51
+ And I define a simple parser which returns an array
52
+ And I run the parser on sample data
53
+ Then the result evals to "%w{ y1 y2 y3}"
54
+
55
+ Scenario: I can define a simple 'with_text_after' rule
56
+ Given the default rules
57
+ And I define a simple rule to extract text after "CC"
58
+ And I define a simple parser which returns an array
59
+ And I run the parser on sample data
60
+ Then the result evals to "%w{ abcd efgh ijkl }"
61
+
62
+
63
+ Scenario: I can define several rules
64
+ Given the default rules
65
+ And I define a simple rule to add "BB" to an array
66
+ And I define a simple rule to add "CC" to an array
67
+ And I define a simple parser which returns an array
68
+ And I run the parser on sample data
69
+ Then the result evals to "[ %w{y1 z1}, %w{y2 z2}, %w{y3 z3}]"
70
+
71
+ Scenario: I can redefine rules
72
+ Given the default rules
73
+ And I define a simple rule to extract "CC"
74
+ And I define a simple rule to return "foo" with "CC"
75
+ And I define a simple parser which returns an array
76
+ And I run the parser on sample data
77
+ Then the result evals to "%w{foo foo foo}"
78
+
79
+
@@ -0,0 +1,52 @@
1
+ Feature:
2
+ SwissParser comes with user-friendly features.
3
+
4
+ Background:
5
+ Given sample data:
6
+ """
7
+ AA x1
8
+ BB y1
9
+ CC z1
10
+ abcd
11
+ //
12
+ AA x2
13
+ BB y2
14
+ CC z2
15
+ efgh
16
+ //
17
+ AA x3
18
+ BB y3
19
+ CC z3
20
+ ijkl
21
+ //
22
+ """
23
+
24
+ Scenario: Parsing options
25
+ Given the default rules
26
+ And I define a simple rule to return option "foo" with "BB"
27
+ And I define a simple parser which returns an array
28
+ And I set option "foo" = "bar"
29
+ And I run the parser on sample data
30
+ Then the result evals to "%w{ bar bar bar}"
31
+
32
+ @skip
33
+ Scenario: Parsing from file
34
+ Given the default rules
35
+ And I define a simple parser which returns an array
36
+ When I run the parser on file "input.txt"
37
+ Then File.open should be called with "input.txt"
38
+
39
+ @skip
40
+ Scenario: Parsing from URI
41
+ Given the default rules
42
+ And I define a simple parser which returns an array
43
+ When I run it on remote file "http://www.example.com/input.txt"
44
+ Then OpenUri.open should be called with "http://www.example.com/input.txt"
45
+
46
+ Scenario: Helper Methods
47
+ Given the default rules
48
+ And I define a simple rule to return "bar" via helper with "BB"
49
+ And I define a simple parser which returns an array
50
+ And I run the parser on sample data
51
+ Then the result evals to "%w{ bar bar bar}"
52
+
@@ -0,0 +1,84 @@
1
+ require 'swissparser'
2
+ require 'rspec'
3
+
4
+ Given /^sample data:$/ do |string|
5
+ @data = string
6
+ end
7
+
8
+ Given /^the default rules$/ do
9
+ @rules = Swiss::DefaultRules
10
+ end
11
+
12
+ Given /^I set the separator to "([^\"]*)"$/ do |sep|
13
+ @rules = @rules.refine do
14
+ set_separator( sep )
15
+ end
16
+ end
17
+
18
+ Given /^I define a simple rule to extract "([^\"]*)"$/ do |key|
19
+ @rules = @rules.refine do
20
+ with( key ) do |content|
21
+ @text = content
22
+ end
23
+ end
24
+ end
25
+
26
+ Given /^I define a simple rule to extract text after "([^\"]*)"$/ do |key|
27
+ @rules = @rules.refine do
28
+ with_text_after( key ) do |content|
29
+ @text = "" if @text.nil?
30
+ @text << content
31
+ end
32
+ end
33
+ end
34
+
35
+ Given /^I define a simple rule to add "([^\"]*)" to an array$/ do |key|
36
+ @rules = @rules.refine do
37
+ with( key ) do |content|
38
+ @text = [] if @text.nil?
39
+ @text << content
40
+ end
41
+ end
42
+ end
43
+
44
+ Given /^I define a simple rule to return "([^\"]*)" with "([^\"]*)"$/ do |val, key|
45
+ @rules = @rules.refine do
46
+ with( key ) do |content|
47
+ @text = val
48
+ end
49
+ end
50
+ end
51
+
52
+ Given /^I define a parser which counts entry$/ do
53
+ @parser = Swiss::Parser.new(@rules) do |entries|
54
+ entries.size
55
+ end
56
+ end
57
+
58
+ Given /^I define a simple parser which returns an array$/ do
59
+ @parser = Swiss::Parser.new(@rules) do |entries|
60
+ result = []
61
+ entries.each do |entry|
62
+ result << entry.text
63
+ end
64
+ result
65
+ end
66
+ end
67
+
68
+ Given /^I run the parser on sample data$/ do
69
+ @result = if @opt.nil?
70
+ @parser.parse( @data )
71
+ else
72
+ @parser.parse( @data, @opt )
73
+ end
74
+ end
75
+
76
+
77
+ Then /^the result evals to "([^\"]*)"$/ do |expected|
78
+ obj = eval( expected )
79
+ @result.should == obj
80
+ end
81
+
82
+ Then /^the result is "([^\"]*)"$/ do |expected|
83
+ @result.to_s.should == expected
84
+ end