swissparser 0.11.1 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,104 +1,80 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
3
-
4
- This file is part of SwissParser.
5
-
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
10
-
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
15
-
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
-
20
1
  require 'swissparser.rb'
21
2
  require 'yaml'
22
-
3
+
23
4
  class Enzyme
24
5
 
25
6
  attr_accessor :id, :genes
26
7
 
27
8
  end
28
9
 
29
-
30
- enzyme_parser = Swiss::Parser.define do
31
-
32
-
33
- new_entry do
34
- { :genes => [] }
35
- end
36
-
37
-
38
- helper :parse_gene_ids do |string, entry|
39
- string.split(" ").each do |item|
40
- if item =~ /(\d+)\(\w+\)/
41
- entry[:genes] << $1
10
+ module Kegg
11
+
12
+ Parser = Swiss::Rules.define do
13
+
14
+ helpers do
15
+ def parse_gene_ids(string)
16
+ string.split(" ").each do |item|
17
+ if item =~ /(\d+)\(\w+\)/
18
+ unless @genes
19
+ @genes = []
20
+ end
21
+ @genes << $1
22
+ end
23
+ end
42
24
  end
43
25
  end
44
- end
45
-
46
- rules do
47
26
 
48
27
  human = "HSA"
49
28
 
50
29
  set_separator( "///" )
51
30
 
52
- with("ENTRY") do |content,entry|
31
+ with("ENTRY") do |content|
53
32
  content =~ /((\d+|-)\.(\d+|-)\.(\d+|-)\.(\d+|-))/
54
- entry[:id] = $1
33
+ @id = $1
55
34
  end
56
35
 
57
- with("GENES") do |content,entry|
58
- content =~ /^([A-Z]+): (.*)/
36
+ with("GENES") do |content|
37
+ content =~ /^([A-Z]+): (.*)/
59
38
  org,genes = $1,$2
60
- entry[:last_organism] = org
39
+ @last_organism = org
61
40
  if org == human
62
- parse_gene_ids( genes, entry )
41
+ parse_gene_ids( genes )
63
42
  end
64
43
  end
65
44
 
66
- with_text_after("GENES") do |content,entry|
45
+ with_text_after("GENES") do |content|
67
46
  if content =~ /([A-Z]+): (.*)/
68
47
  org,genes = $1,$2
69
- entry[:last_organism] = org
48
+ @last_organism = org
70
49
  if org == human
71
- parse_gene_ids( genes, entry )
50
+ parse_gene_ids( genes )
72
51
  end
73
- elsif entry[:last_organism] == human
74
- parse_gene_ids( content, entry )
75
- end
52
+ elsif @last_organism == human
53
+ parse_gene_ids( content )
54
+ end
76
55
  end
77
-
78
- end
79
-
80
- finish_entry do |entry,container|
81
- if entry[:genes].size > 0
56
+ end.make_parser do |entries|
57
+ results = []
58
+ entries.each do |entry|
82
59
  e = Enzyme.new
83
- e.id = entry[:id]
84
- e.genes = entry[:genes]
85
- container << e
60
+ e.id = entry.id
61
+ e.genes = entry.genes
62
+ results << e
86
63
  end
64
+ results
87
65
  end
88
-
89
66
  end
90
67
 
91
-
68
+
92
69
  if $0 == __FILE__
93
-
70
+
94
71
  filename = ARGV.shift
95
-
96
- enzymes = enzyme_parser.parse_file( filename )
72
+
73
+ enzymes = Kegg::Parser.parse_file( filename )
97
74
 
98
75
  enzymes.each do |e|
99
76
  puts e.to_yaml
100
77
  end
101
-
78
+
102
79
  end
103
80
 
104
-
@@ -0,0 +1,85 @@
1
+ #!/usr/bin/ruby -w
2
+
3
+ require 'yaml'
4
+ require 'swissparser'
5
+
6
+ class Protein
7
+
8
+ attr_accessor :swiss_id, :size, :species, :taxonomy, :sequence
9
+
10
+ def initialize
11
+ @taxonomy = []
12
+ @sequence = ""
13
+ end
14
+
15
+ end
16
+
17
+ module Uniprot
18
+
19
+ Rules = Swiss::Rules.define do
20
+
21
+ # Parse the UniProt ID
22
+ with("ID") do |content|
23
+ content =~ /([A-Z]\w+)\D+(\d+)/
24
+ @swiss_id = $1
25
+ @size = $2.to_i
26
+ end
27
+
28
+ # Parse the organism
29
+ with("OS") do |content|
30
+ content =~ /(\w+ \w+)/
31
+ @species = $1
32
+ end
33
+
34
+ # Parse the complete taxonomy
35
+ with("OC") do |content|
36
+ ary = content.gsub(".","").split("; ")
37
+ if @taxonomy.nil?
38
+ @taxonomy = []
39
+ end
40
+ @taxonomy += ary
41
+ end
42
+
43
+ # Parse the Sequence
44
+ with_text_after("SQ") do |content|
45
+ seq = content.strip.gsub(" ","")
46
+ if @seq.nil?
47
+ @seq = ""
48
+ end
49
+ @seq += seq
50
+ end
51
+
52
+ end
53
+
54
+ # With the rules defined above, creates a parser
55
+ # which returns an array of Protein instances.
56
+ Parser = Rules.make_parser do |entries|
57
+ results = []
58
+ entries.each do |e|
59
+ p = Protein.new
60
+ p.swiss_id = e.swiss_id
61
+ p.species = e.species
62
+ p.taxonomy = e.taxonomy
63
+ p.sequence = e.seq
64
+ p.size = e.size
65
+ results << p
66
+ end
67
+ results
68
+ end
69
+
70
+ end
71
+
72
+
73
+ if $0 == __FILE__
74
+
75
+ puts Swiss::VERSION
76
+
77
+ filename = ARGV.shift
78
+
79
+ proteins = Uniprot::Parser.parse_file( filename )
80
+
81
+ proteins.each do |e|
82
+ puts e.to_yaml
83
+ end
84
+
85
+ end
@@ -1,30 +1,79 @@
1
- Feature: Basic Parsing
2
- I can parse from different sources
3
-
4
- Background:
5
- Given input data
6
- """
7
- XX a1
8
- YY b1
9
- c1
10
- //
11
- XX a2
12
- YY b2
13
- c2
14
- //
15
- """
16
-
17
- Scenario: Extension without redefinition
18
- Given a simple parser
19
- When I run the simple parser on data
20
- Then the result should be "[{'XX'=>'a1','YY'=>'b1'},{'XX'=>'a2','YY'=>'b2'}]"
21
-
22
- Scenario: Parsing from file
23
- Given a simple parser
24
- When I run it on file "input.txt"
25
- Then File.open should be called with "input.txt"
26
-
27
- Scenario: Parsing from URI
28
- Given a simple parser
29
- When I run it on a remote file "http://www.example.com/input.txt"
30
- Then OpenUri.open should be called with "http://www.example.com/input.txt"
1
+ Feature:
2
+ I want to parse a flat-file on my disk.
3
+
4
+ Background:
5
+ Given sample data:
6
+ """
7
+ AA x1
8
+ BB y1
9
+ CC z1
10
+ abcd
11
+ //
12
+ AA x2
13
+ BB y2
14
+ CC z2
15
+ efgh
16
+ //
17
+ AA x3
18
+ BB y3
19
+ CC z3
20
+ ijkl
21
+ //
22
+ """
23
+
24
+ Scenario: By default the separator is "//"
25
+ Given the default rules
26
+ And I define a parser which counts entry
27
+ And I run the parser on sample data
28
+ Then the result is "3"
29
+
30
+ Scenario: I can change the separator
31
+ Given the default rules
32
+ And I set the separator to "%%"
33
+ And I define a parser which counts entry
34
+ And sample data:
35
+ """
36
+ //
37
+ jdjdj
38
+ //
39
+ %%
40
+ //
41
+ jjdhhd
42
+ //
43
+ %%
44
+ """
45
+ And I run the parser on sample data
46
+ Then the result is "2"
47
+
48
+ Scenario: I can define a simple 'with' rule
49
+ Given the default rules
50
+ And I define a simple rule to extract "BB"
51
+ And I define a simple parser which returns an array
52
+ And I run the parser on sample data
53
+ Then the result evals to "%w{ y1 y2 y3}"
54
+
55
+ Scenario: I can define a simple 'with_text_after' rule
56
+ Given the default rules
57
+ And I define a simple rule to extract text after "CC"
58
+ And I define a simple parser which returns an array
59
+ And I run the parser on sample data
60
+ Then the result evals to "%w{ abcd efgh ijkl }"
61
+
62
+
63
+ Scenario: I can define several rules
64
+ Given the default rules
65
+ And I define a simple rule to add "BB" to an array
66
+ And I define a simple rule to add "CC" to an array
67
+ And I define a simple parser which returns an array
68
+ And I run the parser on sample data
69
+ Then the result evals to "[ %w{y1 z1}, %w{y2 z2}, %w{y3 z3}]"
70
+
71
+ Scenario: I can redefine rules
72
+ Given the default rules
73
+ And I define a simple rule to extract "CC"
74
+ And I define a simple rule to return "foo" with "CC"
75
+ And I define a simple parser which returns an array
76
+ And I run the parser on sample data
77
+ Then the result evals to "%w{foo foo foo}"
78
+
79
+
@@ -0,0 +1,52 @@
1
+ Feature:
2
+ SwissParser comes with user-friendly features.
3
+
4
+ Background:
5
+ Given sample data:
6
+ """
7
+ AA x1
8
+ BB y1
9
+ CC z1
10
+ abcd
11
+ //
12
+ AA x2
13
+ BB y2
14
+ CC z2
15
+ efgh
16
+ //
17
+ AA x3
18
+ BB y3
19
+ CC z3
20
+ ijkl
21
+ //
22
+ """
23
+
24
+ Scenario: Parsing options
25
+ Given the default rules
26
+ And I define a simple rule to return option "foo" with "BB"
27
+ And I define a simple parser which returns an array
28
+ And I set option "foo" = "bar"
29
+ And I run the parser on sample data
30
+ Then the result evals to "%w{ bar bar bar}"
31
+
32
+ @skip
33
+ Scenario: Parsing from file
34
+ Given the default rules
35
+ And I define a simple parser which returns an array
36
+ When I run the parser on file "input.txt"
37
+ Then File.open should be called with "input.txt"
38
+
39
+ @skip
40
+ Scenario: Parsing from URI
41
+ Given the default rules
42
+ And I define a simple parser which returns an array
43
+ When I run it on remote file "http://www.example.com/input.txt"
44
+ Then OpenUri.open should be called with "http://www.example.com/input.txt"
45
+
46
+ Scenario: Helper Methods
47
+ Given the default rules
48
+ And I define a simple rule to return "bar" via helper with "BB"
49
+ And I define a simple parser which returns an array
50
+ And I run the parser on sample data
51
+ Then the result evals to "%w{ bar bar bar}"
52
+
@@ -0,0 +1,84 @@
1
+ require 'swissparser'
2
+ require 'rspec'
3
+
4
+ Given /^sample data:$/ do |string|
5
+ @data = string
6
+ end
7
+
8
+ Given /^the default rules$/ do
9
+ @rules = Swiss::DefaultRules
10
+ end
11
+
12
+ Given /^I set the separator to "([^\"]*)"$/ do |sep|
13
+ @rules = @rules.refine do
14
+ set_separator( sep )
15
+ end
16
+ end
17
+
18
+ Given /^I define a simple rule to extract "([^\"]*)"$/ do |key|
19
+ @rules = @rules.refine do
20
+ with( key ) do |content|
21
+ @text = content
22
+ end
23
+ end
24
+ end
25
+
26
+ Given /^I define a simple rule to extract text after "([^\"]*)"$/ do |key|
27
+ @rules = @rules.refine do
28
+ with_text_after( key ) do |content|
29
+ @text = "" if @text.nil?
30
+ @text << content
31
+ end
32
+ end
33
+ end
34
+
35
+ Given /^I define a simple rule to add "([^\"]*)" to an array$/ do |key|
36
+ @rules = @rules.refine do
37
+ with( key ) do |content|
38
+ @text = [] if @text.nil?
39
+ @text << content
40
+ end
41
+ end
42
+ end
43
+
44
+ Given /^I define a simple rule to return "([^\"]*)" with "([^\"]*)"$/ do |val, key|
45
+ @rules = @rules.refine do
46
+ with( key ) do |content|
47
+ @text = val
48
+ end
49
+ end
50
+ end
51
+
52
+ Given /^I define a parser which counts entry$/ do
53
+ @parser = Swiss::Parser.new(@rules) do |entries|
54
+ entries.size
55
+ end
56
+ end
57
+
58
+ Given /^I define a simple parser which returns an array$/ do
59
+ @parser = Swiss::Parser.new(@rules) do |entries|
60
+ result = []
61
+ entries.each do |entry|
62
+ result << entry.text
63
+ end
64
+ result
65
+ end
66
+ end
67
+
68
+ Given /^I run the parser on sample data$/ do
69
+ @result = if @opt.nil?
70
+ @parser.parse( @data )
71
+ else
72
+ @parser.parse( @data, @opt )
73
+ end
74
+ end
75
+
76
+
77
+ Then /^the result evals to "([^\"]*)"$/ do |expected|
78
+ obj = eval( expected )
79
+ @result.should == obj
80
+ end
81
+
82
+ Then /^the result is "([^\"]*)"$/ do |expected|
83
+ @result.to_s.should == expected
84
+ end