RubyGems - swissparser - Versions diffs - 0.11.1 → 1.0.0 - Mend

swissparser 0.11.1 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

data/.gitignore +9 -0
data/CHANGELOG.rdoc +9 -0
data/README.rdoc +28 -17
data/Rakefile +2 -2
data/Rakefile.compiled.rbc +622 -0
data/examples/kegg_demo.rb +39 -63
data/examples/uniprot.rb +85 -0
data/features/basic_parsing.feature +79 -30
data/features/extra.feature +52 -0
data/features/step_definitions/basic_steps.rb +84 -0
data/features/step_definitions/sugar_steps.rb +71 -0
data/lib/swissparser.rb +39 -194
data/lib/swissparser.rbc +928 -0
data/lib/swissparser/entries.rb +137 -0
data/lib/swissparser/entries.rbc +2360 -0
data/lib/swissparser/rules.rb +112 -0
data/lib/swissparser/rules.rbc +1699 -0
metadata +55 -32
data/benchmarks/whole_uniprot.txt +0 -7
data/examples/parse_from_uri.rb +0 -88
data/examples/signal_demo.rb +0 -100
data/examples/tutorial_1.rb +0 -88
data/examples/tutorial_2.rb +0 -65
data/examples/uniprot_param_demo.rb +0 -85
data/features/parser_extension.feature +0 -83
data/features/parsing_context.feature +0 -48
data/features/polite.feature +0 -16
data/features/step_definitions/core.rb +0 -71
data/features/step_definitions/definitions.rb +0 -68
data/features/step_definitions/extra.rb +0 -56
data/lib/swiss_parser.rb +0 -13
data/lib/swissparser/parsing_context.rb +0 -60
data/lib/swissparser/parsing_rules.rb +0 -39

data/examples/kegg_demo.rb CHANGED

@@ -1,104 +1,80 @@
-=begin
-Copyright (C) 2009 Paradigmatic
-This file is part of SwissParser.
-SwissParser is free software: you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation, either version 3 of the License, or
-(at your option) any later version.
-SwissParser is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
-along with SwissParser.  If not, see <http://www.gnu.org/licenses/>.
-=end
 require 'swissparser.rb'
 require 'yaml'
 class Enzyme
   attr_accessor :id, :genes
 end
-enzyme_parser = Swiss::Parser.define do
-  new_entry do
-    { :genes => [] }
-  end
-  helper :parse_gene_ids do |string, entry|
-    string.split(" ").each do |item|
-      if item =~ /(\d+)\(\w+\)/
-        entry[:genes] << $1
+module Kegg
+  Parser = Swiss::Rules.define do
+    helpers do
+      def parse_gene_ids(string)
+        string.split(" ").each do |item|
+          if item =~ /(\d+)\(\w+\)/
+            unless @genes
+              @genes = []
+            end
+            @genes << $1
+          end
+        end
       end
     end
-  end
-  rules do
     human = "HSA"
     set_separator( "///" )
-    with("ENTRY") do |content,entry|
+    with("ENTRY") do |content|
       content =~ /((\d+|-)\.(\d+|-)\.(\d+|-)\.(\d+|-))/
-      entry[:id] = $1
+      @id = $1
     end
-    with("GENES") do |content,entry|
-      content =~ /^([A-Z]+): (.*)/
+    with("GENES") do |content|
+      content =~ /^([A-Z]+): (.*)/
       org,genes = $1,$2
-      entry[:last_organism] = org
+      @last_organism = org
       if org == human
-        parse_gene_ids( genes, entry )
+        parse_gene_ids( genes )
       end
     end
-    with_text_after("GENES") do |content,entry|
+    with_text_after("GENES") do |content|
       if content =~ /([A-Z]+): (.*)/
         org,genes = $1,$2
-        entry[:last_organism] = org
+        @last_organism = org
         if org == human
-          parse_gene_ids( genes, entry )
+          parse_gene_ids( genes )
         end
-      elsif entry[:last_organism] == human
-        parse_gene_ids( content, entry )
-      end
+      elsif @last_organism == human
+        parse_gene_ids( content )
+      end
     end
-  end
-  finish_entry do |entry,container|
-    if entry[:genes].size > 0
+  end.make_parser do |entries|
+    results = []
+    entries.each do |entry|
       e = Enzyme.new
-      e.id = entry[:id]
-      e.genes = entry[:genes]
-      container << e
+      e.id = entry.id
+      e.genes = entry.genes
+      results << e
     end
+    results
   end
 end
 if $0 == __FILE__
   filename = ARGV.shift
-  enzymes = enzyme_parser.parse_file( filename )
+  enzymes = Kegg::Parser.parse_file( filename )
   enzymes.each do |e|
     puts e.to_yaml
   end
 end

data/examples/uniprot.rb ADDED

@@ -0,0 +1,85 @@
+#!/usr/bin/ruby -w
+require 'yaml'
+require 'swissparser'
+class Protein
+  attr_accessor :swiss_id, :size, :species, :taxonomy, :sequence
+  def initialize
+    @taxonomy = []
+    @sequence = ""
+  end
+end
+module Uniprot
+  Rules = Swiss::Rules.define do
+    # Parse the uniprot id
+    with("ID") do |content|
+      content =~ /([A-Z]\w+)\D+(\d+)/
+      @swiss_id = $1
+      @size = $2.to_i
+    end
+    # Parse the organism
+    with("OS") do |content|
+      content =~ /(\w+ \w+)/
+      @species = $1
+    end
+    # Parse the complete taxonomy
+    with("OC") do |content|
+      ary = content.gsub(".","").split("; ")
+      if @taxonomy.nil?
+        @taxonomy = []
+      end
+      @taxonomy += ary
+    end
+    # Parse the Sequence
+    with_text_after("SQ") do |content|
+      seq = content.strip.gsub(" ","")
+      if @seq.nil?
+        @seq = ""
+      end
+      @seq += seq
+    end
+  end
+  #With the rules defined above, creates a parser
+  # which returns an array of Protein instances.
+  Parser = Rules.make_parser do |entries|
+    results = []
+    entries.each do |e|
+      p = Protein.new
+      p.swiss_id = e.swiss_id
+      p.species = e.species
+      p.taxonomy = e.taxonomy
+      p.sequence = e.seq
+      p.size = e.size
+      results << p
+    end
+    results
+  end
+end
+if $0 == __FILE__
+  puts Swiss::VERSION
+  filename = ARGV.shift
+  proteins = Uniprot::Parser.parse_file( filename )
+  proteins.each do |e|
+    puts e.to_yaml
+  end
+end

data/features/basic_parsing.feature CHANGED

@@ -1,30 +1,79 @@
-Feature: Basic Parsing
-  I can parse from different sources
-  Background:
-   Given input data
-     """
-     XX a1
-     YY b1
-     c1
-     //
-     XX a2
-     YY b2
-     c2
-     //
-     """
-  Scenario: Extension without redefinition
-    Given a simple parser
-    When I run the simple parser on data
-    Then the result should be "[{'XX'=>'a1','YY'=>'b1'},{'XX'=>'a2','YY'=>'b2'}]"
-  Scenario: Parsing from file
-    Given a simple parser
-    When I run it on file "input.txt"
-    Then File.open should be called with "input.txt"
-  Scenario: Parsing from URI
-    Given a simple parser
-    When I run it on a remote file "http://www.example.com/input.txt"
-    Then OpenUri.open should be called with "http://www.example.com/input.txt"
+Feature:
+I want to parse a flat-file on my disk.
+Background:
+  Given sample data:
+"""
+AA x1
+BB y1
+CC z1
+abcd
+//
+AA x2
+BB y2
+CC z2
+efgh
+//
+AA x3
+BB y3
+CC z3
+ijkl
+//
+"""
+Scenario: By default the separator is "//"
+  Given the default rules
+    And I define a parser which counts entry
+    And I run the parser on sample data
+  Then the result is "3"
+Scenario: I can change the separator
+  Given the default rules
+    And I set the separator to "%%"
+    And I define a parser which counts entry
+    And sample data:
+"""
+//
+jdjdj
+//
+%%
+//
+jjdhhd
+//
+%%
+"""
+    And I run the parser on sample data
+  Then the result is "2"
+Scenario: I can define a simple 'with' rule
+  Given the default rules
+    And I define a simple rule to extract "BB"
+    And I define a simple parser which returns an array
+    And I run the parser on sample data
+  Then the result evals to "%w{ y1 y2 y3}"
+Scenario: I can define a simple 'with_text_after' rule
+  Given the default rules
+    And I define a simple rule to extract text after "CC"
+    And I define a simple parser which returns an array
+    And I run the parser on sample data
+  Then the result evals to "%w{ abcd efgh ijkl }"
+Scenario: I can define several rules
+  Given the default rules
+    And I define a simple rule to add "BB" to an array
+    And I define a simple rule to add "CC" to an array
+    And I define a simple parser which returns an array
+    And I run the parser on sample data
+  Then the result evals to "[ %w{y1 z1}, %w{y2 z2}, %w{y3 z3}]"
+Scenario: I can redefine rules
+  Given the default rules
+    And I define a simple rule to extract "CC"
+    And I define a simple rule to return "foo" with "CC"
+    And I define a simple parser which returns an array
+    And I run the parser on sample data
+  Then the result evals to "%w{foo foo foo}"

data/features/extra.feature ADDED

@@ -0,0 +1,52 @@
+Feature:
+SwissParsers comes with user friendly features.
+Background:
+  Given sample data:
+"""
+AA x1
+BB y1
+CC z1
+abcd
+//
+AA x2
+BB y2
+CC z2
+efgh
+//
+AA x3
+BB y3
+CC z3
+ijkl
+//
+"""
+Scenario: Parsing options
+  Given the default rules
+    And I define a simple rule to return option "foo" with "BB"
+    And I define a simple parser which returns an array
+    And I set option "foo" = "bar"
+    And I run the parser on sample data
+  Then the result evals to "%w{ bar bar bar}"
+@skip
+Scenario: Parsing from file
+  Given the default rules
+    And I define a simple parser which returns an array
+  When I run the parser on file "input.txt"
+  Then File.open should be called with "input.txt"
+@skip
+Scenario: Parsing from URI
+  Given the default rules
+    And I define a simple parser which returns an array
+  When I run it on remote file "http://www.example.com/input.txt"
+  Then OpenUri.open should be called with "http://www.example.com/input.txt"
+Scenario: Helper Methods
+  Given the default rules
+    And I define a simple rule to return "bar" via helper with "BB"
+    And I define a simple parser which returns an array
+    And I run the parser on sample data
+  Then the result evals to "%w{ bar bar bar}"

data/features/step_definitions/basic_steps.rb ADDED

@@ -0,0 +1,84 @@
+require 'swissparser'
+require 'rspec'
+Given /^sample data:$/ do |string|
+  @data = string
+end
+Given /^the default rules$/ do
+  @rules = Swiss::DefaultRules
+end
+Given /^I set the separator to "([^\"]*)"$/ do |sep|
+  @rules = @rules.refine do
+    set_separator( sep )
+  end
+end
+Given /^I define a simple rule to extract "([^\"]*)"$/ do |key|
+  @rules = @rules.refine do
+    with( key ) do |content|
+      @text = content
+    end
+  end
+end
+Given /^I define a simple rule to extract text after "([^\"]*)"$/ do |key|
+  @rules = @rules.refine do
+    with_text_after( key ) do |content|
+      @text = "" if @text.nil?
+      @text << content
+    end
+  end
+end
+Given /^I define a simple rule to add "([^\"]*)" to an array$/ do |key|
+  @rules = @rules.refine do
+    with( key ) do |content|
+      @text = [] if @text.nil?
+      @text << content
+    end
+  end
+end
+Given /^I define a simple rule to return "([^\"]*)" with "([^\"]*)"$/ do |val, key|
+   @rules = @rules.refine do
+    with( key ) do |content|
+      @text = val
+    end
+  end
+end
+Given /^I define a parser which counts entry$/ do
+  @parser =  Swiss::Parser.new(@rules) do |entries|
+    entries.size
+  end
+end
+Given /^I define a simple parser which returns an array$/ do
+  @parser = Swiss::Parser.new(@rules) do |entries|
+    result = []
+    entries.each do |entry|
+      result << entry.text
+    end
+    result
+  end
+end
+Given /^I run the parser on sample data$/ do
+  @result = if @opt.nil?
+              @parser.parse( @data )
+            else
+              @parser.parse( @data, @opt )
+            end
+end
+Then /^the result evals to "([^\"]*)"$/ do |expected|
+  obj = eval( expected )
+  @result.should == obj
+end
+Then /^the result is "([^\"]*)"$/ do |expected|
+  @result.to_s.should == expected
+end