swissparser 0.8.1 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,8 @@
1
+ == 0.9.0 / 2009-11-14
2
+
3
+ * 1 new feature:
4
+ - SwissParser can now directly parse remote files using HTTP or FTP.
5
+
1
6
  == 0.8.1 / 2009-11-14
2
7
 
3
8
  * 1 bugfix:
@@ -6,8 +11,7 @@
6
11
  == 0.8.0 / 2009-11-14
7
12
 
8
13
  * 1 new feature:
9
- - helper methods are now defined with the helper method. When the
10
- parser is extended they can be overriden in a one per one basis.
14
+ - helper methods are now defined with the helper method. When the parser is extended they can be overridden on a one-by-one basis.
11
15
 
12
16
  == 0.7.0 / 2009-11-14
13
17
 
@@ -18,7 +22,7 @@
18
22
 
19
23
  == 0.6.0 / 2009-11-13
20
24
 
21
- * 2 new features
25
+ * 2 new features:
22
26
  - Parsing parameters are now accessed thru the +params+ method
23
27
  and are accessible to parsing rules too.
24
28
  - Helper methods are now defined in a helper block and are
data/README.rdoc CHANGED
@@ -13,8 +13,9 @@ be stable enough to be used for bioinformatics research.
13
13
 
14
14
  * Defines parsers with a clear and compact declarative syntax.
15
15
  * The whole parsing workflow is configurable.
16
- * The user can create new parsers by extending existing parsers.
17
- * Parser have access to global parameters and user defined helper methods.
16
+ * Able to parse remote files accessible from a web or an FTP server.
17
+ * Users can create new parsers by extending existing parsers.
18
+ * A parser has access to global parameters and user-defined helper methods.
18
19
 
19
20
  == USAGE:
20
21
 
@@ -0,0 +1,88 @@
1
+ =begin
2
+ Copyright (C) 2009 Paradigmatic
3
+
4
+ This file is part of SwissParser.
5
+
6
+ SwissParser is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ SwissParser is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
+ =end
19
+
20
+ #!/usr/bin/ruby -w
21
+
22
+ require 'yaml'
23
+ require 'swissparser.rb'
24
+
25
+ class Protein
26
+
27
+ attr_accessor :id, :size, :species, :taxonomy, :sequence
28
+
29
+ def initialize
30
+ @taxonomy = []
31
+ @sequence = ""
32
+ end
33
+
34
+ end
35
+
36
+ module Uniprot
37
+
38
+ Parser = Swiss::Parser.define do
39
+
40
+ # Each entry must be stored in a Protein instance
41
+ new_entry do
42
+ Protein.new
43
+ end
44
+
45
+ rules do
46
+
47
+ # Parse the uniprot id
48
+ with("ID") do |content,protein|
49
+ content =~ /([A-Z]\w+)\D+(\d+)/
50
+ protein.id = $1
51
+ protein.size = $2.to_i
52
+ end
53
+
54
+ # Parse the organism
55
+ with("OS") do |content,protein|
56
+ content =~ /(\w+ \w+)/
57
+ protein.species = $1
58
+ end
59
+
60
+ # Parse the complete taxonomy
61
+ with("OC") do |content,protein|
62
+ ary = content.gsub(".","").split("; ")
63
+ protein.taxonomy += ary
64
+ end
65
+
66
+ # Parse the Sequence
67
+ with_text_after("SQ") do |content,protein|
68
+ seq = content.strip.gsub(" ","")
69
+ protein.sequence += seq
70
+ end
71
+
72
+ end
73
+
74
+ end
75
+
76
+ end
77
+
78
+ if $0 == __FILE__
79
+
80
+ uri = ARGV.shift
81
+
82
+ entries = Uniprot::Parser.parse_URI( uri )
83
+
84
+ entries.each do |e|
85
+ puts e.to_yaml
86
+ end
87
+
88
+ end
data/lib/swissparser.rb CHANGED
@@ -17,9 +17,11 @@ You should have received a copy of the GNU General Public License
17
17
  along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
18
  =end
19
19
 
20
+ require 'open-uri'
21
+
20
22
  module Swiss
21
23
 
22
- VERSION = "0.8.1"
24
+ VERSION = "0.9.0"
23
25
 
24
26
  # This class defines parsing rules. Its methods
25
27
  # are accessible within the +rules+ section of
@@ -216,6 +218,26 @@ module Swiss
216
218
  # It returns the value specified in the +after+ block. By default,
217
219
  # it returns an array containing _entry_ objects.
218
220
  def parse_file( filename, params={} )
221
+ File.open( filename, 'r' ) do |file|
222
+ parse( file, params )
223
+ end
224
+ end
225
+
226
+ # Parses a file specified by a +URI+. Both http and ftp are
227
+ # supported. An optional hash of arbitrary arguments (+params+)
228
+ # can be specified. It is passed to the workflow method blocks
229
+ # (+before+, +new_entry+, ...). It returns the value specified in
230
+ # the +after+ block. By default, it returns an array containing
231
+ # _entry_ objects.
232
+ def parse_URI( uri, params={} )
233
+ open( uri ) do |file|
234
+ parse( file, params )
235
+ end
236
+ end
237
+
238
+ private
239
+
240
+ def parse( file, params )
219
241
  @ctx = ParsingContext.new( params )
220
242
  helperModule = Module.new
221
243
  @helpers.each do |name, proc|
@@ -223,21 +245,17 @@ module Swiss
223
245
  end
224
246
  @ctx.extend( helperModule )
225
247
  container = @ctx.instance_exec( &@before )
226
- File.open( filename, 'r' ) do |file|
227
- entry = @ctx.instance_exec( &@begin )
228
- file.each_line do |line|
229
- state = parse_line( line, entry )
230
- if state == :end
231
- @ctx.instance_exec( entry, container, &@end )
232
- entry = @ctx.instance_exec( &@begin )
233
- end
248
+ entry = @ctx.instance_exec( &@begin )
249
+ file.each_line do |line|
250
+ state = parse_line( line, entry )
251
+ if state == :end
252
+ @ctx.instance_exec( entry, container, &@end )
253
+ entry = @ctx.instance_exec( &@begin )
234
254
  end
235
255
  end
236
256
  @ctx.instance_exec( container, &@after )
237
257
  end
238
258
 
239
- private
240
-
241
259
  PROTOTYPE = Parser.new
242
260
  PROTOTYPE.instance_eval do
243
261
  before { || [] }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swissparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - paradigmatic
@@ -41,6 +41,7 @@ files:
41
41
  - examples/data/kegg_enzyme_short.txt
42
42
  - examples/data/uniprot.txt
43
43
  - examples/kegg_demo.rb
44
+ - examples/parse_from_uri.rb
44
45
  - examples/signal_demo.rb
45
46
  - examples/tutorial_1.rb
46
47
  - examples/uniprot_param_demo.rb