swissparser 0.8.1 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG.rdoc CHANGED
@@ -1,3 +1,8 @@
1
+ == 0.9.0 / 2009-11-14
2
+
3
+ * 1 new feature:
4
+ - SwissParser can now directly parse remote files using HTTP or FTP.
5
+
1
6
  == 0.8.1 / 2009-11-14
2
7
 
3
8
  * 1 bugfix:
@@ -6,8 +11,7 @@
6
11
  == 0.8.0 / 2009-11-14
7
12
 
8
13
  * 1 new feature:
9
- - helper methods are now defined with the helper method. When the
10
- parser is extended they can be overriden in a one per one basis.
14
+ - Helper methods are now defined with the helper method. When the parser is extended, they can be overridden on a one-by-one basis.
11
15
 
12
16
  == 0.7.0 / 2009-11-14
13
17
 
@@ -18,7 +22,7 @@
18
22
 
19
23
  == 0.6.0 / 2009-11-13
20
24
 
21
- * 2 new features
25
+ * 2 new features:
22
26
  - Parsing parameters are now accessed through the +params+ method
23
27
  and are accessible to parsing rules too.
24
28
  - Helper methods are now defined in a helper block and are
data/README.rdoc CHANGED
@@ -13,8 +13,9 @@ be stable enough to be used for bioinformatics research.
13
13
 
14
14
  * Defines parsers with a clear and compact declarative syntax.
15
15
  * The whole parsing workflow is configurable.
16
- * The user can create new parsers by extending existing parsers.
17
- * Parser have access to global parameters and user defined helper methods.
16
+ * Able to parse remote files accessible from a web or an FTP server.
17
+ * Users can create new parsers by extending existing parsers.
18
+ * A parser has access to global parameters and user-defined helper methods.
18
19
 
19
20
  == USAGE:
20
21
 
@@ -0,0 +1,88 @@
1
+ =begin
2
+ Copyright (C) 2009 Paradigmatic
3
+
4
+ This file is part of SwissParser.
5
+
6
+ SwissParser is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ SwissParser is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
+ =end
19
+
20
+ #!/usr/bin/ruby -w
21
+
22
+ require 'yaml'
23
+ require 'swissparser.rb'
24
+
25
+ class Protein
26
+
27
+ attr_accessor :id, :size, :species, :taxonomy, :sequence
28
+
29
+ def initialize
30
+ @taxonomy = []
31
+ @sequence = ""
32
+ end
33
+
34
+ end
35
+
36
+ module Uniprot
37
+
38
+ Parser = Swiss::Parser.define do
39
+
40
+ # Each entry must be stored in a Protein instance
41
+ new_entry do
42
+ Protein.new
43
+ end
44
+
45
+ rules do
46
+
47
+ # Parse the uniprot id
48
+ with("ID") do |content,protein|
49
+ content =~ /([A-Z]\w+)\D+(\d+)/
50
+ protein.id = $1
51
+ protein.size = $2.to_i
52
+ end
53
+
54
+ # Parse the organism
55
+ with("OS") do |content,protein|
56
+ content =~ /(\w+ \w+)/
57
+ protein.species = $1
58
+ end
59
+
60
+ # Parse the complete taxonomy
61
+ with("OC") do |content,protein|
62
+ ary = content.gsub(".","").split("; ")
63
+ protein.taxonomy += ary
64
+ end
65
+
66
+ # Parse the Sequence
67
+ with_text_after("SQ") do |content,protein|
68
+ seq = content.strip.gsub(" ","")
69
+ protein.sequence += seq
70
+ end
71
+
72
+ end
73
+
74
+ end
75
+
76
+ end
77
+
78
+ if $0 == __FILE__
79
+
80
+ uri = ARGV.shift
81
+
82
+ entries = Uniprot::Parser.parse_URI( uri )
83
+
84
+ entries.each do |e|
85
+ puts e.to_yaml
86
+ end
87
+
88
+ end
data/lib/swissparser.rb CHANGED
@@ -17,9 +17,11 @@ You should have received a copy of the GNU General Public License
17
17
  along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
18
  =end
19
19
 
20
+ require 'open-uri'
21
+
20
22
  module Swiss
21
23
 
22
- VERSION = "0.8.1"
24
+ VERSION = "0.9.0"
23
25
 
24
26
  # This class defines parsing rules. Its methods
25
27
  # are accessible within the +rules+ section of
@@ -216,6 +218,26 @@ module Swiss
216
218
  # It returns the value specified in the +after+ block. By default,
217
219
  # it returns an array containing _entry_ objects.
218
220
  def parse_file( filename, params={} )
221
+ File.open( filename, 'r' ) do |file|
222
+ parse( file, params )
223
+ end
224
+ end
225
+
226
+ # Parses a file specified by a +URI+. Both HTTP and FTP are
227
+ # supported. An optional hash of arbitrary arguments (+params+)
228
+ # can be specified. It is passed to the workflow methods blocks
229
+ # (+before+, +new_entry+, ...). It returns the value specified in
230
+ # the +after+ block. By default, it returns an array containing
231
+ # _entry_ objects.
232
+ def parse_URI( uri, params={} )
233
+ open( uri ) do |file|
234
+ parse( file, params )
235
+ end
236
+ end
237
+
238
+ private
239
+
240
+ def parse( file, params )
219
241
  @ctx = ParsingContext.new( params )
220
242
  helperModule = Module.new
221
243
  @helpers.each do |name, proc|
@@ -223,21 +245,17 @@ module Swiss
223
245
  end
224
246
  @ctx.extend( helperModule )
225
247
  container = @ctx.instance_exec( &@before )
226
- File.open( filename, 'r' ) do |file|
227
- entry = @ctx.instance_exec( &@begin )
228
- file.each_line do |line|
229
- state = parse_line( line, entry )
230
- if state == :end
231
- @ctx.instance_exec( entry, container, &@end )
232
- entry = @ctx.instance_exec( &@begin )
233
- end
248
+ entry = @ctx.instance_exec( &@begin )
249
+ file.each_line do |line|
250
+ state = parse_line( line, entry )
251
+ if state == :end
252
+ @ctx.instance_exec( entry, container, &@end )
253
+ entry = @ctx.instance_exec( &@begin )
234
254
  end
235
255
  end
236
256
  @ctx.instance_exec( container, &@after )
237
257
  end
238
258
 
239
- private
240
-
241
259
  PROTOTYPE = Parser.new
242
260
  PROTOTYPE.instance_eval do
243
261
  before { || [] }
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swissparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.1
4
+ version: 0.9.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - paradigmatic
@@ -41,6 +41,7 @@ files:
41
41
  - examples/data/kegg_enzyme_short.txt
42
42
  - examples/data/uniprot.txt
43
43
  - examples/kegg_demo.rb
44
+ - examples/parse_from_uri.rb
44
45
  - examples/signal_demo.rb
45
46
  - examples/tutorial_1.rb
46
47
  - examples/uniprot_param_demo.rb