swissparser 0.8.1 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +7 -3
- data/README.rdoc +3 -2
- data/examples/parse_from_uri.rb +88 -0
- data/lib/swissparser.rb +29 -11
- metadata +2 -1
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
== 0.9.0 / 2009-11-14
|
2
|
+
|
3
|
+
* 1 new feature:
|
4
|
+
- SwissParser can now directly parse remote files using HTTP or FTP.
|
5
|
+
|
1
6
|
== 0.8.1 / 2009-11-14
|
2
7
|
|
3
8
|
* 1 bugfix:
|
@@ -6,8 +11,7 @@
|
|
6
11
|
== 0.8.0 / 2009-11-14
|
7
12
|
|
8
13
|
* 1 new feature:
|
9
|
-
- helper methods are now defined with the helper method. When the
|
10
|
-
parser is extended they can be overriden in a one per one basis.
|
14
|
+
- Helper methods are now defined with the +helper+ method. When the parser is extended, they can be overridden on a one-by-one basis.
|
11
15
|
|
12
16
|
== 0.7.0 / 2009-11-14
|
13
17
|
|
@@ -18,7 +22,7 @@
|
|
18
22
|
|
19
23
|
== 0.6.0 / 2009-11-13
|
20
24
|
|
21
|
-
* 2 new features
|
25
|
+
* 2 new features:
|
22
26
|
- Parsing parameters are now accessed through the +params+ method
|
23
27
|
and are accessible to parsing rules too.
|
24
28
|
- Helper methods are now defined in a helper block and are
|
data/README.rdoc
CHANGED
@@ -13,8 +13,9 @@ be stable enough to be used for bioinformatics research.
|
|
13
13
|
|
14
14
|
* Defines parsers with a clear and compact declarative syntax.
|
15
15
|
* The whole parsing workflow is configurable.
|
16
|
-
*
|
17
|
-
*
|
16
|
+
* Able to parse remote files accessible from a web or an FTP server.
|
17
|
+
* Users can create new parsers by extending existing parsers.
|
18
|
+
* A parser has access to global parameters and user-defined helper methods.
|
18
19
|
|
19
20
|
== USAGE:
|
20
21
|
|
@@ -0,0 +1,88 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright (C) 2009 Paradigmatic
|
3
|
+
|
4
|
+
This file is part of SwissParser.
|
5
|
+
|
6
|
+
SwissParser is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
SwissParser is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU General Public License
|
17
|
+
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
=end
|
19
|
+
|
20
|
+
#!/usr/bin/ruby -w
|
21
|
+
|
22
|
+
require 'yaml'
|
23
|
+
require 'swissparser.rb'
|
24
|
+
|
25
|
+
class Protein
|
26
|
+
|
27
|
+
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
+
|
29
|
+
def initialize
|
30
|
+
@taxonomy = []
|
31
|
+
@sequence = ""
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
module Uniprot
|
37
|
+
|
38
|
+
Parser = Swiss::Parser.define do
|
39
|
+
|
40
|
+
# Each entry must be stored in a Protein instance
|
41
|
+
new_entry do
|
42
|
+
Protein.new
|
43
|
+
end
|
44
|
+
|
45
|
+
rules do
|
46
|
+
|
47
|
+
# Parse the uniprot id
|
48
|
+
with("ID") do |content,protein|
|
49
|
+
content =~ /([A-Z]\w+)\D+(\d+)/
|
50
|
+
protein.id = $1
|
51
|
+
protein.size = $2.to_i
|
52
|
+
end
|
53
|
+
|
54
|
+
# Parse the organism
|
55
|
+
with("OS") do |content,protein|
|
56
|
+
content =~ /(\w+ \w+)/
|
57
|
+
protein.species = $1
|
58
|
+
end
|
59
|
+
|
60
|
+
# Parse the complete taxonomy
|
61
|
+
with("OC") do |content,protein|
|
62
|
+
ary = content.gsub(".","").split("; ")
|
63
|
+
protein.taxonomy += ary
|
64
|
+
end
|
65
|
+
|
66
|
+
# Parse the Sequence
|
67
|
+
with_text_after("SQ") do |content,protein|
|
68
|
+
seq = content.strip.gsub(" ","")
|
69
|
+
protein.sequence += seq
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
77
|
+
|
78
|
+
if $0 == __FILE__
|
79
|
+
|
80
|
+
uri = ARGV.shift
|
81
|
+
|
82
|
+
entries = Uniprot::Parser.parse_URI( uri )
|
83
|
+
|
84
|
+
entries.each do |e|
|
85
|
+
puts e.to_yaml
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
data/lib/swissparser.rb
CHANGED
@@ -17,9 +17,11 @@ You should have received a copy of the GNU General Public License
|
|
17
17
|
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
18
|
=end
|
19
19
|
|
20
|
+
require 'open-uri'
|
21
|
+
|
20
22
|
module Swiss
|
21
23
|
|
22
|
-
VERSION = "0.
|
24
|
+
VERSION = "0.9.0"
|
23
25
|
|
24
26
|
# This class defines parsing rules. Its methods
|
25
27
|
# are accessible within the +rules+ section of
|
@@ -216,6 +218,26 @@ module Swiss
|
|
216
218
|
# It returns the value specified in the +after+ block. By default,
|
217
219
|
# it returns an array containing _entry_ objects.
|
218
220
|
def parse_file( filename, params={} )
|
221
|
+
File.open( filename, 'r' ) do |file|
|
222
|
+
parse( file, params )
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
# Parses a file specified by a +URI+. Both HTTP and FTP are
|
227
|
+
# supported. An optional hash of arbitrary arguments (+params+)
|
228
|
+
# can be specified. It is passed to the workflow methods blocks
|
229
|
+
# (+before+, +new_entry+, ...) It returns the value specified in
|
230
|
+
# the +after+ block. By default, it returns an array containing
|
231
|
+
# _entry_ objects.
|
232
|
+
def parse_URI( uri, params={} )
|
233
|
+
open( uri ) do |file|
|
234
|
+
parse( file, params )
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
private
|
239
|
+
|
240
|
+
def parse( file, params )
|
219
241
|
@ctx = ParsingContext.new( params )
|
220
242
|
helperModule = Module.new
|
221
243
|
@helpers.each do |name, proc|
|
@@ -223,21 +245,17 @@ module Swiss
|
|
223
245
|
end
|
224
246
|
@ctx.extend( helperModule )
|
225
247
|
container = @ctx.instance_exec( &@before )
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
entry = @ctx.instance_exec( &@begin )
|
233
|
-
end
|
248
|
+
entry = @ctx.instance_exec( &@begin )
|
249
|
+
file.each_line do |line|
|
250
|
+
state = parse_line( line, entry )
|
251
|
+
if state == :end
|
252
|
+
@ctx.instance_exec( entry, container, &@end )
|
253
|
+
entry = @ctx.instance_exec( &@begin )
|
234
254
|
end
|
235
255
|
end
|
236
256
|
@ctx.instance_exec( container, &@after )
|
237
257
|
end
|
238
258
|
|
239
|
-
private
|
240
|
-
|
241
259
|
PROTOTYPE = Parser.new
|
242
260
|
PROTOTYPE.instance_eval do
|
243
261
|
before { || [] }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swissparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- paradigmatic
|
@@ -41,6 +41,7 @@ files:
|
|
41
41
|
- examples/data/kegg_enzyme_short.txt
|
42
42
|
- examples/data/uniprot.txt
|
43
43
|
- examples/kegg_demo.rb
|
44
|
+
- examples/parse_from_uri.rb
|
44
45
|
- examples/signal_demo.rb
|
45
46
|
- examples/tutorial_1.rb
|
46
47
|
- examples/uniprot_param_demo.rb
|