swissparser 0.8.1 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.rdoc +7 -3
- data/README.rdoc +3 -2
- data/examples/parse_from_uri.rb +88 -0
- data/lib/swissparser.rb +29 -11
- metadata +2 -1
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
== 0.9.0 / 2009-11-14
|
2
|
+
|
3
|
+
* 1 new feature:
|
4
|
+
- SwissParser can now directly parse remote files using HTTP or FTP.
|
5
|
+
|
1
6
|
== 0.8.1 / 2009-11-14
|
2
7
|
|
3
8
|
* 1 bugfix:
|
@@ -6,8 +11,7 @@
|
|
6
11
|
== 0.8.0 / 2009-11-14
|
7
12
|
|
8
13
|
* 1 new feature:
|
9
|
-
- helper methods are now defined with the helper method. When the
|
10
|
-
parser is extended they can be overridden on a one-by-one basis.
|
14
|
+
- helper methods are now defined with the helper method. When the parser is extended they can be overridden on a one-by-one basis.
|
11
15
|
|
12
16
|
== 0.7.0 / 2009-11-14
|
13
17
|
|
@@ -18,7 +22,7 @@
|
|
18
22
|
|
19
23
|
== 0.6.0 / 2009-11-13
|
20
24
|
|
21
|
-
* 2 new features
|
25
|
+
* 2 new features:
|
22
26
|
- Parsing parameters are now accessed thru the +params+ method
|
23
27
|
and are accessible to parsing rules too.
|
24
28
|
- Helper methods are now defined in a helper block and are
|
data/README.rdoc
CHANGED
@@ -13,8 +13,9 @@ be stable enough to be used for bioinformatics research.
|
|
13
13
|
|
14
14
|
* Defines parsers with a clear and compact declarative syntax.
|
15
15
|
* The whole parsing workflow is configurable.
|
16
|
-
*
|
17
|
-
*
|
16
|
+
* Able to parse remote files accessible from a web or an FTP server.
|
17
|
+
* Users can create new parsers by extending existing parsers.
|
18
|
+
* A parser has access to global parameters and user-defined helper methods.
|
18
19
|
|
19
20
|
== USAGE:
|
20
21
|
|
@@ -0,0 +1,88 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright (C) 2009 Paradigmatic
|
3
|
+
|
4
|
+
This file is part of SwissParser.
|
5
|
+
|
6
|
+
SwissParser is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
SwissParser is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU General Public License
|
17
|
+
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
=end
|
19
|
+
|
20
|
+
#!/usr/bin/ruby -w
|
21
|
+
|
22
|
+
require 'yaml'
|
23
|
+
require 'swissparser.rb'
|
24
|
+
|
25
|
+
class Protein
|
26
|
+
|
27
|
+
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
+
|
29
|
+
def initialize
|
30
|
+
@taxonomy = []
|
31
|
+
@sequence = ""
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
module Uniprot
|
37
|
+
|
38
|
+
Parser = Swiss::Parser.define do
|
39
|
+
|
40
|
+
# Each entry must be stored in a Protein instance
|
41
|
+
new_entry do
|
42
|
+
Protein.new
|
43
|
+
end
|
44
|
+
|
45
|
+
rules do
|
46
|
+
|
47
|
+
# Parse the uniprot id
|
48
|
+
with("ID") do |content,protein|
|
49
|
+
content =~ /([A-Z]\w+)\D+(\d+)/
|
50
|
+
protein.id = $1
|
51
|
+
protein.size = $2.to_i
|
52
|
+
end
|
53
|
+
|
54
|
+
# Parse the organism
|
55
|
+
with("OS") do |content,protein|
|
56
|
+
content =~ /(\w+ \w+)/
|
57
|
+
protein.species = $1
|
58
|
+
end
|
59
|
+
|
60
|
+
# Parse the complete taxonomy
|
61
|
+
with("OC") do |content,protein|
|
62
|
+
ary = content.gsub(".","").split("; ")
|
63
|
+
protein.taxonomy += ary
|
64
|
+
end
|
65
|
+
|
66
|
+
# Parse the Sequence
|
67
|
+
with_text_after("SQ") do |content,protein|
|
68
|
+
seq = content.strip.gsub(" ","")
|
69
|
+
protein.sequence += seq
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
73
|
+
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
77
|
+
|
78
|
+
if $0 == __FILE__
|
79
|
+
|
80
|
+
uri = ARGV.shift
|
81
|
+
|
82
|
+
entries = Uniprot::Parser.parse_URI( uri )
|
83
|
+
|
84
|
+
entries.each do |e|
|
85
|
+
puts e.to_yaml
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
data/lib/swissparser.rb
CHANGED
@@ -17,9 +17,11 @@ You should have received a copy of the GNU General Public License
|
|
17
17
|
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
18
|
=end
|
19
19
|
|
20
|
+
require 'open-uri'
|
21
|
+
|
20
22
|
module Swiss
|
21
23
|
|
22
|
-
VERSION = "0.
|
24
|
+
VERSION = "0.9.0"
|
23
25
|
|
24
26
|
# This class defines parsing rules. Its methods
|
25
27
|
# are accessible within the +rules+ section of
|
@@ -216,6 +218,26 @@ module Swiss
|
|
216
218
|
# It returns the value specified in the +after+ block. By default,
|
217
219
|
# it returns an array containing _entry_ objects.
|
218
220
|
def parse_file( filename, params={} )
|
221
|
+
File.open( filename, 'r' ) do |file|
|
222
|
+
parse( file, params )
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
# Parses a file specified by a +URI+. Both http and ftp are
|
227
|
+
# supported. An optional hash of arbitrary arguments (+params+)
|
228
|
+
# can be specified. It is passed to the workflow method blocks
|
229
|
+
# (+before+, +new_entry+, ...) It returns the value specified in
|
230
|
+
# the +after+ block. By default, it returns an array containing
|
231
|
+
# _entry_ objects.
|
232
|
+
def parse_URI( uri, params={} )
|
233
|
+
open( uri ) do |file|
|
234
|
+
parse( file, params )
|
235
|
+
end
|
236
|
+
end
|
237
|
+
|
238
|
+
private
|
239
|
+
|
240
|
+
def parse( file, params )
|
219
241
|
@ctx = ParsingContext.new( params )
|
220
242
|
helperModule = Module.new
|
221
243
|
@helpers.each do |name, proc|
|
@@ -223,21 +245,17 @@ module Swiss
|
|
223
245
|
end
|
224
246
|
@ctx.extend( helperModule )
|
225
247
|
container = @ctx.instance_exec( &@before )
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
entry = @ctx.instance_exec( &@begin )
|
233
|
-
end
|
248
|
+
entry = @ctx.instance_exec( &@begin )
|
249
|
+
file.each_line do |line|
|
250
|
+
state = parse_line( line, entry )
|
251
|
+
if state == :end
|
252
|
+
@ctx.instance_exec( entry, container, &@end )
|
253
|
+
entry = @ctx.instance_exec( &@begin )
|
234
254
|
end
|
235
255
|
end
|
236
256
|
@ctx.instance_exec( container, &@after )
|
237
257
|
end
|
238
258
|
|
239
|
-
private
|
240
|
-
|
241
259
|
PROTOTYPE = Parser.new
|
242
260
|
PROTOTYPE.instance_eval do
|
243
261
|
before { || [] }
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swissparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.9.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- paradigmatic
|
@@ -41,6 +41,7 @@ files:
|
|
41
41
|
- examples/data/kegg_enzyme_short.txt
|
42
42
|
- examples/data/uniprot.txt
|
43
43
|
- examples/kegg_demo.rb
|
44
|
+
- examples/parse_from_uri.rb
|
44
45
|
- examples/signal_demo.rb
|
45
46
|
- examples/tutorial_1.rb
|
46
47
|
- examples/uniprot_param_demo.rb
|