swissparser 0.5.1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +8 -0
- data/examples/kegg_demo.rb +9 -4
- data/examples/signal_demo.rb +3 -3
- data/examples/uniprot_param_demo.rb +85 -0
- data/lib/swiss_parser.rb +73 -26
- metadata +2 -1
data/History.txt
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
== 0.6.0 / 2009-11-13
|
2
|
+
|
3
|
+
* 2 new features
|
4
|
+
- Parsing parameters are now accessed through the +param+ method
|
5
|
+
and are accessible to parsing rules too.
|
6
|
+
- Helper methods are now defined in a +helpers+ block and are
|
7
|
+
accessible to actions too.
|
8
|
+
|
1
9
|
== 0.5.1 / 2009-11-13
|
2
10
|
|
3
11
|
* Added basic RDOC documentation.
|
data/examples/kegg_demo.rb
CHANGED
@@ -30,12 +30,13 @@ end
|
|
30
30
|
enzyme_parser = Swiss::Parser.define do
|
31
31
|
|
32
32
|
|
33
|
-
new_entry do
|
33
|
+
new_entry do
|
34
34
|
{ :genes => [] }
|
35
35
|
end
|
36
|
-
|
37
|
-
rules do
|
38
36
|
|
37
|
+
|
38
|
+
helpers do
|
39
|
+
|
39
40
|
def parse_gene_ids( string, entry )
|
40
41
|
string.split(" ").each do |item|
|
41
42
|
if item =~ /(\d+)\(\w+\)/
|
@@ -43,7 +44,11 @@ enzyme_parser = Swiss::Parser.define do
|
|
43
44
|
end
|
44
45
|
end
|
45
46
|
end
|
47
|
+
end
|
46
48
|
|
49
|
+
|
50
|
+
rules do
|
51
|
+
|
47
52
|
human = "HSA"
|
48
53
|
|
49
54
|
set_separator( "///" )
|
@@ -76,7 +81,7 @@ enzyme_parser = Swiss::Parser.define do
|
|
76
81
|
|
77
82
|
end
|
78
83
|
|
79
|
-
finish_entry do |entry,container
|
84
|
+
finish_entry do |entry,container|
|
80
85
|
if entry[:genes].size > 0
|
81
86
|
e = Enzyme.new
|
82
87
|
e.id = entry[:id]
|
data/examples/signal_demo.rb
CHANGED
@@ -53,11 +53,11 @@ end
|
|
53
53
|
|
54
54
|
stat_parser = parser.extend do
|
55
55
|
|
56
|
-
before do
|
56
|
+
before do
|
57
57
|
{ :min => 1_000, :max => 0, :sum => 0, :n => 0 }
|
58
58
|
end
|
59
59
|
|
60
|
-
finish_entry do |entry,h
|
60
|
+
finish_entry do |entry,h|
|
61
61
|
if entry.size < h[:min]
|
62
62
|
h[:min] = entry.size
|
63
63
|
end
|
@@ -68,7 +68,7 @@ stat_parser = parser.extend do
|
|
68
68
|
h[:n] += 1
|
69
69
|
end
|
70
70
|
|
71
|
-
after do |h
|
71
|
+
after do |h|
|
72
72
|
h[:average] = h[:sum].to_f / h[:n]
|
73
73
|
h
|
74
74
|
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright (C) 2009 Paradigmatic
|
3
|
+
|
4
|
+
This file is part of SwissParser.
|
5
|
+
|
6
|
+
SwissParser is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
SwissParser is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU General Public License
|
17
|
+
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
=end
|
19
|
+
|
20
|
+
#!/usr/bin/ruby -w
|
21
|
+
|
22
|
+
require 'yaml'
|
23
|
+
require 'swiss_parser.rb'
|
24
|
+
|
25
|
+
class Protein
|
26
|
+
|
27
|
+
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
+
|
29
|
+
def initialize
|
30
|
+
@taxonomy = []
|
31
|
+
@sequence = ""
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
uniprot_parser = Swiss::Parser.define do
|
38
|
+
|
39
|
+
new_entry do
|
40
|
+
puts param(:msg)
|
41
|
+
Protein.new
|
42
|
+
end
|
43
|
+
|
44
|
+
rules do
|
45
|
+
|
46
|
+
with("ID") do |content,protein|
|
47
|
+
content =~ /([A-Z]\w+)\D+(\d+)/
|
48
|
+
protein.id = $1
|
49
|
+
protein.size = $2.to_i
|
50
|
+
end
|
51
|
+
|
52
|
+
with("OS") do |content,protein|
|
53
|
+
content =~ /(\w+ \w+)/
|
54
|
+
protein.species = $1
|
55
|
+
end
|
56
|
+
|
57
|
+
with("OC") do |content,protein|
|
58
|
+
ary = content.gsub(".","").split("; ")
|
59
|
+
protein.taxonomy += ary
|
60
|
+
end
|
61
|
+
|
62
|
+
with_text_after("SQ") do |content,protein|
|
63
|
+
puts param(:found_seq)
|
64
|
+
seq = content.strip.gsub(" ","")
|
65
|
+
protein.sequence += seq
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
if $0 == __FILE__
|
74
|
+
|
75
|
+
filename = ARGV.shift
|
76
|
+
|
77
|
+
entries = uniprot_parser.parse_file( filename, :msg => "Hello", :found_seq => "Youpie" )
|
78
|
+
|
79
|
+
puts entries.size
|
80
|
+
|
81
|
+
entries.each do |e|
|
82
|
+
puts e.to_yaml
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
data/lib/swiss_parser.rb
CHANGED
@@ -17,11 +17,9 @@ You should have received a copy of the GNU General Public License
|
|
17
17
|
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
18
|
=end
|
19
19
|
|
20
|
-
|
21
|
-
|
22
20
|
module Swiss
|
23
21
|
|
24
|
-
VERSION = "0.5.1"
|
22
|
+
VERSION = "0.6.0"
|
25
23
|
|
26
24
|
# This class defines parsing rules. Its methods
|
27
25
|
# are accessible within the +rules+ section of
|
@@ -59,6 +57,47 @@ module Swiss
|
|
59
57
|
|
60
58
|
end
|
61
59
|
|
60
|
+
# Methods of this class are accessible to rules and actions.
|
61
|
+
# Methods defined in the +helpers+ block are added to this class.
|
62
|
+
class ParsingContext
|
63
|
+
|
64
|
+
def initialize(parameters)
|
65
|
+
@params = parameters
|
66
|
+
end
|
67
|
+
|
68
|
+
# Retrieves a parsing parameter by key. Returns nil if
|
69
|
+
# there is no parameter with the provided key.
|
70
|
+
def param( key )
|
71
|
+
@params[key]
|
72
|
+
end
|
73
|
+
|
74
|
+
|
75
|
+
module InstanceExecHelper #:nodoc:
|
76
|
+
end
|
77
|
+
|
78
|
+
include InstanceExecHelper
|
79
|
+
|
80
|
+
# Used to execute rules and actions using the ParsingContext as context
|
81
|
+
#Stolen from http://eigenclass.org/hiki/bounded+space+instance_exec
|
82
|
+
def instance_exec(*args, &block)
|
83
|
+
begin
|
84
|
+
old_critical, Thread.critical = Thread.critical, true
|
85
|
+
n = 0
|
86
|
+
n += 1 while respond_to?(mname="__instance_exec#{n}")
|
87
|
+
InstanceExecHelper.module_eval{ define_method(mname, &block) }
|
88
|
+
ensure
|
89
|
+
Thread.critical = old_critical
|
90
|
+
end
|
91
|
+
begin
|
92
|
+
ret = send(mname, *args)
|
93
|
+
ensure
|
94
|
+
InstanceExecHelper.module_eval{ remove_method(mname) } rescue nil
|
95
|
+
end
|
96
|
+
ret
|
97
|
+
end
|
98
|
+
|
99
|
+
end
|
100
|
+
|
62
101
|
|
63
102
|
# Parser for a typical bioinformatic flat file.
|
64
103
|
class Parser
|
@@ -73,8 +112,9 @@ module Swiss
|
|
73
112
|
@separator = DEFAULT_SEPARATOR
|
74
113
|
@actions = {}
|
75
114
|
@actions[:text] = {}
|
76
|
-
|
77
|
-
|
115
|
+
@helpers = lambda {}
|
116
|
+
elsif args.size == 7
|
117
|
+
actions,separator,before,the_begin,the_end,after,helpers = *args
|
78
118
|
@actions = actions.clone
|
79
119
|
@actions[:text] = actions[:text].clone
|
80
120
|
@separator = separator
|
@@ -82,47 +122,52 @@ module Swiss
|
|
82
122
|
@end = the_end
|
83
123
|
@begin = the_begin
|
84
124
|
@after = after
|
125
|
+
@helpers = helpers
|
85
126
|
else
|
86
|
-
raise "Wrong arg number, either 0 or 6."
|
127
|
+
raise "Wrong arg number, either 0 or 7."
|
87
128
|
end
|
129
|
+
@ctx = nil
|
88
130
|
end
|
89
131
|
|
90
132
|
# Defines how to create the _entry_ _object_. The +proc+
|
91
|
-
#
|
92
|
-
#
|
133
|
+
# does not take arguments, but it must return a new
|
134
|
+
# _entry_ _object_.
|
93
135
|
# Default:: creates an empty hash.
|
94
136
|
def new_entry(&proc)
|
95
137
|
@begin = proc
|
96
138
|
end
|
97
139
|
|
98
140
|
# Defines how to finalize an _entry_ _object_. The +proc+
|
99
|
-
# takes
|
141
|
+
# takes two arguments:
|
100
142
|
# * The entry object ready to be finalized
|
101
143
|
# * The context object
|
102
|
-
# * An hash containing parsing options.
|
103
144
|
# Default:: Adds the entry object to the context object using +<<+ method.
|
104
145
|
def finish_entry(&proc)
|
105
146
|
@end = proc
|
106
147
|
end
|
107
148
|
|
108
149
|
# Defines how to set the context before using the parser.
|
109
|
-
# The +proc+
|
110
|
-
# parsing options. It must return a _context_ object.
|
150
|
+
# The +proc+ does not take arguments. It must return a _context_ object.
|
111
151
|
# Default:: creates an empty array
|
112
152
|
def before (&proc)
|
113
153
|
@before = proc
|
114
154
|
end
|
115
155
|
|
116
156
|
# Defines how to finalize the whole parsing.
|
117
|
-
# The +proc+ takes
|
157
|
+
# The +proc+ takes a single argument:
|
118
158
|
# * The context object
|
119
|
-
# * An hash containing parsing options.
|
120
159
|
# The value returned by the +proc+ is then returned by the parsing method.
|
121
160
|
# Default:: just returns the context object.
|
122
161
|
def after(&proc)
|
123
162
|
@after = proc
|
124
163
|
end
|
125
164
|
|
165
|
+
# Helper methods accessible to rules and actions can be
|
166
|
+
# defined using this method.
|
167
|
+
def helpers(&proc)
|
168
|
+
@helpers = proc
|
169
|
+
end
|
170
|
+
|
126
171
|
# Defines parsing rules inside a parser definition. The ParsingRules
|
127
172
|
# methods can then be called inside the proc.
|
128
173
|
def rules(&proc)
|
@@ -149,7 +194,7 @@ module Swiss
|
|
149
194
|
# After extension, the new parser is independent of the original one,
|
150
195
|
# i.e. a change to the original parser will not affect the derived one.
|
151
196
|
def extend(&proc)
|
152
|
-
clone = Parser.new( @actions, @separator, @before, @begin, @end, @after )
|
197
|
+
clone = Parser.new( @actions, @separator, @before, @begin, @end, @after, @helpers )
|
153
198
|
clone.instance_eval( &proc )
|
154
199
|
clone
|
155
200
|
end
|
@@ -165,28 +210,30 @@ module Swiss
|
|
165
210
|
# It returns the value specified in the +after+ block. By default,
|
166
211
|
# it returns an array containing _entry_ objects.
|
167
212
|
def parse_file( filename, params={} )
|
168
|
-
|
213
|
+
@ctx = ParsingContext.new( params )
|
214
|
+
@ctx.instance_exec( &@helpers )
|
215
|
+
container = @ctx.instance_exec( &@before )
|
169
216
|
File.open( filename, 'r' ) do |file|
|
170
|
-
entry = @
|
217
|
+
entry = @ctx.instance_exec( &@begin )
|
171
218
|
file.each_line do |line|
|
172
219
|
state = parse_line( line, entry )
|
173
220
|
if state == :end
|
174
|
-
@
|
175
|
-
entry = @
|
221
|
+
@ctx.instance_exec( entry, container, &@end )
|
222
|
+
entry = @ctx.instance_exec( &@begin )
|
176
223
|
end
|
177
224
|
end
|
178
225
|
end
|
179
|
-
@
|
226
|
+
@ctx.instance_exec( container, &@after )
|
180
227
|
end
|
181
228
|
|
182
229
|
private
|
183
230
|
|
184
231
|
PROTOTYPE = Parser.new
|
185
232
|
PROTOTYPE.instance_eval do
|
186
|
-
before {
|
187
|
-
new_entry {
|
188
|
-
finish_entry {|e,c
|
189
|
-
after {|c
|
233
|
+
before { || [] }
|
234
|
+
new_entry { || {} }
|
235
|
+
finish_entry {|e,c| c << e }
|
236
|
+
after {|c| c }
|
190
237
|
end
|
191
238
|
|
192
239
|
|
@@ -198,12 +245,12 @@ module Swiss
|
|
198
245
|
key,value = $1,$2
|
199
246
|
@last_key = key
|
200
247
|
if @actions[key]
|
201
|
-
@
|
248
|
+
@ctx.instance_exec( value, holder, &@actions[key] )
|
202
249
|
end
|
203
250
|
:parsing
|
204
251
|
else
|
205
252
|
if @actions[:text][@last_key]
|
206
|
-
@actions[:text][@last_key]
|
253
|
+
@ctx.instance_exec( line, holder, &@actions[:text][@last_key] )
|
207
254
|
end
|
208
255
|
:parsing
|
209
256
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swissparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- paradigmatic
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- examples/kegg_demo.rb
|
43
43
|
- examples/signal_demo.rb
|
44
44
|
- examples/uniprot_demo.rb
|
45
|
+
- examples/uniprot_param_demo.rb
|
45
46
|
- lib/swiss_parser.rb
|
46
47
|
has_rdoc: true
|
47
48
|
homepage: http://github.com/paradigmatic/SwissParser
|