swissparser 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +8 -0
- data/examples/kegg_demo.rb +9 -4
- data/examples/signal_demo.rb +3 -3
- data/examples/uniprot_param_demo.rb +85 -0
- data/lib/swiss_parser.rb +73 -26
- metadata +2 -1
data/History.txt
CHANGED
@@ -1,3 +1,11 @@
|
|
1
|
+
== 0.6.0 / 2009-11-13
|
2
|
+
|
3
|
+
* 2 new features
|
4
|
+
- Parsing parameters are now accessed through the +param+ method
|
5
|
+
and are accessible to parsing rules too.
|
6
|
+
- Helper methods are now defined in a +helpers+ block and are
|
7
|
+
accessible to actions too.
|
8
|
+
|
1
9
|
== 0.5.1 / 2009-11-13
|
2
10
|
|
3
11
|
* Added basic RDOC documentation.
|
data/examples/kegg_demo.rb
CHANGED
@@ -30,12 +30,13 @@ end
|
|
30
30
|
enzyme_parser = Swiss::Parser.define do
|
31
31
|
|
32
32
|
|
33
|
-
new_entry do
|
33
|
+
new_entry do
|
34
34
|
{ :genes => [] }
|
35
35
|
end
|
36
|
-
|
37
|
-
rules do
|
38
36
|
|
37
|
+
|
38
|
+
helpers do
|
39
|
+
|
39
40
|
def parse_gene_ids( string, entry )
|
40
41
|
string.split(" ").each do |item|
|
41
42
|
if item =~ /(\d+)\(\w+\)/
|
@@ -43,7 +44,11 @@ enzyme_parser = Swiss::Parser.define do
|
|
43
44
|
end
|
44
45
|
end
|
45
46
|
end
|
47
|
+
end
|
46
48
|
|
49
|
+
|
50
|
+
rules do
|
51
|
+
|
47
52
|
human = "HSA"
|
48
53
|
|
49
54
|
set_separator( "///" )
|
@@ -76,7 +81,7 @@ enzyme_parser = Swiss::Parser.define do
|
|
76
81
|
|
77
82
|
end
|
78
83
|
|
79
|
-
finish_entry do |entry,container
|
84
|
+
finish_entry do |entry,container|
|
80
85
|
if entry[:genes].size > 0
|
81
86
|
e = Enzyme.new
|
82
87
|
e.id = entry[:id]
|
data/examples/signal_demo.rb
CHANGED
@@ -53,11 +53,11 @@ end
|
|
53
53
|
|
54
54
|
stat_parser = parser.extend do
|
55
55
|
|
56
|
-
before do
|
56
|
+
before do
|
57
57
|
{ :min => 1_000, :max => 0, :sum => 0, :n => 0 }
|
58
58
|
end
|
59
59
|
|
60
|
-
finish_entry do |entry,h
|
60
|
+
finish_entry do |entry,h|
|
61
61
|
if entry.size < h[:min]
|
62
62
|
h[:min] = entry.size
|
63
63
|
end
|
@@ -68,7 +68,7 @@ stat_parser = parser.extend do
|
|
68
68
|
h[:n] += 1
|
69
69
|
end
|
70
70
|
|
71
|
-
after do |h
|
71
|
+
after do |h|
|
72
72
|
h[:average] = h[:sum].to_f / h[:n]
|
73
73
|
h
|
74
74
|
end
|
@@ -0,0 +1,85 @@
|
|
1
|
+
=begin
|
2
|
+
Copyright (C) 2009 Paradigmatic
|
3
|
+
|
4
|
+
This file is part of SwissParser.
|
5
|
+
|
6
|
+
SwissParser is free software: you can redistribute it and/or modify
|
7
|
+
it under the terms of the GNU General Public License as published by
|
8
|
+
the Free Software Foundation, either version 3 of the License, or
|
9
|
+
(at your option) any later version.
|
10
|
+
|
11
|
+
SwissParser is distributed in the hope that it will be useful,
|
12
|
+
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
GNU General Public License for more details.
|
15
|
+
|
16
|
+
You should have received a copy of the GNU General Public License
|
17
|
+
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
=end
|
19
|
+
|
20
|
+
#!/usr/bin/ruby -w
|
21
|
+
|
22
|
+
require 'yaml'
|
23
|
+
require 'swiss_parser.rb'
|
24
|
+
|
25
|
+
class Protein
|
26
|
+
|
27
|
+
attr_accessor :id, :size, :species, :taxonomy, :sequence
|
28
|
+
|
29
|
+
def initialize
|
30
|
+
@taxonomy = []
|
31
|
+
@sequence = ""
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
|
37
|
+
uniprot_parser = Swiss::Parser.define do
|
38
|
+
|
39
|
+
new_entry do
|
40
|
+
puts param(:msg)
|
41
|
+
Protein.new
|
42
|
+
end
|
43
|
+
|
44
|
+
rules do
|
45
|
+
|
46
|
+
with("ID") do |content,protein|
|
47
|
+
content =~ /([A-Z]\w+)\D+(\d+)/
|
48
|
+
protein.id = $1
|
49
|
+
protein.size = $2.to_i
|
50
|
+
end
|
51
|
+
|
52
|
+
with("OS") do |content,protein|
|
53
|
+
content =~ /(\w+ \w+)/
|
54
|
+
protein.species = $1
|
55
|
+
end
|
56
|
+
|
57
|
+
with("OC") do |content,protein|
|
58
|
+
ary = content.gsub(".","").split("; ")
|
59
|
+
protein.taxonomy += ary
|
60
|
+
end
|
61
|
+
|
62
|
+
with_text_after("SQ") do |content,protein|
|
63
|
+
puts param(:found_seq)
|
64
|
+
seq = content.strip.gsub(" ","")
|
65
|
+
protein.sequence += seq
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
|
73
|
+
if $0 == __FILE__
|
74
|
+
|
75
|
+
filename = ARGV.shift
|
76
|
+
|
77
|
+
entries = uniprot_parser.parse_file( filename, :msg => "Hello", :found_seq => "Youpie" )
|
78
|
+
|
79
|
+
puts entries.size
|
80
|
+
|
81
|
+
entries.each do |e|
|
82
|
+
puts e.to_yaml
|
83
|
+
end
|
84
|
+
|
85
|
+
end
|
data/lib/swiss_parser.rb
CHANGED
@@ -17,11 +17,9 @@ You should have received a copy of the GNU General Public License
|
|
17
17
|
along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
|
18
18
|
=end
|
19
19
|
|
20
|
-
|
21
|
-
|
22
20
|
module Swiss
|
23
21
|
|
24
|
-
VERSION = "0.5.1"
|
22
|
+
VERSION = "0.6.0"
|
25
23
|
|
26
24
|
# This class defines parsing rules. Its methods
|
27
25
|
# are accessible within the +rules+ section of
|
@@ -59,6 +57,47 @@ module Swiss
|
|
59
57
|
|
60
58
|
end
|
61
59
|
|
60
|
+
# Methods of this class are accessible to rules and actions.
|
61
|
+
# Methods defined in +helpers+ block are added to this class.
|
62
|
+
class ParsingContext
|
63
|
+
|
64
|
+
def initialize(parameters)
|
65
|
+
@params = parameters
|
66
|
+
end
|
67
|
+
|
68
|
+
# Retrieves a parsing parameter by key. Returns nil if
|
69
|
+
# there is no parameter with the provided key.
|
70
|
+
def param( key )
|
71
|
+
@params[key]
|
72
|
+
end
|
73
|
+
|
74
|
+
|
75
|
+
module InstanceExecHelper #:nodoc:
|
76
|
+
end
|
77
|
+
|
78
|
+
include InstanceExecHelper
|
79
|
+
|
80
|
+
# Used to execute rules and actions using the ParsingContext as context
|
81
|
+
#Stolen from http://eigenclass.org/hiki/bounded+space+instance_exec
|
82
|
+
def instance_exec(*args, &block)
|
83
|
+
begin
|
84
|
+
old_critical, Thread.critical = Thread.critical, true
|
85
|
+
n = 0
|
86
|
+
n += 1 while respond_to?(mname="__instance_exec#{n}")
|
87
|
+
InstanceExecHelper.module_eval{ define_method(mname, &block) }
|
88
|
+
ensure
|
89
|
+
Thread.critical = old_critical
|
90
|
+
end
|
91
|
+
begin
|
92
|
+
ret = send(mname, *args)
|
93
|
+
ensure
|
94
|
+
InstanceExecHelper.module_eval{ remove_method(mname) } rescue nil
|
95
|
+
end
|
96
|
+
ret
|
97
|
+
end
|
98
|
+
|
99
|
+
end
|
100
|
+
|
62
101
|
|
63
102
|
# Parser for a typical bioinformatic flat file.
|
64
103
|
class Parser
|
@@ -73,8 +112,9 @@ module Swiss
|
|
73
112
|
@separator = DEFAULT_SEPARATOR
|
74
113
|
@actions = {}
|
75
114
|
@actions[:text] = {}
|
76
|
-
|
77
|
-
|
115
|
+
@helpers = lambda {}
|
116
|
+
elsif args.size == 7
|
117
|
+
actions,separator,before,the_begin,the_end,after,helpers = *args
|
78
118
|
@actions = actions.clone
|
79
119
|
@actions[:text] = actions[:text].clone
|
80
120
|
@separator = separator
|
@@ -82,47 +122,52 @@ module Swiss
|
|
82
122
|
@end = the_end
|
83
123
|
@begin = the_begin
|
84
124
|
@after = after
|
125
|
+
@helpers = helpers
|
85
126
|
else
|
86
|
-
raise "Wrong arg number, either 0 or 6."
|
127
|
+
raise "Wrong arg number, either 0 or 7."
|
87
128
|
end
|
129
|
+
@ctx = nil
|
88
130
|
end
|
89
131
|
|
90
132
|
# Defines how to create the _entry_ _object_. The +proc+
|
91
|
-
#
|
92
|
-
#
|
133
|
+
# does not take arguments, but it must return a new
|
134
|
+
# _entry_ _object_.
|
93
135
|
# Default:: creates an empty hash.
|
94
136
|
def new_entry(&proc)
|
95
137
|
@begin = proc
|
96
138
|
end
|
97
139
|
|
98
140
|
# Defines how to finalize an _entry_ _object_. The +proc+
|
99
|
-
# takes
|
141
|
+
# takes two arguments:
|
100
142
|
# * The entry object ready to be finalized
|
101
143
|
# * The context object
|
102
|
-
# * An hash containing parsing options.
|
103
144
|
# Default:: Adds the entry object to the context object using +<<+ method.
|
104
145
|
def finish_entry(&proc)
|
105
146
|
@end = proc
|
106
147
|
end
|
107
148
|
|
108
149
|
# Defines how to set the context before using the parser.
|
109
|
-
# The +proc+
|
110
|
-
# parsing options. It must return a _context_ object.
|
150
|
+
# The +proc+ does not take arguments. It must return a _context_ object.
|
111
151
|
# Default:: creates an empty array
|
112
152
|
def before (&proc)
|
113
153
|
@before = proc
|
114
154
|
end
|
115
155
|
|
116
156
|
# Defines how to finalize the whole parsing.
|
117
|
-
# The +proc+ takes
|
157
|
+
# The +proc+ takes a single argument:
|
118
158
|
# * The context object
|
119
|
-
# * An hash containing parsing options.
|
120
159
|
# The value returned by the +proc+ is then returned by the parsing method.
|
121
160
|
# Default:: just returns the context object.
|
122
161
|
def after(&proc)
|
123
162
|
@after = proc
|
124
163
|
end
|
125
164
|
|
165
|
+
# Helper methods accessible to rules and actions can be
|
166
|
+
# defined using this method.
|
167
|
+
def helpers(&proc)
|
168
|
+
@helpers = proc
|
169
|
+
end
|
170
|
+
|
126
171
|
# Defines parsing rules inside a parser definition. The ParsingRules
|
127
172
|
# methods can then be called inside the proc.
|
128
173
|
def rules(&proc)
|
@@ -149,7 +194,7 @@ module Swiss
|
|
149
194
|
# After extension, the new parser is independent of the original one,
|
150
195
|
# i.e. a change to the original parser will not affect the derived one.
|
151
196
|
def extend(&proc)
|
152
|
-
clone = Parser.new( @actions, @separator, @before, @begin, @end, @after )
|
197
|
+
clone = Parser.new( @actions, @separator, @before, @begin, @end, @after, @helpers )
|
153
198
|
clone.instance_eval( &proc )
|
154
199
|
clone
|
155
200
|
end
|
@@ -165,28 +210,30 @@ module Swiss
|
|
165
210
|
# It returns the value specified in the +after+ block. By default,
|
166
211
|
# it returns an array containing _entry_ objects.
|
167
212
|
def parse_file( filename, params={} )
|
168
|
-
|
213
|
+
@ctx = ParsingContext.new( params )
|
214
|
+
@ctx.instance_exec( &@helpers )
|
215
|
+
container = @ctx.instance_exec( &@before )
|
169
216
|
File.open( filename, 'r' ) do |file|
|
170
|
-
entry = @
|
217
|
+
entry = @ctx.instance_exec( &@begin )
|
171
218
|
file.each_line do |line|
|
172
219
|
state = parse_line( line, entry )
|
173
220
|
if state == :end
|
174
|
-
@
|
175
|
-
entry = @
|
221
|
+
@ctx.instance_exec( entry, container, &@end )
|
222
|
+
entry = @ctx.instance_exec( &@begin )
|
176
223
|
end
|
177
224
|
end
|
178
225
|
end
|
179
|
-
@
|
226
|
+
@ctx.instance_exec( container, &@after )
|
180
227
|
end
|
181
228
|
|
182
229
|
private
|
183
230
|
|
184
231
|
PROTOTYPE = Parser.new
|
185
232
|
PROTOTYPE.instance_eval do
|
186
|
-
before {
|
187
|
-
new_entry {
|
188
|
-
finish_entry {|e,c
|
189
|
-
after {|c
|
233
|
+
before { || [] }
|
234
|
+
new_entry { || {} }
|
235
|
+
finish_entry {|e,c| c << e }
|
236
|
+
after {|c| c }
|
190
237
|
end
|
191
238
|
|
192
239
|
|
@@ -198,12 +245,12 @@ module Swiss
|
|
198
245
|
key,value = $1,$2
|
199
246
|
@last_key = key
|
200
247
|
if @actions[key]
|
201
|
-
@
|
248
|
+
@ctx.instance_exec( value, holder, &@actions[key] )
|
202
249
|
end
|
203
250
|
:parsing
|
204
251
|
else
|
205
252
|
if @actions[:text][@last_key]
|
206
|
-
@actions[:text][@last_key]
|
253
|
+
@ctx.instance_exec( line, holder, &@actions[:text][@last_key] )
|
207
254
|
end
|
208
255
|
:parsing
|
209
256
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swissparser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- paradigmatic
|
@@ -42,6 +42,7 @@ files:
|
|
42
42
|
- examples/kegg_demo.rb
|
43
43
|
- examples/signal_demo.rb
|
44
44
|
- examples/uniprot_demo.rb
|
45
|
+
- examples/uniprot_param_demo.rb
|
45
46
|
- lib/swiss_parser.rb
|
46
47
|
has_rdoc: true
|
47
48
|
homepage: http://github.com/paradigmatic/SwissParser
|