swissparser 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,11 @@
1
+ == 0.6.0 / 2009-11-13
2
+
3
+ * 2 new features
4
+ - Parsing parameters are now accessed through the +param+ method
5
+ and are accessible to parsing rules too.
6
+ - Helper methods are now defined in a +helpers+ block and are
7
+ accessible to actions too.
8
+
1
9
  == 0.5.1 / 2009-11-13
2
10
 
3
11
  * Added basic RDOC documentation.
@@ -30,12 +30,13 @@ end
30
30
  enzyme_parser = Swiss::Parser.define do
31
31
 
32
32
 
33
- new_entry do |params|
33
+ new_entry do
34
34
  { :genes => [] }
35
35
  end
36
-
37
- rules do
38
36
 
37
+
38
+ helpers do
39
+
39
40
  def parse_gene_ids( string, entry )
40
41
  string.split(" ").each do |item|
41
42
  if item =~ /(\d+)\(\w+\)/
@@ -43,7 +44,11 @@ enzyme_parser = Swiss::Parser.define do
43
44
  end
44
45
  end
45
46
  end
47
+ end
46
48
 
49
+
50
+ rules do
51
+
47
52
  human = "HSA"
48
53
 
49
54
  set_separator( "///" )
@@ -76,7 +81,7 @@ enzyme_parser = Swiss::Parser.define do
76
81
 
77
82
  end
78
83
 
79
- finish_entry do |entry,container,params|
84
+ finish_entry do |entry,container|
80
85
  if entry[:genes].size > 0
81
86
  e = Enzyme.new
82
87
  e.id = entry[:id]
@@ -53,11 +53,11 @@ end
53
53
 
54
54
  stat_parser = parser.extend do
55
55
 
56
- before do |params|
56
+ before do
57
57
  { :min => 1_000, :max => 0, :sum => 0, :n => 0 }
58
58
  end
59
59
 
60
- finish_entry do |entry,h,params|
60
+ finish_entry do |entry,h|
61
61
  if entry.size < h[:min]
62
62
  h[:min] = entry.size
63
63
  end
@@ -68,7 +68,7 @@ stat_parser = parser.extend do
68
68
  h[:n] += 1
69
69
  end
70
70
 
71
- after do |h,params|
71
+ after do |h|
72
72
  h[:average] = h[:sum].to_f / h[:n]
73
73
  h
74
74
  end
@@ -0,0 +1,85 @@
1
+ =begin
2
+ Copyright (C) 2009 Paradigmatic
3
+
4
+ This file is part of SwissParser.
5
+
6
+ SwissParser is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ SwissParser is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
+ =end
19
+
20
+ #!/usr/bin/ruby -w
21
+
22
+ require 'yaml'
23
+ require 'swiss_parser.rb'
24
+
25
+ class Protein
26
+
27
+ attr_accessor :id, :size, :species, :taxonomy, :sequence
28
+
29
+ def initialize
30
+ @taxonomy = []
31
+ @sequence = ""
32
+ end
33
+
34
+ end
35
+
36
+
37
+ uniprot_parser = Swiss::Parser.define do
38
+
39
+ new_entry do
40
+ puts param(:msg)
41
+ Protein.new
42
+ end
43
+
44
+ rules do
45
+
46
+ with("ID") do |content,protein|
47
+ content =~ /([A-Z]\w+)\D+(\d+)/
48
+ protein.id = $1
49
+ protein.size = $2.to_i
50
+ end
51
+
52
+ with("OS") do |content,protein|
53
+ content =~ /(\w+ \w+)/
54
+ protein.species = $1
55
+ end
56
+
57
+ with("OC") do |content,protein|
58
+ ary = content.gsub(".","").split("; ")
59
+ protein.taxonomy += ary
60
+ end
61
+
62
+ with_text_after("SQ") do |content,protein|
63
+ puts param(:found_seq)
64
+ seq = content.strip.gsub(" ","")
65
+ protein.sequence += seq
66
+ end
67
+
68
+ end
69
+
70
+ end
71
+
72
+
73
+ if $0 == __FILE__
74
+
75
+ filename = ARGV.shift
76
+
77
+ entries = uniprot_parser.parse_file( filename, :msg => "Hello", :found_seq => "Youpie" )
78
+
79
+ puts entries.size
80
+
81
+ entries.each do |e|
82
+ puts e.to_yaml
83
+ end
84
+
85
+ end
data/lib/swiss_parser.rb CHANGED
@@ -17,11 +17,9 @@ You should have received a copy of the GNU General Public License
17
17
  along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
18
  =end
19
19
 
20
-
21
-
22
20
  module Swiss
23
21
 
24
- VERSION = "0.5.1"
22
+ VERSION = "0.6.0"
25
23
 
26
24
  # This class defines parsing rules. Its methods
27
25
  # are accessible within the +rules+ section of
@@ -59,6 +57,47 @@ module Swiss
59
57
 
60
58
  end
61
59
 
60
+ # Methods of this class are accessible to rules and actions.
61
+ # Methods defined in +helpers+ block are added to this class.
62
+ class ParsingContext
63
+
64
+ def initialize(parameters)
65
+ @params = parameters
66
+ end
67
+
68
+ # Retrieves a parsing parameter by key. Returns nil if
69
+ # there is no parameter with the provided key.
70
+ def param( key )
71
+ @params[key]
72
+ end
73
+
74
+
75
+ module InstanceExecHelper #:nodoc:
76
+ end
77
+
78
+ include InstanceExecHelper
79
+
80
+ # Used to execute rules and actions using the ParsingContext as context
81
+ #Stolen from http://eigenclass.org/hiki/bounded+space+instance_exec
82
+ def instance_exec(*args, &block)
83
+ begin
84
+ old_critical, Thread.critical = Thread.critical, true
85
+ n = 0
86
+ n += 1 while respond_to?(mname="__instance_exec#{n}")
87
+ InstanceExecHelper.module_eval{ define_method(mname, &block) }
88
+ ensure
89
+ Thread.critical = old_critical
90
+ end
91
+ begin
92
+ ret = send(mname, *args)
93
+ ensure
94
+ InstanceExecHelper.module_eval{ remove_method(mname) } rescue nil
95
+ end
96
+ ret
97
+ end
98
+
99
+ end
100
+
62
101
 
63
102
  # Parser for a typical bioinformatic flat file.
64
103
  class Parser
@@ -73,8 +112,9 @@ module Swiss
73
112
  @separator = DEFAULT_SEPARATOR
74
113
  @actions = {}
75
114
  @actions[:text] = {}
76
- elsif args.size == 6
77
- actions,separator,before,the_begin,the_end,after = *args
115
+ @helpers = lambda {}
116
+ elsif args.size == 7
117
+ actions,separator,before,the_begin,the_end,after,helpers = *args
78
118
  @actions = actions.clone
79
119
  @actions[:text] = actions[:text].clone
80
120
  @separator = separator
@@ -82,47 +122,52 @@ module Swiss
82
122
  @end = the_end
83
123
  @begin = the_begin
84
124
  @after = after
125
+ @helpers = helpers
85
126
  else
86
- raise "Wrong arg number, either 0 or 6."
127
+ raise "Wrong arg number, either 0 or 7."
87
128
  end
129
+ @ctx = nil
88
130
  end
89
131
 
90
132
  # Defines how to create the _entry_ _object_. The +proc+
91
- # takes a single argument which is a hash containing
92
- # parsing options. It must return a new _entry_ _object_.
133
+ # does not take arguments, but it must return a new
134
+ # _entry_ _object_.
93
135
  # Default:: creates an empty hash.
94
136
  def new_entry(&proc)
95
137
  @begin = proc
96
138
  end
97
139
 
98
140
  # Defines how to finalize an _entry_ _object_. The +proc+
99
- # takes three arguments:
141
+ # takes two arguments:
100
142
  # * The entry object ready to be finalized
101
143
  # * The context object
102
- # * An hash containing parsing options.
103
144
  # Default:: Adds the entry object to the context object using +<<+ method.
104
145
  def finish_entry(&proc)
105
146
  @end = proc
106
147
  end
107
148
 
108
149
  # Defines how to set the context before using the parser.
109
- # The +proc+ takes a single argument which is a hash containing
110
- # parsing options. It must return a _context_ object.
150
+ # The +proc+ does not take arguments. It must return a _context_ object.
111
151
  # Default:: creates an empty array
112
152
  def before (&proc)
113
153
  @before = proc
114
154
  end
115
155
 
116
156
  # Defines how to finalize the whole parsing.
117
- # The +proc+ takes two arguments:
157
+ # The +proc+ takes a single argument:
118
158
  # * The context object
119
- # * An hash containing parsing options.
120
159
  # The value returned by the +proc+ is then returned by the parsing method.
121
160
  # Default:: just returns the context object.
122
161
  def after(&proc)
123
162
  @after = proc
124
163
  end
125
164
 
165
+ # Helper methods accessible to rules and actions can be
166
+ # defined using this method.
167
+ def helpers(&proc)
168
+ @helpers = proc
169
+ end
170
+
126
171
  # Defines parsing rules inside a parser definition. The ParsingRules
127
172
  # methods can then be called inside the proc.
128
173
  def rules(&proc)
@@ -149,7 +194,7 @@ module Swiss
149
194
  # After extension, the new parser is independent of the original one,
150
195
  # i.e. a change to the original parser will not affect the derived one.
151
196
  def extend(&proc)
152
- clone = Parser.new( @actions, @separator, @before, @begin, @end, @after )
197
+ clone = Parser.new( @actions, @separator, @before, @begin, @end, @after, @helpers )
153
198
  clone.instance_eval( &proc )
154
199
  clone
155
200
  end
@@ -165,28 +210,30 @@ module Swiss
165
210
  # It returns the value specified in the +after+ block. By default,
166
211
  # it returns an array containing _entry_ objects.
167
212
  def parse_file( filename, params={} )
168
- context = @before.call( params )
213
+ @ctx = ParsingContext.new( params )
214
+ @ctx.instance_exec( &@helpers )
215
+ container = @ctx.instance_exec( &@before )
169
216
  File.open( filename, 'r' ) do |file|
170
- entry = @begin.call( params )
217
+ entry = @ctx.instance_exec( &@begin )
171
218
  file.each_line do |line|
172
219
  state = parse_line( line, entry )
173
220
  if state == :end
174
- @end.call( entry, context, params )
175
- entry = @begin.call( params )
221
+ @ctx.instance_exec( entry, container, &@end )
222
+ entry = @ctx.instance_exec( &@begin )
176
223
  end
177
224
  end
178
225
  end
179
- @after.call( context, params )
226
+ @ctx.instance_exec( container, &@after )
180
227
  end
181
228
 
182
229
  private
183
230
 
184
231
  PROTOTYPE = Parser.new
185
232
  PROTOTYPE.instance_eval do
186
- before { |p| [] }
187
- new_entry { |p| {} }
188
- finish_entry {|e,c,p| c << e }
189
- after {|c,p| c }
233
+ before { || [] }
234
+ new_entry { || {} }
235
+ finish_entry {|e,c| c << e }
236
+ after {|c| c }
190
237
  end
191
238
 
192
239
 
@@ -198,12 +245,12 @@ module Swiss
198
245
  key,value = $1,$2
199
246
  @last_key = key
200
247
  if @actions[key]
201
- @actions[key].call( value, holder )
248
+ @ctx.instance_exec( value, holder, &@actions[key] )
202
249
  end
203
250
  :parsing
204
251
  else
205
252
  if @actions[:text][@last_key]
206
- @actions[:text][@last_key].call( line, holder )
253
+ @ctx.instance_exec( line, holder, &@actions[:text][@last_key] )
207
254
  end
208
255
  :parsing
209
256
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swissparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - paradigmatic
@@ -42,6 +42,7 @@ files:
42
42
  - examples/kegg_demo.rb
43
43
  - examples/signal_demo.rb
44
44
  - examples/uniprot_demo.rb
45
+ - examples/uniprot_param_demo.rb
45
46
  - lib/swiss_parser.rb
46
47
  has_rdoc: true
47
48
  homepage: http://github.com/paradigmatic/SwissParser