swissparser 0.6.0 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ == 0.7.0 / 2009-11-14
2
+
3
+ * *Important* *change*: SwissParser is now required with:
4
+ require 'swissparser'
5
+
6
+ * Examples: added the example for tutorial 1.
7
+
1
8
  == 0.6.0 / 2009-11-13
2
9
 
3
10
  * 2 new features
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ rescue LoadError
5
5
  end
6
6
 
7
7
  ensure_in_path 'lib'
8
- require 'swiss_parser'
8
+ require 'swissparser'
9
9
 
10
10
  #task :default => 'test:run'
11
11
  #task 'gem:release' => 'test:run'
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
17
17
  along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
18
  =end
19
19
 
20
- require 'swiss_parser.rb'
20
+ require 'swissparser.rb'
21
21
  require 'yaml'
22
22
 
23
23
  class Enzyme
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
17
17
  along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
18
  =end
19
19
 
20
- require 'swiss_parser.rb'
20
+ require 'swissparser.rb'
21
21
  require 'yaml'
22
22
 
23
23
  class Protein
@@ -20,7 +20,7 @@ along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
20
20
  #!/usr/bin/ruby -w
21
21
 
22
22
  require 'yaml'
23
- require 'swiss_parser.rb'
23
+ require 'swissparser.rb'
24
24
 
25
25
  class Protein
26
26
 
@@ -33,48 +33,53 @@ class Protein
33
33
 
34
34
  end
35
35
 
36
+ module Uniprot
36
37
 
37
- uniprot_parser = Swiss::Parser.define do
38
+ Parser = Swiss::Parser.define do
38
39
 
39
- new_entry do
40
- Protein.new
41
- end
42
-
43
- rules do
44
-
45
- with("ID") do |content,protein|
46
- content =~ /([A-Z]\w+)\D+(\d+)/
47
- protein.id = $1
48
- protein.size = $2.to_i
40
+ # Each entry must be stored in a Protein instance
41
+ new_entry do
42
+ Protein.new
49
43
  end
50
44
 
51
- with("OS") do |content,protein|
52
- content =~ /(\w+ \w+)/
53
- protein.species = $1
45
+ rules do
46
+
47
+ # Parse the uniprot id
48
+ with("ID") do |content,protein|
49
+ content =~ /([A-Z]\w+)\D+(\d+)/
50
+ protein.id = $1
51
+ protein.size = $2.to_i
52
+ end
53
+
54
+ # Parse the organism
55
+ with("OS") do |content,protein|
56
+ content =~ /(\w+ \w+)/
57
+ protein.species = $1
58
+ end
59
+
60
+ # Parse the complete taxonomy
61
+ with("OC") do |content,protein|
62
+ ary = content.gsub(".","").split("; ")
63
+ protein.taxonomy += ary
64
+ end
65
+
66
+ # Parse the Sequence
67
+ with_text_after("SQ") do |content,protein|
68
+ seq = content.strip.gsub(" ","")
69
+ protein.sequence += seq
70
+ end
71
+
54
72
  end
55
73
 
56
- with("OC") do |content,protein|
57
- ary = content.gsub(".","").split("; ")
58
- protein.taxonomy += ary
59
- end
60
-
61
- with_text_after("SQ") do |content,protein|
62
- seq = content.strip.gsub(" ","")
63
- protein.sequence += seq
64
- end
65
-
66
74
  end
67
-
75
+
68
76
  end
69
-
70
-
77
+
71
78
  if $0 == __FILE__
72
-
79
+
73
80
  filename = ARGV.shift
74
81
 
75
- entries = uniprot_parser.parse_file( filename )
76
-
77
- puts entries.size
82
+ entries = Uniprot::Parser.parse_file( filename )
78
83
 
79
84
  entries.each do |e|
80
85
  puts e.to_yaml
@@ -20,7 +20,7 @@ along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
20
20
  #!/usr/bin/ruby -w
21
21
 
22
22
  require 'yaml'
23
- require 'swiss_parser.rb'
23
+ require 'swissparser.rb'
24
24
 
25
25
  class Protein
26
26
 
data/lib/swiss_parser.rb CHANGED
@@ -1,261 +1,13 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
1
+ Msg = <<HERE
3
2
 
4
- This file is part of SwissParser.
3
+ =====================================================
4
+ Since version 0.7, SwissParser is now required with:
5
5
 
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
6
+ require 'swissparser'
10
7
 
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
8
+ Please update your code.
9
+ =====================================================
10
+ HERE
15
11
 
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
12
 
20
- module Swiss
21
-
22
- VERSION = "0.6.0"
23
-
24
- # This class defines parsing rules. Its methods
25
- # are accessible within the +rules+ section of
26
- # a parser definition.
27
- class ParsingRules
28
-
29
- attr_reader :separator, :actions
30
-
31
- # *Do* *not* create directly this class but access it
32
- # through a +rules+ section in a parser definition.
33
- def initialize
34
- @actions = { :text => {} }
35
- end
36
-
37
- # Sets the entry separator line. Default: "//"
38
- def set_separator(string)
39
- @separator = string
40
- end
41
-
42
- # Defines how to parse a line starting with +key+. The +proc+
43
- # takes two arguments:
44
- # * the rest of the line
45
- # * the entry object
46
- def with( key, &proc )
47
- @actions[key] = proc
48
- end
49
-
50
- # Defines how to parse a line without key coming *after*
51
- # a specified key. The +proc+ takes two arguments:
52
- # * the rest of the line
53
- # * the entry object
54
- def with_text_after( key, &proc )
55
- @actions[:text][key] = proc
56
- end
57
-
58
- end
59
-
60
- # Methods of this class are accessible to rules and actions.
61
- # Methods defined in +helpers+ block are added to this class.
62
- class ParsingContext
63
-
64
- def initialize(parameters)
65
- @params = parameters
66
- end
67
-
68
- # Retrieves a parsing parameter by key. Returns nil if
69
- # there is no parameter with the provided key.
70
- def param( key )
71
- @params[key]
72
- end
73
-
74
-
75
- module InstanceExecHelper #:nodoc:
76
- end
77
-
78
- include InstanceExecHelper
79
-
80
- #Used to execute rules and action using the ParsingContext as context
81
- #Stolen from http://eigenclass.org/hiki/bounded+space+instance_exec
82
- def instance_exec(*args, &block)
83
- begin
84
- old_critical, Thread.critical = Thread.critical, true
85
- n = 0
86
- n += 1 while respond_to?(mname="__instance_exec#{n}")
87
- InstanceExecHelper.module_eval{ define_method(mname, &block) }
88
- ensure
89
- Thread.critical = old_critical
90
- end
91
- begin
92
- ret = send(mname, *args)
93
- ensure
94
- InstanceExecHelper.module_eval{ remove_method(mname) } rescue nil
95
- end
96
- ret
97
- end
98
-
99
- end
100
-
101
-
102
- # Parser for a typical bioinformatic flat file.
103
- class Parser
104
-
105
- #Default entry separator
106
- DEFAULT_SEPARATOR = "//"
107
-
108
- #*Do* *not* *use* this method to instatiate a parser. Use rather
109
- #the +define+ class method.
110
- def initialize(*args)
111
- if args.size == 0
112
- @separator = DEFAULT_SEPARATOR
113
- @actions = {}
114
- @actions[:text] = {}
115
- @helpers = lambda {}
116
- elsif args.size == 7
117
- actions,separator,before,the_begin,the_end,after,helpers = *args
118
- @actions = actions.clone
119
- @actions[:text] = actions[:text].clone
120
- @separator = separator
121
- @before = before
122
- @end = the_end
123
- @begin = the_begin
124
- @after = after
125
- @helpers = helpers
126
- else
127
- raise "Wrong arg number, either 0 or 7."
128
- end
129
- @ctx = nil
130
- end
131
-
132
- # Defines how to create the _entry_ _object_. The +proc+
133
- # does not take arguments, but it must return a new
134
- # _entry_ _object_.
135
- # Default:: creates an empty hash.
136
- def new_entry(&proc)
137
- @begin = proc
138
- end
139
-
140
- # Defines how to finalize an _entry_ _object_. The +proc+
141
- # takes two arguments:
142
- # * The entry object ready to be finalized
143
- # * The context object
144
- # Default:: Adds the entry object to the context object using +<<+ method.
145
- def finish_entry(&proc)
146
- @end = proc
147
- end
148
-
149
- # Defines how to set the context before using the parser.
150
- # The +proc+ does not take arguments. It must return a _context_ object.
151
- # Default:: creates an empty array
152
- def before (&proc)
153
- @before = proc
154
- end
155
-
156
- # Defines how to finalize the whole parsing.
157
- # The +proc+ takes a single argument:
158
- # * The context object
159
- # The value returned by the +proc+ is then returned by the parsing method.
160
- # Default:: just returns the context object.
161
- def after(&proc)
162
- @after = proc
163
- end
164
-
165
- # Helpers methods accessible to rules and actions can be
166
- # defined using this method.
167
- def helpers(&proc)
168
- @helpers = proc
169
- end
170
-
171
- # Defines parsing rules inside a parser definition. The ParsingRules
172
- # methods can then be called inside the proc.
173
- def rules(&proc)
174
- r = ParsingRules.new
175
- r.instance_eval(&proc)
176
- r.actions.each do |k,v|
177
- if k == :text
178
- next
179
- end
180
- @actions[k] = v
181
- r.actions[:text].each do |k,v|
182
- @actions[:text][k] = v
183
- end
184
- if r.separator
185
- @separator = r.separator
186
- end
187
- end
188
- end
189
-
190
-
191
-
192
- # Extends an existing parser by allowing to redefine rules. The
193
- # changes in the new parser simply replace the original defintions.
194
- # After extension, the new parser is independent of the original one,
195
- # i.e. a change to the original parser will not affect the derived one.
196
- def extend(&proc)
197
- clone = Parser.new( @actions, @separator, @before, @begin, @end, @after, @helpers )
198
- clone.instance_eval( &proc )
199
- clone
200
- end
201
-
202
- # Defines a new parser.
203
- def self.define( &proc )
204
- PROTOTYPE.extend( &proc )
205
- end
206
-
207
- # Parses a file specified by +filename+. An optional hash
208
- # of arbitrary arguments (+params+) can be specified. It is
209
- # passed to the workflow methods blocks (+before+, +new_entry+, ...)
210
- # It returns the value specified in the +after+ block. By default,
211
- # it returns an array containing _entry_ objects.
212
- def parse_file( filename, params={} )
213
- @ctx = ParsingContext.new( params )
214
- @ctx.instance_exec( &@helpers )
215
- container = @ctx.instance_exec( &@before )
216
- File.open( filename, 'r' ) do |file|
217
- entry = @ctx.instance_exec( &@begin )
218
- file.each_line do |line|
219
- state = parse_line( line, entry )
220
- if state == :end
221
- @ctx.instance_exec( entry, container, &@end )
222
- entry = @ctx.instance_exec( &@begin )
223
- end
224
- end
225
- end
226
- @ctx.instance_exec( container, &@after )
227
- end
228
-
229
- private
230
-
231
- PROTOTYPE = Parser.new
232
- PROTOTYPE.instance_eval do
233
- before { || [] }
234
- new_entry { || {} }
235
- finish_entry {|e,c| c << e }
236
- after {|c| c }
237
- end
238
-
239
-
240
- def parse_line( line, holder )
241
- line.chomp!
242
- if line == @separator
243
- :end
244
- elsif line =~ /^(\S+)\s+(.*)$/
245
- key,value = $1,$2
246
- @last_key = key
247
- if @actions[key]
248
- @ctx.instance_exec( value, holder, &@actions[key] )
249
- end
250
- :parsing
251
- else
252
- if @actions[:text][@last_key]
253
- @ctx.instance_exec( line, holder, &@actions[:text][@last_key] )
254
- end
255
- :parsing
256
- end
257
- end
258
-
259
- end
260
-
261
- end
13
+ fail(Msg)
@@ -0,0 +1,261 @@
1
+ =begin
2
+ Copyright (C) 2009 Paradigmatic
3
+
4
+ This file is part of SwissParser.
5
+
6
+ SwissParser is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ SwissParser is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
+ =end
19
+
20
+ module Swiss
21
+
22
+ VERSION = "0.7.0"
23
+
24
+ # This class defines parsing rules. Its methods
25
+ # are accessible within the +rules+ section of
26
+ # a parser definition.
27
+ class ParsingRules
28
+
29
+ attr_reader :separator, :actions
30
+
31
+ # *Do* *not* create this class directly but access it
32
+ # through a +rules+ section in a parser definition.
33
+ def initialize
34
+ @actions = { :text => {} }
35
+ end
36
+
37
+ # Sets the entry separator line. Default: "//"
38
+ def set_separator(string)
39
+ @separator = string
40
+ end
41
+
42
+ # Defines how to parse a line starting with +key+. The +proc+
43
+ # takes two arguments:
44
+ # * the rest of the line
45
+ # * the entry object
46
+ def with( key, &proc )
47
+ @actions[key] = proc
48
+ end
49
+
50
+ # Defines how to parse a line without key coming *after*
51
+ # a specified key. The +proc+ takes two arguments:
52
+ # * the rest of the line
53
+ # * the entry object
54
+ def with_text_after( key, &proc )
55
+ @actions[:text][key] = proc
56
+ end
57
+
58
+ end
59
+
60
+ # Methods of this class are accessible to rules and actions.
61
+ # Methods defined in +helpers+ block are added to this class.
62
+ class ParsingContext
63
+
64
+ def initialize(parameters)
65
+ @params = parameters
66
+ end
67
+
68
+ # Retrieves a parsing parameter by key. Returns nil if
69
+ # there is no parameter with the provided key.
70
+ def param( key )
71
+ @params[key]
72
+ end
73
+
74
+
75
+ module InstanceExecHelper #:nodoc:
76
+ end
77
+
78
+ include InstanceExecHelper
79
+
80
+ #Used to execute rules and action using the ParsingContext as context
81
+ #Stolen from http://eigenclass.org/hiki/bounded+space+instance_exec
82
+ def instance_exec(*args, &block)
83
+ begin
84
+ old_critical, Thread.critical = Thread.critical, true
85
+ n = 0
86
+ n += 1 while respond_to?(mname="__instance_exec#{n}")
87
+ InstanceExecHelper.module_eval{ define_method(mname, &block) }
88
+ ensure
89
+ Thread.critical = old_critical
90
+ end
91
+ begin
92
+ ret = send(mname, *args)
93
+ ensure
94
+ InstanceExecHelper.module_eval{ remove_method(mname) } rescue nil
95
+ end
96
+ ret
97
+ end
98
+
99
+ end
100
+
101
+
102
+ # Parser for a typical bioinformatic flat file.
103
+ class Parser
104
+
105
+ #Default entry separator
106
+ DEFAULT_SEPARATOR = "//"
107
+
108
+ #*Do* *not* *use* this method to instantiate a parser. Rather, use
109
+ #the +define+ class method.
110
+ def initialize(*args)
111
+ if args.size == 0
112
+ @separator = DEFAULT_SEPARATOR
113
+ @actions = {}
114
+ @actions[:text] = {}
115
+ @helpers = lambda {}
116
+ elsif args.size == 7
117
+ actions,separator,before,the_begin,the_end,after,helpers = *args
118
+ @actions = actions.clone
119
+ @actions[:text] = actions[:text].clone
120
+ @separator = separator
121
+ @before = before
122
+ @end = the_end
123
+ @begin = the_begin
124
+ @after = after
125
+ @helpers = helpers
126
+ else
127
+ raise "Wrong arg number, either 0 or 7."
128
+ end
129
+ @ctx = nil
130
+ end
131
+
132
+ # Defines how to create the _entry_ _object_. The +proc+
133
+ # does not take arguments, but it must return a new
134
+ # _entry_ _object_.
135
+ # Default:: creates an empty hash.
136
+ def new_entry(&proc)
137
+ @begin = proc
138
+ end
139
+
140
+ # Defines how to finalize an _entry_ _object_. The +proc+
141
+ # takes two arguments:
142
+ # * The entry object ready to be finalized
143
+ # * The context object
144
+ # Default:: Adds the entry object to the context object using +<<+ method.
145
+ def finish_entry(&proc)
146
+ @end = proc
147
+ end
148
+
149
+ # Defines how to set the context before using the parser.
150
+ # The +proc+ does not take arguments. It must return a _context_ object.
151
+ # Default:: creates an empty array
152
+ def before (&proc)
153
+ @before = proc
154
+ end
155
+
156
+ # Defines how to finalize the whole parsing.
157
+ # The +proc+ takes a single argument:
158
+ # * The context object
159
+ # The value returned by the +proc+ is then returned by the parsing method.
160
+ # Default:: just returns the context object.
161
+ def after(&proc)
162
+ @after = proc
163
+ end
164
+
165
+ # Helper methods accessible to rules and actions can be
166
+ # defined using this method.
167
+ def helpers(&proc)
168
+ @helpers = proc
169
+ end
170
+
171
+ # Defines parsing rules inside a parser definition. The ParsingRules
172
+ # methods can then be called inside the proc.
173
+ def rules(&proc)
174
+ r = ParsingRules.new
175
+ r.instance_eval(&proc)
176
+ r.actions.each do |k,v|
177
+ if k == :text
178
+ next
179
+ end
180
+ @actions[k] = v
181
+ r.actions[:text].each do |k,v|
182
+ @actions[:text][k] = v
183
+ end
184
+ if r.separator
185
+ @separator = r.separator
186
+ end
187
+ end
188
+ end
189
+
190
+
191
+
192
+ # Extends an existing parser by allowing rules to be redefined. The
193
+ # changes in the new parser simply replace the original definitions.
194
+ # After extension, the new parser is independent of the original one,
195
+ # i.e. a change to the original parser will not affect the derived one.
196
+ def extend(&proc)
197
+ clone = Parser.new( @actions, @separator, @before, @begin, @end, @after, @helpers )
198
+ clone.instance_eval( &proc )
199
+ clone
200
+ end
201
+
202
+ # Defines a new parser.
203
+ def self.define( &proc )
204
+ PROTOTYPE.extend( &proc )
205
+ end
206
+
207
+ # Parses a file specified by +filename+. An optional hash
208
+ # of arbitrary arguments (+params+) can be specified. It is
209
+ # passed to the workflow methods blocks (+before+, +new_entry+, ...)
210
+ # It returns the value specified in the +after+ block. By default,
211
+ # it returns an array containing _entry_ objects.
212
+ def parse_file( filename, params={} )
213
+ @ctx = ParsingContext.new( params )
214
+ @ctx.instance_exec( &@helpers )
215
+ container = @ctx.instance_exec( &@before )
216
+ File.open( filename, 'r' ) do |file|
217
+ entry = @ctx.instance_exec( &@begin )
218
+ file.each_line do |line|
219
+ state = parse_line( line, entry )
220
+ if state == :end
221
+ @ctx.instance_exec( entry, container, &@end )
222
+ entry = @ctx.instance_exec( &@begin )
223
+ end
224
+ end
225
+ end
226
+ @ctx.instance_exec( container, &@after )
227
+ end
228
+
229
+ private
230
+
231
+ PROTOTYPE = Parser.new
232
+ PROTOTYPE.instance_eval do
233
+ before { || [] }
234
+ new_entry { || {} }
235
+ finish_entry {|e,c| c << e }
236
+ after {|c| c }
237
+ end
238
+
239
+
240
+ def parse_line( line, holder )
241
+ line.chomp!
242
+ if line == @separator
243
+ :end
244
+ elsif line =~ /^(\S+)\s+(.*)$/
245
+ key,value = $1,$2
246
+ @last_key = key
247
+ if @actions[key]
248
+ @ctx.instance_exec( value, holder, &@actions[key] )
249
+ end
250
+ :parsing
251
+ else
252
+ if @actions[:text][@last_key]
253
+ @ctx.instance_exec( line, holder, &@actions[:text][@last_key] )
254
+ end
255
+ :parsing
256
+ end
257
+ end
258
+
259
+ end
260
+
261
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swissparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - paradigmatic
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-13 00:00:00 +01:00
12
+ date: 2009-11-14 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -41,9 +41,10 @@ files:
41
41
  - examples/data/uniprot.txt
42
42
  - examples/kegg_demo.rb
43
43
  - examples/signal_demo.rb
44
- - examples/uniprot_demo.rb
44
+ - examples/tutorial_1.rb
45
45
  - examples/uniprot_param_demo.rb
46
46
  - lib/swiss_parser.rb
47
+ - lib/swissparser.rb
47
48
  has_rdoc: true
48
49
  homepage: http://github.com/paradigmatic/SwissParser
49
50
  licenses: []