swissparser 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt CHANGED
@@ -1,3 +1,10 @@
1
+ == 0.7.0 / 2009-11-14
2
+
3
+ * *Important* *change*: SwissParser is now required with:
4
+ require 'swissparser'
5
+
6
+ * Examples: added the example for tutorial 1.
7
+
1
8
  == 0.6.0 / 2009-11-13
2
9
 
3
10
  * 2 new features
data/Rakefile CHANGED
@@ -5,7 +5,7 @@ rescue LoadError
5
5
  end
6
6
 
7
7
  ensure_in_path 'lib'
8
- require 'swiss_parser'
8
+ require 'swissparser'
9
9
 
10
10
  #task :default => 'test:run'
11
11
  #task 'gem:release' => 'test:run'
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
17
17
  along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
18
  =end
19
19
 
20
- require 'swiss_parser.rb'
20
+ require 'swissparser.rb'
21
21
  require 'yaml'
22
22
 
23
23
  class Enzyme
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
17
17
  along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
18
  =end
19
19
 
20
- require 'swiss_parser.rb'
20
+ require 'swissparser.rb'
21
21
  require 'yaml'
22
22
 
23
23
  class Protein
@@ -20,7 +20,7 @@ along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
20
20
  #!/usr/bin/ruby -w
21
21
 
22
22
  require 'yaml'
23
- require 'swiss_parser.rb'
23
+ require 'swissparser.rb'
24
24
 
25
25
  class Protein
26
26
 
@@ -33,48 +33,53 @@ class Protein
33
33
 
34
34
  end
35
35
 
36
+ module Uniprot
36
37
 
37
- uniprot_parser = Swiss::Parser.define do
38
+ Parser = Swiss::Parser.define do
38
39
 
39
- new_entry do
40
- Protein.new
41
- end
42
-
43
- rules do
44
-
45
- with("ID") do |content,protein|
46
- content =~ /([A-Z]\w+)\D+(\d+)/
47
- protein.id = $1
48
- protein.size = $2.to_i
40
+ # Each entry must be stored in a Protein instance
41
+ new_entry do
42
+ Protein.new
49
43
  end
50
44
 
51
- with("OS") do |content,protein|
52
- content =~ /(\w+ \w+)/
53
- protein.species = $1
45
+ rules do
46
+
47
+ # Parse the uniprot id
48
+ with("ID") do |content,protein|
49
+ content =~ /([A-Z]\w+)\D+(\d+)/
50
+ protein.id = $1
51
+ protein.size = $2.to_i
52
+ end
53
+
54
+ # Parse the organism
55
+ with("OS") do |content,protein|
56
+ content =~ /(\w+ \w+)/
57
+ protein.species = $1
58
+ end
59
+
60
+ # Parse the complete taxonomy
61
+ with("OC") do |content,protein|
62
+ ary = content.gsub(".","").split("; ")
63
+ protein.taxonomy += ary
64
+ end
65
+
66
+ # Parse the Sequence
67
+ with_text_after("SQ") do |content,protein|
68
+ seq = content.strip.gsub(" ","")
69
+ protein.sequence += seq
70
+ end
71
+
54
72
  end
55
73
 
56
- with("OC") do |content,protein|
57
- ary = content.gsub(".","").split("; ")
58
- protein.taxonomy += ary
59
- end
60
-
61
- with_text_after("SQ") do |content,protein|
62
- seq = content.strip.gsub(" ","")
63
- protein.sequence += seq
64
- end
65
-
66
74
  end
67
-
75
+
68
76
  end
69
-
70
-
77
+
71
78
  if $0 == __FILE__
72
-
79
+
73
80
  filename = ARGV.shift
74
81
 
75
- entries = uniprot_parser.parse_file( filename )
76
-
77
- puts entries.size
82
+ entries = Uniprot::Parser.parse_file( filename )
78
83
 
79
84
  entries.each do |e|
80
85
  puts e.to_yaml
@@ -20,7 +20,7 @@ along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
20
20
  #!/usr/bin/ruby -w
21
21
 
22
22
  require 'yaml'
23
- require 'swiss_parser.rb'
23
+ require 'swissparser.rb'
24
24
 
25
25
  class Protein
26
26
 
data/lib/swiss_parser.rb CHANGED
@@ -1,261 +1,13 @@
1
- =begin
2
- Copyright (C) 2009 Paradigmatic
1
+ Msg = <<HERE
3
2
 
4
- This file is part of SwissParser.
3
+ =====================================================
4
+ Since version 0.7, SwissParser is now required with:
5
5
 
6
- SwissParser is free software: you can redistribute it and/or modify
7
- it under the terms of the GNU General Public License as published by
8
- the Free Software Foundation, either version 3 of the License, or
9
- (at your option) any later version.
6
+ require 'swissparser'
10
7
 
11
- SwissParser is distributed in the hope that it will be useful,
12
- but WITHOUT ANY WARRANTY; without even the implied warranty of
13
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
- GNU General Public License for more details.
8
+ Please update your code.
9
+ =====================================================
10
+ HERE
15
11
 
16
- You should have received a copy of the GNU General Public License
17
- along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
- =end
19
12
 
20
- module Swiss
21
-
22
- VERSION = "0.6.0"
23
-
24
- # This class defines parsing rules. Its methods
25
- # are accessible within the +rules+ section of
26
- # a parser definition.
27
- class ParsingRules
28
-
29
- attr_reader :separator, :actions
30
-
31
- # *Do* *not* create directly this class but access it
32
- # through a +rules+ section in a parser definition.
33
- def initialize
34
- @actions = { :text => {} }
35
- end
36
-
37
- # Sets the entry separator line. Default: "//"
38
- def set_separator(string)
39
- @separator = string
40
- end
41
-
42
- # Defines how to parse a line starting with +key+. The +proc+
43
- # takes two arguments:
44
- # * the rest of the line
45
- # * the entry object
46
- def with( key, &proc )
47
- @actions[key] = proc
48
- end
49
-
50
- # Defines how to parse a line without key coming *after*
51
- # a specified key. The +proc+ takes two arguments:
52
- # * the rest of the line
53
- # * the entry object
54
- def with_text_after( key, &proc )
55
- @actions[:text][key] = proc
56
- end
57
-
58
- end
59
-
60
- # Methods of this class are accessible to rules and actions.
61
- # Methods defined in +helpers+ block are added to this class.
62
- class ParsingContext
63
-
64
- def initialize(parameters)
65
- @params = parameters
66
- end
67
-
68
- # Retrieves a parsing parameter by key. Returns nil if
69
- # there is no parameter with the provided key.
70
- def param( key )
71
- @params[key]
72
- end
73
-
74
-
75
- module InstanceExecHelper #:nodoc:
76
- end
77
-
78
- include InstanceExecHelper
79
-
80
- #Used to execute rules and action using the ParsingContext as context
81
- #Stolen from http://eigenclass.org/hiki/bounded+space+instance_exec
82
- def instance_exec(*args, &block)
83
- begin
84
- old_critical, Thread.critical = Thread.critical, true
85
- n = 0
86
- n += 1 while respond_to?(mname="__instance_exec#{n}")
87
- InstanceExecHelper.module_eval{ define_method(mname, &block) }
88
- ensure
89
- Thread.critical = old_critical
90
- end
91
- begin
92
- ret = send(mname, *args)
93
- ensure
94
- InstanceExecHelper.module_eval{ remove_method(mname) } rescue nil
95
- end
96
- ret
97
- end
98
-
99
- end
100
-
101
-
102
- # Parser for a typical bioinformatic flat file.
103
- class Parser
104
-
105
- #Default entry separator
106
- DEFAULT_SEPARATOR = "//"
107
-
108
- #*Do* *not* *use* this method to instatiate a parser. Use rather
109
- #the +define+ class method.
110
- def initialize(*args)
111
- if args.size == 0
112
- @separator = DEFAULT_SEPARATOR
113
- @actions = {}
114
- @actions[:text] = {}
115
- @helpers = lambda {}
116
- elsif args.size == 7
117
- actions,separator,before,the_begin,the_end,after,helpers = *args
118
- @actions = actions.clone
119
- @actions[:text] = actions[:text].clone
120
- @separator = separator
121
- @before = before
122
- @end = the_end
123
- @begin = the_begin
124
- @after = after
125
- @helpers = helpers
126
- else
127
- raise "Wrong arg number, either 0 or 7."
128
- end
129
- @ctx = nil
130
- end
131
-
132
- # Defines how to create the _entry_ _object_. The +proc+
133
- # does not take arguments, but it must return a new
134
- # _entry_ _object_.
135
- # Default:: creates an empty hash.
136
- def new_entry(&proc)
137
- @begin = proc
138
- end
139
-
140
- # Defines how to finalize an _entry_ _object_. The +proc+
141
- # takes two arguments:
142
- # * The entry object ready to be finalized
143
- # * The context object
144
- # Default:: Adds the entry object to the context object using +<<+ method.
145
- def finish_entry(&proc)
146
- @end = proc
147
- end
148
-
149
- # Defines how to set the context before using the parser.
150
- # The +proc+ does not take arguments. It must return a _context_ object.
151
- # Default:: creates an empty array
152
- def before (&proc)
153
- @before = proc
154
- end
155
-
156
- # Defines how to finalize the whole parsing.
157
- # The +proc+ takes a single argument:
158
- # * The context object
159
- # The value returned by the +proc+ is then returned by the parsing method.
160
- # Default:: just returns the context object.
161
- def after(&proc)
162
- @after = proc
163
- end
164
-
165
- # Helpers methods accessible to rules and actions can be
166
- # defined using this method.
167
- def helpers(&proc)
168
- @helpers = proc
169
- end
170
-
171
- # Defines parsing rules inside a parser definition. The ParsingRules
172
- # methods can then be called inside the proc.
173
- def rules(&proc)
174
- r = ParsingRules.new
175
- r.instance_eval(&proc)
176
- r.actions.each do |k,v|
177
- if k == :text
178
- next
179
- end
180
- @actions[k] = v
181
- r.actions[:text].each do |k,v|
182
- @actions[:text][k] = v
183
- end
184
- if r.separator
185
- @separator = r.separator
186
- end
187
- end
188
- end
189
-
190
-
191
-
192
- # Extends an existing parser by allowing to redefine rules. The
193
- # changes in the new parser simply replace the original defintions.
194
- # After extension, the new parser is independent of the original one,
195
- # i.e. a change to the original parser will not affect the derived one.
196
- def extend(&proc)
197
- clone = Parser.new( @actions, @separator, @before, @begin, @end, @after, @helpers )
198
- clone.instance_eval( &proc )
199
- clone
200
- end
201
-
202
- # Defines a new parser.
203
- def self.define( &proc )
204
- PROTOTYPE.extend( &proc )
205
- end
206
-
207
- # Parses a file specified by +filename+. An optional hash
208
- # of arbitrary arguments (+params+) can be specified. It is
209
- # passed to the workflow methods blocks (+before+, +new_entry+, ...)
210
- # It returns the value specified in the +after+ block. By default,
211
- # it returns an array containing _entry_ objects.
212
- def parse_file( filename, params={} )
213
- @ctx = ParsingContext.new( params )
214
- @ctx.instance_exec( &@helpers )
215
- container = @ctx.instance_exec( &@before )
216
- File.open( filename, 'r' ) do |file|
217
- entry = @ctx.instance_exec( &@begin )
218
- file.each_line do |line|
219
- state = parse_line( line, entry )
220
- if state == :end
221
- @ctx.instance_exec( entry, container, &@end )
222
- entry = @ctx.instance_exec( &@begin )
223
- end
224
- end
225
- end
226
- @ctx.instance_exec( container, &@after )
227
- end
228
-
229
- private
230
-
231
- PROTOTYPE = Parser.new
232
- PROTOTYPE.instance_eval do
233
- before { || [] }
234
- new_entry { || {} }
235
- finish_entry {|e,c| c << e }
236
- after {|c| c }
237
- end
238
-
239
-
240
- def parse_line( line, holder )
241
- line.chomp!
242
- if line == @separator
243
- :end
244
- elsif line =~ /^(\S+)\s+(.*)$/
245
- key,value = $1,$2
246
- @last_key = key
247
- if @actions[key]
248
- @ctx.instance_exec( value, holder, &@actions[key] )
249
- end
250
- :parsing
251
- else
252
- if @actions[:text][@last_key]
253
- @ctx.instance_exec( line, holder, &@actions[:text][@last_key] )
254
- end
255
- :parsing
256
- end
257
- end
258
-
259
- end
260
-
261
- end
13
+ fail(Msg)
@@ -0,0 +1,261 @@
1
+ =begin
2
+ Copyright (C) 2009 Paradigmatic
3
+
4
+ This file is part of SwissParser.
5
+
6
+ SwissParser is free software: you can redistribute it and/or modify
7
+ it under the terms of the GNU General Public License as published by
8
+ the Free Software Foundation, either version 3 of the License, or
9
+ (at your option) any later version.
10
+
11
+ SwissParser is distributed in the hope that it will be useful,
12
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ GNU General Public License for more details.
15
+
16
+ You should have received a copy of the GNU General Public License
17
+ along with SwissParser. If not, see <http://www.gnu.org/licenses/>.
18
+ =end
19
+
20
+ module Swiss
21
+
22
+ VERSION = "0.7.0"
23
+
24
+ # This class defines parsing rules. Its methods
25
+ # are accessible within the +rules+ section of
26
+ # a parser definition.
27
+ class ParsingRules
28
+
29
+ attr_reader :separator, :actions
30
+
31
+ # *Do* *not* create this class directly but access it
32
+ # through a +rules+ section in a parser definition.
33
+ def initialize
34
+ @actions = { :text => {} }
35
+ end
36
+
37
+ # Sets the entry separator line. Default: "//"
38
+ def set_separator(string)
39
+ @separator = string
40
+ end
41
+
42
+ # Defines how to parse a line starting with +key+. The +proc+
43
+ # takes two arguments:
44
+ # * the rest of the line
45
+ # * the entry object
46
+ def with( key, &proc )
47
+ @actions[key] = proc
48
+ end
49
+
50
+ # Defines how to parse a line without key coming *after*
51
+ # a specified key. The +proc+ takes two arguments:
52
+ # * the rest of the line
53
+ # * the entry object
54
+ def with_text_after( key, &proc )
55
+ @actions[:text][key] = proc
56
+ end
57
+
58
+ end
59
+
60
+ # Methods of this class are accessible to rules and actions.
61
+ # Methods defined in +helpers+ block are added to this class.
62
+ class ParsingContext
63
+
64
+ def initialize(parameters)
65
+ @params = parameters
66
+ end
67
+
68
+ # Retrieves a parsing parameter by key. Returns nil if
69
+ # there is no parameter with the provided key.
70
+ def param( key )
71
+ @params[key]
72
+ end
73
+
74
+
75
+ module InstanceExecHelper #:nodoc:
76
+ end
77
+
78
+ include InstanceExecHelper
79
+
80
+ #Used to execute rules and action using the ParsingContext as context
81
+ #Stolen from http://eigenclass.org/hiki/bounded+space+instance_exec
82
+ def instance_exec(*args, &block)
83
+ begin
84
+ old_critical, Thread.critical = Thread.critical, true
85
+ n = 0
86
+ n += 1 while respond_to?(mname="__instance_exec#{n}")
87
+ InstanceExecHelper.module_eval{ define_method(mname, &block) }
88
+ ensure
89
+ Thread.critical = old_critical
90
+ end
91
+ begin
92
+ ret = send(mname, *args)
93
+ ensure
94
+ InstanceExecHelper.module_eval{ remove_method(mname) } rescue nil
95
+ end
96
+ ret
97
+ end
98
+
99
+ end
100
+
101
+
102
+ # Parser for a typical bioinformatic flat file.
103
+ class Parser
104
+
105
+ #Default entry separator
106
+ DEFAULT_SEPARATOR = "//"
107
+
108
+ #*Do* *not* *use* this method to instantiate a parser. Use rather
109
+ #the +define+ class method.
110
+ def initialize(*args)
111
+ if args.size == 0
112
+ @separator = DEFAULT_SEPARATOR
113
+ @actions = {}
114
+ @actions[:text] = {}
115
+ @helpers = lambda {}
116
+ elsif args.size == 7
117
+ actions,separator,before,the_begin,the_end,after,helpers = *args
118
+ @actions = actions.clone
119
+ @actions[:text] = actions[:text].clone
120
+ @separator = separator
121
+ @before = before
122
+ @end = the_end
123
+ @begin = the_begin
124
+ @after = after
125
+ @helpers = helpers
126
+ else
127
+ raise "Wrong arg number, either 0 or 7."
128
+ end
129
+ @ctx = nil
130
+ end
131
+
132
+ # Defines how to create the _entry_ _object_. The +proc+
133
+ # does not take arguments, but it must return a new
134
+ # _entry_ _object_.
135
+ # Default:: creates an empty hash.
136
+ def new_entry(&proc)
137
+ @begin = proc
138
+ end
139
+
140
+ # Defines how to finalize an _entry_ _object_. The +proc+
141
+ # takes two arguments:
142
+ # * The entry object ready to be finalized
143
+ # * The context object
144
+ # Default:: Adds the entry object to the context object using +<<+ method.
145
+ def finish_entry(&proc)
146
+ @end = proc
147
+ end
148
+
149
+ # Defines how to set the context before using the parser.
150
+ # The +proc+ does not take arguments. It must return a _context_ object.
151
+ # Default:: creates an empty array
152
+ def before (&proc)
153
+ @before = proc
154
+ end
155
+
156
+ # Defines how to finalize the whole parsing.
157
+ # The +proc+ takes a single argument:
158
+ # * The context object
159
+ # The value returned by the +proc+ is then returned by the parsing method.
160
+ # Default:: just returns the context object.
161
+ def after(&proc)
162
+ @after = proc
163
+ end
164
+
165
+ # Helper methods accessible to rules and actions can be
166
+ # defined using this method.
167
+ def helpers(&proc)
168
+ @helpers = proc
169
+ end
170
+
171
+ # Defines parsing rules inside a parser definition. The ParsingRules
172
+ # methods can then be called inside the proc.
173
+ def rules(&proc)
174
+ r = ParsingRules.new
175
+ r.instance_eval(&proc)
176
+ r.actions.each do |k,v|
177
+ if k == :text
178
+ next
179
+ end
180
+ @actions[k] = v
181
+ r.actions[:text].each do |k,v|
182
+ @actions[:text][k] = v
183
+ end
184
+ if r.separator
185
+ @separator = r.separator
186
+ end
187
+ end
188
+ end
189
+
190
+
191
+
192
+ # Extends an existing parser by allowing rules to be redefined. The
193
+ # changes in the new parser simply replace the original definitions.
194
+ # After extension, the new parser is independent of the original one,
195
+ # i.e. a change to the original parser will not affect the derived one.
196
+ def extend(&proc)
197
+ clone = Parser.new( @actions, @separator, @before, @begin, @end, @after, @helpers )
198
+ clone.instance_eval( &proc )
199
+ clone
200
+ end
201
+
202
+ # Defines a new parser.
203
+ def self.define( &proc )
204
+ PROTOTYPE.extend( &proc )
205
+ end
206
+
207
+ # Parses a file specified by +filename+. An optional hash
208
+ # of arbitrary arguments (+params+) can be specified. It is
209
+ # passed to the workflow methods blocks (+before+, +new_entry+, ...)
210
+ # It returns the value specified in the +after+ block. By default,
211
+ # it returns an array containing _entry_ objects.
212
+ def parse_file( filename, params={} )
213
+ @ctx = ParsingContext.new( params )
214
+ @ctx.instance_exec( &@helpers )
215
+ container = @ctx.instance_exec( &@before )
216
+ File.open( filename, 'r' ) do |file|
217
+ entry = @ctx.instance_exec( &@begin )
218
+ file.each_line do |line|
219
+ state = parse_line( line, entry )
220
+ if state == :end
221
+ @ctx.instance_exec( entry, container, &@end )
222
+ entry = @ctx.instance_exec( &@begin )
223
+ end
224
+ end
225
+ end
226
+ @ctx.instance_exec( container, &@after )
227
+ end
228
+
229
+ private
230
+
231
+ PROTOTYPE = Parser.new
232
+ PROTOTYPE.instance_eval do
233
+ before { || [] }
234
+ new_entry { || {} }
235
+ finish_entry {|e,c| c << e }
236
+ after {|c| c }
237
+ end
238
+
239
+
240
+ def parse_line( line, holder )
241
+ line.chomp!
242
+ if line == @separator
243
+ :end
244
+ elsif line =~ /^(\S+)\s+(.*)$/
245
+ key,value = $1,$2
246
+ @last_key = key
247
+ if @actions[key]
248
+ @ctx.instance_exec( value, holder, &@actions[key] )
249
+ end
250
+ :parsing
251
+ else
252
+ if @actions[:text][@last_key]
253
+ @ctx.instance_exec( line, holder, &@actions[:text][@last_key] )
254
+ end
255
+ :parsing
256
+ end
257
+ end
258
+
259
+ end
260
+
261
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swissparser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - paradigmatic
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-13 00:00:00 +01:00
12
+ date: 2009-11-14 00:00:00 +01:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -41,9 +41,10 @@ files:
41
41
  - examples/data/uniprot.txt
42
42
  - examples/kegg_demo.rb
43
43
  - examples/signal_demo.rb
44
- - examples/uniprot_demo.rb
44
+ - examples/tutorial_1.rb
45
45
  - examples/uniprot_param_demo.rb
46
46
  - lib/swiss_parser.rb
47
+ - lib/swissparser.rb
47
48
  has_rdoc: true
48
49
  homepage: http://github.com/paradigmatic/SwissParser
49
50
  licenses: []