parsey 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,12 +1,8 @@
1
1
  # parsey
2
2
 
3
- Parsey is a very simple class to match a string with a pattern and retrieve data from it.
4
- It takes a string, a pattern, and a hash of regexes. The pattern is filled with the regexes
5
- and then that is matched to the string given.
3
+ Parsey is a simple class to match a string with a pattern and retrieve data from it. It takes a string, a pattern, and a hash of regular expressions (as strings). The pattern is filled with the regular expressions and then that is matched to the string given.
6
4
 
7
- The pattern uses {} to surround the name of the regex it should be replaced with. You can
8
- also use <> to surround parts of the pattern that are optional, though these obviously
9
- must be nested properly.
5
+ The pattern uses {} to surround the name of the regex it should be replaced with. You can also use <> to surround parts of the pattern that are optional, though these obviously must be nested properly.
10
6
 
11
7
  ## Install
12
8
 
@@ -22,7 +18,7 @@ must be nested properly.
22
18
  #=> {"folder"=>"my-folder", "file-name"=>"my file", "ext"=>"txt"}
23
19
 
24
20
  Parsey.parse('my file.txt', '<{folder}/>{file-name}.{ext}', partials)
25
- #=> {"folder"=>nil, "file-name"=>"my file", "ext"=>"txt"}
21
+ #=> {"file-name"=>"my file", "ext"=>"txt"}
26
22
 
27
23
  ## Copyright
28
24
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.2.0
@@ -1,6 +1,8 @@
1
- # Parsey is a very simple class to match a string with a pattern and retrieve data from it.
2
- # It takes a string, a pattern, and a hash of regexes. The pattern is filled with the regexes
3
- # and then that is matched to the string given.
1
+ require 'strscan'
2
+
3
+ # Parsey is a simple class to match a string with a pattern and retrieve data from it. It
4
+ # takes a string, a pattern, and a hash of regular expressions. The pattern is filled with the
5
+ # regular expressions and then that is matched to the string given.
4
6
  #
5
7
  # The pattern uses {} to surround the name of the regex it should be replaced with. You can
6
8
  # also use <> to surround parts of the pattern that are optional, though these obviously
@@ -16,11 +18,18 @@
16
18
  # #=> {"folder"=>"my-folder", "file-name"=>"my file", "ext"=>"txt"}
17
19
  #
18
20
  # Parsey.parse('my file.txt', '<{folder}/>{file-name}.{ext}', partials)
19
- # #=> {"folder"=>nil, "file-name"=>"my file", "ext"=>"txt"}
21
+ # #=> {"file-name"=>"my file", "ext"=>"txt"}
20
22
  #
21
23
  class Parsey
22
24
 
23
- attr_accessor :to_parse, :pattern, :partials, :data
25
+ class ParseError < StandardError; end
26
+
27
+ attr_accessor :to_parse, :pattern, :partials, :scanners
28
+
29
+ # Depth keeps track of how many levels the optional blocks go down, so that the scanner
30
+ # to use can be properly tracked. Each level of recursion needs a new scanner object
31
+ # to refer to or it will just clear the text that was stored.
32
+ attr_accessor :depth
24
33
 
25
34
  # Creates a new Parsey instance.
26
35
  #
@@ -36,95 +45,272 @@ class Parsey
36
45
  @pattern = pattern
37
46
  @partials = partials
38
47
 
39
- @data = {}
48
+ @scanners = []
49
+ @depth = -1
40
50
  end
41
51
 
42
- # Runs through +pattern+ and replaces each of the keywords with the
43
- # correct regex from +partials+. It then adds '()?' round any parts of
44
- # the pattern marked optional. And turns the final string into a regex.
52
+ # This is a convenience method to allow you to easily parse something
53
+ # in just one line
54
+ #
55
+ # @param [String] to_parse
56
+ # the string which is to be parsed
57
+ # @param [String] pattern
58
+ # for the string to match
59
+ # @param [Hash{String => String}] partials
60
+ # the regex patterns (as strings) to use when matching
61
+ #
62
+ # @return [Hash{String => String}]
63
+ # the data retrieved from +to_parse+
45
64
  #
46
- # @return [Regex]
47
- # the regular expression to match against when parsing
65
+ def self.parse(to_parse, pattern, partials)
66
+ a = Parsey.new(to_parse, pattern, partials)
67
+ a.parse
68
+ end
69
+
70
+ # This is a front for r_place so that a regex is returned as expected
48
71
  #
72
+ # @param [Array] pat the pattern to turn into a regular expression
73
+ # @return [Regexp] the regex that will be used for parsing
74
+ # @see r_place
49
75
  def regex
50
- m = @pattern.gsub(/\{([a-z-]+)\}/) do
51
- @partials[$1]
52
- end
76
+ Regexp.new(r_place(scan))
77
+ end
78
+
79
+ # @return [StringScanner] the current scanner to use
80
+ def scanner
81
+ @scanners[@depth]
82
+ end
83
+
84
+ # Finds matches from +to_parse+ using #regex. Then uses this data
85
+ # and the pattern created with #scan to match the data with names.
86
+ #
87
+ # @return [Hash{String => String}]
88
+ # the data taken from +to_parse+
89
+ def parse
90
+ match = @to_parse.match(self.regex).captures
91
+ data = {}
53
92
 
54
- # replace optional '<stuff>'
55
- m.gsub!(/<(.+)>/) do
56
- "(#{$1})?"
93
+ self.scan.flatten.each_with_type_indexed do |t, c, i|
94
+ if (t == :block) && (match[i] != nil)
95
+ data[c] = match[i]
96
+ end
57
97
  end
58
98
 
59
- Regexp.new(m)
99
+ data
60
100
  end
61
101
 
62
- # Gets the order of the different tags within the pattern. It inserts nil
63
- # when it encounters an optional section so that it can easily be skipped
64
- # during parsing.
102
+
103
+ # Need to reset scanners after every full run, so this provides a front
104
+ # for r_scan, which resets +scanners+ and still returns the correct value.
65
105
  #
66
- # @return [Array]
67
- # the order in which the tags appear in the +pattern+
106
+ # @see #r_scan
107
+ # @return [ScanArray]
108
+ def scan
109
+ r = self.r_scan(@pattern)
110
+ @scanners =[]
111
+ r
112
+ end
113
+
114
+ # Creates a new StringScanner, then scans for blocks, optionals or text
115
+ # and adds the result to +parsed+ until it reaches the end of +str+.
68
116
  #
69
- def order
70
- if @pattern =~ /<(.+)>/
71
- parts = @pattern.dup.split('<')
72
- parts.insert(1, nil)
73
- parts.collect! {|i|
74
- i.split('>') unless i.nil?
75
- }.flatten!
76
-
77
- parts.collect! {|i|
78
- i.split('}') unless i.nil?
79
- }.flatten!
80
-
81
- parts.collect! {|i|
82
- i.gsub!(/[^a-zA-Z0-9_-]/, '') unless i.nil?
83
- }
84
-
85
- parts.delete_if {|i| i == ''}
86
-
87
- return parts
88
- else
89
- parts = []
90
- @pattern.gsub(/\{([a-z-]+)\}/) do
91
- parts << $1
92
- end
93
- return parts
117
+ # @param [String] str the string to scan through
118
+ # @return [ScanArray]
119
+ def r_scan(str)
120
+ parsed = ScanArray.new
121
+
122
+ @depth += 1
123
+ @scanners[@depth] = StringScanner.new(str)
124
+ until self.scanner.eos?
125
+ a = scan_blocks || a = scan_optionals || a = scan_text
126
+ parsed << a
94
127
  end
128
+ @depth -= 1
129
+
130
+ parsed
95
131
  end
96
132
 
97
- # This does the parsing of +to_parse+ using +regex+. It fills the hash
98
- # +data+ using +order+ to match the data up with the correct name.
133
+ # Finds next {...} in the StringScanner, and checks that it is closed.
99
134
  #
100
- # @return [Hash{String => String}]
101
- # the data retrieved from +to_parse+
135
+ # @return [Array]
136
+ # an array of the form [:block, ...]
137
+ def scan_blocks
138
+ return unless self.scanner.scan(/\{/)
139
+ content = scan_until(:block)
140
+
141
+ raise ParseError unless self.scanner.scan(/\}/) # no closing block
142
+ raise NoPartialError unless @partials[content]
143
+
144
+ [:block, content]
145
+ end
146
+
147
+ # Finds next <...> in the StringScanner, and checks that it is closed.
148
+ # Then scans the contents of the optional block.
102
149
  #
103
- def parse
104
- @to_parse.match( self.regex ).captures.each_with_index do |item, i|
105
- unless self.order[i].nil?
106
- @data[ self.order[i] ] = item
107
- end
108
- end
109
- @data
150
+ # @return [Array]
151
+ # an array of the form [:optional, [...]]
152
+ def scan_optionals
153
+ return unless self.scanner.scan(/</)
154
+ content = scan_until(:optional)
155
+
156
+ raise ParseError unless self.scanner.scan(/>/) # no closing block
157
+
158
+ [:optional, r_scan(content)]
110
159
  end
111
160
 
112
- # This is a convenience method to allow you to easily parse something
113
- # in just one go!
161
+ # Finds plain text, and checks whether there are any blocks left.
114
162
  #
115
- # @param [String] to_parse
116
- # the string which is to be parsed
117
- # @param [String] pattern
118
- # for the string to match
119
- # @param [Hash{String => String}] partials
120
- # the regex patterns (as strings) to use when matching
163
+ # @return [Array]
164
+ # text before next block, or rest of text in the form [:text, ...]
165
+ def scan_text
166
+ text = scan_until(:open)
167
+
168
+ if text.nil?
169
+ text = self.scanner.rest
170
+ self.scanner.clear
171
+ end
172
+
173
+ [:text, text]
174
+ end
175
+
176
+ # Scans the string until a tag is found of the type given.
121
177
  #
122
- # @return [Hash{String => String}]
123
- # the data retrieved from +to_parse+
178
+ # @param [Symbol] type of tag to look for.
179
+ # +:block+ for a closing block tag (+}+),
180
+ # +:optional+ for a closing optional tag (+>+),
181
+ # +:open+ for an opening tag (+{+ or +<+).
182
+ # @return [String, nil]
183
+ # the text before the tag, or nil if no match found
184
+ def scan_until(type)
185
+ case type
186
+ when :block
187
+ regex = /\}/
188
+ when :optional
189
+ regex = />/
190
+ when :open
191
+ regex = /(\{|<)/
192
+ end
193
+ pos = self.scanner.pos
194
+ if self.scanner.scan_until(regex)
195
+ self.scanner.pos -= self.scanner.matched.size
196
+ self.scanner.pre_match[pos..-1]
197
+ end
198
+ end
199
+
200
+ # Puts the regexps in the correct place, but returns a string so it can
201
+ # still work recursively
124
202
  #
125
- def self.parse(to_parse, pattern, partials)
126
- a = Parsey.new(to_parse, pattern, partials)
127
- a.parse
203
+ # @param [ScanArray] pat the pattern to turn into a regular expression
204
+ # @return [String] the regular expression as a string
205
+ def r_place(pat)
206
+ str = ''
207
+ pat.each_with_type do |t, c|
208
+ case t
209
+ when :block
210
+ str << @partials[c]
211
+ when :text
212
+ str << c
213
+ when :optional
214
+ str << "(#{r_place(c)})?"
215
+ end
216
+ end
217
+
218
+ str
128
219
  end
129
220
 
221
+ # ScanArray is an array of tokens created when scanning the pattern.
222
+ # It looks like this:
223
+ # [[:block, 'what-'], [:optional, [[:text, "hi-"]]], [:text, "oh"]]
224
+ #
225
+ class ScanArray < Array
226
+
227
+ # @see #flatten
228
+ def flatten!
229
+ self.replace(self.flatten)
230
+ end
231
+
232
+ # Removes all :text nodes from +pat+ and puts :optional nodes contents' into the
233
+ # main array, and puts a nil in place
234
+ #
235
+ # @return [Array]
236
+ #
237
+ # @example
238
+ #
239
+ # sa = ScanArray.new([[:text, 'hey-'],
240
+ # [:optional,
241
+ # [[:block, '([a-z]+)'],
242
+ # [:text, '-what']]
243
+ # ]])
244
+ #
245
+ # sa.flatten
246
+ # #=> [[:optional, nil], [:block, "([a-z]+)"]]
247
+ #
248
+ def flatten
249
+ # Flatten the array with Array#flatten before starting
250
+ flat = super
251
+
252
+ indexes = []
253
+ flat.each_with_index do |v, i|
254
+ if v == :optional
255
+ indexes << i
256
+ end
257
+ end
258
+
259
+ # Need to start from the back so as not to alter the indexes of the
260
+ # other items when inserting
261
+ indexes.reverse.each do |i|
262
+ flat.insert(i+1, nil)
263
+ end
264
+
265
+ flat.reverse!
266
+ r = ScanArray.new
267
+ while flat.size > 0
268
+ r << [flat.pop, flat.pop]
269
+ end
270
+
271
+ r.delete_if {|i| i[0] == :text}
272
+ r
273
+ end
274
+
275
+ # Loops through the types and contents of each tag separately, passing them
276
+ # to the block given.
277
+ #
278
+ # @return [StringScanner] returns self
279
+ # @yield [Symbol, Object] gives the type and content of each block in turn
280
+ #
281
+ # @example
282
+ #
283
+ # sa = ScanArray.new([[:text, 'hey-'],
284
+ # [:optional,
285
+ # [[:block, '([a-z]+)'],
286
+ # [:text, '-what']]
287
+ # ]])
288
+ #
289
+ # sa.each_with_type do |type, content|
290
+ # puts "#{type} -> #{content}"
291
+ # end
292
+ # #=> text -> hey-
293
+ # #=> optional -> [[:block, "([a-z]+)"], [:text, "-what"]]
294
+ #
295
+ def each_with_type(&blck)
296
+ ts = self.collect {|i| i[0]}
297
+ cs = self.collect {|i| i[1]}
298
+ (0...ts.size).each do |i|
299
+ yield(ts[i], cs[i])
300
+ end
301
+ self
302
+ end
303
+
304
+ # @see #each_with_type
305
+ # @yield [Symbol, Object Integer] gives the type, content and index of each block in turn
306
+ def each_with_type_indexed(&blck)
307
+ ts = self.collect {|i| i[0]}
308
+ cs = self.collect {|i| i[1]}
309
+ (0...ts.size).each do |i|
310
+ yield(ts[i], cs[i], i)
311
+ end
312
+ self
313
+ end
314
+
315
+ end
130
316
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{parsey}
8
- s.version = "0.1.3"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Joshua Hawxwell"]
12
- s.date = %q{2010-07-16}
12
+ s.date = %q{2010-07-22}
13
13
  s.description = %q{Parsey matches a string with a pattern to retrieve data from it.}
14
14
  s.email = %q{m@hawx.me}
15
15
  s.extra_rdoc_files = [
@@ -14,16 +14,18 @@ class TestParsey < Test::Unit::TestCase
14
14
  assert_equal Regexp.new("(f)?\/n.e"), t.regex
15
15
  end
16
16
 
17
- should "create correct order" do
17
+ should "scan correctly" do
18
18
  partials = {'folder' => 'f', 'name' => 'n', 'ext' => 'e'}
19
19
  t = Parsey.new('', '<{folder}/>{name}.{ext}', partials)
20
- assert_equal [nil, 'folder', 'name', 'ext'], t.order
20
+ r = [[ :optional, [[:block, "folder"], [:text, "/"]] ], [:block, "name"], [:text, "."], [:block, "ext"]]
21
+ assert_equal r, t.scan
21
22
  end
22
23
 
23
24
  should "create correct order when optional is in the middle" do
24
25
  partials = {'folder' => 'folder', 'name' => 'name', 'ext' => 'ext'}
25
26
  t = Parsey.new('', '{folder}/<{name}>.{ext}', partials)
26
- assert_equal ['folder', nil, 'name', 'ext'], t.order
27
+ r = [[:block, "folder"], [:text, "/"], [:optional, [[:block, "name"]]], [:text, "."], [:block, "ext"]]
28
+ assert_equal r, t.scan
27
29
  end
28
30
 
29
31
  should "parse properly" do
@@ -33,4 +35,55 @@ class TestParsey < Test::Unit::TestCase
33
35
  assert_equal hash, t.parse
34
36
  end
35
37
 
38
+ should "parse long patterns properly" do
39
+ partials = {'word' => '([a-z]+)',
40
+ 'number' => '([0-9]+)',
41
+ 'date' => '(\d{4}-\d{2}-\d{2})',
42
+ 'time' => '(\d{2}:\d{2})',
43
+ 'person' => '(John|Dave|Luke|Josh)'}
44
+
45
+ pattern = 'Hello my name is {person}, I was born on {date} at {time}. I am {number} years old, and my favourite animal is a {word}.'
46
+ string = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old, and my favourite animal is a shark.'
47
+
48
+ hash = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17', 'word' => 'shark'}
49
+ assert_equal hash, Parsey.parse(string, pattern, partials)
50
+ end
51
+
52
+ should "parse multiple optionals correctly" do
53
+ partials = {'word' => '([a-z]+)',
54
+ 'number' => '([0-9]+)',
55
+ 'date' => '(\d{4}-\d{2}-\d{2})',
56
+ 'time' => '(\d{2}:\d{2})',
57
+ 'person' => '(John|Dave|Luke|Josh)'}
58
+ pattern = 'Hello my name is {person}, I was born on {date}< at {time}>. I am {number} years old<, and my favourite animal is a {word}>.'
59
+ string1 = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old, and my favourite animal is a shark.'
60
+ hash1 = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17', 'word' => 'shark'}
61
+
62
+ string2 = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old.'
63
+ hash2 = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17'}
64
+
65
+ string3 = 'Hello my name is Josh, I was born on 1992-09-17. I am 17 years old, and my favourite animal is a shark.'
66
+ hash3 = {'person' => 'Josh', 'date' => '1992-09-17', 'number' => '17', 'word' => 'shark'}
67
+
68
+ string4 = 'Hello my name is Josh, I was born on 1992-09-17. I am 17 years old.'
69
+ hash4 = {'person' => 'Josh', 'date' => '1992-09-17', 'number' => '17'}
70
+
71
+ assert_equal hash1, Parsey.parse(string1, pattern, partials)
72
+ assert_equal hash2, Parsey.parse(string2, pattern, partials)
73
+ assert_equal hash3, Parsey.parse(string3, pattern, partials)
74
+ assert_equal hash4, Parsey.parse(string4, pattern, partials)
75
+ end
76
+
77
+ should "raise an error when blocks not closed" do
78
+ assert_raise Parsey::ParseError do
79
+ Parsey.parse('what', '{question', {'question' => '([a-z ]+\?)'})
80
+ end
81
+ end
82
+
83
+ should "raise an error when optional not closed" do
84
+ assert_raise Parsey::ParseError do
85
+ Parsey.parse('hmm', '<{sound}', {'sound' => '(hmm|boo)'})
86
+ end
87
+ end
88
+
36
89
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parsey
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 1
9
- - 3
10
- version: 0.1.3
8
+ - 2
9
+ - 0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Joshua Hawxwell
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-07-16 00:00:00 +01:00
18
+ date: 2010-07-22 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency