parsey 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,8 @@
1
1
  # parsey
2
2
 
3
- Parsey is a very simple class to match a string with a pattern and retrieve data from it.
4
- It takes a string, a pattern, and a hash of regexes. The pattern is filled with the regexes
5
- and then that is matched to the string given.
3
+ Parsey is a simple class to match a string with a pattern and retrieve data from it. It takes a string, a pattern, and a hash of regular expressions (as strings). The pattern is filled with the regular expressions and then that is matched to the string given.
6
4
 
7
- The pattern uses {} to surround the name of the regex it should be replaced with. You can
8
- also use <> to surround parts of the pattern that are optional, though these obviously
9
- must be nested properly.
5
+ The pattern uses {} to surround the name of the regex it should be replaced with. You can also use <> to surround parts of the pattern that are optional, though these obviously must be nested properly.
10
6
 
11
7
  ## Install
12
8
 
@@ -22,7 +18,7 @@ must be nested properly.
22
18
  #=> {"folder"=>"my-folder", "file-name"=>"my file", "ext"=>"txt"}
23
19
 
24
20
  Parsey.parse('my file.txt', '<{folder}/>{file-name}.{ext}', partials)
25
- #=> {"folder"=>nil, "file-name"=>"my file", "ext"=>"txt"}
21
+ #=> {"file-name"=>"my file", "ext"=>"txt"}
26
22
 
27
23
  ## Copyright
28
24
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.2.0
@@ -1,6 +1,8 @@
1
- # Parsey is a very simple class to match a string with a pattern and retrieve data from it.
2
- # It takes a string, a pattern, and a hash of regexes. The pattern is filled with the regexes
3
- # and then that is matched to the string given.
1
+ require 'strscan'
2
+
3
+ # Parsey is a simple class to match a string with a pattern and retrieve data from it. It
4
+ # takes a string, a pattern, and a hash of regular expressions. The pattern is filled with the
5
+ # regular expressions and then that is matched to the string given.
4
6
  #
5
7
  # The pattern uses {} to surround the name of the regex it should be replaced with. You can
6
8
  # also use <> to surround parts of the pattern that are optional, though these obviously
@@ -16,11 +18,18 @@
16
18
  # #=> {"folder"=>"my-folder", "file-name"=>"my file", "ext"=>"txt"}
17
19
  #
18
20
  # Parsey.parse('my file.txt', '<{folder}/>{file-name}.{ext}', partials)
19
- # #=> {"folder"=>nil, "file-name"=>"my file", "ext"=>"txt"}
21
+ # #=> {"file-name"=>"my file", "ext"=>"txt"}
20
22
  #
21
23
  class Parsey
22
24
 
23
- attr_accessor :to_parse, :pattern, :partials, :data
25
+ class ParseError < StandardError; end
26
+
27
+ attr_accessor :to_parse, :pattern, :partials, :scanners
28
+
29
+ # Depth keeps track of how many levels the optional blocks go down, so that the scanner
30
+ # to use can be properly tracked. Each level of recursion needs a new scanner object
31
+ # to refer to or it will just clear the text that was stored.
32
+ attr_accessor :depth
24
33
 
25
34
  # Creates a new Parsey instance.
26
35
  #
@@ -36,95 +45,272 @@ class Parsey
36
45
  @pattern = pattern
37
46
  @partials = partials
38
47
 
39
- @data = {}
48
+ @scanners = []
49
+ @depth = -1
40
50
  end
41
51
 
42
- # Runs through +pattern+ and replaces each of the keywords with the
43
- # correct regex from +partials+. It then adds '()?' round any parts of
44
- # the pattern marked optional. And turns the final string into a regex.
52
+ # This is a convenience method to allow you to easily parse something
53
+ # in just one line
54
+ #
55
+ # @param [String] to_parse
56
+ # the string which is to be parsed
57
+ # @param [String] pattern
58
+ # for the string to match
59
+ # @param [Hash{String => String}] partials
60
+ # the regex patterns (as strings) to use when matching
61
+ #
62
+ # @return [Hash{String => String}]
63
+ # the data retrieved from +to_parse+
45
64
  #
46
- # @return [Regex]
47
- # the regular expression to match against when parsing
65
+ def self.parse(to_parse, pattern, partials)
66
+ a = Parsey.new(to_parse, pattern, partials)
67
+ a.parse
68
+ end
69
+
70
+ # This is a front for r_place so that a regex is returned as expected
48
71
  #
72
+ # @param [Array] pat the pattern to turn into a regular expression
73
+ # @return [Regexp] the regex that will be used for parsing
74
+ # @see r_place
49
75
  def regex
50
- m = @pattern.gsub(/\{([a-z-]+)\}/) do
51
- @partials[$1]
52
- end
76
+ Regexp.new(r_place(scan))
77
+ end
78
+
79
+ # @return [StringScanner] the current scanner to use
80
+ def scanner
81
+ @scanners[@depth]
82
+ end
83
+
84
+ # Finds matches from +to_parse+ using #regex. Then uses this data
85
+ # and the pattern created with #scan to match the data with names.
86
+ #
87
+ # @return [Hash{String => String}]
88
+ # the data taken from +to_parse+
89
+ def parse
90
+ match = @to_parse.match(self.regex).captures
91
+ data = {}
53
92
 
54
- # replace optional '<stuff>'
55
- m.gsub!(/<(.+)>/) do
56
- "(#{$1})?"
93
+ self.scan.flatten.each_with_type_indexed do |t, c, i|
94
+ if (t == :block) && (match[i] != nil)
95
+ data[c] = match[i]
96
+ end
57
97
  end
58
98
 
59
- Regexp.new(m)
99
+ data
60
100
  end
61
101
 
62
- # Gets the order of the different tags within the pattern. It inserts nil
63
- # when it encounters an optional section so that it can easily be skipped
64
- # during parsing.
102
+
103
+ # Need to reset scanners after every full run, so this provides a front
104
+ # for r_scan, which resets +scanners+ and still returns the correct value.
65
105
  #
66
- # @return [Array]
67
- # the order in which the tags appear in the +pattern+
106
+ # @see #r_scan
107
+ # @return [ScanArray]
108
+ def scan
109
+ r = self.r_scan(@pattern)
110
+ @scanners =[]
111
+ r
112
+ end
113
+
114
+ # Creates a new StringScanner, then scans for blocks, optionals or text
115
+ # and adds the result to +parsed+ until it reaches the end of +str+.
68
116
  #
69
- def order
70
- if @pattern =~ /<(.+)>/
71
- parts = @pattern.dup.split('<')
72
- parts.insert(1, nil)
73
- parts.collect! {|i|
74
- i.split('>') unless i.nil?
75
- }.flatten!
76
-
77
- parts.collect! {|i|
78
- i.split('}') unless i.nil?
79
- }.flatten!
80
-
81
- parts.collect! {|i|
82
- i.gsub!(/[^a-zA-Z0-9_-]/, '') unless i.nil?
83
- }
84
-
85
- parts.delete_if {|i| i == ''}
86
-
87
- return parts
88
- else
89
- parts = []
90
- @pattern.gsub(/\{([a-z-]+)\}/) do
91
- parts << $1
92
- end
93
- return parts
117
+ # @param [String] str the string to scan through
118
+ # @return [ScanArray]
119
+ def r_scan(str)
120
+ parsed = ScanArray.new
121
+
122
+ @depth += 1
123
+ @scanners[@depth] = StringScanner.new(str)
124
+ until self.scanner.eos?
125
+ a = scan_blocks || a = scan_optionals || a = scan_text
126
+ parsed << a
94
127
  end
128
+ @depth -= 1
129
+
130
+ parsed
95
131
  end
96
132
 
97
- # This does the parsing of +to_parse+ using +regex+. It fills the hash
98
- # +data+ using +order+ to match the data up with the correct name.
133
+ # Finds next {...} in the StringScanner, and checks that it is closed.
99
134
  #
100
- # @return [Hash{String => String}]
101
- # the data retrieved from +to_parse+
135
+ # @return [Array]
136
+ # an array of the form [:block, ...]
137
+ def scan_blocks
138
+ return unless self.scanner.scan(/\{/)
139
+ content = scan_until(:block)
140
+
141
+ raise ParseError unless self.scanner.scan(/\}/) # no closing block
142
+ raise NoPartialError unless @partials[content]
143
+
144
+ [:block, content]
145
+ end
146
+
147
+ # Finds next <...> in the StringScanner, and checks that it is closed.
148
+ # Then scans the contents of the optional block.
102
149
  #
103
- def parse
104
- @to_parse.match( self.regex ).captures.each_with_index do |item, i|
105
- unless self.order[i].nil?
106
- @data[ self.order[i] ] = item
107
- end
108
- end
109
- @data
150
+ # @return [Array]
151
+ # an array of the form [:optional, [...]]
152
+ def scan_optionals
153
+ return unless self.scanner.scan(/</)
154
+ content = scan_until(:optional)
155
+
156
+ raise ParseError unless self.scanner.scan(/>/) # no closing block
157
+
158
+ [:optional, r_scan(content)]
110
159
  end
111
160
 
112
- # This is a convenience method to allow you to easily parse something
113
- # in just one go!
161
+ # Finds plain text, and checks whether there are any blocks left.
114
162
  #
115
- # @param [String] to_parse
116
- # the string which is to be parsed
117
- # @param [String] pattern
118
- # for the string to match
119
- # @param [Hash{String => String}] partials
120
- # the regex patterns (as strings) to use when matching
163
+ # @return [Array]
164
+ # text before next block, or rest of text in the form [:text, ...]
165
+ def scan_text
166
+ text = scan_until(:open)
167
+
168
+ if text.nil?
169
+ text = self.scanner.rest
170
+ self.scanner.clear
171
+ end
172
+
173
+ [:text, text]
174
+ end
175
+
176
+ # Scans the string until a tag is found of the type given.
121
177
  #
122
- # @return [Hash{String => String}]
123
- # the data retrieved from +to_parse+
178
+ # @param [Symbol] type of tag to look for.
179
+ # +:block+ for a closing block tag (+}+),
180
+ # +:optional+ for a closing optional tag (+>+),
181
+ # +:open+ for an opening tag (+{+ or +<+).
182
+ # @return [String, nil]
183
+ # the text before the tag, or nil if no match found
184
+ def scan_until(type)
185
+ case type
186
+ when :block
187
+ regex = /\}/
188
+ when :optional
189
+ regex = />/
190
+ when :open
191
+ regex = /(\{|<)/
192
+ end
193
+ pos = self.scanner.pos
194
+ if self.scanner.scan_until(regex)
195
+ self.scanner.pos -= self.scanner.matched.size
196
+ self.scanner.pre_match[pos..-1]
197
+ end
198
+ end
199
+
200
+ # Puts the regexps in the correct place, but returns a string so it can
201
+ # still work recursively
124
202
  #
125
- def self.parse(to_parse, pattern, partials)
126
- a = Parsey.new(to_parse, pattern, partials)
127
- a.parse
203
+ # @param [ScanArray] pat the pattern to turn into a regular expression
204
+ # @return [String] the regular expression as a string
205
+ def r_place(pat)
206
+ str = ''
207
+ pat.each_with_type do |t, c|
208
+ case t
209
+ when :block
210
+ str << @partials[c]
211
+ when :text
212
+ str << c
213
+ when :optional
214
+ str << "(#{r_place(c)})?"
215
+ end
216
+ end
217
+
218
+ str
128
219
  end
129
220
 
221
+ # ScanArray is an array of tokens created when scanning the pattern.
222
+ # It looks like this:
223
+ # [[:block, 'what-'], [:optional, [[:text, "hi-"]]], [:text, "oh"]]
224
+ #
225
+ class ScanArray < Array
226
+
227
+ # @see #flatten
228
+ def flatten!
229
+ self.replace(self.flatten)
230
+ end
231
+
232
+ # Removes all :text nodes from the array and puts :optional nodes' contents into the
233
+ # main array, and puts a nil in place
234
+ #
235
+ # @return [Array]
236
+ #
237
+ # @example
238
+ #
239
+ # sa = ScanArray.new([[:text, 'hey-'],
240
+ # [:optional,
241
+ # [[:block, '([a-z]+)'],
242
+ # [:text, '-what']]
243
+ # ]])
244
+ #
245
+ # sa.flatten
246
+ # #=> [[:optional, nil], [:block, "([a-z]+)"]]
247
+ #
248
+ def flatten
249
+ # Flatten the array with Array#flatten before starting
250
+ flat = super
251
+
252
+ indexes = []
253
+ flat.each_with_index do |v, i|
254
+ if v == :optional
255
+ indexes << i
256
+ end
257
+ end
258
+
259
+ # Need to start from the back so as not to alter the indexes of the
260
+ # other items when inserting
261
+ indexes.reverse.each do |i|
262
+ flat.insert(i+1, nil)
263
+ end
264
+
265
+ flat.reverse!
266
+ r = ScanArray.new
267
+ while flat.size > 0
268
+ r << [flat.pop, flat.pop]
269
+ end
270
+
271
+ r.delete_if {|i| i[0] == :text}
272
+ r
273
+ end
274
+
275
+ # Loops through the types and contents of each tag separately, passing them
276
+ # to the block given.
277
+ #
278
+ # @return [StringScanner] returns self
279
+ # @yield [Symbol, Object] gives the type and content of each block in turn
280
+ #
281
+ # @example
282
+ #
283
+ # sa = ScanArray.new([[:text, 'hey-'],
284
+ # [:optional,
285
+ # [[:block, '([a-z]+)'],
286
+ # [:text, '-what']]
287
+ # ]])
288
+ #
289
+ # sa.each_with_type do |type, content|
290
+ # puts "#{type} -> #{content}"
291
+ # end
292
+ # #=> text -> hey-
293
+ # #=> optional -> [[:block, "([a-z]+)"], [:text, "-what"]]
294
+ #
295
+ def each_with_type(&blck)
296
+ ts = self.collect {|i| i[0]}
297
+ cs = self.collect {|i| i[1]}
298
+ (0...ts.size).each do |i|
299
+ yield(ts[i], cs[i])
300
+ end
301
+ self
302
+ end
303
+
304
+ # @see #each_with_type
305
+ # @yield [Symbol, Object, Integer] gives the type, content and index of each block in turn
306
+ def each_with_type_indexed(&blck)
307
+ ts = self.collect {|i| i[0]}
308
+ cs = self.collect {|i| i[1]}
309
+ (0...ts.size).each do |i|
310
+ yield(ts[i], cs[i], i)
311
+ end
312
+ self
313
+ end
314
+
315
+ end
130
316
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{parsey}
8
- s.version = "0.1.3"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Joshua Hawxwell"]
12
- s.date = %q{2010-07-16}
12
+ s.date = %q{2010-07-22}
13
13
  s.description = %q{Parsey matches a string with a pattern to retrieve data from it.}
14
14
  s.email = %q{m@hawx.me}
15
15
  s.extra_rdoc_files = [
@@ -14,16 +14,18 @@ class TestParsey < Test::Unit::TestCase
14
14
  assert_equal Regexp.new("(f)?\/n.e"), t.regex
15
15
  end
16
16
 
17
- should "create correct order" do
17
+ should "scan correctly" do
18
18
  partials = {'folder' => 'f', 'name' => 'n', 'ext' => 'e'}
19
19
  t = Parsey.new('', '<{folder}/>{name}.{ext}', partials)
20
- assert_equal [nil, 'folder', 'name', 'ext'], t.order
20
+ r = [[ :optional, [[:block, "folder"], [:text, "/"]] ], [:block, "name"], [:text, "."], [:block, "ext"]]
21
+ assert_equal r, t.scan
21
22
  end
22
23
 
23
24
  should "create correct order when optional is in the middle" do
24
25
  partials = {'folder' => 'folder', 'name' => 'name', 'ext' => 'ext'}
25
26
  t = Parsey.new('', '{folder}/<{name}>.{ext}', partials)
26
- assert_equal ['folder', nil, 'name', 'ext'], t.order
27
+ r = [[:block, "folder"], [:text, "/"], [:optional, [[:block, "name"]]], [:text, "."], [:block, "ext"]]
28
+ assert_equal r, t.scan
27
29
  end
28
30
 
29
31
  should "parse properly" do
@@ -33,4 +35,55 @@ class TestParsey < Test::Unit::TestCase
33
35
  assert_equal hash, t.parse
34
36
  end
35
37
 
38
+ should "parse long patterns properly" do
39
+ partials = {'word' => '([a-z]+)',
40
+ 'number' => '([0-9]+)',
41
+ 'date' => '(\d{4}-\d{2}-\d{2})',
42
+ 'time' => '(\d{2}:\d{2})',
43
+ 'person' => '(John|Dave|Luke|Josh)'}
44
+
45
+ pattern = 'Hello my name is {person}, I was born on {date} at {time}. I am {number} years old, and my favourite animal is a {word}.'
46
+ string = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old, and my favourite animal is a shark.'
47
+
48
+ hash = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17', 'word' => 'shark'}
49
+ assert_equal hash, Parsey.parse(string, pattern, partials)
50
+ end
51
+
52
+ should "parse multiple optionals correctly" do
53
+ partials = {'word' => '([a-z]+)',
54
+ 'number' => '([0-9]+)',
55
+ 'date' => '(\d{4}-\d{2}-\d{2})',
56
+ 'time' => '(\d{2}:\d{2})',
57
+ 'person' => '(John|Dave|Luke|Josh)'}
58
+ pattern = 'Hello my name is {person}, I was born on {date}< at {time}>. I am {number} years old<, and my favourite animal is a {word}>.'
59
+ string1 = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old, and my favourite animal is a shark.'
60
+ hash1 = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17', 'word' => 'shark'}
61
+
62
+ string2 = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old.'
63
+ hash2 = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17'}
64
+
65
+ string3 = 'Hello my name is Josh, I was born on 1992-09-17. I am 17 years old, and my favourite animal is a shark.'
66
+ hash3 = {'person' => 'Josh', 'date' => '1992-09-17', 'number' => '17', 'word' => 'shark'}
67
+
68
+ string4 = 'Hello my name is Josh, I was born on 1992-09-17. I am 17 years old.'
69
+ hash4 = {'person' => 'Josh', 'date' => '1992-09-17', 'number' => '17'}
70
+
71
+ assert_equal hash1, Parsey.parse(string1, pattern, partials)
72
+ assert_equal hash2, Parsey.parse(string2, pattern, partials)
73
+ assert_equal hash3, Parsey.parse(string3, pattern, partials)
74
+ assert_equal hash4, Parsey.parse(string4, pattern, partials)
75
+ end
76
+
77
+ should "raise an error when blocks not closed" do
78
+ assert_raise Parsey::ParseError do
79
+ Parsey.parse('what', '{question', {'question' => '([a-z ]+\?)'})
80
+ end
81
+ end
82
+
83
+ should "raise an error when optional not closed" do
84
+ assert_raise Parsey::ParseError do
85
+ Parsey.parse('hmm', '<{sound}', {'sound' => '(hmm|boo)'})
86
+ end
87
+ end
88
+
36
89
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: parsey
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 1
9
- - 3
10
- version: 0.1.3
8
+ - 2
9
+ - 0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Joshua Hawxwell
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-07-16 00:00:00 +01:00
18
+ date: 2010-07-22 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency