parsey 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.markdown +3 -7
- data/VERSION +1 -1
- data/lib/parsey.rb +258 -72
- data/parsey.gemspec +2 -2
- data/test/test_parsey.rb +56 -3
- metadata +5 -5
data/README.markdown
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
# parsey
|
2
2
|
|
3
|
-
Parsey is a
|
4
|
-
It takes a string, a pattern, and a hash of regexes. The pattern is filled with the regexes
|
5
|
-
and then that is matched to the string given.
|
3
|
+
Parsey is a simple class to match a string with a pattern and retrieve data from it. It takes a string, a pattern, and a hash of regular expressions (as strings). The pattern is filled with the regular expressions and then that is matched to the string given.
|
6
4
|
|
7
|
-
The pattern uses {} to surround the name of the regex it should be replaced with. You can
|
8
|
-
also use <> to surround parts of the pattern that are optional, though these obviously
|
9
|
-
must be nested properly.
|
5
|
+
The pattern uses {} to surround the name of the regex it should be replaced with. You can also use <> to surround parts of the pattern that are optional, though these obviously must be nested properly.
|
10
6
|
|
11
7
|
## Install
|
12
8
|
|
@@ -22,7 +18,7 @@ must be nested properly.
|
|
22
18
|
#=> {"folder"=>"my-folder", "file-name"=>"my file", "ext"=>"txt"}
|
23
19
|
|
24
20
|
Parsey.parse('my file.txt', '<{folder}/>{file-name}.{ext}', partials)
|
25
|
-
#=> {"
|
21
|
+
#=> {"file-name"=>"my file", "ext"=>"txt"}
|
26
22
|
|
27
23
|
## Copyright
|
28
24
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/parsey.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
#
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
# Parsey is a simple class to match a string with a pattern and retrieve data from it. It
|
4
|
+
# takes a string, a pattern, and a hash of regular expressions. The pattern is filled with the
|
5
|
+
# regular expressions and then that is matched to the string given.
|
4
6
|
#
|
5
7
|
# The pattern uses {} to surround the name of the regex it should be replaced with. You can
|
6
8
|
# also use <> to surround parts of the pattern that are optional, though these obviously
|
@@ -16,11 +18,18 @@
|
|
16
18
|
# #=> {"folder"=>"my-folder", "file-name"=>"my file", "ext"=>"txt"}
|
17
19
|
#
|
18
20
|
# Parsey.parse('my file.txt', '<{folder}/>{file-name}.{ext}', partials)
|
19
|
-
# #=> {"
|
21
|
+
# #=> {"file-name"=>"my file", "ext"=>"txt"}
|
20
22
|
#
|
21
23
|
class Parsey
|
22
24
|
|
23
|
-
|
25
|
+
class ParseError < StandardError; end
|
26
|
+
|
27
|
+
attr_accessor :to_parse, :pattern, :partials, :scanners
|
28
|
+
|
29
|
+
# Depth keeps track of how many levels the optional blocks go down, so that the scanner
|
30
|
+
# to use can be properly tracked. Each level of recursion needs a new scanner object
|
31
|
+
# to refer to or it will just clear the text that was stored.
|
32
|
+
attr_accessor :depth
|
24
33
|
|
25
34
|
# Creates a new Parsey instance.
|
26
35
|
#
|
@@ -36,95 +45,272 @@ class Parsey
|
|
36
45
|
@pattern = pattern
|
37
46
|
@partials = partials
|
38
47
|
|
39
|
-
@
|
48
|
+
@scanners = []
|
49
|
+
@depth = -1
|
40
50
|
end
|
41
51
|
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
52
|
+
# This is a convenience method to allow you to easily parse something
|
53
|
+
# in just one line
|
54
|
+
#
|
55
|
+
# @param [String] to_parse
|
56
|
+
# the string which is to be parsed
|
57
|
+
# @param [String] pattern
|
58
|
+
# for the string to match
|
59
|
+
# @param [Hash{String => String}] partials
|
60
|
+
# the regex patterns (as strings) to use when matching
|
61
|
+
#
|
62
|
+
# @return [Hash{String => String}]
|
63
|
+
# the data retrieved from +to_parse+
|
45
64
|
#
|
46
|
-
|
47
|
-
|
65
|
+
def self.parse(to_parse, pattern, partials)
|
66
|
+
a = Parsey.new(to_parse, pattern, partials)
|
67
|
+
a.parse
|
68
|
+
end
|
69
|
+
|
70
|
+
# This is a front for r_place so that a regex is returned as expected
|
48
71
|
#
|
72
|
+
# @param [Array] pat the pattern to turn into a regular expression
|
73
|
+
# @return [Regexp] the regex that will be used for parsing
|
74
|
+
# @see r_place
|
49
75
|
def regex
|
50
|
-
|
51
|
-
|
52
|
-
|
76
|
+
Regexp.new(r_place(scan))
|
77
|
+
end
|
78
|
+
|
79
|
+
# @return [StringScanner] the current scanner to use
|
80
|
+
def scanner
|
81
|
+
@scanners[@depth]
|
82
|
+
end
|
83
|
+
|
84
|
+
# Finds matches from +to_parse+ using #regex. Then uses this data
|
85
|
+
# and the pattern created with #scan to match the data with names.
|
86
|
+
#
|
87
|
+
# @return [Hash{String => String}]
|
88
|
+
# the data taken from +to_parse+
|
89
|
+
def parse
|
90
|
+
match = @to_parse.match(self.regex).captures
|
91
|
+
data = {}
|
53
92
|
|
54
|
-
|
55
|
-
|
56
|
-
|
93
|
+
self.scan.flatten.each_with_type_indexed do |t, c, i|
|
94
|
+
if (t == :block) && (match[i] != nil)
|
95
|
+
data[c] = match[i]
|
96
|
+
end
|
57
97
|
end
|
58
98
|
|
59
|
-
|
99
|
+
data
|
60
100
|
end
|
61
101
|
|
62
|
-
|
63
|
-
#
|
64
|
-
#
|
102
|
+
|
103
|
+
# Need to reset scanners after every full run, so this provides a front
|
104
|
+
# for r_scan, which resets +scanners+ and still returns the correct value.
|
65
105
|
#
|
66
|
-
# @
|
67
|
-
#
|
106
|
+
# @see #r_scan
|
107
|
+
# @return [ScanArray]
|
108
|
+
def scan
|
109
|
+
r = self.r_scan(@pattern)
|
110
|
+
@scanners =[]
|
111
|
+
r
|
112
|
+
end
|
113
|
+
|
114
|
+
# Creates a new StringScanner, then scans for blocks, optionals or text
|
115
|
+
# and adds the result to +parsed+ until it reaches the end of +str+.
|
68
116
|
#
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
}.flatten!
|
80
|
-
|
81
|
-
parts.collect! {|i|
|
82
|
-
i.gsub!(/[^a-zA-Z0-9_-]/, '') unless i.nil?
|
83
|
-
}
|
84
|
-
|
85
|
-
parts.delete_if {|i| i == ''}
|
86
|
-
|
87
|
-
return parts
|
88
|
-
else
|
89
|
-
parts = []
|
90
|
-
@pattern.gsub(/\{([a-z-]+)\}/) do
|
91
|
-
parts << $1
|
92
|
-
end
|
93
|
-
return parts
|
117
|
+
# @param [String] str the string to scan through
|
118
|
+
# @return [ScanArray]
|
119
|
+
def r_scan(str)
|
120
|
+
parsed = ScanArray.new
|
121
|
+
|
122
|
+
@depth += 1
|
123
|
+
@scanners[@depth] = StringScanner.new(str)
|
124
|
+
until self.scanner.eos?
|
125
|
+
a = scan_blocks || a = scan_optionals || a = scan_text
|
126
|
+
parsed << a
|
94
127
|
end
|
128
|
+
@depth -= 1
|
129
|
+
|
130
|
+
parsed
|
95
131
|
end
|
96
132
|
|
97
|
-
#
|
98
|
-
# +data+ using +order+ to match the data up with the correct name.
|
133
|
+
# Finds next {...} in the StringScanner, and checks that it is closed.
|
99
134
|
#
|
100
|
-
# @return [
|
101
|
-
# the
|
135
|
+
# @return [Array]
|
136
|
+
# an array of the form [:block, ...]
|
137
|
+
def scan_blocks
|
138
|
+
return unless self.scanner.scan(/\{/)
|
139
|
+
content = scan_until(:block)
|
140
|
+
|
141
|
+
raise ParseError unless self.scanner.scan(/\}/) # no closing block
|
142
|
+
raise NoPartialError unless @partials[content]
|
143
|
+
|
144
|
+
[:block, content]
|
145
|
+
end
|
146
|
+
|
147
|
+
# Finds next <...> in the StringScanner, and checks that it is closed.
|
148
|
+
# Then scans the contents of the optional block.
|
102
149
|
#
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
150
|
+
# @return [Array]
|
151
|
+
# an array of the form [:optional, [...]]
|
152
|
+
def scan_optionals
|
153
|
+
return unless self.scanner.scan(/</)
|
154
|
+
content = scan_until(:optional)
|
155
|
+
|
156
|
+
raise ParseError unless self.scanner.scan(/>/) # no closing block
|
157
|
+
|
158
|
+
[:optional, r_scan(content)]
|
110
159
|
end
|
111
160
|
|
112
|
-
#
|
113
|
-
# in just one go!
|
161
|
+
# Finds plain text, and checks whether there are any blocks left.
|
114
162
|
#
|
115
|
-
# @
|
116
|
-
#
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
163
|
+
# @return [Array]
|
164
|
+
# text before next block, or rest of text in the form [:text, ...]
|
165
|
+
def scan_text
|
166
|
+
text = scan_until(:open)
|
167
|
+
|
168
|
+
if text.nil?
|
169
|
+
text = self.scanner.rest
|
170
|
+
self.scanner.clear
|
171
|
+
end
|
172
|
+
|
173
|
+
[:text, text]
|
174
|
+
end
|
175
|
+
|
176
|
+
# Scans the string until a tag is found of the type given.
|
121
177
|
#
|
122
|
-
# @
|
123
|
-
#
|
178
|
+
# @param [Symbol] type of tag to look for.
|
179
|
+
# +:block+ for a closing block tag (+}+),
|
180
|
+
# +:optional+ for a closing optional tag (+>+),
|
181
|
+
# +:open+ for an opening tag (+{+ or +<+).
|
182
|
+
# @return [String, nil]
|
183
|
+
# the text before the tag, or nil if no match found
|
184
|
+
def scan_until(type)
|
185
|
+
case type
|
186
|
+
when :block
|
187
|
+
regex = /\}/
|
188
|
+
when :optional
|
189
|
+
regex = />/
|
190
|
+
when :open
|
191
|
+
regex = /(\{|<)/
|
192
|
+
end
|
193
|
+
pos = self.scanner.pos
|
194
|
+
if self.scanner.scan_until(regex)
|
195
|
+
self.scanner.pos -= self.scanner.matched.size
|
196
|
+
self.scanner.pre_match[pos..-1]
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
# Puts the regexps in the correct place, but returns a string so it can
|
201
|
+
# still work recursively
|
124
202
|
#
|
125
|
-
|
126
|
-
|
127
|
-
|
203
|
+
# @param [ScanArray] pat the pattern to turn into a regular expression
|
204
|
+
# @return [String] the regular expression as a string
|
205
|
+
def r_place(pat)
|
206
|
+
str = ''
|
207
|
+
pat.each_with_type do |t, c|
|
208
|
+
case t
|
209
|
+
when :block
|
210
|
+
str << @partials[c]
|
211
|
+
when :text
|
212
|
+
str << c
|
213
|
+
when :optional
|
214
|
+
str << "(#{r_place(c)})?"
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
str
|
128
219
|
end
|
129
220
|
|
221
|
+
# ScanArray is an array of tokens created when scanning the pattern.
|
222
|
+
# It looks like this:
|
223
|
+
# [[:block, 'what-'], [:optional, [[:text, "hi-"]]], [:text, "oh"]]
|
224
|
+
#
|
225
|
+
class ScanArray < Array
|
226
|
+
|
227
|
+
# @see #flatten
|
228
|
+
def flatten!
|
229
|
+
self.replace(self.flatten)
|
230
|
+
end
|
231
|
+
|
232
|
+
# Removes all :text nodes from +pat+ and puts :optional nodes' contents into the
|
233
|
+
# main array, and puts a nil in place
|
234
|
+
#
|
235
|
+
# @return [Array]
|
236
|
+
#
|
237
|
+
# @example
|
238
|
+
#
|
239
|
+
# sa = ScanArray.new([[:text, 'hey-'],
|
240
|
+
# [:optional,
|
241
|
+
# [[:block, '([a-z]+)'],
|
242
|
+
# [:text, '-what']]
|
243
|
+
# ]])
|
244
|
+
#
|
245
|
+
# sa.flatten
|
246
|
+
# #=> [[:optional, nil], [:block, "([a-z]+)"]]
|
247
|
+
#
|
248
|
+
def flatten
|
249
|
+
# Flatten the array with Array#flatten before starting
|
250
|
+
flat = super
|
251
|
+
|
252
|
+
indexes = []
|
253
|
+
flat.each_with_index do |v, i|
|
254
|
+
if v == :optional
|
255
|
+
indexes << i
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
# Need to start from the back so as not to alter the indexes of the
|
260
|
+
# other items when inserting
|
261
|
+
indexes.reverse.each do |i|
|
262
|
+
flat.insert(i+1, nil)
|
263
|
+
end
|
264
|
+
|
265
|
+
flat.reverse!
|
266
|
+
r = ScanArray.new
|
267
|
+
while flat.size > 0
|
268
|
+
r << [flat.pop, flat.pop]
|
269
|
+
end
|
270
|
+
|
271
|
+
r.delete_if {|i| i[0] == :text}
|
272
|
+
r
|
273
|
+
end
|
274
|
+
|
275
|
+
# Loops through the types and contents of each tag separately, passing them
|
276
|
+
# to the block given.
|
277
|
+
#
|
278
|
+
# @return [ScanArray] returns self
|
279
|
+
# @yield [Symbol, Object] gives the type and content of each block in turn
|
280
|
+
#
|
281
|
+
# @example
|
282
|
+
#
|
283
|
+
# sa = ScanArray.new([[:text, 'hey-'],
|
284
|
+
# [:optional,
|
285
|
+
# [[:block, '([a-z]+)'],
|
286
|
+
# [:text, '-what']]
|
287
|
+
# ]])
|
288
|
+
#
|
289
|
+
# sa.each_with_type do |type, content|
|
290
|
+
# puts "#{type} -> #{content}"
|
291
|
+
# end
|
292
|
+
# #=> text -> hey-
|
293
|
+
# #=> optional -> [[:block, "([a-z]+)"], [:text, "-what"]]
|
294
|
+
#
|
295
|
+
def each_with_type(&blck)
|
296
|
+
ts = self.collect {|i| i[0]}
|
297
|
+
cs = self.collect {|i| i[1]}
|
298
|
+
(0...ts.size).each do |i|
|
299
|
+
yield(ts[i], cs[i])
|
300
|
+
end
|
301
|
+
self
|
302
|
+
end
|
303
|
+
|
304
|
+
# @see #each_with_type
|
305
|
+
# @yield [Symbol, Object, Integer] gives the type, content and index of each block in turn
|
306
|
+
def each_with_type_indexed(&blck)
|
307
|
+
ts = self.collect {|i| i[0]}
|
308
|
+
cs = self.collect {|i| i[1]}
|
309
|
+
(0...ts.size).each do |i|
|
310
|
+
yield(ts[i], cs[i], i)
|
311
|
+
end
|
312
|
+
self
|
313
|
+
end
|
314
|
+
|
315
|
+
end
|
130
316
|
end
|
data/parsey.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{parsey}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Joshua Hawxwell"]
|
12
|
-
s.date = %q{2010-07-
|
12
|
+
s.date = %q{2010-07-22}
|
13
13
|
s.description = %q{Parsey matches a string with a pattern to retrieve data from it.}
|
14
14
|
s.email = %q{m@hawx.me}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/test_parsey.rb
CHANGED
@@ -14,16 +14,18 @@ class TestParsey < Test::Unit::TestCase
|
|
14
14
|
assert_equal Regexp.new("(f)?\/n.e"), t.regex
|
15
15
|
end
|
16
16
|
|
17
|
-
should "
|
17
|
+
should "scan correctly" do
|
18
18
|
partials = {'folder' => 'f', 'name' => 'n', 'ext' => 'e'}
|
19
19
|
t = Parsey.new('', '<{folder}/>{name}.{ext}', partials)
|
20
|
-
|
20
|
+
r = [[ :optional, [[:block, "folder"], [:text, "/"]] ], [:block, "name"], [:text, "."], [:block, "ext"]]
|
21
|
+
assert_equal r, t.scan
|
21
22
|
end
|
22
23
|
|
23
24
|
should "create correct order when optional is in the middle" do
|
24
25
|
partials = {'folder' => 'folder', 'name' => 'name', 'ext' => 'ext'}
|
25
26
|
t = Parsey.new('', '{folder}/<{name}>.{ext}', partials)
|
26
|
-
|
27
|
+
r = [[:block, "folder"], [:text, "/"], [:optional, [[:block, "name"]]], [:text, "."], [:block, "ext"]]
|
28
|
+
assert_equal r, t.scan
|
27
29
|
end
|
28
30
|
|
29
31
|
should "parse properly" do
|
@@ -33,4 +35,55 @@ class TestParsey < Test::Unit::TestCase
|
|
33
35
|
assert_equal hash, t.parse
|
34
36
|
end
|
35
37
|
|
38
|
+
should "parse long patterns properly" do
|
39
|
+
partials = {'word' => '([a-z]+)',
|
40
|
+
'number' => '([0-9]+)',
|
41
|
+
'date' => '(\d{4}-\d{2}-\d{2})',
|
42
|
+
'time' => '(\d{2}:\d{2})',
|
43
|
+
'person' => '(John|Dave|Luke|Josh)'}
|
44
|
+
|
45
|
+
pattern = 'Hello my name is {person}, I was born on {date} at {time}. I am {number} years old, and my favourite animal is a {word}.'
|
46
|
+
string = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old, and my favourite animal is a shark.'
|
47
|
+
|
48
|
+
hash = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17', 'word' => 'shark'}
|
49
|
+
assert_equal hash, Parsey.parse(string, pattern, partials)
|
50
|
+
end
|
51
|
+
|
52
|
+
should "parse multiple optionals correctly" do
|
53
|
+
partials = {'word' => '([a-z]+)',
|
54
|
+
'number' => '([0-9]+)',
|
55
|
+
'date' => '(\d{4}-\d{2}-\d{2})',
|
56
|
+
'time' => '(\d{2}:\d{2})',
|
57
|
+
'person' => '(John|Dave|Luke|Josh)'}
|
58
|
+
pattern = 'Hello my name is {person}, I was born on {date}< at {time}>. I am {number} years old<, and my favourite animal is a {word}>.'
|
59
|
+
string1 = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old, and my favourite animal is a shark.'
|
60
|
+
hash1 = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17', 'word' => 'shark'}
|
61
|
+
|
62
|
+
string2 = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old.'
|
63
|
+
hash2 = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17'}
|
64
|
+
|
65
|
+
string3 = 'Hello my name is Josh, I was born on 1992-09-17. I am 17 years old, and my favourite animal is a shark.'
|
66
|
+
hash3 = {'person' => 'Josh', 'date' => '1992-09-17', 'number' => '17', 'word' => 'shark'}
|
67
|
+
|
68
|
+
string4 = 'Hello my name is Josh, I was born on 1992-09-17. I am 17 years old.'
|
69
|
+
hash4 = {'person' => 'Josh', 'date' => '1992-09-17', 'number' => '17'}
|
70
|
+
|
71
|
+
assert_equal hash1, Parsey.parse(string1, pattern, partials)
|
72
|
+
assert_equal hash2, Parsey.parse(string2, pattern, partials)
|
73
|
+
assert_equal hash3, Parsey.parse(string3, pattern, partials)
|
74
|
+
assert_equal hash4, Parsey.parse(string4, pattern, partials)
|
75
|
+
end
|
76
|
+
|
77
|
+
should "raise an error when blocks not closed" do
|
78
|
+
assert_raise Parsey::ParseError do
|
79
|
+
Parsey.parse('what', '{question', {'question' => '([a-z ]+\?)'})
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
should "raise an error when optional not closed" do
|
84
|
+
assert_raise Parsey::ParseError do
|
85
|
+
Parsey.parse('hmm', '<{sound}', {'sound' => '(hmm|boo)'})
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
36
89
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parsey
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Joshua Hawxwell
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-07-
|
18
|
+
date: 2010-07-22 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|