parsey 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.markdown +3 -7
- data/VERSION +1 -1
- data/lib/parsey.rb +258 -72
- data/parsey.gemspec +2 -2
- data/test/test_parsey.rb +56 -3
- metadata +5 -5
data/README.markdown
CHANGED
@@ -1,12 +1,8 @@
|
|
1
1
|
# parsey
|
2
2
|
|
3
|
-
Parsey is a
|
4
|
-
It takes a string, a pattern, and a hash of regexes. The pattern is filled with the regexes
|
5
|
-
and then that is matched to the string given.
|
3
|
+
Parsey is a simple class to match a string with a pattern and retrieve data from it. It takes a string, a pattern, and a hash of regular expressions (as strings). The pattern is filled with the regular expressions and then that is matched to the string given.
|
6
4
|
|
7
|
-
The pattern uses {} to surround the name of the regex it should be replaced with. You can
|
8
|
-
also use <> to surround parts of the pattern that are optional, though these obviously
|
9
|
-
must be nested properly.
|
5
|
+
The pattern uses {} to surround the name of the regex it should be replaced with. You can also use <> to surround parts of the pattern that are optional, though these obviously must be nested properly.
|
10
6
|
|
11
7
|
## Install
|
12
8
|
|
@@ -22,7 +18,7 @@ must be nested properly.
|
|
22
18
|
#=> {"folder"=>"my-folder", "file-name"=>"my file", "ext"=>"txt"}
|
23
19
|
|
24
20
|
Parsey.parse('my file.txt', '<{folder}/>{file-name}.{ext}', partials)
|
25
|
-
#=> {"
|
21
|
+
#=> {"file-name"=>"my file", "ext"=>"txt"}
|
26
22
|
|
27
23
|
## Copyright
|
28
24
|
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
data/lib/parsey.rb
CHANGED
@@ -1,6 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
#
|
1
|
+
require 'strscan'
|
2
|
+
|
3
|
+
# Parsey is a simple class to match a string with a pattern and retrieve data from it. It
|
4
|
+
# takes a string, a pattern, and a hash of regular expressions. The pattern is filled with the
|
5
|
+
# regular expressions and then that is matched to the string given.
|
4
6
|
#
|
5
7
|
# The pattern uses {} to surround the name of the regex it should be replaced with. You can
|
6
8
|
# also use <> to surround parts of the pattern that are optional, though these obviously
|
@@ -16,11 +18,18 @@
|
|
16
18
|
# #=> {"folder"=>"my-folder", "file-name"=>"my file", "ext"=>"txt"}
|
17
19
|
#
|
18
20
|
# Parsey.parse('my file.txt', '<{folder}/>{file-name}.{ext}', partials)
|
19
|
-
# #=> {"
|
21
|
+
# #=> {"file-name"=>"my file", "ext"=>"txt"}
|
20
22
|
#
|
21
23
|
class Parsey
|
22
24
|
|
23
|
-
|
25
|
+
class ParseError < StandardError; end
|
26
|
+
|
27
|
+
attr_accessor :to_parse, :pattern, :partials, :scanners
|
28
|
+
|
29
|
+
# Depth keeps track of how many levels the optional blocks go down, so that the scanner
|
30
|
+
# to use can be properly tracked. Each level of recursion needs a new scanner object
|
31
|
+
# to refer to or it will just clear the text that was stored.
|
32
|
+
attr_accessor :depth
|
24
33
|
|
25
34
|
# Creates a new Parsey instance.
|
26
35
|
#
|
@@ -36,95 +45,272 @@ class Parsey
|
|
36
45
|
@pattern = pattern
|
37
46
|
@partials = partials
|
38
47
|
|
39
|
-
@
|
48
|
+
@scanners = []
|
49
|
+
@depth = -1
|
40
50
|
end
|
41
51
|
|
42
|
-
#
|
43
|
-
#
|
44
|
-
#
|
52
|
+
# This is a convenience method to allow you to easily parse something
|
53
|
+
# in just one line
|
54
|
+
#
|
55
|
+
# @param [String] to_parse
|
56
|
+
# the string which is to be parsed
|
57
|
+
# @param [String] pattern
|
58
|
+
# for the string to match
|
59
|
+
# @param [Hash{String => String}] partials
|
60
|
+
# the regex patterns (as strings) to use when matching
|
61
|
+
#
|
62
|
+
# @return [Hash{String => String}]
|
63
|
+
# the data retrieved from +to_parse+
|
45
64
|
#
|
46
|
-
|
47
|
-
|
65
|
+
def self.parse(to_parse, pattern, partials)
|
66
|
+
a = Parsey.new(to_parse, pattern, partials)
|
67
|
+
a.parse
|
68
|
+
end
|
69
|
+
|
70
|
+
# This is a front for r_place so that a regex is returned as expected
|
48
71
|
#
|
72
|
+
# @param [Array] pat the pattern to turn into a regular expression
|
73
|
+
# @return [Regexp] the regex that will be used for parsing
|
74
|
+
# @see r_place
|
49
75
|
def regex
|
50
|
-
|
51
|
-
|
52
|
-
|
76
|
+
Regexp.new(r_place(scan))
|
77
|
+
end
|
78
|
+
|
79
|
+
# @return [StringScanner] the current scanner to use
|
80
|
+
def scanner
|
81
|
+
@scanners[@depth]
|
82
|
+
end
|
83
|
+
|
84
|
+
# Finds matches from +to_parse+ using #regex. Then uses this data
|
85
|
+
# and the pattern created with #scan to match the data with names.
|
86
|
+
#
|
87
|
+
# @return [Hash{String => String}]
|
88
|
+
# the data taken from +to_parse+
|
89
|
+
def parse
|
90
|
+
match = @to_parse.match(self.regex).captures
|
91
|
+
data = {}
|
53
92
|
|
54
|
-
|
55
|
-
|
56
|
-
|
93
|
+
self.scan.flatten.each_with_type_indexed do |t, c, i|
|
94
|
+
if (t == :block) && (match[i] != nil)
|
95
|
+
data[c] = match[i]
|
96
|
+
end
|
57
97
|
end
|
58
98
|
|
59
|
-
|
99
|
+
data
|
60
100
|
end
|
61
101
|
|
62
|
-
|
63
|
-
#
|
64
|
-
#
|
102
|
+
|
103
|
+
# Need to reset scanners after every full run, so this provides a front
|
104
|
+
# for r_scan, which resets +scanners+ and still returns the correct value.
|
65
105
|
#
|
66
|
-
# @
|
67
|
-
#
|
106
|
+
# @see #r_scan
|
107
|
+
# @return [ScanArray]
|
108
|
+
def scan
|
109
|
+
r = self.r_scan(@pattern)
|
110
|
+
@scanners =[]
|
111
|
+
r
|
112
|
+
end
|
113
|
+
|
114
|
+
# Creates a new StringScanner, then scans for blocks, optionals or text
|
115
|
+
# and adds the result to +parsed+ until it reaches the end of +str+.
|
68
116
|
#
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
}.flatten!
|
80
|
-
|
81
|
-
parts.collect! {|i|
|
82
|
-
i.gsub!(/[^a-zA-Z0-9_-]/, '') unless i.nil?
|
83
|
-
}
|
84
|
-
|
85
|
-
parts.delete_if {|i| i == ''}
|
86
|
-
|
87
|
-
return parts
|
88
|
-
else
|
89
|
-
parts = []
|
90
|
-
@pattern.gsub(/\{([a-z-]+)\}/) do
|
91
|
-
parts << $1
|
92
|
-
end
|
93
|
-
return parts
|
117
|
+
# @param [String] str the string to scan through
|
118
|
+
# @return [ScanArray]
|
119
|
+
def r_scan(str)
|
120
|
+
parsed = ScanArray.new
|
121
|
+
|
122
|
+
@depth += 1
|
123
|
+
@scanners[@depth] = StringScanner.new(str)
|
124
|
+
until self.scanner.eos?
|
125
|
+
a = scan_blocks || a = scan_optionals || a = scan_text
|
126
|
+
parsed << a
|
94
127
|
end
|
128
|
+
@depth -= 1
|
129
|
+
|
130
|
+
parsed
|
95
131
|
end
|
96
132
|
|
97
|
-
#
|
98
|
-
# +data+ using +order+ to match the data up with the correct name.
|
133
|
+
# Finds next {...} in the StringScanner, and checks that it is closed.
|
99
134
|
#
|
100
|
-
# @return [
|
101
|
-
# the
|
135
|
+
# @return [Array]
|
136
|
+
# an array of the form [:block, ...]
|
137
|
+
def scan_blocks
|
138
|
+
return unless self.scanner.scan(/\{/)
|
139
|
+
content = scan_until(:block)
|
140
|
+
|
141
|
+
raise ParseError unless self.scanner.scan(/\}/) # no closing block
|
142
|
+
raise NoPartialError unless @partials[content]
|
143
|
+
|
144
|
+
[:block, content]
|
145
|
+
end
|
146
|
+
|
147
|
+
# Finds next <...> in the StringScanner, and checks that it is closed.
|
148
|
+
# Then scans the contents of the optional block.
|
102
149
|
#
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
150
|
+
# @return [Array]
|
151
|
+
# an array of the form [:optional, [...]]
|
152
|
+
def scan_optionals
|
153
|
+
return unless self.scanner.scan(/</)
|
154
|
+
content = scan_until(:optional)
|
155
|
+
|
156
|
+
raise ParseError unless self.scanner.scan(/>/) # no closing block
|
157
|
+
|
158
|
+
[:optional, r_scan(content)]
|
110
159
|
end
|
111
160
|
|
112
|
-
#
|
113
|
-
# in just one go!
|
161
|
+
# Finds plain text, and checks whether there are any blocks left.
|
114
162
|
#
|
115
|
-
# @
|
116
|
-
#
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
163
|
+
# @return [Array]
|
164
|
+
# text before next block, or rest of text in the form [:text, ...]
|
165
|
+
def scan_text
|
166
|
+
text = scan_until(:open)
|
167
|
+
|
168
|
+
if text.nil?
|
169
|
+
text = self.scanner.rest
|
170
|
+
self.scanner.clear
|
171
|
+
end
|
172
|
+
|
173
|
+
[:text, text]
|
174
|
+
end
|
175
|
+
|
176
|
+
# Scans the string until a tag is found of the type given.
|
121
177
|
#
|
122
|
-
# @
|
123
|
-
#
|
178
|
+
# @param [Symbol] type of tag to look for.
|
179
|
+
# +:block+ for a closing block tag (+}+),
|
180
|
+
# +:optional+ for a closing optional tag (+>+),
|
181
|
+
# +:open+ for an opening tag (+{+ or +<+).
|
182
|
+
# @return [String, nil]
|
183
|
+
# the text before the tag, or nil if no match found
|
184
|
+
def scan_until(type)
|
185
|
+
case type
|
186
|
+
when :block
|
187
|
+
regex = /\}/
|
188
|
+
when :optional
|
189
|
+
regex = />/
|
190
|
+
when :open
|
191
|
+
regex = /(\{|<)/
|
192
|
+
end
|
193
|
+
pos = self.scanner.pos
|
194
|
+
if self.scanner.scan_until(regex)
|
195
|
+
self.scanner.pos -= self.scanner.matched.size
|
196
|
+
self.scanner.pre_match[pos..-1]
|
197
|
+
end
|
198
|
+
end
|
199
|
+
|
200
|
+
# Puts the regexps in the correct place, but returns a string so it can
|
201
|
+
# still work recursively
|
124
202
|
#
|
125
|
-
|
126
|
-
|
127
|
-
|
203
|
+
# @param [ScanArray] pat the pattern to turn into a regular expression
|
204
|
+
# @return [String] the regular expression as a string
|
205
|
+
def r_place(pat)
|
206
|
+
str = ''
|
207
|
+
pat.each_with_type do |t, c|
|
208
|
+
case t
|
209
|
+
when :block
|
210
|
+
str << @partials[c]
|
211
|
+
when :text
|
212
|
+
str << c
|
213
|
+
when :optional
|
214
|
+
str << "(#{r_place(c)})?"
|
215
|
+
end
|
216
|
+
end
|
217
|
+
|
218
|
+
str
|
128
219
|
end
|
129
220
|
|
221
|
+
# ScanArray is an array of tokens created when scanning the pattern.
|
222
|
+
# It looks like this:
|
223
|
+
# [[:block, 'what-'], [:optional, [[:text, "hi-"]]], [:text, "oh"]]
|
224
|
+
#
|
225
|
+
class ScanArray < Array
|
226
|
+
|
227
|
+
# @see #flatten
|
228
|
+
def flatten!
|
229
|
+
self.replace(self.flatten)
|
230
|
+
end
|
231
|
+
|
232
|
+
# Removes all :text nodes from the array and puts :optional nodes' contents into the
|
233
|
+
# main array, and puts a nil in place
|
234
|
+
#
|
235
|
+
# @return [Array]
|
236
|
+
#
|
237
|
+
# @example
|
238
|
+
#
|
239
|
+
# sa = ScanArray.new([[:text, 'hey-'],
|
240
|
+
# [:optional,
|
241
|
+
# [[:block, '([a-z]+)'],
|
242
|
+
# [:text, '-what']]
|
243
|
+
# ]])
|
244
|
+
#
|
245
|
+
# sa.flatten
|
246
|
+
# #=> [[:optional, nil], [:block, "([a-z]+)"]]
|
247
|
+
#
|
248
|
+
def flatten
|
249
|
+
# Flatten the array with Array#flatten before starting
|
250
|
+
flat = super
|
251
|
+
|
252
|
+
indexes = []
|
253
|
+
flat.each_with_index do |v, i|
|
254
|
+
if v == :optional
|
255
|
+
indexes << i
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
# Need to start from the back so as not to alter the indexes of the
|
260
|
+
# other items when inserting
|
261
|
+
indexes.reverse.each do |i|
|
262
|
+
flat.insert(i+1, nil)
|
263
|
+
end
|
264
|
+
|
265
|
+
flat.reverse!
|
266
|
+
r = ScanArray.new
|
267
|
+
while flat.size > 0
|
268
|
+
r << [flat.pop, flat.pop]
|
269
|
+
end
|
270
|
+
|
271
|
+
r.delete_if {|i| i[0] == :text}
|
272
|
+
r
|
273
|
+
end
|
274
|
+
|
275
|
+
# Loops through the types and contents of each tag separately, passing them
|
276
|
+
# to the block given.
|
277
|
+
#
|
278
|
+
# @return [ScanArray] returns self
|
279
|
+
# @yield [Symbol, Object] gives the type and content of each block in turn
|
280
|
+
#
|
281
|
+
# @example
|
282
|
+
#
|
283
|
+
# sa = ScanArray.new([[:text, 'hey-'],
|
284
|
+
# [:optional,
|
285
|
+
# [[:block, '([a-z]+)'],
|
286
|
+
# [:text, '-what']]
|
287
|
+
# ]])
|
288
|
+
#
|
289
|
+
# sa.each_with_type do |type, content|
|
290
|
+
# puts "#{type} -> #{content}"
|
291
|
+
# end
|
292
|
+
# #=> text -> hey-
|
293
|
+
# #=> optional -> [[:block, "([a-z]+)"], [:text, "-what"]]
|
294
|
+
#
|
295
|
+
def each_with_type(&blck)
|
296
|
+
ts = self.collect {|i| i[0]}
|
297
|
+
cs = self.collect {|i| i[1]}
|
298
|
+
(0...ts.size).each do |i|
|
299
|
+
yield(ts[i], cs[i])
|
300
|
+
end
|
301
|
+
self
|
302
|
+
end
|
303
|
+
|
304
|
+
# @see #each_with_type
|
305
|
+
# @yield [Symbol, Object, Integer] gives the type, content and index of each block in turn
|
306
|
+
def each_with_type_indexed(&blck)
|
307
|
+
ts = self.collect {|i| i[0]}
|
308
|
+
cs = self.collect {|i| i[1]}
|
309
|
+
(0...ts.size).each do |i|
|
310
|
+
yield(ts[i], cs[i], i)
|
311
|
+
end
|
312
|
+
self
|
313
|
+
end
|
314
|
+
|
315
|
+
end
|
130
316
|
end
|
data/parsey.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{parsey}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Joshua Hawxwell"]
|
12
|
-
s.date = %q{2010-07-
|
12
|
+
s.date = %q{2010-07-22}
|
13
13
|
s.description = %q{Parsey matches a string with a pattern to retrieve data from it.}
|
14
14
|
s.email = %q{m@hawx.me}
|
15
15
|
s.extra_rdoc_files = [
|
data/test/test_parsey.rb
CHANGED
@@ -14,16 +14,18 @@ class TestParsey < Test::Unit::TestCase
|
|
14
14
|
assert_equal Regexp.new("(f)?\/n.e"), t.regex
|
15
15
|
end
|
16
16
|
|
17
|
-
should "
|
17
|
+
should "scan correctly" do
|
18
18
|
partials = {'folder' => 'f', 'name' => 'n', 'ext' => 'e'}
|
19
19
|
t = Parsey.new('', '<{folder}/>{name}.{ext}', partials)
|
20
|
-
|
20
|
+
r = [[ :optional, [[:block, "folder"], [:text, "/"]] ], [:block, "name"], [:text, "."], [:block, "ext"]]
|
21
|
+
assert_equal r, t.scan
|
21
22
|
end
|
22
23
|
|
23
24
|
should "create correct order when optional is in the middle" do
|
24
25
|
partials = {'folder' => 'folder', 'name' => 'name', 'ext' => 'ext'}
|
25
26
|
t = Parsey.new('', '{folder}/<{name}>.{ext}', partials)
|
26
|
-
|
27
|
+
r = [[:block, "folder"], [:text, "/"], [:optional, [[:block, "name"]]], [:text, "."], [:block, "ext"]]
|
28
|
+
assert_equal r, t.scan
|
27
29
|
end
|
28
30
|
|
29
31
|
should "parse properly" do
|
@@ -33,4 +35,55 @@ class TestParsey < Test::Unit::TestCase
|
|
33
35
|
assert_equal hash, t.parse
|
34
36
|
end
|
35
37
|
|
38
|
+
should "parse long patterns properly" do
|
39
|
+
partials = {'word' => '([a-z]+)',
|
40
|
+
'number' => '([0-9]+)',
|
41
|
+
'date' => '(\d{4}-\d{2}-\d{2})',
|
42
|
+
'time' => '(\d{2}:\d{2})',
|
43
|
+
'person' => '(John|Dave|Luke|Josh)'}
|
44
|
+
|
45
|
+
pattern = 'Hello my name is {person}, I was born on {date} at {time}. I am {number} years old, and my favourite animal is a {word}.'
|
46
|
+
string = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old, and my favourite animal is a shark.'
|
47
|
+
|
48
|
+
hash = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17', 'word' => 'shark'}
|
49
|
+
assert_equal hash, Parsey.parse(string, pattern, partials)
|
50
|
+
end
|
51
|
+
|
52
|
+
should "parse multiple optionals correctly" do
|
53
|
+
partials = {'word' => '([a-z]+)',
|
54
|
+
'number' => '([0-9]+)',
|
55
|
+
'date' => '(\d{4}-\d{2}-\d{2})',
|
56
|
+
'time' => '(\d{2}:\d{2})',
|
57
|
+
'person' => '(John|Dave|Luke|Josh)'}
|
58
|
+
pattern = 'Hello my name is {person}, I was born on {date}< at {time}>. I am {number} years old<, and my favourite animal is a {word}>.'
|
59
|
+
string1 = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old, and my favourite animal is a shark.'
|
60
|
+
hash1 = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17', 'word' => 'shark'}
|
61
|
+
|
62
|
+
string2 = 'Hello my name is Josh, I was born on 1992-09-17 at 06:24. I am 17 years old.'
|
63
|
+
hash2 = {'person' => 'Josh', 'date' => '1992-09-17', 'time' => '06:24', 'number' => '17'}
|
64
|
+
|
65
|
+
string3 = 'Hello my name is Josh, I was born on 1992-09-17. I am 17 years old, and my favourite animal is a shark.'
|
66
|
+
hash3 = {'person' => 'Josh', 'date' => '1992-09-17', 'number' => '17', 'word' => 'shark'}
|
67
|
+
|
68
|
+
string4 = 'Hello my name is Josh, I was born on 1992-09-17. I am 17 years old.'
|
69
|
+
hash4 = {'person' => 'Josh', 'date' => '1992-09-17', 'number' => '17'}
|
70
|
+
|
71
|
+
assert_equal hash1, Parsey.parse(string1, pattern, partials)
|
72
|
+
assert_equal hash2, Parsey.parse(string2, pattern, partials)
|
73
|
+
assert_equal hash3, Parsey.parse(string3, pattern, partials)
|
74
|
+
assert_equal hash4, Parsey.parse(string4, pattern, partials)
|
75
|
+
end
|
76
|
+
|
77
|
+
should "raise an error when blocks not closed" do
|
78
|
+
assert_raise Parsey::ParseError do
|
79
|
+
Parsey.parse('what', '{question', {'question' => '([a-z ]+\?)'})
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
should "raise an error when optional not closed" do
|
84
|
+
assert_raise Parsey::ParseError do
|
85
|
+
Parsey.parse('hmm', '<{sound}', {'sound' => '(hmm|boo)'})
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
36
89
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: parsey
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Joshua Hawxwell
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-07-
|
18
|
+
date: 2010-07-22 00:00:00 +01:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|