regexp-examples 0.5.4 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/regexp-examples/chargroup_parser.rb +70 -0
- data/lib/regexp-examples/constants.rb +33 -12
- data/lib/regexp-examples/groups.rb +2 -58
- data/lib/regexp-examples/parser.rb +7 -3
- data/lib/regexp-examples/version.rb +1 -1
- data/spec/regexp-examples_spec.rb +32 -7
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 062a1310c8b7c861a7724fd75745c1e9bff9257f
|
4
|
+
data.tar.gz: b05ce36dbb3c0afee079091d5c1016a429f1d099
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5519ec6e710a257c165b35f6b4138bc03d37973c31fcb4a7c6091713ab347438dd8e8bbde722c5ab21f63d6ede0565be2b77de0747d42f9f1e83413312b71d14
|
7
|
+
data.tar.gz: 58d67c5e25de2dbd238cb53a192c784047b8ab7e8a836d87c47c9cf5316133d3383a8fdec0a7c9fe07eec0518266b3c66a66c8953e830277b41e1bf9ca53e525
|
data/README.md
CHANGED
@@ -43,6 +43,7 @@ For more detail on this, see [configuration options](#configuration-options).
|
|
43
43
|
* Escape sequences, e.g. `/\x42/`, `/\x5word/`, `/#{"\x80".force_encoding("ASCII-8BIT")}/`
|
44
44
|
* Unicode characters, e.g. `/\u0123/`, `/\uabcd/`, `/\u{789}/`
|
45
45
|
* Octal characters, e.g. `/\10/`, `/\177/`
|
46
|
+
* POSIX bracket expressions (including negation), e.g. `/[[:alnum:]]/`, `/[[:^space:]]/`
|
46
47
|
* **Arbitrarily complex combinations of all the above!**
|
47
48
|
|
48
49
|
* Regexp options can also be used:
|
@@ -54,14 +55,13 @@ For more detail on this, see [configuration options](#configuration-options).
|
|
54
55
|
## Bugs and Not-Yet-Supported syntax
|
55
56
|
|
56
57
|
* Nested character classes, and the use of set intersection ([See here](http://www.ruby-doc.org/core-2.2.0/Regexp.html#class-Regexp-label-Character+Classes) for the official documentation on this.) For example:
|
57
|
-
* `/[[abc]]/.examples` (which _should_ return `["a", "b", "c"]`)
|
58
|
+
* `/[[abc]de]/.examples` (which _should_ return `["a", "b", "c", "d", "e"]`)
|
58
59
|
* `/[[a-d]&&[c-f]]/.examples` (which _should_ return: `["c", "d"]`)
|
59
60
|
|
60
61
|
* Conditional capture groups, such as `/(group1) (?(1)yes|no)/`
|
61
62
|
|
62
63
|
Using any of the following will raise a RegexpExamples::UnsupportedSyntax exception (until such time as they are implemented!):
|
63
64
|
|
64
|
-
* POSIX bracket expressions, e.g. `/[[:alnum:]]/`, `/[[:space:]]/`
|
65
65
|
* Named properties, e.g. `/\p{L}/` ("Letter"), `/\p{Arabic}/` ("Arabic character"), `/\p{^Ll}/` ("Not a lowercase letter")
|
66
66
|
* Subexpression calls, e.g. `/(?<name> ... \g<name>* )/` (Note: These could get _really_ ugly to implement, and may even be impossible, so I highly doubt it's worth the effort!)
|
67
67
|
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module RegexpExamples
|
2
|
+
# Given an array of chars from inside a character set,
|
3
|
+
# Interprets all backslashes, ranges and negations
|
4
|
+
# TODO: This needs a bit of a rewrite because:
|
5
|
+
# A) It's ugly
|
6
|
+
# B) It doesn't take into account nested character groups, or set intersection
|
7
|
+
# To achieve this, the algorithm needs to be recursive, like the main Parser.
|
8
|
+
class ChargroupParser
|
9
|
+
def initialize(chars)
|
10
|
+
@chars = chars
|
11
|
+
if @chars[0] == "^"
|
12
|
+
@negative = true
|
13
|
+
@chars = @chars[1..-1]
|
14
|
+
else
|
15
|
+
@negative = false
|
16
|
+
end
|
17
|
+
|
18
|
+
init_backslash_chars
|
19
|
+
init_ranges
|
20
|
+
end
|
21
|
+
|
22
|
+
def result
|
23
|
+
@negative ? (CharSets::Any - @chars) : @chars
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
def init_backslash_chars
|
28
|
+
@chars.each_with_index do |char, i|
|
29
|
+
if char == "\\"
|
30
|
+
if BackslashCharMap.keys.include?(@chars[i+1])
|
31
|
+
@chars[i..i+1] = move_backslash_to_front( BackslashCharMap[@chars[i+1]] )
|
32
|
+
elsif @chars[i+1] == 'b'
|
33
|
+
@chars[i..i+1] = "\b"
|
34
|
+
elsif @chars[i+1] == "\\"
|
35
|
+
@chars.delete_at(i+1)
|
36
|
+
else
|
37
|
+
@chars.delete_at(i)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def init_ranges
|
44
|
+
# remove hyphen ("-") from front/back, if present
|
45
|
+
hyphen = nil
|
46
|
+
hyphen = @chars.shift if @chars.first == "-"
|
47
|
+
hyphen ||= @chars.pop if @chars.last == "-"
|
48
|
+
# Replace all instances of e.g. ["a", "-", "z"] with ["a", "b", ..., "z"]
|
49
|
+
while i = @chars.index("-")
|
50
|
+
# Prevent infinite loops from expanding [",", "-", "."] to itself
|
51
|
+
# (Since ",".ord = 44, "-".ord = 45, ".".ord = 46)
|
52
|
+
if (@chars[i-1] == ',' && @chars[i+1] == '.')
|
53
|
+
hyphen = @chars.delete_at(i)
|
54
|
+
else
|
55
|
+
@chars[i-1..i+1] = (@chars[i-1]..@chars[i+1]).to_a
|
56
|
+
end
|
57
|
+
end
|
58
|
+
# restore hyphen, if stripped out earlier
|
59
|
+
@chars.unshift(hyphen) if hyphen
|
60
|
+
end
|
61
|
+
|
62
|
+
def move_backslash_to_front(chars)
|
63
|
+
if index = chars.index { |char| char == '\\' }
|
64
|
+
chars.unshift chars.delete_at(index)
|
65
|
+
end
|
66
|
+
chars
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
@@ -32,21 +32,25 @@ module RegexpExamples
|
|
32
32
|
end
|
33
33
|
|
34
34
|
module CharSets
|
35
|
-
Lower
|
36
|
-
Upper
|
37
|
-
Digit
|
38
|
-
Punct
|
39
|
-
Hex
|
40
|
-
|
41
|
-
|
42
|
-
|
35
|
+
Lower = Array('a'..'z')
|
36
|
+
Upper = Array('A'..'Z')
|
37
|
+
Digit = Array('0'..'9')
|
38
|
+
Punct = %w(! " # % & ' ( ) * , - . / : ; ? @ [ \\ ] _ { })
|
39
|
+
Hex = Array('a'..'f') | Array('A'..'F') | Digit
|
40
|
+
Word = Lower | Upper | Digit | ['_']
|
41
|
+
Whitespace = [' ', "\t", "\n", "\r", "\v", "\f"]
|
42
|
+
Control = (0..31).map(&:chr) | ["\x7f"]
|
43
|
+
# Ensure that the "common" characters appear first in the array
|
44
|
+
Any = Lower | Upper | Digit | Punct | (0..127).map(&:chr)
|
45
|
+
AnyNoNewLine = Any - ["\n"]
|
46
|
+
end.freeze
|
43
47
|
|
44
48
|
# Map of special regex characters, to their associated character sets
|
45
49
|
BackslashCharMap = {
|
46
50
|
'd' => CharSets::Digit,
|
47
|
-
'D' => CharSets::
|
48
|
-
'w' => CharSets::
|
49
|
-
'W' => CharSets::
|
51
|
+
'D' => CharSets::Any - CharSets::Digit,
|
52
|
+
'w' => CharSets::Word,
|
53
|
+
'W' => CharSets::Any - CharSets::Word,
|
50
54
|
's' => CharSets::Whitespace,
|
51
55
|
'S' => CharSets::Any - CharSets::Whitespace,
|
52
56
|
'h' => CharSets::Hex,
|
@@ -59,6 +63,23 @@ module RegexpExamples
|
|
59
63
|
'a' => ["\a"], # alarm
|
60
64
|
'v' => ["\v"], # vertical tab
|
61
65
|
'e' => ["\e"], # escape
|
62
|
-
}
|
66
|
+
}.freeze
|
67
|
+
|
68
|
+
POSIXCharMap = {
|
69
|
+
'alnum' => CharSets::Upper | CharSets::Lower | CharSets::Digit,
|
70
|
+
'alpha' => CharSets::Upper | CharSets::Lower,
|
71
|
+
'blank' => [" ", "\t"],
|
72
|
+
'cntrl' => CharSets::Control,
|
73
|
+
'digit' => CharSets::Digit,
|
74
|
+
'graph' => (CharSets::Any - CharSets::Control) - [" "], # Visible chars
|
75
|
+
'lower' => CharSets::Lower,
|
76
|
+
'print' => CharSets::Any - CharSets::Control,
|
77
|
+
'punct' => CharSets::Punct,
|
78
|
+
'space' => CharSets::Whitespace,
|
79
|
+
'upper' => CharSets::Upper,
|
80
|
+
'xdigit' => CharSets::Hex,
|
81
|
+
'word' => CharSets::Word,
|
82
|
+
'ascii' => CharSets::Any
|
83
|
+
}.freeze
|
63
84
|
end
|
64
85
|
|
@@ -63,69 +63,14 @@ module RegexpExamples
|
|
63
63
|
def initialize(chars, ignorecase)
|
64
64
|
@chars = chars
|
65
65
|
@ignorecase = ignorecase
|
66
|
-
if chars[0] == "^"
|
67
|
-
@negative = true
|
68
|
-
@chars = @chars[1..-1]
|
69
|
-
else
|
70
|
-
@negative = false
|
71
|
-
end
|
72
|
-
|
73
|
-
init_backslash_chars
|
74
|
-
init_ranges
|
75
|
-
end
|
76
|
-
|
77
|
-
def init_ranges
|
78
|
-
# save first and last "-" if present
|
79
|
-
|
80
|
-
first = nil
|
81
|
-
last = nil
|
82
|
-
first = @chars.shift if @chars.first == "-"
|
83
|
-
last = @chars.pop if @chars.last == "-"
|
84
|
-
# Replace all instances of e.g. ["a", "-", "z"] with ["a", "b", ..., "z"]
|
85
|
-
while i = @chars.index("-")
|
86
|
-
# Prevent infinite loops from expanding [",", "-", "."] to itself
|
87
|
-
# (Since ",".ord = 44, "-".ord = 45, ".".ord = 46)
|
88
|
-
if (@chars[i-1] == ',' && @chars[i+1] == '.')
|
89
|
-
first = '-'
|
90
|
-
@chars.delete_at(i)
|
91
|
-
else
|
92
|
-
@chars[i-1..i+1] = (@chars[i-1]..@chars[i+1]).to_a
|
93
|
-
end
|
94
|
-
end
|
95
|
-
# restore them back
|
96
|
-
@chars.unshift(first) if first
|
97
|
-
@chars.push(last) if last
|
98
|
-
end
|
99
|
-
|
100
|
-
def init_backslash_chars
|
101
|
-
@chars.each_with_index do |char, i|
|
102
|
-
if char == "\\"
|
103
|
-
if BackslashCharMap.keys.include?(@chars[i+1])
|
104
|
-
@chars[i..i+1] = move_backslash_to_front( BackslashCharMap[@chars[i+1]] )
|
105
|
-
elsif @chars[i+1] == 'b'
|
106
|
-
@chars[i..i+1] = "\b"
|
107
|
-
elsif @chars[i+1] == "\\"
|
108
|
-
@chars.delete_at(i+1)
|
109
|
-
else
|
110
|
-
@chars.delete_at(i)
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
66
|
end
|
115
67
|
|
116
68
|
def result
|
117
|
-
|
69
|
+
@chars.map do |result|
|
118
70
|
GroupResult.new(result)
|
119
71
|
end
|
120
72
|
end
|
121
73
|
|
122
|
-
private
|
123
|
-
def move_backslash_to_front(chars)
|
124
|
-
if index = chars.index { |char| char == '\\' }
|
125
|
-
chars.unshift chars.delete_at(index)
|
126
|
-
end
|
127
|
-
chars
|
128
|
-
end
|
129
74
|
end
|
130
75
|
|
131
76
|
class DotGroup
|
@@ -135,8 +80,7 @@ module RegexpExamples
|
|
135
80
|
end
|
136
81
|
|
137
82
|
def result
|
138
|
-
chars = CharSets::Any
|
139
|
-
chars = (["\n"] | chars) if multiline
|
83
|
+
chars = multiline ? CharSets::Any : CharSets::AnyNoNewLine
|
140
84
|
chars.map do |result|
|
141
85
|
GroupResult.new(result)
|
142
86
|
end
|
@@ -218,8 +218,11 @@ module RegexpExamples
|
|
218
218
|
end
|
219
219
|
|
220
220
|
def parse_char_group
|
221
|
-
|
222
|
-
|
221
|
+
# TODO: Extract all this logic into ChargroupParser
|
222
|
+
if rest_of_string =~ /\A\[\[:(\^?)([^:]+):\]\]/
|
223
|
+
@current_position += (6 + $1.length + $2.length)
|
224
|
+
chars = $1.empty? ? POSIXCharMap[$2] : CharSets::Any - POSIXCharMap[$2]
|
225
|
+
return CharGroup.new(chars, @ignorecase)
|
223
226
|
end
|
224
227
|
chars = []
|
225
228
|
@current_position += 1
|
@@ -238,7 +241,8 @@ module RegexpExamples
|
|
238
241
|
chars << next_char
|
239
242
|
@current_position += 1
|
240
243
|
end
|
241
|
-
|
244
|
+
parsed_chars = ChargroupParser.new(chars).result
|
245
|
+
CharGroup.new(parsed_chars, @ignorecase)
|
242
246
|
end
|
243
247
|
|
244
248
|
def parse_dot_group
|
@@ -2,10 +2,16 @@ RSpec.describe Regexp, "#examples" do
|
|
2
2
|
def self.examples_exist_and_match(*regexps)
|
3
3
|
regexps.each do |regexp|
|
4
4
|
it do
|
5
|
-
|
6
|
-
|
5
|
+
begin
|
6
|
+
regexp_examples = regexp.examples(max_group_results: 999)
|
7
|
+
rescue
|
8
|
+
# TODO: Find a nicer way to display this?
|
9
|
+
puts "Error generating examples for /#{regexp.source}/"
|
10
|
+
raise $!
|
11
|
+
end
|
12
|
+
expect(regexp_examples).not_to be_empty, "No examples were generated for regexp: /#{regexp.source}/"
|
7
13
|
regexp_examples.each { |example| expect(example).to match(/\A(?:#{regexp.source})\z/) }
|
8
|
-
# Note: /\A...\z/ is used
|
14
|
+
# Note: /\A...\z/ is used to prevent misleading examples from passing the test.
|
9
15
|
# For example, we don't want things like:
|
10
16
|
# /a*/.examples to include "xyz"
|
11
17
|
# /a|b/.examples to include "bad"
|
@@ -32,7 +38,7 @@ RSpec.describe Regexp, "#examples" do
|
|
32
38
|
def self.examples_are_empty(*regexps)
|
33
39
|
regexps.each do |regexp|
|
34
40
|
it do
|
35
|
-
expect(regexp.examples).to be_empty
|
41
|
+
expect(regexp.examples).to be_empty, "Unexpected examples for regexp: /#{regexp.source}/"
|
36
42
|
end
|
37
43
|
end
|
38
44
|
end
|
@@ -181,8 +187,7 @@ RSpec.describe Regexp, "#examples" do
|
|
181
187
|
/\p{L}/,
|
182
188
|
/\p{Arabic}/,
|
183
189
|
/\p{^Ll}/,
|
184
|
-
/(?<name> ... \g<name>*)/,
|
185
|
-
/[[:space:]]/
|
190
|
+
/(?<name> ... \g<name>*)/
|
186
191
|
)
|
187
192
|
end
|
188
193
|
|
@@ -230,13 +235,33 @@ RSpec.describe Regexp, "#examples" do
|
|
230
235
|
)
|
231
236
|
end
|
232
237
|
|
233
|
-
context "comment
|
238
|
+
context "for comment groups" do
|
234
239
|
examples_exist_and_match(
|
235
240
|
/a(?#comment)b/,
|
236
241
|
/a(?#ugly backslashy\ comment\\\))b/
|
237
242
|
)
|
238
243
|
end
|
239
244
|
|
245
|
+
context "for POSIX groups" do
|
246
|
+
examples_exist_and_match(
|
247
|
+
/[[:alnum:]]/,
|
248
|
+
/[[:alpha:]]/,
|
249
|
+
/[[:blank:]]/,
|
250
|
+
/[[:cntrl:]]/,
|
251
|
+
/[[:digit:]]/,
|
252
|
+
/[[:graph:]]/,
|
253
|
+
/[[:lower:]]/,
|
254
|
+
/[[:print:]]/,
|
255
|
+
/[[:punct:]]/,
|
256
|
+
/[[:space:]]/,
|
257
|
+
/[[:upper:]]/,
|
258
|
+
/[[:xdigit:]]/,
|
259
|
+
/[[:word:]]/,
|
260
|
+
/[[:ascii:]]/,
|
261
|
+
/[[:^alnum:]]/ # Negated
|
262
|
+
)
|
263
|
+
end
|
264
|
+
|
240
265
|
context "exact examples match" do
|
241
266
|
# More rigorous tests to assert that ALL examples are being listed
|
242
267
|
context "default config options" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp-examples
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.4
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Lord
|
@@ -55,6 +55,7 @@ files:
|
|
55
55
|
- coverage/coverage-badge.png
|
56
56
|
- lib/regexp-examples.rb
|
57
57
|
- lib/regexp-examples/backreferences.rb
|
58
|
+
- lib/regexp-examples/chargroup_parser.rb
|
58
59
|
- lib/regexp-examples/constants.rb
|
59
60
|
- lib/regexp-examples/core_extensions/regexp/examples.rb
|
60
61
|
- lib/regexp-examples/exceptions.rb
|