regexp-examples 0.5.4 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/lib/regexp-examples/chargroup_parser.rb +70 -0
- data/lib/regexp-examples/constants.rb +33 -12
- data/lib/regexp-examples/groups.rb +2 -58
- data/lib/regexp-examples/parser.rb +7 -3
- data/lib/regexp-examples/version.rb +1 -1
- data/spec/regexp-examples_spec.rb +32 -7
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 062a1310c8b7c861a7724fd75745c1e9bff9257f
|
4
|
+
data.tar.gz: b05ce36dbb3c0afee079091d5c1016a429f1d099
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5519ec6e710a257c165b35f6b4138bc03d37973c31fcb4a7c6091713ab347438dd8e8bbde722c5ab21f63d6ede0565be2b77de0747d42f9f1e83413312b71d14
|
7
|
+
data.tar.gz: 58d67c5e25de2dbd238cb53a192c784047b8ab7e8a836d87c47c9cf5316133d3383a8fdec0a7c9fe07eec0518266b3c66a66c8953e830277b41e1bf9ca53e525
|
data/README.md
CHANGED
@@ -43,6 +43,7 @@ For more detail on this, see [configuration options](#configuration-options).
|
|
43
43
|
* Escape sequences, e.g. `/\x42/`, `/\x5word/`, `/#{"\x80".force_encoding("ASCII-8BIT")}/`
|
44
44
|
* Unicode characters, e.g. `/\u0123/`, `/\uabcd/`, `/\u{789}/`
|
45
45
|
* Octal characters, e.g. `/\10/`, `/\177/`
|
46
|
+
* POSIX bracket expressions (including negation), e.g. `/[[:alnum:]]/`, `/[[:^space:]]/`
|
46
47
|
* **Arbitrarily complex combinations of all the above!**
|
47
48
|
|
48
49
|
* Regexp options can also be used:
|
@@ -54,14 +55,13 @@ For more detail on this, see [configuration options](#configuration-options).
|
|
54
55
|
## Bugs and Not-Yet-Supported syntax
|
55
56
|
|
56
57
|
* Nested character classes, and the use of set intersection ([See here](http://www.ruby-doc.org/core-2.2.0/Regexp.html#class-Regexp-label-Character+Classes) for the official documentation on this.) For example:
|
57
|
-
* `/[[abc]]/.examples` (which _should_ return `["a", "b", "c"]`)
|
58
|
+
* `/[[abc]de]/.examples` (which _should_ return `["a", "b", "c", "d", "e"]`)
|
58
59
|
* `/[[a-d]&&[c-f]]/.examples` (which _should_ return: `["c", "d"]`)
|
59
60
|
|
60
61
|
* Conditional capture groups, such as `/(group1) (?(1)yes|no)/`
|
61
62
|
|
62
63
|
Using any of the following will raise a RegexpExamples::UnsupportedSyntax exception (until such time as they are implemented!):
|
63
64
|
|
64
|
-
* POSIX bracket expressions, e.g. `/[[:alnum:]]/`, `/[[:space:]]/`
|
65
65
|
* Named properties, e.g. `/\p{L}/` ("Letter"), `/\p{Arabic}/` ("Arabic character"), `/\p{^Ll}/` ("Not a lowercase letter")
|
66
66
|
* Subexpression calls, e.g. `/(?<name> ... \g<name>* )/` (Note: These could get _really_ ugly to implement, and may even be impossible, so I highly doubt it's worth the effort!)
|
67
67
|
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module RegexpExamples
|
2
|
+
# Given an array of chars from inside a character set,
|
3
|
+
# Interprets all backslashes, ranges and negations
|
4
|
+
# TODO: This needs a bit of a rewrite because:
|
5
|
+
# A) It's ugly
|
6
|
+
# B) It doesn't take into account nested character groups, or set intersection
|
7
|
+
# To achieve this, the algorithm needs to be recursive, like the main Parser.
|
8
|
+
class ChargroupParser
|
9
|
+
def initialize(chars)
|
10
|
+
@chars = chars
|
11
|
+
if @chars[0] == "^"
|
12
|
+
@negative = true
|
13
|
+
@chars = @chars[1..-1]
|
14
|
+
else
|
15
|
+
@negative = false
|
16
|
+
end
|
17
|
+
|
18
|
+
init_backslash_chars
|
19
|
+
init_ranges
|
20
|
+
end
|
21
|
+
|
22
|
+
def result
|
23
|
+
@negative ? (CharSets::Any - @chars) : @chars
|
24
|
+
end
|
25
|
+
|
26
|
+
private
|
27
|
+
def init_backslash_chars
|
28
|
+
@chars.each_with_index do |char, i|
|
29
|
+
if char == "\\"
|
30
|
+
if BackslashCharMap.keys.include?(@chars[i+1])
|
31
|
+
@chars[i..i+1] = move_backslash_to_front( BackslashCharMap[@chars[i+1]] )
|
32
|
+
elsif @chars[i+1] == 'b'
|
33
|
+
@chars[i..i+1] = "\b"
|
34
|
+
elsif @chars[i+1] == "\\"
|
35
|
+
@chars.delete_at(i+1)
|
36
|
+
else
|
37
|
+
@chars.delete_at(i)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
def init_ranges
|
44
|
+
# remove hyphen ("-") from front/back, if present
|
45
|
+
hyphen = nil
|
46
|
+
hyphen = @chars.shift if @chars.first == "-"
|
47
|
+
hyphen ||= @chars.pop if @chars.last == "-"
|
48
|
+
# Replace all instances of e.g. ["a", "-", "z"] with ["a", "b", ..., "z"]
|
49
|
+
while i = @chars.index("-")
|
50
|
+
# Prevent infinite loops from expanding [",", "-", "."] to itself
|
51
|
+
# (Since ",".ord = 44, "-".ord = 45, ".".ord = 46)
|
52
|
+
if (@chars[i-1] == ',' && @chars[i+1] == '.')
|
53
|
+
hyphen = @chars.delete_at(i)
|
54
|
+
else
|
55
|
+
@chars[i-1..i+1] = (@chars[i-1]..@chars[i+1]).to_a
|
56
|
+
end
|
57
|
+
end
|
58
|
+
# restore hyphen, if stripped out earlier
|
59
|
+
@chars.unshift(hyphen) if hyphen
|
60
|
+
end
|
61
|
+
|
62
|
+
def move_backslash_to_front(chars)
|
63
|
+
if index = chars.index { |char| char == '\\' }
|
64
|
+
chars.unshift chars.delete_at(index)
|
65
|
+
end
|
66
|
+
chars
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
@@ -32,21 +32,25 @@ module RegexpExamples
|
|
32
32
|
end
|
33
33
|
|
34
34
|
module CharSets
|
35
|
-
Lower
|
36
|
-
Upper
|
37
|
-
Digit
|
38
|
-
Punct
|
39
|
-
Hex
|
40
|
-
|
41
|
-
|
42
|
-
|
35
|
+
Lower = Array('a'..'z')
|
36
|
+
Upper = Array('A'..'Z')
|
37
|
+
Digit = Array('0'..'9')
|
38
|
+
Punct = %w(! " # % & ' ( ) * , - . / : ; ? @ [ \\ ] _ { })
|
39
|
+
Hex = Array('a'..'f') | Array('A'..'F') | Digit
|
40
|
+
Word = Lower | Upper | Digit | ['_']
|
41
|
+
Whitespace = [' ', "\t", "\n", "\r", "\v", "\f"]
|
42
|
+
Control = (0..31).map(&:chr) | ["\x7f"]
|
43
|
+
# Ensure that the "common" characters appear first in the array
|
44
|
+
Any = Lower | Upper | Digit | Punct | (0..127).map(&:chr)
|
45
|
+
AnyNoNewLine = Any - ["\n"]
|
46
|
+
end.freeze
|
43
47
|
|
44
48
|
# Map of special regex characters, to their associated character sets
|
45
49
|
BackslashCharMap = {
|
46
50
|
'd' => CharSets::Digit,
|
47
|
-
'D' => CharSets::
|
48
|
-
'w' => CharSets::
|
49
|
-
'W' => CharSets::
|
51
|
+
'D' => CharSets::Any - CharSets::Digit,
|
52
|
+
'w' => CharSets::Word,
|
53
|
+
'W' => CharSets::Any - CharSets::Word,
|
50
54
|
's' => CharSets::Whitespace,
|
51
55
|
'S' => CharSets::Any - CharSets::Whitespace,
|
52
56
|
'h' => CharSets::Hex,
|
@@ -59,6 +63,23 @@ module RegexpExamples
|
|
59
63
|
'a' => ["\a"], # alarm
|
60
64
|
'v' => ["\v"], # vertical tab
|
61
65
|
'e' => ["\e"], # escape
|
62
|
-
}
|
66
|
+
}.freeze
|
67
|
+
|
68
|
+
POSIXCharMap = {
|
69
|
+
'alnum' => CharSets::Upper | CharSets::Lower | CharSets::Digit,
|
70
|
+
'alpha' => CharSets::Upper | CharSets::Lower,
|
71
|
+
'blank' => [" ", "\t"],
|
72
|
+
'cntrl' => CharSets::Control,
|
73
|
+
'digit' => CharSets::Digit,
|
74
|
+
'graph' => (CharSets::Any - CharSets::Control) - [" "], # Visible chars
|
75
|
+
'lower' => CharSets::Lower,
|
76
|
+
'print' => CharSets::Any - CharSets::Control,
|
77
|
+
'punct' => CharSets::Punct,
|
78
|
+
'space' => CharSets::Whitespace,
|
79
|
+
'upper' => CharSets::Upper,
|
80
|
+
'xdigit' => CharSets::Hex,
|
81
|
+
'word' => CharSets::Word,
|
82
|
+
'ascii' => CharSets::Any
|
83
|
+
}.freeze
|
63
84
|
end
|
64
85
|
|
@@ -63,69 +63,14 @@ module RegexpExamples
|
|
63
63
|
def initialize(chars, ignorecase)
|
64
64
|
@chars = chars
|
65
65
|
@ignorecase = ignorecase
|
66
|
-
if chars[0] == "^"
|
67
|
-
@negative = true
|
68
|
-
@chars = @chars[1..-1]
|
69
|
-
else
|
70
|
-
@negative = false
|
71
|
-
end
|
72
|
-
|
73
|
-
init_backslash_chars
|
74
|
-
init_ranges
|
75
|
-
end
|
76
|
-
|
77
|
-
def init_ranges
|
78
|
-
# save first and last "-" if present
|
79
|
-
|
80
|
-
first = nil
|
81
|
-
last = nil
|
82
|
-
first = @chars.shift if @chars.first == "-"
|
83
|
-
last = @chars.pop if @chars.last == "-"
|
84
|
-
# Replace all instances of e.g. ["a", "-", "z"] with ["a", "b", ..., "z"]
|
85
|
-
while i = @chars.index("-")
|
86
|
-
# Prevent infinite loops from expanding [",", "-", "."] to itself
|
87
|
-
# (Since ",".ord = 44, "-".ord = 45, ".".ord = 46)
|
88
|
-
if (@chars[i-1] == ',' && @chars[i+1] == '.')
|
89
|
-
first = '-'
|
90
|
-
@chars.delete_at(i)
|
91
|
-
else
|
92
|
-
@chars[i-1..i+1] = (@chars[i-1]..@chars[i+1]).to_a
|
93
|
-
end
|
94
|
-
end
|
95
|
-
# restore them back
|
96
|
-
@chars.unshift(first) if first
|
97
|
-
@chars.push(last) if last
|
98
|
-
end
|
99
|
-
|
100
|
-
def init_backslash_chars
|
101
|
-
@chars.each_with_index do |char, i|
|
102
|
-
if char == "\\"
|
103
|
-
if BackslashCharMap.keys.include?(@chars[i+1])
|
104
|
-
@chars[i..i+1] = move_backslash_to_front( BackslashCharMap[@chars[i+1]] )
|
105
|
-
elsif @chars[i+1] == 'b'
|
106
|
-
@chars[i..i+1] = "\b"
|
107
|
-
elsif @chars[i+1] == "\\"
|
108
|
-
@chars.delete_at(i+1)
|
109
|
-
else
|
110
|
-
@chars.delete_at(i)
|
111
|
-
end
|
112
|
-
end
|
113
|
-
end
|
114
66
|
end
|
115
67
|
|
116
68
|
def result
|
117
|
-
|
69
|
+
@chars.map do |result|
|
118
70
|
GroupResult.new(result)
|
119
71
|
end
|
120
72
|
end
|
121
73
|
|
122
|
-
private
|
123
|
-
def move_backslash_to_front(chars)
|
124
|
-
if index = chars.index { |char| char == '\\' }
|
125
|
-
chars.unshift chars.delete_at(index)
|
126
|
-
end
|
127
|
-
chars
|
128
|
-
end
|
129
74
|
end
|
130
75
|
|
131
76
|
class DotGroup
|
@@ -135,8 +80,7 @@ module RegexpExamples
|
|
135
80
|
end
|
136
81
|
|
137
82
|
def result
|
138
|
-
chars = CharSets::Any
|
139
|
-
chars = (["\n"] | chars) if multiline
|
83
|
+
chars = multiline ? CharSets::Any : CharSets::AnyNoNewLine
|
140
84
|
chars.map do |result|
|
141
85
|
GroupResult.new(result)
|
142
86
|
end
|
@@ -218,8 +218,11 @@ module RegexpExamples
|
|
218
218
|
end
|
219
219
|
|
220
220
|
def parse_char_group
|
221
|
-
|
222
|
-
|
221
|
+
# TODO: Extract all this logic into ChargroupParser
|
222
|
+
if rest_of_string =~ /\A\[\[:(\^?)([^:]+):\]\]/
|
223
|
+
@current_position += (6 + $1.length + $2.length)
|
224
|
+
chars = $1.empty? ? POSIXCharMap[$2] : CharSets::Any - POSIXCharMap[$2]
|
225
|
+
return CharGroup.new(chars, @ignorecase)
|
223
226
|
end
|
224
227
|
chars = []
|
225
228
|
@current_position += 1
|
@@ -238,7 +241,8 @@ module RegexpExamples
|
|
238
241
|
chars << next_char
|
239
242
|
@current_position += 1
|
240
243
|
end
|
241
|
-
|
244
|
+
parsed_chars = ChargroupParser.new(chars).result
|
245
|
+
CharGroup.new(parsed_chars, @ignorecase)
|
242
246
|
end
|
243
247
|
|
244
248
|
def parse_dot_group
|
@@ -2,10 +2,16 @@ RSpec.describe Regexp, "#examples" do
|
|
2
2
|
def self.examples_exist_and_match(*regexps)
|
3
3
|
regexps.each do |regexp|
|
4
4
|
it do
|
5
|
-
|
6
|
-
|
5
|
+
begin
|
6
|
+
regexp_examples = regexp.examples(max_group_results: 999)
|
7
|
+
rescue
|
8
|
+
# TODO: Find a nicer way to display this?
|
9
|
+
puts "Error generating examples for /#{regexp.source}/"
|
10
|
+
raise $!
|
11
|
+
end
|
12
|
+
expect(regexp_examples).not_to be_empty, "No examples were generated for regexp: /#{regexp.source}/"
|
7
13
|
regexp_examples.each { |example| expect(example).to match(/\A(?:#{regexp.source})\z/) }
|
8
|
-
# Note: /\A...\z/ is used
|
14
|
+
# Note: /\A...\z/ is used to prevent misleading examples from passing the test.
|
9
15
|
# For example, we don't want things like:
|
10
16
|
# /a*/.examples to include "xyz"
|
11
17
|
# /a|b/.examples to include "bad"
|
@@ -32,7 +38,7 @@ RSpec.describe Regexp, "#examples" do
|
|
32
38
|
def self.examples_are_empty(*regexps)
|
33
39
|
regexps.each do |regexp|
|
34
40
|
it do
|
35
|
-
expect(regexp.examples).to be_empty
|
41
|
+
expect(regexp.examples).to be_empty, "Unexpected examples for regexp: /#{regexp.source}/"
|
36
42
|
end
|
37
43
|
end
|
38
44
|
end
|
@@ -181,8 +187,7 @@ RSpec.describe Regexp, "#examples" do
|
|
181
187
|
/\p{L}/,
|
182
188
|
/\p{Arabic}/,
|
183
189
|
/\p{^Ll}/,
|
184
|
-
/(?<name> ... \g<name>*)
|
185
|
-
/[[:space:]]/
|
190
|
+
/(?<name> ... \g<name>*)/
|
186
191
|
)
|
187
192
|
end
|
188
193
|
|
@@ -230,13 +235,33 @@ RSpec.describe Regexp, "#examples" do
|
|
230
235
|
)
|
231
236
|
end
|
232
237
|
|
233
|
-
context "comment
|
238
|
+
context "for comment groups" do
|
234
239
|
examples_exist_and_match(
|
235
240
|
/a(?#comment)b/,
|
236
241
|
/a(?#ugly backslashy\ comment\\\))b/
|
237
242
|
)
|
238
243
|
end
|
239
244
|
|
245
|
+
context "for POSIX groups" do
|
246
|
+
examples_exist_and_match(
|
247
|
+
/[[:alnum:]]/,
|
248
|
+
/[[:alpha:]]/,
|
249
|
+
/[[:blank:]]/,
|
250
|
+
/[[:cntrl:]]/,
|
251
|
+
/[[:digit:]]/,
|
252
|
+
/[[:graph:]]/,
|
253
|
+
/[[:lower:]]/,
|
254
|
+
/[[:print:]]/,
|
255
|
+
/[[:punct:]]/,
|
256
|
+
/[[:space:]]/,
|
257
|
+
/[[:upper:]]/,
|
258
|
+
/[[:xdigit:]]/,
|
259
|
+
/[[:word:]]/,
|
260
|
+
/[[:ascii:]]/,
|
261
|
+
/[[:^alnum:]]/ # Negated
|
262
|
+
)
|
263
|
+
end
|
264
|
+
|
240
265
|
context "exact examples match" do
|
241
266
|
# More rigorous tests to assert that ALL examples are being listed
|
242
267
|
context "default config options" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp-examples
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Lord
|
@@ -55,6 +55,7 @@ files:
|
|
55
55
|
- coverage/coverage-badge.png
|
56
56
|
- lib/regexp-examples.rb
|
57
57
|
- lib/regexp-examples/backreferences.rb
|
58
|
+
- lib/regexp-examples/chargroup_parser.rb
|
58
59
|
- lib/regexp-examples/constants.rb
|
59
60
|
- lib/regexp-examples/core_extensions/regexp/examples.rb
|
60
61
|
- lib/regexp-examples/exceptions.rb
|