regexp-examples 1.4.0 → 1.4.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/lib/regexp-examples.rb +2 -1
- data/lib/regexp-examples/char_sets.rb +59 -0
- data/lib/regexp-examples/chargroup_parser.rb +4 -8
- data/lib/regexp-examples/config.rb +64 -0
- data/lib/regexp-examples/max_results_limiter.rb +5 -5
- data/lib/regexp-examples/parser.rb +1 -1
- data/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb +3 -3
- data/lib/regexp-examples/repeaters.rb +3 -3
- data/lib/regexp-examples/unicode_char_ranges.rb +2 -0
- data/lib/regexp-examples/version.rb +1 -1
- data/spec/config_spec.rb +15 -17
- data/spec/regexp-examples_spec.rb +27 -23
- data/spec/spec_helper.rb +1 -1
- metadata +4 -3
- data/lib/regexp-examples/constants.rb +0 -121
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c9d287717179b54b2ee8426c8519609cc839ab06
|
4
|
+
data.tar.gz: d9f2f1c463927507aef8a199788ee136bff4d23e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7624a0d9392ca22ee8706a96c2ea7a06fe7476602e05dd121b56609d13f5cea53a6b026b3475bd7178ad2db426d469c5813b6d47fafb5b56cb9afe5ee268fab7
|
7
|
+
data.tar.gz: 236752b70d1d56bb5534c4c6786237194d624991f8763ec5fb14639f232163494b755c881c5f4084582570258df7cc59fb8f310c5a8a845dceb61821f5837694
|
data/.rubocop.yml
CHANGED
data/lib/regexp-examples.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require_relative 'regexp-examples/unicode_char_ranges'
|
2
2
|
require_relative 'regexp-examples/chargroup_parser'
|
3
|
-
require_relative 'regexp-examples/constants'
|
3
|
+
require_relative 'regexp-examples/config'
|
4
|
+
require_relative 'regexp-examples/char_sets'
|
4
5
|
require_relative 'regexp-examples/groups'
|
5
6
|
require_relative 'regexp-examples/backreferences'
|
6
7
|
require_relative 'regexp-examples/max_results_limiter'
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# :nodoc:
|
2
|
+
module RegexpExamples
|
3
|
+
# Definitions of various special characters, used in regular expressions.
|
4
|
+
# For example, `/\h/.examples` will return the value of `Hex` in this module
|
5
|
+
module CharSets
|
6
|
+
Lower = Array('a'..'z')
|
7
|
+
Upper = Array('A'..'Z')
|
8
|
+
Digit = Array('0'..'9')
|
9
|
+
Punct = %w[! " # % & ' ( ) * , - . / : ; ? @ [ \\ ] _ { }] \
|
10
|
+
| (RUBY_VERSION >= '2.4.0' ? %w[$ + < = > ^ ` | ~] : [])
|
11
|
+
Hex = Array('a'..'f') | Array('A'..'F') | Digit
|
12
|
+
Word = Lower | Upper | Digit | ['_']
|
13
|
+
Whitespace = [' ', "\t", "\n", "\r", "\v", "\f"].freeze
|
14
|
+
Control = (0..31).map(&:chr) | ["\x7f"]
|
15
|
+
# Ensure that the "common" characters appear first in the array
|
16
|
+
# Also, ensure "\n" comes first, to make it obvious when included
|
17
|
+
Any = ["\n"] | Lower | Upper | Digit | Punct | (0..127).map(&:chr)
|
18
|
+
AnyNoNewLine = Any - ["\n"]
|
19
|
+
|
20
|
+
# Map of special regex characters, to their associated character sets
|
21
|
+
BackslashCharMap = {
|
22
|
+
'd' => Digit,
|
23
|
+
'D' => Any - Digit,
|
24
|
+
'w' => Word,
|
25
|
+
'W' => Any - Word,
|
26
|
+
's' => Whitespace,
|
27
|
+
'S' => Any - Whitespace,
|
28
|
+
'h' => Hex,
|
29
|
+
'H' => Any - Hex,
|
30
|
+
|
31
|
+
't' => ["\t"], # tab
|
32
|
+
'n' => ["\n"], # new line
|
33
|
+
'r' => ["\r"], # carriage return
|
34
|
+
'f' => ["\f"], # form feed
|
35
|
+
'a' => ["\a"], # alarm
|
36
|
+
'v' => ["\v"], # vertical tab
|
37
|
+
'e' => ["\e"], # escape
|
38
|
+
}.freeze
|
39
|
+
|
40
|
+
POSIXCharMap = {
|
41
|
+
'alnum' => Upper | Lower | Digit,
|
42
|
+
'alpha' => Upper | Lower,
|
43
|
+
'blank' => [' ', "\t"],
|
44
|
+
'cntrl' => Control,
|
45
|
+
'digit' => Digit,
|
46
|
+
'graph' => (Any - Control) - [' '], # Visible chars
|
47
|
+
'lower' => Lower,
|
48
|
+
'print' => Any - Control,
|
49
|
+
'punct' => Punct,
|
50
|
+
'space' => Whitespace,
|
51
|
+
'upper' => Upper,
|
52
|
+
'xdigit' => Hex,
|
53
|
+
'word' => Word,
|
54
|
+
'ascii' => Any
|
55
|
+
}.freeze
|
56
|
+
|
57
|
+
NamedPropertyCharMap = UnicodeCharRanges.instance
|
58
|
+
end.freeze
|
59
|
+
end
|
@@ -67,7 +67,7 @@ module RegexpExamples
|
|
67
67
|
end
|
68
68
|
|
69
69
|
def parse_posix_group(negation_flag, name)
|
70
|
-
@charset.concat negate_if(POSIXCharMap[name], !negation_flag.empty?)
|
70
|
+
@charset.concat negate_if(CharSets::POSIXCharMap[name], !negation_flag.empty?)
|
71
71
|
@current_position += (negation_flag.length + # 0 or 1, if '^' is present
|
72
72
|
name.length +
|
73
73
|
2) # Length of opening and closing colons (always 2)
|
@@ -84,13 +84,10 @@ module RegexpExamples
|
|
84
84
|
end
|
85
85
|
|
86
86
|
def parse_after_backslash
|
87
|
-
|
88
|
-
when 'b'
|
87
|
+
if next_char == 'b'
|
89
88
|
["\b"]
|
90
|
-
when *BackslashCharMap.keys
|
91
|
-
BackslashCharMap[next_char]
|
92
89
|
else
|
93
|
-
[next_char]
|
90
|
+
CharSets::BackslashCharMap.fetch(next_char, [next_char])
|
94
91
|
end
|
95
92
|
end
|
96
93
|
|
@@ -122,12 +119,11 @@ module RegexpExamples
|
|
122
119
|
def parse_after_hyphen
|
123
120
|
if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
|
124
121
|
@charset << '-'
|
125
|
-
@current_position += 1
|
126
122
|
else
|
127
123
|
@current_position += 1
|
128
124
|
@charset.concat((@charset.last..parse_checking_backlash.first).to_a)
|
129
|
-
@current_position += 1
|
130
125
|
end
|
126
|
+
@current_position += 1
|
131
127
|
end
|
132
128
|
|
133
129
|
def rest_of_string
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# :nodoc:
|
2
|
+
module RegexpExamples
|
3
|
+
# Configuration settings to limit the number/length of Regexp examples generated
|
4
|
+
class Config
|
5
|
+
# The maximum variance for any given repeater, to prevent a huge/infinite number of
|
6
|
+
# examples from being listed. For example, if self.max_repeater_variance = 2 then:
|
7
|
+
# .* is equivalent to .{0,2}
|
8
|
+
# .+ is equivalent to .{1,3}
|
9
|
+
# .{2,} is equivalent to .{2,4}
|
10
|
+
# .{,3} is equivalent to .{0,2}
|
11
|
+
# .{3,8} is equivalent to .{3,5}
|
12
|
+
MAX_REPEATER_VARIANCE_DEFAULT = 2
|
13
|
+
|
14
|
+
# Maximum number of characters returned from a char set, to reduce output spam
|
15
|
+
# For example, if self.max_group_results = 5 then:
|
16
|
+
# \d is equivalent to [01234]
|
17
|
+
# \w is equivalent to [abcde]
|
18
|
+
MAX_GROUP_RESULTS_DEFAULT = 5
|
19
|
+
|
20
|
+
# Maximum number of results to be generated, for Regexp#examples
|
21
|
+
# This is to prevent the system "freezing" when given instructions like:
|
22
|
+
# /[ab]{30}/.examples
|
23
|
+
# (Which would attempt to generate 2**30 == 1073741824 examples!!!)
|
24
|
+
MAX_RESULTS_LIMIT_DEFAULT = 10_000
|
25
|
+
class << self
|
26
|
+
def with_configuration(**new_config)
|
27
|
+
original_config = config.dup
|
28
|
+
|
29
|
+
begin
|
30
|
+
self.config = new_config
|
31
|
+
result = yield
|
32
|
+
ensure
|
33
|
+
self.config = original_config
|
34
|
+
end
|
35
|
+
|
36
|
+
result
|
37
|
+
end
|
38
|
+
|
39
|
+
# Thread-safe getters and setters
|
40
|
+
%i[max_repeater_variance max_group_results max_results_limit].each do |m|
|
41
|
+
define_method(m) do
|
42
|
+
config[m]
|
43
|
+
end
|
44
|
+
define_method("#{m}=") do |value|
|
45
|
+
config[m] = value
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
def config=(**args)
|
52
|
+
Thread.current[:regexp_examples_config].merge!(args)
|
53
|
+
end
|
54
|
+
|
55
|
+
def config
|
56
|
+
Thread.current[:regexp_examples_config] ||= {
|
57
|
+
max_repeater_variance: MAX_REPEATER_VARIANCE_DEFAULT,
|
58
|
+
max_group_results: MAX_GROUP_RESULTS_DEFAULT,
|
59
|
+
max_results_limit: MAX_RESULTS_LIMIT_DEFAULT
|
60
|
+
}
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -17,11 +17,11 @@ module RegexpExamples
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def cumulate_total(new_results_count, cumulator_method)
|
20
|
-
if @results_count.zero?
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
20
|
+
@results_count = if @results_count.zero?
|
21
|
+
new_results_count
|
22
|
+
else
|
23
|
+
@results_count.public_send(cumulator_method, new_results_count)
|
24
|
+
end
|
25
25
|
end
|
26
26
|
|
27
27
|
def results_allowed_from(partial_results, limiter_method)
|
@@ -9,7 +9,7 @@ module RegexpExamples
|
|
9
9
|
parse_regular_backreference_group(Regexp.last_match(1))
|
10
10
|
elsif rest_of_string =~ /\Ak['<]([\w-]+)['>]/
|
11
11
|
parse_named_backreference_group(Regexp.last_match(1))
|
12
|
-
elsif BackslashCharMap.keys.include?(next_char)
|
12
|
+
elsif CharSets::BackslashCharMap.keys.include?(next_char)
|
13
13
|
parse_backslash_special_char
|
14
14
|
elsif rest_of_string =~ /\A(c|C-)(.)/
|
15
15
|
parse_backslash_control_char(Regexp.last_match(1), Regexp.last_match(2))
|
@@ -60,7 +60,7 @@ module RegexpExamples
|
|
60
60
|
|
61
61
|
def parse_backslash_special_char
|
62
62
|
CharGroup.new(
|
63
|
-
BackslashCharMap[next_char].dup,
|
63
|
+
CharSets::BackslashCharMap[next_char].dup,
|
64
64
|
@ignorecase
|
65
65
|
)
|
66
66
|
end
|
@@ -97,7 +97,7 @@ module RegexpExamples
|
|
97
97
|
# Beware of double negatives! E.g. /\P{^Space}/
|
98
98
|
is_negative = (p_negation == 'P') ^ (caret_negation == '^')
|
99
99
|
CharGroup.new(
|
100
|
-
negate_if(NamedPropertyCharMap[property_name.downcase], is_negative),
|
100
|
+
negate_if(CharSets::NamedPropertyCharMap[property_name.downcase], is_negative),
|
101
101
|
@ignorecase
|
102
102
|
)
|
103
103
|
end
|
@@ -80,13 +80,13 @@ module RegexpExamples
|
|
80
80
|
def initialize(group, min, has_comma, max)
|
81
81
|
super(group)
|
82
82
|
@min_repeats = min || 0
|
83
|
-
@max_repeats = if
|
84
|
-
@min_repeats
|
85
|
-
else
|
83
|
+
@max_repeats = if has_comma # e.g. a{1,}, a{,3} or a{1,3}
|
86
84
|
[
|
87
85
|
max,
|
88
86
|
@min_repeats + RegexpExamples::Config.max_repeater_variance
|
89
87
|
].compact.min
|
88
|
+
else # e.g. a{1}
|
89
|
+
@min_repeats
|
90
90
|
end
|
91
91
|
end
|
92
92
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'pstore'
|
2
|
+
require 'singleton'
|
2
3
|
|
3
4
|
module RegexpExamples
|
4
5
|
# Interface to retrieve the character sets that match a regex named property.
|
@@ -6,6 +7,7 @@ module RegexpExamples
|
|
6
7
|
# These matching values are stored, compressed, in a PStore. They are specific to
|
7
8
|
# the ruby minor version.
|
8
9
|
class UnicodeCharRanges
|
10
|
+
include Singleton
|
9
11
|
# These values were generated by: scripts/unicode_lister.rb
|
10
12
|
# Note: Only the first 128 results are listed, for performance.
|
11
13
|
# Also, some groups seem to have no matches (weird!)
|
data/spec/config_spec.rb
CHANGED
@@ -1,18 +1,17 @@
|
|
1
1
|
RSpec.describe RegexpExamples::Config do
|
2
|
-
|
3
2
|
describe 'max_repeater_variance' do
|
4
3
|
context 'as a passed parameter' do
|
5
4
|
it 'with low limit' do
|
6
5
|
expect(/[A-Z]/.examples(max_results_limit: 5))
|
7
|
-
.to match_array %w
|
6
|
+
.to match_array %w[A B C D E]
|
8
7
|
end
|
9
8
|
it 'with (default) high limit' do
|
10
9
|
expect(/[ab]{14}/.examples.length)
|
11
|
-
.to be <=
|
10
|
+
.to be <= 10_000 # NOT 2**14 == 16384, because it's been limited
|
12
11
|
end
|
13
12
|
it 'with (custom) high limit' do
|
14
|
-
expect(/[ab]{14}/.examples(max_results_limit:
|
15
|
-
.to eq
|
13
|
+
expect(/[ab]{14}/.examples(max_results_limit: 20_000).length)
|
14
|
+
.to eq 16_384 # NOT 10000, because it's below the limit
|
16
15
|
end
|
17
16
|
it 'for boolean or groups' do
|
18
17
|
expect(/[ab]{3}|[cd]{3}/.examples(max_results_limit: 10).length)
|
@@ -47,7 +46,7 @@ RSpec.describe RegexpExamples::Config do
|
|
47
46
|
|
48
47
|
it 'sets limit without passing explicitly' do
|
49
48
|
expect(/[A-Z]/.examples)
|
50
|
-
.to match_array %w
|
49
|
+
.to match_array %w[A B C D E]
|
51
50
|
end
|
52
51
|
end
|
53
52
|
end # describe 'max_results_limit'
|
@@ -56,11 +55,11 @@ RSpec.describe RegexpExamples::Config do
|
|
56
55
|
context 'as a passed parameter' do
|
57
56
|
it 'with a larger value' do
|
58
57
|
expect(/a+/.examples(max_repeater_variance: 5))
|
59
|
-
.to match_array %w
|
58
|
+
.to match_array %w[a aa aaa aaaa aaaaa aaaaaa]
|
60
59
|
end
|
61
60
|
it 'with a lower value' do
|
62
61
|
expect(/a{4,8}/.examples(max_repeater_variance: 0))
|
63
|
-
.to eq %w
|
62
|
+
.to eq %w[aaaa]
|
64
63
|
end
|
65
64
|
end
|
66
65
|
|
@@ -75,7 +74,7 @@ RSpec.describe RegexpExamples::Config do
|
|
75
74
|
|
76
75
|
it 'sets limit without passing explicitly' do
|
77
76
|
expect(/a+/.examples)
|
78
|
-
.to match_array %w
|
77
|
+
.to match_array %w[a aa aaa aaaa aaaaa aaaaaa]
|
79
78
|
end
|
80
79
|
end
|
81
80
|
end # describe 'max_repeater_variance'
|
@@ -84,11 +83,11 @@ RSpec.describe RegexpExamples::Config do
|
|
84
83
|
context 'as a passed parameter' do
|
85
84
|
it 'with a larger value' do
|
86
85
|
expect(/\d/.examples(max_group_results: 10))
|
87
|
-
.to match_array %w
|
86
|
+
.to match_array %w[0 1 2 3 4 5 6 7 8 9]
|
88
87
|
end
|
89
88
|
it 'with a lower value' do
|
90
89
|
expect(/\d/.examples(max_group_results: 3))
|
91
|
-
.to match_array %w
|
90
|
+
.to match_array %w[0 1 2]
|
92
91
|
end
|
93
92
|
end
|
94
93
|
|
@@ -103,7 +102,7 @@ RSpec.describe RegexpExamples::Config do
|
|
103
102
|
|
104
103
|
it 'sets limit without passing explicitly' do
|
105
104
|
expect(/\d/.examples)
|
106
|
-
.to match_array %w
|
105
|
+
.to match_array %w[0 1 2 3 4 5 6 7 8 9]
|
107
106
|
end
|
108
107
|
end
|
109
108
|
end # describe 'max_group_results'
|
@@ -112,24 +111,23 @@ RSpec.describe RegexpExamples::Config do
|
|
112
111
|
it 'uses thread-local global config values' do
|
113
112
|
thread = Thread.new do
|
114
113
|
RegexpExamples::Config.max_group_results = 1
|
115
|
-
expect(/\d/.examples).to eq %w
|
114
|
+
expect(/\d/.examples).to eq %w[0]
|
116
115
|
end
|
117
116
|
sleep 0.1 # Give the above thread time to run
|
118
|
-
expect(/\d/.examples).to eq %w
|
117
|
+
expect(/\d/.examples).to eq %w[0 1 2 3 4]
|
119
118
|
thread.join
|
120
119
|
end
|
121
120
|
|
122
121
|
it 'uses thread-local block config values' do
|
123
122
|
thread = Thread.new do
|
124
123
|
RegexpExamples::Config.with_configuration(max_group_results: 1) do
|
125
|
-
expect(/\d/.examples).to eq %w
|
124
|
+
expect(/\d/.examples).to eq %w[0]
|
126
125
|
sleep 0.2 # Give the below thread time to run while this block is open
|
127
126
|
end
|
128
127
|
end
|
129
128
|
sleep 0.1 # Give the above thread time to run
|
130
|
-
expect(/\d/.examples).to eq %w
|
129
|
+
expect(/\d/.examples).to eq %w[0 1 2 3 4]
|
131
130
|
thread.join
|
132
131
|
end
|
133
132
|
end # describe 'thread safety'
|
134
|
-
|
135
133
|
end
|
@@ -100,7 +100,7 @@ RSpec.describe Regexp, '#examples' do
|
|
100
100
|
|
101
101
|
context 'for escaped characters' do
|
102
102
|
all_letters = Array('a'..'z') | Array('A'..'Z')
|
103
|
-
special_letters = %w
|
103
|
+
special_letters = %w[b c g p u x z A B C G M P Z]
|
104
104
|
valid_letters = all_letters - special_letters
|
105
105
|
|
106
106
|
valid_letters.each do |char|
|
@@ -185,7 +185,7 @@ RSpec.describe Regexp, '#examples' do
|
|
185
185
|
)
|
186
186
|
# An exhaustive set of tests for all named properties!!! This is useful
|
187
187
|
# for verifying the PStore contains correct values for all ruby versions
|
188
|
-
%w
|
188
|
+
%w[
|
189
189
|
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit
|
190
190
|
Word ASCII Any Assigned L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd
|
191
191
|
Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl Zp C Cc Cf Cn Co Arabic Armenian
|
@@ -196,7 +196,7 @@ RSpec.describe Regexp, '#examples' do
|
|
196
196
|
Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki Oriya Phags_Pa Rejang
|
197
197
|
Runic Saurashtra Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa
|
198
198
|
Tai_Le Tamil Telugu Thaana Thai Tibetan Tifinagh Vai Yi
|
199
|
-
|
199
|
+
].each do |property|
|
200
200
|
it "examples for /\p{#{property}}/" do
|
201
201
|
regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
|
202
202
|
expect(regexp_examples)
|
@@ -209,10 +209,10 @@ RSpec.describe Regexp, '#examples' do
|
|
209
209
|
end
|
210
210
|
|
211
211
|
# The following seem to genuinely have no matching examples (!!??!!?!)
|
212
|
-
%w
|
212
|
+
%w[
|
213
213
|
Cs Carian Cuneiform Cypriot Deseret Gothic Kharoshthi Linear_B Lycian
|
214
214
|
Lydian Old_Italic Old_Persian Osmanya Phoenician Shavian Ugaritic
|
215
|
-
|
215
|
+
].each do |property|
|
216
216
|
examples_are_empty(/\p{#{property}}/)
|
217
217
|
end
|
218
218
|
end
|
@@ -261,6 +261,10 @@ RSpec.describe Regexp, '#examples' do
|
|
261
261
|
)
|
262
262
|
end
|
263
263
|
|
264
|
+
context 'for empty regex' do
|
265
|
+
it { expect(//.examples).to eq [''] }
|
266
|
+
end
|
267
|
+
|
264
268
|
context 'for comment groups' do
|
265
269
|
examples_exist_and_match(
|
266
270
|
/a(?#comment)b/,
|
@@ -292,21 +296,21 @@ RSpec.describe Regexp, '#examples' do
|
|
292
296
|
# More rigorous tests to assert that ALL examples are being listed
|
293
297
|
context 'default config options' do
|
294
298
|
# Simple examples
|
295
|
-
it { expect(/[ab]{2}/.examples).to match_array %w
|
296
|
-
it { expect(/(a|b){2}/.examples).to match_array %w
|
299
|
+
it { expect(/[ab]{2}/.examples).to match_array %w[aa ab ba bb] }
|
300
|
+
it { expect(/(a|b){2}/.examples).to match_array %w[aa ab ba bb] }
|
297
301
|
it { expect(/a+|b?/.examples).to match_array ['a', 'aa', 'aaa', '', 'b'] }
|
298
302
|
|
299
303
|
# Only display unique examples:
|
300
|
-
it { expect(/a|a|b|b/.examples).to match_array %w
|
301
|
-
it { expect(/[ccdd]/.examples).to match_array %w
|
304
|
+
it { expect(/a|a|b|b/.examples).to match_array %w[a b] }
|
305
|
+
it { expect(/[ccdd]/.examples).to match_array %w[c d] }
|
302
306
|
|
303
307
|
# a{1}? should be equivalent to (?:a{1})?, i.e. NOT a "non-greedy quantifier"
|
304
308
|
it { expect(/a{1}?/.examples).to match_array ['', 'a'] }
|
305
309
|
end
|
306
310
|
|
307
311
|
context 'end of string' do
|
308
|
-
it { expect(/test\z/.examples).to match_array %w
|
309
|
-
it { expect(/test\Z/.examples).to match_array [
|
312
|
+
it { expect(/test\z/.examples).to match_array %w[test] }
|
313
|
+
it { expect(/test\Z/.examples).to match_array %W[test test\n] }
|
310
314
|
end
|
311
315
|
|
312
316
|
context 'backreferences and escaped octal combined' do
|
@@ -317,12 +321,12 @@ RSpec.describe Regexp, '#examples' do
|
|
317
321
|
end
|
318
322
|
|
319
323
|
context 'case insensitive' do
|
320
|
-
it { expect(/ab/i.examples).to match_array %w
|
324
|
+
it { expect(/ab/i.examples).to match_array %w[ab aB Ab AB] }
|
321
325
|
it do
|
322
326
|
expect(/a+/i.examples)
|
323
|
-
.to match_array %w
|
327
|
+
.to match_array %w[a A aa aA Aa AA aaa aaA aAa aAA Aaa AaA AAa AAA]
|
324
328
|
end
|
325
|
-
it { expect(/([ab])\1/i.examples).to match_array %w
|
329
|
+
it { expect(/([ab])\1/i.examples).to match_array %w[aa bb AA BB] }
|
326
330
|
end
|
327
331
|
|
328
332
|
context 'multiline' do
|
@@ -331,30 +335,30 @@ RSpec.describe Regexp, '#examples' do
|
|
331
335
|
end
|
332
336
|
|
333
337
|
context 'exteded form' do
|
334
|
-
it { expect(/a b c/x.examples).to eq %w
|
335
|
-
it { expect(/a#comment/x.examples).to eq %w
|
338
|
+
it { expect(/a b c/x.examples).to eq %w[abc] }
|
339
|
+
it { expect(/a#comment/x.examples).to eq %w[a] }
|
336
340
|
it do
|
337
341
|
expect(
|
338
342
|
/
|
339
343
|
line1 #comment
|
340
344
|
line2 #comment
|
341
345
|
/x.examples
|
342
|
-
).to eq %w
|
346
|
+
).to eq %w[line1line2]
|
343
347
|
end
|
344
348
|
end
|
345
349
|
|
346
350
|
context 'options toggling' do
|
347
351
|
context 'rest of string' do
|
348
|
-
it { expect(/a(?i)b(?-i)c/.examples).to match_array %w
|
349
|
-
it { expect(/a(?x) b(?-x) c/.examples).to eq %w
|
352
|
+
it { expect(/a(?i)b(?-i)c/.examples).to match_array %w[abc aBc] }
|
353
|
+
it { expect(/a(?x) b(?-x) c/.examples).to eq %w[ab\ c] }
|
350
354
|
it { expect(/(?m)./.examples(max_group_results: 999)).to include "\n" }
|
351
355
|
# Toggle "groups" should not increase backref group count:
|
352
|
-
it { expect(/(?i)(a)-\1/.examples).to match_array %w
|
356
|
+
it { expect(/(?i)(a)-\1/.examples).to match_array %w[a-a A-A] }
|
353
357
|
end
|
354
358
|
context 'subexpression' do
|
355
|
-
it { expect(/a(?i:b)c/.examples).to match_array %w
|
356
|
-
it { expect(/a(?i:b(?-i:c))/.examples).to match_array %w
|
357
|
-
it { expect(/a(?-i:b)c/i.examples).to match_array %w
|
359
|
+
it { expect(/a(?i:b)c/.examples).to match_array %w[abc aBc] }
|
360
|
+
it { expect(/a(?i:b(?-i:c))/.examples).to match_array %w[abc aBc] }
|
361
|
+
it { expect(/a(?-i:b)c/i.examples).to match_array %w[abc abC Abc AbC] }
|
358
362
|
end
|
359
363
|
end
|
360
364
|
end # context 'exact examples match'
|
data/spec/spec_helper.rb
CHANGED
@@ -8,7 +8,7 @@ require 'pry'
|
|
8
8
|
# Several of these tests (intentionally) use "weird" regex patterns,
|
9
9
|
# that spam annoying warnings when running.
|
10
10
|
# E.g. warning: invalid back reference: /\k/
|
11
|
-
# and warning: character class has ']' without escape: /[]]/
|
11
|
+
# and warning: character class has ']' without escape: /[]]/
|
12
12
|
# This config disables those warnings.
|
13
13
|
$VERBOSE = nil
|
14
14
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp-examples
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.0
|
4
|
+
version: 1.4.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Lord
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-11-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -60,8 +60,9 @@ files:
|
|
60
60
|
- lib/core_extensions/regexp/examples.rb
|
61
61
|
- lib/regexp-examples.rb
|
62
62
|
- lib/regexp-examples/backreferences.rb
|
63
|
+
- lib/regexp-examples/char_sets.rb
|
63
64
|
- lib/regexp-examples/chargroup_parser.rb
|
64
|
-
- lib/regexp-examples/constants.rb
|
65
|
+
- lib/regexp-examples/config.rb
|
65
66
|
- lib/regexp-examples/groups.rb
|
66
67
|
- lib/regexp-examples/helpers.rb
|
67
68
|
- lib/regexp-examples/max_results_limiter.rb
|
@@ -1,121 +0,0 @@
|
|
1
|
-
# :nodoc:
|
2
|
-
module RegexpExamples
|
3
|
-
# Configuration settings to limit the number/length of Regexp examples generated
|
4
|
-
class Config
|
5
|
-
class << self
|
6
|
-
def with_configuration(**new_config)
|
7
|
-
original_config = config.dup
|
8
|
-
|
9
|
-
begin
|
10
|
-
self.config = new_config
|
11
|
-
result = yield
|
12
|
-
ensure
|
13
|
-
self.config = original_config
|
14
|
-
end
|
15
|
-
|
16
|
-
result
|
17
|
-
end
|
18
|
-
|
19
|
-
# Thread-safe getters and setters
|
20
|
-
%i[max_repeater_variance max_group_results max_results_limit].each do |m|
|
21
|
-
define_method(m) do
|
22
|
-
config[m]
|
23
|
-
end
|
24
|
-
define_method("#{m}=") do |value|
|
25
|
-
config[m] = value
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
private
|
30
|
-
|
31
|
-
def config=(**args)
|
32
|
-
Thread.current[:regexp_examples_config].merge!(args)
|
33
|
-
end
|
34
|
-
|
35
|
-
def config
|
36
|
-
Thread.current[:regexp_examples_config] ||= {
|
37
|
-
max_repeater_variance: MAX_REPEATER_VARIANCE_DEFAULT,
|
38
|
-
max_group_results: MAX_GROUP_RESULTS_DEFAULT,
|
39
|
-
max_results_limit: MAX_RESULTS_LIMIT_DEFAULT
|
40
|
-
}
|
41
|
-
end
|
42
|
-
end
|
43
|
-
# The maximum variance for any given repeater, to prevent a huge/infinite number of
|
44
|
-
# examples from being listed. For example, if self.max_repeater_variance = 2 then:
|
45
|
-
# .* is equivalent to .{0,2}
|
46
|
-
# .+ is equivalent to .{1,3}
|
47
|
-
# .{2,} is equivalent to .{2,4}
|
48
|
-
# .{,3} is equivalent to .{0,2}
|
49
|
-
# .{3,8} is equivalent to .{3,5}
|
50
|
-
MAX_REPEATER_VARIANCE_DEFAULT = 2
|
51
|
-
|
52
|
-
# Maximum number of characters returned from a char set, to reduce output spam
|
53
|
-
# For example, if self.max_group_results = 5 then:
|
54
|
-
# \d is equivalent to [01234]
|
55
|
-
# \w is equivalent to [abcde]
|
56
|
-
MAX_GROUP_RESULTS_DEFAULT = 5
|
57
|
-
|
58
|
-
# Maximum number of results to be generated, for Regexp#examples
|
59
|
-
# This is to prevent the system "freezing" when given instructions like:
|
60
|
-
# /[ab]{30}/.examples
|
61
|
-
# (Which would attempt to generate 2**30 == 1073741824 examples!!!)
|
62
|
-
MAX_RESULTS_LIMIT_DEFAULT = 10_000
|
63
|
-
end
|
64
|
-
|
65
|
-
# Definitions of various special characters, used in regular expressions.
|
66
|
-
# For example, `/\h/.examples` will return the value of `Hex` in this module
|
67
|
-
module CharSets
|
68
|
-
Lower = Array('a'..'z')
|
69
|
-
Upper = Array('A'..'Z')
|
70
|
-
Digit = Array('0'..'9')
|
71
|
-
Punct = %w[! " # % & ' ( ) * , - . / : ; ? @ [ \\ ] _ { }] \
|
72
|
-
| (RUBY_VERSION >= '2.4.0' ? %w[$ + < = > ^ ` | ~] : [])
|
73
|
-
Hex = Array('a'..'f') | Array('A'..'F') | Digit
|
74
|
-
Word = Lower | Upper | Digit | ['_']
|
75
|
-
Whitespace = [' ', "\t", "\n", "\r", "\v", "\f"].freeze
|
76
|
-
Control = (0..31).map(&:chr) | ["\x7f"]
|
77
|
-
# Ensure that the "common" characters appear first in the array
|
78
|
-
# Also, ensure "\n" comes first, to make it obvious when included
|
79
|
-
Any = ["\n"] | Lower | Upper | Digit | Punct | (0..127).map(&:chr)
|
80
|
-
AnyNoNewLine = Any - ["\n"]
|
81
|
-
end.freeze
|
82
|
-
|
83
|
-
# Map of special regex characters, to their associated character sets
|
84
|
-
BackslashCharMap = {
|
85
|
-
'd' => CharSets::Digit,
|
86
|
-
'D' => CharSets::Any - CharSets::Digit,
|
87
|
-
'w' => CharSets::Word,
|
88
|
-
'W' => CharSets::Any - CharSets::Word,
|
89
|
-
's' => CharSets::Whitespace,
|
90
|
-
'S' => CharSets::Any - CharSets::Whitespace,
|
91
|
-
'h' => CharSets::Hex,
|
92
|
-
'H' => CharSets::Any - CharSets::Hex,
|
93
|
-
|
94
|
-
't' => ["\t"], # tab
|
95
|
-
'n' => ["\n"], # new line
|
96
|
-
'r' => ["\r"], # carriage return
|
97
|
-
'f' => ["\f"], # form feed
|
98
|
-
'a' => ["\a"], # alarm
|
99
|
-
'v' => ["\v"], # vertical tab
|
100
|
-
'e' => ["\e"], # escape
|
101
|
-
}.freeze
|
102
|
-
|
103
|
-
POSIXCharMap = {
|
104
|
-
'alnum' => CharSets::Upper | CharSets::Lower | CharSets::Digit,
|
105
|
-
'alpha' => CharSets::Upper | CharSets::Lower,
|
106
|
-
'blank' => [' ', "\t"],
|
107
|
-
'cntrl' => CharSets::Control,
|
108
|
-
'digit' => CharSets::Digit,
|
109
|
-
'graph' => (CharSets::Any - CharSets::Control) - [' '], # Visible chars
|
110
|
-
'lower' => CharSets::Lower,
|
111
|
-
'print' => CharSets::Any - CharSets::Control,
|
112
|
-
'punct' => CharSets::Punct,
|
113
|
-
'space' => CharSets::Whitespace,
|
114
|
-
'upper' => CharSets::Upper,
|
115
|
-
'xdigit' => CharSets::Hex,
|
116
|
-
'word' => CharSets::Word,
|
117
|
-
'ascii' => CharSets::Any
|
118
|
-
}.freeze
|
119
|
-
|
120
|
-
NamedPropertyCharMap = UnicodeCharRanges.new
|
121
|
-
end
|