regexp-examples 1.1.2 → 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/.travis.yml +6 -2
- data/README.md +2 -1
- data/db/unicode_ranges_2.3.pstore +0 -0
- data/lib/regexp-examples/chargroup_parser.rb +50 -26
- data/lib/regexp-examples/groups.rb +51 -11
- data/lib/regexp-examples/parser.rb +16 -17
- data/lib/regexp-examples/unicode_char_ranges.rb +1 -1
- data/lib/regexp-examples/version.rb +1 -1
- data/scripts/unicode_lister.rb +4 -4
- data/spec/regexp-examples_spec.rb +37 -19
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aa40a179d06f78f6bb791ebda870b8f82b027d4e
|
4
|
+
data.tar.gz: 2abbd95ec396bb61b69d10035e9fd61e0d851fff
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 368a793ed4f68e60449525e4d1ce539da4513f83e4a8b301dfcf909643b9b6ca865752962d2f8d344485820ca2621a7ea2e30ee6cde7baf4930d220dcf4d4b87
|
7
|
+
data.tar.gz: 79475ed89f948dec9731457af73fec7fa6b5f87f50f5c75f3fd9ba7f2c5508a7876d4778804f0c4b64a68c6d5bfda2a75036bda73cf394656304cbddc48d041b
|
data/.rubocop.yml
ADDED
data/.travis.yml
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
language: ruby
|
2
2
|
rvm:
|
3
3
|
- 2.0.0-p598
|
4
|
-
- 2.1.
|
4
|
+
- 2.1.6
|
5
5
|
- 2.2.0
|
6
|
+
- 2.2.2
|
7
|
+
- ruby-head
|
8
|
+
matrix:
|
9
|
+
allow_failures:
|
6
10
|
# One (ruby 2.3-dev) test fails, due to a change of behaviour in Array#delete_if,
|
7
11
|
# but I don't know if this is intentional. I'll fix it once the behaviour change is documented.
|
8
12
|
# For now, I don't really care if 2.3-dev tests all pass.
|
9
|
-
|
13
|
+
- rvm: ruby-head
|
data/README.md
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
[![Gem Version](https://badge.fury.io/rb/regexp-examples.svg)](http://badge.fury.io/rb/regexp-examples)
|
3
3
|
[![Build Status](https://travis-ci.org/tom-lord/regexp-examples.svg?branch=master)](https://travis-ci.org/tom-lord/regexp-examples/builds)
|
4
4
|
[![Coverage Status](https://coveralls.io/repos/tom-lord/regexp-examples/badge.svg?branch=master)](https://coveralls.io/r/tom-lord/regexp-examples?branch=master)
|
5
|
+
[![Code Climate](https://codeclimate.com/github/tom-lord/regexp-examples/badges/gpa.svg)](https://codeclimate.com/github/tom-lord/regexp-examples)
|
5
6
|
|
6
7
|
Extends the Regexp class with the methods: `Regexp#examples` and `Regexp#random_example`
|
7
8
|
|
@@ -111,7 +112,7 @@ Some of the most obscure regexp features are not even mentioned in the ruby docs
|
|
111
112
|
## Impossible features ("illegal syntax")
|
112
113
|
|
113
114
|
The following features in the regex language can never be properly implemented into this gem because, put simply, they are not technically "regular"!
|
114
|
-
If you'd like to understand this in more detail, check out what I had to say in [my blog post](http://tom-lord.weebly.com/blog/reverse-engineering-regular-expressions) about this gem
|
115
|
+
If you'd like to understand this in more detail, check out what I had to say in [my blog post](http://tom-lord.weebly.com/blog/reverse-engineering-regular-expressions) about this gem.
|
115
116
|
|
116
117
|
Using any of the following will raise a RegexpExamples::IllegalSyntax exception:
|
117
118
|
|
Binary file
|
@@ -25,29 +25,11 @@ module RegexpExamples
|
|
25
25
|
until next_char == ']'
|
26
26
|
case next_char
|
27
27
|
when '['
|
28
|
-
|
29
|
-
sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
|
30
|
-
@charset.concat sub_group_parser.result
|
31
|
-
@current_position += sub_group_parser.length
|
28
|
+
parse_sub_group_concat
|
32
29
|
when '-'
|
33
|
-
|
34
|
-
@charset << '-'
|
35
|
-
@current_position += 1
|
36
|
-
else
|
37
|
-
@current_position += 1
|
38
|
-
@charset.concat (@charset.last..parse_checking_backlash.first).to_a
|
39
|
-
@current_position += 1
|
40
|
-
end
|
30
|
+
parse_after_hyphen
|
41
31
|
when '&'
|
42
|
-
|
43
|
-
@current_position += 2
|
44
|
-
sub_group_parser = self.class.new(rest_of_string, is_sub_group: @is_sub_group)
|
45
|
-
@charset &= sub_group_parser.result
|
46
|
-
@current_position += (sub_group_parser.length - 1)
|
47
|
-
else
|
48
|
-
@charset << '&'
|
49
|
-
@current_position += 1
|
50
|
-
end
|
32
|
+
parse_after_ampersand
|
51
33
|
else
|
52
34
|
@charset.concat parse_checking_backlash
|
53
35
|
@current_position += 1
|
@@ -79,14 +61,22 @@ module RegexpExamples
|
|
79
61
|
@charset << next_char
|
80
62
|
@current_position += 1
|
81
63
|
when /\A:(\^?)([^:]+):\]/ # e.g. [[:alpha:]] - POSIX group
|
82
|
-
if @is_sub_group
|
83
|
-
chars = Regexp.last_match(1).empty? ? POSIXCharMap[Regexp.last_match(2)] : (CharSets::Any - POSIXCharMap[Regexp.last_match(2)])
|
84
|
-
@charset.concat chars
|
85
|
-
@current_position += (Regexp.last_match(1).length + Regexp.last_match(2).length + 2)
|
86
|
-
end
|
64
|
+
parse_posix_group(Regexp.last_match(1), Regexp.last_match(2)) if @is_sub_group
|
87
65
|
end
|
88
66
|
end
|
89
67
|
|
68
|
+
def parse_posix_group(negation_flag, name)
|
69
|
+
chars = if negation_flag.empty?
|
70
|
+
POSIXCharMap[name]
|
71
|
+
else
|
72
|
+
CharSets::Any - POSIXCharMap[name]
|
73
|
+
end
|
74
|
+
@charset.concat chars
|
75
|
+
@current_position += (negation_flag.length + # 0 or 1, if '^' is present
|
76
|
+
name.length +
|
77
|
+
2) # Length of opening and closing colons (always 2)
|
78
|
+
end
|
79
|
+
|
90
80
|
# Always returns an Array, for consistency
|
91
81
|
def parse_checking_backlash
|
92
82
|
if next_char == '\\'
|
@@ -108,6 +98,40 @@ module RegexpExamples
|
|
108
98
|
end
|
109
99
|
end
|
110
100
|
|
101
|
+
def parse_sub_group_concat
|
102
|
+
@current_position += 1
|
103
|
+
sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
|
104
|
+
@charset.concat sub_group_parser.result
|
105
|
+
@current_position += sub_group_parser.length
|
106
|
+
end
|
107
|
+
|
108
|
+
def parse_after_ampersand
|
109
|
+
if regexp_string[@current_position + 1] == '&'
|
110
|
+
parse_sub_group_intersect
|
111
|
+
else
|
112
|
+
@charset << '&'
|
113
|
+
@current_position += 1
|
114
|
+
end
|
115
|
+
end
|
116
|
+
|
117
|
+
def parse_sub_group_intersect
|
118
|
+
@current_position += 2
|
119
|
+
sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
|
120
|
+
@charset &= sub_group_parser.result
|
121
|
+
@current_position += (sub_group_parser.length - 1)
|
122
|
+
end
|
123
|
+
|
124
|
+
def parse_after_hyphen
|
125
|
+
if regexp_string[@current_position + 1] == ']' # e.g. /[abc-]/ -- not a range!
|
126
|
+
@charset << '-'
|
127
|
+
@current_position += 1
|
128
|
+
else
|
129
|
+
@current_position += 1
|
130
|
+
@charset.concat (@charset.last..parse_checking_backlash.first).to_a
|
131
|
+
@current_position += 1
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
111
135
|
def rest_of_string
|
112
136
|
regexp_string[@current_position..-1]
|
113
137
|
end
|
@@ -21,12 +21,18 @@ module RegexpExamples
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
+
# A helper method for mixing in to Group classes...
|
25
|
+
# Needed because sometimes (for performance) group results are lazy enumerators;
|
26
|
+
# Meanwhile other times (again, for performance!) group results are just arrays
|
24
27
|
module ForceLazyEnumerators
|
25
28
|
def force_if_lazy(arr_or_enum)
|
26
29
|
arr_or_enum.respond_to?(:force) ? arr_or_enum.force : arr_or_enum
|
27
30
|
end
|
28
31
|
end
|
29
32
|
|
33
|
+
# A helper method for mixing in to Group classes...
|
34
|
+
# Needed for generating a complete results set when the ignorecase
|
35
|
+
# regexp option has been set
|
30
36
|
module GroupWithIgnoreCase
|
31
37
|
include ForceLazyEnumerators
|
32
38
|
attr_reader :ignorecase
|
@@ -43,6 +49,9 @@ module RegexpExamples
|
|
43
49
|
end
|
44
50
|
end
|
45
51
|
|
52
|
+
# A helper method for mixing in to Group classes...
|
53
|
+
# Uses Array#sample to randomly choose one result from all
|
54
|
+
# possible examples
|
46
55
|
module RandomResultBySample
|
47
56
|
include ForceLazyEnumerators
|
48
57
|
def random_result
|
@@ -50,6 +59,8 @@ module RegexpExamples
|
|
50
59
|
end
|
51
60
|
end
|
52
61
|
|
62
|
+
# The most "basic" possible group.
|
63
|
+
# For example, /x/ contains one SingleCharGroup
|
53
64
|
class SingleCharGroup
|
54
65
|
include RandomResultBySample
|
55
66
|
prepend GroupWithIgnoreCase
|
@@ -74,6 +85,11 @@ module RegexpExamples
|
|
74
85
|
end
|
75
86
|
end
|
76
87
|
|
88
|
+
# The most generic type of group, which contains 0 or more characters.
|
89
|
+
# Technically, this is the ONLY type of group that is truly necessary
|
90
|
+
# However, having others both improves performance through various optimisations,
|
91
|
+
# and clarifies the code's intention.
|
92
|
+
# The most common example of CharGroups is: /[abc]/
|
77
93
|
class CharGroup
|
78
94
|
include RandomResultBySample
|
79
95
|
prepend GroupWithIgnoreCase
|
@@ -89,6 +105,8 @@ module RegexpExamples
|
|
89
105
|
end
|
90
106
|
end
|
91
107
|
|
108
|
+
# A special case of CharGroup, for the pattern /./
|
109
|
+
# (For example, we never need to care about ignorecase here!)
|
92
110
|
class DotGroup
|
93
111
|
include RandomResultBySample
|
94
112
|
attr_reader :multiline
|
@@ -104,6 +122,9 @@ module RegexpExamples
|
|
104
122
|
end
|
105
123
|
end
|
106
124
|
|
125
|
+
# A collection of other groups. Basically any regex that contains
|
126
|
+
# brackets will be parsed using one of these. The simplest example is:
|
127
|
+
# /(a)/ - Which is a MultiGroup, containing one SingleCharGroup
|
107
128
|
class MultiGroup
|
108
129
|
attr_reader :group_id
|
109
130
|
def initialize(groups, group_id)
|
@@ -131,10 +152,18 @@ module RegexpExamples
|
|
131
152
|
end
|
132
153
|
end
|
133
154
|
|
155
|
+
# A boolean "or" group.
|
156
|
+
# The implementation is to pass in 2 sets of (repeaters of) groups.
|
157
|
+
# The simplest example is: /a|b/
|
158
|
+
# If you have more than one boolean "or" operator, then this is initially
|
159
|
+
# parsed as an OrGroup containing another OrGroup. However, in order to avoid
|
160
|
+
# probability distribution issues in Regexp#random_example, this then gets
|
161
|
+
# simplified down to one OrGroup containing 3+ repeaters.
|
134
162
|
class OrGroup
|
163
|
+
attr_reader :repeaters_list
|
164
|
+
|
135
165
|
def initialize(left_repeaters, right_repeaters)
|
136
|
-
@
|
137
|
-
@right_repeaters = right_repeaters
|
166
|
+
@repeaters_list = [left_repeaters, *merge_if_orgroup(right_repeaters)]
|
138
167
|
end
|
139
168
|
|
140
169
|
def result
|
@@ -142,25 +171,36 @@ module RegexpExamples
|
|
142
171
|
end
|
143
172
|
|
144
173
|
def random_result
|
145
|
-
# TODO: This logic is flawed in terms of choosing a truly "random" example!
|
146
|
-
# E.g. /a|b|c|d/.random_example will choose a letter with the following probabilities:
|
147
|
-
# a = 50%, b = 25%, c = 12.5%, d = 12.5%
|
148
|
-
# In order to fix this, I must either apply some weighted selection logic,
|
149
|
-
# or change how the OrGroup examples are generated - i.e. make this class work with >2 repeaters
|
150
174
|
result_by_method(:map_random_result).sample(1)
|
151
175
|
end
|
152
176
|
|
153
177
|
private
|
154
178
|
|
155
179
|
def result_by_method(method)
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
180
|
+
repeaters_list.map do |repeaters|
|
181
|
+
RegexpExamples.public_send(method, repeaters)
|
182
|
+
end
|
183
|
+
.inject(:concat)
|
184
|
+
.map do |result|
|
185
|
+
GroupResult.new(result)
|
186
|
+
end
|
187
|
+
.uniq
|
188
|
+
end
|
189
|
+
|
190
|
+
def merge_if_orgroup(repeaters)
|
191
|
+
if repeaters.size == 1 && repeaters.first.is_a?(OrGroup)
|
192
|
+
repeaters.first.repeaters_list
|
193
|
+
else
|
194
|
+
[repeaters]
|
160
195
|
end
|
161
196
|
end
|
162
197
|
end
|
163
198
|
|
199
|
+
# This is a bit magic...
|
200
|
+
# We substitute backreferences with PLACEHOLDERS. These are then, later,
|
201
|
+
# replaced by the appropriate value. (See BackReferenceReplacer)
|
202
|
+
# The simplest example is /(a) \1/ - So, we temporarily treat the "result"
|
203
|
+
# of /\1/ as being "__1__". It later gets updated.
|
164
204
|
class BackReferenceGroup
|
165
205
|
include RandomResultBySample
|
166
206
|
attr_reader :id
|
@@ -15,7 +15,7 @@ module RegexpExamples
|
|
15
15
|
repeaters = []
|
16
16
|
until end_of_regexp
|
17
17
|
group = parse_group(repeaters)
|
18
|
-
return [
|
18
|
+
return [group] if group.is_a? OrGroup
|
19
19
|
@current_position += 1
|
20
20
|
repeaters << parse_repeater(group)
|
21
21
|
end
|
@@ -120,7 +120,7 @@ module RegexpExamples
|
|
120
120
|
group = parse_single_char_group(parse_control_character(Regexp.last_match(2)))
|
121
121
|
when rest_of_string =~ /\Ax(\h{1,2})/ # Escape sequence
|
122
122
|
@current_position += Regexp.last_match(1).length
|
123
|
-
group = parse_single_char_group(
|
123
|
+
group = parse_single_char_group(parse_unicode_sequence(Regexp.last_match(1)))
|
124
124
|
when rest_of_string =~ /\Au(\h{4}|\{\h{1,4}\})/ # Unicode sequence
|
125
125
|
@current_position += Regexp.last_match(1).length
|
126
126
|
sequence = Regexp.last_match(1).match(/\h{1,4}/)[0] # Strip off "{" and "}"
|
@@ -148,7 +148,7 @@ module RegexpExamples
|
|
148
148
|
) # Using "\r\n" as one character is a little bit hacky...
|
149
149
|
when next_char == 'g' # Subexpression call
|
150
150
|
fail IllegalSyntaxError,
|
151
|
-
|
151
|
+
'Subexpression calls (\\g) cannot be supported, as they are not regular'
|
152
152
|
when next_char =~ /[bB]/ # Anchors
|
153
153
|
raise_anchors_exception!
|
154
154
|
when next_char =~ /[AG]/ # Start of string
|
@@ -159,6 +159,7 @@ module RegexpExamples
|
|
159
159
|
end
|
160
160
|
when next_char =~ /[zZ]/ # End of string
|
161
161
|
if @current_position == (regexp_string.length - 1)
|
162
|
+
# TODO: /\Z/ should be treated as /\n?/
|
162
163
|
group = PlaceHolderGroup.new
|
163
164
|
else
|
164
165
|
raise_anchors_exception!
|
@@ -212,10 +213,10 @@ module RegexpExamples
|
|
212
213
|
end
|
213
214
|
when %w(! =).include?(match[2]) # e.g. /(?=lookahead)/, /(?!neglookahead)/
|
214
215
|
fail IllegalSyntaxError,
|
215
|
-
|
216
|
+
'Lookaheads are not regular; cannot generate examples'
|
216
217
|
when %w(! =).include?(match[3]) # e.g. /(?<=lookbehind)/, /(?<!neglookbehind)/
|
217
218
|
fail IllegalSyntaxError,
|
218
|
-
|
219
|
+
'Lookbehinds are not regular; cannot generate examples'
|
219
220
|
else # e.g. /(?<name>namedgroup)/
|
220
221
|
@current_position += (match[3].length + 3)
|
221
222
|
group_id = match[3]
|
@@ -237,12 +238,14 @@ module RegexpExamples
|
|
237
238
|
end
|
238
239
|
|
239
240
|
def regexp_options_toggle(on, off)
|
240
|
-
@ignorecase
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
241
|
+
regexp_option_toggle(on, off, '@ignorecase', 'i')
|
242
|
+
regexp_option_toggle(on, off, '@multiline', 'm')
|
243
|
+
regexp_option_toggle(on, off, '@extended', 'x')
|
244
|
+
end
|
245
|
+
|
246
|
+
def regexp_option_toggle(on, off, var, char)
|
247
|
+
instance_variable_set(var, true) if on.include? char
|
248
|
+
instance_variable_set(var, false) if off.include? char
|
246
249
|
end
|
247
250
|
|
248
251
|
def parse_char_group
|
@@ -276,12 +279,8 @@ module RegexpExamples
|
|
276
279
|
# eval "?\\C-#{char.chr}" # Doesn't work for e.g. char = "?"
|
277
280
|
end
|
278
281
|
|
279
|
-
def parse_escape_sequence(match)
|
280
|
-
eval "?\\x#{match}"
|
281
|
-
end
|
282
|
-
|
283
282
|
def parse_unicode_sequence(match)
|
284
|
-
|
283
|
+
[match.to_i(16)].pack('U')
|
285
284
|
end
|
286
285
|
|
287
286
|
def parse_star_repeater(group)
|
@@ -331,7 +330,7 @@ module RegexpExamples
|
|
331
330
|
|
332
331
|
def raise_anchors_exception!
|
333
332
|
fail IllegalSyntaxError,
|
334
|
-
|
333
|
+
"Anchors ('#{next_char}') cannot be supported, as they are not regular"
|
335
334
|
end
|
336
335
|
|
337
336
|
def parse_one_time_repeater(group)
|
data/scripts/unicode_lister.rb
CHANGED
@@ -45,7 +45,7 @@ def calculate_ranges(matching_codes)
|
|
45
45
|
end
|
46
46
|
|
47
47
|
count = 0
|
48
|
-
filename = RegexpExamples::UnicodeCharRanges::STORE_FILENAME
|
48
|
+
filename = "./db/#{RegexpExamples::UnicodeCharRanges::STORE_FILENAME}"
|
49
49
|
store = PStore.new(filename)
|
50
50
|
store.transaction do
|
51
51
|
NamedGroups.each do |name|
|
@@ -54,11 +54,11 @@ store.transaction do
|
|
54
54
|
# (I have tried this with generating ALL examples, and it makes the ruby gem
|
55
55
|
# painfully slow and bloated... Especially the test suite.)
|
56
56
|
matching_codes = [(0..55_295), (57_344..65_535)].map(&:to_a).flatten.lazy
|
57
|
-
.
|
58
|
-
|
57
|
+
.select { |x| /\p{#{name}}/ =~ eval("?\\u{#{x.to_s(16)}}") }
|
58
|
+
.first(128)
|
59
59
|
store[name.downcase] = calculate_ranges(matching_codes)
|
60
60
|
puts "(#{count}/#{NamedGroups.length}) Finished property: #{name}"
|
61
61
|
end
|
62
62
|
puts '*' * 50
|
63
|
-
puts "Finished! Result stored in:
|
63
|
+
puts "Finished! Result stored in: #{filename}"
|
64
64
|
end
|
@@ -4,8 +4,12 @@ RSpec.describe Regexp, '#examples' do
|
|
4
4
|
it "examples for /#{regexp.source}/" do
|
5
5
|
regexp_examples = regexp.examples(max_group_results: 99_999)
|
6
6
|
|
7
|
-
expect(regexp_examples)
|
8
|
-
|
7
|
+
expect(regexp_examples)
|
8
|
+
.not_to be_empty,
|
9
|
+
"No examples were generated for regexp: /#{regexp.source}/"
|
10
|
+
regexp_examples.each do |example|
|
11
|
+
expect(example).to match(/\A(?:#{regexp.source})\z/)
|
12
|
+
end
|
9
13
|
# Note: /\A...\z/ is used to prevent misleading examples from passing the test.
|
10
14
|
# For example, we don't want things like:
|
11
15
|
# /a*/.examples to include "xyz"
|
@@ -121,7 +125,7 @@ RSpec.describe Regexp, '#examples' do
|
|
121
125
|
/(ref1) (ref2) \1 \2/,
|
122
126
|
/((ref2)ref1) \1 \2/,
|
123
127
|
/((ref1and2)) \1 \2/,
|
124
|
-
/(
|
128
|
+
/(1)(2)(3)(4)(5)(6)(7)(8)(9)(10) \10\9\8\7\6\5\4\3\2\1/,
|
125
129
|
/(a?(b?(c?(d?(e?)))))/,
|
126
130
|
/(a)? \1/,
|
127
131
|
/(a|(b)) \2/,
|
@@ -186,30 +190,36 @@ RSpec.describe Regexp, '#examples' do
|
|
186
190
|
/\P{Ll}/, # Negation syntax type 2
|
187
191
|
/\P{^Ll}/ # Double negation!! (Should cancel out)
|
188
192
|
)
|
189
|
-
# An exhaustive set of tests for all named properties!!!
|
190
|
-
#
|
193
|
+
# An exhaustive set of tests for all named properties!!! This is useful
|
194
|
+
# for verifying the PStore contains correct values for all ruby versions
|
191
195
|
%w(
|
192
|
-
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit
|
193
|
-
L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
196
|
+
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit
|
197
|
+
Word ASCII Any Assigned L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd
|
198
|
+
Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl Zp C Cc Cf Cn Co Arabic Armenian
|
199
|
+
Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
|
200
|
+
Cham Cherokee Common Coptic Cyrillic Devanagari Ethiopic Georgian
|
201
|
+
Glagolitic Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew Hiragana
|
202
|
+
Inherited Kannada Katakana Kayah_Li Khmer Lao Latin Lepcha Limbu Malayalam
|
203
|
+
Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki Oriya Phags_Pa Rejang
|
204
|
+
Runic Saurashtra Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa
|
205
|
+
Tai_Le Tamil Telugu Thaana Thai Tibetan Tifinagh Vai Yi
|
200
206
|
).each do |property|
|
201
207
|
it "examples for /\p{#{property}}/" do
|
202
208
|
regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
|
203
|
-
expect(regexp_examples)
|
209
|
+
expect(regexp_examples)
|
210
|
+
.not_to be_empty,
|
211
|
+
"No examples were generated for regexp: /\p{#{property}}/"
|
204
212
|
# Just do one big check, for test system performance (~30% faster)
|
205
|
-
# (Otherwise, we
|
213
|
+
# (Otherwise, we're doing up to 128 checks on 123 properties!!!)
|
206
214
|
expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
|
207
215
|
end
|
208
216
|
end
|
209
217
|
|
210
218
|
# The following seem to genuinely have no matching examples (!!??!!?!)
|
211
|
-
%w(
|
212
|
-
|
219
|
+
%w(
|
220
|
+
Cs Carian Cuneiform Cypriot Deseret Gothic Kharoshthi Linear_B Lycian
|
221
|
+
Lydian Old_Italic Old_Persian Osmanya Phoenician Shavian Ugaritic
|
222
|
+
).each do |property|
|
213
223
|
examples_are_empty(/\p{#{property}}/)
|
214
224
|
end
|
215
225
|
end
|
@@ -293,6 +303,10 @@ RSpec.describe Regexp, '#examples' do
|
|
293
303
|
it { expect(/(a|b){2}/.examples).to match_array %w(aa ab ba bb) }
|
294
304
|
it { expect(/a+|b?/.examples).to match_array ['a', 'aa', 'aaa', '', 'b'] }
|
295
305
|
|
306
|
+
# Only display unique examples:
|
307
|
+
it { expect(/a|a|b|b/.examples).to match_array ['a', 'b'] }
|
308
|
+
it { expect(/[ccdd]/.examples).to match_array ['c', 'd'] }
|
309
|
+
|
296
310
|
# a{1}? should be equivalent to (?:a{1})?, i.e. NOT a "non-greedy quantifier"
|
297
311
|
it { expect(/a{1}?/.examples).to match_array ['', 'a'] }
|
298
312
|
end
|
@@ -324,7 +338,10 @@ RSpec.describe Regexp, '#examples' do
|
|
324
338
|
|
325
339
|
context 'case insensitive' do
|
326
340
|
it { expect(/ab/i.examples).to match_array %w(ab aB Ab AB) }
|
327
|
-
it
|
341
|
+
it do
|
342
|
+
expect(/a+/i.examples)
|
343
|
+
.to match_array %w(a A aa aA Aa AA aaa aaA aAa aAA Aaa AaA AAa AAA)
|
344
|
+
end
|
328
345
|
it { expect(/([ab])\1/i.examples).to match_array %w(aa bb AA BB) }
|
329
346
|
end
|
330
347
|
|
@@ -351,7 +368,8 @@ RSpec.describe Regexp, '#examples' do
|
|
351
368
|
it { expect(/a(?i)b(?-i)c/.examples).to match_array %w(abc aBc) }
|
352
369
|
it { expect(/a(?x) b(?-x) c/.examples).to eq %w(ab\ c) }
|
353
370
|
it { expect(/(?m)./.examples(max_group_results: 999)).to include "\n" }
|
354
|
-
|
371
|
+
# Toggle "groups" should not increase backref group count:
|
372
|
+
it { expect(/(?i)(a)-\1/.examples).to match_array %w(a-a A-A) }
|
355
373
|
end
|
356
374
|
context 'subexpression' do
|
357
375
|
it { expect(/a(?i:b)c/.examples).to match_array %w(abc aBc) }
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp-examples
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Lord
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-07-
|
11
|
+
date: 2015-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -46,6 +46,7 @@ extra_rdoc_files: []
|
|
46
46
|
files:
|
47
47
|
- ".gitignore"
|
48
48
|
- ".rspec"
|
49
|
+
- ".rubocop.yml"
|
49
50
|
- ".travis.yml"
|
50
51
|
- Gemfile
|
51
52
|
- LICENSE.txt
|
@@ -54,6 +55,7 @@ files:
|
|
54
55
|
- db/unicode_ranges_2.0.pstore
|
55
56
|
- db/unicode_ranges_2.1.pstore
|
56
57
|
- db/unicode_ranges_2.2.pstore
|
58
|
+
- db/unicode_ranges_2.3.pstore
|
57
59
|
- lib/core_extensions/regexp/examples.rb
|
58
60
|
- lib/regexp-examples.rb
|
59
61
|
- lib/regexp-examples/backreferences.rb
|