regexp-examples 1.1.3 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +0 -6
- data/db/unicode_ranges_2.1.pstore +1 -0
- data/db/unicode_ranges_2.3.pstore +0 -0
- data/db/unicode_ranges_2.4.pstore +0 -0
- data/lib/core_extensions/regexp/examples.rb +3 -0
- data/lib/regexp-examples/backreferences.rb +29 -13
- data/lib/regexp-examples/chargroup_parser.rb +15 -17
- data/lib/regexp-examples/constants.rb +10 -6
- data/lib/regexp-examples/groups.rb +11 -22
- data/lib/regexp-examples/helpers.rb +6 -7
- data/lib/regexp-examples/parser.rb +31 -285
- data/lib/regexp-examples/parser_helpers/charset_negation_helper.rb +8 -0
- data/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb +144 -0
- data/lib/regexp-examples/parser_helpers/parse_group_helper.rb +58 -0
- data/lib/regexp-examples/parser_helpers/parse_multi_group_helper.rb +85 -0
- data/lib/regexp-examples/parser_helpers/parse_repeater_helper.rb +51 -0
- data/lib/regexp-examples/repeaters.rb +21 -7
- data/lib/regexp-examples/unicode_char_ranges.rb +4 -0
- data/lib/regexp-examples/version.rb +2 -1
- data/lib/regexp-examples.rb +1 -1
- data/regexp-examples.gemspec +5 -4
- data/scripts/unicode_lister.rb +15 -11
- data/spec/helpers.rb +18 -0
- data/spec/regexp-examples_spec.rb +7 -15
- data/spec/regexp-random_example_spec.rb +4 -2
- data/spec/spec_helper.rb +10 -0
- metadata +14 -5
- data/db/unicode_ranges_2.1.pstore +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 43bddf0ebdc30ee2ae0e7ed35722165921d25c93
|
4
|
+
data.tar.gz: 3e52ef064a6dd7c484f06814c15135997f7fb19d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e53baeb48bce1a0e5d00e610b0ea933bc5a014d1fe0ea5b9678875c63b5507aaa32baab9f339e60610ce1b9c97e4173729a89340b8b75530912a6560352ca19
|
7
|
+
data.tar.gz: e63cf93bfb1ec76e4aa710bb15da91bd61e27f259c30d6011e0805c64f3b219420b20cbe2343cf6736ba2e7e40c7cc413b6b8a04779eb4cf7438cad3e4aa04cf
|
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
@@ -5,9 +5,3 @@ rvm:
|
|
5
5
|
- 2.2.0
|
6
6
|
- 2.2.2
|
7
7
|
- ruby-head
|
8
|
-
matrix:
|
9
|
-
allow_failures:
|
10
|
-
# One (ruby 2.3-dev) test fails, due to a change of behaviour in Array#delete_if,
|
11
|
-
# but I don't know if this is intentional. I'll fix it once the behaviour change is documented.
|
12
|
-
# For now, I don't really care if 2.3-dev tests all pass.
|
13
|
-
- rvm: ruby-head
|
@@ -0,0 +1 @@
|
|
1
|
+
unicode_ranges_2.0.pstore
|
Binary file
|
Binary file
|
@@ -1,5 +1,8 @@
|
|
1
1
|
module CoreExtensions
|
2
2
|
module Regexp
|
3
|
+
# A wrapper module to namespace/isolate the Regexp#examples and Regexp#random_example
|
4
|
+
# monkey patches.
|
5
|
+
# No core classes are extended in any way, other than the above two methods.
|
3
6
|
module Examples
|
4
7
|
def examples(**config_options)
|
5
8
|
RegexpExamples::ResultCountLimiters.configure!(
|
@@ -1,24 +1,40 @@
|
|
1
1
|
module RegexpExamples
|
2
|
+
# A helper class to fill-in backreferences AFTER the example(s) have been generated.
|
3
|
+
# In a nutshell, this works by doing the following:
|
4
|
+
# * Given a regex that contains a capture group and backreference, e.g. `/(a|b) \1/`
|
5
|
+
# * After generating examples, the backreference is stored as a placeholder:
|
6
|
+
# `["a __1__", "b __1__"]`
|
7
|
+
# * This class is used to fill in each placeholder accordingly:
|
8
|
+
# `["a a", "b b"]`
|
9
|
+
# * Also, beware of octal groups and cases where the backref invalidates the example!!
|
2
10
|
class BackReferenceReplacer
|
3
|
-
|
11
|
+
# Named capture groups can only contain alphanumeric chars, and hyphens
|
12
|
+
PLACEHOLDER_REGEX = Regexp.new(
|
13
|
+
RegexpExamples::BackReferenceGroup::PLACEHOLDER_FORMAT % '([a-zA-Z0-9-]+)'
|
14
|
+
)
|
4
15
|
|
5
16
|
def substitute_backreferences(full_examples)
|
6
17
|
full_examples.map do |full_example|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
full_example
|
12
|
-
rescue BackrefNotFound
|
13
|
-
# For instance, one "full example" from /(a|(b)) \2/: "a __2__"
|
14
|
-
# should be rejected because the backref (\2) does not exist
|
15
|
-
nil
|
18
|
+
# For instance, one "full example" from /(a|(b)) \2/: "a __2__"
|
19
|
+
# should be rejected because the backref (\2) does not exist
|
20
|
+
catch(:backref_not_found) do
|
21
|
+
substitute_backrefs_one_at_a_time(full_example)
|
16
22
|
end
|
17
23
|
end.compact
|
18
24
|
end
|
19
25
|
|
20
26
|
private
|
21
27
|
|
28
|
+
def substitute_backrefs_one_at_a_time(full_example)
|
29
|
+
while full_example.match(PLACEHOLDER_REGEX)
|
30
|
+
full_example.sub!(
|
31
|
+
PLACEHOLDER_REGEX,
|
32
|
+
find_backref_for(full_example, Regexp.last_match(1))
|
33
|
+
)
|
34
|
+
end
|
35
|
+
full_example
|
36
|
+
end
|
37
|
+
|
22
38
|
def find_backref_for(full_example, group_id)
|
23
39
|
full_example.all_subgroups.detect do |subgroup|
|
24
40
|
subgroup.group_id == group_id
|
@@ -26,11 +42,11 @@ module RegexpExamples
|
|
26
42
|
end
|
27
43
|
|
28
44
|
def octal_char_for(octal_chars)
|
29
|
-
# For octal characters in the range \
|
30
|
-
if octal_chars =~ /\A[01]?[0-7]{1,2}\z/ && octal_chars.
|
45
|
+
# For octal characters in the range \00 - \177
|
46
|
+
if octal_chars =~ /\A[01]?[0-7]{1,2}\z/ && octal_chars.length > 1
|
31
47
|
Integer(octal_chars, 8).chr
|
32
48
|
else
|
33
|
-
|
49
|
+
throw :backref_not_found
|
34
50
|
end
|
35
51
|
end
|
36
52
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require_relative 'parser_helpers/charset_negation_helper'
|
2
|
+
|
1
3
|
module RegexpExamples
|
2
4
|
# A "sub-parser", for char groups in a regular expression
|
3
5
|
# Some examples of what this class needs to parse:
|
@@ -7,20 +9,23 @@ module RegexpExamples
|
|
7
9
|
# [^abc] - negated group
|
8
10
|
# [[a][bc]] - sub-groups (should match "a", "b" or "c")
|
9
11
|
# [[:lower:]] - POSIX group
|
10
|
-
# [[a-f]&&[d-z]] - set intersection (should match "d", "
|
12
|
+
# [[a-f]&&[d-z]] - set intersection (should match "d", "e" or "f")
|
11
13
|
# [[^:alpha:]&&[\n]a-c] - all of the above!!!! (should match "\n")
|
12
14
|
class ChargroupParser
|
13
|
-
|
15
|
+
include CharsetNegationHelper
|
16
|
+
|
17
|
+
attr_reader :regexp_string, :current_position
|
18
|
+
alias_method :length, :current_position
|
19
|
+
|
14
20
|
def initialize(regexp_string, is_sub_group: false)
|
15
21
|
@regexp_string = regexp_string
|
16
22
|
@is_sub_group = is_sub_group
|
17
23
|
@current_position = 0
|
18
|
-
|
24
|
+
@charset = []
|
25
|
+
@negative = false
|
19
26
|
end
|
20
27
|
|
21
28
|
def parse
|
22
|
-
@charset = []
|
23
|
-
@negative = false
|
24
29
|
parse_first_chars
|
25
30
|
until next_char == ']'
|
26
31
|
case next_char
|
@@ -40,12 +45,8 @@ module RegexpExamples
|
|
40
45
|
@current_position += 1 # To account for final "]"
|
41
46
|
end
|
42
47
|
|
43
|
-
def length
|
44
|
-
@current_position
|
45
|
-
end
|
46
|
-
|
47
48
|
def result
|
48
|
-
|
49
|
+
negate_if(@charset, @negative)
|
49
50
|
end
|
50
51
|
|
51
52
|
private
|
@@ -66,12 +67,7 @@ module RegexpExamples
|
|
66
67
|
end
|
67
68
|
|
68
69
|
def parse_posix_group(negation_flag, name)
|
69
|
-
|
70
|
-
POSIXCharMap[name]
|
71
|
-
else
|
72
|
-
CharSets::Any - POSIXCharMap[name]
|
73
|
-
end
|
74
|
-
@charset.concat chars
|
70
|
+
@charset.concat negate_if(POSIXCharMap[name], !negation_flag.empty?)
|
75
71
|
@current_position += (negation_flag.length + # 0 or 1, if '^' is present
|
76
72
|
name.length +
|
77
73
|
2) # Length of opening and closing colons (always 2)
|
@@ -101,6 +97,7 @@ module RegexpExamples
|
|
101
97
|
def parse_sub_group_concat
|
102
98
|
@current_position += 1
|
103
99
|
sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
|
100
|
+
sub_group_parser.parse
|
104
101
|
@charset.concat sub_group_parser.result
|
105
102
|
@current_position += sub_group_parser.length
|
106
103
|
end
|
@@ -117,6 +114,7 @@ module RegexpExamples
|
|
117
114
|
def parse_sub_group_intersect
|
118
115
|
@current_position += 2
|
119
116
|
sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
|
117
|
+
sub_group_parser.parse
|
120
118
|
@charset &= sub_group_parser.result
|
121
119
|
@current_position += (sub_group_parser.length - 1)
|
122
120
|
end
|
@@ -127,7 +125,7 @@ module RegexpExamples
|
|
127
125
|
@current_position += 1
|
128
126
|
else
|
129
127
|
@current_position += 1
|
130
|
-
@charset.concat
|
128
|
+
@charset.concat((@charset.last..parse_checking_backlash.first).to_a)
|
131
129
|
@current_position += 1
|
132
130
|
end
|
133
131
|
end
|
@@ -1,4 +1,6 @@
|
|
1
|
+
# :nodoc:
|
1
2
|
module RegexpExamples
|
3
|
+
# Configuration settings to limit the number/length of Regexp examples generated
|
2
4
|
class ResultCountLimiters
|
3
5
|
# The maximum variance for any given repeater, to prevent a huge/infinite number of
|
4
6
|
# examples from being listed. For example, if @@max_repeater_variance = 2 then:
|
@@ -7,30 +9,32 @@ module RegexpExamples
|
|
7
9
|
# .{2,} is equivalent to .{2,4}
|
8
10
|
# .{,3} is equivalent to .{0,2}
|
9
11
|
# .{3,8} is equivalent to .{3,5}
|
10
|
-
|
12
|
+
MAX_REPEATER_VARIANCE_DEFAULT = 2
|
11
13
|
|
12
14
|
# Maximum number of characters returned from a char set, to reduce output spam
|
13
15
|
# For example, if @@max_group_results = 5 then:
|
14
16
|
# \d is equivalent to [01234]
|
15
17
|
# \w is equivalent to [abcde]
|
16
|
-
|
18
|
+
MAX_GROUP_RESULTS_DEFAULT = 5
|
17
19
|
|
18
20
|
class << self
|
19
21
|
attr_reader :max_repeater_variance, :max_group_results
|
20
22
|
def configure!(max_repeater_variance, max_group_results = nil)
|
21
|
-
@max_repeater_variance = (max_repeater_variance ||
|
22
|
-
@max_group_results = (max_group_results ||
|
23
|
+
@max_repeater_variance = (max_repeater_variance || MAX_REPEATER_VARIANCE_DEFAULT)
|
24
|
+
@max_group_results = (max_group_results || MAX_GROUP_RESULTS_DEFAULT)
|
23
25
|
end
|
24
26
|
end
|
25
27
|
end
|
26
28
|
|
27
|
-
def self.
|
29
|
+
def self.max_repeater_variance
|
28
30
|
ResultCountLimiters.max_repeater_variance
|
29
31
|
end
|
30
|
-
def self.
|
32
|
+
def self.max_group_results
|
31
33
|
ResultCountLimiters.max_group_results
|
32
34
|
end
|
33
35
|
|
36
|
+
# Definitions of various special characters, used in regular expressions.
|
37
|
+
# For example, `/\h/.examples` will return the value of `Hex` in this module
|
34
38
|
module CharSets
|
35
39
|
Lower = Array('a'..'z')
|
36
40
|
Upper = Array('A'..'Z')
|
@@ -6,13 +6,12 @@ module RegexpExamples
|
|
6
6
|
attr_reader :group_id, :subgroups
|
7
7
|
def initialize(result, group_id = nil, subgroups = [])
|
8
8
|
@group_id = group_id
|
9
|
-
@subgroups = subgroups
|
10
|
-
@subgroups = result.all_subgroups if result.respond_to?(:group_id)
|
9
|
+
@subgroups = result.respond_to?(:group_id) ? result.all_subgroups : subgroups
|
11
10
|
super(result)
|
12
11
|
end
|
13
12
|
|
14
13
|
def all_subgroups
|
15
|
-
[self, subgroups].flatten.
|
14
|
+
[self, subgroups].flatten.keep_if(&:group_id)
|
16
15
|
end
|
17
16
|
|
18
17
|
def swapcase
|
@@ -132,24 +131,16 @@ module RegexpExamples
|
|
132
131
|
@group_id = group_id
|
133
132
|
end
|
134
133
|
|
135
|
-
def result
|
136
|
-
result_by_method(:result)
|
137
|
-
end
|
138
|
-
|
139
|
-
def random_result
|
140
|
-
result_by_method(:random_result)
|
141
|
-
end
|
142
|
-
|
143
|
-
private
|
144
|
-
|
145
134
|
# Generates the result of each contained group
|
146
135
|
# and adds the filled group of each result to itself
|
147
|
-
def
|
148
|
-
strings = @groups.map { |repeater| repeater.public_send(
|
136
|
+
def result
|
137
|
+
strings = @groups.map { |repeater| repeater.public_send(__method__) }
|
149
138
|
RegexpExamples.permutations_of_strings(strings).map do |result|
|
150
139
|
GroupResult.new(result, group_id)
|
151
140
|
end
|
152
141
|
end
|
142
|
+
|
143
|
+
alias_method :random_result, :result
|
153
144
|
end
|
154
145
|
|
155
146
|
# A boolean "or" group.
|
@@ -177,13 +168,10 @@ module RegexpExamples
|
|
177
168
|
private
|
178
169
|
|
179
170
|
def result_by_method(method)
|
180
|
-
repeaters_list
|
181
|
-
RegexpExamples.public_send(method, repeaters)
|
182
|
-
end
|
171
|
+
repeaters_list
|
172
|
+
.map { |repeaters| RegexpExamples.public_send(method, repeaters) }
|
183
173
|
.inject(:concat)
|
184
|
-
.map
|
185
|
-
GroupResult.new(result)
|
186
|
-
end
|
174
|
+
.map { |result| GroupResult.new(result) }
|
187
175
|
.uniq
|
188
176
|
end
|
189
177
|
|
@@ -203,13 +191,14 @@ module RegexpExamples
|
|
203
191
|
# of /\1/ as being "__1__". It later gets updated.
|
204
192
|
class BackReferenceGroup
|
205
193
|
include RandomResultBySample
|
194
|
+
PLACEHOLDER_FORMAT = '__%s__'
|
206
195
|
attr_reader :id
|
207
196
|
def initialize(id)
|
208
197
|
@id = id
|
209
198
|
end
|
210
199
|
|
211
200
|
def result
|
212
|
-
[GroupResult.new(
|
201
|
+
[GroupResult.new(PLACEHOLDER_FORMAT % @id)]
|
213
202
|
end
|
214
203
|
end
|
215
204
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# :nodoc:
|
1
2
|
module RegexpExamples
|
2
3
|
# Given an array of arrays of strings, returns all possible permutations
|
3
4
|
# for strings, created by joining one element from each array
|
@@ -14,15 +15,13 @@ module RegexpExamples
|
|
14
15
|
end
|
15
16
|
|
16
17
|
def self.join_preserving_capture_groups(result)
|
17
|
-
|
18
|
+
# Only save the LAST group from repeated capture groups, e.g. /([ab]){2}/
|
19
|
+
# (Hence the need for "reverse"!)
|
18
20
|
subgroups = result
|
19
|
-
.
|
20
|
-
.
|
21
|
+
.flat_map(&:all_subgroups)
|
22
|
+
.reverse
|
23
|
+
.uniq(&:group_id)
|
21
24
|
|
22
|
-
# Only save the LAST group from repeated capture groups, e.g. /([ab]){2}/
|
23
|
-
subgroups.delete_if do |subgroup|
|
24
|
-
subgroups.count { |other_subgroup| other_subgroup.group_id == subgroup.group_id } > 1
|
25
|
-
end
|
26
25
|
GroupResult.new(result.join, nil, subgroups)
|
27
26
|
end
|
28
27
|
|