regexp-examples 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +0 -6
- data/db/unicode_ranges_2.1.pstore +1 -0
- data/db/unicode_ranges_2.3.pstore +0 -0
- data/db/unicode_ranges_2.4.pstore +0 -0
- data/lib/core_extensions/regexp/examples.rb +3 -0
- data/lib/regexp-examples/backreferences.rb +29 -13
- data/lib/regexp-examples/chargroup_parser.rb +15 -17
- data/lib/regexp-examples/constants.rb +10 -6
- data/lib/regexp-examples/groups.rb +11 -22
- data/lib/regexp-examples/helpers.rb +6 -7
- data/lib/regexp-examples/parser.rb +31 -285
- data/lib/regexp-examples/parser_helpers/charset_negation_helper.rb +8 -0
- data/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb +144 -0
- data/lib/regexp-examples/parser_helpers/parse_group_helper.rb +58 -0
- data/lib/regexp-examples/parser_helpers/parse_multi_group_helper.rb +85 -0
- data/lib/regexp-examples/parser_helpers/parse_repeater_helper.rb +51 -0
- data/lib/regexp-examples/repeaters.rb +21 -7
- data/lib/regexp-examples/unicode_char_ranges.rb +4 -0
- data/lib/regexp-examples/version.rb +2 -1
- data/lib/regexp-examples.rb +1 -1
- data/regexp-examples.gemspec +5 -4
- data/scripts/unicode_lister.rb +15 -11
- data/spec/helpers.rb +18 -0
- data/spec/regexp-examples_spec.rb +7 -15
- data/spec/regexp-random_example_spec.rb +4 -2
- data/spec/spec_helper.rb +10 -0
- metadata +14 -5
- data/db/unicode_ranges_2.1.pstore +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 43bddf0ebdc30ee2ae0e7ed35722165921d25c93
|
4
|
+
data.tar.gz: 3e52ef064a6dd7c484f06814c15135997f7fb19d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e53baeb48bce1a0e5d00e610b0ea933bc5a014d1fe0ea5b9678875c63b5507aaa32baab9f339e60610ce1b9c97e4173729a89340b8b75530912a6560352ca19
|
7
|
+
data.tar.gz: e63cf93bfb1ec76e4aa710bb15da91bd61e27f259c30d6011e0805c64f3b219420b20cbe2343cf6736ba2e7e40c7cc413b6b8a04779eb4cf7438cad3e4aa04cf
|
data/.rubocop.yml
CHANGED
data/.travis.yml
CHANGED
@@ -5,9 +5,3 @@ rvm:
|
|
5
5
|
- 2.2.0
|
6
6
|
- 2.2.2
|
7
7
|
- ruby-head
|
8
|
-
matrix:
|
9
|
-
allow_failures:
|
10
|
-
# One (ruby 2.3-dev) test fails, due to a change of behaviour in Array#delete_if,
|
11
|
-
# but I don't know if this is intentional. I'll fix it once the behaviour change is documented.
|
12
|
-
# For now, I don't really care if 2.3-dev tests all pass.
|
13
|
-
- rvm: ruby-head
|
@@ -0,0 +1 @@
|
|
1
|
+
unicode_ranges_2.0.pstore
|
Binary file
|
Binary file
|
@@ -1,5 +1,8 @@
|
|
1
1
|
module CoreExtensions
|
2
2
|
module Regexp
|
3
|
+
# A wrapper module to namespace/isolate the Regexp#examples and Regexp#random_example
|
4
|
+
# monkey patches.
|
5
|
+
# No core classes are extended in any way, other than the above two methods.
|
3
6
|
module Examples
|
4
7
|
def examples(**config_options)
|
5
8
|
RegexpExamples::ResultCountLimiters.configure!(
|
@@ -1,24 +1,40 @@
|
|
1
1
|
module RegexpExamples
|
2
|
+
# A helper class to fill-in backreferences AFTER the example(s) have been generated.
|
3
|
+
# In a nutshell, this works by doing the following:
|
4
|
+
# * Given a regex that contains a capture group and backreference, e.g. `/(a|b) \1/`
|
5
|
+
# * After generating examples, the backreference is stored as a placeholder:
|
6
|
+
# `["a __1__", "b __1__"]`
|
7
|
+
# * This class is used to fill in each placeholder accordingly:
|
8
|
+
# `["a a", "b b"]`
|
9
|
+
# * Also, beware of octal groups and cases where the backref invalidates the example!!
|
2
10
|
class BackReferenceReplacer
|
3
|
-
|
11
|
+
# Named capture groups can only contain alphanumeric chars, and hyphens
|
12
|
+
PLACEHOLDER_REGEX = Regexp.new(
|
13
|
+
RegexpExamples::BackReferenceGroup::PLACEHOLDER_FORMAT % '([a-zA-Z0-9-]+)'
|
14
|
+
)
|
4
15
|
|
5
16
|
def substitute_backreferences(full_examples)
|
6
17
|
full_examples.map do |full_example|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
full_example
|
12
|
-
rescue BackrefNotFound
|
13
|
-
# For instance, one "full example" from /(a|(b)) \2/: "a __2__"
|
14
|
-
# should be rejected because the backref (\2) does not exist
|
15
|
-
nil
|
18
|
+
# For instance, one "full example" from /(a|(b)) \2/: "a __2__"
|
19
|
+
# should be rejected because the backref (\2) does not exist
|
20
|
+
catch(:backref_not_found) do
|
21
|
+
substitute_backrefs_one_at_a_time(full_example)
|
16
22
|
end
|
17
23
|
end.compact
|
18
24
|
end
|
19
25
|
|
20
26
|
private
|
21
27
|
|
28
|
+
def substitute_backrefs_one_at_a_time(full_example)
|
29
|
+
while full_example.match(PLACEHOLDER_REGEX)
|
30
|
+
full_example.sub!(
|
31
|
+
PLACEHOLDER_REGEX,
|
32
|
+
find_backref_for(full_example, Regexp.last_match(1))
|
33
|
+
)
|
34
|
+
end
|
35
|
+
full_example
|
36
|
+
end
|
37
|
+
|
22
38
|
def find_backref_for(full_example, group_id)
|
23
39
|
full_example.all_subgroups.detect do |subgroup|
|
24
40
|
subgroup.group_id == group_id
|
@@ -26,11 +42,11 @@ module RegexpExamples
|
|
26
42
|
end
|
27
43
|
|
28
44
|
def octal_char_for(octal_chars)
|
29
|
-
# For octal characters in the range \
|
30
|
-
if octal_chars =~ /\A[01]?[0-7]{1,2}\z/ && octal_chars.
|
45
|
+
# For octal characters in the range \00 - \177
|
46
|
+
if octal_chars =~ /\A[01]?[0-7]{1,2}\z/ && octal_chars.length > 1
|
31
47
|
Integer(octal_chars, 8).chr
|
32
48
|
else
|
33
|
-
|
49
|
+
throw :backref_not_found
|
34
50
|
end
|
35
51
|
end
|
36
52
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require_relative 'parser_helpers/charset_negation_helper'
|
2
|
+
|
1
3
|
module RegexpExamples
|
2
4
|
# A "sub-parser", for char groups in a regular expression
|
3
5
|
# Some examples of what this class needs to parse:
|
@@ -7,20 +9,23 @@ module RegexpExamples
|
|
7
9
|
# [^abc] - negated group
|
8
10
|
# [[a][bc]] - sub-groups (should match "a", "b" or "c")
|
9
11
|
# [[:lower:]] - POSIX group
|
10
|
-
# [[a-f]&&[d-z]] - set intersection (should match "d", "
|
12
|
+
# [[a-f]&&[d-z]] - set intersection (should match "d", "e" or "f")
|
11
13
|
# [[^:alpha:]&&[\n]a-c] - all of the above!!!! (should match "\n")
|
12
14
|
class ChargroupParser
|
13
|
-
|
15
|
+
include CharsetNegationHelper
|
16
|
+
|
17
|
+
attr_reader :regexp_string, :current_position
|
18
|
+
alias_method :length, :current_position
|
19
|
+
|
14
20
|
def initialize(regexp_string, is_sub_group: false)
|
15
21
|
@regexp_string = regexp_string
|
16
22
|
@is_sub_group = is_sub_group
|
17
23
|
@current_position = 0
|
18
|
-
|
24
|
+
@charset = []
|
25
|
+
@negative = false
|
19
26
|
end
|
20
27
|
|
21
28
|
def parse
|
22
|
-
@charset = []
|
23
|
-
@negative = false
|
24
29
|
parse_first_chars
|
25
30
|
until next_char == ']'
|
26
31
|
case next_char
|
@@ -40,12 +45,8 @@ module RegexpExamples
|
|
40
45
|
@current_position += 1 # To account for final "]"
|
41
46
|
end
|
42
47
|
|
43
|
-
def length
|
44
|
-
@current_position
|
45
|
-
end
|
46
|
-
|
47
48
|
def result
|
48
|
-
|
49
|
+
negate_if(@charset, @negative)
|
49
50
|
end
|
50
51
|
|
51
52
|
private
|
@@ -66,12 +67,7 @@ module RegexpExamples
|
|
66
67
|
end
|
67
68
|
|
68
69
|
def parse_posix_group(negation_flag, name)
|
69
|
-
|
70
|
-
POSIXCharMap[name]
|
71
|
-
else
|
72
|
-
CharSets::Any - POSIXCharMap[name]
|
73
|
-
end
|
74
|
-
@charset.concat chars
|
70
|
+
@charset.concat negate_if(POSIXCharMap[name], !negation_flag.empty?)
|
75
71
|
@current_position += (negation_flag.length + # 0 or 1, if '^' is present
|
76
72
|
name.length +
|
77
73
|
2) # Length of opening and closing colons (always 2)
|
@@ -101,6 +97,7 @@ module RegexpExamples
|
|
101
97
|
def parse_sub_group_concat
|
102
98
|
@current_position += 1
|
103
99
|
sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
|
100
|
+
sub_group_parser.parse
|
104
101
|
@charset.concat sub_group_parser.result
|
105
102
|
@current_position += sub_group_parser.length
|
106
103
|
end
|
@@ -117,6 +114,7 @@ module RegexpExamples
|
|
117
114
|
def parse_sub_group_intersect
|
118
115
|
@current_position += 2
|
119
116
|
sub_group_parser = self.class.new(rest_of_string, is_sub_group: true)
|
117
|
+
sub_group_parser.parse
|
120
118
|
@charset &= sub_group_parser.result
|
121
119
|
@current_position += (sub_group_parser.length - 1)
|
122
120
|
end
|
@@ -127,7 +125,7 @@ module RegexpExamples
|
|
127
125
|
@current_position += 1
|
128
126
|
else
|
129
127
|
@current_position += 1
|
130
|
-
@charset.concat
|
128
|
+
@charset.concat((@charset.last..parse_checking_backlash.first).to_a)
|
131
129
|
@current_position += 1
|
132
130
|
end
|
133
131
|
end
|
@@ -1,4 +1,6 @@
|
|
1
|
+
# :nodoc:
|
1
2
|
module RegexpExamples
|
3
|
+
# Configuration settings to limit the number/length of Regexp examples generated
|
2
4
|
class ResultCountLimiters
|
3
5
|
# The maximum variance for any given repeater, to prevent a huge/infinite number of
|
4
6
|
# examples from being listed. For example, if @@max_repeater_variance = 2 then:
|
@@ -7,30 +9,32 @@ module RegexpExamples
|
|
7
9
|
# .{2,} is equivalent to .{2,4}
|
8
10
|
# .{,3} is equivalent to .{0,2}
|
9
11
|
# .{3,8} is equivalent to .{3,5}
|
10
|
-
|
12
|
+
MAX_REPEATER_VARIANCE_DEFAULT = 2
|
11
13
|
|
12
14
|
# Maximum number of characters returned from a char set, to reduce output spam
|
13
15
|
# For example, if @@max_group_results = 5 then:
|
14
16
|
# \d is equivalent to [01234]
|
15
17
|
# \w is equivalent to [abcde]
|
16
|
-
|
18
|
+
MAX_GROUP_RESULTS_DEFAULT = 5
|
17
19
|
|
18
20
|
class << self
|
19
21
|
attr_reader :max_repeater_variance, :max_group_results
|
20
22
|
def configure!(max_repeater_variance, max_group_results = nil)
|
21
|
-
@max_repeater_variance = (max_repeater_variance ||
|
22
|
-
@max_group_results = (max_group_results ||
|
23
|
+
@max_repeater_variance = (max_repeater_variance || MAX_REPEATER_VARIANCE_DEFAULT)
|
24
|
+
@max_group_results = (max_group_results || MAX_GROUP_RESULTS_DEFAULT)
|
23
25
|
end
|
24
26
|
end
|
25
27
|
end
|
26
28
|
|
27
|
-
def self.
|
29
|
+
def self.max_repeater_variance
|
28
30
|
ResultCountLimiters.max_repeater_variance
|
29
31
|
end
|
30
|
-
def self.
|
32
|
+
def self.max_group_results
|
31
33
|
ResultCountLimiters.max_group_results
|
32
34
|
end
|
33
35
|
|
36
|
+
# Definitions of various special characters, used in regular expressions.
|
37
|
+
# For example, `/\h/.examples` will return the value of `Hex` in this module
|
34
38
|
module CharSets
|
35
39
|
Lower = Array('a'..'z')
|
36
40
|
Upper = Array('A'..'Z')
|
@@ -6,13 +6,12 @@ module RegexpExamples
|
|
6
6
|
attr_reader :group_id, :subgroups
|
7
7
|
def initialize(result, group_id = nil, subgroups = [])
|
8
8
|
@group_id = group_id
|
9
|
-
@subgroups = subgroups
|
10
|
-
@subgroups = result.all_subgroups if result.respond_to?(:group_id)
|
9
|
+
@subgroups = result.respond_to?(:group_id) ? result.all_subgroups : subgroups
|
11
10
|
super(result)
|
12
11
|
end
|
13
12
|
|
14
13
|
def all_subgroups
|
15
|
-
[self, subgroups].flatten.
|
14
|
+
[self, subgroups].flatten.keep_if(&:group_id)
|
16
15
|
end
|
17
16
|
|
18
17
|
def swapcase
|
@@ -132,24 +131,16 @@ module RegexpExamples
|
|
132
131
|
@group_id = group_id
|
133
132
|
end
|
134
133
|
|
135
|
-
def result
|
136
|
-
result_by_method(:result)
|
137
|
-
end
|
138
|
-
|
139
|
-
def random_result
|
140
|
-
result_by_method(:random_result)
|
141
|
-
end
|
142
|
-
|
143
|
-
private
|
144
|
-
|
145
134
|
# Generates the result of each contained group
|
146
135
|
# and adds the filled group of each result to itself
|
147
|
-
def
|
148
|
-
strings = @groups.map { |repeater| repeater.public_send(
|
136
|
+
def result
|
137
|
+
strings = @groups.map { |repeater| repeater.public_send(__method__) }
|
149
138
|
RegexpExamples.permutations_of_strings(strings).map do |result|
|
150
139
|
GroupResult.new(result, group_id)
|
151
140
|
end
|
152
141
|
end
|
142
|
+
|
143
|
+
alias_method :random_result, :result
|
153
144
|
end
|
154
145
|
|
155
146
|
# A boolean "or" group.
|
@@ -177,13 +168,10 @@ module RegexpExamples
|
|
177
168
|
private
|
178
169
|
|
179
170
|
def result_by_method(method)
|
180
|
-
repeaters_list
|
181
|
-
RegexpExamples.public_send(method, repeaters)
|
182
|
-
end
|
171
|
+
repeaters_list
|
172
|
+
.map { |repeaters| RegexpExamples.public_send(method, repeaters) }
|
183
173
|
.inject(:concat)
|
184
|
-
.map
|
185
|
-
GroupResult.new(result)
|
186
|
-
end
|
174
|
+
.map { |result| GroupResult.new(result) }
|
187
175
|
.uniq
|
188
176
|
end
|
189
177
|
|
@@ -203,13 +191,14 @@ module RegexpExamples
|
|
203
191
|
# of /\1/ as being "__1__". It later gets updated.
|
204
192
|
class BackReferenceGroup
|
205
193
|
include RandomResultBySample
|
194
|
+
PLACEHOLDER_FORMAT = '__%s__'
|
206
195
|
attr_reader :id
|
207
196
|
def initialize(id)
|
208
197
|
@id = id
|
209
198
|
end
|
210
199
|
|
211
200
|
def result
|
212
|
-
[GroupResult.new(
|
201
|
+
[GroupResult.new(PLACEHOLDER_FORMAT % @id)]
|
213
202
|
end
|
214
203
|
end
|
215
204
|
end
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# :nodoc:
|
1
2
|
module RegexpExamples
|
2
3
|
# Given an array of arrays of strings, returns all possible permutations
|
3
4
|
# for strings, created by joining one element from each array
|
@@ -14,15 +15,13 @@ module RegexpExamples
|
|
14
15
|
end
|
15
16
|
|
16
17
|
def self.join_preserving_capture_groups(result)
|
17
|
-
|
18
|
+
# Only save the LAST group from repeated capture groups, e.g. /([ab]){2}/
|
19
|
+
# (Hence the need for "reverse"!)
|
18
20
|
subgroups = result
|
19
|
-
.
|
20
|
-
.
|
21
|
+
.flat_map(&:all_subgroups)
|
22
|
+
.reverse
|
23
|
+
.uniq(&:group_id)
|
21
24
|
|
22
|
-
# Only save the LAST group from repeated capture groups, e.g. /([ab]){2}/
|
23
|
-
subgroups.delete_if do |subgroup|
|
24
|
-
subgroups.count { |other_subgroup| other_subgroup.group_id == subgroup.group_id } > 1
|
25
|
-
end
|
26
25
|
GroupResult.new(result.join, nil, subgroups)
|
27
26
|
end
|
28
27
|
|