regexp-examples 0.5.0 → 0.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +8 -7
- data/lib/regexp-examples/groups.rb +18 -16
- data/lib/regexp-examples/parser.rb +21 -11
- data/lib/regexp-examples/version.rb +1 -1
- data/spec/regexp-examples_spec.rb +21 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ff205ed61ad0ca0c2dc2383afd17930110763b44
|
4
|
+
data.tar.gz: a13f23a7d56b9b7d7e4323380d705276793e6e4f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 068c7dbd13945c0487b91c61e9cae304323775adc82cd4e80092ab32505eb46f31aaf98736f5a353eb1ad06a471864f9432ce3d203097f2dcfc6e696dba07e12
|
7
|
+
data.tar.gz: c2014a4055874af1edbdc3fbee60e58b3f182ba37278f5f0738f4699256a0669b84e8b7d731494c5493e04fb249c3135cb31e5a716e687cd677fd3f9fa1a8da9
|
data/README.md
CHANGED
@@ -39,15 +39,16 @@ For more detail on this, see [configuration options](#configuration-options).
|
|
39
39
|
* Groups work fine, even if nested or optional e.g. `/(even(this(works?))) \1 \2 \3/`, `/what about (this)? \1/`
|
40
40
|
* Non-capture groups, e.g. `/(?:foo)/`
|
41
41
|
* Control characters, e.g. `/\ca/`, `/\cZ/`, `/\C-9/`
|
42
|
-
* Escape sequences, e.g. `/\x42/`, `/\
|
42
|
+
* Escape sequences, e.g. `/\x42/`, `/\x5word/`, `/#{"\x80".force_encoding("ASCII-8BIT")}/`
|
43
43
|
* Unicode characters, e.g. `/\u0123/`, `/\uabcd/`, `/\u{789}/`
|
44
44
|
* **Arbitrarily complex combinations of all the above!**
|
45
45
|
|
46
|
-
|
46
|
+
* Regexp options can also be used:
|
47
|
+
* Case insensitive examples: `/cool/i.examples #=> ["cool", "cooL", "coOl", "coOL", ...]`
|
48
|
+
* Multiline examples: `/./m.examples(max_group_results: 999) #=> ["a", "b", "c", ..., "\n"]`
|
49
|
+
* Extended form examples: `/line1 #comment \n line2/x.examples #=> ["line1line2"]`
|
47
50
|
|
48
|
-
|
49
|
-
* `/white space/x.examples` will not strip out the whitespace from the pattern, i.e. this incorrectly returns `["white space"]` rather than `["whitespace"]`
|
50
|
-
* `/./m.examples(max_group_results: 999)` will not include `"\n"`
|
51
|
+
## Bugs and Not-Yet-Supported syntax
|
51
52
|
|
52
53
|
* Nested character classes, and the use of set intersection ([See here](http://www.ruby-doc.org/core-2.2.0/Regexp.html#class-Regexp-label-Character+Classes) for the official documentation on this.) For example:
|
53
54
|
* `/[[abc]]/.examples` (which _should_ return `["a", "b", "c"]`)
|
@@ -60,14 +61,14 @@ For more detail on this, see [configuration options](#configuration-options).
|
|
60
61
|
|
61
62
|
* The patterns: `/\10/` ... `/\77/` should match the octal representation of their character code, if there is no nth grouped subexpression. For example, `/\10/.examples` should return `["\x08"]`. Funnily enough, I did not think of this when writing my regexp parser.
|
62
63
|
|
63
|
-
There are loads more (increasingly obscure) unsupported bits of syntax, which I cannot be bothered to write out here. Full documentation on all the various other obscurities in the ruby (version 2.x) regexp parser can be found [here](https://raw.githubusercontent.com/k-takata/Onigmo/master/doc/RE).
|
64
|
-
|
65
64
|
Using any of the following will raise a RegexpExamples::UnsupportedSyntax exception (until such time as they are implemented!):
|
66
65
|
|
67
66
|
* POSIX bracket expressions, e.g. `/[[:alnum:]]/`, `/[[:space:]]/`
|
68
67
|
* Named properties, e.g. `/\p{L}/` ("Letter"), `/\p{Arabic}/` ("Arabic character"), `/\p{^Ll}/` ("Not a lowercase letter")
|
69
68
|
* Subexpression calls, e.g. `/(?<name> ... \g<name>* )/` (Note: These could get _really_ ugly to implement, and may even be impossible, so I highly doubt it's worth the effort!)
|
70
69
|
|
70
|
+
There are loads more (increasingly obscure) unsupported bits of syntax, which I cannot be bothered to write out here. Full documentation on all the various other obscurities in the ruby (version 2.x) regexp parser can be found [here](https://raw.githubusercontent.com/k-takata/Onigmo/master/doc/RE).
|
71
|
+
|
71
72
|
## Impossible features ("illegal syntax")
|
72
73
|
|
73
74
|
The following features in the regex language can never be properly implemented into this gem because, put simply, they are not technically "regular"!
|
@@ -23,11 +23,11 @@ module RegexpExamples
|
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
module
|
27
|
-
attr_reader :
|
26
|
+
module GroupWithIgnoreCase
|
27
|
+
attr_reader :ignorecase
|
28
28
|
def result
|
29
29
|
group_result = super
|
30
|
-
if
|
30
|
+
if ignorecase
|
31
31
|
group_result
|
32
32
|
.concat( group_result.map(&:swapcase) )
|
33
33
|
.uniq
|
@@ -38,10 +38,10 @@ module RegexpExamples
|
|
38
38
|
end
|
39
39
|
|
40
40
|
class SingleCharGroup
|
41
|
-
prepend
|
42
|
-
def initialize(char,
|
41
|
+
prepend GroupWithIgnoreCase
|
42
|
+
def initialize(char, ignorecase)
|
43
43
|
@char = char
|
44
|
-
@
|
44
|
+
@ignorecase = ignorecase
|
45
45
|
end
|
46
46
|
def result
|
47
47
|
[GroupResult.new(@char)]
|
@@ -49,10 +49,10 @@ module RegexpExamples
|
|
49
49
|
end
|
50
50
|
|
51
51
|
class CharGroup
|
52
|
-
prepend
|
53
|
-
def initialize(chars,
|
52
|
+
prepend GroupWithIgnoreCase
|
53
|
+
def initialize(chars, ignorecase)
|
54
54
|
@chars = chars
|
55
|
-
@
|
55
|
+
@ignorecase = ignorecase
|
56
56
|
if chars[0] == "^"
|
57
57
|
@negative = true
|
58
58
|
@chars = @chars[1..-1]
|
@@ -119,25 +119,27 @@ module RegexpExamples
|
|
119
119
|
end
|
120
120
|
|
121
121
|
class DotGroup
|
122
|
-
|
123
|
-
def initialize(
|
124
|
-
@
|
122
|
+
attr_reader :multiline
|
123
|
+
def initialize(multiline)
|
124
|
+
@multiline = multiline
|
125
125
|
end
|
126
126
|
|
127
127
|
def result
|
128
|
-
CharSets::Any
|
128
|
+
chars = CharSets::Any
|
129
|
+
chars |= ["\n"] if multiline
|
130
|
+
chars.map do |result|
|
129
131
|
GroupResult.new(result)
|
130
132
|
end
|
131
133
|
end
|
132
134
|
end
|
133
135
|
|
134
136
|
class MultiGroup
|
135
|
-
prepend
|
137
|
+
prepend GroupWithIgnoreCase
|
136
138
|
attr_reader :group_id
|
137
|
-
def initialize(groups, group_id,
|
139
|
+
def initialize(groups, group_id, ignorecase)
|
138
140
|
@groups = groups
|
139
141
|
@group_id = group_id
|
140
|
-
@
|
142
|
+
@ignorecase = ignorecase
|
141
143
|
end
|
142
144
|
|
143
145
|
# Generates the result of each contained group
|
@@ -3,10 +3,12 @@ module RegexpExamples
|
|
3
3
|
attr_reader :regexp_string
|
4
4
|
def initialize(regexp_string, regexp_options, config_options={})
|
5
5
|
@regexp_string = regexp_string
|
6
|
-
@ignorecase = (
|
6
|
+
@ignorecase = !(regexp_options & Regexp::IGNORECASE).zero?
|
7
|
+
@multiline = !(regexp_options & Regexp::MULTILINE).zero?
|
8
|
+
@extended = !(regexp_options & Regexp::EXTENDED).zero?
|
7
9
|
@num_groups = 0
|
8
10
|
@current_position = 0
|
9
|
-
|
11
|
+
ResultCountLimiters.configure!(
|
10
12
|
config_options[:max_repeater_variance],
|
11
13
|
config_options[:max_group_results]
|
12
14
|
)
|
@@ -28,10 +30,6 @@ module RegexpExamples
|
|
28
30
|
|
29
31
|
private
|
30
32
|
|
31
|
-
def regexp_options
|
32
|
-
{ignorecase: @ignorecase}
|
33
|
-
end
|
34
|
-
|
35
33
|
def parse_group(repeaters)
|
36
34
|
case next_char
|
37
35
|
when '('
|
@@ -58,12 +56,24 @@ module RegexpExamples
|
|
58
56
|
else
|
59
57
|
raise IllegalSyntaxError, "Anchors cannot be supported, as they are not regular"
|
60
58
|
end
|
59
|
+
when /[#\s]/
|
60
|
+
if @extended
|
61
|
+
parse_extended_whitespace
|
62
|
+
group = parse_single_char_group('') # Ignore the whitespace/comment
|
63
|
+
else
|
64
|
+
group = parse_single_char_group(next_char)
|
65
|
+
end
|
61
66
|
else
|
62
67
|
group = parse_single_char_group(next_char)
|
63
68
|
end
|
64
69
|
group
|
65
70
|
end
|
66
71
|
|
72
|
+
def parse_extended_whitespace
|
73
|
+
whitespace_chars = rest_of_string.match(/#.*|\s+/)[0]
|
74
|
+
@current_position += whitespace_chars.length - 1
|
75
|
+
end
|
76
|
+
|
67
77
|
def parse_after_backslash_group
|
68
78
|
@current_position += 1
|
69
79
|
case
|
@@ -78,7 +88,7 @@ module RegexpExamples
|
|
78
88
|
# Note: The `.dup` is important, as it prevents modifying the constant, in
|
79
89
|
# CharGroup#init_ranges (where the '-' is moved to the front)
|
80
90
|
BackslashCharMap[next_char].dup,
|
81
|
-
|
91
|
+
@ignorecase
|
82
92
|
)
|
83
93
|
when rest_of_string =~ /\A(c|C-)(.)/ # Control character
|
84
94
|
@current_position += $1.length
|
@@ -153,7 +163,7 @@ module RegexpExamples
|
|
153
163
|
end
|
154
164
|
end
|
155
165
|
groups = parse
|
156
|
-
MultiGroup.new(groups, group_id,
|
166
|
+
MultiGroup.new(groups, group_id, @ignorecase)
|
157
167
|
end
|
158
168
|
|
159
169
|
def parse_multi_end_group
|
@@ -181,11 +191,11 @@ module RegexpExamples
|
|
181
191
|
chars << next_char
|
182
192
|
@current_position += 1
|
183
193
|
end
|
184
|
-
CharGroup.new(chars,
|
194
|
+
CharGroup.new(chars, @ignorecase)
|
185
195
|
end
|
186
196
|
|
187
197
|
def parse_dot_group
|
188
|
-
DotGroup.new(
|
198
|
+
DotGroup.new(@multiline)
|
189
199
|
end
|
190
200
|
|
191
201
|
def parse_or_group(left_repeaters)
|
@@ -196,7 +206,7 @@ module RegexpExamples
|
|
196
206
|
|
197
207
|
|
198
208
|
def parse_single_char_group(char)
|
199
|
-
SingleCharGroup.new(char,
|
209
|
+
SingleCharGroup.new(char, @ignorecase)
|
200
210
|
end
|
201
211
|
|
202
212
|
def parse_backreference_group(match)
|
@@ -233,7 +233,7 @@ RSpec.describe Regexp, "#examples" do
|
|
233
233
|
|
234
234
|
context "exact examples match" do
|
235
235
|
# More rigorous tests to assert that ALL examples are being listed
|
236
|
-
context "default options" do
|
236
|
+
context "default config options" do
|
237
237
|
# Simple examples
|
238
238
|
it { expect(/[ab]{2}/.examples).to eq ["aa", "ab", "ba", "bb"] }
|
239
239
|
it { expect(/(a|b){2}/.examples).to eq ["aa", "ab", "ba", "bb"] }
|
@@ -243,7 +243,7 @@ RSpec.describe Regexp, "#examples" do
|
|
243
243
|
it { expect(/a{1}?/.examples).to eq ["", "a"] }
|
244
244
|
end
|
245
245
|
|
246
|
-
context "max_repeater_variance option" do
|
246
|
+
context "max_repeater_variance config option" do
|
247
247
|
it do
|
248
248
|
expect(/a+/.examples(max_repeater_variance: 5))
|
249
249
|
.to eq %w(a aa aaa aaaa aaaaa aaaaaa)
|
@@ -254,7 +254,7 @@ RSpec.describe Regexp, "#examples" do
|
|
254
254
|
end
|
255
255
|
end
|
256
256
|
|
257
|
-
context "max_group_results option" do
|
257
|
+
context "max_group_results config option" do
|
258
258
|
it do
|
259
259
|
expect(/\d/.examples(max_group_results: 10))
|
260
260
|
.to eq %w(0 1 2 3 4 5 6 7 8 9)
|
@@ -266,6 +266,24 @@ RSpec.describe Regexp, "#examples" do
|
|
266
266
|
it { expect(/a+/i.examples).to eq %w(a A aa aA Aa AA aaa aaA aAa aAA Aaa AaA AAa AAA) }
|
267
267
|
it { expect(/([ab])\1/i.examples).to eq %w(aa bb AA BB) }
|
268
268
|
end
|
269
|
+
|
270
|
+
context "multiline" do
|
271
|
+
it { expect(/./.examples(max_group_results: 999)).not_to include "\n" }
|
272
|
+
it { expect(/./m.examples(max_group_results: 999)).to include "\n" }
|
273
|
+
end
|
274
|
+
|
275
|
+
context "exteded form" do
|
276
|
+
it { expect(/a b c/x.examples).to eq %w(abc) }
|
277
|
+
it { expect(/a#comment/x.examples).to eq %w(a) }
|
278
|
+
it do
|
279
|
+
expect(
|
280
|
+
/
|
281
|
+
line1 #comment
|
282
|
+
line2 #comment
|
283
|
+
/x.examples
|
284
|
+
).to eq %w(line1line2)
|
285
|
+
end
|
286
|
+
end
|
269
287
|
end
|
270
288
|
|
271
289
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp-examples
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.
|
4
|
+
version: 0.5.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Lord
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-02-
|
11
|
+
date: 2015-02-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|