regexp-examples 1.1.3 → 1.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +0 -6
- data/db/unicode_ranges_2.1.pstore +1 -0
- data/db/unicode_ranges_2.3.pstore +0 -0
- data/db/unicode_ranges_2.4.pstore +0 -0
- data/lib/core_extensions/regexp/examples.rb +3 -0
- data/lib/regexp-examples/backreferences.rb +29 -13
- data/lib/regexp-examples/chargroup_parser.rb +15 -17
- data/lib/regexp-examples/constants.rb +10 -6
- data/lib/regexp-examples/groups.rb +11 -22
- data/lib/regexp-examples/helpers.rb +6 -7
- data/lib/regexp-examples/parser.rb +31 -285
- data/lib/regexp-examples/parser_helpers/charset_negation_helper.rb +8 -0
- data/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb +144 -0
- data/lib/regexp-examples/parser_helpers/parse_group_helper.rb +58 -0
- data/lib/regexp-examples/parser_helpers/parse_multi_group_helper.rb +85 -0
- data/lib/regexp-examples/parser_helpers/parse_repeater_helper.rb +51 -0
- data/lib/regexp-examples/repeaters.rb +21 -7
- data/lib/regexp-examples/unicode_char_ranges.rb +4 -0
- data/lib/regexp-examples/version.rb +2 -1
- data/lib/regexp-examples.rb +1 -1
- data/regexp-examples.gemspec +5 -4
- data/scripts/unicode_lister.rb +15 -11
- data/spec/helpers.rb +18 -0
- data/spec/regexp-examples_spec.rb +7 -15
- data/spec/regexp-random_example_spec.rb +4 -2
- data/spec/spec_helper.rb +10 -0
- metadata +14 -5
- data/db/unicode_ranges_2.1.pstore +0 -0
@@ -1,4 +1,8 @@
|
|
1
1
|
module RegexpExamples
|
2
|
+
# An abstract base class for all other repeater groups.
|
3
|
+
# Since all repeaters (quantifiers) are really just shorthand syntaxes for the generic:
|
4
|
+
# `/.{a,b}/`, the methods for generating "between `a` and `b` results" are fully
|
5
|
+
# generalised here.
|
2
6
|
class BaseRepeater
|
3
7
|
attr_reader :group, :min_repeats, :max_repeats
|
4
8
|
def initialize(group)
|
@@ -6,7 +10,7 @@ module RegexpExamples
|
|
6
10
|
end
|
7
11
|
|
8
12
|
def result
|
9
|
-
group_results = group.result.first(RegexpExamples.
|
13
|
+
group_results = group.result.first(RegexpExamples.max_group_results)
|
10
14
|
results = []
|
11
15
|
min_repeats.upto(max_repeats) do |repeats|
|
12
16
|
if repeats.zero?
|
@@ -28,6 +32,9 @@ module RegexpExamples
|
|
28
32
|
end
|
29
33
|
end
|
30
34
|
|
35
|
+
# When there is "no repeater", we interpret this as a "one time repeater".
|
36
|
+
# For example, `/a/` is a "OneTimeRepeater" of "a"
|
37
|
+
# Equivalent to `/a{1}/`
|
31
38
|
class OneTimeRepeater < BaseRepeater
|
32
39
|
def initialize(group)
|
33
40
|
super
|
@@ -36,22 +43,28 @@ module RegexpExamples
|
|
36
43
|
end
|
37
44
|
end
|
38
45
|
|
46
|
+
# When a Kleene star is used, e.g. `/a*/`
|
47
|
+
# Equivalent to `/a{0,}/`
|
39
48
|
class StarRepeater < BaseRepeater
|
40
49
|
def initialize(group)
|
41
50
|
super
|
42
51
|
@min_repeats = 0
|
43
|
-
@max_repeats = RegexpExamples.
|
52
|
+
@max_repeats = RegexpExamples.max_repeater_variance
|
44
53
|
end
|
45
54
|
end
|
46
55
|
|
56
|
+
# When a plus is used, e.g. `/a+/`
|
57
|
+
# Equivalent to `/a{1,}/`
|
47
58
|
class PlusRepeater < BaseRepeater
|
48
59
|
def initialize(group)
|
49
60
|
super
|
50
61
|
@min_repeats = 1
|
51
|
-
@max_repeats = RegexpExamples.
|
62
|
+
@max_repeats = RegexpExamples.max_repeater_variance + 1
|
52
63
|
end
|
53
64
|
end
|
54
65
|
|
66
|
+
# When a question mark is used, e.g. `/a?/`
|
67
|
+
# Equivalent to `/a{0,1}/`
|
55
68
|
class QuestionMarkRepeater < BaseRepeater
|
56
69
|
def initialize(group)
|
57
70
|
super
|
@@ -60,14 +73,15 @@ module RegexpExamples
|
|
60
73
|
end
|
61
74
|
end
|
62
75
|
|
76
|
+
# When a range is used, e.g. `/a{1}/`, `/a{1,}/`, `/a{1,3}/`, `/a{,3}/`
|
63
77
|
class RangeRepeater < BaseRepeater
|
64
78
|
def initialize(group, min, has_comma, max)
|
65
79
|
super(group)
|
66
80
|
@min_repeats = min || 0
|
67
|
-
if max # e.g. {1,100} --> Treat as {1,3}
|
68
|
-
@max_repeats = smallest(max, @min_repeats + RegexpExamples.
|
69
|
-
elsif has_comma # e.g. {2,} --> Treat as {2,4}
|
70
|
-
@max_repeats = @min_repeats + RegexpExamples.
|
81
|
+
if max # e.g. {1,100} --> Treat as {1,3} (by default max_repeater_variance)
|
82
|
+
@max_repeats = smallest(max, @min_repeats + RegexpExamples.max_repeater_variance)
|
83
|
+
elsif has_comma # e.g. {2,} --> Treat as {2,4} (by default max_repeater_variance)
|
84
|
+
@max_repeats = @min_repeats + RegexpExamples.max_repeater_variance
|
71
85
|
else # e.g. {3} --> Treat as {3,3}
|
72
86
|
@max_repeats = @min_repeats
|
73
87
|
end
|
@@ -1,6 +1,10 @@
|
|
1
1
|
require 'pstore'
|
2
2
|
|
3
3
|
module RegexpExamples
|
4
|
+
# Interface to retrieve the character sets that match a regex named property.
|
5
|
+
# E.g. `/\p{Alpha}/`
|
6
|
+
# These matching values are stored, compressed, in a PStore. They are specific to
|
7
|
+
# the ruby minor version.
|
4
8
|
class UnicodeCharRanges
|
5
9
|
# These values were generated by: scripts/unicode_lister.rb
|
6
10
|
# Note: Only the first 128 results are listed, for performance.
|
data/lib/regexp-examples.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require_relative 'regexp-examples/unicode_char_ranges'
|
2
|
-
require_relative 'regexp-examples/backreferences'
|
3
2
|
require_relative 'regexp-examples/chargroup_parser'
|
4
3
|
require_relative 'regexp-examples/constants'
|
5
4
|
require_relative 'regexp-examples/groups'
|
5
|
+
require_relative 'regexp-examples/backreferences'
|
6
6
|
require_relative 'regexp-examples/helpers'
|
7
7
|
require_relative 'regexp-examples/parser'
|
8
8
|
require_relative 'regexp-examples/repeaters'
|
data/regexp-examples.gemspec
CHANGED
@@ -3,14 +3,15 @@ require File.expand_path('../lib/regexp-examples/version', __FILE__)
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'regexp-examples'
|
5
5
|
s.version = RegexpExamples::VERSION
|
6
|
-
s.summary = "Extends the Regexp class with '#examples'"
|
6
|
+
s.summary = "Extends the Regexp class with '#examples' and '#random_example'"
|
7
7
|
s.description =
|
8
|
-
'Regexp#examples returns a list of strings that are matched by the regex'
|
8
|
+
'Regexp#examples returns a list of "all" strings that are matched by the regex. '\
|
9
|
+
+ 'Regexp#random_example returns one, random string that matches.'
|
9
10
|
s.authors = ['Tom Lord']
|
10
11
|
s.email = 'lord.thom@gmail.com'
|
11
12
|
s.files = `git ls-files -z`.split("\x0")
|
12
|
-
s.executables = s.files.grep(
|
13
|
-
s.test_files = s.files.grep(
|
13
|
+
s.executables = s.files.grep(/^bin\//) { |f| File.basename(f) }
|
14
|
+
s.test_files = s.files.grep(/^(test|spec|features)\//)
|
14
15
|
s.require_paths = ['lib']
|
15
16
|
s.homepage =
|
16
17
|
'http://rubygems.org/gems/regexp-examples'
|
data/scripts/unicode_lister.rb
CHANGED
@@ -9,17 +9,21 @@ require_relative '../lib/regexp-examples/unicode_char_ranges'
|
|
9
9
|
|
10
10
|
# Taken from ruby documentation:
|
11
11
|
# http://ruby-doc.org//core-2.2.0/Regexp.html#class-Regexp-label-Character+Properties
|
12
|
-
|
13
|
-
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word ASCII
|
12
|
+
NAMED_GROUPS = %w(
|
13
|
+
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word ASCII
|
14
|
+
Any Assigned
|
14
15
|
|
15
|
-
L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl
|
16
|
+
L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl
|
17
|
+
Zp C Cc Cf Cn Co Cs
|
16
18
|
|
17
|
-
Arabic Armenian Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
|
18
|
-
Common Coptic Cuneiform Cypriot Cyrillic Deseret Devanagari
|
19
|
-
Gujarati Gurmukhi Han Hangul Hanunoo Hebrew
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
Arabic Armenian Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
|
20
|
+
Carian Cham Cherokee Common Coptic Cuneiform Cypriot Cyrillic Deseret Devanagari
|
21
|
+
Ethiopic Georgian Glagolitic Gothic Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew
|
22
|
+
Hiragana Inherited Kannada Katakana Kayah_Li Kharoshthi Khmer Lao Latin Lepcha Limbu
|
23
|
+
Linear_B Lycian Lydian Malayalam Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki
|
24
|
+
Old_Italic Old_Persian Oriya Osmanya Phags_Pa Phoenician Rejang Runic Saurashtra
|
25
|
+
Shavian Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tamil Telugu
|
26
|
+
Thaana Thai Tibetan Tifinagh Ugaritic Vai Yi
|
23
27
|
)
|
24
28
|
|
25
29
|
# Note: For the range 55296..57343, these are reserved values that are not legal
|
@@ -48,7 +52,7 @@ count = 0
|
|
48
52
|
filename = "./db/#{RegexpExamples::UnicodeCharRanges::STORE_FILENAME}"
|
49
53
|
store = PStore.new(filename)
|
50
54
|
store.transaction do
|
51
|
-
|
55
|
+
NAMED_GROUPS.each do |name|
|
52
56
|
count += 1
|
53
57
|
# Only generating first 128 matches, for performance...
|
54
58
|
# (I have tried this with generating ALL examples, and it makes the ruby gem
|
@@ -57,7 +61,7 @@ store.transaction do
|
|
57
61
|
.select { |x| /\p{#{name}}/ =~ eval("?\\u{#{x.to_s(16)}}") }
|
58
62
|
.first(128)
|
59
63
|
store[name.downcase] = calculate_ranges(matching_codes)
|
60
|
-
puts "(#{count}/#{
|
64
|
+
puts "(#{count}/#{NAMED_GROUPS.length}) Finished property: #{name}"
|
61
65
|
end
|
62
66
|
puts '*' * 50
|
63
67
|
puts "Finished! Result stored in: #{filename}"
|
data/spec/helpers.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Common helper methods, for lots of tests
|
2
|
+
# Included into the RSpec config, so accessible inside test blocks.
|
3
|
+
module Helpers
|
4
|
+
def examples_exist(regexp, regexp_examples)
|
5
|
+
expect(regexp_examples)
|
6
|
+
.not_to be_empty, "No examples were generated for regexp: /#{regexp.source}/"
|
7
|
+
end
|
8
|
+
|
9
|
+
def examples_match(regexp, regexp_examples)
|
10
|
+
# Note: /\A...\z/ is used to prevent misleading examples from passing the test.
|
11
|
+
# For example, we don't want things like:
|
12
|
+
# /a*/.examples to include "xyz"
|
13
|
+
# /a|b/.examples to include "bad"
|
14
|
+
regexp_examples.each do |example|
|
15
|
+
expect(example).to match(/\A(?:#{regexp.source})\z/)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -3,17 +3,8 @@ RSpec.describe Regexp, '#examples' do
|
|
3
3
|
regexps.each do |regexp|
|
4
4
|
it "examples for /#{regexp.source}/" do
|
5
5
|
regexp_examples = regexp.examples(max_group_results: 99_999)
|
6
|
-
|
7
|
-
|
8
|
-
.not_to be_empty,
|
9
|
-
"No examples were generated for regexp: /#{regexp.source}/"
|
10
|
-
regexp_examples.each do |example|
|
11
|
-
expect(example).to match(/\A(?:#{regexp.source})\z/)
|
12
|
-
end
|
13
|
-
# Note: /\A...\z/ is used to prevent misleading examples from passing the test.
|
14
|
-
# For example, we don't want things like:
|
15
|
-
# /a*/.examples to include "xyz"
|
16
|
-
# /a|b/.examples to include "bad"
|
6
|
+
examples_exist(regexp, regexp_examples)
|
7
|
+
examples_match(regexp, regexp_examples)
|
17
8
|
end
|
18
9
|
end
|
19
10
|
end
|
@@ -137,6 +128,7 @@ RSpec.describe Regexp, '#examples' do
|
|
137
128
|
context 'for escaped octal characters' do
|
138
129
|
examples_exist_and_match(
|
139
130
|
/\10\20\30\40\50/,
|
131
|
+
/\00\07\100\177/,
|
140
132
|
/\177123/ # Should work for numbers up to 177
|
141
133
|
)
|
142
134
|
end
|
@@ -144,7 +136,7 @@ RSpec.describe Regexp, '#examples' do
|
|
144
136
|
context 'for complex patterns' do
|
145
137
|
# Longer combinations of the above
|
146
138
|
examples_exist_and_match(
|
147
|
-
|
139
|
+
%r{https?://(www\.)github\.com},
|
148
140
|
/(I(N(C(E(P(T(I(O(N)))))))))*/,
|
149
141
|
/[\w]{1}/,
|
150
142
|
/((a?b*c+)) \1/,
|
@@ -208,7 +200,7 @@ RSpec.describe Regexp, '#examples' do
|
|
208
200
|
regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
|
209
201
|
expect(regexp_examples)
|
210
202
|
.not_to be_empty,
|
211
|
-
|
203
|
+
"No examples were generated for regexp: /\p{#{property}}/"
|
212
204
|
# Just do one big check, for test system performance (~30% faster)
|
213
205
|
# (Otherwise, we're doing up to 128 checks on 123 properties!!!)
|
214
206
|
expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
|
@@ -304,8 +296,8 @@ RSpec.describe Regexp, '#examples' do
|
|
304
296
|
it { expect(/a+|b?/.examples).to match_array ['a', 'aa', 'aaa', '', 'b'] }
|
305
297
|
|
306
298
|
# Only display unique examples:
|
307
|
-
it { expect(/a|a|b|b/.examples).to match_array
|
308
|
-
it { expect(/[ccdd]/.examples).to match_array
|
299
|
+
it { expect(/a|a|b|b/.examples).to match_array %w(a b) }
|
300
|
+
it { expect(/[ccdd]/.examples).to match_array %w(c d) }
|
309
301
|
|
310
302
|
# a{1}? should be equivalent to (?:a{1})?, i.e. NOT a "non-greedy quantifier"
|
311
303
|
it { expect(/a{1}?/.examples).to match_array ['', 'a'] }
|
@@ -5,14 +5,16 @@ RSpec.describe Regexp, '#random_example' do
|
|
5
5
|
random_example = regexp.random_example
|
6
6
|
|
7
7
|
expect(random_example).to be_a String # Not an Array!
|
8
|
-
expect(random_example)
|
8
|
+
expect(random_example)
|
9
|
+
.to match(Regexp.new("\\A(?:#{regexp.source})\\z", regexp.options))
|
9
10
|
end
|
10
11
|
end
|
11
12
|
end
|
12
13
|
|
13
14
|
context 'smoke tests' do
|
14
15
|
# Just a few "smoke tests", to ensure the basic method isn't broken.
|
15
|
-
# Testing of the RegexpExamples::Parser class is all
|
16
|
+
# Testing of the RegexpExamples::Parser class is all
|
17
|
+
# covered by Regexp#examples test already.
|
16
18
|
random_example_matches(
|
17
19
|
/\w{10}/,
|
18
20
|
/(we(need(to(go(deeper)?)?)?)?) \1/,
|
data/spec/spec_helper.rb
CHANGED
@@ -2,9 +2,19 @@ require 'coveralls'
|
|
2
2
|
Coveralls.wear!
|
3
3
|
|
4
4
|
require './lib/regexp-examples.rb'
|
5
|
+
require 'helpers'
|
5
6
|
require 'pry'
|
6
7
|
|
8
|
+
# Several of these tests (intentionally) use "weird" regex patterns,
|
9
|
+
# that spam annoying warnings when running.
|
10
|
+
# E.g. warning: invalid back reference: /\k/
|
11
|
+
# and warning: character class has ']' without escape: /[]]/
|
12
|
+
# This config disables those warnings.
|
13
|
+
$VERBOSE = nil
|
14
|
+
|
7
15
|
RSpec.configure do |config|
|
16
|
+
config.include Helpers
|
17
|
+
|
8
18
|
config.expect_with :rspec do |expectations|
|
9
19
|
# This option will default to `true` in RSpec 4. It makes the `description`
|
10
20
|
# and `failure_message` of custom matchers include text for helper methods
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp-examples
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Lord
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,7 +38,8 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
-
description: Regexp#examples returns a list of strings that are matched by the
|
41
|
+
description: Regexp#examples returns a list of "all" strings that are matched by the
|
42
|
+
regex. Regexp#random_example returns one, random string that matches.
|
42
43
|
email: lord.thom@gmail.com
|
43
44
|
executables: []
|
44
45
|
extensions: []
|
@@ -56,6 +57,7 @@ files:
|
|
56
57
|
- db/unicode_ranges_2.1.pstore
|
57
58
|
- db/unicode_ranges_2.2.pstore
|
58
59
|
- db/unicode_ranges_2.3.pstore
|
60
|
+
- db/unicode_ranges_2.4.pstore
|
59
61
|
- lib/core_extensions/regexp/examples.rb
|
60
62
|
- lib/regexp-examples.rb
|
61
63
|
- lib/regexp-examples/backreferences.rb
|
@@ -64,11 +66,17 @@ files:
|
|
64
66
|
- lib/regexp-examples/groups.rb
|
65
67
|
- lib/regexp-examples/helpers.rb
|
66
68
|
- lib/regexp-examples/parser.rb
|
69
|
+
- lib/regexp-examples/parser_helpers/charset_negation_helper.rb
|
70
|
+
- lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
|
71
|
+
- lib/regexp-examples/parser_helpers/parse_group_helper.rb
|
72
|
+
- lib/regexp-examples/parser_helpers/parse_multi_group_helper.rb
|
73
|
+
- lib/regexp-examples/parser_helpers/parse_repeater_helper.rb
|
67
74
|
- lib/regexp-examples/repeaters.rb
|
68
75
|
- lib/regexp-examples/unicode_char_ranges.rb
|
69
76
|
- lib/regexp-examples/version.rb
|
70
77
|
- regexp-examples.gemspec
|
71
78
|
- scripts/unicode_lister.rb
|
79
|
+
- spec/helpers.rb
|
72
80
|
- spec/regexp-examples_spec.rb
|
73
81
|
- spec/regexp-random_example_spec.rb
|
74
82
|
- spec/spec_helper.rb
|
@@ -92,11 +100,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
92
100
|
version: '0'
|
93
101
|
requirements: []
|
94
102
|
rubyforge_project:
|
95
|
-
rubygems_version: 2.
|
103
|
+
rubygems_version: 2.5.1
|
96
104
|
signing_key:
|
97
105
|
specification_version: 4
|
98
|
-
summary: Extends the Regexp class with '#examples'
|
106
|
+
summary: Extends the Regexp class with '#examples' and '#random_example'
|
99
107
|
test_files:
|
108
|
+
- spec/helpers.rb
|
100
109
|
- spec/regexp-examples_spec.rb
|
101
110
|
- spec/regexp-random_example_spec.rb
|
102
111
|
- spec/spec_helper.rb
|
Binary file
|