regexp-examples 1.1.3 → 1.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +3 -0
- data/.travis.yml +0 -6
- data/db/unicode_ranges_2.1.pstore +1 -0
- data/db/unicode_ranges_2.3.pstore +0 -0
- data/db/unicode_ranges_2.4.pstore +0 -0
- data/lib/core_extensions/regexp/examples.rb +3 -0
- data/lib/regexp-examples/backreferences.rb +29 -13
- data/lib/regexp-examples/chargroup_parser.rb +15 -17
- data/lib/regexp-examples/constants.rb +10 -6
- data/lib/regexp-examples/groups.rb +11 -22
- data/lib/regexp-examples/helpers.rb +6 -7
- data/lib/regexp-examples/parser.rb +31 -285
- data/lib/regexp-examples/parser_helpers/charset_negation_helper.rb +8 -0
- data/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb +144 -0
- data/lib/regexp-examples/parser_helpers/parse_group_helper.rb +58 -0
- data/lib/regexp-examples/parser_helpers/parse_multi_group_helper.rb +85 -0
- data/lib/regexp-examples/parser_helpers/parse_repeater_helper.rb +51 -0
- data/lib/regexp-examples/repeaters.rb +21 -7
- data/lib/regexp-examples/unicode_char_ranges.rb +4 -0
- data/lib/regexp-examples/version.rb +2 -1
- data/lib/regexp-examples.rb +1 -1
- data/regexp-examples.gemspec +5 -4
- data/scripts/unicode_lister.rb +15 -11
- data/spec/helpers.rb +18 -0
- data/spec/regexp-examples_spec.rb +7 -15
- data/spec/regexp-random_example_spec.rb +4 -2
- data/spec/spec_helper.rb +10 -0
- metadata +14 -5
- data/db/unicode_ranges_2.1.pstore +0 -0
@@ -1,4 +1,8 @@
|
|
1
1
|
module RegexpExamples
|
2
|
+
# An abstract base class for all other repeater groups.
|
3
|
+
# Since all repeaters (quantifiers) are really just shorthand syntaxes for the generic:
|
4
|
+
# `/.{a,b}/`, the methods for generating "between `a` and `b` results" are fully
|
5
|
+
# generalised here.
|
2
6
|
class BaseRepeater
|
3
7
|
attr_reader :group, :min_repeats, :max_repeats
|
4
8
|
def initialize(group)
|
@@ -6,7 +10,7 @@ module RegexpExamples
|
|
6
10
|
end
|
7
11
|
|
8
12
|
def result
|
9
|
-
group_results = group.result.first(RegexpExamples.
|
13
|
+
group_results = group.result.first(RegexpExamples.max_group_results)
|
10
14
|
results = []
|
11
15
|
min_repeats.upto(max_repeats) do |repeats|
|
12
16
|
if repeats.zero?
|
@@ -28,6 +32,9 @@ module RegexpExamples
|
|
28
32
|
end
|
29
33
|
end
|
30
34
|
|
35
|
+
# When there is "no repeater", we interpret this as a "one time repeater".
|
36
|
+
# For example, `/a/` is a "OneTimeRepeater" of "a"
|
37
|
+
# Equivalent to `/a{1}/`
|
31
38
|
class OneTimeRepeater < BaseRepeater
|
32
39
|
def initialize(group)
|
33
40
|
super
|
@@ -36,22 +43,28 @@ module RegexpExamples
|
|
36
43
|
end
|
37
44
|
end
|
38
45
|
|
46
|
+
# When a Kleene star is used, e.g. `/a*/`
|
47
|
+
# Equivalent to `/a{0,}/`
|
39
48
|
class StarRepeater < BaseRepeater
|
40
49
|
def initialize(group)
|
41
50
|
super
|
42
51
|
@min_repeats = 0
|
43
|
-
@max_repeats = RegexpExamples.
|
52
|
+
@max_repeats = RegexpExamples.max_repeater_variance
|
44
53
|
end
|
45
54
|
end
|
46
55
|
|
56
|
+
# When a plus is used, e.g. `/a+/`
|
57
|
+
# Equivalent to `/a{1,}/`
|
47
58
|
class PlusRepeater < BaseRepeater
|
48
59
|
def initialize(group)
|
49
60
|
super
|
50
61
|
@min_repeats = 1
|
51
|
-
@max_repeats = RegexpExamples.
|
62
|
+
@max_repeats = RegexpExamples.max_repeater_variance + 1
|
52
63
|
end
|
53
64
|
end
|
54
65
|
|
66
|
+
# When a question mark is used, e.g. `/a?/`
|
67
|
+
# Equivalent to `/a{0,1}/`
|
55
68
|
class QuestionMarkRepeater < BaseRepeater
|
56
69
|
def initialize(group)
|
57
70
|
super
|
@@ -60,14 +73,15 @@ module RegexpExamples
|
|
60
73
|
end
|
61
74
|
end
|
62
75
|
|
76
|
+
# When a range is used, e.g. `/a{1}/`, `/a{1,}/`, `/a{1,3}/`, `/a{,3}/`
|
63
77
|
class RangeRepeater < BaseRepeater
|
64
78
|
def initialize(group, min, has_comma, max)
|
65
79
|
super(group)
|
66
80
|
@min_repeats = min || 0
|
67
|
-
if max # e.g. {1,100} --> Treat as {1,3}
|
68
|
-
@max_repeats = smallest(max, @min_repeats + RegexpExamples.
|
69
|
-
elsif has_comma # e.g. {2,} --> Treat as {2,4}
|
70
|
-
@max_repeats = @min_repeats + RegexpExamples.
|
81
|
+
if max # e.g. {1,100} --> Treat as {1,3} (by default max_repeater_variance)
|
82
|
+
@max_repeats = smallest(max, @min_repeats + RegexpExamples.max_repeater_variance)
|
83
|
+
elsif has_comma # e.g. {2,} --> Treat as {2,4} (by default max_repeater_variance)
|
84
|
+
@max_repeats = @min_repeats + RegexpExamples.max_repeater_variance
|
71
85
|
else # e.g. {3} --> Treat as {3,3}
|
72
86
|
@max_repeats = @min_repeats
|
73
87
|
end
|
@@ -1,6 +1,10 @@
|
|
1
1
|
require 'pstore'
|
2
2
|
|
3
3
|
module RegexpExamples
|
4
|
+
# Interface to retrieve the character sets that match a regex named property.
|
5
|
+
# E.g. `/\p{Alpha}/`
|
6
|
+
# These matching values are stored, compressed, in a PStore. They are specific to
|
7
|
+
# the ruby minor version.
|
4
8
|
class UnicodeCharRanges
|
5
9
|
# These values were generated by: scripts/unicode_lister.rb
|
6
10
|
# Note: Only the first 128 results are listed, for performance.
|
data/lib/regexp-examples.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
require_relative 'regexp-examples/unicode_char_ranges'
|
2
|
-
require_relative 'regexp-examples/backreferences'
|
3
2
|
require_relative 'regexp-examples/chargroup_parser'
|
4
3
|
require_relative 'regexp-examples/constants'
|
5
4
|
require_relative 'regexp-examples/groups'
|
5
|
+
require_relative 'regexp-examples/backreferences'
|
6
6
|
require_relative 'regexp-examples/helpers'
|
7
7
|
require_relative 'regexp-examples/parser'
|
8
8
|
require_relative 'regexp-examples/repeaters'
|
data/regexp-examples.gemspec
CHANGED
@@ -3,14 +3,15 @@ require File.expand_path('../lib/regexp-examples/version', __FILE__)
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = 'regexp-examples'
|
5
5
|
s.version = RegexpExamples::VERSION
|
6
|
-
s.summary = "Extends the Regexp class with '#examples'"
|
6
|
+
s.summary = "Extends the Regexp class with '#examples' and '#random_example'"
|
7
7
|
s.description =
|
8
|
-
'Regexp#examples returns a list of strings that are matched by the regex'
|
8
|
+
'Regexp#examples returns a list of "all" strings that are matched by the regex. '\
|
9
|
+
+ 'Regexp#random_example returns one, random string that matches.'
|
9
10
|
s.authors = ['Tom Lord']
|
10
11
|
s.email = 'lord.thom@gmail.com'
|
11
12
|
s.files = `git ls-files -z`.split("\x0")
|
12
|
-
s.executables = s.files.grep(
|
13
|
-
s.test_files = s.files.grep(
|
13
|
+
s.executables = s.files.grep(/^bin\//) { |f| File.basename(f) }
|
14
|
+
s.test_files = s.files.grep(/^(test|spec|features)\//)
|
14
15
|
s.require_paths = ['lib']
|
15
16
|
s.homepage =
|
16
17
|
'http://rubygems.org/gems/regexp-examples'
|
data/scripts/unicode_lister.rb
CHANGED
@@ -9,17 +9,21 @@ require_relative '../lib/regexp-examples/unicode_char_ranges'
|
|
9
9
|
|
10
10
|
# Taken from ruby documentation:
|
11
11
|
# http://ruby-doc.org//core-2.2.0/Regexp.html#class-Regexp-label-Character+Properties
|
12
|
-
|
13
|
-
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word ASCII
|
12
|
+
NAMED_GROUPS = %w(
|
13
|
+
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word ASCII
|
14
|
+
Any Assigned
|
14
15
|
|
15
|
-
L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl
|
16
|
+
L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl
|
17
|
+
Zp C Cc Cf Cn Co Cs
|
16
18
|
|
17
|
-
Arabic Armenian Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
|
18
|
-
Common Coptic Cuneiform Cypriot Cyrillic Deseret Devanagari
|
19
|
-
Gujarati Gurmukhi Han Hangul Hanunoo Hebrew
|
20
|
-
|
21
|
-
|
22
|
-
|
19
|
+
Arabic Armenian Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
|
20
|
+
Carian Cham Cherokee Common Coptic Cuneiform Cypriot Cyrillic Deseret Devanagari
|
21
|
+
Ethiopic Georgian Glagolitic Gothic Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew
|
22
|
+
Hiragana Inherited Kannada Katakana Kayah_Li Kharoshthi Khmer Lao Latin Lepcha Limbu
|
23
|
+
Linear_B Lycian Lydian Malayalam Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki
|
24
|
+
Old_Italic Old_Persian Oriya Osmanya Phags_Pa Phoenician Rejang Runic Saurashtra
|
25
|
+
Shavian Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tamil Telugu
|
26
|
+
Thaana Thai Tibetan Tifinagh Ugaritic Vai Yi
|
23
27
|
)
|
24
28
|
|
25
29
|
# Note: For the range 55296..57343, these are reserved values that are not legal
|
@@ -48,7 +52,7 @@ count = 0
|
|
48
52
|
filename = "./db/#{RegexpExamples::UnicodeCharRanges::STORE_FILENAME}"
|
49
53
|
store = PStore.new(filename)
|
50
54
|
store.transaction do
|
51
|
-
|
55
|
+
NAMED_GROUPS.each do |name|
|
52
56
|
count += 1
|
53
57
|
# Only generating first 128 matches, for performance...
|
54
58
|
# (I have tried this with generating ALL examples, and it makes the ruby gem
|
@@ -57,7 +61,7 @@ store.transaction do
|
|
57
61
|
.select { |x| /\p{#{name}}/ =~ eval("?\\u{#{x.to_s(16)}}") }
|
58
62
|
.first(128)
|
59
63
|
store[name.downcase] = calculate_ranges(matching_codes)
|
60
|
-
puts "(#{count}/#{
|
64
|
+
puts "(#{count}/#{NAMED_GROUPS.length}) Finished property: #{name}"
|
61
65
|
end
|
62
66
|
puts '*' * 50
|
63
67
|
puts "Finished! Result stored in: #{filename}"
|
data/spec/helpers.rb
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
# Common helper methods, for lots of tests
|
2
|
+
# Included into the RSpec config, so accessible inside test blocks.
|
3
|
+
module Helpers
|
4
|
+
def examples_exist(regexp, regexp_examples)
|
5
|
+
expect(regexp_examples)
|
6
|
+
.not_to be_empty, "No examples were generated for regexp: /#{regexp.source}/"
|
7
|
+
end
|
8
|
+
|
9
|
+
def examples_match(regexp, regexp_examples)
|
10
|
+
# Note: /\A...\z/ is used to prevent misleading examples from passing the test.
|
11
|
+
# For example, we don't want things like:
|
12
|
+
# /a*/.examples to include "xyz"
|
13
|
+
# /a|b/.examples to include "bad"
|
14
|
+
regexp_examples.each do |example|
|
15
|
+
expect(example).to match(/\A(?:#{regexp.source})\z/)
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -3,17 +3,8 @@ RSpec.describe Regexp, '#examples' do
|
|
3
3
|
regexps.each do |regexp|
|
4
4
|
it "examples for /#{regexp.source}/" do
|
5
5
|
regexp_examples = regexp.examples(max_group_results: 99_999)
|
6
|
-
|
7
|
-
|
8
|
-
.not_to be_empty,
|
9
|
-
"No examples were generated for regexp: /#{regexp.source}/"
|
10
|
-
regexp_examples.each do |example|
|
11
|
-
expect(example).to match(/\A(?:#{regexp.source})\z/)
|
12
|
-
end
|
13
|
-
# Note: /\A...\z/ is used to prevent misleading examples from passing the test.
|
14
|
-
# For example, we don't want things like:
|
15
|
-
# /a*/.examples to include "xyz"
|
16
|
-
# /a|b/.examples to include "bad"
|
6
|
+
examples_exist(regexp, regexp_examples)
|
7
|
+
examples_match(regexp, regexp_examples)
|
17
8
|
end
|
18
9
|
end
|
19
10
|
end
|
@@ -137,6 +128,7 @@ RSpec.describe Regexp, '#examples' do
|
|
137
128
|
context 'for escaped octal characters' do
|
138
129
|
examples_exist_and_match(
|
139
130
|
/\10\20\30\40\50/,
|
131
|
+
/\00\07\100\177/,
|
140
132
|
/\177123/ # Should work for numbers up to 177
|
141
133
|
)
|
142
134
|
end
|
@@ -144,7 +136,7 @@ RSpec.describe Regexp, '#examples' do
|
|
144
136
|
context 'for complex patterns' do
|
145
137
|
# Longer combinations of the above
|
146
138
|
examples_exist_and_match(
|
147
|
-
|
139
|
+
%r{https?://(www\.)github\.com},
|
148
140
|
/(I(N(C(E(P(T(I(O(N)))))))))*/,
|
149
141
|
/[\w]{1}/,
|
150
142
|
/((a?b*c+)) \1/,
|
@@ -208,7 +200,7 @@ RSpec.describe Regexp, '#examples' do
|
|
208
200
|
regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
|
209
201
|
expect(regexp_examples)
|
210
202
|
.not_to be_empty,
|
211
|
-
|
203
|
+
"No examples were generated for regexp: /\p{#{property}}/"
|
212
204
|
# Just do one big check, for test system performance (~30% faster)
|
213
205
|
# (Otherwise, we're doing up to 128 checks on 123 properties!!!)
|
214
206
|
expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
|
@@ -304,8 +296,8 @@ RSpec.describe Regexp, '#examples' do
|
|
304
296
|
it { expect(/a+|b?/.examples).to match_array ['a', 'aa', 'aaa', '', 'b'] }
|
305
297
|
|
306
298
|
# Only display unique examples:
|
307
|
-
it { expect(/a|a|b|b/.examples).to match_array
|
308
|
-
it { expect(/[ccdd]/.examples).to match_array
|
299
|
+
it { expect(/a|a|b|b/.examples).to match_array %w(a b) }
|
300
|
+
it { expect(/[ccdd]/.examples).to match_array %w(c d) }
|
309
301
|
|
310
302
|
# a{1}? should be equivalent to (?:a{1})?, i.e. NOT a "non-greedy quantifier"
|
311
303
|
it { expect(/a{1}?/.examples).to match_array ['', 'a'] }
|
@@ -5,14 +5,16 @@ RSpec.describe Regexp, '#random_example' do
|
|
5
5
|
random_example = regexp.random_example
|
6
6
|
|
7
7
|
expect(random_example).to be_a String # Not an Array!
|
8
|
-
expect(random_example)
|
8
|
+
expect(random_example)
|
9
|
+
.to match(Regexp.new("\\A(?:#{regexp.source})\\z", regexp.options))
|
9
10
|
end
|
10
11
|
end
|
11
12
|
end
|
12
13
|
|
13
14
|
context 'smoke tests' do
|
14
15
|
# Just a few "smoke tests", to ensure the basic method isn't broken.
|
15
|
-
# Testing of the RegexpExamples::Parser class is all
|
16
|
+
# Testing of the RegexpExamples::Parser class is all
|
17
|
+
# covered by Regexp#examples test already.
|
16
18
|
random_example_matches(
|
17
19
|
/\w{10}/,
|
18
20
|
/(we(need(to(go(deeper)?)?)?)?) \1/,
|
data/spec/spec_helper.rb
CHANGED
@@ -2,9 +2,19 @@ require 'coveralls'
|
|
2
2
|
Coveralls.wear!
|
3
3
|
|
4
4
|
require './lib/regexp-examples.rb'
|
5
|
+
require 'helpers'
|
5
6
|
require 'pry'
|
6
7
|
|
8
|
+
# Several of these tests (intentionally) use "weird" regex patterns,
|
9
|
+
# that spam annoying warnings when running.
|
10
|
+
# E.g. warning: invalid back reference: /\k/
|
11
|
+
# and warning: character class has ']' without escape: /[]]/
|
12
|
+
# This config disables those warnings.
|
13
|
+
$VERBOSE = nil
|
14
|
+
|
7
15
|
RSpec.configure do |config|
|
16
|
+
config.include Helpers
|
17
|
+
|
8
18
|
config.expect_with :rspec do |expectations|
|
9
19
|
# This option will default to `true` in RSpec 4. It makes the `description`
|
10
20
|
# and `failure_message` of custom matchers include text for helper methods
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp-examples
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Lord
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-12-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -38,7 +38,8 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
-
description: Regexp#examples returns a list of strings that are matched by the
|
41
|
+
description: Regexp#examples returns a list of "all" strings that are matched by the
|
42
|
+
regex. Regexp#random_example returns one, random string that matches.
|
42
43
|
email: lord.thom@gmail.com
|
43
44
|
executables: []
|
44
45
|
extensions: []
|
@@ -56,6 +57,7 @@ files:
|
|
56
57
|
- db/unicode_ranges_2.1.pstore
|
57
58
|
- db/unicode_ranges_2.2.pstore
|
58
59
|
- db/unicode_ranges_2.3.pstore
|
60
|
+
- db/unicode_ranges_2.4.pstore
|
59
61
|
- lib/core_extensions/regexp/examples.rb
|
60
62
|
- lib/regexp-examples.rb
|
61
63
|
- lib/regexp-examples/backreferences.rb
|
@@ -64,11 +66,17 @@ files:
|
|
64
66
|
- lib/regexp-examples/groups.rb
|
65
67
|
- lib/regexp-examples/helpers.rb
|
66
68
|
- lib/regexp-examples/parser.rb
|
69
|
+
- lib/regexp-examples/parser_helpers/charset_negation_helper.rb
|
70
|
+
- lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb
|
71
|
+
- lib/regexp-examples/parser_helpers/parse_group_helper.rb
|
72
|
+
- lib/regexp-examples/parser_helpers/parse_multi_group_helper.rb
|
73
|
+
- lib/regexp-examples/parser_helpers/parse_repeater_helper.rb
|
67
74
|
- lib/regexp-examples/repeaters.rb
|
68
75
|
- lib/regexp-examples/unicode_char_ranges.rb
|
69
76
|
- lib/regexp-examples/version.rb
|
70
77
|
- regexp-examples.gemspec
|
71
78
|
- scripts/unicode_lister.rb
|
79
|
+
- spec/helpers.rb
|
72
80
|
- spec/regexp-examples_spec.rb
|
73
81
|
- spec/regexp-random_example_spec.rb
|
74
82
|
- spec/spec_helper.rb
|
@@ -92,11 +100,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
92
100
|
version: '0'
|
93
101
|
requirements: []
|
94
102
|
rubyforge_project:
|
95
|
-
rubygems_version: 2.
|
103
|
+
rubygems_version: 2.5.1
|
96
104
|
signing_key:
|
97
105
|
specification_version: 4
|
98
|
-
summary: Extends the Regexp class with '#examples'
|
106
|
+
summary: Extends the Regexp class with '#examples' and '#random_example'
|
99
107
|
test_files:
|
108
|
+
- spec/helpers.rb
|
100
109
|
- spec/regexp-examples_spec.rb
|
101
110
|
- spec/regexp-random_example_spec.rb
|
102
111
|
- spec/spec_helper.rb
|
Binary file
|