regexp-examples 1.5.1 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.ruby-version +1 -1
- data/.travis.yml +3 -1
- data/Gemfile +0 -1
- data/README.md +1 -1
- data/lib/regexp-examples/char_sets.rb +0 -2
- data/lib/regexp-examples/parser_helpers/charset_negation_helper.rb +1 -1
- data/lib/regexp-examples/parser_helpers/parse_after_backslash_group_helper.rb +10 -2
- data/lib/regexp-examples/version.rb +1 -1
- data/lib/regexp-examples.rb +0 -2
- data/regexp-examples.gemspec +4 -3
- data/spec/regexp-examples_spec.rb +17 -30
- metadata +30 -18
- data/lib/regexp-examples/unicode_char_ranges.rb +0 -59
- data/scripts/unicode_lister.rb +0 -68
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d1bdebf385885ec8033d7f7b34d5d473837c7ae983b4fbda4facb45f8bbc96a0
|
4
|
+
data.tar.gz: 79b2c7a8c76ebe3e3d6d2e0d6ee12ffaefc5a86acdbe69d8d5476afab1c25f96
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5858e4813711b88549a2237857f23db0c941b5d3f9fd88275ab83b7b0e2002b854bbfa774d0d8c05e99df9098b2935f03346823df2f57819159f30c9d36e9767
|
7
|
+
data.tar.gz: 126a04b363ed23ef0e4ea4ee5f36f40b20280e44e8892985f87f0eef391ddcb20c2299a87324b15693aa02c5ab2eb484c8d2622889f057a5b1b29f82fed73c7b
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.1.2
|
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -52,7 +52,7 @@ Obviously, you will get different (random) results if you try these yourself!
|
|
52
52
|
```
|
53
53
|
|
54
54
|
## Supported ruby versions
|
55
|
-
MRI 2.4.0 (oldest non-[EOL](https://www.ruby-lang.org/en/news/2019/03/31/support-of-ruby-2-3-has-ended/) version) -->
|
55
|
+
MRI 2.4.0 (oldest non-[EOL](https://www.ruby-lang.org/en/news/2019/03/31/support-of-ruby-2-3-has-ended/) version) --> 3.0.0 (latest stable version)
|
56
56
|
|
57
57
|
MRI 2.0.0 --> 2.3.x were supported until version `1.5.0` of this library. Support was dropped primarily
|
58
58
|
because of the need to use `RbConfig::CONFIG['UNICODE_VERSION']`, which was added to ruby version `2.4.0`.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'regexp_property_values'
|
2
|
+
|
1
3
|
module RegexpExamples
|
2
4
|
# A collection of related helper methods, utilised by the `Parser` class
|
3
5
|
module ParseAfterBackslashGroupHelper
|
@@ -91,13 +93,19 @@ module RegexpExamples
|
|
91
93
|
end
|
92
94
|
|
93
95
|
def parse_backslash_named_property(p_negation, caret_negation, property_name)
|
94
|
-
@current_position += (caret_negation.length + # 0 or 1,
|
96
|
+
@current_position += (caret_negation.length + # 0 or 1, if '^' is present
|
95
97
|
property_name.length +
|
96
98
|
2) # Length of opening and closing brackets (always 2)
|
97
99
|
# Beware of double negatives! E.g. /\P{^Space}/
|
98
100
|
is_negative = (p_negation == 'P') ^ (caret_negation == '^')
|
99
101
|
CharGroup.new(
|
100
|
-
negate_if(
|
102
|
+
negate_if(
|
103
|
+
RegexpPropertyValues[property_name]
|
104
|
+
.matched_codepoints
|
105
|
+
.lazy
|
106
|
+
.filter_map { |cp| cp.chr('utf-8') unless cp.between?(0xD800, 0xDFFF) },
|
107
|
+
is_negative
|
108
|
+
),
|
101
109
|
@ignorecase
|
102
110
|
)
|
103
111
|
end
|
data/lib/regexp-examples.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require_relative 'regexp-examples/unicode_char_ranges'
|
2
1
|
require_relative 'regexp-examples/chargroup_parser'
|
3
2
|
require_relative 'regexp-examples/config'
|
4
3
|
require_relative 'regexp-examples/char_sets'
|
@@ -8,6 +7,5 @@ require_relative 'regexp-examples/max_results_limiter'
|
|
8
7
|
require_relative 'regexp-examples/helpers'
|
9
8
|
require_relative 'regexp-examples/parser'
|
10
9
|
require_relative 'regexp-examples/repeaters'
|
11
|
-
require_relative 'regexp-examples/unicode_char_ranges'
|
12
10
|
require_relative 'regexp-examples/version'
|
13
11
|
require_relative 'core_extensions/regexp/examples'
|
data/regexp-examples.gemspec
CHANGED
@@ -14,9 +14,10 @@ Gem::Specification.new do |s|
|
|
14
14
|
s.test_files = s.files.grep(/^(test|spec|features)\//)
|
15
15
|
s.require_paths = ['lib']
|
16
16
|
s.homepage = 'http://rubygems.org/gems/regexp-examples'
|
17
|
-
s.
|
18
|
-
s.add_development_dependency '
|
19
|
-
s.add_development_dependency '
|
17
|
+
s.add_dependency 'regexp_property_values', '~> 1.5'
|
18
|
+
s.add_development_dependency 'bundler', '~> 2.4'
|
19
|
+
s.add_development_dependency 'rake', '~> 13.0'
|
20
|
+
s.add_development_dependency 'pry'
|
20
21
|
s.add_development_dependency 'warning', '~> 0.10.0'
|
21
22
|
s.license = 'MIT'
|
22
23
|
s.required_ruby_version = '>= 2.4.0'
|
@@ -190,37 +190,24 @@ RSpec.describe Regexp, '#examples' do
|
|
190
190
|
/\P{Ll}/, # Negation syntax type 2
|
191
191
|
/\P{^Ll}/ # Double negation!! (Should cancel out)
|
192
192
|
)
|
193
|
-
# An exhaustive set of tests for all named properties!!! This is useful
|
194
|
-
# for verifying the PStore contains correct values for all ruby versions
|
195
|
-
%w[
|
196
|
-
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit
|
197
|
-
Word ASCII Any Assigned L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd
|
198
|
-
Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl Zp C Cc Cf Cn Co Arabic Armenian
|
199
|
-
Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
|
200
|
-
Cham Cherokee Common Coptic Cyrillic Devanagari Ethiopic Georgian
|
201
|
-
Glagolitic Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew Hiragana
|
202
|
-
Inherited Kannada Katakana Kayah_Li Khmer Lao Latin Lepcha Limbu Malayalam
|
203
|
-
Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki Oriya Phags_Pa Rejang
|
204
|
-
Runic Saurashtra Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa
|
205
|
-
Tai_Le Tamil Telugu Thaana Thai Tibetan Tifinagh Vai Yi
|
206
|
-
].each do |property|
|
207
|
-
it "examples for /\p{#{property}}/" do
|
208
|
-
regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
|
209
|
-
expect(regexp_examples)
|
210
|
-
.not_to be_empty,
|
211
|
-
"No examples were generated for regexp: /\p{#{property}}/"
|
212
|
-
# Just do one big check, for test system performance (~30% faster)
|
213
|
-
# (Otherwise, we're doing up to 128 checks on 123 properties!!!)
|
214
|
-
expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
|
215
|
-
end
|
216
|
-
end
|
217
193
|
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
194
|
+
expected_empty_properties = %w[surrogate inlowsurrogates inhighsurrogates inhighprivateusesurrogates]
|
195
|
+
|
196
|
+
RegexpPropertyValues.all_for_current_ruby.map(&:identifier).each do |property|
|
197
|
+
if(expected_empty_properties).include?(property)
|
198
|
+
examples_are_empty(/\p{#{property}}/)
|
199
|
+
else
|
200
|
+
it "examples for /\p{#{property}}/" do
|
201
|
+
regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
|
202
|
+
|
203
|
+
expect(regexp_examples)
|
204
|
+
.not_to be_empty,
|
205
|
+
"No examples were generated for regexp: /\p{#{property}}/"
|
206
|
+
# Just do one big check, for test system performance (~30% faster)
|
207
|
+
# (Otherwise, we're potentially doing 99999 checks on 123 properties!!!)
|
208
|
+
expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
|
209
|
+
end
|
210
|
+
end
|
224
211
|
end
|
225
212
|
end
|
226
213
|
|
metadata
CHANGED
@@ -1,57 +1,71 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: regexp-examples
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Lord
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: regexp_property_values
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.5'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.5'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: bundler
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
16
30
|
requirements:
|
17
|
-
- - "
|
31
|
+
- - "~>"
|
18
32
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
33
|
+
version: '2.4'
|
20
34
|
type: :development
|
21
35
|
prerelease: false
|
22
36
|
version_requirements: !ruby/object:Gem::Requirement
|
23
37
|
requirements:
|
24
|
-
- - "
|
38
|
+
- - "~>"
|
25
39
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
40
|
+
version: '2.4'
|
27
41
|
- !ruby/object:Gem::Dependency
|
28
42
|
name: rake
|
29
43
|
requirement: !ruby/object:Gem::Requirement
|
30
44
|
requirements:
|
31
45
|
- - "~>"
|
32
46
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
47
|
+
version: '13.0'
|
34
48
|
type: :development
|
35
49
|
prerelease: false
|
36
50
|
version_requirements: !ruby/object:Gem::Requirement
|
37
51
|
requirements:
|
38
52
|
- - "~>"
|
39
53
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
54
|
+
version: '13.0'
|
41
55
|
- !ruby/object:Gem::Dependency
|
42
56
|
name: pry
|
43
57
|
requirement: !ruby/object:Gem::Requirement
|
44
58
|
requirements:
|
45
|
-
- - "
|
59
|
+
- - ">="
|
46
60
|
- !ruby/object:Gem::Version
|
47
|
-
version: 0
|
61
|
+
version: '0'
|
48
62
|
type: :development
|
49
63
|
prerelease: false
|
50
64
|
version_requirements: !ruby/object:Gem::Requirement
|
51
65
|
requirements:
|
52
|
-
- - "
|
66
|
+
- - ">="
|
53
67
|
- !ruby/object:Gem::Version
|
54
|
-
version: 0
|
68
|
+
version: '0'
|
55
69
|
- !ruby/object:Gem::Dependency
|
56
70
|
name: warning
|
57
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -103,10 +117,8 @@ files:
|
|
103
117
|
- lib/regexp-examples/parser_helpers/parse_multi_group_helper.rb
|
104
118
|
- lib/regexp-examples/parser_helpers/parse_repeater_helper.rb
|
105
119
|
- lib/regexp-examples/repeaters.rb
|
106
|
-
- lib/regexp-examples/unicode_char_ranges.rb
|
107
120
|
- lib/regexp-examples/version.rb
|
108
121
|
- regexp-examples.gemspec
|
109
|
-
- scripts/unicode_lister.rb
|
110
122
|
- spec/config_spec.rb
|
111
123
|
- spec/gem_helper.rb
|
112
124
|
- spec/helpers.rb
|
@@ -118,7 +130,7 @@ homepage: http://rubygems.org/gems/regexp-examples
|
|
118
130
|
licenses:
|
119
131
|
- MIT
|
120
132
|
metadata: {}
|
121
|
-
post_install_message:
|
133
|
+
post_install_message:
|
122
134
|
rdoc_options: []
|
123
135
|
require_paths:
|
124
136
|
- lib
|
@@ -133,8 +145,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
133
145
|
- !ruby/object:Gem::Version
|
134
146
|
version: '0'
|
135
147
|
requirements: []
|
136
|
-
rubygems_version: 3.
|
137
|
-
signing_key:
|
148
|
+
rubygems_version: 3.3.7
|
149
|
+
signing_key:
|
138
150
|
specification_version: 4
|
139
151
|
summary: Extends the Regexp class with '#examples' and '#random_example'
|
140
152
|
test_files:
|
@@ -1,59 +0,0 @@
|
|
1
|
-
require 'pstore'
|
2
|
-
require 'singleton'
|
3
|
-
|
4
|
-
module RegexpExamples
|
5
|
-
# Interface to the retrieve the character sets that match a regex named property.
|
6
|
-
# E.g. `/\p{Alpha}/`
|
7
|
-
# These matching values are stored, compressed, in a PStore. They are specific to
|
8
|
-
# the ruby minor version.
|
9
|
-
class UnicodeCharRanges
|
10
|
-
include Singleton
|
11
|
-
# These values were generated by: scripts/unicode_lister.rb
|
12
|
-
# Note: Only the first 128 results are listed, for performance.
|
13
|
-
# Also, some groups seem to have no matches (weird!)
|
14
|
-
STORE_FILENAME = "unicode_ranges_#{RbConfig::CONFIG['UNICODE_VERSION']}.pstore".freeze
|
15
|
-
|
16
|
-
attr_reader :range_store
|
17
|
-
|
18
|
-
def initialize
|
19
|
-
@range_store = PStore.new(unicode_ranges_file)
|
20
|
-
end
|
21
|
-
|
22
|
-
def get(key)
|
23
|
-
range_store.transaction(true) do
|
24
|
-
ranges_to_unicode(range_store[key])
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
alias [] get
|
29
|
-
|
30
|
-
private
|
31
|
-
|
32
|
-
# The method is written like this to future-proof it a little,
|
33
|
-
# i.e. the gem won't completely break for a new ruby version release
|
34
|
-
def unicode_ranges_file
|
35
|
-
db_path = File.join(__dir__, '../../db')
|
36
|
-
Dir["#{db_path}/*.pstore"].sort.select do |file|
|
37
|
-
file <= "#{db_path}/#{STORE_FILENAME}"
|
38
|
-
end.last
|
39
|
-
end
|
40
|
-
|
41
|
-
# TODO: Document example input/output of this method
|
42
|
-
# It's pretty simple, but this code is a little confusing!!
|
43
|
-
def ranges_to_unicode(ranges)
|
44
|
-
result = []
|
45
|
-
ranges.each do |range|
|
46
|
-
if range.is_a? Integer # Small hack to increase data compression
|
47
|
-
result << hex_to_unicode(range.to_s(16))
|
48
|
-
else
|
49
|
-
range.each { |num| result << hex_to_unicode(num.to_s(16)) }
|
50
|
-
end
|
51
|
-
end
|
52
|
-
result
|
53
|
-
end
|
54
|
-
|
55
|
-
def hex_to_unicode(hex)
|
56
|
-
[hex.to_i(16)].pack('U')
|
57
|
-
end
|
58
|
-
end
|
59
|
-
end
|
data/scripts/unicode_lister.rb
DELETED
@@ -1,68 +0,0 @@
|
|
1
|
-
require 'pstore'
|
2
|
-
require_relative '../lib/regexp-examples/unicode_char_ranges'
|
3
|
-
# A script to generate lists of all unicode characters
|
4
|
-
# that match all named group/character properties regexps.
|
5
|
-
# For use in e.g. /\p{Arabic}/.examples
|
6
|
-
|
7
|
-
# To (re-)generate this list, simply run this file!
|
8
|
-
# > ruby scripts/unicode_lister.rb
|
9
|
-
|
10
|
-
# Taken from ruby documentation:
|
11
|
-
# http://ruby-doc.org//core-2.2.0/Regexp.html#class-Regexp-label-Character+Properties
|
12
|
-
NAMED_GROUPS = %w(
|
13
|
-
Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word ASCII
|
14
|
-
Any Assigned
|
15
|
-
|
16
|
-
L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl
|
17
|
-
Zp C Cc Cf Cn Co Cs
|
18
|
-
|
19
|
-
Arabic Armenian Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
|
20
|
-
Carian Cham Cherokee Common Coptic Cuneiform Cypriot Cyrillic Deseret Devanagari
|
21
|
-
Ethiopic Georgian Glagolitic Gothic Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew
|
22
|
-
Hiragana Inherited Kannada Katakana Kayah_Li Kharoshthi Khmer Lao Latin Lepcha Limbu
|
23
|
-
Linear_B Lycian Lydian Malayalam Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki
|
24
|
-
Old_Italic Old_Persian Oriya Osmanya Phags_Pa Phoenician Rejang Runic Saurashtra
|
25
|
-
Shavian Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tamil Telugu
|
26
|
-
Thaana Thai Tibetan Tifinagh Ugaritic Vai Yi
|
27
|
-
)
|
28
|
-
|
29
|
-
# Note: For the range 55296..57343, these are reserved values that are not legal
|
30
|
-
# unicode characters.
|
31
|
-
# I.e. a character encoding-related exception gets raised when you do:
|
32
|
-
# `/regex/ =~ eval("?\\u{#{x.to_s(16)}}")`
|
33
|
-
# TODO: Add a link to somewhere that explains this better.
|
34
|
-
|
35
|
-
# "Compresses" the values in an array by using ranges.
|
36
|
-
# Example input: [1, 2, 3, 4, 6, 7, 12, 14]
|
37
|
-
# Example output: [1..4, 6..7, 12, 14]
|
38
|
-
def calculate_ranges(matching_codes)
|
39
|
-
return [] if matching_codes.empty?
|
40
|
-
first = matching_codes.shift
|
41
|
-
matching_codes.inject([first..first]) do |r, x|
|
42
|
-
if r.last.last.succ != x
|
43
|
-
r << (x..x) # Start new range
|
44
|
-
else
|
45
|
-
r[0..-2] << (r.last.first..x) # Update last range
|
46
|
-
end
|
47
|
-
end
|
48
|
-
.map { |range| range.size == 1 ? range.first : range } # Replace `int..int` with `int`
|
49
|
-
end
|
50
|
-
|
51
|
-
count = 0
|
52
|
-
filename = "./db/#{RegexpExamples::UnicodeCharRanges::STORE_FILENAME}"
|
53
|
-
store = PStore.new(filename)
|
54
|
-
store.transaction do
|
55
|
-
NAMED_GROUPS.each do |name|
|
56
|
-
count += 1
|
57
|
-
# Only generating first 128 matches, for performance...
|
58
|
-
# (I have tried this with generating ALL examples, and it makes the ruby gem
|
59
|
-
# painfully slow and bloated... Especially the test suite.)
|
60
|
-
matching_codes = [(0..55_295), (57_344..65_535)].map(&:to_a).flatten.lazy
|
61
|
-
.select { |x| /\p{#{name}}/ =~ eval("?\\u{#{x.to_s(16)}}") }
|
62
|
-
.first(128)
|
63
|
-
store[name.downcase] = calculate_ranges(matching_codes)
|
64
|
-
puts "(#{count}/#{NAMED_GROUPS.length}) Finished property: #{name}"
|
65
|
-
end
|
66
|
-
puts '*' * 50
|
67
|
-
puts "Finished! Result stored in: #{filename}"
|
68
|
-
end
|