regexp-examples 1.5.1 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 38fbbe3d506284921194e15fded3d672d114212223e46cc380bba3f0f669b432
4
- data.tar.gz: d0c47861293efcf61aa4f0d7ce3eee05aac8e57c43affe9a0e58af61e8cbc50b
3
+ metadata.gz: d1bdebf385885ec8033d7f7b34d5d473837c7ae983b4fbda4facb45f8bbc96a0
4
+ data.tar.gz: 79b2c7a8c76ebe3e3d6d2e0d6ee12ffaefc5a86acdbe69d8d5476afab1c25f96
5
5
  SHA512:
6
- metadata.gz: 989d3c74e5120a0612d340b3919e511b8aad3eabc06a5c939bab2deb724f0168644af75f4e47c5b5c577689df768c8414ac1c93a1e1d4cf0885f75a8a983d1ca
7
- data.tar.gz: 8125cb96126af56ce2fc283e47da4963706e86f11f9fa287c8fdd4b495514852c6ab64aad061ce60a19160694eea39382621cfda56ad92808d2f4fc855f0448d
6
+ metadata.gz: 5858e4813711b88549a2237857f23db0c941b5d3f9fd88275ab83b7b0e2002b854bbfa774d0d8c05e99df9098b2935f03346823df2f57819159f30c9d36e9767
7
+ data.tar.gz: 126a04b363ed23ef0e4ea4ee5f36f40b20280e44e8892985f87f0eef391ddcb20c2299a87324b15693aa02c5ab2eb484c8d2622889f057a5b1b29f82fed73c7b
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.6.3
1
+ 3.1.2
data/.travis.yml CHANGED
@@ -1,10 +1,12 @@
1
1
  language: ruby
2
+ cache: bundler
2
3
  rvm:
3
4
  - 2.4.5
4
5
  - 2.5.5
5
6
  - 2.6.2 # Uses unicode 12.0.0
6
7
  - 2.6.3 # Uses unicode 12.1.0
7
- - 2.7.0
8
+ - 2.7.2
9
+ - 3.0.0
8
10
  - ruby-head
9
11
  matrix:
10
12
  allow_failures:
data/Gemfile CHANGED
@@ -3,7 +3,6 @@ source 'https://rubygems.org'
3
3
  group :test do
4
4
  gem 'rspec'
5
5
  gem 'coveralls', require: false
6
- gem 'pry'
7
6
  end
8
7
 
9
8
  # Specify your gem's dependencies in regexp-examples.gemspec
data/README.md CHANGED
@@ -52,7 +52,7 @@ Obviously, you will get different (random) results if you try these yourself!
52
52
  ```
53
53
 
54
54
  ## Supported ruby versions
55
- MRI 2.4.0 (oldest non-[EOL](https://www.ruby-lang.org/en/news/2019/03/31/support-of-ruby-2-3-has-ended/) version) --> 2.6.3 (latest stable version)
55
+ MRI 2.4.0 (oldest non-[EOL](https://www.ruby-lang.org/en/news/2019/03/31/support-of-ruby-2-3-has-ended/) version) --> 3.0.0 (latest stable version)
56
56
 
57
57
  MRI 2.0.0 --> 2.3.x were supported until version `1.5.0` of this library. Support was dropped primarily
58
58
  because of the need to use `RbConfig::CONFIG['UNICODE_VERSION']`, which was added to ruby version `2.4.0`.
@@ -53,7 +53,5 @@ module RegexpExamples
53
53
  'word' => Word,
54
54
  'ascii' => Any
55
55
  }.freeze
56
-
57
- NamedPropertyCharMap = UnicodeCharRanges.instance
58
56
  end.freeze
59
57
  end
@@ -2,7 +2,7 @@
2
2
  module RegexpExamples
3
3
  module CharsetNegationHelper
4
4
  def negate_if(charset, is_negative)
5
- is_negative ? (CharSets::Any.dup - charset) : charset
5
+ is_negative ? (CharSets::Any.dup - charset.to_a) : charset
6
6
  end
7
7
  end
8
8
  end
@@ -1,3 +1,5 @@
1
+ require 'regexp_property_values'
2
+
1
3
  module RegexpExamples
2
4
  # A collection of related helper methods, utilised by the `Parser` class
3
5
  module ParseAfterBackslashGroupHelper
@@ -91,13 +93,19 @@ module RegexpExamples
91
93
  end
92
94
 
93
95
  def parse_backslash_named_property(p_negation, caret_negation, property_name)
94
- @current_position += (caret_negation.length + # 0 or 1, of '^' is present
96
+ @current_position += (caret_negation.length + # 0 or 1, if '^' is present
95
97
  property_name.length +
96
98
  2) # Length of opening and closing brackets (always 2)
97
99
  # Beware of double negatives! E.g. /\P{^Space}/
98
100
  is_negative = (p_negation == 'P') ^ (caret_negation == '^')
99
101
  CharGroup.new(
100
- negate_if(CharSets::NamedPropertyCharMap[property_name.downcase], is_negative),
102
+ negate_if(
103
+ RegexpPropertyValues[property_name]
104
+ .matched_codepoints
105
+ .lazy
106
+ .filter_map { |cp| cp.chr('utf-8') unless cp.between?(0xD800, 0xDFFF) },
107
+ is_negative
108
+ ),
101
109
  @ignorecase
102
110
  )
103
111
  end
@@ -1,4 +1,4 @@
1
1
  # Gem version
2
2
  module RegexpExamples
3
- VERSION = '1.5.1'.freeze
3
+ VERSION = '1.6.0'.freeze
4
4
  end
@@ -1,4 +1,3 @@
1
- require_relative 'regexp-examples/unicode_char_ranges'
2
1
  require_relative 'regexp-examples/chargroup_parser'
3
2
  require_relative 'regexp-examples/config'
4
3
  require_relative 'regexp-examples/char_sets'
@@ -8,6 +7,5 @@ require_relative 'regexp-examples/max_results_limiter'
8
7
  require_relative 'regexp-examples/helpers'
9
8
  require_relative 'regexp-examples/parser'
10
9
  require_relative 'regexp-examples/repeaters'
11
- require_relative 'regexp-examples/unicode_char_ranges'
12
10
  require_relative 'regexp-examples/version'
13
11
  require_relative 'core_extensions/regexp/examples'
@@ -14,9 +14,10 @@ Gem::Specification.new do |s|
14
14
  s.test_files = s.files.grep(/^(test|spec|features)\//)
15
15
  s.require_paths = ['lib']
16
16
  s.homepage = 'http://rubygems.org/gems/regexp-examples'
17
- s.add_development_dependency 'bundler', '> 1.7'
18
- s.add_development_dependency 'rake', '~> 12.0'
19
- s.add_development_dependency 'pry', '~> 0.12.0'
17
+ s.add_dependency 'regexp_property_values', '~> 1.5'
18
+ s.add_development_dependency 'bundler', '~> 2.4'
19
+ s.add_development_dependency 'rake', '~> 13.0'
20
+ s.add_development_dependency 'pry'
20
21
  s.add_development_dependency 'warning', '~> 0.10.0'
21
22
  s.license = 'MIT'
22
23
  s.required_ruby_version = '>= 2.4.0'
@@ -190,37 +190,24 @@ RSpec.describe Regexp, '#examples' do
190
190
  /\P{Ll}/, # Negation syntax type 2
191
191
  /\P{^Ll}/ # Double negation!! (Should cancel out)
192
192
  )
193
- # An exhaustive set of tests for all named properties!!! This is useful
194
- # for verifying the PStore contains correct values for all ruby versions
195
- %w[
196
- Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit
197
- Word ASCII Any Assigned L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd
198
- Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl Zp C Cc Cf Cn Co Arabic Armenian
199
- Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
200
- Cham Cherokee Common Coptic Cyrillic Devanagari Ethiopic Georgian
201
- Glagolitic Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew Hiragana
202
- Inherited Kannada Katakana Kayah_Li Khmer Lao Latin Lepcha Limbu Malayalam
203
- Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki Oriya Phags_Pa Rejang
204
- Runic Saurashtra Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa
205
- Tai_Le Tamil Telugu Thaana Thai Tibetan Tifinagh Vai Yi
206
- ].each do |property|
207
- it "examples for /\p{#{property}}/" do
208
- regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
209
- expect(regexp_examples)
210
- .not_to be_empty,
211
- "No examples were generated for regexp: /\p{#{property}}/"
212
- # Just do one big check, for test system performance (~30% faster)
213
- # (Otherwise, we're doing up to 128 checks on 123 properties!!!)
214
- expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
215
- end
216
- end
217
193
 
218
- # The following seem to genuinely have no matching examples (!!??!!?!)
219
- %w[
220
- Cs Carian Cuneiform Cypriot Deseret Gothic Kharoshthi Linear_B Lycian
221
- Lydian Old_Italic Old_Persian Osmanya Phoenician Shavian Ugaritic
222
- ].each do |property|
223
- examples_are_empty(/\p{#{property}}/)
194
+ expected_empty_properties = %w[surrogate inlowsurrogates inhighsurrogates inhighprivateusesurrogates]
195
+
196
+ RegexpPropertyValues.all_for_current_ruby.map(&:identifier).each do |property|
197
+ if(expected_empty_properties).include?(property)
198
+ examples_are_empty(/\p{#{property}}/)
199
+ else
200
+ it "examples for /\p{#{property}}/" do
201
+ regexp_examples = /\p{#{property}}/.examples(max_group_results: 99_999)
202
+
203
+ expect(regexp_examples)
204
+ .not_to be_empty,
205
+ "No examples were generated for regexp: /\p{#{property}}/"
206
+ # Just do one big check, for test system performance (~30% faster)
207
+ # (Otherwise, we're potentially doing 99999 checks on 123 properties!!!)
208
+ expect(regexp_examples.join('')).to match(/\A\p{#{property}}+\z/)
209
+ end
210
+ end
224
211
  end
225
212
  end
226
213
 
metadata CHANGED
@@ -1,57 +1,71 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: regexp-examples
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.1
4
+ version: 1.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Lord
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-09 00:00:00.000000000 Z
11
+ date: 2024-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: regexp_property_values
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.5'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.5'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: bundler
15
29
  requirement: !ruby/object:Gem::Requirement
16
30
  requirements:
17
- - - ">"
31
+ - - "~>"
18
32
  - !ruby/object:Gem::Version
19
- version: '1.7'
33
+ version: '2.4'
20
34
  type: :development
21
35
  prerelease: false
22
36
  version_requirements: !ruby/object:Gem::Requirement
23
37
  requirements:
24
- - - ">"
38
+ - - "~>"
25
39
  - !ruby/object:Gem::Version
26
- version: '1.7'
40
+ version: '2.4'
27
41
  - !ruby/object:Gem::Dependency
28
42
  name: rake
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - "~>"
32
46
  - !ruby/object:Gem::Version
33
- version: '12.0'
47
+ version: '13.0'
34
48
  type: :development
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
52
  - - "~>"
39
53
  - !ruby/object:Gem::Version
40
- version: '12.0'
54
+ version: '13.0'
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: pry
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
- - - "~>"
59
+ - - ">="
46
60
  - !ruby/object:Gem::Version
47
- version: 0.12.0
61
+ version: '0'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
- - - "~>"
66
+ - - ">="
53
67
  - !ruby/object:Gem::Version
54
- version: 0.12.0
68
+ version: '0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: warning
57
71
  requirement: !ruby/object:Gem::Requirement
@@ -103,10 +117,8 @@ files:
103
117
  - lib/regexp-examples/parser_helpers/parse_multi_group_helper.rb
104
118
  - lib/regexp-examples/parser_helpers/parse_repeater_helper.rb
105
119
  - lib/regexp-examples/repeaters.rb
106
- - lib/regexp-examples/unicode_char_ranges.rb
107
120
  - lib/regexp-examples/version.rb
108
121
  - regexp-examples.gemspec
109
- - scripts/unicode_lister.rb
110
122
  - spec/config_spec.rb
111
123
  - spec/gem_helper.rb
112
124
  - spec/helpers.rb
@@ -118,7 +130,7 @@ homepage: http://rubygems.org/gems/regexp-examples
118
130
  licenses:
119
131
  - MIT
120
132
  metadata: {}
121
- post_install_message:
133
+ post_install_message:
122
134
  rdoc_options: []
123
135
  require_paths:
124
136
  - lib
@@ -133,8 +145,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
133
145
  - !ruby/object:Gem::Version
134
146
  version: '0'
135
147
  requirements: []
136
- rubygems_version: 3.0.3
137
- signing_key:
148
+ rubygems_version: 3.3.7
149
+ signing_key:
138
150
  specification_version: 4
139
151
  summary: Extends the Regexp class with '#examples' and '#random_example'
140
152
  test_files:
@@ -1,59 +0,0 @@
1
- require 'pstore'
2
- require 'singleton'
3
-
4
- module RegexpExamples
5
- # Interface to the retrieve the character sets that match a regex named property.
6
- # E.g. `/\p{Alpha}/`
7
- # These matching values are stored, compressed, in a PStore. They are specific to
8
- # the ruby minor version.
9
- class UnicodeCharRanges
10
- include Singleton
11
- # These values were generated by: scripts/unicode_lister.rb
12
- # Note: Only the first 128 results are listed, for performance.
13
- # Also, some groups seem to have no matches (weird!)
14
- STORE_FILENAME = "unicode_ranges_#{RbConfig::CONFIG['UNICODE_VERSION']}.pstore".freeze
15
-
16
- attr_reader :range_store
17
-
18
- def initialize
19
- @range_store = PStore.new(unicode_ranges_file)
20
- end
21
-
22
- def get(key)
23
- range_store.transaction(true) do
24
- ranges_to_unicode(range_store[key])
25
- end
26
- end
27
-
28
- alias [] get
29
-
30
- private
31
-
32
- # The method is written like this to future-proof it a little,
33
- # i.e. the gem won't completely break for a new ruby version release
34
- def unicode_ranges_file
35
- db_path = File.join(__dir__, '../../db')
36
- Dir["#{db_path}/*.pstore"].sort.select do |file|
37
- file <= "#{db_path}/#{STORE_FILENAME}"
38
- end.last
39
- end
40
-
41
- # TODO: Document example input/output of this method
42
- # It's pretty simple, but this code is a little confusing!!
43
- def ranges_to_unicode(ranges)
44
- result = []
45
- ranges.each do |range|
46
- if range.is_a? Integer # Small hack to increase data compression
47
- result << hex_to_unicode(range.to_s(16))
48
- else
49
- range.each { |num| result << hex_to_unicode(num.to_s(16)) }
50
- end
51
- end
52
- result
53
- end
54
-
55
- def hex_to_unicode(hex)
56
- [hex.to_i(16)].pack('U')
57
- end
58
- end
59
- end
@@ -1,68 +0,0 @@
1
- require 'pstore'
2
- require_relative '../lib/regexp-examples/unicode_char_ranges'
3
- # A script to generate lists of all unicode characters
4
- # that match all named group/character properties regexps.
5
- # For use in e.g. /\p{Arabic}/.examples
6
-
7
- # To (re-)generate this list, simply run this file!
8
- # > ruby scripts/unicode_lister.rb
9
-
10
- # Taken from ruby documentation:
11
- # http://ruby-doc.org//core-2.2.0/Regexp.html#class-Regexp-label-Character+Properties
12
- NAMED_GROUPS = %w(
13
- Alnum Alpha Blank Cntrl Digit Graph Lower Print Punct Space Upper XDigit Word ASCII
14
- Any Assigned
15
-
16
- L Ll Lm Lo Lt Lu M Mn Mc Me N Nd Nl No P Pc Pd Ps Pe Pi Pf Po S Sm Sc Sk So Z Zs Zl
17
- Zp C Cc Cf Cn Co Cs
18
-
19
- Arabic Armenian Balinese Bengali Bopomofo Braille Buginese Buhid Canadian_Aboriginal
20
- Carian Cham Cherokee Common Coptic Cuneiform Cypriot Cyrillic Deseret Devanagari
21
- Ethiopic Georgian Glagolitic Gothic Greek Gujarati Gurmukhi Han Hangul Hanunoo Hebrew
22
- Hiragana Inherited Kannada Katakana Kayah_Li Kharoshthi Khmer Lao Latin Lepcha Limbu
23
- Linear_B Lycian Lydian Malayalam Mongolian Myanmar New_Tai_Lue Nko Ogham Ol_Chiki
24
- Old_Italic Old_Persian Oriya Osmanya Phags_Pa Phoenician Rejang Runic Saurashtra
25
- Shavian Sinhala Sundanese Syloti_Nagri Syriac Tagalog Tagbanwa Tai_Le Tamil Telugu
26
- Thaana Thai Tibetan Tifinagh Ugaritic Vai Yi
27
- )
28
-
29
- # Note: For the range 55296..57343, these are reserved values that are not legal
30
- # unicode characters.
31
- # I.e. a character encoding-related exception gets raised when you do:
32
- # `/regex/ =~ eval("?\\u{#{x.to_s(16)}}")`
33
- # TODO: Add a link to somewhere that explains this better.
34
-
35
- # "Compresses" the values in an array by using ranges.
36
- # Example input: [1, 2, 3, 4, 6, 7, 12, 14]
37
- # Example output: [1..4, 6..7, 12, 14]
38
- def calculate_ranges(matching_codes)
39
- return [] if matching_codes.empty?
40
- first = matching_codes.shift
41
- matching_codes.inject([first..first]) do |r, x|
42
- if r.last.last.succ != x
43
- r << (x..x) # Start new range
44
- else
45
- r[0..-2] << (r.last.first..x) # Update last range
46
- end
47
- end
48
- .map { |range| range.size == 1 ? range.first : range } # Replace `int..int` with `int`
49
- end
50
-
51
- count = 0
52
- filename = "./db/#{RegexpExamples::UnicodeCharRanges::STORE_FILENAME}"
53
- store = PStore.new(filename)
54
- store.transaction do
55
- NAMED_GROUPS.each do |name|
56
- count += 1
57
- # Only generating first 128 matches, for performance...
58
- # (I have tried this with generating ALL examples, and it makes the ruby gem
59
- # painfully slow and bloated... Especially the test suite.)
60
- matching_codes = [(0..55_295), (57_344..65_535)].map(&:to_a).flatten.lazy
61
- .select { |x| /\p{#{name}}/ =~ eval("?\\u{#{x.to_s(16)}}") }
62
- .first(128)
63
- store[name.downcase] = calculate_ranges(matching_codes)
64
- puts "(#{count}/#{NAMED_GROUPS.length}) Finished property: #{name}"
65
- end
66
- puts '*' * 50
67
- puts "Finished! Result stored in: #{filename}"
68
- end