regexp_parser 2.0.2 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +54 -0
  3. data/Gemfile +5 -1
  4. data/README.md +15 -21
  5. data/Rakefile +11 -17
  6. data/lib/regexp_parser/error.rb +4 -0
  7. data/lib/regexp_parser/expression/base.rb +123 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
  12. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
  15. data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
  16. data/lib/regexp_parser/expression/classes/group.rb +6 -1
  17. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  18. data/lib/regexp_parser/expression/classes/property.rb +1 -3
  19. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  20. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  21. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  22. data/lib/regexp_parser/expression/sequence.rb +3 -10
  23. data/lib/regexp_parser/expression/subexpression.rb +1 -2
  24. data/lib/regexp_parser/expression.rb +7 -130
  25. data/lib/regexp_parser/lexer.rb +7 -5
  26. data/lib/regexp_parser/parser.rb +282 -334
  27. data/lib/regexp_parser/scanner/properties/long.yml +13 -0
  28. data/lib/regexp_parser/scanner/properties/short.yml +9 -1
  29. data/lib/regexp_parser/scanner/scanner.rl +64 -87
  30. data/lib/regexp_parser/scanner.rb +1024 -1073
  31. data/lib/regexp_parser/syntax/any.rb +2 -4
  32. data/lib/regexp_parser/syntax/base.rb +10 -10
  33. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  34. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  35. data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
  36. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  37. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  38. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  39. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  40. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  41. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  42. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  43. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  44. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  45. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  46. data/lib/regexp_parser/syntax/token.rb +45 -0
  47. data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
  48. data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
  49. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  50. data/lib/regexp_parser/syntax.rb +8 -6
  51. data/lib/regexp_parser/token.rb +9 -20
  52. data/lib/regexp_parser/version.rb +1 -1
  53. data/lib/regexp_parser.rb +0 -2
  54. data/spec/expression/clone_spec.rb +36 -4
  55. data/spec/expression/free_space_spec.rb +2 -2
  56. data/spec/expression/methods/match_length_spec.rb +2 -2
  57. data/spec/lexer/nesting_spec.rb +2 -2
  58. data/spec/lexer/refcalls_spec.rb +5 -0
  59. data/spec/parser/all_spec.rb +2 -2
  60. data/spec/parser/escapes_spec.rb +43 -31
  61. data/spec/parser/properties_spec.rb +6 -4
  62. data/spec/parser/refcalls_spec.rb +5 -0
  63. data/spec/parser/set/ranges_spec.rb +26 -16
  64. data/spec/scanner/escapes_spec.rb +29 -20
  65. data/spec/scanner/refcalls_spec.rb +19 -0
  66. data/spec/scanner/sets_spec.rb +66 -23
  67. data/spec/spec_helper.rb +13 -1
  68. data/spec/support/capturing_stderr.rb +9 -0
  69. data/spec/syntax/versions/1.8.6_spec.rb +2 -2
  70. data/spec/syntax/versions/2.0.0_spec.rb +2 -2
  71. data/spec/syntax/versions/aliases_spec.rb +1 -0
  72. metadata +27 -26
  73. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  74. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  75. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  76. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  77. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  78. data/spec/support/runner.rb +0 -42
  79. data/spec/support/warning_extractor.rb +0 -60
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a3d9161fb969c7c5aac0798a31054ed4f0d5da8992b8167afa7025492d1042c9
4
- data.tar.gz: 8321d42545c5dbfc810ad3f32fb03677e95d1dba3f3145f20af2e655f1ca45c3
3
+ metadata.gz: 332259c898b9b344e10961053bb2b761f4dd5530182a5f6195639dba9cbb99f9
4
+ data.tar.gz: b537f9bd23db799ee562494633f1e8423501651540a04b634ae07dfe8f3b19c3
5
5
  SHA512:
6
- metadata.gz: def5d282e5720c260bcb581e666704d04579534f8e04986867382836e58f4949d1eea0d3f7b3b2efdb71c5796b3bc6e5c4f92594aab9c9cd61985de18627c026
7
- data.tar.gz: 3e2671325e18a6b4d61b5cd9da410f21a3cdc24382f415b9517498437d2c5e8e473ebe481aeb610667f33aa9d538d05d43b3fae5565b1fb856690e81377e82e5
6
+ metadata.gz: 393ecc1cc20189e4a79252e6acf6dab7dd6dc07ba9c47ae7479746eaf8ebe2ccfd1ebcb82fd027edc2c5c938eb490f2f36a93587d2405a54017e0e2727a35a15
7
+ data.tar.gz: 6c961232ce5f3f409c91d0b66dd23c809e92f47aa6c1f94f2f1929e8eeccfb4bc25fcdf5935fc968d7e0c0ae632992a6d38bc8e982858f2da996a8eac54d3c89
data/CHANGELOG.md CHANGED
@@ -1,5 +1,59 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
4
+
5
+ ### Added
6
+
7
+ - Added support for 13 new unicode properties introduced in Ruby 3.1.0-dev
8
+
9
+ ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
10
+
11
+ ### Fixed
12
+
13
+ - fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
14
+ * thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
15
+
16
+ ## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
17
+
18
+ ### Added
19
+
20
+ - common ancestor for all scanning/parsing/lexing errors
21
+ * `Regexp::Parser::Error` can now be rescued as a catch-all
22
+ * the following errors (and their many descendants) now inherit from it:
23
+ - `Regexp::Expression::Conditional::TooManyBranches`
24
+ - `Regexp::Parser::ParserError`
25
+ - `Regexp::Scanner::ScannerError`
26
+ - `Regexp::Scanner::ValidationError`
27
+ - `Regexp::Syntax::SyntaxError`
28
+ * it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
29
+ * thanks to [sandstrom](https://github.com/sandstrom) for the cue
30
+
31
+ ### Fixed
32
+
33
+ - fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
34
+ * a regression in v2.0.1 had caused them to be scanned as literals
35
+ - fixed scanning of some backreference and subexpression call edge cases
36
+ * e.g. `\k<+1>`, `\g<x-1>`
37
+ - fixed tokenization of some escapes in character sets
38
+ * `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
39
+ * all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
40
+ * if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
41
+ * the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
42
+ - fixed handling of control/metacontrol escapes in character sets
43
+ * e.g. `[\cX]`, `[\M-\C-X]`
44
+ * they were misread as a bunch of individual literals, escapes, and ranges
45
+ - fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
46
+
47
+ ## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
48
+
49
+ ### Fixed
50
+
51
+ - fixed error when scanning some unlikely and redundant but valid charset patterns
52
+ * e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
53
+ - fixed ancestry of some error classes related to syntax version lookup
54
+ * `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
55
+ * they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
56
+
3
57
  ## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
4
58
 
5
59
  ### Fixed
data/Gemfile CHANGED
@@ -6,5 +6,9 @@ group :development, :test do
6
6
  gem 'ice_nine', '~> 0.11.2'
7
7
  gem 'rake', '~> 13.0'
8
8
  gem 'regexp_property_values', '~> 1.0'
9
- gem 'rspec', '~> 3.8'
9
+ gem 'rspec', '~> 3.10'
10
+ if RUBY_VERSION.to_f >= 2.7
11
+ gem 'gouteur'
12
+ gem 'rubocop', '~> 1.7'
13
+ end
10
14
  end
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
1
  # Regexp::Parser
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
3
+ [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
4
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions)
5
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions)
6
+ [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
4
7
 
5
8
  A Ruby gem for tokenizing, parsing, and transforming regular expressions.
6
9
 
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
357
360
  | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | &#x2713; |
358
361
  | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | &#x2713; |
359
362
  | **String Escapes** | | &#x22f1; |
360
- | &emsp;&nbsp;_**Control**_ | `\C-C`, `\cD` | &#x2713; |
363
+ | &emsp;&nbsp;_**Control** \[1\]_ | `\C-C`, `\cD` | &#x2713; |
361
364
  | &emsp;&nbsp;_**Hex**_ | `\x20`, `\x{701230}` | &#x2713; |
362
- | &emsp;&nbsp;_**Meta**_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
365
+ | &emsp;&nbsp;_**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
363
366
  | &emsp;&nbsp;_**Octal**_ | `\0`, `\01`, `\012` | &#x2713; |
364
367
  | &emsp;&nbsp;_**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | &#x2713; |
365
- | **Unicode Properties** | _<sub>([Unicode 11.0.0](http://www.unicode.org/versions/Unicode11.0.0/))</sub>_ | &#x22f1; |
368
+ | **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
366
369
  | &emsp;&nbsp;_**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | &#x2713; |
367
370
  | &emsp;&nbsp;_**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | &#x2713; |
368
371
  | &emsp;&nbsp;_**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | &#x2713; |
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
371
374
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | &#x2713; |
372
375
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | &#x2713; |
373
376
 
377
+ **\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
378
+ https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
379
+ scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
380
+
374
381
  ##### Inapplicable Features
375
382
 
376
383
  Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
384
391
  - **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
385
392
  - **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
386
393
 
387
-
388
394
  See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
389
395
 
390
396
  _**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
392
398
 
393
399
 
394
400
  ## Testing
395
- To run the tests simply run rake from the root directory, as 'test' is the default task.
396
-
397
- It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
398
-
399
- The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
400
-
401
- ```
402
- bin/test
403
- ```
404
-
405
- You can run a specific test like so:
401
+ To run the tests simply run rake from the root directory.
406
402
 
407
- ```
408
- bin/test spec/scanner/properties_spec.rb
409
- ```
403
+ The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
410
404
 
411
- Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
405
+ Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
412
406
 
413
407
  ```
414
- rake ragel:rb && bin/test spec/scanner/properties_spec.rb
408
+ rake ragel:rb && rspec spec/scanner/properties_spec.rb
415
409
  ```
416
410
 
417
411
  ## Building
data/Rakefile CHANGED
@@ -1,35 +1,31 @@
1
+ require 'bundler'
1
2
  require 'rubygems'
2
-
3
+ require 'rubygems/package_task'
3
4
  require 'rake'
4
5
  require 'rake/testtask'
6
+ require 'rspec/core/rake_task'
5
7
 
6
- require 'bundler'
7
- require 'rubygems/package_task'
8
-
9
-
10
- RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
11
- RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
8
+ RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
9
+ RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
12
10
  RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
13
11
 
14
-
15
12
  Bundler::GemHelper.install_tasks
16
13
 
14
+ RSpec::Core::RakeTask.new(:spec)
17
15
 
18
16
  task :default => [:'test:full']
19
17
 
20
18
  namespace :test do
21
- task full: :'ragel:rb' do
22
- sh 'bin/test'
23
- end
19
+ task full: [:'ragel:rb', :spec]
24
20
  end
25
21
 
26
22
  namespace :ragel do
27
23
  desc "Process the ragel source files and output ruby code"
28
24
  task :rb do
29
- RAGEL_SOURCE_FILES.each do |file|
30
- output_file = "#{RAGEL_OUTPUT_DIR}/#{file}.rb"
25
+ RAGEL_SOURCE_FILES.each do |source_file|
26
+ output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
31
27
  # using faster flat table driven FSM, about 25% larger code, but about 30% faster
32
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{file}.rl -o #{output_file}"
28
+ sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
33
29
 
34
30
  contents = File.read(output_file)
35
31
 
@@ -49,19 +45,17 @@ namespace :ragel do
49
45
  end
50
46
  end
51
47
 
52
-
53
48
  # Add ragel task as a prerequisite for building the gem to ensure that the
54
49
  # latest scanner code is generated and included in the build.
55
50
  desc "Runs ragel:rb before building the gem"
56
51
  task :build => ['ragel:rb']
57
52
 
58
-
59
53
  namespace :props do
60
54
  desc 'Write new property value hashes for the properties scanner'
61
55
  task :update do
62
56
  require 'regexp_property_values'
63
57
  RegexpPropertyValues.update
64
- dir = File.expand_path('../lib/regexp_parser/scanner/properties', __FILE__)
58
+ dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
65
59
 
66
60
  require 'psych'
67
61
  write_hash_to_file = ->(hash, path) do
@@ -0,0 +1,4 @@
1
+ class Regexp::Parser
2
+ # base class for all gem-specific errors (inherited but never raised itself)
3
+ class Error < StandardError; end
4
+ end
@@ -0,0 +1,123 @@
1
+ module Regexp::Expression
2
+ class Base
3
+ attr_accessor :type, :token
4
+ attr_accessor :text, :ts
5
+ attr_accessor :level, :set_level, :conditional_level, :nesting_level
6
+
7
+ attr_accessor :quantifier
8
+ attr_accessor :options
9
+
10
+ def initialize(token, options = {})
11
+ self.type = token.type
12
+ self.token = token.token
13
+ self.text = token.text
14
+ self.ts = token.ts
15
+ self.level = token.level
16
+ self.set_level = token.set_level
17
+ self.conditional_level = token.conditional_level
18
+ self.nesting_level = 0
19
+ self.quantifier = nil
20
+ self.options = options
21
+ end
22
+
23
+ def initialize_copy(orig)
24
+ self.text = (orig.text ? orig.text.dup : nil)
25
+ self.options = (orig.options ? orig.options.dup : nil)
26
+ self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
27
+ super
28
+ end
29
+
30
+ def to_re(format = :full)
31
+ ::Regexp.new(to_s(format))
32
+ end
33
+
34
+ alias :starts_at :ts
35
+
36
+ def base_length
37
+ to_s(:base).length
38
+ end
39
+
40
+ def full_length
41
+ to_s.length
42
+ end
43
+
44
+ def offset
45
+ [starts_at, full_length]
46
+ end
47
+
48
+ def coded_offset
49
+ '@%d+%d' % offset
50
+ end
51
+
52
+ def to_s(format = :full)
53
+ "#{text}#{quantifier_affix(format)}"
54
+ end
55
+
56
+ def quantifier_affix(expression_format)
57
+ quantifier.to_s if quantified? && expression_format != :base
58
+ end
59
+
60
+ def terminal?
61
+ !respond_to?(:expressions)
62
+ end
63
+
64
+ def quantify(token, text, min = nil, max = nil, mode = :greedy)
65
+ self.quantifier = Quantifier.new(token, text, min, max, mode)
66
+ end
67
+
68
+ def unquantified_clone
69
+ clone.tap { |exp| exp.quantifier = nil }
70
+ end
71
+
72
+ def quantified?
73
+ !quantifier.nil?
74
+ end
75
+
76
+ # Deprecated. Prefer `#repetitions` which has a more uniform interface.
77
+ def quantity
78
+ return [nil,nil] unless quantified?
79
+ [quantifier.min, quantifier.max]
80
+ end
81
+
82
+ def repetitions
83
+ return 1..1 unless quantified?
84
+ min = quantifier.min
85
+ max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
86
+ range = min..max
87
+ # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
88
+ if RUBY_VERSION.to_f < 2.7
89
+ range.define_singleton_method(:minmax) { [min, max] }
90
+ end
91
+ range
92
+ end
93
+
94
+ def greedy?
95
+ quantified? and quantifier.greedy?
96
+ end
97
+
98
+ def reluctant?
99
+ quantified? and quantifier.reluctant?
100
+ end
101
+ alias :lazy? :reluctant?
102
+
103
+ def possessive?
104
+ quantified? and quantifier.possessive?
105
+ end
106
+
107
+ def attributes
108
+ {
109
+ type: type,
110
+ token: token,
111
+ text: to_s(:base),
112
+ starts_at: ts,
113
+ length: full_length,
114
+ level: level,
115
+ set_level: set_level,
116
+ conditional_level: conditional_level,
117
+ options: options,
118
+ quantifier: quantified? ? quantifier.to_h : nil,
119
+ }
120
+ end
121
+ alias :to_h :attributes
122
+ end
123
+ end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module Anchor
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -22,5 +21,4 @@ module Regexp::Expression
22
21
  EOS = EndOfString
23
22
  EOSobEOL = EndOfStringOrBeforeEndOfLine
24
23
  end
25
-
26
24
  end
@@ -2,6 +2,11 @@ module Regexp::Expression
2
2
  module Backreference
3
3
  class Base < Regexp::Expression::Base
4
4
  attr_accessor :referenced_expression
5
+
6
+ def initialize_copy(orig)
7
+ self.referenced_expression = orig.referenced_expression.dup
8
+ super
9
+ end
5
10
  end
6
11
 
7
12
  class Number < Backreference::Base
@@ -7,7 +7,8 @@ module Regexp::Expression
7
7
  alias :ts :starts_at
8
8
 
9
9
  def <<(exp)
10
- complete? && raise("Can't add more than 2 expressions to a Range")
10
+ complete? and raise Regexp::Parser::Error,
11
+ "Can't add more than 2 expressions to a Range"
11
12
  super
12
13
  end
13
14
 
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Conditional
3
- class TooManyBranches < StandardError
3
+ class TooManyBranches < Regexp::Parser::Error
4
4
  def initialize
5
5
  super('The conditional expression has more than 2 branches')
6
6
  end
@@ -15,6 +15,11 @@ module Regexp::Expression
15
15
  ref = text.tr("'<>()", "")
16
16
  ref =~ /\D/ ? ref : Integer(ref)
17
17
  end
18
+
19
+ def initialize_copy(orig)
20
+ self.referenced_expression = orig.referenced_expression.dup
21
+ super
22
+ end
18
23
  end
19
24
 
20
25
  class Branch < Regexp::Expression::Sequence; end
@@ -53,6 +58,11 @@ module Regexp::Expression
53
58
  def to_s(format = :full)
54
59
  "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
55
60
  end
61
+
62
+ def initialize_copy(orig)
63
+ self.referenced_expression = orig.referenced_expression.dup
64
+ super
65
+ end
56
66
  end
57
67
  end
58
68
  end
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
2
3
  module EscapeSequence
3
4
  class Base < Regexp::Expression::Base
4
5
  require 'yaml'
@@ -1,8 +1,7 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
3
  def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
5
- raise "Can not quantify a free space object"
4
+ raise Regexp::Parser::Error, 'Can not quantify a free space object'
6
5
  end
7
6
  end
8
7
 
@@ -13,5 +12,4 @@ module Regexp::Expression
13
12
  text << exp.text
14
13
  end
15
14
  end
16
-
17
15
  end
@@ -35,6 +35,11 @@ module Regexp::Expression
35
35
  class Atomic < Group::Base; end
36
36
  class Options < Group::Base
37
37
  attr_accessor :option_changes
38
+
39
+ def initialize_copy(orig)
40
+ self.option_changes = orig.option_changes.dup
41
+ super
42
+ end
38
43
  end
39
44
 
40
45
  class Capture < Group::Base
@@ -53,7 +58,7 @@ module Regexp::Expression
53
58
  super
54
59
  end
55
60
 
56
- def initialize_clone(orig)
61
+ def initialize_copy(orig)
57
62
  @name = orig.name.dup
58
63
  super
59
64
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -7,7 +6,7 @@ module Regexp::Expression
7
6
  end
8
7
 
9
8
  def name
10
- text =~ /\A\\[pP]\{([^}]+)\}\z/; $1
9
+ text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
10
  end
12
11
 
13
12
  def shortcut
@@ -116,5 +115,4 @@ module Regexp::Expression
116
115
  class Script < UnicodeProperty::Base; end
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
119
-
120
118
  end # module Regexp::Expression
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
3
  def self.build(options = {})
5
4
  new(build_token, options)
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
3
+ MODES = %i[greedy possessive reluctant]
4
4
 
5
5
  attr_reader :token, :text, :min, :max, :mode
6
6
 
@@ -12,7 +12,7 @@ module Regexp::Expression
12
12
  @max = max
13
13
  end
14
14
 
15
- def initialize_clone(orig)
15
+ def initialize_copy(orig)
16
16
  @text = orig.text.dup
17
17
  super
18
18
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpression by how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -41,17 +40,11 @@ module Regexp::Expression
41
40
  alias :ts :starts_at
42
41
 
43
42
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
44
- offset = -1
45
- target = expressions[offset]
46
- while target.is_a?(FreeSpace)
47
- target = expressions[offset -= 1]
48
- end
49
-
50
- target || raise(ArgumentError, "No valid target found for '#{text}' "\
51
- 'quantifier')
43
+ target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
44
+ target or raise Regexp::Parser::Error,
45
+ "No valid target found for '#{text}' quantifier"
52
46
 
53
47
  target.quantify(token, text, min, max, mode)
54
48
  end
55
49
  end
56
-
57
50
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Subexpression < Regexp::Expression::Base
4
3
  include Enumerable
5
4
 
@@ -12,7 +11,7 @@ module Regexp::Expression
12
11
  end
13
12
 
14
13
  # Override base method to clone the expressions as well.
15
- def initialize_clone(orig)
14
+ def initialize_copy(orig)
16
15
  self.expressions = orig.expressions.map(&:clone)
17
16
  super
18
17
  end