regexp_parser 2.0.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +54 -0
  3. data/Gemfile +5 -1
  4. data/README.md +15 -21
  5. data/Rakefile +11 -17
  6. data/lib/regexp_parser/error.rb +4 -0
  7. data/lib/regexp_parser/expression/base.rb +123 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
  12. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  13. data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
  14. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
  15. data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
  16. data/lib/regexp_parser/expression/classes/group.rb +6 -1
  17. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  18. data/lib/regexp_parser/expression/classes/property.rb +1 -3
  19. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  20. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  21. data/lib/regexp_parser/expression/quantifier.rb +2 -2
  22. data/lib/regexp_parser/expression/sequence.rb +3 -10
  23. data/lib/regexp_parser/expression/subexpression.rb +1 -2
  24. data/lib/regexp_parser/expression.rb +7 -130
  25. data/lib/regexp_parser/lexer.rb +7 -5
  26. data/lib/regexp_parser/parser.rb +282 -334
  27. data/lib/regexp_parser/scanner/properties/long.yml +13 -0
  28. data/lib/regexp_parser/scanner/properties/short.yml +9 -1
  29. data/lib/regexp_parser/scanner/scanner.rl +64 -87
  30. data/lib/regexp_parser/scanner.rb +1024 -1073
  31. data/lib/regexp_parser/syntax/any.rb +2 -4
  32. data/lib/regexp_parser/syntax/base.rb +10 -10
  33. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  34. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  35. data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
  36. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  37. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  38. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  39. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  40. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  41. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  42. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  43. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  44. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  45. data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
  46. data/lib/regexp_parser/syntax/token.rb +45 -0
  47. data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
  48. data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
  49. data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
  50. data/lib/regexp_parser/syntax.rb +8 -6
  51. data/lib/regexp_parser/token.rb +9 -20
  52. data/lib/regexp_parser/version.rb +1 -1
  53. data/lib/regexp_parser.rb +0 -2
  54. data/spec/expression/clone_spec.rb +36 -4
  55. data/spec/expression/free_space_spec.rb +2 -2
  56. data/spec/expression/methods/match_length_spec.rb +2 -2
  57. data/spec/lexer/nesting_spec.rb +2 -2
  58. data/spec/lexer/refcalls_spec.rb +5 -0
  59. data/spec/parser/all_spec.rb +2 -2
  60. data/spec/parser/escapes_spec.rb +43 -31
  61. data/spec/parser/properties_spec.rb +6 -4
  62. data/spec/parser/refcalls_spec.rb +5 -0
  63. data/spec/parser/set/ranges_spec.rb +26 -16
  64. data/spec/scanner/escapes_spec.rb +29 -20
  65. data/spec/scanner/refcalls_spec.rb +19 -0
  66. data/spec/scanner/sets_spec.rb +66 -23
  67. data/spec/spec_helper.rb +13 -1
  68. data/spec/support/capturing_stderr.rb +9 -0
  69. data/spec/syntax/versions/1.8.6_spec.rb +2 -2
  70. data/spec/syntax/versions/2.0.0_spec.rb +2 -2
  71. data/spec/syntax/versions/aliases_spec.rb +1 -0
  72. metadata +27 -26
  73. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  74. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  75. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  76. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  77. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  78. data/spec/support/runner.rb +0 -42
  79. data/spec/support/warning_extractor.rb +0 -60
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a3d9161fb969c7c5aac0798a31054ed4f0d5da8992b8167afa7025492d1042c9
4
- data.tar.gz: 8321d42545c5dbfc810ad3f32fb03677e95d1dba3f3145f20af2e655f1ca45c3
3
+ metadata.gz: 332259c898b9b344e10961053bb2b761f4dd5530182a5f6195639dba9cbb99f9
4
+ data.tar.gz: b537f9bd23db799ee562494633f1e8423501651540a04b634ae07dfe8f3b19c3
5
5
  SHA512:
6
- metadata.gz: def5d282e5720c260bcb581e666704d04579534f8e04986867382836e58f4949d1eea0d3f7b3b2efdb71c5796b3bc6e5c4f92594aab9c9cd61985de18627c026
7
- data.tar.gz: 3e2671325e18a6b4d61b5cd9da410f21a3cdc24382f415b9517498437d2c5e8e473ebe481aeb610667f33aa9d538d05d43b3fae5565b1fb856690e81377e82e5
6
+ metadata.gz: 393ecc1cc20189e4a79252e6acf6dab7dd6dc07ba9c47ae7479746eaf8ebe2ccfd1ebcb82fd027edc2c5c938eb490f2f36a93587d2405a54017e0e2727a35a15
7
+ data.tar.gz: 6c961232ce5f3f409c91d0b66dd23c809e92f47aa6c1f94f2f1929e8eeccfb4bc25fcdf5935fc968d7e0c0ae632992a6d38bc8e982858f2da996a8eac54d3c89
data/CHANGELOG.md CHANGED
@@ -1,5 +1,59 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
4
+
5
+ ### Added
6
+
7
+ - Added support for 13 new unicode properties introduced in Ruby 3.1.0-dev
8
+
9
+ ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
10
+
11
+ ### Fixed
12
+
13
+ - fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
14
+ * thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
15
+
16
+ ## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
17
+
18
+ ### Added
19
+
20
+ - common ancestor for all scanning/parsing/lexing errors
21
+ * `Regexp::Parser::Error` can now be rescued as a catch-all
22
+ * the following errors (and their many descendants) now inherit from it:
23
+ - `Regexp::Expression::Conditional::TooManyBranches`
24
+ - `Regexp::Parser::ParserError`
25
+ - `Regexp::Scanner::ScannerError`
26
+ - `Regexp::Scanner::ValidationError`
27
+ - `Regexp::Syntax::SyntaxError`
28
+ * it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
29
+ * thanks to [sandstrom](https://github.com/sandstrom) for the cue
30
+
31
+ ### Fixed
32
+
33
+ - fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
34
+ * a regression in v2.0.1 had caused them to be scanned as literals
35
+ - fixed scanning of some backreference and subexpression call edge cases
36
+ * e.g. `\k<+1>`, `\g<x-1>`
37
+ - fixed tokenization of some escapes in character sets
38
+ * `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
39
+ * all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
40
+ * if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
41
+ * the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
42
+ - fixed handling of control/metacontrol escapes in character sets
43
+ * e.g. `[\cX]`, `[\M-\C-X]`
44
+ * they were misread as a bunch of individual literals, escapes, and ranges
45
+ - fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
46
+
47
+ ## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
48
+
49
+ ### Fixed
50
+
51
+ - fixed error when scanning some unlikely and redundant but valid charset patterns
52
+ * e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
53
+ - fixed ancestry of some error classes related to syntax version lookup
54
+ * `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
55
+ * they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
56
+
3
57
  ## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
4
58
 
5
59
  ### Fixed
data/Gemfile CHANGED
@@ -6,5 +6,9 @@ group :development, :test do
6
6
  gem 'ice_nine', '~> 0.11.2'
7
7
  gem 'rake', '~> 13.0'
8
8
  gem 'regexp_property_values', '~> 1.0'
9
- gem 'rspec', '~> 3.8'
9
+ gem 'rspec', '~> 3.10'
10
+ if RUBY_VERSION.to_f >= 2.7
11
+ gem 'gouteur'
12
+ gem 'rubocop', '~> 1.7'
13
+ end
10
14
  end
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
1
  # Regexp::Parser
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
3
+ [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
4
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions)
5
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions)
6
+ [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
4
7
 
5
8
  A Ruby gem for tokenizing, parsing, and transforming regular expressions.
6
9
 
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
357
360
  | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | &#x2713; |
358
361
  | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | &#x2713; |
359
362
  | **String Escapes** | | &#x22f1; |
360
- | &emsp;&nbsp;_**Control**_ | `\C-C`, `\cD` | &#x2713; |
363
+ | &emsp;&nbsp;_**Control** \[1\]_ | `\C-C`, `\cD` | &#x2713; |
361
364
  | &emsp;&nbsp;_**Hex**_ | `\x20`, `\x{701230}` | &#x2713; |
362
- | &emsp;&nbsp;_**Meta**_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
365
+ | &emsp;&nbsp;_**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
363
366
  | &emsp;&nbsp;_**Octal**_ | `\0`, `\01`, `\012` | &#x2713; |
364
367
  | &emsp;&nbsp;_**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | &#x2713; |
365
- | **Unicode Properties** | _<sub>([Unicode 11.0.0](http://www.unicode.org/versions/Unicode11.0.0/))</sub>_ | &#x22f1; |
368
+ | **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
366
369
  | &emsp;&nbsp;_**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | &#x2713; |
367
370
  | &emsp;&nbsp;_**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | &#x2713; |
368
371
  | &emsp;&nbsp;_**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | &#x2713; |
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
371
374
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | &#x2713; |
372
375
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | &#x2713; |
373
376
 
377
+ **\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
378
+ https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
379
+ scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
380
+
374
381
  ##### Inapplicable Features
375
382
 
376
383
  Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
384
391
  - **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
385
392
  - **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
386
393
 
387
-
388
394
  See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
389
395
 
390
396
  _**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
392
398
 
393
399
 
394
400
  ## Testing
395
- To run the tests simply run rake from the root directory, as 'test' is the default task.
396
-
397
- It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
398
-
399
- The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
400
-
401
- ```
402
- bin/test
403
- ```
404
-
405
- You can run a specific test like so:
401
+ To run the tests simply run rake from the root directory.
406
402
 
407
- ```
408
- bin/test spec/scanner/properties_spec.rb
409
- ```
403
+ The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
410
404
 
411
- Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
405
+ Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
412
406
 
413
407
  ```
414
- rake ragel:rb && bin/test spec/scanner/properties_spec.rb
408
+ rake ragel:rb && rspec spec/scanner/properties_spec.rb
415
409
  ```
416
410
 
417
411
  ## Building
data/Rakefile CHANGED
@@ -1,35 +1,31 @@
1
+ require 'bundler'
1
2
  require 'rubygems'
2
-
3
+ require 'rubygems/package_task'
3
4
  require 'rake'
4
5
  require 'rake/testtask'
6
+ require 'rspec/core/rake_task'
5
7
 
6
- require 'bundler'
7
- require 'rubygems/package_task'
8
-
9
-
10
- RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
11
- RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
8
+ RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
9
+ RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
12
10
  RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
13
11
 
14
-
15
12
  Bundler::GemHelper.install_tasks
16
13
 
14
+ RSpec::Core::RakeTask.new(:spec)
17
15
 
18
16
  task :default => [:'test:full']
19
17
 
20
18
  namespace :test do
21
- task full: :'ragel:rb' do
22
- sh 'bin/test'
23
- end
19
+ task full: [:'ragel:rb', :spec]
24
20
  end
25
21
 
26
22
  namespace :ragel do
27
23
  desc "Process the ragel source files and output ruby code"
28
24
  task :rb do
29
- RAGEL_SOURCE_FILES.each do |file|
30
- output_file = "#{RAGEL_OUTPUT_DIR}/#{file}.rb"
25
+ RAGEL_SOURCE_FILES.each do |source_file|
26
+ output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
31
27
  # using faster flat table driven FSM, about 25% larger code, but about 30% faster
32
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{file}.rl -o #{output_file}"
28
+ sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
33
29
 
34
30
  contents = File.read(output_file)
35
31
 
@@ -49,19 +45,17 @@ namespace :ragel do
49
45
  end
50
46
  end
51
47
 
52
-
53
48
  # Add ragel task as a prerequisite for building the gem to ensure that the
54
49
  # latest scanner code is generated and included in the build.
55
50
  desc "Runs ragel:rb before building the gem"
56
51
  task :build => ['ragel:rb']
57
52
 
58
-
59
53
  namespace :props do
60
54
  desc 'Write new property value hashes for the properties scanner'
61
55
  task :update do
62
56
  require 'regexp_property_values'
63
57
  RegexpPropertyValues.update
64
- dir = File.expand_path('../lib/regexp_parser/scanner/properties', __FILE__)
58
+ dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
65
59
 
66
60
  require 'psych'
67
61
  write_hash_to_file = ->(hash, path) do
@@ -0,0 +1,4 @@
1
+ class Regexp::Parser
2
+ # base class for all gem-specific errors (inherited but never raised itself)
3
+ class Error < StandardError; end
4
+ end
@@ -0,0 +1,123 @@
1
+ module Regexp::Expression
2
+ class Base
3
+ attr_accessor :type, :token
4
+ attr_accessor :text, :ts
5
+ attr_accessor :level, :set_level, :conditional_level, :nesting_level
6
+
7
+ attr_accessor :quantifier
8
+ attr_accessor :options
9
+
10
+ def initialize(token, options = {})
11
+ self.type = token.type
12
+ self.token = token.token
13
+ self.text = token.text
14
+ self.ts = token.ts
15
+ self.level = token.level
16
+ self.set_level = token.set_level
17
+ self.conditional_level = token.conditional_level
18
+ self.nesting_level = 0
19
+ self.quantifier = nil
20
+ self.options = options
21
+ end
22
+
23
+ def initialize_copy(orig)
24
+ self.text = (orig.text ? orig.text.dup : nil)
25
+ self.options = (orig.options ? orig.options.dup : nil)
26
+ self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
27
+ super
28
+ end
29
+
30
+ def to_re(format = :full)
31
+ ::Regexp.new(to_s(format))
32
+ end
33
+
34
+ alias :starts_at :ts
35
+
36
+ def base_length
37
+ to_s(:base).length
38
+ end
39
+
40
+ def full_length
41
+ to_s.length
42
+ end
43
+
44
+ def offset
45
+ [starts_at, full_length]
46
+ end
47
+
48
+ def coded_offset
49
+ '@%d+%d' % offset
50
+ end
51
+
52
+ def to_s(format = :full)
53
+ "#{text}#{quantifier_affix(format)}"
54
+ end
55
+
56
+ def quantifier_affix(expression_format)
57
+ quantifier.to_s if quantified? && expression_format != :base
58
+ end
59
+
60
+ def terminal?
61
+ !respond_to?(:expressions)
62
+ end
63
+
64
+ def quantify(token, text, min = nil, max = nil, mode = :greedy)
65
+ self.quantifier = Quantifier.new(token, text, min, max, mode)
66
+ end
67
+
68
+ def unquantified_clone
69
+ clone.tap { |exp| exp.quantifier = nil }
70
+ end
71
+
72
+ def quantified?
73
+ !quantifier.nil?
74
+ end
75
+
76
+ # Deprecated. Prefer `#repetitions` which has a more uniform interface.
77
+ def quantity
78
+ return [nil,nil] unless quantified?
79
+ [quantifier.min, quantifier.max]
80
+ end
81
+
82
+ def repetitions
83
+ return 1..1 unless quantified?
84
+ min = quantifier.min
85
+ max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
86
+ range = min..max
87
+ # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
88
+ if RUBY_VERSION.to_f < 2.7
89
+ range.define_singleton_method(:minmax) { [min, max] }
90
+ end
91
+ range
92
+ end
93
+
94
+ def greedy?
95
+ quantified? and quantifier.greedy?
96
+ end
97
+
98
+ def reluctant?
99
+ quantified? and quantifier.reluctant?
100
+ end
101
+ alias :lazy? :reluctant?
102
+
103
+ def possessive?
104
+ quantified? and quantifier.possessive?
105
+ end
106
+
107
+ def attributes
108
+ {
109
+ type: type,
110
+ token: token,
111
+ text: to_s(:base),
112
+ starts_at: ts,
113
+ length: full_length,
114
+ level: level,
115
+ set_level: set_level,
116
+ conditional_level: conditional_level,
117
+ options: options,
118
+ quantifier: quantified? ? quantifier.to_h : nil,
119
+ }
120
+ end
121
+ alias :to_h :attributes
122
+ end
123
+ end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module Anchor
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -22,5 +21,4 @@ module Regexp::Expression
22
21
  EOS = EndOfString
23
22
  EOSobEOL = EndOfStringOrBeforeEndOfLine
24
23
  end
25
-
26
24
  end
@@ -2,6 +2,11 @@ module Regexp::Expression
2
2
  module Backreference
3
3
  class Base < Regexp::Expression::Base
4
4
  attr_accessor :referenced_expression
5
+
6
+ def initialize_copy(orig)
7
+ self.referenced_expression = orig.referenced_expression.dup
8
+ super
9
+ end
5
10
  end
6
11
 
7
12
  class Number < Backreference::Base
@@ -7,7 +7,8 @@ module Regexp::Expression
7
7
  alias :ts :starts_at
8
8
 
9
9
  def <<(exp)
10
- complete? && raise("Can't add more than 2 expressions to a Range")
10
+ complete? and raise Regexp::Parser::Error,
11
+ "Can't add more than 2 expressions to a Range"
11
12
  super
12
13
  end
13
14
 
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  module Conditional
3
- class TooManyBranches < StandardError
3
+ class TooManyBranches < Regexp::Parser::Error
4
4
  def initialize
5
5
  super('The conditional expression has more than 2 branches')
6
6
  end
@@ -15,6 +15,11 @@ module Regexp::Expression
15
15
  ref = text.tr("'<>()", "")
16
16
  ref =~ /\D/ ? ref : Integer(ref)
17
17
  end
18
+
19
+ def initialize_copy(orig)
20
+ self.referenced_expression = orig.referenced_expression.dup
21
+ super
22
+ end
18
23
  end
19
24
 
20
25
  class Branch < Regexp::Expression::Sequence; end
@@ -53,6 +58,11 @@ module Regexp::Expression
53
58
  def to_s(format = :full)
54
59
  "#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
55
60
  end
61
+
62
+ def initialize_copy(orig)
63
+ self.referenced_expression = orig.referenced_expression.dup
64
+ super
65
+ end
56
66
  end
57
67
  end
58
68
  end
@@ -1,4 +1,5 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
2
3
  module EscapeSequence
3
4
  class Base < Regexp::Expression::Base
4
5
  require 'yaml'
@@ -1,8 +1,7 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
3
  def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
5
- raise "Can not quantify a free space object"
4
+ raise Regexp::Parser::Error, 'Can not quantify a free space object'
6
5
  end
7
6
  end
8
7
 
@@ -13,5 +12,4 @@ module Regexp::Expression
13
12
  text << exp.text
14
13
  end
15
14
  end
16
-
17
15
  end
@@ -35,6 +35,11 @@ module Regexp::Expression
35
35
  class Atomic < Group::Base; end
36
36
  class Options < Group::Base
37
37
  attr_accessor :option_changes
38
+
39
+ def initialize_copy(orig)
40
+ self.option_changes = orig.option_changes.dup
41
+ super
42
+ end
38
43
  end
39
44
 
40
45
  class Capture < Group::Base
@@ -53,7 +58,7 @@ module Regexp::Expression
53
58
  super
54
59
  end
55
60
 
56
- def initialize_clone(orig)
61
+ def initialize_copy(orig)
57
62
  @name = orig.name.dup
58
63
  super
59
64
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -7,7 +6,7 @@ module Regexp::Expression
7
6
  end
8
7
 
9
8
  def name
10
- text =~ /\A\\[pP]\{([^}]+)\}\z/; $1
9
+ text[/\A\\[pP]\{([^}]+)\}\z/, 1]
11
10
  end
12
11
 
13
12
  def shortcut
@@ -116,5 +115,4 @@ module Regexp::Expression
116
115
  class Script < UnicodeProperty::Base; end
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
119
-
120
118
  end # module Regexp::Expression
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
3
  def self.build(options = {})
5
4
  new(build_token, options)
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
3
+ MODES = %i[greedy possessive reluctant]
4
4
 
5
5
  attr_reader :token, :text, :min, :max, :mode
6
6
 
@@ -12,7 +12,7 @@ module Regexp::Expression
12
12
  @max = max
13
13
  end
14
14
 
15
- def initialize_clone(orig)
15
+ def initialize_copy(orig)
16
16
  @text = orig.text.dup
17
17
  super
18
18
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpression in how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -41,17 +40,11 @@ module Regexp::Expression
41
40
  alias :ts :starts_at
42
41
 
43
42
  def quantify(token, text, min = nil, max = nil, mode = :greedy)
44
- offset = -1
45
- target = expressions[offset]
46
- while target.is_a?(FreeSpace)
47
- target = expressions[offset -= 1]
48
- end
49
-
50
- target || raise(ArgumentError, "No valid target found for '#{text}' "\
51
- 'quantifier')
43
+ target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
44
+ target or raise Regexp::Parser::Error,
45
+ "No valid target found for '#{text}' quantifier"
52
46
 
53
47
  target.quantify(token, text, min, max, mode)
54
48
  end
55
49
  end
56
-
57
50
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Subexpression < Regexp::Expression::Base
4
3
  include Enumerable
5
4
 
@@ -12,7 +11,7 @@ module Regexp::Expression
12
11
  end
13
12
 
14
13
  # Override base method to clone the expressions as well.
15
- def initialize_clone(orig)
14
+ def initialize_copy(orig)
16
15
  self.expressions = orig.expressions.map(&:clone)
17
16
  super
18
17
  end