regexp_parser 2.0.2 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +54 -0
- data/Gemfile +5 -1
- data/README.md +15 -21
- data/Rakefile +11 -17
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
- data/lib/regexp_parser/expression/classes/group.rb +6 -1
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +1 -3
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -10
- data/lib/regexp_parser/expression/subexpression.rb +1 -2
- data/lib/regexp_parser/expression.rb +7 -130
- data/lib/regexp_parser/lexer.rb +7 -5
- data/lib/regexp_parser/parser.rb +282 -334
- data/lib/regexp_parser/scanner/properties/long.yml +13 -0
- data/lib/regexp_parser/scanner/properties/short.yml +9 -1
- data/lib/regexp_parser/scanner/scanner.rl +64 -87
- data/lib/regexp_parser/scanner.rb +1024 -1073
- data/lib/regexp_parser/syntax/any.rb +2 -4
- data/lib/regexp_parser/syntax/base.rb +10 -10
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/lexer/nesting_spec.rb +2 -2
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/escapes_spec.rb +43 -31
- data/spec/parser/properties_spec.rb +6 -4
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/parser/set/ranges_spec.rb +26 -16
- data/spec/scanner/escapes_spec.rb +29 -20
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +66 -23
- data/spec/spec_helper.rb +13 -1
- data/spec/support/capturing_stderr.rb +9 -0
- data/spec/syntax/versions/1.8.6_spec.rb +2 -2
- data/spec/syntax/versions/2.0.0_spec.rb +2 -2
- data/spec/syntax/versions/aliases_spec.rb +1 -0
- metadata +27 -26
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/support/runner.rb +0 -42
- data/spec/support/warning_extractor.rb +0 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 332259c898b9b344e10961053bb2b761f4dd5530182a5f6195639dba9cbb99f9
|
4
|
+
data.tar.gz: b537f9bd23db799ee562494633f1e8423501651540a04b634ae07dfe8f3b19c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 393ecc1cc20189e4a79252e6acf6dab7dd6dc07ba9c47ae7479746eaf8ebe2ccfd1ebcb82fd027edc2c5c938eb490f2f36a93587d2405a54017e0e2727a35a15
|
7
|
+
data.tar.gz: 6c961232ce5f3f409c91d0b66dd23c809e92f47aa6c1f94f2f1929e8eeccfb4bc25fcdf5935fc968d7e0c0ae632992a6d38bc8e982858f2da996a8eac54d3c89
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,59 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Added
|
6
|
+
|
7
|
+
- Added support for 13 new unicode properties introduced in Ruby 3.1.0-dev
|
8
|
+
|
9
|
+
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
10
|
+
|
11
|
+
### Fixed
|
12
|
+
|
13
|
+
- fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
|
14
|
+
* thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
|
15
|
+
|
16
|
+
## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
|
17
|
+
|
18
|
+
### Added
|
19
|
+
|
20
|
+
- common ancestor for all scanning/parsing/lexing errors
|
21
|
+
* `Regexp::Parser::Error` can now be rescued as a catch-all
|
22
|
+
* the following errors (and their many descendants) now inherit from it:
|
23
|
+
- `Regexp::Expression::Conditional::TooManyBranches`
|
24
|
+
- `Regexp::Parser::ParserError`
|
25
|
+
- `Regexp::Scanner::ScannerError`
|
26
|
+
- `Regexp::Scanner::ValidationError`
|
27
|
+
- `Regexp::Syntax::SyntaxError`
|
28
|
+
* it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
|
29
|
+
* thanks to [sandstrom](https://github.com/sandstrom) for the cue
|
30
|
+
|
31
|
+
### Fixed
|
32
|
+
|
33
|
+
- fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
|
34
|
+
* a regression in v2.0.1 had caused them to be scanned as literals
|
35
|
+
- fixed scanning of some backreference and subexpression call edge cases
|
36
|
+
* e.g. `\k<+1>`, `\g<x-1>`
|
37
|
+
- fixed tokenization of some escapes in character sets
|
38
|
+
* `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
|
39
|
+
* all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
|
40
|
+
* if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
|
41
|
+
* the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
|
42
|
+
- fixed handling of control/metacontrol escapes in character sets
|
43
|
+
* e.g. `[\cX]`, `[\M-\C-X]`
|
44
|
+
* they were misread as a bunch of individual literals, escapes, and ranges
|
45
|
+
- fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
|
46
|
+
|
47
|
+
## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
|
48
|
+
|
49
|
+
### Fixed
|
50
|
+
|
51
|
+
- fixed error when scanning some unlikely and redundant but valid charset patterns
|
52
|
+
* e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
|
53
|
+
- fixed ancestry of some error classes related to syntax version lookup
|
54
|
+
* `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
|
55
|
+
* they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
|
56
|
+
|
3
57
|
## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
58
|
|
5
59
|
### Fixed
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# Regexp::Parser
|
2
2
|
|
3
|
-
[](http://badge.fury.io/rb/regexp_parser)
|
3
|
+
[](http://badge.fury.io/rb/regexp_parser)
|
4
|
+
[](https://github.com/ammar/regexp_parser/actions)
|
5
|
+
[](https://github.com/ammar/regexp_parser/actions)
|
6
|
+
[](https://codeclimate.com/github/ammar/regexp_parser/badges)
|
4
7
|
|
5
8
|
A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
6
9
|
|
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
360
|
|   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | ✓ |
|
358
361
|
|   _**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | ✓ |
|
359
362
|
| **String Escapes** | | ⋱ |
|
360
|
-
|   _**Control**_
|
363
|
+
|   _**Control** \[1\]_ | `\C-C`, `\cD` | ✓ |
|
361
364
|
|   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
|
362
|
-
|   _**Meta**_
|
365
|
+
|   _**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
363
366
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
364
367
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
365
|
-
| **Unicode Properties** | _<sub>([Unicode
|
368
|
+
| **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | ⋱ |
|
366
369
|
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | ✓ |
|
367
370
|
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | ✓ |
|
368
371
|
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | ✓ |
|
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
|
|
371
374
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
372
375
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
373
376
|
|
377
|
+
**\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
|
378
|
+
https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
|
379
|
+
scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
|
380
|
+
|
374
381
|
##### Inapplicable Features
|
375
382
|
|
376
383
|
Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
|
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
|
|
384
391
|
- **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
|
385
392
|
- **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
|
386
393
|
|
387
|
-
|
388
394
|
See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
|
389
395
|
|
390
396
|
_**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
|
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
|
|
392
398
|
|
393
399
|
|
394
400
|
## Testing
|
395
|
-
To run the tests simply run rake from the root directory
|
396
|
-
|
397
|
-
It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
|
398
|
-
|
399
|
-
The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
|
400
|
-
|
401
|
-
```
|
402
|
-
bin/test
|
403
|
-
```
|
404
|
-
|
405
|
-
You can run a specific test like so:
|
401
|
+
To run the tests simply run rake from the root directory.
|
406
402
|
|
407
|
-
|
408
|
-
bin/test spec/scanner/properties_spec.rb
|
409
|
-
```
|
403
|
+
The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
|
410
404
|
|
411
|
-
Note that changes to Ragel files will not be reflected when running `rspec`
|
405
|
+
Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
|
412
406
|
|
413
407
|
```
|
414
|
-
rake ragel:rb &&
|
408
|
+
rake ragel:rb && rspec spec/scanner/properties_spec.rb
|
415
409
|
```
|
416
410
|
|
417
411
|
## Building
|
data/Rakefile
CHANGED
@@ -1,35 +1,31 @@
|
|
1
|
+
require 'bundler'
|
1
2
|
require 'rubygems'
|
2
|
-
|
3
|
+
require 'rubygems/package_task'
|
3
4
|
require 'rake'
|
4
5
|
require 'rake/testtask'
|
6
|
+
require 'rspec/core/rake_task'
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
|
11
|
-
RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
|
8
|
+
RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
|
9
|
+
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
12
10
|
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
13
11
|
|
14
|
-
|
15
12
|
Bundler::GemHelper.install_tasks
|
16
13
|
|
14
|
+
RSpec::Core::RakeTask.new(:spec)
|
17
15
|
|
18
16
|
task :default => [:'test:full']
|
19
17
|
|
20
18
|
namespace :test do
|
21
|
-
task full: :'ragel:rb'
|
22
|
-
sh 'bin/test'
|
23
|
-
end
|
19
|
+
task full: [:'ragel:rb', :spec]
|
24
20
|
end
|
25
21
|
|
26
22
|
namespace :ragel do
|
27
23
|
desc "Process the ragel source files and output ruby code"
|
28
24
|
task :rb do
|
29
|
-
RAGEL_SOURCE_FILES.each do |
|
30
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{
|
25
|
+
RAGEL_SOURCE_FILES.each do |source_file|
|
26
|
+
output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
|
31
27
|
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
32
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{
|
28
|
+
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
|
33
29
|
|
34
30
|
contents = File.read(output_file)
|
35
31
|
|
@@ -49,19 +45,17 @@ namespace :ragel do
|
|
49
45
|
end
|
50
46
|
end
|
51
47
|
|
52
|
-
|
53
48
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
54
49
|
# latest scanner code is generated and included in the build.
|
55
50
|
desc "Runs ragel:rb before building the gem"
|
56
51
|
task :build => ['ragel:rb']
|
57
52
|
|
58
|
-
|
59
53
|
namespace :props do
|
60
54
|
desc 'Write new property value hashes for the properties scanner'
|
61
55
|
task :update do
|
62
56
|
require 'regexp_property_values'
|
63
57
|
RegexpPropertyValues.update
|
64
|
-
dir = File.
|
58
|
+
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
65
59
|
|
66
60
|
require 'psych'
|
67
61
|
write_hash_to_file = ->(hash, path) do
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
class Base
|
3
|
+
attr_accessor :type, :token
|
4
|
+
attr_accessor :text, :ts
|
5
|
+
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
6
|
+
|
7
|
+
attr_accessor :quantifier
|
8
|
+
attr_accessor :options
|
9
|
+
|
10
|
+
def initialize(token, options = {})
|
11
|
+
self.type = token.type
|
12
|
+
self.token = token.token
|
13
|
+
self.text = token.text
|
14
|
+
self.ts = token.ts
|
15
|
+
self.level = token.level
|
16
|
+
self.set_level = token.set_level
|
17
|
+
self.conditional_level = token.conditional_level
|
18
|
+
self.nesting_level = 0
|
19
|
+
self.quantifier = nil
|
20
|
+
self.options = options
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize_copy(orig)
|
24
|
+
self.text = (orig.text ? orig.text.dup : nil)
|
25
|
+
self.options = (orig.options ? orig.options.dup : nil)
|
26
|
+
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
27
|
+
super
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_re(format = :full)
|
31
|
+
::Regexp.new(to_s(format))
|
32
|
+
end
|
33
|
+
|
34
|
+
alias :starts_at :ts
|
35
|
+
|
36
|
+
def base_length
|
37
|
+
to_s(:base).length
|
38
|
+
end
|
39
|
+
|
40
|
+
def full_length
|
41
|
+
to_s.length
|
42
|
+
end
|
43
|
+
|
44
|
+
def offset
|
45
|
+
[starts_at, full_length]
|
46
|
+
end
|
47
|
+
|
48
|
+
def coded_offset
|
49
|
+
'@%d+%d' % offset
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_s(format = :full)
|
53
|
+
"#{text}#{quantifier_affix(format)}"
|
54
|
+
end
|
55
|
+
|
56
|
+
def quantifier_affix(expression_format)
|
57
|
+
quantifier.to_s if quantified? && expression_format != :base
|
58
|
+
end
|
59
|
+
|
60
|
+
def terminal?
|
61
|
+
!respond_to?(:expressions)
|
62
|
+
end
|
63
|
+
|
64
|
+
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
65
|
+
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
66
|
+
end
|
67
|
+
|
68
|
+
def unquantified_clone
|
69
|
+
clone.tap { |exp| exp.quantifier = nil }
|
70
|
+
end
|
71
|
+
|
72
|
+
def quantified?
|
73
|
+
!quantifier.nil?
|
74
|
+
end
|
75
|
+
|
76
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
77
|
+
def quantity
|
78
|
+
return [nil,nil] unless quantified?
|
79
|
+
[quantifier.min, quantifier.max]
|
80
|
+
end
|
81
|
+
|
82
|
+
def repetitions
|
83
|
+
return 1..1 unless quantified?
|
84
|
+
min = quantifier.min
|
85
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
86
|
+
range = min..max
|
87
|
+
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
88
|
+
if RUBY_VERSION.to_f < 2.7
|
89
|
+
range.define_singleton_method(:minmax) { [min, max] }
|
90
|
+
end
|
91
|
+
range
|
92
|
+
end
|
93
|
+
|
94
|
+
def greedy?
|
95
|
+
quantified? and quantifier.greedy?
|
96
|
+
end
|
97
|
+
|
98
|
+
def reluctant?
|
99
|
+
quantified? and quantifier.reluctant?
|
100
|
+
end
|
101
|
+
alias :lazy? :reluctant?
|
102
|
+
|
103
|
+
def possessive?
|
104
|
+
quantified? and quantifier.possessive?
|
105
|
+
end
|
106
|
+
|
107
|
+
def attributes
|
108
|
+
{
|
109
|
+
type: type,
|
110
|
+
token: token,
|
111
|
+
text: to_s(:base),
|
112
|
+
starts_at: ts,
|
113
|
+
length: full_length,
|
114
|
+
level: level,
|
115
|
+
set_level: set_level,
|
116
|
+
conditional_level: conditional_level,
|
117
|
+
options: options,
|
118
|
+
quantifier: quantified? ? quantifier.to_h : nil,
|
119
|
+
}
|
120
|
+
end
|
121
|
+
alias :to_h :attributes
|
122
|
+
end
|
123
|
+
end
|
@@ -2,6 +2,11 @@ module Regexp::Expression
|
|
2
2
|
module Backreference
|
3
3
|
class Base < Regexp::Expression::Base
|
4
4
|
attr_accessor :referenced_expression
|
5
|
+
|
6
|
+
def initialize_copy(orig)
|
7
|
+
self.referenced_expression = orig.referenced_expression.dup
|
8
|
+
super
|
9
|
+
end
|
5
10
|
end
|
6
11
|
|
7
12
|
class Number < Backreference::Base
|
File without changes
|
File without changes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Conditional
|
3
|
-
class TooManyBranches <
|
3
|
+
class TooManyBranches < Regexp::Parser::Error
|
4
4
|
def initialize
|
5
5
|
super('The conditional expression has more than 2 branches')
|
6
6
|
end
|
@@ -15,6 +15,11 @@ module Regexp::Expression
|
|
15
15
|
ref = text.tr("'<>()", "")
|
16
16
|
ref =~ /\D/ ? ref : Integer(ref)
|
17
17
|
end
|
18
|
+
|
19
|
+
def initialize_copy(orig)
|
20
|
+
self.referenced_expression = orig.referenced_expression.dup
|
21
|
+
super
|
22
|
+
end
|
18
23
|
end
|
19
24
|
|
20
25
|
class Branch < Regexp::Expression::Sequence; end
|
@@ -53,6 +58,11 @@ module Regexp::Expression
|
|
53
58
|
def to_s(format = :full)
|
54
59
|
"#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
|
55
60
|
end
|
61
|
+
|
62
|
+
def initialize_copy(orig)
|
63
|
+
self.referenced_expression = orig.referenced_expression.dup
|
64
|
+
super
|
65
|
+
end
|
56
66
|
end
|
57
67
|
end
|
58
68
|
end
|
@@ -1,8 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class FreeSpace < Regexp::Expression::Base
|
4
3
|
def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
|
5
|
-
raise
|
4
|
+
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
6
5
|
end
|
7
6
|
end
|
8
7
|
|
@@ -13,5 +12,4 @@ module Regexp::Expression
|
|
13
12
|
text << exp.text
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
@@ -35,6 +35,11 @@ module Regexp::Expression
|
|
35
35
|
class Atomic < Group::Base; end
|
36
36
|
class Options < Group::Base
|
37
37
|
attr_accessor :option_changes
|
38
|
+
|
39
|
+
def initialize_copy(orig)
|
40
|
+
self.option_changes = orig.option_changes.dup
|
41
|
+
super
|
42
|
+
end
|
38
43
|
end
|
39
44
|
|
40
45
|
class Capture < Group::Base
|
@@ -53,7 +58,7 @@ module Regexp::Expression
|
|
53
58
|
super
|
54
59
|
end
|
55
60
|
|
56
|
-
def
|
61
|
+
def initialize_copy(orig)
|
57
62
|
@name = orig.name.dup
|
58
63
|
super
|
59
64
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -7,7 +6,7 @@ module Regexp::Expression
|
|
7
6
|
end
|
8
7
|
|
9
8
|
def name
|
10
|
-
text
|
9
|
+
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
11
10
|
end
|
12
11
|
|
13
12
|
def shortcut
|
@@ -116,5 +115,4 @@ module Regexp::Expression
|
|
116
115
|
class Script < UnicodeProperty::Base; end
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
119
|
-
|
120
118
|
end # module Regexp::Expression
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Quantifier
|
3
|
-
MODES = [
|
3
|
+
MODES = %i[greedy possessive reluctant]
|
4
4
|
|
5
5
|
attr_reader :token, :text, :min, :max, :mode
|
6
6
|
|
@@ -12,7 +12,7 @@ module Regexp::Expression
|
|
12
12
|
@max = max
|
13
13
|
end
|
14
14
|
|
15
|
-
def
|
15
|
+
def initialize_copy(orig)
|
16
16
|
@text = orig.text.dup
|
17
17
|
super
|
18
18
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
# A sequence of expressions. Differs from a Subexpression by how it handles
|
4
3
|
# quantifiers, as it applies them to its last element instead of itself as
|
5
4
|
# a whole subexpression.
|
@@ -41,17 +40,11 @@ module Regexp::Expression
|
|
41
40
|
alias :ts :starts_at
|
42
41
|
|
43
42
|
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
44
|
-
|
45
|
-
target
|
46
|
-
|
47
|
-
target = expressions[offset -= 1]
|
48
|
-
end
|
49
|
-
|
50
|
-
target || raise(ArgumentError, "No valid target found for '#{text}' "\
|
51
|
-
'quantifier')
|
43
|
+
target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
44
|
+
target or raise Regexp::Parser::Error,
|
45
|
+
"No valid target found for '#{text}' quantifier"
|
52
46
|
|
53
47
|
target.quantify(token, text, min, max, mode)
|
54
48
|
end
|
55
49
|
end
|
56
|
-
|
57
50
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Subexpression < Regexp::Expression::Base
|
4
3
|
include Enumerable
|
5
4
|
|
@@ -12,7 +11,7 @@ module Regexp::Expression
|
|
12
11
|
end
|
13
12
|
|
14
13
|
# Override base method to clone the expressions as well.
|
15
|
-
def
|
14
|
+
def initialize_copy(orig)
|
16
15
|
self.expressions = orig.expressions.map(&:clone)
|
17
16
|
super
|
18
17
|
end
|