regexp_parser 2.0.2 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +54 -0
- data/Gemfile +5 -1
- data/README.md +15 -21
- data/Rakefile +11 -17
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +5 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +2 -1
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -1
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +1 -3
- data/lib/regexp_parser/expression/classes/group.rb +6 -1
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +1 -3
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/quantifier.rb +2 -2
- data/lib/regexp_parser/expression/sequence.rb +3 -10
- data/lib/regexp_parser/expression/subexpression.rb +1 -2
- data/lib/regexp_parser/expression.rb +7 -130
- data/lib/regexp_parser/lexer.rb +7 -5
- data/lib/regexp_parser/parser.rb +282 -334
- data/lib/regexp_parser/scanner/properties/long.yml +13 -0
- data/lib/regexp_parser/scanner/properties/short.yml +9 -1
- data/lib/regexp_parser/scanner/scanner.rl +64 -87
- data/lib/regexp_parser/scanner.rb +1024 -1073
- data/lib/regexp_parser/syntax/any.rb +2 -4
- data/lib/regexp_parser/syntax/base.rb +10 -10
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/spec/expression/clone_spec.rb +36 -4
- data/spec/expression/free_space_spec.rb +2 -2
- data/spec/expression/methods/match_length_spec.rb +2 -2
- data/spec/lexer/nesting_spec.rb +2 -2
- data/spec/lexer/refcalls_spec.rb +5 -0
- data/spec/parser/all_spec.rb +2 -2
- data/spec/parser/escapes_spec.rb +43 -31
- data/spec/parser/properties_spec.rb +6 -4
- data/spec/parser/refcalls_spec.rb +5 -0
- data/spec/parser/set/ranges_spec.rb +26 -16
- data/spec/scanner/escapes_spec.rb +29 -20
- data/spec/scanner/refcalls_spec.rb +19 -0
- data/spec/scanner/sets_spec.rb +66 -23
- data/spec/spec_helper.rb +13 -1
- data/spec/support/capturing_stderr.rb +9 -0
- data/spec/syntax/versions/1.8.6_spec.rb +2 -2
- data/spec/syntax/versions/2.0.0_spec.rb +2 -2
- data/spec/syntax/versions/aliases_spec.rb +1 -0
- metadata +27 -26
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/support/runner.rb +0 -42
- data/spec/support/warning_extractor.rb +0 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 332259c898b9b344e10961053bb2b761f4dd5530182a5f6195639dba9cbb99f9
|
4
|
+
data.tar.gz: b537f9bd23db799ee562494633f1e8423501651540a04b634ae07dfe8f3b19c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 393ecc1cc20189e4a79252e6acf6dab7dd6dc07ba9c47ae7479746eaf8ebe2ccfd1ebcb82fd027edc2c5c938eb490f2f36a93587d2405a54017e0e2727a35a15
|
7
|
+
data.tar.gz: 6c961232ce5f3f409c91d0b66dd23c809e92f47aa6c1f94f2f1929e8eeccfb4bc25fcdf5935fc968d7e0c0ae632992a6d38bc8e982858f2da996a8eac54d3c89
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,59 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Added
|
6
|
+
|
7
|
+
- Added support for 13 new unicode properties introduced in Ruby 3.1.0-dev
|
8
|
+
|
9
|
+
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
10
|
+
|
11
|
+
### Fixed
|
12
|
+
|
13
|
+
- fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
|
14
|
+
* thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
|
15
|
+
|
16
|
+
## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
|
17
|
+
|
18
|
+
### Added
|
19
|
+
|
20
|
+
- common ancestor for all scanning/parsing/lexing errors
|
21
|
+
* `Regexp::Parser::Error` can now be rescued as a catch-all
|
22
|
+
* the following errors (and their many descendants) now inherit from it:
|
23
|
+
- `Regexp::Expression::Conditional::TooManyBranches`
|
24
|
+
- `Regexp::Parser::ParserError`
|
25
|
+
- `Regexp::Scanner::ScannerError`
|
26
|
+
- `Regexp::Scanner::ValidationError`
|
27
|
+
- `Regexp::Syntax::SyntaxError`
|
28
|
+
* it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
|
29
|
+
* thanks to [sandstrom](https://github.com/sandstrom) for the cue
|
30
|
+
|
31
|
+
### Fixed
|
32
|
+
|
33
|
+
- fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
|
34
|
+
* a regression in v2.0.1 had caused them to be scanned as literals
|
35
|
+
- fixed scanning of some backreference and subexpression call edge cases
|
36
|
+
* e.g. `\k<+1>`, `\g<x-1>`
|
37
|
+
- fixed tokenization of some escapes in character sets
|
38
|
+
* `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
|
39
|
+
* all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
|
40
|
+
* if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
|
41
|
+
* the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
|
42
|
+
- fixed handling of control/metacontrol escapes in character sets
|
43
|
+
* e.g. `[\cX]`, `[\M-\C-X]`
|
44
|
+
* they were misread as a bunch of individual literals, escapes, and ranges
|
45
|
+
- fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
|
46
|
+
|
47
|
+
## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
|
48
|
+
|
49
|
+
### Fixed
|
50
|
+
|
51
|
+
- fixed error when scanning some unlikely and redundant but valid charset patterns
|
52
|
+
* e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
|
53
|
+
- fixed ancestry of some error classes related to syntax version lookup
|
54
|
+
* `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
|
55
|
+
* they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
|
56
|
+
|
3
57
|
## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
58
|
|
5
59
|
### Fixed
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# Regexp::Parser
|
2
2
|
|
3
|
-
[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
|
4
|
+
[![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions)
|
5
|
+
[![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions)
|
6
|
+
[![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
|
4
7
|
|
5
8
|
A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
6
9
|
|
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
360
|
|   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | ✓ |
|
358
361
|
|   _**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | ✓ |
|
359
362
|
| **String Escapes** | | ⋱ |
|
360
|
-
|   _**Control**_
|
363
|
+
|   _**Control** \[1\]_ | `\C-C`, `\cD` | ✓ |
|
361
364
|
|   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
|
362
|
-
|   _**Meta**_
|
365
|
+
|   _**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
363
366
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
364
367
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
365
|
-
| **Unicode Properties** | _<sub>([Unicode
|
368
|
+
| **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | ⋱ |
|
366
369
|
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | ✓ |
|
367
370
|
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | ✓ |
|
368
371
|
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | ✓ |
|
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
|
|
371
374
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
372
375
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
373
376
|
|
377
|
+
**\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
|
378
|
+
https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
|
379
|
+
scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
|
380
|
+
|
374
381
|
##### Inapplicable Features
|
375
382
|
|
376
383
|
Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
|
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
|
|
384
391
|
- **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
|
385
392
|
- **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
|
386
393
|
|
387
|
-
|
388
394
|
See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
|
389
395
|
|
390
396
|
_**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
|
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
|
|
392
398
|
|
393
399
|
|
394
400
|
## Testing
|
395
|
-
To run the tests simply run rake from the root directory
|
396
|
-
|
397
|
-
It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
|
398
|
-
|
399
|
-
The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
|
400
|
-
|
401
|
-
```
|
402
|
-
bin/test
|
403
|
-
```
|
404
|
-
|
405
|
-
You can run a specific test like so:
|
401
|
+
To run the tests simply run rake from the root directory.
|
406
402
|
|
407
|
-
|
408
|
-
bin/test spec/scanner/properties_spec.rb
|
409
|
-
```
|
403
|
+
The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
|
410
404
|
|
411
|
-
Note that changes to Ragel files will not be reflected when running `rspec`
|
405
|
+
Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
|
412
406
|
|
413
407
|
```
|
414
|
-
rake ragel:rb &&
|
408
|
+
rake ragel:rb && rspec spec/scanner/properties_spec.rb
|
415
409
|
```
|
416
410
|
|
417
411
|
## Building
|
data/Rakefile
CHANGED
@@ -1,35 +1,31 @@
|
|
1
|
+
require 'bundler'
|
1
2
|
require 'rubygems'
|
2
|
-
|
3
|
+
require 'rubygems/package_task'
|
3
4
|
require 'rake'
|
4
5
|
require 'rake/testtask'
|
6
|
+
require 'rspec/core/rake_task'
|
5
7
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
|
11
|
-
RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
|
8
|
+
RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
|
9
|
+
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
12
10
|
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
13
11
|
|
14
|
-
|
15
12
|
Bundler::GemHelper.install_tasks
|
16
13
|
|
14
|
+
RSpec::Core::RakeTask.new(:spec)
|
17
15
|
|
18
16
|
task :default => [:'test:full']
|
19
17
|
|
20
18
|
namespace :test do
|
21
|
-
task full: :'ragel:rb'
|
22
|
-
sh 'bin/test'
|
23
|
-
end
|
19
|
+
task full: [:'ragel:rb', :spec]
|
24
20
|
end
|
25
21
|
|
26
22
|
namespace :ragel do
|
27
23
|
desc "Process the ragel source files and output ruby code"
|
28
24
|
task :rb do
|
29
|
-
RAGEL_SOURCE_FILES.each do |
|
30
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{
|
25
|
+
RAGEL_SOURCE_FILES.each do |source_file|
|
26
|
+
output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
|
31
27
|
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
32
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{
|
28
|
+
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
|
33
29
|
|
34
30
|
contents = File.read(output_file)
|
35
31
|
|
@@ -49,19 +45,17 @@ namespace :ragel do
|
|
49
45
|
end
|
50
46
|
end
|
51
47
|
|
52
|
-
|
53
48
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
54
49
|
# latest scanner code is generated and included in the build.
|
55
50
|
desc "Runs ragel:rb before building the gem"
|
56
51
|
task :build => ['ragel:rb']
|
57
52
|
|
58
|
-
|
59
53
|
namespace :props do
|
60
54
|
desc 'Write new property value hashes for the properties scanner'
|
61
55
|
task :update do
|
62
56
|
require 'regexp_property_values'
|
63
57
|
RegexpPropertyValues.update
|
64
|
-
dir = File.
|
58
|
+
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
65
59
|
|
66
60
|
require 'psych'
|
67
61
|
write_hash_to_file = ->(hash, path) do
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
class Base
|
3
|
+
attr_accessor :type, :token
|
4
|
+
attr_accessor :text, :ts
|
5
|
+
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
6
|
+
|
7
|
+
attr_accessor :quantifier
|
8
|
+
attr_accessor :options
|
9
|
+
|
10
|
+
def initialize(token, options = {})
|
11
|
+
self.type = token.type
|
12
|
+
self.token = token.token
|
13
|
+
self.text = token.text
|
14
|
+
self.ts = token.ts
|
15
|
+
self.level = token.level
|
16
|
+
self.set_level = token.set_level
|
17
|
+
self.conditional_level = token.conditional_level
|
18
|
+
self.nesting_level = 0
|
19
|
+
self.quantifier = nil
|
20
|
+
self.options = options
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize_copy(orig)
|
24
|
+
self.text = (orig.text ? orig.text.dup : nil)
|
25
|
+
self.options = (orig.options ? orig.options.dup : nil)
|
26
|
+
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
27
|
+
super
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_re(format = :full)
|
31
|
+
::Regexp.new(to_s(format))
|
32
|
+
end
|
33
|
+
|
34
|
+
alias :starts_at :ts
|
35
|
+
|
36
|
+
def base_length
|
37
|
+
to_s(:base).length
|
38
|
+
end
|
39
|
+
|
40
|
+
def full_length
|
41
|
+
to_s.length
|
42
|
+
end
|
43
|
+
|
44
|
+
def offset
|
45
|
+
[starts_at, full_length]
|
46
|
+
end
|
47
|
+
|
48
|
+
def coded_offset
|
49
|
+
'@%d+%d' % offset
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_s(format = :full)
|
53
|
+
"#{text}#{quantifier_affix(format)}"
|
54
|
+
end
|
55
|
+
|
56
|
+
def quantifier_affix(expression_format)
|
57
|
+
quantifier.to_s if quantified? && expression_format != :base
|
58
|
+
end
|
59
|
+
|
60
|
+
def terminal?
|
61
|
+
!respond_to?(:expressions)
|
62
|
+
end
|
63
|
+
|
64
|
+
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
65
|
+
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
66
|
+
end
|
67
|
+
|
68
|
+
def unquantified_clone
|
69
|
+
clone.tap { |exp| exp.quantifier = nil }
|
70
|
+
end
|
71
|
+
|
72
|
+
def quantified?
|
73
|
+
!quantifier.nil?
|
74
|
+
end
|
75
|
+
|
76
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
77
|
+
def quantity
|
78
|
+
return [nil,nil] unless quantified?
|
79
|
+
[quantifier.min, quantifier.max]
|
80
|
+
end
|
81
|
+
|
82
|
+
def repetitions
|
83
|
+
return 1..1 unless quantified?
|
84
|
+
min = quantifier.min
|
85
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
86
|
+
range = min..max
|
87
|
+
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
88
|
+
if RUBY_VERSION.to_f < 2.7
|
89
|
+
range.define_singleton_method(:minmax) { [min, max] }
|
90
|
+
end
|
91
|
+
range
|
92
|
+
end
|
93
|
+
|
94
|
+
def greedy?
|
95
|
+
quantified? and quantifier.greedy?
|
96
|
+
end
|
97
|
+
|
98
|
+
def reluctant?
|
99
|
+
quantified? and quantifier.reluctant?
|
100
|
+
end
|
101
|
+
alias :lazy? :reluctant?
|
102
|
+
|
103
|
+
def possessive?
|
104
|
+
quantified? and quantifier.possessive?
|
105
|
+
end
|
106
|
+
|
107
|
+
def attributes
|
108
|
+
{
|
109
|
+
type: type,
|
110
|
+
token: token,
|
111
|
+
text: to_s(:base),
|
112
|
+
starts_at: ts,
|
113
|
+
length: full_length,
|
114
|
+
level: level,
|
115
|
+
set_level: set_level,
|
116
|
+
conditional_level: conditional_level,
|
117
|
+
options: options,
|
118
|
+
quantifier: quantified? ? quantifier.to_h : nil,
|
119
|
+
}
|
120
|
+
end
|
121
|
+
alias :to_h :attributes
|
122
|
+
end
|
123
|
+
end
|
@@ -2,6 +2,11 @@ module Regexp::Expression
|
|
2
2
|
module Backreference
|
3
3
|
class Base < Regexp::Expression::Base
|
4
4
|
attr_accessor :referenced_expression
|
5
|
+
|
6
|
+
def initialize_copy(orig)
|
7
|
+
self.referenced_expression = orig.referenced_expression.dup
|
8
|
+
super
|
9
|
+
end
|
5
10
|
end
|
6
11
|
|
7
12
|
class Number < Backreference::Base
|
File without changes
|
File without changes
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Conditional
|
3
|
-
class TooManyBranches <
|
3
|
+
class TooManyBranches < Regexp::Parser::Error
|
4
4
|
def initialize
|
5
5
|
super('The conditional expression has more than 2 branches')
|
6
6
|
end
|
@@ -15,6 +15,11 @@ module Regexp::Expression
|
|
15
15
|
ref = text.tr("'<>()", "")
|
16
16
|
ref =~ /\D/ ? ref : Integer(ref)
|
17
17
|
end
|
18
|
+
|
19
|
+
def initialize_copy(orig)
|
20
|
+
self.referenced_expression = orig.referenced_expression.dup
|
21
|
+
super
|
22
|
+
end
|
18
23
|
end
|
19
24
|
|
20
25
|
class Branch < Regexp::Expression::Sequence; end
|
@@ -53,6 +58,11 @@ module Regexp::Expression
|
|
53
58
|
def to_s(format = :full)
|
54
59
|
"#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
|
55
60
|
end
|
61
|
+
|
62
|
+
def initialize_copy(orig)
|
63
|
+
self.referenced_expression = orig.referenced_expression.dup
|
64
|
+
super
|
65
|
+
end
|
56
66
|
end
|
57
67
|
end
|
58
68
|
end
|
@@ -1,8 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class FreeSpace < Regexp::Expression::Base
|
4
3
|
def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
|
5
|
-
raise
|
4
|
+
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
6
5
|
end
|
7
6
|
end
|
8
7
|
|
@@ -13,5 +12,4 @@ module Regexp::Expression
|
|
13
12
|
text << exp.text
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
@@ -35,6 +35,11 @@ module Regexp::Expression
|
|
35
35
|
class Atomic < Group::Base; end
|
36
36
|
class Options < Group::Base
|
37
37
|
attr_accessor :option_changes
|
38
|
+
|
39
|
+
def initialize_copy(orig)
|
40
|
+
self.option_changes = orig.option_changes.dup
|
41
|
+
super
|
42
|
+
end
|
38
43
|
end
|
39
44
|
|
40
45
|
class Capture < Group::Base
|
@@ -53,7 +58,7 @@ module Regexp::Expression
|
|
53
58
|
super
|
54
59
|
end
|
55
60
|
|
56
|
-
def
|
61
|
+
def initialize_copy(orig)
|
57
62
|
@name = orig.name.dup
|
58
63
|
super
|
59
64
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -7,7 +6,7 @@ module Regexp::Expression
|
|
7
6
|
end
|
8
7
|
|
9
8
|
def name
|
10
|
-
text
|
9
|
+
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
11
10
|
end
|
12
11
|
|
13
12
|
def shortcut
|
@@ -116,5 +115,4 @@ module Regexp::Expression
|
|
116
115
|
class Script < UnicodeProperty::Base; end
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
119
|
-
|
120
118
|
end # module Regexp::Expression
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class Quantifier
|
3
|
-
MODES = [
|
3
|
+
MODES = %i[greedy possessive reluctant]
|
4
4
|
|
5
5
|
attr_reader :token, :text, :min, :max, :mode
|
6
6
|
|
@@ -12,7 +12,7 @@ module Regexp::Expression
|
|
12
12
|
@max = max
|
13
13
|
end
|
14
14
|
|
15
|
-
def
|
15
|
+
def initialize_copy(orig)
|
16
16
|
@text = orig.text.dup
|
17
17
|
super
|
18
18
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
# A sequence of expressions. Differs from a Subexpression by how it handles
|
4
3
|
# quantifiers, as it applies them to its last element instead of itself as
|
5
4
|
# a whole subexpression.
|
@@ -41,17 +40,11 @@ module Regexp::Expression
|
|
41
40
|
alias :ts :starts_at
|
42
41
|
|
43
42
|
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
44
|
-
|
45
|
-
target
|
46
|
-
|
47
|
-
target = expressions[offset -= 1]
|
48
|
-
end
|
49
|
-
|
50
|
-
target || raise(ArgumentError, "No valid target found for '#{text}' "\
|
51
|
-
'quantifier')
|
43
|
+
target = expressions.reverse.find { |exp| !exp.is_a?(FreeSpace) }
|
44
|
+
target or raise Regexp::Parser::Error,
|
45
|
+
"No valid target found for '#{text}' quantifier"
|
52
46
|
|
53
47
|
target.quantify(token, text, min, max, mode)
|
54
48
|
end
|
55
49
|
end
|
56
|
-
|
57
50
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Subexpression < Regexp::Expression::Base
|
4
3
|
include Enumerable
|
5
4
|
|
@@ -12,7 +11,7 @@ module Regexp::Expression
|
|
12
11
|
end
|
13
12
|
|
14
13
|
# Override base method to clone the expressions as well.
|
15
|
-
def
|
14
|
+
def initialize_copy(orig)
|
16
15
|
self.expressions = orig.expressions.map(&:clone)
|
17
16
|
super
|
18
17
|
end
|