regexp_parser 2.1.1 → 2.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +15 -21
- data/Rakefile +5 -11
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +0 -2
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/quantifier.rb +1 -1
- data/lib/regexp_parser/expression/sequence.rb +0 -1
- data/lib/regexp_parser/expression/subexpression.rb +0 -1
- data/lib/regexp_parser/expression.rb +6 -130
- data/lib/regexp_parser/lexer.rb +7 -5
- data/lib/regexp_parser/scanner/properties/long.yml +13 -0
- data/lib/regexp_parser/scanner/properties/short.yml +9 -1
- data/lib/regexp_parser/syntax/any.rb +1 -3
- data/lib/regexp_parser/syntax/base.rb +9 -9
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/spec/lexer/nesting_spec.rb +2 -2
- data/spec/parser/escapes_spec.rb +43 -31
- data/spec/parser/properties_spec.rb +6 -4
- data/spec/parser/set/ranges_spec.rb +26 -16
- data/spec/scanner/escapes_spec.rb +28 -19
- data/spec/scanner/sets_spec.rb +9 -9
- data/spec/spec_helper.rb +13 -1
- data/spec/support/capturing_stderr.rb +9 -0
- data/spec/syntax/versions/1.8.6_spec.rb +2 -2
- data/spec/syntax/versions/2.0.0_spec.rb +2 -2
- data/spec/syntax/versions/aliases_spec.rb +1 -0
- metadata +26 -26
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/support/runner.rb +0 -42
- data/spec/support/warning_extractor.rb +0 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 332259c898b9b344e10961053bb2b761f4dd5530182a5f6195639dba9cbb99f9
|
4
|
+
data.tar.gz: b537f9bd23db799ee562494633f1e8423501651540a04b634ae07dfe8f3b19c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 393ecc1cc20189e4a79252e6acf6dab7dd6dc07ba9c47ae7479746eaf8ebe2ccfd1ebcb82fd027edc2c5c938eb490f2f36a93587d2405a54017e0e2727a35a15
|
7
|
+
data.tar.gz: 6c961232ce5f3f409c91d0b66dd23c809e92f47aa6c1f94f2f1929e8eeccfb4bc25fcdf5935fc968d7e0c0ae632992a6d38bc8e982858f2da996a8eac54d3c89
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Added
|
6
|
+
|
7
|
+
- Added support for 13 new unicode properties introduced in Ruby 3.1.0-dev
|
8
|
+
|
3
9
|
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
10
|
|
5
11
|
### Fixed
|
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# Regexp::Parser
|
2
2
|
|
3
|
-
[Gem Version](http://badge.fury.io/rb/regexp_parser)
|
3
|
+
[Gem Version](http://badge.fury.io/rb/regexp_parser)
|
4
|
+
[Build Status](https://github.com/ammar/regexp_parser/actions)
|
5
|
+
[Build Status](https://github.com/ammar/regexp_parser/actions)
|
6
|
+
[Code Climate](https://codeclimate.com/github/ammar/regexp_parser/badges)
|
4
7
|
|
5
8
|
A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
6
9
|
|
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
360
|
|   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | ✓ |
|
358
361
|
|   _**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | ✓ |
|
359
362
|
| **String Escapes** | | ⋱ |
|
360
|
-
|   _**Control**_
|
363
|
+
|   _**Control** \[1\]_ | `\C-C`, `\cD` | ✓ |
|
361
364
|
|   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
|
362
|
-
|   _**Meta**_
|
365
|
+
|   _**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
363
366
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
364
367
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
365
|
-
| **Unicode Properties** | _<sub>([Unicode
|
368
|
+
| **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | ⋱ |
|
366
369
|
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | ✓ |
|
367
370
|
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | ✓ |
|
368
371
|
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | ✓ |
|
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
|
|
371
374
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
372
375
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
373
376
|
|
377
|
+
**\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
|
378
|
+
https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
|
379
|
+
scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
|
380
|
+
|
374
381
|
##### Inapplicable Features
|
375
382
|
|
376
383
|
Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
|
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
|
|
384
391
|
- **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
|
385
392
|
- **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
|
386
393
|
|
387
|
-
|
388
394
|
See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
|
389
395
|
|
390
396
|
_**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
|
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
|
|
392
398
|
|
393
399
|
|
394
400
|
## Testing
|
395
|
-
To run the tests simply run rake from the root directory
|
396
|
-
|
397
|
-
It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
|
398
|
-
|
399
|
-
The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
|
400
|
-
|
401
|
-
```
|
402
|
-
bin/test
|
403
|
-
```
|
404
|
-
|
405
|
-
You can run a specific test like so:
|
401
|
+
To run the tests simply run rake from the root directory.
|
406
402
|
|
407
|
-
|
408
|
-
bin/test spec/scanner/properties_spec.rb
|
409
|
-
```
|
403
|
+
The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
|
410
404
|
|
411
|
-
Note that changes to Ragel files will not be reflected when running `rspec`
|
405
|
+
Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
|
412
406
|
|
413
407
|
```
|
414
|
-
rake ragel:rb &&
|
408
|
+
rake ragel:rb && rspec spec/scanner/properties_spec.rb
|
415
409
|
```
|
416
410
|
|
417
411
|
## Building
|
data/Rakefile
CHANGED
@@ -1,26 +1,22 @@
|
|
1
|
+
require 'bundler'
|
1
2
|
require 'rubygems'
|
2
|
-
|
3
|
+
require 'rubygems/package_task'
|
3
4
|
require 'rake'
|
4
5
|
require 'rake/testtask'
|
5
|
-
|
6
|
-
require 'bundler'
|
7
|
-
require 'rubygems/package_task'
|
8
|
-
|
6
|
+
require 'rspec/core/rake_task'
|
9
7
|
|
10
8
|
RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
|
11
9
|
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
12
10
|
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
13
11
|
|
14
|
-
|
15
12
|
Bundler::GemHelper.install_tasks
|
16
13
|
|
14
|
+
RSpec::Core::RakeTask.new(:spec)
|
17
15
|
|
18
16
|
task :default => [:'test:full']
|
19
17
|
|
20
18
|
namespace :test do
|
21
|
-
task full: :'ragel:rb' do
|
22
|
-
sh 'bin/test'
|
23
|
-
end
|
19
|
+
task full: [:'ragel:rb', :spec]
|
24
20
|
end
|
25
21
|
|
26
22
|
namespace :ragel do
|
@@ -49,13 +45,11 @@ namespace :ragel do
|
|
49
45
|
end
|
50
46
|
end
|
51
47
|
|
52
|
-
|
53
48
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
54
49
|
# latest scanner code is generated and included in the build.
|
55
50
|
desc "Runs ragel:rb before building the gem"
|
56
51
|
task :build => ['ragel:rb']
|
57
52
|
|
58
|
-
|
59
53
|
namespace :props do
|
60
54
|
desc 'Write new property value hashes for the properties scanner'
|
61
55
|
task :update do
|
data/lib/regexp_parser/expression/base.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
class Base
|
3
|
+
attr_accessor :type, :token
|
4
|
+
attr_accessor :text, :ts
|
5
|
+
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
6
|
+
|
7
|
+
attr_accessor :quantifier
|
8
|
+
attr_accessor :options
|
9
|
+
|
10
|
+
def initialize(token, options = {})
|
11
|
+
self.type = token.type
|
12
|
+
self.token = token.token
|
13
|
+
self.text = token.text
|
14
|
+
self.ts = token.ts
|
15
|
+
self.level = token.level
|
16
|
+
self.set_level = token.set_level
|
17
|
+
self.conditional_level = token.conditional_level
|
18
|
+
self.nesting_level = 0
|
19
|
+
self.quantifier = nil
|
20
|
+
self.options = options
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize_copy(orig)
|
24
|
+
self.text = (orig.text ? orig.text.dup : nil)
|
25
|
+
self.options = (orig.options ? orig.options.dup : nil)
|
26
|
+
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
27
|
+
super
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_re(format = :full)
|
31
|
+
::Regexp.new(to_s(format))
|
32
|
+
end
|
33
|
+
|
34
|
+
alias :starts_at :ts
|
35
|
+
|
36
|
+
def base_length
|
37
|
+
to_s(:base).length
|
38
|
+
end
|
39
|
+
|
40
|
+
def full_length
|
41
|
+
to_s.length
|
42
|
+
end
|
43
|
+
|
44
|
+
def offset
|
45
|
+
[starts_at, full_length]
|
46
|
+
end
|
47
|
+
|
48
|
+
def coded_offset
|
49
|
+
'@%d+%d' % offset
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_s(format = :full)
|
53
|
+
"#{text}#{quantifier_affix(format)}"
|
54
|
+
end
|
55
|
+
|
56
|
+
def quantifier_affix(expression_format)
|
57
|
+
quantifier.to_s if quantified? && expression_format != :base
|
58
|
+
end
|
59
|
+
|
60
|
+
def terminal?
|
61
|
+
!respond_to?(:expressions)
|
62
|
+
end
|
63
|
+
|
64
|
+
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
65
|
+
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
66
|
+
end
|
67
|
+
|
68
|
+
def unquantified_clone
|
69
|
+
clone.tap { |exp| exp.quantifier = nil }
|
70
|
+
end
|
71
|
+
|
72
|
+
def quantified?
|
73
|
+
!quantifier.nil?
|
74
|
+
end
|
75
|
+
|
76
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
77
|
+
def quantity
|
78
|
+
return [nil,nil] unless quantified?
|
79
|
+
[quantifier.min, quantifier.max]
|
80
|
+
end
|
81
|
+
|
82
|
+
def repetitions
|
83
|
+
return 1..1 unless quantified?
|
84
|
+
min = quantifier.min
|
85
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
86
|
+
range = min..max
|
87
|
+
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
88
|
+
if RUBY_VERSION.to_f < 2.7
|
89
|
+
range.define_singleton_method(:minmax) { [min, max] }
|
90
|
+
end
|
91
|
+
range
|
92
|
+
end
|
93
|
+
|
94
|
+
def greedy?
|
95
|
+
quantified? and quantifier.greedy?
|
96
|
+
end
|
97
|
+
|
98
|
+
def reluctant?
|
99
|
+
quantified? and quantifier.reluctant?
|
100
|
+
end
|
101
|
+
alias :lazy? :reluctant?
|
102
|
+
|
103
|
+
def possessive?
|
104
|
+
quantified? and quantifier.possessive?
|
105
|
+
end
|
106
|
+
|
107
|
+
def attributes
|
108
|
+
{
|
109
|
+
type: type,
|
110
|
+
token: token,
|
111
|
+
text: to_s(:base),
|
112
|
+
starts_at: ts,
|
113
|
+
length: full_length,
|
114
|
+
level: level,
|
115
|
+
set_level: set_level,
|
116
|
+
conditional_level: conditional_level,
|
117
|
+
options: options,
|
118
|
+
quantifier: quantified? ? quantifier.to_h : nil,
|
119
|
+
}
|
120
|
+
end
|
121
|
+
alias :to_h :attributes
|
122
|
+
end
|
123
|
+
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/lib/regexp_parser/expression/classes/free_space.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class FreeSpace < Regexp::Expression::Base
|
4
3
|
def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
|
5
4
|
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
@@ -13,5 +12,4 @@ module Regexp::Expression
|
|
13
12
|
text << exp.text
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
data/lib/regexp_parser/expression/classes/property.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -116,5 +115,4 @@ module Regexp::Expression
|
|
116
115
|
class Script < UnicodeProperty::Base; end
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
119
|
-
|
120
118
|
end # module Regexp::Expression
|
data/lib/regexp_parser/expression.rb
CHANGED
@@ -1,130 +1,6 @@
|
|
1
1
|
require 'regexp_parser/error'
|
2
2
|
|
3
|
-
|
4
|
-
class Base
|
5
|
-
attr_accessor :type, :token
|
6
|
-
attr_accessor :text, :ts
|
7
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
8
|
-
|
9
|
-
attr_accessor :quantifier
|
10
|
-
attr_accessor :options
|
11
|
-
|
12
|
-
def initialize(token, options = {})
|
13
|
-
self.type = token.type
|
14
|
-
self.token = token.token
|
15
|
-
self.text = token.text
|
16
|
-
self.ts = token.ts
|
17
|
-
self.level = token.level
|
18
|
-
self.set_level = token.set_level
|
19
|
-
self.conditional_level = token.conditional_level
|
20
|
-
self.nesting_level = 0
|
21
|
-
self.quantifier = nil
|
22
|
-
self.options = options
|
23
|
-
end
|
24
|
-
|
25
|
-
def initialize_copy(orig)
|
26
|
-
self.text = (orig.text ? orig.text.dup : nil)
|
27
|
-
self.options = (orig.options ? orig.options.dup : nil)
|
28
|
-
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
29
|
-
super
|
30
|
-
end
|
31
|
-
|
32
|
-
def to_re(format = :full)
|
33
|
-
::Regexp.new(to_s(format))
|
34
|
-
end
|
35
|
-
|
36
|
-
alias :starts_at :ts
|
37
|
-
|
38
|
-
def base_length
|
39
|
-
to_s(:base).length
|
40
|
-
end
|
41
|
-
|
42
|
-
def full_length
|
43
|
-
to_s.length
|
44
|
-
end
|
45
|
-
|
46
|
-
def offset
|
47
|
-
[starts_at, full_length]
|
48
|
-
end
|
49
|
-
|
50
|
-
def coded_offset
|
51
|
-
'@%d+%d' % offset
|
52
|
-
end
|
53
|
-
|
54
|
-
def to_s(format = :full)
|
55
|
-
"#{text}#{quantifier_affix(format)}"
|
56
|
-
end
|
57
|
-
|
58
|
-
def quantifier_affix(expression_format)
|
59
|
-
quantifier.to_s if quantified? && expression_format != :base
|
60
|
-
end
|
61
|
-
|
62
|
-
def terminal?
|
63
|
-
!respond_to?(:expressions)
|
64
|
-
end
|
65
|
-
|
66
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
67
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
68
|
-
end
|
69
|
-
|
70
|
-
def unquantified_clone
|
71
|
-
clone.tap { |exp| exp.quantifier = nil }
|
72
|
-
end
|
73
|
-
|
74
|
-
def quantified?
|
75
|
-
!quantifier.nil?
|
76
|
-
end
|
77
|
-
|
78
|
-
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
79
|
-
def quantity
|
80
|
-
return [nil,nil] unless quantified?
|
81
|
-
[quantifier.min, quantifier.max]
|
82
|
-
end
|
83
|
-
|
84
|
-
def repetitions
|
85
|
-
return 1..1 unless quantified?
|
86
|
-
min = quantifier.min
|
87
|
-
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
88
|
-
range = min..max
|
89
|
-
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
90
|
-
if RUBY_VERSION.to_f < 2.7
|
91
|
-
range.define_singleton_method(:minmax) { [min, max] }
|
92
|
-
end
|
93
|
-
range
|
94
|
-
end
|
95
|
-
|
96
|
-
def greedy?
|
97
|
-
quantified? and quantifier.greedy?
|
98
|
-
end
|
99
|
-
|
100
|
-
def reluctant?
|
101
|
-
quantified? and quantifier.reluctant?
|
102
|
-
end
|
103
|
-
alias :lazy? :reluctant?
|
104
|
-
|
105
|
-
def possessive?
|
106
|
-
quantified? and quantifier.possessive?
|
107
|
-
end
|
108
|
-
|
109
|
-
def attributes
|
110
|
-
{
|
111
|
-
type: type,
|
112
|
-
token: token,
|
113
|
-
text: to_s(:base),
|
114
|
-
starts_at: ts,
|
115
|
-
length: full_length,
|
116
|
-
level: level,
|
117
|
-
set_level: set_level,
|
118
|
-
conditional_level: conditional_level,
|
119
|
-
options: options,
|
120
|
-
quantifier: quantified? ? quantifier.to_h : nil,
|
121
|
-
}
|
122
|
-
end
|
123
|
-
alias :to_h :attributes
|
124
|
-
end
|
125
|
-
|
126
|
-
end # module Regexp::Expression
|
127
|
-
|
3
|
+
require 'regexp_parser/expression/base'
|
128
4
|
require 'regexp_parser/expression/quantifier'
|
129
5
|
require 'regexp_parser/expression/subexpression'
|
130
6
|
require 'regexp_parser/expression/sequence'
|
@@ -132,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
|
|
132
8
|
|
133
9
|
require 'regexp_parser/expression/classes/alternation'
|
134
10
|
require 'regexp_parser/expression/classes/anchor'
|
135
|
-
require 'regexp_parser/expression/classes/backref'
|
11
|
+
require 'regexp_parser/expression/classes/backreference'
|
12
|
+
require 'regexp_parser/expression/classes/character_set'
|
13
|
+
require 'regexp_parser/expression/classes/character_set/intersection'
|
14
|
+
require 'regexp_parser/expression/classes/character_set/range'
|
136
15
|
require 'regexp_parser/expression/classes/conditional'
|
137
|
-
require 'regexp_parser/expression/classes/escape'
|
16
|
+
require 'regexp_parser/expression/classes/escape_sequence'
|
138
17
|
require 'regexp_parser/expression/classes/free_space'
|
139
18
|
require 'regexp_parser/expression/classes/group'
|
140
19
|
require 'regexp_parser/expression/classes/keep'
|
@@ -142,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
|
|
142
21
|
require 'regexp_parser/expression/classes/posix_class'
|
143
22
|
require 'regexp_parser/expression/classes/property'
|
144
23
|
require 'regexp_parser/expression/classes/root'
|
145
|
-
require 'regexp_parser/expression/classes/set'
|
146
|
-
require 'regexp_parser/expression/classes/set/intersection'
|
147
|
-
require 'regexp_parser/expression/classes/set/range'
|
148
24
|
require 'regexp_parser/expression/classes/type'
|
149
25
|
|
150
26
|
require 'regexp_parser/expression/methods/match'
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -4,12 +4,14 @@
|
|
4
4
|
# given syntax flavor.
|
5
5
|
class Regexp::Lexer
|
6
6
|
|
7
|
-
OPENING_TOKENS = [
|
8
|
-
|
9
|
-
|
7
|
+
OPENING_TOKENS = %i[
|
8
|
+
capture passive lookahead nlookahead lookbehind nlookbehind
|
9
|
+
atomic options options_switch named absence
|
10
10
|
].freeze
|
11
11
|
|
12
|
-
CLOSING_TOKENS = [
|
12
|
+
CLOSING_TOKENS = %i[close].freeze
|
13
|
+
|
14
|
+
CONDITION_TOKENS = %i[condition condition_close].freeze
|
13
15
|
|
14
16
|
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
15
17
|
new.lex(input, syntax, options: options, &block)
|
@@ -40,7 +42,7 @@ class Regexp::Lexer
|
|
40
42
|
nesting, set_nesting, conditional_nesting)
|
41
43
|
|
42
44
|
current = merge_condition(current) if type == :conditional and
|
43
|
-
|
45
|
+
CONDITION_TOKENS.include?(token)
|
44
46
|
|
45
47
|
last.next = current if last
|
46
48
|
current.previous = last if last
|
data/lib/regexp_parser/scanner/properties/long.yml
CHANGED
@@ -8,6 +8,7 @@ age=10.0: age=10.0
|
|
8
8
|
age=11.0: age=11.0
|
9
9
|
age=12.0: age=12.0
|
10
10
|
age=12.1: age=12.1
|
11
|
+
age=13.0: age=13.0
|
11
12
|
age=2.0: age=2.0
|
12
13
|
age=2.1: age=2.1
|
13
14
|
age=3.0: age=3.0
|
@@ -64,6 +65,7 @@ changeswhenlowercased: changes_when_lowercased
|
|
64
65
|
changeswhentitlecased: changes_when_titlecased
|
65
66
|
changeswhenuppercased: changes_when_uppercased
|
66
67
|
cherokee: cherokee
|
68
|
+
chorasmian: chorasmian
|
67
69
|
closepunctuation: close_punctuation
|
68
70
|
cntrl: cntrl
|
69
71
|
common: common
|
@@ -83,6 +85,7 @@ deseret: deseret
|
|
83
85
|
devanagari: devanagari
|
84
86
|
diacritic: diacritic
|
85
87
|
digit: digit
|
88
|
+
divesakuru: dives_akuru
|
86
89
|
dogra: dogra
|
87
90
|
duployan: duployan
|
88
91
|
egyptianhieroglyphs: egyptian_hieroglyphs
|
@@ -167,6 +170,7 @@ incham: in_cham
|
|
167
170
|
incherokee: in_cherokee
|
168
171
|
incherokeesupplement: in_cherokee_supplement
|
169
172
|
inchesssymbols: in_chess_symbols
|
173
|
+
inchorasmian: in_chorasmian
|
170
174
|
incjkcompatibility: in_cjk_compatibility
|
171
175
|
incjkcompatibilityforms: in_cjk_compatibility_forms
|
172
176
|
incjkcompatibilityideographs: in_cjk_compatibility_ideographs
|
@@ -181,6 +185,7 @@ incjkunifiedideographsextensionc: in_cjk_unified_ideographs_extension_c
|
|
181
185
|
incjkunifiedideographsextensiond: in_cjk_unified_ideographs_extension_d
|
182
186
|
incjkunifiedideographsextensione: in_cjk_unified_ideographs_extension_e
|
183
187
|
incjkunifiedideographsextensionf: in_cjk_unified_ideographs_extension_f
|
188
|
+
incjkunifiedideographsextensiong: in_cjk_unified_ideographs_extension_g
|
184
189
|
incombiningdiacriticalmarks: in_combining_diacritical_marks
|
185
190
|
incombiningdiacriticalmarksextended: in_combining_diacritical_marks_extended
|
186
191
|
incombiningdiacriticalmarksforsymbols: in_combining_diacritical_marks_for_symbols
|
@@ -204,6 +209,7 @@ indeseret: in_deseret
|
|
204
209
|
indevanagari: in_devanagari
|
205
210
|
indevanagariextended: in_devanagari_extended
|
206
211
|
indingbats: in_dingbats
|
212
|
+
indivesakuru: in_dives_akuru
|
207
213
|
indogra: in_dogra
|
208
214
|
indominotiles: in_domino_tiles
|
209
215
|
induployan: in_duployan
|
@@ -269,6 +275,7 @@ inkatakana: in_katakana
|
|
269
275
|
inkatakanaphoneticextensions: in_katakana_phonetic_extensions
|
270
276
|
inkayahli: in_kayah_li
|
271
277
|
inkharoshthi: in_kharoshthi
|
278
|
+
inkhitansmallscript: in_khitan_small_script
|
272
279
|
inkhmer: in_khmer
|
273
280
|
inkhmersymbols: in_khmer_symbols
|
274
281
|
inkhojki: in_khojki
|
@@ -288,6 +295,7 @@ inlineara: in_linear_a
|
|
288
295
|
inlinearbideograms: in_linear_b_ideograms
|
289
296
|
inlinearbsyllabary: in_linear_b_syllabary
|
290
297
|
inlisu: in_lisu
|
298
|
+
inlisusupplement: in_lisu_supplement
|
291
299
|
inlowsurrogates: in_low_surrogates
|
292
300
|
inlycian: in_lycian
|
293
301
|
inlydian: in_lydian
|
@@ -395,6 +403,7 @@ insupplementaryprivateuseareab: in_supplementary_private_use_area_b
|
|
395
403
|
insuttonsignwriting: in_sutton_signwriting
|
396
404
|
insylotinagri: in_syloti_nagri
|
397
405
|
insymbolsandpictographsextendeda: in_symbols_and_pictographs_extended_a
|
406
|
+
insymbolsforlegacycomputing: in_symbols_for_legacy_computing
|
398
407
|
insyriac: in_syriac
|
399
408
|
insyriacsupplement: in_syriac_supplement
|
400
409
|
intagalog: in_tagalog
|
@@ -409,6 +418,7 @@ intamil: in_tamil
|
|
409
418
|
intamilsupplement: in_tamil_supplement
|
410
419
|
intangut: in_tangut
|
411
420
|
intangutcomponents: in_tangut_components
|
421
|
+
intangutsupplement: in_tangut_supplement
|
412
422
|
intelugu: in_telugu
|
413
423
|
inthaana: in_thaana
|
414
424
|
inthai: in_thai
|
@@ -426,6 +436,7 @@ invedicextensions: in_vedic_extensions
|
|
426
436
|
inverticalforms: in_vertical_forms
|
427
437
|
inwancho: in_wancho
|
428
438
|
inwarangciti: in_warang_citi
|
439
|
+
inyezidi: in_yezidi
|
429
440
|
inyijinghexagramsymbols: in_yijing_hexagram_symbols
|
430
441
|
inyiradicals: in_yi_radicals
|
431
442
|
inyisyllables: in_yi_syllables
|
@@ -437,6 +448,7 @@ kannada: kannada
|
|
437
448
|
katakana: katakana
|
438
449
|
kayahli: kayah_li
|
439
450
|
kharoshthi: kharoshthi
|
451
|
+
khitansmallscript: khitan_small_script
|
440
452
|
khmer: khmer
|
441
453
|
khojki: khojki
|
442
454
|
khudawadi: khudawadi
|
@@ -590,5 +602,6 @@ xdigit: xdigit
|
|
590
602
|
xidcontinue: xid_continue
|
591
603
|
xidstart: xid_start
|
592
604
|
xposixpunct: xposixpunct
|
605
|
+
yezidi: yezidi
|
593
606
|
yi: yi
|
594
607
|
zanabazarsquare: zanabazar_square
|