regexp_parser 2.1.1 → 2.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +15 -21
- data/Rakefile +5 -11
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +1 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +0 -2
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/quantifier.rb +1 -1
- data/lib/regexp_parser/expression/sequence.rb +0 -1
- data/lib/regexp_parser/expression/subexpression.rb +0 -1
- data/lib/regexp_parser/expression.rb +6 -130
- data/lib/regexp_parser/lexer.rb +7 -5
- data/lib/regexp_parser/scanner/properties/long.yml +13 -0
- data/lib/regexp_parser/scanner/properties/short.yml +9 -1
- data/lib/regexp_parser/syntax/any.rb +1 -3
- data/lib/regexp_parser/syntax/base.rb +9 -9
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens/backref.rb → token/backreference.rb} +6 -5
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/spec/lexer/nesting_spec.rb +2 -2
- data/spec/parser/escapes_spec.rb +43 -31
- data/spec/parser/properties_spec.rb +6 -4
- data/spec/parser/set/ranges_spec.rb +26 -16
- data/spec/scanner/escapes_spec.rb +28 -19
- data/spec/scanner/sets_spec.rb +9 -9
- data/spec/spec_helper.rb +13 -1
- data/spec/support/capturing_stderr.rb +9 -0
- data/spec/syntax/versions/1.8.6_spec.rb +2 -2
- data/spec/syntax/versions/2.0.0_spec.rb +2 -2
- data/spec/syntax/versions/aliases_spec.rb +1 -0
- metadata +26 -26
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/support/runner.rb +0 -42
- data/spec/support/warning_extractor.rb +0 -60
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 332259c898b9b344e10961053bb2b761f4dd5530182a5f6195639dba9cbb99f9
|
4
|
+
data.tar.gz: b537f9bd23db799ee562494633f1e8423501651540a04b634ae07dfe8f3b19c3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 393ecc1cc20189e4a79252e6acf6dab7dd6dc07ba9c47ae7479746eaf8ebe2ccfd1ebcb82fd027edc2c5c938eb490f2f36a93587d2405a54017e0e2727a35a15
|
7
|
+
data.tar.gz: 6c961232ce5f3f409c91d0b66dd23c809e92f47aa6c1f94f2f1929e8eeccfb4bc25fcdf5935fc968d7e0c0ae632992a6d38bc8e982858f2da996a8eac54d3c89
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Added
|
6
|
+
|
7
|
+
- Added support for 13 new unicode properties introduced in Ruby 3.1.0-dev
|
8
|
+
|
3
9
|
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
10
|
|
5
11
|
### Fixed
|
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# Regexp::Parser
|
2
2
|
|
3
|
-
[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
|
4
|
+
[![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions)
|
5
|
+
[![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions)
|
6
|
+
[![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
|
4
7
|
|
5
8
|
A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
6
9
|
|
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
360
|
|   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | ✓ |
|
358
361
|
|   _**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | ✓ |
|
359
362
|
| **String Escapes** | | ⋱ |
|
360
|
-
|   _**Control**_ | `\C-C`, `\cD` | ✓ |
|
363
|
+
|   _**Control** \[1\]_ | `\C-C`, `\cD` | ✓ |
|
361
364
|
|   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
|
362
|
-
|   _**Meta**_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
365
|
+
|   _**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
363
366
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
364
367
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
365
|
-
| **Unicode Properties** | _<sub>([Unicode
|
368
|
+
| **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | ⋱ |
|
366
369
|
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | ✓ |
|
367
370
|
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | ✓ |
|
368
371
|
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | ✓ |
|
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
|
|
371
374
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
372
375
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
373
376
|
|
377
|
+
**\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
|
378
|
+
https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
|
379
|
+
scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
|
380
|
+
|
374
381
|
##### Inapplicable Features
|
375
382
|
|
376
383
|
Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
|
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
|
|
384
391
|
- **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
|
385
392
|
- **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
|
386
393
|
|
387
|
-
|
388
394
|
See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
|
389
395
|
|
390
396
|
_**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
|
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
|
|
392
398
|
|
393
399
|
|
394
400
|
## Testing
|
395
|
-
To run the tests simply run rake from the root directory
|
396
|
-
|
397
|
-
It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
|
398
|
-
|
399
|
-
The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
|
400
|
-
|
401
|
-
```
|
402
|
-
bin/test
|
403
|
-
```
|
404
|
-
|
405
|
-
You can run a specific test like so:
|
401
|
+
To run the tests simply run rake from the root directory.
|
406
402
|
|
407
|
-
```
|
408
|
-
bin/test spec/scanner/properties_spec.rb
|
409
|
-
```
|
403
|
+
The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
|
410
404
|
|
411
|
-
Note that changes to Ragel files will not be reflected when running `rspec`
|
405
|
+
Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
|
412
406
|
|
413
407
|
```
|
414
|
-
rake ragel:rb && bin/test spec/scanner/properties_spec.rb
|
408
|
+
rake ragel:rb && rspec spec/scanner/properties_spec.rb
|
415
409
|
```
|
416
410
|
|
417
411
|
## Building
|
data/Rakefile
CHANGED
@@ -1,26 +1,22 @@
|
|
1
|
+
require 'bundler'
|
1
2
|
require 'rubygems'
|
2
|
-
|
3
|
+
require 'rubygems/package_task'
|
3
4
|
require 'rake'
|
4
5
|
require 'rake/testtask'
|
5
|
-
|
6
|
-
require 'bundler'
|
7
|
-
require 'rubygems/package_task'
|
8
|
-
|
6
|
+
require 'rspec/core/rake_task'
|
9
7
|
|
10
8
|
RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
|
11
9
|
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
12
10
|
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
13
11
|
|
14
|
-
|
15
12
|
Bundler::GemHelper.install_tasks
|
16
13
|
|
14
|
+
RSpec::Core::RakeTask.new(:spec)
|
17
15
|
|
18
16
|
task :default => [:'test:full']
|
19
17
|
|
20
18
|
namespace :test do
|
21
|
-
task full: :'ragel:rb'
|
22
|
-
sh 'bin/test'
|
23
|
-
end
|
19
|
+
task full: [:'ragel:rb', :spec]
|
24
20
|
end
|
25
21
|
|
26
22
|
namespace :ragel do
|
@@ -49,13 +45,11 @@ namespace :ragel do
|
|
49
45
|
end
|
50
46
|
end
|
51
47
|
|
52
|
-
|
53
48
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
54
49
|
# latest scanner code is generated and included in the build.
|
55
50
|
desc "Runs ragel:rb before building the gem"
|
56
51
|
task :build => ['ragel:rb']
|
57
52
|
|
58
|
-
|
59
53
|
namespace :props do
|
60
54
|
desc 'Write new property value hashes for the properties scanner'
|
61
55
|
task :update do
|
data/lib/regexp_parser/expression/base.rb
ADDED
@@ -0,0 +1,123 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
class Base
|
3
|
+
attr_accessor :type, :token
|
4
|
+
attr_accessor :text, :ts
|
5
|
+
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
6
|
+
|
7
|
+
attr_accessor :quantifier
|
8
|
+
attr_accessor :options
|
9
|
+
|
10
|
+
def initialize(token, options = {})
|
11
|
+
self.type = token.type
|
12
|
+
self.token = token.token
|
13
|
+
self.text = token.text
|
14
|
+
self.ts = token.ts
|
15
|
+
self.level = token.level
|
16
|
+
self.set_level = token.set_level
|
17
|
+
self.conditional_level = token.conditional_level
|
18
|
+
self.nesting_level = 0
|
19
|
+
self.quantifier = nil
|
20
|
+
self.options = options
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize_copy(orig)
|
24
|
+
self.text = (orig.text ? orig.text.dup : nil)
|
25
|
+
self.options = (orig.options ? orig.options.dup : nil)
|
26
|
+
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
27
|
+
super
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_re(format = :full)
|
31
|
+
::Regexp.new(to_s(format))
|
32
|
+
end
|
33
|
+
|
34
|
+
alias :starts_at :ts
|
35
|
+
|
36
|
+
def base_length
|
37
|
+
to_s(:base).length
|
38
|
+
end
|
39
|
+
|
40
|
+
def full_length
|
41
|
+
to_s.length
|
42
|
+
end
|
43
|
+
|
44
|
+
def offset
|
45
|
+
[starts_at, full_length]
|
46
|
+
end
|
47
|
+
|
48
|
+
def coded_offset
|
49
|
+
'@%d+%d' % offset
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_s(format = :full)
|
53
|
+
"#{text}#{quantifier_affix(format)}"
|
54
|
+
end
|
55
|
+
|
56
|
+
def quantifier_affix(expression_format)
|
57
|
+
quantifier.to_s if quantified? && expression_format != :base
|
58
|
+
end
|
59
|
+
|
60
|
+
def terminal?
|
61
|
+
!respond_to?(:expressions)
|
62
|
+
end
|
63
|
+
|
64
|
+
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
65
|
+
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
66
|
+
end
|
67
|
+
|
68
|
+
def unquantified_clone
|
69
|
+
clone.tap { |exp| exp.quantifier = nil }
|
70
|
+
end
|
71
|
+
|
72
|
+
def quantified?
|
73
|
+
!quantifier.nil?
|
74
|
+
end
|
75
|
+
|
76
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
77
|
+
def quantity
|
78
|
+
return [nil,nil] unless quantified?
|
79
|
+
[quantifier.min, quantifier.max]
|
80
|
+
end
|
81
|
+
|
82
|
+
def repetitions
|
83
|
+
return 1..1 unless quantified?
|
84
|
+
min = quantifier.min
|
85
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
86
|
+
range = min..max
|
87
|
+
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
88
|
+
if RUBY_VERSION.to_f < 2.7
|
89
|
+
range.define_singleton_method(:minmax) { [min, max] }
|
90
|
+
end
|
91
|
+
range
|
92
|
+
end
|
93
|
+
|
94
|
+
def greedy?
|
95
|
+
quantified? and quantifier.greedy?
|
96
|
+
end
|
97
|
+
|
98
|
+
def reluctant?
|
99
|
+
quantified? and quantifier.reluctant?
|
100
|
+
end
|
101
|
+
alias :lazy? :reluctant?
|
102
|
+
|
103
|
+
def possessive?
|
104
|
+
quantified? and quantifier.possessive?
|
105
|
+
end
|
106
|
+
|
107
|
+
def attributes
|
108
|
+
{
|
109
|
+
type: type,
|
110
|
+
token: token,
|
111
|
+
text: to_s(:base),
|
112
|
+
starts_at: ts,
|
113
|
+
length: full_length,
|
114
|
+
level: level,
|
115
|
+
set_level: set_level,
|
116
|
+
conditional_level: conditional_level,
|
117
|
+
options: options,
|
118
|
+
quantifier: quantified? ? quantifier.to_h : nil,
|
119
|
+
}
|
120
|
+
end
|
121
|
+
alias :to_h :attributes
|
122
|
+
end
|
123
|
+
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
data/lib/regexp_parser/expression/classes/free_space.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class FreeSpace < Regexp::Expression::Base
|
4
3
|
def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
|
5
4
|
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
@@ -13,5 +12,4 @@ module Regexp::Expression
|
|
13
12
|
text << exp.text
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
data/lib/regexp_parser/expression/classes/property.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -116,5 +115,4 @@ module Regexp::Expression
|
|
116
115
|
class Script < UnicodeProperty::Base; end
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
119
|
-
|
120
118
|
end # module Regexp::Expression
|
data/lib/regexp_parser/expression.rb
CHANGED
@@ -1,130 +1,6 @@
|
|
1
1
|
require 'regexp_parser/error'
|
2
2
|
|
3
|
-
module Regexp::Expression
|
4
|
-
class Base
|
5
|
-
attr_accessor :type, :token
|
6
|
-
attr_accessor :text, :ts
|
7
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
8
|
-
|
9
|
-
attr_accessor :quantifier
|
10
|
-
attr_accessor :options
|
11
|
-
|
12
|
-
def initialize(token, options = {})
|
13
|
-
self.type = token.type
|
14
|
-
self.token = token.token
|
15
|
-
self.text = token.text
|
16
|
-
self.ts = token.ts
|
17
|
-
self.level = token.level
|
18
|
-
self.set_level = token.set_level
|
19
|
-
self.conditional_level = token.conditional_level
|
20
|
-
self.nesting_level = 0
|
21
|
-
self.quantifier = nil
|
22
|
-
self.options = options
|
23
|
-
end
|
24
|
-
|
25
|
-
def initialize_copy(orig)
|
26
|
-
self.text = (orig.text ? orig.text.dup : nil)
|
27
|
-
self.options = (orig.options ? orig.options.dup : nil)
|
28
|
-
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
29
|
-
super
|
30
|
-
end
|
31
|
-
|
32
|
-
def to_re(format = :full)
|
33
|
-
::Regexp.new(to_s(format))
|
34
|
-
end
|
35
|
-
|
36
|
-
alias :starts_at :ts
|
37
|
-
|
38
|
-
def base_length
|
39
|
-
to_s(:base).length
|
40
|
-
end
|
41
|
-
|
42
|
-
def full_length
|
43
|
-
to_s.length
|
44
|
-
end
|
45
|
-
|
46
|
-
def offset
|
47
|
-
[starts_at, full_length]
|
48
|
-
end
|
49
|
-
|
50
|
-
def coded_offset
|
51
|
-
'@%d+%d' % offset
|
52
|
-
end
|
53
|
-
|
54
|
-
def to_s(format = :full)
|
55
|
-
"#{text}#{quantifier_affix(format)}"
|
56
|
-
end
|
57
|
-
|
58
|
-
def quantifier_affix(expression_format)
|
59
|
-
quantifier.to_s if quantified? && expression_format != :base
|
60
|
-
end
|
61
|
-
|
62
|
-
def terminal?
|
63
|
-
!respond_to?(:expressions)
|
64
|
-
end
|
65
|
-
|
66
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
67
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
68
|
-
end
|
69
|
-
|
70
|
-
def unquantified_clone
|
71
|
-
clone.tap { |exp| exp.quantifier = nil }
|
72
|
-
end
|
73
|
-
|
74
|
-
def quantified?
|
75
|
-
!quantifier.nil?
|
76
|
-
end
|
77
|
-
|
78
|
-
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
79
|
-
def quantity
|
80
|
-
return [nil,nil] unless quantified?
|
81
|
-
[quantifier.min, quantifier.max]
|
82
|
-
end
|
83
|
-
|
84
|
-
def repetitions
|
85
|
-
return 1..1 unless quantified?
|
86
|
-
min = quantifier.min
|
87
|
-
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
88
|
-
range = min..max
|
89
|
-
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
90
|
-
if RUBY_VERSION.to_f < 2.7
|
91
|
-
range.define_singleton_method(:minmax) { [min, max] }
|
92
|
-
end
|
93
|
-
range
|
94
|
-
end
|
95
|
-
|
96
|
-
def greedy?
|
97
|
-
quantified? and quantifier.greedy?
|
98
|
-
end
|
99
|
-
|
100
|
-
def reluctant?
|
101
|
-
quantified? and quantifier.reluctant?
|
102
|
-
end
|
103
|
-
alias :lazy? :reluctant?
|
104
|
-
|
105
|
-
def possessive?
|
106
|
-
quantified? and quantifier.possessive?
|
107
|
-
end
|
108
|
-
|
109
|
-
def attributes
|
110
|
-
{
|
111
|
-
type: type,
|
112
|
-
token: token,
|
113
|
-
text: to_s(:base),
|
114
|
-
starts_at: ts,
|
115
|
-
length: full_length,
|
116
|
-
level: level,
|
117
|
-
set_level: set_level,
|
118
|
-
conditional_level: conditional_level,
|
119
|
-
options: options,
|
120
|
-
quantifier: quantified? ? quantifier.to_h : nil,
|
121
|
-
}
|
122
|
-
end
|
123
|
-
alias :to_h :attributes
|
124
|
-
end
|
125
|
-
|
126
|
-
end # module Regexp::Expression
|
127
|
-
|
3
|
+
require 'regexp_parser/expression/base'
|
128
4
|
require 'regexp_parser/expression/quantifier'
|
129
5
|
require 'regexp_parser/expression/subexpression'
|
130
6
|
require 'regexp_parser/expression/sequence'
|
@@ -132,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
|
|
132
8
|
|
133
9
|
require 'regexp_parser/expression/classes/alternation'
|
134
10
|
require 'regexp_parser/expression/classes/anchor'
|
135
|
-
require 'regexp_parser/expression/classes/backref'
|
11
|
+
require 'regexp_parser/expression/classes/backreference'
|
12
|
+
require 'regexp_parser/expression/classes/character_set'
|
13
|
+
require 'regexp_parser/expression/classes/character_set/intersection'
|
14
|
+
require 'regexp_parser/expression/classes/character_set/range'
|
136
15
|
require 'regexp_parser/expression/classes/conditional'
|
137
|
-
require 'regexp_parser/expression/classes/escape'
|
16
|
+
require 'regexp_parser/expression/classes/escape_sequence'
|
138
17
|
require 'regexp_parser/expression/classes/free_space'
|
139
18
|
require 'regexp_parser/expression/classes/group'
|
140
19
|
require 'regexp_parser/expression/classes/keep'
|
@@ -142,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
|
|
142
21
|
require 'regexp_parser/expression/classes/posix_class'
|
143
22
|
require 'regexp_parser/expression/classes/property'
|
144
23
|
require 'regexp_parser/expression/classes/root'
|
145
|
-
require 'regexp_parser/expression/classes/set'
|
146
|
-
require 'regexp_parser/expression/classes/set/intersection'
|
147
|
-
require 'regexp_parser/expression/classes/set/range'
|
148
24
|
require 'regexp_parser/expression/classes/type'
|
149
25
|
|
150
26
|
require 'regexp_parser/expression/methods/match'
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -4,12 +4,14 @@
|
|
4
4
|
# given syntax flavor.
|
5
5
|
class Regexp::Lexer
|
6
6
|
|
7
|
-
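OPENING_TOKENS = [
|
8
|
-
:capture, :passive, :lookahead, :nlookahead, :lookbehind, :nlookbehind,
|
9
|
-
:atomic, :options, :options_switch, :named, :absence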
|
7
|
+
OPENING_TOKENS = %i[
|
8
|
+
capture passive lookahead nlookahead lookbehind nlookbehind
|
9
|
+
atomic options options_switch named absence
|
10
10
|
].freeze
|
11
11
|
|
12
|
-
CLOSING_TOKENS = [:close].freeze
|
12
|
+
CLOSING_TOKENS = %i[close].freeze
|
13
|
+
|
14
|
+
CONDITION_TOKENS = %i[condition condition_close].freeze
|
13
15
|
|
14
16
|
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
15
17
|
new.lex(input, syntax, options: options, &block)
|
@@ -40,7 +42,7 @@ class Regexp::Lexer
|
|
40
42
|
nesting, set_nesting, conditional_nesting)
|
41
43
|
|
42
44
|
current = merge_condition(current) if type == :conditional and
|
43
|
-
[:condition, :condition_close].include?(token)
|
45
|
+
CONDITION_TOKENS.include?(token)
|
44
46
|
|
45
47
|
last.next = current if last
|
46
48
|
current.previous = last if last
|
data/lib/regexp_parser/scanner/properties/long.yml
CHANGED
@@ -8,6 +8,7 @@ age=10.0: age=10.0
|
|
8
8
|
age=11.0: age=11.0
|
9
9
|
age=12.0: age=12.0
|
10
10
|
age=12.1: age=12.1
|
11
|
+
age=13.0: age=13.0
|
11
12
|
age=2.0: age=2.0
|
12
13
|
age=2.1: age=2.1
|
13
14
|
age=3.0: age=3.0
|
@@ -64,6 +65,7 @@ changeswhenlowercased: changes_when_lowercased
|
|
64
65
|
changeswhentitlecased: changes_when_titlecased
|
65
66
|
changeswhenuppercased: changes_when_uppercased
|
66
67
|
cherokee: cherokee
|
68
|
+
chorasmian: chorasmian
|
67
69
|
closepunctuation: close_punctuation
|
68
70
|
cntrl: cntrl
|
69
71
|
common: common
|
@@ -83,6 +85,7 @@ deseret: deseret
|
|
83
85
|
devanagari: devanagari
|
84
86
|
diacritic: diacritic
|
85
87
|
digit: digit
|
88
|
+
divesakuru: dives_akuru
|
86
89
|
dogra: dogra
|
87
90
|
duployan: duployan
|
88
91
|
egyptianhieroglyphs: egyptian_hieroglyphs
|
@@ -167,6 +170,7 @@ incham: in_cham
|
|
167
170
|
incherokee: in_cherokee
|
168
171
|
incherokeesupplement: in_cherokee_supplement
|
169
172
|
inchesssymbols: in_chess_symbols
|
173
|
+
inchorasmian: in_chorasmian
|
170
174
|
incjkcompatibility: in_cjk_compatibility
|
171
175
|
incjkcompatibilityforms: in_cjk_compatibility_forms
|
172
176
|
incjkcompatibilityideographs: in_cjk_compatibility_ideographs
|
@@ -181,6 +185,7 @@ incjkunifiedideographsextensionc: in_cjk_unified_ideographs_extension_c
|
|
181
185
|
incjkunifiedideographsextensiond: in_cjk_unified_ideographs_extension_d
|
182
186
|
incjkunifiedideographsextensione: in_cjk_unified_ideographs_extension_e
|
183
187
|
incjkunifiedideographsextensionf: in_cjk_unified_ideographs_extension_f
|
188
|
+
incjkunifiedideographsextensiong: in_cjk_unified_ideographs_extension_g
|
184
189
|
incombiningdiacriticalmarks: in_combining_diacritical_marks
|
185
190
|
incombiningdiacriticalmarksextended: in_combining_diacritical_marks_extended
|
186
191
|
incombiningdiacriticalmarksforsymbols: in_combining_diacritical_marks_for_symbols
|
@@ -204,6 +209,7 @@ indeseret: in_deseret
|
|
204
209
|
indevanagari: in_devanagari
|
205
210
|
indevanagariextended: in_devanagari_extended
|
206
211
|
indingbats: in_dingbats
|
212
|
+
indivesakuru: in_dives_akuru
|
207
213
|
indogra: in_dogra
|
208
214
|
indominotiles: in_domino_tiles
|
209
215
|
induployan: in_duployan
|
@@ -269,6 +275,7 @@ inkatakana: in_katakana
|
|
269
275
|
inkatakanaphoneticextensions: in_katakana_phonetic_extensions
|
270
276
|
inkayahli: in_kayah_li
|
271
277
|
inkharoshthi: in_kharoshthi
|
278
|
+
inkhitansmallscript: in_khitan_small_script
|
272
279
|
inkhmer: in_khmer
|
273
280
|
inkhmersymbols: in_khmer_symbols
|
274
281
|
inkhojki: in_khojki
|
@@ -288,6 +295,7 @@ inlineara: in_linear_a
|
|
288
295
|
inlinearbideograms: in_linear_b_ideograms
|
289
296
|
inlinearbsyllabary: in_linear_b_syllabary
|
290
297
|
inlisu: in_lisu
|
298
|
+
inlisusupplement: in_lisu_supplement
|
291
299
|
inlowsurrogates: in_low_surrogates
|
292
300
|
inlycian: in_lycian
|
293
301
|
inlydian: in_lydian
|
@@ -395,6 +403,7 @@ insupplementaryprivateuseareab: in_supplementary_private_use_area_b
|
|
395
403
|
insuttonsignwriting: in_sutton_signwriting
|
396
404
|
insylotinagri: in_syloti_nagri
|
397
405
|
insymbolsandpictographsextendeda: in_symbols_and_pictographs_extended_a
|
406
|
+
insymbolsforlegacycomputing: in_symbols_for_legacy_computing
|
398
407
|
insyriac: in_syriac
|
399
408
|
insyriacsupplement: in_syriac_supplement
|
400
409
|
intagalog: in_tagalog
|
@@ -409,6 +418,7 @@ intamil: in_tamil
|
|
409
418
|
intamilsupplement: in_tamil_supplement
|
410
419
|
intangut: in_tangut
|
411
420
|
intangutcomponents: in_tangut_components
|
421
|
+
intangutsupplement: in_tangut_supplement
|
412
422
|
intelugu: in_telugu
|
413
423
|
inthaana: in_thaana
|
414
424
|
inthai: in_thai
|
@@ -426,6 +436,7 @@ invedicextensions: in_vedic_extensions
|
|
426
436
|
inverticalforms: in_vertical_forms
|
427
437
|
inwancho: in_wancho
|
428
438
|
inwarangciti: in_warang_citi
|
439
|
+
inyezidi: in_yezidi
|
429
440
|
inyijinghexagramsymbols: in_yijing_hexagram_symbols
|
430
441
|
inyiradicals: in_yi_radicals
|
431
442
|
inyisyllables: in_yi_syllables
|
@@ -437,6 +448,7 @@ kannada: kannada
|
|
437
448
|
katakana: katakana
|
438
449
|
kayahli: kayah_li
|
439
450
|
kharoshthi: kharoshthi
|
451
|
+
khitansmallscript: khitan_small_script
|
440
452
|
khmer: khmer
|
441
453
|
khojki: khojki
|
442
454
|
khudawadi: khudawadi
|
@@ -590,5 +602,6 @@ xdigit: xdigit
|
|
590
602
|
xidcontinue: xid_continue
|
591
603
|
xidstart: xid_start
|
592
604
|
xposixpunct: xposixpunct
|
605
|
+
yezidi: yezidi
|
593
606
|
yi: yi
|
594
607
|
zanabazarsquare: zanabazar_square
|