regexp_parser 2.1.1 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +19 -1
- data/LICENSE +1 -1
- data/README.md +17 -23
- data/Rakefile +10 -19
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +0 -2
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +1 -1
- data/lib/regexp_parser/expression/sequence.rb +0 -1
- data/lib/regexp_parser/expression/subexpression.rb +0 -1
- data/lib/regexp_parser/expression.rb +6 -130
- data/lib/regexp_parser/lexer.rb +7 -5
- data/lib/regexp_parser/scanner/properties/long.csv +604 -0
- data/lib/regexp_parser/scanner/properties/short.csv +242 -0
- data/lib/regexp_parser/scanner/scanner.rl +6 -4
- data/lib/regexp_parser/scanner.rb +126 -124
- data/lib/regexp_parser/syntax/any.rb +1 -3
- data/lib/regexp_parser/syntax/base.rb +12 -14
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +696 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -2
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +1 -1
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +10 -0
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +32 -164
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -104
- data/spec/expression/clone_spec.rb +0 -152
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -108
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -64
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -60
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/options_spec.rb +0 -28
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -68
- data/spec/parser/refcalls_spec.rb +0 -117
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -64
- data/spec/scanner/free_space_spec.rb +0 -165
- data/spec/scanner/groups_spec.rb +0 -61
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -39
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/options_spec.rb +0 -36
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -25
- data/spec/scanner/refcalls_spec.rb +0 -55
- data/spec/scanner/sets_spec.rb +0 -151
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -16
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 381a794200168f95ff6329cc8a01330d21a05e02b75e0b06dcc6bd8f763c111d
|
4
|
+
data.tar.gz: bd7617cb3763e6d759c8e1364aed037ae2fff85af3cf28823476cadd14ff080e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0a039012013e9b57329fd685aaf29386d8b848071e514f59df0acc3437a1dae5c76b6bf94158cc3deece08f3a1fec9437ac84590d97f8590d8dcee1e0dc6c726
|
7
|
+
data.tar.gz: 4d67da41fbef9b9336ccfd02e3a742286bf4ef96d469c8aa2bbb9a6a55ed4aa6027a28b10ba6c9993b15937e3fe51a349632bcf5808f6237cf77a1d29ceb74f2
|
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,22 @@
|
|
1
|
-
## [
|
1
|
+
## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
2
|
+
|
3
|
+
### Fixed
|
4
|
+
|
5
|
+
- fixed Syntax version of absence groups (`(?~...)`)
|
6
|
+
- the lexer accepted them for any Ruby version
|
7
|
+
- now they are only recognized for Ruby >= 2.4.1 in which they were introduced
|
8
|
+
- reduced gem size by excluding specs from package
|
9
|
+
- removed deprecated `test_files` gemspec setting
|
10
|
+
- no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
|
11
|
+
- no longer depend on `set`
|
12
|
+
- `set` was removed from the stdlib and made a standalone gem as of Ruby 3
|
13
|
+
- this made it a hidden/undeclared dependency of `regexp_parser`
|
14
|
+
|
15
|
+
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
16
|
+
|
17
|
+
### Added
|
18
|
+
|
19
|
+
- added support for 13 new unicode properties introduced in Ruby 3.1.0
|
2
20
|
|
3
21
|
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
22
|
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# Regexp::Parser
|
2
2
|
|
3
|
-
[](http://badge.fury.io/rb/regexp_parser)
|
3
|
+
[](http://badge.fury.io/rb/regexp_parser)
|
4
|
+
[](https://github.com/ammar/regexp_parser/actions)
|
5
|
+
[](https://github.com/ammar/regexp_parser/actions)
|
6
|
+
[](https://codeclimate.com/github/ammar/regexp_parser/badges)
|
4
7
|
|
5
8
|
A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
6
9
|
|
@@ -357,12 +360,12 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
360
|
|   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | ✓ |
|
358
361
|
|   _**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | ✓ |
|
359
362
|
| **String Escapes** | | ⋱ |
|
360
|
-
|   _**Control**_
|
363
|
+
|   _**Control** \[1\]_ | `\C-C`, `\cD` | ✓ |
|
361
364
|
|   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
|
362
|
-
|   _**Meta**_
|
365
|
+
|   _**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
363
366
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
364
367
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
365
|
-
| **Unicode Properties** | _<sub>([Unicode
|
368
|
+
| **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | ⋱ |
|
366
369
|
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | ✓ |
|
367
370
|
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | ✓ |
|
368
371
|
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | ✓ |
|
@@ -371,6 +374,10 @@ _Note that not all of these are available in all versions of Ruby_
|
|
371
374
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
372
375
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
373
376
|
|
377
|
+
**\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
|
378
|
+
https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
|
379
|
+
scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
|
380
|
+
|
374
381
|
##### Inapplicable Features
|
375
382
|
|
376
383
|
Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
|
@@ -384,7 +391,6 @@ expressions library (Onigmo). They are not supported by the scanner.
|
|
384
391
|
- **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
|
385
392
|
- **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
|
386
393
|
|
387
|
-
|
388
394
|
See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
|
389
395
|
|
390
396
|
_**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
|
@@ -392,26 +398,14 @@ or incorrectly return tokens/objects as literals._
|
|
392
398
|
|
393
399
|
|
394
400
|
## Testing
|
395
|
-
To run the tests simply run rake from the root directory
|
396
|
-
|
397
|
-
It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
|
398
|
-
|
399
|
-
The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
|
400
|
-
|
401
|
-
```
|
402
|
-
bin/test
|
403
|
-
```
|
404
|
-
|
405
|
-
You can run a specific test like so:
|
401
|
+
To run the tests simply run rake from the root directory.
|
406
402
|
|
407
|
-
|
408
|
-
bin/test spec/scanner/properties_spec.rb
|
409
|
-
```
|
403
|
+
The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
|
410
404
|
|
411
|
-
Note that changes to Ragel files will not be reflected when running `rspec`
|
405
|
+
Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
|
412
406
|
|
413
407
|
```
|
414
|
-
rake ragel:rb &&
|
408
|
+
rake ragel:rb && rspec spec/scanner/properties_spec.rb
|
415
409
|
```
|
416
410
|
|
417
411
|
## Building
|
@@ -443,7 +437,7 @@ Projects using regexp_parser.
|
|
443
437
|
|
444
438
|
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
445
439
|
|
446
|
-
- [mutant](https://github.com/mbj/mutant)
|
440
|
+
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
447
441
|
|
448
442
|
- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
|
449
443
|
|
@@ -476,4 +470,4 @@ Documentation and books used while working on this project.
|
|
476
470
|
|
477
471
|
---
|
478
472
|
##### Copyright
|
479
|
-
_Copyright (c) 2010-
|
473
|
+
_Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
|
data/Rakefile
CHANGED
@@ -1,26 +1,22 @@
|
|
1
|
+
require 'bundler'
|
1
2
|
require 'rubygems'
|
2
|
-
|
3
|
+
require 'rubygems/package_task'
|
3
4
|
require 'rake'
|
4
5
|
require 'rake/testtask'
|
5
|
-
|
6
|
-
require 'bundler'
|
7
|
-
require 'rubygems/package_task'
|
8
|
-
|
6
|
+
require 'rspec/core/rake_task'
|
9
7
|
|
10
8
|
RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
|
11
9
|
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
12
|
-
RAGEL_SOURCE_FILES = %w
|
13
|
-
|
10
|
+
RAGEL_SOURCE_FILES = %w[scanner] # scanner.rl imports the other files
|
14
11
|
|
15
12
|
Bundler::GemHelper.install_tasks
|
16
13
|
|
14
|
+
RSpec::Core::RakeTask.new(:spec)
|
17
15
|
|
18
16
|
task :default => [:'test:full']
|
19
17
|
|
20
18
|
namespace :test do
|
21
|
-
task full: :'ragel:rb'
|
22
|
-
sh 'bin/test'
|
23
|
-
end
|
19
|
+
task full: [:'ragel:rb', :spec]
|
24
20
|
end
|
25
21
|
|
26
22
|
namespace :ragel do
|
@@ -49,13 +45,11 @@ namespace :ragel do
|
|
49
45
|
end
|
50
46
|
end
|
51
47
|
|
52
|
-
|
53
48
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
54
49
|
# latest scanner code is generated and included in the build.
|
55
50
|
desc "Runs ragel:rb before building the gem"
|
56
51
|
task :build => ['ragel:rb']
|
57
52
|
|
58
|
-
|
59
53
|
namespace :props do
|
60
54
|
desc 'Write new property value hashes for the properties scanner'
|
61
55
|
task :update do
|
@@ -63,13 +57,10 @@ namespace :props do
|
|
63
57
|
RegexpPropertyValues.update
|
64
58
|
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
65
59
|
|
66
|
-
require 'psych'
|
67
60
|
write_hash_to_file = ->(hash, path) do
|
68
61
|
File.open(path, 'w') do |f|
|
69
|
-
f.puts
|
70
|
-
|
71
|
-
'#',
|
72
|
-
hash.sort.to_h.to_yaml
|
62
|
+
f.puts "# THIS FILE IS AUTO-GENERATED BY `rake props:update` - DO NOT EDIT",
|
63
|
+
*hash.sort.map { |pair| pair.join(',') }
|
73
64
|
end
|
74
65
|
puts "Wrote #{hash.count} aliases to `#{path}`"
|
75
66
|
end
|
@@ -77,11 +68,11 @@ namespace :props do
|
|
77
68
|
long_names_to_tokens = RegexpPropertyValues.all.map do |val|
|
78
69
|
[val.identifier, val.full_name.downcase]
|
79
70
|
end
|
80
|
-
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.
|
71
|
+
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.csv")
|
81
72
|
|
82
73
|
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
83
74
|
[k.identifier, v.full_name.downcase]
|
84
75
|
end
|
85
|
-
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.
|
76
|
+
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.csv")
|
86
77
|
end
|
87
78
|
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
class Base
|
3
|
+
attr_accessor :type, :token
|
4
|
+
attr_accessor :text, :ts
|
5
|
+
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
6
|
+
|
7
|
+
attr_accessor :quantifier
|
8
|
+
attr_accessor :options
|
9
|
+
|
10
|
+
def initialize(token, options = {})
|
11
|
+
self.type = token.type
|
12
|
+
self.token = token.token
|
13
|
+
self.text = token.text
|
14
|
+
self.ts = token.ts
|
15
|
+
self.level = token.level
|
16
|
+
self.set_level = token.set_level
|
17
|
+
self.conditional_level = token.conditional_level
|
18
|
+
self.nesting_level = 0
|
19
|
+
self.quantifier = nil
|
20
|
+
self.options = options
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize_copy(orig)
|
24
|
+
self.text = (orig.text ? orig.text.dup : nil)
|
25
|
+
self.options = (orig.options ? orig.options.dup : nil)
|
26
|
+
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
27
|
+
super
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_re(format = :full)
|
31
|
+
::Regexp.new(to_s(format))
|
32
|
+
end
|
33
|
+
|
34
|
+
alias :starts_at :ts
|
35
|
+
|
36
|
+
def base_length
|
37
|
+
to_s(:base).length
|
38
|
+
end
|
39
|
+
|
40
|
+
def full_length
|
41
|
+
to_s.length
|
42
|
+
end
|
43
|
+
|
44
|
+
def offset
|
45
|
+
[starts_at, full_length]
|
46
|
+
end
|
47
|
+
|
48
|
+
def coded_offset
|
49
|
+
'@%d+%d' % offset
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_s(format = :full)
|
53
|
+
"#{text}#{quantifier_affix(format)}"
|
54
|
+
end
|
55
|
+
|
56
|
+
def quantifier_affix(expression_format)
|
57
|
+
quantifier.to_s if quantified? && expression_format != :base
|
58
|
+
end
|
59
|
+
|
60
|
+
def terminal?
|
61
|
+
!respond_to?(:expressions)
|
62
|
+
end
|
63
|
+
|
64
|
+
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
65
|
+
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
66
|
+
end
|
67
|
+
|
68
|
+
def unquantified_clone
|
69
|
+
clone.tap { |exp| exp.quantifier = nil }
|
70
|
+
end
|
71
|
+
|
72
|
+
def quantified?
|
73
|
+
!quantifier.nil?
|
74
|
+
end
|
75
|
+
|
76
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
77
|
+
def quantity
|
78
|
+
return [nil,nil] unless quantified?
|
79
|
+
[quantifier.min, quantifier.max]
|
80
|
+
end
|
81
|
+
|
82
|
+
def repetitions
|
83
|
+
return 1..1 unless quantified?
|
84
|
+
min = quantifier.min
|
85
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
86
|
+
range = min..max
|
87
|
+
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
88
|
+
if RUBY_VERSION.to_f < 2.7
|
89
|
+
range.define_singleton_method(:minmax) { [min, max] }
|
90
|
+
end
|
91
|
+
range
|
92
|
+
end
|
93
|
+
|
94
|
+
def greedy?
|
95
|
+
quantified? and quantifier.greedy?
|
96
|
+
end
|
97
|
+
|
98
|
+
def reluctant?
|
99
|
+
quantified? and quantifier.reluctant?
|
100
|
+
end
|
101
|
+
alias :lazy? :reluctant?
|
102
|
+
|
103
|
+
def possessive?
|
104
|
+
quantified? and quantifier.possessive?
|
105
|
+
end
|
106
|
+
|
107
|
+
def attributes
|
108
|
+
{
|
109
|
+
type: type,
|
110
|
+
token: token,
|
111
|
+
text: to_s(:base),
|
112
|
+
starts_at: ts,
|
113
|
+
length: full_length,
|
114
|
+
level: level,
|
115
|
+
set_level: set_level,
|
116
|
+
conditional_level: conditional_level,
|
117
|
+
options: options,
|
118
|
+
quantifier: quantified? ? quantifier.to_h : nil,
|
119
|
+
}
|
120
|
+
end
|
121
|
+
alias :to_h :attributes
|
122
|
+
end
|
123
|
+
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -1,16 +1,22 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
|
2
3
|
module EscapeSequence
|
3
4
|
class Base < Regexp::Expression::Base
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
def char
|
7
|
-
# poor man's unescape without using eval
|
8
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
9
|
-
end
|
10
|
-
|
11
5
|
def codepoint
|
12
6
|
char.ord
|
13
7
|
end
|
8
|
+
|
9
|
+
if ''.respond_to?(:undump)
|
10
|
+
def char
|
11
|
+
%("#{text}").undump
|
12
|
+
end
|
13
|
+
else
|
14
|
+
# poor man's unescape without using eval
|
15
|
+
require 'yaml'
|
16
|
+
def char
|
17
|
+
YAML.load(%Q(---\n"#{text}"\n))
|
18
|
+
end
|
19
|
+
end
|
14
20
|
end
|
15
21
|
|
16
22
|
class Literal < EscapeSequence::Base
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class FreeSpace < Regexp::Expression::Base
|
4
3
|
def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
|
5
4
|
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
@@ -13,5 +12,4 @@ module Regexp::Expression
|
|
13
12
|
text << exp.text
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -116,5 +115,4 @@ module Regexp::Expression
|
|
116
115
|
class Script < UnicodeProperty::Base; end
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
119
|
-
|
120
118
|
end # module Regexp::Expression
|
@@ -43,7 +43,7 @@ module Regexp::Expression
|
|
43
43
|
|
44
44
|
# Order is important! Fields that use other fields in their
|
45
45
|
# definition must appear before the fields they use.
|
46
|
-
part_keys = %w
|
46
|
+
part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
|
47
47
|
part.keys.each {|k| part[k] = "<?#{k}?>"}
|
48
48
|
|
49
49
|
part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
|
@@ -1,130 +1,6 @@
|
|
1
1
|
require 'regexp_parser/error'
|
2
2
|
|
3
|
-
|
4
|
-
class Base
|
5
|
-
attr_accessor :type, :token
|
6
|
-
attr_accessor :text, :ts
|
7
|
-
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
8
|
-
|
9
|
-
attr_accessor :quantifier
|
10
|
-
attr_accessor :options
|
11
|
-
|
12
|
-
def initialize(token, options = {})
|
13
|
-
self.type = token.type
|
14
|
-
self.token = token.token
|
15
|
-
self.text = token.text
|
16
|
-
self.ts = token.ts
|
17
|
-
self.level = token.level
|
18
|
-
self.set_level = token.set_level
|
19
|
-
self.conditional_level = token.conditional_level
|
20
|
-
self.nesting_level = 0
|
21
|
-
self.quantifier = nil
|
22
|
-
self.options = options
|
23
|
-
end
|
24
|
-
|
25
|
-
def initialize_copy(orig)
|
26
|
-
self.text = (orig.text ? orig.text.dup : nil)
|
27
|
-
self.options = (orig.options ? orig.options.dup : nil)
|
28
|
-
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
29
|
-
super
|
30
|
-
end
|
31
|
-
|
32
|
-
def to_re(format = :full)
|
33
|
-
::Regexp.new(to_s(format))
|
34
|
-
end
|
35
|
-
|
36
|
-
alias :starts_at :ts
|
37
|
-
|
38
|
-
def base_length
|
39
|
-
to_s(:base).length
|
40
|
-
end
|
41
|
-
|
42
|
-
def full_length
|
43
|
-
to_s.length
|
44
|
-
end
|
45
|
-
|
46
|
-
def offset
|
47
|
-
[starts_at, full_length]
|
48
|
-
end
|
49
|
-
|
50
|
-
def coded_offset
|
51
|
-
'@%d+%d' % offset
|
52
|
-
end
|
53
|
-
|
54
|
-
def to_s(format = :full)
|
55
|
-
"#{text}#{quantifier_affix(format)}"
|
56
|
-
end
|
57
|
-
|
58
|
-
def quantifier_affix(expression_format)
|
59
|
-
quantifier.to_s if quantified? && expression_format != :base
|
60
|
-
end
|
61
|
-
|
62
|
-
def terminal?
|
63
|
-
!respond_to?(:expressions)
|
64
|
-
end
|
65
|
-
|
66
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
67
|
-
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
68
|
-
end
|
69
|
-
|
70
|
-
def unquantified_clone
|
71
|
-
clone.tap { |exp| exp.quantifier = nil }
|
72
|
-
end
|
73
|
-
|
74
|
-
def quantified?
|
75
|
-
!quantifier.nil?
|
76
|
-
end
|
77
|
-
|
78
|
-
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
79
|
-
def quantity
|
80
|
-
return [nil,nil] unless quantified?
|
81
|
-
[quantifier.min, quantifier.max]
|
82
|
-
end
|
83
|
-
|
84
|
-
def repetitions
|
85
|
-
return 1..1 unless quantified?
|
86
|
-
min = quantifier.min
|
87
|
-
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
88
|
-
range = min..max
|
89
|
-
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
90
|
-
if RUBY_VERSION.to_f < 2.7
|
91
|
-
range.define_singleton_method(:minmax) { [min, max] }
|
92
|
-
end
|
93
|
-
range
|
94
|
-
end
|
95
|
-
|
96
|
-
def greedy?
|
97
|
-
quantified? and quantifier.greedy?
|
98
|
-
end
|
99
|
-
|
100
|
-
def reluctant?
|
101
|
-
quantified? and quantifier.reluctant?
|
102
|
-
end
|
103
|
-
alias :lazy? :reluctant?
|
104
|
-
|
105
|
-
def possessive?
|
106
|
-
quantified? and quantifier.possessive?
|
107
|
-
end
|
108
|
-
|
109
|
-
def attributes
|
110
|
-
{
|
111
|
-
type: type,
|
112
|
-
token: token,
|
113
|
-
text: to_s(:base),
|
114
|
-
starts_at: ts,
|
115
|
-
length: full_length,
|
116
|
-
level: level,
|
117
|
-
set_level: set_level,
|
118
|
-
conditional_level: conditional_level,
|
119
|
-
options: options,
|
120
|
-
quantifier: quantified? ? quantifier.to_h : nil,
|
121
|
-
}
|
122
|
-
end
|
123
|
-
alias :to_h :attributes
|
124
|
-
end
|
125
|
-
|
126
|
-
end # module Regexp::Expression
|
127
|
-
|
3
|
+
require 'regexp_parser/expression/base'
|
128
4
|
require 'regexp_parser/expression/quantifier'
|
129
5
|
require 'regexp_parser/expression/subexpression'
|
130
6
|
require 'regexp_parser/expression/sequence'
|
@@ -132,9 +8,12 @@ require 'regexp_parser/expression/sequence_operation'
|
|
132
8
|
|
133
9
|
require 'regexp_parser/expression/classes/alternation'
|
134
10
|
require 'regexp_parser/expression/classes/anchor'
|
135
|
-
require 'regexp_parser/expression/classes/
|
11
|
+
require 'regexp_parser/expression/classes/backreference'
|
12
|
+
require 'regexp_parser/expression/classes/character_set'
|
13
|
+
require 'regexp_parser/expression/classes/character_set/intersection'
|
14
|
+
require 'regexp_parser/expression/classes/character_set/range'
|
136
15
|
require 'regexp_parser/expression/classes/conditional'
|
137
|
-
require 'regexp_parser/expression/classes/
|
16
|
+
require 'regexp_parser/expression/classes/escape_sequence'
|
138
17
|
require 'regexp_parser/expression/classes/free_space'
|
139
18
|
require 'regexp_parser/expression/classes/group'
|
140
19
|
require 'regexp_parser/expression/classes/keep'
|
@@ -142,9 +21,6 @@ require 'regexp_parser/expression/classes/literal'
|
|
142
21
|
require 'regexp_parser/expression/classes/posix_class'
|
143
22
|
require 'regexp_parser/expression/classes/property'
|
144
23
|
require 'regexp_parser/expression/classes/root'
|
145
|
-
require 'regexp_parser/expression/classes/set'
|
146
|
-
require 'regexp_parser/expression/classes/set/intersection'
|
147
|
-
require 'regexp_parser/expression/classes/set/range'
|
148
24
|
require 'regexp_parser/expression/classes/type'
|
149
25
|
|
150
26
|
require 'regexp_parser/expression/methods/match'
|
data/lib/regexp_parser/lexer.rb
CHANGED
@@ -4,12 +4,14 @@
|
|
4
4
|
# given syntax flavor.
|
5
5
|
class Regexp::Lexer
|
6
6
|
|
7
|
-
OPENING_TOKENS = [
|
8
|
-
|
9
|
-
|
7
|
+
OPENING_TOKENS = %i[
|
8
|
+
capture passive lookahead nlookahead lookbehind nlookbehind
|
9
|
+
atomic options options_switch named absence
|
10
10
|
].freeze
|
11
11
|
|
12
|
-
CLOSING_TOKENS = [
|
12
|
+
CLOSING_TOKENS = %i[close].freeze
|
13
|
+
|
14
|
+
CONDITION_TOKENS = %i[condition condition_close].freeze
|
13
15
|
|
14
16
|
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
|
15
17
|
new.lex(input, syntax, options: options, &block)
|
@@ -40,7 +42,7 @@ class Regexp::Lexer
|
|
40
42
|
nesting, set_nesting, conditional_nesting)
|
41
43
|
|
42
44
|
current = merge_condition(current) if type == :conditional and
|
43
|
-
|
45
|
+
CONDITION_TOKENS.include?(token)
|
44
46
|
|
45
47
|
last.next = current if last
|
46
48
|
current.previous = last if last
|