regexp_parser 2.1.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +31 -0
- data/Gemfile +1 -1
- data/LICENSE +1 -1
- data/README.md +31 -27
- data/Rakefile +6 -70
- data/lib/regexp_parser/expression/base.rb +123 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/property.rb +0 -2
- data/lib/regexp_parser/expression/classes/root.rb +0 -1
- data/lib/regexp_parser/expression/classes/type.rb +0 -2
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +1 -1
- data/lib/regexp_parser/expression/sequence.rb +0 -1
- data/lib/regexp_parser/expression/subexpression.rb +0 -1
- data/lib/regexp_parser/expression.rb +6 -130
- data/lib/regexp_parser/lexer.rb +8 -6
- data/lib/regexp_parser/scanner/properties/long.csv +622 -0
- data/lib/regexp_parser/scanner/properties/short.csv +246 -0
- data/lib/regexp_parser/scanner/scanner.rl +6 -4
- data/lib/regexp_parser/scanner.rb +126 -124
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +91 -66
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +31 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +722 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/syntax.rb +1 -1
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +36 -167
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -104
- data/spec/expression/clone_spec.rb +0 -152
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -108
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -64
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -60
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/options_spec.rb +0 -28
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -68
- data/spec/parser/refcalls_spec.rb +0 -117
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -64
- data/spec/scanner/free_space_spec.rb +0 -165
- data/spec/scanner/groups_spec.rb +0 -61
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -39
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/options_spec.rb +0 -36
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -25
- data/spec/scanner/refcalls_spec.rb +0 -55
- data/spec/scanner/sets_spec.rb +0 -151
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -16
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 369b108d8410e12bd6af5c659f58cb56c583e48780c1b35b6270bb21cc6a4ee7
|
4
|
+
data.tar.gz: 30cd2c0823ae154a2db04c705f898f252774ec8ab9ef304833c5e3546ba7406a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4104bec7dd02a7ea099de9aeacb766fb1a2db50cb52bd84f44e4bde93431d436b75d0f1b3f4d62242713a1eeca3f4d8c0be034270d515979aad8ad2d504880b0
|
7
|
+
data.tar.gz: 11deb2d7c8a6fad3fa9cb18b3f29cae15bab7e12e6cbbc968706dd02c16b0d1a6b1d69f05a5f665f7b46947315b0ea4ecda62dab8ddca8b5ef71f521b877da74
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,36 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Added
|
6
|
+
|
7
|
+
- improved parsing performance through `Syntax` refactoring
|
8
|
+
- instead of fresh `Syntax` instances, pre-loaded constants are now re-used
|
9
|
+
- this approximately doubles the parsing speed for simple regexps
|
10
|
+
- added methods to `Syntax` classes to show relative feature sets
|
11
|
+
- e.g. `Regexp::Syntax::V3_2_0.added_features`
|
12
|
+
- support for new unicode properties of Ruby 3.2 / Unicode 14.0
|
13
|
+
|
14
|
+
## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
|
18
|
+
- fixed Syntax version of absence groups (`(?~...)`)
|
19
|
+
- the lexer accepted them for any Ruby version
|
20
|
+
- now they are only recognized for Ruby >= 2.4.1 in which they were introduced
|
21
|
+
- reduced gem size by excluding specs from package
|
22
|
+
- removed deprecated `test_files` gemspec setting
|
23
|
+
- no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
|
24
|
+
- no longer depend on `set`
|
25
|
+
- `set` was removed from the stdlib and made a standalone gem as of Ruby 3
|
26
|
+
- this made it a hidden/undeclared dependency of `regexp_parser`
|
27
|
+
|
28
|
+
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
29
|
+
|
30
|
+
### Added
|
31
|
+
|
32
|
+
- added support for 13 new unicode properties introduced in Ruby 3.1.0
|
33
|
+
|
3
34
|
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
35
|
|
5
36
|
### Fixed
|
data/Gemfile
CHANGED
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,9 @@
|
|
1
1
|
# Regexp::Parser
|
2
2
|
|
3
|
-
[](http://badge.fury.io/rb/regexp_parser)
|
3
|
+
[](http://badge.fury.io/rb/regexp_parser)
|
4
|
+
[](https://github.com/ammar/regexp_parser/actions)
|
5
|
+
[](https://github.com/ammar/regexp_parser/actions)
|
6
|
+
[](https://codeclimate.com/github/ammar/regexp_parser/badges)
|
4
7
|
|
5
8
|
A Ruby gem for tokenizing, parsing, and transforming regular expressions.
|
6
9
|
|
@@ -154,31 +157,41 @@ flavor). Syntax classes act as lookup tables, and are layered to create
|
|
154
157
|
flavor variations. Syntax only comes into play in the lexer.
|
155
158
|
|
156
159
|
#### Example
|
157
|
-
The following
|
160
|
+
The following fetches syntax objects for Ruby 2.0, 1.9, 1.8, and
|
158
161
|
checks a few of their implementation features.
|
159
162
|
|
160
163
|
```ruby
|
161
164
|
require 'regexp_parser'
|
162
165
|
|
163
|
-
ruby_20 = Regexp::Syntax.
|
166
|
+
ruby_20 = Regexp::Syntax.for 'ruby/2.0'
|
164
167
|
ruby_20.implements? :quantifier, :zero_or_one # => true
|
165
168
|
ruby_20.implements? :quantifier, :zero_or_one_reluctant # => true
|
166
169
|
ruby_20.implements? :quantifier, :zero_or_one_possessive # => true
|
167
170
|
ruby_20.implements? :conditional, :condition # => true
|
168
171
|
|
169
|
-
ruby_19 = Regexp::Syntax.
|
172
|
+
ruby_19 = Regexp::Syntax.for 'ruby/1.9'
|
170
173
|
ruby_19.implements? :quantifier, :zero_or_one # => true
|
171
174
|
ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
|
172
175
|
ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
|
173
176
|
ruby_19.implements? :conditional, :condition # => false
|
174
177
|
|
175
|
-
ruby_18 = Regexp::Syntax.
|
178
|
+
ruby_18 = Regexp::Syntax.for 'ruby/1.8'
|
176
179
|
ruby_18.implements? :quantifier, :zero_or_one # => true
|
177
180
|
ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
|
178
181
|
ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
|
179
182
|
ruby_18.implements? :conditional, :condition # => false
|
180
183
|
```
|
181
184
|
|
185
|
+
Syntax objects can also be queried about their complete and relative feature sets.
|
186
|
+
|
187
|
+
```ruby
|
188
|
+
require 'regexp_parser'
|
189
|
+
|
190
|
+
ruby_20 = Regexp::Syntax.for 'ruby/2.0' # => Regexp::Syntax::V2_0_0
|
191
|
+
ruby_20.added_features # => { conditional: [...], ... }
|
192
|
+
ruby_20.removed_features # => { property: [:newline], ... }
|
193
|
+
ruby_20.features # => { anchor: [...], ... }
|
194
|
+
```
|
182
195
|
|
183
196
|
#### Notes
|
184
197
|
* Variations on a token, for example a named group with angle brackets (< and >)
|
@@ -357,12 +370,12 @@ _Note that not all of these are available in all versions of Ruby_
|
|
357
370
|
|   _**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | ✓ |
|
358
371
|
|   _**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | ✓ |
|
359
372
|
| **String Escapes** | | ⋱ |
|
360
|
-
|   _**Control**_
|
373
|
+
|   _**Control** \[1\]_ | `\C-C`, `\cD` | ✓ |
|
361
374
|
|   _**Hex**_ | `\x20`, `\x{701230}` | ✓ |
|
362
|
-
|   _**Meta**_
|
375
|
+
|   _**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | ✓ |
|
363
376
|
|   _**Octal**_ | `\0`, `\01`, `\012` | ✓ |
|
364
377
|
|   _**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | ✓ |
|
365
|
-
| **Unicode Properties** | _<sub>([Unicode
|
378
|
+
| **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | ⋱ |
|
366
379
|
|   _**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | ✓ |
|
367
380
|
|   _**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | ✓ |
|
368
381
|
|   _**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | ✓ |
|
@@ -371,6 +384,10 @@ _Note that not all of these are available in all versions of Ruby_
|
|
371
384
|
|   _**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | ✓ |
|
372
385
|
|   _**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | ✓ |
|
373
386
|
|
387
|
+
**\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
|
388
|
+
https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
|
389
|
+
scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
|
390
|
+
|
374
391
|
##### Inapplicable Features
|
375
392
|
|
376
393
|
Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
|
@@ -384,7 +401,6 @@ expressions library (Onigmo). They are not supported by the scanner.
|
|
384
401
|
- **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
|
385
402
|
- **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
|
386
403
|
|
387
|
-
|
388
404
|
See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
|
389
405
|
|
390
406
|
_**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
|
@@ -392,26 +408,14 @@ or incorrectly return tokens/objects as literals._
|
|
392
408
|
|
393
409
|
|
394
410
|
## Testing
|
395
|
-
To run the tests simply run rake from the root directory
|
396
|
-
|
397
|
-
It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
|
411
|
+
To run the tests simply run rake from the root directory.
|
398
412
|
|
399
|
-
The
|
400
|
-
|
401
|
-
```
|
402
|
-
bin/test
|
403
|
-
```
|
404
|
-
|
405
|
-
You can run a specific test like so:
|
406
|
-
|
407
|
-
```
|
408
|
-
bin/test spec/scanner/properties_spec.rb
|
409
|
-
```
|
413
|
+
The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
|
410
414
|
|
411
|
-
Note that changes to Ragel files will not be reflected when running `rspec`
|
415
|
+
Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
|
412
416
|
|
413
417
|
```
|
414
|
-
rake ragel:rb &&
|
418
|
+
rake ragel:rb && rspec spec/scanner/properties_spec.rb
|
415
419
|
```
|
416
420
|
|
417
421
|
## Building
|
@@ -443,7 +447,7 @@ Projects using regexp_parser.
|
|
443
447
|
|
444
448
|
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
445
449
|
|
446
|
-
- [mutant](https://github.com/mbj/mutant)
|
450
|
+
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
447
451
|
|
448
452
|
- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
|
449
453
|
|
@@ -476,4 +480,4 @@ Documentation and books used while working on this project.
|
|
476
480
|
|
477
481
|
---
|
478
482
|
##### Copyright
|
479
|
-
_Copyright (c) 2010-
|
483
|
+
_Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
|
data/Rakefile
CHANGED
@@ -1,87 +1,23 @@
|
|
1
|
+
require 'bundler'
|
1
2
|
require 'rubygems'
|
2
|
-
|
3
|
+
require 'rubygems/package_task'
|
3
4
|
require 'rake'
|
4
5
|
require 'rake/testtask'
|
6
|
+
require 'rspec/core/rake_task'
|
5
7
|
|
6
|
-
|
7
|
-
require 'rubygems/package_task'
|
8
|
-
|
9
|
-
|
10
|
-
RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
|
11
|
-
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
12
|
-
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
13
|
-
|
8
|
+
Dir['tasks/**/*.rake'].each { |file| load(file) }
|
14
9
|
|
15
10
|
Bundler::GemHelper.install_tasks
|
16
11
|
|
12
|
+
RSpec::Core::RakeTask.new(:spec)
|
17
13
|
|
18
14
|
task :default => [:'test:full']
|
19
15
|
|
20
16
|
namespace :test do
|
21
|
-
task full: :'ragel:rb'
|
22
|
-
sh 'bin/test'
|
23
|
-
end
|
17
|
+
task full: [:'ragel:rb', :spec]
|
24
18
|
end
|
25
19
|
|
26
|
-
namespace :ragel do
|
27
|
-
desc "Process the ragel source files and output ruby code"
|
28
|
-
task :rb do
|
29
|
-
RAGEL_SOURCE_FILES.each do |source_file|
|
30
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
|
31
|
-
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
32
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
|
33
|
-
|
34
|
-
contents = File.read(output_file)
|
35
|
-
|
36
|
-
File.open(output_file, 'r+') do |file|
|
37
|
-
contents = "# -*- warn-indent:false; -*-\n" + contents
|
38
|
-
|
39
|
-
file.write(contents)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
desc "Delete the ragel generated source file(s)"
|
45
|
-
task :clean do
|
46
|
-
RAGEL_SOURCE_FILES.each do |file|
|
47
|
-
sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
|
53
20
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
54
21
|
# latest scanner code is generated and included in the build.
|
55
22
|
desc "Runs ragel:rb before building the gem"
|
56
23
|
task :build => ['ragel:rb']
|
57
|
-
|
58
|
-
|
59
|
-
namespace :props do
|
60
|
-
desc 'Write new property value hashes for the properties scanner'
|
61
|
-
task :update do
|
62
|
-
require 'regexp_property_values'
|
63
|
-
RegexpPropertyValues.update
|
64
|
-
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
65
|
-
|
66
|
-
require 'psych'
|
67
|
-
write_hash_to_file = ->(hash, path) do
|
68
|
-
File.open(path, 'w') do |f|
|
69
|
-
f.puts '#',
|
70
|
-
"# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
|
71
|
-
'#',
|
72
|
-
hash.sort.to_h.to_yaml
|
73
|
-
end
|
74
|
-
puts "Wrote #{hash.count} aliases to `#{path}`"
|
75
|
-
end
|
76
|
-
|
77
|
-
long_names_to_tokens = RegexpPropertyValues.all.map do |val|
|
78
|
-
[val.identifier, val.full_name.downcase]
|
79
|
-
end
|
80
|
-
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
|
81
|
-
|
82
|
-
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
83
|
-
[k.identifier, v.full_name.downcase]
|
84
|
-
end
|
85
|
-
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
|
86
|
-
end
|
87
|
-
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
class Base
|
3
|
+
attr_accessor :type, :token
|
4
|
+
attr_accessor :text, :ts
|
5
|
+
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
6
|
+
|
7
|
+
attr_accessor :quantifier
|
8
|
+
attr_accessor :options
|
9
|
+
|
10
|
+
def initialize(token, options = {})
|
11
|
+
self.type = token.type
|
12
|
+
self.token = token.token
|
13
|
+
self.text = token.text
|
14
|
+
self.ts = token.ts
|
15
|
+
self.level = token.level
|
16
|
+
self.set_level = token.set_level
|
17
|
+
self.conditional_level = token.conditional_level
|
18
|
+
self.nesting_level = 0
|
19
|
+
self.quantifier = nil
|
20
|
+
self.options = options
|
21
|
+
end
|
22
|
+
|
23
|
+
def initialize_copy(orig)
|
24
|
+
self.text = (orig.text ? orig.text.dup : nil)
|
25
|
+
self.options = (orig.options ? orig.options.dup : nil)
|
26
|
+
self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
|
27
|
+
super
|
28
|
+
end
|
29
|
+
|
30
|
+
def to_re(format = :full)
|
31
|
+
::Regexp.new(to_s(format))
|
32
|
+
end
|
33
|
+
|
34
|
+
alias :starts_at :ts
|
35
|
+
|
36
|
+
def base_length
|
37
|
+
to_s(:base).length
|
38
|
+
end
|
39
|
+
|
40
|
+
def full_length
|
41
|
+
to_s.length
|
42
|
+
end
|
43
|
+
|
44
|
+
def offset
|
45
|
+
[starts_at, full_length]
|
46
|
+
end
|
47
|
+
|
48
|
+
def coded_offset
|
49
|
+
'@%d+%d' % offset
|
50
|
+
end
|
51
|
+
|
52
|
+
def to_s(format = :full)
|
53
|
+
"#{text}#{quantifier_affix(format)}"
|
54
|
+
end
|
55
|
+
|
56
|
+
def quantifier_affix(expression_format)
|
57
|
+
quantifier.to_s if quantified? && expression_format != :base
|
58
|
+
end
|
59
|
+
|
60
|
+
def terminal?
|
61
|
+
!respond_to?(:expressions)
|
62
|
+
end
|
63
|
+
|
64
|
+
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
65
|
+
self.quantifier = Quantifier.new(token, text, min, max, mode)
|
66
|
+
end
|
67
|
+
|
68
|
+
def unquantified_clone
|
69
|
+
clone.tap { |exp| exp.quantifier = nil }
|
70
|
+
end
|
71
|
+
|
72
|
+
def quantified?
|
73
|
+
!quantifier.nil?
|
74
|
+
end
|
75
|
+
|
76
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
77
|
+
def quantity
|
78
|
+
return [nil,nil] unless quantified?
|
79
|
+
[quantifier.min, quantifier.max]
|
80
|
+
end
|
81
|
+
|
82
|
+
def repetitions
|
83
|
+
return 1..1 unless quantified?
|
84
|
+
min = quantifier.min
|
85
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
86
|
+
range = min..max
|
87
|
+
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
88
|
+
if RUBY_VERSION.to_f < 2.7
|
89
|
+
range.define_singleton_method(:minmax) { [min, max] }
|
90
|
+
end
|
91
|
+
range
|
92
|
+
end
|
93
|
+
|
94
|
+
def greedy?
|
95
|
+
quantified? and quantifier.greedy?
|
96
|
+
end
|
97
|
+
|
98
|
+
def reluctant?
|
99
|
+
quantified? and quantifier.reluctant?
|
100
|
+
end
|
101
|
+
alias :lazy? :reluctant?
|
102
|
+
|
103
|
+
def possessive?
|
104
|
+
quantified? and quantifier.possessive?
|
105
|
+
end
|
106
|
+
|
107
|
+
def attributes
|
108
|
+
{
|
109
|
+
type: type,
|
110
|
+
token: token,
|
111
|
+
text: to_s(:base),
|
112
|
+
starts_at: ts,
|
113
|
+
length: full_length,
|
114
|
+
level: level,
|
115
|
+
set_level: set_level,
|
116
|
+
conditional_level: conditional_level,
|
117
|
+
options: options,
|
118
|
+
quantifier: quantified? ? quantifier.to_h : nil,
|
119
|
+
}
|
120
|
+
end
|
121
|
+
alias :to_h :attributes
|
122
|
+
end
|
123
|
+
end
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
@@ -1,16 +1,22 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
+
# TODO: unify naming with Token::Escape, on way or the other, in v3.0.0
|
2
3
|
module EscapeSequence
|
3
4
|
class Base < Regexp::Expression::Base
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
def char
|
7
|
-
# poor man's unescape without using eval
|
8
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
9
|
-
end
|
10
|
-
|
11
5
|
def codepoint
|
12
6
|
char.ord
|
13
7
|
end
|
8
|
+
|
9
|
+
if ''.respond_to?(:undump)
|
10
|
+
def char
|
11
|
+
%("#{text}").undump
|
12
|
+
end
|
13
|
+
else
|
14
|
+
# poor man's unescape without using eval
|
15
|
+
require 'yaml'
|
16
|
+
def char
|
17
|
+
YAML.load(%Q(---\n"#{text}"\n))
|
18
|
+
end
|
19
|
+
end
|
14
20
|
end
|
15
21
|
|
16
22
|
class Literal < EscapeSequence::Base
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class FreeSpace < Regexp::Expression::Base
|
4
3
|
def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
|
5
4
|
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
@@ -13,5 +12,4 @@ module Regexp::Expression
|
|
13
12
|
text << exp.text
|
14
13
|
end
|
15
14
|
end
|
16
|
-
|
17
15
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -116,5 +115,4 @@ module Regexp::Expression
|
|
116
115
|
class Script < UnicodeProperty::Base; end
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
119
|
-
|
120
118
|
end # module Regexp::Expression
|
@@ -43,7 +43,7 @@ module Regexp::Expression
|
|
43
43
|
|
44
44
|
# Order is important! Fields that use other fields in their
|
45
45
|
# definition must appear before the fields they use.
|
46
|
-
part_keys = %w
|
46
|
+
part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
|
47
47
|
part.keys.each {|k| part[k] = "<?#{k}?>"}
|
48
48
|
|
49
49
|
part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
|