regexp_parser 2.1.1 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (143)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +31 -0
  3. data/Gemfile +1 -1
  4. data/LICENSE +1 -1
  5. data/README.md +31 -27
  6. data/Rakefile +6 -70
  7. data/lib/regexp_parser/expression/base.rb +123 -0
  8. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  9. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +0 -0
  10. data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
  11. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +0 -0
  12. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +0 -0
  13. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +13 -7
  14. data/lib/regexp_parser/expression/classes/free_space.rb +0 -2
  15. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  16. data/lib/regexp_parser/expression/classes/property.rb +0 -2
  17. data/lib/regexp_parser/expression/classes/root.rb +0 -1
  18. data/lib/regexp_parser/expression/classes/type.rb +0 -2
  19. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  20. data/lib/regexp_parser/expression/quantifier.rb +1 -1
  21. data/lib/regexp_parser/expression/sequence.rb +0 -1
  22. data/lib/regexp_parser/expression/subexpression.rb +0 -1
  23. data/lib/regexp_parser/expression.rb +6 -130
  24. data/lib/regexp_parser/lexer.rb +8 -6
  25. data/lib/regexp_parser/scanner/properties/long.csv +622 -0
  26. data/lib/regexp_parser/scanner/properties/short.csv +246 -0
  27. data/lib/regexp_parser/scanner/scanner.rl +6 -4
  28. data/lib/regexp_parser/scanner.rb +126 -124
  29. data/lib/regexp_parser/syntax/any.rb +2 -7
  30. data/lib/regexp_parser/syntax/base.rb +91 -66
  31. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  32. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  33. data/lib/regexp_parser/syntax/token/backreference.rb +30 -0
  34. data/lib/regexp_parser/syntax/{tokens → token}/character_set.rb +2 -2
  35. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  36. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  37. data/lib/regexp_parser/syntax/token/escape.rb +31 -0
  38. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  39. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  40. data/lib/regexp_parser/syntax/{tokens → token}/meta.rb +2 -2
  41. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  42. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  43. data/lib/regexp_parser/syntax/token/unicode_property.rb +722 -0
  44. data/lib/regexp_parser/syntax/token.rb +45 -0
  45. data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
  46. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  47. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  48. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  49. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  50. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  51. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  52. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  53. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  54. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  55. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  56. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  57. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  58. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  59. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  60. data/lib/regexp_parser/syntax/versions.rb +1 -1
  61. data/lib/regexp_parser/syntax.rb +1 -1
  62. data/lib/regexp_parser/token.rb +9 -20
  63. data/lib/regexp_parser/version.rb +1 -1
  64. data/lib/regexp_parser.rb +0 -2
  65. data/regexp_parser.gemspec +20 -22
  66. metadata +36 -167
  67. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  68. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  69. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  70. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  71. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  72. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  73. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  74. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  75. data/spec/expression/base_spec.rb +0 -104
  76. data/spec/expression/clone_spec.rb +0 -152
  77. data/spec/expression/conditional_spec.rb +0 -89
  78. data/spec/expression/free_space_spec.rb +0 -27
  79. data/spec/expression/methods/match_length_spec.rb +0 -161
  80. data/spec/expression/methods/match_spec.rb +0 -25
  81. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  82. data/spec/expression/methods/tests_spec.rb +0 -99
  83. data/spec/expression/methods/traverse_spec.rb +0 -161
  84. data/spec/expression/options_spec.rb +0 -128
  85. data/spec/expression/subexpression_spec.rb +0 -50
  86. data/spec/expression/to_h_spec.rb +0 -26
  87. data/spec/expression/to_s_spec.rb +0 -108
  88. data/spec/lexer/all_spec.rb +0 -22
  89. data/spec/lexer/conditionals_spec.rb +0 -53
  90. data/spec/lexer/delimiters_spec.rb +0 -68
  91. data/spec/lexer/escapes_spec.rb +0 -14
  92. data/spec/lexer/keep_spec.rb +0 -10
  93. data/spec/lexer/literals_spec.rb +0 -64
  94. data/spec/lexer/nesting_spec.rb +0 -99
  95. data/spec/lexer/refcalls_spec.rb +0 -60
  96. data/spec/parser/all_spec.rb +0 -43
  97. data/spec/parser/alternation_spec.rb +0 -88
  98. data/spec/parser/anchors_spec.rb +0 -17
  99. data/spec/parser/conditionals_spec.rb +0 -179
  100. data/spec/parser/errors_spec.rb +0 -30
  101. data/spec/parser/escapes_spec.rb +0 -121
  102. data/spec/parser/free_space_spec.rb +0 -130
  103. data/spec/parser/groups_spec.rb +0 -108
  104. data/spec/parser/keep_spec.rb +0 -6
  105. data/spec/parser/options_spec.rb +0 -28
  106. data/spec/parser/posix_classes_spec.rb +0 -8
  107. data/spec/parser/properties_spec.rb +0 -115
  108. data/spec/parser/quantifiers_spec.rb +0 -68
  109. data/spec/parser/refcalls_spec.rb +0 -117
  110. data/spec/parser/set/intersections_spec.rb +0 -127
  111. data/spec/parser/set/ranges_spec.rb +0 -111
  112. data/spec/parser/sets_spec.rb +0 -178
  113. data/spec/parser/types_spec.rb +0 -18
  114. data/spec/scanner/all_spec.rb +0 -18
  115. data/spec/scanner/anchors_spec.rb +0 -21
  116. data/spec/scanner/conditionals_spec.rb +0 -128
  117. data/spec/scanner/delimiters_spec.rb +0 -52
  118. data/spec/scanner/errors_spec.rb +0 -67
  119. data/spec/scanner/escapes_spec.rb +0 -64
  120. data/spec/scanner/free_space_spec.rb +0 -165
  121. data/spec/scanner/groups_spec.rb +0 -61
  122. data/spec/scanner/keep_spec.rb +0 -10
  123. data/spec/scanner/literals_spec.rb +0 -39
  124. data/spec/scanner/meta_spec.rb +0 -18
  125. data/spec/scanner/options_spec.rb +0 -36
  126. data/spec/scanner/properties_spec.rb +0 -64
  127. data/spec/scanner/quantifiers_spec.rb +0 -25
  128. data/spec/scanner/refcalls_spec.rb +0 -55
  129. data/spec/scanner/sets_spec.rb +0 -151
  130. data/spec/scanner/types_spec.rb +0 -14
  131. data/spec/spec_helper.rb +0 -16
  132. data/spec/support/runner.rb +0 -42
  133. data/spec/support/shared_examples.rb +0 -77
  134. data/spec/support/warning_extractor.rb +0 -60
  135. data/spec/syntax/syntax_spec.rb +0 -48
  136. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  137. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  138. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  139. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  140. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  141. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  142. data/spec/syntax/versions/aliases_spec.rb +0 -37
  143. data/spec/token/token_spec.rb +0 -85
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 077b8a0c90d90cf46e44671ec1335a5373eef72c61a0bcf4de43ba5217a188c3
4
- data.tar.gz: b9aed868af73adcdf40c09720c5d10091b25a53b25a792717ceb5591039a2931
3
+ metadata.gz: 369b108d8410e12bd6af5c659f58cb56c583e48780c1b35b6270bb21cc6a4ee7
4
+ data.tar.gz: 30cd2c0823ae154a2db04c705f898f252774ec8ab9ef304833c5e3546ba7406a
5
5
  SHA512:
6
- metadata.gz: 9c04d9a6434c6e3f322e97e8e2a1c86b3ddda88bd8821368a37b92f5836e4c3df1dc27a79165303420c3e8d5eea31bda1483824da01a40ce30961b645ba65ddd
7
- data.tar.gz: 01e5c261e9dca0c4df7c696128dbc0520ca40aa6b9393cc8d6c3bdb8386470aeb773566000b811f98c1407038216c8d2c0b444c7955ea5a881ac759796f8a440
6
+ metadata.gz: 4104bec7dd02a7ea099de9aeacb766fb1a2db50cb52bd84f44e4bde93431d436b75d0f1b3f4d62242713a1eeca3f4d8c0be034270d515979aad8ad2d504880b0
7
+ data.tar.gz: 11deb2d7c8a6fad3fa9cb18b3f29cae15bab7e12e6cbbc968706dd02c16b0d1a6b1d69f05a5f665f7b46947315b0ea4ecda62dab8ddca8b5ef71f521b877da74
data/CHANGELOG.md CHANGED
@@ -1,5 +1,36 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
4
+
5
+ ### Added
6
+
7
+ - improved parsing performance through `Syntax` refactoring
8
+ - instead of fresh `Syntax` instances, pre-loaded constants are now re-used
9
+ - this approximately doubles the parsing speed for simple regexps
10
+ - added methods to `Syntax` classes to show relative feature sets
11
+ - e.g. `Regexp::Syntax::V3_2_0.added_features`
12
+ - support for new unicode properties of Ruby 3.2 / Unicode 14.0
13
+
14
+ ## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
15
+
16
+ ### Fixed
17
+
18
+ - fixed Syntax version of absence groups (`(?~...)`)
19
+ - the lexer accepted them for any Ruby version
20
+ - now they are only recognized for Ruby >= 2.4.1 in which they were introduced
21
+ - reduced gem size by excluding specs from package
22
+ - removed deprecated `test_files` gemspec setting
23
+ - no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
24
+ - no longer depend on `set`
25
+ - `set` was removed from the stdlib and made a standalone gem as of Ruby 3
26
+ - this made it a hidden/undeclared dependency of `regexp_parser`
27
+
28
+ ## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
29
+
30
+ ### Added
31
+
32
+ - added support for 13 new unicode properties introduced in Ruby 3.1.0
33
+
3
34
  ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
4
35
 
5
36
  ### Fixed
data/Gemfile CHANGED
@@ -5,7 +5,7 @@ gemspec
5
5
  group :development, :test do
6
6
  gem 'ice_nine', '~> 0.11.2'
7
7
  gem 'rake', '~> 13.0'
8
- gem 'regexp_property_values', '~> 1.0'
8
+ gem 'regexp_property_values', '~> 1.3'
9
9
  gem 'rspec', '~> 3.10'
10
10
  if RUBY_VERSION.to_f >= 2.7
11
11
  gem 'gouteur'
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2015, Ammar Ali
1
+ Copyright (c) 2010, 2012-2022, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation
data/README.md CHANGED
@@ -1,6 +1,9 @@
1
1
  # Regexp::Parser
2
2
 
3
- [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser) [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions) [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
3
+ [![Gem Version](https://badge.fury.io/rb/regexp_parser.svg)](http://badge.fury.io/rb/regexp_parser)
4
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/tests/badge.svg)](https://github.com/ammar/regexp_parser/actions)
5
+ [![Build Status](https://github.com/ammar/regexp_parser/workflows/gouteur/badge.svg)](https://github.com/ammar/regexp_parser/actions)
6
+ [![Code Climate](https://codeclimate.com/github/ammar/regexp_parser.svg)](https://codeclimate.com/github/ammar/regexp_parser/badges)
4
7
 
5
8
  A Ruby gem for tokenizing, parsing, and transforming regular expressions.
6
9
 
@@ -154,31 +157,41 @@ flavor). Syntax classes act as lookup tables, and are layered to create
154
157
  flavor variations. Syntax only comes into play in the lexer.
155
158
 
156
159
  #### Example
157
- The following instantiates syntax objects for Ruby 2.0, 1.9, 1.8, and
160
+ The following fetches syntax objects for Ruby 2.0, 1.9, 1.8, and
158
161
  checks a few of their implementation features.
159
162
 
160
163
  ```ruby
161
164
  require 'regexp_parser'
162
165
 
163
- ruby_20 = Regexp::Syntax.new 'ruby/2.0'
166
+ ruby_20 = Regexp::Syntax.for 'ruby/2.0'
164
167
  ruby_20.implements? :quantifier, :zero_or_one # => true
165
168
  ruby_20.implements? :quantifier, :zero_or_one_reluctant # => true
166
169
  ruby_20.implements? :quantifier, :zero_or_one_possessive # => true
167
170
  ruby_20.implements? :conditional, :condition # => true
168
171
 
169
- ruby_19 = Regexp::Syntax.new 'ruby/1.9'
172
+ ruby_19 = Regexp::Syntax.for 'ruby/1.9'
170
173
  ruby_19.implements? :quantifier, :zero_or_one # => true
171
174
  ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
172
175
  ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
173
176
  ruby_19.implements? :conditional, :condition # => false
174
177
 
175
- ruby_18 = Regexp::Syntax.new 'ruby/1.8'
178
+ ruby_18 = Regexp::Syntax.for 'ruby/1.8'
176
179
  ruby_18.implements? :quantifier, :zero_or_one # => true
177
180
  ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
178
181
  ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
179
182
  ruby_18.implements? :conditional, :condition # => false
180
183
  ```
181
184
 
185
+ Syntax objects can also be queried about their complete and relative feature sets.
186
+
187
+ ```ruby
188
+ require 'regexp_parser'
189
+
190
+ ruby_20 = Regexp::Syntax.for 'ruby/2.0' # => Regexp::Syntax::V2_0_0
191
+ ruby_20.added_features # => { conditional: [...], ... }
192
+ ruby_20.removed_features # => { property: [:newline], ... }
193
+ ruby_20.features # => { anchor: [...], ... }
194
+ ```
182
195
 
183
196
  #### Notes
184
197
  * Variations on a token, for example a named group with angle brackets (< and >)
@@ -357,12 +370,12 @@ _Note that not all of these are available in all versions of Ruby_
357
370
  | &emsp;&nbsp;_**Reluctant** (Lazy)_ | `??`, `*?`, `+?`, `{m,M}?` | &#x2713; |
358
371
  | &emsp;&nbsp;_**Possessive**_ | `?+`, `*+`, `++`, `{m,M}+` | &#x2713; |
359
372
  | **String Escapes** | | &#x22f1; |
360
- | &emsp;&nbsp;_**Control**_ | `\C-C`, `\cD` | &#x2713; |
373
+ | &emsp;&nbsp;_**Control** \[1\]_ | `\C-C`, `\cD` | &#x2713; |
361
374
  | &emsp;&nbsp;_**Hex**_ | `\x20`, `\x{701230}` | &#x2713; |
362
- | &emsp;&nbsp;_**Meta**_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
375
+ | &emsp;&nbsp;_**Meta** \[1\]_ | `\M-c`, `\M-\C-C`, `\M-\cC`, `\C-\M-C`, `\c\M-C` | &#x2713; |
363
376
  | &emsp;&nbsp;_**Octal**_ | `\0`, `\01`, `\012` | &#x2713; |
364
377
  | &emsp;&nbsp;_**Unicode**_ | `\uHHHH`, `\u{H+ H+}` | &#x2713; |
365
- | **Unicode Properties** | _<sub>([Unicode 11.0.0](http://www.unicode.org/versions/Unicode11.0.0/))</sub>_ | &#x22f1; |
378
+ | **Unicode Properties** | _<sub>([Unicode 13.0.0](https://www.unicode.org/versions/Unicode13.0.0/))</sub>_ | &#x22f1; |
366
379
  | &emsp;&nbsp;_**Age**_ | `\p{Age=5.2}`, `\P{age=7.0}`, `\p{^age=8.0}` | &#x2713; |
367
380
  | &emsp;&nbsp;_**Blocks**_ | `\p{InArmenian}`, `\P{InKhmer}`, `\p{^InThai}` | &#x2713; |
368
381
  | &emsp;&nbsp;_**Classes**_ | `\p{Alpha}`, `\P{Space}`, `\p{^Alnum}` | &#x2713; |
@@ -371,6 +384,10 @@ _Note that not all of these are available in all versions of Ruby_
371
384
  | &emsp;&nbsp;_**Scripts**_ | `\p{Arabic}`, `\P{Hiragana}`, `\p{^Greek}` | &#x2713; |
372
385
  | &emsp;&nbsp;_**Simple**_ | `\p{Dash}`, `\p{Extender}`, `\p{^Hyphen}` | &#x2713; |
373
386
 
387
+ **\[1\]**: As of Ruby 3.1, meta and control sequences are [pre-processed to hex escapes when used in Regexp literals](
388
+ https://github.com/ruby/ruby/commit/11ae581a4a7f5d5f5ec6378872eab8f25381b1b9 ), so they will only reach the
389
+ scanner and will only be emitted if a String or a Regexp that has been built with the `::new` constructor is scanned.
390
+
374
391
  ##### Inapplicable Features
375
392
 
376
393
  Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
@@ -384,7 +401,6 @@ expressions library (Onigmo). They are not supported by the scanner.
384
401
  - **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
385
402
  - **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
386
403
 
387
-
388
404
  See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
389
405
 
390
406
  _**Note**: Attempting to process expressions with unsupported syntax features can raise an error,
@@ -392,26 +408,14 @@ or incorrectly return tokens/objects as literals._
392
408
 
393
409
 
394
410
  ## Testing
395
- To run the tests simply run rake from the root directory, as 'test' is the default task.
396
-
397
- It generates the scanner's code from the Ragel source files and runs all the tests, thus it requires Ragel to be installed.
411
+ To run the tests simply run rake from the root directory.
398
412
 
399
- The tests use RSpec. They can also be run with the test runner that whitelists some warnings:
400
-
401
- ```
402
- bin/test
403
- ```
404
-
405
- You can run a specific test like so:
406
-
407
- ```
408
- bin/test spec/scanner/properties_spec.rb
409
- ```
413
+ The default task generates the scanner's code from the Ragel source files and runs all the specs, thus it requires Ragel to be installed.
410
414
 
411
- Note that changes to Ragel files will not be reflected when running `rspec` or `bin/test`, so you might want to run:
415
+ Note that changes to Ragel files will not be reflected when running `rspec` on its own, so to run individual tests you might want to run:
412
416
 
413
417
  ```
414
- rake ragel:rb && bin/test spec/scanner/properties_spec.rb
418
+ rake ragel:rb && rspec spec/scanner/properties_spec.rb
415
419
  ```
416
420
 
417
421
  ## Building
@@ -443,7 +447,7 @@ Projects using regexp_parser.
443
447
 
444
448
  - [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
445
449
 
446
- - [mutant](https://github.com/mbj/mutant) (before v0.9.0) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
450
+ - [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
447
451
 
448
452
  - [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
449
453
 
@@ -476,4 +480,4 @@ Documentation and books used while working on this project.
476
480
 
477
481
  ---
478
482
  ##### Copyright
479
- _Copyright (c) 2010-2020 Ammar Ali. See LICENSE file for details._
483
+ _Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
data/Rakefile CHANGED
@@ -1,87 +1,23 @@
1
+ require 'bundler'
1
2
  require 'rubygems'
2
-
3
+ require 'rubygems/package_task'
3
4
  require 'rake'
4
5
  require 'rake/testtask'
6
+ require 'rspec/core/rake_task'
5
7
 
6
- require 'bundler'
7
- require 'rubygems/package_task'
8
-
9
-
10
- RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
11
- RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
12
- RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
13
-
8
+ Dir['tasks/**/*.rake'].each { |file| load(file) }
14
9
 
15
10
  Bundler::GemHelper.install_tasks
16
11
 
12
+ RSpec::Core::RakeTask.new(:spec)
17
13
 
18
14
  task :default => [:'test:full']
19
15
 
20
16
  namespace :test do
21
- task full: :'ragel:rb' do
22
- sh 'bin/test'
23
- end
17
+ task full: [:'ragel:rb', :spec]
24
18
  end
25
19
 
26
- namespace :ragel do
27
- desc "Process the ragel source files and output ruby code"
28
- task :rb do
29
- RAGEL_SOURCE_FILES.each do |source_file|
30
- output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
31
- # using faster flat table driven FSM, about 25% larger code, but about 30% faster
32
- sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
33
-
34
- contents = File.read(output_file)
35
-
36
- File.open(output_file, 'r+') do |file|
37
- contents = "# -*- warn-indent:false; -*-\n" + contents
38
-
39
- file.write(contents)
40
- end
41
- end
42
- end
43
-
44
- desc "Delete the ragel generated source file(s)"
45
- task :clean do
46
- RAGEL_SOURCE_FILES.each do |file|
47
- sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
48
- end
49
- end
50
- end
51
-
52
-
53
20
  # Add ragel task as a prerequisite for building the gem to ensure that the
54
21
  # latest scanner code is generated and included in the build.
55
22
  desc "Runs ragel:rb before building the gem"
56
23
  task :build => ['ragel:rb']
57
-
58
-
59
- namespace :props do
60
- desc 'Write new property value hashes for the properties scanner'
61
- task :update do
62
- require 'regexp_property_values'
63
- RegexpPropertyValues.update
64
- dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
65
-
66
- require 'psych'
67
- write_hash_to_file = ->(hash, path) do
68
- File.open(path, 'w') do |f|
69
- f.puts '#',
70
- "# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
71
- '#',
72
- hash.sort.to_h.to_yaml
73
- end
74
- puts "Wrote #{hash.count} aliases to `#{path}`"
75
- end
76
-
77
- long_names_to_tokens = RegexpPropertyValues.all.map do |val|
78
- [val.identifier, val.full_name.downcase]
79
- end
80
- write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
81
-
82
- short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
83
- [k.identifier, v.full_name.downcase]
84
- end
85
- write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
86
- end
87
- end
@@ -0,0 +1,123 @@
1
+ module Regexp::Expression
2
+ class Base
3
+ attr_accessor :type, :token
4
+ attr_accessor :text, :ts
5
+ attr_accessor :level, :set_level, :conditional_level, :nesting_level
6
+
7
+ attr_accessor :quantifier
8
+ attr_accessor :options
9
+
10
+ def initialize(token, options = {})
11
+ self.type = token.type
12
+ self.token = token.token
13
+ self.text = token.text
14
+ self.ts = token.ts
15
+ self.level = token.level
16
+ self.set_level = token.set_level
17
+ self.conditional_level = token.conditional_level
18
+ self.nesting_level = 0
19
+ self.quantifier = nil
20
+ self.options = options
21
+ end
22
+
23
+ def initialize_copy(orig)
24
+ self.text = (orig.text ? orig.text.dup : nil)
25
+ self.options = (orig.options ? orig.options.dup : nil)
26
+ self.quantifier = (orig.quantifier ? orig.quantifier.clone : nil)
27
+ super
28
+ end
29
+
30
+ def to_re(format = :full)
31
+ ::Regexp.new(to_s(format))
32
+ end
33
+
34
+ alias :starts_at :ts
35
+
36
+ def base_length
37
+ to_s(:base).length
38
+ end
39
+
40
+ def full_length
41
+ to_s.length
42
+ end
43
+
44
+ def offset
45
+ [starts_at, full_length]
46
+ end
47
+
48
+ def coded_offset
49
+ '@%d+%d' % offset
50
+ end
51
+
52
+ def to_s(format = :full)
53
+ "#{text}#{quantifier_affix(format)}"
54
+ end
55
+
56
+ def quantifier_affix(expression_format)
57
+ quantifier.to_s if quantified? && expression_format != :base
58
+ end
59
+
60
+ def terminal?
61
+ !respond_to?(:expressions)
62
+ end
63
+
64
+ def quantify(token, text, min = nil, max = nil, mode = :greedy)
65
+ self.quantifier = Quantifier.new(token, text, min, max, mode)
66
+ end
67
+
68
+ def unquantified_clone
69
+ clone.tap { |exp| exp.quantifier = nil }
70
+ end
71
+
72
+ def quantified?
73
+ !quantifier.nil?
74
+ end
75
+
76
+ # Deprecated. Prefer `#repetitions` which has a more uniform interface.
77
+ def quantity
78
+ return [nil,nil] unless quantified?
79
+ [quantifier.min, quantifier.max]
80
+ end
81
+
82
+ def repetitions
83
+ return 1..1 unless quantified?
84
+ min = quantifier.min
85
+ max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
86
+ range = min..max
87
+ # fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
88
+ if RUBY_VERSION.to_f < 2.7
89
+ range.define_singleton_method(:minmax) { [min, max] }
90
+ end
91
+ range
92
+ end
93
+
94
+ def greedy?
95
+ quantified? and quantifier.greedy?
96
+ end
97
+
98
+ def reluctant?
99
+ quantified? and quantifier.reluctant?
100
+ end
101
+ alias :lazy? :reluctant?
102
+
103
+ def possessive?
104
+ quantified? and quantifier.possessive?
105
+ end
106
+
107
+ def attributes
108
+ {
109
+ type: type,
110
+ token: token,
111
+ text: to_s(:base),
112
+ starts_at: ts,
113
+ length: full_length,
114
+ level: level,
115
+ set_level: set_level,
116
+ conditional_level: conditional_level,
117
+ options: options,
118
+ quantifier: quantified? ? quantifier.to_h : nil,
119
+ }
120
+ end
121
+ alias :to_h :attributes
122
+ end
123
+ end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module Anchor
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -22,5 +21,4 @@ module Regexp::Expression
22
21
  EOS = EndOfString
23
22
  EOSobEOL = EndOfStringOrBeforeEndOfLine
24
23
  end
25
-
26
24
  end
@@ -1,16 +1,22 @@
1
1
  module Regexp::Expression
2
+ # TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
2
3
  module EscapeSequence
3
4
  class Base < Regexp::Expression::Base
4
- require 'yaml'
5
-
6
- def char
7
- # poor man's unescape without using eval
8
- YAML.load(%Q(---\n"#{text}"\n))
9
- end
10
-
11
5
  def codepoint
12
6
  char.ord
13
7
  end
8
+
9
+ if ''.respond_to?(:undump)
10
+ def char
11
+ %("#{text}").undump
12
+ end
13
+ else
14
+ # poor man's unescape without using eval
15
+ require 'yaml'
16
+ def char
17
+ YAML.load(%Q(---\n"#{text}"\n))
18
+ end
19
+ end
14
20
  end
15
21
 
16
22
  class Literal < EscapeSequence::Base
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class FreeSpace < Regexp::Expression::Base
4
3
  def quantify(_token, _text, _min = nil, _max = nil, _mode = :greedy)
5
4
  raise Regexp::Parser::Error, 'Can not quantify a free space object'
@@ -13,5 +12,4 @@ module Regexp::Expression
13
12
  text << exp.text
14
13
  end
15
14
  end
16
-
17
15
  end
@@ -1,7 +1,3 @@
1
1
  module Regexp::Expression
2
-
3
- class Literal < Regexp::Expression::Base
4
- # Obviously nothing special here, yet.
5
- end
6
-
2
+ class Literal < Regexp::Expression::Base; end
7
3
  end
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module UnicodeProperty
4
3
  class Base < Regexp::Expression::Base
5
4
  def negative?
@@ -116,5 +115,4 @@ module Regexp::Expression
116
115
  class Script < UnicodeProperty::Base; end
117
116
  class Block < UnicodeProperty::Base; end
118
117
  end
119
-
120
118
  end # module Regexp::Expression
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Root < Regexp::Expression::Subexpression
4
3
  def self.build(options = {})
5
4
  new(build_token, options)
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  module CharacterType
4
3
  class Base < Regexp::Expression::Base; end
5
4
 
@@ -15,5 +14,4 @@ module Regexp::Expression
15
14
  class Linebreak < CharacterType::Base; end
16
15
  class ExtendedGrapheme < CharacterType::Base; end
17
16
  end
18
-
19
17
  end
@@ -43,7 +43,7 @@ module Regexp::Expression
43
43
 
44
44
  # Order is important! Fields that use other fields in their
45
45
  # definition must appear before the fields they use.
46
- part_keys = %w{a m b o i l x s e S y k c q Q z Z t ~t T >}
46
+ part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
47
47
  part.keys.each {|k| part[k] = "<?#{k}?>"}
48
48
 
49
49
  part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
@@ -1,6 +1,6 @@
1
1
  module Regexp::Expression
2
2
  class Quantifier
3
- MODES = [:greedy, :possessive, :reluctant]
3
+ MODES = %i[greedy possessive reluctant]
4
4
 
5
5
  attr_reader :token, :text, :min, :max, :mode
6
6
 
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  # A sequence of expressions. Differs from a Subexpression in how it handles
4
3
  # quantifiers, as it applies them to its last element instead of itself as
5
4
  # a whole subexpression.
@@ -1,5 +1,4 @@
1
1
  module Regexp::Expression
2
-
3
2
  class Subexpression < Regexp::Expression::Base
4
3
  include Enumerable
5
4