regexp_parser 1.7.0 → 2.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +364 -22
- data/Gemfile +8 -2
- data/LICENSE +1 -1
- data/README.md +124 -88
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +76 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
- data/lib/regexp_parser/expression/classes/group.rb +28 -15
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/root.rb +4 -19
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
- data/lib/regexp_parser/expression/methods/construct.rb +41 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +47 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
- data/lib/regexp_parser/expression/quantifier.rb +57 -17
- data/lib/regexp_parser/expression/sequence.rb +11 -47
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +111 -0
- data/lib/regexp_parser/expression/subexpression.rb +27 -19
- data/lib/regexp_parser/expression.rb +14 -141
- data/lib/regexp_parser/lexer.rb +83 -41
- data/lib/regexp_parser/parser.rb +371 -429
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +633 -0
- data/lib/regexp_parser/scanner/properties/short.csv +248 -0
- data/lib/regexp_parser/scanner/property.rl +4 -4
- data/lib/regexp_parser/scanner/scanner.rl +295 -368
- data/lib/regexp_parser/scanner.rb +1405 -1674
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +92 -67
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +33 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/token/meta.rb +20 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +49 -166
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -51
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/errors_spec.rb +0 -68
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
- /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1426faee272654c45e3da8e262e94cfdbcf134dbad7804aed8cd945334c07be
|
4
|
+
data.tar.gz: 37eec721839fe2ebfc25c9d614756289b59ee766f5e7e60ecf4839b554bbb93e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: abed9d7f387634b5e16eb19cbfd5d9aab03288dd4d284b1c52688f958714479783275c5418ee623607ced96b301124ab82dff546e7e4146c7c5ec7feae3e089d
|
7
|
+
data.tar.gz: 62c0757df1c73df52fcf71ef8de666ab9a51a4a8145e71321424ab0ff8408cb2b707cf154dae64ebbcc5a9c8a12ee377a3eadab7549432a9d0e6ee0e65afddd1
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,364 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
All notable changes to this project will be documented in this file.
|
4
|
+
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7
|
+
|
1
8
|
## [Unreleased]
|
2
9
|
|
10
|
+
## [2.8.1] - 2023-06-10 - [Janosch Müller](mailto:janosch84@gmail.com)
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
|
14
|
+
- support for extpict unicode property, added in Ruby 2.6
|
15
|
+
- support for 10 unicode script/block properties added in Ruby 3.2
|
16
|
+
|
17
|
+
## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
|
18
|
+
|
19
|
+
### Added
|
20
|
+
|
21
|
+
- `Regexp::Expression::Shared#ends_at`
|
22
|
+
* e.g. `parse(/a +/x)[0].ends_at # => 3`
|
23
|
+
* e.g. `parse(/a +/x)[0].ends_at(include_quantifier = false) # => 1`
|
24
|
+
- `Regexp::Expression::Shared#{capturing?,comment?}`
|
25
|
+
* previously only available on capturing and comment groups
|
26
|
+
- `Regexp::Expression::Shared#{decorative?}`
|
27
|
+
* true for decorations: comment groups as well as comments and whitespace in x-mode
|
28
|
+
- `Regexp::Expression::Shared#parent`
|
29
|
+
- new format argument `:original` for `Regexp::Expression::Base#to_s`
|
30
|
+
* includes decorative elements between node and its quantifier
|
31
|
+
* e.g. `parse(/a (?#comment) +/x)[0].to_s(:original) # => "a (?#comment) +"`
|
32
|
+
* using it is not needed when calling `Root#to_s` as Root can't be quantified
|
33
|
+
- support calling `Subexpression#{each_expression,flat_map}` with a one-argument block
|
34
|
+
* in this case, only the expressions are passed to the block, no indices
|
35
|
+
- support calling test methods at Expression class level
|
36
|
+
- `capturing?`, `comment?`, `decorative?`, `referential?`, `terminal?`
|
37
|
+
- e.g. `Regexp::Expression::CharacterSet.terminal? # => false`
|
38
|
+
|
39
|
+
### Fixed
|
40
|
+
|
41
|
+
- `Regexp::Expression::Shared#full_length` with whitespace before quantifier
|
42
|
+
* e.g. `parse(/a +/x)[0].full_length` used to yield `2`, now it yields `3`
|
43
|
+
- `Subexpression#to_s` output with children with whitespace before their quantifier
|
44
|
+
* e.g. `parse(/a + /x).to_s` used to yield `"a+ "`, now it yields `"a + "`
|
45
|
+
* calling `#to_s` on sub-nodes still omits such decorative interludes by default
|
46
|
+
- use new `#to_s` format `:original` to include it
|
47
|
+
- e.g. `parse(/a + /x)[0].to_s(:original) # => "a +"`
|
48
|
+
- fixed `Subexpression#te` behaving differently from other expressions
|
49
|
+
* only `Subexpression#te` used to include the quantifier
|
50
|
+
* now `#te` is the end index without quantifier, as for other expressions
|
51
|
+
- fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences
|
52
|
+
* e.g. `Regexp::Parser.parse(/|/)[0].starts_at`
|
53
|
+
* e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at`
|
54
|
+
- fixed nested comment groups breaking local x-options
|
55
|
+
* e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace
|
56
|
+
- fixed nested comment groups breaking conditionals
|
57
|
+
* e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
|
58
|
+
- fixed quantifiers after comment groups being mis-assigned to that group
|
59
|
+
* e.g. in `/a(?#foo){3}/` (matches 'aaa')
|
60
|
+
- fixed Scanner accepting two cases of invalid Regexp syntax
|
61
|
+
* unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
|
62
|
+
* these are a `SyntaxError` in Ruby, so could only be passed as a String
|
63
|
+
* they now raise a `Regexp::Scanner::ScannerError`
|
64
|
+
- fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
|
65
|
+
- reduced verbosity of inspect / pretty print output
|
66
|
+
|
67
|
+
## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
68
|
+
|
69
|
+
### Added
|
70
|
+
|
71
|
+
- `Regexp::Lexer.lex` now streams tokens when called with a block
|
72
|
+
* it can now take arbitrarily large input, just like `Regexp::Scanner`
|
73
|
+
* this also slightly improves `Regexp::Parser.parse` performance
|
74
|
+
* note: `Regexp::Parser.parse` still does not and will not support streaming
|
75
|
+
- improved performance of `Subexpression#each_expression`
|
76
|
+
- minor improvements to `Regexp::Scanner` performance
|
77
|
+
- overall improvement of parse performance: about 10% for large Regexps
|
78
|
+
|
79
|
+
### Fixed
|
80
|
+
|
81
|
+
- parsing of octal escape sequences in sets, e.g. `[\141]`
|
82
|
+
* thanks to [Randy Stauner](https://github.com/rwstauner) for the report
|
83
|
+
|
84
|
+
## [2.6.2] - 2023-01-19 - [Janosch Müller](mailto:janosch84@gmail.com)
|
85
|
+
|
86
|
+
### Fixed
|
87
|
+
|
88
|
+
- fixed `SystemStackError` when cloning recursive subexpression calls
|
89
|
+
* e.g. `Regexp::Parser.parse(/a|b\g<0>/).dup`
|
90
|
+
|
91
|
+
## [2.6.1] - 2022-11-16 - [Janosch Müller](mailto:janosch84@gmail.com)
|
92
|
+
|
93
|
+
### Fixed
|
94
|
+
|
95
|
+
- fixed scanning of two negative lookbehind edge cases
|
96
|
+
* `(?<!x)y>` used to raise a ScannerError
|
97
|
+
* `(?<!x>)y` used to be misinterpreted as a named group
|
98
|
+
* thanks to [Sergio Medina](https://github.com/serch) for the report
|
99
|
+
|
100
|
+
## [2.6.0] - 2022-09-26 - [Janosch Müller](mailto:janosch84@gmail.com)
|
101
|
+
|
102
|
+
### Fixed
|
103
|
+
|
104
|
+
- fixed `#referenced_expression` for `\g<0>` (was `nil`, is now the `Root` exp)
|
105
|
+
- fixed `#reference`, `#referenced_expression` for recursion level backrefs
|
106
|
+
* e.g. `(a)(b)\k<-1+1>`
|
107
|
+
* `#referenced_expression` was `nil`, now it is the correct `Group` exp
|
108
|
+
- detect and raise for two more syntax errors when parsing String input
|
109
|
+
* quantification of option switches (e.g. `(?i)+`)
|
110
|
+
* invalid references (e.g. `/\k<1>/`)
|
111
|
+
* these are a `SyntaxError` in Ruby, so could only be passed as a String
|
112
|
+
|
113
|
+
### Added
|
114
|
+
|
115
|
+
- `Regexp::Expression::Base#human_name`
|
116
|
+
* returns a nice, human-readable description of the expression
|
117
|
+
- `Regexp::Expression::Base#optional?`
|
118
|
+
* returns `true` if the expression is quantified accordingly (e.g. with `*`, `{,n}`)
|
119
|
+
- added a deprecation warning when calling `#to_re` on set members
|
120
|
+
|
121
|
+
## [2.5.0] - 2022-05-27 - [Janosch Müller](mailto:janosch84@gmail.com)
|
122
|
+
|
123
|
+
### Added
|
124
|
+
|
125
|
+
- `Regexp::Expression::Base.construct` and `.token_class` methods
|
126
|
+
* see the [wiki](https://github.com/ammar/regexp_parser/wiki) for details
|
127
|
+
|
128
|
+
## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
|
129
|
+
|
130
|
+
### Fixed
|
131
|
+
|
132
|
+
- fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
|
133
|
+
* they used to be treated as reluctant or possessive mode indicators
|
134
|
+
* however, Ruby does not support these modes for interval quantifiers
|
135
|
+
* they are now treated as chained quantifiers instead, as Ruby does it
|
136
|
+
* cf. [#3](https://github.com/ammar/regexp_parser/issues/3)
|
137
|
+
- fixed `Expression::Base#nesting_level` for some tree rewrite cases
|
138
|
+
* e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
|
139
|
+
- fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
|
140
|
+
* they raise a `SyntaxError` when used in a Regexp, so could only be passed as String
|
141
|
+
* they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
|
142
|
+
|
143
|
+
### Added
|
144
|
+
|
145
|
+
- added `Expression::Base#==` for (deep) comparison of expressions
|
146
|
+
- added `Expression::Base#parts`
|
147
|
+
* returns the text elements and subexpressions of an expression
|
148
|
+
* e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
|
149
|
+
- added `Expression::Base#te` (a.k.a. token end index)
|
150
|
+
* `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
|
151
|
+
- made some `Expression::Base` methods available on `Quantifier` instances, too
|
152
|
+
* `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
|
153
|
+
* `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
|
154
|
+
* `#conditional_level`, `#level`, `#nesting_level` , `#set_level`
|
155
|
+
* this allows a more unified handling with `Expression::Base` instances
|
156
|
+
- allowed `Quantifier#initialize` to take a token and options Hash like other nodes
|
157
|
+
- added a deprecation warning for initializing Quantifiers with 4+ arguments:
|
158
|
+
|
159
|
+
Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
|
160
|
+
is deprecated.
|
161
|
+
|
162
|
+
It will no longer be supported in regexp_parser v3.0.0.
|
163
|
+
|
164
|
+
Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
|
165
|
+
with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
|
166
|
+
will be derived automatically.
|
167
|
+
|
168
|
+
Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
|
169
|
+
|
170
|
+
This is consistent with how Expression::Base instances are created.
|
171
|
+
|
172
|
+
|
173
|
+
## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
|
174
|
+
|
175
|
+
### Fixed
|
176
|
+
|
177
|
+
- removed five nonexistent unicode properties from `Syntax#features`
|
178
|
+
* these were never supported by Ruby or the `Regexp::Scanner`
|
179
|
+
* thanks to [Markus Schirp](https://github.com/mbj) for the report
|
180
|
+
|
181
|
+
## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
182
|
+
|
183
|
+
### Added
|
184
|
+
|
185
|
+
- improved parsing performance through `Syntax` refactoring
|
186
|
+
* instead of fresh `Syntax` instances, pre-loaded constants are now re-used
|
187
|
+
* this approximately doubles the parsing speed for simple regexps
|
188
|
+
- added methods to `Syntax` classes to show relative feature sets
|
189
|
+
* e.g. `Regexp::Syntax::V3_2_0.added_features`
|
190
|
+
- support for new unicode properties of Ruby 3.2 / Unicode 14.0
|
191
|
+
|
192
|
+
## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
193
|
+
|
194
|
+
### Fixed
|
195
|
+
|
196
|
+
- fixed Syntax version of absence groups (`(?~...)`)
|
197
|
+
* the lexer accepted them for any Ruby version
|
198
|
+
* now they are only recognized for Ruby >= 2.4.1 in which they were introduced
|
199
|
+
- reduced gem size by excluding specs from package
|
200
|
+
- removed deprecated `test_files` gemspec setting
|
201
|
+
- no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
|
202
|
+
- no longer depend on `set`
|
203
|
+
* `set` was removed from the stdlib and made a standalone gem as of Ruby 3
|
204
|
+
* this made it a hidden/undeclared dependency of `regexp_parser`
|
205
|
+
|
206
|
+
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
207
|
+
|
208
|
+
### Added
|
209
|
+
|
210
|
+
- added support for 13 new unicode properties introduced in Ruby 3.1.0
|
211
|
+
|
212
|
+
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
213
|
+
|
214
|
+
### Fixed
|
215
|
+
|
216
|
+
- fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
|
217
|
+
* thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
|
218
|
+
|
219
|
+
## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
|
220
|
+
|
221
|
+
### Added
|
222
|
+
|
223
|
+
- common ancestor for all scanning/parsing/lexing errors
|
224
|
+
* `Regexp::Parser::Error` can now be rescued as a catch-all
|
225
|
+
* the following errors (and their many descendants) now inherit from it:
|
226
|
+
- `Regexp::Expression::Conditional::TooManyBranches`
|
227
|
+
- `Regexp::Parser::ParserError`
|
228
|
+
- `Regexp::Scanner::ScannerError`
|
229
|
+
- `Regexp::Scanner::ValidationError`
|
230
|
+
- `Regexp::Syntax::SyntaxError`
|
231
|
+
* it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
|
232
|
+
* thanks to [sandstrom](https://github.com/sandstrom) for the cue
|
233
|
+
|
234
|
+
### Fixed
|
235
|
+
|
236
|
+
- fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
|
237
|
+
* a regression in v2.0.1 had caused them to be scanned as literals
|
238
|
+
- fixed scanning of some backreference and subexpression call edge cases
|
239
|
+
* e.g. `\k<+1>`, `\g<x-1>`
|
240
|
+
- fixed tokenization of some escapes in character sets
|
241
|
+
* `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
|
242
|
+
* all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
|
243
|
+
* if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
|
244
|
+
* the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
|
245
|
+
- fixed handling of control/metacontrol escapes in character sets
|
246
|
+
* e.g. `[\cX]`, `[\M-\C-X]`
|
247
|
+
* they were misread as a bunch of individual literals, escapes, and ranges
|
248
|
+
- fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
|
249
|
+
|
250
|
+
## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
|
251
|
+
|
252
|
+
### Fixed
|
253
|
+
|
254
|
+
- fixed error when scanning some unlikely and redundant but valid charset patterns
|
255
|
+
* e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
|
256
|
+
- fixed ancestry of some error classes related to syntax version lookup
|
257
|
+
* `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
|
258
|
+
* they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
|
259
|
+
|
260
|
+
## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
|
261
|
+
|
262
|
+
### Fixed
|
263
|
+
|
264
|
+
- fixed `FrozenError` when calling `#to_s` on a frozen `Group::Passive`
|
265
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon)
|
266
|
+
|
267
|
+
## [2.0.1] - 2020-12-20 - [Janosch Müller](mailto:janosch84@gmail.com)
|
268
|
+
|
269
|
+
### Fixed
|
270
|
+
|
271
|
+
- fixed error when scanning some group names
|
272
|
+
* this affected names containing hyphens, digits or multibyte chars, e.g. `/(?<a1>a)/`
|
273
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
|
274
|
+
- fixed error when scanning hex escapes with just one hex digit
|
275
|
+
* e.g. `/\x0A/` was scanned correctly, but the equivalent `/\xA/` was not
|
276
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
|
277
|
+
|
278
|
+
## [2.0.0] - 2020-11-25 - [Janosch Müller](mailto:janosch84@gmail.com)
|
279
|
+
|
280
|
+
### Changed
|
281
|
+
|
282
|
+
- some methods that used to return byte-based indices now return char-based indices
|
283
|
+
* the returned values have only changed for Regexps that contain multibyte chars
|
284
|
+
* this is only a breaking change if you used such methods directly AND relied on them pointing to bytes
|
285
|
+
* affected methods:
|
286
|
+
* `Regexp::Token` `#length`, `#offset`, `#te`, `#ts`
|
287
|
+
* `Regexp::Expression::Base` `#full_length`, `#offset`, `#starts_at`, `#te`, `#ts`
|
288
|
+
* thanks to [Akinori MUSHA](https://github.com/knu) for the report
|
289
|
+
- removed some deprecated methods/signatures
|
290
|
+
* these are rarely used and have been showing deprecation warnings for a long time
|
291
|
+
* `Regexp::Expression::Subexpression.new` with 3 arguments
|
292
|
+
* `Regexp::Expression::Root.new` without a token argument
|
293
|
+
* `Regexp::Expression.parsed`
|
294
|
+
|
295
|
+
### Added
|
296
|
+
|
297
|
+
- `Regexp::Expression::Base#base_length`
|
298
|
+
* returns the character count of an expression body, ignoring any quantifier
|
299
|
+
- pragmatic, experimental support for chained quantifiers
|
300
|
+
* e.g.: `/^a{10}{4,6}$/` matches exactly 40, 50 or 60 `a`s
|
301
|
+
* successive quantifiers used to be silently dropped by the parser
|
302
|
+
* they are now wrapped with passive groups as if they were written `(?:a{10}){4,6}`
|
303
|
+
* thanks to [calfeld](https://github.com/calfeld) for reporting this a while back
|
304
|
+
|
305
|
+
### Fixed
|
306
|
+
|
307
|
+
- incorrect encoding output for non-ascii comments
|
308
|
+
* this led to a crash when calling `#to_s` on parse results containing such comments
|
309
|
+
* thanks to [Michael Glass](https://github.com/michaelglass) for the report
|
310
|
+
- some crashes when scanning contrived patterns such as `'\😋'`
|
311
|
+
|
312
|
+
### [1.8.2] - 2020-10-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
313
|
+
|
314
|
+
### Fixed
|
315
|
+
|
316
|
+
- fix `FrozenError` in `Expression::Base#repetitions` on Ruby 3.0
|
317
|
+
* thanks to [Thomas Walpole](https://github.com/twalpole)
|
318
|
+
- removed "unknown future version" warning on Ruby 3.0
|
319
|
+
|
320
|
+
### [1.8.1] - 2020-09-28 - [Janosch Müller](mailto:janosch84@gmail.com)
|
321
|
+
|
322
|
+
### Fixed
|
323
|
+
|
324
|
+
- fixed scanning of comment-like text in normal mode
|
325
|
+
* this was an old bug, but had become more prevalent in v1.8.0
|
326
|
+
* thanks to [Tietew](https://github.com/Tietew) for the report
|
327
|
+
- specified correct minimum Ruby version in gemspec
|
328
|
+
* it said 1.9 but really required 2.0 as of v1.8.0
|
329
|
+
|
330
|
+
### [1.8.0] - 2020-09-20 - [Janosch Müller](mailto:janosch84@gmail.com)
|
331
|
+
|
332
|
+
### Changed
|
333
|
+
|
334
|
+
- dropped support for running on Ruby 1.9.x
|
335
|
+
|
336
|
+
### Added
|
337
|
+
|
338
|
+
- regexp flags can now be passed when parsing a `String` as regexp body
|
339
|
+
* see the [README](/README.md#usage) for details
|
340
|
+
* thanks to [Owen Stephens](https://github.com/owst)
|
341
|
+
- bare occurrences of `\g` and `\k` are now allowed and scanned as literal escapes
|
342
|
+
* matches Onigmo behavior
|
343
|
+
* thanks to [Marc-André Lafortune](https://github.com/marcandre) for the report
|
344
|
+
|
345
|
+
### Fixed
|
346
|
+
|
347
|
+
- fixed parsing comments without preceding space or trailing newline in x-mode
|
348
|
+
* thanks to [Owen Stephens](https://github.com/owst)
|
349
|
+
|
350
|
+
### [1.7.1] - 2020-06-07 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
351
|
+
|
352
|
+
### Fixed
|
353
|
+
|
354
|
+
- Support for literals that include the unescaped delimiters `{`, `}`, and `]`. These
|
355
|
+
delimiters are informally supported by various regexp engines.
|
356
|
+
|
3
357
|
### [1.7.0] - 2020-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
358
|
|
5
359
|
### Added
|
6
360
|
|
7
|
-
- `Expression#each_expression` and `#traverse` can now be called without a block
|
361
|
+
- `Expression::Base#each_expression` and `#traverse` can now be called without a block
|
8
362
|
* this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
|
9
363
|
* thanks to [Masataka Kuwabara](https://github.com/pocke)
|
10
364
|
|
@@ -30,7 +384,7 @@
|
|
30
384
|
- Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
|
31
385
|
- Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
|
32
386
|
- Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
|
33
|
-
- Fixed `Expression#match` and `#=~` not working with a single argument
|
387
|
+
- Fixed `Expression::Base#match` and `#=~` not working with a single argument
|
34
388
|
|
35
389
|
### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
|
36
390
|
|
@@ -38,15 +392,15 @@
|
|
38
392
|
|
39
393
|
- Added `#referenced_expression` for backrefs, subexp calls and conditionals
|
40
394
|
* returns the `Group` expression that is being referenced via name or number
|
41
|
-
- Added `Expression#repetitions`
|
395
|
+
- Added `Expression::Base#repetitions`
|
42
396
|
* returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
|
43
397
|
* like `#quantity` but with a more uniform interface
|
44
|
-
- Added `Expression#match_length`
|
398
|
+
- Added `Expression::Base#match_length`
|
45
399
|
* allows inspecting and iterating over String lengths matched by the Expression
|
46
400
|
|
47
401
|
### Fixed
|
48
402
|
|
49
|
-
- Fixed `Expression#clone` "direction"
|
403
|
+
- Fixed `Expression::Base#clone` "direction"
|
50
404
|
* it used to dup ivars onto the callee, leaving only the clone referencing the original objects
|
51
405
|
* this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
|
52
406
|
- Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
|
@@ -85,8 +439,8 @@
|
|
85
439
|
|
86
440
|
- Fixed missing quantifier in `Conditional::Expression` methods `#to_s`, `#to_re`
|
87
441
|
- `Conditional::Condition` no longer lives outside the recursive `#expressions` tree
|
88
|
-
|
89
|
-
|
442
|
+
* it used to be the only expression stored in a custom ivar, complicating traversal
|
443
|
+
* its setter and getter (`#condition=`, `#condition`) still work as before
|
90
444
|
|
91
445
|
## [1.1.0] - 2018-09-17 - [Janosch Müller](mailto:janosch84@gmail.com)
|
92
446
|
|
@@ -94,8 +448,8 @@
|
|
94
448
|
|
95
449
|
- Added `Quantifier` methods `#greedy?`, `#possessive?`, `#reluctant?`/`#lazy?`
|
96
450
|
- Added `Group::Options#option_changes`
|
97
|
-
|
98
|
-
|
451
|
+
* shows the options enabled or disabled by the given options group
|
452
|
+
* as with all other expressions, `#options` shows the overall active options
|
99
453
|
- Added `Conditional#reference` and `Condition#reference`, indicating the determinative group
|
100
454
|
- Added `Subexpression#dig`, acts like [`Array#dig`](http://ruby-doc.org/core-2.5.0/Array.html#method-i-dig)
|
101
455
|
|
@@ -208,7 +562,7 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
208
562
|
- Fixed a thread safety issue (issue #45)
|
209
563
|
- Some public class methods that were only reliable for
|
210
564
|
internal use are now private instance methods (PR #46)
|
211
|
-
- Improved the usefulness of Expression#options (issue #43) -
|
565
|
+
- Improved the usefulness of Expression::Base#options (issue #43) -
|
212
566
|
#options and derived methods such as #i?, #m? and #x? are now
|
213
567
|
defined for all Expressions that are affected by such flags.
|
214
568
|
- Fixed scanning of whitespace following (?x) (commit 5c94bd2)
|
@@ -279,7 +633,6 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
279
633
|
* Fixed scanning of zero length comments (PR #12)
|
280
634
|
* Fixed missing escape:codepoint_list syntax token (PR #14)
|
281
635
|
* Fixed to_s for modified interval quantifiers (PR #17)
|
282
|
-
- Added a note about MRI implementation quirks to Scanner section
|
283
636
|
|
284
637
|
## [0.3.2] - 2016-01-01 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
285
638
|
|
@@ -305,7 +658,6 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
305
658
|
- Renamed Lexer's method to lex, added an alias to the old name (scan)
|
306
659
|
- Use #map instead of #each to run the block in Lexer.lex.
|
307
660
|
- Replaced VERSION.yml file with a constant.
|
308
|
-
- Updated README
|
309
661
|
- Update tokens and scanner with new additions in Unicode 7.0.
|
310
662
|
|
311
663
|
## [0.1.6] - 2014-10-06 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
@@ -315,20 +667,11 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
315
667
|
- Added syntax files for missing ruby 2.x versions. These do not add
|
316
668
|
extra syntax support, they just make the gem work with the newer
|
317
669
|
ruby versions.
|
318
|
-
- Added .travis.yml to project root.
|
319
|
-
- README:
|
320
|
-
- Removed note purporting runtime support for ruby 1.8.6.
|
321
|
-
- Added a section identifying the main unsupported syntax features.
|
322
|
-
- Added sections for Testing and Building
|
323
|
-
- Added badges for gem version, Travis CI, and code climate.
|
324
|
-
- Updated README, fixing broken examples, and converting it from a rdoc file to Github's flavor of Markdown.
|
325
670
|
- Fixed a parser bug where an alternation sequence that contained nested expressions was incorrectly being appended to the parent expression when the nesting was exited. e.g. in /a|(b)c/, c was appended to the root.
|
326
|
-
|
327
671
|
- Fixed a bug where character types were not being correctly scanned within character sets. e.g. in [\d], two tokens were scanned; one for the backslash '\' and one for the 'd'
|
328
672
|
|
329
673
|
## [0.1.5] - 2014-01-14 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
330
674
|
|
331
|
-
- Correct ChangeLog.
|
332
675
|
- Added syntax stubs for ruby versions 2.0 and 2.1
|
333
676
|
- Added clone methods for deep copying expressions.
|
334
677
|
- Added optional format argument for to_s on expressions to return the text of the expression with (:full, the default) or without (:base) its quantifier.
|
@@ -337,7 +680,6 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
337
680
|
- Improved EOF handling in general and especially from sequences like hex and control escapes.
|
338
681
|
- Fixed a bug where named groups with an empty name would return a blank token [].
|
339
682
|
- Fixed a bug where members of a parent set were being added to its last subset.
|
340
|
-
- Various code cleanups in scanner.rl
|
341
683
|
- Fixed a few mutable string bugs by calling dup on the originals.
|
342
684
|
- Made ruby 1.8.6 the base for all 1.8 syntax, and the 1.8 name a pointer to the latest (1.8.7 at this time)
|
343
685
|
- Removed look-behind assertions (positive and negative) from 1.8 syntax
|
data/Gemfile
CHANGED
@@ -3,7 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
+
gem 'leto', '~> 2.0'
|
6
7
|
gem 'rake', '~> 13.0'
|
7
|
-
gem 'regexp_property_values', '~> 1.0'
|
8
|
-
gem 'rspec', '~> 3.8'
|
8
|
+
gem 'regexp_property_values', '~> 1.4'
|
9
|
+
gem 'rspec', '~> 3.10'
|
10
|
+
if RUBY_VERSION.to_f >= 2.7
|
11
|
+
gem 'benchmark-ips', '~> 2.1'
|
12
|
+
gem 'gouteur', '~> 1.1'
|
13
|
+
gem 'rubocop', '~> 1.7'
|
14
|
+
end
|
9
15
|
end
|
data/LICENSE
CHANGED