regexp_parser 1.7.0 → 2.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +364 -22
- data/Gemfile +8 -2
- data/LICENSE +1 -1
- data/README.md +124 -88
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +76 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
- data/lib/regexp_parser/expression/classes/group.rb +28 -15
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/root.rb +4 -19
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
- data/lib/regexp_parser/expression/methods/construct.rb +41 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +47 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
- data/lib/regexp_parser/expression/quantifier.rb +57 -17
- data/lib/regexp_parser/expression/sequence.rb +11 -47
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +111 -0
- data/lib/regexp_parser/expression/subexpression.rb +27 -19
- data/lib/regexp_parser/expression.rb +14 -141
- data/lib/regexp_parser/lexer.rb +83 -41
- data/lib/regexp_parser/parser.rb +371 -429
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +633 -0
- data/lib/regexp_parser/scanner/properties/short.csv +248 -0
- data/lib/regexp_parser/scanner/property.rl +4 -4
- data/lib/regexp_parser/scanner/scanner.rl +295 -368
- data/lib/regexp_parser/scanner.rb +1405 -1674
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +92 -67
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +33 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/token/meta.rb +20 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +20 -22
- metadata +49 -166
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -51
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/errors_spec.rb +0 -68
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
- /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: e1426faee272654c45e3da8e262e94cfdbcf134dbad7804aed8cd945334c07be
|
4
|
+
data.tar.gz: 37eec721839fe2ebfc25c9d614756289b59ee766f5e7e60ecf4839b554bbb93e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: abed9d7f387634b5e16eb19cbfd5d9aab03288dd4d284b1c52688f958714479783275c5418ee623607ced96b301124ab82dff546e7e4146c7c5ec7feae3e089d
|
7
|
+
data.tar.gz: 62c0757df1c73df52fcf71ef8de666ab9a51a4a8145e71321424ab0ff8408cb2b707cf154dae64ebbcc5a9c8a12ee377a3eadab7549432a9d0e6ee0e65afddd1
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,364 @@
|
|
1
|
+
# Changelog
|
2
|
+
|
3
|
+
All notable changes to this project will be documented in this file.
|
4
|
+
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
7
|
+
|
1
8
|
## [Unreleased]
|
2
9
|
|
10
|
+
## [2.8.1] - 2023-06-10 - [Janosch Müller](mailto:janosch84@gmail.com)
|
11
|
+
|
12
|
+
### Fixed
|
13
|
+
|
14
|
+
- support for extpict unicode property, added in Ruby 2.6
|
15
|
+
- support for 10 unicode script/block properties added in Ruby 3.2
|
16
|
+
|
17
|
+
## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
|
18
|
+
|
19
|
+
### Added
|
20
|
+
|
21
|
+
- `Regexp::Expression::Shared#ends_at`
|
22
|
+
* e.g. `parse(/a +/x)[0].ends_at # => 3`
|
23
|
+
* e.g. `parse(/a +/x)[0].ends_at(include_quantifier = false) # => 1`
|
24
|
+
- `Regexp::Expression::Shared#{capturing?,comment?}`
|
25
|
+
* previously only available on capturing and comment groups
|
26
|
+
- `Regexp::Expression::Shared#{decorative?}`
|
27
|
+
* true for decorations: comment groups as well as comments and whitespace in x-mode
|
28
|
+
- `Regexp::Expression::Shared#parent`
|
29
|
+
- new format argument `:original` for `Regexp::Expression::Base#to_s`
|
30
|
+
* includes decorative elements between node and its quantifier
|
31
|
+
* e.g. `parse(/a (?#comment) +/x)[0].to_s(:original) # => "a (?#comment) +"`
|
32
|
+
* using it is not needed when calling `Root#to_s` as Root can't be quantified
|
33
|
+
- support calling `Subexpression#{each_expression,flat_map}` with a one-argument block
|
34
|
+
* in this case, only the expressions are passed to the block, no indices
|
35
|
+
- support calling test methods at Expression class level
|
36
|
+
- `capturing?`, `comment?`, `decorative?`, `referential?`, `terminal?`
|
37
|
+
- e.g. `Regexp::Expression::CharacterSet.terminal? # => false`
|
38
|
+
|
39
|
+
### Fixed
|
40
|
+
|
41
|
+
- `Regexp::Expression::Shared#full_length` with whitespace before quantifier
|
42
|
+
* e.g. `parse(/a +/x)[0].full_length` used to yield `2`, now it yields `3`
|
43
|
+
- `Subexpression#to_s` output with children with whitespace before their quantifier
|
44
|
+
* e.g. `parse(/a + /x).to_s` used to yield `"a+ "`, now it yields `"a + "`
|
45
|
+
* calling `#to_s` on sub-nodes still omits such decorative interludes by default
|
46
|
+
- use new `#to_s` format `:original` to include it
|
47
|
+
- e.g. `parse(/a + /x)[0].to_s(:original) # => "a +"`
|
48
|
+
- fixed `Subexpression#te` behaving differently from other expressions
|
49
|
+
* only `Subexpression#te` used to include the quantifier
|
50
|
+
* now `#te` is the end index without quantifier, as for other expressions
|
51
|
+
- fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences
|
52
|
+
* e.g. `Regexp::Parser.parse(/|/)[0].starts_at`
|
53
|
+
* e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at`
|
54
|
+
- fixed nested comment groups breaking local x-options
|
55
|
+
* e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace
|
56
|
+
- fixed nested comment groups breaking conditionals
|
57
|
+
* e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
|
58
|
+
- fixed quantifiers after comment groups being mis-assigned to that group
|
59
|
+
* e.g. in `/a(?#foo){3}/` (matches 'aaa')
|
60
|
+
- fixed Scanner accepting two cases of invalid Regexp syntax
|
61
|
+
* unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
|
62
|
+
* these are a `SyntaxError` in Ruby, so could only be passed as a String
|
63
|
+
* they now raise a `Regexp::Scanner::ScannerError`
|
64
|
+
- fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
|
65
|
+
- reduced verbosity of inspect / pretty print output
|
66
|
+
|
67
|
+
## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
68
|
+
|
69
|
+
### Added
|
70
|
+
|
71
|
+
- `Regexp::Lexer.lex` now streams tokens when called with a block
|
72
|
+
* it can now take arbitrarily large input, just like `Regexp::Scanner`
|
73
|
+
* this also slightly improves `Regexp::Parser.parse` performance
|
74
|
+
* note: `Regexp::Parser.parse` still does not and will not support streaming
|
75
|
+
- improved performance of `Subexpression#each_expression`
|
76
|
+
- minor improvements to `Regexp::Scanner` performance
|
77
|
+
- overall improvement of parse performance: about 10% for large Regexps
|
78
|
+
|
79
|
+
### Fixed
|
80
|
+
|
81
|
+
- parsing of octal escape sequences in sets, e.g. `[\141]`
|
82
|
+
* thanks to [Randy Stauner](https://github.com/rwstauner) for the report
|
83
|
+
|
84
|
+
## [2.6.2] - 2023-01-19 - [Janosch Müller](mailto:janosch84@gmail.com)
|
85
|
+
|
86
|
+
### Fixed
|
87
|
+
|
88
|
+
- fixed `SystemStackError` when cloning recursive subexpression calls
|
89
|
+
* e.g. `Regexp::Parser.parse(/a|b\g<0>/).dup`
|
90
|
+
|
91
|
+
## [2.6.1] - 2022-11-16 - [Janosch Müller](mailto:janosch84@gmail.com)
|
92
|
+
|
93
|
+
### Fixed
|
94
|
+
|
95
|
+
- fixed scanning of two negative lookbehind edge cases
|
96
|
+
* `(?<!x)y>` used to raise a ScannerError
|
97
|
+
* `(?<!x>)y` used to be misinterpreted as a named group
|
98
|
+
* thanks to [Sergio Medina](https://github.com/serch) for the report
|
99
|
+
|
100
|
+
## [2.6.0] - 2022-09-26 - [Janosch Müller](mailto:janosch84@gmail.com)
|
101
|
+
|
102
|
+
### Fixed
|
103
|
+
|
104
|
+
- fixed `#referenced_expression` for `\g<0>` (was `nil`, is now the `Root` exp)
|
105
|
+
- fixed `#reference`, `#referenced_expression` for recursion level backrefs
|
106
|
+
* e.g. `(a)(b)\k<-1+1>`
|
107
|
+
* `#referenced_expression` was `nil`, now it is the correct `Group` exp
|
108
|
+
- detect and raise for two more syntax errors when parsing String input
|
109
|
+
* quantification of option switches (e.g. `(?i)+`)
|
110
|
+
* invalid references (e.g. `/\k<1>/`)
|
111
|
+
* these are a `SyntaxError` in Ruby, so could only be passed as a String
|
112
|
+
|
113
|
+
### Added
|
114
|
+
|
115
|
+
- `Regexp::Expression::Base#human_name`
|
116
|
+
* returns a nice, human-readable description of the expression
|
117
|
+
- `Regexp::Expression::Base#optional?`
|
118
|
+
* returns `true` if the expression is quantified accordingly (e.g. with `*`, `{,n}`)
|
119
|
+
- added a deprecation warning when calling `#to_re` on set members
|
120
|
+
|
121
|
+
## [2.5.0] - 2022-05-27 - [Janosch Müller](mailto:janosch84@gmail.com)
|
122
|
+
|
123
|
+
### Added
|
124
|
+
|
125
|
+
- `Regexp::Expression::Base.construct` and `.token_class` methods
|
126
|
+
* see the [wiki](https://github.com/ammar/regexp_parser/wiki) for details
|
127
|
+
|
128
|
+
## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
|
129
|
+
|
130
|
+
### Fixed
|
131
|
+
|
132
|
+
- fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
|
133
|
+
* they used to be treated as reluctant or possessive mode indicators
|
134
|
+
* however, Ruby does not support these modes for interval quantifiers
|
135
|
+
* they are now treated as chained quantifiers instead, as Ruby does it
|
136
|
+
* c.f. [#3](https://github.com/ammar/regexp_parser/issues/3)
|
137
|
+
- fixed `Expression::Base#nesting_level` for some tree rewrite cases
|
138
|
+
* e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
|
139
|
+
- fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
|
140
|
+
* they raise a `SyntaxError` when used in a Regexp, so could only be passed as String
|
141
|
+
* they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
|
142
|
+
|
143
|
+
### Added
|
144
|
+
|
145
|
+
- added `Expression::Base#==` for (deep) comparison of expressions
|
146
|
+
- added `Expression::Base#parts`
|
147
|
+
* returns the text elements and subexpressions of an expression
|
148
|
+
* e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
|
149
|
+
- added `Expression::Base#te` (a.k.a. token end index)
|
150
|
+
* `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
|
151
|
+
- made some `Expression::Base` methods available on `Quantifier` instances, too
|
152
|
+
* `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
|
153
|
+
* `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
|
154
|
+
* `#conditional_level`, `#level`, `#nesting_level`, `#set_level`
|
155
|
+
* this allows a more unified handling with `Expression::Base` instances
|
156
|
+
- allowed `Quantifier#initialize` to take a token and options Hash like other nodes
|
157
|
+
- added a deprecation warning for initializing Quantifiers with 4+ arguments:
|
158
|
+
|
159
|
+
Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
|
160
|
+
is deprecated.
|
161
|
+
|
162
|
+
It will no longer be supported in regexp_parser v3.0.0.
|
163
|
+
|
164
|
+
Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
|
165
|
+
with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
|
166
|
+
will be derived automatically.
|
167
|
+
|
168
|
+
Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
|
169
|
+
|
170
|
+
This is consistent with how Expression::Base instances are created.
|
171
|
+
|
172
|
+
|
173
|
+
## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
|
174
|
+
|
175
|
+
### Fixed
|
176
|
+
|
177
|
+
- removed five nonexistent unicode properties from `Syntax#features`
|
178
|
+
* these were never supported by Ruby or the `Regexp::Scanner`
|
179
|
+
* thanks to [Markus Schirp](https://github.com/mbj) for the report
|
180
|
+
|
181
|
+
## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
182
|
+
|
183
|
+
### Added
|
184
|
+
|
185
|
+
- improved parsing performance through `Syntax` refactoring
|
186
|
+
* instead of fresh `Syntax` instances, pre-loaded constants are now re-used
|
187
|
+
* this approximately doubles the parsing speed for simple regexps
|
188
|
+
- added methods to `Syntax` classes to show relative feature sets
|
189
|
+
* e.g. `Regexp::Syntax::V3_2_0.added_features`
|
190
|
+
- support for new unicode properties of Ruby 3.2 / Unicode 14.0
|
191
|
+
|
192
|
+
## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
193
|
+
|
194
|
+
### Fixed
|
195
|
+
|
196
|
+
- fixed Syntax version of absence groups (`(?~...)`)
|
197
|
+
* the lexer accepted them for any Ruby version
|
198
|
+
* now they are only recognized for Ruby >= 2.4.1 in which they were introduced
|
199
|
+
- reduced gem size by excluding specs from package
|
200
|
+
- removed deprecated `test_files` gemspec setting
|
201
|
+
- no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
|
202
|
+
- no longer depend on `set`
|
203
|
+
* `set` was removed from the stdlib and made a standalone gem as of Ruby 3
|
204
|
+
* this made it a hidden/undeclared dependency of `regexp_parser`
|
205
|
+
|
206
|
+
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
207
|
+
|
208
|
+
### Added
|
209
|
+
|
210
|
+
- added support for 13 new unicode properties introduced in Ruby 3.1.0
|
211
|
+
|
212
|
+
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
213
|
+
|
214
|
+
### Fixed
|
215
|
+
|
216
|
+
- fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
|
217
|
+
* thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
|
218
|
+
|
219
|
+
## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
|
220
|
+
|
221
|
+
### Added
|
222
|
+
|
223
|
+
- common ancestor for all scanning/parsing/lexing errors
|
224
|
+
* `Regexp::Parser::Error` can now be rescued as a catch-all
|
225
|
+
* the following errors (and their many descendants) now inherit from it:
|
226
|
+
- `Regexp::Expression::Conditional::TooManyBranches`
|
227
|
+
- `Regexp::Parser::ParserError`
|
228
|
+
- `Regexp::Scanner::ScannerError`
|
229
|
+
- `Regexp::Scanner::ValidationError`
|
230
|
+
- `Regexp::Syntax::SyntaxError`
|
231
|
+
* it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
|
232
|
+
* thanks to [sandstrom](https://github.com/sandstrom) for the cue
|
233
|
+
|
234
|
+
### Fixed
|
235
|
+
|
236
|
+
- fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
|
237
|
+
* a regression in v2.0.1 had caused them to be scanned as literals
|
238
|
+
- fixed scanning of some backreference and subexpression call edge cases
|
239
|
+
* e.g. `\k<+1>`, `\g<x-1>`
|
240
|
+
- fixed tokenization of some escapes in character sets
|
241
|
+
* `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
|
242
|
+
* all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
|
243
|
+
* if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
|
244
|
+
* the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
|
245
|
+
- fixed handling of control/metacontrol escapes in character sets
|
246
|
+
* e.g. `[\cX]`, `[\M-\C-X]`
|
247
|
+
* they were misread as a bunch of individual literals, escapes, and ranges
|
248
|
+
- fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
|
249
|
+
|
250
|
+
## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
|
251
|
+
|
252
|
+
### Fixed
|
253
|
+
|
254
|
+
- fixed error when scanning some unlikely and redundant but valid charset patterns
|
255
|
+
* e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
|
256
|
+
- fixed ancestry of some error classes related to syntax version lookup
|
257
|
+
* `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
|
258
|
+
* they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
|
259
|
+
|
260
|
+
## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
|
261
|
+
|
262
|
+
### Fixed
|
263
|
+
|
264
|
+
- fixed `FrozenError` when calling `#to_s` on a frozen `Group::Passive`
|
265
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon)
|
266
|
+
|
267
|
+
## [2.0.1] - 2020-12-20 - [Janosch Müller](mailto:janosch84@gmail.com)
|
268
|
+
|
269
|
+
### Fixed
|
270
|
+
|
271
|
+
- fixed error when scanning some group names
|
272
|
+
* this affected names containing hyphens, digits or multibyte chars, e.g. `/(?<a1>a)/`
|
273
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
|
274
|
+
- fixed error when scanning hex escapes with just one hex digit
|
275
|
+
* e.g. `/\x0A/` was scanned correctly, but the equivalent `/\xA/` was not
|
276
|
+
* thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
|
277
|
+
|
278
|
+
## [2.0.0] - 2020-11-25 - [Janosch Müller](mailto:janosch84@gmail.com)
|
279
|
+
|
280
|
+
### Changed
|
281
|
+
|
282
|
+
- some methods that used to return byte-based indices now return char-based indices
|
283
|
+
* the returned values have only changed for Regexps that contain multibyte chars
|
284
|
+
* this is only a breaking change if you used such methods directly AND relied on them pointing to bytes
|
285
|
+
* affected methods:
|
286
|
+
* `Regexp::Token` `#length`, `#offset`, `#te`, `#ts`
|
287
|
+
* `Regexp::Expression::Base` `#full_length`, `#offset`, `#starts_at`, `#te`, `#ts`
|
288
|
+
* thanks to [Akinori MUSHA](https://github.com/knu) for the report
|
289
|
+
- removed some deprecated methods/signatures
|
290
|
+
* these are rarely used and have been showing deprecation warnings for a long time
|
291
|
+
* `Regexp::Expression::Subexpression.new` with 3 arguments
|
292
|
+
* `Regexp::Expression::Root.new` without a token argument
|
293
|
+
* `Regexp::Expression.parsed`
|
294
|
+
|
295
|
+
### Added
|
296
|
+
|
297
|
+
- `Regexp::Expression::Base#base_length`
|
298
|
+
* returns the character count of an expression body, ignoring any quantifier
|
299
|
+
- pragmatic, experimental support for chained quantifiers
|
300
|
+
* e.g.: `/^a{10}{4,6}$/` matches exactly 40, 50 or 60 `a`s
|
301
|
+
* successive quantifiers used to be silently dropped by the parser
|
302
|
+
* they are now wrapped with passive groups as if they were written `(?:a{10}){4,6}`
|
303
|
+
* thanks to [calfeld](https://github.com/calfeld) for reporting this a while back
|
304
|
+
|
305
|
+
### Fixed
|
306
|
+
|
307
|
+
- incorrect encoding output for non-ascii comments
|
308
|
+
* this led to a crash when calling `#to_s` on parse results containing such comments
|
309
|
+
* thanks to [Michael Glass](https://github.com/michaelglass) for the report
|
310
|
+
- some crashes when scanning contrived patterns such as `'\😋'`
|
311
|
+
|
312
|
+
### [1.8.2] - 2020-10-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
313
|
+
|
314
|
+
### Fixed
|
315
|
+
|
316
|
+
- fix `FrozenError` in `Expression::Base#repetitions` on Ruby 3.0
|
317
|
+
* thanks to [Thomas Walpole](https://github.com/twalpole)
|
318
|
+
- removed "unknown future version" warning on Ruby 3.0
|
319
|
+
|
320
|
+
### [1.8.1] - 2020-09-28 - [Janosch Müller](mailto:janosch84@gmail.com)
|
321
|
+
|
322
|
+
### Fixed
|
323
|
+
|
324
|
+
- fixed scanning of comment-like text in normal mode
|
325
|
+
* this was an old bug, but had become more prevalent in v1.8.0
|
326
|
+
* thanks to [Tietew](https://github.com/Tietew) for the report
|
327
|
+
- specified correct minimum Ruby version in gemspec
|
328
|
+
* it said 1.9 but really required 2.0 as of v1.8.0
|
329
|
+
|
330
|
+
### [1.8.0] - 2020-09-20 - [Janosch Müller](mailto:janosch84@gmail.com)
|
331
|
+
|
332
|
+
### Changed
|
333
|
+
|
334
|
+
- dropped support for running on Ruby 1.9.x
|
335
|
+
|
336
|
+
### Added
|
337
|
+
|
338
|
+
- regexp flags can now be passed when parsing a `String` as regexp body
|
339
|
+
* see the [README](/README.md#usage) for details
|
340
|
+
* thanks to [Owen Stephens](https://github.com/owst)
|
341
|
+
- bare occurrences of `\g` and `\k` are now allowed and scanned as literal escapes
|
342
|
+
* matches Onigmo behavior
|
343
|
+
* thanks for the report to [Marc-André Lafortune](https://github.com/marcandre)
|
344
|
+
|
345
|
+
### Fixed
|
346
|
+
|
347
|
+
- fixed parsing comments without preceding space or trailing newline in x-mode
|
348
|
+
* thanks to [Owen Stephens](https://github.com/owst)
|
349
|
+
|
350
|
+
### [1.7.1] - 2020-06-07 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
351
|
+
|
352
|
+
### Fixed
|
353
|
+
|
354
|
+
- Support for literals that include the unescaped delimiters `{`, `}`, and `]`. These
|
355
|
+
delimiters are informally supported by various regexp engines.
|
356
|
+
|
3
357
|
### [1.7.0] - 2020-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
358
|
|
5
359
|
### Added
|
6
360
|
|
7
|
-
- `Expression#each_expression` and
|
361
|
+
- `Expression::Base#each_expression` and `#traverse` can now be called without a block
|
8
362
|
* this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
|
9
363
|
* thanks to [Masataka Kuwabara](https://github.com/pocke)
|
10
364
|
|
@@ -30,7 +384,7 @@
|
|
30
384
|
- Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
|
31
385
|
- Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
|
32
386
|
- Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
|
33
|
-
- Fixed `Expression#match` and `#=~` not working with a single argument
|
387
|
+
- Fixed `Expression::Base#match` and `#=~` not working with a single argument
|
34
388
|
|
35
389
|
### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
|
36
390
|
|
@@ -38,15 +392,15 @@
|
|
38
392
|
|
39
393
|
- Added `#referenced_expression` for backrefs, subexp calls and conditionals
|
40
394
|
* returns the `Group` expression that is being referenced via name or number
|
41
|
-
- Added `Expression#repetitions`
|
395
|
+
- Added `Expression::Base#repetitions`
|
42
396
|
* returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
|
43
397
|
* like `#quantity` but with a more uniform interface
|
44
|
-
- Added `Expression#match_length`
|
398
|
+
- Added `Expression::Base#match_length`
|
45
399
|
* allows inspecting and iterating over String lengths matched by the Expression
|
46
400
|
|
47
401
|
### Fixed
|
48
402
|
|
49
|
-
- Fixed `Expression#clone` "direction"
|
403
|
+
- Fixed `Expression::Base#clone` "direction"
|
50
404
|
* it used to dup ivars onto the callee, leaving only the clone referencing the original objects
|
51
405
|
* this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
|
52
406
|
- Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
|
@@ -85,8 +439,8 @@
|
|
85
439
|
|
86
440
|
- Fixed missing quantifier in `Conditional::Expression` methods `#to_s`, `#to_re`
|
87
441
|
- `Conditional::Condition` no longer lives outside the recursive `#expressions` tree
|
88
|
-
|
89
|
-
|
442
|
+
* it used to be the only expression stored in a custom ivar, complicating traversal
|
443
|
+
* its setter and getter (`#condition=`, `#condition`) still work as before
|
90
444
|
|
91
445
|
## [1.1.0] - 2018-09-17 - [Janosch Müller](mailto:janosch84@gmail.com)
|
92
446
|
|
@@ -94,8 +448,8 @@
|
|
94
448
|
|
95
449
|
- Added `Quantifier` methods `#greedy?`, `#possessive?`, `#reluctant?`/`#lazy?`
|
96
450
|
- Added `Group::Options#option_changes`
|
97
|
-
|
98
|
-
|
451
|
+
* shows the options enabled or disabled by the given options group
|
452
|
+
* as with all other expressions, `#options` shows the overall active options
|
99
453
|
- Added `Conditional#reference` and `Condition#reference`, indicating the determinative group
|
100
454
|
- Added `Subexpression#dig`, acts like [`Array#dig`](http://ruby-doc.org/core-2.5.0/Array.html#method-i-dig)
|
101
455
|
|
@@ -208,7 +562,7 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
208
562
|
- Fixed a thread safety issue (issue #45)
|
209
563
|
- Some public class methods that were only reliable for
|
210
564
|
internal use are now private instance methods (PR #46)
|
211
|
-
- Improved the usefulness of Expression#options (issue #43) -
|
565
|
+
- Improved the usefulness of Expression::Base#options (issue #43) -
|
212
566
|
#options and derived methods such as #i?, #m? and #x? are now
|
213
567
|
defined for all Expressions that are affected by such flags.
|
214
568
|
- Fixed scanning of whitespace following (?x) (commit 5c94bd2)
|
@@ -279,7 +633,6 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
279
633
|
* Fixed scanning of zero length comments (PR #12)
|
280
634
|
* Fixed missing escape:codepoint_list syntax token (PR #14)
|
281
635
|
* Fixed to_s for modified interval quantifiers (PR #17)
|
282
|
-
- Added a note about MRI implementation quirks to Scanner section
|
283
636
|
|
284
637
|
## [0.3.2] - 2016-01-01 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
285
638
|
|
@@ -305,7 +658,6 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
305
658
|
- Renamed Lexer's method to lex, added an alias to the old name (scan)
|
306
659
|
- Use #map instead of #each to run the block in Lexer.lex.
|
307
660
|
- Replaced VERSION.yml file with a constant.
|
308
|
-
- Updated README
|
309
661
|
- Update tokens and scanner with new additions in Unicode 7.0.
|
310
662
|
|
311
663
|
## [0.1.6] - 2014-10-06 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
@@ -315,20 +667,11 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
315
667
|
- Added syntax files for missing ruby 2.x versions. These do not add
|
316
668
|
extra syntax support, they just make the gem work with the newer
|
317
669
|
ruby versions.
|
318
|
-
- Added .travis.yml to project root.
|
319
|
-
- README:
|
320
|
-
- Removed note purporting runtime support for ruby 1.8.6.
|
321
|
-
- Added a section identifying the main unsupported syntax features.
|
322
|
-
- Added sections for Testing and Building
|
323
|
-
- Added badges for gem version, Travis CI, and code climate.
|
324
|
-
- Updated README, fixing broken examples, and converting it from a rdoc file to Github's flavor of Markdown.
|
325
670
|
- Fixed a parser bug where an alternation sequence that contained nested expressions was incorrectly being appended to the parent expression when the nesting was exited. e.g. in /a|(b)c/, c was appended to the root.
|
326
|
-
|
327
671
|
- Fixed a bug where character types were not being correctly scanned within character sets. e.g. in [\d], two tokens were scanned; one for the backslash '\' and one for the 'd'
|
328
672
|
|
329
673
|
## [0.1.5] - 2014-01-14 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
330
674
|
|
331
|
-
- Correct ChangeLog.
|
332
675
|
- Added syntax stubs for ruby versions 2.0 and 2.1
|
333
676
|
- Added clone methods for deep copying expressions.
|
334
677
|
- Added optional format argument for to_s on expressions to return the text of the expression with (:full, the default) or without (:base) its quantifier.
|
@@ -337,7 +680,6 @@ This release includes several breaking changes, mostly to character sets, #map a
|
|
337
680
|
- Improved EOF handling in general and especially from sequences like hex and control escapes.
|
338
681
|
- Fixed a bug where named groups with an empty name would return a blank token [].
|
339
682
|
- Fixed a bug where members of a parent set were being added to its last subset.
|
340
|
-
- Various code cleanups in scanner.rl
|
341
683
|
- Fixed a few mutable string bugs by calling dup on the originals.
|
342
684
|
- Made ruby 1.8.6 the base for all 1.8 syntax, and the 1.8 name a pointer to the latest (1.8.7 at this time)
|
343
685
|
- Removed look-behind assertions (positive and negative) from 1.8 syntax
|
data/Gemfile
CHANGED
@@ -3,7 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
+
gem 'leto', '~> 2.0'
|
6
7
|
gem 'rake', '~> 13.0'
|
7
|
-
gem 'regexp_property_values', '~> 1.
|
8
|
-
gem 'rspec', '~> 3.
|
8
|
+
gem 'regexp_property_values', '~> 1.4'
|
9
|
+
gem 'rspec', '~> 3.10'
|
10
|
+
if RUBY_VERSION.to_f >= 2.7
|
11
|
+
gem 'benchmark-ips', '~> 2.1'
|
12
|
+
gem 'gouteur', '~> 1.1'
|
13
|
+
gem 'rubocop', '~> 1.7'
|
14
|
+
end
|
9
15
|
end
|
data/LICENSE
CHANGED