regexp_parser 1.7.0 → 2.8.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (165) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +364 -22
  3. data/Gemfile +8 -2
  4. data/LICENSE +1 -1
  5. data/README.md +124 -88
  6. data/Rakefile +6 -70
  7. data/lib/regexp_parser/error.rb +4 -0
  8. data/lib/regexp_parser/expression/base.rb +76 -0
  9. data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
  10. data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
  11. data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
  12. data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
  13. data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
  14. data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
  15. data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
  16. data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
  17. data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
  18. data/lib/regexp_parser/expression/classes/group.rb +28 -15
  19. data/lib/regexp_parser/expression/classes/keep.rb +2 -0
  20. data/lib/regexp_parser/expression/classes/literal.rb +1 -5
  21. data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
  22. data/lib/regexp_parser/expression/classes/root.rb +4 -19
  23. data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
  24. data/lib/regexp_parser/expression/methods/construct.rb +41 -0
  25. data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
  26. data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
  27. data/lib/regexp_parser/expression/methods/parts.rb +23 -0
  28. data/lib/regexp_parser/expression/methods/printing.rb +26 -0
  29. data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
  30. data/lib/regexp_parser/expression/methods/tests.rb +47 -1
  31. data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
  32. data/lib/regexp_parser/expression/quantifier.rb +57 -17
  33. data/lib/regexp_parser/expression/sequence.rb +11 -47
  34. data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
  35. data/lib/regexp_parser/expression/shared.rb +111 -0
  36. data/lib/regexp_parser/expression/subexpression.rb +27 -19
  37. data/lib/regexp_parser/expression.rb +14 -141
  38. data/lib/regexp_parser/lexer.rb +83 -41
  39. data/lib/regexp_parser/parser.rb +371 -429
  40. data/lib/regexp_parser/scanner/char_type.rl +11 -11
  41. data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
  42. data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
  43. data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
  44. data/lib/regexp_parser/scanner/properties/long.csv +633 -0
  45. data/lib/regexp_parser/scanner/properties/short.csv +248 -0
  46. data/lib/regexp_parser/scanner/property.rl +4 -4
  47. data/lib/regexp_parser/scanner/scanner.rl +295 -368
  48. data/lib/regexp_parser/scanner.rb +1405 -1674
  49. data/lib/regexp_parser/syntax/any.rb +2 -7
  50. data/lib/regexp_parser/syntax/base.rb +92 -67
  51. data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
  52. data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
  53. data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
  54. data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
  55. data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
  56. data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
  57. data/lib/regexp_parser/syntax/token/escape.rb +33 -0
  58. data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
  59. data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
  60. data/lib/regexp_parser/syntax/token/meta.rb +20 -0
  61. data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
  62. data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
  63. data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
  64. data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
  65. data/lib/regexp_parser/syntax/token.rb +45 -0
  66. data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
  67. data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
  68. data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
  69. data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
  70. data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
  71. data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
  72. data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
  73. data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
  74. data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
  75. data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
  76. data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
  77. data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
  78. data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
  79. data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
  80. data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
  81. data/lib/regexp_parser/syntax/versions.rb +3 -1
  82. data/lib/regexp_parser/syntax.rb +8 -6
  83. data/lib/regexp_parser/token.rb +9 -20
  84. data/lib/regexp_parser/version.rb +1 -1
  85. data/lib/regexp_parser.rb +0 -2
  86. data/regexp_parser.gemspec +20 -22
  87. metadata +49 -166
  88. data/lib/regexp_parser/scanner/properties/long.yml +0 -594
  89. data/lib/regexp_parser/scanner/properties/short.yml +0 -237
  90. data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
  91. data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
  92. data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
  93. data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
  94. data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
  95. data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
  96. data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
  97. data/lib/regexp_parser/syntax/tokens.rb +0 -45
  98. data/spec/expression/base_spec.rb +0 -94
  99. data/spec/expression/clone_spec.rb +0 -120
  100. data/spec/expression/conditional_spec.rb +0 -89
  101. data/spec/expression/free_space_spec.rb +0 -27
  102. data/spec/expression/methods/match_length_spec.rb +0 -161
  103. data/spec/expression/methods/match_spec.rb +0 -25
  104. data/spec/expression/methods/strfregexp_spec.rb +0 -224
  105. data/spec/expression/methods/tests_spec.rb +0 -99
  106. data/spec/expression/methods/traverse_spec.rb +0 -161
  107. data/spec/expression/options_spec.rb +0 -128
  108. data/spec/expression/root_spec.rb +0 -9
  109. data/spec/expression/sequence_spec.rb +0 -9
  110. data/spec/expression/subexpression_spec.rb +0 -50
  111. data/spec/expression/to_h_spec.rb +0 -26
  112. data/spec/expression/to_s_spec.rb +0 -100
  113. data/spec/lexer/all_spec.rb +0 -22
  114. data/spec/lexer/conditionals_spec.rb +0 -53
  115. data/spec/lexer/escapes_spec.rb +0 -14
  116. data/spec/lexer/keep_spec.rb +0 -10
  117. data/spec/lexer/literals_spec.rb +0 -89
  118. data/spec/lexer/nesting_spec.rb +0 -99
  119. data/spec/lexer/refcalls_spec.rb +0 -55
  120. data/spec/parser/all_spec.rb +0 -43
  121. data/spec/parser/alternation_spec.rb +0 -88
  122. data/spec/parser/anchors_spec.rb +0 -17
  123. data/spec/parser/conditionals_spec.rb +0 -179
  124. data/spec/parser/errors_spec.rb +0 -30
  125. data/spec/parser/escapes_spec.rb +0 -121
  126. data/spec/parser/free_space_spec.rb +0 -130
  127. data/spec/parser/groups_spec.rb +0 -108
  128. data/spec/parser/keep_spec.rb +0 -6
  129. data/spec/parser/posix_classes_spec.rb +0 -8
  130. data/spec/parser/properties_spec.rb +0 -115
  131. data/spec/parser/quantifiers_spec.rb +0 -51
  132. data/spec/parser/refcalls_spec.rb +0 -112
  133. data/spec/parser/set/intersections_spec.rb +0 -127
  134. data/spec/parser/set/ranges_spec.rb +0 -111
  135. data/spec/parser/sets_spec.rb +0 -178
  136. data/spec/parser/types_spec.rb +0 -18
  137. data/spec/scanner/all_spec.rb +0 -18
  138. data/spec/scanner/anchors_spec.rb +0 -21
  139. data/spec/scanner/conditionals_spec.rb +0 -128
  140. data/spec/scanner/errors_spec.rb +0 -68
  141. data/spec/scanner/escapes_spec.rb +0 -53
  142. data/spec/scanner/free_space_spec.rb +0 -133
  143. data/spec/scanner/groups_spec.rb +0 -52
  144. data/spec/scanner/keep_spec.rb +0 -10
  145. data/spec/scanner/literals_spec.rb +0 -49
  146. data/spec/scanner/meta_spec.rb +0 -18
  147. data/spec/scanner/properties_spec.rb +0 -64
  148. data/spec/scanner/quantifiers_spec.rb +0 -20
  149. data/spec/scanner/refcalls_spec.rb +0 -36
  150. data/spec/scanner/sets_spec.rb +0 -102
  151. data/spec/scanner/types_spec.rb +0 -14
  152. data/spec/spec_helper.rb +0 -15
  153. data/spec/support/runner.rb +0 -42
  154. data/spec/support/shared_examples.rb +0 -77
  155. data/spec/support/warning_extractor.rb +0 -60
  156. data/spec/syntax/syntax_spec.rb +0 -48
  157. data/spec/syntax/syntax_token_map_spec.rb +0 -23
  158. data/spec/syntax/versions/1.8.6_spec.rb +0 -17
  159. data/spec/syntax/versions/1.9.1_spec.rb +0 -10
  160. data/spec/syntax/versions/1.9.3_spec.rb +0 -9
  161. data/spec/syntax/versions/2.0.0_spec.rb +0 -13
  162. data/spec/syntax/versions/2.2.0_spec.rb +0 -9
  163. data/spec/syntax/versions/aliases_spec.rb +0 -37
  164. data/spec/token/token_spec.rb +0 -85
  165. /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: d7b93dde993f6fe427ff43755738bf7de50f8613cf6e8097c9d791646d803e4c
4
- data.tar.gz: 993a88720a4ee1d8a34f4c95e167089adc6455289bfeb356de8c028a9bbee63d
3
+ metadata.gz: e1426faee272654c45e3da8e262e94cfdbcf134dbad7804aed8cd945334c07be
4
+ data.tar.gz: 37eec721839fe2ebfc25c9d614756289b59ee766f5e7e60ecf4839b554bbb93e
5
5
  SHA512:
6
- metadata.gz: 0bf5c142591b2d5a65023c53f76a64a13106074050042d24614963cc14dabda197ea9140fccd93f26ad06885293369b076bb5e9198967a6e3762654df8033455
7
- data.tar.gz: 1311b3dfa90633ef456edc12abf6ace2d7311c7be8450f3768a436f9c8491c3a87987f3d6ac24c6966b6f4de5363e0f6f874bfe9e1b038a6cf5d9c043553b58e
6
+ metadata.gz: abed9d7f387634b5e16eb19cbfd5d9aab03288dd4d284b1c52688f958714479783275c5418ee623607ced96b301124ab82dff546e7e4146c7c5ec7feae3e089d
7
+ data.tar.gz: 62c0757df1c73df52fcf71ef8de666ab9a51a4a8145e71321424ab0ff8408cb2b707cf154dae64ebbcc5a9c8a12ee377a3eadab7549432a9d0e6ee0e65afddd1
data/CHANGELOG.md CHANGED
@@ -1,10 +1,364 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
1
8
  ## [Unreleased]
2
9
 
10
+ ## [2.8.1] - 2023-06-10 - [Janosch Müller](mailto:janosch84@gmail.com)
11
+
12
+ ### Fixed
13
+
14
+ - support for extpict unicode property, added in Ruby 2.6
15
+ - support for 10 unicode script/block properties added in Ruby 3.2
16
+
17
+ ## [2.8.0] - 2023-04-17 - [Janosch Müller](mailto:janosch84@gmail.com)
18
+
19
+ ### Added
20
+
21
+ - `Regexp::Expression::Shared#ends_at`
22
+ * e.g. `parse(/a +/x)[0].ends_at # => 3`
23
+ * e.g. `parse(/a +/x)[0].ends_at(include_quantifier = false) # => 1`
24
+ - `Regexp::Expression::Shared#{capturing?,comment?}`
25
+ * previously only available on capturing and comment groups
26
+ - `Regexp::Expression::Shared#{decorative?}`
27
+ * true for decorations: comment groups as well as comments and whitespace in x-mode
28
+ - `Regexp::Expression::Shared#parent`
29
+ - new format argument `:original` for `Regexp::Expression::Base#to_s`
30
+ * includes decorative elements between node and its quantifier
31
+ * e.g. `parse(/a (?#comment) +/x)[0].to_s(:original) # => "a (?#comment) +"`
32
+ * using it is not needed when calling `Root#to_s` as Root can't be quantified
33
+ - support calling `Subexpression#{each_expression,flat_map}` with a one-argument block
34
+ * in this case, only the expressions are passed to the block, no indices
35
+ - support calling test methods at Expression class level
36
+ - `capturing?`, `comment?`, `decorative?`, `referential?`, `terminal?`
37
+ - e.g. `Regexp::Expression::CharacterSet.terminal? # => false`
38
+
39
+ ### Fixed
40
+
41
+ - `Regexp::Expression::Shared#full_length` with whitespace before quantifier
42
+ * e.g. `parse(/a +/x)[0].full_length` used to yield `2`, now it yields `3`
43
+ - `Subexpression#to_s` output with children with whitespace before their quantifier
44
+ * e.g. `parse(/a + /x).to_s` used to yield `"a+ "`, now it yields `"a + "`
45
+ * calling `#to_s` on sub-nodes still omits such decorative interludes by default
46
+ - use new `#to_s` format `:original` to include it
47
+ - e.g. `parse(/a + /x)[0].to_s(:original) # => "a +"`
48
+ - fixed `Subexpression#te` behaving differently from other expressions
49
+ * only `Subexpression#te` used to include the quantifier
50
+ * now `#te` is the end index without quantifier, as for other expressions
51
+ - fixed `NoMethodError` when calling `#starts_at` or `#ts` on empty sequences
52
+ * e.g. `Regexp::Parser.parse(/|/)[0].starts_at`
53
+ * e.g. `Regexp::Parser.parse(/[&&]/)[0][0].starts_at`
54
+ - fixed nested comment groups breaking local x-options
55
+ * e.g. in `/(?x:(?#hello)) /`, the x-option wrongly applied to the whitespace
56
+ - fixed nested comment groups breaking conditionals
57
+ * e.g. in `/(a)(?(1)b|c(?#hello)d)e/`, the 2nd conditional branch included "e"
58
+ - fixed quantifiers after comment groups being mis-assigned to that group
59
+ * e.g. in `/a(?#foo){3}/` (matches 'aaa')
60
+ - fixed Scanner accepting two cases of invalid Regexp syntax
61
+ * unmatched closing parentheses (`)`) and k-backrefs with number 0 (`\k<0>`)
62
+ * these are a `SyntaxError` in Ruby, so could only be passed as a String
63
+ * they now raise a `Regexp::Scanner::ScannerError`
64
+ - fixed some scanner errors not inheriting from `Regexp::Scanner::ScannerError`
65
+ - reduced verbosity of inspect / pretty print output
66
+
67
+ ## [2.7.0] - 2023-02-08 - [Janosch Müller](mailto:janosch84@gmail.com)
68
+
69
+ ### Added
70
+
71
+ - `Regexp::Lexer.lex` now streams tokens when called with a block
72
+ * it can now take arbitrarily large input, just like `Regexp::Scanner`
73
+ * this also slightly improves `Regexp::Parser.parse` performance
74
+ * note: `Regexp::Parser.parse` still does not and will not support streaming
75
+ - improved performance of `Subexpression#each_expression`
76
+ - minor improvements to `Regexp::Scanner` performance
77
+ - overall improvement of parse performance: about 10% for large Regexps
78
+
79
+ ### Fixed
80
+
81
+ - parsing of octal escape sequences in sets, e.g. `[\141]`
82
+ * thanks to [Randy Stauner](https://github.com/rwstauner) for the report
83
+
84
+ ## [2.6.2] - 2023-01-19 - [Janosch Müller](mailto:janosch84@gmail.com)
85
+
86
+ ### Fixed
87
+
88
+ - fixed `SystemStackError` when cloning recursive subexpression calls
89
+ * e.g. `Regexp::Parser.parse(/a|b\g<0>/).dup`
90
+
91
+ ## [2.6.1] - 2022-11-16 - [Janosch Müller](mailto:janosch84@gmail.com)
92
+
93
+ ### Fixed
94
+
95
+ - fixed scanning of two negative lookbehind edge cases
96
+ * `(?<!x)y>` used to raise a ScannerError
97
+ * `(?<!x>)y` used to be misinterpreted as a named group
98
+ * thanks to [Sergio Medina](https://github.com/serch) for the report
99
+
100
+ ## [2.6.0] - 2022-09-26 - [Janosch Müller](mailto:janosch84@gmail.com)
101
+
102
+ ### Fixed
103
+
104
+ - fixed `#referenced_expression` for `\g<0>` (was `nil`, is now the `Root` exp)
105
+ - fixed `#reference`, `#referenced_expression` for recursion level backrefs
106
+ * e.g. `(a)(b)\k<-1+1>`
107
+ * `#referenced_expression` was `nil`, now it is the correct `Group` exp
108
+ - detect and raise for two more syntax errors when parsing String input
109
+ * quantification of option switches (e.g. `(?i)+`)
110
+ * invalid references (e.g. `/\k<1>/`)
111
+ * these are a `SyntaxError` in Ruby, so could only be passed as a String
112
+
113
+ ### Added
114
+
115
+ - `Regexp::Expression::Base#human_name`
116
+ * returns a nice, human-readable description of the expression
117
+ - `Regexp::Expression::Base#optional?`
118
+ * returns `true` if the expression is quantified accordingly (e.g. with `*`, `{,n}`)
119
+ - added a deprecation warning when calling `#to_re` on set members
120
+
121
+ ## [2.5.0] - 2022-05-27 - [Janosch Müller](mailto:janosch84@gmail.com)
122
+
123
+ ### Added
124
+
125
+ - `Regexp::Expression::Base.construct` and `.token_class` methods
126
+ * see the [wiki](https://github.com/ammar/regexp_parser/wiki) for details
127
+
128
+ ## [2.4.0] - 2022-05-09 - [Janosch Müller](mailto:janosch84@gmail.com)
129
+
130
+ ### Fixed
131
+
132
+ - fixed interpretation of `+` and `?` after interval quantifiers (`{n,n}`)
133
+ * they used to be treated as reluctant or possessive mode indicators
134
+ * however, Ruby does not support these modes for interval quantifiers
135
+ * they are now treated as chained quantifiers instead, as Ruby does it
136
+ * c.f. [#3](https://github.com/ammar/regexp_parser/issues/3)
137
+ - fixed `Expression::Base#nesting_level` for some tree rewrite cases
138
+ * e.g. the alternatives in `/a|[b]/` had an inconsistent nesting_level
139
+ - fixed `Scanner` accepting invalid posix classes, e.g. `[[:foo:]]`
140
+ * they raise a `SyntaxError` when used in a Regexp, so could only be passed as String
141
+ * they now raise a `Regexp::Scanner::ValidationError` in the `Scanner`
142
+
143
+ ### Added
144
+
145
+ - added `Expression::Base#==` for (deep) comparison of expressions
146
+ - added `Expression::Base#parts`
147
+ * returns the text elements and subexpressions of an expression
148
+ * e.g. `parse(/(a)/)[0].parts # => ["(", #<Literal @text="a"...>, ")"]`
149
+ - added `Expression::Base#te` (a.k.a. token end index)
150
+ * `Expression::Subexpression` always had `#te`, only terminal nodes lacked it so far
151
+ - made some `Expression::Base` methods available on `Quantifier` instances, too
152
+ * `#type`, `#type?`, `#is?`, `#one_of?`, `#options`, `#terminal?`
153
+ * `#base_length`, `#full_length`, `#starts_at`, `#te`, `#ts`, `#offset`
154
+ * `#conditional_level`, `#level`, `#nesting_level` , `#set_level`
155
+ * this allows a more unified handling with `Expression::Base` instances
156
+ - allowed `Quantifier#initialize` to take a token and options Hash like other nodes
157
+ - added a deprecation warning for initializing Quantifiers with 4+ arguments:
158
+
159
+ Calling `Expression::Base#quantify` or `Quantifier.new` with 4+ arguments
160
+ is deprecated.
161
+
162
+ It will no longer be supported in regexp_parser v3.0.0.
163
+
164
+ Please pass a Regexp::Token instead, e.g. replace `token, text, min, max, mode`
165
+ with `::Regexp::Token.new(:quantifier, token, text)`. min, max, and mode
166
+ will be derived automatically.
167
+
168
+ Or do `exp.quantifier = Quantifier.construct(token: token, text: str)`.
169
+
170
+ This is consistent with how Expression::Base instances are created.
171
+
172
+
173
+ ## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
174
+
175
+ ### Fixed
176
+
177
+ - removed five nonexistent unicode properties from `Syntax#features`
178
+ * these were never supported by Ruby or the `Regexp::Scanner`
179
+ * thanks to [Markus Schirp](https://github.com/mbj) for the report
180
+
181
+ ## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
182
+
183
+ ### Added
184
+
185
+ - improved parsing performance through `Syntax` refactoring
186
+ * instead of fresh `Syntax` instances, pre-loaded constants are now re-used
187
+ * this approximately doubles the parsing speed for simple regexps
188
+ - added methods to `Syntax` classes to show relative feature sets
189
+ * e.g. `Regexp::Syntax::V3_2_0.added_features`
190
+ - support for new unicode properties of Ruby 3.2 / Unicode 14.0
191
+
192
+ ## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
193
+
194
+ ### Fixed
195
+
196
+ - fixed Syntax version of absence groups (`(?~...)`)
197
+ * the lexer accepted them for any Ruby version
198
+ * now they are only recognized for Ruby >= 2.4.1 in which they were introduced
199
+ - reduced gem size by excluding specs from package
200
+ - removed deprecated `test_files` gemspec setting
201
+ - no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
202
+ - no longer depend on `set`
203
+ * `set` was removed from the stdlib and made a standalone gem as of Ruby 3
204
+ * this made it a hidden/undeclared dependency of `regexp_parser`
205
+
206
+ ## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
207
+
208
+ ### Added
209
+
210
+ - added support for 13 new unicode properties introduced in Ruby 3.1.0
211
+
212
+ ## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
213
+
214
+ ### Fixed
215
+
216
+ - fixed `NameError` when requiring only `'regexp_parser/scanner'` in v2.1.0
217
+ * thanks to [Jared White and Sam Ruby](https://github.com/ruby2js/ruby2js) for the report
218
+
219
+ ## [2.1.0] - 2021-02-22 - [Janosch Müller](mailto:janosch84@gmail.com)
220
+
221
+ ### Added
222
+
223
+ - common ancestor for all scanning/parsing/lexing errors
224
+ * `Regexp::Parser::Error` can now be rescued as a catch-all
225
+ * the following errors (and their many descendants) now inherit from it:
226
+ - `Regexp::Expression::Conditional::TooManyBranches`
227
+ - `Regexp::Parser::ParserError`
228
+ - `Regexp::Scanner::ScannerError`
229
+ - `Regexp::Scanner::ValidationError`
230
+ - `Regexp::Syntax::SyntaxError`
231
+ * it replaces `ArgumentError` in some rare cases (`Regexp::Parser.parse('?')`)
232
+ * thanks to [sandstrom](https://github.com/sandstrom) for the cue
233
+
234
+ ### Fixed
235
+
236
+ - fixed scanning of whole-pattern recursion calls `\g<0>` and `\g'0'`
237
+ * a regression in v2.0.1 had caused them to be scanned as literals
238
+ - fixed scanning of some backreference and subexpression call edge cases
239
+ * e.g. `\k<+1>`, `\g<x-1>`
240
+ - fixed tokenization of some escapes in character sets
241
+ * `.`, `|`, `{`, `}`, `(`, `)`, `^`, `$`, `?`, `+`, `*`
242
+ * all of these correctly emitted `#type` `:literal` and `#token` `:literal` if *not* escaped
243
+ * if escaped, they emitted e.g. `#type` `:escape` and `#token` `:group_open` for `[\(]`
244
+ * the escaped versions now correctly emit `#type` `:escape` and `#token` `:literal`
245
+ - fixed handling of control/metacontrol escapes in character sets
246
+ * e.g. `[\cX]`, `[\M-\C-X]`
247
+ * they were misread as a bunch of individual literals, escapes, and ranges
248
+ - fixed some cases where calling `#dup`/`#clone` on expressions led to shared state
249
+
250
+ ## [2.0.3] - 2020-12-28 - [Janosch Müller](mailto:janosch84@gmail.com)
251
+
252
+ ### Fixed
253
+
254
+ - fixed error when scanning some unlikely and redundant but valid charset patterns
255
+ * e.g. `/[[.a-b.]]/`, `/[[=e=]]/`
256
+ - fixed ancestry of some error classes related to syntax version lookup
257
+ * `NotImplementedError`, `InvalidVersionNameError`, `UnknownSyntaxNameError`
258
+ * they now correctly inherit from `Regexp::Syntax::SyntaxError` instead of Ruby's `::SyntaxError`
259
+
260
+ ## [2.0.2] - 2020-12-25 - [Janosch Müller](mailto:janosch84@gmail.com)
261
+
262
+ ### Fixed
263
+
264
+ - fixed `FrozenError` when calling `#to_s` on a frozen `Group::Passive`
265
+ * thanks to [Daniel Gollahon](https://github.com/dgollahon)
266
+
267
+ ## [2.0.1] - 2020-12-20 - [Janosch Müller](mailto:janosch84@gmail.com)
268
+
269
+ ### Fixed
270
+
271
+ - fixed error when scanning some group names
272
+ * this affected names containing hyphens, digits or multibyte chars, e.g. `/(?<a1>a)/`
273
+ * thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
274
+ - fixed error when scanning hex escapes with just one hex digit
275
+ * e.g. `/\x0A/` was scanned correctly, but the equivalent `/\xA/` was not
276
+ * thanks to [Daniel Gollahon](https://github.com/dgollahon) for the report
277
+
278
+ ## [2.0.0] - 2020-11-25 - [Janosch Müller](mailto:janosch84@gmail.com)
279
+
280
+ ### Changed
281
+
282
+ - some methods that used to return byte-based indices now return char-based indices
283
+ * the returned values have only changed for Regexps that contain multibyte chars
284
+ * this is only a breaking change if you used such methods directly AND relied on them pointing to bytes
285
+ * affected methods:
286
+ * `Regexp::Token` `#length`, `#offset`, `#te`, `#ts`
287
+ * `Regexp::Expression::Base` `#full_length`, `#offset`, `#starts_at`, `#te`, `#ts`
288
+ * thanks to [Akinori MUSHA](https://github.com/knu) for the report
289
+ - removed some deprecated methods/signatures
290
+ * these are rarely used and have been showing deprecation warnings for a long time
291
+ * `Regexp::Expression::Subexpression.new` with 3 arguments
292
+ * `Regexp::Expression::Root.new` without a token argument
293
+ * `Regexp::Expression.parsed`
294
+
295
+ ### Added
296
+
297
+ - `Regexp::Expression::Base#base_length`
298
+ * returns the character count of an expression body, ignoring any quantifier
299
+ - pragmatic, experimental support for chained quantifiers
300
+ * e.g.: `/^a{10}{4,6}$/` matches exactly 40, 50 or 60 `a`s
301
+ * successive quantifiers used to be silently dropped by the parser
302
+ * they are now wrapped with passive groups as if they were written `(?:a{10}){4,6}`
303
+ * thanks to [calfeld](https://github.com/calfeld) for reporting this a while back
304
+
305
+ ### Fixed
306
+
307
+ - incorrect encoding output for non-ascii comments
308
+ * this led to a crash when calling `#to_s` on parse results containing such comments
309
+ * thanks to [Michael Glass](https://github.com/michaelglass) for the report
310
+ - some crashes when scanning contrived patterns such as `'\😋'`
311
+
312
+ ### [1.8.2] - 2020-10-11 - [Janosch Müller](mailto:janosch84@gmail.com)
313
+
314
+ ### Fixed
315
+
316
+ - fix `FrozenError` in `Expression::Base#repetitions` on Ruby 3.0
317
+ * thanks to [Thomas Walpole](https://github.com/twalpole)
318
+ - removed "unknown future version" warning on Ruby 3.0
319
+
320
+ ### [1.8.1] - 2020-09-28 - [Janosch Müller](mailto:janosch84@gmail.com)
321
+
322
+ ### Fixed
323
+
324
+ - fixed scanning of comment-like text in normal mode
325
+ * this was an old bug, but had become more prevalent in v1.8.0
326
+ * thanks to [Tietew](https://github.com/Tietew) for the report
327
+ - specified correct minimum Ruby version in gemspec
328
+ * it said 1.9 but really required 2.0 as of v1.8.0
329
+
330
+ ### [1.8.0] - 2020-09-20 - [Janosch Müller](mailto:janosch84@gmail.com)
331
+
332
+ ### Changed
333
+
334
+ - dropped support for running on Ruby 1.9.x
335
+
336
+ ### Added
337
+
338
+ - regexp flags can now be passed when parsing a `String` as regexp body
339
+ * see the [README](/README.md#usage) for details
340
+ * thanks to [Owen Stephens](https://github.com/owst)
341
+ - bare occurrences of `\g` and `\k` are now allowed and scanned as literal escapes
342
+ * matches Onigmo behavior
343
+ * thanks for the report to [Marc-André Lafortune](https://github.com/marcandre)
344
+
345
+ ### Fixed
346
+
347
+ - fixed parsing comments without preceding space or trailing newline in x-mode
348
+ * thanks to [Owen Stephens](https://github.com/owst)
349
+
350
+ ### [1.7.1] - 2020-06-07 - [Ammar Ali](mailto:ammarabuali@gmail.com)
351
+
352
+ ### Fixed
353
+
354
+ - Support for literals that include the unescaped delimiters `{`, `}`, and `]`. These
355
+ delimiters are informally supported by various regexp engines.
356
+
3
357
  ### [1.7.0] - 2020-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
4
358
 
5
359
  ### Added
6
360
 
7
- - `Expression#each_expression` and `#traverse` can now be called without a block
361
+ - `Expression::Base#each_expression` and `#traverse` can now be called without a block
8
362
  * this returns an `Enumerator` and allows chaining, e.g. `each_expression.select`
9
363
  * thanks to [Masataka Kuwabara](https://github.com/pocke)
10
364
 
@@ -30,7 +384,7 @@
30
384
  - Fixed `Group#option_changes` not accounting for indirectly disabled (overridden) encoding flags
31
385
  - Fixed `Scanner` allowing negative encoding options if there were no positive options, e.g. '(?-u)'
32
386
  - Fixed `ScannerError` for some valid meta/control sequences such as '\\C-\\\\'
33
- - Fixed `Expression#match` and `#=~` not working with a single argument
387
+ - Fixed `Expression::Base#match` and `#=~` not working with a single argument
34
388
 
35
389
  ### [1.5.0] - 2019-05-14 - [Janosch Müller](mailto:janosch84@gmail.com)
36
390
 
@@ -38,15 +392,15 @@
38
392
 
39
393
  - Added `#referenced_expression` for backrefs, subexp calls and conditionals
40
394
  * returns the `Group` expression that is being referenced via name or number
41
- - Added `Expression#repetitions`
395
+ - Added `Expression::Base#repetitions`
42
396
  * returns a `Range` of allowed repetitions (`1..1` if there is no quantifier)
43
397
  * like `#quantity` but with a more uniform interface
44
- - Added `Expression#match_length`
398
+ - Added `Expression::Base#match_length`
45
399
  * allows inspecting and iterating over String lengths matched by the Expression
46
400
 
47
401
  ### Fixed
48
402
 
49
- - Fixed `Expression#clone` "direction"
403
+ - Fixed `Expression::Base#clone` "direction"
50
404
  * it used to dup ivars onto the callee, leaving only the clone referencing the original objects
51
405
  * this will affect you if you call `#eql?`/`#equal?` on expressions or use them as Hash keys
52
406
  - Fixed `#clone` results for `Sequences`, e.g. alternations and conditionals
@@ -85,8 +439,8 @@
85
439
 
86
440
  - Fixed missing quantifier in `Conditional::Expression` methods `#to_s`, `#to_re`
87
441
  - `Conditional::Condition` no longer lives outside the recursive `#expressions` tree
88
- - it used to be the only expression stored in a custom ivar, complicating traversal
89
- - its setter and getter (`#condition=`, `#condition`) still work as before
442
+ * it used to be the only expression stored in a custom ivar, complicating traversal
443
+ * its setter and getter (`#condition=`, `#condition`) still work as before
90
444
 
91
445
  ## [1.1.0] - 2018-09-17 - [Janosch Müller](mailto:janosch84@gmail.com)
92
446
 
@@ -94,8 +448,8 @@
94
448
 
95
449
  - Added `Quantifier` methods `#greedy?`, `#possessive?`, `#reluctant?`/`#lazy?`
96
450
  - Added `Group::Options#option_changes`
97
- - shows the options enabled or disabled by the given options group
98
- - as with all other expressions, `#options` shows the overall active options
451
+ * shows the options enabled or disabled by the given options group
452
+ * as with all other expressions, `#options` shows the overall active options
99
453
  - Added `Conditional#reference` and `Condition#reference`, indicating the determinative group
100
454
  - Added `Subexpression#dig`, acts like [`Array#dig`](http://ruby-doc.org/core-2.5.0/Array.html#method-i-dig)
101
455
 
@@ -208,7 +562,7 @@ This release includes several breaking changes, mostly to character sets, #map a
208
562
  - Fixed a thread safety issue (issue #45)
209
563
  - Some public class methods that were only reliable for
210
564
  internal use are now private instance methods (PR #46)
211
- - Improved the usefulness of Expression#options (issue #43) -
565
+ - Improved the usefulness of Expression::Base#options (issue #43) -
212
566
  #options and derived methods such as #i?, #m? and #x? are now
213
567
  defined for all Expressions that are affected by such flags.
214
568
  - Fixed scanning of whitespace following (?x) (commit 5c94bd2)
@@ -279,7 +633,6 @@ This release includes several breaking changes, mostly to character sets, #map a
279
633
  * Fixed scanning of zero length comments (PR #12)
280
634
  * Fixed missing escape:codepoint_list syntax token (PR #14)
281
635
  * Fixed to_s for modified interval quantifiers (PR #17)
282
- - Added a note about MRI implementation quirks to Scanner section
283
636
 
284
637
  ## [0.3.2] - 2016-01-01 - [Ammar Ali](mailto:ammarabuali@gmail.com)
285
638
 
@@ -305,7 +658,6 @@ This release includes several breaking changes, mostly to character sets, #map a
305
658
  - Renamed Lexer's method to lex, added an alias to the old name (scan)
306
659
  - Use #map instead of #each to run the block in Lexer.lex.
307
660
  - Replaced VERSION.yml file with a constant.
308
- - Updated README
309
661
  - Update tokens and scanner with new additions in Unicode 7.0.
310
662
 
311
663
  ## [0.1.6] - 2014-10-06 - [Ammar Ali](mailto:ammarabuali@gmail.com)
@@ -315,20 +667,11 @@ This release includes several breaking changes, mostly to character sets, #map a
315
667
  - Added syntax files for missing ruby 2.x versions. These do not add
316
668
  extra syntax support, they just make the gem work with the newer
317
669
  ruby versions.
318
- - Added .travis.yml to project root.
319
- - README:
320
- - Removed note purporting runtime support for ruby 1.8.6.
321
- - Added a section identifying the main unsupported syntax features.
322
- - Added sections for Testing and Building
323
- - Added badges for gem version, Travis CI, and code climate.
324
- - Updated README, fixing broken examples, and converting it from a rdoc file to Github's flavor of Markdown.
325
670
  - Fixed a parser bug where an alternation sequence that contained nested expressions was incorrectly being appended to the parent expression when the nesting was exited. e.g. in /a|(b)c/, c was appended to the root.
326
-
327
671
  - Fixed a bug where character types were not being correctly scanned within character sets. e.g. in [\d], two tokens were scanned; one for the backslash '\' and one for the 'd'
328
672
 
329
673
  ## [0.1.5] - 2014-01-14 - [Ammar Ali](mailto:ammarabuali@gmail.com)
330
674
 
331
- - Correct ChangeLog.
332
675
  - Added syntax stubs for ruby versions 2.0 and 2.1
333
676
  - Added clone methods for deep copying expressions.
334
677
  - Added optional format argument for to_s on expressions to return the text of the expression with (:full, the default) or without (:base) its quantifier.
@@ -337,7 +680,6 @@ This release includes several breaking changes, mostly to character sets, #map a
337
680
  - Improved EOF handling in general and especially from sequences like hex and control escapes.
338
681
  - Fixed a bug where named groups with an empty name would return a blank token [].
339
682
  - Fixed a bug where members of a parent set were being added to its last subset.
340
- - Various code cleanups in scanner.rl
341
683
  - Fixed a few mutable string bugs by calling dup on the originals.
342
684
  - Made ruby 1.8.6 the base for all 1.8 syntax, and the 1.8 name a pointer to the latest (1.8.7 at this time)
343
685
  - Removed look-behind assertions (positive and negative) from 1.8 syntax
data/Gemfile CHANGED
@@ -3,7 +3,13 @@ source 'https://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  group :development, :test do
6
+ gem 'leto', '~> 2.0'
6
7
  gem 'rake', '~> 13.0'
7
- gem 'regexp_property_values', '~> 1.0'
8
- gem 'rspec', '~> 3.8'
8
+ gem 'regexp_property_values', '~> 1.4'
9
+ gem 'rspec', '~> 3.10'
10
+ if RUBY_VERSION.to_f >= 2.7
11
+ gem 'benchmark-ips', '~> 2.1'
12
+ gem 'gouteur', '~> 1.1'
13
+ gem 'rubocop', '~> 1.7'
14
+ end
9
15
  end
data/LICENSE CHANGED
@@ -1,4 +1,4 @@
1
- Copyright (c) 2010, 2012-2015, Ammar Ali
1
+ Copyright (c) 2010, 2012-2023, Ammar Ali
2
2
 
3
3
  Permission is hereby granted, free of charge, to any person
4
4
  obtaining a copy of this software and associated documentation