regexp_parser 0.5.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +242 -0
- data/Gemfile +1 -0
- data/README.md +21 -17
- data/Rakefile +31 -0
- data/lib/regexp_parser/expression.rb +11 -9
- data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
- data/lib/regexp_parser/expression/classes/backref.rb +21 -16
- data/lib/regexp_parser/expression/classes/escape.rb +81 -10
- data/lib/regexp_parser/expression/classes/group.rb +20 -20
- data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
- data/lib/regexp_parser/expression/classes/property.rb +6 -0
- data/lib/regexp_parser/expression/classes/set.rb +10 -93
- data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
- data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
- data/lib/regexp_parser/expression/methods/tests.rb +4 -14
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +3 -4
- data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
- data/lib/regexp_parser/expression/subexpression.rb +6 -10
- data/lib/regexp_parser/lexer.rb +13 -17
- data/lib/regexp_parser/parser.rb +170 -116
- data/lib/regexp_parser/scanner.rb +952 -2431
- data/lib/regexp_parser/scanner/char_type.rl +31 -0
- data/lib/regexp_parser/scanner/properties/long.yml +561 -0
- data/lib/regexp_parser/scanner/properties/short.yml +225 -0
- data/lib/regexp_parser/scanner/property.rl +7 -806
- data/lib/regexp_parser/scanner/scanner.rl +112 -154
- data/lib/regexp_parser/syntax/base.rb +4 -4
- data/lib/regexp_parser/syntax/tokens.rb +1 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
- data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
- data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
- data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -1
- data/test/expression/test_base.rb +2 -1
- data/test/expression/test_clone.rb +0 -57
- data/test/expression/test_set.rb +31 -8
- data/test/expression/test_strfregexp.rb +13 -4
- data/test/expression/test_subexpression.rb +25 -0
- data/test/expression/test_traverse.rb +25 -25
- data/test/helpers.rb +1 -0
- data/test/lexer/test_all.rb +1 -1
- data/test/lexer/test_conditionals.rb +9 -7
- data/test/lexer/test_nesting.rb +39 -21
- data/test/lexer/test_refcalls.rb +4 -4
- data/test/parser/set/test_intersections.rb +127 -0
- data/test/parser/set/test_ranges.rb +111 -0
- data/test/parser/test_all.rb +4 -1
- data/test/parser/test_escapes.rb +41 -9
- data/test/parser/test_groups.rb +22 -3
- data/test/parser/test_posix_classes.rb +27 -0
- data/test/parser/test_properties.rb +17 -290
- data/test/parser/test_refcalls.rb +66 -26
- data/test/parser/test_sets.rb +132 -129
- data/test/scanner/test_all.rb +1 -7
- data/test/scanner/test_conditionals.rb +16 -16
- data/test/scanner/test_errors.rb +0 -30
- data/test/scanner/test_escapes.rb +1 -2
- data/test/scanner/test_free_space.rb +28 -28
- data/test/scanner/test_groups.rb +35 -35
- data/test/scanner/test_meta.rb +1 -1
- data/test/scanner/test_properties.rb +87 -114
- data/test/scanner/test_refcalls.rb +18 -18
- data/test/scanner/test_scripts.rb +19 -351
- data/test/scanner/test_sets.rb +87 -60
- data/test/scanner/test_unicode_blocks.rb +4 -105
- data/test/support/warning_extractor.rb +1 -1
- data/test/syntax/test_syntax.rb +7 -0
- data/test/syntax/versions/test_1.8.rb +2 -4
- metadata +17 -7
- data/ChangeLog +0 -325
- data/test/scanner/test_emojis.rb +0 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a48ea55ff32a9a023b9f07c13590acbafa412fbb4e8289f4ac8b825c37f9dc5
|
4
|
+
data.tar.gz: 5fd466646d0e9cfc22cae4f4d4dce1e2647d066d49ad230d6f09a98f790c2be6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1b26d0a1431ebd9a00423a98f58abfd5098e5f625b075270b13ed34c73e9b0bc74cdb6fb5b50ca84f6aad2786df86d2128ace3a7e645571a11656580ca64dfe
|
7
|
+
data.tar.gz: 1536cf6aaa222823fc5319a03aa67a7cc157d60a47ce41e70384d86a986c42de2f252a0c9ca7088b87cb1dd4d89a666d0d7080bf5e9d18e0c3e54b20aa9f5606
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,242 @@
|
|
1
|
+
## [Unreleased]
|
2
|
+
|
3
|
+
This release includes several breaking changes, mostly to character sets, #map and properties.
|
4
|
+
|
5
|
+
### Changed
|
6
|
+
|
7
|
+
- Changed handling of sets (a.k.a. character classes or "bracket expressions")
|
8
|
+
* see PR #55 / issue #47 for details
|
9
|
+
* sets are now parsed to expression trees like other nestable expressions
|
10
|
+
* #scan now emits the same tokens as outside sets (no longer :set, :member)
|
11
|
+
* CharacterSet#members has been removed
|
12
|
+
* new Range and Intersection classes represent corresponding syntax features
|
13
|
+
* a new PosixClass expression class represents e.g. [[:ascii:]]
|
14
|
+
* PosixClass instances behave like Property ones, e.g. support #negative?
|
15
|
+
* #scan emits :(non)posixclass, :<type> instead of :set, :char_(non)<type>
|
16
|
+
- Changed Subexpression#map to act like regular Enumerable#map
|
17
|
+
* the old behavior is available as Subexpression#flat_map
|
18
|
+
* e.g. parse(/[a]/).map(&:to_s) == ["[a]"]; used to be ["[a]", "a"]
|
19
|
+
- Changed Expression emissions for some escape sequences
|
20
|
+
* EscapeSequence::Codepoint, CodepointList, Hex and Octal are now all used
|
21
|
+
* they already existed, but were all parsed as EscapeSequence::Literal
|
22
|
+
* e.g. \x97 is now EscapeSequence::Hex instead of EscapeSequence::Literal
|
23
|
+
- Changed naming of many property tokens (emitted for \p{...})
|
24
|
+
* if you work with these tokens, see PR #56 for details
|
25
|
+
* e.g. :punct_dash is now :dash_punctuation
|
26
|
+
- Changed (?m) and the like to emit as :options_switch token (@4ade4d1)
|
27
|
+
* allows differentiating from group-local :options, e.g. (?m:.)
|
28
|
+
- Changed name of Backreference::..NestLevel to ..RecursionLevel (@4184339)
|
29
|
+
- Changed Backreference::Number#number from String to Integer (@40a2231)
|
30
|
+
|
31
|
+
### Added
|
32
|
+
|
33
|
+
- Added support for all previously missing properties (about 250)
|
34
|
+
- Added Expression::UnicodeProperty#shortcut (e.g. returns "m" for "\p{mark}")
|
35
|
+
- Added #char(s) and #codepoint(s) methods to all EscapeSequence expressions
|
36
|
+
- Added #number/#name/#recursion_level to all backref/call expressions (@174bf21)
|
37
|
+
- Added #number and #number_at_level to capturing group expressions (@40a2231)
|
38
|
+
|
39
|
+
### Fixed
|
40
|
+
|
41
|
+
- Fixed ruby version mapping of some properties
|
42
|
+
- Fixed scanning of some property spellings, e.g. with dashes
|
43
|
+
- Fixed some incorrect property alias normalizations
|
44
|
+
- Fixed scanning of codepoint escapes with 6 digits (e.g. \u{10FFFF})
|
45
|
+
- Fixed scanning of \R and \X within sets; they act as literals there
|
46
|
+
|
47
|
+
## [0.5.0] - 2018-04-29 - [Janosch Müller](mailto:janosch84@gmail.com)
|
48
|
+
|
49
|
+
### Changed
|
50
|
+
|
51
|
+
- Changed handling of Ruby versions (PR #53)
|
52
|
+
* New Ruby versions are now supported by default
|
53
|
+
* Some deep-lying APIs have changed, which should not affect most users:
|
54
|
+
* `Regexp::Syntax::VERSIONS` is gone
|
55
|
+
* Syntax version names have changed from `Regexp::Syntax::Ruby::Vnnn`
|
56
|
+
to `Regexp::Syntax::Vn_n_n`
|
57
|
+
* Syntax version classes for Ruby versions without regex feature changes
|
58
|
+
are no longer predefined and are now only created on demand / lazily
|
59
|
+
* `Regexp::Syntax::supported?` returns true for any argument >= 1.8.6
|
60
|
+
|
61
|
+
### Fixed
|
62
|
+
|
63
|
+
- Fixed some use cases of Expression methods #strfregexp and #to_h (@e738107)
|
64
|
+
|
65
|
+
### Added
|
66
|
+
|
67
|
+
- Added full signature support to collection methods of Expressions (@aa7c55a)
|
68
|
+
|
69
|
+
## [0.4.13] - 2018-04-04 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
70
|
+
|
71
|
+
- Added ruby version files for 2.2.10 and 2.3.7
|
72
|
+
|
73
|
+
## [0.4.12] - 2018-03-30 - [Janosch Müller](mailto:janosch84@gmail.com)
|
74
|
+
|
75
|
+
- Added ruby version files for 2.4.4 and 2.5.1
|
76
|
+
|
77
|
+
## [0.4.11] - 2018-03-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
78
|
+
|
79
|
+
- Fixed UnknownSyntaxNameError introduced in v0.4.10 if
|
80
|
+
the gem's parent dir tree included a 'ruby' dir
|
81
|
+
|
82
|
+
## [0.4.10] - 2018-03-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
83
|
+
|
84
|
+
- Added ruby version file for 2.6.0
|
85
|
+
- Added support for Emoji properties (available in Ruby since 2.5.0)
|
86
|
+
- Added support for XPosixPunct and Regional_Indicator properties
|
87
|
+
- Fixed parsing of Unicode 6.0 and 7.0 script properties
|
88
|
+
- Fixed parsing of the special Assigned property
|
89
|
+
- Fixed scanning of InCyrillic_Supplement property
|
90
|
+
|
91
|
+
## [0.4.9] - 2017-12-25 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
92
|
+
|
93
|
+
- Added ruby version file for 2.5.0
|
94
|
+
|
95
|
+
## [0.4.8] - 2017-12-18 - [Janosch Müller](mailto:janosch84@gmail.com)
|
96
|
+
|
97
|
+
- Added ruby version files for 2.2.9, 2.3.6, and 2.4.3
|
98
|
+
|
99
|
+
## [0.4.7] - 2017-10-15 - [Janosch Müller](mailto:janosch84@gmail.com)
|
100
|
+
|
101
|
+
- Fixed a thread safety issue (issue #45)
|
102
|
+
- Some public class methods that were only reliable for
|
103
|
+
internal use are now private instance methods (PR #46)
|
104
|
+
- Improved the usefulness of Expression#options (issue #43) -
|
105
|
+
#options and derived methods such as #i?, #m? and #x? are now
|
106
|
+
defined for all Expressions that are affected by such flags.
|
107
|
+
- Fixed scanning of whitespace following (?x) (commit 5c94bd2)
|
108
|
+
- Fixed a Parser bug where the #number attribute of traditional
|
109
|
+
numerical backreferences was not set correctly (commit 851b620)
|
110
|
+
|
111
|
+
## [0.4.6] - 2017-09-18 - [Janosch Müller](mailto:janosch84@gmail.com)
|
112
|
+
|
113
|
+
- Added Parser support for hex escapes in sets (PR #36)
|
114
|
+
- Added Parser support for octal escapes (PR #37)
|
115
|
+
- Added support for cluster types \R and \X (PR #38)
|
116
|
+
- Added support for more metacontrol notations (PR #39)
|
117
|
+
|
118
|
+
## [0.4.5] - 2017-09-17 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
119
|
+
|
120
|
+
- Thanks to Janosch Müller (https://github.com/janosch-x):
|
121
|
+
* Support ruby 2.2.7 (PR #42)
|
122
|
+
- Added ruby version files for 2.2.8, 2.3.5, and 2.4.2
|
123
|
+
|
124
|
+
## [0.4.4] - 2017-07-10 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
125
|
+
|
126
|
+
- Thanks to Janosch Müller (https://github.com/janosch-x):
|
127
|
+
* Add support for new absence operator (PR #33)
|
128
|
+
- Thanks to Bartek Bułat (https://github.com/barthez):
|
129
|
+
* Add support for Ruby 2.3.4 version (PR #40)
|
130
|
+
|
131
|
+
## [0.4.3] - 2017-03-24 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
132
|
+
|
133
|
+
- Added ruby version file for 2.4.1
|
134
|
+
|
135
|
+
## [0.4.2] - 2017-01-10 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
136
|
+
|
137
|
+
- Thanks to Janosch Müller (https://github.com/janosch-x):
|
138
|
+
* Support ruby 2.4 (PR #30)
|
139
|
+
* Improve codepoint handling (PR #27)
|
140
|
+
|
141
|
+
## [0.4.1] - 2016-11-22 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
142
|
+
|
143
|
+
- Updated ruby version file for 2.3.3
|
144
|
+
|
145
|
+
## [0.4.0] - 2016-11-20 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
146
|
+
|
147
|
+
- Added Syntax.supported? method
|
148
|
+
- Updated ruby versions for latest releases; 2.1.10, 2.2.6, and 2.3.2
|
149
|
+
|
150
|
+
## [0.3.6] - 2016-06-08 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
151
|
+
|
152
|
+
- Thanks to John Backus (https://github.com/backus):
|
153
|
+
* Remove warnings (PR #26)
|
154
|
+
|
155
|
+
## [0.3.5] - 2016-05-30 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
156
|
+
|
157
|
+
- Thanks to John Backus (https://github.com/backus):
|
158
|
+
* Fix parsing of /\xFF/n (hex:escape) (PR #24)
|
159
|
+
|
160
|
+
## [0.3.4] - 2016-05-25 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
161
|
+
|
162
|
+
- Thanks to John Backus (https://github.com/backus):
|
163
|
+
* Fix warnings (PR #19)
|
164
|
+
- Thanks to Dana Scheider (https://github.com/danascheider):
|
165
|
+
* Correct error in README (PR #20)
|
166
|
+
- Fixed mistyped \h and \H character types (issue #21)
|
167
|
+
- Added ancestry syntax files for latest rubies (issue #22)
|
168
|
+
|
169
|
+
## [0.3.3] - 2016-04-26 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
170
|
+
|
171
|
+
- Thanks to John Backus (https://github.com/backus):
|
172
|
+
* Fixed scanning of zero length comments (PR #12)
|
173
|
+
* Fixed missing escape:codepoint_list syntax token (PR #14)
|
174
|
+
* Fixed to_s for modified interval quantifiers (PR #17)
|
175
|
+
- Added a note about MRI implementation quirks to Scanner section
|
176
|
+
|
177
|
+
## [0.3.2] - 2016-01-01 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
178
|
+
|
179
|
+
- Updated ruby versions for latest releases; 2.1.8, 2.2.4, and 2.3.0
|
180
|
+
- Fixed class name for UnknownSyntaxNameError exception
|
181
|
+
- Added UnicodeBlocks support to the parser.
|
182
|
+
- Added UnicodeBlocks support to the scanner.
|
183
|
+
- Added expand_members method to CharacterSet, returns traditional
|
184
|
+
or unicode property forms of shorthands (\d, \W, \s, etc.)
|
185
|
+
- Improved meaning and output of %t and %T in strfregexp.
|
186
|
+
- Added syntax versions for ruby 2.1.4 and 2.1.5 and updated
|
187
|
+
latest 2.1 version.
|
188
|
+
- Added to_h methods to Expression, Subexpression, and Quantifier.
|
189
|
+
- Added traversal methods; traverse, each_expression, and map.
|
190
|
+
- Added token/type test methods; type?, is?, and one_of?
|
191
|
+
- Added printing method strfregexp, inspired by strftime.
|
192
|
+
- Added scanning and parsing of free spacing (x mode) expressions.
|
193
|
+
- Improved handling of inline options (?mixdau:...)
|
194
|
+
- Added conditional expressions. Ruby 2.0.
|
195
|
+
- Added keep (\K) markers. Ruby 2.0.
|
196
|
+
- Added d, a, and u options. Ruby 2.0.
|
197
|
+
- Added missing meta sequences to the parser. They were supported by the scanner only.
|
198
|
+
- Renamed Lexer's method to lex, added an alias to the old name (scan)
|
199
|
+
- Use #map instead of #each to run the block in Lexer.lex.
|
200
|
+
- Replaced VERSION.yml file with a constant.
|
201
|
+
- Updated README
|
202
|
+
- Update tokens and scanner with new additions in Unicode 7.0.
|
203
|
+
|
204
|
+
## [0.1.6] - 2014-10-06 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
205
|
+
|
206
|
+
- Fixed test and gem building rake tasks and extracted the gem
|
207
|
+
specification from the Rakefile into a .gemspec file.
|
208
|
+
- Added syntax files for missing ruby 2.x versions. These do not add
|
209
|
+
extra syntax support, they just make the gem work with the newer
|
210
|
+
ruby versions.
|
211
|
+
- Added .travis.yml to project root.
|
212
|
+
- README:
|
213
|
+
- Removed note purporting runtime support for ruby 1.8.6.
|
214
|
+
- Added a section identifying the main unsupported syntax features.
|
215
|
+
- Added sections for Testing and Building
|
216
|
+
- Added badges for gem version, Travis CI, and code climate.
|
217
|
+
- Updated README, fixing broken examples, and converting it from a rdoc file to Github's flavor of Markdown.
|
218
|
+
- Fixed a parser bug where an alternation sequence that contained nested expressions was incorrectly being appended to the parent expression when the nesting was exited. e.g. in /a|(b)c/, c was appended to the root.
|
219
|
+
|
220
|
+
- Fixed a bug where character types were not being correctly scanned within character sets. e.g. in [\d], two tokens were scanned; one for the backslash '\' and one for the 'd'
|
221
|
+
|
222
|
+
## [0.1.5] - 2014-01-14 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
223
|
+
|
224
|
+
- Correct ChangeLog.
|
225
|
+
- Added syntax stubs for ruby versions 2.0 and 2.1
|
226
|
+
- Added clone methods for deep copying expressions.
|
227
|
+
- Added optional format argument for to_s on expressions to return the text of the expression with (:full, the default) or without (:base) its quantifier.
|
228
|
+
- Renamed the :beginning_of_line and :end_of_line tokens to :bol and :eol.
|
229
|
+
- Fixed a bug where alternations with more than two alternatives and one of them ending in a group were being incorrectly nested.
|
230
|
+
- Improved EOF handling in general and especially from sequences like hex and control escapes.
|
231
|
+
- Fixed a bug where named groups with an empty name would return a blank token [].
|
232
|
+
- Fixed a bug where members of a parent set were being added to its last subset.
|
233
|
+
- Various code cleanups in scanner.rl
|
234
|
+
- Fixed a few mutable string bugs by calling dup on the originals.
|
235
|
+
- Made ruby 1.8.6 the base for all 1.8 syntax, and the 1.8 name a pointer to the latest (1.8.7 at this time)
|
236
|
+
- Removed look-behind assertions (positive and negative) from 1.8 syntax
|
237
|
+
- Added control (\cc and \C-c) and meta (\M-c) escapes to 1.8 syntax
|
238
|
+
- The default syntax is now the one of the running ruby version in both the lexer and the parser.
|
239
|
+
|
240
|
+
## [0.1.0] - 2010-11-21 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
241
|
+
|
242
|
+
- Initial release
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -12,7 +12,7 @@ A ruby gem for tokenizing, parsing, and transforming regular expressions.
|
|
12
12
|
* Recognizes ruby 1.8, 1.9, and 2.x regular expressions [See Supported Syntax](#supported-syntax)
|
13
13
|
|
14
14
|
|
15
|
-
_For
|
15
|
+
_For examples of regexp_parser in use, see [Example Projects](#example-projects)._
|
16
16
|
|
17
17
|
|
18
18
|
---
|
@@ -369,15 +369,15 @@ _Note that not all of these are available in all versions of Ruby_
|
|
369
369
|
##### Inapplicable Features
|
370
370
|
|
371
371
|
Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
|
372
|
-
appear in its source.
|
373
|
-
[See](http://www.ruby-doc.org/core-2.
|
372
|
+
appear in its source. Other such modifiers include the encoding modifiers `e` and `n`
|
373
|
+
[See](http://www.ruby-doc.org/core-2.5.0/Regexp.html#class-Regexp-label-Encoding).
|
374
374
|
These are not seen by the scanner.
|
375
375
|
|
376
376
|
The following features are not currently enabled for Ruby by its regular
|
377
377
|
expressions library (Onigmo). They are not supported by the scanner.
|
378
378
|
|
379
|
-
- **Quotes**: `\Q...\E` _
|
380
|
-
- **Capture History**: `(?@...)`, `(?@<name>...)` _
|
379
|
+
- **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
|
380
|
+
- **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
|
381
381
|
|
382
382
|
|
383
383
|
See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
|
@@ -389,18 +389,7 @@ or incorrectly return tokens/objects as literals._
|
|
389
389
|
## Testing
|
390
390
|
To run the tests simply run rake from the root directory, as 'test' is the default task.
|
391
391
|
|
392
|
-
|
393
|
-
tasks, which only run the tests for one component at a time. These are:
|
394
|
-
|
395
|
-
* test:scanner
|
396
|
-
* test:lexer
|
397
|
-
* test:parser
|
398
|
-
* test:expression
|
399
|
-
* test:syntax
|
400
|
-
|
401
|
-
_A special task 'test:full' generates the scanner's code from the ragel source files and
|
402
|
-
runs all the tests. This task requires ragel to be installed._
|
403
|
-
|
392
|
+
It generates the scanner's code from the ragel source files and runs all the tests, thus it requires ragel to be installed.
|
404
393
|
|
405
394
|
The tests use ruby's test/unit. They can also be run with:
|
406
395
|
|
@@ -420,6 +409,11 @@ It is sometimes helpful during development to focus on a specific test case, for
|
|
420
409
|
bin/test test/expression/test_base.rb -n test_expression_to_re
|
421
410
|
```
|
422
411
|
|
412
|
+
Note that changes to ragel files will not be reflected when using `bin/test`, so you might want to run:
|
413
|
+
|
414
|
+
```
|
415
|
+
rake ragel:rb && bin/test test/scanner/test_properties.rb
|
416
|
+
```
|
423
417
|
|
424
418
|
## Building
|
425
419
|
Building the scanner and the gem requires [ragel](http://www.colm.net/open-source/ragel/) to be
|
@@ -441,6 +435,16 @@ rake install
|
|
441
435
|
```
|
442
436
|
|
443
437
|
|
438
|
+
## Example Projects
|
439
|
+
Projects using regexp_parser.
|
440
|
+
|
441
|
+
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
442
|
+
|
443
|
+
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
444
|
+
|
445
|
+
- [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
|
446
|
+
|
447
|
+
|
444
448
|
## References
|
445
449
|
Documentation and books used while working on this project.
|
446
450
|
|
data/Rakefile
CHANGED
@@ -55,3 +55,34 @@ end
|
|
55
55
|
desc "Runs ragel:rb before building the gem"
|
56
56
|
task :build => ['ragel:rb']
|
57
57
|
|
58
|
+
|
59
|
+
namespace :props do
|
60
|
+
desc 'Write new property value hashes for the properties scanner'
|
61
|
+
task :update do
|
62
|
+
require 'regexp_property_values'
|
63
|
+
RegexpPropertyValues.update
|
64
|
+
dir = File.expand_path('../lib/regexp_parser/scanner/properties', __FILE__)
|
65
|
+
|
66
|
+
require 'psych'
|
67
|
+
write_hash_to_file = ->(hash, path) do
|
68
|
+
File.open(path, 'w') do |f|
|
69
|
+
f.puts '#',
|
70
|
+
"# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
|
71
|
+
'#',
|
72
|
+
hash.sort.to_h.to_yaml
|
73
|
+
end
|
74
|
+
puts "Wrote #{hash.count} aliases to `#{path}`"
|
75
|
+
end
|
76
|
+
|
77
|
+
_, long_names = RegexpPropertyValues.short_and_long_names
|
78
|
+
long_names_to_tokens = long_names.map do |name|
|
79
|
+
[name.downcase.gsub(/[^0-9a-z=.]/, ''), name.downcase]
|
80
|
+
end
|
81
|
+
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
|
82
|
+
|
83
|
+
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
84
|
+
[k.downcase.gsub(/[^0-9a-z=.]/, ''), v.downcase]
|
85
|
+
end
|
86
|
+
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
|
87
|
+
end
|
88
|
+
end
|
@@ -3,7 +3,7 @@ module Regexp::Expression
|
|
3
3
|
class Base
|
4
4
|
attr_accessor :type, :token
|
5
5
|
attr_accessor :text, :ts
|
6
|
-
attr_accessor :level, :set_level, :conditional_level
|
6
|
+
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
7
7
|
|
8
8
|
attr_accessor :quantifier
|
9
9
|
attr_accessor :options
|
@@ -16,18 +16,16 @@ module Regexp::Expression
|
|
16
16
|
self.level = token.level
|
17
17
|
self.set_level = token.set_level
|
18
18
|
self.conditional_level = token.conditional_level
|
19
|
+
self.nesting_level = 0
|
19
20
|
self.quantifier = nil
|
20
21
|
self.options = options
|
21
22
|
end
|
22
23
|
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
copy.quantifier = (quantifier ? quantifier.clone : nil)
|
29
|
-
|
30
|
-
copy
|
24
|
+
def initialize_clone(other)
|
25
|
+
other.text = (text ? text.dup : nil)
|
26
|
+
other.options = (options ? options.dup : nil)
|
27
|
+
other.quantifier = (quantifier ? quantifier.clone : nil)
|
28
|
+
super
|
31
29
|
end
|
32
30
|
|
33
31
|
def to_re(format = :full)
|
@@ -169,6 +167,7 @@ require 'regexp_parser/expression/methods/strfregexp'
|
|
169
167
|
require 'regexp_parser/expression/quantifier'
|
170
168
|
require 'regexp_parser/expression/subexpression'
|
171
169
|
require 'regexp_parser/expression/sequence'
|
170
|
+
require 'regexp_parser/expression/sequence_operation'
|
172
171
|
|
173
172
|
require 'regexp_parser/expression/classes/alternation'
|
174
173
|
require 'regexp_parser/expression/classes/anchor'
|
@@ -179,7 +178,10 @@ require 'regexp_parser/expression/classes/free_space'
|
|
179
178
|
require 'regexp_parser/expression/classes/group'
|
180
179
|
require 'regexp_parser/expression/classes/keep'
|
181
180
|
require 'regexp_parser/expression/classes/literal'
|
181
|
+
require 'regexp_parser/expression/classes/posix_class'
|
182
182
|
require 'regexp_parser/expression/classes/property'
|
183
183
|
require 'regexp_parser/expression/classes/root'
|
184
184
|
require 'regexp_parser/expression/classes/set'
|
185
|
+
require 'regexp_parser/expression/classes/set/intersection'
|
186
|
+
require 'regexp_parser/expression/classes/set/range'
|
185
187
|
require 'regexp_parser/expression/classes/type'
|
@@ -1,33 +1,10 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
|
-
# This is not a subexpression really, but considering it one simplifies
|
4
|
-
# the API when it comes to handling the alternatives.
|
5
|
-
class Alternation < Regexp::Expression::Subexpression
|
6
|
-
alias :alternatives :expressions
|
7
|
-
|
8
|
-
def starts_at
|
9
|
-
expressions.first.starts_at
|
10
|
-
end
|
11
|
-
alias :ts :starts_at
|
12
|
-
|
13
|
-
def <<(exp)
|
14
|
-
expressions.last << exp
|
15
|
-
end
|
16
|
-
|
17
|
-
def alternative(exp = nil)
|
18
|
-
expressions << (exp ? exp : Alternative.new(level, set_level, conditional_level))
|
19
|
-
end
|
20
|
-
|
21
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
22
|
-
alternatives.last.last.quantify(token, text, min, max, mode)
|
23
|
-
end
|
24
|
-
|
25
|
-
def to_s(format = :full)
|
26
|
-
alternatives.map{|e| e.to_s(format)}.join('|')
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
2
|
# A sequence of expressions, used by Alternation as one of its alternatives.
|
31
3
|
class Alternative < Regexp::Expression::Sequence; end
|
32
4
|
|
5
|
+
class Alternation < Regexp::Expression::SequenceOperation
|
6
|
+
OPERAND = Alternative
|
7
|
+
|
8
|
+
alias :alternatives :expressions
|
9
|
+
end
|
33
10
|
end
|