regexp_parser 0.5.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +242 -0
- data/Gemfile +1 -0
- data/README.md +21 -17
- data/Rakefile +31 -0
- data/lib/regexp_parser/expression.rb +11 -9
- data/lib/regexp_parser/expression/classes/alternation.rb +5 -28
- data/lib/regexp_parser/expression/classes/backref.rb +21 -16
- data/lib/regexp_parser/expression/classes/escape.rb +81 -10
- data/lib/regexp_parser/expression/classes/group.rb +20 -20
- data/lib/regexp_parser/expression/classes/{character_class.rb → posix_class.rb} +2 -2
- data/lib/regexp_parser/expression/classes/property.rb +6 -0
- data/lib/regexp_parser/expression/classes/set.rb +10 -93
- data/lib/regexp_parser/expression/classes/set/intersection.rb +9 -0
- data/lib/regexp_parser/expression/classes/set/range.rb +23 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +6 -4
- data/lib/regexp_parser/expression/methods/tests.rb +4 -14
- data/lib/regexp_parser/expression/methods/traverse.rb +1 -1
- data/lib/regexp_parser/expression/quantifier.rb +3 -4
- data/lib/regexp_parser/expression/sequence_operation.rb +34 -0
- data/lib/regexp_parser/expression/subexpression.rb +6 -10
- data/lib/regexp_parser/lexer.rb +13 -17
- data/lib/regexp_parser/parser.rb +170 -116
- data/lib/regexp_parser/scanner.rb +952 -2431
- data/lib/regexp_parser/scanner/char_type.rl +31 -0
- data/lib/regexp_parser/scanner/properties/long.yml +561 -0
- data/lib/regexp_parser/scanner/properties/short.yml +225 -0
- data/lib/regexp_parser/scanner/property.rl +7 -806
- data/lib/regexp_parser/scanner/scanner.rl +112 -154
- data/lib/regexp_parser/syntax/base.rb +4 -4
- data/lib/regexp_parser/syntax/tokens.rb +1 -0
- data/lib/regexp_parser/syntax/tokens/backref.rb +2 -2
- data/lib/regexp_parser/syntax/tokens/character_set.rb +3 -38
- data/lib/regexp_parser/syntax/tokens/escape.rb +2 -3
- data/lib/regexp_parser/syntax/tokens/group.rb +5 -4
- data/lib/regexp_parser/syntax/tokens/{character_class.rb → posix_class.rb} +5 -5
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +519 -266
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -4
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +4 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +0 -2
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +2 -1
- data/test/expression/test_base.rb +2 -1
- data/test/expression/test_clone.rb +0 -57
- data/test/expression/test_set.rb +31 -8
- data/test/expression/test_strfregexp.rb +13 -4
- data/test/expression/test_subexpression.rb +25 -0
- data/test/expression/test_traverse.rb +25 -25
- data/test/helpers.rb +1 -0
- data/test/lexer/test_all.rb +1 -1
- data/test/lexer/test_conditionals.rb +9 -7
- data/test/lexer/test_nesting.rb +39 -21
- data/test/lexer/test_refcalls.rb +4 -4
- data/test/parser/set/test_intersections.rb +127 -0
- data/test/parser/set/test_ranges.rb +111 -0
- data/test/parser/test_all.rb +4 -1
- data/test/parser/test_escapes.rb +41 -9
- data/test/parser/test_groups.rb +22 -3
- data/test/parser/test_posix_classes.rb +27 -0
- data/test/parser/test_properties.rb +17 -290
- data/test/parser/test_refcalls.rb +66 -26
- data/test/parser/test_sets.rb +132 -129
- data/test/scanner/test_all.rb +1 -7
- data/test/scanner/test_conditionals.rb +16 -16
- data/test/scanner/test_errors.rb +0 -30
- data/test/scanner/test_escapes.rb +1 -2
- data/test/scanner/test_free_space.rb +28 -28
- data/test/scanner/test_groups.rb +35 -35
- data/test/scanner/test_meta.rb +1 -1
- data/test/scanner/test_properties.rb +87 -114
- data/test/scanner/test_refcalls.rb +18 -18
- data/test/scanner/test_scripts.rb +19 -351
- data/test/scanner/test_sets.rb +87 -60
- data/test/scanner/test_unicode_blocks.rb +4 -105
- data/test/support/warning_extractor.rb +1 -1
- data/test/syntax/test_syntax.rb +7 -0
- data/test/syntax/versions/test_1.8.rb +2 -4
- metadata +17 -7
- data/ChangeLog +0 -325
- data/test/scanner/test_emojis.rb +0 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8a48ea55ff32a9a023b9f07c13590acbafa412fbb4e8289f4ac8b825c37f9dc5
|
4
|
+
data.tar.gz: 5fd466646d0e9cfc22cae4f4d4dce1e2647d066d49ad230d6f09a98f790c2be6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b1b26d0a1431ebd9a00423a98f58abfd5098e5f625b075270b13ed34c73e9b0bc74cdb6fb5b50ca84f6aad2786df86d2128ace3a7e645571a11656580ca64dfe
|
7
|
+
data.tar.gz: 1536cf6aaa222823fc5319a03aa67a7cc157d60a47ce41e70384d86a986c42de2f252a0c9ca7088b87cb1dd4d89a666d0d7080bf5e9d18e0c3e54b20aa9f5606
|
data/CHANGELOG.md
ADDED
@@ -0,0 +1,242 @@
|
|
1
|
+
## [Unreleased]
|
2
|
+
|
3
|
+
This release includes several breaking changes, mostly to character sets, #map and properties.
|
4
|
+
|
5
|
+
### Changed
|
6
|
+
|
7
|
+
- Changed handling of sets (a.k.a. character classes or "bracket expressions")
|
8
|
+
* see PR #55 / issue #47 for details
|
9
|
+
* sets are now parsed to expression trees like other nestable expressions
|
10
|
+
* #scan now emits the same tokens as outside sets (no longer :set, :member)
|
11
|
+
* CharacterSet#members has been removed
|
12
|
+
* new Range and Intersection classes represent corresponding syntax features
|
13
|
+
* a new PosixClass expression class represents e.g. [[:ascii:]]
|
14
|
+
* PosixClass instances behave like Property ones, e.g. support #negative?
|
15
|
+
* #scan emits :(non)posixclass, :<type> instead of :set, :char_(non)<type>
|
16
|
+
- Changed Subexpression#map to act like regular Enumerable#map
|
17
|
+
* the old behavior is available as Subexpression#flat_map
|
18
|
+
* e.g. parse(/[a]/).map(&:to_s) == ["[a]"]; used to be ["[a]", "a"]
|
19
|
+
- Changed Expression emissions for some escape sequences
|
20
|
+
* EscapeSequence::Codepoint, CodepointList, Hex and Octal are now all used
|
21
|
+
* they already existed, but were all parsed as EscapeSequence::Literal
|
22
|
+
* e.g. \x97 is now EscapeSequence::Hex instead of EscapeSequence::Literal
|
23
|
+
- Changed naming of many property tokens (emitted for \p{...})
|
24
|
+
* if you work with these tokens, see PR #56 for details
|
25
|
+
* e.g. :punct_dash is now :dash_punctuation
|
26
|
+
- Changed (?m) and the like to emit as :options_switch token (@4ade4d1)
|
27
|
+
* allows differentiating from group-local :options, e.g. (?m:.)
|
28
|
+
- Changed name of Backreference::..NestLevel to ..RecursionLevel (@4184339)
|
29
|
+
- Changed Backreference::Number#number from String to Integer (@40a2231)
|
30
|
+
|
31
|
+
### Added
|
32
|
+
|
33
|
+
- Added support for all previously missing properties (about 250)
|
34
|
+
- Added Expression::UnicodeProperty#shortcut (e.g. returns "m" for "\p{mark}")
|
35
|
+
- Added #char(s) and #codepoint(s) methods to all EscapeSequence expressions
|
36
|
+
- Added #number/#name/#recursion_level to all backref/call expressions (@174bf21)
|
37
|
+
- Added #number and #number_at_level to capturing group expressions (@40a2231)
|
38
|
+
|
39
|
+
### Fixed
|
40
|
+
|
41
|
+
- Fixed ruby version mapping of some properties
|
42
|
+
- Fixed scanning of some property spellings, e.g. with dashes
|
43
|
+
- Fixed some incorrect property alias normalizations
|
44
|
+
- Fixed scanning of codepoint escapes with 6 digits (e.g. \u{10FFFF})
|
45
|
+
- Fixed scanning of \R and \X within sets; they act as literals there
|
46
|
+
|
47
|
+
## [0.5.0] - 2018-04-29 - [Janosch Müller](mailto:janosch84@gmail.com)
|
48
|
+
|
49
|
+
### Changed
|
50
|
+
|
51
|
+
- Changed handling of Ruby versions (PR #53)
|
52
|
+
* New Ruby versions are now supported by default
|
53
|
+
* Some deep-lying APIs have changed, which should not affect most users:
|
54
|
+
* `Regexp::Syntax::VERSIONS` is gone
|
55
|
+
* Syntax version names have changed from `Regexp::Syntax::Ruby::Vnnn`
|
56
|
+
to `Regexp::Syntax::Vn_n_n`
|
57
|
+
* Syntax version classes for Ruby versions without regex feature changes
|
58
|
+
are no longer predefined and are now only created on demand / lazily
|
59
|
+
* `Regexp::Syntax::supported?` returns true for any argument >= 1.8.6
|
60
|
+
|
61
|
+
### Fixed
|
62
|
+
|
63
|
+
- Fixed some use cases of Expression methods #strfregexp and #to_h (@e738107)
|
64
|
+
|
65
|
+
### Added
|
66
|
+
|
67
|
+
- Added full signature support to collection methods of Expressions (@aa7c55a)
|
68
|
+
|
69
|
+
## [0.4.13] - 2018-04-04 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
70
|
+
|
71
|
+
- Added ruby version files for 2.2.10 and 2.3.7
|
72
|
+
|
73
|
+
## [0.4.12] - 2018-03-30 - [Janosch Müller](mailto:janosch84@gmail.com)
|
74
|
+
|
75
|
+
- Added ruby version files for 2.4.4 and 2.5.1
|
76
|
+
|
77
|
+
## [0.4.11] - 2018-03-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
78
|
+
|
79
|
+
- Fixed UnknownSyntaxNameError introduced in v0.4.10 if
|
80
|
+
the gem's parent dir tree included a 'ruby' dir
|
81
|
+
|
82
|
+
## [0.4.10] - 2018-03-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
83
|
+
|
84
|
+
- Added ruby version file for 2.6.0
|
85
|
+
- Added support for Emoji properties (available in Ruby since 2.5.0)
|
86
|
+
- Added support for XPosixPunct and Regional_Indicator properties
|
87
|
+
- Fixed parsing of Unicode 6.0 and 7.0 script properties
|
88
|
+
- Fixed parsing of the special Assigned property
|
89
|
+
- Fixed scanning of InCyrillic_Supplement property
|
90
|
+
|
91
|
+
## [0.4.9] - 2017-12-25 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
92
|
+
|
93
|
+
- Added ruby version file for 2.5.0
|
94
|
+
|
95
|
+
## [0.4.8] - 2017-12-18 - [Janosch Müller](mailto:janosch84@gmail.com)
|
96
|
+
|
97
|
+
- Added ruby version files for 2.2.9, 2.3.6, and 2.4.3
|
98
|
+
|
99
|
+
## [0.4.7] - 2017-10-15 - [Janosch Müller](mailto:janosch84@gmail.com)
|
100
|
+
|
101
|
+
- Fixed a thread safety issue (issue #45)
|
102
|
+
- Some public class methods that were only reliable for
|
103
|
+
internal use are now private instance methods (PR #46)
|
104
|
+
- Improved the usefulness of Expression#options (issue #43) -
|
105
|
+
#options and derived methods such as #i?, #m? and #x? are now
|
106
|
+
defined for all Expressions that are affected by such flags.
|
107
|
+
- Fixed scanning of whitespace following (?x) (commit 5c94bd2)
|
108
|
+
- Fixed a Parser bug where the #number attribute of traditional
|
109
|
+
numerical backreferences was not set correctly (commit 851b620)
|
110
|
+
|
111
|
+
## [0.4.6] - 2017-09-18 - [Janosch Müller](mailto:janosch84@gmail.com)
|
112
|
+
|
113
|
+
- Added Parser support for hex escapes in sets (PR #36)
|
114
|
+
- Added Parser support for octal escapes (PR #37)
|
115
|
+
- Added support for cluster types \R and \X (PR #38)
|
116
|
+
- Added support for more metacontrol notations (PR #39)
|
117
|
+
|
118
|
+
## [0.4.5] - 2017-09-17 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
119
|
+
|
120
|
+
- Thanks to Janosch Müller (https://github.com/janosch-x):
|
121
|
+
* Support ruby 2.2.7 (PR #42)
|
122
|
+
- Added ruby version files for 2.2.8, 2.3.5, and 2.4.2
|
123
|
+
|
124
|
+
## [0.4.4] - 2017-07-10 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
125
|
+
|
126
|
+
- Thanks to Janosch Müller (https://github.com/janosch-x):
|
127
|
+
* Add support for new absence operator (PR #33)
|
128
|
+
- Thanks to Bartek Bułat (https://github.com/barthez):
|
129
|
+
* Add support for Ruby 2.3.4 version (PR #40)
|
130
|
+
|
131
|
+
## [0.4.3] - 2017-03-24 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
132
|
+
|
133
|
+
- Added ruby version file for 2.4.1
|
134
|
+
|
135
|
+
## [0.4.2] - 2017-01-10 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
136
|
+
|
137
|
+
- Thanks to Janosch Müller (https://github.com/janosch-x):
|
138
|
+
* Support ruby 2.4 (PR #30)
|
139
|
+
* Improve codepoint handling (PR #27)
|
140
|
+
|
141
|
+
## [0.4.1] - 2016-11-22 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
142
|
+
|
143
|
+
- Updated ruby version file for 2.3.3
|
144
|
+
|
145
|
+
## [0.4.0] - 2016-11-20 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
146
|
+
|
147
|
+
- Added Syntax.supported? method
|
148
|
+
- Updated ruby versions for latest releases; 2.1.10, 2.2.6, and 2.3.2
|
149
|
+
|
150
|
+
## [0.3.6] - 2016-06-08 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
151
|
+
|
152
|
+
- Thanks to John Backus (https://github.com/backus):
|
153
|
+
* Remove warnings (PR #26)
|
154
|
+
|
155
|
+
## [0.3.5] - 2016-05-30 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
156
|
+
|
157
|
+
- Thanks to John Backus (https://github.com/backus):
|
158
|
+
* Fix parsing of /\xFF/n (hex:escape) (PR #24)
|
159
|
+
|
160
|
+
## [0.3.4] - 2016-05-25 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
161
|
+
|
162
|
+
- Thanks to John Backus (https://github.com/backus):
|
163
|
+
* Fix warnings (PR #19)
|
164
|
+
- Thanks to Dana Scheider (https://github.com/danascheider):
|
165
|
+
* Correct error in README (PR #20)
|
166
|
+
- Fixed mistyped \h and \H character types (issue #21)
|
167
|
+
- Added ancestry syntax files for latest rubies (issue #22)
|
168
|
+
|
169
|
+
## [0.3.3] - 2016-04-26 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
170
|
+
|
171
|
+
- Thanks to John Backus (https://github.com/backus):
|
172
|
+
* Fixed scanning of zero length comments (PR #12)
|
173
|
+
* Fixed missing escape:codepoint_list syntax token (PR #14)
|
174
|
+
* Fixed to_s for modified interval quantifiers (PR #17)
|
175
|
+
- Added a note about MRI implementation quirks to Scanner section
|
176
|
+
|
177
|
+
## [0.3.2] - 2016-01-01 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
178
|
+
|
179
|
+
- Updated ruby versions for latest releases; 2.1.8, 2.2.4, and 2.3.0
|
180
|
+
- Fixed class name for UnknownSyntaxNameError exception
|
181
|
+
- Added UnicodeBlocks support to the parser.
|
182
|
+
- Added UnicodeBlocks support to the scanner.
|
183
|
+
- Added expand_members method to CharacterSet, returns traditional
|
184
|
+
or unicode property forms of shorthands (\d, \W, \s, etc.)
|
185
|
+
- Improved meaning and output of %t and %T in strfregexp.
|
186
|
+
- Added syntax versions for ruby 2.1.4 and 2.1.5 and updated
|
187
|
+
latest 2.1 version.
|
188
|
+
- Added to_h methods to Expression, Subexpression, and Quantifier.
|
189
|
+
- Added traversal methods; traverse, each_expression, and map.
|
190
|
+
- Added token/type test methods; type?, is?, and one_of?
|
191
|
+
- Added printing method strfregexp, inspired by strftime.
|
192
|
+
- Added scanning and parsing of free spacing (x mode) expressions.
|
193
|
+
- Improved handling of inline options (?mixdau:...)
|
194
|
+
- Added conditional expressions. Ruby 2.0.
|
195
|
+
- Added keep (\K) markers. Ruby 2.0.
|
196
|
+
- Added d, a, and u options. Ruby 2.0.
|
197
|
+
- Added missing meta sequences to the parser. They were supported by the scanner only.
|
198
|
+
- Renamed Lexer's method to lex, added an alias to the old name (scan)
|
199
|
+
- Use #map instead of #each to run the block in Lexer.lex.
|
200
|
+
- Replaced VERSION.yml file with a constant.
|
201
|
+
- Updated README
|
202
|
+
- Update tokens and scanner with new additions in Unicode 7.0.
|
203
|
+
|
204
|
+
## [0.1.6] - 2014-10-06 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
205
|
+
|
206
|
+
- Fixed test and gem building rake tasks and extracted the gem
|
207
|
+
specification from the Rakefile into a .gemspec file.
|
208
|
+
- Added syntax files for missing ruby 2.x versions. These do not add
|
209
|
+
extra syntax support, they just make the gem work with the newer
|
210
|
+
ruby versions.
|
211
|
+
- Added .travis.yml to project root.
|
212
|
+
- README:
|
213
|
+
- Removed note purporting runtime support for ruby 1.8.6.
|
214
|
+
- Added a section identifying the main unsupported syntax features.
|
215
|
+
- Added sections for Testing and Building
|
216
|
+
- Added badges for gem version, Travis CI, and code climate.
|
217
|
+
- Updated README, fixing broken examples, and converting it from a rdoc file to Github's flavor of Markdown.
|
218
|
+
- Fixed a parser bug where an alternation sequence that contained nested expressions was incorrectly being appended to the parent expression when the nesting was exited. e.g. in /a|(b)c/, c was appended to the root.
|
219
|
+
|
220
|
+
- Fixed a bug where character types were not being correctly scanned within character sets. e.g. in [\d], two tokens were scanned; one for the backslash '\' and one for the 'd'
|
221
|
+
|
222
|
+
## [0.1.5] - 2014-01-14 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
223
|
+
|
224
|
+
- Correct ChangeLog.
|
225
|
+
- Added syntax stubs for ruby versions 2.0 and 2.1
|
226
|
+
- Added clone methods for deep copying expressions.
|
227
|
+
- Added optional format argument for to_s on expressions to return the text of the expression with (:full, the default) or without (:base) its quantifier.
|
228
|
+
- Renamed the :beginning_of_line and :end_of_line tokens to :bol and :eol.
|
229
|
+
- Fixed a bug where alternations with more than two alternatives and one of them ending in a group were being incorrectly nested.
|
230
|
+
- Improved EOF handling in general and especially from sequences like hex and control escapes.
|
231
|
+
- Fixed a bug where named groups with an empty name would return a blank token [].
|
232
|
+
- Fixed a bug where members of a parent set were being added to its last subset.
|
233
|
+
- Various code cleanups in scanner.rl
|
234
|
+
- Fixed a few mutable string bugs by calling dup on the originals.
|
235
|
+
- Made ruby 1.8.6 the base for all 1.8 syntax, and the 1.8 name a pointer to the latest (1.8.7 at this time)
|
236
|
+
- Removed look-behind assertions (positive and negative) from 1.8 syntax
|
237
|
+
- Added control (\cc and \C-c) and meta (\M-c) escapes to 1.8 syntax
|
238
|
+
- The default syntax is now the one of the running ruby version in both the lexer and the parser.
|
239
|
+
|
240
|
+
## [0.1.0] - 2010-11-21 - [Ammar Ali](mailto:ammarabuali@gmail.com)
|
241
|
+
|
242
|
+
- Initial release
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -12,7 +12,7 @@ A ruby gem for tokenizing, parsing, and transforming regular expressions.
|
|
12
12
|
* Recognizes ruby 1.8, 1.9, and 2.x regular expressions [See Supported Syntax](#supported-syntax)
|
13
13
|
|
14
14
|
|
15
|
-
_For
|
15
|
+
_For examples of regexp_parser in use, see [Example Projects](#example-projects)._
|
16
16
|
|
17
17
|
|
18
18
|
---
|
@@ -369,15 +369,15 @@ _Note that not all of these are available in all versions of Ruby_
|
|
369
369
|
##### Inapplicable Features
|
370
370
|
|
371
371
|
Some modifiers, like `o` and `s`, apply to the **Regexp** object itself and do not
|
372
|
-
appear in its source.
|
373
|
-
[See](http://www.ruby-doc.org/core-2.
|
372
|
+
appear in its source. Other such modifiers include the encoding modifiers `e` and `n`
|
373
|
+
[See](http://www.ruby-doc.org/core-2.5.0/Regexp.html#class-Regexp-label-Encoding).
|
374
374
|
These are not seen by the scanner.
|
375
375
|
|
376
376
|
The following features are not currently enabled for Ruby by its regular
|
377
377
|
expressions library (Onigmo). They are not supported by the scanner.
|
378
378
|
|
379
|
-
- **Quotes**: `\Q...\E` _
|
380
|
-
- **Capture History**: `(?@...)`, `(?@<name>...)` _
|
379
|
+
- **Quotes**: `\Q...\E` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L499)_
|
380
|
+
- **Capture History**: `(?@...)`, `(?@<name>...)` _[[See]](https://github.com/k-takata/Onigmo/blob/7911409/doc/RE#L550)_
|
381
381
|
|
382
382
|
|
383
383
|
See something missing? Please submit an [issue](https://github.com/ammar/regexp_parser/issues)
|
@@ -389,18 +389,7 @@ or incorrectly return tokens/objects as literals._
|
|
389
389
|
## Testing
|
390
390
|
To run the tests simply run rake from the root directory, as 'test' is the default task.
|
391
391
|
|
392
|
-
|
393
|
-
tasks, which only run the tests for one component at a time. These are:
|
394
|
-
|
395
|
-
* test:scanner
|
396
|
-
* test:lexer
|
397
|
-
* test:parser
|
398
|
-
* test:expression
|
399
|
-
* test:syntax
|
400
|
-
|
401
|
-
_A special task 'test:full' generates the scanner's code from the ragel source files and
|
402
|
-
runs all the tests. This task requires ragel to be installed._
|
403
|
-
|
392
|
+
It generates the scanner's code from the ragel source files and runs all the tests, thus it requires ragel to be installed.
|
404
393
|
|
405
394
|
The tests use ruby's test/unit. They can also be run with:
|
406
395
|
|
@@ -420,6 +409,11 @@ It is sometimes helpful during development to focus on a specific test case, for
|
|
420
409
|
bin/test test/expression/test_base.rb -n test_expression_to_re
|
421
410
|
```
|
422
411
|
|
412
|
+
Note that changes to ragel files will not be reflected when using `bin/test`, so you might want to run:
|
413
|
+
|
414
|
+
```
|
415
|
+
rake ragel:rb && bin/test test/scanner/test_properties.rb
|
416
|
+
```
|
423
417
|
|
424
418
|
## Building
|
425
419
|
Building the scanner and the gem requires [ragel](http://www.colm.net/open-source/ragel/) to be
|
@@ -441,6 +435,16 @@ rake install
|
|
441
435
|
```
|
442
436
|
|
443
437
|
|
438
|
+
## Example Projects
|
439
|
+
Projects using regexp_parser.
|
440
|
+
|
441
|
+
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
442
|
+
|
443
|
+
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
444
|
+
|
445
|
+
- [js_regex](https://github.com/janosch-x/js_regex) converts Ruby regular expressions to JavaScript-compatible regular expressions.
|
446
|
+
|
447
|
+
|
444
448
|
## References
|
445
449
|
Documentation and books used while working on this project.
|
446
450
|
|
data/Rakefile
CHANGED
@@ -55,3 +55,34 @@ end
|
|
55
55
|
desc "Runs ragel:rb before building the gem"
|
56
56
|
task :build => ['ragel:rb']
|
57
57
|
|
58
|
+
|
59
|
+
namespace :props do
|
60
|
+
desc 'Write new property value hashes for the properties scanner'
|
61
|
+
task :update do
|
62
|
+
require 'regexp_property_values'
|
63
|
+
RegexpPropertyValues.update
|
64
|
+
dir = File.expand_path('../lib/regexp_parser/scanner/properties', __FILE__)
|
65
|
+
|
66
|
+
require 'psych'
|
67
|
+
write_hash_to_file = ->(hash, path) do
|
68
|
+
File.open(path, 'w') do |f|
|
69
|
+
f.puts '#',
|
70
|
+
"# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
|
71
|
+
'#',
|
72
|
+
hash.sort.to_h.to_yaml
|
73
|
+
end
|
74
|
+
puts "Wrote #{hash.count} aliases to `#{path}`"
|
75
|
+
end
|
76
|
+
|
77
|
+
_, long_names = RegexpPropertyValues.short_and_long_names
|
78
|
+
long_names_to_tokens = long_names.map do |name|
|
79
|
+
[name.downcase.gsub(/[^0-9a-z=.]/, ''), name.downcase]
|
80
|
+
end
|
81
|
+
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
|
82
|
+
|
83
|
+
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
84
|
+
[k.downcase.gsub(/[^0-9a-z=.]/, ''), v.downcase]
|
85
|
+
end
|
86
|
+
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
|
87
|
+
end
|
88
|
+
end
|
@@ -3,7 +3,7 @@ module Regexp::Expression
|
|
3
3
|
class Base
|
4
4
|
attr_accessor :type, :token
|
5
5
|
attr_accessor :text, :ts
|
6
|
-
attr_accessor :level, :set_level, :conditional_level
|
6
|
+
attr_accessor :level, :set_level, :conditional_level, :nesting_level
|
7
7
|
|
8
8
|
attr_accessor :quantifier
|
9
9
|
attr_accessor :options
|
@@ -16,18 +16,16 @@ module Regexp::Expression
|
|
16
16
|
self.level = token.level
|
17
17
|
self.set_level = token.set_level
|
18
18
|
self.conditional_level = token.conditional_level
|
19
|
+
self.nesting_level = 0
|
19
20
|
self.quantifier = nil
|
20
21
|
self.options = options
|
21
22
|
end
|
22
23
|
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
copy.quantifier = (quantifier ? quantifier.clone : nil)
|
29
|
-
|
30
|
-
copy
|
24
|
+
def initialize_clone(other)
|
25
|
+
other.text = (text ? text.dup : nil)
|
26
|
+
other.options = (options ? options.dup : nil)
|
27
|
+
other.quantifier = (quantifier ? quantifier.clone : nil)
|
28
|
+
super
|
31
29
|
end
|
32
30
|
|
33
31
|
def to_re(format = :full)
|
@@ -169,6 +167,7 @@ require 'regexp_parser/expression/methods/strfregexp'
|
|
169
167
|
require 'regexp_parser/expression/quantifier'
|
170
168
|
require 'regexp_parser/expression/subexpression'
|
171
169
|
require 'regexp_parser/expression/sequence'
|
170
|
+
require 'regexp_parser/expression/sequence_operation'
|
172
171
|
|
173
172
|
require 'regexp_parser/expression/classes/alternation'
|
174
173
|
require 'regexp_parser/expression/classes/anchor'
|
@@ -179,7 +178,10 @@ require 'regexp_parser/expression/classes/free_space'
|
|
179
178
|
require 'regexp_parser/expression/classes/group'
|
180
179
|
require 'regexp_parser/expression/classes/keep'
|
181
180
|
require 'regexp_parser/expression/classes/literal'
|
181
|
+
require 'regexp_parser/expression/classes/posix_class'
|
182
182
|
require 'regexp_parser/expression/classes/property'
|
183
183
|
require 'regexp_parser/expression/classes/root'
|
184
184
|
require 'regexp_parser/expression/classes/set'
|
185
|
+
require 'regexp_parser/expression/classes/set/intersection'
|
186
|
+
require 'regexp_parser/expression/classes/set/range'
|
185
187
|
require 'regexp_parser/expression/classes/type'
|
@@ -1,33 +1,10 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
|
-
# This is not a subexpression really, but considering it one simplifies
|
4
|
-
# the API when it comes to handling the alternatives.
|
5
|
-
class Alternation < Regexp::Expression::Subexpression
|
6
|
-
alias :alternatives :expressions
|
7
|
-
|
8
|
-
def starts_at
|
9
|
-
expressions.first.starts_at
|
10
|
-
end
|
11
|
-
alias :ts :starts_at
|
12
|
-
|
13
|
-
def <<(exp)
|
14
|
-
expressions.last << exp
|
15
|
-
end
|
16
|
-
|
17
|
-
def alternative(exp = nil)
|
18
|
-
expressions << (exp ? exp : Alternative.new(level, set_level, conditional_level))
|
19
|
-
end
|
20
|
-
|
21
|
-
def quantify(token, text, min = nil, max = nil, mode = :greedy)
|
22
|
-
alternatives.last.last.quantify(token, text, min, max, mode)
|
23
|
-
end
|
24
|
-
|
25
|
-
def to_s(format = :full)
|
26
|
-
alternatives.map{|e| e.to_s(format)}.join('|')
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
2
|
# A sequence of expressions, used by Alternation as one of its alternatives.
|
31
3
|
class Alternative < Regexp::Expression::Sequence; end
|
32
4
|
|
5
|
+
class Alternation < Regexp::Expression::SequenceOperation
|
6
|
+
OPERAND = Alternative
|
7
|
+
|
8
|
+
alias :alternatives :expressions
|
9
|
+
end
|
33
10
|
end
|