regexp_parser 2.2.0 → 2.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -1
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +16 -6
- data/Rakefile +1 -59
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +12 -7
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/lexer.rb +1 -1
- data/lib/regexp_parser/scanner/properties/long.csv +622 -0
- data/lib/regexp_parser/scanner/properties/short.csv +246 -0
- data/lib/regexp_parser/scanner/scanner.rl +6 -4
- data/lib/regexp_parser/scanner.rb +126 -124
- data/lib/regexp_parser/syntax/any.rb +2 -5
- data/lib/regexp_parser/syntax/base.rb +91 -66
- data/lib/regexp_parser/syntax/token/backreference.rb +7 -2
- data/lib/regexp_parser/syntax/token/quantifier.rb +4 -4
- data/lib/regexp_parser/syntax/token/unicode_property.rb +26 -5
- data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +20 -22
- metadata +12 -143
- data/lib/regexp_parser/scanner/properties/long.yml +0 -607
- data/lib/regexp_parser/scanner/properties/short.yml +0 -245
- data/spec/expression/base_spec.rb +0 -104
- data/spec/expression/clone_spec.rb +0 -152
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -108
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -64
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -60
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -133
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/options_spec.rb +0 -28
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -117
- data/spec/parser/quantifiers_spec.rb +0 -68
- data/spec/parser/refcalls_spec.rb +0 -117
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -121
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -73
- data/spec/scanner/free_space_spec.rb +0 -165
- data/spec/scanner/groups_spec.rb +0 -61
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -39
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/options_spec.rb +0 -36
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -25
- data/spec/scanner/refcalls_spec.rb +0 -55
- data/spec/scanner/sets_spec.rb +0 -151
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -28
- data/spec/support/capturing_stderr.rb +0 -9
- data/spec/support/shared_examples.rb +0 -77
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -38
- data/spec/token/token_spec.rb +0 -85
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6dedba6b051b22dd917febd957e35e6800b37af958780e331007de1f59b2b466
|
4
|
+
data.tar.gz: 55fa98afa6031a38cac2045f8c592290fad9a4e500597277f3f79be75f1076f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b2b463f3a28450527691d90bcfc8901b815bd4a32e88bcdee1f95db4588599993766687a4c7a1b785b450c0f0025039caf07bb93aaf1013fa365e4ed16fc040
|
7
|
+
data.tar.gz: 8aabccda06bb1f20485610076ad679a0d62e8630f992c55be7158c01f3b8b6dd6eb44a206f71b613c0335db13789d8e21d652973ecfb7c262c25a6d54e35a371
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,43 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Fixed
|
6
|
+
|
7
|
+
- removed five nonexistent unicode properties from `Syntax#features`
|
8
|
+
- these were never supported by Ruby but incorrectly accepted by the parser
|
9
|
+
- thanks to [Markus Schirp](https://github.com/mbj) for the report
|
10
|
+
|
11
|
+
## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
12
|
+
|
13
|
+
### Added
|
14
|
+
|
15
|
+
- improved parsing performance through `Syntax` refactoring
|
16
|
+
- instead of fresh `Syntax` instances, pre-loaded constants are now re-used
|
17
|
+
- this approximately doubles the parsing speed for simple regexps
|
18
|
+
- added methods to `Syntax` classes to show relative feature sets
|
19
|
+
- e.g. `Regexp::Syntax::V3_2_0.added_features`
|
20
|
+
- support for new unicode properties of Ruby 3.2 / Unicode 14.0
|
21
|
+
|
22
|
+
## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
23
|
+
|
24
|
+
### Fixed
|
25
|
+
|
26
|
+
- fixed Syntax version of absence groups (`(?~...)`)
|
27
|
+
- the lexer accepted them for any Ruby version
|
28
|
+
- now they are only recognized for Ruby >= 2.4.1 in which they were introduced
|
29
|
+
- reduced gem size by excluding specs from package
|
30
|
+
- removed deprecated `test_files` gemspec setting
|
31
|
+
- no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
|
32
|
+
- no longer depend on `set`
|
33
|
+
- `set` was removed from the stdlib and made a standalone gem as of Ruby 3
|
34
|
+
- this made it a hidden/undeclared dependency of `regexp_parser`
|
35
|
+
|
3
36
|
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
37
|
|
5
38
|
### Added
|
6
39
|
|
7
|
-
-
|
40
|
+
- added support for 13 new unicode properties introduced in Ruby 3.1.0
|
8
41
|
|
9
42
|
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
10
43
|
|
data/Gemfile
CHANGED
@@ -5,9 +5,10 @@ gemspec
|
|
5
5
|
group :development, :test do
|
6
6
|
gem 'ice_nine', '~> 0.11.2'
|
7
7
|
gem 'rake', '~> 13.0'
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
8
|
+
gem 'regexp_property_values', '~> 1.3'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
|
+
gem 'benchmark-ips', '~> 2.1'
|
11
12
|
gem 'gouteur'
|
12
13
|
gem 'rubocop', '~> 1.7'
|
13
14
|
end
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -157,31 +157,41 @@ flavor). Syntax classes act as lookup tables, and are layered to create
|
|
157
157
|
flavor variations. Syntax only comes into play in the lexer.
|
158
158
|
|
159
159
|
#### Example
|
160
|
-
The following
|
160
|
+
The following fetches syntax objects for Ruby 2.0, 1.9, 1.8, and
|
161
161
|
checks a few of their implementation features.
|
162
162
|
|
163
163
|
```ruby
|
164
164
|
require 'regexp_parser'
|
165
165
|
|
166
|
-
ruby_20 = Regexp::Syntax.
|
166
|
+
ruby_20 = Regexp::Syntax.for 'ruby/2.0'
|
167
167
|
ruby_20.implements? :quantifier, :zero_or_one # => true
|
168
168
|
ruby_20.implements? :quantifier, :zero_or_one_reluctant # => true
|
169
169
|
ruby_20.implements? :quantifier, :zero_or_one_possessive # => true
|
170
170
|
ruby_20.implements? :conditional, :condition # => true
|
171
171
|
|
172
|
-
ruby_19 = Regexp::Syntax.
|
172
|
+
ruby_19 = Regexp::Syntax.for 'ruby/1.9'
|
173
173
|
ruby_19.implements? :quantifier, :zero_or_one # => true
|
174
174
|
ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
|
175
175
|
ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
|
176
176
|
ruby_19.implements? :conditional, :condition # => false
|
177
177
|
|
178
|
-
ruby_18 = Regexp::Syntax.
|
178
|
+
ruby_18 = Regexp::Syntax.for 'ruby/1.8'
|
179
179
|
ruby_18.implements? :quantifier, :zero_or_one # => true
|
180
180
|
ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
|
181
181
|
ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
|
182
182
|
ruby_18.implements? :conditional, :condition # => false
|
183
183
|
```
|
184
184
|
|
185
|
+
Syntax objects can also be queried about their complete and relative feature sets.
|
186
|
+
|
187
|
+
```ruby
|
188
|
+
require 'regexp_parser'
|
189
|
+
|
190
|
+
ruby_20 = Regexp::Syntax.for 'ruby/2.0' # => Regexp::Syntax::V2_0_0
|
191
|
+
ruby_20.added_features # => { conditional: [...], ... }
|
192
|
+
ruby_20.removed_features # => { property: [:newline], ... }
|
193
|
+
ruby_20.features # => { anchor: [...], ... }
|
194
|
+
```
|
185
195
|
|
186
196
|
#### Notes
|
187
197
|
* Variations on a token, for example a named group with angle brackets (< and >)
|
@@ -437,7 +447,7 @@ Projects using regexp_parser.
|
|
437
447
|
|
438
448
|
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
439
449
|
|
440
|
-
- [mutant](https://github.com/mbj/mutant)
|
450
|
+
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
441
451
|
|
442
452
|
- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
|
443
453
|
|
@@ -470,4 +480,4 @@ Documentation and books used while working on this project.
|
|
470
480
|
|
471
481
|
---
|
472
482
|
##### Copyright
|
473
|
-
_Copyright (c) 2010-
|
483
|
+
_Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
|
data/Rakefile
CHANGED
@@ -5,9 +5,7 @@ require 'rake'
|
|
5
5
|
require 'rake/testtask'
|
6
6
|
require 'rspec/core/rake_task'
|
7
7
|
|
8
|
-
|
9
|
-
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
10
|
-
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
8
|
+
Dir['tasks/**/*.rake'].each { |file| load(file) }
|
11
9
|
|
12
10
|
Bundler::GemHelper.install_tasks
|
13
11
|
|
@@ -19,63 +17,7 @@ namespace :test do
|
|
19
17
|
task full: [:'ragel:rb', :spec]
|
20
18
|
end
|
21
19
|
|
22
|
-
namespace :ragel do
|
23
|
-
desc "Process the ragel source files and output ruby code"
|
24
|
-
task :rb do
|
25
|
-
RAGEL_SOURCE_FILES.each do |source_file|
|
26
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
|
27
|
-
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
28
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
|
29
|
-
|
30
|
-
contents = File.read(output_file)
|
31
|
-
|
32
|
-
File.open(output_file, 'r+') do |file|
|
33
|
-
contents = "# -*- warn-indent:false; -*-\n" + contents
|
34
|
-
|
35
|
-
file.write(contents)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
desc "Delete the ragel generated source file(s)"
|
41
|
-
task :clean do
|
42
|
-
RAGEL_SOURCE_FILES.each do |file|
|
43
|
-
sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
20
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
49
21
|
# latest scanner code is generated and included in the build.
|
50
22
|
desc "Runs ragel:rb before building the gem"
|
51
23
|
task :build => ['ragel:rb']
|
52
|
-
|
53
|
-
namespace :props do
|
54
|
-
desc 'Write new property value hashes for the properties scanner'
|
55
|
-
task :update do
|
56
|
-
require 'regexp_property_values'
|
57
|
-
RegexpPropertyValues.update
|
58
|
-
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
59
|
-
|
60
|
-
require 'psych'
|
61
|
-
write_hash_to_file = ->(hash, path) do
|
62
|
-
File.open(path, 'w') do |f|
|
63
|
-
f.puts '#',
|
64
|
-
"# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
|
65
|
-
'#',
|
66
|
-
hash.sort.to_h.to_yaml
|
67
|
-
end
|
68
|
-
puts "Wrote #{hash.count} aliases to `#{path}`"
|
69
|
-
end
|
70
|
-
|
71
|
-
long_names_to_tokens = RegexpPropertyValues.all.map do |val|
|
72
|
-
[val.identifier, val.full_name.downcase]
|
73
|
-
end
|
74
|
-
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
|
75
|
-
|
76
|
-
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
77
|
-
[k.identifier, v.full_name.downcase]
|
78
|
-
end
|
79
|
-
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
|
80
|
-
end
|
81
|
-
end
|
@@ -2,16 +2,21 @@ module Regexp::Expression
|
|
2
2
|
# TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
|
3
3
|
module EscapeSequence
|
4
4
|
class Base < Regexp::Expression::Base
|
5
|
-
require 'yaml'
|
6
|
-
|
7
|
-
def char
|
8
|
-
# poor man's unescape without using eval
|
9
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
10
|
-
end
|
11
|
-
|
12
5
|
def codepoint
|
13
6
|
char.ord
|
14
7
|
end
|
8
|
+
|
9
|
+
if ''.respond_to?(:undump)
|
10
|
+
def char
|
11
|
+
%("#{text}").undump
|
12
|
+
end
|
13
|
+
else
|
14
|
+
# poor man's unescape without using eval
|
15
|
+
require 'yaml'
|
16
|
+
def char
|
17
|
+
YAML.load(%Q(---\n"#{text}"\n))
|
18
|
+
end
|
19
|
+
end
|
15
20
|
end
|
16
21
|
|
17
22
|
class Literal < EscapeSequence::Base
|
@@ -43,7 +43,7 @@ module Regexp::Expression
|
|
43
43
|
|
44
44
|
# Order is important! Fields that use other fields in their
|
45
45
|
# definition must appear before the fields they use.
|
46
|
-
part_keys = %w
|
46
|
+
part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
|
47
47
|
part.keys.each {|k| part[k] = "<?#{k}?>"}
|
48
48
|
|
49
49
|
part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
|
data/lib/regexp_parser/lexer.rb
CHANGED