regexp_parser 2.2.0 → 2.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -1
- data/Gemfile +2 -1
- data/LICENSE +1 -1
- data/README.md +16 -6
- data/Rakefile +1 -59
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +12 -7
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/lexer.rb +1 -1
- data/lib/regexp_parser/scanner/properties/long.csv +622 -0
- data/lib/regexp_parser/scanner/properties/short.csv +246 -0
- data/lib/regexp_parser/scanner/scanner.rl +6 -4
- data/lib/regexp_parser/scanner.rb +126 -124
- data/lib/regexp_parser/syntax/any.rb +2 -5
- data/lib/regexp_parser/syntax/base.rb +91 -66
- data/lib/regexp_parser/syntax/token/backreference.rb +7 -2
- data/lib/regexp_parser/syntax/token/quantifier.rb +4 -4
- data/lib/regexp_parser/syntax/token/unicode_property.rb +26 -5
- data/lib/regexp_parser/syntax/version_lookup.rb +20 -29
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +1 -1
- data/lib/regexp_parser/version.rb +1 -1
- data/regexp_parser.gemspec +20 -22
- metadata +12 -143
- data/lib/regexp_parser/scanner/properties/long.yml +0 -607
- data/lib/regexp_parser/scanner/properties/short.yml +0 -245
- data/spec/expression/base_spec.rb +0 -104
- data/spec/expression/clone_spec.rb +0 -152
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -108
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -64
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -60
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -133
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/options_spec.rb +0 -28
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -117
- data/spec/parser/quantifiers_spec.rb +0 -68
- data/spec/parser/refcalls_spec.rb +0 -117
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -121
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -73
- data/spec/scanner/free_space_spec.rb +0 -165
- data/spec/scanner/groups_spec.rb +0 -61
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -39
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/options_spec.rb +0 -36
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -25
- data/spec/scanner/refcalls_spec.rb +0 -55
- data/spec/scanner/sets_spec.rb +0 -151
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -28
- data/spec/support/capturing_stderr.rb +0 -9
- data/spec/support/shared_examples.rb +0 -77
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -38
- data/spec/token/token_spec.rb +0 -85
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6dedba6b051b22dd917febd957e35e6800b37af958780e331007de1f59b2b466
|
4
|
+
data.tar.gz: 55fa98afa6031a38cac2045f8c592290fad9a4e500597277f3f79be75f1076f3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b2b463f3a28450527691d90bcfc8901b815bd4a32e88bcdee1f95db4588599993766687a4c7a1b785b450c0f0025039caf07bb93aaf1013fa365e4ed16fc040
|
7
|
+
data.tar.gz: 8aabccda06bb1f20485610076ad679a0d62e8630f992c55be7158c01f3b8b6dd6eb44a206f71b613c0335db13789d8e21d652973ecfb7c262c25a6d54e35a371
|
data/CHANGELOG.md
CHANGED
@@ -1,10 +1,43 @@
|
|
1
1
|
## [Unreleased]
|
2
2
|
|
3
|
+
## [2.3.1] - 2022-04-24 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
|
+
|
5
|
+
### Fixed
|
6
|
+
|
7
|
+
- removed five nonexistent unicode properties from `Syntax#features`
|
8
|
+
- these were never supported by Ruby but incorrectly accepted by the parser
|
9
|
+
- thanks to [Markus Schirp](https://github.com/mbj) for the report
|
10
|
+
|
11
|
+
## [2.3.0] - 2022-04-08 - [Janosch Müller](mailto:janosch84@gmail.com)
|
12
|
+
|
13
|
+
### Added
|
14
|
+
|
15
|
+
- improved parsing performance through `Syntax` refactoring
|
16
|
+
- instead of fresh `Syntax` instances, pre-loaded constants are now re-used
|
17
|
+
- this approximately doubles the parsing speed for simple regexps
|
18
|
+
- added methods to `Syntax` classes to show relative feature sets
|
19
|
+
- e.g. `Regexp::Syntax::V3_2_0.added_features`
|
20
|
+
- support for new unicode properties of Ruby 3.2 / Unicode 14.0
|
21
|
+
|
22
|
+
## [2.2.1] - 2022-02-11 - [Janosch Müller](mailto:janosch84@gmail.com)
|
23
|
+
|
24
|
+
### Fixed
|
25
|
+
|
26
|
+
- fixed Syntax version of absence groups (`(?~...)`)
|
27
|
+
- the lexer accepted them for any Ruby version
|
28
|
+
- now they are only recognized for Ruby >= 2.4.1 in which they were introduced
|
29
|
+
- reduced gem size by excluding specs from package
|
30
|
+
- removed deprecated `test_files` gemspec setting
|
31
|
+
- no longer depend on `yaml`/`psych` (except for Ruby <= 2.4)
|
32
|
+
- no longer depend on `set`
|
33
|
+
- `set` was removed from the stdlib and made a standalone gem as of Ruby 3
|
34
|
+
- this made it a hidden/undeclared dependency of `regexp_parser`
|
35
|
+
|
3
36
|
## [2.2.0] - 2021-12-04 - [Janosch Müller](mailto:janosch84@gmail.com)
|
4
37
|
|
5
38
|
### Added
|
6
39
|
|
7
|
-
-
|
40
|
+
- added support for 13 new unicode properties introduced in Ruby 3.1.0
|
8
41
|
|
9
42
|
## [2.1.1] - 2021-02-23 - [Janosch Müller](mailto:janosch84@gmail.com)
|
10
43
|
|
data/Gemfile
CHANGED
@@ -5,9 +5,10 @@ gemspec
|
|
5
5
|
group :development, :test do
|
6
6
|
gem 'ice_nine', '~> 0.11.2'
|
7
7
|
gem 'rake', '~> 13.0'
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
8
|
+
gem 'regexp_property_values', '~> 1.3'
|
9
9
|
gem 'rspec', '~> 3.10'
|
10
10
|
if RUBY_VERSION.to_f >= 2.7
|
11
|
+
gem 'benchmark-ips', '~> 2.1'
|
11
12
|
gem 'gouteur'
|
12
13
|
gem 'rubocop', '~> 1.7'
|
13
14
|
end
|
data/LICENSE
CHANGED
data/README.md
CHANGED
@@ -157,31 +157,41 @@ flavor). Syntax classes act as lookup tables, and are layered to create
|
|
157
157
|
flavor variations. Syntax only comes into play in the lexer.
|
158
158
|
|
159
159
|
#### Example
|
160
|
-
The following
|
160
|
+
The following fetches syntax objects for Ruby 2.0, 1.9, 1.8, and
|
161
161
|
checks a few of their implementation features.
|
162
162
|
|
163
163
|
```ruby
|
164
164
|
require 'regexp_parser'
|
165
165
|
|
166
|
-
ruby_20 = Regexp::Syntax.
|
166
|
+
ruby_20 = Regexp::Syntax.for 'ruby/2.0'
|
167
167
|
ruby_20.implements? :quantifier, :zero_or_one # => true
|
168
168
|
ruby_20.implements? :quantifier, :zero_or_one_reluctant # => true
|
169
169
|
ruby_20.implements? :quantifier, :zero_or_one_possessive # => true
|
170
170
|
ruby_20.implements? :conditional, :condition # => true
|
171
171
|
|
172
|
-
ruby_19 = Regexp::Syntax.
|
172
|
+
ruby_19 = Regexp::Syntax.for 'ruby/1.9'
|
173
173
|
ruby_19.implements? :quantifier, :zero_or_one # => true
|
174
174
|
ruby_19.implements? :quantifier, :zero_or_one_reluctant # => true
|
175
175
|
ruby_19.implements? :quantifier, :zero_or_one_possessive # => true
|
176
176
|
ruby_19.implements? :conditional, :condition # => false
|
177
177
|
|
178
|
-
ruby_18 = Regexp::Syntax.
|
178
|
+
ruby_18 = Regexp::Syntax.for 'ruby/1.8'
|
179
179
|
ruby_18.implements? :quantifier, :zero_or_one # => true
|
180
180
|
ruby_18.implements? :quantifier, :zero_or_one_reluctant # => true
|
181
181
|
ruby_18.implements? :quantifier, :zero_or_one_possessive # => false
|
182
182
|
ruby_18.implements? :conditional, :condition # => false
|
183
183
|
```
|
184
184
|
|
185
|
+
Syntax objects can also be queried about their complete and relative feature sets.
|
186
|
+
|
187
|
+
```ruby
|
188
|
+
require 'regexp_parser'
|
189
|
+
|
190
|
+
ruby_20 = Regexp::Syntax.for 'ruby/2.0' # => Regexp::Syntax::V2_0_0
|
191
|
+
ruby_20.added_features # => { conditional: [...], ... }
|
192
|
+
ruby_20.removed_features # => { property: [:newline], ... }
|
193
|
+
ruby_20.features # => { anchor: [...], ... }
|
194
|
+
```
|
185
195
|
|
186
196
|
#### Notes
|
187
197
|
* Variations on a token, for example a named group with angle brackets (< and >)
|
@@ -437,7 +447,7 @@ Projects using regexp_parser.
|
|
437
447
|
|
438
448
|
- [meta_re](https://github.com/ammar/meta_re) is a regular expression preprocessor with alias support.
|
439
449
|
|
440
|
-
- [mutant](https://github.com/mbj/mutant)
|
450
|
+
- [mutant](https://github.com/mbj/mutant) manipulates your regular expressions (amongst others) to see if your tests cover their behavior.
|
441
451
|
|
442
452
|
- [rubocop](https://github.com/rubocop-hq/rubocop) is a linter for Ruby that uses regexp_parser to lint Regexps.
|
443
453
|
|
@@ -470,4 +480,4 @@ Documentation and books used while working on this project.
|
|
470
480
|
|
471
481
|
---
|
472
482
|
##### Copyright
|
473
|
-
_Copyright (c) 2010-
|
483
|
+
_Copyright (c) 2010-2022 Ammar Ali. See LICENSE file for details._
|
data/Rakefile
CHANGED
@@ -5,9 +5,7 @@ require 'rake'
|
|
5
5
|
require 'rake/testtask'
|
6
6
|
require 'rspec/core/rake_task'
|
7
7
|
|
8
|
-
|
9
|
-
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
10
|
-
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
8
|
+
Dir['tasks/**/*.rake'].each { |file| load(file) }
|
11
9
|
|
12
10
|
Bundler::GemHelper.install_tasks
|
13
11
|
|
@@ -19,63 +17,7 @@ namespace :test do
|
|
19
17
|
task full: [:'ragel:rb', :spec]
|
20
18
|
end
|
21
19
|
|
22
|
-
namespace :ragel do
|
23
|
-
desc "Process the ragel source files and output ruby code"
|
24
|
-
task :rb do
|
25
|
-
RAGEL_SOURCE_FILES.each do |source_file|
|
26
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
|
27
|
-
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
28
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
|
29
|
-
|
30
|
-
contents = File.read(output_file)
|
31
|
-
|
32
|
-
File.open(output_file, 'r+') do |file|
|
33
|
-
contents = "# -*- warn-indent:false; -*-\n" + contents
|
34
|
-
|
35
|
-
file.write(contents)
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
desc "Delete the ragel generated source file(s)"
|
41
|
-
task :clean do
|
42
|
-
RAGEL_SOURCE_FILES.each do |file|
|
43
|
-
sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
44
|
-
end
|
45
|
-
end
|
46
|
-
end
|
47
|
-
|
48
20
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
49
21
|
# latest scanner code is generated and included in the build.
|
50
22
|
desc "Runs ragel:rb before building the gem"
|
51
23
|
task :build => ['ragel:rb']
|
52
|
-
|
53
|
-
namespace :props do
|
54
|
-
desc 'Write new property value hashes for the properties scanner'
|
55
|
-
task :update do
|
56
|
-
require 'regexp_property_values'
|
57
|
-
RegexpPropertyValues.update
|
58
|
-
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
59
|
-
|
60
|
-
require 'psych'
|
61
|
-
write_hash_to_file = ->(hash, path) do
|
62
|
-
File.open(path, 'w') do |f|
|
63
|
-
f.puts '#',
|
64
|
-
"# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
|
65
|
-
'#',
|
66
|
-
hash.sort.to_h.to_yaml
|
67
|
-
end
|
68
|
-
puts "Wrote #{hash.count} aliases to `#{path}`"
|
69
|
-
end
|
70
|
-
|
71
|
-
long_names_to_tokens = RegexpPropertyValues.all.map do |val|
|
72
|
-
[val.identifier, val.full_name.downcase]
|
73
|
-
end
|
74
|
-
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
|
75
|
-
|
76
|
-
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
77
|
-
[k.identifier, v.full_name.downcase]
|
78
|
-
end
|
79
|
-
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
|
80
|
-
end
|
81
|
-
end
|
@@ -2,16 +2,21 @@ module Regexp::Expression
|
|
2
2
|
# TODO: unify naming with Token::Escape, one way or the other, in v3.0.0
|
3
3
|
module EscapeSequence
|
4
4
|
class Base < Regexp::Expression::Base
|
5
|
-
require 'yaml'
|
6
|
-
|
7
|
-
def char
|
8
|
-
# poor man's unescape without using eval
|
9
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
10
|
-
end
|
11
|
-
|
12
5
|
def codepoint
|
13
6
|
char.ord
|
14
7
|
end
|
8
|
+
|
9
|
+
if ''.respond_to?(:undump)
|
10
|
+
def char
|
11
|
+
%("#{text}").undump
|
12
|
+
end
|
13
|
+
else
|
14
|
+
# poor man's unescape without using eval
|
15
|
+
require 'yaml'
|
16
|
+
def char
|
17
|
+
YAML.load(%Q(---\n"#{text}"\n))
|
18
|
+
end
|
19
|
+
end
|
15
20
|
end
|
16
21
|
|
17
22
|
class Literal < EscapeSequence::Base
|
@@ -43,7 +43,7 @@ module Regexp::Expression
|
|
43
43
|
|
44
44
|
# Order is important! Fields that use other fields in their
|
45
45
|
# definition must appear before the fields they use.
|
46
|
-
part_keys = %w
|
46
|
+
part_keys = %w[a m b o i l x s e S y k c q Q z Z t ~t T >]
|
47
47
|
part.keys.each {|k| part[k] = "<?#{k}?>"}
|
48
48
|
|
49
49
|
part['>'] = print_level ? (' ' * (print_level + indent_offset)) : ''
|
data/lib/regexp_parser/lexer.rb
CHANGED