regexp_parser 2.1.1 → 2.11.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +8 -5
- data/LICENSE +1 -1
- data/Rakefile +10 -72
- data/lib/regexp_parser/error.rb +3 -1
- data/lib/regexp_parser/expression/base.rb +78 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +3 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +2 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +8 -10
- data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +2 -0
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -7
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +6 -8
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +2 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +4 -20
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +33 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +6 -4
- data/lib/regexp_parser/expression/classes/group.rb +12 -22
- data/lib/regexp_parser/expression/classes/keep.rb +4 -0
- data/lib/regexp_parser/expression/classes/literal.rb +3 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +7 -5
- data/lib/regexp_parser/expression/classes/root.rb +5 -6
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +12 -11
- data/lib/regexp_parser/expression/methods/construct.rb +43 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +7 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +76 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +45 -0
- data/lib/regexp_parser/expression/methods/match.rb +2 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -5
- data/lib/regexp_parser/expression/methods/negative.rb +22 -0
- data/lib/regexp_parser/expression/methods/options.rb +2 -0
- data/lib/regexp_parser/expression/methods/parts.rb +25 -0
- data/lib/regexp_parser/expression/methods/printing.rb +28 -0
- data/lib/regexp_parser/expression/methods/referenced_expressions.rb +30 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +3 -1
- data/lib/regexp_parser/expression/methods/tests.rb +49 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +37 -19
- data/lib/regexp_parser/expression/quantifier.rb +57 -24
- data/lib/regexp_parser/expression/sequence.rb +13 -31
- data/lib/regexp_parser/expression/sequence_operation.rb +6 -9
- data/lib/regexp_parser/expression/shared.rb +114 -0
- data/lib/regexp_parser/expression/subexpression.rb +28 -18
- data/lib/regexp_parser/expression.rb +42 -155
- data/lib/regexp_parser/lexer.rb +83 -39
- data/lib/regexp_parser/parser.rb +142 -174
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +10 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +65 -0
- data/lib/regexp_parser/scanner/properties/long.csv +670 -0
- data/lib/regexp_parser/scanner/properties/short.csv +257 -0
- data/lib/regexp_parser/scanner/property.rl +2 -2
- data/lib/regexp_parser/scanner/scanner.rl +167 -189
- data/lib/regexp_parser/scanner.rb +1419 -1549
- data/lib/regexp_parser/syntax/any.rb +4 -7
- data/lib/regexp_parser/syntax/base.rb +92 -65
- data/lib/regexp_parser/syntax/token/anchor.rb +17 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +4 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +35 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +18 -0
- data/lib/regexp_parser/syntax/token/character_type.rb +18 -0
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +5 -3
- data/lib/regexp_parser/syntax/token/escape.rb +35 -0
- data/lib/regexp_parser/syntax/token/group.rb +25 -0
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +22 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +5 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +37 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +766 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +13 -0
- data/lib/regexp_parser/syntax/token.rb +47 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +21 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +14 -19
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +11 -16
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +4 -9
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +9 -14
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +4 -8
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +4 -8
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +4 -8
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +3 -7
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +4 -8
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +4 -8
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +4 -8
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +4 -8
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +6 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +6 -0
- data/lib/regexp_parser/syntax/versions/3.5.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +6 -2
- data/lib/regexp_parser/syntax.rb +4 -2
- data/lib/regexp_parser/token.rb +11 -20
- data/lib/regexp_parser/version.rb +3 -1
- data/lib/regexp_parser.rb +7 -7
- data/regexp_parser.gemspec +22 -22
- metadata +53 -174
- data/CHANGELOG.md +0 -494
- data/README.md +0 -479
- data/lib/regexp_parser/expression/classes/escape.rb +0 -94
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/character_type.rb +0 -16
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/group.rb +0 -23
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -104
- data/spec/expression/clone_spec.rb +0 -152
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -108
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/delimiters_spec.rb +0 -68
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -64
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -60
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/options_spec.rb +0 -28
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -68
- data/spec/parser/refcalls_spec.rb +0 -117
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/delimiters_spec.rb +0 -52
- data/spec/scanner/errors_spec.rb +0 -67
- data/spec/scanner/escapes_spec.rb +0 -64
- data/spec/scanner/free_space_spec.rb +0 -165
- data/spec/scanner/groups_spec.rb +0 -61
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -39
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/options_spec.rb +0 -36
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -25
- data/spec/scanner/refcalls_spec.rb +0 -55
- data/spec/scanner/sets_spec.rb +0 -151
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -16
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ba0845a7ebcd158dc60281b731adb0d597b71028a734209a9cf6e850986c03b4
|
|
4
|
+
data.tar.gz: '078369f6bdbf716aff8f435a318e3f1a8e83593951ee7b21c94bbcd597213d54'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e4539f7196c10d233aca76dc0da3fc8ae8df48b11afd3cc8c7548eedf5893a1202ba06f5fa841444b8afc7d4b0178b6cfb2f16db5e4d05401c64ba26fb05d1de
|
|
7
|
+
data.tar.gz: 801716036ad9a094641094077a8f1695d82cda38020369fb7385a9a7c34d7df0fc90c1629865072d22921fdcfa02a11f70c504220be2bd8df699a10d6d787647
|
data/Gemfile
CHANGED
|
@@ -1,14 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
source 'https://rubygems.org'
|
|
2
4
|
|
|
3
5
|
gemspec
|
|
4
6
|
|
|
5
7
|
group :development, :test do
|
|
6
|
-
gem '
|
|
7
|
-
gem 'rake', '~> 13.
|
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
|
8
|
+
gem 'leto', '~> 2.1'
|
|
9
|
+
gem 'rake', '~> 13.1'
|
|
10
|
+
gem 'regexp_property_values', '~> 1.5'
|
|
9
11
|
gem 'rspec', '~> 3.10'
|
|
10
12
|
if RUBY_VERSION.to_f >= 2.7
|
|
11
|
-
gem '
|
|
12
|
-
gem '
|
|
13
|
+
gem 'benchmark-ips', '~> 2.1'
|
|
14
|
+
gem 'gouteur', '~> 1.1'
|
|
15
|
+
gem 'rubocop', '>= 1.80.2'
|
|
13
16
|
end
|
|
14
17
|
end
|
data/LICENSE
CHANGED
data/Rakefile
CHANGED
|
@@ -1,87 +1,25 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
require 'rake'
|
|
4
|
-
require 'rake/testtask'
|
|
1
|
+
# frozen_string_literal: true
|
|
5
2
|
|
|
6
3
|
require 'bundler'
|
|
4
|
+
require 'rubygems'
|
|
7
5
|
require 'rubygems/package_task'
|
|
6
|
+
require 'rake'
|
|
7
|
+
require 'rake/testtask'
|
|
8
|
+
require 'rspec/core/rake_task'
|
|
8
9
|
|
|
9
|
-
|
|
10
|
-
RAGEL_SOURCE_DIR = File.join(__dir__, 'lib/regexp_parser/scanner')
|
|
11
|
-
RAGEL_OUTPUT_DIR = File.join(__dir__, 'lib/regexp_parser')
|
|
12
|
-
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
|
13
|
-
|
|
10
|
+
Dir['tasks/**/*.rake'].each { |file| load(file) }
|
|
14
11
|
|
|
15
12
|
Bundler::GemHelper.install_tasks
|
|
16
13
|
|
|
14
|
+
RSpec::Core::RakeTask.new(:spec)
|
|
17
15
|
|
|
18
16
|
task :default => [:'test:full']
|
|
19
17
|
|
|
20
18
|
namespace :test do
|
|
21
|
-
task full: :
|
|
22
|
-
sh 'bin/test'
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
namespace :ragel do
|
|
27
|
-
desc "Process the ragel source files and output ruby code"
|
|
28
|
-
task :rb do
|
|
29
|
-
RAGEL_SOURCE_FILES.each do |source_file|
|
|
30
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{source_file}.rb"
|
|
31
|
-
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
|
32
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{source_file}.rl -o #{output_file}"
|
|
33
|
-
|
|
34
|
-
contents = File.read(output_file)
|
|
35
|
-
|
|
36
|
-
File.open(output_file, 'r+') do |file|
|
|
37
|
-
contents = "# -*- warn-indent:false; -*-\n" + contents
|
|
38
|
-
|
|
39
|
-
file.write(contents)
|
|
40
|
-
end
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
desc "Delete the ragel generated source file(s)"
|
|
45
|
-
task :clean do
|
|
46
|
-
RAGEL_SOURCE_FILES.each do |file|
|
|
47
|
-
sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
|
48
|
-
end
|
|
49
|
-
end
|
|
19
|
+
task full: [:ragel, :spec]
|
|
50
20
|
end
|
|
51
21
|
|
|
52
|
-
|
|
53
22
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
|
54
23
|
# latest scanner code is generated and included in the build.
|
|
55
|
-
desc "Runs ragel
|
|
56
|
-
task :
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
namespace :props do
|
|
60
|
-
desc 'Write new property value hashes for the properties scanner'
|
|
61
|
-
task :update do
|
|
62
|
-
require 'regexp_property_values'
|
|
63
|
-
RegexpPropertyValues.update
|
|
64
|
-
dir = File.join(__dir__, 'lib/regexp_parser/scanner/properties')
|
|
65
|
-
|
|
66
|
-
require 'psych'
|
|
67
|
-
write_hash_to_file = ->(hash, path) do
|
|
68
|
-
File.open(path, 'w') do |f|
|
|
69
|
-
f.puts '#',
|
|
70
|
-
"# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
|
|
71
|
-
'#',
|
|
72
|
-
hash.sort.to_h.to_yaml
|
|
73
|
-
end
|
|
74
|
-
puts "Wrote #{hash.count} aliases to `#{path}`"
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
long_names_to_tokens = RegexpPropertyValues.all.map do |val|
|
|
78
|
-
[val.identifier, val.full_name.downcase]
|
|
79
|
-
end
|
|
80
|
-
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
|
|
81
|
-
|
|
82
|
-
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
|
83
|
-
[k.identifier, v.full_name.downcase]
|
|
84
|
-
end
|
|
85
|
-
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
|
|
86
|
-
end
|
|
87
|
-
end
|
|
24
|
+
desc "Runs ragel before building the gem"
|
|
25
|
+
task build: :ragel
|
data/lib/regexp_parser/error.rb
CHANGED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Regexp::Expression
|
|
4
|
+
class Base
|
|
5
|
+
include Regexp::Expression::Shared
|
|
6
|
+
|
|
7
|
+
def initialize(token, options = {})
|
|
8
|
+
init_from_token_and_options(token, options)
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def to_re(format = :full)
|
|
12
|
+
if set_level > 0
|
|
13
|
+
warn "Calling #to_re on character set members is deprecated - "\
|
|
14
|
+
"their behavior might not be equivalent outside of the set."
|
|
15
|
+
end
|
|
16
|
+
::Regexp.new(to_s(format))
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def quantify(*args)
|
|
20
|
+
self.quantifier = Quantifier.new(*args)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def unquantified_clone
|
|
24
|
+
clone.tap { |exp| exp.quantifier = nil }
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
|
28
|
+
def quantity
|
|
29
|
+
return [nil,nil] unless quantified?
|
|
30
|
+
[quantifier.min, quantifier.max]
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def repetitions
|
|
34
|
+
@repetitions ||=
|
|
35
|
+
if quantified?
|
|
36
|
+
min = quantifier.min
|
|
37
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
|
38
|
+
range = min..max
|
|
39
|
+
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
|
40
|
+
if RUBY_VERSION.to_f < 2.7
|
|
41
|
+
range.define_singleton_method(:minmax) { [min, max] }
|
|
42
|
+
end
|
|
43
|
+
range
|
|
44
|
+
else
|
|
45
|
+
1..1
|
|
46
|
+
end
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def greedy?
|
|
50
|
+
quantified? and quantifier.greedy?
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def reluctant?
|
|
54
|
+
quantified? and quantifier.reluctant?
|
|
55
|
+
end
|
|
56
|
+
alias :lazy? :reluctant?
|
|
57
|
+
|
|
58
|
+
def possessive?
|
|
59
|
+
quantified? and quantifier.possessive?
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def to_h
|
|
63
|
+
{
|
|
64
|
+
type: type,
|
|
65
|
+
token: token,
|
|
66
|
+
text: to_s(:base),
|
|
67
|
+
starts_at: ts,
|
|
68
|
+
length: full_length,
|
|
69
|
+
level: level,
|
|
70
|
+
set_level: set_level,
|
|
71
|
+
conditional_level: conditional_level,
|
|
72
|
+
options: options,
|
|
73
|
+
quantifier: quantified? ? quantifier.to_h : nil,
|
|
74
|
+
}
|
|
75
|
+
end
|
|
76
|
+
alias :attributes :to_h
|
|
77
|
+
end
|
|
78
|
+
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
|
-
# A sequence of expressions, used by Alternation as one of its
|
|
4
|
+
# A sequence of expressions, used by Alternation as one of its alternatives.
|
|
3
5
|
class Alternative < Regexp::Expression::Sequence; end
|
|
4
6
|
|
|
5
7
|
class Alternation < Regexp::Expression::SequenceOperation
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
module Regexp::Expression
|
|
3
4
|
module Anchor
|
|
4
5
|
class Base < Regexp::Expression::Base; end
|
|
5
6
|
|
|
@@ -22,5 +23,4 @@ module Regexp::Expression
|
|
|
22
23
|
EOS = EndOfString
|
|
23
24
|
EOSobEOL = EndOfStringOrBeforeEndOfLine
|
|
24
25
|
end
|
|
25
|
-
|
|
26
26
|
end
|
|
@@ -1,20 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
module Backreference
|
|
3
|
-
class Base < Regexp::Expression::Base
|
|
4
|
-
attr_accessor :referenced_expression
|
|
5
|
-
|
|
6
|
-
def initialize_copy(orig)
|
|
7
|
-
self.referenced_expression = orig.referenced_expression.dup
|
|
8
|
-
super
|
|
9
|
-
end
|
|
10
|
-
end
|
|
5
|
+
class Base < Regexp::Expression::Base; end
|
|
11
6
|
|
|
12
7
|
class Number < Backreference::Base
|
|
13
8
|
attr_reader :number
|
|
14
9
|
alias reference number
|
|
15
10
|
|
|
16
11
|
def initialize(token, options = {})
|
|
17
|
-
@number = token.text[
|
|
12
|
+
@number = token.text[/-?\d+/].to_i
|
|
18
13
|
super
|
|
19
14
|
end
|
|
20
15
|
end
|
|
@@ -38,7 +33,7 @@ module Regexp::Expression
|
|
|
38
33
|
class NameCall < Backreference::Name; end
|
|
39
34
|
class NumberCallRelative < Backreference::NumberRelative; end
|
|
40
35
|
|
|
41
|
-
class NumberRecursionLevel < Backreference::
|
|
36
|
+
class NumberRecursionLevel < Backreference::NumberRelative
|
|
42
37
|
attr_reader :recursion_level
|
|
43
38
|
|
|
44
39
|
def initialize(token, options = {})
|
|
@@ -57,4 +52,7 @@ module Regexp::Expression
|
|
|
57
52
|
end
|
|
58
53
|
end
|
|
59
54
|
end
|
|
55
|
+
|
|
56
|
+
# alias for symmetry between token symbol and Expression class name
|
|
57
|
+
Backref = Backreference
|
|
60
58
|
end
|
|
@@ -1,10 +1,11 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
class CharacterSet < Regexp::Expression::Subexpression
|
|
3
5
|
class Range < Regexp::Expression::Subexpression
|
|
4
|
-
def
|
|
5
|
-
expressions.first.
|
|
6
|
+
def ts
|
|
7
|
+
(head = expressions.first) ? head.ts : @ts
|
|
6
8
|
end
|
|
7
|
-
alias :ts :starts_at
|
|
8
9
|
|
|
9
10
|
def <<(exp)
|
|
10
11
|
complete? and raise Regexp::Parser::Error,
|
|
@@ -15,10 +16,6 @@ module Regexp::Expression
|
|
|
15
16
|
def complete?
|
|
16
17
|
count == 2
|
|
17
18
|
end
|
|
18
|
-
|
|
19
|
-
def to_s(_format = :full)
|
|
20
|
-
expressions.join(text)
|
|
21
|
-
end
|
|
22
19
|
end
|
|
23
20
|
end
|
|
24
21
|
end
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
class CharacterSet < Regexp::Expression::Subexpression
|
|
3
5
|
attr_accessor :closed, :negative
|
|
4
|
-
|
|
5
|
-
alias :negative? :negative
|
|
6
|
-
alias :negated? :negative
|
|
7
|
-
alias :closed? :closed
|
|
6
|
+
alias :closed? :closed
|
|
8
7
|
|
|
9
8
|
def initialize(token, options = {})
|
|
10
9
|
self.negative = false
|
|
@@ -19,9 +18,8 @@ module Regexp::Expression
|
|
|
19
18
|
def close
|
|
20
19
|
self.closed = true
|
|
21
20
|
end
|
|
22
|
-
|
|
23
|
-
def to_s(format = :full)
|
|
24
|
-
"#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
|
|
25
|
-
end
|
|
26
21
|
end
|
|
22
|
+
|
|
23
|
+
# alias for symmetry between token symbol and Expression class name
|
|
24
|
+
Set = CharacterSet
|
|
27
25
|
end # module Regexp::Expression
|
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
module Regexp::Expression
|
|
3
4
|
module CharacterType
|
|
4
5
|
class Base < Regexp::Expression::Base; end
|
|
5
6
|
|
|
@@ -15,5 +16,4 @@ module Regexp::Expression
|
|
|
15
16
|
class Linebreak < CharacterType::Base; end
|
|
16
17
|
class ExtendedGrapheme < CharacterType::Base; end
|
|
17
18
|
end
|
|
18
|
-
|
|
19
19
|
end
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
module Conditional
|
|
3
5
|
class TooManyBranches < Regexp::Parser::Error
|
|
@@ -7,33 +9,24 @@ module Regexp::Expression
|
|
|
7
9
|
end
|
|
8
10
|
|
|
9
11
|
class Condition < Regexp::Expression::Base
|
|
10
|
-
attr_accessor :referenced_expression
|
|
11
|
-
|
|
12
12
|
# Name or number of the referenced capturing group that determines state.
|
|
13
13
|
# Returns a String if reference is by name, Integer if by number.
|
|
14
14
|
def reference
|
|
15
15
|
ref = text.tr("'<>()", "")
|
|
16
16
|
ref =~ /\D/ ? ref : Integer(ref)
|
|
17
17
|
end
|
|
18
|
-
|
|
19
|
-
def initialize_copy(orig)
|
|
20
|
-
self.referenced_expression = orig.referenced_expression.dup
|
|
21
|
-
super
|
|
22
|
-
end
|
|
23
18
|
end
|
|
24
19
|
|
|
25
20
|
class Branch < Regexp::Expression::Sequence; end
|
|
26
21
|
|
|
27
22
|
class Expression < Regexp::Expression::Subexpression
|
|
28
|
-
attr_accessor :referenced_expression
|
|
29
|
-
|
|
30
23
|
def <<(exp)
|
|
31
24
|
expressions.last << exp
|
|
32
25
|
end
|
|
33
26
|
|
|
34
|
-
def add_sequence(active_opts = {})
|
|
27
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
|
35
28
|
raise TooManyBranches.new if branches.length == 2
|
|
36
|
-
params = { conditional_level: conditional_level + 1 }
|
|
29
|
+
params = params.merge({ conditional_level: conditional_level + 1 })
|
|
37
30
|
Branch.add_to(self, params, active_opts)
|
|
38
31
|
end
|
|
39
32
|
alias :branch :add_sequence
|
|
@@ -54,15 +47,6 @@ module Regexp::Expression
|
|
|
54
47
|
def reference
|
|
55
48
|
condition.reference
|
|
56
49
|
end
|
|
57
|
-
|
|
58
|
-
def to_s(format = :full)
|
|
59
|
-
"#{text}#{condition}#{branches.join('|')})#{quantifier_affix(format)}"
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
def initialize_copy(orig)
|
|
63
|
-
self.referenced_expression = orig.referenced_expression.dup
|
|
64
|
-
super
|
|
65
|
-
end
|
|
66
50
|
end
|
|
67
51
|
end
|
|
68
52
|
end
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Regexp::Expression
|
|
4
|
+
module EscapeSequence
|
|
5
|
+
Base = Class.new(Regexp::Expression::Base)
|
|
6
|
+
|
|
7
|
+
AsciiEscape = Class.new(Base) # \e
|
|
8
|
+
Backspace = Class.new(Base) # \b
|
|
9
|
+
Bell = Class.new(Base) # \a
|
|
10
|
+
FormFeed = Class.new(Base) # \f
|
|
11
|
+
Newline = Class.new(Base) # \n
|
|
12
|
+
Return = Class.new(Base) # \r
|
|
13
|
+
Tab = Class.new(Base) # \t
|
|
14
|
+
VerticalTab = Class.new(Base) # \v
|
|
15
|
+
|
|
16
|
+
Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
|
|
17
|
+
|
|
18
|
+
Octal = Class.new(Base) # e.g. \012
|
|
19
|
+
Hex = Class.new(Base) # e.g. \x0A
|
|
20
|
+
Codepoint = Class.new(Base) # e.g. \u000A
|
|
21
|
+
|
|
22
|
+
CodepointList = Class.new(Base) # e.g. \u{A B}
|
|
23
|
+
UTF8Hex = Class.new(Base) # e.g. \xE2\x82\xAC
|
|
24
|
+
|
|
25
|
+
AbstractMetaControlSequence = Class.new(Base)
|
|
26
|
+
Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
|
|
27
|
+
Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
|
|
28
|
+
MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# alias for symmetry between Token::* and Expression::*
|
|
32
|
+
Escape = EscapeSequence
|
|
33
|
+
end
|
|
@@ -1,17 +1,19 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
module Regexp::Expression
|
|
3
4
|
class FreeSpace < Regexp::Expression::Base
|
|
4
|
-
def quantify(
|
|
5
|
+
def quantify(*_args)
|
|
5
6
|
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
|
6
7
|
end
|
|
7
8
|
end
|
|
8
9
|
|
|
9
|
-
class Comment < Regexp::Expression::FreeSpace
|
|
10
|
+
class Comment < Regexp::Expression::FreeSpace
|
|
11
|
+
end
|
|
10
12
|
|
|
11
13
|
class WhiteSpace < Regexp::Expression::FreeSpace
|
|
12
14
|
def merge(exp)
|
|
15
|
+
warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
|
|
13
16
|
text << exp.text
|
|
14
17
|
end
|
|
15
18
|
end
|
|
16
|
-
|
|
17
19
|
end
|
|
@@ -1,13 +1,8 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
module Group
|
|
3
5
|
class Base < Regexp::Expression::Subexpression
|
|
4
|
-
def to_s(format = :full)
|
|
5
|
-
"#{text}#{expressions.join})#{quantifier_affix(format)}"
|
|
6
|
-
end
|
|
7
|
-
|
|
8
|
-
def capturing?; false end
|
|
9
|
-
|
|
10
|
-
def comment?; false end
|
|
11
6
|
end
|
|
12
7
|
|
|
13
8
|
class Passive < Group::Base
|
|
@@ -18,14 +13,6 @@ module Regexp::Expression
|
|
|
18
13
|
super
|
|
19
14
|
end
|
|
20
15
|
|
|
21
|
-
def to_s(format = :full)
|
|
22
|
-
if implicit?
|
|
23
|
-
"#{expressions.join}#{quantifier_affix(format)}"
|
|
24
|
-
else
|
|
25
|
-
super
|
|
26
|
-
end
|
|
27
|
-
end
|
|
28
|
-
|
|
29
16
|
def implicit?
|
|
30
17
|
@implicit
|
|
31
18
|
end
|
|
@@ -33,6 +20,8 @@ module Regexp::Expression
|
|
|
33
20
|
|
|
34
21
|
class Absence < Group::Base; end
|
|
35
22
|
class Atomic < Group::Base; end
|
|
23
|
+
# TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
|
|
24
|
+
# longer inherit from Group because it is effectively a terminal expression.
|
|
36
25
|
class Options < Group::Base
|
|
37
26
|
attr_accessor :option_changes
|
|
38
27
|
|
|
@@ -40,13 +29,19 @@ module Regexp::Expression
|
|
|
40
29
|
self.option_changes = orig.option_changes.dup
|
|
41
30
|
super
|
|
42
31
|
end
|
|
32
|
+
|
|
33
|
+
def quantify(*args)
|
|
34
|
+
if token == :options_switch
|
|
35
|
+
raise Regexp::Parser::Error, 'Can not quantify an option switch'
|
|
36
|
+
else
|
|
37
|
+
super
|
|
38
|
+
end
|
|
39
|
+
end
|
|
43
40
|
end
|
|
44
41
|
|
|
45
42
|
class Capture < Group::Base
|
|
46
43
|
attr_accessor :number, :number_at_level
|
|
47
44
|
alias identifier number
|
|
48
|
-
|
|
49
|
-
def capturing?; true end
|
|
50
45
|
end
|
|
51
46
|
|
|
52
47
|
class Named < Group::Capture
|
|
@@ -65,11 +60,6 @@ module Regexp::Expression
|
|
|
65
60
|
end
|
|
66
61
|
|
|
67
62
|
class Comment < Group::Base
|
|
68
|
-
def to_s(_format = :full)
|
|
69
|
-
text.dup
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
def comment?; true end
|
|
73
63
|
end
|
|
74
64
|
end
|
|
75
65
|
|
|
@@ -1,11 +1,13 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
class PosixClass < Regexp::Expression::Base
|
|
3
|
-
def negative?
|
|
4
|
-
type == :nonposixclass
|
|
5
|
-
end
|
|
6
|
-
|
|
7
5
|
def name
|
|
8
|
-
|
|
6
|
+
text[/\w+/]
|
|
9
7
|
end
|
|
10
8
|
end
|
|
9
|
+
|
|
10
|
+
# alias for symmetry between token symbol and Expression class name
|
|
11
|
+
Posixclass = PosixClass
|
|
12
|
+
Nonposixclass = PosixClass
|
|
11
13
|
end
|
|
@@ -1,12 +1,11 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
module Regexp::Expression
|
|
3
4
|
class Root < Regexp::Expression::Subexpression
|
|
4
5
|
def self.build(options = {})
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def self.build_token
|
|
9
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
|
6
|
+
warn "`#{self.class}.build(options)` is deprecated and will raise in "\
|
|
7
|
+
"regexp_parser v3.0.0. Please use `.construct(options: options)`."
|
|
8
|
+
construct(options: options)
|
|
10
9
|
end
|
|
11
10
|
end
|
|
12
11
|
end
|
|
@@ -1,17 +1,14 @@
|
|
|
1
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
module Regexp::Expression
|
|
3
4
|
module UnicodeProperty
|
|
4
5
|
class Base < Regexp::Expression::Base
|
|
5
|
-
def negative?
|
|
6
|
-
type == :nonproperty
|
|
7
|
-
end
|
|
8
|
-
|
|
9
6
|
def name
|
|
10
7
|
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
|
11
8
|
end
|
|
12
9
|
|
|
13
10
|
def shortcut
|
|
14
|
-
|
|
11
|
+
Regexp::Scanner.short_prop_map.key(token.to_s)
|
|
15
12
|
end
|
|
16
13
|
end
|
|
17
14
|
|
|
@@ -110,11 +107,15 @@ module Regexp::Expression
|
|
|
110
107
|
class Unassigned < Codepoint::Base; end
|
|
111
108
|
end
|
|
112
109
|
|
|
113
|
-
class Age
|
|
114
|
-
class
|
|
115
|
-
class
|
|
116
|
-
class
|
|
117
|
-
class
|
|
110
|
+
class Age < UnicodeProperty::Base; end
|
|
111
|
+
class Block < UnicodeProperty::Base; end
|
|
112
|
+
class Derived < UnicodeProperty::Base; end
|
|
113
|
+
class Emoji < UnicodeProperty::Base; end
|
|
114
|
+
class Enumerated < UnicodeProperty::Base; end
|
|
115
|
+
class Script < UnicodeProperty::Base; end
|
|
118
116
|
end
|
|
119
117
|
|
|
118
|
+
# alias for symmetry between token symbol and Expression class name
|
|
119
|
+
Property = UnicodeProperty
|
|
120
|
+
Nonproperty = UnicodeProperty
|
|
120
121
|
end # module Regexp::Expression
|