regexp_parser 1.7.0 → 2.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +9 -3
- data/LICENSE +1 -1
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +76 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +4 -8
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
- data/lib/regexp_parser/expression/classes/group.rb +28 -15
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -5
- data/lib/regexp_parser/expression/classes/root.rb +4 -19
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +11 -12
- data/lib/regexp_parser/expression/methods/construct.rb +41 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
- data/lib/regexp_parser/expression/methods/negative.rb +20 -0
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +47 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
- data/lib/regexp_parser/expression/quantifier.rb +57 -17
- data/lib/regexp_parser/expression/sequence.rb +11 -47
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +111 -0
- data/lib/regexp_parser/expression/subexpression.rb +27 -19
- data/lib/regexp_parser/expression.rb +15 -141
- data/lib/regexp_parser/lexer.rb +83 -41
- data/lib/regexp_parser/parser.rb +372 -429
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +651 -0
- data/lib/regexp_parser/scanner/properties/short.csv +249 -0
- data/lib/regexp_parser/scanner/property.rl +4 -4
- data/lib/regexp_parser/scanner/scanner.rl +303 -368
- data/lib/regexp_parser/scanner.rb +1423 -1674
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +92 -67
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +33 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/token/meta.rb +20 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +751 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +19 -23
- metadata +53 -171
- data/CHANGELOG.md +0 -349
- data/README.md +0 -470
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -51
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/errors_spec.rb +0 -68
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
- /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8cc1826647cde51d6d1b5a5a58fb005efd2a38a85fa0e817616591ee2fad7862
|
4
|
+
data.tar.gz: 572a6203741b9970bcedc1ace243ea0b9c300ca60b71ac263036eb0f4222dd50
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3521fe6dab4be0c0db3c37f3f8d196fc754ff72937336a73ef5547a15ae4f2d366aa28e73d6e5756920d610b943ee51cb2db8e51e53ccb19c1c235a8c45da708
|
7
|
+
data.tar.gz: d05b7babb79c118bdc36ae168d8199ee3500b0cff33cb00ed46d51a4a88725130e931c588146a3f989dd87778b1f39684b2c8a5541c9ac8f91427fc31b1ec97a
|
data/Gemfile
CHANGED
@@ -3,7 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
-
gem '
|
7
|
-
gem '
|
8
|
-
gem '
|
6
|
+
gem 'leto', '~> 2.1'
|
7
|
+
gem 'rake', '~> 13.1'
|
8
|
+
gem 'regexp_property_values', '~> 1.5'
|
9
|
+
gem 'rspec', '~> 3.10'
|
10
|
+
if RUBY_VERSION.to_f >= 2.7
|
11
|
+
gem 'benchmark-ips', '~> 2.1'
|
12
|
+
gem 'gouteur', '~> 1.1'
|
13
|
+
gem 'rubocop', '~> 1.59'
|
14
|
+
end
|
9
15
|
end
|
data/LICENSE
CHANGED
data/Rakefile
CHANGED
@@ -1,87 +1,23 @@
|
|
1
|
+
require 'bundler'
|
1
2
|
require 'rubygems'
|
2
|
-
|
3
|
+
require 'rubygems/package_task'
|
3
4
|
require 'rake'
|
4
5
|
require 'rake/testtask'
|
6
|
+
require 'rspec/core/rake_task'
|
5
7
|
|
6
|
-
|
7
|
-
require 'rubygems/package_task'
|
8
|
-
|
9
|
-
|
10
|
-
RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
|
11
|
-
RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
|
12
|
-
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
13
|
-
|
8
|
+
Dir['tasks/**/*.rake'].each { |file| load(file) }
|
14
9
|
|
15
10
|
Bundler::GemHelper.install_tasks
|
16
11
|
|
12
|
+
RSpec::Core::RakeTask.new(:spec)
|
17
13
|
|
18
14
|
task :default => [:'test:full']
|
19
15
|
|
20
16
|
namespace :test do
|
21
|
-
task full: :'ragel:rb'
|
22
|
-
sh 'bin/test'
|
23
|
-
end
|
17
|
+
task full: [:'ragel:rb', :spec]
|
24
18
|
end
|
25
19
|
|
26
|
-
namespace :ragel do
|
27
|
-
desc "Process the ragel source files and output ruby code"
|
28
|
-
task :rb do |t|
|
29
|
-
RAGEL_SOURCE_FILES.each do |file|
|
30
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
31
|
-
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
32
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{file}.rl -o #{output_file}"
|
33
|
-
|
34
|
-
contents = File.read(output_file)
|
35
|
-
|
36
|
-
File.open(output_file, 'r+') do |file|
|
37
|
-
contents = "# -*- warn-indent:false; -*-\n" + contents
|
38
|
-
|
39
|
-
file.write(contents)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
desc "Delete the ragel generated source file(s)"
|
45
|
-
task :clean do |t|
|
46
|
-
RAGEL_SOURCE_FILES.each do |file|
|
47
|
-
sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
|
53
20
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
54
21
|
# latest scanner code is generated and included in the build.
|
55
22
|
desc "Runs ragel:rb before building the gem"
|
56
23
|
task :build => ['ragel:rb']
|
57
|
-
|
58
|
-
|
59
|
-
namespace :props do
|
60
|
-
desc 'Write new property value hashes for the properties scanner'
|
61
|
-
task :update do
|
62
|
-
require 'regexp_property_values'
|
63
|
-
RegexpPropertyValues.update
|
64
|
-
dir = File.expand_path('../lib/regexp_parser/scanner/properties', __FILE__)
|
65
|
-
|
66
|
-
require 'psych'
|
67
|
-
write_hash_to_file = ->(hash, path) do
|
68
|
-
File.open(path, 'w') do |f|
|
69
|
-
f.puts '#',
|
70
|
-
"# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
|
71
|
-
'#',
|
72
|
-
hash.sort.to_h.to_yaml
|
73
|
-
end
|
74
|
-
puts "Wrote #{hash.count} aliases to `#{path}`"
|
75
|
-
end
|
76
|
-
|
77
|
-
long_names_to_tokens = RegexpPropertyValues.all.map do |val|
|
78
|
-
[val.identifier, val.full_name.downcase]
|
79
|
-
end
|
80
|
-
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
|
81
|
-
|
82
|
-
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
83
|
-
[k.identifier, v.full_name.downcase]
|
84
|
-
end
|
85
|
-
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
|
86
|
-
end
|
87
|
-
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
class Base
|
3
|
+
include Regexp::Expression::Shared
|
4
|
+
|
5
|
+
def initialize(token, options = {})
|
6
|
+
init_from_token_and_options(token, options)
|
7
|
+
end
|
8
|
+
|
9
|
+
def to_re(format = :full)
|
10
|
+
if set_level > 0
|
11
|
+
warn "Calling #to_re on character set members is deprecated - "\
|
12
|
+
"their behavior might not be equivalent outside of the set."
|
13
|
+
end
|
14
|
+
::Regexp.new(to_s(format))
|
15
|
+
end
|
16
|
+
|
17
|
+
def quantify(*args)
|
18
|
+
self.quantifier = Quantifier.new(*args)
|
19
|
+
end
|
20
|
+
|
21
|
+
def unquantified_clone
|
22
|
+
clone.tap { |exp| exp.quantifier = nil }
|
23
|
+
end
|
24
|
+
|
25
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
26
|
+
def quantity
|
27
|
+
return [nil,nil] unless quantified?
|
28
|
+
[quantifier.min, quantifier.max]
|
29
|
+
end
|
30
|
+
|
31
|
+
def repetitions
|
32
|
+
@repetitions ||=
|
33
|
+
if quantified?
|
34
|
+
min = quantifier.min
|
35
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
36
|
+
range = min..max
|
37
|
+
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
38
|
+
if RUBY_VERSION.to_f < 2.7
|
39
|
+
range.define_singleton_method(:minmax) { [min, max] }
|
40
|
+
end
|
41
|
+
range
|
42
|
+
else
|
43
|
+
1..1
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def greedy?
|
48
|
+
quantified? and quantifier.greedy?
|
49
|
+
end
|
50
|
+
|
51
|
+
def reluctant?
|
52
|
+
quantified? and quantifier.reluctant?
|
53
|
+
end
|
54
|
+
alias :lazy? :reluctant?
|
55
|
+
|
56
|
+
def possessive?
|
57
|
+
quantified? and quantifier.possessive?
|
58
|
+
end
|
59
|
+
|
60
|
+
def to_h
|
61
|
+
{
|
62
|
+
type: type,
|
63
|
+
token: token,
|
64
|
+
text: to_s(:base),
|
65
|
+
starts_at: ts,
|
66
|
+
length: full_length,
|
67
|
+
level: level,
|
68
|
+
set_level: set_level,
|
69
|
+
conditional_level: conditional_level,
|
70
|
+
options: options,
|
71
|
+
quantifier: quantified? ? quantifier.to_h : nil,
|
72
|
+
}
|
73
|
+
end
|
74
|
+
alias :attributes :to_h
|
75
|
+
end
|
76
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# A sequence of expressions, used by Alternation as one of its
|
2
|
+
# A sequence of expressions, used by Alternation as one of its alternatives.
|
3
3
|
class Alternative < Regexp::Expression::Sequence; end
|
4
4
|
|
5
5
|
class Alternation < Regexp::Expression::SequenceOperation
|
@@ -2,6 +2,23 @@ module Regexp::Expression
|
|
2
2
|
module Backreference
|
3
3
|
class Base < Regexp::Expression::Base
|
4
4
|
attr_accessor :referenced_expression
|
5
|
+
|
6
|
+
def initialize_copy(orig)
|
7
|
+
exp_id = [self.class, self.starts_at]
|
8
|
+
|
9
|
+
# prevent infinite recursion for recursive subexp calls
|
10
|
+
copied = @@copied ||= {}
|
11
|
+
self.referenced_expression =
|
12
|
+
if copied[exp_id]
|
13
|
+
orig.referenced_expression
|
14
|
+
else
|
15
|
+
copied[exp_id] = true
|
16
|
+
orig.referenced_expression.dup
|
17
|
+
end
|
18
|
+
copied.clear
|
19
|
+
|
20
|
+
super
|
21
|
+
end
|
5
22
|
end
|
6
23
|
|
7
24
|
class Number < Backreference::Base
|
@@ -9,7 +26,7 @@ module Regexp::Expression
|
|
9
26
|
alias reference number
|
10
27
|
|
11
28
|
def initialize(token, options = {})
|
12
|
-
@number = token.text[
|
29
|
+
@number = token.text[/-?\d+/].to_i
|
13
30
|
super
|
14
31
|
end
|
15
32
|
end
|
@@ -33,7 +50,7 @@ module Regexp::Expression
|
|
33
50
|
class NameCall < Backreference::Name; end
|
34
51
|
class NumberCallRelative < Backreference::NumberRelative; end
|
35
52
|
|
36
|
-
class NumberRecursionLevel < Backreference::
|
53
|
+
class NumberRecursionLevel < Backreference::NumberRelative
|
37
54
|
attr_reader :recursion_level
|
38
55
|
|
39
56
|
def initialize(token, options = {})
|
@@ -52,4 +69,7 @@ module Regexp::Expression
|
|
52
69
|
end
|
53
70
|
end
|
54
71
|
end
|
72
|
+
|
73
|
+
# alias for symmetry between token symbol and Expression class name
|
74
|
+
Backref = Backreference
|
55
75
|
end
|
@@ -1,23 +1,19 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
class Range < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
expressions.first.
|
4
|
+
def ts
|
5
|
+
(head = expressions.first) ? head.ts : @ts
|
6
6
|
end
|
7
|
-
alias :ts :starts_at
|
8
7
|
|
9
8
|
def <<(exp)
|
10
|
-
complete?
|
9
|
+
complete? and raise Regexp::Parser::Error,
|
10
|
+
"Can't add more than 2 expressions to a Range"
|
11
11
|
super
|
12
12
|
end
|
13
13
|
|
14
14
|
def complete?
|
15
15
|
count == 2
|
16
16
|
end
|
17
|
-
|
18
|
-
def to_s(_format = :full)
|
19
|
-
expressions.join(text)
|
20
|
-
end
|
21
17
|
end
|
22
18
|
end
|
23
19
|
end
|
@@ -1,10 +1,7 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
attr_accessor :closed, :negative
|
4
|
-
|
5
|
-
alias :negative? :negative
|
6
|
-
alias :negated? :negative
|
7
|
-
alias :closed? :closed
|
4
|
+
alias :closed? :closed
|
8
5
|
|
9
6
|
def initialize(token, options = {})
|
10
7
|
self.negative = false
|
@@ -19,9 +16,8 @@ module Regexp::Expression
|
|
19
16
|
def close
|
20
17
|
self.closed = true
|
21
18
|
end
|
22
|
-
|
23
|
-
def to_s(format = :full)
|
24
|
-
"#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
|
25
|
-
end
|
26
19
|
end
|
20
|
+
|
21
|
+
# alias for symmetry between token symbol and Expression class name
|
22
|
+
Set = CharacterSet
|
27
23
|
end # module Regexp::Expression
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Conditional
|
3
|
-
class TooManyBranches <
|
3
|
+
class TooManyBranches < Regexp::Parser::Error
|
4
4
|
def initialize
|
5
5
|
super('The conditional expression has more than 2 branches')
|
6
6
|
end
|
@@ -15,6 +15,11 @@ module Regexp::Expression
|
|
15
15
|
ref = text.tr("'<>()", "")
|
16
16
|
ref =~ /\D/ ? ref : Integer(ref)
|
17
17
|
end
|
18
|
+
|
19
|
+
def initialize_copy(orig)
|
20
|
+
self.referenced_expression = orig.referenced_expression.dup
|
21
|
+
super
|
22
|
+
end
|
18
23
|
end
|
19
24
|
|
20
25
|
class Branch < Regexp::Expression::Sequence; end
|
@@ -26,9 +31,9 @@ module Regexp::Expression
|
|
26
31
|
expressions.last << exp
|
27
32
|
end
|
28
33
|
|
29
|
-
def add_sequence(active_opts = {})
|
34
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
30
35
|
raise TooManyBranches.new if branches.length == 2
|
31
|
-
params = { conditional_level: conditional_level + 1 }
|
36
|
+
params = params.merge({ conditional_level: conditional_level + 1 })
|
32
37
|
Branch.add_to(self, params, active_opts)
|
33
38
|
end
|
34
39
|
alias :branch :add_sequence
|
@@ -50,8 +55,9 @@ module Regexp::Expression
|
|
50
55
|
condition.reference
|
51
56
|
end
|
52
57
|
|
53
|
-
def
|
54
|
-
|
58
|
+
def initialize_copy(orig)
|
59
|
+
self.referenced_expression = orig.referenced_expression.dup
|
60
|
+
super
|
55
61
|
end
|
56
62
|
end
|
57
63
|
end
|
@@ -1,16 +1,21 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module EscapeSequence
|
3
3
|
class Base < Regexp::Expression::Base
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
def char
|
7
|
-
# poor man's unescape without using eval
|
8
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
9
|
-
end
|
10
|
-
|
11
4
|
def codepoint
|
12
5
|
char.ord
|
13
6
|
end
|
7
|
+
|
8
|
+
if ''.respond_to?(:undump)
|
9
|
+
def char
|
10
|
+
%("#{text}").undump
|
11
|
+
end
|
12
|
+
else
|
13
|
+
# poor man's unescape without using eval
|
14
|
+
require 'yaml'
|
15
|
+
def char
|
16
|
+
YAML.load(%Q(---\n"#{text}"\n))
|
17
|
+
end
|
18
|
+
end
|
14
19
|
end
|
15
20
|
|
16
21
|
class Literal < EscapeSequence::Base
|
@@ -91,4 +96,7 @@ module Regexp::Expression
|
|
91
96
|
end
|
92
97
|
end
|
93
98
|
end
|
99
|
+
|
100
|
+
# alias for symmetry between Token::* and Expression::*
|
101
|
+
Escape = EscapeSequence
|
94
102
|
end
|
@@ -1,17 +1,17 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class FreeSpace < Regexp::Expression::Base
|
4
|
-
def quantify(
|
5
|
-
raise
|
3
|
+
def quantify(*_args)
|
4
|
+
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
6
5
|
end
|
7
6
|
end
|
8
7
|
|
9
|
-
class Comment < Regexp::Expression::FreeSpace
|
8
|
+
class Comment < Regexp::Expression::FreeSpace
|
9
|
+
end
|
10
10
|
|
11
11
|
class WhiteSpace < Regexp::Expression::FreeSpace
|
12
12
|
def merge(exp)
|
13
|
+
warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
|
13
14
|
text << exp.text
|
14
15
|
end
|
15
16
|
end
|
16
|
-
|
17
17
|
end
|
@@ -1,27 +1,45 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
|
5
|
-
"#{text}#{expressions.join})#{quantifier_affix(format)}"
|
6
|
-
end
|
4
|
+
end
|
7
5
|
|
8
|
-
|
6
|
+
class Passive < Group::Base
|
7
|
+
attr_writer :implicit
|
8
|
+
|
9
|
+
def initialize(*)
|
10
|
+
@implicit = false
|
11
|
+
super
|
12
|
+
end
|
9
13
|
|
10
|
-
def
|
14
|
+
def implicit?
|
15
|
+
@implicit
|
16
|
+
end
|
11
17
|
end
|
12
18
|
|
13
|
-
class Atomic < Group::Base; end
|
14
|
-
class Passive < Group::Base; end
|
15
19
|
class Absence < Group::Base; end
|
20
|
+
class Atomic < Group::Base; end
|
21
|
+
# TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
|
22
|
+
# longer inherit from Group because it is effectively a terminal expression.
|
16
23
|
class Options < Group::Base
|
17
24
|
attr_accessor :option_changes
|
25
|
+
|
26
|
+
def initialize_copy(orig)
|
27
|
+
self.option_changes = orig.option_changes.dup
|
28
|
+
super
|
29
|
+
end
|
30
|
+
|
31
|
+
def quantify(*args)
|
32
|
+
if token == :options_switch
|
33
|
+
raise Regexp::Parser::Error, 'Can not quantify an option switch'
|
34
|
+
else
|
35
|
+
super
|
36
|
+
end
|
37
|
+
end
|
18
38
|
end
|
19
39
|
|
20
40
|
class Capture < Group::Base
|
21
41
|
attr_accessor :number, :number_at_level
|
22
42
|
alias identifier number
|
23
|
-
|
24
|
-
def capturing?; true end
|
25
43
|
end
|
26
44
|
|
27
45
|
class Named < Group::Capture
|
@@ -33,18 +51,13 @@ module Regexp::Expression
|
|
33
51
|
super
|
34
52
|
end
|
35
53
|
|
36
|
-
def
|
54
|
+
def initialize_copy(orig)
|
37
55
|
@name = orig.name.dup
|
38
56
|
super
|
39
57
|
end
|
40
58
|
end
|
41
59
|
|
42
60
|
class Comment < Group::Base
|
43
|
-
def to_s(_format = :full)
|
44
|
-
text.dup
|
45
|
-
end
|
46
|
-
|
47
|
-
def comment?; true end
|
48
61
|
end
|
49
62
|
end
|
50
63
|
|
@@ -1,11 +1,11 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class PosixClass < Regexp::Expression::Base
|
3
|
-
def negative?
|
4
|
-
type == :nonposixclass
|
5
|
-
end
|
6
|
-
|
7
3
|
def name
|
8
|
-
|
4
|
+
text[/\w+/]
|
9
5
|
end
|
10
6
|
end
|
7
|
+
|
8
|
+
# alias for symmetry between token symbol and Expression class name
|
9
|
+
Posixclass = PosixClass
|
10
|
+
Nonposixclass = PosixClass
|
11
11
|
end
|
@@ -1,24 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Root < Regexp::Expression::Subexpression
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
'will be removed in 2.0.0. Use Root.build for the old behavior.')
|
9
|
-
return super(self.class.build_token, *args)
|
10
|
-
end
|
11
|
-
super
|
12
|
-
end
|
13
|
-
|
14
|
-
class << self
|
15
|
-
def build(options = {})
|
16
|
-
new(build_token, options)
|
17
|
-
end
|
18
|
-
|
19
|
-
def build_token
|
20
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
21
|
-
end
|
3
|
+
def self.build(options = {})
|
4
|
+
warn "`#{self.class}.build(options)` is deprecated and will raise in "\
|
5
|
+
"regexp_parser v3.0.0. Please use `.construct(options: options)`."
|
6
|
+
construct(options: options)
|
22
7
|
end
|
23
8
|
end
|
24
9
|
end
|
@@ -1,17 +1,12 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
|
-
def negative?
|
6
|
-
type == :nonproperty
|
7
|
-
end
|
8
|
-
|
9
4
|
def name
|
10
|
-
text
|
5
|
+
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
11
6
|
end
|
12
7
|
|
13
8
|
def shortcut
|
14
|
-
|
9
|
+
Regexp::Scanner.short_prop_map.key(token.to_s)
|
15
10
|
end
|
16
11
|
end
|
17
12
|
|
@@ -110,11 +105,15 @@ module Regexp::Expression
|
|
110
105
|
class Unassigned < Codepoint::Base; end
|
111
106
|
end
|
112
107
|
|
113
|
-
class Age
|
114
|
-
class
|
115
|
-
class
|
116
|
-
class
|
117
|
-
class
|
108
|
+
class Age < UnicodeProperty::Base; end
|
109
|
+
class Block < UnicodeProperty::Base; end
|
110
|
+
class Derived < UnicodeProperty::Base; end
|
111
|
+
class Emoji < UnicodeProperty::Base; end
|
112
|
+
class Enumerated < UnicodeProperty::Base; end
|
113
|
+
class Script < UnicodeProperty::Base; end
|
118
114
|
end
|
119
115
|
|
116
|
+
# alias for symmetry between token symbol and Expression class name
|
117
|
+
Property = UnicodeProperty
|
118
|
+
Nonproperty = UnicodeProperty
|
120
119
|
end # module Regexp::Expression
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods
|
4
|
+
# Convenience method to init a valid Expression without a Regexp::Token
|
5
|
+
def construct(params = {})
|
6
|
+
attrs = construct_defaults.merge(params)
|
7
|
+
options = attrs.delete(:options)
|
8
|
+
token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
|
9
|
+
token = Regexp::Token.new(*token_args)
|
10
|
+
raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
|
11
|
+
|
12
|
+
new(token, options)
|
13
|
+
end
|
14
|
+
|
15
|
+
def construct_defaults
|
16
|
+
if self == Root
|
17
|
+
{ type: :expression, token: :root, ts: 0 }
|
18
|
+
elsif self < Sequence
|
19
|
+
{ type: :expression, token: :sequence }
|
20
|
+
else
|
21
|
+
{ type: token_class::Type }
|
22
|
+
end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
|
23
|
+
end
|
24
|
+
|
25
|
+
def token_class
|
26
|
+
if self == Root || self < Sequence
|
27
|
+
nil # no token class because these objects are Parser-generated
|
28
|
+
# TODO: synch exp class, token class & type names for this in v3.0.0
|
29
|
+
elsif self == CharacterType::Any
|
30
|
+
Regexp::Syntax::Token::Meta
|
31
|
+
else
|
32
|
+
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def token_class
|
38
|
+
self.class.token_class
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|