regexp_parser 1.7.0 → 2.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +8 -2
- data/LICENSE +1 -1
- data/Rakefile +6 -70
- data/lib/regexp_parser/error.rb +4 -0
- data/lib/regexp_parser/expression/base.rb +76 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +1 -1
- data/lib/regexp_parser/expression/classes/anchor.rb +0 -2
- data/lib/regexp_parser/expression/classes/{backref.rb → backreference.rb} +22 -2
- data/lib/regexp_parser/expression/classes/{set → character_set}/range.rb +4 -8
- data/lib/regexp_parser/expression/classes/{set.rb → character_set.rb} +3 -4
- data/lib/regexp_parser/expression/classes/{type.rb → character_type.rb} +0 -2
- data/lib/regexp_parser/expression/classes/conditional.rb +11 -5
- data/lib/regexp_parser/expression/classes/{escape.rb → escape_sequence.rb} +15 -7
- data/lib/regexp_parser/expression/classes/free_space.rb +5 -5
- data/lib/regexp_parser/expression/classes/group.rb +28 -15
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +1 -5
- data/lib/regexp_parser/expression/classes/posix_class.rb +5 -1
- data/lib/regexp_parser/expression/classes/root.rb +4 -19
- data/lib/regexp_parser/expression/classes/{property.rb → unicode_property.rb} +5 -3
- data/lib/regexp_parser/expression/methods/construct.rb +41 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +43 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +11 -7
- data/lib/regexp_parser/expression/methods/parts.rb +23 -0
- data/lib/regexp_parser/expression/methods/printing.rb +26 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +1 -1
- data/lib/regexp_parser/expression/methods/tests.rb +47 -1
- data/lib/regexp_parser/expression/methods/traverse.rb +34 -18
- data/lib/regexp_parser/expression/quantifier.rb +57 -17
- data/lib/regexp_parser/expression/sequence.rb +11 -47
- data/lib/regexp_parser/expression/sequence_operation.rb +4 -9
- data/lib/regexp_parser/expression/shared.rb +111 -0
- data/lib/regexp_parser/expression/subexpression.rb +27 -19
- data/lib/regexp_parser/expression.rb +14 -141
- data/lib/regexp_parser/lexer.rb +83 -41
- data/lib/regexp_parser/parser.rb +371 -429
- data/lib/regexp_parser/scanner/char_type.rl +11 -11
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +8 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +6 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +63 -0
- data/lib/regexp_parser/scanner/properties/long.csv +633 -0
- data/lib/regexp_parser/scanner/properties/short.csv +248 -0
- data/lib/regexp_parser/scanner/property.rl +4 -4
- data/lib/regexp_parser/scanner/scanner.rl +303 -368
- data/lib/regexp_parser/scanner.rb +1423 -1674
- data/lib/regexp_parser/syntax/any.rb +2 -7
- data/lib/regexp_parser/syntax/base.rb +92 -67
- data/lib/regexp_parser/syntax/token/anchor.rb +15 -0
- data/lib/regexp_parser/syntax/{tokens → token}/assertion.rb +2 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +33 -0
- data/lib/regexp_parser/syntax/token/character_set.rb +16 -0
- data/lib/regexp_parser/syntax/{tokens → token}/character_type.rb +3 -3
- data/lib/regexp_parser/syntax/{tokens → token}/conditional.rb +3 -3
- data/lib/regexp_parser/syntax/token/escape.rb +33 -0
- data/lib/regexp_parser/syntax/{tokens → token}/group.rb +7 -7
- data/lib/regexp_parser/syntax/{tokens → token}/keep.rb +1 -1
- data/lib/regexp_parser/syntax/token/meta.rb +20 -0
- data/lib/regexp_parser/syntax/{tokens → token}/posix_class.rb +3 -3
- data/lib/regexp_parser/syntax/token/quantifier.rb +35 -0
- data/lib/regexp_parser/syntax/token/unicode_property.rb +733 -0
- data/lib/regexp_parser/syntax/token/virtual.rb +11 -0
- data/lib/regexp_parser/syntax/token.rb +45 -0
- data/lib/regexp_parser/syntax/version_lookup.rb +19 -36
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +13 -20
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +10 -17
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +3 -10
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +8 -15
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -8
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +3 -9
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +3 -9
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +8 -6
- data/lib/regexp_parser/token.rb +9 -20
- data/lib/regexp_parser/version.rb +1 -1
- data/lib/regexp_parser.rb +0 -2
- data/regexp_parser.gemspec +19 -23
- metadata +52 -171
- data/CHANGELOG.md +0 -349
- data/README.md +0 -470
- data/lib/regexp_parser/scanner/properties/long.yml +0 -594
- data/lib/regexp_parser/scanner/properties/short.yml +0 -237
- data/lib/regexp_parser/syntax/tokens/anchor.rb +0 -15
- data/lib/regexp_parser/syntax/tokens/backref.rb +0 -24
- data/lib/regexp_parser/syntax/tokens/character_set.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/escape.rb +0 -30
- data/lib/regexp_parser/syntax/tokens/meta.rb +0 -13
- data/lib/regexp_parser/syntax/tokens/quantifier.rb +0 -35
- data/lib/regexp_parser/syntax/tokens/unicode_property.rb +0 -675
- data/lib/regexp_parser/syntax/tokens.rb +0 -45
- data/spec/expression/base_spec.rb +0 -94
- data/spec/expression/clone_spec.rb +0 -120
- data/spec/expression/conditional_spec.rb +0 -89
- data/spec/expression/free_space_spec.rb +0 -27
- data/spec/expression/methods/match_length_spec.rb +0 -161
- data/spec/expression/methods/match_spec.rb +0 -25
- data/spec/expression/methods/strfregexp_spec.rb +0 -224
- data/spec/expression/methods/tests_spec.rb +0 -99
- data/spec/expression/methods/traverse_spec.rb +0 -161
- data/spec/expression/options_spec.rb +0 -128
- data/spec/expression/root_spec.rb +0 -9
- data/spec/expression/sequence_spec.rb +0 -9
- data/spec/expression/subexpression_spec.rb +0 -50
- data/spec/expression/to_h_spec.rb +0 -26
- data/spec/expression/to_s_spec.rb +0 -100
- data/spec/lexer/all_spec.rb +0 -22
- data/spec/lexer/conditionals_spec.rb +0 -53
- data/spec/lexer/escapes_spec.rb +0 -14
- data/spec/lexer/keep_spec.rb +0 -10
- data/spec/lexer/literals_spec.rb +0 -89
- data/spec/lexer/nesting_spec.rb +0 -99
- data/spec/lexer/refcalls_spec.rb +0 -55
- data/spec/parser/all_spec.rb +0 -43
- data/spec/parser/alternation_spec.rb +0 -88
- data/spec/parser/anchors_spec.rb +0 -17
- data/spec/parser/conditionals_spec.rb +0 -179
- data/spec/parser/errors_spec.rb +0 -30
- data/spec/parser/escapes_spec.rb +0 -121
- data/spec/parser/free_space_spec.rb +0 -130
- data/spec/parser/groups_spec.rb +0 -108
- data/spec/parser/keep_spec.rb +0 -6
- data/spec/parser/posix_classes_spec.rb +0 -8
- data/spec/parser/properties_spec.rb +0 -115
- data/spec/parser/quantifiers_spec.rb +0 -51
- data/spec/parser/refcalls_spec.rb +0 -112
- data/spec/parser/set/intersections_spec.rb +0 -127
- data/spec/parser/set/ranges_spec.rb +0 -111
- data/spec/parser/sets_spec.rb +0 -178
- data/spec/parser/types_spec.rb +0 -18
- data/spec/scanner/all_spec.rb +0 -18
- data/spec/scanner/anchors_spec.rb +0 -21
- data/spec/scanner/conditionals_spec.rb +0 -128
- data/spec/scanner/errors_spec.rb +0 -68
- data/spec/scanner/escapes_spec.rb +0 -53
- data/spec/scanner/free_space_spec.rb +0 -133
- data/spec/scanner/groups_spec.rb +0 -52
- data/spec/scanner/keep_spec.rb +0 -10
- data/spec/scanner/literals_spec.rb +0 -49
- data/spec/scanner/meta_spec.rb +0 -18
- data/spec/scanner/properties_spec.rb +0 -64
- data/spec/scanner/quantifiers_spec.rb +0 -20
- data/spec/scanner/refcalls_spec.rb +0 -36
- data/spec/scanner/sets_spec.rb +0 -102
- data/spec/scanner/types_spec.rb +0 -14
- data/spec/spec_helper.rb +0 -15
- data/spec/support/runner.rb +0 -42
- data/spec/support/shared_examples.rb +0 -77
- data/spec/support/warning_extractor.rb +0 -60
- data/spec/syntax/syntax_spec.rb +0 -48
- data/spec/syntax/syntax_token_map_spec.rb +0 -23
- data/spec/syntax/versions/1.8.6_spec.rb +0 -17
- data/spec/syntax/versions/1.9.1_spec.rb +0 -10
- data/spec/syntax/versions/1.9.3_spec.rb +0 -9
- data/spec/syntax/versions/2.0.0_spec.rb +0 -13
- data/spec/syntax/versions/2.2.0_spec.rb +0 -9
- data/spec/syntax/versions/aliases_spec.rb +0 -37
- data/spec/token/token_spec.rb +0 -85
- /data/lib/regexp_parser/expression/classes/{set → character_set}/intersection.rb +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 53bc2105c4601ec650c24172f48b4dfa4ffa356f84f4de2a58cca4429cff45a4
|
4
|
+
data.tar.gz: 3cb580ee3db70e9490b350722fcd63b77640cc1cbaed4c7555b192f9ddcc341b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fcdf19c19bc62b11a3c8a22a75c54ef996635dfc3cbabf7512eee49090c0215b21c751a275923561a0be33bb0070e04ce57f49a5bc95bedb386afd763ba6d7ba
|
7
|
+
data.tar.gz: 5744e2e8baa4ecf52d87718a821c4ba69e8d2d1ced6deffca3224cf415a72714726f8465d50c0f504421fed5992c94519c3c8e0a0e7f79e24037a9fad5386347
|
data/Gemfile
CHANGED
@@ -3,7 +3,13 @@ source 'https://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
group :development, :test do
|
6
|
+
gem 'leto', '~> 2.0'
|
6
7
|
gem 'rake', '~> 13.0'
|
7
|
-
gem 'regexp_property_values', '~> 1.
|
8
|
-
gem 'rspec', '~> 3.
|
8
|
+
gem 'regexp_property_values', '~> 1.4'
|
9
|
+
gem 'rspec', '~> 3.10'
|
10
|
+
if RUBY_VERSION.to_f >= 2.7
|
11
|
+
gem 'benchmark-ips', '~> 2.1'
|
12
|
+
gem 'gouteur', '~> 1.1'
|
13
|
+
gem 'rubocop', '~> 1.7'
|
14
|
+
end
|
9
15
|
end
|
data/LICENSE
CHANGED
data/Rakefile
CHANGED
@@ -1,87 +1,23 @@
|
|
1
|
+
require 'bundler'
|
1
2
|
require 'rubygems'
|
2
|
-
|
3
|
+
require 'rubygems/package_task'
|
3
4
|
require 'rake'
|
4
5
|
require 'rake/testtask'
|
6
|
+
require 'rspec/core/rake_task'
|
5
7
|
|
6
|
-
|
7
|
-
require 'rubygems/package_task'
|
8
|
-
|
9
|
-
|
10
|
-
RAGEL_SOURCE_DIR = File.expand_path '../lib/regexp_parser/scanner', __FILE__
|
11
|
-
RAGEL_OUTPUT_DIR = File.expand_path '../lib/regexp_parser', __FILE__
|
12
|
-
RAGEL_SOURCE_FILES = %w{scanner} # scanner.rl includes property.rl
|
13
|
-
|
8
|
+
Dir['tasks/**/*.rake'].each { |file| load(file) }
|
14
9
|
|
15
10
|
Bundler::GemHelper.install_tasks
|
16
11
|
|
12
|
+
RSpec::Core::RakeTask.new(:spec)
|
17
13
|
|
18
14
|
task :default => [:'test:full']
|
19
15
|
|
20
16
|
namespace :test do
|
21
|
-
task full: :'ragel:rb'
|
22
|
-
sh 'bin/test'
|
23
|
-
end
|
17
|
+
task full: [:'ragel:rb', :spec]
|
24
18
|
end
|
25
19
|
|
26
|
-
namespace :ragel do
|
27
|
-
desc "Process the ragel source files and output ruby code"
|
28
|
-
task :rb do |t|
|
29
|
-
RAGEL_SOURCE_FILES.each do |file|
|
30
|
-
output_file = "#{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
31
|
-
# using faster flat table driven FSM, about 25% larger code, but about 30% faster
|
32
|
-
sh "ragel -F1 -R #{RAGEL_SOURCE_DIR}/#{file}.rl -o #{output_file}"
|
33
|
-
|
34
|
-
contents = File.read(output_file)
|
35
|
-
|
36
|
-
File.open(output_file, 'r+') do |file|
|
37
|
-
contents = "# -*- warn-indent:false; -*-\n" + contents
|
38
|
-
|
39
|
-
file.write(contents)
|
40
|
-
end
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
desc "Delete the ragel generated source file(s)"
|
45
|
-
task :clean do |t|
|
46
|
-
RAGEL_SOURCE_FILES.each do |file|
|
47
|
-
sh "rm -f #{RAGEL_OUTPUT_DIR}/#{file}.rb"
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
|
53
20
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
54
21
|
# latest scanner code is generated and included in the build.
|
55
22
|
desc "Runs ragel:rb before building the gem"
|
56
23
|
task :build => ['ragel:rb']
|
57
|
-
|
58
|
-
|
59
|
-
namespace :props do
|
60
|
-
desc 'Write new property value hashes for the properties scanner'
|
61
|
-
task :update do
|
62
|
-
require 'regexp_property_values'
|
63
|
-
RegexpPropertyValues.update
|
64
|
-
dir = File.expand_path('../lib/regexp_parser/scanner/properties', __FILE__)
|
65
|
-
|
66
|
-
require 'psych'
|
67
|
-
write_hash_to_file = ->(hash, path) do
|
68
|
-
File.open(path, 'w') do |f|
|
69
|
-
f.puts '#',
|
70
|
-
"# THIS FILE IS AUTO-GENERATED BY `rake props:update`, DO NOT EDIT",
|
71
|
-
'#',
|
72
|
-
hash.sort.to_h.to_yaml
|
73
|
-
end
|
74
|
-
puts "Wrote #{hash.count} aliases to `#{path}`"
|
75
|
-
end
|
76
|
-
|
77
|
-
long_names_to_tokens = RegexpPropertyValues.all.map do |val|
|
78
|
-
[val.identifier, val.full_name.downcase]
|
79
|
-
end
|
80
|
-
write_hash_to_file.call(long_names_to_tokens, "#{dir}/long.yml")
|
81
|
-
|
82
|
-
short_names_to_tokens = RegexpPropertyValues.alias_hash.map do |k, v|
|
83
|
-
[k.identifier, v.full_name.downcase]
|
84
|
-
end
|
85
|
-
write_hash_to_file.call(short_names_to_tokens, "#{dir}/short.yml")
|
86
|
-
end
|
87
|
-
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
class Base
|
3
|
+
include Regexp::Expression::Shared
|
4
|
+
|
5
|
+
def initialize(token, options = {})
|
6
|
+
init_from_token_and_options(token, options)
|
7
|
+
end
|
8
|
+
|
9
|
+
def to_re(format = :full)
|
10
|
+
if set_level > 0
|
11
|
+
warn "Calling #to_re on character set members is deprecated - "\
|
12
|
+
"their behavior might not be equivalent outside of the set."
|
13
|
+
end
|
14
|
+
::Regexp.new(to_s(format))
|
15
|
+
end
|
16
|
+
|
17
|
+
def quantify(*args)
|
18
|
+
self.quantifier = Quantifier.new(*args)
|
19
|
+
end
|
20
|
+
|
21
|
+
def unquantified_clone
|
22
|
+
clone.tap { |exp| exp.quantifier = nil }
|
23
|
+
end
|
24
|
+
|
25
|
+
# Deprecated. Prefer `#repetitions` which has a more uniform interface.
|
26
|
+
def quantity
|
27
|
+
return [nil,nil] unless quantified?
|
28
|
+
[quantifier.min, quantifier.max]
|
29
|
+
end
|
30
|
+
|
31
|
+
def repetitions
|
32
|
+
@repetitions ||=
|
33
|
+
if quantified?
|
34
|
+
min = quantifier.min
|
35
|
+
max = quantifier.max < 0 ? Float::INFINITY : quantifier.max
|
36
|
+
range = min..max
|
37
|
+
# fix Range#minmax on old Rubies - https://bugs.ruby-lang.org/issues/15807
|
38
|
+
if RUBY_VERSION.to_f < 2.7
|
39
|
+
range.define_singleton_method(:minmax) { [min, max] }
|
40
|
+
end
|
41
|
+
range
|
42
|
+
else
|
43
|
+
1..1
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
def greedy?
|
48
|
+
quantified? and quantifier.greedy?
|
49
|
+
end
|
50
|
+
|
51
|
+
def reluctant?
|
52
|
+
quantified? and quantifier.reluctant?
|
53
|
+
end
|
54
|
+
alias :lazy? :reluctant?
|
55
|
+
|
56
|
+
def possessive?
|
57
|
+
quantified? and quantifier.possessive?
|
58
|
+
end
|
59
|
+
|
60
|
+
def to_h
|
61
|
+
{
|
62
|
+
type: type,
|
63
|
+
token: token,
|
64
|
+
text: to_s(:base),
|
65
|
+
starts_at: ts,
|
66
|
+
length: full_length,
|
67
|
+
level: level,
|
68
|
+
set_level: set_level,
|
69
|
+
conditional_level: conditional_level,
|
70
|
+
options: options,
|
71
|
+
quantifier: quantified? ? quantifier.to_h : nil,
|
72
|
+
}
|
73
|
+
end
|
74
|
+
alias :attributes :to_h
|
75
|
+
end
|
76
|
+
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
# A sequence of expressions, used by Alternation as one of its
|
2
|
+
# A sequence of expressions, used by Alternation as one of its alternatives.
|
3
3
|
class Alternative < Regexp::Expression::Sequence; end
|
4
4
|
|
5
5
|
class Alternation < Regexp::Expression::SequenceOperation
|
@@ -2,6 +2,23 @@ module Regexp::Expression
|
|
2
2
|
module Backreference
|
3
3
|
class Base < Regexp::Expression::Base
|
4
4
|
attr_accessor :referenced_expression
|
5
|
+
|
6
|
+
def initialize_copy(orig)
|
7
|
+
exp_id = [self.class, self.starts_at]
|
8
|
+
|
9
|
+
# prevent infinite recursion for recursive subexp calls
|
10
|
+
copied = @@copied ||= {}
|
11
|
+
self.referenced_expression =
|
12
|
+
if copied[exp_id]
|
13
|
+
orig.referenced_expression
|
14
|
+
else
|
15
|
+
copied[exp_id] = true
|
16
|
+
orig.referenced_expression.dup
|
17
|
+
end
|
18
|
+
copied.clear
|
19
|
+
|
20
|
+
super
|
21
|
+
end
|
5
22
|
end
|
6
23
|
|
7
24
|
class Number < Backreference::Base
|
@@ -9,7 +26,7 @@ module Regexp::Expression
|
|
9
26
|
alias reference number
|
10
27
|
|
11
28
|
def initialize(token, options = {})
|
12
|
-
@number = token.text[
|
29
|
+
@number = token.text[/-?\d+/].to_i
|
13
30
|
super
|
14
31
|
end
|
15
32
|
end
|
@@ -33,7 +50,7 @@ module Regexp::Expression
|
|
33
50
|
class NameCall < Backreference::Name; end
|
34
51
|
class NumberCallRelative < Backreference::NumberRelative; end
|
35
52
|
|
36
|
-
class NumberRecursionLevel < Backreference::
|
53
|
+
class NumberRecursionLevel < Backreference::NumberRelative
|
37
54
|
attr_reader :recursion_level
|
38
55
|
|
39
56
|
def initialize(token, options = {})
|
@@ -52,4 +69,7 @@ module Regexp::Expression
|
|
52
69
|
end
|
53
70
|
end
|
54
71
|
end
|
72
|
+
|
73
|
+
# alias for symmetry between token symbol and Expression class name
|
74
|
+
Backref = Backreference
|
55
75
|
end
|
@@ -1,23 +1,19 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
class CharacterSet < Regexp::Expression::Subexpression
|
3
3
|
class Range < Regexp::Expression::Subexpression
|
4
|
-
def
|
5
|
-
expressions.first.
|
4
|
+
def ts
|
5
|
+
(head = expressions.first) ? head.ts : @ts
|
6
6
|
end
|
7
|
-
alias :ts :starts_at
|
8
7
|
|
9
8
|
def <<(exp)
|
10
|
-
complete?
|
9
|
+
complete? and raise Regexp::Parser::Error,
|
10
|
+
"Can't add more than 2 expressions to a Range"
|
11
11
|
super
|
12
12
|
end
|
13
13
|
|
14
14
|
def complete?
|
15
15
|
count == 2
|
16
16
|
end
|
17
|
-
|
18
|
-
def to_s(_format = :full)
|
19
|
-
expressions.join(text)
|
20
|
-
end
|
21
17
|
end
|
22
18
|
end
|
23
19
|
end
|
@@ -19,9 +19,8 @@ module Regexp::Expression
|
|
19
19
|
def close
|
20
20
|
self.closed = true
|
21
21
|
end
|
22
|
-
|
23
|
-
def to_s(format = :full)
|
24
|
-
"#{text}#{'^' if negated?}#{expressions.join}]#{quantifier_affix(format)}"
|
25
|
-
end
|
26
22
|
end
|
23
|
+
|
24
|
+
# alias for symmetry between token symbol and Expression class name
|
25
|
+
Set = CharacterSet
|
27
26
|
end # module Regexp::Expression
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Conditional
|
3
|
-
class TooManyBranches <
|
3
|
+
class TooManyBranches < Regexp::Parser::Error
|
4
4
|
def initialize
|
5
5
|
super('The conditional expression has more than 2 branches')
|
6
6
|
end
|
@@ -15,6 +15,11 @@ module Regexp::Expression
|
|
15
15
|
ref = text.tr("'<>()", "")
|
16
16
|
ref =~ /\D/ ? ref : Integer(ref)
|
17
17
|
end
|
18
|
+
|
19
|
+
def initialize_copy(orig)
|
20
|
+
self.referenced_expression = orig.referenced_expression.dup
|
21
|
+
super
|
22
|
+
end
|
18
23
|
end
|
19
24
|
|
20
25
|
class Branch < Regexp::Expression::Sequence; end
|
@@ -26,9 +31,9 @@ module Regexp::Expression
|
|
26
31
|
expressions.last << exp
|
27
32
|
end
|
28
33
|
|
29
|
-
def add_sequence(active_opts = {})
|
34
|
+
def add_sequence(active_opts = {}, params = { ts: 0 })
|
30
35
|
raise TooManyBranches.new if branches.length == 2
|
31
|
-
params = { conditional_level: conditional_level + 1 }
|
36
|
+
params = params.merge({ conditional_level: conditional_level + 1 })
|
32
37
|
Branch.add_to(self, params, active_opts)
|
33
38
|
end
|
34
39
|
alias :branch :add_sequence
|
@@ -50,8 +55,9 @@ module Regexp::Expression
|
|
50
55
|
condition.reference
|
51
56
|
end
|
52
57
|
|
53
|
-
def
|
54
|
-
|
58
|
+
def initialize_copy(orig)
|
59
|
+
self.referenced_expression = orig.referenced_expression.dup
|
60
|
+
super
|
55
61
|
end
|
56
62
|
end
|
57
63
|
end
|
@@ -1,16 +1,21 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module EscapeSequence
|
3
3
|
class Base < Regexp::Expression::Base
|
4
|
-
require 'yaml'
|
5
|
-
|
6
|
-
def char
|
7
|
-
# poor man's unescape without using eval
|
8
|
-
YAML.load(%Q(---\n"#{text}"\n))
|
9
|
-
end
|
10
|
-
|
11
4
|
def codepoint
|
12
5
|
char.ord
|
13
6
|
end
|
7
|
+
|
8
|
+
if ''.respond_to?(:undump)
|
9
|
+
def char
|
10
|
+
%("#{text}").undump
|
11
|
+
end
|
12
|
+
else
|
13
|
+
# poor man's unescape without using eval
|
14
|
+
require 'yaml'
|
15
|
+
def char
|
16
|
+
YAML.load(%Q(---\n"#{text}"\n))
|
17
|
+
end
|
18
|
+
end
|
14
19
|
end
|
15
20
|
|
16
21
|
class Literal < EscapeSequence::Base
|
@@ -91,4 +96,7 @@ module Regexp::Expression
|
|
91
96
|
end
|
92
97
|
end
|
93
98
|
end
|
99
|
+
|
100
|
+
# alias for symmetry between Token::* and Expression::*
|
101
|
+
Escape = EscapeSequence
|
94
102
|
end
|
@@ -1,17 +1,17 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class FreeSpace < Regexp::Expression::Base
|
4
|
-
def quantify(
|
5
|
-
raise
|
3
|
+
def quantify(*_args)
|
4
|
+
raise Regexp::Parser::Error, 'Can not quantify a free space object'
|
6
5
|
end
|
7
6
|
end
|
8
7
|
|
9
|
-
class Comment < Regexp::Expression::FreeSpace
|
8
|
+
class Comment < Regexp::Expression::FreeSpace
|
9
|
+
end
|
10
10
|
|
11
11
|
class WhiteSpace < Regexp::Expression::FreeSpace
|
12
12
|
def merge(exp)
|
13
|
+
warn("#{self.class}##{__method__} is deprecated and will be removed in v3.0.0.")
|
13
14
|
text << exp.text
|
14
15
|
end
|
15
16
|
end
|
16
|
-
|
17
17
|
end
|
@@ -1,27 +1,45 @@
|
|
1
1
|
module Regexp::Expression
|
2
2
|
module Group
|
3
3
|
class Base < Regexp::Expression::Subexpression
|
4
|
-
|
5
|
-
"#{text}#{expressions.join})#{quantifier_affix(format)}"
|
6
|
-
end
|
4
|
+
end
|
7
5
|
|
8
|
-
|
6
|
+
class Passive < Group::Base
|
7
|
+
attr_writer :implicit
|
8
|
+
|
9
|
+
def initialize(*)
|
10
|
+
@implicit = false
|
11
|
+
super
|
12
|
+
end
|
9
13
|
|
10
|
-
def
|
14
|
+
def implicit?
|
15
|
+
@implicit
|
16
|
+
end
|
11
17
|
end
|
12
18
|
|
13
|
-
class Atomic < Group::Base; end
|
14
|
-
class Passive < Group::Base; end
|
15
19
|
class Absence < Group::Base; end
|
20
|
+
class Atomic < Group::Base; end
|
21
|
+
# TODO: should split off OptionsSwitch in v3.0.0. Maybe even make it no
|
22
|
+
# longer inherit from Group because it is effectively a terminal expression.
|
16
23
|
class Options < Group::Base
|
17
24
|
attr_accessor :option_changes
|
25
|
+
|
26
|
+
def initialize_copy(orig)
|
27
|
+
self.option_changes = orig.option_changes.dup
|
28
|
+
super
|
29
|
+
end
|
30
|
+
|
31
|
+
def quantify(*args)
|
32
|
+
if token == :options_switch
|
33
|
+
raise Regexp::Parser::Error, 'Can not quantify an option switch'
|
34
|
+
else
|
35
|
+
super
|
36
|
+
end
|
37
|
+
end
|
18
38
|
end
|
19
39
|
|
20
40
|
class Capture < Group::Base
|
21
41
|
attr_accessor :number, :number_at_level
|
22
42
|
alias identifier number
|
23
|
-
|
24
|
-
def capturing?; true end
|
25
43
|
end
|
26
44
|
|
27
45
|
class Named < Group::Capture
|
@@ -33,18 +51,13 @@ module Regexp::Expression
|
|
33
51
|
super
|
34
52
|
end
|
35
53
|
|
36
|
-
def
|
54
|
+
def initialize_copy(orig)
|
37
55
|
@name = orig.name.dup
|
38
56
|
super
|
39
57
|
end
|
40
58
|
end
|
41
59
|
|
42
60
|
class Comment < Group::Base
|
43
|
-
def to_s(_format = :full)
|
44
|
-
text.dup
|
45
|
-
end
|
46
|
-
|
47
|
-
def comment?; true end
|
48
61
|
end
|
49
62
|
end
|
50
63
|
|
@@ -1,24 +1,9 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
class Root < Regexp::Expression::Subexpression
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
'will be removed in 2.0.0. Use Root.build for the old behavior.')
|
9
|
-
return super(self.class.build_token, *args)
|
10
|
-
end
|
11
|
-
super
|
12
|
-
end
|
13
|
-
|
14
|
-
class << self
|
15
|
-
def build(options = {})
|
16
|
-
new(build_token, options)
|
17
|
-
end
|
18
|
-
|
19
|
-
def build_token
|
20
|
-
Regexp::Token.new(:expression, :root, '', 0)
|
21
|
-
end
|
3
|
+
def self.build(options = {})
|
4
|
+
warn "`#{self.class}.build(options)` is deprecated and will raise in "\
|
5
|
+
"regexp_parser v3.0.0. Please use `.construct(options: options)`."
|
6
|
+
construct(options: options)
|
22
7
|
end
|
23
8
|
end
|
24
9
|
end
|
@@ -1,5 +1,4 @@
|
|
1
1
|
module Regexp::Expression
|
2
|
-
|
3
2
|
module UnicodeProperty
|
4
3
|
class Base < Regexp::Expression::Base
|
5
4
|
def negative?
|
@@ -7,11 +6,11 @@ module Regexp::Expression
|
|
7
6
|
end
|
8
7
|
|
9
8
|
def name
|
10
|
-
text
|
9
|
+
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
11
10
|
end
|
12
11
|
|
13
12
|
def shortcut
|
14
|
-
|
13
|
+
Regexp::Scanner.short_prop_map.key(token.to_s)
|
15
14
|
end
|
16
15
|
end
|
17
16
|
|
@@ -117,4 +116,7 @@ module Regexp::Expression
|
|
117
116
|
class Block < UnicodeProperty::Base; end
|
118
117
|
end
|
119
118
|
|
119
|
+
# alias for symmetry between token symbol and Expression class name
|
120
|
+
Property = UnicodeProperty
|
121
|
+
Nonproperty = UnicodeProperty
|
120
122
|
end # module Regexp::Expression
|
@@ -0,0 +1,41 @@
|
|
1
|
+
module Regexp::Expression
|
2
|
+
module Shared
|
3
|
+
module ClassMethods
|
4
|
+
# Convenience method to init a valid Expression without a Regexp::Token
|
5
|
+
def construct(params = {})
|
6
|
+
attrs = construct_defaults.merge(params)
|
7
|
+
options = attrs.delete(:options)
|
8
|
+
token_args = Regexp::TOKEN_KEYS.map { |k| attrs.delete(k) }
|
9
|
+
token = Regexp::Token.new(*token_args)
|
10
|
+
raise ArgumentError, "unsupported attribute(s): #{attrs}" if attrs.any?
|
11
|
+
|
12
|
+
new(token, options)
|
13
|
+
end
|
14
|
+
|
15
|
+
def construct_defaults
|
16
|
+
if self == Root
|
17
|
+
{ type: :expression, token: :root, ts: 0 }
|
18
|
+
elsif self < Sequence
|
19
|
+
{ type: :expression, token: :sequence }
|
20
|
+
else
|
21
|
+
{ type: token_class::Type }
|
22
|
+
end.merge(level: 0, set_level: 0, conditional_level: 0, text: '')
|
23
|
+
end
|
24
|
+
|
25
|
+
def token_class
|
26
|
+
if self == Root || self < Sequence
|
27
|
+
nil # no token class because these objects are Parser-generated
|
28
|
+
# TODO: synch exp class, token class & type names for this in v3.0.0
|
29
|
+
elsif self == CharacterType::Any
|
30
|
+
Regexp::Syntax::Token::Meta
|
31
|
+
else
|
32
|
+
Regexp::Syntax::Token.const_get(name.split('::')[2])
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def token_class
|
38
|
+
self.class.token_class
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|