regexp_parser 2.8.1 → 2.11.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +6 -4
- data/LICENSE +1 -1
- data/Rakefile +5 -3
- data/lib/regexp_parser/error.rb +2 -0
- data/lib/regexp_parser/expression/base.rb +2 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +2 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +2 -0
- data/lib/regexp_parser/expression/classes/backreference.rb +3 -20
- data/lib/regexp_parser/expression/classes/character_set/intersection.rb +2 -0
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -0
- data/lib/regexp_parser/expression/classes/character_set.rb +3 -4
- data/lib/regexp_parser/expression/classes/character_type.rb +2 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -14
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +26 -95
- data/lib/regexp_parser/expression/classes/free_space.rb +2 -0
- data/lib/regexp_parser/expression/classes/group.rb +2 -0
- data/lib/regexp_parser/expression/classes/keep.rb +3 -1
- data/lib/regexp_parser/expression/classes/literal.rb +2 -0
- data/lib/regexp_parser/expression/classes/posix_class.rb +2 -4
- data/lib/regexp_parser/expression/classes/root.rb +2 -0
- data/lib/regexp_parser/expression/classes/unicode_property.rb +8 -9
- data/lib/regexp_parser/expression/methods/construct.rb +2 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +7 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +76 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +2 -0
- data/lib/regexp_parser/expression/methods/match.rb +2 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -0
- data/lib/regexp_parser/expression/methods/negative.rb +22 -0
- data/lib/regexp_parser/expression/methods/options.rb +2 -0
- data/lib/regexp_parser/expression/methods/parts.rb +2 -0
- data/lib/regexp_parser/expression/methods/printing.rb +2 -0
- data/lib/regexp_parser/expression/methods/referenced_expressions.rb +30 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +2 -0
- data/lib/regexp_parser/expression/methods/tests.rb +2 -0
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -0
- data/lib/regexp_parser/expression/quantifier.rb +3 -1
- data/lib/regexp_parser/expression/sequence.rb +2 -0
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -0
- data/lib/regexp_parser/expression/shared.rb +6 -3
- data/lib/regexp_parser/expression/subexpression.rb +2 -0
- data/lib/regexp_parser/expression.rb +39 -33
- data/lib/regexp_parser/lexer.rb +2 -0
- data/lib/regexp_parser/parser.rb +16 -9
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +2 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +3 -1
- data/lib/regexp_parser/scanner/errors/validation_error.rb +2 -0
- data/lib/regexp_parser/scanner/properties/long.csv +37 -0
- data/lib/regexp_parser/scanner/properties/short.csv +9 -0
- data/lib/regexp_parser/scanner/scanner.rl +62 -18
- data/lib/regexp_parser/scanner.rb +1041 -936
- data/lib/regexp_parser/syntax/any.rb +2 -0
- data/lib/regexp_parser/syntax/base.rb +2 -0
- data/lib/regexp_parser/syntax/token/anchor.rb +5 -3
- data/lib/regexp_parser/syntax/token/assertion.rb +4 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +8 -6
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -1
- data/lib/regexp_parser/syntax/token/character_type.rb +6 -4
- data/lib/regexp_parser/syntax/token/conditional.rb +5 -3
- data/lib/regexp_parser/syntax/token/escape.rb +9 -7
- data/lib/regexp_parser/syntax/token/group.rb +8 -6
- data/lib/regexp_parser/syntax/token/keep.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +4 -2
- data/lib/regexp_parser/syntax/token/posix_class.rb +4 -2
- data/lib/regexp_parser/syntax/token/quantifier.rb +8 -6
- data/lib/regexp_parser/syntax/token/unicode_property.rb +79 -46
- data/lib/regexp_parser/syntax/token/virtual.rb +5 -3
- data/lib/regexp_parser/syntax/token.rb +18 -16
- data/lib/regexp_parser/syntax/version_lookup.rb +4 -2
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -0
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +2 -0
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +2 -0
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/3.5.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +3 -1
- data/lib/regexp_parser/syntax.rb +3 -1
- data/lib/regexp_parser/token.rb +2 -0
- data/lib/regexp_parser/version.rb +3 -1
- data/lib/regexp_parser.rb +8 -6
- data/regexp_parser.gemspec +7 -5
- metadata +12 -11
- data/CHANGELOG.md +0 -691
- data/README.md +0 -506
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ba0845a7ebcd158dc60281b731adb0d597b71028a734209a9cf6e850986c03b4
|
|
4
|
+
data.tar.gz: '078369f6bdbf716aff8f435a318e3f1a8e83593951ee7b21c94bbcd597213d54'
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e4539f7196c10d233aca76dc0da3fc8ae8df48b11afd3cc8c7548eedf5893a1202ba06f5fa841444b8afc7d4b0178b6cfb2f16db5e4d05401c64ba26fb05d1de
|
|
7
|
+
data.tar.gz: 801716036ad9a094641094077a8f1695d82cda38020369fb7385a9a7c34d7df0fc90c1629865072d22921fdcfa02a11f70c504220be2bd8df699a10d6d787647
|
data/Gemfile
CHANGED
|
@@ -1,15 +1,17 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
source 'https://rubygems.org'
|
|
2
4
|
|
|
3
5
|
gemspec
|
|
4
6
|
|
|
5
7
|
group :development, :test do
|
|
6
|
-
gem 'leto', '~> 2.
|
|
7
|
-
gem 'rake', '~> 13.
|
|
8
|
-
gem 'regexp_property_values', '~> 1.
|
|
8
|
+
gem 'leto', '~> 2.1'
|
|
9
|
+
gem 'rake', '~> 13.1'
|
|
10
|
+
gem 'regexp_property_values', '~> 1.5'
|
|
9
11
|
gem 'rspec', '~> 3.10'
|
|
10
12
|
if RUBY_VERSION.to_f >= 2.7
|
|
11
13
|
gem 'benchmark-ips', '~> 2.1'
|
|
12
14
|
gem 'gouteur', '~> 1.1'
|
|
13
|
-
gem 'rubocop', '
|
|
15
|
+
gem 'rubocop', '>= 1.80.2'
|
|
14
16
|
end
|
|
15
17
|
end
|
data/LICENSE
CHANGED
data/Rakefile
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'bundler'
|
|
2
4
|
require 'rubygems'
|
|
3
5
|
require 'rubygems/package_task'
|
|
@@ -14,10 +16,10 @@ RSpec::Core::RakeTask.new(:spec)
|
|
|
14
16
|
task :default => [:'test:full']
|
|
15
17
|
|
|
16
18
|
namespace :test do
|
|
17
|
-
task full: [:
|
|
19
|
+
task full: [:ragel, :spec]
|
|
18
20
|
end
|
|
19
21
|
|
|
20
22
|
# Add ragel task as a prerequisite for building the gem to ensure that the
|
|
21
23
|
# latest scanner code is generated and included in the build.
|
|
22
|
-
desc "Runs ragel
|
|
23
|
-
task :
|
|
24
|
+
desc "Runs ragel before building the gem"
|
|
25
|
+
task build: :ragel
|
data/lib/regexp_parser/error.rb
CHANGED
|
@@ -1,25 +1,8 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
module Backreference
|
|
3
|
-
class Base < Regexp::Expression::Base
|
|
4
|
-
attr_accessor :referenced_expression
|
|
5
|
-
|
|
6
|
-
def initialize_copy(orig)
|
|
7
|
-
exp_id = [self.class, self.starts_at]
|
|
8
|
-
|
|
9
|
-
# prevent infinite recursion for recursive subexp calls
|
|
10
|
-
copied = @@copied ||= {}
|
|
11
|
-
self.referenced_expression =
|
|
12
|
-
if copied[exp_id]
|
|
13
|
-
orig.referenced_expression
|
|
14
|
-
else
|
|
15
|
-
copied[exp_id] = true
|
|
16
|
-
orig.referenced_expression.dup
|
|
17
|
-
end
|
|
18
|
-
copied.clear
|
|
19
|
-
|
|
20
|
-
super
|
|
21
|
-
end
|
|
22
|
-
end
|
|
5
|
+
class Base < Regexp::Expression::Base; end
|
|
23
6
|
|
|
24
7
|
class Number < Backreference::Base
|
|
25
8
|
attr_reader :number
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
class CharacterSet < Regexp::Expression::Subexpression
|
|
3
5
|
attr_accessor :closed, :negative
|
|
4
|
-
|
|
5
|
-
alias :negative? :negative
|
|
6
|
-
alias :negated? :negative
|
|
7
|
-
alias :closed? :closed
|
|
6
|
+
alias :closed? :closed
|
|
8
7
|
|
|
9
8
|
def initialize(token, options = {})
|
|
10
9
|
self.negative = false
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
module Conditional
|
|
3
5
|
class TooManyBranches < Regexp::Parser::Error
|
|
@@ -7,26 +9,17 @@ module Regexp::Expression
|
|
|
7
9
|
end
|
|
8
10
|
|
|
9
11
|
class Condition < Regexp::Expression::Base
|
|
10
|
-
attr_accessor :referenced_expression
|
|
11
|
-
|
|
12
12
|
# Name or number of the referenced capturing group that determines state.
|
|
13
13
|
# Returns a String if reference is by name, Integer if by number.
|
|
14
14
|
def reference
|
|
15
15
|
ref = text.tr("'<>()", "")
|
|
16
16
|
ref =~ /\D/ ? ref : Integer(ref)
|
|
17
17
|
end
|
|
18
|
-
|
|
19
|
-
def initialize_copy(orig)
|
|
20
|
-
self.referenced_expression = orig.referenced_expression.dup
|
|
21
|
-
super
|
|
22
|
-
end
|
|
23
18
|
end
|
|
24
19
|
|
|
25
20
|
class Branch < Regexp::Expression::Sequence; end
|
|
26
21
|
|
|
27
22
|
class Expression < Regexp::Expression::Subexpression
|
|
28
|
-
attr_accessor :referenced_expression
|
|
29
|
-
|
|
30
23
|
def <<(exp)
|
|
31
24
|
expressions.last << exp
|
|
32
25
|
end
|
|
@@ -54,11 +47,6 @@ module Regexp::Expression
|
|
|
54
47
|
def reference
|
|
55
48
|
condition.reference
|
|
56
49
|
end
|
|
57
|
-
|
|
58
|
-
def initialize_copy(orig)
|
|
59
|
-
self.referenced_expression = orig.referenced_expression.dup
|
|
60
|
-
super
|
|
61
|
-
end
|
|
62
50
|
end
|
|
63
51
|
end
|
|
64
52
|
end
|
|
@@ -1,100 +1,31 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
module EscapeSequence
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
class AsciiEscape < EscapeSequence::Base; end
|
|
28
|
-
class Backspace < EscapeSequence::Base; end
|
|
29
|
-
class Bell < EscapeSequence::Base; end
|
|
30
|
-
class FormFeed < EscapeSequence::Base; end
|
|
31
|
-
class Newline < EscapeSequence::Base; end
|
|
32
|
-
class Return < EscapeSequence::Base; end
|
|
33
|
-
class Tab < EscapeSequence::Base; end
|
|
34
|
-
class VerticalTab < EscapeSequence::Base; end
|
|
35
|
-
|
|
36
|
-
class Hex < EscapeSequence::Base; end
|
|
37
|
-
class Codepoint < EscapeSequence::Base; end
|
|
38
|
-
|
|
39
|
-
class CodepointList < EscapeSequence::Base
|
|
40
|
-
def char
|
|
41
|
-
raise NoMethodError, 'CodepointList responds only to #chars'
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
def codepoint
|
|
45
|
-
raise NoMethodError, 'CodepointList responds only to #codepoints'
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
def chars
|
|
49
|
-
codepoints.map { |cp| cp.chr('utf-8') }
|
|
50
|
-
end
|
|
51
|
-
|
|
52
|
-
def codepoints
|
|
53
|
-
text.scan(/\h+/).map(&:hex)
|
|
54
|
-
end
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
class Octal < EscapeSequence::Base
|
|
58
|
-
def char
|
|
59
|
-
text[1..-1].to_i(8).chr('utf-8')
|
|
60
|
-
end
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
class AbstractMetaControlSequence < EscapeSequence::Base
|
|
64
|
-
def char
|
|
65
|
-
codepoint.chr('utf-8')
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
private
|
|
69
|
-
|
|
70
|
-
def control_sequence_to_s(control_sequence)
|
|
71
|
-
five_lsb = control_sequence.unpack('B*').first[-5..-1]
|
|
72
|
-
["000#{five_lsb}"].pack('B*')
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
def meta_char_to_codepoint(meta_char)
|
|
76
|
-
byte_value = meta_char.ord
|
|
77
|
-
byte_value < 128 ? byte_value + 128 : byte_value
|
|
78
|
-
end
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
class Control < AbstractMetaControlSequence
|
|
82
|
-
def codepoint
|
|
83
|
-
control_sequence_to_s(text).ord
|
|
84
|
-
end
|
|
85
|
-
end
|
|
86
|
-
|
|
87
|
-
class Meta < AbstractMetaControlSequence
|
|
88
|
-
def codepoint
|
|
89
|
-
meta_char_to_codepoint(text[-1])
|
|
90
|
-
end
|
|
91
|
-
end
|
|
92
|
-
|
|
93
|
-
class MetaControl < AbstractMetaControlSequence
|
|
94
|
-
def codepoint
|
|
95
|
-
meta_char_to_codepoint(control_sequence_to_s(text))
|
|
96
|
-
end
|
|
97
|
-
end
|
|
5
|
+
Base = Class.new(Regexp::Expression::Base)
|
|
6
|
+
|
|
7
|
+
AsciiEscape = Class.new(Base) # \e
|
|
8
|
+
Backspace = Class.new(Base) # \b
|
|
9
|
+
Bell = Class.new(Base) # \a
|
|
10
|
+
FormFeed = Class.new(Base) # \f
|
|
11
|
+
Newline = Class.new(Base) # \n
|
|
12
|
+
Return = Class.new(Base) # \r
|
|
13
|
+
Tab = Class.new(Base) # \t
|
|
14
|
+
VerticalTab = Class.new(Base) # \v
|
|
15
|
+
|
|
16
|
+
Literal = Class.new(Base) # e.g. \j, \@, \😀 (ineffectual escapes)
|
|
17
|
+
|
|
18
|
+
Octal = Class.new(Base) # e.g. \012
|
|
19
|
+
Hex = Class.new(Base) # e.g. \x0A
|
|
20
|
+
Codepoint = Class.new(Base) # e.g. \u000A
|
|
21
|
+
|
|
22
|
+
CodepointList = Class.new(Base) # e.g. \u{A B}
|
|
23
|
+
UTF8Hex = Class.new(Base) # e.g. \xE2\x82\xAC
|
|
24
|
+
|
|
25
|
+
AbstractMetaControlSequence = Class.new(Base)
|
|
26
|
+
Control = Class.new(AbstractMetaControlSequence) # e.g. \cB
|
|
27
|
+
Meta = Class.new(AbstractMetaControlSequence) # e.g. \M-Z
|
|
28
|
+
MetaControl = Class.new(AbstractMetaControlSequence) # e.g. \M-\cX
|
|
98
29
|
end
|
|
99
30
|
|
|
100
31
|
# alias for symmetry between Token::* and Expression::*
|
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
module Keep
|
|
3
|
-
#
|
|
5
|
+
# TODO: in regexp_parser v3.0.0 this should possibly be a Subexpression
|
|
4
6
|
# that contains all expressions to its left.
|
|
5
7
|
class Mark < Regexp::Expression::Base; end
|
|
6
8
|
end
|
|
@@ -1,10 +1,8 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
module UnicodeProperty
|
|
3
5
|
class Base < Regexp::Expression::Base
|
|
4
|
-
def negative?
|
|
5
|
-
type == :nonproperty
|
|
6
|
-
end
|
|
7
|
-
|
|
8
6
|
def name
|
|
9
7
|
text[/\A\\[pP]\{([^}]+)\}\z/, 1]
|
|
10
8
|
end
|
|
@@ -109,11 +107,12 @@ module Regexp::Expression
|
|
|
109
107
|
class Unassigned < Codepoint::Base; end
|
|
110
108
|
end
|
|
111
109
|
|
|
112
|
-
class Age
|
|
113
|
-
class
|
|
114
|
-
class
|
|
115
|
-
class
|
|
116
|
-
class
|
|
110
|
+
class Age < UnicodeProperty::Base; end
|
|
111
|
+
class Block < UnicodeProperty::Base; end
|
|
112
|
+
class Derived < UnicodeProperty::Base; end
|
|
113
|
+
class Emoji < UnicodeProperty::Base; end
|
|
114
|
+
class Enumerated < UnicodeProperty::Base; end
|
|
115
|
+
class Script < UnicodeProperty::Base; end
|
|
117
116
|
end
|
|
118
117
|
|
|
119
118
|
# alias for symmetry between token symbol and Expression class name
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Regexp::Expression::EscapeSequence
|
|
4
|
+
AsciiEscape.class_eval { def codepoint; 0x1B end }
|
|
5
|
+
Backspace.class_eval { def codepoint; 0x8 end }
|
|
6
|
+
Bell.class_eval { def codepoint; 0x7 end }
|
|
7
|
+
FormFeed.class_eval { def codepoint; 0xC end }
|
|
8
|
+
Newline.class_eval { def codepoint; 0xA end }
|
|
9
|
+
Return.class_eval { def codepoint; 0xD end }
|
|
10
|
+
Tab.class_eval { def codepoint; 0x9 end }
|
|
11
|
+
VerticalTab.class_eval { def codepoint; 0xB end }
|
|
12
|
+
|
|
13
|
+
Literal.class_eval { def codepoint; text[1].ord end }
|
|
14
|
+
|
|
15
|
+
Octal.class_eval { def codepoint; text[/\d+/].to_i(8) end }
|
|
16
|
+
|
|
17
|
+
Hex.class_eval { def codepoint; text[/\h+/].hex end }
|
|
18
|
+
Codepoint.class_eval { def codepoint; text[/\h+/].hex end }
|
|
19
|
+
|
|
20
|
+
UTF8Hex.class_eval do
|
|
21
|
+
def codepoint
|
|
22
|
+
text.scan(/\h+/).map(&:hex).pack('C*').force_encoding('utf-8').ord
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
CodepointList.class_eval do
|
|
27
|
+
# Maybe this should be a unique top-level expression class?
|
|
28
|
+
def char
|
|
29
|
+
raise NoMethodError, 'CodepointList responds only to #chars'
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
def codepoint
|
|
33
|
+
raise NoMethodError, 'CodepointList responds only to #codepoints'
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
def chars
|
|
37
|
+
codepoints.map { |cp| cp.chr('utf-8') }
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def codepoints
|
|
41
|
+
text.scan(/\h+/).map(&:hex)
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
AbstractMetaControlSequence.class_eval do
|
|
46
|
+
private
|
|
47
|
+
|
|
48
|
+
def control_sequence_to_s(control_sequence)
|
|
49
|
+
five_lsb = control_sequence.unpack('B*').first[-5..-1]
|
|
50
|
+
["000#{five_lsb}"].pack('B*')
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
def meta_char_to_codepoint(meta_char)
|
|
54
|
+
byte_value = meta_char.ord
|
|
55
|
+
byte_value < 128 ? byte_value + 128 : byte_value
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
Control.class_eval do
|
|
60
|
+
def codepoint
|
|
61
|
+
control_sequence_to_s(text).ord
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
Meta.class_eval do
|
|
66
|
+
def codepoint
|
|
67
|
+
meta_char_to_codepoint(text[-1])
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
MetaControl.class_eval do
|
|
72
|
+
def codepoint
|
|
73
|
+
meta_char_to_codepoint(control_sequence_to_s(text))
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Regexp::Expression
|
|
4
|
+
module Shared
|
|
5
|
+
def negative?
|
|
6
|
+
false
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
# not an alias so as to respect overrides of #negative?
|
|
10
|
+
def negated?
|
|
11
|
+
negative?
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
|
|
15
|
+
Anchor::NonWordBoundary.class_eval { def negative?; true end }
|
|
16
|
+
Assertion::NegativeLookahead.class_eval { def negative?; true end }
|
|
17
|
+
Assertion::NegativeLookbehind.class_eval { def negative?; true end }
|
|
18
|
+
CharacterSet.class_eval { def negative?; negative end }
|
|
19
|
+
CharacterType::Base.class_eval { def negative?; token.to_s.start_with?('non') end }
|
|
20
|
+
PosixClass.class_eval { def negative?; type == :nonposixclass end }
|
|
21
|
+
UnicodeProperty::Base.class_eval { def negative?; type == :nonproperty end }
|
|
22
|
+
end
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Regexp::Expression
|
|
4
|
+
module ReferencedExpressions
|
|
5
|
+
attr_accessor :referenced_expressions
|
|
6
|
+
|
|
7
|
+
def referenced_expression
|
|
8
|
+
referenced_expressions && referenced_expressions.first
|
|
9
|
+
end
|
|
10
|
+
|
|
11
|
+
def initialize_copy(orig)
|
|
12
|
+
exp_id = [self.class, self.starts_at]
|
|
13
|
+
|
|
14
|
+
# prevent infinite recursion for recursive subexp calls
|
|
15
|
+
copied = self.class.instance_eval { @copied_ref_exps ||= {} }
|
|
16
|
+
self.referenced_expressions =
|
|
17
|
+
if copied[exp_id]
|
|
18
|
+
orig.referenced_expressions
|
|
19
|
+
else
|
|
20
|
+
copied[exp_id] = true
|
|
21
|
+
orig.referenced_expressions && orig.referenced_expressions.map(&:dup)
|
|
22
|
+
end
|
|
23
|
+
copied.clear
|
|
24
|
+
|
|
25
|
+
super
|
|
26
|
+
end
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
Base.include ReferencedExpressions
|
|
30
|
+
end
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
module Regexp::Expression
|
|
2
4
|
# TODO: in v3.0.0, maybe put Shared back into Base, and inherit from Base and
|
|
3
5
|
# call super in #initialize, but raise in #quantifier= and #quantify,
|
|
@@ -6,7 +8,7 @@ module Regexp::Expression
|
|
|
6
8
|
class Quantifier
|
|
7
9
|
include Regexp::Expression::Shared
|
|
8
10
|
|
|
9
|
-
MODES = %i[greedy possessive reluctant]
|
|
11
|
+
MODES = %i[greedy possessive reluctant].freeze
|
|
10
12
|
|
|
11
13
|
def initialize(*args)
|
|
12
14
|
deprecated_old_init(*args) and return if args.count == 4 || args.count == 5
|