regexp_parser 2.11.0 → 2.11.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/Rakefile +5 -3
- data/lib/regexp_parser/error.rb +2 -0
- data/lib/regexp_parser/expression/base.rb +2 -0
- data/lib/regexp_parser/expression/classes/alternation.rb +2 -0
- data/lib/regexp_parser/expression/classes/anchor.rb +2 -0
- data/lib/regexp_parser/expression/classes/backreference.rb +2 -0
- data/lib/regexp_parser/expression/classes/character_set/intersection.rb +2 -0
- data/lib/regexp_parser/expression/classes/character_set/range.rb +2 -0
- data/lib/regexp_parser/expression/classes/character_set.rb +2 -0
- data/lib/regexp_parser/expression/classes/character_type.rb +2 -0
- data/lib/regexp_parser/expression/classes/conditional.rb +2 -0
- data/lib/regexp_parser/expression/classes/escape_sequence.rb +2 -0
- data/lib/regexp_parser/expression/classes/free_space.rb +2 -0
- data/lib/regexp_parser/expression/classes/group.rb +2 -0
- data/lib/regexp_parser/expression/classes/keep.rb +2 -0
- data/lib/regexp_parser/expression/classes/literal.rb +2 -0
- data/lib/regexp_parser/expression/classes/posix_class.rb +2 -0
- data/lib/regexp_parser/expression/classes/root.rb +2 -0
- data/lib/regexp_parser/expression/classes/unicode_property.rb +2 -0
- data/lib/regexp_parser/expression/methods/construct.rb +2 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_char.rb +2 -0
- data/lib/regexp_parser/expression/methods/escape_sequence_codepoint.rb +2 -0
- data/lib/regexp_parser/expression/methods/human_name.rb +2 -0
- data/lib/regexp_parser/expression/methods/match.rb +2 -0
- data/lib/regexp_parser/expression/methods/match_length.rb +2 -0
- data/lib/regexp_parser/expression/methods/negative.rb +2 -0
- data/lib/regexp_parser/expression/methods/options.rb +2 -0
- data/lib/regexp_parser/expression/methods/parts.rb +2 -0
- data/lib/regexp_parser/expression/methods/printing.rb +2 -0
- data/lib/regexp_parser/expression/methods/referenced_expressions.rb +2 -0
- data/lib/regexp_parser/expression/methods/strfregexp.rb +2 -0
- data/lib/regexp_parser/expression/methods/tests.rb +2 -0
- data/lib/regexp_parser/expression/methods/traverse.rb +2 -0
- data/lib/regexp_parser/expression/quantifier.rb +3 -1
- data/lib/regexp_parser/expression/sequence.rb +2 -0
- data/lib/regexp_parser/expression/sequence_operation.rb +2 -0
- data/lib/regexp_parser/expression/shared.rb +6 -3
- data/lib/regexp_parser/expression/subexpression.rb +2 -0
- data/lib/regexp_parser/expression.rb +2 -0
- data/lib/regexp_parser/lexer.rb +2 -0
- data/lib/regexp_parser/parser.rb +2 -0
- data/lib/regexp_parser/scanner/errors/premature_end_error.rb +2 -0
- data/lib/regexp_parser/scanner/errors/scanner_error.rb +2 -0
- data/lib/regexp_parser/scanner/errors/validation_error.rb +2 -0
- data/lib/regexp_parser/scanner/properties/long.csv +19 -0
- data/lib/regexp_parser/scanner/properties/short.csv +8 -0
- data/lib/regexp_parser/scanner/scanner.rl +15 -6
- data/lib/regexp_parser/scanner.rb +17 -8
- data/lib/regexp_parser/syntax/any.rb +2 -0
- data/lib/regexp_parser/syntax/base.rb +2 -0
- data/lib/regexp_parser/syntax/token/anchor.rb +5 -3
- data/lib/regexp_parser/syntax/token/assertion.rb +4 -2
- data/lib/regexp_parser/syntax/token/backreference.rb +8 -6
- data/lib/regexp_parser/syntax/token/character_set.rb +3 -1
- data/lib/regexp_parser/syntax/token/character_type.rb +6 -4
- data/lib/regexp_parser/syntax/token/conditional.rb +5 -3
- data/lib/regexp_parser/syntax/token/escape.rb +9 -7
- data/lib/regexp_parser/syntax/token/group.rb +8 -6
- data/lib/regexp_parser/syntax/token/keep.rb +3 -1
- data/lib/regexp_parser/syntax/token/meta.rb +4 -2
- data/lib/regexp_parser/syntax/token/posix_class.rb +4 -2
- data/lib/regexp_parser/syntax/token/quantifier.rb +8 -6
- data/lib/regexp_parser/syntax/token/unicode_property.rb +62 -47
- data/lib/regexp_parser/syntax/token/virtual.rb +5 -3
- data/lib/regexp_parser/syntax/token.rb +5 -3
- data/lib/regexp_parser/syntax/version_lookup.rb +4 -2
- data/lib/regexp_parser/syntax/versions/1.8.6.rb +2 -0
- data/lib/regexp_parser/syntax/versions/1.9.1.rb +2 -0
- data/lib/regexp_parser/syntax/versions/1.9.3.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.0.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.2.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.3.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.4.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.4.1.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.5.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.6.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.6.2.rb +2 -0
- data/lib/regexp_parser/syntax/versions/2.6.3.rb +2 -0
- data/lib/regexp_parser/syntax/versions/3.1.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/3.2.0.rb +2 -0
- data/lib/regexp_parser/syntax/versions/3.5.0.rb +4 -0
- data/lib/regexp_parser/syntax/versions.rb +2 -0
- data/lib/regexp_parser/syntax.rb +2 -0
- data/lib/regexp_parser/token.rb +2 -0
- data/lib/regexp_parser/version.rb +3 -1
- data/lib/regexp_parser.rb +2 -0
- data/regexp_parser.gemspec +2 -0
- metadata +4 -6
@@ -1,9 +1,9 @@
|
|
1
|
-
# -*- warn-indent:false
|
1
|
+
# -*- frozen_string_literal: true; warn-indent: false -*-
|
2
2
|
#
|
3
3
|
# THIS IS A GENERATED FILE, DO NOT EDIT DIRECTLY
|
4
4
|
#
|
5
5
|
# This file was generated from scanner.rl
|
6
|
-
# by running
|
6
|
+
# by running `$ bundle exec rake ragel`
|
7
7
|
|
8
8
|
|
9
9
|
require_relative 'scanner/errors/scanner_error'
|
@@ -28,7 +28,7 @@ class Regexp::Scanner
|
|
28
28
|
|
29
29
|
input = input_object.is_a?(Regexp) ? input_object.source : input_object
|
30
30
|
self.free_spacing = free_spacing?(input_object, options)
|
31
|
-
self.regexp_encoding = input_object
|
31
|
+
self.regexp_encoding = extract_encoding(input_object, options)
|
32
32
|
self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]
|
33
33
|
|
34
34
|
data = input.unpack("c*")
|
@@ -1464,7 +1464,7 @@ p = p - 1; begin
|
|
1464
1464
|
# If not enough groups have been opened, there is a fallback to either an
|
1465
1465
|
# octal or literal interpretation for 2+ digit numerical escapes.
|
1466
1466
|
digits = text[1..-1]
|
1467
|
-
if digits.size == 1 || digits.to_i <=
|
1467
|
+
if digits.size == 1 || digits.to_i <= capturing_group_count
|
1468
1468
|
emit(:backref, :number, text)
|
1469
1469
|
elsif digits =~ /\A[0-7]{2,}\z/
|
1470
1470
|
emit(:escape, :octal, text)
|
@@ -1888,7 +1888,7 @@ p = p - 1; begin
|
|
1888
1888
|
begin
|
1889
1889
|
te = p
|
1890
1890
|
p = p - 1; begin
|
1891
|
-
self.capturing_group_count
|
1891
|
+
self.capturing_group_count = capturing_group_count + 1
|
1892
1892
|
text = copy(data, ts, te)
|
1893
1893
|
emit(:group, :capture, text)
|
1894
1894
|
end
|
@@ -2382,9 +2382,11 @@ end
|
|
2382
2382
|
File.read("#{__dir__}/scanner/properties/#{name}.csv").scan(/(.+),(.+)/).to_h
|
2383
2383
|
end
|
2384
2384
|
|
2385
|
+
# Use each_with_object for required_ruby_version >= 2.2,or #to_h for >= 2.6
|
2385
2386
|
POSIX_CLASSES =
|
2386
2387
|
%w[alnum alpha ascii blank cntrl digit graph
|
2387
|
-
lower print punct space upper word xdigit]
|
2388
|
+
lower print punct space upper word xdigit]
|
2389
|
+
.inject({}) { |o, e| o.merge(e => true) }.freeze
|
2388
2390
|
|
2389
2391
|
# Emits an array with the details of the scanned pattern
|
2390
2392
|
def emit(type, token, text)
|
@@ -2412,7 +2414,7 @@ end
|
|
2412
2414
|
end
|
2413
2415
|
end
|
2414
2416
|
|
2415
|
-
attr_accessor :literal_run # only public for #||= to work on ruby <= 2.5
|
2417
|
+
attr_accessor :capturing_group_count, :literal_run # only public for #||= to work on ruby <= 2.5
|
2416
2418
|
|
2417
2419
|
private
|
2418
2420
|
|
@@ -2421,9 +2423,16 @@ end
|
|
2421
2423
|
:free_spacing, :spacing_stack,
|
2422
2424
|
:regexp_encoding,
|
2423
2425
|
:group_depth, :set_depth, :conditional_stack,
|
2424
|
-
:capturing_group_count,
|
2425
2426
|
:char_pos
|
2426
2427
|
|
2428
|
+
def extract_encoding(input_object, options)
|
2429
|
+
if input_object.is_a?(::Regexp)
|
2430
|
+
input_object.encoding
|
2431
|
+
elsif options && (options & Regexp::NOENCODING)
|
2432
|
+
Encoding::BINARY
|
2433
|
+
end
|
2434
|
+
end
|
2435
|
+
|
2427
2436
|
def free_spacing?(input_object, options)
|
2428
2437
|
if options && !input_object.is_a?(String)
|
2429
2438
|
raise ArgumentError, 'options cannot be supplied unless scanning a String'
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module Anchor
|
4
|
-
Basic = %i[bol eol]
|
6
|
+
Basic = %i[bol eol].freeze
|
5
7
|
Extended = Basic + %i[word_boundary nonword_boundary]
|
6
|
-
String = %i[bos eos eos_ob_eol]
|
7
|
-
MatchStart = %i[match_start]
|
8
|
+
String = %i[bos eos eos_ob_eol].freeze
|
9
|
+
MatchStart = %i[match_start].freeze
|
8
10
|
|
9
11
|
All = Extended + String + MatchStart
|
10
12
|
Type = :anchor
|
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module Assertion
|
4
|
-
Lookahead = %i[lookahead nlookahead]
|
5
|
-
Lookbehind = %i[lookbehind nlookbehind]
|
6
|
+
Lookahead = %i[lookahead nlookahead].freeze
|
7
|
+
Lookbehind = %i[lookbehind nlookbehind].freeze
|
6
8
|
|
7
9
|
All = Lookahead + Lookbehind
|
8
10
|
Type = :assertion
|
@@ -1,12 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module Backreference
|
4
|
-
Plain = %i[number]
|
5
|
-
NumberRef = %i[number_ref number_rel_ref]
|
6
|
+
Plain = %i[number].freeze
|
7
|
+
NumberRef = %i[number_ref number_rel_ref].freeze
|
6
8
|
Number = Plain + NumberRef
|
7
|
-
Name = %i[name_ref]
|
9
|
+
Name = %i[name_ref].freeze
|
8
10
|
|
9
|
-
RecursionLevel = %i[name_recursion_ref number_recursion_ref]
|
11
|
+
RecursionLevel = %i[name_recursion_ref number_recursion_ref].freeze
|
10
12
|
|
11
13
|
V1_8_6 = Plain
|
12
14
|
|
@@ -18,8 +20,8 @@ module Regexp::Syntax
|
|
18
20
|
|
19
21
|
# Type is the same as Backreference so keeping it here, for now.
|
20
22
|
module SubexpressionCall
|
21
|
-
Name = %i[name_call]
|
22
|
-
Number = %i[number_call number_rel_call]
|
23
|
+
Name = %i[name_call].freeze
|
24
|
+
Number = %i[number_call number_rel_call].freeze
|
23
25
|
|
24
26
|
All = Name + Number
|
25
27
|
end
|
@@ -1,11 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module CharacterType
|
4
|
-
Basic = []
|
5
|
-
Extended = %i[digit nondigit space nonspace word nonword]
|
6
|
-
Hex = %i[hex nonhex]
|
6
|
+
Basic = [].freeze
|
7
|
+
Extended = %i[digit nondigit space nonspace word nonword].freeze
|
8
|
+
Hex = %i[hex nonhex].freeze
|
7
9
|
|
8
|
-
Clustered = %i[linebreak xgrapheme]
|
10
|
+
Clustered = %i[linebreak xgrapheme].freeze
|
9
11
|
|
10
12
|
All = Basic + Extended + Hex + Clustered
|
11
13
|
Type = :type
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module Conditional
|
4
|
-
Delimiters = %i[open close]
|
6
|
+
Delimiters = %i[open close].freeze
|
5
7
|
|
6
|
-
Condition = %i[condition_open condition condition_close]
|
7
|
-
Separator = %i[separator]
|
8
|
+
Condition = %i[condition_open condition condition_close].freeze
|
9
|
+
Separator = %i[separator].freeze
|
8
10
|
|
9
11
|
All = Conditional::Delimiters + Conditional::Condition + Conditional::Separator
|
10
12
|
|
@@ -1,25 +1,27 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module Escape
|
4
|
-
Basic = %i[backslash literal]
|
6
|
+
Basic = %i[backslash literal].freeze
|
5
7
|
|
6
|
-
Control = %i[control meta_sequence]
|
8
|
+
Control = %i[control meta_sequence].freeze
|
7
9
|
|
8
10
|
ASCII = %i[bell backspace escape form_feed newline carriage
|
9
|
-
tab vertical_tab]
|
11
|
+
tab vertical_tab].freeze
|
10
12
|
|
11
|
-
Unicode = %i[codepoint codepoint_list]
|
13
|
+
Unicode = %i[codepoint codepoint_list].freeze
|
12
14
|
|
13
15
|
Meta = %i[dot alternation
|
14
16
|
zero_or_one zero_or_more one_or_more
|
15
17
|
bol eol
|
16
18
|
group_open group_close
|
17
19
|
interval_open interval_close
|
18
|
-
set_open set_close]
|
20
|
+
set_open set_close].freeze
|
19
21
|
|
20
|
-
Hex = %i[hex utf8_hex]
|
22
|
+
Hex = %i[hex utf8_hex].freeze
|
21
23
|
|
22
|
-
Octal = %i[octal]
|
24
|
+
Octal = %i[octal].freeze
|
23
25
|
|
24
26
|
All = Basic + Control + ASCII + Unicode + Meta + Hex + Octal
|
25
27
|
Type = :escape
|
@@ -1,18 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module Group
|
4
|
-
Basic = %i[capture close]
|
6
|
+
Basic = %i[capture close].freeze
|
5
7
|
Extended = Basic + %i[options options_switch]
|
6
8
|
|
7
|
-
Named = %i[named]
|
8
|
-
Atomic = %i[atomic]
|
9
|
-
Passive = %i[passive]
|
10
|
-
Comment = %i[comment]
|
9
|
+
Named = %i[named].freeze
|
10
|
+
Atomic = %i[atomic].freeze
|
11
|
+
Passive = %i[passive].freeze
|
12
|
+
Comment = %i[comment].freeze
|
11
13
|
|
12
14
|
V1_8_6 = Group::Extended + Group::Named + Group::Atomic +
|
13
15
|
Group::Passive + Group::Comment
|
14
16
|
|
15
|
-
V2_4_1 = %i[absence]
|
17
|
+
V2_4_1 = %i[absence].freeze
|
16
18
|
|
17
19
|
All = V1_8_6 + V2_4_1
|
18
20
|
Type = :group
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module PosixClass
|
4
6
|
Standard = %i[alnum alpha blank cntrl digit graph
|
5
|
-
lower print punct space upper xdigit]
|
7
|
+
lower print punct space upper xdigit].freeze
|
6
8
|
|
7
|
-
Extensions = %i[ascii word]
|
9
|
+
Extensions = %i[ascii word].freeze
|
8
10
|
|
9
11
|
All = Standard + Extensions
|
10
12
|
Type = :posixclass
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module Quantifier
|
@@ -5,23 +7,23 @@ module Regexp::Syntax
|
|
5
7
|
zero_or_one
|
6
8
|
zero_or_more
|
7
9
|
one_or_more
|
8
|
-
]
|
10
|
+
].freeze
|
9
11
|
|
10
12
|
Reluctant = %i[
|
11
13
|
zero_or_one_reluctant
|
12
14
|
zero_or_more_reluctant
|
13
15
|
one_or_more_reluctant
|
14
|
-
]
|
16
|
+
].freeze
|
15
17
|
|
16
18
|
Possessive = %i[
|
17
19
|
zero_or_one_possessive
|
18
20
|
zero_or_more_possessive
|
19
21
|
one_or_more_possessive
|
20
|
-
]
|
22
|
+
].freeze
|
21
23
|
|
22
|
-
Interval = %i[interval]
|
23
|
-
IntervalReluctant = %i[interval_reluctant]
|
24
|
-
IntervalPossessive = %i[interval_possessive]
|
24
|
+
Interval = %i[interval].freeze
|
25
|
+
IntervalReluctant = %i[interval_reluctant].freeze
|
26
|
+
IntervalPossessive = %i[interval_possessive].freeze
|
25
27
|
|
26
28
|
IntervalAll = Interval + IntervalReluctant + IntervalPossessive
|
27
29
|
|
@@ -1,37 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module UnicodeProperty
|
4
6
|
all = proc { |name| constants.grep(/#{name}/).flat_map(&method(:const_get)) }
|
5
7
|
|
6
8
|
CharType_V1_9_0 = %i[alnum alpha ascii blank cntrl digit graph
|
7
|
-
lower print punct space upper word xdigit]
|
9
|
+
lower print punct space upper word xdigit].freeze
|
8
10
|
|
9
|
-
CharType_V2_5_0 = %i[xposixpunct]
|
11
|
+
CharType_V2_5_0 = %i[xposixpunct].freeze
|
10
12
|
|
11
|
-
POSIX = %i[any assigned newline]
|
13
|
+
POSIX = %i[any assigned newline].freeze
|
12
14
|
|
13
15
|
module Category
|
14
16
|
Letter = %i[letter uppercase_letter lowercase_letter
|
15
|
-
titlecase_letter modifier_letter other_letter]
|
17
|
+
titlecase_letter modifier_letter other_letter].freeze
|
16
18
|
|
17
19
|
Mark = %i[mark nonspacing_mark spacing_mark
|
18
|
-
enclosing_mark]
|
20
|
+
enclosing_mark].freeze
|
19
21
|
|
20
22
|
Number = %i[number decimal_number letter_number
|
21
|
-
other_number]
|
23
|
+
other_number].freeze
|
22
24
|
|
23
25
|
Punctuation = %i[punctuation connector_punctuation dash_punctuation
|
24
26
|
open_punctuation close_punctuation initial_punctuation
|
25
|
-
final_punctuation other_punctuation]
|
27
|
+
final_punctuation other_punctuation].freeze
|
26
28
|
|
27
29
|
Symbol = %i[symbol math_symbol currency_symbol
|
28
|
-
modifier_symbol other_symbol]
|
30
|
+
modifier_symbol other_symbol].freeze
|
29
31
|
|
30
32
|
Separator = %i[separator space_separator line_separator
|
31
|
-
paragraph_separator]
|
33
|
+
paragraph_separator].freeze
|
32
34
|
|
33
35
|
Codepoint = %i[other control format
|
34
|
-
surrogate private_use unassigned]
|
36
|
+
surrogate private_use unassigned].freeze
|
35
37
|
|
36
38
|
All = Letter + Mark + Number + Punctuation +
|
37
39
|
Symbol + Separator + Codepoint
|
@@ -39,27 +41,29 @@ module Regexp::Syntax
|
|
39
41
|
|
40
42
|
Age_V1_9_3 = %i[age=1.1 age=2.0 age=2.1 age=3.0 age=3.1
|
41
43
|
age=3.2 age=4.0 age=4.1 age=5.0 age=5.1
|
42
|
-
age=5.2 age=6.0]
|
44
|
+
age=5.2 age=6.0].freeze
|
45
|
+
|
46
|
+
Age_V2_0_0 = %i[age=6.1].freeze
|
43
47
|
|
44
|
-
|
48
|
+
Age_V2_2_0 = %i[age=6.2 age=6.3 age=7.0].freeze
|
45
49
|
|
46
|
-
|
50
|
+
Age_V2_3_0 = %i[age=8.0].freeze
|
47
51
|
|
48
|
-
|
52
|
+
Age_V2_4_0 = %i[age=9.0].freeze
|
49
53
|
|
50
|
-
|
54
|
+
Age_V2_5_0 = %i[age=10.0].freeze
|
51
55
|
|
52
|
-
|
56
|
+
Age_V2_6_0 = %i[age=11.0].freeze
|
53
57
|
|
54
|
-
|
58
|
+
Age_V2_6_2 = %i[age=12.0].freeze
|
55
59
|
|
56
|
-
|
60
|
+
Age_V2_6_3 = %i[age=12.1].freeze
|
57
61
|
|
58
|
-
|
62
|
+
Age_V3_1_0 = %i[age=13.0].freeze
|
59
63
|
|
60
|
-
|
64
|
+
Age_V3_2_0 = %i[age=14.0 age=15.0].freeze
|
61
65
|
|
62
|
-
|
66
|
+
Age_V3_5_0 = %i[age=15.1]
|
63
67
|
|
64
68
|
Age = all[:Age_V]
|
65
69
|
|
@@ -115,19 +119,25 @@ module Regexp::Syntax
|
|
115
119
|
white_space
|
116
120
|
xid_start
|
117
121
|
xid_continue
|
118
|
-
]
|
122
|
+
].freeze
|
119
123
|
|
120
124
|
Derived_V2_0_0 = %i[
|
121
125
|
cased_letter
|
122
126
|
combining_mark
|
123
|
-
]
|
127
|
+
].freeze
|
124
128
|
|
125
129
|
Derived_V2_4_0 = %i[
|
126
130
|
prepended_concatenation_mark
|
127
|
-
]
|
131
|
+
].freeze
|
128
132
|
|
129
133
|
Derived_V2_5_0 = %i[
|
130
134
|
regional_indicator
|
135
|
+
].freeze
|
136
|
+
|
137
|
+
Derived_V3_5_0 = %i[
|
138
|
+
id_compat_math_continue
|
139
|
+
id_compat_math_start
|
140
|
+
ids_unary_operator
|
131
141
|
]
|
132
142
|
|
133
143
|
Derived = all[:Derived_V]
|
@@ -226,13 +236,13 @@ module Regexp::Syntax
|
|
226
236
|
inherited
|
227
237
|
common
|
228
238
|
unknown
|
229
|
-
]
|
239
|
+
].freeze
|
230
240
|
|
231
241
|
Script_V1_9_3 = %i[
|
232
242
|
brahmi
|
233
243
|
batak
|
234
244
|
mandaic
|
235
|
-
]
|
245
|
+
].freeze
|
236
246
|
|
237
247
|
Script_V2_0_0 = %i[
|
238
248
|
chakma
|
@@ -242,7 +252,7 @@ module Regexp::Syntax
|
|
242
252
|
sharada
|
243
253
|
sora_sompeng
|
244
254
|
takri
|
245
|
-
]
|
255
|
+
].freeze
|
246
256
|
|
247
257
|
Script_V2_2_0 = %i[
|
248
258
|
caucasian_albanian
|
@@ -268,7 +278,7 @@ module Regexp::Syntax
|
|
268
278
|
khudawadi
|
269
279
|
tirhuta
|
270
280
|
warang_citi
|
271
|
-
]
|
281
|
+
].freeze
|
272
282
|
|
273
283
|
Script_V2_3_0 = %i[
|
274
284
|
ahom
|
@@ -277,7 +287,7 @@ module Regexp::Syntax
|
|
277
287
|
multani
|
278
288
|
old_hungarian
|
279
289
|
signwriting
|
280
|
-
]
|
290
|
+
].freeze
|
281
291
|
|
282
292
|
Script_V2_4_0 = %i[
|
283
293
|
adlam
|
@@ -286,14 +296,14 @@ module Regexp::Syntax
|
|
286
296
|
newa
|
287
297
|
osage
|
288
298
|
tangut
|
289
|
-
]
|
299
|
+
].freeze
|
290
300
|
|
291
301
|
Script_V2_5_0 = %i[
|
292
302
|
masaram_gondi
|
293
303
|
nushu
|
294
304
|
soyombo
|
295
305
|
zanabazar_square
|
296
|
-
]
|
306
|
+
].freeze
|
297
307
|
|
298
308
|
Script_V2_6_0 = %i[
|
299
309
|
dogra
|
@@ -303,21 +313,21 @@ module Regexp::Syntax
|
|
303
313
|
medefaidrin
|
304
314
|
old_sogdian
|
305
315
|
sogdian
|
306
|
-
]
|
316
|
+
].freeze
|
307
317
|
|
308
318
|
Script_V2_6_2 = %i[
|
309
319
|
elymaic
|
310
320
|
nandinagari
|
311
321
|
nyiakeng_puachue_hmong
|
312
322
|
wancho
|
313
|
-
]
|
323
|
+
].freeze
|
314
324
|
|
315
325
|
Script_V3_1_0 = %i[
|
316
326
|
chorasmian
|
317
327
|
dives_akuru
|
318
328
|
khitan_small_script
|
319
329
|
yezidi
|
320
|
-
]
|
330
|
+
].freeze
|
321
331
|
|
322
332
|
Script_V3_2_0 = %i[
|
323
333
|
cypro_minoan
|
@@ -327,7 +337,7 @@ module Regexp::Syntax
|
|
327
337
|
tangsa
|
328
338
|
toto
|
329
339
|
vithkuqi
|
330
|
-
]
|
340
|
+
].freeze
|
331
341
|
|
332
342
|
Script = all[:Script_V]
|
333
343
|
|
@@ -428,7 +438,7 @@ module Regexp::Syntax
|
|
428
438
|
in_yi_radicals
|
429
439
|
in_yi_syllables
|
430
440
|
in_yijing_hexagram_symbols
|
431
|
-
]
|
441
|
+
].freeze
|
432
442
|
|
433
443
|
UnicodeBlock_V2_0_0 = %i[
|
434
444
|
in_aegean_numbers
|
@@ -556,7 +566,7 @@ module Regexp::Syntax
|
|
556
566
|
in_variation_selectors_supplement
|
557
567
|
in_vedic_extensions
|
558
568
|
in_vertical_forms
|
559
|
-
]
|
569
|
+
].freeze
|
560
570
|
|
561
571
|
UnicodeBlock_V2_2_0 = %i[
|
562
572
|
in_bassa_vah
|
@@ -591,7 +601,7 @@ module Regexp::Syntax
|
|
591
601
|
in_supplemental_arrows_c
|
592
602
|
in_tirhuta
|
593
603
|
in_warang_citi
|
594
|
-
]
|
604
|
+
].freeze
|
595
605
|
|
596
606
|
UnicodeBlock_V2_3_0 = %i[
|
597
607
|
in_ahom
|
@@ -604,7 +614,7 @@ module Regexp::Syntax
|
|
604
614
|
in_old_hungarian
|
605
615
|
in_supplemental_symbols_and_pictographs
|
606
616
|
in_sutton_signwriting
|
607
|
-
]
|
617
|
+
].freeze
|
608
618
|
|
609
619
|
UnicodeBlock_V2_4_0 = %i[
|
610
620
|
in_adlam
|
@@ -618,7 +628,7 @@ module Regexp::Syntax
|
|
618
628
|
in_osage
|
619
629
|
in_tangut
|
620
630
|
in_tangut_components
|
621
|
-
]
|
631
|
+
].freeze
|
622
632
|
|
623
633
|
UnicodeBlock_V2_5_0 = %i[
|
624
634
|
in_cjk_unified_ideographs_extension_f
|
@@ -628,7 +638,7 @@ module Regexp::Syntax
|
|
628
638
|
in_soyombo
|
629
639
|
in_syriac_supplement
|
630
640
|
in_zanabazar_square
|
631
|
-
]
|
641
|
+
].freeze
|
632
642
|
|
633
643
|
UnicodeBlock_V2_6_0 = %i[
|
634
644
|
in_chess_symbols
|
@@ -642,7 +652,7 @@ module Regexp::Syntax
|
|
642
652
|
in_medefaidrin
|
643
653
|
in_old_sogdian
|
644
654
|
in_sogdian
|
645
|
-
]
|
655
|
+
].freeze
|
646
656
|
|
647
657
|
UnicodeBlock_V2_6_2 = %i[
|
648
658
|
in_egyptian_hieroglyph_format_controls
|
@@ -654,7 +664,7 @@ module Regexp::Syntax
|
|
654
664
|
in_symbols_and_pictographs_extended_a
|
655
665
|
in_tamil_supplement
|
656
666
|
in_wancho
|
657
|
-
]
|
667
|
+
].freeze
|
658
668
|
|
659
669
|
UnicodeBlock_V3_1_0 = %i[
|
660
670
|
in_chorasmian
|
@@ -665,7 +675,7 @@ module Regexp::Syntax
|
|
665
675
|
in_symbols_for_legacy_computing
|
666
676
|
in_tangut_supplement
|
667
677
|
in_yezidi
|
668
|
-
]
|
678
|
+
].freeze
|
669
679
|
|
670
680
|
UnicodeBlock_V3_2_0 = %i[
|
671
681
|
in_arabic_extended_b
|
@@ -687,6 +697,10 @@ module Regexp::Syntax
|
|
687
697
|
in_unified_canadian_aboriginal_syllabics_extended_a
|
688
698
|
in_vithkuqi
|
689
699
|
in_znamenny_musical_notation
|
700
|
+
].freeze
|
701
|
+
|
702
|
+
UnicodeBlock_V3_5_0 = %i[
|
703
|
+
in_cjk_unified_ideographs_extension_i
|
690
704
|
]
|
691
705
|
|
692
706
|
UnicodeBlock = all[:UnicodeBlock_V]
|
@@ -697,11 +711,11 @@ module Regexp::Syntax
|
|
697
711
|
emoji_modifier
|
698
712
|
emoji_modifier_base
|
699
713
|
emoji_presentation
|
700
|
-
]
|
714
|
+
].freeze
|
701
715
|
|
702
716
|
Emoji_V2_6_0 = %i[
|
703
717
|
extended_pictographic
|
704
|
-
]
|
718
|
+
].freeze
|
705
719
|
|
706
720
|
Enumerated_V2_4_0 = %i[
|
707
721
|
grapheme_cluster_break=control
|
@@ -717,7 +731,7 @@ module Regexp::Syntax
|
|
717
731
|
grapheme_cluster_break=t
|
718
732
|
grapheme_cluster_break=v
|
719
733
|
grapheme_cluster_break=zwj
|
720
|
-
]
|
734
|
+
].freeze
|
721
735
|
|
722
736
|
Enumerated = all[:Enumerated_V]
|
723
737
|
|
@@ -735,6 +749,7 @@ module Regexp::Syntax
|
|
735
749
|
V2_6_3 = all[:V2_6_3]
|
736
750
|
V3_1_0 = all[:V3_1_0]
|
737
751
|
V3_2_0 = all[:V3_2_0]
|
752
|
+
V3_5_0 = all[:V3_5_0]
|
738
753
|
|
739
754
|
All = all[/^V\d+_\d+_\d+$/]
|
740
755
|
|
@@ -1,10 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Regexp::Syntax
|
2
4
|
module Token
|
3
5
|
module Virtual
|
4
|
-
Root = %i[root]
|
5
|
-
Sequence = %i[sequence]
|
6
|
+
Root = %i[root].freeze
|
7
|
+
Sequence = %i[sequence].freeze
|
6
8
|
|
7
|
-
All = %i[root sequence]
|
9
|
+
All = %i[root sequence].freeze
|
8
10
|
Type = :expression
|
9
11
|
end
|
10
12
|
end
|