js_regex 3.12.0 → 3.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/js_regex/conversion.rb +3 -1
- data/lib/js_regex/converter/anchor_converter.rb +15 -2
- data/lib/js_regex/converter/assertion_converter.rb +2 -0
- data/lib/js_regex/converter/backreference_converter.rb +65 -6
- data/lib/js_regex/converter/base.rb +13 -3
- data/lib/js_regex/converter/conditional_converter.rb +2 -0
- data/lib/js_regex/converter/context.rb +38 -2
- data/lib/js_regex/converter/escape_converter.rb +5 -3
- data/lib/js_regex/converter/freespace_converter.rb +2 -0
- data/lib/js_regex/converter/group_converter.rb +7 -11
- data/lib/js_regex/converter/keep_converter.rb +2 -0
- data/lib/js_regex/converter/literal_converter.rb +5 -3
- data/lib/js_regex/converter/meta_converter.rb +2 -0
- data/lib/js_regex/converter/property_converter.rb +2 -0
- data/lib/js_regex/converter/set_converter.rb +17 -6
- data/lib/js_regex/converter/subexpression_converter.rb +2 -0
- data/lib/js_regex/converter/type_converter.rb +37 -4
- data/lib/js_regex/converter/unsupported_token_converter.rb +2 -0
- data/lib/js_regex/converter.rb +2 -0
- data/lib/js_regex/error.rb +2 -0
- data/lib/js_regex/node.rb +8 -1
- data/lib/js_regex/second_pass.rb +49 -14
- data/lib/js_regex/target.rb +2 -0
- data/lib/js_regex/version.rb +3 -1
- data/lib/js_regex.rb +2 -0
- metadata +7 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 81da8ab5dab32613d0486bb8b1d0c9926b1cd5dcf5cbf1dae7f7c7b86b695718
|
4
|
+
data.tar.gz: ad87e55f4c1834b237d8f18318fae065625e1e94f7645158ec4a7450967e5b69
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5b6e9d0eea5aa5656d3eaae3dd0ad769a21ef8a32d7304237afbe965266e8a8e8229c256a9110ce1573b2faf52ef112f354fcf84af61183b1f5aeb79035869f5
|
7
|
+
data.tar.gz: e546194d186650cc46aed54b73d92e6320251cb2954e213859fe3e5d4e06e04f502f8e0b5fe7a331db12578b59678484e184485ce4397dd61391677fe4766cd2
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class JsRegex
|
2
4
|
#
|
3
5
|
# This class acts as a facade, passing a Regexp to the Converters.
|
@@ -37,7 +39,7 @@ class JsRegex
|
|
37
39
|
end
|
38
40
|
|
39
41
|
def convert_options(input, custom_options, required_options)
|
40
|
-
options = custom_options.to_s.scan(/[
|
42
|
+
options = custom_options.to_s.scan(/[dgimsuvy]/) + required_options
|
41
43
|
if input.is_a?(Regexp) && (input.options & Regexp::IGNORECASE).nonzero?
|
42
44
|
options << 'i'
|
43
45
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -10,8 +12,10 @@ class JsRegex
|
|
10
12
|
|
11
13
|
def convert_data
|
12
14
|
case subtype
|
13
|
-
when :bol
|
14
|
-
when :
|
15
|
+
when :bol then convert_bol
|
16
|
+
when :bos then '^'
|
17
|
+
when :eol then '(?=$|\n)'
|
18
|
+
when :eos then '$'
|
15
19
|
when :eos_ob_eol then '(?=\n?$)'
|
16
20
|
when :word_boundary then convert_boundary
|
17
21
|
when :nonword_boundary then convert_nonboundary
|
@@ -20,6 +24,15 @@ class JsRegex
|
|
20
24
|
end
|
21
25
|
end
|
22
26
|
|
27
|
+
# Builds the JS replacement for a Ruby beginning-of-line anchor (:bol).
def convert_bol
  # TODO: warn in v4.0.0, or drop ES2009 & ES2015 support
  return '^' unless context.es_2018_or_higher?

  # Lookbehind form; emitted only when the context reports an ES2018+ target.
  '(?<=^|\n(?!$))'
end
|
35
|
+
|
23
36
|
def convert_boundary
|
24
37
|
if context.es_2018_or_higher? && context.enable_u_option
|
25
38
|
BOUNDARY_EXPANSION
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -19,17 +21,39 @@ class JsRegex
|
|
19
21
|
end
|
20
22
|
|
21
23
|
def convert_name_ref
|
22
|
-
if
|
23
|
-
|
24
|
-
|
24
|
+
# Check if this is a multiplexed named group reference
|
25
|
+
if expression.referenced_expressions.count > 1
|
26
|
+
convert_multiplexed_name_ref
|
25
27
|
else
|
28
|
+
# Always use numeric backrefs since we convert all named groups to numbered
|
29
|
+
# (see comment in GroupConverter)
|
26
30
|
convert_to_plain_num_ref
|
27
31
|
end
|
28
32
|
end
|
29
33
|
|
30
34
|
def convert_to_plain_num_ref
|
31
35
|
position = new_position
|
32
|
-
|
36
|
+
|
37
|
+
# Check if this backreference refers to a group that was recursively called
|
38
|
+
original_group = target_position
|
39
|
+
if (recursive_position = context.get_recursive_group_position(original_group))
|
40
|
+
# Use the position of the group created by the recursive call
|
41
|
+
position = recursive_position
|
42
|
+
end
|
43
|
+
|
44
|
+
text = "\\#{position}#{'(?:)' if expression.x?}"
|
45
|
+
Node.new(text, reference: position, type: :backref)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Converts a reference to a name shared by several groups into a
# non-capturing alternation of numeric backreferences, e.g. (?:\1|\2).
def convert_multiplexed_name_ref
  # One backreference per same-named group, using its position in the output.
  backrefs = expression.referenced_expressions.map do |group|
    "\\#{context.new_capturing_group_position(group.number)}"
  end

  Node.new("(?:#{backrefs.join('|')})")
end
|
34
58
|
|
35
59
|
def new_position
|
@@ -43,18 +67,53 @@ class JsRegex
|
|
43
67
|
def convert_call
|
44
68
|
if context.recursions(expression) >= 5
|
45
69
|
warn_of("Recursion for '#{expression}' curtailed at 5 levels")
|
46
|
-
return
|
70
|
+
return drop
|
47
71
|
end
|
48
72
|
|
49
73
|
context.count_recursion(expression)
|
74
|
+
|
75
|
+
# Track groups before the wrapper group is added
|
76
|
+
groups_before_wrapper = context.capturing_group_count
|
77
|
+
|
50
78
|
context.increment_local_capturing_group_count
|
51
79
|
target_copy = expression.referenced_expression.unquantified_clone
|
52
80
|
# avoid "Duplicate capture group name" error in JS
|
53
81
|
target_copy.token = :capture if target_copy.is?(:named, :group)
|
82
|
+
context.start_subexp_recursion
|
54
83
|
result = convert_expression(target_copy)
|
55
|
-
|
84
|
+
context.end_subexp_recursion
|
85
|
+
|
86
|
+
# Track all groups created during this recursive call
|
87
|
+
# This handles both the directly called group and any nested groups within it
|
88
|
+
# Get all group numbers from the referenced expression
|
89
|
+
original_groups = collect_group_numbers(expression.referenced_expression)
|
90
|
+
|
91
|
+
# The first new group number is groups_before_wrapper + 1
|
92
|
+
# (the wrapper group from increment_local_capturing_group_count doesn't appear in output)
|
93
|
+
first_new_group = groups_before_wrapper + 1
|
94
|
+
|
95
|
+
# Map each original group to its corresponding new group
|
96
|
+
# For example, if we recursively called group 1 which contains group 2,
|
97
|
+
# and this created groups 3 and 4, then:
|
98
|
+
# - group 1 -> group 3
|
99
|
+
# - group 2 -> group 4
|
100
|
+
original_groups.each_with_index do |old_group_num, index|
|
101
|
+
new_group_num = first_new_group + index
|
102
|
+
context.track_recursive_group_call(old_group_num, new_group_num)
|
103
|
+
end
|
104
|
+
|
105
|
+
# wrap in passive group if it is a full-pattern recursion
|
56
106
|
expression.reference == 0 ? Node.new('(?:', result, ')') : result
|
57
107
|
end
|
108
|
+
|
109
|
+
# Collects the numbers of all capturing groups in +exp+ and below it.
#
# exp - an expression responding to terminal?, capturing?, number and
#       each_expression.
#
# Returns an Array of group numbers, each listed once, in order of first
# appearance. Returns an empty Array for terminal expressions.
def collect_group_numbers(exp)
  return [] if exp.terminal?

  numbers = exp.capturing? ? [exp.number] : []
  exp.each_expression { |sub| numbers.concat(collect_group_numbers(sub)) }

  # NOTE(review): each_expression appears to yield nested descendants as well
  # as direct children (regexp_parser walks the whole subtree - confirm). In
  # that case the recursive calls above report deeper groups more than once.
  # convert_call maps old to new group numbers by array index, so duplicates
  # would shift that mapping; keep only the first occurrence of each number.
  numbers.uniq
end
|
58
117
|
end
|
59
118
|
end
|
60
119
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class JsRegex
|
2
4
|
module Converter
|
3
5
|
#
|
@@ -44,7 +46,9 @@ class JsRegex
|
|
44
46
|
end
|
45
47
|
|
46
48
|
def convert_subexpressions
|
47
|
-
|
49
|
+
# mark alternation and conditional branches for processing in second pass
|
50
|
+
type = expression.is?(:sequence) ? :branch : :plain
|
51
|
+
Node.new(*expression.map { |subexp| convert_expression(subexp) }, type: type)
|
48
52
|
end
|
49
53
|
|
50
54
|
def convert_expression(expression)
|
@@ -78,9 +82,15 @@ class JsRegex
|
|
78
82
|
def wrap_in_backrefed_lookahead(content)
|
79
83
|
number = context.capturing_group_count + 1
|
80
84
|
backref_node = Node.new("\\#{number}", reference: number, type: :backref)
|
85
|
+
backrefed_group = Node.new('(', *content, ')', reference: number, type: :captured_group)
|
81
86
|
context.increment_local_capturing_group_count
|
82
|
-
#
|
83
|
-
|
87
|
+
# The surrounding group is added so that quantifiers apply to the whole.
|
88
|
+
# Without it, `(?:)` would need to be appended as literal digits may follow.
|
89
|
+
Node.new('(?:(?=', backrefed_group, ')', backref_node, ')')
|
90
|
+
end
|
91
|
+
|
92
|
+
def unmatchable_substitution
|
93
|
+
'(?!)'
|
84
94
|
end
|
85
95
|
end
|
86
96
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class JsRegex
|
2
4
|
module Converter
|
3
5
|
#
|
@@ -10,6 +12,7 @@ class JsRegex
|
|
10
12
|
:case_insensitive_root,
|
11
13
|
:fail_fast,
|
12
14
|
:in_atomic_group,
|
15
|
+
:in_subexp_recursion,
|
13
16
|
:warnings
|
14
17
|
|
15
18
|
def initialize(case_insensitive_root: false, fail_fast: false, target: nil)
|
@@ -17,8 +20,10 @@ class JsRegex
|
|
17
20
|
self.capturing_group_count = 0
|
18
21
|
self.fail_fast = fail_fast
|
19
22
|
self.recursions_per_expression = {}
|
23
|
+
self.recursion_stack = []
|
20
24
|
self.required_options_hash = {}
|
21
25
|
self.warnings = []
|
26
|
+
self.recursive_group_map = {}
|
22
27
|
|
23
28
|
self.case_insensitive_root = case_insensitive_root
|
24
29
|
self.target = target
|
@@ -70,17 +75,34 @@ class JsRegex
|
|
70
75
|
end
|
71
76
|
|
72
77
|
def recursions(exp)
|
73
|
-
|
78
|
+
# Count recursions in the current stack path only
|
79
|
+
recursion_stack.count { |e| recursion_id(e) == recursion_id(exp) }
|
74
80
|
end
|
75
81
|
|
76
82
|
def count_recursion(exp)
|
77
|
-
|
83
|
+
recursion_stack.push(exp)
|
78
84
|
end
|
79
85
|
|
80
86
|
def recursion_id(exp)
|
81
87
|
[exp.class, exp.starts_at]
|
82
88
|
end
|
83
89
|
|
90
|
+
def start_subexp_recursion
|
91
|
+
self.in_subexp_recursion = true
|
92
|
+
self.recursion_start_group_count = capturing_group_count
|
93
|
+
end
|
94
|
+
|
95
|
+
def end_subexp_recursion
|
96
|
+
self.in_subexp_recursion = false
|
97
|
+
# Pop the last recursion from stack when exiting
|
98
|
+
recursion_stack.pop if recursion_stack.any?
|
99
|
+
end
|
100
|
+
|
101
|
+
# Get the number of groups at the start of the current recursion
|
102
|
+
def recursion_start_group_count
  # Read the instance variable directly. Calling
  # `self.recursion_start_group_count` here would re-enter this very method
  # and recurse until the stack overflows. The value is assigned through the
  # `recursion_start_group_count=` writer in start_subexp_recursion; it is
  # nil until then, hence the fallback to 0.
  @recursion_start_group_count || 0
end
|
105
|
+
|
84
106
|
# takes and returns 1-indexed group positions.
|
85
107
|
# new is different from old if capturing groups were added in between.
|
86
108
|
def new_capturing_group_position(old_position)
|
@@ -95,17 +117,31 @@ class JsRegex
|
|
95
117
|
capturing_group_count - total_added_capturing_groups
|
96
118
|
end
|
97
119
|
|
120
|
+
# Track that a group was created by a recursive call
|
121
|
+
def track_recursive_group_call(original_group_num, new_group_num)
|
122
|
+
recursive_group_map[original_group_num] = new_group_num
|
123
|
+
end
|
124
|
+
|
125
|
+
# Get the group number created by a recursive call
|
126
|
+
def get_recursive_group_position(original_group_num)
|
127
|
+
recursive_group_map[original_group_num]
|
128
|
+
end
|
129
|
+
|
98
130
|
private
|
99
131
|
|
100
132
|
attr_accessor :added_capturing_groups_after_group,
|
101
133
|
:recursions_per_expression,
|
134
|
+
:recursion_stack,
|
102
135
|
:required_options_hash,
|
136
|
+
:recursive_group_map,
|
103
137
|
:target
|
104
138
|
|
105
139
|
attr_writer :capturing_group_count,
|
106
140
|
:case_insensitive_root,
|
107
141
|
:fail_fast,
|
108
142
|
:in_atomic_group,
|
143
|
+
:in_subexp_recursion,
|
144
|
+
:recursion_start_group_count,
|
109
145
|
:warnings
|
110
146
|
|
111
147
|
def total_added_capturing_groups
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
require_relative 'literal_converter'
|
3
5
|
|
@@ -38,14 +40,14 @@ class JsRegex
|
|
38
40
|
case subtype
|
39
41
|
when :codepoint_list
|
40
42
|
convert_codepoint_list
|
41
|
-
when :control, :meta_sequence
|
43
|
+
when :control, :meta_sequence, :utf8_hex
|
42
44
|
unicode_escape_codepoint
|
43
45
|
when :literal
|
44
46
|
LiteralConverter.convert_data(expression.char, context)
|
47
|
+
when :bell, :escape, :hex, :octal
|
48
|
+
hex_escape_codepoint
|
45
49
|
when *ESCAPES_SHARED_BY_RUBY_AND_JS
|
46
50
|
pass_through
|
47
|
-
when :bell, :escape, :octal
|
48
|
-
hex_escape_codepoint
|
49
51
|
else
|
50
52
|
warn_of_unsupported_feature
|
51
53
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -22,12 +24,10 @@ class JsRegex
|
|
22
24
|
end
|
23
25
|
|
24
26
|
def build_named_group
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
build_group
|
30
|
-
end
|
27
|
+
# Always convert named groups to numbered groups. ES2018+ supports named
|
28
|
+
# groups, but can not handle repeated names in multiplexing or conditional
|
29
|
+
# expansion scenarios.
|
30
|
+
build_group
|
31
31
|
end
|
32
32
|
|
33
33
|
def emulate_atomic_group
|
@@ -69,10 +69,6 @@ class JsRegex
|
|
69
69
|
expression.empty?
|
70
70
|
end
|
71
71
|
|
72
|
-
def unmatchable_substitution
|
73
|
-
'(?!)'
|
74
|
-
end
|
75
|
-
|
76
72
|
def build_absence_group
|
77
73
|
head = "(?:(?:.|\\n){,#{expression.inner_match_length.min - 1}}|(?:(?!"
|
78
74
|
tail = ')(?:.|\n))*)'
|
@@ -84,7 +80,7 @@ class JsRegex
|
|
84
80
|
tail = opts[:tail] || ')'
|
85
81
|
return Node.new(*wrap(head, tail)) if opts[:capturing].equal?(false)
|
86
82
|
|
87
|
-
context.capture_group
|
83
|
+
context.capture_group unless context.in_subexp_recursion
|
88
84
|
ref = expression.number
|
89
85
|
Node.new(*wrap(head, tail), reference: ref, type: :captured_group)
|
90
86
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -6,8 +8,8 @@ class JsRegex
|
|
6
8
|
# Template class implementation.
|
7
9
|
#
|
8
10
|
class LiteralConverter < JsRegex::Converter::Base
|
9
|
-
ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]
|
10
|
-
LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]
|
11
|
+
ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/.freeze
|
12
|
+
LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/.freeze
|
11
13
|
|
12
14
|
class << self
|
13
15
|
def convert_data(data, context)
|
@@ -59,7 +61,7 @@ class JsRegex
|
|
59
61
|
result
|
60
62
|
end
|
61
63
|
|
62
|
-
HAS_CASE_PATTERN = /[\p{lower}\p{upper}]
|
64
|
+
HAS_CASE_PATTERN = /[\p{lower}\p{upper}]/.freeze
|
63
65
|
|
64
66
|
def handle_locally_case_insensitive_literal(literal)
|
65
67
|
literal =~ HAS_CASE_PATTERN ? case_insensitivize(literal) : literal
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
require_relative 'escape_converter'
|
3
5
|
require_relative 'type_converter'
|
@@ -41,11 +43,7 @@ class JsRegex
|
|
41
43
|
def simple_convert_child(exp)
|
42
44
|
case exp.type
|
43
45
|
when :literal
|
44
|
-
|
45
|
-
exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
|
46
|
-
!context.enable_u_option
|
47
|
-
|
48
|
-
LiteralConverter.escape_incompatible_bmp_literals(exp.text)
|
46
|
+
simple_convert_literal_child(exp)
|
49
47
|
when :set
|
50
48
|
# full conversion is needed for nested sets and intersections
|
51
49
|
exp.token.equal?(:range) && exp.expressions.map do |op|
|
@@ -67,7 +65,20 @@ class JsRegex
|
|
67
65
|
end
|
68
66
|
end
|
69
67
|
|
70
|
-
|
68
|
+
def simple_convert_literal_child(exp)
|
69
|
+
if !context.u? &&
|
70
|
+
exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
|
71
|
+
!context.enable_u_option
|
72
|
+
false
|
73
|
+
elsif SET_LITERALS_REQUIRING_ESCAPE_PATTERN.match?(exp.text)
|
74
|
+
"\\#{exp.text}"
|
75
|
+
else
|
76
|
+
LiteralConverter.escape_incompatible_bmp_literals(exp.text)
|
77
|
+
end
|
78
|
+
end
|
79
|
+
|
80
|
+
SET_LITERALS_REQUIRING_ESCAPE_PATTERN = Regexp.union(%w<( ) [ ] { } / - |>)
|
81
|
+
SET_SPECIFIC_ESCAPES_PATTERN = /[\^\-]/.freeze
|
71
82
|
CONVERTIBLE_ESCAPE_TOKENS = %i[control meta_sequence bell escape octal] +
|
72
83
|
EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS
|
73
84
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -10,10 +12,37 @@ class JsRegex
|
|
10
12
|
NONHEX_EXPANSION = '[^0-9A-Fa-f]'
|
11
13
|
I_MODE_HEX_EXPANSION = '[0-9A-F]'
|
12
14
|
I_MODE_NONHEX_EXPANSION = '[^0-9A-F]'
|
15
|
+
LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
|
13
16
|
ES2018_HEX_EXPANSION = '\p{AHex}'
|
14
17
|
ES2018_NONHEX_EXPANSION = '\P{AHex}'
|
15
|
-
|
16
|
-
|
18
|
+
# partially taken from https://unicode.org/reports/tr51/#EBNF_and_Regex
|
19
|
+
ES2018_XGRAPHEME_EXPANSION = <<-'REGEXP'.gsub(/\s+/, '')
|
20
|
+
(?:
|
21
|
+
\r\n
|
22
|
+
|
|
23
|
+
\p{RI}\p{RI}
|
24
|
+
|
|
25
|
+
\p{Emoji}
|
26
|
+
(?:
|
27
|
+
\p{EMod}
|
28
|
+
|
|
29
|
+
\uFE0F\u20E3?
|
30
|
+
|
|
31
|
+
[\u{E0020}-\u{E007E}]+\u{E007F}
|
32
|
+
)?
|
33
|
+
(?:
|
34
|
+
\u200D
|
35
|
+
(?:
|
36
|
+
\p{RI}\p{RI}
|
37
|
+
|
|
38
|
+
\p{Emoji}(?:\p{EMod}|\uFE0F\u20E3?|[\u{E0020}-\u{E007E}]+\u{E007F})?
|
39
|
+
)
|
40
|
+
)*
|
41
|
+
|
|
42
|
+
[\P{M}\P{Lm}](?:\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*
|
43
|
+
)
|
44
|
+
REGEXP
|
45
|
+
|
17
46
|
|
18
47
|
def self.directly_compatible?(expression, _context = nil)
|
19
48
|
case expression.token
|
@@ -30,7 +59,7 @@ class JsRegex
|
|
30
59
|
case subtype
|
31
60
|
when :hex then hex_expansion
|
32
61
|
when :nonhex then nonhex_expansion
|
33
|
-
when :linebreak then
|
62
|
+
when :linebreak then linebreak_expansion
|
34
63
|
when :xgrapheme then xgrapheme
|
35
64
|
when :digit, :space, :word
|
36
65
|
return pass_through if self.class.directly_compatible?(expression)
|
@@ -63,6 +92,10 @@ class JsRegex
|
|
63
92
|
end
|
64
93
|
end
|
65
94
|
|
95
|
+
def linebreak_expansion
|
96
|
+
wrap_in_backrefed_lookahead(LINEBREAK_EXPANSION)
|
97
|
+
end
|
98
|
+
|
66
99
|
def negative_set_substitution
|
67
100
|
# ::of_expression returns an inverted set for negative expressions,
|
68
101
|
# so we need to un-invert before wrapping in [^ and ]. Kinda lame.
|
@@ -79,7 +112,7 @@ class JsRegex
|
|
79
112
|
|
80
113
|
def xgrapheme
|
81
114
|
if context.es_2018_or_higher? && context.enable_u_option
|
82
|
-
ES2018_XGRAPHEME_EXPANSION
|
115
|
+
wrap_in_backrefed_lookahead(ES2018_XGRAPHEME_EXPANSION)
|
83
116
|
else
|
84
117
|
warn_of_unsupported_feature
|
85
118
|
end
|
data/lib/js_regex/converter.rb
CHANGED
data/lib/js_regex/error.rb
CHANGED
data/lib/js_regex/node.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class JsRegex
|
2
4
|
#
|
3
5
|
# Converter#convert result. Represents a branch or leaf node with an optional
|
@@ -10,6 +12,7 @@ class JsRegex
|
|
10
12
|
|
11
13
|
TYPES = %i[
|
12
14
|
backref
|
15
|
+
branch
|
13
16
|
captured_group
|
14
17
|
conditional
|
15
18
|
dropped
|
@@ -47,7 +50,7 @@ class JsRegex
|
|
47
50
|
case type
|
48
51
|
when :dropped
|
49
52
|
''
|
50
|
-
when :backref, :captured_group, :plain
|
53
|
+
when :backref, :branch, :captured_group, :plain
|
51
54
|
children.join << quantifier.to_s
|
52
55
|
else
|
53
56
|
raise TypeError.new(
|
@@ -63,6 +66,10 @@ class JsRegex
|
|
63
66
|
self
|
64
67
|
end
|
65
68
|
|
69
|
+
# Whether this node carries a quantifier that permits zero repetitions.
# Yields a falsy value when the node has no quantifier at all.
def optional?
  qtf = quantifier
  qtf && qtf.min.zero?
end
|
72
|
+
|
66
73
|
private
|
67
74
|
|
68
75
|
TypeError = Class.new(::TypeError).extend(JsRegex::Error)
|
data/lib/js_regex/second_pass.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class JsRegex
|
2
4
|
#
|
3
5
|
# After conversion of a full Regexp::Expression tree, this
|
@@ -8,6 +10,7 @@ class JsRegex
|
|
8
10
|
def call(tree)
|
9
11
|
substitute_root_level_keep_mark(tree)
|
10
12
|
alternate_conditional_permutations(tree)
|
13
|
+
handle_non_participating_backrefs(tree)
|
11
14
|
tree
|
12
15
|
end
|
13
16
|
|
@@ -26,12 +29,49 @@ class JsRegex
|
|
26
29
|
tree.update(children: [lookbehind, *post])
|
27
30
|
end
|
28
31
|
|
32
|
+
def handle_non_participating_backrefs(tree)
|
33
|
+
level = 0
|
34
|
+
completed_group_numbers = {}
|
35
|
+
group_branches = {}
|
36
|
+
branch_stack = []
|
37
|
+
|
38
|
+
crawl(tree, true) do |node, event|
|
39
|
+
case [node.type, event]
|
40
|
+
when [:branch, :enter]
|
41
|
+
branch_stack.push(node)
|
42
|
+
when [:branch, :exit]
|
43
|
+
branch_stack.pop
|
44
|
+
when [:captured_group, :enter]
|
45
|
+
level += 1
|
46
|
+
when [:captured_group, :exit]
|
47
|
+
unless node.optional? # ignore optional groups
|
48
|
+
group_branches[node.reference] = branch_stack.last
|
49
|
+
end
|
50
|
+
number = level
|
51
|
+
number += 1 while completed_group_numbers[number]
|
52
|
+
completed_group_numbers[number] = true
|
53
|
+
level -= 1
|
54
|
+
when [:backref, :exit]
|
55
|
+
ref_branch = group_branches[node.reference]
|
56
|
+
current_branch = branch_stack.last
|
57
|
+
|
58
|
+
# make bad backrefs non-matchable
|
59
|
+
references_other_branch =
|
60
|
+
ref_branch && current_branch && ref_branch != current_branch
|
61
|
+
forward_reference = !completed_group_numbers[node.reference]
|
62
|
+
if references_other_branch || forward_reference
|
63
|
+
node.update(type: :plain, children: ['(?!)'])
|
64
|
+
end
|
65
|
+
end
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
29
69
|
def alternate_conditional_permutations(tree)
|
30
70
|
permutations = conditional_tree_permutations(tree)
|
31
71
|
return if permutations.empty?
|
32
72
|
|
33
73
|
alternatives = permutations.map.with_index do |variant, i|
|
34
|
-
Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')')
|
74
|
+
Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')', type: :branch)
|
35
75
|
end
|
36
76
|
tree.update(children: alternatives)
|
37
77
|
end
|
@@ -48,13 +88,16 @@ class JsRegex
|
|
48
88
|
crawl(tree_permutation) do |node|
|
49
89
|
build_permutation(node, conds, truthy_conds, caps_per_branch, i)
|
50
90
|
end
|
91
|
+
tree_permutation
|
51
92
|
end
|
52
93
|
end
|
53
94
|
|
54
|
-
def crawl(node, &block)
|
95
|
+
def crawl(node, trace = false, &block)
|
55
96
|
return if node.instance_of?(String)
|
56
|
-
|
57
|
-
|
97
|
+
|
98
|
+
trace ? yield(node, :enter) : yield(node)
|
99
|
+
node.children.each { |child| crawl(child, trace, &block) }
|
100
|
+
trace && yield(node, :exit)
|
58
101
|
end
|
59
102
|
|
60
103
|
def conditions(tree)
|
@@ -90,11 +133,6 @@ class JsRegex
|
|
90
133
|
# backref numbers need to be incremented for subsequent "branches"
|
91
134
|
adapt_backref_to_permutation(node, caps_per_branch, i)
|
92
135
|
when :captured_group
|
93
|
-
# Remove name, c.f. :backref handling.
|
94
|
-
node.update(children: [
|
95
|
-
node.children.first.sub(/\?<.*>/, ''),
|
96
|
-
*node.children[1..-1]
|
97
|
-
])
|
98
136
|
# if the group is referenced by any condition, modulate its quantity
|
99
137
|
if conds.include?(node.reference)
|
100
138
|
adapt_referenced_group_to_permutation(node, truthy)
|
@@ -124,8 +162,9 @@ class JsRegex
|
|
124
162
|
end
|
125
163
|
|
126
164
|
def min_quantify(node)
|
127
|
-
return
|
165
|
+
return unless node.optional?
|
128
166
|
|
167
|
+
qtf = node.quantifier
|
129
168
|
if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
|
130
169
|
node.update(quantifier: nil)
|
131
170
|
else
|
@@ -135,10 +174,6 @@ class JsRegex
|
|
135
174
|
end
|
136
175
|
end
|
137
176
|
|
138
|
-
def guarantees_at_least_one_match?(quantifier)
|
139
|
-
quantifier.nil? || quantifier.min > 0
|
140
|
-
end
|
141
|
-
|
142
177
|
def null_quantify(node)
|
143
178
|
null_quantifier = Regexp::Expression::Quantifier.construct(text: '{0}')
|
144
179
|
node.update(quantifier: null_quantifier)
|
data/lib/js_regex/target.rb
CHANGED
data/lib/js_regex/version.rb
CHANGED
data/lib/js_regex.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
12
11
|
dependencies:
|
13
12
|
- !ruby/object:Gem::Dependency
|
14
13
|
name: character_set
|
@@ -28,22 +27,16 @@ dependencies:
|
|
28
27
|
name: regexp_parser
|
29
28
|
requirement: !ruby/object:Gem::Requirement
|
30
29
|
requirements:
|
31
|
-
- - "
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 2.6.2
|
34
|
-
- - "<"
|
30
|
+
- - "~>"
|
35
31
|
- !ruby/object:Gem::Version
|
36
|
-
version:
|
32
|
+
version: '2.11'
|
37
33
|
type: :runtime
|
38
34
|
prerelease: false
|
39
35
|
version_requirements: !ruby/object:Gem::Requirement
|
40
36
|
requirements:
|
41
|
-
- - "
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
version: 2.6.2
|
44
|
-
- - "<"
|
37
|
+
- - "~>"
|
45
38
|
- !ruby/object:Gem::Version
|
46
|
-
version:
|
39
|
+
version: '2.11'
|
47
40
|
- !ruby/object:Gem::Dependency
|
48
41
|
name: regexp_property_values
|
49
42
|
requirement: !ruby/object:Gem::Requirement
|
@@ -96,7 +89,6 @@ homepage: https://github.com/jaynetics/js_regex
|
|
96
89
|
licenses:
|
97
90
|
- MIT
|
98
91
|
metadata: {}
|
99
|
-
post_install_message:
|
100
92
|
rdoc_options: []
|
101
93
|
require_paths:
|
102
94
|
- lib
|
@@ -111,8 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
111
103
|
- !ruby/object:Gem::Version
|
112
104
|
version: '0'
|
113
105
|
requirements: []
|
114
|
-
rubygems_version: 3.
|
115
|
-
signing_key:
|
106
|
+
rubygems_version: 3.6.7
|
116
107
|
specification_version: 4
|
117
108
|
summary: Converts Ruby regexes to JavaScript regexes.
|
118
109
|
test_files: []
|