js_regex 3.9.0 → 3.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/js_regex/conversion.rb +4 -3
- data/lib/js_regex/converter/base.rb +5 -1
- data/lib/js_regex/converter/context.rb +9 -2
- data/lib/js_regex/converter/literal_converter.rb +10 -5
- data/lib/js_regex/converter/set_converter.rb +50 -31
- data/lib/js_regex/converter/type_converter.rb +1 -1
- data/lib/js_regex/target.rb +1 -1
- data/lib/js_regex/version.rb +1 -1
- data/lib/js_regex.rb +11 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6c55b0dded778c8851267d5d6d24c4a1afcbdb64e6d28ef3e48da5ba8acaf4d8
|
4
|
+
data.tar.gz: 71cf3ead5808405c0e32a2fa2dd63a8efa6f5f02df9ef707c12d4238087121a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d5cb89a802b4c90382146f6a49e2c0fbefa5aed7580d0698e8bd392e05ed172880a713dc619b484e9fa0f28689b1b03c35eb08f9efc6673895ca1a450985723e
|
7
|
+
data.tar.gz: c0c8fda9db5419e5d05b43036190d1fb3d9e44d32c7f57598f21cb6c16d444208dae18ed6cdeb871357188619113475f83deb70c4eef2babfc44a37c27cbbbb9
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -13,20 +13,21 @@ class JsRegex
|
|
13
13
|
require_relative 'target'
|
14
14
|
|
15
15
|
class << self
|
16
|
-
def of(input, options: nil, target: Target::ES2009)
|
16
|
+
def of(input, options: nil, target: Target::ES2009, fail_fast: false)
|
17
17
|
target = Target.cast(target)
|
18
|
-
source, warnings, extra_opts = convert_source(input, target)
|
18
|
+
source, warnings, extra_opts = convert_source(input, target, fail_fast)
|
19
19
|
options_string = convert_options(input, options, extra_opts)
|
20
20
|
[source, options_string, warnings, target]
|
21
21
|
end
|
22
22
|
|
23
23
|
private
|
24
24
|
|
25
|
-
def convert_source(input, target)
|
25
|
+
def convert_source(input, target, fail_fast)
|
26
26
|
tree = Regexp::Parser.parse(input)
|
27
27
|
context = Converter::Context.new(
|
28
28
|
case_insensitive_root: tree.i?,
|
29
29
|
target: target,
|
30
|
+
fail_fast: fail_fast,
|
30
31
|
)
|
31
32
|
converted_tree = Converter.convert(tree, context)
|
32
33
|
final_tree = SecondPass.call(converted_tree)
|
data/lib/js_regex/converter/context.rb
CHANGED
@@ -8,15 +8,17 @@ class JsRegex
|
|
8
8
|
class Context
|
9
9
|
attr_reader :capturing_group_count,
|
10
10
|
:case_insensitive_root,
|
11
|
+
:fail_fast,
|
11
12
|
:in_atomic_group,
|
12
13
|
:warnings
|
13
14
|
|
14
|
-
def initialize(case_insensitive_root: false, target: nil)
|
15
|
+
def initialize(case_insensitive_root: false, fail_fast: false, target: nil)
|
15
16
|
self.added_capturing_groups_after_group = Hash.new(0)
|
16
17
|
self.capturing_group_count = 0
|
18
|
+
self.fail_fast = fail_fast
|
17
19
|
self.recursions_per_expression = {}
|
18
|
-
self.warnings = []
|
19
20
|
self.required_options_hash = {}
|
21
|
+
self.warnings = []
|
20
22
|
|
21
23
|
self.case_insensitive_root = case_insensitive_root
|
22
24
|
self.target = target
|
@@ -40,6 +42,10 @@ class JsRegex
|
|
40
42
|
required_options_hash['u'] = true
|
41
43
|
end
|
42
44
|
|
45
|
+
def u?
|
46
|
+
required_options_hash['u']
|
47
|
+
end
|
48
|
+
|
43
49
|
def required_options
|
44
50
|
required_options_hash.keys
|
45
51
|
end
|
@@ -98,6 +104,7 @@ class JsRegex
|
|
98
104
|
|
99
105
|
attr_writer :capturing_group_count,
|
100
106
|
:case_insensitive_root,
|
107
|
+
:fail_fast,
|
101
108
|
:in_atomic_group,
|
102
109
|
:warnings
|
103
110
|
|
data/lib/js_regex/converter/literal_converter.rb
CHANGED
@@ -6,11 +6,12 @@ class JsRegex
|
|
6
6
|
# Template class implementation.
|
7
7
|
#
|
8
8
|
class LiteralConverter < JsRegex::Converter::Base
|
9
|
-
|
10
|
-
|
9
|
+
ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
|
10
|
+
LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/
|
11
11
|
|
12
|
+
class << self
|
12
13
|
def convert_data(data, context)
|
13
|
-
if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
14
|
+
if !context.u? && data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
14
15
|
if context.enable_u_option
|
15
16
|
escape_incompatible_bmp_literals(data)
|
16
17
|
else
|
@@ -23,7 +24,7 @@ class JsRegex
|
|
23
24
|
|
24
25
|
def convert_astral_data(data)
|
25
26
|
data.each_char.each_with_object(Node.new) do |char, node|
|
26
|
-
if char
|
27
|
+
if char.ord > 0xFFFF
|
27
28
|
node << surrogate_substitution_for(char)
|
28
29
|
else
|
29
30
|
node << escape_incompatible_bmp_literals(char)
|
@@ -31,8 +32,12 @@ class JsRegex
|
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
35
|
+
ESCAPES = Hash.new { |h, k| raise KeyError, "#{h}[#{k.inspect}]" }
|
36
|
+
.merge("\f\n\r\t\v".chars.to_h { |c| [c, Regexp.escape(c)] })
|
37
|
+
.merge('/' => '\\/')
|
38
|
+
|
34
39
|
def escape_incompatible_bmp_literals(data)
|
35
|
-
data.gsub(
|
40
|
+
data.gsub(LITERAL_REQUIRING_ESCAPE_PATTERN, ESCAPES)
|
36
41
|
end
|
37
42
|
|
38
43
|
private
|
data/lib/js_regex/converter/set_converter.rb
CHANGED
@@ -17,54 +17,73 @@ class JsRegex
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def convert_data
|
20
|
-
|
20
|
+
simple_conversion || full_recalculation
|
21
|
+
end
|
21
22
|
|
22
|
-
|
23
|
-
if
|
24
|
-
content = content.case_insensitive
|
25
|
-
elsif !expression.case_insensitive? && context.case_insensitive_root
|
26
|
-
warn_of_unsupported_feature('nested case-sensitive set')
|
27
|
-
end
|
23
|
+
def simple_conversion
|
24
|
+
return false if casefolding_needed?
|
28
25
|
|
29
|
-
if
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
26
|
+
result = "[#{'^' if expression.negative?}".dup
|
27
|
+
|
28
|
+
expression.expressions.each do |subexp|
|
29
|
+
return false unless (child_res = simple_convert_child(subexp))
|
30
|
+
|
31
|
+
result << child_res.to_s
|
34
32
|
end
|
35
|
-
end
|
36
33
|
|
37
|
-
|
38
|
-
all_children_directly_compatible? && !casefolding_needed?
|
34
|
+
result << ']'
|
39
35
|
end
|
40
36
|
|
41
|
-
def
|
42
|
-
|
43
|
-
expression.each_expression.all? { |ch| child_directly_compatible?(ch) }
|
37
|
+
def casefolding_needed?
|
38
|
+
expression.case_insensitive? ^ context.case_insensitive_root
|
44
39
|
end
|
45
40
|
|
46
|
-
def
|
41
|
+
def simple_convert_child(exp)
|
47
42
|
case exp.type
|
48
43
|
when :literal
|
49
|
-
|
50
|
-
|
44
|
+
return false if !context.u? &&
|
45
|
+
exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
|
46
|
+
!context.enable_u_option
|
47
|
+
|
48
|
+
LiteralConverter.escape_incompatible_bmp_literals(exp.text)
|
51
49
|
when :set
|
52
|
-
# conversion needed for nested sets
|
53
|
-
exp.token.equal?(:range)
|
50
|
+
# full conversion is needed for nested sets and intersections
|
51
|
+
exp.token.equal?(:range) && exp.expressions.map do |op|
|
52
|
+
simple_convert_child(op) or return false
|
53
|
+
end.join('-')
|
54
54
|
when :type
|
55
|
-
TypeConverter.directly_compatible?(exp)
|
55
|
+
TypeConverter.directly_compatible?(exp, context) &&
|
56
|
+
exp.text
|
56
57
|
when :escape
|
57
|
-
|
58
|
+
return exp.text if SET_SPECIFIC_ESCAPES_PATTERN.match?(exp.text)
|
59
|
+
|
60
|
+
case exp.token
|
61
|
+
when *CONVERTIBLE_ESCAPE_TOKENS
|
62
|
+
EscapeConverter.new.convert(exp, context)
|
63
|
+
when :literal
|
64
|
+
exp.char.ord <= 0xFFFF &&
|
65
|
+
LiteralConverter.escape_incompatible_bmp_literals(exp.char)
|
66
|
+
end
|
58
67
|
end
|
59
68
|
end
|
60
69
|
|
61
|
-
|
62
|
-
|
63
|
-
|
70
|
+
SET_SPECIFIC_ESCAPES_PATTERN = /[\^\-]/
|
71
|
+
CONVERTIBLE_ESCAPE_TOKENS = %i[control meta_sequence bell escape octal] +
|
72
|
+
EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS
|
64
73
|
|
65
|
-
def
|
66
|
-
|
67
|
-
|
74
|
+
def full_recalculation
|
75
|
+
content = CharacterSet.of_expression(expression)
|
76
|
+
if expression.case_insensitive? && !context.case_insensitive_root
|
77
|
+
content = content.case_insensitive
|
78
|
+
elsif !expression.case_insensitive? && context.case_insensitive_root
|
79
|
+
warn_of_unsupported_feature('nested case-sensitive set')
|
80
|
+
end
|
81
|
+
if context.es_2015_or_higher?
|
82
|
+
context.enable_u_option if content.astral_part?
|
83
|
+
content.to_s(format: 'es6', in_brackets: true)
|
84
|
+
else
|
85
|
+
content.to_s_with_surrogate_ranges
|
86
|
+
end
|
68
87
|
end
|
69
88
|
end
|
70
89
|
end
|
data/lib/js_regex/converter/type_converter.rb
CHANGED
@@ -13,7 +13,7 @@ class JsRegex
|
|
13
13
|
ES2018_XGRAPHEME_EXPANSION = '[\P{M}\P{Lm}](?:(?:[\u035C\u0361]\P{M}\p{M}*)|\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*'
|
14
14
|
LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
|
15
15
|
|
16
|
-
def self.directly_compatible?(expression)
|
16
|
+
def self.directly_compatible?(expression, _context = nil)
|
17
17
|
case expression.token
|
18
18
|
when :space, :nonspace
|
19
19
|
!expression.ascii_classes?
|
data/lib/js_regex/target.rb
CHANGED
@@ -8,7 +8,7 @@ class JsRegex
|
|
8
8
|
def self.cast(arg)
|
9
9
|
return ES2009 if arg.nil?
|
10
10
|
|
11
|
-
normalized_arg = arg.to_s.upcase
|
11
|
+
normalized_arg = arg.to_s.upcase.sub(/^(ECMASCRIPT|ES|JAVASCRIPT|JS)? ?/, 'ES')
|
12
12
|
return normalized_arg if SUPPORTED.include?(normalized_arg)
|
13
13
|
|
14
14
|
raise ArgumentError.new(
|
data/lib/js_regex/version.rb
CHANGED
data/lib/js_regex.rb
CHANGED
@@ -30,14 +30,17 @@ class JsRegex
|
|
30
30
|
"/#{source.empty? ? '(?:)' : source}/#{options}"
|
31
31
|
end
|
32
32
|
|
33
|
+
# @raise JsRegex::ConversionError
|
33
34
|
def self.new!(ruby_regex, **kwargs)
|
34
|
-
|
35
|
-
if js_regex.warnings.any?
|
36
|
-
raise StandardError.new(
|
37
|
-
"Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
|
38
|
-
js_regex.warnings.join("\n")
|
39
|
-
).extend(JsRegex::Error)
|
40
|
-
end
|
41
|
-
js_regex
|
35
|
+
new(ruby_regex, fail_fast: true, **kwargs)
|
42
36
|
end
|
37
|
+
|
38
|
+
def self.compatible?(ruby_regex, **kwargs)
|
39
|
+
new!(ruby_regex, **kwargs)
|
40
|
+
true
|
41
|
+
rescue ConversionError
|
42
|
+
false
|
43
|
+
end
|
44
|
+
|
45
|
+
ConversionError = Class.new(StandardError).send(:include, JsRegex::Error)
|
43
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.9.0
|
4
|
+
version: 3.11.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-03-23 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: character_set
|