js_regex 3.9.0 → 3.10.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/js_regex/conversion.rb +4 -3
- data/lib/js_regex/converter/base.rb +5 -1
- data/lib/js_regex/converter/context.rb +9 -2
- data/lib/js_regex/converter/literal_converter.rb +10 -5
- data/lib/js_regex/converter/set_converter.rb +47 -31
- data/lib/js_regex/converter/type_converter.rb +1 -1
- data/lib/js_regex/version.rb +1 -1
- data/lib/js_regex.rb +11 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d173eb73a162ff8217d802e307396f07560d040afdccced0a4eb6ecfffc8b03c
|
4
|
+
data.tar.gz: 7067783f86ada3bdf592db5b9507127c7531437ade9db9c35b09578758db30b8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3de515b4bfa03c8cefbbe5924adb628fb1029f65b66c50931f70105a8617c96c52402fc6ffb43fe7fc949741a4067d6180c7108f8fa36cbdd23754168e9829a6
|
7
|
+
data.tar.gz: 544e70d9a700e145e74e91758194f41a79d56bbf4655e8d80ef8340a3c6bf3f8695b05057ca02e930461751bacea4a5aeaa6fbb4fb5908129f059fa3d908f077
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -13,20 +13,21 @@ class JsRegex
|
|
13
13
|
require_relative 'target'
|
14
14
|
|
15
15
|
class << self
|
16
|
-
def of(input, options: nil, target: Target::ES2009)
|
16
|
+
def of(input, options: nil, target: Target::ES2009, fail_fast: false)
|
17
17
|
target = Target.cast(target)
|
18
|
-
source, warnings, extra_opts = convert_source(input, target)
|
18
|
+
source, warnings, extra_opts = convert_source(input, target, fail_fast)
|
19
19
|
options_string = convert_options(input, options, extra_opts)
|
20
20
|
[source, options_string, warnings, target]
|
21
21
|
end
|
22
22
|
|
23
23
|
private
|
24
24
|
|
25
|
-
def convert_source(input, target)
|
25
|
+
def convert_source(input, target, fail_fast)
|
26
26
|
tree = Regexp::Parser.parse(input)
|
27
27
|
context = Converter::Context.new(
|
28
28
|
case_insensitive_root: tree.i?,
|
29
29
|
target: target,
|
30
|
+
fail_fast: fail_fast,
|
30
31
|
)
|
31
32
|
converted_tree = Converter.convert(tree, context)
|
32
33
|
final_tree = SecondPass.call(converted_tree)
|
data/lib/js_regex/converter/context.rb
CHANGED
@@ -8,15 +8,17 @@ class JsRegex
|
|
8
8
|
class Context
|
9
9
|
attr_reader :capturing_group_count,
|
10
10
|
:case_insensitive_root,
|
11
|
+
:fail_fast,
|
11
12
|
:in_atomic_group,
|
12
13
|
:warnings
|
13
14
|
|
14
|
-
def initialize(case_insensitive_root: false, target: nil)
|
15
|
+
def initialize(case_insensitive_root: false, fail_fast: false, target: nil)
|
15
16
|
self.added_capturing_groups_after_group = Hash.new(0)
|
16
17
|
self.capturing_group_count = 0
|
18
|
+
self.fail_fast = fail_fast
|
17
19
|
self.recursions_per_expression = {}
|
18
|
-
self.warnings = []
|
19
20
|
self.required_options_hash = {}
|
21
|
+
self.warnings = []
|
20
22
|
|
21
23
|
self.case_insensitive_root = case_insensitive_root
|
22
24
|
self.target = target
|
@@ -40,6 +42,10 @@ class JsRegex
|
|
40
42
|
required_options_hash['u'] = true
|
41
43
|
end
|
42
44
|
|
45
|
+
def u?
|
46
|
+
required_options_hash['u']
|
47
|
+
end
|
48
|
+
|
43
49
|
def required_options
|
44
50
|
required_options_hash.keys
|
45
51
|
end
|
@@ -98,6 +104,7 @@ class JsRegex
|
|
98
104
|
|
99
105
|
attr_writer :capturing_group_count,
|
100
106
|
:case_insensitive_root,
|
107
|
+
:fail_fast,
|
101
108
|
:in_atomic_group,
|
102
109
|
:warnings
|
103
110
|
|
data/lib/js_regex/converter/literal_converter.rb
CHANGED
@@ -6,11 +6,12 @@ class JsRegex
|
|
6
6
|
# Template class implementation.
|
7
7
|
#
|
8
8
|
class LiteralConverter < JsRegex::Converter::Base
|
9
|
-
|
10
|
-
|
9
|
+
ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
|
10
|
+
LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/
|
11
11
|
|
12
|
+
class << self
|
12
13
|
def convert_data(data, context)
|
13
|
-
if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
14
|
+
if !context.u? && data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
14
15
|
if context.enable_u_option
|
15
16
|
escape_incompatible_bmp_literals(data)
|
16
17
|
else
|
@@ -23,7 +24,7 @@ class JsRegex
|
|
23
24
|
|
24
25
|
def convert_astral_data(data)
|
25
26
|
data.each_char.each_with_object(Node.new) do |char, node|
|
26
|
-
if char
|
27
|
+
if char.ord > 0xFFFF
|
27
28
|
node << surrogate_substitution_for(char)
|
28
29
|
else
|
29
30
|
node << escape_incompatible_bmp_literals(char)
|
@@ -31,8 +32,12 @@ class JsRegex
|
|
31
32
|
end
|
32
33
|
end
|
33
34
|
|
35
|
+
ESCAPES = Hash.new { |h, k| raise KeyError, "#{h}[#{k.inspect}]" }
|
36
|
+
.merge("\f\n\r\t\v".chars.to_h { |c| [c, Regexp.escape(c)] })
|
37
|
+
.merge('/' => '\\/')
|
38
|
+
|
34
39
|
def escape_incompatible_bmp_literals(data)
|
35
|
-
data.gsub(
|
40
|
+
data.gsub(LITERAL_REQUIRING_ESCAPE_PATTERN, ESCAPES)
|
36
41
|
end
|
37
42
|
|
38
43
|
private
|
data/lib/js_regex/converter/set_converter.rb
CHANGED
@@ -17,54 +17,70 @@ class JsRegex
|
|
17
17
|
private
|
18
18
|
|
19
19
|
def convert_data
|
20
|
-
|
20
|
+
simple_conversion || full_recalculation
|
21
|
+
end
|
21
22
|
|
22
|
-
|
23
|
-
if
|
24
|
-
content = content.case_insensitive
|
25
|
-
elsif !expression.case_insensitive? && context.case_insensitive_root
|
26
|
-
warn_of_unsupported_feature('nested case-sensitive set')
|
27
|
-
end
|
23
|
+
def simple_conversion
|
24
|
+
return false if casefolding_needed?
|
28
25
|
|
29
|
-
if
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
26
|
+
result = "[#{'^' if expression.negative?}".dup
|
27
|
+
|
28
|
+
expression.expressions.each do |subexp|
|
29
|
+
return false unless (child_res = simple_convert_child(subexp))
|
30
|
+
|
31
|
+
result << child_res.to_s
|
34
32
|
end
|
35
|
-
end
|
36
33
|
|
37
|
-
|
38
|
-
all_children_directly_compatible? && !casefolding_needed?
|
34
|
+
result << ']'
|
39
35
|
end
|
40
36
|
|
41
|
-
def
|
42
|
-
|
43
|
-
expression.each_expression.all? { |ch| child_directly_compatible?(ch) }
|
37
|
+
def casefolding_needed?
|
38
|
+
expression.case_insensitive? ^ context.case_insensitive_root
|
44
39
|
end
|
45
40
|
|
46
|
-
def
|
41
|
+
def simple_convert_child(exp)
|
47
42
|
case exp.type
|
48
43
|
when :literal
|
49
|
-
|
50
|
-
|
44
|
+
return false if !context.u? &&
|
45
|
+
exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
|
46
|
+
!context.enable_u_option
|
47
|
+
|
48
|
+
LiteralConverter.escape_incompatible_bmp_literals(exp.text)
|
51
49
|
when :set
|
52
|
-
# conversion needed for nested sets
|
53
|
-
exp.token.equal?(:range)
|
50
|
+
# full conversion is needed for nested sets and intersections
|
51
|
+
exp.token.equal?(:range) && exp.expressions.map do |op|
|
52
|
+
simple_convert_child(op) or return false
|
53
|
+
end.join('-')
|
54
54
|
when :type
|
55
|
-
TypeConverter.directly_compatible?(exp)
|
55
|
+
TypeConverter.directly_compatible?(exp, context) &&
|
56
|
+
exp.text
|
56
57
|
when :escape
|
57
|
-
|
58
|
+
case exp.token
|
59
|
+
when *CONVERTIBLE_ESCAPE_TOKENS
|
60
|
+
EscapeConverter.new.convert(exp, context)
|
61
|
+
when :literal
|
62
|
+
exp.char.ord <= 0xFFFF &&
|
63
|
+
LiteralConverter.escape_incompatible_bmp_literals(exp.char)
|
64
|
+
end
|
58
65
|
end
|
59
66
|
end
|
60
67
|
|
61
|
-
|
62
|
-
|
63
|
-
end
|
68
|
+
CONVERTIBLE_ESCAPE_TOKENS = %i[control meta_sequence bell escape octal] +
|
69
|
+
EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS
|
64
70
|
|
65
|
-
def
|
66
|
-
|
67
|
-
|
71
|
+
def full_recalculation
|
72
|
+
content = CharacterSet.of_expression(expression)
|
73
|
+
if expression.case_insensitive? && !context.case_insensitive_root
|
74
|
+
content = content.case_insensitive
|
75
|
+
elsif !expression.case_insensitive? && context.case_insensitive_root
|
76
|
+
warn_of_unsupported_feature('nested case-sensitive set')
|
77
|
+
end
|
78
|
+
if context.es_2015_or_higher?
|
79
|
+
context.enable_u_option if content.astral_part?
|
80
|
+
content.to_s(format: 'es6', in_brackets: true)
|
81
|
+
else
|
82
|
+
content.to_s_with_surrogate_ranges
|
83
|
+
end
|
68
84
|
end
|
69
85
|
end
|
70
86
|
end
|
data/lib/js_regex/converter/type_converter.rb
CHANGED
@@ -13,7 +13,7 @@ class JsRegex
|
|
13
13
|
ES2018_XGRAPHEME_EXPANSION = '[\P{M}\P{Lm}](?:(?:[\u035C\u0361]\P{M}\p{M}*)|\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*'
|
14
14
|
LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
|
15
15
|
|
16
|
-
def self.directly_compatible?(expression)
|
16
|
+
def self.directly_compatible?(expression, _context = nil)
|
17
17
|
case expression.token
|
18
18
|
when :space, :nonspace
|
19
19
|
!expression.ascii_classes?
|
data/lib/js_regex/version.rb
CHANGED
data/lib/js_regex.rb
CHANGED
@@ -30,14 +30,17 @@ class JsRegex
|
|
30
30
|
"/#{source.empty? ? '(?:)' : source}/#{options}"
|
31
31
|
end
|
32
32
|
|
33
|
+
# @raise JsRegex::ConversionError
|
33
34
|
def self.new!(ruby_regex, **kwargs)
|
34
|
-
|
35
|
-
if js_regex.warnings.any?
|
36
|
-
raise StandardError.new(
|
37
|
-
"Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
|
38
|
-
js_regex.warnings.join("\n")
|
39
|
-
).extend(JsRegex::Error)
|
40
|
-
end
|
41
|
-
js_regex
|
35
|
+
new(ruby_regex, fail_fast: true, **kwargs)
|
42
36
|
end
|
37
|
+
|
38
|
+
def self.compatible?(ruby_regex, **kwargs)
|
39
|
+
new!(ruby_regex, **kwargs)
|
40
|
+
true
|
41
|
+
rescue ConversionError
|
42
|
+
false
|
43
|
+
end
|
44
|
+
|
45
|
+
ConversionError = Class.new(StandardError).send(:include, JsRegex::Error)
|
43
46
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.9.0
|
4
|
+
version: 3.10.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-01-
|
11
|
+
date: 2023-01-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: character_set
|