js_regex 3.7.2 → 3.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/js_regex/conversion.rb +16 -11
- data/lib/js_regex/converter/anchor_converter.rb +28 -6
- data/lib/js_regex/converter/assertion_converter.rb +15 -4
- data/lib/js_regex/converter/backreference_converter.rb +23 -6
- data/lib/js_regex/converter/base.rb +10 -6
- data/lib/js_regex/converter/conditional_converter.rb +2 -2
- data/lib/js_regex/converter/context.rb +28 -2
- data/lib/js_regex/converter/escape_converter.rb +11 -3
- data/lib/js_regex/converter/group_converter.rb +14 -8
- data/lib/js_regex/converter/keep_converter.rb +24 -0
- data/lib/js_regex/converter/literal_converter.rb +8 -4
- data/lib/js_regex/converter/meta_converter.rb +10 -0
- data/lib/js_regex/converter/property_converter.rb +15 -0
- data/lib/js_regex/converter/property_map.csv +171 -0
- data/lib/js_regex/converter/set_converter.rb +8 -3
- data/lib/js_regex/converter/type_converter.rb +23 -5
- data/lib/js_regex/converter.rb +1 -0
- data/lib/js_regex/node.rb +4 -2
- data/lib/js_regex/second_pass.rb +41 -13
- data/lib/js_regex/target.rb +19 -0
- data/lib/js_regex/version.rb +1 -1
- data/lib/js_regex.rb +5 -5
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8971658980813740deb03ece3c5af5bfdfd7412f0630fc4a3f172e4c06b11c52
|
4
|
+
data.tar.gz: bdafa3639a230b1ec1ac4661828050d99339eb18ec1768fa2a6f1b5e69d95f1b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 13abdf7b41485194f05cce79751ca60e8b1f9fc864b17f58294650df9f9e485a889b9571d847bf564aa12b709fd572c53f27a5a2900e3dc8bfa765f522b58e62
|
7
|
+
data.tar.gz: 31941c0d7a4842fdea84d5f649f3df30c54e8da24e6dbf722cff3e48661ae89646117f66f33ee419c0fc0e393b11f284cb565751b4f77aa5eb0bbbc8d38d903d
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
class JsRegex
|
2
2
|
#
|
3
|
-
# This class acts as a facade, passing a
|
3
|
+
# This class acts as a facade, passing a Regexp to the Converters.
|
4
4
|
#
|
5
|
-
# ::of returns a source String, options String,
|
5
|
+
# ::of returns a source String, options String, warnings Array, target String.
|
6
6
|
#
|
7
7
|
class Conversion
|
8
8
|
require 'regexp_parser'
|
@@ -10,28 +10,33 @@ class JsRegex
|
|
10
10
|
require_relative 'error'
|
11
11
|
require_relative 'node'
|
12
12
|
require_relative 'second_pass'
|
13
|
+
require_relative 'target'
|
13
14
|
|
14
15
|
class << self
|
15
|
-
def of(input, options: nil)
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
def of(input, options: nil, target: Target::ES2009)
|
17
|
+
target = Target.cast(target)
|
18
|
+
source, warnings, extra_opts = convert_source(input, target)
|
19
|
+
options_string = convert_options(input, options, extra_opts)
|
20
|
+
[source, options_string, warnings, target]
|
19
21
|
end
|
20
22
|
|
21
23
|
private
|
22
24
|
|
23
|
-
def convert_source(input)
|
25
|
+
def convert_source(input, target)
|
24
26
|
tree = Regexp::Parser.parse(input)
|
25
|
-
context = Converter::Context.new(
|
27
|
+
context = Converter::Context.new(
|
28
|
+
case_insensitive_root: tree.i?,
|
29
|
+
target: target,
|
30
|
+
)
|
26
31
|
converted_tree = Converter.convert(tree, context)
|
27
32
|
final_tree = SecondPass.call(converted_tree)
|
28
|
-
[final_tree.to_s, context.warnings]
|
33
|
+
[final_tree.to_s, context.warnings, context.required_options]
|
29
34
|
rescue Regexp::Parser::Error => e
|
30
35
|
raise e.extend(JsRegex::Error)
|
31
36
|
end
|
32
37
|
|
33
|
-
def convert_options(input, custom_options)
|
34
|
-
options = custom_options.to_s.scan(/[
|
38
|
+
def convert_options(input, custom_options, required_options)
|
39
|
+
options = custom_options.to_s.scan(/[gimsuy]/) + required_options
|
35
40
|
if input.is_a?(Regexp) && (input.options & Regexp::IGNORECASE).nonzero?
|
36
41
|
options << 'i'
|
37
42
|
end
|
data/lib/js_regex/converter/anchor_converter.rb
CHANGED
@@ -13,17 +13,39 @@ class JsRegex
|
|
13
13
|
when :bol, :bos then '^'
|
14
14
|
when :eol, :eos then '$'
|
15
15
|
when :eos_ob_eol then '(?=\n?$)'
|
16
|
-
when :word_boundary then
|
17
|
-
when :nonword_boundary then
|
16
|
+
when :word_boundary then convert_boundary
|
17
|
+
when :nonword_boundary then convert_nonboundary
|
18
18
|
else
|
19
19
|
warn_of_unsupported_feature
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
def convert_boundary
|
24
|
+
if context.es_2018_or_higher? && context.enable_u_option
|
25
|
+
BOUNDARY_EXPANSION
|
26
|
+
else
|
27
|
+
pass_boundary_with_warning
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def convert_nonboundary
|
32
|
+
if context.es_2018_or_higher? && context.enable_u_option
|
33
|
+
NONBOUNDARY_EXPANSION
|
34
|
+
else
|
35
|
+
pass_boundary_with_warning
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# This is an approximation to the word boundary behavior in Ruby, c.f.
|
40
|
+
# https://github.com/ruby/ruby/blob/08476c45/tool/enc-unicode.rb#L130
|
41
|
+
W = '\d\p{L}\p{M}\p{Pc}'
|
42
|
+
BOUNDARY_EXPANSION = "(?:(?<=[#{W}])(?=[^#{W}]|$)|(?<=[^#{W}]|^)(?=[#{W}]))"
|
43
|
+
NONBOUNDARY_EXPANSION = "(?<=[#{W}])(?=[#{W}])"
|
44
|
+
|
45
|
+
def pass_boundary_with_warning
|
46
|
+
warn_of("The anchor '#{data}' at index #{expression.ts} only works "\
|
47
|
+
'at ASCII word boundaries with targets below ES2018".')
|
48
|
+
pass_through
|
27
49
|
end
|
28
50
|
end
|
29
51
|
end
|
data/lib/js_regex/converter/assertion_converter.rb
CHANGED
@@ -14,13 +14,24 @@ class JsRegex
|
|
14
14
|
def convert_data
|
15
15
|
case subtype
|
16
16
|
when :lookahead, :nlookahead
|
17
|
-
|
17
|
+
keep_as_is
|
18
|
+
when :lookbehind
|
19
|
+
return keep_as_is if context.es_2018_or_higher?
|
20
|
+
|
21
|
+
warn_of_unsupported_feature('lookbehind', min_target: Target::ES2018)
|
22
|
+
build_passive_group
|
18
23
|
when :nlookbehind
|
19
|
-
|
20
|
-
|
21
|
-
|
24
|
+
return keep_as_is if context.es_2018_or_higher?
|
25
|
+
|
26
|
+
warn_of_unsupported_feature('negative lookbehind', min_target: Target::ES2018)
|
27
|
+
else
|
28
|
+
warn_of_unsupported_feature
|
22
29
|
end
|
23
30
|
end
|
31
|
+
|
32
|
+
def keep_as_is
|
33
|
+
build_group(head: pass_through, capturing: false)
|
34
|
+
end
|
24
35
|
end
|
25
36
|
end
|
26
37
|
end
|
data/lib/js_regex/converter/backreference_converter.rb
CHANGED
@@ -10,16 +10,30 @@ class JsRegex
|
|
10
10
|
|
11
11
|
def convert_data
|
12
12
|
case subtype
|
13
|
-
when :name_ref
|
14
|
-
when :
|
13
|
+
when :name_ref then convert_name_ref
|
14
|
+
when :number, :number_ref, :number_rel_ref then convert_to_plain_num_ref
|
15
|
+
when :name_call, :number_call, :number_rel_call then convert_call
|
15
16
|
else # name_recursion_ref, number_recursion_ref, ...
|
16
17
|
warn_of_unsupported_feature
|
17
18
|
end
|
18
19
|
end
|
19
20
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
21
|
+
def convert_name_ref
|
22
|
+
if context.es_2018_or_higher?
|
23
|
+
# ES 2018+ supports named backrefs, but only the angled-bracket syntax
|
24
|
+
Node.new("\\k<#{expression.name}>", reference: new_position, type: :backref)
|
25
|
+
else
|
26
|
+
convert_to_plain_num_ref
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def convert_to_plain_num_ref
|
31
|
+
position = new_position
|
32
|
+
Node.new("\\#{position}", reference: position, type: :backref)
|
33
|
+
end
|
34
|
+
|
35
|
+
def new_position
|
36
|
+
context.new_capturing_group_position(target_position)
|
23
37
|
end
|
24
38
|
|
25
39
|
def target_position
|
@@ -31,7 +45,10 @@ class JsRegex
|
|
31
45
|
return warn_of_unsupported_feature('whole-pattern recursion')
|
32
46
|
end
|
33
47
|
context.increment_local_capturing_group_count
|
34
|
-
|
48
|
+
target_copy = expression.referenced_expression.unquantified_clone
|
49
|
+
# avoid "Duplicate capture group name" error in JS
|
50
|
+
target_copy.token = :capture if target_copy.is?(:named, :group)
|
51
|
+
convert_expression(target_copy)
|
35
52
|
end
|
36
53
|
end
|
37
54
|
end
|
data/lib/js_regex/converter/base.rb
CHANGED
@@ -51,10 +51,14 @@ class JsRegex
|
|
51
51
|
Converter.convert(expression, context)
|
52
52
|
end
|
53
53
|
|
54
|
-
def warn_of_unsupported_feature(description = nil)
|
54
|
+
def warn_of_unsupported_feature(description = nil, min_target: nil)
|
55
55
|
description ||= "#{subtype} #{expression.type}".tr('_', ' ')
|
56
|
-
|
57
|
-
|
56
|
+
full_text = "Dropped unsupported #{description} '#{expression}' "\
|
57
|
+
"at index #{expression.ts}"
|
58
|
+
if min_target
|
59
|
+
full_text += " (requires at least `target: '#{min_target}'`)"
|
60
|
+
end
|
61
|
+
warn_of(full_text)
|
58
62
|
drop
|
59
63
|
end
|
60
64
|
|
@@ -68,11 +72,11 @@ class JsRegex
|
|
68
72
|
alias drop_without_warning drop
|
69
73
|
|
70
74
|
def wrap_in_backrefed_lookahead(content)
|
71
|
-
|
72
|
-
|
75
|
+
number = context.capturing_group_count + 1
|
76
|
+
backref_node = Node.new("\\#{number}", reference: number, type: :backref)
|
73
77
|
context.increment_local_capturing_group_count
|
74
78
|
# an empty passive group (?:) is appended as literal digits may follow
|
75
|
-
Node.new('(?=(', *content, '))
|
79
|
+
Node.new('(?=(', *content, '))', backref_node, '(?:)')
|
76
80
|
end
|
77
81
|
end
|
78
82
|
end
|
data/lib/js_regex/converter/conditional_converter.rb
CHANGED
@@ -10,12 +10,12 @@ class JsRegex
|
|
10
10
|
|
11
11
|
def convert_data
|
12
12
|
case subtype
|
13
|
-
when :open then
|
13
|
+
when :open then mark_conditional_for_second_pass
|
14
14
|
else warn_of_unsupported_feature
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
-
def
|
18
|
+
def mark_conditional_for_second_pass
|
19
19
|
reference = expression.referenced_expression.number
|
20
20
|
node = Node.new('(?:', reference: reference, type: :conditional)
|
21
21
|
expression.branches.each do |branch|
|
data/lib/js_regex/converter/context.rb
CHANGED
@@ -11,12 +11,36 @@ class JsRegex
|
|
11
11
|
:in_atomic_group,
|
12
12
|
:warnings
|
13
13
|
|
14
|
-
def initialize(case_insensitive_root: false)
|
14
|
+
def initialize(case_insensitive_root: false, target: nil)
|
15
15
|
self.added_capturing_groups_after_group = Hash.new(0)
|
16
16
|
self.capturing_group_count = 0
|
17
17
|
self.warnings = []
|
18
|
+
self.required_options_hash = {}
|
18
19
|
|
19
20
|
self.case_insensitive_root = case_insensitive_root
|
21
|
+
self.target = target
|
22
|
+
end
|
23
|
+
|
24
|
+
# target context
|
25
|
+
|
26
|
+
def es_2015_or_higher?
|
27
|
+
target >= Target::ES2015
|
28
|
+
end
|
29
|
+
|
30
|
+
def es_2018_or_higher?
|
31
|
+
target >= Target::ES2018
|
32
|
+
end
|
33
|
+
|
34
|
+
# these methods allow appending options to the final Conversion output
|
35
|
+
|
36
|
+
def enable_u_option
|
37
|
+
return false unless es_2015_or_higher?
|
38
|
+
|
39
|
+
required_options_hash['u'] = true
|
40
|
+
end
|
41
|
+
|
42
|
+
def required_options
|
43
|
+
required_options_hash.keys
|
20
44
|
end
|
21
45
|
|
22
46
|
# group context
|
@@ -54,7 +78,9 @@ class JsRegex
|
|
54
78
|
|
55
79
|
private
|
56
80
|
|
57
|
-
attr_accessor :added_capturing_groups_after_group
|
81
|
+
attr_accessor :added_capturing_groups_after_group,
|
82
|
+
:required_options_hash,
|
83
|
+
:target
|
58
84
|
|
59
85
|
attr_writer :capturing_group_count,
|
60
86
|
:case_insensitive_root,
|
data/lib/js_regex/converter/escape_converter.rb
CHANGED
@@ -41,7 +41,7 @@ class JsRegex
|
|
41
41
|
when :control, :meta_sequence
|
42
42
|
unicode_escape_codepoint
|
43
43
|
when :literal
|
44
|
-
LiteralConverter.convert_data(expression.char)
|
44
|
+
LiteralConverter.convert_data(expression.char, context)
|
45
45
|
when *ESCAPES_SHARED_BY_RUBY_AND_JS
|
46
46
|
pass_through
|
47
47
|
when :bell, :escape, :octal
|
@@ -52,11 +52,19 @@ class JsRegex
|
|
52
52
|
end
|
53
53
|
|
54
54
|
def convert_codepoint_list
|
55
|
-
|
56
|
-
|
55
|
+
if context.enable_u_option
|
56
|
+
split_codepoint_list
|
57
|
+
else
|
58
|
+
expression.chars.each_with_object(Node.new) do |char, node|
|
59
|
+
node << LiteralConverter.convert_data(Regexp.escape(char), context)
|
60
|
+
end
|
57
61
|
end
|
58
62
|
end
|
59
63
|
|
64
|
+
def split_codepoint_list
|
65
|
+
expression.codepoints.map { |cp| "\\u{#{cp.to_s(16).upcase}}" }.join
|
66
|
+
end
|
67
|
+
|
60
68
|
def unicode_escape_codepoint
|
61
69
|
"\\u#{expression.codepoint.to_s(16).upcase.rjust(4, '0')}"
|
62
70
|
end
|
data/lib/js_regex/converter/group_converter.rb
CHANGED
@@ -10,19 +10,30 @@ class JsRegex
|
|
10
10
|
|
11
11
|
def convert_data
|
12
12
|
case subtype
|
13
|
-
when :capture
|
13
|
+
when :capture then build_group
|
14
|
+
when :named then build_named_group
|
14
15
|
when :atomic then emulate_atomic_group
|
15
16
|
when :comment then drop_without_warning
|
16
17
|
when :options, :options_switch then build_options_group
|
17
18
|
when :passive then build_passive_group
|
18
19
|
when :absence then build_absence_group_if_simple
|
19
|
-
else
|
20
|
+
else warn_of_unsupported_feature
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def build_named_group
|
25
|
+
if context.es_2018_or_higher?
|
26
|
+
# ES 2018+ supports named groups, but only the angled-bracket syntax
|
27
|
+
build_group(head: "(?<#{expression.name}>")
|
28
|
+
else
|
29
|
+
build_group
|
20
30
|
end
|
21
31
|
end
|
22
32
|
|
23
33
|
def emulate_atomic_group
|
24
34
|
if context.in_atomic_group
|
25
|
-
|
35
|
+
warn_of_unsupported_feature('nested atomic group')
|
36
|
+
build_passive_group
|
26
37
|
else
|
27
38
|
context.start_atomic_group
|
28
39
|
result = wrap_in_backrefed_lookahead(convert_subexpressions)
|
@@ -68,11 +79,6 @@ class JsRegex
|
|
68
79
|
build_group(head: head, tail: tail, capturing: false)
|
69
80
|
end
|
70
81
|
|
71
|
-
def build_unsupported_group(description = nil)
|
72
|
-
warn_of_unsupported_feature(description)
|
73
|
-
build_passive_group
|
74
|
-
end
|
75
|
-
|
76
82
|
def build_group(opts = {})
|
77
83
|
head = opts[:head] || '('
|
78
84
|
tail = opts[:tail] || ')'
|
data/lib/js_regex/converter/keep_converter.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative 'base'
|
2
|
+
|
3
|
+
class JsRegex
|
4
|
+
module Converter
|
5
|
+
#
|
6
|
+
# Template class implementation.
|
7
|
+
#
|
8
|
+
class KeepConverter < JsRegex::Converter::Base
|
9
|
+
private
|
10
|
+
|
11
|
+
def convert_data
|
12
|
+
if context.es_2018_or_higher?
|
13
|
+
if expression.level.zero?
|
14
|
+
Node.new(type: :keep_mark) # mark for conversion in SecondPass
|
15
|
+
else
|
16
|
+
warn_of_unsupported_feature('nested keep mark')
|
17
|
+
end
|
18
|
+
else
|
19
|
+
warn_of_unsupported_feature('keep mark', min_target: Target::ES2018)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
data/lib/js_regex/converter/literal_converter.rb
CHANGED
@@ -7,11 +7,15 @@ class JsRegex
|
|
7
7
|
#
|
8
8
|
class LiteralConverter < JsRegex::Converter::Base
|
9
9
|
class << self
|
10
|
-
ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{
|
10
|
+
ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
|
11
11
|
|
12
|
-
def convert_data(data)
|
12
|
+
def convert_data(data, context)
|
13
13
|
if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
14
|
-
|
14
|
+
if context.enable_u_option
|
15
|
+
escape_incompatible_bmp_literals(data)
|
16
|
+
else
|
17
|
+
convert_astral_data(data)
|
18
|
+
end
|
15
19
|
else
|
16
20
|
escape_incompatible_bmp_literals(data)
|
17
21
|
end
|
@@ -41,7 +45,7 @@ class JsRegex
|
|
41
45
|
private
|
42
46
|
|
43
47
|
def convert_data
|
44
|
-
result = self.class.convert_data(data)
|
48
|
+
result = self.class.convert_data(data, context)
|
45
49
|
if context.case_insensitive_root && !expression.case_insensitive?
|
46
50
|
warn_of_unsupported_feature('nested case-sensitive literal')
|
47
51
|
elsif !context.case_insensitive_root && expression.case_insensitive?
|
data/lib/js_regex/converter/meta_converter.rb
CHANGED
@@ -8,6 +8,16 @@ class JsRegex
|
|
8
8
|
class MetaConverter < JsRegex::Converter::Base
|
9
9
|
DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\n\uD800-\uDFFF])'
|
10
10
|
ML_DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\uD800-\uDFFF])'
|
11
|
+
# Possible improvements for dot conversion:
|
12
|
+
#
|
13
|
+
# In ES2015, the 'u' flag allows dots to match astral chars. Unfortunately
|
14
|
+
# the dot keeps matching lone surrogates even with this flag, so the use
|
15
|
+
# of an expansion is still necessary to get the same behavior as in Ruby.
|
16
|
+
#
|
17
|
+
# ES2018 has the dotall flag 's', but it is tricky to use in conversions.
|
18
|
+
# 's' activates matching of BOTH astral chars and "\n", whereas the dot in
|
19
|
+
# Ruby doesn't match "\n" by default, and even with the 'm' flag set on
|
20
|
+
# the root, subexps might still exclude "\n" like so: /.(?-m:.)./m
|
11
21
|
|
12
22
|
private
|
13
23
|
|
data/lib/js_regex/converter/property_converter.rb
CHANGED
@@ -10,9 +10,24 @@ class JsRegex
|
|
10
10
|
# codepoints matched by the property and build a set string from them.
|
11
11
|
#
|
12
12
|
class PropertyConverter < JsRegex::Converter::Base
|
13
|
+
# A map of normalized Ruby property names to names supported by ES2018+.
|
14
|
+
def self.map
|
15
|
+
@map ||= File.read("#{__dir__}/property_map.csv").scan(/(.+),(.+)/).to_h
|
16
|
+
end
|
17
|
+
|
13
18
|
private
|
14
19
|
|
15
20
|
def convert_data
|
21
|
+
if context.es_2018_or_higher? &&
|
22
|
+
(prop_name_in_js = self.class.map[subtype.to_s.tr('_', '')])
|
23
|
+
context.enable_u_option
|
24
|
+
"\\#{expression.negative? ? 'P' : 'p'}{#{prop_name_in_js}}"
|
25
|
+
else
|
26
|
+
build_character_set
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def build_character_set
|
16
31
|
content = CharacterSet.of_expression(expression)
|
17
32
|
|
18
33
|
if expression.case_insensitive? && !context.case_insensitive_root
|
data/lib/js_regex/converter/property_map.csv
ADDED
@@ -0,0 +1,171 @@
|
|
1
|
+
# THIS FILE IS GENERATED BY $ rake build_prop_map - DO NOT EDIT
|
2
|
+
ascii,ASCII
|
3
|
+
asciihexdigit,ASCII_Hex_Digit
|
4
|
+
adlam,Script=Adlam
|
5
|
+
anatolianhieroglyphs,Script=Anatolian_Hieroglyphs
|
6
|
+
armenian,Script=Armenian
|
7
|
+
avestan,Script=Avestan
|
8
|
+
bamum,Script=Bamum
|
9
|
+
bassavah,Script=Bassa_Vah
|
10
|
+
batak,Script=Batak
|
11
|
+
bengali,Script=Bengali
|
12
|
+
bhaiksuki,Script=Bhaiksuki
|
13
|
+
bidicontrol,Bidi_Control
|
14
|
+
bopomofo,Script=Bopomofo
|
15
|
+
braille,Script=Braille
|
16
|
+
buginese,Script=Buginese
|
17
|
+
buhid,Script=Buhid
|
18
|
+
carian,Script=Carian
|
19
|
+
caucasianalbanian,Script=Caucasian_Albanian
|
20
|
+
chakma,Script=Chakma
|
21
|
+
cham,Script=Cham
|
22
|
+
cherokee,Script=Cherokee
|
23
|
+
chorasmian,Script=Chorasmian
|
24
|
+
connectorpunctuation,Connector_Punctuation
|
25
|
+
control,Control
|
26
|
+
coptic,Script=Coptic
|
27
|
+
cuneiform,Script=Cuneiform
|
28
|
+
cypriot,Script=Cypriot
|
29
|
+
cyrillic,Script=Cyrillic
|
30
|
+
deprecated,Deprecated
|
31
|
+
deseret,Script=Deseret
|
32
|
+
devanagari,Script=Devanagari
|
33
|
+
divesakuru,Script=Dives_Akuru
|
34
|
+
dogra,Script=Dogra
|
35
|
+
duployan,Script=Duployan
|
36
|
+
egyptianhieroglyphs,Script=Egyptian_Hieroglyphs
|
37
|
+
elbasan,Script=Elbasan
|
38
|
+
elymaic,Script=Elymaic
|
39
|
+
emojicomponent,Emoji_Component
|
40
|
+
emojimodifier,Emoji_Modifier
|
41
|
+
enclosingmark,Enclosing_Mark
|
42
|
+
finalpunctuation,Final_Punctuation
|
43
|
+
georgian,Script=Georgian
|
44
|
+
gothic,Script=Gothic
|
45
|
+
grantha,Script=Grantha
|
46
|
+
greek,Script=Greek
|
47
|
+
gujarati,Script=Gujarati
|
48
|
+
gunjalagondi,Script=Gunjala_Gondi
|
49
|
+
gurmukhi,Script=Gurmukhi
|
50
|
+
hangul,Script=Hangul
|
51
|
+
hanifirohingya,Script=Hanifi_Rohingya
|
52
|
+
hanunoo,Script=Hanunoo
|
53
|
+
hatran,Script=Hatran
|
54
|
+
hebrew,Script=Hebrew
|
55
|
+
hexdigit,Hex_Digit
|
56
|
+
idsbinaryoperator,IDS_Binary_Operator
|
57
|
+
idstrinaryoperator,IDS_Trinary_Operator
|
58
|
+
imperialaramaic,Script=Imperial_Aramaic
|
59
|
+
initialpunctuation,Initial_Punctuation
|
60
|
+
inscriptionalpahlavi,Script=Inscriptional_Pahlavi
|
61
|
+
inscriptionalparthian,Script=Inscriptional_Parthian
|
62
|
+
javanese,Script=Javanese
|
63
|
+
joincontrol,Join_Control
|
64
|
+
kayahli,Script=Kayah_Li
|
65
|
+
kharoshthi,Script=Kharoshthi
|
66
|
+
khitansmallscript,Script=Khitan_Small_Script
|
67
|
+
khmer,Script=Khmer
|
68
|
+
khojki,Script=Khojki
|
69
|
+
khudawadi,Script=Khudawadi
|
70
|
+
lao,Script=Lao
|
71
|
+
lepcha,Script=Lepcha
|
72
|
+
letternumber,Letter_Number
|
73
|
+
limbu,Script=Limbu
|
74
|
+
lineseparator,Line_Separator
|
75
|
+
lineara,Script=Linear_A
|
76
|
+
linearb,Script=Linear_B
|
77
|
+
lisu,Script=Lisu
|
78
|
+
logicalorderexception,Logical_Order_Exception
|
79
|
+
lycian,Script=Lycian
|
80
|
+
lydian,Script=Lydian
|
81
|
+
mahajani,Script=Mahajani
|
82
|
+
makasar,Script=Makasar
|
83
|
+
malayalam,Script=Malayalam
|
84
|
+
mandaic,Script=Mandaic
|
85
|
+
manichaean,Script=Manichaean
|
86
|
+
marchen,Script=Marchen
|
87
|
+
masaramgondi,Script=Masaram_Gondi
|
88
|
+
math,Math
|
89
|
+
mathsymbol,Math_Symbol
|
90
|
+
medefaidrin,Script=Medefaidrin
|
91
|
+
meeteimayek,Script=Meetei_Mayek
|
92
|
+
mendekikakui,Script=Mende_Kikakui
|
93
|
+
meroiticcursive,Script=Meroitic_Cursive
|
94
|
+
meroitichieroglyphs,Script=Meroitic_Hieroglyphs
|
95
|
+
miao,Script=Miao
|
96
|
+
modi,Script=Modi
|
97
|
+
mro,Script=Mro
|
98
|
+
multani,Script=Multani
|
99
|
+
myanmar,Script=Myanmar
|
100
|
+
nabataean,Script=Nabataean
|
101
|
+
nandinagari,Script=Nandinagari
|
102
|
+
newtailue,Script=New_Tai_Lue
|
103
|
+
newa,Script=Newa
|
104
|
+
nko,Script=Nko
|
105
|
+
noncharactercodepoint,Noncharacter_Code_Point
|
106
|
+
nushu,Script=Nushu
|
107
|
+
nyiakengpuachuehmong,Script=Nyiakeng_Puachue_Hmong
|
108
|
+
ogham,Script=Ogham
|
109
|
+
olchiki,Script=Ol_Chiki
|
110
|
+
oldhungarian,Script=Old_Hungarian
|
111
|
+
olditalic,Script=Old_Italic
|
112
|
+
oldnortharabian,Script=Old_North_Arabian
|
113
|
+
oldpermic,Script=Old_Permic
|
114
|
+
oldpersian,Script=Old_Persian
|
115
|
+
oldsogdian,Script=Old_Sogdian
|
116
|
+
oldsoutharabian,Script=Old_South_Arabian
|
117
|
+
oldturkic,Script=Old_Turkic
|
118
|
+
oriya,Script=Oriya
|
119
|
+
osage,Script=Osage
|
120
|
+
osmanya,Script=Osmanya
|
121
|
+
othernumber,Other_Number
|
122
|
+
pahawhhmong,Script=Pahawh_Hmong
|
123
|
+
palmyrene,Script=Palmyrene
|
124
|
+
paragraphseparator,Paragraph_Separator
|
125
|
+
patternsyntax,Pattern_Syntax
|
126
|
+
patternwhitespace,Pattern_White_Space
|
127
|
+
paucinhau,Script=Pau_Cin_Hau
|
128
|
+
phagspa,Script=Phags_Pa
|
129
|
+
phoenician,Script=Phoenician
|
130
|
+
privateuse,Private_Use
|
131
|
+
psalterpahlavi,Script=Psalter_Pahlavi
|
132
|
+
quotationmark,Quotation_Mark
|
133
|
+
radical,Radical
|
134
|
+
regionalindicator,Regional_Indicator
|
135
|
+
rejang,Script=Rejang
|
136
|
+
runic,Script=Runic
|
137
|
+
samaritan,Script=Samaritan
|
138
|
+
saurashtra,Script=Saurashtra
|
139
|
+
separator,Separator
|
140
|
+
sharada,Script=Sharada
|
141
|
+
shavian,Script=Shavian
|
142
|
+
siddham,Script=Siddham
|
143
|
+
signwriting,Script=SignWriting
|
144
|
+
sinhala,Script=Sinhala
|
145
|
+
sogdian,Script=Sogdian
|
146
|
+
sorasompeng,Script=Sora_Sompeng
|
147
|
+
soyombo,Script=Soyombo
|
148
|
+
spaceseparator,Space_Separator
|
149
|
+
sundanese,Script=Sundanese
|
150
|
+
sylotinagri,Script=Syloti_Nagri
|
151
|
+
syriac,Script=Syriac
|
152
|
+
tagbanwa,Script=Tagbanwa
|
153
|
+
taile,Script=Tai_Le
|
154
|
+
taitham,Script=Tai_Tham
|
155
|
+
taiviet,Script=Tai_Viet
|
156
|
+
tamil,Script=Tamil
|
157
|
+
tangut,Script=Tangut
|
158
|
+
thaana,Script=Thaana
|
159
|
+
thai,Script=Thai
|
160
|
+
tibetan,Script=Tibetan
|
161
|
+
tifinagh,Script=Tifinagh
|
162
|
+
tirhuta,Script=Tirhuta
|
163
|
+
titlecaseletter,Titlecase_Letter
|
164
|
+
ugaritic,Script=Ugaritic
|
165
|
+
vai,Script=Vai
|
166
|
+
wancho,Script=Wancho
|
167
|
+
warangciti,Script=Warang_Citi
|
168
|
+
whitespace,White_Space
|
169
|
+
yezidi,Script=Yezidi
|
170
|
+
yi,Script=Yi
|
171
|
+
zanabazarsquare,Script=Zanabazar_Square
|
data/lib/js_regex/converter/set_converter.rb
CHANGED
@@ -26,7 +26,12 @@ class JsRegex
|
|
26
26
|
warn_of_unsupported_feature('nested case-sensitive set')
|
27
27
|
end
|
28
28
|
|
29
|
-
|
29
|
+
if context.es_2015_or_higher?
|
30
|
+
context.enable_u_option if content.astral_part?
|
31
|
+
content.to_s(format: 'es6', in_brackets: true)
|
32
|
+
else
|
33
|
+
content.to_s_with_surrogate_ranges
|
34
|
+
end
|
30
35
|
end
|
31
36
|
|
32
37
|
def directly_compatible?
|
@@ -41,8 +46,8 @@ class JsRegex
|
|
41
46
|
def child_directly_compatible?(exp)
|
42
47
|
case exp.type
|
43
48
|
when :literal
|
44
|
-
# surrogate pair substitution needed if astral
|
45
|
-
exp.text.ord <= 0xFFFF
|
49
|
+
# surrogate pair substitution needed on ES2009 if astral
|
50
|
+
exp.text.ord <= 0xFFFF || context.enable_u_option
|
46
51
|
when :set
|
47
52
|
# conversion needed for nested sets, intersections
|
48
53
|
exp.token.equal?(:range)
|
data/lib/js_regex/converter/type_converter.rb
CHANGED
@@ -6,9 +6,11 @@ class JsRegex
|
|
6
6
|
# Template class implementation.
|
7
7
|
#
|
8
8
|
class TypeConverter < JsRegex::Converter::Base
|
9
|
-
HEX_EXPANSION
|
10
|
-
NONHEX_EXPANSION
|
11
|
-
|
9
|
+
HEX_EXPANSION = '[0-9A-Fa-f]'
|
10
|
+
NONHEX_EXPANSION = '[^0-9A-Fa-f]'
|
11
|
+
ES2018_HEX_EXPANSION = '\p{AHex}'
|
12
|
+
ES2018_NONHEX_EXPANSION = '\P{AHex}'
|
13
|
+
LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
|
12
14
|
|
13
15
|
def self.directly_compatible?(expression)
|
14
16
|
case expression.token
|
@@ -23,8 +25,8 @@ class JsRegex
|
|
23
25
|
|
24
26
|
def convert_data
|
25
27
|
case subtype
|
26
|
-
when :hex then
|
27
|
-
when :nonhex then
|
28
|
+
when :hex then hex_expansion
|
29
|
+
when :nonhex then nonhex_expansion
|
28
30
|
when :linebreak then LINEBREAK_EXPANSION
|
29
31
|
when :digit, :space, :word
|
30
32
|
return pass_through if self.class.directly_compatible?(expression)
|
@@ -37,6 +39,22 @@ class JsRegex
|
|
37
39
|
end
|
38
40
|
end
|
39
41
|
|
42
|
+
def hex_expansion
|
43
|
+
if context.es_2018_or_higher? && context.enable_u_option
|
44
|
+
ES2018_HEX_EXPANSION
|
45
|
+
else
|
46
|
+
HEX_EXPANSION
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def nonhex_expansion
|
51
|
+
if context.es_2018_or_higher? && context.enable_u_option
|
52
|
+
ES2018_NONHEX_EXPANSION
|
53
|
+
else
|
54
|
+
NONHEX_EXPANSION
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
40
58
|
def negative_set_substitution
|
41
59
|
# ::of_expression returns an inverted set for negative expressions,
|
42
60
|
# so we need to un-invert before wrapping in [^ and ]. Kinda lame.
|
data/lib/js_regex/converter.rb
CHANGED
data/lib/js_regex/node.rb
CHANGED
@@ -9,10 +9,11 @@ class JsRegex
|
|
9
9
|
attr_reader :children, :quantifier, :reference, :type
|
10
10
|
|
11
11
|
TYPES = %i[
|
12
|
-
|
12
|
+
backref
|
13
13
|
captured_group
|
14
14
|
conditional
|
15
15
|
dropped
|
16
|
+
keep_mark
|
16
17
|
plain
|
17
18
|
].freeze
|
18
19
|
|
@@ -46,7 +47,7 @@ class JsRegex
|
|
46
47
|
case type
|
47
48
|
when :dropped
|
48
49
|
''
|
49
|
-
when :
|
50
|
+
when :backref, :captured_group, :plain
|
50
51
|
children.join << quantifier.to_s
|
51
52
|
else
|
52
53
|
raise TypeError.new(
|
@@ -59,6 +60,7 @@ class JsRegex
|
|
59
60
|
self.children = attrs.fetch(:children) if attrs.key?(:children)
|
60
61
|
self.quantifier = attrs.fetch(:quantifier) if attrs.key?(:quantifier)
|
61
62
|
self.type = attrs.fetch(:type) if attrs.key?(:type)
|
63
|
+
self
|
62
64
|
end
|
63
65
|
|
64
66
|
private
|
data/lib/js_regex/second_pass.rb
CHANGED
@@ -6,12 +6,26 @@ class JsRegex
|
|
6
6
|
module SecondPass
|
7
7
|
class << self
|
8
8
|
def call(tree)
|
9
|
+
substitute_root_level_keep_mark(tree)
|
9
10
|
alternate_conditional_permutations(tree)
|
10
11
|
tree
|
11
12
|
end
|
12
13
|
|
13
14
|
private
|
14
15
|
|
16
|
+
def substitute_root_level_keep_mark(tree)
|
17
|
+
keep_mark_index = nil
|
18
|
+
tree.children.each.with_index do |child, i|
|
19
|
+
break keep_mark_index = i if child.type == :keep_mark
|
20
|
+
end
|
21
|
+
return unless keep_mark_index
|
22
|
+
|
23
|
+
pre = tree.children[0...keep_mark_index]
|
24
|
+
post = tree.children[(keep_mark_index + 1)..-1]
|
25
|
+
lookbehind = Node.new('(?<=', *pre, ')')
|
26
|
+
tree.update(children: [lookbehind, *post])
|
27
|
+
end
|
28
|
+
|
15
29
|
def alternate_conditional_permutations(tree)
|
16
30
|
permutations = conditional_tree_permutations(tree)
|
17
31
|
return if permutations.empty?
|
@@ -23,16 +37,16 @@ class JsRegex
|
|
23
37
|
end
|
24
38
|
|
25
39
|
def conditional_tree_permutations(tree)
|
26
|
-
|
27
|
-
return [] if
|
40
|
+
conds = conditions(tree)
|
41
|
+
return [] if conds.empty?
|
28
42
|
|
29
43
|
caps_per_branch = captured_group_count(tree)
|
30
44
|
|
31
|
-
condition_permutations(
|
45
|
+
condition_permutations(conds).map.with_index do |truthy_conds, i|
|
32
46
|
tree_permutation = tree.clone
|
33
47
|
# find referenced groups and conditionals and make one-sided
|
34
48
|
crawl(tree_permutation) do |node|
|
35
|
-
build_permutation(node,
|
49
|
+
build_permutation(node, conds, truthy_conds, caps_per_branch, i)
|
36
50
|
end
|
37
51
|
end
|
38
52
|
end
|
@@ -63,16 +77,30 @@ class JsRegex
|
|
63
77
|
end
|
64
78
|
end
|
65
79
|
|
66
|
-
def build_permutation(node,
|
80
|
+
def build_permutation(node, conds, truthy_conds, caps_per_branch, i)
|
67
81
|
truthy = truthy_conds.include?(node.reference)
|
68
82
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
83
|
+
case node.type
|
84
|
+
when :backref
|
85
|
+
# We cannot use named groups or backrefs in the conditional expansion,
|
86
|
+
# their repetition would cause a "Duplicate capture group name" error in JS.
|
87
|
+
node.update(children: [
|
88
|
+
node.children.first.sub(/k<.*>/, node.reference.to_s)
|
89
|
+
])
|
90
|
+
# backref numbers need to be incremented for subsequent "branches"
|
75
91
|
adapt_backref_to_permutation(node, caps_per_branch, i)
|
92
|
+
when :captured_group
|
93
|
+
# Remove name, c.f. :backref handling.
|
94
|
+
node.update(children: [
|
95
|
+
node.children.first.sub(/\?<.*>/, ''),
|
96
|
+
*node.children[1..-1]
|
97
|
+
])
|
98
|
+
# if the group is referenced by any condition, modulate its quantity
|
99
|
+
if conds.include?(node.reference)
|
100
|
+
adapt_referenced_group_to_permutation(node, truthy)
|
101
|
+
end
|
102
|
+
when :conditional
|
103
|
+
adapt_conditional_to_permutation(node, truthy)
|
76
104
|
end
|
77
105
|
end
|
78
106
|
|
@@ -91,8 +119,8 @@ class JsRegex
|
|
91
119
|
end
|
92
120
|
|
93
121
|
def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
|
94
|
-
new_num = backref_node.
|
95
|
-
backref_node.update(children: [new_num
|
122
|
+
new_num = backref_node.reference + caps_per_branch * i
|
123
|
+
backref_node.update(children: ["\\#{new_num}"])
|
96
124
|
end
|
97
125
|
|
98
126
|
def min_quantify(node)
|
data/lib/js_regex/target.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
class JsRegex
|
2
|
+
module Target
|
3
|
+
ES2009 = 'ES2009'
|
4
|
+
ES2015 = 'ES2015'
|
5
|
+
ES2018 = 'ES2018'
|
6
|
+
SUPPORTED = [ES2009, ES2015, ES2018].freeze
|
7
|
+
|
8
|
+
def self.cast(arg)
|
9
|
+
return ES2009 if arg.nil?
|
10
|
+
|
11
|
+
normalized_arg = arg.to_s.upcase
|
12
|
+
return normalized_arg if SUPPORTED.include?(normalized_arg)
|
13
|
+
|
14
|
+
raise ArgumentError.new(
|
15
|
+
"Unknown target: #{arg.inspect}. Try one of #{SUPPORTED}."
|
16
|
+
).extend(JsRegex::Error)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/js_regex/version.rb
CHANGED
data/lib/js_regex.rb
CHANGED
@@ -12,10 +12,10 @@ class JsRegex
|
|
12
12
|
require_relative File.join('js_regex', 'version')
|
13
13
|
require 'json'
|
14
14
|
|
15
|
-
attr_reader :source, :options, :warnings
|
15
|
+
attr_reader :source, :options, :warnings, :target
|
16
16
|
|
17
|
-
def initialize(ruby_regex,
|
18
|
-
@source, @options, @warnings = Conversion.of(ruby_regex,
|
17
|
+
def initialize(ruby_regex, **kwargs)
|
18
|
+
@source, @options, @warnings, @target = Conversion.of(ruby_regex, **kwargs)
|
19
19
|
end
|
20
20
|
|
21
21
|
def to_h
|
@@ -30,8 +30,8 @@ class JsRegex
|
|
30
30
|
"/#{source.empty? ? '(?:)' : source}/#{options}"
|
31
31
|
end
|
32
32
|
|
33
|
-
def self.new!(ruby_regex,
|
34
|
-
js_regex = new(ruby_regex,
|
33
|
+
def self.new!(ruby_regex, **kwargs)
|
34
|
+
js_regex = new(ruby_regex, **kwargs)
|
35
35
|
if js_regex.warnings.any?
|
36
36
|
raise StandardError.new(
|
37
37
|
"Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: character_set
|
@@ -72,9 +72,11 @@ files:
|
|
72
72
|
- lib/js_regex/converter/escape_converter.rb
|
73
73
|
- lib/js_regex/converter/freespace_converter.rb
|
74
74
|
- lib/js_regex/converter/group_converter.rb
|
75
|
+
- lib/js_regex/converter/keep_converter.rb
|
75
76
|
- lib/js_regex/converter/literal_converter.rb
|
76
77
|
- lib/js_regex/converter/meta_converter.rb
|
77
78
|
- lib/js_regex/converter/property_converter.rb
|
79
|
+
- lib/js_regex/converter/property_map.csv
|
78
80
|
- lib/js_regex/converter/set_converter.rb
|
79
81
|
- lib/js_regex/converter/subexpression_converter.rb
|
80
82
|
- lib/js_regex/converter/type_converter.rb
|
@@ -82,6 +84,7 @@ files:
|
|
82
84
|
- lib/js_regex/error.rb
|
83
85
|
- lib/js_regex/node.rb
|
84
86
|
- lib/js_regex/second_pass.rb
|
87
|
+
- lib/js_regex/target.rb
|
85
88
|
- lib/js_regex/version.rb
|
86
89
|
homepage: https://github.com/jaynetics/js_regex
|
87
90
|
licenses:
|