js_regex 3.7.2 → 3.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/js_regex/conversion.rb +16 -11
- data/lib/js_regex/converter/anchor_converter.rb +28 -6
- data/lib/js_regex/converter/assertion_converter.rb +15 -4
- data/lib/js_regex/converter/backreference_converter.rb +23 -6
- data/lib/js_regex/converter/base.rb +10 -6
- data/lib/js_regex/converter/conditional_converter.rb +2 -2
- data/lib/js_regex/converter/context.rb +28 -2
- data/lib/js_regex/converter/escape_converter.rb +11 -3
- data/lib/js_regex/converter/group_converter.rb +14 -8
- data/lib/js_regex/converter/keep_converter.rb +24 -0
- data/lib/js_regex/converter/literal_converter.rb +8 -4
- data/lib/js_regex/converter/meta_converter.rb +10 -0
- data/lib/js_regex/converter/property_converter.rb +15 -0
- data/lib/js_regex/converter/property_map.csv +171 -0
- data/lib/js_regex/converter/set_converter.rb +8 -3
- data/lib/js_regex/converter/type_converter.rb +23 -5
- data/lib/js_regex/converter.rb +1 -0
- data/lib/js_regex/node.rb +4 -2
- data/lib/js_regex/second_pass.rb +41 -13
- data/lib/js_regex/target.rb +19 -0
- data/lib/js_regex/version.rb +1 -1
- data/lib/js_regex.rb +5 -5
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8971658980813740deb03ece3c5af5bfdfd7412f0630fc4a3f172e4c06b11c52
|
4
|
+
data.tar.gz: bdafa3639a230b1ec1ac4661828050d99339eb18ec1768fa2a6f1b5e69d95f1b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 13abdf7b41485194f05cce79751ca60e8b1f9fc864b17f58294650df9f9e485a889b9571d847bf564aa12b709fd572c53f27a5a2900e3dc8bfa765f522b58e62
|
7
|
+
data.tar.gz: 31941c0d7a4842fdea84d5f649f3df30c54e8da24e6dbf722cff3e48661ae89646117f66f33ee419c0fc0e393b11f284cb565751b4f77aa5eb0bbbc8d38d903d
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
class JsRegex
|
2
2
|
#
|
3
|
-
# This class acts as a facade, passing a
|
3
|
+
# This class acts as a facade, passing a Regexp to the Converters.
|
4
4
|
#
|
5
|
-
# ::of returns a source String, options String,
|
5
|
+
# ::of returns a source String, options String, warnings Array, target String.
|
6
6
|
#
|
7
7
|
class Conversion
|
8
8
|
require 'regexp_parser'
|
@@ -10,28 +10,33 @@ class JsRegex
|
|
10
10
|
require_relative 'error'
|
11
11
|
require_relative 'node'
|
12
12
|
require_relative 'second_pass'
|
13
|
+
require_relative 'target'
|
13
14
|
|
14
15
|
class << self
|
15
|
-
def of(input, options: nil)
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
def of(input, options: nil, target: Target::ES2009)
|
17
|
+
target = Target.cast(target)
|
18
|
+
source, warnings, extra_opts = convert_source(input, target)
|
19
|
+
options_string = convert_options(input, options, extra_opts)
|
20
|
+
[source, options_string, warnings, target]
|
19
21
|
end
|
20
22
|
|
21
23
|
private
|
22
24
|
|
23
|
-
def convert_source(input)
|
25
|
+
def convert_source(input, target)
|
24
26
|
tree = Regexp::Parser.parse(input)
|
25
|
-
context = Converter::Context.new(
|
27
|
+
context = Converter::Context.new(
|
28
|
+
case_insensitive_root: tree.i?,
|
29
|
+
target: target,
|
30
|
+
)
|
26
31
|
converted_tree = Converter.convert(tree, context)
|
27
32
|
final_tree = SecondPass.call(converted_tree)
|
28
|
-
[final_tree.to_s, context.warnings]
|
33
|
+
[final_tree.to_s, context.warnings, context.required_options]
|
29
34
|
rescue Regexp::Parser::Error => e
|
30
35
|
raise e.extend(JsRegex::Error)
|
31
36
|
end
|
32
37
|
|
33
|
-
def convert_options(input, custom_options)
|
34
|
-
options = custom_options.to_s.scan(/[
|
38
|
+
def convert_options(input, custom_options, required_options)
|
39
|
+
options = custom_options.to_s.scan(/[gimsuy]/) + required_options
|
35
40
|
if input.is_a?(Regexp) && (input.options & Regexp::IGNORECASE).nonzero?
|
36
41
|
options << 'i'
|
37
42
|
end
|
@@ -13,17 +13,39 @@ class JsRegex
|
|
13
13
|
when :bol, :bos then '^'
|
14
14
|
when :eol, :eos then '$'
|
15
15
|
when :eos_ob_eol then '(?=\n?$)'
|
16
|
-
when :word_boundary then
|
17
|
-
when :nonword_boundary then
|
16
|
+
when :word_boundary then convert_boundary
|
17
|
+
when :nonword_boundary then convert_nonboundary
|
18
18
|
else
|
19
19
|
warn_of_unsupported_feature
|
20
20
|
end
|
21
21
|
end
|
22
22
|
|
23
|
-
def
|
24
|
-
|
25
|
-
|
26
|
-
|
23
|
+
def convert_boundary
|
24
|
+
if context.es_2018_or_higher? && context.enable_u_option
|
25
|
+
BOUNDARY_EXPANSION
|
26
|
+
else
|
27
|
+
pass_boundary_with_warning
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
def convert_nonboundary
|
32
|
+
if context.es_2018_or_higher? && context.enable_u_option
|
33
|
+
NONBOUNDARY_EXPANSION
|
34
|
+
else
|
35
|
+
pass_boundary_with_warning
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# This is an approximation to the word boundary behavior in Ruby, c.f.
|
40
|
+
# https://github.com/ruby/ruby/blob/08476c45/tool/enc-unicode.rb#L130
|
41
|
+
W = '\d\p{L}\p{M}\p{Pc}'
|
42
|
+
BOUNDARY_EXPANSION = "(?:(?<=[#{W}])(?=[^#{W}]|$)|(?<=[^#{W}]|^)(?=[#{W}]))"
|
43
|
+
NONBOUNDARY_EXPANSION = "(?<=[#{W}])(?=[#{W}])"
|
44
|
+
|
45
|
+
def pass_boundary_with_warning
|
46
|
+
warn_of("The anchor '#{data}' at index #{expression.ts} only works "\
|
47
|
+
'at ASCII word boundaries with targets below ES2018".')
|
48
|
+
pass_through
|
27
49
|
end
|
28
50
|
end
|
29
51
|
end
|
@@ -14,13 +14,24 @@ class JsRegex
|
|
14
14
|
def convert_data
|
15
15
|
case subtype
|
16
16
|
when :lookahead, :nlookahead
|
17
|
-
|
17
|
+
keep_as_is
|
18
|
+
when :lookbehind
|
19
|
+
return keep_as_is if context.es_2018_or_higher?
|
20
|
+
|
21
|
+
warn_of_unsupported_feature('lookbehind', min_target: Target::ES2018)
|
22
|
+
build_passive_group
|
18
23
|
when :nlookbehind
|
19
|
-
|
20
|
-
|
21
|
-
|
24
|
+
return keep_as_is if context.es_2018_or_higher?
|
25
|
+
|
26
|
+
warn_of_unsupported_feature('negative lookbehind', min_target: Target::ES2018)
|
27
|
+
else
|
28
|
+
warn_of_unsupported_feature
|
22
29
|
end
|
23
30
|
end
|
31
|
+
|
32
|
+
def keep_as_is
|
33
|
+
build_group(head: pass_through, capturing: false)
|
34
|
+
end
|
24
35
|
end
|
25
36
|
end
|
26
37
|
end
|
@@ -10,16 +10,30 @@ class JsRegex
|
|
10
10
|
|
11
11
|
def convert_data
|
12
12
|
case subtype
|
13
|
-
when :name_ref
|
14
|
-
when :
|
13
|
+
when :name_ref then convert_name_ref
|
14
|
+
when :number, :number_ref, :number_rel_ref then convert_to_plain_num_ref
|
15
|
+
when :name_call, :number_call, :number_rel_call then convert_call
|
15
16
|
else # name_recursion_ref, number_recursion_ref, ...
|
16
17
|
warn_of_unsupported_feature
|
17
18
|
end
|
18
19
|
end
|
19
20
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
21
|
+
def convert_name_ref
|
22
|
+
if context.es_2018_or_higher?
|
23
|
+
# ES 2018+ supports named backrefs, but only the angled-bracket syntax
|
24
|
+
Node.new("\\k<#{expression.name}>", reference: new_position, type: :backref)
|
25
|
+
else
|
26
|
+
convert_to_plain_num_ref
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def convert_to_plain_num_ref
|
31
|
+
position = new_position
|
32
|
+
Node.new("\\#{position}", reference: position, type: :backref)
|
33
|
+
end
|
34
|
+
|
35
|
+
def new_position
|
36
|
+
context.new_capturing_group_position(target_position)
|
23
37
|
end
|
24
38
|
|
25
39
|
def target_position
|
@@ -31,7 +45,10 @@ class JsRegex
|
|
31
45
|
return warn_of_unsupported_feature('whole-pattern recursion')
|
32
46
|
end
|
33
47
|
context.increment_local_capturing_group_count
|
34
|
-
|
48
|
+
target_copy = expression.referenced_expression.unquantified_clone
|
49
|
+
# avoid "Duplicate capture group name" error in JS
|
50
|
+
target_copy.token = :capture if target_copy.is?(:named, :group)
|
51
|
+
convert_expression(target_copy)
|
35
52
|
end
|
36
53
|
end
|
37
54
|
end
|
@@ -51,10 +51,14 @@ class JsRegex
|
|
51
51
|
Converter.convert(expression, context)
|
52
52
|
end
|
53
53
|
|
54
|
-
def warn_of_unsupported_feature(description = nil)
|
54
|
+
def warn_of_unsupported_feature(description = nil, min_target: nil)
|
55
55
|
description ||= "#{subtype} #{expression.type}".tr('_', ' ')
|
56
|
-
|
57
|
-
|
56
|
+
full_text = "Dropped unsupported #{description} '#{expression}' "\
|
57
|
+
"at index #{expression.ts}"
|
58
|
+
if min_target
|
59
|
+
full_text += " (requires at least `target: '#{min_target}'`)"
|
60
|
+
end
|
61
|
+
warn_of(full_text)
|
58
62
|
drop
|
59
63
|
end
|
60
64
|
|
@@ -68,11 +72,11 @@ class JsRegex
|
|
68
72
|
alias drop_without_warning drop
|
69
73
|
|
70
74
|
def wrap_in_backrefed_lookahead(content)
|
71
|
-
|
72
|
-
|
75
|
+
number = context.capturing_group_count + 1
|
76
|
+
backref_node = Node.new("\\#{number}", reference: number, type: :backref)
|
73
77
|
context.increment_local_capturing_group_count
|
74
78
|
# an empty passive group (?:) is appended as literal digits may follow
|
75
|
-
Node.new('(?=(', *content, '))
|
79
|
+
Node.new('(?=(', *content, '))', backref_node, '(?:)')
|
76
80
|
end
|
77
81
|
end
|
78
82
|
end
|
@@ -10,12 +10,12 @@ class JsRegex
|
|
10
10
|
|
11
11
|
def convert_data
|
12
12
|
case subtype
|
13
|
-
when :open then
|
13
|
+
when :open then mark_conditional_for_second_pass
|
14
14
|
else warn_of_unsupported_feature
|
15
15
|
end
|
16
16
|
end
|
17
17
|
|
18
|
-
def
|
18
|
+
def mark_conditional_for_second_pass
|
19
19
|
reference = expression.referenced_expression.number
|
20
20
|
node = Node.new('(?:', reference: reference, type: :conditional)
|
21
21
|
expression.branches.each do |branch|
|
@@ -11,12 +11,36 @@ class JsRegex
|
|
11
11
|
:in_atomic_group,
|
12
12
|
:warnings
|
13
13
|
|
14
|
-
def initialize(case_insensitive_root: false)
|
14
|
+
def initialize(case_insensitive_root: false, target: nil)
|
15
15
|
self.added_capturing_groups_after_group = Hash.new(0)
|
16
16
|
self.capturing_group_count = 0
|
17
17
|
self.warnings = []
|
18
|
+
self.required_options_hash = {}
|
18
19
|
|
19
20
|
self.case_insensitive_root = case_insensitive_root
|
21
|
+
self.target = target
|
22
|
+
end
|
23
|
+
|
24
|
+
# target context
|
25
|
+
|
26
|
+
def es_2015_or_higher?
|
27
|
+
target >= Target::ES2015
|
28
|
+
end
|
29
|
+
|
30
|
+
def es_2018_or_higher?
|
31
|
+
target >= Target::ES2018
|
32
|
+
end
|
33
|
+
|
34
|
+
# these methods allow appending options to the final Conversion output
|
35
|
+
|
36
|
+
def enable_u_option
|
37
|
+
return false unless es_2015_or_higher?
|
38
|
+
|
39
|
+
required_options_hash['u'] = true
|
40
|
+
end
|
41
|
+
|
42
|
+
def required_options
|
43
|
+
required_options_hash.keys
|
20
44
|
end
|
21
45
|
|
22
46
|
# group context
|
@@ -54,7 +78,9 @@ class JsRegex
|
|
54
78
|
|
55
79
|
private
|
56
80
|
|
57
|
-
attr_accessor :added_capturing_groups_after_group
|
81
|
+
attr_accessor :added_capturing_groups_after_group,
|
82
|
+
:required_options_hash,
|
83
|
+
:target
|
58
84
|
|
59
85
|
attr_writer :capturing_group_count,
|
60
86
|
:case_insensitive_root,
|
@@ -41,7 +41,7 @@ class JsRegex
|
|
41
41
|
when :control, :meta_sequence
|
42
42
|
unicode_escape_codepoint
|
43
43
|
when :literal
|
44
|
-
LiteralConverter.convert_data(expression.char)
|
44
|
+
LiteralConverter.convert_data(expression.char, context)
|
45
45
|
when *ESCAPES_SHARED_BY_RUBY_AND_JS
|
46
46
|
pass_through
|
47
47
|
when :bell, :escape, :octal
|
@@ -52,11 +52,19 @@ class JsRegex
|
|
52
52
|
end
|
53
53
|
|
54
54
|
def convert_codepoint_list
|
55
|
-
|
56
|
-
|
55
|
+
if context.enable_u_option
|
56
|
+
split_codepoint_list
|
57
|
+
else
|
58
|
+
expression.chars.each_with_object(Node.new) do |char, node|
|
59
|
+
node << LiteralConverter.convert_data(Regexp.escape(char), context)
|
60
|
+
end
|
57
61
|
end
|
58
62
|
end
|
59
63
|
|
64
|
+
def split_codepoint_list
|
65
|
+
expression.codepoints.map { |cp| "\\u{#{cp.to_s(16).upcase}}" }.join
|
66
|
+
end
|
67
|
+
|
60
68
|
def unicode_escape_codepoint
|
61
69
|
"\\u#{expression.codepoint.to_s(16).upcase.rjust(4, '0')}"
|
62
70
|
end
|
@@ -10,19 +10,30 @@ class JsRegex
|
|
10
10
|
|
11
11
|
def convert_data
|
12
12
|
case subtype
|
13
|
-
when :capture
|
13
|
+
when :capture then build_group
|
14
|
+
when :named then build_named_group
|
14
15
|
when :atomic then emulate_atomic_group
|
15
16
|
when :comment then drop_without_warning
|
16
17
|
when :options, :options_switch then build_options_group
|
17
18
|
when :passive then build_passive_group
|
18
19
|
when :absence then build_absence_group_if_simple
|
19
|
-
else
|
20
|
+
else warn_of_unsupported_feature
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
def build_named_group
|
25
|
+
if context.es_2018_or_higher?
|
26
|
+
# ES 2018+ supports named groups, but only the angled-bracket syntax
|
27
|
+
build_group(head: "(?<#{expression.name}>")
|
28
|
+
else
|
29
|
+
build_group
|
20
30
|
end
|
21
31
|
end
|
22
32
|
|
23
33
|
def emulate_atomic_group
|
24
34
|
if context.in_atomic_group
|
25
|
-
|
35
|
+
warn_of_unsupported_feature('nested atomic group')
|
36
|
+
build_passive_group
|
26
37
|
else
|
27
38
|
context.start_atomic_group
|
28
39
|
result = wrap_in_backrefed_lookahead(convert_subexpressions)
|
@@ -68,11 +79,6 @@ class JsRegex
|
|
68
79
|
build_group(head: head, tail: tail, capturing: false)
|
69
80
|
end
|
70
81
|
|
71
|
-
def build_unsupported_group(description = nil)
|
72
|
-
warn_of_unsupported_feature(description)
|
73
|
-
build_passive_group
|
74
|
-
end
|
75
|
-
|
76
82
|
def build_group(opts = {})
|
77
83
|
head = opts[:head] || '('
|
78
84
|
tail = opts[:tail] || ')'
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require_relative 'base'
|
2
|
+
|
3
|
+
class JsRegex
|
4
|
+
module Converter
|
5
|
+
#
|
6
|
+
# Template class implementation.
|
7
|
+
#
|
8
|
+
class KeepConverter < JsRegex::Converter::Base
|
9
|
+
private
|
10
|
+
|
11
|
+
def convert_data
|
12
|
+
if context.es_2018_or_higher?
|
13
|
+
if expression.level.zero?
|
14
|
+
Node.new(type: :keep_mark) # mark for conversion in SecondPass
|
15
|
+
else
|
16
|
+
warn_of_unsupported_feature('nested keep mark')
|
17
|
+
end
|
18
|
+
else
|
19
|
+
warn_of_unsupported_feature('keep mark', min_target: Target::ES2018)
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
@@ -7,11 +7,15 @@ class JsRegex
|
|
7
7
|
#
|
8
8
|
class LiteralConverter < JsRegex::Converter::Base
|
9
9
|
class << self
|
10
|
-
ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{
|
10
|
+
ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
|
11
11
|
|
12
|
-
def convert_data(data)
|
12
|
+
def convert_data(data, context)
|
13
13
|
if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
14
|
-
|
14
|
+
if context.enable_u_option
|
15
|
+
escape_incompatible_bmp_literals(data)
|
16
|
+
else
|
17
|
+
convert_astral_data(data)
|
18
|
+
end
|
15
19
|
else
|
16
20
|
escape_incompatible_bmp_literals(data)
|
17
21
|
end
|
@@ -41,7 +45,7 @@ class JsRegex
|
|
41
45
|
private
|
42
46
|
|
43
47
|
def convert_data
|
44
|
-
result = self.class.convert_data(data)
|
48
|
+
result = self.class.convert_data(data, context)
|
45
49
|
if context.case_insensitive_root && !expression.case_insensitive?
|
46
50
|
warn_of_unsupported_feature('nested case-sensitive literal')
|
47
51
|
elsif !context.case_insensitive_root && expression.case_insensitive?
|
@@ -8,6 +8,16 @@ class JsRegex
|
|
8
8
|
class MetaConverter < JsRegex::Converter::Base
|
9
9
|
DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\n\uD800-\uDFFF])'
|
10
10
|
ML_DOT_EXPANSION = '(?:[\uD800-\uDBFF][\uDC00-\uDFFF]|[^\uD800-\uDFFF])'
|
11
|
+
# Possible improvements for dot conversion:
|
12
|
+
#
|
13
|
+
# In ES2015, the 'u' flag allows dots to match astral chars. Unfortunately
|
14
|
+
# the dot keeps matching lone surrogates even with this flag, so the use
|
15
|
+
# of an expansion is still necessary to get the same behavior as in Ruby.
|
16
|
+
#
|
17
|
+
# ES2018 has the dotall flag 's', but it is tricky to use in conversions.
|
18
|
+
# 's' activates matching of BOTH astral chars and "\n", whereas the dot in
|
19
|
+
# Ruby doesn't match "\n" by default, and even with the 'm' flag set on
|
20
|
+
# the root, subexps might still exclude "\n" like so: /.(?-m:.)./m
|
11
21
|
|
12
22
|
private
|
13
23
|
|
@@ -10,9 +10,24 @@ class JsRegex
|
|
10
10
|
# codepoints matched by the property and build a set string from them.
|
11
11
|
#
|
12
12
|
class PropertyConverter < JsRegex::Converter::Base
|
13
|
+
# A map of normalized Ruby property names to names supported by ES2018+.
|
14
|
+
def self.map
|
15
|
+
@map ||= File.read("#{__dir__}/property_map.csv").scan(/(.+),(.+)/).to_h
|
16
|
+
end
|
17
|
+
|
13
18
|
private
|
14
19
|
|
15
20
|
def convert_data
|
21
|
+
if context.es_2018_or_higher? &&
|
22
|
+
(prop_name_in_js = self.class.map[subtype.to_s.tr('_', '')])
|
23
|
+
context.enable_u_option
|
24
|
+
"\\#{expression.negative? ? 'P' : 'p'}{#{prop_name_in_js}}"
|
25
|
+
else
|
26
|
+
build_character_set
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def build_character_set
|
16
31
|
content = CharacterSet.of_expression(expression)
|
17
32
|
|
18
33
|
if expression.case_insensitive? && !context.case_insensitive_root
|
@@ -0,0 +1,171 @@
|
|
1
|
+
# THIS FILE IS GENERATED BY $ rake build_prop_map - DO NOT EDIT
|
2
|
+
ascii,ASCII
|
3
|
+
asciihexdigit,ASCII_Hex_Digit
|
4
|
+
adlam,Script=Adlam
|
5
|
+
anatolianhieroglyphs,Script=Anatolian_Hieroglyphs
|
6
|
+
armenian,Script=Armenian
|
7
|
+
avestan,Script=Avestan
|
8
|
+
bamum,Script=Bamum
|
9
|
+
bassavah,Script=Bassa_Vah
|
10
|
+
batak,Script=Batak
|
11
|
+
bengali,Script=Bengali
|
12
|
+
bhaiksuki,Script=Bhaiksuki
|
13
|
+
bidicontrol,Bidi_Control
|
14
|
+
bopomofo,Script=Bopomofo
|
15
|
+
braille,Script=Braille
|
16
|
+
buginese,Script=Buginese
|
17
|
+
buhid,Script=Buhid
|
18
|
+
carian,Script=Carian
|
19
|
+
caucasianalbanian,Script=Caucasian_Albanian
|
20
|
+
chakma,Script=Chakma
|
21
|
+
cham,Script=Cham
|
22
|
+
cherokee,Script=Cherokee
|
23
|
+
chorasmian,Script=Chorasmian
|
24
|
+
connectorpunctuation,Connector_Punctuation
|
25
|
+
control,Control
|
26
|
+
coptic,Script=Coptic
|
27
|
+
cuneiform,Script=Cuneiform
|
28
|
+
cypriot,Script=Cypriot
|
29
|
+
cyrillic,Script=Cyrillic
|
30
|
+
deprecated,Deprecated
|
31
|
+
deseret,Script=Deseret
|
32
|
+
devanagari,Script=Devanagari
|
33
|
+
divesakuru,Script=Dives_Akuru
|
34
|
+
dogra,Script=Dogra
|
35
|
+
duployan,Script=Duployan
|
36
|
+
egyptianhieroglyphs,Script=Egyptian_Hieroglyphs
|
37
|
+
elbasan,Script=Elbasan
|
38
|
+
elymaic,Script=Elymaic
|
39
|
+
emojicomponent,Emoji_Component
|
40
|
+
emojimodifier,Emoji_Modifier
|
41
|
+
enclosingmark,Enclosing_Mark
|
42
|
+
finalpunctuation,Final_Punctuation
|
43
|
+
georgian,Script=Georgian
|
44
|
+
gothic,Script=Gothic
|
45
|
+
grantha,Script=Grantha
|
46
|
+
greek,Script=Greek
|
47
|
+
gujarati,Script=Gujarati
|
48
|
+
gunjalagondi,Script=Gunjala_Gondi
|
49
|
+
gurmukhi,Script=Gurmukhi
|
50
|
+
hangul,Script=Hangul
|
51
|
+
hanifirohingya,Script=Hanifi_Rohingya
|
52
|
+
hanunoo,Script=Hanunoo
|
53
|
+
hatran,Script=Hatran
|
54
|
+
hebrew,Script=Hebrew
|
55
|
+
hexdigit,Hex_Digit
|
56
|
+
idsbinaryoperator,IDS_Binary_Operator
|
57
|
+
idstrinaryoperator,IDS_Trinary_Operator
|
58
|
+
imperialaramaic,Script=Imperial_Aramaic
|
59
|
+
initialpunctuation,Initial_Punctuation
|
60
|
+
inscriptionalpahlavi,Script=Inscriptional_Pahlavi
|
61
|
+
inscriptionalparthian,Script=Inscriptional_Parthian
|
62
|
+
javanese,Script=Javanese
|
63
|
+
joincontrol,Join_Control
|
64
|
+
kayahli,Script=Kayah_Li
|
65
|
+
kharoshthi,Script=Kharoshthi
|
66
|
+
khitansmallscript,Script=Khitan_Small_Script
|
67
|
+
khmer,Script=Khmer
|
68
|
+
khojki,Script=Khojki
|
69
|
+
khudawadi,Script=Khudawadi
|
70
|
+
lao,Script=Lao
|
71
|
+
lepcha,Script=Lepcha
|
72
|
+
letternumber,Letter_Number
|
73
|
+
limbu,Script=Limbu
|
74
|
+
lineseparator,Line_Separator
|
75
|
+
lineara,Script=Linear_A
|
76
|
+
linearb,Script=Linear_B
|
77
|
+
lisu,Script=Lisu
|
78
|
+
logicalorderexception,Logical_Order_Exception
|
79
|
+
lycian,Script=Lycian
|
80
|
+
lydian,Script=Lydian
|
81
|
+
mahajani,Script=Mahajani
|
82
|
+
makasar,Script=Makasar
|
83
|
+
malayalam,Script=Malayalam
|
84
|
+
mandaic,Script=Mandaic
|
85
|
+
manichaean,Script=Manichaean
|
86
|
+
marchen,Script=Marchen
|
87
|
+
masaramgondi,Script=Masaram_Gondi
|
88
|
+
math,Math
|
89
|
+
mathsymbol,Math_Symbol
|
90
|
+
medefaidrin,Script=Medefaidrin
|
91
|
+
meeteimayek,Script=Meetei_Mayek
|
92
|
+
mendekikakui,Script=Mende_Kikakui
|
93
|
+
meroiticcursive,Script=Meroitic_Cursive
|
94
|
+
meroitichieroglyphs,Script=Meroitic_Hieroglyphs
|
95
|
+
miao,Script=Miao
|
96
|
+
modi,Script=Modi
|
97
|
+
mro,Script=Mro
|
98
|
+
multani,Script=Multani
|
99
|
+
myanmar,Script=Myanmar
|
100
|
+
nabataean,Script=Nabataean
|
101
|
+
nandinagari,Script=Nandinagari
|
102
|
+
newtailue,Script=New_Tai_Lue
|
103
|
+
newa,Script=Newa
|
104
|
+
nko,Script=Nko
|
105
|
+
noncharactercodepoint,Noncharacter_Code_Point
|
106
|
+
nushu,Script=Nushu
|
107
|
+
nyiakengpuachuehmong,Script=Nyiakeng_Puachue_Hmong
|
108
|
+
ogham,Script=Ogham
|
109
|
+
olchiki,Script=Ol_Chiki
|
110
|
+
oldhungarian,Script=Old_Hungarian
|
111
|
+
olditalic,Script=Old_Italic
|
112
|
+
oldnortharabian,Script=Old_North_Arabian
|
113
|
+
oldpermic,Script=Old_Permic
|
114
|
+
oldpersian,Script=Old_Persian
|
115
|
+
oldsogdian,Script=Old_Sogdian
|
116
|
+
oldsoutharabian,Script=Old_South_Arabian
|
117
|
+
oldturkic,Script=Old_Turkic
|
118
|
+
oriya,Script=Oriya
|
119
|
+
osage,Script=Osage
|
120
|
+
osmanya,Script=Osmanya
|
121
|
+
othernumber,Other_Number
|
122
|
+
pahawhhmong,Script=Pahawh_Hmong
|
123
|
+
palmyrene,Script=Palmyrene
|
124
|
+
paragraphseparator,Paragraph_Separator
|
125
|
+
patternsyntax,Pattern_Syntax
|
126
|
+
patternwhitespace,Pattern_White_Space
|
127
|
+
paucinhau,Script=Pau_Cin_Hau
|
128
|
+
phagspa,Script=Phags_Pa
|
129
|
+
phoenician,Script=Phoenician
|
130
|
+
privateuse,Private_Use
|
131
|
+
psalterpahlavi,Script=Psalter_Pahlavi
|
132
|
+
quotationmark,Quotation_Mark
|
133
|
+
radical,Radical
|
134
|
+
regionalindicator,Regional_Indicator
|
135
|
+
rejang,Script=Rejang
|
136
|
+
runic,Script=Runic
|
137
|
+
samaritan,Script=Samaritan
|
138
|
+
saurashtra,Script=Saurashtra
|
139
|
+
separator,Separator
|
140
|
+
sharada,Script=Sharada
|
141
|
+
shavian,Script=Shavian
|
142
|
+
siddham,Script=Siddham
|
143
|
+
signwriting,Script=SignWriting
|
144
|
+
sinhala,Script=Sinhala
|
145
|
+
sogdian,Script=Sogdian
|
146
|
+
sorasompeng,Script=Sora_Sompeng
|
147
|
+
soyombo,Script=Soyombo
|
148
|
+
spaceseparator,Space_Separator
|
149
|
+
sundanese,Script=Sundanese
|
150
|
+
sylotinagri,Script=Syloti_Nagri
|
151
|
+
syriac,Script=Syriac
|
152
|
+
tagbanwa,Script=Tagbanwa
|
153
|
+
taile,Script=Tai_Le
|
154
|
+
taitham,Script=Tai_Tham
|
155
|
+
taiviet,Script=Tai_Viet
|
156
|
+
tamil,Script=Tamil
|
157
|
+
tangut,Script=Tangut
|
158
|
+
thaana,Script=Thaana
|
159
|
+
thai,Script=Thai
|
160
|
+
tibetan,Script=Tibetan
|
161
|
+
tifinagh,Script=Tifinagh
|
162
|
+
tirhuta,Script=Tirhuta
|
163
|
+
titlecaseletter,Titlecase_Letter
|
164
|
+
ugaritic,Script=Ugaritic
|
165
|
+
vai,Script=Vai
|
166
|
+
wancho,Script=Wancho
|
167
|
+
warangciti,Script=Warang_Citi
|
168
|
+
whitespace,White_Space
|
169
|
+
yezidi,Script=Yezidi
|
170
|
+
yi,Script=Yi
|
171
|
+
zanabazarsquare,Script=Zanabazar_Square
|
@@ -26,7 +26,12 @@ class JsRegex
|
|
26
26
|
warn_of_unsupported_feature('nested case-sensitive set')
|
27
27
|
end
|
28
28
|
|
29
|
-
|
29
|
+
if context.es_2015_or_higher?
|
30
|
+
context.enable_u_option if content.astral_part?
|
31
|
+
content.to_s(format: 'es6', in_brackets: true)
|
32
|
+
else
|
33
|
+
content.to_s_with_surrogate_ranges
|
34
|
+
end
|
30
35
|
end
|
31
36
|
|
32
37
|
def directly_compatible?
|
@@ -41,8 +46,8 @@ class JsRegex
|
|
41
46
|
def child_directly_compatible?(exp)
|
42
47
|
case exp.type
|
43
48
|
when :literal
|
44
|
-
# surrogate pair substitution needed if astral
|
45
|
-
exp.text.ord <= 0xFFFF
|
49
|
+
# surrogate pair substitution needed on ES2009 if astral
|
50
|
+
exp.text.ord <= 0xFFFF || context.enable_u_option
|
46
51
|
when :set
|
47
52
|
# conversion needed for nested sets, intersections
|
48
53
|
exp.token.equal?(:range)
|
@@ -6,9 +6,11 @@ class JsRegex
|
|
6
6
|
# Template class implementation.
|
7
7
|
#
|
8
8
|
class TypeConverter < JsRegex::Converter::Base
|
9
|
-
HEX_EXPANSION
|
10
|
-
NONHEX_EXPANSION
|
11
|
-
|
9
|
+
HEX_EXPANSION = '[0-9A-Fa-f]'
|
10
|
+
NONHEX_EXPANSION = '[^0-9A-Fa-f]'
|
11
|
+
ES2018_HEX_EXPANSION = '\p{AHex}'
|
12
|
+
ES2018_NONHEX_EXPANSION = '\P{AHex}'
|
13
|
+
LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
|
12
14
|
|
13
15
|
def self.directly_compatible?(expression)
|
14
16
|
case expression.token
|
@@ -23,8 +25,8 @@ class JsRegex
|
|
23
25
|
|
24
26
|
def convert_data
|
25
27
|
case subtype
|
26
|
-
when :hex then
|
27
|
-
when :nonhex then
|
28
|
+
when :hex then hex_expansion
|
29
|
+
when :nonhex then nonhex_expansion
|
28
30
|
when :linebreak then LINEBREAK_EXPANSION
|
29
31
|
when :digit, :space, :word
|
30
32
|
return pass_through if self.class.directly_compatible?(expression)
|
@@ -37,6 +39,22 @@ class JsRegex
|
|
37
39
|
end
|
38
40
|
end
|
39
41
|
|
42
|
+
def hex_expansion
|
43
|
+
if context.es_2018_or_higher? && context.enable_u_option
|
44
|
+
ES2018_HEX_EXPANSION
|
45
|
+
else
|
46
|
+
HEX_EXPANSION
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
def nonhex_expansion
|
51
|
+
if context.es_2018_or_higher? && context.enable_u_option
|
52
|
+
ES2018_NONHEX_EXPANSION
|
53
|
+
else
|
54
|
+
NONHEX_EXPANSION
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
40
58
|
def negative_set_substitution
|
41
59
|
# ::of_expression returns an inverted set for negative expressions,
|
42
60
|
# so we need to un-invert before wrapping in [^ and ]. Kinda lame.
|
data/lib/js_regex/converter.rb
CHANGED
data/lib/js_regex/node.rb
CHANGED
@@ -9,10 +9,11 @@ class JsRegex
|
|
9
9
|
attr_reader :children, :quantifier, :reference, :type
|
10
10
|
|
11
11
|
TYPES = %i[
|
12
|
-
|
12
|
+
backref
|
13
13
|
captured_group
|
14
14
|
conditional
|
15
15
|
dropped
|
16
|
+
keep_mark
|
16
17
|
plain
|
17
18
|
].freeze
|
18
19
|
|
@@ -46,7 +47,7 @@ class JsRegex
|
|
46
47
|
case type
|
47
48
|
when :dropped
|
48
49
|
''
|
49
|
-
when :
|
50
|
+
when :backref, :captured_group, :plain
|
50
51
|
children.join << quantifier.to_s
|
51
52
|
else
|
52
53
|
raise TypeError.new(
|
@@ -59,6 +60,7 @@ class JsRegex
|
|
59
60
|
self.children = attrs.fetch(:children) if attrs.key?(:children)
|
60
61
|
self.quantifier = attrs.fetch(:quantifier) if attrs.key?(:quantifier)
|
61
62
|
self.type = attrs.fetch(:type) if attrs.key?(:type)
|
63
|
+
self
|
62
64
|
end
|
63
65
|
|
64
66
|
private
|
data/lib/js_regex/second_pass.rb
CHANGED
@@ -6,12 +6,26 @@ class JsRegex
|
|
6
6
|
module SecondPass
|
7
7
|
class << self
|
8
8
|
def call(tree)
|
9
|
+
substitute_root_level_keep_mark(tree)
|
9
10
|
alternate_conditional_permutations(tree)
|
10
11
|
tree
|
11
12
|
end
|
12
13
|
|
13
14
|
private
|
14
15
|
|
16
|
+
def substitute_root_level_keep_mark(tree)
|
17
|
+
keep_mark_index = nil
|
18
|
+
tree.children.each.with_index do |child, i|
|
19
|
+
break keep_mark_index = i if child.type == :keep_mark
|
20
|
+
end
|
21
|
+
return unless keep_mark_index
|
22
|
+
|
23
|
+
pre = tree.children[0...keep_mark_index]
|
24
|
+
post = tree.children[(keep_mark_index + 1)..-1]
|
25
|
+
lookbehind = Node.new('(?<=', *pre, ')')
|
26
|
+
tree.update(children: [lookbehind, *post])
|
27
|
+
end
|
28
|
+
|
15
29
|
def alternate_conditional_permutations(tree)
|
16
30
|
permutations = conditional_tree_permutations(tree)
|
17
31
|
return if permutations.empty?
|
@@ -23,16 +37,16 @@ class JsRegex
|
|
23
37
|
end
|
24
38
|
|
25
39
|
def conditional_tree_permutations(tree)
|
26
|
-
|
27
|
-
return [] if
|
40
|
+
conds = conditions(tree)
|
41
|
+
return [] if conds.empty?
|
28
42
|
|
29
43
|
caps_per_branch = captured_group_count(tree)
|
30
44
|
|
31
|
-
condition_permutations(
|
45
|
+
condition_permutations(conds).map.with_index do |truthy_conds, i|
|
32
46
|
tree_permutation = tree.clone
|
33
47
|
# find referenced groups and conditionals and make one-sided
|
34
48
|
crawl(tree_permutation) do |node|
|
35
|
-
build_permutation(node,
|
49
|
+
build_permutation(node, conds, truthy_conds, caps_per_branch, i)
|
36
50
|
end
|
37
51
|
end
|
38
52
|
end
|
@@ -63,16 +77,30 @@ class JsRegex
|
|
63
77
|
end
|
64
78
|
end
|
65
79
|
|
66
|
-
def build_permutation(node,
|
80
|
+
def build_permutation(node, conds, truthy_conds, caps_per_branch, i)
|
67
81
|
truthy = truthy_conds.include?(node.reference)
|
68
82
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
83
|
+
case node.type
|
84
|
+
when :backref
|
85
|
+
# We cannot use named groups or backrefs in the conditional expansion,
|
86
|
+
# their repetition would cause a "Duplicate capture group name" error in JS.
|
87
|
+
node.update(children: [
|
88
|
+
node.children.first.sub(/k<.*>/, node.reference.to_s)
|
89
|
+
])
|
90
|
+
# backref numbers need to be incremented for subsequent "branches"
|
75
91
|
adapt_backref_to_permutation(node, caps_per_branch, i)
|
92
|
+
when :captured_group
|
93
|
+
# Remove name, c.f. :backref handling.
|
94
|
+
node.update(children: [
|
95
|
+
node.children.first.sub(/\?<.*>/, ''),
|
96
|
+
*node.children[1..-1]
|
97
|
+
])
|
98
|
+
# if the group is referenced by any condition, modulate its quantity
|
99
|
+
if conds.include?(node.reference)
|
100
|
+
adapt_referenced_group_to_permutation(node, truthy)
|
101
|
+
end
|
102
|
+
when :conditional
|
103
|
+
adapt_conditional_to_permutation(node, truthy)
|
76
104
|
end
|
77
105
|
end
|
78
106
|
|
@@ -91,8 +119,8 @@ class JsRegex
|
|
91
119
|
end
|
92
120
|
|
93
121
|
def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
|
94
|
-
new_num = backref_node.
|
95
|
-
backref_node.update(children: [new_num
|
122
|
+
new_num = backref_node.reference + caps_per_branch * i
|
123
|
+
backref_node.update(children: ["\\#{new_num}"])
|
96
124
|
end
|
97
125
|
|
98
126
|
def min_quantify(node)
|
@@ -0,0 +1,19 @@
|
|
1
|
+
class JsRegex
|
2
|
+
module Target
|
3
|
+
ES2009 = 'ES2009'
|
4
|
+
ES2015 = 'ES2015'
|
5
|
+
ES2018 = 'ES2018'
|
6
|
+
SUPPORTED = [ES2009, ES2015, ES2018].freeze
|
7
|
+
|
8
|
+
def self.cast(arg)
|
9
|
+
return ES2009 if arg.nil?
|
10
|
+
|
11
|
+
normalized_arg = arg.to_s.upcase
|
12
|
+
return normalized_arg if SUPPORTED.include?(normalized_arg)
|
13
|
+
|
14
|
+
raise ArgumentError.new(
|
15
|
+
"Unknown target: #{arg.inspect}. Try one of #{SUPPORTED}."
|
16
|
+
).extend(JsRegex::Error)
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/js_regex/version.rb
CHANGED
data/lib/js_regex.rb
CHANGED
@@ -12,10 +12,10 @@ class JsRegex
|
|
12
12
|
require_relative File.join('js_regex', 'version')
|
13
13
|
require 'json'
|
14
14
|
|
15
|
-
attr_reader :source, :options, :warnings
|
15
|
+
attr_reader :source, :options, :warnings, :target
|
16
16
|
|
17
|
-
def initialize(ruby_regex,
|
18
|
-
@source, @options, @warnings = Conversion.of(ruby_regex,
|
17
|
+
def initialize(ruby_regex, **kwargs)
|
18
|
+
@source, @options, @warnings, @target = Conversion.of(ruby_regex, **kwargs)
|
19
19
|
end
|
20
20
|
|
21
21
|
def to_h
|
@@ -30,8 +30,8 @@ class JsRegex
|
|
30
30
|
"/#{source.empty? ? '(?:)' : source}/#{options}"
|
31
31
|
end
|
32
32
|
|
33
|
-
def self.new!(ruby_regex,
|
34
|
-
js_regex = new(ruby_regex,
|
33
|
+
def self.new!(ruby_regex, **kwargs)
|
34
|
+
js_regex = new(ruby_regex, **kwargs)
|
35
35
|
if js_regex.warnings.any?
|
36
36
|
raise StandardError.new(
|
37
37
|
"Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: character_set
|
@@ -72,9 +72,11 @@ files:
|
|
72
72
|
- lib/js_regex/converter/escape_converter.rb
|
73
73
|
- lib/js_regex/converter/freespace_converter.rb
|
74
74
|
- lib/js_regex/converter/group_converter.rb
|
75
|
+
- lib/js_regex/converter/keep_converter.rb
|
75
76
|
- lib/js_regex/converter/literal_converter.rb
|
76
77
|
- lib/js_regex/converter/meta_converter.rb
|
77
78
|
- lib/js_regex/converter/property_converter.rb
|
79
|
+
- lib/js_regex/converter/property_map.csv
|
78
80
|
- lib/js_regex/converter/set_converter.rb
|
79
81
|
- lib/js_regex/converter/subexpression_converter.rb
|
80
82
|
- lib/js_regex/converter/type_converter.rb
|
@@ -82,6 +84,7 @@ files:
|
|
82
84
|
- lib/js_regex/error.rb
|
83
85
|
- lib/js_regex/node.rb
|
84
86
|
- lib/js_regex/second_pass.rb
|
87
|
+
- lib/js_regex/target.rb
|
85
88
|
- lib/js_regex/version.rb
|
86
89
|
homepage: https://github.com/jaynetics/js_regex
|
87
90
|
licenses:
|