js_regex 3.0.0 → 3.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/js_regex/conversion.rb +7 -5
- data/lib/js_regex/converter/backreference_converter.rb +22 -28
- data/lib/js_regex/converter/base.rb +29 -18
- data/lib/js_regex/converter/conditional_converter.rb +13 -5
- data/lib/js_regex/converter/context.rb +6 -12
- data/lib/js_regex/converter/escape_converter.rb +3 -3
- data/lib/js_regex/converter/group_converter.rb +20 -6
- data/lib/js_regex/converter/literal_converter.rb +19 -6
- data/lib/js_regex/converter/meta_converter.rb +9 -6
- data/lib/js_regex/converter/property_converter.rb +6 -7
- data/lib/js_regex/converter/set_converter.rb +7 -2
- data/lib/js_regex/converter/{root_converter.rb → subexpression_converter.rb} +1 -1
- data/lib/js_regex/converter/type_converter.rb +1 -1
- data/lib/js_regex/converter.rb +16 -11
- data/lib/js_regex/node.rb +70 -0
- data/lib/js_regex/second_pass.rb +120 -0
- data/lib/js_regex/version.rb +1 -1
- metadata +7 -6
- data/lib/js_regex/converter/nonproperty_converter.rb +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7219e7e794aa7b4df64655f362d448122fd07f901686ead1706626e925919976
|
4
|
+
data.tar.gz: 97924f576ecabd32288c9a4827edf5538b21d807813afb8c7ee2cfc639136b2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0de5c05e55115fa6bc400551e6cd6eeaf9370b65bcf64262bebb6aaee7612ed4d0e6d67c10eeae794ae28af83839e1765f4cbc2b5ab0274e117ff8c20a4b30be
|
7
|
+
data.tar.gz: b71ef3b13a3969b65c332b1ecb42e164fa6acf7e317872eda254a485647378870749c74f04486d520d8b854ac39c5d9a1e90d6f84f898ebbda8ffd4f7bb339df
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -9,6 +9,8 @@ class JsRegex
|
|
9
9
|
class Conversion
|
10
10
|
require 'regexp_parser'
|
11
11
|
require_relative 'converter'
|
12
|
+
require_relative 'node'
|
13
|
+
require_relative 'second_pass'
|
12
14
|
|
13
15
|
class << self
|
14
16
|
def of(ruby_regex, options: nil)
|
@@ -20,11 +22,11 @@ class JsRegex
|
|
20
22
|
private
|
21
23
|
|
22
24
|
def convert_source(ruby_regex)
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
]
|
25
|
+
tree = Regexp::Parser.parse(ruby_regex)
|
26
|
+
context = Converter::Context.new(case_insensitive_root: tree.i?)
|
27
|
+
converted_tree = Converter.convert(tree, context)
|
28
|
+
final_tree = SecondPass.call(converted_tree)
|
29
|
+
[final_tree.to_s, context.warnings]
|
28
30
|
end
|
29
31
|
|
30
32
|
def convert_options(ruby_regex, custom_options)
|
@@ -15,60 +15,54 @@ class JsRegex
|
|
15
15
|
when :name_ref then convert_name_ref
|
16
16
|
when :number, :number_ref then convert_number_ref
|
17
17
|
when :number_rel_ref then convert_number_rel_ref
|
18
|
-
when :name_call then
|
19
|
-
when :number_call then
|
20
|
-
when :number_rel_call then
|
18
|
+
when :name_call then mark_name_call
|
19
|
+
when :number_call then mark_number_call
|
20
|
+
when :number_rel_call then mark_number_rel_call
|
21
21
|
else # name_recursion_ref, number_recursion_ref, ...
|
22
22
|
warn_of_unsupported_feature
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
26
|
def convert_name_ref
|
27
|
-
|
27
|
+
convert_ref(context.named_group_positions.fetch(expression.name))
|
28
28
|
end
|
29
29
|
|
30
30
|
def convert_number_ref
|
31
|
-
|
31
|
+
convert_ref(context.new_capturing_group_position(expression.number))
|
32
32
|
end
|
33
33
|
|
34
34
|
def convert_number_rel_ref
|
35
|
-
|
35
|
+
convert_ref(context.new_capturing_group_position(absolute_position))
|
36
|
+
end
|
37
|
+
|
38
|
+
def convert_ref(position)
|
39
|
+
Node.new('\\', Node.new(position.to_s, type: :backref_num))
|
36
40
|
end
|
37
41
|
|
38
42
|
def absolute_position
|
39
43
|
expression.number + context.original_capturing_group_count + 1
|
40
44
|
end
|
41
45
|
|
42
|
-
def
|
43
|
-
|
44
|
-
group.token == :named && group.name == expression.name
|
45
|
-
end
|
46
|
+
def mark_name_call
|
47
|
+
mark_call(expression.name)
|
46
48
|
end
|
47
49
|
|
48
|
-
def
|
49
|
-
if expression.number
|
50
|
+
def mark_number_call
|
51
|
+
if expression.number.equal?(0)
|
50
52
|
return warn_of_unsupported_feature('whole-pattern recursion')
|
51
53
|
end
|
52
|
-
|
53
|
-
[:capture, :options].include?(group.token) &&
|
54
|
-
group.number.equal?(expression.number)
|
55
|
-
end
|
54
|
+
mark_call(expression.number)
|
56
55
|
end
|
57
56
|
|
58
|
-
def
|
59
|
-
|
60
|
-
|
61
|
-
group.number.equal?(absolute_position)
|
62
|
-
end
|
57
|
+
def mark_number_rel_call
|
58
|
+
is_forward_referring = data.include?('+') # e.g. \g<+2>
|
59
|
+
mark_call(absolute_position - (is_forward_referring ? 1 : 0))
|
63
60
|
end
|
64
61
|
|
65
|
-
def
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
end
|
70
|
-
end
|
71
|
-
''
|
62
|
+
def mark_call(reference)
|
63
|
+
# increment group count as calls will be substituted with groups
|
64
|
+
context.increment_local_capturing_group_count
|
65
|
+
Node.new(reference: reference, type: :subexp_call)
|
72
66
|
end
|
73
67
|
end
|
74
68
|
end
|
@@ -3,15 +3,18 @@
|
|
3
3
|
class JsRegex
|
4
4
|
module Converter
|
5
5
|
#
|
6
|
-
# Template class. Implement #convert_data in subclasses
|
6
|
+
# Template class. Implement #convert_data in subclasses and return
|
7
|
+
# an instance of String or Node from it.
|
7
8
|
#
|
8
9
|
class Base
|
10
|
+
# returns an instance of Node with #quantifier attached.
|
9
11
|
def convert(expression, context)
|
10
12
|
self.context = context
|
11
13
|
self.expression = expression
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
+
node = convert_data
|
16
|
+
node = Node.new(node) if node.instance_of?(String)
|
17
|
+
apply_quantifier(node)
|
15
18
|
end
|
16
19
|
|
17
20
|
private
|
@@ -27,41 +30,49 @@ class JsRegex
|
|
27
30
|
end
|
28
31
|
alias pass_through data
|
29
32
|
|
30
|
-
def apply_quantifier(
|
31
|
-
return
|
33
|
+
def apply_quantifier(node)
|
34
|
+
return node if node.dropped? || (qtf = expression.quantifier).nil?
|
32
35
|
|
33
|
-
if
|
34
|
-
|
36
|
+
if qtf.possessive?
|
37
|
+
node.update(quantifier: qtf.text[0..-2])
|
38
|
+
return wrap_in_backrefed_lookahead([node])
|
35
39
|
else
|
36
|
-
|
40
|
+
node.update(quantifier: qtf)
|
37
41
|
end
|
38
|
-
end
|
39
42
|
|
40
|
-
|
41
|
-
convert_expressions(subexpressions)
|
43
|
+
node
|
42
44
|
end
|
43
45
|
|
44
|
-
def
|
45
|
-
expressions.map { |exp|
|
46
|
+
def convert_subexpressions
|
47
|
+
Node.new(*expression.expressions.map { |exp| convert_expression(exp) })
|
46
48
|
end
|
47
49
|
|
48
|
-
def
|
49
|
-
expression
|
50
|
+
def convert_expression(expression)
|
51
|
+
Converter.convert(expression, context)
|
50
52
|
end
|
51
53
|
|
52
54
|
def warn_of_unsupported_feature(description = nil)
|
53
55
|
description ||= "#{subtype} #{expression.type}".tr('_', ' ')
|
54
56
|
full_desc = "#{description} '#{expression}'"
|
55
57
|
warn("Dropped unsupported #{full_desc} at index #{expression.ts}")
|
56
|
-
|
58
|
+
drop
|
57
59
|
end
|
58
60
|
|
59
61
|
def warn(text)
|
60
62
|
context.warnings << text
|
61
63
|
end
|
62
64
|
|
63
|
-
def
|
64
|
-
|
65
|
+
def drop
|
66
|
+
Node.new(type: :dropped)
|
67
|
+
end
|
68
|
+
alias drop_without_warning drop
|
69
|
+
|
70
|
+
def wrap_in_backrefed_lookahead(content)
|
71
|
+
backref_num = context.capturing_group_count + 1
|
72
|
+
backref_num_node = Node.new(backref_num.to_s, type: :backref_num)
|
73
|
+
context.increment_local_capturing_group_count
|
74
|
+
# an empty passive group (?:) is appended as literal digits may follow
|
75
|
+
Node.new('(?=(', *content, '))\\', backref_num_node, '(?:)')
|
65
76
|
end
|
66
77
|
end
|
67
78
|
end
|
@@ -11,12 +11,20 @@ class JsRegex
|
|
11
11
|
private
|
12
12
|
|
13
13
|
def convert_data
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
case subtype
|
15
|
+
when :open then mark_conditional
|
16
|
+
when :condition then drop_without_warning
|
17
|
+
else warn_of_unsupported_feature
|
18
18
|
end
|
19
|
-
|
19
|
+
end
|
20
|
+
|
21
|
+
def mark_conditional
|
22
|
+
reference = expression.reference
|
23
|
+
node = Node.new('(?:', reference: reference, type: :conditional)
|
24
|
+
expression.branches.each do |branch|
|
25
|
+
node << Node.new('(?:', convert_expression(branch), ')')
|
26
|
+
end
|
27
|
+
node << ')'
|
20
28
|
end
|
21
29
|
end
|
22
30
|
end
|
@@ -8,20 +8,19 @@ class JsRegex
|
|
8
8
|
# The Converters themselves are stateless.
|
9
9
|
#
|
10
10
|
class Context
|
11
|
-
attr_reader :
|
11
|
+
attr_reader :capturing_group_count,
|
12
12
|
:case_insensitive_root,
|
13
13
|
:in_atomic_group,
|
14
14
|
:named_group_positions,
|
15
15
|
:warnings
|
16
16
|
|
17
|
-
def initialize(
|
17
|
+
def initialize(case_insensitive_root: false)
|
18
18
|
self.added_capturing_groups_after_group = Hash.new(0)
|
19
19
|
self.capturing_group_count = 0
|
20
20
|
self.named_group_positions = {}
|
21
21
|
self.warnings = []
|
22
22
|
|
23
|
-
self.
|
24
|
-
self.case_insensitive_root = ast.case_insensitive?
|
23
|
+
self.case_insensitive_root = case_insensitive_root
|
25
24
|
end
|
26
25
|
|
27
26
|
# group context
|
@@ -38,13 +37,9 @@ class JsRegex
|
|
38
37
|
self.in_atomic_group = false
|
39
38
|
end
|
40
39
|
|
41
|
-
def
|
42
|
-
new_backref_num = capturing_group_count + 1
|
43
|
-
# an empty passive group (?:) is appended as literal digits may follow
|
44
|
-
result = "(?=(#{content}))\\#{new_backref_num}(?:)"
|
40
|
+
def increment_local_capturing_group_count
|
45
41
|
added_capturing_groups_after_group[original_capturing_group_count] += 1
|
46
42
|
capture_group
|
47
|
-
result
|
48
43
|
end
|
49
44
|
|
50
45
|
# takes and returns 1-indexed group positions.
|
@@ -67,10 +62,9 @@ class JsRegex
|
|
67
62
|
|
68
63
|
private
|
69
64
|
|
70
|
-
attr_accessor :added_capturing_groups_after_group
|
71
|
-
:capturing_group_count
|
65
|
+
attr_accessor :added_capturing_groups_after_group
|
72
66
|
|
73
|
-
attr_writer :
|
67
|
+
attr_writer :capturing_group_count,
|
74
68
|
:case_insensitive_root,
|
75
69
|
:in_atomic_group,
|
76
70
|
:named_group_positions,
|
@@ -55,9 +55,9 @@ class JsRegex
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def convert_codepoint_list
|
58
|
-
expression.chars.
|
59
|
-
LiteralConverter.convert_data(Regexp.escape(char))
|
60
|
-
end
|
58
|
+
expression.chars.each_with_object(Node.new) do |char, node|
|
59
|
+
node << LiteralConverter.convert_data(Regexp.escape(char))
|
60
|
+
end
|
61
61
|
end
|
62
62
|
|
63
63
|
def unicode_escape_codepoint
|
@@ -28,7 +28,7 @@ class JsRegex
|
|
28
28
|
build_unsupported_group('nested atomic group')
|
29
29
|
else
|
30
30
|
context.start_atomic_group
|
31
|
-
result =
|
31
|
+
result = wrap_in_backrefed_lookahead(convert_subexpressions)
|
32
32
|
context.end_atomic_group
|
33
33
|
result
|
34
34
|
end
|
@@ -37,15 +37,19 @@ class JsRegex
|
|
37
37
|
def build_named_group
|
38
38
|
# remember position, then drop name part without warning
|
39
39
|
context.store_named_group_position(expression.name)
|
40
|
-
build_group(head: '(')
|
40
|
+
build_group(head: '(', reference: expression.name)
|
41
41
|
end
|
42
42
|
|
43
43
|
def build_options_group
|
44
44
|
unless (encoding_options = data.scan(/[adu]/)).empty?
|
45
45
|
warn_of_unsupported_feature("encoding options #{encoding_options}")
|
46
46
|
end
|
47
|
-
|
48
|
-
|
47
|
+
if subtype.equal?(:options_switch)
|
48
|
+
# can be ignored since #options on subsequent Expressions are correct
|
49
|
+
drop_without_warning
|
50
|
+
else
|
51
|
+
build_passive_group
|
52
|
+
end
|
49
53
|
end
|
50
54
|
|
51
55
|
def build_passive_group
|
@@ -58,9 +62,19 @@ class JsRegex
|
|
58
62
|
end
|
59
63
|
|
60
64
|
def build_group(opts = {})
|
61
|
-
context.capture_group unless opts[:capturing].equal?(false)
|
62
65
|
head = opts[:head] || pass_through
|
63
|
-
|
66
|
+
if opts[:capturing].equal?(false)
|
67
|
+
return Node.new(*group_with_head(head))
|
68
|
+
end
|
69
|
+
|
70
|
+
context.capture_group
|
71
|
+
|
72
|
+
ref = opts[:reference] || expression.number
|
73
|
+
Node.new(*group_with_head(head), reference: ref, type: :captured_group)
|
74
|
+
end
|
75
|
+
|
76
|
+
def group_with_head(head)
|
77
|
+
[head, *convert_subexpressions, ')']
|
64
78
|
end
|
65
79
|
end
|
66
80
|
end
|
@@ -9,18 +9,28 @@ class JsRegex
|
|
9
9
|
#
|
10
10
|
class LiteralConverter < JsRegex::Converter::Base
|
11
11
|
class << self
|
12
|
-
ASTRAL_PLANE_CODEPOINT_PATTERN =
|
12
|
+
ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{FFFFF}]/
|
13
13
|
|
14
14
|
def convert_data(data)
|
15
15
|
if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
16
|
-
|
16
|
+
data.each_char.each_with_object(Node.new) do |chr, node|
|
17
|
+
if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
18
|
+
node << surrogate_pair_for(chr)
|
19
|
+
else
|
20
|
+
node << convert_bmp_data(chr)
|
21
|
+
end
|
22
|
+
end
|
17
23
|
else
|
18
|
-
|
19
|
-
ensure_forward_slashes_are_escaped(data)
|
20
|
-
)
|
24
|
+
convert_bmp_data(data)
|
21
25
|
end
|
22
26
|
end
|
23
27
|
|
28
|
+
def convert_bmp_data(data)
|
29
|
+
ensure_json_compatibility(
|
30
|
+
ensure_forward_slashes_are_escaped(data)
|
31
|
+
)
|
32
|
+
end
|
33
|
+
|
24
34
|
private
|
25
35
|
|
26
36
|
def surrogate_pair_for(astral_char)
|
@@ -56,7 +66,10 @@ class JsRegex
|
|
56
66
|
|
57
67
|
def handle_locally_case_insensitive_literal(literal)
|
58
68
|
return literal unless literal =~ HAS_CASE_PATTERN
|
59
|
-
|
69
|
+
|
70
|
+
literal.each_char.each_with_object(Node.new) do |chr, node|
|
71
|
+
node << (chr =~ HAS_CASE_PATTERN ? "[#{chr}#{chr.swapcase}]" : chr)
|
72
|
+
end
|
60
73
|
end
|
61
74
|
end
|
62
75
|
end
|
@@ -13,7 +13,7 @@ class JsRegex
|
|
13
13
|
def convert_data
|
14
14
|
case subtype
|
15
15
|
when :alternation
|
16
|
-
|
16
|
+
convert_alternatives
|
17
17
|
when :dot
|
18
18
|
expression.multiline? ? '(?:.|\n)' : '.'
|
19
19
|
else
|
@@ -21,12 +21,15 @@ class JsRegex
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
def convert_alternatives
|
25
|
+
kept_any = false
|
26
|
+
|
27
|
+
convert_subexpressions.map do |node|
|
28
|
+
dropped = !node.children.empty? && node.children.all?(&:dropped?)
|
29
|
+
node.children.unshift('|') if kept_any.equal?(true) && !dropped
|
30
|
+
kept_any = true unless dropped
|
31
|
+
node
|
28
32
|
end
|
29
|
-
branches.join('|')
|
30
33
|
end
|
31
34
|
end
|
32
35
|
end
|
@@ -8,20 +8,19 @@ class JsRegex
|
|
8
8
|
#
|
9
9
|
# Template class implementation.
|
10
10
|
#
|
11
|
+
# Uses the `character_set` and `regexp_property_values` gems to get the
|
12
|
+
# codepoints matched by the property and build a set string from them.
|
13
|
+
#
|
11
14
|
class PropertyConverter < JsRegex::Converter::Base
|
12
15
|
private
|
13
16
|
|
14
17
|
def convert_data
|
15
|
-
convert_property
|
16
|
-
end
|
17
|
-
|
18
|
-
def convert_property(negated = nil)
|
19
18
|
content = CharacterSet.of_property(subtype)
|
20
19
|
if expression.case_insensitive? && !context.case_insensitive_root
|
21
20
|
content = content.case_insensitive
|
22
21
|
end
|
23
22
|
|
24
|
-
if
|
23
|
+
if expression.negative?
|
25
24
|
if content.astral_part.empty?
|
26
25
|
return "[^#{content.to_s(format: :js)}]"
|
27
26
|
else
|
@@ -35,10 +34,10 @@ class JsRegex
|
|
35
34
|
end
|
36
35
|
|
37
36
|
bmp_part = content.bmp_part
|
38
|
-
return
|
37
|
+
return drop if bmp_part.empty?
|
39
38
|
|
40
39
|
string = bmp_part.to_s(format: :js)
|
41
|
-
|
40
|
+
expression.negative? ? "[^#{string}]" : "[#{string}]"
|
42
41
|
end
|
43
42
|
end
|
44
43
|
end
|
@@ -10,6 +10,11 @@ class JsRegex
|
|
10
10
|
#
|
11
11
|
# Template class implementation.
|
12
12
|
#
|
13
|
+
# Unlike other converters, this one does not recurse on subexpressions,
|
14
|
+
# since many are unsupported by JavaScript. If it detects incompatible
|
15
|
+
# children, it uses the `character_set` gem to establish the codepoints
|
16
|
+
# matched by the whole set and build a completely new set string.
|
17
|
+
#
|
13
18
|
class SetConverter < JsRegex::Converter::Base
|
14
19
|
private
|
15
20
|
|
@@ -32,7 +37,7 @@ class JsRegex
|
|
32
37
|
else
|
33
38
|
warn_of_unsupported_feature('large astral plane match of set')
|
34
39
|
bmp_part = content.bmp_part
|
35
|
-
bmp_part.empty? ?
|
40
|
+
bmp_part.empty? ? drop : bmp_part.to_s(format: :js, in_brackets: true)
|
36
41
|
end
|
37
42
|
end
|
38
43
|
|
@@ -42,7 +47,7 @@ class JsRegex
|
|
42
47
|
return false
|
43
48
|
end
|
44
49
|
|
45
|
-
# check for
|
50
|
+
# check for children needing conversion (#each_expression is recursive)
|
46
51
|
expression.each_expression do |node|
|
47
52
|
case node.type
|
48
53
|
when :literal
|
data/lib/js_regex/converter.rb
CHANGED
@@ -12,27 +12,32 @@ class JsRegex
|
|
12
12
|
backref: BackreferenceConverter,
|
13
13
|
conditional: ConditionalConverter,
|
14
14
|
escape: EscapeConverter,
|
15
|
+
expression: SubexpressionConverter,
|
15
16
|
free_space: FreespaceConverter,
|
16
17
|
group: GroupConverter,
|
17
18
|
literal: LiteralConverter,
|
18
19
|
meta: MetaConverter,
|
19
|
-
nonproperty:
|
20
|
+
nonproperty: PropertyConverter,
|
20
21
|
property: PropertyConverter,
|
21
22
|
set: SetConverter,
|
22
23
|
type: TypeConverter
|
23
24
|
).freeze
|
24
25
|
|
25
|
-
def self.for(expression)
|
26
|
-
MAP[expression.type].new
|
27
|
-
end
|
28
|
-
|
29
|
-
# Limit the number of generated surrogate pairs, else the output might
|
30
|
-
# get too large for certain applications. The chosen number is somewhat
|
31
|
-
# arbitrary. 100 pairs make for about 1 KB, uncompressed. The median char
|
32
|
-
# count of all properties supported by Ruby is 92. 75% are below 300 chars.
|
33
|
-
#
|
34
|
-
# Set this to nil if you need full unicode matches and size doesn't matter.
|
35
26
|
class << self
|
27
|
+
def convert(exp, context = nil)
|
28
|
+
self.for(exp).convert(exp, context || Context.new)
|
29
|
+
end
|
30
|
+
|
31
|
+
def for(expression)
|
32
|
+
MAP[expression.type].new
|
33
|
+
end
|
34
|
+
|
35
|
+
# Limit the number of generated surrogate pairs, else the output might
|
36
|
+
# get to large for certain applications. The chosen number is somewhat
|
37
|
+
# arbitrary. 100 pairs make for about 1 KB, uncompressed. The median char
|
38
|
+
# count of all properties supported by Ruby is 92. 75% are below 300 chars.
|
39
|
+
#
|
40
|
+
# Set this to nil if you need full unicode matches and size doesn't matter.
|
36
41
|
attr_accessor :surrogate_pair_limit
|
37
42
|
end
|
38
43
|
self.surrogate_pair_limit = 300
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class JsRegex
|
4
|
+
#
|
5
|
+
# Converter#convert result. Represents a branch or leaf node with an optional
|
6
|
+
# quantifier as well as type and reference annotations for SecondPass.
|
7
|
+
#
|
8
|
+
class Node
|
9
|
+
attr_reader :children, :quantifier, :reference, :type
|
10
|
+
|
11
|
+
TYPES = %i[
|
12
|
+
backref_num
|
13
|
+
captured_group
|
14
|
+
conditional
|
15
|
+
dropped
|
16
|
+
plain
|
17
|
+
subexp_call
|
18
|
+
]
|
19
|
+
|
20
|
+
def initialize(*children, quantifier: nil, reference: nil, type: :plain)
|
21
|
+
raise ArgumentError, "bad type #{type}" unless TYPES.include?(type)
|
22
|
+
self.children = children
|
23
|
+
self.quantifier = quantifier
|
24
|
+
self.reference = reference
|
25
|
+
self.type = type
|
26
|
+
end
|
27
|
+
|
28
|
+
def initialize_copy(orig)
|
29
|
+
super
|
30
|
+
self.children = orig.children.map(&:clone)
|
31
|
+
self.quantifier = orig.quantifier && orig.quantifier.clone
|
32
|
+
end
|
33
|
+
|
34
|
+
def map(&block)
|
35
|
+
clone.tap { |node| node.children.replace(children.map(&block)) }
|
36
|
+
end
|
37
|
+
|
38
|
+
def <<(node)
|
39
|
+
children << node
|
40
|
+
self
|
41
|
+
end
|
42
|
+
|
43
|
+
def dropped?
|
44
|
+
# keep everything else, including empty or depleted capturing groups
|
45
|
+
# so as not to mess with reference numbers (e.g. backrefs)
|
46
|
+
type.equal?(:dropped)
|
47
|
+
end
|
48
|
+
|
49
|
+
def to_s
|
50
|
+
case type
|
51
|
+
when :dropped
|
52
|
+
''
|
53
|
+
when :backref_num, :captured_group, :plain
|
54
|
+
children.join << quantifier.to_s
|
55
|
+
else
|
56
|
+
raise TypeError, "#{type} must be substituted before stringification"
|
57
|
+
end
|
58
|
+
end
|
59
|
+
|
60
|
+
def update(attrs)
|
61
|
+
self.children = attrs[:children] if attrs.key?(:children)
|
62
|
+
self.quantifier = attrs[:quantifier] if attrs.key?(:quantifier)
|
63
|
+
self.type = attrs[:type] if attrs.key?(:type)
|
64
|
+
end
|
65
|
+
|
66
|
+
private
|
67
|
+
|
68
|
+
attr_writer :children, :reference, :quantifier, :type
|
69
|
+
end
|
70
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class JsRegex
|
4
|
+
#
|
5
|
+
# After conversion of a full Regexp::Expression tree, this class
|
6
|
+
# checks for Node instances that need further processing.
|
7
|
+
#
|
8
|
+
# E.g. subexpression calls (such as \g<1>) can be forward-referring,
|
9
|
+
# so the full Regexp must have been processed first, and only then can
|
10
|
+
# they be substituted with the conversion result of their targeted group.
|
11
|
+
#
|
12
|
+
module SecondPass
|
13
|
+
module_function
|
14
|
+
|
15
|
+
def call(tree)
|
16
|
+
substitute_subexp_calls(tree)
|
17
|
+
alternate_conditional_permutations(tree)
|
18
|
+
tree
|
19
|
+
end
|
20
|
+
|
21
|
+
def substitute_subexp_calls(tree)
|
22
|
+
crawl(tree) do |node|
|
23
|
+
if node.type == :subexp_call
|
24
|
+
called_group = find_group_by_reference(node.reference, in_node: tree)
|
25
|
+
node.update(children: called_group.children, type: :captured_group)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
def crawl(node, &block)
|
31
|
+
return if node.instance_of?(String)
|
32
|
+
yield(node)
|
33
|
+
node.children.each { |child| crawl(child, &block) }
|
34
|
+
end
|
35
|
+
|
36
|
+
def alternate_conditional_permutations(tree)
|
37
|
+
permutations = conditional_tree_permutations(tree)
|
38
|
+
return tree if permutations.empty?
|
39
|
+
|
40
|
+
alternatives = permutations.map.with_index do |variant, i|
|
41
|
+
Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')')
|
42
|
+
end
|
43
|
+
tree.update(children: alternatives)
|
44
|
+
end
|
45
|
+
|
46
|
+
def find_group_by_reference(ref, in_node: nil)
|
47
|
+
crawl(in_node) do |node|
|
48
|
+
return node if node.type == :captured_group && node.reference == ref
|
49
|
+
end
|
50
|
+
Node.new('()')
|
51
|
+
end
|
52
|
+
|
53
|
+
def conditional_tree_permutations(tree)
|
54
|
+
all_conditions = conditions(tree)
|
55
|
+
return [] if all_conditions.empty?
|
56
|
+
|
57
|
+
captured_groups_per_branch = captured_group_count(tree)
|
58
|
+
|
59
|
+
condition_permutations(all_conditions).map.with_index do |truthy_conds, i|
|
60
|
+
tree_permutation = tree.clone
|
61
|
+
# find referenced groups and conditionals and make one-sided
|
62
|
+
crawl(tree_permutation) do |node|
|
63
|
+
truthy = truthy_conds.include?(node.reference)
|
64
|
+
|
65
|
+
if node.type.equal?(:captured_group) &&
|
66
|
+
all_conditions.include?(node.reference)
|
67
|
+
truthy ? min_quantify(node) : null_quantify(node)
|
68
|
+
elsif node.type.equal?(:conditional)
|
69
|
+
branches = node.children[1...-1]
|
70
|
+
if branches.count == 1
|
71
|
+
truthy || null_quantify(branches.first)
|
72
|
+
else
|
73
|
+
null_quantify(truthy ? branches.last : branches.first)
|
74
|
+
end
|
75
|
+
node.update(type: :plain)
|
76
|
+
elsif node.type.equal?(:backref_num)
|
77
|
+
new_num = node.children[0].to_i + captured_groups_per_branch * i
|
78
|
+
node.update(children: [new_num.to_s])
|
79
|
+
end
|
80
|
+
end
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def conditions(tree)
|
85
|
+
conditions = []
|
86
|
+
crawl(tree) do |node|
|
87
|
+
conditions << node.reference if node.type.equal?(:conditional)
|
88
|
+
end
|
89
|
+
conditions
|
90
|
+
end
|
91
|
+
|
92
|
+
def captured_group_count(tree)
|
93
|
+
count = 0
|
94
|
+
crawl(tree) { |node| count += 1 if node.type.equal?(:captured_group)}
|
95
|
+
count
|
96
|
+
end
|
97
|
+
|
98
|
+
def condition_permutations(conditions)
|
99
|
+
return [] if conditions.empty?
|
100
|
+
|
101
|
+
condition_permutations = (0..(conditions.length)).inject([]) do |arr, n|
|
102
|
+
arr += conditions.combination(n).to_a
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def min_quantify(node)
|
107
|
+
return if (qtf = node.quantifier).nil? || qtf.min > 0
|
108
|
+
|
109
|
+
if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
|
110
|
+
node.update(quantifier: nil)
|
111
|
+
else
|
112
|
+
node.update(quantifier: "{1,#{qtf.max}}#{'?' if qtf.reluctant?}")
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def null_quantify(node)
|
117
|
+
node.update(quantifier: '{0}')
|
118
|
+
end
|
119
|
+
end
|
120
|
+
end
|
data/lib/js_regex/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: character_set
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1.
|
33
|
+
version: '1.1'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.
|
40
|
+
version: '1.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: regexp_property_values
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -144,12 +144,13 @@ files:
|
|
144
144
|
- lib/js_regex/converter/group_converter.rb
|
145
145
|
- lib/js_regex/converter/literal_converter.rb
|
146
146
|
- lib/js_regex/converter/meta_converter.rb
|
147
|
-
- lib/js_regex/converter/nonproperty_converter.rb
|
148
147
|
- lib/js_regex/converter/property_converter.rb
|
149
|
-
- lib/js_regex/converter/root_converter.rb
|
150
148
|
- lib/js_regex/converter/set_converter.rb
|
149
|
+
- lib/js_regex/converter/subexpression_converter.rb
|
151
150
|
- lib/js_regex/converter/type_converter.rb
|
152
151
|
- lib/js_regex/converter/unsupported_token_converter.rb
|
152
|
+
- lib/js_regex/node.rb
|
153
|
+
- lib/js_regex/second_pass.rb
|
153
154
|
- lib/js_regex/version.rb
|
154
155
|
homepage: https://github.com/janosch-x/js_regex
|
155
156
|
licenses:
|
@@ -1,21 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'base'
|
4
|
-
require_relative 'property_converter'
|
5
|
-
|
6
|
-
class JsRegex
|
7
|
-
module Converter
|
8
|
-
#
|
9
|
-
# Template class implementation.
|
10
|
-
#
|
11
|
-
# Note the inheritance from PropertyConverter.
|
12
|
-
#
|
13
|
-
class NonpropertyConverter < JsRegex::Converter::PropertyConverter
|
14
|
-
private
|
15
|
-
|
16
|
-
def convert_data
|
17
|
-
convert_property(true)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|