js_regex 3.0.0 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/js_regex/conversion.rb +7 -5
- data/lib/js_regex/converter/backreference_converter.rb +22 -28
- data/lib/js_regex/converter/base.rb +29 -18
- data/lib/js_regex/converter/conditional_converter.rb +13 -5
- data/lib/js_regex/converter/context.rb +6 -12
- data/lib/js_regex/converter/escape_converter.rb +3 -3
- data/lib/js_regex/converter/group_converter.rb +20 -6
- data/lib/js_regex/converter/literal_converter.rb +19 -6
- data/lib/js_regex/converter/meta_converter.rb +9 -6
- data/lib/js_regex/converter/property_converter.rb +6 -7
- data/lib/js_regex/converter/set_converter.rb +7 -2
- data/lib/js_regex/converter/{root_converter.rb → subexpression_converter.rb} +1 -1
- data/lib/js_regex/converter/type_converter.rb +1 -1
- data/lib/js_regex/converter.rb +16 -11
- data/lib/js_regex/node.rb +70 -0
- data/lib/js_regex/second_pass.rb +120 -0
- data/lib/js_regex/version.rb +1 -1
- metadata +7 -6
- data/lib/js_regex/converter/nonproperty_converter.rb +0 -21
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7219e7e794aa7b4df64655f362d448122fd07f901686ead1706626e925919976
|
4
|
+
data.tar.gz: 97924f576ecabd32288c9a4827edf5538b21d807813afb8c7ee2cfc639136b2d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0de5c05e55115fa6bc400551e6cd6eeaf9370b65bcf64262bebb6aaee7612ed4d0e6d67c10eeae794ae28af83839e1765f4cbc2b5ab0274e117ff8c20a4b30be
|
7
|
+
data.tar.gz: b71ef3b13a3969b65c332b1ecb42e164fa6acf7e317872eda254a485647378870749c74f04486d520d8b854ac39c5d9a1e90d6f84f898ebbda8ffd4f7bb339df
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -9,6 +9,8 @@ class JsRegex
|
|
9
9
|
class Conversion
|
10
10
|
require 'regexp_parser'
|
11
11
|
require_relative 'converter'
|
12
|
+
require_relative 'node'
|
13
|
+
require_relative 'second_pass'
|
12
14
|
|
13
15
|
class << self
|
14
16
|
def of(ruby_regex, options: nil)
|
@@ -20,11 +22,11 @@ class JsRegex
|
|
20
22
|
private
|
21
23
|
|
22
24
|
def convert_source(ruby_regex)
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
]
|
25
|
+
tree = Regexp::Parser.parse(ruby_regex)
|
26
|
+
context = Converter::Context.new(case_insensitive_root: tree.i?)
|
27
|
+
converted_tree = Converter.convert(tree, context)
|
28
|
+
final_tree = SecondPass.call(converted_tree)
|
29
|
+
[final_tree.to_s, context.warnings]
|
28
30
|
end
|
29
31
|
|
30
32
|
def convert_options(ruby_regex, custom_options)
|
@@ -15,60 +15,54 @@ class JsRegex
|
|
15
15
|
when :name_ref then convert_name_ref
|
16
16
|
when :number, :number_ref then convert_number_ref
|
17
17
|
when :number_rel_ref then convert_number_rel_ref
|
18
|
-
when :name_call then
|
19
|
-
when :number_call then
|
20
|
-
when :number_rel_call then
|
18
|
+
when :name_call then mark_name_call
|
19
|
+
when :number_call then mark_number_call
|
20
|
+
when :number_rel_call then mark_number_rel_call
|
21
21
|
else # name_recursion_ref, number_recursion_ref, ...
|
22
22
|
warn_of_unsupported_feature
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
26
|
def convert_name_ref
|
27
|
-
|
27
|
+
convert_ref(context.named_group_positions.fetch(expression.name))
|
28
28
|
end
|
29
29
|
|
30
30
|
def convert_number_ref
|
31
|
-
|
31
|
+
convert_ref(context.new_capturing_group_position(expression.number))
|
32
32
|
end
|
33
33
|
|
34
34
|
def convert_number_rel_ref
|
35
|
-
|
35
|
+
convert_ref(context.new_capturing_group_position(absolute_position))
|
36
|
+
end
|
37
|
+
|
38
|
+
def convert_ref(position)
|
39
|
+
Node.new('\\', Node.new(position.to_s, type: :backref_num))
|
36
40
|
end
|
37
41
|
|
38
42
|
def absolute_position
|
39
43
|
expression.number + context.original_capturing_group_count + 1
|
40
44
|
end
|
41
45
|
|
42
|
-
def
|
43
|
-
|
44
|
-
group.token == :named && group.name == expression.name
|
45
|
-
end
|
46
|
+
def mark_name_call
|
47
|
+
mark_call(expression.name)
|
46
48
|
end
|
47
49
|
|
48
|
-
def
|
49
|
-
if expression.number
|
50
|
+
def mark_number_call
|
51
|
+
if expression.number.equal?(0)
|
50
52
|
return warn_of_unsupported_feature('whole-pattern recursion')
|
51
53
|
end
|
52
|
-
|
53
|
-
[:capture, :options].include?(group.token) &&
|
54
|
-
group.number.equal?(expression.number)
|
55
|
-
end
|
54
|
+
mark_call(expression.number)
|
56
55
|
end
|
57
56
|
|
58
|
-
def
|
59
|
-
|
60
|
-
|
61
|
-
group.number.equal?(absolute_position)
|
62
|
-
end
|
57
|
+
def mark_number_rel_call
|
58
|
+
is_forward_referring = data.include?('+') # e.g. \g<+2>
|
59
|
+
mark_call(absolute_position - (is_forward_referring ? 1 : 0))
|
63
60
|
end
|
64
61
|
|
65
|
-
def
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
end
|
70
|
-
end
|
71
|
-
''
|
62
|
+
def mark_call(reference)
|
63
|
+
# increment group count as calls will be substituted with groups
|
64
|
+
context.increment_local_capturing_group_count
|
65
|
+
Node.new(reference: reference, type: :subexp_call)
|
72
66
|
end
|
73
67
|
end
|
74
68
|
end
|
@@ -3,15 +3,18 @@
|
|
3
3
|
class JsRegex
|
4
4
|
module Converter
|
5
5
|
#
|
6
|
-
# Template class. Implement #convert_data in subclasses
|
6
|
+
# Template class. Implement #convert_data in subclasses and return
|
7
|
+
# instance of String or Node from it.
|
7
8
|
#
|
8
9
|
class Base
|
10
|
+
# returns instance of Node with #quantifier attached.
|
9
11
|
def convert(expression, context)
|
10
12
|
self.context = context
|
11
13
|
self.expression = expression
|
12
14
|
|
13
|
-
|
14
|
-
|
15
|
+
node = convert_data
|
16
|
+
node = Node.new(node) if node.instance_of?(String)
|
17
|
+
apply_quantifier(node)
|
15
18
|
end
|
16
19
|
|
17
20
|
private
|
@@ -27,41 +30,49 @@ class JsRegex
|
|
27
30
|
end
|
28
31
|
alias pass_through data
|
29
32
|
|
30
|
-
def apply_quantifier(
|
31
|
-
return
|
33
|
+
def apply_quantifier(node)
|
34
|
+
return node if node.dropped? || (qtf = expression.quantifier).nil?
|
32
35
|
|
33
|
-
if
|
34
|
-
|
36
|
+
if qtf.possessive?
|
37
|
+
node.update(quantifier: qtf.text[0..-2])
|
38
|
+
return wrap_in_backrefed_lookahead([node])
|
35
39
|
else
|
36
|
-
|
40
|
+
node.update(quantifier: qtf)
|
37
41
|
end
|
38
|
-
end
|
39
42
|
|
40
|
-
|
41
|
-
convert_expressions(subexpressions)
|
43
|
+
node
|
42
44
|
end
|
43
45
|
|
44
|
-
def
|
45
|
-
expressions.map { |exp|
|
46
|
+
def convert_subexpressions
|
47
|
+
Node.new(*expression.expressions.map { |exp| convert_expression(exp) })
|
46
48
|
end
|
47
49
|
|
48
|
-
def
|
49
|
-
expression
|
50
|
+
def convert_expression(expression)
|
51
|
+
Converter.convert(expression, context)
|
50
52
|
end
|
51
53
|
|
52
54
|
def warn_of_unsupported_feature(description = nil)
|
53
55
|
description ||= "#{subtype} #{expression.type}".tr('_', ' ')
|
54
56
|
full_desc = "#{description} '#{expression}'"
|
55
57
|
warn("Dropped unsupported #{full_desc} at index #{expression.ts}")
|
56
|
-
|
58
|
+
drop
|
57
59
|
end
|
58
60
|
|
59
61
|
def warn(text)
|
60
62
|
context.warnings << text
|
61
63
|
end
|
62
64
|
|
63
|
-
def
|
64
|
-
|
65
|
+
def drop
|
66
|
+
Node.new(type: :dropped)
|
67
|
+
end
|
68
|
+
alias drop_without_warning drop
|
69
|
+
|
70
|
+
def wrap_in_backrefed_lookahead(content)
|
71
|
+
backref_num = context.capturing_group_count + 1
|
72
|
+
backref_num_node = Node.new(backref_num.to_s, type: :backref_num)
|
73
|
+
context.increment_local_capturing_group_count
|
74
|
+
# an empty passive group (?:) is appended as literal digits may follow
|
75
|
+
Node.new('(?=(', *content, '))\\', backref_num_node, '(?:)')
|
65
76
|
end
|
66
77
|
end
|
67
78
|
end
|
@@ -11,12 +11,20 @@ class JsRegex
|
|
11
11
|
private
|
12
12
|
|
13
13
|
def convert_data
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
14
|
+
case subtype
|
15
|
+
when :open then mark_conditional
|
16
|
+
when :condition then drop_without_warning
|
17
|
+
else warn_of_unsupported_feature
|
18
18
|
end
|
19
|
-
|
19
|
+
end
|
20
|
+
|
21
|
+
def mark_conditional
|
22
|
+
reference = expression.reference
|
23
|
+
node = Node.new('(?:', reference: reference, type: :conditional)
|
24
|
+
expression.branches.each do |branch|
|
25
|
+
node << Node.new('(?:', convert_expression(branch), ')')
|
26
|
+
end
|
27
|
+
node << ')'
|
20
28
|
end
|
21
29
|
end
|
22
30
|
end
|
@@ -8,20 +8,19 @@ class JsRegex
|
|
8
8
|
# The Converters themselves are stateless.
|
9
9
|
#
|
10
10
|
class Context
|
11
|
-
attr_reader :
|
11
|
+
attr_reader :capturing_group_count,
|
12
12
|
:case_insensitive_root,
|
13
13
|
:in_atomic_group,
|
14
14
|
:named_group_positions,
|
15
15
|
:warnings
|
16
16
|
|
17
|
-
def initialize(
|
17
|
+
def initialize(case_insensitive_root: false)
|
18
18
|
self.added_capturing_groups_after_group = Hash.new(0)
|
19
19
|
self.capturing_group_count = 0
|
20
20
|
self.named_group_positions = {}
|
21
21
|
self.warnings = []
|
22
22
|
|
23
|
-
self.
|
24
|
-
self.case_insensitive_root = ast.case_insensitive?
|
23
|
+
self.case_insensitive_root = case_insensitive_root
|
25
24
|
end
|
26
25
|
|
27
26
|
# group context
|
@@ -38,13 +37,9 @@ class JsRegex
|
|
38
37
|
self.in_atomic_group = false
|
39
38
|
end
|
40
39
|
|
41
|
-
def
|
42
|
-
new_backref_num = capturing_group_count + 1
|
43
|
-
# an empty passive group (?:) is appended as literal digits may follow
|
44
|
-
result = "(?=(#{content}))\\#{new_backref_num}(?:)"
|
40
|
+
def increment_local_capturing_group_count
|
45
41
|
added_capturing_groups_after_group[original_capturing_group_count] += 1
|
46
42
|
capture_group
|
47
|
-
result
|
48
43
|
end
|
49
44
|
|
50
45
|
# takes and returns 1-indexed group positions.
|
@@ -67,10 +62,9 @@ class JsRegex
|
|
67
62
|
|
68
63
|
private
|
69
64
|
|
70
|
-
attr_accessor :added_capturing_groups_after_group
|
71
|
-
:capturing_group_count
|
65
|
+
attr_accessor :added_capturing_groups_after_group
|
72
66
|
|
73
|
-
attr_writer :
|
67
|
+
attr_writer :capturing_group_count,
|
74
68
|
:case_insensitive_root,
|
75
69
|
:in_atomic_group,
|
76
70
|
:named_group_positions,
|
@@ -55,9 +55,9 @@ class JsRegex
|
|
55
55
|
end
|
56
56
|
|
57
57
|
def convert_codepoint_list
|
58
|
-
expression.chars.
|
59
|
-
LiteralConverter.convert_data(Regexp.escape(char))
|
60
|
-
end
|
58
|
+
expression.chars.each_with_object(Node.new) do |char, node|
|
59
|
+
node << LiteralConverter.convert_data(Regexp.escape(char))
|
60
|
+
end
|
61
61
|
end
|
62
62
|
|
63
63
|
def unicode_escape_codepoint
|
@@ -28,7 +28,7 @@ class JsRegex
|
|
28
28
|
build_unsupported_group('nested atomic group')
|
29
29
|
else
|
30
30
|
context.start_atomic_group
|
31
|
-
result =
|
31
|
+
result = wrap_in_backrefed_lookahead(convert_subexpressions)
|
32
32
|
context.end_atomic_group
|
33
33
|
result
|
34
34
|
end
|
@@ -37,15 +37,19 @@ class JsRegex
|
|
37
37
|
def build_named_group
|
38
38
|
# remember position, then drop name part without warning
|
39
39
|
context.store_named_group_position(expression.name)
|
40
|
-
build_group(head: '(')
|
40
|
+
build_group(head: '(', reference: expression.name)
|
41
41
|
end
|
42
42
|
|
43
43
|
def build_options_group
|
44
44
|
unless (encoding_options = data.scan(/[adu]/)).empty?
|
45
45
|
warn_of_unsupported_feature("encoding options #{encoding_options}")
|
46
46
|
end
|
47
|
-
|
48
|
-
|
47
|
+
if subtype.equal?(:options_switch)
|
48
|
+
# can be ignored since #options on subsequent Expressions are correct
|
49
|
+
drop_without_warning
|
50
|
+
else
|
51
|
+
build_passive_group
|
52
|
+
end
|
49
53
|
end
|
50
54
|
|
51
55
|
def build_passive_group
|
@@ -58,9 +62,19 @@ class JsRegex
|
|
58
62
|
end
|
59
63
|
|
60
64
|
def build_group(opts = {})
|
61
|
-
context.capture_group unless opts[:capturing].equal?(false)
|
62
65
|
head = opts[:head] || pass_through
|
63
|
-
|
66
|
+
if opts[:capturing].equal?(false)
|
67
|
+
return Node.new(*group_with_head(head))
|
68
|
+
end
|
69
|
+
|
70
|
+
context.capture_group
|
71
|
+
|
72
|
+
ref = opts[:reference] || expression.number
|
73
|
+
Node.new(*group_with_head(head), reference: ref, type: :captured_group)
|
74
|
+
end
|
75
|
+
|
76
|
+
def group_with_head(head)
|
77
|
+
[head, *convert_subexpressions, ')']
|
64
78
|
end
|
65
79
|
end
|
66
80
|
end
|
@@ -9,18 +9,28 @@ class JsRegex
|
|
9
9
|
#
|
10
10
|
class LiteralConverter < JsRegex::Converter::Base
|
11
11
|
class << self
|
12
|
-
ASTRAL_PLANE_CODEPOINT_PATTERN =
|
12
|
+
ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{FFFFF}]/
|
13
13
|
|
14
14
|
def convert_data(data)
|
15
15
|
if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
16
|
-
|
16
|
+
data.each_char.each_with_object(Node.new) do |chr, node|
|
17
|
+
if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
18
|
+
node << surrogate_pair_for(chr)
|
19
|
+
else
|
20
|
+
node << convert_bmp_data(chr)
|
21
|
+
end
|
22
|
+
end
|
17
23
|
else
|
18
|
-
|
19
|
-
ensure_forward_slashes_are_escaped(data)
|
20
|
-
)
|
24
|
+
convert_bmp_data(data)
|
21
25
|
end
|
22
26
|
end
|
23
27
|
|
28
|
+
def convert_bmp_data(data)
|
29
|
+
ensure_json_compatibility(
|
30
|
+
ensure_forward_slashes_are_escaped(data)
|
31
|
+
)
|
32
|
+
end
|
33
|
+
|
24
34
|
private
|
25
35
|
|
26
36
|
def surrogate_pair_for(astral_char)
|
@@ -56,7 +66,10 @@ class JsRegex
|
|
56
66
|
|
57
67
|
def handle_locally_case_insensitive_literal(literal)
|
58
68
|
return literal unless literal =~ HAS_CASE_PATTERN
|
59
|
-
|
69
|
+
|
70
|
+
literal.each_char.each_with_object(Node.new) do |chr, node|
|
71
|
+
node << (chr =~ HAS_CASE_PATTERN ? "[#{chr}#{chr.swapcase}]" : chr)
|
72
|
+
end
|
60
73
|
end
|
61
74
|
end
|
62
75
|
end
|
@@ -13,7 +13,7 @@ class JsRegex
|
|
13
13
|
def convert_data
|
14
14
|
case subtype
|
15
15
|
when :alternation
|
16
|
-
|
16
|
+
convert_alternatives
|
17
17
|
when :dot
|
18
18
|
expression.multiline? ? '(?:.|\n)' : '.'
|
19
19
|
else
|
@@ -21,12 +21,15 @@ class JsRegex
|
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
def convert_alternatives
|
25
|
+
kept_any = false
|
26
|
+
|
27
|
+
convert_subexpressions.map do |node|
|
28
|
+
dropped = !node.children.empty? && node.children.all?(&:dropped?)
|
29
|
+
node.children.unshift('|') if kept_any.equal?(true) && !dropped
|
30
|
+
kept_any = true unless dropped
|
31
|
+
node
|
28
32
|
end
|
29
|
-
branches.join('|')
|
30
33
|
end
|
31
34
|
end
|
32
35
|
end
|
@@ -8,20 +8,19 @@ class JsRegex
|
|
8
8
|
#
|
9
9
|
# Template class implementation.
|
10
10
|
#
|
11
|
+
# Uses the `character_set` and `regexp_property_values` gems to get the
|
12
|
+
# codepoints matched by the property and build a set string from them.
|
13
|
+
#
|
11
14
|
class PropertyConverter < JsRegex::Converter::Base
|
12
15
|
private
|
13
16
|
|
14
17
|
def convert_data
|
15
|
-
convert_property
|
16
|
-
end
|
17
|
-
|
18
|
-
def convert_property(negated = nil)
|
19
18
|
content = CharacterSet.of_property(subtype)
|
20
19
|
if expression.case_insensitive? && !context.case_insensitive_root
|
21
20
|
content = content.case_insensitive
|
22
21
|
end
|
23
22
|
|
24
|
-
if
|
23
|
+
if expression.negative?
|
25
24
|
if content.astral_part.empty?
|
26
25
|
return "[^#{content.to_s(format: :js)}]"
|
27
26
|
else
|
@@ -35,10 +34,10 @@ class JsRegex
|
|
35
34
|
end
|
36
35
|
|
37
36
|
bmp_part = content.bmp_part
|
38
|
-
return
|
37
|
+
return drop if bmp_part.empty?
|
39
38
|
|
40
39
|
string = bmp_part.to_s(format: :js)
|
41
|
-
|
40
|
+
expression.negative? ? "[^#{string}]" : "[#{string}]"
|
42
41
|
end
|
43
42
|
end
|
44
43
|
end
|
@@ -10,6 +10,11 @@ class JsRegex
|
|
10
10
|
#
|
11
11
|
# Template class implementation.
|
12
12
|
#
|
13
|
+
# Unlike other converters, this one does not recurse on subexpressions,
|
14
|
+
# since many are unsupported by JavaScript. If it detects incompatible
|
15
|
+
# children, it uses the `character_set` gem to establish the codepoints
|
16
|
+
# matched by the whole set and build a completely new set string.
|
17
|
+
#
|
13
18
|
class SetConverter < JsRegex::Converter::Base
|
14
19
|
private
|
15
20
|
|
@@ -32,7 +37,7 @@ class JsRegex
|
|
32
37
|
else
|
33
38
|
warn_of_unsupported_feature('large astral plane match of set')
|
34
39
|
bmp_part = content.bmp_part
|
35
|
-
bmp_part.empty? ?
|
40
|
+
bmp_part.empty? ? drop : bmp_part.to_s(format: :js, in_brackets: true)
|
36
41
|
end
|
37
42
|
end
|
38
43
|
|
@@ -42,7 +47,7 @@ class JsRegex
|
|
42
47
|
return false
|
43
48
|
end
|
44
49
|
|
45
|
-
# check for
|
50
|
+
# check for children needing conversion (#each_expression is recursive)
|
46
51
|
expression.each_expression do |node|
|
47
52
|
case node.type
|
48
53
|
when :literal
|
data/lib/js_regex/converter.rb
CHANGED
@@ -12,27 +12,32 @@ class JsRegex
|
|
12
12
|
backref: BackreferenceConverter,
|
13
13
|
conditional: ConditionalConverter,
|
14
14
|
escape: EscapeConverter,
|
15
|
+
expression: SubexpressionConverter,
|
15
16
|
free_space: FreespaceConverter,
|
16
17
|
group: GroupConverter,
|
17
18
|
literal: LiteralConverter,
|
18
19
|
meta: MetaConverter,
|
19
|
-
nonproperty:
|
20
|
+
nonproperty: PropertyConverter,
|
20
21
|
property: PropertyConverter,
|
21
22
|
set: SetConverter,
|
22
23
|
type: TypeConverter
|
23
24
|
).freeze
|
24
25
|
|
25
|
-
def self.for(expression)
|
26
|
-
MAP[expression.type].new
|
27
|
-
end
|
28
|
-
|
29
|
-
# Limit the number of generated surrogate pairs, else the output might
|
30
|
-
# get to large for certain applications. The chosen number is somewhat
|
31
|
-
# arbitrary. 100 pairs make for about 1 KB, uncompressed. The median char
|
32
|
-
# count of all properties supported by Ruby is 92. 75% are below 300 chars.
|
33
|
-
#
|
34
|
-
# Set this to nil if you need full unicode matches and size doesn't matter.
|
35
26
|
class << self
|
27
|
+
def convert(exp, context = nil)
|
28
|
+
self.for(exp).convert(exp, context || Context.new)
|
29
|
+
end
|
30
|
+
|
31
|
+
def for(expression)
|
32
|
+
MAP[expression.type].new
|
33
|
+
end
|
34
|
+
|
35
|
+
# Limit the number of generated surrogate pairs, else the output might
|
36
|
+
# get to large for certain applications. The chosen number is somewhat
|
37
|
+
# arbitrary. 100 pairs make for about 1 KB, uncompressed. The median char
|
38
|
+
# count of all properties supported by Ruby is 92. 75% are below 300 chars.
|
39
|
+
#
|
40
|
+
# Set this to nil if you need full unicode matches and size doesn't matter.
|
36
41
|
attr_accessor :surrogate_pair_limit
|
37
42
|
end
|
38
43
|
self.surrogate_pair_limit = 300
|
@@ -0,0 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class JsRegex
  #
  # Converter#convert result. Represents a branch or leaf node with an optional
  # quantifier as well as type and reference annotations for SecondPass.
  #
  # Children are either Strings (already converted source fragments) or
  # other Node instances.
  #
  class Node
    attr_reader :children, :quantifier, :reference, :type

    # All node types accepted by #initialize and #update.
    TYPES = %i[
      backref_num
      captured_group
      conditional
      dropped
      plain
      subexp_call
    ].freeze

    # children   - Strings and/or Nodes, stringified in order by #to_s.
    # quantifier - anything responding to #to_s, appended after the children.
    # reference  - annotation read by SecondPass (e.g. group number or name).
    # type       - one of TYPES.
    #
    # Raises ArgumentError if type is not one of TYPES.
    def initialize(*children, quantifier: nil, reference: nil, type: :plain)
      validate_type(type)
      self.children = children
      self.quantifier = quantifier
      self.reference = reference
      self.type = type
    end

    # Makes #clone and #dup deep copies, so that a copied tree can be
    # modified without affecting the tree it was copied from.
    def initialize_copy(orig)
      super
      self.children = orig.children.map(&:clone)
      self.quantifier = orig.quantifier && orig.quantifier.clone
    end

    # Returns a copy of this node whose children are the results of
    # calling the block with each child of the receiver.
    def map(&block)
      clone.tap { |node| node.children.replace(children.map(&block)) }
    end

    # Appends a child (String or Node) and returns self to allow chaining.
    def <<(node)
      children << node
      self
    end

    def dropped?
      # keep everything else, including empty or depleted capturing groups
      # so as not to mess with reference numbers (e.g. backrefs)
      type.equal?(:dropped)
    end

    # Returns the JavaScript source for this node and its children.
    #
    # Raises TypeError for types that SecondPass has to substitute first
    # (:conditional and :subexp_call).
    def to_s
      case type
      when :dropped
        ''
      when :backref_num, :captured_group, :plain
        children.join << quantifier.to_s
      else
        raise TypeError, "#{type} must be substituted before stringification"
      end
    end

    # Overwrites the attributes given in attrs (:children, :quantifier,
    # :reference, :type); other keys are ignored. Returns self.
    #
    # Raises ArgumentError, before changing anything, if :type is given
    # and is not one of TYPES.
    def update(attrs)
      validate_type(attrs[:type]) if attrs.key?(:type)

      self.children = attrs[:children] if attrs.key?(:children)
      self.quantifier = attrs[:quantifier] if attrs.key?(:quantifier)
      self.reference = attrs[:reference] if attrs.key?(:reference)
      self.type = attrs[:type] if attrs.key?(:type)
      self
    end

    private

    attr_writer :children, :reference, :quantifier, :type

    def validate_type(type)
      raise ArgumentError, "bad type #{type}" unless TYPES.include?(type)
    end
  end
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class JsRegex
  #
  # After conversion of a full Regexp::Expression tree, this class
  # checks for Node instances that need further processing.
  #
  # E.g. subexpression calls (such as \g<1>) can be look-ahead,
  # so the full Regexp must have been processed first, and only then can
  # they be substituted with the conversion result of their targeted group.
  #
  module SecondPass
    module_function

    # Applies all second-pass substitutions to tree in place and returns it.
    def call(tree)
      substitute_subexp_calls(tree)
      alternate_conditional_permutations(tree)
      tree
    end

    # Turns every :subexp_call node in tree into a :captured_group node
    # holding a copy of the children of the group it refers to.
    def substitute_subexp_calls(tree)
      expand_subexp_calls(tree, tree, [])
    end

    # Depth-first worker for #substitute_subexp_calls. active_refs holds the
    # references of all groups (and already expanded calls) enclosing node.
    def expand_subexp_calls(node, root, active_refs)
      return if node.instance_of?(String)

      if node.type == :subexp_call
        node.update(
          children: subexp_call_replacement(node, root, active_refs),
          type: :captured_group
        )
      end
      # a new array is built here so that sibling branches are unaffected
      active_refs += [node.reference] if node.type == :captured_group

      node.children.each do |child|
        expand_subexp_calls(child, root, active_refs)
      end
    end

    # Returns the children that a call node is to receive.
    #
    # The called group's children are copied rather than shared. Sharing the
    # same array would make the tree cyclic when a call is located inside of
    # the group it calls, and crawling or stringifying it would never end.
    # Such recursive calls are replaced with an empty capturing group, which
    # is also what calls to unknown groups receive.
    def subexp_call_replacement(call, root, active_refs)
      return ['()'] if active_refs.include?(call.reference)

      called_group = find_group_by_reference(call.reference, in_node: root)
      called_group.children.map(&:clone)
    end

    # Yields node and then, depth-first, all Nodes below it. String children
    # are skipped.
    def crawl(node, &block)
      return if node.instance_of?(String)
      yield(node)
      node.children.each { |child| crawl(child, &block) }
    end

    # If tree contains conditionals, replaces its children with one
    # alternative per permutation built by #conditional_tree_permutations.
    def alternate_conditional_permutations(tree)
      permutations = conditional_tree_permutations(tree)
      return tree if permutations.empty?

      alternatives = permutations.map.with_index do |variant, i|
        Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')')
      end
      tree.update(children: alternatives)
    end

    # Returns the first :captured_group node with the given reference found
    # by #crawl in in_node, or a new Node containing '()' if there is none.
    def find_group_by_reference(ref, in_node: nil)
      crawl(in_node) do |node|
        return node if node.type == :captured_group && node.reference == ref
      end
      Node.new('()')
    end

    # Returns one modified deep copy of tree for each combination of
    # conditions assumed to be truthy, or [] if tree has no conditionals.
    def conditional_tree_permutations(tree)
      all_conditions = conditions(tree)
      return [] if all_conditions.empty?

      captured_groups_per_branch = captured_group_count(tree)

      condition_permutations(all_conditions).map.with_index do |truthy_conds, i|
        tree_permutation = tree.clone
        # find referenced groups and conditionals and make one-sided
        crawl(tree_permutation) do |node|
          truthy = truthy_conds.include?(node.reference)

          if node.type.equal?(:captured_group) &&
             all_conditions.include?(node.reference)
            truthy ? min_quantify(node) : null_quantify(node)
          elsif node.type.equal?(:conditional)
            # first and last child are the opening and closing strings
            branches = node.children[1...-1]
            if branches.count == 1
              truthy || null_quantify(branches.first)
            else
              null_quantify(truthy ? branches.last : branches.first)
            end
            node.update(type: :plain)
          elsif node.type.equal?(:backref_num)
            # every alternative repeats all groups, so shift the number by
            # the group count of the alternatives preceding this one
            new_num = node.children[0].to_i + captured_groups_per_branch * i
            node.update(children: [new_num.to_s])
          end
        end
      end
    end

    # Returns the references of all :conditional nodes in tree.
    def conditions(tree)
      conditions = []
      crawl(tree) do |node|
        conditions << node.reference if node.type.equal?(:conditional)
      end
      conditions
    end

    # Returns the number of :captured_group nodes in tree.
    def captured_group_count(tree)
      count = 0
      crawl(tree) { |node| count += 1 if node.type.equal?(:captured_group) }
      count
    end

    # Returns all combinations of the given conditions, from the empty
    # combination up to the one containing all of them, or [] if there
    # are no conditions.
    def condition_permutations(conditions)
      return [] if conditions.empty?

      (0..conditions.length).flat_map { |n| conditions.combination(n).to_a }
    end

    # Makes sure node must match at least once, unless it has no quantifier
    # or already has a minimum above zero.
    #
    # NOTE(review): this calls #min, #max and #reluctant? on the quantifier,
    # so it presumably expects a parser quantifier object rather than a
    # String - confirm that String quantifiers can never reach this method.
    def min_quantify(node)
      return if (qtf = node.quantifier).nil? || qtf.min > 0

      if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
        node.update(quantifier: nil)
      else
        node.update(quantifier: "{1,#{qtf.max}}#{'?' if qtf.reluctant?}")
      end
    end

    # Makes node match zero times.
    def null_quantify(node)
      node.update(quantifier: '{0}')
    end
  end
end
|
data/lib/js_regex/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-09-
|
11
|
+
date: 2018-09-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: character_set
|
@@ -30,14 +30,14 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: '1.
|
33
|
+
version: '1.1'
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: '1.
|
40
|
+
version: '1.1'
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: regexp_property_values
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -144,12 +144,13 @@ files:
|
|
144
144
|
- lib/js_regex/converter/group_converter.rb
|
145
145
|
- lib/js_regex/converter/literal_converter.rb
|
146
146
|
- lib/js_regex/converter/meta_converter.rb
|
147
|
-
- lib/js_regex/converter/nonproperty_converter.rb
|
148
147
|
- lib/js_regex/converter/property_converter.rb
|
149
|
-
- lib/js_regex/converter/root_converter.rb
|
150
148
|
- lib/js_regex/converter/set_converter.rb
|
149
|
+
- lib/js_regex/converter/subexpression_converter.rb
|
151
150
|
- lib/js_regex/converter/type_converter.rb
|
152
151
|
- lib/js_regex/converter/unsupported_token_converter.rb
|
152
|
+
- lib/js_regex/node.rb
|
153
|
+
- lib/js_regex/second_pass.rb
|
153
154
|
- lib/js_regex/version.rb
|
154
155
|
homepage: https://github.com/janosch-x/js_regex
|
155
156
|
licenses:
|
@@ -1,21 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'base'
|
4
|
-
require_relative 'property_converter'
|
5
|
-
|
6
|
-
class JsRegex
|
7
|
-
module Converter
|
8
|
-
#
|
9
|
-
# Template class implementation.
|
10
|
-
#
|
11
|
-
# Note the inheritance from PropertyConverter.
|
12
|
-
#
|
13
|
-
class NonpropertyConverter < JsRegex::Converter::PropertyConverter
|
14
|
-
private
|
15
|
-
|
16
|
-
def convert_data
|
17
|
-
convert_property(true)
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|