js_regex 3.2.0 → 3.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/js_regex/converter/group_converter.rb +0 -3
- data/lib/js_regex/converter/literal_converter.rb +11 -7
- data/lib/js_regex/converter/meta_converter.rb +9 -4
- data/lib/js_regex/converter/property_converter.rb +14 -4
- data/lib/js_regex/converter/set_converter.rb +36 -26
- data/lib/js_regex/converter/type_converter.rb +29 -11
- data/lib/js_regex/second_pass.rb +42 -22
- data/lib/js_regex/version.rb +1 -1
- metadata +4 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0f418197d1c8fea37bd549b701049f1130bfca486c5a11ba25e56d7d0cf990f8
|
4
|
+
data.tar.gz: 8679277fb51aca528933f4727cd0e70f1e1204c78acaf4e20da48d6b790ea7be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ddeadbe690e2928c068d8c2962d80e88c50f5fef46304e46fa98e8dc215c6c57c7790e9ef7ba8b1f640e71c3ca834b7233449c9b7d68466ac76581de73c7ffe3
|
7
|
+
data.tar.gz: 9c10d4e6a529ec54385ed55b34e4a910d354d7665f0831bc65b65bc20f176f4e46f9aa90cff2b36ea8f1306dfa65f7e42f4a93516ab228f206e74db7d708b779
|
@@ -34,9 +34,6 @@ class JsRegex
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def build_options_group
|
37
|
-
unless (encoding_options = data.scan(/[adu]/)).empty?
|
38
|
-
warn_of_unsupported_feature("encoding options #{encoding_options}")
|
39
|
-
end
|
40
37
|
if subtype.equal?(:options_switch)
|
41
38
|
# can be ignored since #options on subsequent Expressions are correct
|
42
39
|
drop_without_warning
|
@@ -13,18 +13,22 @@ class JsRegex
|
|
13
13
|
|
14
14
|
def convert_data(data)
|
15
15
|
if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
16
|
-
data
|
17
|
-
if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
18
|
-
node << surrogate_pair_for(chr)
|
19
|
-
else
|
20
|
-
node << convert_bmp_data(chr)
|
21
|
-
end
|
22
|
-
end
|
16
|
+
convert_astral_data(data)
|
23
17
|
else
|
24
18
|
convert_bmp_data(data)
|
25
19
|
end
|
26
20
|
end
|
27
21
|
|
22
|
+
def convert_astral_data(data)
|
23
|
+
data.each_char.each_with_object(Node.new) do |chr, node|
|
24
|
+
if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
25
|
+
node << surrogate_pair_for(chr)
|
26
|
+
else
|
27
|
+
node << convert_bmp_data(chr)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
28
32
|
def convert_bmp_data(data)
|
29
33
|
ensure_json_compatibility(
|
30
34
|
ensure_forward_slashes_are_escaped(data)
|
@@ -22,15 +22,20 @@ class JsRegex
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def convert_alternatives
|
25
|
-
|
25
|
+
kept_any_previous_branch = nil
|
26
26
|
|
27
27
|
convert_subexpressions.transform do |node|
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
unless dropped_branch?(node)
|
29
|
+
node.children.unshift('|') if kept_any_previous_branch
|
30
|
+
kept_any_previous_branch = true
|
31
|
+
end
|
31
32
|
node
|
32
33
|
end
|
33
34
|
end
|
35
|
+
|
36
|
+
def dropped_branch?(branch_node)
|
37
|
+
branch_node.children.any? && branch_node.children.all?(&:dropped?)
|
38
|
+
end
|
34
39
|
end
|
35
40
|
end
|
36
41
|
end
|
@@ -15,10 +15,7 @@ class JsRegex
|
|
15
15
|
private
|
16
16
|
|
17
17
|
def convert_data
|
18
|
-
content =
|
19
|
-
if expression.case_insensitive? && !context.case_insensitive_root
|
20
|
-
content = content.case_insensitive
|
21
|
-
end
|
18
|
+
content = character_set_of_property
|
22
19
|
|
23
20
|
if expression.negative?
|
24
21
|
if content.astral_part?
|
@@ -30,6 +27,19 @@ class JsRegex
|
|
30
27
|
warn_of_unsupported_feature('large astral plane match of property')
|
31
28
|
end
|
32
29
|
|
30
|
+
limit_to_bmp_part(content)
|
31
|
+
end
|
32
|
+
|
33
|
+
def character_set_of_property
|
34
|
+
character_set = CharacterSet.of_property(subtype)
|
35
|
+
if expression.case_insensitive? && !context.case_insensitive_root
|
36
|
+
character_set.case_insensitive
|
37
|
+
else
|
38
|
+
character_set
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def limit_to_bmp_part(content)
|
33
43
|
bmp_part = content.bmp_part
|
34
44
|
return drop if bmp_part.empty?
|
35
45
|
|
@@ -19,10 +19,7 @@ class JsRegex
|
|
19
19
|
private
|
20
20
|
|
21
21
|
def convert_data
|
22
|
-
if directly_compatible?
|
23
|
-
return expression.to_s(:base)
|
24
|
-
.gsub(%r{\\?([\f\n\r\t])}) { Regexp.escape($1) }
|
25
|
-
end
|
22
|
+
return pass_through_with_escaping if directly_compatible?
|
26
23
|
|
27
24
|
content = CharacterSet.of_expression(expression)
|
28
25
|
if expression.case_insensitive? && !context.case_insensitive_root
|
@@ -34,35 +31,48 @@ class JsRegex
|
|
34
31
|
if Converter.in_surrogate_pair_limit? { content.astral_part.size }
|
35
32
|
content.to_s_with_surrogate_alternation
|
36
33
|
else
|
37
|
-
|
38
|
-
bmp_part = content.bmp_part
|
39
|
-
bmp_part.empty? ? drop : bmp_part.to_s(in_brackets: true)
|
34
|
+
limit_to_bmp_part_with_warning(content)
|
40
35
|
end
|
41
36
|
end
|
42
37
|
|
43
38
|
def directly_compatible?
|
44
|
-
|
45
|
-
|
46
|
-
return
|
47
|
-
end
|
39
|
+
all_children_directly_compatible? && !casefolding_needed?
|
40
|
+
end
|
48
41
|
|
49
|
-
|
42
|
+
def all_children_directly_compatible?
|
43
|
+
# note that #each_expression is recursive
|
50
44
|
expression.each_expression do |exp|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
45
|
+
return unless child_directly_compatible?(exp)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def child_directly_compatible?(exp)
|
50
|
+
case exp.type
|
51
|
+
when :literal
|
52
|
+
# surrogate pair substitution needed if astral
|
53
|
+
exp.text.ord <= 0xFFFF
|
54
|
+
when :set
|
55
|
+
# conversion needed for nested sets, intersections
|
56
|
+
exp.token.equal?(:range)
|
57
|
+
when :type
|
58
|
+
TypeConverter.directly_compatible?(exp)
|
59
|
+
when :escape
|
60
|
+
EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
|
64
61
|
end
|
65
|
-
|
62
|
+
end
|
63
|
+
|
64
|
+
def casefolding_needed?
|
65
|
+
expression.case_insensitive? ^ context.case_insensitive_root
|
66
|
+
end
|
67
|
+
|
68
|
+
def pass_through_with_escaping
|
69
|
+
expression.to_s(:base).gsub(%r{\\?([\f\n\r\t])}) { Regexp.escape($1) }
|
70
|
+
end
|
71
|
+
|
72
|
+
def limit_to_bmp_part_with_warning(content)
|
73
|
+
warn_of_unsupported_feature('large astral plane match of set')
|
74
|
+
bmp_part = content.bmp_part
|
75
|
+
bmp_part.empty? ? drop : bmp_part.to_s(in_brackets: true)
|
66
76
|
end
|
67
77
|
end
|
68
78
|
end
|
@@ -8,19 +8,19 @@ class JsRegex
|
|
8
8
|
# Template class implementation.
|
9
9
|
#
|
10
10
|
class TypeConverter < JsRegex::Converter::Base
|
11
|
-
TYPES_SHARED_BY_RUBY_AND_JS = %i[
|
12
|
-
digit
|
13
|
-
nondigit
|
14
|
-
word
|
15
|
-
nonword
|
16
|
-
space
|
17
|
-
nonspace
|
18
|
-
].freeze
|
19
|
-
|
20
11
|
HEX_EXPANSION = '[0-9A-Fa-f]'
|
21
12
|
NONHEX_EXPANSION = '[^0-9A-Fa-f]'
|
22
13
|
LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
|
23
14
|
|
15
|
+
def self.directly_compatible?(expression)
|
16
|
+
case expression.token
|
17
|
+
when :space, :nonspace
|
18
|
+
!expression.ascii_classes?
|
19
|
+
when :digit, :nondigit, :word, :nonword
|
20
|
+
!expression.unicode_classes?
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
24
|
private
|
25
25
|
|
26
26
|
def convert_data
|
@@ -28,12 +28,30 @@ class JsRegex
|
|
28
28
|
when :hex then HEX_EXPANSION
|
29
29
|
when :nonhex then NONHEX_EXPANSION
|
30
30
|
when :linebreak then LINEBREAK_EXPANSION
|
31
|
-
when
|
32
|
-
pass_through
|
31
|
+
when :digit, :space, :word
|
32
|
+
return pass_through if self.class.directly_compatible?(expression)
|
33
|
+
set_substitution
|
34
|
+
when :nondigit, :nonspace, :nonword
|
35
|
+
return pass_through if self.class.directly_compatible?(expression)
|
36
|
+
negative_set_substitution
|
33
37
|
else
|
34
38
|
warn_of_unsupported_feature
|
35
39
|
end
|
36
40
|
end
|
41
|
+
|
42
|
+
def negative_set_substitution
|
43
|
+
# ::of_expression returns an inverted set for negative expressions,
|
44
|
+
# so we need to un-invert before wrapping in [^ and ]. Kinda lame.
|
45
|
+
"[^#{character_set.inversion.bmp_part}]"
|
46
|
+
end
|
47
|
+
|
48
|
+
def set_substitution
|
49
|
+
character_set.bmp_part.to_s(in_brackets: true)
|
50
|
+
end
|
51
|
+
|
52
|
+
def character_set
|
53
|
+
CharacterSet.of_expression(expression)
|
54
|
+
end
|
37
55
|
end
|
38
56
|
end
|
39
57
|
end
|
data/lib/js_regex/second_pass.rb
CHANGED
@@ -25,32 +25,16 @@ class JsRegex
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def conditional_tree_permutations(tree)
|
28
|
-
|
29
|
-
return [] if
|
28
|
+
all_conds = conditions(tree)
|
29
|
+
return [] if all_conds.empty?
|
30
30
|
|
31
|
-
|
31
|
+
caps_per_branch = captured_group_count(tree)
|
32
32
|
|
33
|
-
condition_permutations(
|
33
|
+
condition_permutations(all_conds).map.with_index do |truthy_conds, i|
|
34
34
|
tree_permutation = tree.clone
|
35
35
|
# find referenced groups and conditionals and make one-sided
|
36
36
|
crawl(tree_permutation) do |node|
|
37
|
-
|
38
|
-
|
39
|
-
if node.type.equal?(:captured_group) &&
|
40
|
-
all_conditions.include?(node.reference)
|
41
|
-
truthy ? min_quantify(node) : null_quantify(node)
|
42
|
-
elsif node.type.equal?(:conditional)
|
43
|
-
branches = node.children[1...-1]
|
44
|
-
if branches.count == 1
|
45
|
-
truthy || null_quantify(branches.first)
|
46
|
-
else
|
47
|
-
null_quantify(truthy ? branches.last : branches.first)
|
48
|
-
end
|
49
|
-
node.update(type: :plain)
|
50
|
-
elsif node.type.equal?(:backref_num)
|
51
|
-
new_num = node.children[0].to_i + captured_groups_per_branch * i
|
52
|
-
node.update(children: [new_num.to_s])
|
53
|
-
end
|
37
|
+
build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
|
54
38
|
end
|
55
39
|
end
|
56
40
|
end
|
@@ -81,8 +65,40 @@ class JsRegex
|
|
81
65
|
end
|
82
66
|
end
|
83
67
|
|
68
|
+
def build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
|
69
|
+
truthy = truthy_conds.include?(node.reference)
|
70
|
+
|
71
|
+
if node.type.equal?(:captured_group) &&
|
72
|
+
all_conds.include?(node.reference)
|
73
|
+
adapt_referenced_group_to_permutation(node, truthy)
|
74
|
+
elsif node.type.equal?(:conditional)
|
75
|
+
adapt_conditional_to_permutation(node, truthy)
|
76
|
+
elsif node.type.equal?(:backref_num)
|
77
|
+
adapt_backref_to_permutation(node, caps_per_branch, i)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def adapt_referenced_group_to_permutation(group_node, truthy)
|
82
|
+
truthy ? min_quantify(group_node) : null_quantify(group_node)
|
83
|
+
end
|
84
|
+
|
85
|
+
def adapt_conditional_to_permutation(conditional_node, truthy)
|
86
|
+
branches = conditional_node.children[1...-1]
|
87
|
+
if branches.count == 1
|
88
|
+
truthy || null_quantify(branches.first)
|
89
|
+
else
|
90
|
+
null_quantify(truthy ? branches.last : branches.first)
|
91
|
+
end
|
92
|
+
conditional_node.update(type: :plain)
|
93
|
+
end
|
94
|
+
|
95
|
+
def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
|
96
|
+
new_num = backref_node.children[0].to_i + caps_per_branch * i
|
97
|
+
backref_node.update(children: [new_num.to_s])
|
98
|
+
end
|
99
|
+
|
84
100
|
def min_quantify(node)
|
85
|
-
return if (qtf = node.quantifier)
|
101
|
+
return if guarantees_at_least_one_match?(qtf = node.quantifier)
|
86
102
|
|
87
103
|
if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
|
88
104
|
node.update(quantifier: nil)
|
@@ -91,6 +107,10 @@ class JsRegex
|
|
91
107
|
end
|
92
108
|
end
|
93
109
|
|
110
|
+
def guarantees_at_least_one_match?(quantifier)
|
111
|
+
quantifier.nil? || quantifier.min > 0
|
112
|
+
end
|
113
|
+
|
94
114
|
def null_quantify(node)
|
95
115
|
node.update(quantifier: '{0}')
|
96
116
|
end
|
data/lib/js_regex/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.0
|
4
|
+
version: 3.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-05-
|
11
|
+
date: 2019-05-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: character_set
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.3'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: regexp_parser
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,20 +122,6 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0.12'
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: mutant-rspec
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - "~>"
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '0.8'
|
132
|
-
type: :development
|
133
|
-
prerelease: false
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - "~>"
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '0.8'
|
139
125
|
description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
|
140
126
|
care of various incompatibilities and returning warnings for unsolvable differences.
|
141
127
|
email:
|