js_regex 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/js_regex/converter/group_converter.rb +0 -3
- data/lib/js_regex/converter/literal_converter.rb +11 -7
- data/lib/js_regex/converter/meta_converter.rb +9 -4
- data/lib/js_regex/converter/property_converter.rb +14 -4
- data/lib/js_regex/converter/set_converter.rb +36 -26
- data/lib/js_regex/converter/type_converter.rb +29 -11
- data/lib/js_regex/second_pass.rb +42 -22
- data/lib/js_regex/version.rb +1 -1
- metadata +4 -18
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0f418197d1c8fea37bd549b701049f1130bfca486c5a11ba25e56d7d0cf990f8
|
4
|
+
data.tar.gz: 8679277fb51aca528933f4727cd0e70f1e1204c78acaf4e20da48d6b790ea7be
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ddeadbe690e2928c068d8c2962d80e88c50f5fef46304e46fa98e8dc215c6c57c7790e9ef7ba8b1f640e71c3ca834b7233449c9b7d68466ac76581de73c7ffe3
|
7
|
+
data.tar.gz: 9c10d4e6a529ec54385ed55b34e4a910d354d7665f0831bc65b65bc20f176f4e46f9aa90cff2b36ea8f1306dfa65f7e42f4a93516ab228f206e74db7d708b779
|
data/lib/js_regex/converter/group_converter.rb
CHANGED
@@ -34,9 +34,6 @@ class JsRegex
|
|
34
34
|
end
|
35
35
|
|
36
36
|
def build_options_group
|
37
|
-
unless (encoding_options = data.scan(/[adu]/)).empty?
|
38
|
-
warn_of_unsupported_feature("encoding options #{encoding_options}")
|
39
|
-
end
|
40
37
|
if subtype.equal?(:options_switch)
|
41
38
|
# can be ignored since #options on subsequent Expressions are correct
|
42
39
|
drop_without_warning
|
data/lib/js_regex/converter/literal_converter.rb
CHANGED
@@ -13,18 +13,22 @@ class JsRegex
|
|
13
13
|
|
14
14
|
def convert_data(data)
|
15
15
|
if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
16
|
-
data.each_char.each_with_object(Node.new) do |chr, node|
|
17
|
-
if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
18
|
-
node << surrogate_pair_for(chr)
|
19
|
-
else
|
20
|
-
node << convert_bmp_data(chr)
|
21
|
-
end
|
22
|
-
end
|
16
|
+
convert_astral_data(data)
|
23
17
|
else
|
24
18
|
convert_bmp_data(data)
|
25
19
|
end
|
26
20
|
end
|
27
21
|
|
22
|
+
def convert_astral_data(data)
|
23
|
+
data.each_char.each_with_object(Node.new) do |chr, node|
|
24
|
+
if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
25
|
+
node << surrogate_pair_for(chr)
|
26
|
+
else
|
27
|
+
node << convert_bmp_data(chr)
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
28
32
|
def convert_bmp_data(data)
|
29
33
|
ensure_json_compatibility(
|
30
34
|
ensure_forward_slashes_are_escaped(data)
|
data/lib/js_regex/converter/meta_converter.rb
CHANGED
@@ -22,15 +22,20 @@ class JsRegex
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def convert_alternatives
|
25
|
-
|
25
|
+
kept_any_previous_branch = nil
|
26
26
|
|
27
27
|
convert_subexpressions.transform do |node|
|
28
|
-
|
29
|
-
|
30
|
-
|
28
|
+
unless dropped_branch?(node)
|
29
|
+
node.children.unshift('|') if kept_any_previous_branch
|
30
|
+
kept_any_previous_branch = true
|
31
|
+
end
|
31
32
|
node
|
32
33
|
end
|
33
34
|
end
|
35
|
+
|
36
|
+
def dropped_branch?(branch_node)
|
37
|
+
branch_node.children.any? && branch_node.children.all?(&:dropped?)
|
38
|
+
end
|
34
39
|
end
|
35
40
|
end
|
36
41
|
end
|
data/lib/js_regex/converter/property_converter.rb
CHANGED
@@ -15,10 +15,7 @@ class JsRegex
|
|
15
15
|
private
|
16
16
|
|
17
17
|
def convert_data
|
18
|
-
content = CharacterSet.of_property(subtype)
|
19
|
-
if expression.case_insensitive? && !context.case_insensitive_root
|
20
|
-
content = content.case_insensitive
|
21
|
-
end
|
18
|
+
content = character_set_of_property
|
22
19
|
|
23
20
|
if expression.negative?
|
24
21
|
if content.astral_part?
|
@@ -30,6 +27,19 @@ class JsRegex
|
|
30
27
|
warn_of_unsupported_feature('large astral plane match of property')
|
31
28
|
end
|
32
29
|
|
30
|
+
limit_to_bmp_part(content)
|
31
|
+
end
|
32
|
+
|
33
|
+
def character_set_of_property
|
34
|
+
character_set = CharacterSet.of_property(subtype)
|
35
|
+
if expression.case_insensitive? && !context.case_insensitive_root
|
36
|
+
character_set.case_insensitive
|
37
|
+
else
|
38
|
+
character_set
|
39
|
+
end
|
40
|
+
end
|
41
|
+
|
42
|
+
def limit_to_bmp_part(content)
|
33
43
|
bmp_part = content.bmp_part
|
34
44
|
return drop if bmp_part.empty?
|
35
45
|
|
data/lib/js_regex/converter/set_converter.rb
CHANGED
@@ -19,10 +19,7 @@ class JsRegex
|
|
19
19
|
private
|
20
20
|
|
21
21
|
def convert_data
|
22
|
-
if directly_compatible?
|
23
|
-
return expression.to_s(:base)
|
24
|
-
.gsub(%r{\\?([\f\n\r\t])}) { Regexp.escape($1) }
|
25
|
-
end
|
22
|
+
return pass_through_with_escaping if directly_compatible?
|
26
23
|
|
27
24
|
content = CharacterSet.of_expression(expression)
|
28
25
|
if expression.case_insensitive? && !context.case_insensitive_root
|
@@ -34,35 +31,48 @@ class JsRegex
|
|
34
31
|
if Converter.in_surrogate_pair_limit? { content.astral_part.size }
|
35
32
|
content.to_s_with_surrogate_alternation
|
36
33
|
else
|
37
|
-
|
38
|
-
bmp_part = content.bmp_part
|
39
|
-
bmp_part.empty? ? drop : bmp_part.to_s(in_brackets: true)
|
34
|
+
limit_to_bmp_part_with_warning(content)
|
40
35
|
end
|
41
36
|
end
|
42
37
|
|
43
38
|
def directly_compatible?
|
44
|
-
|
45
|
-
|
46
|
-
return
|
47
|
-
end
|
39
|
+
all_children_directly_compatible? && !casefolding_needed?
|
40
|
+
end
|
48
41
|
|
49
|
-
|
42
|
+
def all_children_directly_compatible?
|
43
|
+
# note that #each_expression is recursive
|
50
44
|
expression.each_expression do |exp|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
45
|
+
return unless child_directly_compatible?(exp)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def child_directly_compatible?(exp)
|
50
|
+
case exp.type
|
51
|
+
when :literal
|
52
|
+
# surrogate pair substitution needed if astral
|
53
|
+
exp.text.ord <= 0xFFFF
|
54
|
+
when :set
|
55
|
+
# conversion needed for nested sets, intersections
|
56
|
+
exp.token.equal?(:range)
|
57
|
+
when :type
|
58
|
+
TypeConverter.directly_compatible?(exp)
|
59
|
+
when :escape
|
60
|
+
EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
|
64
61
|
end
|
65
|
-
|
62
|
+
end
|
63
|
+
|
64
|
+
def casefolding_needed?
|
65
|
+
expression.case_insensitive? ^ context.case_insensitive_root
|
66
|
+
end
|
67
|
+
|
68
|
+
def pass_through_with_escaping
|
69
|
+
expression.to_s(:base).gsub(%r{\\?([\f\n\r\t])}) { Regexp.escape($1) }
|
70
|
+
end
|
71
|
+
|
72
|
+
def limit_to_bmp_part_with_warning(content)
|
73
|
+
warn_of_unsupported_feature('large astral plane match of set')
|
74
|
+
bmp_part = content.bmp_part
|
75
|
+
bmp_part.empty? ? drop : bmp_part.to_s(in_brackets: true)
|
66
76
|
end
|
67
77
|
end
|
68
78
|
end
|
data/lib/js_regex/converter/type_converter.rb
CHANGED
@@ -8,19 +8,19 @@ class JsRegex
|
|
8
8
|
# Template class implementation.
|
9
9
|
#
|
10
10
|
class TypeConverter < JsRegex::Converter::Base
|
11
|
-
TYPES_SHARED_BY_RUBY_AND_JS = %i[
|
12
|
-
digit
|
13
|
-
nondigit
|
14
|
-
word
|
15
|
-
nonword
|
16
|
-
space
|
17
|
-
nonspace
|
18
|
-
].freeze
|
19
|
-
|
20
11
|
HEX_EXPANSION = '[0-9A-Fa-f]'
|
21
12
|
NONHEX_EXPANSION = '[^0-9A-Fa-f]'
|
22
13
|
LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
|
23
14
|
|
15
|
+
def self.directly_compatible?(expression)
|
16
|
+
case expression.token
|
17
|
+
when :space, :nonspace
|
18
|
+
!expression.ascii_classes?
|
19
|
+
when :digit, :nondigit, :word, :nonword
|
20
|
+
!expression.unicode_classes?
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
24
|
private
|
25
25
|
|
26
26
|
def convert_data
|
@@ -28,12 +28,30 @@ class JsRegex
|
|
28
28
|
when :hex then HEX_EXPANSION
|
29
29
|
when :nonhex then NONHEX_EXPANSION
|
30
30
|
when :linebreak then LINEBREAK_EXPANSION
|
31
|
-
when
|
32
|
-
pass_through
|
31
|
+
when :digit, :space, :word
|
32
|
+
return pass_through if self.class.directly_compatible?(expression)
|
33
|
+
set_substitution
|
34
|
+
when :nondigit, :nonspace, :nonword
|
35
|
+
return pass_through if self.class.directly_compatible?(expression)
|
36
|
+
negative_set_substitution
|
33
37
|
else
|
34
38
|
warn_of_unsupported_feature
|
35
39
|
end
|
36
40
|
end
|
41
|
+
|
42
|
+
def negative_set_substitution
|
43
|
+
# ::of_expression returns an inverted set for negative expressions,
|
44
|
+
# so we need to un-invert before wrapping in [^ and ]. Kinda lame.
|
45
|
+
"[^#{character_set.inversion.bmp_part}]"
|
46
|
+
end
|
47
|
+
|
48
|
+
def set_substitution
|
49
|
+
character_set.bmp_part.to_s(in_brackets: true)
|
50
|
+
end
|
51
|
+
|
52
|
+
def character_set
|
53
|
+
CharacterSet.of_expression(expression)
|
54
|
+
end
|
37
55
|
end
|
38
56
|
end
|
39
57
|
end
|
data/lib/js_regex/second_pass.rb
CHANGED
@@ -25,32 +25,16 @@ class JsRegex
|
|
25
25
|
end
|
26
26
|
|
27
27
|
def conditional_tree_permutations(tree)
|
28
|
-
|
29
|
-
return [] if
|
28
|
+
all_conds = conditions(tree)
|
29
|
+
return [] if all_conds.empty?
|
30
30
|
|
31
|
-
|
31
|
+
caps_per_branch = captured_group_count(tree)
|
32
32
|
|
33
|
-
condition_permutations(
|
33
|
+
condition_permutations(all_conds).map.with_index do |truthy_conds, i|
|
34
34
|
tree_permutation = tree.clone
|
35
35
|
# find referenced groups and conditionals and make one-sided
|
36
36
|
crawl(tree_permutation) do |node|
|
37
|
-
|
38
|
-
|
39
|
-
if node.type.equal?(:captured_group) &&
|
40
|
-
all_conditions.include?(node.reference)
|
41
|
-
truthy ? min_quantify(node) : null_quantify(node)
|
42
|
-
elsif node.type.equal?(:conditional)
|
43
|
-
branches = node.children[1...-1]
|
44
|
-
if branches.count == 1
|
45
|
-
truthy || null_quantify(branches.first)
|
46
|
-
else
|
47
|
-
null_quantify(truthy ? branches.last : branches.first)
|
48
|
-
end
|
49
|
-
node.update(type: :plain)
|
50
|
-
elsif node.type.equal?(:backref_num)
|
51
|
-
new_num = node.children[0].to_i + captured_groups_per_branch * i
|
52
|
-
node.update(children: [new_num.to_s])
|
53
|
-
end
|
37
|
+
build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
|
54
38
|
end
|
55
39
|
end
|
56
40
|
end
|
@@ -81,8 +65,40 @@ class JsRegex
|
|
81
65
|
end
|
82
66
|
end
|
83
67
|
|
68
|
+
def build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
|
69
|
+
truthy = truthy_conds.include?(node.reference)
|
70
|
+
|
71
|
+
if node.type.equal?(:captured_group) &&
|
72
|
+
all_conds.include?(node.reference)
|
73
|
+
adapt_referenced_group_to_permutation(node, truthy)
|
74
|
+
elsif node.type.equal?(:conditional)
|
75
|
+
adapt_conditional_to_permutation(node, truthy)
|
76
|
+
elsif node.type.equal?(:backref_num)
|
77
|
+
adapt_backref_to_permutation(node, caps_per_branch, i)
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def adapt_referenced_group_to_permutation(group_node, truthy)
|
82
|
+
truthy ? min_quantify(group_node) : null_quantify(group_node)
|
83
|
+
end
|
84
|
+
|
85
|
+
def adapt_conditional_to_permutation(conditional_node, truthy)
|
86
|
+
branches = conditional_node.children[1...-1]
|
87
|
+
if branches.count == 1
|
88
|
+
truthy || null_quantify(branches.first)
|
89
|
+
else
|
90
|
+
null_quantify(truthy ? branches.last : branches.first)
|
91
|
+
end
|
92
|
+
conditional_node.update(type: :plain)
|
93
|
+
end
|
94
|
+
|
95
|
+
def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
|
96
|
+
new_num = backref_node.children[0].to_i + caps_per_branch * i
|
97
|
+
backref_node.update(children: [new_num.to_s])
|
98
|
+
end
|
99
|
+
|
84
100
|
def min_quantify(node)
|
85
|
-
return if (qtf = node.quantifier)
|
101
|
+
return if guarantees_at_least_one_match?(qtf = node.quantifier)
|
86
102
|
|
87
103
|
if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
|
88
104
|
node.update(quantifier: nil)
|
@@ -91,6 +107,10 @@ class JsRegex
|
|
91
107
|
end
|
92
108
|
end
|
93
109
|
|
110
|
+
def guarantees_at_least_one_match?(quantifier)
|
111
|
+
quantifier.nil? || quantifier.min > 0
|
112
|
+
end
|
113
|
+
|
94
114
|
def null_quantify(node)
|
95
115
|
node.update(quantifier: '{0}')
|
96
116
|
end
|
data/lib/js_regex/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.0
|
4
|
+
version: 3.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-05-
|
11
|
+
date: 2019-05-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: character_set
|
@@ -16,14 +16,14 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '1.
|
19
|
+
version: '1.3'
|
20
20
|
type: :runtime
|
21
21
|
prerelease: false
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
23
23
|
requirements:
|
24
24
|
- - "~>"
|
25
25
|
- !ruby/object:Gem::Version
|
26
|
-
version: '1.
|
26
|
+
version: '1.3'
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: regexp_parser
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,20 +122,6 @@ dependencies:
|
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0.12'
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: mutant-rspec
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - "~>"
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '0.8'
|
132
|
-
type: :development
|
133
|
-
prerelease: false
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - "~>"
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '0.8'
|
139
125
|
description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
|
140
126
|
care of various incompatibilities and returning warnings for unsolvable differences.
|
141
127
|
email:
|