js_regex 3.2.0 → 3.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 48540644a806114addad63fb0f3ff9ba7ad00a084b3b281b9132d2eddbe610ff
4
- data.tar.gz: 775fd5d3f54c8b3c32b145414707c0b69c5c783cbd5270d9bc8d5e1564a6fec7
3
+ metadata.gz: 0f418197d1c8fea37bd549b701049f1130bfca486c5a11ba25e56d7d0cf990f8
4
+ data.tar.gz: 8679277fb51aca528933f4727cd0e70f1e1204c78acaf4e20da48d6b790ea7be
5
5
  SHA512:
6
- metadata.gz: 97810021249f6b512ebae5898f6ffe51ae95269f1e47909fb7b95fbe1e8aa6ba9696e4c7966bac27cbc7c1bd36668729aeb5d0ab4c243e85a88bb70fd6cbb203
7
- data.tar.gz: 5681e9d950c14f7260596b46a382a9829c0be4326bd898acbdcf5a257936c0f543018b5c7b1435582d8ba976a680d3b08ac0189e85d82b49e621e5f8918b93a9
6
+ metadata.gz: ddeadbe690e2928c068d8c2962d80e88c50f5fef46304e46fa98e8dc215c6c57c7790e9ef7ba8b1f640e71c3ca834b7233449c9b7d68466ac76581de73c7ffe3
7
+ data.tar.gz: 9c10d4e6a529ec54385ed55b34e4a910d354d7665f0831bc65b65bc20f176f4e46f9aa90cff2b36ea8f1306dfa65f7e42f4a93516ab228f206e74db7d708b779
@@ -34,9 +34,6 @@ class JsRegex
34
34
  end
35
35
 
36
36
  def build_options_group
37
- unless (encoding_options = data.scan(/[adu]/)).empty?
38
- warn_of_unsupported_feature("encoding options #{encoding_options}")
39
- end
40
37
  if subtype.equal?(:options_switch)
41
38
  # can be ignored since #options on subsequent Expressions are correct
42
39
  drop_without_warning
@@ -13,18 +13,22 @@ class JsRegex
13
13
 
14
14
  def convert_data(data)
15
15
  if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
16
- data.each_char.each_with_object(Node.new) do |chr, node|
17
- if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
18
- node << surrogate_pair_for(chr)
19
- else
20
- node << convert_bmp_data(chr)
21
- end
22
- end
16
+ convert_astral_data(data)
23
17
  else
24
18
  convert_bmp_data(data)
25
19
  end
26
20
  end
27
21
 
22
+ def convert_astral_data(data)
23
+ data.each_char.each_with_object(Node.new) do |chr, node|
24
+ if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
25
+ node << surrogate_pair_for(chr)
26
+ else
27
+ node << convert_bmp_data(chr)
28
+ end
29
+ end
30
+ end
31
+
28
32
  def convert_bmp_data(data)
29
33
  ensure_json_compatibility(
30
34
  ensure_forward_slashes_are_escaped(data)
@@ -22,15 +22,20 @@ class JsRegex
22
22
  end
23
23
 
24
24
  def convert_alternatives
25
- kept_any = nil
25
+ kept_any_previous_branch = nil
26
26
 
27
27
  convert_subexpressions.transform do |node|
28
- dropped = !node.children.empty? && node.children.all?(&:dropped?)
29
- node.children.unshift('|') if kept_any && !dropped
30
- kept_any = true unless dropped
28
+ unless dropped_branch?(node)
29
+ node.children.unshift('|') if kept_any_previous_branch
30
+ kept_any_previous_branch = true
31
+ end
31
32
  node
32
33
  end
33
34
  end
35
+
36
+ def dropped_branch?(branch_node)
37
+ branch_node.children.any? && branch_node.children.all?(&:dropped?)
38
+ end
34
39
  end
35
40
  end
36
41
  end
@@ -15,10 +15,7 @@ class JsRegex
15
15
  private
16
16
 
17
17
  def convert_data
18
- content = CharacterSet.of_property(subtype)
19
- if expression.case_insensitive? && !context.case_insensitive_root
20
- content = content.case_insensitive
21
- end
18
+ content = character_set_of_property
22
19
 
23
20
  if expression.negative?
24
21
  if content.astral_part?
@@ -30,6 +27,19 @@ class JsRegex
30
27
  warn_of_unsupported_feature('large astral plane match of property')
31
28
  end
32
29
 
30
+ limit_to_bmp_part(content)
31
+ end
32
+
33
+ def character_set_of_property
34
+ character_set = CharacterSet.of_property(subtype)
35
+ if expression.case_insensitive? && !context.case_insensitive_root
36
+ character_set.case_insensitive
37
+ else
38
+ character_set
39
+ end
40
+ end
41
+
42
+ def limit_to_bmp_part(content)
33
43
  bmp_part = content.bmp_part
34
44
  return drop if bmp_part.empty?
35
45
 
@@ -19,10 +19,7 @@ class JsRegex
19
19
  private
20
20
 
21
21
  def convert_data
22
- if directly_compatible?
23
- return expression.to_s(:base)
24
- .gsub(%r{\\?([\f\n\r\t])}) { Regexp.escape($1) }
25
- end
22
+ return pass_through_with_escaping if directly_compatible?
26
23
 
27
24
  content = CharacterSet.of_expression(expression)
28
25
  if expression.case_insensitive? && !context.case_insensitive_root
@@ -34,35 +31,48 @@ class JsRegex
34
31
  if Converter.in_surrogate_pair_limit? { content.astral_part.size }
35
32
  content.to_s_with_surrogate_alternation
36
33
  else
37
- warn_of_unsupported_feature('large astral plane match of set')
38
- bmp_part = content.bmp_part
39
- bmp_part.empty? ? drop : bmp_part.to_s(in_brackets: true)
34
+ limit_to_bmp_part_with_warning(content)
40
35
  end
41
36
  end
42
37
 
43
38
  def directly_compatible?
44
- if expression.case_insensitive? ^ context.case_insensitive_root
45
- # casefolding needed
46
- return
47
- end
39
+ all_children_directly_compatible? && !casefolding_needed?
40
+ end
48
41
 
49
- # check for children needing conversion (#each_expression is recursive)
42
+ def all_children_directly_compatible?
43
+ # note that #each_expression is recursive
50
44
  expression.each_expression do |exp|
51
- case exp.type
52
- when :literal
53
- # surrogate pair substitution needed if astral
54
- next if exp.text.ord <= 0xFFFF
55
- when :set
56
- # conversion needed for nested sets, intersections
57
- next if exp.token.equal?(:range)
58
- when :type
59
- next if TypeConverter::TYPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
60
- when :escape
61
- next if EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
62
- end
63
- return
45
+ return unless child_directly_compatible?(exp)
46
+ end
47
+ end
48
+
49
+ def child_directly_compatible?(exp)
50
+ case exp.type
51
+ when :literal
52
+ # surrogate pair substitution needed if astral
53
+ exp.text.ord <= 0xFFFF
54
+ when :set
55
+ # conversion needed for nested sets, intersections
56
+ exp.token.equal?(:range)
57
+ when :type
58
+ TypeConverter.directly_compatible?(exp)
59
+ when :escape
60
+ EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
64
61
  end
65
- true
62
+ end
63
+
64
+ def casefolding_needed?
65
+ expression.case_insensitive? ^ context.case_insensitive_root
66
+ end
67
+
68
+ def pass_through_with_escaping
69
+ expression.to_s(:base).gsub(%r{\\?([\f\n\r\t])}) { Regexp.escape($1) }
70
+ end
71
+
72
+ def limit_to_bmp_part_with_warning(content)
73
+ warn_of_unsupported_feature('large astral plane match of set')
74
+ bmp_part = content.bmp_part
75
+ bmp_part.empty? ? drop : bmp_part.to_s(in_brackets: true)
66
76
  end
67
77
  end
68
78
  end
@@ -8,19 +8,19 @@ class JsRegex
8
8
  # Template class implementation.
9
9
  #
10
10
  class TypeConverter < JsRegex::Converter::Base
11
- TYPES_SHARED_BY_RUBY_AND_JS = %i[
12
- digit
13
- nondigit
14
- word
15
- nonword
16
- space
17
- nonspace
18
- ].freeze
19
-
20
11
  HEX_EXPANSION = '[0-9A-Fa-f]'
21
12
  NONHEX_EXPANSION = '[^0-9A-Fa-f]'
22
13
  LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
23
14
 
15
+ def self.directly_compatible?(expression)
16
+ case expression.token
17
+ when :space, :nonspace
18
+ !expression.ascii_classes?
19
+ when :digit, :nondigit, :word, :nonword
20
+ !expression.unicode_classes?
21
+ end
22
+ end
23
+
24
24
  private
25
25
 
26
26
  def convert_data
@@ -28,12 +28,30 @@ class JsRegex
28
28
  when :hex then HEX_EXPANSION
29
29
  when :nonhex then NONHEX_EXPANSION
30
30
  when :linebreak then LINEBREAK_EXPANSION
31
- when *TYPES_SHARED_BY_RUBY_AND_JS
32
- pass_through
31
+ when :digit, :space, :word
32
+ return pass_through if self.class.directly_compatible?(expression)
33
+ set_substitution
34
+ when :nondigit, :nonspace, :nonword
35
+ return pass_through if self.class.directly_compatible?(expression)
36
+ negative_set_substitution
33
37
  else
34
38
  warn_of_unsupported_feature
35
39
  end
36
40
  end
41
+
42
+ def negative_set_substitution
43
+ # ::of_expression returns an inverted set for negative expressions,
44
+ # so we need to un-invert before wrapping in [^ and ]. Kinda lame.
45
+ "[^#{character_set.inversion.bmp_part}]"
46
+ end
47
+
48
+ def set_substitution
49
+ character_set.bmp_part.to_s(in_brackets: true)
50
+ end
51
+
52
+ def character_set
53
+ CharacterSet.of_expression(expression)
54
+ end
37
55
  end
38
56
  end
39
57
  end
@@ -25,32 +25,16 @@ class JsRegex
25
25
  end
26
26
 
27
27
  def conditional_tree_permutations(tree)
28
- all_conditions = conditions(tree)
29
- return [] if all_conditions.empty?
28
+ all_conds = conditions(tree)
29
+ return [] if all_conds.empty?
30
30
 
31
- captured_groups_per_branch = captured_group_count(tree)
31
+ caps_per_branch = captured_group_count(tree)
32
32
 
33
- condition_permutations(all_conditions).map.with_index do |truthy_conds, i|
33
+ condition_permutations(all_conds).map.with_index do |truthy_conds, i|
34
34
  tree_permutation = tree.clone
35
35
  # find referenced groups and conditionals and make one-sided
36
36
  crawl(tree_permutation) do |node|
37
- truthy = truthy_conds.include?(node.reference)
38
-
39
- if node.type.equal?(:captured_group) &&
40
- all_conditions.include?(node.reference)
41
- truthy ? min_quantify(node) : null_quantify(node)
42
- elsif node.type.equal?(:conditional)
43
- branches = node.children[1...-1]
44
- if branches.count == 1
45
- truthy || null_quantify(branches.first)
46
- else
47
- null_quantify(truthy ? branches.last : branches.first)
48
- end
49
- node.update(type: :plain)
50
- elsif node.type.equal?(:backref_num)
51
- new_num = node.children[0].to_i + captured_groups_per_branch * i
52
- node.update(children: [new_num.to_s])
53
- end
37
+ build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
54
38
  end
55
39
  end
56
40
  end
@@ -81,8 +65,40 @@ class JsRegex
81
65
  end
82
66
  end
83
67
 
68
+ def build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
69
+ truthy = truthy_conds.include?(node.reference)
70
+
71
+ if node.type.equal?(:captured_group) &&
72
+ all_conds.include?(node.reference)
73
+ adapt_referenced_group_to_permutation(node, truthy)
74
+ elsif node.type.equal?(:conditional)
75
+ adapt_conditional_to_permutation(node, truthy)
76
+ elsif node.type.equal?(:backref_num)
77
+ adapt_backref_to_permutation(node, caps_per_branch, i)
78
+ end
79
+ end
80
+
81
+ def adapt_referenced_group_to_permutation(group_node, truthy)
82
+ truthy ? min_quantify(group_node) : null_quantify(group_node)
83
+ end
84
+
85
+ def adapt_conditional_to_permutation(conditional_node, truthy)
86
+ branches = conditional_node.children[1...-1]
87
+ if branches.count == 1
88
+ truthy || null_quantify(branches.first)
89
+ else
90
+ null_quantify(truthy ? branches.last : branches.first)
91
+ end
92
+ conditional_node.update(type: :plain)
93
+ end
94
+
95
+ def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
96
+ new_num = backref_node.children[0].to_i + caps_per_branch * i
97
+ backref_node.update(children: [new_num.to_s])
98
+ end
99
+
84
100
  def min_quantify(node)
85
- return if (qtf = node.quantifier).nil? || qtf.min > 0
101
+ return if guarantees_at_least_one_match?(qtf = node.quantifier)
86
102
 
87
103
  if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
88
104
  node.update(quantifier: nil)
@@ -91,6 +107,10 @@ class JsRegex
91
107
  end
92
108
  end
93
109
 
110
+ def guarantees_at_least_one_match?(quantifier)
111
+ quantifier.nil? || quantifier.min > 0
112
+ end
113
+
94
114
  def null_quantify(node)
95
115
  node.update(quantifier: '{0}')
96
116
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class JsRegex
4
- VERSION = '3.2.0'
4
+ VERSION = '3.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-05-18 00:00:00.000000000 Z
11
+ date: 2019-05-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: character_set
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.2'
19
+ version: '1.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.2'
26
+ version: '1.3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: regexp_parser
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -122,20 +122,6 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0.12'
125
- - !ruby/object:Gem::Dependency
126
- name: mutant-rspec
127
- requirement: !ruby/object:Gem::Requirement
128
- requirements:
129
- - - "~>"
130
- - !ruby/object:Gem::Version
131
- version: '0.8'
132
- type: :development
133
- prerelease: false
134
- version_requirements: !ruby/object:Gem::Requirement
135
- requirements:
136
- - - "~>"
137
- - !ruby/object:Gem::Version
138
- version: '0.8'
139
125
  description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
140
126
  care of various incompatibilities and returning warnings for unsolvable differences.
141
127
  email: