js_regex 3.2.0 → 3.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 48540644a806114addad63fb0f3ff9ba7ad00a084b3b281b9132d2eddbe610ff
4
- data.tar.gz: 775fd5d3f54c8b3c32b145414707c0b69c5c783cbd5270d9bc8d5e1564a6fec7
3
+ metadata.gz: 0f418197d1c8fea37bd549b701049f1130bfca486c5a11ba25e56d7d0cf990f8
4
+ data.tar.gz: 8679277fb51aca528933f4727cd0e70f1e1204c78acaf4e20da48d6b790ea7be
5
5
  SHA512:
6
- metadata.gz: 97810021249f6b512ebae5898f6ffe51ae95269f1e47909fb7b95fbe1e8aa6ba9696e4c7966bac27cbc7c1bd36668729aeb5d0ab4c243e85a88bb70fd6cbb203
7
- data.tar.gz: 5681e9d950c14f7260596b46a382a9829c0be4326bd898acbdcf5a257936c0f543018b5c7b1435582d8ba976a680d3b08ac0189e85d82b49e621e5f8918b93a9
6
+ metadata.gz: ddeadbe690e2928c068d8c2962d80e88c50f5fef46304e46fa98e8dc215c6c57c7790e9ef7ba8b1f640e71c3ca834b7233449c9b7d68466ac76581de73c7ffe3
7
+ data.tar.gz: 9c10d4e6a529ec54385ed55b34e4a910d354d7665f0831bc65b65bc20f176f4e46f9aa90cff2b36ea8f1306dfa65f7e42f4a93516ab228f206e74db7d708b779
@@ -34,9 +34,6 @@ class JsRegex
34
34
  end
35
35
 
36
36
  def build_options_group
37
- unless (encoding_options = data.scan(/[adu]/)).empty?
38
- warn_of_unsupported_feature("encoding options #{encoding_options}")
39
- end
40
37
  if subtype.equal?(:options_switch)
41
38
  # can be ignored since #options on subsequent Expressions are correct
42
39
  drop_without_warning
@@ -13,18 +13,22 @@ class JsRegex
13
13
 
14
14
  def convert_data(data)
15
15
  if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
16
- data.each_char.each_with_object(Node.new) do |chr, node|
17
- if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
18
- node << surrogate_pair_for(chr)
19
- else
20
- node << convert_bmp_data(chr)
21
- end
22
- end
16
+ convert_astral_data(data)
23
17
  else
24
18
  convert_bmp_data(data)
25
19
  end
26
20
  end
27
21
 
22
+ def convert_astral_data(data)
23
+ data.each_char.each_with_object(Node.new) do |chr, node|
24
+ if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
25
+ node << surrogate_pair_for(chr)
26
+ else
27
+ node << convert_bmp_data(chr)
28
+ end
29
+ end
30
+ end
31
+
28
32
  def convert_bmp_data(data)
29
33
  ensure_json_compatibility(
30
34
  ensure_forward_slashes_are_escaped(data)
@@ -22,15 +22,20 @@ class JsRegex
22
22
  end
23
23
 
24
24
  def convert_alternatives
25
- kept_any = nil
25
+ kept_any_previous_branch = nil
26
26
 
27
27
  convert_subexpressions.transform do |node|
28
- dropped = !node.children.empty? && node.children.all?(&:dropped?)
29
- node.children.unshift('|') if kept_any && !dropped
30
- kept_any = true unless dropped
28
+ unless dropped_branch?(node)
29
+ node.children.unshift('|') if kept_any_previous_branch
30
+ kept_any_previous_branch = true
31
+ end
31
32
  node
32
33
  end
33
34
  end
35
+
36
+ def dropped_branch?(branch_node)
37
+ branch_node.children.any? && branch_node.children.all?(&:dropped?)
38
+ end
34
39
  end
35
40
  end
36
41
  end
@@ -15,10 +15,7 @@ class JsRegex
15
15
  private
16
16
 
17
17
  def convert_data
18
- content = CharacterSet.of_property(subtype)
19
- if expression.case_insensitive? && !context.case_insensitive_root
20
- content = content.case_insensitive
21
- end
18
+ content = character_set_of_property
22
19
 
23
20
  if expression.negative?
24
21
  if content.astral_part?
@@ -30,6 +27,19 @@ class JsRegex
30
27
  warn_of_unsupported_feature('large astral plane match of property')
31
28
  end
32
29
 
30
+ limit_to_bmp_part(content)
31
+ end
32
+
33
+ def character_set_of_property
34
+ character_set = CharacterSet.of_property(subtype)
35
+ if expression.case_insensitive? && !context.case_insensitive_root
36
+ character_set.case_insensitive
37
+ else
38
+ character_set
39
+ end
40
+ end
41
+
42
+ def limit_to_bmp_part(content)
33
43
  bmp_part = content.bmp_part
34
44
  return drop if bmp_part.empty?
35
45
 
@@ -19,10 +19,7 @@ class JsRegex
19
19
  private
20
20
 
21
21
  def convert_data
22
- if directly_compatible?
23
- return expression.to_s(:base)
24
- .gsub(%r{\\?([\f\n\r\t])}) { Regexp.escape($1) }
25
- end
22
+ return pass_through_with_escaping if directly_compatible?
26
23
 
27
24
  content = CharacterSet.of_expression(expression)
28
25
  if expression.case_insensitive? && !context.case_insensitive_root
@@ -34,35 +31,48 @@ class JsRegex
34
31
  if Converter.in_surrogate_pair_limit? { content.astral_part.size }
35
32
  content.to_s_with_surrogate_alternation
36
33
  else
37
- warn_of_unsupported_feature('large astral plane match of set')
38
- bmp_part = content.bmp_part
39
- bmp_part.empty? ? drop : bmp_part.to_s(in_brackets: true)
34
+ limit_to_bmp_part_with_warning(content)
40
35
  end
41
36
  end
42
37
 
43
38
  def directly_compatible?
44
- if expression.case_insensitive? ^ context.case_insensitive_root
45
- # casefolding needed
46
- return
47
- end
39
+ all_children_directly_compatible? && !casefolding_needed?
40
+ end
48
41
 
49
- # check for children needing conversion (#each_expression is recursive)
42
+ def all_children_directly_compatible?
43
+ # note that #each_expression is recursive
50
44
  expression.each_expression do |exp|
51
- case exp.type
52
- when :literal
53
- # surrogate pair substitution needed if astral
54
- next if exp.text.ord <= 0xFFFF
55
- when :set
56
- # conversion needed for nested sets, intersections
57
- next if exp.token.equal?(:range)
58
- when :type
59
- next if TypeConverter::TYPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
60
- when :escape
61
- next if EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
62
- end
63
- return
45
+ return unless child_directly_compatible?(exp)
46
+ end
47
+ end
48
+
49
+ def child_directly_compatible?(exp)
50
+ case exp.type
51
+ when :literal
52
+ # surrogate pair substitution needed if astral
53
+ exp.text.ord <= 0xFFFF
54
+ when :set
55
+ # conversion needed for nested sets, intersections
56
+ exp.token.equal?(:range)
57
+ when :type
58
+ TypeConverter.directly_compatible?(exp)
59
+ when :escape
60
+ EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
64
61
  end
65
- true
62
+ end
63
+
64
+ def casefolding_needed?
65
+ expression.case_insensitive? ^ context.case_insensitive_root
66
+ end
67
+
68
+ def pass_through_with_escaping
69
+ expression.to_s(:base).gsub(%r{\\?([\f\n\r\t])}) { Regexp.escape($1) }
70
+ end
71
+
72
+ def limit_to_bmp_part_with_warning(content)
73
+ warn_of_unsupported_feature('large astral plane match of set')
74
+ bmp_part = content.bmp_part
75
+ bmp_part.empty? ? drop : bmp_part.to_s(in_brackets: true)
66
76
  end
67
77
  end
68
78
  end
@@ -8,19 +8,19 @@ class JsRegex
8
8
  # Template class implementation.
9
9
  #
10
10
  class TypeConverter < JsRegex::Converter::Base
11
- TYPES_SHARED_BY_RUBY_AND_JS = %i[
12
- digit
13
- nondigit
14
- word
15
- nonword
16
- space
17
- nonspace
18
- ].freeze
19
-
20
11
  HEX_EXPANSION = '[0-9A-Fa-f]'
21
12
  NONHEX_EXPANSION = '[^0-9A-Fa-f]'
22
13
  LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
23
14
 
15
+ def self.directly_compatible?(expression)
16
+ case expression.token
17
+ when :space, :nonspace
18
+ !expression.ascii_classes?
19
+ when :digit, :nondigit, :word, :nonword
20
+ !expression.unicode_classes?
21
+ end
22
+ end
23
+
24
24
  private
25
25
 
26
26
  def convert_data
@@ -28,12 +28,30 @@ class JsRegex
28
28
  when :hex then HEX_EXPANSION
29
29
  when :nonhex then NONHEX_EXPANSION
30
30
  when :linebreak then LINEBREAK_EXPANSION
31
- when *TYPES_SHARED_BY_RUBY_AND_JS
32
- pass_through
31
+ when :digit, :space, :word
32
+ return pass_through if self.class.directly_compatible?(expression)
33
+ set_substitution
34
+ when :nondigit, :nonspace, :nonword
35
+ return pass_through if self.class.directly_compatible?(expression)
36
+ negative_set_substitution
33
37
  else
34
38
  warn_of_unsupported_feature
35
39
  end
36
40
  end
41
+
42
+ def negative_set_substitution
43
+ # ::of_expression returns an inverted set for negative expressions,
44
+ # so we need to un-invert before wrapping in [^ and ]. Kinda lame.
45
+ "[^#{character_set.inversion.bmp_part}]"
46
+ end
47
+
48
+ def set_substitution
49
+ character_set.bmp_part.to_s(in_brackets: true)
50
+ end
51
+
52
+ def character_set
53
+ CharacterSet.of_expression(expression)
54
+ end
37
55
  end
38
56
  end
39
57
  end
@@ -25,32 +25,16 @@ class JsRegex
25
25
  end
26
26
 
27
27
  def conditional_tree_permutations(tree)
28
- all_conditions = conditions(tree)
29
- return [] if all_conditions.empty?
28
+ all_conds = conditions(tree)
29
+ return [] if all_conds.empty?
30
30
 
31
- captured_groups_per_branch = captured_group_count(tree)
31
+ caps_per_branch = captured_group_count(tree)
32
32
 
33
- condition_permutations(all_conditions).map.with_index do |truthy_conds, i|
33
+ condition_permutations(all_conds).map.with_index do |truthy_conds, i|
34
34
  tree_permutation = tree.clone
35
35
  # find referenced groups and conditionals and make one-sided
36
36
  crawl(tree_permutation) do |node|
37
- truthy = truthy_conds.include?(node.reference)
38
-
39
- if node.type.equal?(:captured_group) &&
40
- all_conditions.include?(node.reference)
41
- truthy ? min_quantify(node) : null_quantify(node)
42
- elsif node.type.equal?(:conditional)
43
- branches = node.children[1...-1]
44
- if branches.count == 1
45
- truthy || null_quantify(branches.first)
46
- else
47
- null_quantify(truthy ? branches.last : branches.first)
48
- end
49
- node.update(type: :plain)
50
- elsif node.type.equal?(:backref_num)
51
- new_num = node.children[0].to_i + captured_groups_per_branch * i
52
- node.update(children: [new_num.to_s])
53
- end
37
+ build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
54
38
  end
55
39
  end
56
40
  end
@@ -81,8 +65,40 @@ class JsRegex
81
65
  end
82
66
  end
83
67
 
68
+ def build_permutation(node, all_conds, truthy_conds, caps_per_branch, i)
69
+ truthy = truthy_conds.include?(node.reference)
70
+
71
+ if node.type.equal?(:captured_group) &&
72
+ all_conds.include?(node.reference)
73
+ adapt_referenced_group_to_permutation(node, truthy)
74
+ elsif node.type.equal?(:conditional)
75
+ adapt_conditional_to_permutation(node, truthy)
76
+ elsif node.type.equal?(:backref_num)
77
+ adapt_backref_to_permutation(node, caps_per_branch, i)
78
+ end
79
+ end
80
+
81
+ def adapt_referenced_group_to_permutation(group_node, truthy)
82
+ truthy ? min_quantify(group_node) : null_quantify(group_node)
83
+ end
84
+
85
+ def adapt_conditional_to_permutation(conditional_node, truthy)
86
+ branches = conditional_node.children[1...-1]
87
+ if branches.count == 1
88
+ truthy || null_quantify(branches.first)
89
+ else
90
+ null_quantify(truthy ? branches.last : branches.first)
91
+ end
92
+ conditional_node.update(type: :plain)
93
+ end
94
+
95
+ def adapt_backref_to_permutation(backref_node, caps_per_branch, i)
96
+ new_num = backref_node.children[0].to_i + caps_per_branch * i
97
+ backref_node.update(children: [new_num.to_s])
98
+ end
99
+
84
100
  def min_quantify(node)
85
- return if (qtf = node.quantifier).nil? || qtf.min > 0
101
+ return if guarantees_at_least_one_match?(qtf = node.quantifier)
86
102
 
87
103
  if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
88
104
  node.update(quantifier: nil)
@@ -91,6 +107,10 @@ class JsRegex
91
107
  end
92
108
  end
93
109
 
110
+ def guarantees_at_least_one_match?(quantifier)
111
+ quantifier.nil? || quantifier.min > 0
112
+ end
113
+
94
114
  def null_quantify(node)
95
115
  node.update(quantifier: '{0}')
96
116
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class JsRegex
4
- VERSION = '3.2.0'
4
+ VERSION = '3.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-05-18 00:00:00.000000000 Z
11
+ date: 2019-05-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: character_set
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.2'
19
+ version: '1.3'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.2'
26
+ version: '1.3'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: regexp_parser
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -122,20 +122,6 @@ dependencies:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0.12'
125
- - !ruby/object:Gem::Dependency
126
- name: mutant-rspec
127
- requirement: !ruby/object:Gem::Requirement
128
- requirements:
129
- - - "~>"
130
- - !ruby/object:Gem::Version
131
- version: '0.8'
132
- type: :development
133
- prerelease: false
134
- version_requirements: !ruby/object:Gem::Requirement
135
- requirements:
136
- - - "~>"
137
- - !ruby/object:Gem::Version
138
- version: '0.8'
139
125
  description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
140
126
  care of various incompatibilities and returning warnings for unsolvable differences.
141
127
  email: