js_regex 3.8.0 → 3.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8971658980813740deb03ece3c5af5bfdfd7412f0630fc4a3f172e4c06b11c52
4
- data.tar.gz: bdafa3639a230b1ec1ac4661828050d99339eb18ec1768fa2a6f1b5e69d95f1b
3
+ metadata.gz: 054de046ea7e8e7244c80e8c8385270ffd821f0fdd690b8a15a79ea1c1d2a6f4
4
+ data.tar.gz: d6d3dbd4d191e16008bfd586ff22a10a8240275ac328c0a080af910d99b650a1
5
5
  SHA512:
6
- metadata.gz: 13abdf7b41485194f05cce79751ca60e8b1f9fc864b17f58294650df9f9e485a889b9571d847bf564aa12b709fd572c53f27a5a2900e3dc8bfa765f522b58e62
7
- data.tar.gz: 31941c0d7a4842fdea84d5f649f3df30c54e8da24e6dbf722cff3e48661ae89646117f66f33ee419c0fc0e393b11f284cb565751b4f77aa5eb0bbbc8d38d903d
6
+ metadata.gz: 93e7c0a574cadfb867c598a9adbcbd9b18585204912865e589691246ce879bec7b7d48a02872c599447db3f2feeda80236d08b1030aecd1d6bf0568792ac859a
7
+ data.tar.gz: e766f2ce0305ed0e6b10372d58f07fb0137e51a4a62a607b1f0272ac4b431c820fdb9ca2b8d2ce18de30d4bd33ca6cd8c49846d5c8efa79c081324342b990e87
@@ -13,20 +13,21 @@ class JsRegex
13
13
  require_relative 'target'
14
14
 
15
15
  class << self
16
- def of(input, options: nil, target: Target::ES2009)
16
+ def of(input, options: nil, target: Target::ES2009, fail_fast: false)
17
17
  target = Target.cast(target)
18
- source, warnings, extra_opts = convert_source(input, target)
18
+ source, warnings, extra_opts = convert_source(input, target, fail_fast)
19
19
  options_string = convert_options(input, options, extra_opts)
20
20
  [source, options_string, warnings, target]
21
21
  end
22
22
 
23
23
  private
24
24
 
25
- def convert_source(input, target)
25
+ def convert_source(input, target, fail_fast)
26
26
  tree = Regexp::Parser.parse(input)
27
27
  context = Converter::Context.new(
28
28
  case_insensitive_root: tree.i?,
29
29
  target: target,
30
+ fail_fast: fail_fast,
30
31
  )
31
32
  converted_tree = Converter.convert(tree, context)
32
33
  final_tree = SecondPass.call(converted_tree)
@@ -36,7 +37,7 @@ class JsRegex
36
37
  end
37
38
 
38
39
  def convert_options(input, custom_options, required_options)
39
- options = custom_options.to_s.scan(/[gimsuy]/) + required_options
40
+ options = custom_options.to_s.scan(/[dgimsuvy]/) + required_options
40
41
  if input.is_a?(Regexp) && (input.options & Regexp::IGNORECASE).nonzero?
41
42
  options << 'i'
42
43
  end
@@ -10,8 +10,10 @@ class JsRegex
10
10
 
11
11
  def convert_data
12
12
  case subtype
13
- when :bol, :bos then '^'
14
- when :eol, :eos then '$'
13
+ when :bol then convert_bol
14
+ when :bos then '^'
15
+ when :eol then '(?=$|\n)'
16
+ when :eos then '$'
15
17
  when :eos_ob_eol then '(?=\n?$)'
16
18
  when :word_boundary then convert_boundary
17
19
  when :nonword_boundary then convert_nonboundary
@@ -20,6 +22,15 @@ class JsRegex
20
22
  end
21
23
  end
22
24
 
25
+ def convert_bol
26
+ if context.es_2018_or_higher?
27
+ '(?<=^|\n(?!$))'
28
+ else
29
+ # TODO: warn in v4.0.0, or drop ES2009 & ES2015 support
30
+ '^'
31
+ end
32
+ end
33
+
23
34
  def convert_boundary
24
35
  if context.es_2018_or_higher? && context.enable_u_option
25
36
  BOUNDARY_EXPANSION
@@ -29,7 +29,8 @@ class JsRegex
29
29
 
30
30
  def convert_to_plain_num_ref
31
31
  position = new_position
32
- Node.new("\\#{position}", reference: position, type: :backref)
32
+ text = "\\#{position}#{'(?:)' if expression.x?}"
33
+ Node.new(text, reference: position, type: :backref)
33
34
  end
34
35
 
35
36
  def new_position
@@ -41,14 +42,21 @@ class JsRegex
41
42
  end
42
43
 
43
44
  def convert_call
44
- if expression.respond_to?(:number) && expression.number.equal?(0)
45
- return warn_of_unsupported_feature('whole-pattern recursion')
45
+ if context.recursions(expression) >= 5
46
+ warn_of("Recursion for '#{expression}' curtailed at 5 levels")
47
+ return drop
46
48
  end
49
+
50
+ context.count_recursion(expression)
47
51
  context.increment_local_capturing_group_count
48
52
  target_copy = expression.referenced_expression.unquantified_clone
49
53
  # avoid "Duplicate capture group name" error in JS
50
54
  target_copy.token = :capture if target_copy.is?(:named, :group)
51
- convert_expression(target_copy)
55
+ context.start_subexp_recursion
56
+ result = convert_expression(target_copy)
57
+ context.end_subexp_recursion
58
+ # wrap in group if it is a full-pattern recursion
59
+ expression.reference == 0 ? Node.new('(?:', result, ')') : result
52
60
  end
53
61
  end
54
62
  end
@@ -63,7 +63,11 @@ class JsRegex
63
63
  end
64
64
 
65
65
  def warn_of(text)
66
- context.warnings << text
66
+ if context.fail_fast
67
+ raise ConversionError, text.sub(/^Dropped /, '')
68
+ else
69
+ context.warnings << text
70
+ end
67
71
  end
68
72
 
69
73
  def drop
@@ -75,8 +79,9 @@ class JsRegex
75
79
  number = context.capturing_group_count + 1
76
80
  backref_node = Node.new("\\#{number}", reference: number, type: :backref)
77
81
  context.increment_local_capturing_group_count
78
- # an empty passive group (?:) is appended as literal digits may follow
79
- Node.new('(?=(', *content, '))', backref_node, '(?:)')
82
+ # The surrounding group is added so that quantifiers apply to the whole.
83
+ # Without it, `(?:)` would need to be appended as literal digits may follow.
84
+ Node.new('(?:(?=(', *content, '))', backref_node, ')')
80
85
  end
81
86
  end
82
87
  end
@@ -8,14 +8,18 @@ class JsRegex
8
8
  class Context
9
9
  attr_reader :capturing_group_count,
10
10
  :case_insensitive_root,
11
+ :fail_fast,
11
12
  :in_atomic_group,
13
+ :in_subexp_recursion,
12
14
  :warnings
13
15
 
14
- def initialize(case_insensitive_root: false, target: nil)
16
+ def initialize(case_insensitive_root: false, fail_fast: false, target: nil)
15
17
  self.added_capturing_groups_after_group = Hash.new(0)
16
18
  self.capturing_group_count = 0
17
- self.warnings = []
19
+ self.fail_fast = fail_fast
20
+ self.recursions_per_expression = {}
18
21
  self.required_options_hash = {}
22
+ self.warnings = []
19
23
 
20
24
  self.case_insensitive_root = case_insensitive_root
21
25
  self.target = target
@@ -39,6 +43,10 @@ class JsRegex
39
43
  required_options_hash['u'] = true
40
44
  end
41
45
 
46
+ def u?
47
+ required_options_hash['u']
48
+ end
49
+
42
50
  def required_options
43
51
  required_options_hash.keys
44
52
  end
@@ -62,6 +70,26 @@ class JsRegex
62
70
  capture_group
63
71
  end
64
72
 
73
+ def recursions(exp)
74
+ recursions_per_expression[recursion_id(exp)] || 0
75
+ end
76
+
77
+ def count_recursion(exp)
78
+ recursions_per_expression[recursion_id(exp)] = recursions(exp) + 1
79
+ end
80
+
81
+ def recursion_id(exp)
82
+ [exp.class, exp.starts_at]
83
+ end
84
+
85
+ def start_subexp_recursion
86
+ self.in_subexp_recursion = true
87
+ end
88
+
89
+ def end_subexp_recursion
90
+ self.in_subexp_recursion = false
91
+ end
92
+
65
93
  # takes and returns 1-indexed group positions.
66
94
  # new is different from old if capturing groups were added in between.
67
95
  def new_capturing_group_position(old_position)
@@ -79,12 +107,15 @@ class JsRegex
79
107
  private
80
108
 
81
109
  attr_accessor :added_capturing_groups_after_group,
110
+ :recursions_per_expression,
82
111
  :required_options_hash,
83
112
  :target
84
113
 
85
114
  attr_writer :capturing_group_count,
86
115
  :case_insensitive_root,
116
+ :fail_fast,
87
117
  :in_atomic_group,
118
+ :in_subexp_recursion,
88
119
  :warnings
89
120
 
90
121
  def total_added_capturing_groups
@@ -42,10 +42,10 @@ class JsRegex
42
42
  unicode_escape_codepoint
43
43
  when :literal
44
44
  LiteralConverter.convert_data(expression.char, context)
45
+ when :bell, :escape, :hex, :octal
46
+ hex_escape_codepoint
45
47
  when *ESCAPES_SHARED_BY_RUBY_AND_JS
46
48
  pass_through
47
- when :bell, :escape, :octal
48
- hex_escape_codepoint
49
49
  else
50
50
  warn_of_unsupported_feature
51
51
  end
@@ -84,7 +84,7 @@ class JsRegex
84
84
  tail = opts[:tail] || ')'
85
85
  return Node.new(*wrap(head, tail)) if opts[:capturing].equal?(false)
86
86
 
87
- context.capture_group
87
+ context.capture_group unless context.in_subexp_recursion
88
88
  ref = expression.number
89
89
  Node.new(*wrap(head, tail), reference: ref, type: :captured_group)
90
90
  end
@@ -6,11 +6,12 @@ class JsRegex
6
6
  # Template class implementation.
7
7
  #
8
8
  class LiteralConverter < JsRegex::Converter::Base
9
- class << self
10
- ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
9
+ ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
10
+ LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/
11
11
 
12
+ class << self
12
13
  def convert_data(data, context)
13
- if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
14
+ if !context.u? && data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
14
15
  if context.enable_u_option
15
16
  escape_incompatible_bmp_literals(data)
16
17
  else
@@ -23,7 +24,7 @@ class JsRegex
23
24
 
24
25
  def convert_astral_data(data)
25
26
  data.each_char.each_with_object(Node.new) do |char, node|
26
- if char =~ ASTRAL_PLANE_CODEPOINT_PATTERN
27
+ if char.ord > 0xFFFF
27
28
  node << surrogate_substitution_for(char)
28
29
  else
29
30
  node << escape_incompatible_bmp_literals(char)
@@ -31,8 +32,12 @@ class JsRegex
31
32
  end
32
33
  end
33
34
 
35
+ ESCAPES = Hash.new { |h, k| raise KeyError, "#{h}[#{k.inspect}]" }
36
+ .merge("\f\n\r\t\v".chars.to_h { |c| [c, Regexp.escape(c)] })
37
+ .merge('/' => '\\/')
38
+
34
39
  def escape_incompatible_bmp_literals(data)
35
- data.gsub('/', '\\/').gsub(/[\f\n\r\t]/) { |lit| Regexp.escape(lit) }
40
+ data.gsub(LITERAL_REQUIRING_ESCAPE_PATTERN, ESCAPES)
36
41
  end
37
42
 
38
43
  private
@@ -17,54 +17,92 @@ class JsRegex
17
17
  private
18
18
 
19
19
  def convert_data
20
- return pass_through_with_escaping if directly_compatible?
20
+ simple_conversion || full_recalculation
21
+ end
21
22
 
22
- content = CharacterSet.of_expression(expression)
23
- if expression.case_insensitive? && !context.case_insensitive_root
24
- content = content.case_insensitive
25
- elsif !expression.case_insensitive? && context.case_insensitive_root
26
- warn_of_unsupported_feature('nested case-sensitive set')
27
- end
23
+ def simple_conversion
24
+ return false if casefolding_needed?
28
25
 
29
- if context.es_2015_or_higher?
30
- context.enable_u_option if content.astral_part?
31
- content.to_s(format: 'es6', in_brackets: true)
32
- else
33
- content.to_s_with_surrogate_ranges
26
+ result = "[#{'^' if expression.negative?}".dup
27
+
28
+ expression.expressions.each do |subexp|
29
+ return false unless (child_res = simple_convert_child(subexp))
30
+
31
+ result << child_res.to_s
34
32
  end
35
- end
36
33
 
37
- def directly_compatible?
38
- all_children_directly_compatible? && !casefolding_needed?
34
+ result << ']'
39
35
  end
40
36
 
41
- def all_children_directly_compatible?
42
- # note that #each_expression is recursive
43
- expression.each_expression.all? { |ch| child_directly_compatible?(ch) }
37
+ def casefolding_needed?
38
+ expression.case_insensitive? ^ context.case_insensitive_root
44
39
  end
45
40
 
46
- def child_directly_compatible?(exp)
41
+ def simple_convert_child(exp)
47
42
  case exp.type
48
43
  when :literal
49
- # surrogate pair substitution needed on ES2009 if astral
50
- exp.text.ord <= 0xFFFF || context.enable_u_option
44
+ simple_convert_literal_child(exp)
51
45
  when :set
52
- # conversion needed for nested sets, intersections
53
- exp.token.equal?(:range)
46
+ # full conversion is needed for nested sets and intersections
47
+ exp.token.equal?(:range) && exp.expressions.map do |op|
48
+ simple_convert_child(op) or return false
49
+ end.join('-')
54
50
  when :type
55
- TypeConverter.directly_compatible?(exp)
51
+ TypeConverter.directly_compatible?(exp, context) &&
52
+ exp.text
56
53
  when :escape
57
- EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
54
+ return exp.text if SET_SPECIFIC_ESCAPES_PATTERN.match?(exp.text)
55
+
56
+ case exp.token
57
+ when *CONVERTIBLE_ESCAPE_TOKENS
58
+ EscapeConverter.new.convert(exp, context)
59
+ when :literal
60
+ exp.char.ord <= 0xFFFF &&
61
+ LiteralConverter.escape_incompatible_bmp_literals(exp.char)
62
+ end
58
63
  end
59
64
  end
60
65
 
61
- def casefolding_needed?
62
- expression.case_insensitive? ^ context.case_insensitive_root
66
+ def simple_convert_literal_child(exp)
67
+ if !context.u? &&
68
+ exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
69
+ !context.enable_u_option
70
+ false
71
+ elsif SET_LITERALS_REQUIRING_ESCAPE_PATTERN.match?(exp.text)
72
+ "\\#{exp.text}"
73
+ else
74
+ LiteralConverter.escape_incompatible_bmp_literals(exp.text)
75
+ end
76
+ end
77
+
78
+ SET_LITERALS_REQUIRING_ESCAPE_PATTERN = Regexp.union(%w<( ) [ ] { } / - |>)
79
+ SET_SPECIFIC_ESCAPES_PATTERN = /[\^\-]/
80
+ CONVERTIBLE_ESCAPE_TOKENS = %i[control meta_sequence bell escape octal] +
81
+ EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS
82
+
83
+ def full_recalculation
84
+ # Fetch codepoints as if the set was case-sensitive, then re-add
85
+ # case-insensitivity if needed.
86
+ # This way we preserve the casing of the original set in cases where the
87
+ # whole regexp is case-insensitive, e.g. /[ABc]/i => /[ABc]/i.
88
+ content = original_case_character_set
89
+ if expression.case_insensitive? && !context.case_insensitive_root
90
+ content = content.case_insensitive
91
+ elsif !expression.case_insensitive? && context.case_insensitive_root
92
+ warn_of_unsupported_feature('nested case-sensitive set')
93
+ end
94
+ if context.es_2015_or_higher?
95
+ context.enable_u_option if content.astral_part?
96
+ content.to_s(format: 'es6', in_brackets: true)
97
+ else
98
+ content.to_s_with_surrogate_ranges
99
+ end
63
100
  end
64
101
 
65
- def pass_through_with_escaping
66
- string = expression.to_s(:base)
67
- LiteralConverter.escape_incompatible_bmp_literals(string)
102
+ def original_case_character_set
103
+ neutral_set = expression.dup
104
+ neutral_set.each_expression(true) { |exp| exp.options[:i] = false }
105
+ CharacterSet.of_expression(neutral_set)
68
106
  end
69
107
  end
70
108
  end
@@ -6,13 +6,16 @@ class JsRegex
6
6
  # Template class implementation.
7
7
  #
8
8
  class TypeConverter < JsRegex::Converter::Base
9
- HEX_EXPANSION = '[0-9A-Fa-f]'
10
- NONHEX_EXPANSION = '[^0-9A-Fa-f]'
11
- ES2018_HEX_EXPANSION = '\p{AHex}'
12
- ES2018_NONHEX_EXPANSION = '\P{AHex}'
13
- LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
9
+ HEX_EXPANSION = '[0-9A-Fa-f]'
10
+ NONHEX_EXPANSION = '[^0-9A-Fa-f]'
11
+ I_MODE_HEX_EXPANSION = '[0-9A-F]'
12
+ I_MODE_NONHEX_EXPANSION = '[^0-9A-F]'
13
+ ES2018_HEX_EXPANSION = '\p{AHex}'
14
+ ES2018_NONHEX_EXPANSION = '\P{AHex}'
15
+ ES2018_XGRAPHEME_EXPANSION = '[\P{M}\P{Lm}](?:(?:[\u035C\u0361]\P{M}\p{M}*)|\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*'
16
+ LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
14
17
 
15
- def self.directly_compatible?(expression)
18
+ def self.directly_compatible?(expression, _context = nil)
16
19
  case expression.token
17
20
  when :space, :nonspace
18
21
  !expression.ascii_classes?
@@ -27,7 +30,8 @@ class JsRegex
27
30
  case subtype
28
31
  when :hex then hex_expansion
29
32
  when :nonhex then nonhex_expansion
30
- when :linebreak then LINEBREAK_EXPANSION
33
+ when :linebreak then linebreak_expansion
34
+ when :xgrapheme then xgrapheme
31
35
  when :digit, :space, :word
32
36
  return pass_through if self.class.directly_compatible?(expression)
33
37
  set_substitution
@@ -42,6 +46,8 @@ class JsRegex
42
46
  def hex_expansion
43
47
  if context.es_2018_or_higher? && context.enable_u_option
44
48
  ES2018_HEX_EXPANSION
49
+ elsif context.case_insensitive_root
50
+ I_MODE_HEX_EXPANSION
45
51
  else
46
52
  HEX_EXPANSION
47
53
  end
@@ -50,11 +56,17 @@ class JsRegex
50
56
  def nonhex_expansion
51
57
  if context.es_2018_or_higher? && context.enable_u_option
52
58
  ES2018_NONHEX_EXPANSION
59
+ elsif context.case_insensitive_root
60
+ I_MODE_NONHEX_EXPANSION
53
61
  else
54
62
  NONHEX_EXPANSION
55
63
  end
56
64
  end
57
65
 
66
+ def linebreak_expansion
67
+ wrap_in_backrefed_lookahead(LINEBREAK_EXPANSION)
68
+ end
69
+
58
70
  def negative_set_substitution
59
71
  # ::of_expression returns an inverted set for negative expressions,
60
72
  # so we need to un-invert before wrapping in [^ and ]. Kinda lame.
@@ -68,6 +80,14 @@ class JsRegex
68
80
  def character_set
69
81
  CharacterSet.of_expression(expression)
70
82
  end
83
+
84
+ def xgrapheme
85
+ if context.es_2018_or_higher? && context.enable_u_option
86
+ wrap_in_backrefed_lookahead(ES2018_XGRAPHEME_EXPANSION)
87
+ else
88
+ warn_of_unsupported_feature
89
+ end
90
+ end
71
91
  end
72
92
  end
73
93
  end
@@ -8,7 +8,7 @@ class JsRegex
8
8
  def self.cast(arg)
9
9
  return ES2009 if arg.nil?
10
10
 
11
- normalized_arg = arg.to_s.upcase
11
+ normalized_arg = arg.to_s.upcase.sub(/^(ECMASCRIPT|ES|JAVASCRIPT|JS)? ?/, 'ES')
12
12
  return normalized_arg if SUPPORTED.include?(normalized_arg)
13
13
 
14
14
  raise ArgumentError.new(
@@ -1,3 +1,3 @@
1
1
  class JsRegex
2
- VERSION = '3.8.0'
2
+ VERSION = '3.13.0'
3
3
  end
data/lib/js_regex.rb CHANGED
@@ -30,14 +30,17 @@ class JsRegex
30
30
  "/#{source.empty? ? '(?:)' : source}/#{options}"
31
31
  end
32
32
 
33
+ # @raise JsRegex::ConversionError
33
34
  def self.new!(ruby_regex, **kwargs)
34
- js_regex = new(ruby_regex, **kwargs)
35
- if js_regex.warnings.any?
36
- raise StandardError.new(
37
- "Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
38
- js_regex.warnings.join("\n")
39
- ).extend(JsRegex::Error)
40
- end
41
- js_regex
35
+ new(ruby_regex, fail_fast: true, **kwargs)
42
36
  end
37
+
38
+ def self.compatible?(ruby_regex, **kwargs)
39
+ new!(ruby_regex, **kwargs)
40
+ true
41
+ rescue ConversionError
42
+ false
43
+ end
44
+
45
+ ConversionError = Class.new(StandardError).send(:include, JsRegex::Error)
43
46
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.8.0
4
+ version: 3.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-09-25 00:00:00.000000000 Z
11
+ date: 2025-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: character_set
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '2.5'
33
+ version: '2.10'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '2.5'
40
+ version: '2.10'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: regexp_property_values
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -105,7 +105,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
105
105
  - !ruby/object:Gem::Version
106
106
  version: '0'
107
107
  requirements: []
108
- rubygems_version: 3.4.0.dev
108
+ rubygems_version: 3.5.22
109
109
  signing_key:
110
110
  specification_version: 4
111
111
  summary: Converts Ruby regexes to JavaScript regexes.