js_regex 3.9.0 → 3.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4710feb6c1da4ea2665ec125ab16bf92c35117ee028b2c6986ec9ebe61cc5eea
4
- data.tar.gz: 7152f08cbea846150bc370cb2aeb2d9a99f7e7c18b8b448e080845a54fbbe05d
3
+ metadata.gz: 6c55b0dded778c8851267d5d6d24c4a1afcbdb64e6d28ef3e48da5ba8acaf4d8
4
+ data.tar.gz: 71cf3ead5808405c0e32a2fa2dd63a8efa6f5f02df9ef707c12d4238087121a6
5
5
  SHA512:
6
- metadata.gz: cee471265231e58c04d682a707184c5d8a468481eff5f2e0df498b7323fe75ee245b95e1d2ca69016fa83e9378faecaa95541e70af07711773bcd7cbe4bdae57
7
- data.tar.gz: '0924f4a4f73f2786c8dcac578588f04fb3c6d86913ce6c8f1ac74169e8e226f6e4b993ee43a100e7e913ade4062cb18d6e53ac87573aeaabe3780c6f75beb0d0'
6
+ metadata.gz: d5cb89a802b4c90382146f6a49e2c0fbefa5aed7580d0698e8bd392e05ed172880a713dc619b484e9fa0f28689b1b03c35eb08f9efc6673895ca1a450985723e
7
+ data.tar.gz: c0c8fda9db5419e5d05b43036190d1fb3d9e44d32c7f57598f21cb6c16d444208dae18ed6cdeb871357188619113475f83deb70c4eef2babfc44a37c27cbbbb9
@@ -13,20 +13,21 @@ class JsRegex
13
13
  require_relative 'target'
14
14
 
15
15
  class << self
16
- def of(input, options: nil, target: Target::ES2009)
16
+ def of(input, options: nil, target: Target::ES2009, fail_fast: false)
17
17
  target = Target.cast(target)
18
- source, warnings, extra_opts = convert_source(input, target)
18
+ source, warnings, extra_opts = convert_source(input, target, fail_fast)
19
19
  options_string = convert_options(input, options, extra_opts)
20
20
  [source, options_string, warnings, target]
21
21
  end
22
22
 
23
23
  private
24
24
 
25
- def convert_source(input, target)
25
+ def convert_source(input, target, fail_fast)
26
26
  tree = Regexp::Parser.parse(input)
27
27
  context = Converter::Context.new(
28
28
  case_insensitive_root: tree.i?,
29
29
  target: target,
30
+ fail_fast: fail_fast,
30
31
  )
31
32
  converted_tree = Converter.convert(tree, context)
32
33
  final_tree = SecondPass.call(converted_tree)
@@ -63,7 +63,11 @@ class JsRegex
63
63
  end
64
64
 
65
65
  def warn_of(text)
66
- context.warnings << text
66
+ if context.fail_fast
67
+ raise ConversionError, text.sub(/^Dropped /, '')
68
+ else
69
+ context.warnings << text
70
+ end
67
71
  end
68
72
 
69
73
  def drop
@@ -8,15 +8,17 @@ class JsRegex
8
8
  class Context
9
9
  attr_reader :capturing_group_count,
10
10
  :case_insensitive_root,
11
+ :fail_fast,
11
12
  :in_atomic_group,
12
13
  :warnings
13
14
 
14
- def initialize(case_insensitive_root: false, target: nil)
15
+ def initialize(case_insensitive_root: false, fail_fast: false, target: nil)
15
16
  self.added_capturing_groups_after_group = Hash.new(0)
16
17
  self.capturing_group_count = 0
18
+ self.fail_fast = fail_fast
17
19
  self.recursions_per_expression = {}
18
- self.warnings = []
19
20
  self.required_options_hash = {}
21
+ self.warnings = []
20
22
 
21
23
  self.case_insensitive_root = case_insensitive_root
22
24
  self.target = target
@@ -40,6 +42,10 @@ class JsRegex
40
42
  required_options_hash['u'] = true
41
43
  end
42
44
 
45
+ def u?
46
+ required_options_hash['u']
47
+ end
48
+
43
49
  def required_options
44
50
  required_options_hash.keys
45
51
  end
@@ -98,6 +104,7 @@ class JsRegex
98
104
 
99
105
  attr_writer :capturing_group_count,
100
106
  :case_insensitive_root,
107
+ :fail_fast,
101
108
  :in_atomic_group,
102
109
  :warnings
103
110
 
@@ -6,11 +6,12 @@ class JsRegex
6
6
  # Template class implementation.
7
7
  #
8
8
  class LiteralConverter < JsRegex::Converter::Base
9
- class << self
10
- ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
9
+ ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
10
+ LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/
11
11
 
12
+ class << self
12
13
  def convert_data(data, context)
13
- if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
14
+ if !context.u? && data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
14
15
  if context.enable_u_option
15
16
  escape_incompatible_bmp_literals(data)
16
17
  else
@@ -23,7 +24,7 @@ class JsRegex
23
24
 
24
25
  def convert_astral_data(data)
25
26
  data.each_char.each_with_object(Node.new) do |char, node|
26
- if char =~ ASTRAL_PLANE_CODEPOINT_PATTERN
27
+ if char.ord > 0xFFFF
27
28
  node << surrogate_substitution_for(char)
28
29
  else
29
30
  node << escape_incompatible_bmp_literals(char)
@@ -31,8 +32,12 @@ class JsRegex
31
32
  end
32
33
  end
33
34
 
35
+ ESCAPES = Hash.new { |h, k| raise KeyError, "#{h}[#{k.inspect}]" }
36
+ .merge("\f\n\r\t\v".chars.to_h { |c| [c, Regexp.escape(c)] })
37
+ .merge('/' => '\\/')
38
+
34
39
  def escape_incompatible_bmp_literals(data)
35
- data.gsub('/', '\\/').gsub(/[\f\n\r\t]/) { |lit| Regexp.escape(lit) }
40
+ data.gsub(LITERAL_REQUIRING_ESCAPE_PATTERN, ESCAPES)
36
41
  end
37
42
 
38
43
  private
@@ -17,54 +17,73 @@ class JsRegex
17
17
  private
18
18
 
19
19
  def convert_data
20
- return pass_through_with_escaping if directly_compatible?
20
+ simple_conversion || full_recalculation
21
+ end
21
22
 
22
- content = CharacterSet.of_expression(expression)
23
- if expression.case_insensitive? && !context.case_insensitive_root
24
- content = content.case_insensitive
25
- elsif !expression.case_insensitive? && context.case_insensitive_root
26
- warn_of_unsupported_feature('nested case-sensitive set')
27
- end
23
+ def simple_conversion
24
+ return false if casefolding_needed?
28
25
 
29
- if context.es_2015_or_higher?
30
- context.enable_u_option if content.astral_part?
31
- content.to_s(format: 'es6', in_brackets: true)
32
- else
33
- content.to_s_with_surrogate_ranges
26
+ result = "[#{'^' if expression.negative?}".dup
27
+
28
+ expression.expressions.each do |subexp|
29
+ return false unless (child_res = simple_convert_child(subexp))
30
+
31
+ result << child_res.to_s
34
32
  end
35
- end
36
33
 
37
- def directly_compatible?
38
- all_children_directly_compatible? && !casefolding_needed?
34
+ result << ']'
39
35
  end
40
36
 
41
- def all_children_directly_compatible?
42
- # note that #each_expression is recursive
43
- expression.each_expression.all? { |ch| child_directly_compatible?(ch) }
37
+ def casefolding_needed?
38
+ expression.case_insensitive? ^ context.case_insensitive_root
44
39
  end
45
40
 
46
- def child_directly_compatible?(exp)
41
+ def simple_convert_child(exp)
47
42
  case exp.type
48
43
  when :literal
49
- # surrogate pair substitution needed on ES2009 if astral
50
- exp.text.ord <= 0xFFFF || context.enable_u_option
44
+ return false if !context.u? &&
45
+ exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
46
+ !context.enable_u_option
47
+
48
+ LiteralConverter.escape_incompatible_bmp_literals(exp.text)
51
49
  when :set
52
- # conversion needed for nested sets, intersections
53
- exp.token.equal?(:range)
50
+ # full conversion is needed for nested sets and intersections
51
+ exp.token.equal?(:range) && exp.expressions.map do |op|
52
+ simple_convert_child(op) or return false
53
+ end.join('-')
54
54
  when :type
55
- TypeConverter.directly_compatible?(exp)
55
+ TypeConverter.directly_compatible?(exp, context) &&
56
+ exp.text
56
57
  when :escape
57
- EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
58
+ return exp.text if SET_SPECIFIC_ESCAPES_PATTERN.match?(exp.text)
59
+
60
+ case exp.token
61
+ when *CONVERTIBLE_ESCAPE_TOKENS
62
+ EscapeConverter.new.convert(exp, context)
63
+ when :literal
64
+ exp.char.ord <= 0xFFFF &&
65
+ LiteralConverter.escape_incompatible_bmp_literals(exp.char)
66
+ end
58
67
  end
59
68
  end
60
69
 
61
- def casefolding_needed?
62
- expression.case_insensitive? ^ context.case_insensitive_root
63
- end
70
+ SET_SPECIFIC_ESCAPES_PATTERN = /[\^\-]/
71
+ CONVERTIBLE_ESCAPE_TOKENS = %i[control meta_sequence bell escape octal] +
72
+ EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS
64
73
 
65
- def pass_through_with_escaping
66
- string = expression.to_s(:base)
67
- LiteralConverter.escape_incompatible_bmp_literals(string)
74
+ def full_recalculation
75
+ content = CharacterSet.of_expression(expression)
76
+ if expression.case_insensitive? && !context.case_insensitive_root
77
+ content = content.case_insensitive
78
+ elsif !expression.case_insensitive? && context.case_insensitive_root
79
+ warn_of_unsupported_feature('nested case-sensitive set')
80
+ end
81
+ if context.es_2015_or_higher?
82
+ context.enable_u_option if content.astral_part?
83
+ content.to_s(format: 'es6', in_brackets: true)
84
+ else
85
+ content.to_s_with_surrogate_ranges
86
+ end
68
87
  end
69
88
  end
70
89
  end
@@ -13,7 +13,7 @@ class JsRegex
13
13
  ES2018_XGRAPHEME_EXPANSION = '[\P{M}\P{Lm}](?:(?:[\u035C\u0361]\P{M}\p{M}*)|\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*'
14
14
  LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
15
15
 
16
- def self.directly_compatible?(expression)
16
+ def self.directly_compatible?(expression, _context = nil)
17
17
  case expression.token
18
18
  when :space, :nonspace
19
19
  !expression.ascii_classes?
@@ -8,7 +8,7 @@ class JsRegex
8
8
  def self.cast(arg)
9
9
  return ES2009 if arg.nil?
10
10
 
11
- normalized_arg = arg.to_s.upcase
11
+ normalized_arg = arg.to_s.upcase.sub(/^(ECMASCRIPT|ES|JAVASCRIPT|JS)? ?/, 'ES')
12
12
  return normalized_arg if SUPPORTED.include?(normalized_arg)
13
13
 
14
14
  raise ArgumentError.new(
@@ -1,3 +1,3 @@
1
1
  class JsRegex
2
- VERSION = '3.9.0'
2
+ VERSION = '3.11.0'
3
3
  end
data/lib/js_regex.rb CHANGED
@@ -30,14 +30,17 @@ class JsRegex
30
30
  "/#{source.empty? ? '(?:)' : source}/#{options}"
31
31
  end
32
32
 
33
+ # @raise JsRegex::ConversionError
33
34
  def self.new!(ruby_regex, **kwargs)
34
- js_regex = new(ruby_regex, **kwargs)
35
- if js_regex.warnings.any?
36
- raise StandardError.new(
37
- "Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
38
- js_regex.warnings.join("\n")
39
- ).extend(JsRegex::Error)
40
- end
41
- js_regex
35
+ new(ruby_regex, fail_fast: true, **kwargs)
42
36
  end
37
+
38
+ def self.compatible?(ruby_regex, **kwargs)
39
+ new!(ruby_regex, **kwargs)
40
+ true
41
+ rescue ConversionError
42
+ false
43
+ end
44
+
45
+ ConversionError = Class.new(StandardError).send(:include, JsRegex::Error)
43
46
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.9.0
4
+ version: 3.11.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-24 00:00:00.000000000 Z
11
+ date: 2023-03-23 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: character_set