js_regex 3.9.0 → 3.10.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4710feb6c1da4ea2665ec125ab16bf92c35117ee028b2c6986ec9ebe61cc5eea
4
- data.tar.gz: 7152f08cbea846150bc370cb2aeb2d9a99f7e7c18b8b448e080845a54fbbe05d
3
+ metadata.gz: d173eb73a162ff8217d802e307396f07560d040afdccced0a4eb6ecfffc8b03c
4
+ data.tar.gz: 7067783f86ada3bdf592db5b9507127c7531437ade9db9c35b09578758db30b8
5
5
  SHA512:
6
- metadata.gz: cee471265231e58c04d682a707184c5d8a468481eff5f2e0df498b7323fe75ee245b95e1d2ca69016fa83e9378faecaa95541e70af07711773bcd7cbe4bdae57
7
- data.tar.gz: '0924f4a4f73f2786c8dcac578588f04fb3c6d86913ce6c8f1ac74169e8e226f6e4b993ee43a100e7e913ade4062cb18d6e53ac87573aeaabe3780c6f75beb0d0'
6
+ metadata.gz: 3de515b4bfa03c8cefbbe5924adb628fb1029f65b66c50931f70105a8617c96c52402fc6ffb43fe7fc949741a4067d6180c7108f8fa36cbdd23754168e9829a6
7
+ data.tar.gz: 544e70d9a700e145e74e91758194f41a79d56bbf4655e8d80ef8340a3c6bf3f8695b05057ca02e930461751bacea4a5aeaa6fbb4fb5908129f059fa3d908f077
@@ -13,20 +13,21 @@ class JsRegex
13
13
  require_relative 'target'
14
14
 
15
15
  class << self
16
- def of(input, options: nil, target: Target::ES2009)
16
+ def of(input, options: nil, target: Target::ES2009, fail_fast: false)
17
17
  target = Target.cast(target)
18
- source, warnings, extra_opts = convert_source(input, target)
18
+ source, warnings, extra_opts = convert_source(input, target, fail_fast)
19
19
  options_string = convert_options(input, options, extra_opts)
20
20
  [source, options_string, warnings, target]
21
21
  end
22
22
 
23
23
  private
24
24
 
25
- def convert_source(input, target)
25
+ def convert_source(input, target, fail_fast)
26
26
  tree = Regexp::Parser.parse(input)
27
27
  context = Converter::Context.new(
28
28
  case_insensitive_root: tree.i?,
29
29
  target: target,
30
+ fail_fast: fail_fast,
30
31
  )
31
32
  converted_tree = Converter.convert(tree, context)
32
33
  final_tree = SecondPass.call(converted_tree)
@@ -63,7 +63,11 @@ class JsRegex
63
63
  end
64
64
 
65
65
  def warn_of(text)
66
- context.warnings << text
66
+ if context.fail_fast
67
+ raise ConversionError, text.sub(/^Dropped /, '')
68
+ else
69
+ context.warnings << text
70
+ end
67
71
  end
68
72
 
69
73
  def drop
@@ -8,15 +8,17 @@ class JsRegex
8
8
  class Context
9
9
  attr_reader :capturing_group_count,
10
10
  :case_insensitive_root,
11
+ :fail_fast,
11
12
  :in_atomic_group,
12
13
  :warnings
13
14
 
14
- def initialize(case_insensitive_root: false, target: nil)
15
+ def initialize(case_insensitive_root: false, fail_fast: false, target: nil)
15
16
  self.added_capturing_groups_after_group = Hash.new(0)
16
17
  self.capturing_group_count = 0
18
+ self.fail_fast = fail_fast
17
19
  self.recursions_per_expression = {}
18
- self.warnings = []
19
20
  self.required_options_hash = {}
21
+ self.warnings = []
20
22
 
21
23
  self.case_insensitive_root = case_insensitive_root
22
24
  self.target = target
@@ -40,6 +42,10 @@ class JsRegex
40
42
  required_options_hash['u'] = true
41
43
  end
42
44
 
45
+ def u?
46
+ required_options_hash['u']
47
+ end
48
+
43
49
  def required_options
44
50
  required_options_hash.keys
45
51
  end
@@ -98,6 +104,7 @@ class JsRegex
98
104
 
99
105
  attr_writer :capturing_group_count,
100
106
  :case_insensitive_root,
107
+ :fail_fast,
101
108
  :in_atomic_group,
102
109
  :warnings
103
110
 
@@ -6,11 +6,12 @@ class JsRegex
6
6
  # Template class implementation.
7
7
  #
8
8
  class LiteralConverter < JsRegex::Converter::Base
9
- class << self
10
- ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
9
+ ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{10FFFF}]/
10
+ LITERAL_REQUIRING_ESCAPE_PATTERN = /[\/\f\n\r\t\v]/
11
11
 
12
+ class << self
12
13
  def convert_data(data, context)
13
- if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
14
+ if !context.u? && data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
14
15
  if context.enable_u_option
15
16
  escape_incompatible_bmp_literals(data)
16
17
  else
@@ -23,7 +24,7 @@ class JsRegex
23
24
 
24
25
  def convert_astral_data(data)
25
26
  data.each_char.each_with_object(Node.new) do |char, node|
26
- if char =~ ASTRAL_PLANE_CODEPOINT_PATTERN
27
+ if char.ord > 0xFFFF
27
28
  node << surrogate_substitution_for(char)
28
29
  else
29
30
  node << escape_incompatible_bmp_literals(char)
@@ -31,8 +32,12 @@ class JsRegex
31
32
  end
32
33
  end
33
34
 
35
+ ESCAPES = Hash.new { |h, k| raise KeyError, "#{h}[#{k.inspect}]" }
36
+ .merge("\f\n\r\t\v".chars.to_h { |c| [c, Regexp.escape(c)] })
37
+ .merge('/' => '\\/')
38
+
34
39
  def escape_incompatible_bmp_literals(data)
35
- data.gsub('/', '\\/').gsub(/[\f\n\r\t]/) { |lit| Regexp.escape(lit) }
40
+ data.gsub(LITERAL_REQUIRING_ESCAPE_PATTERN, ESCAPES)
36
41
  end
37
42
 
38
43
  private
@@ -17,54 +17,70 @@ class JsRegex
17
17
  private
18
18
 
19
19
  def convert_data
20
- return pass_through_with_escaping if directly_compatible?
20
+ simple_conversion || full_recalculation
21
+ end
21
22
 
22
- content = CharacterSet.of_expression(expression)
23
- if expression.case_insensitive? && !context.case_insensitive_root
24
- content = content.case_insensitive
25
- elsif !expression.case_insensitive? && context.case_insensitive_root
26
- warn_of_unsupported_feature('nested case-sensitive set')
27
- end
23
+ def simple_conversion
24
+ return false if casefolding_needed?
28
25
 
29
- if context.es_2015_or_higher?
30
- context.enable_u_option if content.astral_part?
31
- content.to_s(format: 'es6', in_brackets: true)
32
- else
33
- content.to_s_with_surrogate_ranges
26
+ result = "[#{'^' if expression.negative?}".dup
27
+
28
+ expression.expressions.each do |subexp|
29
+ return false unless (child_res = simple_convert_child(subexp))
30
+
31
+ result << child_res.to_s
34
32
  end
35
- end
36
33
 
37
- def directly_compatible?
38
- all_children_directly_compatible? && !casefolding_needed?
34
+ result << ']'
39
35
  end
40
36
 
41
- def all_children_directly_compatible?
42
- # note that #each_expression is recursive
43
- expression.each_expression.all? { |ch| child_directly_compatible?(ch) }
37
+ def casefolding_needed?
38
+ expression.case_insensitive? ^ context.case_insensitive_root
44
39
  end
45
40
 
46
- def child_directly_compatible?(exp)
41
+ def simple_convert_child(exp)
47
42
  case exp.type
48
43
  when :literal
49
- # surrogate pair substitution needed on ES2009 if astral
50
- exp.text.ord <= 0xFFFF || context.enable_u_option
44
+ return false if !context.u? &&
45
+ exp.text =~ LiteralConverter::ASTRAL_PLANE_CODEPOINT_PATTERN &&
46
+ !context.enable_u_option
47
+
48
+ LiteralConverter.escape_incompatible_bmp_literals(exp.text)
51
49
  when :set
52
- # conversion needed for nested sets, intersections
53
- exp.token.equal?(:range)
50
+ # full conversion is needed for nested sets and intersections
51
+ exp.token.equal?(:range) && exp.expressions.map do |op|
52
+ simple_convert_child(op) or return false
53
+ end.join('-')
54
54
  when :type
55
- TypeConverter.directly_compatible?(exp)
55
+ TypeConverter.directly_compatible?(exp, context) &&
56
+ exp.text
56
57
  when :escape
57
- EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(exp.token)
58
+ case exp.token
59
+ when *CONVERTIBLE_ESCAPE_TOKENS
60
+ EscapeConverter.new.convert(exp, context)
61
+ when :literal
62
+ exp.char.ord <= 0xFFFF &&
63
+ LiteralConverter.escape_incompatible_bmp_literals(exp.char)
64
+ end
58
65
  end
59
66
  end
60
67
 
61
- def casefolding_needed?
62
- expression.case_insensitive? ^ context.case_insensitive_root
63
- end
68
+ CONVERTIBLE_ESCAPE_TOKENS = %i[control meta_sequence bell escape octal] +
69
+ EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS
64
70
 
65
- def pass_through_with_escaping
66
- string = expression.to_s(:base)
67
- LiteralConverter.escape_incompatible_bmp_literals(string)
71
+ def full_recalculation
72
+ content = CharacterSet.of_expression(expression)
73
+ if expression.case_insensitive? && !context.case_insensitive_root
74
+ content = content.case_insensitive
75
+ elsif !expression.case_insensitive? && context.case_insensitive_root
76
+ warn_of_unsupported_feature('nested case-sensitive set')
77
+ end
78
+ if context.es_2015_or_higher?
79
+ context.enable_u_option if content.astral_part?
80
+ content.to_s(format: 'es6', in_brackets: true)
81
+ else
82
+ content.to_s_with_surrogate_ranges
83
+ end
68
84
  end
69
85
  end
70
86
  end
@@ -13,7 +13,7 @@ class JsRegex
13
13
  ES2018_XGRAPHEME_EXPANSION = '[\P{M}\P{Lm}](?:(?:[\u035C\u0361]\P{M}\p{M}*)|\u200d|\p{M}|\p{Lm}|\p{Emoji_Modifier})*'
14
14
  LINEBREAK_EXPANSION = '(?:\r\n|[\n\v\f\r\u0085\u2028\u2029])'
15
15
 
16
- def self.directly_compatible?(expression)
16
+ def self.directly_compatible?(expression, _context = nil)
17
17
  case expression.token
18
18
  when :space, :nonspace
19
19
  !expression.ascii_classes?
@@ -1,3 +1,3 @@
1
1
  class JsRegex
2
- VERSION = '3.9.0'
2
+ VERSION = '3.10.0'
3
3
  end
data/lib/js_regex.rb CHANGED
@@ -30,14 +30,17 @@ class JsRegex
30
30
  "/#{source.empty? ? '(?:)' : source}/#{options}"
31
31
  end
32
32
 
33
+ # @raise JsRegex::ConversionError
33
34
  def self.new!(ruby_regex, **kwargs)
34
- js_regex = new(ruby_regex, **kwargs)
35
- if js_regex.warnings.any?
36
- raise StandardError.new(
37
- "Could not fully convert the given regex #{ruby_regex.inspect}:\n" +
38
- js_regex.warnings.join("\n")
39
- ).extend(JsRegex::Error)
40
- end
41
- js_regex
35
+ new(ruby_regex, fail_fast: true, **kwargs)
42
36
  end
37
+
38
+ def self.compatible?(ruby_regex, **kwargs)
39
+ new!(ruby_regex, **kwargs)
40
+ true
41
+ rescue ConversionError
42
+ false
43
+ end
44
+
45
+ ConversionError = Class.new(StandardError).send(:include, JsRegex::Error)
43
46
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.9.0
4
+ version: 3.10.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-01-24 00:00:00.000000000 Z
11
+ date: 2023-01-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: character_set