js_regex 3.0.0 → 3.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f8f83148f3bcfeb5262259d0893fe92db40e59f64627c4430deee7eaee194c2
4
- data.tar.gz: 9cf144827bd01a075552cf12bfd16152c417e82eab064f1fa6a65133381d95ac
3
+ metadata.gz: 7219e7e794aa7b4df64655f362d448122fd07f901686ead1706626e925919976
4
+ data.tar.gz: 97924f576ecabd32288c9a4827edf5538b21d807813afb8c7ee2cfc639136b2d
5
5
  SHA512:
6
- metadata.gz: bf9b4ff58756d2f12be785a803fda5e75aeffd556cdd49860e7474caf963957414b11e9fd1f3d35c6aee90375f3f23dc4435033ee1d3ba086534fdd7cf8d7caf
7
- data.tar.gz: b6d4e6dd07949b8fa3394e4868214d1a1977ee3fe65713c7eb000cdffc50e1d485be9af2f7fcffcc1893c883eb36cd4bd5c4c687b2aebabd62f2454820f57db5
6
+ metadata.gz: 0de5c05e55115fa6bc400551e6cd6eeaf9370b65bcf64262bebb6aaee7612ed4d0e6d67c10eeae794ae28af83839e1765f4cbc2b5ab0274e117ff8c20a4b30be
7
+ data.tar.gz: b71ef3b13a3969b65c332b1ecb42e164fa6acf7e317872eda254a485647378870749c74f04486d520d8b854ac39c5d9a1e90d6f84f898ebbda8ffd4f7bb339df
@@ -9,6 +9,8 @@ class JsRegex
9
9
  class Conversion
10
10
  require 'regexp_parser'
11
11
  require_relative 'converter'
12
+ require_relative 'node'
13
+ require_relative 'second_pass'
12
14
 
13
15
  class << self
14
16
  def of(ruby_regex, options: nil)
@@ -20,11 +22,11 @@ class JsRegex
20
22
  private
21
23
 
22
24
  def convert_source(ruby_regex)
23
- context = Converter::Context.new(ruby_regex)
24
- [
25
- Converter::RootConverter.new.convert(context.ast, context),
26
- context.warnings
27
- ]
25
+ tree = Regexp::Parser.parse(ruby_regex)
26
+ context = Converter::Context.new(case_insensitive_root: tree.i?)
27
+ converted_tree = Converter.convert(tree, context)
28
+ final_tree = SecondPass.call(converted_tree)
29
+ [final_tree.to_s, context.warnings]
28
30
  end
29
31
 
30
32
  def convert_options(ruby_regex, custom_options)
@@ -15,60 +15,54 @@ class JsRegex
15
15
  when :name_ref then convert_name_ref
16
16
  when :number, :number_ref then convert_number_ref
17
17
  when :number_rel_ref then convert_number_rel_ref
18
- when :name_call then convert_name_call
19
- when :number_call then convert_number_call
20
- when :number_rel_call then convert_number_rel_call
18
+ when :name_call then mark_name_call
19
+ when :number_call then mark_number_call
20
+ when :number_rel_call then mark_number_rel_call
21
21
  else # name_recursion_ref, number_recursion_ref, ...
22
22
  warn_of_unsupported_feature
23
23
  end
24
24
  end
25
25
 
26
26
  def convert_name_ref
27
- "\\#{context.named_group_positions.fetch(expression.name)}"
27
+ convert_ref(context.named_group_positions.fetch(expression.name))
28
28
  end
29
29
 
30
30
  def convert_number_ref
31
- "\\#{context.new_capturing_group_position(expression.number)}"
31
+ convert_ref(context.new_capturing_group_position(expression.number))
32
32
  end
33
33
 
34
34
  def convert_number_rel_ref
35
- "\\#{context.new_capturing_group_position(absolute_position)}"
35
+ convert_ref(context.new_capturing_group_position(absolute_position))
36
+ end
37
+
38
+ def convert_ref(position)
39
+ Node.new('\\', Node.new(position.to_s, type: :backref_num))
36
40
  end
37
41
 
38
42
  def absolute_position
39
43
  expression.number + context.original_capturing_group_count + 1
40
44
  end
41
45
 
42
- def convert_name_call
43
- replace_with_group do |group|
44
- group.token == :named && group.name == expression.name
45
- end
46
+ def mark_name_call
47
+ mark_call(expression.name)
46
48
  end
47
49
 
48
- def convert_number_call
49
- if expression.number == 0
50
+ def mark_number_call
51
+ if expression.number.equal?(0)
50
52
  return warn_of_unsupported_feature('whole-pattern recursion')
51
53
  end
52
- replace_with_group do |group|
53
- [:capture, :options].include?(group.token) &&
54
- group.number.equal?(expression.number)
55
- end
54
+ mark_call(expression.number)
56
55
  end
57
56
 
58
- def convert_number_rel_call
59
- replace_with_group do |group|
60
- [:capture, :options].include?(group.token) &&
61
- group.number.equal?(absolute_position)
62
- end
57
+ def mark_number_rel_call
58
+ is_forward_referring = data.include?('+') # e.g. \g<+2>
59
+ mark_call(absolute_position - (is_forward_referring ? 1 : 0))
63
60
  end
64
61
 
65
- def replace_with_group
66
- context.ast.each_expression do |subexp|
67
- if subexp.type == :group && yield(subexp)
68
- return Converter.for(subexp).convert(subexp, context)
69
- end
70
- end
71
- ''
62
+ def mark_call(reference)
63
+ # increment group count as calls will be substituted with groups
64
+ context.increment_local_capturing_group_count
65
+ Node.new(reference: reference, type: :subexp_call)
72
66
  end
73
67
  end
74
68
  end
@@ -3,15 +3,18 @@
3
3
  class JsRegex
4
4
  module Converter
5
5
  #
6
- # Template class. Implement #convert_data in subclasses.
6
+ # Template class. Implement #convert_data in subclasses and return
7
+ # instance of String or Node from it.
7
8
  #
8
9
  class Base
10
+ # returns instance of Node with #quantifier attached.
9
11
  def convert(expression, context)
10
12
  self.context = context
11
13
  self.expression = expression
12
14
 
13
- source = convert_data
14
- apply_quantifier(source)
15
+ node = convert_data
16
+ node = Node.new(node) if node.instance_of?(String)
17
+ apply_quantifier(node)
15
18
  end
16
19
 
17
20
  private
@@ -27,41 +30,49 @@ class JsRegex
27
30
  end
28
31
  alias pass_through data
29
32
 
30
- def apply_quantifier(source)
31
- return source if source.empty? || !(quantifier = expression.quantifier)
33
+ def apply_quantifier(node)
34
+ return node if node.dropped? || (qtf = expression.quantifier).nil?
32
35
 
33
- if quantifier.mode.equal?(:possessive)
34
- context.wrap_in_backrefed_lookahead(source + quantifier.text[0..-2])
36
+ if qtf.possessive?
37
+ node.update(quantifier: qtf.text[0..-2])
38
+ return wrap_in_backrefed_lookahead([node])
35
39
  else
36
- source + quantifier
40
+ node.update(quantifier: qtf)
37
41
  end
38
- end
39
42
 
40
- def convert_subexpressions
41
- convert_expressions(subexpressions)
43
+ node
42
44
  end
43
45
 
44
- def convert_expressions(expressions)
45
- expressions.map { |exp| Converter.for(exp).convert(exp, context) }.join
46
+ def convert_subexpressions
47
+ Node.new(*expression.expressions.map { |exp| convert_expression(exp) })
46
48
  end
47
49
 
48
- def subexpressions
49
- expression.expressions
50
+ def convert_expression(expression)
51
+ Converter.convert(expression, context)
50
52
  end
51
53
 
52
54
  def warn_of_unsupported_feature(description = nil)
53
55
  description ||= "#{subtype} #{expression.type}".tr('_', ' ')
54
56
  full_desc = "#{description} '#{expression}'"
55
57
  warn("Dropped unsupported #{full_desc} at index #{expression.ts}")
56
- ''
58
+ drop
57
59
  end
58
60
 
59
61
  def warn(text)
60
62
  context.warnings << text
61
63
  end
62
64
 
63
- def drop_without_warning
64
- ''
65
+ def drop
66
+ Node.new(type: :dropped)
67
+ end
68
+ alias drop_without_warning drop
69
+
70
+ def wrap_in_backrefed_lookahead(content)
71
+ backref_num = context.capturing_group_count + 1
72
+ backref_num_node = Node.new(backref_num.to_s, type: :backref_num)
73
+ context.increment_local_capturing_group_count
74
+ # an empty passive group (?:) is appended as literal digits may follow
75
+ Node.new('(?=(', *content, '))\\', backref_num_node, '(?:)')
65
76
  end
66
77
  end
67
78
  end
@@ -11,12 +11,20 @@ class JsRegex
11
11
  private
12
12
 
13
13
  def convert_data
14
- warn_of_unsupported_feature('conditional')
15
- branches = subexpressions.drop(1).each_with_object([]) do |branch, arr|
16
- converted_branch = convert_expressions(branch)
17
- arr << converted_branch unless converted_branch.eql?('')
14
+ case subtype
15
+ when :open then mark_conditional
16
+ when :condition then drop_without_warning
17
+ else warn_of_unsupported_feature
18
18
  end
19
- "(?:#{branches.join('|')})"
19
+ end
20
+
21
+ def mark_conditional
22
+ reference = expression.reference
23
+ node = Node.new('(?:', reference: reference, type: :conditional)
24
+ expression.branches.each do |branch|
25
+ node << Node.new('(?:', convert_expression(branch), ')')
26
+ end
27
+ node << ')'
20
28
  end
21
29
  end
22
30
  end
@@ -8,20 +8,19 @@ class JsRegex
8
8
  # The Converters themselves are stateless.
9
9
  #
10
10
  class Context
11
- attr_reader :ast,
11
+ attr_reader :capturing_group_count,
12
12
  :case_insensitive_root,
13
13
  :in_atomic_group,
14
14
  :named_group_positions,
15
15
  :warnings
16
16
 
17
- def initialize(ruby_regex)
17
+ def initialize(case_insensitive_root: false)
18
18
  self.added_capturing_groups_after_group = Hash.new(0)
19
19
  self.capturing_group_count = 0
20
20
  self.named_group_positions = {}
21
21
  self.warnings = []
22
22
 
23
- self.ast = Regexp::Parser.parse(ruby_regex)
24
- self.case_insensitive_root = ast.case_insensitive?
23
+ self.case_insensitive_root = case_insensitive_root
25
24
  end
26
25
 
27
26
  # group context
@@ -38,13 +37,9 @@ class JsRegex
38
37
  self.in_atomic_group = false
39
38
  end
40
39
 
41
- def wrap_in_backrefed_lookahead(content)
42
- new_backref_num = capturing_group_count + 1
43
- # an empty passive group (?:) is appended as literal digits may follow
44
- result = "(?=(#{content}))\\#{new_backref_num}(?:)"
40
+ def increment_local_capturing_group_count
45
41
  added_capturing_groups_after_group[original_capturing_group_count] += 1
46
42
  capture_group
47
- result
48
43
  end
49
44
 
50
45
  # takes and returns 1-indexed group positions.
@@ -67,10 +62,9 @@ class JsRegex
67
62
 
68
63
  private
69
64
 
70
- attr_accessor :added_capturing_groups_after_group,
71
- :capturing_group_count
65
+ attr_accessor :added_capturing_groups_after_group
72
66
 
73
- attr_writer :ast,
67
+ attr_writer :capturing_group_count,
74
68
  :case_insensitive_root,
75
69
  :in_atomic_group,
76
70
  :named_group_positions,
@@ -55,9 +55,9 @@ class JsRegex
55
55
  end
56
56
 
57
57
  def convert_codepoint_list
58
- expression.chars.map do |char|
59
- LiteralConverter.convert_data(Regexp.escape(char))
60
- end.join
58
+ expression.chars.each_with_object(Node.new) do |char, node|
59
+ node << LiteralConverter.convert_data(Regexp.escape(char))
60
+ end
61
61
  end
62
62
 
63
63
  def unicode_escape_codepoint
@@ -28,7 +28,7 @@ class JsRegex
28
28
  build_unsupported_group('nested atomic group')
29
29
  else
30
30
  context.start_atomic_group
31
- result = context.wrap_in_backrefed_lookahead(convert_subexpressions)
31
+ result = wrap_in_backrefed_lookahead(convert_subexpressions)
32
32
  context.end_atomic_group
33
33
  result
34
34
  end
@@ -37,15 +37,19 @@ class JsRegex
37
37
  def build_named_group
38
38
  # remember position, then drop name part without warning
39
39
  context.store_named_group_position(expression.name)
40
- build_group(head: '(')
40
+ build_group(head: '(', reference: expression.name)
41
41
  end
42
42
 
43
43
  def build_options_group
44
44
  unless (encoding_options = data.scan(/[adu]/)).empty?
45
45
  warn_of_unsupported_feature("encoding options #{encoding_options}")
46
46
  end
47
- switch_only = subtype.equal?(:options_switch)
48
- switch_only ? drop_without_warning : build_group(head: '(')
47
+ if subtype.equal?(:options_switch)
48
+ # can be ignored since #options on subsequent Expressions are correct
49
+ drop_without_warning
50
+ else
51
+ build_passive_group
52
+ end
49
53
  end
50
54
 
51
55
  def build_passive_group
@@ -58,9 +62,19 @@ class JsRegex
58
62
  end
59
63
 
60
64
  def build_group(opts = {})
61
- context.capture_group unless opts[:capturing].equal?(false)
62
65
  head = opts[:head] || pass_through
63
- "#{head}#{convert_subexpressions})"
66
+ if opts[:capturing].equal?(false)
67
+ return Node.new(*group_with_head(head))
68
+ end
69
+
70
+ context.capture_group
71
+
72
+ ref = opts[:reference] || expression.number
73
+ Node.new(*group_with_head(head), reference: ref, type: :captured_group)
74
+ end
75
+
76
+ def group_with_head(head)
77
+ [head, *convert_subexpressions, ')']
64
78
  end
65
79
  end
66
80
  end
@@ -9,18 +9,28 @@ class JsRegex
9
9
  #
10
10
  class LiteralConverter < JsRegex::Converter::Base
11
11
  class << self
12
- ASTRAL_PLANE_CODEPOINT_PATTERN = /\A[\u{10000}-\u{FFFFF}]\z/
12
+ ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{FFFFF}]/
13
13
 
14
14
  def convert_data(data)
15
15
  if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
16
- surrogate_pair_for(data)
16
+ data.each_char.each_with_object(Node.new) do |chr, node|
17
+ if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
18
+ node << surrogate_pair_for(chr)
19
+ else
20
+ node << convert_bmp_data(chr)
21
+ end
22
+ end
17
23
  else
18
- ensure_json_compatibility(
19
- ensure_forward_slashes_are_escaped(data)
20
- )
24
+ convert_bmp_data(data)
21
25
  end
22
26
  end
23
27
 
28
+ def convert_bmp_data(data)
29
+ ensure_json_compatibility(
30
+ ensure_forward_slashes_are_escaped(data)
31
+ )
32
+ end
33
+
24
34
  private
25
35
 
26
36
  def surrogate_pair_for(astral_char)
@@ -56,7 +66,10 @@ class JsRegex
56
66
 
57
67
  def handle_locally_case_insensitive_literal(literal)
58
68
  return literal unless literal =~ HAS_CASE_PATTERN
59
- "[#{literal}#{literal.swapcase}]"
69
+
70
+ literal.each_char.each_with_object(Node.new) do |chr, node|
71
+ node << (chr =~ HAS_CASE_PATTERN ? "[#{chr}#{chr.swapcase}]" : chr)
72
+ end
60
73
  end
61
74
  end
62
75
  end
@@ -13,7 +13,7 @@ class JsRegex
13
13
  def convert_data
14
14
  case subtype
15
15
  when :alternation
16
- convert_alternation
16
+ convert_alternatives
17
17
  when :dot
18
18
  expression.multiline? ? '(?:.|\n)' : '.'
19
19
  else
@@ -21,12 +21,15 @@ class JsRegex
21
21
  end
22
22
  end
23
23
 
24
- def convert_alternation
25
- branches = subexpressions.each_with_object([]) do |branch, arr|
26
- converted_branch = convert_expressions(branch.expressions)
27
- arr << converted_branch unless converted_branch.eql?('')
24
+ def convert_alternatives
25
+ kept_any = false
26
+
27
+ convert_subexpressions.map do |node|
28
+ dropped = !node.children.empty? && node.children.all?(&:dropped?)
29
+ node.children.unshift('|') if kept_any.equal?(true) && !dropped
30
+ kept_any = true unless dropped
31
+ node
28
32
  end
29
- branches.join('|')
30
33
  end
31
34
  end
32
35
  end
@@ -8,20 +8,19 @@ class JsRegex
8
8
  #
9
9
  # Template class implementation.
10
10
  #
11
+ # Uses the `character_set` and `regexp_property_values` gems to get the
12
+ # codepoints matched by the property and build a set string from them.
13
+ #
11
14
  class PropertyConverter < JsRegex::Converter::Base
12
15
  private
13
16
 
14
17
  def convert_data
15
- convert_property
16
- end
17
-
18
- def convert_property(negated = nil)
19
18
  content = CharacterSet.of_property(subtype)
20
19
  if expression.case_insensitive? && !context.case_insensitive_root
21
20
  content = content.case_insensitive
22
21
  end
23
22
 
24
- if negated
23
+ if expression.negative?
25
24
  if content.astral_part.empty?
26
25
  return "[^#{content.to_s(format: :js)}]"
27
26
  else
@@ -35,10 +34,10 @@ class JsRegex
35
34
  end
36
35
 
37
36
  bmp_part = content.bmp_part
38
- return '' if bmp_part.empty?
37
+ return drop if bmp_part.empty?
39
38
 
40
39
  string = bmp_part.to_s(format: :js)
41
- negated ? "[^#{string}]" : "[#{string}]"
40
+ expression.negative? ? "[^#{string}]" : "[#{string}]"
42
41
  end
43
42
  end
44
43
  end
@@ -10,6 +10,11 @@ class JsRegex
10
10
  #
11
11
  # Template class implementation.
12
12
  #
13
+ # Unlike other converters, this one does not recurse on subexpressions,
14
+ # since many are unsupported by JavaScript. If it detects incompatible
15
+ # children, it uses the `character_set` gem to establish the codepoints
16
+ # matched by the whole set and build a completely new set string.
17
+ #
13
18
  class SetConverter < JsRegex::Converter::Base
14
19
  private
15
20
 
@@ -32,7 +37,7 @@ class JsRegex
32
37
  else
33
38
  warn_of_unsupported_feature('large astral plane match of set')
34
39
  bmp_part = content.bmp_part
35
- bmp_part.empty? ? '' : bmp_part.to_s(format: :js, in_brackets: true)
40
+ bmp_part.empty? ? drop : bmp_part.to_s(format: :js, in_brackets: true)
36
41
  end
37
42
  end
38
43
 
@@ -42,7 +47,7 @@ class JsRegex
42
47
  return false
43
48
  end
44
49
 
45
- # check for subexpressions that need conversion
50
+ # check for children needing conversion (#each_expression is recursive)
46
51
  expression.each_expression do |node|
47
52
  case node.type
48
53
  when :literal
@@ -7,7 +7,7 @@ class JsRegex
7
7
  #
8
8
  # Template class implementation.
9
9
  #
10
- class RootConverter < JsRegex::Converter::Base
10
+ class SubexpressionConverter < JsRegex::Converter::Base
11
11
  private
12
12
 
13
13
  def convert_data
@@ -19,7 +19,7 @@ class JsRegex
19
19
 
20
20
  HEX_EXPANSION = '[0-9A-Fa-f]'
21
21
  NONHEX_EXPANSION = '[^0-9A-Fa-f]'
22
- LINEBREAK_EXPANSION = '(\r\n|\r|\n)'
22
+ LINEBREAK_EXPANSION = '(?:\r\n|\r|\n)'
23
23
 
24
24
  private
25
25
 
@@ -12,27 +12,32 @@ class JsRegex
12
12
  backref: BackreferenceConverter,
13
13
  conditional: ConditionalConverter,
14
14
  escape: EscapeConverter,
15
+ expression: SubexpressionConverter,
15
16
  free_space: FreespaceConverter,
16
17
  group: GroupConverter,
17
18
  literal: LiteralConverter,
18
19
  meta: MetaConverter,
19
- nonproperty: NonpropertyConverter,
20
+ nonproperty: PropertyConverter,
20
21
  property: PropertyConverter,
21
22
  set: SetConverter,
22
23
  type: TypeConverter
23
24
  ).freeze
24
25
 
25
- def self.for(expression)
26
- MAP[expression.type].new
27
- end
28
-
29
- # Limit the number of generated surrogate pairs, else the output might
30
- # get to large for certain applications. The chosen number is somewhat
31
- # arbitrary. 100 pairs make for about 1 KB, uncompressed. The median char
32
- # count of all properties supported by Ruby is 92. 75% are below 300 chars.
33
- #
34
- # Set this to nil if you need full unicode matches and size doesn't matter.
35
26
  class << self
27
+ def convert(exp, context = nil)
28
+ self.for(exp).convert(exp, context || Context.new)
29
+ end
30
+
31
+ def for(expression)
32
+ MAP[expression.type].new
33
+ end
34
+
35
+ # Limit the number of generated surrogate pairs, else the output might
36
+ # get too large for certain applications. The chosen number is somewhat
37
+ # arbitrary. 100 pairs make for about 1 KB, uncompressed. The median char
38
+ # count of all properties supported by Ruby is 92. 75% are below 300 chars.
39
+ #
40
+ # Set this to nil if you need full unicode matches and size doesn't matter.
36
41
  attr_accessor :surrogate_pair_limit
37
42
  end
38
43
  self.surrogate_pair_limit = 300
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ class JsRegex
4
+ #
5
+ # Converter#convert result. Represents a branch or leaf node with an optional
6
+ # quantifier as well as type and reference annotations for SecondPass.
7
+ #
8
+ class Node
9
+ attr_reader :children, :quantifier, :reference, :type
10
+
11
+ TYPES = %i[
12
+ backref_num
13
+ captured_group
14
+ conditional
15
+ dropped
16
+ plain
17
+ subexp_call
18
+ ]
19
+
20
+ def initialize(*children, quantifier: nil, reference: nil, type: :plain)
21
+ raise ArgumentError, "bad type #{type}" unless TYPES.include?(type)
22
+ self.children = children
23
+ self.quantifier = quantifier
24
+ self.reference = reference
25
+ self.type = type
26
+ end
27
+
28
+ def initialize_copy(orig)
29
+ super
30
+ self.children = orig.children.map(&:clone)
31
+ self.quantifier = orig.quantifier && orig.quantifier.clone
32
+ end
33
+
34
+ def map(&block)
35
+ clone.tap { |node| node.children.replace(children.map(&block)) }
36
+ end
37
+
38
+ def <<(node)
39
+ children << node
40
+ self
41
+ end
42
+
43
+ def dropped?
44
+ # keep everything else, including empty or depleted capturing groups
45
+ # so as not to not mess with reference numbers (e.g. backrefs)
46
+ type.equal?(:dropped)
47
+ end
48
+
49
+ def to_s
50
+ case type
51
+ when :dropped
52
+ ''
53
+ when :backref_num, :captured_group, :plain
54
+ children.join << quantifier.to_s
55
+ else
56
+ raise TypeError, "#{type} must be substituted before stringification"
57
+ end
58
+ end
59
+
60
+ def update(attrs)
61
+ self.children = attrs[:children] if attrs.key?(:children)
62
+ self.quantifier = attrs[:quantifier] if attrs.key?(:quantifier)
63
+ self.type = attrs[:type] if attrs.key?(:type)
64
+ end
65
+
66
+ private
67
+
68
+ attr_writer :children, :reference, :quantifier, :type
69
+ end
70
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ class JsRegex
4
+ #
5
+ # After conversion of a full Regexp::Expression tree, this class
6
+ # checks for Node instances that need further processing.
7
+ #
8
+ # E.g. subexpression calls (such as \g<1>) can refer to later groups,
9
+ # so the full Regexp must have been processed first, and only then can
10
+ # they be substituted with the conversion result of their targeted group.
11
+ #
12
+ module SecondPass
13
+ module_function
14
+
15
+ def call(tree)
16
+ substitute_subexp_calls(tree)
17
+ alternate_conditional_permutations(tree)
18
+ tree
19
+ end
20
+
21
+ def substitute_subexp_calls(tree)
22
+ crawl(tree) do |node|
23
+ if node.type == :subexp_call
24
+ called_group = find_group_by_reference(node.reference, in_node: tree)
25
+ node.update(children: called_group.children, type: :captured_group)
26
+ end
27
+ end
28
+ end
29
+
30
+ def crawl(node, &block)
31
+ return if node.instance_of?(String)
32
+ yield(node)
33
+ node.children.each { |child| crawl(child, &block) }
34
+ end
35
+
36
+ def alternate_conditional_permutations(tree)
37
+ permutations = conditional_tree_permutations(tree)
38
+ return tree if permutations.empty?
39
+
40
+ alternatives = permutations.map.with_index do |variant, i|
41
+ Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')')
42
+ end
43
+ tree.update(children: alternatives)
44
+ end
45
+
46
+ def find_group_by_reference(ref, in_node: nil)
47
+ crawl(in_node) do |node|
48
+ return node if node.type == :captured_group && node.reference == ref
49
+ end
50
+ Node.new('()')
51
+ end
52
+
53
+ def conditional_tree_permutations(tree)
54
+ all_conditions = conditions(tree)
55
+ return [] if all_conditions.empty?
56
+
57
+ captured_groups_per_branch = captured_group_count(tree)
58
+
59
+ condition_permutations(all_conditions).map.with_index do |truthy_conds, i|
60
+ tree_permutation = tree.clone
61
+ # find referenced groups and conditionals and make one-sided
62
+ crawl(tree_permutation) do |node|
63
+ truthy = truthy_conds.include?(node.reference)
64
+
65
+ if node.type.equal?(:captured_group) &&
66
+ all_conditions.include?(node.reference)
67
+ truthy ? min_quantify(node) : null_quantify(node)
68
+ elsif node.type.equal?(:conditional)
69
+ branches = node.children[1...-1]
70
+ if branches.count == 1
71
+ truthy || null_quantify(branches.first)
72
+ else
73
+ null_quantify(truthy ? branches.last : branches.first)
74
+ end
75
+ node.update(type: :plain)
76
+ elsif node.type.equal?(:backref_num)
77
+ new_num = node.children[0].to_i + captured_groups_per_branch * i
78
+ node.update(children: [new_num.to_s])
79
+ end
80
+ end
81
+ end
82
+ end
83
+
84
+ def conditions(tree)
85
+ conditions = []
86
+ crawl(tree) do |node|
87
+ conditions << node.reference if node.type.equal?(:conditional)
88
+ end
89
+ conditions
90
+ end
91
+
92
+ def captured_group_count(tree)
93
+ count = 0
94
+ crawl(tree) { |node| count += 1 if node.type.equal?(:captured_group)}
95
+ count
96
+ end
97
+
98
+ def condition_permutations(conditions)
99
+ return [] if conditions.empty?
100
+
101
+ condition_permutations = (0..(conditions.length)).inject([]) do |arr, n|
102
+ arr += conditions.combination(n).to_a
103
+ end
104
+ end
105
+
106
+ def min_quantify(node)
107
+ return if (qtf = node.quantifier).nil? || qtf.min > 0
108
+
109
+ if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
110
+ node.update(quantifier: nil)
111
+ else
112
+ node.update(quantifier: "{1,#{qtf.max}}#{'?' if qtf.reluctant?}")
113
+ end
114
+ end
115
+
116
+ def null_quantify(node)
117
+ node.update(quantifier: '{0}')
118
+ end
119
+ end
120
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class JsRegex
4
- VERSION = '3.0.0'
4
+ VERSION = '3.1.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-04 00:00:00.000000000 Z
11
+ date: 2018-09-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: character_set
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.0'
33
+ version: '1.1'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.0'
40
+ version: '1.1'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: regexp_property_values
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -144,12 +144,13 @@ files:
144
144
  - lib/js_regex/converter/group_converter.rb
145
145
  - lib/js_regex/converter/literal_converter.rb
146
146
  - lib/js_regex/converter/meta_converter.rb
147
- - lib/js_regex/converter/nonproperty_converter.rb
148
147
  - lib/js_regex/converter/property_converter.rb
149
- - lib/js_regex/converter/root_converter.rb
150
148
  - lib/js_regex/converter/set_converter.rb
149
+ - lib/js_regex/converter/subexpression_converter.rb
151
150
  - lib/js_regex/converter/type_converter.rb
152
151
  - lib/js_regex/converter/unsupported_token_converter.rb
152
+ - lib/js_regex/node.rb
153
+ - lib/js_regex/second_pass.rb
153
154
  - lib/js_regex/version.rb
154
155
  homepage: https://github.com/janosch-x/js_regex
155
156
  licenses:
@@ -1,21 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'base'
4
- require_relative 'property_converter'
5
-
6
- class JsRegex
7
- module Converter
8
- #
9
- # Template class implementation.
10
- #
11
- # Note the inheritance from PropertyConverter.
12
- #
13
- class NonpropertyConverter < JsRegex::Converter::PropertyConverter
14
- private
15
-
16
- def convert_data
17
- convert_property(true)
18
- end
19
- end
20
- end
21
- end