js_regex 3.0.0 → 3.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4f8f83148f3bcfeb5262259d0893fe92db40e59f64627c4430deee7eaee194c2
4
- data.tar.gz: 9cf144827bd01a075552cf12bfd16152c417e82eab064f1fa6a65133381d95ac
3
+ metadata.gz: 7219e7e794aa7b4df64655f362d448122fd07f901686ead1706626e925919976
4
+ data.tar.gz: 97924f576ecabd32288c9a4827edf5538b21d807813afb8c7ee2cfc639136b2d
5
5
  SHA512:
6
- metadata.gz: bf9b4ff58756d2f12be785a803fda5e75aeffd556cdd49860e7474caf963957414b11e9fd1f3d35c6aee90375f3f23dc4435033ee1d3ba086534fdd7cf8d7caf
7
- data.tar.gz: b6d4e6dd07949b8fa3394e4868214d1a1977ee3fe65713c7eb000cdffc50e1d485be9af2f7fcffcc1893c883eb36cd4bd5c4c687b2aebabd62f2454820f57db5
6
+ metadata.gz: 0de5c05e55115fa6bc400551e6cd6eeaf9370b65bcf64262bebb6aaee7612ed4d0e6d67c10eeae794ae28af83839e1765f4cbc2b5ab0274e117ff8c20a4b30be
7
+ data.tar.gz: b71ef3b13a3969b65c332b1ecb42e164fa6acf7e317872eda254a485647378870749c74f04486d520d8b854ac39c5d9a1e90d6f84f898ebbda8ffd4f7bb339df
@@ -9,6 +9,8 @@ class JsRegex
9
9
  class Conversion
10
10
  require 'regexp_parser'
11
11
  require_relative 'converter'
12
+ require_relative 'node'
13
+ require_relative 'second_pass'
12
14
 
13
15
  class << self
14
16
  def of(ruby_regex, options: nil)
@@ -20,11 +22,11 @@ class JsRegex
20
22
  private
21
23
 
22
24
  def convert_source(ruby_regex)
23
- context = Converter::Context.new(ruby_regex)
24
- [
25
- Converter::RootConverter.new.convert(context.ast, context),
26
- context.warnings
27
- ]
25
+ tree = Regexp::Parser.parse(ruby_regex)
26
+ context = Converter::Context.new(case_insensitive_root: tree.i?)
27
+ converted_tree = Converter.convert(tree, context)
28
+ final_tree = SecondPass.call(converted_tree)
29
+ [final_tree.to_s, context.warnings]
28
30
  end
29
31
 
30
32
  def convert_options(ruby_regex, custom_options)
@@ -15,60 +15,54 @@ class JsRegex
15
15
  when :name_ref then convert_name_ref
16
16
  when :number, :number_ref then convert_number_ref
17
17
  when :number_rel_ref then convert_number_rel_ref
18
- when :name_call then convert_name_call
19
- when :number_call then convert_number_call
20
- when :number_rel_call then convert_number_rel_call
18
+ when :name_call then mark_name_call
19
+ when :number_call then mark_number_call
20
+ when :number_rel_call then mark_number_rel_call
21
21
  else # name_recursion_ref, number_recursion_ref, ...
22
22
  warn_of_unsupported_feature
23
23
  end
24
24
  end
25
25
 
26
26
  def convert_name_ref
27
- "\\#{context.named_group_positions.fetch(expression.name)}"
27
+ convert_ref(context.named_group_positions.fetch(expression.name))
28
28
  end
29
29
 
30
30
  def convert_number_ref
31
- "\\#{context.new_capturing_group_position(expression.number)}"
31
+ convert_ref(context.new_capturing_group_position(expression.number))
32
32
  end
33
33
 
34
34
  def convert_number_rel_ref
35
- "\\#{context.new_capturing_group_position(absolute_position)}"
35
+ convert_ref(context.new_capturing_group_position(absolute_position))
36
+ end
37
+
38
+ def convert_ref(position)
39
+ Node.new('\\', Node.new(position.to_s, type: :backref_num))
36
40
  end
37
41
 
38
42
  def absolute_position
39
43
  expression.number + context.original_capturing_group_count + 1
40
44
  end
41
45
 
42
- def convert_name_call
43
- replace_with_group do |group|
44
- group.token == :named && group.name == expression.name
45
- end
46
+ def mark_name_call
47
+ mark_call(expression.name)
46
48
  end
47
49
 
48
- def convert_number_call
49
- if expression.number == 0
50
+ def mark_number_call
51
+ if expression.number.equal?(0)
50
52
  return warn_of_unsupported_feature('whole-pattern recursion')
51
53
  end
52
- replace_with_group do |group|
53
- [:capture, :options].include?(group.token) &&
54
- group.number.equal?(expression.number)
55
- end
54
+ mark_call(expression.number)
56
55
  end
57
56
 
58
- def convert_number_rel_call
59
- replace_with_group do |group|
60
- [:capture, :options].include?(group.token) &&
61
- group.number.equal?(absolute_position)
62
- end
57
+ def mark_number_rel_call
58
+ is_forward_referring = data.include?('+') # e.g. \g<+2>
59
+ mark_call(absolute_position - (is_forward_referring ? 1 : 0))
63
60
  end
64
61
 
65
- def replace_with_group
66
- context.ast.each_expression do |subexp|
67
- if subexp.type == :group && yield(subexp)
68
- return Converter.for(subexp).convert(subexp, context)
69
- end
70
- end
71
- ''
62
+ def mark_call(reference)
63
+ # increment group count as calls will be substituted with groups
64
+ context.increment_local_capturing_group_count
65
+ Node.new(reference: reference, type: :subexp_call)
72
66
  end
73
67
  end
74
68
  end
@@ -3,15 +3,18 @@
3
3
  class JsRegex
4
4
  module Converter
5
5
  #
6
- # Template class. Implement #convert_data in subclasses.
6
+ # Template class. Implement #convert_data in subclasses and return
7
+ # an instance of String or Node from it.
7
8
  #
8
9
  class Base
10
+ # Returns an instance of Node with #quantifier attached.
9
11
  def convert(expression, context)
10
12
  self.context = context
11
13
  self.expression = expression
12
14
 
13
- source = convert_data
14
- apply_quantifier(source)
15
+ node = convert_data
16
+ node = Node.new(node) if node.instance_of?(String)
17
+ apply_quantifier(node)
15
18
  end
16
19
 
17
20
  private
@@ -27,41 +30,49 @@ class JsRegex
27
30
  end
28
31
  alias pass_through data
29
32
 
30
- def apply_quantifier(source)
31
- return source if source.empty? || !(quantifier = expression.quantifier)
33
+ def apply_quantifier(node)
34
+ return node if node.dropped? || (qtf = expression.quantifier).nil?
32
35
 
33
- if quantifier.mode.equal?(:possessive)
34
- context.wrap_in_backrefed_lookahead(source + quantifier.text[0..-2])
36
+ if qtf.possessive?
37
+ node.update(quantifier: qtf.text[0..-2])
38
+ return wrap_in_backrefed_lookahead([node])
35
39
  else
36
- source + quantifier
40
+ node.update(quantifier: qtf)
37
41
  end
38
- end
39
42
 
40
- def convert_subexpressions
41
- convert_expressions(subexpressions)
43
+ node
42
44
  end
43
45
 
44
- def convert_expressions(expressions)
45
- expressions.map { |exp| Converter.for(exp).convert(exp, context) }.join
46
+ def convert_subexpressions
47
+ Node.new(*expression.expressions.map { |exp| convert_expression(exp) })
46
48
  end
47
49
 
48
- def subexpressions
49
- expression.expressions
50
+ def convert_expression(expression)
51
+ Converter.convert(expression, context)
50
52
  end
51
53
 
52
54
  def warn_of_unsupported_feature(description = nil)
53
55
  description ||= "#{subtype} #{expression.type}".tr('_', ' ')
54
56
  full_desc = "#{description} '#{expression}'"
55
57
  warn("Dropped unsupported #{full_desc} at index #{expression.ts}")
56
- ''
58
+ drop
57
59
  end
58
60
 
59
61
  def warn(text)
60
62
  context.warnings << text
61
63
  end
62
64
 
63
- def drop_without_warning
64
- ''
65
+ def drop
66
+ Node.new(type: :dropped)
67
+ end
68
+ alias drop_without_warning drop
69
+
70
+ def wrap_in_backrefed_lookahead(content)
71
+ backref_num = context.capturing_group_count + 1
72
+ backref_num_node = Node.new(backref_num.to_s, type: :backref_num)
73
+ context.increment_local_capturing_group_count
74
+ # an empty passive group (?:) is appended as literal digits may follow
75
+ Node.new('(?=(', *content, '))\\', backref_num_node, '(?:)')
65
76
  end
66
77
  end
67
78
  end
@@ -11,12 +11,20 @@ class JsRegex
11
11
  private
12
12
 
13
13
  def convert_data
14
- warn_of_unsupported_feature('conditional')
15
- branches = subexpressions.drop(1).each_with_object([]) do |branch, arr|
16
- converted_branch = convert_expressions(branch)
17
- arr << converted_branch unless converted_branch.eql?('')
14
+ case subtype
15
+ when :open then mark_conditional
16
+ when :condition then drop_without_warning
17
+ else warn_of_unsupported_feature
18
18
  end
19
- "(?:#{branches.join('|')})"
19
+ end
20
+
21
+ def mark_conditional
22
+ reference = expression.reference
23
+ node = Node.new('(?:', reference: reference, type: :conditional)
24
+ expression.branches.each do |branch|
25
+ node << Node.new('(?:', convert_expression(branch), ')')
26
+ end
27
+ node << ')'
20
28
  end
21
29
  end
22
30
  end
@@ -8,20 +8,19 @@ class JsRegex
8
8
  # The Converters themselves are stateless.
9
9
  #
10
10
  class Context
11
- attr_reader :ast,
11
+ attr_reader :capturing_group_count,
12
12
  :case_insensitive_root,
13
13
  :in_atomic_group,
14
14
  :named_group_positions,
15
15
  :warnings
16
16
 
17
- def initialize(ruby_regex)
17
+ def initialize(case_insensitive_root: false)
18
18
  self.added_capturing_groups_after_group = Hash.new(0)
19
19
  self.capturing_group_count = 0
20
20
  self.named_group_positions = {}
21
21
  self.warnings = []
22
22
 
23
- self.ast = Regexp::Parser.parse(ruby_regex)
24
- self.case_insensitive_root = ast.case_insensitive?
23
+ self.case_insensitive_root = case_insensitive_root
25
24
  end
26
25
 
27
26
  # group context
@@ -38,13 +37,9 @@ class JsRegex
38
37
  self.in_atomic_group = false
39
38
  end
40
39
 
41
- def wrap_in_backrefed_lookahead(content)
42
- new_backref_num = capturing_group_count + 1
43
- # an empty passive group (?:) is appended as literal digits may follow
44
- result = "(?=(#{content}))\\#{new_backref_num}(?:)"
40
+ def increment_local_capturing_group_count
45
41
  added_capturing_groups_after_group[original_capturing_group_count] += 1
46
42
  capture_group
47
- result
48
43
  end
49
44
 
50
45
  # takes and returns 1-indexed group positions.
@@ -67,10 +62,9 @@ class JsRegex
67
62
 
68
63
  private
69
64
 
70
- attr_accessor :added_capturing_groups_after_group,
71
- :capturing_group_count
65
+ attr_accessor :added_capturing_groups_after_group
72
66
 
73
- attr_writer :ast,
67
+ attr_writer :capturing_group_count,
74
68
  :case_insensitive_root,
75
69
  :in_atomic_group,
76
70
  :named_group_positions,
@@ -55,9 +55,9 @@ class JsRegex
55
55
  end
56
56
 
57
57
  def convert_codepoint_list
58
- expression.chars.map do |char|
59
- LiteralConverter.convert_data(Regexp.escape(char))
60
- end.join
58
+ expression.chars.each_with_object(Node.new) do |char, node|
59
+ node << LiteralConverter.convert_data(Regexp.escape(char))
60
+ end
61
61
  end
62
62
 
63
63
  def unicode_escape_codepoint
@@ -28,7 +28,7 @@ class JsRegex
28
28
  build_unsupported_group('nested atomic group')
29
29
  else
30
30
  context.start_atomic_group
31
- result = context.wrap_in_backrefed_lookahead(convert_subexpressions)
31
+ result = wrap_in_backrefed_lookahead(convert_subexpressions)
32
32
  context.end_atomic_group
33
33
  result
34
34
  end
@@ -37,15 +37,19 @@ class JsRegex
37
37
  def build_named_group
38
38
  # remember position, then drop name part without warning
39
39
  context.store_named_group_position(expression.name)
40
- build_group(head: '(')
40
+ build_group(head: '(', reference: expression.name)
41
41
  end
42
42
 
43
43
  def build_options_group
44
44
  unless (encoding_options = data.scan(/[adu]/)).empty?
45
45
  warn_of_unsupported_feature("encoding options #{encoding_options}")
46
46
  end
47
- switch_only = subtype.equal?(:options_switch)
48
- switch_only ? drop_without_warning : build_group(head: '(')
47
+ if subtype.equal?(:options_switch)
48
+ # can be ignored since #options on subsequent Expressions are correct
49
+ drop_without_warning
50
+ else
51
+ build_passive_group
52
+ end
49
53
  end
50
54
 
51
55
  def build_passive_group
@@ -58,9 +62,19 @@ class JsRegex
58
62
  end
59
63
 
60
64
  def build_group(opts = {})
61
- context.capture_group unless opts[:capturing].equal?(false)
62
65
  head = opts[:head] || pass_through
63
- "#{head}#{convert_subexpressions})"
66
+ if opts[:capturing].equal?(false)
67
+ return Node.new(*group_with_head(head))
68
+ end
69
+
70
+ context.capture_group
71
+
72
+ ref = opts[:reference] || expression.number
73
+ Node.new(*group_with_head(head), reference: ref, type: :captured_group)
74
+ end
75
+
76
+ def group_with_head(head)
77
+ [head, *convert_subexpressions, ')']
64
78
  end
65
79
  end
66
80
  end
@@ -9,18 +9,28 @@ class JsRegex
9
9
  #
10
10
  class LiteralConverter < JsRegex::Converter::Base
11
11
  class << self
12
- ASTRAL_PLANE_CODEPOINT_PATTERN = /\A[\u{10000}-\u{FFFFF}]\z/
12
+ ASTRAL_PLANE_CODEPOINT_PATTERN = /[\u{10000}-\u{FFFFF}]/
13
13
 
14
14
  def convert_data(data)
15
15
  if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
16
- surrogate_pair_for(data)
16
+ data.each_char.each_with_object(Node.new) do |chr, node|
17
+ if chr =~ ASTRAL_PLANE_CODEPOINT_PATTERN
18
+ node << surrogate_pair_for(chr)
19
+ else
20
+ node << convert_bmp_data(chr)
21
+ end
22
+ end
17
23
  else
18
- ensure_json_compatibility(
19
- ensure_forward_slashes_are_escaped(data)
20
- )
24
+ convert_bmp_data(data)
21
25
  end
22
26
  end
23
27
 
28
+ def convert_bmp_data(data)
29
+ ensure_json_compatibility(
30
+ ensure_forward_slashes_are_escaped(data)
31
+ )
32
+ end
33
+
24
34
  private
25
35
 
26
36
  def surrogate_pair_for(astral_char)
@@ -56,7 +66,10 @@ class JsRegex
56
66
 
57
67
  def handle_locally_case_insensitive_literal(literal)
58
68
  return literal unless literal =~ HAS_CASE_PATTERN
59
- "[#{literal}#{literal.swapcase}]"
69
+
70
+ literal.each_char.each_with_object(Node.new) do |chr, node|
71
+ node << (chr =~ HAS_CASE_PATTERN ? "[#{chr}#{chr.swapcase}]" : chr)
72
+ end
60
73
  end
61
74
  end
62
75
  end
@@ -13,7 +13,7 @@ class JsRegex
13
13
  def convert_data
14
14
  case subtype
15
15
  when :alternation
16
- convert_alternation
16
+ convert_alternatives
17
17
  when :dot
18
18
  expression.multiline? ? '(?:.|\n)' : '.'
19
19
  else
@@ -21,12 +21,15 @@ class JsRegex
21
21
  end
22
22
  end
23
23
 
24
- def convert_alternation
25
- branches = subexpressions.each_with_object([]) do |branch, arr|
26
- converted_branch = convert_expressions(branch.expressions)
27
- arr << converted_branch unless converted_branch.eql?('')
24
+ def convert_alternatives
25
+ kept_any = false
26
+
27
+ convert_subexpressions.map do |node|
28
+ dropped = !node.children.empty? && node.children.all?(&:dropped?)
29
+ node.children.unshift('|') if kept_any.equal?(true) && !dropped
30
+ kept_any = true unless dropped
31
+ node
28
32
  end
29
- branches.join('|')
30
33
  end
31
34
  end
32
35
  end
@@ -8,20 +8,19 @@ class JsRegex
8
8
  #
9
9
  # Template class implementation.
10
10
  #
11
+ # Uses the `character_set` and `regexp_property_values` gems to get the
12
+ # codepoints matched by the property and build a set string from them.
13
+ #
11
14
  class PropertyConverter < JsRegex::Converter::Base
12
15
  private
13
16
 
14
17
  def convert_data
15
- convert_property
16
- end
17
-
18
- def convert_property(negated = nil)
19
18
  content = CharacterSet.of_property(subtype)
20
19
  if expression.case_insensitive? && !context.case_insensitive_root
21
20
  content = content.case_insensitive
22
21
  end
23
22
 
24
- if negated
23
+ if expression.negative?
25
24
  if content.astral_part.empty?
26
25
  return "[^#{content.to_s(format: :js)}]"
27
26
  else
@@ -35,10 +34,10 @@ class JsRegex
35
34
  end
36
35
 
37
36
  bmp_part = content.bmp_part
38
- return '' if bmp_part.empty?
37
+ return drop if bmp_part.empty?
39
38
 
40
39
  string = bmp_part.to_s(format: :js)
41
- negated ? "[^#{string}]" : "[#{string}]"
40
+ expression.negative? ? "[^#{string}]" : "[#{string}]"
42
41
  end
43
42
  end
44
43
  end
@@ -10,6 +10,11 @@ class JsRegex
10
10
  #
11
11
  # Template class implementation.
12
12
  #
13
+ # Unlike other converters, this one does not recurse on subexpressions,
14
+ # since many are unsupported by JavaScript. If it detects incompatible
15
+ # children, it uses the `character_set` gem to establish the codepoints
16
+ # matched by the whole set and build a completely new set string.
17
+ #
13
18
  class SetConverter < JsRegex::Converter::Base
14
19
  private
15
20
 
@@ -32,7 +37,7 @@ class JsRegex
32
37
  else
33
38
  warn_of_unsupported_feature('large astral plane match of set')
34
39
  bmp_part = content.bmp_part
35
- bmp_part.empty? ? '' : bmp_part.to_s(format: :js, in_brackets: true)
40
+ bmp_part.empty? ? drop : bmp_part.to_s(format: :js, in_brackets: true)
36
41
  end
37
42
  end
38
43
 
@@ -42,7 +47,7 @@ class JsRegex
42
47
  return false
43
48
  end
44
49
 
45
- # check for subexpressions that need conversion
50
+ # check for children needing conversion (#each_expression is recursive)
46
51
  expression.each_expression do |node|
47
52
  case node.type
48
53
  when :literal
@@ -7,7 +7,7 @@ class JsRegex
7
7
  #
8
8
  # Template class implementation.
9
9
  #
10
- class RootConverter < JsRegex::Converter::Base
10
+ class SubexpressionConverter < JsRegex::Converter::Base
11
11
  private
12
12
 
13
13
  def convert_data
@@ -19,7 +19,7 @@ class JsRegex
19
19
 
20
20
  HEX_EXPANSION = '[0-9A-Fa-f]'
21
21
  NONHEX_EXPANSION = '[^0-9A-Fa-f]'
22
- LINEBREAK_EXPANSION = '(\r\n|\r|\n)'
22
+ LINEBREAK_EXPANSION = '(?:\r\n|\r|\n)'
23
23
 
24
24
  private
25
25
 
@@ -12,27 +12,32 @@ class JsRegex
12
12
  backref: BackreferenceConverter,
13
13
  conditional: ConditionalConverter,
14
14
  escape: EscapeConverter,
15
+ expression: SubexpressionConverter,
15
16
  free_space: FreespaceConverter,
16
17
  group: GroupConverter,
17
18
  literal: LiteralConverter,
18
19
  meta: MetaConverter,
19
- nonproperty: NonpropertyConverter,
20
+ nonproperty: PropertyConverter,
20
21
  property: PropertyConverter,
21
22
  set: SetConverter,
22
23
  type: TypeConverter
23
24
  ).freeze
24
25
 
25
- def self.for(expression)
26
- MAP[expression.type].new
27
- end
28
-
29
- # Limit the number of generated surrogate pairs, else the output might
30
- # get to large for certain applications. The chosen number is somewhat
31
- # arbitrary. 100 pairs make for about 1 KB, uncompressed. The median char
32
- # count of all properties supported by Ruby is 92. 75% are below 300 chars.
33
- #
34
- # Set this to nil if you need full unicode matches and size doesn't matter.
35
26
  class << self
27
+ def convert(exp, context = nil)
28
+ self.for(exp).convert(exp, context || Context.new)
29
+ end
30
+
31
+ def for(expression)
32
+ MAP[expression.type].new
33
+ end
34
+
35
+ # Limit the number of generated surrogate pairs, else the output might
36
+ # get too large for certain applications. The chosen number is somewhat
37
+ # arbitrary. 100 pairs make for about 1 KB, uncompressed. The median char
38
+ # count of all properties supported by Ruby is 92. 75% are below 300 chars.
39
+ #
40
+ # Set this to nil if you need full unicode matches and size doesn't matter.
36
41
  attr_accessor :surrogate_pair_limit
37
42
  end
38
43
  self.surrogate_pair_limit = 300
@@ -0,0 +1,70 @@
1
+ # frozen_string_literal: true
2
+
3
+ class JsRegex
4
+ #
5
+ # Converter#convert result. Represents a branch or leaf node with an optional
6
+ # quantifier as well as type and reference annotations for SecondPass.
7
+ #
8
+ class Node
9
+ attr_reader :children, :quantifier, :reference, :type
10
+
11
+ TYPES = %i[
12
+ backref_num
13
+ captured_group
14
+ conditional
15
+ dropped
16
+ plain
17
+ subexp_call
18
+ ]
19
+
20
+ def initialize(*children, quantifier: nil, reference: nil, type: :plain)
21
+ raise ArgumentError, "bad type #{type}" unless TYPES.include?(type)
22
+ self.children = children
23
+ self.quantifier = quantifier
24
+ self.reference = reference
25
+ self.type = type
26
+ end
27
+
28
+ def initialize_copy(orig)
29
+ super
30
+ self.children = orig.children.map(&:clone)
31
+ self.quantifier = orig.quantifier && orig.quantifier.clone
32
+ end
33
+
34
+ def map(&block)
35
+ clone.tap { |node| node.children.replace(children.map(&block)) }
36
+ end
37
+
38
+ def <<(node)
39
+ children << node
40
+ self
41
+ end
42
+
43
+ def dropped?
44
+ # keep everything else, including empty or depleted capturing groups
45
+ # so as not to mess with reference numbers (e.g. backrefs)
46
+ type.equal?(:dropped)
47
+ end
48
+
49
+ def to_s
50
+ case type
51
+ when :dropped
52
+ ''
53
+ when :backref_num, :captured_group, :plain
54
+ children.join << quantifier.to_s
55
+ else
56
+ raise TypeError, "#{type} must be substituted before stringification"
57
+ end
58
+ end
59
+
60
+ def update(attrs)
61
+ self.children = attrs[:children] if attrs.key?(:children)
62
+ self.quantifier = attrs[:quantifier] if attrs.key?(:quantifier)
63
+ self.type = attrs[:type] if attrs.key?(:type)
64
+ end
65
+
66
+ private
67
+
68
+ attr_writer :children, :reference, :quantifier, :type
69
+ end
70
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ class JsRegex
4
+ #
5
+ # After conversion of a full Regexp::Expression tree, this class
6
+ # checks for Node instances that need further processing.
7
+ #
8
+ # E.g. subexpression calls (such as \g<1>) can refer to groups defined later,
9
+ # so the full Regexp must have been processed first, and only then can
10
+ # they be substituted with the conversion result of their targeted group.
11
+ #
12
+ module SecondPass
13
+ module_function
14
+
15
+ def call(tree)
16
+ substitute_subexp_calls(tree)
17
+ alternate_conditional_permutations(tree)
18
+ tree
19
+ end
20
+
21
+ def substitute_subexp_calls(tree)
22
+ crawl(tree) do |node|
23
+ if node.type == :subexp_call
24
+ called_group = find_group_by_reference(node.reference, in_node: tree)
25
+ node.update(children: called_group.children, type: :captured_group)
26
+ end
27
+ end
28
+ end
29
+
30
+ def crawl(node, &block)
31
+ return if node.instance_of?(String)
32
+ yield(node)
33
+ node.children.each { |child| crawl(child, &block) }
34
+ end
35
+
36
+ def alternate_conditional_permutations(tree)
37
+ permutations = conditional_tree_permutations(tree)
38
+ return tree if permutations.empty?
39
+
40
+ alternatives = permutations.map.with_index do |variant, i|
41
+ Node.new((i.zero? ? '(?:' : '|(?:'), variant, ')')
42
+ end
43
+ tree.update(children: alternatives)
44
+ end
45
+
46
+ def find_group_by_reference(ref, in_node: nil)
47
+ crawl(in_node) do |node|
48
+ return node if node.type == :captured_group && node.reference == ref
49
+ end
50
+ Node.new('()')
51
+ end
52
+
53
+ def conditional_tree_permutations(tree)
54
+ all_conditions = conditions(tree)
55
+ return [] if all_conditions.empty?
56
+
57
+ captured_groups_per_branch = captured_group_count(tree)
58
+
59
+ condition_permutations(all_conditions).map.with_index do |truthy_conds, i|
60
+ tree_permutation = tree.clone
61
+ # find referenced groups and conditionals and make one-sided
62
+ crawl(tree_permutation) do |node|
63
+ truthy = truthy_conds.include?(node.reference)
64
+
65
+ if node.type.equal?(:captured_group) &&
66
+ all_conditions.include?(node.reference)
67
+ truthy ? min_quantify(node) : null_quantify(node)
68
+ elsif node.type.equal?(:conditional)
69
+ branches = node.children[1...-1]
70
+ if branches.count == 1
71
+ truthy || null_quantify(branches.first)
72
+ else
73
+ null_quantify(truthy ? branches.last : branches.first)
74
+ end
75
+ node.update(type: :plain)
76
+ elsif node.type.equal?(:backref_num)
77
+ new_num = node.children[0].to_i + captured_groups_per_branch * i
78
+ node.update(children: [new_num.to_s])
79
+ end
80
+ end
81
+ end
82
+ end
83
+
84
+ def conditions(tree)
85
+ conditions = []
86
+ crawl(tree) do |node|
87
+ conditions << node.reference if node.type.equal?(:conditional)
88
+ end
89
+ conditions
90
+ end
91
+
92
+ def captured_group_count(tree)
93
+ count = 0
94
+ crawl(tree) { |node| count += 1 if node.type.equal?(:captured_group)}
95
+ count
96
+ end
97
+
98
+ def condition_permutations(conditions)
99
+ return [] if conditions.empty?
100
+
101
+ condition_permutations = (0..(conditions.length)).inject([]) do |arr, n|
102
+ arr += conditions.combination(n).to_a
103
+ end
104
+ end
105
+
106
+ def min_quantify(node)
107
+ return if (qtf = node.quantifier).nil? || qtf.min > 0
108
+
109
+ if qtf.max.equal?(1) # any zero_or_one quantifier (?, ??, ?+)
110
+ node.update(quantifier: nil)
111
+ else
112
+ node.update(quantifier: "{1,#{qtf.max}}#{'?' if qtf.reluctant?}")
113
+ end
114
+ end
115
+
116
+ def null_quantify(node)
117
+ node.update(quantifier: '{0}')
118
+ end
119
+ end
120
+ end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class JsRegex
4
- VERSION = '3.0.0'
4
+ VERSION = '3.1.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0
4
+ version: 3.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-04 00:00:00.000000000 Z
11
+ date: 2018-09-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: character_set
@@ -30,14 +30,14 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '1.0'
33
+ version: '1.1'
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '1.0'
40
+ version: '1.1'
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: regexp_property_values
43
43
  requirement: !ruby/object:Gem::Requirement
@@ -144,12 +144,13 @@ files:
144
144
  - lib/js_regex/converter/group_converter.rb
145
145
  - lib/js_regex/converter/literal_converter.rb
146
146
  - lib/js_regex/converter/meta_converter.rb
147
- - lib/js_regex/converter/nonproperty_converter.rb
148
147
  - lib/js_regex/converter/property_converter.rb
149
- - lib/js_regex/converter/root_converter.rb
150
148
  - lib/js_regex/converter/set_converter.rb
149
+ - lib/js_regex/converter/subexpression_converter.rb
151
150
  - lib/js_regex/converter/type_converter.rb
152
151
  - lib/js_regex/converter/unsupported_token_converter.rb
152
+ - lib/js_regex/node.rb
153
+ - lib/js_regex/second_pass.rb
153
154
  - lib/js_regex/version.rb
154
155
  homepage: https://github.com/janosch-x/js_regex
155
156
  licenses:
@@ -1,21 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'base'
4
- require_relative 'property_converter'
5
-
6
- class JsRegex
7
- module Converter
8
- #
9
- # Template class implementation.
10
- #
11
- # Note the inheritance from PropertyConverter.
12
- #
13
- class NonpropertyConverter < JsRegex::Converter::PropertyConverter
14
- private
15
-
16
- def convert_data
17
- convert_property(true)
18
- end
19
- end
20
- end
21
- end