js_regex 1.2.3 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3d910abcbfe22ab6a470b8784025da0d81a7d823
4
- data.tar.gz: 8a30eddfc5efe71a598afe7064f3a0ee944e017e
3
+ metadata.gz: 2abae347b737c7635396c45a3a1489c72d6957e5
4
+ data.tar.gz: 5b674eaf6902686ab94648000308090fa154783c
5
5
  SHA512:
6
- metadata.gz: 031334a1f4d48e5f432cb6bfada63eb2821b61d8d2c8e052548c98db0f5a8fd83f606305c00976c51c6e6f1caaa6f0def2c0f405914a4daa694c2606b92a5c25
7
- data.tar.gz: eb7be820e5112f488fa69326e4870a0f9ac8d4538138c41d83e201551a8aa2a2cfe36ca5cba18e944a5a89a0fd25fc8783306062bea7017f1fddbb559d1fb299
6
+ metadata.gz: 3406867e15cb70cbc0a9f137f2449fce1af94fb448bcdbee3cfa6e439f64c92d4df9a5407fee251ea127a59faf8f6cbf81d55b38279d79e1a6de24e5b1c6673a
7
+ data.tar.gz: d0e63535e25eb8adb3e8d95859a884ce0eaec81ba25f1da8970213f4572fd558b9cbb7e3b25dc59d261213a9a0d7f9e0e7bb29b4f14cffba5fa9df49b0ec4780
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
- #
2
+
3
3
  # JsRegex converts ::Regexp instances to JavaScript.
4
4
  #
5
5
  # Usage:
@@ -10,6 +10,7 @@
10
10
  #
11
11
  class JsRegex
12
12
  require_relative File.join('js_regex', 'conversion')
13
+ require_relative File.join('js_regex', 'version')
13
14
  require 'json'
14
15
 
15
16
  attr_reader :source, :options, :warnings
@@ -2,88 +2,36 @@
2
2
 
3
3
  class JsRegex
4
4
  #
5
- # This class acts as a facade, creating specific Converters and
6
- # passing Regexp::Scanner tokens to them, reusing Converters as needed.
5
+ # This class acts as a facade, passing a regex to the converters.
7
6
  #
8
7
  # ::of returns a source String, options String, and warnings Array.
9
8
  #
10
9
  class Conversion
11
10
  require 'regexp_parser'
12
- Dir[File.join(File.dirname(__FILE__), '**', '*.rb')].each { |f| require f }
11
+ require_relative 'converter'
13
12
 
14
- attr_reader :ruby_regex, :context, :converters, :source, :options, :warnings
15
-
16
- def initialize(ruby_regex)
17
- self.ruby_regex = ruby_regex
18
-
19
- self.context = Converter::Context.new
20
- self.converters = {}
21
-
22
- self.source = ''.dup
23
- self.options = ''.dup
24
- self.warnings = []
25
-
26
- convert_source
27
- convert_options
28
- perform_sanity_check
29
- end
30
-
31
- def self.of(ruby_regex)
32
- conversion = new(ruby_regex)
33
- [conversion.source, conversion.options, conversion.warnings]
34
- end
35
-
36
- private
37
-
38
- attr_writer :ruby_regex, :context, :converters, :source, :options, :warnings
39
-
40
- CONVERTER_MAP = Hash.new(Converter::UnsupportedTokenConverter).merge(
41
- anchor: Converter::AnchorConverter,
42
- assertion: Converter::AssertionConverter,
43
- backref: Converter::BackreferenceConverter,
44
- conditional: Converter::ConditionalConverter,
45
- escape: Converter::EscapeConverter,
46
- free_space: Converter::FreespaceConverter,
47
- group: Converter::GroupConverter,
48
- literal: Converter::LiteralConverter,
49
- meta: Converter::MetaConverter,
50
- nonproperty: Converter::NonpropertyConverter,
51
- property: Converter::PropertyConverter,
52
- quantifier: Converter::QuantifierConverter,
53
- set: Converter::SetConverter,
54
- subset: Converter::SetConverter,
55
- type: Converter::TypeConverter
56
- ).freeze
57
-
58
- def convert_source
59
- Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
60
- # There might be a lot of tokens, so don't wrap their data in objects.
61
- # Even just wrapping them in simple structs or attr_reader objects
62
- # can lead to 60%+ longer processing times for large regexes.
63
- converter_for_token_class(token_class)
64
- .convert(token_class, subtype, data, s, e)
13
+ class << self
14
+ def of(ruby_regex)
15
+ source, warnings = convert_source(ruby_regex)
16
+ options = convert_options(ruby_regex)
17
+ [source, options, warnings]
65
18
  end
66
- end
67
-
68
- def converter_for_token_class(token_class)
69
- converters[token_class] ||= CONVERTER_MAP[token_class].new(self, context)
70
- end
71
19
 
72
- def convert_options
73
- options << 'g' # all Ruby regexes are what is called "global" in JS
74
- options << 'i' if (ruby_regex.options & Regexp::IGNORECASE).nonzero?
75
- end
20
+ private
76
21
 
77
- SURROGATE_CODEPOINT_PATTERN = /\\uD[89A-F]\h\h/i
22
+ def convert_source(ruby_regex)
23
+ context = Converter::Context.new(ruby_regex)
24
+ expression_tree = Regexp::Parser.parse(ruby_regex)
25
+ [
26
+ Converter::RootConverter.new.convert(expression_tree, context),
27
+ context.warnings
28
+ ]
29
+ end
78
30
 
79
- def perform_sanity_check
80
- # Ruby regex capabilities are a superset of JS regex capabilities in
81
- # the source part. So if this raises an Error, a Converter messed up.
82
- # Ignore that Ruby won't accept surrogate pairs, though.
83
- Regexp.new(source.gsub(SURROGATE_CODEPOINT_PATTERN, '.'))
84
- rescue ArgumentError, RegexpError, SyntaxError => e
85
- self.source = ''
86
- warnings << e.message
31
+ def convert_options(ruby_regex)
32
+ ignore_case = (ruby_regex.options & Regexp::IGNORECASE).nonzero?
33
+ ignore_case ? 'gi' : 'g'
34
+ end
87
35
  end
88
36
  end
89
37
  end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ class JsRegex
4
+ module Converter
5
+ Dir[File.join(File.dirname(__FILE__), 'converter', '*.rb')].each do |file|
6
+ require file
7
+ end
8
+
9
+ MAP = Hash.new(UnsupportedTokenConverter).merge(
10
+ anchor: AnchorConverter,
11
+ assertion: AssertionConverter,
12
+ backref: BackreferenceConverter,
13
+ conditional: ConditionalConverter,
14
+ escape: EscapeConverter,
15
+ free_space: FreespaceConverter,
16
+ group: GroupConverter,
17
+ literal: LiteralConverter,
18
+ meta: MetaConverter,
19
+ nonproperty: NonpropertyConverter,
20
+ property: PropertyConverter,
21
+ set: SetConverter,
22
+ type: TypeConverter
23
+ ).freeze
24
+
25
+ def self.for(expression)
26
+ MAP[expression.type].new
27
+ end
28
+ end
29
+ end
@@ -15,12 +15,19 @@ class JsRegex
15
15
  when :bol, :bos then '^'
16
16
  when :eol, :eos then '$'
17
17
  when :eos_ob_eol then '(?=\n?$)'
18
- when :word_boundary then '\b'
19
- when :nonword_boundary then '\B'
18
+ when :word_boundary then pass_boundary_with_warning('\b')
19
+ when :nonword_boundary then pass_boundary_with_warning('\B')
20
20
  else
21
21
  warn_of_unsupported_feature
22
22
  end
23
23
  end
24
+
25
+ def pass_boundary_with_warning(boundary)
26
+ warn("The boundary '#{boundary}' at index #{expression.ts} "\
27
+ 'is not unicode-aware in JavaScript, '\
28
+ 'so it might act differently than in Ruby.')
29
+ boundary
30
+ end
24
31
  end
25
32
  end
26
33
  end
@@ -16,12 +16,11 @@ class JsRegex
16
16
  def convert_data
17
17
  case subtype
18
18
  when :lookahead, :nlookahead
19
- open_group(capturing: false)
19
+ build_group(capturing: false)
20
20
  when :nlookbehind
21
- context.start_negative_lookbehind
22
21
  warn_of_unsupported_feature('negative lookbehind assertion')
23
22
  else # :lookbehind, ...
24
- open_unsupported_group
23
+ build_unsupported_group
25
24
  end
26
25
  end
27
26
  end
@@ -12,20 +12,30 @@ class JsRegex
12
12
 
13
13
  def convert_data
14
14
  case subtype
15
- when :number
16
- convert_number_backref
15
+ when :number, :number_ref
16
+ convert_number_ref
17
+ when :number_rel_ref
18
+ convert_number_rel_ref
19
+ when :name_ref
20
+ convert_name_ref
17
21
  else
18
22
  warn_of_unsupported_feature
19
23
  end
20
24
  end
21
25
 
22
- def convert_number_backref
23
- if context.group_count_changed
24
- warn_of_unsupported_feature('number backreference following a '\
25
- 'feature that changes the group count (such as an atomic group)')
26
- else
27
- pass_through
28
- end
26
+ def convert_number_ref
27
+ # after regexp_parser update, replace data[/\d+/] with expression.number
28
+ "\\#{context.new_capturing_group_position(Integer(data[/\d+/]))}"
29
+ end
30
+
31
+ def convert_number_rel_ref
32
+ absolute_position = Integer(expression.number) +
33
+ context.original_capturing_group_count + 1
34
+ "\\#{context.new_capturing_group_position(absolute_position)}"
35
+ end
36
+
37
+ def convert_name_ref
38
+ "\\#{context.named_group_positions.fetch(expression.name)}"
29
39
  end
30
40
  end
31
41
  end
@@ -6,35 +6,61 @@ class JsRegex
6
6
  # Template class. Implement #convert_data in subclasses.
7
7
  #
8
8
  class Base
9
- attr_reader :target, :context
9
+ def convert(expression, context)
10
+ self.context = context
11
+ self.expression = expression
10
12
 
11
- def initialize(target, context)
12
- @target = target
13
- @context = context
13
+ source = convert_data
14
+ apply_quantifier(source)
14
15
  end
15
16
 
16
- def convert(token_class, subtype, data, start_index, end_index)
17
- self.token_class = token_class
18
- self.subtype = subtype
19
- self.data = data
20
- self.start_index = start_index
21
- self.end_index = end_index
17
+ private
18
+
19
+ attr_accessor :context, :expression
22
20
 
23
- result = convert_data
24
- target.source << (context.valid? ? result : '')
21
+ def subtype
22
+ expression.token
25
23
  end
26
24
 
27
- private
25
+ def data
26
+ expression.text
27
+ end
28
+ alias pass_through data
28
29
 
29
- attr_accessor :token_class, :subtype, :data, :start_index, :end_index
30
+ def apply_quantifier(source)
31
+ return source if source.empty? || !(quantifier = expression.quantifier)
30
32
 
31
- alias pass_through data
33
+ if quantifier.mode.equal?(:possessive)
34
+ context.wrap_in_backrefed_lookahead(source + quantifier.text[0..-2])
35
+ else
36
+ source + quantifier
37
+ end
38
+ end
39
+
40
+ def convert_subexpressions
41
+ convert_expressions(subexpressions)
42
+ end
43
+
44
+ def convert_expressions(expressions)
45
+ expressions.map { |exp| Converter.for(exp).convert(exp, context) }.join
46
+ end
47
+
48
+ def subexpressions
49
+ expression.expressions
50
+ end
32
51
 
33
52
  def warn_of_unsupported_feature(description = nil)
34
- description ||= "#{subtype} #{token_class}".tr('_', ' ')
35
- full_description = "#{description} '#{data}'"
36
- target.warnings << "Dropped unsupported #{full_description} "\
37
- "at index #{start_index}...#{end_index}"
53
+ description ||= "#{subtype} #{expression.type}".tr('_', ' ')
54
+ full_desc = "#{description} '#{expression}'"
55
+ warn("Dropped unsupported #{full_desc} at index #{expression.ts}")
56
+ ''
57
+ end
58
+
59
+ def warn(text)
60
+ context.warnings << text
61
+ end
62
+
63
+ def drop_without_warning
38
64
  ''
39
65
  end
40
66
  end
@@ -11,15 +11,12 @@ class JsRegex
11
11
  private
12
12
 
13
13
  def convert_data
14
- case subtype
15
- when :open
16
- warn_of_unsupported_feature('conditional')
17
- '('
18
- when :separator, :close
19
- pass_through
20
- else
21
- '' # one warning is enough, don't warn about other parts
14
+ warn_of_unsupported_feature('conditional')
15
+ branches = subexpressions.drop(1).each_with_object([]) do |branch, arr|
16
+ converted_branch = convert_expressions(branch)
17
+ arr << converted_branch unless converted_branch.eql?('')
22
18
  end
19
+ "(?:#{branches.join('|')})"
23
20
  end
24
21
  end
25
22
  end
@@ -10,120 +10,97 @@ class JsRegex
10
10
  class Context
11
11
  attr_reader :buffered_set_extractions,
12
12
  :buffered_set_members,
13
- :captured_group_count,
14
- :group_count_changed,
15
- :group_level_for_backreference,
16
- :negative_lookbehind
13
+ :in_atomic_group,
14
+ :named_group_positions,
15
+ :negative_base_set,
16
+ :root_options,
17
+ :warnings
17
18
 
18
- def initialize
19
- self.buffered_set_members = []
20
- self.buffered_set_extractions = []
21
- self.captured_group_count = 0
22
- self.group_level = 0
23
- self.negative_set_levels = []
24
- self.set_level = 0
25
- end
26
-
27
- def valid?
28
- !negative_lookbehind
29
- end
30
-
31
- def stacked_quantifier?(quantifier_start_index, quantifier_end_index)
32
- is_stacked = last_quantifier_end_index.equal?(quantifier_start_index)
33
- self.last_quantifier_end_index = quantifier_end_index
34
- is_stacked
35
- end
36
-
37
- # set context
38
-
39
- def open_set
40
- self.set_level = set_level + 1
41
- if set_level == 1
42
- buffered_set_members.clear
43
- buffered_set_extractions.clear
44
- end
45
- negative_set_levels.delete(set_level)
46
- end
19
+ def initialize(ruby_regex)
20
+ self.added_capturing_groups_after_group = Hash.new(0)
21
+ self.capturing_group_count = 0
22
+ self.named_group_positions = {}
23
+ self.warnings = []
47
24
 
48
- def negate_set
49
- self.negative_set_levels = negative_set_levels | [set_level]
25
+ self.root_options = {}
26
+ root_options[:m] = !(ruby_regex.options & Regexp::MULTILINE).equal?(0)
50
27
  end
51
28
 
52
- def close_set
53
- self.set_level = set_level - 1
54
- end
29
+ # option context
55
30
 
56
- def set?
57
- set_level > 0
31
+ def multiline?
32
+ root_options.fetch(:m)
58
33
  end
59
34
 
60
- def negative_set?(level = set_level)
61
- negative_set_levels.include?(level)
62
- end
35
+ # set context
63
36
 
64
- def nested_negation?
65
- nested_set? && negative_set?
37
+ def negate_base_set
38
+ self.negative_base_set = true
66
39
  end
67
40
 
68
- def nested_set?
69
- set_level > 1
41
+ def reset_set_context
42
+ self.buffered_set_extractions = []
43
+ self.buffered_set_members = []
44
+ self.negative_base_set = false
70
45
  end
71
46
 
72
47
  # group context
73
48
 
74
- def open_group
75
- self.group_level = group_level + 1
76
- end
77
-
78
49
  def capture_group
79
- self.captured_group_count = captured_group_count + 1
50
+ self.capturing_group_count = capturing_group_count + 1
80
51
  end
81
52
 
82
53
  def start_atomic_group
83
- self.group_level_for_backreference = group_level
54
+ self.in_atomic_group = true
84
55
  end
85
56
 
86
- def start_negative_lookbehind
87
- self.negative_lookbehind = true
57
+ def end_atomic_group
58
+ self.in_atomic_group = false
88
59
  end
89
60
 
90
- def close_group
91
- self.group_level = group_level - 1
61
+ def wrap_in_backrefed_lookahead(content)
62
+ new_backref_num = capturing_group_count + 1
63
+ # an empty passive group (?:) is appended as literal digits may follow
64
+ result = "(?=(#{content}))\\#{new_backref_num}(?:)"
65
+ added_capturing_groups_after_group[original_capturing_group_count] += 1
66
+ capture_group
67
+ result
92
68
  end
93
69
 
94
- def close_atomic_group
95
- close_group
96
- self.group_level_for_backreference = nil
97
- self.group_count_changed = true
70
+ # takes and returns 1-indexed group positions.
71
+ # new is different from old if capturing groups were added in between.
72
+ def new_capturing_group_position(old_position)
73
+ increment = 0
74
+ added_capturing_groups_after_group.each do |after_n_groups, count|
75
+ increment += count if after_n_groups < old_position
76
+ end
77
+ old_position + increment
98
78
  end
99
79
 
100
- def close_negative_lookbehind
101
- close_group
102
- self.negative_lookbehind = false
80
+ def original_capturing_group_count
81
+ capturing_group_count - total_added_capturing_groups
103
82
  end
104
83
 
105
- def atomic_group?
106
- group_level_for_backreference
84
+ def total_added_capturing_groups
85
+ added_capturing_groups_after_group.values.inject(0, &:+)
107
86
  end
108
87
 
109
- def base_level_of_atomic_group?
110
- group_level_for_backreference &&
111
- group_level.equal?(group_level_for_backreference + 1)
88
+ def store_named_group_position(name)
89
+ named_group_positions[name] = capturing_group_count + 1
112
90
  end
113
91
 
114
92
  private
115
93
 
116
- attr_accessor :group_level,
117
- :last_quantifier_end_index,
118
- :negative_set_levels,
119
- :set_level
94
+ attr_accessor :added_capturing_groups_after_group,
95
+ :capturing_group_count
120
96
 
121
97
  attr_writer :buffered_set_extractions,
122
98
  :buffered_set_members,
123
- :captured_group_count,
124
- :group_count_changed,
125
- :group_level_for_backreference,
126
- :negative_lookbehind
99
+ :in_atomic_group,
100
+ :named_group_positions,
101
+ :negative_base_set,
102
+ :root_options,
103
+ :warnings
127
104
  end
128
105
  end
129
106
  end
@@ -39,12 +39,16 @@ class JsRegex
39
39
  case subtype
40
40
  when :codepoint_list
41
41
  convert_codepoint_list
42
+ when :control
43
+ convert_control_sequence
42
44
  when :literal
43
45
  LiteralConverter.convert_data(data)
46
+ when :meta_sequence
47
+ convert_meta_sequence
44
48
  when *ESCAPES_SHARED_BY_RUBY_AND_JS
45
49
  pass_through
46
50
  else
47
- # Bell, Escape, HexWide, Control, Meta, MetaControl, ...
51
+ # Bell, Escape, HexWide, ...
48
52
  warn_of_unsupported_feature
49
53
  end
50
54
  end
@@ -56,6 +60,35 @@ class JsRegex
56
60
  end
57
61
  elements.join
58
62
  end
63
+
64
+ def convert_control_sequence
65
+ convert_meta_control_sequence ||
66
+ unicode_escape_for(control_sequence_to_s(data))
67
+ end
68
+
69
+ def convert_meta_sequence
70
+ convert_meta_control_sequence ||
71
+ unicode_escape_for(meta_char_to_char_code(data[-1]))
72
+ end
73
+
74
+ def convert_meta_control_sequence
75
+ return unless expression.class.to_s.include?('MetaControl')
76
+ unicode_escape_for(meta_char_to_char_code(control_sequence_to_s(data)))
77
+ end
78
+
79
+ def unicode_escape_for(char)
80
+ "\\u#{char.ord.to_s(16).upcase.rjust(4, '0')}"
81
+ end
82
+
83
+ def control_sequence_to_s(control_sequence)
84
+ five_lsb = control_sequence.unpack('B*').first[-5..-1]
85
+ ["000#{five_lsb}"].pack('B*')
86
+ end
87
+
88
+ def meta_char_to_char_code(meta_char)
89
+ byte_value = meta_char.ord
90
+ byte_value < 128 ? byte_value + 128 : byte_value
91
+ end
59
92
  end
60
93
  end
61
94
  end
@@ -11,7 +11,7 @@ class JsRegex
11
11
  private
12
12
 
13
13
  def convert_data
14
- '' # drop data without warning
14
+ drop_without_warning
15
15
  end
16
16
  end
17
17
  end
@@ -12,66 +12,52 @@ class JsRegex
12
12
 
13
13
  def convert_data
14
14
  case subtype
15
- when :atomic then open_atomic_group
16
- when :capture then open_group
17
- when :close then close_group
18
- when :comment then '' # drop whole group without warning
19
- when :named_ab, :named_sq then open_named_group
20
- when :options then open_options_group
21
- when :passive then open_passive_group
22
- else open_unsupported_group
15
+ when :atomic then emulate_atomic_group
16
+ when :capture then build_group
17
+ when :comment then drop_without_warning
18
+ when :named then build_named_group
19
+ when :options then build_options_group
20
+ when :passive then build_passive_group
21
+ when :absence then warn_of_unsupported_feature
22
+ else build_unsupported_group
23
23
  end
24
24
  end
25
25
 
26
- def open_atomic_group
27
- # Atomicity is emulated using backreferenced lookahead groups:
28
- # http://instanceof.me/post/52245507631
29
- # regex-emulate-atomic-grouping-with-lookahead
30
- if context.atomic_group?
31
- open_unsupported_group('nested atomic group')
26
+ def emulate_atomic_group
27
+ if context.in_atomic_group
28
+ build_unsupported_group('nested atomic group')
32
29
  else
33
30
  context.start_atomic_group
34
- open_group(head: '(?=(')
31
+ result = context.wrap_in_backrefed_lookahead(convert_subexpressions)
32
+ context.end_atomic_group
33
+ result
35
34
  end
36
35
  end
37
36
 
38
- def open_named_group
39
- # drop name without warning
40
- open_group(head: '(')
37
+ def build_named_group
38
+ # remember position, then drop name part without warning
39
+ context.store_named_group_position(expression.name)
40
+ build_group(head: '(')
41
41
  end
42
42
 
43
- def open_options_group
43
+ def build_options_group
44
44
  warn_of_unsupported_feature('group-specific options')
45
- open_group(head: '(')
45
+ build_group(head: '(')
46
46
  end
47
47
 
48
- def open_passive_group
49
- open_group(head: '(?:', capturing: false)
48
+ def build_passive_group
49
+ build_group(head: '(?:', capturing: false)
50
50
  end
51
51
 
52
- def open_unsupported_group(description = nil)
52
+ def build_unsupported_group(description = nil)
53
53
  warn_of_unsupported_feature(description)
54
- open_passive_group
54
+ build_passive_group
55
55
  end
56
56
 
57
- def open_group(opts = {})
58
- context.open_group
57
+ def build_group(opts = {})
59
58
  context.capture_group unless opts[:capturing].equal?(false)
60
- opts[:head] || pass_through
61
- end
62
-
63
- def close_group
64
- if context.negative_lookbehind
65
- context.close_negative_lookbehind
66
- ''
67
- elsif context.base_level_of_atomic_group?
68
- context.close_atomic_group
69
- # an empty passive group (?:) is appended as literal digits may follow
70
- "))\\#{context.captured_group_count}(?:)"
71
- else
72
- context.close_group
73
- ')'
74
- end
59
+ head = opts[:head] || pass_through
60
+ "#{head}#{convert_subexpressions})"
75
61
  end
76
62
  end
77
63
  end
@@ -13,16 +13,20 @@ class JsRegex
13
13
  def convert_data
14
14
  case subtype
15
15
  when :alternation
16
- pass_through
16
+ convert_alternation
17
17
  when :dot
18
- ruby_multiline_mode? ? '(?:.|\n)' : '.'
18
+ context.multiline? ? '(?:.|\n)' : '.'
19
19
  else
20
20
  warn_of_unsupported_feature
21
21
  end
22
22
  end
23
23
 
24
- def ruby_multiline_mode?
25
- (target.ruby_regex.options & Regexp::MULTILINE).nonzero?
24
+ def convert_alternation
25
+ branches = subexpressions.each_with_object([]) do |branch, arr|
26
+ converted_branch = convert_expressions(branch.expressions)
27
+ arr << converted_branch unless converted_branch.eql?('')
28
+ end
29
+ branches.join('|')
26
30
  end
27
31
  end
28
32
  end
@@ -14,12 +14,7 @@ class JsRegex
14
14
  private
15
15
 
16
16
  def convert_data
17
- if context.set?
18
- context.buffered_set_extractions << convert_property(true)
19
- ''
20
- else
21
- convert_property(true)
22
- end
17
+ convert_property(true)
23
18
  end
24
19
  end
25
20
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base'
4
+
5
+ class JsRegex
6
+ module Converter
7
+ #
8
+ # Template class implementation.
9
+ #
10
+ class RootConverter < JsRegex::Converter::Base
11
+ private
12
+
13
+ def convert_data
14
+ convert_subexpressions
15
+ end
16
+ end
17
+ end
18
+ end
@@ -3,107 +3,102 @@
3
3
  require_relative 'base'
4
4
  require_relative 'literal_converter'
5
5
  require_relative 'property_converter'
6
- require_relative 'type_converter'
7
6
 
8
7
  class JsRegex
9
8
  module Converter
10
9
  #
11
10
  # Template class implementation.
12
11
  #
13
- # This converter works a little differently from the others.
14
- #
15
- # It buffers anything that it finds within a set in the Context's
16
- # #buffered_set_members and #buffered_set_extractions Arrays,
17
- # returning an empty String for all passed tokens, and only when
18
- # the set is closed does it compile and return the final String.
19
- #
20
12
  class SetConverter < JsRegex::Converter::Base
21
13
  private
22
14
 
23
15
  def convert_data
24
- case subtype
25
- when :open then convert_open_subtype
26
- when :negate then convert_negate_subtype
27
- when :close then convert_close_subtype
28
- when :member, :member_hex, :range, :range_hex, :escape
29
- convert_member_subtype
30
- when /\Aclass_/ then convert_class_subtype
31
- when /\Atype_/ then convert_type_subtype
32
- when :backspace then convert_backspace_subtype
33
- when :intersection then warn_of_unsupported_feature('set intersection')
34
- else try_replacing_potential_property_subtype
16
+ if expression.set_level.equal?(0) # reached end of set expression
17
+ context.reset_set_context
18
+ context.negate_base_set if negative_set?
19
+ process_members
20
+ finalize_set
21
+ elsif negative_set?
22
+ warn_of_unsupported_feature('nested negative set data')
23
+ else # positive subset
24
+ process_members
35
25
  end
36
26
  end
37
27
 
38
- def convert_open_subtype
39
- context.open_set
40
- ''
28
+ def negative_set?
29
+ expression.negative?
41
30
  end
42
31
 
43
- def convert_negate_subtype
44
- if context.nested_set?
45
- warn_of_unsupported_feature('nested negative set data')
46
- end
47
- context.negate_set
48
- ''
32
+ def process_members
33
+ expression.each { |member| process_member(member) }
49
34
  end
50
35
 
51
- def convert_close_subtype
52
- context.close_set
53
- context.set? ? '' : finalize_set
54
- end
36
+ ASTRAL_PLANE_PATTERN = /[\u{10000}-\u{FFFFF}]/
37
+ PROPERTY_PATTERN = /\A(?:\[:|\\([pP])\{)(\^?)([^:\}]+)/
38
+
39
+ def process_member(member)
40
+ return convert_subset(member) unless member.instance_of?(String)
55
41
 
56
- def convert_member_subtype
57
- utf8_data = data.force_encoding('UTF-8')
58
- if /[\u{10000}-\u{FFFFF}]/ =~ utf8_data
42
+ utf8_data = member.dup.force_encoding('UTF-8')
43
+ case utf8_data
44
+ when ASTRAL_PLANE_PATTERN
59
45
  warn_of_unsupported_feature('astral plane set member')
46
+ when '\\h'
47
+ handle_hex_type
48
+ when '\\H'
49
+ handle_nonhex_type
50
+ when '&&'
51
+ warn_of_unsupported_feature('set intersection')
52
+ when PROPERTY_PATTERN
53
+ handle_property($1, $2, $3)
60
54
  else
61
55
  literal_conversion = LiteralConverter.convert_data(utf8_data)
62
56
  buffer_set_member(literal_conversion)
63
57
  end
64
58
  end
65
59
 
66
- def convert_class_subtype
67
- negated = subtype.to_s.start_with?('class_non')
68
- name = subtype[(negated ? 9 : 6)..-1]
69
- try_replacing_property(name, negated)
60
+ HEX_RANGES = 'A-Fa-f0-9'
61
+ NONHEX_SET = '[^A-Fa-f0-9]'
62
+
63
+ def handle_hex_type
64
+ buffer_set_member(HEX_RANGES)
70
65
  end
71
66
 
72
- def try_replacing_potential_property_subtype
73
- negated = data.start_with?('\\P')
74
- try_replacing_property(subtype, negated)
67
+ def handle_nonhex_type
68
+ if context.negative_base_set
69
+ warn_of_unsupported_feature('nonhex type in negative set')
70
+ else
71
+ buffer_set_extraction(NONHEX_SET)
72
+ end
75
73
  end
76
74
 
77
- def try_replacing_property(name, negated)
78
- if (replacement = PropertyConverter.property_replacement(name, negated))
75
+ def handle_property(sign, caret, name)
76
+ if context.negative_base_set
77
+ return warn_of_unsupported_feature('property in negative set')
78
+ end
79
+ std = standardize_property_name(name)
80
+ negated = sign.eql?('P') ^ caret.eql?('^')
81
+ if (replacement = PropertyConverter.property_replacement(std, negated))
79
82
  buffer_set_extraction(replacement)
80
83
  else
81
84
  warn_of_unsupported_feature('property')
82
85
  end
83
86
  end
84
87
 
85
- def convert_type_subtype
86
- if subtype.equal?(:type_hex)
87
- buffer_set_extraction(TypeConverter::HEX_EXPANSION)
88
- elsif subtype.equal?(:type_nonhex)
89
- buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
90
- else
91
- buffer_set_member(data)
92
- end
88
+ def standardize_property_name(name)
89
+ Regexp::Parser.parse("\\p{#{name}}").expressions.first.token
93
90
  end
94
91
 
95
- def convert_backspace_subtype
96
- buffer_set_extraction('[\b]')
92
+ def buffer_set_member(data)
93
+ context.buffered_set_members << data
97
94
  end
98
95
 
99
- def buffer_set_member(m)
100
- context.buffered_set_members << m unless context.nested_negation?
101
- ''
96
+ def buffer_set_extraction(data)
97
+ context.buffered_set_extractions << data
102
98
  end
103
99
 
104
- def buffer_set_extraction(e)
105
- context.buffered_set_extractions << e unless context.nested_negation?
106
- ''
100
+ def convert_subset(subset)
101
+ SetConverter.new.convert(subset, context)
107
102
  end
108
103
 
109
104
  def finalize_set
@@ -125,7 +120,7 @@ class JsRegex
125
120
  end
126
121
 
127
122
  def finalize_nondepleted_set(buffered_members, buffered_extractions)
128
- set = "[#{'^' if context.negative_set?(1)}#{buffered_members.join}]"
123
+ set = "[#{'^' if negative_set?}#{buffered_members.join}]"
129
124
  if buffered_extractions.empty?
130
125
  set
131
126
  else
@@ -8,8 +8,9 @@ class JsRegex
8
8
  # Template class implementation.
9
9
  #
10
10
  class TypeConverter < JsRegex::Converter::Base
11
- HEX_EXPANSION = '[A-Fa-f0-9]'
12
- NONHEX_EXPANSION = '[^A-Fa-f0-9]'
11
+ HEX_EXPANSION = '[A-Fa-f0-9]'
12
+ NONHEX_EXPANSION = '[^A-Fa-f0-9]'
13
+ LINEBREAK_EXPANSION = '(\r\n|\r|\n)'
13
14
 
14
15
  private
15
16
 
@@ -17,6 +18,7 @@ class JsRegex
17
18
  case subtype
18
19
  when :hex then HEX_EXPANSION
19
20
  when :nonhex then NONHEX_EXPANSION
21
+ when :linebreak then LINEBREAK_EXPANSION
20
22
  when :digit, :nondigit, :word, :nonword, :space, :nonspace
21
23
  pass_through
22
24
  else
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  # frozen_string_literal: true
3
- #
3
+
4
4
  # This hash maps named properties that are available in Ruby's ::Regexp to
5
5
  # standard sets that can be handled by JavaScript.
6
6
  #
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class JsRegex
4
- VERSION = '1.2.3'
4
+ VERSION = '2.0.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.3
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-04-12 00:00:00.000000000 Z
11
+ date: 2017-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: regexp_parser
@@ -16,7 +16,7 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.3.6
19
+ version: 0.4.6
20
20
  - - "<="
21
21
  - !ruby/object:Gem::Version
22
22
  version: 0.5.0
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- version: 0.3.6
29
+ version: 0.4.6
30
30
  - - "<="
31
31
  - !ruby/object:Gem::Version
32
32
  version: 0.5.0
@@ -100,6 +100,34 @@ dependencies:
100
100
  - - "~>"
101
101
  - !ruby/object:Gem::Version
102
102
  version: '0.12'
103
+ - !ruby/object:Gem::Dependency
104
+ name: codeclimate-test-reporter
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '1.0'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '1.0'
117
+ - !ruby/object:Gem::Dependency
118
+ name: mutant-rspec
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '0.8'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '0.8'
103
131
  description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
104
132
  care of various incompatibilities and returning warnings for unsolvable differences.
105
133
  email:
@@ -110,6 +138,7 @@ extra_rdoc_files: []
110
138
  files:
111
139
  - lib/js_regex.rb
112
140
  - lib/js_regex/conversion.rb
141
+ - lib/js_regex/converter.rb
113
142
  - lib/js_regex/converter/anchor_converter.rb
114
143
  - lib/js_regex/converter/assertion_converter.rb
115
144
  - lib/js_regex/converter/backreference_converter.rb
@@ -123,7 +152,7 @@ files:
123
152
  - lib/js_regex/converter/meta_converter.rb
124
153
  - lib/js_regex/converter/nonproperty_converter.rb
125
154
  - lib/js_regex/converter/property_converter.rb
126
- - lib/js_regex/converter/quantifier_converter.rb
155
+ - lib/js_regex/converter/root_converter.rb
127
156
  - lib/js_regex/converter/set_converter.rb
128
157
  - lib/js_regex/converter/type_converter.rb
129
158
  - lib/js_regex/converter/unsupported_token_converter.rb
@@ -149,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
149
178
  version: '0'
150
179
  requirements: []
151
180
  rubyforge_project:
152
- rubygems_version: 2.6.11
181
+ rubygems_version: 2.6.13
153
182
  signing_key:
154
183
  specification_version: 4
155
184
  summary: Converts Ruby regexes to JavaScript regexes.
@@ -1,31 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'base'
4
-
5
- class JsRegex
6
- module Converter
7
- #
8
- # Template class implementation.
9
- #
10
- class QuantifierConverter < JsRegex::Converter::Base
11
- private
12
-
13
- def convert_data
14
- if context.stacked_quantifier?(start_index, end_index)
15
- warn_of_unsupported_feature('adjacent quantifiers')
16
- else
17
- convert_quantifier
18
- end
19
- end
20
-
21
- def convert_quantifier
22
- if data.length > 1 && data.end_with?('+')
23
- warn_of_unsupported_feature('declaration of quantifier as possessive')
24
- data[0..-2]
25
- else
26
- pass_through
27
- end
28
- end
29
- end
30
- end
31
- end