js_regex 1.2.3 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3d910abcbfe22ab6a470b8784025da0d81a7d823
4
- data.tar.gz: 8a30eddfc5efe71a598afe7064f3a0ee944e017e
3
+ metadata.gz: 2abae347b737c7635396c45a3a1489c72d6957e5
4
+ data.tar.gz: 5b674eaf6902686ab94648000308090fa154783c
5
5
  SHA512:
6
- metadata.gz: 031334a1f4d48e5f432cb6bfada63eb2821b61d8d2c8e052548c98db0f5a8fd83f606305c00976c51c6e6f1caaa6f0def2c0f405914a4daa694c2606b92a5c25
7
- data.tar.gz: eb7be820e5112f488fa69326e4870a0f9ac8d4538138c41d83e201551a8aa2a2cfe36ca5cba18e944a5a89a0fd25fc8783306062bea7017f1fddbb559d1fb299
6
+ metadata.gz: 3406867e15cb70cbc0a9f137f2449fce1af94fb448bcdbee3cfa6e439f64c92d4df9a5407fee251ea127a59faf8f6cbf81d55b38279d79e1a6de24e5b1c6673a
7
+ data.tar.gz: d0e63535e25eb8adb3e8d95859a884ce0eaec81ba25f1da8970213f4572fd558b9cbb7e3b25dc59d261213a9a0d7f9e0e7bb29b4f14cffba5fa9df49b0ec4780
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
- #
2
+
3
3
  # JsRegex converts ::Regexp instances to JavaScript.
4
4
  #
5
5
  # Usage:
@@ -10,6 +10,7 @@
10
10
  #
11
11
  class JsRegex
12
12
  require_relative File.join('js_regex', 'conversion')
13
+ require_relative File.join('js_regex', 'version')
13
14
  require 'json'
14
15
 
15
16
  attr_reader :source, :options, :warnings
@@ -2,88 +2,36 @@
2
2
 
3
3
  class JsRegex
4
4
  #
5
- # This class acts as a facade, creating specific Converters and
6
- # passing Regexp::Scanner tokens to them, reusing Converters as needed.
5
+ # This class acts as a facade, passing a regex to the converters.
7
6
  #
8
7
  # ::of returns a source String, options String, and warnings Array.
9
8
  #
10
9
  class Conversion
11
10
  require 'regexp_parser'
12
- Dir[File.join(File.dirname(__FILE__), '**', '*.rb')].each { |f| require f }
11
+ require_relative 'converter'
13
12
 
14
- attr_reader :ruby_regex, :context, :converters, :source, :options, :warnings
15
-
16
- def initialize(ruby_regex)
17
- self.ruby_regex = ruby_regex
18
-
19
- self.context = Converter::Context.new
20
- self.converters = {}
21
-
22
- self.source = ''.dup
23
- self.options = ''.dup
24
- self.warnings = []
25
-
26
- convert_source
27
- convert_options
28
- perform_sanity_check
29
- end
30
-
31
- def self.of(ruby_regex)
32
- conversion = new(ruby_regex)
33
- [conversion.source, conversion.options, conversion.warnings]
34
- end
35
-
36
- private
37
-
38
- attr_writer :ruby_regex, :context, :converters, :source, :options, :warnings
39
-
40
- CONVERTER_MAP = Hash.new(Converter::UnsupportedTokenConverter).merge(
41
- anchor: Converter::AnchorConverter,
42
- assertion: Converter::AssertionConverter,
43
- backref: Converter::BackreferenceConverter,
44
- conditional: Converter::ConditionalConverter,
45
- escape: Converter::EscapeConverter,
46
- free_space: Converter::FreespaceConverter,
47
- group: Converter::GroupConverter,
48
- literal: Converter::LiteralConverter,
49
- meta: Converter::MetaConverter,
50
- nonproperty: Converter::NonpropertyConverter,
51
- property: Converter::PropertyConverter,
52
- quantifier: Converter::QuantifierConverter,
53
- set: Converter::SetConverter,
54
- subset: Converter::SetConverter,
55
- type: Converter::TypeConverter
56
- ).freeze
57
-
58
- def convert_source
59
- Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
60
- # There might be a lot of tokens, so don't wrap their data in objects.
61
- # Even just wrapping them in simple structs or attr_reader objects
62
- # can lead to 60%+ longer processing times for large regexes.
63
- converter_for_token_class(token_class)
64
- .convert(token_class, subtype, data, s, e)
13
+ class << self
14
+ def of(ruby_regex)
15
+ source, warnings = convert_source(ruby_regex)
16
+ options = convert_options(ruby_regex)
17
+ [source, options, warnings]
65
18
  end
66
- end
67
-
68
- def converter_for_token_class(token_class)
69
- converters[token_class] ||= CONVERTER_MAP[token_class].new(self, context)
70
- end
71
19
 
72
- def convert_options
73
- options << 'g' # all Ruby regexes are what is called "global" in JS
74
- options << 'i' if (ruby_regex.options & Regexp::IGNORECASE).nonzero?
75
- end
20
+ private
76
21
 
77
- SURROGATE_CODEPOINT_PATTERN = /\\uD[89A-F]\h\h/i
22
+ def convert_source(ruby_regex)
23
+ context = Converter::Context.new(ruby_regex)
24
+ expression_tree = Regexp::Parser.parse(ruby_regex)
25
+ [
26
+ Converter::RootConverter.new.convert(expression_tree, context),
27
+ context.warnings
28
+ ]
29
+ end
78
30
 
79
- def perform_sanity_check
80
- # Ruby regex capabilities are a superset of JS regex capabilities in
81
- # the source part. So if this raises an Error, a Converter messed up.
82
- # Ignore that Ruby won't accept surrogate pairs, though.
83
- Regexp.new(source.gsub(SURROGATE_CODEPOINT_PATTERN, '.'))
84
- rescue ArgumentError, RegexpError, SyntaxError => e
85
- self.source = ''
86
- warnings << e.message
31
+ def convert_options(ruby_regex)
32
+ ignore_case = (ruby_regex.options & Regexp::IGNORECASE).nonzero?
33
+ ignore_case ? 'gi' : 'g'
34
+ end
87
35
  end
88
36
  end
89
37
  end
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ class JsRegex
4
+ module Converter
5
+ Dir[File.join(File.dirname(__FILE__), 'converter', '*.rb')].each do |file|
6
+ require file
7
+ end
8
+
9
+ MAP = Hash.new(UnsupportedTokenConverter).merge(
10
+ anchor: AnchorConverter,
11
+ assertion: AssertionConverter,
12
+ backref: BackreferenceConverter,
13
+ conditional: ConditionalConverter,
14
+ escape: EscapeConverter,
15
+ free_space: FreespaceConverter,
16
+ group: GroupConverter,
17
+ literal: LiteralConverter,
18
+ meta: MetaConverter,
19
+ nonproperty: NonpropertyConverter,
20
+ property: PropertyConverter,
21
+ set: SetConverter,
22
+ type: TypeConverter
23
+ ).freeze
24
+
25
+ def self.for(expression)
26
+ MAP[expression.type].new
27
+ end
28
+ end
29
+ end
@@ -15,12 +15,19 @@ class JsRegex
15
15
  when :bol, :bos then '^'
16
16
  when :eol, :eos then '$'
17
17
  when :eos_ob_eol then '(?=\n?$)'
18
- when :word_boundary then '\b'
19
- when :nonword_boundary then '\B'
18
+ when :word_boundary then pass_boundary_with_warning('\b')
19
+ when :nonword_boundary then pass_boundary_with_warning('\B')
20
20
  else
21
21
  warn_of_unsupported_feature
22
22
  end
23
23
  end
24
+
25
+ def pass_boundary_with_warning(boundary)
26
+ warn("The boundary '#{boundary}' at index #{expression.ts} "\
27
+ 'is not unicode-aware in JavaScript, '\
28
+ 'so it might act differently than in Ruby.')
29
+ boundary
30
+ end
24
31
  end
25
32
  end
26
33
  end
@@ -16,12 +16,11 @@ class JsRegex
16
16
  def convert_data
17
17
  case subtype
18
18
  when :lookahead, :nlookahead
19
- open_group(capturing: false)
19
+ build_group(capturing: false)
20
20
  when :nlookbehind
21
- context.start_negative_lookbehind
22
21
  warn_of_unsupported_feature('negative lookbehind assertion')
23
22
  else # :lookbehind, ...
24
- open_unsupported_group
23
+ build_unsupported_group
25
24
  end
26
25
  end
27
26
  end
@@ -12,20 +12,30 @@ class JsRegex
12
12
 
13
13
  def convert_data
14
14
  case subtype
15
- when :number
16
- convert_number_backref
15
+ when :number, :number_ref
16
+ convert_number_ref
17
+ when :number_rel_ref
18
+ convert_number_rel_ref
19
+ when :name_ref
20
+ convert_name_ref
17
21
  else
18
22
  warn_of_unsupported_feature
19
23
  end
20
24
  end
21
25
 
22
- def convert_number_backref
23
- if context.group_count_changed
24
- warn_of_unsupported_feature('number backreference following a '\
25
- 'feature that changes the group count (such as an atomic group)')
26
- else
27
- pass_through
28
- end
26
+ def convert_number_ref
27
+ # after regexp_parser update, replace data[/\d+/] with expression.number
28
+ "\\#{context.new_capturing_group_position(Integer(data[/\d+/]))}"
29
+ end
30
+
31
+ def convert_number_rel_ref
32
+ absolute_position = Integer(expression.number) +
33
+ context.original_capturing_group_count + 1
34
+ "\\#{context.new_capturing_group_position(absolute_position)}"
35
+ end
36
+
37
+ def convert_name_ref
38
+ "\\#{context.named_group_positions.fetch(expression.name)}"
29
39
  end
30
40
  end
31
41
  end
@@ -6,35 +6,61 @@ class JsRegex
6
6
  # Template class. Implement #convert_data in subclasses.
7
7
  #
8
8
  class Base
9
- attr_reader :target, :context
9
+ def convert(expression, context)
10
+ self.context = context
11
+ self.expression = expression
10
12
 
11
- def initialize(target, context)
12
- @target = target
13
- @context = context
13
+ source = convert_data
14
+ apply_quantifier(source)
14
15
  end
15
16
 
16
- def convert(token_class, subtype, data, start_index, end_index)
17
- self.token_class = token_class
18
- self.subtype = subtype
19
- self.data = data
20
- self.start_index = start_index
21
- self.end_index = end_index
17
+ private
18
+
19
+ attr_accessor :context, :expression
22
20
 
23
- result = convert_data
24
- target.source << (context.valid? ? result : '')
21
+ def subtype
22
+ expression.token
25
23
  end
26
24
 
27
- private
25
+ def data
26
+ expression.text
27
+ end
28
+ alias pass_through data
28
29
 
29
- attr_accessor :token_class, :subtype, :data, :start_index, :end_index
30
+ def apply_quantifier(source)
31
+ return source if source.empty? || !(quantifier = expression.quantifier)
30
32
 
31
- alias pass_through data
33
+ if quantifier.mode.equal?(:possessive)
34
+ context.wrap_in_backrefed_lookahead(source + quantifier.text[0..-2])
35
+ else
36
+ source + quantifier
37
+ end
38
+ end
39
+
40
+ def convert_subexpressions
41
+ convert_expressions(subexpressions)
42
+ end
43
+
44
+ def convert_expressions(expressions)
45
+ expressions.map { |exp| Converter.for(exp).convert(exp, context) }.join
46
+ end
47
+
48
+ def subexpressions
49
+ expression.expressions
50
+ end
32
51
 
33
52
  def warn_of_unsupported_feature(description = nil)
34
- description ||= "#{subtype} #{token_class}".tr('_', ' ')
35
- full_description = "#{description} '#{data}'"
36
- target.warnings << "Dropped unsupported #{full_description} "\
37
- "at index #{start_index}...#{end_index}"
53
+ description ||= "#{subtype} #{expression.type}".tr('_', ' ')
54
+ full_desc = "#{description} '#{expression}'"
55
+ warn("Dropped unsupported #{full_desc} at index #{expression.ts}")
56
+ ''
57
+ end
58
+
59
+ def warn(text)
60
+ context.warnings << text
61
+ end
62
+
63
+ def drop_without_warning
38
64
  ''
39
65
  end
40
66
  end
@@ -11,15 +11,12 @@ class JsRegex
11
11
  private
12
12
 
13
13
  def convert_data
14
- case subtype
15
- when :open
16
- warn_of_unsupported_feature('conditional')
17
- '('
18
- when :separator, :close
19
- pass_through
20
- else
21
- '' # one warning is enough, don't warn about other parts
14
+ warn_of_unsupported_feature('conditional')
15
+ branches = subexpressions.drop(1).each_with_object([]) do |branch, arr|
16
+ converted_branch = convert_expressions(branch)
17
+ arr << converted_branch unless converted_branch.eql?('')
22
18
  end
19
+ "(?:#{branches.join('|')})"
23
20
  end
24
21
  end
25
22
  end
@@ -10,120 +10,97 @@ class JsRegex
10
10
  class Context
11
11
  attr_reader :buffered_set_extractions,
12
12
  :buffered_set_members,
13
- :captured_group_count,
14
- :group_count_changed,
15
- :group_level_for_backreference,
16
- :negative_lookbehind
13
+ :in_atomic_group,
14
+ :named_group_positions,
15
+ :negative_base_set,
16
+ :root_options,
17
+ :warnings
17
18
 
18
- def initialize
19
- self.buffered_set_members = []
20
- self.buffered_set_extractions = []
21
- self.captured_group_count = 0
22
- self.group_level = 0
23
- self.negative_set_levels = []
24
- self.set_level = 0
25
- end
26
-
27
- def valid?
28
- !negative_lookbehind
29
- end
30
-
31
- def stacked_quantifier?(quantifier_start_index, quantifier_end_index)
32
- is_stacked = last_quantifier_end_index.equal?(quantifier_start_index)
33
- self.last_quantifier_end_index = quantifier_end_index
34
- is_stacked
35
- end
36
-
37
- # set context
38
-
39
- def open_set
40
- self.set_level = set_level + 1
41
- if set_level == 1
42
- buffered_set_members.clear
43
- buffered_set_extractions.clear
44
- end
45
- negative_set_levels.delete(set_level)
46
- end
19
+ def initialize(ruby_regex)
20
+ self.added_capturing_groups_after_group = Hash.new(0)
21
+ self.capturing_group_count = 0
22
+ self.named_group_positions = {}
23
+ self.warnings = []
47
24
 
48
- def negate_set
49
- self.negative_set_levels = negative_set_levels | [set_level]
25
+ self.root_options = {}
26
+ root_options[:m] = !(ruby_regex.options & Regexp::MULTILINE).equal?(0)
50
27
  end
51
28
 
52
- def close_set
53
- self.set_level = set_level - 1
54
- end
29
+ # option context
55
30
 
56
- def set?
57
- set_level > 0
31
+ def multiline?
32
+ root_options.fetch(:m)
58
33
  end
59
34
 
60
- def negative_set?(level = set_level)
61
- negative_set_levels.include?(level)
62
- end
35
+ # set context
63
36
 
64
- def nested_negation?
65
- nested_set? && negative_set?
37
+ def negate_base_set
38
+ self.negative_base_set = true
66
39
  end
67
40
 
68
- def nested_set?
69
- set_level > 1
41
+ def reset_set_context
42
+ self.buffered_set_extractions = []
43
+ self.buffered_set_members = []
44
+ self.negative_base_set = false
70
45
  end
71
46
 
72
47
  # group context
73
48
 
74
- def open_group
75
- self.group_level = group_level + 1
76
- end
77
-
78
49
  def capture_group
79
- self.captured_group_count = captured_group_count + 1
50
+ self.capturing_group_count = capturing_group_count + 1
80
51
  end
81
52
 
82
53
  def start_atomic_group
83
- self.group_level_for_backreference = group_level
54
+ self.in_atomic_group = true
84
55
  end
85
56
 
86
- def start_negative_lookbehind
87
- self.negative_lookbehind = true
57
+ def end_atomic_group
58
+ self.in_atomic_group = false
88
59
  end
89
60
 
90
- def close_group
91
- self.group_level = group_level - 1
61
+ def wrap_in_backrefed_lookahead(content)
62
+ new_backref_num = capturing_group_count + 1
63
+ # an empty passive group (?:) is appended as literal digits may follow
64
+ result = "(?=(#{content}))\\#{new_backref_num}(?:)"
65
+ added_capturing_groups_after_group[original_capturing_group_count] += 1
66
+ capture_group
67
+ result
92
68
  end
93
69
 
94
- def close_atomic_group
95
- close_group
96
- self.group_level_for_backreference = nil
97
- self.group_count_changed = true
70
+ # takes and returns 1-indexed group positions.
71
+ # new is different from old if capturing groups were added in between.
72
+ def new_capturing_group_position(old_position)
73
+ increment = 0
74
+ added_capturing_groups_after_group.each do |after_n_groups, count|
75
+ increment += count if after_n_groups < old_position
76
+ end
77
+ old_position + increment
98
78
  end
99
79
 
100
- def close_negative_lookbehind
101
- close_group
102
- self.negative_lookbehind = false
80
+ def original_capturing_group_count
81
+ capturing_group_count - total_added_capturing_groups
103
82
  end
104
83
 
105
- def atomic_group?
106
- group_level_for_backreference
84
+ def total_added_capturing_groups
85
+ added_capturing_groups_after_group.values.inject(0, &:+)
107
86
  end
108
87
 
109
- def base_level_of_atomic_group?
110
- group_level_for_backreference &&
111
- group_level.equal?(group_level_for_backreference + 1)
88
+ def store_named_group_position(name)
89
+ named_group_positions[name] = capturing_group_count + 1
112
90
  end
113
91
 
114
92
  private
115
93
 
116
- attr_accessor :group_level,
117
- :last_quantifier_end_index,
118
- :negative_set_levels,
119
- :set_level
94
+ attr_accessor :added_capturing_groups_after_group,
95
+ :capturing_group_count
120
96
 
121
97
  attr_writer :buffered_set_extractions,
122
98
  :buffered_set_members,
123
- :captured_group_count,
124
- :group_count_changed,
125
- :group_level_for_backreference,
126
- :negative_lookbehind
99
+ :in_atomic_group,
100
+ :named_group_positions,
101
+ :negative_base_set,
102
+ :root_options,
103
+ :warnings
127
104
  end
128
105
  end
129
106
  end
@@ -39,12 +39,16 @@ class JsRegex
39
39
  case subtype
40
40
  when :codepoint_list
41
41
  convert_codepoint_list
42
+ when :control
43
+ convert_control_sequence
42
44
  when :literal
43
45
  LiteralConverter.convert_data(data)
46
+ when :meta_sequence
47
+ convert_meta_sequence
44
48
  when *ESCAPES_SHARED_BY_RUBY_AND_JS
45
49
  pass_through
46
50
  else
47
- # Bell, Escape, HexWide, Control, Meta, MetaControl, ...
51
+ # Bell, Escape, HexWide, ...
48
52
  warn_of_unsupported_feature
49
53
  end
50
54
  end
@@ -56,6 +60,35 @@ class JsRegex
56
60
  end
57
61
  elements.join
58
62
  end
63
+
64
+ def convert_control_sequence
65
+ convert_meta_control_sequence ||
66
+ unicode_escape_for(control_sequence_to_s(data))
67
+ end
68
+
69
+ def convert_meta_sequence
70
+ convert_meta_control_sequence ||
71
+ unicode_escape_for(meta_char_to_char_code(data[-1]))
72
+ end
73
+
74
+ def convert_meta_control_sequence
75
+ return unless expression.class.to_s.include?('MetaControl')
76
+ unicode_escape_for(meta_char_to_char_code(control_sequence_to_s(data)))
77
+ end
78
+
79
+ def unicode_escape_for(char)
80
+ "\\u#{char.ord.to_s(16).upcase.rjust(4, '0')}"
81
+ end
82
+
83
+ def control_sequence_to_s(control_sequence)
84
+ five_lsb = control_sequence.unpack('B*').first[-5..-1]
85
+ ["000#{five_lsb}"].pack('B*')
86
+ end
87
+
88
+ def meta_char_to_char_code(meta_char)
89
+ byte_value = meta_char.ord
90
+ byte_value < 128 ? byte_value + 128 : byte_value
91
+ end
59
92
  end
60
93
  end
61
94
  end
@@ -11,7 +11,7 @@ class JsRegex
11
11
  private
12
12
 
13
13
  def convert_data
14
- '' # drop data without warning
14
+ drop_without_warning
15
15
  end
16
16
  end
17
17
  end
@@ -12,66 +12,52 @@ class JsRegex
12
12
 
13
13
  def convert_data
14
14
  case subtype
15
- when :atomic then open_atomic_group
16
- when :capture then open_group
17
- when :close then close_group
18
- when :comment then '' # drop whole group without warning
19
- when :named_ab, :named_sq then open_named_group
20
- when :options then open_options_group
21
- when :passive then open_passive_group
22
- else open_unsupported_group
15
+ when :atomic then emulate_atomic_group
16
+ when :capture then build_group
17
+ when :comment then drop_without_warning
18
+ when :named then build_named_group
19
+ when :options then build_options_group
20
+ when :passive then build_passive_group
21
+ when :absence then warn_of_unsupported_feature
22
+ else build_unsupported_group
23
23
  end
24
24
  end
25
25
 
26
- def open_atomic_group
27
- # Atomicity is emulated using backreferenced lookahead groups:
28
- # http://instanceof.me/post/52245507631
29
- # regex-emulate-atomic-grouping-with-lookahead
30
- if context.atomic_group?
31
- open_unsupported_group('nested atomic group')
26
+ def emulate_atomic_group
27
+ if context.in_atomic_group
28
+ build_unsupported_group('nested atomic group')
32
29
  else
33
30
  context.start_atomic_group
34
- open_group(head: '(?=(')
31
+ result = context.wrap_in_backrefed_lookahead(convert_subexpressions)
32
+ context.end_atomic_group
33
+ result
35
34
  end
36
35
  end
37
36
 
38
- def open_named_group
39
- # drop name without warning
40
- open_group(head: '(')
37
+ def build_named_group
38
+ # remember position, then drop name part without warning
39
+ context.store_named_group_position(expression.name)
40
+ build_group(head: '(')
41
41
  end
42
42
 
43
- def open_options_group
43
+ def build_options_group
44
44
  warn_of_unsupported_feature('group-specific options')
45
- open_group(head: '(')
45
+ build_group(head: '(')
46
46
  end
47
47
 
48
- def open_passive_group
49
- open_group(head: '(?:', capturing: false)
48
+ def build_passive_group
49
+ build_group(head: '(?:', capturing: false)
50
50
  end
51
51
 
52
- def open_unsupported_group(description = nil)
52
+ def build_unsupported_group(description = nil)
53
53
  warn_of_unsupported_feature(description)
54
- open_passive_group
54
+ build_passive_group
55
55
  end
56
56
 
57
- def open_group(opts = {})
58
- context.open_group
57
+ def build_group(opts = {})
59
58
  context.capture_group unless opts[:capturing].equal?(false)
60
- opts[:head] || pass_through
61
- end
62
-
63
- def close_group
64
- if context.negative_lookbehind
65
- context.close_negative_lookbehind
66
- ''
67
- elsif context.base_level_of_atomic_group?
68
- context.close_atomic_group
69
- # an empty passive group (?:) is appended as literal digits may follow
70
- "))\\#{context.captured_group_count}(?:)"
71
- else
72
- context.close_group
73
- ')'
74
- end
59
+ head = opts[:head] || pass_through
60
+ "#{head}#{convert_subexpressions})"
75
61
  end
76
62
  end
77
63
  end
@@ -13,16 +13,20 @@ class JsRegex
13
13
  def convert_data
14
14
  case subtype
15
15
  when :alternation
16
- pass_through
16
+ convert_alternation
17
17
  when :dot
18
- ruby_multiline_mode? ? '(?:.|\n)' : '.'
18
+ context.multiline? ? '(?:.|\n)' : '.'
19
19
  else
20
20
  warn_of_unsupported_feature
21
21
  end
22
22
  end
23
23
 
24
- def ruby_multiline_mode?
25
- (target.ruby_regex.options & Regexp::MULTILINE).nonzero?
24
+ def convert_alternation
25
+ branches = subexpressions.each_with_object([]) do |branch, arr|
26
+ converted_branch = convert_expressions(branch.expressions)
27
+ arr << converted_branch unless converted_branch.eql?('')
28
+ end
29
+ branches.join('|')
26
30
  end
27
31
  end
28
32
  end
@@ -14,12 +14,7 @@ class JsRegex
14
14
  private
15
15
 
16
16
  def convert_data
17
- if context.set?
18
- context.buffered_set_extractions << convert_property(true)
19
- ''
20
- else
21
- convert_property(true)
22
- end
17
+ convert_property(true)
23
18
  end
24
19
  end
25
20
  end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base'
4
+
5
+ class JsRegex
6
+ module Converter
7
+ #
8
+ # Template class implementation.
9
+ #
10
+ class RootConverter < JsRegex::Converter::Base
11
+ private
12
+
13
+ def convert_data
14
+ convert_subexpressions
15
+ end
16
+ end
17
+ end
18
+ end
@@ -3,107 +3,102 @@
3
3
  require_relative 'base'
4
4
  require_relative 'literal_converter'
5
5
  require_relative 'property_converter'
6
- require_relative 'type_converter'
7
6
 
8
7
  class JsRegex
9
8
  module Converter
10
9
  #
11
10
  # Template class implementation.
12
11
  #
13
- # This converter works a little differently from the others.
14
- #
15
- # It buffers anything that it finds within a set in the Context's
16
- # #buffered_set_members and #buffered_set_extractions Arrays,
17
- # returning an empty String for all passed tokens, and only when
18
- # the set is closed does it compile and return the final String.
19
- #
20
12
  class SetConverter < JsRegex::Converter::Base
21
13
  private
22
14
 
23
15
  def convert_data
24
- case subtype
25
- when :open then convert_open_subtype
26
- when :negate then convert_negate_subtype
27
- when :close then convert_close_subtype
28
- when :member, :member_hex, :range, :range_hex, :escape
29
- convert_member_subtype
30
- when /\Aclass_/ then convert_class_subtype
31
- when /\Atype_/ then convert_type_subtype
32
- when :backspace then convert_backspace_subtype
33
- when :intersection then warn_of_unsupported_feature('set intersection')
34
- else try_replacing_potential_property_subtype
16
+ if expression.set_level.equal?(0) # reached end of set expression
17
+ context.reset_set_context
18
+ context.negate_base_set if negative_set?
19
+ process_members
20
+ finalize_set
21
+ elsif negative_set?
22
+ warn_of_unsupported_feature('nested negative set data')
23
+ else # positive subset
24
+ process_members
35
25
  end
36
26
  end
37
27
 
38
- def convert_open_subtype
39
- context.open_set
40
- ''
28
+ def negative_set?
29
+ expression.negative?
41
30
  end
42
31
 
43
- def convert_negate_subtype
44
- if context.nested_set?
45
- warn_of_unsupported_feature('nested negative set data')
46
- end
47
- context.negate_set
48
- ''
32
+ def process_members
33
+ expression.each { |member| process_member(member) }
49
34
  end
50
35
 
51
- def convert_close_subtype
52
- context.close_set
53
- context.set? ? '' : finalize_set
54
- end
36
+ ASTRAL_PLANE_PATTERN = /[\u{10000}-\u{FFFFF}]/
37
+ PROPERTY_PATTERN = /\A(?:\[:|\\([pP])\{)(\^?)([^:\}]+)/
38
+
39
+ def process_member(member)
40
+ return convert_subset(member) unless member.instance_of?(String)
55
41
 
56
- def convert_member_subtype
57
- utf8_data = data.force_encoding('UTF-8')
58
- if /[\u{10000}-\u{FFFFF}]/ =~ utf8_data
42
+ utf8_data = member.dup.force_encoding('UTF-8')
43
+ case utf8_data
44
+ when ASTRAL_PLANE_PATTERN
59
45
  warn_of_unsupported_feature('astral plane set member')
46
+ when '\\h'
47
+ handle_hex_type
48
+ when '\\H'
49
+ handle_nonhex_type
50
+ when '&&'
51
+ warn_of_unsupported_feature('set intersection')
52
+ when PROPERTY_PATTERN
53
+ handle_property($1, $2, $3)
60
54
  else
61
55
  literal_conversion = LiteralConverter.convert_data(utf8_data)
62
56
  buffer_set_member(literal_conversion)
63
57
  end
64
58
  end
65
59
 
66
- def convert_class_subtype
67
- negated = subtype.to_s.start_with?('class_non')
68
- name = subtype[(negated ? 9 : 6)..-1]
69
- try_replacing_property(name, negated)
60
+ HEX_RANGES = 'A-Fa-f0-9'
61
+ NONHEX_SET = '[^A-Fa-f0-9]'
62
+
63
+ def handle_hex_type
64
+ buffer_set_member(HEX_RANGES)
70
65
  end
71
66
 
72
- def try_replacing_potential_property_subtype
73
- negated = data.start_with?('\\P')
74
- try_replacing_property(subtype, negated)
67
+ def handle_nonhex_type
68
+ if context.negative_base_set
69
+ warn_of_unsupported_feature('nonhex type in negative set')
70
+ else
71
+ buffer_set_extraction(NONHEX_SET)
72
+ end
75
73
  end
76
74
 
77
- def try_replacing_property(name, negated)
78
- if (replacement = PropertyConverter.property_replacement(name, negated))
75
+ def handle_property(sign, caret, name)
76
+ if context.negative_base_set
77
+ return warn_of_unsupported_feature('property in negative set')
78
+ end
79
+ std = standardize_property_name(name)
80
+ negated = sign.eql?('P') ^ caret.eql?('^')
81
+ if (replacement = PropertyConverter.property_replacement(std, negated))
79
82
  buffer_set_extraction(replacement)
80
83
  else
81
84
  warn_of_unsupported_feature('property')
82
85
  end
83
86
  end
84
87
 
85
- def convert_type_subtype
86
- if subtype.equal?(:type_hex)
87
- buffer_set_extraction(TypeConverter::HEX_EXPANSION)
88
- elsif subtype.equal?(:type_nonhex)
89
- buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
90
- else
91
- buffer_set_member(data)
92
- end
88
+ def standardize_property_name(name)
89
+ Regexp::Parser.parse("\\p{#{name}}").expressions.first.token
93
90
  end
94
91
 
95
- def convert_backspace_subtype
96
- buffer_set_extraction('[\b]')
92
+ def buffer_set_member(data)
93
+ context.buffered_set_members << data
97
94
  end
98
95
 
99
- def buffer_set_member(m)
100
- context.buffered_set_members << m unless context.nested_negation?
101
- ''
96
+ def buffer_set_extraction(data)
97
+ context.buffered_set_extractions << data
102
98
  end
103
99
 
104
- def buffer_set_extraction(e)
105
- context.buffered_set_extractions << e unless context.nested_negation?
106
- ''
100
+ def convert_subset(subset)
101
+ SetConverter.new.convert(subset, context)
107
102
  end
108
103
 
109
104
  def finalize_set
@@ -125,7 +120,7 @@ class JsRegex
125
120
  end
126
121
 
127
122
  def finalize_nondepleted_set(buffered_members, buffered_extractions)
128
- set = "[#{'^' if context.negative_set?(1)}#{buffered_members.join}]"
123
+ set = "[#{'^' if negative_set?}#{buffered_members.join}]"
129
124
  if buffered_extractions.empty?
130
125
  set
131
126
  else
@@ -8,8 +8,9 @@ class JsRegex
8
8
  # Template class implementation.
9
9
  #
10
10
  class TypeConverter < JsRegex::Converter::Base
11
- HEX_EXPANSION = '[A-Fa-f0-9]'
12
- NONHEX_EXPANSION = '[^A-Fa-f0-9]'
11
+ HEX_EXPANSION = '[A-Fa-f0-9]'
12
+ NONHEX_EXPANSION = '[^A-Fa-f0-9]'
13
+ LINEBREAK_EXPANSION = '(\r\n|\r|\n)'
13
14
 
14
15
  private
15
16
 
@@ -17,6 +18,7 @@ class JsRegex
17
18
  case subtype
18
19
  when :hex then HEX_EXPANSION
19
20
  when :nonhex then NONHEX_EXPANSION
21
+ when :linebreak then LINEBREAK_EXPANSION
20
22
  when :digit, :nondigit, :word, :nonword, :space, :nonspace
21
23
  pass_through
22
24
  else
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  # frozen_string_literal: true
3
- #
3
+
4
4
  # This hash maps named properties that are available in Ruby's ::Regexp to
5
5
  # standard sets that can be handled by JavaScript.
6
6
  #
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  class JsRegex
4
- VERSION = '1.2.3'
4
+ VERSION = '2.0.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: js_regex
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.3
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-04-12 00:00:00.000000000 Z
11
+ date: 2017-09-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: regexp_parser
@@ -16,7 +16,7 @@ dependencies:
16
16
  requirements:
17
17
  - - ">="
18
18
  - !ruby/object:Gem::Version
19
- version: 0.3.6
19
+ version: 0.4.6
20
20
  - - "<="
21
21
  - !ruby/object:Gem::Version
22
22
  version: 0.5.0
@@ -26,7 +26,7 @@ dependencies:
26
26
  requirements:
27
27
  - - ">="
28
28
  - !ruby/object:Gem::Version
29
- version: 0.3.6
29
+ version: 0.4.6
30
30
  - - "<="
31
31
  - !ruby/object:Gem::Version
32
32
  version: 0.5.0
@@ -100,6 +100,34 @@ dependencies:
100
100
  - - "~>"
101
101
  - !ruby/object:Gem::Version
102
102
  version: '0.12'
103
+ - !ruby/object:Gem::Dependency
104
+ name: codeclimate-test-reporter
105
+ requirement: !ruby/object:Gem::Requirement
106
+ requirements:
107
+ - - "~>"
108
+ - !ruby/object:Gem::Version
109
+ version: '1.0'
110
+ type: :development
111
+ prerelease: false
112
+ version_requirements: !ruby/object:Gem::Requirement
113
+ requirements:
114
+ - - "~>"
115
+ - !ruby/object:Gem::Version
116
+ version: '1.0'
117
+ - !ruby/object:Gem::Dependency
118
+ name: mutant-rspec
119
+ requirement: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '0.8'
124
+ type: :development
125
+ prerelease: false
126
+ version_requirements: !ruby/object:Gem::Requirement
127
+ requirements:
128
+ - - "~>"
129
+ - !ruby/object:Gem::Version
130
+ version: '0.8'
103
131
  description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
104
132
  care of various incompatibilities and returning warnings for unsolvable differences.
105
133
  email:
@@ -110,6 +138,7 @@ extra_rdoc_files: []
110
138
  files:
111
139
  - lib/js_regex.rb
112
140
  - lib/js_regex/conversion.rb
141
+ - lib/js_regex/converter.rb
113
142
  - lib/js_regex/converter/anchor_converter.rb
114
143
  - lib/js_regex/converter/assertion_converter.rb
115
144
  - lib/js_regex/converter/backreference_converter.rb
@@ -123,7 +152,7 @@ files:
123
152
  - lib/js_regex/converter/meta_converter.rb
124
153
  - lib/js_regex/converter/nonproperty_converter.rb
125
154
  - lib/js_regex/converter/property_converter.rb
126
- - lib/js_regex/converter/quantifier_converter.rb
155
+ - lib/js_regex/converter/root_converter.rb
127
156
  - lib/js_regex/converter/set_converter.rb
128
157
  - lib/js_regex/converter/type_converter.rb
129
158
  - lib/js_regex/converter/unsupported_token_converter.rb
@@ -149,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
149
178
  version: '0'
150
179
  requirements: []
151
180
  rubyforge_project:
152
- rubygems_version: 2.6.11
181
+ rubygems_version: 2.6.13
153
182
  signing_key:
154
183
  specification_version: 4
155
184
  summary: Converts Ruby regexes to JavaScript regexes.
@@ -1,31 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'base'
4
-
5
- class JsRegex
6
- module Converter
7
- #
8
- # Template class implementation.
9
- #
10
- class QuantifierConverter < JsRegex::Converter::Base
11
- private
12
-
13
- def convert_data
14
- if context.stacked_quantifier?(start_index, end_index)
15
- warn_of_unsupported_feature('adjacent quantifiers')
16
- else
17
- convert_quantifier
18
- end
19
- end
20
-
21
- def convert_quantifier
22
- if data.length > 1 && data.end_with?('+')
23
- warn_of_unsupported_feature('declaration of quantifier as possessive')
24
- data[0..-2]
25
- else
26
- pass_through
27
- end
28
- end
29
- end
30
- end
31
- end