js_regex 1.2.3 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/js_regex.rb +2 -1
- data/lib/js_regex/conversion.rb +20 -72
- data/lib/js_regex/converter.rb +29 -0
- data/lib/js_regex/converter/anchor_converter.rb +9 -2
- data/lib/js_regex/converter/assertion_converter.rb +2 -3
- data/lib/js_regex/converter/backreference_converter.rb +19 -9
- data/lib/js_regex/converter/base.rb +45 -19
- data/lib/js_regex/converter/conditional_converter.rb +5 -8
- data/lib/js_regex/converter/context.rb +54 -77
- data/lib/js_regex/converter/escape_converter.rb +34 -1
- data/lib/js_regex/converter/freespace_converter.rb +1 -1
- data/lib/js_regex/converter/group_converter.rb +27 -41
- data/lib/js_regex/converter/meta_converter.rb +8 -4
- data/lib/js_regex/converter/nonproperty_converter.rb +1 -6
- data/lib/js_regex/converter/root_converter.rb +18 -0
- data/lib/js_regex/converter/set_converter.rb +56 -61
- data/lib/js_regex/converter/type_converter.rb +4 -2
- data/lib/js_regex/property_map.rb +1 -1
- data/lib/js_regex/version.rb +1 -1
- metadata +35 -6
- data/lib/js_regex/converter/quantifier_converter.rb +0 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2abae347b737c7635396c45a3a1489c72d6957e5
|
4
|
+
data.tar.gz: 5b674eaf6902686ab94648000308090fa154783c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3406867e15cb70cbc0a9f137f2449fce1af94fb448bcdbee3cfa6e439f64c92d4df9a5407fee251ea127a59faf8f6cbf81d55b38279d79e1a6de24e5b1c6673a
|
7
|
+
data.tar.gz: d0e63535e25eb8adb3e8d95859a884ce0eaec81ba25f1da8970213f4572fd558b9cbb7e3b25dc59d261213a9a0d7f9e0e7bb29b4f14cffba5fa9df49b0ec4780
|
data/lib/js_regex.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
2
|
+
|
3
3
|
# JsRegex converts ::Regexp instances to JavaScript.
|
4
4
|
#
|
5
5
|
# Usage:
|
@@ -10,6 +10,7 @@
|
|
10
10
|
#
|
11
11
|
class JsRegex
|
12
12
|
require_relative File.join('js_regex', 'conversion')
|
13
|
+
require_relative File.join('js_regex', 'version')
|
13
14
|
require 'json'
|
14
15
|
|
15
16
|
attr_reader :source, :options, :warnings
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -2,88 +2,36 @@
|
|
2
2
|
|
3
3
|
class JsRegex
|
4
4
|
#
|
5
|
-
# This class acts as a facade,
|
6
|
-
# passing Regexp::Scanner tokens to them, reusing Converters as needed.
|
5
|
+
# This class acts as a facade, passing a regex to the converters.
|
7
6
|
#
|
8
7
|
# ::of returns a source String, options String, and warnings Array.
|
9
8
|
#
|
10
9
|
class Conversion
|
11
10
|
require 'regexp_parser'
|
12
|
-
|
11
|
+
require_relative 'converter'
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
self.context = Converter::Context.new
|
20
|
-
self.converters = {}
|
21
|
-
|
22
|
-
self.source = ''.dup
|
23
|
-
self.options = ''.dup
|
24
|
-
self.warnings = []
|
25
|
-
|
26
|
-
convert_source
|
27
|
-
convert_options
|
28
|
-
perform_sanity_check
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.of(ruby_regex)
|
32
|
-
conversion = new(ruby_regex)
|
33
|
-
[conversion.source, conversion.options, conversion.warnings]
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
attr_writer :ruby_regex, :context, :converters, :source, :options, :warnings
|
39
|
-
|
40
|
-
CONVERTER_MAP = Hash.new(Converter::UnsupportedTokenConverter).merge(
|
41
|
-
anchor: Converter::AnchorConverter,
|
42
|
-
assertion: Converter::AssertionConverter,
|
43
|
-
backref: Converter::BackreferenceConverter,
|
44
|
-
conditional: Converter::ConditionalConverter,
|
45
|
-
escape: Converter::EscapeConverter,
|
46
|
-
free_space: Converter::FreespaceConverter,
|
47
|
-
group: Converter::GroupConverter,
|
48
|
-
literal: Converter::LiteralConverter,
|
49
|
-
meta: Converter::MetaConverter,
|
50
|
-
nonproperty: Converter::NonpropertyConverter,
|
51
|
-
property: Converter::PropertyConverter,
|
52
|
-
quantifier: Converter::QuantifierConverter,
|
53
|
-
set: Converter::SetConverter,
|
54
|
-
subset: Converter::SetConverter,
|
55
|
-
type: Converter::TypeConverter
|
56
|
-
).freeze
|
57
|
-
|
58
|
-
def convert_source
|
59
|
-
Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
|
60
|
-
# There might be a lot of tokens, so don't wrap their data in objects.
|
61
|
-
# Even just wrapping them in simple structs or attr_reader objects
|
62
|
-
# can lead to 60%+ longer processing times for large regexes.
|
63
|
-
converter_for_token_class(token_class)
|
64
|
-
.convert(token_class, subtype, data, s, e)
|
13
|
+
class << self
|
14
|
+
def of(ruby_regex)
|
15
|
+
source, warnings = convert_source(ruby_regex)
|
16
|
+
options = convert_options(ruby_regex)
|
17
|
+
[source, options, warnings]
|
65
18
|
end
|
66
|
-
end
|
67
|
-
|
68
|
-
def converter_for_token_class(token_class)
|
69
|
-
converters[token_class] ||= CONVERTER_MAP[token_class].new(self, context)
|
70
|
-
end
|
71
19
|
|
72
|
-
|
73
|
-
options << 'g' # all Ruby regexes are what is called "global" in JS
|
74
|
-
options << 'i' if (ruby_regex.options & Regexp::IGNORECASE).nonzero?
|
75
|
-
end
|
20
|
+
private
|
76
21
|
|
77
|
-
|
22
|
+
def convert_source(ruby_regex)
|
23
|
+
context = Converter::Context.new(ruby_regex)
|
24
|
+
expression_tree = Regexp::Parser.parse(ruby_regex)
|
25
|
+
[
|
26
|
+
Converter::RootConverter.new.convert(expression_tree, context),
|
27
|
+
context.warnings
|
28
|
+
]
|
29
|
+
end
|
78
30
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
Regexp.new(source.gsub(SURROGATE_CODEPOINT_PATTERN, '.'))
|
84
|
-
rescue ArgumentError, RegexpError, SyntaxError => e
|
85
|
-
self.source = ''
|
86
|
-
warnings << e.message
|
31
|
+
def convert_options(ruby_regex)
|
32
|
+
ignore_case = (ruby_regex.options & Regexp::IGNORECASE).nonzero?
|
33
|
+
ignore_case ? 'gi' : 'g'
|
34
|
+
end
|
87
35
|
end
|
88
36
|
end
|
89
37
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class JsRegex
|
4
|
+
module Converter
|
5
|
+
Dir[File.join(File.dirname(__FILE__), 'converter', '*.rb')].each do |file|
|
6
|
+
require file
|
7
|
+
end
|
8
|
+
|
9
|
+
MAP = Hash.new(UnsupportedTokenConverter).merge(
|
10
|
+
anchor: AnchorConverter,
|
11
|
+
assertion: AssertionConverter,
|
12
|
+
backref: BackreferenceConverter,
|
13
|
+
conditional: ConditionalConverter,
|
14
|
+
escape: EscapeConverter,
|
15
|
+
free_space: FreespaceConverter,
|
16
|
+
group: GroupConverter,
|
17
|
+
literal: LiteralConverter,
|
18
|
+
meta: MetaConverter,
|
19
|
+
nonproperty: NonpropertyConverter,
|
20
|
+
property: PropertyConverter,
|
21
|
+
set: SetConverter,
|
22
|
+
type: TypeConverter
|
23
|
+
).freeze
|
24
|
+
|
25
|
+
def self.for(expression)
|
26
|
+
MAP[expression.type].new
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -15,12 +15,19 @@ class JsRegex
|
|
15
15
|
when :bol, :bos then '^'
|
16
16
|
when :eol, :eos then '$'
|
17
17
|
when :eos_ob_eol then '(?=\n?$)'
|
18
|
-
when :word_boundary then '\b'
|
19
|
-
when :nonword_boundary then '\B'
|
18
|
+
when :word_boundary then pass_boundary_with_warning('\b')
|
19
|
+
when :nonword_boundary then pass_boundary_with_warning('\B')
|
20
20
|
else
|
21
21
|
warn_of_unsupported_feature
|
22
22
|
end
|
23
23
|
end
|
24
|
+
|
25
|
+
def pass_boundary_with_warning(boundary)
|
26
|
+
warn("The boundary '#{boundary}' at index #{expression.ts} "\
|
27
|
+
'is not unicode-aware in JavaScript, '\
|
28
|
+
'so it might act differently than in Ruby.')
|
29
|
+
boundary
|
30
|
+
end
|
24
31
|
end
|
25
32
|
end
|
26
33
|
end
|
@@ -16,12 +16,11 @@ class JsRegex
|
|
16
16
|
def convert_data
|
17
17
|
case subtype
|
18
18
|
when :lookahead, :nlookahead
|
19
|
-
|
19
|
+
build_group(capturing: false)
|
20
20
|
when :nlookbehind
|
21
|
-
context.start_negative_lookbehind
|
22
21
|
warn_of_unsupported_feature('negative lookbehind assertion')
|
23
22
|
else # :lookbehind, ...
|
24
|
-
|
23
|
+
build_unsupported_group
|
25
24
|
end
|
26
25
|
end
|
27
26
|
end
|
@@ -12,20 +12,30 @@ class JsRegex
|
|
12
12
|
|
13
13
|
def convert_data
|
14
14
|
case subtype
|
15
|
-
when :number
|
16
|
-
|
15
|
+
when :number, :number_ref
|
16
|
+
convert_number_ref
|
17
|
+
when :number_rel_ref
|
18
|
+
convert_number_rel_ref
|
19
|
+
when :name_ref
|
20
|
+
convert_name_ref
|
17
21
|
else
|
18
22
|
warn_of_unsupported_feature
|
19
23
|
end
|
20
24
|
end
|
21
25
|
|
22
|
-
def
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
def convert_number_ref
|
27
|
+
# after regexp_parser update, replace data[/\d+/] with expression.number
|
28
|
+
"\\#{context.new_capturing_group_position(Integer(data[/\d+/]))}"
|
29
|
+
end
|
30
|
+
|
31
|
+
def convert_number_rel_ref
|
32
|
+
absolute_position = Integer(expression.number) +
|
33
|
+
context.original_capturing_group_count + 1
|
34
|
+
"\\#{context.new_capturing_group_position(absolute_position)}"
|
35
|
+
end
|
36
|
+
|
37
|
+
def convert_name_ref
|
38
|
+
"\\#{context.named_group_positions.fetch(expression.name)}"
|
29
39
|
end
|
30
40
|
end
|
31
41
|
end
|
@@ -6,35 +6,61 @@ class JsRegex
|
|
6
6
|
# Template class. Implement #convert_data in subclasses.
|
7
7
|
#
|
8
8
|
class Base
|
9
|
-
|
9
|
+
def convert(expression, context)
|
10
|
+
self.context = context
|
11
|
+
self.expression = expression
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
@context = context
|
13
|
+
source = convert_data
|
14
|
+
apply_quantifier(source)
|
14
15
|
end
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
self.data = data
|
20
|
-
self.start_index = start_index
|
21
|
-
self.end_index = end_index
|
17
|
+
private
|
18
|
+
|
19
|
+
attr_accessor :context, :expression
|
22
20
|
|
23
|
-
|
24
|
-
|
21
|
+
def subtype
|
22
|
+
expression.token
|
25
23
|
end
|
26
24
|
|
27
|
-
|
25
|
+
def data
|
26
|
+
expression.text
|
27
|
+
end
|
28
|
+
alias pass_through data
|
28
29
|
|
29
|
-
|
30
|
+
def apply_quantifier(source)
|
31
|
+
return source if source.empty? || !(quantifier = expression.quantifier)
|
30
32
|
|
31
|
-
|
33
|
+
if quantifier.mode.equal?(:possessive)
|
34
|
+
context.wrap_in_backrefed_lookahead(source + quantifier.text[0..-2])
|
35
|
+
else
|
36
|
+
source + quantifier
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def convert_subexpressions
|
41
|
+
convert_expressions(subexpressions)
|
42
|
+
end
|
43
|
+
|
44
|
+
def convert_expressions(expressions)
|
45
|
+
expressions.map { |exp| Converter.for(exp).convert(exp, context) }.join
|
46
|
+
end
|
47
|
+
|
48
|
+
def subexpressions
|
49
|
+
expression.expressions
|
50
|
+
end
|
32
51
|
|
33
52
|
def warn_of_unsupported_feature(description = nil)
|
34
|
-
description ||= "#{subtype} #{
|
35
|
-
|
36
|
-
|
37
|
-
|
53
|
+
description ||= "#{subtype} #{expression.type}".tr('_', ' ')
|
54
|
+
full_desc = "#{description} '#{expression}'"
|
55
|
+
warn("Dropped unsupported #{full_desc} at index #{expression.ts}")
|
56
|
+
''
|
57
|
+
end
|
58
|
+
|
59
|
+
def warn(text)
|
60
|
+
context.warnings << text
|
61
|
+
end
|
62
|
+
|
63
|
+
def drop_without_warning
|
38
64
|
''
|
39
65
|
end
|
40
66
|
end
|
@@ -11,15 +11,12 @@ class JsRegex
|
|
11
11
|
private
|
12
12
|
|
13
13
|
def convert_data
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
when :separator, :close
|
19
|
-
pass_through
|
20
|
-
else
|
21
|
-
'' # one warning is enough, don't warn about other parts
|
14
|
+
warn_of_unsupported_feature('conditional')
|
15
|
+
branches = subexpressions.drop(1).each_with_object([]) do |branch, arr|
|
16
|
+
converted_branch = convert_expressions(branch)
|
17
|
+
arr << converted_branch unless converted_branch.eql?('')
|
22
18
|
end
|
19
|
+
"(?:#{branches.join('|')})"
|
23
20
|
end
|
24
21
|
end
|
25
22
|
end
|
@@ -10,120 +10,97 @@ class JsRegex
|
|
10
10
|
class Context
|
11
11
|
attr_reader :buffered_set_extractions,
|
12
12
|
:buffered_set_members,
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
16
|
-
:
|
13
|
+
:in_atomic_group,
|
14
|
+
:named_group_positions,
|
15
|
+
:negative_base_set,
|
16
|
+
:root_options,
|
17
|
+
:warnings
|
17
18
|
|
18
|
-
def initialize
|
19
|
-
self.
|
20
|
-
self.
|
21
|
-
self.
|
22
|
-
self.
|
23
|
-
self.negative_set_levels = []
|
24
|
-
self.set_level = 0
|
25
|
-
end
|
26
|
-
|
27
|
-
def valid?
|
28
|
-
!negative_lookbehind
|
29
|
-
end
|
30
|
-
|
31
|
-
def stacked_quantifier?(quantifier_start_index, quantifier_end_index)
|
32
|
-
is_stacked = last_quantifier_end_index.equal?(quantifier_start_index)
|
33
|
-
self.last_quantifier_end_index = quantifier_end_index
|
34
|
-
is_stacked
|
35
|
-
end
|
36
|
-
|
37
|
-
# set context
|
38
|
-
|
39
|
-
def open_set
|
40
|
-
self.set_level = set_level + 1
|
41
|
-
if set_level == 1
|
42
|
-
buffered_set_members.clear
|
43
|
-
buffered_set_extractions.clear
|
44
|
-
end
|
45
|
-
negative_set_levels.delete(set_level)
|
46
|
-
end
|
19
|
+
def initialize(ruby_regex)
|
20
|
+
self.added_capturing_groups_after_group = Hash.new(0)
|
21
|
+
self.capturing_group_count = 0
|
22
|
+
self.named_group_positions = {}
|
23
|
+
self.warnings = []
|
47
24
|
|
48
|
-
|
49
|
-
|
25
|
+
self.root_options = {}
|
26
|
+
root_options[:m] = !(ruby_regex.options & Regexp::MULTILINE).equal?(0)
|
50
27
|
end
|
51
28
|
|
52
|
-
|
53
|
-
self.set_level = set_level - 1
|
54
|
-
end
|
29
|
+
# option context
|
55
30
|
|
56
|
-
def
|
57
|
-
|
31
|
+
def multiline?
|
32
|
+
root_options.fetch(:m)
|
58
33
|
end
|
59
34
|
|
60
|
-
|
61
|
-
negative_set_levels.include?(level)
|
62
|
-
end
|
35
|
+
# set context
|
63
36
|
|
64
|
-
def
|
65
|
-
|
37
|
+
def negate_base_set
|
38
|
+
self.negative_base_set = true
|
66
39
|
end
|
67
40
|
|
68
|
-
def
|
69
|
-
|
41
|
+
def reset_set_context
|
42
|
+
self.buffered_set_extractions = []
|
43
|
+
self.buffered_set_members = []
|
44
|
+
self.negative_base_set = false
|
70
45
|
end
|
71
46
|
|
72
47
|
# group context
|
73
48
|
|
74
|
-
def open_group
|
75
|
-
self.group_level = group_level + 1
|
76
|
-
end
|
77
|
-
|
78
49
|
def capture_group
|
79
|
-
self.
|
50
|
+
self.capturing_group_count = capturing_group_count + 1
|
80
51
|
end
|
81
52
|
|
82
53
|
def start_atomic_group
|
83
|
-
self.
|
54
|
+
self.in_atomic_group = true
|
84
55
|
end
|
85
56
|
|
86
|
-
def
|
87
|
-
self.
|
57
|
+
def end_atomic_group
|
58
|
+
self.in_atomic_group = false
|
88
59
|
end
|
89
60
|
|
90
|
-
def
|
91
|
-
|
61
|
+
def wrap_in_backrefed_lookahead(content)
|
62
|
+
new_backref_num = capturing_group_count + 1
|
63
|
+
# an empty passive group (?:) is appended as literal digits may follow
|
64
|
+
result = "(?=(#{content}))\\#{new_backref_num}(?:)"
|
65
|
+
added_capturing_groups_after_group[original_capturing_group_count] += 1
|
66
|
+
capture_group
|
67
|
+
result
|
92
68
|
end
|
93
69
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
70
|
+
# takes and returns 1-indexed group positions.
|
71
|
+
# new is different from old if capturing groups were added in between.
|
72
|
+
def new_capturing_group_position(old_position)
|
73
|
+
increment = 0
|
74
|
+
added_capturing_groups_after_group.each do |after_n_groups, count|
|
75
|
+
increment += count if after_n_groups < old_position
|
76
|
+
end
|
77
|
+
old_position + increment
|
98
78
|
end
|
99
79
|
|
100
|
-
def
|
101
|
-
|
102
|
-
self.negative_lookbehind = false
|
80
|
+
def original_capturing_group_count
|
81
|
+
capturing_group_count - total_added_capturing_groups
|
103
82
|
end
|
104
83
|
|
105
|
-
def
|
106
|
-
|
84
|
+
def total_added_capturing_groups
|
85
|
+
added_capturing_groups_after_group.values.inject(0, &:+)
|
107
86
|
end
|
108
87
|
|
109
|
-
def
|
110
|
-
|
111
|
-
group_level.equal?(group_level_for_backreference + 1)
|
88
|
+
def store_named_group_position(name)
|
89
|
+
named_group_positions[name] = capturing_group_count + 1
|
112
90
|
end
|
113
91
|
|
114
92
|
private
|
115
93
|
|
116
|
-
attr_accessor :
|
117
|
-
:
|
118
|
-
:negative_set_levels,
|
119
|
-
:set_level
|
94
|
+
attr_accessor :added_capturing_groups_after_group,
|
95
|
+
:capturing_group_count
|
120
96
|
|
121
97
|
attr_writer :buffered_set_extractions,
|
122
98
|
:buffered_set_members,
|
123
|
-
:
|
124
|
-
:
|
125
|
-
:
|
126
|
-
:
|
99
|
+
:in_atomic_group,
|
100
|
+
:named_group_positions,
|
101
|
+
:negative_base_set,
|
102
|
+
:root_options,
|
103
|
+
:warnings
|
127
104
|
end
|
128
105
|
end
|
129
106
|
end
|
@@ -39,12 +39,16 @@ class JsRegex
|
|
39
39
|
case subtype
|
40
40
|
when :codepoint_list
|
41
41
|
convert_codepoint_list
|
42
|
+
when :control
|
43
|
+
convert_control_sequence
|
42
44
|
when :literal
|
43
45
|
LiteralConverter.convert_data(data)
|
46
|
+
when :meta_sequence
|
47
|
+
convert_meta_sequence
|
44
48
|
when *ESCAPES_SHARED_BY_RUBY_AND_JS
|
45
49
|
pass_through
|
46
50
|
else
|
47
|
-
# Bell, Escape, HexWide,
|
51
|
+
# Bell, Escape, HexWide, ...
|
48
52
|
warn_of_unsupported_feature
|
49
53
|
end
|
50
54
|
end
|
@@ -56,6 +60,35 @@ class JsRegex
|
|
56
60
|
end
|
57
61
|
elements.join
|
58
62
|
end
|
63
|
+
|
64
|
+
def convert_control_sequence
|
65
|
+
convert_meta_control_sequence ||
|
66
|
+
unicode_escape_for(control_sequence_to_s(data))
|
67
|
+
end
|
68
|
+
|
69
|
+
def convert_meta_sequence
|
70
|
+
convert_meta_control_sequence ||
|
71
|
+
unicode_escape_for(meta_char_to_char_code(data[-1]))
|
72
|
+
end
|
73
|
+
|
74
|
+
def convert_meta_control_sequence
|
75
|
+
return unless expression.class.to_s.include?('MetaControl')
|
76
|
+
unicode_escape_for(meta_char_to_char_code(control_sequence_to_s(data)))
|
77
|
+
end
|
78
|
+
|
79
|
+
def unicode_escape_for(char)
|
80
|
+
"\\u#{char.ord.to_s(16).upcase.rjust(4, '0')}"
|
81
|
+
end
|
82
|
+
|
83
|
+
def control_sequence_to_s(control_sequence)
|
84
|
+
five_lsb = control_sequence.unpack('B*').first[-5..-1]
|
85
|
+
["000#{five_lsb}"].pack('B*')
|
86
|
+
end
|
87
|
+
|
88
|
+
def meta_char_to_char_code(meta_char)
|
89
|
+
byte_value = meta_char.ord
|
90
|
+
byte_value < 128 ? byte_value + 128 : byte_value
|
91
|
+
end
|
59
92
|
end
|
60
93
|
end
|
61
94
|
end
|
@@ -12,66 +12,52 @@ class JsRegex
|
|
12
12
|
|
13
13
|
def convert_data
|
14
14
|
case subtype
|
15
|
-
when :atomic then
|
16
|
-
when :capture then
|
17
|
-
when :
|
18
|
-
when :
|
19
|
-
when :
|
20
|
-
when :
|
21
|
-
when :
|
22
|
-
else
|
15
|
+
when :atomic then emulate_atomic_group
|
16
|
+
when :capture then build_group
|
17
|
+
when :comment then drop_without_warning
|
18
|
+
when :named then build_named_group
|
19
|
+
when :options then build_options_group
|
20
|
+
when :passive then build_passive_group
|
21
|
+
when :absence then warn_of_unsupported_feature
|
22
|
+
else build_unsupported_group
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
# regex-emulate-atomic-grouping-with-lookahead
|
30
|
-
if context.atomic_group?
|
31
|
-
open_unsupported_group('nested atomic group')
|
26
|
+
def emulate_atomic_group
|
27
|
+
if context.in_atomic_group
|
28
|
+
build_unsupported_group('nested atomic group')
|
32
29
|
else
|
33
30
|
context.start_atomic_group
|
34
|
-
|
31
|
+
result = context.wrap_in_backrefed_lookahead(convert_subexpressions)
|
32
|
+
context.end_atomic_group
|
33
|
+
result
|
35
34
|
end
|
36
35
|
end
|
37
36
|
|
38
|
-
def
|
39
|
-
# drop name without warning
|
40
|
-
|
37
|
+
def build_named_group
|
38
|
+
# remember position, then drop name part without warning
|
39
|
+
context.store_named_group_position(expression.name)
|
40
|
+
build_group(head: '(')
|
41
41
|
end
|
42
42
|
|
43
|
-
def
|
43
|
+
def build_options_group
|
44
44
|
warn_of_unsupported_feature('group-specific options')
|
45
|
-
|
45
|
+
build_group(head: '(')
|
46
46
|
end
|
47
47
|
|
48
|
-
def
|
49
|
-
|
48
|
+
def build_passive_group
|
49
|
+
build_group(head: '(?:', capturing: false)
|
50
50
|
end
|
51
51
|
|
52
|
-
def
|
52
|
+
def build_unsupported_group(description = nil)
|
53
53
|
warn_of_unsupported_feature(description)
|
54
|
-
|
54
|
+
build_passive_group
|
55
55
|
end
|
56
56
|
|
57
|
-
def
|
58
|
-
context.open_group
|
57
|
+
def build_group(opts = {})
|
59
58
|
context.capture_group unless opts[:capturing].equal?(false)
|
60
|
-
opts[:head] || pass_through
|
61
|
-
|
62
|
-
|
63
|
-
def close_group
|
64
|
-
if context.negative_lookbehind
|
65
|
-
context.close_negative_lookbehind
|
66
|
-
''
|
67
|
-
elsif context.base_level_of_atomic_group?
|
68
|
-
context.close_atomic_group
|
69
|
-
# an empty passive group (?:) is appended as literal digits may follow
|
70
|
-
"))\\#{context.captured_group_count}(?:)"
|
71
|
-
else
|
72
|
-
context.close_group
|
73
|
-
')'
|
74
|
-
end
|
59
|
+
head = opts[:head] || pass_through
|
60
|
+
"#{head}#{convert_subexpressions})"
|
75
61
|
end
|
76
62
|
end
|
77
63
|
end
|
@@ -13,16 +13,20 @@ class JsRegex
|
|
13
13
|
def convert_data
|
14
14
|
case subtype
|
15
15
|
when :alternation
|
16
|
-
|
16
|
+
convert_alternation
|
17
17
|
when :dot
|
18
|
-
|
18
|
+
context.multiline? ? '(?:.|\n)' : '.'
|
19
19
|
else
|
20
20
|
warn_of_unsupported_feature
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
25
|
-
(
|
24
|
+
def convert_alternation
|
25
|
+
branches = subexpressions.each_with_object([]) do |branch, arr|
|
26
|
+
converted_branch = convert_expressions(branch.expressions)
|
27
|
+
arr << converted_branch unless converted_branch.eql?('')
|
28
|
+
end
|
29
|
+
branches.join('|')
|
26
30
|
end
|
27
31
|
end
|
28
32
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base'
|
4
|
+
|
5
|
+
class JsRegex
|
6
|
+
module Converter
|
7
|
+
#
|
8
|
+
# Template class implementation.
|
9
|
+
#
|
10
|
+
class RootConverter < JsRegex::Converter::Base
|
11
|
+
private
|
12
|
+
|
13
|
+
def convert_data
|
14
|
+
convert_subexpressions
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -3,107 +3,102 @@
|
|
3
3
|
require_relative 'base'
|
4
4
|
require_relative 'literal_converter'
|
5
5
|
require_relative 'property_converter'
|
6
|
-
require_relative 'type_converter'
|
7
6
|
|
8
7
|
class JsRegex
|
9
8
|
module Converter
|
10
9
|
#
|
11
10
|
# Template class implementation.
|
12
11
|
#
|
13
|
-
# This converter works a little differently from the others.
|
14
|
-
#
|
15
|
-
# It buffers anything that it finds within a set in the Context's
|
16
|
-
# #buffered_set_members and #buffered_set_extractions Arrays,
|
17
|
-
# returning an empty String for all passed tokens, and only when
|
18
|
-
# the set is closed does it compile and return the final String.
|
19
|
-
#
|
20
12
|
class SetConverter < JsRegex::Converter::Base
|
21
13
|
private
|
22
14
|
|
23
15
|
def convert_data
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
when :intersection then warn_of_unsupported_feature('set intersection')
|
34
|
-
else try_replacing_potential_property_subtype
|
16
|
+
if expression.set_level.equal?(0) # reached end of set expression
|
17
|
+
context.reset_set_context
|
18
|
+
context.negate_base_set if negative_set?
|
19
|
+
process_members
|
20
|
+
finalize_set
|
21
|
+
elsif negative_set?
|
22
|
+
warn_of_unsupported_feature('nested negative set data')
|
23
|
+
else # positive subset
|
24
|
+
process_members
|
35
25
|
end
|
36
26
|
end
|
37
27
|
|
38
|
-
def
|
39
|
-
|
40
|
-
''
|
28
|
+
def negative_set?
|
29
|
+
expression.negative?
|
41
30
|
end
|
42
31
|
|
43
|
-
def
|
44
|
-
|
45
|
-
warn_of_unsupported_feature('nested negative set data')
|
46
|
-
end
|
47
|
-
context.negate_set
|
48
|
-
''
|
32
|
+
def process_members
|
33
|
+
expression.each { |member| process_member(member) }
|
49
34
|
end
|
50
35
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
36
|
+
ASTRAL_PLANE_PATTERN = /[\u{10000}-\u{FFFFF}]/
|
37
|
+
PROPERTY_PATTERN = /\A(?:\[:|\\([pP])\{)(\^?)([^:\}]+)/
|
38
|
+
|
39
|
+
def process_member(member)
|
40
|
+
return convert_subset(member) unless member.instance_of?(String)
|
55
41
|
|
56
|
-
|
57
|
-
utf8_data
|
58
|
-
|
42
|
+
utf8_data = member.dup.force_encoding('UTF-8')
|
43
|
+
case utf8_data
|
44
|
+
when ASTRAL_PLANE_PATTERN
|
59
45
|
warn_of_unsupported_feature('astral plane set member')
|
46
|
+
when '\\h'
|
47
|
+
handle_hex_type
|
48
|
+
when '\\H'
|
49
|
+
handle_nonhex_type
|
50
|
+
when '&&'
|
51
|
+
warn_of_unsupported_feature('set intersection')
|
52
|
+
when PROPERTY_PATTERN
|
53
|
+
handle_property($1, $2, $3)
|
60
54
|
else
|
61
55
|
literal_conversion = LiteralConverter.convert_data(utf8_data)
|
62
56
|
buffer_set_member(literal_conversion)
|
63
57
|
end
|
64
58
|
end
|
65
59
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
60
|
+
HEX_RANGES = 'A-Fa-f0-9'
|
61
|
+
NONHEX_SET = '[^A-Fa-f0-9]'
|
62
|
+
|
63
|
+
def handle_hex_type
|
64
|
+
buffer_set_member(HEX_RANGES)
|
70
65
|
end
|
71
66
|
|
72
|
-
def
|
73
|
-
|
74
|
-
|
67
|
+
def handle_nonhex_type
|
68
|
+
if context.negative_base_set
|
69
|
+
warn_of_unsupported_feature('nonhex type in negative set')
|
70
|
+
else
|
71
|
+
buffer_set_extraction(NONHEX_SET)
|
72
|
+
end
|
75
73
|
end
|
76
74
|
|
77
|
-
def
|
78
|
-
if
|
75
|
+
def handle_property(sign, caret, name)
|
76
|
+
if context.negative_base_set
|
77
|
+
return warn_of_unsupported_feature('property in negative set')
|
78
|
+
end
|
79
|
+
std = standardize_property_name(name)
|
80
|
+
negated = sign.eql?('P') ^ caret.eql?('^')
|
81
|
+
if (replacement = PropertyConverter.property_replacement(std, negated))
|
79
82
|
buffer_set_extraction(replacement)
|
80
83
|
else
|
81
84
|
warn_of_unsupported_feature('property')
|
82
85
|
end
|
83
86
|
end
|
84
87
|
|
85
|
-
def
|
86
|
-
|
87
|
-
buffer_set_extraction(TypeConverter::HEX_EXPANSION)
|
88
|
-
elsif subtype.equal?(:type_nonhex)
|
89
|
-
buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
|
90
|
-
else
|
91
|
-
buffer_set_member(data)
|
92
|
-
end
|
88
|
+
def standardize_property_name(name)
|
89
|
+
Regexp::Parser.parse("\\p{#{name}}").expressions.first.token
|
93
90
|
end
|
94
91
|
|
95
|
-
def
|
96
|
-
|
92
|
+
def buffer_set_member(data)
|
93
|
+
context.buffered_set_members << data
|
97
94
|
end
|
98
95
|
|
99
|
-
def
|
100
|
-
context.
|
101
|
-
''
|
96
|
+
def buffer_set_extraction(data)
|
97
|
+
context.buffered_set_extractions << data
|
102
98
|
end
|
103
99
|
|
104
|
-
def
|
105
|
-
|
106
|
-
''
|
100
|
+
def convert_subset(subset)
|
101
|
+
SetConverter.new.convert(subset, context)
|
107
102
|
end
|
108
103
|
|
109
104
|
def finalize_set
|
@@ -125,7 +120,7 @@ class JsRegex
|
|
125
120
|
end
|
126
121
|
|
127
122
|
def finalize_nondepleted_set(buffered_members, buffered_extractions)
|
128
|
-
set = "[#{'^' if
|
123
|
+
set = "[#{'^' if negative_set?}#{buffered_members.join}]"
|
129
124
|
if buffered_extractions.empty?
|
130
125
|
set
|
131
126
|
else
|
@@ -8,8 +8,9 @@ class JsRegex
|
|
8
8
|
# Template class implementation.
|
9
9
|
#
|
10
10
|
class TypeConverter < JsRegex::Converter::Base
|
11
|
-
HEX_EXPANSION
|
12
|
-
NONHEX_EXPANSION
|
11
|
+
HEX_EXPANSION = '[A-Fa-f0-9]'
|
12
|
+
NONHEX_EXPANSION = '[^A-Fa-f0-9]'
|
13
|
+
LINEBREAK_EXPANSION = '(\r\n|\r|\n)'
|
13
14
|
|
14
15
|
private
|
15
16
|
|
@@ -17,6 +18,7 @@ class JsRegex
|
|
17
18
|
case subtype
|
18
19
|
when :hex then HEX_EXPANSION
|
19
20
|
when :nonhex then NONHEX_EXPANSION
|
21
|
+
when :linebreak then LINEBREAK_EXPANSION
|
20
22
|
when :digit, :nondigit, :word, :nonword, :space, :nonspace
|
21
23
|
pass_through
|
22
24
|
else
|
data/lib/js_regex/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: regexp_parser
|
@@ -16,7 +16,7 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.4.6
|
20
20
|
- - "<="
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: 0.5.0
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0.
|
29
|
+
version: 0.4.6
|
30
30
|
- - "<="
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: 0.5.0
|
@@ -100,6 +100,34 @@ dependencies:
|
|
100
100
|
- - "~>"
|
101
101
|
- !ruby/object:Gem::Version
|
102
102
|
version: '0.12'
|
103
|
+
- !ruby/object:Gem::Dependency
|
104
|
+
name: codeclimate-test-reporter
|
105
|
+
requirement: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '1.0'
|
110
|
+
type: :development
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - "~>"
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '1.0'
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: mutant-rspec
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0.8'
|
124
|
+
type: :development
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - "~>"
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0.8'
|
103
131
|
description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
|
104
132
|
care of various incompatibilities and returning warnings for unsolvable differences.
|
105
133
|
email:
|
@@ -110,6 +138,7 @@ extra_rdoc_files: []
|
|
110
138
|
files:
|
111
139
|
- lib/js_regex.rb
|
112
140
|
- lib/js_regex/conversion.rb
|
141
|
+
- lib/js_regex/converter.rb
|
113
142
|
- lib/js_regex/converter/anchor_converter.rb
|
114
143
|
- lib/js_regex/converter/assertion_converter.rb
|
115
144
|
- lib/js_regex/converter/backreference_converter.rb
|
@@ -123,7 +152,7 @@ files:
|
|
123
152
|
- lib/js_regex/converter/meta_converter.rb
|
124
153
|
- lib/js_regex/converter/nonproperty_converter.rb
|
125
154
|
- lib/js_regex/converter/property_converter.rb
|
126
|
-
- lib/js_regex/converter/
|
155
|
+
- lib/js_regex/converter/root_converter.rb
|
127
156
|
- lib/js_regex/converter/set_converter.rb
|
128
157
|
- lib/js_regex/converter/type_converter.rb
|
129
158
|
- lib/js_regex/converter/unsupported_token_converter.rb
|
@@ -149,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
149
178
|
version: '0'
|
150
179
|
requirements: []
|
151
180
|
rubyforge_project:
|
152
|
-
rubygems_version: 2.6.
|
181
|
+
rubygems_version: 2.6.13
|
153
182
|
signing_key:
|
154
183
|
specification_version: 4
|
155
184
|
summary: Converts Ruby regexes to JavaScript regexes.
|
@@ -1,31 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'base'
|
4
|
-
|
5
|
-
class JsRegex
|
6
|
-
module Converter
|
7
|
-
#
|
8
|
-
# Template class implementation.
|
9
|
-
#
|
10
|
-
class QuantifierConverter < JsRegex::Converter::Base
|
11
|
-
private
|
12
|
-
|
13
|
-
def convert_data
|
14
|
-
if context.stacked_quantifier?(start_index, end_index)
|
15
|
-
warn_of_unsupported_feature('adjacent quantifiers')
|
16
|
-
else
|
17
|
-
convert_quantifier
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def convert_quantifier
|
22
|
-
if data.length > 1 && data.end_with?('+')
|
23
|
-
warn_of_unsupported_feature('declaration of quantifier as possessive')
|
24
|
-
data[0..-2]
|
25
|
-
else
|
26
|
-
pass_through
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|