js_regex 1.2.3 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/js_regex.rb +2 -1
- data/lib/js_regex/conversion.rb +20 -72
- data/lib/js_regex/converter.rb +29 -0
- data/lib/js_regex/converter/anchor_converter.rb +9 -2
- data/lib/js_regex/converter/assertion_converter.rb +2 -3
- data/lib/js_regex/converter/backreference_converter.rb +19 -9
- data/lib/js_regex/converter/base.rb +45 -19
- data/lib/js_regex/converter/conditional_converter.rb +5 -8
- data/lib/js_regex/converter/context.rb +54 -77
- data/lib/js_regex/converter/escape_converter.rb +34 -1
- data/lib/js_regex/converter/freespace_converter.rb +1 -1
- data/lib/js_regex/converter/group_converter.rb +27 -41
- data/lib/js_regex/converter/meta_converter.rb +8 -4
- data/lib/js_regex/converter/nonproperty_converter.rb +1 -6
- data/lib/js_regex/converter/root_converter.rb +18 -0
- data/lib/js_regex/converter/set_converter.rb +56 -61
- data/lib/js_regex/converter/type_converter.rb +4 -2
- data/lib/js_regex/property_map.rb +1 -1
- data/lib/js_regex/version.rb +1 -1
- metadata +35 -6
- data/lib/js_regex/converter/quantifier_converter.rb +0 -31
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2abae347b737c7635396c45a3a1489c72d6957e5
|
4
|
+
data.tar.gz: 5b674eaf6902686ab94648000308090fa154783c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3406867e15cb70cbc0a9f137f2449fce1af94fb448bcdbee3cfa6e439f64c92d4df9a5407fee251ea127a59faf8f6cbf81d55b38279d79e1a6de24e5b1c6673a
|
7
|
+
data.tar.gz: d0e63535e25eb8adb3e8d95859a884ce0eaec81ba25f1da8970213f4572fd558b9cbb7e3b25dc59d261213a9a0d7f9e0e7bb29b4f14cffba5fa9df49b0ec4780
|
data/lib/js_regex.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
2
|
+
|
3
3
|
# JsRegex converts ::Regexp instances to JavaScript.
|
4
4
|
#
|
5
5
|
# Usage:
|
@@ -10,6 +10,7 @@
|
|
10
10
|
#
|
11
11
|
class JsRegex
|
12
12
|
require_relative File.join('js_regex', 'conversion')
|
13
|
+
require_relative File.join('js_regex', 'version')
|
13
14
|
require 'json'
|
14
15
|
|
15
16
|
attr_reader :source, :options, :warnings
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -2,88 +2,36 @@
|
|
2
2
|
|
3
3
|
class JsRegex
|
4
4
|
#
|
5
|
-
# This class acts as a facade,
|
6
|
-
# passing Regexp::Scanner tokens to them, reusing Converters as needed.
|
5
|
+
# This class acts as a facade, passing a regex to the converters.
|
7
6
|
#
|
8
7
|
# ::of returns a source String, options String, and warnings Array.
|
9
8
|
#
|
10
9
|
class Conversion
|
11
10
|
require 'regexp_parser'
|
12
|
-
|
11
|
+
require_relative 'converter'
|
13
12
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
self.context = Converter::Context.new
|
20
|
-
self.converters = {}
|
21
|
-
|
22
|
-
self.source = ''.dup
|
23
|
-
self.options = ''.dup
|
24
|
-
self.warnings = []
|
25
|
-
|
26
|
-
convert_source
|
27
|
-
convert_options
|
28
|
-
perform_sanity_check
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.of(ruby_regex)
|
32
|
-
conversion = new(ruby_regex)
|
33
|
-
[conversion.source, conversion.options, conversion.warnings]
|
34
|
-
end
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
attr_writer :ruby_regex, :context, :converters, :source, :options, :warnings
|
39
|
-
|
40
|
-
CONVERTER_MAP = Hash.new(Converter::UnsupportedTokenConverter).merge(
|
41
|
-
anchor: Converter::AnchorConverter,
|
42
|
-
assertion: Converter::AssertionConverter,
|
43
|
-
backref: Converter::BackreferenceConverter,
|
44
|
-
conditional: Converter::ConditionalConverter,
|
45
|
-
escape: Converter::EscapeConverter,
|
46
|
-
free_space: Converter::FreespaceConverter,
|
47
|
-
group: Converter::GroupConverter,
|
48
|
-
literal: Converter::LiteralConverter,
|
49
|
-
meta: Converter::MetaConverter,
|
50
|
-
nonproperty: Converter::NonpropertyConverter,
|
51
|
-
property: Converter::PropertyConverter,
|
52
|
-
quantifier: Converter::QuantifierConverter,
|
53
|
-
set: Converter::SetConverter,
|
54
|
-
subset: Converter::SetConverter,
|
55
|
-
type: Converter::TypeConverter
|
56
|
-
).freeze
|
57
|
-
|
58
|
-
def convert_source
|
59
|
-
Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
|
60
|
-
# There might be a lot of tokens, so don't wrap their data in objects.
|
61
|
-
# Even just wrapping them in simple structs or attr_reader objects
|
62
|
-
# can lead to 60%+ longer processing times for large regexes.
|
63
|
-
converter_for_token_class(token_class)
|
64
|
-
.convert(token_class, subtype, data, s, e)
|
13
|
+
class << self
|
14
|
+
def of(ruby_regex)
|
15
|
+
source, warnings = convert_source(ruby_regex)
|
16
|
+
options = convert_options(ruby_regex)
|
17
|
+
[source, options, warnings]
|
65
18
|
end
|
66
|
-
end
|
67
|
-
|
68
|
-
def converter_for_token_class(token_class)
|
69
|
-
converters[token_class] ||= CONVERTER_MAP[token_class].new(self, context)
|
70
|
-
end
|
71
19
|
|
72
|
-
|
73
|
-
options << 'g' # all Ruby regexes are what is called "global" in JS
|
74
|
-
options << 'i' if (ruby_regex.options & Regexp::IGNORECASE).nonzero?
|
75
|
-
end
|
20
|
+
private
|
76
21
|
|
77
|
-
|
22
|
+
def convert_source(ruby_regex)
|
23
|
+
context = Converter::Context.new(ruby_regex)
|
24
|
+
expression_tree = Regexp::Parser.parse(ruby_regex)
|
25
|
+
[
|
26
|
+
Converter::RootConverter.new.convert(expression_tree, context),
|
27
|
+
context.warnings
|
28
|
+
]
|
29
|
+
end
|
78
30
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
Regexp.new(source.gsub(SURROGATE_CODEPOINT_PATTERN, '.'))
|
84
|
-
rescue ArgumentError, RegexpError, SyntaxError => e
|
85
|
-
self.source = ''
|
86
|
-
warnings << e.message
|
31
|
+
def convert_options(ruby_regex)
|
32
|
+
ignore_case = (ruby_regex.options & Regexp::IGNORECASE).nonzero?
|
33
|
+
ignore_case ? 'gi' : 'g'
|
34
|
+
end
|
87
35
|
end
|
88
36
|
end
|
89
37
|
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
class JsRegex
|
4
|
+
module Converter
|
5
|
+
Dir[File.join(File.dirname(__FILE__), 'converter', '*.rb')].each do |file|
|
6
|
+
require file
|
7
|
+
end
|
8
|
+
|
9
|
+
MAP = Hash.new(UnsupportedTokenConverter).merge(
|
10
|
+
anchor: AnchorConverter,
|
11
|
+
assertion: AssertionConverter,
|
12
|
+
backref: BackreferenceConverter,
|
13
|
+
conditional: ConditionalConverter,
|
14
|
+
escape: EscapeConverter,
|
15
|
+
free_space: FreespaceConverter,
|
16
|
+
group: GroupConverter,
|
17
|
+
literal: LiteralConverter,
|
18
|
+
meta: MetaConverter,
|
19
|
+
nonproperty: NonpropertyConverter,
|
20
|
+
property: PropertyConverter,
|
21
|
+
set: SetConverter,
|
22
|
+
type: TypeConverter
|
23
|
+
).freeze
|
24
|
+
|
25
|
+
def self.for(expression)
|
26
|
+
MAP[expression.type].new
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|
@@ -15,12 +15,19 @@ class JsRegex
|
|
15
15
|
when :bol, :bos then '^'
|
16
16
|
when :eol, :eos then '$'
|
17
17
|
when :eos_ob_eol then '(?=\n?$)'
|
18
|
-
when :word_boundary then '\b'
|
19
|
-
when :nonword_boundary then '\B'
|
18
|
+
when :word_boundary then pass_boundary_with_warning('\b')
|
19
|
+
when :nonword_boundary then pass_boundary_with_warning('\B')
|
20
20
|
else
|
21
21
|
warn_of_unsupported_feature
|
22
22
|
end
|
23
23
|
end
|
24
|
+
|
25
|
+
def pass_boundary_with_warning(boundary)
|
26
|
+
warn("The boundary '#{boundary}' at index #{expression.ts} "\
|
27
|
+
'is not unicode-aware in JavaScript, '\
|
28
|
+
'so it might act differently than in Ruby.')
|
29
|
+
boundary
|
30
|
+
end
|
24
31
|
end
|
25
32
|
end
|
26
33
|
end
|
@@ -16,12 +16,11 @@ class JsRegex
|
|
16
16
|
def convert_data
|
17
17
|
case subtype
|
18
18
|
when :lookahead, :nlookahead
|
19
|
-
|
19
|
+
build_group(capturing: false)
|
20
20
|
when :nlookbehind
|
21
|
-
context.start_negative_lookbehind
|
22
21
|
warn_of_unsupported_feature('negative lookbehind assertion')
|
23
22
|
else # :lookbehind, ...
|
24
|
-
|
23
|
+
build_unsupported_group
|
25
24
|
end
|
26
25
|
end
|
27
26
|
end
|
@@ -12,20 +12,30 @@ class JsRegex
|
|
12
12
|
|
13
13
|
def convert_data
|
14
14
|
case subtype
|
15
|
-
when :number
|
16
|
-
|
15
|
+
when :number, :number_ref
|
16
|
+
convert_number_ref
|
17
|
+
when :number_rel_ref
|
18
|
+
convert_number_rel_ref
|
19
|
+
when :name_ref
|
20
|
+
convert_name_ref
|
17
21
|
else
|
18
22
|
warn_of_unsupported_feature
|
19
23
|
end
|
20
24
|
end
|
21
25
|
|
22
|
-
def
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
def convert_number_ref
|
27
|
+
# after regexp_parser update, replace data[/\d+/] with expression.number
|
28
|
+
"\\#{context.new_capturing_group_position(Integer(data[/\d+/]))}"
|
29
|
+
end
|
30
|
+
|
31
|
+
def convert_number_rel_ref
|
32
|
+
absolute_position = Integer(expression.number) +
|
33
|
+
context.original_capturing_group_count + 1
|
34
|
+
"\\#{context.new_capturing_group_position(absolute_position)}"
|
35
|
+
end
|
36
|
+
|
37
|
+
def convert_name_ref
|
38
|
+
"\\#{context.named_group_positions.fetch(expression.name)}"
|
29
39
|
end
|
30
40
|
end
|
31
41
|
end
|
@@ -6,35 +6,61 @@ class JsRegex
|
|
6
6
|
# Template class. Implement #convert_data in subclasses.
|
7
7
|
#
|
8
8
|
class Base
|
9
|
-
|
9
|
+
def convert(expression, context)
|
10
|
+
self.context = context
|
11
|
+
self.expression = expression
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
@context = context
|
13
|
+
source = convert_data
|
14
|
+
apply_quantifier(source)
|
14
15
|
end
|
15
16
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
self.data = data
|
20
|
-
self.start_index = start_index
|
21
|
-
self.end_index = end_index
|
17
|
+
private
|
18
|
+
|
19
|
+
attr_accessor :context, :expression
|
22
20
|
|
23
|
-
|
24
|
-
|
21
|
+
def subtype
|
22
|
+
expression.token
|
25
23
|
end
|
26
24
|
|
27
|
-
|
25
|
+
def data
|
26
|
+
expression.text
|
27
|
+
end
|
28
|
+
alias pass_through data
|
28
29
|
|
29
|
-
|
30
|
+
def apply_quantifier(source)
|
31
|
+
return source if source.empty? || !(quantifier = expression.quantifier)
|
30
32
|
|
31
|
-
|
33
|
+
if quantifier.mode.equal?(:possessive)
|
34
|
+
context.wrap_in_backrefed_lookahead(source + quantifier.text[0..-2])
|
35
|
+
else
|
36
|
+
source + quantifier
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def convert_subexpressions
|
41
|
+
convert_expressions(subexpressions)
|
42
|
+
end
|
43
|
+
|
44
|
+
def convert_expressions(expressions)
|
45
|
+
expressions.map { |exp| Converter.for(exp).convert(exp, context) }.join
|
46
|
+
end
|
47
|
+
|
48
|
+
def subexpressions
|
49
|
+
expression.expressions
|
50
|
+
end
|
32
51
|
|
33
52
|
def warn_of_unsupported_feature(description = nil)
|
34
|
-
description ||= "#{subtype} #{
|
35
|
-
|
36
|
-
|
37
|
-
|
53
|
+
description ||= "#{subtype} #{expression.type}".tr('_', ' ')
|
54
|
+
full_desc = "#{description} '#{expression}'"
|
55
|
+
warn("Dropped unsupported #{full_desc} at index #{expression.ts}")
|
56
|
+
''
|
57
|
+
end
|
58
|
+
|
59
|
+
def warn(text)
|
60
|
+
context.warnings << text
|
61
|
+
end
|
62
|
+
|
63
|
+
def drop_without_warning
|
38
64
|
''
|
39
65
|
end
|
40
66
|
end
|
@@ -11,15 +11,12 @@ class JsRegex
|
|
11
11
|
private
|
12
12
|
|
13
13
|
def convert_data
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
when :separator, :close
|
19
|
-
pass_through
|
20
|
-
else
|
21
|
-
'' # one warning is enough, don't warn about other parts
|
14
|
+
warn_of_unsupported_feature('conditional')
|
15
|
+
branches = subexpressions.drop(1).each_with_object([]) do |branch, arr|
|
16
|
+
converted_branch = convert_expressions(branch)
|
17
|
+
arr << converted_branch unless converted_branch.eql?('')
|
22
18
|
end
|
19
|
+
"(?:#{branches.join('|')})"
|
23
20
|
end
|
24
21
|
end
|
25
22
|
end
|
@@ -10,120 +10,97 @@ class JsRegex
|
|
10
10
|
class Context
|
11
11
|
attr_reader :buffered_set_extractions,
|
12
12
|
:buffered_set_members,
|
13
|
-
:
|
14
|
-
:
|
15
|
-
:
|
16
|
-
:
|
13
|
+
:in_atomic_group,
|
14
|
+
:named_group_positions,
|
15
|
+
:negative_base_set,
|
16
|
+
:root_options,
|
17
|
+
:warnings
|
17
18
|
|
18
|
-
def initialize
|
19
|
-
self.
|
20
|
-
self.
|
21
|
-
self.
|
22
|
-
self.
|
23
|
-
self.negative_set_levels = []
|
24
|
-
self.set_level = 0
|
25
|
-
end
|
26
|
-
|
27
|
-
def valid?
|
28
|
-
!negative_lookbehind
|
29
|
-
end
|
30
|
-
|
31
|
-
def stacked_quantifier?(quantifier_start_index, quantifier_end_index)
|
32
|
-
is_stacked = last_quantifier_end_index.equal?(quantifier_start_index)
|
33
|
-
self.last_quantifier_end_index = quantifier_end_index
|
34
|
-
is_stacked
|
35
|
-
end
|
36
|
-
|
37
|
-
# set context
|
38
|
-
|
39
|
-
def open_set
|
40
|
-
self.set_level = set_level + 1
|
41
|
-
if set_level == 1
|
42
|
-
buffered_set_members.clear
|
43
|
-
buffered_set_extractions.clear
|
44
|
-
end
|
45
|
-
negative_set_levels.delete(set_level)
|
46
|
-
end
|
19
|
+
def initialize(ruby_regex)
|
20
|
+
self.added_capturing_groups_after_group = Hash.new(0)
|
21
|
+
self.capturing_group_count = 0
|
22
|
+
self.named_group_positions = {}
|
23
|
+
self.warnings = []
|
47
24
|
|
48
|
-
|
49
|
-
|
25
|
+
self.root_options = {}
|
26
|
+
root_options[:m] = !(ruby_regex.options & Regexp::MULTILINE).equal?(0)
|
50
27
|
end
|
51
28
|
|
52
|
-
|
53
|
-
self.set_level = set_level - 1
|
54
|
-
end
|
29
|
+
# option context
|
55
30
|
|
56
|
-
def
|
57
|
-
|
31
|
+
def multiline?
|
32
|
+
root_options.fetch(:m)
|
58
33
|
end
|
59
34
|
|
60
|
-
|
61
|
-
negative_set_levels.include?(level)
|
62
|
-
end
|
35
|
+
# set context
|
63
36
|
|
64
|
-
def
|
65
|
-
|
37
|
+
def negate_base_set
|
38
|
+
self.negative_base_set = true
|
66
39
|
end
|
67
40
|
|
68
|
-
def
|
69
|
-
|
41
|
+
def reset_set_context
|
42
|
+
self.buffered_set_extractions = []
|
43
|
+
self.buffered_set_members = []
|
44
|
+
self.negative_base_set = false
|
70
45
|
end
|
71
46
|
|
72
47
|
# group context
|
73
48
|
|
74
|
-
def open_group
|
75
|
-
self.group_level = group_level + 1
|
76
|
-
end
|
77
|
-
|
78
49
|
def capture_group
|
79
|
-
self.
|
50
|
+
self.capturing_group_count = capturing_group_count + 1
|
80
51
|
end
|
81
52
|
|
82
53
|
def start_atomic_group
|
83
|
-
self.
|
54
|
+
self.in_atomic_group = true
|
84
55
|
end
|
85
56
|
|
86
|
-
def
|
87
|
-
self.
|
57
|
+
def end_atomic_group
|
58
|
+
self.in_atomic_group = false
|
88
59
|
end
|
89
60
|
|
90
|
-
def
|
91
|
-
|
61
|
+
def wrap_in_backrefed_lookahead(content)
|
62
|
+
new_backref_num = capturing_group_count + 1
|
63
|
+
# an empty passive group (?:) is appended as literal digits may follow
|
64
|
+
result = "(?=(#{content}))\\#{new_backref_num}(?:)"
|
65
|
+
added_capturing_groups_after_group[original_capturing_group_count] += 1
|
66
|
+
capture_group
|
67
|
+
result
|
92
68
|
end
|
93
69
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
70
|
+
# takes and returns 1-indexed group positions.
|
71
|
+
# new is different from old if capturing groups were added in between.
|
72
|
+
def new_capturing_group_position(old_position)
|
73
|
+
increment = 0
|
74
|
+
added_capturing_groups_after_group.each do |after_n_groups, count|
|
75
|
+
increment += count if after_n_groups < old_position
|
76
|
+
end
|
77
|
+
old_position + increment
|
98
78
|
end
|
99
79
|
|
100
|
-
def
|
101
|
-
|
102
|
-
self.negative_lookbehind = false
|
80
|
+
def original_capturing_group_count
|
81
|
+
capturing_group_count - total_added_capturing_groups
|
103
82
|
end
|
104
83
|
|
105
|
-
def
|
106
|
-
|
84
|
+
def total_added_capturing_groups
|
85
|
+
added_capturing_groups_after_group.values.inject(0, &:+)
|
107
86
|
end
|
108
87
|
|
109
|
-
def
|
110
|
-
|
111
|
-
group_level.equal?(group_level_for_backreference + 1)
|
88
|
+
def store_named_group_position(name)
|
89
|
+
named_group_positions[name] = capturing_group_count + 1
|
112
90
|
end
|
113
91
|
|
114
92
|
private
|
115
93
|
|
116
|
-
attr_accessor :
|
117
|
-
:
|
118
|
-
:negative_set_levels,
|
119
|
-
:set_level
|
94
|
+
attr_accessor :added_capturing_groups_after_group,
|
95
|
+
:capturing_group_count
|
120
96
|
|
121
97
|
attr_writer :buffered_set_extractions,
|
122
98
|
:buffered_set_members,
|
123
|
-
:
|
124
|
-
:
|
125
|
-
:
|
126
|
-
:
|
99
|
+
:in_atomic_group,
|
100
|
+
:named_group_positions,
|
101
|
+
:negative_base_set,
|
102
|
+
:root_options,
|
103
|
+
:warnings
|
127
104
|
end
|
128
105
|
end
|
129
106
|
end
|
@@ -39,12 +39,16 @@ class JsRegex
|
|
39
39
|
case subtype
|
40
40
|
when :codepoint_list
|
41
41
|
convert_codepoint_list
|
42
|
+
when :control
|
43
|
+
convert_control_sequence
|
42
44
|
when :literal
|
43
45
|
LiteralConverter.convert_data(data)
|
46
|
+
when :meta_sequence
|
47
|
+
convert_meta_sequence
|
44
48
|
when *ESCAPES_SHARED_BY_RUBY_AND_JS
|
45
49
|
pass_through
|
46
50
|
else
|
47
|
-
# Bell, Escape, HexWide,
|
51
|
+
# Bell, Escape, HexWide, ...
|
48
52
|
warn_of_unsupported_feature
|
49
53
|
end
|
50
54
|
end
|
@@ -56,6 +60,35 @@ class JsRegex
|
|
56
60
|
end
|
57
61
|
elements.join
|
58
62
|
end
|
63
|
+
|
64
|
+
def convert_control_sequence
|
65
|
+
convert_meta_control_sequence ||
|
66
|
+
unicode_escape_for(control_sequence_to_s(data))
|
67
|
+
end
|
68
|
+
|
69
|
+
def convert_meta_sequence
|
70
|
+
convert_meta_control_sequence ||
|
71
|
+
unicode_escape_for(meta_char_to_char_code(data[-1]))
|
72
|
+
end
|
73
|
+
|
74
|
+
def convert_meta_control_sequence
|
75
|
+
return unless expression.class.to_s.include?('MetaControl')
|
76
|
+
unicode_escape_for(meta_char_to_char_code(control_sequence_to_s(data)))
|
77
|
+
end
|
78
|
+
|
79
|
+
def unicode_escape_for(char)
|
80
|
+
"\\u#{char.ord.to_s(16).upcase.rjust(4, '0')}"
|
81
|
+
end
|
82
|
+
|
83
|
+
def control_sequence_to_s(control_sequence)
|
84
|
+
five_lsb = control_sequence.unpack('B*').first[-5..-1]
|
85
|
+
["000#{five_lsb}"].pack('B*')
|
86
|
+
end
|
87
|
+
|
88
|
+
def meta_char_to_char_code(meta_char)
|
89
|
+
byte_value = meta_char.ord
|
90
|
+
byte_value < 128 ? byte_value + 128 : byte_value
|
91
|
+
end
|
59
92
|
end
|
60
93
|
end
|
61
94
|
end
|
@@ -12,66 +12,52 @@ class JsRegex
|
|
12
12
|
|
13
13
|
def convert_data
|
14
14
|
case subtype
|
15
|
-
when :atomic then
|
16
|
-
when :capture then
|
17
|
-
when :
|
18
|
-
when :
|
19
|
-
when :
|
20
|
-
when :
|
21
|
-
when :
|
22
|
-
else
|
15
|
+
when :atomic then emulate_atomic_group
|
16
|
+
when :capture then build_group
|
17
|
+
when :comment then drop_without_warning
|
18
|
+
when :named then build_named_group
|
19
|
+
when :options then build_options_group
|
20
|
+
when :passive then build_passive_group
|
21
|
+
when :absence then warn_of_unsupported_feature
|
22
|
+
else build_unsupported_group
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
-
def
|
27
|
-
|
28
|
-
|
29
|
-
# regex-emulate-atomic-grouping-with-lookahead
|
30
|
-
if context.atomic_group?
|
31
|
-
open_unsupported_group('nested atomic group')
|
26
|
+
def emulate_atomic_group
|
27
|
+
if context.in_atomic_group
|
28
|
+
build_unsupported_group('nested atomic group')
|
32
29
|
else
|
33
30
|
context.start_atomic_group
|
34
|
-
|
31
|
+
result = context.wrap_in_backrefed_lookahead(convert_subexpressions)
|
32
|
+
context.end_atomic_group
|
33
|
+
result
|
35
34
|
end
|
36
35
|
end
|
37
36
|
|
38
|
-
def
|
39
|
-
# drop name without warning
|
40
|
-
|
37
|
+
def build_named_group
|
38
|
+
# remember position, then drop name part without warning
|
39
|
+
context.store_named_group_position(expression.name)
|
40
|
+
build_group(head: '(')
|
41
41
|
end
|
42
42
|
|
43
|
-
def
|
43
|
+
def build_options_group
|
44
44
|
warn_of_unsupported_feature('group-specific options')
|
45
|
-
|
45
|
+
build_group(head: '(')
|
46
46
|
end
|
47
47
|
|
48
|
-
def
|
49
|
-
|
48
|
+
def build_passive_group
|
49
|
+
build_group(head: '(?:', capturing: false)
|
50
50
|
end
|
51
51
|
|
52
|
-
def
|
52
|
+
def build_unsupported_group(description = nil)
|
53
53
|
warn_of_unsupported_feature(description)
|
54
|
-
|
54
|
+
build_passive_group
|
55
55
|
end
|
56
56
|
|
57
|
-
def
|
58
|
-
context.open_group
|
57
|
+
def build_group(opts = {})
|
59
58
|
context.capture_group unless opts[:capturing].equal?(false)
|
60
|
-
opts[:head] || pass_through
|
61
|
-
|
62
|
-
|
63
|
-
def close_group
|
64
|
-
if context.negative_lookbehind
|
65
|
-
context.close_negative_lookbehind
|
66
|
-
''
|
67
|
-
elsif context.base_level_of_atomic_group?
|
68
|
-
context.close_atomic_group
|
69
|
-
# an empty passive group (?:) is appended as literal digits may follow
|
70
|
-
"))\\#{context.captured_group_count}(?:)"
|
71
|
-
else
|
72
|
-
context.close_group
|
73
|
-
')'
|
74
|
-
end
|
59
|
+
head = opts[:head] || pass_through
|
60
|
+
"#{head}#{convert_subexpressions})"
|
75
61
|
end
|
76
62
|
end
|
77
63
|
end
|
@@ -13,16 +13,20 @@ class JsRegex
|
|
13
13
|
def convert_data
|
14
14
|
case subtype
|
15
15
|
when :alternation
|
16
|
-
|
16
|
+
convert_alternation
|
17
17
|
when :dot
|
18
|
-
|
18
|
+
context.multiline? ? '(?:.|\n)' : '.'
|
19
19
|
else
|
20
20
|
warn_of_unsupported_feature
|
21
21
|
end
|
22
22
|
end
|
23
23
|
|
24
|
-
def
|
25
|
-
(
|
24
|
+
def convert_alternation
|
25
|
+
branches = subexpressions.each_with_object([]) do |branch, arr|
|
26
|
+
converted_branch = convert_expressions(branch.expressions)
|
27
|
+
arr << converted_branch unless converted_branch.eql?('')
|
28
|
+
end
|
29
|
+
branches.join('|')
|
26
30
|
end
|
27
31
|
end
|
28
32
|
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'base'
|
4
|
+
|
5
|
+
class JsRegex
|
6
|
+
module Converter
|
7
|
+
#
|
8
|
+
# Template class implementation.
|
9
|
+
#
|
10
|
+
class RootConverter < JsRegex::Converter::Base
|
11
|
+
private
|
12
|
+
|
13
|
+
def convert_data
|
14
|
+
convert_subexpressions
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -3,107 +3,102 @@
|
|
3
3
|
require_relative 'base'
|
4
4
|
require_relative 'literal_converter'
|
5
5
|
require_relative 'property_converter'
|
6
|
-
require_relative 'type_converter'
|
7
6
|
|
8
7
|
class JsRegex
|
9
8
|
module Converter
|
10
9
|
#
|
11
10
|
# Template class implementation.
|
12
11
|
#
|
13
|
-
# This converter works a little differently from the others.
|
14
|
-
#
|
15
|
-
# It buffers anything that it finds within a set in the Context's
|
16
|
-
# #buffered_set_members and #buffered_set_extractions Arrays,
|
17
|
-
# returning an empty String for all passed tokens, and only when
|
18
|
-
# the set is closed does it compile and return the final String.
|
19
|
-
#
|
20
12
|
class SetConverter < JsRegex::Converter::Base
|
21
13
|
private
|
22
14
|
|
23
15
|
def convert_data
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
when :intersection then warn_of_unsupported_feature('set intersection')
|
34
|
-
else try_replacing_potential_property_subtype
|
16
|
+
if expression.set_level.equal?(0) # reached end of set expression
|
17
|
+
context.reset_set_context
|
18
|
+
context.negate_base_set if negative_set?
|
19
|
+
process_members
|
20
|
+
finalize_set
|
21
|
+
elsif negative_set?
|
22
|
+
warn_of_unsupported_feature('nested negative set data')
|
23
|
+
else # positive subset
|
24
|
+
process_members
|
35
25
|
end
|
36
26
|
end
|
37
27
|
|
38
|
-
def
|
39
|
-
|
40
|
-
''
|
28
|
+
def negative_set?
|
29
|
+
expression.negative?
|
41
30
|
end
|
42
31
|
|
43
|
-
def
|
44
|
-
|
45
|
-
warn_of_unsupported_feature('nested negative set data')
|
46
|
-
end
|
47
|
-
context.negate_set
|
48
|
-
''
|
32
|
+
def process_members
|
33
|
+
expression.each { |member| process_member(member) }
|
49
34
|
end
|
50
35
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
36
|
+
ASTRAL_PLANE_PATTERN = /[\u{10000}-\u{FFFFF}]/
|
37
|
+
PROPERTY_PATTERN = /\A(?:\[:|\\([pP])\{)(\^?)([^:\}]+)/
|
38
|
+
|
39
|
+
def process_member(member)
|
40
|
+
return convert_subset(member) unless member.instance_of?(String)
|
55
41
|
|
56
|
-
|
57
|
-
utf8_data
|
58
|
-
|
42
|
+
utf8_data = member.dup.force_encoding('UTF-8')
|
43
|
+
case utf8_data
|
44
|
+
when ASTRAL_PLANE_PATTERN
|
59
45
|
warn_of_unsupported_feature('astral plane set member')
|
46
|
+
when '\\h'
|
47
|
+
handle_hex_type
|
48
|
+
when '\\H'
|
49
|
+
handle_nonhex_type
|
50
|
+
when '&&'
|
51
|
+
warn_of_unsupported_feature('set intersection')
|
52
|
+
when PROPERTY_PATTERN
|
53
|
+
handle_property($1, $2, $3)
|
60
54
|
else
|
61
55
|
literal_conversion = LiteralConverter.convert_data(utf8_data)
|
62
56
|
buffer_set_member(literal_conversion)
|
63
57
|
end
|
64
58
|
end
|
65
59
|
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
60
|
+
HEX_RANGES = 'A-Fa-f0-9'
|
61
|
+
NONHEX_SET = '[^A-Fa-f0-9]'
|
62
|
+
|
63
|
+
def handle_hex_type
|
64
|
+
buffer_set_member(HEX_RANGES)
|
70
65
|
end
|
71
66
|
|
72
|
-
def
|
73
|
-
|
74
|
-
|
67
|
+
def handle_nonhex_type
|
68
|
+
if context.negative_base_set
|
69
|
+
warn_of_unsupported_feature('nonhex type in negative set')
|
70
|
+
else
|
71
|
+
buffer_set_extraction(NONHEX_SET)
|
72
|
+
end
|
75
73
|
end
|
76
74
|
|
77
|
-
def
|
78
|
-
if
|
75
|
+
def handle_property(sign, caret, name)
|
76
|
+
if context.negative_base_set
|
77
|
+
return warn_of_unsupported_feature('property in negative set')
|
78
|
+
end
|
79
|
+
std = standardize_property_name(name)
|
80
|
+
negated = sign.eql?('P') ^ caret.eql?('^')
|
81
|
+
if (replacement = PropertyConverter.property_replacement(std, negated))
|
79
82
|
buffer_set_extraction(replacement)
|
80
83
|
else
|
81
84
|
warn_of_unsupported_feature('property')
|
82
85
|
end
|
83
86
|
end
|
84
87
|
|
85
|
-
def
|
86
|
-
|
87
|
-
buffer_set_extraction(TypeConverter::HEX_EXPANSION)
|
88
|
-
elsif subtype.equal?(:type_nonhex)
|
89
|
-
buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
|
90
|
-
else
|
91
|
-
buffer_set_member(data)
|
92
|
-
end
|
88
|
+
def standardize_property_name(name)
|
89
|
+
Regexp::Parser.parse("\\p{#{name}}").expressions.first.token
|
93
90
|
end
|
94
91
|
|
95
|
-
def
|
96
|
-
|
92
|
+
def buffer_set_member(data)
|
93
|
+
context.buffered_set_members << data
|
97
94
|
end
|
98
95
|
|
99
|
-
def
|
100
|
-
context.
|
101
|
-
''
|
96
|
+
def buffer_set_extraction(data)
|
97
|
+
context.buffered_set_extractions << data
|
102
98
|
end
|
103
99
|
|
104
|
-
def
|
105
|
-
|
106
|
-
''
|
100
|
+
def convert_subset(subset)
|
101
|
+
SetConverter.new.convert(subset, context)
|
107
102
|
end
|
108
103
|
|
109
104
|
def finalize_set
|
@@ -125,7 +120,7 @@ class JsRegex
|
|
125
120
|
end
|
126
121
|
|
127
122
|
def finalize_nondepleted_set(buffered_members, buffered_extractions)
|
128
|
-
set = "[#{'^' if
|
123
|
+
set = "[#{'^' if negative_set?}#{buffered_members.join}]"
|
129
124
|
if buffered_extractions.empty?
|
130
125
|
set
|
131
126
|
else
|
@@ -8,8 +8,9 @@ class JsRegex
|
|
8
8
|
# Template class implementation.
|
9
9
|
#
|
10
10
|
class TypeConverter < JsRegex::Converter::Base
|
11
|
-
HEX_EXPANSION
|
12
|
-
NONHEX_EXPANSION
|
11
|
+
HEX_EXPANSION = '[A-Fa-f0-9]'
|
12
|
+
NONHEX_EXPANSION = '[^A-Fa-f0-9]'
|
13
|
+
LINEBREAK_EXPANSION = '(\r\n|\r|\n)'
|
13
14
|
|
14
15
|
private
|
15
16
|
|
@@ -17,6 +18,7 @@ class JsRegex
|
|
17
18
|
case subtype
|
18
19
|
when :hex then HEX_EXPANSION
|
19
20
|
when :nonhex then NONHEX_EXPANSION
|
21
|
+
when :linebreak then LINEBREAK_EXPANSION
|
20
22
|
when :digit, :nondigit, :word, :nonword, :space, :nonspace
|
21
23
|
pass_through
|
22
24
|
else
|
data/lib/js_regex/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: regexp_parser
|
@@ -16,7 +16,7 @@ dependencies:
|
|
16
16
|
requirements:
|
17
17
|
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: 0.
|
19
|
+
version: 0.4.6
|
20
20
|
- - "<="
|
21
21
|
- !ruby/object:Gem::Version
|
22
22
|
version: 0.5.0
|
@@ -26,7 +26,7 @@ dependencies:
|
|
26
26
|
requirements:
|
27
27
|
- - ">="
|
28
28
|
- !ruby/object:Gem::Version
|
29
|
-
version: 0.
|
29
|
+
version: 0.4.6
|
30
30
|
- - "<="
|
31
31
|
- !ruby/object:Gem::Version
|
32
32
|
version: 0.5.0
|
@@ -100,6 +100,34 @@ dependencies:
|
|
100
100
|
- - "~>"
|
101
101
|
- !ruby/object:Gem::Version
|
102
102
|
version: '0.12'
|
103
|
+
- !ruby/object:Gem::Dependency
|
104
|
+
name: codeclimate-test-reporter
|
105
|
+
requirement: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - "~>"
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '1.0'
|
110
|
+
type: :development
|
111
|
+
prerelease: false
|
112
|
+
version_requirements: !ruby/object:Gem::Requirement
|
113
|
+
requirements:
|
114
|
+
- - "~>"
|
115
|
+
- !ruby/object:Gem::Version
|
116
|
+
version: '1.0'
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: mutant-rspec
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - "~>"
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0.8'
|
124
|
+
type: :development
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - "~>"
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0.8'
|
103
131
|
description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
|
104
132
|
care of various incompatibilities and returning warnings for unsolvable differences.
|
105
133
|
email:
|
@@ -110,6 +138,7 @@ extra_rdoc_files: []
|
|
110
138
|
files:
|
111
139
|
- lib/js_regex.rb
|
112
140
|
- lib/js_regex/conversion.rb
|
141
|
+
- lib/js_regex/converter.rb
|
113
142
|
- lib/js_regex/converter/anchor_converter.rb
|
114
143
|
- lib/js_regex/converter/assertion_converter.rb
|
115
144
|
- lib/js_regex/converter/backreference_converter.rb
|
@@ -123,7 +152,7 @@ files:
|
|
123
152
|
- lib/js_regex/converter/meta_converter.rb
|
124
153
|
- lib/js_regex/converter/nonproperty_converter.rb
|
125
154
|
- lib/js_regex/converter/property_converter.rb
|
126
|
-
- lib/js_regex/converter/
|
155
|
+
- lib/js_regex/converter/root_converter.rb
|
127
156
|
- lib/js_regex/converter/set_converter.rb
|
128
157
|
- lib/js_regex/converter/type_converter.rb
|
129
158
|
- lib/js_regex/converter/unsupported_token_converter.rb
|
@@ -149,7 +178,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
149
178
|
version: '0'
|
150
179
|
requirements: []
|
151
180
|
rubyforge_project:
|
152
|
-
rubygems_version: 2.6.
|
181
|
+
rubygems_version: 2.6.13
|
153
182
|
signing_key:
|
154
183
|
specification_version: 4
|
155
184
|
summary: Converts Ruby regexes to JavaScript regexes.
|
@@ -1,31 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'base'
|
4
|
-
|
5
|
-
class JsRegex
|
6
|
-
module Converter
|
7
|
-
#
|
8
|
-
# Template class implementation.
|
9
|
-
#
|
10
|
-
class QuantifierConverter < JsRegex::Converter::Base
|
11
|
-
private
|
12
|
-
|
13
|
-
def convert_data
|
14
|
-
if context.stacked_quantifier?(start_index, end_index)
|
15
|
-
warn_of_unsupported_feature('adjacent quantifiers')
|
16
|
-
else
|
17
|
-
convert_quantifier
|
18
|
-
end
|
19
|
-
end
|
20
|
-
|
21
|
-
def convert_quantifier
|
22
|
-
if data.length > 1 && data.end_with?('+')
|
23
|
-
warn_of_unsupported_feature('declaration of quantifier as possessive')
|
24
|
-
data[0..-2]
|
25
|
-
else
|
26
|
-
pass_through
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|