js_regex 1.0.19 → 1.1.0
This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/js_regex/conversion.rb +24 -22
- data/lib/js_regex/converter/anchor_converter.rb +2 -0
- data/lib/js_regex/converter/assertion_converter.rb +4 -2
- data/lib/js_regex/converter/backreference_converter.rb +2 -0
- data/lib/js_regex/converter/base.rb +3 -1
- data/lib/js_regex/converter/conditional_converter.rb +3 -1
- data/lib/js_regex/converter/context.rb +81 -19
- data/lib/js_regex/converter/escape_converter.rb +4 -4
- data/lib/js_regex/converter/freespace_converter.rb +2 -0
- data/lib/js_regex/converter/group_converter.rb +24 -32
- data/lib/js_regex/converter/literal_converter.rb +30 -16
- data/lib/js_regex/converter/meta_converter.rb +3 -2
- data/lib/js_regex/converter/nonproperty_converter.rb +8 -1
- data/lib/js_regex/converter/property_converter.rb +17 -12
- data/lib/js_regex/converter/quantifier_converter.rb +7 -5
- data/lib/js_regex/converter/set_converter.rb +41 -36
- data/lib/js_regex/converter/type_converter.rb +5 -3
- data/lib/js_regex/converter/unsupported_token_converter.rb +2 -0
- data/lib/js_regex/property_map.rb +5 -2
- data/lib/js_regex/version.rb +5 -0
- data/lib/js_regex.rb +1 -0
- metadata +35 -14
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1eb9a34e224340fb10bcaac25b7adee037279a2e
|
4
|
+
data.tar.gz: a50cc191b462e501e194308f9eb06c893b177657
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7156fa441d772630f4d1f947e029f11355c74c85a563cf8b5ed09f977930e238c0e7951856da0163d8c255620ae3a64f225d50b5d77f77fbe60025052929f46
|
7
|
+
data.tar.gz: 72bb8e367bc70bef958957ecc8444af459e26e4aa4111fcae10d6bd80fb863120161c13a5beedb9210a700fcfa0896004b4f06605a54bebe96dd32a4831ab7f6
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class JsRegex
|
2
4
|
#
|
3
5
|
# This class acts as a facade, creating specific Converters and
|
@@ -12,17 +14,17 @@ class JsRegex
|
|
12
14
|
attr_reader :ruby_regex, :context, :converters, :source, :options, :warnings
|
13
15
|
|
14
16
|
def initialize(ruby_regex)
|
15
|
-
|
17
|
+
self.ruby_regex = ruby_regex
|
16
18
|
|
17
|
-
|
18
|
-
|
19
|
+
self.context = Converter::Context.new
|
20
|
+
self.converters = {}
|
19
21
|
|
20
|
-
|
21
|
-
|
22
|
-
|
22
|
+
self.source = ''.dup
|
23
|
+
self.options = ''.dup
|
24
|
+
self.warnings = []
|
23
25
|
|
24
|
-
convert_source
|
25
|
-
convert_options
|
26
|
+
convert_source
|
27
|
+
convert_options
|
26
28
|
perform_sanity_check
|
27
29
|
end
|
28
30
|
|
@@ -33,6 +35,8 @@ class JsRegex
|
|
33
35
|
|
34
36
|
private
|
35
37
|
|
38
|
+
attr_writer :ruby_regex, :context, :converters, :source, :options, :warnings
|
39
|
+
|
36
40
|
CONVERTER_MAP = Hash.new(Converter::UnsupportedTokenConverter).merge(
|
37
41
|
anchor: Converter::AnchorConverter,
|
38
42
|
assertion: Converter::AssertionConverter,
|
@@ -51,36 +55,34 @@ class JsRegex
|
|
51
55
|
type: Converter::TypeConverter
|
52
56
|
).freeze
|
53
57
|
|
54
|
-
def convert_source
|
58
|
+
def convert_source
|
55
59
|
Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
|
56
60
|
# There might be a lot of tokens, so don't wrap their data in objects.
|
57
61
|
# Even just wrapping them in simple structs or attr_reader objects
|
58
62
|
# can lead to 60%+ longer processing times for large regexes.
|
59
|
-
|
63
|
+
converter_for_token_class(token_class)
|
64
|
+
.convert(token_class, subtype, data, s, e)
|
60
65
|
end
|
61
|
-
converters.clear
|
62
|
-
end
|
63
|
-
|
64
|
-
def convert_token(token_class, subtype, data, s, e)
|
65
|
-
converter = converter_for_token_class(token_class)
|
66
|
-
converter.convert(token_class, subtype, data, s, e)
|
67
66
|
end
|
68
67
|
|
69
68
|
def converter_for_token_class(token_class)
|
70
69
|
converters[token_class] ||= CONVERTER_MAP[token_class].new(self, context)
|
71
70
|
end
|
72
71
|
|
73
|
-
def convert_options
|
74
|
-
|
75
|
-
|
72
|
+
def convert_options
|
73
|
+
options << 'g' # all Ruby regexes are what is called "global" in JS
|
74
|
+
options << 'i' if (ruby_regex.options & Regexp::IGNORECASE).nonzero?
|
76
75
|
end
|
77
76
|
|
77
|
+
SURROGATE_CODEPOINT_PATTERN = /\\uD[89A-F]\h\h/i
|
78
|
+
|
78
79
|
def perform_sanity_check
|
79
80
|
# Ruby regex capabilities are a superset of JS regex capabilities in
|
80
|
-
# the source part. So if this raises an Error, a Converter messed up
|
81
|
-
|
81
|
+
# the source part. So if this raises an Error, a Converter messed up.
|
82
|
+
# Ignore that Ruby won't accept surrogate pairs, though.
|
83
|
+
Regexp.new(source.gsub(SURROGATE_CODEPOINT_PATTERN, '.'))
|
82
84
|
rescue ArgumentError, RegexpError, SyntaxError => e
|
83
|
-
|
85
|
+
self.source = ''
|
84
86
|
warnings << e.message
|
85
87
|
end
|
86
88
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
require_relative 'group_converter'
|
3
5
|
|
@@ -14,9 +16,9 @@ class JsRegex
|
|
14
16
|
def convert_data
|
15
17
|
case subtype
|
16
18
|
when :lookahead, :nlookahead
|
17
|
-
open_group(
|
19
|
+
open_group(capturing: false)
|
18
20
|
when :nlookbehind
|
19
|
-
context.
|
21
|
+
context.start_negative_lookbehind
|
20
22
|
warn_of_unsupported_feature('negative lookbehind assertion')
|
21
23
|
else # :lookbehind, ...
|
22
24
|
open_unsupported_group
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class JsRegex
|
2
4
|
module Converter
|
3
5
|
#
|
@@ -31,7 +33,7 @@ class JsRegex
|
|
31
33
|
def warn_of_unsupported_feature(description = nil)
|
32
34
|
description ||= "#{subtype} #{token_class} '#{data}'".tr('_', ' ')
|
33
35
|
target.warnings << "Dropped unsupported #{description} "\
|
34
|
-
"at index #{start_index}
|
36
|
+
"at index #{start_index}...#{end_index}"
|
35
37
|
''
|
36
38
|
end
|
37
39
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -11,7 +13,7 @@ class JsRegex
|
|
11
13
|
def convert_data
|
12
14
|
case subtype
|
13
15
|
when :open
|
14
|
-
warn_of_unsupported_feature("conditional '(?'")
|
16
|
+
warn_of_unsupported_feature("conditional '(?('")
|
15
17
|
'('
|
16
18
|
when :separator, :close
|
17
19
|
pass_through
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
class JsRegex
|
2
4
|
module Converter
|
3
5
|
#
|
@@ -6,25 +8,20 @@ class JsRegex
|
|
6
8
|
# The Converters themselves are stateless.
|
7
9
|
#
|
8
10
|
class Context
|
9
|
-
attr_accessor :
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
:previous_quantifier_end,
|
18
|
-
:previous_quantifier_subtype,
|
19
|
-
:set_level
|
11
|
+
attr_accessor :previous_quantifier_end # , :previous_quantifier_type
|
12
|
+
|
13
|
+
attr_reader :buffered_set_extractions,
|
14
|
+
:buffered_set_members,
|
15
|
+
:captured_group_count,
|
16
|
+
:group_count_changed,
|
17
|
+
:group_level_for_backreference,
|
18
|
+
:negative_lookbehind
|
20
19
|
|
21
20
|
def initialize
|
22
21
|
self.buffered_set_members = []
|
23
22
|
self.buffered_set_extractions = []
|
24
23
|
self.captured_group_count = 0
|
25
|
-
self.group_count_changed = false
|
26
24
|
self.group_level = 0
|
27
|
-
self.negative_lookbehind = false
|
28
25
|
self.negative_set_levels = []
|
29
26
|
self.set_level = 0
|
30
27
|
end
|
@@ -36,16 +33,24 @@ class JsRegex
|
|
36
33
|
# set context
|
37
34
|
|
38
35
|
def open_set
|
39
|
-
self.set_level
|
36
|
+
self.set_level = set_level + 1
|
40
37
|
if set_level == 1
|
41
38
|
buffered_set_members.clear
|
42
39
|
buffered_set_extractions.clear
|
43
40
|
end
|
44
|
-
|
41
|
+
negative_set_levels.delete(set_level)
|
45
42
|
end
|
46
43
|
|
47
44
|
def negate_set
|
48
|
-
self.negative_set_levels
|
45
|
+
self.negative_set_levels = negative_set_levels | [set_level]
|
46
|
+
end
|
47
|
+
|
48
|
+
def close_set
|
49
|
+
self.set_level = set_level - 1
|
50
|
+
end
|
51
|
+
|
52
|
+
def set?
|
53
|
+
set_level > 0
|
49
54
|
end
|
50
55
|
|
51
56
|
def negative_set?(level = set_level)
|
@@ -53,12 +58,69 @@ class JsRegex
|
|
53
58
|
end
|
54
59
|
|
55
60
|
def nested_negation?
|
56
|
-
|
61
|
+
nested_set? && negative_set?
|
57
62
|
end
|
58
63
|
|
59
|
-
def
|
60
|
-
|
64
|
+
def nested_set?
|
65
|
+
set_level > 1
|
66
|
+
end
|
67
|
+
|
68
|
+
# group context
|
69
|
+
|
70
|
+
def open_group
|
71
|
+
self.group_level = group_level + 1
|
72
|
+
end
|
73
|
+
|
74
|
+
def capture_group
|
75
|
+
self.captured_group_count = captured_group_count + 1
|
76
|
+
end
|
77
|
+
|
78
|
+
def start_atomic_group
|
79
|
+
self.group_level_for_backreference = group_level
|
80
|
+
end
|
81
|
+
|
82
|
+
def start_negative_lookbehind
|
83
|
+
self.negative_lookbehind = true
|
84
|
+
end
|
85
|
+
|
86
|
+
def close_group
|
87
|
+
self.group_level = group_level - 1
|
88
|
+
end
|
89
|
+
|
90
|
+
def close_atomic_group
|
91
|
+
close_group
|
92
|
+
self.group_level_for_backreference = nil
|
93
|
+
self.group_count_changed = true
|
61
94
|
end
|
95
|
+
|
96
|
+
def close_negative_lookbehind
|
97
|
+
close_group
|
98
|
+
self.negative_lookbehind = false
|
99
|
+
end
|
100
|
+
|
101
|
+
def group?
|
102
|
+
group_level > 0
|
103
|
+
end
|
104
|
+
|
105
|
+
def atomic_group?
|
106
|
+
group_level_for_backreference
|
107
|
+
end
|
108
|
+
|
109
|
+
def base_level_of_atomic_group?
|
110
|
+
group_level_for_backreference &&
|
111
|
+
group_level.equal?(group_level_for_backreference + 1)
|
112
|
+
end
|
113
|
+
|
114
|
+
private
|
115
|
+
|
116
|
+
attr_accessor :group_level, :negative_set_levels, :set_level
|
117
|
+
|
118
|
+
attr_writer :buffered_set_extractions,
|
119
|
+
:buffered_set_members,
|
120
|
+
:captured_group_count,
|
121
|
+
:group_count_changed,
|
122
|
+
:group_level_for_backreference,
|
123
|
+
:negative_lookbehind
|
62
124
|
end
|
63
125
|
end
|
64
126
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
require_relative 'literal_converter'
|
3
5
|
|
@@ -24,19 +26,17 @@ class JsRegex
|
|
24
26
|
:newline,
|
25
27
|
:octal,
|
26
28
|
:one_or_more,
|
27
|
-
:return,
|
28
29
|
:set_close,
|
29
30
|
:set_open,
|
30
|
-
:space,
|
31
31
|
:tab,
|
32
32
|
:vertical_tab,
|
33
33
|
:zero_or_more,
|
34
34
|
:zero_or_one
|
35
35
|
pass_through
|
36
36
|
when :literal
|
37
|
-
LiteralConverter.
|
37
|
+
LiteralConverter.convert_data(data)
|
38
38
|
else
|
39
|
-
#
|
39
|
+
# Bell, Escape, HexWide, Control, Meta, MetaControl, ...
|
40
40
|
warn_of_unsupported_feature
|
41
41
|
end
|
42
42
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -13,7 +15,7 @@ class JsRegex
|
|
13
15
|
when :atomic then open_atomic_group
|
14
16
|
when :capture then open_group
|
15
17
|
when :close then close_group
|
16
|
-
when :comment then '' # drop whole group
|
18
|
+
when :comment then '' # drop whole group without warning
|
17
19
|
when :named_ab, :named_sq then open_named_group
|
18
20
|
when :options then open_options_group
|
19
21
|
when :passive then open_passive_group
|
@@ -25,12 +27,16 @@ class JsRegex
|
|
25
27
|
# Atomicity is emulated using backreferenced lookahead groups:
|
26
28
|
# http://instanceof.me/post/52245507631
|
27
29
|
# regex-emulate-atomic-grouping-with-lookahead
|
28
|
-
|
29
|
-
|
30
|
+
if context.atomic_group?
|
31
|
+
open_unsupported_group('nested atomic group')
|
32
|
+
else
|
33
|
+
context.start_atomic_group
|
34
|
+
open_group(head: '(?=(')
|
35
|
+
end
|
30
36
|
end
|
31
37
|
|
32
38
|
def open_named_group
|
33
|
-
# drop name
|
39
|
+
# drop name without warning
|
34
40
|
open_group(head: '(')
|
35
41
|
end
|
36
42
|
|
@@ -40,47 +46,33 @@ class JsRegex
|
|
40
46
|
end
|
41
47
|
|
42
48
|
def open_passive_group
|
43
|
-
open_group(head: '(?:',
|
49
|
+
open_group(head: '(?:', capturing: false)
|
44
50
|
end
|
45
51
|
|
46
|
-
def open_unsupported_group
|
47
|
-
warn_of_unsupported_feature
|
52
|
+
def open_unsupported_group(description = nil)
|
53
|
+
warn_of_unsupported_feature(description)
|
48
54
|
open_passive_group
|
49
55
|
end
|
50
56
|
|
51
|
-
def open_group(
|
52
|
-
context.
|
53
|
-
context.
|
54
|
-
|
57
|
+
def open_group(opts = {})
|
58
|
+
context.open_group
|
59
|
+
context.capture_group unless opts[:capturing].equal?(false)
|
60
|
+
opts[:head] || pass_through
|
55
61
|
end
|
56
62
|
|
57
63
|
def close_group
|
58
|
-
context.group_level -= 1
|
59
64
|
if context.negative_lookbehind
|
60
|
-
close_negative_lookbehind
|
61
|
-
|
62
|
-
|
65
|
+
context.close_negative_lookbehind
|
66
|
+
''
|
67
|
+
elsif context.base_level_of_atomic_group?
|
68
|
+
context.close_atomic_group
|
69
|
+
# an empty passive group (?:) is appended as literal digits may follow
|
70
|
+
"))\\#{context.captured_group_count}(?:)"
|
63
71
|
else
|
72
|
+
context.close_group
|
64
73
|
')'
|
65
74
|
end
|
66
75
|
end
|
67
|
-
|
68
|
-
def close_negative_lookbehind
|
69
|
-
context.negative_lookbehind = false
|
70
|
-
''
|
71
|
-
end
|
72
|
-
|
73
|
-
def end_of_atomic_group?
|
74
|
-
return false unless context.group_level_for_backreference
|
75
|
-
context.group_level_for_backreference == context.group_level
|
76
|
-
end
|
77
|
-
|
78
|
-
def close_atomic_group
|
79
|
-
context.group_level_for_backreference = nil
|
80
|
-
context.group_count_changed = true
|
81
|
-
# the empty passive group (?:) is appended in case literal digits follow
|
82
|
-
"))\\#{context.captured_group_count}(?:)"
|
83
|
-
end
|
84
76
|
end
|
85
77
|
end
|
86
78
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -6,30 +8,42 @@ class JsRegex
|
|
6
8
|
# Template class implementation.
|
7
9
|
#
|
8
10
|
class LiteralConverter < JsRegex::Converter::Base
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
11
|
+
class << self
|
12
|
+
ASTRAL_PLANE_CODEPOINT_PATTERN = /\A[\u{10000}-\u{FFFFF}]\z/
|
13
|
+
|
14
|
+
def convert_data(data)
|
15
|
+
if data =~ ASTRAL_PLANE_CODEPOINT_PATTERN
|
16
|
+
surrogate_pair_for(data)
|
17
|
+
else
|
18
|
+
escape_literal_forward_slashes(data)
|
19
|
+
ensure_json_compatibility(data)
|
20
|
+
data
|
21
|
+
end
|
17
22
|
end
|
18
|
-
end
|
19
23
|
|
20
|
-
|
21
|
-
# literal slashes would be mistaken for the pattern end in JsRegex#to_s
|
22
|
-
data.gsub!('/', '\\/')
|
23
|
-
end
|
24
|
+
private
|
24
25
|
|
25
|
-
|
26
|
-
|
26
|
+
def surrogate_pair_for(astral_char)
|
27
|
+
base = astral_char.codepoints.first - 65_536
|
28
|
+
high = ((base / 1024).floor + 55_296).to_s(16)
|
29
|
+
low = (base % 1024 + 56_320).to_s(16)
|
30
|
+
"\\u#{high}\\u#{low}"
|
31
|
+
end
|
32
|
+
|
33
|
+
def escape_literal_forward_slashes(data)
|
34
|
+
# literal slashes would signify the pattern end in JsRegex#to_s
|
35
|
+
data.gsub!('/', '\\/')
|
36
|
+
end
|
37
|
+
|
38
|
+
def ensure_json_compatibility(data)
|
39
|
+
data.gsub!(/\\?[\f\n\r\t]/) { |lit| Regexp.escape(lit.delete('\\')) }
|
40
|
+
end
|
27
41
|
end
|
28
42
|
|
29
43
|
private
|
30
44
|
|
31
45
|
def convert_data
|
32
|
-
self.class.
|
46
|
+
self.class.convert_data(data)
|
33
47
|
end
|
34
48
|
end
|
35
49
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -20,8 +22,7 @@ class JsRegex
|
|
20
22
|
end
|
21
23
|
|
22
24
|
def ruby_multiline_mode?
|
23
|
-
|
24
|
-
@rb_mm ||= target.ruby_regex.options & Regexp::MULTILINE > 0
|
25
|
+
(target.ruby_regex.options & Regexp::MULTILINE).nonzero?
|
25
26
|
end
|
26
27
|
end
|
27
28
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
require_relative 'property_converter'
|
3
5
|
|
@@ -12,7 +14,12 @@ class JsRegex
|
|
12
14
|
private
|
13
15
|
|
14
16
|
def convert_data
|
15
|
-
|
17
|
+
if context.set?
|
18
|
+
context.buffered_set_extractions << convert_property(true)
|
19
|
+
''
|
20
|
+
else
|
21
|
+
convert_property(true)
|
22
|
+
end
|
16
23
|
end
|
17
24
|
end
|
18
25
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
require_relative File.join('..', 'property_map')
|
3
5
|
|
@@ -7,18 +9,21 @@ class JsRegex
|
|
7
9
|
# Template class implementation.
|
8
10
|
#
|
9
11
|
class PropertyConverter < JsRegex::Converter::Base
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
12
|
+
class << self
|
13
|
+
def property_replacement(property_name, negated = nil)
|
14
|
+
replacement = PROPERTY_MAP[property_name.downcase.to_sym]
|
15
|
+
negated ? negated_property_replacement(replacement) : replacement
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
14
19
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
20
|
+
def negated_property_replacement(property_string)
|
21
|
+
return nil unless property_string
|
22
|
+
if property_string.start_with?('[^')
|
23
|
+
property_string.sub('[^', '[')
|
24
|
+
else
|
25
|
+
property_string.sub('[', '[^')
|
26
|
+
end
|
22
27
|
end
|
23
28
|
end
|
24
29
|
|
@@ -28,7 +33,7 @@ class JsRegex
|
|
28
33
|
convert_property
|
29
34
|
end
|
30
35
|
|
31
|
-
def convert_property(negated =
|
36
|
+
def convert_property(negated = nil)
|
32
37
|
replace = self.class.property_replacement(subtype, negated)
|
33
38
|
replace || warn_of_unsupported_feature
|
34
39
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -10,9 +12,9 @@ class JsRegex
|
|
10
12
|
|
11
13
|
def convert_data
|
12
14
|
if multiplicative_interval?
|
13
|
-
warn_of_unsupported_feature('
|
15
|
+
warn_of_unsupported_feature('adjacent quantifiers')
|
14
16
|
else
|
15
|
-
context.
|
17
|
+
# context.previous_quantifier_type = subtype
|
16
18
|
context.previous_quantifier_end = end_index
|
17
19
|
convert_quantifier
|
18
20
|
end
|
@@ -28,9 +30,9 @@ class JsRegex
|
|
28
30
|
end
|
29
31
|
|
30
32
|
def multiplicative_interval?
|
31
|
-
subtype == :interval &&
|
32
|
-
|
33
|
-
|
33
|
+
# subtype == :interval &&
|
34
|
+
# context.previous_quantifier_type == :interval &&
|
35
|
+
context.previous_quantifier_end.equal?(start_index)
|
34
36
|
end
|
35
37
|
end
|
36
38
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
require_relative 'literal_converter'
|
3
5
|
require_relative 'property_converter'
|
@@ -26,12 +28,14 @@ class JsRegex
|
|
26
28
|
when :member, :range, :escape then convert_member_subtype
|
27
29
|
when /\Aclass_/ then convert_class_subtype
|
28
30
|
when /\Atype_/ then convert_type_subtype
|
31
|
+
when :backspace then convert_backspace_subtype
|
29
32
|
when :intersection
|
30
33
|
warn_of_unsupported_feature("set intersection '&&'")
|
31
34
|
else
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
+
# Note that, within sets, Regexp::Scanner returns
|
36
|
+
# - positive property tokens in the \p{-style with class :set
|
37
|
+
# - negative property tokens in the \P{-style with class :set
|
38
|
+
# - negative property tokens in the \p{^-style with class :nonproperty
|
35
39
|
try_replacing_potential_property_subtype
|
36
40
|
end
|
37
41
|
end
|
@@ -42,7 +46,7 @@ class JsRegex
|
|
42
46
|
end
|
43
47
|
|
44
48
|
def convert_negate_subtype
|
45
|
-
if context.
|
49
|
+
if context.nested_set?
|
46
50
|
warn_of_unsupported_feature('nested negative set data')
|
47
51
|
end
|
48
52
|
context.negate_set
|
@@ -51,25 +55,28 @@ class JsRegex
|
|
51
55
|
|
52
56
|
def convert_close_subtype
|
53
57
|
context.close_set
|
54
|
-
context.
|
58
|
+
context.set? ? '' : finalize_set
|
55
59
|
end
|
56
60
|
|
57
61
|
def convert_member_subtype
|
58
|
-
|
59
|
-
|
60
|
-
|
62
|
+
utf8_data = data.force_encoding('UTF-8')
|
63
|
+
if /[\u{10000}-\u{FFFFF}]/ =~ utf8_data
|
64
|
+
warn_of_unsupported_feature('astral plane set member')
|
65
|
+
else
|
66
|
+
literal_conversion = LiteralConverter.convert_data(utf8_data)
|
67
|
+
buffer_set_member(literal_conversion)
|
68
|
+
end
|
61
69
|
end
|
62
70
|
|
63
71
|
def convert_class_subtype
|
64
72
|
negated = subtype.to_s.start_with?('class_non')
|
65
|
-
name = subtype
|
73
|
+
name = subtype[(negated ? 9 : 6)..-1]
|
66
74
|
try_replacing_property(name, negated)
|
67
75
|
end
|
68
76
|
|
69
77
|
def try_replacing_potential_property_subtype
|
70
|
-
negated =
|
71
|
-
|
72
|
-
try_replacing_property(name, negated)
|
78
|
+
negated = data.start_with?('\\P')
|
79
|
+
try_replacing_property(subtype, negated)
|
73
80
|
end
|
74
81
|
|
75
82
|
def try_replacing_property(name, negated)
|
@@ -82,47 +89,40 @@ class JsRegex
|
|
82
89
|
end
|
83
90
|
|
84
91
|
def convert_type_subtype
|
85
|
-
if subtype
|
92
|
+
if subtype.equal?(:type_hex)
|
86
93
|
buffer_set_extraction(TypeConverter::HEX_EXPANSION)
|
87
|
-
elsif subtype
|
94
|
+
elsif subtype.equal?(:type_nonhex)
|
88
95
|
buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
|
89
96
|
else
|
90
97
|
buffer_set_member(data)
|
91
98
|
end
|
92
99
|
end
|
93
100
|
|
94
|
-
def
|
95
|
-
|
96
|
-
''
|
101
|
+
def convert_backspace_subtype
|
102
|
+
buffer_set_extraction('[\b]')
|
97
103
|
end
|
98
104
|
|
99
|
-
def
|
100
|
-
|
105
|
+
def buffer_set_member(m)
|
106
|
+
context.buffered_set_members << m unless context.nested_negation?
|
101
107
|
''
|
102
108
|
end
|
103
109
|
|
104
|
-
def
|
105
|
-
context.
|
106
|
-
|
107
|
-
|
108
|
-
def buffered_extractions
|
109
|
-
context.buffered_set_extractions
|
110
|
+
def buffer_set_extraction(e)
|
111
|
+
context.buffered_set_extractions << e unless context.nested_negation?
|
112
|
+
''
|
110
113
|
end
|
111
114
|
|
112
115
|
def finalize_set
|
113
|
-
|
114
|
-
|
116
|
+
buffered_members = context.buffered_set_members
|
117
|
+
buffered_extractions = context.buffered_set_extractions
|
118
|
+
if buffered_members.empty?
|
119
|
+
finalize_depleted_set(buffered_extractions)
|
115
120
|
else
|
116
|
-
|
117
|
-
if buffered_extractions.any?
|
118
|
-
"(?:#{set}|#{buffered_extractions.join('|')})"
|
119
|
-
else
|
120
|
-
set
|
121
|
-
end
|
121
|
+
finalize_nondepleted_set(buffered_members, buffered_extractions)
|
122
122
|
end
|
123
123
|
end
|
124
124
|
|
125
|
-
def finalize_depleted_set
|
125
|
+
def finalize_depleted_set(buffered_extractions)
|
126
126
|
case buffered_extractions.count
|
127
127
|
when 0 then ''
|
128
128
|
when 1 then buffered_extractions.first
|
@@ -130,8 +130,13 @@ class JsRegex
|
|
130
130
|
end
|
131
131
|
end
|
132
132
|
|
133
|
-
def
|
134
|
-
"[#{
|
133
|
+
def finalize_nondepleted_set(buffered_members, buffered_extractions)
|
134
|
+
set = "[#{'^' if context.negative_set?(1)}#{buffered_members.join}]"
|
135
|
+
if buffered_extractions.empty?
|
136
|
+
set
|
137
|
+
else
|
138
|
+
"(?:#{set}|#{buffered_extractions.join('|')})"
|
139
|
+
end
|
135
140
|
end
|
136
141
|
end
|
137
142
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'base'
|
2
4
|
|
3
5
|
class JsRegex
|
@@ -6,8 +8,8 @@ class JsRegex
|
|
6
8
|
# Template class implementation.
|
7
9
|
#
|
8
10
|
class TypeConverter < JsRegex::Converter::Base
|
9
|
-
HEX_EXPANSION = '[A-Fa-f0-9]'
|
10
|
-
NONHEX_EXPANSION = '[^A-Fa-f0-9]'
|
11
|
+
HEX_EXPANSION = '[A-Fa-f0-9]'
|
12
|
+
NONHEX_EXPANSION = '[^A-Fa-f0-9]'
|
11
13
|
|
12
14
|
private
|
13
15
|
|
@@ -15,7 +17,7 @@ class JsRegex
|
|
15
17
|
case subtype
|
16
18
|
when :hex then HEX_EXPANSION
|
17
19
|
when :nonhex then NONHEX_EXPANSION
|
18
|
-
when :
|
20
|
+
when :digit, :nondigit, :word, :nonword, :space, :nonspace
|
19
21
|
pass_through
|
20
22
|
else
|
21
23
|
warn_of_unsupported_feature
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
# frozen_string_literal: true
|
2
3
|
#
|
3
4
|
# This hash maps named properties that are available in Ruby's ::Regexp to
|
@@ -11,8 +12,6 @@
|
|
11
12
|
# Note that the names emitted by Scanner are slightly inconsistent at times,
|
12
13
|
# e.g. 'grapheme_extend' vs. 'other_grapheme_extended'.
|
13
14
|
#
|
14
|
-
# Surrogate blocks are left out because Ruby sees them as invalid unicode range.
|
15
|
-
#
|
16
15
|
# rubocop:disable ClassLength, LineLength
|
17
16
|
#
|
18
17
|
class JsRegex
|
@@ -99,6 +98,8 @@ class JsRegex
|
|
99
98
|
block_inhangul_syllables: '[\uAC00-\uD7AF]',
|
100
99
|
block_inhanunoo: '[\u1720-\u173F]',
|
101
100
|
block_inhebrew: '[\u0590-\u05FF]',
|
101
|
+
block_inhigh_private_use_surrogates: '[\uDB80-\uDBFF]',
|
102
|
+
block_inhigh_surrogates: '[\uD800-\uDBFF]',
|
102
103
|
block_inhiragana: '[\u3040-\u309F]',
|
103
104
|
block_inideographic_description_characters: '[\u2FF0-\u2FFF]',
|
104
105
|
block_inipa_extensions: '[\u0250-\u02AF]',
|
@@ -116,6 +117,7 @@ class JsRegex
|
|
116
117
|
block_inlatin_extended_b: '[\u0180-\u024F]',
|
117
118
|
block_inletterlike_symbols: '[\u2100-\u214F]',
|
118
119
|
block_inlimbu: '[\u1900-\u194F]',
|
120
|
+
block_inlow_surrogates: '[\uDC00-\uDFFF]',
|
119
121
|
block_inmalayalam: '[\u0D00-\u0D7F]',
|
120
122
|
block_inmathematical_operators: '[\u2200-\u22FF]',
|
121
123
|
block_inmiscellaneous_mathematical_symbols_a: '[\u27C0-\u27EF]',
|
@@ -310,6 +312,7 @@ class JsRegex
|
|
310
312
|
separator_space: '[\x20\u00A0\u1680\u2000-\u200A\u202F\u205F\u3000]',
|
311
313
|
soft_dotted: '[\u0069-\u006A\u012F\u0249\u0268\u029D\u02B2\u03F3\u0456\u0458\u1D62\u1D96\u1DA4\u1DA8\u1E2D\u1ECB\u2071\u2148-\u2149\u2C7C]',
|
312
314
|
space: '[\s]',
|
315
|
+
surrogate: '[\uD800-\uDFFF]',
|
313
316
|
symbol: '[\x24\x2B\x3C-\x3E\x5E\x60\x7C\x7E\u00A2-\u00A6\u00A8\u00A9\u00AC\u00AE-\u00B1\u00B4\u00B8\u00D7\u00F7\u02C2-\u02C5\u02D2-\u02DF\u02E5-\u02EB\u02ED\u02EF-\u02FF\u0375\u0384\u0385\u03F6\u0482\u058D-\u058F\u0606-\u0608\u060B\u060E\u060F\u06DE\u06E9\u06FD\u06FE\u07F6\u09F2\u09F3\u09FA\u09FB\u0AF1\u0B70\u0BF3-\u0BFA\u0C7F\u0D79\u0E3F\u0F01-\u0F03\u0F13\u0F15-\u0F17\u0F1A-\u0F1F\u0F34\u0F36\u0F38\u0FBE-\u0FC5\u0FC7-\u0FCC\u0FCE\u0FCF\u0FD5-\u0FD8\u109E\u109F\u1390-\u1399\u17DB\u1940\u19DE-\u19FF\u1B61-\u1B6A\u1B74-\u1B7C\u1FBD\u1FBF-\u1FC1\u1FCD-\u1FCF\u1FDD-\u1FDF\u1FED-\u1FEF\u1FFD\u1FFE\u2044\u2052\u207A-\u207C\u208A-\u208C\u20A0-\u20BD\u2100\u2101\u2103-\u2106\u2108\u2109\u2114\u2116-\u2118\u211E-\u2123\u2125\u2127\u2129\u212E\u213A\u213B\u2140-\u2144\u214A-\u214D\u214F\u2190-\u2307\u230C-\u2328\u232B-\u23FA\u2400-\u2426\u2440-\u244A\u249C-\u24E9\u2500-\u2767\u2794-\u27C4\u27C7-\u27E5\u27F0-\u2982\u2999-\u29D7\u29DC-\u29FB\u29FE-\u2B73\u2B76-\u2B95\u2B98-\u2BB9\u2BBD-\u2BC8\u2BCA-\u2BD1\u2CE5-\u2CEA\u2E80-\u2E99\u2E9B-\u2EF3\u2F00-\u2FD5\u2FF0-\u2FFB\u3004\u3012\u3013\u3020\u3036\u3037\u303E\u303F\u309B\u309C\u3190\u3191\u3196-\u319F\u31C0-\u31E3\u3200-\u321E\u322A-\u3247\u3250\u3260-\u327F\u328A-\u32B0\u32C0-\u32FE\u3300-\u33FF\u4DC0-\u4DFF\uA490-\uA4C6\uA700-\uA716\uA720\uA721\uA789\uA78A\uA828-\uA82B\uA836-\uA839\uAA77-\uAA79\uAB5B\uFB29\uFBB2-\uFBC1\uFDFC\uFDFD\uFE62\uFE64-\uFE66\uFE69\uFF04\uFF0B\uFF1C-\uFF1E\uFF3E\uFF40\uFF5C\uFF5E\uFFE0-\uFFE6\uFFE8-\uFFEE\uFFFC\uFFFD]',
|
314
317
|
symbol_currency: '[\x24\u00A2-\u00A5\u058F\u060B\u09F2\u09F3\u09FB\u0AF1\u0BF9\u0E3F\u17DB\u20A0-\u20BD\uA838\uFDFC\uFE69\uFF04\uFFE0\uFFE1\uFFE5\uFFE6]',
|
315
318
|
symbol_math: '[\x2B\x3C-\x3E\x7C\x7E\u00AC\u00B1\u00D7\u00F7\u03F6\u0606-\u0608\u2044\u2052\u207A-\u207C\u208A-\u208C\u2118\u2140-\u2144\u214B\u2190-\u2194\u219A\u219B\u21A0\u21A3\u21A6\u21AE\u21CE\u21CF\u21D2\u21D4\u21F4-\u22FF\u2320\u2321\u237C\u239B-\u23B3\u23DC-\u23E1\u25B7\u25C1\u25F8-\u25FF\u266F\u27C0-\u27C4\u27C7-\u27E5\u27F0-\u27FF\u2900-\u2982\u2999-\u29D7\u29DC-\u29FB\u29FE-\u2AFF\u2B30-\u2B44\u2B47-\u2B4C\uFB29\uFE62\uFE64-\uFE66\uFF0B\uFF1C-\uFF1E\uFF5C\uFF5E\uFFE2\uFFE9-\uFFEC]',
|
data/lib/js_regex.rb
CHANGED
metadata
CHANGED
@@ -1,59 +1,65 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: js_regex
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-11-
|
11
|
+
date: 2016-11-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: regexp_parser
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
|
-
- -
|
17
|
+
- - ">="
|
18
18
|
- !ruby/object:Gem::Version
|
19
19
|
version: 0.3.6
|
20
|
+
- - "<="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: 0.4.1
|
20
23
|
type: :runtime
|
21
24
|
prerelease: false
|
22
25
|
version_requirements: !ruby/object:Gem::Requirement
|
23
26
|
requirements:
|
24
|
-
- -
|
27
|
+
- - ">="
|
25
28
|
- !ruby/object:Gem::Version
|
26
29
|
version: 0.3.6
|
30
|
+
- - "<="
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: 0.4.1
|
27
33
|
- !ruby/object:Gem::Dependency
|
28
|
-
name:
|
34
|
+
name: rake
|
29
35
|
requirement: !ruby/object:Gem::Requirement
|
30
36
|
requirements:
|
31
37
|
- - "~>"
|
32
38
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
39
|
+
version: '11.3'
|
34
40
|
type: :development
|
35
41
|
prerelease: false
|
36
42
|
version_requirements: !ruby/object:Gem::Requirement
|
37
43
|
requirements:
|
38
44
|
- - "~>"
|
39
45
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
46
|
+
version: '11.3'
|
41
47
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
48
|
+
name: rspec-core
|
43
49
|
requirement: !ruby/object:Gem::Requirement
|
44
50
|
requirements:
|
45
51
|
- - "~>"
|
46
52
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
53
|
+
version: '3.5'
|
48
54
|
type: :development
|
49
55
|
prerelease: false
|
50
56
|
version_requirements: !ruby/object:Gem::Requirement
|
51
57
|
requirements:
|
52
58
|
- - "~>"
|
53
59
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
60
|
+
version: '3.5'
|
55
61
|
- !ruby/object:Gem::Dependency
|
56
|
-
name: rspec-
|
62
|
+
name: rspec-expectations
|
57
63
|
requirement: !ruby/object:Gem::Requirement
|
58
64
|
requirements:
|
59
65
|
- - "~>"
|
@@ -67,7 +73,7 @@ dependencies:
|
|
67
73
|
- !ruby/object:Gem::Version
|
68
74
|
version: '3.5'
|
69
75
|
- !ruby/object:Gem::Dependency
|
70
|
-
name: rspec-
|
76
|
+
name: rspec-mocks
|
71
77
|
requirement: !ruby/object:Gem::Requirement
|
72
78
|
requirements:
|
73
79
|
- - "~>"
|
@@ -108,6 +114,20 @@ dependencies:
|
|
108
114
|
- - "~>"
|
109
115
|
- !ruby/object:Gem::Version
|
110
116
|
version: '0.6'
|
117
|
+
- !ruby/object:Gem::Dependency
|
118
|
+
name: mutant-rspec
|
119
|
+
requirement: !ruby/object:Gem::Requirement
|
120
|
+
requirements:
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: '0'
|
124
|
+
type: :development
|
125
|
+
prerelease: false
|
126
|
+
version_requirements: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - ">="
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '0'
|
111
131
|
description: JsRegex converts Ruby's native regular expressions for JavaScript, taking
|
112
132
|
care of various incompatibilities and returning warnings for unsolvable differences.
|
113
133
|
email:
|
@@ -136,6 +156,7 @@ files:
|
|
136
156
|
- lib/js_regex/converter/type_converter.rb
|
137
157
|
- lib/js_regex/converter/unsupported_token_converter.rb
|
138
158
|
- lib/js_regex/property_map.rb
|
159
|
+
- lib/js_regex/version.rb
|
139
160
|
homepage: https://github.com/janosch-x/js_regex
|
140
161
|
licenses:
|
141
162
|
- MIT
|
@@ -148,7 +169,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
148
169
|
requirements:
|
149
170
|
- - ">="
|
150
171
|
- !ruby/object:Gem::Version
|
151
|
-
version:
|
172
|
+
version: 1.9.1
|
152
173
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
153
174
|
requirements:
|
154
175
|
- - ">="
|
@@ -156,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
156
177
|
version: '0'
|
157
178
|
requirements: []
|
158
179
|
rubyforge_project:
|
159
|
-
rubygems_version: 2.5.
|
180
|
+
rubygems_version: 2.5.2
|
160
181
|
signing_key:
|
161
182
|
specification_version: 4
|
162
183
|
summary: Converts Ruby regexes to JavaScript regexes.
|