js_regex 2.2.2 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/js_regex.rb +2 -2
- data/lib/js_regex/conversion.rb +9 -9
- data/lib/js_regex/converter.rb +11 -0
- data/lib/js_regex/converter/backreference_converter.rb +46 -12
- data/lib/js_regex/converter/context.rb +8 -24
- data/lib/js_regex/converter/escape_converter.rb +37 -59
- data/lib/js_regex/converter/group_converter.rb +2 -3
- data/lib/js_regex/converter/property_converter.rb +24 -21
- data/lib/js_regex/converter/set_converter.rb +36 -127
- data/lib/js_regex/converter/type_converter.rb +12 -3
- data/lib/js_regex/version.rb +1 -1
- metadata +34 -13
- data/lib/js_regex/property_map.rb +0 -338
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4f8f83148f3bcfeb5262259d0893fe92db40e59f64627c4430deee7eaee194c2
|
4
|
+
data.tar.gz: 9cf144827bd01a075552cf12bfd16152c417e82eab064f1fa6a65133381d95ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bf9b4ff58756d2f12be785a803fda5e75aeffd556cdd49860e7474caf963957414b11e9fd1f3d35c6aee90375f3f23dc4435033ee1d3ba086534fdd7cf8d7caf
|
7
|
+
data.tar.gz: b6d4e6dd07949b8fa3394e4868214d1a1977ee3fe65713c7eb000cdffc50e1d485be9af2f7fcffcc1893c883eb36cd4bd5c4c687b2aebabd62f2454820f57db5
|
data/lib/js_regex.rb
CHANGED
@@ -15,8 +15,8 @@ class JsRegex
|
|
15
15
|
|
16
16
|
attr_reader :source, :options, :warnings
|
17
17
|
|
18
|
-
def initialize(ruby_regex)
|
19
|
-
@source, @options, @warnings = Conversion.of(ruby_regex)
|
18
|
+
def initialize(ruby_regex, options: nil)
|
19
|
+
@source, @options, @warnings = Conversion.of(ruby_regex, options: options)
|
20
20
|
end
|
21
21
|
|
22
22
|
def to_h
|
data/lib/js_regex/conversion.rb
CHANGED
@@ -11,26 +11,26 @@ class JsRegex
|
|
11
11
|
require_relative 'converter'
|
12
12
|
|
13
13
|
class << self
|
14
|
-
def of(ruby_regex)
|
14
|
+
def of(ruby_regex, options: nil)
|
15
15
|
source, warnings = convert_source(ruby_regex)
|
16
|
-
|
17
|
-
[source,
|
16
|
+
options_string = convert_options(ruby_regex, options)
|
17
|
+
[source, options_string, warnings]
|
18
18
|
end
|
19
19
|
|
20
20
|
private
|
21
21
|
|
22
22
|
def convert_source(ruby_regex)
|
23
|
-
context
|
24
|
-
expression_tree = Regexp::Parser.parse(ruby_regex)
|
23
|
+
context = Converter::Context.new(ruby_regex)
|
25
24
|
[
|
26
|
-
Converter::RootConverter.new.convert(
|
25
|
+
Converter::RootConverter.new.convert(context.ast, context),
|
27
26
|
context.warnings
|
28
27
|
]
|
29
28
|
end
|
30
29
|
|
31
|
-
def convert_options(ruby_regex)
|
32
|
-
|
33
|
-
|
30
|
+
def convert_options(ruby_regex, custom_options)
|
31
|
+
options = custom_options.to_s.scan(/[gimuy]/)
|
32
|
+
options << 'i' if (ruby_regex.options & Regexp::IGNORECASE).nonzero?
|
33
|
+
options.uniq.sort.join
|
34
34
|
end
|
35
35
|
end
|
36
36
|
end
|
data/lib/js_regex/converter.rb
CHANGED
@@ -25,5 +25,16 @@ class JsRegex
|
|
25
25
|
def self.for(expression)
|
26
26
|
MAP[expression.type].new
|
27
27
|
end
|
28
|
+
|
29
|
+
# Limit the number of generated surrogate pairs, else the output might
|
30
|
+
# get too large for certain applications. The chosen number is somewhat
|
31
|
+
# arbitrary. 100 pairs make for about 1 KB, uncompressed. The median char
|
32
|
+
# count of all properties supported by Ruby is 92. 75% are below 300 chars.
|
33
|
+
#
|
34
|
+
# Set this to nil if you need full unicode matches and size doesn't matter.
|
35
|
+
class << self
|
36
|
+
attr_accessor :surrogate_pair_limit
|
37
|
+
end
|
38
|
+
self.surrogate_pair_limit = 300
|
28
39
|
end
|
29
40
|
end
|
@@ -12,29 +12,63 @@ class JsRegex
|
|
12
12
|
|
13
13
|
def convert_data
|
14
14
|
case subtype
|
15
|
-
when :
|
16
|
-
|
17
|
-
when :number_rel_ref
|
18
|
-
|
19
|
-
when :
|
20
|
-
|
21
|
-
else
|
15
|
+
when :name_ref then convert_name_ref
|
16
|
+
when :number, :number_ref then convert_number_ref
|
17
|
+
when :number_rel_ref then convert_number_rel_ref
|
18
|
+
when :name_call then convert_name_call
|
19
|
+
when :number_call then convert_number_call
|
20
|
+
when :number_rel_call then convert_number_rel_call
|
21
|
+
else # name_recursion_ref, number_recursion_ref, ...
|
22
22
|
warn_of_unsupported_feature
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
26
|
+
def convert_name_ref
|
27
|
+
"\\#{context.named_group_positions.fetch(expression.name)}"
|
28
|
+
end
|
29
|
+
|
26
30
|
def convert_number_ref
|
27
|
-
"\\#{context.new_capturing_group_position(
|
31
|
+
"\\#{context.new_capturing_group_position(expression.number)}"
|
28
32
|
end
|
29
33
|
|
30
34
|
def convert_number_rel_ref
|
31
|
-
absolute_position = Integer(expression.number) +
|
32
|
-
context.original_capturing_group_count + 1
|
33
35
|
"\\#{context.new_capturing_group_position(absolute_position)}"
|
34
36
|
end
|
35
37
|
|
36
|
-
def
|
37
|
-
|
38
|
+
def absolute_position
|
39
|
+
expression.number + context.original_capturing_group_count + 1
|
40
|
+
end
|
41
|
+
|
42
|
+
def convert_name_call
|
43
|
+
replace_with_group do |group|
|
44
|
+
group.token == :named && group.name == expression.name
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
def convert_number_call
|
49
|
+
if expression.number == 0
|
50
|
+
return warn_of_unsupported_feature('whole-pattern recursion')
|
51
|
+
end
|
52
|
+
replace_with_group do |group|
|
53
|
+
[:capture, :options].include?(group.token) &&
|
54
|
+
group.number.equal?(expression.number)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def convert_number_rel_call
|
59
|
+
replace_with_group do |group|
|
60
|
+
[:capture, :options].include?(group.token) &&
|
61
|
+
group.number.equal?(absolute_position)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def replace_with_group
|
66
|
+
context.ast.each_expression do |subexp|
|
67
|
+
if subexp.type == :group && yield(subexp)
|
68
|
+
return Converter.for(subexp).convert(subexp, context)
|
69
|
+
end
|
70
|
+
end
|
71
|
+
''
|
38
72
|
end
|
39
73
|
end
|
40
74
|
end
|
@@ -8,12 +8,10 @@ class JsRegex
|
|
8
8
|
# The Converters themselves are stateless.
|
9
9
|
#
|
10
10
|
class Context
|
11
|
-
attr_reader :
|
12
|
-
:buffered_set_members,
|
11
|
+
attr_reader :ast,
|
13
12
|
:case_insensitive_root,
|
14
13
|
:in_atomic_group,
|
15
14
|
:named_group_positions,
|
16
|
-
:negative_base_set,
|
17
15
|
:warnings
|
18
16
|
|
19
17
|
def initialize(ruby_regex)
|
@@ -22,20 +20,8 @@ class JsRegex
|
|
22
20
|
self.named_group_positions = {}
|
23
21
|
self.warnings = []
|
24
22
|
|
25
|
-
self.
|
26
|
-
|
27
|
-
end
|
28
|
-
|
29
|
-
# set context
|
30
|
-
|
31
|
-
def negate_base_set
|
32
|
-
self.negative_base_set = true
|
33
|
-
end
|
34
|
-
|
35
|
-
def reset_set_context
|
36
|
-
self.buffered_set_extractions = []
|
37
|
-
self.buffered_set_members = []
|
38
|
-
self.negative_base_set = false
|
23
|
+
self.ast = Regexp::Parser.parse(ruby_regex)
|
24
|
+
self.case_insensitive_root = ast.case_insensitive?
|
39
25
|
end
|
40
26
|
|
41
27
|
# group context
|
@@ -75,10 +61,6 @@ class JsRegex
|
|
75
61
|
capturing_group_count - total_added_capturing_groups
|
76
62
|
end
|
77
63
|
|
78
|
-
def total_added_capturing_groups
|
79
|
-
added_capturing_groups_after_group.values.inject(0, &:+)
|
80
|
-
end
|
81
|
-
|
82
64
|
def store_named_group_position(name)
|
83
65
|
named_group_positions[name] = capturing_group_count + 1
|
84
66
|
end
|
@@ -88,13 +70,15 @@ class JsRegex
|
|
88
70
|
attr_accessor :added_capturing_groups_after_group,
|
89
71
|
:capturing_group_count
|
90
72
|
|
91
|
-
attr_writer :
|
92
|
-
:buffered_set_members,
|
73
|
+
attr_writer :ast,
|
93
74
|
:case_insensitive_root,
|
94
75
|
:in_atomic_group,
|
95
76
|
:named_group_positions,
|
96
|
-
:negative_base_set,
|
97
77
|
:warnings
|
78
|
+
|
79
|
+
def total_added_capturing_groups
|
80
|
+
added_capturing_groups_after_group.values.inject(0, &:+)
|
81
|
+
end
|
98
82
|
end
|
99
83
|
end
|
100
84
|
end
|
@@ -9,85 +9,63 @@ class JsRegex
|
|
9
9
|
# Template class implementation.
|
10
10
|
#
|
11
11
|
class EscapeConverter < JsRegex::Converter::Base
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
12
|
+
ESCAPES_SHARED_BY_RUBY_AND_JS = %i[
|
13
|
+
alternation
|
14
|
+
backslash
|
15
|
+
backspace
|
16
|
+
bol
|
17
|
+
carriage
|
18
|
+
codepoint
|
19
|
+
dot
|
20
|
+
eol
|
21
|
+
form_feed
|
22
|
+
group_close
|
23
|
+
group_open
|
24
|
+
hex
|
25
|
+
interval_close
|
26
|
+
interval_open
|
27
|
+
newline
|
28
|
+
octal
|
29
|
+
one_or_more
|
30
|
+
set_close
|
31
|
+
set_open
|
32
|
+
tab
|
33
|
+
vertical_tab
|
34
|
+
zero_or_more
|
35
|
+
zero_or_one
|
36
36
|
].freeze
|
37
37
|
|
38
|
+
private
|
39
|
+
|
38
40
|
def convert_data
|
39
41
|
case subtype
|
40
42
|
when :codepoint_list
|
41
43
|
convert_codepoint_list
|
42
|
-
when :control
|
43
|
-
|
44
|
+
when :control, :meta_sequence
|
45
|
+
unicode_escape_codepoint
|
44
46
|
when :literal
|
45
47
|
LiteralConverter.convert_data(data)
|
46
|
-
when :meta_sequence
|
47
|
-
convert_meta_sequence
|
48
48
|
when *ESCAPES_SHARED_BY_RUBY_AND_JS
|
49
49
|
pass_through
|
50
|
+
when :bell, :escape
|
51
|
+
hex_escape_codepoint
|
50
52
|
else
|
51
|
-
# Bell, Escape, HexWide, ...
|
52
53
|
warn_of_unsupported_feature
|
53
54
|
end
|
54
55
|
end
|
55
56
|
|
56
57
|
def convert_codepoint_list
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
end
|
61
|
-
elements.join
|
62
|
-
end
|
63
|
-
|
64
|
-
def convert_control_sequence
|
65
|
-
convert_meta_control_sequence ||
|
66
|
-
unicode_escape_for(control_sequence_to_s(data))
|
67
|
-
end
|
68
|
-
|
69
|
-
def convert_meta_sequence
|
70
|
-
convert_meta_control_sequence ||
|
71
|
-
unicode_escape_for(meta_char_to_char_code(data[-1]))
|
72
|
-
end
|
73
|
-
|
74
|
-
def convert_meta_control_sequence
|
75
|
-
return unless expression.class.to_s.include?('MetaControl')
|
76
|
-
unicode_escape_for(meta_char_to_char_code(control_sequence_to_s(data)))
|
77
|
-
end
|
78
|
-
|
79
|
-
def unicode_escape_for(char)
|
80
|
-
"\\u#{char.ord.to_s(16).upcase.rjust(4, '0')}"
|
58
|
+
expression.chars.map do |char|
|
59
|
+
LiteralConverter.convert_data(Regexp.escape(char))
|
60
|
+
end.join
|
81
61
|
end
|
82
62
|
|
83
|
-
def
|
84
|
-
|
85
|
-
["000#{five_lsb}"].pack('B*')
|
63
|
+
def unicode_escape_codepoint
|
64
|
+
"\\u#{expression.codepoint.to_s(16).upcase.rjust(4, '0')}"
|
86
65
|
end
|
87
66
|
|
88
|
-
def
|
89
|
-
|
90
|
-
byte_value < 128 ? byte_value + 128 : byte_value
|
67
|
+
def hex_escape_codepoint
|
68
|
+
"\\x#{expression.codepoint.to_s(16).upcase.rjust(2, '0')}"
|
91
69
|
end
|
92
70
|
end
|
93
71
|
end
|
@@ -16,7 +16,7 @@ class JsRegex
|
|
16
16
|
when :capture then build_group
|
17
17
|
when :comment then drop_without_warning
|
18
18
|
when :named then build_named_group
|
19
|
-
when :options then build_options_group
|
19
|
+
when :options, :options_switch then build_options_group
|
20
20
|
when :passive then build_passive_group
|
21
21
|
when :absence then warn_of_unsupported_feature
|
22
22
|
else build_unsupported_group
|
@@ -44,8 +44,7 @@ class JsRegex
|
|
44
44
|
unless (encoding_options = data.scan(/[adu]/)).empty?
|
45
45
|
warn_of_unsupported_feature("encoding options #{encoding_options}")
|
46
46
|
end
|
47
|
-
|
48
|
-
switch_only = !data.include?(':')
|
47
|
+
switch_only = subtype.equal?(:options_switch)
|
49
48
|
switch_only ? drop_without_warning : build_group(head: '(')
|
50
49
|
end
|
51
50
|
|
@@ -1,7 +1,7 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'base'
|
4
|
-
|
4
|
+
require 'character_set'
|
5
5
|
|
6
6
|
class JsRegex
|
7
7
|
module Converter
|
@@ -9,24 +9,6 @@ class JsRegex
|
|
9
9
|
# Template class implementation.
|
10
10
|
#
|
11
11
|
class PropertyConverter < JsRegex::Converter::Base
|
12
|
-
class << self
|
13
|
-
def property_replacement(property_name, negated = nil)
|
14
|
-
replacement = PROPERTY_MAP[property_name.downcase.to_sym]
|
15
|
-
negated ? negated_property_replacement(replacement) : replacement
|
16
|
-
end
|
17
|
-
|
18
|
-
private
|
19
|
-
|
20
|
-
def negated_property_replacement(property_string)
|
21
|
-
return nil unless property_string
|
22
|
-
if property_string.start_with?('[^')
|
23
|
-
property_string.sub('[^', '[')
|
24
|
-
else
|
25
|
-
property_string.sub('[', '[^')
|
26
|
-
end
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
12
|
private
|
31
13
|
|
32
14
|
def convert_data
|
@@ -34,8 +16,29 @@ class JsRegex
|
|
34
16
|
end
|
35
17
|
|
36
18
|
def convert_property(negated = nil)
|
37
|
-
|
38
|
-
|
19
|
+
content = CharacterSet.of_property(subtype)
|
20
|
+
if expression.case_insensitive? && !context.case_insensitive_root
|
21
|
+
content = content.case_insensitive
|
22
|
+
end
|
23
|
+
|
24
|
+
if negated
|
25
|
+
if content.astral_part.empty?
|
26
|
+
return "[^#{content.to_s(format: :js)}]"
|
27
|
+
else
|
28
|
+
warn_of_unsupported_feature('astral plane negation by property')
|
29
|
+
end
|
30
|
+
elsif Converter.surrogate_pair_limit.nil? ||
|
31
|
+
Converter.surrogate_pair_limit >= content.astral_part.size
|
32
|
+
return content.to_s_with_surrogate_alternation
|
33
|
+
else
|
34
|
+
warn_of_unsupported_feature('large astral plane match of property')
|
35
|
+
end
|
36
|
+
|
37
|
+
bmp_part = content.bmp_part
|
38
|
+
return '' if bmp_part.empty?
|
39
|
+
|
40
|
+
string = bmp_part.to_s(format: :js)
|
41
|
+
negated ? "[^#{string}]" : "[#{string}]"
|
39
42
|
end
|
40
43
|
end
|
41
44
|
end
|
@@ -1,8 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'base'
|
4
|
-
require_relative '
|
5
|
-
require_relative '
|
4
|
+
require_relative 'escape_converter'
|
5
|
+
require_relative 'type_converter'
|
6
|
+
require 'character_set'
|
6
7
|
|
7
8
|
class JsRegex
|
8
9
|
module Converter
|
@@ -13,143 +14,51 @@ class JsRegex
|
|
13
14
|
private
|
14
15
|
|
15
16
|
def convert_data
|
16
|
-
if
|
17
|
-
|
18
|
-
|
19
|
-
process_members
|
20
|
-
finalize_set
|
21
|
-
elsif negative_set?
|
22
|
-
warn_of_unsupported_feature('nested negative set data')
|
23
|
-
else # positive subset
|
24
|
-
process_members
|
17
|
+
if directly_compatible?
|
18
|
+
return expression.to_s(:base)
|
19
|
+
.gsub(%r{\\?([\f\n\r\t])}) { Regexp.escape($1) }
|
25
20
|
end
|
26
|
-
end
|
27
|
-
|
28
|
-
def negative_set?
|
29
|
-
expression.negative?
|
30
|
-
end
|
31
|
-
|
32
|
-
def process_members
|
33
|
-
expression.each { |member| process_member(member) }
|
34
|
-
end
|
35
21
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
utf8_data = member.dup.force_encoding('UTF-8')
|
43
|
-
case utf8_data
|
44
|
-
when ASTRAL_PLANE_PATTERN
|
45
|
-
warn_of_unsupported_feature('astral plane set member')
|
46
|
-
when '\\h'
|
47
|
-
handle_hex_type
|
48
|
-
when '\\H'
|
49
|
-
handle_nonhex_type
|
50
|
-
when '&&'
|
51
|
-
warn_of_unsupported_feature('set intersection')
|
52
|
-
when PROPERTY_PATTERN
|
53
|
-
handle_property($1, $2, $3)
|
54
|
-
else
|
55
|
-
handle_literal(utf8_data)
|
22
|
+
content = CharacterSet.of_expression(expression)
|
23
|
+
if expression.case_insensitive? && !context.case_insensitive_root
|
24
|
+
content = content.case_insensitive
|
25
|
+
elsif !expression.case_insensitive? && context.case_insensitive_root
|
26
|
+
warn_of_unsupported_feature('nested case-sensitive set')
|
56
27
|
end
|
57
|
-
end
|
58
|
-
|
59
|
-
HEX_RANGES = 'A-Fa-f0-9'
|
60
|
-
NONHEX_SET = '[^A-Fa-f0-9]'
|
61
28
|
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
def handle_nonhex_type
|
67
|
-
if context.negative_base_set
|
68
|
-
warn_of_unsupported_feature('nonhex type in negative set')
|
29
|
+
if Converter.surrogate_pair_limit.nil? ||
|
30
|
+
Converter.surrogate_pair_limit >= content.astral_part.size
|
31
|
+
content.to_s_with_surrogate_alternation
|
69
32
|
else
|
70
|
-
|
33
|
+
warn_of_unsupported_feature('large astral plane match of set')
|
34
|
+
bmp_part = content.bmp_part
|
35
|
+
bmp_part.empty? ? '' : bmp_part.to_s(format: :js, in_brackets: true)
|
71
36
|
end
|
72
37
|
end
|
73
38
|
|
74
|
-
def
|
75
|
-
if context.
|
76
|
-
|
39
|
+
def directly_compatible?
|
40
|
+
if expression.case_insensitive? && !context.case_insensitive_root
|
41
|
+
# casefolding needed
|
42
|
+
return false
|
77
43
|
end
|
78
|
-
std = standardize_property_name(name)
|
79
|
-
negated = sign.eql?('P') ^ caret.eql?('^')
|
80
|
-
if (replacement = PropertyConverter.property_replacement(std, negated))
|
81
|
-
buffer_set_extraction(replacement)
|
82
|
-
else
|
83
|
-
warn_of_unsupported_feature('property')
|
84
|
-
end
|
85
|
-
end
|
86
|
-
|
87
|
-
def handle_literal(utf8_data)
|
88
|
-
conversion = LiteralConverter.convert_data(utf8_data)
|
89
|
-
if context.case_insensitive_root && !expression.case_insensitive?
|
90
|
-
warn_of_unsupported_feature('nested case-sensitive set member')
|
91
|
-
elsif !context.case_insensitive_root && expression.case_insensitive?
|
92
|
-
return handle_locally_case_insensitive_literal(conversion)
|
93
|
-
end
|
94
|
-
buffer_set_member(conversion)
|
95
|
-
end
|
96
|
-
|
97
|
-
DESCENDING_CASE_RANGE_PATTERN = /\p{upper}-\p{lower}/
|
98
44
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
)
|
105
|
-
|
106
|
-
|
107
|
-
|
45
|
+
# check for subexpressions that need conversion
|
46
|
+
expression.each_expression do |node|
|
47
|
+
case node.type
|
48
|
+
when :literal
|
49
|
+
# surrogate pair substitution needed if astral
|
50
|
+
next if node.text.force_encoding('utf-8').ord <= 0xFFFF
|
51
|
+
when :set
|
52
|
+
# conversion needed for nested sets, intersections
|
53
|
+
next if node.token.equal?(:range)
|
54
|
+
when :type
|
55
|
+
next if TypeConverter::TYPES_SHARED_BY_RUBY_AND_JS.include?(node.token)
|
56
|
+
when :escape
|
57
|
+
next if EscapeConverter::ESCAPES_SHARED_BY_RUBY_AND_JS.include?(node.token)
|
108
58
|
end
|
109
|
-
|
110
|
-
end
|
111
|
-
|
112
|
-
def standardize_property_name(name)
|
113
|
-
Regexp::Parser.parse("\\p{#{name}}").expressions.first.token
|
114
|
-
end
|
115
|
-
|
116
|
-
def buffer_set_member(data)
|
117
|
-
context.buffered_set_members << data
|
118
|
-
end
|
119
|
-
|
120
|
-
def buffer_set_extraction(data)
|
121
|
-
context.buffered_set_extractions << data
|
122
|
-
end
|
123
|
-
|
124
|
-
def convert_subset(subset)
|
125
|
-
SetConverter.new.convert(subset, context)
|
126
|
-
end
|
127
|
-
|
128
|
-
def finalize_set
|
129
|
-
buffered_members = context.buffered_set_members
|
130
|
-
buffered_extractions = context.buffered_set_extractions
|
131
|
-
if buffered_members.empty?
|
132
|
-
finalize_depleted_set(buffered_extractions)
|
133
|
-
else
|
134
|
-
finalize_nondepleted_set(buffered_members, buffered_extractions)
|
135
|
-
end
|
136
|
-
end
|
137
|
-
|
138
|
-
def finalize_depleted_set(buffered_extractions)
|
139
|
-
case buffered_extractions.count
|
140
|
-
when 0 then ''
|
141
|
-
when 1 then buffered_extractions.first
|
142
|
-
else "(?:#{buffered_extractions.join('|')})"
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
def finalize_nondepleted_set(buffered_members, buffered_extractions)
|
147
|
-
set = "[#{'^' if negative_set?}#{buffered_members.join}]"
|
148
|
-
if buffered_extractions.empty?
|
149
|
-
set
|
150
|
-
else
|
151
|
-
"(?:#{set}|#{buffered_extractions.join('|')})"
|
59
|
+
return false
|
152
60
|
end
|
61
|
+
true
|
153
62
|
end
|
154
63
|
end
|
155
64
|
end
|