js_regex 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/js_regex/conversion.rb +82 -0
- data/lib/js_regex/converter/anchor_converter.rb +24 -0
- data/lib/js_regex/converter/assertion_converter.rb +27 -0
- data/lib/js_regex/converter/base.rb +44 -0
- data/lib/js_regex/converter/conditional_converter.rb +24 -0
- data/lib/js_regex/converter/context.rb +63 -0
- data/lib/js_regex/converter/escape_converter.rb +27 -0
- data/lib/js_regex/converter/freespace_converter.rb +16 -0
- data/lib/js_regex/converter/group_converter.rb +81 -0
- data/lib/js_regex/converter/literal_converter.rb +29 -0
- data/lib/js_regex/converter/meta_converter.rb +28 -0
- data/lib/js_regex/converter/nonproperty_converter.rb +18 -0
- data/lib/js_regex/converter/property_converter.rb +40 -0
- data/lib/js_regex/converter/quantifier_converter.rb +37 -0
- data/lib/js_regex/converter/set_converter.rb +137 -0
- data/lib/js_regex/converter/subset_converter.rb +10 -0
- data/lib/js_regex/converter/type_converter.rb +26 -0
- data/lib/js_regex/converter/unsupported_token_converter.rb +16 -0
- data/lib/js_regex/property_map.rb +330 -0
- data/lib/js_regex.rb +26 -0
- metadata +107 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d7423c167f82ba8e240c7087149430e95f08c98c
|
4
|
+
data.tar.gz: ebbf085f8aede4f731ba598c04e6d2d87f1953fb
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 651ccc96ac12d997fe49361270db2beb076954d8fe43299433370327175ff3a979e4378d2ec96b12401ec14438cc00a83ca47e607b6faac5fd43cbaea2f8d882
|
7
|
+
data.tar.gz: 1f57cab92495b0d6fb23017acde909daf8ef2a682dab21558246a3eb440859aa91d377c5be71699f08c1da50c5cea57e7acfba036f7049bcdd4e0f8e0deb2550
|
@@ -0,0 +1,82 @@
|
|
1
|
+
class JsRegex
  #
  # This class acts as a facade, creating specific Converters and
  # passing Regexp::Scanner tokens to them, reusing Converters as needed.
  #
  # ::of returns a source String, options String, and warnings Array.
  #
  class Conversion
    require 'regexp_parser'
    Dir[File.join(File.dirname(__FILE__), '**', '*.rb')].each { |f| require f }

    attr_reader :ruby_regex, :source, :options, :warnings

    # Converts the given Regexp right away; the results are available
    # through #source, #options and #warnings afterwards.
    def initialize(ruby_regex)
      @ruby_regex = ruby_regex
      @source = ''
      @options = ''
      @warnings = []

      convert_source(ruby_regex)
      convert_options(ruby_regex)
      perform_sanity_check
    end

    # Returns [source String, options String, warnings Array].
    def self.of(ruby_regex)
      conversion = new(ruby_regex)
      [conversion.source, conversion.options, conversion.warnings]
    end

    private

    # Feeds every scanner token to its Converter. The Converters append
    # their output to #source and their complaints to #warnings.
    def convert_source(ruby_regex)
      Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
        # There might be a lot of tokens, so don't wrap their data in objects.
        # Even just wrapping them in simple structs or attr_reader objects
        # can lead to 60%+ longer processing times for large regexes.
        convert_token(token_class, subtype, data, s, e)
      end
      converters.clear
    end

    def convert_token(token_class, subtype, data, s, e)
      converter = converter_for_token_class(token_class)
      converter.convert(token_class, subtype, data, s, e)
    end

    # Returns the (memoized) Converter instance for the given token class.
    def converter_for_token_class(token_class)
      converters[token_class] ||= begin
        converter_name = converter_name_for_token_class(token_class)
        converter_class = JsRegex::Converter.const_get(converter_name)
        converter_class.new(self, context)
      end
    end

    # E.g. :free_space => 'FreespaceConverter'. Token classes without a
    # matching constant fall back to 'UnsupportedTokenConverter'.
    def converter_name_for_token_class(token_class)
      name = "#{token_class.to_s.delete('_').capitalize}Converter"
      Converter.const_defined?(name) ? name : 'UnsupportedTokenConverter'
    end

    def converters
      @converters ||= {}
    end

    def context
      @context ||= JsRegex::Converter::Context.new
    end

    def convert_options(ruby_regex)
      @options = 'g' # all Ruby regexes are what is called "global" in JS
      @options << 'i' if (ruby_regex.options & Regexp::IGNORECASE) > 0
    end

    def perform_sanity_check
      # Ruby regex capabilities are a superset of JS regex capabilities in
      # the source part. So if this raises an Error, a Converter messed up.
      #
      # Only the source is checked. #options holds JS flags ('g', 'gi'),
      # which are not Ruby flags: Ruby >= 3.2 raises an ArgumentError for
      # unknown String flags, which would discard every conversion here.
      Regexp.new(source)
    rescue ArgumentError, RegexpError, SyntaxError => e
      @source = ''
      warnings << e.message
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Translates anchor tokens by table lookup. Subtypes that are missing
    # from the table are dropped with a warning.
    #
    class AnchorConverter < JsRegex::Converter::Base
      JS_ANCHORS = {
        bol:              '^',
        bos:              '^',
        eol:              '$',
        eos:              '$',
        eos_ob_eol:       '(?=\n?$)',
        word_boundary:    '\b',
        nonword_boundary: '\B'
      }.freeze

      private

      def convert_data
        JS_ANCHORS.fetch(subtype) { warn_of_unsupported_feature }
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'group_converter'
    #
    # Template class implementation.
    #
    # Note the inheritance from GroupConverter.
    #
    class AssertionConverter < JsRegex::Converter::GroupConverter
      private

      # Lookaheads are passed through. Negative lookbehinds are dropped
      # together with their content (the context is flagged so that output
      # is suppressed until the assertion is closed). Everything else, e.g.
      # a positive lookbehind, is replaced by a passive group with a warning.
      def convert_data
        case subtype
        when :lookahead, :nlookahead
          open_assertion
        when :nlookbehind
          context.negative_lookbehind = true
          warn_of_unsupported_feature('negative lookbehind assertion')
        else # :lookbehind, ...
          warn_of_unsupported_feature
          # '(?:' does not capture, so it must not be counted as an opened
          # group - that count is used to number atomic group backreferences.
          open_assertion('(?:')
        end
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
class JsRegex
  module Converter
    #
    # Template class. Implement #convert_data in subclasses.
    #
    # target must respond to #source (a String that is appended to) and
    # #warnings (an Array that is appended to). context must respond to
    # #valid?.
    #
    class Base
      attr_reader :target, :context

      def initialize(target, context)
        @target = target
        @context = context
      end

      # Stores the token data, runs #convert_data and appends its result
      # to the target's source, unless the context is invalid afterwards.
      def convert(token_class, subtype, data, start_index, end_index)
        self.token_class = token_class
        self.subtype = subtype
        self.data = data
        self.start_index = start_index
        self.end_index = end_index

        # The conversion must run even while the output is suppressed.
        # Otherwise the token that makes the context valid again is never
        # processed and all remaining tokens are silently dropped.
        result = convert_data
        target.source << (context.valid? ? result : '')
      end

      private

      attr_accessor :token_class, :subtype, :data, :start_index, :end_index

      # Must return the String that replaces the current token.
      def convert_data
        fail NotImplementedError
      end

      def pass_through
        data
      end

      # Records a warning on the target and returns an empty String, so that
      # it can be used as the return value of #convert_data.
      def warn_of_unsupported_feature(description = nil)
        description ||= default_description
        target.warnings << "Dropped unsupported #{description} "\
                           "at index #{start_index}..#{end_index}"
        ''
      end

      # E.g. "hex wide escape '\x{1F}'". Only the token names are humanized;
      # the token data is quoted verbatim, underscores included.
      def default_description
        kind = "#{subtype} #{token_class}".tr('_', ' ')
        "#{kind} '#{data}'"
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # The opening of a conditional is replaced by a plain '(' and reported.
    # Separators and the closing token are kept, all other parts vanish.
    #
    class ConditionalConverter < JsRegex::Converter::Base
      private

      def convert_data
        if subtype == :open
          warn_of_unsupported_feature("conditional '(?'")
          return '('
        end
        return pass_through if [:separator, :close].include?(subtype)

        '' # one warning is enough, don't warn about other parts
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
class JsRegex
  module Converter
    #
    # Passed among Converters to globalize basic status data.
    #
    # The Converters themselves are stateless.
    #
    class Context
      attr_accessor :buffered_set_members,
                    :buffered_set_extractions,
                    :group_level,
                    :group_level_for_backreference,
                    :group_number_for_backreference,
                    :negative_lookbehind,
                    :negative_set_levels,
                    :opened_groups,
                    :previous_quantifier_subtype,
                    :previous_quantifier_end,
                    :set_level

      # Attributes that are not assigned here start out as nil.
      def initialize
        self.buffered_set_members = []
        self.buffered_set_extractions = []
        self.group_level = 0
        self.negative_lookbehind = false
        self.negative_set_levels = []
        self.opened_groups = 0
        self.set_level = 0
      end

      # False while inside of a negative lookbehind.
      def valid?
        !negative_lookbehind
      end

      # set context

      # Enters a (possibly nested) set. Entering a top-level set empties
      # both buffers. The newly entered level starts out as not negated.
      def open_set
        self.set_level += 1
        if set_level == 1
          buffered_set_members.clear
          buffered_set_extractions.clear
        end
        self.negative_set_levels -= [set_level]
      end

      # Marks the current set level as negated.
      def negate_set
        self.negative_set_levels |= [set_level]
      end

      def negative_set?(level = set_level)
        negative_set_levels.include?(level)
      end

      # True if the current set, or any nested set enclosing it, is negated.
      # Negation of the top-level set (level 1) does not count as nested.
      #
      # Enclosing levels need to be checked, too: in [a[^b[c]]] the set [c]
      # is not negated itself, but it is part of the negated nested set.
      # Levels above the current one may be stale (they are only reset when
      # they are opened again), which is why they are ignored.
      def nested_negation?
        negative_set_levels.any? { |level| level > 1 && level <= set_level }
      end

      def close_set
        self.set_level -= 1
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative 'literal_converter'
    #
    # Template class implementation.
    #
    # Listed escapes are kept as they are, literal escapes are handed to the
    # LiteralConverter, and the remainder is dropped with a warning.
    #
    class EscapeConverter < JsRegex::Converter::Base
      PASSED_THROUGH_SUBTYPES = [
        :backslash, :dot, :form_feed, :hex, :interval_close,
        :interval_open, :newline, :one_or_more, :octal, :return,
        :space, :tab, :vertical_tab, :zero_or_more, :zero_or_one
      ].freeze

      private

      def convert_data
        if PASSED_THROUGH_SUBTYPES.include?(subtype)
          pass_through
        elsif subtype == :literal
          LiteralConverter.convert(data, self)
        else
          # Backspace, Bell, HexWide, Control, Meta, MetaControl, ...
          warn_of_unsupported_feature
        end
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Every token handed to this converter is replaced by an empty String.
    #
    class FreespaceConverter < JsRegex::Converter::Base
      NO_OUTPUT = ''.freeze

      private

      def convert_data
        NO_OUTPUT # drop data without warning
      end
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Besides converting group tokens, this class keeps two counters on the
    # context: #group_level (current nesting depth) and #opened_groups (the
    # number of capturing groups emitted so far). The latter is needed to
    # number the backreference that emulates atomic groups, so it must only
    # be increased for output that actually captures.
    #
    class GroupConverter < JsRegex::Converter::Base
      private

      def convert_data
        case subtype
        when :atomic then open_atomic_group
        when :capture then open_group
        when :passive then open_assertion # does not capture, so not counted
        when :close then close_group
        when :comment then '' # drop whole group w/o warning
        when :named_ab, :named_sq then open_group('(') # drop name w/o warning
        when :options then open_options_group
        else
          warn_of_unsupported_feature
          open_group('(')
        end
      end

      def open_atomic_group
        # Atomicity is achieved with backreferenced lookahead groups:
        # http://instanceof.me/post/52245507631
        # regex-emulate-atomic-grouping-with-lookahead
        #
        # NOTE(review): only one pending atomic group is tracked, so an atomic
        # group nested in another atomic group is presumably not closed
        # correctly - confirm and track a stack if this needs support.
        context.group_level_for_backreference = context.group_level
        context.group_number_for_backreference = context.opened_groups + 1
        # The emitted head contains a capturing group, which has to be counted
        # or the backreferences of all following atomic groups are off by one.
        open_group('(?=(')
      end

      def open_options_group
        warn_of_unsupported_feature('group-specific options')
        open_group('(')
      end

      # Opens a group that captures. Returns the given head.
      def open_group(group_head = pass_through)
        context.group_level += 1
        context.opened_groups += 1
        group_head
      end

      # Opens a group that does not capture. Returns the given head.
      def open_assertion(assertion_head = pass_through)
        # these don't count as opened groups for backreference purposes
        context.group_level += 1
        assertion_head
      end

      def close_group
        if context.negative_lookbehind
          close_negative_lookbehind
        else
          context.group_level -= 1
          if end_of_atomic_group?
            close_atomic_group
          else
            ')'
          end
        end
      end

      # Ends the suppression of output that the opening of the negative
      # lookbehind has started. The closing parenthesis is dropped as well.
      def close_negative_lookbehind
        context.negative_lookbehind = false
        ''
      end

      def end_of_atomic_group?
        return false unless context.group_level_for_backreference
        context.group_level_for_backreference == context.group_level
      end

      def close_atomic_group
        context.group_level_for_backreference = nil
        # an empty passive group is appended in case literal digits follow
        "))\\#{context.group_number_for_backreference}(?:)"
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    class LiteralConverter < JsRegex::Converter::Base
      # Matches any code point beyond the Basic Multilingual Plane. The
      # supplementary planes end at U+10FFFF, not at U+FFFFF.
      ASTRAL_PLANE_PATTERN = /[\u{10000}-\u{10FFFF}]/

      # Returns the converted literal. If data holds an astral plane
      # character, a warning is recorded through the given converter and an
      # empty String is returned, i.e. the whole literal is dropped.
      def self.convert(data, converter)
        utf8_data = data.dup.force_encoding('UTF-8')
        if ASTRAL_PLANE_PATTERN =~ utf8_data
          converter.send(:warn_of_unsupported_feature, 'astral plane character')
        else
          ensure_json_compatibility(utf8_data)
        end
      end

      # Replaces raw form feeds, newlines, carriage returns and tabs (with or
      # without a preceding backslash) by their escape sequences.
      def self.ensure_json_compatibility(data)
        data.gsub(/\\?[\f\n\r\t]/) { |lit| Regexp.escape(lit.delete('\\')) }
      end

      private

      def convert_data
        self.class.convert(data, self)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Alternations are kept. The dot is kept as well, unless the Ruby regex
    # has the MULTILINE option, in which case it is expanded so that it also
    # matches newlines. Other meta tokens are dropped with a warning.
    #
    class MetaConverter < JsRegex::Converter::Base
      private

      def convert_data
        return pass_through if subtype == :alternation
        return warn_of_unsupported_feature unless subtype == :dot

        if ruby_multiline_mode?
          '(?:.|\n)'
        else
          '.'
        end
      end

      # Looked up once per converter instance, then answered from memory.
      def ruby_multiline_mode?
        unless defined?(@rb_mm)
          flags = target.ruby_regex.options
          @rb_mm = (flags & Regexp::MULTILINE) > 0
        end
        @rb_mm
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'property_converter'
    #
    # Template class implementation.
    #
    # Note the inheritance from PropertyConverter. The only difference is
    # that the replacement is requested in its negated form.
    #
    class NonpropertyConverter < JsRegex::Converter::PropertyConverter
      private

      def convert_data
        negated = true
        convert_property(negated)
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative File.join('..', 'property_map')
    #
    # Template class implementation.
    #
    class PropertyConverter < JsRegex::Converter::Base
      # Looks up the replacement for the given property name (String or
      # Symbol, any case) in JsRegex::PROPERTY_MAP. Returns nil if the map
      # has no entry for it.
      def self.property_replacement(property_name, negated = false)
        map_key = property_name.downcase.to_sym
        positive = JsRegex::PROPERTY_MAP[map_key]
        return positive unless negated

        negated_property_replacement(positive)
      end

      # Returns an inverted copy of the given replacement, or nil if there
      # is nothing to invert.
      def self.negated_property_replacement(property_string)
        # take care not to use destructive methods on elements in the map
        return unless property_string
        return property_string.sub('[^', '[') if property_string.start_with?('[^')
        return property_string.sub('[', '[^') if property_string.start_with?('[')

        # it's an invertable meta char
        property_string.swapcase
      end

      private

      def convert_data
        convert_property
      end

      # Unknown properties are dropped with a warning.
      def convert_property(negated = false)
        self.class.property_replacement(subtype, negated) ||
          warn_of_unsupported_feature
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # An interval that directly follows another interval is dropped with a
    # warning. Other quantifiers are remembered on the context and kept,
    # minus a trailing possessive marker.
    #
    class QuantifierConverter < JsRegex::Converter::Base
      private

      def convert_data
        if multiplicative_interval?
          return warn_of_unsupported_feature('multiplicative interval \'{x}{x}\'')
        end

        context.previous_quantifier_subtype = subtype
        context.previous_quantifier_end = end_index
        convert_quantifier
      end

      # Strips the trailing '+' of quantifiers that are longer than one
      # character and end with it, and reports that.
      def convert_quantifier
        possessive = data.length > 1 && data.end_with?('+')
        return pass_through unless possessive

        warn_of_unsupported_feature('declaration of quantifier as possessive')
        data[0..-2]
      end

      # True if this interval starts right where the previous one ended.
      def multiplicative_interval?
        return false unless subtype == :interval
        return false unless context.previous_quantifier_subtype == :interval

        context.previous_quantifier_end == start_index
      end
    end
  end
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative 'literal_converter'
    require_relative 'property_converter'
    require_relative 'type_converter'
    #
    # Template class implementation.
    #
    # This converter works a little differently from the others.
    #
    # It buffers anything that it finds within a set in the Context's
    # #buffered_set_members and #buffered_set_extractions Arrays,
    # returning an empty String for all passed tokens, and only when
    # the set is closed does it compile and return the final String.
    #
    # Members end up inside of the resulting bracket expression.
    # Extractions are complete expressions of their own (replacements for
    # properties, posix classes and hex types) that can not be put into
    # brackets, so they are combined with the bracket expression instead.
    #
    class SetConverter < JsRegex::Converter::Base
      # Matches any single character, including line breaks.
      ANY_CHARACTER = '[\s\S]'.freeze

      private

      def convert_data
        case subtype
        when :open then convert_open_subtype
        when :negate then convert_negate_subtype
        when :close then convert_close_subtype
        when :member, :range, :escape then convert_member_subtype
        when /\Aclass_/ then convert_class_subtype
        when /\Atype_/ then convert_type_subtype
        when :intersection
          warn_of_unsupported_feature("set intersection '&&'")
        else
          # TODO: I think it's a bug in Regexp::Scanner that some property
          # tokens (only positive ones?) are returned with the token class
          # :set within sets. If this is fixed, just
          # warn_of_unsupported_feature here.
          try_replacing_potential_property_subtype
        end
      end

      def convert_open_subtype
        context.open_set
        ''
      end

      def convert_negate_subtype
        if context.set_level > 1
          warn_of_unsupported_feature('nested negative set data')
        end
        context.negate_set
        ''
      end

      # Only the closing of the outermost set produces output.
      def convert_close_subtype
        context.close_set
        context.set_level == 0 ? finalize_set : ''
      end

      def convert_member_subtype
        literal_conversion = LiteralConverter.convert(data, self)
        return '' if literal_conversion == ''
        buffer_set_member(literal_conversion)
      end

      # E.g. :class_alpha => 'alpha', :class_nonalpha => negated 'alpha'
      def convert_class_subtype
        negated = subtype.to_s.start_with?('class_non')
        name = subtype.to_s[(negated ? 9 : 6)..-1]
        try_replacing_property(name, negated)
      end

      def try_replacing_potential_property_subtype
        negated = subtype.to_s.start_with?('non')
        name = negated ? subtype.to_s[3..-1] : subtype.to_s
        try_replacing_property(name, negated)
      end

      def try_replacing_property(name, negated)
        replacement = PropertyConverter.property_replacement(name, negated)
        if replacement
          buffer_set_extraction(replacement)
        else
          warn_of_unsupported_feature
        end
      end

      def convert_type_subtype
        if subtype == :type_hex
          buffer_set_extraction(TypeConverter::HEX_EXPANSION)
        elsif subtype == :type_nonhex
          buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
        else
          buffer_set_member(data)
        end
      end

      # Content of negated nested sets is not buffered, i.e. it is dropped.
      def buffer_set_member(string)
        buffered_members << string unless context.nested_negation?
        ''
      end

      def buffer_set_extraction(string)
        buffered_extractions << string unless context.nested_negation?
        ''
      end

      def buffered_members
        context.buffered_set_members
      end

      def buffered_extractions
        context.buffered_set_extractions
      end

      # Compiles the buffered members and extractions into the final String.
      def finalize_set
        negative = context.negative_set?(1)
        return finalize_depleted_set(negative) if buffered_members.none?

        set = build_set(buffered_members, negative)
        return set if buffered_extractions.none?

        if negative
          exclude_extractions_from(set)
        else
          "(?:#{set}|#{buffered_extractions.join('|')})"
        end
      end

      # Compiles the final String for a set without any buffered members.
      def finalize_depleted_set(negative = context.negative_set?(1))
        if buffered_extractions.none?
          ''
        elsif negative
          exclude_extractions_from(ANY_CHARACTER)
        elsif buffered_extractions.count == 1
          buffered_extractions.first
        else
          "(?:#{buffered_extractions.join('|')})"
        end
      end

      # A negated set must match neither its members nor its extractions.
      # Alternation would wrongly allow the extractions, so they are ruled
      # out with a negative lookahead before the given expression is tried.
      # Assumes that every extraction matches exactly one character, as the
      # bracket expressions and meta chars handled by PropertyConverter do.
      def exclude_extractions_from(expression)
        "(?:(?!#{buffered_extractions.join('|')})#{expression})"
      end

      def build_set(members, negative)
        "[#{negative ? '^' : ''}#{members.join}]"
      end
    end
  end
end
|