js_regex 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/js_regex/conversion.rb +82 -0
- data/lib/js_regex/converter/anchor_converter.rb +24 -0
- data/lib/js_regex/converter/assertion_converter.rb +27 -0
- data/lib/js_regex/converter/base.rb +44 -0
- data/lib/js_regex/converter/conditional_converter.rb +24 -0
- data/lib/js_regex/converter/context.rb +63 -0
- data/lib/js_regex/converter/escape_converter.rb +27 -0
- data/lib/js_regex/converter/freespace_converter.rb +16 -0
- data/lib/js_regex/converter/group_converter.rb +81 -0
- data/lib/js_regex/converter/literal_converter.rb +29 -0
- data/lib/js_regex/converter/meta_converter.rb +28 -0
- data/lib/js_regex/converter/nonproperty_converter.rb +18 -0
- data/lib/js_regex/converter/property_converter.rb +40 -0
- data/lib/js_regex/converter/quantifier_converter.rb +37 -0
- data/lib/js_regex/converter/set_converter.rb +137 -0
- data/lib/js_regex/converter/subset_converter.rb +10 -0
- data/lib/js_regex/converter/type_converter.rb +26 -0
- data/lib/js_regex/converter/unsupported_token_converter.rb +16 -0
- data/lib/js_regex/property_map.rb +330 -0
- data/lib/js_regex.rb +26 -0
- metadata +107 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: d7423c167f82ba8e240c7087149430e95f08c98c
|
4
|
+
data.tar.gz: ebbf085f8aede4f731ba598c04e6d2d87f1953fb
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 651ccc96ac12d997fe49361270db2beb076954d8fe43299433370327175ff3a979e4378d2ec96b12401ec14438cc00a83ca47e607b6faac5fd43cbaea2f8d882
|
7
|
+
data.tar.gz: 1f57cab92495b0d6fb23017acde909daf8ef2a682dab21558246a3eb440859aa91d377c5be71699f08c1da50c5cea57e7acfba036f7049bcdd4e0f8e0deb2550
|
@@ -0,0 +1,82 @@
|
|
1
|
+
class JsRegex
  #
  # This class acts as a facade, creating specific Converters and
  # passing Regexp::Scanner tokens to them, reusing Converters as needed.
  #
  # ::of returns a source String, options String, and warnings Array.
  #
  class Conversion
    require 'regexp_parser'
    Dir[File.join(File.dirname(__FILE__), '**', '*.rb')].each { |f| require f }

    attr_reader :ruby_regex, :source, :options, :warnings

    # Runs the whole conversion on instantiation: source first, then
    # options, then a sanity check of the generated source.
    #
    # @param ruby_regex [Regexp] the regular expression to convert
    def initialize(ruby_regex)
      @ruby_regex = ruby_regex
      @source = ''
      @options = ''
      @warnings = []

      convert_source(ruby_regex)
      convert_options(ruby_regex)
      perform_sanity_check
    end

    # Converts +ruby_regex+ and returns the results as a plain Array.
    #
    # @param ruby_regex [Regexp]
    # @return [Array] [source String, options String, warnings Array]
    def self.of(ruby_regex)
      conversion = new(ruby_regex)
      [conversion.source, conversion.options, conversion.warnings]
    end

    private

    # Feeds every scanner token to its Converter; the Converters append
    # their output to #source. The Converter cache is emptied afterwards.
    def convert_source(ruby_regex)
      Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
        # There might be a lot of tokens, so don't wrap their data in objects.
        # Even just wrapping them in simple structs or attr_reader objects
        # can lead to 60%+ longer processing times for large regexes.
        convert_token(token_class, subtype, data, s, e)
      end
      converters.clear
    end

    def convert_token(token_class, subtype, data, s, e)
      converter = converter_for_token_class(token_class)
      converter.convert(token_class, subtype, data, s, e)
    end

    # Returns the Converter for +token_class+, instantiating it on first
    # use and reusing it for all later tokens of the same class.
    def converter_for_token_class(token_class)
      converters[token_class] ||= begin
        converter_name = converter_name_for_token_class(token_class)
        converter_class = JsRegex::Converter.const_get(converter_name)
        converter_class.new(self, context)
      end
    end

    # Derives the Converter constant name from the token class, e.g.
    # :set => "SetConverter". Token classes without a matching constant
    # fall back to 'UnsupportedTokenConverter'.
    def converter_name_for_token_class(token_class)
      name = "#{token_class.to_s.delete('_').capitalize}Converter"
      Converter.const_defined?(name) ? name : 'UnsupportedTokenConverter'
    end

    def converters
      @converters ||= {}
    end

    # The Context instance shared by all Converters of this Conversion.
    def context
      @context ||= JsRegex::Converter::Context.new
    end

    def convert_options(ruby_regex)
      @options = 'g' # all Ruby regexes are what is called "global" in JS
      @options << 'i' if ruby_regex.options & Regexp::IGNORECASE > 0
    end

    def perform_sanity_check
      # Ruby regex capabilities are a superset of JS regex capabilities in
      # the source part. So if this raises an Error, a Converter messed up.
      #
      # Only the source is compiled here. #options holds *JavaScript* flags
      # ("g"/"gi"); handing that String to Regexp.new makes Ruby 3.2+ parse
      # it as Ruby flags and raise ArgumentError for the unknown "g", which
      # the rescue below would turn into an empty source for every regex.
      # The flags have no bearing on whether the source is well-formed.
      Regexp.new(source)
    rescue ArgumentError, RegexpError, SyntaxError => e
      @source = ''
      warnings << e.message
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Translates anchor tokens into the String appended to the JS source.
    #
    class AnchorConverter < JsRegex::Converter::Base
      private

      # Returns the replacement for the current anchor subtype; any
      # subtype not listed here is dropped with a warning.
      def convert_data
        if [:bol, :bos].include?(subtype)
          '^'
        elsif [:eol, :eos].include?(subtype)
          '$'
        elsif subtype == :eos_ob_eol
          # end of string, or just before a trailing newline
          '(?=\n?$)'
        elsif subtype == :word_boundary
          '\b'
        elsif subtype == :nonword_boundary
          '\B'
        else
          warn_of_unsupported_feature
        end
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'group_converter'
    #
    # Template class implementation.
    #
    # Note the inheritance from GroupConverter, whose group-opening
    # helpers are reused here.
    #
    class AssertionConverter < JsRegex::Converter::GroupConverter
      private

      # Lookaheads are passed through, negative lookbehinds are dropped
      # with a warning, and everything else (:lookbehind, ...) is warned
      # about and opened as a passive group instead.
      def convert_data
        return open_assertion if [:lookahead, :nlookahead].include?(subtype)
        return drop_negative_lookbehind if subtype == :nlookbehind

        warn_of_unsupported_feature
        open_group('(?:')
      end

      # Flags the Context (Context#valid? is false while the flag is set)
      # and records the warning. Returns the warning helper's empty String.
      def drop_negative_lookbehind
        context.negative_lookbehind = true
        warn_of_unsupported_feature('negative lookbehind assertion')
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
class JsRegex
  module Converter
    #
    # Template class. Implement #convert_data in subclasses.
    #
    # A Converter is created with a target (which must respond to #source
    # and #warnings) and a context (which must respond to #valid?).
    #
    class Base
      attr_reader :target, :context

      def initialize(target, context)
        @target = target
        @context = context
      end

      # Stores the token details, runs #convert_data and appends its result
      # to the target's source, unless the context is invalid afterwards.
      #
      # #convert_data is always executed *before* the validity check. It is
      # the only place where a Converter can change the context, so skipping
      # it while the context is invalid would make it impossible to ever
      # leave that state (e.g. the token closing a dropped negative
      # lookbehind would never be processed and all following output would
      # be lost).
      #
      # NOTE(review): as a consequence, tokens inside a dropped section
      # still update the context and may add warnings.
      #
      # @return [String] the target's source
      def convert(token_class, subtype, data, start_index, end_index)
        self.token_class = token_class
        self.subtype = subtype
        self.data = data
        self.start_index = start_index
        self.end_index = end_index

        result = convert_data
        target.source << (context.valid? ? result : '')
      end

      private

      attr_accessor :token_class, :subtype, :data, :start_index, :end_index

      # Must return the String to append for the current token.
      def convert_data
        raise NotImplementedError
      end

      # Returns the token's data unchanged.
      def pass_through
        data
      end

      # Adds a warning to the target and returns an empty String, so that
      # it can be used as the return value of #convert_data.
      #
      # @param description [String, nil] defaults to a description built
      #   from the current subtype, token class and data
      def warn_of_unsupported_feature(description = nil)
        description ||= "#{subtype} #{token_class} '#{data}'".tr('_', ' ')
        target.warnings << "Dropped unsupported #{description} "\
                           "at index #{start_index}..#{end_index}"
        ''
      end
    end
  end
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Conditionals are not supported in JS. The branches are kept as a
    # plain alternation and a single warning is recorded.
    #
    class ConditionalConverter < JsRegex::Converter::Base
      private

      def convert_data
        case subtype
        when :open
          warn_of_unsupported_feature("conditional '(?'")
          # Open a *passive* group: this converter does not update the
          # Context's group counters, so emitting a capturing '(' would
          # add a capture group that nothing accounts for and shift the
          # numbers of all following groups in the JS result.
          '(?:'
        when :separator, :close
          pass_through
        else
          '' # one warning is enough, don't warn about other parts
        end
      end
    end
  end
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
class JsRegex
  module Converter
    #
    # Passed among Converters to globalize basic status data.
    #
    # The Converters themselves are stateless.
    #
    class Context
      attr_accessor :buffered_set_members,
                    :buffered_set_extractions,
                    :group_level,
                    :group_level_for_backreference,
                    :group_number_for_backreference,
                    :negative_lookbehind,
                    :negative_set_levels,
                    :opened_groups,
                    :previous_quantifier_subtype,
                    :previous_quantifier_end,
                    :set_level

      # Starts with empty buffers, all counters at zero and no negative
      # lookbehind in progress. Attributes not listed here start as nil.
      def initialize
        @buffered_set_members = []
        @buffered_set_extractions = []
        @group_level = 0
        @negative_lookbehind = false
        @negative_set_levels = []
        @opened_groups = 0
        @set_level = 0
      end

      # False while a negative lookbehind is flagged.
      def valid?
        !negative_lookbehind
      end

      # set context

      # Enters a set. Entering the outermost set empties both buffers in
      # place; the newly entered level always starts out as not negated.
      def open_set
        @set_level += 1
        if @set_level == 1
          [@buffered_set_members, @buffered_set_extractions].each(&:clear)
        end
        @negative_set_levels = @negative_set_levels - [@set_level]
      end

      # Marks the current set level as negated.
      def negate_set
        @negative_set_levels = @negative_set_levels | [@set_level]
      end

      # Whether the given set level (default: the current one) is negated.
      def negative_set?(level = set_level)
        negative_set_levels.include?(level)
      end

      # Whether the current set is a negated set inside another set.
      def nested_negation?
        return false unless set_level > 1
        negative_set?
      end

      # Leaves the current set level.
      def close_set
        @set_level -= 1
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative 'literal_converter'
    #
    # Template class implementation.
    #
    # Handles escape tokens: some are kept as they are, escaped literals
    # are delegated to LiteralConverter, the rest is dropped with a warning.
    #
    class EscapeConverter < JsRegex::Converter::Base
      private

      def convert_data
        return LiteralConverter.convert(data, self) if subtype == :literal
        return pass_through if passed_through_subtype?

        # Backspace, Bell, HexWide, Control, Meta, MetaControl, ...
        warn_of_unsupported_feature
      end

      # Whether the current escape subtype is emitted unchanged.
      def passed_through_subtype?
        [
          :backslash, :dot, :form_feed, :hex, :interval_close,
          :interval_open, :newline, :one_or_more, :octal, :return,
          :space, :tab, :vertical_tab, :zero_or_more, :zero_or_one
        ].include?(subtype)
      end
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Tokens handled by this converter never produce any output.
    #
    class FreespaceConverter < JsRegex::Converter::Base
      private

      # Always returns an empty String; no warning is recorded.
      def convert_data
        dropped = ''
        dropped
      end
    end
  end
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Converts group tokens and keeps the Context's group counters up to
    # date: #group_level is the current nesting depth, #opened_groups the
    # number of *capturing* groups emitted so far.
    #
    class GroupConverter < JsRegex::Converter::Base
      private

      def convert_data
        case subtype
        when :atomic then open_atomic_group
        when :capture, :passive then open_group
        when :close then close_group
        when :comment then '' # drop whole group w/o warning
        when :named_ab, :named_sq then open_group('(') # drop name w/o warning
        when :options then open_options_group
        else
          warn_of_unsupported_feature
          open_group('(')
        end
      end

      def open_atomic_group
        # Atomicity is achieved with backreferenced lookahead groups:
        # http://instanceof.me/post/52245507631
        # regex-emulate-atomic-grouping-with-lookahead
        context.group_level_for_backreference = context.group_level
        # The emitted head contains a capturing group. It must be counted,
        # or a later atomic group would backreference this one again.
        context.opened_groups += 1
        context.group_number_for_backreference = context.opened_groups
        # NOTE(review): only one pending backreference is tracked, so an
        # atomic group nested in another atomic group overwrites the outer
        # group's data - confirm whether this needs support.
        open_assertion('(?=(')
      end

      def open_options_group
        warn_of_unsupported_feature('group-specific options')
        open_group('(')
      end

      # Registers a new group and returns its head.
      #
      # Only heads that open a capturing group count towards
      # Context#opened_groups. That counter yields the number used for
      # the atomic group backreference, and JS numbers capturing groups
      # only. Counting passive groups would make it point at a wrong or
      # non-existent group.
      def open_group(group_head = pass_through)
        context.group_level += 1
        context.opened_groups += 1 if capturing_head?(group_head)
        group_head
      end

      # A head that starts with '(?' ('(?:', '(?=', ...) does not capture.
      def capturing_head?(group_head)
        !group_head.start_with?('(?')
      end

      def open_assertion(assertion_head = pass_through)
        # these don't count as opened groups for backreference purposes
        context.group_level += 1
        assertion_head
      end

      def close_group
        if context.negative_lookbehind
          close_negative_lookbehind
        else
          context.group_level -= 1
          if end_of_atomic_group?
            close_atomic_group
          else
            ')'
          end
        end
      end

      # Clears the Context's negative lookbehind flag and emits nothing.
      def close_negative_lookbehind
        context.negative_lookbehind = false
        ''
      end

      # True if the group being closed sits on the level at which the
      # pending atomic group was opened.
      def end_of_atomic_group?
        return false unless context.group_level_for_backreference
        context.group_level_for_backreference == context.group_level
      end

      def close_atomic_group
        context.group_level_for_backreference = nil
        # an empty passive group is appended in case literal digits follow
        "))\\#{context.group_number_for_backreference}(?:)"
      end
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # The conversion itself is available as a class method so that other
    # Converters can reuse it for literal data of their own tokens.
    #
    class LiteralConverter < JsRegex::Converter::Base
      # Converts literal +data+ on behalf of +converter+.
      #
      # Data containing an astral plane character (U+10000..U+10FFFF) is
      # dropped with a warning that is recorded through +converter+.
      #
      # @param data [String] token data; it is not modified
      # @param converter [Base] used to record the warning
      # @return [String] converted data, or '' if the data was dropped
      def self.convert(data, converter)
        utf8_data = data.dup.force_encoding('UTF-8')
        # The upper bound is the last Unicode code point, so that the
        # supplementary private use planes are detected as well.
        if /[\u{10000}-\u{10FFFF}]/ =~ utf8_data
          converter.send(:warn_of_unsupported_feature, 'astral plane character')
        else
          ensure_json_compatibility(utf8_data)
        end
      end

      # Replaces raw form feed, newline, carriage return and tab characters
      # (including a directly preceding backslash) with their escape
      # sequences, e.g. a raw newline becomes the two characters '\n'.
      def self.ensure_json_compatibility(data)
        data.gsub(/\\?[\f\n\r\t]/) { |lit| Regexp.escape(lit.delete('\\')) }
      end

      private

      def convert_data
        self.class.convert(data, self)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Handles alternations and the dot; other meta tokens are dropped
    # with a warning.
    #
    class MetaConverter < JsRegex::Converter::Base
      private

      def convert_data
        if subtype == :alternation
          pass_through
        elsif subtype == :dot
          # with Regexp::MULTILINE set, the dot must match newlines, too
          return '.' unless ruby_multiline_mode?
          '(?:.|\n)'
        else
          warn_of_unsupported_feature
        end
      end

      # Whether the Ruby regex has the MULTILINE option set. The answer is
      # computed once per converter instance and then reused.
      def ruby_multiline_mode?
        if @rb_mm.nil?
          @rb_mm = (target.ruby_regex.options & Regexp::MULTILINE) > 0
        end
        @rb_mm
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'property_converter'
    #
    # Template class implementation.
    #
    # Note the inheritance from PropertyConverter: the conversion is the
    # same, only with the negation flag set.
    #
    class NonpropertyConverter < JsRegex::Converter::PropertyConverter
      private

      def convert_data
        negated = true
        convert_property(negated)
      end
    end
  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative File.join('..', 'property_map')
    #
    # Template class implementation.
    #
    # Replaces property tokens with the String that JsRegex::PROPERTY_MAP
    # holds for them; properties without an entry are dropped with a
    # warning.
    #
    class PropertyConverter < JsRegex::Converter::Base
      # Looks up the replacement for a property name (String or Symbol,
      # in any case).
      #
      # @return [String, nil] nil if the map has no entry for the name
      def self.property_replacement(property_name, negated = false)
        key = property_name.downcase.to_sym
        positive = JsRegex::PROPERTY_MAP[key]
        return positive unless negated
        negated_property_replacement(positive)
      end

      # Returns an inverted copy of +property_string+: bracket sets gain
      # or lose their leading '^', anything else gets its case swapped.
      def self.negated_property_replacement(property_string)
        # take care not to use destructive methods on elements in the map
        return nil unless property_string
        # it's an invertable meta char
        return property_string.swapcase unless property_string.start_with?('[')

        if property_string.start_with?('[^')
          property_string.sub('[^', '[')
        else
          property_string.sub('[', '[^')
        end
      end

      private

      def convert_data
        convert_property
      end

      # The token's subtype serves as the property name.
      def convert_property(negated = false)
        replace = self.class.property_replacement(subtype, negated)
        return replace if replace
        warn_of_unsupported_feature
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Passes quantifiers through, except for two cases that are reported
    # with a warning: an interval directly following another interval is
    # dropped, and a possessive quantifier loses its trailing '+'.
    #
    class QuantifierConverter < JsRegex::Converter::Base
      private

      def convert_data
        if multiplicative_interval?
          return warn_of_unsupported_feature('multiplicative interval \'{x}{x}\'')
        end

        # remember this quantifier for the check on the next one
        context.previous_quantifier_subtype = subtype
        context.previous_quantifier_end = end_index
        convert_quantifier
      end

      def convert_quantifier
        possessive = data.length > 1 && data.end_with?('+')
        return pass_through unless possessive

        warn_of_unsupported_feature('declaration of quantifier as possessive')
        data[0...-1]
      end

      # True if this interval starts exactly where the previous quantifier,
      # also an interval, ended.
      def multiplicative_interval?
        return false unless subtype == :interval
        return false unless context.previous_quantifier_subtype == :interval
        context.previous_quantifier_end == start_index
      end
    end
  end
end
|
@@ -0,0 +1,137 @@
|
|
1
|
+
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative 'literal_converter'
    require_relative 'property_converter'
    require_relative 'type_converter'
    #
    # Template class implementation.
    #
    # This converter works a little differently from the others.
    #
    # It buffers anything that it finds within a set in the Context's
    # #buffered_set_members and #buffered_set_extractions Arrays,
    # returning an empty String for all passed tokens, and only when
    # the set is closed does it compile and return the final String.
    #
    # Members are Strings that can stay inside a JS bracket set.
    # Extractions are replacements (property or hex expansions) that are
    # emitted outside of it.
    #
    class SetConverter < JsRegex::Converter::Base
      private

      def convert_data
        case subtype
        when :open then convert_open_subtype
        when :negate then convert_negate_subtype
        when :close then convert_close_subtype
        when :member, :range, :escape then convert_member_subtype
        when /\Aclass_/ then convert_class_subtype
        when /\Atype_/ then convert_type_subtype
        when :intersection
          warn_of_unsupported_feature("set intersection '&&'")
        else
          # TODO: I think it's a bug in Regexp::Scanner that some property
          # tokens (only positive ones?) are returned with the token class
          # :set within sets. If this is fixed, just
          # warn_of_unsupported_feature here.
          try_replacing_potential_property_subtype
        end
      end

      def convert_open_subtype
        context.open_set
        ''
      end

      def convert_negate_subtype
        if context.set_level > 1
          warn_of_unsupported_feature('nested negative set data')
        end
        context.negate_set
        ''
      end

      # Only closing the outermost set produces output.
      def convert_close_subtype
        context.close_set
        context.set_level == 0 ? finalize_set : ''
      end

      def convert_member_subtype
        literal_conversion = LiteralConverter.convert(data, self)
        return '' if literal_conversion == ''
        buffer_set_member(literal_conversion)
      end

      # Handles subtypes such as :class_alpha and :class_nonalpha.
      def convert_class_subtype
        negated = subtype.to_s.start_with?('class_non')
        name = subtype.to_s[(negated ? 9 : 6)..-1]
        try_replacing_property(name, negated)
      end

      def try_replacing_potential_property_subtype
        negated = subtype.to_s.start_with?('non')
        name = negated ? subtype.to_s[3..-1] : subtype.to_s
        try_replacing_property(name, negated)
      end

      def try_replacing_property(name, negated)
        replacement = PropertyConverter.property_replacement(name, negated)
        if replacement
          buffer_set_extraction(replacement)
        else
          warn_of_unsupported_feature
        end
      end

      def convert_type_subtype
        if subtype == :type_hex
          buffer_set_extraction(TypeConverter::HEX_EXPANSION)
        elsif subtype == :type_nonhex
          buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
        else
          buffer_set_member(data)
        end
      end

      # Content of nested negative sets is not buffered, i.e. it is dropped.
      def buffer_set_member(string)
        buffered_members << string unless context.nested_negation?
        ''
      end

      def buffer_set_extraction(string)
        buffered_extractions << string unless context.nested_negation?
        ''
      end

      def buffered_members
        context.buffered_set_members
      end

      def buffered_extractions
        context.buffered_set_extractions
      end

      # Compiles the buffered data of the set that was just closed.
      #
      # Positive sets become a bracket set, the sole extraction, or an
      # alternation of both. Negative sets with extractions need special
      # treatment: an alternation would *add* the extractions to the
      # matched characters instead of excluding them.
      def finalize_set
        negative = context.negative_set?(1)
        if buffered_extractions.none?
          buffered_members.none? ? '' : build_set(buffered_members, negative)
        elsif negative
          finalize_negative_set_with_extractions
        elsif buffered_members.none?
          finalize_depleted_set
        else
          set = build_set(buffered_members, false)
          "(?:#{set}|#{buffered_extractions.join('|')})"
        end
      end

      # Builds the replacement for a negative set that contains
      # extractions: a negative lookahead rejects any character matched by
      # an extraction, then one character is consumed - by the negated
      # member set if there are members, otherwise by '[\s\S]' (any
      # character). The passive group keeps a following quantifier
      # attached to the whole construct.
      #
      # NOTE(review): assumes every extraction matches exactly one
      # character (a bracket set or a meta char) - confirm against
      # PROPERTY_MAP and the TypeConverter expansions.
      def finalize_negative_set_with_extractions
        consumer =
          buffered_members.none? ? '[\s\S]' : build_set(buffered_members, true)
        "(?:(?!#{buffered_extractions.join('|')})#{consumer})"
      end

      # For positive sets without members. Callers ensure that at least
      # one extraction is buffered; the empty case is kept as a safeguard.
      def finalize_depleted_set
        case buffered_extractions.count
        when 0 then ''
        when 1 then buffered_extractions.first
        else "(?:#{buffered_extractions.join('|')})"
        end
      end

      def build_set(members, negative)
        "[#{negative ? '^' : ''}#{members.join}]"
      end
    end
  end
end
|