js_regex 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d7423c167f82ba8e240c7087149430e95f08c98c
4
+ data.tar.gz: ebbf085f8aede4f731ba598c04e6d2d87f1953fb
5
+ SHA512:
6
+ metadata.gz: 651ccc96ac12d997fe49361270db2beb076954d8fe43299433370327175ff3a979e4378d2ec96b12401ec14438cc00a83ca47e607b6faac5fd43cbaea2f8d882
7
+ data.tar.gz: 1f57cab92495b0d6fb23017acde909daf8ef2a682dab21558246a3eb440859aa91d377c5be71699f08c1da50c5cea57e7acfba036f7049bcdd4e0f8e0deb2550
@@ -0,0 +1,82 @@
1
class JsRegex
  #
  # This class acts as a facade, creating specific Converters and
  # passing Regexp::Scanner tokens to them, reusing Converters as needed.
  #
  # ::of returns a source String, options String, and warnings Array.
  #
  class Conversion
    require 'regexp_parser'
    Dir[File.join(File.dirname(__FILE__), '**', '*.rb')].each { |f| require f }

    attr_reader :ruby_regex, :source, :options, :warnings

    # Runs the whole conversion immediately. Afterwards the results can be
    # read from #source, #options and #warnings.
    def initialize(ruby_regex)
      @ruby_regex = ruby_regex
      @source = ''
      @options = ''
      @warnings = []

      convert_source(ruby_regex)
      convert_options(ruby_regex)
      perform_sanity_check
    end

    # Convenience entry point: converts ruby_regex and returns
    # [source String, options String, warnings Array].
    def self.of(ruby_regex)
      conversion = new(ruby_regex)
      [conversion.source, conversion.options, conversion.warnings]
    end

    private

    # Feeds every scanner token to the Converter responsible for its class.
    # The Converters append their output to #source themselves.
    def convert_source(ruby_regex)
      Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
        # There might be a lot of tokens, so don't wrap their data in objects.
        # Even just wrapping them in simple structs or attr_reader objects
        # can lead to 60%+ longer processing times for large regexes.
        convert_token(token_class, subtype, data, s, e)
      end
      converters.clear
    end

    def convert_token(token_class, subtype, data, s, e)
      converter = converter_for_token_class(token_class)
      converter.convert(token_class, subtype, data, s, e)
    end

    # Returns the cached Converter for token_class, instantiating it on
    # first use with this Conversion as target and the shared Context.
    def converter_for_token_class(token_class)
      converters[token_class] ||= begin
        converter_name = converter_name_for_token_class(token_class)
        converter_class = JsRegex::Converter.const_get(converter_name)
        converter_class.new(self, context)
      end
    end

    # Maps e.g. :free_space to 'FreespaceConverter'. Token classes without
    # a matching constant fall back to 'UnsupportedTokenConverter'.
    def converter_name_for_token_class(token_class)
      name = "#{token_class.to_s.delete('_').capitalize}Converter"
      Converter.const_defined?(name) ? name : 'UnsupportedTokenConverter'
    end

    def converters
      @converters ||= {}
    end

    def context
      @context ||= JsRegex::Converter::Context.new
    end

    def convert_options(ruby_regex)
      @options = 'g' # all Ruby regexes are what is called "global" in JS
      @options << 'i' if ruby_regex.options & Regexp::IGNORECASE > 0
    end

    def perform_sanity_check
      # Ruby regex capabilities are a superset of JS regex capabilities in
      # the source part. So if this raises an Error, a Converter messed up.
      #
      # Only the source is compiled. The JS options String must not be
      # passed along: 'g' is not a Ruby regexp flag, and Ruby 3.2+ raises
      # ArgumentError for unknown String flags, which the rescue below
      # would turn into an emptied source for every single conversion.
      Regexp.new(source)
    rescue ArgumentError, RegexpError, SyntaxError => e
      @source = ''
      warnings << e.message
    end
  end
end
@@ -0,0 +1,24 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Emits the JS source for anchor tokens; anchors without a mapping
    # below are reported as unsupported.
    #
    class AnchorConverter < JsRegex::Converter::Base
      private

      def convert_data
        return '^' if [:bol, :bos].include?(subtype)
        return '$' if [:eol, :eos].include?(subtype)
        return '(?=\n?$)' if subtype == :eos_ob_eol
        return '\b' if subtype == :word_boundary
        return '\B' if subtype == :nonword_boundary

        warn_of_unsupported_feature
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
class JsRegex
  #
  module Converter
    require_relative 'group_converter'
    #
    # Template class implementation.
    #
    # Note the inheritance from GroupConverter.
    #
    class AssertionConverter < JsRegex::Converter::GroupConverter
      private

      def convert_data
        if [:lookahead, :nlookahead].include?(subtype)
          open_assertion
        elsif subtype == :nlookbehind
          drop_negative_lookbehind
        else # :lookbehind, ...
          open_unsupported_assertion
        end
      end

      # Flags the context so that Base#convert emits nothing until the
      # flag is reset, and records a warning for the dropped assertion.
      def drop_negative_lookbehind
        context.negative_lookbehind = true
        warn_of_unsupported_feature('negative lookbehind assertion')
      end

      # Warns, then keeps the assertion's content as a passive group.
      def open_unsupported_assertion
        warn_of_unsupported_feature
        open_group('(?:')
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
class JsRegex
  module Converter
    #
    # Template class. Implement #convert_data in subclasses.
    #
    # A converter receives one token at a time via #convert and appends
    # the result of #convert_data to the target's source String.
    #
    class Base
      attr_reader :target, :context

      # target must respond to #source and #warnings, context to #valid?.
      def initialize(target, context)
        @target = target
        @context = context
      end

      # Stores the token parts for use by #convert_data, then appends the
      # converted String to target.source. While the context is not valid,
      # #convert_data is not called and an empty String is appended.
      def convert(token_class, subtype, data, start_index, end_index)
        self.token_class = token_class
        self.subtype = subtype
        self.data = data
        self.start_index = start_index
        self.end_index = end_index

        target.source << (context.valid? ? convert_data : '')
      end

      private

      attr_accessor :token_class, :subtype, :data, :start_index, :end_index

      # Must return the String to append to the target's source.
      def convert_data
        raise NotImplementedError
      end

      # Returns the token's data unchanged.
      def pass_through
        data
      end

      # Adds a warning about the current token to target.warnings and
      # returns an empty String, so it can be used as the converted value.
      def warn_of_unsupported_feature(description = nil)
        description ||= default_feature_description
        target.warnings << "Dropped unsupported #{description} "\
                           "at index #{start_index}..#{end_index}"
        ''
      end

      # Builds e.g. "name ref backref '\k<my_name>'". Only the subtype and
      # token class names are humanized; the quoted data is kept verbatim
      # so that underscores in the user's pattern are reported as written.
      def default_feature_description
        token_name = "#{subtype} #{token_class}".tr('_', ' ')
        "#{token_name} '#{data}'"
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Conditionals are reported as unsupported once, on their opening
    # token; separators and the closing token are kept as they are.
    #
    class ConditionalConverter < JsRegex::Converter::Base
      private

      def convert_data
        return open_conditional if subtype == :open
        return pass_through if [:separator, :close].include?(subtype)

        '' # one warning is enough, don't warn about other parts
      end

      # Warns about the conditional and emits a plain group opener instead.
      def open_conditional
        warn_of_unsupported_feature("conditional '(?'")
        '('
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
class JsRegex
  module Converter
    #
    # Passed among Converters to globalize basic status data.
    #
    # The Converters themselves are stateless.
    #
    class Context
      attr_accessor :buffered_set_members,
                    :buffered_set_extractions,
                    :group_level,
                    :group_level_for_backreference,
                    :group_number_for_backreference,
                    :negative_lookbehind,
                    :negative_set_levels,
                    :opened_groups,
                    :previous_quantifier_subtype,
                    :previous_quantifier_end,
                    :set_level

      # Counters start at zero and buffers start empty. The backreference
      # and previous-quantifier attributes are not assigned here.
      def initialize
        self.group_level = 0
        self.opened_groups = 0
        self.set_level = 0
        self.negative_lookbehind = false
        self.negative_set_levels = []
        self.buffered_set_members = []
        self.buffered_set_extractions = []
      end

      # False while the negative_lookbehind flag is set.
      def valid?
        negative_lookbehind ? false : true
      end

      # set context

      # Enters one more set level. Entering the outermost level empties
      # both buffers; the entered level always starts out as non-negative.
      def open_set
        self.set_level = set_level + 1
        reset_set_buffers if set_level == 1
        self.negative_set_levels = negative_set_levels - [set_level]
      end

      # Marks the current set level as negated.
      def negate_set
        self.negative_set_levels = negative_set_levels | [set_level]
      end

      # Whether the given level (the current one by default) is negated.
      def negative_set?(level = set_level)
        negative_set_levels.include?(level)
      end

      # Whether the current level is negated and is not the outermost one.
      def nested_negation?
        return false unless set_level > 1
        negative_set?
      end

      # Leaves the current set level.
      def close_set
        self.set_level = set_level - 1
      end

      private

      def reset_set_buffers
        buffered_set_members.clear
        buffered_set_extractions.clear
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative 'literal_converter'
    #
    # Template class implementation.
    #
    class EscapeConverter < JsRegex::Converter::Base
      # Escape subtypes whose data is emitted unchanged.
      PASS_THROUGH_SUBTYPES = [
        :backslash, :dot, :form_feed, :hex, :interval_close,
        :interval_open, :newline, :one_or_more, :octal, :return,
        :space, :tab, :vertical_tab, :zero_or_more, :zero_or_one
      ].freeze

      private

      def convert_data
        if PASS_THROUGH_SUBTYPES.include?(subtype)
          pass_through
        elsif subtype == :literal
          LiteralConverter.convert(data, self)
        else
          # Backspace, Bell, HexWide, Control, Meta, MetaControl, ...
          warn_of_unsupported_feature
        end
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Every token handled by this converter is removed from the output.
    #
    class FreespaceConverter < JsRegex::Converter::Base
      private

      # Always converts to an empty String; no warning is recorded.
      def convert_data
        ''
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Emits group heads and tails while keeping the Context's group_level
    # and opened_groups up to date. opened_groups counts capturing groups
    # only, since it provides the number of the backreference that is used
    # to emulate atomic groups.
    #
    class GroupConverter < JsRegex::Converter::Base
      private

      def convert_data
        case subtype
        when :atomic then open_atomic_group
        when :capture, :passive then open_group
        when :close then close_group
        when :comment then '' # drop whole group w/o warning
        when :named_ab, :named_sq then open_group('(') # drop name w/o warning
        when :options then open_options_group
        else
          warn_of_unsupported_feature
          open_group('(')
        end
      end

      def open_atomic_group
        # Atomicity is achieved with backreferenced lookahead groups:
        # http://instanceof.me/post/52245507631
        # regex-emulate-atomic-grouping-with-lookahead
        context.group_level_for_backreference = context.group_level
        # The emitted head contains a capturing group. It takes the next
        # capture number and must be counted, or any later atomic group
        # would backreference the wrong capture.
        context.opened_groups += 1
        context.group_number_for_backreference = context.opened_groups
        open_assertion('(?=(')
      end

      def open_options_group
        warn_of_unsupported_feature('group-specific options')
        open_group('(')
      end

      # Registers a new group level and returns group_head for output.
      # Only capturing heads advance opened_groups: heads starting with
      # '(?' (e.g. passive groups) are not numbered by JS and must not
      # shift the backreference number of atomic groups.
      def open_group(group_head = pass_through)
        context.group_level += 1
        context.opened_groups += 1 if capturing_head?(group_head)
        group_head
      end

      # True for a plain '(' head, false for any '(?...' head.
      def capturing_head?(group_head)
        group_head.start_with?('(') && !group_head.start_with?('(?')
      end

      def open_assertion(assertion_head = pass_through)
        # these don't count as opened groups for backreference purposes
        context.group_level += 1
        assertion_head
      end

      # Closes whatever was opened last: a dropped negative lookbehind,
      # an emulated atomic group, or an ordinary group / assertion.
      def close_group
        if context.negative_lookbehind
          close_negative_lookbehind
        else
          context.group_level -= 1
          if end_of_atomic_group?
            close_atomic_group
          else
            ')'
          end
        end
      end

      # Ends the dropped lookbehind; its closing token is dropped as well.
      def close_negative_lookbehind
        context.negative_lookbehind = false
        ''
      end

      # True when an atomic group is pending and the group level is back
      # at the level recorded when that atomic group was opened.
      def end_of_atomic_group?
        return false unless context.group_level_for_backreference
        context.group_level_for_backreference == context.group_level
      end

      def close_atomic_group
        context.group_level_for_backreference = nil
        # an empty passive group is appended in case literal digits follow
        "))\\#{context.group_number_for_backreference}(?:)"
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    class LiteralConverter < JsRegex::Converter::Base
      # Matches any code point above the Basic Multilingual Plane, i.e.
      # from U+10000 up to the last Unicode code point, U+10FFFF.
      ASTRAL_PLANE_CHARACTER = /[\u{10000}-\u{10FFFF}]/

      # Converts literal data on behalf of converter. Data containing an
      # astral plane character is dropped with a warning recorded through
      # converter; any other data is returned with literal form feeds,
      # newlines, carriage returns and tabs replaced by escape sequences.
      def self.convert(data, converter)
        utf8_data = data.dup.force_encoding('UTF-8')
        if ASTRAL_PLANE_CHARACTER =~ utf8_data
          converter.send(:warn_of_unsupported_feature, 'astral plane character')
        else
          ensure_json_compatibility(utf8_data)
        end
      end

      # Replaces each literal \f, \n, \r or \t character (optionally
      # preceded by a backslash) with its Regexp.escape representation.
      def self.ensure_json_compatibility(data)
        data.gsub(/\\?[\f\n\r\t]/) { |lit| Regexp.escape(lit.delete('\\')) }
      end

      private

      def convert_data
        self.class.convert(data, self)
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    class MetaConverter < JsRegex::Converter::Base
      private

      def convert_data
        return pass_through if subtype == :alternation
        return warn_of_unsupported_feature unless subtype == :dot

        # with Ruby's multiline option set, the dot must match newlines too
        ruby_multiline_mode? ? '(?:.|\n)' : '.'
      end

      # Memoized check of the source regex's MULTILINE option. The flag is
      # looked up on first use only; true and false results are both kept.
      def ruby_multiline_mode?
        @rb_mm = multiline_option_set? if @rb_mm.nil?
        @rb_mm
      end

      def multiline_option_set?
        target.ruby_regex.options & Regexp::MULTILINE > 0
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
class JsRegex
  #
  module Converter
    require_relative 'property_converter'
    #
    # Template class implementation.
    #
    # Note the inheritance from PropertyConverter.
    #
    class NonpropertyConverter < JsRegex::Converter::PropertyConverter
      private

      # Same as the parent's conversion, but asks for the negated form.
      def convert_data
        negated = true
        convert_property(negated)
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative File.join('..', 'property_map')
    #
    # Template class implementation.
    #
    class PropertyConverter < JsRegex::Converter::Base
      class << self
        # Looks up property_name (case-insensitively) in PROPERTY_MAP and
        # returns the mapped String, inverted if negated is truthy.
        # Returns nil when the map has no entry.
        def property_replacement(property_name, negated = false)
          key = property_name.downcase.to_sym
          positive = JsRegex::PROPERTY_MAP[key]
          return positive unless negated

          negated_property_replacement(positive)
        end

        # Returns an inverted copy of property_string, or nil if none was
        # given. Only non-destructive String methods are used, so the
        # elements of the map are never modified.
        def negated_property_replacement(property_string)
          return nil unless property_string
          return property_string.sub('[^', '[') if property_string.start_with?('[^')
          return property_string.sub('[', '[^') if property_string.start_with?('[')

          # it's an invertable meta char
          property_string.swapcase
        end
      end

      private

      def convert_data
        convert_property
      end

      # Emits the replacement for the current subtype, or warns (and
      # emits nothing) if there is none.
      def convert_property(negated = false)
        self.class.property_replacement(subtype, negated) ||
          warn_of_unsupported_feature
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    class QuantifierConverter < JsRegex::Converter::Base
      private

      # Drops an interval that directly follows another interval; any
      # other quantifier is remembered in the context and then converted.
      def convert_data
        if multiplicative_interval?
          return warn_of_unsupported_feature('multiplicative interval \'{x}{x}\'')
        end

        context.previous_quantifier_subtype = subtype
        context.previous_quantifier_end = end_index
        convert_quantifier
      end

      # Strips the trailing '+' of a multi-character quantifier, with a
      # warning; everything else is passed through.
      def convert_quantifier
        possessive = data.length > 1 && data.end_with?('+')
        return pass_through unless possessive

        warn_of_unsupported_feature('declaration of quantifier as possessive')
        data[0..-2]
      end

      # True if this interval starts exactly where the previously
      # converted quantifier, also an interval, ended.
      def multiplicative_interval?
        return false unless subtype == :interval
        return false unless context.previous_quantifier_subtype == :interval

        context.previous_quantifier_end == start_index
      end
    end
  end
end
@@ -0,0 +1,137 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative 'literal_converter'
    require_relative 'property_converter'
    require_relative 'type_converter'
    #
    # Template class implementation.
    #
    # Unlike the other converters, this one emits nothing while a set is
    # open. Everything found inside the set is collected in the Context's
    # #buffered_set_members and #buffered_set_extractions Arrays, and the
    # final String is only compiled and returned when the outermost set
    # is closed.
    #
    class SetConverter < JsRegex::Converter::Base
      private

      def convert_data
        subtype_name = subtype.to_s
        if subtype == :open
          convert_open_subtype
        elsif subtype == :negate
          convert_negate_subtype
        elsif subtype == :close
          convert_close_subtype
        elsif [:member, :range, :escape].include?(subtype)
          convert_member_subtype
        elsif subtype_name.start_with?('class_')
          convert_class_subtype
        elsif subtype_name.start_with?('type_')
          convert_type_subtype
        elsif subtype == :intersection
          warn_of_unsupported_feature("set intersection '&&'")
        else
          # TODO: It may be a bug in Regexp::Scanner that some property
          # tokens (only positive ones?) are returned with the token class
          # :set within sets. Once that is fixed, this branch can simply
          # call warn_of_unsupported_feature.
          try_replacing_potential_property_subtype
        end
      end

      def convert_open_subtype
        context.open_set
        ''
      end

      def convert_negate_subtype
        nested = context.set_level > 1
        warn_of_unsupported_feature('nested negative set data') if nested
        context.negate_set
        ''
      end

      # Only closing the outermost set produces output.
      def convert_close_subtype
        context.close_set
        return '' unless context.set_level == 0

        finalize_set
      end

      def convert_member_subtype
        converted = LiteralConverter.convert(data, self)
        return '' if converted == ''

        buffer_set_member(converted)
      end

      # Handles subtypes named 'class_<name>' or 'class_non<name>'.
      def convert_class_subtype
        full_name = subtype.to_s
        negated = full_name.start_with?('class_non')
        prefix = negated ? 'class_non' : 'class_'
        try_replacing_property(full_name[prefix.length..-1], negated)
      end

      # Treats the subtype itself as a property name, with a leading
      # 'non' taken to mean negation.
      def try_replacing_potential_property_subtype
        full_name = subtype.to_s
        negated = full_name.start_with?('non')
        name = negated ? full_name[3..-1] : full_name
        try_replacing_property(name, negated)
      end

      def try_replacing_property(name, negated)
        replacement = PropertyConverter.property_replacement(name, negated)
        return warn_of_unsupported_feature unless replacement

        buffer_set_extraction(replacement)
      end

      def convert_type_subtype
        case subtype
        when :type_hex
          buffer_set_extraction(TypeConverter::HEX_EXPANSION)
        when :type_nonhex
          buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
        else
          buffer_set_member(data)
        end
      end

      # Content of nested negated sets is not buffered.
      def buffer_set_member(string)
        buffered_members << string unless context.nested_negation?
        ''
      end

      def buffer_set_extraction(string)
        buffered_extractions << string unless context.nested_negation?
        ''
      end

      def buffered_members
        context.buffered_set_members
      end

      def buffered_extractions
        context.buffered_set_extractions
      end

      # Compiles the buffered members into a set. If extractions were
      # buffered too, set and extractions become alternatives in a
      # passive group.
      def finalize_set
        return finalize_depleted_set if buffered_members.none?

        set = build_set(buffered_members, context.negative_set?(1))
        return set unless buffered_extractions.any?

        "(?:#{set}|#{buffered_extractions.join('|')})"
      end

      # Used when no members were buffered, only (possibly) extractions.
      def finalize_depleted_set
        extraction_count = buffered_extractions.count
        if extraction_count == 0
          ''
        elsif extraction_count == 1
          buffered_extractions.first
        else
          "(?:#{buffered_extractions.join('|')})"
        end
      end

      def build_set(members, negative)
        negation = negative ? '^' : ''
        "[#{negation}#{members.join}]"
      end
    end
  end
end