js_regex 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d7423c167f82ba8e240c7087149430e95f08c98c
4
+ data.tar.gz: ebbf085f8aede4f731ba598c04e6d2d87f1953fb
5
+ SHA512:
6
+ metadata.gz: 651ccc96ac12d997fe49361270db2beb076954d8fe43299433370327175ff3a979e4378d2ec96b12401ec14438cc00a83ca47e607b6faac5fd43cbaea2f8d882
7
+ data.tar.gz: 1f57cab92495b0d6fb23017acde909daf8ef2a682dab21558246a3eb440859aa91d377c5be71699f08c1da50c5cea57e7acfba036f7049bcdd4e0f8e0deb2550
@@ -0,0 +1,82 @@
1
class JsRegex
  #
  # This class acts as a facade, creating specific Converters and
  # passing Regexp::Scanner tokens to them, reusing Converters as needed.
  #
  # ::of returns a source String, options String, and warnings Array.
  #
  class Conversion
    require 'regexp_parser'
    Dir[File.join(File.dirname(__FILE__), '**', '*.rb')].each { |f| require f }

    attr_reader :ruby_regex, :source, :options, :warnings

    # Converts the given Ruby Regexp right away; the results are available
    # through #source, #options and #warnings afterwards.
    def initialize(ruby_regex)
      @ruby_regex = ruby_regex
      @source = ''
      @options = ''
      @warnings = []

      convert_source(ruby_regex)
      convert_options(ruby_regex)
      perform_sanity_check
    end

    # Shortcut that returns [source, options, warnings] for ruby_regex.
    def self.of(ruby_regex)
      conversion = new(ruby_regex)
      [conversion.source, conversion.options, conversion.warnings]
    end

    private

    # Feeds every scanner token to the matching converter, which appends
    # its output to #source, then releases the converters.
    def convert_source(ruby_regex)
      Regexp::Scanner.scan(ruby_regex) do |token_class, subtype, data, s, e|
        # There might be a lot of tokens, so don't wrap their data in objects.
        # Even just wrapping them in simple structs or attr_reader objects
        # can lead to 60%+ longer processing times for large regexes.
        convert_token(token_class, subtype, data, s, e)
      end
      converters.clear
    end

    def convert_token(token_class, subtype, data, s, e)
      converter = converter_for_token_class(token_class)
      converter.convert(token_class, subtype, data, s, e)
    end

    # Returns the converter for token_class, building it on first use so
    # that each conversion needs at most one instance per token class.
    def converter_for_token_class(token_class)
      converters[token_class] ||= begin
        converter_name = converter_name_for_token_class(token_class)
        converter_class = JsRegex::Converter.const_get(converter_name)
        converter_class.new(self, context)
      end
    end

    # Maps e.g. :free_space to 'FreespaceConverter'; token classes without
    # a dedicated converter fall back to 'UnsupportedTokenConverter'.
    def converter_name_for_token_class(token_class)
      name = "#{token_class.to_s.delete('_').capitalize}Converter"
      Converter.const_defined?(name) ? name : 'UnsupportedTokenConverter'
    end

    def converters
      @converters ||= {}
    end

    def context
      @context ||= JsRegex::Converter::Context.new
    end

    def convert_options(ruby_regex)
      @options = 'g' # all Ruby regexes are what is called "global" in JS
      @options << 'i' if (ruby_regex.options & Regexp::IGNORECASE) > 0
    end

    def perform_sanity_check
      # Ruby regex capabilities are a superset of JS regex capabilities in
      # the source part. So if this raises an Error, a Converter messed up.
      #
      # Only the source is checked. The JS options String ('g', 'gi') must
      # not be handed to Regexp.new: Ruby >= 3.2 parses a String argument as
      # Ruby flags and raises ArgumentError for the unknown flag 'g', which
      # would wrongly discard every converted source.
      Regexp.new(source)
    rescue ArgumentError, RegexpError, SyntaxError => e
      @source = ''
      warnings << e.message
    end
  end
end
@@ -0,0 +1,24 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Replaces Ruby anchor tokens with JavaScript anchors and warns about
    # any anchor subtype it does not know.
    #
    class AnchorConverter < JsRegex::Converter::Base
      private

      def convert_data
        if [:bol, :bos].include?(subtype)
          '^'
        elsif [:eol, :eos].include?(subtype)
          '$'
        elsif subtype == :eos_ob_eol
          # end of string, or just before a final line break
          '(?=\n?$)'
        elsif subtype == :word_boundary
          '\b'
        elsif subtype == :nonword_boundary
          '\B'
        else
          warn_of_unsupported_feature
        end
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
class JsRegex
  #
  module Converter
    require_relative 'group_converter'
    #
    # Template class implementation.
    #
    # Inherits from GroupConverter to reuse its group bookkeeping helpers.
    #
    class AssertionConverter < JsRegex::Converter::GroupConverter
      private

      def convert_data
        return open_assertion if [:lookahead, :nlookahead].include?(subtype)
        return drop_negative_lookbehind if subtype == :nlookbehind

        # :lookbehind, ... - warn, but keep the content as a passive group
        warn_of_unsupported_feature
        open_group('(?:')
      end

      # Flags the context so that the content of the lookbehind is dropped,
      # then records the warning.
      def drop_negative_lookbehind
        context.negative_lookbehind = true
        warn_of_unsupported_feature('negative lookbehind assertion')
      end
    end
  end
end
@@ -0,0 +1,44 @@
1
class JsRegex
  module Converter
    #
    # Template class. Implement #convert_data in subclasses.
    #
    # A converter receives the raw scanner token parts via #convert, stores
    # them, and appends whatever #convert_data returns to target.source.
    #
    class Base
      attr_reader :target, :context

      # target  - object that responds to #source and #warnings
      # context - shared status object that responds to #valid?
      def initialize(target, context)
        @target = target
        @context = context
      end

      # Converts one token and appends the result to target.source.
      #
      # #convert_data is always run, even while the context is invalid, so
      # that a converter can change the context back to valid (e.g. when the
      # group that invalidated it is closed). Validity is checked only after
      # the conversion, and output produced in an invalid context is dropped.
      def convert(token_class, subtype, data, start_index, end_index)
        self.token_class = token_class
        self.subtype = subtype
        self.data = data
        self.start_index = start_index
        self.end_index = end_index

        converted = convert_data
        target.source << (context.valid? ? converted : '')
      end

      private

      attr_accessor :token_class, :subtype, :data, :start_index, :end_index

      # Returns the String to emit for the current token.
      def convert_data
        raise NotImplementedError
      end

      # Returns the token data unchanged.
      def pass_through
        data
      end

      # Records a warning about the current token (or the given description)
      # and returns an empty String, so it can be used as conversion result.
      def warn_of_unsupported_feature(description = nil)
        description ||= "#{subtype} #{token_class} '#{data}'".tr('_', ' ')
        target.warnings << "Dropped unsupported #{description} "\
                           "at index #{start_index}..#{end_index}"
        ''
      end
    end
  end
end
@@ -0,0 +1,24 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Conditionals are not supported. The opening token is replaced by a
    # plain group opener and warned about once; separators and the closing
    # token are kept; every other part is dropped silently.
    #
    class ConditionalConverter < JsRegex::Converter::Base
      private

      def convert_data
        if subtype == :open
          warn_of_unsupported_feature("conditional '(?'")
          '('
        elsif [:separator, :close].include?(subtype)
          pass_through
        else
          # the warning for the opening token already covers these parts
          ''
        end
      end
    end
  end
end
@@ -0,0 +1,63 @@
1
class JsRegex
  module Converter
    #
    # Shared status object that is handed to every Converter, so that
    # the Converters do not need to keep this data themselves.
    #
    class Context
      attr_accessor :buffered_set_members,
                    :buffered_set_extractions,
                    :group_level,
                    :group_level_for_backreference,
                    :group_number_for_backreference,
                    :negative_lookbehind,
                    :negative_set_levels,
                    :opened_groups,
                    :previous_quantifier_subtype,
                    :previous_quantifier_end,
                    :set_level

      def initialize
        self.group_level = 0
        self.opened_groups = 0
        self.set_level = 0
        self.negative_lookbehind = false
        self.negative_set_levels = []
        self.buffered_set_members = []
        self.buffered_set_extractions = []
      end

      # False while inside a negative lookbehind.
      def valid?
        !negative_lookbehind
      end

      # set context

      # Enters a (possibly nested) set. Entering the outermost set empties
      # both buffers; the entered level always starts out as not negated.
      def open_set
        self.set_level = set_level + 1
        [buffered_set_members, buffered_set_extractions].each(&:clear) if set_level == 1
        self.negative_set_levels = negative_set_levels - [set_level]
      end

      # Marks the current set level as negated.
      def negate_set
        self.negative_set_levels = negative_set_levels | [set_level]
      end

      # True if the given level (default: the current one) is negated.
      def negative_set?(level = set_level)
        negative_set_levels.include?(level)
      end

      # True if the current set is negated and nested in another set.
      def nested_negation?
        set_level > 1 && negative_set?
      end

      # Leaves the current set level.
      def close_set
        self.set_level = set_level - 1
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative 'literal_converter'
    #
    # Template class implementation.
    #
    # Keeps the escape subtypes listed in #passed_through_subtype?
    # unchanged, treats escaped literals like literals, and drops
    # everything else with a warning.
    #
    class EscapeConverter < JsRegex::Converter::Base
      private

      def convert_data
        return pass_through if passed_through_subtype?
        return LiteralConverter.convert(data, self) if subtype == :literal

        # Backspace, Bell, HexWide, Control, Meta, MetaControl, ...
        warn_of_unsupported_feature
      end

      def passed_through_subtype?
        [
          :backslash, :dot, :form_feed, :hex, :interval_close,
          :interval_open, :newline, :one_or_more, :octal, :return,
          :space, :tab, :vertical_tab, :zero_or_more, :zero_or_one
        ].include?(subtype)
      end
    end
  end
end
@@ -0,0 +1,16 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Emits nothing for the tokens it receives.
    #
    class FreespaceConverter < JsRegex::Converter::Base
      private

      # Always returns an empty String; no warning is recorded.
      def convert_data
        ''
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Converts group tokens and keeps the Context's group bookkeeping in
    # step with the emitted output: #group_level is the current nesting
    # depth, #opened_groups the number of capturing groups emitted so far.
    #
    class GroupConverter < JsRegex::Converter::Base
      private

      def convert_data
        case subtype
        when :atomic then open_atomic_group
        when :capture, :passive then open_group
        when :close then close_group
        when :comment then '' # drop whole group w/o warning
        when :named_ab, :named_sq then open_group('(') # drop name w/o warning
        when :options then open_options_group
        else
          warn_of_unsupported_feature
          open_group('(')
        end
      end

      def open_atomic_group
        # Atomicity is achieved with backreferenced lookahead groups:
        # http://instanceof.me/post/52245507631
        # regex-emulate-atomic-grouping-with-lookahead
        context.group_level_for_backreference = context.group_level
        # The emulation emits a capturing group inside the lookahead. It
        # must be counted, or a later atomic group would be backreferenced
        # with a number that is already taken.
        context.opened_groups += 1
        context.group_number_for_backreference = context.opened_groups
        open_assertion('(?=(')
      end

      def open_options_group
        warn_of_unsupported_feature('group-specific options')
        open_group('(')
      end

      # Registers a newly opened group and returns the String to emit.
      # Only heads that open a capturing group count towards
      # #opened_groups; passive heads such as '(?:' get no group number.
      def open_group(group_head = pass_through)
        context.group_level += 1
        context.opened_groups += 1 if capturing_group_head?(group_head)
        group_head
      end

      def capturing_group_head?(group_head)
        group_head == '('
      end

      def open_assertion(assertion_head = pass_through)
        # these don't count as opened groups for backreference purposes
        context.group_level += 1
        assertion_head
      end

      def close_group
        if context.negative_lookbehind
          close_negative_lookbehind
        else
          context.group_level -= 1
          if end_of_atomic_group?
            close_atomic_group
          else
            ')'
          end
        end
      end

      # Ends the dropped negative lookbehind; nothing is emitted for it.
      def close_negative_lookbehind
        context.negative_lookbehind = false
        ''
      end

      def end_of_atomic_group?
        return false unless context.group_level_for_backreference
        context.group_level_for_backreference == context.group_level
      end

      def close_atomic_group
        context.group_level_for_backreference = nil
        # an empty passive group is appended in case literal digits follow
        "))\\#{context.group_number_for_backreference}(?:)"
      end
    end
  end
end
@@ -0,0 +1,29 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # The conversion is also available as ::convert, so that other
    # converters can treat their data like a literal.
    #
    class LiteralConverter < JsRegex::Converter::Base
      # Returns the converted literal data. If data contains a character
      # beyond the Basic Multilingual Plane, a warning is recorded through
      # converter and an empty String is returned.
      #
      # The checked range covers all supplementary planes, i.e. up to the
      # last Unicode code point U+10FFFF.
      def self.convert(data, converter)
        utf8_data = data.dup.force_encoding('UTF-8')
        if /[\u{10000}-\u{10FFFF}]/ =~ utf8_data
          converter.send(:warn_of_unsupported_feature, 'astral plane character')
        else
          ensure_json_compatibility(utf8_data)
        end
      end

      # Replaces raw form feed, line feed, carriage return and tab
      # characters (with or without a preceding backslash) by their
      # escape sequences.
      def self.ensure_json_compatibility(data)
        data.gsub(/\\?[\f\n\r\t]/) { |lit| Regexp.escape(lit.delete('\\')) }
      end

      private

      def convert_data
        self.class.convert(data, self)
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Keeps alternations, converts the dot depending on the multiline
    # option of the Ruby regex, and warns about any other meta token.
    #
    class MetaConverter < JsRegex::Converter::Base
      private

      def convert_data
        return pass_through if subtype == :alternation
        return warn_of_unsupported_feature unless subtype == :dot

        # with Ruby's multiline option, the dot also matches line breaks
        ruby_multiline_mode? ? '(?:.|\n)' : '.'
      end

      # Looks up the multiline flag once and caches the answer,
      # including a negative one.
      def ruby_multiline_mode?
        if @rb_mm.nil?
          @rb_mm = (target.ruby_regex.options & Regexp::MULTILINE) > 0
        end
        @rb_mm
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
class JsRegex
  #
  module Converter
    require_relative 'property_converter'
    #
    # Template class implementation.
    #
    # Inherits from PropertyConverter and requests the negated
    # replacement for the property.
    #
    class NonpropertyConverter < JsRegex::Converter::PropertyConverter
      private

      def convert_data
        negated = true
        convert_property(negated)
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative File.join('..', 'property_map')
    #
    # Template class implementation.
    #
    # Looks up replacements for properties in JsRegex::PROPERTY_MAP.
    # The lookup is also available on class level for other converters.
    #
    class PropertyConverter < JsRegex::Converter::Base
      # Returns the mapped replacement for property_name (a String or
      # Symbol, looked up in lower case), inverted if negated is true,
      # or nil if the map has no entry.
      def self.property_replacement(property_name, negated = false)
        map_key = property_name.downcase.to_sym
        replacement = JsRegex::PROPERTY_MAP[map_key]
        return replacement unless negated
        negated_property_replacement(replacement)
      end

      # Returns an inverted copy of property_string, or nil if it is nil.
      # Only non-destructive methods are used, so that the elements of
      # the map stay untouched.
      def self.negated_property_replacement(property_string)
        return nil unless property_string
        return property_string.sub('[^', '[') if property_string.start_with?('[^')
        return property_string.sub('[', '[^') if property_string.start_with?('[')

        # it's an invertable meta char
        property_string.swapcase
      end

      private

      def convert_data
        convert_property
      end

      # Returns the replacement for the current subtype, or warns
      # if there is none.
      def convert_property(negated = false)
        self.class.property_replacement(subtype, negated) ||
          warn_of_unsupported_feature
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    #
    # Template class implementation.
    #
    # Keeps quantifiers, but drops the possessive marker and intervals
    # that directly follow another interval.
    #
    class QuantifierConverter < JsRegex::Converter::Base
      private

      def convert_data
        if multiplicative_interval?
          return warn_of_unsupported_feature("multiplicative interval '{x}{x}'")
        end

        # remember this quantifier for the check of the next one
        context.previous_quantifier_subtype = subtype
        context.previous_quantifier_end = end_index
        convert_quantifier
      end

      # Strips a trailing '+' from quantifiers longer than one character.
      def convert_quantifier
        possessive = data.length > 1 && data.end_with?('+')
        return pass_through unless possessive

        warn_of_unsupported_feature('declaration of quantifier as possessive')
        data[0...-1]
      end

      # True if this is an interval that starts right where the
      # previously converted interval ended.
      def multiplicative_interval?
        return false unless subtype == :interval
        return false unless context.previous_quantifier_subtype == :interval
        context.previous_quantifier_end == start_index
      end
    end
  end
end
@@ -0,0 +1,137 @@
1
class JsRegex
  #
  module Converter
    require_relative 'base'
    require_relative 'literal_converter'
    require_relative 'property_converter'
    require_relative 'type_converter'
    #
    # Template class implementation.
    #
    # Unlike the other converters, this one emits nothing while a set is
    # being scanned. Everything found within the set is collected in the
    # Context's #buffered_set_members and #buffered_set_extractions
    # Arrays, and an empty String is returned for each token. The final
    # String is compiled and returned only when the outermost set closes.
    #
    class SetConverter < JsRegex::Converter::Base
      private

      def convert_data
        case subtype
        when :open
          convert_open_subtype
        when :negate
          convert_negate_subtype
        when :close
          convert_close_subtype
        when :member, :range, :escape
          convert_member_subtype
        when /\Aclass_/
          convert_class_subtype
        when /\Atype_/
          convert_type_subtype
        when :intersection
          warn_of_unsupported_feature("set intersection '&&'")
        else
          # TODO: Regexp::Scanner seems to return some property tokens
          # (only positive ones?) with the token class :set when they occur
          # within sets, which looks like a bug. Once that is fixed, simply
          # warn_of_unsupported_feature here.
          try_replacing_potential_property_subtype
        end
      end

      def convert_open_subtype
        context.open_set
        ''
      end

      def convert_negate_subtype
        nested = context.set_level > 1
        warn_of_unsupported_feature('nested negative set data') if nested
        context.negate_set
        ''
      end

      # Emits the compiled set once the outermost set has been closed.
      def convert_close_subtype
        context.close_set
        return '' unless context.set_level == 0
        finalize_set
      end

      def convert_member_subtype
        member = LiteralConverter.convert(data, self)
        member == '' ? '' : buffer_set_member(member)
      end

      # Handles subtypes named 'class_<name>' or 'class_non<name>'.
      def convert_class_subtype
        subtype_name = subtype.to_s
        negated = subtype_name.start_with?('class_non')
        prefix_length = negated ? 'class_non'.length : 'class_'.length
        try_replacing_property(subtype_name[prefix_length..-1], negated)
      end

      # Handles subtypes named '<name>' or 'non<name>'.
      def try_replacing_potential_property_subtype
        subtype_name = subtype.to_s
        negated = subtype_name.start_with?('non')
        property_name = negated ? subtype_name[3..-1] : subtype_name
        try_replacing_property(property_name, negated)
      end

      def try_replacing_property(name, negated)
        replacement = PropertyConverter.property_replacement(name, negated)
        return warn_of_unsupported_feature unless replacement
        buffer_set_extraction(replacement)
      end

      def convert_type_subtype
        case subtype
        when :type_hex
          buffer_set_extraction(TypeConverter::HEX_EXPANSION)
        when :type_nonhex
          buffer_set_extraction(TypeConverter::NONHEX_EXPANSION)
        else
          buffer_set_member(data)
        end
      end

      # Content of nested negative sets is not buffered.
      def buffer_set_member(string)
        buffered_members << string unless context.nested_negation?
        ''
      end

      # Content of nested negative sets is not buffered.
      def buffer_set_extraction(string)
        buffered_extractions << string unless context.nested_negation?
        ''
      end

      def buffered_members
        context.buffered_set_members
      end

      def buffered_extractions
        context.buffered_set_extractions
      end

      # Compiles the buffered members into a set. Buffered extractions,
      # if any, are put next to the set as alternatives.
      def finalize_set
        return finalize_depleted_set if buffered_members.none?

        set = build_set(buffered_members, context.negative_set?(1))
        return set unless buffered_extractions.any?
        "(?:#{set}|#{buffered_extractions.join('|')})"
      end

      # Compiles a set for which only extractions (or nothing) were buffered.
      def finalize_depleted_set
        extraction_count = buffered_extractions.count
        if extraction_count == 0
          ''
        elsif extraction_count == 1
          buffered_extractions.first
        else
          "(?:#{buffered_extractions.join('|')})"
        end
      end

      def build_set(members, negative)
        negation = negative ? '^' : ''
        "[#{negation}#{members.join}]"
      end
    end
  end
end