modl 0.3.26 → 0.3.27

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -149
  3. data/Gemfile +4 -2
  4. data/LICENSE.txt +1 -1
  5. data/README.md +19 -11
  6. data/Rakefile +5 -3
  7. data/lib/modl/interpreter.rb +38 -0
  8. data/lib/modl/model/model.rb +264 -0
  9. data/lib/modl/parser/parser.rb +272 -59
  10. data/lib/modl/tokeniser/context.rb +113 -0
  11. data/lib/modl/tokeniser/tokeniser.rb +28 -0
  12. data/lib/modl/util/functions.rb +74 -0
  13. data/lib/modl/util/unicode.rb +44 -0
  14. data/lib/modl/version.rb +5 -0
  15. data/lib/modl.rb +7 -32
  16. data/modl.gemspec +8 -11
  17. metadata +16 -75
  18. data/.DS_Store +0 -0
  19. data/.idea/vcs.xml +0 -6
  20. data/.rspec +0 -3
  21. data/.rubocop.yml +0 -5
  22. data/.travis.yml +0 -7
  23. data/bin/console +0 -14
  24. data/bin/setup +0 -8
  25. data/lib/modl/parser/MODLLexer.interp +0 -132
  26. data/lib/modl/parser/MODLLexer.rb +0 -324
  27. data/lib/modl/parser/MODLLexer.tokens +0 -40
  28. data/lib/modl/parser/MODLParser.interp +0 -93
  29. data/lib/modl/parser/MODLParser.rb +0 -2492
  30. data/lib/modl/parser/MODLParser.tokens +0 -40
  31. data/lib/modl/parser/MODLParserBaseListener.rb +0 -164
  32. data/lib/modl/parser/MODLParserBaseVisitor.rb +0 -107
  33. data/lib/modl/parser/MODLParserListener.rb +0 -151
  34. data/lib/modl/parser/MODLParserVisitor.rb +0 -56
  35. data/lib/modl/parser/class_processor.rb +0 -411
  36. data/lib/modl/parser/evaluator.rb +0 -125
  37. data/lib/modl/parser/file_importer.rb +0 -101
  38. data/lib/modl/parser/global_parse_context.rb +0 -318
  39. data/lib/modl/parser/instruction_processor.rb +0 -82
  40. data/lib/modl/parser/interpreter.rb +0 -75
  41. data/lib/modl/parser/modl_class.rb +0 -138
  42. data/lib/modl/parser/modl_index.rb +0 -54
  43. data/lib/modl/parser/modl_keylist.rb +0 -81
  44. data/lib/modl/parser/modl_method.rb +0 -172
  45. data/lib/modl/parser/object_cache.rb +0 -88
  46. data/lib/modl/parser/orphan_handler.rb +0 -98
  47. data/lib/modl/parser/parsed.rb +0 -1469
  48. data/lib/modl/parser/ref_processor.rb +0 -258
  49. data/lib/modl/parser/substitutions.rb +0 -101
  50. data/lib/modl/parser/sutil.rb +0 -108
  51. data/lib/modl/parser/throwing_error_listener.rb +0 -44
  52. data/lib/modl/parser/unicode_escape_replacer.rb +0 -148
  53. data/lib/modl/parser/unicode_escapes.rb +0 -112
  54. data/lib/modl/parser/version.rb +0 -29
@@ -1,258 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # The MIT License (MIT)
4
- #
5
- # Copyright (c) 2019 NUM Technology Ltd
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining a copy
8
- # of this software and associated documentation files (the "Software"), to deal
9
- # in the Software without restriction, including without limitation the rights
10
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- # copies of the Software, and to permit persons to whom the Software is
12
- # furnished to do so, subject to the following conditions:
13
- #
14
- # The above copyright notice and this permission notice shall be included in
15
- # all copies or substantial portions of the Software.
16
- #
17
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
- # THE SOFTWARE.
24
-
25
- require 'punycode'
26
- require 'modl/parser/sutil'
27
-
28
module MODL
  module Parser
    # Convert MODL reference to the replacement value.
    #
    # All methods are class-level. `deref` is the usual entry point; the
    # remaining methods are helpers that stay publicly callable (a bare
    # `private` has no effect on `def self.` methods, so they always were).
    class RefProcessor
      NESTED_SEPARATOR = '.'
      MATCHER = Regexp.new('((%\w+)(\.\w*<`?\w*`?,`\w*`>)+|(%` ?[\w-]+`[\w.<>,]*%?)|(%\*?[\w]+(\.%?\w*<?[\w,]*>?)*%?))')
      MAX_RECURSE_DEPTH = 10

      # Fast pre-check so the regex is only run when it might match.
      #
      # @param str [Object] candidate value
      # @return [Boolean] true when str is not a String, starts with '%*',
      #   or contains neither '%' nor a grave accent
      def self.trivial_reject(str)
        return true unless str.is_a?(String)
        return true if str.start_with?('%*')

        !(str.include?('%') || str.include?('`'))
      end

      # Check str for references and process them.
      #
      # @param str [Object] value to dereference
      # @param global [Object] parse context handed through to `expand`
      # @return [Array] two elements: the processed string and a new_value
      #   if there is one (nil otherwise)
      def self.deref(str, global)
        return [str, nil] if trivial_reject(str)

        split_by_ref_tokens(str, global)
      end

      # Process the %ref tokens found in str.
      #
      # @return [Array] `[processed_string, new_value]`
      def self.split_by_ref_tokens(str, global)
        new_value, processed = process_tokens(global, str, str, str)
        [processed, new_value]
      end

      # Internal helper: repeatedly match MATCHER against the unprocessed
      # remainder (`text`) and substitute each expanded reference into `str`.
      #
      # @param global [Object] parse context handed through to `expand`
      # @param original [String] the untouched input, used to detect "nothing resolved"
      # @param str [String] the string being rewritten
      # @param text [String] the part of the input still to be scanned
      # @return [Array] `[new_value, str]`
      # @raise [InterpreterError] when a map is found where an array is expected,
      #   or a reference resolves to nothing usable and str is still the original
      def self.process_tokens(global, original, str, text)
        new_value = nil
        loop do
          text_s = text.to_s
          match = MATCHER.match(text_s)
          break if match.nil?

          match_index = match.begin(0)
          if match_index > 0
            # A '~' or '\' directly before the reference stops processing.
            break if escape_char?(text_s[match_index - 1])
            # NOTE(review): MatchData#length is the number of match elements
            # (whole match plus capture groups), not the length of the matched
            # text, so this does not inspect the character right after the
            # reference. `match.end(0)` looks like the intent, but switching
            # changes which strings get dereferenced - confirm before changing.
            break if escape_char?(text_s[match_index + match.length])
          end

          ref = match[0]
          text = Sutil.after(text, ref)

          new_value, remainder = expand(0, global, ref)
          ref = Sutil.until(ref, remainder)
          case new_value
          when String
            str = str.sub(ref, new_value)
          when Parsed::ParsedArrayItem
            nv_text = new_value.arrayValueItem.text
            # When the whole string is the reference, keep the item's text as-is.
            str = ref == str ? nv_text : str.sub(ref, nv_text.to_s)
            new_value = nil
          when Parsed::ParsedMapItem
            raise InterpreterError, 'Found a map when expecting an array'
          when MODL::Parser::MODLParserBaseListener
            if new_value.text
              str = if ref == str
                      new_value.text
                    else
                      str.sub(ref, Sutil.unquote(new_value.text.to_s))
                    end
              new_value = nil
            else
              # No text: new_value itself is handed back to the caller.
              str = nil
            end
          else
            new_value = nil
            raise InterpreterError, "Invalid object reference: \"#{str}\"" if str == original
          end
        end
        [new_value, str]
      end

      # Internal helper: resolve a single reference.
      #
      # Graves are stripped, the reference is split on '.', and each part is
      # resolved against the result of the previous one (numeric parts as
      # indexes, other parts as names or methods).
      #
      # @param depth [Integer] current recursion depth
      # @param global [Object] parse context; must respond to `index_value`,
      #   `pair` and `user_method` on the paths that use them
      # @param ref [String] the reference text
      # @return [Array] `[value, remainder]` where remainder is the unresolved
      #   tail of the reference (with a leading '.') or ''
      # @raise [InterpreterError] when depth exceeds MAX_RECURSE_DEPTH or a part
      #   cannot be applied to the value resolved so far
      def self.expand(depth, global, ref)
        if depth > MAX_RECURSE_DEPTH
          raise InterpreterError, "Recursing too deep to resolve: \"#{ref}\""
        end

        degraved = Sutil.replace(ref, '`', '')
        parts = (degraved[0] == '%' ? Sutil.tail(degraved) : degraved).split('.')
        parts[-1] = Sutil.head(parts[-1]) if parts.last&.end_with?('%')

        return expand_plain(parts) unless degraved.include?('%')

        result = nil
        prev = nil
        resolved = 0
        parts.each do |part|
          if part.include?('%')
            # Nested reference: resolve it first and use its value as the part.
            part, _remainder = expand(depth + 1, global, part)
            part = part.text if part.is_a?(MODL::Parser::MODLParserBaseListener)
          end
          index = part.to_i
          result = if index.to_s == part
                     resolve_index(result, index, global, degraved)
                   else
                     resolve_name(result, part, global, ref, degraved)
                   end
          break if result.nil?

          prev = result
          resolved += 1
        end

        if prev.nil?
          remainder = ''
          prev = degraved
        else
          remainder = resolved < parts.length ? '.' + parts[resolved..parts.length].join('.') : ''
        end
        # Nothing changed: hand back the reference text itself.
        if (prev == Sutil.between(ref, '%', '%')) || (ref.start_with?('%') && prev == Sutil.tail(ref))
          prev = ref
        end
        [prev, remainder]
      end

      # Internal helper: true for the two characters that escape a reference.
      def self.escape_char?(char)
        char == '~' || char == '\\'
      end

      # Internal helper: resolve a numeric part against the value so far.
      #
      # @raise [InterpreterError] when the value so far cannot be indexed
      def self.resolve_index(result, index, global, degraved)
        return global.index_value(index, degraved) if result.nil?

        unless result.respond_to?(:find_property)
          unless result.is_a?(Parsed::ParsedArrayValueItem)
            kind = result.is_a?(Parsed::ParsedMapItem) ? 'map' : result.class
            # Interpolate: `kind` may be a Class, which cannot be concatenated
            # onto a String.
            raise InterpreterError, "Found a #{kind} when expecting an array"
          end
          raise InterpreterError, "Invalid object reference: \"#{degraved}\""
        end
        result.find_property(index)
      end

      # Internal helper: resolve a non-numeric part against the value so far.
      #
      # @return [Object, nil] the property, method result, pair or bare name;
      #   nil when nothing applies
      # @raise [InterpreterError] when the value so far has no `find_property`
      def self.resolve_name(result, name, global, ref, degraved)
        case result
        when String
          apply_method(global, name, result) { result }
        when Parsed::ParsedPair
          prop = result.find_property(name)
          text = result.text
          return prop unless text && !prop

          apply_method(global, name, text) { Sutil.unquote(text) }
        when Parsed::ParsedArrayValueItem
          prop = result.find_property(name)
          text = result.text
          return prop unless text && !prop

          apply_method(global, name, text) do
            text.start_with?('`') && text.end_with?('`') ? Sutil.toptail(text) : text
          end
        when Array
          nil
        else
          if !result.nil? && !result.respond_to?(:find_property)
            raise InterpreterError, "Invalid object reference: \"#{degraved}\""
          end
          return result.find_property(name) unless result.nil?

          # Grave-quoted references are not looked up as pairs.
          a_pair = ref.start_with?('%`') ? nil : global.pair(name)
          a_pair.nil? ? name : a_pair
        end
      end

      # Internal helper: run `name` as a standard method on the value yielded
      # by the block, or failing that as a user-defined method on `user_arg`.
      #
      # @return [Object, nil] the method result; nil when no such method exists
      def self.apply_method(global, name, user_arg)
        return StandardMethods.run_method(name, yield) if StandardMethods.valid_method?(name)

        global.user_method(name)&.run(user_arg)
      end

      # Internal helper for references containing no '%': apply each part as a
      # standard method until the first part that is not one, after which the
      # remaining parts are appended back as literal '.part' text.
      #
      # @return [Array] `[result, '']`
      def self.expand_plain(parts)
        result = parts[0]
        stalled = false
        parts.drop(1).each do |part|
          stalled |= !StandardMethods.valid_method?(part)
          if stalled
            result << '.'
            result << part
          else
            result = StandardMethods.run_method(part, result)
          end
        end
        [result, '']
      end
    end
  end
end
@@ -1,101 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # The MIT License (MIT)
4
- #
5
- # Copyright (c) 2019 NUM Technology Ltd
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining a copy
8
- # of this software and associated documentation files (the "Software"), to deal
9
- # in the Software without restriction, including without limitation the rights
10
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- # copies of the Software, and to permit persons to whom the Software is
12
- # furnished to do so, subject to the following conditions:
13
- #
14
- # The above copyright notice and this permission notice shall be included in
15
- # all copies or substantial portions of the Software.
16
- #
17
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
- # THE SOFTWARE.
24
-
25
- require 'modl/parser/unicode_escape_replacer'
26
-
27
module MODL
  module Parser
    # Escape-sequence replacements for MODL files.
    class Substitutions
      # Ordered [escape_sequence, replacement] pairs. Held in a frozen constant
      # rather than a class variable so the table cannot be mutated or shared
      # accidentally through the inheritance tree.
      SUBS = [
        %w(~% %),
        %w(\\% %),
        %w(~\\ \\),
        %w(\\\\ \\),
        %w(~~ ~),
        %w(\\~ ~),
        %w{~( (},
        %w{\\( (},
        %w{~) )},
        %w{\\) )},
        %w(~[ [),
        %w(\\[ [),
        %w(~] ]),
        %w(\\] ]),
        %w(~{ {),
        %w(\\{ {),
        %w(~} }),
        %w(\\} }),
        %w(~; ;),
        %w(\\; ;),
        %w(~: :),
        %w(\\: :),
        %w(~` `),
        %w(\\` `),
        %w(~" "),
        %w(\\" "),
        %w(~= =),
        %w(\\= =),
        %w(~/ /),
        %w(\\/ /),
        # NOTE(review): replaces '<' with '<', i.e. a no-op. Every other pair
        # has a '~' form, so this looks like it was meant to be '~<' - confirm.
        %w(< <),
        %w(\\< <),
        %w(~> >),
        %w(\\> >),
        %w(~& &),
        %w(\\& &),
        # NOTE(review): same as above - no-op, possibly meant to be '~!'.
        %w(! !),
        %w(\\! !),
        %w(~| |),
        %w(\\| |),
        ['\\t', "\t"],
        ['\\n', "\n"],
        ['\\b', "\b"],
        ['\\f', "\f"],
        ['\\r', "\r"]
      ].freeze

      # Replace all escape sequences in the supplied string and return the new value.
      #
      # @param str [Object] value to process; anything that is not a String is
      #   returned untouched
      # @return [Object] the processed String, or str itself when not a String
      def self.process(str)
        return str unless str.is_a? String

        # Remove unescaped graves and double quotes
        new_str = Sutil.unquote(str)

        new_str = UnicodeEscapeReplacer.convert_unicode_sequences new_str

        # Handle escape sequences, one table entry at a time, in table order.
        # NOTE(review): each entry is re-applied until the string stops
        # changing, so output produced by a replacement is scanned again
        # (e.g. '~~~~' ends up as a single '~'). Kept as-is because callers
        # may rely on it - confirm whether a single pass was intended.
        SUBS.each do |from, to|
          loop do
            prev = new_str
            new_str = new_str.sub(from, to)
            break if new_str == prev
          end
        end
        new_str
      end
    end
  end
end
@@ -1,108 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # The MIT License (MIT)
4
- #
5
- # Copyright (c) 2019 NUM Technology Ltd
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining a copy
8
- # of this software and associated documentation files (the "Software"), to deal
9
- # in the Software without restriction, including without limitation the rights
10
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- # copies of the Software, and to permit persons to whom the Software is
12
- # furnished to do so, subject to the following conditions:
13
- #
14
- # The above copyright notice and this permission notice shall be included in
15
- # all copies or substantial portions of the Software.
16
- #
17
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
- # THE SOFTWARE.
24
-
25
# Small nil-tolerant string helpers.
module Sutil
  # Remove the first n characters from a string.
  #
  # @param str [String, nil] source string
  # @param n [Integer] number of leading characters to drop
  # @return [String, nil] the remainder; nil when str is nil, n is negative,
  #   or n is greater than the string length
  def self.tail(str, n = 1)
    return if str.nil? || n.negative?

    str.slice(n, str.length)
  end

  # Keep the first n characters of a string.
  #
  # @param str [String, nil] source string
  # @param n [Integer, nil] number of characters to keep; when nil the last
  #   character is stripped
  # @return [String, nil] nil when str is nil or n is negative
  def self.head(str, n = nil)
    return if str.nil?

    # Strip last char by default; clamp at 0 so '' yields '' rather than nil.
    n = [str.length - 1, 0].max if n.nil?

    str.slice(0, n)
  end

  # Keep everything until the first occurrence of c.
  #
  # @param str [String, nil] source string
  # @param c [String, nil] delimiter
  # @return [String, nil] the text before c; str unchanged when c is nil,
  #   empty or not found; nil when str is nil
  def self.until(str, c)
    return str if str.nil? || c.nil? || c.empty?

    i = str.index(c)
    i ? str.slice(0, i) : str
  end

  # Get everything after the first occurrence of c.
  #
  # @param str [String, nil] source string
  # @param c [String, nil] delimiter
  # @return [String, nil] the text after c; str unchanged when c is nil;
  #   '' when c is not found; nil when str is nil
  def self.after(str, c)
    return if str.nil?
    return str if c.nil?

    i = str.index(c)
    return '' if i.nil?

    str.slice(i + c.length, str.length)
  end

  # Get everything between the first ch1 and the last ch2.
  # e.g. Sutil.between('func(p1,p2)', '(', ')') returns 'p1,p2'
  #
  # If the last ch2 comes before the first ch1 the two positions are swapped.
  #
  # @param str [String, nil] source string
  # @param ch1 [String, nil] opening character
  # @param ch2 [String, nil] closing character
  # @return [String, nil] the enclosed text; str unchanged when a delimiter is
  #   nil, empty or absent, when str is shorter than 3 characters, or when both
  #   delimiters resolve to the same position; nil when str is nil
  def self.between(str, ch1, ch2)
    return if str.nil?
    return str if ch1.nil? || ch2.nil? || ch1 == '' || ch2 == ''
    return str if str.length < 3

    first = str.index(ch1)
    second = str.rindex(ch2)
    # A missing delimiter means there is nothing to extract.
    return str if first.nil? || second.nil?

    first, second = second, first if first > second
    return str if first == second

    str.slice(first + 1, second - first - 1)
  end

  # Remove the first and last chars from a string.
  #
  # @param str [String, nil] source string
  # @return [String, nil] '' when str has fewer than 3 characters; nil when str is nil
  def self.toptail(str)
    return str if str.nil?
    return '' if str.length < 3

    str.slice(1, str.length - 2)
  end

  # Replace occurrences of old with new until none remain.
  #
  # The replacement text is inserted literally (backslash sequences in `new`
  # are not interpreted).
  #
  # @param str [String, nil] source string
  # @param old [String, nil] text to search for
  # @param new [String, nil] replacement text
  # @return [String, nil] str unchanged when str, old or new is nil, old is
  #   empty, or old equals new
  def self.replace(str, old, new)
    return str if str.nil? || old.nil? || new.nil? || old.empty? || old == new

    # When the replacement itself contains the search text, re-scanning would
    # never finish, so do a single left-to-right pass instead.
    return str.gsub(old) { new } if new.include?(old)

    result = str
    # Re-scan from the start after every replacement, so occurrences formed by
    # a previous replacement are replaced too.
    result = result.sub(old) { new } while result.include?(old)
    result
  end

  # Strip one pair of enclosing graves or double quotes, if present.
  #
  # @param str [String, nil] source string
  # @return [String, nil] the unquoted string; str unchanged when it is not
  #   enclosed in matching graves or double quotes; nil when str is nil
  def self.unquote(str)
    return str if str.nil?

    graved = str.start_with?('`') && str.end_with?('`')
    quoted = str.start_with?('"') && str.end_with?('"')
    graved || quoted ? toptail(str) : str
  end
end
@@ -1,44 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # The MIT License (MIT)
4
- #
5
- # Copyright (c) 2019 NUM Technology Ltd
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining a copy
8
- # of this software and associated documentation files (the "Software"), to deal
9
- # in the Software without restriction, including without limitation the rights
10
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- # copies of the Software, and to permit persons to whom the Software is
12
- # furnished to do so, subject to the following conditions:
13
- #
14
- # The above copyright notice and this permission notice shall be included in
15
- # all copies or substantial portions of the Software.
16
- #
17
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
- # THE SOFTWARE.
24
-
25
- require 'antlr4/runtime/parse_cancellation_exception'
26
-
27
module MODL
  module Parser
    # Error listener that turns a reported syntax error into an exception and
    # ignores every other report.
    # NOTE(review): presumably registered with the ANTLR lexer/parser in place
    # of the default console listener - confirm against the caller.
    class ThrowingErrorListener
      include Singleton

      # Raise on a syntax error.
      #
      # @param line [Object] line of the error, rendered with to_s
      # @param char_position_in_line [Object] column of the error, rendered with to_s
      # @param msg [Object] description of the error, rendered with to_s
      # @raise [Antlr4::Runtime::ParseCancellationException] always, with the
      #   message "line <line>:<column> <msg>"
      def syntax_error(_recognizer, _offending_symbol, line, char_position_in_line, msg, _e)
        raise Antlr4::Runtime::ParseCancellationException, "line #{line}:#{char_position_in_line} #{msg}"
      end

      # Intentionally a no-op.
      def report_ambiguity(_recognizer, _dfa, _start_index, _stop_index, _exact, _ambig_ilts, _configs)
      end

      # Intentionally a no-op.
      def report_attempting_full_context(_recognizer, _dfa, _start_index, _stop_index, _conflicting_alts, _configs)
      end

      # Intentionally a no-op.
      def report_context_sensitivity(_recognizer, _dfa, _start_index, _stop_index, _prediction, _configs)
      end
    end
  end
end
@@ -1,148 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # The MIT License (MIT)
4
- #
5
- # Copyright (c) 2019 NUM Technology Ltd
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining a copy
8
- # of this software and associated documentation files (the "Software"), to deal
9
- # in the Software without restriction, including without limitation the rights
10
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- # copies of the Software, and to permit persons to whom the Software is
12
- # furnished to do so, subject to the following conditions:
13
- #
14
- # The above copyright notice and this permission notice shall be included in
15
- # all copies or substantial portions of the Software.
16
- #
17
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
- # THE SOFTWARE.
24
-
25
module MODL
  module Parser
    # Unicode replacements for MODL files.
    #
    # Recognises `\u` and `~u` followed by 4, 5 or 6 hex digits and replaces
    # the whole sequence with the corresponding UTF-8 character.
    class UnicodeEscapeReplacer
      BACKSLASH_U = "\\u"
      TILDE_U = "~u"
      TILDE = '~'
      BACKSLASH = '\\'
      HEX = 16

      # Replace every unescaped unicode escape sequence in str.
      #
      # A sequence directly preceded by '~' or '\' is treated as escaped and
      # left alone, as is one whose digits do not form a usable code point.
      #
      # @param str [String, nil] text to convert
      # @return [String, nil] the converted text; nil when str is nil
      def self.convert_unicode_sequences(str)
        return str if str.nil?

        start = 0
        result = str

        loop do
          # We could have a backslash-u or a ~u escape sequence; take whichever
          # comes first at or after `start`.
          unicode_str_idx = [result.index(BACKSLASH_U, start), result.index(TILDE_U, start)].compact.min
          break if unicode_str_idx.nil?

          # Next time round the loop we start searching after this marker.
          start = unicode_str_idx + 1

          # If the escape sequence is itself escaped then don't replace it.
          next if unicode_str_idx > 0 && [TILDE, BACKSLASH].include?(result[unicode_str_idx - 1])

          parsed = try_parse(result, unicode_str_idx + 2)
          # A code point of 0 is what try_parse reports for "nothing usable".
          next unless parsed.code_point > 0

          chars = parsed.code_point.chr(Encoding::UTF_8)
          # +2 accounts for the two-character `\u` / `~u` marker.
          result = replace(result, chars, unicode_str_idx, parsed.length + 2)
        end
        result
      end

      # The methods below are internal helpers. They stay publicly callable:
      # a bare `private` has no effect on `def self.` methods, so they always were.

      #
      # Replace `length` characters of `s`, starting at `unicode_str_index`, with `value`
      #
      def self.replace(s, value, unicode_str_index, length)
        left = s.slice(0, unicode_str_index)
        ends = [s.length, unicode_str_index + length].min
        right = s.slice(ends, s.length)
        left + value.to_s + right
      end

      #
      # Check whether the value is a valid unicode codepoint, i.e. within
      # 0..0x10ffff but outside 0xd800..0xdfff
      #
      def self.valid_range?(value)
        (0..0xd7ff).cover?(value) || (0xe000..0x10ffff).cover?(value)
      end

      #
      # Can we get `n` hex digits from the string at the `idx` location?
      #
      def self.has_enough_digits?(s, idx, n)
        candidate = s[idx, n].to_s
        candidate.length == n && candidate.count('0-9a-fA-F') == n
      end

      #
      # Attempt to parse a unicode character starting at `idx` in `str`.
      # The longest usable form wins: 6 digits, then 5, then 4.
      # Returns TryParse.new(0, 4) when none of them yields a valid code point.
      #
      def self.try_parse(str, idx)
        [6, 5, 4].each do |digits|
          next unless has_enough_digits?(str, idx, digits)

          value = str.slice(idx, digits).to_i(HEX)
          return TryParse.new(value, digits) if valid_range?(value)
        end
        TryParse.new(0, 4)
      end
    end

    # Class to hold the result of the try_parse method: the code point that
    # was read and the number of hex digits it occupied.
    class TryParse
      attr_accessor :code_point
      attr_accessor :length

      def initialize(code, len)
        @code_point = code
        @length = len
      end
    end
  end
end