modl 0.3.26 → 0.3.27

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1 -149
  3. data/Gemfile +4 -2
  4. data/LICENSE.txt +1 -1
  5. data/README.md +19 -11
  6. data/Rakefile +5 -3
  7. data/lib/modl/interpreter.rb +38 -0
  8. data/lib/modl/model/model.rb +264 -0
  9. data/lib/modl/parser/parser.rb +272 -59
  10. data/lib/modl/tokeniser/context.rb +113 -0
  11. data/lib/modl/tokeniser/tokeniser.rb +28 -0
  12. data/lib/modl/util/functions.rb +74 -0
  13. data/lib/modl/util/unicode.rb +44 -0
  14. data/lib/modl/version.rb +5 -0
  15. data/lib/modl.rb +7 -32
  16. data/modl.gemspec +8 -11
  17. metadata +16 -75
  18. data/.DS_Store +0 -0
  19. data/.idea/vcs.xml +0 -6
  20. data/.rspec +0 -3
  21. data/.rubocop.yml +0 -5
  22. data/.travis.yml +0 -7
  23. data/bin/console +0 -14
  24. data/bin/setup +0 -8
  25. data/lib/modl/parser/MODLLexer.interp +0 -132
  26. data/lib/modl/parser/MODLLexer.rb +0 -324
  27. data/lib/modl/parser/MODLLexer.tokens +0 -40
  28. data/lib/modl/parser/MODLParser.interp +0 -93
  29. data/lib/modl/parser/MODLParser.rb +0 -2492
  30. data/lib/modl/parser/MODLParser.tokens +0 -40
  31. data/lib/modl/parser/MODLParserBaseListener.rb +0 -164
  32. data/lib/modl/parser/MODLParserBaseVisitor.rb +0 -107
  33. data/lib/modl/parser/MODLParserListener.rb +0 -151
  34. data/lib/modl/parser/MODLParserVisitor.rb +0 -56
  35. data/lib/modl/parser/class_processor.rb +0 -411
  36. data/lib/modl/parser/evaluator.rb +0 -125
  37. data/lib/modl/parser/file_importer.rb +0 -101
  38. data/lib/modl/parser/global_parse_context.rb +0 -318
  39. data/lib/modl/parser/instruction_processor.rb +0 -82
  40. data/lib/modl/parser/interpreter.rb +0 -75
  41. data/lib/modl/parser/modl_class.rb +0 -138
  42. data/lib/modl/parser/modl_index.rb +0 -54
  43. data/lib/modl/parser/modl_keylist.rb +0 -81
  44. data/lib/modl/parser/modl_method.rb +0 -172
  45. data/lib/modl/parser/object_cache.rb +0 -88
  46. data/lib/modl/parser/orphan_handler.rb +0 -98
  47. data/lib/modl/parser/parsed.rb +0 -1469
  48. data/lib/modl/parser/ref_processor.rb +0 -258
  49. data/lib/modl/parser/substitutions.rb +0 -101
  50. data/lib/modl/parser/sutil.rb +0 -108
  51. data/lib/modl/parser/throwing_error_listener.rb +0 -44
  52. data/lib/modl/parser/unicode_escape_replacer.rb +0 -148
  53. data/lib/modl/parser/unicode_escapes.rb +0 -112
  54. data/lib/modl/parser/version.rb +0 -29
@@ -1,258 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # The MIT License (MIT)
4
- #
5
- # Copyright (c) 2019 NUM Technology Ltd
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining a copy
8
- # of this software and associated documentation files (the "Software"), to deal
9
- # in the Software without restriction, including without limitation the rights
10
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- # copies of the Software, and to permit persons to whom the Software is
12
- # furnished to do so, subject to the following conditions:
13
- #
14
- # The above copyright notice and this permission notice shall be included in
15
- # all copies or substantial portions of the Software.
16
- #
17
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
- # THE SOFTWARE.
24
-
25
- require 'punycode'
26
- require 'modl/parser/sutil'
27
-
28
- module MODL
29
- module Parser
30
- # Convert MODL reference to the replacement value
31
- class RefProcessor
32
-
33
- NESTED_SEPARATOR = '.'
34
- MATCHER = Regexp.new('((%\w+)(\.\w*<`?\w*`?,`\w*`>)+|(%` ?[\w-]+`[\w.<>,]*%?)|(%\*?[\w]+(\.%?\w*<?[\w,]*>?)*%?))')
35
- MAX_RECURSE_DEPTH = 10
36
-
37
# Fast pre-check that saves running the reference regex when it cannot match.
#
# Returns true (meaning "skip dereferencing") unless +str+ is a String that
# does not start with '%*' and contains a '%' or a grave character.
def self.trivial_reject(str)
  return true unless str.is_a?(String)
  return true if str.start_with?('%*')

  !(str.include?('%') || str.include?('`'))
end
41
-
42
# Look for references in +str+ and resolve them.
#
# str    - the candidate value; anything trivial_reject turns away is
#          handed back untouched.
# global - passed straight through to split_by_ref_tokens.
#
# Returns a two-element array: the (possibly rewritten) value, and the
# new_value reported by split_by_ref_tokens (nil when nothing was processed).
def self.deref(str, global)
  return [str, nil] if trivial_reject(str)

  processed, new_value = split_by_ref_tokens(str, global)
  [processed, new_value]
end
49
-
50
# Resolve the %ref tokens in +str+ by delegating to process_tokens.
#
# The same string is supplied to process_tokens as its +original+, +str+
# and +text+ arguments.
#
# Returns [str, new_value] - note the order is the reverse of what
# process_tokens returns.
def self.split_by_ref_tokens(str, global)
  new_value, processed = process_tokens(global, str, str, str)
  [processed, new_value]
end
61
-
62
- private
63
-
64
# Repeatedly match MATCHER against the unscanned text, expand each reference
# found and substitute the expansion into +str+.
#
# global   - passed through unchanged to expand.
# original - the string as first supplied; only used to decide whether an
#            unsupported expansion should raise.
# str      - the string being rewritten as references are resolved.
# text     - the remainder still to be scanned; it is cut down to the part
#            after each match via Sutil.after.
#
# Returns [new_value, str].
# Raises InterpreterError when a reference expands to a Parsed::ParsedMapItem,
# or to an unsupported value while +str+ is still equal to +original+.
def self.process_tokens(global, original, str, text)
  new_value = nil
  loop do
    text_s = text.to_s
    match = MATCHER.match(text_s)
    break if match.nil?

    match_index = text_s.index(match[0])
    if match_index > 0
      # Stop when the reference is preceded by an escape character.
      if text_s[match_index - 1] == '~' || text_s[match_index - 1] == '\\'
        break
      end
      # NOTE(review): MatchData#length is the number of elements in the match
      # (whole match plus capture groups), not the length of the matched text,
      # so this does not inspect the character directly after the reference.
      # Confirm whether match[0].length was intended before changing it.
      if text_s[match_index + match.length] == '~' || text_s[match_index + match.length] == '\\'
        break
      end
    end


    ref = match[0]
    text = Sutil.after(text, ref)

    new_value, remainder = expand(0, global, ref)
    # Only the part of the reference that expand actually consumed is replaced.
    ref = Sutil.until(ref, remainder)
    if new_value.is_a?(String)
      str = str.sub(ref, new_value)
    elsif new_value.is_a?(Parsed::ParsedArrayItem)
      nv_text = new_value.arrayValueItem.text
      # When the whole string is the reference the raw text is returned as-is
      # (no to_s), otherwise it is spliced into the surrounding string.
      str = if ref == str
              nv_text
            else
              str.sub(ref, nv_text.to_s)
            end
      new_value = nil
    elsif new_value.is_a?(Parsed::ParsedMapItem)
      raise InterpreterError, 'Found a map when expecting an array'
    elsif new_value.is_a?(MODL::Parser::MODLParserBaseListener)
      if new_value.text
        str = if ref == str
                new_value.text
              else
                str.sub(ref, Sutil.unquote(new_value.text.to_s))
              end
        new_value = nil
      else
        # No text to substitute: new_value is kept and str is cleared.
        str = nil
      end
    else
      new_value = nil
      raise InterpreterError, 'Invalid object reference: "' + str + '"' if str == original
    end
  end
  return new_value, str
end
117
-
118
# Resolve a single reference into its value.
#
# depth  - current recursion depth; exceeding MAX_RECURSE_DEPTH raises.
# global - object queried through index_value, user_method and pair.
# ref    - the reference text (graves are removed before it is split on '.').
#
# Returns [value, remainder] where remainder is the '.'-joined tail of the
# reference that could not be resolved ('' when everything was consumed).
# Raises InterpreterError when recursion is too deep, when a numeric part is
# applied to something that is not indexable, or when a part is applied to an
# object that does not respond to find_property.
def self.expand(depth, global, ref)
  if depth > MAX_RECURSE_DEPTH
    raise InterpreterError, 'Recursing too deep to resolve: "' + ref + '"'
  end
  result = nil
  prev = nil

  degraved = Sutil.replace(ref, '`', '')

  # Drop a leading '%' before splitting, and a trailing '%' from the last part.
  parts = Sutil.tail(degraved).split('.') if degraved[0] == '%'
  parts = degraved.split('.') unless degraved[0] == '%'
  parts[-1] = Sutil.head(parts[-1]) if parts[-1].end_with?('%')

  if degraved.include?('%')
    resolved = 0
    parts.each do |p|
      # A part that itself contains '%' is a nested reference: resolve it first.
      if p.include?('%')
        p, _ignore = expand(depth + 1, global, p)
        if p.is_a?(MODL::Parser::MODLParserBaseListener)
          p = p.text
        end
      end
      n = p.to_i
      result = if n.to_s == p
                 # Numeric ref
                 if !result.nil? && !result.respond_to?(:find_property)
                   if !result.is_a?(Parsed::ParsedArrayValueItem)
                     # to_s is required: concatenating a Class onto a String
                     # raises TypeError and would mask the InterpreterError.
                     t = result.class.to_s
                     t = 'map' if result.is_a? Parsed::ParsedMapItem
                     raise InterpreterError, 'Found a ' + t + ' when expecting an array'
                   end
                   raise InterpreterError, 'Invalid object reference: "' + degraved + '"'
                 end
                 result.nil? ? global.index_value(n, degraved) : result.find_property(n)
               else
                 # String ref
                 if result.is_a? String
                   # A part applied to a String is a method name: standard
                   # methods first, then user-defined ones (nil if neither).
                   if StandardMethods.valid_method?(p)
                     StandardMethods.run_method(p, result)
                   else
                     mthd = global.user_method(p)
                     if !mthd.nil?
                       mthd.run(result)
                     else
                       mthd
                     end
                   end
                 elsif result.is_a? Parsed::ParsedPair
                   # Prefer a property lookup; fall back to treating the part
                   # as a method applied to the pair's text.
                   prop = result.find_property(p)
                   if result.text && !prop
                     if StandardMethods.valid_method?(p)
                       StandardMethods.run_method(p, Sutil.unquote(result.text))
                     else
                       mthd = global.user_method(p)
                       if !mthd.nil?
                         mthd.run(result.text)
                       else
                         mthd
                       end
                     end
                   else
                     prop
                   end
                 elsif result.is_a? Parsed::ParsedArrayValueItem
                   prop = result.find_property(p)
                   if result.text && !prop
                     if StandardMethods.valid_method?(p)
                       result_text = result.text
                       if result_text.start_with?('`') && result_text.end_with?('`')
                         result_text = Sutil.toptail(result_text)
                       end
                       StandardMethods.run_method(p, result_text)
                     else
                       mthd = global.user_method(p)
                       if !mthd.nil?
                         mthd.run(result.text)
                       else
                         mthd
                       end
                     end
                   else
                     prop
                   end
                 elsif result.is_a? Array
                   nil
                 else
                   if !result.nil? && !result.respond_to?(:find_property)
                     raise InterpreterError, 'Invalid object reference: "' + degraved + '"'
                   end
                   if result.nil?
                     # First part: look the name up as a pair unless the
                     # reference was graved; an unknown name resolves to itself.
                     unless ref.start_with?('%`')
                       a_pair = global.pair(p)
                     end
                     if a_pair.nil?
                       p
                     else
                       a_pair
                     end
                   else
                     result.find_property(p)
                   end
                 end
               end
      # Stop at the first part that cannot be resolved; prev keeps the last hit.
      break if result.nil?

      prev = result
      resolved += 1
    end
    if prev.nil?
      remainder = ''
      prev = degraved
    else
      remainder = resolved < parts.length ? '.' + parts[resolved..parts.length].join('.') : ''
    end
    # A reference that resolved to nothing but its own name is returned unchanged.
    if (prev == Sutil.between(ref, '%', '%')) || (ref.start_with?('%') && prev == Sutil.tail(ref))
      prev = ref
    end
    [prev, remainder]
  else
    # No '%' left once the graves are removed: the first part is literal text
    # and later parts are applied as standard methods until the first part
    # that is not a valid method; from then on parts are appended verbatim.
    result = parts[0]
    i = 1
    stalled = false
    while i < parts.length
      stalled |= StandardMethods.valid_method?(parts[i]) ? false : true

      if stalled
        result << '.'
        result << parts[i]
      else
        result = StandardMethods.run_method(parts[i], result)
      end
      i += 1
    end
    [result, '']
  end
end
255
-
256
- end
257
- end
258
- end
@@ -1,101 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # The MIT License (MIT)
4
- #
5
- # Copyright (c) 2019 NUM Technology Ltd
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining a copy
8
- # of this software and associated documentation files (the "Software"), to deal
9
- # in the Software without restriction, including without limitation the rights
10
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- # copies of the Software, and to permit persons to whom the Software is
12
- # furnished to do so, subject to the following conditions:
13
- #
14
- # The above copyright notice and this permission notice shall be included in
15
- # all copies or substantial portions of the Software.
16
- #
17
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
- # THE SOFTWARE.
24
-
25
- require 'modl/parser/unicode_escape_replacer'
26
-
27
module MODL
  module Parser
    # Escape-sequence replacements for MODL files.
    class Substitutions
      # Ordered [escaped, replacement] pairs. Order matters because each pair
      # is applied to the result left by the pairs before it.
      @@subs = [
        %w(~% %),
        %w(\\% %),
        %w(~\\ \\),
        %w(\\\\ \\),
        %w(~~ ~),
        %w(\\~ ~),
        %w{~( (},
        %w{\\( (},
        %w{~) )},
        %w{\\) )},
        %w(~[ [),
        %w(\\[ [),
        %w(~] ]),
        %w(\\] ]),
        %w(~{ {),
        %w(\\{ {),
        %w(~} }),
        %w(\\} }),
        %w(~; ;),
        %w(\\; ;),
        %w(~: :),
        %w(\\: :),
        %w(~` `),
        %w(\\` `),
        %w(~" "),
        %w(\\" "),
        %w(~= =),
        %w(\\= =),
        %w(~/ /),
        %w(\\/ /),
        # NOTE(review): this row replaces '<' with itself, i.e. it does nothing.
        # Every neighbouring pair has a '~' form - confirm whether '~<' was intended.
        %w(< <),
        %w(\\< <),
        %w(~> >),
        %w(\\> >),
        %w(~& &),
        %w(\\& &),
        # NOTE(review): same as '<' above - a no-op row; confirm whether '~!' was intended.
        %w(! !),
        %w(\\! !),
        %w(~| |),
        %w(\\| |),
        ['\\t', "\t"],
        ['\\n', "\n"],
        ['\\b', "\b"],
        ['\\f', "\f"],
        ['\\r', "\r"]
      ].freeze

      # Replace all escape sequences in the supplied string and return the new value.
      #
      # Non-String input is returned unchanged. For Strings the value is passed
      # through Sutil.unquote and UnicodeEscapeReplacer.convert_unicode_sequences,
      # then every pair in the table is applied.
      def self.process(str)
        return str unless str.is_a? String

        new_str = Sutil.unquote(str)
        new_str = UnicodeEscapeReplacer.convert_unicode_sequences(new_str)

        @@subs.each do |escaped, plain|
          # Re-scan from the start after every single replacement (not gsub):
          # a replacement may form a new occurrence that is replaced again.
          # Stop as soon as a pass leaves the string unchanged.
          loop do
            replaced = new_str.sub(escaped, plain)
            break if replaced == new_str

            new_str = replaced
          end
        end
        new_str
      end
    end
  end
end
@@ -1,108 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # The MIT License (MIT)
4
- #
5
- # Copyright (c) 2019 NUM Technology Ltd
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining a copy
8
- # of this software and associated documentation files (the "Software"), to deal
9
- # in the Software without restriction, including without limitation the rights
10
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- # copies of the Software, and to permit persons to whom the Software is
12
- # furnished to do so, subject to the following conditions:
13
- #
14
- # The above copyright notice and this permission notice shall be included in
15
- # all copies or substantial portions of the Software.
16
- #
17
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
- # THE SOFTWARE.
24
-
25
# Small string helpers. Every helper returns nil (or the input unchanged)
# rather than raising when it is handed a nil string.
module Sutil
  # Remove the first n characters from a string.
  # Returns nil when n is negative or str is nil.
  def self.tail(str, n = 1)
    return if n.negative?

    str&.slice(n, str.length)
  end

  # Keep the first n characters of a string.
  # With no n given, everything except the last character is kept.
  def self.head(str, n = nil)
    return if str.nil?

    n = str.length - 1 if n.nil? # Strip last char by default

    str.slice(0, n)
  end

  # Keep everything before the first occurrence of c.
  # Returns str unchanged when c is nil, empty or not present.
  def self.until(str, c)
    return str if str.nil? || c.nil? || c.empty?

    i = str.index(c)
    i ? str.slice(0, i) : str
  end

  # Get everything after the first occurrence of c.
  # Returns '' when c is not present and str itself when c is nil.
  def self.after(str, c)
    return if str.nil?
    return str if c.nil?

    i = str.index(c)
    return '' if i.nil?

    str.slice(i + c.length, str.length)
  end

  # Get everything between the first ch1 and the last ch2.
  # e.g. Sutil.between('func(p1,p2)', '(', ')') returns 'p1,p2'
  #
  # If the two positions are found in the opposite order they are swapped.
  # str is returned unchanged when it is shorter than 3 characters, when a
  # delimiter is nil or empty, when either delimiter does not occur, or when
  # both delimiters resolve to the same position.
  def self.between(str, ch1, ch2)
    return if str.nil?
    return str if ch1.nil? || ch2.nil? || ch1 == '' || ch2 == ''
    return str if str.length < 3

    first = str.index(ch1)
    second = str.rindex(ch2)
    # A missing delimiter used to reach the comparison below as nil and raise.
    return str if first.nil? || second.nil?

    first, second = second, first if first > second
    return str if first == second

    str.slice(first + 1, second - first - 1)
  end

  # Remove the first and last chars from a string.
  # Strings shorter than 3 characters yield ''.
  def self.toptail(str)
    return str if str.nil?
    return '' if str.length < 3

    str.slice(1, str.length - 2)
  end

  # Replace occurrences of old with new, re-scanning after each replacement
  # until no occurrence of old is left.
  #
  # When new itself contains old that re-scan could never finish, so a single
  # left-to-right pass is done instead.
  def self.replace(str, old, new)
    return str if str.nil?
    return str if old.nil? || new.nil? || old.empty? || old == new
    return str.gsub(old, new) if new.include?(old)

    result = str
    result = result.sub(old, new) while result.include?(old)
    result
  end

  # Strip one surrounding pair of graves or double quotes, if present.
  def self.unquote(str)
    return str if str.nil?

    quoted = %w[` "].any? { |q| str.start_with?(q) && str.end_with?(q) }
    quoted ? toptail(str) : str
  end
end
@@ -1,44 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # The MIT License (MIT)
4
- #
5
- # Copyright (c) 2019 NUM Technology Ltd
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining a copy
8
- # of this software and associated documentation files (the "Software"), to deal
9
- # in the Software without restriction, including without limitation the rights
10
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- # copies of the Software, and to permit persons to whom the Software is
12
- # furnished to do so, subject to the following conditions:
13
- #
14
- # The above copyright notice and this permission notice shall be included in
15
- # all copies or substantial portions of the Software.
16
- #
17
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
- # THE SOFTWARE.
24
-
25
- require 'antlr4/runtime/parse_cancellation_exception'
26
-
27
module MODL::Parser
  # Singleton listener that turns a reported syntax error into an exception
  # and ignores every other callback.
  # NOTE(review): the method names look like the ANTLR error-listener
  # callbacks - presumably this is registered on the generated lexer/parser;
  # confirm at the call site.
  class ThrowingErrorListener
    include Singleton

    # Raise Antlr4::Runtime::ParseCancellationException with a message of the
    # form "line <line>:<column> <msg>". The remaining arguments are unused.
    def syntax_error(_recognizer, _offending_symbol, _line, _char_position_in_line, _msg, _e)
      message = "line #{_line}:#{_char_position_in_line} #{_msg}"
      raise Antlr4::Runtime::ParseCancellationException, message
    end

    # Intentionally does nothing.
    def report_ambiguity(_recognizer, _dfa, _start_index, _stop_index, _exact, _ambig_ilts, _configs); end

    # Intentionally does nothing.
    def report_attempting_full_context(_recognizer, _dfa, _start_index, _stop_index, _conflicting_alts, _configs); end

    # Intentionally does nothing.
    def report_context_sensitivity(_recognizer, _dfa, _start_index, _stop_index, _prediction, _configs); end
  end
end
@@ -1,148 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # The MIT License (MIT)
4
- #
5
- # Copyright (c) 2019 NUM Technology Ltd
6
- #
7
- # Permission is hereby granted, free of charge, to any person obtaining a copy
8
- # of this software and associated documentation files (the "Software"), to deal
9
- # in the Software without restriction, including without limitation the rights
10
- # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
- # copies of the Software, and to permit persons to whom the Software is
12
- # furnished to do so, subject to the following conditions:
13
- #
14
- # The above copyright notice and this permission notice shall be included in
15
- # all copies or substantial portions of the Software.
16
- #
17
- # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
- # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
- # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20
- # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
- # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
- # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23
- # THE SOFTWARE.
24
-
25
module MODL
  module Parser
    # Unicode replacements for MODL files.
    #
    # Replaces backslash-u and ~u escape sequences followed by 6, 5 or 4 hex
    # digits (tried in that order) with the UTF-8 character they denote.
    class UnicodeEscapeReplacer
      BACKSLASH_U = "\\u"
      TILDE_U = "~u"
      TILDE = '~'
      BACKSLASH = '\\'
      HEX = 16

      # Replace every unescaped unicode escape sequence in +str+.
      #
      # A sequence is left alone when it is directly preceded by '~' or a
      # backslash, or when its digits do not form a valid code point greater
      # than zero. Returns nil when +str+ is nil.
      def self.convert_unicode_sequences(str)
        return str if str.nil?

        result = str
        start = 0

        loop do
          # Whichever escape marker comes first from the current position.
          idx = [result.index(BACKSLASH_U, start), result.index(TILDE_U, start)].compact.min
          break if idx.nil?

          # The next search resumes just past this marker, replaced or not.
          start = idx + 1

          # If the escape sequence is itself escaped then don't replace it.
          next if idx > 0 && (result[idx - 1] == TILDE || result[idx - 1] == BACKSLASH)

          parsed = try_parse(result, idx + 2)
          next unless parsed.code_point > 0

          char = parsed.code_point.chr(Encoding::UTF_8)
          result = replace(result, char, idx, parsed.length + 2)
        end
        result
      end

      # NOTE: the helpers below are public class methods. A bare `private`
      # keyword does not apply to `def self.` definitions, so none is used.

      #
      # Replace +length+ characters of +s+ starting at +unicode_str_index+
      # with +value+.
      #
      def self.replace(s, value, unicode_str_index, length)
        left = s.slice(0, unicode_str_index)
        ends = [s.length, unicode_str_index + length].min
        right = s.slice(ends, s.length)
        left + value.to_s + right
      end

      #
      # Check whether the value is a valid unicode codepoint
      # (0..0x10ffff excluding the surrogate range 0xd800..0xdfff).
      #
      def self.valid_range?(value)
        (0..0xd7ff).cover?(value) || (0xe000..0x10ffff).cover?(value)
      end

      #
      # Can we get `n` hex digits from the string at the `idx` location?
      #
      def self.has_enough_digits?(s, idx, n)
        digits = s.slice(idx, n).to_s
        digits.length == n && !(digits =~ /\A\h*\z/).nil?
      end

      #
      # Attempt to parse a unicode character starting at `idx` in `str`.
      # Returns a TryParse holding the code point and the number of digits
      # used, or TryParse(0, 4) when no 6-, 5- or 4-digit value is valid.
      #
      def self.try_parse(str, idx)
        [6, 5, 4].each do |digits|
          next unless has_enough_digits?(str, idx, digits)

          value = str.slice(idx, digits).to_i(HEX)
          return TryParse.new(value, digits) if valid_range?(value)
        end
        TryParse.new(0, 4)
      end
    end

    # Class to hold the result of the try_parse method
    class TryParse
      attr_accessor :code_point
      attr_accessor :length

      def initialize(code, len)
        @code_point = code
        @length = len
      end
    end
  end
end