taps-taps 0.3.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,182 @@
1
+ require 'zlib'
2
+ require 'stringio'
3
+ require 'time'
4
+ require 'tempfile'
5
+ require 'rest_client'
6
+
7
+ require 'taps/errors'
8
+ require 'taps/chunksize'
9
+
10
+ module Taps
11
+ module Utils
12
+ extend self
13
+
14
# Reports whether the host operating system is Windows.
#
# The answer is derived once from the 'host_os' entry of RbConfig::CONFIG and
# memoized in @windows; the defined? guard keeps a cached `false` from being
# recomputed on every call.
#
# Returns true or false.
def windows?
  return @windows if defined?(@windows)
  require 'rbconfig'
  # RbConfig is the supported name; the legacy top-level ::Config constant is
  # no longer defined by current rubies and raised NameError here.
  @windows = !!(RbConfig::CONFIG['host_os'] =~ /mswin|mingw/)
end
19
+
20
# Maps a command name to the name of its executable on this platform:
# ".cmd" is appended when windows? is true, otherwise cmd is returned as is.
def bin(cmd)
  windows? ? "#{cmd}.cmd" : cmd
end
24
+
25
# Returns the CRC-32 checksum of data (an Integer) as computed by Zlib.
def checksum(data)
  crc = Zlib.crc32(data)
  crc
end
28
+
29
# Checks data against an expected CRC-32 value. crc32 is coerced with to_i
# before the comparison, so an Integer or a numeric String both work.
def valid_data?(data, crc32)
  actual = Zlib.crc32(data)
  actual == crc32.to_i
end
32
+
33
# Base64-encodes data using Array#pack with the "m" directive, which ends the
# output with a newline.
def base64encode(data)
  Array[data].pack("m")
end
36
+
37
# Decodes a base64 string with String#unpack("m") and returns the decoded
# payload (the first and only element unpack produces).
def base64decode(data)
  decoded, = data.unpack("m")
  decoded
end
40
+
41
# Converts an array of row hashes into { :header => [...], :data => [[...]] }.
#
# data - array of row hashes; the keys of the first row become the header.
# opts - :string_columns  columns handed to blobs_to_string for every row
#        :schema          list of [column, meta] pairs; meta[:db_type] values
#                         of the form varchar(N) give a length limit of N
#        :table           table name, used only in the error message
#
# Returns {} when data is empty.
# Raises Taps::InvalidData when a value's string form is longer than the
# varchar limit of its column.
def format_data(data, opts={})
  return {} if data.size == 0

  string_columns = opts[:string_columns] || []
  schema = opts[:schema] || []
  table = opts[:table]

  # column => maximum length, for varchar(N) columns only
  limits = {}
  schema.each do |column, meta|
    limits[column] = $1.to_i if meta[:db_type] =~ /^varchar\((\d+)\)/
  end

  header = data[0].keys
  rows = data.map do |row|
    row = blobs_to_string(row, string_columns)
    row.each do |column, value|
      limit = limits[column]
      next unless limit && value.to_s.length > limit
      raise Taps::InvalidData.new(<<-ERROR)
Detected data that exceeds the length limitation of its column. This is
generally due to the fact that SQLite does not enforce length restrictions.

Table : #{table}
Column : #{column}
Type : #{schema.detect{|s| s.first == column}.last[:db_type]}
Data : #{value}
      ERROR
    end
    header.map { |name| row[name] }
  end

  { :header => header, :data => rows }
end
74
+
75
# MySQL handles text and blob fields the same way internally. That is not
# true for other databases, so for MySQL we have to find the fields that are
# really text and convert them back to strings by hand.
#
# Returns [] unless db.url contains "mysql://"; otherwise the names of the
# columns of table whose :db_type matches /text/.
def incorrect_blobs(db, table)
  return [] unless db.url =~ /mysql:\/\//

  db.schema(table).each_with_object([]) do |(column, cdata), found|
    found << column if cdata[:db_type] =~ /text/
  end
end
88
+
89
# Replaces, in place, every value of row that sits in one of the given
# columns and is a Sequel::SQL::Blob with its to_s form.
#
# Returns the same row object.
def blobs_to_string(row, columns)
  return row if columns.size == 0

  columns.each_with_object(row) do |name, record|
    record[name] = record[name].to_s if record[name].kind_of?(Sequel::SQL::Blob)
  end
end
96
+
97
# Runs the given block with a Taps::Chunksize built from old_chunksize and
# returns the result of calc_new_chunksize.
#
# The block's return value is stored as time_in_db, and start_time/end_time
# are stamped around it. When the block fails with Errno::EPIPE,
# RestClient::RequestFailed or RestClient::RequestTimeout, the retry counter
# is bumped, reset_chunksize is called and the block is run again; the third
# such failure is re-raised.
def calculate_chunksize(old_chunksize)
  chunk = Taps::Chunksize.new(old_chunksize)

  begin
    chunk.start_time = Time.now
    chunk.time_in_db = yield chunk
  rescue Errno::EPIPE, RestClient::RequestFailed, RestClient::RequestTimeout
    chunk.retries += 1
    raise unless chunk.retries <= 2

    # we got disconnected, the chunksize could be too large
    # reset the chunksize based on the number of retries
    chunk.reset_chunksize
    retry
  end

  chunk.end_time = Time.now
  chunk.calc_new_chunksize
end
116
+
117
# Writes schema_data to a temporary file and hands that file's path to
# schema_bin in :load mode together with database_url.
#
# Returns the value of the Tempfile.open block, i.e. what schema_bin returned.
def load_schema(database_url, schema_data)
  Tempfile.open('taps') do |tmp|
    path = tmp.path
    File.open(path, 'w') { |io| io.write(schema_data) }
    schema_bin(:load, database_url, path)
  end
end
123
+
124
# Writes index_data to a temporary file and hands that file's path to
# schema_bin in :load_indexes mode together with database_url.
#
# Returns the value of the Tempfile.open block, i.e. what schema_bin returned.
def load_indexes(database_url, index_data)
  Tempfile.open('taps') do |tmp|
    path = tmp.path
    File.open(path, 'w') { |io| io.write(index_data) }
    schema_bin(:load_indexes, database_url, path)
  end
end
130
+
131
# Runs the bundled "schema" script (bin/schema, two directories above this
# file; the name goes through bin() first) with the given arguments and
# returns whatever the command wrote to standard output.
#
# Every argument is wrapped in single quotes. A single quote inside an
# argument is rewritten as '\'' so the value cannot terminate the quoting
# and inject extra shell words; arguments without quotes produce exactly the
# same command line as before.
def schema_bin(*args)
  bin_path = File.expand_path("#{File.dirname(__FILE__)}/../../bin/#{bin('schema')}")
  quoted = args.map do |arg|
    # block form of gsub: the replacement is used literally (no \' expansion)
    "'" + arg.to_s.gsub("'") { "'\\''" } + "'"
  end
  `"#{bin_path}" #{quoted.join(' ')}`
end
135
+
136
# Returns the names of the columns of table that db.schema flags with
# :primary_key, in schema order (an empty array when there are none).
def primary_key(db, table)
  db.schema(table).each_with_object([]) do |column, keys|
    keys << column[0] if column[1][:primary_key]
  end
end
139
+
140
# True when exactly one column of table is flagged :primary_key and has
# :type == :integer in db.schema. A table that is not already a
# Sequel::SQL::Identifier is converted with to_sym.identifier first.
def single_integer_primary_key(db, table)
  table = table.to_sym.identifier unless table.kind_of?(Sequel::SQL::Identifier)
  integer_keys = db.schema(table).select do |column|
    meta = column[1]
    meta[:primary_key] && meta[:type] == :integer
  end
  integer_keys.size == 1
end
145
+
146
# Returns the list of columns to order table by: its primary key column(s)
# when it has any, otherwise every column of the table.
#
# primary_key returns an Array (possibly empty), and an empty Array is
# truthy, so the fallback has to test for emptiness; a plain truthiness test
# never reached it and tables without a primary key got an empty ordering.
def order_by(db, table)
  pkey = primary_key(db, table)
  keys = case pkey
         when nil   then []
         when Array then pkey
         else [pkey.to_sym]
         end
  return keys unless keys.empty?

  table = table.to_sym.identifier unless table.kind_of?(Sequel::SQL::Identifier)
  db[table].columns
end
155
+
156
+
157
# Tries to detect server side errors so the client gets a more useful error
# message.
#
# Calls the block and returns its value. A Sequel::DatabaseError whose message
# matches /duplicate key value/i is re-raised as
# Taps::DuplicatePrimaryKeyError with the same message; any other
# Sequel::DatabaseError is re-raised unchanged.
def server_error_handling(&blk)
  blk.call
rescue Sequel::DatabaseError => e
  raise unless e.message =~ /duplicate key value/i
  raise Taps::DuplicatePrimaryKeyError, e.message
end
170
+
171
# Re-raises e, first trying to turn a RestClient error that carries a JSON
# body into the exception class named by the server.
#
# When e is a RestClient::Exception with a response whose content type is
# exactly 'application/json', the body is decoded with OkJson and, if
# 'error_class' resolves to a class, an instance built from 'error_message'
# and 'error_backtrace' is raised instead. In every other case e itself is
# raised.
def reraise_server_exception(e)
  if e.kind_of?(RestClient::Exception)
    # Some RestClient errors carry no response at all; without this guard the
    # nil response raised NoMethodError and hid the real error.
    response = e.respond_to?(:response) ? e.response : nil
    if response && response.headers[:content_type] == 'application/json'
      json = OkJson.decode(response.to_s)
      # FIXME(security): this evaluates a string received from the remote
      # server as ruby code. Left as is to keep behavior; it should be
      # replaced by a lookup against a whitelist of known error classes.
      klass = eval(json['error_class']) rescue nil
      raise klass.new(json['error_message'], :backtrace => json['error_backtrace']) if klass
    end
  end
  raise e
end
181
+ end
182
+ end
@@ -0,0 +1,18 @@
1
+ require "yaml"
2
+
3
module Taps
  # Parsed contents of VERSION.yml (two directories above this file). The
  # file is read on first use and the result cached in @@version_yml.
  def self.version_yml
    @@version_yml ||= YAML.load(File.read(File.dirname(__FILE__) + '/../../VERSION.yml'))
  end

  # Full version string "major.minor.patch", with ".build" appended when the
  # :build entry is set.
  def self.version
    info = version_yml
    dotted = [info[:major], info[:minor], info[:patch]].join('.')
    info[:build] ? "#{dotted}.#{info[:build]}" : dotted
  end

  # "major.minor" part of the version.
  def self.compatible_version
    info = version_yml
    [info[:major], info[:minor]].join('.')
  end
end
18
+
@@ -0,0 +1,555 @@
1
+ # Copyright 2011 Keith Rarick
2
+ #
3
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ # of this software and associated documentation files (the "Software"), to deal
5
+ # in the Software without restriction, including without limitation the rights
6
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ # copies of the Software, and to permit persons to whom the Software is
8
+ # furnished to do so, subject to the following conditions:
9
+ #
10
+ # The above copyright notice and this permission notice shall be included in
11
+ # all copies or substantial portions of the Software.
12
+ #
13
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ # THE SOFTWARE.
20
+
21
+ # See https://github.com/kr/okjson for updates.
22
+
23
+ require 'stringio'
24
+
25
+ # Some parts adapted from
26
+ # http://golang.org/src/pkg/json/decode.go and
27
+ # http://golang.org/src/pkg/utf8/utf8.go
28
+ module OkJson
29
+ extend self
30
+
31
+ class ParserError < ::StandardError; end
32
+
33
# Decodes the json document in string s and returns the corresponding ruby
# value. String s must be valid UTF-8; if you have a string in some other
# encoding, convert it first.
#
# String values in the resulting structure will be UTF-8.
#
# Raises OkJson::ParserError ('trailing garbage') when tokens are left over
# after the first complete json text.
def decode(s)
  value, leftover = textparse(lex(s))
  raise OkJson::ParserError, 'trailing garbage' if leftover.length > 0
  value
end
49
+
50
+
51
# Parses a "json text" in the sense of RFC 4627 from the token list ts.
# Returns the parsed value and any trailing tokens.
# Objects and arrays are dispatched directly; any other leading token is
# handed to valparse, so atomic values are accepted here as well.
#
# Raises OkJson::ParserError ('empty') when ts holds no tokens.
def textparse(ts)
  # A length is never negative, so the guard must test for "no tokens".
  raise OkJson::ParserError, 'empty' if ts.empty?

  case ts[0][0]
  when '{' then objparse(ts)
  when '[' then arrparse(ts)
  else valparse(ts)
  end
end
67
+
68
+
69
# Parses a "value" in the sense of RFC 4627 from the token list ts.
# Returns the parsed value and any trailing tokens.
#
# Raises OkJson::ParserError ('empty') when ts holds no tokens, and
# "unexpected ..." when the first token cannot start a value.
def valparse(ts)
  # A length is never negative, so the guard must test for "no tokens".
  raise OkJson::ParserError, 'empty' if ts.empty?

  typ, _, val = ts[0]
  case typ
  when '{' then objparse(ts)
  when '[' then arrparse(ts)
  when :val, :str then [val, ts[1..-1]]
  else
    raise OkJson::ParserError, "unexpected #{val.inspect}"
  end
end
85
+
86
+
87
# Parses an "object" in the sense of RFC 4627 from the token list ts.
# Returns the parsed Hash and any trailing tokens.
#
# Raises OkJson::ParserError when the token list ends before the closing
# '}' (previously this surfaced as NoMethodError on nil).
def objparse(ts)
  ts = eat('{', ts)
  obj = {}

  raise OkJson::ParserError, 'unexpected end of input in object' if ts.empty?
  return obj, ts[1..-1] if ts[0][0] == '}'

  # member (',' member)* '}' -- a comma must always be followed by a member
  loop do
    k, v, ts = pairparse(ts)
    obj[k] = v

    raise OkJson::ParserError, 'unexpected end of input in object' if ts.empty?
    return obj, ts[1..-1] if ts[0][0] == '}'

    ts = eat(',', ts)
  end
end
115
+
116
+
117
# Parses a "member" in the sense of RFC 4627: a string key, a ':' and a
# value. Returns [key, value, trailing tokens].
#
# Raises OkJson::ParserError when the first token is not a string.
def pairparse(ts)
  typ, _, key = ts[0]
  rest = ts[1..-1]
  raise OkJson::ParserError, "unexpected #{key.inspect}" if typ != :str

  rest = eat(':', rest)
  value, rest = valparse(rest)
  [key, value, rest]
end
128
+
129
+
130
# Parses an "array" in the sense of RFC 4627 from the token list ts.
# Returns the parsed Array and any trailing tokens.
#
# Raises OkJson::ParserError when the token list ends before the closing
# ']' (previously this surfaced as NoMethodError on nil).
def arrparse(ts)
  ts = eat('[', ts)
  arr = []

  raise OkJson::ParserError, 'unexpected end of input in array' if ts.empty?
  return arr, ts[1..-1] if ts[0][0] == ']'

  # value (',' value)* ']' -- a comma must always be followed by a value
  loop do
    v, ts = valparse(ts)
    arr << v

    raise OkJson::ParserError, 'unexpected end of input in array' if ts.empty?
    return arr, ts[1..-1] if ts[0][0] == ']'

    ts = eat(',', ts)
  end
end
158
+
159
+
160
# Consumes the first token of ts, which must be of type typ, and returns the
# remaining tokens.
#
# Raises OkJson::ParserError when the first token has another type or when
# ts has no tokens left (previously NoMethodError on nil).
def eat(typ, ts)
  head = ts[0]
  if head.nil? || head[0] != typ
    raise OkJson::ParserError, "expected #{typ} (got #{head.inspect})"
  end
  ts[1..-1]
end
166
+
167
+
168
# Scans s and returns a list of json tokens, excluding white space (as
# defined in RFC 4627). Each token is a [type, lexeme, value] triple as
# produced by tok.
#
# Raises OkJson::ParserError when tok finds no token at the current position.
def lex(s)
  ts = []
  while s.length > 0
    typ, lexeme, val = tok(s)
    # tok's helpers may answer nil or false for "no token here"; both must be
    # rejected, otherwise lexeme is nil and the slice below blows up.
    unless typ
      raise OkJson::ParserError, "invalid character at #{s[0,10].inspect}"
    end
    ts << [typ, lexeme, val] if typ != :space
    s = s[lexeme.length..-1]
  end
  ts
end
184
+
185
+
186
# Scans the first token in s and returns a 3-element list, or whatever the
# delegated *tok helper answers when no token can be read.
#
# The first list element is one of
# '{', '}', ':', ',', '[', ']',
# :val, :str, and :space.
#
# The second element is the lexeme.
#
# The third element is the value of the token for :val and :str, otherwise
# it is the lexeme.
def tok(s)
  case s[0]
  when ?{, ?}, ?:, ?,, ?[, ?]
    # punctuation: type, lexeme and value are all the character itself
    [s[0,1], s[0,1], s[0,1]]
  when ?n then nulltok(s)
  when ?t then truetok(s)
  when ?f then falsetok(s)
  when ?" then strtok(s)
  when Spc, ?\t, ?\n, ?\r
    [:space, s[0,1], s[0,1]]
  else
    numtok(s)
  end
end
218
+
219
+
220
# Returns the [:val, 'null', nil] token when s starts with 'null', otherwise
# nil (not false, so callers testing for nil see "no token").
def nulltok(s); [:val, 'null', nil] if s[0,4] == 'null' end
221
# Returns the [:val, 'true', true] token when s starts with 'true', otherwise
# nil (not false, so callers testing for nil see "no token").
def truetok(s); [:val, 'true', true] if s[0,4] == 'true' end
222
# Returns the [:val, 'false', false] token when s starts with 'false',
# otherwise nil (not false, so callers testing for nil see "no token").
def falsetok(s); [:val, 'false', false] if s[0,5] == 'false' end
223
+
224
+
225
# Scans a json number at the start of s.
#
# Returns [:val, lexeme, value], or nil when s does not start with a number.
# value is a Float when the number has a fraction or a negative exponent,
# otherwise an Integer (an exponent without fraction is applied exactly).
def numtok(s)
  # \A anchors the match at the start so a failed scan does not search the
  # rest of the input.
  m = /\A-?([1-9][0-9]+|[0-9])([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s)
  return nil unless m

  lexeme = m[0]
  if m[2]
    [:val, lexeme, Float(lexeme)]
  elsif m[3]
    # base 10 is explicit: "e08" must not be read as an (invalid) octal number
    exponent = Integer(m[3][1..-1], 10)
    if exponent < 0
      # an integer times 10**-n would produce a Rational
      [:val, lexeme, Float(lexeme)]
    else
      # the mantissa includes the optional sign, which capture 1 leaves out
      mantissa = Integer(lexeme[0, m.end(1)], 10)
      [:val, lexeme, mantissa * (10**exponent)]
    end
  else
    [:val, lexeme, Integer(lexeme, 10)]
  end
end
237
+
238
+
239
# Scans a json string literal at the start of s.
#
# Returns [:str, lexeme, value] where value is the unquoted string.
# Raises OkJson::ParserError when s does not start with a well-formed literal.
def strtok(s)
  # \A is required: without it a malformed literal at position 0 let the
  # regexp match some later pair of quotes, and the caller then cut the wrong
  # number of characters off the input.
  m = /\A"([^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s)
  raise OkJson::ParserError, "invalid string literal at #{abbrev(s)}" unless m
  [:str, m[0], unquote(m[0])]
end
246
+
247
+
248
# Builds a short, backtick-quoted excerpt of s for error messages: at most
# the first 10 characters, cut before the first backtick if there is one,
# with '...' appended when the excerpt is shorter than s.
def abbrev(s)
  t = s[0,10]
  # index gives the position; t['`'] would give the matched substring and
  # make the slice below raise TypeError.
  cut = t.index('`')
  t = t[0,cut] if cut
  t = t + '...' if t.length < s.length
  '`' + t + '`'
end
255
+
256
+
257
# Converts a quoted json string literal q into a UTF-8-encoded string.
# The rules are different than for Ruby, so we cannot use eval.
# Raises OkJson::ParserError if q contains control characters or an
# invalid escape.
#
# The literal is processed byte by byte: on rubies with string encodings the
# working copies are tagged BINARY so that the raw bytes written by ucharenc
# can be stored next to bytes copied from q, and the result is tagged UTF-8
# at the end.
def unquote(q)
  q = q[1...-1]
  q = q.dup.force_encoding('BINARY') if q.respond_to?(:force_encoding)
  a = q.dup # allocate a big enough string
  r, w = 0, 0
  while r < q.length
    c = q[r]
    case true
    when c == ?\\
      r += 1
      if r >= q.length
        raise OkJson::ParserError, "string literal ends with a \"\\\": \"#{q}\""
      end

      case q[r]
      when ?",?\\,?/,?'
        a[w] = q[r]
        r += 1
        w += 1
      when ?b,?f,?n,?r,?t
        a[w] = Unesc[q[r]]
        r += 1
        w += 1
      when ?u
        r += 1
        uchar = hexdec4(q[r,4])
        r += 4
        if surrogate? uchar
          # A high surrogate only counts when a second \uXXXX escape follows
          # directly; anything else becomes the replacement character.
          low = (q.length >= r+6 && q[r,2] == "\\u") ? hexdec4(q[r+2,4]) : nil
          uchar = low ? subst(uchar, low) : Ucharerr
          if uchar != Ucharerr
            # A valid pair; consume.
            r += 6
          end
        end
        w += ucharenc(a, w, uchar)
      else
        raise OkJson::ParserError, "invalid escape char #{q[r]} in \"#{q}\""
      end
    when c == ?", c < Spc
      raise OkJson::ParserError, "invalid character in string literal \"#{q}\""
    else
      # Copy anything else byte-for-byte.
      # Valid UTF-8 will remain valid UTF-8.
      # Invalid UTF-8 will remain invalid UTF-8.
      a[w] = c
      r += 1
      w += 1
    end
  end
  result = a[0,w]
  result.force_encoding('UTF-8') if result.respond_to?(:force_encoding)
  result
end
317
+
318
+
319
# Decodes the four hex digits in s into an Integer, most significant digit
# first; each digit is converted by nibble.
#
# Raises OkJson::ParserError ('short') unless s has exactly 4 characters.
def hexdec4(s)
  raise OkJson::ParserError, 'short' unless s.length == 4
  (0..3).inject(0) { |acc, pos| (acc << 4) | nibble(s[pos]) }
end
325
+
326
+
327
# Combines the surrogate pair u1 (high) and u2 (low) into the code point it
# stands for. Returns Ucharerr when the two values are not a valid pair.
def subst(u1, u2)
  if Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3
    # The 20 payload bits must be assembled before the 0x10000 offset is
    # added; `a | b + c` parses as `a | (b + c)` and drops a carry whenever
    # the high half already has bit 16 set.
    return (((u1-Usurr1)<<10) | (u2-Usurr2)) + Usurrself
  end
  return Ucharerr
end
333
+
334
+
335
# Splits code point u into its surrogate pair [high, low].
# Returns [Ucharerr, Ucharerr] when u is below Usurrself, above Umax, or a
# surrogate itself.
def unsubst(u)
  return Ucharerr, Ucharerr if u < Usurrself || u > Umax || surrogate?(u)

  offset = u - Usurrself
  high = Usurr1 + ((offset>>10)&0x3ff)
  low = Usurr2 + (offset&0x3ff)
  [high, low]
end
342
+
343
+
344
# True when u lies in the surrogate range Usurr1...Usurr3 (upper bound
# excluded).
def surrogate?(u)
  lower_ok = Usurr1 <= u
  lower_ok && u < Usurr3
end
347
+
348
+
349
# Converts the hex digit c (0-9, a-f, A-F) to its numeric value 0..15.
#
# Raises OkJson::ParserError for any other character; letters beyond f/F
# used to be accepted and produced values above 15.
def nibble(c)
  case true
  when ?0 <= c && c <= ?9 then c.ord - ?0.ord
  when ?a <= c && c <= ?f then c.ord - ?a.ord + 10
  when ?A <= c && c <= ?F then c.ord - ?A.ord + 10
  else
    raise OkJson::ParserError, "invalid hex code #{c}"
  end
end
358
+
359
+
360
# Encodes x into a json text.
#
# Hash, Array, String and Numeric values are handed to objenc, arrenc,
# strenc and numenc. A Symbol is encoded like the String x.to_s.
# true and false map to "true" and "false"; nil and every other value
# (for example a Proc) are encoded as "null".
# Strings contained in x must be valid UTF-8.
def encode(x)
  return objenc(x) if Hash === x
  return arrenc(x) if Array === x
  return strenc(x) if String === x
  return numenc(x) if Numeric === x
  return strenc(x.to_s) if Symbol === x
  return "true" if true === x
  return "false" if false === x
  "null"
end
380
+
381
+
382
# Encodes the hash x as a json object; keys and values both go through
# encode and the members are joined with ','.
def objenc(x)
  members = x.map { |key, value| encode(key) + ':' + encode(value) }
  '{' + members.join(',') + '}'
end
385
+
386
+
387
# Encodes the array a as a json array; every element goes through encode
# and the results are joined with ','.
def arrenc(a)
  items = a.map { |item| encode(item) }
  '[' + items.join(',') + ']'
end
390
+
391
+
392
# Encodes string s as a json string literal.
#
# '"', '\' and the control characters \b \f \n \r \t get their short escapes,
# printable ASCII (space through '~') is copied, and everything else is
# decoded with uchardec and written as \uXXXX (as a surrogate pair of two
# escapes for code points of 0x10000 and above).
def strenc(s)
  # uchardec works on bytes and reports how many bytes it consumed, so the
  # loop has to index bytes as well. On rubies with string encodings s[r]
  # addresses characters unless the string is tagged BINARY; indexing
  # characters mangled non-ASCII text and skipped what followed it.
  s = s.dup.force_encoding('BINARY') if s.respond_to?(:force_encoding)
  t = StringIO.new
  t.putc(?")
  r = 0
  while r < s.length
    c = s[r]
    case c
    when ?"  then t.print('\\"')
    when ?\\ then t.print('\\\\')
    when ?\b then t.print('\\b')
    when ?\f then t.print('\\f')
    when ?\n then t.print('\\n')
    when ?\r then t.print('\\r')
    when ?\t then t.print('\\t')
    else
      if Spc <= c && c <= ?~
        t.putc(c)
      else
        u, size = uchardec(s, r)
        r += size - 1 # we add one more at the bottom of the loop
        if u < 0x10000
          t.print('\\u')
          hexenc4(t, u)
        else
          u1, u2 = unsubst(u)
          t.print('\\u')
          hexenc4(t, u1)
          t.print('\\u')
          hexenc4(t, u2)
        end
      end
    end
    r += 1
  end
  t.putc(?")
  t.string
end
432
+
433
+
434
# Writes the low 16 bits of u to t as four lowercase hex digits, most
# significant digit first, using putc and the Hex digit table.
def hexenc4(t, u)
  digits = [12, 8, 4, 0].map { |shift| Hex[(u>>shift)&0xf] }
  digits.each { |digit| t.putc(digit) }
  digits.last
end
440
+
441
+
442
# Encodes the number x. Values for which nan? or infinite? is truthy become
# 'null'; everything else is rendered with string interpolation.
def numenc(x)
  begin
    return 'null' if x.nan? || x.infinite?
  rescue StandardError
    # x does not answer nan?/infinite? (or the check failed): fall through
    # and print it as is.
  end
  "#{x}"
end
448
+
449
+
450
# Decodes unicode character u from UTF-8 bytes in string s at position i.
# Returns u and the number of bytes read.
#
# Every malformed case (nothing left to read, a stray continuation byte, a
# missing or invalid continuation byte, an overlong form, a lead byte of
# Utag5 or above) yields [Ucharerr, 1].
def uchardec(s, i)
  avail = s.length - i
  return [Ucharerr, 1] if avail < 1

  lead = s[i].ord

  # 1-byte, 7-bit sequence?
  return [lead, 1] if lead < Utagx

  # unexpected continuation byte?
  return [Ucharerr, 1] if lead < Utag2

  # a continuation byte lies in Utagx...Utag2
  continuation = lambda { |byte| !(byte < Utagx || Utag2 <= byte) }

  # need continuation byte
  return [Ucharerr, 1] if avail < 2
  b1 = s[i+1].ord
  return [Ucharerr, 1] unless continuation.call(b1)

  # 2-byte, 11-bit sequence?
  if lead < Utag3
    u = (lead&Umask2)<<6 | (b1&Umaskx)
    return u <= Uchar1max ? [Ucharerr, 1] : [u, 2]
  end

  # need second continuation byte
  return [Ucharerr, 1] if avail < 3
  b2 = s[i+2].ord
  return [Ucharerr, 1] unless continuation.call(b2)

  # 3-byte, 16-bit sequence?
  if lead < Utag4
    u = (lead&Umask3)<<12 | (b1&Umaskx)<<6 | (b2&Umaskx)
    return u <= Uchar2max ? [Ucharerr, 1] : [u, 3]
  end

  # need third continuation byte
  return [Ucharerr, 1] if avail < 4
  b3 = s[i+3].ord
  return [Ucharerr, 1] unless continuation.call(b3)

  # 4-byte, 21-bit sequence?
  if lead < Utag5
    u = (lead&Umask4)<<18 | (b1&Umaskx)<<12 | (b2&Umaskx)<<6 | (b3&Umaskx)
    return u <= Uchar3max ? [Ucharerr, 1] : [u, 4]
  end

  [Ucharerr, 1]
end
505
+
506
+
507
# Encodes unicode character u as UTF-8 bytes in string a at position i.
# Returns the number of bytes written (1 to 4, chosen by comparing u with
# Uchar1max, Uchar2max and Uchar3max).
def ucharenc(a, i, u)
  bytes =
    if u <= Uchar1max
      [u & 0xff]
    elsif u <= Uchar2max
      [Utag2 | ((u>>6)&0xff),
       Utagx | (u&Umaskx)]
    elsif u <= Uchar3max
      [Utag3 | ((u>>12)&0xff),
       Utagx | ((u>>6)&Umaskx),
       Utagx | (u&Umaskx)]
    else
      [Utag4 | ((u>>18)&0xff),
       Utagx | ((u>>12)&Umaskx),
       Utagx | ((u>>6)&Umaskx),
       Utagx | (u&Umaskx)]
    end

  bytes.each_with_index { |byte, offset| a[i+offset] = byte.chr }
  bytes.length
end
532
+
533
# UTF-8 lead/continuation byte tags and the payload masks that go with them.
Utagx = 0x80 # 1000 0000
Utag2 = 0xc0 # 1100 0000
Utag3 = 0xe0 # 1110 0000
Utag4 = 0xf0 # 1111 0000
Utag5 = 0xF8 # 1111 1000
Umaskx = 0x3f # 0011 1111
Umask2 = 0x1f # 0001 1111
Umask3 = 0x0f # 0000 1111
Umask4 = 0x07 # 0000 0111
# Largest code point that fits a 1-, 2- and 3-byte sequence.
Uchar1max = (1<<7) - 1
Uchar2max = (1<<11) - 1
Uchar3max = (1<<16) - 1
Ucharerr = 0xFFFD # unicode "replacement char"
# Surrogate arithmetic: offset of the supplementary planes, start of the
# high range, start of the low range, end of the low range (exclusive).
Usurrself = 0x10000
Usurr1 = 0xd800
Usurr2 = 0xdc00
Usurr3 = 0xe000
Umax = 0x10ffff

Spc = ' '[0]
# Lookup tables are frozen so they cannot be modified through the constant.
Unesc = {?b=>?\b, ?f=>?\f, ?n=>?\n, ?r=>?\r, ?t=>?\t}.freeze
Hex = '0123456789abcdef'.freeze
555
+ end