zscan 2.0.3 → 2.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/bspec.c +2 -0
- data/ext/extconf.rb +12 -2
- data/ext/pack/internal-23.h +12 -0
- data/ext/pack/internal-25.h +1892 -0
- data/ext/pack/internal.h +1889 -9
- data/ext/pack/pack-23.c +2295 -0
- data/ext/pack/pack-25.c +1102 -0
- data/ext/pack/pack.c +170 -1363
- data/ext/zscan.c +3 -3
- data/lib/zscan.rb +1 -1
- data/rakefile +11 -3
- data/readme.md +1 -2
- data/spec/binary_scan_spec.rb +1 -1
- data/spec/spec_helper.rb +1 -7
- data/zscan.gemspec +2 -2
- metadata +25 -22
data/ext/pack/pack-25.c
ADDED
@@ -0,0 +1,1102 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
|
3
|
+
pack.c -
|
4
|
+
|
5
|
+
$Author$
|
6
|
+
created at: Thu Feb 10 15:17:05 JST 1994
|
7
|
+
|
8
|
+
Copyright (C) 1993-2007 Yukihiro Matsumoto
|
9
|
+
|
10
|
+
**********************************************************************/
|
11
|
+
|
12
|
+
#include "internal.h"
|
13
|
+
#include <sys/types.h>
|
14
|
+
#include <ctype.h>
|
15
|
+
#include <errno.h>
|
16
|
+
|
17
|
+
/*
|
18
|
+
* It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
|
19
|
+
* instead of HAVE_LONG_LONG or LONG_LONG.
|
20
|
+
* This means q! and Q! always mean the standard long long type and
|
21
|
+
* cause ArgumentError on platforms which have no long long type,
|
22
|
+
* even if the platform has an implementation specific 64bit type.
|
23
|
+
* This behavior is consistent with the document of pack/unpack.
|
24
|
+
*/
|
25
|
+
#ifdef HAVE_TRUE_LONG_LONG
|
26
|
+
static const char natstr[] = "sSiIlLqQjJ";
|
27
|
+
#else
|
28
|
+
static const char natstr[] = "sSiIlLjJ";
|
29
|
+
#endif
|
30
|
+
static const char endstr[] = "sSiIlLqQjJ";
|
31
|
+
|
32
|
+
#ifdef HAVE_TRUE_LONG_LONG
|
33
|
+
/* It is intentional to use long long instead of LONG_LONG. */
|
34
|
+
# define NATINT_LEN_Q NATINT_LEN(long long, 8)
|
35
|
+
#else
|
36
|
+
# define NATINT_LEN_Q 8
|
37
|
+
#endif
|
38
|
+
|
39
|
+
#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
|
40
|
+
# define NATINT_PACK
|
41
|
+
#endif
|
42
|
+
|
43
|
+
#ifdef DYNAMIC_ENDIAN
|
44
|
+
/* for universal binary of NEXTSTEP and MacOS X */
|
45
|
+
/* useless since autoconf 2.63? */
|
46
|
+
/*
 * Run-time byte-order probe.
 *
 * The first call inspects the lowest-addressed byte of an int that holds
 * the value 1 and caches the verdict in function-local statics; later
 * calls return the cached verdict.
 *
 * Returns 1 when that byte is zero (big-endian layout), 0 otherwise.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached_result;

    if (!probed) {
        const char *first_byte;

        /* the flag doubles as the probe value: it now holds the int 1 */
        probed = 1;
        first_byte = (const char *)&probed;
        cached_result = (first_byte[0] != 0) ? 0 : 1;
    }
    return cached_result;
}
|
58
|
+
# define BIGENDIAN_P() (is_bigendian())
|
59
|
+
#elif defined(WORDS_BIGENDIAN)
|
60
|
+
# define BIGENDIAN_P() 1
|
61
|
+
#else
|
62
|
+
# define BIGENDIAN_P() 0
|
63
|
+
#endif
|
64
|
+
|
65
|
+
#ifdef NATINT_PACK
|
66
|
+
# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
|
67
|
+
#else
|
68
|
+
# define NATINT_LEN(type,len) ((int)sizeof(type))
|
69
|
+
#endif
|
70
|
+
|
71
|
+
typedef union {
|
72
|
+
float f;
|
73
|
+
uint32_t u;
|
74
|
+
char buf[4];
|
75
|
+
} FLOAT_SWAPPER;
|
76
|
+
typedef union {
|
77
|
+
double d;
|
78
|
+
uint64_t u;
|
79
|
+
char buf[8];
|
80
|
+
} DOUBLE_SWAPPER;
|
81
|
+
#define swapf(x) swap32(x)
|
82
|
+
#define swapd(x) swap64(x)
|
83
|
+
|
84
|
+
#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
|
85
|
+
#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
|
86
|
+
#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
|
87
|
+
#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
|
88
|
+
#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
|
89
|
+
#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
|
90
|
+
#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
|
91
|
+
#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
|
92
|
+
|
93
|
+
#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
|
94
|
+
#define HTONF(x) ((x).u = rb_htonf((x).u))
|
95
|
+
#define HTOVF(x) ((x).u = rb_htovf((x).u))
|
96
|
+
#define NTOHF(x) ((x).u = rb_ntohf((x).u))
|
97
|
+
#define VTOHF(x) ((x).u = rb_vtohf((x).u))
|
98
|
+
|
99
|
+
#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
|
100
|
+
#define HTOND(x) ((x).u = rb_htond((x).u))
|
101
|
+
#define HTOVD(x) ((x).u = rb_htovd((x).u))
|
102
|
+
#define NTOHD(x) ((x).u = rb_ntohd((x).u))
|
103
|
+
#define VTOHD(x) ((x).u = rb_vtohd((x).u))
|
104
|
+
|
105
|
+
#define MAX_INTEGER_PACK_SIZE 8
|
106
|
+
|
107
|
+
static const char toofew[] = "too few arguments";
|
108
|
+
|
109
|
+
static void encodes(VALUE,const char*,long,int,int);
|
110
|
+
static void qpencode(VALUE,VALUE,long);
|
111
|
+
|
112
|
+
static unsigned long utf8_to_uv(const char*,long*);
|
113
|
+
|
114
|
+
static ID id_associated;
|
115
|
+
|
116
|
+
static void
|
117
|
+
str_associate(VALUE str, VALUE add)
|
118
|
+
{
|
119
|
+
/* assert(NIL_P(rb_attr_get(str, id_associated))); */
|
120
|
+
rb_ivar_set(str, id_associated, add);
|
121
|
+
}
|
122
|
+
|
123
|
+
static VALUE
|
124
|
+
str_associated(VALUE str)
|
125
|
+
{
|
126
|
+
return rb_ivar_lookup(str, id_associated, Qfalse);
|
127
|
+
}
|
128
|
+
|
129
|
+
static const char uu_table[] =
|
130
|
+
"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
|
131
|
+
static const char b64_table[] =
|
132
|
+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
133
|
+
|
134
|
+
static void
|
135
|
+
encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
|
136
|
+
{
|
137
|
+
enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
|
138
|
+
char buff[buff_size + 1]; /* +1 for tail_lf */
|
139
|
+
long i = 0;
|
140
|
+
const char *const trans = type == 'u' ? uu_table : b64_table;
|
141
|
+
char padding;
|
142
|
+
const unsigned char *s = (const unsigned char *)s0;
|
143
|
+
|
144
|
+
if (type == 'u') {
|
145
|
+
buff[i++] = (char)len + ' ';
|
146
|
+
padding = '`';
|
147
|
+
}
|
148
|
+
else {
|
149
|
+
padding = '=';
|
150
|
+
}
|
151
|
+
while (len >= input_unit) {
|
152
|
+
while (len >= input_unit && buff_size-i >= encoded_unit) {
|
153
|
+
buff[i++] = trans[077 & (*s >> 2)];
|
154
|
+
buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
|
155
|
+
buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
|
156
|
+
buff[i++] = trans[077 & s[2]];
|
157
|
+
s += input_unit;
|
158
|
+
len -= input_unit;
|
159
|
+
}
|
160
|
+
if (buff_size-i < encoded_unit) {
|
161
|
+
rb_str_buf_cat(str, buff, i);
|
162
|
+
i = 0;
|
163
|
+
}
|
164
|
+
}
|
165
|
+
|
166
|
+
if (len == 2) {
|
167
|
+
buff[i++] = trans[077 & (*s >> 2)];
|
168
|
+
buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
|
169
|
+
buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
|
170
|
+
buff[i++] = padding;
|
171
|
+
}
|
172
|
+
else if (len == 1) {
|
173
|
+
buff[i++] = trans[077 & (*s >> 2)];
|
174
|
+
buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
|
175
|
+
buff[i++] = padding;
|
176
|
+
buff[i++] = padding;
|
177
|
+
}
|
178
|
+
if (tail_lf) buff[i++] = '\n';
|
179
|
+
rb_str_buf_cat(str, buff, i);
|
180
|
+
if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
|
181
|
+
}
|
182
|
+
|
183
|
+
static const char hex_table[] = "0123456789ABCDEF";
|
184
|
+
|
185
|
+
static void
|
186
|
+
qpencode(VALUE str, VALUE from, long len)
|
187
|
+
{
|
188
|
+
char buff[1024];
|
189
|
+
long i = 0, n = 0, prev = EOF;
|
190
|
+
unsigned char *s = (unsigned char*)RSTRING_PTR(from);
|
191
|
+
unsigned char *send = s + RSTRING_LEN(from);
|
192
|
+
|
193
|
+
while (s < send) {
|
194
|
+
if ((*s > 126) ||
|
195
|
+
(*s < 32 && *s != '\n' && *s != '\t') ||
|
196
|
+
(*s == '=')) {
|
197
|
+
buff[i++] = '=';
|
198
|
+
buff[i++] = hex_table[*s >> 4];
|
199
|
+
buff[i++] = hex_table[*s & 0x0f];
|
200
|
+
n += 3;
|
201
|
+
prev = EOF;
|
202
|
+
}
|
203
|
+
else if (*s == '\n') {
|
204
|
+
if (prev == ' ' || prev == '\t') {
|
205
|
+
buff[i++] = '=';
|
206
|
+
buff[i++] = *s;
|
207
|
+
}
|
208
|
+
buff[i++] = *s;
|
209
|
+
n = 0;
|
210
|
+
prev = *s;
|
211
|
+
}
|
212
|
+
else {
|
213
|
+
buff[i++] = *s;
|
214
|
+
n++;
|
215
|
+
prev = *s;
|
216
|
+
}
|
217
|
+
if (n > len) {
|
218
|
+
buff[i++] = '=';
|
219
|
+
buff[i++] = '\n';
|
220
|
+
n = 0;
|
221
|
+
prev = '\n';
|
222
|
+
}
|
223
|
+
if (i > 1024 - 5) {
|
224
|
+
rb_str_buf_cat(str, buff, i);
|
225
|
+
i = 0;
|
226
|
+
}
|
227
|
+
s++;
|
228
|
+
}
|
229
|
+
if (n > 0) {
|
230
|
+
buff[i++] = '=';
|
231
|
+
buff[i++] = '\n';
|
232
|
+
}
|
233
|
+
if (i > 0) {
|
234
|
+
rb_str_buf_cat(str, buff, i);
|
235
|
+
}
|
236
|
+
}
|
237
|
+
|
238
|
+
static inline int
|
239
|
+
hex2num(char c)
|
240
|
+
{
|
241
|
+
int n;
|
242
|
+
n = ruby_digit36_to_number_table[(unsigned char)c];
|
243
|
+
if (16 <= n)
|
244
|
+
n = -1;
|
245
|
+
return n;
|
246
|
+
}
|
247
|
+
|
248
|
+
#define PACK_LENGTH_ADJUST_SIZE(sz) do { \
|
249
|
+
tmp_len = 0; \
|
250
|
+
if (len > (long)((send-s)/(sz))) { \
|
251
|
+
if (!star) { \
|
252
|
+
tmp_len = len-(send-s)/(sz); \
|
253
|
+
} \
|
254
|
+
len = (send-s)/(sz); \
|
255
|
+
} \
|
256
|
+
} while (0)
|
257
|
+
|
258
|
+
#define PACK_ITEM_ADJUST() do { \
|
259
|
+
if (tmp_len > 0 && mode == UNPACK_ARRAY) \
|
260
|
+
rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
|
261
|
+
} while (0)
|
262
|
+
|
263
|
+
/* Workaround for Oracle Solaris Studio 12.4 C compiler optimization bug
|
264
|
+
* with "-xO4" optimization option.
|
265
|
+
*/
|
266
|
+
#if defined(__SUNPRO_C) && __SUNPRO_C == 0x5130
|
267
|
+
# define AVOID_CC_BUG volatile
|
268
|
+
#else
|
269
|
+
# define AVOID_CC_BUG
|
270
|
+
#endif
|
271
|
+
|
272
|
+
static VALUE
|
273
|
+
infected_str_new(const char *ptr, long len, VALUE str)
|
274
|
+
{
|
275
|
+
VALUE s = rb_str_new(ptr, len);
|
276
|
+
|
277
|
+
OBJ_INFECT(s, str);
|
278
|
+
return s;
|
279
|
+
}
|
280
|
+
|
281
|
+
/* unpack mode */
|
282
|
+
#define UNPACK_ARRAY 0
|
283
|
+
#define UNPACK_BLOCK 1
|
284
|
+
#define UNPACK_1 2
|
285
|
+
|
286
|
+
#define castchar(from) (char)((from) & 0xff)
|
287
|
+
|
288
|
+
/*
 * Decode the bytes of +str+ according to the template string +fmt+.
 *
 * str         packed data (coerced with StringValue)
 * fmt         template: one directive character per item, optionally
 *             followed by modifiers ('_' '!' '<' '>') and a count
 *             (decimal digits or '*'); whitespace and '#' comments up to
 *             the next newline are skipped
 * parsed_len  out parameter; receives the distance between the start of
 *             +str+ and the read cursor once the template is exhausted
 *
 * Returns the array of decoded items. When a block is given, every item
 * is yielded instead and Qnil is returned.
 *
 * Raises ArgumentError for misplaced modifiers, '%', invalid base64 in
 * strict mode, malformed UTF-8, cursor moves outside the string and
 * unassociated pointers; raises RangeError for conflicting endian
 * modifiers and for counts that do not fit in a non-negative long.
 */
VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
{
#define hexdigits ruby_hexdigits
    char *init_s, *s, *send;
    char *p, *pend;
    VALUE ary;
    char type;
    long len;
    AVOID_CC_BUG long tmp_len;
    int star;
#ifdef NATINT_PACK
    int natint;		/* native integer */
#endif
    int signed_p, integer_size, bigendian_p;
    int mode = (rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY);
#define UNPACK_PUSH(item) do {\
        VALUE item_val = (item);\
        if ((mode) == UNPACK_BLOCK) {\
            rb_yield(item_val);\
        }\
        else {\
            rb_ary_push(ary, item_val);\
        }\
    } while (0)

    StringValue(str);
    StringValue(fmt);
    init_s = s = RSTRING_PTR(str);
    send = s + RSTRING_LEN(str);
    p = RSTRING_PTR(fmt);
    pend = p + RSTRING_LEN(fmt);

    ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
    while (p < pend) {
        int explicit_endian = 0;
        type = *p++;
#ifdef NATINT_PACK
        natint = 0;
#endif

        if (ISSPACE(type)) continue;
        if (type == '#') {
            while ((p < pend) && (*p != '\n')) {
                p++;
            }
            continue;
        }

        star = 0;
        {
          modifiers:
            /* only look at a modifier byte that is inside the template */
            if (p < pend) {
                switch (*p) {
                  case '_':
                  case '!':

                    if (strchr(natstr, type)) {
#ifdef NATINT_PACK
                        natint = 1;
#endif
                        p++;
                    }
                    else {
                        rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
                    }
                    goto modifiers;

                  case '<':
                  case '>':
                    if (!strchr(endstr, type)) {
                        rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
                    }
                    if (explicit_endian) {
                        rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
                    }
                    explicit_endian = *p++;
                    goto modifiers;
                }
            }
        }

        if (p >= pend)
            len = 1;
        else if (*p == '*') {
            star = 1;
            len = send - s;
            p++;
        }
        else if (ISDIGIT(*p)) {
            errno = 0;
            len = STRTOUL(p, (char**)&p, 10);
            /*
             * A count above LONG_MAX converts to a negative long without
             * setting errno; left unchecked it lets '@', 'X' and 'x'
             * move the cursor outside the string.
             */
            if (len < 0 || errno) {
                rb_raise(rb_eRangeError, "pack length too big");
            }
        }
        else {
            len = (type != '@');
        }

        switch (type) {
          case '%':
            rb_raise(rb_eArgError, "%% is not supported");
            break;

          case 'A':
            if (len > send - s) len = send - s;
            {
                long end = len;
                char *t = s + len - 1;

                /* drop trailing spaces and NULs from the pushed value */
                while (t >= s) {
                    if (*t != ' ' && *t != '\0') break;
                    t--; len--;
                }
                UNPACK_PUSH(infected_str_new(s, len, str));
                s += end;
            }
            break;

          case 'Z':
            {
                char *t = s;

                if (len > send-s) len = send-s;
                while (t < s+len && *t) t++;
                UNPACK_PUSH(infected_str_new(s, t-s, str));
                if (t < send) t++;
                s = star ? t : s+len;
            }
            break;

          case 'a':
            if (len > send - s) len = send - s;
            UNPACK_PUSH(infected_str_new(s, len, str));
            s += len;
            break;

          case 'b':
            {
                VALUE bitstr;
                char *t;
                int bits;
                long i;

                if (p[-1] == '*' || len > (send - s) * 8)
                    len = (send - s) * 8;
                bits = 0;
                bitstr = rb_usascii_str_new(0, len);
                t = RSTRING_PTR(bitstr);
                for (i=0; i<len; i++) {
                    if (i & 7) bits >>= 1;
                    else bits = (unsigned char)*s++;
                    *t++ = (bits & 1) ? '1' : '0';
                }
                UNPACK_PUSH(bitstr);
            }
            break;

          case 'B':
            {
                VALUE bitstr;
                char *t;
                int bits;
                long i;

                if (p[-1] == '*' || len > (send - s) * 8)
                    len = (send - s) * 8;
                bits = 0;
                bitstr = rb_usascii_str_new(0, len);
                t = RSTRING_PTR(bitstr);
                for (i=0; i<len; i++) {
                    if (i & 7) bits <<= 1;
                    else bits = (unsigned char)*s++;
                    *t++ = (bits & 128) ? '1' : '0';
                }
                UNPACK_PUSH(bitstr);
            }
            break;

          case 'h':
            {
                VALUE bitstr;
                char *t;
                int bits;
                long i;

                if (p[-1] == '*' || len > (send - s) * 2)
                    len = (send - s) * 2;
                bits = 0;
                bitstr = rb_usascii_str_new(0, len);
                t = RSTRING_PTR(bitstr);
                for (i=0; i<len; i++) {
                    if (i & 1)
                        bits >>= 4;
                    else
                        bits = (unsigned char)*s++;
                    *t++ = hexdigits[bits & 15];
                }
                UNPACK_PUSH(bitstr);
            }
            break;

          case 'H':
            {
                VALUE bitstr;
                char *t;
                int bits;
                long i;

                if (p[-1] == '*' || len > (send - s) * 2)
                    len = (send - s) * 2;
                bits = 0;
                bitstr = rb_usascii_str_new(0, len);
                t = RSTRING_PTR(bitstr);
                for (i=0; i<len; i++) {
                    if (i & 1)
                        bits <<= 4;
                    else
                        bits = (unsigned char)*s++;
                    *t++ = hexdigits[(bits >> 4) & 15];
                }
                UNPACK_PUSH(bitstr);
            }
            break;

          case 'c':
            signed_p = 1;
            integer_size = 1;
            bigendian_p = BIGENDIAN_P(); /* not effective */
            goto unpack_integer;

          case 'C':
            signed_p = 0;
            integer_size = 1;
            bigendian_p = BIGENDIAN_P(); /* not effective */
            goto unpack_integer;

          case 's':
            signed_p = 1;
            integer_size = NATINT_LEN(short, 2);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'S':
            signed_p = 0;
            integer_size = NATINT_LEN(short, 2);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'i':
            signed_p = 1;
            integer_size = (int)sizeof(int);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'I':
            signed_p = 0;
            integer_size = (int)sizeof(int);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'l':
            signed_p = 1;
            integer_size = NATINT_LEN(long, 4);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'L':
            signed_p = 0;
            integer_size = NATINT_LEN(long, 4);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'q':
            signed_p = 1;
            integer_size = NATINT_LEN_Q;
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'Q':
            signed_p = 0;
            integer_size = NATINT_LEN_Q;
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'j':
            signed_p = 1;
            integer_size = sizeof(intptr_t);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'J':
            signed_p = 0;
            integer_size = sizeof(uintptr_t);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'n':
            signed_p = 0;
            integer_size = 2;
            bigendian_p = 1;
            goto unpack_integer;

          case 'N':
            signed_p = 0;
            integer_size = 4;
            bigendian_p = 1;
            goto unpack_integer;

          case 'v':
            signed_p = 0;
            integer_size = 2;
            bigendian_p = 0;
            goto unpack_integer;

          case 'V':
            signed_p = 0;
            integer_size = 4;
            bigendian_p = 0;
            goto unpack_integer;

          unpack_integer:
            /* shared tail of every fixed-width integer directive */
            if (explicit_endian) {
                bigendian_p = explicit_endian == '>';
            }
            PACK_LENGTH_ADJUST_SIZE(integer_size);
            while (len-- > 0) {
                int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
                VALUE val;
                if (signed_p)
                    flags |= INTEGER_PACK_2COMP;
                val = rb_integer_unpack(s, integer_size, 1, 0, flags);
                UNPACK_PUSH(val);
                s += integer_size;
            }
            PACK_ITEM_ADJUST();
            break;

          case 'f':
          case 'F':
            PACK_LENGTH_ADJUST_SIZE(sizeof(float));
            while (len-- > 0) {
                float tmp;
                memcpy(&tmp, s, sizeof(float));
                s += sizeof(float);
                UNPACK_PUSH(DBL2NUM((double)tmp));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'e':
            PACK_LENGTH_ADJUST_SIZE(sizeof(float));
            while (len-- > 0) {
                FLOAT_CONVWITH(tmp);
                memcpy(tmp.buf, s, sizeof(float));
                s += sizeof(float);
                VTOHF(tmp);
                UNPACK_PUSH(DBL2NUM(tmp.f));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'E':
            PACK_LENGTH_ADJUST_SIZE(sizeof(double));
            while (len-- > 0) {
                DOUBLE_CONVWITH(tmp);
                memcpy(tmp.buf, s, sizeof(double));
                s += sizeof(double);
                VTOHD(tmp);
                UNPACK_PUSH(DBL2NUM(tmp.d));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'D':
          case 'd':
            PACK_LENGTH_ADJUST_SIZE(sizeof(double));
            while (len-- > 0) {
                double tmp;
                memcpy(&tmp, s, sizeof(double));
                s += sizeof(double);
                UNPACK_PUSH(DBL2NUM(tmp));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'g':
            PACK_LENGTH_ADJUST_SIZE(sizeof(float));
            while (len-- > 0) {
                FLOAT_CONVWITH(tmp);
                memcpy(tmp.buf, s, sizeof(float));
                s += sizeof(float);
                NTOHF(tmp);
                UNPACK_PUSH(DBL2NUM(tmp.f));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'G':
            PACK_LENGTH_ADJUST_SIZE(sizeof(double));
            while (len-- > 0) {
                DOUBLE_CONVWITH(tmp);
                memcpy(tmp.buf, s, sizeof(double));
                s += sizeof(double);
                NTOHD(tmp);
                UNPACK_PUSH(DBL2NUM(tmp.d));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'U':
            if (len > send - s) len = send - s;
            while (len > 0 && s < send) {
                long alen = send - s;
                unsigned long l;

                l = utf8_to_uv(s, &alen);
                s += alen; len--;
                UNPACK_PUSH(ULONG2NUM(l));
            }
            break;

          case 'u':
            {
                VALUE buf = infected_str_new(0, (send - s)*3/4, str);
                char *ptr = RSTRING_PTR(buf);
                long total = 0;

                while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
                    long a,b,c,d;
                    char hunk[3];

                    len = ((unsigned char)*s++ - ' ') & 077;

                    /* never decode more than the output buffer can hold */
                    total += len;
                    if (total > RSTRING_LEN(buf)) {
                        len -= total - RSTRING_LEN(buf);
                        total = RSTRING_LEN(buf);
                    }

                    while (len > 0) {
                        long mlen = len > 3 ? 3 : len;

                        if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
                            a = ((unsigned char)*s++ - ' ') & 077;
                        else
                            a = 0;
                        if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
                            b = ((unsigned char)*s++ - ' ') & 077;
                        else
                            b = 0;
                        if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
                            c = ((unsigned char)*s++ - ' ') & 077;
                        else
                            c = 0;
                        if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
                            d = ((unsigned char)*s++ - ' ') & 077;
                        else
                            d = 0;
                        hunk[0] = (char)(a << 2 | b >> 4);
                        hunk[1] = (char)(b << 4 | c >> 2);
                        hunk[2] = (char)(c << 6 | d);
                        memcpy(ptr, hunk, mlen);
                        ptr += mlen;
                        len -= mlen;
                    }
                    if (s < send && (unsigned char)*s != '\r' && *s != '\n')
                        s++;	/* possible checksum byte */
                    if (s < send && *s == '\r') s++;
                    if (s < send && *s == '\n') s++;
                }

                rb_str_set_len(buf, total);
                UNPACK_PUSH(buf);
            }
            break;

          case 'm':
            {
                VALUE buf = infected_str_new(0, (send - s + 3)*3/4, str); /* +3 is for skipping paddings */
                char *ptr = RSTRING_PTR(buf);
                int a = -1,b = -1,c = 0,d = 0;
                static signed char b64_xtable[256];

                /* build the reverse lookup table on first use */
                if (b64_xtable['/'] <= 0) {
                    int i;

                    for (i = 0; i < 256; i++) {
                        b64_xtable[i] = -1;
                    }
                    for (i = 0; i < 64; i++) {
                        b64_xtable[(unsigned char)b64_table[i]] = (char)i;
                    }
                }
                if (len == 0) {
                    /* strict mode: any byte outside the alphabet is an error */
                    while (s < send) {
                        a = b = c = d = -1;
                        a = b64_xtable[(unsigned char)*s++];
                        if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
                        b = b64_xtable[(unsigned char)*s++];
                        if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
                        if (*s == '=') {
                            if (s + 2 == send && *(s + 1) == '=') break;
                            rb_raise(rb_eArgError, "invalid base64");
                        }
                        c = b64_xtable[(unsigned char)*s++];
                        if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
                        if (s + 1 == send && *s == '=') break;
                        d = b64_xtable[(unsigned char)*s++];
                        if (d == -1) rb_raise(rb_eArgError, "invalid base64");
                        *ptr++ = castchar(a << 2 | b >> 4);
                        *ptr++ = castchar(b << 4 | c >> 2);
                        *ptr++ = castchar(c << 6 | d);
                    }
                    if (c == -1) {
                        *ptr++ = castchar(a << 2 | b >> 4);
                        if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
                    }
                    else if (d == -1) {
                        *ptr++ = castchar(a << 2 | b >> 4);
                        *ptr++ = castchar(b << 4 | c >> 2);
                        if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
                    }
                }
                else {
                    /*
                     * Lenient mode: bytes outside the alphabet are skipped.
                     * The bound is tested before each byte is read so the
                     * byte at send is never inspected; a, b, c and d keep
                     * the value -1 when the input runs out.
                     */
                    while (s < send) {
                        a = b = c = d = -1;
                        while (s < send && (a = b64_xtable[(unsigned char)*s]) == -1) {s++;}
                        if (s >= send) break;
                        s++;
                        while (s < send && (b = b64_xtable[(unsigned char)*s]) == -1) {s++;}
                        if (s >= send) break;
                        s++;
                        while (s < send && (c = b64_xtable[(unsigned char)*s]) == -1) {if (*s == '=') break; s++;}
                        if (s >= send || *s == '=') break;
                        s++;
                        while (s < send && (d = b64_xtable[(unsigned char)*s]) == -1) {if (*s == '=') break; s++;}
                        if (s >= send || *s == '=') break;
                        s++;
                        *ptr++ = castchar(a << 2 | b >> 4);
                        *ptr++ = castchar(b << 4 | c >> 2);
                        *ptr++ = castchar(c << 6 | d);
                        a = -1;
                    }
                    if (a != -1 && b != -1) {
                        if (c == -1)
                            *ptr++ = castchar(a << 2 | b >> 4);
                        else {
                            *ptr++ = castchar(a << 2 | b >> 4);
                            *ptr++ = castchar(b << 4 | c >> 2);
                        }
                    }
                }
                rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
                UNPACK_PUSH(buf);
            }
            break;

          case 'M':
            {
                VALUE buf = infected_str_new(0, send - s, str);
                char *ptr = RSTRING_PTR(buf), *ss = s;
                int c1, c2;

                while (s < send) {
                    if (*s == '=') {
                        if (++s == send) break;
                        if (s+1 < send && *s == '\r' && *(s+1) == '\n')
                            s++;
                        if (*s != '\n') {
                            if ((c1 = hex2num(*s)) == -1) break;
                            if (++s == send) break;
                            if ((c2 = hex2num(*s)) == -1) break;
                            *ptr++ = castchar(c1 << 4 | c2);
                        }
                    }
                    else {
                        *ptr++ = *s;
                    }
                    s++;
                    ss = s;
                }
                rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
                /* ss marks the last fully decoded position; copy the rest verbatim */
                rb_str_buf_cat(buf, ss, send-ss);
                ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID);
                UNPACK_PUSH(buf);
            }
            break;

          case '@':
            if (len > RSTRING_LEN(str))
                rb_raise(rb_eArgError, "@ outside of string");
            s = RSTRING_PTR(str) + len;
            break;

          case 'X':
            if (len > s - RSTRING_PTR(str))
                rb_raise(rb_eArgError, "X outside of string");
            s -= len;
            break;

          case 'x':
            if (len > send - s)
                rb_raise(rb_eArgError, "x outside of string");
            s += len;
            break;

          case 'P':
            if (sizeof(char *) <= (size_t)(send - s)) {
                VALUE tmp = Qnil;
                char *t;

                memcpy(&t, s, sizeof(char *));
                s += sizeof(char *);

                if (t) {
                    VALUE a;
                    const VALUE *p, *pend;

                    if (!(a = str_associated(str))) {
                        rb_raise(rb_eArgError, "no associated pointer");
                    }
                    p = RARRAY_CONST_PTR(a);
                    pend = p + RARRAY_LEN(a);
                    /* accept the pointer only if an associated string owns it */
                    while (p < pend) {
                        if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
                            if (len < RSTRING_LEN(*p)) {
                                tmp = rb_tainted_str_new(t, len);
                                str_associate(tmp, a);
                            }
                            else {
                                tmp = *p;
                            }
                            break;
                        }
                        p++;
                    }
                    if (p == pend) {
                        rb_raise(rb_eArgError, "non associated pointer");
                    }
                }
                UNPACK_PUSH(tmp);
            }
            break;

          case 'p':
            if (len > (long)((send - s) / sizeof(char *)))
                len = (send - s) / sizeof(char *);
            while (len-- > 0) {
                if ((size_t)(send - s) < sizeof(char *))
                    break;
                else {
                    VALUE tmp = Qnil;
                    char *t;

                    memcpy(&t, s, sizeof(char *));
                    s += sizeof(char *);

                    if (t) {
                        VALUE a;
                        const VALUE *p, *pend;

                        if (!(a = str_associated(str))) {
                            rb_raise(rb_eArgError, "no associated pointer");
                        }
                        p = RARRAY_CONST_PTR(a);
                        pend = p + RARRAY_LEN(a);
                        while (p < pend) {
                            if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
                                tmp = *p;
                                break;
                            }
                            p++;
                        }
                        if (p == pend) {
                            rb_raise(rb_eArgError, "non associated pointer");
                        }
                    }
                    UNPACK_PUSH(tmp);
                }
            }
            break;

          case 'w':
            {
                char *s0 = s;
                /* a byte with the high bit clear ends one variable-length integer */
                while (len > 0 && s < send) {
                    if (*s & 0x80) {
                        s++;
                    }
                    else {
                        s++;
                        UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
                        len--;
                        s0 = s;
                    }
                }
            }
            break;

          default:
            rb_warning("unknown unpack directive '%c' in '%s'",
                type, RSTRING_PTR(fmt));
            break;
        }
    }

    *parsed_len = s - init_s;
    return ary;
}
|
996
|
+
|
997
|
+
int
|
998
|
+
rb_uv_to_utf8(char buf[6], unsigned long uv)
|
999
|
+
{
|
1000
|
+
if (uv <= 0x7f) {
|
1001
|
+
buf[0] = (char)uv;
|
1002
|
+
return 1;
|
1003
|
+
}
|
1004
|
+
if (uv <= 0x7ff) {
|
1005
|
+
buf[0] = castchar(((uv>>6)&0xff)|0xc0);
|
1006
|
+
buf[1] = castchar((uv&0x3f)|0x80);
|
1007
|
+
return 2;
|
1008
|
+
}
|
1009
|
+
if (uv <= 0xffff) {
|
1010
|
+
buf[0] = castchar(((uv>>12)&0xff)|0xe0);
|
1011
|
+
buf[1] = castchar(((uv>>6)&0x3f)|0x80);
|
1012
|
+
buf[2] = castchar((uv&0x3f)|0x80);
|
1013
|
+
return 3;
|
1014
|
+
}
|
1015
|
+
if (uv <= 0x1fffff) {
|
1016
|
+
buf[0] = castchar(((uv>>18)&0xff)|0xf0);
|
1017
|
+
buf[1] = castchar(((uv>>12)&0x3f)|0x80);
|
1018
|
+
buf[2] = castchar(((uv>>6)&0x3f)|0x80);
|
1019
|
+
buf[3] = castchar((uv&0x3f)|0x80);
|
1020
|
+
return 4;
|
1021
|
+
}
|
1022
|
+
if (uv <= 0x3ffffff) {
|
1023
|
+
buf[0] = castchar(((uv>>24)&0xff)|0xf8);
|
1024
|
+
buf[1] = castchar(((uv>>18)&0x3f)|0x80);
|
1025
|
+
buf[2] = castchar(((uv>>12)&0x3f)|0x80);
|
1026
|
+
buf[3] = castchar(((uv>>6)&0x3f)|0x80);
|
1027
|
+
buf[4] = castchar((uv&0x3f)|0x80);
|
1028
|
+
return 5;
|
1029
|
+
}
|
1030
|
+
if (uv <= 0x7fffffff) {
|
1031
|
+
buf[0] = castchar(((uv>>30)&0xff)|0xfc);
|
1032
|
+
buf[1] = castchar(((uv>>24)&0x3f)|0x80);
|
1033
|
+
buf[2] = castchar(((uv>>18)&0x3f)|0x80);
|
1034
|
+
buf[3] = castchar(((uv>>12)&0x3f)|0x80);
|
1035
|
+
buf[4] = castchar(((uv>>6)&0x3f)|0x80);
|
1036
|
+
buf[5] = castchar((uv&0x3f)|0x80);
|
1037
|
+
return 6;
|
1038
|
+
}
|
1039
|
+
rb_raise(rb_eRangeError, "pack(U): value out of range");
|
1040
|
+
|
1041
|
+
UNREACHABLE;
|
1042
|
+
}
|
1043
|
+
|
1044
|
+
static const unsigned long utf8_limits[] = {
|
1045
|
+
0x0, /* 1 */
|
1046
|
+
0x80, /* 2 */
|
1047
|
+
0x800, /* 3 */
|
1048
|
+
0x10000, /* 4 */
|
1049
|
+
0x200000, /* 5 */
|
1050
|
+
0x4000000, /* 6 */
|
1051
|
+
0x80000000, /* 7 */
|
1052
|
+
};
|
1053
|
+
|
1054
|
+
static unsigned long
|
1055
|
+
utf8_to_uv(const char *p, long *lenp)
|
1056
|
+
{
|
1057
|
+
int c = *p++ & 0xff;
|
1058
|
+
unsigned long uv = c;
|
1059
|
+
long n;
|
1060
|
+
|
1061
|
+
if (!(uv & 0x80)) {
|
1062
|
+
*lenp = 1;
|
1063
|
+
return uv;
|
1064
|
+
}
|
1065
|
+
if (!(uv & 0x40)) {
|
1066
|
+
*lenp = 1;
|
1067
|
+
rb_raise(rb_eArgError, "malformed UTF-8 character");
|
1068
|
+
}
|
1069
|
+
|
1070
|
+
if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
|
1071
|
+
else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
|
1072
|
+
else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
|
1073
|
+
else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
|
1074
|
+
else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
|
1075
|
+
else {
|
1076
|
+
*lenp = 1;
|
1077
|
+
rb_raise(rb_eArgError, "malformed UTF-8 character");
|
1078
|
+
}
|
1079
|
+
if (n > *lenp) {
|
1080
|
+
rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
|
1081
|
+
n, *lenp);
|
1082
|
+
}
|
1083
|
+
*lenp = n--;
|
1084
|
+
if (n != 0) {
|
1085
|
+
while (n--) {
|
1086
|
+
c = *p++ & 0xff;
|
1087
|
+
if ((c & 0xc0) != 0x80) {
|
1088
|
+
*lenp -= n + 1;
|
1089
|
+
rb_raise(rb_eArgError, "malformed UTF-8 character");
|
1090
|
+
}
|
1091
|
+
else {
|
1092
|
+
c &= 0x3f;
|
1093
|
+
uv = uv << 6 | c;
|
1094
|
+
}
|
1095
|
+
}
|
1096
|
+
}
|
1097
|
+
n = *lenp - 1;
|
1098
|
+
if (uv < utf8_limits[n]) {
|
1099
|
+
rb_raise(rb_eArgError, "redundant UTF-8 sequence");
|
1100
|
+
}
|
1101
|
+
return uv;
|
1102
|
+
}
|