zscan 2.0.3 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,1102 @@
1
+ /**********************************************************************
2
+
3
+ pack.c -
4
+
5
+ $Author$
6
+ created at: Thu Feb 10 15:17:05 JST 1994
7
+
8
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
9
+
10
+ **********************************************************************/
11
+
12
+ #include "internal.h"
13
+ #include <sys/types.h>
14
+ #include <ctype.h>
15
+ #include <errno.h>
16
+
17
/*
 * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
 * instead of HAVE_LONG_LONG or LONG_LONG.
 * This means q! and Q! always refer to the standard long long type and
 * raise ArgumentError on platforms that have no long long type,
 * even if the platform has an implementation-specific 64-bit type.
 * This behavior is consistent with the documentation of pack/unpack.
 */
25
/*
 * Modifier tables used by the template parser.
 *
 * natstr: directives that may be followed by '_' or '!'.  'q' and 'Q' are
 *         listed only when HAVE_TRUE_LONG_LONG is defined.
 * endstr: directives that may be followed by '<' or '>'.
 *
 * NATINT_LEN_Q is the byte width used for 'q'/'Q'.
 */
#ifdef HAVE_TRUE_LONG_LONG
static const char natstr[] = "sSiIlLqQjJ";
/* It is intentional to use long long instead of LONG_LONG. */
# define NATINT_LEN_Q NATINT_LEN(long long, 8)
#else
static const char natstr[] = "sSiIlLjJ";
# define NATINT_LEN_Q 8
#endif
static const char endstr[] = "sSiIlLqQjJ";

/*
 * NATINT_PACK is defined unless short is 2 bytes, long is 4 bytes and
 * (when a true long long exists) long long is 8 bytes.
 */
#if !(SIZEOF_SHORT == 2 && SIZEOF_LONG == 4 && (!defined(HAVE_TRUE_LONG_LONG) || SIZEOF_LONG_LONG == 8))
# define NATINT_PACK
#endif
42
+
43
+ #ifdef DYNAMIC_ENDIAN
44
+ /* for universal binary of NEXTSTEP and MacOS X */
45
+ /* useless since autoconf 2.63? */
46
/*
 * Detect the byte order at run time.
 *
 * Returns 1 when the first byte in memory of an int holding the value 1 is
 * zero (big endian), 0 otherwise.  The answer is computed on the first call
 * and cached in function-local statics for later calls.
 */
static int
is_bigendian(void)
{
    static int probed = 0;      /* doubles as the probe word once set to 1 */
    static int cached;

    if (!probed) {
        const char *first_byte;

        probed = 1;
        first_byte = (const char *)&probed;
        cached = (first_byte[0] == 0);
    }
    return cached;
}
58
+ # define BIGENDIAN_P() (is_bigendian())
59
+ #elif defined(WORDS_BIGENDIAN)
60
+ # define BIGENDIAN_P() 1
61
+ #else
62
+ # define BIGENDIAN_P() 0
63
+ #endif
64
+
65
/*
 * Width in bytes of an integer directive: sizeof(type) when the local
 * `natint` flag is set, otherwise the fixed width `len`.  Without
 * NATINT_PACK the width is always sizeof(type).
 */
#ifdef NATINT_PACK
# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
#else
# define NATINT_LEN(type,len) ((int)sizeof(type))
#endif

/* Overlays that let a float/double be viewed as an integer or as raw bytes. */
typedef union {
    float f;
    uint32_t u;
    char buf[4];
} FLOAT_SWAPPER;

typedef union {
    double d;
    uint64_t u;
    char buf[8];
} DOUBLE_SWAPPER;

#define swapf(x) swap32(x)
#define swapd(x) swap64(x)

/* "n" conversions swap when BIGENDIAN_P() is false ... */
#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
#define rb_htonf(x) rb_ntohf(x)
#define rb_htond(x) rb_ntohd(x)
/* ... while "v" conversions swap when BIGENDIAN_P() is true. */
#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
#define rb_htovf(x) rb_vtohf(x)
#define rb_htovd(x) rb_vtohd(x)

/* In-place conversion of a FLOAT_SWAPPER through its integer member. */
#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
#define HTONF(x) ((x).u = rb_htonf((x).u))
#define HTOVF(x) ((x).u = rb_htovf((x).u))
#define NTOHF(x) ((x).u = rb_ntohf((x).u))
#define VTOHF(x) ((x).u = rb_vtohf((x).u))

/* In-place conversion of a DOUBLE_SWAPPER through its integer member. */
#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
#define HTOND(x) ((x).u = rb_htond((x).u))
#define HTOVD(x) ((x).u = rb_htovd((x).u))
#define NTOHD(x) ((x).u = rb_ntohd((x).u))
#define VTOHD(x) ((x).u = rb_vtohd((x).u))

#define MAX_INTEGER_PACK_SIZE 8
106
+
107
+ static const char toofew[] = "too few arguments";
108
+
109
+ static void encodes(VALUE,const char*,long,int,int);
110
+ static void qpencode(VALUE,VALUE,long);
111
+
112
+ static unsigned long utf8_to_uv(const char*,long*);
113
+
114
+ static ID id_associated;
115
+
116
+ static void
117
+ str_associate(VALUE str, VALUE add)
118
+ {
119
+ /* assert(NIL_P(rb_attr_get(str, id_associated))); */
120
+ rb_ivar_set(str, id_associated, add);
121
+ }
122
+
123
+ static VALUE
124
+ str_associated(VALUE str)
125
+ {
126
+ return rb_ivar_lookup(str, id_associated, Qfalse);
127
+ }
128
+
129
+ static const char uu_table[] =
130
+ "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
131
+ static const char b64_table[] =
132
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
133
+
134
+ static void
135
+ encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
136
+ {
137
+ enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
138
+ char buff[buff_size + 1]; /* +1 for tail_lf */
139
+ long i = 0;
140
+ const char *const trans = type == 'u' ? uu_table : b64_table;
141
+ char padding;
142
+ const unsigned char *s = (const unsigned char *)s0;
143
+
144
+ if (type == 'u') {
145
+ buff[i++] = (char)len + ' ';
146
+ padding = '`';
147
+ }
148
+ else {
149
+ padding = '=';
150
+ }
151
+ while (len >= input_unit) {
152
+ while (len >= input_unit && buff_size-i >= encoded_unit) {
153
+ buff[i++] = trans[077 & (*s >> 2)];
154
+ buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
155
+ buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
156
+ buff[i++] = trans[077 & s[2]];
157
+ s += input_unit;
158
+ len -= input_unit;
159
+ }
160
+ if (buff_size-i < encoded_unit) {
161
+ rb_str_buf_cat(str, buff, i);
162
+ i = 0;
163
+ }
164
+ }
165
+
166
+ if (len == 2) {
167
+ buff[i++] = trans[077 & (*s >> 2)];
168
+ buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
169
+ buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
170
+ buff[i++] = padding;
171
+ }
172
+ else if (len == 1) {
173
+ buff[i++] = trans[077 & (*s >> 2)];
174
+ buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
175
+ buff[i++] = padding;
176
+ buff[i++] = padding;
177
+ }
178
+ if (tail_lf) buff[i++] = '\n';
179
+ rb_str_buf_cat(str, buff, i);
180
+ if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
181
+ }
182
+
183
+ static const char hex_table[] = "0123456789ABCDEF";
184
+
185
+ static void
186
+ qpencode(VALUE str, VALUE from, long len)
187
+ {
188
+ char buff[1024];
189
+ long i = 0, n = 0, prev = EOF;
190
+ unsigned char *s = (unsigned char*)RSTRING_PTR(from);
191
+ unsigned char *send = s + RSTRING_LEN(from);
192
+
193
+ while (s < send) {
194
+ if ((*s > 126) ||
195
+ (*s < 32 && *s != '\n' && *s != '\t') ||
196
+ (*s == '=')) {
197
+ buff[i++] = '=';
198
+ buff[i++] = hex_table[*s >> 4];
199
+ buff[i++] = hex_table[*s & 0x0f];
200
+ n += 3;
201
+ prev = EOF;
202
+ }
203
+ else if (*s == '\n') {
204
+ if (prev == ' ' || prev == '\t') {
205
+ buff[i++] = '=';
206
+ buff[i++] = *s;
207
+ }
208
+ buff[i++] = *s;
209
+ n = 0;
210
+ prev = *s;
211
+ }
212
+ else {
213
+ buff[i++] = *s;
214
+ n++;
215
+ prev = *s;
216
+ }
217
+ if (n > len) {
218
+ buff[i++] = '=';
219
+ buff[i++] = '\n';
220
+ n = 0;
221
+ prev = '\n';
222
+ }
223
+ if (i > 1024 - 5) {
224
+ rb_str_buf_cat(str, buff, i);
225
+ i = 0;
226
+ }
227
+ s++;
228
+ }
229
+ if (n > 0) {
230
+ buff[i++] = '=';
231
+ buff[i++] = '\n';
232
+ }
233
+ if (i > 0) {
234
+ rb_str_buf_cat(str, buff, i);
235
+ }
236
+ }
237
+
238
+ static inline int
239
+ hex2num(char c)
240
+ {
241
+ int n;
242
+ n = ruby_digit36_to_number_table[(unsigned char)c];
243
+ if (16 <= n)
244
+ n = -1;
245
+ return n;
246
+ }
247
+
248
/*
 * Clamp the repeat count `len` to the number of whole `sz`-byte items left
 * between `s` and `send`.  When the count was explicit (not '*'), the number
 * of items that could not be satisfied is left in `tmp_len`; otherwise
 * `tmp_len` is 0.  Uses the caller's locals len, tmp_len, star, s and send.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do {	\
    const long pack_avail_ = (long)((send-s)/(sz));	\
    tmp_len = 0;				\
    if (len > pack_avail_) {			\
        if (!star) {				\
            tmp_len = len - pack_avail_;	\
        }					\
        len = pack_avail_;			\
    }						\
} while (0)

/*
 * In array mode, extend `ary` by `tmp_len` slots by storing Qnil at the
 * index RARRAY_LEN(ary)+tmp_len-1.  Does nothing when tmp_len is 0 or when
 * results are not being collected into an array.
 */
#define PACK_ITEM_ADJUST() do {			\
    if (tmp_len > 0 && mode == UNPACK_ARRAY)	\
        rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)
262
+
263
+ /* Workaround for Oracle Solaris Studio 12.4 C compiler optimization bug
264
+ * with "-xO4" optimization option.
265
+ */
266
+ #if defined(__SUNPRO_C) && __SUNPRO_C == 0x5130
267
+ # define AVOID_CC_BUG volatile
268
+ #else
269
+ # define AVOID_CC_BUG
270
+ #endif
271
+
272
+ static VALUE
273
+ infected_str_new(const char *ptr, long len, VALUE str)
274
+ {
275
+ VALUE s = rb_str_new(ptr, len);
276
+
277
+ OBJ_INFECT(s, str);
278
+ return s;
279
+ }
280
+
281
+ /* unpack mode */
282
/* unpack mode: where decoded items are delivered */
#define UNPACK_ARRAY 0
#define UNPACK_BLOCK 1
#define UNPACK_1 2

/*
 * Truncate an integer expression to its low 8 bits and convert it to char.
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression wherever it is used (e.g. as an operand of sizeof).
 */
#define castchar(from) ((char)((from) & 0xff))
287
+
288
+ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
289
+ {
290
+ #define hexdigits ruby_hexdigits
291
+ char *init_s, *s, *send;
292
+ char *p, *pend;
293
+ VALUE ary;
294
+ char type;
295
+ long len;
296
+ AVOID_CC_BUG long tmp_len;
297
+ int star;
298
+ #ifdef NATINT_PACK
299
+ int natint; /* native integer */
300
+ #endif
301
+ int signed_p, integer_size, bigendian_p;
302
+ int mode = (rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY);
303
+ #define UNPACK_PUSH(item) do {\
304
+ VALUE item_val = (item);\
305
+ if ((mode) == UNPACK_BLOCK) {\
306
+ rb_yield(item_val);\
307
+ }\
308
+ else {\
309
+ rb_ary_push(ary, item_val);\
310
+ }\
311
+ } while (0)
312
+
313
+ StringValue(str);
314
+ StringValue(fmt);
315
+ init_s = s = RSTRING_PTR(str);
316
+ send = s + RSTRING_LEN(str);
317
+ p = RSTRING_PTR(fmt);
318
+ pend = p + RSTRING_LEN(fmt);
319
+
320
+ ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
321
+ while (p < pend) {
322
+ int explicit_endian = 0;
323
+ type = *p++;
324
+ #ifdef NATINT_PACK
325
+ natint = 0;
326
+ #endif
327
+
328
+ if (ISSPACE(type)) continue;
329
+ if (type == '#') {
330
+ while ((p < pend) && (*p != '\n')) {
331
+ p++;
332
+ }
333
+ continue;
334
+ }
335
+
336
+ star = 0;
337
+ {
338
+ modifiers:
339
+ switch (*p) {
340
+ case '_':
341
+ case '!':
342
+
343
+ if (strchr(natstr, type)) {
344
+ #ifdef NATINT_PACK
345
+ natint = 1;
346
+ #endif
347
+ p++;
348
+ }
349
+ else {
350
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
351
+ }
352
+ goto modifiers;
353
+
354
+ case '<':
355
+ case '>':
356
+ if (!strchr(endstr, type)) {
357
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
358
+ }
359
+ if (explicit_endian) {
360
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
361
+ }
362
+ explicit_endian = *p++;
363
+ goto modifiers;
364
+ }
365
+ }
366
+
367
+ if (p >= pend)
368
+ len = 1;
369
+ else if (*p == '*') {
370
+ star = 1;
371
+ len = send - s;
372
+ p++;
373
+ }
374
+ else if (ISDIGIT(*p)) {
375
+ errno = 0;
376
+ len = STRTOUL(p, (char**)&p, 10);
377
+ if (errno) {
378
+ rb_raise(rb_eRangeError, "pack length too big");
379
+ }
380
+ }
381
+ else {
382
+ len = (type != '@');
383
+ }
384
+
385
+ switch (type) {
386
+ case '%':
387
+ rb_raise(rb_eArgError, "%% is not supported");
388
+ break;
389
+
390
+ case 'A':
391
+ if (len > send - s) len = send - s;
392
+ {
393
+ long end = len;
394
+ char *t = s + len - 1;
395
+
396
+ while (t >= s) {
397
+ if (*t != ' ' && *t != '\0') break;
398
+ t--; len--;
399
+ }
400
+ UNPACK_PUSH(infected_str_new(s, len, str));
401
+ s += end;
402
+ }
403
+ break;
404
+
405
+ case 'Z':
406
+ {
407
+ char *t = s;
408
+
409
+ if (len > send-s) len = send-s;
410
+ while (t < s+len && *t) t++;
411
+ UNPACK_PUSH(infected_str_new(s, t-s, str));
412
+ if (t < send) t++;
413
+ s = star ? t : s+len;
414
+ }
415
+ break;
416
+
417
+ case 'a':
418
+ if (len > send - s) len = send - s;
419
+ UNPACK_PUSH(infected_str_new(s, len, str));
420
+ s += len;
421
+ break;
422
+
423
+ case 'b':
424
+ {
425
+ VALUE bitstr;
426
+ char *t;
427
+ int bits;
428
+ long i;
429
+
430
+ if (p[-1] == '*' || len > (send - s) * 8)
431
+ len = (send - s) * 8;
432
+ bits = 0;
433
+ bitstr = rb_usascii_str_new(0, len);
434
+ t = RSTRING_PTR(bitstr);
435
+ for (i=0; i<len; i++) {
436
+ if (i & 7) bits >>= 1;
437
+ else bits = (unsigned char)*s++;
438
+ *t++ = (bits & 1) ? '1' : '0';
439
+ }
440
+ UNPACK_PUSH(bitstr);
441
+ }
442
+ break;
443
+
444
+ case 'B':
445
+ {
446
+ VALUE bitstr;
447
+ char *t;
448
+ int bits;
449
+ long i;
450
+
451
+ if (p[-1] == '*' || len > (send - s) * 8)
452
+ len = (send - s) * 8;
453
+ bits = 0;
454
+ bitstr = rb_usascii_str_new(0, len);
455
+ t = RSTRING_PTR(bitstr);
456
+ for (i=0; i<len; i++) {
457
+ if (i & 7) bits <<= 1;
458
+ else bits = (unsigned char)*s++;
459
+ *t++ = (bits & 128) ? '1' : '0';
460
+ }
461
+ UNPACK_PUSH(bitstr);
462
+ }
463
+ break;
464
+
465
+ case 'h':
466
+ {
467
+ VALUE bitstr;
468
+ char *t;
469
+ int bits;
470
+ long i;
471
+
472
+ if (p[-1] == '*' || len > (send - s) * 2)
473
+ len = (send - s) * 2;
474
+ bits = 0;
475
+ bitstr = rb_usascii_str_new(0, len);
476
+ t = RSTRING_PTR(bitstr);
477
+ for (i=0; i<len; i++) {
478
+ if (i & 1)
479
+ bits >>= 4;
480
+ else
481
+ bits = (unsigned char)*s++;
482
+ *t++ = hexdigits[bits & 15];
483
+ }
484
+ UNPACK_PUSH(bitstr);
485
+ }
486
+ break;
487
+
488
+ case 'H':
489
+ {
490
+ VALUE bitstr;
491
+ char *t;
492
+ int bits;
493
+ long i;
494
+
495
+ if (p[-1] == '*' || len > (send - s) * 2)
496
+ len = (send - s) * 2;
497
+ bits = 0;
498
+ bitstr = rb_usascii_str_new(0, len);
499
+ t = RSTRING_PTR(bitstr);
500
+ for (i=0; i<len; i++) {
501
+ if (i & 1)
502
+ bits <<= 4;
503
+ else
504
+ bits = (unsigned char)*s++;
505
+ *t++ = hexdigits[(bits >> 4) & 15];
506
+ }
507
+ UNPACK_PUSH(bitstr);
508
+ }
509
+ break;
510
+
511
+ case 'c':
512
+ signed_p = 1;
513
+ integer_size = 1;
514
+ bigendian_p = BIGENDIAN_P(); /* not effective */
515
+ goto unpack_integer;
516
+
517
+ case 'C':
518
+ signed_p = 0;
519
+ integer_size = 1;
520
+ bigendian_p = BIGENDIAN_P(); /* not effective */
521
+ goto unpack_integer;
522
+
523
+ case 's':
524
+ signed_p = 1;
525
+ integer_size = NATINT_LEN(short, 2);
526
+ bigendian_p = BIGENDIAN_P();
527
+ goto unpack_integer;
528
+
529
+ case 'S':
530
+ signed_p = 0;
531
+ integer_size = NATINT_LEN(short, 2);
532
+ bigendian_p = BIGENDIAN_P();
533
+ goto unpack_integer;
534
+
535
+ case 'i':
536
+ signed_p = 1;
537
+ integer_size = (int)sizeof(int);
538
+ bigendian_p = BIGENDIAN_P();
539
+ goto unpack_integer;
540
+
541
+ case 'I':
542
+ signed_p = 0;
543
+ integer_size = (int)sizeof(int);
544
+ bigendian_p = BIGENDIAN_P();
545
+ goto unpack_integer;
546
+
547
+ case 'l':
548
+ signed_p = 1;
549
+ integer_size = NATINT_LEN(long, 4);
550
+ bigendian_p = BIGENDIAN_P();
551
+ goto unpack_integer;
552
+
553
+ case 'L':
554
+ signed_p = 0;
555
+ integer_size = NATINT_LEN(long, 4);
556
+ bigendian_p = BIGENDIAN_P();
557
+ goto unpack_integer;
558
+
559
+ case 'q':
560
+ signed_p = 1;
561
+ integer_size = NATINT_LEN_Q;
562
+ bigendian_p = BIGENDIAN_P();
563
+ goto unpack_integer;
564
+
565
+ case 'Q':
566
+ signed_p = 0;
567
+ integer_size = NATINT_LEN_Q;
568
+ bigendian_p = BIGENDIAN_P();
569
+ goto unpack_integer;
570
+
571
+ case 'j':
572
+ signed_p = 1;
573
+ integer_size = sizeof(intptr_t);
574
+ bigendian_p = BIGENDIAN_P();
575
+ goto unpack_integer;
576
+
577
+ case 'J':
578
+ signed_p = 0;
579
+ integer_size = sizeof(uintptr_t);
580
+ bigendian_p = BIGENDIAN_P();
581
+ goto unpack_integer;
582
+
583
+ case 'n':
584
+ signed_p = 0;
585
+ integer_size = 2;
586
+ bigendian_p = 1;
587
+ goto unpack_integer;
588
+
589
+ case 'N':
590
+ signed_p = 0;
591
+ integer_size = 4;
592
+ bigendian_p = 1;
593
+ goto unpack_integer;
594
+
595
+ case 'v':
596
+ signed_p = 0;
597
+ integer_size = 2;
598
+ bigendian_p = 0;
599
+ goto unpack_integer;
600
+
601
+ case 'V':
602
+ signed_p = 0;
603
+ integer_size = 4;
604
+ bigendian_p = 0;
605
+ goto unpack_integer;
606
+
607
+ unpack_integer:
608
+ if (explicit_endian) {
609
+ bigendian_p = explicit_endian == '>';
610
+ }
611
+ PACK_LENGTH_ADJUST_SIZE(integer_size);
612
+ while (len-- > 0) {
613
+ int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
614
+ VALUE val;
615
+ if (signed_p)
616
+ flags |= INTEGER_PACK_2COMP;
617
+ val = rb_integer_unpack(s, integer_size, 1, 0, flags);
618
+ UNPACK_PUSH(val);
619
+ s += integer_size;
620
+ }
621
+ PACK_ITEM_ADJUST();
622
+ break;
623
+
624
+ case 'f':
625
+ case 'F':
626
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
627
+ while (len-- > 0) {
628
+ float tmp;
629
+ memcpy(&tmp, s, sizeof(float));
630
+ s += sizeof(float);
631
+ UNPACK_PUSH(DBL2NUM((double)tmp));
632
+ }
633
+ PACK_ITEM_ADJUST();
634
+ break;
635
+
636
+ case 'e':
637
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
638
+ while (len-- > 0) {
639
+ FLOAT_CONVWITH(tmp);
640
+ memcpy(tmp.buf, s, sizeof(float));
641
+ s += sizeof(float);
642
+ VTOHF(tmp);
643
+ UNPACK_PUSH(DBL2NUM(tmp.f));
644
+ }
645
+ PACK_ITEM_ADJUST();
646
+ break;
647
+
648
+ case 'E':
649
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
650
+ while (len-- > 0) {
651
+ DOUBLE_CONVWITH(tmp);
652
+ memcpy(tmp.buf, s, sizeof(double));
653
+ s += sizeof(double);
654
+ VTOHD(tmp);
655
+ UNPACK_PUSH(DBL2NUM(tmp.d));
656
+ }
657
+ PACK_ITEM_ADJUST();
658
+ break;
659
+
660
+ case 'D':
661
+ case 'd':
662
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
663
+ while (len-- > 0) {
664
+ double tmp;
665
+ memcpy(&tmp, s, sizeof(double));
666
+ s += sizeof(double);
667
+ UNPACK_PUSH(DBL2NUM(tmp));
668
+ }
669
+ PACK_ITEM_ADJUST();
670
+ break;
671
+
672
+ case 'g':
673
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
674
+ while (len-- > 0) {
675
+ FLOAT_CONVWITH(tmp);
676
+ memcpy(tmp.buf, s, sizeof(float));
677
+ s += sizeof(float);
678
+ NTOHF(tmp);
679
+ UNPACK_PUSH(DBL2NUM(tmp.f));
680
+ }
681
+ PACK_ITEM_ADJUST();
682
+ break;
683
+
684
+ case 'G':
685
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
686
+ while (len-- > 0) {
687
+ DOUBLE_CONVWITH(tmp);
688
+ memcpy(tmp.buf, s, sizeof(double));
689
+ s += sizeof(double);
690
+ NTOHD(tmp);
691
+ UNPACK_PUSH(DBL2NUM(tmp.d));
692
+ }
693
+ PACK_ITEM_ADJUST();
694
+ break;
695
+
696
+ case 'U':
697
+ if (len > send - s) len = send - s;
698
+ while (len > 0 && s < send) {
699
+ long alen = send - s;
700
+ unsigned long l;
701
+
702
+ l = utf8_to_uv(s, &alen);
703
+ s += alen; len--;
704
+ UNPACK_PUSH(ULONG2NUM(l));
705
+ }
706
+ break;
707
+
708
+ case 'u':
709
+ {
710
+ VALUE buf = infected_str_new(0, (send - s)*3/4, str);
711
+ char *ptr = RSTRING_PTR(buf);
712
+ long total = 0;
713
+
714
+ while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
715
+ long a,b,c,d;
716
+ char hunk[3];
717
+
718
+ len = ((unsigned char)*s++ - ' ') & 077;
719
+
720
+ total += len;
721
+ if (total > RSTRING_LEN(buf)) {
722
+ len -= total - RSTRING_LEN(buf);
723
+ total = RSTRING_LEN(buf);
724
+ }
725
+
726
+ while (len > 0) {
727
+ long mlen = len > 3 ? 3 : len;
728
+
729
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
730
+ a = ((unsigned char)*s++ - ' ') & 077;
731
+ else
732
+ a = 0;
733
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
734
+ b = ((unsigned char)*s++ - ' ') & 077;
735
+ else
736
+ b = 0;
737
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
738
+ c = ((unsigned char)*s++ - ' ') & 077;
739
+ else
740
+ c = 0;
741
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
742
+ d = ((unsigned char)*s++ - ' ') & 077;
743
+ else
744
+ d = 0;
745
+ hunk[0] = (char)(a << 2 | b >> 4);
746
+ hunk[1] = (char)(b << 4 | c >> 2);
747
+ hunk[2] = (char)(c << 6 | d);
748
+ memcpy(ptr, hunk, mlen);
749
+ ptr += mlen;
750
+ len -= mlen;
751
+ }
752
+ if (s < send && (unsigned char)*s != '\r' && *s != '\n')
753
+ s++; /* possible checksum byte */
754
+ if (s < send && *s == '\r') s++;
755
+ if (s < send && *s == '\n') s++;
756
+ }
757
+
758
+ rb_str_set_len(buf, total);
759
+ UNPACK_PUSH(buf);
760
+ }
761
+ break;
762
+
763
+ case 'm':
764
+ {
765
+ VALUE buf = infected_str_new(0, (send - s + 3)*3/4, str); /* +3 is for skipping paddings */
766
+ char *ptr = RSTRING_PTR(buf);
767
+ int a = -1,b = -1,c = 0,d = 0;
768
+ static signed char b64_xtable[256];
769
+
770
+ if (b64_xtable['/'] <= 0) {
771
+ int i;
772
+
773
+ for (i = 0; i < 256; i++) {
774
+ b64_xtable[i] = -1;
775
+ }
776
+ for (i = 0; i < 64; i++) {
777
+ b64_xtable[(unsigned char)b64_table[i]] = (char)i;
778
+ }
779
+ }
780
+ if (len == 0) {
781
+ while (s < send) {
782
+ a = b = c = d = -1;
783
+ a = b64_xtable[(unsigned char)*s++];
784
+ if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
785
+ b = b64_xtable[(unsigned char)*s++];
786
+ if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
787
+ if (*s == '=') {
788
+ if (s + 2 == send && *(s + 1) == '=') break;
789
+ rb_raise(rb_eArgError, "invalid base64");
790
+ }
791
+ c = b64_xtable[(unsigned char)*s++];
792
+ if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
793
+ if (s + 1 == send && *s == '=') break;
794
+ d = b64_xtable[(unsigned char)*s++];
795
+ if (d == -1) rb_raise(rb_eArgError, "invalid base64");
796
+ *ptr++ = castchar(a << 2 | b >> 4);
797
+ *ptr++ = castchar(b << 4 | c >> 2);
798
+ *ptr++ = castchar(c << 6 | d);
799
+ }
800
+ if (c == -1) {
801
+ *ptr++ = castchar(a << 2 | b >> 4);
802
+ if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
803
+ }
804
+ else if (d == -1) {
805
+ *ptr++ = castchar(a << 2 | b >> 4);
806
+ *ptr++ = castchar(b << 4 | c >> 2);
807
+ if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
808
+ }
809
+ }
810
+ else {
811
+ while (s < send) {
812
+ a = b = c = d = -1;
813
+ while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
814
+ if (s >= send) break;
815
+ s++;
816
+ while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
817
+ if (s >= send) break;
818
+ s++;
819
+ while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
820
+ if (*s == '=' || s >= send) break;
821
+ s++;
822
+ while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
823
+ if (*s == '=' || s >= send) break;
824
+ s++;
825
+ *ptr++ = castchar(a << 2 | b >> 4);
826
+ *ptr++ = castchar(b << 4 | c >> 2);
827
+ *ptr++ = castchar(c << 6 | d);
828
+ a = -1;
829
+ }
830
+ if (a != -1 && b != -1) {
831
+ if (c == -1)
832
+ *ptr++ = castchar(a << 2 | b >> 4);
833
+ else {
834
+ *ptr++ = castchar(a << 2 | b >> 4);
835
+ *ptr++ = castchar(b << 4 | c >> 2);
836
+ }
837
+ }
838
+ }
839
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
840
+ UNPACK_PUSH(buf);
841
+ }
842
+ break;
843
+
844
+ case 'M':
845
+ {
846
+ VALUE buf = infected_str_new(0, send - s, str);
847
+ char *ptr = RSTRING_PTR(buf), *ss = s;
848
+ int c1, c2;
849
+
850
+ while (s < send) {
851
+ if (*s == '=') {
852
+ if (++s == send) break;
853
+ if (s+1 < send && *s == '\r' && *(s+1) == '\n')
854
+ s++;
855
+ if (*s != '\n') {
856
+ if ((c1 = hex2num(*s)) == -1) break;
857
+ if (++s == send) break;
858
+ if ((c2 = hex2num(*s)) == -1) break;
859
+ *ptr++ = castchar(c1 << 4 | c2);
860
+ }
861
+ }
862
+ else {
863
+ *ptr++ = *s;
864
+ }
865
+ s++;
866
+ ss = s;
867
+ }
868
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
869
+ rb_str_buf_cat(buf, ss, send-ss);
870
+ ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID);
871
+ UNPACK_PUSH(buf);
872
+ }
873
+ break;
874
+
875
+ case '@':
876
+ if (len > RSTRING_LEN(str))
877
+ rb_raise(rb_eArgError, "@ outside of string");
878
+ s = RSTRING_PTR(str) + len;
879
+ break;
880
+
881
+ case 'X':
882
+ if (len > s - RSTRING_PTR(str))
883
+ rb_raise(rb_eArgError, "X outside of string");
884
+ s -= len;
885
+ break;
886
+
887
+ case 'x':
888
+ if (len > send - s)
889
+ rb_raise(rb_eArgError, "x outside of string");
890
+ s += len;
891
+ break;
892
+
893
+ case 'P':
894
+ if (sizeof(char *) <= (size_t)(send - s)) {
895
+ VALUE tmp = Qnil;
896
+ char *t;
897
+
898
+ memcpy(&t, s, sizeof(char *));
899
+ s += sizeof(char *);
900
+
901
+ if (t) {
902
+ VALUE a;
903
+ const VALUE *p, *pend;
904
+
905
+ if (!(a = str_associated(str))) {
906
+ rb_raise(rb_eArgError, "no associated pointer");
907
+ }
908
+ p = RARRAY_CONST_PTR(a);
909
+ pend = p + RARRAY_LEN(a);
910
+ while (p < pend) {
911
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
912
+ if (len < RSTRING_LEN(*p)) {
913
+ tmp = rb_tainted_str_new(t, len);
914
+ str_associate(tmp, a);
915
+ }
916
+ else {
917
+ tmp = *p;
918
+ }
919
+ break;
920
+ }
921
+ p++;
922
+ }
923
+ if (p == pend) {
924
+ rb_raise(rb_eArgError, "non associated pointer");
925
+ }
926
+ }
927
+ UNPACK_PUSH(tmp);
928
+ }
929
+ break;
930
+
931
+ case 'p':
932
+ if (len > (long)((send - s) / sizeof(char *)))
933
+ len = (send - s) / sizeof(char *);
934
+ while (len-- > 0) {
935
+ if ((size_t)(send - s) < sizeof(char *))
936
+ break;
937
+ else {
938
+ VALUE tmp = Qnil;
939
+ char *t;
940
+
941
+ memcpy(&t, s, sizeof(char *));
942
+ s += sizeof(char *);
943
+
944
+ if (t) {
945
+ VALUE a;
946
+ const VALUE *p, *pend;
947
+
948
+ if (!(a = str_associated(str))) {
949
+ rb_raise(rb_eArgError, "no associated pointer");
950
+ }
951
+ p = RARRAY_CONST_PTR(a);
952
+ pend = p + RARRAY_LEN(a);
953
+ while (p < pend) {
954
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
955
+ tmp = *p;
956
+ break;
957
+ }
958
+ p++;
959
+ }
960
+ if (p == pend) {
961
+ rb_raise(rb_eArgError, "non associated pointer");
962
+ }
963
+ }
964
+ UNPACK_PUSH(tmp);
965
+ }
966
+ }
967
+ break;
968
+
969
+ case 'w':
970
+ {
971
+ char *s0 = s;
972
+ while (len > 0 && s < send) {
973
+ if (*s & 0x80) {
974
+ s++;
975
+ }
976
+ else {
977
+ s++;
978
+ UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
979
+ len--;
980
+ s0 = s;
981
+ }
982
+ }
983
+ }
984
+ break;
985
+
986
+ default:
987
+ rb_warning("unknown unpack directive '%c' in '%s'",
988
+ type, RSTRING_PTR(fmt));
989
+ break;
990
+ }
991
+ }
992
+
993
+ *parsed_len = s - init_s;
994
+ return ary;
995
+ }
996
+
997
+ int
998
+ rb_uv_to_utf8(char buf[6], unsigned long uv)
999
+ {
1000
+ if (uv <= 0x7f) {
1001
+ buf[0] = (char)uv;
1002
+ return 1;
1003
+ }
1004
+ if (uv <= 0x7ff) {
1005
+ buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1006
+ buf[1] = castchar((uv&0x3f)|0x80);
1007
+ return 2;
1008
+ }
1009
+ if (uv <= 0xffff) {
1010
+ buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1011
+ buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1012
+ buf[2] = castchar((uv&0x3f)|0x80);
1013
+ return 3;
1014
+ }
1015
+ if (uv <= 0x1fffff) {
1016
+ buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1017
+ buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1018
+ buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1019
+ buf[3] = castchar((uv&0x3f)|0x80);
1020
+ return 4;
1021
+ }
1022
+ if (uv <= 0x3ffffff) {
1023
+ buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1024
+ buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1025
+ buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1026
+ buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1027
+ buf[4] = castchar((uv&0x3f)|0x80);
1028
+ return 5;
1029
+ }
1030
+ if (uv <= 0x7fffffff) {
1031
+ buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1032
+ buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1033
+ buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1034
+ buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1035
+ buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1036
+ buf[5] = castchar((uv&0x3f)|0x80);
1037
+ return 6;
1038
+ }
1039
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
1040
+
1041
+ UNREACHABLE;
1042
+ }
1043
+
1044
+ static const unsigned long utf8_limits[] = {
1045
+ 0x0, /* 1 */
1046
+ 0x80, /* 2 */
1047
+ 0x800, /* 3 */
1048
+ 0x10000, /* 4 */
1049
+ 0x200000, /* 5 */
1050
+ 0x4000000, /* 6 */
1051
+ 0x80000000, /* 7 */
1052
+ };
1053
+
1054
+ static unsigned long
1055
+ utf8_to_uv(const char *p, long *lenp)
1056
+ {
1057
+ int c = *p++ & 0xff;
1058
+ unsigned long uv = c;
1059
+ long n;
1060
+
1061
+ if (!(uv & 0x80)) {
1062
+ *lenp = 1;
1063
+ return uv;
1064
+ }
1065
+ if (!(uv & 0x40)) {
1066
+ *lenp = 1;
1067
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
1068
+ }
1069
+
1070
+ if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1071
+ else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1072
+ else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1073
+ else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1074
+ else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1075
+ else {
1076
+ *lenp = 1;
1077
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
1078
+ }
1079
+ if (n > *lenp) {
1080
+ rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1081
+ n, *lenp);
1082
+ }
1083
+ *lenp = n--;
1084
+ if (n != 0) {
1085
+ while (n--) {
1086
+ c = *p++ & 0xff;
1087
+ if ((c & 0xc0) != 0x80) {
1088
+ *lenp -= n + 1;
1089
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
1090
+ }
1091
+ else {
1092
+ c &= 0x3f;
1093
+ uv = uv << 6 | c;
1094
+ }
1095
+ }
1096
+ }
1097
+ n = *lenp - 1;
1098
+ if (uv < utf8_limits[n]) {
1099
+ rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1100
+ }
1101
+ return uv;
1102
+ }