zscan 2.0.3 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1102 @@
1
+ /**********************************************************************
2
+
3
+ pack.c -
4
+
5
+ $Author$
6
+ created at: Thu Feb 10 15:17:05 JST 1994
7
+
8
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
9
+
10
+ **********************************************************************/
11
+
12
+ #include "internal.h"
13
+ #include <sys/types.h>
14
+ #include <ctype.h>
15
+ #include <errno.h>
16
+
17
+ /*
18
+ * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
19
+ * instead of HAVE_LONG_LONG or LONG_LONG.
20
+ * This means q! and Q! always mean the standard long long type and
21
+ * cause ArgumentError on platforms which have no long long type,
22
+ * even if the platform has an implementation specific 64bit type.
23
+ * This behavior is consistent with the document of pack/unpack.
24
+ */
25
+ #ifdef HAVE_TRUE_LONG_LONG
26
+ static const char natstr[] = "sSiIlLqQjJ";
27
+ #else
28
+ static const char natstr[] = "sSiIlLjJ";
29
+ #endif
30
+ static const char endstr[] = "sSiIlLqQjJ";
31
+
32
+ #ifdef HAVE_TRUE_LONG_LONG
33
+ /* It is intentional to use long long instead of LONG_LONG. */
34
+ # define NATINT_LEN_Q NATINT_LEN(long long, 8)
35
+ #else
36
+ # define NATINT_LEN_Q 8
37
+ #endif
38
+
39
+ #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
40
+ # define NATINT_PACK
41
+ #endif
42
+
43
+ #ifdef DYNAMIC_ENDIAN
44
+ /* for universal binary of NEXTSTEP and MacOS X */
45
+ /* useless since autoconf 2.63? */
46
/*
 * Run-time byte-order probe.
 *
 * Returns 1 when the first byte in memory of an int holding the value 1 is
 * zero (big-endian layout) and 0 otherwise.  The answer is computed on the
 * first call and served from a static cache afterwards.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached_result;

    if (!probed) {
        const char *first_byte;

        /* `probed` now holds 1, so it doubles as the probe value. */
        probed = 1;
        first_byte = (const char *)&probed;
        cached_result = (first_byte[0] != 0) ? 0 : 1;
    }
    return cached_result;
}
58
+ # define BIGENDIAN_P() (is_bigendian())
59
+ #elif defined(WORDS_BIGENDIAN)
60
+ # define BIGENDIAN_P() 1
61
+ #else
62
+ # define BIGENDIAN_P() 0
63
+ #endif
64
+
65
+ #ifdef NATINT_PACK
66
+ # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
67
+ #else
68
+ # define NATINT_LEN(type,len) ((int)sizeof(type))
69
+ #endif
70
+
71
/*
 * Overlay used to byte-swap a single-precision float: the same storage can
 * be accessed as a float, as a 32-bit unsigned integer, or as 4 raw bytes.
 */
typedef union {
    float    f;       /* value seen as a float */
    uint32_t u;       /* same bits seen as an integer (what gets swapped) */
    char     buf[4];  /* same bits seen as raw bytes (memcpy target) */
} FLOAT_SWAPPER;
76
/*
 * Overlay used to byte-swap a double-precision float: the same storage can
 * be accessed as a double, as a 64-bit unsigned integer, or as 8 raw bytes.
 */
typedef union {
    double   d;       /* value seen as a double */
    uint64_t u;       /* same bits seen as an integer (what gets swapped) */
    char     buf[8];  /* same bits seen as raw bytes (memcpy target) */
} DOUBLE_SWAPPER;
81
+ #define swapf(x) swap32(x)
82
+ #define swapd(x) swap64(x)
83
+
84
+ #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
85
+ #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
86
+ #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
87
+ #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
88
+ #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
89
+ #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
90
+ #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
91
+ #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
92
+
93
+ #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
94
+ #define HTONF(x) ((x).u = rb_htonf((x).u))
95
+ #define HTOVF(x) ((x).u = rb_htovf((x).u))
96
+ #define NTOHF(x) ((x).u = rb_ntohf((x).u))
97
+ #define VTOHF(x) ((x).u = rb_vtohf((x).u))
98
+
99
+ #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
100
+ #define HTOND(x) ((x).u = rb_htond((x).u))
101
+ #define HTOVD(x) ((x).u = rb_htovd((x).u))
102
+ #define NTOHD(x) ((x).u = rb_ntohd((x).u))
103
+ #define VTOHD(x) ((x).u = rb_vtohd((x).u))
104
+
105
+ #define MAX_INTEGER_PACK_SIZE 8
106
+
107
+ static const char toofew[] = "too few arguments";
108
+
109
+ static void encodes(VALUE,const char*,long,int,int);
110
+ static void qpencode(VALUE,VALUE,long);
111
+
112
+ static unsigned long utf8_to_uv(const char*,long*);
113
+
114
+ static ID id_associated;
115
+
116
+ static void
117
+ str_associate(VALUE str, VALUE add)
118
+ {
119
+ /* assert(NIL_P(rb_attr_get(str, id_associated))); */
120
+ rb_ivar_set(str, id_associated, add);
121
+ }
122
+
123
+ static VALUE
124
+ str_associated(VALUE str)
125
+ {
126
+ return rb_ivar_lookup(str, id_associated, Qfalse);
127
+ }
128
+
129
+ static const char uu_table[] =
130
+ "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
131
+ static const char b64_table[] =
132
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
133
+
134
+ static void
135
+ encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
136
+ {
137
+ enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
138
+ char buff[buff_size + 1]; /* +1 for tail_lf */
139
+ long i = 0;
140
+ const char *const trans = type == 'u' ? uu_table : b64_table;
141
+ char padding;
142
+ const unsigned char *s = (const unsigned char *)s0;
143
+
144
+ if (type == 'u') {
145
+ buff[i++] = (char)len + ' ';
146
+ padding = '`';
147
+ }
148
+ else {
149
+ padding = '=';
150
+ }
151
+ while (len >= input_unit) {
152
+ while (len >= input_unit && buff_size-i >= encoded_unit) {
153
+ buff[i++] = trans[077 & (*s >> 2)];
154
+ buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
155
+ buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
156
+ buff[i++] = trans[077 & s[2]];
157
+ s += input_unit;
158
+ len -= input_unit;
159
+ }
160
+ if (buff_size-i < encoded_unit) {
161
+ rb_str_buf_cat(str, buff, i);
162
+ i = 0;
163
+ }
164
+ }
165
+
166
+ if (len == 2) {
167
+ buff[i++] = trans[077 & (*s >> 2)];
168
+ buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
169
+ buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
170
+ buff[i++] = padding;
171
+ }
172
+ else if (len == 1) {
173
+ buff[i++] = trans[077 & (*s >> 2)];
174
+ buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
175
+ buff[i++] = padding;
176
+ buff[i++] = padding;
177
+ }
178
+ if (tail_lf) buff[i++] = '\n';
179
+ rb_str_buf_cat(str, buff, i);
180
+ if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
181
+ }
182
+
183
+ static const char hex_table[] = "0123456789ABCDEF";
184
+
185
+ static void
186
+ qpencode(VALUE str, VALUE from, long len)
187
+ {
188
+ char buff[1024];
189
+ long i = 0, n = 0, prev = EOF;
190
+ unsigned char *s = (unsigned char*)RSTRING_PTR(from);
191
+ unsigned char *send = s + RSTRING_LEN(from);
192
+
193
+ while (s < send) {
194
+ if ((*s > 126) ||
195
+ (*s < 32 && *s != '\n' && *s != '\t') ||
196
+ (*s == '=')) {
197
+ buff[i++] = '=';
198
+ buff[i++] = hex_table[*s >> 4];
199
+ buff[i++] = hex_table[*s & 0x0f];
200
+ n += 3;
201
+ prev = EOF;
202
+ }
203
+ else if (*s == '\n') {
204
+ if (prev == ' ' || prev == '\t') {
205
+ buff[i++] = '=';
206
+ buff[i++] = *s;
207
+ }
208
+ buff[i++] = *s;
209
+ n = 0;
210
+ prev = *s;
211
+ }
212
+ else {
213
+ buff[i++] = *s;
214
+ n++;
215
+ prev = *s;
216
+ }
217
+ if (n > len) {
218
+ buff[i++] = '=';
219
+ buff[i++] = '\n';
220
+ n = 0;
221
+ prev = '\n';
222
+ }
223
+ if (i > 1024 - 5) {
224
+ rb_str_buf_cat(str, buff, i);
225
+ i = 0;
226
+ }
227
+ s++;
228
+ }
229
+ if (n > 0) {
230
+ buff[i++] = '=';
231
+ buff[i++] = '\n';
232
+ }
233
+ if (i > 0) {
234
+ rb_str_buf_cat(str, buff, i);
235
+ }
236
+ }
237
+
238
+ static inline int
239
+ hex2num(char c)
240
+ {
241
+ int n;
242
+ n = ruby_digit36_to_number_table[(unsigned char)c];
243
+ if (16 <= n)
244
+ n = -1;
245
+ return n;
246
+ }
247
+
248
+ #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
249
+ tmp_len = 0; \
250
+ if (len > (long)((send-s)/(sz))) { \
251
+ if (!star) { \
252
+ tmp_len = len-(send-s)/(sz); \
253
+ } \
254
+ len = (send-s)/(sz); \
255
+ } \
256
+ } while (0)
257
+
258
+ #define PACK_ITEM_ADJUST() do { \
259
+ if (tmp_len > 0 && mode == UNPACK_ARRAY) \
260
+ rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
261
+ } while (0)
262
+
263
+ /* Workaround for Oracle Solaris Studio 12.4 C compiler optimization bug
264
+ * with "-xO4" optimization option.
265
+ */
266
+ #if defined(__SUNPRO_C) && __SUNPRO_C == 0x5130
267
+ # define AVOID_CC_BUG volatile
268
+ #else
269
+ # define AVOID_CC_BUG
270
+ #endif
271
+
272
+ static VALUE
273
+ infected_str_new(const char *ptr, long len, VALUE str)
274
+ {
275
+ VALUE s = rb_str_new(ptr, len);
276
+
277
+ OBJ_INFECT(s, str);
278
+ return s;
279
+ }
280
+
281
+ /* unpack mode */
282
+ #define UNPACK_ARRAY 0
283
+ #define UNPACK_BLOCK 1
284
+ #define UNPACK_1 2
285
+
286
+ #define castchar(from) (char)((from) & 0xff)
287
+
288
+ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
289
+ {
290
+ #define hexdigits ruby_hexdigits
291
+ char *init_s, *s, *send;
292
+ char *p, *pend;
293
+ VALUE ary;
294
+ char type;
295
+ long len;
296
+ AVOID_CC_BUG long tmp_len;
297
+ int star;
298
+ #ifdef NATINT_PACK
299
+ int natint; /* native integer */
300
+ #endif
301
+ int signed_p, integer_size, bigendian_p;
302
+ int mode = (rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY);
303
+ #define UNPACK_PUSH(item) do {\
304
+ VALUE item_val = (item);\
305
+ if ((mode) == UNPACK_BLOCK) {\
306
+ rb_yield(item_val);\
307
+ }\
308
+ else {\
309
+ rb_ary_push(ary, item_val);\
310
+ }\
311
+ } while (0)
312
+
313
+ StringValue(str);
314
+ StringValue(fmt);
315
+ init_s = s = RSTRING_PTR(str);
316
+ send = s + RSTRING_LEN(str);
317
+ p = RSTRING_PTR(fmt);
318
+ pend = p + RSTRING_LEN(fmt);
319
+
320
+ ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
321
+ while (p < pend) {
322
+ int explicit_endian = 0;
323
+ type = *p++;
324
+ #ifdef NATINT_PACK
325
+ natint = 0;
326
+ #endif
327
+
328
+ if (ISSPACE(type)) continue;
329
+ if (type == '#') {
330
+ while ((p < pend) && (*p != '\n')) {
331
+ p++;
332
+ }
333
+ continue;
334
+ }
335
+
336
+ star = 0;
337
+ {
338
+ modifiers:
339
+ switch (*p) {
340
+ case '_':
341
+ case '!':
342
+
343
+ if (strchr(natstr, type)) {
344
+ #ifdef NATINT_PACK
345
+ natint = 1;
346
+ #endif
347
+ p++;
348
+ }
349
+ else {
350
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
351
+ }
352
+ goto modifiers;
353
+
354
+ case '<':
355
+ case '>':
356
+ if (!strchr(endstr, type)) {
357
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
358
+ }
359
+ if (explicit_endian) {
360
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
361
+ }
362
+ explicit_endian = *p++;
363
+ goto modifiers;
364
+ }
365
+ }
366
+
367
+ if (p >= pend)
368
+ len = 1;
369
+ else if (*p == '*') {
370
+ star = 1;
371
+ len = send - s;
372
+ p++;
373
+ }
374
+ else if (ISDIGIT(*p)) {
375
+ errno = 0;
376
+ len = STRTOUL(p, (char**)&p, 10);
377
+ if (errno) {
378
+ rb_raise(rb_eRangeError, "pack length too big");
379
+ }
380
+ }
381
+ else {
382
+ len = (type != '@');
383
+ }
384
+
385
+ switch (type) {
386
+ case '%':
387
+ rb_raise(rb_eArgError, "%% is not supported");
388
+ break;
389
+
390
+ case 'A':
391
+ if (len > send - s) len = send - s;
392
+ {
393
+ long end = len;
394
+ char *t = s + len - 1;
395
+
396
+ while (t >= s) {
397
+ if (*t != ' ' && *t != '\0') break;
398
+ t--; len--;
399
+ }
400
+ UNPACK_PUSH(infected_str_new(s, len, str));
401
+ s += end;
402
+ }
403
+ break;
404
+
405
+ case 'Z':
406
+ {
407
+ char *t = s;
408
+
409
+ if (len > send-s) len = send-s;
410
+ while (t < s+len && *t) t++;
411
+ UNPACK_PUSH(infected_str_new(s, t-s, str));
412
+ if (t < send) t++;
413
+ s = star ? t : s+len;
414
+ }
415
+ break;
416
+
417
+ case 'a':
418
+ if (len > send - s) len = send - s;
419
+ UNPACK_PUSH(infected_str_new(s, len, str));
420
+ s += len;
421
+ break;
422
+
423
+ case 'b':
424
+ {
425
+ VALUE bitstr;
426
+ char *t;
427
+ int bits;
428
+ long i;
429
+
430
+ if (p[-1] == '*' || len > (send - s) * 8)
431
+ len = (send - s) * 8;
432
+ bits = 0;
433
+ bitstr = rb_usascii_str_new(0, len);
434
+ t = RSTRING_PTR(bitstr);
435
+ for (i=0; i<len; i++) {
436
+ if (i & 7) bits >>= 1;
437
+ else bits = (unsigned char)*s++;
438
+ *t++ = (bits & 1) ? '1' : '0';
439
+ }
440
+ UNPACK_PUSH(bitstr);
441
+ }
442
+ break;
443
+
444
+ case 'B':
445
+ {
446
+ VALUE bitstr;
447
+ char *t;
448
+ int bits;
449
+ long i;
450
+
451
+ if (p[-1] == '*' || len > (send - s) * 8)
452
+ len = (send - s) * 8;
453
+ bits = 0;
454
+ bitstr = rb_usascii_str_new(0, len);
455
+ t = RSTRING_PTR(bitstr);
456
+ for (i=0; i<len; i++) {
457
+ if (i & 7) bits <<= 1;
458
+ else bits = (unsigned char)*s++;
459
+ *t++ = (bits & 128) ? '1' : '0';
460
+ }
461
+ UNPACK_PUSH(bitstr);
462
+ }
463
+ break;
464
+
465
+ case 'h':
466
+ {
467
+ VALUE bitstr;
468
+ char *t;
469
+ int bits;
470
+ long i;
471
+
472
+ if (p[-1] == '*' || len > (send - s) * 2)
473
+ len = (send - s) * 2;
474
+ bits = 0;
475
+ bitstr = rb_usascii_str_new(0, len);
476
+ t = RSTRING_PTR(bitstr);
477
+ for (i=0; i<len; i++) {
478
+ if (i & 1)
479
+ bits >>= 4;
480
+ else
481
+ bits = (unsigned char)*s++;
482
+ *t++ = hexdigits[bits & 15];
483
+ }
484
+ UNPACK_PUSH(bitstr);
485
+ }
486
+ break;
487
+
488
+ case 'H':
489
+ {
490
+ VALUE bitstr;
491
+ char *t;
492
+ int bits;
493
+ long i;
494
+
495
+ if (p[-1] == '*' || len > (send - s) * 2)
496
+ len = (send - s) * 2;
497
+ bits = 0;
498
+ bitstr = rb_usascii_str_new(0, len);
499
+ t = RSTRING_PTR(bitstr);
500
+ for (i=0; i<len; i++) {
501
+ if (i & 1)
502
+ bits <<= 4;
503
+ else
504
+ bits = (unsigned char)*s++;
505
+ *t++ = hexdigits[(bits >> 4) & 15];
506
+ }
507
+ UNPACK_PUSH(bitstr);
508
+ }
509
+ break;
510
+
511
+ case 'c':
512
+ signed_p = 1;
513
+ integer_size = 1;
514
+ bigendian_p = BIGENDIAN_P(); /* not effective */
515
+ goto unpack_integer;
516
+
517
+ case 'C':
518
+ signed_p = 0;
519
+ integer_size = 1;
520
+ bigendian_p = BIGENDIAN_P(); /* not effective */
521
+ goto unpack_integer;
522
+
523
+ case 's':
524
+ signed_p = 1;
525
+ integer_size = NATINT_LEN(short, 2);
526
+ bigendian_p = BIGENDIAN_P();
527
+ goto unpack_integer;
528
+
529
+ case 'S':
530
+ signed_p = 0;
531
+ integer_size = NATINT_LEN(short, 2);
532
+ bigendian_p = BIGENDIAN_P();
533
+ goto unpack_integer;
534
+
535
+ case 'i':
536
+ signed_p = 1;
537
+ integer_size = (int)sizeof(int);
538
+ bigendian_p = BIGENDIAN_P();
539
+ goto unpack_integer;
540
+
541
+ case 'I':
542
+ signed_p = 0;
543
+ integer_size = (int)sizeof(int);
544
+ bigendian_p = BIGENDIAN_P();
545
+ goto unpack_integer;
546
+
547
+ case 'l':
548
+ signed_p = 1;
549
+ integer_size = NATINT_LEN(long, 4);
550
+ bigendian_p = BIGENDIAN_P();
551
+ goto unpack_integer;
552
+
553
+ case 'L':
554
+ signed_p = 0;
555
+ integer_size = NATINT_LEN(long, 4);
556
+ bigendian_p = BIGENDIAN_P();
557
+ goto unpack_integer;
558
+
559
+ case 'q':
560
+ signed_p = 1;
561
+ integer_size = NATINT_LEN_Q;
562
+ bigendian_p = BIGENDIAN_P();
563
+ goto unpack_integer;
564
+
565
+ case 'Q':
566
+ signed_p = 0;
567
+ integer_size = NATINT_LEN_Q;
568
+ bigendian_p = BIGENDIAN_P();
569
+ goto unpack_integer;
570
+
571
+ case 'j':
572
+ signed_p = 1;
573
+ integer_size = sizeof(intptr_t);
574
+ bigendian_p = BIGENDIAN_P();
575
+ goto unpack_integer;
576
+
577
+ case 'J':
578
+ signed_p = 0;
579
+ integer_size = sizeof(uintptr_t);
580
+ bigendian_p = BIGENDIAN_P();
581
+ goto unpack_integer;
582
+
583
+ case 'n':
584
+ signed_p = 0;
585
+ integer_size = 2;
586
+ bigendian_p = 1;
587
+ goto unpack_integer;
588
+
589
+ case 'N':
590
+ signed_p = 0;
591
+ integer_size = 4;
592
+ bigendian_p = 1;
593
+ goto unpack_integer;
594
+
595
+ case 'v':
596
+ signed_p = 0;
597
+ integer_size = 2;
598
+ bigendian_p = 0;
599
+ goto unpack_integer;
600
+
601
+ case 'V':
602
+ signed_p = 0;
603
+ integer_size = 4;
604
+ bigendian_p = 0;
605
+ goto unpack_integer;
606
+
607
+ unpack_integer:
608
+ if (explicit_endian) {
609
+ bigendian_p = explicit_endian == '>';
610
+ }
611
+ PACK_LENGTH_ADJUST_SIZE(integer_size);
612
+ while (len-- > 0) {
613
+ int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
614
+ VALUE val;
615
+ if (signed_p)
616
+ flags |= INTEGER_PACK_2COMP;
617
+ val = rb_integer_unpack(s, integer_size, 1, 0, flags);
618
+ UNPACK_PUSH(val);
619
+ s += integer_size;
620
+ }
621
+ PACK_ITEM_ADJUST();
622
+ break;
623
+
624
+ case 'f':
625
+ case 'F':
626
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
627
+ while (len-- > 0) {
628
+ float tmp;
629
+ memcpy(&tmp, s, sizeof(float));
630
+ s += sizeof(float);
631
+ UNPACK_PUSH(DBL2NUM((double)tmp));
632
+ }
633
+ PACK_ITEM_ADJUST();
634
+ break;
635
+
636
+ case 'e':
637
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
638
+ while (len-- > 0) {
639
+ FLOAT_CONVWITH(tmp);
640
+ memcpy(tmp.buf, s, sizeof(float));
641
+ s += sizeof(float);
642
+ VTOHF(tmp);
643
+ UNPACK_PUSH(DBL2NUM(tmp.f));
644
+ }
645
+ PACK_ITEM_ADJUST();
646
+ break;
647
+
648
+ case 'E':
649
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
650
+ while (len-- > 0) {
651
+ DOUBLE_CONVWITH(tmp);
652
+ memcpy(tmp.buf, s, sizeof(double));
653
+ s += sizeof(double);
654
+ VTOHD(tmp);
655
+ UNPACK_PUSH(DBL2NUM(tmp.d));
656
+ }
657
+ PACK_ITEM_ADJUST();
658
+ break;
659
+
660
+ case 'D':
661
+ case 'd':
662
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
663
+ while (len-- > 0) {
664
+ double tmp;
665
+ memcpy(&tmp, s, sizeof(double));
666
+ s += sizeof(double);
667
+ UNPACK_PUSH(DBL2NUM(tmp));
668
+ }
669
+ PACK_ITEM_ADJUST();
670
+ break;
671
+
672
+ case 'g':
673
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
674
+ while (len-- > 0) {
675
+ FLOAT_CONVWITH(tmp);
676
+ memcpy(tmp.buf, s, sizeof(float));
677
+ s += sizeof(float);
678
+ NTOHF(tmp);
679
+ UNPACK_PUSH(DBL2NUM(tmp.f));
680
+ }
681
+ PACK_ITEM_ADJUST();
682
+ break;
683
+
684
+ case 'G':
685
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
686
+ while (len-- > 0) {
687
+ DOUBLE_CONVWITH(tmp);
688
+ memcpy(tmp.buf, s, sizeof(double));
689
+ s += sizeof(double);
690
+ NTOHD(tmp);
691
+ UNPACK_PUSH(DBL2NUM(tmp.d));
692
+ }
693
+ PACK_ITEM_ADJUST();
694
+ break;
695
+
696
+ case 'U':
697
+ if (len > send - s) len = send - s;
698
+ while (len > 0 && s < send) {
699
+ long alen = send - s;
700
+ unsigned long l;
701
+
702
+ l = utf8_to_uv(s, &alen);
703
+ s += alen; len--;
704
+ UNPACK_PUSH(ULONG2NUM(l));
705
+ }
706
+ break;
707
+
708
+ case 'u':
709
+ {
710
+ VALUE buf = infected_str_new(0, (send - s)*3/4, str);
711
+ char *ptr = RSTRING_PTR(buf);
712
+ long total = 0;
713
+
714
+ while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
715
+ long a,b,c,d;
716
+ char hunk[3];
717
+
718
+ len = ((unsigned char)*s++ - ' ') & 077;
719
+
720
+ total += len;
721
+ if (total > RSTRING_LEN(buf)) {
722
+ len -= total - RSTRING_LEN(buf);
723
+ total = RSTRING_LEN(buf);
724
+ }
725
+
726
+ while (len > 0) {
727
+ long mlen = len > 3 ? 3 : len;
728
+
729
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
730
+ a = ((unsigned char)*s++ - ' ') & 077;
731
+ else
732
+ a = 0;
733
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
734
+ b = ((unsigned char)*s++ - ' ') & 077;
735
+ else
736
+ b = 0;
737
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
738
+ c = ((unsigned char)*s++ - ' ') & 077;
739
+ else
740
+ c = 0;
741
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
742
+ d = ((unsigned char)*s++ - ' ') & 077;
743
+ else
744
+ d = 0;
745
+ hunk[0] = (char)(a << 2 | b >> 4);
746
+ hunk[1] = (char)(b << 4 | c >> 2);
747
+ hunk[2] = (char)(c << 6 | d);
748
+ memcpy(ptr, hunk, mlen);
749
+ ptr += mlen;
750
+ len -= mlen;
751
+ }
752
+ if (s < send && (unsigned char)*s != '\r' && *s != '\n')
753
+ s++; /* possible checksum byte */
754
+ if (s < send && *s == '\r') s++;
755
+ if (s < send && *s == '\n') s++;
756
+ }
757
+
758
+ rb_str_set_len(buf, total);
759
+ UNPACK_PUSH(buf);
760
+ }
761
+ break;
762
+
763
+ case 'm':
764
+ {
765
+ VALUE buf = infected_str_new(0, (send - s + 3)*3/4, str); /* +3 is for skipping paddings */
766
+ char *ptr = RSTRING_PTR(buf);
767
+ int a = -1,b = -1,c = 0,d = 0;
768
+ static signed char b64_xtable[256];
769
+
770
+ if (b64_xtable['/'] <= 0) {
771
+ int i;
772
+
773
+ for (i = 0; i < 256; i++) {
774
+ b64_xtable[i] = -1;
775
+ }
776
+ for (i = 0; i < 64; i++) {
777
+ b64_xtable[(unsigned char)b64_table[i]] = (char)i;
778
+ }
779
+ }
780
+ if (len == 0) {
781
+ while (s < send) {
782
+ a = b = c = d = -1;
783
+ a = b64_xtable[(unsigned char)*s++];
784
+ if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
785
+ b = b64_xtable[(unsigned char)*s++];
786
+ if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
787
+ if (*s == '=') {
788
+ if (s + 2 == send && *(s + 1) == '=') break;
789
+ rb_raise(rb_eArgError, "invalid base64");
790
+ }
791
+ c = b64_xtable[(unsigned char)*s++];
792
+ if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
793
+ if (s + 1 == send && *s == '=') break;
794
+ d = b64_xtable[(unsigned char)*s++];
795
+ if (d == -1) rb_raise(rb_eArgError, "invalid base64");
796
+ *ptr++ = castchar(a << 2 | b >> 4);
797
+ *ptr++ = castchar(b << 4 | c >> 2);
798
+ *ptr++ = castchar(c << 6 | d);
799
+ }
800
+ if (c == -1) {
801
+ *ptr++ = castchar(a << 2 | b >> 4);
802
+ if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
803
+ }
804
+ else if (d == -1) {
805
+ *ptr++ = castchar(a << 2 | b >> 4);
806
+ *ptr++ = castchar(b << 4 | c >> 2);
807
+ if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
808
+ }
809
+ }
810
+ else {
811
+ while (s < send) {
812
+ a = b = c = d = -1;
813
+ while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
814
+ if (s >= send) break;
815
+ s++;
816
+ while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
817
+ if (s >= send) break;
818
+ s++;
819
+ while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
820
+ if (*s == '=' || s >= send) break;
821
+ s++;
822
+ while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
823
+ if (*s == '=' || s >= send) break;
824
+ s++;
825
+ *ptr++ = castchar(a << 2 | b >> 4);
826
+ *ptr++ = castchar(b << 4 | c >> 2);
827
+ *ptr++ = castchar(c << 6 | d);
828
+ a = -1;
829
+ }
830
+ if (a != -1 && b != -1) {
831
+ if (c == -1)
832
+ *ptr++ = castchar(a << 2 | b >> 4);
833
+ else {
834
+ *ptr++ = castchar(a << 2 | b >> 4);
835
+ *ptr++ = castchar(b << 4 | c >> 2);
836
+ }
837
+ }
838
+ }
839
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
840
+ UNPACK_PUSH(buf);
841
+ }
842
+ break;
843
+
844
+ case 'M':
845
+ {
846
+ VALUE buf = infected_str_new(0, send - s, str);
847
+ char *ptr = RSTRING_PTR(buf), *ss = s;
848
+ int c1, c2;
849
+
850
+ while (s < send) {
851
+ if (*s == '=') {
852
+ if (++s == send) break;
853
+ if (s+1 < send && *s == '\r' && *(s+1) == '\n')
854
+ s++;
855
+ if (*s != '\n') {
856
+ if ((c1 = hex2num(*s)) == -1) break;
857
+ if (++s == send) break;
858
+ if ((c2 = hex2num(*s)) == -1) break;
859
+ *ptr++ = castchar(c1 << 4 | c2);
860
+ }
861
+ }
862
+ else {
863
+ *ptr++ = *s;
864
+ }
865
+ s++;
866
+ ss = s;
867
+ }
868
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
869
+ rb_str_buf_cat(buf, ss, send-ss);
870
+ ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID);
871
+ UNPACK_PUSH(buf);
872
+ }
873
+ break;
874
+
875
+ case '@':
876
+ if (len > RSTRING_LEN(str))
877
+ rb_raise(rb_eArgError, "@ outside of string");
878
+ s = RSTRING_PTR(str) + len;
879
+ break;
880
+
881
+ case 'X':
882
+ if (len > s - RSTRING_PTR(str))
883
+ rb_raise(rb_eArgError, "X outside of string");
884
+ s -= len;
885
+ break;
886
+
887
+ case 'x':
888
+ if (len > send - s)
889
+ rb_raise(rb_eArgError, "x outside of string");
890
+ s += len;
891
+ break;
892
+
893
+ case 'P':
894
+ if (sizeof(char *) <= (size_t)(send - s)) {
895
+ VALUE tmp = Qnil;
896
+ char *t;
897
+
898
+ memcpy(&t, s, sizeof(char *));
899
+ s += sizeof(char *);
900
+
901
+ if (t) {
902
+ VALUE a;
903
+ const VALUE *p, *pend;
904
+
905
+ if (!(a = str_associated(str))) {
906
+ rb_raise(rb_eArgError, "no associated pointer");
907
+ }
908
+ p = RARRAY_CONST_PTR(a);
909
+ pend = p + RARRAY_LEN(a);
910
+ while (p < pend) {
911
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
912
+ if (len < RSTRING_LEN(*p)) {
913
+ tmp = rb_tainted_str_new(t, len);
914
+ str_associate(tmp, a);
915
+ }
916
+ else {
917
+ tmp = *p;
918
+ }
919
+ break;
920
+ }
921
+ p++;
922
+ }
923
+ if (p == pend) {
924
+ rb_raise(rb_eArgError, "non associated pointer");
925
+ }
926
+ }
927
+ UNPACK_PUSH(tmp);
928
+ }
929
+ break;
930
+
931
+ case 'p':
932
+ if (len > (long)((send - s) / sizeof(char *)))
933
+ len = (send - s) / sizeof(char *);
934
+ while (len-- > 0) {
935
+ if ((size_t)(send - s) < sizeof(char *))
936
+ break;
937
+ else {
938
+ VALUE tmp = Qnil;
939
+ char *t;
940
+
941
+ memcpy(&t, s, sizeof(char *));
942
+ s += sizeof(char *);
943
+
944
+ if (t) {
945
+ VALUE a;
946
+ const VALUE *p, *pend;
947
+
948
+ if (!(a = str_associated(str))) {
949
+ rb_raise(rb_eArgError, "no associated pointer");
950
+ }
951
+ p = RARRAY_CONST_PTR(a);
952
+ pend = p + RARRAY_LEN(a);
953
+ while (p < pend) {
954
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
955
+ tmp = *p;
956
+ break;
957
+ }
958
+ p++;
959
+ }
960
+ if (p == pend) {
961
+ rb_raise(rb_eArgError, "non associated pointer");
962
+ }
963
+ }
964
+ UNPACK_PUSH(tmp);
965
+ }
966
+ }
967
+ break;
968
+
969
+ case 'w':
970
+ {
971
+ char *s0 = s;
972
+ while (len > 0 && s < send) {
973
+ if (*s & 0x80) {
974
+ s++;
975
+ }
976
+ else {
977
+ s++;
978
+ UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
979
+ len--;
980
+ s0 = s;
981
+ }
982
+ }
983
+ }
984
+ break;
985
+
986
+ default:
987
+ rb_warning("unknown unpack directive '%c' in '%s'",
988
+ type, RSTRING_PTR(fmt));
989
+ break;
990
+ }
991
+ }
992
+
993
+ *parsed_len = s - init_s;
994
+ return ary;
995
+ }
996
+
997
+ int
998
+ rb_uv_to_utf8(char buf[6], unsigned long uv)
999
+ {
1000
+ if (uv <= 0x7f) {
1001
+ buf[0] = (char)uv;
1002
+ return 1;
1003
+ }
1004
+ if (uv <= 0x7ff) {
1005
+ buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1006
+ buf[1] = castchar((uv&0x3f)|0x80);
1007
+ return 2;
1008
+ }
1009
+ if (uv <= 0xffff) {
1010
+ buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1011
+ buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1012
+ buf[2] = castchar((uv&0x3f)|0x80);
1013
+ return 3;
1014
+ }
1015
+ if (uv <= 0x1fffff) {
1016
+ buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1017
+ buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1018
+ buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1019
+ buf[3] = castchar((uv&0x3f)|0x80);
1020
+ return 4;
1021
+ }
1022
+ if (uv <= 0x3ffffff) {
1023
+ buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1024
+ buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1025
+ buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1026
+ buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1027
+ buf[4] = castchar((uv&0x3f)|0x80);
1028
+ return 5;
1029
+ }
1030
+ if (uv <= 0x7fffffff) {
1031
+ buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1032
+ buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1033
+ buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1034
+ buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1035
+ buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1036
+ buf[5] = castchar((uv&0x3f)|0x80);
1037
+ return 6;
1038
+ }
1039
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
1040
+
1041
+ UNREACHABLE;
1042
+ }
1043
+
1044
+ static const unsigned long utf8_limits[] = {
1045
+ 0x0, /* 1 */
1046
+ 0x80, /* 2 */
1047
+ 0x800, /* 3 */
1048
+ 0x10000, /* 4 */
1049
+ 0x200000, /* 5 */
1050
+ 0x4000000, /* 6 */
1051
+ 0x80000000, /* 7 */
1052
+ };
1053
+
1054
+ static unsigned long
1055
+ utf8_to_uv(const char *p, long *lenp)
1056
+ {
1057
+ int c = *p++ & 0xff;
1058
+ unsigned long uv = c;
1059
+ long n;
1060
+
1061
+ if (!(uv & 0x80)) {
1062
+ *lenp = 1;
1063
+ return uv;
1064
+ }
1065
+ if (!(uv & 0x40)) {
1066
+ *lenp = 1;
1067
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
1068
+ }
1069
+
1070
+ if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1071
+ else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1072
+ else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1073
+ else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1074
+ else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1075
+ else {
1076
+ *lenp = 1;
1077
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
1078
+ }
1079
+ if (n > *lenp) {
1080
+ rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1081
+ n, *lenp);
1082
+ }
1083
+ *lenp = n--;
1084
+ if (n != 0) {
1085
+ while (n--) {
1086
+ c = *p++ & 0xff;
1087
+ if ((c & 0xc0) != 0x80) {
1088
+ *lenp -= n + 1;
1089
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
1090
+ }
1091
+ else {
1092
+ c &= 0x3f;
1093
+ uv = uv << 6 | c;
1094
+ }
1095
+ }
1096
+ }
1097
+ n = *lenp - 1;
1098
+ if (uv < utf8_limits[n]) {
1099
+ rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1100
+ }
1101
+ return uv;
1102
+ }