zscan 2.0.3 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2295 @@
1
+ /**********************************************************************
2
+
3
+ pack.c -
4
+
5
+ $Author$
6
+ created at: Thu Feb 10 15:17:05 JST 1994
7
+
8
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
9
+
10
+ **********************************************************************/
11
+
12
+ #include "ruby/ruby.h"
13
+ #include "ruby/encoding.h"
14
+ #include "internal.h"
15
+ #include <sys/types.h>
16
+ #include <ctype.h>
17
+ #include <errno.h>
18
+
19
+ /*
20
+ * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
21
+ * instead of HAVE_LONG_LONG or LONG_LONG.
22
+ * This means q! and Q! always mean the standard long long type and
23
+ * causes ArgumentError for platforms which have no long long type,
24
+ * even if the platform has an implementation specific 64bit type.
25
+ * This behavior is consistent with the document of pack/unpack.
26
+ */
27
+ #ifdef HAVE_TRUE_LONG_LONG
28
+ static const char natstr[] = "sSiIlLqQ";
29
+ #else
30
+ static const char natstr[] = "sSiIlL";
31
+ #endif
32
+ static const char endstr[] = "sSiIlLqQ";
33
+
34
+ #ifdef HAVE_TRUE_LONG_LONG
35
+ /* It is intentional to use long long instead of LONG_LONG. */
36
+ # define NATINT_LEN_Q NATINT_LEN(long long, 8)
37
+ #else
38
+ # define NATINT_LEN_Q 8
39
+ #endif
40
+
41
+ #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
42
+ # define NATINT_PACK
43
+ #endif
44
+
45
+ #ifdef DYNAMIC_ENDIAN
46
+ /* for universal binary of NEXTSTEP and MacOS X */
47
+ /* useless since autoconf 2.63? */
48
/*
 * Reports the byte order of the machine the code is running on.
 *
 * The probe runs only on the first call; later calls return the cached
 * answer.  Returns 1 for a big-endian host and 0 for a little-endian one.
 */
static int
is_bigendian(void)
{
    static int probed = 0;	/* once set to 1 it doubles as the probe word */
    static int cached_answer;

    if (!probed) {
        const char *lowest_byte;

        probed = 1;
        /* With the value 1 stored, a non-zero byte at the lowest address
         * means the least significant byte comes first (little endian). */
        lowest_byte = (const char *)&probed;
        cached_answer = (lowest_byte[0] != 0) ? 0 : 1;
    }
    return cached_answer;
}
60
+ # define BIGENDIAN_P() (is_bigendian())
61
+ #elif defined(WORDS_BIGENDIAN)
62
+ # define BIGENDIAN_P() 1
63
+ #else
64
+ # define BIGENDIAN_P() 0
65
+ #endif
66
+
67
+ #ifdef NATINT_PACK
68
+ # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
69
+ #else
70
+ # define NATINT_LEN(type,len) ((int)sizeof(type))
71
+ #endif
72
+
73
+ #if SIZEOF_LONG == 8
74
+ # define INT64toNUM(x) LONG2NUM(x)
75
+ # define UINT64toNUM(x) ULONG2NUM(x)
76
+ #elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8
77
+ # define INT64toNUM(x) LL2NUM(x)
78
+ # define UINT64toNUM(x) ULL2NUM(x)
79
+ #endif
80
+
81
/*
 * define_swapx(x, xtype) emits a static function named swap<x> that takes
 * an xtype by value and returns it with the order of its bytes reversed.
 *
 * The reversal works on two small stack buffers through memcpy(), so the
 * generated function needs no heap allocation and performs no access
 * through a pointer of a different type.  The index is a size_t so that the
 * comparison against sizeof(xtype) is unsigned on both sides.
 */
#define define_swapx(x, xtype)			\
static xtype					\
TOKEN_PASTE(swap,x)(xtype z)			\
{						\
    xtype r;					\
    unsigned char src[sizeof(xtype)];		\
    unsigned char dst[sizeof(xtype)];		\
    size_t i;					\
						\
    memcpy(src, &z, sizeof(xtype));		\
    for (i = 0; i < sizeof(xtype); i++) {	\
        dst[sizeof(xtype)-i-1] = src[i];	\
    }						\
    memcpy(&r, dst, sizeof(xtype));		\
    return r;					\
}
102
+
103
+ #if GCC_VERSION_SINCE(4,3,0)
104
+ # define swap32(x) __builtin_bswap32(x)
105
+ # define swap64(x) __builtin_bswap64(x)
106
+ #endif
107
+
108
/*
 * Portable byte-swap fallbacks; each one is defined only when no earlier
 * definition (such as a compiler builtin) exists.
 *
 * Every operand is first converted to the unsigned type of the target
 * width.  The macros are applied to signed values as well (for example an
 * int32_t), and without the conversion a left shift could move a set bit
 * into the sign bit of int and a right shift could operate on a negative
 * value.  Only the low 16/32/64 bits of the argument take part in the
 * result, exactly as before.
 */
#ifndef swap16
# define swap16(x)	((uint16_t)((((uint16_t)(x)&0xFF)<<8) | (((uint16_t)(x)>>8)&0xFF)))
#endif

#ifndef swap32
# define swap32(x)	((uint32_t)((((uint32_t)(x)&0xFF)<<24)	\
			|(((uint32_t)(x)>>24)&0xFF)		\
			|(((uint32_t)(x)&0x0000FF00)<<8)	\
			|(((uint32_t)(x)&0x00FF0000)>>8)   ))
#endif

#ifndef swap64
# ifdef HAVE_INT64_T
/* mask selecting the byte that starts at bit n of a 64-bit word */
#  define byte_in_64bit(n) ((uint64_t)0xff << (n))
#  define swap64(x)	((uint64_t)((((uint64_t)(x)&byte_in_64bit(0))<<56)	\
			|(((uint64_t)(x)>>56)&0xFF)			\
			|(((uint64_t)(x)&byte_in_64bit(8))<<40)		\
			|(((uint64_t)(x)&byte_in_64bit(48))>>40)	\
			|(((uint64_t)(x)&byte_in_64bit(16))<<24)	\
			|(((uint64_t)(x)&byte_in_64bit(40))>>24)	\
			|(((uint64_t)(x)&byte_in_64bit(24))<<8)		\
			|(((uint64_t)(x)&byte_in_64bit(32))>>8)))
# endif
#endif
132
+
133
+ #if SIZEOF_SHORT == 2
134
+ # define swaps(x) swap16(x)
135
+ #elif SIZEOF_SHORT == 4
136
+ # define swaps(x) swap32(x)
137
+ #else
138
+ define_swapx(s,short)
139
+ #endif
140
+
141
+ #if SIZEOF_INT == 2
142
+ # define swapi(x) swap16(x)
143
+ #elif SIZEOF_INT == 4
144
+ # define swapi(x) swap32(x)
145
+ #else
146
+ define_swapx(i,int)
147
+ #endif
148
+
149
+ #if SIZEOF_LONG == 4
150
+ # define swapl(x) swap32(x)
151
+ #elif SIZEOF_LONG == 8
152
+ # define swapl(x) swap64(x)
153
+ #else
154
+ define_swapx(l,long)
155
+ #endif
156
+
157
+ #ifdef HAVE_LONG_LONG
158
+ # if SIZEOF_LONG_LONG == 8
159
+ # define swapll(x) swap64(x)
160
+ # else
161
+ define_swapx(ll,LONG_LONG)
162
+ # endif
163
+ #endif
164
+
165
+ #if SIZEOF_FLOAT == 4 && defined(HAVE_INT32_T)
166
+ # define swapf(x) swap32(x)
167
+ # define FLOAT_SWAPPER uint32_t
168
+ #else
169
+ define_swapx(f,float)
170
+ #endif
171
+
172
+ #if SIZEOF_DOUBLE == 8 && defined(HAVE_INT64_T)
173
+ # define swapd(x) swap64(x)
174
+ # define DOUBLE_SWAPPER uint64_t
175
+ #elif SIZEOF_DOUBLE == 8 && defined(HAVE_INT32_T)
176
/*
 * Returns d with its bytes in reverse order.
 *
 * This variant is compiled when double is 8 bytes and a 32-bit integer type
 * is available: the value is viewed as two 32-bit words, the words trade
 * places, and each one is byte-swapped with swap32().
 */
static double
swapd(const double d)
{
    uint32_t words[2] = {0, 0};
    uint32_t swapped_first;
    double value = d;

    memcpy(words, &value, sizeof(double));
    swapped_first = swap32(words[0]);
    words[0] = swap32(words[1]);
    words[1] = swapped_first;
    memcpy(&value, words, sizeof(double));
    return value;
}
191
+ #else
192
+ define_swapx(d, double)
193
+ #endif
194
+
195
+ #undef define_swapx
196
+
197
+ #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
198
+ #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
199
+ #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
200
+ #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
201
+ #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
202
+ #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
203
+ #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
204
+ #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
205
+
206
+ #ifdef FLOAT_SWAPPER
207
+ # define FLOAT_CONVWITH(y) FLOAT_SWAPPER y;
208
+ # define HTONF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
209
+ (y) = rb_htonf((FLOAT_SWAPPER)(y)), \
210
+ memcpy(&(x),&(y),sizeof(float)), \
211
+ (x))
212
+ # define HTOVF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
213
+ (y) = rb_htovf((FLOAT_SWAPPER)(y)), \
214
+ memcpy(&(x),&(y),sizeof(float)), \
215
+ (x))
216
+ # define NTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
217
+ (y) = rb_ntohf((FLOAT_SWAPPER)(y)), \
218
+ memcpy(&(x),&(y),sizeof(float)), \
219
+ (x))
220
+ # define VTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
221
+ (y) = rb_vtohf((FLOAT_SWAPPER)(y)), \
222
+ memcpy(&(x),&(y),sizeof(float)), \
223
+ (x))
224
+ #else
225
+ # define FLOAT_CONVWITH(y)
226
+ # define HTONF(x,y) rb_htonf(x)
227
+ # define HTOVF(x,y) rb_htovf(x)
228
+ # define NTOHF(x,y) rb_ntohf(x)
229
+ # define VTOHF(x,y) rb_vtohf(x)
230
+ #endif
231
+
232
+ #ifdef DOUBLE_SWAPPER
233
+ # define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y;
234
+ # define HTOND(x,y) (memcpy(&(y),&(x),sizeof(double)), \
235
+ (y) = rb_htond((DOUBLE_SWAPPER)(y)), \
236
+ memcpy(&(x),&(y),sizeof(double)), \
237
+ (x))
238
+ # define HTOVD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
239
+ (y) = rb_htovd((DOUBLE_SWAPPER)(y)), \
240
+ memcpy(&(x),&(y),sizeof(double)), \
241
+ (x))
242
+ # define NTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
243
+ (y) = rb_ntohd((DOUBLE_SWAPPER)(y)), \
244
+ memcpy(&(x),&(y),sizeof(double)), \
245
+ (x))
246
+ # define VTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
247
+ (y) = rb_vtohd((DOUBLE_SWAPPER)(y)), \
248
+ memcpy(&(x),&(y),sizeof(double)), \
249
+ (x))
250
+ #else
251
+ # define DOUBLE_CONVWITH(y)
252
+ # define HTOND(x,y) rb_htond(x)
253
+ # define HTOVD(x,y) rb_htovd(x)
254
+ # define NTOHD(x,y) rb_ntohd(x)
255
+ # define VTOHD(x,y) rb_vtohd(x)
256
+ #endif
257
+
258
+ static unsigned long
259
+ num2i32(VALUE x)
260
+ {
261
+ x = rb_to_int(x); /* is nil OK? (should not) */
262
+
263
+ if (FIXNUM_P(x)) return FIX2LONG(x);
264
+ if (RB_TYPE_P(x, T_BIGNUM)) {
265
+ return rb_big2ulong_pack(x);
266
+ }
267
+ rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x));
268
+
269
+ UNREACHABLE;
270
+ }
271
+
272
+ #define MAX_INTEGER_PACK_SIZE 8
273
+ /* #define FORCE_BIG_PACK */
274
+
275
+ static const char toofew[] = "too few arguments";
276
+
277
+ static void encodes(VALUE,const char*,long,int,int);
278
+ static void qpencode(VALUE,VALUE,long);
279
+
280
+ static unsigned long utf8_to_uv(const char*,long*);
281
+
282
+ /*
283
+ * call-seq:
284
+ * arr.pack ( aTemplateString ) -> aBinaryString
285
+ *
286
+ * Packs the contents of <i>arr</i> into a binary sequence according to
287
+ * the directives in <i>aTemplateString</i> (see the table below).
288
+ * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
289
+ * which gives the width of the resulting field. The remaining
290
+ * directives also may take a count, indicating the number of array
291
+ * elements to convert. If the count is an asterisk
292
+ * (``<code>*</code>''), all remaining array elements will be
293
+ * converted. Any of the directives ``<code>sSiIlL</code>'' may be
294
+ * followed by an underscore (``<code>_</code>'') or
295
+ * exclamation mark (``<code>!</code>'') to use the underlying
296
+ * platform's native size for the specified type; otherwise, they use a
297
+ * platform-independent size. Spaces are ignored in the template
298
+ * string. See also <code>String#unpack</code>.
299
+ *
300
+ * a = [ "a", "b", "c" ]
301
+ * n = [ 65, 66, 67 ]
302
+ * a.pack("A3A3A3") #=> "a b c "
303
+ * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
304
+ * n.pack("ccc") #=> "ABC"
305
+ *
306
+ * Directives for +pack+.
307
+ *
308
+ * Integer | Array |
309
+ * Directive | Element | Meaning
310
+ * ---------------------------------------------------------------------------
311
+ * C | Integer | 8-bit unsigned (unsigned char)
312
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
313
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
314
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
315
+ * | |
316
+ * c | Integer | 8-bit signed (signed char)
317
+ * s | Integer | 16-bit signed, native endian (int16_t)
318
+ * l | Integer | 32-bit signed, native endian (int32_t)
319
+ * q | Integer | 64-bit signed, native endian (int64_t)
320
+ * | |
321
+ * S_, S! | Integer | unsigned short, native endian
322
+ * I, I_, I! | Integer | unsigned int, native endian
323
+ * L_, L! | Integer | unsigned long, native endian
324
+ * Q_, Q! | Integer | unsigned long long, native endian (ArgumentError
325
+ * | | if the platform has no long long type.)
326
+ * | | (Q_ and Q! are available since Ruby 2.1.)
327
+ * | |
328
+ * s_, s! | Integer | signed short, native endian
329
+ * i, i_, i! | Integer | signed int, native endian
330
+ * l_, l! | Integer | signed long, native endian
331
+ * q_, q! | Integer | signed long long, native endian (ArgumentError
332
+ * | | if the platform has no long long type.)
333
+ * | | (q_ and q! are available since Ruby 2.1.)
334
+ * | |
335
+ * S> L> Q> | Integer | same as the directives without ">" except
336
+ * s> l> q> | | big endian
337
+ * S!> I!> | | (available since Ruby 1.9.3)
338
+ * L!> Q!> | | "S>" is same as "n"
339
+ * s!> i!> | | "L>" is same as "N"
340
+ * l!> q!> | |
341
+ * | |
342
+ * S< L< Q< | Integer | same as the directives without "<" except
343
+ * s< l< q< | | little endian
344
+ * S!< I!< | | (available since Ruby 1.9.3)
345
+ * L!< Q!< | | "S<" is same as "v"
346
+ * s!< i!< | | "L<" is same as "V"
347
+ * l!< q!< | |
348
+ * | |
349
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
350
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
351
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
352
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
353
+ * | |
354
+ * U | Integer | UTF-8 character
355
+ * w | Integer | BER-compressed integer
356
+ *
357
+ * Float | |
358
+ * Directive | | Meaning
359
+ * ---------------------------------------------------------------------------
360
+ * D, d | Float | double-precision, native format
361
+ * F, f | Float | single-precision, native format
362
+ * E | Float | double-precision, little-endian byte order
363
+ * e | Float | single-precision, little-endian byte order
364
+ * G | Float | double-precision, network (big-endian) byte order
365
+ * g | Float | single-precision, network (big-endian) byte order
366
+ *
367
+ * String | |
368
+ * Directive | | Meaning
369
+ * ---------------------------------------------------------------------------
370
+ * A | String | arbitrary binary string (space padded, count is width)
371
+ * a | String | arbitrary binary string (null padded, count is width)
372
+ * Z | String | same as ``a'', except that null is added with *
373
+ * B | String | bit string (MSB first)
374
+ * b | String | bit string (LSB first)
375
+ * H | String | hex string (high nibble first)
376
+ * h | String | hex string (low nibble first)
377
+ * u | String | UU-encoded string
378
+ * M | String | quoted printable, MIME encoding (see RFC2045)
379
+ * m | String | base64 encoded string (see RFC 2045, count is width)
380
+ * | | (if count is 0, no line feeds are added, see RFC 4648)
381
+ * P | String | pointer to a structure (fixed-length string)
382
+ * p | String | pointer to a null-terminated string
383
+ *
384
+ * Misc. | |
385
+ * Directive | | Meaning
386
+ * ---------------------------------------------------------------------------
387
+ * @ | --- | moves to absolute position
388
+ * X | --- | back up a byte
389
+ * x | --- | null byte
390
+ */
391
+
392
+ __attribute__ ((unused))
393
+ static VALUE
394
+ pack_pack(VALUE ary, VALUE fmt)
395
+ {
396
+ static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
397
+ static const char spc10[] = " ";
398
+ const char *p, *pend;
399
+ VALUE res, from, associates = 0;
400
+ char type;
401
+ long items, len, idx, plen;
402
+ const char *ptr;
403
+ int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
404
+ #ifdef NATINT_PACK
405
+ int natint; /* native integer */
406
+ #endif
407
+ int integer_size, bigendian_p;
408
+
409
+ StringValue(fmt);
410
+ p = RSTRING_PTR(fmt);
411
+ pend = p + RSTRING_LEN(fmt);
412
+ res = rb_str_buf_new(0);
413
+
414
+ items = RARRAY_LEN(ary);
415
+ idx = 0;
416
+
417
+ #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
418
+ #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW)
419
+ #define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW)
420
+
421
+ while (p < pend) {
422
+ int explicit_endian = 0;
423
+ if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
424
+ rb_raise(rb_eRuntimeError, "format string modified");
425
+ }
426
+ type = *p++; /* get data type */
427
+ #ifdef NATINT_PACK
428
+ natint = 0;
429
+ #endif
430
+
431
+ if (ISSPACE(type)) continue;
432
+ if (type == '#') {
433
+ while ((p < pend) && (*p != '\n')) {
434
+ p++;
435
+ }
436
+ continue;
437
+ }
438
+
439
+ {
440
+ modifiers:
441
+ switch (*p) {
442
+ case '_':
443
+ case '!':
444
+ if (strchr(natstr, type)) {
445
+ #ifdef NATINT_PACK
446
+ natint = 1;
447
+ #endif
448
+ p++;
449
+ }
450
+ else {
451
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
452
+ }
453
+ goto modifiers;
454
+
455
+ case '<':
456
+ case '>':
457
+ if (!strchr(endstr, type)) {
458
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
459
+ }
460
+ if (explicit_endian) {
461
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
462
+ }
463
+ explicit_endian = *p++;
464
+ goto modifiers;
465
+ }
466
+ }
467
+
468
+ if (*p == '*') { /* set data length */
469
+ len = strchr("@Xxu", type) ? 0
470
+ : strchr("PMm", type) ? 1
471
+ : items;
472
+ p++;
473
+ }
474
+ else if (ISDIGIT(*p)) {
475
+ errno = 0;
476
+ len = STRTOUL(p, (char**)&p, 10);
477
+ if (errno) {
478
+ rb_raise(rb_eRangeError, "pack length too big");
479
+ }
480
+ }
481
+ else {
482
+ len = 1;
483
+ }
484
+
485
+ switch (type) {
486
+ case 'U':
487
+ /* if encoding is US-ASCII, upgrade to UTF-8 */
488
+ if (enc_info == 1) enc_info = 2;
489
+ break;
490
+ case 'm': case 'M': case 'u':
491
+ /* keep US-ASCII (do nothing) */
492
+ break;
493
+ default:
494
+ /* fall back to BINARY */
495
+ enc_info = 0;
496
+ break;
497
+ }
498
+ switch (type) {
499
+ case 'A': case 'a': case 'Z':
500
+ case 'B': case 'b':
501
+ case 'H': case 'h':
502
+ from = NEXTFROM;
503
+ if (NIL_P(from)) {
504
+ ptr = "";
505
+ plen = 0;
506
+ }
507
+ else {
508
+ StringValue(from);
509
+ ptr = RSTRING_PTR(from);
510
+ plen = RSTRING_LEN(from);
511
+ OBJ_INFECT(res, from);
512
+ }
513
+
514
+ if (p[-1] == '*')
515
+ len = plen;
516
+
517
+ switch (type) {
518
+ case 'a': /* arbitrary binary string (null padded) */
519
+ case 'A': /* arbitrary binary string (ASCII space padded) */
520
+ case 'Z': /* null terminated string */
521
+ if (plen >= len) {
522
+ rb_str_buf_cat(res, ptr, len);
523
+ if (p[-1] == '*' && type == 'Z')
524
+ rb_str_buf_cat(res, nul10, 1);
525
+ }
526
+ else {
527
+ rb_str_buf_cat(res, ptr, plen);
528
+ len -= plen;
529
+ while (len >= 10) {
530
+ rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
531
+ len -= 10;
532
+ }
533
+ rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
534
+ }
535
+ break;
536
+
537
+ #define castchar(from) (char)((from) & 0xff)
538
+
539
+ case 'b': /* bit string (ascending) */
540
+ {
541
+ int byte = 0;
542
+ long i, j = 0;
543
+
544
+ if (len > plen) {
545
+ j = (len - plen + 1)/2;
546
+ len = plen;
547
+ }
548
+ for (i=0; i++ < len; ptr++) {
549
+ if (*ptr & 1)
550
+ byte |= 128;
551
+ if (i & 7)
552
+ byte >>= 1;
553
+ else {
554
+ char c = castchar(byte);
555
+ rb_str_buf_cat(res, &c, 1);
556
+ byte = 0;
557
+ }
558
+ }
559
+ if (len & 7) {
560
+ char c;
561
+ byte >>= 7 - (len & 7);
562
+ c = castchar(byte);
563
+ rb_str_buf_cat(res, &c, 1);
564
+ }
565
+ len = j;
566
+ goto grow;
567
+ }
568
+ break;
569
+
570
+ case 'B': /* bit string (descending) */
571
+ {
572
+ int byte = 0;
573
+ long i, j = 0;
574
+
575
+ if (len > plen) {
576
+ j = (len - plen + 1)/2;
577
+ len = plen;
578
+ }
579
+ for (i=0; i++ < len; ptr++) {
580
+ byte |= *ptr & 1;
581
+ if (i & 7)
582
+ byte <<= 1;
583
+ else {
584
+ char c = castchar(byte);
585
+ rb_str_buf_cat(res, &c, 1);
586
+ byte = 0;
587
+ }
588
+ }
589
+ if (len & 7) {
590
+ char c;
591
+ byte <<= 7 - (len & 7);
592
+ c = castchar(byte);
593
+ rb_str_buf_cat(res, &c, 1);
594
+ }
595
+ len = j;
596
+ goto grow;
597
+ }
598
+ break;
599
+
600
+ case 'h': /* hex string (low nibble first) */
601
+ {
602
+ int byte = 0;
603
+ long i, j = 0;
604
+
605
+ if (len > plen) {
606
+ j = (len + 1) / 2 - (plen + 1) / 2;
607
+ len = plen;
608
+ }
609
+ for (i=0; i++ < len; ptr++) {
610
+ if (ISALPHA(*ptr))
611
+ byte |= (((*ptr & 15) + 9) & 15) << 4;
612
+ else
613
+ byte |= (*ptr & 15) << 4;
614
+ if (i & 1)
615
+ byte >>= 4;
616
+ else {
617
+ char c = castchar(byte);
618
+ rb_str_buf_cat(res, &c, 1);
619
+ byte = 0;
620
+ }
621
+ }
622
+ if (len & 1) {
623
+ char c = castchar(byte);
624
+ rb_str_buf_cat(res, &c, 1);
625
+ }
626
+ len = j;
627
+ goto grow;
628
+ }
629
+ break;
630
+
631
+ case 'H': /* hex string (high nibble first) */
632
+ {
633
+ int byte = 0;
634
+ long i, j = 0;
635
+
636
+ if (len > plen) {
637
+ j = (len + 1) / 2 - (plen + 1) / 2;
638
+ len = plen;
639
+ }
640
+ for (i=0; i++ < len; ptr++) {
641
+ if (ISALPHA(*ptr))
642
+ byte |= ((*ptr & 15) + 9) & 15;
643
+ else
644
+ byte |= *ptr & 15;
645
+ if (i & 1)
646
+ byte <<= 4;
647
+ else {
648
+ char c = castchar(byte);
649
+ rb_str_buf_cat(res, &c, 1);
650
+ byte = 0;
651
+ }
652
+ }
653
+ if (len & 1) {
654
+ char c = castchar(byte);
655
+ rb_str_buf_cat(res, &c, 1);
656
+ }
657
+ len = j;
658
+ goto grow;
659
+ }
660
+ break;
661
+ }
662
+ break;
663
+
664
+ case 'c': /* signed char */
665
+ case 'C': /* unsigned char */
666
+ while (len-- > 0) {
667
+ char c;
668
+
669
+ from = NEXTFROM;
670
+ c = (char)num2i32(from);
671
+ rb_str_buf_cat(res, &c, sizeof(char));
672
+ }
673
+ break;
674
+
675
+ case 's': /* s for int16_t, s! for signed short */
676
+ integer_size = NATINT_LEN(short, 2);
677
+ bigendian_p = BIGENDIAN_P();
678
+ goto pack_integer;
679
+
680
+ case 'S': /* S for uint16_t, S! for unsigned short */
681
+ integer_size = NATINT_LEN(short, 2);
682
+ bigendian_p = BIGENDIAN_P();
683
+ goto pack_integer;
684
+
685
+ case 'i': /* i and i! for signed int */
686
+ integer_size = (int)sizeof(int);
687
+ bigendian_p = BIGENDIAN_P();
688
+ goto pack_integer;
689
+
690
+ case 'I': /* I and I! for unsigned int */
691
+ integer_size = (int)sizeof(int);
692
+ bigendian_p = BIGENDIAN_P();
693
+ goto pack_integer;
694
+
695
+ case 'l': /* l for int32_t, l! for signed long */
696
+ integer_size = NATINT_LEN(long, 4);
697
+ bigendian_p = BIGENDIAN_P();
698
+ goto pack_integer;
699
+
700
+ case 'L': /* L for uint32_t, L! for unsigned long */
701
+ integer_size = NATINT_LEN(long, 4);
702
+ bigendian_p = BIGENDIAN_P();
703
+ goto pack_integer;
704
+
705
+ case 'q': /* q for int64_t, q! for signed long long */
706
+ integer_size = NATINT_LEN_Q;
707
+ bigendian_p = BIGENDIAN_P();
708
+ goto pack_integer;
709
+
710
+ case 'Q': /* Q for uint64_t, Q! for unsigned long long */
711
+ integer_size = NATINT_LEN_Q;
712
+ bigendian_p = BIGENDIAN_P();
713
+ goto pack_integer;
714
+
715
+ case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
716
+ integer_size = 2;
717
+ bigendian_p = 1;
718
+ goto pack_integer;
719
+
720
+ case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
721
+ integer_size = 4;
722
+ bigendian_p = 1;
723
+ goto pack_integer;
724
+
725
+ case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
726
+ integer_size = 2;
727
+ bigendian_p = 0;
728
+ goto pack_integer;
729
+
730
+ case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
731
+ integer_size = 4;
732
+ bigendian_p = 0;
733
+ goto pack_integer;
734
+
735
+ pack_integer:
736
+ if (explicit_endian) {
737
+ bigendian_p = explicit_endian == '>';
738
+ }
739
+
740
+ switch (integer_size) {
741
+ #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
742
+ case SIZEOF_INT16_T:
743
+ while (len-- > 0) {
744
+ union {
745
+ int16_t i;
746
+ char a[sizeof(int16_t)];
747
+ } v;
748
+
749
+ from = NEXTFROM;
750
+ v.i = (int16_t)num2i32(from);
751
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
752
+ rb_str_buf_cat(res, v.a, sizeof(int16_t));
753
+ }
754
+ break;
755
+ #endif
756
+
757
+ #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
758
+ case SIZEOF_INT32_T:
759
+ while (len-- > 0) {
760
+ union {
761
+ int32_t i;
762
+ char a[sizeof(int32_t)];
763
+ } v;
764
+
765
+ from = NEXTFROM;
766
+ v.i = (int32_t)num2i32(from);
767
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
768
+ rb_str_buf_cat(res, v.a, sizeof(int32_t));
769
+ }
770
+ break;
771
+ #endif
772
+
773
+ #if defined(HAVE_INT64_T) && SIZEOF_LONG == SIZEOF_INT64_T && !defined(FORCE_BIG_PACK)
774
+ case SIZEOF_INT64_T:
775
+ while (len-- > 0) {
776
+ union {
777
+ int64_t i;
778
+ char a[sizeof(int64_t)];
779
+ } v;
780
+
781
+ from = NEXTFROM;
782
+ v.i = num2i32(from); /* can return 64bit value if SIZEOF_LONG == SIZEOF_INT64_T */
783
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
784
+ rb_str_buf_cat(res, v.a, sizeof(int64_t));
785
+ }
786
+ break;
787
+ #endif
788
+
789
+ default:
790
+ if (integer_size > MAX_INTEGER_PACK_SIZE)
791
+ rb_bug("unexpected intger size for pack: %d", integer_size);
792
+ while (len-- > 0) {
793
+ union {
794
+ unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG];
795
+ char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG*SIZEOF_LONG];
796
+ } v;
797
+ int num_longs = (integer_size+SIZEOF_LONG-1)/SIZEOF_LONG;
798
+ int i;
799
+
800
+ from = NEXTFROM;
801
+ rb_big_pack(from, v.i, num_longs);
802
+ if (bigendian_p) {
803
+ for (i = 0; i < num_longs/2; i++) {
804
+ unsigned long t = v.i[i];
805
+ v.i[i] = v.i[num_longs-1-i];
806
+ v.i[num_longs-1-i] = t;
807
+ }
808
+ }
809
+ if (bigendian_p != BIGENDIAN_P()) {
810
+ for (i = 0; i < num_longs; i++)
811
+ v.i[i] = swapl(v.i[i]);
812
+ }
813
+ rb_str_buf_cat(res,
814
+ bigendian_p ?
815
+ v.a + sizeof(long)*num_longs - integer_size :
816
+ v.a,
817
+ integer_size);
818
+ }
819
+ break;
820
+ }
821
+ break;
822
+
823
+ case 'f': /* single precision float in native format */
824
+ case 'F': /* ditto */
825
+ while (len-- > 0) {
826
+ float f;
827
+
828
+ from = NEXTFROM;
829
+ f = (float)RFLOAT_VALUE(rb_to_float(from));
830
+ rb_str_buf_cat(res, (char*)&f, sizeof(float));
831
+ }
832
+ break;
833
+
834
+ case 'e': /* single precision float in VAX byte-order */
835
+ while (len-- > 0) {
836
+ float f;
837
+ FLOAT_CONVWITH(ftmp);
838
+
839
+ from = NEXTFROM;
840
+ f = (float)RFLOAT_VALUE(rb_to_float(from));
841
+ f = HTOVF(f,ftmp);
842
+ rb_str_buf_cat(res, (char*)&f, sizeof(float));
843
+ }
844
+ break;
845
+
846
+ case 'E': /* double precision float in VAX byte-order */
847
+ while (len-- > 0) {
848
+ double d;
849
+ DOUBLE_CONVWITH(dtmp);
850
+
851
+ from = NEXTFROM;
852
+ d = RFLOAT_VALUE(rb_to_float(from));
853
+ d = HTOVD(d,dtmp);
854
+ rb_str_buf_cat(res, (char*)&d, sizeof(double));
855
+ }
856
+ break;
857
+
858
+ case 'd': /* double precision float in native format */
859
+ case 'D': /* ditto */
860
+ while (len-- > 0) {
861
+ double d;
862
+
863
+ from = NEXTFROM;
864
+ d = RFLOAT_VALUE(rb_to_float(from));
865
+ rb_str_buf_cat(res, (char*)&d, sizeof(double));
866
+ }
867
+ break;
868
+
869
+ case 'g': /* single precision float in network byte-order */
870
+ while (len-- > 0) {
871
+ float f;
872
+ FLOAT_CONVWITH(ftmp);
873
+
874
+ from = NEXTFROM;
875
+ f = (float)RFLOAT_VALUE(rb_to_float(from));
876
+ f = HTONF(f,ftmp);
877
+ rb_str_buf_cat(res, (char*)&f, sizeof(float));
878
+ }
879
+ break;
880
+
881
+ case 'G': /* double precision float in network byte-order */
882
+ while (len-- > 0) {
883
+ double d;
884
+ DOUBLE_CONVWITH(dtmp);
885
+
886
+ from = NEXTFROM;
887
+ d = RFLOAT_VALUE(rb_to_float(from));
888
+ d = HTOND(d,dtmp);
889
+ rb_str_buf_cat(res, (char*)&d, sizeof(double));
890
+ }
891
+ break;
892
+
893
+ case 'x': /* null byte */
894
+ grow:
895
+ while (len >= 10) {
896
+ rb_str_buf_cat(res, nul10, 10);
897
+ len -= 10;
898
+ }
899
+ rb_str_buf_cat(res, nul10, len);
900
+ break;
901
+
902
+ case 'X': /* back up byte */
903
+ shrink:
904
+ plen = RSTRING_LEN(res);
905
+ if (plen < len)
906
+ rb_raise(rb_eArgError, "X outside of string");
907
+ rb_str_set_len(res, plen - len);
908
+ break;
909
+
910
+ case '@': /* null fill to absolute position */
911
+ len -= RSTRING_LEN(res);
912
+ if (len > 0) goto grow;
913
+ len = -len;
914
+ if (len > 0) goto shrink;
915
+ break;
916
+
917
+ case '%':
918
+ rb_raise(rb_eArgError, "%% is not supported");
919
+ break;
920
+
921
+ case 'U': /* Unicode character */
922
+ while (len-- > 0) {
923
+ SIGNED_VALUE l;
924
+ char buf[8];
925
+ int le;
926
+
927
+ from = NEXTFROM;
928
+ from = rb_to_int(from);
929
+ l = NUM2LONG(from);
930
+ if (l < 0) {
931
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
932
+ }
933
+ le = rb_uv_to_utf8(buf, l);
934
+ rb_str_buf_cat(res, (char*)buf, le);
935
+ }
936
+ break;
937
+
938
+ case 'u': /* uuencoded string */
939
+ case 'm': /* base64 encoded string */
940
+ from = NEXTFROM;
941
+ StringValue(from);
942
+ ptr = RSTRING_PTR(from);
943
+ plen = RSTRING_LEN(from);
944
+
945
+ if (len == 0 && type == 'm') {
946
+ encodes(res, ptr, plen, type, 0);
947
+ ptr += plen;
948
+ break;
949
+ }
950
+ if (len <= 2)
951
+ len = 45;
952
+ else if (len > 63 && type == 'u')
953
+ len = 63;
954
+ else
955
+ len = len / 3 * 3;
956
+ while (plen > 0) {
957
+ long todo;
958
+
959
+ if (plen > len)
960
+ todo = len;
961
+ else
962
+ todo = plen;
963
+ encodes(res, ptr, todo, type, 1);
964
+ plen -= todo;
965
+ ptr += todo;
966
+ }
967
+ break;
968
+
969
+ case 'M': /* quoted-printable encoded string */
970
+ from = rb_obj_as_string(NEXTFROM);
971
+ if (len <= 1)
972
+ len = 72;
973
+ qpencode(res, from, len);
974
+ break;
975
+
976
+ case 'P': /* pointer to packed byte string */
977
+ from = THISFROM;
978
+ if (!NIL_P(from)) {
979
+ StringValue(from);
980
+ if (RSTRING_LEN(from) < len) {
981
+ rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
982
+ RSTRING_LEN(from), len);
983
+ }
984
+ }
985
+ len = 1;
986
+ /* FALL THROUGH */
987
+ case 'p': /* pointer to string */
988
+ while (len-- > 0) {
989
+ char *t;
990
+ from = NEXTFROM;
991
+ if (NIL_P(from)) {
992
+ t = 0;
993
+ }
994
+ else {
995
+ t = StringValuePtr(from);
996
+ }
997
+ if (!associates) {
998
+ associates = rb_ary_new();
999
+ }
1000
+ rb_ary_push(associates, from);
1001
+ rb_obj_taint(from);
1002
+ rb_str_buf_cat(res, (char*)&t, sizeof(char*));
1003
+ }
1004
+ break;
1005
+
1006
+ case 'w': /* BER compressed integer */
1007
+ while (len-- > 0) {
1008
+ unsigned long ul;
1009
+ VALUE buf = rb_str_new(0, 0);
1010
+ char c, *bufs, *bufe;
1011
+
1012
+ from = NEXTFROM;
1013
+ if (RB_TYPE_P(from, T_BIGNUM)) {
1014
+ VALUE big128 = rb_uint2big(128);
1015
+ while (RB_TYPE_P(from, T_BIGNUM)) {
1016
+ from = rb_big_divmod(from, big128);
1017
+ c = castchar(NUM2INT(RARRAY_PTR(from)[1]) | 0x80); /* mod */
1018
+ rb_str_buf_cat(buf, &c, sizeof(char));
1019
+ from = RARRAY_PTR(from)[0]; /* div */
1020
+ }
1021
+ }
1022
+
1023
+ {
1024
+ long l = NUM2LONG(from);
1025
+ if (l < 0) {
1026
+ rb_raise(rb_eArgError, "can't compress negative numbers");
1027
+ }
1028
+ ul = l;
1029
+ }
1030
+
1031
+ while (ul) {
1032
+ c = castchar((ul & 0x7f) | 0x80);
1033
+ rb_str_buf_cat(buf, &c, sizeof(char));
1034
+ ul >>= 7;
1035
+ }
1036
+
1037
+ if (RSTRING_LEN(buf)) {
1038
+ bufs = RSTRING_PTR(buf);
1039
+ bufe = bufs + RSTRING_LEN(buf) - 1;
1040
+ *bufs &= 0x7f; /* clear continue bit */
1041
+ while (bufs < bufe) { /* reverse */
1042
+ c = *bufs;
1043
+ *bufs++ = *bufe;
1044
+ *bufe-- = c;
1045
+ }
1046
+ rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
1047
+ }
1048
+ else {
1049
+ c = 0;
1050
+ rb_str_buf_cat(res, &c, sizeof(char));
1051
+ }
1052
+ }
1053
+ break;
1054
+
1055
+ default:
1056
+ rb_warning("unknown pack directive '%c' in '%s'",
1057
+ type, RSTRING_PTR(fmt));
1058
+ break;
1059
+ }
1060
+ }
1061
+
1062
+ if (associates) {
1063
+ rb_str_associate(res, associates);
1064
+ }
1065
+ OBJ_INFECT(res, fmt);
1066
+ switch (enc_info) {
1067
+ case 1:
1068
+ ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
1069
+ break;
1070
+ case 2:
1071
+ rb_enc_set_index(res, rb_utf8_encindex());
1072
+ break;
1073
+ default:
1074
+ /* do nothing, keep ASCII-8BIT */
1075
+ break;
1076
+ }
1077
+ return res;
1078
+ }
1079
+
1080
+ static const char uu_table[] =
1081
+ "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1082
+ static const char b64_table[] =
1083
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1084
+
1085
+ static void
1086
+ encodes(VALUE str, const char *s, long len, int type, int tail_lf)
1087
+ {
1088
+ char buff[4096];
1089
+ long i = 0;
1090
+ const char *trans = type == 'u' ? uu_table : b64_table;
1091
+ char padding;
1092
+
1093
+ if (type == 'u') {
1094
+ buff[i++] = (char)len + ' ';
1095
+ padding = '`';
1096
+ }
1097
+ else {
1098
+ padding = '=';
1099
+ }
1100
+ while (len >= 3) {
1101
+ while (len >= 3 && sizeof(buff)-i >= 4) {
1102
+ buff[i++] = trans[077 & (*s >> 2)];
1103
+ buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1104
+ buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
1105
+ buff[i++] = trans[077 & s[2]];
1106
+ s += 3;
1107
+ len -= 3;
1108
+ }
1109
+ if (sizeof(buff)-i < 4) {
1110
+ rb_str_buf_cat(str, buff, i);
1111
+ i = 0;
1112
+ }
1113
+ }
1114
+
1115
+ if (len == 2) {
1116
+ buff[i++] = trans[077 & (*s >> 2)];
1117
+ buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1118
+ buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
1119
+ buff[i++] = padding;
1120
+ }
1121
+ else if (len == 1) {
1122
+ buff[i++] = trans[077 & (*s >> 2)];
1123
+ buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
1124
+ buff[i++] = padding;
1125
+ buff[i++] = padding;
1126
+ }
1127
+ if (tail_lf) buff[i++] = '\n';
1128
+ rb_str_buf_cat(str, buff, i);
1129
+ }
1130
+
1131
+ static const char hex_table[] = "0123456789ABCDEF";
1132
+
1133
+ static void
1134
+ qpencode(VALUE str, VALUE from, long len)
1135
+ {
1136
+ char buff[1024];
1137
+ long i = 0, n = 0, prev = EOF;
1138
+ unsigned char *s = (unsigned char*)RSTRING_PTR(from);
1139
+ unsigned char *send = s + RSTRING_LEN(from);
1140
+
1141
+ while (s < send) {
1142
+ if ((*s > 126) ||
1143
+ (*s < 32 && *s != '\n' && *s != '\t') ||
1144
+ (*s == '=')) {
1145
+ buff[i++] = '=';
1146
+ buff[i++] = hex_table[*s >> 4];
1147
+ buff[i++] = hex_table[*s & 0x0f];
1148
+ n += 3;
1149
+ prev = EOF;
1150
+ }
1151
+ else if (*s == '\n') {
1152
+ if (prev == ' ' || prev == '\t') {
1153
+ buff[i++] = '=';
1154
+ buff[i++] = *s;
1155
+ }
1156
+ buff[i++] = *s;
1157
+ n = 0;
1158
+ prev = *s;
1159
+ }
1160
+ else {
1161
+ buff[i++] = *s;
1162
+ n++;
1163
+ prev = *s;
1164
+ }
1165
+ if (n > len) {
1166
+ buff[i++] = '=';
1167
+ buff[i++] = '\n';
1168
+ n = 0;
1169
+ prev = '\n';
1170
+ }
1171
+ if (i > 1024 - 5) {
1172
+ rb_str_buf_cat(str, buff, i);
1173
+ i = 0;
1174
+ }
1175
+ s++;
1176
+ }
1177
+ if (n > 0) {
1178
+ buff[i++] = '=';
1179
+ buff[i++] = '\n';
1180
+ }
1181
+ if (i > 0) {
1182
+ rb_str_buf_cat(str, buff, i);
1183
+ }
1184
+ }
1185
+
1186
/*
 * Return the numeric value (0..15) of the hexadecimal digit `c`,
 * accepting both letter cases, or -1 when `c` is not a hex digit.
 */
static inline int
hex2num(char c)
{
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}
1203
+
1204
/*
 * Clamp the repeat count `len` to the number of whole `sz`-byte items that
 * remain between `s` and `send`.  When the count was explicit (not '*') the
 * shortfall is stored in `tmp_len`; otherwise `tmp_len` is 0.
 * Uses `s`, `send`, `len`, `tmp_len` and `star` from the expansion scope.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do { \
    tmp_len = 0; \
    if ((long)((send-s)/(sz)) < len) { \
        tmp_len = star ? 0 : len-(send-s)/(sz); \
        len = (send-s)/(sz); \
    } \
} while (0)

/*
 * When results are collected into `ary` (no block given) and the preceding
 * PACK_LENGTH_ADJUST_SIZE() recorded a shortfall, store nil at the index
 * that extends `ary` by `tmp_len` entries.
 * Uses `ary`, `tmp_len` and `block_p` from the expansion scope.
 */
#define PACK_ITEM_ADJUST() do { \
    if (!block_p && tmp_len > 0) \
        rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)
1218
+
1219
+ static VALUE
1220
+ infected_str_new(const char *ptr, long len, VALUE str)
1221
+ {
1222
+ VALUE s = rb_str_new(ptr, len);
1223
+
1224
+ OBJ_INFECT(s, str);
1225
+ return s;
1226
+ }
1227
+
1228
+ /*
1229
+ * call-seq:
1230
+ * str.unpack(format) -> anArray
1231
+ *
1232
+ * Decodes <i>str</i> (which may contain binary data) according to the
1233
+ * format string, returning an array of each value extracted. The
1234
+ * format string consists of a sequence of single-character directives,
1235
+ * summarized in the table at the end of this entry.
1236
+ * Each directive may be followed
1237
+ * by a number, indicating the number of times to repeat with this
1238
+ * directive. An asterisk (``<code>*</code>'') will use up all
1239
+ * remaining elements. The directives <code>sSiIlL</code> may each be
1240
+ * followed by an underscore (``<code>_</code>'') or
1241
+ * exclamation mark (``<code>!</code>'') to use the underlying
1242
+ * platform's native size for the specified type; otherwise, it uses a
1243
+ * platform-independent consistent size. Spaces are ignored in the
1244
+ * format string. See also <code>Array#pack</code>.
1245
+ *
1246
+ * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1247
+ * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1248
+ * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1249
+ * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1250
+ * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1251
+ * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1252
+ * "now=20is".unpack('M*') #=> ["now is"]
1253
+ * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1254
+ *
1255
+ * This table summarizes the various formats and the Ruby classes
1256
+ * returned by each.
1257
+ *
1258
+ * Integer | |
1259
+ * Directive | Returns | Meaning
1260
+ * -----------------------------------------------------------------
1261
+ * C | Integer | 8-bit unsigned (unsigned char)
1262
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
1263
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
1264
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
1265
+ * | |
1266
+ * c | Integer | 8-bit signed (signed char)
1267
+ * s | Integer | 16-bit signed, native endian (int16_t)
1268
+ * l | Integer | 32-bit signed, native endian (int32_t)
1269
+ * q | Integer | 64-bit signed, native endian (int64_t)
1270
+ * | |
1271
+ * S_, S! | Integer | unsigned short, native endian
1272
+ * I, I_, I! | Integer | unsigned int, native endian
1273
+ * L_, L! | Integer | unsigned long, native endian
1274
+ * Q_, Q! | Integer | unsigned long long, native endian (ArgumentError
1275
+ * | | if the platform has no long long type.)
1276
+ * | | (Q_ and Q! are available since Ruby 2.1.)
1277
+ * | |
1278
+ * s_, s! | Integer | signed short, native endian
1279
+ * i, i_, i! | Integer | signed int, native endian
1280
+ * l_, l! | Integer | signed long, native endian
1281
+ * q_, q! | Integer | signed long long, native endian (ArgumentError
1282
+ * | | if the platform has no long long type.)
1283
+ * | | (q_ and q! are available since Ruby 2.1.)
1284
+ * | |
1285
+ * S> L> Q> | Integer | same as the directives without ">" except
1286
+ * s> l> q> | | big endian
1287
+ * S!> I!> | | (available since Ruby 1.9.3)
1288
+ * L!> Q!> | | "S>" is same as "n"
1289
+ * s!> i!> | | "L>" is same as "N"
1290
+ * l!> q!> | |
1291
+ * | |
1292
+ * S< L< Q< | Integer | same as the directives without "<" except
1293
+ * s< l< q< | | little endian
1294
+ * S!< I!< | | (available since Ruby 1.9.3)
1295
+ * L!< Q!< | | "S<" is same as "v"
1296
+ * s!< i!< | | "L<" is same as "V"
1297
+ * l!< q!< | |
1298
+ * | |
1299
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
1300
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
1301
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
1302
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
1303
+ * | |
1304
+ * U | Integer | UTF-8 character
1305
+ * w | Integer | BER-compressed integer (see Array.pack)
1306
+ *
1307
+ * Float | |
1308
+ * Directive | Returns | Meaning
1309
+ * -----------------------------------------------------------------
1310
+ * D, d | Float | double-precision, native format
1311
+ * F, f | Float | single-precision, native format
1312
+ * E | Float | double-precision, little-endian byte order
1313
+ * e | Float | single-precision, little-endian byte order
1314
+ * G | Float | double-precision, network (big-endian) byte order
1315
+ * g | Float | single-precision, network (big-endian) byte order
1316
+ *
1317
+ * String | |
1318
+ * Directive | Returns | Meaning
1319
+ * -----------------------------------------------------------------
1320
+ * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
1321
+ * a | String | arbitrary binary string
1322
+ * Z | String | null-terminated string
1323
+ * B | String | bit string (MSB first)
1324
+ * b | String | bit string (LSB first)
1325
+ * H | String | hex string (high nibble first)
1326
+ * h | String | hex string (low nibble first)
1327
+ * u | String | UU-encoded string
1328
+ * M | String | quoted-printable, MIME encoding (see RFC2045)
1329
+ * m | String | base64 encoded string (RFC 2045) (default)
1330
+ * | | base64 encoded string (RFC 4648) if followed by 0
1331
+ * P | String | pointer to a structure (fixed-length string)
1332
+ * p | String | pointer to a null-terminated string
1333
+ *
1334
+ * Misc. | |
1335
+ * Directive | Returns | Meaning
1336
+ * -----------------------------------------------------------------
1337
+ * @ | --- | skip to the offset given by the length argument
1338
+ * X | --- | skip backward one byte
1339
+ * x | --- | skip forward one byte
1340
+ */
1341
+
1342
+ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1343
+ {
1344
+ static const char hexdigits[] = "0123456789abcdef";
1345
+ char *init_s, *s, *send;
1346
+ char *p, *pend;
1347
+ VALUE ary;
1348
+ char type;
1349
+ long len, tmp_len;
1350
+ int star;
1351
+ #ifdef NATINT_PACK
1352
+ int natint; /* native integer */
1353
+ #endif
1354
+ int block_p = rb_block_given_p();
1355
+ int signed_p, integer_size, bigendian_p;
1356
+ #define UNPACK_PUSH(item) do {\
1357
+ VALUE item_val = (item);\
1358
+ if (block_p) {\
1359
+ rb_yield(item_val);\
1360
+ }\
1361
+ else {\
1362
+ rb_ary_push(ary, item_val);\
1363
+ }\
1364
+ } while (0)
1365
+
1366
+ // StringValue(str);
1367
+ StringValue(fmt);
1368
+ init_s = s = RSTRING_PTR(str);
1369
+ send = s + RSTRING_LEN(str);
1370
+ p = RSTRING_PTR(fmt);
1371
+ pend = p + RSTRING_LEN(fmt);
1372
+
1373
+ ary = block_p ? Qnil : rb_ary_new();
1374
+ while (p < pend) {
1375
+ int explicit_endian = 0;
1376
+ type = *p++;
1377
+ #ifdef NATINT_PACK
1378
+ natint = 0;
1379
+ #endif
1380
+
1381
+ if (ISSPACE(type)) continue;
1382
+ if (type == '#') {
1383
+ while ((p < pend) && (*p != '\n')) {
1384
+ p++;
1385
+ }
1386
+ continue;
1387
+ }
1388
+
1389
+ star = 0;
1390
+ {
1391
+ modifiers:
1392
+ switch (*p) {
1393
+ case '_':
1394
+ case '!':
1395
+
1396
+ if (strchr(natstr, type)) {
1397
+ #ifdef NATINT_PACK
1398
+ natint = 1;
1399
+ #endif
1400
+ p++;
1401
+ }
1402
+ else {
1403
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1404
+ }
1405
+ goto modifiers;
1406
+
1407
+ case '<':
1408
+ case '>':
1409
+ if (!strchr(endstr, type)) {
1410
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1411
+ }
1412
+ if (explicit_endian) {
1413
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1414
+ }
1415
+ explicit_endian = *p++;
1416
+ goto modifiers;
1417
+ }
1418
+ }
1419
+
1420
+ if (p >= pend)
1421
+ len = 1;
1422
+ else if (*p == '*') {
1423
+ star = 1;
1424
+ len = send - s;
1425
+ p++;
1426
+ }
1427
+ else if (ISDIGIT(*p)) {
1428
+ errno = 0;
1429
+ len = STRTOUL(p, (char**)&p, 10);
1430
+ if (errno) {
1431
+ rb_raise(rb_eRangeError, "pack length too big");
1432
+ }
1433
+ }
1434
+ else {
1435
+ len = (type != '@');
1436
+ }
1437
+
1438
+ switch (type) {
1439
+ case '%':
1440
+ rb_raise(rb_eArgError, "%% is not supported");
1441
+ break;
1442
+
1443
+ case 'A':
1444
+ if (len > send - s) len = send - s;
1445
+ {
1446
+ long end = len;
1447
+ char *t = s + len - 1;
1448
+
1449
+ while (t >= s) {
1450
+ if (*t != ' ' && *t != '\0') break;
1451
+ t--; len--;
1452
+ }
1453
+ UNPACK_PUSH(infected_str_new(s, len, str));
1454
+ s += end;
1455
+ }
1456
+ break;
1457
+
1458
+ case 'Z':
1459
+ {
1460
+ char *t = s;
1461
+
1462
+ if (len > send-s) len = send-s;
1463
+ while (t < s+len && *t) t++;
1464
+ UNPACK_PUSH(infected_str_new(s, t-s, str));
1465
+ if (t < send) t++;
1466
+ s = star ? t : s+len;
1467
+ }
1468
+ break;
1469
+
1470
+ case 'a':
1471
+ if (len > send - s) len = send - s;
1472
+ UNPACK_PUSH(infected_str_new(s, len, str));
1473
+ s += len;
1474
+ break;
1475
+
1476
+ case 'b':
1477
+ {
1478
+ VALUE bitstr;
1479
+ char *t;
1480
+ int bits;
1481
+ long i;
1482
+
1483
+ if (p[-1] == '*' || len > (send - s) * 8)
1484
+ len = (send - s) * 8;
1485
+ bits = 0;
1486
+ UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1487
+ t = RSTRING_PTR(bitstr);
1488
+ for (i=0; i<len; i++) {
1489
+ if (i & 7) bits >>= 1;
1490
+ else bits = *s++;
1491
+ *t++ = (bits & 1) ? '1' : '0';
1492
+ }
1493
+ }
1494
+ break;
1495
+
1496
+ case 'B':
1497
+ {
1498
+ VALUE bitstr;
1499
+ char *t;
1500
+ int bits;
1501
+ long i;
1502
+
1503
+ if (p[-1] == '*' || len > (send - s) * 8)
1504
+ len = (send - s) * 8;
1505
+ bits = 0;
1506
+ UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1507
+ t = RSTRING_PTR(bitstr);
1508
+ for (i=0; i<len; i++) {
1509
+ if (i & 7) bits <<= 1;
1510
+ else bits = *s++;
1511
+ *t++ = (bits & 128) ? '1' : '0';
1512
+ }
1513
+ }
1514
+ break;
1515
+
1516
+ case 'h':
1517
+ {
1518
+ VALUE bitstr;
1519
+ char *t;
1520
+ int bits;
1521
+ long i;
1522
+
1523
+ if (p[-1] == '*' || len > (send - s) * 2)
1524
+ len = (send - s) * 2;
1525
+ bits = 0;
1526
+ UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1527
+ t = RSTRING_PTR(bitstr);
1528
+ for (i=0; i<len; i++) {
1529
+ if (i & 1)
1530
+ bits >>= 4;
1531
+ else
1532
+ bits = *s++;
1533
+ *t++ = hexdigits[bits & 15];
1534
+ }
1535
+ }
1536
+ break;
1537
+
1538
+ case 'H':
1539
+ {
1540
+ VALUE bitstr;
1541
+ char *t;
1542
+ int bits;
1543
+ long i;
1544
+
1545
+ if (p[-1] == '*' || len > (send - s) * 2)
1546
+ len = (send - s) * 2;
1547
+ bits = 0;
1548
+ UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1549
+ t = RSTRING_PTR(bitstr);
1550
+ for (i=0; i<len; i++) {
1551
+ if (i & 1)
1552
+ bits <<= 4;
1553
+ else
1554
+ bits = *s++;
1555
+ *t++ = hexdigits[(bits >> 4) & 15];
1556
+ }
1557
+ }
1558
+ break;
1559
+
1560
+ case 'c':
1561
+ PACK_LENGTH_ADJUST_SIZE(sizeof(char));
1562
+ while (len-- > 0) {
1563
+ int c = *s++;
1564
+ if (c > (char)127) c-=256;
1565
+ UNPACK_PUSH(INT2FIX(c));
1566
+ }
1567
+ PACK_ITEM_ADJUST();
1568
+ break;
1569
+
1570
+ case 'C':
1571
+ PACK_LENGTH_ADJUST_SIZE(sizeof(unsigned char));
1572
+ while (len-- > 0) {
1573
+ unsigned char c = *s++;
1574
+ UNPACK_PUSH(INT2FIX(c));
1575
+ }
1576
+ PACK_ITEM_ADJUST();
1577
+ break;
1578
+
1579
+ case 's':
1580
+ signed_p = 1;
1581
+ integer_size = NATINT_LEN(short, 2);
1582
+ bigendian_p = BIGENDIAN_P();
1583
+ goto unpack_integer;
1584
+
1585
+ case 'S':
1586
+ signed_p = 0;
1587
+ integer_size = NATINT_LEN(short, 2);
1588
+ bigendian_p = BIGENDIAN_P();
1589
+ goto unpack_integer;
1590
+
1591
+ case 'i':
1592
+ signed_p = 1;
1593
+ integer_size = (int)sizeof(int);
1594
+ bigendian_p = BIGENDIAN_P();
1595
+ goto unpack_integer;
1596
+
1597
+ case 'I':
1598
+ signed_p = 0;
1599
+ integer_size = (int)sizeof(int);
1600
+ bigendian_p = BIGENDIAN_P();
1601
+ goto unpack_integer;
1602
+
1603
+ case 'l':
1604
+ signed_p = 1;
1605
+ integer_size = NATINT_LEN(long, 4);
1606
+ bigendian_p = BIGENDIAN_P();
1607
+ goto unpack_integer;
1608
+
1609
+ case 'L':
1610
+ signed_p = 0;
1611
+ integer_size = NATINT_LEN(long, 4);
1612
+ bigendian_p = BIGENDIAN_P();
1613
+ goto unpack_integer;
1614
+
1615
+ case 'q':
1616
+ signed_p = 1;
1617
+ integer_size = NATINT_LEN_Q;
1618
+ bigendian_p = BIGENDIAN_P();
1619
+ goto unpack_integer;
1620
+
1621
+ case 'Q':
1622
+ signed_p = 0;
1623
+ integer_size = NATINT_LEN_Q;
1624
+ bigendian_p = BIGENDIAN_P();
1625
+ goto unpack_integer;
1626
+
1627
+ case 'n':
1628
+ signed_p = 0;
1629
+ integer_size = 2;
1630
+ bigendian_p = 1;
1631
+ goto unpack_integer;
1632
+
1633
+ case 'N':
1634
+ signed_p = 0;
1635
+ integer_size = 4;
1636
+ bigendian_p = 1;
1637
+ goto unpack_integer;
1638
+
1639
+ case 'v':
1640
+ signed_p = 0;
1641
+ integer_size = 2;
1642
+ bigendian_p = 0;
1643
+ goto unpack_integer;
1644
+
1645
+ case 'V':
1646
+ signed_p = 0;
1647
+ integer_size = 4;
1648
+ bigendian_p = 0;
1649
+ goto unpack_integer;
1650
+
1651
+ unpack_integer:
1652
+ if (explicit_endian) {
1653
+ bigendian_p = explicit_endian == '>';
1654
+ }
1655
+
1656
+ switch (integer_size) {
1657
+ #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
1658
+ case SIZEOF_INT16_T:
1659
+ if (signed_p) {
1660
+ PACK_LENGTH_ADJUST_SIZE(sizeof(int16_t));
1661
+ while (len-- > 0) {
1662
+ union {
1663
+ int16_t i;
1664
+ char a[sizeof(int16_t)];
1665
+ } v;
1666
+ memcpy(v.a, s, sizeof(int16_t));
1667
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1668
+ s += sizeof(int16_t);
1669
+ UNPACK_PUSH(INT2FIX(v.i));
1670
+ }
1671
+ PACK_ITEM_ADJUST();
1672
+ }
1673
+ else {
1674
+ PACK_LENGTH_ADJUST_SIZE(sizeof(uint16_t));
1675
+ while (len-- > 0) {
1676
+ union {
1677
+ uint16_t i;
1678
+ char a[sizeof(uint16_t)];
1679
+ } v;
1680
+ memcpy(v.a, s, sizeof(uint16_t));
1681
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1682
+ s += sizeof(uint16_t);
1683
+ UNPACK_PUSH(INT2FIX(v.i));
1684
+ }
1685
+ PACK_ITEM_ADJUST();
1686
+ }
1687
+ break;
1688
+ #endif
1689
+
1690
+ #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
1691
+ case SIZEOF_INT32_T:
1692
+ if (signed_p) {
1693
+ PACK_LENGTH_ADJUST_SIZE(sizeof(int32_t));
1694
+ while (len-- > 0) {
1695
+ union {
1696
+ int32_t i;
1697
+ char a[sizeof(int32_t)];
1698
+ } v;
1699
+ memcpy(v.a, s, sizeof(int32_t));
1700
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1701
+ s += sizeof(int32_t);
1702
+ UNPACK_PUSH(INT2NUM(v.i));
1703
+ }
1704
+ PACK_ITEM_ADJUST();
1705
+ }
1706
+ else {
1707
+ PACK_LENGTH_ADJUST_SIZE(sizeof(uint32_t));
1708
+ while (len-- > 0) {
1709
+ union {
1710
+ uint32_t i;
1711
+ char a[sizeof(uint32_t)];
1712
+ } v;
1713
+ memcpy(v.a, s, sizeof(uint32_t));
1714
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1715
+ s += sizeof(uint32_t);
1716
+ UNPACK_PUSH(UINT2NUM(v.i));
1717
+ }
1718
+ PACK_ITEM_ADJUST();
1719
+ }
1720
+ break;
1721
+ #endif
1722
+
1723
+ #if defined(HAVE_INT64_T) && !defined(FORCE_BIG_PACK)
1724
+ case SIZEOF_INT64_T:
1725
+ if (signed_p) {
1726
+ PACK_LENGTH_ADJUST_SIZE(sizeof(int64_t));
1727
+ while (len-- > 0) {
1728
+ union {
1729
+ int64_t i;
1730
+ char a[sizeof(int64_t)];
1731
+ } v;
1732
+ memcpy(v.a, s, sizeof(int64_t));
1733
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1734
+ s += sizeof(int64_t);
1735
+ UNPACK_PUSH(INT64toNUM(v.i));
1736
+ }
1737
+ PACK_ITEM_ADJUST();
1738
+ }
1739
+ else {
1740
+ PACK_LENGTH_ADJUST_SIZE(sizeof(uint64_t));
1741
+ while (len-- > 0) {
1742
+ union {
1743
+ uint64_t i;
1744
+ char a[sizeof(uint64_t)];
1745
+ } v;
1746
+ memcpy(v.a, s, sizeof(uint64_t));
1747
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1748
+ s += sizeof(uint64_t);
1749
+ UNPACK_PUSH(UINT64toNUM(v.i));
1750
+ }
1751
+ PACK_ITEM_ADJUST();
1752
+ }
1753
+ break;
1754
+ #endif
1755
+
1756
+ default:
1757
+ if (integer_size > MAX_INTEGER_PACK_SIZE)
1758
+ rb_bug("unexpected integer size for pack: %d", integer_size);
1759
+ PACK_LENGTH_ADJUST_SIZE(integer_size);
1760
+ while (len-- > 0) {
1761
+ union {
1762
+ unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG];
1763
+ char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG*SIZEOF_LONG];
1764
+ } v;
1765
+ int num_longs = (integer_size+SIZEOF_LONG)/SIZEOF_LONG;
1766
+ int i;
1767
+
1768
+ if (signed_p && (signed char)s[bigendian_p ? 0 : (integer_size-1)] < 0)
1769
+ memset(v.a, 0xff, sizeof(long)*num_longs);
1770
+ else
1771
+ memset(v.a, 0, sizeof(long)*num_longs);
1772
+ if (bigendian_p)
1773
+ memcpy(v.a + sizeof(long)*num_longs - integer_size, s, integer_size);
1774
+ else
1775
+ memcpy(v.a, s, integer_size);
1776
+ if (bigendian_p) {
1777
+ for (i = 0; i < num_longs/2; i++) {
1778
+ unsigned long t = v.i[i];
1779
+ v.i[i] = v.i[num_longs-1-i];
1780
+ v.i[num_longs-1-i] = t;
1781
+ }
1782
+ }
1783
+ if (bigendian_p != BIGENDIAN_P()) {
1784
+ for (i = 0; i < num_longs; i++)
1785
+ v.i[i] = swapl(v.i[i]);
1786
+ }
1787
+ s += integer_size;
1788
+ UNPACK_PUSH(rb_big_unpack(v.i, num_longs));
1789
+ }
1790
+ PACK_ITEM_ADJUST();
1791
+ break;
1792
+ }
1793
+ break;
1794
+
1795
+ case 'f':
1796
+ case 'F':
1797
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1798
+ while (len-- > 0) {
1799
+ float tmp;
1800
+ memcpy(&tmp, s, sizeof(float));
1801
+ s += sizeof(float);
1802
+ UNPACK_PUSH(DBL2NUM((double)tmp));
1803
+ }
1804
+ PACK_ITEM_ADJUST();
1805
+ break;
1806
+
1807
+ case 'e':
1808
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1809
+ while (len-- > 0) {
1810
+ float tmp;
1811
+ FLOAT_CONVWITH(ftmp);
1812
+
1813
+ memcpy(&tmp, s, sizeof(float));
1814
+ s += sizeof(float);
1815
+ tmp = VTOHF(tmp,ftmp);
1816
+ UNPACK_PUSH(DBL2NUM((double)tmp));
1817
+ }
1818
+ PACK_ITEM_ADJUST();
1819
+ break;
1820
+
1821
+ case 'E':
1822
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1823
+ while (len-- > 0) {
1824
+ double tmp;
1825
+ DOUBLE_CONVWITH(dtmp);
1826
+
1827
+ memcpy(&tmp, s, sizeof(double));
1828
+ s += sizeof(double);
1829
+ tmp = VTOHD(tmp,dtmp);
1830
+ UNPACK_PUSH(DBL2NUM(tmp));
1831
+ }
1832
+ PACK_ITEM_ADJUST();
1833
+ break;
1834
+
1835
+ case 'D':
1836
+ case 'd':
1837
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1838
+ while (len-- > 0) {
1839
+ double tmp;
1840
+ memcpy(&tmp, s, sizeof(double));
1841
+ s += sizeof(double);
1842
+ UNPACK_PUSH(DBL2NUM(tmp));
1843
+ }
1844
+ PACK_ITEM_ADJUST();
1845
+ break;
1846
+
1847
+ case 'g':
1848
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1849
+ while (len-- > 0) {
1850
+ float tmp;
1851
+ FLOAT_CONVWITH(ftmp);
1852
+
1853
+ memcpy(&tmp, s, sizeof(float));
1854
+ s += sizeof(float);
1855
+ tmp = NTOHF(tmp,ftmp);
1856
+ UNPACK_PUSH(DBL2NUM((double)tmp));
1857
+ }
1858
+ PACK_ITEM_ADJUST();
1859
+ break;
1860
+
1861
+ case 'G':
1862
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1863
+ while (len-- > 0) {
1864
+ double tmp;
1865
+ DOUBLE_CONVWITH(dtmp);
1866
+
1867
+ memcpy(&tmp, s, sizeof(double));
1868
+ s += sizeof(double);
1869
+ tmp = NTOHD(tmp,dtmp);
1870
+ UNPACK_PUSH(DBL2NUM(tmp));
1871
+ }
1872
+ PACK_ITEM_ADJUST();
1873
+ break;
1874
+
1875
+ case 'U':
1876
+ if (len > send - s) len = send - s;
1877
+ while (len > 0 && s < send) {
1878
+ long alen = send - s;
1879
+ unsigned long l;
1880
+
1881
+ l = utf8_to_uv(s, &alen);
1882
+ s += alen; len--;
1883
+ UNPACK_PUSH(ULONG2NUM(l));
1884
+ }
1885
+ break;
1886
+
1887
+ case 'u':
1888
+ {
1889
+ VALUE buf = infected_str_new(0, (send - s)*3/4, str);
1890
+ char *ptr = RSTRING_PTR(buf);
1891
+ long total = 0;
1892
+
1893
+ while (s < send && *s > ' ' && *s < 'a') {
1894
+ long a,b,c,d;
1895
+ char hunk[4];
1896
+
1897
+ hunk[3] = '\0';
1898
+ len = (*s++ - ' ') & 077;
1899
+ total += len;
1900
+ if (total > RSTRING_LEN(buf)) {
1901
+ len -= total - RSTRING_LEN(buf);
1902
+ total = RSTRING_LEN(buf);
1903
+ }
1904
+
1905
+ while (len > 0) {
1906
+ long mlen = len > 3 ? 3 : len;
1907
+
1908
+ if (s < send && *s >= ' ')
1909
+ a = (*s++ - ' ') & 077;
1910
+ else
1911
+ a = 0;
1912
+ if (s < send && *s >= ' ')
1913
+ b = (*s++ - ' ') & 077;
1914
+ else
1915
+ b = 0;
1916
+ if (s < send && *s >= ' ')
1917
+ c = (*s++ - ' ') & 077;
1918
+ else
1919
+ c = 0;
1920
+ if (s < send && *s >= ' ')
1921
+ d = (*s++ - ' ') & 077;
1922
+ else
1923
+ d = 0;
1924
+ hunk[0] = (char)(a << 2 | b >> 4);
1925
+ hunk[1] = (char)(b << 4 | c >> 2);
1926
+ hunk[2] = (char)(c << 6 | d);
1927
+ memcpy(ptr, hunk, mlen);
1928
+ ptr += mlen;
1929
+ len -= mlen;
1930
+ }
1931
+ if (*s == '\r') s++;
1932
+ if (*s == '\n') s++;
1933
+ else if (s < send && (s+1 == send || s[1] == '\n'))
1934
+ s += 2; /* possible checksum byte */
1935
+ }
1936
+
1937
+ rb_str_set_len(buf, total);
1938
+ UNPACK_PUSH(buf);
1939
+ }
1940
+ break;
1941
+
1942
+ case 'm':
1943
+ {
1944
+ VALUE buf = infected_str_new(0, (send - s + 3)*3/4, str); /* +3 is for skipping paddings */
1945
+ char *ptr = RSTRING_PTR(buf);
1946
+ int a = -1,b = -1,c = 0,d = 0;
1947
+ static signed char b64_xtable[256];
1948
+
1949
+ if (b64_xtable['/'] <= 0) {
1950
+ int i;
1951
+
1952
+ for (i = 0; i < 256; i++) {
1953
+ b64_xtable[i] = -1;
1954
+ }
1955
+ for (i = 0; i < 64; i++) {
1956
+ b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1957
+ }
1958
+ }
1959
+ if (len == 0) {
1960
+ while (s < send) {
1961
+ a = b = c = d = -1;
1962
+ a = b64_xtable[(unsigned char)*s++];
1963
+ if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1964
+ b = b64_xtable[(unsigned char)*s++];
1965
+ if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1966
+ if (*s == '=') {
1967
+ if (s + 2 == send && *(s + 1) == '=') break;
1968
+ rb_raise(rb_eArgError, "invalid base64");
1969
+ }
1970
+ c = b64_xtable[(unsigned char)*s++];
1971
+ if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1972
+ if (s + 1 == send && *s == '=') break;
1973
+ d = b64_xtable[(unsigned char)*s++];
1974
+ if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1975
+ *ptr++ = castchar(a << 2 | b >> 4);
1976
+ *ptr++ = castchar(b << 4 | c >> 2);
1977
+ *ptr++ = castchar(c << 6 | d);
1978
+ }
1979
+ if (c == -1) {
1980
+ *ptr++ = castchar(a << 2 | b >> 4);
1981
+ if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1982
+ }
1983
+ else if (d == -1) {
1984
+ *ptr++ = castchar(a << 2 | b >> 4);
1985
+ *ptr++ = castchar(b << 4 | c >> 2);
1986
+ if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1987
+ }
1988
+ }
1989
+ else {
1990
+ while (s < send) {
1991
+ a = b = c = d = -1;
1992
+ while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1993
+ if (s >= send) break;
1994
+ s++;
1995
+ while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1996
+ if (s >= send) break;
1997
+ s++;
1998
+ while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1999
+ if (*s == '=' || s >= send) break;
2000
+ s++;
2001
+ while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
2002
+ if (*s == '=' || s >= send) break;
2003
+ s++;
2004
+ *ptr++ = castchar(a << 2 | b >> 4);
2005
+ *ptr++ = castchar(b << 4 | c >> 2);
2006
+ *ptr++ = castchar(c << 6 | d);
2007
+ a = -1;
2008
+ }
2009
+ if (a != -1 && b != -1) {
2010
+ if (c == -1)
2011
+ *ptr++ = castchar(a << 2 | b >> 4);
2012
+ else {
2013
+ *ptr++ = castchar(a << 2 | b >> 4);
2014
+ *ptr++ = castchar(b << 4 | c >> 2);
2015
+ }
2016
+ }
2017
+ }
2018
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
2019
+ UNPACK_PUSH(buf);
2020
+ }
2021
+ break;
2022
+
2023
+ case 'M':
2024
+ {
2025
+ VALUE buf = infected_str_new(0, send - s, str);
2026
+ char *ptr = RSTRING_PTR(buf), *ss = s;
2027
+ int c1, c2;
2028
+
2029
+ while (s < send) {
2030
+ if (*s == '=') {
2031
+ if (++s == send) break;
2032
+ if (s+1 < send && *s == '\r' && *(s+1) == '\n')
2033
+ s++;
2034
+ if (*s != '\n') {
2035
+ if ((c1 = hex2num(*s)) == -1) break;
2036
+ if (++s == send) break;
2037
+ if ((c2 = hex2num(*s)) == -1) break;
2038
+ *ptr++ = castchar(c1 << 4 | c2);
2039
+ }
2040
+ }
2041
+ else {
2042
+ *ptr++ = *s;
2043
+ }
2044
+ s++;
2045
+ ss = s;
2046
+ }
2047
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
2048
+ rb_str_buf_cat(buf, ss, send-ss);
2049
+ ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID);
2050
+ UNPACK_PUSH(buf);
2051
+ }
2052
+ break;
2053
+
2054
+ case '@':
2055
+ if (len > RSTRING_LEN(str))
2056
+ rb_raise(rb_eArgError, "@ outside of string");
2057
+ s = RSTRING_PTR(str) + len;
2058
+ break;
2059
+
2060
+ case 'X':
2061
+ if (len > s - RSTRING_PTR(str))
2062
+ rb_raise(rb_eArgError, "X outside of string");
2063
+ s -= len;
2064
+ break;
2065
+
2066
+ case 'x':
2067
+ if (len > send - s)
2068
+ rb_raise(rb_eArgError, "x outside of string");
2069
+ s += len;
2070
+ break;
2071
+
2072
+ case 'P':
2073
+ if (sizeof(char *) <= (size_t)(send - s)) {
2074
+ VALUE tmp = Qnil;
2075
+ char *t;
2076
+
2077
+ memcpy(&t, s, sizeof(char *));
2078
+ s += sizeof(char *);
2079
+
2080
+ if (t) {
2081
+ VALUE a, *p, *pend;
2082
+
2083
+ if (!(a = rb_str_associated(str))) {
2084
+ rb_raise(rb_eArgError, "no associated pointer");
2085
+ }
2086
+ p = RARRAY_PTR(a);
2087
+ pend = p + RARRAY_LEN(a);
2088
+ while (p < pend) {
2089
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
2090
+ if (len < RSTRING_LEN(*p)) {
2091
+ tmp = rb_tainted_str_new(t, len);
2092
+ rb_str_associate(tmp, a);
2093
+ }
2094
+ else {
2095
+ tmp = *p;
2096
+ }
2097
+ break;
2098
+ }
2099
+ p++;
2100
+ }
2101
+ if (p == pend) {
2102
+ rb_raise(rb_eArgError, "non associated pointer");
2103
+ }
2104
+ }
2105
+ UNPACK_PUSH(tmp);
2106
+ }
2107
+ break;
2108
+
2109
+ case 'p':
2110
+ if (len > (long)((send - s) / sizeof(char *)))
2111
+ len = (send - s) / sizeof(char *);
2112
+ while (len-- > 0) {
2113
+ if ((size_t)(send - s) < sizeof(char *))
2114
+ break;
2115
+ else {
2116
+ VALUE tmp = Qnil;
2117
+ char *t;
2118
+
2119
+ memcpy(&t, s, sizeof(char *));
2120
+ s += sizeof(char *);
2121
+
2122
+ if (t) {
2123
+ VALUE a, *p, *pend;
2124
+
2125
+ if (!(a = rb_str_associated(str))) {
2126
+ rb_raise(rb_eArgError, "no associated pointer");
2127
+ }
2128
+ p = RARRAY_PTR(a);
2129
+ pend = p + RARRAY_LEN(a);
2130
+ while (p < pend) {
2131
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
2132
+ tmp = *p;
2133
+ break;
2134
+ }
2135
+ p++;
2136
+ }
2137
+ if (p == pend) {
2138
+ rb_raise(rb_eArgError, "non associated pointer");
2139
+ }
2140
+ }
2141
+ UNPACK_PUSH(tmp);
2142
+ }
2143
+ }
2144
+ break;
2145
+
2146
+ case 'w':
2147
+ {
2148
+ unsigned long ul = 0;
2149
+ unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8);
2150
+
2151
+ while (len > 0 && s < send) {
2152
+ ul <<= 7;
2153
+ ul |= (*s & 0x7f);
2154
+ if (!(*s++ & 0x80)) {
2155
+ UNPACK_PUSH(ULONG2NUM(ul));
2156
+ len--;
2157
+ ul = 0;
2158
+ }
2159
+ else if (ul & ulmask) {
2160
+ VALUE big = rb_uint2big(ul);
2161
+ VALUE big128 = rb_uint2big(128);
2162
+ while (s < send) {
2163
+ big = rb_big_mul(big, big128);
2164
+ big = rb_big_plus(big, rb_uint2big(*s & 0x7f));
2165
+ if (!(*s++ & 0x80)) {
2166
+ UNPACK_PUSH(big);
2167
+ len--;
2168
+ ul = 0;
2169
+ break;
2170
+ }
2171
+ }
2172
+ }
2173
+ }
2174
+ }
2175
+ break;
2176
+
2177
+ default:
2178
+ rb_warning("unknown unpack directive '%c' in '%s'",
2179
+ type, RSTRING_PTR(fmt));
2180
+ break;
2181
+ }
2182
+ }
2183
+
2184
+ *parsed_len = s - init_s;
2185
+ return ary;
2186
+ }
2187
+
2188
+ #define BYTEWIDTH 8
2189
+
2190
+ int
2191
+ rb_uv_to_utf8(char buf[6], unsigned long uv)
2192
+ {
2193
+ if (uv <= 0x7f) {
2194
+ buf[0] = (char)uv;
2195
+ return 1;
2196
+ }
2197
+ if (uv <= 0x7ff) {
2198
+ buf[0] = castchar(((uv>>6)&0xff)|0xc0);
2199
+ buf[1] = castchar((uv&0x3f)|0x80);
2200
+ return 2;
2201
+ }
2202
+ if (uv <= 0xffff) {
2203
+ buf[0] = castchar(((uv>>12)&0xff)|0xe0);
2204
+ buf[1] = castchar(((uv>>6)&0x3f)|0x80);
2205
+ buf[2] = castchar((uv&0x3f)|0x80);
2206
+ return 3;
2207
+ }
2208
+ if (uv <= 0x1fffff) {
2209
+ buf[0] = castchar(((uv>>18)&0xff)|0xf0);
2210
+ buf[1] = castchar(((uv>>12)&0x3f)|0x80);
2211
+ buf[2] = castchar(((uv>>6)&0x3f)|0x80);
2212
+ buf[3] = castchar((uv&0x3f)|0x80);
2213
+ return 4;
2214
+ }
2215
+ if (uv <= 0x3ffffff) {
2216
+ buf[0] = castchar(((uv>>24)&0xff)|0xf8);
2217
+ buf[1] = castchar(((uv>>18)&0x3f)|0x80);
2218
+ buf[2] = castchar(((uv>>12)&0x3f)|0x80);
2219
+ buf[3] = castchar(((uv>>6)&0x3f)|0x80);
2220
+ buf[4] = castchar((uv&0x3f)|0x80);
2221
+ return 5;
2222
+ }
2223
+ if (uv <= 0x7fffffff) {
2224
+ buf[0] = castchar(((uv>>30)&0xff)|0xfc);
2225
+ buf[1] = castchar(((uv>>24)&0x3f)|0x80);
2226
+ buf[2] = castchar(((uv>>18)&0x3f)|0x80);
2227
+ buf[3] = castchar(((uv>>12)&0x3f)|0x80);
2228
+ buf[4] = castchar(((uv>>6)&0x3f)|0x80);
2229
+ buf[5] = castchar((uv&0x3f)|0x80);
2230
+ return 6;
2231
+ }
2232
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
2233
+
2234
+ UNREACHABLE;
2235
+ }
2236
+
2237
+ static const unsigned long utf8_limits[] = {
2238
+ 0x0, /* 1 */
2239
+ 0x80, /* 2 */
2240
+ 0x800, /* 3 */
2241
+ 0x10000, /* 4 */
2242
+ 0x200000, /* 5 */
2243
+ 0x4000000, /* 6 */
2244
+ 0x80000000, /* 7 */
2245
+ };
2246
+
2247
+ static unsigned long
2248
+ utf8_to_uv(const char *p, long *lenp)
2249
+ {
2250
+ int c = *p++ & 0xff;
2251
+ unsigned long uv = c;
2252
+ long n;
2253
+
2254
+ if (!(uv & 0x80)) {
2255
+ *lenp = 1;
2256
+ return uv;
2257
+ }
2258
+ if (!(uv & 0x40)) {
2259
+ *lenp = 1;
2260
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
2261
+ }
2262
+
2263
+ if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
2264
+ else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
2265
+ else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
2266
+ else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
2267
+ else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
2268
+ else {
2269
+ *lenp = 1;
2270
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
2271
+ }
2272
+ if (n > *lenp) {
2273
+ rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
2274
+ n, *lenp);
2275
+ }
2276
+ *lenp = n--;
2277
+ if (n != 0) {
2278
+ while (n--) {
2279
+ c = *p++ & 0xff;
2280
+ if ((c & 0xc0) != 0x80) {
2281
+ *lenp -= n + 1;
2282
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
2283
+ }
2284
+ else {
2285
+ c &= 0x3f;
2286
+ uv = uv << 6 | c;
2287
+ }
2288
+ }
2289
+ }
2290
+ n = *lenp - 1;
2291
+ if (uv < utf8_limits[n]) {
2292
+ rb_raise(rb_eArgError, "redundant UTF-8 sequence");
2293
+ }
2294
+ return uv;
2295
+ }