zscan 1.3 → 2.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/pack/pack.c ADDED
@@ -0,0 +1,2295 @@
1
+ /**********************************************************************
2
+
3
+ pack.c -
4
+
5
+ $Author$
6
+ created at: Thu Feb 10 15:17:05 JST 1994
7
+
8
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
9
+
10
+ **********************************************************************/
11
+
12
+ #include "ruby/ruby.h"
13
+ #include "ruby/encoding.h"
14
+ #include "internal.h"
15
+ #include <sys/types.h>
16
+ #include <ctype.h>
17
+ #include <errno.h>
18
+
19
+ /*
20
+ * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
21
+ * instead of HAVE_LONG_LONG or LONG_LONG.
22
+ * This means q! and Q! always mean the standard long long type and
23
+ * cause ArgumentError on platforms which have no long long type,
24
+ * even if the platform has an implementation specific 64bit type.
25
+ * This behavior is consistent with the document of pack/unpack.
26
+ */
27
+ #ifdef HAVE_TRUE_LONG_LONG
28
+ static const char natstr[] = "sSiIlLqQ";
29
+ #else
30
+ static const char natstr[] = "sSiIlL";
31
+ #endif
32
+ static const char endstr[] = "sSiIlLqQ";
33
+
34
+ #ifdef HAVE_TRUE_LONG_LONG
35
+ /* It is intentional to use long long instead of LONG_LONG. */
36
+ # define NATINT_LEN_Q NATINT_LEN(long long, 8)
37
+ #else
38
+ # define NATINT_LEN_Q 8
39
+ #endif
40
+
41
+ #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
42
+ # define NATINT_PACK
43
+ #endif
44
+
45
+ #ifdef DYNAMIC_ENDIAN
46
+ /* for universal binary of NEXTSTEP and MacOS X */
47
+ /* useless since autoconf 2.63? */
48
/*
 * Run-time byte order probe.
 *
 * On the first call a static int is set to 1 and its lowest-addressed
 * byte is inspected: a zero byte there means the value 1 is stored in a
 * higher-addressed byte, i.e. the host is big endian.  The answer is cached
 * in a second static, so later calls only read the cached value.
 *
 * Returns 1 for big endian, 0 otherwise.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached_result;

    if (!probed) {
        const char *first_byte;

        probed = 1;
        first_byte = (const char *)&probed;
        cached_result = (first_byte[0] == 0);
    }
    return cached_result;
}
60
+ # define BIGENDIAN_P() (is_bigendian())
61
+ #elif defined(WORDS_BIGENDIAN)
62
+ # define BIGENDIAN_P() 1
63
+ #else
64
+ # define BIGENDIAN_P() 0
65
+ #endif
66
+
67
+ #ifdef NATINT_PACK
68
+ # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
69
+ #else
70
+ # define NATINT_LEN(type,len) ((int)sizeof(type))
71
+ #endif
72
+
73
+ #if SIZEOF_LONG == 8
74
+ # define INT64toNUM(x) LONG2NUM(x)
75
+ # define UINT64toNUM(x) ULONG2NUM(x)
76
+ #elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8
77
+ # define INT64toNUM(x) LL2NUM(x)
78
+ # define UINT64toNUM(x) ULL2NUM(x)
79
+ #endif
80
+
81
/*
 * define_swapx(x, xtype) expands to the definition of
 *
 *     static xtype swap<x>(xtype z)
 *
 * which returns z with the order of its bytes reversed.  It is the generic
 * fallback used when no fixed-width swap macro fits the size of xtype.
 *
 * The bytes are moved through two small stack buffers with memcpy, so the
 * swapper performs no heap allocation and never reads an object through a
 * pointer of a different type.
 */
#define define_swapx(x, xtype)                      \
static xtype                                        \
TOKEN_PASTE(swap,x)(xtype z)                        \
{                                                   \
    unsigned char src[sizeof(xtype)];               \
    unsigned char dst[sizeof(xtype)];               \
    xtype r;                                        \
    size_t i;                                       \
                                                    \
    memcpy(src, &z, sizeof(xtype));                 \
    for (i = 0; i < sizeof(xtype); i++) {           \
        dst[sizeof(xtype)-i-1] = src[i];            \
    }                                               \
    memcpy(&r, dst, sizeof(xtype));                 \
    return r;                                       \
}
102
+
103
+ #if GCC_VERSION_SINCE(4,3,0)
104
+ # define swap32(x) __builtin_bswap32(x)
105
+ # define swap64(x) __builtin_bswap64(x)
106
+ #endif
107
+
108
+ #ifndef swap16
109
+ # define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
110
+ #endif
111
+
112
#ifndef swap32
/*
 * Portable 32-bit byte swap, used when no compiler builtin is available.
 * The argument is converted to uint32_t before any shifting so that a
 * signed argument (e.g. an int32_t) never has a bit shifted into, or an
 * arithmetic shift out of, its sign bit.  The argument is evaluated more
 * than once, so it must be free of side effects.
 */
# define swap32(x)      ((uint32_t)((((uint32_t)(x)&0xFF)<<24)        \
                        |(((uint32_t)(x)>>24)&0xFF)                   \
                        |(((uint32_t)(x)&0x0000FF00)<<8)              \
                        |(((uint32_t)(x)&0x00FF0000)>>8)      ))
#endif
118
+
119
+ #ifndef swap64
120
+ # ifdef HAVE_INT64_T
121
+ # define byte_in_64bit(n) ((uint64_t)0xff << (n))
122
+ # define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
123
+ |(((x)>>56)&0xFF) \
124
+ |(((x)&byte_in_64bit(8))<<40) \
125
+ |(((x)&byte_in_64bit(48))>>40) \
126
+ |(((x)&byte_in_64bit(16))<<24) \
127
+ |(((x)&byte_in_64bit(40))>>24) \
128
+ |(((x)&byte_in_64bit(24))<<8) \
129
+ |(((x)&byte_in_64bit(32))>>8)))
130
+ # endif
131
+ #endif
132
+
133
+ #if SIZEOF_SHORT == 2
134
+ # define swaps(x) swap16(x)
135
+ #elif SIZEOF_SHORT == 4
136
+ # define swaps(x) swap32(x)
137
+ #else
138
+ define_swapx(s,short)
139
+ #endif
140
+
141
+ #if SIZEOF_INT == 2
142
+ # define swapi(x) swap16(x)
143
+ #elif SIZEOF_INT == 4
144
+ # define swapi(x) swap32(x)
145
+ #else
146
+ define_swapx(i,int)
147
+ #endif
148
+
149
+ #if SIZEOF_LONG == 4
150
+ # define swapl(x) swap32(x)
151
+ #elif SIZEOF_LONG == 8
152
+ # define swapl(x) swap64(x)
153
+ #else
154
+ define_swapx(l,long)
155
+ #endif
156
+
157
+ #ifdef HAVE_LONG_LONG
158
+ # if SIZEOF_LONG_LONG == 8
159
+ # define swapll(x) swap64(x)
160
+ # else
161
+ define_swapx(ll,LONG_LONG)
162
+ # endif
163
+ #endif
164
+
165
+ #if SIZEOF_FLOAT == 4 && defined(HAVE_INT32_T)
166
+ # define swapf(x) swap32(x)
167
+ # define FLOAT_SWAPPER uint32_t
168
+ #else
169
+ define_swapx(f,float)
170
+ #endif
171
+
172
+ #if SIZEOF_DOUBLE == 8 && defined(HAVE_INT64_T)
173
+ # define swapd(x) swap64(x)
174
+ # define DOUBLE_SWAPPER uint64_t
175
+ #elif SIZEOF_DOUBLE == 8 && defined(HAVE_INT32_T)
176
/*
 * Byte-reverses a double using two 32-bit words.
 *
 * The value is copied into a pair of uint32_t, the two words are exchanged
 * and each one is passed through swap32, and the pair is copied back into a
 * double which is returned.
 */
static double
swapd(const double d)
{
    uint32_t halves[2] = {0, 0};
    uint32_t first_half;
    double result = d;

    memcpy(halves, &result, sizeof(double));

    /* exchange the words, swapping the bytes inside each one */
    first_half = halves[0];
    halves[0] = swap32(halves[1]);
    halves[1] = swap32(first_half);

    memcpy(&result, halves, sizeof(double));
    return result;
}
191
+ #else
192
+ define_swapx(d, double)
193
+ #endif
194
+
195
+ #undef define_swapx
196
+
197
+ #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
198
+ #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
199
+ #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
200
+ #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
201
+ #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
202
+ #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
203
+ #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
204
+ #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
205
+
206
+ #ifdef FLOAT_SWAPPER
207
+ # define FLOAT_CONVWITH(y) FLOAT_SWAPPER y;
208
+ # define HTONF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
209
+ (y) = rb_htonf((FLOAT_SWAPPER)(y)), \
210
+ memcpy(&(x),&(y),sizeof(float)), \
211
+ (x))
212
+ # define HTOVF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
213
+ (y) = rb_htovf((FLOAT_SWAPPER)(y)), \
214
+ memcpy(&(x),&(y),sizeof(float)), \
215
+ (x))
216
+ # define NTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
217
+ (y) = rb_ntohf((FLOAT_SWAPPER)(y)), \
218
+ memcpy(&(x),&(y),sizeof(float)), \
219
+ (x))
220
+ # define VTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
221
+ (y) = rb_vtohf((FLOAT_SWAPPER)(y)), \
222
+ memcpy(&(x),&(y),sizeof(float)), \
223
+ (x))
224
+ #else
225
+ # define FLOAT_CONVWITH(y)
226
+ # define HTONF(x,y) rb_htonf(x)
227
+ # define HTOVF(x,y) rb_htovf(x)
228
+ # define NTOHF(x,y) rb_ntohf(x)
229
+ # define VTOHF(x,y) rb_vtohf(x)
230
+ #endif
231
+
232
+ #ifdef DOUBLE_SWAPPER
233
+ # define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y;
234
+ # define HTOND(x,y) (memcpy(&(y),&(x),sizeof(double)), \
235
+ (y) = rb_htond((DOUBLE_SWAPPER)(y)), \
236
+ memcpy(&(x),&(y),sizeof(double)), \
237
+ (x))
238
+ # define HTOVD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
239
+ (y) = rb_htovd((DOUBLE_SWAPPER)(y)), \
240
+ memcpy(&(x),&(y),sizeof(double)), \
241
+ (x))
242
+ # define NTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
243
+ (y) = rb_ntohd((DOUBLE_SWAPPER)(y)), \
244
+ memcpy(&(x),&(y),sizeof(double)), \
245
+ (x))
246
+ # define VTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
247
+ (y) = rb_vtohd((DOUBLE_SWAPPER)(y)), \
248
+ memcpy(&(x),&(y),sizeof(double)), \
249
+ (x))
250
+ #else
251
+ # define DOUBLE_CONVWITH(y)
252
+ # define HTOND(x,y) rb_htond(x)
253
+ # define HTOVD(x,y) rb_htovd(x)
254
+ # define NTOHD(x,y) rb_ntohd(x)
255
+ # define VTOHD(x,y) rb_vtohd(x)
256
+ #endif
257
+
258
+ static unsigned long
259
+ num2i32(VALUE x)
260
+ {
261
+ x = rb_to_int(x); /* is nil OK? (should not) */
262
+
263
+ if (FIXNUM_P(x)) return FIX2LONG(x);
264
+ if (RB_TYPE_P(x, T_BIGNUM)) {
265
+ return rb_big2ulong_pack(x);
266
+ }
267
+ rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x));
268
+
269
+ UNREACHABLE;
270
+ }
271
+
272
+ #define MAX_INTEGER_PACK_SIZE 8
273
+ /* #define FORCE_BIG_PACK */
274
+
275
+ static const char toofew[] = "too few arguments";
276
+
277
+ static void encodes(VALUE,const char*,long,int,int);
278
+ static void qpencode(VALUE,VALUE,long);
279
+
280
+ static unsigned long utf8_to_uv(const char*,long*);
281
+
282
+ /*
283
+ * call-seq:
284
+ * arr.pack ( aTemplateString ) -> aBinaryString
285
+ *
286
+ * Packs the contents of <i>arr</i> into a binary sequence according to
287
+ * the directives in <i>aTemplateString</i> (see the table below)
288
+ * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
289
+ * which gives the width of the resulting field. The remaining
290
+ * directives also may take a count, indicating the number of array
291
+ * elements to convert. If the count is an asterisk
292
+ * (``<code>*</code>''), all remaining array elements will be
293
+ * converted. Any of the directives ``<code>sSiIlL</code>'' may be
294
+ * followed by an underscore (``<code>_</code>'') or
295
+ * exclamation mark (``<code>!</code>'') to use the underlying
296
+ * platform's native size for the specified type; otherwise, they use a
297
+ * platform-independent size. Spaces are ignored in the template
298
+ * string. See also <code>String#unpack</code>.
299
+ *
300
+ * a = [ "a", "b", "c" ]
301
+ * n = [ 65, 66, 67 ]
302
+ * a.pack("A3A3A3") #=> "a b c "
303
+ * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
304
+ * n.pack("ccc") #=> "ABC"
305
+ *
306
+ * Directives for +pack+.
307
+ *
308
+ * Integer | Array |
309
+ * Directive | Element | Meaning
310
+ * ---------------------------------------------------------------------------
311
+ * C | Integer | 8-bit unsigned (unsigned char)
312
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
313
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
314
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
315
+ * | |
316
+ * c | Integer | 8-bit signed (signed char)
317
+ * s | Integer | 16-bit signed, native endian (int16_t)
318
+ * l | Integer | 32-bit signed, native endian (int32_t)
319
+ * q | Integer | 64-bit signed, native endian (int64_t)
320
+ * | |
321
+ * S_, S! | Integer | unsigned short, native endian
322
+ * I, I_, I! | Integer | unsigned int, native endian
323
+ * L_, L! | Integer | unsigned long, native endian
324
+ * Q_, Q! | Integer | unsigned long long, native endian (ArgumentError
325
+ * | | if the platform has no long long type.)
326
+ * | | (Q_ and Q! are available since Ruby 2.1.)
327
+ * | |
328
+ * s_, s! | Integer | signed short, native endian
329
+ * i, i_, i! | Integer | signed int, native endian
330
+ * l_, l! | Integer | signed long, native endian
331
+ * q_, q! | Integer | signed long long, native endian (ArgumentError
332
+ * | | if the platform has no long long type.)
333
+ * | | (q_ and q! are available since Ruby 2.1.)
334
+ * | |
335
+ * S> L> Q> | Integer | same as the directives without ">" except
336
+ * s> l> q> | | big endian
337
+ * S!> I!> | | (available since Ruby 1.9.3)
338
+ * L!> Q!> | | "S>" is same as "n"
339
+ * s!> i!> | | "L>" is same as "N"
340
+ * l!> q!> | |
341
+ * | |
342
+ * S< L< Q< | Integer | same as the directives without "<" except
343
+ * s< l< q< | | little endian
344
+ * S!< I!< | | (available since Ruby 1.9.3)
345
+ * L!< Q!< | | "S<" is same as "v"
346
+ * s!< i!< | | "L<" is same as "V"
347
+ * l!< q!< | |
348
+ * | |
349
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
350
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
351
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
352
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
353
+ * | |
354
+ * U | Integer | UTF-8 character
355
+ * w | Integer | BER-compressed integer
356
+ *
357
+ * Float | |
358
+ * Directive | | Meaning
359
+ * ---------------------------------------------------------------------------
360
+ * D, d | Float | double-precision, native format
361
+ * F, f | Float | single-precision, native format
362
+ * E | Float | double-precision, little-endian byte order
363
+ * e | Float | single-precision, little-endian byte order
364
+ * G | Float | double-precision, network (big-endian) byte order
365
+ * g | Float | single-precision, network (big-endian) byte order
366
+ *
367
+ * String | |
368
+ * Directive | | Meaning
369
+ * ---------------------------------------------------------------------------
370
+ * A | String | arbitrary binary string (space padded, count is width)
371
+ * a | String | arbitrary binary string (null padded, count is width)
372
+ * Z | String | same as ``a'', except that null is added with *
373
+ * B | String | bit string (MSB first)
374
+ * b | String | bit string (LSB first)
375
+ * H | String | hex string (high nibble first)
376
+ * h | String | hex string (low nibble first)
377
+ * u | String | UU-encoded string
378
+ * M | String | quoted printable, MIME encoding (see RFC2045)
379
+ * m | String | base64 encoded string (see RFC 2045, count is width)
380
+ * | | (if count is 0, no line feeds are added, see RFC 4648)
381
+ * P | String | pointer to a structure (fixed-length string)
382
+ * p | String | pointer to a null-terminated string
383
+ *
384
+ * Misc. | |
385
+ * Directive | | Meaning
386
+ * ---------------------------------------------------------------------------
387
+ * @ | --- | moves to absolute position
388
+ * X | --- | back up a byte
389
+ * x | --- | null byte
390
+ */
391
+
392
+ __attribute__ ((unused))
393
+ static VALUE
394
+ pack_pack(VALUE ary, VALUE fmt)
395
+ {
396
+ static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
397
+ static const char spc10[] = " ";
398
+ const char *p, *pend;
399
+ VALUE res, from, associates = 0;
400
+ char type;
401
+ long items, len, idx, plen;
402
+ const char *ptr;
403
+ int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
404
+ #ifdef NATINT_PACK
405
+ int natint; /* native integer */
406
+ #endif
407
+ int integer_size, bigendian_p;
408
+
409
+ StringValue(fmt);
410
+ p = RSTRING_PTR(fmt);
411
+ pend = p + RSTRING_LEN(fmt);
412
+ res = rb_str_buf_new(0);
413
+
414
+ items = RARRAY_LEN(ary);
415
+ idx = 0;
416
+
417
+ #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
418
+ #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW)
419
+ #define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW)
420
+
421
+ while (p < pend) {
422
+ int explicit_endian = 0;
423
+ if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
424
+ rb_raise(rb_eRuntimeError, "format string modified");
425
+ }
426
+ type = *p++; /* get data type */
427
+ #ifdef NATINT_PACK
428
+ natint = 0;
429
+ #endif
430
+
431
+ if (ISSPACE(type)) continue;
432
+ if (type == '#') {
433
+ while ((p < pend) && (*p != '\n')) {
434
+ p++;
435
+ }
436
+ continue;
437
+ }
438
+
439
+ {
440
+ modifiers:
441
+ switch (*p) {
442
+ case '_':
443
+ case '!':
444
+ if (strchr(natstr, type)) {
445
+ #ifdef NATINT_PACK
446
+ natint = 1;
447
+ #endif
448
+ p++;
449
+ }
450
+ else {
451
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
452
+ }
453
+ goto modifiers;
454
+
455
+ case '<':
456
+ case '>':
457
+ if (!strchr(endstr, type)) {
458
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
459
+ }
460
+ if (explicit_endian) {
461
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
462
+ }
463
+ explicit_endian = *p++;
464
+ goto modifiers;
465
+ }
466
+ }
467
+
468
+ if (*p == '*') { /* set data length */
469
+ len = strchr("@Xxu", type) ? 0
470
+ : strchr("PMm", type) ? 1
471
+ : items;
472
+ p++;
473
+ }
474
+ else if (ISDIGIT(*p)) {
475
+ errno = 0;
476
+ len = STRTOUL(p, (char**)&p, 10);
477
+ if (errno) {
478
+ rb_raise(rb_eRangeError, "pack length too big");
479
+ }
480
+ }
481
+ else {
482
+ len = 1;
483
+ }
484
+
485
+ switch (type) {
486
+ case 'U':
487
+ /* if encoding is US-ASCII, upgrade to UTF-8 */
488
+ if (enc_info == 1) enc_info = 2;
489
+ break;
490
+ case 'm': case 'M': case 'u':
491
+ /* keep US-ASCII (do nothing) */
492
+ break;
493
+ default:
494
+ /* fall back to BINARY */
495
+ enc_info = 0;
496
+ break;
497
+ }
498
+ switch (type) {
499
+ case 'A': case 'a': case 'Z':
500
+ case 'B': case 'b':
501
+ case 'H': case 'h':
502
+ from = NEXTFROM;
503
+ if (NIL_P(from)) {
504
+ ptr = "";
505
+ plen = 0;
506
+ }
507
+ else {
508
+ StringValue(from);
509
+ ptr = RSTRING_PTR(from);
510
+ plen = RSTRING_LEN(from);
511
+ OBJ_INFECT(res, from);
512
+ }
513
+
514
+ if (p[-1] == '*')
515
+ len = plen;
516
+
517
+ switch (type) {
518
+ case 'a': /* arbitrary binary string (null padded) */
519
+ case 'A': /* arbitrary binary string (ASCII space padded) */
520
+ case 'Z': /* null terminated string */
521
+ if (plen >= len) {
522
+ rb_str_buf_cat(res, ptr, len);
523
+ if (p[-1] == '*' && type == 'Z')
524
+ rb_str_buf_cat(res, nul10, 1);
525
+ }
526
+ else {
527
+ rb_str_buf_cat(res, ptr, plen);
528
+ len -= plen;
529
+ while (len >= 10) {
530
+ rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
531
+ len -= 10;
532
+ }
533
+ rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
534
+ }
535
+ break;
536
+
537
+ #define castchar(from) (char)((from) & 0xff)
538
+
539
+ case 'b': /* bit string (ascending) */
540
+ {
541
+ int byte = 0;
542
+ long i, j = 0;
543
+
544
+ if (len > plen) {
545
+ j = (len - plen + 1)/2;
546
+ len = plen;
547
+ }
548
+ for (i=0; i++ < len; ptr++) {
549
+ if (*ptr & 1)
550
+ byte |= 128;
551
+ if (i & 7)
552
+ byte >>= 1;
553
+ else {
554
+ char c = castchar(byte);
555
+ rb_str_buf_cat(res, &c, 1);
556
+ byte = 0;
557
+ }
558
+ }
559
+ if (len & 7) {
560
+ char c;
561
+ byte >>= 7 - (len & 7);
562
+ c = castchar(byte);
563
+ rb_str_buf_cat(res, &c, 1);
564
+ }
565
+ len = j;
566
+ goto grow;
567
+ }
568
+ break;
569
+
570
+ case 'B': /* bit string (descending) */
571
+ {
572
+ int byte = 0;
573
+ long i, j = 0;
574
+
575
+ if (len > plen) {
576
+ j = (len - plen + 1)/2;
577
+ len = plen;
578
+ }
579
+ for (i=0; i++ < len; ptr++) {
580
+ byte |= *ptr & 1;
581
+ if (i & 7)
582
+ byte <<= 1;
583
+ else {
584
+ char c = castchar(byte);
585
+ rb_str_buf_cat(res, &c, 1);
586
+ byte = 0;
587
+ }
588
+ }
589
+ if (len & 7) {
590
+ char c;
591
+ byte <<= 7 - (len & 7);
592
+ c = castchar(byte);
593
+ rb_str_buf_cat(res, &c, 1);
594
+ }
595
+ len = j;
596
+ goto grow;
597
+ }
598
+ break;
599
+
600
+ case 'h': /* hex string (low nibble first) */
601
+ {
602
+ int byte = 0;
603
+ long i, j = 0;
604
+
605
+ if (len > plen) {
606
+ j = (len + 1) / 2 - (plen + 1) / 2;
607
+ len = plen;
608
+ }
609
+ for (i=0; i++ < len; ptr++) {
610
+ if (ISALPHA(*ptr))
611
+ byte |= (((*ptr & 15) + 9) & 15) << 4;
612
+ else
613
+ byte |= (*ptr & 15) << 4;
614
+ if (i & 1)
615
+ byte >>= 4;
616
+ else {
617
+ char c = castchar(byte);
618
+ rb_str_buf_cat(res, &c, 1);
619
+ byte = 0;
620
+ }
621
+ }
622
+ if (len & 1) {
623
+ char c = castchar(byte);
624
+ rb_str_buf_cat(res, &c, 1);
625
+ }
626
+ len = j;
627
+ goto grow;
628
+ }
629
+ break;
630
+
631
+ case 'H': /* hex string (high nibble first) */
632
+ {
633
+ int byte = 0;
634
+ long i, j = 0;
635
+
636
+ if (len > plen) {
637
+ j = (len + 1) / 2 - (plen + 1) / 2;
638
+ len = plen;
639
+ }
640
+ for (i=0; i++ < len; ptr++) {
641
+ if (ISALPHA(*ptr))
642
+ byte |= ((*ptr & 15) + 9) & 15;
643
+ else
644
+ byte |= *ptr & 15;
645
+ if (i & 1)
646
+ byte <<= 4;
647
+ else {
648
+ char c = castchar(byte);
649
+ rb_str_buf_cat(res, &c, 1);
650
+ byte = 0;
651
+ }
652
+ }
653
+ if (len & 1) {
654
+ char c = castchar(byte);
655
+ rb_str_buf_cat(res, &c, 1);
656
+ }
657
+ len = j;
658
+ goto grow;
659
+ }
660
+ break;
661
+ }
662
+ break;
663
+
664
+ case 'c': /* signed char */
665
+ case 'C': /* unsigned char */
666
+ while (len-- > 0) {
667
+ char c;
668
+
669
+ from = NEXTFROM;
670
+ c = (char)num2i32(from);
671
+ rb_str_buf_cat(res, &c, sizeof(char));
672
+ }
673
+ break;
674
+
675
+ case 's': /* s for int16_t, s! for signed short */
676
+ integer_size = NATINT_LEN(short, 2);
677
+ bigendian_p = BIGENDIAN_P();
678
+ goto pack_integer;
679
+
680
+ case 'S': /* S for uint16_t, S! for unsigned short */
681
+ integer_size = NATINT_LEN(short, 2);
682
+ bigendian_p = BIGENDIAN_P();
683
+ goto pack_integer;
684
+
685
+ case 'i': /* i and i! for signed int */
686
+ integer_size = (int)sizeof(int);
687
+ bigendian_p = BIGENDIAN_P();
688
+ goto pack_integer;
689
+
690
+ case 'I': /* I and I! for unsigned int */
691
+ integer_size = (int)sizeof(int);
692
+ bigendian_p = BIGENDIAN_P();
693
+ goto pack_integer;
694
+
695
+ case 'l': /* l for int32_t, l! for signed long */
696
+ integer_size = NATINT_LEN(long, 4);
697
+ bigendian_p = BIGENDIAN_P();
698
+ goto pack_integer;
699
+
700
+ case 'L': /* L for uint32_t, L! for unsigned long */
701
+ integer_size = NATINT_LEN(long, 4);
702
+ bigendian_p = BIGENDIAN_P();
703
+ goto pack_integer;
704
+
705
+ case 'q': /* q for int64_t, q! for signed long long */
706
+ integer_size = NATINT_LEN_Q;
707
+ bigendian_p = BIGENDIAN_P();
708
+ goto pack_integer;
709
+
710
+ case 'Q': /* Q for uint64_t, Q! for unsigned long long */
711
+ integer_size = NATINT_LEN_Q;
712
+ bigendian_p = BIGENDIAN_P();
713
+ goto pack_integer;
714
+
715
+ case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
716
+ integer_size = 2;
717
+ bigendian_p = 1;
718
+ goto pack_integer;
719
+
720
+ case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
721
+ integer_size = 4;
722
+ bigendian_p = 1;
723
+ goto pack_integer;
724
+
725
+ case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
726
+ integer_size = 2;
727
+ bigendian_p = 0;
728
+ goto pack_integer;
729
+
730
+ case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
731
+ integer_size = 4;
732
+ bigendian_p = 0;
733
+ goto pack_integer;
734
+
735
+ pack_integer:
736
+ if (explicit_endian) {
737
+ bigendian_p = explicit_endian == '>';
738
+ }
739
+
740
+ switch (integer_size) {
741
+ #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
742
+ case SIZEOF_INT16_T:
743
+ while (len-- > 0) {
744
+ union {
745
+ int16_t i;
746
+ char a[sizeof(int16_t)];
747
+ } v;
748
+
749
+ from = NEXTFROM;
750
+ v.i = (int16_t)num2i32(from);
751
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
752
+ rb_str_buf_cat(res, v.a, sizeof(int16_t));
753
+ }
754
+ break;
755
+ #endif
756
+
757
+ #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
758
+ case SIZEOF_INT32_T:
759
+ while (len-- > 0) {
760
+ union {
761
+ int32_t i;
762
+ char a[sizeof(int32_t)];
763
+ } v;
764
+
765
+ from = NEXTFROM;
766
+ v.i = (int32_t)num2i32(from);
767
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
768
+ rb_str_buf_cat(res, v.a, sizeof(int32_t));
769
+ }
770
+ break;
771
+ #endif
772
+
773
+ #if defined(HAVE_INT64_T) && SIZEOF_LONG == SIZEOF_INT64_T && !defined(FORCE_BIG_PACK)
774
+ case SIZEOF_INT64_T:
775
+ while (len-- > 0) {
776
+ union {
777
+ int64_t i;
778
+ char a[sizeof(int64_t)];
779
+ } v;
780
+
781
+ from = NEXTFROM;
782
+ v.i = num2i32(from); /* can return 64bit value if SIZEOF_LONG == SIZEOF_INT64_T */
783
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
784
+ rb_str_buf_cat(res, v.a, sizeof(int64_t));
785
+ }
786
+ break;
787
+ #endif
788
+
789
+ default:
790
+ if (integer_size > MAX_INTEGER_PACK_SIZE)
791
+ rb_bug("unexpected intger size for pack: %d", integer_size);
792
+ while (len-- > 0) {
793
+ union {
794
+ unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG];
795
+ char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG*SIZEOF_LONG];
796
+ } v;
797
+ int num_longs = (integer_size+SIZEOF_LONG-1)/SIZEOF_LONG;
798
+ int i;
799
+
800
+ from = NEXTFROM;
801
+ rb_big_pack(from, v.i, num_longs);
802
+ if (bigendian_p) {
803
+ for (i = 0; i < num_longs/2; i++) {
804
+ unsigned long t = v.i[i];
805
+ v.i[i] = v.i[num_longs-1-i];
806
+ v.i[num_longs-1-i] = t;
807
+ }
808
+ }
809
+ if (bigendian_p != BIGENDIAN_P()) {
810
+ for (i = 0; i < num_longs; i++)
811
+ v.i[i] = swapl(v.i[i]);
812
+ }
813
+ rb_str_buf_cat(res,
814
+ bigendian_p ?
815
+ v.a + sizeof(long)*num_longs - integer_size :
816
+ v.a,
817
+ integer_size);
818
+ }
819
+ break;
820
+ }
821
+ break;
822
+
823
+ case 'f': /* single precision float in native format */
824
+ case 'F': /* ditto */
825
+ while (len-- > 0) {
826
+ float f;
827
+
828
+ from = NEXTFROM;
829
+ f = (float)RFLOAT_VALUE(rb_to_float(from));
830
+ rb_str_buf_cat(res, (char*)&f, sizeof(float));
831
+ }
832
+ break;
833
+
834
+ case 'e': /* single precision float in VAX byte-order */
835
+ while (len-- > 0) {
836
+ float f;
837
+ FLOAT_CONVWITH(ftmp);
838
+
839
+ from = NEXTFROM;
840
+ f = (float)RFLOAT_VALUE(rb_to_float(from));
841
+ f = HTOVF(f,ftmp);
842
+ rb_str_buf_cat(res, (char*)&f, sizeof(float));
843
+ }
844
+ break;
845
+
846
+ case 'E': /* double precision float in VAX byte-order */
847
+ while (len-- > 0) {
848
+ double d;
849
+ DOUBLE_CONVWITH(dtmp);
850
+
851
+ from = NEXTFROM;
852
+ d = RFLOAT_VALUE(rb_to_float(from));
853
+ d = HTOVD(d,dtmp);
854
+ rb_str_buf_cat(res, (char*)&d, sizeof(double));
855
+ }
856
+ break;
857
+
858
+ case 'd': /* double precision float in native format */
859
+ case 'D': /* ditto */
860
+ while (len-- > 0) {
861
+ double d;
862
+
863
+ from = NEXTFROM;
864
+ d = RFLOAT_VALUE(rb_to_float(from));
865
+ rb_str_buf_cat(res, (char*)&d, sizeof(double));
866
+ }
867
+ break;
868
+
869
+ case 'g': /* single precision float in network byte-order */
870
+ while (len-- > 0) {
871
+ float f;
872
+ FLOAT_CONVWITH(ftmp);
873
+
874
+ from = NEXTFROM;
875
+ f = (float)RFLOAT_VALUE(rb_to_float(from));
876
+ f = HTONF(f,ftmp);
877
+ rb_str_buf_cat(res, (char*)&f, sizeof(float));
878
+ }
879
+ break;
880
+
881
+ case 'G': /* double precision float in network byte-order */
882
+ while (len-- > 0) {
883
+ double d;
884
+ DOUBLE_CONVWITH(dtmp);
885
+
886
+ from = NEXTFROM;
887
+ d = RFLOAT_VALUE(rb_to_float(from));
888
+ d = HTOND(d,dtmp);
889
+ rb_str_buf_cat(res, (char*)&d, sizeof(double));
890
+ }
891
+ break;
892
+
893
+ case 'x': /* null byte */
894
+ grow:
895
+ while (len >= 10) {
896
+ rb_str_buf_cat(res, nul10, 10);
897
+ len -= 10;
898
+ }
899
+ rb_str_buf_cat(res, nul10, len);
900
+ break;
901
+
902
+ case 'X': /* back up byte */
903
+ shrink:
904
+ plen = RSTRING_LEN(res);
905
+ if (plen < len)
906
+ rb_raise(rb_eArgError, "X outside of string");
907
+ rb_str_set_len(res, plen - len);
908
+ break;
909
+
910
+ case '@': /* null fill to absolute position */
911
+ len -= RSTRING_LEN(res);
912
+ if (len > 0) goto grow;
913
+ len = -len;
914
+ if (len > 0) goto shrink;
915
+ break;
916
+
917
+ case '%':
918
+ rb_raise(rb_eArgError, "%% is not supported");
919
+ break;
920
+
921
+ case 'U': /* Unicode character */
922
+ while (len-- > 0) {
923
+ SIGNED_VALUE l;
924
+ char buf[8];
925
+ int le;
926
+
927
+ from = NEXTFROM;
928
+ from = rb_to_int(from);
929
+ l = NUM2LONG(from);
930
+ if (l < 0) {
931
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
932
+ }
933
+ le = rb_uv_to_utf8(buf, l);
934
+ rb_str_buf_cat(res, (char*)buf, le);
935
+ }
936
+ break;
937
+
938
+ case 'u': /* uuencoded string */
939
+ case 'm': /* base64 encoded string */
940
+ from = NEXTFROM;
941
+ StringValue(from);
942
+ ptr = RSTRING_PTR(from);
943
+ plen = RSTRING_LEN(from);
944
+
945
+ if (len == 0 && type == 'm') {
946
+ encodes(res, ptr, plen, type, 0);
947
+ ptr += plen;
948
+ break;
949
+ }
950
+ if (len <= 2)
951
+ len = 45;
952
+ else if (len > 63 && type == 'u')
953
+ len = 63;
954
+ else
955
+ len = len / 3 * 3;
956
+ while (plen > 0) {
957
+ long todo;
958
+
959
+ if (plen > len)
960
+ todo = len;
961
+ else
962
+ todo = plen;
963
+ encodes(res, ptr, todo, type, 1);
964
+ plen -= todo;
965
+ ptr += todo;
966
+ }
967
+ break;
968
+
969
+ case 'M': /* quoted-printable encoded string */
970
+ from = rb_obj_as_string(NEXTFROM);
971
+ if (len <= 1)
972
+ len = 72;
973
+ qpencode(res, from, len);
974
+ break;
975
+
976
+ case 'P': /* pointer to packed byte string */
977
+ from = THISFROM;
978
+ if (!NIL_P(from)) {
979
+ StringValue(from);
980
+ if (RSTRING_LEN(from) < len) {
981
+ rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
982
+ RSTRING_LEN(from), len);
983
+ }
984
+ }
985
+ len = 1;
986
+ /* FALL THROUGH */
987
+ case 'p': /* pointer to string */
988
+ while (len-- > 0) {
989
+ char *t;
990
+ from = NEXTFROM;
991
+ if (NIL_P(from)) {
992
+ t = 0;
993
+ }
994
+ else {
995
+ t = StringValuePtr(from);
996
+ }
997
+ if (!associates) {
998
+ associates = rb_ary_new();
999
+ }
1000
+ rb_ary_push(associates, from);
1001
+ rb_obj_taint(from);
1002
+ rb_str_buf_cat(res, (char*)&t, sizeof(char*));
1003
+ }
1004
+ break;
1005
+
1006
+ case 'w': /* BER compressed integer */
1007
+ while (len-- > 0) {
1008
+ unsigned long ul;
1009
+ VALUE buf = rb_str_new(0, 0);
1010
+ char c, *bufs, *bufe;
1011
+
1012
+ from = NEXTFROM;
1013
+ if (RB_TYPE_P(from, T_BIGNUM)) {
1014
+ VALUE big128 = rb_uint2big(128);
1015
+ while (RB_TYPE_P(from, T_BIGNUM)) {
1016
+ from = rb_big_divmod(from, big128);
1017
+ c = castchar(NUM2INT(RARRAY_PTR(from)[1]) | 0x80); /* mod */
1018
+ rb_str_buf_cat(buf, &c, sizeof(char));
1019
+ from = RARRAY_PTR(from)[0]; /* div */
1020
+ }
1021
+ }
1022
+
1023
+ {
1024
+ long l = NUM2LONG(from);
1025
+ if (l < 0) {
1026
+ rb_raise(rb_eArgError, "can't compress negative numbers");
1027
+ }
1028
+ ul = l;
1029
+ }
1030
+
1031
+ while (ul) {
1032
+ c = castchar((ul & 0x7f) | 0x80);
1033
+ rb_str_buf_cat(buf, &c, sizeof(char));
1034
+ ul >>= 7;
1035
+ }
1036
+
1037
+ if (RSTRING_LEN(buf)) {
1038
+ bufs = RSTRING_PTR(buf);
1039
+ bufe = bufs + RSTRING_LEN(buf) - 1;
1040
+ *bufs &= 0x7f; /* clear continue bit */
1041
+ while (bufs < bufe) { /* reverse */
1042
+ c = *bufs;
1043
+ *bufs++ = *bufe;
1044
+ *bufe-- = c;
1045
+ }
1046
+ rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
1047
+ }
1048
+ else {
1049
+ c = 0;
1050
+ rb_str_buf_cat(res, &c, sizeof(char));
1051
+ }
1052
+ }
1053
+ break;
1054
+
1055
+ default:
1056
+ rb_warning("unknown pack directive '%c' in '%s'",
1057
+ type, RSTRING_PTR(fmt));
1058
+ break;
1059
+ }
1060
+ }
1061
+
1062
+ if (associates) {
1063
+ rb_str_associate(res, associates);
1064
+ }
1065
+ OBJ_INFECT(res, fmt);
1066
+ switch (enc_info) {
1067
+ case 1:
1068
+ ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
1069
+ break;
1070
+ case 2:
1071
+ rb_enc_set_index(res, rb_utf8_encindex());
1072
+ break;
1073
+ default:
1074
+ /* do nothing, keep ASCII-8BIT */
1075
+ break;
1076
+ }
1077
+ return res;
1078
+ }
1079
+
1080
+ static const char uu_table[] =
1081
+ "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
1082
+ static const char b64_table[] =
1083
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1084
+
1085
+ static void
1086
+ encodes(VALUE str, const char *s, long len, int type, int tail_lf)
1087
+ {
1088
+ char buff[4096];
1089
+ long i = 0;
1090
+ const char *trans = type == 'u' ? uu_table : b64_table;
1091
+ char padding;
1092
+
1093
+ if (type == 'u') {
1094
+ buff[i++] = (char)len + ' ';
1095
+ padding = '`';
1096
+ }
1097
+ else {
1098
+ padding = '=';
1099
+ }
1100
+ while (len >= 3) {
1101
+ while (len >= 3 && sizeof(buff)-i >= 4) {
1102
+ buff[i++] = trans[077 & (*s >> 2)];
1103
+ buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1104
+ buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
1105
+ buff[i++] = trans[077 & s[2]];
1106
+ s += 3;
1107
+ len -= 3;
1108
+ }
1109
+ if (sizeof(buff)-i < 4) {
1110
+ rb_str_buf_cat(str, buff, i);
1111
+ i = 0;
1112
+ }
1113
+ }
1114
+
1115
+ if (len == 2) {
1116
+ buff[i++] = trans[077 & (*s >> 2)];
1117
+ buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1118
+ buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
1119
+ buff[i++] = padding;
1120
+ }
1121
+ else if (len == 1) {
1122
+ buff[i++] = trans[077 & (*s >> 2)];
1123
+ buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
1124
+ buff[i++] = padding;
1125
+ buff[i++] = padding;
1126
+ }
1127
+ if (tail_lf) buff[i++] = '\n';
1128
+ rb_str_buf_cat(str, buff, i);
1129
+ }
1130
+
1131
/* Upper-case hexadecimal digits, indexed by nibble value (0..15). */
static const char hex_table[] = "0123456789" "ABCDEF";
1132
+
1133
+ static void
1134
+ qpencode(VALUE str, VALUE from, long len)
1135
+ {
1136
+ char buff[1024];
1137
+ long i = 0, n = 0, prev = EOF;
1138
+ unsigned char *s = (unsigned char*)RSTRING_PTR(from);
1139
+ unsigned char *send = s + RSTRING_LEN(from);
1140
+
1141
+ while (s < send) {
1142
+ if ((*s > 126) ||
1143
+ (*s < 32 && *s != '\n' && *s != '\t') ||
1144
+ (*s == '=')) {
1145
+ buff[i++] = '=';
1146
+ buff[i++] = hex_table[*s >> 4];
1147
+ buff[i++] = hex_table[*s & 0x0f];
1148
+ n += 3;
1149
+ prev = EOF;
1150
+ }
1151
+ else if (*s == '\n') {
1152
+ if (prev == ' ' || prev == '\t') {
1153
+ buff[i++] = '=';
1154
+ buff[i++] = *s;
1155
+ }
1156
+ buff[i++] = *s;
1157
+ n = 0;
1158
+ prev = *s;
1159
+ }
1160
+ else {
1161
+ buff[i++] = *s;
1162
+ n++;
1163
+ prev = *s;
1164
+ }
1165
+ if (n > len) {
1166
+ buff[i++] = '=';
1167
+ buff[i++] = '\n';
1168
+ n = 0;
1169
+ prev = '\n';
1170
+ }
1171
+ if (i > 1024 - 5) {
1172
+ rb_str_buf_cat(str, buff, i);
1173
+ i = 0;
1174
+ }
1175
+ s++;
1176
+ }
1177
+ if (n > 0) {
1178
+ buff[i++] = '=';
1179
+ buff[i++] = '\n';
1180
+ }
1181
+ if (i > 0) {
1182
+ rb_str_buf_cat(str, buff, i);
1183
+ }
1184
+ }
1185
+
1186
/*
 * Return the numeric value (0..15) of the hexadecimal digit c, accepting
 * both upper and lower case, or -1 when c is not a hexadecimal digit.
 */
static inline int
hex2num(char c)
{
    if (c >= '0' && c <= '9') return c - '0';
    if (c >= 'a' && c <= 'f') return c - 'a' + 10;
    if (c >= 'A' && c <= 'F') return c - 'A' + 10;
    return -1;
}
1203
+
1204
/*
 * Clamp the repeat count `len` to the number of whole sz-byte items left
 * between `s` and `send`.  When the count was explicit (no `star`), the
 * number of items that could not be read is left in `tmp_len`; otherwise
 * `tmp_len` is 0.  Uses the enclosing function's len, tmp_len, star, s and
 * send variables.
 */
#define PACK_LENGTH_ADJUST_SIZE(sz) do { \
    const long pack_items_avail_ = (long)((send-s)/(sz)); \
    tmp_len = 0; \
    if (len > pack_items_avail_) { \
        if (!star) { \
            tmp_len = len - pack_items_avail_; \
        } \
        len = pack_items_avail_; \
    } \
} while (0)

/*
 * When results are collected in `ary` (no block given), extend it by
 * `tmp_len` elements by storing nil at the new last index.
 */
#define PACK_ITEM_ADJUST() do { \
    if (!block_p && tmp_len > 0) \
        rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
} while (0)
1218
+
1219
+ static VALUE
1220
+ infected_str_new(const char *ptr, long len, VALUE str)
1221
+ {
1222
+ VALUE s = rb_str_new(ptr, len);
1223
+
1224
+ OBJ_INFECT(s, str);
1225
+ return s;
1226
+ }
1227
+
1228
+ /*
1229
+ * call-seq:
1230
+ * str.unpack(format) -> anArray
1231
+ *
1232
+ * Decodes <i>str</i> (which may contain binary data) according to the
1233
+ * format string, returning an array of each value extracted. The
1234
+ * format string consists of a sequence of single-character directives,
1235
+ * summarized in the table at the end of this entry.
1236
+ * Each directive may be followed
1237
+ * by a number, indicating the number of times to repeat with this
1238
+ * directive. An asterisk (``<code>*</code>'') will use up all
1239
+ * remaining elements. The directives <code>sSiIlL</code> may each be
1240
+ * followed by an underscore (``<code>_</code>'') or
1241
+ * exclamation mark (``<code>!</code>'') to use the underlying
1242
+ * platform's native size for the specified type; otherwise, it uses a
1243
+ * platform-independent consistent size. Spaces are ignored in the
1244
+ * format string. See also <code>Array#pack</code>.
1245
+ *
1246
+ * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1247
+ * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1248
+ * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1249
+ * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1250
+ * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1251
+ * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1252
+ * "now=20is".unpack('M*') #=> ["now is"]
1253
+ * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1254
+ *
1255
+ * This table summarizes the various formats and the Ruby classes
1256
+ * returned by each.
1257
+ *
1258
+ * Integer | |
1259
+ * Directive | Returns | Meaning
1260
+ * -----------------------------------------------------------------
1261
+ * C | Integer | 8-bit unsigned (unsigned char)
1262
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
1263
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
1264
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
1265
+ * | |
1266
+ * c | Integer | 8-bit signed (signed char)
1267
+ * s | Integer | 16-bit signed, native endian (int16_t)
1268
+ * l | Integer | 32-bit signed, native endian (int32_t)
1269
+ * q | Integer | 64-bit signed, native endian (int64_t)
1270
+ * | |
1271
+ * S_, S! | Integer | unsigned short, native endian
1272
+ * I, I_, I! | Integer | unsigned int, native endian
1273
+ * L_, L! | Integer | unsigned long, native endian
1274
+ * Q_, Q! | Integer | unsigned long long, native endian (ArgumentError
1275
+ * | | if the platform has no long long type.)
1276
+ * | | (Q_ and Q! are available since Ruby 2.1.)
1277
+ * | |
1278
+ * s_, s! | Integer | signed short, native endian
1279
+ * i, i_, i! | Integer | signed int, native endian
1280
+ * l_, l! | Integer | signed long, native endian
1281
+ * q_, q! | Integer | signed long long, native endian (ArgumentError
1282
+ * | | if the platform has no long long type.)
1283
+ * | | (q_ and q! are available since Ruby 2.1.)
1284
+ * | |
1285
+ * S> L> Q> | Integer | same as the directives without ">" except
1286
+ * s> l> q> | | big endian
1287
+ * S!> I!> | | (available since Ruby 1.9.3)
1288
+ * L!> Q!> | | "S>" is same as "n"
1289
+ * s!> i!> | | "L>" is same as "N"
1290
+ * l!> q!> | |
1291
+ * | |
1292
+ * S< L< Q< | Integer | same as the directives without "<" except
1293
+ * s< l< q< | | little endian
1294
+ * S!< I!< | | (available since Ruby 1.9.3)
1295
+ * L!< Q!< | | "S<" is same as "v"
1296
+ * s!< i!< | | "L<" is same as "V"
1297
+ * l!< q!< | |
1298
+ * | |
1299
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
1300
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
1301
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
1302
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
1303
+ * | |
1304
+ * U | Integer | UTF-8 character
1305
+ * w | Integer | BER-compressed integer (see Array#pack)
1306
+ *
1307
+ * Float | |
1308
+ * Directive | Returns | Meaning
1309
+ * -----------------------------------------------------------------
1310
+ * D, d | Float | double-precision, native format
1311
+ * F, f | Float | single-precision, native format
1312
+ * E | Float | double-precision, little-endian byte order
1313
+ * e | Float | single-precision, little-endian byte order
1314
+ * G | Float | double-precision, network (big-endian) byte order
1315
+ * g | Float | single-precision, network (big-endian) byte order
1316
+ *
1317
+ * String | |
1318
+ * Directive | Returns | Meaning
1319
+ * -----------------------------------------------------------------
1320
+ * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
1321
+ * a | String | arbitrary binary string
1322
+ * Z | String | null-terminated string
1323
+ * B | String | bit string (MSB first)
1324
+ * b | String | bit string (LSB first)
1325
+ * H | String | hex string (high nibble first)
1326
+ * h | String | hex string (low nibble first)
1327
+ * u | String | UU-encoded string
1328
+ * M | String | quoted-printable, MIME encoding (see RFC2045)
1329
+ * m | String | base64 encoded string (RFC 2045) (default)
1330
+ * | | base64 encoded string (RFC 4648) if followed by 0
1331
+ * P | String | pointer to a structure (fixed-length string)
1332
+ * p | String | pointer to a null-terminated string
1333
+ *
1334
+ * Misc. | |
1335
+ * Directive | Returns | Meaning
1336
+ * -----------------------------------------------------------------
1337
+ * @ | --- | skip to the offset given by the length argument
1338
+ * X | --- | skip backward one byte
1339
+ * x | --- | skip forward one byte
1340
+ */
1341
+
1342
+ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1343
+ {
1344
+ static const char hexdigits[] = "0123456789abcdef";
1345
+ char *init_s, *s, *send;
1346
+ char *p, *pend;
1347
+ VALUE ary;
1348
+ char type;
1349
+ long len, tmp_len;
1350
+ int star;
1351
+ #ifdef NATINT_PACK
1352
+ int natint; /* native integer */
1353
+ #endif
1354
+ int block_p = rb_block_given_p();
1355
+ int signed_p, integer_size, bigendian_p;
1356
+ #define UNPACK_PUSH(item) do {\
1357
+ VALUE item_val = (item);\
1358
+ if (block_p) {\
1359
+ rb_yield(item_val);\
1360
+ }\
1361
+ else {\
1362
+ rb_ary_push(ary, item_val);\
1363
+ }\
1364
+ } while (0)
1365
+
1366
+ // StringValue(str);
1367
+ StringValue(fmt);
1368
+ init_s = s = RSTRING_PTR(str);
1369
+ send = s + RSTRING_LEN(str);
1370
+ p = RSTRING_PTR(fmt);
1371
+ pend = p + RSTRING_LEN(fmt);
1372
+
1373
+ ary = block_p ? Qnil : rb_ary_new();
1374
+ while (p < pend) {
1375
+ int explicit_endian = 0;
1376
+ type = *p++;
1377
+ #ifdef NATINT_PACK
1378
+ natint = 0;
1379
+ #endif
1380
+
1381
+ if (ISSPACE(type)) continue;
1382
+ if (type == '#') {
1383
+ while ((p < pend) && (*p != '\n')) {
1384
+ p++;
1385
+ }
1386
+ continue;
1387
+ }
1388
+
1389
+ star = 0;
1390
+ {
1391
+ modifiers:
1392
+ switch (*p) {
1393
+ case '_':
1394
+ case '!':
1395
+
1396
+ if (strchr(natstr, type)) {
1397
+ #ifdef NATINT_PACK
1398
+ natint = 1;
1399
+ #endif
1400
+ p++;
1401
+ }
1402
+ else {
1403
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1404
+ }
1405
+ goto modifiers;
1406
+
1407
+ case '<':
1408
+ case '>':
1409
+ if (!strchr(endstr, type)) {
1410
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1411
+ }
1412
+ if (explicit_endian) {
1413
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1414
+ }
1415
+ explicit_endian = *p++;
1416
+ goto modifiers;
1417
+ }
1418
+ }
1419
+
1420
+ if (p >= pend)
1421
+ len = 1;
1422
+ else if (*p == '*') {
1423
+ star = 1;
1424
+ len = send - s;
1425
+ p++;
1426
+ }
1427
+ else if (ISDIGIT(*p)) {
1428
+ errno = 0;
1429
+ len = STRTOUL(p, (char**)&p, 10);
1430
+ if (errno) {
1431
+ rb_raise(rb_eRangeError, "pack length too big");
1432
+ }
1433
+ }
1434
+ else {
1435
+ len = (type != '@');
1436
+ }
1437
+
1438
+ switch (type) {
1439
+ case '%':
1440
+ rb_raise(rb_eArgError, "%% is not supported");
1441
+ break;
1442
+
1443
+ case 'A':
1444
+ if (len > send - s) len = send - s;
1445
+ {
1446
+ long end = len;
1447
+ char *t = s + len - 1;
1448
+
1449
+ while (t >= s) {
1450
+ if (*t != ' ' && *t != '\0') break;
1451
+ t--; len--;
1452
+ }
1453
+ UNPACK_PUSH(infected_str_new(s, len, str));
1454
+ s += end;
1455
+ }
1456
+ break;
1457
+
1458
+ case 'Z':
1459
+ {
1460
+ char *t = s;
1461
+
1462
+ if (len > send-s) len = send-s;
1463
+ while (t < s+len && *t) t++;
1464
+ UNPACK_PUSH(infected_str_new(s, t-s, str));
1465
+ if (t < send) t++;
1466
+ s = star ? t : s+len;
1467
+ }
1468
+ break;
1469
+
1470
+ case 'a':
1471
+ if (len > send - s) len = send - s;
1472
+ UNPACK_PUSH(infected_str_new(s, len, str));
1473
+ s += len;
1474
+ break;
1475
+
1476
+ case 'b':
1477
+ {
1478
+ VALUE bitstr;
1479
+ char *t;
1480
+ int bits;
1481
+ long i;
1482
+
1483
+ if (p[-1] == '*' || len > (send - s) * 8)
1484
+ len = (send - s) * 8;
1485
+ bits = 0;
1486
+ UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1487
+ t = RSTRING_PTR(bitstr);
1488
+ for (i=0; i<len; i++) {
1489
+ if (i & 7) bits >>= 1;
1490
+ else bits = *s++;
1491
+ *t++ = (bits & 1) ? '1' : '0';
1492
+ }
1493
+ }
1494
+ break;
1495
+
1496
+ case 'B':
1497
+ {
1498
+ VALUE bitstr;
1499
+ char *t;
1500
+ int bits;
1501
+ long i;
1502
+
1503
+ if (p[-1] == '*' || len > (send - s) * 8)
1504
+ len = (send - s) * 8;
1505
+ bits = 0;
1506
+ UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1507
+ t = RSTRING_PTR(bitstr);
1508
+ for (i=0; i<len; i++) {
1509
+ if (i & 7) bits <<= 1;
1510
+ else bits = *s++;
1511
+ *t++ = (bits & 128) ? '1' : '0';
1512
+ }
1513
+ }
1514
+ break;
1515
+
1516
+ case 'h':
1517
+ {
1518
+ VALUE bitstr;
1519
+ char *t;
1520
+ int bits;
1521
+ long i;
1522
+
1523
+ if (p[-1] == '*' || len > (send - s) * 2)
1524
+ len = (send - s) * 2;
1525
+ bits = 0;
1526
+ UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1527
+ t = RSTRING_PTR(bitstr);
1528
+ for (i=0; i<len; i++) {
1529
+ if (i & 1)
1530
+ bits >>= 4;
1531
+ else
1532
+ bits = *s++;
1533
+ *t++ = hexdigits[bits & 15];
1534
+ }
1535
+ }
1536
+ break;
1537
+
1538
+ case 'H':
1539
+ {
1540
+ VALUE bitstr;
1541
+ char *t;
1542
+ int bits;
1543
+ long i;
1544
+
1545
+ if (p[-1] == '*' || len > (send - s) * 2)
1546
+ len = (send - s) * 2;
1547
+ bits = 0;
1548
+ UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
1549
+ t = RSTRING_PTR(bitstr);
1550
+ for (i=0; i<len; i++) {
1551
+ if (i & 1)
1552
+ bits <<= 4;
1553
+ else
1554
+ bits = *s++;
1555
+ *t++ = hexdigits[(bits >> 4) & 15];
1556
+ }
1557
+ }
1558
+ break;
1559
+
1560
+ case 'c':
1561
+ PACK_LENGTH_ADJUST_SIZE(sizeof(char));
1562
+ while (len-- > 0) {
1563
+ int c = *s++;
1564
+ if (c > (char)127) c-=256;
1565
+ UNPACK_PUSH(INT2FIX(c));
1566
+ }
1567
+ PACK_ITEM_ADJUST();
1568
+ break;
1569
+
1570
+ case 'C':
1571
+ PACK_LENGTH_ADJUST_SIZE(sizeof(unsigned char));
1572
+ while (len-- > 0) {
1573
+ unsigned char c = *s++;
1574
+ UNPACK_PUSH(INT2FIX(c));
1575
+ }
1576
+ PACK_ITEM_ADJUST();
1577
+ break;
1578
+
1579
+ case 's':
1580
+ signed_p = 1;
1581
+ integer_size = NATINT_LEN(short, 2);
1582
+ bigendian_p = BIGENDIAN_P();
1583
+ goto unpack_integer;
1584
+
1585
+ case 'S':
1586
+ signed_p = 0;
1587
+ integer_size = NATINT_LEN(short, 2);
1588
+ bigendian_p = BIGENDIAN_P();
1589
+ goto unpack_integer;
1590
+
1591
+ case 'i':
1592
+ signed_p = 1;
1593
+ integer_size = (int)sizeof(int);
1594
+ bigendian_p = BIGENDIAN_P();
1595
+ goto unpack_integer;
1596
+
1597
+ case 'I':
1598
+ signed_p = 0;
1599
+ integer_size = (int)sizeof(int);
1600
+ bigendian_p = BIGENDIAN_P();
1601
+ goto unpack_integer;
1602
+
1603
+ case 'l':
1604
+ signed_p = 1;
1605
+ integer_size = NATINT_LEN(long, 4);
1606
+ bigendian_p = BIGENDIAN_P();
1607
+ goto unpack_integer;
1608
+
1609
+ case 'L':
1610
+ signed_p = 0;
1611
+ integer_size = NATINT_LEN(long, 4);
1612
+ bigendian_p = BIGENDIAN_P();
1613
+ goto unpack_integer;
1614
+
1615
+ case 'q':
1616
+ signed_p = 1;
1617
+ integer_size = NATINT_LEN_Q;
1618
+ bigendian_p = BIGENDIAN_P();
1619
+ goto unpack_integer;
1620
+
1621
+ case 'Q':
1622
+ signed_p = 0;
1623
+ integer_size = NATINT_LEN_Q;
1624
+ bigendian_p = BIGENDIAN_P();
1625
+ goto unpack_integer;
1626
+
1627
+ case 'n':
1628
+ signed_p = 0;
1629
+ integer_size = 2;
1630
+ bigendian_p = 1;
1631
+ goto unpack_integer;
1632
+
1633
+ case 'N':
1634
+ signed_p = 0;
1635
+ integer_size = 4;
1636
+ bigendian_p = 1;
1637
+ goto unpack_integer;
1638
+
1639
+ case 'v':
1640
+ signed_p = 0;
1641
+ integer_size = 2;
1642
+ bigendian_p = 0;
1643
+ goto unpack_integer;
1644
+
1645
+ case 'V':
1646
+ signed_p = 0;
1647
+ integer_size = 4;
1648
+ bigendian_p = 0;
1649
+ goto unpack_integer;
1650
+
1651
+ unpack_integer:
1652
+ if (explicit_endian) {
1653
+ bigendian_p = explicit_endian == '>';
1654
+ }
1655
+
1656
+ switch (integer_size) {
1657
+ #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
1658
+ case SIZEOF_INT16_T:
1659
+ if (signed_p) {
1660
+ PACK_LENGTH_ADJUST_SIZE(sizeof(int16_t));
1661
+ while (len-- > 0) {
1662
+ union {
1663
+ int16_t i;
1664
+ char a[sizeof(int16_t)];
1665
+ } v;
1666
+ memcpy(v.a, s, sizeof(int16_t));
1667
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1668
+ s += sizeof(int16_t);
1669
+ UNPACK_PUSH(INT2FIX(v.i));
1670
+ }
1671
+ PACK_ITEM_ADJUST();
1672
+ }
1673
+ else {
1674
+ PACK_LENGTH_ADJUST_SIZE(sizeof(uint16_t));
1675
+ while (len-- > 0) {
1676
+ union {
1677
+ uint16_t i;
1678
+ char a[sizeof(uint16_t)];
1679
+ } v;
1680
+ memcpy(v.a, s, sizeof(uint16_t));
1681
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1682
+ s += sizeof(uint16_t);
1683
+ UNPACK_PUSH(INT2FIX(v.i));
1684
+ }
1685
+ PACK_ITEM_ADJUST();
1686
+ }
1687
+ break;
1688
+ #endif
1689
+
1690
+ #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
1691
+ case SIZEOF_INT32_T:
1692
+ if (signed_p) {
1693
+ PACK_LENGTH_ADJUST_SIZE(sizeof(int32_t));
1694
+ while (len-- > 0) {
1695
+ union {
1696
+ int32_t i;
1697
+ char a[sizeof(int32_t)];
1698
+ } v;
1699
+ memcpy(v.a, s, sizeof(int32_t));
1700
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1701
+ s += sizeof(int32_t);
1702
+ UNPACK_PUSH(INT2NUM(v.i));
1703
+ }
1704
+ PACK_ITEM_ADJUST();
1705
+ }
1706
+ else {
1707
+ PACK_LENGTH_ADJUST_SIZE(sizeof(uint32_t));
1708
+ while (len-- > 0) {
1709
+ union {
1710
+ uint32_t i;
1711
+ char a[sizeof(uint32_t)];
1712
+ } v;
1713
+ memcpy(v.a, s, sizeof(uint32_t));
1714
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1715
+ s += sizeof(uint32_t);
1716
+ UNPACK_PUSH(UINT2NUM(v.i));
1717
+ }
1718
+ PACK_ITEM_ADJUST();
1719
+ }
1720
+ break;
1721
+ #endif
1722
+
1723
+ #if defined(HAVE_INT64_T) && !defined(FORCE_BIG_PACK)
1724
+ case SIZEOF_INT64_T:
1725
+ if (signed_p) {
1726
+ PACK_LENGTH_ADJUST_SIZE(sizeof(int64_t));
1727
+ while (len-- > 0) {
1728
+ union {
1729
+ int64_t i;
1730
+ char a[sizeof(int64_t)];
1731
+ } v;
1732
+ memcpy(v.a, s, sizeof(int64_t));
1733
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1734
+ s += sizeof(int64_t);
1735
+ UNPACK_PUSH(INT64toNUM(v.i));
1736
+ }
1737
+ PACK_ITEM_ADJUST();
1738
+ }
1739
+ else {
1740
+ PACK_LENGTH_ADJUST_SIZE(sizeof(uint64_t));
1741
+ while (len-- > 0) {
1742
+ union {
1743
+ uint64_t i;
1744
+ char a[sizeof(uint64_t)];
1745
+ } v;
1746
+ memcpy(v.a, s, sizeof(uint64_t));
1747
+ if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1748
+ s += sizeof(uint64_t);
1749
+ UNPACK_PUSH(UINT64toNUM(v.i));
1750
+ }
1751
+ PACK_ITEM_ADJUST();
1752
+ }
1753
+ break;
1754
+ #endif
1755
+
1756
+ default:
1757
+ if (integer_size > MAX_INTEGER_PACK_SIZE)
1758
+ rb_bug("unexpected integer size for pack: %d", integer_size);
1759
+ PACK_LENGTH_ADJUST_SIZE(integer_size);
1760
+ while (len-- > 0) {
1761
+ union {
1762
+ unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG];
1763
+ char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG*SIZEOF_LONG];
1764
+ } v;
1765
+ int num_longs = (integer_size+SIZEOF_LONG)/SIZEOF_LONG;
1766
+ int i;
1767
+
1768
+ if (signed_p && (signed char)s[bigendian_p ? 0 : (integer_size-1)] < 0)
1769
+ memset(v.a, 0xff, sizeof(long)*num_longs);
1770
+ else
1771
+ memset(v.a, 0, sizeof(long)*num_longs);
1772
+ if (bigendian_p)
1773
+ memcpy(v.a + sizeof(long)*num_longs - integer_size, s, integer_size);
1774
+ else
1775
+ memcpy(v.a, s, integer_size);
1776
+ if (bigendian_p) {
1777
+ for (i = 0; i < num_longs/2; i++) {
1778
+ unsigned long t = v.i[i];
1779
+ v.i[i] = v.i[num_longs-1-i];
1780
+ v.i[num_longs-1-i] = t;
1781
+ }
1782
+ }
1783
+ if (bigendian_p != BIGENDIAN_P()) {
1784
+ for (i = 0; i < num_longs; i++)
1785
+ v.i[i] = swapl(v.i[i]);
1786
+ }
1787
+ s += integer_size;
1788
+ UNPACK_PUSH(rb_big_unpack(v.i, num_longs));
1789
+ }
1790
+ PACK_ITEM_ADJUST();
1791
+ break;
1792
+ }
1793
+ break;
1794
+
1795
+ case 'f':
1796
+ case 'F':
1797
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1798
+ while (len-- > 0) {
1799
+ float tmp;
1800
+ memcpy(&tmp, s, sizeof(float));
1801
+ s += sizeof(float);
1802
+ UNPACK_PUSH(DBL2NUM((double)tmp));
1803
+ }
1804
+ PACK_ITEM_ADJUST();
1805
+ break;
1806
+
1807
+ case 'e':
1808
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1809
+ while (len-- > 0) {
1810
+ float tmp;
1811
+ FLOAT_CONVWITH(ftmp);
1812
+
1813
+ memcpy(&tmp, s, sizeof(float));
1814
+ s += sizeof(float);
1815
+ tmp = VTOHF(tmp,ftmp);
1816
+ UNPACK_PUSH(DBL2NUM((double)tmp));
1817
+ }
1818
+ PACK_ITEM_ADJUST();
1819
+ break;
1820
+
1821
+ case 'E':
1822
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1823
+ while (len-- > 0) {
1824
+ double tmp;
1825
+ DOUBLE_CONVWITH(dtmp);
1826
+
1827
+ memcpy(&tmp, s, sizeof(double));
1828
+ s += sizeof(double);
1829
+ tmp = VTOHD(tmp,dtmp);
1830
+ UNPACK_PUSH(DBL2NUM(tmp));
1831
+ }
1832
+ PACK_ITEM_ADJUST();
1833
+ break;
1834
+
1835
+ case 'D':
1836
+ case 'd':
1837
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1838
+ while (len-- > 0) {
1839
+ double tmp;
1840
+ memcpy(&tmp, s, sizeof(double));
1841
+ s += sizeof(double);
1842
+ UNPACK_PUSH(DBL2NUM(tmp));
1843
+ }
1844
+ PACK_ITEM_ADJUST();
1845
+ break;
1846
+
1847
+ case 'g':
1848
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1849
+ while (len-- > 0) {
1850
+ float tmp;
1851
+ FLOAT_CONVWITH(ftmp);
1852
+
1853
+ memcpy(&tmp, s, sizeof(float));
1854
+ s += sizeof(float);
1855
+ tmp = NTOHF(tmp,ftmp);
1856
+ UNPACK_PUSH(DBL2NUM((double)tmp));
1857
+ }
1858
+ PACK_ITEM_ADJUST();
1859
+ break;
1860
+
1861
+ case 'G':
1862
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1863
+ while (len-- > 0) {
1864
+ double tmp;
1865
+ DOUBLE_CONVWITH(dtmp);
1866
+
1867
+ memcpy(&tmp, s, sizeof(double));
1868
+ s += sizeof(double);
1869
+ tmp = NTOHD(tmp,dtmp);
1870
+ UNPACK_PUSH(DBL2NUM(tmp));
1871
+ }
1872
+ PACK_ITEM_ADJUST();
1873
+ break;
1874
+
1875
+ case 'U':
1876
+ if (len > send - s) len = send - s;
1877
+ while (len > 0 && s < send) {
1878
+ long alen = send - s;
1879
+ unsigned long l;
1880
+
1881
+ l = utf8_to_uv(s, &alen);
1882
+ s += alen; len--;
1883
+ UNPACK_PUSH(ULONG2NUM(l));
1884
+ }
1885
+ break;
1886
+
1887
+ case 'u':
1888
+ {
1889
+ VALUE buf = infected_str_new(0, (send - s)*3/4, str);
1890
+ char *ptr = RSTRING_PTR(buf);
1891
+ long total = 0;
1892
+
1893
+ while (s < send && *s > ' ' && *s < 'a') {
1894
+ long a,b,c,d;
1895
+ char hunk[4];
1896
+
1897
+ hunk[3] = '\0';
1898
+ len = (*s++ - ' ') & 077;
1899
+ total += len;
1900
+ if (total > RSTRING_LEN(buf)) {
1901
+ len -= total - RSTRING_LEN(buf);
1902
+ total = RSTRING_LEN(buf);
1903
+ }
1904
+
1905
+ while (len > 0) {
1906
+ long mlen = len > 3 ? 3 : len;
1907
+
1908
+ if (s < send && *s >= ' ')
1909
+ a = (*s++ - ' ') & 077;
1910
+ else
1911
+ a = 0;
1912
+ if (s < send && *s >= ' ')
1913
+ b = (*s++ - ' ') & 077;
1914
+ else
1915
+ b = 0;
1916
+ if (s < send && *s >= ' ')
1917
+ c = (*s++ - ' ') & 077;
1918
+ else
1919
+ c = 0;
1920
+ if (s < send && *s >= ' ')
1921
+ d = (*s++ - ' ') & 077;
1922
+ else
1923
+ d = 0;
1924
+ hunk[0] = (char)(a << 2 | b >> 4);
1925
+ hunk[1] = (char)(b << 4 | c >> 2);
1926
+ hunk[2] = (char)(c << 6 | d);
1927
+ memcpy(ptr, hunk, mlen);
1928
+ ptr += mlen;
1929
+ len -= mlen;
1930
+ }
1931
+ if (*s == '\r') s++;
1932
+ if (*s == '\n') s++;
1933
+ else if (s < send && (s+1 == send || s[1] == '\n'))
1934
+ s += 2; /* possible checksum byte */
1935
+ }
1936
+
1937
+ rb_str_set_len(buf, total);
1938
+ UNPACK_PUSH(buf);
1939
+ }
1940
+ break;
1941
+
1942
+ case 'm':
1943
+ {
1944
+ VALUE buf = infected_str_new(0, (send - s + 3)*3/4, str); /* +3 is for skipping paddings */
1945
+ char *ptr = RSTRING_PTR(buf);
1946
+ int a = -1,b = -1,c = 0,d = 0;
1947
+ static signed char b64_xtable[256];
1948
+
1949
+ if (b64_xtable['/'] <= 0) {
1950
+ int i;
1951
+
1952
+ for (i = 0; i < 256; i++) {
1953
+ b64_xtable[i] = -1;
1954
+ }
1955
+ for (i = 0; i < 64; i++) {
1956
+ b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1957
+ }
1958
+ }
1959
+ if (len == 0) {
1960
+ while (s < send) {
1961
+ a = b = c = d = -1;
1962
+ a = b64_xtable[(unsigned char)*s++];
1963
+ if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1964
+ b = b64_xtable[(unsigned char)*s++];
1965
+ if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1966
+ if (*s == '=') {
1967
+ if (s + 2 == send && *(s + 1) == '=') break;
1968
+ rb_raise(rb_eArgError, "invalid base64");
1969
+ }
1970
+ c = b64_xtable[(unsigned char)*s++];
1971
+ if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1972
+ if (s + 1 == send && *s == '=') break;
1973
+ d = b64_xtable[(unsigned char)*s++];
1974
+ if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1975
+ *ptr++ = castchar(a << 2 | b >> 4);
1976
+ *ptr++ = castchar(b << 4 | c >> 2);
1977
+ *ptr++ = castchar(c << 6 | d);
1978
+ }
1979
+ if (c == -1) {
1980
+ *ptr++ = castchar(a << 2 | b >> 4);
1981
+ if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1982
+ }
1983
+ else if (d == -1) {
1984
+ *ptr++ = castchar(a << 2 | b >> 4);
1985
+ *ptr++ = castchar(b << 4 | c >> 2);
1986
+ if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1987
+ }
1988
+ }
1989
+ else {
1990
+ while (s < send) {
1991
+ a = b = c = d = -1;
1992
+ while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1993
+ if (s >= send) break;
1994
+ s++;
1995
+ while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1996
+ if (s >= send) break;
1997
+ s++;
1998
+ while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1999
+ if (*s == '=' || s >= send) break;
2000
+ s++;
2001
+ while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
2002
+ if (*s == '=' || s >= send) break;
2003
+ s++;
2004
+ *ptr++ = castchar(a << 2 | b >> 4);
2005
+ *ptr++ = castchar(b << 4 | c >> 2);
2006
+ *ptr++ = castchar(c << 6 | d);
2007
+ a = -1;
2008
+ }
2009
+ if (a != -1 && b != -1) {
2010
+ if (c == -1)
2011
+ *ptr++ = castchar(a << 2 | b >> 4);
2012
+ else {
2013
+ *ptr++ = castchar(a << 2 | b >> 4);
2014
+ *ptr++ = castchar(b << 4 | c >> 2);
2015
+ }
2016
+ }
2017
+ }
2018
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
2019
+ UNPACK_PUSH(buf);
2020
+ }
2021
+ break;
2022
+
2023
+ case 'M':
2024
+ {
2025
+ VALUE buf = infected_str_new(0, send - s, str);
2026
+ char *ptr = RSTRING_PTR(buf), *ss = s;
2027
+ int c1, c2;
2028
+
2029
+ while (s < send) {
2030
+ if (*s == '=') {
2031
+ if (++s == send) break;
2032
+ if (s+1 < send && *s == '\r' && *(s+1) == '\n')
2033
+ s++;
2034
+ if (*s != '\n') {
2035
+ if ((c1 = hex2num(*s)) == -1) break;
2036
+ if (++s == send) break;
2037
+ if ((c2 = hex2num(*s)) == -1) break;
2038
+ *ptr++ = castchar(c1 << 4 | c2);
2039
+ }
2040
+ }
2041
+ else {
2042
+ *ptr++ = *s;
2043
+ }
2044
+ s++;
2045
+ ss = s;
2046
+ }
2047
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
2048
+ rb_str_buf_cat(buf, ss, send-ss);
2049
+ ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID);
2050
+ UNPACK_PUSH(buf);
2051
+ }
2052
+ break;
2053
+
2054
+ case '@':
2055
+ if (len > RSTRING_LEN(str))
2056
+ rb_raise(rb_eArgError, "@ outside of string");
2057
+ s = RSTRING_PTR(str) + len;
2058
+ break;
2059
+
2060
+ case 'X':
2061
+ if (len > s - RSTRING_PTR(str))
2062
+ rb_raise(rb_eArgError, "X outside of string");
2063
+ s -= len;
2064
+ break;
2065
+
2066
+ case 'x':
2067
+ if (len > send - s)
2068
+ rb_raise(rb_eArgError, "x outside of string");
2069
+ s += len;
2070
+ break;
2071
+
2072
+ case 'P':
2073
+ if (sizeof(char *) <= (size_t)(send - s)) {
2074
+ VALUE tmp = Qnil;
2075
+ char *t;
2076
+
2077
+ memcpy(&t, s, sizeof(char *));
2078
+ s += sizeof(char *);
2079
+
2080
+ if (t) {
2081
+ VALUE a, *p, *pend;
2082
+
2083
+ if (!(a = rb_str_associated(str))) {
2084
+ rb_raise(rb_eArgError, "no associated pointer");
2085
+ }
2086
+ p = RARRAY_PTR(a);
2087
+ pend = p + RARRAY_LEN(a);
2088
+ while (p < pend) {
2089
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
2090
+ if (len < RSTRING_LEN(*p)) {
2091
+ tmp = rb_tainted_str_new(t, len);
2092
+ rb_str_associate(tmp, a);
2093
+ }
2094
+ else {
2095
+ tmp = *p;
2096
+ }
2097
+ break;
2098
+ }
2099
+ p++;
2100
+ }
2101
+ if (p == pend) {
2102
+ rb_raise(rb_eArgError, "non associated pointer");
2103
+ }
2104
+ }
2105
+ UNPACK_PUSH(tmp);
2106
+ }
2107
+ break;
2108
+
2109
+ case 'p':
2110
+ if (len > (long)((send - s) / sizeof(char *)))
2111
+ len = (send - s) / sizeof(char *);
2112
+ while (len-- > 0) {
2113
+ if ((size_t)(send - s) < sizeof(char *))
2114
+ break;
2115
+ else {
2116
+ VALUE tmp = Qnil;
2117
+ char *t;
2118
+
2119
+ memcpy(&t, s, sizeof(char *));
2120
+ s += sizeof(char *);
2121
+
2122
+ if (t) {
2123
+ VALUE a, *p, *pend;
2124
+
2125
+ if (!(a = rb_str_associated(str))) {
2126
+ rb_raise(rb_eArgError, "no associated pointer");
2127
+ }
2128
+ p = RARRAY_PTR(a);
2129
+ pend = p + RARRAY_LEN(a);
2130
+ while (p < pend) {
2131
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
2132
+ tmp = *p;
2133
+ break;
2134
+ }
2135
+ p++;
2136
+ }
2137
+ if (p == pend) {
2138
+ rb_raise(rb_eArgError, "non associated pointer");
2139
+ }
2140
+ }
2141
+ UNPACK_PUSH(tmp);
2142
+ }
2143
+ }
2144
+ break;
2145
+
2146
+ case 'w':
2147
+ {
2148
+ unsigned long ul = 0;
2149
+ unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8);
2150
+
2151
+ while (len > 0 && s < send) {
2152
+ ul <<= 7;
2153
+ ul |= (*s & 0x7f);
2154
+ if (!(*s++ & 0x80)) {
2155
+ UNPACK_PUSH(ULONG2NUM(ul));
2156
+ len--;
2157
+ ul = 0;
2158
+ }
2159
+ else if (ul & ulmask) {
2160
+ VALUE big = rb_uint2big(ul);
2161
+ VALUE big128 = rb_uint2big(128);
2162
+ while (s < send) {
2163
+ big = rb_big_mul(big, big128);
2164
+ big = rb_big_plus(big, rb_uint2big(*s & 0x7f));
2165
+ if (!(*s++ & 0x80)) {
2166
+ UNPACK_PUSH(big);
2167
+ len--;
2168
+ ul = 0;
2169
+ break;
2170
+ }
2171
+ }
2172
+ }
2173
+ }
2174
+ }
2175
+ break;
2176
+
2177
+ default:
2178
+ rb_warning("unknown unpack directive '%c' in '%s'",
2179
+ type, RSTRING_PTR(fmt));
2180
+ break;
2181
+ }
2182
+ }
2183
+
2184
+ *parsed_len = s - init_s;
2185
+ return ary;
2186
+ }
2187
+
2188
+ #define BYTEWIDTH 8
2189
+
2190
+ int
2191
+ rb_uv_to_utf8(char buf[6], unsigned long uv)
2192
+ {
2193
+ if (uv <= 0x7f) {
2194
+ buf[0] = (char)uv;
2195
+ return 1;
2196
+ }
2197
+ if (uv <= 0x7ff) {
2198
+ buf[0] = castchar(((uv>>6)&0xff)|0xc0);
2199
+ buf[1] = castchar((uv&0x3f)|0x80);
2200
+ return 2;
2201
+ }
2202
+ if (uv <= 0xffff) {
2203
+ buf[0] = castchar(((uv>>12)&0xff)|0xe0);
2204
+ buf[1] = castchar(((uv>>6)&0x3f)|0x80);
2205
+ buf[2] = castchar((uv&0x3f)|0x80);
2206
+ return 3;
2207
+ }
2208
+ if (uv <= 0x1fffff) {
2209
+ buf[0] = castchar(((uv>>18)&0xff)|0xf0);
2210
+ buf[1] = castchar(((uv>>12)&0x3f)|0x80);
2211
+ buf[2] = castchar(((uv>>6)&0x3f)|0x80);
2212
+ buf[3] = castchar((uv&0x3f)|0x80);
2213
+ return 4;
2214
+ }
2215
+ if (uv <= 0x3ffffff) {
2216
+ buf[0] = castchar(((uv>>24)&0xff)|0xf8);
2217
+ buf[1] = castchar(((uv>>18)&0x3f)|0x80);
2218
+ buf[2] = castchar(((uv>>12)&0x3f)|0x80);
2219
+ buf[3] = castchar(((uv>>6)&0x3f)|0x80);
2220
+ buf[4] = castchar((uv&0x3f)|0x80);
2221
+ return 5;
2222
+ }
2223
+ if (uv <= 0x7fffffff) {
2224
+ buf[0] = castchar(((uv>>30)&0xff)|0xfc);
2225
+ buf[1] = castchar(((uv>>24)&0x3f)|0x80);
2226
+ buf[2] = castchar(((uv>>18)&0x3f)|0x80);
2227
+ buf[3] = castchar(((uv>>12)&0x3f)|0x80);
2228
+ buf[4] = castchar(((uv>>6)&0x3f)|0x80);
2229
+ buf[5] = castchar((uv&0x3f)|0x80);
2230
+ return 6;
2231
+ }
2232
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
2233
+
2234
+ UNREACHABLE;
2235
+ }
2236
+
2237
+ static const unsigned long utf8_limits[] = {
2238
+ 0x0, /* 1 */
2239
+ 0x80, /* 2 */
2240
+ 0x800, /* 3 */
2241
+ 0x10000, /* 4 */
2242
+ 0x200000, /* 5 */
2243
+ 0x4000000, /* 6 */
2244
+ 0x80000000, /* 7 */
2245
+ };
2246
+
2247
+ static unsigned long
2248
+ utf8_to_uv(const char *p, long *lenp)
2249
+ {
2250
+ int c = *p++ & 0xff;
2251
+ unsigned long uv = c;
2252
+ long n;
2253
+
2254
+ if (!(uv & 0x80)) {
2255
+ *lenp = 1;
2256
+ return uv;
2257
+ }
2258
+ if (!(uv & 0x40)) {
2259
+ *lenp = 1;
2260
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
2261
+ }
2262
+
2263
+ if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
2264
+ else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
2265
+ else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
2266
+ else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
2267
+ else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
2268
+ else {
2269
+ *lenp = 1;
2270
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
2271
+ }
2272
+ if (n > *lenp) {
2273
+ rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
2274
+ n, *lenp);
2275
+ }
2276
+ *lenp = n--;
2277
+ if (n != 0) {
2278
+ while (n--) {
2279
+ c = *p++ & 0xff;
2280
+ if ((c & 0xc0) != 0x80) {
2281
+ *lenp -= n + 1;
2282
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
2283
+ }
2284
+ else {
2285
+ c &= 0x3f;
2286
+ uv = uv << 6 | c;
2287
+ }
2288
+ }
2289
+ }
2290
+ n = *lenp - 1;
2291
+ if (uv < utf8_limits[n]) {
2292
+ rb_raise(rb_eArgError, "redundant UTF-8 sequence");
2293
+ }
2294
+ return uv;
2295
+ }