zscan 2.0.4 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,2010 @@
1
+ /**********************************************************************
2
+
3
+ pack.c -
4
+
5
+ $Author: naruse $
6
+ created at: Thu Feb 10 15:17:05 JST 1994
7
+
8
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
9
+
10
+ **********************************************************************/
11
+
12
+ #include "ruby/encoding.h"
13
+ #include "internal.h"
14
+ #include <sys/types.h>
15
+ #include <ctype.h>
16
+ #include <errno.h>
17
+
18
+ /*
19
+ * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
20
+ * instead of HAVE_LONG_LONG or LONG_LONG.
21
+ * This means q! and Q! always mean the standard long long type and
22
+ * cause ArgumentError on platforms which have no long long type,
23
+ * even if the platform has an implementation specific 64bit type.
24
+ * This behavior is consistent with the document of pack/unpack.
25
+ */
26
+ #ifdef HAVE_TRUE_LONG_LONG
27
+ static const char natstr[] = "sSiIlLqQjJ";
28
+ #else
29
+ static const char natstr[] = "sSiIlLjJ";
30
+ #endif
31
+ static const char endstr[] = "sSiIlLqQjJ";
32
+
33
+ #ifdef HAVE_TRUE_LONG_LONG
34
+ /* It is intentional to use long long instead of LONG_LONG. */
35
+ # define NATINT_LEN_Q NATINT_LEN(long long, 8)
36
+ #else
37
+ # define NATINT_LEN_Q 8
38
+ #endif
39
+
40
+ #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
41
+ # define NATINT_PACK
42
+ #endif
43
+
44
+ #ifdef DYNAMIC_ENDIAN
45
+ /* for universal binary of NEXTSTEP and MacOS X */
46
+ /* useless since autoconf 2.63? */
47
/*
 * Run-time byte-order probe.
 *
 * Returns 1 when the first byte of a non-zero int is zero (big endian) and
 * 0 otherwise.  The answer is computed on the first call only and served
 * from a static cache afterwards.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached;

    if (!probed) {
        const char *first_byte;

        /* The flag doubles as the probe value: once set to 1, its lowest
         * addressed byte is non-zero only on a little-endian layout. */
        probed = 1;
        first_byte = (const char *)&probed;
        cached = (first_byte[0] == 0);
    }
    return cached;
}
59
+ # define BIGENDIAN_P() (is_bigendian())
60
+ #elif defined(WORDS_BIGENDIAN)
61
+ # define BIGENDIAN_P() 1
62
+ #else
63
+ # define BIGENDIAN_P() 0
64
+ #endif
65
+
66
+ #ifdef NATINT_PACK
67
+ # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
68
+ #else
69
+ # define NATINT_LEN(type,len) ((int)sizeof(type))
70
+ #endif
71
+
72
/*
 * Overlay of a single-precision float with a same-sized integer and a raw
 * byte view.  The integer member is what the byte-swap macros operate on;
 * the byte view is what gets appended to / read from the packed string.
 */
typedef union {
    float    f;
    uint32_t u;
    char     buf[4];
} FLOAT_SWAPPER;

/*
 * Double-precision counterpart of FLOAT_SWAPPER.
 */
typedef union {
    double   d;
    uint64_t u;
    char     buf[8];
} DOUBLE_SWAPPER;
82
+ #define swapf(x) swap32(x)
83
+ #define swapd(x) swap64(x)
84
+
85
+ #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
86
+ #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
87
+ #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
88
+ #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
89
+ #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
90
+ #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
91
+ #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
92
+ #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
93
+
94
+ #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
95
+ #define HTONF(x) ((x).u = rb_htonf((x).u))
96
+ #define HTOVF(x) ((x).u = rb_htovf((x).u))
97
+ #define NTOHF(x) ((x).u = rb_ntohf((x).u))
98
+ #define VTOHF(x) ((x).u = rb_vtohf((x).u))
99
+
100
+ #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
101
+ #define HTOND(x) ((x).u = rb_htond((x).u))
102
+ #define HTOVD(x) ((x).u = rb_htovd((x).u))
103
+ #define NTOHD(x) ((x).u = rb_ntohd((x).u))
104
+ #define VTOHD(x) ((x).u = rb_vtohd((x).u))
105
+
106
+ #define MAX_INTEGER_PACK_SIZE 8
107
+
108
+ static const char toofew[] = "too few arguments";
109
+
110
+ static void encodes(VALUE,const char*,long,int,int);
111
+ static void qpencode(VALUE,VALUE,long);
112
+
113
+ static unsigned long utf8_to_uv(const char*,long*);
114
+
115
+ static ID id_associated;
116
+
117
+ static void
118
+ str_associate(VALUE str, VALUE add)
119
+ {
120
+ /* assert(NIL_P(rb_attr_get(str, id_associated))); */
121
+ rb_ivar_set(str, id_associated, add);
122
+ }
123
+
124
+ static VALUE
125
+ str_associated(VALUE str)
126
+ {
127
+ return rb_ivar_lookup(str, id_associated, Qfalse);
128
+ }
129
+
130
+ /*
131
+ * call-seq:
132
+ * arr.pack( aTemplateString ) -> aBinaryString
133
+ * arr.pack( aTemplateString, buffer: aBufferString ) -> aBufferString
134
+ *
135
+ * Packs the contents of <i>arr</i> into a binary sequence according to
136
+ * the directives in <i>aTemplateString</i> (see the table below)
137
+ * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
138
+ * which gives the width of the resulting field. The remaining
139
+ * directives also may take a count, indicating the number of array
140
+ * elements to convert. If the count is an asterisk
141
+ * (``<code>*</code>''), all remaining array elements will be
142
+ * converted. Any of the directives ``<code>sSiIlL</code>'' may be
143
+ * followed by an underscore (``<code>_</code>'') or
144
+ * exclamation mark (``<code>!</code>'') to use the underlying
145
+ * platform's native size for the specified type; otherwise, they use a
146
+ * platform-independent size. Spaces are ignored in the template
147
+ * string. See also <code>String#unpack</code>.
148
+ *
149
+ * a = [ "a", "b", "c" ]
150
+ * n = [ 65, 66, 67 ]
151
+ * a.pack("A3A3A3") #=> "a  b  c  "
152
+ * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
153
+ * n.pack("ccc") #=> "ABC"
154
+ *
155
+ * If <i>aBufferString</i> is specified and its capacity is enough,
156
+ * +pack+ uses it as the buffer and returns it.
157
+ * When the offset is specified by the beginning of <i>aTemplateString</i>,
158
+ * the result is filled after the offset.
159
+ * If original contents of <i>aBufferString</i> exists and it's longer than
160
+ * the offset, the rest of <i>offsetOfBuffer</i> are overwritten by the result.
161
+ * If it's shorter, the gap is filled with ``<code>\0</code>''.
162
+ *
163
+ * Note that ``buffer:'' option does not guarantee not to allocate memory
164
+ * in +pack+. If the capacity of <i>aBufferString</i> is not enough,
165
+ * +pack+ allocates memory.
166
+ *
167
+ * Directives for +pack+.
168
+ *
169
+ * Integer | Array |
170
+ * Directive | Element | Meaning
171
+ * ----------------------------------------------------------------------------
172
+ * C | Integer | 8-bit unsigned (unsigned char)
173
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
174
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
175
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
176
+ * J | Integer | pointer width unsigned, native endian (uintptr_t)
177
+ * | | (J is available since Ruby 2.3.)
178
+ * | |
179
+ * c | Integer | 8-bit signed (signed char)
180
+ * s | Integer | 16-bit signed, native endian (int16_t)
181
+ * l | Integer | 32-bit signed, native endian (int32_t)
182
+ * q | Integer | 64-bit signed, native endian (int64_t)
183
+ * j | Integer | pointer width signed, native endian (intptr_t)
184
+ * | | (j is available since Ruby 2.3.)
185
+ * | |
186
+ * S_ S! | Integer | unsigned short, native endian
187
+ * I I_ I! | Integer | unsigned int, native endian
188
+ * L_ L! | Integer | unsigned long, native endian
189
+ * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
190
+ * | | if the platform has no long long type.)
191
+ * | | (Q_ and Q! is available since Ruby 2.1.)
192
+ * J! | Integer | uintptr_t, native endian (same with J)
193
+ * | | (J! is available since Ruby 2.3.)
194
+ * | |
195
+ * s_ s! | Integer | signed short, native endian
196
+ * i i_ i! | Integer | signed int, native endian
197
+ * l_ l! | Integer | signed long, native endian
198
+ * q_ q! | Integer | signed long long, native endian (ArgumentError
199
+ * | | if the platform has no long long type.)
200
+ * | | (q_ and q! is available since Ruby 2.1.)
201
+ * j! | Integer | intptr_t, native endian (same with j)
202
+ * | | (j! is available since Ruby 2.3.)
203
+ * | |
204
+ * S> s> S!> s!> | Integer | same as the directives without ">" except
205
+ * L> l> L!> l!> | | big endian
206
+ * I!> i!> | | (available since Ruby 1.9.3)
207
+ * Q> q> Q!> q!> | | "S>" is same as "n"
208
+ * J> j> J!> j!> | | "L>" is same as "N"
209
+ * | |
210
+ * S< s< S!< s!< | Integer | same as the directives without "<" except
211
+ * L< l< L!< l!< | | little endian
212
+ * I!< i!< | | (available since Ruby 1.9.3)
213
+ * Q< q< Q!< q!< | | "S<" is same as "v"
214
+ * J< j< J!< j!< | | "L<" is same as "V"
215
+ * | |
216
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
217
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
218
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
219
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
220
+ * | |
221
+ * U | Integer | UTF-8 character
222
+ * w | Integer | BER-compressed integer
223
+ *
224
+ * Float | Array |
225
+ * Directive | Element | Meaning
226
+ * ---------------------------------------------------------------------------
227
+ * D d | Float | double-precision, native format
228
+ * F f | Float | single-precision, native format
229
+ * E | Float | double-precision, little-endian byte order
230
+ * e | Float | single-precision, little-endian byte order
231
+ * G | Float | double-precision, network (big-endian) byte order
232
+ * g | Float | single-precision, network (big-endian) byte order
233
+ *
234
+ * String | Array |
235
+ * Directive | Element | Meaning
236
+ * ---------------------------------------------------------------------------
237
+ * A | String | arbitrary binary string (space padded, count is width)
238
+ * a | String | arbitrary binary string (null padded, count is width)
239
+ * Z | String | same as ``a'', except that null is added with *
240
+ * B | String | bit string (MSB first)
241
+ * b | String | bit string (LSB first)
242
+ * H | String | hex string (high nibble first)
243
+ * h | String | hex string (low nibble first)
244
+ * u | String | UU-encoded string
245
+ * M | String | quoted printable, MIME encoding (see also RFC2045)
246
+ * | | (text mode but input must use LF and output LF)
247
+ * m | String | base64 encoded string (see RFC 2045, count is width)
248
+ * | | (if count is 0, no line feed are added, see RFC 4648)
249
+ * P | String | pointer to a structure (fixed-length string)
250
+ * p | String | pointer to a null-terminated string
251
+ *
252
+ * Misc. | Array |
253
+ * Directive | Element | Meaning
254
+ * ---------------------------------------------------------------------------
255
+ * @ | --- | moves to absolute position
256
+ * X | --- | back up a byte
257
+ * x | --- | null byte
258
+ */
259
+
260
+ static VALUE
261
+ pack_pack(int argc, VALUE *argv, VALUE ary)
262
+ {
263
+ static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
264
+ static const char spc10[] = " ";
265
+ const char *p, *pend;
266
+ VALUE fmt, opt = Qnil, res, from, associates = 0, buffer = 0;
267
+ char type;
268
+ long len, idx, plen;
269
+ const char *ptr;
270
+ int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
271
+ #ifdef NATINT_PACK
272
+ int natint; /* native integer */
273
+ #endif
274
+ int integer_size, bigendian_p;
275
+
276
+ rb_scan_args(argc, argv, "10:", &fmt, &opt);
277
+
278
+ StringValue(fmt);
279
+ p = RSTRING_PTR(fmt);
280
+ pend = p + RSTRING_LEN(fmt);
281
+ if (!NIL_P(opt)) {
282
+ static ID keyword_ids[1];
283
+ if (!keyword_ids[0])
284
+ CONST_ID(keyword_ids[0], "buffer");
285
+
286
+ rb_get_kwargs(opt, keyword_ids, 0, 1, &buffer);
287
+
288
+ if (buffer != Qundef && !RB_TYPE_P(buffer, T_STRING))
289
+ rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
290
+ }
291
+ if (buffer)
292
+ res = buffer;
293
+ else
294
+ res = rb_str_buf_new(0);
295
+
296
+ idx = 0;
297
+
298
+ #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
299
+ #define MORE_ITEM (idx < RARRAY_LEN(ary))
300
+ #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
301
+ #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
302
+
303
+ while (p < pend) {
304
+ int explicit_endian = 0;
305
+ if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
306
+ rb_raise(rb_eRuntimeError, "format string modified");
307
+ }
308
+ type = *p++; /* get data type */
309
+ #ifdef NATINT_PACK
310
+ natint = 0;
311
+ #endif
312
+
313
+ if (ISSPACE(type)) continue;
314
+ if (type == '#') {
315
+ while ((p < pend) && (*p != '\n')) {
316
+ p++;
317
+ }
318
+ continue;
319
+ }
320
+
321
+ {
322
+ modifiers:
323
+ switch (*p) {
324
+ case '_':
325
+ case '!':
326
+ if (strchr(natstr, type)) {
327
+ #ifdef NATINT_PACK
328
+ natint = 1;
329
+ #endif
330
+ p++;
331
+ }
332
+ else {
333
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
334
+ }
335
+ goto modifiers;
336
+
337
+ case '<':
338
+ case '>':
339
+ if (!strchr(endstr, type)) {
340
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
341
+ }
342
+ if (explicit_endian) {
343
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
344
+ }
345
+ explicit_endian = *p++;
346
+ goto modifiers;
347
+ }
348
+ }
349
+
350
+ if (*p == '*') { /* set data length */
351
+ len = strchr("@Xxu", type) ? 0
352
+ : strchr("PMm", type) ? 1
353
+ : RARRAY_LEN(ary) - idx;
354
+ p++;
355
+ }
356
+ else if (ISDIGIT(*p)) {
357
+ errno = 0;
358
+ len = STRTOUL(p, (char**)&p, 10);
359
+ if (errno) {
360
+ rb_raise(rb_eRangeError, "pack length too big");
361
+ }
362
+ }
363
+ else {
364
+ len = 1;
365
+ }
366
+
367
+ switch (type) {
368
+ case 'U':
369
+ /* if encoding is US-ASCII, upgrade to UTF-8 */
370
+ if (enc_info == 1) enc_info = 2;
371
+ break;
372
+ case 'm': case 'M': case 'u':
373
+ /* keep US-ASCII (do nothing) */
374
+ break;
375
+ default:
376
+ /* fall back to BINARY */
377
+ enc_info = 0;
378
+ break;
379
+ }
380
+ switch (type) {
381
+ case 'A': case 'a': case 'Z':
382
+ case 'B': case 'b':
383
+ case 'H': case 'h':
384
+ from = NEXTFROM;
385
+ if (NIL_P(from)) {
386
+ ptr = "";
387
+ plen = 0;
388
+ }
389
+ else {
390
+ StringValue(from);
391
+ ptr = RSTRING_PTR(from);
392
+ plen = RSTRING_LEN(from);
393
+ OBJ_INFECT(res, from);
394
+ }
395
+
396
+ if (p[-1] == '*')
397
+ len = plen;
398
+
399
+ switch (type) {
400
+ case 'a': /* arbitrary binary string (null padded) */
401
+ case 'A': /* arbitrary binary string (ASCII space padded) */
402
+ case 'Z': /* null terminated string */
403
+ if (plen >= len) {
404
+ rb_str_buf_cat(res, ptr, len);
405
+ if (p[-1] == '*' && type == 'Z')
406
+ rb_str_buf_cat(res, nul10, 1);
407
+ }
408
+ else {
409
+ rb_str_buf_cat(res, ptr, plen);
410
+ len -= plen;
411
+ while (len >= 10) {
412
+ rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
413
+ len -= 10;
414
+ }
415
+ rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
416
+ }
417
+ break;
418
+
419
+ #define castchar(from) (char)((from) & 0xff)
420
+
421
+ case 'b': /* bit string (ascending) */
422
+ {
423
+ int byte = 0;
424
+ long i, j = 0;
425
+
426
+ if (len > plen) {
427
+ j = (len - plen + 1)/2;
428
+ len = plen;
429
+ }
430
+ for (i=0; i++ < len; ptr++) {
431
+ if (*ptr & 1)
432
+ byte |= 128;
433
+ if (i & 7)
434
+ byte >>= 1;
435
+ else {
436
+ char c = castchar(byte);
437
+ rb_str_buf_cat(res, &c, 1);
438
+ byte = 0;
439
+ }
440
+ }
441
+ if (len & 7) {
442
+ char c;
443
+ byte >>= 7 - (len & 7);
444
+ c = castchar(byte);
445
+ rb_str_buf_cat(res, &c, 1);
446
+ }
447
+ len = j;
448
+ goto grow;
449
+ }
450
+ break;
451
+
452
+ case 'B': /* bit string (descending) */
453
+ {
454
+ int byte = 0;
455
+ long i, j = 0;
456
+
457
+ if (len > plen) {
458
+ j = (len - plen + 1)/2;
459
+ len = plen;
460
+ }
461
+ for (i=0; i++ < len; ptr++) {
462
+ byte |= *ptr & 1;
463
+ if (i & 7)
464
+ byte <<= 1;
465
+ else {
466
+ char c = castchar(byte);
467
+ rb_str_buf_cat(res, &c, 1);
468
+ byte = 0;
469
+ }
470
+ }
471
+ if (len & 7) {
472
+ char c;
473
+ byte <<= 7 - (len & 7);
474
+ c = castchar(byte);
475
+ rb_str_buf_cat(res, &c, 1);
476
+ }
477
+ len = j;
478
+ goto grow;
479
+ }
480
+ break;
481
+
482
+ case 'h': /* hex string (low nibble first) */
483
+ {
484
+ int byte = 0;
485
+ long i, j = 0;
486
+
487
+ if (len > plen) {
488
+ j = (len + 1) / 2 - (plen + 1) / 2;
489
+ len = plen;
490
+ }
491
+ for (i=0; i++ < len; ptr++) {
492
+ if (ISALPHA(*ptr))
493
+ byte |= (((*ptr & 15) + 9) & 15) << 4;
494
+ else
495
+ byte |= (*ptr & 15) << 4;
496
+ if (i & 1)
497
+ byte >>= 4;
498
+ else {
499
+ char c = castchar(byte);
500
+ rb_str_buf_cat(res, &c, 1);
501
+ byte = 0;
502
+ }
503
+ }
504
+ if (len & 1) {
505
+ char c = castchar(byte);
506
+ rb_str_buf_cat(res, &c, 1);
507
+ }
508
+ len = j;
509
+ goto grow;
510
+ }
511
+ break;
512
+
513
+ case 'H': /* hex string (high nibble first) */
514
+ {
515
+ int byte = 0;
516
+ long i, j = 0;
517
+
518
+ if (len > plen) {
519
+ j = (len + 1) / 2 - (plen + 1) / 2;
520
+ len = plen;
521
+ }
522
+ for (i=0; i++ < len; ptr++) {
523
+ if (ISALPHA(*ptr))
524
+ byte |= ((*ptr & 15) + 9) & 15;
525
+ else
526
+ byte |= *ptr & 15;
527
+ if (i & 1)
528
+ byte <<= 4;
529
+ else {
530
+ char c = castchar(byte);
531
+ rb_str_buf_cat(res, &c, 1);
532
+ byte = 0;
533
+ }
534
+ }
535
+ if (len & 1) {
536
+ char c = castchar(byte);
537
+ rb_str_buf_cat(res, &c, 1);
538
+ }
539
+ len = j;
540
+ goto grow;
541
+ }
542
+ break;
543
+ }
544
+ break;
545
+
546
+ case 'c': /* signed char */
547
+ case 'C': /* unsigned char */
548
+ integer_size = 1;
549
+ bigendian_p = BIGENDIAN_P(); /* not effective */
550
+ goto pack_integer;
551
+
552
+ case 's': /* s for int16_t, s! for signed short */
553
+ integer_size = NATINT_LEN(short, 2);
554
+ bigendian_p = BIGENDIAN_P();
555
+ goto pack_integer;
556
+
557
+ case 'S': /* S for uint16_t, S! for unsigned short */
558
+ integer_size = NATINT_LEN(short, 2);
559
+ bigendian_p = BIGENDIAN_P();
560
+ goto pack_integer;
561
+
562
+ case 'i': /* i and i! for signed int */
563
+ integer_size = (int)sizeof(int);
564
+ bigendian_p = BIGENDIAN_P();
565
+ goto pack_integer;
566
+
567
+ case 'I': /* I and I! for unsigned int */
568
+ integer_size = (int)sizeof(int);
569
+ bigendian_p = BIGENDIAN_P();
570
+ goto pack_integer;
571
+
572
+ case 'l': /* l for int32_t, l! for signed long */
573
+ integer_size = NATINT_LEN(long, 4);
574
+ bigendian_p = BIGENDIAN_P();
575
+ goto pack_integer;
576
+
577
+ case 'L': /* L for uint32_t, L! for unsigned long */
578
+ integer_size = NATINT_LEN(long, 4);
579
+ bigendian_p = BIGENDIAN_P();
580
+ goto pack_integer;
581
+
582
+ case 'q': /* q for int64_t, q! for signed long long */
583
+ integer_size = NATINT_LEN_Q;
584
+ bigendian_p = BIGENDIAN_P();
585
+ goto pack_integer;
586
+
587
+ case 'Q': /* Q for uint64_t, Q! for unsigned long long */
588
+ integer_size = NATINT_LEN_Q;
589
+ bigendian_p = BIGENDIAN_P();
590
+ goto pack_integer;
591
+
592
+ case 'j': /* j for intptr_t */
593
+ integer_size = sizeof(intptr_t);
594
+ bigendian_p = BIGENDIAN_P();
595
+ goto pack_integer;
596
+
597
+ case 'J': /* J for uintptr_t */
598
+ integer_size = sizeof(uintptr_t);
599
+ bigendian_p = BIGENDIAN_P();
600
+ goto pack_integer;
601
+
602
+ case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
603
+ integer_size = 2;
604
+ bigendian_p = 1;
605
+ goto pack_integer;
606
+
607
+ case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
608
+ integer_size = 4;
609
+ bigendian_p = 1;
610
+ goto pack_integer;
611
+
612
+ case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
613
+ integer_size = 2;
614
+ bigendian_p = 0;
615
+ goto pack_integer;
616
+
617
+ case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
618
+ integer_size = 4;
619
+ bigendian_p = 0;
620
+ goto pack_integer;
621
+
622
+ pack_integer:
623
+ if (explicit_endian) {
624
+ bigendian_p = explicit_endian == '>';
625
+ }
626
+ if (integer_size > MAX_INTEGER_PACK_SIZE)
627
+ rb_bug("unexpected intger size for pack: %d", integer_size);
628
+ while (len-- > 0) {
629
+ char intbuf[MAX_INTEGER_PACK_SIZE];
630
+
631
+ from = NEXTFROM;
632
+ rb_integer_pack(from, intbuf, integer_size, 1, 0,
633
+ INTEGER_PACK_2COMP |
634
+ (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN));
635
+ rb_str_buf_cat(res, intbuf, integer_size);
636
+ }
637
+ break;
638
+
639
+ case 'f': /* single precision float in native format */
640
+ case 'F': /* ditto */
641
+ while (len-- > 0) {
642
+ float f;
643
+
644
+ from = NEXTFROM;
645
+ f = (float)RFLOAT_VALUE(rb_to_float(from));
646
+ rb_str_buf_cat(res, (char*)&f, sizeof(float));
647
+ }
648
+ break;
649
+
650
+ case 'e': /* single precision float in VAX byte-order */
651
+ while (len-- > 0) {
652
+ FLOAT_CONVWITH(tmp);
653
+
654
+ from = NEXTFROM;
655
+ tmp.f = (float)RFLOAT_VALUE(rb_to_float(from));
656
+ HTOVF(tmp);
657
+ rb_str_buf_cat(res, tmp.buf, sizeof(float));
658
+ }
659
+ break;
660
+
661
+ case 'E': /* double precision float in VAX byte-order */
662
+ while (len-- > 0) {
663
+ DOUBLE_CONVWITH(tmp);
664
+ from = NEXTFROM;
665
+ tmp.d = RFLOAT_VALUE(rb_to_float(from));
666
+ HTOVD(tmp);
667
+ rb_str_buf_cat(res, tmp.buf, sizeof(double));
668
+ }
669
+ break;
670
+
671
+ case 'd': /* double precision float in native format */
672
+ case 'D': /* ditto */
673
+ while (len-- > 0) {
674
+ double d;
675
+
676
+ from = NEXTFROM;
677
+ d = RFLOAT_VALUE(rb_to_float(from));
678
+ rb_str_buf_cat(res, (char*)&d, sizeof(double));
679
+ }
680
+ break;
681
+
682
+ case 'g': /* single precision float in network byte-order */
683
+ while (len-- > 0) {
684
+ FLOAT_CONVWITH(tmp);
685
+ from = NEXTFROM;
686
+ tmp.f = (float)RFLOAT_VALUE(rb_to_float(from));
687
+ HTONF(tmp);
688
+ rb_str_buf_cat(res, tmp.buf, sizeof(float));
689
+ }
690
+ break;
691
+
692
+ case 'G': /* double precision float in network byte-order */
693
+ while (len-- > 0) {
694
+ DOUBLE_CONVWITH(tmp);
695
+
696
+ from = NEXTFROM;
697
+ tmp.d = RFLOAT_VALUE(rb_to_float(from));
698
+ HTOND(tmp);
699
+ rb_str_buf_cat(res, tmp.buf, sizeof(double));
700
+ }
701
+ break;
702
+
703
+ case 'x': /* null byte */
704
+ grow:
705
+ while (len >= 10) {
706
+ rb_str_buf_cat(res, nul10, 10);
707
+ len -= 10;
708
+ }
709
+ rb_str_buf_cat(res, nul10, len);
710
+ break;
711
+
712
+ case 'X': /* back up byte */
713
+ shrink:
714
+ plen = RSTRING_LEN(res);
715
+ if (plen < len)
716
+ rb_raise(rb_eArgError, "X outside of string");
717
+ rb_str_set_len(res, plen - len);
718
+ break;
719
+
720
+ case '@': /* null fill to absolute position */
721
+ len -= RSTRING_LEN(res);
722
+ if (len > 0) goto grow;
723
+ len = -len;
724
+ if (len > 0) goto shrink;
725
+ break;
726
+
727
+ case '%':
728
+ rb_raise(rb_eArgError, "%% is not supported");
729
+ break;
730
+
731
+ case 'U': /* Unicode character */
732
+ while (len-- > 0) {
733
+ SIGNED_VALUE l;
734
+ char buf[8];
735
+ int le;
736
+
737
+ from = NEXTFROM;
738
+ from = rb_to_int(from);
739
+ l = NUM2LONG(from);
740
+ if (l < 0) {
741
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
742
+ }
743
+ le = rb_uv_to_utf8(buf, l);
744
+ rb_str_buf_cat(res, (char*)buf, le);
745
+ }
746
+ break;
747
+
748
+ case 'u': /* uuencoded string */
749
+ case 'm': /* base64 encoded string */
750
+ from = NEXTFROM;
751
+ StringValue(from);
752
+ ptr = RSTRING_PTR(from);
753
+ plen = RSTRING_LEN(from);
754
+
755
+ if (len == 0 && type == 'm') {
756
+ encodes(res, ptr, plen, type, 0);
757
+ ptr += plen;
758
+ break;
759
+ }
760
+ if (len <= 2)
761
+ len = 45;
762
+ else if (len > 63 && type == 'u')
763
+ len = 63;
764
+ else
765
+ len = len / 3 * 3;
766
+ while (plen > 0) {
767
+ long todo;
768
+
769
+ if (plen > len)
770
+ todo = len;
771
+ else
772
+ todo = plen;
773
+ encodes(res, ptr, todo, type, 1);
774
+ plen -= todo;
775
+ ptr += todo;
776
+ }
777
+ break;
778
+
779
+ case 'M': /* quoted-printable encoded string */
780
+ from = rb_obj_as_string(NEXTFROM);
781
+ if (len <= 1)
782
+ len = 72;
783
+ qpencode(res, from, len);
784
+ break;
785
+
786
+ case 'P': /* pointer to packed byte string */
787
+ from = THISFROM;
788
+ if (!NIL_P(from)) {
789
+ StringValue(from);
790
+ if (RSTRING_LEN(from) < len) {
791
+ rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
792
+ RSTRING_LEN(from), len);
793
+ }
794
+ }
795
+ len = 1;
796
+ /* FALL THROUGH */
797
+ case 'p': /* pointer to string */
798
+ while (len-- > 0) {
799
+ char *t;
800
+ from = NEXTFROM;
801
+ if (NIL_P(from)) {
802
+ t = 0;
803
+ }
804
+ else {
805
+ t = StringValuePtr(from);
806
+ rb_obj_taint(from);
807
+ }
808
+ if (!associates) {
809
+ associates = rb_ary_new();
810
+ }
811
+ rb_ary_push(associates, from);
812
+ rb_str_buf_cat(res, (char*)&t, sizeof(char*));
813
+ }
814
+ break;
815
+
816
+ case 'w': /* BER compressed integer */
817
+ while (len-- > 0) {
818
+ VALUE buf = rb_str_new(0, 0);
819
+ size_t numbytes;
820
+ int sign;
821
+ char *cp;
822
+
823
+ from = NEXTFROM;
824
+ from = rb_to_int(from);
825
+ numbytes = rb_absint_numwords(from, 7, NULL);
826
+ if (numbytes == 0)
827
+ numbytes = 1;
828
+ buf = rb_str_new(NULL, numbytes);
829
+
830
+ sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
831
+
832
+ if (sign < 0)
833
+ rb_raise(rb_eArgError, "can't compress negative numbers");
834
+ if (sign == 2)
835
+ rb_bug("buffer size problem?");
836
+
837
+ cp = RSTRING_PTR(buf);
838
+ while (1 < numbytes) {
839
+ *cp |= 0x80;
840
+ cp++;
841
+ numbytes--;
842
+ }
843
+
844
+ rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
845
+ }
846
+ break;
847
+
848
+ default: {
849
+ char unknown[5];
850
+ if (ISPRINT(type)) {
851
+ unknown[0] = type;
852
+ unknown[1] = '\0';
853
+ }
854
+ else {
855
+ snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
856
+ }
857
+ rb_warning("unknown pack directive '%s' in '% "PRIsVALUE"'",
858
+ unknown, fmt);
859
+ break;
860
+ }
861
+ }
862
+ }
863
+
864
+ if (associates) {
865
+ str_associate(res, associates);
866
+ }
867
+ OBJ_INFECT(res, fmt);
868
+ switch (enc_info) {
869
+ case 1:
870
+ ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
871
+ break;
872
+ case 2:
873
+ rb_enc_set_index(res, rb_utf8_encindex());
874
+ break;
875
+ default:
876
+ /* do nothing, keep ASCII-8BIT */
877
+ break;
878
+ }
879
+ return res;
880
+ }
881
+
882
+ static const char uu_table[] =
883
+ "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
884
+ static const char b64_table[] =
885
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
886
+
887
+ static void
888
+ encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
889
+ {
890
+ enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
891
+ char buff[buff_size + 1]; /* +1 for tail_lf */
892
+ long i = 0;
893
+ const char *const trans = type == 'u' ? uu_table : b64_table;
894
+ char padding;
895
+ const unsigned char *s = (const unsigned char *)s0;
896
+
897
+ if (type == 'u') {
898
+ buff[i++] = (char)len + ' ';
899
+ padding = '`';
900
+ }
901
+ else {
902
+ padding = '=';
903
+ }
904
+ while (len >= input_unit) {
905
+ while (len >= input_unit && buff_size-i >= encoded_unit) {
906
+ buff[i++] = trans[077 & (*s >> 2)];
907
+ buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
908
+ buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
909
+ buff[i++] = trans[077 & s[2]];
910
+ s += input_unit;
911
+ len -= input_unit;
912
+ }
913
+ if (buff_size-i < encoded_unit) {
914
+ rb_str_buf_cat(str, buff, i);
915
+ i = 0;
916
+ }
917
+ }
918
+
919
+ if (len == 2) {
920
+ buff[i++] = trans[077 & (*s >> 2)];
921
+ buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
922
+ buff[i++] = trans[077 & (((s[1] << 2) & 074) | (('\0' >> 6) & 03))];
923
+ buff[i++] = padding;
924
+ }
925
+ else if (len == 1) {
926
+ buff[i++] = trans[077 & (*s >> 2)];
927
+ buff[i++] = trans[077 & (((*s << 4) & 060) | (('\0' >> 4) & 017))];
928
+ buff[i++] = padding;
929
+ buff[i++] = padding;
930
+ }
931
+ if (tail_lf) buff[i++] = '\n';
932
+ rb_str_buf_cat(str, buff, i);
933
+ if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
934
+ }
935
+
936
+ static const char hex_table[] = "0123456789ABCDEF";
937
+
938
+ static void
939
+ qpencode(VALUE str, VALUE from, long len)
940
+ {
941
+ char buff[1024];
942
+ long i = 0, n = 0, prev = EOF;
943
+ unsigned char *s = (unsigned char*)RSTRING_PTR(from);
944
+ unsigned char *send = s + RSTRING_LEN(from);
945
+
946
+ while (s < send) {
947
+ if ((*s > 126) ||
948
+ (*s < 32 && *s != '\n' && *s != '\t') ||
949
+ (*s == '=')) {
950
+ buff[i++] = '=';
951
+ buff[i++] = hex_table[*s >> 4];
952
+ buff[i++] = hex_table[*s & 0x0f];
953
+ n += 3;
954
+ prev = EOF;
955
+ }
956
+ else if (*s == '\n') {
957
+ if (prev == ' ' || prev == '\t') {
958
+ buff[i++] = '=';
959
+ buff[i++] = *s;
960
+ }
961
+ buff[i++] = *s;
962
+ n = 0;
963
+ prev = *s;
964
+ }
965
+ else {
966
+ buff[i++] = *s;
967
+ n++;
968
+ prev = *s;
969
+ }
970
+ if (n > len) {
971
+ buff[i++] = '=';
972
+ buff[i++] = '\n';
973
+ n = 0;
974
+ prev = '\n';
975
+ }
976
+ if (i > 1024 - 5) {
977
+ rb_str_buf_cat(str, buff, i);
978
+ i = 0;
979
+ }
980
+ s++;
981
+ }
982
+ if (n > 0) {
983
+ buff[i++] = '=';
984
+ buff[i++] = '\n';
985
+ }
986
+ if (i > 0) {
987
+ rb_str_buf_cat(str, buff, i);
988
+ }
989
+ }
990
+
991
+ static inline int
992
+ hex2num(char c)
993
+ {
994
+ int n;
995
+ n = ruby_digit36_to_number_table[(unsigned char)c];
996
+ if (16 <= n)
997
+ n = -1;
998
+ return n;
999
+ }
1000
+
1001
+ #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
1002
+ tmp_len = 0; \
1003
+ if (len > (long)((send-s)/(sz))) { \
1004
+ if (!star) { \
1005
+ tmp_len = len-(send-s)/(sz); \
1006
+ } \
1007
+ len = (send-s)/(sz); \
1008
+ } \
1009
+ } while (0)
1010
+
1011
+ #define PACK_ITEM_ADJUST() do { \
1012
+ if (tmp_len > 0 && mode == UNPACK_ARRAY) \
1013
+ rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
1014
+ } while (0)
1015
+
1016
+ /* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
1017
+ * 12.4/12.5/12.6 C compiler optimization bug
1018
+ * with "-xO4" optimization option.
1019
+ */
1020
+ #if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
1021
+ # define AVOID_CC_BUG volatile
1022
+ #else
1023
+ # define AVOID_CC_BUG
1024
+ #endif
1025
+
1026
+ static VALUE
1027
+ infected_str_new(const char *ptr, long len, VALUE str)
1028
+ {
1029
+ VALUE s = rb_str_new(ptr, len);
1030
+
1031
+ OBJ_INFECT(s, str);
1032
+ return s;
1033
+ }
1034
+
1035
+ /* unpack mode */
1036
+ #define UNPACK_ARRAY 0
1037
+ #define UNPACK_BLOCK 1
1038
+ #define UNPACK_1 2
1039
+
1040
/*
 * Shared decoder behind String#unpack and String#unpack1.
 *
 * Walks the directives in `fmt` one at a time and decodes values from the
 * bytes of `str`.  `mode` decides what happens to each decoded value (see
 * UNPACK_PUSH below):
 *   UNPACK_ARRAY - values are appended to an array, which is returned;
 *   UNPACK_BLOCK - values are yielded; the return value is nil;
 *   UNPACK_1     - the first decoded value is returned at once; nil is
 *                  returned if the format produced no value.
 * Both arguments are coerced with StringValue.  Malformed formats or data
 * raise ArgumentError or RangeError through rb_raise.
 */
+ static VALUE
1041
+ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
1042
+ {
1043
+ #define hexdigits ruby_hexdigits
1044
+ char *s, *send; /* read cursor into str and one-past-the-end of str */
1045
+ char *p, *pend; /* read cursor into fmt and one-past-the-end of fmt */
1046
+ VALUE ary; /* result array in UNPACK_ARRAY mode, Qnil otherwise */
1047
+ char type; /* directive character currently being processed */
1048
+ long len; /* repeat count / field width of the current directive */
1049
+ AVOID_CC_BUG long tmp_len; /* not used directly here; presumably scratch for the PACK_LENGTH_ADJUST_SIZE / PACK_ITEM_ADJUST macros defined outside this view */
1050
+ int star; /* set when the count was given as '*' */
1051
+ #ifdef NATINT_PACK
1052
+ int natint; /* native integer */
1053
+ #endif
1054
+ int signed_p, integer_size, bigendian_p;
1055
+ #define UNPACK_PUSH(item) do {\
1056
+ VALUE item_val = (item);\
1057
+ if ((mode) == UNPACK_BLOCK) {\
1058
+ rb_yield(item_val);\
1059
+ }\
1060
+ else if ((mode) == UNPACK_ARRAY) {\
1061
+ rb_ary_push(ary, item_val);\
1062
+ }\
1063
+ else /* if ((mode) == UNPACK_1) { */ {\
1064
+ return item_val; \
1065
+ }\
1066
+ } while (0)
1067
+
1068
+ StringValue(str);
1069
+ StringValue(fmt);
1070
+ s = RSTRING_PTR(str);
1071
+ send = s + RSTRING_LEN(str);
1072
+ p = RSTRING_PTR(fmt);
1073
+ pend = p + RSTRING_LEN(fmt);
1074
+
1075
+ ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
1076
+ while (p < pend) { /* one iteration per directive in fmt */
1077
+ int explicit_endian = 0; /* 0, or the '<' / '>' modifier character seen */
1078
+ type = *p++;
1079
+ #ifdef NATINT_PACK
1080
+ natint = 0;
1081
+ #endif
1082
+
1083
+ if (ISSPACE(type)) continue; /* whitespace between directives is ignored */
1084
+ if (type == '#') { /* comment: skip forward to the next newline */
1085
+ while ((p < pend) && (*p != '\n')) {
1086
+ p++;
1087
+ }
1088
+ continue;
1089
+ }
1090
+
1091
+ star = 0;
1092
+ {
1093
+ modifiers: /* re-entered via goto until no further modifier character follows */
1094
+ switch (*p) {
1095
+ case '_':
1096
+ case '!':
1097
+
1098
+ if (strchr(natstr, type)) { /* native-size modifier, only legal after the types listed in natstr */
1099
+ #ifdef NATINT_PACK
1100
+ natint = 1;
1101
+ #endif
1102
+ p++;
1103
+ }
1104
+ else {
1105
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
1106
+ }
1107
+ goto modifiers;
1108
+
1109
+ case '<':
1110
+ case '>':
1111
+ if (!strchr(endstr, type)) { /* byte-order modifier, only legal after the types listed in endstr */
1112
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
1113
+ }
1114
+ if (explicit_endian) { /* a second byte-order modifier on the same directive is rejected */
1115
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
1116
+ }
1117
+ explicit_endian = *p++;
1118
+ goto modifiers;
1119
+ }
1120
+ }
1121
+
1122
+ if (p >= pend) /* end of format: count defaults to 1 */
1123
+ len = 1;
1124
+ else if (*p == '*') { /* '*': count is the number of bytes left in str */
1125
+ star = 1;
1126
+ len = send - s;
1127
+ p++;
1128
+ }
1129
+ else if (ISDIGIT(*p)) { /* explicit decimal count */
1130
+ errno = 0;
1131
+ len = STRTOUL(p, (char**)&p, 10);
1132
+ if (len < 0 || errno) { /* conversion error, or a value that turns negative when stored in a long */
1133
+ rb_raise(rb_eRangeError, "pack length too big");
1134
+ }
1135
+ }
1136
+ else {
1137
+ len = (type != '@'); /* no count: 1 for every directive except '@', which gets 0 */
1138
+ }
1139
+
1140
+ switch (type) {
1141
+ case '%':
1142
+ rb_raise(rb_eArgError, "%% is not supported");
1143
+ break;
1144
+
1145
+ case 'A': /* string field; trailing spaces and NULs are dropped from the value, the full width is consumed */
1146
+ if (len > send - s) len = send - s;
1147
+ {
1148
+ long end = len;
1149
+ char *t = s + len - 1;
1150
+
1151
+ while (t >= s) {
1152
+ if (*t != ' ' && *t != '\0') break;
1153
+ t--; len--;
1154
+ }
1155
+ UNPACK_PUSH(infected_str_new(s, len, str));
1156
+ s += end;
1157
+ }
1158
+ break;
1159
+
1160
+ case 'Z': /* string field ending at the first NUL (or at the field width) */
1161
+ {
1162
+ char *t = s;
1163
+
1164
+ if (len > send-s) len = send-s;
1165
+ while (t < s+len && *t) t++;
1166
+ UNPACK_PUSH(infected_str_new(s, t-s, str));
1167
+ if (t < send) t++;
1168
+ s = star ? t : s+len; /* with '*' resume just after the terminator, otherwise after the whole field */
1169
+ }
1170
+ break;
1171
+
1172
+ case 'a': /* raw string field, nothing stripped */
1173
+ if (len > send - s) len = send - s;
1174
+ UNPACK_PUSH(infected_str_new(s, len, str));
1175
+ s += len;
1176
+ break;
1177
+
1178
+ case 'b': /* bit string, least significant bit of each byte first; len counts bits */
1179
+ {
1180
+ VALUE bitstr;
1181
+ char *t;
1182
+ int bits;
1183
+ long i;
1184
+
1185
+ if (p[-1] == '*' || len > (send - s) * 8)
1186
+ len = (send - s) * 8;
1187
+ bits = 0;
1188
+ bitstr = rb_usascii_str_new(0, len);
1189
+ t = RSTRING_PTR(bitstr);
1190
+ for (i=0; i<len; i++) {
1191
+ if (i & 7) bits >>= 1;
1192
+ else bits = (unsigned char)*s++; /* every 8th bit: fetch the next input byte */
1193
+ *t++ = (bits & 1) ? '1' : '0';
1194
+ }
1195
+ UNPACK_PUSH(bitstr);
1196
+ }
1197
+ break;
1198
+
1199
+ case 'B': /* bit string, most significant bit of each byte first; len counts bits */
1200
+ {
1201
+ VALUE bitstr;
1202
+ char *t;
1203
+ int bits;
1204
+ long i;
1205
+
1206
+ if (p[-1] == '*' || len > (send - s) * 8)
1207
+ len = (send - s) * 8;
1208
+ bits = 0;
1209
+ bitstr = rb_usascii_str_new(0, len);
1210
+ t = RSTRING_PTR(bitstr);
1211
+ for (i=0; i<len; i++) {
1212
+ if (i & 7) bits <<= 1;
1213
+ else bits = (unsigned char)*s++;
1214
+ *t++ = (bits & 128) ? '1' : '0';
1215
+ }
1216
+ UNPACK_PUSH(bitstr);
1217
+ }
1218
+ break;
1219
+
1220
+ case 'h': /* hex string, low nibble of each byte first; len counts hex digits */
1221
+ {
1222
+ VALUE bitstr;
1223
+ char *t;
1224
+ int bits;
1225
+ long i;
1226
+
1227
+ if (p[-1] == '*' || len > (send - s) * 2)
1228
+ len = (send - s) * 2;
1229
+ bits = 0;
1230
+ bitstr = rb_usascii_str_new(0, len);
1231
+ t = RSTRING_PTR(bitstr);
1232
+ for (i=0; i<len; i++) {
1233
+ if (i & 1)
1234
+ bits >>= 4;
1235
+ else
1236
+ bits = (unsigned char)*s++;
1237
+ *t++ = hexdigits[bits & 15];
1238
+ }
1239
+ UNPACK_PUSH(bitstr);
1240
+ }
1241
+ break;
1242
+
1243
+ case 'H': /* hex string, high nibble of each byte first; len counts hex digits */
1244
+ {
1245
+ VALUE bitstr;
1246
+ char *t;
1247
+ int bits;
1248
+ long i;
1249
+
1250
+ if (p[-1] == '*' || len > (send - s) * 2)
1251
+ len = (send - s) * 2;
1252
+ bits = 0;
1253
+ bitstr = rb_usascii_str_new(0, len);
1254
+ t = RSTRING_PTR(bitstr);
1255
+ for (i=0; i<len; i++) {
1256
+ if (i & 1)
1257
+ bits <<= 4;
1258
+ else
1259
+ bits = (unsigned char)*s++;
1260
+ *t++ = hexdigits[(bits >> 4) & 15];
1261
+ }
1262
+ UNPACK_PUSH(bitstr);
1263
+ }
1264
+ break;
1265
+
1266
+ case 'c': /* the integer directives below only set signedness, size and byte order, then share unpack_integer */
1267
+ signed_p = 1;
1268
+ integer_size = 1;
1269
+ bigendian_p = BIGENDIAN_P(); /* not effective */
1270
+ goto unpack_integer;
1271
+
1272
+ case 'C':
1273
+ signed_p = 0;
1274
+ integer_size = 1;
1275
+ bigendian_p = BIGENDIAN_P(); /* not effective */
1276
+ goto unpack_integer;
1277
+
1278
+ case 's':
1279
+ signed_p = 1;
1280
+ integer_size = NATINT_LEN(short, 2);
1281
+ bigendian_p = BIGENDIAN_P();
1282
+ goto unpack_integer;
1283
+
1284
+ case 'S':
1285
+ signed_p = 0;
1286
+ integer_size = NATINT_LEN(short, 2);
1287
+ bigendian_p = BIGENDIAN_P();
1288
+ goto unpack_integer;
1289
+
1290
+ case 'i':
1291
+ signed_p = 1;
1292
+ integer_size = (int)sizeof(int);
1293
+ bigendian_p = BIGENDIAN_P();
1294
+ goto unpack_integer;
1295
+
1296
+ case 'I':
1297
+ signed_p = 0;
1298
+ integer_size = (int)sizeof(int);
1299
+ bigendian_p = BIGENDIAN_P();
1300
+ goto unpack_integer;
1301
+
1302
+ case 'l':
1303
+ signed_p = 1;
1304
+ integer_size = NATINT_LEN(long, 4);
1305
+ bigendian_p = BIGENDIAN_P();
1306
+ goto unpack_integer;
1307
+
1308
+ case 'L':
1309
+ signed_p = 0;
1310
+ integer_size = NATINT_LEN(long, 4);
1311
+ bigendian_p = BIGENDIAN_P();
1312
+ goto unpack_integer;
1313
+
1314
+ case 'q':
1315
+ signed_p = 1;
1316
+ integer_size = NATINT_LEN_Q;
1317
+ bigendian_p = BIGENDIAN_P();
1318
+ goto unpack_integer;
1319
+
1320
+ case 'Q':
1321
+ signed_p = 0;
1322
+ integer_size = NATINT_LEN_Q;
1323
+ bigendian_p = BIGENDIAN_P();
1324
+ goto unpack_integer;
1325
+
1326
+ case 'j':
1327
+ signed_p = 1;
1328
+ integer_size = sizeof(intptr_t);
1329
+ bigendian_p = BIGENDIAN_P();
1330
+ goto unpack_integer;
1331
+
1332
+ case 'J':
1333
+ signed_p = 0;
1334
+ integer_size = sizeof(uintptr_t);
1335
+ bigendian_p = BIGENDIAN_P();
1336
+ goto unpack_integer;
1337
+
1338
+ case 'n':
1339
+ signed_p = 0;
1340
+ integer_size = 2;
1341
+ bigendian_p = 1;
1342
+ goto unpack_integer;
1343
+
1344
+ case 'N':
1345
+ signed_p = 0;
1346
+ integer_size = 4;
1347
+ bigendian_p = 1;
1348
+ goto unpack_integer;
1349
+
1350
+ case 'v':
1351
+ signed_p = 0;
1352
+ integer_size = 2;
1353
+ bigendian_p = 0;
1354
+ goto unpack_integer;
1355
+
1356
+ case 'V':
1357
+ signed_p = 0;
1358
+ integer_size = 4;
1359
+ bigendian_p = 0;
1360
+ goto unpack_integer;
1361
+
1362
+ unpack_integer: /* common tail for every fixed-size integer directive above */
1363
+ if (explicit_endian) { /* a '<' or '>' modifier overrides the directive's own byte order */
1364
+ bigendian_p = explicit_endian == '>';
1365
+ }
1366
+ PACK_LENGTH_ADJUST_SIZE(integer_size);
1367
+ while (len-- > 0) {
1368
+ int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
1369
+ VALUE val;
1370
+ if (signed_p)
1371
+ flags |= INTEGER_PACK_2COMP;
1372
+ val = rb_integer_unpack(s, integer_size, 1, 0, flags);
1373
+ UNPACK_PUSH(val);
1374
+ s += integer_size;
1375
+ }
1376
+ PACK_ITEM_ADJUST();
1377
+ break;
1378
+
1379
+ case 'f': /* float copied as-is from the input bytes */
1380
+ case 'F':
1381
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1382
+ while (len-- > 0) {
1383
+ float tmp;
1384
+ memcpy(&tmp, s, sizeof(float));
1385
+ s += sizeof(float);
1386
+ UNPACK_PUSH(DBL2NUM((double)tmp));
1387
+ }
1388
+ PACK_ITEM_ADJUST();
1389
+ break;
1390
+
1391
+ case 'e': /* float passed through VTOHF before use */
1392
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1393
+ while (len-- > 0) {
1394
+ FLOAT_CONVWITH(tmp);
1395
+ memcpy(tmp.buf, s, sizeof(float));
1396
+ s += sizeof(float);
1397
+ VTOHF(tmp);
1398
+ UNPACK_PUSH(DBL2NUM(tmp.f));
1399
+ }
1400
+ PACK_ITEM_ADJUST();
1401
+ break;
1402
+
1403
+ case 'E': /* double passed through VTOHD before use */
1404
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1405
+ while (len-- > 0) {
1406
+ DOUBLE_CONVWITH(tmp);
1407
+ memcpy(tmp.buf, s, sizeof(double));
1408
+ s += sizeof(double);
1409
+ VTOHD(tmp);
1410
+ UNPACK_PUSH(DBL2NUM(tmp.d));
1411
+ }
1412
+ PACK_ITEM_ADJUST();
1413
+ break;
1414
+
1415
+ case 'D': /* double copied as-is from the input bytes */
1416
+ case 'd':
1417
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1418
+ while (len-- > 0) {
1419
+ double tmp;
1420
+ memcpy(&tmp, s, sizeof(double));
1421
+ s += sizeof(double);
1422
+ UNPACK_PUSH(DBL2NUM(tmp));
1423
+ }
1424
+ PACK_ITEM_ADJUST();
1425
+ break;
1426
+
1427
+ case 'g': /* float passed through NTOHF before use */
1428
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1429
+ while (len-- > 0) {
1430
+ FLOAT_CONVWITH(tmp);
1431
+ memcpy(tmp.buf, s, sizeof(float));
1432
+ s += sizeof(float);
1433
+ NTOHF(tmp);
1434
+ UNPACK_PUSH(DBL2NUM(tmp.f));
1435
+ }
1436
+ PACK_ITEM_ADJUST();
1437
+ break;
1438
+
1439
+ case 'G': /* double passed through NTOHD before use */
1440
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1441
+ while (len-- > 0) {
1442
+ DOUBLE_CONVWITH(tmp);
1443
+ memcpy(tmp.buf, s, sizeof(double));
1444
+ s += sizeof(double);
1445
+ NTOHD(tmp);
1446
+ UNPACK_PUSH(DBL2NUM(tmp.d));
1447
+ }
1448
+ PACK_ITEM_ADJUST();
1449
+ break;
1450
+
1451
+ case 'U': /* UTF-8 characters, one Integer per character */
1452
+ if (len > send - s) len = send - s;
1453
+ while (len > 0 && s < send) {
1454
+ long alen = send - s; /* in: bytes available; out: bytes consumed by utf8_to_uv */
1455
+ unsigned long l;
1456
+
1457
+ l = utf8_to_uv(s, &alen);
1458
+ s += alen; len--;
1459
+ UNPACK_PUSH(ULONG2NUM(l));
1460
+ }
1461
+ break;
1462
+
1463
+ case 'u': /* UU-encoded data */
1464
+ {
1465
+ VALUE buf = infected_str_new(0, (send - s)*3/4, str); /* upper bound: 4 input characters decode to 3 bytes */
1466
+ char *ptr = RSTRING_PTR(buf);
1467
+ long total = 0;
1468
+
1469
+ while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1470
+ long a,b,c,d;
1471
+ char hunk[3];
1472
+
1473
+ len = ((unsigned char)*s++ - ' ') & 077; /* leading character of the line: number of decoded bytes on it */
1474
+
1475
+ total += len;
1476
+ if (total > RSTRING_LEN(buf)) { /* never write more than buf was sized for */
1477
+ len -= total - RSTRING_LEN(buf);
1478
+ total = RSTRING_LEN(buf);
1479
+ }
1480
+
1481
+ while (len > 0) {
1482
+ long mlen = len > 3 ? 3 : len;
1483
+
1484
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1485
+ a = ((unsigned char)*s++ - ' ') & 077;
1486
+ else
1487
+ a = 0;
1488
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1489
+ b = ((unsigned char)*s++ - ' ') & 077;
1490
+ else
1491
+ b = 0;
1492
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1493
+ c = ((unsigned char)*s++ - ' ') & 077;
1494
+ else
1495
+ c = 0;
1496
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
1497
+ d = ((unsigned char)*s++ - ' ') & 077;
1498
+ else
1499
+ d = 0;
1500
+ hunk[0] = (char)(a << 2 | b >> 4);
1501
+ hunk[1] = (char)(b << 4 | c >> 2);
1502
+ hunk[2] = (char)(c << 6 | d);
1503
+ memcpy(ptr, hunk, mlen);
1504
+ ptr += mlen;
1505
+ len -= mlen;
1506
+ }
1507
+ if (s < send && (unsigned char)*s != '\r' && *s != '\n')
1508
+ s++; /* possible checksum byte */
1509
+ if (s < send && *s == '\r') s++;
1510
+ if (s < send && *s == '\n') s++;
1511
+ }
1512
+
1513
+ rb_str_set_len(buf, total);
1514
+ UNPACK_PUSH(buf);
1515
+ }
1516
+ break;
1517
+
1518
+ case 'm': /* base64 */
1519
+ {
1520
+ VALUE buf = infected_str_new(0, (send - s + 3)*3/4, str); /* +3 is for skipping paddings */
1521
+ char *ptr = RSTRING_PTR(buf);
1522
+ int a = -1,b = -1,c = 0,d = 0;
1523
+ static signed char b64_xtable[256]; /* character -> 6-bit value, -1 for characters outside the alphabet */
1524
+
1525
+ if (b64_xtable['/'] <= 0) { /* table still zero-filled: build it on first use */
1526
+ int i;
1527
+
1528
+ for (i = 0; i < 256; i++) {
1529
+ b64_xtable[i] = -1;
1530
+ }
1531
+ for (i = 0; i < 64; i++) {
1532
+ b64_xtable[(unsigned char)b64_table[i]] = (char)i;
1533
+ }
1534
+ }
1535
+ if (len == 0) { /* count 0 ("m0"): strict decoding, any irregularity raises ArgumentError */
1536
+ while (s < send) {
1537
+ a = b = c = d = -1;
1538
+ a = b64_xtable[(unsigned char)*s++];
1539
+ if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
1540
+ b = b64_xtable[(unsigned char)*s++];
1541
+ if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
1542
+ if (*s == '=') {
1543
+ if (s + 2 == send && *(s + 1) == '=') break;
1544
+ rb_raise(rb_eArgError, "invalid base64");
1545
+ }
1546
+ c = b64_xtable[(unsigned char)*s++];
1547
+ if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
1548
+ if (s + 1 == send && *s == '=') break;
1549
+ d = b64_xtable[(unsigned char)*s++];
1550
+ if (d == -1) rb_raise(rb_eArgError, "invalid base64");
1551
+ *ptr++ = castchar(a << 2 | b >> 4);
1552
+ *ptr++ = castchar(b << 4 | c >> 2);
1553
+ *ptr++ = castchar(c << 6 | d);
1554
+ }
1555
+ if (c == -1) { /* input ended with "==": one byte left, unused low bits of b must be zero */
1556
+ *ptr++ = castchar(a << 2 | b >> 4);
1557
+ if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
1558
+ }
1559
+ else if (d == -1) { /* input ended with "=": two bytes left, unused low bits of c must be zero */
1560
+ *ptr++ = castchar(a << 2 | b >> 4);
1561
+ *ptr++ = castchar(b << 4 | c >> 2);
1562
+ if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
1563
+ }
1564
+ }
1565
+ else { /* lenient decoding: characters outside the alphabet are skipped */
1566
+ while (s < send) {
1567
+ a = b = c = d = -1;
1568
+ while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1569
+ if (s >= send) break;
1570
+ s++;
1571
+ while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
1572
+ if (s >= send) break;
1573
+ s++;
1574
+ while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1575
+ if (*s == '=' || s >= send) break;
1576
+ s++;
1577
+ while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
1578
+ if (*s == '=' || s >= send) break;
1579
+ s++;
1580
+ *ptr++ = castchar(a << 2 | b >> 4);
1581
+ *ptr++ = castchar(b << 4 | c >> 2);
1582
+ *ptr++ = castchar(c << 6 | d);
1583
+ a = -1;
1584
+ }
1585
+ if (a != -1 && b != -1) { /* flush a trailing partial group of two or three characters */
1586
+ if (c == -1)
1587
+ *ptr++ = castchar(a << 2 | b >> 4);
1588
+ else {
1589
+ *ptr++ = castchar(a << 2 | b >> 4);
1590
+ *ptr++ = castchar(b << 4 | c >> 2);
1591
+ }
1592
+ }
1593
+ }
1594
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1595
+ UNPACK_PUSH(buf);
1596
+ }
1597
+ break;
1598
+
1599
+ case 'M': /* quoted-printable */
1600
+ {
1601
+ VALUE buf = infected_str_new(0, send - s, str);
1602
+ char *ptr = RSTRING_PTR(buf), *ss = s; /* ss: start of the input not yet fully decoded */
1603
+ int csum = 0; /* OR of all output bytes, used below to pick the code range */
1604
+ int c1, c2;
1605
+
1606
+ while (s < send) {
1607
+ if (*s == '=') {
1608
+ if (++s == send) break;
1609
+ if (s+1 < send && *s == '\r' && *(s+1) == '\n')
1610
+ s++;
1611
+ if (*s != '\n') { /* "=\n" is a soft line break and produces no output */
1612
+ if ((c1 = hex2num(*s)) == -1) break;
1613
+ if (++s == send) break;
1614
+ if ((c2 = hex2num(*s)) == -1) break;
1615
+ csum |= *ptr++ = castchar(c1 << 4 | c2);
1616
+ }
1617
+ }
1618
+ else {
1619
+ csum |= *ptr++ = *s;
1620
+ }
1621
+ s++;
1622
+ ss = s;
1623
+ }
1624
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
1625
+ rb_str_buf_cat(buf, ss, send-ss); /* append verbatim whatever followed an escape that could not be decoded */
1626
+ csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
1627
+ ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
1628
+ UNPACK_PUSH(buf);
1629
+ }
1630
+ break;
1631
+
1632
+ case '@': /* move the cursor to absolute offset len from the start of str */
1633
+ if (len > RSTRING_LEN(str))
1634
+ rb_raise(rb_eArgError, "@ outside of string");
1635
+ s = RSTRING_PTR(str) + len;
1636
+ break;
1637
+
1638
+ case 'X': /* move the cursor back len bytes */
1639
+ if (len > s - RSTRING_PTR(str))
1640
+ rb_raise(rb_eArgError, "X outside of string");
1641
+ s -= len;
1642
+ break;
1643
+
1644
+ case 'x': /* move the cursor forward len bytes */
1645
+ if (len > send - s)
1646
+ rb_raise(rb_eArgError, "x outside of string");
1647
+ s += len;
1648
+ break;
1649
+
1650
+ case 'P': /* one raw pointer; it must point at a string associated with str */
1651
+ if (sizeof(char *) <= (size_t)(send - s)) {
1652
+ VALUE tmp = Qnil;
1653
+ char *t;
1654
+
1655
+ memcpy(&t, s, sizeof(char *));
1656
+ s += sizeof(char *);
1657
+
1658
+ if (t) { /* a null pointer yields nil */
1659
+ VALUE a;
1660
+ const VALUE *p, *pend;
1661
+
1662
+ if (!(a = str_associated(str))) {
1663
+ rb_raise(rb_eArgError, "no associated pointer");
1664
+ }
1665
+ p = RARRAY_CONST_PTR(a);
1666
+ pend = p + RARRAY_LEN(a);
1667
+ while (p < pend) {
1668
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
1669
+ if (len < RSTRING_LEN(*p)) { /* shorter than the associated string: copy only len bytes */
1670
+ tmp = rb_tainted_str_new(t, len);
1671
+ str_associate(tmp, a);
1672
+ }
1673
+ else {
1674
+ tmp = *p;
1675
+ }
1676
+ break;
1677
+ }
1678
+ p++;
1679
+ }
1680
+ if (p == pend) {
1681
+ rb_raise(rb_eArgError, "non associated pointer");
1682
+ }
1683
+ }
1684
+ UNPACK_PUSH(tmp);
1685
+ }
1686
+ break;
1687
+
1688
+ case 'p': /* up to len raw pointers, each resolved to its associated string */
1689
+ if (len > (long)((send - s) / sizeof(char *)))
1690
+ len = (send - s) / sizeof(char *);
1691
+ while (len-- > 0) {
1692
+ if ((size_t)(send - s) < sizeof(char *))
1693
+ break;
1694
+ else {
1695
+ VALUE tmp = Qnil;
1696
+ char *t;
1697
+
1698
+ memcpy(&t, s, sizeof(char *));
1699
+ s += sizeof(char *);
1700
+
1701
+ if (t) { /* a null pointer yields nil */
1702
+ VALUE a;
1703
+ const VALUE *p, *pend;
1704
+
1705
+ if (!(a = str_associated(str))) {
1706
+ rb_raise(rb_eArgError, "no associated pointer");
1707
+ }
1708
+ p = RARRAY_CONST_PTR(a);
1709
+ pend = p + RARRAY_LEN(a);
1710
+ while (p < pend) {
1711
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
1712
+ tmp = *p;
1713
+ break;
1714
+ }
1715
+ p++;
1716
+ }
1717
+ if (p == pend) {
1718
+ rb_raise(rb_eArgError, "non associated pointer");
1719
+ }
1720
+ }
1721
+ UNPACK_PUSH(tmp);
1722
+ }
1723
+ }
1724
+ break;
1725
+
1726
+ case 'w': /* variable-length integers: a byte with the high bit set continues the number, one without it ends it */
1727
+ {
1728
+ char *s0 = s; /* first byte of the number currently being collected */
1729
+ while (len > 0 && s < send) {
1730
+ if (*s & 0x80) {
1731
+ s++;
1732
+ }
1733
+ else {
1734
+ s++;
1735
+ UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
1736
+ len--;
1737
+ s0 = s;
1738
+ }
1739
+ }
1740
+ }
1741
+ break;
1742
+
1743
+ default: /* unknown directives only produce a warning and are otherwise skipped */
1744
+ rb_warning("unknown unpack directive '%c' in '%s'",
1745
+ type, RSTRING_PTR(fmt));
1746
+ break;
1747
+ }
1748
+ }
1749
+
1750
+ return ary; /* the array in UNPACK_ARRAY mode; Qnil in the other modes */
1751
+ }
1752
+
1753
+ /*
1754
+ * call-seq:
1755
+ * str.unpack(format) -> anArray
1756
+ *
1757
+ * Decodes <i>str</i> (which may contain binary data) according to the
1758
+ * format string, returning an array of each value extracted. The
1759
+ * format string consists of a sequence of single-character directives,
1760
+ * summarized in the table at the end of this entry.
1761
+ * Each directive may be followed
1762
+ * by a number, indicating the number of times to repeat with this
1763
+ * directive. An asterisk (``<code>*</code>'') will use up all
1764
+ * remaining elements. The directives <code>sSiIlLqQjJ</code> may each be
1765
+ * followed by an underscore (``<code>_</code>'') or
1766
+ * exclamation mark (``<code>!</code>'') to use the underlying
1767
+ * platform's native size for the specified type; otherwise, it uses a
1768
+ * platform-independent consistent size. Spaces are ignored in the
1769
+ * format string. See also <code>String#unpack1</code>, <code>Array#pack</code>.
1770
+ *
1771
+ * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1772
+ * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1773
+ * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1774
+ * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1775
+ * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1776
+ * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1777
+ * "now=20is".unpack('M*') #=> ["now is"]
1778
+ * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1779
+ *
1780
+ * This table summarizes the various formats and the Ruby classes
1781
+ * returned by each.
1782
+ *
1783
+ * Integer | |
1784
+ * Directive | Returns | Meaning
1785
+ * ------------------------------------------------------------------
1786
+ * C | Integer | 8-bit unsigned (unsigned char)
1787
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
1788
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
1789
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
1790
+ * J | Integer | pointer width unsigned, native endian (uintptr_t)
1791
+ * | |
1792
+ * c | Integer | 8-bit signed (signed char)
1793
+ * s | Integer | 16-bit signed, native endian (int16_t)
1794
+ * l | Integer | 32-bit signed, native endian (int32_t)
1795
+ * q | Integer | 64-bit signed, native endian (int64_t)
1796
+ * j | Integer | pointer width signed, native endian (intptr_t)
1797
+ * | |
1798
+ * S_ S! | Integer | unsigned short, native endian
1799
+ * I I_ I! | Integer | unsigned int, native endian
1800
+ * L_ L! | Integer | unsigned long, native endian
1801
+ * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
1802
+ * | | if the platform has no long long type.)
1803
+ * J! | Integer | uintptr_t, native endian (same as J)
1804
+ * | |
1805
+ * s_ s! | Integer | signed short, native endian
1806
+ * i i_ i! | Integer | signed int, native endian
1807
+ * l_ l! | Integer | signed long, native endian
1808
+ * q_ q! | Integer | signed long long, native endian (ArgumentError
1809
+ * | | if the platform has no long long type.)
1810
+ * j! | Integer | intptr_t, native endian (same as j)
1811
+ * | |
1812
+ * S> s> S!> s!> | Integer | same as the directives without ">" except
1813
+ * L> l> L!> l!> | | big endian
1814
+ * I!> i!> | |
1815
+ * Q> q> Q!> q!> | | "S>" is same as "n"
1816
+ * J> j> J!> j!> | | "L>" is same as "N"
1817
+ * | |
1818
+ * S< s< S!< s!< | Integer | same as the directives without "<" except
1819
+ * L< l< L!< l!< | | little endian
1820
+ * I!< i!< | |
1821
+ * Q< q< Q!< q!< | | "S<" is same as "v"
1822
+ * J< j< J!< j!< | | "L<" is same as "V"
1823
+ * | |
1824
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
1825
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
1826
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
1827
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
1828
+ * | |
1829
+ * U | Integer | UTF-8 character
1830
+ * w | Integer | BER-compressed integer (see Array.pack)
1831
+ *
1832
+ * Float | |
1833
+ * Directive | Returns | Meaning
1834
+ * -----------------------------------------------------------------
1835
+ * D d | Float | double-precision, native format
1836
+ * F f | Float | single-precision, native format
1837
+ * E | Float | double-precision, little-endian byte order
1838
+ * e | Float | single-precision, little-endian byte order
1839
+ * G | Float | double-precision, network (big-endian) byte order
1840
+ * g | Float | single-precision, network (big-endian) byte order
1841
+ *
1842
+ * String | |
1843
+ * Directive | Returns | Meaning
1844
+ * -----------------------------------------------------------------
1845
+ * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
1846
+ * a | String | arbitrary binary string
1847
+ * Z | String | null-terminated string
1848
+ * B | String | bit string (MSB first)
1849
+ * b | String | bit string (LSB first)
1850
+ * H | String | hex string (high nibble first)
1851
+ * h | String | hex string (low nibble first)
1852
+ * u | String | UU-encoded string
1853
+ * M | String | quoted-printable, MIME encoding (see RFC2045)
1854
+ * m | String | base64 encoded string (RFC 2045) (default)
1855
+ * | | base64 encoded string (RFC 4648) if followed by 0
1856
+ * P | String | pointer to a structure (fixed-length string)
1857
+ * p | String | pointer to a null-terminated string
1858
+ *
1859
+ * Misc. | |
1860
+ * Directive | Returns | Meaning
1861
+ * -----------------------------------------------------------------
1862
+ * @ | --- | skip to the offset given by the length argument
1863
+ * X | --- | skip backward one byte
1864
+ * x | --- | skip forward one byte
1865
+ *
1866
+ * HISTORY
1867
+ *
1868
+ * * J, J!, j, and j! are available since Ruby 2.3.
1869
+ * * Q_, Q!, q_, and q! are available since Ruby 2.1.
1870
+ * * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
1871
+ */
1872
+
1873
+ static VALUE
1874
+ pack_unpack(VALUE str, VALUE fmt)
1875
+ {
1876
+ int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1877
+ return pack_unpack_internal(str, fmt, mode);
1878
+ }
1879
+
1880
+ /*
1881
+ * call-seq:
1882
+ * str.unpack1(format) -> obj
1883
+ *
1884
+ * Decodes <i>str</i> (which may contain binary data) according to the
1885
+ * format string, returning the first value extracted.
1886
+ * See also <code>String#unpack</code>, <code>Array#pack</code>.
1887
+ */
1888
+
1889
+ static VALUE
1890
+ pack_unpack1(VALUE str, VALUE fmt)
1891
+ {
1892
+ return pack_unpack_internal(str, fmt, UNPACK_1);
1893
+ }
1894
+
1895
+ int
1896
+ rb_uv_to_utf8(char buf[6], unsigned long uv)
1897
+ {
1898
+ if (uv <= 0x7f) {
1899
+ buf[0] = (char)uv;
1900
+ return 1;
1901
+ }
1902
+ if (uv <= 0x7ff) {
1903
+ buf[0] = castchar(((uv>>6)&0xff)|0xc0);
1904
+ buf[1] = castchar((uv&0x3f)|0x80);
1905
+ return 2;
1906
+ }
1907
+ if (uv <= 0xffff) {
1908
+ buf[0] = castchar(((uv>>12)&0xff)|0xe0);
1909
+ buf[1] = castchar(((uv>>6)&0x3f)|0x80);
1910
+ buf[2] = castchar((uv&0x3f)|0x80);
1911
+ return 3;
1912
+ }
1913
+ if (uv <= 0x1fffff) {
1914
+ buf[0] = castchar(((uv>>18)&0xff)|0xf0);
1915
+ buf[1] = castchar(((uv>>12)&0x3f)|0x80);
1916
+ buf[2] = castchar(((uv>>6)&0x3f)|0x80);
1917
+ buf[3] = castchar((uv&0x3f)|0x80);
1918
+ return 4;
1919
+ }
1920
+ if (uv <= 0x3ffffff) {
1921
+ buf[0] = castchar(((uv>>24)&0xff)|0xf8);
1922
+ buf[1] = castchar(((uv>>18)&0x3f)|0x80);
1923
+ buf[2] = castchar(((uv>>12)&0x3f)|0x80);
1924
+ buf[3] = castchar(((uv>>6)&0x3f)|0x80);
1925
+ buf[4] = castchar((uv&0x3f)|0x80);
1926
+ return 5;
1927
+ }
1928
+ if (uv <= 0x7fffffff) {
1929
+ buf[0] = castchar(((uv>>30)&0xff)|0xfc);
1930
+ buf[1] = castchar(((uv>>24)&0x3f)|0x80);
1931
+ buf[2] = castchar(((uv>>18)&0x3f)|0x80);
1932
+ buf[3] = castchar(((uv>>12)&0x3f)|0x80);
1933
+ buf[4] = castchar(((uv>>6)&0x3f)|0x80);
1934
+ buf[5] = castchar((uv&0x3f)|0x80);
1935
+ return 6;
1936
+ }
1937
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
1938
+
1939
+ UNREACHABLE;
1940
+ }
1941
+
1942
/*
 * utf8_limits[k] is the smallest value that needs a sequence of k + 1
 * bytes; a decoded value below the limit for its sequence length was
 * encoded with more bytes than necessary.
 */
static const unsigned long utf8_limits[] = {
    0UL,        /* 1 byte:  0x0        */
    1UL << 7,   /* 2 bytes: 0x80       */
    1UL << 11,  /* 3 bytes: 0x800      */
    1UL << 16,  /* 4 bytes: 0x10000    */
    1UL << 21,  /* 5 bytes: 0x200000   */
    1UL << 26,  /* 6 bytes: 0x4000000  */
    1UL << 31,  /* 7 bytes: 0x80000000 */
};
1951
+
1952
+ static unsigned long
1953
+ utf8_to_uv(const char *p, long *lenp)
1954
+ {
1955
+ int c = *p++ & 0xff;
1956
+ unsigned long uv = c;
1957
+ long n;
1958
+
1959
+ if (!(uv & 0x80)) {
1960
+ *lenp = 1;
1961
+ return uv;
1962
+ }
1963
+ if (!(uv & 0x40)) {
1964
+ *lenp = 1;
1965
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
1966
+ }
1967
+
1968
+ if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
1969
+ else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
1970
+ else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
1971
+ else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
1972
+ else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
1973
+ else {
1974
+ *lenp = 1;
1975
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
1976
+ }
1977
+ if (n > *lenp) {
1978
+ rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1979
+ n, *lenp);
1980
+ }
1981
+ *lenp = n--;
1982
+ if (n != 0) {
1983
+ while (n--) {
1984
+ c = *p++ & 0xff;
1985
+ if ((c & 0xc0) != 0x80) {
1986
+ *lenp -= n + 1;
1987
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
1988
+ }
1989
+ else {
1990
+ c &= 0x3f;
1991
+ uv = uv << 6 | c;
1992
+ }
1993
+ }
1994
+ }
1995
+ n = *lenp - 1;
1996
+ if (uv < utf8_limits[n]) {
1997
+ rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1998
+ }
1999
+ return uv;
2000
+ }
2001
+
2002
/*
 * Registers the methods implemented in this file on Array and String and
 * initializes id_associated.
 */
+ void
2003
+ Init_pack(void)
2004
+ {
2005
+ rb_define_method(rb_cArray, "pack", pack_pack, -1); /* Array#pack; arity -1 presumably means a variable argument list, per the Ruby C API convention */
2006
+ rb_define_method(rb_cString, "unpack", pack_unpack, 1); /* String#unpack(format) */
2007
+ rb_define_method(rb_cString, "unpack1", pack_unpack1, 1); /* String#unpack1(format) */
2008
+
2009
+ id_associated = rb_make_internal_id(); /* presumably the key used by str_associate / str_associated (defined outside this view) */
2010
+ }