zscan 2.0.4 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,13 +2,14 @@
2
2
 
3
3
  pack.c -
4
4
 
5
- $Author$
5
+ $Author: naruse $
6
6
  created at: Thu Feb 10 15:17:05 JST 1994
7
7
 
8
8
  Copyright (C) 1993-2007 Yukihiro Matsumoto
9
9
 
10
10
  **********************************************************************/
11
11
 
12
+ #include "ruby/encoding.h"
12
13
  #include "internal.h"
13
14
  #include <sys/types.h>
14
15
  #include <ctype.h>
@@ -126,6 +127,758 @@ str_associated(VALUE str)
126
127
  return rb_ivar_lookup(str, id_associated, Qfalse);
127
128
  }
128
129
 
130
+ /*
131
+ * call-seq:
132
+ * arr.pack( aTemplateString ) -> aBinaryString
133
+ * arr.pack( aTemplateString, buffer: aBufferString ) -> aBufferString
134
+ *
135
+ * Packs the contents of <i>arr</i> into a binary sequence according to
136
+ * the directives in <i>aTemplateString</i> (see the table below).
137
+ * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
138
+ * which gives the width of the resulting field. The remaining
139
+ * directives also may take a count, indicating the number of array
140
+ * elements to convert. If the count is an asterisk
141
+ * (``<code>*</code>''), all remaining array elements will be
142
+ * converted. Any of the directives ``<code>sSiIlL</code>'' may be
143
+ * followed by an underscore (``<code>_</code>'') or
144
+ * exclamation mark (``<code>!</code>'') to use the underlying
145
+ * platform's native size for the specified type; otherwise, they use a
146
+ * platform-independent size. Spaces are ignored in the template
147
+ * string. See also <code>String#unpack</code>.
148
+ *
149
+ * a = [ "a", "b", "c" ]
150
+ * n = [ 65, 66, 67 ]
151
+ * a.pack("A3A3A3") #=> "a b c "
152
+ * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
153
+ * n.pack("ccc") #=> "ABC"
154
+ *
155
+ * If <i>aBufferString</i> is specified and its capacity is enough,
156
+ * +pack+ uses it as the buffer and returns it.
157
+ * When the offset is specified by the beginning of <i>aTemplateString</i>,
158
+ * the result is filled after the offset.
159
+ * If the original contents of <i>aBufferString</i> exist and are longer than
159
+ * the offset, the contents after the offset are overwritten by the result.
161
+ * If it's shorter, the gap is filled with ``<code>\0</code>''.
162
+ *
163
+ * Note that ``buffer:'' option does not guarantee not to allocate memory
164
+ * in +pack+. If the capacity of <i>aBufferString</i> is not enough,
165
+ * +pack+ allocates memory.
166
+ *
167
+ * Directives for +pack+.
168
+ *
169
+ * Integer | Array |
170
+ * Directive | Element | Meaning
171
+ * ----------------------------------------------------------------------------
172
+ * C | Integer | 8-bit unsigned (unsigned char)
173
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
174
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
175
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
176
+ * J | Integer | pointer width unsigned, native endian (uintptr_t)
177
+ * | | (J is available since Ruby 2.3.)
178
+ * | |
179
+ * c | Integer | 8-bit signed (signed char)
180
+ * s | Integer | 16-bit signed, native endian (int16_t)
181
+ * l | Integer | 32-bit signed, native endian (int32_t)
182
+ * q | Integer | 64-bit signed, native endian (int64_t)
183
+ * j | Integer | pointer width signed, native endian (intptr_t)
184
+ * | | (j is available since Ruby 2.3.)
185
+ * | |
186
+ * S_ S! | Integer | unsigned short, native endian
187
+ * I I_ I! | Integer | unsigned int, native endian
188
+ * L_ L! | Integer | unsigned long, native endian
189
+ * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
190
+ * | | if the platform has no long long type.)
191
+ * | | (Q_ and Q! are available since Ruby 2.1.)
192
+ * J! | Integer | uintptr_t, native endian (same as J)
193
+ * | | (J! is available since Ruby 2.3.)
194
+ * | |
195
+ * s_ s! | Integer | signed short, native endian
196
+ * i i_ i! | Integer | signed int, native endian
197
+ * l_ l! | Integer | signed long, native endian
198
+ * q_ q! | Integer | signed long long, native endian (ArgumentError
199
+ * | | if the platform has no long long type.)
200
+ * | | (q_ and q! are available since Ruby 2.1.)
201
+ * j! | Integer | intptr_t, native endian (same as j)
202
+ * | | (j! is available since Ruby 2.3.)
203
+ * | |
204
+ * S> s> S!> s!> | Integer | same as the directives without ">" except
205
+ * L> l> L!> l!> | | big endian
206
+ * I!> i!> | | (available since Ruby 1.9.3)
207
+ * Q> q> Q!> q!> | | "S>" is same as "n"
208
+ * J> j> J!> j!> | | "L>" is same as "N"
209
+ * | |
210
+ * S< s< S!< s!< | Integer | same as the directives without "<" except
211
+ * L< l< L!< l!< | | little endian
212
+ * I!< i!< | | (available since Ruby 1.9.3)
213
+ * Q< q< Q!< q!< | | "S<" is same as "v"
214
+ * J< j< J!< j!< | | "L<" is same as "V"
215
+ * | |
216
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
217
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
218
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
219
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
220
+ * | |
221
+ * U | Integer | UTF-8 character
222
+ * w | Integer | BER-compressed integer
223
+ *
224
+ * Float | Array |
225
+ * Directive | Element | Meaning
226
+ * ---------------------------------------------------------------------------
227
+ * D d | Float | double-precision, native format
228
+ * F f | Float | single-precision, native format
229
+ * E | Float | double-precision, little-endian byte order
230
+ * e | Float | single-precision, little-endian byte order
231
+ * G | Float | double-precision, network (big-endian) byte order
232
+ * g | Float | single-precision, network (big-endian) byte order
233
+ *
234
+ * String | Array |
235
+ * Directive | Element | Meaning
236
+ * ---------------------------------------------------------------------------
237
+ * A | String | arbitrary binary string (space padded, count is width)
238
+ * a | String | arbitrary binary string (null padded, count is width)
239
+ * Z | String | same as ``a'', except that null is added with *
240
+ * B | String | bit string (MSB first)
241
+ * b | String | bit string (LSB first)
242
+ * H | String | hex string (high nibble first)
243
+ * h | String | hex string (low nibble first)
244
+ * u | String | UU-encoded string
245
+ * M | String | quoted printable, MIME encoding (see also RFC2045)
246
+ * | | (text mode but input must use LF and output LF)
247
+ * m | String | base64 encoded string (see RFC 2045, count is width)
248
+ * | | (if count is 0, no line feeds are added, see RFC 4648)
249
+ * P | String | pointer to a structure (fixed-length string)
250
+ * p | String | pointer to a null-terminated string
251
+ *
252
+ * Misc. | Array |
253
+ * Directive | Element | Meaning
254
+ * ---------------------------------------------------------------------------
255
+ * @ | --- | moves to absolute position
256
+ * X | --- | back up a byte
257
+ * x | --- | null byte
258
+ */
259
+
260
+ static VALUE
261
+ pack_pack(int argc, VALUE *argv, VALUE ary)
262
+ {
263
+ static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
264
+ static const char spc10[] = " ";
265
+ const char *p, *pend;
266
+ VALUE fmt, opt = Qnil, res, from, associates = 0, buffer = 0;
267
+ char type;
268
+ long len, idx, plen;
269
+ const char *ptr;
270
+ int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
271
+ #ifdef NATINT_PACK
272
+ int natint; /* native integer */
273
+ #endif
274
+ int integer_size, bigendian_p;
275
+
276
+ rb_scan_args(argc, argv, "10:", &fmt, &opt);
277
+
278
+ StringValue(fmt);
279
+ p = RSTRING_PTR(fmt);
280
+ pend = p + RSTRING_LEN(fmt);
281
+ if (!NIL_P(opt)) {
282
+ static ID keyword_ids[1];
283
+ if (!keyword_ids[0])
284
+ CONST_ID(keyword_ids[0], "buffer");
285
+
286
+ rb_get_kwargs(opt, keyword_ids, 0, 1, &buffer);
287
+
288
+ if (buffer != Qundef && !RB_TYPE_P(buffer, T_STRING))
289
+ rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
290
+ }
291
+ if (buffer)
292
+ res = buffer;
293
+ else
294
+ res = rb_str_buf_new(0);
295
+
296
+ idx = 0;
297
+
298
+ #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
299
+ #define MORE_ITEM (idx < RARRAY_LEN(ary))
300
+ #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
301
+ #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
302
+
303
+ while (p < pend) {
304
+ int explicit_endian = 0;
305
+ if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
306
+ rb_raise(rb_eRuntimeError, "format string modified");
307
+ }
308
+ type = *p++; /* get data type */
309
+ #ifdef NATINT_PACK
310
+ natint = 0;
311
+ #endif
312
+
313
+ if (ISSPACE(type)) continue;
314
+ if (type == '#') {
315
+ while ((p < pend) && (*p != '\n')) {
316
+ p++;
317
+ }
318
+ continue;
319
+ }
320
+
321
+ {
322
+ modifiers:
323
+ switch (*p) {
324
+ case '_':
325
+ case '!':
326
+ if (strchr(natstr, type)) {
327
+ #ifdef NATINT_PACK
328
+ natint = 1;
329
+ #endif
330
+ p++;
331
+ }
332
+ else {
333
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
334
+ }
335
+ goto modifiers;
336
+
337
+ case '<':
338
+ case '>':
339
+ if (!strchr(endstr, type)) {
340
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
341
+ }
342
+ if (explicit_endian) {
343
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
344
+ }
345
+ explicit_endian = *p++;
346
+ goto modifiers;
347
+ }
348
+ }
349
+
350
+ if (*p == '*') { /* set data length */
351
+ len = strchr("@Xxu", type) ? 0
352
+ : strchr("PMm", type) ? 1
353
+ : RARRAY_LEN(ary) - idx;
354
+ p++;
355
+ }
356
+ else if (ISDIGIT(*p)) {
357
+ errno = 0;
358
+ len = STRTOUL(p, (char**)&p, 10);
359
+ if (errno) {
360
+ rb_raise(rb_eRangeError, "pack length too big");
361
+ }
362
+ }
363
+ else {
364
+ len = 1;
365
+ }
366
+
367
+ switch (type) {
368
+ case 'U':
369
+ /* if encoding is US-ASCII, upgrade to UTF-8 */
370
+ if (enc_info == 1) enc_info = 2;
371
+ break;
372
+ case 'm': case 'M': case 'u':
373
+ /* keep US-ASCII (do nothing) */
374
+ break;
375
+ default:
376
+ /* fall back to BINARY */
377
+ enc_info = 0;
378
+ break;
379
+ }
380
+ switch (type) {
381
+ case 'A': case 'a': case 'Z':
382
+ case 'B': case 'b':
383
+ case 'H': case 'h':
384
+ from = NEXTFROM;
385
+ if (NIL_P(from)) {
386
+ ptr = "";
387
+ plen = 0;
388
+ }
389
+ else {
390
+ StringValue(from);
391
+ ptr = RSTRING_PTR(from);
392
+ plen = RSTRING_LEN(from);
393
+ OBJ_INFECT(res, from);
394
+ }
395
+
396
+ if (p[-1] == '*')
397
+ len = plen;
398
+
399
+ switch (type) {
400
+ case 'a': /* arbitrary binary string (null padded) */
401
+ case 'A': /* arbitrary binary string (ASCII space padded) */
402
+ case 'Z': /* null terminated string */
403
+ if (plen >= len) {
404
+ rb_str_buf_cat(res, ptr, len);
405
+ if (p[-1] == '*' && type == 'Z')
406
+ rb_str_buf_cat(res, nul10, 1);
407
+ }
408
+ else {
409
+ rb_str_buf_cat(res, ptr, plen);
410
+ len -= plen;
411
+ while (len >= 10) {
412
+ rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
413
+ len -= 10;
414
+ }
415
+ rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
416
+ }
417
+ break;
418
+
419
+ #define castchar(from) (char)((from) & 0xff)
420
+
421
+ case 'b': /* bit string (ascending) */
422
+ {
423
+ int byte = 0;
424
+ long i, j = 0;
425
+
426
+ if (len > plen) {
427
+ j = (len - plen + 1)/2;
428
+ len = plen;
429
+ }
430
+ for (i=0; i++ < len; ptr++) {
431
+ if (*ptr & 1)
432
+ byte |= 128;
433
+ if (i & 7)
434
+ byte >>= 1;
435
+ else {
436
+ char c = castchar(byte);
437
+ rb_str_buf_cat(res, &c, 1);
438
+ byte = 0;
439
+ }
440
+ }
441
+ if (len & 7) {
442
+ char c;
443
+ byte >>= 7 - (len & 7);
444
+ c = castchar(byte);
445
+ rb_str_buf_cat(res, &c, 1);
446
+ }
447
+ len = j;
448
+ goto grow;
449
+ }
450
+ break;
451
+
452
+ case 'B': /* bit string (descending) */
453
+ {
454
+ int byte = 0;
455
+ long i, j = 0;
456
+
457
+ if (len > plen) {
458
+ j = (len - plen + 1)/2;
459
+ len = plen;
460
+ }
461
+ for (i=0; i++ < len; ptr++) {
462
+ byte |= *ptr & 1;
463
+ if (i & 7)
464
+ byte <<= 1;
465
+ else {
466
+ char c = castchar(byte);
467
+ rb_str_buf_cat(res, &c, 1);
468
+ byte = 0;
469
+ }
470
+ }
471
+ if (len & 7) {
472
+ char c;
473
+ byte <<= 7 - (len & 7);
474
+ c = castchar(byte);
475
+ rb_str_buf_cat(res, &c, 1);
476
+ }
477
+ len = j;
478
+ goto grow;
479
+ }
480
+ break;
481
+
482
+ case 'h': /* hex string (low nibble first) */
483
+ {
484
+ int byte = 0;
485
+ long i, j = 0;
486
+
487
+ if (len > plen) {
488
+ j = (len + 1) / 2 - (plen + 1) / 2;
489
+ len = plen;
490
+ }
491
+ for (i=0; i++ < len; ptr++) {
492
+ if (ISALPHA(*ptr))
493
+ byte |= (((*ptr & 15) + 9) & 15) << 4;
494
+ else
495
+ byte |= (*ptr & 15) << 4;
496
+ if (i & 1)
497
+ byte >>= 4;
498
+ else {
499
+ char c = castchar(byte);
500
+ rb_str_buf_cat(res, &c, 1);
501
+ byte = 0;
502
+ }
503
+ }
504
+ if (len & 1) {
505
+ char c = castchar(byte);
506
+ rb_str_buf_cat(res, &c, 1);
507
+ }
508
+ len = j;
509
+ goto grow;
510
+ }
511
+ break;
512
+
513
+ case 'H': /* hex string (high nibble first) */
514
+ {
515
+ int byte = 0;
516
+ long i, j = 0;
517
+
518
+ if (len > plen) {
519
+ j = (len + 1) / 2 - (plen + 1) / 2;
520
+ len = plen;
521
+ }
522
+ for (i=0; i++ < len; ptr++) {
523
+ if (ISALPHA(*ptr))
524
+ byte |= ((*ptr & 15) + 9) & 15;
525
+ else
526
+ byte |= *ptr & 15;
527
+ if (i & 1)
528
+ byte <<= 4;
529
+ else {
530
+ char c = castchar(byte);
531
+ rb_str_buf_cat(res, &c, 1);
532
+ byte = 0;
533
+ }
534
+ }
535
+ if (len & 1) {
536
+ char c = castchar(byte);
537
+ rb_str_buf_cat(res, &c, 1);
538
+ }
539
+ len = j;
540
+ goto grow;
541
+ }
542
+ break;
543
+ }
544
+ break;
545
+
546
+ case 'c': /* signed char */
547
+ case 'C': /* unsigned char */
548
+ integer_size = 1;
549
+ bigendian_p = BIGENDIAN_P(); /* not effective */
550
+ goto pack_integer;
551
+
552
+ case 's': /* s for int16_t, s! for signed short */
553
+ integer_size = NATINT_LEN(short, 2);
554
+ bigendian_p = BIGENDIAN_P();
555
+ goto pack_integer;
556
+
557
+ case 'S': /* S for uint16_t, S! for unsigned short */
558
+ integer_size = NATINT_LEN(short, 2);
559
+ bigendian_p = BIGENDIAN_P();
560
+ goto pack_integer;
561
+
562
+ case 'i': /* i and i! for signed int */
563
+ integer_size = (int)sizeof(int);
564
+ bigendian_p = BIGENDIAN_P();
565
+ goto pack_integer;
566
+
567
+ case 'I': /* I and I! for unsigned int */
568
+ integer_size = (int)sizeof(int);
569
+ bigendian_p = BIGENDIAN_P();
570
+ goto pack_integer;
571
+
572
+ case 'l': /* l for int32_t, l! for signed long */
573
+ integer_size = NATINT_LEN(long, 4);
574
+ bigendian_p = BIGENDIAN_P();
575
+ goto pack_integer;
576
+
577
+ case 'L': /* L for uint32_t, L! for unsigned long */
578
+ integer_size = NATINT_LEN(long, 4);
579
+ bigendian_p = BIGENDIAN_P();
580
+ goto pack_integer;
581
+
582
+ case 'q': /* q for int64_t, q! for signed long long */
583
+ integer_size = NATINT_LEN_Q;
584
+ bigendian_p = BIGENDIAN_P();
585
+ goto pack_integer;
586
+
587
+ case 'Q': /* Q for uint64_t, Q! for unsigned long long */
588
+ integer_size = NATINT_LEN_Q;
589
+ bigendian_p = BIGENDIAN_P();
590
+ goto pack_integer;
591
+
592
+ case 'j': /* j for intptr_t */
593
+ integer_size = sizeof(intptr_t);
594
+ bigendian_p = BIGENDIAN_P();
595
+ goto pack_integer;
596
+
597
+ case 'J': /* J for uintptr_t */
598
+ integer_size = sizeof(uintptr_t);
599
+ bigendian_p = BIGENDIAN_P();
600
+ goto pack_integer;
601
+
602
+ case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
603
+ integer_size = 2;
604
+ bigendian_p = 1;
605
+ goto pack_integer;
606
+
607
+ case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
608
+ integer_size = 4;
609
+ bigendian_p = 1;
610
+ goto pack_integer;
611
+
612
+ case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
613
+ integer_size = 2;
614
+ bigendian_p = 0;
615
+ goto pack_integer;
616
+
617
+ case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
618
+ integer_size = 4;
619
+ bigendian_p = 0;
620
+ goto pack_integer;
621
+
622
+ pack_integer:
623
+ if (explicit_endian) {
624
+ bigendian_p = explicit_endian == '>';
625
+ }
626
+ if (integer_size > MAX_INTEGER_PACK_SIZE)
627
+ rb_bug("unexpected intger size for pack: %d", integer_size);
628
+ while (len-- > 0) {
629
+ char intbuf[MAX_INTEGER_PACK_SIZE];
630
+
631
+ from = NEXTFROM;
632
+ rb_integer_pack(from, intbuf, integer_size, 1, 0,
633
+ INTEGER_PACK_2COMP |
634
+ (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN));
635
+ rb_str_buf_cat(res, intbuf, integer_size);
636
+ }
637
+ break;
638
+
639
+ case 'f': /* single precision float in native format */
640
+ case 'F': /* ditto */
641
+ while (len-- > 0) {
642
+ float f;
643
+
644
+ from = NEXTFROM;
645
+ f = (float)RFLOAT_VALUE(rb_to_float(from));
646
+ rb_str_buf_cat(res, (char*)&f, sizeof(float));
647
+ }
648
+ break;
649
+
650
+ case 'e': /* single precision float in VAX byte-order */
651
+ while (len-- > 0) {
652
+ FLOAT_CONVWITH(tmp);
653
+
654
+ from = NEXTFROM;
655
+ tmp.f = (float)RFLOAT_VALUE(rb_to_float(from));
656
+ HTOVF(tmp);
657
+ rb_str_buf_cat(res, tmp.buf, sizeof(float));
658
+ }
659
+ break;
660
+
661
+ case 'E': /* double precision float in VAX byte-order */
662
+ while (len-- > 0) {
663
+ DOUBLE_CONVWITH(tmp);
664
+ from = NEXTFROM;
665
+ tmp.d = RFLOAT_VALUE(rb_to_float(from));
666
+ HTOVD(tmp);
667
+ rb_str_buf_cat(res, tmp.buf, sizeof(double));
668
+ }
669
+ break;
670
+
671
+ case 'd': /* double precision float in native format */
672
+ case 'D': /* ditto */
673
+ while (len-- > 0) {
674
+ double d;
675
+
676
+ from = NEXTFROM;
677
+ d = RFLOAT_VALUE(rb_to_float(from));
678
+ rb_str_buf_cat(res, (char*)&d, sizeof(double));
679
+ }
680
+ break;
681
+
682
+ case 'g': /* single precision float in network byte-order */
683
+ while (len-- > 0) {
684
+ FLOAT_CONVWITH(tmp);
685
+ from = NEXTFROM;
686
+ tmp.f = (float)RFLOAT_VALUE(rb_to_float(from));
687
+ HTONF(tmp);
688
+ rb_str_buf_cat(res, tmp.buf, sizeof(float));
689
+ }
690
+ break;
691
+
692
+ case 'G': /* double precision float in network byte-order */
693
+ while (len-- > 0) {
694
+ DOUBLE_CONVWITH(tmp);
695
+
696
+ from = NEXTFROM;
697
+ tmp.d = RFLOAT_VALUE(rb_to_float(from));
698
+ HTOND(tmp);
699
+ rb_str_buf_cat(res, tmp.buf, sizeof(double));
700
+ }
701
+ break;
702
+
703
+ case 'x': /* null byte */
704
+ grow:
705
+ while (len >= 10) {
706
+ rb_str_buf_cat(res, nul10, 10);
707
+ len -= 10;
708
+ }
709
+ rb_str_buf_cat(res, nul10, len);
710
+ break;
711
+
712
+ case 'X': /* back up byte */
713
+ shrink:
714
+ plen = RSTRING_LEN(res);
715
+ if (plen < len)
716
+ rb_raise(rb_eArgError, "X outside of string");
717
+ rb_str_set_len(res, plen - len);
718
+ break;
719
+
720
+ case '@': /* null fill to absolute position */
721
+ len -= RSTRING_LEN(res);
722
+ if (len > 0) goto grow;
723
+ len = -len;
724
+ if (len > 0) goto shrink;
725
+ break;
726
+
727
+ case '%':
728
+ rb_raise(rb_eArgError, "%% is not supported");
729
+ break;
730
+
731
+ case 'U': /* Unicode character */
732
+ while (len-- > 0) {
733
+ SIGNED_VALUE l;
734
+ char buf[8];
735
+ int le;
736
+
737
+ from = NEXTFROM;
738
+ from = rb_to_int(from);
739
+ l = NUM2LONG(from);
740
+ if (l < 0) {
741
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
742
+ }
743
+ le = rb_uv_to_utf8(buf, l);
744
+ rb_str_buf_cat(res, (char*)buf, le);
745
+ }
746
+ break;
747
+
748
+ case 'u': /* uuencoded string */
749
+ case 'm': /* base64 encoded string */
750
+ from = NEXTFROM;
751
+ StringValue(from);
752
+ ptr = RSTRING_PTR(from);
753
+ plen = RSTRING_LEN(from);
754
+
755
+ if (len == 0 && type == 'm') {
756
+ encodes(res, ptr, plen, type, 0);
757
+ ptr += plen;
758
+ break;
759
+ }
760
+ if (len <= 2)
761
+ len = 45;
762
+ else if (len > 63 && type == 'u')
763
+ len = 63;
764
+ else
765
+ len = len / 3 * 3;
766
+ while (plen > 0) {
767
+ long todo;
768
+
769
+ if (plen > len)
770
+ todo = len;
771
+ else
772
+ todo = plen;
773
+ encodes(res, ptr, todo, type, 1);
774
+ plen -= todo;
775
+ ptr += todo;
776
+ }
777
+ break;
778
+
779
+ case 'M': /* quoted-printable encoded string */
780
+ from = rb_obj_as_string(NEXTFROM);
781
+ if (len <= 1)
782
+ len = 72;
783
+ qpencode(res, from, len);
784
+ break;
785
+
786
+ case 'P': /* pointer to packed byte string */
787
+ from = THISFROM;
788
+ if (!NIL_P(from)) {
789
+ StringValue(from);
790
+ if (RSTRING_LEN(from) < len) {
791
+ rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
792
+ RSTRING_LEN(from), len);
793
+ }
794
+ }
795
+ len = 1;
796
+ /* FALL THROUGH */
797
+ case 'p': /* pointer to string */
798
+ while (len-- > 0) {
799
+ char *t;
800
+ from = NEXTFROM;
801
+ if (NIL_P(from)) {
802
+ t = 0;
803
+ }
804
+ else {
805
+ t = StringValuePtr(from);
806
+ rb_obj_taint(from);
807
+ }
808
+ if (!associates) {
809
+ associates = rb_ary_new();
810
+ }
811
+ rb_ary_push(associates, from);
812
+ rb_str_buf_cat(res, (char*)&t, sizeof(char*));
813
+ }
814
+ break;
815
+
816
+ case 'w': /* BER compressed integer */
817
+ while (len-- > 0) {
818
+ VALUE buf = rb_str_new(0, 0);
819
+ size_t numbytes;
820
+ int sign;
821
+ char *cp;
822
+
823
+ from = NEXTFROM;
824
+ from = rb_to_int(from);
825
+ numbytes = rb_absint_numwords(from, 7, NULL);
826
+ if (numbytes == 0)
827
+ numbytes = 1;
828
+ buf = rb_str_new(NULL, numbytes);
829
+
830
+ sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
831
+
832
+ if (sign < 0)
833
+ rb_raise(rb_eArgError, "can't compress negative numbers");
834
+ if (sign == 2)
835
+ rb_bug("buffer size problem?");
836
+
837
+ cp = RSTRING_PTR(buf);
838
+ while (1 < numbytes) {
839
+ *cp |= 0x80;
840
+ cp++;
841
+ numbytes--;
842
+ }
843
+
844
+ rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
845
+ }
846
+ break;
847
+
848
+ default: {
849
+ char unknown[5];
850
+ if (ISPRINT(type)) {
851
+ unknown[0] = type;
852
+ unknown[1] = '\0';
853
+ }
854
+ else {
855
+ snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
856
+ }
857
+ rb_warning("unknown pack directive '%s' in '% "PRIsVALUE"'",
858
+ unknown, fmt);
859
+ break;
860
+ }
861
+ }
862
+ }
863
+
864
+ if (associates) {
865
+ str_associate(res, associates);
866
+ }
867
+ OBJ_INFECT(res, fmt);
868
+ switch (enc_info) {
869
+ case 1:
870
+ ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
871
+ break;
872
+ case 2:
873
+ rb_enc_set_index(res, rb_utf8_encindex());
874
+ break;
875
+ default:
876
+ /* do nothing, keep ASCII-8BIT */
877
+ break;
878
+ }
879
+ return res;
880
+ }
881
+
129
882
  static const char uu_table[] =
130
883
  "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
131
884
  static const char b64_table[] =
@@ -260,10 +1013,11 @@ hex2num(char c)
260
1013
  rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
261
1014
  } while (0)
262
1015
 
263
- /* Workaround for Oracle Solaris Studio 12.4 C compiler optimization bug
1016
+ /* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
1017
+ * 12.4/12.5/12.6 C compiler optimization bug
264
1018
  * with "-xO4" optimization option.
265
1019
  */
266
- #if defined(__SUNPRO_C) && __SUNPRO_C == 0x5130
1020
+ #if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
267
1021
  # define AVOID_CC_BUG volatile
268
1022
  #else
269
1023
  # define AVOID_CC_BUG
@@ -283,12 +1037,11 @@ infected_str_new(const char *ptr, long len, VALUE str)
283
1037
  #define UNPACK_BLOCK 1
284
1038
  #define UNPACK_1 2
285
1039
 
286
- #define castchar(from) (char)((from) & 0xff)
287
-
288
- VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1040
+ static VALUE
1041
+ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
289
1042
  {
290
1043
  #define hexdigits ruby_hexdigits
291
- char *init_s, *s, *send;
1044
+ char *s, *send;
292
1045
  char *p, *pend;
293
1046
  VALUE ary;
294
1047
  char type;
@@ -299,20 +1052,22 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
299
1052
  int natint; /* native integer */
300
1053
  #endif
301
1054
  int signed_p, integer_size, bigendian_p;
302
- int mode = (rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY);
303
1055
  #define UNPACK_PUSH(item) do {\
304
1056
  VALUE item_val = (item);\
305
1057
  if ((mode) == UNPACK_BLOCK) {\
306
1058
  rb_yield(item_val);\
307
1059
  }\
308
- else {\
1060
+ else if ((mode) == UNPACK_ARRAY) {\
309
1061
  rb_ary_push(ary, item_val);\
310
1062
  }\
1063
+ else /* if ((mode) == UNPACK_1) { */ {\
1064
+ return item_val; \
1065
+ }\
311
1066
  } while (0)
312
1067
 
313
1068
  StringValue(str);
314
1069
  StringValue(fmt);
315
- init_s = s = RSTRING_PTR(str);
1070
+ s = RSTRING_PTR(str);
316
1071
  send = s + RSTRING_LEN(str);
317
1072
  p = RSTRING_PTR(fmt);
318
1073
  pend = p + RSTRING_LEN(fmt);
@@ -374,7 +1129,7 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
374
1129
  else if (ISDIGIT(*p)) {
375
1130
  errno = 0;
376
1131
  len = STRTOUL(p, (char**)&p, 10);
377
- if (errno) {
1132
+ if (len < 0 || errno) {
378
1133
  rb_raise(rb_eRangeError, "pack length too big");
379
1134
  }
380
1135
  }
@@ -845,6 +1600,7 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
845
1600
  {
846
1601
  VALUE buf = infected_str_new(0, send - s, str);
847
1602
  char *ptr = RSTRING_PTR(buf), *ss = s;
1603
+ int csum = 0;
848
1604
  int c1, c2;
849
1605
 
850
1606
  while (s < send) {
@@ -856,18 +1612,19 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
856
1612
  if ((c1 = hex2num(*s)) == -1) break;
857
1613
  if (++s == send) break;
858
1614
  if ((c2 = hex2num(*s)) == -1) break;
859
- *ptr++ = castchar(c1 << 4 | c2);
1615
+ csum |= *ptr++ = castchar(c1 << 4 | c2);
860
1616
  }
861
1617
  }
862
1618
  else {
863
- *ptr++ = *s;
1619
+ csum |= *ptr++ = *s;
864
1620
  }
865
1621
  s++;
866
1622
  ss = s;
867
1623
  }
868
1624
  rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
869
1625
  rb_str_buf_cat(buf, ss, send-ss);
870
- ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID);
1626
+ csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
1627
+ ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
871
1628
  UNPACK_PUSH(buf);
872
1629
  }
873
1630
  break;
@@ -990,10 +1747,151 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
990
1747
  }
991
1748
  }
992
1749
 
993
- *parsed_len = s - init_s;
994
1750
  return ary;
995
1751
  }
996
1752
 
1753
+ /*
1754
+ * call-seq:
1755
+ * str.unpack(format) -> anArray
1756
+ *
1757
+ * Decodes <i>str</i> (which may contain binary data) according to the
1758
+ * format string, returning an array of each value extracted. The
1759
+ * format string consists of a sequence of single-character directives,
1760
+ * summarized in the table at the end of this entry.
1761
+ * Each directive may be followed
1762
+ * by a number, indicating the number of times to repeat with this
1763
+ * directive. An asterisk (``<code>*</code>'') will use up all
1764
+ * remaining elements. The directives <code>sSiIlL</code> may each be
1765
+ * followed by an underscore (``<code>_</code>'') or
1766
+ * exclamation mark (``<code>!</code>'') to use the underlying
1767
+ * platform's native size for the specified type; otherwise, it uses a
1768
+ * platform-independent consistent size. Spaces are ignored in the
1769
+ * format string. See also <code>String#unpack1</code>, <code>Array#pack</code>.
1770
+ *
1771
+ * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1772
+ * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1773
+ * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1774
+ * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1775
+ * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1776
+ * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1777
+ * "now=20is".unpack('M*') #=> ["now is"]
1778
+ * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1779
+ *
1780
+ * This table summarizes the various formats and the Ruby classes
1781
+ * returned by each.
1782
+ *
1783
+ * Integer | |
1784
+ * Directive | Returns | Meaning
1785
+ * ------------------------------------------------------------------
1786
+ * C | Integer | 8-bit unsigned (unsigned char)
1787
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
1788
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
1789
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
1790
+ * J | Integer | pointer width unsigned, native endian (uintptr_t)
1791
+ * | |
1792
+ * c | Integer | 8-bit signed (signed char)
1793
+ * s | Integer | 16-bit signed, native endian (int16_t)
1794
+ * l | Integer | 32-bit signed, native endian (int32_t)
1795
+ * q | Integer | 64-bit signed, native endian (int64_t)
1796
+ * j | Integer | pointer width signed, native endian (intptr_t)
1797
+ * | |
1798
+ * S_ S! | Integer | unsigned short, native endian
1799
+ * I I_ I! | Integer | unsigned int, native endian
1800
+ * L_ L! | Integer | unsigned long, native endian
1801
+ * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
1802
+ * | | if the platform has no long long type.)
1803
+ * J! | Integer | uintptr_t, native endian (same as J)
1804
+ * | |
1805
+ * s_ s! | Integer | signed short, native endian
1806
+ * i i_ i! | Integer | signed int, native endian
1807
+ * l_ l! | Integer | signed long, native endian
1808
+ * q_ q! | Integer | signed long long, native endian (ArgumentError
1809
+ * | | if the platform has no long long type.)
1810
+ * j! | Integer | intptr_t, native endian (same as j)
1811
+ * | |
1812
+ * S> s> S!> s!> | Integer | same as the directives without ">" except
1813
+ * L> l> L!> l!> | | big endian
1814
+ * I!> i!> | |
1815
+ * Q> q> Q!> q!> | | "S>" is same as "n"
1816
+ * J> j> J!> j!> | | "L>" is same as "N"
1817
+ * | |
1818
+ * S< s< S!< s!< | Integer | same as the directives without "<" except
1819
+ * L< l< L!< l!< | | little endian
1820
+ * I!< i!< | |
1821
+ * Q< q< Q!< q!< | | "S<" is same as "v"
1822
+ * J< j< J!< j!< | | "L<" is same as "V"
1823
+ * | |
1824
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
1825
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
1826
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
1827
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
1828
+ * | |
1829
+ * U | Integer | UTF-8 character
1830
+ * w | Integer | BER-compressed integer (see Array#pack)
1831
+ *
1832
+ * Float | |
1833
+ * Directive | Returns | Meaning
1834
+ * -----------------------------------------------------------------
1835
+ * D d | Float | double-precision, native format
1836
+ * F f | Float | single-precision, native format
1837
+ * E | Float | double-precision, little-endian byte order
1838
+ * e | Float | single-precision, little-endian byte order
1839
+ * G | Float | double-precision, network (big-endian) byte order
1840
+ * g | Float | single-precision, network (big-endian) byte order
1841
+ *
1842
+ * String | |
1843
+ * Directive | Returns | Meaning
1844
+ * -----------------------------------------------------------------
1845
+ * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
1846
+ * a | String | arbitrary binary string
1847
+ * Z | String | null-terminated string
1848
+ * B | String | bit string (MSB first)
1849
+ * b | String | bit string (LSB first)
1850
+ * H | String | hex string (high nibble first)
1851
+ * h | String | hex string (low nibble first)
1852
+ * u | String | UU-encoded string
1853
+ * M | String | quoted-printable, MIME encoding (see RFC2045)
1854
+ * m | String | base64 encoded string (RFC 2045) (default)
1855
+ * | | base64 encoded string (RFC 4648) if followed by 0
1856
+ * P | String | pointer to a structure (fixed-length string)
1857
+ * p | String | pointer to a null-terminated string
1858
+ *
1859
+ * Misc. | |
1860
+ * Directive | Returns | Meaning
1861
+ * -----------------------------------------------------------------
1862
+ * @ | --- | skip to the offset given by the length argument
1863
+ * X | --- | skip backward one byte
1864
+ * x | --- | skip forward one byte
1865
+ *
1866
+ * HISTORY
1867
+ *
1868
+ * * J, J!, j, and j! are available since Ruby 2.3.
1869
+ * * Q_, Q!, q_, and q! are available since Ruby 2.1.
1870
+ * * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
1871
+ */
1872
+
1873
+ static VALUE
1874
+ pack_unpack(VALUE str, VALUE fmt)
1875
+ {
1876
+ int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1877
+ return pack_unpack_internal(str, fmt, mode);
1878
+ }
1879
+
1880
+ /*
1881
+ * call-seq:
1882
+ * str.unpack1(format) -> obj
1883
+ *
1884
+ * Decodes <i>str</i> (which may contain binary data) according to the
1885
+ * format string, returning the first value extracted.
1886
+ * See also <code>String#unpack</code>, <code>Array#pack</code>.
1887
+ */
1888
+
1889
+ static VALUE
1890
+ pack_unpack1(VALUE str, VALUE fmt)
1891
+ {
1892
+ return pack_unpack_internal(str, fmt, UNPACK_1);
1893
+ }
1894
+
997
1895
  int
998
1896
  rb_uv_to_utf8(char buf[6], unsigned long uv)
999
1897
  {
@@ -1100,3 +1998,13 @@ utf8_to_uv(const char *p, long *lenp)
1100
1998
  }
1101
1999
  return uv;
1102
2000
  }
2001
+
2002
+ void
2003
+ Init_pack(void)
2004
+ {
2005
+ rb_define_method(rb_cArray, "pack", pack_pack, -1);
2006
+ rb_define_method(rb_cString, "unpack", pack_unpack, 1);
2007
+ rb_define_method(rb_cString, "unpack1", pack_unpack1, 1);
2008
+
2009
+ id_associated = rb_make_internal_id();
2010
+ }