zscan 2.0.4 → 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,13 +2,14 @@
2
2
 
3
3
  pack.c -
4
4
 
5
- $Author$
5
+ $Author: naruse $
6
6
  created at: Thu Feb 10 15:17:05 JST 1994
7
7
 
8
8
  Copyright (C) 1993-2007 Yukihiro Matsumoto
9
9
 
10
10
  **********************************************************************/
11
11
 
12
+ #include "ruby/encoding.h"
12
13
  #include "internal.h"
13
14
  #include <sys/types.h>
14
15
  #include <ctype.h>
@@ -126,6 +127,758 @@ str_associated(VALUE str)
126
127
  return rb_ivar_lookup(str, id_associated, Qfalse);
127
128
  }
128
129
 
130
+ /*
131
+ * call-seq:
132
+ * arr.pack( aTemplateString ) -> aBinaryString
133
+ * arr.pack( aTemplateString, buffer: aBufferString ) -> aBufferString
134
+ *
135
+ * Packs the contents of <i>arr</i> into a binary sequence according to
136
+ * the directives in <i>aTemplateString</i> (see the table below).
137
+ * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
138
+ * which gives the width of the resulting field. The remaining
139
+ * directives also may take a count, indicating the number of array
140
+ * elements to convert. If the count is an asterisk
141
+ * (``<code>*</code>''), all remaining array elements will be
142
+ * converted. Any of the directives ``<code>sSiIlL</code>'' may be
143
+ * followed by an underscore (``<code>_</code>'') or
144
+ * exclamation mark (``<code>!</code>'') to use the underlying
145
+ * platform's native size for the specified type; otherwise, they use a
146
+ * platform-independent size. Spaces are ignored in the template
147
+ * string. See also <code>String#unpack</code>.
148
+ *
149
+ * a = [ "a", "b", "c" ]
150
+ * n = [ 65, 66, 67 ]
151
+ * a.pack("A3A3A3") #=> "a b c "
152
+ * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
153
+ * n.pack("ccc") #=> "ABC"
154
+ *
155
+ * If <i>aBufferString</i> is specified and its capacity is enough,
156
+ * +pack+ uses it as the buffer and returns it.
157
+ * When the offset is specified by the beginning of <i>aTemplateString</i>,
158
+ * the result is filled after the offset.
159
+ * If the original contents of <i>aBufferString</i> exist and are longer than
160
+ * the offset, the contents beyond the offset are overwritten by the result.
161
+ * If it's shorter, the gap is filled with ``<code>\0</code>''.
162
+ *
163
+ * Note that the ``buffer:'' option does not guarantee not to allocate memory
164
+ * in +pack+. If the capacity of <i>aBufferString</i> is not enough,
165
+ * +pack+ allocates memory.
166
+ *
167
+ * Directives for +pack+.
168
+ *
169
+ * Integer | Array |
170
+ * Directive | Element | Meaning
171
+ * ----------------------------------------------------------------------------
172
+ * C | Integer | 8-bit unsigned (unsigned char)
173
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
174
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
175
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
176
+ * J | Integer | pointer width unsigned, native endian (uintptr_t)
177
+ * | | (J is available since Ruby 2.3.)
178
+ * | |
179
+ * c | Integer | 8-bit signed (signed char)
180
+ * s | Integer | 16-bit signed, native endian (int16_t)
181
+ * l | Integer | 32-bit signed, native endian (int32_t)
182
+ * q | Integer | 64-bit signed, native endian (int64_t)
183
+ * j | Integer | pointer width signed, native endian (intptr_t)
184
+ * | | (j is available since Ruby 2.3.)
185
+ * | |
186
+ * S_ S! | Integer | unsigned short, native endian
187
+ * I I_ I! | Integer | unsigned int, native endian
188
+ * L_ L! | Integer | unsigned long, native endian
189
+ * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
190
+ * | | if the platform has no long long type.)
191
+ * | | (Q_ and Q! are available since Ruby 2.1.)
192
+ * J! | Integer | uintptr_t, native endian (same as J)
193
+ * | | (J! is available since Ruby 2.3.)
194
+ * | |
195
+ * s_ s! | Integer | signed short, native endian
196
+ * i i_ i! | Integer | signed int, native endian
197
+ * l_ l! | Integer | signed long, native endian
198
+ * q_ q! | Integer | signed long long, native endian (ArgumentError
199
+ * | | if the platform has no long long type.)
200
+ * | | (q_ and q! are available since Ruby 2.1.)
201
+ * j! | Integer | intptr_t, native endian (same as j)
202
+ * | | (j! is available since Ruby 2.3.)
203
+ * | |
204
+ * S> s> S!> s!> | Integer | same as the directives without ">" except
205
+ * L> l> L!> l!> | | big endian
206
+ * I!> i!> | | (available since Ruby 1.9.3)
207
+ * Q> q> Q!> q!> | | "S>" is same as "n"
208
+ * J> j> J!> j!> | | "L>" is same as "N"
209
+ * | |
210
+ * S< s< S!< s!< | Integer | same as the directives without "<" except
211
+ * L< l< L!< l!< | | little endian
212
+ * I!< i!< | | (available since Ruby 1.9.3)
213
+ * Q< q< Q!< q!< | | "S<" is same as "v"
214
+ * J< j< J!< j!< | | "L<" is same as "V"
215
+ * | |
216
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
217
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
218
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
219
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
220
+ * | |
221
+ * U | Integer | UTF-8 character
222
+ * w | Integer | BER-compressed integer
223
+ *
224
+ * Float | Array |
225
+ * Directive | Element | Meaning
226
+ * ---------------------------------------------------------------------------
227
+ * D d | Float | double-precision, native format
228
+ * F f | Float | single-precision, native format
229
+ * E | Float | double-precision, little-endian byte order
230
+ * e | Float | single-precision, little-endian byte order
231
+ * G | Float | double-precision, network (big-endian) byte order
232
+ * g | Float | single-precision, network (big-endian) byte order
233
+ *
234
+ * String | Array |
235
+ * Directive | Element | Meaning
236
+ * ---------------------------------------------------------------------------
237
+ * A | String | arbitrary binary string (space padded, count is width)
238
+ * a | String | arbitrary binary string (null padded, count is width)
239
+ * Z | String | same as ``a'', except that null is added with *
240
+ * B | String | bit string (MSB first)
241
+ * b | String | bit string (LSB first)
242
+ * H | String | hex string (high nibble first)
243
+ * h | String | hex string (low nibble first)
244
+ * u | String | UU-encoded string
245
+ * M | String | quoted printable, MIME encoding (see also RFC2045)
246
+ * | | (text mode but input must use LF and output LF)
247
+ * m | String | base64 encoded string (see RFC 2045, count is width)
248
+ * | | (if count is 0, no line feeds are added, see RFC 4648)
249
+ * P | String | pointer to a structure (fixed-length string)
250
+ * p | String | pointer to a null-terminated string
251
+ *
252
+ * Misc. | Array |
253
+ * Directive | Element | Meaning
254
+ * ---------------------------------------------------------------------------
255
+ * @ | --- | moves to absolute position
256
+ * X | --- | back up a byte
257
+ * x | --- | null byte
258
+ */
259
+
260
+ static VALUE
261
+ pack_pack(int argc, VALUE *argv, VALUE ary)
262
+ {
263
+ static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
264
+ static const char spc10[] = " ";
265
+ const char *p, *pend;
266
+ VALUE fmt, opt = Qnil, res, from, associates = 0, buffer = 0;
267
+ char type;
268
+ long len, idx, plen;
269
+ const char *ptr;
270
+ int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
271
+ #ifdef NATINT_PACK
272
+ int natint; /* native integer */
273
+ #endif
274
+ int integer_size, bigendian_p;
275
+
276
+ rb_scan_args(argc, argv, "10:", &fmt, &opt);
277
+
278
+ StringValue(fmt);
279
+ p = RSTRING_PTR(fmt);
280
+ pend = p + RSTRING_LEN(fmt);
281
+ if (!NIL_P(opt)) {
282
+ static ID keyword_ids[1];
283
+ if (!keyword_ids[0])
284
+ CONST_ID(keyword_ids[0], "buffer");
285
+
286
+ rb_get_kwargs(opt, keyword_ids, 0, 1, &buffer);
287
+
288
+ if (buffer != Qundef && !RB_TYPE_P(buffer, T_STRING))
289
+ rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
290
+ }
291
+ if (buffer)
292
+ res = buffer;
293
+ else
294
+ res = rb_str_buf_new(0);
295
+
296
+ idx = 0;
297
+
298
+ #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
299
+ #define MORE_ITEM (idx < RARRAY_LEN(ary))
300
+ #define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
301
+ #define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
302
+
303
+ while (p < pend) {
304
+ int explicit_endian = 0;
305
+ if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
306
+ rb_raise(rb_eRuntimeError, "format string modified");
307
+ }
308
+ type = *p++; /* get data type */
309
+ #ifdef NATINT_PACK
310
+ natint = 0;
311
+ #endif
312
+
313
+ if (ISSPACE(type)) continue;
314
+ if (type == '#') {
315
+ while ((p < pend) && (*p != '\n')) {
316
+ p++;
317
+ }
318
+ continue;
319
+ }
320
+
321
+ {
322
+ modifiers:
323
+ switch (*p) {
324
+ case '_':
325
+ case '!':
326
+ if (strchr(natstr, type)) {
327
+ #ifdef NATINT_PACK
328
+ natint = 1;
329
+ #endif
330
+ p++;
331
+ }
332
+ else {
333
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
334
+ }
335
+ goto modifiers;
336
+
337
+ case '<':
338
+ case '>':
339
+ if (!strchr(endstr, type)) {
340
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
341
+ }
342
+ if (explicit_endian) {
343
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
344
+ }
345
+ explicit_endian = *p++;
346
+ goto modifiers;
347
+ }
348
+ }
349
+
350
+ if (*p == '*') { /* set data length */
351
+ len = strchr("@Xxu", type) ? 0
352
+ : strchr("PMm", type) ? 1
353
+ : RARRAY_LEN(ary) - idx;
354
+ p++;
355
+ }
356
+ else if (ISDIGIT(*p)) {
357
+ errno = 0;
358
+ len = STRTOUL(p, (char**)&p, 10);
359
+ if (errno) {
360
+ rb_raise(rb_eRangeError, "pack length too big");
361
+ }
362
+ }
363
+ else {
364
+ len = 1;
365
+ }
366
+
367
+ switch (type) {
368
+ case 'U':
369
+ /* if encoding is US-ASCII, upgrade to UTF-8 */
370
+ if (enc_info == 1) enc_info = 2;
371
+ break;
372
+ case 'm': case 'M': case 'u':
373
+ /* keep US-ASCII (do nothing) */
374
+ break;
375
+ default:
376
+ /* fall back to BINARY */
377
+ enc_info = 0;
378
+ break;
379
+ }
380
+ switch (type) {
381
+ case 'A': case 'a': case 'Z':
382
+ case 'B': case 'b':
383
+ case 'H': case 'h':
384
+ from = NEXTFROM;
385
+ if (NIL_P(from)) {
386
+ ptr = "";
387
+ plen = 0;
388
+ }
389
+ else {
390
+ StringValue(from);
391
+ ptr = RSTRING_PTR(from);
392
+ plen = RSTRING_LEN(from);
393
+ OBJ_INFECT(res, from);
394
+ }
395
+
396
+ if (p[-1] == '*')
397
+ len = plen;
398
+
399
+ switch (type) {
400
+ case 'a': /* arbitrary binary string (null padded) */
401
+ case 'A': /* arbitrary binary string (ASCII space padded) */
402
+ case 'Z': /* null terminated string */
403
+ if (plen >= len) {
404
+ rb_str_buf_cat(res, ptr, len);
405
+ if (p[-1] == '*' && type == 'Z')
406
+ rb_str_buf_cat(res, nul10, 1);
407
+ }
408
+ else {
409
+ rb_str_buf_cat(res, ptr, plen);
410
+ len -= plen;
411
+ while (len >= 10) {
412
+ rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
413
+ len -= 10;
414
+ }
415
+ rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
416
+ }
417
+ break;
418
+
419
+ #define castchar(from) (char)((from) & 0xff)
420
+
421
+ case 'b': /* bit string (ascending) */
422
+ {
423
+ int byte = 0;
424
+ long i, j = 0;
425
+
426
+ if (len > plen) {
427
+ j = (len - plen + 1)/2;
428
+ len = plen;
429
+ }
430
+ for (i=0; i++ < len; ptr++) {
431
+ if (*ptr & 1)
432
+ byte |= 128;
433
+ if (i & 7)
434
+ byte >>= 1;
435
+ else {
436
+ char c = castchar(byte);
437
+ rb_str_buf_cat(res, &c, 1);
438
+ byte = 0;
439
+ }
440
+ }
441
+ if (len & 7) {
442
+ char c;
443
+ byte >>= 7 - (len & 7);
444
+ c = castchar(byte);
445
+ rb_str_buf_cat(res, &c, 1);
446
+ }
447
+ len = j;
448
+ goto grow;
449
+ }
450
+ break;
451
+
452
+ case 'B': /* bit string (descending) */
453
+ {
454
+ int byte = 0;
455
+ long i, j = 0;
456
+
457
+ if (len > plen) {
458
+ j = (len - plen + 1)/2;
459
+ len = plen;
460
+ }
461
+ for (i=0; i++ < len; ptr++) {
462
+ byte |= *ptr & 1;
463
+ if (i & 7)
464
+ byte <<= 1;
465
+ else {
466
+ char c = castchar(byte);
467
+ rb_str_buf_cat(res, &c, 1);
468
+ byte = 0;
469
+ }
470
+ }
471
+ if (len & 7) {
472
+ char c;
473
+ byte <<= 7 - (len & 7);
474
+ c = castchar(byte);
475
+ rb_str_buf_cat(res, &c, 1);
476
+ }
477
+ len = j;
478
+ goto grow;
479
+ }
480
+ break;
481
+
482
+ case 'h': /* hex string (low nibble first) */
483
+ {
484
+ int byte = 0;
485
+ long i, j = 0;
486
+
487
+ if (len > plen) {
488
+ j = (len + 1) / 2 - (plen + 1) / 2;
489
+ len = plen;
490
+ }
491
+ for (i=0; i++ < len; ptr++) {
492
+ if (ISALPHA(*ptr))
493
+ byte |= (((*ptr & 15) + 9) & 15) << 4;
494
+ else
495
+ byte |= (*ptr & 15) << 4;
496
+ if (i & 1)
497
+ byte >>= 4;
498
+ else {
499
+ char c = castchar(byte);
500
+ rb_str_buf_cat(res, &c, 1);
501
+ byte = 0;
502
+ }
503
+ }
504
+ if (len & 1) {
505
+ char c = castchar(byte);
506
+ rb_str_buf_cat(res, &c, 1);
507
+ }
508
+ len = j;
509
+ goto grow;
510
+ }
511
+ break;
512
+
513
+ case 'H': /* hex string (high nibble first) */
514
+ {
515
+ int byte = 0;
516
+ long i, j = 0;
517
+
518
+ if (len > plen) {
519
+ j = (len + 1) / 2 - (plen + 1) / 2;
520
+ len = plen;
521
+ }
522
+ for (i=0; i++ < len; ptr++) {
523
+ if (ISALPHA(*ptr))
524
+ byte |= ((*ptr & 15) + 9) & 15;
525
+ else
526
+ byte |= *ptr & 15;
527
+ if (i & 1)
528
+ byte <<= 4;
529
+ else {
530
+ char c = castchar(byte);
531
+ rb_str_buf_cat(res, &c, 1);
532
+ byte = 0;
533
+ }
534
+ }
535
+ if (len & 1) {
536
+ char c = castchar(byte);
537
+ rb_str_buf_cat(res, &c, 1);
538
+ }
539
+ len = j;
540
+ goto grow;
541
+ }
542
+ break;
543
+ }
544
+ break;
545
+
546
+ case 'c': /* signed char */
547
+ case 'C': /* unsigned char */
548
+ integer_size = 1;
549
+ bigendian_p = BIGENDIAN_P(); /* not effective */
550
+ goto pack_integer;
551
+
552
+ case 's': /* s for int16_t, s! for signed short */
553
+ integer_size = NATINT_LEN(short, 2);
554
+ bigendian_p = BIGENDIAN_P();
555
+ goto pack_integer;
556
+
557
+ case 'S': /* S for uint16_t, S! for unsigned short */
558
+ integer_size = NATINT_LEN(short, 2);
559
+ bigendian_p = BIGENDIAN_P();
560
+ goto pack_integer;
561
+
562
+ case 'i': /* i and i! for signed int */
563
+ integer_size = (int)sizeof(int);
564
+ bigendian_p = BIGENDIAN_P();
565
+ goto pack_integer;
566
+
567
+ case 'I': /* I and I! for unsigned int */
568
+ integer_size = (int)sizeof(int);
569
+ bigendian_p = BIGENDIAN_P();
570
+ goto pack_integer;
571
+
572
+ case 'l': /* l for int32_t, l! for signed long */
573
+ integer_size = NATINT_LEN(long, 4);
574
+ bigendian_p = BIGENDIAN_P();
575
+ goto pack_integer;
576
+
577
+ case 'L': /* L for uint32_t, L! for unsigned long */
578
+ integer_size = NATINT_LEN(long, 4);
579
+ bigendian_p = BIGENDIAN_P();
580
+ goto pack_integer;
581
+
582
+ case 'q': /* q for int64_t, q! for signed long long */
583
+ integer_size = NATINT_LEN_Q;
584
+ bigendian_p = BIGENDIAN_P();
585
+ goto pack_integer;
586
+
587
+ case 'Q': /* Q for uint64_t, Q! for unsigned long long */
588
+ integer_size = NATINT_LEN_Q;
589
+ bigendian_p = BIGENDIAN_P();
590
+ goto pack_integer;
591
+
592
+ case 'j': /* j for intptr_t */
593
+ integer_size = sizeof(intptr_t);
594
+ bigendian_p = BIGENDIAN_P();
595
+ goto pack_integer;
596
+
597
+ case 'J': /* J for uintptr_t */
598
+ integer_size = sizeof(uintptr_t);
599
+ bigendian_p = BIGENDIAN_P();
600
+ goto pack_integer;
601
+
602
+ case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
603
+ integer_size = 2;
604
+ bigendian_p = 1;
605
+ goto pack_integer;
606
+
607
+ case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
608
+ integer_size = 4;
609
+ bigendian_p = 1;
610
+ goto pack_integer;
611
+
612
+ case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
613
+ integer_size = 2;
614
+ bigendian_p = 0;
615
+ goto pack_integer;
616
+
617
+ case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
618
+ integer_size = 4;
619
+ bigendian_p = 0;
620
+ goto pack_integer;
621
+
622
+ pack_integer:
623
+ if (explicit_endian) {
624
+ bigendian_p = explicit_endian == '>';
625
+ }
626
+ if (integer_size > MAX_INTEGER_PACK_SIZE)
627
+ rb_bug("unexpected intger size for pack: %d", integer_size);
628
+ while (len-- > 0) {
629
+ char intbuf[MAX_INTEGER_PACK_SIZE];
630
+
631
+ from = NEXTFROM;
632
+ rb_integer_pack(from, intbuf, integer_size, 1, 0,
633
+ INTEGER_PACK_2COMP |
634
+ (bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN));
635
+ rb_str_buf_cat(res, intbuf, integer_size);
636
+ }
637
+ break;
638
+
639
+ case 'f': /* single precision float in native format */
640
+ case 'F': /* ditto */
641
+ while (len-- > 0) {
642
+ float f;
643
+
644
+ from = NEXTFROM;
645
+ f = (float)RFLOAT_VALUE(rb_to_float(from));
646
+ rb_str_buf_cat(res, (char*)&f, sizeof(float));
647
+ }
648
+ break;
649
+
650
+ case 'e': /* single precision float in VAX byte-order */
651
+ while (len-- > 0) {
652
+ FLOAT_CONVWITH(tmp);
653
+
654
+ from = NEXTFROM;
655
+ tmp.f = (float)RFLOAT_VALUE(rb_to_float(from));
656
+ HTOVF(tmp);
657
+ rb_str_buf_cat(res, tmp.buf, sizeof(float));
658
+ }
659
+ break;
660
+
661
+ case 'E': /* double precision float in VAX byte-order */
662
+ while (len-- > 0) {
663
+ DOUBLE_CONVWITH(tmp);
664
+ from = NEXTFROM;
665
+ tmp.d = RFLOAT_VALUE(rb_to_float(from));
666
+ HTOVD(tmp);
667
+ rb_str_buf_cat(res, tmp.buf, sizeof(double));
668
+ }
669
+ break;
670
+
671
+ case 'd': /* double precision float in native format */
672
+ case 'D': /* ditto */
673
+ while (len-- > 0) {
674
+ double d;
675
+
676
+ from = NEXTFROM;
677
+ d = RFLOAT_VALUE(rb_to_float(from));
678
+ rb_str_buf_cat(res, (char*)&d, sizeof(double));
679
+ }
680
+ break;
681
+
682
+ case 'g': /* single precision float in network byte-order */
683
+ while (len-- > 0) {
684
+ FLOAT_CONVWITH(tmp);
685
+ from = NEXTFROM;
686
+ tmp.f = (float)RFLOAT_VALUE(rb_to_float(from));
687
+ HTONF(tmp);
688
+ rb_str_buf_cat(res, tmp.buf, sizeof(float));
689
+ }
690
+ break;
691
+
692
+ case 'G': /* double precision float in network byte-order */
693
+ while (len-- > 0) {
694
+ DOUBLE_CONVWITH(tmp);
695
+
696
+ from = NEXTFROM;
697
+ tmp.d = RFLOAT_VALUE(rb_to_float(from));
698
+ HTOND(tmp);
699
+ rb_str_buf_cat(res, tmp.buf, sizeof(double));
700
+ }
701
+ break;
702
+
703
+ case 'x': /* null byte */
704
+ grow:
705
+ while (len >= 10) {
706
+ rb_str_buf_cat(res, nul10, 10);
707
+ len -= 10;
708
+ }
709
+ rb_str_buf_cat(res, nul10, len);
710
+ break;
711
+
712
+ case 'X': /* back up byte */
713
+ shrink:
714
+ plen = RSTRING_LEN(res);
715
+ if (plen < len)
716
+ rb_raise(rb_eArgError, "X outside of string");
717
+ rb_str_set_len(res, plen - len);
718
+ break;
719
+
720
+ case '@': /* null fill to absolute position */
721
+ len -= RSTRING_LEN(res);
722
+ if (len > 0) goto grow;
723
+ len = -len;
724
+ if (len > 0) goto shrink;
725
+ break;
726
+
727
+ case '%':
728
+ rb_raise(rb_eArgError, "%% is not supported");
729
+ break;
730
+
731
+ case 'U': /* Unicode character */
732
+ while (len-- > 0) {
733
+ SIGNED_VALUE l;
734
+ char buf[8];
735
+ int le;
736
+
737
+ from = NEXTFROM;
738
+ from = rb_to_int(from);
739
+ l = NUM2LONG(from);
740
+ if (l < 0) {
741
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
742
+ }
743
+ le = rb_uv_to_utf8(buf, l);
744
+ rb_str_buf_cat(res, (char*)buf, le);
745
+ }
746
+ break;
747
+
748
+ case 'u': /* uuencoded string */
749
+ case 'm': /* base64 encoded string */
750
+ from = NEXTFROM;
751
+ StringValue(from);
752
+ ptr = RSTRING_PTR(from);
753
+ plen = RSTRING_LEN(from);
754
+
755
+ if (len == 0 && type == 'm') {
756
+ encodes(res, ptr, plen, type, 0);
757
+ ptr += plen;
758
+ break;
759
+ }
760
+ if (len <= 2)
761
+ len = 45;
762
+ else if (len > 63 && type == 'u')
763
+ len = 63;
764
+ else
765
+ len = len / 3 * 3;
766
+ while (plen > 0) {
767
+ long todo;
768
+
769
+ if (plen > len)
770
+ todo = len;
771
+ else
772
+ todo = plen;
773
+ encodes(res, ptr, todo, type, 1);
774
+ plen -= todo;
775
+ ptr += todo;
776
+ }
777
+ break;
778
+
779
+ case 'M': /* quoted-printable encoded string */
780
+ from = rb_obj_as_string(NEXTFROM);
781
+ if (len <= 1)
782
+ len = 72;
783
+ qpencode(res, from, len);
784
+ break;
785
+
786
+ case 'P': /* pointer to packed byte string */
787
+ from = THISFROM;
788
+ if (!NIL_P(from)) {
789
+ StringValue(from);
790
+ if (RSTRING_LEN(from) < len) {
791
+ rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
792
+ RSTRING_LEN(from), len);
793
+ }
794
+ }
795
+ len = 1;
796
+ /* FALL THROUGH */
797
+ case 'p': /* pointer to string */
798
+ while (len-- > 0) {
799
+ char *t;
800
+ from = NEXTFROM;
801
+ if (NIL_P(from)) {
802
+ t = 0;
803
+ }
804
+ else {
805
+ t = StringValuePtr(from);
806
+ rb_obj_taint(from);
807
+ }
808
+ if (!associates) {
809
+ associates = rb_ary_new();
810
+ }
811
+ rb_ary_push(associates, from);
812
+ rb_str_buf_cat(res, (char*)&t, sizeof(char*));
813
+ }
814
+ break;
815
+
816
+ case 'w': /* BER compressed integer */
817
+ while (len-- > 0) {
818
+ VALUE buf = rb_str_new(0, 0);
819
+ size_t numbytes;
820
+ int sign;
821
+ char *cp;
822
+
823
+ from = NEXTFROM;
824
+ from = rb_to_int(from);
825
+ numbytes = rb_absint_numwords(from, 7, NULL);
826
+ if (numbytes == 0)
827
+ numbytes = 1;
828
+ buf = rb_str_new(NULL, numbytes);
829
+
830
+ sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
831
+
832
+ if (sign < 0)
833
+ rb_raise(rb_eArgError, "can't compress negative numbers");
834
+ if (sign == 2)
835
+ rb_bug("buffer size problem?");
836
+
837
+ cp = RSTRING_PTR(buf);
838
+ while (1 < numbytes) {
839
+ *cp |= 0x80;
840
+ cp++;
841
+ numbytes--;
842
+ }
843
+
844
+ rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
845
+ }
846
+ break;
847
+
848
+ default: {
849
+ char unknown[5];
850
+ if (ISPRINT(type)) {
851
+ unknown[0] = type;
852
+ unknown[1] = '\0';
853
+ }
854
+ else {
855
+ snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
856
+ }
857
+ rb_warning("unknown pack directive '%s' in '% "PRIsVALUE"'",
858
+ unknown, fmt);
859
+ break;
860
+ }
861
+ }
862
+ }
863
+
864
+ if (associates) {
865
+ str_associate(res, associates);
866
+ }
867
+ OBJ_INFECT(res, fmt);
868
+ switch (enc_info) {
869
+ case 1:
870
+ ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
871
+ break;
872
+ case 2:
873
+ rb_enc_set_index(res, rb_utf8_encindex());
874
+ break;
875
+ default:
876
+ /* do nothing, keep ASCII-8BIT */
877
+ break;
878
+ }
879
+ return res;
880
+ }
881
+
129
882
  static const char uu_table[] =
130
883
  "`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
131
884
  static const char b64_table[] =
@@ -260,10 +1013,11 @@ hex2num(char c)
260
1013
  rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
261
1014
  } while (0)
262
1015
 
263
- /* Workaround for Oracle Solaris Studio 12.4 C compiler optimization bug
1016
+ /* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
1017
+ * 12.4/12.5/12.6 C compiler optimization bug
264
1018
  * with "-xO4" optimization option.
265
1019
  */
266
- #if defined(__SUNPRO_C) && __SUNPRO_C == 0x5130
1020
+ #if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
267
1021
  # define AVOID_CC_BUG volatile
268
1022
  #else
269
1023
  # define AVOID_CC_BUG
@@ -283,12 +1037,11 @@ infected_str_new(const char *ptr, long len, VALUE str)
283
1037
  #define UNPACK_BLOCK 1
284
1038
  #define UNPACK_1 2
285
1039
 
286
- #define castchar(from) (char)((from) & 0xff)
287
-
288
- VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1040
+ static VALUE
1041
+ pack_unpack_internal(VALUE str, VALUE fmt, int mode)
289
1042
  {
290
1043
  #define hexdigits ruby_hexdigits
291
- char *init_s, *s, *send;
1044
+ char *s, *send;
292
1045
  char *p, *pend;
293
1046
  VALUE ary;
294
1047
  char type;
@@ -299,20 +1052,22 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
299
1052
  int natint; /* native integer */
300
1053
  #endif
301
1054
  int signed_p, integer_size, bigendian_p;
302
- int mode = (rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY);
303
1055
  #define UNPACK_PUSH(item) do {\
304
1056
  VALUE item_val = (item);\
305
1057
  if ((mode) == UNPACK_BLOCK) {\
306
1058
  rb_yield(item_val);\
307
1059
  }\
308
- else {\
1060
+ else if ((mode) == UNPACK_ARRAY) {\
309
1061
  rb_ary_push(ary, item_val);\
310
1062
  }\
1063
+ else /* if ((mode) == UNPACK_1) { */ {\
1064
+ return item_val; \
1065
+ }\
311
1066
  } while (0)
312
1067
 
313
1068
  StringValue(str);
314
1069
  StringValue(fmt);
315
- init_s = s = RSTRING_PTR(str);
1070
+ s = RSTRING_PTR(str);
316
1071
  send = s + RSTRING_LEN(str);
317
1072
  p = RSTRING_PTR(fmt);
318
1073
  pend = p + RSTRING_LEN(fmt);
@@ -374,7 +1129,7 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
374
1129
  else if (ISDIGIT(*p)) {
375
1130
  errno = 0;
376
1131
  len = STRTOUL(p, (char**)&p, 10);
377
- if (errno) {
1132
+ if (len < 0 || errno) {
378
1133
  rb_raise(rb_eRangeError, "pack length too big");
379
1134
  }
380
1135
  }
@@ -845,6 +1600,7 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
845
1600
  {
846
1601
  VALUE buf = infected_str_new(0, send - s, str);
847
1602
  char *ptr = RSTRING_PTR(buf), *ss = s;
1603
+ int csum = 0;
848
1604
  int c1, c2;
849
1605
 
850
1606
  while (s < send) {
@@ -856,18 +1612,19 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
856
1612
  if ((c1 = hex2num(*s)) == -1) break;
857
1613
  if (++s == send) break;
858
1614
  if ((c2 = hex2num(*s)) == -1) break;
859
- *ptr++ = castchar(c1 << 4 | c2);
1615
+ csum |= *ptr++ = castchar(c1 << 4 | c2);
860
1616
  }
861
1617
  }
862
1618
  else {
863
- *ptr++ = *s;
1619
+ csum |= *ptr++ = *s;
864
1620
  }
865
1621
  s++;
866
1622
  ss = s;
867
1623
  }
868
1624
  rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
869
1625
  rb_str_buf_cat(buf, ss, send-ss);
870
- ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), ENC_CODERANGE_VALID);
1626
+ csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
1627
+ ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
871
1628
  UNPACK_PUSH(buf);
872
1629
  }
873
1630
  break;
@@ -990,10 +1747,151 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
990
1747
  }
991
1748
  }
992
1749
 
993
- *parsed_len = s - init_s;
994
1750
  return ary;
995
1751
  }
996
1752
 
1753
+ /*
1754
+ * call-seq:
1755
+ * str.unpack(format) -> anArray
1756
+ *
1757
+ * Decodes <i>str</i> (which may contain binary data) according to the
1758
+ * format string, returning an array of each value extracted. The
1759
+ * format string consists of a sequence of single-character directives,
1760
+ * summarized in the table at the end of this entry.
1761
+ * Each directive may be followed
1762
+ * by a number, indicating the number of times to repeat with this
1763
+ * directive. An asterisk (``<code>*</code>'') will use up all
1764
+ * remaining elements. The directives <code>sSiIlL</code> may each be
1765
+ * followed by an underscore (``<code>_</code>'') or
1766
+ * exclamation mark (``<code>!</code>'') to use the underlying
1767
+ * platform's native size for the specified type; otherwise, it uses a
1768
+ * platform-independent consistent size. Spaces are ignored in the
1769
+ * format string. See also <code>String#unpack1</code>, <code>Array#pack</code>.
1770
+ *
1771
+ * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1772
+ * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1773
+ * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1774
+ * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1775
+ * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1776
+ * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1777
+ * "now=20is".unpack('M*') #=> ["now is"]
1778
+ * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1779
+ *
1780
+ * This table summarizes the various formats and the Ruby classes
1781
+ * returned by each.
1782
+ *
1783
+ * Integer | |
1784
+ * Directive | Returns | Meaning
1785
+ * ------------------------------------------------------------------
1786
+ * C | Integer | 8-bit unsigned (unsigned char)
1787
+ * S | Integer | 16-bit unsigned, native endian (uint16_t)
1788
+ * L | Integer | 32-bit unsigned, native endian (uint32_t)
1789
+ * Q | Integer | 64-bit unsigned, native endian (uint64_t)
1790
+ * J | Integer | pointer width unsigned, native endian (uintptr_t)
1791
+ * | |
1792
+ * c | Integer | 8-bit signed (signed char)
1793
+ * s | Integer | 16-bit signed, native endian (int16_t)
1794
+ * l | Integer | 32-bit signed, native endian (int32_t)
1795
+ * q | Integer | 64-bit signed, native endian (int64_t)
1796
+ * j | Integer | pointer width signed, native endian (intptr_t)
1797
+ * | |
1798
+ * S_ S! | Integer | unsigned short, native endian
1799
+ * I I_ I! | Integer | unsigned int, native endian
1800
+ * L_ L! | Integer | unsigned long, native endian
1801
+ * Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
1802
+ * | | if the platform has no long long type.)
1803
+ * J! | Integer | uintptr_t, native endian (same as J)
1804
+ * | |
1805
+ * s_ s! | Integer | signed short, native endian
1806
+ * i i_ i! | Integer | signed int, native endian
1807
+ * l_ l! | Integer | signed long, native endian
1808
+ * q_ q! | Integer | signed long long, native endian (ArgumentError
1809
+ * | | if the platform has no long long type.)
1810
+ * j! | Integer | intptr_t, native endian (same as j)
1811
+ * | |
1812
+ * S> s> S!> s!> | Integer | same as the directives without ">" except
1813
+ * L> l> L!> l!> | | big endian
1814
+ * I!> i!> | |
1815
+ * Q> q> Q!> q!> | | "S>" is same as "n"
1816
+ * J> j> J!> j!> | | "L>" is same as "N"
1817
+ * | |
1818
+ * S< s< S!< s!< | Integer | same as the directives without "<" except
1819
+ * L< l< L!< l!< | | little endian
1820
+ * I!< i!< | |
1821
+ * Q< q< Q!< q!< | | "S<" is same as "v"
1822
+ * J< j< J!< j!< | | "L<" is same as "V"
1823
+ * | |
1824
+ * n | Integer | 16-bit unsigned, network (big-endian) byte order
1825
+ * N | Integer | 32-bit unsigned, network (big-endian) byte order
1826
+ * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
1827
+ * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
1828
+ * | |
1829
+ * U | Integer | UTF-8 character
1830
+ * w | Integer | BER-compressed integer (see Array#pack)
1831
+ *
1832
+ * Float | |
1833
+ * Directive | Returns | Meaning
1834
+ * -----------------------------------------------------------------
1835
+ * D d | Float | double-precision, native format
1836
+ * F f | Float | single-precision, native format
1837
+ * E | Float | double-precision, little-endian byte order
1838
+ * e | Float | single-precision, little-endian byte order
1839
+ * G | Float | double-precision, network (big-endian) byte order
1840
+ * g | Float | single-precision, network (big-endian) byte order
1841
+ *
1842
+ * String | |
1843
+ * Directive | Returns | Meaning
1844
+ * -----------------------------------------------------------------
1845
+ * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
1846
+ * a | String | arbitrary binary string
1847
+ * Z | String | null-terminated string
1848
+ * B | String | bit string (MSB first)
1849
+ * b | String | bit string (LSB first)
1850
+ * H | String | hex string (high nibble first)
1851
+ * h | String | hex string (low nibble first)
1852
+ * u | String | UU-encoded string
1853
+ * M | String | quoted-printable, MIME encoding (see RFC2045)
1854
+ * m | String | base64 encoded string (RFC 2045) (default)
1855
+ * | | base64 encoded string (RFC 4648) if followed by 0
1856
+ * P | String | pointer to a structure (fixed-length string)
1857
+ * p | String | pointer to a null-terminated string
1858
+ *
1859
+ * Misc. | |
1860
+ * Directive | Returns | Meaning
1861
+ * -----------------------------------------------------------------
1862
+ * @ | --- | skip to the offset given by the length argument
1863
+ * X | --- | skip backward one byte
1864
+ * x | --- | skip forward one byte
1865
+ *
1866
+ * HISTORY
1867
+ *
1868
+ * * J, J!, j, and j! are available since Ruby 2.3.
1869
+ * * Q_, Q!, q_, and q! are available since Ruby 2.1.
1870
+ * * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
1871
+ */
1872
+
1873
+ static VALUE
1874
+ pack_unpack(VALUE str, VALUE fmt)
1875
+ {
1876
+ int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY;
1877
+ return pack_unpack_internal(str, fmt, mode);
1878
+ }
1879
+
1880
+ /*
1881
+ * call-seq:
1882
+ * str.unpack1(format) -> obj
1883
+ *
1884
+ * Decodes <i>str</i> (which may contain binary data) according to the
1885
+ * format string, returning the first value extracted.
1886
+ * See also <code>String#unpack</code>, <code>Array#pack</code>.
1887
+ */
1888
+
1889
+ static VALUE
1890
+ pack_unpack1(VALUE str, VALUE fmt)
1891
+ {
1892
+ return pack_unpack_internal(str, fmt, UNPACK_1);
1893
+ }
1894
+
997
1895
  int
998
1896
  rb_uv_to_utf8(char buf[6], unsigned long uv)
999
1897
  {
@@ -1100,3 +1998,13 @@ utf8_to_uv(const char *p, long *lenp)
1100
1998
  }
1101
1999
  return uv;
1102
2000
  }
2001
+
2002
+ void
2003
+ Init_pack(void)
2004
+ {
2005
+ rb_define_method(rb_cArray, "pack", pack_pack, -1);
2006
+ rb_define_method(rb_cString, "unpack", pack_unpack, 1);
2007
+ rb_define_method(rb_cString, "unpack1", pack_unpack1, 1);
2008
+
2009
+ id_associated = rb_make_internal_id();
2010
+ }