zscan 2.0.3 → 2.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/pack/pack.c CHANGED
@@ -9,8 +9,6 @@
9
9
 
10
10
  **********************************************************************/
11
11
 
12
- #include "ruby/ruby.h"
13
- #include "ruby/encoding.h"
14
12
  #include "internal.h"
15
13
  #include <sys/types.h>
16
14
  #include <ctype.h>
@@ -25,11 +23,11 @@
25
23
  * This behavior is consistent with the document of pack/unpack.
26
24
  */
27
25
  #ifdef HAVE_TRUE_LONG_LONG
28
- static const char natstr[] = "sSiIlLqQ";
26
+ static const char natstr[] = "sSiIlLqQjJ";
29
27
  #else
30
- static const char natstr[] = "sSiIlL";
28
+ static const char natstr[] = "sSiIlLjJ";
31
29
  #endif
32
- static const char endstr[] = "sSiIlLqQ";
30
+ static const char endstr[] = "sSiIlLqQjJ";
33
31
 
34
32
  #ifdef HAVE_TRUE_LONG_LONG
35
33
  /* It is intentional to use long long instead of LONG_LONG. */
@@ -70,129 +68,18 @@ static const char endstr[] = "sSiIlLqQ";
70
68
  # define NATINT_LEN(type,len) ((int)sizeof(type))
71
69
  #endif
72
70
 
73
- #if SIZEOF_LONG == 8
74
- # define INT64toNUM(x) LONG2NUM(x)
75
- # define UINT64toNUM(x) ULONG2NUM(x)
76
- #elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8
77
- # define INT64toNUM(x) LL2NUM(x)
78
- # define UINT64toNUM(x) ULL2NUM(x)
79
- #endif
80
-
81
- #define define_swapx(x, xtype) \
82
- static xtype \
83
- TOKEN_PASTE(swap,x)(xtype z) \
84
- { \
85
- xtype r; \
86
- xtype *zp; \
87
- unsigned char *s, *t; \
88
- int i; \
89
- \
90
- zp = xmalloc(sizeof(xtype)); \
91
- *zp = z; \
92
- s = (unsigned char*)zp; \
93
- t = xmalloc(sizeof(xtype)); \
94
- for (i=0; i<sizeof(xtype); i++) { \
95
- t[sizeof(xtype)-i-1] = s[i]; \
96
- } \
97
- r = *(xtype *)t; \
98
- xfree(t); \
99
- xfree(zp); \
100
- return r; \
101
- }
102
-
103
- #if GCC_VERSION_SINCE(4,3,0)
104
- # define swap32(x) __builtin_bswap32(x)
105
- # define swap64(x) __builtin_bswap64(x)
106
- #endif
107
-
108
- #ifndef swap16
109
- # define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
110
- #endif
111
-
112
- #ifndef swap32
113
- # define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \
114
- |(((x)>>24)&0xFF) \
115
- |(((x)&0x0000FF00)<<8) \
116
- |(((x)&0x00FF0000)>>8) ))
117
- #endif
118
-
119
- #ifndef swap64
120
- # ifdef HAVE_INT64_T
121
- # define byte_in_64bit(n) ((uint64_t)0xff << (n))
122
- # define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
123
- |(((x)>>56)&0xFF) \
124
- |(((x)&byte_in_64bit(8))<<40) \
125
- |(((x)&byte_in_64bit(48))>>40) \
126
- |(((x)&byte_in_64bit(16))<<24) \
127
- |(((x)&byte_in_64bit(40))>>24) \
128
- |(((x)&byte_in_64bit(24))<<8) \
129
- |(((x)&byte_in_64bit(32))>>8)))
130
- # endif
131
- #endif
132
-
133
- #if SIZEOF_SHORT == 2
134
- # define swaps(x) swap16(x)
135
- #elif SIZEOF_SHORT == 4
136
- # define swaps(x) swap32(x)
137
- #else
138
- define_swapx(s,short)
139
- #endif
140
-
141
- #if SIZEOF_INT == 2
142
- # define swapi(x) swap16(x)
143
- #elif SIZEOF_INT == 4
144
- # define swapi(x) swap32(x)
145
- #else
146
- define_swapx(i,int)
147
- #endif
148
-
149
- #if SIZEOF_LONG == 4
150
- # define swapl(x) swap32(x)
151
- #elif SIZEOF_LONG == 8
152
- # define swapl(x) swap64(x)
153
- #else
154
- define_swapx(l,long)
155
- #endif
156
-
157
- #ifdef HAVE_LONG_LONG
158
- # if SIZEOF_LONG_LONG == 8
159
- # define swapll(x) swap64(x)
160
- # else
161
- define_swapx(ll,LONG_LONG)
162
- # endif
163
- #endif
164
-
165
- #if SIZEOF_FLOAT == 4 && defined(HAVE_INT32_T)
166
- # define swapf(x) swap32(x)
167
- # define FLOAT_SWAPPER uint32_t
168
- #else
169
- define_swapx(f,float)
170
- #endif
171
-
172
- #if SIZEOF_DOUBLE == 8 && defined(HAVE_INT64_T)
173
- # define swapd(x) swap64(x)
174
- # define DOUBLE_SWAPPER uint64_t
175
- #elif SIZEOF_DOUBLE == 8 && defined(HAVE_INT32_T)
176
- static double
177
- swapd(const double d)
178
- {
179
- double dtmp = d;
180
- uint32_t utmp[2];
181
- uint32_t utmp0;
182
-
183
- utmp[0] = 0; utmp[1] = 0;
184
- memcpy(utmp,&dtmp,sizeof(double));
185
- utmp0 = utmp[0];
186
- utmp[0] = swap32(utmp[1]);
187
- utmp[1] = swap32(utmp0);
188
- memcpy(&dtmp,utmp,sizeof(double));
189
- return dtmp;
190
- }
191
- #else
192
- define_swapx(d, double)
193
- #endif
194
-
195
- #undef define_swapx
71
+ typedef union {
72
+ float f;
73
+ uint32_t u;
74
+ char buf[4];
75
+ } FLOAT_SWAPPER;
76
+ typedef union {
77
+ double d;
78
+ uint64_t u;
79
+ char buf[8];
80
+ } DOUBLE_SWAPPER;
81
+ #define swapf(x) swap32(x)
82
+ #define swapd(x) swap64(x)
196
83
 
197
84
  #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
198
85
  #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
@@ -203,74 +90,19 @@ TOKEN_PASTE(swap,x)(xtype z) \
203
90
  #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
204
91
  #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
205
92
 
206
- #ifdef FLOAT_SWAPPER
207
- # define FLOAT_CONVWITH(y) FLOAT_SWAPPER y;
208
- # define HTONF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
209
- (y) = rb_htonf((FLOAT_SWAPPER)(y)), \
210
- memcpy(&(x),&(y),sizeof(float)), \
211
- (x))
212
- # define HTOVF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
213
- (y) = rb_htovf((FLOAT_SWAPPER)(y)), \
214
- memcpy(&(x),&(y),sizeof(float)), \
215
- (x))
216
- # define NTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
217
- (y) = rb_ntohf((FLOAT_SWAPPER)(y)), \
218
- memcpy(&(x),&(y),sizeof(float)), \
219
- (x))
220
- # define VTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
221
- (y) = rb_vtohf((FLOAT_SWAPPER)(y)), \
222
- memcpy(&(x),&(y),sizeof(float)), \
223
- (x))
224
- #else
225
- # define FLOAT_CONVWITH(y)
226
- # define HTONF(x,y) rb_htonf(x)
227
- # define HTOVF(x,y) rb_htovf(x)
228
- # define NTOHF(x,y) rb_ntohf(x)
229
- # define VTOHF(x,y) rb_vtohf(x)
230
- #endif
93
+ #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
94
+ #define HTONF(x) ((x).u = rb_htonf((x).u))
95
+ #define HTOVF(x) ((x).u = rb_htovf((x).u))
96
+ #define NTOHF(x) ((x).u = rb_ntohf((x).u))
97
+ #define VTOHF(x) ((x).u = rb_vtohf((x).u))
231
98
 
232
- #ifdef DOUBLE_SWAPPER
233
- # define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y;
234
- # define HTOND(x,y) (memcpy(&(y),&(x),sizeof(double)), \
235
- (y) = rb_htond((DOUBLE_SWAPPER)(y)), \
236
- memcpy(&(x),&(y),sizeof(double)), \
237
- (x))
238
- # define HTOVD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
239
- (y) = rb_htovd((DOUBLE_SWAPPER)(y)), \
240
- memcpy(&(x),&(y),sizeof(double)), \
241
- (x))
242
- # define NTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
243
- (y) = rb_ntohd((DOUBLE_SWAPPER)(y)), \
244
- memcpy(&(x),&(y),sizeof(double)), \
245
- (x))
246
- # define VTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
247
- (y) = rb_vtohd((DOUBLE_SWAPPER)(y)), \
248
- memcpy(&(x),&(y),sizeof(double)), \
249
- (x))
250
- #else
251
- # define DOUBLE_CONVWITH(y)
252
- # define HTOND(x,y) rb_htond(x)
253
- # define HTOVD(x,y) rb_htovd(x)
254
- # define NTOHD(x,y) rb_ntohd(x)
255
- # define VTOHD(x,y) rb_vtohd(x)
256
- #endif
257
-
258
- static unsigned long
259
- num2i32(VALUE x)
260
- {
261
- x = rb_to_int(x); /* is nil OK? (should not) */
262
-
263
- if (FIXNUM_P(x)) return FIX2LONG(x);
264
- if (RB_TYPE_P(x, T_BIGNUM)) {
265
- return rb_big2ulong_pack(x);
266
- }
267
- rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x));
268
-
269
- UNREACHABLE;
270
- }
99
+ #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
100
+ #define HTOND(x) ((x).u = rb_htond((x).u))
101
+ #define HTOVD(x) ((x).u = rb_htovd((x).u))
102
+ #define NTOHD(x) ((x).u = rb_ntohd((x).u))
103
+ #define VTOHD(x) ((x).u = rb_vtohd((x).u))
271
104
 
272
105
  #define MAX_INTEGER_PACK_SIZE 8
273
- /* #define FORCE_BIG_PACK */
274
106
 
275
107
  static const char toofew[] = "too few arguments";
276
108
 
@@ -279,802 +111,19 @@ static void qpencode(VALUE,VALUE,long);
279
111
 
280
112
  static unsigned long utf8_to_uv(const char*,long*);
281
113
 
282
- /*
283
- * call-seq:
284
- * arr.pack ( aTemplateString ) -> aBinaryString
285
- *
286
- * Packs the contents of <i>arr</i> into a binary sequence according to
287
- * the directives in <i>aTemplateString</i> (see the table below)
288
- * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
289
- * which gives the width of the resulting field. The remaining
290
- * directives also may take a count, indicating the number of array
291
- * elements to convert. If the count is an asterisk
292
- * (``<code>*</code>''), all remaining array elements will be
293
- * converted. Any of the directives ``<code>sSiIlL</code>'' may be
294
- * followed by an underscore (``<code>_</code>'') or
295
- * exclamation mark (``<code>!</code>'') to use the underlying
296
- * platform's native size for the specified type; otherwise, they use a
297
- * platform-independent size. Spaces are ignored in the template
298
- * string. See also <code>String#unpack</code>.
299
- *
300
- * a = [ "a", "b", "c" ]
301
- * n = [ 65, 66, 67 ]
302
- * a.pack("A3A3A3") #=> "a b c "
303
- * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
304
- * n.pack("ccc") #=> "ABC"
305
- *
306
- * Directives for +pack+.
307
- *
308
- * Integer | Array |
309
- * Directive | Element | Meaning
310
- * ---------------------------------------------------------------------------
311
- * C | Integer | 8-bit unsigned (unsigned char)
312
- * S | Integer | 16-bit unsigned, native endian (uint16_t)
313
- * L | Integer | 32-bit unsigned, native endian (uint32_t)
314
- * Q | Integer | 64-bit unsigned, native endian (uint64_t)
315
- * | |
316
- * c | Integer | 8-bit signed (signed char)
317
- * s | Integer | 16-bit signed, native endian (int16_t)
318
- * l | Integer | 32-bit signed, native endian (int32_t)
319
- * q | Integer | 64-bit signed, native endian (int64_t)
320
- * | |
321
- * S_, S! | Integer | unsigned short, native endian
322
- * I, I_, I! | Integer | unsigned int, native endian
323
- * L_, L! | Integer | unsigned long, native endian
324
- * Q_, Q! | Integer | unsigned long long, native endian (ArgumentError
325
- * | | if the platform has no long long type.)
326
- * | | (Q_ and Q! is available since Ruby 2.1.)
327
- * | |
328
- * s_, s! | Integer | signed short, native endian
329
- * i, i_, i! | Integer | signed int, native endian
330
- * l_, l! | Integer | signed long, native endian
331
- * q_, q! | Integer | signed long long, native endian (ArgumentError
332
- * | | if the platform has no long long type.)
333
- * | | (q_ and q! is available since Ruby 2.1.)
334
- * | |
335
- * S> L> Q> | Integer | same as the directives without ">" except
336
- * s> l> q> | | big endian
337
- * S!> I!> | | (available since Ruby 1.9.3)
338
- * L!> Q!> | | "S>" is same as "n"
339
- * s!> i!> | | "L>" is same as "N"
340
- * l!> q!> | |
341
- * | |
342
- * S< L< Q< | Integer | same as the directives without "<" except
343
- * s< l< q< | | little endian
344
- * S!< I!< | | (available since Ruby 1.9.3)
345
- * L!< Q!< | | "S<" is same as "v"
346
- * s!< i!< | | "L<" is same as "V"
347
- * l!< q!< | |
348
- * | |
349
- * n | Integer | 16-bit unsigned, network (big-endian) byte order
350
- * N | Integer | 32-bit unsigned, network (big-endian) byte order
351
- * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
352
- * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
353
- * | |
354
- * U | Integer | UTF-8 character
355
- * w | Integer | BER-compressed integer
356
- *
357
- * Float | |
358
- * Directive | | Meaning
359
- * ---------------------------------------------------------------------------
360
- * D, d | Float | double-precision, native format
361
- * F, f | Float | single-precision, native format
362
- * E | Float | double-precision, little-endian byte order
363
- * e | Float | single-precision, little-endian byte order
364
- * G | Float | double-precision, network (big-endian) byte order
365
- * g | Float | single-precision, network (big-endian) byte order
366
- *
367
- * String | |
368
- * Directive | | Meaning
369
- * ---------------------------------------------------------------------------
370
- * A | String | arbitrary binary string (space padded, count is width)
371
- * a | String | arbitrary binary string (null padded, count is width)
372
- * Z | String | same as ``a'', except that null is added with *
373
- * B | String | bit string (MSB first)
374
- * b | String | bit string (LSB first)
375
- * H | String | hex string (high nibble first)
376
- * h | String | hex string (low nibble first)
377
- * u | String | UU-encoded string
378
- * M | String | quoted printable, MIME encoding (see RFC2045)
379
- * m | String | base64 encoded string (see RFC 2045, count is width)
380
- * | | (if count is 0, no line feed are added, see RFC 4648)
381
- * P | String | pointer to a structure (fixed-length string)
382
- * p | String | pointer to a null-terminated string
383
- *
384
- * Misc. | |
385
- * Directive | | Meaning
386
- * ---------------------------------------------------------------------------
387
- * @ | --- | moves to absolute position
388
- * X | --- | back up a byte
389
- * x | --- | null byte
390
- */
114
+ static ID id_associated;
391
115
 
392
- __attribute__ ((unused))
393
- static VALUE
394
- pack_pack(VALUE ary, VALUE fmt)
116
+ static void
117
+ str_associate(VALUE str, VALUE add)
395
118
  {
396
- static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
397
- static const char spc10[] = " ";
398
- const char *p, *pend;
399
- VALUE res, from, associates = 0;
400
- char type;
401
- long items, len, idx, plen;
402
- const char *ptr;
403
- int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
404
- #ifdef NATINT_PACK
405
- int natint; /* native integer */
406
- #endif
407
- int integer_size, bigendian_p;
408
-
409
- StringValue(fmt);
410
- p = RSTRING_PTR(fmt);
411
- pend = p + RSTRING_LEN(fmt);
412
- res = rb_str_buf_new(0);
413
-
414
- items = RARRAY_LEN(ary);
415
- idx = 0;
416
-
417
- #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
418
- #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW)
419
- #define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW)
420
-
421
- while (p < pend) {
422
- int explicit_endian = 0;
423
- if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
424
- rb_raise(rb_eRuntimeError, "format string modified");
425
- }
426
- type = *p++; /* get data type */
427
- #ifdef NATINT_PACK
428
- natint = 0;
429
- #endif
430
-
431
- if (ISSPACE(type)) continue;
432
- if (type == '#') {
433
- while ((p < pend) && (*p != '\n')) {
434
- p++;
435
- }
436
- continue;
437
- }
438
-
439
- {
440
- modifiers:
441
- switch (*p) {
442
- case '_':
443
- case '!':
444
- if (strchr(natstr, type)) {
445
- #ifdef NATINT_PACK
446
- natint = 1;
447
- #endif
448
- p++;
449
- }
450
- else {
451
- rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
452
- }
453
- goto modifiers;
454
-
455
- case '<':
456
- case '>':
457
- if (!strchr(endstr, type)) {
458
- rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
459
- }
460
- if (explicit_endian) {
461
- rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
462
- }
463
- explicit_endian = *p++;
464
- goto modifiers;
465
- }
466
- }
467
-
468
- if (*p == '*') { /* set data length */
469
- len = strchr("@Xxu", type) ? 0
470
- : strchr("PMm", type) ? 1
471
- : items;
472
- p++;
473
- }
474
- else if (ISDIGIT(*p)) {
475
- errno = 0;
476
- len = STRTOUL(p, (char**)&p, 10);
477
- if (errno) {
478
- rb_raise(rb_eRangeError, "pack length too big");
479
- }
480
- }
481
- else {
482
- len = 1;
483
- }
484
-
485
- switch (type) {
486
- case 'U':
487
- /* if encoding is US-ASCII, upgrade to UTF-8 */
488
- if (enc_info == 1) enc_info = 2;
489
- break;
490
- case 'm': case 'M': case 'u':
491
- /* keep US-ASCII (do nothing) */
492
- break;
493
- default:
494
- /* fall back to BINARY */
495
- enc_info = 0;
496
- break;
497
- }
498
- switch (type) {
499
- case 'A': case 'a': case 'Z':
500
- case 'B': case 'b':
501
- case 'H': case 'h':
502
- from = NEXTFROM;
503
- if (NIL_P(from)) {
504
- ptr = "";
505
- plen = 0;
506
- }
507
- else {
508
- StringValue(from);
509
- ptr = RSTRING_PTR(from);
510
- plen = RSTRING_LEN(from);
511
- OBJ_INFECT(res, from);
512
- }
513
-
514
- if (p[-1] == '*')
515
- len = plen;
516
-
517
- switch (type) {
518
- case 'a': /* arbitrary binary string (null padded) */
519
- case 'A': /* arbitrary binary string (ASCII space padded) */
520
- case 'Z': /* null terminated string */
521
- if (plen >= len) {
522
- rb_str_buf_cat(res, ptr, len);
523
- if (p[-1] == '*' && type == 'Z')
524
- rb_str_buf_cat(res, nul10, 1);
525
- }
526
- else {
527
- rb_str_buf_cat(res, ptr, plen);
528
- len -= plen;
529
- while (len >= 10) {
530
- rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
531
- len -= 10;
532
- }
533
- rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
534
- }
535
- break;
536
-
537
- #define castchar(from) (char)((from) & 0xff)
538
-
539
- case 'b': /* bit string (ascending) */
540
- {
541
- int byte = 0;
542
- long i, j = 0;
543
-
544
- if (len > plen) {
545
- j = (len - plen + 1)/2;
546
- len = plen;
547
- }
548
- for (i=0; i++ < len; ptr++) {
549
- if (*ptr & 1)
550
- byte |= 128;
551
- if (i & 7)
552
- byte >>= 1;
553
- else {
554
- char c = castchar(byte);
555
- rb_str_buf_cat(res, &c, 1);
556
- byte = 0;
557
- }
558
- }
559
- if (len & 7) {
560
- char c;
561
- byte >>= 7 - (len & 7);
562
- c = castchar(byte);
563
- rb_str_buf_cat(res, &c, 1);
564
- }
565
- len = j;
566
- goto grow;
567
- }
568
- break;
569
-
570
- case 'B': /* bit string (descending) */
571
- {
572
- int byte = 0;
573
- long i, j = 0;
574
-
575
- if (len > plen) {
576
- j = (len - plen + 1)/2;
577
- len = plen;
578
- }
579
- for (i=0; i++ < len; ptr++) {
580
- byte |= *ptr & 1;
581
- if (i & 7)
582
- byte <<= 1;
583
- else {
584
- char c = castchar(byte);
585
- rb_str_buf_cat(res, &c, 1);
586
- byte = 0;
587
- }
588
- }
589
- if (len & 7) {
590
- char c;
591
- byte <<= 7 - (len & 7);
592
- c = castchar(byte);
593
- rb_str_buf_cat(res, &c, 1);
594
- }
595
- len = j;
596
- goto grow;
597
- }
598
- break;
599
-
600
- case 'h': /* hex string (low nibble first) */
601
- {
602
- int byte = 0;
603
- long i, j = 0;
604
-
605
- if (len > plen) {
606
- j = (len + 1) / 2 - (plen + 1) / 2;
607
- len = plen;
608
- }
609
- for (i=0; i++ < len; ptr++) {
610
- if (ISALPHA(*ptr))
611
- byte |= (((*ptr & 15) + 9) & 15) << 4;
612
- else
613
- byte |= (*ptr & 15) << 4;
614
- if (i & 1)
615
- byte >>= 4;
616
- else {
617
- char c = castchar(byte);
618
- rb_str_buf_cat(res, &c, 1);
619
- byte = 0;
620
- }
621
- }
622
- if (len & 1) {
623
- char c = castchar(byte);
624
- rb_str_buf_cat(res, &c, 1);
625
- }
626
- len = j;
627
- goto grow;
628
- }
629
- break;
630
-
631
- case 'H': /* hex string (high nibble first) */
632
- {
633
- int byte = 0;
634
- long i, j = 0;
635
-
636
- if (len > plen) {
637
- j = (len + 1) / 2 - (plen + 1) / 2;
638
- len = plen;
639
- }
640
- for (i=0; i++ < len; ptr++) {
641
- if (ISALPHA(*ptr))
642
- byte |= ((*ptr & 15) + 9) & 15;
643
- else
644
- byte |= *ptr & 15;
645
- if (i & 1)
646
- byte <<= 4;
647
- else {
648
- char c = castchar(byte);
649
- rb_str_buf_cat(res, &c, 1);
650
- byte = 0;
651
- }
652
- }
653
- if (len & 1) {
654
- char c = castchar(byte);
655
- rb_str_buf_cat(res, &c, 1);
656
- }
657
- len = j;
658
- goto grow;
659
- }
660
- break;
661
- }
662
- break;
663
-
664
- case 'c': /* signed char */
665
- case 'C': /* unsigned char */
666
- while (len-- > 0) {
667
- char c;
668
-
669
- from = NEXTFROM;
670
- c = (char)num2i32(from);
671
- rb_str_buf_cat(res, &c, sizeof(char));
672
- }
673
- break;
674
-
675
- case 's': /* s for int16_t, s! for signed short */
676
- integer_size = NATINT_LEN(short, 2);
677
- bigendian_p = BIGENDIAN_P();
678
- goto pack_integer;
679
-
680
- case 'S': /* S for uint16_t, S! for unsigned short */
681
- integer_size = NATINT_LEN(short, 2);
682
- bigendian_p = BIGENDIAN_P();
683
- goto pack_integer;
684
-
685
- case 'i': /* i and i! for signed int */
686
- integer_size = (int)sizeof(int);
687
- bigendian_p = BIGENDIAN_P();
688
- goto pack_integer;
689
-
690
- case 'I': /* I and I! for unsigned int */
691
- integer_size = (int)sizeof(int);
692
- bigendian_p = BIGENDIAN_P();
693
- goto pack_integer;
694
-
695
- case 'l': /* l for int32_t, l! for signed long */
696
- integer_size = NATINT_LEN(long, 4);
697
- bigendian_p = BIGENDIAN_P();
698
- goto pack_integer;
699
-
700
- case 'L': /* L for uint32_t, L! for unsigned long */
701
- integer_size = NATINT_LEN(long, 4);
702
- bigendian_p = BIGENDIAN_P();
703
- goto pack_integer;
704
-
705
- case 'q': /* q for int64_t, q! for signed long long */
706
- integer_size = NATINT_LEN_Q;
707
- bigendian_p = BIGENDIAN_P();
708
- goto pack_integer;
709
-
710
- case 'Q': /* Q for uint64_t, Q! for unsigned long long */
711
- integer_size = NATINT_LEN_Q;
712
- bigendian_p = BIGENDIAN_P();
713
- goto pack_integer;
714
-
715
- case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
716
- integer_size = 2;
717
- bigendian_p = 1;
718
- goto pack_integer;
719
-
720
- case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
721
- integer_size = 4;
722
- bigendian_p = 1;
723
- goto pack_integer;
724
-
725
- case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
726
- integer_size = 2;
727
- bigendian_p = 0;
728
- goto pack_integer;
729
-
730
- case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
731
- integer_size = 4;
732
- bigendian_p = 0;
733
- goto pack_integer;
734
-
735
- pack_integer:
736
- if (explicit_endian) {
737
- bigendian_p = explicit_endian == '>';
738
- }
739
-
740
- switch (integer_size) {
741
- #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
742
- case SIZEOF_INT16_T:
743
- while (len-- > 0) {
744
- union {
745
- int16_t i;
746
- char a[sizeof(int16_t)];
747
- } v;
748
-
749
- from = NEXTFROM;
750
- v.i = (int16_t)num2i32(from);
751
- if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
752
- rb_str_buf_cat(res, v.a, sizeof(int16_t));
753
- }
754
- break;
755
- #endif
756
-
757
- #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
758
- case SIZEOF_INT32_T:
759
- while (len-- > 0) {
760
- union {
761
- int32_t i;
762
- char a[sizeof(int32_t)];
763
- } v;
764
-
765
- from = NEXTFROM;
766
- v.i = (int32_t)num2i32(from);
767
- if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
768
- rb_str_buf_cat(res, v.a, sizeof(int32_t));
769
- }
770
- break;
771
- #endif
772
-
773
- #if defined(HAVE_INT64_T) && SIZEOF_LONG == SIZEOF_INT64_T && !defined(FORCE_BIG_PACK)
774
- case SIZEOF_INT64_T:
775
- while (len-- > 0) {
776
- union {
777
- int64_t i;
778
- char a[sizeof(int64_t)];
779
- } v;
780
-
781
- from = NEXTFROM;
782
- v.i = num2i32(from); /* can return 64bit value if SIZEOF_LONG == SIZEOF_INT64_T */
783
- if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
784
- rb_str_buf_cat(res, v.a, sizeof(int64_t));
785
- }
786
- break;
787
- #endif
788
-
789
- default:
790
- if (integer_size > MAX_INTEGER_PACK_SIZE)
791
- rb_bug("unexpected intger size for pack: %d", integer_size);
792
- while (len-- > 0) {
793
- union {
794
- unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG];
795
- char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG*SIZEOF_LONG];
796
- } v;
797
- int num_longs = (integer_size+SIZEOF_LONG-1)/SIZEOF_LONG;
798
- int i;
799
-
800
- from = NEXTFROM;
801
- rb_big_pack(from, v.i, num_longs);
802
- if (bigendian_p) {
803
- for (i = 0; i < num_longs/2; i++) {
804
- unsigned long t = v.i[i];
805
- v.i[i] = v.i[num_longs-1-i];
806
- v.i[num_longs-1-i] = t;
807
- }
808
- }
809
- if (bigendian_p != BIGENDIAN_P()) {
810
- for (i = 0; i < num_longs; i++)
811
- v.i[i] = swapl(v.i[i]);
812
- }
813
- rb_str_buf_cat(res,
814
- bigendian_p ?
815
- v.a + sizeof(long)*num_longs - integer_size :
816
- v.a,
817
- integer_size);
818
- }
819
- break;
820
- }
821
- break;
822
-
823
- case 'f': /* single precision float in native format */
824
- case 'F': /* ditto */
825
- while (len-- > 0) {
826
- float f;
827
-
828
- from = NEXTFROM;
829
- f = (float)RFLOAT_VALUE(rb_to_float(from));
830
- rb_str_buf_cat(res, (char*)&f, sizeof(float));
831
- }
832
- break;
833
-
834
- case 'e': /* single precision float in VAX byte-order */
835
- while (len-- > 0) {
836
- float f;
837
- FLOAT_CONVWITH(ftmp);
838
-
839
- from = NEXTFROM;
840
- f = (float)RFLOAT_VALUE(rb_to_float(from));
841
- f = HTOVF(f,ftmp);
842
- rb_str_buf_cat(res, (char*)&f, sizeof(float));
843
- }
844
- break;
845
-
846
- case 'E': /* double precision float in VAX byte-order */
847
- while (len-- > 0) {
848
- double d;
849
- DOUBLE_CONVWITH(dtmp);
850
-
851
- from = NEXTFROM;
852
- d = RFLOAT_VALUE(rb_to_float(from));
853
- d = HTOVD(d,dtmp);
854
- rb_str_buf_cat(res, (char*)&d, sizeof(double));
855
- }
856
- break;
857
-
858
- case 'd': /* double precision float in native format */
859
- case 'D': /* ditto */
860
- while (len-- > 0) {
861
- double d;
862
-
863
- from = NEXTFROM;
864
- d = RFLOAT_VALUE(rb_to_float(from));
865
- rb_str_buf_cat(res, (char*)&d, sizeof(double));
866
- }
867
- break;
868
-
869
- case 'g': /* single precision float in network byte-order */
870
- while (len-- > 0) {
871
- float f;
872
- FLOAT_CONVWITH(ftmp);
873
-
874
- from = NEXTFROM;
875
- f = (float)RFLOAT_VALUE(rb_to_float(from));
876
- f = HTONF(f,ftmp);
877
- rb_str_buf_cat(res, (char*)&f, sizeof(float));
878
- }
879
- break;
880
-
881
- case 'G': /* double precision float in network byte-order */
882
- while (len-- > 0) {
883
- double d;
884
- DOUBLE_CONVWITH(dtmp);
885
-
886
- from = NEXTFROM;
887
- d = RFLOAT_VALUE(rb_to_float(from));
888
- d = HTOND(d,dtmp);
889
- rb_str_buf_cat(res, (char*)&d, sizeof(double));
890
- }
891
- break;
892
-
893
- case 'x': /* null byte */
894
- grow:
895
- while (len >= 10) {
896
- rb_str_buf_cat(res, nul10, 10);
897
- len -= 10;
898
- }
899
- rb_str_buf_cat(res, nul10, len);
900
- break;
901
-
902
- case 'X': /* back up byte */
903
- shrink:
904
- plen = RSTRING_LEN(res);
905
- if (plen < len)
906
- rb_raise(rb_eArgError, "X outside of string");
907
- rb_str_set_len(res, plen - len);
908
- break;
909
-
910
- case '@': /* null fill to absolute position */
911
- len -= RSTRING_LEN(res);
912
- if (len > 0) goto grow;
913
- len = -len;
914
- if (len > 0) goto shrink;
915
- break;
916
-
917
- case '%':
918
- rb_raise(rb_eArgError, "%% is not supported");
919
- break;
920
-
921
- case 'U': /* Unicode character */
922
- while (len-- > 0) {
923
- SIGNED_VALUE l;
924
- char buf[8];
925
- int le;
926
-
927
- from = NEXTFROM;
928
- from = rb_to_int(from);
929
- l = NUM2LONG(from);
930
- if (l < 0) {
931
- rb_raise(rb_eRangeError, "pack(U): value out of range");
932
- }
933
- le = rb_uv_to_utf8(buf, l);
934
- rb_str_buf_cat(res, (char*)buf, le);
935
- }
936
- break;
937
-
938
- case 'u': /* uuencoded string */
939
- case 'm': /* base64 encoded string */
940
- from = NEXTFROM;
941
- StringValue(from);
942
- ptr = RSTRING_PTR(from);
943
- plen = RSTRING_LEN(from);
944
-
945
- if (len == 0 && type == 'm') {
946
- encodes(res, ptr, plen, type, 0);
947
- ptr += plen;
948
- break;
949
- }
950
- if (len <= 2)
951
- len = 45;
952
- else if (len > 63 && type == 'u')
953
- len = 63;
954
- else
955
- len = len / 3 * 3;
956
- while (plen > 0) {
957
- long todo;
958
-
959
- if (plen > len)
960
- todo = len;
961
- else
962
- todo = plen;
963
- encodes(res, ptr, todo, type, 1);
964
- plen -= todo;
965
- ptr += todo;
966
- }
967
- break;
968
-
969
- case 'M': /* quoted-printable encoded string */
970
- from = rb_obj_as_string(NEXTFROM);
971
- if (len <= 1)
972
- len = 72;
973
- qpencode(res, from, len);
974
- break;
975
-
976
- case 'P': /* pointer to packed byte string */
977
- from = THISFROM;
978
- if (!NIL_P(from)) {
979
- StringValue(from);
980
- if (RSTRING_LEN(from) < len) {
981
- rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
982
- RSTRING_LEN(from), len);
983
- }
984
- }
985
- len = 1;
986
- /* FALL THROUGH */
987
- case 'p': /* pointer to string */
988
- while (len-- > 0) {
989
- char *t;
990
- from = NEXTFROM;
991
- if (NIL_P(from)) {
992
- t = 0;
993
- }
994
- else {
995
- t = StringValuePtr(from);
996
- }
997
- if (!associates) {
998
- associates = rb_ary_new();
999
- }
1000
- rb_ary_push(associates, from);
1001
- rb_obj_taint(from);
1002
- rb_str_buf_cat(res, (char*)&t, sizeof(char*));
1003
- }
1004
- break;
1005
-
1006
- case 'w': /* BER compressed integer */
1007
- while (len-- > 0) {
1008
- unsigned long ul;
1009
- VALUE buf = rb_str_new(0, 0);
1010
- char c, *bufs, *bufe;
1011
-
1012
- from = NEXTFROM;
1013
- if (RB_TYPE_P(from, T_BIGNUM)) {
1014
- VALUE big128 = rb_uint2big(128);
1015
- while (RB_TYPE_P(from, T_BIGNUM)) {
1016
- from = rb_big_divmod(from, big128);
1017
- c = castchar(NUM2INT(RARRAY_PTR(from)[1]) | 0x80); /* mod */
1018
- rb_str_buf_cat(buf, &c, sizeof(char));
1019
- from = RARRAY_PTR(from)[0]; /* div */
1020
- }
1021
- }
1022
-
1023
- {
1024
- long l = NUM2LONG(from);
1025
- if (l < 0) {
1026
- rb_raise(rb_eArgError, "can't compress negative numbers");
1027
- }
1028
- ul = l;
1029
- }
1030
-
1031
- while (ul) {
1032
- c = castchar((ul & 0x7f) | 0x80);
1033
- rb_str_buf_cat(buf, &c, sizeof(char));
1034
- ul >>= 7;
1035
- }
1036
-
1037
- if (RSTRING_LEN(buf)) {
1038
- bufs = RSTRING_PTR(buf);
1039
- bufe = bufs + RSTRING_LEN(buf) - 1;
1040
- *bufs &= 0x7f; /* clear continue bit */
1041
- while (bufs < bufe) { /* reverse */
1042
- c = *bufs;
1043
- *bufs++ = *bufe;
1044
- *bufe-- = c;
1045
- }
1046
- rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
1047
- }
1048
- else {
1049
- c = 0;
1050
- rb_str_buf_cat(res, &c, sizeof(char));
1051
- }
1052
- }
1053
- break;
1054
-
1055
- default:
1056
- rb_warning("unknown pack directive '%c' in '%s'",
1057
- type, RSTRING_PTR(fmt));
1058
- break;
1059
- }
1060
- }
119
+ /* assert(NIL_P(rb_attr_get(str, id_associated))); */
120
+ rb_ivar_set(str, id_associated, add);
121
+ }
1061
122
 
1062
- if (associates) {
1063
- rb_str_associate(res, associates);
1064
- }
1065
- OBJ_INFECT(res, fmt);
1066
- switch (enc_info) {
1067
- case 1:
1068
- ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
1069
- break;
1070
- case 2:
1071
- rb_enc_set_index(res, rb_utf8_encindex());
1072
- break;
1073
- default:
1074
- /* do nothing, keep ASCII-8BIT */
1075
- break;
1076
- }
1077
- return res;
123
+ static VALUE
124
+ str_associated(VALUE str)
125
+ {
126
+ return rb_ivar_lookup(str, id_associated, Qfalse);
1078
127
  }
1079
128
 
1080
129
  static const char uu_table[] =
@@ -1083,12 +132,14 @@ static const char b64_table[] =
1083
132
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1084
133
 
1085
134
  static void
1086
- encodes(VALUE str, const char *s, long len, int type, int tail_lf)
135
+ encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
1087
136
  {
1088
- char buff[4096];
137
+ enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
138
+ char buff[buff_size + 1]; /* +1 for tail_lf */
1089
139
  long i = 0;
1090
- const char *trans = type == 'u' ? uu_table : b64_table;
140
+ const char *const trans = type == 'u' ? uu_table : b64_table;
1091
141
  char padding;
142
+ const unsigned char *s = (const unsigned char *)s0;
1092
143
 
1093
144
  if (type == 'u') {
1094
145
  buff[i++] = (char)len + ' ';
@@ -1097,16 +148,16 @@ encodes(VALUE str, const char *s, long len, int type, int tail_lf)
1097
148
  else {
1098
149
  padding = '=';
1099
150
  }
1100
- while (len >= 3) {
1101
- while (len >= 3 && sizeof(buff)-i >= 4) {
151
+ while (len >= input_unit) {
152
+ while (len >= input_unit && buff_size-i >= encoded_unit) {
1102
153
  buff[i++] = trans[077 & (*s >> 2)];
1103
154
  buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1104
155
  buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
1105
156
  buff[i++] = trans[077 & s[2]];
1106
- s += 3;
1107
- len -= 3;
157
+ s += input_unit;
158
+ len -= input_unit;
1108
159
  }
1109
- if (sizeof(buff)-i < 4) {
160
+ if (buff_size-i < encoded_unit) {
1110
161
  rb_str_buf_cat(str, buff, i);
1111
162
  i = 0;
1112
163
  }
@@ -1126,6 +177,7 @@ encodes(VALUE str, const char *s, long len, int type, int tail_lf)
1126
177
  }
1127
178
  if (tail_lf) buff[i++] = '\n';
1128
179
  rb_str_buf_cat(str, buff, i);
180
+ if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
1129
181
  }
1130
182
 
1131
183
  static const char hex_table[] = "0123456789ABCDEF";
@@ -1186,19 +238,11 @@ qpencode(VALUE str, VALUE from, long len)
1186
238
  static inline int
1187
239
  hex2num(char c)
1188
240
  {
1189
- switch (c) {
1190
- case '0': case '1': case '2': case '3': case '4':
1191
- case '5': case '6': case '7': case '8': case '9':
1192
- return c - '0';
1193
- case 'a': case 'b': case 'c':
1194
- case 'd': case 'e': case 'f':
1195
- return c - 'a' + 10;
1196
- case 'A': case 'B': case 'C':
1197
- case 'D': case 'E': case 'F':
1198
- return c - 'A' + 10;
1199
- default:
1200
- return -1;
1201
- }
241
+ int n;
242
+ n = ruby_digit36_to_number_table[(unsigned char)c];
243
+ if (16 <= n)
244
+ n = -1;
245
+ return n;
1202
246
  }
1203
247
 
1204
248
  #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
@@ -1212,10 +256,19 @@ hex2num(char c)
1212
256
  } while (0)
1213
257
 
1214
258
  #define PACK_ITEM_ADJUST() do { \
1215
- if (tmp_len > 0 && !block_p) \
259
+ if (tmp_len > 0 && mode == UNPACK_ARRAY) \
1216
260
  rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
1217
261
  } while (0)
1218
262
 
263
+ /* Workaround for Oracle Solaris Studio 12.4 C compiler optimization bug
264
+ * with "-xO4" optimization option.
265
+ */
266
+ #if defined(__SUNPRO_C) && __SUNPRO_C == 0x5130
267
+ # define AVOID_CC_BUG volatile
268
+ #else
269
+ # define AVOID_CC_BUG
270
+ #endif
271
+
1219
272
  static VALUE
1220
273
  infected_str_new(const char *ptr, long len, VALUE str)
1221
274
  {
@@ -1225,137 +278,31 @@ infected_str_new(const char *ptr, long len, VALUE str)
1225
278
  return s;
1226
279
  }
1227
280
 
1228
- /*
1229
- * call-seq:
1230
- * str.unpack(format) -> anArray
1231
- *
1232
- * Decodes <i>str</i> (which may contain binary data) according to the
1233
- * format string, returning an array of each value extracted. The
1234
- * format string consists of a sequence of single-character directives,
1235
- * summarized in the table at the end of this entry.
1236
- * Each directive may be followed
1237
- * by a number, indicating the number of times to repeat with this
1238
- * directive. An asterisk (``<code>*</code>'') will use up all
1239
- * remaining elements. The directives <code>sSiIlL</code> may each be
1240
- * followed by an underscore (``<code>_</code>'') or
1241
- * exclamation mark (``<code>!</code>'') to use the underlying
1242
- * platform's native size for the specified type; otherwise, it uses a
1243
- * platform-independent consistent size. Spaces are ignored in the
1244
- * format string. See also <code>Array#pack</code>.
1245
- *
1246
- * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1247
- * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1248
- * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1249
- * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1250
- * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1251
- * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1252
- * "now=20is".unpack('M*') #=> ["now is"]
1253
- * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1254
- *
1255
- * This table summarizes the various formats and the Ruby classes
1256
- * returned by each.
1257
- *
1258
- * Integer | |
1259
- * Directive | Returns | Meaning
1260
- * -----------------------------------------------------------------
1261
- * C | Integer | 8-bit unsigned (unsigned char)
1262
- * S | Integer | 16-bit unsigned, native endian (uint16_t)
1263
- * L | Integer | 32-bit unsigned, native endian (uint32_t)
1264
- * Q | Integer | 64-bit unsigned, native endian (uint64_t)
1265
- * | |
1266
- * c | Integer | 8-bit signed (signed char)
1267
- * s | Integer | 16-bit signed, native endian (int16_t)
1268
- * l | Integer | 32-bit signed, native endian (int32_t)
1269
- * q | Integer | 64-bit signed, native endian (int64_t)
1270
- * | |
1271
- * S_, S! | Integer | unsigned short, native endian
1272
- * I, I_, I! | Integer | unsigned int, native endian
1273
- * L_, L! | Integer | unsigned long, native endian
1274
- * Q_, Q! | Integer | unsigned long long, native endian (ArgumentError
1275
- * | | if the platform has no long long type.)
1276
- * | | (Q_ and Q! is available since Ruby 2.1.)
1277
- * | |
1278
- * s_, s! | Integer | signed short, native endian
1279
- * i, i_, i! | Integer | signed int, native endian
1280
- * l_, l! | Integer | signed long, native endian
1281
- * q_, q! | Integer | signed long long, native endian (ArgumentError
1282
- * | | if the platform has no long long type.)
1283
- * | | (q_ and q! is available since Ruby 2.1.)
1284
- * | |
1285
- * S> L> Q> | Integer | same as the directives without ">" except
1286
- * s> l> q> | | big endian
1287
- * S!> I!> | | (available since Ruby 1.9.3)
1288
- * L!> Q!> | | "S>" is same as "n"
1289
- * s!> i!> | | "L>" is same as "N"
1290
- * l!> q!> | |
1291
- * | |
1292
- * S< L< Q< | Integer | same as the directives without "<" except
1293
- * s< l< q< | | little endian
1294
- * S!< I!< | | (available since Ruby 1.9.3)
1295
- * L!< Q!< | | "S<" is same as "v"
1296
- * s!< i!< | | "L<" is same as "V"
1297
- * l!< q!< | |
1298
- * | |
1299
- * n | Integer | 16-bit unsigned, network (big-endian) byte order
1300
- * N | Integer | 32-bit unsigned, network (big-endian) byte order
1301
- * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
1302
- * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
1303
- * | |
1304
- * U | Integer | UTF-8 character
1305
- * w | Integer | BER-compressed integer (see Array.pack)
1306
- *
1307
- * Float | |
1308
- * Directive | Returns | Meaning
1309
- * -----------------------------------------------------------------
1310
- * D, d | Float | double-precision, native format
1311
- * F, f | Float | single-precision, native format
1312
- * E | Float | double-precision, little-endian byte order
1313
- * e | Float | single-precision, little-endian byte order
1314
- * G | Float | double-precision, network (big-endian) byte order
1315
- * g | Float | single-precision, network (big-endian) byte order
1316
- *
1317
- * String | |
1318
- * Directive | Returns | Meaning
1319
- * -----------------------------------------------------------------
1320
- * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
1321
- * a | String | arbitrary binary string
1322
- * Z | String | null-terminated string
1323
- * B | String | bit string (MSB first)
1324
- * b | String | bit string (LSB first)
1325
- * H | String | hex string (high nibble first)
1326
- * h | String | hex string (low nibble first)
1327
- * u | String | UU-encoded string
1328
- * M | String | quoted-printable, MIME encoding (see RFC2045)
1329
- * m | String | base64 encoded string (RFC 2045) (default)
1330
- * | | base64 encoded string (RFC 4648) if followed by 0
1331
- * P | String | pointer to a structure (fixed-length string)
1332
- * p | String | pointer to a null-terminated string
1333
- *
1334
- * Misc. | |
1335
- * Directive | Returns | Meaning
1336
- * -----------------------------------------------------------------
1337
- * @ | --- | skip to the offset given by the length argument
1338
- * X | --- | skip backward one byte
1339
- * x | --- | skip forward one byte
1340
- */
281
+ /* unpack mode */
282
+ #define UNPACK_ARRAY 0
283
+ #define UNPACK_BLOCK 1
284
+ #define UNPACK_1 2
285
+
286
+ #define castchar(from) (char)((from) & 0xff)
1341
287
 
1342
288
  VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1343
289
  {
1344
- static const char hexdigits[] = "0123456789abcdef";
290
+ #define hexdigits ruby_hexdigits
1345
291
  char *init_s, *s, *send;
1346
292
  char *p, *pend;
1347
293
  VALUE ary;
1348
294
  char type;
1349
- long len, tmp_len;
295
+ long len;
296
+ AVOID_CC_BUG long tmp_len;
1350
297
  int star;
1351
298
  #ifdef NATINT_PACK
1352
299
  int natint; /* native integer */
1353
300
  #endif
1354
- int block_p = rb_block_given_p();
1355
301
  int signed_p, integer_size, bigendian_p;
302
+ int mode = (rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY);
1356
303
  #define UNPACK_PUSH(item) do {\
1357
304
  VALUE item_val = (item);\
1358
- if (block_p) {\
305
+ if ((mode) == UNPACK_BLOCK) {\
1359
306
  rb_yield(item_val);\
1360
307
  }\
1361
308
  else {\
@@ -1363,14 +310,14 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1363
310
  }\
1364
311
  } while (0)
1365
312
 
1366
- // StringValue(str);
313
+ StringValue(str);
1367
314
  StringValue(fmt);
1368
315
  init_s = s = RSTRING_PTR(str);
1369
316
  send = s + RSTRING_LEN(str);
1370
317
  p = RSTRING_PTR(fmt);
1371
318
  pend = p + RSTRING_LEN(fmt);
1372
319
 
1373
- ary = block_p ? Qnil : rb_ary_new();
320
+ ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
1374
321
  while (p < pend) {
1375
322
  int explicit_endian = 0;
1376
323
  type = *p++;
@@ -1483,13 +430,14 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1483
430
  if (p[-1] == '*' || len > (send - s) * 8)
1484
431
  len = (send - s) * 8;
1485
432
  bits = 0;
1486
- UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
433
+ bitstr = rb_usascii_str_new(0, len);
1487
434
  t = RSTRING_PTR(bitstr);
1488
435
  for (i=0; i<len; i++) {
1489
436
  if (i & 7) bits >>= 1;
1490
- else bits = *s++;
437
+ else bits = (unsigned char)*s++;
1491
438
  *t++ = (bits & 1) ? '1' : '0';
1492
439
  }
440
+ UNPACK_PUSH(bitstr);
1493
441
  }
1494
442
  break;
1495
443
 
@@ -1503,13 +451,14 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1503
451
  if (p[-1] == '*' || len > (send - s) * 8)
1504
452
  len = (send - s) * 8;
1505
453
  bits = 0;
1506
- UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
454
+ bitstr = rb_usascii_str_new(0, len);
1507
455
  t = RSTRING_PTR(bitstr);
1508
456
  for (i=0; i<len; i++) {
1509
457
  if (i & 7) bits <<= 1;
1510
- else bits = *s++;
458
+ else bits = (unsigned char)*s++;
1511
459
  *t++ = (bits & 128) ? '1' : '0';
1512
460
  }
461
+ UNPACK_PUSH(bitstr);
1513
462
  }
1514
463
  break;
1515
464
 
@@ -1523,15 +472,16 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1523
472
  if (p[-1] == '*' || len > (send - s) * 2)
1524
473
  len = (send - s) * 2;
1525
474
  bits = 0;
1526
- UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
475
+ bitstr = rb_usascii_str_new(0, len);
1527
476
  t = RSTRING_PTR(bitstr);
1528
477
  for (i=0; i<len; i++) {
1529
478
  if (i & 1)
1530
479
  bits >>= 4;
1531
480
  else
1532
- bits = *s++;
481
+ bits = (unsigned char)*s++;
1533
482
  *t++ = hexdigits[bits & 15];
1534
483
  }
484
+ UNPACK_PUSH(bitstr);
1535
485
  }
1536
486
  break;
1537
487
 
@@ -1545,36 +495,30 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1545
495
  if (p[-1] == '*' || len > (send - s) * 2)
1546
496
  len = (send - s) * 2;
1547
497
  bits = 0;
1548
- UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
498
+ bitstr = rb_usascii_str_new(0, len);
1549
499
  t = RSTRING_PTR(bitstr);
1550
500
  for (i=0; i<len; i++) {
1551
501
  if (i & 1)
1552
502
  bits <<= 4;
1553
503
  else
1554
- bits = *s++;
504
+ bits = (unsigned char)*s++;
1555
505
  *t++ = hexdigits[(bits >> 4) & 15];
1556
506
  }
507
+ UNPACK_PUSH(bitstr);
1557
508
  }
1558
509
  break;
1559
510
 
1560
511
  case 'c':
1561
- PACK_LENGTH_ADJUST_SIZE(sizeof(char));
1562
- while (len-- > 0) {
1563
- int c = *s++;
1564
- if (c > (char)127) c-=256;
1565
- UNPACK_PUSH(INT2FIX(c));
1566
- }
1567
- PACK_ITEM_ADJUST();
1568
- break;
512
+ signed_p = 1;
513
+ integer_size = 1;
514
+ bigendian_p = BIGENDIAN_P(); /* not effective */
515
+ goto unpack_integer;
1569
516
 
1570
517
  case 'C':
1571
- PACK_LENGTH_ADJUST_SIZE(sizeof(unsigned char));
1572
- while (len-- > 0) {
1573
- unsigned char c = *s++;
1574
- UNPACK_PUSH(INT2FIX(c));
1575
- }
1576
- PACK_ITEM_ADJUST();
1577
- break;
518
+ signed_p = 0;
519
+ integer_size = 1;
520
+ bigendian_p = BIGENDIAN_P(); /* not effective */
521
+ goto unpack_integer;
1578
522
 
1579
523
  case 's':
1580
524
  signed_p = 1;
@@ -1624,6 +568,18 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1624
568
  bigendian_p = BIGENDIAN_P();
1625
569
  goto unpack_integer;
1626
570
 
571
+ case 'j':
572
+ signed_p = 1;
573
+ integer_size = sizeof(intptr_t);
574
+ bigendian_p = BIGENDIAN_P();
575
+ goto unpack_integer;
576
+
577
+ case 'J':
578
+ signed_p = 0;
579
+ integer_size = sizeof(uintptr_t);
580
+ bigendian_p = BIGENDIAN_P();
581
+ goto unpack_integer;
582
+
1627
583
  case 'n':
1628
584
  signed_p = 0;
1629
585
  integer_size = 2;
@@ -1652,144 +608,17 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1652
608
  if (explicit_endian) {
1653
609
  bigendian_p = explicit_endian == '>';
1654
610
  }
1655
-
1656
- switch (integer_size) {
1657
- #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
1658
- case SIZEOF_INT16_T:
1659
- if (signed_p) {
1660
- PACK_LENGTH_ADJUST_SIZE(sizeof(int16_t));
1661
- while (len-- > 0) {
1662
- union {
1663
- int16_t i;
1664
- char a[sizeof(int16_t)];
1665
- } v;
1666
- memcpy(v.a, s, sizeof(int16_t));
1667
- if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1668
- s += sizeof(int16_t);
1669
- UNPACK_PUSH(INT2FIX(v.i));
1670
- }
1671
- PACK_ITEM_ADJUST();
1672
- }
1673
- else {
1674
- PACK_LENGTH_ADJUST_SIZE(sizeof(uint16_t));
1675
- while (len-- > 0) {
1676
- union {
1677
- uint16_t i;
1678
- char a[sizeof(uint16_t)];
1679
- } v;
1680
- memcpy(v.a, s, sizeof(uint16_t));
1681
- if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1682
- s += sizeof(uint16_t);
1683
- UNPACK_PUSH(INT2FIX(v.i));
1684
- }
1685
- PACK_ITEM_ADJUST();
1686
- }
1687
- break;
1688
- #endif
1689
-
1690
- #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
1691
- case SIZEOF_INT32_T:
1692
- if (signed_p) {
1693
- PACK_LENGTH_ADJUST_SIZE(sizeof(int32_t));
1694
- while (len-- > 0) {
1695
- union {
1696
- int32_t i;
1697
- char a[sizeof(int32_t)];
1698
- } v;
1699
- memcpy(v.a, s, sizeof(int32_t));
1700
- if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1701
- s += sizeof(int32_t);
1702
- UNPACK_PUSH(INT2NUM(v.i));
1703
- }
1704
- PACK_ITEM_ADJUST();
1705
- }
1706
- else {
1707
- PACK_LENGTH_ADJUST_SIZE(sizeof(uint32_t));
1708
- while (len-- > 0) {
1709
- union {
1710
- uint32_t i;
1711
- char a[sizeof(uint32_t)];
1712
- } v;
1713
- memcpy(v.a, s, sizeof(uint32_t));
1714
- if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1715
- s += sizeof(uint32_t);
1716
- UNPACK_PUSH(UINT2NUM(v.i));
1717
- }
1718
- PACK_ITEM_ADJUST();
1719
- }
1720
- break;
1721
- #endif
1722
-
1723
- #if defined(HAVE_INT64_T) && !defined(FORCE_BIG_PACK)
1724
- case SIZEOF_INT64_T:
1725
- if (signed_p) {
1726
- PACK_LENGTH_ADJUST_SIZE(sizeof(int64_t));
1727
- while (len-- > 0) {
1728
- union {
1729
- int64_t i;
1730
- char a[sizeof(int64_t)];
1731
- } v;
1732
- memcpy(v.a, s, sizeof(int64_t));
1733
- if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1734
- s += sizeof(int64_t);
1735
- UNPACK_PUSH(INT64toNUM(v.i));
1736
- }
1737
- PACK_ITEM_ADJUST();
1738
- }
1739
- else {
1740
- PACK_LENGTH_ADJUST_SIZE(sizeof(uint64_t));
1741
- while (len-- > 0) {
1742
- union {
1743
- uint64_t i;
1744
- char a[sizeof(uint64_t)];
1745
- } v;
1746
- memcpy(v.a, s, sizeof(uint64_t));
1747
- if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1748
- s += sizeof(uint64_t);
1749
- UNPACK_PUSH(UINT64toNUM(v.i));
1750
- }
1751
- PACK_ITEM_ADJUST();
1752
- }
1753
- break;
1754
- #endif
1755
-
1756
- default:
1757
- if (integer_size > MAX_INTEGER_PACK_SIZE)
1758
- rb_bug("unexpected integer size for pack: %d", integer_size);
1759
- PACK_LENGTH_ADJUST_SIZE(integer_size);
1760
- while (len-- > 0) {
1761
- union {
1762
- unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG];
1763
- char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG*SIZEOF_LONG];
1764
- } v;
1765
- int num_longs = (integer_size+SIZEOF_LONG)/SIZEOF_LONG;
1766
- int i;
1767
-
1768
- if (signed_p && (signed char)s[bigendian_p ? 0 : (integer_size-1)] < 0)
1769
- memset(v.a, 0xff, sizeof(long)*num_longs);
1770
- else
1771
- memset(v.a, 0, sizeof(long)*num_longs);
1772
- if (bigendian_p)
1773
- memcpy(v.a + sizeof(long)*num_longs - integer_size, s, integer_size);
1774
- else
1775
- memcpy(v.a, s, integer_size);
1776
- if (bigendian_p) {
1777
- for (i = 0; i < num_longs/2; i++) {
1778
- unsigned long t = v.i[i];
1779
- v.i[i] = v.i[num_longs-1-i];
1780
- v.i[num_longs-1-i] = t;
1781
- }
1782
- }
1783
- if (bigendian_p != BIGENDIAN_P()) {
1784
- for (i = 0; i < num_longs; i++)
1785
- v.i[i] = swapl(v.i[i]);
1786
- }
1787
- s += integer_size;
1788
- UNPACK_PUSH(rb_big_unpack(v.i, num_longs));
1789
- }
1790
- PACK_ITEM_ADJUST();
1791
- break;
1792
- }
611
+ PACK_LENGTH_ADJUST_SIZE(integer_size);
612
+ while (len-- > 0) {
613
+ int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
614
+ VALUE val;
615
+ if (signed_p)
616
+ flags |= INTEGER_PACK_2COMP;
617
+ val = rb_integer_unpack(s, integer_size, 1, 0, flags);
618
+ UNPACK_PUSH(val);
619
+ s += integer_size;
620
+ }
621
+ PACK_ITEM_ADJUST();
1793
622
  break;
1794
623
 
1795
624
  case 'f':
@@ -1807,13 +636,11 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1807
636
  case 'e':
1808
637
  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1809
638
  while (len-- > 0) {
1810
- float tmp;
1811
- FLOAT_CONVWITH(ftmp);
1812
-
1813
- memcpy(&tmp, s, sizeof(float));
639
+ FLOAT_CONVWITH(tmp);
640
+ memcpy(tmp.buf, s, sizeof(float));
1814
641
  s += sizeof(float);
1815
- tmp = VTOHF(tmp,ftmp);
1816
- UNPACK_PUSH(DBL2NUM((double)tmp));
642
+ VTOHF(tmp);
643
+ UNPACK_PUSH(DBL2NUM(tmp.f));
1817
644
  }
1818
645
  PACK_ITEM_ADJUST();
1819
646
  break;
@@ -1821,13 +648,11 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1821
648
  case 'E':
1822
649
  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1823
650
  while (len-- > 0) {
1824
- double tmp;
1825
- DOUBLE_CONVWITH(dtmp);
1826
-
1827
- memcpy(&tmp, s, sizeof(double));
651
+ DOUBLE_CONVWITH(tmp);
652
+ memcpy(tmp.buf, s, sizeof(double));
1828
653
  s += sizeof(double);
1829
- tmp = VTOHD(tmp,dtmp);
1830
- UNPACK_PUSH(DBL2NUM(tmp));
654
+ VTOHD(tmp);
655
+ UNPACK_PUSH(DBL2NUM(tmp.d));
1831
656
  }
1832
657
  PACK_ITEM_ADJUST();
1833
658
  break;
@@ -1847,13 +672,11 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1847
672
  case 'g':
1848
673
  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1849
674
  while (len-- > 0) {
1850
- float tmp;
1851
- FLOAT_CONVWITH(ftmp);
1852
-
1853
- memcpy(&tmp, s, sizeof(float));
675
+ FLOAT_CONVWITH(tmp);
676
+ memcpy(tmp.buf, s, sizeof(float));
1854
677
  s += sizeof(float);
1855
- tmp = NTOHF(tmp,ftmp);
1856
- UNPACK_PUSH(DBL2NUM((double)tmp));
678
+ NTOHF(tmp);
679
+ UNPACK_PUSH(DBL2NUM(tmp.f));
1857
680
  }
1858
681
  PACK_ITEM_ADJUST();
1859
682
  break;
@@ -1861,13 +684,11 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1861
684
  case 'G':
1862
685
  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1863
686
  while (len-- > 0) {
1864
- double tmp;
1865
- DOUBLE_CONVWITH(dtmp);
1866
-
1867
- memcpy(&tmp, s, sizeof(double));
687
+ DOUBLE_CONVWITH(tmp);
688
+ memcpy(tmp.buf, s, sizeof(double));
1868
689
  s += sizeof(double);
1869
- tmp = NTOHD(tmp,dtmp);
1870
- UNPACK_PUSH(DBL2NUM(tmp));
690
+ NTOHD(tmp);
691
+ UNPACK_PUSH(DBL2NUM(tmp.d));
1871
692
  }
1872
693
  PACK_ITEM_ADJUST();
1873
694
  break;
@@ -1890,12 +711,12 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1890
711
  char *ptr = RSTRING_PTR(buf);
1891
712
  long total = 0;
1892
713
 
1893
- while (s < send && *s > ' ' && *s < 'a') {
714
+ while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1894
715
  long a,b,c,d;
1895
- char hunk[4];
716
+ char hunk[3];
717
+
718
+ len = ((unsigned char)*s++ - ' ') & 077;
1896
719
 
1897
- hunk[3] = '\0';
1898
- len = (*s++ - ' ') & 077;
1899
720
  total += len;
1900
721
  if (total > RSTRING_LEN(buf)) {
1901
722
  len -= total - RSTRING_LEN(buf);
@@ -1905,20 +726,20 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1905
726
  while (len > 0) {
1906
727
  long mlen = len > 3 ? 3 : len;
1907
728
 
1908
- if (s < send && *s >= ' ')
1909
- a = (*s++ - ' ') & 077;
729
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
730
+ a = ((unsigned char)*s++ - ' ') & 077;
1910
731
  else
1911
732
  a = 0;
1912
- if (s < send && *s >= ' ')
1913
- b = (*s++ - ' ') & 077;
733
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
734
+ b = ((unsigned char)*s++ - ' ') & 077;
1914
735
  else
1915
736
  b = 0;
1916
- if (s < send && *s >= ' ')
1917
- c = (*s++ - ' ') & 077;
737
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
738
+ c = ((unsigned char)*s++ - ' ') & 077;
1918
739
  else
1919
740
  c = 0;
1920
- if (s < send && *s >= ' ')
1921
- d = (*s++ - ' ') & 077;
741
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
742
+ d = ((unsigned char)*s++ - ' ') & 077;
1922
743
  else
1923
744
  d = 0;
1924
745
  hunk[0] = (char)(a << 2 | b >> 4);
@@ -1928,10 +749,10 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1928
749
  ptr += mlen;
1929
750
  len -= mlen;
1930
751
  }
1931
- if (*s == '\r') s++;
1932
- if (*s == '\n') s++;
1933
- else if (s < send && (s+1 == send || s[1] == '\n'))
1934
- s += 2; /* possible checksum byte */
752
+ if (s < send && (unsigned char)*s != '\r' && *s != '\n')
753
+ s++; /* possible checksum byte */
754
+ if (s < send && *s == '\r') s++;
755
+ if (s < send && *s == '\n') s++;
1935
756
  }
1936
757
 
1937
758
  rb_str_set_len(buf, total);
@@ -2078,18 +899,19 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
2078
899
  s += sizeof(char *);
2079
900
 
2080
901
  if (t) {
2081
- VALUE a, *p, *pend;
902
+ VALUE a;
903
+ const VALUE *p, *pend;
2082
904
 
2083
- if (!(a = rb_str_associated(str))) {
905
+ if (!(a = str_associated(str))) {
2084
906
  rb_raise(rb_eArgError, "no associated pointer");
2085
907
  }
2086
- p = RARRAY_PTR(a);
908
+ p = RARRAY_CONST_PTR(a);
2087
909
  pend = p + RARRAY_LEN(a);
2088
910
  while (p < pend) {
2089
911
  if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
2090
912
  if (len < RSTRING_LEN(*p)) {
2091
913
  tmp = rb_tainted_str_new(t, len);
2092
- rb_str_associate(tmp, a);
914
+ str_associate(tmp, a);
2093
915
  }
2094
916
  else {
2095
917
  tmp = *p;
@@ -2120,12 +942,13 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
2120
942
  s += sizeof(char *);
2121
943
 
2122
944
  if (t) {
2123
- VALUE a, *p, *pend;
945
+ VALUE a;
946
+ const VALUE *p, *pend;
2124
947
 
2125
- if (!(a = rb_str_associated(str))) {
948
+ if (!(a = str_associated(str))) {
2126
949
  rb_raise(rb_eArgError, "no associated pointer");
2127
950
  }
2128
- p = RARRAY_PTR(a);
951
+ p = RARRAY_CONST_PTR(a);
2129
952
  pend = p + RARRAY_LEN(a);
2130
953
  while (p < pend) {
2131
954
  if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
@@ -2145,32 +968,18 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
2145
968
 
2146
969
  case 'w':
2147
970
  {
2148
- unsigned long ul = 0;
2149
- unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8);
2150
-
2151
- while (len > 0 && s < send) {
2152
- ul <<= 7;
2153
- ul |= (*s & 0x7f);
2154
- if (!(*s++ & 0x80)) {
2155
- UNPACK_PUSH(ULONG2NUM(ul));
2156
- len--;
2157
- ul = 0;
2158
- }
2159
- else if (ul & ulmask) {
2160
- VALUE big = rb_uint2big(ul);
2161
- VALUE big128 = rb_uint2big(128);
2162
- while (s < send) {
2163
- big = rb_big_mul(big, big128);
2164
- big = rb_big_plus(big, rb_uint2big(*s & 0x7f));
2165
- if (!(*s++ & 0x80)) {
2166
- UNPACK_PUSH(big);
2167
- len--;
2168
- ul = 0;
2169
- break;
2170
- }
2171
- }
2172
- }
2173
- }
971
+ char *s0 = s;
972
+ while (len > 0 && s < send) {
973
+ if (*s & 0x80) {
974
+ s++;
975
+ }
976
+ else {
977
+ s++;
978
+ UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
979
+ len--;
980
+ s0 = s;
981
+ }
982
+ }
2174
983
  }
2175
984
  break;
2176
985
 
@@ -2185,8 +994,6 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
2185
994
  return ary;
2186
995
  }
2187
996
 
2188
- #define BYTEWIDTH 8
2189
-
2190
997
  int
2191
998
  rb_uv_to_utf8(char buf[6], unsigned long uv)
2192
999
  {