zscan 2.0.3 → 2.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/ext/pack/pack.c CHANGED
@@ -9,8 +9,6 @@
9
9
 
10
10
  **********************************************************************/
11
11
 
12
- #include "ruby/ruby.h"
13
- #include "ruby/encoding.h"
14
12
  #include "internal.h"
15
13
  #include <sys/types.h>
16
14
  #include <ctype.h>
@@ -25,11 +23,11 @@
25
23
  * This behavior is consistent with the document of pack/unpack.
26
24
  */
27
25
  #ifdef HAVE_TRUE_LONG_LONG
28
- static const char natstr[] = "sSiIlLqQ";
26
+ static const char natstr[] = "sSiIlLqQjJ";
29
27
  #else
30
- static const char natstr[] = "sSiIlL";
28
+ static const char natstr[] = "sSiIlLjJ";
31
29
  #endif
32
- static const char endstr[] = "sSiIlLqQ";
30
+ static const char endstr[] = "sSiIlLqQjJ";
33
31
 
34
32
  #ifdef HAVE_TRUE_LONG_LONG
35
33
  /* It is intentional to use long long instead of LONG_LONG. */
@@ -70,129 +68,18 @@ static const char endstr[] = "sSiIlLqQ";
70
68
  # define NATINT_LEN(type,len) ((int)sizeof(type))
71
69
  #endif
72
70
 
73
- #if SIZEOF_LONG == 8
74
- # define INT64toNUM(x) LONG2NUM(x)
75
- # define UINT64toNUM(x) ULONG2NUM(x)
76
- #elif defined(HAVE_LONG_LONG) && SIZEOF_LONG_LONG == 8
77
- # define INT64toNUM(x) LL2NUM(x)
78
- # define UINT64toNUM(x) ULL2NUM(x)
79
- #endif
80
-
81
- #define define_swapx(x, xtype) \
82
- static xtype \
83
- TOKEN_PASTE(swap,x)(xtype z) \
84
- { \
85
- xtype r; \
86
- xtype *zp; \
87
- unsigned char *s, *t; \
88
- int i; \
89
- \
90
- zp = xmalloc(sizeof(xtype)); \
91
- *zp = z; \
92
- s = (unsigned char*)zp; \
93
- t = xmalloc(sizeof(xtype)); \
94
- for (i=0; i<sizeof(xtype); i++) { \
95
- t[sizeof(xtype)-i-1] = s[i]; \
96
- } \
97
- r = *(xtype *)t; \
98
- xfree(t); \
99
- xfree(zp); \
100
- return r; \
101
- }
102
-
103
- #if GCC_VERSION_SINCE(4,3,0)
104
- # define swap32(x) __builtin_bswap32(x)
105
- # define swap64(x) __builtin_bswap64(x)
106
- #endif
107
-
108
- #ifndef swap16
109
- # define swap16(x) ((uint16_t)((((x)&0xFF)<<8) | (((x)>>8)&0xFF)))
110
- #endif
111
-
112
- #ifndef swap32
113
- # define swap32(x) ((uint32_t)((((x)&0xFF)<<24) \
114
- |(((x)>>24)&0xFF) \
115
- |(((x)&0x0000FF00)<<8) \
116
- |(((x)&0x00FF0000)>>8) ))
117
- #endif
118
-
119
- #ifndef swap64
120
- # ifdef HAVE_INT64_T
121
- # define byte_in_64bit(n) ((uint64_t)0xff << (n))
122
- # define swap64(x) ((uint64_t)((((x)&byte_in_64bit(0))<<56) \
123
- |(((x)>>56)&0xFF) \
124
- |(((x)&byte_in_64bit(8))<<40) \
125
- |(((x)&byte_in_64bit(48))>>40) \
126
- |(((x)&byte_in_64bit(16))<<24) \
127
- |(((x)&byte_in_64bit(40))>>24) \
128
- |(((x)&byte_in_64bit(24))<<8) \
129
- |(((x)&byte_in_64bit(32))>>8)))
130
- # endif
131
- #endif
132
-
133
- #if SIZEOF_SHORT == 2
134
- # define swaps(x) swap16(x)
135
- #elif SIZEOF_SHORT == 4
136
- # define swaps(x) swap32(x)
137
- #else
138
- define_swapx(s,short)
139
- #endif
140
-
141
- #if SIZEOF_INT == 2
142
- # define swapi(x) swap16(x)
143
- #elif SIZEOF_INT == 4
144
- # define swapi(x) swap32(x)
145
- #else
146
- define_swapx(i,int)
147
- #endif
148
-
149
- #if SIZEOF_LONG == 4
150
- # define swapl(x) swap32(x)
151
- #elif SIZEOF_LONG == 8
152
- # define swapl(x) swap64(x)
153
- #else
154
- define_swapx(l,long)
155
- #endif
156
-
157
- #ifdef HAVE_LONG_LONG
158
- # if SIZEOF_LONG_LONG == 8
159
- # define swapll(x) swap64(x)
160
- # else
161
- define_swapx(ll,LONG_LONG)
162
- # endif
163
- #endif
164
-
165
- #if SIZEOF_FLOAT == 4 && defined(HAVE_INT32_T)
166
- # define swapf(x) swap32(x)
167
- # define FLOAT_SWAPPER uint32_t
168
- #else
169
- define_swapx(f,float)
170
- #endif
171
-
172
- #if SIZEOF_DOUBLE == 8 && defined(HAVE_INT64_T)
173
- # define swapd(x) swap64(x)
174
- # define DOUBLE_SWAPPER uint64_t
175
- #elif SIZEOF_DOUBLE == 8 && defined(HAVE_INT32_T)
176
- static double
177
- swapd(const double d)
178
- {
179
- double dtmp = d;
180
- uint32_t utmp[2];
181
- uint32_t utmp0;
182
-
183
- utmp[0] = 0; utmp[1] = 0;
184
- memcpy(utmp,&dtmp,sizeof(double));
185
- utmp0 = utmp[0];
186
- utmp[0] = swap32(utmp[1]);
187
- utmp[1] = swap32(utmp0);
188
- memcpy(&dtmp,utmp,sizeof(double));
189
- return dtmp;
190
- }
191
- #else
192
- define_swapx(d, double)
193
- #endif
194
-
195
- #undef define_swapx
71
+ typedef union {
72
+ float f;
73
+ uint32_t u;
74
+ char buf[4];
75
+ } FLOAT_SWAPPER;
76
+ typedef union {
77
+ double d;
78
+ uint64_t u;
79
+ char buf[8];
80
+ } DOUBLE_SWAPPER;
81
+ #define swapf(x) swap32(x)
82
+ #define swapd(x) swap64(x)
196
83
 
197
84
  #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
198
85
  #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
@@ -203,74 +90,19 @@ TOKEN_PASTE(swap,x)(xtype z) \
203
90
  #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
204
91
  #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
205
92
 
206
- #ifdef FLOAT_SWAPPER
207
- # define FLOAT_CONVWITH(y) FLOAT_SWAPPER y;
208
- # define HTONF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
209
- (y) = rb_htonf((FLOAT_SWAPPER)(y)), \
210
- memcpy(&(x),&(y),sizeof(float)), \
211
- (x))
212
- # define HTOVF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
213
- (y) = rb_htovf((FLOAT_SWAPPER)(y)), \
214
- memcpy(&(x),&(y),sizeof(float)), \
215
- (x))
216
- # define NTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
217
- (y) = rb_ntohf((FLOAT_SWAPPER)(y)), \
218
- memcpy(&(x),&(y),sizeof(float)), \
219
- (x))
220
- # define VTOHF(x,y) (memcpy(&(y),&(x),sizeof(float)), \
221
- (y) = rb_vtohf((FLOAT_SWAPPER)(y)), \
222
- memcpy(&(x),&(y),sizeof(float)), \
223
- (x))
224
- #else
225
- # define FLOAT_CONVWITH(y)
226
- # define HTONF(x,y) rb_htonf(x)
227
- # define HTOVF(x,y) rb_htovf(x)
228
- # define NTOHF(x,y) rb_ntohf(x)
229
- # define VTOHF(x,y) rb_vtohf(x)
230
- #endif
93
+ #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
94
+ #define HTONF(x) ((x).u = rb_htonf((x).u))
95
+ #define HTOVF(x) ((x).u = rb_htovf((x).u))
96
+ #define NTOHF(x) ((x).u = rb_ntohf((x).u))
97
+ #define VTOHF(x) ((x).u = rb_vtohf((x).u))
231
98
 
232
- #ifdef DOUBLE_SWAPPER
233
- # define DOUBLE_CONVWITH(y) DOUBLE_SWAPPER y;
234
- # define HTOND(x,y) (memcpy(&(y),&(x),sizeof(double)), \
235
- (y) = rb_htond((DOUBLE_SWAPPER)(y)), \
236
- memcpy(&(x),&(y),sizeof(double)), \
237
- (x))
238
- # define HTOVD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
239
- (y) = rb_htovd((DOUBLE_SWAPPER)(y)), \
240
- memcpy(&(x),&(y),sizeof(double)), \
241
- (x))
242
- # define NTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
243
- (y) = rb_ntohd((DOUBLE_SWAPPER)(y)), \
244
- memcpy(&(x),&(y),sizeof(double)), \
245
- (x))
246
- # define VTOHD(x,y) (memcpy(&(y),&(x),sizeof(double)), \
247
- (y) = rb_vtohd((DOUBLE_SWAPPER)(y)), \
248
- memcpy(&(x),&(y),sizeof(double)), \
249
- (x))
250
- #else
251
- # define DOUBLE_CONVWITH(y)
252
- # define HTOND(x,y) rb_htond(x)
253
- # define HTOVD(x,y) rb_htovd(x)
254
- # define NTOHD(x,y) rb_ntohd(x)
255
- # define VTOHD(x,y) rb_vtohd(x)
256
- #endif
257
-
258
- static unsigned long
259
- num2i32(VALUE x)
260
- {
261
- x = rb_to_int(x); /* is nil OK? (should not) */
262
-
263
- if (FIXNUM_P(x)) return FIX2LONG(x);
264
- if (RB_TYPE_P(x, T_BIGNUM)) {
265
- return rb_big2ulong_pack(x);
266
- }
267
- rb_raise(rb_eTypeError, "can't convert %s to `integer'", rb_obj_classname(x));
268
-
269
- UNREACHABLE;
270
- }
99
+ #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
100
+ #define HTOND(x) ((x).u = rb_htond((x).u))
101
+ #define HTOVD(x) ((x).u = rb_htovd((x).u))
102
+ #define NTOHD(x) ((x).u = rb_ntohd((x).u))
103
+ #define VTOHD(x) ((x).u = rb_vtohd((x).u))
271
104
 
272
105
  #define MAX_INTEGER_PACK_SIZE 8
273
- /* #define FORCE_BIG_PACK */
274
106
 
275
107
  static const char toofew[] = "too few arguments";
276
108
 
@@ -279,802 +111,19 @@ static void qpencode(VALUE,VALUE,long);
279
111
 
280
112
  static unsigned long utf8_to_uv(const char*,long*);
281
113
 
282
- /*
283
- * call-seq:
284
- * arr.pack ( aTemplateString ) -> aBinaryString
285
- *
286
- * Packs the contents of <i>arr</i> into a binary sequence according to
287
- * the directives in <i>aTemplateString</i> (see the table below)
288
- * Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
289
- * which gives the width of the resulting field. The remaining
290
- * directives also may take a count, indicating the number of array
291
- * elements to convert. If the count is an asterisk
292
- * (``<code>*</code>''), all remaining array elements will be
293
- * converted. Any of the directives ``<code>sSiIlL</code>'' may be
294
- * followed by an underscore (``<code>_</code>'') or
295
- * exclamation mark (``<code>!</code>'') to use the underlying
296
- * platform's native size for the specified type; otherwise, they use a
297
- * platform-independent size. Spaces are ignored in the template
298
- * string. See also <code>String#unpack</code>.
299
- *
300
- * a = [ "a", "b", "c" ]
301
- * n = [ 65, 66, 67 ]
302
- * a.pack("A3A3A3") #=> "a b c "
303
- * a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
304
- * n.pack("ccc") #=> "ABC"
305
- *
306
- * Directives for +pack+.
307
- *
308
- * Integer | Array |
309
- * Directive | Element | Meaning
310
- * ---------------------------------------------------------------------------
311
- * C | Integer | 8-bit unsigned (unsigned char)
312
- * S | Integer | 16-bit unsigned, native endian (uint16_t)
313
- * L | Integer | 32-bit unsigned, native endian (uint32_t)
314
- * Q | Integer | 64-bit unsigned, native endian (uint64_t)
315
- * | |
316
- * c | Integer | 8-bit signed (signed char)
317
- * s | Integer | 16-bit signed, native endian (int16_t)
318
- * l | Integer | 32-bit signed, native endian (int32_t)
319
- * q | Integer | 64-bit signed, native endian (int64_t)
320
- * | |
321
- * S_, S! | Integer | unsigned short, native endian
322
- * I, I_, I! | Integer | unsigned int, native endian
323
- * L_, L! | Integer | unsigned long, native endian
324
- * Q_, Q! | Integer | unsigned long long, native endian (ArgumentError
325
- * | | if the platform has no long long type.)
326
- * | | (Q_ and Q! is available since Ruby 2.1.)
327
- * | |
328
- * s_, s! | Integer | signed short, native endian
329
- * i, i_, i! | Integer | signed int, native endian
330
- * l_, l! | Integer | signed long, native endian
331
- * q_, q! | Integer | signed long long, native endian (ArgumentError
332
- * | | if the platform has no long long type.)
333
- * | | (q_ and q! is available since Ruby 2.1.)
334
- * | |
335
- * S> L> Q> | Integer | same as the directives without ">" except
336
- * s> l> q> | | big endian
337
- * S!> I!> | | (available since Ruby 1.9.3)
338
- * L!> Q!> | | "S>" is same as "n"
339
- * s!> i!> | | "L>" is same as "N"
340
- * l!> q!> | |
341
- * | |
342
- * S< L< Q< | Integer | same as the directives without "<" except
343
- * s< l< q< | | little endian
344
- * S!< I!< | | (available since Ruby 1.9.3)
345
- * L!< Q!< | | "S<" is same as "v"
346
- * s!< i!< | | "L<" is same as "V"
347
- * l!< q!< | |
348
- * | |
349
- * n | Integer | 16-bit unsigned, network (big-endian) byte order
350
- * N | Integer | 32-bit unsigned, network (big-endian) byte order
351
- * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
352
- * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
353
- * | |
354
- * U | Integer | UTF-8 character
355
- * w | Integer | BER-compressed integer
356
- *
357
- * Float | |
358
- * Directive | | Meaning
359
- * ---------------------------------------------------------------------------
360
- * D, d | Float | double-precision, native format
361
- * F, f | Float | single-precision, native format
362
- * E | Float | double-precision, little-endian byte order
363
- * e | Float | single-precision, little-endian byte order
364
- * G | Float | double-precision, network (big-endian) byte order
365
- * g | Float | single-precision, network (big-endian) byte order
366
- *
367
- * String | |
368
- * Directive | | Meaning
369
- * ---------------------------------------------------------------------------
370
- * A | String | arbitrary binary string (space padded, count is width)
371
- * a | String | arbitrary binary string (null padded, count is width)
372
- * Z | String | same as ``a'', except that null is added with *
373
- * B | String | bit string (MSB first)
374
- * b | String | bit string (LSB first)
375
- * H | String | hex string (high nibble first)
376
- * h | String | hex string (low nibble first)
377
- * u | String | UU-encoded string
378
- * M | String | quoted printable, MIME encoding (see RFC2045)
379
- * m | String | base64 encoded string (see RFC 2045, count is width)
380
- * | | (if count is 0, no line feed are added, see RFC 4648)
381
- * P | String | pointer to a structure (fixed-length string)
382
- * p | String | pointer to a null-terminated string
383
- *
384
- * Misc. | |
385
- * Directive | | Meaning
386
- * ---------------------------------------------------------------------------
387
- * @ | --- | moves to absolute position
388
- * X | --- | back up a byte
389
- * x | --- | null byte
390
- */
114
+ static ID id_associated;
391
115
 
392
- __attribute__ ((unused))
393
- static VALUE
394
- pack_pack(VALUE ary, VALUE fmt)
116
+ static void
117
+ str_associate(VALUE str, VALUE add)
395
118
  {
396
- static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
397
- static const char spc10[] = " ";
398
- const char *p, *pend;
399
- VALUE res, from, associates = 0;
400
- char type;
401
- long items, len, idx, plen;
402
- const char *ptr;
403
- int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
404
- #ifdef NATINT_PACK
405
- int natint; /* native integer */
406
- #endif
407
- int integer_size, bigendian_p;
408
-
409
- StringValue(fmt);
410
- p = RSTRING_PTR(fmt);
411
- pend = p + RSTRING_LEN(fmt);
412
- res = rb_str_buf_new(0);
413
-
414
- items = RARRAY_LEN(ary);
415
- idx = 0;
416
-
417
- #define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
418
- #define THISFROM (items > 0 ? RARRAY_PTR(ary)[idx] : TOO_FEW)
419
- #define NEXTFROM (items-- > 0 ? RARRAY_PTR(ary)[idx++] : TOO_FEW)
420
-
421
- while (p < pend) {
422
- int explicit_endian = 0;
423
- if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
424
- rb_raise(rb_eRuntimeError, "format string modified");
425
- }
426
- type = *p++; /* get data type */
427
- #ifdef NATINT_PACK
428
- natint = 0;
429
- #endif
430
-
431
- if (ISSPACE(type)) continue;
432
- if (type == '#') {
433
- while ((p < pend) && (*p != '\n')) {
434
- p++;
435
- }
436
- continue;
437
- }
438
-
439
- {
440
- modifiers:
441
- switch (*p) {
442
- case '_':
443
- case '!':
444
- if (strchr(natstr, type)) {
445
- #ifdef NATINT_PACK
446
- natint = 1;
447
- #endif
448
- p++;
449
- }
450
- else {
451
- rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
452
- }
453
- goto modifiers;
454
-
455
- case '<':
456
- case '>':
457
- if (!strchr(endstr, type)) {
458
- rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
459
- }
460
- if (explicit_endian) {
461
- rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
462
- }
463
- explicit_endian = *p++;
464
- goto modifiers;
465
- }
466
- }
467
-
468
- if (*p == '*') { /* set data length */
469
- len = strchr("@Xxu", type) ? 0
470
- : strchr("PMm", type) ? 1
471
- : items;
472
- p++;
473
- }
474
- else if (ISDIGIT(*p)) {
475
- errno = 0;
476
- len = STRTOUL(p, (char**)&p, 10);
477
- if (errno) {
478
- rb_raise(rb_eRangeError, "pack length too big");
479
- }
480
- }
481
- else {
482
- len = 1;
483
- }
484
-
485
- switch (type) {
486
- case 'U':
487
- /* if encoding is US-ASCII, upgrade to UTF-8 */
488
- if (enc_info == 1) enc_info = 2;
489
- break;
490
- case 'm': case 'M': case 'u':
491
- /* keep US-ASCII (do nothing) */
492
- break;
493
- default:
494
- /* fall back to BINARY */
495
- enc_info = 0;
496
- break;
497
- }
498
- switch (type) {
499
- case 'A': case 'a': case 'Z':
500
- case 'B': case 'b':
501
- case 'H': case 'h':
502
- from = NEXTFROM;
503
- if (NIL_P(from)) {
504
- ptr = "";
505
- plen = 0;
506
- }
507
- else {
508
- StringValue(from);
509
- ptr = RSTRING_PTR(from);
510
- plen = RSTRING_LEN(from);
511
- OBJ_INFECT(res, from);
512
- }
513
-
514
- if (p[-1] == '*')
515
- len = plen;
516
-
517
- switch (type) {
518
- case 'a': /* arbitrary binary string (null padded) */
519
- case 'A': /* arbitrary binary string (ASCII space padded) */
520
- case 'Z': /* null terminated string */
521
- if (plen >= len) {
522
- rb_str_buf_cat(res, ptr, len);
523
- if (p[-1] == '*' && type == 'Z')
524
- rb_str_buf_cat(res, nul10, 1);
525
- }
526
- else {
527
- rb_str_buf_cat(res, ptr, plen);
528
- len -= plen;
529
- while (len >= 10) {
530
- rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
531
- len -= 10;
532
- }
533
- rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
534
- }
535
- break;
536
-
537
- #define castchar(from) (char)((from) & 0xff)
538
-
539
- case 'b': /* bit string (ascending) */
540
- {
541
- int byte = 0;
542
- long i, j = 0;
543
-
544
- if (len > plen) {
545
- j = (len - plen + 1)/2;
546
- len = plen;
547
- }
548
- for (i=0; i++ < len; ptr++) {
549
- if (*ptr & 1)
550
- byte |= 128;
551
- if (i & 7)
552
- byte >>= 1;
553
- else {
554
- char c = castchar(byte);
555
- rb_str_buf_cat(res, &c, 1);
556
- byte = 0;
557
- }
558
- }
559
- if (len & 7) {
560
- char c;
561
- byte >>= 7 - (len & 7);
562
- c = castchar(byte);
563
- rb_str_buf_cat(res, &c, 1);
564
- }
565
- len = j;
566
- goto grow;
567
- }
568
- break;
569
-
570
- case 'B': /* bit string (descending) */
571
- {
572
- int byte = 0;
573
- long i, j = 0;
574
-
575
- if (len > plen) {
576
- j = (len - plen + 1)/2;
577
- len = plen;
578
- }
579
- for (i=0; i++ < len; ptr++) {
580
- byte |= *ptr & 1;
581
- if (i & 7)
582
- byte <<= 1;
583
- else {
584
- char c = castchar(byte);
585
- rb_str_buf_cat(res, &c, 1);
586
- byte = 0;
587
- }
588
- }
589
- if (len & 7) {
590
- char c;
591
- byte <<= 7 - (len & 7);
592
- c = castchar(byte);
593
- rb_str_buf_cat(res, &c, 1);
594
- }
595
- len = j;
596
- goto grow;
597
- }
598
- break;
599
-
600
- case 'h': /* hex string (low nibble first) */
601
- {
602
- int byte = 0;
603
- long i, j = 0;
604
-
605
- if (len > plen) {
606
- j = (len + 1) / 2 - (plen + 1) / 2;
607
- len = plen;
608
- }
609
- for (i=0; i++ < len; ptr++) {
610
- if (ISALPHA(*ptr))
611
- byte |= (((*ptr & 15) + 9) & 15) << 4;
612
- else
613
- byte |= (*ptr & 15) << 4;
614
- if (i & 1)
615
- byte >>= 4;
616
- else {
617
- char c = castchar(byte);
618
- rb_str_buf_cat(res, &c, 1);
619
- byte = 0;
620
- }
621
- }
622
- if (len & 1) {
623
- char c = castchar(byte);
624
- rb_str_buf_cat(res, &c, 1);
625
- }
626
- len = j;
627
- goto grow;
628
- }
629
- break;
630
-
631
- case 'H': /* hex string (high nibble first) */
632
- {
633
- int byte = 0;
634
- long i, j = 0;
635
-
636
- if (len > plen) {
637
- j = (len + 1) / 2 - (plen + 1) / 2;
638
- len = plen;
639
- }
640
- for (i=0; i++ < len; ptr++) {
641
- if (ISALPHA(*ptr))
642
- byte |= ((*ptr & 15) + 9) & 15;
643
- else
644
- byte |= *ptr & 15;
645
- if (i & 1)
646
- byte <<= 4;
647
- else {
648
- char c = castchar(byte);
649
- rb_str_buf_cat(res, &c, 1);
650
- byte = 0;
651
- }
652
- }
653
- if (len & 1) {
654
- char c = castchar(byte);
655
- rb_str_buf_cat(res, &c, 1);
656
- }
657
- len = j;
658
- goto grow;
659
- }
660
- break;
661
- }
662
- break;
663
-
664
- case 'c': /* signed char */
665
- case 'C': /* unsigned char */
666
- while (len-- > 0) {
667
- char c;
668
-
669
- from = NEXTFROM;
670
- c = (char)num2i32(from);
671
- rb_str_buf_cat(res, &c, sizeof(char));
672
- }
673
- break;
674
-
675
- case 's': /* s for int16_t, s! for signed short */
676
- integer_size = NATINT_LEN(short, 2);
677
- bigendian_p = BIGENDIAN_P();
678
- goto pack_integer;
679
-
680
- case 'S': /* S for uint16_t, S! for unsigned short */
681
- integer_size = NATINT_LEN(short, 2);
682
- bigendian_p = BIGENDIAN_P();
683
- goto pack_integer;
684
-
685
- case 'i': /* i and i! for signed int */
686
- integer_size = (int)sizeof(int);
687
- bigendian_p = BIGENDIAN_P();
688
- goto pack_integer;
689
-
690
- case 'I': /* I and I! for unsigned int */
691
- integer_size = (int)sizeof(int);
692
- bigendian_p = BIGENDIAN_P();
693
- goto pack_integer;
694
-
695
- case 'l': /* l for int32_t, l! for signed long */
696
- integer_size = NATINT_LEN(long, 4);
697
- bigendian_p = BIGENDIAN_P();
698
- goto pack_integer;
699
-
700
- case 'L': /* L for uint32_t, L! for unsigned long */
701
- integer_size = NATINT_LEN(long, 4);
702
- bigendian_p = BIGENDIAN_P();
703
- goto pack_integer;
704
-
705
- case 'q': /* q for int64_t, q! for signed long long */
706
- integer_size = NATINT_LEN_Q;
707
- bigendian_p = BIGENDIAN_P();
708
- goto pack_integer;
709
-
710
- case 'Q': /* Q for uint64_t, Q! for unsigned long long */
711
- integer_size = NATINT_LEN_Q;
712
- bigendian_p = BIGENDIAN_P();
713
- goto pack_integer;
714
-
715
- case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
716
- integer_size = 2;
717
- bigendian_p = 1;
718
- goto pack_integer;
719
-
720
- case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
721
- integer_size = 4;
722
- bigendian_p = 1;
723
- goto pack_integer;
724
-
725
- case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
726
- integer_size = 2;
727
- bigendian_p = 0;
728
- goto pack_integer;
729
-
730
- case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
731
- integer_size = 4;
732
- bigendian_p = 0;
733
- goto pack_integer;
734
-
735
- pack_integer:
736
- if (explicit_endian) {
737
- bigendian_p = explicit_endian == '>';
738
- }
739
-
740
- switch (integer_size) {
741
- #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
742
- case SIZEOF_INT16_T:
743
- while (len-- > 0) {
744
- union {
745
- int16_t i;
746
- char a[sizeof(int16_t)];
747
- } v;
748
-
749
- from = NEXTFROM;
750
- v.i = (int16_t)num2i32(from);
751
- if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
752
- rb_str_buf_cat(res, v.a, sizeof(int16_t));
753
- }
754
- break;
755
- #endif
756
-
757
- #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
758
- case SIZEOF_INT32_T:
759
- while (len-- > 0) {
760
- union {
761
- int32_t i;
762
- char a[sizeof(int32_t)];
763
- } v;
764
-
765
- from = NEXTFROM;
766
- v.i = (int32_t)num2i32(from);
767
- if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
768
- rb_str_buf_cat(res, v.a, sizeof(int32_t));
769
- }
770
- break;
771
- #endif
772
-
773
- #if defined(HAVE_INT64_T) && SIZEOF_LONG == SIZEOF_INT64_T && !defined(FORCE_BIG_PACK)
774
- case SIZEOF_INT64_T:
775
- while (len-- > 0) {
776
- union {
777
- int64_t i;
778
- char a[sizeof(int64_t)];
779
- } v;
780
-
781
- from = NEXTFROM;
782
- v.i = num2i32(from); /* can return 64bit value if SIZEOF_LONG == SIZEOF_INT64_T */
783
- if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
784
- rb_str_buf_cat(res, v.a, sizeof(int64_t));
785
- }
786
- break;
787
- #endif
788
-
789
- default:
790
- if (integer_size > MAX_INTEGER_PACK_SIZE)
791
- rb_bug("unexpected intger size for pack: %d", integer_size);
792
- while (len-- > 0) {
793
- union {
794
- unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG];
795
- char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG-1)/SIZEOF_LONG*SIZEOF_LONG];
796
- } v;
797
- int num_longs = (integer_size+SIZEOF_LONG-1)/SIZEOF_LONG;
798
- int i;
799
-
800
- from = NEXTFROM;
801
- rb_big_pack(from, v.i, num_longs);
802
- if (bigendian_p) {
803
- for (i = 0; i < num_longs/2; i++) {
804
- unsigned long t = v.i[i];
805
- v.i[i] = v.i[num_longs-1-i];
806
- v.i[num_longs-1-i] = t;
807
- }
808
- }
809
- if (bigendian_p != BIGENDIAN_P()) {
810
- for (i = 0; i < num_longs; i++)
811
- v.i[i] = swapl(v.i[i]);
812
- }
813
- rb_str_buf_cat(res,
814
- bigendian_p ?
815
- v.a + sizeof(long)*num_longs - integer_size :
816
- v.a,
817
- integer_size);
818
- }
819
- break;
820
- }
821
- break;
822
-
823
- case 'f': /* single precision float in native format */
824
- case 'F': /* ditto */
825
- while (len-- > 0) {
826
- float f;
827
-
828
- from = NEXTFROM;
829
- f = (float)RFLOAT_VALUE(rb_to_float(from));
830
- rb_str_buf_cat(res, (char*)&f, sizeof(float));
831
- }
832
- break;
833
-
834
- case 'e': /* single precision float in VAX byte-order */
835
- while (len-- > 0) {
836
- float f;
837
- FLOAT_CONVWITH(ftmp);
838
-
839
- from = NEXTFROM;
840
- f = (float)RFLOAT_VALUE(rb_to_float(from));
841
- f = HTOVF(f,ftmp);
842
- rb_str_buf_cat(res, (char*)&f, sizeof(float));
843
- }
844
- break;
845
-
846
- case 'E': /* double precision float in VAX byte-order */
847
- while (len-- > 0) {
848
- double d;
849
- DOUBLE_CONVWITH(dtmp);
850
-
851
- from = NEXTFROM;
852
- d = RFLOAT_VALUE(rb_to_float(from));
853
- d = HTOVD(d,dtmp);
854
- rb_str_buf_cat(res, (char*)&d, sizeof(double));
855
- }
856
- break;
857
-
858
- case 'd': /* double precision float in native format */
859
- case 'D': /* ditto */
860
- while (len-- > 0) {
861
- double d;
862
-
863
- from = NEXTFROM;
864
- d = RFLOAT_VALUE(rb_to_float(from));
865
- rb_str_buf_cat(res, (char*)&d, sizeof(double));
866
- }
867
- break;
868
-
869
- case 'g': /* single precision float in network byte-order */
870
- while (len-- > 0) {
871
- float f;
872
- FLOAT_CONVWITH(ftmp);
873
-
874
- from = NEXTFROM;
875
- f = (float)RFLOAT_VALUE(rb_to_float(from));
876
- f = HTONF(f,ftmp);
877
- rb_str_buf_cat(res, (char*)&f, sizeof(float));
878
- }
879
- break;
880
-
881
- case 'G': /* double precision float in network byte-order */
882
- while (len-- > 0) {
883
- double d;
884
- DOUBLE_CONVWITH(dtmp);
885
-
886
- from = NEXTFROM;
887
- d = RFLOAT_VALUE(rb_to_float(from));
888
- d = HTOND(d,dtmp);
889
- rb_str_buf_cat(res, (char*)&d, sizeof(double));
890
- }
891
- break;
892
-
893
- case 'x': /* null byte */
894
- grow:
895
- while (len >= 10) {
896
- rb_str_buf_cat(res, nul10, 10);
897
- len -= 10;
898
- }
899
- rb_str_buf_cat(res, nul10, len);
900
- break;
901
-
902
- case 'X': /* back up byte */
903
- shrink:
904
- plen = RSTRING_LEN(res);
905
- if (plen < len)
906
- rb_raise(rb_eArgError, "X outside of string");
907
- rb_str_set_len(res, plen - len);
908
- break;
909
-
910
- case '@': /* null fill to absolute position */
911
- len -= RSTRING_LEN(res);
912
- if (len > 0) goto grow;
913
- len = -len;
914
- if (len > 0) goto shrink;
915
- break;
916
-
917
- case '%':
918
- rb_raise(rb_eArgError, "%% is not supported");
919
- break;
920
-
921
- case 'U': /* Unicode character */
922
- while (len-- > 0) {
923
- SIGNED_VALUE l;
924
- char buf[8];
925
- int le;
926
-
927
- from = NEXTFROM;
928
- from = rb_to_int(from);
929
- l = NUM2LONG(from);
930
- if (l < 0) {
931
- rb_raise(rb_eRangeError, "pack(U): value out of range");
932
- }
933
- le = rb_uv_to_utf8(buf, l);
934
- rb_str_buf_cat(res, (char*)buf, le);
935
- }
936
- break;
937
-
938
- case 'u': /* uuencoded string */
939
- case 'm': /* base64 encoded string */
940
- from = NEXTFROM;
941
- StringValue(from);
942
- ptr = RSTRING_PTR(from);
943
- plen = RSTRING_LEN(from);
944
-
945
- if (len == 0 && type == 'm') {
946
- encodes(res, ptr, plen, type, 0);
947
- ptr += plen;
948
- break;
949
- }
950
- if (len <= 2)
951
- len = 45;
952
- else if (len > 63 && type == 'u')
953
- len = 63;
954
- else
955
- len = len / 3 * 3;
956
- while (plen > 0) {
957
- long todo;
958
-
959
- if (plen > len)
960
- todo = len;
961
- else
962
- todo = plen;
963
- encodes(res, ptr, todo, type, 1);
964
- plen -= todo;
965
- ptr += todo;
966
- }
967
- break;
968
-
969
- case 'M': /* quoted-printable encoded string */
970
- from = rb_obj_as_string(NEXTFROM);
971
- if (len <= 1)
972
- len = 72;
973
- qpencode(res, from, len);
974
- break;
975
-
976
- case 'P': /* pointer to packed byte string */
977
- from = THISFROM;
978
- if (!NIL_P(from)) {
979
- StringValue(from);
980
- if (RSTRING_LEN(from) < len) {
981
- rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
982
- RSTRING_LEN(from), len);
983
- }
984
- }
985
- len = 1;
986
- /* FALL THROUGH */
987
- case 'p': /* pointer to string */
988
- while (len-- > 0) {
989
- char *t;
990
- from = NEXTFROM;
991
- if (NIL_P(from)) {
992
- t = 0;
993
- }
994
- else {
995
- t = StringValuePtr(from);
996
- }
997
- if (!associates) {
998
- associates = rb_ary_new();
999
- }
1000
- rb_ary_push(associates, from);
1001
- rb_obj_taint(from);
1002
- rb_str_buf_cat(res, (char*)&t, sizeof(char*));
1003
- }
1004
- break;
1005
-
1006
- case 'w': /* BER compressed integer */
1007
- while (len-- > 0) {
1008
- unsigned long ul;
1009
- VALUE buf = rb_str_new(0, 0);
1010
- char c, *bufs, *bufe;
1011
-
1012
- from = NEXTFROM;
1013
- if (RB_TYPE_P(from, T_BIGNUM)) {
1014
- VALUE big128 = rb_uint2big(128);
1015
- while (RB_TYPE_P(from, T_BIGNUM)) {
1016
- from = rb_big_divmod(from, big128);
1017
- c = castchar(NUM2INT(RARRAY_PTR(from)[1]) | 0x80); /* mod */
1018
- rb_str_buf_cat(buf, &c, sizeof(char));
1019
- from = RARRAY_PTR(from)[0]; /* div */
1020
- }
1021
- }
1022
-
1023
- {
1024
- long l = NUM2LONG(from);
1025
- if (l < 0) {
1026
- rb_raise(rb_eArgError, "can't compress negative numbers");
1027
- }
1028
- ul = l;
1029
- }
1030
-
1031
- while (ul) {
1032
- c = castchar((ul & 0x7f) | 0x80);
1033
- rb_str_buf_cat(buf, &c, sizeof(char));
1034
- ul >>= 7;
1035
- }
1036
-
1037
- if (RSTRING_LEN(buf)) {
1038
- bufs = RSTRING_PTR(buf);
1039
- bufe = bufs + RSTRING_LEN(buf) - 1;
1040
- *bufs &= 0x7f; /* clear continue bit */
1041
- while (bufs < bufe) { /* reverse */
1042
- c = *bufs;
1043
- *bufs++ = *bufe;
1044
- *bufe-- = c;
1045
- }
1046
- rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
1047
- }
1048
- else {
1049
- c = 0;
1050
- rb_str_buf_cat(res, &c, sizeof(char));
1051
- }
1052
- }
1053
- break;
1054
-
1055
- default:
1056
- rb_warning("unknown pack directive '%c' in '%s'",
1057
- type, RSTRING_PTR(fmt));
1058
- break;
1059
- }
1060
- }
119
+ /* assert(NIL_P(rb_attr_get(str, id_associated))); */
120
+ rb_ivar_set(str, id_associated, add);
121
+ }
1061
122
 
1062
- if (associates) {
1063
- rb_str_associate(res, associates);
1064
- }
1065
- OBJ_INFECT(res, fmt);
1066
- switch (enc_info) {
1067
- case 1:
1068
- ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
1069
- break;
1070
- case 2:
1071
- rb_enc_set_index(res, rb_utf8_encindex());
1072
- break;
1073
- default:
1074
- /* do nothing, keep ASCII-8BIT */
1075
- break;
1076
- }
1077
- return res;
123
+ static VALUE
124
+ str_associated(VALUE str)
125
+ {
126
+ return rb_ivar_lookup(str, id_associated, Qfalse);
1078
127
  }
1079
128
 
1080
129
  static const char uu_table[] =
@@ -1083,12 +132,14 @@ static const char b64_table[] =
1083
132
  "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1084
133
 
1085
134
  static void
1086
- encodes(VALUE str, const char *s, long len, int type, int tail_lf)
135
+ encodes(VALUE str, const char *s0, long len, int type, int tail_lf)
1087
136
  {
1088
- char buff[4096];
137
+ enum {buff_size = 4096, encoded_unit = 4, input_unit = 3};
138
+ char buff[buff_size + 1]; /* +1 for tail_lf */
1089
139
  long i = 0;
1090
- const char *trans = type == 'u' ? uu_table : b64_table;
140
+ const char *const trans = type == 'u' ? uu_table : b64_table;
1091
141
  char padding;
142
+ const unsigned char *s = (const unsigned char *)s0;
1092
143
 
1093
144
  if (type == 'u') {
1094
145
  buff[i++] = (char)len + ' ';
@@ -1097,16 +148,16 @@ encodes(VALUE str, const char *s, long len, int type, int tail_lf)
1097
148
  else {
1098
149
  padding = '=';
1099
150
  }
1100
- while (len >= 3) {
1101
- while (len >= 3 && sizeof(buff)-i >= 4) {
151
+ while (len >= input_unit) {
152
+ while (len >= input_unit && buff_size-i >= encoded_unit) {
1102
153
  buff[i++] = trans[077 & (*s >> 2)];
1103
154
  buff[i++] = trans[077 & (((*s << 4) & 060) | ((s[1] >> 4) & 017))];
1104
155
  buff[i++] = trans[077 & (((s[1] << 2) & 074) | ((s[2] >> 6) & 03))];
1105
156
  buff[i++] = trans[077 & s[2]];
1106
- s += 3;
1107
- len -= 3;
157
+ s += input_unit;
158
+ len -= input_unit;
1108
159
  }
1109
- if (sizeof(buff)-i < 4) {
160
+ if (buff_size-i < encoded_unit) {
1110
161
  rb_str_buf_cat(str, buff, i);
1111
162
  i = 0;
1112
163
  }
@@ -1126,6 +177,7 @@ encodes(VALUE str, const char *s, long len, int type, int tail_lf)
1126
177
  }
1127
178
  if (tail_lf) buff[i++] = '\n';
1128
179
  rb_str_buf_cat(str, buff, i);
180
+ if ((size_t)i > sizeof(buff)) rb_bug("encodes() buffer overrun");
1129
181
  }
1130
182
 
1131
183
  static const char hex_table[] = "0123456789ABCDEF";
@@ -1186,19 +238,11 @@ qpencode(VALUE str, VALUE from, long len)
1186
238
  static inline int
1187
239
  hex2num(char c)
1188
240
  {
1189
- switch (c) {
1190
- case '0': case '1': case '2': case '3': case '4':
1191
- case '5': case '6': case '7': case '8': case '9':
1192
- return c - '0';
1193
- case 'a': case 'b': case 'c':
1194
- case 'd': case 'e': case 'f':
1195
- return c - 'a' + 10;
1196
- case 'A': case 'B': case 'C':
1197
- case 'D': case 'E': case 'F':
1198
- return c - 'A' + 10;
1199
- default:
1200
- return -1;
1201
- }
241
+ int n;
242
+ n = ruby_digit36_to_number_table[(unsigned char)c];
243
+ if (16 <= n)
244
+ n = -1;
245
+ return n;
1202
246
  }
1203
247
 
1204
248
  #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
@@ -1212,10 +256,19 @@ hex2num(char c)
1212
256
  } while (0)
1213
257
 
1214
258
  #define PACK_ITEM_ADJUST() do { \
1215
- if (tmp_len > 0 && !block_p) \
259
+ if (tmp_len > 0 && mode == UNPACK_ARRAY) \
1216
260
  rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
1217
261
  } while (0)
1218
262
 
263
+ /* Workaround for Oracle Solaris Studio 12.4 C compiler optimization bug
264
+ * with "-xO4" optimization option.
265
+ */
266
+ #if defined(__SUNPRO_C) && __SUNPRO_C == 0x5130
267
+ # define AVOID_CC_BUG volatile
268
+ #else
269
+ # define AVOID_CC_BUG
270
+ #endif
271
+
1219
272
  static VALUE
1220
273
  infected_str_new(const char *ptr, long len, VALUE str)
1221
274
  {
@@ -1225,137 +278,31 @@ infected_str_new(const char *ptr, long len, VALUE str)
1225
278
  return s;
1226
279
  }
1227
280
 
1228
- /*
1229
- * call-seq:
1230
- * str.unpack(format) -> anArray
1231
- *
1232
- * Decodes <i>str</i> (which may contain binary data) according to the
1233
- * format string, returning an array of each value extracted. The
1234
- * format string consists of a sequence of single-character directives,
1235
- * summarized in the table at the end of this entry.
1236
- * Each directive may be followed
1237
- * by a number, indicating the number of times to repeat with this
1238
- * directive. An asterisk (``<code>*</code>'') will use up all
1239
- * remaining elements. The directives <code>sSiIlL</code> may each be
1240
- * followed by an underscore (``<code>_</code>'') or
1241
- * exclamation mark (``<code>!</code>'') to use the underlying
1242
- * platform's native size for the specified type; otherwise, it uses a
1243
- * platform-independent consistent size. Spaces are ignored in the
1244
- * format string. See also <code>Array#pack</code>.
1245
- *
1246
- * "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
1247
- * "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
1248
- * "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
1249
- * "aa".unpack('b8B8') #=> ["10000110", "01100001"]
1250
- * "aaa".unpack('h2H2c') #=> ["16", "61", 97]
1251
- * "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
1252
- * "now=20is".unpack('M*') #=> ["now is"]
1253
- * "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
1254
- *
1255
- * This table summarizes the various formats and the Ruby classes
1256
- * returned by each.
1257
- *
1258
- * Integer | |
1259
- * Directive | Returns | Meaning
1260
- * -----------------------------------------------------------------
1261
- * C | Integer | 8-bit unsigned (unsigned char)
1262
- * S | Integer | 16-bit unsigned, native endian (uint16_t)
1263
- * L | Integer | 32-bit unsigned, native endian (uint32_t)
1264
- * Q | Integer | 64-bit unsigned, native endian (uint64_t)
1265
- * | |
1266
- * c | Integer | 8-bit signed (signed char)
1267
- * s | Integer | 16-bit signed, native endian (int16_t)
1268
- * l | Integer | 32-bit signed, native endian (int32_t)
1269
- * q | Integer | 64-bit signed, native endian (int64_t)
1270
- * | |
1271
- * S_, S! | Integer | unsigned short, native endian
1272
- * I, I_, I! | Integer | unsigned int, native endian
1273
- * L_, L! | Integer | unsigned long, native endian
1274
- * Q_, Q! | Integer | unsigned long long, native endian (ArgumentError
1275
- * | | if the platform has no long long type.)
1276
- * | | (Q_ and Q! is available since Ruby 2.1.)
1277
- * | |
1278
- * s_, s! | Integer | signed short, native endian
1279
- * i, i_, i! | Integer | signed int, native endian
1280
- * l_, l! | Integer | signed long, native endian
1281
- * q_, q! | Integer | signed long long, native endian (ArgumentError
1282
- * | | if the platform has no long long type.)
1283
- * | | (q_ and q! is available since Ruby 2.1.)
1284
- * | |
1285
- * S> L> Q> | Integer | same as the directives without ">" except
1286
- * s> l> q> | | big endian
1287
- * S!> I!> | | (available since Ruby 1.9.3)
1288
- * L!> Q!> | | "S>" is same as "n"
1289
- * s!> i!> | | "L>" is same as "N"
1290
- * l!> q!> | |
1291
- * | |
1292
- * S< L< Q< | Integer | same as the directives without "<" except
1293
- * s< l< q< | | little endian
1294
- * S!< I!< | | (available since Ruby 1.9.3)
1295
- * L!< Q!< | | "S<" is same as "v"
1296
- * s!< i!< | | "L<" is same as "V"
1297
- * l!< q!< | |
1298
- * | |
1299
- * n | Integer | 16-bit unsigned, network (big-endian) byte order
1300
- * N | Integer | 32-bit unsigned, network (big-endian) byte order
1301
- * v | Integer | 16-bit unsigned, VAX (little-endian) byte order
1302
- * V | Integer | 32-bit unsigned, VAX (little-endian) byte order
1303
- * | |
1304
- * U | Integer | UTF-8 character
1305
- * w | Integer | BER-compressed integer (see Array.pack)
1306
- *
1307
- * Float | |
1308
- * Directive | Returns | Meaning
1309
- * -----------------------------------------------------------------
1310
- * D, d | Float | double-precision, native format
1311
- * F, f | Float | single-precision, native format
1312
- * E | Float | double-precision, little-endian byte order
1313
- * e | Float | single-precision, little-endian byte order
1314
- * G | Float | double-precision, network (big-endian) byte order
1315
- * g | Float | single-precision, network (big-endian) byte order
1316
- *
1317
- * String | |
1318
- * Directive | Returns | Meaning
1319
- * -----------------------------------------------------------------
1320
- * A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
1321
- * a | String | arbitrary binary string
1322
- * Z | String | null-terminated string
1323
- * B | String | bit string (MSB first)
1324
- * b | String | bit string (LSB first)
1325
- * H | String | hex string (high nibble first)
1326
- * h | String | hex string (low nibble first)
1327
- * u | String | UU-encoded string
1328
- * M | String | quoted-printable, MIME encoding (see RFC2045)
1329
- * m | String | base64 encoded string (RFC 2045) (default)
1330
- * | | base64 encoded string (RFC 4648) if followed by 0
1331
- * P | String | pointer to a structure (fixed-length string)
1332
- * p | String | pointer to a null-terminated string
1333
- *
1334
- * Misc. | |
1335
- * Directive | Returns | Meaning
1336
- * -----------------------------------------------------------------
1337
- * @ | --- | skip to the offset given by the length argument
1338
- * X | --- | skip backward one byte
1339
- * x | --- | skip forward one byte
1340
- */
281
+ /* unpack mode */
282
+ #define UNPACK_ARRAY 0
283
+ #define UNPACK_BLOCK 1
284
+ #define UNPACK_1 2
285
+
286
+ #define castchar(from) (char)((from) & 0xff)
1341
287
 
1342
288
  VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1343
289
  {
1344
- static const char hexdigits[] = "0123456789abcdef";
290
+ #define hexdigits ruby_hexdigits
1345
291
  char *init_s, *s, *send;
1346
292
  char *p, *pend;
1347
293
  VALUE ary;
1348
294
  char type;
1349
- long len, tmp_len;
295
+ long len;
296
+ AVOID_CC_BUG long tmp_len;
1350
297
  int star;
1351
298
  #ifdef NATINT_PACK
1352
299
  int natint; /* native integer */
1353
300
  #endif
1354
- int block_p = rb_block_given_p();
1355
301
  int signed_p, integer_size, bigendian_p;
302
+ int mode = (rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY);
1356
303
  #define UNPACK_PUSH(item) do {\
1357
304
  VALUE item_val = (item);\
1358
- if (block_p) {\
305
+ if ((mode) == UNPACK_BLOCK) {\
1359
306
  rb_yield(item_val);\
1360
307
  }\
1361
308
  else {\
@@ -1363,14 +310,14 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1363
310
  }\
1364
311
  } while (0)
1365
312
 
1366
- // StringValue(str);
313
+ StringValue(str);
1367
314
  StringValue(fmt);
1368
315
  init_s = s = RSTRING_PTR(str);
1369
316
  send = s + RSTRING_LEN(str);
1370
317
  p = RSTRING_PTR(fmt);
1371
318
  pend = p + RSTRING_LEN(fmt);
1372
319
 
1373
- ary = block_p ? Qnil : rb_ary_new();
320
+ ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
1374
321
  while (p < pend) {
1375
322
  int explicit_endian = 0;
1376
323
  type = *p++;
@@ -1483,13 +430,14 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1483
430
  if (p[-1] == '*' || len > (send - s) * 8)
1484
431
  len = (send - s) * 8;
1485
432
  bits = 0;
1486
- UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
433
+ bitstr = rb_usascii_str_new(0, len);
1487
434
  t = RSTRING_PTR(bitstr);
1488
435
  for (i=0; i<len; i++) {
1489
436
  if (i & 7) bits >>= 1;
1490
- else bits = *s++;
437
+ else bits = (unsigned char)*s++;
1491
438
  *t++ = (bits & 1) ? '1' : '0';
1492
439
  }
440
+ UNPACK_PUSH(bitstr);
1493
441
  }
1494
442
  break;
1495
443
 
@@ -1503,13 +451,14 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1503
451
  if (p[-1] == '*' || len > (send - s) * 8)
1504
452
  len = (send - s) * 8;
1505
453
  bits = 0;
1506
- UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
454
+ bitstr = rb_usascii_str_new(0, len);
1507
455
  t = RSTRING_PTR(bitstr);
1508
456
  for (i=0; i<len; i++) {
1509
457
  if (i & 7) bits <<= 1;
1510
- else bits = *s++;
458
+ else bits = (unsigned char)*s++;
1511
459
  *t++ = (bits & 128) ? '1' : '0';
1512
460
  }
461
+ UNPACK_PUSH(bitstr);
1513
462
  }
1514
463
  break;
1515
464
 
@@ -1523,15 +472,16 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1523
472
  if (p[-1] == '*' || len > (send - s) * 2)
1524
473
  len = (send - s) * 2;
1525
474
  bits = 0;
1526
- UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
475
+ bitstr = rb_usascii_str_new(0, len);
1527
476
  t = RSTRING_PTR(bitstr);
1528
477
  for (i=0; i<len; i++) {
1529
478
  if (i & 1)
1530
479
  bits >>= 4;
1531
480
  else
1532
- bits = *s++;
481
+ bits = (unsigned char)*s++;
1533
482
  *t++ = hexdigits[bits & 15];
1534
483
  }
484
+ UNPACK_PUSH(bitstr);
1535
485
  }
1536
486
  break;
1537
487
 
@@ -1545,36 +495,30 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1545
495
  if (p[-1] == '*' || len > (send - s) * 2)
1546
496
  len = (send - s) * 2;
1547
497
  bits = 0;
1548
- UNPACK_PUSH(bitstr = rb_usascii_str_new(0, len));
498
+ bitstr = rb_usascii_str_new(0, len);
1549
499
  t = RSTRING_PTR(bitstr);
1550
500
  for (i=0; i<len; i++) {
1551
501
  if (i & 1)
1552
502
  bits <<= 4;
1553
503
  else
1554
- bits = *s++;
504
+ bits = (unsigned char)*s++;
1555
505
  *t++ = hexdigits[(bits >> 4) & 15];
1556
506
  }
507
+ UNPACK_PUSH(bitstr);
1557
508
  }
1558
509
  break;
1559
510
 
1560
511
  case 'c':
1561
- PACK_LENGTH_ADJUST_SIZE(sizeof(char));
1562
- while (len-- > 0) {
1563
- int c = *s++;
1564
- if (c > (char)127) c-=256;
1565
- UNPACK_PUSH(INT2FIX(c));
1566
- }
1567
- PACK_ITEM_ADJUST();
1568
- break;
512
+ signed_p = 1;
513
+ integer_size = 1;
514
+ bigendian_p = BIGENDIAN_P(); /* not effective */
515
+ goto unpack_integer;
1569
516
 
1570
517
  case 'C':
1571
- PACK_LENGTH_ADJUST_SIZE(sizeof(unsigned char));
1572
- while (len-- > 0) {
1573
- unsigned char c = *s++;
1574
- UNPACK_PUSH(INT2FIX(c));
1575
- }
1576
- PACK_ITEM_ADJUST();
1577
- break;
518
+ signed_p = 0;
519
+ integer_size = 1;
520
+ bigendian_p = BIGENDIAN_P(); /* not effective */
521
+ goto unpack_integer;
1578
522
 
1579
523
  case 's':
1580
524
  signed_p = 1;
@@ -1624,6 +568,18 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1624
568
  bigendian_p = BIGENDIAN_P();
1625
569
  goto unpack_integer;
1626
570
 
571
+ case 'j':
572
+ signed_p = 1;
573
+ integer_size = sizeof(intptr_t);
574
+ bigendian_p = BIGENDIAN_P();
575
+ goto unpack_integer;
576
+
577
+ case 'J':
578
+ signed_p = 0;
579
+ integer_size = sizeof(uintptr_t);
580
+ bigendian_p = BIGENDIAN_P();
581
+ goto unpack_integer;
582
+
1627
583
  case 'n':
1628
584
  signed_p = 0;
1629
585
  integer_size = 2;
@@ -1652,144 +608,17 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1652
608
  if (explicit_endian) {
1653
609
  bigendian_p = explicit_endian == '>';
1654
610
  }
1655
-
1656
- switch (integer_size) {
1657
- #if defined(HAVE_INT16_T) && !defined(FORCE_BIG_PACK)
1658
- case SIZEOF_INT16_T:
1659
- if (signed_p) {
1660
- PACK_LENGTH_ADJUST_SIZE(sizeof(int16_t));
1661
- while (len-- > 0) {
1662
- union {
1663
- int16_t i;
1664
- char a[sizeof(int16_t)];
1665
- } v;
1666
- memcpy(v.a, s, sizeof(int16_t));
1667
- if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1668
- s += sizeof(int16_t);
1669
- UNPACK_PUSH(INT2FIX(v.i));
1670
- }
1671
- PACK_ITEM_ADJUST();
1672
- }
1673
- else {
1674
- PACK_LENGTH_ADJUST_SIZE(sizeof(uint16_t));
1675
- while (len-- > 0) {
1676
- union {
1677
- uint16_t i;
1678
- char a[sizeof(uint16_t)];
1679
- } v;
1680
- memcpy(v.a, s, sizeof(uint16_t));
1681
- if (bigendian_p != BIGENDIAN_P()) v.i = swap16(v.i);
1682
- s += sizeof(uint16_t);
1683
- UNPACK_PUSH(INT2FIX(v.i));
1684
- }
1685
- PACK_ITEM_ADJUST();
1686
- }
1687
- break;
1688
- #endif
1689
-
1690
- #if defined(HAVE_INT32_T) && !defined(FORCE_BIG_PACK)
1691
- case SIZEOF_INT32_T:
1692
- if (signed_p) {
1693
- PACK_LENGTH_ADJUST_SIZE(sizeof(int32_t));
1694
- while (len-- > 0) {
1695
- union {
1696
- int32_t i;
1697
- char a[sizeof(int32_t)];
1698
- } v;
1699
- memcpy(v.a, s, sizeof(int32_t));
1700
- if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1701
- s += sizeof(int32_t);
1702
- UNPACK_PUSH(INT2NUM(v.i));
1703
- }
1704
- PACK_ITEM_ADJUST();
1705
- }
1706
- else {
1707
- PACK_LENGTH_ADJUST_SIZE(sizeof(uint32_t));
1708
- while (len-- > 0) {
1709
- union {
1710
- uint32_t i;
1711
- char a[sizeof(uint32_t)];
1712
- } v;
1713
- memcpy(v.a, s, sizeof(uint32_t));
1714
- if (bigendian_p != BIGENDIAN_P()) v.i = swap32(v.i);
1715
- s += sizeof(uint32_t);
1716
- UNPACK_PUSH(UINT2NUM(v.i));
1717
- }
1718
- PACK_ITEM_ADJUST();
1719
- }
1720
- break;
1721
- #endif
1722
-
1723
- #if defined(HAVE_INT64_T) && !defined(FORCE_BIG_PACK)
1724
- case SIZEOF_INT64_T:
1725
- if (signed_p) {
1726
- PACK_LENGTH_ADJUST_SIZE(sizeof(int64_t));
1727
- while (len-- > 0) {
1728
- union {
1729
- int64_t i;
1730
- char a[sizeof(int64_t)];
1731
- } v;
1732
- memcpy(v.a, s, sizeof(int64_t));
1733
- if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1734
- s += sizeof(int64_t);
1735
- UNPACK_PUSH(INT64toNUM(v.i));
1736
- }
1737
- PACK_ITEM_ADJUST();
1738
- }
1739
- else {
1740
- PACK_LENGTH_ADJUST_SIZE(sizeof(uint64_t));
1741
- while (len-- > 0) {
1742
- union {
1743
- uint64_t i;
1744
- char a[sizeof(uint64_t)];
1745
- } v;
1746
- memcpy(v.a, s, sizeof(uint64_t));
1747
- if (bigendian_p != BIGENDIAN_P()) v.i = swap64(v.i);
1748
- s += sizeof(uint64_t);
1749
- UNPACK_PUSH(UINT64toNUM(v.i));
1750
- }
1751
- PACK_ITEM_ADJUST();
1752
- }
1753
- break;
1754
- #endif
1755
-
1756
- default:
1757
- if (integer_size > MAX_INTEGER_PACK_SIZE)
1758
- rb_bug("unexpected integer size for pack: %d", integer_size);
1759
- PACK_LENGTH_ADJUST_SIZE(integer_size);
1760
- while (len-- > 0) {
1761
- union {
1762
- unsigned long i[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG];
1763
- char a[(MAX_INTEGER_PACK_SIZE+SIZEOF_LONG)/SIZEOF_LONG*SIZEOF_LONG];
1764
- } v;
1765
- int num_longs = (integer_size+SIZEOF_LONG)/SIZEOF_LONG;
1766
- int i;
1767
-
1768
- if (signed_p && (signed char)s[bigendian_p ? 0 : (integer_size-1)] < 0)
1769
- memset(v.a, 0xff, sizeof(long)*num_longs);
1770
- else
1771
- memset(v.a, 0, sizeof(long)*num_longs);
1772
- if (bigendian_p)
1773
- memcpy(v.a + sizeof(long)*num_longs - integer_size, s, integer_size);
1774
- else
1775
- memcpy(v.a, s, integer_size);
1776
- if (bigendian_p) {
1777
- for (i = 0; i < num_longs/2; i++) {
1778
- unsigned long t = v.i[i];
1779
- v.i[i] = v.i[num_longs-1-i];
1780
- v.i[num_longs-1-i] = t;
1781
- }
1782
- }
1783
- if (bigendian_p != BIGENDIAN_P()) {
1784
- for (i = 0; i < num_longs; i++)
1785
- v.i[i] = swapl(v.i[i]);
1786
- }
1787
- s += integer_size;
1788
- UNPACK_PUSH(rb_big_unpack(v.i, num_longs));
1789
- }
1790
- PACK_ITEM_ADJUST();
1791
- break;
1792
- }
611
+ PACK_LENGTH_ADJUST_SIZE(integer_size);
612
+ while (len-- > 0) {
613
+ int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
614
+ VALUE val;
615
+ if (signed_p)
616
+ flags |= INTEGER_PACK_2COMP;
617
+ val = rb_integer_unpack(s, integer_size, 1, 0, flags);
618
+ UNPACK_PUSH(val);
619
+ s += integer_size;
620
+ }
621
+ PACK_ITEM_ADJUST();
1793
622
  break;
1794
623
 
1795
624
  case 'f':
@@ -1807,13 +636,11 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1807
636
  case 'e':
1808
637
  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1809
638
  while (len-- > 0) {
1810
- float tmp;
1811
- FLOAT_CONVWITH(ftmp);
1812
-
1813
- memcpy(&tmp, s, sizeof(float));
639
+ FLOAT_CONVWITH(tmp);
640
+ memcpy(tmp.buf, s, sizeof(float));
1814
641
  s += sizeof(float);
1815
- tmp = VTOHF(tmp,ftmp);
1816
- UNPACK_PUSH(DBL2NUM((double)tmp));
642
+ VTOHF(tmp);
643
+ UNPACK_PUSH(DBL2NUM(tmp.f));
1817
644
  }
1818
645
  PACK_ITEM_ADJUST();
1819
646
  break;
@@ -1821,13 +648,11 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1821
648
  case 'E':
1822
649
  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1823
650
  while (len-- > 0) {
1824
- double tmp;
1825
- DOUBLE_CONVWITH(dtmp);
1826
-
1827
- memcpy(&tmp, s, sizeof(double));
651
+ DOUBLE_CONVWITH(tmp);
652
+ memcpy(tmp.buf, s, sizeof(double));
1828
653
  s += sizeof(double);
1829
- tmp = VTOHD(tmp,dtmp);
1830
- UNPACK_PUSH(DBL2NUM(tmp));
654
+ VTOHD(tmp);
655
+ UNPACK_PUSH(DBL2NUM(tmp.d));
1831
656
  }
1832
657
  PACK_ITEM_ADJUST();
1833
658
  break;
@@ -1847,13 +672,11 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1847
672
  case 'g':
1848
673
  PACK_LENGTH_ADJUST_SIZE(sizeof(float));
1849
674
  while (len-- > 0) {
1850
- float tmp;
1851
- FLOAT_CONVWITH(ftmp);
1852
-
1853
- memcpy(&tmp, s, sizeof(float));
675
+ FLOAT_CONVWITH(tmp);
676
+ memcpy(tmp.buf, s, sizeof(float));
1854
677
  s += sizeof(float);
1855
- tmp = NTOHF(tmp,ftmp);
1856
- UNPACK_PUSH(DBL2NUM((double)tmp));
678
+ NTOHF(tmp);
679
+ UNPACK_PUSH(DBL2NUM(tmp.f));
1857
680
  }
1858
681
  PACK_ITEM_ADJUST();
1859
682
  break;
@@ -1861,13 +684,11 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1861
684
  case 'G':
1862
685
  PACK_LENGTH_ADJUST_SIZE(sizeof(double));
1863
686
  while (len-- > 0) {
1864
- double tmp;
1865
- DOUBLE_CONVWITH(dtmp);
1866
-
1867
- memcpy(&tmp, s, sizeof(double));
687
+ DOUBLE_CONVWITH(tmp);
688
+ memcpy(tmp.buf, s, sizeof(double));
1868
689
  s += sizeof(double);
1869
- tmp = NTOHD(tmp,dtmp);
1870
- UNPACK_PUSH(DBL2NUM(tmp));
690
+ NTOHD(tmp);
691
+ UNPACK_PUSH(DBL2NUM(tmp.d));
1871
692
  }
1872
693
  PACK_ITEM_ADJUST();
1873
694
  break;
@@ -1890,12 +711,12 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1890
711
  char *ptr = RSTRING_PTR(buf);
1891
712
  long total = 0;
1892
713
 
1893
- while (s < send && *s > ' ' && *s < 'a') {
714
+ while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
1894
715
  long a,b,c,d;
1895
- char hunk[4];
716
+ char hunk[3];
717
+
718
+ len = ((unsigned char)*s++ - ' ') & 077;
1896
719
 
1897
- hunk[3] = '\0';
1898
- len = (*s++ - ' ') & 077;
1899
720
  total += len;
1900
721
  if (total > RSTRING_LEN(buf)) {
1901
722
  len -= total - RSTRING_LEN(buf);
@@ -1905,20 +726,20 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1905
726
  while (len > 0) {
1906
727
  long mlen = len > 3 ? 3 : len;
1907
728
 
1908
- if (s < send && *s >= ' ')
1909
- a = (*s++ - ' ') & 077;
729
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
730
+ a = ((unsigned char)*s++ - ' ') & 077;
1910
731
  else
1911
732
  a = 0;
1912
- if (s < send && *s >= ' ')
1913
- b = (*s++ - ' ') & 077;
733
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
734
+ b = ((unsigned char)*s++ - ' ') & 077;
1914
735
  else
1915
736
  b = 0;
1916
- if (s < send && *s >= ' ')
1917
- c = (*s++ - ' ') & 077;
737
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
738
+ c = ((unsigned char)*s++ - ' ') & 077;
1918
739
  else
1919
740
  c = 0;
1920
- if (s < send && *s >= ' ')
1921
- d = (*s++ - ' ') & 077;
741
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
742
+ d = ((unsigned char)*s++ - ' ') & 077;
1922
743
  else
1923
744
  d = 0;
1924
745
  hunk[0] = (char)(a << 2 | b >> 4);
@@ -1928,10 +749,10 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
1928
749
  ptr += mlen;
1929
750
  len -= mlen;
1930
751
  }
1931
- if (*s == '\r') s++;
1932
- if (*s == '\n') s++;
1933
- else if (s < send && (s+1 == send || s[1] == '\n'))
1934
- s += 2; /* possible checksum byte */
752
+ if (s < send && (unsigned char)*s != '\r' && *s != '\n')
753
+ s++; /* possible checksum byte */
754
+ if (s < send && *s == '\r') s++;
755
+ if (s < send && *s == '\n') s++;
1935
756
  }
1936
757
 
1937
758
  rb_str_set_len(buf, total);
@@ -2078,18 +899,19 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
2078
899
  s += sizeof(char *);
2079
900
 
2080
901
  if (t) {
2081
- VALUE a, *p, *pend;
902
+ VALUE a;
903
+ const VALUE *p, *pend;
2082
904
 
2083
- if (!(a = rb_str_associated(str))) {
905
+ if (!(a = str_associated(str))) {
2084
906
  rb_raise(rb_eArgError, "no associated pointer");
2085
907
  }
2086
- p = RARRAY_PTR(a);
908
+ p = RARRAY_CONST_PTR(a);
2087
909
  pend = p + RARRAY_LEN(a);
2088
910
  while (p < pend) {
2089
911
  if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
2090
912
  if (len < RSTRING_LEN(*p)) {
2091
913
  tmp = rb_tainted_str_new(t, len);
2092
- rb_str_associate(tmp, a);
914
+ str_associate(tmp, a);
2093
915
  }
2094
916
  else {
2095
917
  tmp = *p;
@@ -2120,12 +942,13 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
2120
942
  s += sizeof(char *);
2121
943
 
2122
944
  if (t) {
2123
- VALUE a, *p, *pend;
945
+ VALUE a;
946
+ const VALUE *p, *pend;
2124
947
 
2125
- if (!(a = rb_str_associated(str))) {
948
+ if (!(a = str_associated(str))) {
2126
949
  rb_raise(rb_eArgError, "no associated pointer");
2127
950
  }
2128
- p = RARRAY_PTR(a);
951
+ p = RARRAY_CONST_PTR(a);
2129
952
  pend = p + RARRAY_LEN(a);
2130
953
  while (p < pend) {
2131
954
  if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
@@ -2145,32 +968,18 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
2145
968
 
2146
969
  case 'w':
2147
970
  {
2148
- unsigned long ul = 0;
2149
- unsigned long ulmask = 0xfeUL << ((sizeof(unsigned long) - 1) * 8);
2150
-
2151
- while (len > 0 && s < send) {
2152
- ul <<= 7;
2153
- ul |= (*s & 0x7f);
2154
- if (!(*s++ & 0x80)) {
2155
- UNPACK_PUSH(ULONG2NUM(ul));
2156
- len--;
2157
- ul = 0;
2158
- }
2159
- else if (ul & ulmask) {
2160
- VALUE big = rb_uint2big(ul);
2161
- VALUE big128 = rb_uint2big(128);
2162
- while (s < send) {
2163
- big = rb_big_mul(big, big128);
2164
- big = rb_big_plus(big, rb_uint2big(*s & 0x7f));
2165
- if (!(*s++ & 0x80)) {
2166
- UNPACK_PUSH(big);
2167
- len--;
2168
- ul = 0;
2169
- break;
2170
- }
2171
- }
2172
- }
2173
- }
971
+ char *s0 = s;
972
+ while (len > 0 && s < send) {
973
+ if (*s & 0x80) {
974
+ s++;
975
+ }
976
+ else {
977
+ s++;
978
+ UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
979
+ len--;
980
+ s0 = s;
981
+ }
982
+ }
2174
983
  }
2175
984
  break;
2176
985
 
@@ -2185,8 +994,6 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
2185
994
  return ary;
2186
995
  }
2187
996
 
2188
- #define BYTEWIDTH 8
2189
-
2190
997
  int
2191
998
  rb_uv_to_utf8(char buf[6], unsigned long uv)
2192
999
  {