zscan 2.0.6 → 2.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114) hide show
  1. checksums.yaml +4 -4
  2. data/ext/extconf.rb +9 -1
  3. data/ext/pack/builtin-27.h +79 -0
  4. data/ext/pack/builtin.h +79 -0
  5. data/ext/pack/internal-27.h +96 -0
  6. data/ext/pack/internal-27/array.h +103 -0
  7. data/ext/pack/internal-27/bignum.h +246 -0
  8. data/ext/pack/internal-27/bits.h +566 -0
  9. data/ext/pack/internal-27/class.h +162 -0
  10. data/ext/pack/internal-27/compar.h +50 -0
  11. data/ext/pack/internal-27/compile.h +32 -0
  12. data/ext/pack/internal-27/compilers.h +108 -0
  13. data/ext/pack/internal-27/complex.h +29 -0
  14. data/ext/pack/internal-27/cont.h +23 -0
  15. data/ext/pack/internal-27/dir.h +17 -0
  16. data/ext/pack/internal-27/enc.h +20 -0
  17. data/ext/pack/internal-27/encoding.h +28 -0
  18. data/ext/pack/internal-27/enum.h +19 -0
  19. data/ext/pack/internal-27/enumerator.h +22 -0
  20. data/ext/pack/internal-27/error.h +129 -0
  21. data/ext/pack/internal-27/eval.h +33 -0
  22. data/ext/pack/internal-27/file.h +39 -0
  23. data/ext/pack/internal-27/fixnum.h +185 -0
  24. data/ext/pack/internal-27/gc.h +153 -0
  25. data/ext/pack/internal-27/hash.h +234 -0
  26. data/ext/pack/internal-27/imemo.h +242 -0
  27. data/ext/pack/internal-27/inits.h +51 -0
  28. data/ext/pack/internal-27/io.h +35 -0
  29. data/ext/pack/internal-27/load.h +19 -0
  30. data/ext/pack/internal-27/loadpath.h +17 -0
  31. data/ext/pack/internal-27/math.h +24 -0
  32. data/ext/pack/internal-27/missing.h +19 -0
  33. data/ext/pack/internal-27/mjit.h +29 -0
  34. data/ext/pack/internal-27/numeric.h +249 -0
  35. data/ext/pack/internal-27/object.h +60 -0
  36. data/ext/pack/internal-27/parse.h +23 -0
  37. data/ext/pack/internal-27/proc.h +32 -0
  38. data/ext/pack/internal-27/process.h +130 -0
  39. data/ext/pack/internal-27/random.h +17 -0
  40. data/ext/pack/internal-27/range.h +37 -0
  41. data/ext/pack/internal-27/rational.h +68 -0
  42. data/ext/pack/internal-27/re.h +29 -0
  43. data/ext/pack/internal-27/sanitizers.h +191 -0
  44. data/ext/pack/internal-27/serial.h +24 -0
  45. data/ext/pack/internal-27/signal.h +22 -0
  46. data/ext/pack/internal-27/static_assert.h +17 -0
  47. data/ext/pack/internal-27/string.h +135 -0
  48. data/ext/pack/internal-27/struct.h +154 -0
  49. data/ext/pack/internal-27/symbol.h +41 -0
  50. data/ext/pack/internal-27/thread.h +52 -0
  51. data/ext/pack/internal-27/time.h +35 -0
  52. data/ext/pack/internal-27/transcode.h +21 -0
  53. data/ext/pack/internal-27/util.h +31 -0
  54. data/ext/pack/internal-27/variable.h +92 -0
  55. data/ext/pack/internal-27/vm.h +127 -0
  56. data/ext/pack/internal-27/warnings.h +17 -0
  57. data/ext/pack/internal.h +57 -2102
  58. data/ext/pack/internal/array.h +103 -0
  59. data/ext/pack/internal/bignum.h +246 -0
  60. data/ext/pack/internal/bits.h +566 -0
  61. data/ext/pack/internal/class.h +162 -0
  62. data/ext/pack/internal/compar.h +50 -0
  63. data/ext/pack/internal/compile.h +32 -0
  64. data/ext/pack/internal/compilers.h +108 -0
  65. data/ext/pack/internal/complex.h +29 -0
  66. data/ext/pack/internal/cont.h +23 -0
  67. data/ext/pack/internal/dir.h +17 -0
  68. data/ext/pack/internal/enc.h +20 -0
  69. data/ext/pack/internal/encoding.h +28 -0
  70. data/ext/pack/internal/enum.h +19 -0
  71. data/ext/pack/internal/enumerator.h +22 -0
  72. data/ext/pack/internal/error.h +129 -0
  73. data/ext/pack/internal/eval.h +33 -0
  74. data/ext/pack/internal/file.h +39 -0
  75. data/ext/pack/internal/fixnum.h +185 -0
  76. data/ext/pack/internal/gc.h +153 -0
  77. data/ext/pack/internal/hash.h +234 -0
  78. data/ext/pack/internal/imemo.h +242 -0
  79. data/ext/pack/internal/inits.h +51 -0
  80. data/ext/pack/internal/io.h +35 -0
  81. data/ext/pack/internal/load.h +19 -0
  82. data/ext/pack/internal/loadpath.h +17 -0
  83. data/ext/pack/internal/math.h +24 -0
  84. data/ext/pack/internal/missing.h +19 -0
  85. data/ext/pack/internal/mjit.h +29 -0
  86. data/ext/pack/internal/numeric.h +249 -0
  87. data/ext/pack/internal/object.h +60 -0
  88. data/ext/pack/internal/parse.h +23 -0
  89. data/ext/pack/internal/proc.h +32 -0
  90. data/ext/pack/internal/process.h +130 -0
  91. data/ext/pack/internal/random.h +17 -0
  92. data/ext/pack/internal/range.h +37 -0
  93. data/ext/pack/internal/rational.h +68 -0
  94. data/ext/pack/internal/re.h +29 -0
  95. data/ext/pack/internal/sanitizers.h +191 -0
  96. data/ext/pack/internal/serial.h +24 -0
  97. data/ext/pack/internal/signal.h +22 -0
  98. data/ext/pack/internal/static_assert.h +17 -0
  99. data/ext/pack/internal/string.h +135 -0
  100. data/ext/pack/internal/struct.h +154 -0
  101. data/ext/pack/internal/symbol.h +41 -0
  102. data/ext/pack/internal/thread.h +52 -0
  103. data/ext/pack/internal/time.h +35 -0
  104. data/ext/pack/internal/transcode.h +21 -0
  105. data/ext/pack/internal/util.h +31 -0
  106. data/ext/pack/internal/variable.h +92 -0
  107. data/ext/pack/internal/vm.h +127 -0
  108. data/ext/pack/internal/warnings.h +17 -0
  109. data/ext/pack/pack-26.c +2 -109
  110. data/ext/pack/pack-27.c +1022 -0
  111. data/ext/pack/pack.c +57 -1047
  112. data/lib/zscan.rb +1 -1
  113. data/zscan.gemspec +1 -1
  114. metadata +109 -3
@@ -0,0 +1,1022 @@
1
+ /**********************************************************************
2
+
3
+ pack.c -
4
+
5
+ $Author$
6
+ created at: Thu Feb 10 15:17:05 JST 1994
7
+
8
+ Copyright (C) 1993-2007 Yukihiro Matsumoto
9
+
10
+ **********************************************************************/
11
+
12
+ #include "ruby/internal/config.h"
13
+
14
+ #include <ctype.h>
15
+ #include <errno.h>
16
+ #include <float.h>
17
+ #include <sys/types.h>
18
+
19
+ #include "internal.h"
20
+ #include "internal/bits.h"
21
+ #include "internal/string.h"
22
+ #include "internal/symbol.h"
23
+ #include "internal/util.h"
24
+
25
+ // #include "internal/variable.h"
26
+ VALUE rb_ivar_lookup(VALUE obj, ID id, VALUE undef);
27
+
28
+ #include "builtin.h"
29
+
30
+ /*
31
+ * It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
32
+ * instead of HAVE_LONG_LONG or LONG_LONG.
33
+ * This means q! and Q! always mean the standard long long type and
34
+ * cause ArgumentError on platforms which have no long long type,
35
+ * even if the platform has an implementation-specific 64-bit type.
36
+ * This behavior is consistent with the document of pack/unpack.
37
+ */
38
+ #ifdef HAVE_TRUE_LONG_LONG
39
+ static const char natstr[] = "sSiIlLqQjJ";
40
+ #else
41
+ static const char natstr[] = "sSiIlLjJ";
42
+ #endif
43
+ static const char endstr[] = "sSiIlLqQjJ";
44
+
45
+ #ifdef HAVE_TRUE_LONG_LONG
46
+ /* It is intentional to use long long instead of LONG_LONG. */
47
+ # define NATINT_LEN_Q NATINT_LEN(long long, 8)
48
+ #else
49
+ # define NATINT_LEN_Q 8
50
+ #endif
51
+
52
+ #if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
53
+ # define NATINT_PACK
54
+ #endif
55
+
56
+ #ifdef DYNAMIC_ENDIAN
57
+ /* for universal binary of NEXTSTEP and MacOS X */
58
+ /* useless since autoconf 2.63? */
59
/*
 * Runtime byte-order probe, used when the endianness is not fixed at
 * compile time.  Returns 1 on a big-endian host and 0 on a little-endian
 * one.  The answer is computed on the first call and cached afterwards.
 */
static int
is_bigendian(void)
{
    static int probed = 0;  /* becomes 1 once the probe has run */
    static int cached;      /* result of the probe */

    if (!probed) {
        const char *lowest_byte;

        /* Store the integer 1, then look at its lowest-addressed byte:
         * it is non-zero exactly when the host is little-endian. */
        probed = 1;
        lowest_byte = (const char *)&probed;
        cached = lowest_byte[0] ? 0 : 1;
    }
    return cached;
}
71
+ # define BIGENDIAN_P() (is_bigendian())
72
+ #elif defined(WORDS_BIGENDIAN)
73
+ # define BIGENDIAN_P() 1
74
+ #else
75
+ # define BIGENDIAN_P() 0
76
+ #endif
77
+
78
+ #ifdef NATINT_PACK
79
+ # define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
80
+ #else
81
+ # define NATINT_LEN(type,len) ((int)sizeof(type))
82
+ #endif
83
+
84
/*
 * Overlay of a single-precision float with a 32-bit unsigned integer and
 * its raw bytes.  Bytes are copied in through `buf`, the byte order is
 * fixed up through `u`, and the value is read back through `f`.
 */
typedef union {
    float    f;
    uint32_t u;
    char     buf[4];
} FLOAT_SWAPPER;

/*
 * Same overlay for a double-precision value: 64-bit integer view plus
 * the eight raw bytes.
 */
typedef union {
    double   d;
    uint64_t u;
    char     buf[8];
} DOUBLE_SWAPPER;
94
+ #define swapf(x) swap32(x)
95
+ #define swapd(x) swap64(x)
96
+
97
+ #define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
98
+ #define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
99
+ #define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
100
+ #define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
101
+ #define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
102
+ #define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
103
+ #define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
104
+ #define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
105
+
106
+ #define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
107
+ #define HTONF(x) ((x).u = rb_htonf((x).u))
108
+ #define HTOVF(x) ((x).u = rb_htovf((x).u))
109
+ #define NTOHF(x) ((x).u = rb_ntohf((x).u))
110
+ #define VTOHF(x) ((x).u = rb_vtohf((x).u))
111
+
112
+ #define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
113
+ #define HTOND(x) ((x).u = rb_htond((x).u))
114
+ #define HTOVD(x) ((x).u = rb_htovd((x).u))
115
+ #define NTOHD(x) ((x).u = rb_ntohd((x).u))
116
+ #define VTOHD(x) ((x).u = rb_vtohd((x).u))
117
+
118
+ #define MAX_INTEGER_PACK_SIZE 8
119
+
120
+ static unsigned long utf8_to_uv(const char*,long*);
121
+
122
+ static ID id_associated;
123
+
124
+ static void
125
+ str_associate(VALUE str, VALUE add)
126
+ {
127
+ /* assert(NIL_P(rb_attr_get(str, id_associated))); */
128
+ rb_ivar_set(str, id_associated, add);
129
+ }
130
+
131
+ static VALUE
132
+ str_associated(VALUE str)
133
+ {
134
+ return rb_ivar_lookup(str, id_associated, Qfalse);
135
+ }
136
+
137
+ static void
138
+ unknown_directive(const char *mode, char type, VALUE fmt)
139
+ {
140
+ VALUE f;
141
+ char unknown[5];
142
+
143
+ if (ISPRINT(type)) {
144
+ unknown[0] = type;
145
+ unknown[1] = '\0';
146
+ }
147
+ else {
148
+ snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
149
+ }
150
+ f = rb_str_quote_unprintable(fmt);
151
+ if (f != fmt) {
152
+ fmt = rb_str_subseq(f, 1, RSTRING_LEN(f) - 2);
153
+ }
154
+ rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'",
155
+ mode, unknown, fmt);
156
+ }
157
+
158
/* Base64 alphabet: index 0..63 maps to its encoded character. */
static const char b64_table[] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";
160
+
161
+ static inline int
162
+ hex2num(char c)
163
+ {
164
+ int n;
165
+ n = ruby_digit36_to_number_table[(unsigned char)c];
166
+ if (16 <= n)
167
+ n = -1;
168
+ return n;
169
+ }
170
+
171
/*
 * PACK_LENGTH_ADJUST_SIZE(sz): clamp the caller's `len` to the number of
 * whole `sz`-byte items that remain between `s` and `send`.  When the
 * count was explicit (`star` is 0), `tmp_len` records how many requested
 * items are missing; otherwise `tmp_len` stays 0.  Uses the caller's
 * locals `len`, `tmp_len`, `star`, `s` and `send`.
 */
+ #define PACK_LENGTH_ADJUST_SIZE(sz) do { \
172
+ tmp_len = 0; \
173
+ if (len > (long)((send-s)/(sz))) { \
174
+ if (!star) { \
175
+ tmp_len = len-(send-s)/(sz); \
176
+ } \
177
+ len = (send-s)/(sz); \
178
+ } \
179
+ } while (0)
180
+
/*
 * PACK_ITEM_ADJUST(): in array mode, when `tmp_len` items were missing,
 * store nil at index RARRAY_LEN(ary)+tmp_len-1 so the result array grows
 * by `tmp_len` slots.  Uses the caller's locals `tmp_len`, `mode`, `ary`.
 */
181
+ #define PACK_ITEM_ADJUST() do { \
182
+ if (tmp_len > 0 && mode == UNPACK_ARRAY) \
183
+ rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
184
+ } while (0)
185
+
186
+ /* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
187
+ * 12.4/12.5/12.6 C compiler optimization bug
188
+ * with "-xO4" optimization option.
189
+ */
190
+ #if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
191
+ # define AVOID_CC_BUG volatile
192
+ #else
193
+ # define AVOID_CC_BUG
194
+ #endif
195
+
196
+ /* unpack mode */
197
+ #define UNPACK_ARRAY 0
198
+ #define UNPACK_BLOCK 1
199
+ #define UNPACK_1 2
200
+
201
+ #define castchar(from) (char)((from) & 0xff)
202
+
203
/*
 * Decode the binary string `str` according to the template string `fmt`.
 *
 *   str        - data to decode (coerced with StringValue)
 *   fmt        - unpack template (coerced with StringValue)
 *   parsed_len - out: on normal return, set to the distance between the
 *                read cursor and the start of `str` (s - init_s)
 *
 * Returns a Ruby array holding the decoded items.  `mode` is fixed to
 * UNPACK_ARRAY in this function, so UNPACK_PUSH always appends to `ary`.
 *
 * Raises ArgumentError for the '%' directive, for a '_'/'!'/'<'/'>'
 * modifier after a type that does not accept it, for invalid strict
 * base64, for '@'/'X'/'x' moving outside the string, and for 'P'/'p'
 * pointers that are not associated with `str`.  Raises RangeError when
 * both '<' and '>' are given or the count is too big.  Unrecognised
 * directives only produce a warning via unknown_directive().
 * `*parsed_len` is written only at the end, after the template has been
 * fully processed.
 */
+ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
204
+ {
205
+ char* init_s;
206
+ int mode = UNPACK_ARRAY;
207
+ #define hexdigits ruby_hexdigits
208
+ char *s, *send;
209
+ char *p, *pend;
210
+ VALUE ary;
211
+ char type;
212
+ long len;
213
+ AVOID_CC_BUG long tmp_len;
214
+ int star;
215
+ #ifdef NATINT_PACK
216
+ int natint; /* native integer */
217
+ #endif
218
+ int signed_p, integer_size, bigendian_p;
/* UNPACK_PUSH delivers one decoded item according to `mode`: yield it,
 * append it to `ary`, or return it immediately. */
219
+ #define UNPACK_PUSH(item) do {\
220
+ VALUE item_val = (item);\
221
+ if ((mode) == UNPACK_BLOCK) {\
222
+ rb_yield(item_val);\
223
+ }\
224
+ else if ((mode) == UNPACK_ARRAY) {\
225
+ rb_ary_push(ary, item_val);\
226
+ }\
227
+ else /* if ((mode) == UNPACK_1) { */ {\
228
+ return item_val; \
229
+ }\
230
+ } while (0)
231
+
232
+ StringValue(str);
233
+ StringValue(fmt);
/* s/send bracket the unread data; p/pend bracket the unread template. */
234
+ init_s = s = RSTRING_PTR(str);
235
+ send = s + RSTRING_LEN(str);
236
+ p = RSTRING_PTR(fmt);
237
+ pend = p + RSTRING_LEN(fmt);
238
+
239
+ ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
/* One template directive is handled per iteration. */
240
+ while (p < pend) {
241
+ int explicit_endian = 0;
242
+ type = *p++;
243
+ #ifdef NATINT_PACK
244
+ natint = 0;
245
+ #endif
246
+
/* Whitespace is skipped; '#' starts a comment that runs up to (not
 * including) the next newline. */
247
+ if (ISSPACE(type)) continue;
248
+ if (type == '#') {
249
+ while ((p < pend) && (*p != '\n')) {
250
+ p++;
251
+ }
252
+ continue;
253
+ }
254
+
255
+ star = 0;
/* Consume any run of modifiers after the directive: '_' / '!' request
 * the native size, '<' / '>' force little / big endian. */
256
+ {
257
+ modifiers:
258
+ switch (*p) {
259
+ case '_':
260
+ case '!':
261
+
262
+ if (strchr(natstr, type)) {
263
+ #ifdef NATINT_PACK
264
+ natint = 1;
265
+ #endif
266
+ p++;
267
+ }
268
+ else {
269
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
270
+ }
271
+ goto modifiers;
272
+
273
+ case '<':
274
+ case '>':
275
+ if (!strchr(endstr, type)) {
276
+ rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
277
+ }
278
+ if (explicit_endian) {
279
+ rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
280
+ }
281
+ explicit_endian = *p++;
282
+ goto modifiers;
283
+ }
284
+ }
285
+
/* Repeat count: 1 at end of template, all remaining bytes for '*', the
 * decimal number if one follows, otherwise 1 (0 for '@'). */
286
+ if (p >= pend)
287
+ len = 1;
288
+ else if (*p == '*') {
289
+ star = 1;
290
+ len = send - s;
291
+ p++;
292
+ }
293
+ else if (ISDIGIT(*p)) {
294
+ errno = 0;
295
+ len = STRTOUL(p, (char**)&p, 10);
296
+ if (len < 0 || errno) {
297
+ rb_raise(rb_eRangeError, "pack length too big");
298
+ }
299
+ }
300
+ else {
301
+ len = (type != '@');
302
+ }
303
+
304
+ switch (type) {
305
+ case '%':
306
+ rb_raise(rb_eArgError, "%% is not supported");
307
+ break;
308
+
/* 'A': take up to len bytes, then drop trailing spaces and NULs from the
 * pushed string; the cursor still advances by the full field width. */
309
+ case 'A':
310
+ if (len > send - s) len = send - s;
311
+ {
312
+ long end = len;
313
+ char *t = s + len - 1;
314
+
315
+ while (t >= s) {
316
+ if (*t != ' ' && *t != '\0') break;
317
+ t--; len--;
318
+ }
319
+ UNPACK_PUSH(rb_str_new(s, len));
320
+ s += end;
321
+ }
322
+ break;
323
+
/* 'Z': push the bytes up to the first NUL within the field. */
324
+ case 'Z':
325
+ {
326
+ char *t = s;
327
+
328
+ if (len > send-s) len = send-s;
329
+ while (t < s+len && *t) t++;
330
+ UNPACK_PUSH(rb_str_new(s, t-s));
331
+ if (t < send) t++;
332
+ s = star ? t : s+len;
333
+ }
334
+ break;
335
+
336
+ case 'a':
337
+ if (len > send - s) len = send - s;
338
+ UNPACK_PUSH(rb_str_new(s, len));
339
+ s += len;
340
+ break;
341
+
/* 'b': bit string, least significant bit of each byte first. */
342
+ case 'b':
343
+ {
344
+ VALUE bitstr;
345
+ char *t;
346
+ int bits;
347
+ long i;
348
+
349
+ if (p[-1] == '*' || len > (send - s) * 8)
350
+ len = (send - s) * 8;
351
+ bits = 0;
352
+ bitstr = rb_usascii_str_new(0, len);
353
+ t = RSTRING_PTR(bitstr);
354
+ for (i=0; i<len; i++) {
355
+ if (i & 7) bits >>= 1;
356
+ else bits = (unsigned char)*s++;
357
+ *t++ = (bits & 1) ? '1' : '0';
358
+ }
359
+ UNPACK_PUSH(bitstr);
360
+ }
361
+ break;
362
+
/* 'B': bit string, most significant bit of each byte first. */
363
+ case 'B':
364
+ {
365
+ VALUE bitstr;
366
+ char *t;
367
+ int bits;
368
+ long i;
369
+
370
+ if (p[-1] == '*' || len > (send - s) * 8)
371
+ len = (send - s) * 8;
372
+ bits = 0;
373
+ bitstr = rb_usascii_str_new(0, len);
374
+ t = RSTRING_PTR(bitstr);
375
+ for (i=0; i<len; i++) {
376
+ if (i & 7) bits <<= 1;
377
+ else bits = (unsigned char)*s++;
378
+ *t++ = (bits & 128) ? '1' : '0';
379
+ }
380
+ UNPACK_PUSH(bitstr);
381
+ }
382
+ break;
383
+
/* 'h': hex string, low nibble of each byte first. */
384
+ case 'h':
385
+ {
386
+ VALUE bitstr;
387
+ char *t;
388
+ int bits;
389
+ long i;
390
+
391
+ if (p[-1] == '*' || len > (send - s) * 2)
392
+ len = (send - s) * 2;
393
+ bits = 0;
394
+ bitstr = rb_usascii_str_new(0, len);
395
+ t = RSTRING_PTR(bitstr);
396
+ for (i=0; i<len; i++) {
397
+ if (i & 1)
398
+ bits >>= 4;
399
+ else
400
+ bits = (unsigned char)*s++;
401
+ *t++ = hexdigits[bits & 15];
402
+ }
403
+ UNPACK_PUSH(bitstr);
404
+ }
405
+ break;
406
+
/* 'H': hex string, high nibble of each byte first. */
407
+ case 'H':
408
+ {
409
+ VALUE bitstr;
410
+ char *t;
411
+ int bits;
412
+ long i;
413
+
414
+ if (p[-1] == '*' || len > (send - s) * 2)
415
+ len = (send - s) * 2;
416
+ bits = 0;
417
+ bitstr = rb_usascii_str_new(0, len);
418
+ t = RSTRING_PTR(bitstr);
419
+ for (i=0; i<len; i++) {
420
+ if (i & 1)
421
+ bits <<= 4;
422
+ else
423
+ bits = (unsigned char)*s++;
424
+ *t++ = hexdigits[(bits >> 4) & 15];
425
+ }
426
+ UNPACK_PUSH(bitstr);
427
+ }
428
+ break;
429
+
/* The integer directives below only choose signedness, width and byte
 * order, then jump to the shared decoding tail at unpack_integer. */
430
+ case 'c':
431
+ signed_p = 1;
432
+ integer_size = 1;
433
+ bigendian_p = BIGENDIAN_P(); /* not effective */
434
+ goto unpack_integer;
435
+
436
+ case 'C':
437
+ signed_p = 0;
438
+ integer_size = 1;
439
+ bigendian_p = BIGENDIAN_P(); /* not effective */
440
+ goto unpack_integer;
441
+
442
+ case 's':
443
+ signed_p = 1;
444
+ integer_size = NATINT_LEN(short, 2);
445
+ bigendian_p = BIGENDIAN_P();
446
+ goto unpack_integer;
447
+
448
+ case 'S':
449
+ signed_p = 0;
450
+ integer_size = NATINT_LEN(short, 2);
451
+ bigendian_p = BIGENDIAN_P();
452
+ goto unpack_integer;
453
+
454
+ case 'i':
455
+ signed_p = 1;
456
+ integer_size = (int)sizeof(int);
457
+ bigendian_p = BIGENDIAN_P();
458
+ goto unpack_integer;
459
+
460
+ case 'I':
461
+ signed_p = 0;
462
+ integer_size = (int)sizeof(int);
463
+ bigendian_p = BIGENDIAN_P();
464
+ goto unpack_integer;
465
+
466
+ case 'l':
467
+ signed_p = 1;
468
+ integer_size = NATINT_LEN(long, 4);
469
+ bigendian_p = BIGENDIAN_P();
470
+ goto unpack_integer;
471
+
472
+ case 'L':
473
+ signed_p = 0;
474
+ integer_size = NATINT_LEN(long, 4);
475
+ bigendian_p = BIGENDIAN_P();
476
+ goto unpack_integer;
477
+
478
+ case 'q':
479
+ signed_p = 1;
480
+ integer_size = NATINT_LEN_Q;
481
+ bigendian_p = BIGENDIAN_P();
482
+ goto unpack_integer;
483
+
484
+ case 'Q':
485
+ signed_p = 0;
486
+ integer_size = NATINT_LEN_Q;
487
+ bigendian_p = BIGENDIAN_P();
488
+ goto unpack_integer;
489
+
490
+ case 'j':
491
+ signed_p = 1;
492
+ integer_size = sizeof(intptr_t);
493
+ bigendian_p = BIGENDIAN_P();
494
+ goto unpack_integer;
495
+
496
+ case 'J':
497
+ signed_p = 0;
498
+ integer_size = sizeof(uintptr_t);
499
+ bigendian_p = BIGENDIAN_P();
500
+ goto unpack_integer;
501
+
502
+ case 'n':
503
+ signed_p = 0;
504
+ integer_size = 2;
505
+ bigendian_p = 1;
506
+ goto unpack_integer;
507
+
508
+ case 'N':
509
+ signed_p = 0;
510
+ integer_size = 4;
511
+ bigendian_p = 1;
512
+ goto unpack_integer;
513
+
514
+ case 'v':
515
+ signed_p = 0;
516
+ integer_size = 2;
517
+ bigendian_p = 0;
518
+ goto unpack_integer;
519
+
520
+ case 'V':
521
+ signed_p = 0;
522
+ integer_size = 4;
523
+ bigendian_p = 0;
524
+ goto unpack_integer;
525
+
/* Shared tail for all integer directives: an explicit '<' / '>' overrides
 * the byte order chosen above, then `len` items are decoded. */
526
+ unpack_integer:
527
+ if (explicit_endian) {
528
+ bigendian_p = explicit_endian == '>';
529
+ }
530
+ PACK_LENGTH_ADJUST_SIZE(integer_size);
531
+ while (len-- > 0) {
532
+ int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
533
+ VALUE val;
534
+ if (signed_p)
535
+ flags |= INTEGER_PACK_2COMP;
536
+ val = rb_integer_unpack(s, integer_size, 1, 0, flags);
537
+ UNPACK_PUSH(val);
538
+ s += integer_size;
539
+ }
540
+ PACK_ITEM_ADJUST();
541
+ break;
542
+
/* 'f' / 'F': float copied as-is, with no byte swapping. */
543
+ case 'f':
544
+ case 'F':
545
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
546
+ while (len-- > 0) {
547
+ float tmp;
548
+ memcpy(&tmp, s, sizeof(float));
549
+ s += sizeof(float);
550
+ UNPACK_PUSH(DBL2NUM((double)tmp));
551
+ }
552
+ PACK_ITEM_ADJUST();
553
+ break;
554
+
/* 'e': float converted with VTOHF (swapped only on big-endian hosts). */
555
+ case 'e':
556
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
557
+ while (len-- > 0) {
558
+ FLOAT_CONVWITH(tmp);
559
+ memcpy(tmp.buf, s, sizeof(float));
560
+ s += sizeof(float);
561
+ VTOHF(tmp);
562
+ UNPACK_PUSH(DBL2NUM(tmp.f));
563
+ }
564
+ PACK_ITEM_ADJUST();
565
+ break;
566
+
/* 'E': double converted with VTOHD (swapped only on big-endian hosts). */
567
+ case 'E':
568
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
569
+ while (len-- > 0) {
570
+ DOUBLE_CONVWITH(tmp);
571
+ memcpy(tmp.buf, s, sizeof(double));
572
+ s += sizeof(double);
573
+ VTOHD(tmp);
574
+ UNPACK_PUSH(DBL2NUM(tmp.d));
575
+ }
576
+ PACK_ITEM_ADJUST();
577
+ break;
578
+
/* 'D' / 'd': double copied as-is, with no byte swapping. */
579
+ case 'D':
580
+ case 'd':
581
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
582
+ while (len-- > 0) {
583
+ double tmp;
584
+ memcpy(&tmp, s, sizeof(double));
585
+ s += sizeof(double);
586
+ UNPACK_PUSH(DBL2NUM(tmp));
587
+ }
588
+ PACK_ITEM_ADJUST();
589
+ break;
590
+
/* 'g': float converted with NTOHF (swapped unless the host is big-endian). */
591
+ case 'g':
592
+ PACK_LENGTH_ADJUST_SIZE(sizeof(float));
593
+ while (len-- > 0) {
594
+ FLOAT_CONVWITH(tmp);
595
+ memcpy(tmp.buf, s, sizeof(float));
596
+ s += sizeof(float);
597
+ NTOHF(tmp);
598
+ UNPACK_PUSH(DBL2NUM(tmp.f));
599
+ }
600
+ PACK_ITEM_ADJUST();
601
+ break;
602
+
/* 'G': double converted with NTOHD (swapped unless the host is big-endian). */
603
+ case 'G':
604
+ PACK_LENGTH_ADJUST_SIZE(sizeof(double));
605
+ while (len-- > 0) {
606
+ DOUBLE_CONVWITH(tmp);
607
+ memcpy(tmp.buf, s, sizeof(double));
608
+ s += sizeof(double);
609
+ NTOHD(tmp);
610
+ UNPACK_PUSH(DBL2NUM(tmp.d));
611
+ }
612
+ PACK_ITEM_ADJUST();
613
+ break;
614
+
/* 'U': decode up to len UTF-8 sequences via utf8_to_uv(), which reports
 * the number of bytes it consumed through `alen`. */
615
+ case 'U':
616
+ if (len > send - s) len = send - s;
617
+ while (len > 0 && s < send) {
618
+ long alen = send - s;
619
+ unsigned long l;
620
+
621
+ l = utf8_to_uv(s, &alen);
622
+ s += alen; len--;
623
+ UNPACK_PUSH(ULONG2NUM(l));
624
+ }
625
+ break;
626
+
/* 'u': uuencoded data.  Each line starts with a length character,
 * followed by groups of four characters carrying three bytes. */
627
+ case 'u':
628
+ {
629
+ VALUE buf = rb_str_new(0, (send - s)*3/4);
630
+ char *ptr = RSTRING_PTR(buf);
631
+ long total = 0;
632
+
633
+ while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
634
+ long a,b,c,d;
635
+ char hunk[3];
636
+
637
+ len = ((unsigned char)*s++ - ' ') & 077;
638
+
/* Never write more than the output buffer was sized for. */
639
+ total += len;
640
+ if (total > RSTRING_LEN(buf)) {
641
+ len -= total - RSTRING_LEN(buf);
642
+ total = RSTRING_LEN(buf);
643
+ }
644
+
645
+ while (len > 0) {
646
+ long mlen = len > 3 ? 3 : len;
647
+
648
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
649
+ a = ((unsigned char)*s++ - ' ') & 077;
650
+ else
651
+ a = 0;
652
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
653
+ b = ((unsigned char)*s++ - ' ') & 077;
654
+ else
655
+ b = 0;
656
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
657
+ c = ((unsigned char)*s++ - ' ') & 077;
658
+ else
659
+ c = 0;
660
+ if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
661
+ d = ((unsigned char)*s++ - ' ') & 077;
662
+ else
663
+ d = 0;
664
+ hunk[0] = (char)(a << 2 | b >> 4);
665
+ hunk[1] = (char)(b << 4 | c >> 2);
666
+ hunk[2] = (char)(c << 6 | d);
667
+ memcpy(ptr, hunk, mlen);
668
+ ptr += mlen;
669
+ len -= mlen;
670
+ }
671
+ if (s < send && (unsigned char)*s != '\r' && *s != '\n')
672
+ s++; /* possible checksum byte */
673
+ if (s < send && *s == '\r') s++;
674
+ if (s < send && *s == '\n') s++;
675
+ }
676
+
677
+ rb_str_set_len(buf, total);
678
+ UNPACK_PUSH(buf);
679
+ }
680
+ break;
681
+
/* 'm': base64. */
682
+ case 'm':
683
+ {
684
+ VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
685
+ char *ptr = RSTRING_PTR(buf);
686
+ int a = -1,b = -1,c = 0,d = 0;
687
+ static signed char b64_xtable[256];
688
+
/* Build the reverse lookup table on first use; a zero-initialised table
 * has b64_xtable['/'] == 0, which marks it as not built yet. */
689
+ if (b64_xtable['/'] <= 0) {
690
+ int i;
691
+
692
+ for (i = 0; i < 256; i++) {
693
+ b64_xtable[i] = -1;
694
+ }
695
+ for (i = 0; i < 64; i++) {
696
+ b64_xtable[(unsigned char)b64_table[i]] = (char)i;
697
+ }
698
+ }
/* A count of 0 selects strict decoding: any character outside the
 * alphabet or any malformed padding raises ArgumentError. */
699
+ if (len == 0) {
700
+ while (s < send) {
701
+ a = b = c = d = -1;
702
+ a = b64_xtable[(unsigned char)*s++];
703
+ if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
704
+ b = b64_xtable[(unsigned char)*s++];
705
+ if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
706
+ if (*s == '=') {
707
+ if (s + 2 == send && *(s + 1) == '=') break;
708
+ rb_raise(rb_eArgError, "invalid base64");
709
+ }
710
+ c = b64_xtable[(unsigned char)*s++];
711
+ if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
712
+ if (s + 1 == send && *s == '=') break;
713
+ d = b64_xtable[(unsigned char)*s++];
714
+ if (d == -1) rb_raise(rb_eArgError, "invalid base64");
715
+ *ptr++ = castchar(a << 2 | b >> 4);
716
+ *ptr++ = castchar(b << 4 | c >> 2);
717
+ *ptr++ = castchar(c << 6 | d);
718
+ }
719
+ if (c == -1) {
720
+ *ptr++ = castchar(a << 2 | b >> 4);
721
+ if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
722
+ }
723
+ else if (d == -1) {
724
+ *ptr++ = castchar(a << 2 | b >> 4);
725
+ *ptr++ = castchar(b << 4 | c >> 2);
726
+ if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
727
+ }
728
+ }
/* Any other count selects lenient decoding: characters outside the
 * alphabet are skipped and decoding stops at '=' or end of input. */
729
+ else {
730
+ while (s < send) {
731
+ a = b = c = d = -1;
732
+ while ((a = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
733
+ if (s >= send) break;
734
+ s++;
735
+ while ((b = b64_xtable[(unsigned char)*s]) == -1 && s < send) {s++;}
736
+ if (s >= send) break;
737
+ s++;
738
+ while ((c = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
739
+ if (*s == '=' || s >= send) break;
740
+ s++;
741
+ while ((d = b64_xtable[(unsigned char)*s]) == -1 && s < send) {if (*s == '=') break; s++;}
742
+ if (*s == '=' || s >= send) break;
743
+ s++;
744
+ *ptr++ = castchar(a << 2 | b >> 4);
745
+ *ptr++ = castchar(b << 4 | c >> 2);
746
+ *ptr++ = castchar(c << 6 | d);
747
+ a = -1;
748
+ }
749
+ if (a != -1 && b != -1) {
750
+ if (c == -1)
751
+ *ptr++ = castchar(a << 2 | b >> 4);
752
+ else {
753
+ *ptr++ = castchar(a << 2 | b >> 4);
754
+ *ptr++ = castchar(b << 4 | c >> 2);
755
+ }
756
+ }
757
+ }
758
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
759
+ UNPACK_PUSH(buf);
760
+ }
761
+ break;
762
+
/* 'M': quoted-printable.  `ss` marks the start of the input that has not
 * been decoded yet; whatever remains from `ss` is appended verbatim. */
763
+ case 'M':
764
+ {
765
+ VALUE buf = rb_str_new(0, send - s);
766
+ char *ptr = RSTRING_PTR(buf), *ss = s;
767
+ int csum = 0;
768
+ int c1, c2;
769
+
770
+ while (s < send) {
771
+ if (*s == '=') {
772
+ if (++s == send) break;
773
+ if (s+1 < send && *s == '\r' && *(s+1) == '\n')
774
+ s++;
775
+ if (*s != '\n') {
776
+ if ((c1 = hex2num(*s)) == -1) break;
777
+ if (++s == send) break;
778
+ if ((c2 = hex2num(*s)) == -1) break;
779
+ csum |= *ptr++ = castchar(c1 << 4 | c2);
780
+ }
781
+ }
782
+ else {
783
+ csum |= *ptr++ = *s;
784
+ }
785
+ s++;
786
+ ss = s;
787
+ }
788
+ rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
789
+ rb_str_buf_cat(buf, ss, send-ss);
790
+ csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
791
+ ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
792
+ UNPACK_PUSH(buf);
793
+ }
794
+ break;
795
+
/* '@': move the cursor to absolute offset len from the start of str. */
796
+ case '@':
797
+ if (len > RSTRING_LEN(str))
798
+ rb_raise(rb_eArgError, "@ outside of string");
799
+ s = RSTRING_PTR(str) + len;
800
+ break;
801
+
/* 'X': move the cursor back by len bytes. */
802
+ case 'X':
803
+ if (len > s - RSTRING_PTR(str))
804
+ rb_raise(rb_eArgError, "X outside of string");
805
+ s -= len;
806
+ break;
807
+
/* 'x': move the cursor forward by len bytes. */
808
+ case 'x':
809
+ if (len > send - s)
810
+ rb_raise(rb_eArgError, "x outside of string");
811
+ s += len;
812
+ break;
813
+
/* 'P': read one raw pointer and accept it only if it equals the buffer
 * of a string in the array associated with `str`. */
814
+ case 'P':
815
+ if (sizeof(char *) <= (size_t)(send - s)) {
816
+ VALUE tmp = Qnil;
817
+ char *t;
818
+
819
+ memcpy(&t, s, sizeof(char *));
820
+ s += sizeof(char *);
821
+
822
+ if (t) {
823
+ VALUE a;
824
+ const VALUE *p, *pend;
825
+
826
+ if (!(a = str_associated(str))) {
827
+ rb_raise(rb_eArgError, "no associated pointer");
828
+ }
829
+ p = RARRAY_CONST_PTR(a);
830
+ pend = p + RARRAY_LEN(a);
831
+ while (p < pend) {
832
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
833
+ if (len < RSTRING_LEN(*p)) {
834
+ tmp = rb_str_new(t, len);
835
+ str_associate(tmp, a);
836
+ }
837
+ else {
838
+ tmp = *p;
839
+ }
840
+ break;
841
+ }
842
+ p++;
843
+ }
844
+ if (p == pend) {
845
+ rb_raise(rb_eArgError, "non associated pointer");
846
+ }
847
+ }
848
+ UNPACK_PUSH(tmp);
849
+ }
850
+ break;
851
+
/* 'p': like 'P' but repeated len times, always pushing the associated
 * string itself. */
852
+ case 'p':
853
+ if (len > (long)((send - s) / sizeof(char *)))
854
+ len = (send - s) / sizeof(char *);
855
+ while (len-- > 0) {
856
+ if ((size_t)(send - s) < sizeof(char *))
857
+ break;
858
+ else {
859
+ VALUE tmp = Qnil;
860
+ char *t;
861
+
862
+ memcpy(&t, s, sizeof(char *));
863
+ s += sizeof(char *);
864
+
865
+ if (t) {
866
+ VALUE a;
867
+ const VALUE *p, *pend;
868
+
869
+ if (!(a = str_associated(str))) {
870
+ rb_raise(rb_eArgError, "no associated pointer");
871
+ }
872
+ p = RARRAY_CONST_PTR(a);
873
+ pend = p + RARRAY_LEN(a);
874
+ while (p < pend) {
875
+ if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
876
+ tmp = *p;
877
+ break;
878
+ }
879
+ p++;
880
+ }
881
+ if (p == pend) {
882
+ rb_raise(rb_eArgError, "non associated pointer");
883
+ }
884
+ }
885
+ UNPACK_PUSH(tmp);
886
+ }
887
+ }
888
+ break;
889
+
/* 'w': variable-length integer.  Bytes with the high bit set continue
 * the number; the first byte without it ends the number. */
890
+ case 'w':
891
+ {
892
+ char *s0 = s;
893
+ while (len > 0 && s < send) {
894
+ if (*s & 0x80) {
895
+ s++;
896
+ }
897
+ else {
898
+ s++;
899
+ UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
900
+ len--;
901
+ s0 = s;
902
+ }
903
+ }
904
+ }
905
+ break;
906
+
907
+ default:
908
+ unknown_directive("unpack", type, fmt);
909
+ break;
910
+ }
911
+ }
912
+
/* Report where the read cursor ended up, relative to the start of str. */
913
+ *parsed_len = s - init_s;
914
+ return ary;
915
+ }
916
+
917
+ int
918
+ srb_uv_to_utf8(char buf[6], unsigned long uv)
919
+ {
920
+ if (uv <= 0x7f) {
921
+ buf[0] = (char)uv;
922
+ return 1;
923
+ }
924
+ if (uv <= 0x7ff) {
925
+ buf[0] = castchar(((uv>>6)&0xff)|0xc0);
926
+ buf[1] = castchar((uv&0x3f)|0x80);
927
+ return 2;
928
+ }
929
+ if (uv <= 0xffff) {
930
+ buf[0] = castchar(((uv>>12)&0xff)|0xe0);
931
+ buf[1] = castchar(((uv>>6)&0x3f)|0x80);
932
+ buf[2] = castchar((uv&0x3f)|0x80);
933
+ return 3;
934
+ }
935
+ if (uv <= 0x1fffff) {
936
+ buf[0] = castchar(((uv>>18)&0xff)|0xf0);
937
+ buf[1] = castchar(((uv>>12)&0x3f)|0x80);
938
+ buf[2] = castchar(((uv>>6)&0x3f)|0x80);
939
+ buf[3] = castchar((uv&0x3f)|0x80);
940
+ return 4;
941
+ }
942
+ if (uv <= 0x3ffffff) {
943
+ buf[0] = castchar(((uv>>24)&0xff)|0xf8);
944
+ buf[1] = castchar(((uv>>18)&0x3f)|0x80);
945
+ buf[2] = castchar(((uv>>12)&0x3f)|0x80);
946
+ buf[3] = castchar(((uv>>6)&0x3f)|0x80);
947
+ buf[4] = castchar((uv&0x3f)|0x80);
948
+ return 5;
949
+ }
950
+ if (uv <= 0x7fffffff) {
951
+ buf[0] = castchar(((uv>>30)&0xff)|0xfc);
952
+ buf[1] = castchar(((uv>>24)&0x3f)|0x80);
953
+ buf[2] = castchar(((uv>>18)&0x3f)|0x80);
954
+ buf[3] = castchar(((uv>>12)&0x3f)|0x80);
955
+ buf[4] = castchar(((uv>>6)&0x3f)|0x80);
956
+ buf[5] = castchar((uv&0x3f)|0x80);
957
+ return 6;
958
+ }
959
+ rb_raise(rb_eRangeError, "pack(U): value out of range");
960
+
961
+ UNREACHABLE_RETURN(Qnil);
962
+ }
963
+
964
/*
 * utf8_limits[k] is the smallest value that legitimately needs a
 * (k+1)-byte sequence.  A decoded value below the limit for its length
 * is an over-long ("redundant") encoding.
 */
static const unsigned long utf8_limits[] = {
    0x0,            /* 1 byte  */
    0x80,           /* 2 bytes */
    0x800,          /* 3 bytes */
    0x10000,        /* 4 bytes */
    0x200000,       /* 5 bytes */
    0x4000000,      /* 6 bytes */
    0x80000000,     /* 7 bytes */
};
973
+
974
+ static unsigned long
975
+ utf8_to_uv(const char *p, long *lenp)
976
+ {
977
+ int c = *p++ & 0xff;
978
+ unsigned long uv = c;
979
+ long n;
980
+
981
+ if (!(uv & 0x80)) {
982
+ *lenp = 1;
983
+ return uv;
984
+ }
985
+ if (!(uv & 0x40)) {
986
+ *lenp = 1;
987
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
988
+ }
989
+
990
+ if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
991
+ else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
992
+ else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
993
+ else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
994
+ else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
995
+ else {
996
+ *lenp = 1;
997
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
998
+ }
999
+ if (n > *lenp) {
1000
+ rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
1001
+ n, *lenp);
1002
+ }
1003
+ *lenp = n--;
1004
+ if (n != 0) {
1005
+ while (n--) {
1006
+ c = *p++ & 0xff;
1007
+ if ((c & 0xc0) != 0x80) {
1008
+ *lenp -= n + 1;
1009
+ rb_raise(rb_eArgError, "malformed UTF-8 character");
1010
+ }
1011
+ else {
1012
+ c &= 0x3f;
1013
+ uv = uv << 6 | c;
1014
+ }
1015
+ }
1016
+ }
1017
+ n = *lenp - 1;
1018
+ if (uv < utf8_limits[n]) {
1019
+ rb_raise(rb_eArgError, "redundant UTF-8 sequence");
1020
+ }
1021
+ return uv;
1022
+ }