zscan 2.0.4 → 2.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/ext/extconf.rb +4 -1
- data/ext/pack/internal-26.h +2141 -0
- data/ext/pack/internal.h +319 -70
- data/ext/pack/pack-26.c +2010 -0
- data/ext/pack/pack.c +923 -15
- data/lib/zscan.rb +1 -1
- data/zscan.gemspec +1 -1
- metadata +5 -3
data/ext/pack/pack.c
CHANGED
|
@@ -2,13 +2,14 @@
|
|
|
2
2
|
|
|
3
3
|
pack.c -
|
|
4
4
|
|
|
5
|
-
$Author$
|
|
5
|
+
$Author: naruse $
|
|
6
6
|
created at: Thu Feb 10 15:17:05 JST 1994
|
|
7
7
|
|
|
8
8
|
Copyright (C) 1993-2007 Yukihiro Matsumoto
|
|
9
9
|
|
|
10
10
|
**********************************************************************/
|
|
11
11
|
|
|
12
|
+
#include "ruby/encoding.h"
|
|
12
13
|
#include "internal.h"
|
|
13
14
|
#include <sys/types.h>
|
|
14
15
|
#include <ctype.h>
|
|
@@ -126,6 +127,758 @@ str_associated(VALUE str)
|
|
|
126
127
|
return rb_ivar_lookup(str, id_associated, Qfalse);
|
|
127
128
|
}
|
|
128
129
|
|
|
130
|
+
/*
|
|
131
|
+
* call-seq:
|
|
132
|
+
* arr.pack( aTemplateString ) -> aBinaryString
|
|
133
|
+
* arr.pack( aTemplateString, buffer: aBufferString ) -> aBufferString
|
|
134
|
+
*
|
|
135
|
+
* Packs the contents of <i>arr</i> into a binary sequence according to
|
|
136
|
+
* the directives in <i>aTemplateString</i> (see the table below)
|
|
137
|
+
* Directives ``A,'' ``a,'' and ``Z'' may be followed by a count,
|
|
138
|
+
* which gives the width of the resulting field. The remaining
|
|
139
|
+
* directives also may take a count, indicating the number of array
|
|
140
|
+
* elements to convert. If the count is an asterisk
|
|
141
|
+
* (``<code>*</code>''), all remaining array elements will be
|
|
142
|
+
* converted. Any of the directives ``<code>sSiIlL</code>'' may be
|
|
143
|
+
* followed by an underscore (``<code>_</code>'') or
|
|
144
|
+
* exclamation mark (``<code>!</code>'') to use the underlying
|
|
145
|
+
* platform's native size for the specified type; otherwise, they use a
|
|
146
|
+
* platform-independent size. Spaces are ignored in the template
|
|
147
|
+
* string. See also <code>String#unpack</code>.
|
|
148
|
+
*
|
|
149
|
+
* a = [ "a", "b", "c" ]
|
|
150
|
+
* n = [ 65, 66, 67 ]
|
|
151
|
+
* a.pack("A3A3A3") #=> "a  b  c  "
|
|
152
|
+
* a.pack("a3a3a3") #=> "a\000\000b\000\000c\000\000"
|
|
153
|
+
* n.pack("ccc") #=> "ABC"
|
|
154
|
+
*
|
|
155
|
+
* If <i>aBufferString</i> is specified and its capacity is enough,
|
|
156
|
+
* +pack+ uses it as the buffer and returns it.
|
|
157
|
+
* When an offset is specified at the beginning of <i>aTemplateString</i>,
|
|
158
|
+
* the result is filled after the offset.
|
|
159
|
+
* If the original contents of <i>aBufferString</i> exist and are longer than
|
|
160
|
+
* the offset, the contents after the offset are overwritten by the result.
|
|
161
|
+
* If it's shorter, the gap is filled with ``<code>\0</code>''.
|
|
162
|
+
*
|
|
163
|
+
* Note that ``buffer:'' option does not guarantee not to allocate memory
|
|
164
|
+
* in +pack+. If the capacity of <i>aBufferString</i> is not enough,
|
|
165
|
+
* +pack+ allocates memory.
|
|
166
|
+
*
|
|
167
|
+
* Directives for +pack+.
|
|
168
|
+
*
|
|
169
|
+
* Integer | Array |
|
|
170
|
+
* Directive | Element | Meaning
|
|
171
|
+
* ----------------------------------------------------------------------------
|
|
172
|
+
* C | Integer | 8-bit unsigned (unsigned char)
|
|
173
|
+
* S | Integer | 16-bit unsigned, native endian (uint16_t)
|
|
174
|
+
* L | Integer | 32-bit unsigned, native endian (uint32_t)
|
|
175
|
+
* Q | Integer | 64-bit unsigned, native endian (uint64_t)
|
|
176
|
+
* J | Integer | pointer width unsigned, native endian (uintptr_t)
|
|
177
|
+
* | | (J is available since Ruby 2.3.)
|
|
178
|
+
* | |
|
|
179
|
+
* c | Integer | 8-bit signed (signed char)
|
|
180
|
+
* s | Integer | 16-bit signed, native endian (int16_t)
|
|
181
|
+
* l | Integer | 32-bit signed, native endian (int32_t)
|
|
182
|
+
* q | Integer | 64-bit signed, native endian (int64_t)
|
|
183
|
+
* j | Integer | pointer width signed, native endian (intptr_t)
|
|
184
|
+
* | | (j is available since Ruby 2.3.)
|
|
185
|
+
* | |
|
|
186
|
+
* S_ S! | Integer | unsigned short, native endian
|
|
187
|
+
* I I_ I! | Integer | unsigned int, native endian
|
|
188
|
+
* L_ L! | Integer | unsigned long, native endian
|
|
189
|
+
* Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
|
|
190
|
+
* | | if the platform has no long long type.)
|
|
191
|
+
* | | (Q_ and Q! are available since Ruby 2.1.)
|
|
192
|
+
* J! | Integer | uintptr_t, native endian (same as J)
|
|
193
|
+
* | | (J! is available since Ruby 2.3.)
|
|
194
|
+
* | |
|
|
195
|
+
* s_ s! | Integer | signed short, native endian
|
|
196
|
+
* i i_ i! | Integer | signed int, native endian
|
|
197
|
+
* l_ l! | Integer | signed long, native endian
|
|
198
|
+
* q_ q! | Integer | signed long long, native endian (ArgumentError
|
|
199
|
+
* | | if the platform has no long long type.)
|
|
200
|
+
* | | (q_ and q! are available since Ruby 2.1.)
|
|
201
|
+
* j! | Integer | intptr_t, native endian (same as j)
|
|
202
|
+
* | | (j! is available since Ruby 2.3.)
|
|
203
|
+
* | |
|
|
204
|
+
* S> s> S!> s!> | Integer | same as the directives without ">" except
|
|
205
|
+
* L> l> L!> l!> | | big endian
|
|
206
|
+
* I!> i!> | | (available since Ruby 1.9.3)
|
|
207
|
+
* Q> q> Q!> q!> | | "S>" is same as "n"
|
|
208
|
+
* J> j> J!> j!> | | "L>" is same as "N"
|
|
209
|
+
* | |
|
|
210
|
+
* S< s< S!< s!< | Integer | same as the directives without "<" except
|
|
211
|
+
* L< l< L!< l!< | | little endian
|
|
212
|
+
* I!< i!< | | (available since Ruby 1.9.3)
|
|
213
|
+
* Q< q< Q!< q!< | | "S<" is same as "v"
|
|
214
|
+
* J< j< J!< j!< | | "L<" is same as "V"
|
|
215
|
+
* | |
|
|
216
|
+
* n | Integer | 16-bit unsigned, network (big-endian) byte order
|
|
217
|
+
* N | Integer | 32-bit unsigned, network (big-endian) byte order
|
|
218
|
+
* v | Integer | 16-bit unsigned, VAX (little-endian) byte order
|
|
219
|
+
* V | Integer | 32-bit unsigned, VAX (little-endian) byte order
|
|
220
|
+
* | |
|
|
221
|
+
* U | Integer | UTF-8 character
|
|
222
|
+
* w | Integer | BER-compressed integer
|
|
223
|
+
*
|
|
224
|
+
* Float | Array |
|
|
225
|
+
* Directive | Element | Meaning
|
|
226
|
+
* ---------------------------------------------------------------------------
|
|
227
|
+
* D d | Float | double-precision, native format
|
|
228
|
+
* F f | Float | single-precision, native format
|
|
229
|
+
* E | Float | double-precision, little-endian byte order
|
|
230
|
+
* e | Float | single-precision, little-endian byte order
|
|
231
|
+
* G | Float | double-precision, network (big-endian) byte order
|
|
232
|
+
* g | Float | single-precision, network (big-endian) byte order
|
|
233
|
+
*
|
|
234
|
+
* String | Array |
|
|
235
|
+
* Directive | Element | Meaning
|
|
236
|
+
* ---------------------------------------------------------------------------
|
|
237
|
+
* A | String | arbitrary binary string (space padded, count is width)
|
|
238
|
+
* a | String | arbitrary binary string (null padded, count is width)
|
|
239
|
+
* Z | String | same as ``a'', except that null is added with *
|
|
240
|
+
* B | String | bit string (MSB first)
|
|
241
|
+
* b | String | bit string (LSB first)
|
|
242
|
+
* H | String | hex string (high nibble first)
|
|
243
|
+
* h | String | hex string (low nibble first)
|
|
244
|
+
* u | String | UU-encoded string
|
|
245
|
+
* M | String | quoted printable, MIME encoding (see also RFC2045)
|
|
246
|
+
* | | (text mode but input must use LF and output LF)
|
|
247
|
+
* m | String | base64 encoded string (see RFC 2045, count is width)
|
|
248
|
+
* | | (if count is 0, no line feeds are added, see RFC 4648)
|
|
249
|
+
* P | String | pointer to a structure (fixed-length string)
|
|
250
|
+
* p | String | pointer to a null-terminated string
|
|
251
|
+
*
|
|
252
|
+
* Misc. | Array |
|
|
253
|
+
* Directive | Element | Meaning
|
|
254
|
+
* ---------------------------------------------------------------------------
|
|
255
|
+
* @ | --- | moves to absolute position
|
|
256
|
+
* X | --- | back up a byte
|
|
257
|
+
* x | --- | null byte
|
|
258
|
+
*/
|
|
259
|
+
|
|
260
|
+
static VALUE
|
|
261
|
+
pack_pack(int argc, VALUE *argv, VALUE ary)
|
|
262
|
+
{
|
|
263
|
+
static const char nul10[] = "\0\0\0\0\0\0\0\0\0\0";
|
|
264
|
+
static const char spc10[] = "          ";
|
|
265
|
+
const char *p, *pend;
|
|
266
|
+
VALUE fmt, opt = Qnil, res, from, associates = 0, buffer = 0;
|
|
267
|
+
char type;
|
|
268
|
+
long len, idx, plen;
|
|
269
|
+
const char *ptr;
|
|
270
|
+
int enc_info = 1; /* 0 - BINARY, 1 - US-ASCII, 2 - UTF-8 */
|
|
271
|
+
#ifdef NATINT_PACK
|
|
272
|
+
int natint; /* native integer */
|
|
273
|
+
#endif
|
|
274
|
+
int integer_size, bigendian_p;
|
|
275
|
+
|
|
276
|
+
rb_scan_args(argc, argv, "10:", &fmt, &opt);
|
|
277
|
+
|
|
278
|
+
StringValue(fmt);
|
|
279
|
+
p = RSTRING_PTR(fmt);
|
|
280
|
+
pend = p + RSTRING_LEN(fmt);
|
|
281
|
+
if (!NIL_P(opt)) {
|
|
282
|
+
static ID keyword_ids[1];
|
|
283
|
+
if (!keyword_ids[0])
|
|
284
|
+
CONST_ID(keyword_ids[0], "buffer");
|
|
285
|
+
|
|
286
|
+
rb_get_kwargs(opt, keyword_ids, 0, 1, &buffer);
|
|
287
|
+
|
|
288
|
+
if (buffer != Qundef && !RB_TYPE_P(buffer, T_STRING))
|
|
289
|
+
rb_raise(rb_eTypeError, "buffer must be String, not %s", rb_obj_classname(buffer));
|
|
290
|
+
}
|
|
291
|
+
if (buffer)
|
|
292
|
+
res = buffer;
|
|
293
|
+
else
|
|
294
|
+
res = rb_str_buf_new(0);
|
|
295
|
+
|
|
296
|
+
idx = 0;
|
|
297
|
+
|
|
298
|
+
#define TOO_FEW (rb_raise(rb_eArgError, toofew), 0)
|
|
299
|
+
#define MORE_ITEM (idx < RARRAY_LEN(ary))
|
|
300
|
+
#define THISFROM (MORE_ITEM ? RARRAY_AREF(ary, idx) : TOO_FEW)
|
|
301
|
+
#define NEXTFROM (MORE_ITEM ? RARRAY_AREF(ary, idx++) : TOO_FEW)
|
|
302
|
+
|
|
303
|
+
while (p < pend) {
|
|
304
|
+
int explicit_endian = 0;
|
|
305
|
+
if (RSTRING_PTR(fmt) + RSTRING_LEN(fmt) != pend) {
|
|
306
|
+
rb_raise(rb_eRuntimeError, "format string modified");
|
|
307
|
+
}
|
|
308
|
+
type = *p++; /* get data type */
|
|
309
|
+
#ifdef NATINT_PACK
|
|
310
|
+
natint = 0;
|
|
311
|
+
#endif
|
|
312
|
+
|
|
313
|
+
if (ISSPACE(type)) continue;
|
|
314
|
+
if (type == '#') {
|
|
315
|
+
while ((p < pend) && (*p != '\n')) {
|
|
316
|
+
p++;
|
|
317
|
+
}
|
|
318
|
+
continue;
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
{
|
|
322
|
+
modifiers:
|
|
323
|
+
switch (*p) {
|
|
324
|
+
case '_':
|
|
325
|
+
case '!':
|
|
326
|
+
if (strchr(natstr, type)) {
|
|
327
|
+
#ifdef NATINT_PACK
|
|
328
|
+
natint = 1;
|
|
329
|
+
#endif
|
|
330
|
+
p++;
|
|
331
|
+
}
|
|
332
|
+
else {
|
|
333
|
+
rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
|
|
334
|
+
}
|
|
335
|
+
goto modifiers;
|
|
336
|
+
|
|
337
|
+
case '<':
|
|
338
|
+
case '>':
|
|
339
|
+
if (!strchr(endstr, type)) {
|
|
340
|
+
rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
|
|
341
|
+
}
|
|
342
|
+
if (explicit_endian) {
|
|
343
|
+
rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
|
|
344
|
+
}
|
|
345
|
+
explicit_endian = *p++;
|
|
346
|
+
goto modifiers;
|
|
347
|
+
}
|
|
348
|
+
}
|
|
349
|
+
|
|
350
|
+
if (*p == '*') { /* set data length */
|
|
351
|
+
len = strchr("@Xxu", type) ? 0
|
|
352
|
+
: strchr("PMm", type) ? 1
|
|
353
|
+
: RARRAY_LEN(ary) - idx;
|
|
354
|
+
p++;
|
|
355
|
+
}
|
|
356
|
+
else if (ISDIGIT(*p)) {
|
|
357
|
+
errno = 0;
|
|
358
|
+
len = STRTOUL(p, (char**)&p, 10);
|
|
359
|
+
if (errno) {
|
|
360
|
+
rb_raise(rb_eRangeError, "pack length too big");
|
|
361
|
+
}
|
|
362
|
+
}
|
|
363
|
+
else {
|
|
364
|
+
len = 1;
|
|
365
|
+
}
|
|
366
|
+
|
|
367
|
+
switch (type) {
|
|
368
|
+
case 'U':
|
|
369
|
+
/* if encoding is US-ASCII, upgrade to UTF-8 */
|
|
370
|
+
if (enc_info == 1) enc_info = 2;
|
|
371
|
+
break;
|
|
372
|
+
case 'm': case 'M': case 'u':
|
|
373
|
+
/* keep US-ASCII (do nothing) */
|
|
374
|
+
break;
|
|
375
|
+
default:
|
|
376
|
+
/* fall back to BINARY */
|
|
377
|
+
enc_info = 0;
|
|
378
|
+
break;
|
|
379
|
+
}
|
|
380
|
+
switch (type) {
|
|
381
|
+
case 'A': case 'a': case 'Z':
|
|
382
|
+
case 'B': case 'b':
|
|
383
|
+
case 'H': case 'h':
|
|
384
|
+
from = NEXTFROM;
|
|
385
|
+
if (NIL_P(from)) {
|
|
386
|
+
ptr = "";
|
|
387
|
+
plen = 0;
|
|
388
|
+
}
|
|
389
|
+
else {
|
|
390
|
+
StringValue(from);
|
|
391
|
+
ptr = RSTRING_PTR(from);
|
|
392
|
+
plen = RSTRING_LEN(from);
|
|
393
|
+
OBJ_INFECT(res, from);
|
|
394
|
+
}
|
|
395
|
+
|
|
396
|
+
if (p[-1] == '*')
|
|
397
|
+
len = plen;
|
|
398
|
+
|
|
399
|
+
switch (type) {
|
|
400
|
+
case 'a': /* arbitrary binary string (null padded) */
|
|
401
|
+
case 'A': /* arbitrary binary string (ASCII space padded) */
|
|
402
|
+
case 'Z': /* null terminated string */
|
|
403
|
+
if (plen >= len) {
|
|
404
|
+
rb_str_buf_cat(res, ptr, len);
|
|
405
|
+
if (p[-1] == '*' && type == 'Z')
|
|
406
|
+
rb_str_buf_cat(res, nul10, 1);
|
|
407
|
+
}
|
|
408
|
+
else {
|
|
409
|
+
rb_str_buf_cat(res, ptr, plen);
|
|
410
|
+
len -= plen;
|
|
411
|
+
while (len >= 10) {
|
|
412
|
+
rb_str_buf_cat(res, (type == 'A')?spc10:nul10, 10);
|
|
413
|
+
len -= 10;
|
|
414
|
+
}
|
|
415
|
+
rb_str_buf_cat(res, (type == 'A')?spc10:nul10, len);
|
|
416
|
+
}
|
|
417
|
+
break;
|
|
418
|
+
|
|
419
|
+
#define castchar(from) (char)((from) & 0xff)
|
|
420
|
+
|
|
421
|
+
case 'b': /* bit string (ascending) */
|
|
422
|
+
{
|
|
423
|
+
int byte = 0;
|
|
424
|
+
long i, j = 0;
|
|
425
|
+
|
|
426
|
+
if (len > plen) {
|
|
427
|
+
j = (len - plen + 1)/2;
|
|
428
|
+
len = plen;
|
|
429
|
+
}
|
|
430
|
+
for (i=0; i++ < len; ptr++) {
|
|
431
|
+
if (*ptr & 1)
|
|
432
|
+
byte |= 128;
|
|
433
|
+
if (i & 7)
|
|
434
|
+
byte >>= 1;
|
|
435
|
+
else {
|
|
436
|
+
char c = castchar(byte);
|
|
437
|
+
rb_str_buf_cat(res, &c, 1);
|
|
438
|
+
byte = 0;
|
|
439
|
+
}
|
|
440
|
+
}
|
|
441
|
+
if (len & 7) {
|
|
442
|
+
char c;
|
|
443
|
+
byte >>= 7 - (len & 7);
|
|
444
|
+
c = castchar(byte);
|
|
445
|
+
rb_str_buf_cat(res, &c, 1);
|
|
446
|
+
}
|
|
447
|
+
len = j;
|
|
448
|
+
goto grow;
|
|
449
|
+
}
|
|
450
|
+
break;
|
|
451
|
+
|
|
452
|
+
case 'B': /* bit string (descending) */
|
|
453
|
+
{
|
|
454
|
+
int byte = 0;
|
|
455
|
+
long i, j = 0;
|
|
456
|
+
|
|
457
|
+
if (len > plen) {
|
|
458
|
+
j = (len - plen + 1)/2;
|
|
459
|
+
len = plen;
|
|
460
|
+
}
|
|
461
|
+
for (i=0; i++ < len; ptr++) {
|
|
462
|
+
byte |= *ptr & 1;
|
|
463
|
+
if (i & 7)
|
|
464
|
+
byte <<= 1;
|
|
465
|
+
else {
|
|
466
|
+
char c = castchar(byte);
|
|
467
|
+
rb_str_buf_cat(res, &c, 1);
|
|
468
|
+
byte = 0;
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
if (len & 7) {
|
|
472
|
+
char c;
|
|
473
|
+
byte <<= 7 - (len & 7);
|
|
474
|
+
c = castchar(byte);
|
|
475
|
+
rb_str_buf_cat(res, &c, 1);
|
|
476
|
+
}
|
|
477
|
+
len = j;
|
|
478
|
+
goto grow;
|
|
479
|
+
}
|
|
480
|
+
break;
|
|
481
|
+
|
|
482
|
+
case 'h': /* hex string (low nibble first) */
|
|
483
|
+
{
|
|
484
|
+
int byte = 0;
|
|
485
|
+
long i, j = 0;
|
|
486
|
+
|
|
487
|
+
if (len > plen) {
|
|
488
|
+
j = (len + 1) / 2 - (plen + 1) / 2;
|
|
489
|
+
len = plen;
|
|
490
|
+
}
|
|
491
|
+
for (i=0; i++ < len; ptr++) {
|
|
492
|
+
if (ISALPHA(*ptr))
|
|
493
|
+
byte |= (((*ptr & 15) + 9) & 15) << 4;
|
|
494
|
+
else
|
|
495
|
+
byte |= (*ptr & 15) << 4;
|
|
496
|
+
if (i & 1)
|
|
497
|
+
byte >>= 4;
|
|
498
|
+
else {
|
|
499
|
+
char c = castchar(byte);
|
|
500
|
+
rb_str_buf_cat(res, &c, 1);
|
|
501
|
+
byte = 0;
|
|
502
|
+
}
|
|
503
|
+
}
|
|
504
|
+
if (len & 1) {
|
|
505
|
+
char c = castchar(byte);
|
|
506
|
+
rb_str_buf_cat(res, &c, 1);
|
|
507
|
+
}
|
|
508
|
+
len = j;
|
|
509
|
+
goto grow;
|
|
510
|
+
}
|
|
511
|
+
break;
|
|
512
|
+
|
|
513
|
+
case 'H': /* hex string (high nibble first) */
|
|
514
|
+
{
|
|
515
|
+
int byte = 0;
|
|
516
|
+
long i, j = 0;
|
|
517
|
+
|
|
518
|
+
if (len > plen) {
|
|
519
|
+
j = (len + 1) / 2 - (plen + 1) / 2;
|
|
520
|
+
len = plen;
|
|
521
|
+
}
|
|
522
|
+
for (i=0; i++ < len; ptr++) {
|
|
523
|
+
if (ISALPHA(*ptr))
|
|
524
|
+
byte |= ((*ptr & 15) + 9) & 15;
|
|
525
|
+
else
|
|
526
|
+
byte |= *ptr & 15;
|
|
527
|
+
if (i & 1)
|
|
528
|
+
byte <<= 4;
|
|
529
|
+
else {
|
|
530
|
+
char c = castchar(byte);
|
|
531
|
+
rb_str_buf_cat(res, &c, 1);
|
|
532
|
+
byte = 0;
|
|
533
|
+
}
|
|
534
|
+
}
|
|
535
|
+
if (len & 1) {
|
|
536
|
+
char c = castchar(byte);
|
|
537
|
+
rb_str_buf_cat(res, &c, 1);
|
|
538
|
+
}
|
|
539
|
+
len = j;
|
|
540
|
+
goto grow;
|
|
541
|
+
}
|
|
542
|
+
break;
|
|
543
|
+
}
|
|
544
|
+
break;
|
|
545
|
+
|
|
546
|
+
case 'c': /* signed char */
|
|
547
|
+
case 'C': /* unsigned char */
|
|
548
|
+
integer_size = 1;
|
|
549
|
+
bigendian_p = BIGENDIAN_P(); /* not effective */
|
|
550
|
+
goto pack_integer;
|
|
551
|
+
|
|
552
|
+
case 's': /* s for int16_t, s! for signed short */
|
|
553
|
+
integer_size = NATINT_LEN(short, 2);
|
|
554
|
+
bigendian_p = BIGENDIAN_P();
|
|
555
|
+
goto pack_integer;
|
|
556
|
+
|
|
557
|
+
case 'S': /* S for uint16_t, S! for unsigned short */
|
|
558
|
+
integer_size = NATINT_LEN(short, 2);
|
|
559
|
+
bigendian_p = BIGENDIAN_P();
|
|
560
|
+
goto pack_integer;
|
|
561
|
+
|
|
562
|
+
case 'i': /* i and i! for signed int */
|
|
563
|
+
integer_size = (int)sizeof(int);
|
|
564
|
+
bigendian_p = BIGENDIAN_P();
|
|
565
|
+
goto pack_integer;
|
|
566
|
+
|
|
567
|
+
case 'I': /* I and I! for unsigned int */
|
|
568
|
+
integer_size = (int)sizeof(int);
|
|
569
|
+
bigendian_p = BIGENDIAN_P();
|
|
570
|
+
goto pack_integer;
|
|
571
|
+
|
|
572
|
+
case 'l': /* l for int32_t, l! for signed long */
|
|
573
|
+
integer_size = NATINT_LEN(long, 4);
|
|
574
|
+
bigendian_p = BIGENDIAN_P();
|
|
575
|
+
goto pack_integer;
|
|
576
|
+
|
|
577
|
+
case 'L': /* L for uint32_t, L! for unsigned long */
|
|
578
|
+
integer_size = NATINT_LEN(long, 4);
|
|
579
|
+
bigendian_p = BIGENDIAN_P();
|
|
580
|
+
goto pack_integer;
|
|
581
|
+
|
|
582
|
+
case 'q': /* q for int64_t, q! for signed long long */
|
|
583
|
+
integer_size = NATINT_LEN_Q;
|
|
584
|
+
bigendian_p = BIGENDIAN_P();
|
|
585
|
+
goto pack_integer;
|
|
586
|
+
|
|
587
|
+
case 'Q': /* Q for uint64_t, Q! for unsigned long long */
|
|
588
|
+
integer_size = NATINT_LEN_Q;
|
|
589
|
+
bigendian_p = BIGENDIAN_P();
|
|
590
|
+
goto pack_integer;
|
|
591
|
+
|
|
592
|
+
case 'j': /* j for intptr_t */
|
|
593
|
+
integer_size = sizeof(intptr_t);
|
|
594
|
+
bigendian_p = BIGENDIAN_P();
|
|
595
|
+
goto pack_integer;
|
|
596
|
+
|
|
597
|
+
case 'J': /* J for uintptr_t */
|
|
598
|
+
integer_size = sizeof(uintptr_t);
|
|
599
|
+
bigendian_p = BIGENDIAN_P();
|
|
600
|
+
goto pack_integer;
|
|
601
|
+
|
|
602
|
+
case 'n': /* 16 bit (2 bytes) integer (network byte-order) */
|
|
603
|
+
integer_size = 2;
|
|
604
|
+
bigendian_p = 1;
|
|
605
|
+
goto pack_integer;
|
|
606
|
+
|
|
607
|
+
case 'N': /* 32 bit (4 bytes) integer (network byte-order) */
|
|
608
|
+
integer_size = 4;
|
|
609
|
+
bigendian_p = 1;
|
|
610
|
+
goto pack_integer;
|
|
611
|
+
|
|
612
|
+
case 'v': /* 16 bit (2 bytes) integer (VAX byte-order) */
|
|
613
|
+
integer_size = 2;
|
|
614
|
+
bigendian_p = 0;
|
|
615
|
+
goto pack_integer;
|
|
616
|
+
|
|
617
|
+
case 'V': /* 32 bit (4 bytes) integer (VAX byte-order) */
|
|
618
|
+
integer_size = 4;
|
|
619
|
+
bigendian_p = 0;
|
|
620
|
+
goto pack_integer;
|
|
621
|
+
|
|
622
|
+
pack_integer:
|
|
623
|
+
if (explicit_endian) {
|
|
624
|
+
bigendian_p = explicit_endian == '>';
|
|
625
|
+
}
|
|
626
|
+
if (integer_size > MAX_INTEGER_PACK_SIZE)
|
|
627
|
+
rb_bug("unexpected intger size for pack: %d", integer_size);
|
|
628
|
+
while (len-- > 0) {
|
|
629
|
+
char intbuf[MAX_INTEGER_PACK_SIZE];
|
|
630
|
+
|
|
631
|
+
from = NEXTFROM;
|
|
632
|
+
rb_integer_pack(from, intbuf, integer_size, 1, 0,
|
|
633
|
+
INTEGER_PACK_2COMP |
|
|
634
|
+
(bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN));
|
|
635
|
+
rb_str_buf_cat(res, intbuf, integer_size);
|
|
636
|
+
}
|
|
637
|
+
break;
|
|
638
|
+
|
|
639
|
+
case 'f': /* single precision float in native format */
|
|
640
|
+
case 'F': /* ditto */
|
|
641
|
+
while (len-- > 0) {
|
|
642
|
+
float f;
|
|
643
|
+
|
|
644
|
+
from = NEXTFROM;
|
|
645
|
+
f = (float)RFLOAT_VALUE(rb_to_float(from));
|
|
646
|
+
rb_str_buf_cat(res, (char*)&f, sizeof(float));
|
|
647
|
+
}
|
|
648
|
+
break;
|
|
649
|
+
|
|
650
|
+
case 'e': /* single precision float in VAX byte-order */
|
|
651
|
+
while (len-- > 0) {
|
|
652
|
+
FLOAT_CONVWITH(tmp);
|
|
653
|
+
|
|
654
|
+
from = NEXTFROM;
|
|
655
|
+
tmp.f = (float)RFLOAT_VALUE(rb_to_float(from));
|
|
656
|
+
HTOVF(tmp);
|
|
657
|
+
rb_str_buf_cat(res, tmp.buf, sizeof(float));
|
|
658
|
+
}
|
|
659
|
+
break;
|
|
660
|
+
|
|
661
|
+
case 'E': /* double precision float in VAX byte-order */
|
|
662
|
+
while (len-- > 0) {
|
|
663
|
+
DOUBLE_CONVWITH(tmp);
|
|
664
|
+
from = NEXTFROM;
|
|
665
|
+
tmp.d = RFLOAT_VALUE(rb_to_float(from));
|
|
666
|
+
HTOVD(tmp);
|
|
667
|
+
rb_str_buf_cat(res, tmp.buf, sizeof(double));
|
|
668
|
+
}
|
|
669
|
+
break;
|
|
670
|
+
|
|
671
|
+
case 'd': /* double precision float in native format */
|
|
672
|
+
case 'D': /* ditto */
|
|
673
|
+
while (len-- > 0) {
|
|
674
|
+
double d;
|
|
675
|
+
|
|
676
|
+
from = NEXTFROM;
|
|
677
|
+
d = RFLOAT_VALUE(rb_to_float(from));
|
|
678
|
+
rb_str_buf_cat(res, (char*)&d, sizeof(double));
|
|
679
|
+
}
|
|
680
|
+
break;
|
|
681
|
+
|
|
682
|
+
case 'g': /* single precision float in network byte-order */
|
|
683
|
+
while (len-- > 0) {
|
|
684
|
+
FLOAT_CONVWITH(tmp);
|
|
685
|
+
from = NEXTFROM;
|
|
686
|
+
tmp.f = (float)RFLOAT_VALUE(rb_to_float(from));
|
|
687
|
+
HTONF(tmp);
|
|
688
|
+
rb_str_buf_cat(res, tmp.buf, sizeof(float));
|
|
689
|
+
}
|
|
690
|
+
break;
|
|
691
|
+
|
|
692
|
+
case 'G': /* double precision float in network byte-order */
|
|
693
|
+
while (len-- > 0) {
|
|
694
|
+
DOUBLE_CONVWITH(tmp);
|
|
695
|
+
|
|
696
|
+
from = NEXTFROM;
|
|
697
|
+
tmp.d = RFLOAT_VALUE(rb_to_float(from));
|
|
698
|
+
HTOND(tmp);
|
|
699
|
+
rb_str_buf_cat(res, tmp.buf, sizeof(double));
|
|
700
|
+
}
|
|
701
|
+
break;
|
|
702
|
+
|
|
703
|
+
case 'x': /* null byte */
|
|
704
|
+
grow:
|
|
705
|
+
while (len >= 10) {
|
|
706
|
+
rb_str_buf_cat(res, nul10, 10);
|
|
707
|
+
len -= 10;
|
|
708
|
+
}
|
|
709
|
+
rb_str_buf_cat(res, nul10, len);
|
|
710
|
+
break;
|
|
711
|
+
|
|
712
|
+
case 'X': /* back up byte */
|
|
713
|
+
shrink:
|
|
714
|
+
plen = RSTRING_LEN(res);
|
|
715
|
+
if (plen < len)
|
|
716
|
+
rb_raise(rb_eArgError, "X outside of string");
|
|
717
|
+
rb_str_set_len(res, plen - len);
|
|
718
|
+
break;
|
|
719
|
+
|
|
720
|
+
case '@': /* null fill to absolute position */
|
|
721
|
+
len -= RSTRING_LEN(res);
|
|
722
|
+
if (len > 0) goto grow;
|
|
723
|
+
len = -len;
|
|
724
|
+
if (len > 0) goto shrink;
|
|
725
|
+
break;
|
|
726
|
+
|
|
727
|
+
case '%':
|
|
728
|
+
rb_raise(rb_eArgError, "%% is not supported");
|
|
729
|
+
break;
|
|
730
|
+
|
|
731
|
+
case 'U': /* Unicode character */
|
|
732
|
+
while (len-- > 0) {
|
|
733
|
+
SIGNED_VALUE l;
|
|
734
|
+
char buf[8];
|
|
735
|
+
int le;
|
|
736
|
+
|
|
737
|
+
from = NEXTFROM;
|
|
738
|
+
from = rb_to_int(from);
|
|
739
|
+
l = NUM2LONG(from);
|
|
740
|
+
if (l < 0) {
|
|
741
|
+
rb_raise(rb_eRangeError, "pack(U): value out of range");
|
|
742
|
+
}
|
|
743
|
+
le = rb_uv_to_utf8(buf, l);
|
|
744
|
+
rb_str_buf_cat(res, (char*)buf, le);
|
|
745
|
+
}
|
|
746
|
+
break;
|
|
747
|
+
|
|
748
|
+
case 'u': /* uuencoded string */
|
|
749
|
+
case 'm': /* base64 encoded string */
|
|
750
|
+
from = NEXTFROM;
|
|
751
|
+
StringValue(from);
|
|
752
|
+
ptr = RSTRING_PTR(from);
|
|
753
|
+
plen = RSTRING_LEN(from);
|
|
754
|
+
|
|
755
|
+
if (len == 0 && type == 'm') {
|
|
756
|
+
encodes(res, ptr, plen, type, 0);
|
|
757
|
+
ptr += plen;
|
|
758
|
+
break;
|
|
759
|
+
}
|
|
760
|
+
if (len <= 2)
|
|
761
|
+
len = 45;
|
|
762
|
+
else if (len > 63 && type == 'u')
|
|
763
|
+
len = 63;
|
|
764
|
+
else
|
|
765
|
+
len = len / 3 * 3;
|
|
766
|
+
while (plen > 0) {
|
|
767
|
+
long todo;
|
|
768
|
+
|
|
769
|
+
if (plen > len)
|
|
770
|
+
todo = len;
|
|
771
|
+
else
|
|
772
|
+
todo = plen;
|
|
773
|
+
encodes(res, ptr, todo, type, 1);
|
|
774
|
+
plen -= todo;
|
|
775
|
+
ptr += todo;
|
|
776
|
+
}
|
|
777
|
+
break;
|
|
778
|
+
|
|
779
|
+
case 'M': /* quoted-printable encoded string */
|
|
780
|
+
from = rb_obj_as_string(NEXTFROM);
|
|
781
|
+
if (len <= 1)
|
|
782
|
+
len = 72;
|
|
783
|
+
qpencode(res, from, len);
|
|
784
|
+
break;
|
|
785
|
+
|
|
786
|
+
case 'P': /* pointer to packed byte string */
|
|
787
|
+
from = THISFROM;
|
|
788
|
+
if (!NIL_P(from)) {
|
|
789
|
+
StringValue(from);
|
|
790
|
+
if (RSTRING_LEN(from) < len) {
|
|
791
|
+
rb_raise(rb_eArgError, "too short buffer for P(%ld for %ld)",
|
|
792
|
+
RSTRING_LEN(from), len);
|
|
793
|
+
}
|
|
794
|
+
}
|
|
795
|
+
len = 1;
|
|
796
|
+
/* FALL THROUGH */
|
|
797
|
+
case 'p': /* pointer to string */
|
|
798
|
+
while (len-- > 0) {
|
|
799
|
+
char *t;
|
|
800
|
+
from = NEXTFROM;
|
|
801
|
+
if (NIL_P(from)) {
|
|
802
|
+
t = 0;
|
|
803
|
+
}
|
|
804
|
+
else {
|
|
805
|
+
t = StringValuePtr(from);
|
|
806
|
+
rb_obj_taint(from);
|
|
807
|
+
}
|
|
808
|
+
if (!associates) {
|
|
809
|
+
associates = rb_ary_new();
|
|
810
|
+
}
|
|
811
|
+
rb_ary_push(associates, from);
|
|
812
|
+
rb_str_buf_cat(res, (char*)&t, sizeof(char*));
|
|
813
|
+
}
|
|
814
|
+
break;
|
|
815
|
+
|
|
816
|
+
case 'w': /* BER compressed integer */
|
|
817
|
+
while (len-- > 0) {
|
|
818
|
+
VALUE buf = rb_str_new(0, 0);
|
|
819
|
+
size_t numbytes;
|
|
820
|
+
int sign;
|
|
821
|
+
char *cp;
|
|
822
|
+
|
|
823
|
+
from = NEXTFROM;
|
|
824
|
+
from = rb_to_int(from);
|
|
825
|
+
numbytes = rb_absint_numwords(from, 7, NULL);
|
|
826
|
+
if (numbytes == 0)
|
|
827
|
+
numbytes = 1;
|
|
828
|
+
buf = rb_str_new(NULL, numbytes);
|
|
829
|
+
|
|
830
|
+
sign = rb_integer_pack(from, RSTRING_PTR(buf), RSTRING_LEN(buf), 1, 1, INTEGER_PACK_BIG_ENDIAN);
|
|
831
|
+
|
|
832
|
+
if (sign < 0)
|
|
833
|
+
rb_raise(rb_eArgError, "can't compress negative numbers");
|
|
834
|
+
if (sign == 2)
|
|
835
|
+
rb_bug("buffer size problem?");
|
|
836
|
+
|
|
837
|
+
cp = RSTRING_PTR(buf);
|
|
838
|
+
while (1 < numbytes) {
|
|
839
|
+
*cp |= 0x80;
|
|
840
|
+
cp++;
|
|
841
|
+
numbytes--;
|
|
842
|
+
}
|
|
843
|
+
|
|
844
|
+
rb_str_buf_cat(res, RSTRING_PTR(buf), RSTRING_LEN(buf));
|
|
845
|
+
}
|
|
846
|
+
break;
|
|
847
|
+
|
|
848
|
+
default: {
|
|
849
|
+
char unknown[5];
|
|
850
|
+
if (ISPRINT(type)) {
|
|
851
|
+
unknown[0] = type;
|
|
852
|
+
unknown[1] = '\0';
|
|
853
|
+
}
|
|
854
|
+
else {
|
|
855
|
+
snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
|
|
856
|
+
}
|
|
857
|
+
rb_warning("unknown pack directive '%s' in '% "PRIsVALUE"'",
|
|
858
|
+
unknown, fmt);
|
|
859
|
+
break;
|
|
860
|
+
}
|
|
861
|
+
}
|
|
862
|
+
}
|
|
863
|
+
|
|
864
|
+
if (associates) {
|
|
865
|
+
str_associate(res, associates);
|
|
866
|
+
}
|
|
867
|
+
OBJ_INFECT(res, fmt);
|
|
868
|
+
switch (enc_info) {
|
|
869
|
+
case 1:
|
|
870
|
+
ENCODING_CODERANGE_SET(res, rb_usascii_encindex(), ENC_CODERANGE_7BIT);
|
|
871
|
+
break;
|
|
872
|
+
case 2:
|
|
873
|
+
rb_enc_set_index(res, rb_utf8_encindex());
|
|
874
|
+
break;
|
|
875
|
+
default:
|
|
876
|
+
/* do nothing, keep ASCII-8BIT */
|
|
877
|
+
break;
|
|
878
|
+
}
|
|
879
|
+
return res;
|
|
880
|
+
}
|
|
881
|
+
|
|
129
882
|
static const char uu_table[] =
|
|
130
883
|
"`!\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
|
|
131
884
|
static const char b64_table[] =
|
|
@@ -260,10 +1013,11 @@ hex2num(char c)
|
|
|
260
1013
|
rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
|
|
261
1014
|
} while (0)
|
|
262
1015
|
|
|
263
|
-
/* Workaround for Oracle
|
|
1016
|
+
/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
|
|
1017
|
+
* 12.4/12.5/12.6 C compiler optimization bug
|
|
264
1018
|
* with "-xO4" optimization option.
|
|
265
1019
|
*/
|
|
266
|
-
#if defined(__SUNPRO_C) && __SUNPRO_C
|
|
1020
|
+
#if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
|
|
267
1021
|
# define AVOID_CC_BUG volatile
|
|
268
1022
|
#else
|
|
269
1023
|
# define AVOID_CC_BUG
|
|
@@ -283,12 +1037,11 @@ infected_str_new(const char *ptr, long len, VALUE str)
|
|
|
283
1037
|
#define UNPACK_BLOCK 1
|
|
284
1038
|
#define UNPACK_1 2
|
|
285
1039
|
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
|
|
1040
|
+
static VALUE
|
|
1041
|
+
pack_unpack_internal(VALUE str, VALUE fmt, int mode)
|
|
289
1042
|
{
|
|
290
1043
|
#define hexdigits ruby_hexdigits
|
|
291
|
-
char *
|
|
1044
|
+
char *s, *send;
|
|
292
1045
|
char *p, *pend;
|
|
293
1046
|
VALUE ary;
|
|
294
1047
|
char type;
|
|
@@ -299,20 +1052,22 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
|
|
|
299
1052
|
int natint; /* native integer */
|
|
300
1053
|
#endif
|
|
301
1054
|
int signed_p, integer_size, bigendian_p;
|
|
302
|
-
int mode = (rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY);
|
|
303
1055
|
#define UNPACK_PUSH(item) do {\
|
|
304
1056
|
VALUE item_val = (item);\
|
|
305
1057
|
if ((mode) == UNPACK_BLOCK) {\
|
|
306
1058
|
rb_yield(item_val);\
|
|
307
1059
|
}\
|
|
308
|
-
else {\
|
|
1060
|
+
else if ((mode) == UNPACK_ARRAY) {\
|
|
309
1061
|
rb_ary_push(ary, item_val);\
|
|
310
1062
|
}\
|
|
1063
|
+
else /* if ((mode) == UNPACK_1) { */ {\
|
|
1064
|
+
return item_val; \
|
|
1065
|
+
}\
|
|
311
1066
|
} while (0)
|
|
312
1067
|
|
|
313
1068
|
StringValue(str);
|
|
314
1069
|
StringValue(fmt);
|
|
315
|
-
|
|
1070
|
+
s = RSTRING_PTR(str);
|
|
316
1071
|
send = s + RSTRING_LEN(str);
|
|
317
1072
|
p = RSTRING_PTR(fmt);
|
|
318
1073
|
pend = p + RSTRING_LEN(fmt);
|
|
@@ -374,7 +1129,7 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
|
|
|
374
1129
|
else if (ISDIGIT(*p)) {
|
|
375
1130
|
errno = 0;
|
|
376
1131
|
len = STRTOUL(p, (char**)&p, 10);
|
|
377
|
-
if (errno) {
|
|
1132
|
+
if (len < 0 || errno) {
|
|
378
1133
|
rb_raise(rb_eRangeError, "pack length too big");
|
|
379
1134
|
}
|
|
380
1135
|
}
|
|
@@ -845,6 +1600,7 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
|
|
|
845
1600
|
{
|
|
846
1601
|
VALUE buf = infected_str_new(0, send - s, str);
|
|
847
1602
|
char *ptr = RSTRING_PTR(buf), *ss = s;
|
|
1603
|
+
int csum = 0;
|
|
848
1604
|
int c1, c2;
|
|
849
1605
|
|
|
850
1606
|
while (s < send) {
|
|
@@ -856,18 +1612,19 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
|
|
|
856
1612
|
if ((c1 = hex2num(*s)) == -1) break;
|
|
857
1613
|
if (++s == send) break;
|
|
858
1614
|
if ((c2 = hex2num(*s)) == -1) break;
|
|
859
|
-
*ptr++ = castchar(c1 << 4 | c2);
|
|
1615
|
+
csum |= *ptr++ = castchar(c1 << 4 | c2);
|
|
860
1616
|
}
|
|
861
1617
|
}
|
|
862
1618
|
else {
|
|
863
|
-
*ptr++ = *s;
|
|
1619
|
+
csum |= *ptr++ = *s;
|
|
864
1620
|
}
|
|
865
1621
|
s++;
|
|
866
1622
|
ss = s;
|
|
867
1623
|
}
|
|
868
1624
|
rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
|
|
869
1625
|
rb_str_buf_cat(buf, ss, send-ss);
|
|
870
|
-
|
|
1626
|
+
csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
|
|
1627
|
+
ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
|
|
871
1628
|
UNPACK_PUSH(buf);
|
|
872
1629
|
}
|
|
873
1630
|
break;
|
|
@@ -990,10 +1747,151 @@ VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
|
|
|
990
1747
|
}
|
|
991
1748
|
}
|
|
992
1749
|
|
|
993
|
-
*parsed_len = s - init_s;
|
|
994
1750
|
return ary;
|
|
995
1751
|
}
|
|
996
1752
|
|
|
1753
|
+
/*
|
|
1754
|
+
* call-seq:
|
|
1755
|
+
* str.unpack(format) -> anArray
|
|
1756
|
+
*
|
|
1757
|
+
* Decodes <i>str</i> (which may contain binary data) according to the
|
|
1758
|
+
* format string, returning an array of each value extracted. The
|
|
1759
|
+
* format string consists of a sequence of single-character directives,
|
|
1760
|
+
* summarized in the table at the end of this entry.
|
|
1761
|
+
* Each directive may be followed
|
|
1762
|
+
* by a number, indicating the number of times to repeat this
|
|
1763
|
+
* directive. An asterisk (``<code>*</code>'') will use up all
|
|
1764
|
+
* remaining elements. The directives <code>sSiIlL</code> may each be
|
|
1765
|
+
* followed by an underscore (``<code>_</code>'') or
|
|
1766
|
+
* exclamation mark (``<code>!</code>'') to use the underlying
|
|
1767
|
+
* platform's native size for the specified type; otherwise, it uses a
|
|
1768
|
+
* platform-independent consistent size. Spaces are ignored in the
|
|
1769
|
+
* format string. See also <code>String#unpack1</code>, <code>Array#pack</code>.
|
|
1770
|
+
*
|
|
1771
|
+
* "abc \0\0abc \0\0".unpack('A6Z6') #=> ["abc", "abc "]
|
|
1772
|
+
* "abc \0\0".unpack('a3a3') #=> ["abc", " \000\000"]
|
|
1773
|
+
* "abc \0abc \0".unpack('Z*Z*') #=> ["abc ", "abc "]
|
|
1774
|
+
* "aa".unpack('b8B8') #=> ["10000110", "01100001"]
|
|
1775
|
+
* "aaa".unpack('h2H2c') #=> ["16", "61", 97]
|
|
1776
|
+
* "\xfe\xff\xfe\xff".unpack('sS') #=> [-2, 65534]
|
|
1777
|
+
* "now=20is".unpack('M*') #=> ["now is"]
|
|
1778
|
+
* "whole".unpack('xax2aX2aX1aX2a') #=> ["h", "e", "l", "l", "o"]
|
|
1779
|
+
*
|
|
1780
|
+
* This table summarizes the various formats and the Ruby classes
|
|
1781
|
+
* returned by each.
|
|
1782
|
+
*
|
|
1783
|
+
* Integer | |
|
|
1784
|
+
* Directive | Returns | Meaning
|
|
1785
|
+
* ------------------------------------------------------------------
|
|
1786
|
+
* C | Integer | 8-bit unsigned (unsigned char)
|
|
1787
|
+
* S | Integer | 16-bit unsigned, native endian (uint16_t)
|
|
1788
|
+
* L | Integer | 32-bit unsigned, native endian (uint32_t)
|
|
1789
|
+
* Q | Integer | 64-bit unsigned, native endian (uint64_t)
|
|
1790
|
+
* J | Integer | pointer width unsigned, native endian (uintptr_t)
|
|
1791
|
+
* | |
|
|
1792
|
+
* c | Integer | 8-bit signed (signed char)
|
|
1793
|
+
* s | Integer | 16-bit signed, native endian (int16_t)
|
|
1794
|
+
* l | Integer | 32-bit signed, native endian (int32_t)
|
|
1795
|
+
* q | Integer | 64-bit signed, native endian (int64_t)
|
|
1796
|
+
* j | Integer | pointer width signed, native endian (intptr_t)
|
|
1797
|
+
* | |
|
|
1798
|
+
* S_ S! | Integer | unsigned short, native endian
|
|
1799
|
+
* I I_ I! | Integer | unsigned int, native endian
|
|
1800
|
+
* L_ L! | Integer | unsigned long, native endian
|
|
1801
|
+
* Q_ Q! | Integer | unsigned long long, native endian (ArgumentError
|
|
1802
|
+
* | | if the platform has no long long type.)
|
|
1803
|
+
* J! | Integer | uintptr_t, native endian (same as J)
|
|
1804
|
+
* | |
|
|
1805
|
+
* s_ s! | Integer | signed short, native endian
|
|
1806
|
+
* i i_ i! | Integer | signed int, native endian
|
|
1807
|
+
* l_ l! | Integer | signed long, native endian
|
|
1808
|
+
* q_ q! | Integer | signed long long, native endian (ArgumentError
|
|
1809
|
+
* | | if the platform has no long long type.)
|
|
1810
|
+
* j! | Integer | intptr_t, native endian (same as j)
|
|
1811
|
+
* | |
|
|
1812
|
+
* S> s> S!> s!> | Integer | same as the directives without ">" except
|
|
1813
|
+
* L> l> L!> l!> | | big endian
|
|
1814
|
+
* I!> i!> | |
|
|
1815
|
+
* Q> q> Q!> q!> | | "S>" is same as "n"
|
|
1816
|
+
* J> j> J!> j!> | | "L>" is same as "N"
|
|
1817
|
+
* | |
|
|
1818
|
+
* S< s< S!< s!< | Integer | same as the directives without "<" except
|
|
1819
|
+
* L< l< L!< l!< | | little endian
|
|
1820
|
+
* I!< i!< | |
|
|
1821
|
+
* Q< q< Q!< q!< | | "S<" is same as "v"
|
|
1822
|
+
* J< j< J!< j!< | | "L<" is same as "V"
|
|
1823
|
+
* | |
|
|
1824
|
+
* n | Integer | 16-bit unsigned, network (big-endian) byte order
|
|
1825
|
+
* N | Integer | 32-bit unsigned, network (big-endian) byte order
|
|
1826
|
+
* v | Integer | 16-bit unsigned, VAX (little-endian) byte order
|
|
1827
|
+
* V | Integer | 32-bit unsigned, VAX (little-endian) byte order
|
|
1828
|
+
* | |
|
|
1829
|
+
* U | Integer | UTF-8 character
|
|
1830
|
+
* w | Integer | BER-compressed integer (see Array#pack)
|
|
1831
|
+
*
|
|
1832
|
+
* Float | |
|
|
1833
|
+
* Directive | Returns | Meaning
|
|
1834
|
+
* -----------------------------------------------------------------
|
|
1835
|
+
* D d | Float | double-precision, native format
|
|
1836
|
+
* F f | Float | single-precision, native format
|
|
1837
|
+
* E | Float | double-precision, little-endian byte order
|
|
1838
|
+
* e | Float | single-precision, little-endian byte order
|
|
1839
|
+
* G | Float | double-precision, network (big-endian) byte order
|
|
1840
|
+
* g | Float | single-precision, network (big-endian) byte order
|
|
1841
|
+
*
|
|
1842
|
+
* String | |
|
|
1843
|
+
* Directive | Returns | Meaning
|
|
1844
|
+
* -----------------------------------------------------------------
|
|
1845
|
+
* A | String | arbitrary binary string (remove trailing nulls and ASCII spaces)
|
|
1846
|
+
* a | String | arbitrary binary string
|
|
1847
|
+
* Z | String | null-terminated string
|
|
1848
|
+
* B | String | bit string (MSB first)
|
|
1849
|
+
* b | String | bit string (LSB first)
|
|
1850
|
+
* H | String | hex string (high nibble first)
|
|
1851
|
+
* h | String | hex string (low nibble first)
|
|
1852
|
+
* u | String | UU-encoded string
|
|
1853
|
+
* M | String | quoted-printable, MIME encoding (see RFC2045)
|
|
1854
|
+
* m | String | base64 encoded string (RFC 2045) (default)
|
|
1855
|
+
* | | base64 encoded string (RFC 4648) if followed by 0
|
|
1856
|
+
* P | String | pointer to a structure (fixed-length string)
|
|
1857
|
+
* p | String | pointer to a null-terminated string
|
|
1858
|
+
*
|
|
1859
|
+
* Misc. | |
|
|
1860
|
+
* Directive | Returns | Meaning
|
|
1861
|
+
* -----------------------------------------------------------------
|
|
1862
|
+
* @ | --- | skip to the offset given by the length argument
|
|
1863
|
+
* X | --- | skip backward one byte
|
|
1864
|
+
* x | --- | skip forward one byte
|
|
1865
|
+
*
|
|
1866
|
+
* HISTORY
|
|
1867
|
+
*
|
|
1868
|
+
* * J, J!, j, and j! are available since Ruby 2.3.
|
|
1869
|
+
* * Q_, Q!, q_, and q! are available since Ruby 2.1.
|
|
1870
|
+
* * I!<, i!<, I!>, and i!> are available since Ruby 1.9.3.
|
|
1871
|
+
*/
|
|
1872
|
+
|
|
1873
|
+
/*
 * Method body bound to String#unpack in Init_pack.
 * Selects UNPACK_BLOCK when the caller supplied a block and UNPACK_ARRAY
 * otherwise, then passes the receiver, the format string and that mode
 * to pack_unpack_internal and returns its result unchanged.
 */
static VALUE
|
|
1874
|
+
pack_unpack(VALUE str, VALUE fmt)
|
|
1875
|
+
{
|
|
1876
|
+
int mode = rb_block_given_p() ? UNPACK_BLOCK : UNPACK_ARRAY; /* mode depends only on whether a block was given */
|
|
1877
|
+
return pack_unpack_internal(str, fmt, mode);
|
|
1878
|
+
}
|
|
1879
|
+
|
|
1880
|
+
/*
|
|
1881
|
+
* call-seq:
|
|
1882
|
+
* str.unpack1(format) -> obj
|
|
1883
|
+
*
|
|
1884
|
+
* Decodes <i>str</i> (which may contain binary data) according to the
|
|
1885
|
+
* format string, returning the first value extracted.
|
|
1886
|
+
* See also <code>String#unpack</code>, <code>Array#pack</code>.
|
|
1887
|
+
*/
|
|
1888
|
+
|
|
1889
|
+
/*
 * Method body bound to String#unpack1 in Init_pack.
 * Always calls pack_unpack_internal with the UNPACK_1 mode and returns
 * whatever that call returns.
 * NOTE(review): presumably the internal routine returns as soon as the
 * first value is decoded in this mode -- confirm against its UNPACK_1
 * branch, which is not fully visible here.
 */
static VALUE
|
|
1890
|
+
pack_unpack1(VALUE str, VALUE fmt)
|
|
1891
|
+
{
|
|
1892
|
+
return pack_unpack_internal(str, fmt, UNPACK_1);
|
|
1893
|
+
}
|
|
1894
|
+
|
|
997
1895
|
int
|
|
998
1896
|
rb_uv_to_utf8(char buf[6], unsigned long uv)
|
|
999
1897
|
{
|
|
@@ -1100,3 +1998,13 @@ utf8_to_uv(const char *p, long *lenp)
|
|
|
1100
1998
|
}
|
|
1101
1999
|
return uv;
|
|
1102
2000
|
}
|
|
2001
|
+
|
|
2002
|
+
/*
 * Initializer for the pack code: defines Array#pack (registered with
 * arity -1) plus String#unpack and String#unpack1 (arity 1 each), then
 * creates the internal id stored in id_associated.
 */
void
|
|
2003
|
+
Init_pack(void)
|
|
2004
|
+
{
|
|
2005
|
+
rb_define_method(rb_cArray, "pack", pack_pack, -1);
|
|
2006
|
+
rb_define_method(rb_cString, "unpack", pack_unpack, 1);
|
|
2007
|
+
rb_define_method(rb_cString, "unpack1", pack_unpack1, 1);
|
|
2008
|
+
|
|
2009
|
+
id_associated = rb_make_internal_id(); /* id that str_associated() passes to rb_ivar_lookup */
|
|
2010
|
+
}
|