zscan 2.0.6 → 2.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/extconf.rb +9 -1
- data/ext/pack/builtin-27.h +79 -0
- data/ext/pack/builtin.h +79 -0
- data/ext/pack/internal-27.h +96 -0
- data/ext/pack/internal-27/array.h +103 -0
- data/ext/pack/internal-27/bignum.h +246 -0
- data/ext/pack/internal-27/bits.h +566 -0
- data/ext/pack/internal-27/class.h +162 -0
- data/ext/pack/internal-27/compar.h +50 -0
- data/ext/pack/internal-27/compile.h +32 -0
- data/ext/pack/internal-27/compilers.h +108 -0
- data/ext/pack/internal-27/complex.h +29 -0
- data/ext/pack/internal-27/cont.h +23 -0
- data/ext/pack/internal-27/dir.h +17 -0
- data/ext/pack/internal-27/enc.h +20 -0
- data/ext/pack/internal-27/encoding.h +28 -0
- data/ext/pack/internal-27/enum.h +19 -0
- data/ext/pack/internal-27/enumerator.h +22 -0
- data/ext/pack/internal-27/error.h +129 -0
- data/ext/pack/internal-27/eval.h +33 -0
- data/ext/pack/internal-27/file.h +39 -0
- data/ext/pack/internal-27/fixnum.h +185 -0
- data/ext/pack/internal-27/gc.h +153 -0
- data/ext/pack/internal-27/hash.h +234 -0
- data/ext/pack/internal-27/imemo.h +242 -0
- data/ext/pack/internal-27/inits.h +51 -0
- data/ext/pack/internal-27/io.h +35 -0
- data/ext/pack/internal-27/load.h +19 -0
- data/ext/pack/internal-27/loadpath.h +17 -0
- data/ext/pack/internal-27/math.h +24 -0
- data/ext/pack/internal-27/missing.h +19 -0
- data/ext/pack/internal-27/mjit.h +29 -0
- data/ext/pack/internal-27/numeric.h +249 -0
- data/ext/pack/internal-27/object.h +60 -0
- data/ext/pack/internal-27/parse.h +23 -0
- data/ext/pack/internal-27/proc.h +32 -0
- data/ext/pack/internal-27/process.h +130 -0
- data/ext/pack/internal-27/random.h +17 -0
- data/ext/pack/internal-27/range.h +37 -0
- data/ext/pack/internal-27/rational.h +68 -0
- data/ext/pack/internal-27/re.h +29 -0
- data/ext/pack/internal-27/sanitizers.h +191 -0
- data/ext/pack/internal-27/serial.h +24 -0
- data/ext/pack/internal-27/signal.h +22 -0
- data/ext/pack/internal-27/static_assert.h +17 -0
- data/ext/pack/internal-27/string.h +135 -0
- data/ext/pack/internal-27/struct.h +154 -0
- data/ext/pack/internal-27/symbol.h +41 -0
- data/ext/pack/internal-27/thread.h +52 -0
- data/ext/pack/internal-27/time.h +35 -0
- data/ext/pack/internal-27/transcode.h +21 -0
- data/ext/pack/internal-27/util.h +31 -0
- data/ext/pack/internal-27/variable.h +92 -0
- data/ext/pack/internal-27/vm.h +127 -0
- data/ext/pack/internal-27/warnings.h +17 -0
- data/ext/pack/internal.h +57 -2102
- data/ext/pack/internal/array.h +103 -0
- data/ext/pack/internal/bignum.h +246 -0
- data/ext/pack/internal/bits.h +566 -0
- data/ext/pack/internal/class.h +162 -0
- data/ext/pack/internal/compar.h +50 -0
- data/ext/pack/internal/compile.h +32 -0
- data/ext/pack/internal/compilers.h +108 -0
- data/ext/pack/internal/complex.h +29 -0
- data/ext/pack/internal/cont.h +23 -0
- data/ext/pack/internal/dir.h +17 -0
- data/ext/pack/internal/enc.h +20 -0
- data/ext/pack/internal/encoding.h +28 -0
- data/ext/pack/internal/enum.h +19 -0
- data/ext/pack/internal/enumerator.h +22 -0
- data/ext/pack/internal/error.h +129 -0
- data/ext/pack/internal/eval.h +33 -0
- data/ext/pack/internal/file.h +39 -0
- data/ext/pack/internal/fixnum.h +185 -0
- data/ext/pack/internal/gc.h +153 -0
- data/ext/pack/internal/hash.h +234 -0
- data/ext/pack/internal/imemo.h +242 -0
- data/ext/pack/internal/inits.h +51 -0
- data/ext/pack/internal/io.h +35 -0
- data/ext/pack/internal/load.h +19 -0
- data/ext/pack/internal/loadpath.h +17 -0
- data/ext/pack/internal/math.h +24 -0
- data/ext/pack/internal/missing.h +19 -0
- data/ext/pack/internal/mjit.h +29 -0
- data/ext/pack/internal/numeric.h +249 -0
- data/ext/pack/internal/object.h +60 -0
- data/ext/pack/internal/parse.h +23 -0
- data/ext/pack/internal/proc.h +32 -0
- data/ext/pack/internal/process.h +130 -0
- data/ext/pack/internal/random.h +17 -0
- data/ext/pack/internal/range.h +37 -0
- data/ext/pack/internal/rational.h +68 -0
- data/ext/pack/internal/re.h +29 -0
- data/ext/pack/internal/sanitizers.h +191 -0
- data/ext/pack/internal/serial.h +24 -0
- data/ext/pack/internal/signal.h +22 -0
- data/ext/pack/internal/static_assert.h +17 -0
- data/ext/pack/internal/string.h +135 -0
- data/ext/pack/internal/struct.h +154 -0
- data/ext/pack/internal/symbol.h +41 -0
- data/ext/pack/internal/thread.h +52 -0
- data/ext/pack/internal/time.h +35 -0
- data/ext/pack/internal/transcode.h +21 -0
- data/ext/pack/internal/util.h +31 -0
- data/ext/pack/internal/variable.h +92 -0
- data/ext/pack/internal/vm.h +127 -0
- data/ext/pack/internal/warnings.h +17 -0
- data/ext/pack/pack-26.c +2 -109
- data/ext/pack/pack-27.c +1022 -0
- data/ext/pack/pack.c +57 -1047
- data/lib/zscan.rb +1 -1
- data/zscan.gemspec +1 -1
- metadata +109 -3
data/ext/pack/pack-27.c
ADDED
@@ -0,0 +1,1022 @@
|
|
1
|
+
/**********************************************************************
|
2
|
+
|
3
|
+
pack.c -
|
4
|
+
|
5
|
+
$Author$
|
6
|
+
created at: Thu Feb 10 15:17:05 JST 1994
|
7
|
+
|
8
|
+
Copyright (C) 1993-2007 Yukihiro Matsumoto
|
9
|
+
|
10
|
+
**********************************************************************/
|
11
|
+
|
12
|
+
#include "ruby/internal/config.h"
|
13
|
+
|
14
|
+
#include <ctype.h>
|
15
|
+
#include <errno.h>
|
16
|
+
#include <float.h>
|
17
|
+
#include <sys/types.h>
|
18
|
+
|
19
|
+
#include "internal.h"
|
20
|
+
#include "internal/bits.h"
|
21
|
+
#include "internal/string.h"
|
22
|
+
#include "internal/symbol.h"
|
23
|
+
#include "internal/util.h"
|
24
|
+
|
25
|
+
// #include "internal/variable.h"
|
26
|
+
VALUE rb_ivar_lookup(VALUE obj, ID id, VALUE undef);
|
27
|
+
|
28
|
+
#include "builtin.h"
|
29
|
+
|
30
|
+
/*
|
31
|
+
* It is intentional that the condition for natstr is HAVE_TRUE_LONG_LONG
|
32
|
+
* instead of HAVE_LONG_LONG or LONG_LONG.
|
33
|
+
* This means q! and Q! always mean the standard long long type and
|
34
|
+
* cause ArgumentError on platforms which have no long long type,
|
35
|
+
* even if the platform has an implementation specific 64bit type.
|
36
|
+
* This behavior is consistent with the document of pack/unpack.
|
37
|
+
*/
|
38
|
+
#ifdef HAVE_TRUE_LONG_LONG
|
39
|
+
static const char natstr[] = "sSiIlLqQjJ";
|
40
|
+
#else
|
41
|
+
static const char natstr[] = "sSiIlLjJ";
|
42
|
+
#endif
|
43
|
+
static const char endstr[] = "sSiIlLqQjJ";
|
44
|
+
|
45
|
+
#ifdef HAVE_TRUE_LONG_LONG
|
46
|
+
/* It is intentional to use long long instead of LONG_LONG. */
|
47
|
+
# define NATINT_LEN_Q NATINT_LEN(long long, 8)
|
48
|
+
#else
|
49
|
+
# define NATINT_LEN_Q 8
|
50
|
+
#endif
|
51
|
+
|
52
|
+
#if SIZEOF_SHORT != 2 || SIZEOF_LONG != 4 || (defined(HAVE_TRUE_LONG_LONG) && SIZEOF_LONG_LONG != 8)
|
53
|
+
# define NATINT_PACK
|
54
|
+
#endif
|
55
|
+
|
56
|
+
#ifdef DYNAMIC_ENDIAN
|
57
|
+
/* for universal binary of NEXTSTEP and MacOS X */
|
58
|
+
/* useless since autoconf 2.63? */
|
59
|
+
/*
 * Runtime byte-order probe.
 *
 * Returns 1 when the first byte of an int holding the value 1 is zero,
 * and 0 otherwise.  The answer is computed on the first call only and is
 * kept in function-local statics for every later call.
 */
static int
is_bigendian(void)
{
    static int probed = 0;
    static int cached_result;

    if (!probed) {
        const char *first_byte;

        /* the flag doubles as the probe value: it now holds the int 1 */
        probed = 1;
        first_byte = (const char *)&probed;
        cached_result = (first_byte[0] != 0) ? 0 : 1;
    }
    return cached_result;
}
|
71
|
+
# define BIGENDIAN_P() (is_bigendian())
|
72
|
+
#elif defined(WORDS_BIGENDIAN)
|
73
|
+
# define BIGENDIAN_P() 1
|
74
|
+
#else
|
75
|
+
# define BIGENDIAN_P() 0
|
76
|
+
#endif
|
77
|
+
|
78
|
+
#ifdef NATINT_PACK
|
79
|
+
# define NATINT_LEN(type,len) (natint?(int)sizeof(type):(int)(len))
|
80
|
+
#else
|
81
|
+
# define NATINT_LEN(type,len) ((int)sizeof(type))
|
82
|
+
#endif
|
83
|
+
|
84
|
+
typedef union {
|
85
|
+
float f;
|
86
|
+
uint32_t u;
|
87
|
+
char buf[4];
|
88
|
+
} FLOAT_SWAPPER;
|
89
|
+
typedef union {
|
90
|
+
double d;
|
91
|
+
uint64_t u;
|
92
|
+
char buf[8];
|
93
|
+
} DOUBLE_SWAPPER;
|
94
|
+
#define swapf(x) swap32(x)
|
95
|
+
#define swapd(x) swap64(x)
|
96
|
+
|
97
|
+
#define rb_ntohf(x) (BIGENDIAN_P()?(x):swapf(x))
|
98
|
+
#define rb_ntohd(x) (BIGENDIAN_P()?(x):swapd(x))
|
99
|
+
#define rb_htonf(x) (BIGENDIAN_P()?(x):swapf(x))
|
100
|
+
#define rb_htond(x) (BIGENDIAN_P()?(x):swapd(x))
|
101
|
+
#define rb_htovf(x) (BIGENDIAN_P()?swapf(x):(x))
|
102
|
+
#define rb_htovd(x) (BIGENDIAN_P()?swapd(x):(x))
|
103
|
+
#define rb_vtohf(x) (BIGENDIAN_P()?swapf(x):(x))
|
104
|
+
#define rb_vtohd(x) (BIGENDIAN_P()?swapd(x):(x))
|
105
|
+
|
106
|
+
#define FLOAT_CONVWITH(x) FLOAT_SWAPPER x;
|
107
|
+
#define HTONF(x) ((x).u = rb_htonf((x).u))
|
108
|
+
#define HTOVF(x) ((x).u = rb_htovf((x).u))
|
109
|
+
#define NTOHF(x) ((x).u = rb_ntohf((x).u))
|
110
|
+
#define VTOHF(x) ((x).u = rb_vtohf((x).u))
|
111
|
+
|
112
|
+
#define DOUBLE_CONVWITH(x) DOUBLE_SWAPPER x;
|
113
|
+
#define HTOND(x) ((x).u = rb_htond((x).u))
|
114
|
+
#define HTOVD(x) ((x).u = rb_htovd((x).u))
|
115
|
+
#define NTOHD(x) ((x).u = rb_ntohd((x).u))
|
116
|
+
#define VTOHD(x) ((x).u = rb_vtohd((x).u))
|
117
|
+
|
118
|
+
#define MAX_INTEGER_PACK_SIZE 8
|
119
|
+
|
120
|
+
static unsigned long utf8_to_uv(const char*,long*);
|
121
|
+
|
122
|
+
static ID id_associated;
|
123
|
+
|
124
|
+
static void
|
125
|
+
str_associate(VALUE str, VALUE add)
|
126
|
+
{
|
127
|
+
/* assert(NIL_P(rb_attr_get(str, id_associated))); */
|
128
|
+
rb_ivar_set(str, id_associated, add);
|
129
|
+
}
|
130
|
+
|
131
|
+
static VALUE
|
132
|
+
str_associated(VALUE str)
|
133
|
+
{
|
134
|
+
return rb_ivar_lookup(str, id_associated, Qfalse);
|
135
|
+
}
|
136
|
+
|
137
|
+
static void
|
138
|
+
unknown_directive(const char *mode, char type, VALUE fmt)
|
139
|
+
{
|
140
|
+
VALUE f;
|
141
|
+
char unknown[5];
|
142
|
+
|
143
|
+
if (ISPRINT(type)) {
|
144
|
+
unknown[0] = type;
|
145
|
+
unknown[1] = '\0';
|
146
|
+
}
|
147
|
+
else {
|
148
|
+
snprintf(unknown, sizeof(unknown), "\\x%.2x", type & 0xff);
|
149
|
+
}
|
150
|
+
f = rb_str_quote_unprintable(fmt);
|
151
|
+
if (f != fmt) {
|
152
|
+
fmt = rb_str_subseq(f, 1, RSTRING_LEN(f) - 2);
|
153
|
+
}
|
154
|
+
rb_warning("unknown %s directive '%s' in '%"PRIsVALUE"'",
|
155
|
+
mode, unknown, fmt);
|
156
|
+
}
|
157
|
+
|
158
|
+
static const char b64_table[] =
|
159
|
+
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
|
160
|
+
|
161
|
+
static inline int
|
162
|
+
hex2num(char c)
|
163
|
+
{
|
164
|
+
int n;
|
165
|
+
n = ruby_digit36_to_number_table[(unsigned char)c];
|
166
|
+
if (16 <= n)
|
167
|
+
n = -1;
|
168
|
+
return n;
|
169
|
+
}
|
170
|
+
|
171
|
+
#define PACK_LENGTH_ADJUST_SIZE(sz) do { \
|
172
|
+
tmp_len = 0; \
|
173
|
+
if (len > (long)((send-s)/(sz))) { \
|
174
|
+
if (!star) { \
|
175
|
+
tmp_len = len-(send-s)/(sz); \
|
176
|
+
} \
|
177
|
+
len = (send-s)/(sz); \
|
178
|
+
} \
|
179
|
+
} while (0)
|
180
|
+
|
181
|
+
#define PACK_ITEM_ADJUST() do { \
|
182
|
+
if (tmp_len > 0 && mode == UNPACK_ARRAY) \
|
183
|
+
rb_ary_store(ary, RARRAY_LEN(ary)+tmp_len-1, Qnil); \
|
184
|
+
} while (0)
|
185
|
+
|
186
|
+
/* Workaround for Oracle Developer Studio (Oracle Solaris Studio)
|
187
|
+
* 12.4/12.5/12.6 C compiler optimization bug
|
188
|
+
* with "-xO4" optimization option.
|
189
|
+
*/
|
190
|
+
#if defined(__SUNPRO_C) && 0x5130 <= __SUNPRO_C && __SUNPRO_C <= 0x5150
|
191
|
+
# define AVOID_CC_BUG volatile
|
192
|
+
#else
|
193
|
+
# define AVOID_CC_BUG
|
194
|
+
#endif
|
195
|
+
|
196
|
+
/* unpack mode */
|
197
|
+
#define UNPACK_ARRAY 0
|
198
|
+
#define UNPACK_BLOCK 1
|
199
|
+
#define UNPACK_1 2
|
200
|
+
|
201
|
+
#define castchar(from) (char)((from) & 0xff)
|
202
|
+
|
203
|
+
/*
 * Decode the bytes of +str+ according to the pack-style template +fmt+.
 *
 * str        - source data; converted with StringValue()
 * fmt        - template string; converted with StringValue()
 * parsed_len - out parameter; on normal return receives the offset of the
 *              read cursor from the start of +str+ (s - init_s).  Note that
 *              the '@' and 'X' directives can move the cursor backwards.
 *
 * Returns a new Array holding the decoded items.  +mode+ is fixed to
 * UNPACK_ARRAY in this function, so the block/single-value branches of
 * UNPACK_PUSH are never taken.
 *
 * Raises ArgumentError / RangeError (through rb_raise) for malformed
 * templates, out-of-range positions, invalid base64 and bad pointers.
 *
 * Compared with the previous revision this version never dereferences the
 * template or data cursor before comparing it with its end pointer, and the
 * 'A' directive no longer forms a pointer in front of the data buffer.
 */
VALUE zscan_internal_unpack(VALUE str, VALUE fmt, long* parsed_len)
{
    char* init_s;
    int mode = UNPACK_ARRAY;
#define hexdigits ruby_hexdigits
    char *s, *send;             /* read cursor into str and its end */
    char *p, *pend;             /* read cursor into fmt and its end */
    VALUE ary;
    char type;
    long len;
    AVOID_CC_BUG long tmp_len;
    int star;
#ifdef NATINT_PACK
    int natint;                 /* native integer */
#endif
    int signed_p, integer_size, bigendian_p;
#define UNPACK_PUSH(item) do {\
        VALUE item_val = (item);\
        if ((mode) == UNPACK_BLOCK) {\
            rb_yield(item_val);\
        }\
        else if ((mode) == UNPACK_ARRAY) {\
            rb_ary_push(ary, item_val);\
        }\
        else /* if ((mode) == UNPACK_1) { */ {\
            return item_val; \
        }\
    } while (0)

    StringValue(str);
    StringValue(fmt);
    init_s = s = RSTRING_PTR(str);
    send = s + RSTRING_LEN(str);
    p = RSTRING_PTR(fmt);
    pend = p + RSTRING_LEN(fmt);

    ary = mode == UNPACK_ARRAY ? rb_ary_new() : Qnil;
    while (p < pend) {
        int explicit_endian = 0;
        type = *p++;
#ifdef NATINT_PACK
        natint = 0;
#endif

        if (ISSPACE(type)) continue;
        if (type == '#') {
            /* comment: skip to the end of the template line */
            while ((p < pend) && (*p != '\n')) {
                p++;
            }
            continue;
        }

        star = 0;
        {
          modifiers:
            /* only look at *p while it is still inside the template */
            if (p < pend) {
                switch (*p) {
                  case '_':
                  case '!':

                    if (strchr(natstr, type)) {
#ifdef NATINT_PACK
                        natint = 1;
#endif
                        p++;
                    }
                    else {
                        rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, natstr);
                    }
                    goto modifiers;

                  case '<':
                  case '>':
                    if (!strchr(endstr, type)) {
                        rb_raise(rb_eArgError, "'%c' allowed only after types %s", *p, endstr);
                    }
                    if (explicit_endian) {
                        rb_raise(rb_eRangeError, "Can't use both '<' and '>'");
                    }
                    explicit_endian = *p++;
                    goto modifiers;

                  default:
                    break;
                }
            }
        }

        /* repeat count: absent -> 1 (0 for '@'), '*' -> rest of data, or digits */
        if (p >= pend)
            len = 1;
        else if (*p == '*') {
            star = 1;
            len = send - s;
            p++;
        }
        else if (ISDIGIT(*p)) {
            errno = 0;
            len = STRTOUL(p, (char**)&p, 10);
            if (len < 0 || errno) {
                rb_raise(rb_eRangeError, "pack length too big");
            }
        }
        else {
            len = (type != '@');
        }

        switch (type) {
          case '%':
            rb_raise(rb_eArgError, "%% is not supported");
            break;

          case 'A':
            if (len > send - s) len = send - s;
            {
                long end = len;

                /* strip trailing spaces and NULs; indexing from the front
                 * keeps every computed address inside [s, s+len) */
                while (len > 0 && (s[len - 1] == ' ' || s[len - 1] == '\0')) {
                    len--;
                }
                UNPACK_PUSH(rb_str_new(s, len));
                s += end;
            }
            break;

          case 'Z':
            {
                char *t = s;

                if (len > send-s) len = send-s;
                while (t < s+len && *t) t++;
                UNPACK_PUSH(rb_str_new(s, t-s));
                if (t < send) t++;
                s = star ? t : s+len;
            }
            break;

          case 'a':
            if (len > send - s) len = send - s;
            UNPACK_PUSH(rb_str_new(s, len));
            s += len;
            break;

          case 'b':
            {
                VALUE bitstr;
                char *t;
                int bits;
                long i;

                if (p[-1] == '*' || len > (send - s) * 8)
                    len = (send - s) * 8;
                bits = 0;
                bitstr = rb_usascii_str_new(0, len);
                t = RSTRING_PTR(bitstr);
                for (i=0; i<len; i++) {
                    if (i & 7) bits >>= 1;
                    else bits = (unsigned char)*s++;
                    *t++ = (bits & 1) ? '1' : '0';
                }
                UNPACK_PUSH(bitstr);
            }
            break;

          case 'B':
            {
                VALUE bitstr;
                char *t;
                int bits;
                long i;

                if (p[-1] == '*' || len > (send - s) * 8)
                    len = (send - s) * 8;
                bits = 0;
                bitstr = rb_usascii_str_new(0, len);
                t = RSTRING_PTR(bitstr);
                for (i=0; i<len; i++) {
                    if (i & 7) bits <<= 1;
                    else bits = (unsigned char)*s++;
                    *t++ = (bits & 128) ? '1' : '0';
                }
                UNPACK_PUSH(bitstr);
            }
            break;

          case 'h':
            {
                VALUE bitstr;
                char *t;
                int bits;
                long i;

                if (p[-1] == '*' || len > (send - s) * 2)
                    len = (send - s) * 2;
                bits = 0;
                bitstr = rb_usascii_str_new(0, len);
                t = RSTRING_PTR(bitstr);
                for (i=0; i<len; i++) {
                    if (i & 1)
                        bits >>= 4;
                    else
                        bits = (unsigned char)*s++;
                    *t++ = hexdigits[bits & 15];
                }
                UNPACK_PUSH(bitstr);
            }
            break;

          case 'H':
            {
                VALUE bitstr;
                char *t;
                int bits;
                long i;

                if (p[-1] == '*' || len > (send - s) * 2)
                    len = (send - s) * 2;
                bits = 0;
                bitstr = rb_usascii_str_new(0, len);
                t = RSTRING_PTR(bitstr);
                for (i=0; i<len; i++) {
                    if (i & 1)
                        bits <<= 4;
                    else
                        bits = (unsigned char)*s++;
                    *t++ = hexdigits[(bits >> 4) & 15];
                }
                UNPACK_PUSH(bitstr);
            }
            break;

          case 'c':
            signed_p = 1;
            integer_size = 1;
            bigendian_p = BIGENDIAN_P(); /* not effective */
            goto unpack_integer;

          case 'C':
            signed_p = 0;
            integer_size = 1;
            bigendian_p = BIGENDIAN_P(); /* not effective */
            goto unpack_integer;

          case 's':
            signed_p = 1;
            integer_size = NATINT_LEN(short, 2);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'S':
            signed_p = 0;
            integer_size = NATINT_LEN(short, 2);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'i':
            signed_p = 1;
            integer_size = (int)sizeof(int);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'I':
            signed_p = 0;
            integer_size = (int)sizeof(int);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'l':
            signed_p = 1;
            integer_size = NATINT_LEN(long, 4);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'L':
            signed_p = 0;
            integer_size = NATINT_LEN(long, 4);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'q':
            signed_p = 1;
            integer_size = NATINT_LEN_Q;
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'Q':
            signed_p = 0;
            integer_size = NATINT_LEN_Q;
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'j':
            signed_p = 1;
            integer_size = sizeof(intptr_t);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'J':
            signed_p = 0;
            integer_size = sizeof(uintptr_t);
            bigendian_p = BIGENDIAN_P();
            goto unpack_integer;

          case 'n':
            signed_p = 0;
            integer_size = 2;
            bigendian_p = 1;
            goto unpack_integer;

          case 'N':
            signed_p = 0;
            integer_size = 4;
            bigendian_p = 1;
            goto unpack_integer;

          case 'v':
            signed_p = 0;
            integer_size = 2;
            bigendian_p = 0;
            goto unpack_integer;

          case 'V':
            signed_p = 0;
            integer_size = 4;
            bigendian_p = 0;
            goto unpack_integer;

          unpack_integer:
            /* shared tail of every integer directive above */
            if (explicit_endian) {
                bigendian_p = explicit_endian == '>';
            }
            PACK_LENGTH_ADJUST_SIZE(integer_size);
            while (len-- > 0) {
                int flags = bigendian_p ? INTEGER_PACK_BIG_ENDIAN : INTEGER_PACK_LITTLE_ENDIAN;
                VALUE val;
                if (signed_p)
                    flags |= INTEGER_PACK_2COMP;
                val = rb_integer_unpack(s, integer_size, 1, 0, flags);
                UNPACK_PUSH(val);
                s += integer_size;
            }
            PACK_ITEM_ADJUST();
            break;

          case 'f':
          case 'F':
            PACK_LENGTH_ADJUST_SIZE(sizeof(float));
            while (len-- > 0) {
                float tmp;
                memcpy(&tmp, s, sizeof(float));
                s += sizeof(float);
                UNPACK_PUSH(DBL2NUM((double)tmp));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'e':
            PACK_LENGTH_ADJUST_SIZE(sizeof(float));
            while (len-- > 0) {
                FLOAT_CONVWITH(tmp);
                memcpy(tmp.buf, s, sizeof(float));
                s += sizeof(float);
                VTOHF(tmp);
                UNPACK_PUSH(DBL2NUM(tmp.f));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'E':
            PACK_LENGTH_ADJUST_SIZE(sizeof(double));
            while (len-- > 0) {
                DOUBLE_CONVWITH(tmp);
                memcpy(tmp.buf, s, sizeof(double));
                s += sizeof(double);
                VTOHD(tmp);
                UNPACK_PUSH(DBL2NUM(tmp.d));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'D':
          case 'd':
            PACK_LENGTH_ADJUST_SIZE(sizeof(double));
            while (len-- > 0) {
                double tmp;
                memcpy(&tmp, s, sizeof(double));
                s += sizeof(double);
                UNPACK_PUSH(DBL2NUM(tmp));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'g':
            PACK_LENGTH_ADJUST_SIZE(sizeof(float));
            while (len-- > 0) {
                FLOAT_CONVWITH(tmp);
                memcpy(tmp.buf, s, sizeof(float));
                s += sizeof(float);
                NTOHF(tmp);
                UNPACK_PUSH(DBL2NUM(tmp.f));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'G':
            PACK_LENGTH_ADJUST_SIZE(sizeof(double));
            while (len-- > 0) {
                DOUBLE_CONVWITH(tmp);
                memcpy(tmp.buf, s, sizeof(double));
                s += sizeof(double);
                NTOHD(tmp);
                UNPACK_PUSH(DBL2NUM(tmp.d));
            }
            PACK_ITEM_ADJUST();
            break;

          case 'U':
            if (len > send - s) len = send - s;
            while (len > 0 && s < send) {
                long alen = send - s;
                unsigned long l;

                l = utf8_to_uv(s, &alen);
                s += alen; len--;
                UNPACK_PUSH(ULONG2NUM(l));
            }
            break;

          case 'u':
            {
                VALUE buf = rb_str_new(0, (send - s)*3/4);
                char *ptr = RSTRING_PTR(buf);
                long total = 0;

                while (s < send && (unsigned char)*s > ' ' && (unsigned char)*s < 'a') {
                    long a,b,c,d;
                    char hunk[3];

                    /* first character of a line encodes its decoded length */
                    len = ((unsigned char)*s++ - ' ') & 077;

                    total += len;
                    if (total > RSTRING_LEN(buf)) {
                        /* never write past the preallocated buffer */
                        len -= total - RSTRING_LEN(buf);
                        total = RSTRING_LEN(buf);
                    }

                    while (len > 0) {
                        long mlen = len > 3 ? 3 : len;

                        if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
                            a = ((unsigned char)*s++ - ' ') & 077;
                        else
                            a = 0;
                        if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
                            b = ((unsigned char)*s++ - ' ') & 077;
                        else
                            b = 0;
                        if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
                            c = ((unsigned char)*s++ - ' ') & 077;
                        else
                            c = 0;
                        if (s < send && (unsigned char)*s >= ' ' && (unsigned char)*s < 'a')
                            d = ((unsigned char)*s++ - ' ') & 077;
                        else
                            d = 0;
                        hunk[0] = (char)(a << 2 | b >> 4);
                        hunk[1] = (char)(b << 4 | c >> 2);
                        hunk[2] = (char)(c << 6 | d);
                        memcpy(ptr, hunk, mlen);
                        ptr += mlen;
                        len -= mlen;
                    }
                    if (s < send && (unsigned char)*s != '\r' && *s != '\n')
                        s++;    /* possible checksum byte */
                    if (s < send && *s == '\r') s++;
                    if (s < send && *s == '\n') s++;
                }

                rb_str_set_len(buf, total);
                UNPACK_PUSH(buf);
            }
            break;

          case 'm':
            {
                VALUE buf = rb_str_new(0, (send - s + 3)*3/4); /* +3 is for skipping paddings */
                char *ptr = RSTRING_PTR(buf);
                int a = -1,b = -1,c = 0,d = 0;
                static signed char b64_xtable[256];

                /* build the reverse lookup table on first use */
                if (b64_xtable['/'] <= 0) {
                    int i;

                    for (i = 0; i < 256; i++) {
                        b64_xtable[i] = -1;
                    }
                    for (i = 0; i < 64; i++) {
                        b64_xtable[(unsigned char)b64_table[i]] = (char)i;
                    }
                }
                if (len == 0) {
                    /* count 0: strict decoding, any stray byte is an error */
                    while (s < send) {
                        a = b = c = d = -1;
                        a = b64_xtable[(unsigned char)*s++];
                        if (s >= send || a == -1) rb_raise(rb_eArgError, "invalid base64");
                        b = b64_xtable[(unsigned char)*s++];
                        if (s >= send || b == -1) rb_raise(rb_eArgError, "invalid base64");
                        if (*s == '=') {
                            if (s + 2 == send && *(s + 1) == '=') break;
                            rb_raise(rb_eArgError, "invalid base64");
                        }
                        c = b64_xtable[(unsigned char)*s++];
                        if (s >= send || c == -1) rb_raise(rb_eArgError, "invalid base64");
                        if (s + 1 == send && *s == '=') break;
                        d = b64_xtable[(unsigned char)*s++];
                        if (d == -1) rb_raise(rb_eArgError, "invalid base64");
                        *ptr++ = castchar(a << 2 | b >> 4);
                        *ptr++ = castchar(b << 4 | c >> 2);
                        *ptr++ = castchar(c << 6 | d);
                    }
                    if (c == -1) {
                        *ptr++ = castchar(a << 2 | b >> 4);
                        if (b & 0xf) rb_raise(rb_eArgError, "invalid base64");
                    }
                    else if (d == -1) {
                        *ptr++ = castchar(a << 2 | b >> 4);
                        *ptr++ = castchar(b << 4 | c >> 2);
                        if (c & 0x3) rb_raise(rb_eArgError, "invalid base64");
                    }
                }
                else {
                    /* lenient decoding: bytes outside the alphabet are skipped.
                     * Each scan tests s < send before touching *s; a sextet
                     * whose scan runs off the end keeps the value -1. */
                    while (s < send) {
                        a = b = c = d = -1;
                        while (s < send && (a = b64_xtable[(unsigned char)*s]) == -1) {s++;}
                        if (s >= send) break;
                        s++;
                        while (s < send && (b = b64_xtable[(unsigned char)*s]) == -1) {s++;}
                        if (s >= send) break;
                        s++;
                        while (s < send && (c = b64_xtable[(unsigned char)*s]) == -1) {if (*s == '=') break; s++;}
                        if (s >= send || *s == '=') break;
                        s++;
                        while (s < send && (d = b64_xtable[(unsigned char)*s]) == -1) {if (*s == '=') break; s++;}
                        if (s >= send || *s == '=') break;
                        s++;
                        *ptr++ = castchar(a << 2 | b >> 4);
                        *ptr++ = castchar(b << 4 | c >> 2);
                        *ptr++ = castchar(c << 6 | d);
                        a = -1;
                    }
                    /* flush a trailing partial group */
                    if (a != -1 && b != -1) {
                        if (c == -1)
                            *ptr++ = castchar(a << 2 | b >> 4);
                        else {
                            *ptr++ = castchar(a << 2 | b >> 4);
                            *ptr++ = castchar(b << 4 | c >> 2);
                        }
                    }
                }
                rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
                UNPACK_PUSH(buf);
            }
            break;

          case 'M':
            {
                VALUE buf = rb_str_new(0, send - s);
                char *ptr = RSTRING_PTR(buf), *ss = s;
                int csum = 0;
                int c1, c2;

                while (s < send) {
                    if (*s == '=') {
                        if (++s == send) break;
                        if (s+1 < send && *s == '\r' && *(s+1) == '\n')
                            s++;
                        if (*s != '\n') {
                            if ((c1 = hex2num(*s)) == -1) break;
                            if (++s == send) break;
                            if ((c2 = hex2num(*s)) == -1) break;
                            csum |= *ptr++ = castchar(c1 << 4 | c2);
                        }
                    }
                    else {
                        csum |= *ptr++ = *s;
                    }
                    s++;
                    ss = s;     /* everything before ss has been decoded */
                }
                rb_str_set_len(buf, ptr - RSTRING_PTR(buf));
                /* append whatever was left undecoded after an early break */
                rb_str_buf_cat(buf, ss, send-ss);
                csum = ISASCII(csum) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID;
                ENCODING_CODERANGE_SET(buf, rb_ascii8bit_encindex(), csum);
                UNPACK_PUSH(buf);
            }
            break;

          case '@':
            if (len > RSTRING_LEN(str))
                rb_raise(rb_eArgError, "@ outside of string");
            s = RSTRING_PTR(str) + len;
            break;

          case 'X':
            if (len > s - RSTRING_PTR(str))
                rb_raise(rb_eArgError, "X outside of string");
            s -= len;
            break;

          case 'x':
            if (len > send - s)
                rb_raise(rb_eArgError, "x outside of string");
            s += len;
            break;

          case 'P':
            if (sizeof(char *) <= (size_t)(send - s)) {
                VALUE tmp = Qnil;
                char *t;

                memcpy(&t, s, sizeof(char *));
                s += sizeof(char *);

                if (t) {
                    VALUE a;
                    const VALUE *p, *pend;

                    /* the raw pointer is only accepted when it matches a
                     * string recorded in the associated array of str */
                    if (!(a = str_associated(str))) {
                        rb_raise(rb_eArgError, "no associated pointer");
                    }
                    p = RARRAY_CONST_PTR(a);
                    pend = p + RARRAY_LEN(a);
                    while (p < pend) {
                        if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
                            if (len < RSTRING_LEN(*p)) {
                                tmp = rb_str_new(t, len);
                                str_associate(tmp, a);
                            }
                            else {
                                tmp = *p;
                            }
                            break;
                        }
                        p++;
                    }
                    if (p == pend) {
                        rb_raise(rb_eArgError, "non associated pointer");
                    }
                }
                UNPACK_PUSH(tmp);
            }
            break;

          case 'p':
            if (len > (long)((send - s) / sizeof(char *)))
                len = (send - s) / sizeof(char *);
            while (len-- > 0) {
                if ((size_t)(send - s) < sizeof(char *))
                    break;
                else {
                    VALUE tmp = Qnil;
                    char *t;

                    memcpy(&t, s, sizeof(char *));
                    s += sizeof(char *);

                    if (t) {
                        VALUE a;
                        const VALUE *p, *pend;

                        if (!(a = str_associated(str))) {
                            rb_raise(rb_eArgError, "no associated pointer");
                        }
                        p = RARRAY_CONST_PTR(a);
                        pend = p + RARRAY_LEN(a);
                        while (p < pend) {
                            if (RB_TYPE_P(*p, T_STRING) && RSTRING_PTR(*p) == t) {
                                tmp = *p;
                                break;
                            }
                            p++;
                        }
                        if (p == pend) {
                            rb_raise(rb_eArgError, "non associated pointer");
                        }
                    }
                    UNPACK_PUSH(tmp);
                }
            }
            break;

          case 'w':
            {
                char *s0 = s;
                while (len > 0 && s < send) {
                    if (*s & 0x80) {
                        /* high bit set: more bytes of this number follow */
                        s++;
                    }
                    else {
                        s++;
                        UNPACK_PUSH(rb_integer_unpack(s0, s-s0, 1, 1, INTEGER_PACK_BIG_ENDIAN));
                        len--;
                        s0 = s;
                    }
                }
            }
            break;

          default:
            unknown_directive("unpack", type, fmt);
            break;
        }
    }

    *parsed_len = s - init_s;
    return ary;
}
|
916
|
+
|
917
|
+
int
|
918
|
+
srb_uv_to_utf8(char buf[6], unsigned long uv)
|
919
|
+
{
|
920
|
+
if (uv <= 0x7f) {
|
921
|
+
buf[0] = (char)uv;
|
922
|
+
return 1;
|
923
|
+
}
|
924
|
+
if (uv <= 0x7ff) {
|
925
|
+
buf[0] = castchar(((uv>>6)&0xff)|0xc0);
|
926
|
+
buf[1] = castchar((uv&0x3f)|0x80);
|
927
|
+
return 2;
|
928
|
+
}
|
929
|
+
if (uv <= 0xffff) {
|
930
|
+
buf[0] = castchar(((uv>>12)&0xff)|0xe0);
|
931
|
+
buf[1] = castchar(((uv>>6)&0x3f)|0x80);
|
932
|
+
buf[2] = castchar((uv&0x3f)|0x80);
|
933
|
+
return 3;
|
934
|
+
}
|
935
|
+
if (uv <= 0x1fffff) {
|
936
|
+
buf[0] = castchar(((uv>>18)&0xff)|0xf0);
|
937
|
+
buf[1] = castchar(((uv>>12)&0x3f)|0x80);
|
938
|
+
buf[2] = castchar(((uv>>6)&0x3f)|0x80);
|
939
|
+
buf[3] = castchar((uv&0x3f)|0x80);
|
940
|
+
return 4;
|
941
|
+
}
|
942
|
+
if (uv <= 0x3ffffff) {
|
943
|
+
buf[0] = castchar(((uv>>24)&0xff)|0xf8);
|
944
|
+
buf[1] = castchar(((uv>>18)&0x3f)|0x80);
|
945
|
+
buf[2] = castchar(((uv>>12)&0x3f)|0x80);
|
946
|
+
buf[3] = castchar(((uv>>6)&0x3f)|0x80);
|
947
|
+
buf[4] = castchar((uv&0x3f)|0x80);
|
948
|
+
return 5;
|
949
|
+
}
|
950
|
+
if (uv <= 0x7fffffff) {
|
951
|
+
buf[0] = castchar(((uv>>30)&0xff)|0xfc);
|
952
|
+
buf[1] = castchar(((uv>>24)&0x3f)|0x80);
|
953
|
+
buf[2] = castchar(((uv>>18)&0x3f)|0x80);
|
954
|
+
buf[3] = castchar(((uv>>12)&0x3f)|0x80);
|
955
|
+
buf[4] = castchar(((uv>>6)&0x3f)|0x80);
|
956
|
+
buf[5] = castchar((uv&0x3f)|0x80);
|
957
|
+
return 6;
|
958
|
+
}
|
959
|
+
rb_raise(rb_eRangeError, "pack(U): value out of range");
|
960
|
+
|
961
|
+
UNREACHABLE_RETURN(Qnil);
|
962
|
+
}
|
963
|
+
|
964
|
+
static const unsigned long utf8_limits[] = {
|
965
|
+
0x0, /* 1 */
|
966
|
+
0x80, /* 2 */
|
967
|
+
0x800, /* 3 */
|
968
|
+
0x10000, /* 4 */
|
969
|
+
0x200000, /* 5 */
|
970
|
+
0x4000000, /* 6 */
|
971
|
+
0x80000000, /* 7 */
|
972
|
+
};
|
973
|
+
|
974
|
+
static unsigned long
|
975
|
+
utf8_to_uv(const char *p, long *lenp)
|
976
|
+
{
|
977
|
+
int c = *p++ & 0xff;
|
978
|
+
unsigned long uv = c;
|
979
|
+
long n;
|
980
|
+
|
981
|
+
if (!(uv & 0x80)) {
|
982
|
+
*lenp = 1;
|
983
|
+
return uv;
|
984
|
+
}
|
985
|
+
if (!(uv & 0x40)) {
|
986
|
+
*lenp = 1;
|
987
|
+
rb_raise(rb_eArgError, "malformed UTF-8 character");
|
988
|
+
}
|
989
|
+
|
990
|
+
if (!(uv & 0x20)) { n = 2; uv &= 0x1f; }
|
991
|
+
else if (!(uv & 0x10)) { n = 3; uv &= 0x0f; }
|
992
|
+
else if (!(uv & 0x08)) { n = 4; uv &= 0x07; }
|
993
|
+
else if (!(uv & 0x04)) { n = 5; uv &= 0x03; }
|
994
|
+
else if (!(uv & 0x02)) { n = 6; uv &= 0x01; }
|
995
|
+
else {
|
996
|
+
*lenp = 1;
|
997
|
+
rb_raise(rb_eArgError, "malformed UTF-8 character");
|
998
|
+
}
|
999
|
+
if (n > *lenp) {
|
1000
|
+
rb_raise(rb_eArgError, "malformed UTF-8 character (expected %ld bytes, given %ld bytes)",
|
1001
|
+
n, *lenp);
|
1002
|
+
}
|
1003
|
+
*lenp = n--;
|
1004
|
+
if (n != 0) {
|
1005
|
+
while (n--) {
|
1006
|
+
c = *p++ & 0xff;
|
1007
|
+
if ((c & 0xc0) != 0x80) {
|
1008
|
+
*lenp -= n + 1;
|
1009
|
+
rb_raise(rb_eArgError, "malformed UTF-8 character");
|
1010
|
+
}
|
1011
|
+
else {
|
1012
|
+
c &= 0x3f;
|
1013
|
+
uv = uv << 6 | c;
|
1014
|
+
}
|
1015
|
+
}
|
1016
|
+
}
|
1017
|
+
n = *lenp - 1;
|
1018
|
+
if (uv < utf8_limits[n]) {
|
1019
|
+
rb_raise(rb_eArgError, "redundant UTF-8 sequence");
|
1020
|
+
}
|
1021
|
+
return uv;
|
1022
|
+
}
|