psych 3.3.2 → 5.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,469 +0,0 @@
1
-
2
- #include "yaml_private.h"
3
-
4
- /*
5
- * Declarations.
6
- */
7
-
8
- static int
9
- yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem,
10
- size_t offset, int value);
11
-
12
- static int
13
- yaml_parser_update_raw_buffer(yaml_parser_t *parser);
14
-
15
- static int
16
- yaml_parser_determine_encoding(yaml_parser_t *parser);
17
-
18
- YAML_DECLARE(int)
19
- yaml_parser_update_buffer(yaml_parser_t *parser, size_t length);
20
-
21
- /*
22
- * Set the reader error and return 0.
23
- */
24
-
25
- static int
26
- yaml_parser_set_reader_error(yaml_parser_t *parser, const char *problem,
27
- size_t offset, int value)
28
- {
29
- parser->error = YAML_READER_ERROR;
30
- parser->problem = problem;
31
- parser->problem_offset = offset;
32
- parser->problem_value = value;
33
-
34
- return 0;
35
- }
36
-
37
- /*
38
- * Byte order marks.
39
- */
40
-
41
- #define BOM_UTF8 "\xef\xbb\xbf"
42
- #define BOM_UTF16LE "\xff\xfe"
43
- #define BOM_UTF16BE "\xfe\xff"
44
-
45
- /*
46
- * Determine the input stream encoding by checking the BOM symbol. If no BOM is
47
- * found, the UTF-8 encoding is assumed. Return 1 on success, 0 on failure.
48
- */
49
-
50
- static int
51
- yaml_parser_determine_encoding(yaml_parser_t *parser)
52
- {
53
- /* Ensure that we had enough bytes in the raw buffer. */
54
-
55
- while (!parser->eof
56
- && parser->raw_buffer.last - parser->raw_buffer.pointer < 3) {
57
- if (!yaml_parser_update_raw_buffer(parser)) {
58
- return 0;
59
- }
60
- }
61
-
62
- /* Determine the encoding. */
63
-
64
- if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 2
65
- && !memcmp(parser->raw_buffer.pointer, BOM_UTF16LE, 2)) {
66
- parser->encoding = YAML_UTF16LE_ENCODING;
67
- parser->raw_buffer.pointer += 2;
68
- parser->offset += 2;
69
- }
70
- else if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 2
71
- && !memcmp(parser->raw_buffer.pointer, BOM_UTF16BE, 2)) {
72
- parser->encoding = YAML_UTF16BE_ENCODING;
73
- parser->raw_buffer.pointer += 2;
74
- parser->offset += 2;
75
- }
76
- else if (parser->raw_buffer.last - parser->raw_buffer.pointer >= 3
77
- && !memcmp(parser->raw_buffer.pointer, BOM_UTF8, 3)) {
78
- parser->encoding = YAML_UTF8_ENCODING;
79
- parser->raw_buffer.pointer += 3;
80
- parser->offset += 3;
81
- }
82
- else {
83
- parser->encoding = YAML_UTF8_ENCODING;
84
- }
85
-
86
- return 1;
87
- }
88
-
89
- /*
90
- * Update the raw buffer.
91
- */
92
-
93
- static int
94
- yaml_parser_update_raw_buffer(yaml_parser_t *parser)
95
- {
96
- size_t size_read = 0;
97
-
98
- /* Return if the raw buffer is full. */
99
-
100
- if (parser->raw_buffer.start == parser->raw_buffer.pointer
101
- && parser->raw_buffer.last == parser->raw_buffer.end)
102
- return 1;
103
-
104
- /* Return on EOF. */
105
-
106
- if (parser->eof) return 1;
107
-
108
- /* Move the remaining bytes in the raw buffer to the beginning. */
109
-
110
- if (parser->raw_buffer.start < parser->raw_buffer.pointer
111
- && parser->raw_buffer.pointer < parser->raw_buffer.last) {
112
- memmove(parser->raw_buffer.start, parser->raw_buffer.pointer,
113
- parser->raw_buffer.last - parser->raw_buffer.pointer);
114
- }
115
- parser->raw_buffer.last -=
116
- parser->raw_buffer.pointer - parser->raw_buffer.start;
117
- parser->raw_buffer.pointer = parser->raw_buffer.start;
118
-
119
- /* Call the read handler to fill the buffer. */
120
-
121
- if (!parser->read_handler(parser->read_handler_data, parser->raw_buffer.last,
122
- parser->raw_buffer.end - parser->raw_buffer.last, &size_read)) {
123
- return yaml_parser_set_reader_error(parser, "input error",
124
- parser->offset, -1);
125
- }
126
- parser->raw_buffer.last += size_read;
127
- if (!size_read) {
128
- parser->eof = 1;
129
- }
130
-
131
- return 1;
132
- }
133
-
134
- /*
135
- * Ensure that the buffer contains at least `length` characters.
136
- * Return 1 on success, 0 on failure.
137
- *
138
- * The length is supposed to be significantly less that the buffer size.
139
- */
140
-
141
- YAML_DECLARE(int)
142
- yaml_parser_update_buffer(yaml_parser_t *parser, size_t length)
143
- {
144
- int first = 1;
145
-
146
- assert(parser->read_handler); /* Read handler must be set. */
147
-
148
- /* If the EOF flag is set and the raw buffer is empty, do nothing. */
149
-
150
- if (parser->eof && parser->raw_buffer.pointer == parser->raw_buffer.last)
151
- return 1;
152
-
153
- /* Return if the buffer contains enough characters. */
154
-
155
- if (parser->unread >= length)
156
- return 1;
157
-
158
- /* Determine the input encoding if it is not known yet. */
159
-
160
- if (!parser->encoding) {
161
- if (!yaml_parser_determine_encoding(parser))
162
- return 0;
163
- }
164
-
165
- /* Move the unread characters to the beginning of the buffer. */
166
-
167
- if (parser->buffer.start < parser->buffer.pointer
168
- && parser->buffer.pointer < parser->buffer.last) {
169
- size_t size = parser->buffer.last - parser->buffer.pointer;
170
- memmove(parser->buffer.start, parser->buffer.pointer, size);
171
- parser->buffer.pointer = parser->buffer.start;
172
- parser->buffer.last = parser->buffer.start + size;
173
- }
174
- else if (parser->buffer.pointer == parser->buffer.last) {
175
- parser->buffer.pointer = parser->buffer.start;
176
- parser->buffer.last = parser->buffer.start;
177
- }
178
-
179
- /* Fill the buffer until it has enough characters. */
180
-
181
- while (parser->unread < length)
182
- {
183
- /* Fill the raw buffer if necessary. */
184
-
185
- if (!first || parser->raw_buffer.pointer == parser->raw_buffer.last) {
186
- if (!yaml_parser_update_raw_buffer(parser)) return 0;
187
- }
188
- first = 0;
189
-
190
- /* Decode the raw buffer. */
191
-
192
- while (parser->raw_buffer.pointer != parser->raw_buffer.last)
193
- {
194
- unsigned int value = 0, value2 = 0;
195
- int incomplete = 0;
196
- unsigned char octet;
197
- unsigned int width = 0;
198
- int low, high;
199
- size_t k;
200
- size_t raw_unread = parser->raw_buffer.last - parser->raw_buffer.pointer;
201
-
202
- /* Decode the next character. */
203
-
204
- switch (parser->encoding)
205
- {
206
- case YAML_UTF8_ENCODING:
207
-
208
- /*
209
- * Decode a UTF-8 character. Check RFC 3629
210
- * (http://www.ietf.org/rfc/rfc3629.txt) for more details.
211
- *
212
- * The following table (taken from the RFC) is used for
213
- * decoding.
214
- *
215
- * Char. number range | UTF-8 octet sequence
216
- * (hexadecimal) | (binary)
217
- * --------------------+------------------------------------
218
- * 0000 0000-0000 007F | 0xxxxxxx
219
- * 0000 0080-0000 07FF | 110xxxxx 10xxxxxx
220
- * 0000 0800-0000 FFFF | 1110xxxx 10xxxxxx 10xxxxxx
221
- * 0001 0000-0010 FFFF | 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
222
- *
223
- * Additionally, the characters in the range 0xD800-0xDFFF
224
- * are prohibited as they are reserved for use with UTF-16
225
- * surrogate pairs.
226
- */
227
-
228
- /* Determine the length of the UTF-8 sequence. */
229
-
230
- octet = parser->raw_buffer.pointer[0];
231
- width = (octet & 0x80) == 0x00 ? 1 :
232
- (octet & 0xE0) == 0xC0 ? 2 :
233
- (octet & 0xF0) == 0xE0 ? 3 :
234
- (octet & 0xF8) == 0xF0 ? 4 : 0;
235
-
236
- /* Check if the leading octet is valid. */
237
-
238
- if (!width)
239
- return yaml_parser_set_reader_error(parser,
240
- "invalid leading UTF-8 octet",
241
- parser->offset, octet);
242
-
243
- /* Check if the raw buffer contains an incomplete character. */
244
-
245
- if (width > raw_unread) {
246
- if (parser->eof) {
247
- return yaml_parser_set_reader_error(parser,
248
- "incomplete UTF-8 octet sequence",
249
- parser->offset, -1);
250
- }
251
- incomplete = 1;
252
- break;
253
- }
254
-
255
- /* Decode the leading octet. */
256
-
257
- value = (octet & 0x80) == 0x00 ? octet & 0x7F :
258
- (octet & 0xE0) == 0xC0 ? octet & 0x1F :
259
- (octet & 0xF0) == 0xE0 ? octet & 0x0F :
260
- (octet & 0xF8) == 0xF0 ? octet & 0x07 : 0;
261
-
262
- /* Check and decode the trailing octets. */
263
-
264
- for (k = 1; k < width; k ++)
265
- {
266
- octet = parser->raw_buffer.pointer[k];
267
-
268
- /* Check if the octet is valid. */
269
-
270
- if ((octet & 0xC0) != 0x80)
271
- return yaml_parser_set_reader_error(parser,
272
- "invalid trailing UTF-8 octet",
273
- parser->offset+k, octet);
274
-
275
- /* Decode the octet. */
276
-
277
- value = (value << 6) + (octet & 0x3F);
278
- }
279
-
280
- /* Check the length of the sequence against the value. */
281
-
282
- if (!((width == 1) ||
283
- (width == 2 && value >= 0x80) ||
284
- (width == 3 && value >= 0x800) ||
285
- (width == 4 && value >= 0x10000)))
286
- return yaml_parser_set_reader_error(parser,
287
- "invalid length of a UTF-8 sequence",
288
- parser->offset, -1);
289
-
290
- /* Check the range of the value. */
291
-
292
- if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF)
293
- return yaml_parser_set_reader_error(parser,
294
- "invalid Unicode character",
295
- parser->offset, value);
296
-
297
- break;
298
-
299
- case YAML_UTF16LE_ENCODING:
300
- case YAML_UTF16BE_ENCODING:
301
-
302
- low = (parser->encoding == YAML_UTF16LE_ENCODING ? 0 : 1);
303
- high = (parser->encoding == YAML_UTF16LE_ENCODING ? 1 : 0);
304
-
305
- /*
306
- * The UTF-16 encoding is not as simple as one might
307
- * naively think. Check RFC 2781
308
- * (http://www.ietf.org/rfc/rfc2781.txt).
309
- *
310
- * Normally, two subsequent bytes describe a Unicode
311
- * character. However a special technique (called a
312
- * surrogate pair) is used for specifying character
313
- * values larger than 0xFFFF.
314
- *
315
- * A surrogate pair consists of two pseudo-characters:
316
- * high surrogate area (0xD800-0xDBFF)
317
- * low surrogate area (0xDC00-0xDFFF)
318
- *
319
- * The following formulas are used for decoding
320
- * and encoding characters using surrogate pairs:
321
- *
322
- * U = U' + 0x10000 (0x01 00 00 <= U <= 0x10 FF FF)
323
- * U' = yyyyyyyyyyxxxxxxxxxx (0 <= U' <= 0x0F FF FF)
324
- * W1 = 110110yyyyyyyyyy
325
- * W2 = 110111xxxxxxxxxx
326
- *
327
- * where U is the character value, W1 is the high surrogate
328
- * area, W2 is the low surrogate area.
329
- */
330
-
331
- /* Check for incomplete UTF-16 character. */
332
-
333
- if (raw_unread < 2) {
334
- if (parser->eof) {
335
- return yaml_parser_set_reader_error(parser,
336
- "incomplete UTF-16 character",
337
- parser->offset, -1);
338
- }
339
- incomplete = 1;
340
- break;
341
- }
342
-
343
- /* Get the character. */
344
-
345
- value = parser->raw_buffer.pointer[low]
346
- + (parser->raw_buffer.pointer[high] << 8);
347
-
348
- /* Check for unexpected low surrogate area. */
349
-
350
- if ((value & 0xFC00) == 0xDC00)
351
- return yaml_parser_set_reader_error(parser,
352
- "unexpected low surrogate area",
353
- parser->offset, value);
354
-
355
- /* Check for a high surrogate area. */
356
-
357
- if ((value & 0xFC00) == 0xD800) {
358
-
359
- width = 4;
360
-
361
- /* Check for incomplete surrogate pair. */
362
-
363
- if (raw_unread < 4) {
364
- if (parser->eof) {
365
- return yaml_parser_set_reader_error(parser,
366
- "incomplete UTF-16 surrogate pair",
367
- parser->offset, -1);
368
- }
369
- incomplete = 1;
370
- break;
371
- }
372
-
373
- /* Get the next character. */
374
-
375
- value2 = parser->raw_buffer.pointer[low+2]
376
- + (parser->raw_buffer.pointer[high+2] << 8);
377
-
378
- /* Check for a low surrogate area. */
379
-
380
- if ((value2 & 0xFC00) != 0xDC00)
381
- return yaml_parser_set_reader_error(parser,
382
- "expected low surrogate area",
383
- parser->offset+2, value2);
384
-
385
- /* Generate the value of the surrogate pair. */
386
-
387
- value = 0x10000 + ((value & 0x3FF) << 10) + (value2 & 0x3FF);
388
- }
389
-
390
- else {
391
- width = 2;
392
- }
393
-
394
- break;
395
-
396
- default:
397
- assert(1); /* Impossible. */
398
- }
399
-
400
- /* Check if the raw buffer contains enough bytes to form a character. */
401
-
402
- if (incomplete) break;
403
-
404
- /*
405
- * Check if the character is in the allowed range:
406
- * #x9 | #xA | #xD | [#x20-#x7E] (8 bit)
407
- * | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] (16 bit)
408
- * | [#x10000-#x10FFFF] (32 bit)
409
- */
410
-
411
- if (! (value == 0x09 || value == 0x0A || value == 0x0D
412
- || (value >= 0x20 && value <= 0x7E)
413
- || (value == 0x85) || (value >= 0xA0 && value <= 0xD7FF)
414
- || (value >= 0xE000 && value <= 0xFFFD)
415
- || (value >= 0x10000 && value <= 0x10FFFF)))
416
- return yaml_parser_set_reader_error(parser,
417
- "control characters are not allowed",
418
- parser->offset, value);
419
-
420
- /* Move the raw pointers. */
421
-
422
- parser->raw_buffer.pointer += width;
423
- parser->offset += width;
424
-
425
- /* Finally put the character into the buffer. */
426
-
427
- /* 0000 0000-0000 007F -> 0xxxxxxx */
428
- if (value <= 0x7F) {
429
- *(parser->buffer.last++) = value;
430
- }
431
- /* 0000 0080-0000 07FF -> 110xxxxx 10xxxxxx */
432
- else if (value <= 0x7FF) {
433
- *(parser->buffer.last++) = 0xC0 + (value >> 6);
434
- *(parser->buffer.last++) = 0x80 + (value & 0x3F);
435
- }
436
- /* 0000 0800-0000 FFFF -> 1110xxxx 10xxxxxx 10xxxxxx */
437
- else if (value <= 0xFFFF) {
438
- *(parser->buffer.last++) = 0xE0 + (value >> 12);
439
- *(parser->buffer.last++) = 0x80 + ((value >> 6) & 0x3F);
440
- *(parser->buffer.last++) = 0x80 + (value & 0x3F);
441
- }
442
- /* 0001 0000-0010 FFFF -> 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
443
- else {
444
- *(parser->buffer.last++) = 0xF0 + (value >> 18);
445
- *(parser->buffer.last++) = 0x80 + ((value >> 12) & 0x3F);
446
- *(parser->buffer.last++) = 0x80 + ((value >> 6) & 0x3F);
447
- *(parser->buffer.last++) = 0x80 + (value & 0x3F);
448
- }
449
-
450
- parser->unread ++;
451
- }
452
-
453
- /* On EOF, put NUL into the buffer and return. */
454
-
455
- if (parser->eof) {
456
- *(parser->buffer.last++) = '\0';
457
- parser->unread ++;
458
- return 1;
459
- }
460
-
461
- }
462
-
463
- if (parser->offset >= MAX_FILE_SIZE) {
464
- return yaml_parser_set_reader_error(parser, "input is too long",
465
- parser->offset, -1);
466
- }
467
-
468
- return 1;
469
- }