pygments.rb 0.2.13 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. data/.gitignore +1 -0
  2. data/README.md +45 -19
  3. data/Rakefile +21 -11
  4. data/bench.rb +15 -48
  5. data/cache-lexers.rb +8 -0
  6. data/lexers +0 -0
  7. data/lib/pygments.rb +3 -6
  8. data/lib/pygments/mentos.py +343 -0
  9. data/lib/pygments/popen.rb +383 -0
  10. data/lib/pygments/version.rb +1 -1
  11. data/pygments.rb.gemspec +5 -4
  12. data/test/test_data.c +2581 -0
  13. data/test/test_data.py +514 -0
  14. data/test/test_data_generated +2582 -0
  15. data/test/test_pygments.rb +208 -84
  16. data/vendor/pygments-main/pygments/lexers/_mapping.py +1 -1
  17. data/vendor/pygments-main/pygments/lexers/shell.py +1 -1
  18. data/vendor/simplejson/.gitignore +10 -0
  19. data/vendor/simplejson/.travis.yml +5 -0
  20. data/vendor/simplejson/CHANGES.txt +291 -0
  21. data/vendor/simplejson/LICENSE.txt +19 -0
  22. data/vendor/simplejson/MANIFEST.in +5 -0
  23. data/vendor/simplejson/README.rst +19 -0
  24. data/vendor/simplejson/conf.py +179 -0
  25. data/vendor/simplejson/index.rst +628 -0
  26. data/vendor/simplejson/scripts/make_docs.py +18 -0
  27. data/vendor/simplejson/setup.py +104 -0
  28. data/vendor/simplejson/simplejson/__init__.py +510 -0
  29. data/vendor/simplejson/simplejson/_speedups.c +2745 -0
  30. data/vendor/simplejson/simplejson/decoder.py +425 -0
  31. data/vendor/simplejson/simplejson/encoder.py +567 -0
  32. data/vendor/simplejson/simplejson/ordered_dict.py +119 -0
  33. data/vendor/simplejson/simplejson/scanner.py +77 -0
  34. data/vendor/simplejson/simplejson/tests/__init__.py +67 -0
  35. data/vendor/simplejson/simplejson/tests/test_bigint_as_string.py +55 -0
  36. data/vendor/simplejson/simplejson/tests/test_check_circular.py +30 -0
  37. data/vendor/simplejson/simplejson/tests/test_decimal.py +66 -0
  38. data/vendor/simplejson/simplejson/tests/test_decode.py +83 -0
  39. data/vendor/simplejson/simplejson/tests/test_default.py +9 -0
  40. data/vendor/simplejson/simplejson/tests/test_dump.py +67 -0
  41. data/vendor/simplejson/simplejson/tests/test_encode_basestring_ascii.py +46 -0
  42. data/vendor/simplejson/simplejson/tests/test_encode_for_html.py +32 -0
  43. data/vendor/simplejson/simplejson/tests/test_errors.py +34 -0
  44. data/vendor/simplejson/simplejson/tests/test_fail.py +91 -0
  45. data/vendor/simplejson/simplejson/tests/test_float.py +19 -0
  46. data/vendor/simplejson/simplejson/tests/test_indent.py +86 -0
  47. data/vendor/simplejson/simplejson/tests/test_item_sort_key.py +20 -0
  48. data/vendor/simplejson/simplejson/tests/test_namedtuple.py +121 -0
  49. data/vendor/simplejson/simplejson/tests/test_pass1.py +76 -0
  50. data/vendor/simplejson/simplejson/tests/test_pass2.py +14 -0
  51. data/vendor/simplejson/simplejson/tests/test_pass3.py +20 -0
  52. data/vendor/simplejson/simplejson/tests/test_recursion.py +67 -0
  53. data/vendor/simplejson/simplejson/tests/test_scanstring.py +117 -0
  54. data/vendor/simplejson/simplejson/tests/test_separators.py +42 -0
  55. data/vendor/simplejson/simplejson/tests/test_speedups.py +20 -0
  56. data/vendor/simplejson/simplejson/tests/test_tuple.py +49 -0
  57. data/vendor/simplejson/simplejson/tests/test_unicode.py +109 -0
  58. data/vendor/simplejson/simplejson/tool.py +39 -0
  59. metadata +80 -22
  60. data/ext/extconf.rb +0 -14
  61. data/ext/pygments.c +0 -466
  62. data/lib/pygments/c.rb +0 -54
  63. data/lib/pygments/ffi.rb +0 -155
  64. data/vendor/.gitignore +0 -1
@@ -0,0 +1,2745 @@
1
+ #include "Python.h"
2
+ #include "structmember.h"
3
+ #if PY_VERSION_HEX < 0x02070000 && !defined(PyOS_string_to_double)
4
+ #define PyOS_string_to_double json_PyOS_string_to_double
5
+ static double
6
+ json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception);
7
+ static double
8
+ json_PyOS_string_to_double(const char *s, char **endptr, PyObject *overflow_exception) {
9
+ double x;
10
+ assert(endptr == NULL);
11
+ assert(overflow_exception == NULL);
12
+ PyFPE_START_PROTECT("json_PyOS_string_to_double", return -1.0;)
13
+ x = PyOS_ascii_atof(s);
14
+ PyFPE_END_PROTECT(x)
15
+ return x;
16
+ }
17
+ #endif
18
+ #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE)
19
+ #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type)
20
+ #endif
21
+ #if PY_VERSION_HEX < 0x02060000 && !defined(Py_SIZE)
22
+ #define Py_SIZE(ob) (((PyVarObject*)(ob))->ob_size)
23
+ #endif
24
+ #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN)
25
+ typedef int Py_ssize_t;
26
+ #define PY_SSIZE_T_MAX INT_MAX
27
+ #define PY_SSIZE_T_MIN INT_MIN
28
+ #define PyInt_FromSsize_t PyInt_FromLong
29
+ #define PyInt_AsSsize_t PyInt_AsLong
30
+ #endif
31
+ #ifndef Py_IS_FINITE
32
+ #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X))
33
+ #endif
34
+
35
+ #ifdef __GNUC__
36
+ #define UNUSED __attribute__((__unused__))
37
+ #else
38
+ #define UNUSED
39
+ #endif
40
+
41
+ #define DEFAULT_ENCODING "utf-8"
42
+
43
+ #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType)
44
+ #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType)
45
+ #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType)
46
+ #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType)
47
+
48
+ static PyTypeObject PyScannerType;
49
+ static PyTypeObject PyEncoderType;
50
+
51
+ typedef struct _PyScannerObject {
52
+ PyObject_HEAD
53
+ PyObject *encoding;
54
+ PyObject *strict;
55
+ PyObject *object_hook;
56
+ PyObject *pairs_hook;
57
+ PyObject *parse_float;
58
+ PyObject *parse_int;
59
+ PyObject *parse_constant;
60
+ PyObject *memo;
61
+ } PyScannerObject;
62
+
63
+ static PyMemberDef scanner_members[] = {
64
+ {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"},
65
+ {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"},
66
+ {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"},
67
+ {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"},
68
+ {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"},
69
+ {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"},
70
+ {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"},
71
+ {NULL}
72
+ };
73
+
74
+ typedef struct _PyEncoderObject {
75
+ PyObject_HEAD
76
+ PyObject *markers;
77
+ PyObject *defaultfn;
78
+ PyObject *encoder;
79
+ PyObject *indent;
80
+ PyObject *key_separator;
81
+ PyObject *item_separator;
82
+ PyObject *sort_keys;
83
+ PyObject *skipkeys;
84
+ PyObject *key_memo;
85
+ PyObject *Decimal;
86
+ int fast_encode;
87
+ int allow_nan;
88
+ int use_decimal;
89
+ int namedtuple_as_object;
90
+ int tuple_as_array;
91
+ int bigint_as_string;
92
+ PyObject *item_sort_key;
93
+ } PyEncoderObject;
94
+
95
+ static PyMemberDef encoder_members[] = {
96
+ {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"},
97
+ {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"},
98
+ {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"},
99
+ {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"},
100
+ {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"},
101
+ {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"},
102
+ {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"},
103
+ {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"},
104
+ {"key_memo", T_OBJECT, offsetof(PyEncoderObject, key_memo), READONLY, "key_memo"},
105
+ {"item_sort_key", T_OBJECT, offsetof(PyEncoderObject, item_sort_key), READONLY, "item_sort_key"},
106
+ {NULL}
107
+ };
108
+
109
+ static PyObject *
110
+ maybe_quote_bigint(PyObject *encoded, PyObject *obj);
111
+
112
+ static Py_ssize_t
113
+ ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
114
+ static PyObject *
115
+ ascii_escape_unicode(PyObject *pystr);
116
+ static PyObject *
117
+ ascii_escape_str(PyObject *pystr);
118
+ static PyObject *
119
+ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr);
120
+ void init_speedups(void);
121
+ static PyObject *
122
+ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
123
+ static PyObject *
124
+ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr);
125
+ static PyObject *
126
+ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx);
127
+ static PyObject *
128
+ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
129
+ static int
130
+ scanner_init(PyObject *self, PyObject *args, PyObject *kwds);
131
+ static void
132
+ scanner_dealloc(PyObject *self);
133
+ static int
134
+ scanner_clear(PyObject *self);
135
+ static PyObject *
136
+ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
137
+ static int
138
+ encoder_init(PyObject *self, PyObject *args, PyObject *kwds);
139
+ static void
140
+ encoder_dealloc(PyObject *self);
141
+ static int
142
+ encoder_clear(PyObject *self);
143
+ static int
144
+ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level);
145
+ static int
146
+ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level);
147
+ static int
148
+ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level);
149
+ static PyObject *
150
+ _encoded_const(PyObject *obj);
151
+ static void
152
+ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end);
153
+ static PyObject *
154
+ encoder_encode_string(PyEncoderObject *s, PyObject *obj);
155
+ static int
156
+ _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr);
157
+ static PyObject *
158
+ _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr);
159
+ static PyObject *
160
+ encoder_encode_float(PyEncoderObject *s, PyObject *obj);
161
+ static int
162
+ _is_namedtuple(PyObject *obj);
163
+
164
/* S_CHAR(c): nonzero when c is printable ASCII (' ' through '~') other
   than backslash and double quote, i.e. a character the escapers copy
   to the output unchanged. The argument is parenthesized so that a
   compound expression is evaluated as a unit; note that it is still
   evaluated more than once, so it must be free of side effects. */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* IS_WHITESPACE(c): nonzero for space, tab, newline and carriage return. */
#define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r'))

/* Output bytes needed for one "\uXXXX" escape. */
#define MIN_EXPANSION 6
/* Worst-case output bytes for a single input code unit: on wide builds a
   code point may be written as a surrogate pair, i.e. two escapes. */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
173
+
174
+ static PyObject *
175
+ maybe_quote_bigint(PyObject *encoded, PyObject *obj)
176
+ {
177
+ static PyObject *big_long = NULL;
178
+ static PyObject *small_long = NULL;
179
+ if (big_long == NULL) {
180
+ big_long = PyLong_FromLongLong(1LL << 53);
181
+ if (big_long == NULL) {
182
+ Py_DECREF(encoded);
183
+ return NULL;
184
+ }
185
+ }
186
+ if (small_long == NULL) {
187
+ small_long = PyLong_FromLongLong(-1LL << 53);
188
+ if (small_long == NULL) {
189
+ Py_DECREF(encoded);
190
+ return NULL;
191
+ }
192
+ }
193
+ if (PyObject_RichCompareBool(obj, big_long, Py_GE) ||
194
+ PyObject_RichCompareBool(obj, small_long, Py_LE)) {
195
+ PyObject* quoted = PyString_FromFormat("\"%s\"",
196
+ PyString_AsString(encoded));
197
+ Py_DECREF(encoded);
198
+ encoded = quoted;
199
+ }
200
+ return encoded;
201
+ }
202
+
203
+ static int
204
+ _is_namedtuple(PyObject *obj)
205
+ {
206
+ int rval = 0;
207
+ PyObject *_asdict = PyObject_GetAttrString(obj, "_asdict");
208
+ if (_asdict == NULL) {
209
+ PyErr_Clear();
210
+ return 0;
211
+ }
212
+ rval = PyCallable_Check(_asdict);
213
+ Py_DECREF(_asdict);
214
+ return rval;
215
+ }
216
+
217
+ static int
218
+ _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr)
219
+ {
220
+ /* PyObject to Py_ssize_t converter */
221
+ *size_ptr = PyInt_AsSsize_t(o);
222
+ if (*size_ptr == -1 && PyErr_Occurred())
223
+ return 0;
224
+ return 1;
225
+ }
226
+
227
+ static PyObject *
228
+ _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr)
229
+ {
230
+ /* Py_ssize_t to PyObject converter */
231
+ return PyInt_FromSsize_t(*size_ptr);
232
+ }
233
+
234
+ static Py_ssize_t
235
+ ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
236
+ {
237
+ /* Escape unicode code point c to ASCII escape sequences
238
+ in char *output. output must have at least 12 bytes unused to
239
+ accommodate an escaped surrogate pair "\uXXXX\uXXXX" */
240
+ output[chars++] = '\\';
241
+ switch (c) {
242
+ case '\\': output[chars++] = (char)c; break;
243
+ case '"': output[chars++] = (char)c; break;
244
+ case '\b': output[chars++] = 'b'; break;
245
+ case '\f': output[chars++] = 'f'; break;
246
+ case '\n': output[chars++] = 'n'; break;
247
+ case '\r': output[chars++] = 'r'; break;
248
+ case '\t': output[chars++] = 't'; break;
249
+ default:
250
+ #ifdef Py_UNICODE_WIDE
251
+ if (c >= 0x10000) {
252
+ /* UTF-16 surrogate pair */
253
+ Py_UNICODE v = c - 0x10000;
254
+ c = 0xd800 | ((v >> 10) & 0x3ff);
255
+ output[chars++] = 'u';
256
+ output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
257
+ output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
258
+ output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
259
+ output[chars++] = "0123456789abcdef"[(c ) & 0xf];
260
+ c = 0xdc00 | (v & 0x3ff);
261
+ output[chars++] = '\\';
262
+ }
263
+ #endif
264
+ output[chars++] = 'u';
265
+ output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf];
266
+ output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf];
267
+ output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf];
268
+ output[chars++] = "0123456789abcdef"[(c ) & 0xf];
269
+ }
270
+ return chars;
271
+ }
272
+
273
+ static PyObject *
274
+ ascii_escape_unicode(PyObject *pystr)
275
+ {
276
+ /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */
277
+ Py_ssize_t i;
278
+ Py_ssize_t input_chars;
279
+ Py_ssize_t output_size;
280
+ Py_ssize_t max_output_size;
281
+ Py_ssize_t chars;
282
+ PyObject *rval;
283
+ char *output;
284
+ Py_UNICODE *input_unicode;
285
+
286
+ input_chars = PyUnicode_GET_SIZE(pystr);
287
+ input_unicode = PyUnicode_AS_UNICODE(pystr);
288
+
289
+ /* One char input can be up to 6 chars output, estimate 4 of these */
290
+ output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
291
+ max_output_size = 2 + (input_chars * MAX_EXPANSION);
292
+ rval = PyString_FromStringAndSize(NULL, output_size);
293
+ if (rval == NULL) {
294
+ return NULL;
295
+ }
296
+ output = PyString_AS_STRING(rval);
297
+ chars = 0;
298
+ output[chars++] = '"';
299
+ for (i = 0; i < input_chars; i++) {
300
+ Py_UNICODE c = input_unicode[i];
301
+ if (S_CHAR(c)) {
302
+ output[chars++] = (char)c;
303
+ }
304
+ else {
305
+ chars = ascii_escape_char(c, output, chars);
306
+ }
307
+ if (output_size - chars < (1 + MAX_EXPANSION)) {
308
+ /* There's more than four, so let's resize by a lot */
309
+ Py_ssize_t new_output_size = output_size * 2;
310
+ /* This is an upper bound */
311
+ if (new_output_size > max_output_size) {
312
+ new_output_size = max_output_size;
313
+ }
314
+ /* Make sure that the output size changed before resizing */
315
+ if (new_output_size != output_size) {
316
+ output_size = new_output_size;
317
+ if (_PyString_Resize(&rval, output_size) == -1) {
318
+ return NULL;
319
+ }
320
+ output = PyString_AS_STRING(rval);
321
+ }
322
+ }
323
+ }
324
+ output[chars++] = '"';
325
+ if (_PyString_Resize(&rval, chars) == -1) {
326
+ return NULL;
327
+ }
328
+ return rval;
329
+ }
330
+
331
+ static PyObject *
332
+ ascii_escape_str(PyObject *pystr)
333
+ {
334
+ /* Take a PyString pystr and return a new ASCII-only escaped PyString */
335
+ Py_ssize_t i;
336
+ Py_ssize_t input_chars;
337
+ Py_ssize_t output_size;
338
+ Py_ssize_t chars;
339
+ PyObject *rval;
340
+ char *output;
341
+ char *input_str;
342
+
343
+ input_chars = PyString_GET_SIZE(pystr);
344
+ input_str = PyString_AS_STRING(pystr);
345
+
346
+ /* Fast path for a string that's already ASCII */
347
+ for (i = 0; i < input_chars; i++) {
348
+ Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
349
+ if (!S_CHAR(c)) {
350
+ /* If we have to escape something, scan the string for unicode */
351
+ Py_ssize_t j;
352
+ for (j = i; j < input_chars; j++) {
353
+ c = (Py_UNICODE)(unsigned char)input_str[j];
354
+ if (c > 0x7f) {
355
+ /* We hit a non-ASCII character, bail to unicode mode */
356
+ PyObject *uni;
357
+ uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
358
+ if (uni == NULL) {
359
+ return NULL;
360
+ }
361
+ rval = ascii_escape_unicode(uni);
362
+ Py_DECREF(uni);
363
+ return rval;
364
+ }
365
+ }
366
+ break;
367
+ }
368
+ }
369
+
370
+ if (i == input_chars) {
371
+ /* Input is already ASCII */
372
+ output_size = 2 + input_chars;
373
+ }
374
+ else {
375
+ /* One char input can be up to 6 chars output, estimate 4 of these */
376
+ output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
377
+ }
378
+ rval = PyString_FromStringAndSize(NULL, output_size);
379
+ if (rval == NULL) {
380
+ return NULL;
381
+ }
382
+ output = PyString_AS_STRING(rval);
383
+ output[0] = '"';
384
+
385
+ /* We know that everything up to i is ASCII already */
386
+ chars = i + 1;
387
+ memcpy(&output[1], input_str, i);
388
+
389
+ for (; i < input_chars; i++) {
390
+ Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i];
391
+ if (S_CHAR(c)) {
392
+ output[chars++] = (char)c;
393
+ }
394
+ else {
395
+ chars = ascii_escape_char(c, output, chars);
396
+ }
397
+ /* An ASCII char can't possibly expand to a surrogate! */
398
+ if (output_size - chars < (1 + MIN_EXPANSION)) {
399
+ /* There's more than four, so let's resize by a lot */
400
+ output_size *= 2;
401
+ if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
402
+ output_size = 2 + (input_chars * MIN_EXPANSION);
403
+ }
404
+ if (_PyString_Resize(&rval, output_size) == -1) {
405
+ return NULL;
406
+ }
407
+ output = PyString_AS_STRING(rval);
408
+ }
409
+ }
410
+ output[chars++] = '"';
411
+ if (_PyString_Resize(&rval, chars) == -1) {
412
+ return NULL;
413
+ }
414
+ return rval;
415
+ }
416
+
417
+ static void
418
+ raise_errmsg(char *msg, PyObject *s, Py_ssize_t end)
419
+ {
420
+ /* Use the Python function simplejson.decoder.errmsg to raise a nice
421
+ looking ValueError exception */
422
+ static PyObject *JSONDecodeError = NULL;
423
+ PyObject *exc;
424
+ if (JSONDecodeError == NULL) {
425
+ PyObject *decoder = PyImport_ImportModule("simplejson.decoder");
426
+ if (decoder == NULL)
427
+ return;
428
+ JSONDecodeError = PyObject_GetAttrString(decoder, "JSONDecodeError");
429
+ Py_DECREF(decoder);
430
+ if (JSONDecodeError == NULL)
431
+ return;
432
+ }
433
+ exc = PyObject_CallFunction(JSONDecodeError, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end);
434
+ if (exc) {
435
+ PyErr_SetObject(JSONDecodeError, exc);
436
+ Py_DECREF(exc);
437
+ }
438
+ }
439
+
440
+ static PyObject *
441
+ join_list_unicode(PyObject *lst)
442
+ {
443
+ /* return u''.join(lst) */
444
+ static PyObject *joinfn = NULL;
445
+ if (joinfn == NULL) {
446
+ PyObject *ustr = PyUnicode_FromUnicode(NULL, 0);
447
+ if (ustr == NULL)
448
+ return NULL;
449
+
450
+ joinfn = PyObject_GetAttrString(ustr, "join");
451
+ Py_DECREF(ustr);
452
+ if (joinfn == NULL)
453
+ return NULL;
454
+ }
455
+ return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
456
+ }
457
+
458
+ static PyObject *
459
+ join_list_string(PyObject *lst)
460
+ {
461
+ /* return ''.join(lst) */
462
+ static PyObject *joinfn = NULL;
463
+ if (joinfn == NULL) {
464
+ PyObject *ustr = PyString_FromStringAndSize(NULL, 0);
465
+ if (ustr == NULL)
466
+ return NULL;
467
+
468
+ joinfn = PyObject_GetAttrString(ustr, "join");
469
+ Py_DECREF(ustr);
470
+ if (joinfn == NULL)
471
+ return NULL;
472
+ }
473
+ return PyObject_CallFunctionObjArgs(joinfn, lst, NULL);
474
+ }
475
+
476
+ static PyObject *
477
+ _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
478
+ /* return (rval, idx) tuple, stealing reference to rval */
479
+ PyObject *tpl;
480
+ PyObject *pyidx;
481
+ /*
482
+ steal a reference to rval, returns (rval, idx)
483
+ */
484
+ if (rval == NULL) {
485
+ return NULL;
486
+ }
487
+ pyidx = PyInt_FromSsize_t(idx);
488
+ if (pyidx == NULL) {
489
+ Py_DECREF(rval);
490
+ return NULL;
491
+ }
492
+ tpl = PyTuple_New(2);
493
+ if (tpl == NULL) {
494
+ Py_DECREF(pyidx);
495
+ Py_DECREF(rval);
496
+ return NULL;
497
+ }
498
+ PyTuple_SET_ITEM(tpl, 0, rval);
499
+ PyTuple_SET_ITEM(tpl, 1, pyidx);
500
+ return tpl;
501
+ }
502
+
503
/* APPEND_OLD_CHUNK: if a chunk is pending, append it to the chunks list
   (creating the list on first use) and clear chunk.
   Expects local variables `chunk` and `chunks` and a `bail` label in the
   enclosing function; jumps to bail on failure. Written as a bare
   if-block because the call sites use it without a trailing semicolon. */
#define APPEND_OLD_CHUNK \
    if (chunk != NULL) { \
        if (chunks == NULL && (chunks = PyList_New(0)) == NULL) { \
            goto bail; \
        } \
        if (PyList_Append(chunks, chunk)) { \
            goto bail; \
        } \
        Py_CLEAR(chunk); \
    }
516
+
517
+ static PyObject *
518
+ scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr)
519
+ {
520
+ /* Read the JSON string from PyString pystr.
521
+ end is the index of the first character after the quote.
522
+ encoding is the encoding of pystr (must be an ASCII superset)
523
+ if strict is zero then literal control characters are allowed
524
+ *next_end_ptr is a return-by-reference index of the character
525
+ after the end quote
526
+
527
+ Return value is a new PyString (if ASCII-only) or PyUnicode
528
+ */
529
+ PyObject *rval;
530
+ Py_ssize_t len = PyString_GET_SIZE(pystr);
531
+ Py_ssize_t begin = end - 1;
532
+ Py_ssize_t next = begin;
533
+ int has_unicode = 0;
534
+ char *buf = PyString_AS_STRING(pystr);
535
+ PyObject *chunks = NULL;
536
+ PyObject *chunk = NULL;
537
+
538
+ if (len == end) {
539
+ raise_errmsg("Unterminated string starting at", pystr, begin);
540
+ }
541
+ else if (end < 0 || len < end) {
542
+ PyErr_SetString(PyExc_ValueError, "end is out of bounds");
543
+ goto bail;
544
+ }
545
+ while (1) {
546
+ /* Find the end of the string or the next escape */
547
+ Py_UNICODE c = 0;
548
+ for (next = end; next < len; next++) {
549
+ c = (unsigned char)buf[next];
550
+ if (c == '"' || c == '\\') {
551
+ break;
552
+ }
553
+ else if (strict && c <= 0x1f) {
554
+ raise_errmsg("Invalid control character at", pystr, next);
555
+ goto bail;
556
+ }
557
+ else if (c > 0x7f) {
558
+ has_unicode = 1;
559
+ }
560
+ }
561
+ if (!(c == '"' || c == '\\')) {
562
+ raise_errmsg("Unterminated string starting at", pystr, begin);
563
+ goto bail;
564
+ }
565
+ /* Pick up this chunk if it's not zero length */
566
+ if (next != end) {
567
+ PyObject *strchunk;
568
+ APPEND_OLD_CHUNK
569
+ strchunk = PyString_FromStringAndSize(&buf[end], next - end);
570
+ if (strchunk == NULL) {
571
+ goto bail;
572
+ }
573
+ if (has_unicode) {
574
+ chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
575
+ Py_DECREF(strchunk);
576
+ if (chunk == NULL) {
577
+ goto bail;
578
+ }
579
+ }
580
+ else {
581
+ chunk = strchunk;
582
+ }
583
+ }
584
+ next++;
585
+ if (c == '"') {
586
+ end = next;
587
+ break;
588
+ }
589
+ if (next == len) {
590
+ raise_errmsg("Unterminated string starting at", pystr, begin);
591
+ goto bail;
592
+ }
593
+ c = buf[next];
594
+ if (c != 'u') {
595
+ /* Non-unicode backslash escapes */
596
+ end = next + 1;
597
+ switch (c) {
598
+ case '"': break;
599
+ case '\\': break;
600
+ case '/': break;
601
+ case 'b': c = '\b'; break;
602
+ case 'f': c = '\f'; break;
603
+ case 'n': c = '\n'; break;
604
+ case 'r': c = '\r'; break;
605
+ case 't': c = '\t'; break;
606
+ default: c = 0;
607
+ }
608
+ if (c == 0) {
609
+ raise_errmsg("Invalid \\escape", pystr, end - 2);
610
+ goto bail;
611
+ }
612
+ }
613
+ else {
614
+ c = 0;
615
+ next++;
616
+ end = next + 4;
617
+ if (end >= len) {
618
+ raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
619
+ goto bail;
620
+ }
621
+ /* Decode 4 hex digits */
622
+ for (; next < end; next++) {
623
+ Py_UNICODE digit = buf[next];
624
+ c <<= 4;
625
+ switch (digit) {
626
+ case '0': case '1': case '2': case '3': case '4':
627
+ case '5': case '6': case '7': case '8': case '9':
628
+ c |= (digit - '0'); break;
629
+ case 'a': case 'b': case 'c': case 'd': case 'e':
630
+ case 'f':
631
+ c |= (digit - 'a' + 10); break;
632
+ case 'A': case 'B': case 'C': case 'D': case 'E':
633
+ case 'F':
634
+ c |= (digit - 'A' + 10); break;
635
+ default:
636
+ raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
637
+ goto bail;
638
+ }
639
+ }
640
+ #ifdef Py_UNICODE_WIDE
641
+ /* Surrogate pair */
642
+ if ((c & 0xfc00) == 0xd800) {
643
+ Py_UNICODE c2 = 0;
644
+ if (end + 6 >= len) {
645
+ raise_errmsg("Unpaired high surrogate", pystr, end - 5);
646
+ goto bail;
647
+ }
648
+ if (buf[next++] != '\\' || buf[next++] != 'u') {
649
+ raise_errmsg("Unpaired high surrogate", pystr, end - 5);
650
+ goto bail;
651
+ }
652
+ end += 6;
653
+ /* Decode 4 hex digits */
654
+ for (; next < end; next++) {
655
+ c2 <<= 4;
656
+ Py_UNICODE digit = buf[next];
657
+ switch (digit) {
658
+ case '0': case '1': case '2': case '3': case '4':
659
+ case '5': case '6': case '7': case '8': case '9':
660
+ c2 |= (digit - '0'); break;
661
+ case 'a': case 'b': case 'c': case 'd': case 'e':
662
+ case 'f':
663
+ c2 |= (digit - 'a' + 10); break;
664
+ case 'A': case 'B': case 'C': case 'D': case 'E':
665
+ case 'F':
666
+ c2 |= (digit - 'A' + 10); break;
667
+ default:
668
+ raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
669
+ goto bail;
670
+ }
671
+ }
672
+ if ((c2 & 0xfc00) != 0xdc00) {
673
+ raise_errmsg("Unpaired high surrogate", pystr, end - 5);
674
+ goto bail;
675
+ }
676
+ c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
677
+ }
678
+ else if ((c & 0xfc00) == 0xdc00) {
679
+ raise_errmsg("Unpaired low surrogate", pystr, end - 5);
680
+ goto bail;
681
+ }
682
+ #endif
683
+ }
684
+ if (c > 0x7f) {
685
+ has_unicode = 1;
686
+ }
687
+ APPEND_OLD_CHUNK
688
+ if (has_unicode) {
689
+ chunk = PyUnicode_FromUnicode(&c, 1);
690
+ if (chunk == NULL) {
691
+ goto bail;
692
+ }
693
+ }
694
+ else {
695
+ char c_char = Py_CHARMASK(c);
696
+ chunk = PyString_FromStringAndSize(&c_char, 1);
697
+ if (chunk == NULL) {
698
+ goto bail;
699
+ }
700
+ }
701
+ }
702
+
703
+ if (chunks == NULL) {
704
+ if (chunk != NULL)
705
+ rval = chunk;
706
+ else
707
+ rval = PyString_FromStringAndSize("", 0);
708
+ }
709
+ else {
710
+ APPEND_OLD_CHUNK
711
+ rval = join_list_string(chunks);
712
+ if (rval == NULL) {
713
+ goto bail;
714
+ }
715
+ Py_CLEAR(chunks);
716
+ }
717
+
718
+ *next_end_ptr = end;
719
+ return rval;
720
+ bail:
721
+ *next_end_ptr = -1;
722
+ Py_XDECREF(chunk);
723
+ Py_XDECREF(chunks);
724
+ return NULL;
725
+ }
726
+
727
+
728
+ static PyObject *
729
+ scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr)
730
+ {
731
+ /* Read the JSON string from PyUnicode pystr.
732
+ end is the index of the first character after the quote.
733
+ if strict is zero then literal control characters are allowed
734
+ *next_end_ptr is a return-by-reference index of the character
735
+ after the end quote
736
+
737
+ Return value is a new PyUnicode
738
+ */
739
+ PyObject *rval;
740
+ Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
741
+ Py_ssize_t begin = end - 1;
742
+ Py_ssize_t next = begin;
743
+ const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
744
+ PyObject *chunks = NULL;
745
+ PyObject *chunk = NULL;
746
+
747
+ if (len == end) {
748
+ raise_errmsg("Unterminated string starting at", pystr, begin);
749
+ }
750
+ else if (end < 0 || len < end) {
751
+ PyErr_SetString(PyExc_ValueError, "end is out of bounds");
752
+ goto bail;
753
+ }
754
+ while (1) {
755
+ /* Find the end of the string or the next escape */
756
+ Py_UNICODE c = 0;
757
+ for (next = end; next < len; next++) {
758
+ c = buf[next];
759
+ if (c == '"' || c == '\\') {
760
+ break;
761
+ }
762
+ else if (strict && c <= 0x1f) {
763
+ raise_errmsg("Invalid control character at", pystr, next);
764
+ goto bail;
765
+ }
766
+ }
767
+ if (!(c == '"' || c == '\\')) {
768
+ raise_errmsg("Unterminated string starting at", pystr, begin);
769
+ goto bail;
770
+ }
771
+ /* Pick up this chunk if it's not zero length */
772
+ if (next != end) {
773
+ APPEND_OLD_CHUNK
774
+ chunk = PyUnicode_FromUnicode(&buf[end], next - end);
775
+ if (chunk == NULL) {
776
+ goto bail;
777
+ }
778
+ }
779
+ next++;
780
+ if (c == '"') {
781
+ end = next;
782
+ break;
783
+ }
784
+ if (next == len) {
785
+ raise_errmsg("Unterminated string starting at", pystr, begin);
786
+ goto bail;
787
+ }
788
+ c = buf[next];
789
+ if (c != 'u') {
790
+ /* Non-unicode backslash escapes */
791
+ end = next + 1;
792
+ switch (c) {
793
+ case '"': break;
794
+ case '\\': break;
795
+ case '/': break;
796
+ case 'b': c = '\b'; break;
797
+ case 'f': c = '\f'; break;
798
+ case 'n': c = '\n'; break;
799
+ case 'r': c = '\r'; break;
800
+ case 't': c = '\t'; break;
801
+ default: c = 0;
802
+ }
803
+ if (c == 0) {
804
+ raise_errmsg("Invalid \\escape", pystr, end - 2);
805
+ goto bail;
806
+ }
807
+ }
808
+ else {
809
+ c = 0;
810
+ next++;
811
+ end = next + 4;
812
+ if (end >= len) {
813
+ raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
814
+ goto bail;
815
+ }
816
+ /* Decode 4 hex digits */
817
+ for (; next < end; next++) {
818
+ Py_UNICODE digit = buf[next];
819
+ c <<= 4;
820
+ switch (digit) {
821
+ case '0': case '1': case '2': case '3': case '4':
822
+ case '5': case '6': case '7': case '8': case '9':
823
+ c |= (digit - '0'); break;
824
+ case 'a': case 'b': case 'c': case 'd': case 'e':
825
+ case 'f':
826
+ c |= (digit - 'a' + 10); break;
827
+ case 'A': case 'B': case 'C': case 'D': case 'E':
828
+ case 'F':
829
+ c |= (digit - 'A' + 10); break;
830
+ default:
831
+ raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
832
+ goto bail;
833
+ }
834
+ }
835
+ #ifdef Py_UNICODE_WIDE
836
+ /* Surrogate pair */
837
+ if ((c & 0xfc00) == 0xd800) {
838
+ Py_UNICODE c2 = 0;
839
+ if (end + 6 >= len) {
840
+ raise_errmsg("Unpaired high surrogate", pystr, end - 5);
841
+ goto bail;
842
+ }
843
+ if (buf[next++] != '\\' || buf[next++] != 'u') {
844
+ raise_errmsg("Unpaired high surrogate", pystr, end - 5);
845
+ goto bail;
846
+ }
847
+ end += 6;
848
+ /* Decode 4 hex digits */
849
+ for (; next < end; next++) {
850
+ c2 <<= 4;
851
+ Py_UNICODE digit = buf[next];
852
+ switch (digit) {
853
+ case '0': case '1': case '2': case '3': case '4':
854
+ case '5': case '6': case '7': case '8': case '9':
855
+ c2 |= (digit - '0'); break;
856
+ case 'a': case 'b': case 'c': case 'd': case 'e':
857
+ case 'f':
858
+ c2 |= (digit - 'a' + 10); break;
859
+ case 'A': case 'B': case 'C': case 'D': case 'E':
860
+ case 'F':
861
+ c2 |= (digit - 'A' + 10); break;
862
+ default:
863
+ raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
864
+ goto bail;
865
+ }
866
+ }
867
+ if ((c2 & 0xfc00) != 0xdc00) {
868
+ raise_errmsg("Unpaired high surrogate", pystr, end - 5);
869
+ goto bail;
870
+ }
871
+ c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00));
872
+ }
873
+ else if ((c & 0xfc00) == 0xdc00) {
874
+ raise_errmsg("Unpaired low surrogate", pystr, end - 5);
875
+ goto bail;
876
+ }
877
+ #endif
878
+ }
879
+ APPEND_OLD_CHUNK
880
+ chunk = PyUnicode_FromUnicode(&c, 1);
881
+ if (chunk == NULL) {
882
+ goto bail;
883
+ }
884
+ }
885
+
886
+ if (chunks == NULL) {
887
+ if (chunk != NULL)
888
+ rval = chunk;
889
+ else
890
+ rval = PyUnicode_FromUnicode(NULL, 0);
891
+ }
892
+ else {
893
+ APPEND_OLD_CHUNK
894
+ rval = join_list_unicode(chunks);
895
+ if (rval == NULL) {
896
+ goto bail;
897
+ }
898
+ Py_CLEAR(chunks);
899
+ }
900
+ *next_end_ptr = end;
901
+ return rval;
902
+ bail:
903
+ *next_end_ptr = -1;
904
+ Py_XDECREF(chunk);
905
+ Py_XDECREF(chunks);
906
+ return NULL;
907
+ }
908
+
909
+ PyDoc_STRVAR(pydoc_scanstring,
910
+ "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n"
911
+ "\n"
912
+ "Scan the string s for a JSON string. End is the index of the\n"
913
+ "character in s after the quote that started the JSON string.\n"
914
+ "Unescapes all valid JSON string escape sequences and raises ValueError\n"
915
+ "on attempt to decode an invalid string. If strict is False then literal\n"
916
+ "control characters are allowed in the string.\n"
917
+ "\n"
918
+ "Returns a tuple of the decoded string and the index of the character in s\n"
919
+ "after the end quote."
920
+ );
921
+
922
+ static PyObject *
923
+ py_scanstring(PyObject* self UNUSED, PyObject *args)
924
+ {
925
+ PyObject *pystr;
926
+ PyObject *rval;
927
+ Py_ssize_t end;
928
+ Py_ssize_t next_end = -1;
929
+ char *encoding = NULL;
930
+ int strict = 1;
931
+ if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) {
932
+ return NULL;
933
+ }
934
+ if (encoding == NULL) {
935
+ encoding = DEFAULT_ENCODING;
936
+ }
937
+ if (PyString_Check(pystr)) {
938
+ rval = scanstring_str(pystr, end, encoding, strict, &next_end);
939
+ }
940
+ else if (PyUnicode_Check(pystr)) {
941
+ rval = scanstring_unicode(pystr, end, strict, &next_end);
942
+ }
943
+ else {
944
+ PyErr_Format(PyExc_TypeError,
945
+ "first argument must be a string, not %.80s",
946
+ Py_TYPE(pystr)->tp_name);
947
+ return NULL;
948
+ }
949
+ return _build_rval_index_tuple(rval, next_end);
950
+ }
951
+
952
+ PyDoc_STRVAR(pydoc_encode_basestring_ascii,
953
+ "encode_basestring_ascii(basestring) -> str\n"
954
+ "\n"
955
+ "Return an ASCII-only JSON representation of a Python string"
956
+ );
957
+
958
+ static PyObject *
959
+ py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr)
960
+ {
961
+ /* Return an ASCII-only JSON representation of a Python string */
962
+ /* METH_O */
963
+ if (PyString_Check(pystr)) {
964
+ return ascii_escape_str(pystr);
965
+ }
966
+ else if (PyUnicode_Check(pystr)) {
967
+ return ascii_escape_unicode(pystr);
968
+ }
969
+ else {
970
+ PyErr_Format(PyExc_TypeError,
971
+ "first argument must be a string, not %.80s",
972
+ Py_TYPE(pystr)->tp_name);
973
+ return NULL;
974
+ }
975
+ }
976
+
977
+ static void
978
+ scanner_dealloc(PyObject *self)
979
+ {
980
+ /* Deallocate scanner object */
981
+ scanner_clear(self);
982
+ Py_TYPE(self)->tp_free(self);
983
+ }
984
+
985
+ static int
986
+ scanner_traverse(PyObject *self, visitproc visit, void *arg)
987
+ {
988
+ PyScannerObject *s;
989
+ assert(PyScanner_Check(self));
990
+ s = (PyScannerObject *)self;
991
+ Py_VISIT(s->encoding);
992
+ Py_VISIT(s->strict);
993
+ Py_VISIT(s->object_hook);
994
+ Py_VISIT(s->pairs_hook);
995
+ Py_VISIT(s->parse_float);
996
+ Py_VISIT(s->parse_int);
997
+ Py_VISIT(s->parse_constant);
998
+ Py_VISIT(s->memo);
999
+ return 0;
1000
+ }
1001
+
1002
+ static int
1003
+ scanner_clear(PyObject *self)
1004
+ {
1005
+ PyScannerObject *s;
1006
+ assert(PyScanner_Check(self));
1007
+ s = (PyScannerObject *)self;
1008
+ Py_CLEAR(s->encoding);
1009
+ Py_CLEAR(s->strict);
1010
+ Py_CLEAR(s->object_hook);
1011
+ Py_CLEAR(s->pairs_hook);
1012
+ Py_CLEAR(s->parse_float);
1013
+ Py_CLEAR(s->parse_int);
1014
+ Py_CLEAR(s->parse_constant);
1015
+ Py_CLEAR(s->memo);
1016
+ return 0;
1017
+ }
1018
+
1019
+ static PyObject *
1020
+ _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1021
+ /* Read a JSON object from PyString pystr.
1022
+ idx is the index of the first character after the opening curly brace.
1023
+ *next_idx_ptr is a return-by-reference index to the first character after
1024
+ the closing curly brace.
1025
+
1026
+ Returns a new PyObject (usually a dict, but object_hook or
1027
+ object_pairs_hook can change that)
1028
+ */
1029
+ char *str = PyString_AS_STRING(pystr);
1030
+ Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1031
+ PyObject *rval = NULL;
1032
+ PyObject *pairs = NULL;
1033
+ PyObject *item;
1034
+ PyObject *key = NULL;
1035
+ PyObject *val = NULL;
1036
+ char *encoding = PyString_AS_STRING(s->encoding);
1037
+ int strict = PyObject_IsTrue(s->strict);
1038
+ int has_pairs_hook = (s->pairs_hook != Py_None);
1039
+ Py_ssize_t next_idx;
1040
+ if (has_pairs_hook) {
1041
+ pairs = PyList_New(0);
1042
+ if (pairs == NULL)
1043
+ return NULL;
1044
+ }
1045
+ else {
1046
+ rval = PyDict_New();
1047
+ if (rval == NULL)
1048
+ return NULL;
1049
+ }
1050
+
1051
+ /* skip whitespace after { */
1052
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1053
+
1054
+ /* only loop if the object is non-empty */
1055
+ if (idx <= end_idx && str[idx] != '}') {
1056
+ while (idx <= end_idx) {
1057
+ PyObject *memokey;
1058
+
1059
+ /* read key */
1060
+ if (str[idx] != '"') {
1061
+ raise_errmsg(
1062
+ "Expecting property name enclosed in double quotes",
1063
+ pystr, idx);
1064
+ goto bail;
1065
+ }
1066
+ key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx);
1067
+ if (key == NULL)
1068
+ goto bail;
1069
+ memokey = PyDict_GetItem(s->memo, key);
1070
+ if (memokey != NULL) {
1071
+ Py_INCREF(memokey);
1072
+ Py_DECREF(key);
1073
+ key = memokey;
1074
+ }
1075
+ else {
1076
+ if (PyDict_SetItem(s->memo, key, key) < 0)
1077
+ goto bail;
1078
+ }
1079
+ idx = next_idx;
1080
+
1081
+ /* skip whitespace between key and : delimiter, read :, skip whitespace */
1082
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1083
+ if (idx > end_idx || str[idx] != ':') {
1084
+ raise_errmsg("Expecting ':' delimiter", pystr, idx);
1085
+ goto bail;
1086
+ }
1087
+ idx++;
1088
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1089
+
1090
+ /* read any JSON data type */
1091
+ val = scan_once_str(s, pystr, idx, &next_idx);
1092
+ if (val == NULL)
1093
+ goto bail;
1094
+
1095
+ if (has_pairs_hook) {
1096
+ item = PyTuple_Pack(2, key, val);
1097
+ if (item == NULL)
1098
+ goto bail;
1099
+ Py_CLEAR(key);
1100
+ Py_CLEAR(val);
1101
+ if (PyList_Append(pairs, item) == -1) {
1102
+ Py_DECREF(item);
1103
+ goto bail;
1104
+ }
1105
+ Py_DECREF(item);
1106
+ }
1107
+ else {
1108
+ if (PyDict_SetItem(rval, key, val) < 0)
1109
+ goto bail;
1110
+ Py_CLEAR(key);
1111
+ Py_CLEAR(val);
1112
+ }
1113
+ idx = next_idx;
1114
+
1115
+ /* skip whitespace before } or , */
1116
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1117
+
1118
+ /* bail if the object is closed or we didn't get the , delimiter */
1119
+ if (idx > end_idx) break;
1120
+ if (str[idx] == '}') {
1121
+ break;
1122
+ }
1123
+ else if (str[idx] != ',') {
1124
+ raise_errmsg("Expecting ',' delimiter", pystr, idx);
1125
+ goto bail;
1126
+ }
1127
+ idx++;
1128
+
1129
+ /* skip whitespace after , delimiter */
1130
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1131
+ }
1132
+ }
1133
+ /* verify that idx < end_idx, str[idx] should be '}' */
1134
+ if (idx > end_idx || str[idx] != '}') {
1135
+ raise_errmsg("Expecting object", pystr, end_idx);
1136
+ goto bail;
1137
+ }
1138
+
1139
+ /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1140
+ if (s->pairs_hook != Py_None) {
1141
+ val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1142
+ if (val == NULL)
1143
+ goto bail;
1144
+ Py_DECREF(pairs);
1145
+ *next_idx_ptr = idx + 1;
1146
+ return val;
1147
+ }
1148
+
1149
+ /* if object_hook is not None: rval = object_hook(rval) */
1150
+ if (s->object_hook != Py_None) {
1151
+ val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1152
+ if (val == NULL)
1153
+ goto bail;
1154
+ Py_DECREF(rval);
1155
+ rval = val;
1156
+ val = NULL;
1157
+ }
1158
+ *next_idx_ptr = idx + 1;
1159
+ return rval;
1160
+ bail:
1161
+ Py_XDECREF(rval);
1162
+ Py_XDECREF(key);
1163
+ Py_XDECREF(val);
1164
+ Py_XDECREF(pairs);
1165
+ return NULL;
1166
+ }
1167
+
1168
+ static PyObject *
1169
+ _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1170
+ /* Read a JSON object from PyUnicode pystr.
1171
+ idx is the index of the first character after the opening curly brace.
1172
+ *next_idx_ptr is a return-by-reference index to the first character after
1173
+ the closing curly brace.
1174
+
1175
+ Returns a new PyObject (usually a dict, but object_hook can change that)
1176
+ */
1177
+ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1178
+ Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1179
+ PyObject *rval = NULL;
1180
+ PyObject *pairs = NULL;
1181
+ PyObject *item;
1182
+ PyObject *key = NULL;
1183
+ PyObject *val = NULL;
1184
+ int strict = PyObject_IsTrue(s->strict);
1185
+ int has_pairs_hook = (s->pairs_hook != Py_None);
1186
+ Py_ssize_t next_idx;
1187
+
1188
+ if (has_pairs_hook) {
1189
+ pairs = PyList_New(0);
1190
+ if (pairs == NULL)
1191
+ return NULL;
1192
+ }
1193
+ else {
1194
+ rval = PyDict_New();
1195
+ if (rval == NULL)
1196
+ return NULL;
1197
+ }
1198
+
1199
+ /* skip whitespace after { */
1200
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1201
+
1202
+ /* only loop if the object is non-empty */
1203
+ if (idx <= end_idx && str[idx] != '}') {
1204
+ while (idx <= end_idx) {
1205
+ PyObject *memokey;
1206
+
1207
+ /* read key */
1208
+ if (str[idx] != '"') {
1209
+ raise_errmsg(
1210
+ "Expecting property name enclosed in double quotes",
1211
+ pystr, idx);
1212
+ goto bail;
1213
+ }
1214
+ key = scanstring_unicode(pystr, idx + 1, strict, &next_idx);
1215
+ if (key == NULL)
1216
+ goto bail;
1217
+ memokey = PyDict_GetItem(s->memo, key);
1218
+ if (memokey != NULL) {
1219
+ Py_INCREF(memokey);
1220
+ Py_DECREF(key);
1221
+ key = memokey;
1222
+ }
1223
+ else {
1224
+ if (PyDict_SetItem(s->memo, key, key) < 0)
1225
+ goto bail;
1226
+ }
1227
+ idx = next_idx;
1228
+
1229
+ /* skip whitespace between key and : delimiter, read :, skip
1230
+ whitespace */
1231
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1232
+ if (idx > end_idx || str[idx] != ':') {
1233
+ raise_errmsg("Expecting ':' delimiter", pystr, idx);
1234
+ goto bail;
1235
+ }
1236
+ idx++;
1237
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1238
+
1239
+ /* read any JSON term */
1240
+ val = scan_once_unicode(s, pystr, idx, &next_idx);
1241
+ if (val == NULL)
1242
+ goto bail;
1243
+
1244
+ if (has_pairs_hook) {
1245
+ item = PyTuple_Pack(2, key, val);
1246
+ if (item == NULL)
1247
+ goto bail;
1248
+ Py_CLEAR(key);
1249
+ Py_CLEAR(val);
1250
+ if (PyList_Append(pairs, item) == -1) {
1251
+ Py_DECREF(item);
1252
+ goto bail;
1253
+ }
1254
+ Py_DECREF(item);
1255
+ }
1256
+ else {
1257
+ if (PyDict_SetItem(rval, key, val) < 0)
1258
+ goto bail;
1259
+ Py_CLEAR(key);
1260
+ Py_CLEAR(val);
1261
+ }
1262
+ idx = next_idx;
1263
+
1264
+ /* skip whitespace before } or , */
1265
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1266
+
1267
+ /* bail if the object is closed or we didn't get the ,
1268
+ delimiter */
1269
+ if (idx > end_idx) break;
1270
+ if (str[idx] == '}') {
1271
+ break;
1272
+ }
1273
+ else if (str[idx] != ',') {
1274
+ raise_errmsg("Expecting ',' delimiter", pystr, idx);
1275
+ goto bail;
1276
+ }
1277
+ idx++;
1278
+
1279
+ /* skip whitespace after , delimiter */
1280
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1281
+ }
1282
+ }
1283
+
1284
+ /* verify that idx < end_idx, str[idx] should be '}' */
1285
+ if (idx > end_idx || str[idx] != '}') {
1286
+ raise_errmsg("Expecting object", pystr, end_idx);
1287
+ goto bail;
1288
+ }
1289
+
1290
+ /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */
1291
+ if (s->pairs_hook != Py_None) {
1292
+ val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL);
1293
+ if (val == NULL)
1294
+ goto bail;
1295
+ Py_DECREF(pairs);
1296
+ *next_idx_ptr = idx + 1;
1297
+ return val;
1298
+ }
1299
+
1300
+ /* if object_hook is not None: rval = object_hook(rval) */
1301
+ if (s->object_hook != Py_None) {
1302
+ val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL);
1303
+ if (val == NULL)
1304
+ goto bail;
1305
+ Py_DECREF(rval);
1306
+ rval = val;
1307
+ val = NULL;
1308
+ }
1309
+ *next_idx_ptr = idx + 1;
1310
+ return rval;
1311
+ bail:
1312
+ Py_XDECREF(rval);
1313
+ Py_XDECREF(key);
1314
+ Py_XDECREF(val);
1315
+ Py_XDECREF(pairs);
1316
+ return NULL;
1317
+ }
1318
+
1319
+ static PyObject *
1320
+ _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1321
+ /* Read a JSON array from PyString pystr.
1322
+ idx is the index of the first character after the opening brace.
1323
+ *next_idx_ptr is a return-by-reference index to the first character after
1324
+ the closing brace.
1325
+
1326
+ Returns a new PyList
1327
+ */
1328
+ char *str = PyString_AS_STRING(pystr);
1329
+ Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1330
+ PyObject *val = NULL;
1331
+ PyObject *rval = PyList_New(0);
1332
+ Py_ssize_t next_idx;
1333
+ if (rval == NULL)
1334
+ return NULL;
1335
+
1336
+ /* skip whitespace after [ */
1337
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1338
+
1339
+ /* only loop if the array is non-empty */
1340
+ if (idx <= end_idx && str[idx] != ']') {
1341
+ while (idx <= end_idx) {
1342
+
1343
+ /* read any JSON term and de-tuplefy the (rval, idx) */
1344
+ val = scan_once_str(s, pystr, idx, &next_idx);
1345
+ if (val == NULL) {
1346
+ if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1347
+ PyErr_Clear();
1348
+ raise_errmsg("Expecting object", pystr, idx);
1349
+ }
1350
+ goto bail;
1351
+ }
1352
+
1353
+ if (PyList_Append(rval, val) == -1)
1354
+ goto bail;
1355
+
1356
+ Py_CLEAR(val);
1357
+ idx = next_idx;
1358
+
1359
+ /* skip whitespace between term and , */
1360
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1361
+
1362
+ /* bail if the array is closed or we didn't get the , delimiter */
1363
+ if (idx > end_idx) break;
1364
+ if (str[idx] == ']') {
1365
+ break;
1366
+ }
1367
+ else if (str[idx] != ',') {
1368
+ raise_errmsg("Expecting ',' delimiter", pystr, idx);
1369
+ goto bail;
1370
+ }
1371
+ idx++;
1372
+
1373
+ /* skip whitespace after , */
1374
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1375
+ }
1376
+ }
1377
+
1378
+ /* verify that idx < end_idx, str[idx] should be ']' */
1379
+ if (idx > end_idx || str[idx] != ']') {
1380
+ raise_errmsg("Expecting object", pystr, end_idx);
1381
+ goto bail;
1382
+ }
1383
+ *next_idx_ptr = idx + 1;
1384
+ return rval;
1385
+ bail:
1386
+ Py_XDECREF(val);
1387
+ Py_DECREF(rval);
1388
+ return NULL;
1389
+ }
1390
+
1391
+ static PyObject *
1392
+ _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1393
+ /* Read a JSON array from PyString pystr.
1394
+ idx is the index of the first character after the opening brace.
1395
+ *next_idx_ptr is a return-by-reference index to the first character after
1396
+ the closing brace.
1397
+
1398
+ Returns a new PyList
1399
+ */
1400
+ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1401
+ Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1402
+ PyObject *val = NULL;
1403
+ PyObject *rval = PyList_New(0);
1404
+ Py_ssize_t next_idx;
1405
+ if (rval == NULL)
1406
+ return NULL;
1407
+
1408
+ /* skip whitespace after [ */
1409
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1410
+
1411
+ /* only loop if the array is non-empty */
1412
+ if (idx <= end_idx && str[idx] != ']') {
1413
+ while (idx <= end_idx) {
1414
+
1415
+ /* read any JSON term */
1416
+ val = scan_once_unicode(s, pystr, idx, &next_idx);
1417
+ if (val == NULL) {
1418
+ if (PyErr_ExceptionMatches(PyExc_StopIteration)) {
1419
+ PyErr_Clear();
1420
+ raise_errmsg("Expecting object", pystr, idx);
1421
+ }
1422
+ goto bail;
1423
+ }
1424
+
1425
+ if (PyList_Append(rval, val) == -1)
1426
+ goto bail;
1427
+
1428
+ Py_CLEAR(val);
1429
+ idx = next_idx;
1430
+
1431
+ /* skip whitespace between term and , */
1432
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1433
+
1434
+ /* bail if the array is closed or we didn't get the , delimiter */
1435
+ if (idx > end_idx) break;
1436
+ if (str[idx] == ']') {
1437
+ break;
1438
+ }
1439
+ else if (str[idx] != ',') {
1440
+ raise_errmsg("Expecting ',' delimiter", pystr, idx);
1441
+ goto bail;
1442
+ }
1443
+ idx++;
1444
+
1445
+ /* skip whitespace after , */
1446
+ while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++;
1447
+ }
1448
+ }
1449
+
1450
+ /* verify that idx < end_idx, str[idx] should be ']' */
1451
+ if (idx > end_idx || str[idx] != ']') {
1452
+ raise_errmsg("Expecting object", pystr, end_idx);
1453
+ goto bail;
1454
+ }
1455
+ *next_idx_ptr = idx + 1;
1456
+ return rval;
1457
+ bail:
1458
+ Py_XDECREF(val);
1459
+ Py_DECREF(rval);
1460
+ return NULL;
1461
+ }
1462
+
1463
+ static PyObject *
1464
+ _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) {
1465
+ /* Read a JSON constant from PyString pystr.
1466
+ constant is the constant string that was found
1467
+ ("NaN", "Infinity", "-Infinity").
1468
+ idx is the index of the first character of the constant
1469
+ *next_idx_ptr is a return-by-reference index to the first character after
1470
+ the constant.
1471
+
1472
+ Returns the result of parse_constant
1473
+ */
1474
+ PyObject *cstr;
1475
+ PyObject *rval;
1476
+ /* constant is "NaN", "Infinity", or "-Infinity" */
1477
+ cstr = PyString_InternFromString(constant);
1478
+ if (cstr == NULL)
1479
+ return NULL;
1480
+
1481
+ /* rval = parse_constant(constant) */
1482
+ rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL);
1483
+ idx += PyString_GET_SIZE(cstr);
1484
+ Py_DECREF(cstr);
1485
+ *next_idx_ptr = idx;
1486
+ return rval;
1487
+ }
1488
+
1489
+ static PyObject *
1490
+ _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1491
+ /* Read a JSON number from PyString pystr.
1492
+ idx is the index of the first character of the number
1493
+ *next_idx_ptr is a return-by-reference index to the first character after
1494
+ the number.
1495
+
1496
+ Returns a new PyObject representation of that number:
1497
+ PyInt, PyLong, or PyFloat.
1498
+ May return other types if parse_int or parse_float are set
1499
+ */
1500
+ char *str = PyString_AS_STRING(pystr);
1501
+ Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1;
1502
+ Py_ssize_t idx = start;
1503
+ int is_float = 0;
1504
+ PyObject *rval;
1505
+ PyObject *numstr;
1506
+
1507
+ /* read a sign if it's there, make sure it's not the end of the string */
1508
+ if (str[idx] == '-') {
1509
+ idx++;
1510
+ if (idx > end_idx) {
1511
+ PyErr_SetNone(PyExc_StopIteration);
1512
+ return NULL;
1513
+ }
1514
+ }
1515
+
1516
+ /* read as many integer digits as we find as long as it doesn't start with 0 */
1517
+ if (str[idx] >= '1' && str[idx] <= '9') {
1518
+ idx++;
1519
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1520
+ }
1521
+ /* if it starts with 0 we only expect one integer digit */
1522
+ else if (str[idx] == '0') {
1523
+ idx++;
1524
+ }
1525
+ /* no integer digits, error */
1526
+ else {
1527
+ PyErr_SetNone(PyExc_StopIteration);
1528
+ return NULL;
1529
+ }
1530
+
1531
+ /* if the next char is '.' followed by a digit then read all float digits */
1532
+ if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1533
+ is_float = 1;
1534
+ idx += 2;
1535
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1536
+ }
1537
+
1538
+ /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1539
+ if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1540
+
1541
+ /* save the index of the 'e' or 'E' just in case we need to backtrack */
1542
+ Py_ssize_t e_start = idx;
1543
+ idx++;
1544
+
1545
+ /* read an exponent sign if present */
1546
+ if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1547
+
1548
+ /* read all digits */
1549
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1550
+
1551
+ /* if we got a digit, then parse as float. if not, backtrack */
1552
+ if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1553
+ is_float = 1;
1554
+ }
1555
+ else {
1556
+ idx = e_start;
1557
+ }
1558
+ }
1559
+
1560
+ /* copy the section we determined to be a number */
1561
+ numstr = PyString_FromStringAndSize(&str[start], idx - start);
1562
+ if (numstr == NULL)
1563
+ return NULL;
1564
+ if (is_float) {
1565
+ /* parse as a float using a fast path if available, otherwise call user defined method */
1566
+ if (s->parse_float != (PyObject *)&PyFloat_Type) {
1567
+ rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1568
+ }
1569
+ else {
1570
+ /* rval = PyFloat_FromDouble(PyOS_ascii_atof(PyString_AS_STRING(numstr))); */
1571
+ double d = PyOS_string_to_double(PyString_AS_STRING(numstr),
1572
+ NULL, NULL);
1573
+ if (d == -1.0 && PyErr_Occurred())
1574
+ return NULL;
1575
+ rval = PyFloat_FromDouble(d);
1576
+ }
1577
+ }
1578
+ else {
1579
+ /* parse as an int using a fast path if available, otherwise call user defined method */
1580
+ if (s->parse_int != (PyObject *)&PyInt_Type) {
1581
+ rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1582
+ }
1583
+ else {
1584
+ rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10);
1585
+ }
1586
+ }
1587
+ Py_DECREF(numstr);
1588
+ *next_idx_ptr = idx;
1589
+ return rval;
1590
+ }
1591
+
1592
+ static PyObject *
1593
+ _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) {
1594
+ /* Read a JSON number from PyUnicode pystr.
1595
+ idx is the index of the first character of the number
1596
+ *next_idx_ptr is a return-by-reference index to the first character after
1597
+ the number.
1598
+
1599
+ Returns a new PyObject representation of that number:
1600
+ PyInt, PyLong, or PyFloat.
1601
+ May return other types if parse_int or parse_float are set
1602
+ */
1603
+ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1604
+ Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1;
1605
+ Py_ssize_t idx = start;
1606
+ int is_float = 0;
1607
+ PyObject *rval;
1608
+ PyObject *numstr;
1609
+
1610
+ /* read a sign if it's there, make sure it's not the end of the string */
1611
+ if (str[idx] == '-') {
1612
+ idx++;
1613
+ if (idx > end_idx) {
1614
+ PyErr_SetNone(PyExc_StopIteration);
1615
+ return NULL;
1616
+ }
1617
+ }
1618
+
1619
+ /* read as many integer digits as we find as long as it doesn't start with 0 */
1620
+ if (str[idx] >= '1' && str[idx] <= '9') {
1621
+ idx++;
1622
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1623
+ }
1624
+ /* if it starts with 0 we only expect one integer digit */
1625
+ else if (str[idx] == '0') {
1626
+ idx++;
1627
+ }
1628
+ /* no integer digits, error */
1629
+ else {
1630
+ PyErr_SetNone(PyExc_StopIteration);
1631
+ return NULL;
1632
+ }
1633
+
1634
+ /* if the next char is '.' followed by a digit then read all float digits */
1635
+ if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') {
1636
+ is_float = 1;
1637
+ idx += 2;
1638
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1639
+ }
1640
+
1641
+ /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */
1642
+ if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) {
1643
+ Py_ssize_t e_start = idx;
1644
+ idx++;
1645
+
1646
+ /* read an exponent sign if present */
1647
+ if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++;
1648
+
1649
+ /* read all digits */
1650
+ while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++;
1651
+
1652
+ /* if we got a digit, then parse as float. if not, backtrack */
1653
+ if (str[idx - 1] >= '0' && str[idx - 1] <= '9') {
1654
+ is_float = 1;
1655
+ }
1656
+ else {
1657
+ idx = e_start;
1658
+ }
1659
+ }
1660
+
1661
+ /* copy the section we determined to be a number */
1662
+ numstr = PyUnicode_FromUnicode(&str[start], idx - start);
1663
+ if (numstr == NULL)
1664
+ return NULL;
1665
+ if (is_float) {
1666
+ /* parse as a float using a fast path if available, otherwise call user defined method */
1667
+ if (s->parse_float != (PyObject *)&PyFloat_Type) {
1668
+ rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL);
1669
+ }
1670
+ else {
1671
+ rval = PyFloat_FromString(numstr, NULL);
1672
+ }
1673
+ }
1674
+ else {
1675
+ /* no fast path for unicode -> int, just call */
1676
+ rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL);
1677
+ }
1678
+ Py_DECREF(numstr);
1679
+ *next_idx_ptr = idx;
1680
+ return rval;
1681
+ }
1682
+
1683
+ static PyObject *
1684
+ scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1685
+ {
1686
+ /* Read one JSON term (of any kind) from PyString pystr.
1687
+ idx is the index of the first character of the term
1688
+ *next_idx_ptr is a return-by-reference index to the first character after
1689
+ the number.
1690
+
1691
+ Returns a new PyObject representation of the term.
1692
+ */
1693
+ char *str = PyString_AS_STRING(pystr);
1694
+ Py_ssize_t length = PyString_GET_SIZE(pystr);
1695
+ PyObject *rval = NULL;
1696
+ int fallthrough = 0;
1697
+ if (idx >= length) {
1698
+ PyErr_SetNone(PyExc_StopIteration);
1699
+ return NULL;
1700
+ }
1701
+ if (Py_EnterRecursiveCall(" while decoding a JSON document"))
1702
+ return NULL;
1703
+ switch (str[idx]) {
1704
+ case '"':
1705
+ /* string */
1706
+ rval = scanstring_str(pystr, idx + 1,
1707
+ PyString_AS_STRING(s->encoding),
1708
+ PyObject_IsTrue(s->strict),
1709
+ next_idx_ptr);
1710
+ break;
1711
+ case '{':
1712
+ /* object */
1713
+ rval = _parse_object_str(s, pystr, idx + 1, next_idx_ptr);
1714
+ break;
1715
+ case '[':
1716
+ /* array */
1717
+ rval = _parse_array_str(s, pystr, idx + 1, next_idx_ptr);
1718
+ break;
1719
+ case 'n':
1720
+ /* null */
1721
+ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1722
+ Py_INCREF(Py_None);
1723
+ *next_idx_ptr = idx + 4;
1724
+ rval = Py_None;
1725
+ }
1726
+ else
1727
+ fallthrough = 1;
1728
+ break;
1729
+ case 't':
1730
+ /* true */
1731
+ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1732
+ Py_INCREF(Py_True);
1733
+ *next_idx_ptr = idx + 4;
1734
+ rval = Py_True;
1735
+ }
1736
+ else
1737
+ fallthrough = 1;
1738
+ break;
1739
+ case 'f':
1740
+ /* false */
1741
+ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1742
+ Py_INCREF(Py_False);
1743
+ *next_idx_ptr = idx + 5;
1744
+ rval = Py_False;
1745
+ }
1746
+ else
1747
+ fallthrough = 1;
1748
+ break;
1749
+ case 'N':
1750
+ /* NaN */
1751
+ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1752
+ rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
1753
+ }
1754
+ else
1755
+ fallthrough = 1;
1756
+ break;
1757
+ case 'I':
1758
+ /* Infinity */
1759
+ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1760
+ rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
1761
+ }
1762
+ else
1763
+ fallthrough = 1;
1764
+ break;
1765
+ case '-':
1766
+ /* -Infinity */
1767
+ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1768
+ rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1769
+ }
1770
+ else
1771
+ fallthrough = 1;
1772
+ break;
1773
+ default:
1774
+ fallthrough = 1;
1775
+ }
1776
+ /* Didn't find a string, object, array, or named constant. Look for a number. */
1777
+ if (fallthrough)
1778
+ rval = _match_number_str(s, pystr, idx, next_idx_ptr);
1779
+ Py_LeaveRecursiveCall();
1780
+ return rval;
1781
+ }
1782
+
1783
+ static PyObject *
1784
+ scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr)
1785
+ {
1786
+ /* Read one JSON term (of any kind) from PyUnicode pystr.
1787
+ idx is the index of the first character of the term
1788
+ *next_idx_ptr is a return-by-reference index to the first character after
1789
+ the number.
1790
+
1791
+ Returns a new PyObject representation of the term.
1792
+ */
1793
+ Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr);
1794
+ Py_ssize_t length = PyUnicode_GET_SIZE(pystr);
1795
+ PyObject *rval = NULL;
1796
+ int fallthrough = 0;
1797
+ if (idx >= length) {
1798
+ PyErr_SetNone(PyExc_StopIteration);
1799
+ return NULL;
1800
+ }
1801
+ if (Py_EnterRecursiveCall(" while decoding a JSON document"))
1802
+ return NULL;
1803
+ switch (str[idx]) {
1804
+ case '"':
1805
+ /* string */
1806
+ rval = scanstring_unicode(pystr, idx + 1,
1807
+ PyObject_IsTrue(s->strict),
1808
+ next_idx_ptr);
1809
+ break;
1810
+ case '{':
1811
+ /* object */
1812
+ rval = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr);
1813
+ break;
1814
+ case '[':
1815
+ /* array */
1816
+ rval = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr);
1817
+ break;
1818
+ case 'n':
1819
+ /* null */
1820
+ if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') {
1821
+ Py_INCREF(Py_None);
1822
+ *next_idx_ptr = idx + 4;
1823
+ rval = Py_None;
1824
+ }
1825
+ else
1826
+ fallthrough = 1;
1827
+ break;
1828
+ case 't':
1829
+ /* true */
1830
+ if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') {
1831
+ Py_INCREF(Py_True);
1832
+ *next_idx_ptr = idx + 4;
1833
+ rval = Py_True;
1834
+ }
1835
+ else
1836
+ fallthrough = 1;
1837
+ break;
1838
+ case 'f':
1839
+ /* false */
1840
+ if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') {
1841
+ Py_INCREF(Py_False);
1842
+ *next_idx_ptr = idx + 5;
1843
+ rval = Py_False;
1844
+ }
1845
+ else
1846
+ fallthrough = 1;
1847
+ break;
1848
+ case 'N':
1849
+ /* NaN */
1850
+ if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') {
1851
+ rval = _parse_constant(s, "NaN", idx, next_idx_ptr);
1852
+ }
1853
+ else
1854
+ fallthrough = 1;
1855
+ break;
1856
+ case 'I':
1857
+ /* Infinity */
1858
+ if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') {
1859
+ rval = _parse_constant(s, "Infinity", idx, next_idx_ptr);
1860
+ }
1861
+ else
1862
+ fallthrough = 1;
1863
+ break;
1864
+ case '-':
1865
+ /* -Infinity */
1866
+ if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') {
1867
+ rval = _parse_constant(s, "-Infinity", idx, next_idx_ptr);
1868
+ }
1869
+ else
1870
+ fallthrough = 1;
1871
+ break;
1872
+ default:
1873
+ fallthrough = 1;
1874
+ }
1875
+ /* Didn't find a string, object, array, or named constant. Look for a number. */
1876
+ if (fallthrough)
1877
+ rval = _match_number_unicode(s, pystr, idx, next_idx_ptr);
1878
+ Py_LeaveRecursiveCall();
1879
+ return rval;
1880
+ }
1881
+
1882
+ static PyObject *
1883
+ scanner_call(PyObject *self, PyObject *args, PyObject *kwds)
1884
+ {
1885
+ /* Python callable interface to scan_once_{str,unicode} */
1886
+ PyObject *pystr;
1887
+ PyObject *rval;
1888
+ Py_ssize_t idx;
1889
+ Py_ssize_t next_idx = -1;
1890
+ static char *kwlist[] = {"string", "idx", NULL};
1891
+ PyScannerObject *s;
1892
+ assert(PyScanner_Check(self));
1893
+ s = (PyScannerObject *)self;
1894
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx))
1895
+ return NULL;
1896
+
1897
+ if (PyString_Check(pystr)) {
1898
+ rval = scan_once_str(s, pystr, idx, &next_idx);
1899
+ }
1900
+ else if (PyUnicode_Check(pystr)) {
1901
+ rval = scan_once_unicode(s, pystr, idx, &next_idx);
1902
+ }
1903
+ else {
1904
+ PyErr_Format(PyExc_TypeError,
1905
+ "first argument must be a string, not %.80s",
1906
+ Py_TYPE(pystr)->tp_name);
1907
+ return NULL;
1908
+ }
1909
+ PyDict_Clear(s->memo);
1910
+ return _build_rval_index_tuple(rval, next_idx);
1911
+ }
1912
+
1913
+ static PyObject *
1914
+ scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1915
+ {
1916
+ PyScannerObject *s;
1917
+ s = (PyScannerObject *)type->tp_alloc(type, 0);
1918
+ if (s != NULL) {
1919
+ s->encoding = NULL;
1920
+ s->strict = NULL;
1921
+ s->object_hook = NULL;
1922
+ s->pairs_hook = NULL;
1923
+ s->parse_float = NULL;
1924
+ s->parse_int = NULL;
1925
+ s->parse_constant = NULL;
1926
+ }
1927
+ return (PyObject *)s;
1928
+ }
1929
+
1930
+ static int
1931
+ scanner_init(PyObject *self, PyObject *args, PyObject *kwds)
1932
+ {
1933
+ /* Initialize Scanner object */
1934
+ PyObject *ctx;
1935
+ static char *kwlist[] = {"context", NULL};
1936
+ PyScannerObject *s;
1937
+
1938
+ assert(PyScanner_Check(self));
1939
+ s = (PyScannerObject *)self;
1940
+
1941
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx))
1942
+ return -1;
1943
+
1944
+ if (s->memo == NULL) {
1945
+ s->memo = PyDict_New();
1946
+ if (s->memo == NULL)
1947
+ goto bail;
1948
+ }
1949
+
1950
+ /* PyString_AS_STRING is used on encoding */
1951
+ s->encoding = PyObject_GetAttrString(ctx, "encoding");
1952
+ if (s->encoding == NULL)
1953
+ goto bail;
1954
+ if (s->encoding == Py_None) {
1955
+ Py_DECREF(Py_None);
1956
+ s->encoding = PyString_InternFromString(DEFAULT_ENCODING);
1957
+ }
1958
+ else if (PyUnicode_Check(s->encoding)) {
1959
+ PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL);
1960
+ Py_DECREF(s->encoding);
1961
+ s->encoding = tmp;
1962
+ }
1963
+ if (s->encoding == NULL || !PyString_Check(s->encoding))
1964
+ goto bail;
1965
+
1966
+ /* All of these will fail "gracefully" so we don't need to verify them */
1967
+ s->strict = PyObject_GetAttrString(ctx, "strict");
1968
+ if (s->strict == NULL)
1969
+ goto bail;
1970
+ s->object_hook = PyObject_GetAttrString(ctx, "object_hook");
1971
+ if (s->object_hook == NULL)
1972
+ goto bail;
1973
+ s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook");
1974
+ if (s->pairs_hook == NULL)
1975
+ goto bail;
1976
+ s->parse_float = PyObject_GetAttrString(ctx, "parse_float");
1977
+ if (s->parse_float == NULL)
1978
+ goto bail;
1979
+ s->parse_int = PyObject_GetAttrString(ctx, "parse_int");
1980
+ if (s->parse_int == NULL)
1981
+ goto bail;
1982
+ s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant");
1983
+ if (s->parse_constant == NULL)
1984
+ goto bail;
1985
+
1986
+ return 0;
1987
+
1988
+ bail:
1989
+ Py_CLEAR(s->encoding);
1990
+ Py_CLEAR(s->strict);
1991
+ Py_CLEAR(s->object_hook);
1992
+ Py_CLEAR(s->pairs_hook);
1993
+ Py_CLEAR(s->parse_float);
1994
+ Py_CLEAR(s->parse_int);
1995
+ Py_CLEAR(s->parse_constant);
1996
+ return -1;
1997
+ }
1998
+
1999
+ PyDoc_STRVAR(scanner_doc, "JSON scanner object");
2000
+
2001
+ static
2002
+ PyTypeObject PyScannerType = {
2003
+ PyObject_HEAD_INIT(NULL)
2004
+ 0, /* tp_internal */
2005
+ "simplejson._speedups.Scanner", /* tp_name */
2006
+ sizeof(PyScannerObject), /* tp_basicsize */
2007
+ 0, /* tp_itemsize */
2008
+ scanner_dealloc, /* tp_dealloc */
2009
+ 0, /* tp_print */
2010
+ 0, /* tp_getattr */
2011
+ 0, /* tp_setattr */
2012
+ 0, /* tp_compare */
2013
+ 0, /* tp_repr */
2014
+ 0, /* tp_as_number */
2015
+ 0, /* tp_as_sequence */
2016
+ 0, /* tp_as_mapping */
2017
+ 0, /* tp_hash */
2018
+ scanner_call, /* tp_call */
2019
+ 0, /* tp_str */
2020
+ 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */
2021
+ 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */
2022
+ 0, /* tp_as_buffer */
2023
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2024
+ scanner_doc, /* tp_doc */
2025
+ scanner_traverse, /* tp_traverse */
2026
+ scanner_clear, /* tp_clear */
2027
+ 0, /* tp_richcompare */
2028
+ 0, /* tp_weaklistoffset */
2029
+ 0, /* tp_iter */
2030
+ 0, /* tp_iternext */
2031
+ 0, /* tp_methods */
2032
+ scanner_members, /* tp_members */
2033
+ 0, /* tp_getset */
2034
+ 0, /* tp_base */
2035
+ 0, /* tp_dict */
2036
+ 0, /* tp_descr_get */
2037
+ 0, /* tp_descr_set */
2038
+ 0, /* tp_dictoffset */
2039
+ scanner_init, /* tp_init */
2040
+ 0,/* PyType_GenericAlloc, */ /* tp_alloc */
2041
+ scanner_new, /* tp_new */
2042
+ 0,/* PyObject_GC_Del, */ /* tp_free */
2043
+ };
2044
+
2045
+ static PyObject *
2046
+ encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2047
+ {
2048
+ PyEncoderObject *s;
2049
+ s = (PyEncoderObject *)type->tp_alloc(type, 0);
2050
+ if (s != NULL) {
2051
+ s->markers = NULL;
2052
+ s->defaultfn = NULL;
2053
+ s->encoder = NULL;
2054
+ s->indent = NULL;
2055
+ s->key_separator = NULL;
2056
+ s->item_separator = NULL;
2057
+ s->sort_keys = NULL;
2058
+ s->skipkeys = NULL;
2059
+ s->key_memo = NULL;
2060
+ s->item_sort_key = NULL;
2061
+ s->Decimal = NULL;
2062
+ }
2063
+ return (PyObject *)s;
2064
+ }
2065
+
2066
+ static int
2067
+ encoder_init(PyObject *self, PyObject *args, PyObject *kwds)
2068
+ {
2069
+ /* initialize Encoder object */
2070
+ static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", "key_memo", "use_decimal", "namedtuple_as_object", "tuple_as_array", "bigint_as_string", "item_sort_key", "Decimal", NULL};
2071
+
2072
+ PyEncoderObject *s;
2073
+ PyObject *markers, *defaultfn, *encoder, *indent, *key_separator;
2074
+ PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan, *key_memo;
2075
+ PyObject *use_decimal, *namedtuple_as_object, *tuple_as_array;
2076
+ PyObject *bigint_as_string, *item_sort_key, *Decimal;
2077
+
2078
+ assert(PyEncoder_Check(self));
2079
+ s = (PyEncoderObject *)self;
2080
+
2081
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOOOOOOOOO:make_encoder", kwlist,
2082
+ &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator,
2083
+ &sort_keys, &skipkeys, &allow_nan, &key_memo, &use_decimal,
2084
+ &namedtuple_as_object, &tuple_as_array, &bigint_as_string,
2085
+ &item_sort_key, &Decimal))
2086
+ return -1;
2087
+
2088
+ s->markers = markers;
2089
+ s->defaultfn = defaultfn;
2090
+ s->encoder = encoder;
2091
+ s->indent = indent;
2092
+ s->key_separator = key_separator;
2093
+ s->item_separator = item_separator;
2094
+ s->sort_keys = sort_keys;
2095
+ s->skipkeys = skipkeys;
2096
+ s->key_memo = key_memo;
2097
+ s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii);
2098
+ s->allow_nan = PyObject_IsTrue(allow_nan);
2099
+ s->use_decimal = PyObject_IsTrue(use_decimal);
2100
+ s->namedtuple_as_object = PyObject_IsTrue(namedtuple_as_object);
2101
+ s->tuple_as_array = PyObject_IsTrue(tuple_as_array);
2102
+ s->bigint_as_string = PyObject_IsTrue(bigint_as_string);
2103
+ s->item_sort_key = item_sort_key;
2104
+ s->Decimal = Decimal;
2105
+
2106
+ Py_INCREF(s->markers);
2107
+ Py_INCREF(s->defaultfn);
2108
+ Py_INCREF(s->encoder);
2109
+ Py_INCREF(s->indent);
2110
+ Py_INCREF(s->key_separator);
2111
+ Py_INCREF(s->item_separator);
2112
+ Py_INCREF(s->sort_keys);
2113
+ Py_INCREF(s->skipkeys);
2114
+ Py_INCREF(s->key_memo);
2115
+ Py_INCREF(s->item_sort_key);
2116
+ Py_INCREF(s->Decimal);
2117
+ return 0;
2118
+ }
2119
+
2120
+ static PyObject *
2121
+ encoder_call(PyObject *self, PyObject *args, PyObject *kwds)
2122
+ {
2123
+ /* Python callable interface to encode_listencode_obj */
2124
+ static char *kwlist[] = {"obj", "_current_indent_level", NULL};
2125
+ PyObject *obj;
2126
+ PyObject *rval;
2127
+ Py_ssize_t indent_level;
2128
+ PyEncoderObject *s;
2129
+ assert(PyEncoder_Check(self));
2130
+ s = (PyEncoderObject *)self;
2131
+ if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist,
2132
+ &obj, _convertPyInt_AsSsize_t, &indent_level))
2133
+ return NULL;
2134
+ rval = PyList_New(0);
2135
+ if (rval == NULL)
2136
+ return NULL;
2137
+ if (encoder_listencode_obj(s, rval, obj, indent_level)) {
2138
+ Py_DECREF(rval);
2139
+ return NULL;
2140
+ }
2141
+ return rval;
2142
+ }
2143
+
2144
+ static PyObject *
2145
+ _encoded_const(PyObject *obj)
2146
+ {
2147
+ /* Return the JSON string representation of None, True, False */
2148
+ if (obj == Py_None) {
2149
+ static PyObject *s_null = NULL;
2150
+ if (s_null == NULL) {
2151
+ s_null = PyString_InternFromString("null");
2152
+ }
2153
+ Py_INCREF(s_null);
2154
+ return s_null;
2155
+ }
2156
+ else if (obj == Py_True) {
2157
+ static PyObject *s_true = NULL;
2158
+ if (s_true == NULL) {
2159
+ s_true = PyString_InternFromString("true");
2160
+ }
2161
+ Py_INCREF(s_true);
2162
+ return s_true;
2163
+ }
2164
+ else if (obj == Py_False) {
2165
+ static PyObject *s_false = NULL;
2166
+ if (s_false == NULL) {
2167
+ s_false = PyString_InternFromString("false");
2168
+ }
2169
+ Py_INCREF(s_false);
2170
+ return s_false;
2171
+ }
2172
+ else {
2173
+ PyErr_SetString(PyExc_ValueError, "not a const");
2174
+ return NULL;
2175
+ }
2176
+ }
2177
+
2178
+ static PyObject *
2179
+ encoder_encode_float(PyEncoderObject *s, PyObject *obj)
2180
+ {
2181
+ /* Return the JSON representation of a PyFloat */
2182
+ double i = PyFloat_AS_DOUBLE(obj);
2183
+ if (!Py_IS_FINITE(i)) {
2184
+ if (!s->allow_nan) {
2185
+ PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant");
2186
+ return NULL;
2187
+ }
2188
+ if (i > 0) {
2189
+ return PyString_FromString("Infinity");
2190
+ }
2191
+ else if (i < 0) {
2192
+ return PyString_FromString("-Infinity");
2193
+ }
2194
+ else {
2195
+ return PyString_FromString("NaN");
2196
+ }
2197
+ }
2198
+ /* Use a better float format here? */
2199
+ return PyObject_Repr(obj);
2200
+ }
2201
+
2202
+ static PyObject *
2203
+ encoder_encode_string(PyEncoderObject *s, PyObject *obj)
2204
+ {
2205
+ /* Return the JSON representation of a string */
2206
+ if (s->fast_encode)
2207
+ return py_encode_basestring_ascii(NULL, obj);
2208
+ else
2209
+ return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL);
2210
+ }
2211
+
2212
+ static int
2213
+ _steal_list_append(PyObject *lst, PyObject *stolen)
2214
+ {
2215
+ /* Append stolen and then decrement its reference count */
2216
+ int rval = PyList_Append(lst, stolen);
2217
+ Py_DECREF(stolen);
2218
+ return rval;
2219
+ }
2220
+
2221
+ static int
2222
+ encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level)
2223
+ {
2224
+ /* Encode Python object obj to a JSON term, rval is a PyList */
2225
+ int rv = -1;
2226
+ if (Py_EnterRecursiveCall(" while encoding a JSON document"))
2227
+ return rv;
2228
+ do {
2229
+ if (obj == Py_None || obj == Py_True || obj == Py_False) {
2230
+ PyObject *cstr = _encoded_const(obj);
2231
+ if (cstr != NULL)
2232
+ rv = _steal_list_append(rval, cstr);
2233
+ }
2234
+ else if (PyString_Check(obj) || PyUnicode_Check(obj))
2235
+ {
2236
+ PyObject *encoded = encoder_encode_string(s, obj);
2237
+ if (encoded != NULL)
2238
+ rv = _steal_list_append(rval, encoded);
2239
+ }
2240
+ else if (PyInt_Check(obj) || PyLong_Check(obj)) {
2241
+ PyObject *encoded = PyObject_Str(obj);
2242
+ if (encoded != NULL) {
2243
+ if (s->bigint_as_string) {
2244
+ encoded = maybe_quote_bigint(encoded, obj);
2245
+ if (encoded == NULL)
2246
+ break;
2247
+ }
2248
+ rv = _steal_list_append(rval, encoded);
2249
+ }
2250
+ }
2251
+ else if (PyFloat_Check(obj)) {
2252
+ PyObject *encoded = encoder_encode_float(s, obj);
2253
+ if (encoded != NULL)
2254
+ rv = _steal_list_append(rval, encoded);
2255
+ }
2256
+ else if (s->namedtuple_as_object && _is_namedtuple(obj)) {
2257
+ PyObject *newobj = PyObject_CallMethod(obj, "_asdict", NULL);
2258
+ if (newobj != NULL) {
2259
+ rv = encoder_listencode_dict(s, rval, newobj, indent_level);
2260
+ Py_DECREF(newobj);
2261
+ }
2262
+ }
2263
+ else if (PyList_Check(obj) || (s->tuple_as_array && PyTuple_Check(obj))) {
2264
+ rv = encoder_listencode_list(s, rval, obj, indent_level);
2265
+ }
2266
+ else if (PyDict_Check(obj)) {
2267
+ rv = encoder_listencode_dict(s, rval, obj, indent_level);
2268
+ }
2269
+ else if (s->use_decimal && PyObject_TypeCheck(obj, s->Decimal)) {
2270
+ PyObject *encoded = PyObject_Str(obj);
2271
+ if (encoded != NULL)
2272
+ rv = _steal_list_append(rval, encoded);
2273
+ }
2274
+ else {
2275
+ PyObject *ident = NULL;
2276
+ PyObject *newobj;
2277
+ if (s->markers != Py_None) {
2278
+ int has_key;
2279
+ ident = PyLong_FromVoidPtr(obj);
2280
+ if (ident == NULL)
2281
+ break;
2282
+ has_key = PyDict_Contains(s->markers, ident);
2283
+ if (has_key) {
2284
+ if (has_key != -1)
2285
+ PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2286
+ Py_DECREF(ident);
2287
+ break;
2288
+ }
2289
+ if (PyDict_SetItem(s->markers, ident, obj)) {
2290
+ Py_DECREF(ident);
2291
+ break;
2292
+ }
2293
+ }
2294
+ newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL);
2295
+ if (newobj == NULL) {
2296
+ Py_XDECREF(ident);
2297
+ break;
2298
+ }
2299
+ rv = encoder_listencode_obj(s, rval, newobj, indent_level);
2300
+ Py_DECREF(newobj);
2301
+ if (rv) {
2302
+ Py_XDECREF(ident);
2303
+ rv = -1;
2304
+ }
2305
+ else if (ident != NULL) {
2306
+ if (PyDict_DelItem(s->markers, ident)) {
2307
+ Py_XDECREF(ident);
2308
+ rv = -1;
2309
+ }
2310
+ Py_XDECREF(ident);
2311
+ }
2312
+ }
2313
+ } while (0);
2314
+ Py_LeaveRecursiveCall();
2315
+ return rv;
2316
+ }
2317
+
2318
+ static int
2319
+ encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level)
2320
+ {
2321
+ /* Encode Python dict dct a JSON term, rval is a PyList */
2322
+ static PyObject *open_dict = NULL;
2323
+ static PyObject *close_dict = NULL;
2324
+ static PyObject *empty_dict = NULL;
2325
+ static PyObject *iteritems = NULL;
2326
+ PyObject *kstr = NULL;
2327
+ PyObject *ident = NULL;
2328
+ PyObject *iter = NULL;
2329
+ PyObject *item = NULL;
2330
+ PyObject *items = NULL;
2331
+ PyObject *encoded = NULL;
2332
+ int skipkeys;
2333
+ Py_ssize_t idx;
2334
+
2335
+ if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL) {
2336
+ open_dict = PyString_InternFromString("{");
2337
+ close_dict = PyString_InternFromString("}");
2338
+ empty_dict = PyString_InternFromString("{}");
2339
+ iteritems = PyString_InternFromString("iteritems");
2340
+ if (open_dict == NULL || close_dict == NULL || empty_dict == NULL || iteritems == NULL)
2341
+ return -1;
2342
+ }
2343
+ if (PyDict_Size(dct) == 0)
2344
+ return PyList_Append(rval, empty_dict);
2345
+
2346
+ if (s->markers != Py_None) {
2347
+ int has_key;
2348
+ ident = PyLong_FromVoidPtr(dct);
2349
+ if (ident == NULL)
2350
+ goto bail;
2351
+ has_key = PyDict_Contains(s->markers, ident);
2352
+ if (has_key) {
2353
+ if (has_key != -1)
2354
+ PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2355
+ goto bail;
2356
+ }
2357
+ if (PyDict_SetItem(s->markers, ident, dct)) {
2358
+ goto bail;
2359
+ }
2360
+ }
2361
+
2362
+ if (PyList_Append(rval, open_dict))
2363
+ goto bail;
2364
+
2365
+ if (s->indent != Py_None) {
2366
+ /* TODO: DOES NOT RUN */
2367
+ indent_level += 1;
2368
+ /*
2369
+ newline_indent = '\n' + (_indent * _current_indent_level)
2370
+ separator = _item_separator + newline_indent
2371
+ buf += newline_indent
2372
+ */
2373
+ }
2374
+
2375
+ if (PyCallable_Check(s->item_sort_key)) {
2376
+ if (PyDict_CheckExact(dct))
2377
+ items = PyDict_Items(dct);
2378
+ else
2379
+ items = PyMapping_Items(dct);
2380
+ PyObject_CallMethod(items, "sort", "OO", Py_None, s->item_sort_key);
2381
+ }
2382
+ else if (PyObject_IsTrue(s->sort_keys)) {
2383
+ /* First sort the keys then replace them with (key, value) tuples. */
2384
+ Py_ssize_t i, nitems;
2385
+ if (PyDict_CheckExact(dct))
2386
+ items = PyDict_Keys(dct);
2387
+ else
2388
+ items = PyMapping_Keys(dct);
2389
+ if (items == NULL)
2390
+ goto bail;
2391
+ if (!PyList_Check(items)) {
2392
+ PyErr_SetString(PyExc_ValueError, "keys must return list");
2393
+ goto bail;
2394
+ }
2395
+ if (PyList_Sort(items) < 0)
2396
+ goto bail;
2397
+ nitems = PyList_GET_SIZE(items);
2398
+ for (i = 0; i < nitems; i++) {
2399
+ PyObject *key, *value;
2400
+ key = PyList_GET_ITEM(items, i);
2401
+ value = PyDict_GetItem(dct, key);
2402
+ item = PyTuple_Pack(2, key, value);
2403
+ if (item == NULL)
2404
+ goto bail;
2405
+ PyList_SET_ITEM(items, i, item);
2406
+ Py_DECREF(key);
2407
+ }
2408
+ }
2409
+ else {
2410
+ if (PyDict_CheckExact(dct))
2411
+ items = PyDict_Items(dct);
2412
+ else
2413
+ items = PyMapping_Items(dct);
2414
+ }
2415
+ if (items == NULL)
2416
+ goto bail;
2417
+ iter = PyObject_GetIter(items);
2418
+ Py_DECREF(items);
2419
+ if (iter == NULL)
2420
+ goto bail;
2421
+
2422
+ skipkeys = PyObject_IsTrue(s->skipkeys);
2423
+ idx = 0;
2424
+ while ((item = PyIter_Next(iter))) {
2425
+ PyObject *encoded, *key, *value;
2426
+ if (!PyTuple_Check(item) || Py_SIZE(item) != 2) {
2427
+ PyErr_SetString(PyExc_ValueError, "items must return 2-tuples");
2428
+ goto bail;
2429
+ }
2430
+ key = PyTuple_GET_ITEM(item, 0);
2431
+ if (key == NULL)
2432
+ goto bail;
2433
+ value = PyTuple_GET_ITEM(item, 1);
2434
+ if (value == NULL)
2435
+ goto bail;
2436
+
2437
+ encoded = PyDict_GetItem(s->key_memo, key);
2438
+ if (encoded != NULL) {
2439
+ Py_INCREF(encoded);
2440
+ }
2441
+ else if (PyString_Check(key) || PyUnicode_Check(key)) {
2442
+ Py_INCREF(key);
2443
+ kstr = key;
2444
+ }
2445
+ else if (PyFloat_Check(key)) {
2446
+ kstr = encoder_encode_float(s, key);
2447
+ if (kstr == NULL)
2448
+ goto bail;
2449
+ }
2450
+ else if (key == Py_True || key == Py_False || key == Py_None) {
2451
+ /* This must come before the PyInt_Check because
2452
+ True and False are also 1 and 0.*/
2453
+ kstr = _encoded_const(key);
2454
+ if (kstr == NULL)
2455
+ goto bail;
2456
+ }
2457
+ else if (PyInt_Check(key) || PyLong_Check(key)) {
2458
+ kstr = PyObject_Str(key);
2459
+ if (kstr == NULL)
2460
+ goto bail;
2461
+ }
2462
+ else if (skipkeys) {
2463
+ Py_DECREF(item);
2464
+ continue;
2465
+ }
2466
+ else {
2467
+ /* TODO: include repr of key */
2468
+ PyErr_SetString(PyExc_TypeError, "keys must be a string");
2469
+ goto bail;
2470
+ }
2471
+
2472
+ if (idx) {
2473
+ if (PyList_Append(rval, s->item_separator))
2474
+ goto bail;
2475
+ }
2476
+
2477
+ if (encoded == NULL) {
2478
+ encoded = encoder_encode_string(s, kstr);
2479
+ Py_CLEAR(kstr);
2480
+ if (encoded == NULL)
2481
+ goto bail;
2482
+ if (PyDict_SetItem(s->key_memo, key, encoded))
2483
+ goto bail;
2484
+ }
2485
+ if (PyList_Append(rval, encoded)) {
2486
+ goto bail;
2487
+ }
2488
+ Py_CLEAR(encoded);
2489
+ if (PyList_Append(rval, s->key_separator))
2490
+ goto bail;
2491
+ if (encoder_listencode_obj(s, rval, value, indent_level))
2492
+ goto bail;
2493
+ Py_CLEAR(item);
2494
+ idx += 1;
2495
+ }
2496
+ Py_CLEAR(iter);
2497
+ if (PyErr_Occurred())
2498
+ goto bail;
2499
+ if (ident != NULL) {
2500
+ if (PyDict_DelItem(s->markers, ident))
2501
+ goto bail;
2502
+ Py_CLEAR(ident);
2503
+ }
2504
+ if (s->indent != Py_None) {
2505
+ /* TODO: DOES NOT RUN */
2506
+ indent_level -= 1;
2507
+ /*
2508
+ yield '\n' + (_indent * _current_indent_level)
2509
+ */
2510
+ }
2511
+ if (PyList_Append(rval, close_dict))
2512
+ goto bail;
2513
+ return 0;
2514
+
2515
+ bail:
2516
+ Py_XDECREF(encoded);
2517
+ Py_XDECREF(items);
2518
+ Py_XDECREF(iter);
2519
+ Py_XDECREF(kstr);
2520
+ Py_XDECREF(ident);
2521
+ return -1;
2522
+ }
2523
+
2524
+
2525
+ static int
2526
+ encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level)
2527
+ {
2528
+ /* Encode Python list seq to a JSON term, rval is a PyList */
2529
+ static PyObject *open_array = NULL;
2530
+ static PyObject *close_array = NULL;
2531
+ static PyObject *empty_array = NULL;
2532
+ PyObject *ident = NULL;
2533
+ PyObject *iter = NULL;
2534
+ PyObject *obj = NULL;
2535
+ int is_true;
2536
+ int i = 0;
2537
+
2538
+ if (open_array == NULL || close_array == NULL || empty_array == NULL) {
2539
+ open_array = PyString_InternFromString("[");
2540
+ close_array = PyString_InternFromString("]");
2541
+ empty_array = PyString_InternFromString("[]");
2542
+ if (open_array == NULL || close_array == NULL || empty_array == NULL)
2543
+ return -1;
2544
+ }
2545
+ ident = NULL;
2546
+ is_true = PyObject_IsTrue(seq);
2547
+ if (is_true == -1)
2548
+ return -1;
2549
+ else if (is_true == 0)
2550
+ return PyList_Append(rval, empty_array);
2551
+
2552
+ if (s->markers != Py_None) {
2553
+ int has_key;
2554
+ ident = PyLong_FromVoidPtr(seq);
2555
+ if (ident == NULL)
2556
+ goto bail;
2557
+ has_key = PyDict_Contains(s->markers, ident);
2558
+ if (has_key) {
2559
+ if (has_key != -1)
2560
+ PyErr_SetString(PyExc_ValueError, "Circular reference detected");
2561
+ goto bail;
2562
+ }
2563
+ if (PyDict_SetItem(s->markers, ident, seq)) {
2564
+ goto bail;
2565
+ }
2566
+ }
2567
+
2568
+ iter = PyObject_GetIter(seq);
2569
+ if (iter == NULL)
2570
+ goto bail;
2571
+
2572
+ if (PyList_Append(rval, open_array))
2573
+ goto bail;
2574
+ if (s->indent != Py_None) {
2575
+ /* TODO: DOES NOT RUN */
2576
+ indent_level += 1;
2577
+ /*
2578
+ newline_indent = '\n' + (_indent * _current_indent_level)
2579
+ separator = _item_separator + newline_indent
2580
+ buf += newline_indent
2581
+ */
2582
+ }
2583
+ while ((obj = PyIter_Next(iter))) {
2584
+ if (i) {
2585
+ if (PyList_Append(rval, s->item_separator))
2586
+ goto bail;
2587
+ }
2588
+ if (encoder_listencode_obj(s, rval, obj, indent_level))
2589
+ goto bail;
2590
+ i++;
2591
+ Py_CLEAR(obj);
2592
+ }
2593
+ Py_CLEAR(iter);
2594
+ if (PyErr_Occurred())
2595
+ goto bail;
2596
+ if (ident != NULL) {
2597
+ if (PyDict_DelItem(s->markers, ident))
2598
+ goto bail;
2599
+ Py_CLEAR(ident);
2600
+ }
2601
+ if (s->indent != Py_None) {
2602
+ /* TODO: DOES NOT RUN */
2603
+ indent_level -= 1;
2604
+ /*
2605
+ yield '\n' + (_indent * _current_indent_level)
2606
+ */
2607
+ }
2608
+ if (PyList_Append(rval, close_array))
2609
+ goto bail;
2610
+ return 0;
2611
+
2612
+ bail:
2613
+ Py_XDECREF(obj);
2614
+ Py_XDECREF(iter);
2615
+ Py_XDECREF(ident);
2616
+ return -1;
2617
+ }
2618
+
2619
+ static void
2620
+ encoder_dealloc(PyObject *self)
2621
+ {
2622
+ /* Deallocate Encoder */
2623
+ encoder_clear(self);
2624
+ Py_TYPE(self)->tp_free(self);
2625
+ }
2626
+
2627
+ static int
2628
+ encoder_traverse(PyObject *self, visitproc visit, void *arg)
2629
+ {
2630
+ PyEncoderObject *s;
2631
+ assert(PyEncoder_Check(self));
2632
+ s = (PyEncoderObject *)self;
2633
+ Py_VISIT(s->markers);
2634
+ Py_VISIT(s->defaultfn);
2635
+ Py_VISIT(s->encoder);
2636
+ Py_VISIT(s->indent);
2637
+ Py_VISIT(s->key_separator);
2638
+ Py_VISIT(s->item_separator);
2639
+ Py_VISIT(s->sort_keys);
2640
+ Py_VISIT(s->skipkeys);
2641
+ Py_VISIT(s->key_memo);
2642
+ Py_VISIT(s->item_sort_key);
2643
+ return 0;
2644
+ }
2645
+
2646
+ static int
2647
+ encoder_clear(PyObject *self)
2648
+ {
2649
+ /* Deallocate Encoder */
2650
+ PyEncoderObject *s;
2651
+ assert(PyEncoder_Check(self));
2652
+ s = (PyEncoderObject *)self;
2653
+ Py_CLEAR(s->markers);
2654
+ Py_CLEAR(s->defaultfn);
2655
+ Py_CLEAR(s->encoder);
2656
+ Py_CLEAR(s->indent);
2657
+ Py_CLEAR(s->key_separator);
2658
+ Py_CLEAR(s->item_separator);
2659
+ Py_CLEAR(s->sort_keys);
2660
+ Py_CLEAR(s->skipkeys);
2661
+ Py_CLEAR(s->key_memo);
2662
+ Py_CLEAR(s->item_sort_key);
2663
+ Py_CLEAR(s->Decimal);
2664
+ return 0;
2665
+ }
2666
+
2667
+ PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable");
2668
+
2669
+ static
2670
+ PyTypeObject PyEncoderType = {
2671
+ PyObject_HEAD_INIT(NULL)
2672
+ 0, /* tp_internal */
2673
+ "simplejson._speedups.Encoder", /* tp_name */
2674
+ sizeof(PyEncoderObject), /* tp_basicsize */
2675
+ 0, /* tp_itemsize */
2676
+ encoder_dealloc, /* tp_dealloc */
2677
+ 0, /* tp_print */
2678
+ 0, /* tp_getattr */
2679
+ 0, /* tp_setattr */
2680
+ 0, /* tp_compare */
2681
+ 0, /* tp_repr */
2682
+ 0, /* tp_as_number */
2683
+ 0, /* tp_as_sequence */
2684
+ 0, /* tp_as_mapping */
2685
+ 0, /* tp_hash */
2686
+ encoder_call, /* tp_call */
2687
+ 0, /* tp_str */
2688
+ 0, /* tp_getattro */
2689
+ 0, /* tp_setattro */
2690
+ 0, /* tp_as_buffer */
2691
+ Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */
2692
+ encoder_doc, /* tp_doc */
2693
+ encoder_traverse, /* tp_traverse */
2694
+ encoder_clear, /* tp_clear */
2695
+ 0, /* tp_richcompare */
2696
+ 0, /* tp_weaklistoffset */
2697
+ 0, /* tp_iter */
2698
+ 0, /* tp_iternext */
2699
+ 0, /* tp_methods */
2700
+ encoder_members, /* tp_members */
2701
+ 0, /* tp_getset */
2702
+ 0, /* tp_base */
2703
+ 0, /* tp_dict */
2704
+ 0, /* tp_descr_get */
2705
+ 0, /* tp_descr_set */
2706
+ 0, /* tp_dictoffset */
2707
+ encoder_init, /* tp_init */
2708
+ 0, /* tp_alloc */
2709
+ encoder_new, /* tp_new */
2710
+ 0, /* tp_free */
2711
+ };
2712
+
2713
+ static PyMethodDef speedups_methods[] = {
2714
+ {"encode_basestring_ascii",
2715
+ (PyCFunction)py_encode_basestring_ascii,
2716
+ METH_O,
2717
+ pydoc_encode_basestring_ascii},
2718
+ {"scanstring",
2719
+ (PyCFunction)py_scanstring,
2720
+ METH_VARARGS,
2721
+ pydoc_scanstring},
2722
+ {NULL, NULL, 0, NULL}
2723
+ };
2724
+
2725
+ PyDoc_STRVAR(module_doc,
2726
+ "simplejson speedups\n");
2727
+
2728
+ void
2729
+ init_speedups(void)
2730
+ {
2731
+ PyObject *m;
2732
+ PyScannerType.tp_new = PyType_GenericNew;
2733
+ if (PyType_Ready(&PyScannerType) < 0)
2734
+ return;
2735
+ PyEncoderType.tp_new = PyType_GenericNew;
2736
+ if (PyType_Ready(&PyEncoderType) < 0)
2737
+ return;
2738
+
2739
+
2740
+ m = Py_InitModule3("_speedups", speedups_methods, module_doc);
2741
+ Py_INCREF((PyObject*)&PyScannerType);
2742
+ PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType);
2743
+ Py_INCREF((PyObject*)&PyEncoderType);
2744
+ PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType);
2745
+ }