pygments.rb 0.2.13 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. data/.gitignore +1 -0
  2. data/README.md +45 -19
  3. data/Rakefile +21 -11
  4. data/bench.rb +15 -48
  5. data/cache-lexers.rb +8 -0
  6. data/lexers +0 -0
  7. data/lib/pygments.rb +3 -6
  8. data/lib/pygments/mentos.py +343 -0
  9. data/lib/pygments/popen.rb +383 -0
  10. data/lib/pygments/version.rb +1 -1
  11. data/pygments.rb.gemspec +5 -4
  12. data/test/test_data.c +2581 -0
  13. data/test/test_data.py +514 -0
  14. data/test/test_data_generated +2582 -0
  15. data/test/test_pygments.rb +208 -84
  16. data/vendor/pygments-main/pygments/lexers/_mapping.py +1 -1
  17. data/vendor/pygments-main/pygments/lexers/shell.py +1 -1
  18. data/vendor/simplejson/.gitignore +10 -0
  19. data/vendor/simplejson/.travis.yml +5 -0
  20. data/vendor/simplejson/CHANGES.txt +291 -0
  21. data/vendor/simplejson/LICENSE.txt +19 -0
  22. data/vendor/simplejson/MANIFEST.in +5 -0
  23. data/vendor/simplejson/README.rst +19 -0
  24. data/vendor/simplejson/conf.py +179 -0
  25. data/vendor/simplejson/index.rst +628 -0
  26. data/vendor/simplejson/scripts/make_docs.py +18 -0
  27. data/vendor/simplejson/setup.py +104 -0
  28. data/vendor/simplejson/simplejson/__init__.py +510 -0
  29. data/vendor/simplejson/simplejson/_speedups.c +2745 -0
  30. data/vendor/simplejson/simplejson/decoder.py +425 -0
  31. data/vendor/simplejson/simplejson/encoder.py +567 -0
  32. data/vendor/simplejson/simplejson/ordered_dict.py +119 -0
  33. data/vendor/simplejson/simplejson/scanner.py +77 -0
  34. data/vendor/simplejson/simplejson/tests/__init__.py +67 -0
  35. data/vendor/simplejson/simplejson/tests/test_bigint_as_string.py +55 -0
  36. data/vendor/simplejson/simplejson/tests/test_check_circular.py +30 -0
  37. data/vendor/simplejson/simplejson/tests/test_decimal.py +66 -0
  38. data/vendor/simplejson/simplejson/tests/test_decode.py +83 -0
  39. data/vendor/simplejson/simplejson/tests/test_default.py +9 -0
  40. data/vendor/simplejson/simplejson/tests/test_dump.py +67 -0
  41. data/vendor/simplejson/simplejson/tests/test_encode_basestring_ascii.py +46 -0
  42. data/vendor/simplejson/simplejson/tests/test_encode_for_html.py +32 -0
  43. data/vendor/simplejson/simplejson/tests/test_errors.py +34 -0
  44. data/vendor/simplejson/simplejson/tests/test_fail.py +91 -0
  45. data/vendor/simplejson/simplejson/tests/test_float.py +19 -0
  46. data/vendor/simplejson/simplejson/tests/test_indent.py +86 -0
  47. data/vendor/simplejson/simplejson/tests/test_item_sort_key.py +20 -0
  48. data/vendor/simplejson/simplejson/tests/test_namedtuple.py +121 -0
  49. data/vendor/simplejson/simplejson/tests/test_pass1.py +76 -0
  50. data/vendor/simplejson/simplejson/tests/test_pass2.py +14 -0
  51. data/vendor/simplejson/simplejson/tests/test_pass3.py +20 -0
  52. data/vendor/simplejson/simplejson/tests/test_recursion.py +67 -0
  53. data/vendor/simplejson/simplejson/tests/test_scanstring.py +117 -0
  54. data/vendor/simplejson/simplejson/tests/test_separators.py +42 -0
  55. data/vendor/simplejson/simplejson/tests/test_speedups.py +20 -0
  56. data/vendor/simplejson/simplejson/tests/test_tuple.py +49 -0
  57. data/vendor/simplejson/simplejson/tests/test_unicode.py +109 -0
  58. data/vendor/simplejson/simplejson/tool.py +39 -0
  59. metadata +80 -22
  60. data/ext/extconf.rb +0 -14
  61. data/ext/pygments.c +0 -466
  62. data/lib/pygments/c.rb +0 -54
  63. data/lib/pygments/ffi.rb +0 -155
  64. data/vendor/.gitignore +0 -1
@@ -0,0 +1,425 @@
1
+ """Implementation of JSONDecoder
2
+ """
3
+ import re
4
+ import sys
5
+ import struct
6
+
7
+ from simplejson.scanner import make_scanner
8
+ def _import_c_scanstring():
9
+ try:
10
+ from simplejson._speedups import scanstring
11
+ return scanstring
12
+ except ImportError:
13
+ return None
14
+ c_scanstring = _import_c_scanstring()
15
+
16
+ __all__ = ['JSONDecoder']
17
+
18
+ FLAGS = re.VERBOSE | re.MULTILINE | re.DOTALL
19
+
20
+ def _floatconstants():
21
+ _BYTES = '7FF80000000000007FF0000000000000'.decode('hex')
22
+ # The struct module in Python 2.4 would get frexp() out of range here
23
+ # when an endian is specified in the format string. Fixed in Python 2.5+
24
+ if sys.byteorder != 'big':
25
+ _BYTES = _BYTES[:8][::-1] + _BYTES[8:][::-1]
26
+ nan, inf = struct.unpack('dd', _BYTES)
27
+ return nan, inf, -inf
28
+
29
+ NaN, PosInf, NegInf = _floatconstants()
30
+
31
+
32
class JSONDecodeError(ValueError):
    """Subclass of ValueError with the following additional properties:

    msg: The unformatted error message
    doc: The JSON document being parsed
    pos: The start index of doc where parsing failed
    end: The end index of doc where parsing failed (may be None)
    lineno: The line corresponding to pos
    colno: The column corresponding to pos
    endlineno: The line corresponding to end (may be None)
    endcolno: The column corresponding to end (may be None)

    """
    def __init__(self, msg, doc, pos, end=None):
        # The ValueError message is the fully formatted, human-readable text.
        ValueError.__init__(self, errmsg(msg, doc, pos, end=end))
        self.msg, self.doc = msg, doc
        self.pos, self.end = pos, end
        self.lineno, self.colno = linecol(doc, pos)
        # The end coordinates only exist when an end index was supplied.
        if end is None:
            self.endlineno, self.endcolno = None, None
        else:
            self.endlineno, self.endcolno = linecol(doc, end)
56
+
57
+
58
def linecol(doc, pos):
    """Translate the character index ``pos`` in ``doc`` to ``(line, column)``.

    Both values are 1-based.  The column is the distance from the newline
    preceding ``pos``; ``rfind`` returns -1 when there is none, which makes
    the first line 1-based as well, consistent with every following line
    (previously the first line alone was reported 0-based).
    """
    lineno = doc.count('\n', 0, pos) + 1
    colno = pos - doc.rfind('\n', 0, pos)
    return lineno, colno
65
+
66
+
67
def errmsg(msg, doc, pos, end=None):
    """Format ``msg`` together with the location of ``pos`` in ``doc``.

    When ``end`` is given the message describes the range from ``pos`` to
    ``end``; otherwise only the single position ``pos`` is reported.
    """
    # Note that this function is called from _speedups
    lineno, colno = linecol(doc, pos)
    if end is not None:
        endlineno, endcolno = linecol(doc, end)
        return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
            msg, lineno, colno, endlineno, endcolno, pos, end)
    return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
80
+
81
+
82
+ _CONSTANTS = {
83
+ '-Infinity': NegInf,
84
+ 'Infinity': PosInf,
85
+ 'NaN': NaN,
86
+ }
87
+
88
+ STRINGCHUNK = re.compile(r'(.*?)(["\\\x00-\x1f])', FLAGS)
89
+ BACKSLASH = {
90
+ '"': u'"', '\\': u'\\', '/': u'/',
91
+ 'b': u'\b', 'f': u'\f', 'n': u'\n', 'r': u'\r', 't': u'\t',
92
+ }
93
+
94
+ DEFAULT_ENCODING = "utf-8"
95
+
96
def _parse_hex4(digits):
    """Return the value of exactly four hexadecimal digits, else ``None``.

    ``int(x, 16)`` alone is too lenient here: it accepts signs, surrounding
    whitespace and a ``0x`` prefix, none of which are valid in a JSON
    ``\\uXXXX`` escape.
    """
    if len(digits) != 4:
        return None
    for ch in digits:
        if ch not in '0123456789abcdefABCDEF':
            return None
    return int(digits, 16)


def py_scanstring(s, end, encoding=None, strict=True,
        _b=BACKSLASH, _m=STRINGCHUNK.match):
    """Scan the string s for a JSON string. End is the index of the
    character in s after the quote that started the JSON string.
    Unescapes all valid JSON string escape sequences and raises
    JSONDecodeError (a ValueError subclass) on attempt to decode an invalid
    string. If strict is False then literal control characters are allowed
    in the string.

    Returns a tuple of the decoded string and the index of the character in s
    after the end quote."""
    if encoding is None:
        encoding = DEFAULT_ENCODING
    chunks = []
    _append = chunks.append
    # Index of the opening quote, used for "unterminated string" errors
    begin = end - 1
    while 1:
        chunk = _m(s, end)
        if chunk is None:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        end = chunk.end()
        content, terminator = chunk.groups()
        # Content contains zero or more unescaped string characters
        if content:
            if not isinstance(content, unicode):
                content = unicode(content, encoding)
            _append(content)
        # Terminator is the end of string, a literal control character,
        # or a backslash denoting that an escape sequence follows
        if terminator == '"':
            break
        elif terminator != '\\':
            if strict:
                msg = "Invalid control character %r at" % (terminator,)
                raise JSONDecodeError(msg, s, end)
            else:
                _append(terminator)
                continue
        try:
            esc = s[end]
        except IndexError:
            raise JSONDecodeError(
                "Unterminated string starting at", s, begin)
        # If not a unicode escape sequence, must be in the lookup table
        if esc != 'u':
            try:
                char = _b[esc]
            except KeyError:
                msg = "Invalid \\escape: " + repr(esc)
                raise JSONDecodeError(msg, s, end)
            end += 1
        else:
            # Unicode escape sequence; malformed digits are reported as a
            # JSONDecodeError with a position instead of a bare ValueError
            uni = _parse_hex4(s[end + 1:end + 5])
            if uni is None:
                msg = "Invalid \\uXXXX escape"
                raise JSONDecodeError(msg, s, end)
            next_end = end + 5
            # Check for surrogate pair on UCS-4 systems
            if 0xd800 <= uni <= 0xdbff and sys.maxunicode > 65535:
                msg = "Invalid \\uXXXX\\uXXXX surrogate pair"
                if not s[end + 5:end + 7] == '\\u':
                    raise JSONDecodeError(msg, s, end)
                uni2 = _parse_hex4(s[end + 7:end + 11])
                # The second half must be a low surrogate, otherwise the
                # arithmetic below would silently produce a wrong code point
                if uni2 is None or not (0xdc00 <= uni2 <= 0xdfff):
                    raise JSONDecodeError(msg, s, end)
                uni = 0x10000 + (((uni - 0xd800) << 10) | (uni2 - 0xdc00))
                next_end += 6
            char = unichr(uni)
            end = next_end
        # Append the unescaped character
        _append(char)
    return u''.join(chunks), end
172
+
173
+
174
+ # Use speedup if available
175
+ scanstring = c_scanstring or py_scanstring
176
+
177
+ WHITESPACE = re.compile(r'[ \t\n\r]*', FLAGS)
178
+ WHITESPACE_STR = ' \t\n\r'
179
+
180
def JSONObject(state, encoding, strict, scan_once, object_hook,
        object_pairs_hook, memo=None,
        _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON object whose opening ``{`` has already been consumed.

    ``state`` is the 2-tuple ``(s, end)``: the document and the index just
    after the ``{``.  (It used to be unpacked in the signature; tuple
    parameters were removed from the language by PEP 3113, so it is unpacked
    in the body instead.  Callers pass the same tuple positionally.)

    ``scan_once(s, idx)`` decodes one value and returns ``(value, end)``,
    raising ``StopIteration`` when no value starts at ``idx``.  ``memo`` is
    used to share identical key strings between objects.

    Returns ``(result, end)`` where ``end`` is the index after the closing
    ``}``.  ``result`` is ``object_pairs_hook(pairs)`` when that hook is
    given, otherwise a dict, passed through ``object_hook`` when given.
    Raises JSONDecodeError on malformed input.
    """
    (s, end) = state
    # Backwards compatibility
    if memo is None:
        memo = {}
    memo_get = memo.setdefault
    pairs = []
    # Use a slice to prevent IndexError from being raised, the following
    # check will raise a more specific ValueError if the string is empty
    nextchar = s[end:end + 1]
    # Normally we expect nextchar == '"'
    if nextchar != '"':
        if nextchar in _ws:
            end = _w(s, end).end()
            nextchar = s[end:end + 1]
        # Trivial empty object
        if nextchar == '}':
            if object_pairs_hook is not None:
                result = object_pairs_hook(pairs)
                return result, end + 1
            pairs = {}
            if object_hook is not None:
                pairs = object_hook(pairs)
            return pairs, end + 1
        elif nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, end)
    end += 1
    while True:
        key, end = scanstring(s, end, encoding, strict)
        key = memo_get(key, key)

        # To skip some function call overhead we optimize the fast paths where
        # the JSON key separator is ": " or just ":".
        if s[end:end + 1] != ':':
            end = _w(s, end).end()
            if s[end:end + 1] != ':':
                raise JSONDecodeError("Expecting ':' delimiter", s, end)

        end += 1

        # Skip whitespace before the value; running off the end of the
        # document is left for scan_once to report
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        pairs.append((key, value))

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end = _w(s, end + 1).end()
                nextchar = s[end]
        except IndexError:
            nextchar = ''
        end += 1

        if nextchar == '}':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        try:
            nextchar = s[end]
            if nextchar in _ws:
                end += 1
                nextchar = s[end]
                if nextchar in _ws:
                    end = _w(s, end + 1).end()
                    nextchar = s[end]
        except IndexError:
            nextchar = ''

        end += 1
        if nextchar != '"':
            raise JSONDecodeError(
                "Expecting property name enclosed in double quotes",
                s, end - 1)

    if object_pairs_hook is not None:
        result = object_pairs_hook(pairs)
        return result, end
    pairs = dict(pairs)
    if object_hook is not None:
        pairs = object_hook(pairs)
    return pairs, end
275
+
276
def JSONArray(state, scan_once, _w=WHITESPACE.match, _ws=WHITESPACE_STR):
    """Parse a JSON array whose opening ``[`` has already been consumed.

    ``state`` is the 2-tuple ``(s, end)``: the document and the index just
    after the ``[``.  (It used to be unpacked in the signature; tuple
    parameters were removed from the language by PEP 3113, so it is unpacked
    in the body instead.  Callers pass the same tuple positionally.)

    ``scan_once(s, idx)`` decodes one value and returns ``(value, end)``,
    raising ``StopIteration`` when no value starts at ``idx``.

    Returns ``(values, end)`` where ``values`` is a list and ``end`` is the
    index after the closing ``]``.  Raises JSONDecodeError on malformed
    input.
    """
    (s, end) = state
    values = []
    nextchar = s[end:end + 1]
    if nextchar in _ws:
        end = _w(s, end + 1).end()
        nextchar = s[end:end + 1]
    # Look-ahead for trivial empty array
    if nextchar == ']':
        return values, end + 1
    _append = values.append
    while True:
        try:
            value, end = scan_once(s, end)
        except StopIteration:
            raise JSONDecodeError("Expecting object", s, end)
        _append(value)
        nextchar = s[end:end + 1]
        if nextchar in _ws:
            end = _w(s, end + 1).end()
            nextchar = s[end:end + 1]
        end += 1
        if nextchar == ']':
            break
        elif nextchar != ',':
            # end was already advanced past the offending character, so
            # report end - 1 (the same convention JSONObject uses)
            raise JSONDecodeError("Expecting ',' delimiter", s, end - 1)

        # Skip whitespace before the next element; running off the end of
        # the document is left for scan_once to report
        try:
            if s[end] in _ws:
                end += 1
                if s[end] in _ws:
                    end = _w(s, end + 1).end()
        except IndexError:
            pass

    return values, end
311
+
312
class JSONDecoder(object):
    """Simple JSON <http://json.org> decoder

    Performs the following translations in decoding by default:

    +---------------+-------------------+
    | JSON          | Python            |
    +===============+===================+
    | object        | dict              |
    +---------------+-------------------+
    | array         | list              |
    +---------------+-------------------+
    | string        | unicode           |
    +---------------+-------------------+
    | number (int)  | int, long         |
    +---------------+-------------------+
    | number (real) | float             |
    +---------------+-------------------+
    | true          | True              |
    +---------------+-------------------+
    | false         | False             |
    +---------------+-------------------+
    | null          | None              |
    +---------------+-------------------+

    It also understands ``NaN``, ``Infinity``, and ``-Infinity`` as
    their corresponding ``float`` values, which is outside the JSON spec.

    """

    def __init__(self, encoding=None, object_hook=None, parse_float=None,
            parse_int=None, parse_constant=None, strict=True,
            object_pairs_hook=None):
        """
        *encoding* determines the encoding used to interpret any
        :class:`str` objects decoded by this instance (``'utf-8'`` by
        default).  It has no effect when decoding :class:`unicode` objects.

        Note that currently only encodings that are a superset of ASCII work,
        strings of other encodings should be passed in as :class:`unicode`.

        *object_hook*, if specified, will be called with the result of every
        JSON object decoded and its return value will be used in place of the
        given :class:`dict`.  This can be used to provide custom
        deserializations (e.g. to support JSON-RPC class hinting).

        *object_pairs_hook* is an optional function that will be called with
        the result of any object literal decode with an ordered list of pairs.
        The return value of *object_pairs_hook* will be used instead of the
        :class:`dict`.  This feature can be used to implement custom decoders
        that rely on the order that the key and value pairs are decoded (for
        example, :func:`collections.OrderedDict` will remember the order of
        insertion). If *object_hook* is also defined, the *object_pairs_hook*
        takes priority.

        *parse_float*, if specified, will be called with the string of every
        JSON float to be decoded.  By default, this is equivalent to
        ``float(num_str)``. This can be used to use another datatype or parser
        for JSON floats (e.g. :class:`decimal.Decimal`).

        *parse_int*, if specified, will be called with the string of every
        JSON int to be decoded.  By default, this is equivalent to
        ``int(num_str)``.  This can be used to use another datatype or parser
        for JSON integers (e.g. :class:`float`).

        *parse_constant*, if specified, will be called with one of the
        following strings: ``'-Infinity'``, ``'Infinity'``, ``'NaN'``.  This
        can be used to raise an exception if invalid JSON numbers are
        encountered.

        *strict* controls the parser's behavior when it encounters an
        invalid control character in a string. The default setting of
        ``True`` means that unescaped control characters are parse errors, if
        ``False`` then control characters will be allowed in strings.

        """
        # Plain configuration
        self.encoding = encoding
        self.strict = strict
        self.object_hook = object_hook
        self.object_pairs_hook = object_pairs_hook
        # Converters fall back to the built-in behaviour when not supplied
        self.parse_float = parse_float or float
        self.parse_int = parse_int or int
        self.parse_constant = parse_constant or _CONSTANTS.__getitem__
        # Parsers for the composite JSON types
        self.parse_object = JSONObject
        self.parse_array = JSONArray
        self.parse_string = scanstring
        self.memo = {}
        # Built last so the scanner factory receives the configured instance
        self.scan_once = make_scanner(self)

    def decode(self, s, _w=WHITESPACE.match):
        """Return the Python representation of ``s`` (a ``str`` or ``unicode``
        instance containing a JSON document)

        Leading and trailing whitespace is ignored; anything else after the
        document raises JSONDecodeError ("Extra data").

        """
        start = _w(s, 0).end()
        obj, end = self.raw_decode(s, idx=start)
        end = _w(s, end).end()
        if end != len(s):
            raise JSONDecodeError("Extra data", s, end, len(s))
        return obj

    def raw_decode(self, s, idx=0):
        """Decode a JSON document from ``s`` (a ``str`` or ``unicode``
        beginning with a JSON document) and return a 2-tuple of the Python
        representation and the index in ``s`` where the document ended.

        This can be used to decode a JSON document from a string that may
        have extraneous data at the end.

        """
        try:
            decoded = self.scan_once(s, idx)
        except StopIteration:
            raise JSONDecodeError("No JSON object could be decoded", s, idx)
        obj, end = decoded
        return obj, end
@@ -0,0 +1,567 @@
1
+ """Implementation of JSONEncoder
2
+ """
3
+ import re
4
+ from decimal import Decimal
5
+
6
+ def _import_speedups():
7
+ try:
8
+ from simplejson import _speedups
9
+ return _speedups.encode_basestring_ascii, _speedups.make_encoder
10
+ except ImportError:
11
+ return None, None
12
+ c_encode_basestring_ascii, c_make_encoder = _import_speedups()
13
+
14
+ from simplejson.decoder import PosInf
15
+
16
+ ESCAPE = re.compile(ur'[\x00-\x1f\\"\b\f\n\r\t\u2028\u2029]')
17
+ ESCAPE_ASCII = re.compile(r'([\\"]|[^\ -~])')
18
+ HAS_UTF8 = re.compile(r'[\x80-\xff]')
19
+ ESCAPE_DCT = {
20
+ '\\': '\\\\',
21
+ '"': '\\"',
22
+ '\b': '\\b',
23
+ '\f': '\\f',
24
+ '\n': '\\n',
25
+ '\r': '\\r',
26
+ '\t': '\\t',
27
+ u'\u2028': '\\u2028',
28
+ u'\u2029': '\\u2029',
29
+ }
30
+ for i in range(0x20):
31
+ #ESCAPE_DCT.setdefault(chr(i), '\\u{0:04x}'.format(i))
32
+ ESCAPE_DCT.setdefault(chr(i), '\\u%04x' % (i,))
33
+
34
+ FLOAT_REPR = repr
35
+
36
def encode_basestring(s):
    """Return a JSON representation of a Python string

    Byte strings containing non-ASCII bytes are decoded as UTF-8 first.
    Every character matched by ``ESCAPE`` is replaced by its entry in
    ``ESCAPE_DCT`` and the result is wrapped in double quotes.

    """
    needs_decoding = isinstance(s, str) and HAS_UTF8.search(s) is not None
    if needs_decoding:
        s = s.decode('utf-8')
    escaped = ESCAPE.sub(lambda match: ESCAPE_DCT[match.group(0)], s)
    return u'"' + escaped + u'"'
45
+
46
+
47
def py_encode_basestring_ascii(s):
    """Return an ASCII-only JSON representation of a Python string

    Byte strings containing non-ASCII bytes are decoded as UTF-8 first.
    Characters with an entry in ``ESCAPE_DCT`` use that short escape; any
    other character matched by ``ESCAPE_ASCII`` becomes ``\\uXXXX``, or a
    surrogate pair of two such escapes when it lies above U+FFFF.

    """
    if isinstance(s, str) and HAS_UTF8.search(s) is not None:
        s = s.decode('utf-8')

    def _escape(match):
        char = match.group(0)
        short = ESCAPE_DCT.get(char)
        if short is not None:
            return short
        code = ord(char)
        if code < 0x10000:
            return '\\u%04x' % (code,)
        # Outside the BMP: split into a UTF-16 surrogate pair
        code -= 0x10000
        high = 0xd800 | ((code >> 10) & 0x3ff)
        low = 0xdc00 | (code & 0x3ff)
        return '\\u%04x\\u%04x' % (high, low)

    return '"' + str(ESCAPE_ASCII.sub(_escape, s)) + '"'
70
+
71
+
72
+ encode_basestring_ascii = (
73
+ c_encode_basestring_ascii or py_encode_basestring_ascii)
74
+
75
class JSONEncoder(object):
    """Extensible JSON <http://json.org> encoder for Python data structures.

    Supports the following objects and types by default:

    +-------------------+---------------+
    | Python            | JSON          |
    +===================+===============+
    | dict, namedtuple  | object        |
    +-------------------+---------------+
    | list, tuple       | array         |
    +-------------------+---------------+
    | str, unicode      | string        |
    +-------------------+---------------+
    | int, long, float  | number        |
    +-------------------+---------------+
    | True              | true          |
    +-------------------+---------------+
    | False             | false         |
    +-------------------+---------------+
    | None              | null          |
    +-------------------+---------------+

    To extend this to recognize other objects, subclass and implement a
    ``.default()`` method that returns a serializable object for ``o`` if
    possible, otherwise it should call the superclass implementation (to
    raise ``TypeError``).

    """
    item_separator = ', '
    key_separator = ': '
    def __init__(self, skipkeys=False, ensure_ascii=True,
            check_circular=True, allow_nan=True, sort_keys=False,
            indent=None, separators=None, encoding='utf-8', default=None,
            use_decimal=True, namedtuple_as_object=True,
            tuple_as_array=True, bigint_as_string=False,
            item_sort_key=None):
        """Constructor for JSONEncoder, with sensible defaults.

        If skipkeys is false, then it is a TypeError to attempt
        encoding of keys that are not str, int, long, float or None.  If
        skipkeys is True, such items are simply skipped.

        If ensure_ascii is true, the output is guaranteed to be str
        objects with all incoming unicode characters escaped.  If
        ensure_ascii is false, the output will be unicode object.

        If check_circular is true, then lists, dicts, and custom encoded
        objects will be checked for circular references during encoding to
        prevent an infinite recursion (which would cause an OverflowError).
        Otherwise, no such check takes place.

        If allow_nan is true, then NaN, Infinity, and -Infinity will be
        encoded as such.  This behavior is not JSON specification compliant,
        but is consistent with most JavaScript based encoders and decoders.
        Otherwise, it will be a ValueError to encode such floats.

        If sort_keys is true, then the output of dictionaries will be
        sorted by key; this is useful for regression tests to ensure
        that JSON serializations can be compared on a day-to-day basis.

        If indent is a string, then JSON array elements and object members
        will be pretty-printed with a newline followed by that string repeated
        for each level of nesting. ``None`` (the default) selects the most compact
        representation without any newlines. For backwards compatibility with
        versions of simplejson earlier than 2.1.0, an integer is also accepted
        and is converted to a string with that many spaces.

        If specified, separators should be a (item_separator, key_separator)
        tuple.  The default is (', ', ': ').  To get the most compact JSON
        representation you should specify (',', ':') to eliminate whitespace.

        If specified, default is a function that gets called for objects
        that can't otherwise be serialized.  It should return a JSON encodable
        version of the object or raise a ``TypeError``.

        If encoding is not None, then all input strings will be
        transformed into unicode using that encoding prior to JSON-encoding.
        The default is UTF-8.

        If use_decimal is true (the default), ``decimal.Decimal`` will
        be supported directly by the encoder. For the inverse, decode JSON
        with ``parse_float=decimal.Decimal``.

        If namedtuple_as_object is true (the default), objects with
        ``_asdict()`` methods will be encoded as JSON objects.

        If tuple_as_array is true (the default), tuple (and subclasses) will
        be encoded as JSON arrays.

        If bigint_as_string is true (not the default), ints 2**53 and higher
        or lower than -2**53 will be encoded as strings. This is to avoid the
        rounding that happens in Javascript otherwise.

        If specified, item_sort_key is a callable used to sort the items in
        each dictionary. This is useful if you want to sort items other than
        in alphabetical order by key.
        """

        self.skipkeys = skipkeys
        self.ensure_ascii = ensure_ascii
        self.check_circular = check_circular
        self.allow_nan = allow_nan
        self.sort_keys = sort_keys
        self.use_decimal = use_decimal
        self.namedtuple_as_object = namedtuple_as_object
        self.tuple_as_array = tuple_as_array
        self.bigint_as_string = bigint_as_string
        self.item_sort_key = item_sort_key
        # Backwards compatibility: an integer indent means that many spaces
        if indent is not None and not isinstance(indent, basestring):
            indent = indent * ' '
        self.indent = indent
        if separators is not None:
            self.item_separator, self.key_separator = separators
        elif indent is not None:
            # When indenting, each item is followed by a newline, so the
            # trailing space of the default ', ' separator is dropped
            self.item_separator = ','
        if default is not None:
            # Shadows the default() method on this instance only
            self.default = default
        self.encoding = encoding

    def default(self, o):
        """Implement this method in a subclass such that it returns
        a serializable object for ``o``, or calls the base implementation
        (to raise a ``TypeError``).

        For example, to support arbitrary iterators, you could
        implement default like this::

            def default(self, o):
                try:
                    iterable = iter(o)
                except TypeError:
                    pass
                else:
                    return list(iterable)
                return JSONEncoder.default(self, o)

        """
        raise TypeError(repr(o) + " is not JSON serializable")

    def encode(self, o):
        """Return a JSON string representation of a Python data structure.

        >>> from simplejson import JSONEncoder
        >>> JSONEncoder().encode({"foo": ["bar", "baz"]})
        '{"foo": ["bar", "baz"]}'

        """
        # This is for extremely simple cases and benchmarks.
        if isinstance(o, basestring):
            if isinstance(o, str):
                _encoding = self.encoding
                if (_encoding is not None
                        and not (_encoding == 'utf-8')):
                    o = o.decode(_encoding)
            if self.ensure_ascii:
                return encode_basestring_ascii(o)
            else:
                return encode_basestring(o)
        # This doesn't pass the iterator directly to ''.join() because the
        # exceptions aren't as detailed.  The list call should be roughly
        # equivalent to the PySequence_Fast that ''.join() would do.
        chunks = self.iterencode(o, _one_shot=True)
        if not isinstance(chunks, (list, tuple)):
            chunks = list(chunks)
        if self.ensure_ascii:
            return ''.join(chunks)
        else:
            return u''.join(chunks)

    def iterencode(self, o, _one_shot=False):
        """Encode the given object and yield each string
        representation as available.

        For example::

            for chunk in JSONEncoder().iterencode(bigobject):
                mysocket.write(chunk)

        """
        if self.check_circular:
            markers = {}
        else:
            markers = None
        if self.ensure_ascii:
            _encoder = encode_basestring_ascii
        else:
            _encoder = encode_basestring
        # Only wrap the encoder for a real, non-UTF-8 encoding.  With
        # encoding=None there is nothing to decode with (str.decode(None)
        # raises TypeError); encode() applies the same guard.
        if self.encoding is not None and self.encoding != 'utf-8':
            def _encoder(o, _orig_encoder=_encoder, _encoding=self.encoding):
                if isinstance(o, str):
                    o = o.decode(_encoding)
                return _orig_encoder(o)

        def floatstr(o, allow_nan=self.allow_nan,
                _repr=FLOAT_REPR, _inf=PosInf, _neginf=-PosInf):
            # Check for specials. Note that this type of test is processor
            # and/or platform-specific, so do tests which don't depend on
            # the internals.

            if o != o:
                text = 'NaN'
            elif o == _inf:
                text = 'Infinity'
            elif o == _neginf:
                text = '-Infinity'
            else:
                return _repr(o)

            if not allow_nan:
                raise ValueError(
                    "Out of range float values are not JSON compliant: " +
                    repr(o))

            return text


        key_memo = {}
        # The C encoder is only used for one-shot, non-indented output
        if (_one_shot and c_make_encoder is not None
                and self.indent is None):
            _iterencode = c_make_encoder(
                markers, self.default, _encoder, self.indent,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, self.allow_nan, key_memo, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                self.bigint_as_string, self.item_sort_key,
                Decimal)
        else:
            _iterencode = _make_iterencode(
                markers, self.default, _encoder, self.indent, floatstr,
                self.key_separator, self.item_separator, self.sort_keys,
                self.skipkeys, _one_shot, self.use_decimal,
                self.namedtuple_as_object, self.tuple_as_array,
                self.bigint_as_string, self.item_sort_key,
                Decimal=Decimal)
        try:
            return _iterencode(o, 0)
        finally:
            key_memo.clear()
314
+
315
+
316
class JSONEncoderForHTML(JSONEncoder):
    """An encoder that produces JSON safe to embed in HTML.

    To embed JSON content in, say, a script tag on a web page, the
    characters &, < and > should be escaped. They cannot be escaped
    with the usual entities (e.g. &amp;) because they are not expanded
    within <script> tags.
    """

    def encode(self, o):
        # Override JSONEncoder.encode because it has hacks for
        # performance that make things more complicated.
        joiner = '' if self.ensure_ascii else u''
        return joiner.join(self.iterencode(o, True))

    def iterencode(self, o, _one_shot=False):
        # Replace each HTML-significant character with its \uXXXX escape in
        # every chunk produced by the base encoder.
        replacements = (
            ('&', '\\u0026'),
            ('<', '\\u003c'),
            ('>', '\\u003e'),
        )
        parent = super(JSONEncoderForHTML, self)
        for chunk in parent.iterencode(o, _one_shot):
            for unsafe, escaped in replacements:
                chunk = chunk.replace(unsafe, escaped)
            yield chunk
341
+
342
+
343
+ def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
344
+ _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
345
+ _use_decimal, _namedtuple_as_object, _tuple_as_array,
346
+ _bigint_as_string, _item_sort_key,
347
+ ## HACK: hand-optimized bytecode; turn globals into locals
348
+ False=False,
349
+ True=True,
350
+ ValueError=ValueError,
351
+ basestring=basestring,
352
+ Decimal=Decimal,
353
+ dict=dict,
354
+ float=float,
355
+ id=id,
356
+ int=int,
357
+ isinstance=isinstance,
358
+ list=list,
359
+ long=long,
360
+ str=str,
361
+ tuple=tuple,
362
+ ):
363
+ if _item_sort_key and not callable(_item_sort_key):
364
+ raise TypeError("item_sort_key must be None or callable")
365
+
366
+ def _iterencode_list(lst, _current_indent_level):
367
+ if not lst:
368
+ yield '[]'
369
+ return
370
+ if markers is not None:
371
+ markerid = id(lst)
372
+ if markerid in markers:
373
+ raise ValueError("Circular reference detected")
374
+ markers[markerid] = lst
375
+ buf = '['
376
+ if _indent is not None:
377
+ _current_indent_level += 1
378
+ newline_indent = '\n' + (_indent * _current_indent_level)
379
+ separator = _item_separator + newline_indent
380
+ buf += newline_indent
381
+ else:
382
+ newline_indent = None
383
+ separator = _item_separator
384
+ first = True
385
+ for value in lst:
386
+ if first:
387
+ first = False
388
+ else:
389
+ buf = separator
390
+ if isinstance(value, basestring):
391
+ yield buf + _encoder(value)
392
+ elif value is None:
393
+ yield buf + 'null'
394
+ elif value is True:
395
+ yield buf + 'true'
396
+ elif value is False:
397
+ yield buf + 'false'
398
+ elif isinstance(value, (int, long)):
399
+ yield ((buf + str(value))
400
+ if (not _bigint_as_string or
401
+ (-1 << 53) < value < (1 << 53))
402
+ else (buf + '"' + str(value) + '"'))
403
+ elif isinstance(value, float):
404
+ yield buf + _floatstr(value)
405
+ elif _use_decimal and isinstance(value, Decimal):
406
+ yield buf + str(value)
407
+ else:
408
+ yield buf
409
+ if isinstance(value, list):
410
+ chunks = _iterencode_list(value, _current_indent_level)
411
+ else:
412
+ _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
413
+ if _asdict and callable(_asdict):
414
+ chunks = _iterencode_dict(_asdict(),
415
+ _current_indent_level)
416
+ elif _tuple_as_array and isinstance(value, tuple):
417
+ chunks = _iterencode_list(value, _current_indent_level)
418
+ elif isinstance(value, dict):
419
+ chunks = _iterencode_dict(value, _current_indent_level)
420
+ else:
421
+ chunks = _iterencode(value, _current_indent_level)
422
+ for chunk in chunks:
423
+ yield chunk
424
+ if newline_indent is not None:
425
+ _current_indent_level -= 1
426
+ yield '\n' + (_indent * _current_indent_level)
427
+ yield ']'
428
+ if markers is not None:
429
+ del markers[markerid]
430
+
431
+ def _iterencode_dict(dct, _current_indent_level):
432
+ if not dct:
433
+ yield '{}'
434
+ return
435
+ if markers is not None:
436
+ markerid = id(dct)
437
+ if markerid in markers:
438
+ raise ValueError("Circular reference detected")
439
+ markers[markerid] = dct
440
+ yield '{'
441
+ if _indent is not None:
442
+ _current_indent_level += 1
443
+ newline_indent = '\n' + (_indent * _current_indent_level)
444
+ item_separator = _item_separator + newline_indent
445
+ yield newline_indent
446
+ else:
447
+ newline_indent = None
448
+ item_separator = _item_separator
449
+ first = True
450
+ if _item_sort_key:
451
+ items = dct.items()
452
+ items.sort(key=_item_sort_key)
453
+ elif _sort_keys:
454
+ items = dct.items()
455
+ items.sort(key=lambda kv: kv[0])
456
+ else:
457
+ items = dct.iteritems()
458
+ for key, value in items:
459
+ if isinstance(key, basestring):
460
+ pass
461
+ # JavaScript is weakly typed for these, so it makes sense to
462
+ # also allow them. Many encoders seem to do something like this.
463
+ elif isinstance(key, float):
464
+ key = _floatstr(key)
465
+ elif key is True:
466
+ key = 'true'
467
+ elif key is False:
468
+ key = 'false'
469
+ elif key is None:
470
+ key = 'null'
471
+ elif isinstance(key, (int, long)):
472
+ key = str(key)
473
+ elif _skipkeys:
474
+ continue
475
+ else:
476
+ raise TypeError("key " + repr(key) + " is not a string")
477
+ if first:
478
+ first = False
479
+ else:
480
+ yield item_separator
481
+ yield _encoder(key)
482
+ yield _key_separator
483
+ if isinstance(value, basestring):
484
+ yield _encoder(value)
485
+ elif value is None:
486
+ yield 'null'
487
+ elif value is True:
488
+ yield 'true'
489
+ elif value is False:
490
+ yield 'false'
491
+ elif isinstance(value, (int, long)):
492
+ yield (str(value)
493
+ if (not _bigint_as_string or
494
+ (-1 << 53) < value < (1 << 53))
495
+ else ('"' + str(value) + '"'))
496
+ elif isinstance(value, float):
497
+ yield _floatstr(value)
498
+ elif _use_decimal and isinstance(value, Decimal):
499
+ yield str(value)
500
+ else:
501
+ if isinstance(value, list):
502
+ chunks = _iterencode_list(value, _current_indent_level)
503
+ else:
504
+ _asdict = _namedtuple_as_object and getattr(value, '_asdict', None)
505
+ if _asdict and callable(_asdict):
506
+ chunks = _iterencode_dict(_asdict(),
507
+ _current_indent_level)
508
+ elif _tuple_as_array and isinstance(value, tuple):
509
+ chunks = _iterencode_list(value, _current_indent_level)
510
+ elif isinstance(value, dict):
511
+ chunks = _iterencode_dict(value, _current_indent_level)
512
+ else:
513
+ chunks = _iterencode(value, _current_indent_level)
514
+ for chunk in chunks:
515
+ yield chunk
516
+ if newline_indent is not None:
517
+ _current_indent_level -= 1
518
+ yield '\n' + (_indent * _current_indent_level)
519
+ yield '}'
520
+ if markers is not None:
521
+ del markers[markerid]
522
+
523
+ def _iterencode(o, _current_indent_level):
524
+ if isinstance(o, basestring):
525
+ yield _encoder(o)
526
+ elif o is None:
527
+ yield 'null'
528
+ elif o is True:
529
+ yield 'true'
530
+ elif o is False:
531
+ yield 'false'
532
+ elif isinstance(o, (int, long)):
533
+ yield (str(o)
534
+ if (not _bigint_as_string or
535
+ (-1 << 53) < o < (1 << 53))
536
+ else ('"' + str(o) + '"'))
537
+ elif isinstance(o, float):
538
+ yield _floatstr(o)
539
+ elif isinstance(o, list):
540
+ for chunk in _iterencode_list(o, _current_indent_level):
541
+ yield chunk
542
+ else:
543
+ _asdict = _namedtuple_as_object and getattr(o, '_asdict', None)
544
+ if _asdict and callable(_asdict):
545
+ for chunk in _iterencode_dict(_asdict(), _current_indent_level):
546
+ yield chunk
547
+ elif (_tuple_as_array and isinstance(o, tuple)):
548
+ for chunk in _iterencode_list(o, _current_indent_level):
549
+ yield chunk
550
+ elif isinstance(o, dict):
551
+ for chunk in _iterencode_dict(o, _current_indent_level):
552
+ yield chunk
553
+ elif _use_decimal and isinstance(o, Decimal):
554
+ yield str(o)
555
+ else:
556
+ if markers is not None:
557
+ markerid = id(o)
558
+ if markerid in markers:
559
+ raise ValueError("Circular reference detected")
560
+ markers[markerid] = o
561
+ o = _default(o)
562
+ for chunk in _iterencode(o, _current_indent_level):
563
+ yield chunk
564
+ if markers is not None:
565
+ del markers[markerid]
566
+
567
+ return _iterencode