@grain/stdlib 0.5.13 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (155) hide show
  1. package/CHANGELOG.md +201 -0
  2. package/LICENSE +1 -1
  3. package/README.md +25 -2
  4. package/array.gr +1512 -199
  5. package/array.md +2032 -94
  6. package/bigint.gr +239 -140
  7. package/bigint.md +450 -106
  8. package/buffer.gr +595 -102
  9. package/buffer.md +903 -145
  10. package/bytes.gr +401 -110
  11. package/bytes.md +551 -63
  12. package/char.gr +228 -49
  13. package/char.md +373 -7
  14. package/exception.gr +26 -12
  15. package/exception.md +29 -5
  16. package/float32.gr +130 -109
  17. package/float32.md +185 -57
  18. package/float64.gr +112 -99
  19. package/float64.md +185 -57
  20. package/hash.gr +62 -40
  21. package/hash.md +27 -3
  22. package/int16.gr +430 -0
  23. package/int16.md +618 -0
  24. package/int32.gr +200 -269
  25. package/int32.md +254 -289
  26. package/int64.gr +142 -225
  27. package/int64.md +254 -289
  28. package/int8.gr +511 -0
  29. package/int8.md +786 -0
  30. package/json.gr +2071 -0
  31. package/json.md +646 -0
  32. package/list.gr +120 -68
  33. package/list.md +125 -80
  34. package/map.gr +560 -57
  35. package/map.md +672 -56
  36. package/marshal.gr +239 -227
  37. package/marshal.md +36 -4
  38. package/number.gr +626 -676
  39. package/number.md +738 -153
  40. package/option.gr +33 -35
  41. package/option.md +58 -42
  42. package/package.json +2 -2
  43. package/path.gr +148 -187
  44. package/path.md +47 -96
  45. package/pervasives.gr +75 -416
  46. package/pervasives.md +85 -180
  47. package/priorityqueue.gr +433 -74
  48. package/priorityqueue.md +422 -54
  49. package/queue.gr +362 -80
  50. package/queue.md +433 -38
  51. package/random.gr +67 -75
  52. package/random.md +68 -40
  53. package/range.gr +135 -63
  54. package/range.md +198 -43
  55. package/rational.gr +284 -0
  56. package/rational.md +545 -0
  57. package/regex.gr +933 -1066
  58. package/regex.md +59 -60
  59. package/result.gr +23 -25
  60. package/result.md +54 -39
  61. package/runtime/atof/common.gr +78 -82
  62. package/runtime/atof/common.md +22 -10
  63. package/runtime/atof/decimal.gr +102 -127
  64. package/runtime/atof/decimal.md +28 -7
  65. package/runtime/atof/lemire.gr +56 -71
  66. package/runtime/atof/lemire.md +9 -1
  67. package/runtime/atof/parse.gr +83 -110
  68. package/runtime/atof/parse.md +12 -2
  69. package/runtime/atof/slow.gr +28 -35
  70. package/runtime/atof/slow.md +9 -1
  71. package/runtime/atof/table.gr +19 -18
  72. package/runtime/atof/table.md +10 -2
  73. package/runtime/atoi/parse.gr +153 -136
  74. package/runtime/atoi/parse.md +50 -1
  75. package/runtime/bigint.gr +410 -517
  76. package/runtime/bigint.md +71 -57
  77. package/runtime/compare.gr +176 -85
  78. package/runtime/compare.md +31 -1
  79. package/runtime/dataStructures.gr +144 -32
  80. package/runtime/dataStructures.md +267 -31
  81. package/runtime/debugPrint.gr +34 -15
  82. package/runtime/debugPrint.md +37 -5
  83. package/runtime/equal.gr +53 -52
  84. package/runtime/equal.md +30 -1
  85. package/runtime/exception.gr +38 -47
  86. package/runtime/exception.md +10 -8
  87. package/runtime/gc.gr +23 -152
  88. package/runtime/gc.md +13 -17
  89. package/runtime/malloc.gr +31 -31
  90. package/runtime/malloc.md +11 -3
  91. package/runtime/numberUtils.gr +193 -174
  92. package/runtime/numberUtils.md +29 -9
  93. package/runtime/numbers.gr +1695 -1021
  94. package/runtime/numbers.md +1098 -134
  95. package/runtime/string.gr +543 -245
  96. package/runtime/string.md +76 -6
  97. package/runtime/unsafe/constants.gr +30 -13
  98. package/runtime/unsafe/constants.md +80 -0
  99. package/runtime/unsafe/conv.gr +55 -28
  100. package/runtime/unsafe/conv.md +41 -9
  101. package/runtime/unsafe/memory.gr +10 -30
  102. package/runtime/unsafe/memory.md +15 -19
  103. package/runtime/unsafe/tags.gr +37 -21
  104. package/runtime/unsafe/tags.md +88 -8
  105. package/runtime/unsafe/wasmf32.gr +30 -36
  106. package/runtime/unsafe/wasmf32.md +64 -56
  107. package/runtime/unsafe/wasmf64.gr +30 -36
  108. package/runtime/unsafe/wasmf64.md +64 -56
  109. package/runtime/unsafe/wasmi32.gr +49 -66
  110. package/runtime/unsafe/wasmi32.md +102 -94
  111. package/runtime/unsafe/wasmi64.gr +52 -79
  112. package/runtime/unsafe/wasmi64.md +108 -100
  113. package/runtime/utils/printing.gr +13 -15
  114. package/runtime/utils/printing.md +11 -3
  115. package/runtime/wasi.gr +294 -295
  116. package/runtime/wasi.md +62 -42
  117. package/set.gr +574 -64
  118. package/set.md +634 -54
  119. package/stack.gr +181 -64
  120. package/stack.md +271 -42
  121. package/string.gr +453 -533
  122. package/string.md +241 -151
  123. package/uint16.gr +369 -0
  124. package/uint16.md +585 -0
  125. package/uint32.gr +470 -0
  126. package/uint32.md +737 -0
  127. package/uint64.gr +471 -0
  128. package/uint64.md +737 -0
  129. package/uint8.gr +369 -0
  130. package/uint8.md +585 -0
  131. package/uri.gr +1093 -0
  132. package/uri.md +477 -0
  133. package/{sys → wasi}/file.gr +914 -500
  134. package/{sys → wasi}/file.md +454 -50
  135. package/wasi/process.gr +292 -0
  136. package/{sys → wasi}/process.md +164 -6
  137. package/wasi/random.gr +77 -0
  138. package/wasi/random.md +80 -0
  139. package/{sys → wasi}/time.gr +15 -22
  140. package/{sys → wasi}/time.md +5 -5
  141. package/immutablearray.gr +0 -929
  142. package/immutablearray.md +0 -1038
  143. package/immutablemap.gr +0 -493
  144. package/immutablemap.md +0 -479
  145. package/immutablepriorityqueue.gr +0 -360
  146. package/immutablepriorityqueue.md +0 -291
  147. package/immutableset.gr +0 -498
  148. package/immutableset.md +0 -449
  149. package/runtime/debug.gr +0 -2
  150. package/runtime/debug.md +0 -6
  151. package/runtime/unsafe/errors.gr +0 -36
  152. package/runtime/unsafe/errors.md +0 -204
  153. package/sys/process.gr +0 -254
  154. package/sys/random.gr +0 -79
  155. package/sys/random.md +0 -66
package/json.gr ADDED
@@ -0,0 +1,2071 @@
1
+ /**
2
+ * JSON (JavaScript Object Notation) parsing, printing, and access utilities.
3
+ *
4
+ * @example from "json" include Json
5
+ * @example Json.parse("{\"currency\":\"€\",\"price\":99.99}")
6
+ * @example
7
+ * print(
8
+ * toString(
9
+ * format=Pretty,
10
+ * JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))])
11
+ * )
12
+ * )
13
+ */
14
+ module Json
15
+
16
+ from "runtime/bigint" include Bigint as BI
17
+ from "runtime/dataStructures" include DataStructures
18
+ from "runtime/numbers" include Numbers
19
+ from "runtime/numberUtils" include NumberUtils
20
+ from "runtime/string" include String as RuntimeString
21
+ from "runtime/unsafe/tags" include Tags
22
+ from "runtime/unsafe/wasmi32" include WasmI32
23
+ from "runtime/unsafe/wasmi64" include WasmI64
24
+ from "runtime/unsafe/wasmf64" include WasmF64
25
+ from "runtime/atof/parse" include Parse as Atof
26
+ from "buffer" include Buffer
27
+ from "char" include Char
28
+ from "string" include String
29
+ from "list" include List
30
+ from "uint8" include Uint8
31
+ use RuntimeString.{ toString as runtimeToString, getCodePoint }
32
+ use Numbers.{ coerceNumberToWasmI32 }
33
+ use DataStructures.{ tagSimpleNumber, untagSimpleNumber }
34
+
35
+ // Primitive offsets
36
+ // TODO(#703): Get these offsets from the runtime
37
+ @unsafe
38
+ let _INT64_BOXED_VALUE_OFFSET = 8n
39
+ @unsafe
40
+ let _Float64_BOXED_VALUE_OFFSET = 8n
41
+
42
+ /**
43
+ * Data structure representing JSON in Grain.
44
+ *
45
+ * @example
46
+ * assert Json.parse("{\"currency\":\"€\",\"price\":99.99}") == JsonObject([
47
+ * ("currency", JsonString("€")),
48
+ * ("price", JsonNumber(99.99)),
49
+ * ])
50
+ *
51
+ * @example
52
+ * assert Json.parse("{\n\"currency\":\"€\",\n\"price\":99.99\n}") == JsonObject([
53
+ * ("currency", JsonString("€")),
54
+ * ("price", JsonNumber(99.99)),
55
+ * ])
56
+ */
57
+ provide enum rec Json {
58
+ /**
59
+ * Represents the JSON `null` value.
60
+ */
61
+ JsonNull,
62
+ /**
63
+ * Represents a JSON boolean value.
64
+ */
65
+ JsonBoolean(Bool),
66
+ /**
67
+ * Represents a JSON number value.
68
+ */
69
+ JsonNumber(Number),
70
+ /**
71
+ * Represents a JSON string value.
72
+ */
73
+ JsonString(String),
74
+ /**
75
+ * Represents a JSON array value.
76
+ */
77
+ JsonArray(List<Json>),
78
+ // Note that JsonObject here is deliberately defined as a simple list of key value pair tuples as opposed
79
+ // to for example a Map in order to accommodate the fact that the ECMA-404 standard doesn't prohibit
80
+ // duplicate names in Objects. Such JSON should be representable by the JSON data structure for lossless
81
+ // processing. This also simplifies implementation by not requiring a purpose built data structure and
82
+ // has the benefit of List's immutability. It's a conscious decision that sacrifices ease of use of the
83
+ // API for lossless handing of these edge cases with intention of later building more ergonomic APIs on a
84
+ // higher level of abstraction.
85
+ /**
86
+ * Represents a JSON object value, as a list of (key, value).
87
+ */
88
+ JsonObject(List<(String, Json)>),
89
+ }
90
+
91
+ /**
92
+ * Represents errors for cases where a `Json` data structure cannot be represented as a
93
+ * JSON string.
94
+ */
95
+ provide enum JsonToStringError {
96
+ /**
97
+ * The `Json` data structure contains a number value of `NaN`, `Infinity`, or `-Infinity`.
98
+ */
99
+ InvalidNumber(String),
100
+ }
101
+
102
+ /**
103
+ * Controls how indentation is output in custom formatting.
104
+ */
105
+ provide enum IndentationFormat {
106
+ /**
107
+ * No indentation is emitted.
108
+ *
109
+ * ```json
110
+ * {
111
+ * "currency": "€",
112
+ * "price": 99.9
113
+ * }
114
+ * ```
115
+ */
116
+ NoIndentation,
117
+ /**
118
+ * Tabs are emitted.
119
+ *
120
+ * ```json
121
+ * {
122
+ * "currency": "€",
123
+ * "price": 99.9
124
+ * }
125
+ * ```
126
+ */
127
+ IndentWithTab,
128
+ /**
129
+ * The desired number of spaces are emitted.
130
+ *
131
+ * `IndentWithSpaces(2)`
132
+ * ```json
133
+ * {
134
+ * "currency": "€",
135
+ * "price": 99.9
136
+ * }
137
+ * ```
138
+ *
139
+ * `IndentWithSpaces(4)`
140
+ * ```json
141
+ * {
142
+ * "currency": "€",
143
+ * "price": 99.9
144
+ * }
145
+ * ```
146
+ */
147
+ IndentWithSpaces(Number),
148
+ }
149
+
150
+ /**
151
+ * Controls how arrays are output in custom formatting.
152
+ */
153
+ provide enum ArrayFormat {
154
+ /**
155
+ * Arrays are emitted in a compact manner.
156
+ *
157
+ * ```json
158
+ * []
159
+ * ```
160
+ *
161
+ * ```json
162
+ * [1]
163
+ * ```
164
+ *
165
+ * ```json
166
+ * [1,2,3]
167
+ * ```
168
+ */
169
+ CompactArrayEntries,
170
+ /**
171
+ * Arrays are emitted with spaces between elements.
172
+ *
173
+ * ```json
174
+ * [ ]
175
+ * ```
176
+ *
177
+ * ```json
178
+ * [1]
179
+ * ```
180
+ *
181
+ * ```json
182
+ * [1, 2, 3]
183
+ * ```
184
+ */
185
+ SpacedArrayEntries,
186
+ /**
187
+ * Arrays are emitted with newlines and indentation between each element.
188
+ *
189
+ * ```json
190
+ * []
191
+ * ```
192
+ *
193
+ * ```json
194
+ * [
195
+ * 1
196
+ * ]
197
+ * ```
198
+ *
199
+ * ```json
200
+ * [
201
+ * 1,
202
+ * 2,
203
+ * 3
204
+ * ]
205
+ * ```
206
+ */
207
+ OneArrayEntryPerLine,
208
+ }
209
+
210
+ /**
211
+ * Controls how objects are output in custom formatting.
212
+ */
213
+ provide enum ObjectFormat {
214
+ /**
215
+ * Objects are emitted in a compact manner.
216
+ *
217
+ * ```json
218
+ * {}
219
+ * ```
220
+ *
221
+ * ```json
222
+ * {"a":1}
223
+ * ```
224
+ *
225
+ * ```json
226
+ * {"a":1,"b":2,"c":3}
227
+ * ```
228
+ */
229
+ CompactObjectEntries,
230
+ /**
231
+ * Objects are emitted with spaces between entries.
232
+ *
233
+ * ```json
234
+ * { }
235
+ * ```
236
+ *
237
+ * ```json
238
+ * {"a": 1}
239
+ * ```
240
+ *
241
+ * ```json
242
+ * {"a": 1, "b": 2, "c": 3}
243
+ * ```
244
+ */
245
+ SpacedObjectEntries,
246
+ /**
247
+ * Objects are emitted with each entry on a new line.
248
+ *
249
+ * ```
250
+ * {}
251
+ * ```
252
+ *
253
+ * ```
254
+ * {
255
+ * "a": 1
256
+ * }
257
+ * ```
258
+ *
259
+ * ```
260
+ * {
261
+ * "a": 1,
262
+ * "b": 2,
263
+ * "c": 3
264
+ * }
265
+ * ```
266
+ */
267
+ OneObjectEntryPerLine,
268
+ }
269
+
270
+ /**
271
+ * Controls how line endings are output in custom formatting.
272
+ */
273
+ provide enum LineEnding {
274
+ /**
275
+ * No line endings will be emitted.
276
+ */
277
+ NoLineEnding,
278
+ /**
279
+ * A `\n` will be emitted at the end of each line.
280
+ */
281
+ LineFeed,
282
+ /**
283
+ * A `\r\n` will be emitted at the end of each line.
284
+ */
285
+ CarriageReturnLineFeed,
286
+ /**
287
+ * A `\r` will be emitted at the end of each line.
288
+ */
289
+ CarriageReturn,
290
+ }
291
+
292
+ /*
293
+ * Allows fine-grained control of formatting in JSON output.
294
+ */
295
+ record FormattingSettings {
296
+ indentation: IndentationFormat,
297
+ arrayFormat: ArrayFormat,
298
+ objectFormat: ObjectFormat,
299
+ lineEnding: LineEnding,
300
+ finishWithNewLine: Bool,
301
+ escapeAllControlPoints: Bool,
302
+ escapeHTMLUnsafeSequences: Bool,
303
+ escapeNonASCII: Bool,
304
+ }
305
+
306
+ /**
307
+ * Allows control of formatting in JSON output.
308
+ */
309
+ provide enum FormattingChoices {
310
+ /**
311
+ * Recommended human readable formatting.
312
+ *
313
+ * Escapes all control points for the sake of clarity, but outputs unicode
314
+ * codepoints directly so the result needs to be treated as proper unicode and
315
+ * is not safe to be transported in ASCII encoding.
316
+ *
317
+ * Roughly Equivalent to:
318
+ * ```grain
319
+ * Custom{
320
+ * indentation: IndentWithSpaces(2),
321
+ * arrayFormat: OneArrayEntryPerLine,
322
+ * objectFormat: OneObjectEntryPerLine,
323
+ * lineEnding: LineFeed,
324
+ * finishWithNewLine: true,
325
+ * escapeAllControlPoints: true,
326
+ * escapeHTMLUnsafeSequences: false,
327
+ * escapeNonASCII: false,
328
+ * }
329
+ * ```
330
+ *
331
+ * ```json
332
+ * {
333
+ * "currency": "€",
334
+ * "price": 99.9,
335
+ * "currencyDescription": "EURO\u007f"
336
+ * }
337
+ * ```
338
+ */
339
+ Pretty,
340
+ /**
341
+ * Compact formatting that minimizes the size of resulting JSON at cost of not
342
+ * being easily human readable.
343
+ *
344
+ * Only performs minimal string escaping as required by the ECMA-404 standard,
345
+ * so the result needs to be treated as proper unicode and is not safe to be
346
+ * transported in ASCII encoding.
347
+ *
348
+ * Roughly Equivalent to:
349
+ * ```grain
350
+ * Custom{
351
+ * indentation: NoIndentation,
352
+ * arrayFormat: CompactArrayEntries,
353
+ * objectFormat: CompactObjectEntries,
354
+ * lineEnding: NoLineEnding,
355
+ * finishWithNewLine: false,
356
+ * escapeAllControlPoints: false,
357
+ * escapeHTMLUnsafeSequences: false,
358
+ * escapeNonASCII: false,
359
+ * }
360
+ * ```
361
+ *
362
+ * ```json
363
+ * {"currency":"€","price":99.9,"currencyDescription":"EURO␡"}
364
+ * ```
365
+ */
366
+ Compact,
367
+ /**
368
+ * Pretty and conservative formatting to maximize compatibility and
369
+ * embeddability of the resulting JSON.
370
+ *
371
+ * Should be safe to copy and paste directly into HTML and to be transported in
372
+ * plain ASCII.
373
+ *
374
+ * Roughly Equivalent to:
375
+ * ```grain
376
+ * Custom{
377
+ * indentation: IndentWithSpaces(2),
378
+ * arrayFormat: OneArrayEntryPerLine,
379
+ * objectFormat: OneObjectEntryPerLine,
380
+ * lineEnding: LineFeed,
381
+ * finishWithNewLine: true,
382
+ * escapeAllControlPoints: true,
383
+ * escapeHTMLUnsafeSequences: true,
384
+ * escapeNonASCII: true,
385
+ * }
386
+ * ```
387
+ *
388
+ * ```json
389
+ * {
390
+ * "currency": "\u20ac",
391
+ * "price": 99.9,
392
+ * "currencyDescription": "EURO\u007f"
393
+ * }
394
+ * ```
395
+ */
396
+ PrettyAndSafe,
397
+ /**
398
+ * Compact and conservative formatting to maximize compatibility and
399
+ * embeddability of the resulting JSON.
400
+ *
401
+ * Should be safe to copy and paste directly into HTML and to be transported in
402
+ * plain ASCII.
403
+ *
404
+ * Roughly Equivalent to:
405
+ * ```grain
406
+ * Custom{
407
+ * indentation: NoIndentation,
408
+ * arrayFormat: CompactArrayEntries,
409
+ * objectFormat: CompactObjectEntries,
410
+ * lineEnding: NoLineEnding,
411
+ * finishWithNewLine: false,
412
+ * escapeAllControlPoints: true,
413
+ * escapeHTMLUnsafeSequences: true,
414
+ * escapeNonASCII: true,
415
+ * }
416
+ * ```
417
+ *
418
+ * ```json
419
+ * {"currency":"\u20ac","price":99.9,"currencyDescription":"EURO\u007f"}
420
+ * ```
421
+ */
422
+ CompactAndSafe,
423
+ /**
424
+ * Allows for fine-grained control of the formatting output.
425
+ */
426
+ Custom{
427
+ indentation: IndentationFormat,
428
+ arrayFormat: ArrayFormat,
429
+ objectFormat: ObjectFormat,
430
+ lineEnding: LineEnding,
431
+ finishWithNewLine: Bool,
432
+ escapeAllControlPoints: Bool,
433
+ escapeHTMLUnsafeSequences: Bool,
434
+ escapeNonASCII: Bool,
435
+ },
436
+ }
437
+
438
+ record JsonWriterConfig {
439
+ format: FormattingSettings,
440
+ buffer: Buffer.Buffer,
441
+ emitEscapedQuotedString: String => Void,
442
+ printNewLine: Option<() => Void>,
443
+ printIndentation: Option<Number => Void>,
444
+ }
445
+
446
+ // The idea for this type is to allow reusing a bit of work done in preparing for printing JSON.
447
+ // For now this is not exposed and remains an internal implementation detail.
448
+ // It may make sense in the future to expose it and let the user reuse a writer for multiple
449
+ // JSON emit operations without reallocating new closures and buffers each time.
450
+ record JsonWriter {
451
+ emit: Json => Option<JsonToStringError>,
452
+ }
453
+
454
+ let emitUTF16EscapeSequence = (codePoint: Number, buffer: Buffer.Buffer) => {
455
+ // Emit the "\u" followed by hexadecimal representation of the codepoint
456
+ // with fixed length of 4 hexadecimal digits corresponding to the two byte
457
+ // codepoint. No checks are performed here if the codepoint is in the
458
+ // "Basic Multilingual Plane" (0000-FFFF) as this function is only called
459
+ // internally.
460
+ // An alternative to this implementation was to use NumberUtils.itoa32,
461
+ // but this avoids unnecessary heap allocations. As a possible future
462
+ // optimization this loop could be unrolled possibly even converted to be
463
+ // branchless and SIMD optimized, but it could be a bit of an overkill as
464
+ // this codepath is only for escape sequences, which probably aren't all
465
+ // that common an occurrence.
466
+
467
+ Buffer.addChar('\\', buffer)
468
+ Buffer.addChar('u', buffer)
469
+ // Loop over the four digits from most to least significant.
470
+ for (let mut digitIndex = 3; digitIndex >= 0; digitIndex -= 1) {
471
+ // Use bit masking and shifting to extract from the codepoint a number
472
+ // with just the bits corresponding to this hexadecimal digit.
473
+ let shift = digitIndex * 4
474
+ let mask = 0b1111 << shift
475
+ let digit = (codePoint & mask) >>> shift
476
+
477
+ // Digit now is a number in the range 0..15 and we need to translate it
478
+ // into a unicode codepoint representing the hexadecimal digit
479
+ // (0..9/a..f). We can use the fact that digits and latin letters in
480
+ // ASCII and by extension in Unicode are adjacent and ordered.
481
+ let hexDigitCodePoint = if (digit <= 9) {
482
+ // 48 is codepoint for char '0'
483
+ digit + 48
484
+ } else {
485
+ // 97 is codepoint for char 'a'
486
+ // But we also need to subtract 10 from it because we need
487
+ // the 10..15 number range translated to 0..5 in order to
488
+ // serve as an index in the ASCII range 'a'..'f'.
489
+ digit + 87
490
+ }
491
+
492
+ Buffer.addCharFromCodePoint(hexDigitCodePoint, buffer)
493
+ }
494
+ }
495
+
496
+ let emitEscapedUnicodeSequence = (codePoint: Number, buffer: Buffer.Buffer) => {
497
+ // See the String section in the ECMA-404 doc.
498
+ // If the code point is "in the Basic Multilingual Plane", that is in range
499
+ // 0..65535. Greater values need to be split into two UTF-16 chunks.
500
+ if (codePoint <= 0xFFFF) {
501
+ emitUTF16EscapeSequence(codePoint, buffer)
502
+ } else {
503
+ // The following three lines are copied from String module of Grain's
504
+ // stdlib. It would be nice to share more code. On the other hand it
505
+ // may make sense to just have these few instructions directly here
506
+ // from the performance standpoint so we can print millions of emojis
507
+ // per second 😄.
508
+
509
+ // https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF
510
+ let uPrime = codePoint - 0x10000
511
+ let highSurrogate = ((uPrime & 0b11111111110000000000) >>> 10) + 0xD800
512
+ // High surrogate
513
+ let lowSurrogate = (uPrime & 0b00000000001111111111) + 0xDC00
514
+ // Low surrogate
515
+ emitUTF16EscapeSequence(highSurrogate, buffer)
516
+ emitUTF16EscapeSequence(lowSurrogate, buffer)
517
+ }
518
+ }
519
+
520
+ let emitEscapedCodePoint = (codePoint: Number, buffer: Buffer.Buffer) => {
521
+ match (codePoint) {
522
+ 0x0008 => { // backspace
523
+ Buffer.addChar('\\', buffer)
524
+ Buffer.addChar('b', buffer)
525
+ },
526
+ 0x0009 => { // tab
527
+ Buffer.addChar('\\', buffer)
528
+ Buffer.addChar('t', buffer)
529
+ },
530
+ 0x000A => { // line feed
531
+ Buffer.addChar('\\', buffer)
532
+ Buffer.addChar('n', buffer)
533
+ },
534
+ 0x000C => { // form feed
535
+ Buffer.addChar('\\', buffer)
536
+ Buffer.addChar('f', buffer)
537
+ },
538
+ 0x000D => { // carriage return
539
+ Buffer.addChar('\\', buffer)
540
+ Buffer.addChar('r', buffer)
541
+ },
542
+ 0x0022 => { // quotation mark
543
+ Buffer.addChar('\\', buffer)
544
+ Buffer.addChar('"', buffer)
545
+ },
546
+ 0x005C => { // backslash or "Reverse Solidus"
547
+ Buffer.addChar('\\', buffer)
548
+ Buffer.addChar('\\', buffer)
549
+ },
550
+ _ => {
551
+ emitEscapedUnicodeSequence(codePoint, buffer)
552
+ },
553
+ }
554
+ }
555
+
556
+ let printNull = (buffer: Buffer.Buffer) => Buffer.addString("null", buffer)
557
+
558
+ let printBool = (b: Bool, buffer: Buffer.Buffer) => {
559
+ if (b) {
560
+ Buffer.addString("true", buffer)
561
+ } else {
562
+ Buffer.addString("false", buffer)
563
+ }
564
+ }
565
+
566
+ @unsafe
567
+ let printNumberWasmI32 = (value: WasmI32, buffer: Buffer.Buffer) => {
568
+ let s = NumberUtils.itoa32(value, 10n)
569
+ Buffer.addString(s, buffer)
570
+ }
571
+
572
+ @unsafe
573
+ let printNumberWasmI64 = (value: WasmI64, buffer: Buffer.Buffer) => {
574
+ let s = NumberUtils.itoa64(value, 10n)
575
+ Buffer.addString(s, buffer)
576
+ }
577
+
578
+ @unsafe
579
+ let printNumberWasmF64 = (value: WasmF64, buffer: Buffer.Buffer) => {
580
+ if (NumberUtils.isFinite(value)) {
581
+ let s = NumberUtils.dtoa(value)
582
+ Buffer.addString(s, buffer)
583
+ None
584
+ } else {
585
+ use WasmF64.{ (<) }
586
+ // JSON standard doesn't allow NaN or infinite values in numbers,
587
+ // but WASM f64 (IEEE 754-2008), as well as Grain's number types do
588
+ // (Float64 as well as Number). This is the only reason that the
589
+ // formatting needs to return a Result and not just a String
590
+ // directly. Other possible choices were to throw exceptions or to
591
+ // continue formatting without representing these values correctly
592
+ // (like JavaScript's JSON.stringify).
593
+ if (NumberUtils.isNaN(value)) {
594
+ Some(InvalidNumber("NaN is not allowed in JsonNumber"))
595
+ } else if (value < 0.0W) {
596
+ Some(InvalidNumber("-Infinity is not allowed in JsonNumber"))
597
+ } else {
598
+ Some(InvalidNumber("Infinity is not allowed in JsonNumber"))
599
+ }
600
+ }
601
+ }
602
+
603
+ @unsafe
604
+ let printNumber = (value: Number, buffer: Buffer.Buffer) => {
605
+ use WasmI32.{ (&), (==), (!=), (<<), (>>) }
606
+
607
+ let ptr = WasmI32.fromGrain(value)
608
+ let ret = if ((ptr & 1n) != 0n) {
609
+ printNumberWasmI32(untagSimpleNumber(value), buffer)
610
+ None
611
+ } else if ((ptr & 7n) == Tags._GRAIN_GENERIC_HEAP_TAG_TYPE) {
612
+ let tag = WasmI32.load(ptr, 0n)
613
+ match (tag) {
614
+ t when t == Tags._GRAIN_BOXED_NUM_HEAP_TAG => {
615
+ let numberTag = WasmI32.load(ptr, 4n)
616
+ match (numberTag) {
617
+ t when t == Tags._GRAIN_INT64_BOXED_NUM_TAG => {
618
+ let asWasmI64 = WasmI64.load(ptr, _INT64_BOXED_VALUE_OFFSET)
619
+ printNumberWasmI64(asWasmI64, buffer)
620
+ None
621
+ },
622
+ t when t == Tags._GRAIN_BIGINT_BOXED_NUM_TAG => {
623
+ Buffer.addString(BI.bigIntToString10(ptr), buffer)
624
+ None
625
+ },
626
+ t when t == Tags._GRAIN_RATIONAL_BOXED_NUM_TAG => {
627
+ // JSON does not support rationals as a compromise
628
+ // we coerce them to an f64 and print that
629
+ // this means there is a slight loss in precision
630
+ let asFloat64 = Numbers.coerceNumberToFloat64(value)
631
+ let ptr = WasmI32.fromGrain(asFloat64)
632
+ let asWasmF64 = WasmF64.load(ptr, _Float64_BOXED_VALUE_OFFSET)
633
+ printNumberWasmF64(asWasmF64, buffer)
634
+ },
635
+ t when t == Tags._GRAIN_FLOAT64_BOXED_NUM_TAG => {
636
+ let asWasmF64 = WasmF64.load(ptr, _Float64_BOXED_VALUE_OFFSET)
637
+ printNumberWasmF64(asWasmF64, buffer)
638
+ },
639
+ _ => {
640
+ fail "Impossible: Json.toString encountered an unknown number tag"
641
+ },
642
+ }
643
+ },
644
+ _ => {
645
+ fail "Impossible: Json.toString encountered an unknown number tag"
646
+ },
647
+ }
648
+ } else {
649
+ fail "Impossible: Json.toString encountered an unknown number tag"
650
+ }
651
+ // This keeps the gc from prematurely freeing the value
652
+ ignore(value)
653
+ ret
654
+ }
655
+
656
+ // Note that this compromises on peak performance by also handling
657
+ // the compact printing case, merging these two together greatly simplifies the amount
658
+ // of code we need to maintain so it seems worth it.
659
+ let rec printElement = (
660
+ json: Json,
661
+ implHelper: JsonWriterConfig,
662
+ indentationLevel: Number,
663
+ ) => {
664
+ let buffer = implHelper.buffer
665
+ match (json) {
666
+ JsonNull => {
667
+ printNull(buffer)
668
+ return None
669
+ },
670
+ JsonBoolean(b) => {
671
+ printBool(b, buffer)
672
+ return None
673
+ },
674
+ JsonNumber(n) => return printNumber(n, buffer),
675
+ JsonString(s) => {
676
+ implHelper.emitEscapedQuotedString(s)
677
+ return None
678
+ },
679
+ JsonArray(elems) => {
680
+ match (elems) {
681
+ [] => {
682
+ Buffer.addChar('[', buffer)
683
+ if (implHelper.format.arrayFormat == SpacedArrayEntries) {
684
+ Buffer.addChar(' ', buffer)
685
+ }
686
+ Buffer.addChar(']', buffer)
687
+ return None
688
+ },
689
+ [e] => {
690
+ let format = implHelper.format
691
+
692
+ Buffer.addChar('[', buffer)
693
+
694
+ if (format.arrayFormat == OneArrayEntryPerLine) {
695
+ match (implHelper.printNewLine) {
696
+ Some(printNewLine) => printNewLine(),
697
+ None => void,
698
+ }
699
+ }
700
+
701
+ let elemLevel = indentationLevel + 1
702
+
703
+ if (format.arrayFormat == OneArrayEntryPerLine) {
704
+ match (implHelper.printIndentation) {
705
+ Some(printIndentation) => printIndentation(elemLevel),
706
+ None => void,
707
+ }
708
+ }
709
+
710
+ match (printElement(e, implHelper, elemLevel)) {
711
+ None => void,
712
+ err => return err,
713
+ }
714
+
715
+ if (format.arrayFormat == OneArrayEntryPerLine) {
716
+ match (implHelper.printNewLine) {
717
+ Some(printNewLine) => printNewLine(),
718
+ None => void,
719
+ }
720
+ match (implHelper.printIndentation) {
721
+ Some(printIndentation) => printIndentation(indentationLevel),
722
+ None => void,
723
+ }
724
+ }
725
+
726
+ Buffer.addChar(']', buffer)
727
+
728
+ return None
729
+ },
730
+ [initialHead, ...initialRest] => {
731
+ let format = implHelper.format
732
+
733
+ Buffer.addChar('[', buffer)
734
+
735
+ if (format.arrayFormat == OneArrayEntryPerLine) {
736
+ match (implHelper.printNewLine) {
737
+ Some(printNewLine) => printNewLine(),
738
+ None => void,
739
+ }
740
+ }
741
+
742
+ let mut currentHead = initialHead
743
+ let mut currentRest = initialRest
744
+
745
+ let elemLevel = indentationLevel + 1
746
+
747
+ for (let mut index = 0;; index += 1) {
748
+ if (index > 0) {
749
+ Buffer.addChar(',', buffer)
750
+ if (format.arrayFormat == SpacedArrayEntries) {
751
+ Buffer.addChar(' ', buffer)
752
+ }
753
+
754
+ if (format.arrayFormat == OneArrayEntryPerLine) {
755
+ match (implHelper.printNewLine) {
756
+ Some(printNewLine) => printNewLine(),
757
+ None => void,
758
+ }
759
+ }
760
+ }
761
+
762
+ if (format.arrayFormat == OneArrayEntryPerLine) {
763
+ match (implHelper.printIndentation) {
764
+ Some(printIndentation) => printIndentation(elemLevel),
765
+ None => void,
766
+ }
767
+ }
768
+
769
+ match (printElement(currentHead, implHelper, elemLevel)) {
770
+ None => void,
771
+ err => return err,
772
+ }
773
+
774
+ match (currentRest) {
775
+ [] => break,
776
+ [newHead, ...newRest] => {
777
+ currentHead = newHead
778
+ currentRest = newRest
779
+ },
780
+ }
781
+ }
782
+
783
+ if (format.arrayFormat == OneArrayEntryPerLine) {
784
+ match (implHelper.printNewLine) {
785
+ Some(printNewLine) => printNewLine(),
786
+ None => void,
787
+ }
788
+ match (implHelper.printIndentation) {
789
+ Some(printIndentation) => printIndentation(indentationLevel),
790
+ None => void,
791
+ }
792
+ }
793
+
794
+ Buffer.addChar(']', buffer)
795
+
796
+ return None
797
+ },
798
+ }
799
+ },
800
+ JsonObject(entries) => {
801
+ match (entries) {
802
+ [] => {
803
+ Buffer.addChar('{', buffer)
804
+ if (implHelper.format.objectFormat == SpacedObjectEntries) {
805
+ Buffer.addChar(' ', buffer)
806
+ }
807
+ Buffer.addChar('}', buffer)
808
+ return None
809
+ },
810
+ [(key, value)] => {
811
+ let format = implHelper.format
812
+
813
+ Buffer.addChar('{', buffer)
814
+
815
+ let elemLevel = indentationLevel + 1
816
+
817
+ if (format.objectFormat == OneObjectEntryPerLine) {
818
+ match (implHelper.printNewLine) {
819
+ Some(printNewLine) => printNewLine(),
820
+ None => void,
821
+ }
822
+ match (implHelper.printIndentation) {
823
+ Some(printIndentation) => printIndentation(elemLevel),
824
+ None => void,
825
+ }
826
+ }
827
+
828
+ implHelper.emitEscapedQuotedString(key)
829
+
830
+ Buffer.addChar(':', buffer)
831
+ match (format.objectFormat) {
832
+ CompactObjectEntries => void,
833
+ SpacedObjectEntries | OneObjectEntryPerLine => {
834
+ Buffer.addChar(' ', buffer)
835
+ },
836
+ }
837
+
838
+ match (printElement(value, implHelper, elemLevel)) {
839
+ None => void,
840
+ err => return err,
841
+ }
842
+
843
+ if (format.objectFormat == OneObjectEntryPerLine) {
844
+ match (implHelper.printNewLine) {
845
+ Some(printNewLine) => printNewLine(),
846
+ None => void,
847
+ }
848
+ match (implHelper.printIndentation) {
849
+ Some(printIndentation) => printIndentation(indentationLevel),
850
+ None => void,
851
+ }
852
+ }
853
+
854
+ Buffer.addChar('}', buffer)
855
+
856
+ return None
857
+ },
858
+ [initialHead, ...initialRest] => {
859
+ let format = implHelper.format
860
+
861
+ Buffer.addChar('{', buffer)
862
+
863
+ if (format.objectFormat == OneObjectEntryPerLine) {
864
+ match (implHelper.printNewLine) {
865
+ Some(printNewLine) => printNewLine(),
866
+ None => void,
867
+ }
868
+ }
869
+
870
+ let mut currentHead = initialHead
871
+ let mut currentRest = initialRest
872
+
873
+ let elemLevel = indentationLevel + 1
874
+
875
+ for (let mut index = 0;; index += 1) {
876
+ if (index > 0) {
877
+ Buffer.addChar(',', buffer)
878
+ if (format.objectFormat == SpacedObjectEntries) {
879
+ Buffer.addChar(' ', buffer)
880
+ }
881
+
882
+ if (format.objectFormat == OneObjectEntryPerLine) {
883
+ match (implHelper.printNewLine) {
884
+ Some(printNewLine) => printNewLine(),
885
+ None => void,
886
+ }
887
+ }
888
+ }
889
+
890
+ if (format.objectFormat == OneObjectEntryPerLine) {
891
+ match (implHelper.printIndentation) {
892
+ Some(printIndentation) => printIndentation(elemLevel),
893
+ None => void,
894
+ }
895
+ }
896
+
897
+ let (key, value) = currentHead
898
+
899
+ implHelper.emitEscapedQuotedString(key)
900
+
901
+ Buffer.addChar(':', buffer)
902
+ match (format.objectFormat) {
903
+ CompactObjectEntries => void,
904
+ SpacedObjectEntries | OneObjectEntryPerLine => {
905
+ Buffer.addChar(' ', buffer)
906
+ },
907
+ }
908
+
909
+ match (printElement(value, implHelper, elemLevel)) {
910
+ None => void,
911
+ err => return err,
912
+ }
913
+
914
+ match (currentRest) {
915
+ [] => break,
916
+ [newHead, ...newRest] => {
917
+ currentHead = newHead
918
+ currentRest = newRest
919
+ },
920
+ }
921
+ }
922
+
923
+ if (format.objectFormat == OneObjectEntryPerLine) {
924
+ match (implHelper.printNewLine) {
925
+ Some(printNewLine) => printNewLine(),
926
+ None => void,
927
+ }
928
+ match (implHelper.printIndentation) {
929
+ Some(printIndentation) => printIndentation(indentationLevel),
930
+ None => void,
931
+ }
932
+ }
933
+
934
+ Buffer.addChar('}', buffer)
935
+
936
+ return None
937
+ },
938
+ }
939
+ },
940
+ }
941
+ }
942
+
943
+ let isCodePointInBasicMultilingualPlane = (code: Number) =>
944
+ 0x0000 <= code && code <= 0xFFFF // the BMP covers U+0000..U+FFFF inclusive
945
+
946
+ let isHighSurrogate = (code: Number) => 0xD800 <= code && code <= 0xDBFF // first code unit of a UTF-16 pair
947
+
948
+ let isLowSurrogate = (code: Number) => 0xDC00 <= code && code <= 0xDFFF // second code unit of a UTF-16 pair
949
+
950
+ let combineSurrogatePairToCodePoint = (
951
+ highSurrogate: Number,
952
+ lowSurrogate: Number,
953
+ ) => {
954
+ // Range checks are unnecessary here: the string parser only calls this after
955
+ // isHighSurrogate/isLowSurrogate have already accepted both code units.
956
+ let highBits = (highSurrogate - 0xD800) << 10
957
+ 0x10000 + highBits + (lowSurrogate - 0xDC00)
958
+ }
959
+
960
+ let makeJsonWriter = (format: FormattingSettings, buffer: Buffer.Buffer) => { // builds a JsonWriter that appends to `buffer` according to `format`
961
+ let printNewLine = match (format.lineEnding) { // None when no line ending is configured
962
+ NoLineEnding => None,
963
+ LineFeed => Some(() => {
964
+ Buffer.addChar('\n', buffer)
965
+ }),
966
+ CarriageReturnLineFeed => Some(() => {
967
+ Buffer.addChar('\r', buffer)
968
+ Buffer.addChar('\n', buffer)
969
+ }),
970
+ CarriageReturn => Some(() => {
971
+ Buffer.addChar('\r', buffer)
972
+ }),
973
+ }
974
+
975
+ let printIndentation = match (format.indentation) { // None when indentation is disabled
976
+ IndentWithTab => Some(indentationLevel => {
977
+ for (let mut count = 0; count < indentationLevel; count += 1) {
978
+ Buffer.addChar('\t', buffer)
979
+ }
980
+ }),
981
+ // Fast path for the common 2-space indentation: multiplies by a literal instead of the captured setting
982
+ IndentWithSpaces(spacesPerIndentation) when spacesPerIndentation == 2 =>
983
+ Some(indentationLevel => {
984
+ let spaceCount = indentationLevel * 2
985
+ for (let mut count = 0; count < spaceCount; count += 1) {
986
+ Buffer.addChar(' ', buffer)
987
+ }
988
+ }),
989
+ // Fast path for the common 4-space indentation: multiplies by a literal instead of the captured setting
990
+ IndentWithSpaces(spacesPerIndentation) when spacesPerIndentation == 4 =>
991
+ Some(indentationLevel => {
992
+ let spaceCount = indentationLevel * 4
993
+ for (let mut count = 0; count < spaceCount; count += 1) {
994
+ Buffer.addChar(' ', buffer)
995
+ }
996
+ }),
997
+ IndentWithSpaces(spacesPerIndentation) => Some(indentationLevel => {
998
+ let spaceCount = indentationLevel * spacesPerIndentation
999
+ for (let mut count = 0; count < spaceCount; count += 1) {
1000
+ Buffer.addChar(' ', buffer)
1001
+ }
1002
+ }),
1003
+ NoIndentation => None,
1004
+ }
1005
+
1006
+ // To keep the work done per code point small, a dedicated closure is
1007
+ // prepared below for each combination of the two escaping flags.
1008
+ // This way unnecessary branching is avoided while emitting.
1009
+ // The most important thing is that the non pretty printed format is fast,
1010
+ // as this is where the performance is most likely to matter.
1011
+
1012
+ // In every case code points 0..31 must be escaped, as required by
1013
+ // ECMA-404 (the so called "C0" control point group), and so must '"' and '\'.
1014
+
1015
+ // For the non pretty printed case it is fastest to escape only what is
1016
+ // strictly required, which also avoids increasing the output size.
1017
+ // But for pretty printing or compatibility it may be desirable to escape other control points
1018
+ // or even everything other than printable ASCII characters.
1019
+ // For this reason options to control this have been exposed; otherwise
1020
+ // just a sane default would suffice.
1021
+ // Additionally, many JSON libraries escape certain two character
1022
+ // sequences to allow direct embedding into HTML, for example. This is
1023
+ // specifically to avoid emitting the sequence "</" like in "</script>".
1024
+ // The lazy approach would be to just escape every slash (which can become
1025
+ // "\\/", not necessarily "\u002F"). The more conservative approach used here
1026
+ // only escapes it when needed, but requires keeping track of the previous code
1027
+ // point in the iteration, so it's more complicated and handled separately.
1028
+ let emitCodePoint = if (
1029
+ !format.escapeAllControlPoints &&
1030
+ !format.escapeNonASCII
1031
+ ) {
1032
+ (codePoint: Number) => {
1033
+ if (codePoint > 31 && codePoint != 0x0022 && codePoint != 0x005C) {
1034
+ Buffer.addCharFromCodePoint(codePoint, buffer)
1035
+ } else {
1036
+ emitEscapedCodePoint(codePoint, buffer)
1037
+ }
1038
+ }
1039
+ } else if (!format.escapeAllControlPoints && format.escapeNonASCII) {
1040
+ // If desired, escape all non ASCII code points. So the only non
1041
+ // escaped code points are those in the range of ASCII characters
1042
+ // from 32 to 127, apart from '"' (0x22) and '\' (0x5C).
1043
+ (codePoint: Number) => {
1044
+ if (
1045
+ codePoint > 31 &&
1046
+ codePoint != 0x0022 &&
1047
+ codePoint != 0x005C &&
1048
+ codePoint < 128
1049
+ ) {
1050
+ Buffer.addCharFromCodePoint(codePoint, buffer)
1051
+ } else {
1052
+ emitEscapedCodePoint(codePoint, buffer)
1053
+ }
1054
+ }
1055
+ } else if (format.escapeAllControlPoints && !format.escapeNonASCII) {
1056
+ // If desired, in addition to the required 0..31 control points,
1057
+ // also escape Delete (127) and the unicode control point group C1 (128-159).
1058
+ // There could be more control points or otherwise escape worthy
1059
+ // codepoints, but covering that would be overkill.
1060
+ (codePoint: Number) => {
1061
+ if (
1062
+ codePoint > 31 &&
1063
+ codePoint != 0x0022 &&
1064
+ codePoint != 0x005C &&
1065
+ codePoint < 127 ||
1066
+ codePoint > 159 // `&&` binds tighter than `||`: everything above 159 is emitted as-is
1067
+ ) {
1068
+ Buffer.addCharFromCodePoint(codePoint, buffer)
1069
+ } else {
1070
+ emitEscapedCodePoint(codePoint, buffer)
1071
+ }
1072
+ }
1073
+ } else {
1074
+ // And this is just the combination of both flags, which means
1075
+ // doing almost the same as for the case above for
1076
+ // escapeNonASCII=true, but also escape the ASCII control codepoint
1077
+ // 127 (Delete).
1078
+ (codePoint: Number) => {
1079
+ if (
1080
+ codePoint > 31 &&
1081
+ codePoint != 0x0022 &&
1082
+ codePoint != 0x005C &&
1083
+ codePoint < 127
1084
+ ) {
1085
+ // fast path for chars that never need any escaping
1086
+ Buffer.addCharFromCodePoint(codePoint, buffer)
1087
+ } else {
1088
+ emitEscapedCodePoint(codePoint, buffer)
1089
+ }
1090
+ }
1091
+ }
1092
+
1093
+ let emitEscapedQuotedString = if (!format.escapeHTMLUnsafeSequences) {
1094
+ (s: String) => {
1095
+ Buffer.addChar('"', buffer)
1096
+
1097
+ // Note that it's important for performance that the closure passed to forEachCodePoint
1098
+ // is not allocated inline here, but just once when creating the writer.
1099
+
1100
+ String.forEachCodePoint(emitCodePoint, s)
1101
+
1102
+ Buffer.addChar('"', buffer)
1103
+ }
1104
+ } else {
1105
+ // Special handling for the escapeHTMLUnsafeSequences flag.
1106
+ // Escaping a sequence requires keeping track of previous characters,
1107
+ // which is difficult and suboptimal when using a function to iterate
1108
+ // the input string. So we don't want to pay the price in other cases.
1109
+ // This cannot be done just in the emitCodePoint function.
1110
+ // It could be possible to implement more optimally, but would
1111
+ // complicate things even more than this.
1112
+ (s: String) => {
1113
+ Buffer.addChar('"', buffer)
1114
+
1115
+ let mut prevCodePoint = 0
1116
+
1117
+ String.forEachCodePoint(codePoint => {
1118
+ if (codePoint == 47) { // '/'
1119
+ if (prevCodePoint == 60) { // '<', so this slash would complete "</"
1120
+ Buffer.addChar('\\', buffer)
1121
+ Buffer.addChar('/', buffer)
1122
+ } else {
1123
+ // otherwise just emit the slash as-is
1124
+ Buffer.addChar('/', buffer)
1125
+ }
1126
+ } else {
1127
+ emitCodePoint(codePoint)
1128
+ }
1129
+
1130
+ prevCodePoint = codePoint
1131
+ }, s)
1132
+
1133
+ Buffer.addChar('"', buffer)
1134
+ }
1135
+ }
1136
+
1137
+ let implHelper = {
1138
+ format,
1139
+ buffer,
1140
+ emitEscapedQuotedString,
1141
+ printNewLine,
1142
+ printIndentation,
1143
+ }: JsonWriterConfig
1144
+
1145
+ { emit: json => { // returns None on success, or the error reported by printElement
1146
+ match (printElement(json, implHelper, 0)) {
1147
+ None => void,
1148
+ err => return err,
1149
+ }
1150
+ if (format.finishWithNewLine) {
1151
+ match (printNewLine) {
1152
+ Some(printNewLine) => printNewLine(),
1153
+ None => void,
1154
+ }
1155
+ }
1156
+ return None
1157
+ }, }: JsonWriter
1158
+ }
1159
+
1160
+ /**
1161
+ * Converts the `Json` data structure into a JSON string with specific formatting settings.
1162
+ *
1163
+ * @param format: Formatting options (defaults to `Compact`)
1164
+ * @param json: The `Json` data structure to convert
1165
+ * @returns `Ok(str)` containing the JSON string or `Err(err)` if the provided `Json` data structure cannot be converted to a string
1166
+ *
1167
+ * @example
1168
+ * assert toString(
1169
+ * JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))])
1170
+ * ) == Ok("{\"currency\":\"€\",\"price\":99.9}")
1171
+ * @example
1172
+ * assert toString(
1173
+ * format=Compact,
1174
+ * JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))])
1175
+ * ) == Ok("{\"currency\":\"€\",\"price\":99.9}")
1176
+ * @example
1177
+ * assert toString(
1178
+ * format=Pretty,
1179
+ * JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))])
1180
+ * ) == Ok("{
1181
+ * \"currency\": \"€\",
1182
+ * \"price\": 99.9
1183
+ * }\n")
1184
+ * @example
1185
+ * assert toString(
1186
+ * format=Custom{
1187
+ * indentation: NoIndentation,
1188
+ * arrayFormat: CompactArrayEntries,
1189
+ * objectFormat: CompactObjectEntries,
1190
+ * lineEnding: NoLineEnding,
1191
+ * finishWithNewLine: false,
1192
+ * escapeAllControlPoints: true,
1193
+ * escapeHTMLUnsafeSequences: true,
1194
+ * escapeNonASCII: true,
1195
+ * },
1196
+ * JsonObject([("currency", JsonString("€")), ("price", JsonNumber(99.9))])
1197
+ * ) == Ok("{\"currency\":\"\\u20ac\",\"price\":99.9}")
1198
+ *
1199
+ * @since v0.6.0
1200
+ */
1201
+ provide let toString = (format=Compact, json: Json) => {
1202
+ let buf = Buffer.make(16)
1203
+ let format = match (format) { // expand the chosen preset into explicit settings for makeJsonWriter
1204
+ Pretty =>
1205
+ {
1206
+ indentation: IndentWithSpaces(2),
1207
+ arrayFormat: OneArrayEntryPerLine,
1208
+ objectFormat: OneObjectEntryPerLine,
1209
+ lineEnding: LineFeed,
1210
+ finishWithNewLine: true,
1211
+ escapeAllControlPoints: true,
1212
+ escapeHTMLUnsafeSequences: false,
1213
+ escapeNonASCII: false,
1214
+ },
1215
+ Compact =>
1216
+ {
1217
+ indentation: NoIndentation,
1218
+ arrayFormat: CompactArrayEntries,
1219
+ objectFormat: CompactObjectEntries,
1220
+ lineEnding: NoLineEnding,
1221
+ finishWithNewLine: false,
1222
+ escapeAllControlPoints: false,
1223
+ escapeHTMLUnsafeSequences: false,
1224
+ escapeNonASCII: false,
1225
+ },
1226
+ PrettyAndSafe =>
1227
+ {
1228
+ indentation: IndentWithSpaces(2),
1229
+ arrayFormat: OneArrayEntryPerLine,
1230
+ objectFormat: OneObjectEntryPerLine,
1231
+ lineEnding: LineFeed,
1232
+ finishWithNewLine: true,
1233
+ escapeAllControlPoints: true,
1234
+ escapeHTMLUnsafeSequences: true,
1235
+ escapeNonASCII: true,
1236
+ },
1237
+ CompactAndSafe =>
1238
+ {
1239
+ indentation: NoIndentation,
1240
+ arrayFormat: CompactArrayEntries,
1241
+ objectFormat: CompactObjectEntries,
1242
+ lineEnding: NoLineEnding,
1243
+ finishWithNewLine: false,
1244
+ escapeAllControlPoints: true,
1245
+ escapeHTMLUnsafeSequences: true,
1246
+ escapeNonASCII: true,
1247
+ },
1248
+ Custom{
1249
+ indentation,
1250
+ arrayFormat,
1251
+ objectFormat,
1252
+ lineEnding,
1253
+ finishWithNewLine,
1254
+ escapeAllControlPoints,
1255
+ escapeHTMLUnsafeSequences,
1256
+ escapeNonASCII,
1257
+ } =>
1258
+ {
1259
+ indentation,
1260
+ arrayFormat,
1261
+ objectFormat,
1262
+ lineEnding,
1263
+ finishWithNewLine,
1264
+ escapeAllControlPoints,
1265
+ escapeHTMLUnsafeSequences,
1266
+ escapeNonASCII,
1267
+ },
1268
+ }
1269
+ let writer = makeJsonWriter(format, buf)
1270
+ let error = writer.emit(json) // None on success, Some(err) otherwise
1271
+
1272
+ match (error) {
1273
+ None => Ok(Buffer.toString(buf)),
1274
+ Some(e) => Err(e),
1275
+ }
1276
+ }
1277
+
1278
+ /**
1279
+ * Represents errors for JSON parsing along with a human readable message.
1280
+ */
1281
+ provide enum JsonParseError {
1282
+ UnexpectedEndOfInput(String), // the parser was at the end of the input when the problem was detected
1283
+ UnexpectedToken(String), // a code point other than the expected one was found
1284
+ InvalidUTF16SurrogatePair(String), // a `\u` escape did not form a valid code point or surrogate pair
1285
+ }
1286
+
1287
+ /*
1288
+ * Internal data structure used during parsing.
1289
+ */
1290
+ record JsonParserState {
1291
+ string: String, // the complete input text being parsed
1292
+ bufferParse: Buffer.Buffer, // scratch buffer, cleared and reused for every string and number
1293
+ mut currentCodePoint: Number, // code point at `bytePos`, or _END_OF_INPUT once exhausted
1294
+ mut pos: Number, // number of code points consumed so far (reported in error messages)
1295
+ mut bytePos: Number, // byte offset of `currentCodePoint` within `string`
1296
+ }
1297
+
1298
+ let isInterTokenWhiteSpace = (codePoint: Number) => {
1299
+ // Only these four code points count as whitespace between JSON tokens;
1300
+ // anything else, including the end-of-input sentinel, yields false.
1301
+ let isTab = codePoint == 0x0009
1302
+ let isLineFeed = codePoint == 0x000A
1303
+ let isCarriageReturn = codePoint == 0x000D
1304
+ let isSpace = codePoint == 0x0020
1305
+ isTab || isLineFeed || isCarriageReturn || isSpace
1306
+ }
1307
+
1308
+ let _END_OF_INPUT = -1 // sentinel stored in place of a code point once the whole input has been read
1309
+
1310
+ @unsafe
1311
+ let rec readCodePoint = (bytePosition: Number, string: String) => {
1312
+ use WasmI32.{ (+), (<) }
1313
+ // Decodes the code point starting at byte offset `bytePosition` of `string`.
1314
+ let strPtr = WasmI32.fromGrain(string)
1315
+ // The word at offset 4 is used below as the string's size in bytes.
1316
+ let byteSize = WasmI32.load(strPtr, 4n)
1317
+
1318
+ let bytePositionW32 = coerceNumberToWasmI32(bytePosition)
1319
+ // The character data is addressed starting 8 bytes into the string value.
1320
+ let ptr = strPtr + 8n + bytePositionW32
1321
+ // Reading at or past the end yields the sentinel instead of touching memory.
1322
+ if (bytePositionW32 < byteSize) {
1323
+ let codePoint = getCodePoint(ptr)
1324
+ tagSimpleNumber(codePoint)
1325
+ } else {
1326
+ _END_OF_INPUT
1327
+ }
1328
+ }
1329
+
1330
+ let codePointUTF8ByteCount = (usv: Number) => {
1331
+ if (!Char.isValid(usv)) {
1332
+ fail "Impossible: JSON parser encountered an invalid unicode scalar value in codePointUTF8ByteCount"
1333
+ }
1334
+ // The thresholds are the largest values that fit in 1, 2 and 3 UTF-8 bytes.
1335
+ if (usv > 0xffff) {
1336
+ 4
1337
+ } else if (usv > 0x7ff) {
1338
+ 3
1339
+ } else if (usv > 0x7f) {
1340
+ 2
1341
+ } else {
1342
+ 1
1343
+ }
1344
+ }
1345
+
1346
+ let isAtEndOfInput = (parserState: JsonParserState) => {
1347
+ _END_OF_INPUT == parserState.currentCodePoint // the sentinel replaces the code point at the end
1348
+ }
1349
+
1350
+ let next = (parserState: JsonParserState) => {
1351
+ let current = parserState.currentCodePoint
1352
+ if (current == _END_OF_INPUT) {
1353
+ current // already at the end: the state is left untouched
1354
+ } else {
1355
+ parserState.bytePos += codePointUTF8ByteCount(current) // step over its UTF-8 bytes
1356
+ let upcoming = readCodePoint(parserState.bytePos, parserState.string)
1357
+ parserState.currentCodePoint = upcoming
1358
+ parserState.pos += 1
1359
+ upcoming
1360
+ }
1361
+ }
1362
+
1363
+ let skipWhiteSpace = (parserState: JsonParserState) => {
1364
+ // Advances past any run of inter-token whitespace.
1365
+ // The end-of-input test is not strictly necessary, because the sentinel
1366
+ // is never classified as whitespace; it is only evaluated when the
1367
+ // whitespace test succeeds and could be removed as an optimization.
1368
+ while (true) {
1369
+ let c = parserState.currentCodePoint
1370
+ if (!isInterTokenWhiteSpace(c) || isAtEndOfInput(parserState)) break
1371
+ ignore(next(parserState))
1372
+ }
1373
+ }
1374
+
1375
+ let buildUnexpectedTokenError = (parserState: JsonParserState, detail: String) => {
1376
+ // Both variants carry the same text; only the constructor differs, so that
1377
+ // callers can tell input that ended too early apart from a wrong token.
1378
+ let message = "Unexpected token at position " ++
1379
+ runtimeToString(parserState.pos) ++
1380
+ ": " ++
1381
+ detail
1382
+ if (isAtEndOfInput(parserState)) {
1383
+ UnexpectedEndOfInput(message)
1384
+ } else {
1385
+ UnexpectedToken(message)
1386
+ }
1387
+ }
1388
+
1389
+ @unsafe
1390
+ let toHex = (n: Number) => {
1391
+ // Formats `n` with radix 16 by handing its WasmI32 form to NumberUtils.itoa32.
1392
+ NumberUtils.itoa32(coerceNumberToWasmI32(n), 16n)
1393
+ }
1394
+
1395
+ let toHexWithZeroPadding = (n: Number, padTo: Number) => {
1396
+ // Left-pads the hexadecimal form of `n` with '0' until it is `padTo` long.
1397
+ // Only reached while building error messages, so it is kept simple.
1398
+ let mut padded = toHex(n)
1399
+ while (String.length(padded) < padTo) {
1400
+ padded = "0" ++ padded
1401
+ }
1402
+ padded
1403
+ }
1404
+
1405
+ let formatCodePointOrEOF = (codePoint: Number) => {
1406
+ // Renders a code point (or the EOF sentinel) for use in error messages.
1407
+ if (codePoint == _END_OF_INPUT) {
1408
+ // The numeric sentinel used by the parser gets a readable name.
1409
+ "end of input"
1410
+ } else if (codePoint >= 32 && codePoint <= 126) {
1411
+ // Printable ASCII (space included) is shown as the quoted character.
1412
+ "'" ++ Char.toString(Char.fromCode(codePoint)) ++ "'"
1413
+ } else {
1414
+ // Any other code point is shown as a zero padded hexadecimal value.
1415
+ let hex = toHexWithZeroPadding(codePoint, 4)
1416
+ "U+" ++ hex
1417
+ }
1418
+ }
1419
+
1420
+ let expectCodePointAndAdvance = (
1421
+ expectedCodePoint: Number,
1422
+ parserState: JsonParserState,
1423
+ ) => {
1424
+ // Returns None after consuming the expected code point, or Some(error)
1425
+ // without moving the parser when a different code point is found.
1426
+ let found = parserState.currentCodePoint
1427
+ if (found != expectedCodePoint) {
1428
+ let detail = "expected " ++ formatCodePointOrEOF(expectedCodePoint) ++
1429
+ ", found " ++ formatCodePointOrEOF(found)
1430
+ Some(buildUnexpectedTokenError(parserState, detail))
1431
+ } else {
1432
+ ignore(next(parserState))
1433
+ None
1434
+ }
1435
+ }
1436
+ let atoiFast = buffer => {
1437
+ // Folds the bytes of `buffer` into a base 10 Number; no validation is done.
1438
+ let digitCount = Buffer.length(buffer)
1439
+ let mut total = 0
1440
+ for (let mut offset = 0; offset < digitCount; offset += 1) {
1441
+ use Uint8.{ (-) }
1442
+ let digit = Uint8.toNumber(Buffer.getUint8(offset, buffer) - 48us)
1443
+ total = (total << 3) + (total << 1) + digit // total * 10 + digit
1444
+ }
1445
+ total
1446
+ }
1447
+ let rec parseValue = (parserState: JsonParserState) => {
1448
+ // Parses a single JSON value of any kind.
1449
+ // The first code point of the value decides which specialised parser
1450
+ // takes over. Whitespace is consumed both before and after the value,
1451
+ // so callers always resume on the next token or at the end of input.
1452
+ // Returns Ok(json) on success or Err(parseError) otherwise.
1453
+ skipWhiteSpace(parserState)
1454
+
1455
+ let parsed = match (parserState.currentCodePoint) {
1456
+ // Containers and strings are recognised by their opening delimiter.
1457
+ 0x7B => parseObject(parserState), // '{'
1458
+ 0x5B => parseArray(parserState), // '['
1459
+ 0x22 => parseStringValue(parserState), // '"'
1460
+ // Literals are recognised by their first letter; the callee checks the rest.
1461
+ 0x74 => parseTrueValue(parserState), // 't'
1462
+ 0x66 => parseFalseValue(parserState), // 'f'
1463
+ 0x6E => parseNullValue(parserState), // 'n'
1464
+ // A number starts with a decimal digit or a minus sign.
1465
+ digit when digit >= 0x30 && digit <= 0x39 => parseNumberValue(parserState), // '0'..'9'
1466
+ 0x2D => parseNumberValue(parserState), // '-'
1467
+ // Anything else, the end of input included, cannot start a value.
1468
+ other => {
1469
+ let found = formatCodePointOrEOF(other)
1470
+ let detail = "expected start of a JSON value, found " ++ found
1471
+ Err(buildUnexpectedTokenError(parserState, detail))
1472
+ },
1473
+ }
1474
+
1475
+ // Trailing whitespace is skipped even when parsing failed.
1476
+ skipWhiteSpace(parserState)
1477
+
1478
+ parsed
1479
+ }
1480
+ and parseNullValue = (parserState: JsonParserState) => {
1481
+ // Consumes the literal `null` one code point at a time and stops with the
1482
+ // error for the first code point that does not match.
1483
+ match (expectCodePointAndAdvance(0x6E, parserState)) { // 'n'
1484
+ Some(e) => return Err(e),
1485
+ None => void,
1486
+ }
1487
+
1488
+ match (expectCodePointAndAdvance(0x75, parserState)) { // 'u'
1489
+ Some(e) => return Err(e),
1490
+ None => void,
1491
+ }
1492
+
1493
+ match (expectCodePointAndAdvance(0x6C, parserState)) { // 'l'
1494
+ Some(e) => return Err(e),
1495
+ None => void,
1496
+ }
1497
+
1498
+ match (expectCodePointAndAdvance(0x6C, parserState)) { // 'l'
1499
+ Some(e) => return Err(e),
1500
+ None => void,
1501
+ }
1502
+
1503
+ return Ok(JsonNull)
1504
+ }
1505
+ and parseTrueValue = (parserState: JsonParserState) => {
1506
+ // Consumes the literal `true` one code point at a time and stops with the
1507
+ // error for the first code point that does not match.
1508
+ match (expectCodePointAndAdvance(0x74, parserState)) { // 't'
1509
+ Some(e) => return Err(e),
1510
+ None => void,
1511
+ }
1512
+
1513
+ match (expectCodePointAndAdvance(0x72, parserState)) { // 'r'
1514
+ Some(e) => return Err(e),
1515
+ None => void,
1516
+ }
1517
+
1518
+ match (expectCodePointAndAdvance(0x75, parserState)) { // 'u'
1519
+ Some(e) => return Err(e),
1520
+ None => void,
1521
+ }
1522
+
1523
+ match (expectCodePointAndAdvance(0x65, parserState)) { // 'e'
1524
+ Some(e) => return Err(e),
1525
+ None => void,
1526
+ }
1527
+
1528
+ return Ok(JsonBoolean(true))
1529
+ }
1530
+ and parseFalseValue = (parserState: JsonParserState) => {
1531
+ // Consumes the literal `false` one code point at a time.
1532
+ // Parsing stops with the error for the first code point that does not
1533
+ // match; the parser is left on that offending code point.
1534
+ match (expectCodePointAndAdvance(0x66, parserState)) { // 'f'
1535
+ Some(e) => return Err(e),
1536
+ None => void,
1537
+ }
1538
+
1539
+ match (expectCodePointAndAdvance(0x61, parserState)) { // 'a'
1540
+ Some(e) => return Err(e),
1541
+ None => void,
1542
+ }
1543
+
1544
+ match (expectCodePointAndAdvance(0x6C, parserState)) { // 'l'
1545
+ Some(e) => return Err(e),
1546
+ None => void,
1547
+ }
1548
+
1549
+ match (expectCodePointAndAdvance(0x73, parserState)) { // 's'
1550
+ Some(e) => return Err(e),
1551
+ None => void,
1552
+ }
1553
+
1554
+ match (expectCodePointAndAdvance(0x65, parserState)) { // 'e'
1555
+ Some(e) => return Err(e),
1556
+ None => void,
1557
+ }
1558
+
1559
+ return Ok(JsonBoolean(false))
1560
+ }
1561
+ and parseString = (parserState: JsonParserState) => { // parses a quoted string, decoding escapes; Ok(text) or Err(error)
1562
+ match (expectCodePointAndAdvance(0x22, parserState)) {
1563
+ // '"'
1564
+ Some(e) => return Err(e),
1565
+ None => {
1566
+ let buffer = parserState.bufferParse // shared scratch buffer, reset for every string
1567
+ Buffer.clear(buffer)
1568
+
1569
+ while (true) {
1570
+ match (parserState.currentCodePoint) {
1571
+ 0x22 => { // '"'
1572
+ next(parserState)
1573
+ break
1574
+ },
1575
+ -1 => { // EOF
1576
+ return Err(
1577
+ buildUnexpectedTokenError(
1578
+ parserState,
1579
+ "unexpected end of string value"
1580
+ ),
1581
+ )
1582
+ },
1583
+ 0x5C => { // '\'
1584
+ // Keep the starting position for better error reporting.
1585
+ let escapeStartPos = parserState.pos
1586
+
1587
+ next(parserState)
1588
+
1589
+ match (parserState.currentCodePoint) {
1590
+ 0x22 => { // '"'
1591
+ Buffer.addChar('"', buffer)
1592
+ ignore(next(parserState))
1593
+ },
1594
+ 0x5C => { // '\'
1595
+ Buffer.addChar('\\', buffer)
1596
+ ignore(next(parserState))
1597
+ },
1598
+ 0x2F => { // '/'
1599
+ Buffer.addChar('/', buffer)
1600
+ ignore(next(parserState))
1601
+ },
1602
+ 0x62 => { // letter 'b' as in Backspace
1603
+ // emit backspace control code
1604
+ Buffer.addChar('\u{08}', buffer)
1605
+ ignore(next(parserState))
1606
+ },
1607
+ 0x66 => { // letter 'f' as in Form Feed
1608
+ // emit Form Feed control code
1609
+ Buffer.addChar('\u{0C}', buffer)
1610
+ ignore(next(parserState))
1611
+ },
1612
+ 0x6E => { // letter 'n' as in New line
1613
+ // emit Line Feed control code
1614
+ Buffer.addChar('\u{0A}', buffer)
1615
+ ignore(next(parserState))
1616
+ },
1617
+ 0x72 => { // letter 'r' as in carriage Return
1618
+ // emit Carriage Return control code
1619
+ Buffer.addChar('\u{0D}', buffer)
1620
+ ignore(next(parserState))
1621
+ },
1622
+ 0x74 => { // letter 't' as in Tab
1623
+ // emit Tab control code
1624
+ Buffer.addChar('\u{09}', buffer)
1625
+ ignore(next(parserState))
1626
+ },
1627
+ 0x75 => { // 'u' (start of hexadecimal UTF-16 escape sequence)
1628
+ next(parserState)
1629
+
1630
+ // The escape sequence can either be a standalone code point or
1631
+ // a UTF-16 surrogate pair made of two code units that have to
1632
+ // be combined to form a code point. This is a legacy of
1633
+ // JavaScript's UTF-16 string representation, despite JSON
1634
+ // mandating UTF-8 (kind of, as stated in rfc8259: "JSON text
1635
+ // exchanged between systems that are not part of a closed
1636
+ // ecosystem MUST be encoded using UTF-8").
1637
+ // This would be easy to do using a function for shared logic,
1638
+ // but in order to avoid heap allocation a loop with local
1639
+ // state is used instead; it runs at most twice.
1640
+
1641
+ let mut highSurrogate = -1 // -1 means that no high surrogate has been read yet
1642
+
1643
+ while (true) {
1644
+ let mut codeUnit = 0
1645
+
1646
+ for (
1647
+ let mut digitIndex = 3;
1648
+ digitIndex >= 0;
1649
+ digitIndex -= 1
1650
+ ) {
1651
+ let hexDigitCodePoint = parserState.currentCodePoint
1652
+
1653
+ let mut digit = hexDigitCodePoint
1654
+
1655
+ if (hexDigitCodePoint >= 48 && hexDigitCodePoint <= 57) { // 0..9
1656
+ digit -= 48
1657
+ } else if (
1658
+ hexDigitCodePoint >= 65 &&
1659
+ hexDigitCodePoint <= 70
1660
+ ) { // A..F
1661
+ digit -= 55 // (65 - 10)
1662
+ } else if (
1663
+ hexDigitCodePoint >= 97 &&
1664
+ hexDigitCodePoint <= 102
1665
+ ) { // a..f
1666
+ digit -= 87 // (97 - 10)
1667
+ } else {
1668
+ let digitsSoFar = 3 - digitIndex
1669
+ let detail =
1670
+ "expected exactly 4 hexadecimal digits in the UTF-16 escape sequence, found only " ++
1671
+ runtimeToString(digitsSoFar)
1672
+ return Err(buildUnexpectedTokenError(parserState, detail))
1673
+ }
1674
+
1675
+ let shift = digitIndex * 4 // the most significant hex digit is read first
1676
+ codeUnit = codeUnit | digit << shift
1677
+
1678
+ ignore(next(parserState))
1679
+ }
1680
+
1681
+ if (highSurrogate == -1) {
1682
+ // This is the first iteration of the loop.
1683
+ // The code unit should either be the high surrogate of the
1684
+ // pair or a full code point in the Basic Multilingual
1685
+ // Plane (U+0000..U+FFFF).
1686
+ if (isHighSurrogate(codeUnit)) {
1687
+ // Next characters should be "\u"
1688
+ // '\'
1689
+ match (expectCodePointAndAdvance(0x5C, parserState)) {
1690
+ Some(e) => return Err(e),
1691
+ None => void,
1692
+ }
1693
+ // 'u'
1694
+ match (expectCodePointAndAdvance(0x75, parserState)) {
1695
+ Some(e) => return Err(e),
1696
+ None => void,
1697
+ }
1698
+
1699
+ // Keep the high surrogate and proceed to the second
1700
+ // iteration of the loop.
1701
+ highSurrogate = codeUnit
1702
+ } else if (
1703
+ isCodePointInBasicMultilingualPlane(codeUnit) &&
1704
+ !isLowSurrogate(codeUnit)
1705
+ ) {
1706
+ let codePoint = codeUnit
1707
+ Buffer.addCharFromCodePoint(codePoint, buffer)
1708
+ break
1709
+ } else {
1710
+ let message =
1711
+ "Invalid character escape sequence at position " ++
1712
+ runtimeToString(escapeStartPos) ++
1713
+ ": expected a Unicode code point in Basic Multilingual Plane (U+0000..U+FFFF) or a high surrogate (0xD800..0xDBFF) of a UTF-16 surrogate pair, found " ++
1714
+ "0x" ++
1715
+ toHexWithZeroPadding(codeUnit, 4)
1716
+ return Err(InvalidUTF16SurrogatePair(message))
1717
+ }
1718
+ } else {
1719
+ // This is the second iteration of the loop.
1720
+ // The code unit should be the low surrogate of the pair.
1721
+ if (isLowSurrogate(codeUnit)) {
1722
+ let lowSurrogate = codeUnit
1723
+ let combinedCodePoint = combineSurrogatePairToCodePoint(
1724
+ highSurrogate,
1725
+ lowSurrogate
1726
+ )
1727
+ Buffer.addCharFromCodePoint(combinedCodePoint, buffer)
1728
+ break
1729
+ } else {
1730
+ let message =
1731
+ "Invalid character escape sequence at position " ++
1732
+ runtimeToString(escapeStartPos) ++
1733
+ ": expected a low surrogate (0xDC00..0xDFFF) in the second code unit of the UTF-16 sequence, found " ++
1734
+ "0x" ++
1735
+ toHexWithZeroPadding(codeUnit, 4)
1736
+ return Err(InvalidUTF16SurrogatePair(message))
1737
+ }
1738
+ }
1739
+ }
1740
+ },
1741
+ unexpectedCodePoint => {
1742
+ // JSON doesn't allow arbitrary characters to be preceded by backslash escape.
1743
+ // Only the ones above.
1744
+ let detail =
1745
+ "expected a valid escape sequence or the end of string, found " ++
1746
+ formatCodePointOrEOF(unexpectedCodePoint)
1747
+ return Err(buildUnexpectedTokenError(parserState, detail))
1748
+ },
1749
+ }
1750
+ },
1751
+ c => {
1752
+ if (c >= 0x00 && c <= 0x1F) { // raw control characters must be written as escapes
1753
+ return Err(
1754
+ buildUnexpectedTokenError(
1755
+ parserState,
1756
+ "Bad control character in string literal"
1757
+ ),
1758
+ )
1759
+ }
1760
+ // Finally the happy case of a simple unescaped code point.
1761
+ next(parserState)
1762
+ Buffer.addCharFromCodePoint(c, buffer)
1763
+ },
1764
+ }
1765
+ }
1766
+
1767
+ let s = Buffer.toString(buffer)
1768
+ return Ok(s)
1769
+ },
1770
+ }
1771
+ }
1772
+ and parseStringValue = (parserState: JsonParserState) => {
1773
+ match (parseString(parserState)) {
1774
+ Err(failure) => Err(failure), // parse errors are passed through unchanged
1775
+ Ok(text) => Ok(JsonString(text)), // the decoded text becomes a Json string value
1776
+ }
1777
+ }
1778
+ and parseNumberValue = (parserState: JsonParserState) => { // returns Ok(JsonNumber(n)) or Err(parseError)
1779
+ // Parses a JSON number literal. TODO(#1878): Use a streaming-optimized way to parse numbers
1780
+ let buffer = parserState.bufferParse
1781
+ Buffer.clear(buffer)
1782
+ // First char can optionally be a minus sign.
1783
+ let mut c = parserState.currentCodePoint
1784
+ let mut isFloat = false
1785
+ let isNegative = c == 0x2D // '-'
1786
+ // The sign is applied to the final result, so it is not copied into the buffer.
1787
+ if (isNegative) {
1788
+ c = next(parserState)
1789
+ }
1790
+
1791
+ // After that, the first/second char can only be a decimal digit ('0'..'9').
1792
+ match (c) {
1793
+ 0x30 => { // '0'
1794
+ // JSON doesn't allow numbers with additional leading zeros like
1795
+ // "01". Which means that if a number starts with zero then the
1796
+ // integer part is just zero and the next one can only be one of
1797
+ // '.', 'e' or 'E'. In any case all that needs to be done here is
1798
+ // to advance over the zero character and proceed to the optional
1799
+ // fractional and exponential parts. If another digit follows then
1800
+ // a parsing error will occur as expected, but implicitly because
1801
+ // this function finishes with the parser positioned on a digit
1802
+ // and not on a token expected after a number like ',', ']', '}' or
1803
+ // EOF.
1804
+ Buffer.addCharFromCodePoint(c, buffer)
1805
+ c = next(parserState)
1806
+ },
1807
+ x when x >= 0x31 && x <= 0x39 => { // '1'..'9'
1808
+ while (true) {
1809
+ Buffer.addCharFromCodePoint(c, buffer)
1810
+ c = next(parserState)
1811
+ if (c < 0x30 || c > 0x39) {
1812
+ break
1813
+ }
1814
+ }
1815
+ },
1816
+ unexpectedCodePoint => {
1817
+ // The integer part of the number has to have at least one digit.
1818
+ // JSON doesn't allow numbers starting with decimal separator like ".1".
1819
+ let detail = "expected a decimal digit, found " ++
1820
+ formatCodePointOrEOF(unexpectedCodePoint)
1821
+ return Err(buildUnexpectedTokenError(parserState, detail))
1822
+ },
1823
+ }
1824
+ // Optional fractional part of the number: '.' followed by one or more digits.
1825
+ if (c == 0x2E) { // '.'
1826
+ isFloat = true
1827
+ Buffer.addChar('.', buffer)
1828
+ c = next(parserState)
1829
+ let mut hasHitDigit = false
1830
+ for (; c >= 0x30 && c <= 0x39;) {
1831
+ hasHitDigit = true
1832
+ Buffer.addCharFromCodePoint(c, buffer)
1833
+ c = next(parserState)
1834
+ }
1835
+ if (!hasHitDigit)
1836
+ return Err(
1837
+ buildUnexpectedTokenError(
1838
+ parserState,
1839
+ "fraction part is missing in number"
1840
+ ),
1841
+ )
1842
+ }
1843
+ // Optional exponential part of the number.
1844
+ if (c == 0x65 || c == 0x45) { // 'e' or 'E'
1845
+ isFloat = true
1846
+ Buffer.addChar('e', buffer)
1847
+ c = next(parserState)
1848
+ // can start with optional plus or minus sign (a plus is simply dropped)
1849
+ match (c) {
1850
+ 0x2D => { // '-'
1851
+ c = next(parserState)
1852
+ Buffer.addChar('-', buffer)
1853
+ },
1854
+ 0x2B => { // '+'
1855
+ c = next(parserState)
1856
+ },
1857
+ _ => void,
1858
+ }
1859
+ // followed by one or more digits (0-9)
1860
+ let mut hasHitDigit = false
1861
+ for (; c >= 0x30 && c <= 0x39;) {
1862
+ hasHitDigit = true
1863
+ Buffer.addCharFromCodePoint(c, buffer)
1864
+ c = next(parserState)
1865
+ }
1866
+ if (!hasHitDigit)
1867
+ return Err(
1868
+ buildUnexpectedTokenError(
1869
+ parserState,
1870
+ "exponent part is missing in number"
1871
+ ),
1872
+ )
1873
+ }
1874
+ // Note that unlike all other JSON value types there's no explicit ending
1875
+ // character like ('"' for strings, ']' for arrays,'}' for objects etc). We
1876
+ // just leave the parser state at current position and the reading of next
1877
+ // token will succeed or fail, but number parsing just ends here.
1878
+ let result = match (isFloat) {
1879
+ false => atoiFast(buffer),
1880
+ true => {
1881
+ let str = Buffer.toString(buffer)
1882
+ match (Atof.parseFloat(str)) {
1883
+ Err(err) => fail "Impossible: Json parse float on invalid float",
1884
+ Ok(n) => n,
1885
+ }
1886
+ },
1887
+ }
1888
+ // A negated zero is returned as the float -0.0 so that the sign is not lost.
1889
+ let result = if (result == 0 && isNegative) {
1890
+ -0.0
1891
+ } else {
1892
+ if (isNegative) result * -1 else result
1893
+ }
1894
+
1895
+ return Ok(JsonNumber(result))
1896
+ }
1897
and parseArray = (parserState: JsonParserState) => {
  // Parses a JSON array starting at the current '[' and leaves the parser
  // positioned just past the matching ']'. Returns `Ok(JsonArray(elems))` with
  // the elements in source order, or `Err(e)` describing the first problem.
  match (expectCodePointAndAdvance(0x5B, parserState)) {
    // '['
    Some(e) => return Err(e),
    None => {
      skipWhiteSpace(parserState)

      // Elements are accumulated in reverse and flipped once at the end.
      let mut elems = []: List<Json>

      // `first` stays true until the first element has been parsed.
      let mut first = true
      // `trailingComma` is true while the most recent token was a ','.
      let mut trailingComma = false
      while (true) {
        let c = parserState.currentCodePoint
        match (c) {
          0x2C => { // ','
            if (first) {
              return Err(
                buildUnexpectedTokenError(
                  parserState,
                  "unexpected comma at beginning of array"
                ),
              )
            }
            // Two separators in a row (e.g. `[1,,2]`) are not valid JSON.
            if (trailingComma) {
              return Err(
                buildUnexpectedTokenError(
                  parserState,
                  "expected a value, found ','"
                ),
              )
            }
            trailingComma = true
            next(parserState)
            skipWhiteSpace(parserState)
          },
          0x5D => { // ']'
            next(parserState)
            break
          },
          -1 => { // EOF
            return Err(
              buildUnexpectedTokenError(parserState, "unexpected end of array"),
            )
          },
          _ => {
            // Every element after the first must be preceded by a comma, so
            // input such as `[1 2]` is rejected instead of silently accepted.
            if (!first && !trailingComma) {
              let detail = "expected ',' or ']' after array element, found " ++
                formatCodePointOrEOF(c)
              return Err(buildUnexpectedTokenError(parserState, detail))
            }
            // note that parseValue skips initial and final whitespace
            match (parseValue(parserState)) {
              Ok(elem) => {
                first = false
                trailingComma = false
                elems = [elem, ...elems]
              },
              Err(e) => return Err(e),
            }
          },
        }
      }

      // A comma directly before the closing ']' (e.g. `[1,]`) is an error.
      if (trailingComma) {
        return Err(
          buildUnexpectedTokenError(parserState, "unexpected end of array"),
        )
      } else {
        return Ok(JsonArray(List.reverse(elems)))
      }
    },
  }
}
1957
and parseObject = (parserState: JsonParserState) => {
  // Parses a JSON object starting at the current '{' and leaves the parser
  // positioned just past the matching '}'. Returns `Ok(JsonObject(entries))`
  // with the key-value pairs in source order, or `Err(e)` describing the first
  // problem.
  match (expectCodePointAndAdvance(0x7B, parserState)) {
    // '{'
    Some(e) => return Err(e),
    None => {
      // Entries are accumulated in reverse and flipped once at the end.
      let mut entries = []: List<(String, Json)>

      // `first` stays true until the first entry has been parsed.
      let mut first = true

      // one iteration of this loop should correspond to a key-value pair
      // `trailingComma` is true while the most recent token was a ','.
      let mut trailingComma = false
      while (true) {
        skipWhiteSpace(parserState)

        let c = parserState.currentCodePoint
        match (c) {
          -1 => {
            let detail = "expected a key-value pair or the end of the object"
            return Err(buildUnexpectedTokenError(parserState, detail))
          },
          0x2C => { // ','
            if (first) {
              let detail =
                "expected a key-value pair or the end of the object, found ','"
              return Err(buildUnexpectedTokenError(parserState, detail))
            }
            // Two separators in a row (e.g. `{"a":1,,"b":2}`) are not valid
            // JSON.
            if (trailingComma) {
              let detail = "expected a key-value pair, found ','"
              return Err(buildUnexpectedTokenError(parserState, detail))
            }
            trailingComma = true
            ignore(next(parserState))
          },
          0x7D => { // '}'
            if (trailingComma) {
              let detail = "unexpected trailing comma in object"
              return Err(buildUnexpectedTokenError(parserState, detail))
            }
            next(parserState)
            break
          },
          _ => {
            // Every entry after the first must be preceded by a comma, so
            // input such as `{"a":1 "b":2}` is rejected instead of silently
            // accepted.
            if (!first && !trailingComma) {
              let detail = "expected ',' or '}' after object entry, found " ++
                formatCodePointOrEOF(c)
              return Err(buildUnexpectedTokenError(parserState, detail))
            }
            trailingComma = false
            // A new entry in current object.
            // Just call parseString directly. In case the current character is not '"', it will return an error we can pass along.
            match (parseString(parserState)) {
              Ok(key) => {
                skipWhiteSpace(parserState)

                match (expectCodePointAndAdvance(0x3A, parserState)) {
                  // ':'
                  None => {
                    // note that parseValue skips initial and final whitespace
                    match (parseValue(parserState)) {
                      Ok(value) => {
                        entries = [(key, value), ...entries]
                        first = false
                      },
                      Err(e) => return Err(e),
                    }
                  },
                  Some(e) => return Err(e),
                }
              },
              Err(e) => return Err(e),
            }
          },
        }
      }
      // end of entry loop

      return Ok(JsonObject(List.reverse(entries)))
    },
  }
}
2029
+
2030
/**
 * Reads a JSON document from a string and builds the corresponding `Json`
 * data structure. The whole input must be consumed: anything other than
 * whitespace after the root value is reported as an error.
 *
 * @param str: The JSON string to parse
 * @returns `Ok(json)` containing the parsed data structure on a successful parse or `Err(err)` containing a parse error otherwise
 *
 * @example
 * assert parse("{\"currency\":\"$\",\"price\":119}") == Ok(
 *   JsonObject([
 *     ("currency", JsonString("$")),
 *     ("price", JsonNumber(119))
 *   ])
 * )
 *
 * @since v0.6.0
 */
provide let parse: (str: String) => Result<Json, JsonParseError> = (str: String) => {
  // Fresh parser state positioned on the first code point of the input.
  let state = {
    string: str,
    bufferParse: Buffer.make(16),
    currentCodePoint: readCodePoint(0, str),
    pos: 0,
    bytePos: 0,
  }: JsonParserState

  let parsed = parseValue(state)
  skipWhiteSpace(state)

  match (parsed) {
    // A value was parsed but input remains after it: report the leftover.
    Ok(_) when !isAtEndOfInput(state) => {
      let found = formatCodePointOrEOF(state.currentCodePoint)
      Err(buildUnexpectedTokenError(state, "expected end of input, found " ++ found))
    },
    // Either a clean parse of the whole input, or an error to pass along.
    _ => parsed,
  }
}