@grain/stdlib 0.4.4 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +87 -0
- package/LICENSE +1 -1
- package/array.gr +92 -73
- package/array.md +18 -18
- package/bigint.gr +497 -0
- package/bigint.md +811 -0
- package/buffer.gr +56 -217
- package/buffer.md +24 -17
- package/bytes.gr +103 -205
- package/bytes.md +19 -0
- package/char.gr +152 -166
- package/char.md +200 -0
- package/exception.md +6 -0
- package/float32.gr +159 -82
- package/float32.md +315 -0
- package/float64.gr +163 -82
- package/float64.md +315 -0
- package/hash.gr +53 -49
- package/int32.gr +479 -230
- package/int32.md +937 -0
- package/int64.gr +479 -230
- package/int64.md +937 -0
- package/list.gr +530 -116
- package/list.md +1141 -0
- package/map.gr +302 -121
- package/map.md +525 -0
- package/number.gr +51 -57
- package/number.md +37 -3
- package/option.gr +25 -25
- package/option.md +1 -1
- package/package.json +3 -3
- package/pervasives.gr +504 -52
- package/pervasives.md +1116 -0
- package/queue.gr +8 -1
- package/queue.md +10 -0
- package/random.gr +196 -0
- package/random.md +179 -0
- package/range.gr +26 -26
- package/regex.gr +1833 -842
- package/regex.md +11 -11
- package/result.md +1 -1
- package/runtime/bigint.gr +2045 -0
- package/runtime/bigint.md +326 -0
- package/runtime/dataStructures.gr +99 -279
- package/runtime/dataStructures.md +391 -0
- package/runtime/debug.gr +0 -1
- package/runtime/debug.md +6 -0
- package/runtime/equal.gr +40 -37
- package/runtime/equal.md +6 -0
- package/runtime/exception.gr +28 -15
- package/runtime/exception.md +30 -0
- package/runtime/gc.gr +50 -20
- package/runtime/gc.md +36 -0
- package/runtime/malloc.gr +32 -22
- package/runtime/malloc.md +55 -0
- package/runtime/numberUtils.gr +297 -142
- package/runtime/numberUtils.md +54 -0
- package/runtime/numbers.gr +1204 -453
- package/runtime/numbers.md +300 -0
- package/runtime/string.gr +193 -228
- package/runtime/string.md +24 -0
- package/runtime/stringUtils.gr +62 -38
- package/runtime/stringUtils.md +6 -0
- package/runtime/unsafe/constants.gr +17 -0
- package/runtime/unsafe/constants.md +72 -0
- package/runtime/unsafe/conv.gr +10 -10
- package/runtime/unsafe/conv.md +71 -0
- package/runtime/unsafe/errors.md +204 -0
- package/runtime/unsafe/memory.gr +14 -3
- package/runtime/unsafe/memory.md +54 -0
- package/runtime/unsafe/printWasm.gr +4 -4
- package/runtime/unsafe/printWasm.md +24 -0
- package/runtime/unsafe/tags.gr +11 -10
- package/runtime/unsafe/tags.md +120 -0
- package/runtime/unsafe/wasmf32.gr +9 -2
- package/runtime/unsafe/wasmf32.md +168 -0
- package/runtime/unsafe/wasmf64.gr +9 -2
- package/runtime/unsafe/wasmf64.md +168 -0
- package/runtime/unsafe/wasmi32.gr +65 -47
- package/runtime/unsafe/wasmi32.md +282 -0
- package/runtime/unsafe/wasmi64.gr +78 -50
- package/runtime/unsafe/wasmi64.md +300 -0
- package/runtime/utils/printing.gr +62 -0
- package/runtime/utils/printing.md +18 -0
- package/runtime/wasi.gr +200 -46
- package/runtime/wasi.md +839 -0
- package/set.gr +125 -121
- package/set.md +24 -21
- package/stack.gr +29 -29
- package/stack.md +4 -6
- package/string.gr +434 -415
- package/string.md +3 -3
- package/sys/file.gr +477 -482
- package/sys/process.gr +33 -47
- package/sys/random.gr +48 -20
- package/sys/random.md +38 -0
- package/sys/time.gr +12 -28
package/string.gr
CHANGED
|
@@ -9,7 +9,15 @@
|
|
|
9
9
|
import WasmI32 from "runtime/unsafe/wasmi32"
|
|
10
10
|
import Memory from "runtime/unsafe/memory"
|
|
11
11
|
import Exception from "runtime/exception"
|
|
12
|
-
import
|
|
12
|
+
import Conv from "runtime/unsafe/conv"
|
|
13
|
+
import {
|
|
14
|
+
tagSimpleNumber,
|
|
15
|
+
tagChar,
|
|
16
|
+
untagChar,
|
|
17
|
+
allocateArray,
|
|
18
|
+
allocateString,
|
|
19
|
+
allocateBytes,
|
|
20
|
+
} from "runtime/dataStructures"
|
|
13
21
|
|
|
14
22
|
/**
|
|
15
23
|
* @section Types: Type declarations included in the String module.
|
|
@@ -26,6 +34,8 @@ export enum Encoding {
|
|
|
26
34
|
UTF32_LE,
|
|
27
35
|
}
|
|
28
36
|
|
|
37
|
+
exception MalformedUnicode
|
|
38
|
+
|
|
29
39
|
/**
|
|
30
40
|
* @section Values: Functions for working with the String data type.
|
|
31
41
|
*/
|
|
@@ -37,9 +47,9 @@ export enum Encoding {
|
|
|
37
47
|
* @param str2: The ending string
|
|
38
48
|
* @returns The combined string
|
|
39
49
|
*
|
|
40
|
-
* @example String.concat("Foo", "
|
|
50
|
+
* @example String.concat("Foo", "Bar") == "FooBar"
|
|
41
51
|
*
|
|
42
|
-
* @since v0.
|
|
52
|
+
* @since v0.2.0
|
|
43
53
|
*/
|
|
44
54
|
export let concat = (++)
|
|
45
55
|
|
|
@@ -53,8 +63,8 @@ export let concat = (++)
|
|
|
53
63
|
*
|
|
54
64
|
* @since v0.1.0
|
|
55
65
|
*/
|
|
56
|
-
@
|
|
57
|
-
export let
|
|
66
|
+
@unsafe
|
|
67
|
+
export let length = (string: String) => {
|
|
58
68
|
let string = WasmI32.fromGrain(string)
|
|
59
69
|
let size = WasmI32.load(string, 4n)
|
|
60
70
|
|
|
@@ -70,18 +80,7 @@ export let rec length = (string: String) => {
|
|
|
70
80
|
ptr = WasmI32.add(ptr, 1n)
|
|
71
81
|
}
|
|
72
82
|
|
|
73
|
-
|
|
74
|
-
Memory.decRef(WasmI32.fromGrain(length))
|
|
75
|
-
Memory.decRef(WasmI32.fromGrain(string))
|
|
76
|
-
ret
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
// @disableGC-safe wrapper
|
|
80
|
-
@disableGC
|
|
81
|
-
let wasmSafeLength = (s: String) => {
|
|
82
|
-
Memory.incRef(WasmI32.fromGrain(length))
|
|
83
|
-
Memory.incRef(WasmI32.fromGrain(s))
|
|
84
|
-
length(s)
|
|
83
|
+
Conv.wasmI32ToNumber(len)
|
|
85
84
|
}
|
|
86
85
|
|
|
87
86
|
/**
|
|
@@ -94,21 +93,10 @@ let wasmSafeLength = (s: String) => {
|
|
|
94
93
|
*
|
|
95
94
|
* @since v0.1.0
|
|
96
95
|
*/
|
|
97
|
-
@
|
|
98
|
-
export let
|
|
96
|
+
@unsafe
|
|
97
|
+
export let byteLength = (string: String) => {
|
|
99
98
|
let string = WasmI32.fromGrain(string)
|
|
100
|
-
|
|
101
|
-
Memory.decRef(WasmI32.fromGrain(byteLength))
|
|
102
|
-
Memory.decRef(WasmI32.fromGrain(string))
|
|
103
|
-
ret
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
// @disableGC-safe wrapper
|
|
107
|
-
@disableGC
|
|
108
|
-
let wasmSafeByteLength = (string: String) => {
|
|
109
|
-
Memory.incRef(WasmI32.fromGrain(byteLength))
|
|
110
|
-
Memory.incRef(WasmI32.fromGrain(string))
|
|
111
|
-
byteLength(string)
|
|
99
|
+
Conv.wasmI32ToNumber(WasmI32.load(string, 4n))
|
|
112
100
|
}
|
|
113
101
|
|
|
114
102
|
/**
|
|
@@ -122,8 +110,8 @@ let wasmSafeByteLength = (string: String) => {
|
|
|
122
110
|
*
|
|
123
111
|
* @since v0.3.0
|
|
124
112
|
*/
|
|
125
|
-
@
|
|
126
|
-
export let
|
|
113
|
+
@unsafe
|
|
114
|
+
export let indexOf = (search: String, string: String) => {
|
|
127
115
|
let search = WasmI32.fromGrain(search)
|
|
128
116
|
let string = WasmI32.fromGrain(string)
|
|
129
117
|
|
|
@@ -137,10 +125,8 @@ export let rec indexOf = (search: String, string: String) => {
|
|
|
137
125
|
let (-) = WasmI32.sub
|
|
138
126
|
let (&) = WasmI32.and
|
|
139
127
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
Memory.incRef(WasmI32.fromGrain(none))
|
|
143
|
-
none
|
|
128
|
+
if (psize > size) {
|
|
129
|
+
None
|
|
144
130
|
} else {
|
|
145
131
|
let mut idx = 0n
|
|
146
132
|
let mut ptr = string + 8n
|
|
@@ -168,20 +154,73 @@ export let rec indexOf = (search: String, string: String) => {
|
|
|
168
154
|
}
|
|
169
155
|
|
|
170
156
|
if (result == -1n) {
|
|
171
|
-
|
|
172
|
-
Memory.incRef(WasmI32.fromGrain(none))
|
|
173
|
-
none
|
|
157
|
+
None
|
|
174
158
|
} else {
|
|
175
|
-
Memory.incRef(WasmI32.fromGrain(Some))
|
|
176
159
|
Some(tagSimpleNumber(result))
|
|
177
160
|
}
|
|
178
161
|
}
|
|
179
|
-
Memory.decRef(WasmI32.fromGrain(search))
|
|
180
|
-
Memory.decRef(WasmI32.fromGrain(string))
|
|
181
|
-
Memory.decRef(WasmI32.fromGrain(indexOf))
|
|
182
|
-
ret
|
|
183
162
|
}
|
|
184
163
|
|
|
164
|
+
@disableGC
|
|
165
|
+
let getCodePoint = (ptr: WasmI32) => {
|
|
166
|
+
// Algorithm from https://encoding.spec.whatwg.org/#utf-8-decoder
|
|
167
|
+
let (+) = WasmI32.add
|
|
168
|
+
let (==) = WasmI32.eq
|
|
169
|
+
let (>=) = WasmI32.geU
|
|
170
|
+
let (<=) = WasmI32.leU
|
|
171
|
+
let (<<) = WasmI32.shl
|
|
172
|
+
let (&) = WasmI32.and
|
|
173
|
+
let (|) = WasmI32.or
|
|
174
|
+
|
|
175
|
+
let mut codePoint = 0n
|
|
176
|
+
let mut bytesSeen = 0n
|
|
177
|
+
let mut bytesNeeded = 0n
|
|
178
|
+
let mut lowerBoundary = 0x80n
|
|
179
|
+
let mut upperBoundary = 0xBFn
|
|
180
|
+
|
|
181
|
+
let mut offset = 0n
|
|
182
|
+
|
|
183
|
+
let mut result = 0n
|
|
184
|
+
|
|
185
|
+
while (true) {
|
|
186
|
+
let byte = WasmI32.load8U(ptr + offset, 0n)
|
|
187
|
+
offset += 1n
|
|
188
|
+
if (bytesNeeded == 0n) {
|
|
189
|
+
if (byte >= 0x00n && byte <= 0x7Fn) {
|
|
190
|
+
result = byte
|
|
191
|
+
break
|
|
192
|
+
} else if (byte >= 0xC2n && byte <= 0xDFn) {
|
|
193
|
+
bytesNeeded = 1n
|
|
194
|
+
codePoint = byte & 0x1Fn
|
|
195
|
+
} else if (byte >= 0xE0n && byte <= 0xEFn) {
|
|
196
|
+
if (byte == 0xE0n) lowerBoundary = 0xA0n
|
|
197
|
+
if (byte == 0xEDn) upperBoundary = 0x9Fn
|
|
198
|
+
bytesNeeded = 2n
|
|
199
|
+
codePoint = byte & 0xFn
|
|
200
|
+
} else if (byte >= 0xF0n && byte <= 0xF4n) {
|
|
201
|
+
if (byte == 0xF0n) lowerBoundary = 0x90n
|
|
202
|
+
if (byte == 0xF4n) upperBoundary = 0x8Fn
|
|
203
|
+
bytesNeeded = 3n
|
|
204
|
+
codePoint = byte & 0x7n
|
|
205
|
+
} else {
|
|
206
|
+
throw MalformedUnicode
|
|
207
|
+
}
|
|
208
|
+
continue
|
|
209
|
+
}
|
|
210
|
+
if (!(lowerBoundary <= byte && byte <= upperBoundary)) {
|
|
211
|
+
throw MalformedUnicode
|
|
212
|
+
}
|
|
213
|
+
lowerBoundary = 0x80n
|
|
214
|
+
upperBoundary = 0xBFn
|
|
215
|
+
codePoint = codePoint << 6n | byte & 0x3Fn
|
|
216
|
+
bytesSeen += 1n
|
|
217
|
+
if (bytesSeen == bytesNeeded) {
|
|
218
|
+
result = codePoint
|
|
219
|
+
break
|
|
220
|
+
}
|
|
221
|
+
}
|
|
222
|
+
result
|
|
223
|
+
}
|
|
185
224
|
/**
|
|
186
225
|
* Get the character at the position in the input string.
|
|
187
226
|
*
|
|
@@ -193,13 +232,10 @@ export let rec indexOf = (search: String, string: String) => {
|
|
|
193
232
|
*
|
|
194
233
|
* @since v0.4.0
|
|
195
234
|
*/
|
|
196
|
-
@
|
|
197
|
-
export let
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
Memory.incRef(WasmI32.fromGrain((++)))
|
|
201
|
-
Memory.incRef(WasmI32.fromGrain(toString))
|
|
202
|
-
fail ("Invalid offset: " ++ toString(position))
|
|
235
|
+
@unsafe
|
|
236
|
+
export let charAt = (position, string: String) => {
|
|
237
|
+
if (length(string) <= position || position < 0) {
|
|
238
|
+
fail "Invalid offset: " ++ toString(position)
|
|
203
239
|
}
|
|
204
240
|
// Implementation is similar to explodeHelp, but doesn't perform unneeded memory allocations
|
|
205
241
|
let (>>>) = WasmI32.shrU
|
|
@@ -207,15 +243,18 @@ export let rec charAt = (position, string: String) => {
|
|
|
207
243
|
let (&) = WasmI32.and
|
|
208
244
|
let (<) = WasmI32.ltU
|
|
209
245
|
let (==) = WasmI32.eq
|
|
210
|
-
let size = WasmI32.fromGrain(
|
|
211
|
-
let len = WasmI32.fromGrain(wasmSafeLength(string)) >>> 1n
|
|
246
|
+
let size = WasmI32.fromGrain(byteLength(string)) >>> 1n
|
|
212
247
|
let position = WasmI32.fromGrain(position) >>> 1n
|
|
213
248
|
let string = WasmI32.fromGrain(string)
|
|
214
|
-
let mut ptr = string + 8n
|
|
215
|
-
let end = ptr + size
|
|
216
|
-
let mut counter = 0n
|
|
217
|
-
let mut result = 0n
|
|
249
|
+
let mut ptr = string + 8n
|
|
250
|
+
let end = ptr + size
|
|
251
|
+
let mut counter = 0n
|
|
252
|
+
let mut result = WasmI32.toGrain(0n): Char
|
|
218
253
|
while (ptr < end) {
|
|
254
|
+
if (counter == position) {
|
|
255
|
+
result = tagChar(getCodePoint(ptr))
|
|
256
|
+
break
|
|
257
|
+
}
|
|
219
258
|
let byte = WasmI32.load8U(ptr, 0n)
|
|
220
259
|
let n = if ((byte & 0x80n) == 0x00n) {
|
|
221
260
|
1n
|
|
@@ -226,25 +265,16 @@ export let rec charAt = (position, string: String) => {
|
|
|
226
265
|
} else {
|
|
227
266
|
2n
|
|
228
267
|
}
|
|
229
|
-
if (counter == position) {
|
|
230
|
-
let c = allocateChar()
|
|
231
|
-
Memory.copy(c + 4n, ptr, n)
|
|
232
|
-
result = c
|
|
233
|
-
break
|
|
234
|
-
}
|
|
235
268
|
counter += 1n
|
|
236
269
|
ptr += n
|
|
237
270
|
}
|
|
238
|
-
if (result
|
|
271
|
+
if (WasmI32.eqz(WasmI32.fromGrain(result))) {
|
|
239
272
|
fail "charAt: should be impossible (please report)"
|
|
240
273
|
}
|
|
241
|
-
|
|
242
|
-
Memory.decRef(WasmI32.fromGrain(string))
|
|
243
|
-
Memory.decRef(WasmI32.fromGrain(charAt))
|
|
244
|
-
ret
|
|
274
|
+
result
|
|
245
275
|
}
|
|
246
276
|
|
|
247
|
-
@
|
|
277
|
+
@unsafe
|
|
248
278
|
let explodeHelp = (s: String, chars) => {
|
|
249
279
|
let (>>>) = WasmI32.shrU
|
|
250
280
|
let (+) = WasmI32.add
|
|
@@ -252,8 +282,8 @@ let explodeHelp = (s: String, chars) => {
|
|
|
252
282
|
let (<) = WasmI32.ltU
|
|
253
283
|
let (==) = WasmI32.eq
|
|
254
284
|
|
|
255
|
-
let size = WasmI32.fromGrain(
|
|
256
|
-
let len = WasmI32.fromGrain(
|
|
285
|
+
let size = WasmI32.fromGrain(byteLength(s)) >>> 1n
|
|
286
|
+
let len = WasmI32.fromGrain(length(s)) >>> 1n
|
|
257
287
|
|
|
258
288
|
let s = WasmI32.fromGrain(s)
|
|
259
289
|
|
|
@@ -276,9 +306,7 @@ let explodeHelp = (s: String, chars) => {
|
|
|
276
306
|
}
|
|
277
307
|
|
|
278
308
|
let c = if (chars) {
|
|
279
|
-
|
|
280
|
-
Memory.copy(c + 4n, ptr, n)
|
|
281
|
-
c
|
|
309
|
+
WasmI32.fromGrain(tagChar(getCodePoint(ptr)))
|
|
282
310
|
} else {
|
|
283
311
|
let s = allocateString(n)
|
|
284
312
|
Memory.copy(s + 8n, ptr, n)
|
|
@@ -303,15 +331,9 @@ let explodeHelp = (s: String, chars) => {
|
|
|
303
331
|
*
|
|
304
332
|
* @since v0.3.0
|
|
305
333
|
*/
|
|
306
|
-
@
|
|
307
|
-
export let
|
|
308
|
-
|
|
309
|
-
// decRef'd in `explodeHelp`
|
|
310
|
-
let ret = WasmI32.toGrain(explodeHelp(string, true)) : (Array<Char>)
|
|
311
|
-
|
|
312
|
-
Memory.decRef(WasmI32.fromGrain(string))
|
|
313
|
-
Memory.decRef(WasmI32.fromGrain(explode))
|
|
314
|
-
ret
|
|
334
|
+
@unsafe
|
|
335
|
+
export let explode = string => {
|
|
336
|
+
WasmI32.toGrain(explodeHelp(string, true)): Array<Char>
|
|
315
337
|
}
|
|
316
338
|
|
|
317
339
|
/**
|
|
@@ -324,32 +346,34 @@ export let rec explode = (string) => {
|
|
|
324
346
|
*
|
|
325
347
|
* @since v0.3.0
|
|
326
348
|
*/
|
|
327
|
-
@
|
|
328
|
-
export let
|
|
349
|
+
@unsafe
|
|
350
|
+
export let implode = (arr: Array<Char>) => {
|
|
329
351
|
let (+) = WasmI32.add
|
|
330
|
-
let (
|
|
352
|
+
let (-) = WasmI32.sub
|
|
353
|
+
let (*) = WasmI32.mul
|
|
331
354
|
let (<) = WasmI32.ltU
|
|
355
|
+
let (>) = WasmI32.gtU
|
|
356
|
+
let (<=) = WasmI32.leU
|
|
332
357
|
let (<<) = WasmI32.shl
|
|
358
|
+
let (>>>) = WasmI32.shrU
|
|
333
359
|
let (&) = WasmI32.and
|
|
360
|
+
let (|) = WasmI32.or
|
|
334
361
|
|
|
335
|
-
let
|
|
336
|
-
|
|
337
|
-
let arrLength = WasmI32.load(arr, 4n)
|
|
362
|
+
let arrLength = WasmI32.load(WasmI32.fromGrain(arr), 4n)
|
|
338
363
|
|
|
339
364
|
let mut stringByteLength = 0n
|
|
340
365
|
|
|
341
366
|
for (let mut i = 0n; i < arrLength; i += 1n) {
|
|
342
|
-
let
|
|
343
|
-
let byte = WasmI32.load8U(char, 4n)
|
|
367
|
+
let usv = untagChar(arr[tagSimpleNumber(i)])
|
|
344
368
|
|
|
345
|
-
let n = if (
|
|
369
|
+
let n = if (usv <= 0x7Fn) {
|
|
346
370
|
1n
|
|
347
|
-
} else if (
|
|
348
|
-
|
|
349
|
-
} else if (
|
|
371
|
+
} else if (usv <= 0x07FFn) {
|
|
372
|
+
2n
|
|
373
|
+
} else if (usv <= 0xFFFFn) {
|
|
350
374
|
3n
|
|
351
375
|
} else {
|
|
352
|
-
|
|
376
|
+
4n
|
|
353
377
|
}
|
|
354
378
|
|
|
355
379
|
stringByteLength += n
|
|
@@ -359,31 +383,41 @@ export let rec implode = (arr: Array<Char>) => {
|
|
|
359
383
|
let mut offset = 8n
|
|
360
384
|
|
|
361
385
|
for (let mut i = 0n; i < arrLength; i += 1n) {
|
|
362
|
-
let
|
|
363
|
-
let byte = WasmI32.load8U(char, 4n)
|
|
386
|
+
let usv = untagChar(arr[tagSimpleNumber(i)])
|
|
364
387
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
4n
|
|
369
|
-
} else if ((byte & 0xE0n) == 0xE0n) {
|
|
370
|
-
3n
|
|
388
|
+
if (usv < 0x7Fn) {
|
|
389
|
+
WasmI32.store8(str + offset, usv, 0n)
|
|
390
|
+
offset += 1n
|
|
371
391
|
} else {
|
|
372
|
-
|
|
392
|
+
let mut count = 0n
|
|
393
|
+
let mut marker = 0n
|
|
394
|
+
if (usv <= 0x07FFn) {
|
|
395
|
+
count = 1n
|
|
396
|
+
marker = 0xC0n
|
|
397
|
+
} else if (usv <= 0xFFFFn) {
|
|
398
|
+
count = 2n
|
|
399
|
+
marker = 0xE0n
|
|
400
|
+
} else {
|
|
401
|
+
count = 3n
|
|
402
|
+
marker = 0xF0n
|
|
403
|
+
}
|
|
404
|
+
WasmI32.store8(str + offset, (usv >>> 6n * count) + marker, 0n)
|
|
405
|
+
offset += 1n
|
|
406
|
+
|
|
407
|
+
while (count > 0n) {
|
|
408
|
+
let temp = usv >>> 6n * (count - 1n)
|
|
409
|
+
WasmI32.store8(str + offset, 0x80n | temp & 0x3Fn, 0n)
|
|
410
|
+
count -= 1n
|
|
411
|
+
offset += 1n
|
|
412
|
+
}
|
|
373
413
|
}
|
|
374
|
-
|
|
375
|
-
Memory.copy(str + offset, char + 4n, n)
|
|
376
|
-
offset += n
|
|
377
414
|
}
|
|
378
415
|
|
|
379
|
-
|
|
380
|
-
Memory.decRef(WasmI32.fromGrain(arr))
|
|
381
|
-
Memory.decRef(WasmI32.fromGrain(implode))
|
|
382
|
-
ret
|
|
416
|
+
WasmI32.toGrain(str): String
|
|
383
417
|
}
|
|
384
418
|
|
|
385
419
|
// Helper to get the length in constant time without depending on Array
|
|
386
|
-
primitive arrayLength
|
|
420
|
+
primitive arrayLength: Array<a> -> Number = "@array.length"
|
|
387
421
|
|
|
388
422
|
/**
|
|
389
423
|
* Create a string that is the given string reversed.
|
|
@@ -395,7 +429,7 @@ primitive arrayLength : Array<a> -> Number = "@array.length"
|
|
|
395
429
|
*
|
|
396
430
|
* @since v0.4.5
|
|
397
431
|
*/
|
|
398
|
-
export let reverse =
|
|
432
|
+
export let reverse = string => {
|
|
399
433
|
let mut arr = explode(string)
|
|
400
434
|
let len = arrayLength(arr)
|
|
401
435
|
let halfLen = len / 2
|
|
@@ -418,8 +452,8 @@ export let reverse = (string) => {
|
|
|
418
452
|
*
|
|
419
453
|
* @example String.split(" ", "Hello world") == [> "Hello", "world"]
|
|
420
454
|
*/
|
|
421
|
-
@
|
|
422
|
-
export let
|
|
455
|
+
@unsafe
|
|
456
|
+
export let split = (separator: String, string: String) => {
|
|
423
457
|
let (+) = WasmI32.add
|
|
424
458
|
let (-) = WasmI32.sub
|
|
425
459
|
let (==) = WasmI32.eq
|
|
@@ -429,16 +463,13 @@ export let rec split = (separator: String, string: String) => {
|
|
|
429
463
|
let (>>) = WasmI32.shrS
|
|
430
464
|
let (&) = WasmI32.and
|
|
431
465
|
|
|
432
|
-
let size = WasmI32.fromGrain(
|
|
433
|
-
let psize = WasmI32.fromGrain(
|
|
466
|
+
let size = WasmI32.fromGrain(byteLength(string)) >> 1n
|
|
467
|
+
let psize = WasmI32.fromGrain(byteLength(separator)) >> 1n
|
|
434
468
|
|
|
435
|
-
|
|
469
|
+
if (psize == 0n) {
|
|
436
470
|
WasmI32.toGrain(explodeHelp(string, false)): Array<String>
|
|
437
471
|
} else if (psize > size) {
|
|
438
|
-
|
|
439
|
-
let ptr = allocateArray(1n)
|
|
440
|
-
WasmI32.store(ptr, Memory.incRef(string), 8n)
|
|
441
|
-
WasmI32.toGrain(ptr): Array<String>
|
|
472
|
+
[> string]
|
|
442
473
|
} else {
|
|
443
474
|
let string = WasmI32.fromGrain(string)
|
|
444
475
|
let separator = WasmI32.fromGrain(separator)
|
|
@@ -501,10 +532,6 @@ export let rec split = (separator: String, string: String) => {
|
|
|
501
532
|
|
|
502
533
|
WasmI32.toGrain(arr): Array<String>
|
|
503
534
|
}
|
|
504
|
-
Memory.decRef(WasmI32.fromGrain(separator))
|
|
505
|
-
Memory.decRef(WasmI32.fromGrain(string))
|
|
506
|
-
Memory.decRef(WasmI32.fromGrain(split))
|
|
507
|
-
ret
|
|
508
535
|
}
|
|
509
536
|
|
|
510
537
|
/**
|
|
@@ -519,8 +546,8 @@ export let rec split = (separator: String, string: String) => {
|
|
|
519
546
|
*
|
|
520
547
|
* @since v0.1.0
|
|
521
548
|
*/
|
|
522
|
-
@
|
|
523
|
-
export let
|
|
549
|
+
@unsafe
|
|
550
|
+
export let slice = (start: Number, to: Number, string: String) => {
|
|
524
551
|
let (+) = WasmI32.add
|
|
525
552
|
let (-) = WasmI32.sub
|
|
526
553
|
let (==) = WasmI32.eq
|
|
@@ -533,8 +560,8 @@ export let rec slice = (start: Number, to: Number, string: String) => {
|
|
|
533
560
|
let startOrig = start
|
|
534
561
|
let toOrig = to
|
|
535
562
|
|
|
536
|
-
let len = WasmI32.fromGrain(
|
|
537
|
-
let size = WasmI32.fromGrain(
|
|
563
|
+
let len = WasmI32.fromGrain(length(string)) >> 1n
|
|
564
|
+
let size = WasmI32.fromGrain(byteLength(string)) >> 1n
|
|
538
565
|
|
|
539
566
|
let string = WasmI32.fromGrain(string)
|
|
540
567
|
|
|
@@ -597,12 +624,7 @@ export let rec slice = (start: Number, to: Number, string: String) => {
|
|
|
597
624
|
|
|
598
625
|
Memory.copy(newString + 8n, begin, newSize)
|
|
599
626
|
|
|
600
|
-
|
|
601
|
-
Memory.decRef(WasmI32.fromGrain(startOrig))
|
|
602
|
-
Memory.decRef(WasmI32.fromGrain(toOrig))
|
|
603
|
-
Memory.decRef(WasmI32.fromGrain(string))
|
|
604
|
-
Memory.decRef(WasmI32.fromGrain(slice))
|
|
605
|
-
ret
|
|
627
|
+
WasmI32.toGrain(newString): String
|
|
606
628
|
}
|
|
607
629
|
|
|
608
630
|
/**
|
|
@@ -616,8 +638,8 @@ export let rec slice = (start: Number, to: Number, string: String) => {
|
|
|
616
638
|
*
|
|
617
639
|
* @since v0.1.0
|
|
618
640
|
*/
|
|
619
|
-
@
|
|
620
|
-
export let
|
|
641
|
+
@unsafe
|
|
642
|
+
export let contains = (search: String, string: String) => {
|
|
621
643
|
// "Not So Naive" string search algorithm
|
|
622
644
|
// searching phase in O(nm) time complexity
|
|
623
645
|
// slightly (by coefficient) sub-linear in the average case
|
|
@@ -634,8 +656,8 @@ export let rec contains = (search: String, string: String) => {
|
|
|
634
656
|
let pOrig = search
|
|
635
657
|
let sOrig = string
|
|
636
658
|
|
|
637
|
-
let n = WasmI32.fromGrain(
|
|
638
|
-
let m = WasmI32.fromGrain(
|
|
659
|
+
let n = WasmI32.fromGrain(byteLength(string)) >> 1n
|
|
660
|
+
let m = WasmI32.fromGrain(byteLength(search)) >> 1n
|
|
639
661
|
|
|
640
662
|
let mut string = WasmI32.fromGrain(string)
|
|
641
663
|
let mut search = WasmI32.fromGrain(search)
|
|
@@ -645,7 +667,7 @@ export let rec contains = (search: String, string: String) => {
|
|
|
645
667
|
|
|
646
668
|
let mut j = 0n, k = 0n, ell = 0n
|
|
647
669
|
|
|
648
|
-
|
|
670
|
+
if (m > n) {
|
|
649
671
|
// Bail if pattern length is longer than input length
|
|
650
672
|
false
|
|
651
673
|
} else if (m < 2n) {
|
|
@@ -681,7 +703,10 @@ export let rec contains = (search: String, string: String) => {
|
|
|
681
703
|
if (WasmI32.load8U(search, 1n) != WasmI32.load8U(string + j, 1n)) {
|
|
682
704
|
j += k
|
|
683
705
|
} else {
|
|
684
|
-
if (
|
|
706
|
+
if (
|
|
707
|
+
Memory.compare(search + 2n, string + j + 2n, m - 2n) == 0n &&
|
|
708
|
+
WasmI32.load8U(search, 0n) == WasmI32.load8U(string + j, 0n)
|
|
709
|
+
) {
|
|
685
710
|
result = true
|
|
686
711
|
break
|
|
687
712
|
}
|
|
@@ -690,10 +715,6 @@ export let rec contains = (search: String, string: String) => {
|
|
|
690
715
|
}
|
|
691
716
|
result
|
|
692
717
|
}
|
|
693
|
-
Memory.decRef(WasmI32.fromGrain(pOrig))
|
|
694
|
-
Memory.decRef(WasmI32.fromGrain(sOrig))
|
|
695
|
-
Memory.decRef(WasmI32.fromGrain(contains))
|
|
696
|
-
ret
|
|
697
718
|
}
|
|
698
719
|
|
|
699
720
|
/**
|
|
@@ -707,8 +728,8 @@ export let rec contains = (search: String, string: String) => {
|
|
|
707
728
|
*
|
|
708
729
|
* @since v0.1.0
|
|
709
730
|
*/
|
|
710
|
-
@
|
|
711
|
-
export let
|
|
731
|
+
@unsafe
|
|
732
|
+
export let startsWith = (search: String, string: String) => {
|
|
712
733
|
let (+) = WasmI32.add
|
|
713
734
|
let (>) = WasmI32.gtU
|
|
714
735
|
let (==) = WasmI32.eq
|
|
@@ -725,15 +746,11 @@ export let rec startsWith = (search: String, string: String) => {
|
|
|
725
746
|
search += 8n
|
|
726
747
|
|
|
727
748
|
// Bail if pattern length is longer than input length
|
|
728
|
-
|
|
749
|
+
if (m > n) {
|
|
729
750
|
false
|
|
730
751
|
} else {
|
|
731
752
|
Memory.compare(search, string, m) == 0n
|
|
732
753
|
}
|
|
733
|
-
Memory.decRef(WasmI32.fromGrain(pOrig))
|
|
734
|
-
Memory.decRef(WasmI32.fromGrain(sOrig))
|
|
735
|
-
Memory.decRef(WasmI32.fromGrain(startsWith))
|
|
736
|
-
ret
|
|
737
754
|
}
|
|
738
755
|
|
|
739
756
|
/**
|
|
@@ -747,8 +764,8 @@ export let rec startsWith = (search: String, string: String) => {
|
|
|
747
764
|
*
|
|
748
765
|
* @since v0.1.0
|
|
749
766
|
*/
|
|
750
|
-
@
|
|
751
|
-
export let
|
|
767
|
+
@unsafe
|
|
768
|
+
export let endsWith = (search: String, string: String) => {
|
|
752
769
|
let (+) = WasmI32.add
|
|
753
770
|
let (-) = WasmI32.sub
|
|
754
771
|
let (>) = WasmI32.gtU
|
|
@@ -766,15 +783,11 @@ export let rec endsWith = (search: String, string: String) => {
|
|
|
766
783
|
search += 8n
|
|
767
784
|
|
|
768
785
|
// Bail if pattern length is longer than input length
|
|
769
|
-
|
|
786
|
+
if (m > n) {
|
|
770
787
|
false
|
|
771
788
|
} else {
|
|
772
789
|
Memory.compare(search, string + n - m, m) == 0n
|
|
773
790
|
}
|
|
774
|
-
Memory.decRef(WasmI32.fromGrain(pOrig))
|
|
775
|
-
Memory.decRef(WasmI32.fromGrain(sOrig))
|
|
776
|
-
Memory.decRef(WasmI32.fromGrain(endsWith))
|
|
777
|
-
ret
|
|
778
791
|
}
|
|
779
792
|
|
|
780
793
|
// String->Byte encoding and helper functions:
|
|
@@ -784,30 +797,22 @@ let _START_NAME = "start"
|
|
|
784
797
|
let _SIZE_NAME = "size"
|
|
785
798
|
let _OFFSET_NAME = "offset"
|
|
786
799
|
|
|
787
|
-
@
|
|
800
|
+
@unsafe
|
|
788
801
|
let grainToWasmNumber = (num, name) => {
|
|
789
802
|
let num = WasmI32.fromGrain(num)
|
|
790
803
|
if (WasmI32.eqz(WasmI32.and(num, 1n))) {
|
|
791
|
-
Memory.incRef(WasmI32.fromGrain(name))
|
|
792
|
-
Memory.incRef(WasmI32.fromGrain((++)))
|
|
793
804
|
let str = " argument must be an integer"
|
|
794
|
-
Memory.incRef(WasmI32.fromGrain(str))
|
|
795
|
-
Memory.incRef(WasmI32.fromGrain(Exception.InvalidArgument))
|
|
796
805
|
throw Exception.InvalidArgument(name ++ str)
|
|
797
806
|
}
|
|
798
807
|
let num = WasmI32.shrS(num, 1n)
|
|
799
808
|
if (WasmI32.ltS(num, 0n)) {
|
|
800
|
-
Memory.incRef(WasmI32.fromGrain(name))
|
|
801
|
-
Memory.incRef(WasmI32.fromGrain((++)))
|
|
802
809
|
let str = " argument must be non-negative"
|
|
803
|
-
Memory.incRef(WasmI32.fromGrain(str))
|
|
804
|
-
Memory.incRef(WasmI32.fromGrain(Exception.InvalidArgument))
|
|
805
810
|
throw Exception.InvalidArgument(name ++ str)
|
|
806
811
|
}
|
|
807
812
|
num
|
|
808
813
|
}
|
|
809
814
|
|
|
810
|
-
@
|
|
815
|
+
@unsafe
|
|
811
816
|
let utf16Length = (s: String) => {
|
|
812
817
|
let (>>>) = WasmI32.shrU
|
|
813
818
|
let (<<) = WasmI32.shl
|
|
@@ -816,8 +821,8 @@ let utf16Length = (s: String) => {
|
|
|
816
821
|
let (<) = WasmI32.ltU
|
|
817
822
|
let (==) = WasmI32.eq
|
|
818
823
|
|
|
819
|
-
let size = WasmI32.fromGrain(
|
|
820
|
-
let len = WasmI32.fromGrain(
|
|
824
|
+
let size = WasmI32.fromGrain(byteLength(s)) >>> 1n
|
|
825
|
+
let len = WasmI32.fromGrain(length(s)) >>> 1n
|
|
821
826
|
|
|
822
827
|
let s = WasmI32.fromGrain(s)
|
|
823
828
|
|
|
@@ -847,101 +852,31 @@ let utf16Length = (s: String) => {
|
|
|
847
852
|
tagSimpleNumber(size << 1n)
|
|
848
853
|
}
|
|
849
854
|
|
|
850
|
-
@
|
|
855
|
+
@unsafe
|
|
851
856
|
let encodedLength = (s: String, encoding) => {
|
|
852
|
-
match(encoding) {
|
|
853
|
-
UTF32_BE =>
|
|
854
|
-
|
|
855
|
-
wasmSafeLength(s) * 4
|
|
856
|
-
},
|
|
857
|
-
UTF32_LE => {
|
|
858
|
-
Memory.incRef(WasmI32.fromGrain((*)))
|
|
859
|
-
wasmSafeLength(s) * 4
|
|
860
|
-
},
|
|
857
|
+
match (encoding) {
|
|
858
|
+
UTF32_BE => length(s) * 4,
|
|
859
|
+
UTF32_LE => length(s) * 4,
|
|
861
860
|
UTF16_BE => utf16Length(s),
|
|
862
861
|
UTF16_LE => utf16Length(s),
|
|
863
|
-
UTF8 =>
|
|
862
|
+
UTF8 => byteLength(s),
|
|
864
863
|
}
|
|
865
864
|
}
|
|
866
865
|
|
|
867
|
-
|
|
868
|
-
|
|
869
|
-
@
|
|
870
|
-
let
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
874
|
-
|
|
875
|
-
|
|
876
|
-
|
|
877
|
-
|
|
878
|
-
|
|
879
|
-
|
|
880
|
-
|
|
881
|
-
let mut bytesSeen = 0n
|
|
882
|
-
let mut bytesNeeded = 0n
|
|
883
|
-
let mut lowerBoundary = 0x80n
|
|
884
|
-
let mut upperBoundary = 0xBFn
|
|
885
|
-
|
|
886
|
-
let mut offset = 0n
|
|
887
|
-
|
|
888
|
-
let mut result = 0n
|
|
889
|
-
|
|
890
|
-
while (true) {
|
|
891
|
-
let byte = WasmI32.load8U(ptr + offset, 0n)
|
|
892
|
-
offset += 1n
|
|
893
|
-
if (bytesNeeded == 0n) {
|
|
894
|
-
if (byte >= 0x00n && byte <= 0x7Fn) {
|
|
895
|
-
result = byte
|
|
896
|
-
break
|
|
897
|
-
} else if (byte >= 0xC2n && byte <= 0xDFn) {
|
|
898
|
-
bytesNeeded = 1n
|
|
899
|
-
codePoint = byte & 0x1Fn
|
|
900
|
-
} else if (byte >= 0xE0n && byte <= 0xEFn) {
|
|
901
|
-
if (byte == 0xE0n) lowerBoundary = 0xA0n
|
|
902
|
-
if (byte == 0xEDn) upperBoundary = 0x9Fn
|
|
903
|
-
bytesNeeded = 2n
|
|
904
|
-
codePoint = byte & 0xFn
|
|
905
|
-
} else if (byte >= 0xF0n && byte <= 0xF4n) {
|
|
906
|
-
if (byte == 0xF0n) lowerBoundary = 0x90n
|
|
907
|
-
if (byte == 0xF4n) upperBoundary = 0x8Fn
|
|
908
|
-
bytesNeeded = 3n
|
|
909
|
-
codePoint = byte & 0x7n
|
|
910
|
-
} else {
|
|
911
|
-
throw MalformedUnicode
|
|
912
|
-
}
|
|
913
|
-
continue
|
|
914
|
-
}
|
|
915
|
-
if (!(lowerBoundary <= byte && byte <= upperBoundary)) {
|
|
916
|
-
throw MalformedUnicode
|
|
917
|
-
}
|
|
918
|
-
lowerBoundary = 0x80n
|
|
919
|
-
upperBoundary = 0xBFn
|
|
920
|
-
codePoint = (codePoint << 6n) | (byte & 0x3Fn)
|
|
921
|
-
bytesSeen += 1n
|
|
922
|
-
if (bytesSeen == bytesNeeded) {
|
|
923
|
-
result = codePoint
|
|
924
|
-
break
|
|
925
|
-
}
|
|
926
|
-
}
|
|
927
|
-
result: WasmI32
|
|
928
|
-
}
|
|
929
|
-
|
|
930
|
-
// hack to avoid incRef on this pointer
|
|
931
|
-
@disableGC
|
|
932
|
-
let mut _BYTES_SIZE_OFFSET = 1n;
|
|
933
|
-
@disableGC
|
|
934
|
-
let mut _BYTES_OFFSET = 1n;
|
|
935
|
-
|
|
936
|
-
@disableGC
|
|
937
|
-
let initPtr = () => {
|
|
938
|
-
_BYTES_SIZE_OFFSET = 4n
|
|
939
|
-
_BYTES_OFFSET = 8n
|
|
940
|
-
}
|
|
941
|
-
initPtr();
|
|
942
|
-
|
|
943
|
-
@disableGC
|
|
944
|
-
let rec encodeAtHelp = (string: String, encoding: Encoding, includeBom: Bool, dest: Bytes, destPos: Number) => {
|
|
866
|
+
@unsafe
|
|
867
|
+
let mut _BYTES_SIZE_OFFSET = 4n
|
|
868
|
+
@unsafe
|
|
869
|
+
let mut _BYTES_OFFSET = 8n
|
|
870
|
+
|
|
871
|
+
@unsafe
|
|
872
|
+
let encodeAtHelp =
|
|
873
|
+
(
|
|
874
|
+
string: String,
|
|
875
|
+
encoding: Encoding,
|
|
876
|
+
includeBom: Bool,
|
|
877
|
+
dest: Bytes,
|
|
878
|
+
destPos: Number,
|
|
879
|
+
) => {
|
|
945
880
|
let (>>>) = WasmI32.shrU
|
|
946
881
|
let (-) = WasmI32.sub
|
|
947
882
|
let (&) = WasmI32.and
|
|
@@ -950,8 +885,8 @@ let rec encodeAtHelp = (string: String, encoding: Encoding, includeBom: Bool, de
|
|
|
950
885
|
let (<=) = WasmI32.leU
|
|
951
886
|
let (==) = WasmI32.eq
|
|
952
887
|
let (+) = WasmI32.add
|
|
953
|
-
let byteSize = WasmI32.fromGrain(
|
|
954
|
-
let len = WasmI32.fromGrain(
|
|
888
|
+
let byteSize = WasmI32.fromGrain(byteLength(string)) >>> 1n
|
|
889
|
+
let len = WasmI32.fromGrain(length(string)) >>> 1n
|
|
955
890
|
|
|
956
891
|
let string = WasmI32.fromGrain(string)
|
|
957
892
|
|
|
@@ -964,9 +899,9 @@ let rec encodeAtHelp = (string: String, encoding: Encoding, includeBom: Bool, de
|
|
|
964
899
|
let destSize = WasmI32.load(bytes, _BYTES_SIZE_OFFSET)
|
|
965
900
|
|
|
966
901
|
if (includeBom) {
|
|
967
|
-
match(encoding) {
|
|
902
|
+
match (encoding) {
|
|
968
903
|
UTF8 => {
|
|
969
|
-
if (
|
|
904
|
+
if (bytesIdx + 3n > destSize) {
|
|
970
905
|
throw Exception.IndexOutOfBounds
|
|
971
906
|
}
|
|
972
907
|
WasmI32.store8(bytes + bytesIdx, 0xEFn, _BYTES_OFFSET)
|
|
@@ -975,7 +910,7 @@ let rec encodeAtHelp = (string: String, encoding: Encoding, includeBom: Bool, de
|
|
|
975
910
|
bytesIdx += 3n
|
|
976
911
|
},
|
|
977
912
|
UTF16_BE => {
|
|
978
|
-
if (
|
|
913
|
+
if (bytesIdx + 2n > destSize) {
|
|
979
914
|
throw Exception.IndexOutOfBounds
|
|
980
915
|
}
|
|
981
916
|
WasmI32.store8(bytes + bytesIdx, 0xFEn, _BYTES_OFFSET)
|
|
@@ -983,7 +918,7 @@ let rec encodeAtHelp = (string: String, encoding: Encoding, includeBom: Bool, de
|
|
|
983
918
|
bytesIdx += 2n
|
|
984
919
|
},
|
|
985
920
|
UTF16_LE => {
|
|
986
|
-
if (
|
|
921
|
+
if (bytesIdx + 2n > destSize) {
|
|
987
922
|
throw Exception.IndexOutOfBounds
|
|
988
923
|
}
|
|
989
924
|
WasmI32.store8(bytes + bytesIdx, 0xFFn, _BYTES_OFFSET)
|
|
@@ -991,7 +926,7 @@ let rec encodeAtHelp = (string: String, encoding: Encoding, includeBom: Bool, de
|
|
|
991
926
|
bytesIdx += 2n
|
|
992
927
|
},
|
|
993
928
|
UTF32_BE => {
|
|
994
|
-
if (
|
|
929
|
+
if (bytesIdx + 4n > destSize) {
|
|
995
930
|
throw Exception.IndexOutOfBounds
|
|
996
931
|
}
|
|
997
932
|
WasmI32.store8(bytes + bytesIdx, 0n, _BYTES_OFFSET)
|
|
@@ -1001,7 +936,7 @@ let rec encodeAtHelp = (string: String, encoding: Encoding, includeBom: Bool, de
|
|
|
1001
936
|
bytesIdx += 4n
|
|
1002
937
|
},
|
|
1003
938
|
UTF32_LE => {
|
|
1004
|
-
if (
|
|
939
|
+
if (bytesIdx + 4n > destSize) {
|
|
1005
940
|
throw Exception.IndexOutOfBounds
|
|
1006
941
|
}
|
|
1007
942
|
WasmI32.store8(bytes + bytesIdx, 0xFFn, _BYTES_OFFSET)
|
|
@@ -1009,16 +944,16 @@ let rec encodeAtHelp = (string: String, encoding: Encoding, includeBom: Bool, de
|
|
|
1009
944
|
WasmI32.store8(bytes + bytesIdx + 2n, 0n, _BYTES_OFFSET)
|
|
1010
945
|
WasmI32.store8(bytes + bytesIdx + 3n, 0n, _BYTES_OFFSET)
|
|
1011
946
|
bytesIdx += 4n
|
|
1012
|
-
}
|
|
947
|
+
},
|
|
1013
948
|
}
|
|
1014
949
|
}
|
|
1015
950
|
|
|
1016
|
-
match(encoding) {
|
|
951
|
+
match (encoding) {
|
|
1017
952
|
UTF8 => {
|
|
1018
953
|
// Optimization: since internally strings in Grain are UTF8 encoded, when
|
|
1019
954
|
// the target encoding is UTF8 as well, then copy the entire memory range
|
|
1020
955
|
// in bulk. No need to iterate individual characters.
|
|
1021
|
-
if (
|
|
956
|
+
if (bytesIdx + byteSize > destSize) {
|
|
1022
957
|
throw Exception.IndexOutOfBounds
|
|
1023
958
|
}
|
|
1024
959
|
Memory.copy(bytes + bytesIdx + _BYTES_OFFSET, ptr, byteSize)
|
|
@@ -1036,37 +971,55 @@ let rec encodeAtHelp = (string: String, encoding: Encoding, includeBom: Bool, de
|
|
|
1036
971
|
} else {
|
|
1037
972
|
2n
|
|
1038
973
|
}
|
|
1039
|
-
match(encoding) {
|
|
974
|
+
match (encoding) {
|
|
1040
975
|
UTF8 => {
|
|
1041
976
|
// With the optimization above for bulk memory copy, this match
|
|
1042
977
|
// should never occur for the UTF8 case.
|
|
1043
|
-
if (
|
|
978
|
+
if (bytesIdx + n > destSize) {
|
|
1044
979
|
throw Exception.IndexOutOfBounds
|
|
1045
980
|
}
|
|
1046
|
-
Memory.copy(bytes + bytesIdx + _BYTES_OFFSET, ptr, n)
|
|
981
|
+
Memory.copy(bytes + bytesIdx + _BYTES_OFFSET, ptr, n)
|
|
1047
982
|
bytesIdx += n
|
|
1048
983
|
},
|
|
1049
984
|
UTF16_BE => {
|
|
1050
985
|
let codePoint = getCodePoint(ptr)
|
|
1051
986
|
if (codePoint <= 0xFFFFn) {
|
|
1052
987
|
// <hi><lo>
|
|
1053
|
-
if (
|
|
988
|
+
if (bytesIdx + 2n > destSize) {
|
|
1054
989
|
throw Exception.IndexOutOfBounds
|
|
1055
990
|
}
|
|
1056
|
-
WasmI32.store8(
|
|
1057
|
-
|
|
991
|
+
WasmI32.store8(
|
|
992
|
+
bytes + bytesIdx,
|
|
993
|
+
(codePoint & 0xff00n) >>> 8n,
|
|
994
|
+
_BYTES_OFFSET
|
|
995
|
+
)
|
|
996
|
+
WasmI32.store8(
|
|
997
|
+
bytes + bytesIdx + 1n,
|
|
998
|
+
codePoint & 0xffn,
|
|
999
|
+
_BYTES_OFFSET
|
|
1000
|
+
)
|
|
1058
1001
|
bytesIdx += 2n
|
|
1059
1002
|
} else {
|
|
1060
1003
|
// https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF
|
|
1061
|
-
if (
|
|
1004
|
+
if (bytesIdx + 4n > destSize) {
|
|
1062
1005
|
throw Exception.IndexOutOfBounds
|
|
1063
1006
|
}
|
|
1064
1007
|
let uPrime = codePoint - 0x10000n
|
|
1065
|
-
let w1 = ((uPrime & 0b11111111110000000000n) >>> 10n) +
|
|
1066
|
-
|
|
1067
|
-
|
|
1008
|
+
let w1 = ((uPrime & 0b11111111110000000000n) >>> 10n) +
|
|
1009
|
+
0xD800n // High surrogate
|
|
1010
|
+
let w2 = (uPrime & 0b00000000001111111111n) +
|
|
1011
|
+
0xDC00n // Low surrogate
|
|
1012
|
+
WasmI32.store8(
|
|
1013
|
+
bytes + bytesIdx,
|
|
1014
|
+
(w1 & 0xff00n) >>> 8n,
|
|
1015
|
+
_BYTES_OFFSET
|
|
1016
|
+
)
|
|
1068
1017
|
WasmI32.store8(bytes + bytesIdx + 1n, w1 & 0xffn, _BYTES_OFFSET)
|
|
1069
|
-
WasmI32.store8(
|
|
1018
|
+
WasmI32.store8(
|
|
1019
|
+
bytes + bytesIdx + 2n,
|
|
1020
|
+
(w2 & 0xff00n) >>> 8n,
|
|
1021
|
+
_BYTES_OFFSET
|
|
1022
|
+
)
|
|
1070
1023
|
WasmI32.store8(bytes + bytesIdx + 3n, w2 & 0xffn, _BYTES_OFFSET)
|
|
1071
1024
|
bytesIdx += 4n
|
|
1072
1025
|
}
|
|
@@ -1074,63 +1027,98 @@ let rec encodeAtHelp = (string: String, encoding: Encoding, includeBom: Bool, de
|
|
|
1074
1027
|
UTF16_LE => {
|
|
1075
1028
|
let codePoint = getCodePoint(ptr)
|
|
1076
1029
|
if (codePoint <= 0xFFFFn) {
|
|
1077
|
-
if (
|
|
1030
|
+
if (bytesIdx + 2n > destSize) {
|
|
1078
1031
|
throw Exception.IndexOutOfBounds
|
|
1079
1032
|
}
|
|
1080
1033
|
// <lo><hi>
|
|
1081
1034
|
WasmI32.store8(bytes + bytesIdx, codePoint & 0xffn, _BYTES_OFFSET)
|
|
1082
|
-
WasmI32.store8(
|
|
1035
|
+
WasmI32.store8(
|
|
1036
|
+
bytes + bytesIdx + 1n,
|
|
1037
|
+
(codePoint & 0xff00n) >>> 8n,
|
|
1038
|
+
_BYTES_OFFSET
|
|
1039
|
+
)
|
|
1083
1040
|
bytesIdx += 2n
|
|
1084
1041
|
} else {
|
|
1085
1042
|
// https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF
|
|
1086
|
-
if (
|
|
1043
|
+
if (bytesIdx + 4n > destSize) {
|
|
1087
1044
|
throw Exception.IndexOutOfBounds
|
|
1088
1045
|
}
|
|
1089
1046
|
let uPrime = codePoint - 0x10000n
|
|
1090
|
-
let w1 = ((uPrime & 0b11111111110000000000n) >>> 10n) +
|
|
1091
|
-
|
|
1047
|
+
let w1 = ((uPrime & 0b11111111110000000000n) >>> 10n) +
|
|
1048
|
+
0xD800n // High surrogate
|
|
1049
|
+
let w2 = (uPrime & 0b00000000001111111111n) +
|
|
1050
|
+
0xDC00n // Low surrogate
|
|
1092
1051
|
WasmI32.store8(bytes + bytesIdx, w1 & 0xffn, _BYTES_OFFSET)
|
|
1093
|
-
WasmI32.store8(
|
|
1052
|
+
WasmI32.store8(
|
|
1053
|
+
bytes + bytesIdx + 1n,
|
|
1054
|
+
(w1 & 0xff00n) >>> 8n,
|
|
1055
|
+
_BYTES_OFFSET
|
|
1056
|
+
)
|
|
1094
1057
|
WasmI32.store8(bytes + bytesIdx + 2n, w2 & 0xffn, _BYTES_OFFSET)
|
|
1095
|
-
WasmI32.store8(
|
|
1058
|
+
WasmI32.store8(
|
|
1059
|
+
bytes + bytesIdx + 3n,
|
|
1060
|
+
(w2 & 0xff00n) >>> 8n,
|
|
1061
|
+
_BYTES_OFFSET
|
|
1062
|
+
)
|
|
1096
1063
|
bytesIdx += 4n
|
|
1097
1064
|
}
|
|
1098
1065
|
},
|
|
1099
1066
|
UTF32_BE => {
|
|
1100
|
-
if (
|
|
1067
|
+
if (bytesIdx + 4n > destSize) {
|
|
1101
1068
|
throw Exception.IndexOutOfBounds
|
|
1102
1069
|
}
|
|
1103
1070
|
let codePoint = getCodePoint(ptr)
|
|
1104
|
-
WasmI32.store8(
|
|
1105
|
-
|
|
1106
|
-
|
|
1107
|
-
|
|
1071
|
+
WasmI32.store8(
|
|
1072
|
+
bytes + bytesIdx,
|
|
1073
|
+
(codePoint & 0xff000000n) >>> 24n,
|
|
1074
|
+
_BYTES_OFFSET
|
|
1075
|
+
)
|
|
1076
|
+
WasmI32.store8(
|
|
1077
|
+
bytes + bytesIdx + 1n,
|
|
1078
|
+
(codePoint & 0xff0000n) >>> 16n,
|
|
1079
|
+
_BYTES_OFFSET
|
|
1080
|
+
)
|
|
1081
|
+
WasmI32.store8(
|
|
1082
|
+
bytes + bytesIdx + 2n,
|
|
1083
|
+
(codePoint & 0xff00n) >>> 8n,
|
|
1084
|
+
_BYTES_OFFSET
|
|
1085
|
+
)
|
|
1086
|
+
WasmI32.store8(
|
|
1087
|
+
bytes + bytesIdx + 3n,
|
|
1088
|
+
codePoint & 0xffn,
|
|
1089
|
+
_BYTES_OFFSET
|
|
1090
|
+
)
|
|
1108
1091
|
bytesIdx += 4n
|
|
1109
1092
|
},
|
|
1110
1093
|
UTF32_LE => {
|
|
1111
|
-
if (
|
|
1094
|
+
if (bytesIdx + 4n > destSize) {
|
|
1112
1095
|
throw Exception.IndexOutOfBounds
|
|
1113
1096
|
}
|
|
1114
1097
|
let codePoint = getCodePoint(ptr)
|
|
1115
1098
|
WasmI32.store8(bytes + bytesIdx, codePoint & 0xffn, _BYTES_OFFSET)
|
|
1116
|
-
WasmI32.store8(
|
|
1117
|
-
|
|
1118
|
-
|
|
1099
|
+
WasmI32.store8(
|
|
1100
|
+
bytes + bytesIdx + 1n,
|
|
1101
|
+
(codePoint & 0xff00n) >>> 8n,
|
|
1102
|
+
_BYTES_OFFSET
|
|
1103
|
+
)
|
|
1104
|
+
WasmI32.store8(
|
|
1105
|
+
bytes + bytesIdx + 2n,
|
|
1106
|
+
(codePoint & 0xff0000n) >>> 16n,
|
|
1107
|
+
_BYTES_OFFSET
|
|
1108
|
+
)
|
|
1109
|
+
WasmI32.store8(
|
|
1110
|
+
bytes + bytesIdx + 3n,
|
|
1111
|
+
(codePoint & 0xff000000n) >>> 24n,
|
|
1112
|
+
_BYTES_OFFSET
|
|
1113
|
+
)
|
|
1119
1114
|
bytesIdx += 4n
|
|
1120
1115
|
},
|
|
1121
|
-
}
|
|
1116
|
+
}
|
|
1122
1117
|
ptr += n
|
|
1123
1118
|
}
|
|
1124
|
-
}
|
|
1119
|
+
},
|
|
1125
1120
|
}
|
|
1126
1121
|
|
|
1127
|
-
Memory.decRef(WasmI32.fromGrain(string))
|
|
1128
|
-
Memory.decRef(WasmI32.fromGrain(encoding))
|
|
1129
|
-
Memory.decRef(WasmI32.fromGrain(includeBom))
|
|
1130
|
-
Memory.decRef(WasmI32.fromGrain(destPos))
|
|
1131
|
-
Memory.decRef(WasmI32.fromGrain(encodeAtHelp))
|
|
1132
|
-
|
|
1133
|
-
// We don't decRef `dest` because we're returning it
|
|
1134
1122
|
dest
|
|
1135
1123
|
}
|
|
1136
1124
|
|
|
@@ -1164,31 +1152,23 @@ export let encodeAtWithBom = (string, encoding, dest, destPos) => {
|
|
|
1164
1152
|
encodeAtHelp(string, encoding, true, dest, destPos)
|
|
1165
1153
|
}
|
|
1166
1154
|
|
|
1167
|
-
@
|
|
1168
|
-
let
|
|
1169
|
-
|
|
1170
|
-
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
|
|
1174
|
-
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
|
|
1178
|
-
|
|
1155
|
+
@unsafe
|
|
1156
|
+
let encodeHelp = (string: String, encoding: Encoding, includeBom: Bool) => {
|
|
1157
|
+
let size = encodedLength(string, encoding) +
|
|
1158
|
+
(if (includeBom) {
|
|
1159
|
+
match (encoding) {
|
|
1160
|
+
UTF8 => 3,
|
|
1161
|
+
UTF16_LE => 2,
|
|
1162
|
+
UTF16_BE => 2,
|
|
1163
|
+
UTF32_LE => 4,
|
|
1164
|
+
UTF32_BE => 4,
|
|
1165
|
+
}
|
|
1166
|
+
} else {
|
|
1167
|
+
0
|
|
1168
|
+
})
|
|
1179
1169
|
let (>>>) = WasmI32.shrU
|
|
1180
1170
|
let bytes = WasmI32.toGrain(allocateBytes(WasmI32.fromGrain(size) >>> 1n))
|
|
1181
|
-
|
|
1182
|
-
Memory.incRef(WasmI32.fromGrain(string))
|
|
1183
|
-
Memory.incRef(WasmI32.fromGrain(encoding))
|
|
1184
|
-
Memory.incRef(WasmI32.fromGrain(includeBom))
|
|
1185
|
-
Memory.incRef(WasmI32.fromGrain(bytes))
|
|
1186
|
-
let ret = encodeAtHelp(string, encoding, includeBom, bytes, 0)
|
|
1187
|
-
Memory.decRef(WasmI32.fromGrain(string))
|
|
1188
|
-
Memory.decRef(WasmI32.fromGrain(encoding))
|
|
1189
|
-
Memory.decRef(WasmI32.fromGrain(includeBom))
|
|
1190
|
-
Memory.decRef(WasmI32.fromGrain(encodeHelp))
|
|
1191
|
-
ret
|
|
1171
|
+
encodeAtHelp(string, encoding, includeBom, bytes, 0)
|
|
1192
1172
|
}
|
|
1193
1173
|
|
|
1194
1174
|
/**
|
|
@@ -1219,7 +1199,7 @@ export let encodeWithBom = (string: String, encoding: Encoding) => {
|
|
|
1219
1199
|
|
|
1220
1200
|
// Byte->String decoding and helper functions:
|
|
1221
1201
|
|
|
1222
|
-
@
|
|
1202
|
+
@unsafe
|
|
1223
1203
|
let writeUtf8CodePoint = (ptr, codePoint) => {
|
|
1224
1204
|
let (>>>) = WasmI32.shrU
|
|
1225
1205
|
let (-) = WasmI32.sub
|
|
@@ -1242,8 +1222,8 @@ let writeUtf8CodePoint = (ptr, codePoint) => {
|
|
|
1242
1222
|
// The first byte has a three bit prefix of 110, followed by 5 bits of the
|
|
1243
1223
|
// codepoint. The second byte has a two bit prefix of 10, followed by 6 bits
|
|
1244
1224
|
// of the codepoint.
|
|
1245
|
-
let high =
|
|
1246
|
-
let low =
|
|
1225
|
+
let high = codePoint >>> 6n & 0b000_11111n | 0b110_00000n
|
|
1226
|
+
let low = codePoint & 0b00_111111n | 0b10_000000n
|
|
1247
1227
|
WasmI32.store8(ptr, high, 0n)
|
|
1248
1228
|
WasmI32.store8(ptr + 1n, low, 0n)
|
|
1249
1229
|
2n
|
|
@@ -1252,9 +1232,9 @@ let writeUtf8CodePoint = (ptr, codePoint) => {
|
|
|
1252
1232
|
// The first byte has a four bit prefix of 1110, followed by 4 bits of the
|
|
1253
1233
|
// codepoint. Remaining bytes each have a two bit prefix of 10, followed by
|
|
1254
1234
|
// 6 bits of the codepoint.
|
|
1255
|
-
let high =
|
|
1256
|
-
let mid =
|
|
1257
|
-
let low =
|
|
1235
|
+
let high = codePoint >>> 12n & 0b0000_1111n | 0b1110_0000n
|
|
1236
|
+
let mid = codePoint >>> 6n & 0b00_111111n | 0b10_000000n
|
|
1237
|
+
let low = codePoint & 0b00_111111n | 0b10_000000n
|
|
1258
1238
|
WasmI32.store8(ptr, high, 0n)
|
|
1259
1239
|
WasmI32.store8(ptr + 1n, mid, 0n)
|
|
1260
1240
|
WasmI32.store8(ptr + 2n, low, 0n)
|
|
@@ -1264,10 +1244,10 @@ let writeUtf8CodePoint = (ptr, codePoint) => {
|
|
|
1264
1244
|
// The first byte has a five bit prefix of 11110, followed by 3 bits of the
|
|
1265
1245
|
// codepoint. Remaining bytes each have a two bit prefix of 10, followed by
|
|
1266
1246
|
// 6 bits of the codepoint.
|
|
1267
|
-
let high =
|
|
1268
|
-
let mid1 =
|
|
1269
|
-
let mid2 =
|
|
1270
|
-
let low =
|
|
1247
|
+
let high = codePoint >>> 18n & 0b00000_111n | 0b11110_000n
|
|
1248
|
+
let mid1 = codePoint >>> 12n & 0b00_111111n | 0b10_000000n
|
|
1249
|
+
let mid2 = codePoint >>> 6n & 0b00_111111n | 0b10_000000n
|
|
1250
|
+
let low = codePoint & 0b00_111111n | 0b10_000000n
|
|
1271
1251
|
WasmI32.store8(ptr, high, 0n)
|
|
1272
1252
|
WasmI32.store8(ptr + 1n, mid1, 0n)
|
|
1273
1253
|
WasmI32.store8(ptr + 2n, mid2, 0n)
|
|
@@ -1276,7 +1256,7 @@ let writeUtf8CodePoint = (ptr, codePoint) => {
|
|
|
1276
1256
|
}
|
|
1277
1257
|
}
|
|
1278
1258
|
|
|
1279
|
-
@
|
|
1259
|
+
@unsafe
|
|
1280
1260
|
let bytesHaveBom = (bytes: Bytes, encoding: Encoding, start: WasmI32) => {
|
|
1281
1261
|
let (+) = WasmI32.add
|
|
1282
1262
|
let (==) = WasmI32.eq
|
|
@@ -1286,25 +1266,46 @@ let bytesHaveBom = (bytes: Bytes, encoding: Encoding, start: WasmI32) => {
|
|
|
1286
1266
|
let ptr = ptr + start
|
|
1287
1267
|
match (encoding) {
|
|
1288
1268
|
UTF8 => {
|
|
1289
|
-
bytesSize >= 3n &&
|
|
1269
|
+
bytesSize >= 3n &&
|
|
1270
|
+
WasmI32.load8U(ptr, _BYTES_OFFSET) == 0xEFn &&
|
|
1271
|
+
WasmI32.load8U(ptr + 1n, _BYTES_OFFSET) == 0xBBn &&
|
|
1272
|
+
WasmI32.load8U(ptr + 2n, _BYTES_OFFSET) == 0xBFn
|
|
1290
1273
|
},
|
|
1291
1274
|
UTF16_BE => {
|
|
1292
|
-
bytesSize >= 2n &&
|
|
1275
|
+
bytesSize >= 2n &&
|
|
1276
|
+
WasmI32.load8U(ptr, _BYTES_OFFSET) == 0xFEn &&
|
|
1277
|
+
WasmI32.load8U(ptr + 1n, _BYTES_OFFSET) == 0xFFn
|
|
1293
1278
|
},
|
|
1294
1279
|
UTF16_LE => {
|
|
1295
|
-
bytesSize >= 2n &&
|
|
1280
|
+
bytesSize >= 2n &&
|
|
1281
|
+
WasmI32.load8U(ptr, _BYTES_OFFSET) == 0xFFn &&
|
|
1282
|
+
WasmI32.load8U(ptr + 1n, _BYTES_OFFSET) == 0xFEn
|
|
1296
1283
|
},
|
|
1297
1284
|
UTF32_BE => {
|
|
1298
|
-
bytesSize >= 4n &&
|
|
1285
|
+
bytesSize >= 4n &&
|
|
1286
|
+
WasmI32.load8U(ptr, _BYTES_OFFSET) == 0x00n &&
|
|
1287
|
+
WasmI32.load8U(ptr + 1n, _BYTES_OFFSET) == 0x00n &&
|
|
1288
|
+
WasmI32.load8U(ptr + 2n, _BYTES_OFFSET) == 0xFEn &&
|
|
1289
|
+
WasmI32.load8U(ptr + 3n, _BYTES_OFFSET) == 0xFFn
|
|
1299
1290
|
},
|
|
1300
1291
|
UTF32_LE => {
|
|
1301
|
-
bytesSize >= 4n &&
|
|
1302
|
-
|
|
1292
|
+
bytesSize >= 4n &&
|
|
1293
|
+
WasmI32.load8U(ptr, _BYTES_OFFSET) == 0xFFn &&
|
|
1294
|
+
WasmI32.load8U(ptr + 1n, _BYTES_OFFSET) == 0xFEn &&
|
|
1295
|
+
WasmI32.load8U(ptr + 2n, _BYTES_OFFSET) == 0x00n &&
|
|
1296
|
+
WasmI32.load8U(ptr + 3n, _BYTES_OFFSET) == 0x00n
|
|
1297
|
+
},
|
|
1303
1298
|
}
|
|
1304
1299
|
}
|
|
1305
1300
|
|
|
1306
|
-
@
|
|
1307
|
-
let decodedLength =
|
|
1301
|
+
@unsafe
|
|
1302
|
+
let decodedLength =
|
|
1303
|
+
(
|
|
1304
|
+
bytes: Bytes,
|
|
1305
|
+
encoding: Encoding,
|
|
1306
|
+
start: WasmI32,
|
|
1307
|
+
size: WasmI32,
|
|
1308
|
+
) => {
|
|
1308
1309
|
let (+) = WasmI32.add
|
|
1309
1310
|
let (-) = WasmI32.sub
|
|
1310
1311
|
let (==) = WasmI32.eq
|
|
@@ -1327,20 +1328,21 @@ let decodedLength = (bytes: Bytes, encoding: Encoding, start: WasmI32, size: Was
|
|
|
1327
1328
|
}
|
|
1328
1329
|
}
|
|
1329
1330
|
let start = ptr + _BYTES_OFFSET + start
|
|
1330
|
-
match(encoding) {
|
|
1331
|
+
match (encoding) {
|
|
1331
1332
|
UTF8 => bytesSize,
|
|
1332
1333
|
UTF16_BE => {
|
|
1333
1334
|
let end = start + bytesSize
|
|
1334
1335
|
let mut ptr = start
|
|
1335
1336
|
let mut count = 0n
|
|
1336
1337
|
while (ptr < end) {
|
|
1337
|
-
let codeWord =
|
|
1338
|
+
let codeWord = WasmI32.load8U(ptr, 0n) << 8n | WasmI32.load8U(ptr, 1n)
|
|
1338
1339
|
let codeWord = if (codeWord >= 0xD800n && codeWord <= 0xDBFFn) {
|
|
1339
1340
|
// high surrogate. need to check that next character is low srurrogate
|
|
1340
1341
|
let ret = if (ptr + 2n >= end) {
|
|
1341
1342
|
throw MalformedUnicode
|
|
1342
1343
|
} else {
|
|
1343
|
-
let nextCodeWord =
|
|
1344
|
+
let nextCodeWord = WasmI32.load8U(ptr, 2n) << 8n |
|
|
1345
|
+
WasmI32.load8U(ptr, 3n)
|
|
1344
1346
|
if (nextCodeWord < 0xDC00n || nextCodeWord > 0xDFFFn) {
|
|
1345
1347
|
// high surrogate without low surrogate
|
|
1346
1348
|
throw MalformedUnicode
|
|
@@ -1374,13 +1376,14 @@ let decodedLength = (bytes: Bytes, encoding: Encoding, start: WasmI32, size: Was
|
|
|
1374
1376
|
let mut ptr = start
|
|
1375
1377
|
let mut count = 0n
|
|
1376
1378
|
while (ptr < end) {
|
|
1377
|
-
let codeWord =
|
|
1379
|
+
let codeWord = WasmI32.load8U(ptr, 1n) << 8n | WasmI32.load8U(ptr, 0n)
|
|
1378
1380
|
let codeWord = if (codeWord >= 0xD800n && codeWord <= 0xDBFFn) {
|
|
1379
1381
|
// high surrogate. need to check that next character is low srurrogate
|
|
1380
1382
|
let ret = if (ptr + 2n >= end) {
|
|
1381
1383
|
throw MalformedUnicode
|
|
1382
1384
|
} else {
|
|
1383
|
-
let nextCodeWord =
|
|
1385
|
+
let nextCodeWord = WasmI32.load8U(ptr, 3n) << 8n |
|
|
1386
|
+
WasmI32.load8U(ptr, 2n)
|
|
1384
1387
|
if (nextCodeWord < 0xDC00n || nextCodeWord > 0xDFFFn) {
|
|
1385
1388
|
// high surrogate without low surrogate
|
|
1386
1389
|
throw MalformedUnicode
|
|
@@ -1418,7 +1421,10 @@ let decodedLength = (bytes: Bytes, encoding: Encoding, start: WasmI32, size: Was
|
|
|
1418
1421
|
let mut ptr = start
|
|
1419
1422
|
let mut count = 0n
|
|
1420
1423
|
while (ptr < end) {
|
|
1421
|
-
let codeWord =
|
|
1424
|
+
let codeWord = WasmI32.load8U(ptr, 0n) << 24n |
|
|
1425
|
+
WasmI32.load8U(ptr, 1n) << 16n |
|
|
1426
|
+
WasmI32.load8U(ptr, 2n) << 8n |
|
|
1427
|
+
WasmI32.load8U(ptr, 3n)
|
|
1422
1428
|
ptr += 4n
|
|
1423
1429
|
if (codeWord <= 0x007Fn) {
|
|
1424
1430
|
count += 1n
|
|
@@ -1441,7 +1447,10 @@ let decodedLength = (bytes: Bytes, encoding: Encoding, start: WasmI32, size: Was
|
|
|
1441
1447
|
let mut ptr = start
|
|
1442
1448
|
let mut count = 0n
|
|
1443
1449
|
while (ptr < end) {
|
|
1444
|
-
let codeWord =
|
|
1450
|
+
let codeWord = WasmI32.load8U(ptr, 3n) << 24n |
|
|
1451
|
+
WasmI32.load8U(ptr, 2n) << 16n |
|
|
1452
|
+
WasmI32.load8U(ptr, 1n) << 8n |
|
|
1453
|
+
WasmI32.load8U(ptr, 0n)
|
|
1445
1454
|
ptr += 4n
|
|
1446
1455
|
if (codeWord <= 0x007Fn) {
|
|
1447
1456
|
count += 1n
|
|
@@ -1454,12 +1463,19 @@ let decodedLength = (bytes: Bytes, encoding: Encoding, start: WasmI32, size: Was
|
|
|
1454
1463
|
}
|
|
1455
1464
|
}
|
|
1456
1465
|
count
|
|
1457
|
-
}
|
|
1466
|
+
},
|
|
1458
1467
|
}
|
|
1459
1468
|
}
|
|
1460
1469
|
|
|
1461
|
-
@
|
|
1462
|
-
let
|
|
1470
|
+
@unsafe
|
|
1471
|
+
let decodeRangeHelp =
|
|
1472
|
+
(
|
|
1473
|
+
bytes: Bytes,
|
|
1474
|
+
encoding: Encoding,
|
|
1475
|
+
skipBom: Bool,
|
|
1476
|
+
start: Number,
|
|
1477
|
+
size: Number,
|
|
1478
|
+
) => {
|
|
1463
1479
|
let (+) = WasmI32.add
|
|
1464
1480
|
let (-) = WasmI32.sub
|
|
1465
1481
|
let (<) = WasmI32.ltU
|
|
@@ -1474,7 +1490,7 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1474
1490
|
let size = grainToWasmNumber(size, _SIZE_NAME)
|
|
1475
1491
|
let hasBom = bytesHaveBom(bytes, encoding, start)
|
|
1476
1492
|
let stringSize = decodedLength(bytes, encoding, start, size)
|
|
1477
|
-
let stringSize = if (skipBom && hasBom)
|
|
1493
|
+
let stringSize = if (skipBom && hasBom) stringSize - 3n else stringSize
|
|
1478
1494
|
let str = allocateString(stringSize)
|
|
1479
1495
|
let mut bytesPtr = WasmI32.fromGrain(bytes)
|
|
1480
1496
|
let bytesSize = {
|
|
@@ -1489,7 +1505,7 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1489
1505
|
let mut strPtr = str + 8n
|
|
1490
1506
|
let mut bomRead = false
|
|
1491
1507
|
if (skipBom && hasBom) {
|
|
1492
|
-
bytesPtr += match(encoding) {
|
|
1508
|
+
bytesPtr += match (encoding) {
|
|
1493
1509
|
UTF8 => 3n,
|
|
1494
1510
|
UTF16_LE => 2n,
|
|
1495
1511
|
UTF16_BE => 2n,
|
|
@@ -1497,10 +1513,10 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1497
1513
|
UTF32_BE => 4n,
|
|
1498
1514
|
}
|
|
1499
1515
|
}
|
|
1500
|
-
|
|
1501
|
-
WasmI32.toGrain(str)
|
|
1516
|
+
if (stringSize == 0n) {
|
|
1517
|
+
WasmI32.toGrain(str): String
|
|
1502
1518
|
} else {
|
|
1503
|
-
match(encoding) {
|
|
1519
|
+
match (encoding) {
|
|
1504
1520
|
UTF8 => {
|
|
1505
1521
|
Memory.copy(strPtr, bytesPtr, stringSize)
|
|
1506
1522
|
},
|
|
@@ -1508,11 +1524,14 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1508
1524
|
// NOTE: Because the size check passed, we know the string is well-formed
|
|
1509
1525
|
let end = bytesPtr + bytesSize
|
|
1510
1526
|
while (bytesPtr < end) {
|
|
1511
|
-
let w1 =
|
|
1527
|
+
let w1 = WasmI32.load8U(bytesPtr, 0n) << 8n |
|
|
1528
|
+
WasmI32.load8U(bytesPtr, 1n)
|
|
1512
1529
|
let codeWord = if (w1 >= 0xD800n && w1 <= 0xDBFFn) {
|
|
1513
1530
|
// high surrogate. next character is low srurrogate
|
|
1514
1531
|
let w1 = (w1 & 0x03FFn) << 10n
|
|
1515
|
-
let w2 = (
|
|
1532
|
+
let w2 = (WasmI32.load8U(bytesPtr, 2n) << 8n |
|
|
1533
|
+
WasmI32.load8U(bytesPtr, 3n)) &
|
|
1534
|
+
0x03FFn
|
|
1516
1535
|
let codeWord = w1 + w2 + 0x10000n
|
|
1517
1536
|
// no problems, so go past both code words
|
|
1518
1537
|
bytesPtr += 4n
|
|
@@ -1528,11 +1547,14 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1528
1547
|
// NOTE: Because the size check passed, we know the string is well-formed
|
|
1529
1548
|
let end = bytesPtr + bytesSize
|
|
1530
1549
|
while (bytesPtr < end) {
|
|
1531
|
-
let w1 =
|
|
1550
|
+
let w1 = WasmI32.load8U(bytesPtr, 1n) << 8n |
|
|
1551
|
+
WasmI32.load8U(bytesPtr, 0n)
|
|
1532
1552
|
let codeWord = if (w1 >= 0xD800n && w1 <= 0xDBFFn) {
|
|
1533
1553
|
// high surrogate. next character is low srurrogate
|
|
1534
1554
|
let w1 = (w1 & 0x03FFn) << 10n
|
|
1535
|
-
let w2 = (
|
|
1555
|
+
let w2 = (WasmI32.load8U(bytesPtr, 3n) << 8n |
|
|
1556
|
+
WasmI32.load8U(bytesPtr, 2n)) &
|
|
1557
|
+
0x03FFn
|
|
1536
1558
|
//let uPrime = codePoint - 0x10000n
|
|
1537
1559
|
//let w1 = ((uPrime & 0b11111111110000000000n) >>> 10n) + 0xD800n // High surrogate
|
|
1538
1560
|
//let w2 = (uPrime & 0b00000000001111111111n) + 0xDC00n // Low surrogate
|
|
@@ -1550,7 +1572,10 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1550
1572
|
UTF32_BE => {
|
|
1551
1573
|
let end = bytesPtr + bytesSize
|
|
1552
1574
|
while (bytesPtr < end) {
|
|
1553
|
-
let codeWord =
|
|
1575
|
+
let codeWord = WasmI32.load8U(bytesPtr, 0n) << 24n |
|
|
1576
|
+
WasmI32.load8U(bytesPtr, 1n) << 16n |
|
|
1577
|
+
WasmI32.load8U(bytesPtr, 2n) << 8n |
|
|
1578
|
+
WasmI32.load8U(bytesPtr, 3n)
|
|
1554
1579
|
bytesPtr += 4n
|
|
1555
1580
|
strPtr += writeUtf8CodePoint(strPtr, codeWord)
|
|
1556
1581
|
}
|
|
@@ -1558,20 +1583,17 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1558
1583
|
UTF32_LE => {
|
|
1559
1584
|
let end = bytesPtr + bytesSize
|
|
1560
1585
|
while (bytesPtr < end) {
|
|
1561
|
-
let codeWord =
|
|
1586
|
+
let codeWord = WasmI32.load8U(bytesPtr, 3n) << 24n |
|
|
1587
|
+
WasmI32.load8U(bytesPtr, 2n) << 16n |
|
|
1588
|
+
WasmI32.load8U(bytesPtr, 1n) << 8n |
|
|
1589
|
+
WasmI32.load8U(bytesPtr, 0n)
|
|
1562
1590
|
bytesPtr += 4n
|
|
1563
1591
|
strPtr += writeUtf8CodePoint(strPtr, codeWord)
|
|
1564
1592
|
}
|
|
1565
|
-
}
|
|
1593
|
+
},
|
|
1566
1594
|
}
|
|
1567
|
-
WasmI32.toGrain(str)
|
|
1595
|
+
WasmI32.toGrain(str): String
|
|
1568
1596
|
}
|
|
1569
|
-
// bytes: Bytes, encoding: Encoding, skipBom: Bool, start: Number, size: Number
|
|
1570
|
-
Memory.decRef(WasmI32.fromGrain(bytes))
|
|
1571
|
-
Memory.decRef(WasmI32.fromGrain(encoding))
|
|
1572
|
-
Memory.decRef(WasmI32.fromGrain(skipBom))
|
|
1573
|
-
Memory.decRef(WasmI32.fromGrain(decodeRangeHelp))
|
|
1574
|
-
ret
|
|
1575
1597
|
}
|
|
1576
1598
|
|
|
1577
1599
|
/**
|
|
@@ -1585,7 +1607,13 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1585
1607
|
*
|
|
1586
1608
|
* @since v0.4.0
|
|
1587
1609
|
*/
|
|
1588
|
-
export let decodeRange =
|
|
1610
|
+
export let decodeRange =
|
|
1611
|
+
(
|
|
1612
|
+
bytes: Bytes,
|
|
1613
|
+
encoding: Encoding,
|
|
1614
|
+
start: Number,
|
|
1615
|
+
size: Number,
|
|
1616
|
+
) => {
|
|
1589
1617
|
decodeRangeHelp(bytes, encoding, true, start, size)
|
|
1590
1618
|
}
|
|
1591
1619
|
|
|
@@ -1600,22 +1628,21 @@ export let decodeRange = (bytes: Bytes, encoding: Encoding, start: Number, size:
|
|
|
1600
1628
|
*
|
|
1601
1629
|
* @since v0.4.0
|
|
1602
1630
|
*/
|
|
1603
|
-
export let decodeRangeKeepBom =
|
|
1631
|
+
export let decodeRangeKeepBom =
|
|
1632
|
+
(
|
|
1633
|
+
bytes: Bytes,
|
|
1634
|
+
encoding: Encoding,
|
|
1635
|
+
start: Number,
|
|
1636
|
+
size: Number,
|
|
1637
|
+
) => {
|
|
1604
1638
|
decodeRangeHelp(bytes, encoding, false, start, size)
|
|
1605
1639
|
}
|
|
1606
1640
|
|
|
1607
|
-
@
|
|
1608
|
-
let
|
|
1641
|
+
@unsafe
|
|
1642
|
+
let decodeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool) => {
|
|
1609
1643
|
let bytesPtr = WasmI32.fromGrain(bytes)
|
|
1610
1644
|
let bytesSize = WasmI32.load(bytesPtr, 4n)
|
|
1611
|
-
|
|
1612
|
-
Memory.incRef(WasmI32.fromGrain(bytes))
|
|
1613
|
-
Memory.incRef(WasmI32.fromGrain(encoding))
|
|
1614
|
-
let ret = decodeRangeHelp(bytes, encoding, skipBom, 0, tagSimpleNumber(bytesSize))
|
|
1615
|
-
Memory.incRef(WasmI32.fromGrain(bytes))
|
|
1616
|
-
Memory.incRef(WasmI32.fromGrain(encoding))
|
|
1617
|
-
Memory.incRef(WasmI32.fromGrain(decodeHelp))
|
|
1618
|
-
ret
|
|
1645
|
+
decodeRangeHelp(bytes, encoding, skipBom, 0, tagSimpleNumber(bytesSize))
|
|
1619
1646
|
}
|
|
1620
1647
|
|
|
1621
1648
|
/**
|
|
@@ -1654,8 +1681,8 @@ export let decodeKeepBom = (bytes: Bytes, encoding: Encoding) => {
|
|
|
1654
1681
|
*
|
|
1655
1682
|
* @since v0.4.0
|
|
1656
1683
|
*/
|
|
1657
|
-
@
|
|
1658
|
-
export let
|
|
1684
|
+
@unsafe
|
|
1685
|
+
export let forEachCodePoint = (fn: Number -> Void, str: String) => {
|
|
1659
1686
|
let (>>>) = WasmI32.shrU
|
|
1660
1687
|
let (-) = WasmI32.sub
|
|
1661
1688
|
let (&) = WasmI32.and
|
|
@@ -1692,15 +1719,11 @@ export let rec forEachCodePoint = (fn: (Number) -> Void, str: String) => {
|
|
|
1692
1719
|
// avoid heap allocations. `getCodePoint` will throw
|
|
1693
1720
|
// MalformedUnicode exception for values exceeding this limit.
|
|
1694
1721
|
let codePoint = getCodePoint(ptr)
|
|
1695
|
-
Memory.incRef(WasmI32.fromGrain(fn))
|
|
1696
1722
|
fn(tagSimpleNumber(codePoint))
|
|
1697
1723
|
|
|
1698
1724
|
ptr += codePointByteCount
|
|
1699
1725
|
idx += 1n
|
|
1700
1726
|
}
|
|
1701
|
-
Memory.decRef(WasmI32.fromGrain(fn))
|
|
1702
|
-
Memory.decRef(WasmI32.fromGrain(str))
|
|
1703
|
-
Memory.decRef(WasmI32.fromGrain(forEachCodePoint))
|
|
1704
1727
|
void
|
|
1705
1728
|
}
|
|
1706
1729
|
|
|
@@ -1716,8 +1739,8 @@ export let rec forEachCodePoint = (fn: (Number) -> Void, str: String) => {
|
|
|
1716
1739
|
*
|
|
1717
1740
|
* @since v0.4.0
|
|
1718
1741
|
*/
|
|
1719
|
-
@
|
|
1720
|
-
export let
|
|
1742
|
+
@unsafe
|
|
1743
|
+
export let forEachCodePointi = (fn: (Number, Number) -> Void, str: String) => {
|
|
1721
1744
|
let (>>>) = WasmI32.shrU
|
|
1722
1745
|
let (-) = WasmI32.sub
|
|
1723
1746
|
let (&) = WasmI32.and
|
|
@@ -1754,42 +1777,38 @@ export let rec forEachCodePointi = (fn: (Number, Number) -> Void, str: String) =
|
|
|
1754
1777
|
// avoid heap allocations. `getCodePoint` will throw
|
|
1755
1778
|
// MalformedUnicode exception for values exceeding this limit.
|
|
1756
1779
|
let codePoint = getCodePoint(ptr)
|
|
1757
|
-
Memory.incRef(WasmI32.fromGrain(fn))
|
|
1758
1780
|
fn(tagSimpleNumber(codePoint), tagSimpleNumber(idx))
|
|
1759
1781
|
|
|
1760
1782
|
ptr += codePointByteCount
|
|
1761
1783
|
idx += 1n
|
|
1762
1784
|
}
|
|
1763
|
-
Memory.decRef(WasmI32.fromGrain(fn))
|
|
1764
|
-
Memory.decRef(WasmI32.fromGrain(str))
|
|
1765
|
-
Memory.decRef(WasmI32.fromGrain(forEachCodePointi))
|
|
1766
1785
|
void
|
|
1767
1786
|
}
|
|
1787
|
+
|
|
1768
1788
|
let trimString = (str: String, end: Bool) => {
|
|
1769
1789
|
let chars = explode(str), charsLength = length(str)
|
|
1770
1790
|
let mut i = 0, offset = 1
|
|
1771
1791
|
if (end) {
|
|
1772
|
-
i = charsLength-1
|
|
1792
|
+
i = charsLength - 1
|
|
1773
1793
|
offset = -1
|
|
1774
1794
|
}
|
|
1775
1795
|
for (; i < charsLength && i > -1; i += offset) {
|
|
1776
|
-
let currentChar = chars[i]
|
|
1796
|
+
let currentChar = chars[i]
|
|
1777
1797
|
// TODO: Use unicode whitespace property and unicode line terminator once github issue #661 is completed
|
|
1778
1798
|
if (
|
|
1779
1799
|
// Spacing
|
|
1780
|
-
currentChar != '\u{0009}'
|
|
1781
|
-
currentChar != '\u{000B}'
|
|
1782
|
-
currentChar != '\u{000C}'
|
|
1783
|
-
currentChar != '\u{0020}'
|
|
1784
|
-
currentChar != '\u{00A0}'
|
|
1785
|
-
currentChar != '\u{FEFF}'
|
|
1800
|
+
currentChar != '\u{0009}' && // Tab
|
|
1801
|
+
currentChar != '\u{000B}' && // LINE TABULATION
|
|
1802
|
+
currentChar != '\u{000C}' && // FORM FEED (FF)
|
|
1803
|
+
currentChar != '\u{0020}' && // Space
|
|
1804
|
+
currentChar != '\u{00A0}' && // No Break Space
|
|
1805
|
+
currentChar != '\u{FEFF}' && // ZERO WIDTH NO-BREAK SPACE
|
|
1786
1806
|
// Line Terminators
|
|
1787
1807
|
currentChar != '\n' && // LF
|
|
1788
1808
|
currentChar != '\r' // CR
|
|
1789
1809
|
) break
|
|
1790
1810
|
}
|
|
1791
|
-
if (end) slice(0, i+1, str)
|
|
1792
|
-
else slice(i, charsLength, str)
|
|
1811
|
+
if (end) slice(0, i + 1, str) else slice(i, charsLength, str)
|
|
1793
1812
|
}
|
|
1794
1813
|
/**
|
|
1795
1814
|
* Trims the beginning of a string—removing any leading whitespace characters.
|