@grain/stdlib 0.4.1 → 0.4.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +63 -0
- package/LICENSE +21 -0
- package/README.md +34 -0
- package/array.gr +200 -89
- package/array.md +81 -5
- package/buffer.gr +93 -36
- package/bytes.gr +512 -407
- package/bytes.md +621 -0
- package/char.gr +119 -55
- package/char.md +200 -0
- package/hash.gr +42 -15
- package/hash.md +44 -0
- package/list.gr +121 -50
- package/map.gr +106 -110
- package/number.gr +37 -1
- package/number.md +66 -0
- package/option.gr +260 -53
- package/option.md +579 -0
- package/package.json +33 -29
- package/pervasives.gr +32 -20
- package/queue.gr +102 -30
- package/queue.md +191 -0
- package/range.gr +26 -26
- package/range.md +1 -1
- package/regex.gr +3055 -0
- package/regex.md +449 -0
- package/result.gr +216 -70
- package/result.md +446 -0
- package/runtime/dataStructures.gr +28 -29
- package/runtime/debug.gr +0 -1
- package/runtime/equal.gr +37 -16
- package/runtime/exception.gr +28 -15
- package/runtime/gc.gr +33 -20
- package/runtime/malloc.gr +19 -11
- package/runtime/numberUtils.gr +208 -105
- package/runtime/numbers.gr +217 -118
- package/runtime/string.gr +150 -59
- package/runtime/stringUtils.gr +176 -0
- package/runtime/unsafe/conv.gr +51 -8
- package/runtime/unsafe/memory.gr +14 -3
- package/runtime/unsafe/printWasm.gr +4 -4
- package/runtime/unsafe/tags.gr +2 -2
- package/runtime/unsafe/wasmf32.gr +9 -2
- package/runtime/unsafe/wasmf64.gr +9 -2
- package/runtime/unsafe/wasmi32.gr +65 -47
- package/runtime/unsafe/wasmi64.gr +78 -50
- package/runtime/wasi.gr +199 -45
- package/set.gr +281 -119
- package/set.md +502 -0
- package/stack.gr +26 -26
- package/stack.md +143 -0
- package/string.gr +697 -329
- package/string.md +815 -0
- package/sys/file.gr +356 -177
- package/sys/process.gr +10 -6
- package/sys/random.gr +3 -6
- package/sys/time.gr +3 -3
package/string.gr
CHANGED
|
@@ -1,24 +1,71 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @module String: Utilities for working with strings.
|
|
3
|
+
* @example import String from "string"
|
|
4
|
+
*
|
|
5
|
+
* @since v0.2.0
|
|
6
|
+
* @history v0.1.0: Originally named `strings`
|
|
7
|
+
* @history v0.2.0: Renamed to `string`
|
|
8
|
+
*/
|
|
1
9
|
import WasmI32 from "runtime/unsafe/wasmi32"
|
|
2
10
|
import Memory from "runtime/unsafe/memory"
|
|
3
11
|
import Exception from "runtime/exception"
|
|
4
|
-
import {
|
|
12
|
+
import {
|
|
13
|
+
tagSimpleNumber,
|
|
14
|
+
allocateArray,
|
|
15
|
+
allocateChar,
|
|
16
|
+
allocateString,
|
|
17
|
+
allocateBytes,
|
|
18
|
+
} from "runtime/dataStructures"
|
|
19
|
+
|
|
20
|
+
/**
|
|
21
|
+
* @section Types: Type declarations included in the String module.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
/**
|
|
25
|
+
* Byte encodings
|
|
26
|
+
*/
|
|
27
|
+
export enum Encoding {
|
|
28
|
+
UTF8,
|
|
29
|
+
UTF16_BE,
|
|
30
|
+
UTF16_LE,
|
|
31
|
+
UTF32_BE,
|
|
32
|
+
UTF32_LE,
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/**
|
|
36
|
+
* @section Values: Functions for working with the String data type.
|
|
37
|
+
*/
|
|
5
38
|
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
39
|
+
/**
|
|
40
|
+
* Concatenate two strings.
|
|
41
|
+
*
|
|
42
|
+
* @param str1: The beginning string
|
|
43
|
+
* @param str2: The ending string
|
|
44
|
+
* @returns The combined string
|
|
45
|
+
*
|
|
46
|
+
* @example String.concat("Foo", "Bar") == "FooBar"
|
|
47
|
+
*
|
|
48
|
+
* @since v0.1.0
|
|
49
|
+
*/
|
|
10
50
|
export let concat = (++)
|
|
11
51
|
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
52
|
+
/**
|
|
53
|
+
* Returns the character length of the input string.
|
|
54
|
+
*
|
|
55
|
+
* @param string: The string to inspect
|
|
56
|
+
* @returns The number of characters in the string
|
|
57
|
+
*
|
|
58
|
+
* @example String.length("Hello world") == 11
|
|
59
|
+
*
|
|
60
|
+
* @since v0.1.0
|
|
61
|
+
*/
|
|
15
62
|
@disableGC
|
|
16
|
-
export let rec length = (
|
|
17
|
-
let
|
|
18
|
-
let size = WasmI32.load(
|
|
63
|
+
export let rec length = (string: String) => {
|
|
64
|
+
let string = WasmI32.fromGrain(string)
|
|
65
|
+
let size = WasmI32.load(string, 4n)
|
|
19
66
|
|
|
20
67
|
let mut len = 0n
|
|
21
|
-
let mut ptr = WasmI32.add(
|
|
68
|
+
let mut ptr = WasmI32.add(string, 8n)
|
|
22
69
|
let end = WasmI32.add(ptr, size)
|
|
23
70
|
|
|
24
71
|
while (WasmI32.ltU(ptr, end)) {
|
|
@@ -31,7 +78,7 @@ export let rec length = (s: String) => {
|
|
|
31
78
|
|
|
32
79
|
let ret = tagSimpleNumber(len)
|
|
33
80
|
Memory.decRef(WasmI32.fromGrain(length))
|
|
34
|
-
Memory.decRef(WasmI32.fromGrain(
|
|
81
|
+
Memory.decRef(WasmI32.fromGrain(string))
|
|
35
82
|
ret
|
|
36
83
|
}
|
|
37
84
|
|
|
@@ -43,38 +90,51 @@ let wasmSafeLength = (s: String) => {
|
|
|
43
90
|
length(s)
|
|
44
91
|
}
|
|
45
92
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
93
|
+
/**
|
|
94
|
+
* Returns the byte length of the input string.
|
|
95
|
+
*
|
|
96
|
+
* @param string: The string to inspect
|
|
97
|
+
* @returns The number of bytes in the string
|
|
98
|
+
*
|
|
99
|
+
* @example String.byteLength("🌾") == 4
|
|
100
|
+
*
|
|
101
|
+
* @since v0.1.0
|
|
102
|
+
*/
|
|
49
103
|
@disableGC
|
|
50
|
-
export let rec byteLength = (
|
|
51
|
-
let
|
|
52
|
-
let ret = tagSimpleNumber(WasmI32.load(
|
|
104
|
+
export let rec byteLength = (string: String) => {
|
|
105
|
+
let string = WasmI32.fromGrain(string)
|
|
106
|
+
let ret = tagSimpleNumber(WasmI32.load(string, 4n))
|
|
53
107
|
Memory.decRef(WasmI32.fromGrain(byteLength))
|
|
54
|
-
Memory.decRef(WasmI32.fromGrain(
|
|
108
|
+
Memory.decRef(WasmI32.fromGrain(string))
|
|
55
109
|
ret
|
|
56
110
|
}
|
|
57
111
|
|
|
58
|
-
|
|
59
112
|
// @disableGC-safe wrapper
|
|
60
113
|
@disableGC
|
|
61
|
-
let wasmSafeByteLength = (
|
|
114
|
+
let wasmSafeByteLength = (string: String) => {
|
|
62
115
|
Memory.incRef(WasmI32.fromGrain(byteLength))
|
|
63
|
-
Memory.incRef(WasmI32.fromGrain(
|
|
64
|
-
byteLength(
|
|
116
|
+
Memory.incRef(WasmI32.fromGrain(string))
|
|
117
|
+
byteLength(string)
|
|
65
118
|
}
|
|
66
119
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
120
|
+
/**
|
|
121
|
+
* Finds the position of a substring in the input string.
|
|
122
|
+
*
|
|
123
|
+
* @param search: The substring to find
|
|
124
|
+
* @param string: The string to inspect
|
|
125
|
+
* @returns `Some(position)` containing the starting position of the substring if found or `None` otherwise
|
|
126
|
+
*
|
|
127
|
+
* @example String.indexOf("world", "Hello world") == Some(6)
|
|
128
|
+
*
|
|
129
|
+
* @since v0.3.0
|
|
130
|
+
*/
|
|
71
131
|
@disableGC
|
|
72
|
-
export let rec indexOf = (
|
|
73
|
-
let
|
|
74
|
-
let
|
|
132
|
+
export let rec indexOf = (search: String, string: String) => {
|
|
133
|
+
let search = WasmI32.fromGrain(search)
|
|
134
|
+
let string = WasmI32.fromGrain(string)
|
|
75
135
|
|
|
76
|
-
let size = WasmI32.load(
|
|
77
|
-
let psize = WasmI32.load(
|
|
136
|
+
let size = WasmI32.load(string, 4n)
|
|
137
|
+
let psize = WasmI32.load(search, 4n)
|
|
78
138
|
|
|
79
139
|
let (>) = WasmI32.gtU
|
|
80
140
|
let (<) = WasmI32.ltU
|
|
@@ -89,8 +149,8 @@ export let rec indexOf = (p: String, s: String) => {
|
|
|
89
149
|
none
|
|
90
150
|
} else {
|
|
91
151
|
let mut idx = 0n
|
|
92
|
-
let mut ptr =
|
|
93
|
-
let mut pptr =
|
|
152
|
+
let mut ptr = string + 8n
|
|
153
|
+
let mut pptr = search + 8n
|
|
94
154
|
let end = ptr + size - psize + 1n
|
|
95
155
|
|
|
96
156
|
let mut result = -1n
|
|
@@ -122,19 +182,36 @@ export let rec indexOf = (p: String, s: String) => {
|
|
|
122
182
|
Some(tagSimpleNumber(result))
|
|
123
183
|
}
|
|
124
184
|
}
|
|
125
|
-
Memory.decRef(WasmI32.fromGrain(
|
|
126
|
-
Memory.decRef(WasmI32.fromGrain(
|
|
185
|
+
Memory.decRef(WasmI32.fromGrain(search))
|
|
186
|
+
Memory.decRef(WasmI32.fromGrain(string))
|
|
127
187
|
Memory.decRef(WasmI32.fromGrain(indexOf))
|
|
128
188
|
ret
|
|
129
189
|
}
|
|
130
190
|
|
|
191
|
+
/**
|
|
192
|
+
* Get the character at the position in the input string.
|
|
193
|
+
*
|
|
194
|
+
* @param position: The position to check
|
|
195
|
+
* @param string: The string to search
|
|
196
|
+
* @returns The character at the provided position
|
|
197
|
+
*
|
|
198
|
+
* @example String.charAt(5, "Hello world") == ' '
|
|
199
|
+
*
|
|
200
|
+
* @since v0.4.0
|
|
201
|
+
*/
|
|
131
202
|
@disableGC
|
|
132
|
-
export let rec charAt = (
|
|
203
|
+
export let rec charAt = (position, string: String) => {
|
|
133
204
|
Memory.incRef(WasmI32.fromGrain((<=)))
|
|
134
|
-
if (
|
|
205
|
+
if (
|
|
206
|
+
wasmSafeLength(string) <= position ||
|
|
207
|
+
{
|
|
208
|
+
Memory.incRef(WasmI32.fromGrain((<)))
|
|
209
|
+
position < 0
|
|
210
|
+
}
|
|
211
|
+
) {
|
|
135
212
|
Memory.incRef(WasmI32.fromGrain((++)))
|
|
136
213
|
Memory.incRef(WasmI32.fromGrain(toString))
|
|
137
|
-
fail
|
|
214
|
+
fail "Invalid offset: " ++ toString(position)
|
|
138
215
|
}
|
|
139
216
|
// Implementation is similar to explodeHelp, but doesn't perform unneeded memory allocations
|
|
140
217
|
let (>>>) = WasmI32.shrU
|
|
@@ -142,13 +219,13 @@ export let rec charAt = (idx, s: String) => {
|
|
|
142
219
|
let (&) = WasmI32.and
|
|
143
220
|
let (<) = WasmI32.ltU
|
|
144
221
|
let (==) = WasmI32.eq
|
|
145
|
-
let size = WasmI32.fromGrain(wasmSafeByteLength(
|
|
146
|
-
let len = WasmI32.fromGrain(wasmSafeLength(
|
|
147
|
-
let
|
|
148
|
-
let
|
|
149
|
-
let mut ptr =
|
|
150
|
-
let end = ptr + size
|
|
151
|
-
let mut counter = 0n
|
|
222
|
+
let size = WasmI32.fromGrain(wasmSafeByteLength(string)) >>> 1n
|
|
223
|
+
let len = WasmI32.fromGrain(wasmSafeLength(string)) >>> 1n
|
|
224
|
+
let position = WasmI32.fromGrain(position) >>> 1n
|
|
225
|
+
let string = WasmI32.fromGrain(string)
|
|
226
|
+
let mut ptr = string + 8n
|
|
227
|
+
let end = ptr + size
|
|
228
|
+
let mut counter = 0n
|
|
152
229
|
let mut result = 0n
|
|
153
230
|
while (ptr < end) {
|
|
154
231
|
let byte = WasmI32.load8U(ptr, 0n)
|
|
@@ -161,7 +238,7 @@ export let rec charAt = (idx, s: String) => {
|
|
|
161
238
|
} else {
|
|
162
239
|
2n
|
|
163
240
|
}
|
|
164
|
-
if (counter ==
|
|
241
|
+
if (counter == position) {
|
|
165
242
|
let c = allocateChar()
|
|
166
243
|
Memory.copy(c + 4n, ptr, n)
|
|
167
244
|
result = c
|
|
@@ -174,7 +251,7 @@ export let rec charAt = (idx, s: String) => {
|
|
|
174
251
|
fail "charAt: should be impossible (please report)"
|
|
175
252
|
}
|
|
176
253
|
let ret = WasmI32.toGrain(result): Char
|
|
177
|
-
Memory.decRef(WasmI32.fromGrain(
|
|
254
|
+
Memory.decRef(WasmI32.fromGrain(string))
|
|
178
255
|
Memory.decRef(WasmI32.fromGrain(charAt))
|
|
179
256
|
ret
|
|
180
257
|
}
|
|
@@ -228,17 +305,37 @@ let explodeHelp = (s: String, chars) => {
|
|
|
228
305
|
arr
|
|
229
306
|
}
|
|
230
307
|
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
308
|
+
/**
|
|
309
|
+
* Split a string into its Unicode characters.
|
|
310
|
+
*
|
|
311
|
+
* @param string: The string to split
|
|
312
|
+
* @returns An array containing all characters in the string
|
|
313
|
+
*
|
|
314
|
+
* @example String.explode("Hello") == [> 'H', 'e', 'l', 'l', 'o']
|
|
315
|
+
*
|
|
316
|
+
* @since v0.3.0
|
|
317
|
+
*/
|
|
234
318
|
@disableGC
|
|
235
|
-
export let explode =
|
|
236
|
-
|
|
319
|
+
export let rec explode = string => {
|
|
320
|
+
// `explodeHelp` and `string` do not need to be incRef'd as they are not
|
|
321
|
+
// decRef'd in `explodeHelp`
|
|
322
|
+
let ret = WasmI32.toGrain(explodeHelp(string, true)): (Array<Char>)
|
|
323
|
+
|
|
324
|
+
Memory.decRef(WasmI32.fromGrain(string))
|
|
325
|
+
Memory.decRef(WasmI32.fromGrain(explode))
|
|
326
|
+
ret
|
|
237
327
|
}
|
|
238
328
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
329
|
+
/**
|
|
330
|
+
* Create a string from an array of characters.
|
|
331
|
+
*
|
|
332
|
+
* @param arr: The array to combine
|
|
333
|
+
* @returns A string representation of the array of characters
|
|
334
|
+
*
|
|
335
|
+
* @example String.implode([> 'H', 'e', 'l', 'l', 'o']) == "Hello"
|
|
336
|
+
*
|
|
337
|
+
* @since v0.3.0
|
|
338
|
+
*/
|
|
242
339
|
@disableGC
|
|
243
340
|
export let rec implode = (arr: Array<Char>) => {
|
|
244
341
|
let (+) = WasmI32.add
|
|
@@ -297,12 +394,44 @@ export let rec implode = (arr: Array<Char>) => {
|
|
|
297
394
|
ret
|
|
298
395
|
}
|
|
299
396
|
|
|
300
|
-
//
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
397
|
+
// Helper to get the length in constant time without depending on Array
|
|
398
|
+
primitive arrayLength: Array<a> -> Number = "@array.length"
|
|
399
|
+
|
|
400
|
+
/**
|
|
401
|
+
* Create a string that is the given string reversed.
|
|
402
|
+
*
|
|
403
|
+
* @param string: The string to reverse
|
|
404
|
+
* @returns A string whose characters are in the reverse order of the given string
|
|
405
|
+
*
|
|
406
|
+
* @example String.reverse("olleH") == "Hello"
|
|
407
|
+
*
|
|
408
|
+
* @since v0.4.5
|
|
409
|
+
*/
|
|
410
|
+
export let reverse = string => {
|
|
411
|
+
let mut arr = explode(string)
|
|
412
|
+
let len = arrayLength(arr)
|
|
413
|
+
let halfLen = len / 2
|
|
414
|
+
for (let mut i = 0; i < halfLen; i += 1) {
|
|
415
|
+
let lastIdx = len - i - 1
|
|
416
|
+
let last = arr[lastIdx]
|
|
417
|
+
let first = arr[i]
|
|
418
|
+
arr[i] = last
|
|
419
|
+
arr[lastIdx] = first
|
|
420
|
+
}
|
|
421
|
+
implode(arr)
|
|
422
|
+
}
|
|
423
|
+
|
|
424
|
+
/**
|
|
425
|
+
* Split a string by the given separator.
|
|
426
|
+
*
|
|
427
|
+
* @param separator: The separator to split on
|
|
428
|
+
* @param string: The string to split
|
|
429
|
+
* @returns An array of substrings from the initial string
|
|
430
|
+
*
|
|
431
|
+
* @example String.split(" ", "Hello world") == [> "Hello", "world"]
|
|
432
|
+
*/
|
|
304
433
|
@disableGC
|
|
305
|
-
export let rec split = (
|
|
434
|
+
export let rec split = (separator: String, string: String) => {
|
|
306
435
|
let (+) = WasmI32.add
|
|
307
436
|
let (-) = WasmI32.sub
|
|
308
437
|
let (==) = WasmI32.eq
|
|
@@ -312,22 +441,22 @@ export let rec split = (p: String, s: String) => {
|
|
|
312
441
|
let (>>) = WasmI32.shrS
|
|
313
442
|
let (&) = WasmI32.and
|
|
314
443
|
|
|
315
|
-
let size = WasmI32.fromGrain(wasmSafeByteLength(
|
|
316
|
-
let psize = WasmI32.fromGrain(wasmSafeByteLength(
|
|
444
|
+
let size = WasmI32.fromGrain(wasmSafeByteLength(string)) >> 1n
|
|
445
|
+
let psize = WasmI32.fromGrain(wasmSafeByteLength(separator)) >> 1n
|
|
317
446
|
|
|
318
447
|
let ret = if (psize == 0n) {
|
|
319
|
-
WasmI32.toGrain(explodeHelp(
|
|
448
|
+
WasmI32.toGrain(explodeHelp(string, false)): (Array<String>)
|
|
320
449
|
} else if (psize > size) {
|
|
321
|
-
let
|
|
450
|
+
let string = WasmI32.fromGrain(string)
|
|
322
451
|
let ptr = allocateArray(1n)
|
|
323
|
-
WasmI32.store(ptr, Memory.incRef(
|
|
324
|
-
WasmI32.toGrain(ptr): Array<String>
|
|
452
|
+
WasmI32.store(ptr, Memory.incRef(string), 8n)
|
|
453
|
+
WasmI32.toGrain(ptr): (Array<String>)
|
|
325
454
|
} else {
|
|
326
|
-
let
|
|
327
|
-
let
|
|
455
|
+
let string = WasmI32.fromGrain(string)
|
|
456
|
+
let separator = WasmI32.fromGrain(separator)
|
|
328
457
|
|
|
329
|
-
let mut ptr =
|
|
330
|
-
let mut pptr =
|
|
458
|
+
let mut ptr = string + 8n
|
|
459
|
+
let mut pptr = separator + 8n
|
|
331
460
|
let end = ptr + size - psize + 1n
|
|
332
461
|
|
|
333
462
|
let mut numStrings = 1n
|
|
@@ -348,7 +477,7 @@ export let rec split = (p: String, s: String) => {
|
|
|
348
477
|
}
|
|
349
478
|
}
|
|
350
479
|
|
|
351
|
-
ptr =
|
|
480
|
+
ptr = string + 8n
|
|
352
481
|
let mut last = ptr
|
|
353
482
|
let arr = allocateArray(numStrings)
|
|
354
483
|
let mut arrIdx = 0n
|
|
@@ -377,26 +506,33 @@ export let rec split = (p: String, s: String) => {
|
|
|
377
506
|
}
|
|
378
507
|
|
|
379
508
|
// Grab last string
|
|
380
|
-
let strSize =
|
|
509
|
+
let strSize = string + 8n + size - last
|
|
381
510
|
let lastStr = allocateString(strSize)
|
|
382
511
|
Memory.copy(lastStr + 8n, last, strSize)
|
|
383
512
|
WasmI32.store(arr + arrIdx, lastStr, 8n)
|
|
384
513
|
|
|
385
|
-
WasmI32.toGrain(arr): Array<String>
|
|
514
|
+
WasmI32.toGrain(arr): (Array<String>)
|
|
386
515
|
}
|
|
387
|
-
Memory.decRef(WasmI32.fromGrain(
|
|
388
|
-
Memory.decRef(WasmI32.fromGrain(
|
|
516
|
+
Memory.decRef(WasmI32.fromGrain(separator))
|
|
517
|
+
Memory.decRef(WasmI32.fromGrain(string))
|
|
389
518
|
Memory.decRef(WasmI32.fromGrain(split))
|
|
390
519
|
ret
|
|
391
520
|
}
|
|
392
521
|
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
522
|
+
/**
|
|
523
|
+
* Get a portion of a string.
|
|
524
|
+
*
|
|
525
|
+
* @param start: The start position of the substring
|
|
526
|
+
* @param to: The end position of the substring, exclusive
|
|
527
|
+
* @param string: The input string
|
|
528
|
+
* @returns The substring from the initial string
|
|
529
|
+
*
|
|
530
|
+
* @example String.slice(0, 5, "Hello world") == "Hello"
|
|
531
|
+
*
|
|
532
|
+
* @since v0.1.0
|
|
533
|
+
*/
|
|
398
534
|
@disableGC
|
|
399
|
-
export let rec slice = (start: Number, to: Number,
|
|
535
|
+
export let rec slice = (start: Number, to: Number, string: String) => {
|
|
400
536
|
let (+) = WasmI32.add
|
|
401
537
|
let (-) = WasmI32.sub
|
|
402
538
|
let (==) = WasmI32.eq
|
|
@@ -409,10 +545,10 @@ export let rec slice = (start: Number, to: Number, s: String) => {
|
|
|
409
545
|
let startOrig = start
|
|
410
546
|
let toOrig = to
|
|
411
547
|
|
|
412
|
-
let len = WasmI32.fromGrain(wasmSafeLength(
|
|
413
|
-
let size = WasmI32.fromGrain(wasmSafeByteLength(
|
|
548
|
+
let len = WasmI32.fromGrain(wasmSafeLength(string)) >> 1n
|
|
549
|
+
let size = WasmI32.fromGrain(wasmSafeByteLength(string)) >> 1n
|
|
414
550
|
|
|
415
|
-
let
|
|
551
|
+
let string = WasmI32.fromGrain(string)
|
|
416
552
|
|
|
417
553
|
let mut start = WasmI32.fromGrain(start)
|
|
418
554
|
if ((start & 1n) != 1n) {
|
|
@@ -441,7 +577,7 @@ export let rec slice = (start: Number, to: Number, s: String) => {
|
|
|
441
577
|
throw InvalidArgument("Start index exceeds end index")
|
|
442
578
|
}
|
|
443
579
|
|
|
444
|
-
let mut ptr =
|
|
580
|
+
let mut ptr = string + 8n
|
|
445
581
|
let mut begin = ptr
|
|
446
582
|
let mut end = ptr
|
|
447
583
|
let stop = ptr + size
|
|
@@ -462,7 +598,7 @@ export let rec slice = (start: Number, to: Number, s: String) => {
|
|
|
462
598
|
ptr += 1n
|
|
463
599
|
}
|
|
464
600
|
if (to == len) {
|
|
465
|
-
end =
|
|
601
|
+
end = string + 8n + size
|
|
466
602
|
}
|
|
467
603
|
if (start == to) {
|
|
468
604
|
begin = end
|
|
@@ -476,17 +612,24 @@ export let rec slice = (start: Number, to: Number, s: String) => {
|
|
|
476
612
|
let ret = WasmI32.toGrain(newString): String
|
|
477
613
|
Memory.decRef(WasmI32.fromGrain(startOrig))
|
|
478
614
|
Memory.decRef(WasmI32.fromGrain(toOrig))
|
|
479
|
-
Memory.decRef(WasmI32.fromGrain(
|
|
615
|
+
Memory.decRef(WasmI32.fromGrain(string))
|
|
480
616
|
Memory.decRef(WasmI32.fromGrain(slice))
|
|
481
617
|
ret
|
|
482
618
|
}
|
|
483
619
|
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
620
|
+
/**
|
|
621
|
+
* Check if a string contains a substring.
|
|
622
|
+
*
|
|
623
|
+
* @param search: The substring to check
|
|
624
|
+
* @param string: The string to search
|
|
625
|
+
* @returns `true` if the input string contains the search value or `false` otherwise
|
|
626
|
+
*
|
|
627
|
+
* @example String.contains("world", "Hello world") == true
|
|
628
|
+
*
|
|
629
|
+
* @since v0.1.0
|
|
630
|
+
*/
|
|
488
631
|
@disableGC
|
|
489
|
-
export let rec contains = (
|
|
632
|
+
export let rec contains = (search: String, string: String) => {
|
|
490
633
|
// "Not So Naive" string search algorithm
|
|
491
634
|
// searching phase in O(nm) time complexity
|
|
492
635
|
// slightly (by coefficient) sub-linear in the average case
|
|
@@ -500,17 +643,17 @@ export let rec contains = (p: String, s: String) => {
|
|
|
500
643
|
let (<=) = WasmI32.leU
|
|
501
644
|
let (>) = WasmI32.gtU
|
|
502
645
|
let (>>) = WasmI32.shrS
|
|
503
|
-
let pOrig =
|
|
504
|
-
let sOrig =
|
|
646
|
+
let pOrig = search
|
|
647
|
+
let sOrig = string
|
|
505
648
|
|
|
506
|
-
let n = WasmI32.fromGrain(wasmSafeByteLength(
|
|
507
|
-
let m = WasmI32.fromGrain(wasmSafeByteLength(
|
|
649
|
+
let n = WasmI32.fromGrain(wasmSafeByteLength(string)) >> 1n
|
|
650
|
+
let m = WasmI32.fromGrain(wasmSafeByteLength(search)) >> 1n
|
|
508
651
|
|
|
509
|
-
let mut
|
|
510
|
-
let mut
|
|
652
|
+
let mut string = WasmI32.fromGrain(string)
|
|
653
|
+
let mut search = WasmI32.fromGrain(search)
|
|
511
654
|
|
|
512
|
-
|
|
513
|
-
|
|
655
|
+
string += 8n
|
|
656
|
+
search += 8n
|
|
514
657
|
|
|
515
658
|
let mut j = 0n, k = 0n, ell = 0n
|
|
516
659
|
|
|
@@ -522,10 +665,10 @@ export let rec contains = (p: String, s: String) => {
|
|
|
522
665
|
if (m == 0n) {
|
|
523
666
|
true
|
|
524
667
|
} else {
|
|
525
|
-
let pat = WasmI32.load8U(
|
|
668
|
+
let pat = WasmI32.load8U(search, 0n)
|
|
526
669
|
let mut result = false
|
|
527
670
|
while (j < n) {
|
|
528
|
-
if (pat == WasmI32.load8U(
|
|
671
|
+
if (pat == WasmI32.load8U(string + j, 0n)) {
|
|
529
672
|
result = true
|
|
530
673
|
break
|
|
531
674
|
} else {
|
|
@@ -536,7 +679,7 @@ export let rec contains = (p: String, s: String) => {
|
|
|
536
679
|
}
|
|
537
680
|
} else {
|
|
538
681
|
// NSM preprocessing
|
|
539
|
-
if (WasmI32.load8U(
|
|
682
|
+
if (WasmI32.load8U(search, 0n) == WasmI32.load8U(search, 1n)) {
|
|
540
683
|
k = 2n
|
|
541
684
|
ell = 1n
|
|
542
685
|
} else {
|
|
@@ -547,10 +690,13 @@ export let rec contains = (p: String, s: String) => {
|
|
|
547
690
|
let mut result = false
|
|
548
691
|
// NSM searching
|
|
549
692
|
while (j <= n - m) {
|
|
550
|
-
if (WasmI32.load8U(
|
|
693
|
+
if (WasmI32.load8U(search, 1n) != WasmI32.load8U(string + j, 1n)) {
|
|
551
694
|
j += k
|
|
552
695
|
} else {
|
|
553
|
-
if (
|
|
696
|
+
if (
|
|
697
|
+
Memory.compare(search + 2n, string + j + 2n, m - 2n) == 0n &&
|
|
698
|
+
WasmI32.load8U(search, 0n) == WasmI32.load8U(string + j, 0n)
|
|
699
|
+
) {
|
|
554
700
|
result = true
|
|
555
701
|
break
|
|
556
702
|
}
|
|
@@ -565,32 +711,39 @@ export let rec contains = (p: String, s: String) => {
|
|
|
565
711
|
ret
|
|
566
712
|
}
|
|
567
713
|
|
|
568
|
-
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
714
|
+
/**
|
|
715
|
+
* Check if a string begins with another string.
|
|
716
|
+
*
|
|
717
|
+
* @param search: The string to compare to the start
|
|
718
|
+
* @param string: The string to search
|
|
719
|
+
* @returns `true` if the input string starts with the search value or `false` otherwise
|
|
720
|
+
*
|
|
721
|
+
* @example String.startsWith("Hello", "Hello world") == true
|
|
722
|
+
*
|
|
723
|
+
* @since v0.1.0
|
|
724
|
+
*/
|
|
572
725
|
@disableGC
|
|
573
|
-
export let rec startsWith = (
|
|
726
|
+
export let rec startsWith = (search: String, string: String) => {
|
|
574
727
|
let (+) = WasmI32.add
|
|
575
728
|
let (>) = WasmI32.gtU
|
|
576
729
|
let (==) = WasmI32.eq
|
|
577
|
-
let pOrig =
|
|
578
|
-
let sOrig =
|
|
730
|
+
let pOrig = search
|
|
731
|
+
let sOrig = string
|
|
579
732
|
|
|
580
|
-
let mut
|
|
581
|
-
let mut
|
|
733
|
+
let mut search = WasmI32.fromGrain(search)
|
|
734
|
+
let mut string = WasmI32.fromGrain(string)
|
|
582
735
|
|
|
583
|
-
let n = WasmI32.load(
|
|
584
|
-
let m = WasmI32.load(
|
|
736
|
+
let n = WasmI32.load(string, 4n)
|
|
737
|
+
let m = WasmI32.load(search, 4n)
|
|
585
738
|
|
|
586
|
-
|
|
587
|
-
|
|
739
|
+
string += 8n
|
|
740
|
+
search += 8n
|
|
588
741
|
|
|
589
742
|
// Bail if pattern length is longer than input length
|
|
590
743
|
let ret = if (m > n) {
|
|
591
744
|
false
|
|
592
745
|
} else {
|
|
593
|
-
Memory.compare(
|
|
746
|
+
Memory.compare(search, string, m) == 0n
|
|
594
747
|
}
|
|
595
748
|
Memory.decRef(WasmI32.fromGrain(pOrig))
|
|
596
749
|
Memory.decRef(WasmI32.fromGrain(sOrig))
|
|
@@ -598,33 +751,40 @@ export let rec startsWith = (p: String, s: String) => {
|
|
|
598
751
|
ret
|
|
599
752
|
}
|
|
600
753
|
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
754
|
+
/**
|
|
755
|
+
* Check if a string ends with another string.
|
|
756
|
+
*
|
|
757
|
+
* @param search: The string to compare to the end
|
|
758
|
+
* @param string: The string to search
|
|
759
|
+
* @returns `true` if the input string ends with the search value or `false` otherwise
|
|
760
|
+
*
|
|
761
|
+
* @example String.endsWith("world", "Hello world") == true
|
|
762
|
+
*
|
|
763
|
+
* @since v0.1.0
|
|
764
|
+
*/
|
|
605
765
|
@disableGC
|
|
606
|
-
export let rec endsWith = (
|
|
766
|
+
export let rec endsWith = (search: String, string: String) => {
|
|
607
767
|
let (+) = WasmI32.add
|
|
608
768
|
let (-) = WasmI32.sub
|
|
609
769
|
let (>) = WasmI32.gtU
|
|
610
770
|
let (==) = WasmI32.eq
|
|
611
|
-
let pOrig =
|
|
612
|
-
let sOrig =
|
|
771
|
+
let pOrig = search
|
|
772
|
+
let sOrig = string
|
|
613
773
|
|
|
614
|
-
let mut
|
|
615
|
-
let mut
|
|
774
|
+
let mut search = WasmI32.fromGrain(search)
|
|
775
|
+
let mut string = WasmI32.fromGrain(string)
|
|
616
776
|
|
|
617
|
-
let n = WasmI32.load(
|
|
618
|
-
let m = WasmI32.load(
|
|
777
|
+
let n = WasmI32.load(string, 4n)
|
|
778
|
+
let m = WasmI32.load(search, 4n)
|
|
619
779
|
|
|
620
|
-
|
|
621
|
-
|
|
780
|
+
string += 8n
|
|
781
|
+
search += 8n
|
|
622
782
|
|
|
623
783
|
// Bail if pattern length is longer than input length
|
|
624
784
|
let ret = if (m > n) {
|
|
625
785
|
false
|
|
626
786
|
} else {
|
|
627
|
-
Memory.compare(
|
|
787
|
+
Memory.compare(search, string + n - m, m) == 0n
|
|
628
788
|
}
|
|
629
789
|
Memory.decRef(WasmI32.fromGrain(pOrig))
|
|
630
790
|
Memory.decRef(WasmI32.fromGrain(sOrig))
|
|
@@ -632,14 +792,6 @@ export let rec endsWith = (p: String, s: String) => {
|
|
|
632
792
|
ret
|
|
633
793
|
}
|
|
634
794
|
|
|
635
|
-
export enum Encoding {
|
|
636
|
-
UTF8,
|
|
637
|
-
UTF16_BE,
|
|
638
|
-
UTF16_LE,
|
|
639
|
-
UTF32_BE,
|
|
640
|
-
UTF32_LE,
|
|
641
|
-
}
|
|
642
|
-
|
|
643
795
|
// String->Byte encoding and helper functions:
|
|
644
796
|
|
|
645
797
|
// these are globals to avoid memory leaks
|
|
@@ -712,7 +864,7 @@ let utf16Length = (s: String) => {
|
|
|
712
864
|
|
|
713
865
|
@disableGC
|
|
714
866
|
let encodedLength = (s: String, encoding) => {
|
|
715
|
-
match(encoding) {
|
|
867
|
+
match (encoding) {
|
|
716
868
|
UTF32_BE => {
|
|
717
869
|
Memory.incRef(WasmI32.fromGrain((*)))
|
|
718
870
|
wasmSafeLength(s) * 4
|
|
@@ -775,13 +927,12 @@ let getCodePoint = (ptr: WasmI32) => {
|
|
|
775
927
|
}
|
|
776
928
|
continue
|
|
777
929
|
}
|
|
778
|
-
Memory.incRef(WasmI32.fromGrain((!)))
|
|
779
930
|
if (!(lowerBoundary <= byte && byte <= upperBoundary)) {
|
|
780
931
|
throw MalformedUnicode
|
|
781
932
|
}
|
|
782
933
|
lowerBoundary = 0x80n
|
|
783
934
|
upperBoundary = 0xBFn
|
|
784
|
-
codePoint =
|
|
935
|
+
codePoint = codePoint << 6n | byte & 0x3Fn
|
|
785
936
|
bytesSeen += 1n
|
|
786
937
|
if (bytesSeen == bytesNeeded) {
|
|
787
938
|
result = codePoint
|
|
@@ -793,26 +944,26 @@ let getCodePoint = (ptr: WasmI32) => {
|
|
|
793
944
|
|
|
794
945
|
// hack to avoid incRef on this pointer
|
|
795
946
|
@disableGC
|
|
796
|
-
let mut _BYTES_SIZE_OFFSET = 1n
|
|
947
|
+
let mut _BYTES_SIZE_OFFSET = 1n
|
|
797
948
|
@disableGC
|
|
798
|
-
let mut _BYTES_OFFSET = 1n
|
|
949
|
+
let mut _BYTES_OFFSET = 1n
|
|
799
950
|
|
|
800
951
|
@disableGC
|
|
801
952
|
let initPtr = () => {
|
|
802
953
|
_BYTES_SIZE_OFFSET = 4n
|
|
803
954
|
_BYTES_OFFSET = 8n
|
|
804
955
|
}
|
|
805
|
-
initPtr()
|
|
806
|
-
|
|
807
|
-
// Encodes the given string using the given encoding scheme
|
|
808
|
-
// @param s: String - The input string
|
|
809
|
-
// @param encoding: Encoding - The encoding to use
|
|
810
|
-
// @param includeBom: Bool - Whether to include the byte-order marker in the encoded output
|
|
811
|
-
// @param dest: Bytes - The bytes object to write the encoded output into
|
|
812
|
-
// @param destPos: Number - The location in the byte array to write the output
|
|
813
|
-
// @returns Bytes - Returns `dest`
|
|
956
|
+
initPtr()
|
|
957
|
+
|
|
814
958
|
@disableGC
|
|
815
|
-
let rec encodeAtHelp =
|
|
959
|
+
let rec encodeAtHelp =
|
|
960
|
+
(
|
|
961
|
+
string: String,
|
|
962
|
+
encoding: Encoding,
|
|
963
|
+
includeBom: Bool,
|
|
964
|
+
dest: Bytes,
|
|
965
|
+
destPos: Number,
|
|
966
|
+
) => {
|
|
816
967
|
let (>>>) = WasmI32.shrU
|
|
817
968
|
let (-) = WasmI32.sub
|
|
818
969
|
let (&) = WasmI32.and
|
|
@@ -821,12 +972,12 @@ let rec encodeAtHelp = (s: String, encoding: Encoding, includeBom: Bool, dest: B
|
|
|
821
972
|
let (<=) = WasmI32.leU
|
|
822
973
|
let (==) = WasmI32.eq
|
|
823
974
|
let (+) = WasmI32.add
|
|
824
|
-
let byteSize = WasmI32.fromGrain(wasmSafeByteLength(
|
|
825
|
-
let len = WasmI32.fromGrain(wasmSafeLength(
|
|
975
|
+
let byteSize = WasmI32.fromGrain(wasmSafeByteLength(string)) >>> 1n
|
|
976
|
+
let len = WasmI32.fromGrain(wasmSafeLength(string)) >>> 1n
|
|
826
977
|
|
|
827
|
-
let
|
|
978
|
+
let string = WasmI32.fromGrain(string)
|
|
828
979
|
|
|
829
|
-
let mut ptr =
|
|
980
|
+
let mut ptr = string + 8n
|
|
830
981
|
let end = ptr + byteSize
|
|
831
982
|
|
|
832
983
|
let bytes = WasmI32.fromGrain(dest)
|
|
@@ -835,9 +986,9 @@ let rec encodeAtHelp = (s: String, encoding: Encoding, includeBom: Bool, dest: B
|
|
|
835
986
|
let destSize = WasmI32.load(bytes, _BYTES_SIZE_OFFSET)
|
|
836
987
|
|
|
837
988
|
if (includeBom) {
|
|
838
|
-
match(encoding) {
|
|
989
|
+
match (encoding) {
|
|
839
990
|
UTF8 => {
|
|
840
|
-
if (
|
|
991
|
+
if (bytesIdx + 3n > destSize) {
|
|
841
992
|
throw Exception.IndexOutOfBounds
|
|
842
993
|
}
|
|
843
994
|
WasmI32.store8(bytes + bytesIdx, 0xEFn, _BYTES_OFFSET)
|
|
@@ -846,7 +997,7 @@ let rec encodeAtHelp = (s: String, encoding: Encoding, includeBom: Bool, dest: B
|
|
|
846
997
|
bytesIdx += 3n
|
|
847
998
|
},
|
|
848
999
|
UTF16_BE => {
|
|
849
|
-
if (
|
|
1000
|
+
if (bytesIdx + 2n > destSize) {
|
|
850
1001
|
throw Exception.IndexOutOfBounds
|
|
851
1002
|
}
|
|
852
1003
|
WasmI32.store8(bytes + bytesIdx, 0xFEn, _BYTES_OFFSET)
|
|
@@ -854,7 +1005,7 @@ let rec encodeAtHelp = (s: String, encoding: Encoding, includeBom: Bool, dest: B
|
|
|
854
1005
|
bytesIdx += 2n
|
|
855
1006
|
},
|
|
856
1007
|
UTF16_LE => {
|
|
857
|
-
if (
|
|
1008
|
+
if (bytesIdx + 2n > destSize) {
|
|
858
1009
|
throw Exception.IndexOutOfBounds
|
|
859
1010
|
}
|
|
860
1011
|
WasmI32.store8(bytes + bytesIdx, 0xFFn, _BYTES_OFFSET)
|
|
@@ -862,7 +1013,7 @@ let rec encodeAtHelp = (s: String, encoding: Encoding, includeBom: Bool, dest: B
|
|
|
862
1013
|
bytesIdx += 2n
|
|
863
1014
|
},
|
|
864
1015
|
UTF32_BE => {
|
|
865
|
-
if (
|
|
1016
|
+
if (bytesIdx + 4n > destSize) {
|
|
866
1017
|
throw Exception.IndexOutOfBounds
|
|
867
1018
|
}
|
|
868
1019
|
WasmI32.store8(bytes + bytesIdx, 0n, _BYTES_OFFSET)
|
|
@@ -872,7 +1023,7 @@ let rec encodeAtHelp = (s: String, encoding: Encoding, includeBom: Bool, dest: B
|
|
|
872
1023
|
bytesIdx += 4n
|
|
873
1024
|
},
|
|
874
1025
|
UTF32_LE => {
|
|
875
|
-
if (
|
|
1026
|
+
if (bytesIdx + 4n > destSize) {
|
|
876
1027
|
throw Exception.IndexOutOfBounds
|
|
877
1028
|
}
|
|
878
1029
|
WasmI32.store8(bytes + bytesIdx, 0xFFn, _BYTES_OFFSET)
|
|
@@ -880,16 +1031,16 @@ let rec encodeAtHelp = (s: String, encoding: Encoding, includeBom: Bool, dest: B
|
|
|
880
1031
|
WasmI32.store8(bytes + bytesIdx + 2n, 0n, _BYTES_OFFSET)
|
|
881
1032
|
WasmI32.store8(bytes + bytesIdx + 3n, 0n, _BYTES_OFFSET)
|
|
882
1033
|
bytesIdx += 4n
|
|
883
|
-
}
|
|
1034
|
+
},
|
|
884
1035
|
}
|
|
885
1036
|
}
|
|
886
1037
|
|
|
887
|
-
match(encoding) {
|
|
1038
|
+
match (encoding) {
|
|
888
1039
|
UTF8 => {
|
|
889
1040
|
// Optimization: since internally strings in Grain are UTF8 encoded, when
|
|
890
1041
|
// the target encoding is UTF8 as well, then copy the entire memory range
|
|
891
1042
|
// in bulk. No need to iterate individual characters.
|
|
892
|
-
if (
|
|
1043
|
+
if (bytesIdx + byteSize > destSize) {
|
|
893
1044
|
throw Exception.IndexOutOfBounds
|
|
894
1045
|
}
|
|
895
1046
|
Memory.copy(bytes + bytesIdx + _BYTES_OFFSET, ptr, byteSize)
|
|
@@ -907,37 +1058,55 @@ let rec encodeAtHelp = (s: String, encoding: Encoding, includeBom: Bool, dest: B
|
|
|
907
1058
|
} else {
|
|
908
1059
|
2n
|
|
909
1060
|
}
|
|
910
|
-
match(encoding) {
|
|
1061
|
+
match (encoding) {
|
|
911
1062
|
UTF8 => {
|
|
912
1063
|
// With the optimization above for bulk memory copy, this match
|
|
913
1064
|
// should never occur for the UTF8 case.
|
|
914
|
-
if (
|
|
1065
|
+
if (bytesIdx + n > destSize) {
|
|
915
1066
|
throw Exception.IndexOutOfBounds
|
|
916
1067
|
}
|
|
917
|
-
Memory.copy(bytes + bytesIdx + _BYTES_OFFSET, ptr, n)
|
|
1068
|
+
Memory.copy(bytes + bytesIdx + _BYTES_OFFSET, ptr, n)
|
|
918
1069
|
bytesIdx += n
|
|
919
1070
|
},
|
|
920
1071
|
UTF16_BE => {
|
|
921
1072
|
let codePoint = getCodePoint(ptr)
|
|
922
1073
|
if (codePoint <= 0xFFFFn) {
|
|
923
1074
|
// <hi><lo>
|
|
924
|
-
if (
|
|
1075
|
+
if (bytesIdx + 2n > destSize) {
|
|
925
1076
|
throw Exception.IndexOutOfBounds
|
|
926
1077
|
}
|
|
927
|
-
WasmI32.store8(
|
|
928
|
-
|
|
1078
|
+
WasmI32.store8(
|
|
1079
|
+
bytes + bytesIdx,
|
|
1080
|
+
(codePoint & 0xff00n) >>> 8n,
|
|
1081
|
+
_BYTES_OFFSET
|
|
1082
|
+
)
|
|
1083
|
+
WasmI32.store8(
|
|
1084
|
+
bytes + bytesIdx + 1n,
|
|
1085
|
+
codePoint & 0xffn,
|
|
1086
|
+
_BYTES_OFFSET
|
|
1087
|
+
)
|
|
929
1088
|
bytesIdx += 2n
|
|
930
1089
|
} else {
|
|
931
1090
|
// https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF
|
|
932
|
-
if (
|
|
1091
|
+
if (bytesIdx + 4n > destSize) {
|
|
933
1092
|
throw Exception.IndexOutOfBounds
|
|
934
1093
|
}
|
|
935
1094
|
let uPrime = codePoint - 0x10000n
|
|
936
|
-
let w1 = ((uPrime & 0b11111111110000000000n) >>> 10n) +
|
|
937
|
-
|
|
938
|
-
|
|
1095
|
+
let w1 = ((uPrime & 0b11111111110000000000n) >>> 10n) +
|
|
1096
|
+
0xD800n // High surrogate
|
|
1097
|
+
let w2 = (uPrime & 0b00000000001111111111n) +
|
|
1098
|
+
0xDC00n // Low surrogate
|
|
1099
|
+
WasmI32.store8(
|
|
1100
|
+
bytes + bytesIdx,
|
|
1101
|
+
(w1 & 0xff00n) >>> 8n,
|
|
1102
|
+
_BYTES_OFFSET
|
|
1103
|
+
)
|
|
939
1104
|
WasmI32.store8(bytes + bytesIdx + 1n, w1 & 0xffn, _BYTES_OFFSET)
|
|
940
|
-
WasmI32.store8(
|
|
1105
|
+
WasmI32.store8(
|
|
1106
|
+
bytes + bytesIdx + 2n,
|
|
1107
|
+
(w2 & 0xff00n) >>> 8n,
|
|
1108
|
+
_BYTES_OFFSET
|
|
1109
|
+
)
|
|
941
1110
|
WasmI32.store8(bytes + bytesIdx + 3n, w2 & 0xffn, _BYTES_OFFSET)
|
|
942
1111
|
bytesIdx += 4n
|
|
943
1112
|
}
|
|
@@ -945,133 +1114,192 @@ let rec encodeAtHelp = (s: String, encoding: Encoding, includeBom: Bool, dest: B
|
|
|
945
1114
|
UTF16_LE => {
|
|
946
1115
|
let codePoint = getCodePoint(ptr)
|
|
947
1116
|
if (codePoint <= 0xFFFFn) {
|
|
948
|
-
if (
|
|
1117
|
+
if (bytesIdx + 2n > destSize) {
|
|
949
1118
|
throw Exception.IndexOutOfBounds
|
|
950
1119
|
}
|
|
951
1120
|
// <lo><hi>
|
|
952
1121
|
WasmI32.store8(bytes + bytesIdx, codePoint & 0xffn, _BYTES_OFFSET)
|
|
953
|
-
WasmI32.store8(
|
|
1122
|
+
WasmI32.store8(
|
|
1123
|
+
bytes + bytesIdx + 1n,
|
|
1124
|
+
(codePoint & 0xff00n) >>> 8n,
|
|
1125
|
+
_BYTES_OFFSET
|
|
1126
|
+
)
|
|
954
1127
|
bytesIdx += 2n
|
|
955
1128
|
} else {
|
|
956
1129
|
// https://en.wikipedia.org/wiki/UTF-16#Code_points_from_U+010000_to_U+10FFFF
|
|
957
|
-
if (
|
|
1130
|
+
if (bytesIdx + 4n > destSize) {
|
|
958
1131
|
throw Exception.IndexOutOfBounds
|
|
959
1132
|
}
|
|
960
1133
|
let uPrime = codePoint - 0x10000n
|
|
961
|
-
let w1 = ((uPrime & 0b11111111110000000000n) >>> 10n) +
|
|
962
|
-
|
|
1134
|
+
let w1 = ((uPrime & 0b11111111110000000000n) >>> 10n) +
|
|
1135
|
+
0xD800n // High surrogate
|
|
1136
|
+
let w2 = (uPrime & 0b00000000001111111111n) +
|
|
1137
|
+
0xDC00n // Low surrogate
|
|
963
1138
|
WasmI32.store8(bytes + bytesIdx, w1 & 0xffn, _BYTES_OFFSET)
|
|
964
|
-
WasmI32.store8(
|
|
1139
|
+
WasmI32.store8(
|
|
1140
|
+
bytes + bytesIdx + 1n,
|
|
1141
|
+
(w1 & 0xff00n) >>> 8n,
|
|
1142
|
+
_BYTES_OFFSET
|
|
1143
|
+
)
|
|
965
1144
|
WasmI32.store8(bytes + bytesIdx + 2n, w2 & 0xffn, _BYTES_OFFSET)
|
|
966
|
-
WasmI32.store8(
|
|
1145
|
+
WasmI32.store8(
|
|
1146
|
+
bytes + bytesIdx + 3n,
|
|
1147
|
+
(w2 & 0xff00n) >>> 8n,
|
|
1148
|
+
_BYTES_OFFSET
|
|
1149
|
+
)
|
|
967
1150
|
bytesIdx += 4n
|
|
968
1151
|
}
|
|
969
1152
|
},
|
|
970
1153
|
UTF32_BE => {
|
|
971
|
-
if (
|
|
1154
|
+
if (bytesIdx + 4n > destSize) {
|
|
972
1155
|
throw Exception.IndexOutOfBounds
|
|
973
1156
|
}
|
|
974
1157
|
let codePoint = getCodePoint(ptr)
|
|
975
|
-
WasmI32.store8(
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
1158
|
+
WasmI32.store8(
|
|
1159
|
+
bytes + bytesIdx,
|
|
1160
|
+
(codePoint & 0xff000000n) >>> 24n,
|
|
1161
|
+
_BYTES_OFFSET
|
|
1162
|
+
)
|
|
1163
|
+
WasmI32.store8(
|
|
1164
|
+
bytes + bytesIdx + 1n,
|
|
1165
|
+
(codePoint & 0xff0000n) >>> 16n,
|
|
1166
|
+
_BYTES_OFFSET
|
|
1167
|
+
)
|
|
1168
|
+
WasmI32.store8(
|
|
1169
|
+
bytes + bytesIdx + 2n,
|
|
1170
|
+
(codePoint & 0xff00n) >>> 8n,
|
|
1171
|
+
_BYTES_OFFSET
|
|
1172
|
+
)
|
|
1173
|
+
WasmI32.store8(
|
|
1174
|
+
bytes + bytesIdx + 3n,
|
|
1175
|
+
codePoint & 0xffn,
|
|
1176
|
+
_BYTES_OFFSET
|
|
1177
|
+
)
|
|
979
1178
|
bytesIdx += 4n
|
|
980
1179
|
},
|
|
981
1180
|
UTF32_LE => {
|
|
982
|
-
if (
|
|
1181
|
+
if (bytesIdx + 4n > destSize) {
|
|
983
1182
|
throw Exception.IndexOutOfBounds
|
|
984
1183
|
}
|
|
985
1184
|
let codePoint = getCodePoint(ptr)
|
|
986
1185
|
WasmI32.store8(bytes + bytesIdx, codePoint & 0xffn, _BYTES_OFFSET)
|
|
987
|
-
WasmI32.store8(
|
|
988
|
-
|
|
989
|
-
|
|
1186
|
+
WasmI32.store8(
|
|
1187
|
+
bytes + bytesIdx + 1n,
|
|
1188
|
+
(codePoint & 0xff00n) >>> 8n,
|
|
1189
|
+
_BYTES_OFFSET
|
|
1190
|
+
)
|
|
1191
|
+
WasmI32.store8(
|
|
1192
|
+
bytes + bytesIdx + 2n,
|
|
1193
|
+
(codePoint & 0xff0000n) >>> 16n,
|
|
1194
|
+
_BYTES_OFFSET
|
|
1195
|
+
)
|
|
1196
|
+
WasmI32.store8(
|
|
1197
|
+
bytes + bytesIdx + 3n,
|
|
1198
|
+
(codePoint & 0xff000000n) >>> 24n,
|
|
1199
|
+
_BYTES_OFFSET
|
|
1200
|
+
)
|
|
990
1201
|
bytesIdx += 4n
|
|
991
1202
|
},
|
|
992
|
-
}
|
|
1203
|
+
}
|
|
993
1204
|
ptr += n
|
|
994
1205
|
}
|
|
995
|
-
}
|
|
1206
|
+
},
|
|
996
1207
|
}
|
|
997
1208
|
|
|
998
|
-
|
|
999
|
-
Memory.decRef(WasmI32.fromGrain(s))
|
|
1209
|
+
Memory.decRef(WasmI32.fromGrain(string))
|
|
1000
1210
|
Memory.decRef(WasmI32.fromGrain(encoding))
|
|
1001
1211
|
Memory.decRef(WasmI32.fromGrain(includeBom))
|
|
1002
|
-
Memory.decRef(WasmI32.fromGrain(dest))
|
|
1003
1212
|
Memory.decRef(WasmI32.fromGrain(destPos))
|
|
1004
1213
|
Memory.decRef(WasmI32.fromGrain(encodeAtHelp))
|
|
1005
|
-
|
|
1214
|
+
|
|
1215
|
+
// We don't decRef `dest` because we're returning it
|
|
1216
|
+
dest
|
|
1006
1217
|
}
|
|
1007
1218
|
|
|
1008
|
-
|
|
1009
|
-
|
|
1010
|
-
|
|
1011
|
-
|
|
1012
|
-
|
|
1013
|
-
|
|
1014
|
-
|
|
1015
|
-
|
|
1219
|
+
/**
|
|
1220
|
+
* Encodes the given string into a byte sequence at the supplied position, excluding any byte-order marker, using the encoding scheme provided.
|
|
1221
|
+
*
|
|
1222
|
+
* @param string: The input string
|
|
1223
|
+
* @param encoding: The encoding to use
|
|
1224
|
+
* @param dest: The byte sequence that will be copied
|
|
1225
|
+
* @param destPos: The location in the byte sequence to write the output
|
|
1226
|
+
* @returns A copy of the input bytes with the encoded string replaced at the given position
|
|
1227
|
+
*
|
|
1228
|
+
* @since v0.4.0
|
|
1229
|
+
*/
|
|
1230
|
+
export let encodeAt = (string, encoding, dest, destPos) => {
|
|
1231
|
+
encodeAtHelp(string, encoding, false, dest, destPos)
|
|
1016
1232
|
}
|
|
1017
1233
|
|
|
1018
|
-
|
|
1019
|
-
|
|
1020
|
-
|
|
1021
|
-
|
|
1022
|
-
|
|
1023
|
-
|
|
1024
|
-
|
|
1025
|
-
|
|
1234
|
+
/**
|
|
1235
|
+
* Encodes the given string into a byte sequence at the supplied position, including any byte-order marker, using the encoding scheme provided.
|
|
1236
|
+
*
|
|
1237
|
+
* @param string: The input string
|
|
1238
|
+
* @param encoding: The encoding to use
|
|
1239
|
+
* @param dest: The byte sequence that will be copied
|
|
1240
|
+
* @param destPos: The location in the byte sequence to write the output
|
|
1241
|
+
* @returns A copy of the input bytes with the encoded string replaced at the given position
|
|
1242
|
+
*
|
|
1243
|
+
* @since v0.4.0
|
|
1244
|
+
*/
|
|
1245
|
+
export let encodeAtWithBom = (string, encoding, dest, destPos) => {
|
|
1246
|
+
encodeAtHelp(string, encoding, true, dest, destPos)
|
|
1026
1247
|
}
|
|
1027
1248
|
|
|
1028
|
-
// Encodes the given string using the given encoding scheme
|
|
1029
|
-
// @param s: String - The input string
|
|
1030
|
-
// @param encoding: Encoding - The encoding to use
|
|
1031
|
-
// @param includeBom: Bool - Whether to include the byte-order marker in the encoded output
|
|
1032
|
-
// @returns Bytes
|
|
1033
1249
|
@disableGC
|
|
1034
|
-
let rec encodeHelp = (
|
|
1250
|
+
let rec encodeHelp = (string: String, encoding: Encoding, includeBom: Bool) => {
|
|
1035
1251
|
Memory.incRef(WasmI32.fromGrain((+)))
|
|
1036
|
-
let size = encodedLength(
|
|
1037
|
-
|
|
1038
|
-
|
|
1039
|
-
|
|
1040
|
-
|
|
1041
|
-
|
|
1042
|
-
|
|
1043
|
-
|
|
1044
|
-
|
|
1252
|
+
let size = encodedLength(string, encoding) +
|
|
1253
|
+
(if (includeBom) {
|
|
1254
|
+
match (encoding) {
|
|
1255
|
+
UTF8 => 3,
|
|
1256
|
+
UTF16_LE => 2,
|
|
1257
|
+
UTF16_BE => 2,
|
|
1258
|
+
UTF32_LE => 4,
|
|
1259
|
+
UTF32_BE => 4,
|
|
1260
|
+
}
|
|
1261
|
+
} else {
|
|
1262
|
+
0
|
|
1263
|
+
})
|
|
1045
1264
|
let (>>>) = WasmI32.shrU
|
|
1046
1265
|
let bytes = WasmI32.toGrain(allocateBytes(WasmI32.fromGrain(size) >>> 1n))
|
|
1047
1266
|
Memory.incRef(WasmI32.fromGrain(encodeAtHelp))
|
|
1048
|
-
Memory.incRef(WasmI32.fromGrain(
|
|
1267
|
+
Memory.incRef(WasmI32.fromGrain(string))
|
|
1049
1268
|
Memory.incRef(WasmI32.fromGrain(encoding))
|
|
1050
1269
|
Memory.incRef(WasmI32.fromGrain(includeBom))
|
|
1051
1270
|
Memory.incRef(WasmI32.fromGrain(bytes))
|
|
1052
|
-
let ret = encodeAtHelp(
|
|
1053
|
-
Memory.decRef(WasmI32.fromGrain(
|
|
1271
|
+
let ret = encodeAtHelp(string, encoding, includeBom, bytes, 0)
|
|
1272
|
+
Memory.decRef(WasmI32.fromGrain(string))
|
|
1054
1273
|
Memory.decRef(WasmI32.fromGrain(encoding))
|
|
1055
1274
|
Memory.decRef(WasmI32.fromGrain(includeBom))
|
|
1056
1275
|
Memory.decRef(WasmI32.fromGrain(encodeHelp))
|
|
1057
1276
|
ret
|
|
1058
1277
|
}
|
|
1059
1278
|
|
|
1060
|
-
|
|
1061
|
-
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1065
|
-
|
|
1066
|
-
|
|
1279
|
+
/**
|
|
1280
|
+
* Encodes the given string using the given encoding scheme, excluding any byte-order marker.
|
|
1281
|
+
*
|
|
1282
|
+
* @param string: The input string
|
|
1283
|
+
* @param encoding: The encoding to use
|
|
1284
|
+
* @returns The byte representation of the string in the given encoding
|
|
1285
|
+
*
|
|
1286
|
+
* @since v0.4.0
|
|
1287
|
+
*/
|
|
1288
|
+
export let encode = (string: String, encoding: Encoding) => {
|
|
1289
|
+
encodeHelp(string, encoding, false)
|
|
1067
1290
|
}
|
|
1068
1291
|
|
|
1069
|
-
|
|
1070
|
-
|
|
1071
|
-
|
|
1072
|
-
|
|
1073
|
-
|
|
1074
|
-
|
|
1292
|
+
/**
|
|
1293
|
+
* Encodes the given string using the given encoding scheme, including any byte-order marker.
|
|
1294
|
+
*
|
|
1295
|
+
* @param string: The input string
|
|
1296
|
+
* @param encoding: The encoding to use
|
|
1297
|
+
* @returns The byte representation of the string in the given encoding
|
|
1298
|
+
*
|
|
1299
|
+
* @since v0.4.0
|
|
1300
|
+
*/
|
|
1301
|
+
export let encodeWithBom = (string: String, encoding: Encoding) => {
|
|
1302
|
+
encodeHelp(string, encoding, true)
|
|
1075
1303
|
}
|
|
1076
1304
|
|
|
1077
1305
|
// Byte->String decoding and helper functions:
|
|
@@ -1099,8 +1327,8 @@ let writeUtf8CodePoint = (ptr, codePoint) => {
|
|
|
1099
1327
|
// The first byte has a three bit prefix of 110, followed by 5 bits of the
|
|
1100
1328
|
// codepoint. The second byte has a two bit prefix of 10, followed by 6 bits
|
|
1101
1329
|
// of the codepoint.
|
|
1102
|
-
let high =
|
|
1103
|
-
let low =
|
|
1330
|
+
let high = codePoint >>> 6n & 0b000_11111n | 0b110_00000n
|
|
1331
|
+
let low = codePoint & 0b00_111111n | 0b10_000000n
|
|
1104
1332
|
WasmI32.store8(ptr, high, 0n)
|
|
1105
1333
|
WasmI32.store8(ptr + 1n, low, 0n)
|
|
1106
1334
|
2n
|
|
@@ -1109,9 +1337,9 @@ let writeUtf8CodePoint = (ptr, codePoint) => {
|
|
|
1109
1337
|
// The first byte has a four bit prefix of 1110, followed by 4 bits of the
|
|
1110
1338
|
// codepoint. Remaining bytes each have a two bit prefix of 10, followed by
|
|
1111
1339
|
// 6 bits of the codepoint.
|
|
1112
|
-
let high =
|
|
1113
|
-
let mid =
|
|
1114
|
-
let low =
|
|
1340
|
+
let high = codePoint >>> 12n & 0b0000_1111n | 0b1110_0000n
|
|
1341
|
+
let mid = codePoint >>> 6n & 0b00_111111n | 0b10_000000n
|
|
1342
|
+
let low = codePoint & 0b00_111111n | 0b10_000000n
|
|
1115
1343
|
WasmI32.store8(ptr, high, 0n)
|
|
1116
1344
|
WasmI32.store8(ptr + 1n, mid, 0n)
|
|
1117
1345
|
WasmI32.store8(ptr + 2n, low, 0n)
|
|
@@ -1121,10 +1349,10 @@ let writeUtf8CodePoint = (ptr, codePoint) => {
|
|
|
1121
1349
|
// The first byte has a five bit prefix of 11110, followed by 3 bits of the
|
|
1122
1350
|
// codepoint. Remaining bytes each have a two bit prefix of 10, followed by
|
|
1123
1351
|
// 6 bits of the codepoint.
|
|
1124
|
-
let high =
|
|
1125
|
-
let mid1 =
|
|
1126
|
-
let mid2 =
|
|
1127
|
-
let low =
|
|
1352
|
+
let high = codePoint >>> 18n & 0b00000_111n | 0b11110_000n
|
|
1353
|
+
let mid1 = codePoint >>> 12n & 0b00_111111n | 0b10_000000n
|
|
1354
|
+
let mid2 = codePoint >>> 6n & 0b00_111111n | 0b10_000000n
|
|
1355
|
+
let low = codePoint & 0b00_111111n | 0b10_000000n
|
|
1128
1356
|
WasmI32.store8(ptr, high, 0n)
|
|
1129
1357
|
WasmI32.store8(ptr + 1n, mid1, 0n)
|
|
1130
1358
|
WasmI32.store8(ptr + 2n, mid2, 0n)
|
|
@@ -1143,25 +1371,46 @@ let bytesHaveBom = (bytes: Bytes, encoding: Encoding, start: WasmI32) => {
|
|
|
1143
1371
|
let ptr = ptr + start
|
|
1144
1372
|
match (encoding) {
|
|
1145
1373
|
UTF8 => {
|
|
1146
|
-
bytesSize >= 3n &&
|
|
1374
|
+
bytesSize >= 3n &&
|
|
1375
|
+
WasmI32.load8U(ptr, _BYTES_OFFSET) == 0xEFn &&
|
|
1376
|
+
WasmI32.load8U(ptr + 1n, _BYTES_OFFSET) == 0xBBn &&
|
|
1377
|
+
WasmI32.load8U(ptr + 2n, _BYTES_OFFSET) == 0xBFn
|
|
1147
1378
|
},
|
|
1148
1379
|
UTF16_BE => {
|
|
1149
|
-
bytesSize >= 2n &&
|
|
1380
|
+
bytesSize >= 2n &&
|
|
1381
|
+
WasmI32.load8U(ptr, _BYTES_OFFSET) == 0xFEn &&
|
|
1382
|
+
WasmI32.load8U(ptr + 1n, _BYTES_OFFSET) == 0xFFn
|
|
1150
1383
|
},
|
|
1151
1384
|
UTF16_LE => {
|
|
1152
|
-
bytesSize >= 2n &&
|
|
1385
|
+
bytesSize >= 2n &&
|
|
1386
|
+
WasmI32.load8U(ptr, _BYTES_OFFSET) == 0xFFn &&
|
|
1387
|
+
WasmI32.load8U(ptr + 1n, _BYTES_OFFSET) == 0xFEn
|
|
1153
1388
|
},
|
|
1154
1389
|
UTF32_BE => {
|
|
1155
|
-
bytesSize >= 4n &&
|
|
1390
|
+
bytesSize >= 4n &&
|
|
1391
|
+
WasmI32.load8U(ptr, _BYTES_OFFSET) == 0x00n &&
|
|
1392
|
+
WasmI32.load8U(ptr + 1n, _BYTES_OFFSET) == 0x00n &&
|
|
1393
|
+
WasmI32.load8U(ptr + 2n, _BYTES_OFFSET) == 0xFEn &&
|
|
1394
|
+
WasmI32.load8U(ptr + 3n, _BYTES_OFFSET) == 0xFFn
|
|
1156
1395
|
},
|
|
1157
1396
|
UTF32_LE => {
|
|
1158
|
-
bytesSize >= 4n &&
|
|
1159
|
-
|
|
1397
|
+
bytesSize >= 4n &&
|
|
1398
|
+
WasmI32.load8U(ptr, _BYTES_OFFSET) == 0xFFn &&
|
|
1399
|
+
WasmI32.load8U(ptr + 1n, _BYTES_OFFSET) == 0xFEn &&
|
|
1400
|
+
WasmI32.load8U(ptr + 2n, _BYTES_OFFSET) == 0x00n &&
|
|
1401
|
+
WasmI32.load8U(ptr + 3n, _BYTES_OFFSET) == 0x00n
|
|
1402
|
+
},
|
|
1160
1403
|
}
|
|
1161
1404
|
}
|
|
1162
1405
|
|
|
1163
1406
|
@disableGC
|
|
1164
|
-
let decodedLength =
|
|
1407
|
+
let decodedLength =
|
|
1408
|
+
(
|
|
1409
|
+
bytes: Bytes,
|
|
1410
|
+
encoding: Encoding,
|
|
1411
|
+
start: WasmI32,
|
|
1412
|
+
size: WasmI32,
|
|
1413
|
+
) => {
|
|
1165
1414
|
let (+) = WasmI32.add
|
|
1166
1415
|
let (-) = WasmI32.sub
|
|
1167
1416
|
let (==) = WasmI32.eq
|
|
@@ -1184,20 +1433,21 @@ let decodedLength = (bytes: Bytes, encoding: Encoding, start: WasmI32, size: Was
|
|
|
1184
1433
|
}
|
|
1185
1434
|
}
|
|
1186
1435
|
let start = ptr + _BYTES_OFFSET + start
|
|
1187
|
-
match(encoding) {
|
|
1436
|
+
match (encoding) {
|
|
1188
1437
|
UTF8 => bytesSize,
|
|
1189
1438
|
UTF16_BE => {
|
|
1190
1439
|
let end = start + bytesSize
|
|
1191
1440
|
let mut ptr = start
|
|
1192
1441
|
let mut count = 0n
|
|
1193
1442
|
while (ptr < end) {
|
|
1194
|
-
let codeWord =
|
|
1443
|
+
let codeWord = WasmI32.load8U(ptr, 0n) << 8n | WasmI32.load8U(ptr, 1n)
|
|
1195
1444
|
let codeWord = if (codeWord >= 0xD800n && codeWord <= 0xDBFFn) {
|
|
1196
1445
|
// high surrogate. need to check that next character is low surrogate
|
|
1197
1446
|
let ret = if (ptr + 2n >= end) {
|
|
1198
1447
|
throw MalformedUnicode
|
|
1199
1448
|
} else {
|
|
1200
|
-
let nextCodeWord =
|
|
1449
|
+
let nextCodeWord = WasmI32.load8U(ptr, 2n) << 8n |
|
|
1450
|
+
WasmI32.load8U(ptr, 3n)
|
|
1201
1451
|
if (nextCodeWord < 0xDC00n || nextCodeWord > 0xDFFFn) {
|
|
1202
1452
|
// high surrogate without low surrogate
|
|
1203
1453
|
throw MalformedUnicode
|
|
@@ -1231,13 +1481,14 @@ let decodedLength = (bytes: Bytes, encoding: Encoding, start: WasmI32, size: Was
|
|
|
1231
1481
|
let mut ptr = start
|
|
1232
1482
|
let mut count = 0n
|
|
1233
1483
|
while (ptr < end) {
|
|
1234
|
-
let codeWord =
|
|
1484
|
+
let codeWord = WasmI32.load8U(ptr, 1n) << 8n | WasmI32.load8U(ptr, 0n)
|
|
1235
1485
|
let codeWord = if (codeWord >= 0xD800n && codeWord <= 0xDBFFn) {
|
|
1236
1486
|
// high surrogate. need to check that next character is low surrogate
|
|
1237
1487
|
let ret = if (ptr + 2n >= end) {
|
|
1238
1488
|
throw MalformedUnicode
|
|
1239
1489
|
} else {
|
|
1240
|
-
let nextCodeWord =
|
|
1490
|
+
let nextCodeWord = WasmI32.load8U(ptr, 3n) << 8n |
|
|
1491
|
+
WasmI32.load8U(ptr, 2n)
|
|
1241
1492
|
if (nextCodeWord < 0xDC00n || nextCodeWord > 0xDFFFn) {
|
|
1242
1493
|
// high surrogate without low surrogate
|
|
1243
1494
|
throw MalformedUnicode
|
|
@@ -1275,7 +1526,10 @@ let decodedLength = (bytes: Bytes, encoding: Encoding, start: WasmI32, size: Was
|
|
|
1275
1526
|
let mut ptr = start
|
|
1276
1527
|
let mut count = 0n
|
|
1277
1528
|
while (ptr < end) {
|
|
1278
|
-
let codeWord =
|
|
1529
|
+
let codeWord = WasmI32.load8U(ptr, 0n) << 24n |
|
|
1530
|
+
WasmI32.load8U(ptr, 1n) << 16n |
|
|
1531
|
+
WasmI32.load8U(ptr, 2n) << 8n |
|
|
1532
|
+
WasmI32.load8U(ptr, 3n)
|
|
1279
1533
|
ptr += 4n
|
|
1280
1534
|
if (codeWord <= 0x007Fn) {
|
|
1281
1535
|
count += 1n
|
|
@@ -1298,7 +1552,10 @@ let decodedLength = (bytes: Bytes, encoding: Encoding, start: WasmI32, size: Was
|
|
|
1298
1552
|
let mut ptr = start
|
|
1299
1553
|
let mut count = 0n
|
|
1300
1554
|
while (ptr < end) {
|
|
1301
|
-
let codeWord =
|
|
1555
|
+
let codeWord = WasmI32.load8U(ptr, 3n) << 24n |
|
|
1556
|
+
WasmI32.load8U(ptr, 2n) << 16n |
|
|
1557
|
+
WasmI32.load8U(ptr, 1n) << 8n |
|
|
1558
|
+
WasmI32.load8U(ptr, 0n)
|
|
1302
1559
|
ptr += 4n
|
|
1303
1560
|
if (codeWord <= 0x007Fn) {
|
|
1304
1561
|
count += 1n
|
|
@@ -1311,19 +1568,19 @@ let decodedLength = (bytes: Bytes, encoding: Encoding, start: WasmI32, size: Was
|
|
|
1311
1568
|
}
|
|
1312
1569
|
}
|
|
1313
1570
|
count
|
|
1314
|
-
}
|
|
1571
|
+
},
|
|
1315
1572
|
}
|
|
1316
1573
|
}
|
|
1317
1574
|
|
|
1318
|
-
// Decodes the given byte sequence into a string using the given encoding scheme
|
|
1319
|
-
// @param bytes: Bytes - The input bytes
|
|
1320
|
-
// @param encoding: Encoding - The encoding to use
|
|
1321
|
-
// @param skipBom: Bool - Whether to include the byte-order marker (if present) in the decoded output
|
|
1322
|
-
// @param start: Number - The byte offset to begin decoding from
|
|
1323
|
-
// @param size: Number - The maximum number of bytes to decode
|
|
1324
|
-
// @returns String
|
|
1325
1575
|
@disableGC
|
|
1326
|
-
let rec decodeRangeHelp =
|
|
1576
|
+
let rec decodeRangeHelp =
|
|
1577
|
+
(
|
|
1578
|
+
bytes: Bytes,
|
|
1579
|
+
encoding: Encoding,
|
|
1580
|
+
skipBom: Bool,
|
|
1581
|
+
start: Number,
|
|
1582
|
+
size: Number,
|
|
1583
|
+
) => {
|
|
1327
1584
|
let (+) = WasmI32.add
|
|
1328
1585
|
let (-) = WasmI32.sub
|
|
1329
1586
|
let (<) = WasmI32.ltU
|
|
@@ -1338,7 +1595,7 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1338
1595
|
let size = grainToWasmNumber(size, _SIZE_NAME)
|
|
1339
1596
|
let hasBom = bytesHaveBom(bytes, encoding, start)
|
|
1340
1597
|
let stringSize = decodedLength(bytes, encoding, start, size)
|
|
1341
|
-
let stringSize = if (skipBom && hasBom)
|
|
1598
|
+
let stringSize = if (skipBom && hasBom) stringSize - 3n else stringSize
|
|
1342
1599
|
let str = allocateString(stringSize)
|
|
1343
1600
|
let mut bytesPtr = WasmI32.fromGrain(bytes)
|
|
1344
1601
|
let bytesSize = {
|
|
@@ -1353,7 +1610,7 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1353
1610
|
let mut strPtr = str + 8n
|
|
1354
1611
|
let mut bomRead = false
|
|
1355
1612
|
if (skipBom && hasBom) {
|
|
1356
|
-
bytesPtr += match(encoding) {
|
|
1613
|
+
bytesPtr += match (encoding) {
|
|
1357
1614
|
UTF8 => 3n,
|
|
1358
1615
|
UTF16_LE => 2n,
|
|
1359
1616
|
UTF16_BE => 2n,
|
|
@@ -1362,9 +1619,9 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1362
1619
|
}
|
|
1363
1620
|
}
|
|
1364
1621
|
let ret = if (stringSize == 0n) {
|
|
1365
|
-
WasmI32.toGrain(str)
|
|
1622
|
+
WasmI32.toGrain(str): String
|
|
1366
1623
|
} else {
|
|
1367
|
-
match(encoding) {
|
|
1624
|
+
match (encoding) {
|
|
1368
1625
|
UTF8 => {
|
|
1369
1626
|
Memory.copy(strPtr, bytesPtr, stringSize)
|
|
1370
1627
|
},
|
|
@@ -1372,11 +1629,14 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1372
1629
|
// NOTE: Because the size check passed, we know the string is well-formed
|
|
1373
1630
|
let end = bytesPtr + bytesSize
|
|
1374
1631
|
while (bytesPtr < end) {
|
|
1375
|
-
let w1 =
|
|
1632
|
+
let w1 = WasmI32.load8U(bytesPtr, 0n) << 8n |
|
|
1633
|
+
WasmI32.load8U(bytesPtr, 1n)
|
|
1376
1634
|
let codeWord = if (w1 >= 0xD800n && w1 <= 0xDBFFn) {
|
|
1377
1635
|
// high surrogate. next character is low surrogate
|
|
1378
1636
|
let w1 = (w1 & 0x03FFn) << 10n
|
|
1379
|
-
let w2 = (
|
|
1637
|
+
let w2 = (WasmI32.load8U(bytesPtr, 2n) << 8n |
|
|
1638
|
+
WasmI32.load8U(bytesPtr, 3n)) &
|
|
1639
|
+
0x03FFn
|
|
1380
1640
|
let codeWord = w1 + w2 + 0x10000n
|
|
1381
1641
|
// no problems, so go past both code words
|
|
1382
1642
|
bytesPtr += 4n
|
|
@@ -1392,11 +1652,14 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1392
1652
|
// NOTE: Because the size check passed, we know the string is well-formed
|
|
1393
1653
|
let end = bytesPtr + bytesSize
|
|
1394
1654
|
while (bytesPtr < end) {
|
|
1395
|
-
let w1 =
|
|
1655
|
+
let w1 = WasmI32.load8U(bytesPtr, 1n) << 8n |
|
|
1656
|
+
WasmI32.load8U(bytesPtr, 0n)
|
|
1396
1657
|
let codeWord = if (w1 >= 0xD800n && w1 <= 0xDBFFn) {
|
|
1397
1658
|
// high surrogate. next character is low surrogate
|
|
1398
1659
|
let w1 = (w1 & 0x03FFn) << 10n
|
|
1399
|
-
let w2 = (
|
|
1660
|
+
let w2 = (WasmI32.load8U(bytesPtr, 3n) << 8n |
|
|
1661
|
+
WasmI32.load8U(bytesPtr, 2n)) &
|
|
1662
|
+
0x03FFn
|
|
1400
1663
|
//let uPrime = codePoint - 0x10000n
|
|
1401
1664
|
//let w1 = ((uPrime & 0b11111111110000000000n) >>> 10n) + 0xD800n // High surrogate
|
|
1402
1665
|
//let w2 = (uPrime & 0b00000000001111111111n) + 0xDC00n // Low surrogate
|
|
@@ -1414,7 +1677,10 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1414
1677
|
UTF32_BE => {
|
|
1415
1678
|
let end = bytesPtr + bytesSize
|
|
1416
1679
|
while (bytesPtr < end) {
|
|
1417
|
-
let codeWord =
|
|
1680
|
+
let codeWord = WasmI32.load8U(bytesPtr, 0n) << 24n |
|
|
1681
|
+
WasmI32.load8U(bytesPtr, 1n) << 16n |
|
|
1682
|
+
WasmI32.load8U(bytesPtr, 2n) << 8n |
|
|
1683
|
+
WasmI32.load8U(bytesPtr, 3n)
|
|
1418
1684
|
bytesPtr += 4n
|
|
1419
1685
|
strPtr += writeUtf8CodePoint(strPtr, codeWord)
|
|
1420
1686
|
}
|
|
@@ -1422,13 +1688,16 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1422
1688
|
UTF32_LE => {
|
|
1423
1689
|
let end = bytesPtr + bytesSize
|
|
1424
1690
|
while (bytesPtr < end) {
|
|
1425
|
-
let codeWord =
|
|
1691
|
+
let codeWord = WasmI32.load8U(bytesPtr, 3n) << 24n |
|
|
1692
|
+
WasmI32.load8U(bytesPtr, 2n) << 16n |
|
|
1693
|
+
WasmI32.load8U(bytesPtr, 1n) << 8n |
|
|
1694
|
+
WasmI32.load8U(bytesPtr, 0n)
|
|
1426
1695
|
bytesPtr += 4n
|
|
1427
1696
|
strPtr += writeUtf8CodePoint(strPtr, codeWord)
|
|
1428
1697
|
}
|
|
1429
|
-
}
|
|
1698
|
+
},
|
|
1430
1699
|
}
|
|
1431
|
-
WasmI32.toGrain(str)
|
|
1700
|
+
WasmI32.toGrain(str): String
|
|
1432
1701
|
}
|
|
1433
1702
|
// bytes: Bytes, encoding: Encoding, skipBom: Bool, start: Number, size: Number
|
|
1434
1703
|
Memory.decRef(WasmI32.fromGrain(bytes))
|
|
@@ -1438,33 +1707,48 @@ let rec decodeRangeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool, star
|
|
|
1438
1707
|
ret
|
|
1439
1708
|
}
|
|
1440
1709
|
|
|
1441
|
-
|
|
1442
|
-
|
|
1443
|
-
|
|
1444
|
-
|
|
1445
|
-
|
|
1446
|
-
|
|
1447
|
-
|
|
1448
|
-
|
|
1710
|
+
/**
|
|
1711
|
+
* Decodes the given byte sequence of the specified range into a string, excluding any byte-order marker, using the encoding scheme provided.
|
|
1712
|
+
*
|
|
1713
|
+
* @param bytes: The input bytes
|
|
1714
|
+
* @param encoding: The encoding to use
|
|
1715
|
+
* @param start: The byte offset to begin decoding from
|
|
1716
|
+
* @param size: The maximum number of bytes to decode
|
|
1717
|
+
* @returns The decoded string
|
|
1718
|
+
*
|
|
1719
|
+
* @since v0.4.0
|
|
1720
|
+
*/
|
|
1721
|
+
export let decodeRange =
|
|
1722
|
+
(
|
|
1723
|
+
bytes: Bytes,
|
|
1724
|
+
encoding: Encoding,
|
|
1725
|
+
start: Number,
|
|
1726
|
+
size: Number,
|
|
1727
|
+
) => {
|
|
1449
1728
|
decodeRangeHelp(bytes, encoding, true, start, size)
|
|
1450
1729
|
}
|
|
1451
1730
|
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1455
|
-
|
|
1456
|
-
|
|
1457
|
-
|
|
1458
|
-
|
|
1459
|
-
|
|
1731
|
+
/**
|
|
1732
|
+
* Decodes the given byte sequence of the specified range into a string, including any byte-order marker, using the encoding scheme provided.
|
|
1733
|
+
*
|
|
1734
|
+
* @param bytes: The input bytes
|
|
1735
|
+
* @param encoding: The encoding to use
|
|
1736
|
+
* @param start: The byte offset to begin decoding from
|
|
1737
|
+
* @param size: The maximum number of bytes to decode
|
|
1738
|
+
* @returns The decoded string
|
|
1739
|
+
*
|
|
1740
|
+
* @since v0.4.0
|
|
1741
|
+
*/
|
|
1742
|
+
export let decodeRangeKeepBom =
|
|
1743
|
+
(
|
|
1744
|
+
bytes: Bytes,
|
|
1745
|
+
encoding: Encoding,
|
|
1746
|
+
start: Number,
|
|
1747
|
+
size: Number,
|
|
1748
|
+
) => {
|
|
1460
1749
|
decodeRangeHelp(bytes, encoding, false, start, size)
|
|
1461
1750
|
}
|
|
1462
1751
|
|
|
1463
|
-
// Decodes the given byte sequence into a string using the given encoding scheme
|
|
1464
|
-
// @param bytes: Bytes - The input bytes
|
|
1465
|
-
// @param encoding: Encoding - The encoding to use
|
|
1466
|
-
// @param skipBom: Bool - Whether to include the byte-order marker (if present) in the decoded output
|
|
1467
|
-
// @returns String
|
|
1468
1752
|
@disableGC
|
|
1469
1753
|
let rec decodeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool) => {
|
|
1470
1754
|
let bytesPtr = WasmI32.fromGrain(bytes)
|
|
@@ -1472,27 +1756,41 @@ let rec decodeHelp = (bytes: Bytes, encoding: Encoding, skipBom: Bool) => {
|
|
|
1472
1756
|
Memory.incRef(WasmI32.fromGrain(decodeRangeHelp))
|
|
1473
1757
|
Memory.incRef(WasmI32.fromGrain(bytes))
|
|
1474
1758
|
Memory.incRef(WasmI32.fromGrain(encoding))
|
|
1475
|
-
let ret = decodeRangeHelp(
|
|
1759
|
+
let ret = decodeRangeHelp(
|
|
1760
|
+
bytes,
|
|
1761
|
+
encoding,
|
|
1762
|
+
skipBom,
|
|
1763
|
+
0,
|
|
1764
|
+
tagSimpleNumber(bytesSize)
|
|
1765
|
+
)
|
|
1476
1766
|
Memory.incRef(WasmI32.fromGrain(bytes))
|
|
1477
1767
|
Memory.incRef(WasmI32.fromGrain(encoding))
|
|
1478
1768
|
Memory.incRef(WasmI32.fromGrain(decodeHelp))
|
|
1479
1769
|
ret
|
|
1480
1770
|
}
|
|
1481
1771
|
|
|
1482
|
-
|
|
1483
|
-
|
|
1484
|
-
|
|
1485
|
-
|
|
1486
|
-
|
|
1772
|
+
/**
|
|
1773
|
+
* Decodes the given byte sequence into a string using the given encoding scheme, excluding any byte-order marker.
|
|
1774
|
+
*
|
|
1775
|
+
* @param bytes: The input bytes
|
|
1776
|
+
* @param encoding: The encoding to use
|
|
1777
|
+
* @returns The decoded string
|
|
1778
|
+
*
|
|
1779
|
+
* @since v0.4.0
|
|
1780
|
+
*/
|
|
1487
1781
|
export let decode = (bytes: Bytes, encoding: Encoding) => {
|
|
1488
1782
|
decodeHelp(bytes, encoding, true)
|
|
1489
1783
|
}
|
|
1490
1784
|
|
|
1491
|
-
|
|
1492
|
-
|
|
1493
|
-
|
|
1494
|
-
|
|
1495
|
-
|
|
1785
|
+
/**
|
|
1786
|
+
* Decodes the given byte sequence into a string using the given encoding scheme, including any byte-order marker.
|
|
1787
|
+
*
|
|
1788
|
+
* @param bytes: The input bytes
|
|
1789
|
+
* @param encoding: The encoding to use
|
|
1790
|
+
* @returns The decoded string
|
|
1791
|
+
*
|
|
1792
|
+
* @since v0.4.0
|
|
1793
|
+
*/
|
|
1496
1794
|
export let decodeKeepBom = (bytes: Bytes, encoding: Encoding) => {
|
|
1497
1795
|
decodeHelp(bytes, encoding, false)
|
|
1498
1796
|
}
|
|
@@ -1502,9 +1800,13 @@ export let decodeKeepBom = (bytes: Bytes, encoding: Encoding) => {
|
|
|
1502
1800
|
*
|
|
1503
1801
|
* @param fn: The iterator function
|
|
1504
1802
|
* @param str: The string to iterate
|
|
1803
|
+
*
|
|
1804
|
+
* @example String.forEachCodePoint(print, "Hello world")
|
|
1805
|
+
*
|
|
1806
|
+
* @since v0.4.0
|
|
1505
1807
|
*/
|
|
1506
1808
|
@disableGC
|
|
1507
|
-
export let rec forEachCodePoint = (fn:
|
|
1809
|
+
export let rec forEachCodePoint = (fn: Number -> Void, str: String) => {
|
|
1508
1810
|
let (>>>) = WasmI32.shrU
|
|
1509
1811
|
let (-) = WasmI32.sub
|
|
1510
1812
|
let (&) = WasmI32.and
|
|
@@ -1560,9 +1862,17 @@ export let rec forEachCodePoint = (fn: (Number) -> Void, str: String) => {
|
|
|
1560
1862
|
*
|
|
1561
1863
|
* @param fn: The iterator function
|
|
1562
1864
|
* @param str: The string to iterate
|
|
1865
|
+
*
|
|
1866
|
+
* @example String.forEachCodePointi((codepoint, index) => print((codepoint, index)), "Hello world")
|
|
1867
|
+
*
|
|
1868
|
+
* @since v0.4.0
|
|
1563
1869
|
*/
|
|
1564
1870
|
@disableGC
|
|
1565
|
-
export let rec forEachCodePointi =
|
|
1871
|
+
export let rec forEachCodePointi =
|
|
1872
|
+
(
|
|
1873
|
+
fn: (Number, Number) -> Void,
|
|
1874
|
+
str: String,
|
|
1875
|
+
) => {
|
|
1566
1876
|
let (>>>) = WasmI32.shrU
|
|
1567
1877
|
let (-) = WasmI32.sub
|
|
1568
1878
|
let (&) = WasmI32.and
|
|
@@ -1610,3 +1920,61 @@ export let rec forEachCodePointi = (fn: (Number, Number) -> Void, str: String) =
|
|
|
1610
1920
|
Memory.decRef(WasmI32.fromGrain(forEachCodePointi))
|
|
1611
1921
|
void
|
|
1612
1922
|
}
|
|
1923
|
+
let trimString = (str: String, end: Bool) => {
|
|
1924
|
+
let chars = explode(str), charsLength = length(str)
|
|
1925
|
+
let mut i = 0, offset = 1
|
|
1926
|
+
if (end) {
|
|
1927
|
+
i = charsLength - 1
|
|
1928
|
+
offset = -1
|
|
1929
|
+
}
|
|
1930
|
+
for (; i < charsLength && i > -1; i += offset) {
|
|
1931
|
+
let currentChar = chars[i]
|
|
1932
|
+
// TODO: Use unicode whitespace property and unicode line terminator once github issue #661 is completed
|
|
1933
|
+
if (
|
|
1934
|
+
// Spacing
|
|
1935
|
+
currentChar != '\u{0009}' && // Tab
|
|
1936
|
+
currentChar != '\u{000B}' && // LINE TABULATION
|
|
1937
|
+
currentChar != '\u{000C}' && // FORM FEED (FF)
|
|
1938
|
+
currentChar != '\u{0020}' && // Space
|
|
1939
|
+
currentChar != '\u{00A0}' && // No Break Space
|
|
1940
|
+
currentChar != '\u{FEFF}' && // ZERO WIDTH NO-BREAK SPACE
|
|
1941
|
+
// Line Terminators
|
|
1942
|
+
currentChar != '\n' && // LF
|
|
1943
|
+
currentChar != '\r' // CR
|
|
1944
|
+
) break
|
|
1945
|
+
}
|
|
1946
|
+
if (end) slice(0, i + 1, str) else slice(i, charsLength, str)
|
|
1947
|
+
}
|
|
1948
|
+
/**
|
|
1949
|
+
* Trims the beginning of a string—removing any leading whitespace characters.
|
|
1950
|
+
*
|
|
1951
|
+
* @param string: The string to be trimmed
|
|
1952
|
+
* @returns The trimmed string
|
|
1953
|
+
*
|
|
1954
|
+
* @example String.trimStart(" Hello World") == "Hello World"
|
|
1955
|
+
*
|
|
1956
|
+
* @since v0.4.2
|
|
1957
|
+
*/
|
|
1958
|
+
export let trimStart = (string: String) => trimString(string, false)
|
|
1959
|
+
/**
|
|
1960
|
+
* Trims the end of a string—removing any trailing whitespace characters.
|
|
1961
|
+
*
|
|
1962
|
+
* @param string: The string to be trimmed
|
|
1963
|
+
* @returns The trimmed string
|
|
1964
|
+
*
|
|
1965
|
+
* @example String.trimEnd("Hello World ") == "Hello World"
|
|
1966
|
+
*
|
|
1967
|
+
* @since v0.4.2
|
|
1968
|
+
*/
|
|
1969
|
+
export let trimEnd = (string: String) => trimString(string, true)
|
|
1970
|
+
/**
|
|
1971
|
+
* Trims a string—removing all leading and trailing whitespace characters.
|
|
1972
|
+
*
|
|
1973
|
+
* @param string: The string to be trimmed
|
|
1974
|
+
* @returns The trimmed string
|
|
1975
|
+
*
|
|
1976
|
+
* @example String.trim(" Hello World ") == "Hello World"
|
|
1977
|
+
*
|
|
1978
|
+
* @since v0.4.2
|
|
1979
|
+
*/
|
|
1980
|
+
export let trim = (string: String) => trimEnd(trimStart(string))
|