functionalscript 0.0.375 → 0.0.376
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/text/encoding/README.md +75 -27
package/package.json
CHANGED
package/text/encoding/README.md
CHANGED
|
@@ -4,39 +4,87 @@
|
|
|
4
4
|
|
|
5
5
|
Requirement: no loss for UTF8 => codepoint => UTF8
|
|
6
6
|
|
|
7
|
-
|utf8 |codepoint|
|
|
8
|
-
|
|
9
|
-
|[a]
|
|
10
|
-
|[a
|
|
11
|
-
|[
|
|
12
|
-
|[
|
|
13
|
-
|
|
14
|
-
|
|
|
15
|
-
|
|
16
|
-
|[
|
|
17
|
-
|[
|
|
18
|
-
|[
|
|
19
|
-
|[
|
|
7
|
+
|utf8 |codepoint |size |
|
|
8
|
+
|---------|---------------------------------------|---------|
|
|
9
|
+
|[a] |0xxx_xxxx |7 bit |
|
|
10
|
+
|[b,a] |110x_xxxx 10xx_xxxx |11 bit |
|
|
11
|
+
|[c,b,a] |1110_xxxx 10xx_xxxx 10xx_xxxx |16 bit |
|
|
12
|
+
|[d,c,b,a]|1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx|21 bit |
|
|
13
|
+
|
|
14
|
+
|utf8 error|codepoint |size |
|
|
15
|
+
|----------|-----------------------------|------|
|
|
16
|
+
|[e] |10xx_xxxx |6 bit |
|
|
17
|
+
|[e] |1111_1xxx |3 bit |
|
|
18
|
+
|[b,] |110x_xxxx |5 bit |
|
|
19
|
+
|[c,] |1110_xxxx |4 bit |
|
|
20
|
+
|[c,b,] |1110_xxxx 10xx_xxxx |10 bit|
|
|
21
|
+
|[d,] |1111_0xxx |3 bit |
|
|
22
|
+
|[d,c,] |1111_0xxx 10xx_xxxx |9 bit |
|
|
23
|
+
|[d,c,b] |1111_0xxx 10xx_xxxx 10xx_xxxx|15 bit|
|
|
24
|
+
|
|
25
|
+
Total error states:
|
|
26
|
+
|
|
27
|
+
- 2^6 + 2^3 + 2^5 + 2^4 + 2^10 + 2^3 + 2^9 + 2^15
|
|
28
|
+
- 2^4 + 2^6 + 2^5 + 2^4 + 2^10 + 2^9 + 2^15
|
|
29
|
+
- 2^5 + 2^6 + 2^5 + 2^10 + 2^9 + 2^15
|
|
30
|
+
- 2^6 + 2^6 + 2^10 + 2^9 + 2^15
|
|
31
|
+
- 2^7 + 2^9 + 2^10 + 2^15
|
|
32
|
+
- < 2^16
|
|
33
|
+
|
|
34
|
+
|utf8 error|codepoint |size |map |
|
|
35
|
+
|----------|-----------------------------|------|-------------------|
|
|
36
|
+
|[e] |1111_1xxx | 3 bit| |
|
|
37
|
+
|[d,] |1111_0xxx | 3 bit| |
|
|
38
|
+
|[c,] |1110_xxxx | 4 bit| |
|
|
39
|
+
|[b,] |110x_xxxx | 5 bit| |
|
|
40
|
+
|[e] |10xx_xxxx | 6 bit|1111_1111 1xxx_xxxx|
|
|
41
|
+
|[d,c,] |1111_0xxx 10xx_xxxx | 9 bit|1111_0xxx 10xx_xxxx|
|
|
42
|
+
|[c,b,] |1110_xxxx 10xx_xxxx |10 bit|1110_xxxx 10xx_xxxx|
|
|
43
|
+
|[d,c,b] |1111_0xxx 10xx_xxxx 10xx_xxxx|15 bit|0xxx_xxxx xxxx_xxxx|
|
|
44
|
+
|
|
45
|
+
```js
|
|
46
|
+
/** @type {(input: List<u8|undefined>) => List<i32>} */
|
|
47
|
+
const utf8ToCodePoint
|
|
48
|
+
|
|
49
|
+
/** @type {(input: List<i32>) => List<u8>} */
|
|
50
|
+
const codePointToUtf8
|
|
51
|
+
```
|
|
20
52
|
|
|
21
53
|
## UTF-16
|
|
22
54
|
|
|
23
55
|
Requirement: no loss for UTF16 => codepoint => UTF16
|
|
24
56
|
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|[a,b] |+ |
|
|
29
|
-
|[e] |- |
|
|
30
|
-
|[a,e] |- |
|
|
31
|
-
|[a,] |- |
|
|
57
|
+
0xD800..0xDFFF
|
|
58
|
+
0b_1101_1000_0000_0000
|
|
59
|
+
0b_1101_1111_1111_1111
|
|
32
60
|
|
|
33
|
-
|
|
34
|
-
|
|
61
|
+
0b_1101_1xxx_xxxx_xxxx : 11 bits
|
|
62
|
+
|
|
63
|
+
- first : 0xD800: 0b_1101_10xx_xxxx_xxxx : 10 bit
|
|
64
|
+
- second: 0xDC00: 0b_1101_11xx_xxxx_xxxx : 10 bit
|
|
65
|
+
|
|
66
|
+
|utf16 |codepoint |size |
|
|
67
|
+
|---------|---------------------------------------|------|
|
|
68
|
+
|[a] |xxxx_xxxx_xxxx_xxxx |16 bit|
|
|
69
|
+
|[b,a] |1101_10xx_xxxx_xxxx 1101_11xx_xxxx_xxxx|20 bit|
|
|
70
|
+
|
|
71
|
+
|utf16 error|codepoint |size |
|
|
72
|
+
|-----------|-------------------|------|
|
|
73
|
+
|[e] |1101_11xx_xxxx_xxxx|10 bit|
|
|
74
|
+
|[b,] |1101_10xx_xxxx_xxxx|10 bit|
|
|
75
|
+
|
|
76
|
+
Total error states: 11 bit
|
|
35
77
|
|
|
36
78
|
```js
|
|
37
|
-
/** @
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
/** @
|
|
41
|
-
|
|
79
|
+
/** @type {(input: List<u16|undefined>) => List<i32>} */
|
|
80
|
+
const utf16ToCodePoint
|
|
81
|
+
|
|
82
|
+
/** @type {(input: List<i32>) => List<u16>} */
|
|
83
|
+
const codePointToUtf16
|
|
84
|
+
|
|
85
|
+
/** @type {(input: string) => List<u16>} */
|
|
86
|
+
const stringToUtf16
|
|
87
|
+
|
|
88
|
+
/** @type {(input: List<u16>) => string} */
|
|
89
|
+
const utf16ToString
|
|
42
90
|
```
|