functionalscript 0.0.373 → 0.0.374
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/text/encoding/module.f.cjs +104 -6
- package/text/encoding/test.f.cjs +80 -0
package/package.json
CHANGED
|
@@ -1,10 +1,29 @@
|
|
|
1
1
|
const result = require('../../types/result/module.f.cjs')
|
|
2
2
|
const list = require('../../types/list/module.f.cjs')
|
|
3
|
+
const operator = require('../../types/function/operator/module.f.cjs')
|
|
4
|
+
const array = require('../../types/array/module.f.cjs')
|
|
5
|
+
const { contains } = require('../../types/range/module.f.cjs')
|
|
3
6
|
const { ok, error } = result
|
|
4
7
|
|
|
5
|
-
/** @typedef {result.Result<number,number>}
|
|
8
|
+
/** @typedef {result.Result<number,number>} ByteResult */
|
|
6
9
|
|
|
7
|
-
/** @
|
|
10
|
+
/** @typedef {result.Result<number,readonly number[]>} CodePointResult */
|
|
11
|
+
|
|
12
|
+
/** @typedef {number|undefined} ByteOrEof */
|
|
13
|
+
|
|
14
|
+
/** @typedef {undefined|array.Array1<number>|array.Array2<number>|array.Array3<number>} Utf8State */
|
|
15
|
+
|
|
16
|
+
/** @typedef {undefined|array.Array1<number>|array.Array2<number>|array.Array3<number>} Utf16State */
|
|
17
|
+
|
|
18
|
+
/** @type {(a:number) => boolean} */
|
|
19
|
+
const isBmpCodePoint = a => {
    // Accepts 0x0000..0xd7ff and 0xe000..0xffff; the values in between are rejected.
    if (a >= 0x0000 && a <= 0xd7ff) { return true }
    return a >= 0xe000 && a <= 0xffff
}
|
|
20
|
+
|
|
21
|
+
/**
 * Predicate for a high surrogate code unit, built from the bounds 0xd800 and 0xdbff.
 * NOTE(review): presumably both bounds are inclusive - confirm against `contains` in the range module.
 *
 * @type {(a:number) => boolean}
 */
const isHighSurrogate = contains([0xd800, 0xdbff])
|
|
22
|
+
|
|
23
|
+
/** @type {(a:number) => boolean} */
|
|
24
|
+
const isLowSurrogate = a => {
    // Delegates the range test over the bounds 0xdc00 and 0xdfff to `contains`.
    return contains([0xdc00, 0xdfff])(a)
}
|
|
25
|
+
|
|
26
|
+
/** @type {(input:number) => list.List<ByteResult>} */
|
|
8
27
|
const codePointToUtf8 = input =>
|
|
9
28
|
{
|
|
10
29
|
if (input >= 0x0000 && input <= 0x007f) { return [ok(input & 0x7f)] }
|
|
@@ -14,10 +33,10 @@ const codePointToUtf8 = input =>
|
|
|
14
33
|
return [error(input)]
|
|
15
34
|
}
|
|
16
35
|
|
|
17
|
-
/** @type {(input:number) => list.List<
|
|
36
|
+
/** @type {(input:number) => list.List<ByteResult>} */
|
|
18
37
|
const codePointToUtf16 = input =>
|
|
19
38
|
{
|
|
20
|
-
if (input
|
|
39
|
+
if (isBmpCodePoint(input)) { return [ok(input >> 8), ok(input & 0xff)] }
|
|
21
40
|
if (input >= 0x010000 && input <= 0x10ffff) {
|
|
22
41
|
const high = ((input - 0x10000) >> 10) + 0xd800
|
|
23
42
|
const low = ((input - 0x10000) & 0x3ff) + 0xdc00
|
|
@@ -26,15 +45,94 @@ const codePointToUtf16 = input =>
|
|
|
26
45
|
return [error(input)]
|
|
27
46
|
}
|
|
28
47
|
|
|
29
|
-
/** @type {(input: list.List<number>) => list.List<
|
|
48
|
+
/** @type {(input: list.List<number>) => list.List<ByteResult>} */
|
|
30
49
|
const codePointListToUtf8 = input => {
    // Every code point is handed to codePointToUtf8; list.flatMap combines the per-code-point results.
    return list.flatMap(codePointToUtf8)(input)
}
|
|
31
50
|
|
|
32
|
-
/** @type {(input: list.List<number>) => list.List<
|
|
51
|
+
/** @type {(input: list.List<number>) => list.List<ByteResult>} */
|
|
33
52
|
const codePointListToUtf16 = input => {
    // Every code point is handed to codePointToUtf16; list.flatMap combines the per-code-point results.
    return list.flatMap(codePointToUtf16)(input)
}
|
|
34
53
|
|
|
54
|
+
/**
 * Checks the second byte of a three- or four-byte UTF-8 sequence against the
 * lead-byte specific ranges of well-formed UTF-8.
 * The caller has already established that `second` is a continuation byte (0x80..0xbf).
 *
 * @type {(lead: number) => (second: number) => boolean}
 */
const isWellFormedUtf8SecondByte = lead => second => {
    switch (lead) {
        // 0xe0 0x80..0x9f would be an overlong encoding of a code point below 0x0800
        case 0xe0: return second >= 0xa0
        // 0xed 0xa0..0xbf would decode to a surrogate (0xd800..0xdfff)
        case 0xed: return second < 0xa0
        // 0xf0 0x80..0x8f would be an overlong encoding of a code point below 0x10000
        case 0xf0: return second >= 0x90
        // 0xf4 0x90..0xbf would decode to a value above 0x10ffff
        case 0xf4: return second < 0x90
        default: return true
    }
}

/**
 * One step of the UTF-8 decoder: consumes a single byte given the bytes of the
 * sequence buffered so far (`state`, `undefined` when nothing is pending).
 *
 * Returns a pair `[results, nextState]`:
 * - `results` holds `ok(codePoint)` when the byte completes a sequence,
 *   `error(bytes)` when the buffered bytes plus this byte are not a valid sequence,
 *   and is empty while a sequence is still incomplete;
 * - `nextState` is the new buffer, or `undefined` once a sequence has been resolved.
 *
 * Ill-formed second bytes (overlong forms, surrogates, values above 0x10ffff) are
 * rejected together with their lead byte instead of being decoded.
 *
 * @type {operator.StateScan<number, Utf8State, list.List<CodePointResult>>}
 */
const utf8ByteToCodePointOp = state => byte => {
    // A value that is not a byte is reported on its own; the pending sequence is kept.
    if (byte < 0x00 || byte > 0xff) {
        return [[error([byte])], state]
    }
    if (state == undefined) {
        if (byte < 0x80) { return [[ok(byte)], undefined] }
        // Only 0xc2..0xf4 can start a multi-byte sequence.
        if (byte >= 0xc2 && byte <= 0xf4) { return [[], [byte]] }
        return [[error([byte])], undefined]
    }
    if (byte >= 0x80 && byte < 0xc0)
    {
        switch(state.length)
        {
            case 1:
                if (state[0] < 0xe0) { return [[ok(((state[0] & 0x1f) << 6) + (byte & 0x3f))], undefined] }
                if (state[0] < 0xf8 && isWellFormedUtf8SecondByte(state[0])(byte)) { return [[], [state[0], byte]] }
                break
            case 2:
                if (state[0] < 0xf0) { return [[ok(((state[0] & 0x0f) << 12) + ((state[1] & 0x3f) << 6) + (byte & 0x3f))], undefined] }
                if (state[0] < 0xf8) { return [[], [state[0], state[1], byte]] }
                break
            case 3:
                return [[ok(((state[0] & 0x07) << 18) + ((state[1] & 0x3f) << 12) + ((state[2] & 0x3f) << 6) + (byte & 0x3f))], undefined]
        }
    }
    // The buffered bytes together with the offending byte form one error; decoding restarts clean.
    return [[error(list.toArray(list.concat(state)([byte])))], undefined]
}
|
|
82
|
+
|
|
83
|
+
/** @type {(state: Utf8State) => readonly[list.List<CodePointResult>, Utf8State]} */
|
|
84
|
+
const utf8EofToCodePointOp = state => {
    // Nothing buffered at end of input: no results.
    if (state == undefined) { return [undefined, undefined] }
    // Bytes still buffered at end of input are reported as a single error.
    return [[error(state)], undefined]
}
|
|
85
|
+
|
|
86
|
+
/** @type {operator.StateScan<ByteOrEof, Utf8State, list.List<CodePointResult>>} */
|
|
87
|
+
const utf8ByteOrEofToCodePointOp = state => input => {
    // `undefined` is the end-of-input marker; anything else is treated as a byte.
    if (input === undefined) { return utf8EofToCodePointOp(state) }
    return utf8ByteToCodePointOp(state)(input)
}
|
|
88
|
+
|
|
89
|
+
/** @type {(input: list.List<number>) => list.List<CodePointResult>} */
|
|
90
|
+
const utf8ListToCodePoint = input => {
    // Scan with no bytes buffered initially.
    const scan = list.stateScan(utf8ByteOrEofToCodePointOp)(undefined)
    // A trailing `undefined` marks end of input for the byte-or-EOF step.
    const bytesThenEof = list.concat(/** @type {list.List<ByteOrEof>} */(input))([undefined])
    return list.flat(scan(bytesThenEof))
}
|
|
91
|
+
|
|
92
|
+
/**
 * One step of the UTF-16 decoder (high byte first): consumes a single byte given
 * the bytes buffered so far (`state`, `undefined` when nothing is pending).
 *
 * Returns a pair `[results, nextState]`: `results` is empty while a code unit or a
 * surrogate pair is still incomplete, holds `ok(codePoint)` once one is complete,
 * or `error(bytes)` when the buffered bytes plus this byte cannot be decoded.
 *
 * @type {operator.StateScan<number, Utf16State, list.List<CodePointResult>>}
 */
const utf16ByteToCodePointOp = state => byte => {
    // A value that is not a byte is reported on its own; the pending bytes are kept.
    if (byte < 0x00 || byte > 0xff) {
        return [[error([byte])], state]
    }
    // First byte of a code unit: just buffer it.
    if (state == undefined) {
        return [[], [byte]]
    }
    if (state.length === 1) {
        const unit = (state[0] << 8) + byte
        if (isBmpCodePoint(unit)) { return [[ok(unit)], undefined] }
        // A high surrogate needs a second code unit, so keep both bytes.
        if (isHighSurrogate(unit)) { return [[], [state[0], byte]] }
    } else if (state.length === 2) {
        // First byte of the second code unit; it is validated once its pair byte arrives.
        return [[], [state[0], state[1], byte]]
    } else if (state.length === 3) {
        const trailing = (state[2] << 8) + byte
        if (isLowSurrogate(trailing)) {
            const leading = (state[0] << 8) + state[1]
            const highBits = leading - 0xd800
            const lowBits = trailing - 0xdc00
            return [[ok((highBits << 10) + lowBits + 0x10000)], undefined]
        }
    }
    // Everything buffered plus the current byte is reported as one error.
    return [[error(list.toArray(list.concat(state)([byte])))], undefined]
}
|
|
119
|
+
|
|
120
|
+
/** @type {(state: Utf16State) => readonly[list.List<CodePointResult>, Utf16State]} */
|
|
121
|
+
const utf16EofToCodePointOp = state => {
    // Nothing buffered at end of input: no results.
    if (state == undefined) { return [undefined, undefined] }
    // Bytes still buffered at end of input are reported as a single error.
    return [[error(state)], undefined]
}
|
|
122
|
+
|
|
123
|
+
/** @type {operator.StateScan<ByteOrEof, Utf16State, list.List<CodePointResult>>} */
|
|
124
|
+
const utf16ByteOrEofToCodePointOp = state => input => {
    // `undefined` is the end-of-input marker; anything else is treated as a byte.
    if (input === undefined) { return utf16EofToCodePointOp(state) }
    return utf16ByteToCodePointOp(state)(input)
}
|
|
125
|
+
|
|
126
|
+
/** @type {(input: list.List<number>) => list.List<CodePointResult>} */
|
|
127
|
+
const utf16ListToCodePoint = input => {
    // Scan with no bytes buffered initially.
    const scan = list.stateScan(utf16ByteOrEofToCodePointOp)(undefined)
    // A trailing `undefined` marks end of input for the byte-or-EOF step.
    const bytesThenEof = list.concat(/** @type {list.List<ByteOrEof>} */(input))([undefined])
    return list.flat(scan(bytesThenEof))
}
|
|
128
|
+
|
|
35
129
|
// Public surface of the module: the two encoders and the two decoders.
const encodingModule = {
    /** @readonly */
    codePointListToUtf8,
    /** @readonly */
    codePointListToUtf16,
    /** @readonly */
    utf8ListToCodePoint,
    /** @readonly */
    utf16ListToCodePoint
}

module.exports = encodingModule
|
package/text/encoding/test.f.cjs
CHANGED
|
@@ -116,4 +116,84 @@ const stringify = a => json.stringify(sort)(a)
|
|
|
116
116
|
if (result !== '[["error",-1],["error",55296],["error",57343],["error",1114112]]') { throw result }
|
|
117
117
|
}
|
|
118
118
|
|
|
119
|
+
{
|
|
120
|
+
const result = stringify(list.toArray(encoding.utf8ListToCodePoint([-1, 256])))
|
|
121
|
+
if (result !== '[["error",[-1]],["error",[256]]]') { throw result }
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
{
|
|
125
|
+
const result = stringify(list.toArray(encoding.utf8ListToCodePoint([128, 193, 245, 255])))
|
|
126
|
+
if (result !== '[["error",[128]],["error",[193]],["error",[245]],["error",[255]]]') { throw result }
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
{
|
|
130
|
+
const result = stringify(list.toArray(encoding.utf8ListToCodePoint([0, 1, 127])))
|
|
131
|
+
if (result !== '[["ok",0],["ok",1],["ok",127]]') { throw result }
|
|
132
|
+
}
|
|
133
|
+
|
|
134
|
+
{
|
|
135
|
+
const result = stringify(list.toArray(encoding.utf8ListToCodePoint([194, 128, 194, 169, 223, 191])))
|
|
136
|
+
if (result !== '[["ok",128],["ok",169],["ok",2047]]') { throw result }
|
|
137
|
+
}
|
|
138
|
+
|
|
139
|
+
{
|
|
140
|
+
const result = stringify(list.toArray(encoding.utf8ListToCodePoint([194, 127, 194, 192, 194])))
|
|
141
|
+
if (result !== '[["error",[194,127]],["error",[194,192]],["error",[194]]]') { throw result }
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
{
|
|
145
|
+
const result = stringify(list.toArray(encoding.utf8ListToCodePoint([224, 160, 128, 224, 160, 129, 239, 191, 191])))
|
|
146
|
+
if (result !== '[["ok",2048],["ok",2049],["ok",65535]]') { throw result }
|
|
147
|
+
}
|
|
148
|
+
|
|
149
|
+
{
|
|
150
|
+
const result = stringify(list.toArray(encoding.utf8ListToCodePoint([224, 160, 127, 224, 160, 192, 224, 160])))
|
|
151
|
+
if (result !== '[["error",[224,160,127]],["error",[224,160,192]],["error",[224,160]]]') { throw result }
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
{
|
|
155
|
+
const result = stringify(list.toArray(encoding.utf8ListToCodePoint([240, 144, 128, 128, 240, 144, 128, 129, 244, 143, 191, 191])))
|
|
156
|
+
if (result !== '[["ok",65536],["ok",65537],["ok",1114111]]') { throw result }
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
{
|
|
160
|
+
const result = stringify(list.toArray(encoding.utf8ListToCodePoint([240, 144, 128, 127, 240, 144, 128, 192, 240, 144, 128])))
|
|
161
|
+
if (result !== '[["error",[240,144,128,127]],["error",[240,144,128,192]],["error",[240,144,128]]]') { throw result }
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
{
|
|
165
|
+
const result = stringify(list.toArray(encoding.utf8ListToCodePoint([194, -1, 128])))
|
|
166
|
+
if (result !== '[["error",[-1]],["ok",128]]') { throw result }
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
{
|
|
170
|
+
const result = stringify(list.toArray(encoding.utf16ListToCodePoint([-1, 256,])))
|
|
171
|
+
if (result !== '[["error",[-1]],["error",[256]]]') { throw result }
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
{
|
|
175
|
+
const result = stringify(list.toArray(encoding.utf16ListToCodePoint([0, 0, 0, 36, 32, 172, 215, 255, 224, 0, 255, 255])))
|
|
176
|
+
if (result !== '[["ok",0],["ok",36],["ok",8364],["ok",55295],["ok",57344],["ok",65535]]') { throw result }
|
|
177
|
+
}
|
|
178
|
+
|
|
179
|
+
{
|
|
180
|
+
const result = stringify(list.toArray(encoding.utf16ListToCodePoint([220, 0, 223, 255])))
|
|
181
|
+
if (result !== '[["error",[220,0]],["error",[223,255]]]') { throw result }
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
{
|
|
185
|
+
const result = stringify(list.toArray(encoding.utf16ListToCodePoint([216, 0, 220, 0, 216, 1, 220, 55, 216, 82, 223, 98, 219, 255, 223, 255])))
|
|
186
|
+
if (result !== '[["ok",65536],["ok",66615],["ok",150370],["ok",1114111]]') { throw result }
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
{
|
|
190
|
+
const result = stringify(list.toArray(encoding.utf16ListToCodePoint([216, 0, 216, 0])))
|
|
191
|
+
if (result !== '[["error",[216,0,216,0]]]') { throw result }
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
{
|
|
195
|
+
const result = stringify(list.toArray(encoding.utf16ListToCodePoint([216, 0, 0, 0])))
|
|
196
|
+
if (result !== '[["error",[216,0,0,0]]]') { throw result }
|
|
197
|
+
}
|
|
198
|
+
|
|
119
199
|
module.exports = {}
|