functionalscript 0.0.373 → 0.0.376

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "functionalscript",
3
- "version": "0.0.373",
3
+ "version": "0.0.376",
4
4
  "description": "FunctionalScript is a functional subset of JavaScript",
5
5
  "main": "module.f.cjs",
6
6
  "scripts": {
@@ -0,0 +1,90 @@
1
+ # UNICODE
2
+
3
+ ## UTF-8
4
+
5
+ Requirement: no loss for UTF8 => codepoint => UTF8
6
+
7
+ |utf8 |codepoint |size |
8
+ |---------|---------------------------------------|---------|
9
+ |[a] |0xxx_xxxx |7 bit |
10
+ |[b,a] |110x_xxxx 10xx_xxxx |11 bit |
11
+ |[c,b,a] |1110_xxxx 10xx_xxxx 10xx_xxxx |16 bit |
12
+ |[d,c,b,a]|1111_0xxx 10xx_xxxx 10xx_xxxx 10xx_xxxx|21 bit |
13
+
14
+ |utf8 error|codepoint |size |
15
+ |----------|-----------------------------|------|
16
+ |[e] |10xx_xxxx |6 bit |
17
+ |[e] |1111_1xxx |3 bit |
18
+ |[b,] |110x_xxxx |5 bit |
19
+ |[c,] |1110_xxxx |4 bit |
20
+ |[c,b,] |1110_xxxx 10xx_xxxx |10 bit|
21
+ |[d,] |1111_0xxx |3 bit |
22
+ |[d,c,] |1111_0xxx 10xx_xxxx |9 bit |
23
+ |[d,c,b] |1111_0xxx 10xx_xxxx 10xx_xxxx|15 bit|
24
+
25
+ Total error states:
26
+
27
+ - 2^6 + 2^3 + 2^5 + 2^4 + 2^10 + 2^3 + 2^9 + 2^15
28
+ - 2^4 + 2^6 + 2^5 + 2^4 + 2^10 + 2^9 + 2^15
29
+ - 2^5 + 2^6 + 2^5 + 2^10 + 2^9 + 2^15
30
+ - 2^6 + 2^6 + 2^10 + 2^9 + 2^15
31
+ - 2^7 + 2^9 + 2^10 + 2^15
32
+ - < 2^16
33
+
34
+ |utf8 error|codepoint |size |map |
35
+ |----------|-----------------------------|------|-------------------|
36
+ |[e] |1111_1xxx | 3 bit| |
37
+ |[d,] |1111_0xxx | 3 bit| |
38
+ |[c,] |1110_xxxx | 4 bit| |
39
+ |[b,] |110x_xxxx | 5 bit| |
40
+ |[e] |10xx_xxxx | 6 bit|1111_1111 1xxx_xxxx|
41
+ |[d,c,] |1111_0xxx 10xx_xxxx | 9 bit|1111_0xxx 10xx_xxxx|
42
+ |[c,b,] |1110_xxxx 10xx_xxxx |10 bit|1110_xxxx 10xx_xxxx|
43
+ |[d,c,b] |1111_0xxx 10xx_xxxx 10xx_xxxx|15 bit|0xxx_xxxx xxxx_xxxx|
44
+
45
+ ```js
46
+ /** @type {(input: List<u8|undefined>) => List<i32>} */
47
+ const utf8ToCodePoint
48
+
49
+ /** @type {(input: List<i32>) => List<u8>} */
50
+ const codePointToUtf8
51
+ ```
52
+
53
+ ## UTF-16
54
+
55
+ Requirement: no loss for UTF16 => codepoint => UTF16
56
+
57
+ 0xD800..0xDFFF
58
+ 0b_1101_1000_0000_0000
59
+ 0b_1101_1111_1111_1111
60
+
61
+ 0b_1101_1xxx_xxxx_xxxx : 11 bits
62
+
63
+ - first : 0xD800: 0b_1101_10xx_xxxx_xxxx : 10 bit
64
+ - second: 0xDC00: 0b_1101_11xx_xxxx_xxxx : 10 bit
65
+
66
+ |utf16 |codepoint |size |
67
+ |---------|---------------------------------------|------|
68
+ |[a] |xxxx_xxxx_xxxx_xxxx |16 bit|
69
+ |[b,a] |1101_10xx_xxxx_xxxx 1101_11xx_xxxx_xxxx|20 bit|
70
+
71
+ |utf16 error|codepoint |size |
72
+ |-----------|-------------------|------|
73
+ |[e] |1101_11xx_xxxx_xxxx|10 bit|
74
+ |[b,] |1101_10xx_xxxx_xxxx|10 bit|
75
+
76
+ Total error states: 11 bit
77
+
78
+ ```js
79
+ /** @type {(input: List<u16|undefined>) => List<i32>} */
80
+ const utf16ToCodePoint
81
+
82
+ /** @type {(input: List<i32>) => List<u16>} */
83
+ const codePointToUtf16
84
+
85
+ /** @type {(input: string) => List<u16>} */
86
+ const stringToUtf16
87
+
88
+ /** @type {(input: List<u16>) => string} */
89
+ const utf16ToString
90
+ ```
@@ -1,10 +1,29 @@
1
1
  const result = require('../../types/result/module.f.cjs')
2
2
  const list = require('../../types/list/module.f.cjs')
3
+ const operator = require('../../types/function/operator/module.f.cjs')
4
+ const array = require('../../types/array/module.f.cjs')
5
+ const { contains } = require('../../types/range/module.f.cjs')
3
6
  const { ok, error } = result
4
7
 
5
- /** @typedef {result.Result<number,number>} Utf8Result */
8
+ /** @typedef {result.Result<number,number>} ByteResult */
6
9
 
7
- /** @type {(input:number) => list.List<Utf8Result>} */
10
+ /** @typedef {result.Result<number,readonly number[]>} CodePointResult */
11
+
12
+ /** @typedef {number|undefined} ByteOrEof */
13
+
14
+ /** @typedef {undefined|array.Array1<number>|array.Array2<number>|array.Array3<number>} Utf8State */
15
+
16
+ /** @typedef {undefined|array.Array1<number>|array.Array2<number>|array.Array3<number>} Utf16State */
17
+
18
+ /** @type {(a:number) => boolean} */
19
+ const isBmpCodePoint = a => a >= 0x0000 && a <= 0xd7ff || a >= 0xe000 && a <= 0xffff
20
+
21
+ const isHighSurrogate = contains([0xd800, 0xdbff])
22
+
23
+ /** @type {(a:number) => boolean} */
24
+ const isLowSurrogate = contains([0xdc00, 0xdfff])
25
+
26
+ /** @type {(input:number) => list.List<ByteResult>} */
8
27
  const codePointToUtf8 = input =>
9
28
  {
10
29
  if (input >= 0x0000 && input <= 0x007f) { return [ok(input & 0x7f)] }
@@ -14,10 +33,10 @@ const codePointToUtf8 = input =>
14
33
  return [error(input)]
15
34
  }
16
35
 
17
- /** @type {(input:number) => list.List<Utf8Result>} */
36
+ /** @type {(input:number) => list.List<ByteResult>} */
18
37
  const codePointToUtf16 = input =>
19
38
  {
20
- if (input >= 0x0000 && input <= 0xd7ff || input >= 0xe000 && input <= 0xffff) { return [ok(input >> 8), ok(input & 0xff)] }
39
+ if (isBmpCodePoint(input)) { return [ok(input >> 8), ok(input & 0xff)] }
21
40
  if (input >= 0x010000 && input <= 0x10ffff) {
22
41
  const high = ((input - 0x10000) >> 10) + 0xd800
23
42
  const low = ((input - 0x10000) & 0x3ff) + 0xdc00
@@ -26,15 +45,94 @@ const codePointToUtf16 = input =>
26
45
  return [error(input)]
27
46
  }
28
47
 
29
- /** @type {(input: list.List<number>) => list.List<Utf8Result>} */
48
+ /** @type {(input: list.List<number>) => list.List<ByteResult>} */
30
49
  const codePointListToUtf8 = list.flatMap(codePointToUtf8)
31
50
 
32
- /** @type {(input: list.List<number>) => list.List<Utf8Result>} */
51
+ /** @type {(input: list.List<number>) => list.List<ByteResult>} */
33
52
  const codePointListToUtf16 = list.flatMap(codePointToUtf16)
34
53
 
54
+ /** @type {operator.StateScan<number, Utf8State, list.List<CodePointResult>>} */
55
+ const utf8ByteToCodePointOp = state => byte => {
56
+ if (byte < 0x00 || byte > 0xff) {
57
+ return [[error([byte])], state]
58
+ }
59
+ if (state == undefined) {
60
+ if (byte < 0x80) { return [[ok(byte)], undefined] }
61
+ if (byte >= 0xc2 && byte <= 0xf4) { return [[], [byte]] }
62
+ return [[error([byte])], undefined]
63
+ }
64
+ if (byte >= 0x80 && byte < 0xc0)
65
+ {
66
+ switch(state.length)
67
+ {
68
+ case 1:
69
+ if (state[0] < 0xe0) { return [[ok(((state[0] & 0x1f) << 6) + (byte & 0x3f))], undefined] }
70
+ if (state[0] < 0xf8) { return [[], [state[0], byte]] }
71
+ break
72
+ case 2:
73
+ if (state[0] < 0xf0) { return [[ok(((state[0] & 0x0f) << 12) + ((state[1] & 0x3f) << 6) + (byte & 0x3f))], undefined] }
74
+ if (state[0] < 0xf8) { return [[], [state[0], state[1], byte]] }
75
+ break
76
+ case 3:
77
+ return [[ok(((state[0] & 0x07) << 18) + ((state[1] & 0x3f) << 12) + ((state[2] & 0x3f) << 6) + (byte & 0x3f))], undefined]
78
+ }
79
+ }
80
+ return [[error(list.toArray(list.concat(state)([byte])))], undefined]
81
+ }
82
+
83
+ /** @type {(state: Utf8State) => readonly[list.List<CodePointResult>, Utf8State]} */
84
+ const utf8EofToCodePointOp = state => [state == undefined ? undefined : [error(state)], undefined]
85
+
86
+ /** @type {operator.StateScan<ByteOrEof, Utf8State, list.List<CodePointResult>>} */
87
+ const utf8ByteOrEofToCodePointOp = state => input => input === undefined ? utf8EofToCodePointOp(state) : utf8ByteToCodePointOp(state)(input)
88
+
89
+ /** @type {(input: list.List<number>) => list.List<CodePointResult>} */
90
+ const utf8ListToCodePoint = input => list.flat(list.stateScan(utf8ByteOrEofToCodePointOp)(undefined)(list.concat(/** @type {list.List<ByteOrEof>} */(input))([undefined])))
91
+
92
+ /** @type {operator.StateScan<number, Utf16State, list.List<CodePointResult>>} */
93
+ const utf16ByteToCodePointOp = state => byte => {
94
+ if (byte < 0x00 || byte > 0xff) {
95
+ return [[error([byte])], state]
96
+ }
97
+ if (state == undefined) {
98
+ return [[], [byte]]
99
+ }
100
+ switch(state.length)
101
+ {
102
+ case 1:
103
+ const codeUnit = (state[0] << 8) + byte
104
+ if (isBmpCodePoint(codeUnit)) { return [[ok(codeUnit)], undefined] }
105
+ if (isHighSurrogate(codeUnit)) { return [[], [state[0], byte]] }
106
+ break
107
+ case 2:
108
+ return [[], [state[0], state[1], byte]]
109
+ case 3:
110
+ if (isLowSurrogate((state[2] << 8) + byte)) {
111
+ const high = (state[0] << 8) + state[1] - 0xd800
112
+ const low = (state[2] << 8) + byte - 0xdc00
113
+ return [[ok((high << 10) + low + 0x10000)], undefined]
114
+ }
115
+ break
116
+ }
117
+ return [[error(list.toArray(list.concat(state)([byte])))], undefined]
118
+ }
119
+
120
+ /** @type {(state: Utf16State) => readonly[list.List<CodePointResult>, Utf16State]} */
121
+ const utf16EofToCodePointOp = state => [state == undefined ? undefined : [error(state)], undefined]
122
+
123
+ /** @type {operator.StateScan<ByteOrEof, Utf16State, list.List<CodePointResult>>} */
124
+ const utf16ByteOrEofToCodePointOp = state => input => input === undefined ? utf16EofToCodePointOp(state) : utf16ByteToCodePointOp(state)(input)
125
+
126
+ /** @type {(input: list.List<number>) => list.List<CodePointResult>} */
127
+ const utf16ListToCodePoint = input => list.flat(list.stateScan(utf16ByteOrEofToCodePointOp)(undefined)(list.concat(/** @type {list.List<ByteOrEof>} */(input))([undefined])))
128
+
35
129
  module.exports = {
36
130
  /** @readonly */
37
131
  codePointListToUtf8,
38
132
  /** @readonly */
39
133
  codePointListToUtf16,
134
+ /** @readonly */
135
+ utf8ListToCodePoint,
136
+ /** @readonly */
137
+ utf16ListToCodePoint
40
138
  }
@@ -116,4 +116,84 @@ const stringify = a => json.stringify(sort)(a)
116
116
  if (result !== '[["error",-1],["error",55296],["error",57343],["error",1114112]]') { throw result }
117
117
  }
118
118
 
119
+ {
120
+ const result = stringify(list.toArray(encoding.utf8ListToCodePoint([-1, 256])))
121
+ if (result !== '[["error",[-1]],["error",[256]]]') { throw result }
122
+ }
123
+
124
+ {
125
+ const result = stringify(list.toArray(encoding.utf8ListToCodePoint([128, 193, 245, 255])))
126
+ if (result !== '[["error",[128]],["error",[193]],["error",[245]],["error",[255]]]') { throw result }
127
+ }
128
+
129
+ {
130
+ const result = stringify(list.toArray(encoding.utf8ListToCodePoint([0, 1, 127])))
131
+ if (result !== '[["ok",0],["ok",1],["ok",127]]') { throw result }
132
+ }
133
+
134
+ {
135
+ const result = stringify(list.toArray(encoding.utf8ListToCodePoint([194, 128, 194, 169, 223, 191])))
136
+ if (result !== '[["ok",128],["ok",169],["ok",2047]]') { throw result }
137
+ }
138
+
139
+ {
140
+ const result = stringify(list.toArray(encoding.utf8ListToCodePoint([194, 127, 194, 192, 194])))
141
+ if (result !== '[["error",[194,127]],["error",[194,192]],["error",[194]]]') { throw result }
142
+ }
143
+
144
+ {
145
+ const result = stringify(list.toArray(encoding.utf8ListToCodePoint([224, 160, 128, 224, 160, 129, 239, 191, 191])))
146
+ if (result !== '[["ok",2048],["ok",2049],["ok",65535]]') { throw result }
147
+ }
148
+
149
+ {
150
+ const result = stringify(list.toArray(encoding.utf8ListToCodePoint([224, 160, 127, 224, 160, 192, 224, 160])))
151
+ if (result !== '[["error",[224,160,127]],["error",[224,160,192]],["error",[224,160]]]') { throw result }
152
+ }
153
+
154
+ {
155
+ const result = stringify(list.toArray(encoding.utf8ListToCodePoint([240, 144, 128, 128, 240, 144, 128, 129, 244, 143, 191, 191])))
156
+ if (result !== '[["ok",65536],["ok",65537],["ok",1114111]]') { throw result }
157
+ }
158
+
159
+ {
160
+ const result = stringify(list.toArray(encoding.utf8ListToCodePoint([240, 144, 128, 127, 240, 144, 128, 192, 240, 144, 128])))
161
+ if (result !== '[["error",[240,144,128,127]],["error",[240,144,128,192]],["error",[240,144,128]]]') { throw result }
162
+ }
163
+
164
+ {
165
+ const result = stringify(list.toArray(encoding.utf8ListToCodePoint([194, -1, 128])))
166
+ if (result !== '[["error",[-1]],["ok",128]]') { throw result }
167
+ }
168
+
169
+ {
170
+ const result = stringify(list.toArray(encoding.utf16ListToCodePoint([-1, 256,])))
171
+ if (result !== '[["error",[-1]],["error",[256]]]') { throw result }
172
+ }
173
+
174
+ {
175
+ const result = stringify(list.toArray(encoding.utf16ListToCodePoint([0, 0, 0, 36, 32, 172, 215, 255, 224, 0, 255, 255])))
176
+ if (result !== '[["ok",0],["ok",36],["ok",8364],["ok",55295],["ok",57344],["ok",65535]]') { throw result }
177
+ }
178
+
179
+ {
180
+ const result = stringify(list.toArray(encoding.utf16ListToCodePoint([220, 0, 223, 255])))
181
+ if (result !== '[["error",[220,0]],["error",[223,255]]]') { throw result }
182
+ }
183
+
184
+ {
185
+ const result = stringify(list.toArray(encoding.utf16ListToCodePoint([216, 0, 220, 0, 216, 1, 220, 55, 216, 82, 223, 98, 219, 255, 223, 255])))
186
+ if (result !== '[["ok",65536],["ok",66615],["ok",150370],["ok",1114111]]') { throw result }
187
+ }
188
+
189
+ {
190
+ const result = stringify(list.toArray(encoding.utf16ListToCodePoint([216, 0, 216, 0])))
191
+ if (result !== '[["error",[216,0,216,0]]]') { throw result }
192
+ }
193
+
194
+ {
195
+ const result = stringify(list.toArray(encoding.utf16ListToCodePoint([216, 0, 0, 0])))
196
+ if (result !== '[["error",[216,0,0,0]]]') { throw result }
197
+ }
198
+
119
199
  module.exports = {}