functionalscript 0.0.405 → 0.0.408

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/doc/fa.md ADDED
@@ -0,0 +1,158 @@
1
+ # FA
2
+
3
+ F ::= A 'hello'
4
+ F ::= A 'help'
5
+
6
+ ## Classic FA
7
+
8
+ S0 ::= A 'h'
9
+ S1 ::= S0 'e'
10
+ S2 ::= S1 'l'
11
+ S3 ::= S2 'l'
12
+ F ::= S3 'o'
13
+
14
+ X0 ::= A 'h'
15
+ X1 ::= X0 'e'
16
+ X2 ::= X1 'l'
17
+ F ::= X2 'p'
18
+
19
+ ## DFA
20
+
21
+ {S0,X0} ::= A 'h'
22
+ {S1,X1} ::= {S0,X0} 'e'
23
+ {S2,X2} ::= {S1,X1} 'l'
24
+ S3 ::= {S2,X2} 'l'
25
+ F ::= {S2,X2} 'p'
26
+ F ::= S3 'o'
27
+
28
+ P0 ::= A 'h'
29
+ P1 ::= P0 'e'
30
+ P2 ::= P1 'l'
31
+ S3 ::= P2 'l'
32
+ F ::= P2 'p'
33
+ F ::= S3 'o'
34
+
35
+ ## Tokenizer FA
36
+
37
+ T ::= I 'true' // T0, T1, T2
38
+ F ::= I 'false' // F0, F1, F2, F3
39
+ N ::= I 'null' // N0, N1, N2
40
+ Id ::= I letter
41
+ Id ::= Id letter
42
+ Id ::= Id digit
43
+
44
+ ## Tokenizer DFA
45
+
46
+ {T0,Id} = I 't'
47
+ {T1,Id} = {T0,Id} 'r'
48
+ Id = {T0,Id} letter(except 'r')
49
+ Id = {T0,Id} digit
50
+
51
+ {a..b}{c..d}{e..f}
52
+
53
+ ```js
54
+ const t0 = [[init, one('t')]]
55
+ const t1 = [[t0, one('r')]]
56
+ const t2 = [[t1, one('u')]]
57
+ const t = [[t2, one('e')]]
58
+
59
+ const id = [
60
+ [init, letter]
61
+ [() => id, letter]
62
+ [() => id, digit]
63
+ ]
64
+
65
+ const dfa = ([t, f, n, id]) => ?
66
+ ```
67
+
68
+ ## Set
69
+
70
+ ```js
71
+ const letter = byteSet(['_', '$', ['a', 'z'], ['A', 'Z']])
72
+ ```
73
+
74
+ Letters and digits
75
+
76
+ | | |
77
+ |------|------------|
78
+ |`$` |`0x24` |
79
+ |`0..9`|`0x30..0x39`|
80
+ |`A..Z`|`0x41..0x5A`|
81
+ |`_` |`0x5F` |
82
+ |`a..z`|`0x61..0x7A`|
83
+
84
+ ## Bit set
85
+
86
+ For a byte, the set has 256 possible members, so it can be stored as an array of 8 uint32 values, a bigint (0..2^256-1), or a string of 16 characters (16 bits each).
87
+
88
+ ### 16 bit set.
89
+
90
+ It can use an intermediate state.
91
+
92
+ | | | |
93
+ |---|-------|--------|
94
+ |`2`|`4` |`$` |
95
+ |`3`|`..9` |`0..9` |
96
+ |`4`|`1..` |`A..O` |
97
+ |`5`|`..A,F`|`P..Z,_`|
98
+ |`6`|`1..` |`a..o` |
99
+ |`7`|`..A` |`p..z` |
100
+
101
+ ```js
102
+ const init = [
103
+ _, _, i2, _,
104
+ i4, i5, i4, i7,
105
+ _, _, _, _,
106
+ _, _, _, _]
107
+
108
+ const i2 = [
109
+ _, _, _, _,
110
+ id, _, _, _,
111
+ _, _, _, _,
112
+ _, _, _, _]
113
+
114
+ const i3 = [
115
+ id, id, id, id,
116
+ id, id, id, id,
117
+ id, id, _, _,
118
+ _, _, _, _]
119
+
120
+ const i4 = [
121
+ _, id, id, id,
122
+ id, id, id, id,
123
+ id, id, id, id,
124
+ id, id, id, id]
125
+
126
+ const i5 = [
127
+ id, id, id, id,
128
+ id, id, id, id,
129
+ id, id, id, _,
130
+ _, _, _, id]
131
+
132
+ const i7 = [
133
+ id, id, id, id,
134
+ id, id, id, id,
135
+ id, id, id, _,
136
+ _, _, _, _]
137
+ ```
138
+
139
+ ```js
140
+ const init =
141
+ 000_000_001_000
142
+ 010_011_100_101
143
+ 000_000_000_000
144
+ 000_000_000_000
145
+
146
+ const i = [
147
+ // 1
148
+ 0000_1000_0000_0000,
149
+ // 2
150
+ 1111_1111_1100_0000,
151
+ // 3
152
+ 0111_1111_1111_1111,
153
+ // 4
154
+ 1111_1111_1110_0001,
155
+ // 5
156
+ 1111_1111_1110_0000,
157
+ ]
158
+ ```
package/html/module.f.cjs CHANGED
@@ -3,8 +3,8 @@ const { map, flatMap, flat, concat: listConcat } = list
3
3
  const { concat: stringConcat } = require('../types/string/module.f.cjs')
4
4
  const object = require('../types/object/module.f.cjs')
5
5
  const { compose } = require('../types/function/module.f.cjs')
6
- const encoding = require('../text/encoding/module.f.cjs');
7
- const { stringToUtf16List } = encoding
6
+ const encoding = require('../text/utf16/module.f.cjs');
7
+ const { stringToList } = encoding
8
8
 
9
9
  const { fromCharCode } = String
10
10
  const { entries } = Object
@@ -80,7 +80,7 @@ const escapeCharCode = code => {
80
80
  }
81
81
  }
82
82
 
83
- const escape = compose(stringToUtf16List)(map(escapeCharCode))
83
+ const escape = compose(stringToList)(map(escapeCharCode))
84
84
 
85
85
  /** @type {(n: Node) => list.List<string>} */
86
86
  const node = n => typeof n === 'string' ? escape(n) : element(n)
@@ -2,10 +2,10 @@ const tokenizer = require('./module.f.cjs')
2
2
  const list = require('../../types/list/module.f.cjs')
3
3
  const json = require('../module.f.cjs')
4
4
  const { sort } = require('../../types/object/module.f.cjs')
5
- const encoding = require('../../text/encoding/module.f.cjs');
5
+ const encoding = require('../../text/utf16/module.f.cjs');
6
6
 
7
7
  /** @type {(s: string) => readonly tokenizer.JsonToken[]} */
8
- const tokenizeString = s => list.toArray(tokenizer.tokenize(encoding.stringToUtf16List(s)))
8
+ const tokenizeString = s => list.toArray(tokenizer.tokenize(encoding.stringToList(s)))
9
9
 
10
10
  const stringify = json.stringify(sort)
11
11
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "functionalscript",
3
- "version": "0.0.405",
3
+ "version": "0.0.408",
4
4
  "description": "FunctionalScript is a functional subset of JavaScript",
5
5
  "main": "module.f.cjs",
6
6
  "scripts": {
package/test.f.cjs CHANGED
@@ -22,7 +22,8 @@ require('./commonjs/build/test.f.cjs')
22
22
  require('./types/range/test.f.cjs')
23
23
  require('./html/test.f.cjs')
24
24
  require('./text/test.f.cjs')
25
- require('./text/encoding/test.f.cjs')
25
+ require('./text/utf8/test.f.cjs')
26
+ require('./text/utf16/test.f.cjs')
26
27
  require('./com/cs/test.f.cjs')
27
28
  require('./com/cpp/test.f.cjs')
28
29
  require('./nodejs/version/test.f.cjs')
@@ -34,12 +34,14 @@ Total error states:
34
34
  - 34_432
35
35
  - < 2^16
36
36
 
37
+ ### utf8/module.f.cjs
38
+
37
39
  ```js
38
40
  /** @type {(input: List<u8|undefined>) => List<i32>} */
39
- const utf8ToCodePoint
41
+ const toCodePointList
40
42
 
41
43
  /** @type {(input: List<i32>) => List<u8>} */
42
- const codePointToUtf8
44
+ const fromCodePointList
43
45
  ```
44
46
 
45
47
  ## UTF-16
@@ -67,18 +69,20 @@ Requirement: no loss for UTF16 => codepoint => UTF16
67
69
 
68
70
  Total error states: 11 bit
69
71
 
72
+ ### utf16/module.f.cjs
73
+
70
74
  ```js
71
75
  /** @type {(input: List<u16|undefined>) => List<i32>} */
72
- const utf16ListToCodePointList
76
+ const toCodePointList
73
77
 
74
78
  /** @type {(input: List<i32>) => List<u16>} */
75
- const codePointListToUtf16List
79
+ const fromCodePointList
76
80
 
77
81
  /** @type {(input: string) => List<u16>} */
78
- const stringToUtf16List
82
+ const stringToList
79
83
 
80
84
  /** @type {(input: List<u16>) => string} */
81
- const utf16ListToString
85
+ const listToString
82
86
  ```
83
87
 
84
88
  UTF-16 => CP => UTF-8 => CP = UTF-16
@@ -0,0 +1,90 @@
1
+ const list = require('../../types/list/module.f.cjs')
2
+ const operator = require('../../types/function/operator/module.f.cjs')
3
+ const array = require('../../types/array/module.f.cjs')
4
+ const { contains } = require('../../types/range/module.f.cjs')
5
+ const { compose } = require('../../types/function/module.f.cjs')
6
+ const { map, flat, stateScan, concat, reduce, flatMap } = list
7
+
8
+ /** @typedef {u16|undefined} WordOrEof */
9
+
10
+ /** @typedef {undefined|number} Utf16State */
11
+
12
+ /** @typedef {number} u16 */
13
+
14
+ /** @typedef {number} i32 */
15
+
16
+ /** @type {(a:number) => boolean} */
17
+ const isBmpCodePoint = a => a >= 0x0000 && a <= 0xd7ff || a >= 0xe000 && a <= 0xffff
18
+
19
+ const isHighSurrogate = contains([0xd800, 0xdbff])
20
+
21
+ /** @type {(a:number) => boolean} */
22
+ const isLowSurrogate = contains([0xdc00, 0xdfff])
23
+
24
+ const errorMask = 0b1000_0000_0000_0000_0000_0000_0000_0000
25
+
26
+ /** @type {(input:i32) => list.List<u16>} */
27
+ const codePointToUtf16 = input =>
28
+ {
29
+ if (isBmpCodePoint(input)) { return [input] }
30
+ if (input >= 0x010000 && input <= 0x10ffff) {
31
+ const high = ((input - 0x10000) >> 10) + 0xd800
32
+ const low = ((input - 0x10000) & 0b0011_1111_1111) + 0xdc00
33
+ return [high, low]
34
+ }
35
+ return [input & 0xffff]
36
+ }
37
+
38
+ const fromCodePointList = flatMap(codePointToUtf16)
39
+
40
+ /** @type {operator.StateScan<u16, Utf16State, list.List<i32>>} */
41
+ const utf16ByteToCodePointOp = state => byte => {
42
+ if (byte < 0x00 || byte > 0xffff) {
43
+ return [[0xffffffff], state]
44
+ }
45
+ if (state === undefined) {
46
+ if (isBmpCodePoint(byte)) { return [[byte], undefined] }
47
+ if (isHighSurrogate(byte)) { return [[], byte] }
48
+ return [[byte | errorMask], undefined]
49
+ }
50
+ if (isLowSurrogate(byte)) {
51
+ const high = state - 0xd800
52
+ const low = byte - 0xdc00
53
+ return [[(high << 10) + low + 0x10000], undefined]
54
+ }
55
+ if (isBmpCodePoint(byte)) { return [[state | errorMask, byte], undefined] }
56
+ if (isHighSurrogate(byte)) { return [[state | errorMask], byte] }
57
+ return [[state | errorMask, byte | errorMask], undefined]
58
+ }
59
+
60
+ /** @type {(state: Utf16State) => readonly[list.List<i32>, Utf16State]} */
61
+ const utf16EofToCodePointOp = state => [state === undefined ? undefined : [state | errorMask], undefined]
62
+
63
+ /** @type {operator.StateScan<WordOrEof, Utf16State, list.List<i32>>} */
64
+ const utf16ByteOrEofToCodePointOp = state => input => input === undefined ? utf16EofToCodePointOp(state) : utf16ByteToCodePointOp(state)(input)
65
+
66
+ /** @type {(input: list.List<u16>) => list.List<i32>} */
67
+ const toCodePointList = input => flat(stateScan(utf16ByteOrEofToCodePointOp)(undefined)(concat(/** @type {list.List<WordOrEof>} */(input))([undefined])))
68
+
69
+ /** @type {(s: string) => list.List<u16>} */
70
+ const stringToList = s => {
71
+ /** @type {(i: number) => list.Result<number>} */
72
+ const at = i => {
73
+ const first = s.charCodeAt(i)
74
+ return isNaN(first) ? undefined : { first, tail: () => at(i + 1) }
75
+ }
76
+ return at(0)
77
+ }
78
+
79
+ const listToString = compose(map(String.fromCharCode))(reduce(operator.concat)(''))
80
+
81
+ module.exports = {
82
+ /** @readonly */
83
+ fromCodePointList,
84
+ /** @readonly */
85
+ toCodePointList,
86
+ /** @readonly */
87
+ stringToList,
88
+ /** @readonly */
89
+ listToString
90
+ }
@@ -0,0 +1,118 @@
1
+ const encoding = require('./module.f.cjs')
2
+ const json = require('../../json/module.f.cjs')
3
+ const { sort } = require('../../types/object/module.f.cjs')
4
+ const { list } = require('../../types/module.f.cjs')
5
+
6
+ /** @type {(a: readonly json.Unknown[]) => string} */
7
+ const stringify = a => json.stringify(sort)(a)
8
+
9
+ {
10
+ const result = stringify(list.toArray(encoding.toCodePointList([-1, 65536])))
11
+ if (result !== '[4294967295,4294967295]') { throw result }
12
+ }
13
+
14
+ {
15
+ const result = stringify(list.toArray(encoding.toCodePointList([0, 36, 8364, 55295, 57344, 65535])))
16
+ if (result !== '[0,36,8364,55295,57344,65535]') { throw result }
17
+ }
18
+
19
+ {
20
+ const result = stringify(list.toArray(encoding.toCodePointList([56320, 57343])))
21
+ if (result !== '[-2147427328,-2147426305]') { throw result }
22
+ }
23
+
24
+ {
25
+ const result = stringify(list.toArray(encoding.toCodePointList([55296, 56320, 55297, 56375, 55378, 57186, 56319, 57343])))
26
+ if (result !== '[65536,66615,150370,1114111]') { throw result }
27
+ }
28
+
29
+ {
30
+ const result = stringify(list.toArray(encoding.toCodePointList([55296, 55296])))
31
+ if (result !== '[-2147428352,-2147428352]') { throw result }
32
+ }
33
+
34
+ {
35
+ const result = stringify(list.toArray(encoding.toCodePointList([55296, 0])))
36
+ if (result !== '[-2147428352,0]') { throw result }
37
+ }
38
+
39
+ {
40
+ const result = stringify(list.toArray(encoding.toCodePointList([56320])))
41
+ if (result !== '[-2147427328]') { throw result }
42
+ }
43
+
44
+ {
45
+ const result = stringify(list.toArray(encoding.toCodePointList([56320, 0])))
46
+ if (result !== '[-2147427328,0]') { throw result }
47
+ }
48
+
49
+ {
50
+ const result = stringify(list.toArray(encoding.fromCodePointList([0])))
51
+ if (result !== '[0]') { throw result }
52
+ }
53
+
54
+ {
55
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x24])))
56
+ if (result !== '[36]') { throw result }
57
+ }
58
+
59
+ {
60
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x20AC])))
61
+ if (result !== '[8364]') { throw result }
62
+ }
63
+
64
+ {
65
+ const result = stringify(list.toArray(encoding.fromCodePointList([0xd7ff])))
66
+ if (result !== '[55295]') { throw result }
67
+ }
68
+
69
+ {
70
+ const result = stringify(list.toArray(encoding.fromCodePointList([0xe000])))
71
+ if (result !== '[57344]') { throw result }
72
+ }
73
+
74
+ {
75
+ const result = stringify(list.toArray(encoding.fromCodePointList([0xffff])))
76
+ if (result !== '[65535]') { throw result }
77
+ }
78
+
79
+ {
80
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x10000])))
81
+ if (result !== '[55296,56320]') { throw result }
82
+ }
83
+
84
+ {
85
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x10437])))
86
+ if (result !== '[55297,56375]') { throw result }
87
+ }
88
+
89
+ {
90
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x24B62])))
91
+ if (result !== '[55378,57186]') { throw result }
92
+ }
93
+
94
+ {
95
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x10ffff])))
96
+ if (result !== '[56319,57343]') { throw result }
97
+ }
98
+
99
+ {
100
+ const result = stringify(list.toArray(encoding.fromCodePointList([-1, 0xd800, 0xdfff, 0x110000])))
101
+ if (result !== '[65535,55296,57343,0]') { throw result }
102
+ }
103
+
104
+ {
105
+ const utf16List = encoding.stringToList("Hello world!😂🚜🚲")
106
+ const result = encoding.listToString(utf16List)
107
+ if (result !== "Hello world!😂🚜🚲") { throw result }
108
+ }
109
+
110
+ {
111
+ const a = encoding.stringToList("Hello world!😂🚜🚲")
112
+ const b = encoding.toCodePointList(a)
113
+ const c = encoding.fromCodePointList(b)
114
+ const result = encoding.listToString(c)
115
+ if (result !== "Hello world!😂🚜🚲") { throw result }
116
+ }
117
+
118
+ module.exports = {}
@@ -0,0 +1,96 @@
1
+ const list = require('../../types/list/module.f.cjs')
2
+ const operator = require('../../types/function/operator/module.f.cjs')
3
+ const array = require('../../types/array/module.f.cjs')
4
+ const { flatMap } = list
5
+
6
+ /** @typedef {u8|undefined} ByteOrEof */
7
+
8
+ /** @typedef {array.Array1<number>|array.Array2<number>|array.Array3<number>} Utf8NonEmptyState */
9
+
10
+ /** @typedef {undefined|Utf8NonEmptyState} Utf8State */
11
+
12
+ /** @typedef {number} u8 */
13
+
14
+ /** @typedef {number} i32 */
15
+
16
+ const errorMask = 0b1000_0000_0000_0000_0000_0000_0000_0000
17
+
18
+ /** @type {(input:number) => list.List<u8>} */
19
+ const codePointToUtf8 = input =>
20
+ {
21
+ if (input >= 0x0000 && input <= 0x007f) { return [input & 0b01111_1111] }
22
+ if (input >= 0x0080 && input <= 0x07ff) { return [input >> 6 | 0b1100_0000, input & 0b0011_1111 | 0b1000_0000] }
23
+ if (input >= 0x0800 && input <= 0xffff) { return [input >> 12 | 0b1110_0000, input >> 6 & 0b0011_1111 | 0b1000_0000, input & 0b0011_1111 | 0b1000_0000] }
24
+ if (input >= 0x10000 && input <= 0x10ffff) { return [input >> 18 | 0b1111_0000, input >> 12 & 0b0011_1111 | 0b1000_0000, input >> 6 & 0b0011_1111 | 0b1000_0000, input & 0b0011_1111 | 0b1000_0000] }
25
+ if ((input & errorMask) !== 0) {
26
+ if ((input & 0b1000_0000_0000_0000) !== 0) { return [input >> 12 & 0b0000_0111 | 0b1111_0000, input >> 6 & 0b0011_1111 | 0b1000_0000, input & 0b0011_1111 | 0b1000_0000] }
27
+ if ((input & 0b0000_0100_0000_0000) !== 0) { return [input >> 6 & 0b0000_1111 | 0b1110_0000, input & 0b0011_1111 | 0b1000_0000] }
28
+ if ((input & 0b0000_0010_0000_0000) !== 0) { return [input >> 6 & 0b0000_0111 | 0b1111_0000, input & 0b0011_1111 | 0b1000_0000] }
29
+ if ((input & 0b0000_0000_1000_0000) !== 0) { return [input & 0b1111_1111] }
30
+ }
31
+ return [errorMask]
32
+ }
33
+
34
+ const fromCodePointList = flatMap(codePointToUtf8)
35
+
36
+ /** @type {(state: Utf8NonEmptyState) => i32}*/
37
+ const utf8StateToError = state => {
38
+ switch(state.length) {
39
+ case 1:
40
+ return state[0] | errorMask
41
+ case 2:
42
+ if (state[0] < 0b1111_0000) return (((state[0] & 0b0000_1111) << 6) + (state[1] & 0b0011_1111) + 0b0000_0100_0000_0000) | errorMask
43
+ return (((state[0] & 0b0000_0111) << 6) + (state[1] & 0b0011_1111) + 0b0000_0010_0000_0000) | errorMask
44
+ case 3:
45
+ return (((state[0] & 0b0000_0111) << 12) + ((state[1] & 0b0011_1111) << 6) + (state[2] & 0b0011_1111) + 0b1000_0000_0000_0000) | errorMask
46
+ }
47
+ }
48
+
49
+ /** @type {operator.StateScan<number, Utf8State, list.List<i32>>} */
50
+ const utf8ByteToCodePointOp = state => byte => {
51
+ if (byte < 0x00 || byte > 0xff) {
52
+ return [[errorMask], state]
53
+ }
54
+ if (state == undefined) {
55
+ if (byte < 0b1000_0000) { return [[byte], undefined] }
56
+ if (byte >= 0b1100_0010 && byte <= 0b1111_0100) { return [[], [byte]] }
57
+ return [[byte | errorMask], undefined]
58
+ }
59
+ if (byte >= 0b1000_0000 && byte < 0b1100_0000) {
60
+ switch(state.length) {
61
+ case 1:
62
+ if (state[0] < 0b1110_0000) { return [[((state[0] & 0b0001_1111) << 6) + (byte & 0b0011_1111)], undefined] }
63
+ if (state[0] < 0b1111_1000) { return [[], [state[0], byte]] }
64
+ break
65
+ case 2:
66
+ if (state[0] < 0b1111_0000) { return [[((state[0] & 0b0000_1111) << 12) + ((state[1] & 0b0011_1111) << 6) + (byte & 0b0011_1111)], undefined] }
67
+ if (state[0] < 0b1111_1000) { return [[], [state[0], state[1], byte]] }
68
+ break
69
+ case 3:
70
+ return [[((state[0] & 0b0000_0111) << 18) + ((state[1] & 0b0011_1111) << 12) + ((state[2] & 0b0011_1111) << 6) + (byte & 0b0011_1111)], undefined]
71
+ }
72
+ }
73
+ const error = utf8StateToError(state)
74
+ if (byte < 0b1000_0000) { return [[error, byte], undefined] }
75
+ if (byte >= 0b1100_0010 && byte <= 0b1111_0100) { return [[error], [byte]] }
76
+ return [[error, byte | errorMask], undefined]
77
+ }
78
+
79
+ /** @type {(state: Utf8State) => readonly[list.List<i32>, Utf8State]} */
80
+ const utf8EofToCodePointOp = state => {
81
+ if (state === undefined) { return [undefined, undefined] }
82
+ return [[utf8StateToError(state)], undefined]
83
+ }
84
+
85
+ /** @type {operator.StateScan<ByteOrEof, Utf8State, list.List<i32>>} */
86
+ const utf8ByteOrEofToCodePointOp = state => input => input === undefined ? utf8EofToCodePointOp(state) : utf8ByteToCodePointOp(state)(input)
87
+
88
+ /** @type {(input: list.List<u8>) => list.List<i32>} */
89
+ const toCodePointList = input => list.flat(list.stateScan(utf8ByteOrEofToCodePointOp)(undefined)(list.concat(/** @type {list.List<ByteOrEof>} */(input))([undefined])))
90
+
91
+ module.exports = {
92
+ /** @readonly */
93
+ fromCodePointList,
94
+ /** @readonly */
95
+ toCodePointList
96
+ }
@@ -0,0 +1,144 @@
1
+ const encoding = require('./module.f.cjs')
2
+ const json = require('../../json/module.f.cjs')
3
+ const { sort } = require('../../types/object/module.f.cjs')
4
+ const { list } = require('../../types/module.f.cjs')
5
+
6
+ /** @type {(a: readonly json.Unknown[]) => string} */
7
+ const stringify = a => json.stringify(sort)(a)
8
+
9
+ {
10
+ const result = stringify(list.toArray(encoding.toCodePointList([-1, 256])))
11
+ if (result !== '[2147483648,2147483648]') { throw result }
12
+ }
13
+
14
+ {
15
+ const result = stringify(list.toArray(encoding.toCodePointList([128, 193, 245, 255])))
16
+ if (result !== '[-2147483520,-2147483455,-2147483403,-2147483393]') { throw result }
17
+ }
18
+
19
+ {
20
+ const result = stringify(list.toArray(encoding.toCodePointList([0, 1, 127])))
21
+ if (result !== '[0,1,127]') { throw result }
22
+ }
23
+
24
+ {
25
+ const result = stringify(list.toArray(encoding.toCodePointList([194, 128, 194, 169, 223, 191])))
26
+ if (result !== '[128,169,2047]') { throw result }
27
+ }
28
+
29
+ {
30
+ const result = stringify(list.toArray(encoding.toCodePointList([194, 194, 127, 194, 192, 194])))
31
+ if (result !== '[-2147483454,-2147483454,127,-2147483454,-2147483456,-2147483454]') { throw result }
32
+ }
33
+
34
+ {
35
+ const result = stringify(list.toArray(encoding.toCodePointList([224, 160, 128, 224, 160, 129, 239, 191, 191])))
36
+ if (result !== '[2048,2049,65535]') { throw result }
37
+ }
38
+
39
+ {
40
+ const result = stringify(list.toArray(encoding.toCodePointList([224, 224, 160, 127, 239, 191])))
41
+ if (result !== '[-2147483424,-2147482592,127,-2147481601]') { throw result }
42
+ }
43
+
44
+ {
45
+ const result = stringify(list.toArray(encoding.toCodePointList([240, 144, 128, 128, 240, 144, 128, 129, 244, 143, 191, 191])))
46
+ if (result !== '[65536,65537,1114111]') { throw result }
47
+ }
48
+
49
+ {
50
+ const result = stringify(list.toArray(encoding.toCodePointList([240, 240, 160, 127, 244, 191])))
51
+ if (result !== '[-2147483408,-2147483104,127,-2147482817]') { throw result }
52
+ }
53
+
54
+ {
55
+ const result = stringify(list.toArray(encoding.toCodePointList([240, 160, 160, 244, 160, 160])))
56
+ if (result !== '[-2147448800,-2147432416]') { throw result }
57
+ }
58
+
59
+ {
60
+ const result = stringify(list.toArray(encoding.fromCodePointList([0,1,0x7F])))
61
+ if (result !== '[0,1,127]') { throw result }
62
+ }
63
+
64
+ {
65
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x80])))
66
+ if (result !== '[194,128]') { throw result }
67
+ }
68
+
69
+ {
70
+ const result = stringify(list.toArray(encoding.fromCodePointList([0xa9])))
71
+ if (result !== '[194,169]') { throw result }
72
+ }
73
+
74
+ {
75
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x7ff])))
76
+ if (result !== '[223,191]') { throw result }
77
+ }
78
+
79
+ {
80
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x800])))
81
+ if (result !== '[224,160,128]') { throw result }
82
+ }
83
+
84
+ {
85
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x801])))
86
+ if (result !== '[224,160,129]') { throw result }
87
+ }
88
+
89
+ {
90
+ const result = stringify(list.toArray(encoding.fromCodePointList([0xffff])))
91
+ if (result !== '[239,191,191]') { throw result }
92
+ }
93
+
94
+ {
95
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x10000])))
96
+ if (result !== '[240,144,128,128]') { throw result }
97
+ }
98
+
99
+ {
100
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x10001])))
101
+ if (result !== '[240,144,128,129]') { throw result }
102
+ }
103
+
104
+ {
105
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x10FFFF])))
106
+ if (result !== '[244,143,191,191]') { throw result }
107
+ }
108
+
109
+ {
110
+ const result = stringify(list.toArray(encoding.fromCodePointList([0x110000,2147483648])))
111
+ if (result !== '[2147483648,2147483648]') { throw result }
112
+ }
113
+
114
+ {
115
+ const codePointList = encoding.toCodePointList([128, 193, 245, 255])
116
+ const result = stringify(list.toArray(encoding.fromCodePointList(codePointList)))
117
+ if (result !== '[128,193,245,255]') { throw result }
118
+ }
119
+
120
+ {
121
+ const codePointList = encoding.toCodePointList([194, 194, 127, 194, 192, 194])
122
+ const result = stringify(list.toArray(encoding.fromCodePointList(codePointList)))
123
+ if (result !== '[194,194,127,194,192,194]') { throw result }
124
+ }
125
+
126
+ {
127
+ const codePointList = encoding.toCodePointList([224, 224, 160, 127, 239, 191])
128
+ const result = stringify(list.toArray(encoding.fromCodePointList(codePointList)))
129
+ if (result !== '[224,224,160,127,239,191]') { throw result }
130
+ }
131
+
132
+ {
133
+ const codePointList = encoding.toCodePointList([240, 240, 160, 127, 244, 191])
134
+ const result = stringify(list.toArray(encoding.fromCodePointList(codePointList)))
135
+ if (result !== '[240,240,160,127,244,191]') { throw result }
136
+ }
137
+
138
+ {
139
+ const codePointList = encoding.toCodePointList([240, 160, 160, 244, 160, 160])
140
+ const result = stringify(list.toArray(encoding.fromCodePointList(codePointList)))
141
+ if (result !== '[240,160,160,244,160,160]') { throw result }
142
+ }
143
+
144
+ module.exports = {}
@@ -1,154 +0,0 @@
1
- const result = require('../../types/result/module.f.cjs')
2
- const list = require('../../types/list/module.f.cjs')
3
- const operator = require('../../types/function/operator/module.f.cjs')
4
- const array = require('../../types/array/module.f.cjs')
5
- const { contains } = require('../../types/range/module.f.cjs')
6
- const { compose } = require('../../types/function/module.f.cjs')
7
- const { map, flat, stateScan, concat, reduce, toArray, flatMap } = list
8
- const { ok, error } = result
9
-
10
- /** @typedef {result.Result<number,number>} ByteResult */
11
-
12
- /** @typedef {result.Result<number,readonly number[]>} CodePointResult */
13
-
14
- /** @typedef {number|undefined} ByteOrEof */
15
-
16
- /** @typedef {u16|undefined} WordOrEof */
17
-
18
- /** @typedef {undefined|array.Array1<number>|array.Array2<number>|array.Array3<number>} Utf8State */
19
-
20
- /** @typedef {undefined|number} Utf16State */
21
-
22
- /** @typedef {number} u16 */
23
-
24
- /** @typedef {number} i32 */
25
-
26
- /** @type {(a:number) => boolean} */
27
- const isBmpCodePoint = a => a >= 0x0000 && a <= 0xd7ff || a >= 0xe000 && a <= 0xffff
28
-
29
- const isHighSurrogate = contains([0xd800, 0xdbff])
30
-
31
- /** @type {(a:number) => boolean} */
32
- const isLowSurrogate = contains([0xdc00, 0xdfff])
33
-
34
- const errorMask = 0b1000_0000_0000_0000_0000_0000_0000_0000
35
-
36
- /** @type {(input:number) => list.List<ByteResult>} */
37
- const codePointToUtf8 = input =>
38
- {
39
- if (input >= 0x0000 && input <= 0x007f) { return [ok(input & 0x7f)] }
40
- if (input >= 0x0080 && input <= 0x07ff) { return [ok(input >> 6 | 0xc0), ok(input & 0x3f | 0x80)] }
41
- if (input >= 0x0800 && input <= 0xffff) { return [ok(input >> 12 | 0xe0), ok(input >> 6 & 0x3f | 0x80), ok(input & 0x3f | 0x80)] }
42
- if (input >= 0x10000 && input <= 0x10ffff) { return [ok(input >> 18 | 0xf0), ok(input >> 12 & 0x3f | 0x80), ok(input >> 6 & 0x3f | 0x80), ok(input & 0x3f | 0x80)] }
43
- return [error(input)]
44
- }
45
-
46
- /** @type {(input:i32) => list.List<u16>} */
47
- const codePointToUtf16 = input =>
48
- {
49
- if (isBmpCodePoint(input)) { return [input] }
50
- if (input >= 0x010000 && input <= 0x10ffff) {
51
- const high = ((input - 0x10000) >> 10) + 0xd800
52
- const low = ((input - 0x10000) & 0b0011_1111_1111) + 0xdc00
53
- return [high, low]
54
- }
55
- return [input & 0xffff]
56
- }
57
-
58
- const codePointListToUtf8List = flatMap(codePointToUtf8)
59
-
60
- const codePointListToUtf16List = flatMap(codePointToUtf16)
61
-
62
- /** @type {operator.StateScan<number, Utf8State, list.List<CodePointResult>>} */
63
- const utf8ByteToCodePointOp = state => byte => {
64
- if (byte < 0x00 || byte > 0xff) {
65
- return [[error([byte])], state]
66
- }
67
- if (state == undefined) {
68
- if (byte < 0x80) { return [[ok(byte)], undefined] }
69
- if (byte >= 0xc2 && byte <= 0xf4) { return [[], [byte]] }
70
- return [[error([byte])], undefined]
71
- }
72
- if (byte >= 0x80 && byte < 0xc0)
73
- {
74
- switch(state.length)
75
- {
76
- case 1:
77
- if (state[0] < 0xe0) { return [[ok(((state[0] & 0x1f) << 6) + (byte & 0x3f))], undefined] }
78
- if (state[0] < 0xf8) { return [[], [state[0], byte]] }
79
- break
80
- case 2:
81
- if (state[0] < 0xf0) { return [[ok(((state[0] & 0x0f) << 12) + ((state[1] & 0x3f) << 6) + (byte & 0x3f))], undefined] }
82
- if (state[0] < 0xf8) { return [[], [state[0], state[1], byte]] }
83
- break
84
- case 3:
85
- return [[ok(((state[0] & 0x07) << 18) + ((state[1] & 0x3f) << 12) + ((state[2] & 0x3f) << 6) + (byte & 0x3f))], undefined]
86
- }
87
- }
88
- return [[error(toArray(concat(state)([byte])))], undefined]
89
- }
90
-
91
- /** @type {(state: Utf8State) => readonly[list.List<CodePointResult>, Utf8State]} */
92
- const utf8EofToCodePointOp = state => [state === undefined ? undefined : [error(state)], undefined]
93
-
94
- /** @type {operator.StateScan<ByteOrEof, Utf8State, list.List<CodePointResult>>} */
95
- const utf8ByteOrEofToCodePointOp = state => input => input === undefined ? utf8EofToCodePointOp(state) : utf8ByteToCodePointOp(state)(input)
96
-
97
- /** @type {(input: list.List<number>) => list.List<CodePointResult>} */
98
- const utf8ListToCodePointList = input => flat(stateScan(utf8ByteOrEofToCodePointOp)(undefined)(concat(/** @type {list.List<ByteOrEof>} */(input))([undefined])))
99
-
100
- /** @type {operator.StateScan<u16, Utf16State, list.List<i32>>} */
101
- const utf16ByteToCodePointOp = state => byte => {
102
- if (byte < 0x00 || byte > 0xffff) {
103
- return [[0xffffffff], state]
104
- }
105
- if (state === undefined) {
106
- if (isBmpCodePoint(byte)) { return [[byte], undefined] }
107
- if (isHighSurrogate(byte)) { return [[], byte] }
108
- return [[byte | errorMask], undefined]
109
- }
110
- if (isLowSurrogate(byte)) {
111
- const high = state - 0xd800
112
- const low = byte - 0xdc00
113
- return [[(high << 10) + low + 0x10000], undefined]
114
- }
115
- if (isBmpCodePoint(byte)) { return [[state | errorMask, byte], undefined] }
116
- if (isHighSurrogate(byte)) { return [[state | errorMask], byte] }
117
- return [[state | errorMask, byte | errorMask], undefined]
118
- }
119
-
120
- /** @type {(state: Utf16State) => readonly[list.List<i32>, Utf16State]} */
121
- const utf16EofToCodePointOp = state => [state === undefined ? undefined : [state | errorMask], undefined]
122
-
123
- /** @type {operator.StateScan<WordOrEof, Utf16State, list.List<i32>>} */
124
- const utf16ByteOrEofToCodePointOp = state => input => input === undefined ? utf16EofToCodePointOp(state) : utf16ByteToCodePointOp(state)(input)
125
-
126
- /** @type {(input: list.List<u16>) => list.List<i32>} */
127
- const utf16ListToCodePointList = input => flat(stateScan(utf16ByteOrEofToCodePointOp)(undefined)(concat(/** @type {list.List<WordOrEof>} */(input))([undefined])))
128
-
129
- /** @type {(s: string) => list.List<u16>} */
130
- const stringToUtf16List = s => {
131
- /** @type {(i: number) => list.Result<number>} */
132
- const at = i => {
133
- const first = s.charCodeAt(i)
134
- return isNaN(first) ? undefined : { first, tail: () => at(i + 1) }
135
- }
136
- return at(0)
137
- }
138
-
139
- const utf16ListToString = compose(map(String.fromCharCode))(reduce(operator.concat)(''))
140
-
141
- module.exports = {
142
- /** @readonly */
143
- codePointListToUtf8List,
144
- /** @readonly */
145
- codePointListToUtf16List,
146
- /** @readonly */
147
- utf8ListToCodePointList,
148
- /** @readonly */
149
- utf16ListToCodePointList,
150
- /** @readonly */
151
- stringToUtf16List,
152
- /** @readonly */
153
- utf16ListToString
154
- }
@@ -1,223 +0,0 @@
1
- const encoding = require('./module.f.cjs')
2
- const json = require('../../json/module.f.cjs')
3
- const { sort } = require('../../types/object/module.f.cjs')
4
- const { list } = require('../../types/module.f.cjs')
5
-
6
- /** @type {(a: readonly json.Unknown[]) => string} */
7
- const stringify = a => json.stringify(sort)(a)
8
-
9
- {
10
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([0,1,0x7F])))
11
- if (result !== '[["ok",0],["ok",1],["ok",127]]') { throw result }
12
- }
13
-
14
- {
15
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([0x80])))
16
- if (result !== '[["ok",194],["ok",128]]') { throw result }
17
- }
18
-
19
- {
20
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([0xa9])))
21
- if (result !== '[["ok",194],["ok",169]]') { throw result }
22
- }
23
-
24
- {
25
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([0x7ff])))
26
- if (result !== '[["ok",223],["ok",191]]') { throw result }
27
- }
28
-
29
- {
30
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([0x800])))
31
- if (result !== '[["ok",224],["ok",160],["ok",128]]') { throw result }
32
- }
33
-
34
- {
35
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([0x801])))
36
- if (result !== '[["ok",224],["ok",160],["ok",129]]') { throw result }
37
- }
38
-
39
- {
40
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([0xffff])))
41
- if (result !== '[["ok",239],["ok",191],["ok",191]]') { throw result }
42
- }
43
-
44
- {
45
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([0x10000])))
46
- if (result !== '[["ok",240],["ok",144],["ok",128],["ok",128]]') { throw result }
47
- }
48
-
49
- {
50
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([0x10001])))
51
- if (result !== '[["ok",240],["ok",144],["ok",128],["ok",129]]') { throw result }
52
- }
53
-
54
- {
55
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([0x10FFFF])))
56
- if (result !== '[["ok",244],["ok",143],["ok",191],["ok",191]]') { throw result }
57
- }
58
-
59
- {
60
- const result = stringify(list.toArray(encoding.codePointListToUtf8List([-1,0x110000])))
61
- if (result !== '[["error",-1],["error",1114112]]') { throw result }
62
- }
63
-
64
- {
65
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([0])))
66
- if (result !== '[0]') { throw result }
67
- }
68
-
69
- {
70
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([0x24])))
71
- if (result !== '[36]') { throw result }
72
- }
73
-
74
- {
75
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([0x20AC])))
76
- if (result !== '[8364]') { throw result }
77
- }
78
-
79
- {
80
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([0xd7ff])))
81
- if (result !== '[55295]') { throw result }
82
- }
83
-
84
- {
85
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([0xe000])))
86
- if (result !== '[57344]') { throw result }
87
- }
88
-
89
- {
90
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([0xffff])))
91
- if (result !== '[65535]') { throw result }
92
- }
93
-
94
- {
95
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([0x10000])))
96
- if (result !== '[55296,56320]') { throw result }
97
- }
98
-
99
- {
100
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([0x10437])))
101
- if (result !== '[55297,56375]') { throw result }
102
- }
103
-
104
- {
105
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([0x24B62])))
106
- if (result !== '[55378,57186]') { throw result }
107
- }
108
-
109
- {
110
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([0x10ffff])))
111
- if (result !== '[56319,57343]') { throw result }
112
- }
113
-
114
- {
115
- const result = stringify(list.toArray(encoding.codePointListToUtf16List([-1, 0xd800, 0xdfff, 0x110000])))
116
- if (result !== '[65535,55296,57343,0]') { throw result }
117
- }
118
-
119
- {
120
- const result = stringify(list.toArray(encoding.utf8ListToCodePointList([-1, 256])))
121
- if (result !== '[["error",[-1]],["error",[256]]]') { throw result }
122
- }
123
-
124
- {
125
- const result = stringify(list.toArray(encoding.utf8ListToCodePointList([128, 193, 245, 255])))
126
- if (result !== '[["error",[128]],["error",[193]],["error",[245]],["error",[255]]]') { throw result }
127
- }
128
-
129
- {
130
- const result = stringify(list.toArray(encoding.utf8ListToCodePointList([0, 1, 127])))
131
- if (result !== '[["ok",0],["ok",1],["ok",127]]') { throw result }
132
- }
133
-
134
- {
135
- const result = stringify(list.toArray(encoding.utf8ListToCodePointList([194, 128, 194, 169, 223, 191])))
136
- if (result !== '[["ok",128],["ok",169],["ok",2047]]') { throw result }
137
- }
138
-
139
- {
140
- const result = stringify(list.toArray(encoding.utf8ListToCodePointList([194, 127, 194, 192, 194])))
141
- if (result !== '[["error",[194,127]],["error",[194,192]],["error",[194]]]') { throw result }
142
- }
143
-
144
- {
145
- const result = stringify(list.toArray(encoding.utf8ListToCodePointList([224, 160, 128, 224, 160, 129, 239, 191, 191])))
146
- if (result !== '[["ok",2048],["ok",2049],["ok",65535]]') { throw result }
147
- }
148
-
149
- {
150
- const result = stringify(list.toArray(encoding.utf8ListToCodePointList([224, 160, 127, 224, 160, 192, 224, 160])))
151
- if (result !== '[["error",[224,160,127]],["error",[224,160,192]],["error",[224,160]]]') { throw result }
152
- }
153
-
154
- {
155
- const result = stringify(list.toArray(encoding.utf8ListToCodePointList([240, 144, 128, 128, 240, 144, 128, 129, 244, 143, 191, 191])))
156
- if (result !== '[["ok",65536],["ok",65537],["ok",1114111]]') { throw result }
157
- }
158
-
159
- {
160
- const result = stringify(list.toArray(encoding.utf8ListToCodePointList([240, 144, 128, 127, 240, 144, 128, 192, 240, 144, 128])))
161
- if (result !== '[["error",[240,144,128,127]],["error",[240,144,128,192]],["error",[240,144,128]]]') { throw result }
162
- }
163
-
164
- {
165
- const result = stringify(list.toArray(encoding.utf8ListToCodePointList([194, -1, 128])))
166
- if (result !== '[["error",[-1]],["ok",128]]') { throw result }
167
- }
168
-
169
- {
170
- const result = stringify(list.toArray(encoding.utf16ListToCodePointList([-1, 65536])))
171
- if (result !== '[4294967295,4294967295]') { throw result }
172
- }
173
-
174
- {
175
- const result = stringify(list.toArray(encoding.utf16ListToCodePointList([0, 36, 8364, 55295, 57344, 65535])))
176
- if (result !== '[0,36,8364,55295,57344,65535]') { throw result }
177
- }
178
-
179
- {
180
- const result = stringify(list.toArray(encoding.utf16ListToCodePointList([56320, 57343])))
181
- if (result !== '[-2147427328,-2147426305]') { throw result }
182
- }
183
-
184
- {
185
- const result = stringify(list.toArray(encoding.utf16ListToCodePointList([55296, 56320, 55297, 56375, 55378, 57186, 56319, 57343])))
186
- if (result !== '[65536,66615,150370,1114111]') { throw result }
187
- }
188
-
189
- {
190
- const result = stringify(list.toArray(encoding.utf16ListToCodePointList([55296, 55296])))
191
- if (result !== '[-2147428352,-2147428352]') { throw result }
192
- }
193
-
194
- {
195
- const result = stringify(list.toArray(encoding.utf16ListToCodePointList([55296, 0])))
196
- if (result !== '[-2147428352,0]') { throw result }
197
- }
198
-
199
- {
200
- const result = stringify(list.toArray(encoding.utf16ListToCodePointList([56320])))
201
- if (result !== '[-2147427328]') { throw result }
202
- }
203
-
204
- {
205
- const result = stringify(list.toArray(encoding.utf16ListToCodePointList([56320, 0])))
206
- if (result !== '[-2147427328,0]') { throw result }
207
- }
208
-
209
- {
210
- const utf16List = encoding.stringToUtf16List("Hello world!😂🚜🚲")
211
- const result = encoding.utf16ListToString(utf16List)
212
- if (result !== "Hello world!😂🚜🚲") { throw result }
213
- }
214
-
215
- {
216
- const a = encoding.stringToUtf16List("Hello world!😂🚜🚲")
217
- const b = encoding.utf16ListToCodePointList(a)
218
- const c = encoding.codePointListToUtf16List(b)
219
- const result = encoding.utf16ListToString(c)
220
- if (result !== "Hello world!😂🚜🚲") { throw result }
221
- }
222
-
223
- module.exports = {}