functionalscript 0.3.13 → 0.3.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "functionalscript",
3
- "version": "0.3.13",
3
+ "version": "0.3.15",
4
4
  "type": "module",
5
5
  "files": [
6
6
  "**/*.f.d.ts",
@@ -1,9 +1,38 @@
1
1
  import { type List, type Thunk } from '../../types/list/module.f.ts';
2
2
  import type { Array1, Array2, Array3 } from '../../types/array/module.f.ts';
3
+ /**
4
+ * An unsigned 8-bit integer, represents a single byte.
5
+ */
6
+ export type U8 = number;
7
+ /**
8
+ * A signed 32-bit integer.
9
+ */
10
+ export type I32 = number;
11
+ /**
12
+ * Represents an unsigned 8-bit type - U8 or the end-of-file indicator.
13
+ * The U8 represents the byte itself, and null indicates that reading does not return anything else.
14
+ */
3
15
  export type ByteOrEof = U8 | null;
16
+ /**
17
+ * Represents the state of a UTF-8 decoding operation that contains at least one byte.
18
+ */
4
19
  export type Utf8NonEmptyState = Array1<number> | Array2<number> | Array3<number>;
20
+ /**
21
+ * Represents the state of a UTF-8 decoding operation, which can be either `null` (no state)
22
+ * or a non-empty state containing one or more bytes.
23
+ */
5
24
  export type Utf8State = null | Utf8NonEmptyState;
6
- export type U8 = number;
7
- export type I32 = number;
25
+ /**
26
+ * Maps a list of Unicode code points to a stream of UTF-8 bytes.
27
+ *
28
+ * @param input - A list of Unicode code points to be converted.
29
+ * @returns A thunk that lazily produces a sequence of UTF-8 bytes.
30
+ */
8
31
  export declare const fromCodePointList: (input: List<number>) => Thunk<U8>;
32
+ /**
33
+ * Converts a list of UTF-8 bytes into a list of Unicode code points.
34
+ *
35
+ * @param input - A list of UTF-8 bytes.
36
+ * @returns A list of Unicode code points or error codes.
37
+ */
9
38
  export declare const toCodePointList: (input: List<U8>) => List<I32>;
@@ -1,5 +1,18 @@
1
- import { flatMap, flat, stateScan } from "../../types/list/module.f.js";
1
+ import { flat, flatMap, stateScan } from "../../types/list/module.f.js";
2
+ /**
3
+ * Error mask constant used to represent invalid code points or encoding errors in UTF-8.
4
+ */
2
5
  const errorMask = 0b1000_0000_0000_0000_0000_0000_0000_0000;
6
+ /**
7
+ * Converts a Unicode code point to a sequence of UTF-8 bytes.
8
+ * @param input The Unicode code point to be converted. Valid range:
9
+ * - 0x0000 to 0x007F for 1-byte sequences.
10
+ * - 0x0080 to 0x07FF for 2-byte sequences.
11
+ * - 0x0800 to 0xFFFF for 3-byte sequences.
12
+ * - 0x10000 to 0x10FFFF for 4-byte sequences.
13
+ * @returns A readonly array of UTF-8 bytes representing the input code point.
14
+ * - Returns `[errorMask]` if the input does not match valid UTF-8 encoding rules.
15
+ */
3
16
  const codePointToUtf8 = (input) => {
4
17
  if (input >= 0x0000 && input <= 0x007f) {
5
18
  return [input & 0b01111_1111];
@@ -8,28 +21,58 @@ const codePointToUtf8 = (input) => {
8
21
  return [input >> 6 | 0b1100_0000, input & 0b0011_1111 | 0b1000_0000];
9
22
  }
10
23
  if (input >= 0x0800 && input <= 0xffff) {
11
- return [input >> 12 | 0b1110_0000, input >> 6 & 0b0011_1111 | 0b1000_0000, input & 0b0011_1111 | 0b1000_0000];
24
+ return [
25
+ input >> 12 | 0b1110_0000,
26
+ input >> 6 & 0b0011_1111 | 0b1000_0000,
27
+ input & 0b0011_1111 | 0b1000_0000,
28
+ ];
12
29
  }
13
30
  if (input >= 0x10000 && input <= 0x10ffff) {
14
- return [input >> 18 | 0b1111_0000, input >> 12 & 0b0011_1111 | 0b1000_0000, input >> 6 & 0b0011_1111 | 0b1000_0000, input & 0b0011_1111 | 0b1000_0000];
31
+ return [
32
+ input >> 18 | 0b1111_0000,
33
+ input >> 12 & 0b0011_1111 | 0b1000_0000,
34
+ input >> 6 & 0b0011_1111 | 0b1000_0000,
35
+ input & 0b0011_1111 | 0b1000_0000,
36
+ ];
15
37
  }
16
38
  if ((input & errorMask) !== 0) {
17
39
  if ((input & 0b1000_0000_0000_0000) !== 0) {
18
- return [input >> 12 & 0b0000_0111 | 0b1111_0000, input >> 6 & 0b0011_1111 | 0b1000_0000, input & 0b0011_1111 | 0b1000_0000];
40
+ return [
41
+ input >> 12 & 0b0000_0111 | 0b1111_0000,
42
+ input >> 6 & 0b0011_1111 | 0b1000_0000,
43
+ input & 0b0011_1111 | 0b1000_0000,
44
+ ];
19
45
  }
20
46
  if ((input & 0b0000_0100_0000_0000) !== 0) {
21
- return [input >> 6 & 0b0000_1111 | 0b1110_0000, input & 0b0011_1111 | 0b1000_0000];
47
+ return [
48
+ input >> 6 & 0b0000_1111 | 0b1110_0000,
49
+ input & 0b0011_1111 | 0b1000_0000,
50
+ ];
22
51
  }
23
52
  if ((input & 0b0000_0010_0000_0000) !== 0) {
24
- return [input >> 6 & 0b0000_0111 | 0b1111_0000, input & 0b0011_1111 | 0b1000_0000];
53
+ return [
54
+ input >> 6 & 0b0000_0111 | 0b1111_0000,
55
+ input & 0b0011_1111 | 0b1000_0000,
56
+ ];
25
57
  }
26
- if ((input & 0b0000_0000_1000_0000) !== 0) {
58
+ if ((input & 0b0000_0000_1000_0000) !== 0)
27
59
  return [input & 0b1111_1111];
28
- }
29
60
  }
30
61
  return [errorMask];
31
62
  };
63
+ /**
64
+ * Maps a list of Unicode code points to a stream of UTF-8 bytes.
65
+ *
66
+ * @param input - A list of Unicode code points to be converted.
67
+ * @returns A thunk that lazily produces a sequence of UTF-8 bytes.
68
+ */
32
69
  export const fromCodePointList = flatMap(codePointToUtf8);
70
+ /**
71
+ * Converts a non-empty UTF-8 decoding state to an error code.
72
+ *
73
+ * @param state - A non-empty UTF-8 decoding state.
74
+ * @returns An I32 error code derived from the invalid UTF-8 state.
75
+ */
33
76
  const utf8StateToError = (state) => {
34
77
  let x;
35
78
  switch (state.length) {
@@ -41,12 +84,14 @@ const utf8StateToError = (state) => {
41
84
  const [s0, s1] = state;
42
85
  x = s0 < 0b1111_0000
43
86
  ? ((s0 & 0b0000_1111) << 6) + (s1 & 0b0011_1111) + 0b0000_0100_0000_0000
44
- : ((s0 & 0b0000_0111) << 6) + (s1 & 0b0011_1111) + 0b0000_0010_0000_0000;
87
+ : ((s0 & 0b0000_0111) << 6) + (s1 & 0b0011_1111) +
88
+ 0b0000_0010_0000_0000;
45
89
  break;
46
90
  }
47
91
  case 3: {
48
92
  const [s0, s1, s2] = state;
49
- x = ((s0 & 0b0000_0111) << 12) + ((s1 & 0b0011_1111) << 6) + (s2 & 0b0011_1111) + 0b1000_0000_0000_0000;
93
+ x = ((s0 & 0b0000_0111) << 12) + ((s1 & 0b0011_1111) << 6) +
94
+ (s2 & 0b0011_1111) + 0b1000_0000_0000_0000;
50
95
  break;
51
96
  }
52
97
  default:
@@ -54,17 +99,24 @@ const utf8StateToError = (state) => {
54
99
  }
55
100
  return x | errorMask;
56
101
  };
57
- const utf8ByteToCodePointOp = state => byte => {
102
+ /**
103
+ * Decodes a byte into a Unicode code point, using a given UTF-8 state.
104
+ *
105
+ * @param state - The current UTF-8 decoding state.
106
+ * @param byte - A single byte to decode.
107
+ * @returns A tuple containing:
108
+ * - A list of decoded Unicode code points or error codes.
109
+ * - The updated UTF-8 state.
110
+ */
111
+ const utf8ByteToCodePointOp = (state) => (byte) => {
58
112
  if (byte < 0x00 || byte > 0xff) {
59
113
  return [[errorMask], state];
60
114
  }
61
115
  if (state === null) {
62
- if (byte < 0b1000_0000) {
116
+ if (byte < 0b1000_0000)
63
117
  return [[byte], null];
64
- }
65
- if (byte >= 0b1100_0010 && byte <= 0b1111_0100) {
118
+ if (byte >= 0b1100_0010 && byte <= 0b1111_0100)
66
119
  return [[], [byte]];
67
- }
68
120
  return [[byte | errorMask], null];
69
121
  }
70
122
  if (byte >= 0b1000_0000 && byte < 0b1100_0000) {
@@ -74,37 +126,73 @@ const utf8ByteToCodePointOp = state => byte => {
74
126
  if (s0 < 0b1110_0000) {
75
127
  return [[((s0 & 0b0001_1111) << 6) + (byte & 0b0011_1111)], null];
76
128
  }
77
- if (s0 < 0b1111_1000) {
129
+ if (s0 < 0b1111_1000)
78
130
  return [[], [s0, byte]];
79
- }
80
131
  break;
81
132
  }
82
133
  case 2: {
83
134
  const [s0, s1] = state;
84
135
  if (s0 < 0b1111_0000) {
85
- return [[((s0 & 0b0000_1111) << 12) + ((s1 & 0b0011_1111) << 6) + (byte & 0b0011_1111)], null];
136
+ return [[
137
+ ((s0 & 0b0000_1111) << 12) + ((s1 & 0b0011_1111) << 6) +
138
+ (byte & 0b0011_1111),
139
+ ], null];
86
140
  }
87
- if (s0 < 0b1111_1000) {
141
+ if (s0 < 0b1111_1000)
88
142
  return [[], [s0, s1, byte]];
89
- }
90
143
  break;
91
144
  }
92
145
  case 3: {
93
146
  const [s0, s1, s2] = state;
94
- return [[((s0 & 0b0000_0111) << 18) + ((s1 & 0b0011_1111) << 12) + ((s2 & 0b0011_1111) << 6) + (byte & 0b0011_1111)], null];
147
+ return [[
148
+ ((s0 & 0b0000_0111) << 18) + ((s1 & 0b0011_1111) << 12) +
149
+ ((s2 & 0b0011_1111) << 6) + (byte & 0b0011_1111),
150
+ ], null];
95
151
  }
96
152
  }
97
153
  }
98
154
  const error = utf8StateToError(state);
99
- if (byte < 0b1000_0000) {
155
+ if (byte < 0b1000_0000)
100
156
  return [[error, byte], null];
101
- }
102
- if (byte >= 0b1100_0010 && byte <= 0b1111_0100) {
157
+ if (byte >= 0b1100_0010 && byte <= 0b1111_0100)
103
158
  return [[error], [byte]];
104
- }
105
159
  return [[error, byte | errorMask], null];
106
160
  };
107
- const utf8EofToCodePointOp = (state) => [state === null ? null : [utf8StateToError(state)], null];
108
- const utf8ByteOrEofToCodePointOp = state => input => input === null ? utf8EofToCodePointOp(state) : utf8ByteToCodePointOp(state)(input);
161
+ /**
162
+ * Handles the end-of-file (EOF) case for UTF-8 decoding.
163
+ *
164
+ * @param state - The current UTF-8 decoding state.
165
+ * @returns A tuple containing:
166
+ * - A list of decoded Unicode code points or error codes.
167
+ * - The reset UTF-8 state (`null`).
168
+ */
169
+ const utf8EofToCodePointOp = (state) => [
170
+ state === null ? null : [utf8StateToError(state)],
171
+ null,
172
+ ];
173
+ /**
174
+ * Combines UTF-8 byte and EOF handling into a single decoding operation.
175
+ *
176
+ * @param state - The current UTF-8 decoding state.
177
+ * @param input - The next byte or EOF indicator.
178
+ * @returns A tuple containing:
179
+ * - A list of decoded Unicode code points or error codes.
180
+ * - The updated UTF-8 state.
181
+ */
182
+ const utf8ByteOrEofToCodePointOp = (state) => (input) => input === null ? utf8EofToCodePointOp(state) : utf8ByteToCodePointOp(state)(input);
183
+ /**
184
+ * A constant representing the end-of-file (EOF) marker for UTF-8 decoding.
185
+ *
186
+ * @remarks
187
+ * This is used as a sentinel value in decoding operations to signify the
188
+ * termination of input. The list contains a single `null` value, which
189
+ * represents the EOF condition.
190
+ */
109
191
  const eofList = [null];
110
- export const toCodePointList = input => flat(stateScan(utf8ByteOrEofToCodePointOp)(null)(flat([input, eofList])));
192
+ /**
193
+ * Converts a list of UTF-8 bytes into a list of Unicode code points.
194
+ *
195
+ * @param input - A list of UTF-8 bytes.
196
+ * @returns A list of Unicode code points or error codes.
197
+ */
198
+ export const toCodePointList = (input) => flat(stateScan(utf8ByteOrEofToCodePointOp)(null)(flat([input, eofList])));
@@ -18,8 +18,8 @@ export declare const length: (v: bigint) => bigint;
18
18
  *
19
19
  * ```js
20
20
  * const vec4 = vec(4n)
21
- * const v0 = vec4(5n) // 0x15n
22
- * const v1 = vec4(0x5FEn) // 0x1En
21
+ * const v0 = vec4(5n) // 0x15n = 0b1_0101
22
+ * const v1 = vec4(0x5FEn) // 0x1En = 0b1_1110
23
23
  * ```
24
24
  */
25
25
  export declare const vec: (len: bigint) => (ui: bigint) => Vec;
@@ -1,3 +1,16 @@
1
+ /**
2
+ * MSb is most-significant bit first.
3
+ * ```
4
+ * - byte: 0x53 = 0b0101_0011
5
+ * - 0123_4567
6
+ * ```
7
+ * LSb is least-significant bit first.
8
+ * ```
9
+ * - byte: 0x53 = 0b0101_0011
10
+ * - 7654_3210
11
+ * ```
12
+ * @module
13
+ */
1
14
  import { log2, mask } from "../bigint/module.f.js";
2
15
  import { flip } from "../function/module.f.js";
3
16
  import { fold } from "../list/module.f.js";
@@ -16,8 +29,8 @@ export const length = log2;
16
29
  *
17
30
  * ```js
18
31
  * const vec4 = vec(4n)
19
- * const v0 = vec4(5n) // 0x15n
20
- * const v1 = vec4(0x5FEn) // 0x1En
32
+ * const v0 = vec4(5n) // 0x15n = 0b1_0101
33
+ * const v1 = vec4(0x5FEn) // 0x1En = 0b1_1110
21
34
  * ```
22
35
  */
23
36
  export const vec = (len) => {
@@ -73,7 +86,7 @@ export const lsb = {
73
86
  const aLen = length(a);
74
87
  const m = mask(aLen);
75
88
  return b => (b << aLen) | (a & m);
76
- }
89
+ },
77
90
  };
78
91
  /**
79
92
  * Implements operations for handling vectors in a most-significant-bit (MSb) first order.
@@ -95,7 +108,7 @@ export const msb = {
95
108
  return [(v >> d) & m, vec(d)(v)];
96
109
  };
97
110
  },
98
- concat: flip(lsb.concat)
111
+ concat: flip(lsb.concat),
99
112
  };
100
113
  const appendU8 = ({ concat }) => (u8) => (a) => concat(a)(vec8(BigInt(u8)));
101
114
  /**