functionalscript 0.4.2 → 0.4.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/bnf/data/module.f.d.ts +12 -0
- package/bnf/data/module.f.js +85 -0
- package/bnf/data/test.f.d.ts +4 -0
- package/bnf/data/test.f.js +8 -0
- package/bnf/module.f.d.ts +55 -0
- package/bnf/module.f.js +98 -0
- package/bnf/test.f.d.ts +4 -0
- package/bnf/test.f.js +7 -0
- package/bnf/testlib.f.d.ts +3 -0
- package/bnf/{tag/test.f.js → testlib.f.js} +48 -44
- package/crypto/hmac/module.f.d.ts +28 -0
- package/crypto/hmac/module.f.js +60 -0
- package/crypto/hmac/test.f.d.ts +6 -0
- package/crypto/hmac/test.f.js +24 -0
- package/crypto/secp/module.f.d.ts +8 -0
- package/crypto/secp/module.f.js +30 -0
- package/crypto/secp/test.f.d.ts +3 -0
- package/crypto/secp/test.f.js +67 -3
- package/crypto/sha2/module.f.d.ts +10 -0
- package/crypto/sha2/module.f.js +11 -2
- package/crypto/sha2/test.f.js +28 -21
- package/djs/ast/module.f.d.ts +10 -0
- package/djs/ast/module.f.js +52 -0
- package/djs/ast/test.f.d.ts +8 -0
- package/djs/ast/test.f.js +41 -0
- package/djs/module.f.d.ts +5 -15
- package/djs/module.f.js +1 -62
- package/djs/parser/module.f.d.ts +9 -9
- package/djs/parser/module.f.js +7 -8
- package/djs/parser/test.f.js +97 -97
- package/djs/serializer/module.f.d.ts +8 -4
- package/djs/serializer/module.f.js +9 -27
- package/djs/{test.f.d.ts → serializer/test.f.d.ts} +3 -3
- package/djs/{test.f.js → serializer/test.f.js} +13 -13
- package/djs/tokenizer/test.f.js +2 -2
- package/djs/transpiler/module.f.d.ts +15 -0
- package/djs/transpiler/module.f.js +57 -0
- package/djs/transpiler/test.f.d.ts +8 -0
- package/djs/transpiler/test.f.js +74 -0
- package/io/module.f.d.ts +12 -0
- package/io/virtual-io.f.d.ts +3 -0
- package/io/virtual-io.f.js +10 -0
- package/js/tokenizer/test.f.js +2 -2
- package/json/module.f.d.ts +2 -1
- package/json/tokenizer/test.f.js +2 -2
- package/nanvm-lib/tests/test.f.js +4 -4
- package/package.json +2 -2
- package/path/module.f.d.ts +2 -0
- package/path/module.f.js +34 -0
- package/path/test.f.d.ts +5 -0
- package/path/test.f.js +49 -0
- package/text/sgr/module.f.d.ts +19 -1
- package/text/sgr/module.f.js +26 -1
- package/text/sgr/test.f.d.ts +2 -0
- package/text/sgr/test.f.js +8 -0
- package/text/utf16/module.f.d.ts +116 -0
- package/text/utf16/module.f.js +285 -0
- package/text/utf16/test.f.d.ts +4 -0
- package/text/utf16/test.f.js +28 -0
- package/types/bigint/module.f.d.ts +83 -2
- package/types/bigint/module.f.js +98 -2
- package/types/bigint/test.f.d.ts +4 -0
- package/types/bigint/test.f.js +66 -6
- package/types/bit_vec/module.f.d.ts +1 -1
- package/types/monoid/module.f.d.ts +3 -1
- package/types/monoid/module.f.js +2 -0
- package/types/number/module.f.d.ts +4 -0
- package/types/number/module.f.js +16 -0
- package/types/number/test.f.d.ts +1 -0
- package/types/number/test.f.js +19 -3
- package/types/object/module.f.d.ts +18 -0
- package/types/object/module.f.js +1 -1
- package/bnf/tag/module.f.d.ts +0 -30
- package/bnf/tag/module.f.js +0 -37
- /package/{bnf/tag/test.f.d.ts → io/module.f.js} +0 -0
package/text/utf16/module.f.d.ts
CHANGED
|
@@ -1,14 +1,130 @@
|
|
|
1
1
|
import { type List, type Thunk } from '../../types/list/module.f.ts';
|
|
2
|
+
/**
|
|
3
|
+
* Represents an unsigned 16-bit integer that stores a single UTF-16 code unit.
|
|
4
|
+
*/
|
|
2
5
|
export type U16 = number;
|
|
3
6
|
/**
|
|
4
7
|
* [0, 0x10_FFFF]: 16+5 = 21 bits
|
|
5
8
|
*
|
|
6
9
|
* 121_0000_0000: 16+16+9 = 41 bits
|
|
7
10
|
*/
|
|
11
|
+
/**
|
|
12
|
+
* Represents a Unicode code point.
|
|
13
|
+
* Has range: from 0x0000 to 0x10_FFFF (21 bits).
|
|
14
|
+
*/
|
|
8
15
|
export type CodePoint = number;
|
|
16
|
+
/**
|
|
17
|
+
* Converts a list of Unicode code points to their UTF-16 encoding.
|
|
18
|
+
*
|
|
|
19
|
+
* This function handles:
|
|
|
20
|
+
* 1. Code points in the Basic Multilingual Plane (BMP) [0x0000–0xFFFF, excluding surrogates], each encoded as a single U16.
|
|
|
21
|
+
* 2. Code points in the Supplementary Planes [0x10000–0x10FFFF], each encoded as a surrogate pair.
|
|
|
22
|
+
* 3. Any other value (a surrogate value or a value above 0x10FFFF), which is not rejected but emitted as a single U16 holding its low 16 bits.
|
|
|
23
|
+
*
|
|
|
24
|
+
* @param input - A list of Unicode code points (CodePoint) to encode.
|
|
|
25
|
+
* @returns A `Thunk<U16>` list of UTF-16 code units: one per BMP code point and two
|
|
|
26
|
+
* (a high surrogate followed by a low surrogate) per supplementary code point.
|
|
|
27
|
+
* @example
|
|
|
28
|
+
*
|
|
|
29
|
+
* ```ts
|
|
|
30
|
+
* const exampleCodePoints: List<CodePoint> = [
|
|
|
31
|
+
* 0x0041, // 'A' (BMP, single U16)
|
|
|
32
|
+
* 0x1F600, // 😀 (Emoji, supplementary plane, surrogate pair)
|
|
|
33
|
+
* 0xD7FF, // Last code point in the low BMP range
|
|
|
34
|
+
* 0xE000, // First code point in the high BMP range
|
|
|
35
|
+
* ]
|
|
|
36
|
+
*
|
|
|
37
|
+
* const utf16 = fromCodePointList(exampleCodePoints)
|
|
|
38
|
+
* // Code units produced, in order:
|
|
|
39
|
+
* // 0x0041 ('A')
|
|
|
40
|
+
* // 0xD83D, 0xDE00 (😀)
|
|
|
41
|
+
* // 0xD7FF
|
|
|
42
|
+
* // 0xE000
|
|
|
43
|
+
* //
|
|
|
44
|
+
* // The result is declared as `Thunk<U16>` rather than a materialized array.
|
|
45
|
+
* ```
|
|
46
|
+
*/
|
|
9
47
|
export declare const fromCodePointList: (input: List<CodePoint>) => Thunk<U16>;
|
|
48
|
+
/**
|
|
49
|
+
* Converts a list of UTF-16 code units to a list of Unicode code points (CodePoint).
|
|
50
|
+
* This function processes each UTF-16 code unit, decoding them into their corresponding Unicode code points.
|
|
51
|
+
* The input list of `U16` values may represent characters in the Basic Multilingual Plane (BMP) or supplementary planes,
|
|
52
|
+
* with surrogate pairs handled correctly. The function also handles EOF (`null`).
|
|
53
|
+
* @param input - A list of UTF-16 code units (`U16`), possibly containing surrogate pairs.
|
|
54
|
+
* @returns A list of Unicode code points (`CodePoint`), one for each valid code unit or surrogate pair.
|
|
55
|
+
*
|
|
56
|
+
* @example
|
|
57
|
+
*
|
|
58
|
+
* ```ts
|
|
59
|
+
* const utf16List: List<U16> = [0x0041, 0xD83D, 0xDE00] // 'A' and 😀 (surrogate pair)
|
|
60
|
+
* const codePoints = toCodePointList(utf16List)
|
|
61
|
+
* ```
|
|
62
|
+
*/
|
|
10
63
|
export declare const toCodePointList: (input: List<U16>) => List<CodePoint>;
|
|
64
|
+
/**
|
|
65
|
+
* Converts a string to a list of UTF-16 code units (U16).
|
|
66
|
+
*
|
|
67
|
+
* This function processes each character in the input string and converts it to its corresponding UTF-16 code unit(s).
|
|
68
|
+
* Characters in the Basic Multilingual Plane (BMP) will produce a single `U16`, while supplementary plane characters
|
|
69
|
+
* (those requiring surrogate pairs) will produce two `U16` values.
|
|
70
|
+
* @param s - The input string to convert to UTF-16 code units.
|
|
71
|
+
* @returns A list of UTF-16 code units (`U16`) representing the string.
|
|
72
|
+
*
|
|
73
|
+
* @example
|
|
74
|
+
*
|
|
75
|
+
* ```js
|
|
76
|
+
* const inputString = "Hello, 😀"
|
|
77
|
+
* const utf16List = stringToList(inputString)
|
|
78
|
+
* ```
|
|
79
|
+
*/
|
|
11
80
|
export declare const stringToList: (s: string) => List<U16>;
|
|
81
|
+
/**
|
|
82
|
+
* Converts a string to a list of Unicode code points (CodePoint).
|
|
83
|
+
* This function first converts the string to a list of UTF-16 code units (U16) using `stringToList`,
|
|
84
|
+
* then it converts the UTF-16 code units to Unicode code points using `toCodePointList`. This is useful for handling
|
|
85
|
+
* Unicode characters, including supplementary characters represented by surrogate pairs in UTF-16.
|
|
86
|
+
*
|
|
87
|
+
* @param input - The input string to convert.
|
|
88
|
+
* @returns A list of Unicode code points (`CodePoint`) corresponding to the characters in the string.
|
|
89
|
+
*
|
|
90
|
+
* @example
|
|
91
|
+
*
|
|
92
|
+
* ```js
|
|
93
|
+
* const inputString = "Hello, 😀"
|
|
94
|
+
* const codePoints = stringToCodePointList(inputString)
|
|
95
|
+
* ```
|
|
96
|
+
*/
|
|
12
97
|
export declare const stringToCodePointList: (input: string) => List<CodePoint>;
|
|
98
|
+
/**
|
|
99
|
+
* Converts a list of UTF-16 code units (U16) to a string.
|
|
100
|
+
* This function takes a list of `U16` values (UTF-16 code units) and reconstructs the original string by mapping
|
|
101
|
+
* each code unit back to its character using `String.fromCharCode`. The resulting characters are concatenated
|
|
102
|
+
* to form the final string.
|
|
103
|
+
*
|
|
104
|
+
* @param input - A list of UTF-16 code units (`U16`).
|
|
105
|
+
* @returns A string representing the characters encoded by the input UTF-16 code units.
|
|
106
|
+
*
|
|
107
|
+
* @example
|
|
108
|
+
*
|
|
109
|
+
* ```ts
|
|
110
|
+
* const utf16List: List<U16> = [0x0041, 0x0042, 0x0043] // 'ABC'
|
|
111
|
+
* const outputString = listToString(utf16List)
|
|
112
|
+
* ```
|
|
113
|
+
*/
|
|
13
114
|
export declare const listToString: (input: List<U16>) => string;
|
|
115
|
+
/**
|
|
116
|
+
* Converts a list of Unicode code points (CodePoint) to a string.
|
|
117
|
+
* This function first converts the list of Unicode code points to a list of UTF-16 code units using `fromCodePointList`,
|
|
118
|
+
* then it uses `listToString` to reconstruct the string from the UTF-16 code units.
|
|
119
|
+
*
|
|
120
|
+
* @param input - A list of Unicode code points (`CodePoint`).
|
|
121
|
+
* @returns A string representing the characters encoded by the input code points.
|
|
122
|
+
*
|
|
123
|
+
* @example
|
|
124
|
+
*
|
|
125
|
+
* ```ts
|
|
126
|
+
* const codePoints: List<CodePoint> = [0x48, 0x65, 0x6C, 0x6C, 0x6F]
|
|
127
|
+
* const outputString = codePointListToString(codePoints)
|
|
128
|
+
* ```
|
|
129
|
+
*/
|
|
14
130
|
export declare const codePointListToString: (input: List<CodePoint>) => string;
|
package/text/utf16/module.f.js
CHANGED
|
@@ -2,13 +2,67 @@ import { map, flat, stateScan, reduce, flatMap, empty, } from "../../types/list/
|
|
|
2
2
|
import { concat } from "../../types/function/operator/module.f.js";
|
|
3
3
|
import { contains } from "../../types/range/module.f.js";
|
|
4
4
|
import { fn } from "../../types/function/module.f.js";
|
|
5
|
+
/**
|
|
6
|
+
* Range predicates for the low [0x0000–0xD7FF] and high [0xE000–0xFFFF] parts of the Basic Multilingual Plane (BMP), i.e. the BMP on either side of the surrogate range.
|
|
7
|
+
*/
|
|
5
8
|
const lowBmp = contains([0x0000, 0xd7ff]);
|
|
6
9
|
const highBmp = contains([0xe000, 0xffff]);
|
|
10
|
+
/**
|
|
11
|
+
* Checks whether the code point is in the BMP range.
|
|
12
|
+
* The BMP (Basic Multilingual Plane) is the first Unicode plane; it covers code points 0x0000 - 0xFFFF, and the surrogate range 0xD800 - 0xDFFF is excluded here.
|
|
13
|
+
*/
|
|
7
14
|
const isBmpCodePoint = (codePoint) => {
    // Test the range below the surrogates first and consult the range above them
    // only when that check fails, exactly as a short-circuiting `||` would.
    const belowSurrogates = lowBmp(codePoint);
    return belowSurrogates ? belowSurrogates : highBmp(codePoint);
};
|
|
15
|
+
/**
|
|
16
|
+
* Checks whether the 16-bit word (U16) is a high (leading) surrogate.
|
|
17
|
+
* Range: 0xD800 - 0xDBFF.
|
|
18
|
+
*/
|
|
8
19
|
const isHighSurrogate = contains([0xd800, 0xdbff]);
|
|
20
|
+
/**
|
|
21
|
+
* Checks whether the 16-bit word (U16) is a low (trailing) surrogate.
|
|
22
|
+
* Range: 0xDC00 – 0xDFFF.
|
|
23
|
+
*/
|
|
9
24
|
const isLowSurrogate = contains([0xdc00, 0xdfff]);
|
|
25
|
+
/**
|
|
26
|
+
* Error mask. This bit is OR-ed into a decoded value to mark an invalid code unit or an encoding error.
|
|
27
|
+
*/
|
|
10
28
|
// Bit 31 only (2 ** 31). Because JavaScript bitwise operators work on 32-bit signed
// integers, `value | errorMask` always yields a negative number.
const errorMask = 0x8000_0000;
|
|
29
|
+
/**
|
|
30
|
+
* Checks whether the code point belongs to one of the supplementary planes of Unicode.
|
|
31
|
+
* The supplementary planes cover code points from 0x010000 to 0x10FFFF.
|
|
32
|
+
*/
|
|
11
33
|
const isSupplementaryPlane = contains([0x01_0000, 0x10_ffff]);
|
|
34
|
+
/**
|
|
35
|
+
* Converts a Unicode code point to its corresponding UTF-16 representation.
|
|
36
|
+
*
|
|
37
|
+
* This function handles:
|
|
38
|
+
* 1. Code points in the Basic Multilingual Plane (BMP) [0x0000–0xFFFF],
|
|
39
|
+
* which map directly to a single 16-bit value (U16).
|
|
40
|
+
* 2. Supplementary Plane code points [0x10000–0x10FFFF],
|
|
41
|
+
* which are represented as surrogate pairs in UTF-16.
|
|
42
|
+
*
|
|
43
|
+
* @param codePoint - A valid Unicode code point.
|
|
44
|
+
* @returns A list of 16-bit unsigned integers (U16) representing the UTF-16 encoding
|
|
45
|
+
* of the input code point. If the code point is in the BMP range, a single U16
|
|
46
|
+
* value is returned. For code points in the supplementary planes, two U16
|
|
47
|
+
* values (a high and a low surrogate) are returned.
|
|
48
|
+
* @example
|
|
49
|
+
*
|
|
50
|
+
* ```ts
|
|
51
|
+
* const exampleCodePoints: List<CodePoint> = [
|
|
52
|
+
* 0x0041, // 'A' (BMP, single U16)
|
|
53
|
+
* 0x1F600, // 😀 (Emoji, supplementary plane, surrogate pair)
|
|
54
|
+
* 0xD7FF, // Last code point in the low BMP range
|
|
55
|
+
* 0xE000, // First code point in the high BMP range
|
|
56
|
+
* 0x10FFFF, // Maximum valid code point in Unicode
|
|
57
|
+
* 0x110000, // Invalid code point (outside Unicode range)
|
|
58
|
+
* ]
|
|
59
|
+
* exampleCodePoints.forEach((codePoint) => {
|
|
60
|
+
* const utf16Result = codePointToUtf16(codePoint)
|
|
61
|
+
* console.log(`Code Point: U+${codePoint.toString(16).toUpperCase()}`)
|
|
62
|
+
* console.log(`UTF-16: ${utf16Result.map(u16 => u16.toString(16).toUpperCase()).join(' ')}`)
|
|
63
|
+
* })
|
|
64
|
+
* ```
|
|
65
|
+
*/
|
|
12
66
|
const codePointToUtf16 = (codePoint) => {
|
|
13
67
|
if (isBmpCodePoint(codePoint)) {
|
|
14
68
|
return [codePoint];
|
|
@@ -21,8 +75,92 @@ const codePointToUtf16 = (codePoint) => {
|
|
|
21
75
|
}
|
|
22
76
|
return [codePoint & 0xffff];
|
|
23
77
|
};
|
|
78
|
+
/**
|
|
79
|
+
* Converts a list of Unicode code points to their UTF-16 encoding.
|
|
80
|
+
*
|
|
|
81
|
+
* This function handles:
|
|
|
82
|
+
* 1. Code points in the Basic Multilingual Plane (BMP) [0x0000–0xFFFF, excluding surrogates], each encoded as a single U16.
|
|
|
83
|
+
* 2. Code points in the Supplementary Planes [0x10000–0x10FFFF], each encoded as a surrogate pair.
|
|
|
84
|
+
* 3. Any other value (a surrogate value or a value above 0x10FFFF), which is not rejected but emitted as a single U16 holding its low 16 bits.
|
|
|
85
|
+
*
|
|
|
86
|
+
* @param input - A list of Unicode code points (CodePoint) to encode.
|
|
|
87
|
+
* @returns A list of UTF-16 code units (U16): one per BMP code point and two
|
|
|
88
|
+
* (a high surrogate followed by a low surrogate) per supplementary code point.
|
|
|
89
|
+
* @example
|
|
|
90
|
+
*
|
|
|
91
|
+
* ```ts
|
|
|
92
|
+
* const exampleCodePoints: List<CodePoint> = [
|
|
|
93
|
+
* 0x0041, // 'A' (BMP, single U16)
|
|
|
94
|
+
* 0x1F600, // 😀 (Emoji, supplementary plane, surrogate pair)
|
|
|
95
|
+
* 0xD7FF, // Last code point in the low BMP range
|
|
|
96
|
+
* 0xE000, // First code point in the high BMP range
|
|
|
97
|
+
* ]
|
|
|
98
|
+
*
|
|
|
99
|
+
* const utf16 = fromCodePointList(exampleCodePoints)
|
|
|
100
|
+
* // Code units produced, in order:
|
|
|
101
|
+
* // 0x0041 ('A')
|
|
|
102
|
+
* // 0xD83D, 0xDE00 (😀)
|
|
|
103
|
+
* // 0xD7FF
|
|
|
104
|
+
* // 0xE000
|
|
|
105
|
+
* //
|
|
|
106
|
+
* // Each code point is encoded independently by `codePointToUtf16`.
|
|
107
|
+
* ```
|
|
108
|
+
*/
|
|
24
109
|
export const fromCodePointList = flatMap(codePointToUtf16);
|
|
110
|
+
/**
|
|
111
|
+
* Validates whether a given 16-bit unsigned integer (U16) falls within the valid range for UTF-16 code units.
|
|
112
|
+
*
|
|
113
|
+
* UTF-16 uses 16-bit code units to encode characters. The valid range for these code units is [0x0000, 0xFFFF].
|
|
114
|
+
* This function is used to verify that a number is within this range.
|
|
115
|
+
*
|
|
116
|
+
* @param i - A 16-bit unsigned integer (U16) to validate.
|
|
117
|
+
* @returns A boolean value indicating whether the input is a valid UTF-16 code unit.
|
|
118
|
+
*
|
|
119
|
+
* @example
|
|
120
|
+
*
|
|
121
|
+
* ```ts
|
|
122
|
+
* const validU16 = u16(0x0041) // true: U+0041 ('A')
|
|
123
|
+
* const invalidU16 = u16(0x110000) // false: Value is outside the valid range
|
|
124
|
+
* const edgeCaseLow = u16(0x0000) // true: Minimum valid value for UTF-16
|
|
125
|
+
* const edgeCaseHigh = u16(0xFFFF) // true: Maximum valid value for UTF-16
|
|
126
|
+
* ```
|
|
127
|
+
*/
|
|
25
128
|
const u16 = contains([0x0000, 0xFFFF]);
|
|
129
|
+
/**
|
|
130
|
+
* A stateful operation that converts a UTF-16 word (U16) to a list of Unicode code points (CodePoint),
|
|
131
|
+
* while maintaining the state of surrogate pair decoding.
|
|
132
|
+
*
|
|
133
|
+
* This operation processes UTF-16 code units and decodes them into Unicode code points. It handles:
|
|
134
|
+
* 1. BMP code points (single U16).
|
|
135
|
+
* 2. Surrogate pairs (two U16s representing a single code point in the supplementary planes).
|
|
136
|
+
* 3. Invalid or malformed code units.
|
|
137
|
+
*
|
|
138
|
+
* It also manages the internal state, which is necessary to handle surrogate pairs. If the state is null,
|
|
139
|
+
* it expects a valid BMP code point or a high surrogate. If the state is not null, it processes a low surrogate
|
|
140
|
+
* and combines the pair into a single supplementary code point.
|
|
141
|
+
*
|
|
142
|
+
* @param state - The current state of the UTF-16 decoding operation.
|
|
143
|
+
* This can be either `null` (no state) or the previous high surrogate value.
|
|
144
|
+
* @param word - The current UTF-16 word (U16) to process.
|
|
145
|
+
* @returns A tuple where the first element is a list of decoded Unicode code points (`CodePoint`), and
|
|
146
|
+
* the second element is the updated state. If the word is outside [0x0000, 0xFFFF], the code point list is `[0xffffffff]` and the state is returned unchanged.
|
|
147
|
+
*
|
|
148
|
+
* @example
|
|
149
|
+
*
|
|
150
|
+
* ```ts
|
|
151
|
+
* const state: Utf16State = null;
|
|
152
|
+
* const word: U16 = 0xD83D; // High surrogate for 😀 emoji
|
|
153
|
+
* const [decodedCodePoints, newState] = utf16ByteToCodePointOp(state)(word);
|
|
154
|
+
* ```
|
|
155
|
+
*
|
|
156
|
+
* @example
|
|
157
|
+
*
|
|
158
|
+
* ```ts
|
|
159
|
+
* const state: Utf16State = 0xD83D; // High surrogate already stored
|
|
160
|
+
* const word: U16 = 0xDE00; // Low surrogate for 😀 emoji
|
|
161
|
+
* const [decodedCodePoints, newState] = utf16ByteToCodePointOp(state)(word);
|
|
162
|
+
* ```
|
|
163
|
+
*/
|
|
26
164
|
const utf16ByteToCodePointOp = state => word => {
|
|
27
165
|
if (!u16(word)) {
|
|
28
166
|
return [[0xffffffff], state];
|
|
@@ -49,10 +187,110 @@ const utf16ByteToCodePointOp = state => word => {
|
|
|
49
187
|
}
|
|
50
188
|
return [[state | errorMask, word | errorMask], null];
|
|
51
189
|
};
|
|
190
|
+
/**
|
|
191
|
+
* Handles the EOF (end-of-file) condition during UTF-16 decoding.
|
|
192
|
+
*
|
|
193
|
+
* If there is no pending state (`state === null`), it simply returns an empty list
|
|
194
|
+
* of code points, indicating no further input to process. If there is a pending state,
|
|
195
|
+
* it is treated as an unpaired surrogate, and the `errorMask` is applied to flag
|
|
196
|
+
* the invalid sequence.
|
|
197
|
+
*
|
|
198
|
+
* @param state - The current UTF-16 decoding state. This can be:
|
|
199
|
+
* - `null`: No pending surrogate to process.
|
|
200
|
+
* - A high surrogate (0xD800–0xDBFF) left from an earlier input, waiting for a low surrogate.
|
|
201
|
+
* @returns A tuple:
|
|
202
|
+
* - The first element is a list of code points. If there’s a pending state, it is returned
|
|
203
|
+
* with the `errorMask` applied.
|
|
204
|
+
* - The second element is the next state, which will always be `null` because EOF means no
|
|
205
|
+
* further processing.
|
|
206
|
+
*
|
|
207
|
+
* @example
|
|
208
|
+
*
|
|
209
|
+
* ```js
|
|
210
|
+
* const eofState = utf16EofToCodePointOp(0xD800) // Unpaired high surrogate
|
|
211
|
+
* const validState = utf16EofToCodePointOp(null) // No pending state
|
|
212
|
+
* ```
|
|
213
|
+
*/
|
|
52
214
|
const utf16EofToCodePointOp = (state) => {
    // Nothing is pending at end of input: emit no code points.
    if (state === null) {
        return [empty, null];
    }
    // A pending value never received its partner: emit it flagged with the error mask.
    return [[state | errorMask], null];
};
|
|
215
|
+
/**
|
|
216
|
+
* A stateful scan operation that processes UTF-16 input (word or EOF).
|
|
217
|
+
* This function determines whether to handle a UTF-16 word or an end-of-file (EOF)
|
|
218
|
+
* signal during decoding:
|
|
219
|
+
* 1. If the input is `null` (EOF), it calls `utf16EofToCodePointOp` to process
|
|
220
|
+
* any remaining state.
|
|
221
|
+
* 2. If the input is a valid UTF-16 word, it calls `utf16ByteToCodePointOp` to
|
|
222
|
+
* process the word and update the state accordingly.
|
|
223
|
+
* @param state - The current state in the UTF-16 decoding process:
|
|
224
|
+
* - `null`: No pending surrogate.
|
|
225
|
+
* - A high surrogate waiting for a low surrogate.
|
|
226
|
+
* @param input - The current UTF-16 word to process, or `null` to signal EOF.
|
|
227
|
+
* @returns A tuple:
|
|
228
|
+
* - A list of decoded code points (if any).
|
|
229
|
+
* - The updated decoding state.
|
|
230
|
+
*
|
|
231
|
+
* @example
|
|
232
|
+
*
|
|
233
|
+
* ```ts
|
|
234
|
+
* // Example 1: Process a valid UTF-16 word
|
|
235
|
+
* const input1 = 0x0041 // 'A' (BMP code point)
|
|
236
|
+
* const result1 = utf16ByteOrEofToCodePointOp(null)(input1)
|
|
237
|
+
* console.log(result1) // [[0x0041], null]
|
|
238
|
+
* // Example 2: Process a high surrogate, followed by EOF
|
|
239
|
+
* const input2 = 0xD83D // High surrogate
|
|
240
|
+
* const result2 = utf16ByteOrEofToCodePointOp(null)(input2)
|
|
241
|
+
* console.log(result2) // [[], 0xD83D] (waiting for a low surrogate)
|
|
242
|
+
* const eofResult = utf16ByteOrEofToCodePointOp(0xD83D)(null)
|
|
243
|
+
* console.log(eofResult) // [[0xD83D | errorMask], null] (unpaired high surrogate)
|
|
244
|
+
* // Example 3: Handle EOF with no pending state
|
|
245
|
+
* const eofResult2 = utf16ByteOrEofToCodePointOp(null)(null)
|
|
246
|
+
* ```
|
|
247
|
+
*/
|
|
53
248
|
const utf16ByteOrEofToCodePointOp = state => input => {
    // `null` is the end-of-input marker; every other input is treated as a code unit.
    if (input === null) {
        return utf16EofToCodePointOp(state);
    }
    return utf16ByteToCodePointOp(state)(input);
};
|
|
249
|
+
/**
|
|
250
|
+
* Represents an end-of-file (EOF) indicator in a list of UTF-16 code units.
|
|
251
|
+
*
|
|
252
|
+
* This list contains a single element, `null`, which is used to signal the end
|
|
253
|
+
* of input during UTF-16 decoding operations.
|
|
254
|
+
* @example
|
|
255
|
+
*
|
|
256
|
+
* ```ts
|
|
257
|
+
* const input = [...utf16Data, ...eofList]
|
|
258
|
+
* // Ensures the EOF is handled during processing.
|
|
259
|
+
* ```
|
|
260
|
+
*/
|
|
54
261
|
const eofList = [null];
|
|
262
|
+
/**
|
|
263
|
+
* Converts a list of UTF-16 code units to a list of Unicode code points (CodePoint).
|
|
264
|
+
* This function processes each UTF-16 code unit, decoding them into their corresponding Unicode code points.
|
|
265
|
+
* The input list of `U16` values may represent characters in the Basic Multilingual Plane (BMP) or supplementary planes,
|
|
266
|
+
* with surrogate pairs handled correctly. The function also handles EOF (`null`).
|
|
267
|
+
* @param input - A list of UTF-16 code units (`U16`), possibly containing surrogate pairs.
|
|
268
|
+
* @returns A list of Unicode code points (`CodePoint`), one for each valid code unit or surrogate pair.
|
|
269
|
+
*
|
|
270
|
+
* @example
|
|
271
|
+
*
|
|
272
|
+
* ```ts
|
|
273
|
+
* const utf16List: List<U16> = [0x0041, 0xD83D, 0xDE00] // 'A' and 😀 (surrogate pair)
|
|
274
|
+
* const codePoints = toCodePointList(utf16List)
|
|
275
|
+
* ```
|
|
276
|
+
*/
|
|
55
277
|
export const toCodePointList = (input) => {
    // Decoder scan that starts with no pending state (`null`).
    const decode = stateScan(utf16ByteOrEofToCodePointOp)(null);
    // Append the EOF marker so a value still pending at the end reaches the EOF handler.
    const unitsThenEof = flat([input, eofList]);
    // Each step yields a list of code points; flatten them into a single list.
    return flat(decode(unitsThenEof));
};
|
|
278
|
+
/**
|
|
279
|
+
* Converts a string to a list of UTF-16 code units (U16).
|
|
280
|
+
*
|
|
281
|
+
* This function processes each character in the input string and converts it to its corresponding UTF-16 code unit(s).
|
|
282
|
+
* Characters in the Basic Multilingual Plane (BMP) will produce a single `U16`, while supplementary plane characters
|
|
283
|
+
* (those requiring surrogate pairs) will produce two `U16` values.
|
|
284
|
+
* @param s - The input string to convert to UTF-16 code units.
|
|
285
|
+
* @returns A list of UTF-16 code units (`U16`) representing the string.
|
|
286
|
+
*
|
|
287
|
+
* @example
|
|
288
|
+
*
|
|
289
|
+
* ```js
|
|
290
|
+
* const inputString = "Hello, 😀"
|
|
291
|
+
* const utf16List = stringToList(inputString)
|
|
292
|
+
* ```
|
|
293
|
+
*/
|
|
56
294
|
export const stringToList = (s) => {
|
|
57
295
|
const at = (i) => {
|
|
58
296
|
const first = s.charCodeAt(i);
|
|
@@ -60,8 +298,55 @@ export const stringToList = (s) => {
|
|
|
60
298
|
};
|
|
61
299
|
return at(0);
|
|
62
300
|
};
|
|
301
|
+
/**
|
|
302
|
+
* Converts a string to a list of Unicode code points (CodePoint).
|
|
303
|
+
* This function first converts the string to a list of UTF-16 code units (U16) using `stringToList`,
|
|
304
|
+
* then it converts the UTF-16 code units to Unicode code points using `toCodePointList`. This is useful for handling
|
|
305
|
+
* Unicode characters, including supplementary characters represented by surrogate pairs in UTF-16.
|
|
306
|
+
*
|
|
307
|
+
* @param input - The input string to convert.
|
|
308
|
+
* @returns A list of Unicode code points (`CodePoint`) corresponding to the characters in the string.
|
|
309
|
+
*
|
|
310
|
+
* @example
|
|
311
|
+
*
|
|
312
|
+
* ```js
|
|
313
|
+
* const inputString = "Hello, 😀"
|
|
314
|
+
* const codePoints = stringToCodePointList(inputString)
|
|
315
|
+
* ```
|
|
316
|
+
*/
|
|
63
317
|
export const stringToCodePointList = (input) => {
    // First split the string into UTF-16 code units, then decode those into code points.
    const codeUnits = stringToList(input);
    return toCodePointList(codeUnits);
};
|
|
318
|
+
/**
|
|
319
|
+
* Converts a list of UTF-16 code units (U16) to a string.
|
|
320
|
+
* This function takes a list of `U16` values (UTF-16 code units) and reconstructs the original string by mapping
|
|
321
|
+
* each code unit back to its character using `String.fromCharCode`. The resulting characters are concatenated
|
|
322
|
+
* to form the final string.
|
|
323
|
+
*
|
|
324
|
+
* @param input - A list of UTF-16 code units (`U16`).
|
|
325
|
+
* @returns A string representing the characters encoded by the input UTF-16 code units.
|
|
326
|
+
*
|
|
327
|
+
* @example
|
|
328
|
+
*
|
|
329
|
+
* ```ts
|
|
330
|
+
* const utf16List: List<U16> = [0x0041, 0x0042, 0x0043] // 'ABC'
|
|
331
|
+
* const outputString = listToString(utf16List)
|
|
332
|
+
* ```
|
|
333
|
+
*/
|
|
64
334
|
export const listToString = fn(map(String.fromCharCode))
|
|
65
335
|
.then(reduce(concat)(''))
|
|
66
336
|
.result;
|
|
337
|
+
/**
|
|
338
|
+
* Converts a list of Unicode code points (CodePoint) to a string.
|
|
339
|
+
* This function first converts the list of Unicode code points to a list of UTF-16 code units using `fromCodePointList`,
|
|
340
|
+
* then it uses `listToString` to reconstruct the string from the UTF-16 code units.
|
|
341
|
+
*
|
|
342
|
+
* @param input - A list of Unicode code points (`CodePoint`).
|
|
343
|
+
* @returns A string representing the characters encoded by the input code points.
|
|
344
|
+
*
|
|
345
|
+
* @example
|
|
346
|
+
*
|
|
347
|
+
* ```ts
|
|
348
|
+
* const codePoints: List<CodePoint> = [0x48, 0x65, 0x6C, 0x6C, 0x6F]
|
|
349
|
+
* const outputString = codePointListToString(codePoints)
|
|
350
|
+
* ```
|
|
351
|
+
*/
|
|
67
352
|
export const codePointListToString = (input) => {
    // First encode the code points as UTF-16 code units, then join those into a string.
    const codeUnits = fromCodePointList(input);
    return listToString(codeUnits);
};
|
package/text/utf16/test.f.d.ts
CHANGED
|
@@ -2,5 +2,9 @@ declare const _default: {
|
|
|
2
2
|
toCodePointList: (() => void)[];
|
|
3
3
|
fromCodePointList: (() => void)[];
|
|
4
4
|
string: (() => void)[];
|
|
5
|
+
stringToList: (() => void)[];
|
|
6
|
+
listToString: (() => void)[];
|
|
7
|
+
stringToCodePointList: (() => void)[];
|
|
8
|
+
codePointListToString: (() => void)[];
|
|
5
9
|
};
|
|
6
10
|
export default _default;
|
package/text/utf16/test.f.js
CHANGED
|
@@ -146,5 +146,33 @@ export default {
|
|
|
146
146
|
throw result;
|
|
147
147
|
}
|
|
148
148
|
}
|
|
149
|
+
],
|
|
150
|
+
stringToList: [
|
|
151
|
+
() => {
|
|
152
|
+
const inputString = "Hello, i like js";
|
|
153
|
+
const utf16List = stringToList(inputString);
|
|
154
|
+
},
|
|
155
|
+
() => {
|
|
156
|
+
const inputString = "😇🤬🫥😑🫠";
|
|
157
|
+
const utf16List = stringToList(inputString);
|
|
158
|
+
}
|
|
159
|
+
],
|
|
160
|
+
listToString: [
|
|
161
|
+
() => {
|
|
162
|
+
const utf16List = [0x0041, 0x0042, 0x0043];
|
|
163
|
+
const outputString = listToString(utf16List);
|
|
164
|
+
}
|
|
165
|
+
],
|
|
166
|
+
stringToCodePointList: [
|
|
167
|
+
() => {
|
|
168
|
+
const inputString = "Hello, 😀";
|
|
169
|
+
const codePoints = stringToCodePointList(inputString);
|
|
170
|
+
}
|
|
171
|
+
],
|
|
172
|
+
codePointListToString: [
|
|
173
|
+
() => {
|
|
174
|
+
const codePoints = [0x48, 0x65, 0x6C, 0x6C, 0x6F];
|
|
175
|
+
const outputString = codePointListToString(codePoints);
|
|
176
|
+
}
|
|
149
177
|
]
|
|
150
178
|
};
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
/**
|
|
2
|
-
*
|
|
2
|
+
* Utility functions for working with `bigint` values.
|
|
3
3
|
*
|
|
4
4
|
* @module
|
|
5
5
|
*
|
|
@@ -14,17 +14,67 @@
|
|
|
14
14
|
* const bitCount = bitLength(255n) // 8n
|
|
15
15
|
* const bitmask = mask(5n) // 31n
|
|
16
16
|
* const m = min(3n)(13n) // 3n
|
|
17
|
+
* const c = combination(3n, 2n, 1n) // 60n
|
|
17
18
|
* ```
|
|
18
19
|
*/
|
|
19
20
|
import { type Sign } from '../function/compare/module.f.ts';
|
|
20
21
|
import type * as Operator from '../function/operator/module.f.ts';
|
|
21
22
|
import { type List } from '../list/module.f.ts';
|
|
23
|
+
/**
|
|
24
|
+
* Type representing a unary operation on `bigint`.
|
|
25
|
+
*/
|
|
22
26
|
export type Unary = Operator.Unary<bigint, bigint>;
|
|
27
|
+
/**
|
|
28
|
+
* Type representing a reduction operation on `bigint` values.
|
|
29
|
+
*/
|
|
23
30
|
export type Reduce = Operator.Reduce<bigint>;
|
|
31
|
+
/**
|
|
32
|
+
* Adds two `bigint` values.
|
|
33
|
+
*
|
|
34
|
+
* @param a - The first bigint value.
|
|
35
|
+
* @returns A function that takes the second bigint value and returns the sum.
|
|
36
|
+
*/
|
|
24
37
|
export declare const addition: Reduce;
|
|
38
|
+
/**
|
|
39
|
+
* Calculates the sum of a list of `bigint` values.
|
|
40
|
+
*
|
|
41
|
+
* @param input - A list of bigint values.
|
|
42
|
+
* @returns The sum of all values in the list.
|
|
43
|
+
*/
|
|
25
44
|
export declare const sum: (input: List<bigint>) => bigint;
|
|
45
|
+
/**
|
|
46
|
+
* Multiplies two `bigint` values.
|
|
47
|
+
*
|
|
48
|
+
* @param a - The first bigint value.
|
|
49
|
+
* @returns A function that takes the second bigint value and returns the product.
|
|
50
|
+
*/
|
|
51
|
+
export declare const multiple: Reduce;
|
|
52
|
+
/**
|
|
53
|
+
* Calculates the product of a list of `bigint` values.
|
|
54
|
+
*
|
|
55
|
+
* @param input - A list of bigint values.
|
|
56
|
+
* @returns The product of all values in the list.
|
|
57
|
+
*/
|
|
58
|
+
export declare const product: (input: List<bigint>) => bigint;
|
|
59
|
+
/**
|
|
60
|
+
* Calculates the absolute value of a `bigint`.
|
|
61
|
+
*
|
|
62
|
+
* @param a - The bigint value.
|
|
63
|
+
* @returns The absolute value of the input bigint.
|
|
64
|
+
*/
|
|
26
65
|
export declare const abs: Unary;
|
|
66
|
+
/**
|
|
67
|
+
* Determines the sign of a `bigint`.
|
|
68
|
+
* @param a - The bigint value.
|
|
69
|
+
* @returns `1` if positive, `-1` if negative, and `0` if zero.
|
|
70
|
+
*/
|
|
27
71
|
export declare const sign: (a: bigint) => Sign;
|
|
72
|
+
/**
|
|
73
|
+
* Serializes a `bigint` to a string representation.
|
|
74
|
+
*
|
|
75
|
+
* @param a - The bigint value.
|
|
76
|
+
* @returns A string representation of the bigint (e.g., '123n').
|
|
77
|
+
*/
|
|
28
78
|
export declare const serialize: (a: bigint) => string;
|
|
29
79
|
/**
|
|
30
80
|
* Calculates the base-2 logarithm (floor).
|
|
@@ -81,6 +131,37 @@ export declare const bitLength: (v: bigint) => bigint;
|
|
|
81
131
|
*/
|
|
82
132
|
export declare const mask: (len: bigint) => bigint;
|
|
83
133
|
/**
|
|
84
|
-
*
|
|
134
|
+
* Returns the smaller of two `bigint` values.
|
|
135
|
+
*
|
|
136
|
+
* @param a - The first bigint.
|
|
137
|
+
* @returns A function that takes the second bigint and returns the smaller value.
|
|
85
138
|
*/
|
|
86
139
|
export declare const min: (a: bigint) => (b: bigint) => bigint;
|
|
140
|
+
/**
|
|
141
|
+
* Returns the larger of two `bigint` values.
|
|
142
|
+
*
|
|
143
|
+
* @param a - The first bigint.
|
|
144
|
+
* @returns A function that takes the second bigint and returns the larger value.
|
|
145
|
+
*/
|
|
146
|
+
export declare const max: (a: bigint) => (b: bigint) => bigint;
|
|
147
|
+
/**
|
|
148
|
+
* Calculates the partial factorial `b!/a!`.
|
|
149
|
+
*
|
|
150
|
+
* @param a - The starting bigint value.
|
|
151
|
+
* @returns A function that takes `b` and computes `b!/a!`.
|
|
152
|
+
*/
|
|
153
|
+
export declare const partialFactorial: (a: bigint) => (b: bigint) => bigint;
|
|
154
|
+
/**
|
|
155
|
+
* Calculates the factorial of a `bigint`.
|
|
156
|
+
*
|
|
157
|
+
* @param b - The bigint value.
|
|
158
|
+
* @returns The factorial of the input.
|
|
159
|
+
*/
|
|
160
|
+
export declare const factorial: (b: bigint) => bigint;
|
|
161
|
+
/**
|
|
162
|
+
* Calculates the number of combinations for a list of `bigint` values.
|
|
163
|
+
*
|
|
164
|
+
* @param k - A list of bigint values.
|
|
165
|
+
* @returns The number of combinations.
|
|
166
|
+
*/
|
|
167
|
+
export declare const combination: (...k: readonly bigint[]) => bigint;
|