@thi.ng/strings 3.6.6 → 3.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +8 -1
- package/README.md +1 -1
- package/index.d.ts +1 -0
- package/index.js +1 -0
- package/package.json +13 -10
- package/utf8.d.ts +54 -0
- package/utf8.js +140 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# Change Log
|
|
2
2
|
|
|
3
|
-
- **Last updated**: 2023-
|
|
3
|
+
- **Last updated**: 2023-12-03T12:13:31Z
|
|
4
4
|
- **Generator**: [thi.ng/monopub](https://thi.ng/monopub)
|
|
5
5
|
|
|
6
6
|
All notable changes to this project will be documented in this file.
|
|
@@ -9,6 +9,13 @@ See [Conventional Commits](https://conventionalcommits.org/) for commit guidelin
|
|
|
9
9
|
**Note:** Unlisted _patch_ versions only involve non-code or otherwise excluded changes
|
|
10
10
|
and/or version bumps of transitive dependencies.
|
|
11
11
|
|
|
12
|
+
## [3.7.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/strings@3.7.0) (2023-11-24)
|
|
13
|
+
|
|
14
|
+
#### 🚀 Features
|
|
15
|
+
|
|
16
|
+
- add/migrate utf8 encode/decode/length fns ([a8955f2](https://github.com/thi-ng/umbrella/commit/a8955f2))
|
|
17
|
+
- migrate & update from [@thi.ng/transducers-binary](https://github.com/thi-ng/umbrella/tree/main/packages/transducers-binary)
|
|
18
|
+
|
|
12
19
|
### [3.6.5](https://github.com/thi-ng/umbrella/tree/@thi.ng/strings@3.6.5) (2023-11-09)
|
|
13
20
|
|
|
14
21
|
#### ♻️ Refactoring
|
package/README.md
CHANGED
package/index.d.ts
CHANGED
package/index.js
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@thi.ng/strings",
|
|
3
|
-
"version": "3.
|
|
3
|
+
"version": "3.7.1",
|
|
4
4
|
"description": "Various string formatting & utility functions",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"module": "./index.js",
|
|
@@ -33,18 +33,18 @@
|
|
|
33
33
|
"test": "bun test"
|
|
34
34
|
},
|
|
35
35
|
"dependencies": {
|
|
36
|
-
"@thi.ng/api": "^8.9.
|
|
37
|
-
"@thi.ng/errors": "^2.4.
|
|
38
|
-
"@thi.ng/hex": "^2.3.
|
|
39
|
-
"@thi.ng/memoize": "^3.1.
|
|
36
|
+
"@thi.ng/api": "^8.9.10",
|
|
37
|
+
"@thi.ng/errors": "^2.4.4",
|
|
38
|
+
"@thi.ng/hex": "^2.3.22",
|
|
39
|
+
"@thi.ng/memoize": "^3.1.44"
|
|
40
40
|
},
|
|
41
41
|
"devDependencies": {
|
|
42
|
-
"@microsoft/api-extractor": "^7.38.
|
|
43
|
-
"@thi.ng/testament": "^0.4.
|
|
42
|
+
"@microsoft/api-extractor": "^7.38.3",
|
|
43
|
+
"@thi.ng/testament": "^0.4.3",
|
|
44
44
|
"rimraf": "^5.0.5",
|
|
45
45
|
"tools": "^0.0.1",
|
|
46
|
-
"typedoc": "^0.25.
|
|
47
|
-
"typescript": "^5.
|
|
46
|
+
"typedoc": "^0.25.4",
|
|
47
|
+
"typescript": "^5.3.2"
|
|
48
48
|
},
|
|
49
49
|
"keywords": [
|
|
50
50
|
"ansi",
|
|
@@ -183,6 +183,9 @@
|
|
|
183
183
|
"./units": {
|
|
184
184
|
"default": "./units.js"
|
|
185
185
|
},
|
|
186
|
+
"./utf8": {
|
|
187
|
+
"default": "./utf8.js"
|
|
188
|
+
},
|
|
186
189
|
"./uuid": {
|
|
187
190
|
"default": "./uuid.js"
|
|
188
191
|
},
|
|
@@ -199,5 +202,5 @@
|
|
|
199
202
|
"thi.ng": {
|
|
200
203
|
"year": 2015
|
|
201
204
|
},
|
|
202
|
-
"gitHead": "
|
|
205
|
+
"gitHead": "04d1de79f256d7a53c6b5fd157b37f49bc88e11d\n"
|
|
203
206
|
}
|
package/utf8.d.ts
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
/// <reference types="node" />
|
|
2
|
+
/**
|
|
3
|
+
* Returns the number of bytes required to encode the given string as UTF-8.
|
|
4
|
+
*
|
|
5
|
+
* @param str
|
|
6
|
+
*/
|
|
7
|
+
export declare const utf8Length: (str: string) => number;
|
|
8
|
+
/**
|
|
9
|
+
* Non-transducer version of
|
|
10
|
+
* [`utf8Decode()`](https://docs.thi.ng/umbrella/transducers-binary/functions/utf8Decode.html).
|
|
11
|
+
* Decodes `num` bytes from `start` index in given byte buffer. In Firefox this
|
|
12
|
+
* is much faster than using the `TextDecoder` API.
|
|
13
|
+
*
|
|
14
|
+
* @param buf
|
|
15
|
+
* @param start
|
|
16
|
+
* @param num
|
|
17
|
+
*/
|
|
18
|
+
export declare const utf8Decode: (buf: Uint8Array, start: number, num: number) => string;
|
|
19
|
+
/**
|
|
20
|
+
* Non-transducer version of
|
|
21
|
+
* [`utf8Encode()`](https://docs.thi.ng/umbrella/transducers-binary/functions/utf8Encode.html).
|
|
22
|
+
*
|
|
23
|
+
* @remarks
|
|
24
|
+
* If `capacity` is given, initializes the byte array to that size (and assumes
|
|
25
|
+
* that it is sufficient to store the entire string, e.g. by using
|
|
26
|
+
* {@link utf8Length} to pre-determine the number of bytes required for a given
|
|
27
|
+
* string). If `capacity` is _not_ provided, the buffer will be initialized to
|
|
28
|
+
* `4 * src.length`.
|
|
29
|
+
*
|
|
30
|
+
* Based on:
|
|
31
|
+
* - https://github.com/thi-ng/umbrella/blob/main/packages/transducers-binary/src/utf8.ts
|
|
32
|
+
* - https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330
|
|
33
|
+
*
|
|
34
|
+
* @param src - string to encode (optionally pass `capacity` as the buffer size in bytes)
|
|
35
|
+
*/
|
|
36
|
+
export declare const utf8Encode: (src: string, capacity?: number) => Uint8Array;
|
|
37
|
+
/**
|
|
38
|
+
* Returns the character string for the given Unicode code point.
|
|
39
|
+
*
|
|
40
|
+
* @param x
|
|
41
|
+
*/
|
|
42
|
+
export declare const fromUtf8CodePoint: (x: number) => string;
|
|
43
|
+
export declare const UTF8Error: {
|
|
44
|
+
new (msg?: string | undefined): {
|
|
45
|
+
name: string;
|
|
46
|
+
message: string;
|
|
47
|
+
stack?: string | undefined;
|
|
48
|
+
cause?: unknown;
|
|
49
|
+
};
|
|
50
|
+
captureStackTrace(targetObject: object, constructorOpt?: Function | undefined): void;
|
|
51
|
+
prepareStackTrace?: ((err: Error, stackTraces: NodeJS.CallSite[]) => any) | undefined;
|
|
52
|
+
stackTraceLimit: number;
|
|
53
|
+
};
|
|
54
|
+
//# sourceMappingURL=utf8.d.ts.map
|
package/utf8.js
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import { defError } from "@thi.ng/errors/deferror";
|
|
2
|
+
/**
 * Returns the number of bytes required to encode the given string as UTF-8.
 *
 * @remarks
 * The string is scanned by UTF-16 code unit. A high surrogate immediately
 * followed by a low surrogate counts as a single 4-byte sequence. Any other
 * code unit counts as 1, 2 or 3 bytes depending on its value (an unpaired
 * surrogate is counted as 3 bytes).
 *
 * @param str - string to measure
 * @returns number of bytes
 */
export const utf8Length = (str) => {
	const n = str.length;
	let len = 0;
	for (let i = 0; i < n; ++i) {
		const u = str.charCodeAt(i);
		if (u < 0x80) {
			len += 1;
		} else if (u < 0x800) {
			len += 2;
		} else if (u >= 0xd800 && u < 0xdc00 && i + 1 < n) {
			// high surrogate: only forms a pair (= one 4-byte sequence) if
			// the next code unit really is a low surrogate
			const lo = str.charCodeAt(i + 1);
			if (lo >= 0xdc00 && lo < 0xe000) {
				len += 4;
				// skip the low surrogate, it's already accounted for
				++i;
			} else {
				len += 3;
			}
		} else {
			// remaining BMP code units (incl. unpaired surrogates)
			len += 3;
		}
	}
	return len;
};
|
|
30
|
+
/**
 * Non-transducer version of
 * [`utf8Decode()`](https://docs.thi.ng/umbrella/transducers-binary/functions/utf8Decode.html).
 * Decodes `num` bytes from `start` index in given byte buffer. In Firefox this
 * is much faster than using the `TextDecoder` API.
 *
 * @remarks
 * Only bytes in the `[start, start + num)` range are consumed. Supports 1-4
 * byte sequences. Continuation bytes are masked to their low 6 bits, but not
 * otherwise validated.
 *
 * @param buf - byte buffer to read from
 * @param start - index of first byte to decode
 * @param num - number of bytes to decode
 * @returns decoded string
 * @throws UTF8Error if a byte is not a valid lead byte, if a multi-byte
 * sequence is cut off by the end of the range, or if the decoded value is not
 * a valid code point
 */
export const utf8Decode = (buf, start, num) => {
	const end = start + num;
	let i = start;
	let result = "";
	while (i < end) {
		let c = buf[i++];
		if (c < 0x80) {
			// single byte (ASCII)
			result += String.fromCharCode(c);
			continue;
		}
		// classify lead byte: keep its payload bits and determine the
		// number of continuation bytes which must follow
		let extra = 0;
		if (c >= 0xc0 && c < 0xe0) {
			c &= 0x1f;
			extra = 1;
		} else if (c >= 0xe0 && c < 0xf0) {
			c &= 0x0f;
			extra = 2;
		} else if (c >= 0xf0 && c < 0xf8) {
			c &= 0x07;
			extra = 3;
		} else {
			// stray continuation byte, unsupported lead byte or read
			// outside the buffer
			utf8Error();
		}
		// don't read continuation bytes outside the requested range
		if (i + extra > end) {
			utf8Error(`truncated sequence at index ${i - 1}`);
		}
		for (; extra > 0; extra--) {
			c = (c << 6) | (buf[i++] & 0x3f);
		}
		result += fromUtf8CodePoint(c);
	}
	return result;
};
|
|
74
|
+
/**
 * Non-transducer version of
 * [`utf8Encode()`](https://docs.thi.ng/umbrella/transducers-binary/functions/utf8Encode.html).
 *
 * @remarks
 * If `capacity` is given, initializes the byte array to that size (and assumes
 * that it is sufficient to store the entire string, e.g. by using
 * {@link utf8Length} to pre-determine the number of bytes required for a given
 * string). If `capacity` is _not_ provided, the buffer will be initialized to
 * `4 * src.length`.
 *
 * A high surrogate is only combined with the following code unit if that unit
 * is a low surrogate. Unpaired surrogates are encoded on their own as 3-byte
 * sequences and never consume the character following them.
 *
 * Based on:
 * - https://github.com/thi-ng/umbrella/blob/main/packages/transducers-binary/src/utf8.ts
 * - https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330
 *
 * @param src - string to encode
 * @param capacity - optional buffer size (in bytes)
 * @returns view of the encoded bytes (a `subarray()` of the allocated buffer)
 */
export const utf8Encode = (src, capacity) => {
	const n = src.length;
	const buf = new Uint8Array(capacity || n << 2);
	let pos = 0;
	for (let i = 0; i < n; i++) {
		let c = src.charCodeAt(i);
		if (c < 0x80) {
			buf[pos++] = c;
			continue;
		}
		if (c < 0x800) {
			buf[pos++] = 0xc0 | (c >> 6);
		} else {
			// peek at next code unit only if `c` is a high surrogate and
			// there's actually something following it
			const lo =
				c >= 0xd800 && c < 0xdc00 && i + 1 < n
					? src.charCodeAt(i + 1)
					: 0;
			if (lo >= 0xdc00 && lo < 0xe000) {
				// valid surrogate pair => 4-byte sequence
				c = 0x10000 + ((c & 0x3ff) << 10) + (lo & 0x3ff);
				i++;
				buf[pos++] = 0xf0 | (c >> 18);
				buf[pos++] = 0x80 | ((c >> 12) & 0x3f);
			} else {
				// other BMP code unit (incl. unpaired surrogates)
				buf[pos++] = 0xe0 | (c >> 12);
			}
			buf[pos++] = 0x80 | ((c >> 6) & 0x3f);
		}
		// final continuation byte shared by all multi-byte sequences
		buf[pos++] = 0x80 | (c & 0x3f);
	}
	return buf.subarray(0, pos);
};
|
|
123
|
+
/**
 * Converts a numeric code point into its string form.
 *
 * @remarks
 * Values below 0x10000 are passed directly to `String.fromCharCode()`. Values
 * below 0x110000 are split into a UTF-16 surrogate pair. Anything else is
 * rejected via `utf8Error()`.
 *
 * @param x - code point
 */
export const fromUtf8CodePoint = (x) => {
	const inBMP = x < 0x10000;
	// reject first: neither a BMP value nor within the supplementary range
	if (!inBMP && !(x < 0x110000)) {
		return utf8Error(`invalid codepoint 0x${x.toString(16)}`);
	}
	if (inBMP) {
		return String.fromCharCode(x);
	}
	// supplementary plane: 20-bit offset => high/low surrogate (10 bits each)
	const offset = x - 0x10000;
	const hi = 0xd800 | (offset >>> 10);
	const lo = 0xdc00 | (offset & 0x3ff);
	return String.fromCharCode(hi, lo);
};
|
|
137
|
+
/**
 * Error type thrown by the UTF-8 functions in this file. Created via
 * `defError()` using the `"UTF-8 error"` prefix function.
 */
export const UTF8Error = defError(() => "UTF-8 error");

/**
 * Internal helper: unconditionally throws a {@link UTF8Error} constructed with
 * the given (optional) message.
 *
 * @param message - error message passed to the constructor
 */
const utf8Error = (message) => {
	throw new UTF8Error(message);
};
|