@thi.ng/strings 3.6.6 → 3.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Change Log
2
2
 
3
- - **Last updated**: 2023-11-09T10:28:19Z
3
+ - **Last updated**: 2023-11-24T09:35:46Z
4
4
  - **Generator**: [thi.ng/monopub](https://thi.ng/monopub)
5
5
 
6
6
  All notable changes to this project will be documented in this file.
@@ -9,6 +9,13 @@ See [Conventional Commits](https://conventionalcommits.org/) for commit guidelin
9
9
  **Note:** Unlisted _patch_ versions only involve non-code or otherwise excluded changes
10
10
  and/or version bumps of transitive dependencies.
11
11
 
12
+ ## [3.7.0](https://github.com/thi-ng/umbrella/tree/@thi.ng/strings@3.7.0) (2023-11-24)
13
+
14
+ #### 🚀 Features
15
+
16
+ - add/migrate utf8 encode/decode/length fns ([a8955f2](https://github.com/thi-ng/umbrella/commit/a8955f2))
17
+ - migrate & update from [@thi.ng/transducers-binary](https://github.com/thi-ng/umbrella/tree/main/packages/transducers-binary)
18
+
12
19
  ### [3.6.5](https://github.com/thi-ng/umbrella/tree/@thi.ng/strings@3.6.5) (2023-11-09)
13
20
 
14
21
  #### ♻️ Refactoring
package/index.d.ts CHANGED
@@ -31,6 +31,7 @@ export * from "./trim.js";
31
31
  export * from "./truncate.js";
32
32
  export * from "./truncate-left.js";
33
33
  export * from "./units.js";
34
+ export * from "./utf8.js";
34
35
  export * from "./uuid.js";
35
36
  export * from "./vector.js";
36
37
  export * from "./wrap.js";
package/index.js CHANGED
@@ -31,6 +31,7 @@ export * from "./trim.js";
31
31
  export * from "./truncate.js";
32
32
  export * from "./truncate-left.js";
33
33
  export * from "./units.js";
34
+ export * from "./utf8.js";
34
35
  export * from "./uuid.js";
35
36
  export * from "./vector.js";
36
37
  export * from "./wrap.js";
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@thi.ng/strings",
3
- "version": "3.6.6",
3
+ "version": "3.7.0",
4
4
  "description": "Various string formatting & utility functions",
5
5
  "type": "module",
6
6
  "module": "./index.js",
@@ -33,18 +33,18 @@
33
33
  "test": "bun test"
34
34
  },
35
35
  "dependencies": {
36
- "@thi.ng/api": "^8.9.8",
37
- "@thi.ng/errors": "^2.4.2",
38
- "@thi.ng/hex": "^2.3.20",
39
- "@thi.ng/memoize": "^3.1.42"
36
+ "@thi.ng/api": "^8.9.9",
37
+ "@thi.ng/errors": "^2.4.3",
38
+ "@thi.ng/hex": "^2.3.21",
39
+ "@thi.ng/memoize": "^3.1.43"
40
40
  },
41
41
  "devDependencies": {
42
- "@microsoft/api-extractor": "^7.38.2",
43
- "@thi.ng/testament": "^0.4.1",
42
+ "@microsoft/api-extractor": "^7.38.3",
43
+ "@thi.ng/testament": "^0.4.2",
44
44
  "rimraf": "^5.0.5",
45
45
  "tools": "^0.0.1",
46
46
  "typedoc": "^0.25.3",
47
- "typescript": "^5.2.2"
47
+ "typescript": "^5.3.2"
48
48
  },
49
49
  "keywords": [
50
50
  "ansi",
@@ -183,6 +183,9 @@
183
183
  "./units": {
184
184
  "default": "./units.js"
185
185
  },
186
+ "./utf8": {
187
+ "default": "./utf8.js"
188
+ },
186
189
  "./uuid": {
187
190
  "default": "./uuid.js"
188
191
  },
@@ -199,5 +202,5 @@
199
202
  "thi.ng": {
200
203
  "year": 2015
201
204
  },
202
- "gitHead": "669a3151e4302480244fe3e60eff5e732ea5b7a7\n"
205
+ "gitHead": "f6de41f4991704fdbbb2899bb430ed4f4f6efab0\n"
203
206
  }
package/utf8.d.ts ADDED
@@ -0,0 +1,54 @@
1
+ /// <reference types="node" />
2
+ /**
3
+ * Returns the number of bytes required to encode the given string as UTF-8.
4
+ *
5
+ * @param str
6
+ */
7
+ export declare const utf8Length: (str: string) => number;
8
+ /**
9
+ * Non-transducer version of
10
+ * [`utf8Decode()`](https://docs.thi.ng/umbrella/transducers-binary/functions/utf8Decode.html).
11
+ * Decodes `num` bytes from `start` index in given byte buffer. In Firefox this
12
+ * is much faster than using the `TextDecoder` API.
13
+ *
14
+ * @param buf
15
+ * @param start
16
+ * @param num
17
+ */
18
+ export declare const utf8Decode: (buf: Uint8Array, start: number, num: number) => string;
19
+ /**
20
+ * Non-transducer version of
21
+ * [`utf8Encode()`](https://docs.thi.ng/umbrella/transducers-binary/functions/utf8Encode.html).
22
+ *
23
+ * @remarks
24
+ * If `capacity` is given, initializes the byte array to that size (and assumes
25
+ * that it is sufficient to store the entire string, e.g. by using
26
+ * {@link utf8Length} to pre-determine the number of bytes required for a given
27
+ * string). If `capacity` is _not_ provided, the buffer will be initialized to
28
+ * `4 * src.length`.
29
+ *
30
+ * Based on:
31
+ * - https://github.com/thi-ng/umbrella/blob/main/packages/transducers-binary/src/utf8.ts
32
+ * - https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330
33
+ *
34
+ * @param src - string to encode (see remarks for the optional `capacity` arg)
35
+ */
36
+ export declare const utf8Encode: (src: string, capacity?: number) => Uint8Array;
37
+ /**
38
+ * Returns the character string for the given Unicode code point.
39
+ *
40
+ * @param x
41
+ */
42
+ export declare const fromUtf8CodePoint: (x: number) => string;
43
+ export declare const UTF8Error: {
44
+ new (msg?: string | undefined): {
45
+ name: string;
46
+ message: string;
47
+ stack?: string | undefined;
48
+ cause?: unknown;
49
+ };
50
+ captureStackTrace(targetObject: object, constructorOpt?: Function | undefined): void;
51
+ prepareStackTrace?: ((err: Error, stackTraces: NodeJS.CallSite[]) => any) | undefined;
52
+ stackTraceLimit: number;
53
+ };
54
+ //# sourceMappingURL=utf8.d.ts.map
package/utf8.js ADDED
@@ -0,0 +1,140 @@
1
+ import { defError } from "@thi.ng/errors/deferror";
2
/**
 * Returns the number of bytes required to encode the given string as UTF-8.
 *
 * @remarks
 * Walks the string's UTF-16 code units. A high surrogate (0xd800 - 0xdbff) is
 * counted together with the code unit following it as one 4-byte sequence,
 * which is the same way {@link utf8Encode} consumes surrogate pairs. Every
 * other code unit is sized by its own value, so BMP characters in the
 * 0xe000 - 0xffff range (and unpaired low surrogates) count as 3 bytes.
 *
 * @param str - string to measure
 * @returns number of UTF-8 bytes
 */
export const utf8Length = (str) => {
    const n = str.length;
    let len = 0;
    for (let i = 0; i < n; ++i) {
        const u = str.charCodeAt(i);
        if (u < 0x80) {
            len += 1;
        } else if (u < 0x800) {
            len += 2;
        } else if (u >= 0xd800 && u < 0xdc00) {
            // High surrogate: the pair always maps to a code point in
            // 0x10000 - 0x10ffff, i.e. 4 bytes. Skip the partner code unit.
            len += 4;
            ++i;
        } else {
            // Remaining BMP range. charCodeAt() never exceeds 0xffff, so
            // nothing here can need more than 3 bytes.
            len += 3;
        }
    }
    return len;
};
30
/**
 * Non-transducer version of
 * [`utf8Decode()`](https://docs.thi.ng/umbrella/transducers-binary/functions/utf8Decode.html).
 * Decodes the bytes in `[start, start + num)` of the given buffer into a
 * string. The upstream notes state that in Firefox this is much faster than
 * the `TextDecoder` API (not verified here).
 *
 * @remarks
 * The lead byte selects the sequence length: `< 0x80` is a single byte,
 * `0xc0 - 0xdf` two bytes, `0xe0 - 0xef` three bytes and `0xf0 - 0xf7` four
 * bytes. Any other lead byte raises a UTF-8 error. Continuation bytes are not
 * validated (only their low 6 bits are used), and a sequence starting just
 * before the end of the range will read bytes beyond `start + num`.
 *
 * @param buf - byte buffer to read from
 * @param start - index of the first byte to decode
 * @param num - number of bytes to decode
 */
export const utf8Decode = (buf, start, num) => {
    const limit = start + num;
    let out = "";
    for (let pos = start; pos < limit; ) {
        const lead = buf[pos++];
        if (lead < 0x80) {
            // plain ASCII, no continuation bytes
            out += String.fromCharCode(lead);
            continue;
        }
        let cp;
        if (lead >= 0xc0 && lead < 0xe0) {
            const b1 = buf[pos++];
            cp = ((lead & 0x1f) << 6) | (b1 & 0x3f);
        } else if (lead >= 0xe0 && lead < 0xf0) {
            const b1 = buf[pos++];
            const b2 = buf[pos++];
            cp = ((lead & 0x0f) << 12) | ((b1 & 0x3f) << 6) | (b2 & 0x3f);
        } else if (lead >= 0xf0 && lead < 0xf8) {
            const b1 = buf[pos++];
            const b2 = buf[pos++];
            const b3 = buf[pos++];
            cp =
                ((lead & 7) << 18) |
                ((b1 & 0x3f) << 12) |
                ((b2 & 0x3f) << 6) |
                (b3 & 0x3f);
        } else {
            // stray continuation byte, 5/6-byte lead or read past the buffer
            utf8Error();
        }
        out += fromUtf8CodePoint(cp);
    }
    return out;
};
74
/**
 * Non-transducer version of
 * [`utf8Encode()`](https://docs.thi.ng/umbrella/transducers-binary/functions/utf8Encode.html).
 * Encodes the given string as UTF-8 and returns the bytes written.
 *
 * @remarks
 * If `capacity` is given (and non-zero), the byte array is created with that
 * size and is assumed to be large enough for the entire string, e.g. by using
 * {@link utf8Length} to pre-compute the required size. Otherwise the buffer is
 * sized to `4 * src.length`. The result is a `subarray()` view of that buffer,
 * trimmed to the number of bytes produced.
 *
 * A high surrogate (0xd800 - 0xdbff) is combined with the code unit following
 * it into a single 4-byte sequence; no check is made that the follower is a
 * low surrogate.
 *
 * Based on:
 * - https://github.com/thi-ng/umbrella/blob/main/packages/transducers-binary/src/utf8.ts
 * - https://gist.github.com/pascaldekloe/62546103a1576803dade9269ccf76330
 *
 * @param src - string to encode
 * @param capacity - optional size (in bytes) of the backing buffer
 */
export const utf8Encode = (src, capacity) => {
    const numUnits = src.length;
    const bytes = new Uint8Array(capacity || numUnits << 2);
    let out = 0;
    for (let idx = 0; idx < numUnits; idx++) {
        let cp = src.charCodeAt(idx);
        if (cp < 0x80) {
            bytes[out++] = cp;
            continue;
        }
        // Emit the lead byte (plus any middle bytes); every multi-byte form
        // shares the same final continuation byte written below.
        if (cp < 0x800) {
            bytes[out++] = 0xc0 | (cp >> 6);
        } else if (cp >= 0xd800 && cp < 0xdc00) {
            // merge the surrogate pair into one supplementary code point
            cp =
                0x10000 +
                ((cp & 0x03ff) << 10) +
                (src.charCodeAt(++idx) & 0x3ff);
            bytes[out++] = 0xf0 | (cp >> 18);
            bytes[out++] = 0x80 | ((cp >> 12) & 0x3f);
            bytes[out++] = 0x80 | ((cp >> 6) & 0x3f);
        } else {
            bytes[out++] = 0xe0 | (cp >> 12);
            bytes[out++] = 0x80 | ((cp >> 6) & 0x3f);
        }
        bytes[out++] = 0x80 | (cp & 0x3f);
    }
    return bytes.subarray(0, out);
};
123
/**
 * Returns the string for the given Unicode code point.
 *
 * @remarks
 * Values below 0x10000 become a single UTF-16 code unit, values below
 * 0x110000 become a surrogate pair. Anything else raises a UTF-8 error with
 * the offending value in hex.
 *
 * @param x - code point
 */
export const fromUtf8CodePoint = (x) => {
    if (x < 0x10000) {
        return String.fromCharCode(x);
    }
    if (!(x < 0x110000)) {
        return utf8Error(`invalid codepoint 0x${x.toString(16)}`);
    }
    // split the 20-bit offset into high and low surrogate halves
    const offset = x - 0x10000;
    const hi = 0xd800 | (offset >>> 10);
    const lo = 0xdc00 | (offset & 0x3ff);
    return String.fromCharCode(hi, lo);
};
137
/**
 * Error class for UTF-8 encoding/decoding failures, created via `defError()`
 * with the message prefix function `() => "UTF-8 error"`. The constructor
 * takes an optional message.
 */
export const UTF8Error = defError(() => "UTF-8 error");

/**
 * Throws a new {@link UTF8Error} with the given (optional) message. Never
 * returns normally.
 *
 * @param msg - optional error detail
 */
function utf8Error(msg) {
    throw new UTF8Error(msg);
}