@dbx-tools/shared 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/string.ts ADDED
@@ -0,0 +1,276 @@
1
+ // Direct import (not via the barrel) to avoid a self-import cycle:
2
+ // `index.client.ts` re-exports `* as stringUtils from "./src/string.js"`,
3
+ // so going back through it would close a loop.
4
+ import { fnvHash, fnvHashWithOptions } from "./common.js";
5
+
6
/** Switches understood by the tokenizer; every field is optional. */
type TokenizeOptions = {
  /** Yield each distinct token only once (compared after case changes). */
  distinct?: boolean;
  /** Lowercase every token. */
  lowerCase?: boolean;
  /** Uppercase the first character of every token (applied after `lowerCase`). */
  capitalize?: boolean;
  /** When a whole value looks like `scheme://rest`, tokenize only `rest`. */
  omitUriScheme?: boolean;
  /** When a whole value looks like `local@domain`, tokenize only `local`. */
  omitEmailDomain?: boolean;
  /** Split on camel-case boundaries instead of only on non-alphanumerics. */
  camelCase?: boolean;
};

// Keys/identifiers/slugs are always lowercased; `lowerCase` is not a
// caller-configurable option.
type KeyOptions = Omit<TokenizeOptions, "lowerCase" | "capitalize"> & {
  /** Maximum length of the joined result, delimiters included. */
  maxLength?: number;
  /** What to do when the next token would exceed `maxLength`. */
  truncateStrategy?: "hash" | "trim" | "empty";
  /** Length requested for the hash used by the `"hash"` strategy. */
  truncateHashLength?: number;
};

/** {@link KeyOptions} plus a caller-selectable token separator. */
type IdentifierOptions = KeyOptions & {
  /** String placed between tokens. */
  delimiter?: string;
};

/** {@link TokenizeOptions} with every field filled in. */
type ResolvedTokenizeOptions = Required<TokenizeOptions>;
/**
 * {@link IdentifierOptions} with every field filled in, plus the two
 * tokenizer fields that {@link KeyOptions} hides from callers, so the
 * resolved object can be handed to the tokenizer as-is.
 */
type ResolvedIdentifierOptions = Required<
  IdentifierOptions & Pick<TokenizeOptions, "lowerCase" | "capitalize">
>;
31
+
32
// Camel-case aware tokens: an optional capital followed by lowercase letters
// (`Foo`, `foo`), a run of digits, or a run of capitals that is not followed
// by a lowercase letter (acronyms such as `HTTP` in `HTTPServer`).
const TOKENIZE_CAMEL_CASE_REGEXP = /[A-Z]?[a-z]+|[0-9]+|[A-Z]+(?![a-z])/g;
// Despite the name, this MATCHES runs of ASCII letters/digits; everything
// that is not alphanumeric therefore acts as a separator.
const TOKENIZE_NON_ALPHANUMERIC_REGEXP = /[a-zA-Z0-9]+/g;
// Whole-string match for `scheme://rest`: group 1 is the (optional) scheme,
// group 2 is everything after `://` (optional, no whitespace allowed).
const URI_REGEXP = /^([a-zA-Z][a-zA-Z0-9+.-]*)?:\/\/([^\s/?#][^\s]*)?$/;
// Whole-string match for `local@domain`: group 1 is the local part, group 2
// is a domain containing at least one dot.
const EMAIL_REGEXP =
  /^([a-zA-Z0-9.!#$%&'*+/=?^_`{|}~-]+)@([a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)+)$/;

// Tokenizer defaults: no de-duplication, no case changes, no URI/e-mail
// stripping, camel-case splitting enabled.
const TOKENIZE_DEFAULTS: ResolvedTokenizeOptions = {
  distinct: false,
  lowerCase: false,
  capitalize: false,
  omitUriScheme: false,
  omitEmailDomain: false,
  camelCase: true,
};

// Identifier defaults: tokenizer defaults with lowercasing switched on, no
// length limit (`Infinity`), hash-based truncation with a 6-character hash,
// and `-` between tokens.
const IDENTIFIER_DEFAULTS: ResolvedIdentifierOptions = {
  ...TOKENIZE_DEFAULTS,
  lowerCase: true,
  maxLength: Infinity,
  truncateStrategy: "hash",
  truncateHashLength: 6,
  delimiter: "-",
};
55
+
56
/**
 * Lazily split `values` into ASCII alphanumeric tokens.
 *
 * Values that are `null`/`undefined`, or whose string form is empty, are
 * skipped; non-string values are converted with `String(value)`. When
 * enabled, a value that is entirely a `scheme://rest` URI is reduced to
 * `rest`, and a value that is entirely an e-mail address is reduced to its
 * local part, before tokenizing. Tokens are then optionally lowercased,
 * capitalized (first character uppercased) and de-duplicated, in that order.
 *
 * Options that are omitted, or explicitly set to `undefined`, fall back to
 * `TOKENIZE_DEFAULTS`.
 *
 * @param options - Tokenizer switches.
 * @param values - Values to tokenize, processed in order.
 * @returns A generator yielding tokens in input order.
 */
export function* tokenizeWithOptions(
  options: TokenizeOptions,
  ...values: unknown[]
): Generator<string> {
  // Resolve field by field with `??` instead of spreading `options` over the
  // defaults: a spread copies explicit `undefined` values, which would
  // silently turn e.g. `{ camelCase: undefined }` into "camel-case off".
  const given: TokenizeOptions = options ?? {};
  const opts: ResolvedTokenizeOptions = {
    distinct: given.distinct ?? TOKENIZE_DEFAULTS.distinct,
    lowerCase: given.lowerCase ?? TOKENIZE_DEFAULTS.lowerCase,
    capitalize: given.capitalize ?? TOKENIZE_DEFAULTS.capitalize,
    omitUriScheme: given.omitUriScheme ?? TOKENIZE_DEFAULTS.omitUriScheme,
    omitEmailDomain: given.omitEmailDomain ?? TOKENIZE_DEFAULTS.omitEmailDomain,
    camelCase: given.camelCase ?? TOKENIZE_DEFAULTS.camelCase,
  };
  const seen = opts.distinct ? new Set<string>() : undefined;
  const regexp = opts.camelCase
    ? TOKENIZE_CAMEL_CASE_REGEXP
    : TOKENIZE_NON_ALPHANUMERIC_REGEXP;

  for (const value of values) {
    if (value == null) continue;
    let stringValue = typeof value === "string" ? value : String(value);
    if (!stringValue) continue;
    if (opts.omitUriScheme) {
      // Group 2 is the part after `://`; it is absent for a bare `scheme://`.
      const match = stringValue.match(URI_REGEXP);
      if (match) stringValue = match[2] ?? "";
    }
    if (opts.omitEmailDomain) {
      // Group 1 is the local part (before `@`).
      const match = stringValue.match(EMAIL_REGEXP);
      if (match) stringValue = match[1] ?? "";
    }
    // Stripping may have left nothing to tokenize.
    if (!stringValue) continue;
    for (const tokenMatch of stringValue.matchAll(regexp)) {
      let token = tokenMatch[0]!;
      if (opts.lowerCase) token = token.toLowerCase();
      if (opts.capitalize) token = token.charAt(0).toUpperCase() + token.slice(1);
      // De-duplication compares the token after the case changes above.
      if (!token || seen?.has(token)) continue;
      seen?.add(token);
      yield token;
    }
  }
}
89
+
90
/**
 * Tokenize `values` with the default tokenizer settings.
 *
 * Shorthand for {@link tokenizeWithOptions} called with an empty options
 * object.
 */
export function* tokenize(...values: unknown[]): Generator<string> {
  const defaultsOnly: TokenizeOptions = {};
  yield* tokenizeWithOptions(defaultsOnly, ...values);
}
93
+
94
/**
 * Join tokenized values with `delimiter`, always lowercased.
 *
 * When the next token would push the result over `maxLength`:
 * - `trim` stops adding tokens and returns what has been accepted so far;
 * - `empty` returns `""`;
 * - `hash` appends a digest of the accepted tokens plus the overflowing
 *   token if the result still fits, otherwise returns `""`.
 *
 * Note that the digest covers only the tokens up to and including the first
 * overflowing one; tokens after it do not influence the result.
 *
 * Options that are omitted, or explicitly set to `undefined`, fall back to
 * `IDENTIFIER_DEFAULTS`.
 *
 * @param options - Identifier settings (tokenizer switches, length limit,
 *   truncation behaviour, delimiter).
 * @param values - Values to tokenize and join, processed in order.
 * @returns The joined identifier, possibly truncated or empty.
 */
export function toIdentifierWithOptions(
  options: IdentifierOptions,
  ...values: unknown[]
): string {
  // A plain spread would copy explicit `undefined` values over the defaults
  // (e.g. `delimiter: undefined` makes `opts.delimiter.length` throw), so
  // drop those entries before merging.
  const provided = Object.fromEntries(
    Object.entries(options ?? {}).filter(([, value]) => value !== undefined),
  ) as IdentifierOptions;
  const opts: ResolvedIdentifierOptions = {
    ...IDENTIFIER_DEFAULTS,
    ...provided,
    // Identifiers are always lowercased, whatever the caller passed.
    lowerCase: true,
  };
  const tokens: string[] = [];
  let currentLength = 0;

  for (const token of tokenizeWithOptions(opts, ...values)) {
    // No delimiter is needed in front of the very first token.
    const sepLength = tokens.length > 0 ? opts.delimiter.length : 0;
    const nextLength = currentLength + sepLength + token.length;

    if (nextLength > opts.maxLength) {
      if (opts.truncateStrategy === "empty") return "";
      if (opts.truncateStrategy === "trim") break;

      // "hash": replace the overflowing token with a digest, if that fits.
      const hash = digestTokens(opts.truncateHashLength, tokens, token);
      if (currentLength + sepLength + hash.length <= opts.maxLength) {
        return tokens.length > 0
          ? tokens.join(opts.delimiter) + opts.delimiter + hash
          : hash;
      }
      return "";
    }

    tokens.push(token);
    currentLength = nextLength;
  }

  return tokens.join(opts.delimiter);
}
135
+
136
/**
 * Build an identifier from `values` using the default identifier settings.
 *
 * Shorthand for {@link toIdentifierWithOptions} called with an empty options
 * object.
 */
export function toIdentifier(...values: unknown[]): string {
  const defaultsOnly: IdentifierOptions = {};
  return toIdentifierWithOptions(defaultsOnly, ...values);
}
139
+
140
/**
 * Build a slug: an identifier produced by {@link toIdentifierWithOptions}
 * whose delimiter is always `-`.
 *
 * The parameter type is {@link KeyOptions}, which has no `delimiter` field,
 * and the delimiter is set after the caller's options are copied, so it
 * cannot be overridden.
 *
 * @param options - Key settings (tokenizer switches, length limit,
 *   truncation behaviour).
 * @param values - Values to tokenize and join.
 * @returns The `-`-joined slug.
 */
export function toSlugWithOptions(options: KeyOptions, ...values: unknown[]): string {
  const slugOptions: IdentifierOptions = { ...options, delimiter: "-" };
  return toIdentifierWithOptions(slugOptions, ...values);
}
148
+
149
/**
 * Build a slug from `values` using the default settings.
 *
 * Shorthand for {@link toSlugWithOptions} called with an empty options
 * object.
 */
export function toSlug(...values: unknown[]): string {
  const defaultsOnly: KeyOptions = {};
  return toSlugWithOptions(defaultsOnly, ...values);
}
152
+
153
/**
 * Return `value` trimmed, or `null` when there is nothing usable.
 *
 * `null` is returned for anything that is not a string (including
 * `undefined` and `null`) and for strings that are empty once trimmed.
 * Replaces the recurring
 *
 * ```ts
 * typeof v === "string" && v.trim() ? v.trim() : null
 * ```
 *
 * pattern, which is convenient for header / query / form extraction where
 * callers want a `string | null` they can feed into `??` or `if (x)`.
 *
 * @param value - Any value; only strings can produce a non-null result.
 * @returns The trimmed string, or `null`.
 */
export function trimToNull(value: unknown): string | null {
  if (typeof value === "string") {
    const stripped = value.trim();
    if (stripped.length > 0) return stripped;
  }
  return null;
}
171
+
172
/**
 * Return the first usable (non-empty after trimming) string in `value`.
 *
 * An array is scanned in order; members that are not strings or that are
 * blank are skipped, and the first remaining one is returned trimmed. Any
 * other value is treated as a single candidate. Returns `null` when no
 * usable string is found. This suits header-style inputs that may arrive
 * either as a single string or as an array of repeated values.
 *
 * @param value - A string, an array of candidates, or anything else.
 * @returns The first trimmed non-empty string, or `null`.
 */
export function firstNonEmpty(value: unknown): string | null {
  const candidates: readonly unknown[] = Array.isArray(value) ? value : [value];
  for (const candidate of candidates) {
    const usable = trimToNull(candidate);
    if (usable !== null) return usable;
  }
  return null;
}
189
+
190
/**
 * Tagged-template helper that collapses a multi-line indented
 * template literal into a single space-joined string. Lets call
 * sites write Zod `.describe()` blocks, tool descriptions, and other
 * long prose constants as readable indented paragraphs in source
 * while still emitting clean single-line text. Interpolated values
 * are converted with `String(...)` and folded with the surrounding
 * whitespace.
 *
 * Template segments containing an invalid escape sequence (for
 * example `\u` in `C:\users`) have no cooked value; their raw source
 * text is used instead of the literal text "undefined".
 *
 * ```ts
 * toDescription`
 *   Ask the Genie space "${alias}" a question.
 *   Pass the answer through as-is.
 * `;
 * // -> 'Ask the Genie space "default" a question. Pass the answer through as-is.'
 * ```
 *
 * @param strings - Literal segments supplied by the template tag call.
 * @param values - Interpolated values, one between each pair of segments.
 * @returns The text with every whitespace run collapsed to one space and
 *   leading/trailing whitespace removed.
 */
export function toDescription(
  strings: TemplateStringsArray,
  ...values: unknown[]
): string {
  let out = "";
  for (let i = 0; i < strings.length; i += 1) {
    // The cooked segment is `undefined` when the segment holds an invalid
    // escape sequence; fall back to the raw text rather than appending
    // the string "undefined".
    out += strings[i] ?? strings.raw?.[i] ?? "";
    if (i < values.length) out += String(values[i]);
  }
  return out.replace(/\s+/g, " ").trim();
}
219
+
220
/**
 * Slugify `value` and **always** append a short deterministic hash.
 *
 * The readable part is built by {@link toIdentifierWithOptions} with the
 * `"trim"` truncation strategy, so it never exceeds `slugMaxLength`. The
 * hash is computed over the raw `value`, not over the slug, so two inputs
 * that produce the same readable part still receive different suffixes
 * (barring hash collisions). Use it for tool ids, cache keys and similar
 * ids that should stay readable yet collision-resistant.
 *
 * This differs from `toIdentifierWithOptions({ maxLength,
 * truncateStrategy: "hash" })`, which adds a hash only when the slug
 * overflows `maxLength`.
 *
 * @param value - Source string (typically a tool/agent description).
 * @param options.delimiter - Token separator, also used before the hash
 *   (default `"_"`).
 * @param options.slugMaxLength - Cap on the readable part, i.e. the part
 *   before the hash (default 32).
 * @param options.hashLength - Length requested from the hash helper
 *   (default 6).
 * @param options.fallbackPrefix - Used in place of the readable part when
 *   it comes out empty, e.g. for punctuation-only input (default `"id"`).
 * @returns `<slug or fallbackPrefix><delimiter><hash>`.
 */
export function toUniqueSlug(
  value: string,
  options: {
    delimiter?: string;
    slugMaxLength?: number;
    hashLength?: number;
    fallbackPrefix?: string;
  } = {},
): string {
  const separator = options.delimiter ?? "_";
  const readableLimit = options.slugMaxLength ?? 32;
  const digestLength = options.hashLength ?? 6;
  const emptyStandIn = options.fallbackPrefix ?? "id";

  const readable = toIdentifierWithOptions(
    { delimiter: separator, maxLength: readableLimit, truncateStrategy: "trim" },
    value,
  );
  const digest = fnvHashWithOptions({ length: digestLength }, value);

  // An empty readable part (nothing tokenizable, or the first token alone
  // exceeds the limit) is replaced by the fallback prefix.
  const head = readable || emptyStandIn;
  return `${head}${separator}${digest}`;
}
266
+
267
/**
 * Hash a token list into a string of the requested length.
 *
 * Every token, and `extra` when given, is terminated with a NUL character
 * before hashing, so different token boundaries produce different hash
 * inputs (`["ab", "c"]` vs `["a", "bc"]`).
 *
 * @param length - Length passed through to the hash helper.
 * @param parts - Tokens accepted so far.
 * @param extra - Optional additional token appended after `parts`.
 * @returns The hash produced by `fnvHashWithOptions`.
 */
function digestTokens(
  length: number,
  parts: readonly string[],
  extra?: string,
): string {
  const pieces = extra === undefined ? parts : [...parts, extra];
  const payload = pieces.map((piece) => piece + "\0").join("");
  return fnvHashWithOptions({ length }, payload);
}