@theclawlab/xdb 1.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js ADDED
@@ -0,0 +1,2197 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/cli.ts
4
+ import { Command } from "commander";
5
+ import { readFileSync } from "fs";
6
+ import { join as join8, dirname as dirname2 } from "path";
7
+ import { fileURLToPath } from "url";
8
+
9
+ // src/commands/col.ts
10
+ import { homedir } from "os";
11
+ import { join as join2 } from "path";
12
+
13
+ // src/collection-manager.ts
14
+ import { readdir, mkdir, rm, stat, readFile, writeFile } from "fs/promises";
15
+ import { join } from "path";
16
+
17
+ // src/errors.ts
18
/** Exit code attached to errors caused by bad parameters / user input. */
var PARAMETER_ERROR = 2;

/** Exit code attached to errors raised while a command is running. */
var RUNTIME_ERROR = 1;

/**
 * Error that carries the process exit code to use when it is reported.
 *
 * @param {number} exitCode Exit code to terminate with (see the constants above).
 * @param {string} message Human-readable description.
 */
var XDBError = class extends Error {
  constructor(exitCode, message) {
    super(message);
    // Same own properties, in the same order: exitCode first, then name.
    Object.assign(this, { exitCode, name: "XDBError" });
  }
};
27
/**
 * Write a one-line "Error: <message>" report to stderr.
 *
 * @param {{message: unknown}} err Object whose `message` is printed.
 */
function outputError(err) {
  const { message } = err;
  process.stderr.write(`Error: ${message}\n`);
}
31
/**
 * Report an error on stderr and terminate the process.
 *
 * An XDBError exits with its own `exitCode`. Anything else is wrapped in an
 * XDBError with RUNTIME_ERROR first, so it is printed the same way.
 *
 * @param {unknown} err The value that was thrown.
 */
function handleError(err) {
  if (err instanceof XDBError) {
    outputError(err);
    process.exit(err.exitCode);
  }
  let text;
  if (err instanceof Error) {
    text = err.message;
  } else {
    text = String(err);
  }
  const wrapped = new XDBError(RUNTIME_ERROR, text);
  outputError(wrapped);
  process.exit(wrapped.exitCode);
}
41
+
42
+ // src/collection-manager.ts
43
/**
 * Filesystem-backed registry of collections.
 *
 * Each collection is a directory `<dataRoot>/collections/<name>` containing a
 * `collection_meta.json` file (name, policy, creation time and, once recorded,
 * the embedding dimension).
 */
var CollectionManager = class {
  collectionsDir;

  /** @param {string} dataRoot Directory under which `collections/` is kept. */
  constructor(dataRoot) {
    this.dataRoot = dataRoot;
    this.collectionsDir = join(dataRoot, "collections");
  }

  /** Ensure the dataRoot and collections directory exist. */
  async ensureRoot() {
    await mkdir(this.collectionsDir, { recursive: true });
  }

  /**
   * Map a collection name to its directory.
   *
   * The name is used as a single path segment, so anything that could escape
   * the collections directory is rejected. Without this check, removing ".."
   * or "" would recursively delete the data root or every collection.
   *
   * @param {string} name Collection name.
   * @returns {string} Absolute or relative path, depending on `dataRoot`.
   * @throws {XDBError} PARAMETER_ERROR when the name is not a safe path segment.
   */
  collectionPath(name) {
    const unsafe =
      typeof name !== "string" ||
      name === "" ||
      name === "." ||
      name === ".." ||
      /[\\/\0]/.test(name);
    if (unsafe) {
      throw new XDBError(
        PARAMETER_ERROR,
        `Invalid collection name ${JSON.stringify(name)}: it must be non-empty, must not be "." or "..", and must not contain path separators`
      );
    }
    return join(this.collectionsDir, name);
  }

  /** Path of the metadata file of a collection. */
  metaPath(name) {
    return join(this.collectionPath(name), "collection_meta.json");
  }

  /**
   * Create a new collection directory and write collection_meta.json.
   * Throws PARAMETER_ERROR if the collection already exists or the name is invalid.
   */
  async init(name, policy2) {
    await this.ensureRoot();
    if (await this.exists(name)) {
      throw new XDBError(PARAMETER_ERROR, `Collection "${name}" already exists`);
    }
    await mkdir(this.collectionPath(name), { recursive: true });
    const meta = {
      name,
      policy: policy2,
      createdAt: new Date().toISOString()
    };
    await writeFile(this.metaPath(name), JSON.stringify(meta, null, 2), "utf-8");
  }

  /**
   * Scan the collections directory and return info for each collection.
   * Entries that are not directories, or whose metadata cannot be read, are skipped.
   */
  async list() {
    await this.ensureRoot();
    let entries;
    try {
      entries = await readdir(this.collectionsDir);
    } catch {
      return [];
    }
    const results = [];
    for (const entry of entries) {
      try {
        // Inside the try: a directory entry with an unsupported name is
        // skipped like any other unreadable entry instead of aborting the scan.
        const colPath = this.collectionPath(entry);
        const s = await stat(colPath);
        if (!s.isDirectory()) continue;
        const meta = await this.load(entry);
        const sizeBytes = await this.calcDirSize(colPath);
        const recordCount = await this.countRecords(colPath, meta.policy);
        results.push({
          name: meta.name,
          policy: `${meta.policy.main}/${meta.policy.minor}`,
          recordCount,
          sizeBytes,
          ...(meta.embeddingDimension !== undefined ? { embeddingDimension: meta.embeddingDimension } : {})
        });
      } catch {
        // Listing is best-effort: one broken collection must not hide the others.
        continue;
      }
    }
    return results;
  }

  /**
   * Count records in a collection by opening the appropriate engine.
   *
   * SQLite is tried first when the policy uses it. If it yields no usable
   * count (module or database file missing, query failed) and the policy
   * also uses LanceDB, LanceDB is consulted. Returns 0 when no engine can
   * provide a count.
   */
  async countRecords(colPath, policy2) {
    const hasSqlite = policy2.main === "hybrid" || policy2.main === "relational";
    const hasLance = policy2.main === "hybrid" || policy2.main === "vector";
    if (hasSqlite) {
      const count = await this.#countSqliteRecords(colPath);
      if (count !== null) return count;
    }
    if (hasLance) {
      const count = await this.#countLanceRecords(colPath);
      if (count !== null) return count;
    }
    return 0;
  }

  /** Row count of `records` in relational.db, or null when unavailable. */
  async #countSqliteRecords(colPath) {
    try {
      const dbPath = join(colPath, "relational.db");
      await stat(dbPath); // rejects when the database file does not exist
      const { default: Database2 } = await import("better-sqlite3");
      const db = new Database2(dbPath, { readonly: true });
      try {
        const row = db.prepare("SELECT COUNT(*) as cnt FROM records").get();
        return row?.cnt ?? 0;
      } finally {
        db.close();
      }
    } catch {
      return null;
    }
  }

  /** Row count of the `data` table in vector.lance, or null when unavailable. */
  async #countLanceRecords(colPath) {
    try {
      const dbPath = join(colPath, "vector.lance");
      await stat(dbPath); // rejects when the dataset directory does not exist
      const lancedb2 = await import("@lancedb/lancedb");
      const db = await lancedb2.connect(dbPath);
      try {
        const tableNames = await db.tableNames();
        if (!tableNames.includes("data")) return 0;
        const table = await db.openTable("data");
        try {
          return await table.countRows();
        } finally {
          table.close();
        }
      } finally {
        // Always release the connection, including when counting failed.
        db.close();
      }
    } catch {
      return null;
    }
  }

  /**
   * Recursively delete a collection directory.
   * Throws PARAMETER_ERROR if the collection doesn't exist or the name is invalid.
   */
  async remove(name) {
    await this.ensureRoot();
    if (!(await this.exists(name))) {
      throw new XDBError(PARAMETER_ERROR, `Collection "${name}" does not exist`);
    }
    await rm(this.collectionPath(name), { recursive: true, force: true });
  }

  /**
   * Read and parse collection_meta.json for a collection.
   * Throws PARAMETER_ERROR if the collection doesn't exist and RUNTIME_ERROR
   * if the metadata file cannot be read or parsed.
   */
  async load(name) {
    if (!(await this.exists(name))) {
      throw new XDBError(PARAMETER_ERROR, `Collection "${name}" does not exist`);
    }
    try {
      const raw = await readFile(this.metaPath(name), "utf-8");
      return JSON.parse(raw);
    } catch (err) {
      if (err instanceof XDBError) throw err;
      const reason = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `Failed to read metadata for collection "${name}": ${reason}`);
    }
  }

  /**
   * Check if a collection directory exists.
   * Throws PARAMETER_ERROR for an invalid name rather than reporting "missing".
   */
  async exists(name) {
    // Resolved outside the try so an invalid name is not mistaken for a
    // collection that simply does not exist.
    const target = this.collectionPath(name);
    try {
      const s = await stat(target);
      return s.isDirectory();
    } catch {
      return false;
    }
  }

  /**
   * Update the embeddingDimension in collection_meta.json.
   * Only writes if the current meta has no embeddingDimension set.
   * Throws PARAMETER_ERROR if dimension conflicts with existing value.
   */
  async recordEmbeddingDimension(name, dimension) {
    const meta = await this.load(name);
    if (meta.embeddingDimension !== undefined) {
      if (meta.embeddingDimension !== dimension) {
        throw new XDBError(
          PARAMETER_ERROR,
          `Embedding dimension mismatch for collection "${name}": expected ${meta.embeddingDimension}, got ${dimension}. This usually means the embedding model has changed. Remove and recreate the collection to use a different model.`
        );
      }
      return;
    }
    meta.embeddingDimension = dimension;
    await writeFile(this.metaPath(name), JSON.stringify(meta, null, 2), "utf-8");
  }

  /**
   * Total size in bytes of all files below a directory (recursive).
   * Entries that cannot be inspected are skipped, so the result is a
   * best-effort lower bound rather than an error.
   */
  async calcDirSize(dirPath) {
    let entries;
    try {
      entries = await readdir(dirPath);
    } catch {
      return 0;
    }
    let total = 0;
    for (const entry of entries) {
      const entryPath = join(dirPath, entry);
      try {
        const s = await stat(entryPath);
        if (s.isFile()) {
          total += s.size;
        } else if (s.isDirectory()) {
          total += await this.calcDirSize(entryPath);
        }
      } catch {
        // One unreadable entry should not discard the rest of the total.
      }
    }
    return total;
  }
};
241
+
242
+ // src/policy-registry.ts
243
/**
 * Built-in policies, keyed by "<main>/<minor>".
 * The key is derived from each entry so the two can never drift apart.
 */
var BUILTIN_POLICIES = Object.fromEntries(
  [
    {
      main: "hybrid",
      minor: "knowledge-base",
      fields: { content: { findCaps: ["similar", "match"] } },
      autoIndex: true
    },
    { main: "relational", minor: "structured-logs", fields: {}, autoIndex: true },
    { main: "relational", minor: "simple-kv", fields: {}, autoIndex: false },
    {
      main: "vector",
      minor: "feature-store",
      fields: { tensor: { findCaps: ["similar"] } },
      autoIndex: false
    }
  ].map((policy) => [`${policy.main}/${policy.minor}`, policy])
);

/** Minor policy used when only the main engine type is given. */
var DEFAULT_MINORS = {
  hybrid: "knowledge-base",
  relational: "structured-logs",
  vector: "feature-store"
};

/** findCaps values each main engine type accepts. */
var ALLOWED_CAPS = {
  hybrid: new Set(["similar", "match"]),
  relational: new Set(["match"]),
  vector: new Set(["similar"])
};
279
/**
 * Resolves policy names to full policy configurations and validates them.
 */
var PolicyRegistry = class {
  /**
   * Resolve a policy string like "hybrid/knowledge-base" or just "hybrid"
   * into a full PolicyConfig, optionally applying `params` overrides.
   *
   * Supported overrides: `params.fields` (a map of field name to
   * `{ findCaps: string[] }`, added to or replacing the built-in fields) and
   * `params.autoIndex`. A null/undefined `params` means "no overrides".
   *
   * @throws {XDBError} PARAMETER_ERROR for an unknown policy or malformed params.
   */
  resolve(policyStr, params) {
    const unknownPolicy = (name) =>
      new XDBError(
        PARAMETER_ERROR,
        `Unknown policy "${name}". Available policies: ${Object.keys(BUILTIN_POLICIES).join(", ")}`
      );

    let fullName = policyStr;
    if (!policyStr.includes("/")) {
      // Own-property check: names such as "constructor" must not resolve to
      // something inherited from Object.prototype.
      if (!Object.hasOwn(DEFAULT_MINORS, policyStr)) {
        throw unknownPolicy(policyStr);
      }
      fullName = `${policyStr}/${DEFAULT_MINORS[policyStr]}`;
    }
    if (!Object.hasOwn(BUILTIN_POLICIES, fullName)) {
      throw unknownPolicy(fullName);
    }
    const builtin = BUILTIN_POLICIES[fullName];

    const config2 = {
      main: builtin.main,
      minor: builtin.minor,
      fields: deepCloneFields(builtin.fields),
      ...(builtin.autoIndex !== undefined ? { autoIndex: builtin.autoIndex } : {})
    };

    if (params === undefined || params === null) {
      return config2;
    }
    const isPlainObject = (value) =>
      value !== null && typeof value === "object" && !Array.isArray(value);
    if (!isPlainObject(params)) {
      throw new XDBError(PARAMETER_ERROR, "Invalid policy params: expected a JSON object");
    }

    if (params.fields !== undefined) {
      if (!isPlainObject(params.fields)) {
        throw new XDBError(PARAMETER_ERROR, 'Invalid policy params: "fields" must be an object');
      }
      for (const [fieldName, fieldConfig] of Object.entries(params.fields)) {
        // Assigning to "__proto__" would replace the prototype of the fields
        // map instead of adding a field.
        if (fieldName === "__proto__") {
          throw new XDBError(PARAMETER_ERROR, 'Invalid policy params: "__proto__" is not a valid field name');
        }
        const caps = isPlainObject(fieldConfig) ? fieldConfig.findCaps : undefined;
        if (!Array.isArray(caps) || !caps.every((cap) => typeof cap === "string")) {
          throw new XDBError(
            PARAMETER_ERROR,
            `Invalid policy params: field "${fieldName}" must have a "findCaps" array of strings`
          );
        }
        config2.fields[fieldName] = { findCaps: [...caps] };
      }
    }
    if (params.autoIndex !== undefined) {
      config2.autoIndex = params.autoIndex;
    }
    return config2;
  }

  /**
   * Validate that a PolicyConfig's findCaps are compatible with its main engine type.
   * Throws XDBError(PARAMETER_ERROR) if validation fails.
   */
  validate(config2) {
    if (!Object.hasOwn(ALLOWED_CAPS, config2.main)) {
      throw new XDBError(PARAMETER_ERROR, `Unknown main engine type "${config2.main}"`);
    }
    const allowed = ALLOWED_CAPS[config2.main];
    for (const [fieldName, fieldConfig] of Object.entries(config2.fields)) {
      for (const cap of fieldConfig.findCaps) {
        if (!allowed.has(cap)) {
          throw new XDBError(
            PARAMETER_ERROR,
            `findCaps "${cap}" is not compatible with engine type "${config2.main}" (field: "${fieldName}")`
          );
        }
      }
    }
  }

  /** List all available built-in policies as independent copies. */
  listPolicies() {
    return Object.values(BUILTIN_POLICIES).map((p) => ({
      main: p.main,
      minor: p.minor,
      fields: deepCloneFields(p.fields),
      ...(p.autoIndex !== undefined ? { autoIndex: p.autoIndex } : {})
    }));
  }
};
348
/**
 * Copy a policy `fields` map so the copy can be modified freely.
 * Each entry keeps only its `findCaps`, duplicated into a new array.
 */
function deepCloneFields(fields) {
  const copy = {};
  for (const name of Object.keys(fields)) {
    const { findCaps } = fields[name];
    copy[name] = { findCaps: [...findCaps] };
  }
  return copy;
}
355
+
356
+ // src/commands/col.ts
357
/** Directory where xdb keeps its data: `<home>/.local/share/xdb`. */
function getDataRoot() {
  const segments = [".local", "share", "xdb"];
  return join2(homedir(), ...segments);
}
360
/**
 * Format a byte count for display using binary (1024-based) units.
 *
 * Values below 1024 are printed as whole bytes; larger values use one decimal
 * place in KB, MB, GB or TB (TB is also used for anything larger).
 *
 * @param {number} bytes Size in bytes.
 * @returns {string} For example "512 B", "1.5 KB", "5.0 GB".
 */
function formatBytes(bytes) {
  if (bytes < 1024) return `${bytes} B`;
  const units = ["KB", "MB", "GB", "TB"];
  let value = bytes / 1024;
  let unitIndex = 0;
  while (value >= 1024 && unitIndex < units.length - 1) {
    value /= 1024;
    unitIndex += 1;
  }
  return `${value.toFixed(1)} ${units[unitIndex]}`;
}
365
/**
 * Register the collection sub-commands (`init`, `list`, `info`, `rm`) on the
 * given command object.
 *
 * Every action reports failures through `handleError`.
 *
 * @param col2 Command object offering the chainable `command()`,
 *   `description()`, `option()`, `requiredOption()` and `action()` methods.
 */
function registerColCommands(col2) {
  col2
    .command("init <name>")
    .description("Initialize a new collection")
    .requiredOption("--policy <policy>", "Policy name (main/minor format)")
    .option("--params <json>", "Custom parameters as JSON to override policy defaults")
    .action(async (name, opts) => {
      try {
        const registry = new PolicyRegistry();
        let params;
        if (opts.params) {
          try {
            params = JSON.parse(opts.params);
          } catch {
            throw new XDBError(PARAMETER_ERROR, `Invalid JSON for --params: ${opts.params}`);
          }
          // Valid JSON that is not an object (a number, string, array, null)
          // cannot carry overrides; reject it instead of silently ignoring it.
          if (params === null || typeof params !== "object" || Array.isArray(params)) {
            throw new XDBError(PARAMETER_ERROR, `--params must be a JSON object: ${opts.params}`);
          }
        }
        const config2 = registry.resolve(opts.policy, params);
        registry.validate(config2);
        const manager = new CollectionManager(getDataRoot());
        await manager.init(name, config2);
        process.stderr.write(`Collection "${name}" created (policy: ${config2.main}/${config2.minor})\n`);
      } catch (err) {
        handleError(err);
      }
    });

  col2
    .command("list")
    .description("List all collections")
    .option("--json", "Output as JSON array")
    .action(async (opts) => {
      try {
        const manager = new CollectionManager(getDataRoot());
        const collections = await manager.list();
        if (opts.json) {
          process.stdout.write(JSON.stringify(collections) + "\n");
          return;
        }
        if (collections.length === 0) {
          process.stderr.write("No collections found.\n");
          return;
        }
        for (const info of collections) {
          const dim = info.embeddingDimension ? `, dim=${info.embeddingDimension}` : "";
          process.stdout.write(
            `${info.name} policy=${info.policy} records=${info.recordCount} size=${formatBytes(info.sizeBytes)}${dim}\n`
          );
        }
      } catch (err) {
        handleError(err);
      }
    });

  col2
    .command("info <name>")
    .description("Show detailed information about a collection")
    .option("--json", "Output as JSON")
    .action(async (name, opts) => {
      try {
        const manager = new CollectionManager(getDataRoot());
        const meta = await manager.load(name);
        // Measure only the requested collection. Going through list() would
        // open and size every collection just to pick one entry out of it.
        const colPath = manager.collectionPath(name);
        const [sizeBytes, recordCount] = await Promise.all([
          manager.calcDirSize(colPath),
          manager.countRecords(colPath, meta.policy)
        ]);
        const info = {
          name: meta.name,
          createdAt: meta.createdAt,
          policy: `${meta.policy.main}/${meta.policy.minor}`,
          engines: meta.policy.main,
          autoIndex: meta.policy.autoIndex ?? false,
          fields: meta.policy.fields,
          embeddingDimension: meta.embeddingDimension ?? null,
          recordCount,
          sizeBytes,
          path: colPath
        };
        if (opts.json) {
          process.stdout.write(JSON.stringify(info) + "\n");
          return;
        }
        process.stdout.write(`name: ${info.name}\n`);
        process.stdout.write(`createdAt: ${info.createdAt}\n`);
        process.stdout.write(`path: ${info.path}\n`);
        process.stdout.write(`policy: ${info.policy}\n`);
        process.stdout.write(`engines: ${info.engines}\n`);
        process.stdout.write(`autoIndex: ${info.autoIndex}\n`);
        process.stdout.write(`records: ${info.recordCount}\n`);
        process.stdout.write(`size: ${formatBytes(info.sizeBytes)}\n`);
        if (info.embeddingDimension) {
          process.stdout.write(`embedDim: ${info.embeddingDimension}\n`);
        }
        const fieldEntries = Object.entries(info.fields);
        if (fieldEntries.length > 0) {
          process.stdout.write(`fields:\n`);
          for (const [f, cfg] of fieldEntries) {
            process.stdout.write(` ${f} findCaps=[${cfg.findCaps.join(", ")}]\n`);
          }
        } else {
          process.stdout.write(`fields: (none)\n`);
        }
      } catch (err) {
        handleError(err);
      }
    });

  col2
    .command("rm <name>")
    .description("Remove a collection")
    .action(async (name) => {
      try {
        const manager = new CollectionManager(getDataRoot());
        await manager.remove(name);
        process.stderr.write(`Collection "${name}" removed.\n`);
      } catch (err) {
        handleError(err);
      }
    });
}
479
+
480
+ // src/commands/policy.ts
481
/** Storage engines behind each main policy type, as shown to the user. */
var ENGINE_DESC = {
  hybrid: "LanceDB + SQLite",
  relational: "SQLite",
  vector: "LanceDB"
};

/**
 * Register the deprecated `list` sub-command, which prints the built-in
 * policies either as a JSON array or as a short text block per policy.
 *
 * @param policy2 Command object offering the chainable `command()`,
 *   `description()`, `option()` and `action()` methods.
 */
function registerPolicyCommands(policy2) {
  const listCommand = policy2
    .command("list")
    .description("List all available built-in policies")
    .option("--json", "Output as JSON array");

  listCommand.action((opts) => {
    process.stderr.write("[Deprecated] xdb policy list is deprecated. Use 'xdb config' instead.\n");
    try {
      const policies = new PolicyRegistry().listPolicies();
      if (opts.json) {
        process.stdout.write(JSON.stringify(policies) + "\n");
        return;
      }
      for (const p of policies) {
        const described = Object.keys(p.fields).map(
          (f) => `${f} [${p.fields[f].findCaps.join(", ")}]`
        );
        const lines = [
          `${p.main}/${p.minor}\n`,
          ` engines: ${ENGINE_DESC[p.main] ?? p.main}\n`,
          ` fields: ${described.length > 0 ? described.join("; ") : "(none)"}\n`,
          ` autoIndex: ${p.autoIndex ? "yes" : "no"}\n`
        ];
        for (const line of lines) {
          process.stdout.write(line);
        }
      }
    } catch (err) {
      handleError(err);
    }
  });
}
519
+
520
+ // src/commands/put.ts
521
+ import { homedir as homedir3 } from "os";
522
+ import { join as join6 } from "path";
523
+
524
+ // src/embedding-client.ts
525
/** Base URLs used when a provider is configured without an explicit baseUrl. */
var PROVIDER_DEFAULT_BASE_URLS = {
  openai: "https://api.openai.com"
};

/**
 * HTTP client for an embeddings endpoint.
 *
 * Two request flavours are supported: the `/v1/embeddings` route with a
 * Bearer token, and the Azure OpenAI deployment route with an `api-key`
 * header. Azure mode is selected when `config.api` (or, if absent,
 * `config.provider`) is "azure-openai" or "azure-openai-responses".
 */
var EmbeddingClient = class _EmbeddingClient {
  endpoint;
  apiKey;
  model;
  isAzure;

  /**
   * @param config2 `{ provider, apiKey, model, baseUrl?, api?, providerOptions? }`
   * @throws {XDBError} PARAMETER_ERROR when no endpoint can be derived.
   */
  constructor(config2) {
    this.apiKey = config2.apiKey;
    this.model = config2.model;
    const apiType = config2.api ?? config2.provider;
    this.isAzure = apiType === "azure-openai-responses" || apiType === "azure-openai";
    this.endpoint = this.isAzure
      ? _EmbeddingClient.resolveAzureEndpoint(config2.baseUrl, config2.model, config2.providerOptions)
      : _EmbeddingClient.resolveEndpoint(config2.provider, config2.baseUrl);
  }

  /**
   * Resolve the full embeddings API endpoint URL.
   * If baseUrl is provided, use it; otherwise fall back to the provider default.
   */
  static resolveEndpoint(provider, baseUrl) {
    const base =
      baseUrl ??
      (Object.hasOwn(PROVIDER_DEFAULT_BASE_URLS, provider) ? PROVIDER_DEFAULT_BASE_URLS[provider] : undefined);
    if (!base) {
      throw new XDBError(
        PARAMETER_ERROR,
        `No base URL configured for provider "${provider}". Please specify a baseUrl.`
      );
    }
    return `${base.replace(/\/+$/, "")}/v1/embeddings`;
  }

  /**
   * Resolve the Azure OpenAI embeddings endpoint URL.
   * Azure format: {baseUrl}/openai/deployments/{deployment}/embeddings?api-version={version}
   *
   * The deployment is `model`, or `providerOptions.azureDeploymentName` when
   * no model is given. An `azureApiVersion` that is not a date, optionally
   * followed by dash-separated suffixes such as "-preview", is replaced by
   * the default "2024-06-01".
   */
  static resolveAzureEndpoint(baseUrl, model, providerOptions) {
    if (!baseUrl) {
      throw new XDBError(
        PARAMETER_ERROR,
        "Azure OpenAI requires a baseUrl. Please specify a baseUrl."
      );
    }
    // `||` rather than `??`: an empty model string must not block the fallback.
    const deployment = model || providerOptions?.azureDeploymentName;
    if (!deployment) {
      throw new XDBError(
        PARAMETER_ERROR,
        "Azure OpenAI requires a deployment name. Specify a model or set providerOptions.azureDeploymentName."
      );
    }
    const apiVersion = providerOptions?.azureApiVersion;
    // Anchored at both ends so a configured value cannot append extra query
    // parameters to the URL.
    const versionPattern = /^\d{4}-\d{2}-\d{2}(?:-[A-Za-z0-9]+)*$/;
    const resolvedVersion =
      typeof apiVersion === "string" && versionPattern.test(apiVersion) ? apiVersion : "2024-06-01";
    const resourceBase = baseUrl.replace(/\/openai\/v1\/?$/, "").replace(/\/+$/, "");
    // The deployment is a single path segment; encode it so characters such
    // as "/" or "?" cannot change the shape of the URL.
    return `${resourceBase}/openai/deployments/${encodeURIComponent(deployment)}/embeddings?api-version=${resolvedVersion}`;
  }

  /**
   * Call the embedding API for the given texts.
   *
   * @param request `{ texts, model }` - both are sent as `input` / `model`.
   * @returns `{ embeddings, model, usage: { promptTokens, totalTokens } }`
   *   with embeddings ordered by the `index` reported by the API. Token
   *   counts are 0 when the response carries no usage information.
   * @throws {XDBError} RUNTIME_ERROR on network failure, a non-2xx status or
   *   a response that is not a usable embeddings payload.
   */
  async embed(request) {
    const body = JSON.stringify({
      model: request.model,
      input: request.texts
    });
    const headers = { "Content-Type": "application/json" };
    if (this.isAzure) {
      headers["api-key"] = this.apiKey;
    } else {
      headers["Authorization"] = `Bearer ${this.apiKey}`;
    }

    let response;
    try {
      response = await fetch(this.endpoint, { method: "POST", headers, body });
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `Network error calling embedding API: ${message}`);
    }

    if (!response.ok) {
      let detail = "";
      try {
        detail = await response.text();
      } catch {
        // The body is only extra context; fall back to the status text.
      }
      throw new XDBError(
        RUNTIME_ERROR,
        `Embedding API error (${response.status}): ${detail || response.statusText}`
      );
    }

    let json;
    try {
      json = await response.json();
    } catch (err) {
      const message = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `Embedding API returned invalid JSON: ${message}`);
    }
    if (!Array.isArray(json?.data)) {
      throw new XDBError(RUNTIME_ERROR, 'Embedding API returned an unexpected response: missing "data" array');
    }
    // Callers index the result by input position, so a short response would
    // otherwise surface later as missing vectors.
    if (Array.isArray(request.texts) && json.data.length !== request.texts.length) {
      throw new XDBError(
        RUNTIME_ERROR,
        `Embedding API returned ${json.data.length} embeddings for ${request.texts.length} inputs`
      );
    }

    const sorted = [...json.data].sort((a, b) => a.index - b.index);
    return {
      embeddings: sorted.map((d) => d.embedding),
      model: json.model,
      usage: {
        promptTokens: json.usage?.prompt_tokens ?? 0,
        totalTokens: json.usage?.total_tokens ?? 0
      }
    };
  }
};
631
+
632
+ // src/config-manager.ts
633
+ import * as fs from "fs/promises";
634
+ import * as path from "path";
635
+ import * as os from "os";
636
/** Default location of xdb's own configuration file. */
var DEFAULT_CONFIG_PATH = path.join(os.homedir(), ".config", "xdb", "default.json");

/** Location of pai's configuration file, used as an embed-settings fallback. */
var PAI_CONFIG_PATH = path.join(os.homedir(), ".config", "pai", "default.json");

/** Configuration returned when no config file exists yet. */
var EMPTY_CONFIG = {
  providers: []
};

/**
 * Loads and saves the xdb configuration and resolves embedding settings
 * (provider, model, API key) from it, from environment variables, or from
 * pai's configuration as a fallback.
 */
var XdbConfigManager = class {
  configPath;
  paiConfigPath;

  /**
   * @param {string} [configPath] Overrides the xdb config location.
   * @param {string} [paiConfigPath] Overrides the pai config location.
   */
  constructor(configPath, paiConfigPath) {
    this.configPath = configPath ?? DEFAULT_CONFIG_PATH;
    this.paiConfigPath = paiConfigPath ?? PAI_CONFIG_PATH;
  }

  /**
   * Read the config file.
   *
   * A missing file yields an empty configuration. `providers` is always an
   * array in the returned object.
   *
   * @throws {XDBError} RUNTIME_ERROR when the file cannot be read, is not
   *   valid JSON, or does not contain a JSON object.
   */
  async load() {
    let raw;
    try {
      raw = await fs.readFile(this.configPath, "utf-8");
    } catch (err) {
      if (err?.code === "ENOENT") {
        return { ...EMPTY_CONFIG, providers: [] };
      }
      const reason = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `Failed to read config file: ${reason}`);
    }
    let parsed;
    try {
      parsed = JSON.parse(raw);
    } catch {
      throw new XDBError(RUNTIME_ERROR, `Config file contains invalid JSON: ${this.configPath}`);
    }
    // Valid JSON that is not an object (null, a number, an array, ...) is a
    // different problem from a syntax error; say so.
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      throw new XDBError(RUNTIME_ERROR, `Config file must contain a JSON object: ${this.configPath}`);
    }
    if (!Array.isArray(parsed.providers)) {
      parsed.providers = [];
    }
    return parsed;
  }

  /**
   * Write the configuration, creating the parent directory if needed.
   *
   * @throws {XDBError} RUNTIME_ERROR when the directory or file cannot be written.
   */
  async save(config2) {
    const dir = path.dirname(this.configPath);
    try {
      await fs.mkdir(dir, { recursive: true });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `Failed to create config directory: ${reason}`);
    }
    try {
      // The file can hold API keys, so a newly created file is restricted to
      // its owner. The mode of an already existing file is left untouched.
      await fs.writeFile(this.configPath, JSON.stringify(config2, null, 2), {
        encoding: "utf-8",
        mode: 0o600
      });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `Failed to write config file: ${reason}`);
    }
  }

  /**
   * Resolve API key for a provider.
   * Priority: XDB_<PROVIDER>_API_KEY env var > config file apiKey
   * (the provider name is upper-cased and "-" becomes "_").
   *
   * @throws {XDBError} PARAMETER_ERROR when neither source has a key.
   */
  async resolveApiKey(providerName) {
    const envVarName = `XDB_${providerName.toUpperCase().replace(/-/g, "_")}_API_KEY`;
    const envValue = process.env[envVarName];
    if (envValue) {
      return envValue;
    }
    const config2 = await this.load();
    const providerConfig = config2.providers.find((p) => p.name === providerName);
    if (providerConfig?.apiKey) {
      return providerConfig.apiKey;
    }
    throw new XDBError(
      PARAMETER_ERROR,
      `No API key found for provider "${providerName}". Set the ${envVarName} environment variable or run: xdb config embed --set-key <apiKey>`
    );
  }

  /**
   * Try to load pai's config as an embed fallback.
   * Returns null if pai's config doesn't exist, is malformed, or lacks a
   * default embed provider/model or an API key for that provider.
   */
  async loadPaiFallback() {
    let pai;
    try {
      pai = JSON.parse(await fs.readFile(this.paiConfigPath, "utf-8"));
    } catch {
      // Missing, unreadable or syntactically invalid: no fallback available.
      return null;
    }
    // Guard the shape before reading from it; this is an optional fallback
    // and must not crash on an unexpected file.
    if (pai === null || typeof pai !== "object") return null;
    const provider = pai.defaultEmbedProvider;
    const model = pai.defaultEmbedModel;
    if (!provider || !model) return null;
    if (!Array.isArray(pai.providers)) return null;
    const paiProvider = pai.providers.find((p) => p?.name === provider);
    if (!paiProvider?.apiKey) return null;
    const providerConfig = {
      name: paiProvider.name,
      apiKey: paiProvider.apiKey,
      ...(paiProvider.baseUrl !== undefined && { baseUrl: paiProvider.baseUrl }),
      ...(paiProvider.api !== undefined && { api: paiProvider.api }),
      ...(paiProvider.providerOptions !== undefined && { providerOptions: paiProvider.providerOptions })
    };
    return { provider, model, providerConfig, apiKey: paiProvider.apiKey };
  }

  /**
   * Resolve the current embed configuration (provider + model + providerConfig + apiKey).
   * Priority:
   * 1. xdb's own config (~/.config/xdb/default.json or XDB_* env vars)
   * 2. pai's config (~/.config/pai/default.json) as fallback
   * Throws XDBError(PARAMETER_ERROR) if neither source has embed config.
   */
  async resolveEmbedConfig() {
    const config2 = await this.load();
    const provider = config2.defaultEmbedProvider;
    const model = config2.defaultEmbedModel;
    if (provider && model) {
      const providerConfig = config2.providers.find((p) => p.name === provider) ?? { name: provider };
      const apiKey = await this.resolveApiKey(provider);
      return { provider, model, providerConfig, apiKey };
    }
    const paiFallback = await this.loadPaiFallback();
    if (paiFallback) {
      return paiFallback;
    }
    throw new XDBError(
      PARAMETER_ERROR,
      "No embed provider configured. Run: xdb config embed --set-provider <name>\nOr configure pai embed settings: pai model default --embed-provider <name> --embed-model <model>"
    );
  }
};
759
+
760
+ // src/embedder.ts
761
/**
 * Turns text into embedding vectors using the configured provider.
 *
 * The embed configuration is resolved again on every call, so configuration
 * changes take effect without recreating the Embedder.
 */
var Embedder = class {
  configManager;

  /** @param [configManager] Source of the embed configuration; defaults to a new XdbConfigManager. */
  constructor(configManager) {
    this.configManager = configManager ?? new XdbConfigManager();
  }

  /**
   * Embed a single text string into a vector.
   * Returns number[] directly (no hex encoding).
   *
   * @throws {XDBError} RUNTIME_ERROR when embedding fails or no vector is returned.
   */
  async embed(text) {
    const [vector] = await this.embedBatch([text]);
    if (vector === undefined) {
      // Returning undefined here would only fail later, far from the cause.
      throw new XDBError(RUNTIME_ERROR, "Embedding failed: the API returned no embedding");
    }
    return vector;
  }

  /**
   * Embed multiple texts in a single batch call.
   * Returns number[][] directly (no hex encoding). An empty input yields an
   * empty result without contacting the API.
   *
   * @throws {XDBError} RUNTIME_ERROR when embedding fails; XDBErrors raised
   *   while resolving the configuration or calling the API pass through.
   */
  async embedBatch(texts) {
    if (Array.isArray(texts) && texts.length === 0) {
      return [];
    }
    const { client, model } = await this.#createClient();
    try {
      const response = await client.embed({ texts, model });
      return response.embeddings;
    } catch (err) {
      if (err instanceof XDBError) throw err;
      throw new XDBError(RUNTIME_ERROR, `Embedding failed: ${err instanceof Error ? err.message : String(err)}`);
    }
  }

  /** Build an EmbeddingClient from the currently resolved embed configuration. */
  async #createClient() {
    const { model, providerConfig, apiKey } = await this.configManager.resolveEmbedConfig();
    const { name: provider, baseUrl, api, providerOptions } = providerConfig;
    const client = new EmbeddingClient({
      provider,
      apiKey,
      model,
      // Optional settings are passed only when present.
      ...(baseUrl !== undefined ? { baseUrl } : {}),
      ...(api !== undefined ? { api } : {}),
      ...(providerOptions !== undefined ? { providerOptions } : {})
    });
    return { client, model };
  }
};
811
+
812
+ // src/data-writer.ts
813
+ import { v4 as uuidv4 } from "uuid";
814
+ var DataWriter = class {
815
+ constructor(policy2, embedder, lanceEngine, sqliteEngine, onEmbeddingDimension) {
816
+ this.policy = policy2;
817
+ this.embedder = embedder;
818
+ this.lanceEngine = lanceEngine;
819
+ this.sqliteEngine = sqliteEngine;
820
+ this.onEmbeddingDimension = onEmbeddingDimension;
821
+ }
822
+ dimensionRecorded = false;
823
+ /** Report embedding dimension on first encounter */
824
+ async reportDimension(vector) {
825
+ if (this.dimensionRecorded || !this.onEmbeddingDimension) return;
826
+ this.dimensionRecorded = true;
827
+ await this.onEmbeddingDimension(vector.length);
828
+ }
829
+ /**
830
+ * Write a single record. Auto-generates UUID if no `id` field present.
831
+ * Routes data to engines based on Policy findCaps configuration.
832
+ */
833
+ async write(record) {
834
+ this.validateRecord(record);
835
+ if (record.id === void 0 || record.id === null) {
836
+ record = { ...record, id: uuidv4() };
837
+ }
838
+ const result = { inserted: 0, updated: 0, errors: 0 };
839
+ const hasSimilarFields = this.getSimilarFields().length > 0;
840
+ const needsSqlite3 = this.needsSqliteWrite();
841
+ if (hasSimilarFields && this.lanceEngine) {
842
+ const vectorRecord = await this.buildVectorRecord(record);
843
+ const lanceResult = await this.lanceEngine.upsert([vectorRecord]);
844
+ result.inserted += lanceResult.inserted;
845
+ result.updated += lanceResult.updated;
846
+ }
847
+ if (needsSqlite3 && this.sqliteEngine) {
848
+ const sqliteResult = this.sqliteEngine.upsert([record]);
849
+ if (!hasSimilarFields || !this.lanceEngine) {
850
+ result.inserted += sqliteResult.inserted;
851
+ result.updated += sqliteResult.updated;
852
+ }
853
+ }
854
+ if ((!hasSimilarFields || !this.lanceEngine) && (!needsSqlite3 || !this.sqliteEngine)) {
855
+ result.inserted = 1;
856
+ }
857
+ return result;
858
+ }
859
+ /**
860
+ * Batch write with transaction optimization and error tolerance.
861
+ * Failed records are skipped with a warning to stderr.
862
+ */
863
+ async writeBatch(records) {
864
+ const result = { inserted: 0, updated: 0, errors: 0 };
865
+ const prepared = [];
866
+ const validIndices = [];
867
+ for (let i = 0; i < records.length; i++) {
868
+ try {
869
+ this.validateRecord(records[i]);
870
+ let rec = records[i];
871
+ if (rec.id === void 0 || rec.id === null) {
872
+ rec = { ...rec, id: uuidv4() };
873
+ }
874
+ prepared.push(rec);
875
+ validIndices.push(i);
876
+ } catch (err) {
877
+ const msg = err instanceof Error ? err.message : String(err);
878
+ process.stderr.write(`Warning: Line ${i + 1}: ${msg}
879
+ `);
880
+ result.errors++;
881
+ }
882
+ }
883
+ if (prepared.length === 0) {
884
+ return result;
885
+ }
886
+ const hasSimilarFields = this.getSimilarFields().length > 0;
887
+ const needsSqlite3 = this.needsSqliteWrite();
888
+ if (hasSimilarFields && this.lanceEngine) {
889
+ const vectorRecords = [];
890
+ const similarFields = this.getSimilarFields();
891
+ const textsPerField = /* @__PURE__ */ new Map();
892
+ for (const field of similarFields) {
893
+ textsPerField.set(field, prepared.map((r) => String(r[field] ?? "")));
894
+ }
895
+ const vectorsPerField = /* @__PURE__ */ new Map();
896
+ for (const [field, texts] of textsPerField) {
897
+ const vectors = await this.embedder.embedBatch(texts);
898
+ if (vectors.length > 0) {
899
+ await this.reportDimension(vectors[0]);
900
+ }
901
+ vectorsPerField.set(field, vectors);
902
+ }
903
+ for (let i = 0; i < prepared.length; i++) {
904
+ const rec = { ...prepared[i] };
905
+ for (const [field, vectors] of vectorsPerField) {
906
+ rec[`${field}_vector`] = new Float32Array(vectors[i]);
907
+ }
908
+ vectorRecords.push(rec);
909
+ }
910
+ try {
911
+ const lanceResult = await this.lanceEngine.upsert(vectorRecords);
912
+ result.inserted += lanceResult.inserted;
913
+ result.updated += lanceResult.updated;
914
+ } catch (err) {
915
+ const msg = err instanceof Error ? err.message : String(err);
916
+ process.stderr.write(`Warning: LanceDB batch write failed: ${msg}
917
+ `);
918
+ result.errors += prepared.length;
919
+ if (needsSqlite3 && this.sqliteEngine) {
920
+ const sqliteResult = this.sqliteEngine.batchUpsert(prepared);
921
+ void sqliteResult;
922
+ }
923
+ return result;
924
+ }
925
+ }
926
+ if (needsSqlite3 && this.sqliteEngine) {
927
+ const sqliteResult = this.sqliteEngine.batchUpsert(prepared);
928
+ if (!hasSimilarFields || !this.lanceEngine) {
929
+ result.inserted += sqliteResult.inserted;
930
+ result.updated += sqliteResult.updated;
931
+ }
932
+ result.errors += sqliteResult.errors;
933
+ }
934
+ return result;
935
+ }
936
+ /** Get field names that have 'similar' findCaps */
937
+ getSimilarFields() {
938
+ return Object.entries(this.policy.fields).filter(([, cfg]) => cfg.findCaps.includes("similar")).map(([name]) => name);
939
+ }
940
+ /** Check if SQLite write is needed based on policy */
941
+ needsSqliteWrite() {
942
+ const hasMatchFields = Object.values(this.policy.fields).some((cfg) => cfg.findCaps.includes("match"));
943
+ return hasMatchFields || !!this.policy.autoIndex;
944
+ }
945
+ /** Validate that a record is a valid object */
946
+ validateRecord(record) {
947
+ if (record === null || record === void 0 || typeof record !== "object" || Array.isArray(record)) {
948
+ throw new XDBError(PARAMETER_ERROR, "Invalid input: expected a JSON object");
949
+ }
950
+ }
951
+ /** Build a record with vector fields for LanceDB */
952
+ async buildVectorRecord(record) {
953
+ const similarFields = this.getSimilarFields();
954
+ const vectorRecord = { ...record };
955
+ for (const field of similarFields) {
956
+ const text = String(record[field] ?? "");
957
+ const vector = await this.embedder.embed(text);
958
+ await this.reportDimension(vector);
959
+ vectorRecord[`${field}_vector`] = new Float32Array(vector);
960
+ }
961
+ return vectorRecord;
962
+ }
963
+ };
964
+
965
+ // src/engines/lancedb-engine.ts
966
+ import * as lancedb from "@lancedb/lancedb";
967
+ import { join as join4 } from "path";
968
/** Table name used when the caller does not supply one. */
var DEFAULT_TABLE_NAME = "data";

/**
 * Wrapper around a LanceDB connection and one table, exposing upsert,
 * vector search, scalar filter search, row counting and close.
 */
var LanceDBEngine = class _LanceDBEngine {
  db;
  table;

  /**
   * @param {object} db - Connection handle.
   * @param {object} table - Table handle.
   */
  constructor(db, table) {
    this.db = db;
    this.table = table;
  }

  /**
   * Connect to `<collectionPath>/vector.lance` and open the named table.
   * When the table does not exist yet, a deferred engine is returned instead.
   *
   * @param {string} collectionPath - Directory of the collection.
   * @param {string} [tableName] - Table name; defaults to DEFAULT_TABLE_NAME.
   * @returns {Promise<object>} An engine instance.
   * @throws {XDBError} RUNTIME_ERROR when connecting or opening fails.
   */
  static async open(collectionPath, tableName) {
    const name = tableName ?? DEFAULT_TABLE_NAME;
    try {
      const db = await lancedb.connect(join4(collectionPath, "vector.lance"));
      const existing = await db.tableNames();
      if (!existing.includes(name)) {
        return new LanceDBEngineDeferred(db, name);
      }
      const opened = await db.openTable(name);
      return new _LanceDBEngine(db, opened);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `Failed to open LanceDB: ${reason}`);
    }
  }

  /**
   * Merge-insert records keyed on "id". Inserted/updated counts are derived
   * from the table row count before and after the merge.
   *
   * @param {Array<object>} records - Records to write.
   * @returns {Promise<{inserted: number, updated: number}>}
   * @throws {XDBError} RUNTIME_ERROR when the merge fails.
   */
  async upsert(records) {
    if (records.length === 0) {
      return { inserted: 0, updated: 0 };
    }
    try {
      const before = await this.table.countRows();
      const payload = _LanceDBEngine.toArrowTable(records);
      const merge = this.table.mergeInsert("id").whenMatchedUpdateAll().whenNotMatchedInsertAll();
      await merge.execute(payload);
      const after = await this.table.countRows();
      const inserted = after - before;
      return { inserted, updated: records.length - inserted };
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `LanceDB upsert failed: ${reason}`);
    }
  }

  /**
   * Turn records carrying Float32Array fields into an Arrow table.
   * The first record that has any Float32Array field decides which keys are
   * registered as vector columns. Without such a record the input array is
   * returned unchanged.
   *
   * @param {Array<object>} records - Records to convert.
   * @returns {object} The Arrow table, or `records` itself.
   */
  static toArrowTable(records) {
    const template = records.find(
      (rec) => Object.values(rec).some((value) => value instanceof Float32Array)
    );
    if (template === void 0) {
      return records;
    }
    const vectorColumns = {};
    for (const [key, value] of Object.entries(template)) {
      if (value instanceof Float32Array) {
        vectorColumns[key] = new lancedb.VectorColumnOptions();
      }
    }
    const plainRows = records.map((rec) => {
      const row = {};
      for (const [key, value] of Object.entries(rec)) {
        row[key] = value instanceof Float32Array ? Array.from(value) : value;
      }
      return row;
    });
    return lancedb.makeArrowTable(plainRows, { vectorColumns });
  }

  /**
   * Nearest-neighbour search with an optional filter.
   *
   * @param {ArrayLike<number>} queryVector - Query embedding.
   * @param {{limit: number, filter?: string, column?: string}} options -
   *   `column` defaults to "vector".
   * @returns {Promise<Array<object>>} Hits without `_distance` and `*_vector`
   *   keys in `data`; `_score` is `1 - _distance / 2` when a distance exists.
   * @throws {XDBError} RUNTIME_ERROR when the search fails.
   */
  async vectorSearch(queryVector, options) {
    try {
      const base = this.table.vectorSearch(queryVector).column(options.column ?? "vector").limit(options.limit);
      const query = options.filter ? base.where(options.filter) : base;
      const rows = await query.toArray();
      return rows.map((row) => {
        const data = {};
        for (const [key, value] of Object.entries(row)) {
          if (key !== "_distance" && !key.endsWith("_vector")) {
            data[key] = value;
          }
        }
        const hit = { data, _engine: "lancedb" };
        if (row._distance != null) {
          hit._score = 1 - row._distance / 2;
        }
        return hit;
      });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `LanceDB vector search failed: ${reason}`);
    }
  }

  /**
   * Scalar filter query without any vector search.
   *
   * @param {string} filter - Filter expression passed to `where`.
   * @param {number} limit - Maximum number of rows.
   * @returns {Promise<Array<object>>} Rows with `*_vector` keys removed.
   * @throws {XDBError} RUNTIME_ERROR when the query fails.
   */
  async filterSearch(filter, limit) {
    try {
      const rows = await this.table.query().where(filter).limit(limit).toArray();
      return rows.map((row) => {
        const data = {};
        for (const [key, value] of Object.entries(row)) {
          if (!key.endsWith("_vector")) {
            data[key] = value;
          }
        }
        return { data, _engine: "lancedb" };
      });
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `LanceDB filter search failed: ${reason}`);
    }
  }

  /** @returns {Promise<number>} Total number of rows in the table. */
  async countRows() {
    return this.table.countRows();
  }

  /** Close the table handle, then the connection. */
  async close() {
    this.table.close();
    this.db.close();
  }
};
1118
/**
 * Engine variant used while the table does not exist yet. The table is
 * created from the first non-empty upsert; until then searches return no
 * rows and the row count is 0.
 */
var LanceDBEngineDeferred = class extends LanceDBEngine {
  deferredDb;
  tableName;
  initialized = false;

  /**
   * @param {object} db - Connection handle.
   * @param {string} tableName - Name of the table to create on first write.
   */
  constructor(db, tableName) {
    super(db, void 0);
    this.deferredDb = db;
    this.tableName = tableName;
  }

  /**
   * Return the table, creating it from `records` on first use.
   *
   * @param {Array<object>} [records] - Seed rows for table creation.
   * @returns {Promise<object>} The table handle.
   * @throws {XDBError} RUNTIME_ERROR when the table is missing and no seed
   *   rows were supplied.
   */
  async ensureTable(records) {
    if (this.initialized) {
      return this.table;
    }
    const hasSeedRows = Boolean(records) && records.length !== 0;
    if (!hasSeedRows) {
      throw new XDBError(RUNTIME_ERROR, "LanceDB table does not exist yet. Write data first.");
    }
    const seed = LanceDBEngine.toArrowTable(records);
    const created = await this.deferredDb.createTable(this.tableName, seed);
    this.table = created;
    this.initialized = true;
    return created;
  }

  /**
   * The first non-empty call creates the table and reports every record as
   * inserted; later calls delegate to the regular merge-insert.
   *
   * @param {Array<object>} records - Records to write.
   * @returns {Promise<{inserted: number, updated: number}>}
   * @throws {XDBError} RUNTIME_ERROR when table creation fails.
   */
  async upsert(records) {
    if (records.length === 0) {
      return { inserted: 0, updated: 0 };
    }
    if (this.initialized) {
      return super.upsert(records);
    }
    try {
      await this.ensureTable(records);
      return { inserted: records.length, updated: 0 };
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `LanceDB upsert failed: ${reason}`);
    }
  }

  /** Vector search; yields no rows while the table is uncreated. */
  async vectorSearch(queryVector, options) {
    return this.initialized ? super.vectorSearch(queryVector, options) : [];
  }

  /** Filter search; yields no rows while the table is uncreated. */
  async filterSearch(filter, limit) {
    return this.initialized ? super.filterSearch(filter, limit) : [];
  }

  /** Row count; 0 while the table is uncreated. */
  async countRows() {
    return this.initialized ? super.countRows() : 0;
  }

  /** Close the table if one was created, then the connection. */
  async close() {
    if (this.initialized) {
      this.table.close();
    }
    this.deferredDb.close();
  }
};
1180
+
1181
+ // src/engines/sqlite-engine.ts
1182
+ import Database from "better-sqlite3";
1183
+ import { join as join5 } from "path";
1184
/**
 * SQLite-backed store: a `records` table holding each record as JSON, plus
 * an optional standalone FTS5 table (`records_fts`) for full-text search.
 */
var SQLiteEngine = class _SQLiteEngine {
  db;
  hasFts = false;
  ftsFields = [];

  /** @param {object} db - Database handle. */
  constructor(db) {
    this.db = db;
  }

  /**
   * Open or create `<collectionPath>/relational.db` and set WAL journaling.
   *
   * @param {string} collectionPath - Directory of the collection.
   * @returns {object} A new engine instance.
   * @throws {XDBError} RUNTIME_ERROR when the database cannot be opened.
   */
  static open(collectionPath) {
    try {
      const dbPath = join5(collectionPath, "relational.db");
      const db = new Database(dbPath);
      db.pragma("journal_mode = WAL");
      return new _SQLiteEngine(db);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `Failed to open SQLite database: ${msg}`);
    }
  }

  /**
   * Create the `records` table and, when the policy has fields with the
   * "match" findCap, the `records_fts` FTS5 table over those fields.
   *
   * @param {{fields: object}} policy2 - Collection policy.
   */
  initSchema(policy2) {
    this.db.exec(`
      CREATE TABLE IF NOT EXISTS records (
        id TEXT PRIMARY KEY,
        data JSON NOT NULL
      );
    `);
    this.ftsFields = Object.entries(policy2.fields)
      .filter(([, cfg]) => cfg.findCaps.includes("match"))
      .map(([name]) => name);
    if (this.ftsFields.length > 0) {
      // NOTE(review): field names are spliced into the DDL unquoted; this
      // assumes policy field names are plain SQL identifiers.
      const columnDefs = this.ftsFields.join(", ");
      this.db.exec(`
        CREATE VIRTUAL TABLE IF NOT EXISTS records_fts USING fts5(
          id UNINDEXED,
          ${columnDefs}
        );
      `);
      this.hasFts = true;
    }
  }

  /**
   * Refresh the FTS row of one record: delete the old entry, insert the new.
   * Does nothing when no FTS table is configured.
   *
   * @param {object} record - Record with an `id`.
   */
  syncFts(record) {
    if (!this.hasFts) return;
    const id = String(record.id);
    this.db.prepare("DELETE FROM records_fts WHERE id = ?").run(id);
    const cols = ["id", ...this.ftsFields];
    const placeholders = cols.map(() => "?").join(", ");
    const values = [id, ...this.ftsFields.map((f) => String(record[f] ?? ""))];
    this.db.prepare(`INSERT INTO records_fts (${cols.join(", ")}) VALUES (${placeholders})`).run(...values);
  }

  /**
   * Upsert records (INSERT OR REPLACE) inside one transaction. Any failure
   * propagates out of the transaction call.
   *
   * @param {Array<object>} records - Records, each with an `id`.
   * @returns {{inserted: number, updated: number}}
   */
  upsert(records) {
    let inserted = 0;
    let updated = 0;
    const checkStmt = this.db.prepare("SELECT 1 FROM records WHERE id = ?");
    const upsertStmt = this.db.prepare(
      "INSERT OR REPLACE INTO records (id, data) VALUES (?, ?)"
    );
    const txn = this.db.transaction(() => {
      for (const record of records) {
        const id = String(record.id);
        const exists = checkStmt.get(id);
        upsertStmt.run(id, JSON.stringify(record));
        this.syncFts(record);
        if (exists) {
          updated++;
        } else {
          inserted++;
        }
      }
    });
    txn();
    return { inserted, updated };
  }

  /**
   * Upsert records inside one transaction, counting per-record failures
   * instead of aborting the batch.
   *
   * @param {Array<object>} records - Records, each with an `id`.
   * @returns {{inserted: number, updated: number, errors: number}}
   */
  batchUpsert(records) {
    let inserted = 0;
    let updated = 0;
    let errors = 0;
    const checkStmt = this.db.prepare("SELECT 1 FROM records WHERE id = ?");
    const upsertStmt = this.db.prepare(
      "INSERT OR REPLACE INTO records (id, data) VALUES (?, ?)"
    );
    const txn = this.db.transaction(() => {
      for (const record of records) {
        try {
          const id = String(record.id);
          const exists = checkStmt.get(id);
          upsertStmt.run(id, JSON.stringify(record));
          this.syncFts(record);
          if (exists) {
            updated++;
          } else {
            inserted++;
          }
        } catch {
          // Best-effort batch: the failure is counted, not rethrown.
          errors++;
        }
      }
    });
    txn();
    return { inserted, updated, errors };
  }

  /**
   * Full-text search over the FTS table, ordered by FTS rank.
   *
   * @param {string} query - FTS5 MATCH expression.
   * @param {number} limit - Maximum number of rows.
   * @returns {Array<object>} Hits; empty when no FTS table is configured.
   */
  ftsSearch(query, limit) {
    if (!this.hasFts) {
      return [];
    }
    const stmt = this.db.prepare(`
      SELECT r.data, fts.rank
      FROM records_fts fts
      JOIN records r ON r.id = fts.id
      WHERE records_fts MATCH ?
      ORDER BY fts.rank
      LIMIT ?
    `);
    const rows = stmt.all(query, limit);
    return rows.map((row) => ({
      data: JSON.parse(row.data),
      // The rank is negated so the reported score is positive.
      _score: -row.rank,
      _engine: "sqlite"
    }));
  }

  /**
   * Filter records with a caller-supplied SQL WHERE expression.
   * The expression is spliced into the statement verbatim; only the limit
   * is bound as a parameter.
   *
   * @param {string} filter - SQL boolean expression.
   * @param {number} limit - Maximum number of rows.
   * @returns {Array<object>} Matching records.
   * @throws {XDBError} RUNTIME_ERROR when the statement fails.
   */
  whereSearch(filter, limit) {
    try {
      const stmt = this.db.prepare(`
        SELECT data FROM records WHERE ${filter} LIMIT ?
      `);
      const rows = stmt.all(limit);
      return rows.map((row) => ({
        data: JSON.parse(row.data),
        _engine: "sqlite"
      }));
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `WHERE filter error: ${msg}`);
    }
  }

  /**
   * Full-text search combined with a caller-supplied WHERE expression.
   *
   * The filter is wrapped in parentheses so that an `OR` inside it cannot
   * regroup with `MATCH ? AND ...` and return rows that do not satisfy the
   * full-text query.
   *
   * @param {string} query - FTS5 MATCH expression.
   * @param {string} filter - SQL boolean expression, spliced in verbatim.
   * @param {number} limit - Maximum number of rows.
   * @returns {Array<object>} Hits; empty when no FTS table is configured.
   * @throws {XDBError} RUNTIME_ERROR when the statement fails.
   */
  ftsWhereSearch(query, filter, limit) {
    if (!this.hasFts) {
      return [];
    }
    try {
      const stmt = this.db.prepare(`
        SELECT r.data, fts.rank
        FROM records_fts fts
        JOIN records r ON r.id = fts.id
        WHERE records_fts MATCH ? AND (${filter})
        ORDER BY fts.rank
        LIMIT ?
      `);
      const rows = stmt.all(query, limit);
      return rows.map((row) => ({
        data: JSON.parse(row.data),
        _score: -row.rank,
        _engine: "sqlite"
      }));
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      throw new XDBError(RUNTIME_ERROR, `FTS+WHERE filter error: ${msg}`);
    }
  }

  /** @returns {number} Number of rows in the `records` table. */
  countRows() {
    const row = this.db.prepare("SELECT COUNT(*) as cnt FROM records").get();
    return row.cnt;
  }

  /** Close the database handle. */
  close() {
    this.db.close();
  }
};
1382
+
1383
+ // src/commands/put.ts
1384
/**
 * Resolve the xdb data directory under the current user's home.
 * @returns {string} `<home>/.local/share/xdb`.
 */
function getDataRoot2() {
  const segments = [".local", "share", "xdb"];
  return join6(homedir3(), ...segments);
}
1387
/**
 * Read all of standard input as UTF-8 text.
 * @returns {Promise<string>} The piped text, or "" when stdin is a TTY.
 *   Rejects when the stream emits an error.
 */
function readStdin() {
  const { stdin } = process;
  return new Promise((resolve, reject) => {
    if (stdin.isTTY) {
      resolve("");
      return;
    }
    const received = [];
    stdin.on("data", (piece) => received.push(piece));
    stdin.on("end", () => {
      resolve(Buffer.concat(received).toString("utf-8"));
    });
    stdin.on("error", reject);
  });
}
1399
/**
 * Parse JSONL text into an array of objects. Blank lines are skipped.
 *
 * Error messages report the physical (1-based) line number in `input`, so a
 * bad record is still located correctly when blank lines precede it.
 *
 * @param {string} input - JSONL text, one JSON object per line.
 * @returns {Array<object>} Parsed records in input order.
 * @throws {XDBError} PARAMETER_ERROR when a line is not valid JSON or is not
 *   a JSON object (null, arrays and primitives are rejected).
 */
function parseJsonl(input) {
  const records = [];
  const lines = input.split("\n");
  for (let index = 0; index < lines.length; index++) {
    const line = lines[index];
    if (line.trim().length === 0) {
      continue;
    }
    try {
      const parsed = JSON.parse(line);
      if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
        throw new Error("Expected a JSON object");
      }
      records.push(parsed);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      throw new XDBError(PARAMETER_ERROR, `Invalid JSON at line ${index + 1}: ${msg}`);
    }
  }
  return records;
}
1414
/**
 * @param {{main: string}} policy2 - Collection policy.
 * @returns {boolean} True for "hybrid" and "vector" policies.
 */
function needsLance(policy2) {
  const engineKind = policy2.main;
  return engineKind === "vector" || engineKind === "hybrid";
}
1417
/**
 * @param {{main: string}} policy2 - Collection policy.
 * @returns {boolean} True for "hybrid" and "relational" policies.
 */
function needsSqlite(policy2) {
  const engineKind = policy2.main;
  return engineKind === "relational" || engineKind === "hybrid";
}
1420
/**
 * Attach the `put <collection> [json]` subcommand to the CLI program.
 * Failures of the action are passed to `handleError`.
 *
 * @param {object} program2 - Commander-style program object.
 */
function registerPutCommand(program2) {
  const runPut = async (collection, json, opts) => {
    try {
      await executePut(getDataRoot2(), collection, json, !!opts.batch, !!opts.json);
    } catch (err) {
      handleError(err);
    }
  };
  program2
    .command("put <collection> [json]")
    .description("Write data to a collection")
    .option("--batch", "Enable batch write mode for JSONL stdin input")
    .option("--json", "Output stats as JSON (batch mode)")
    .action(runPut);
}
1429
/**
 * Write one JSON record (argument) or many JSONL records (stdin) into a
 * collection, opening only the engines the collection's policy requires.
 *
 * @param {string} dataRoot - Root data directory.
 * @param {string} collection - Collection name.
 * @param {string|undefined} json - Inline JSON object; when undefined, JSONL
 *   is read from stdin.
 * @param {boolean} batch - Use the error-tolerant batch writer.
 * @param {boolean} [jsonOutput=false] - In batch mode, print stats as JSON on
 *   stdout instead of a summary on stderr.
 * @throws {XDBError} PARAMETER_ERROR for invalid or missing input.
 */
async function executePut(dataRoot, collection, json, batch, jsonOutput = false) {
  const manager = new CollectionManager(dataRoot);
  const { policy: policy2 } = await manager.load(collection);
  const colPath = join6(dataRoot, "collections", collection);
  let lanceEngine;
  let sqliteEngine;
  try {
    if (needsLance(policy2)) {
      lanceEngine = await LanceDBEngine.open(colPath);
    }
    if (needsSqlite(policy2)) {
      sqliteEngine = SQLiteEngine.open(colPath);
      sqliteEngine.initSchema(policy2);
    }
    const embedder = new Embedder();
    const onDimension = async (dim) => {
      await manager.recordEmbeddingDimension(collection, dim);
    };
    const writer = new DataWriter(policy2, embedder, lanceEngine, sqliteEngine, onDimension);

    let records;
    if (json === void 0) {
      const piped = await readStdin();
      if (piped.trim().length === 0) {
        throw new XDBError(PARAMETER_ERROR, "No input provided. Pass JSON as argument or pipe JSONL via stdin.");
      }
      records = parseJsonl(piped);
    } else {
      let parsed;
      try {
        parsed = JSON.parse(json);
      } catch {
        throw new XDBError(PARAMETER_ERROR, `Invalid JSON: ${json}`);
      }
      const isObject = parsed !== null && typeof parsed === "object" && !Array.isArray(parsed);
      if (!isObject) {
        throw new XDBError(PARAMETER_ERROR, "Invalid input: expected a JSON object");
      }
      records = [parsed];
    }

    if (!batch) {
      for (const record of records) {
        await writer.write(record);
      }
      process.stderr.write(`${records.length} record(s) written to "${collection}"\n`);
    } else {
      const stats = await writer.writeBatch(records);
      if (jsonOutput) {
        process.stdout.write(JSON.stringify(stats) + "\n");
      } else {
        process.stderr.write(`Batch complete: ${stats.inserted} inserted, ${stats.updated} updated, ${stats.errors} errors\n`);
      }
    }
  } finally {
    // Engines are released even when the write failed.
    if (lanceEngine) await lanceEngine.close();
    if (sqliteEngine) sqliteEngine.close();
  }
}
1487
+
1488
+ // src/commands/find.ts
1489
+ import { homedir as homedir4 } from "os";
1490
+ import { join as join7 } from "path";
1491
+
1492
+ // src/data-finder.ts
1493
/**
 * Routes a search request to LanceDB or SQLite according to the requested
 * intent (similar / match / where) and the collection policy.
 */
var DataFinder = class {
  /**
   * @param {{fields: object}} policy2 - Collection policy.
   * @param {object} embedder - Object exposing `embed(text)`.
   * @param {object} [lanceEngine] - Vector engine, if the policy uses one.
   * @param {object} [sqliteEngine] - Relational engine, if the policy uses one.
   */
  constructor(policy2, embedder, lanceEngine, sqliteEngine) {
    this.policy = policy2;
    this.embedder = embedder;
    this.lanceEngine = lanceEngine;
    this.sqliteEngine = sqliteEngine;
  }

  /**
   * Run a search. Intent precedence is `similar`, then `match`, then `where`.
   *
   * @param {string|undefined} query - Query text (needed for similar/match).
   * @param {{similar?: boolean, match?: boolean, where?: string, limit?: number}} options -
   *   `limit` defaults to 10.
   * @returns {Promise<Array<object>>} Search hits.
   * @throws {XDBError} PARAMETER_ERROR when no intent is given or the
   *   collection cannot serve the requested intent.
   */
  async find(query, options) {
    const limit = options.limit ?? 10;
    if (options.similar) {
      return this.handleSimilar(query, options.where, limit);
    }
    if (options.match) {
      return this.handleMatch(query, options.where, limit);
    }
    if (options.where) {
      return this.handleWhereOnly(options.where, limit);
    }
    throw new XDBError(PARAMETER_ERROR, "No search intent specified. Use --similar, --match, or --where");
  }

  /**
   * Vector search through LanceDB on the first "similar" field.
   * @throws {XDBError} PARAMETER_ERROR when the policy has no "similar"
   *   field, the query is empty, or no vector engine is available.
   */
  async handleSimilar(query, where, limit) {
    if (!this.hasCap("similar")) {
      throw new XDBError(PARAMETER_ERROR, 'This collection does not support semantic search (no fields with "similar" findCaps)');
    }
    if (!query) {
      throw new XDBError(PARAMETER_ERROR, "Query text is required for --similar search");
    }
    // Checked before embedding so a missing engine does not cost an embed call.
    if (!this.lanceEngine) {
      throw new XDBError(PARAMETER_ERROR, "No vector search engine available for this collection");
    }
    const similarField = this.getSimilarFields()[0];
    const column = `${similarField}_vector`;
    const vector = await this.embedder.embed(query);
    return this.lanceEngine.vectorSearch(vector, {
      limit,
      ...where !== void 0 ? { filter: where } : {},
      column
    });
  }

  /**
   * Full-text search through SQLite, optionally combined with a WHERE filter.
   * @throws {XDBError} PARAMETER_ERROR when the policy has no "match" field,
   *   the query is empty, or no SQLite engine is available.
   */
  async handleMatch(query, where, limit) {
    if (!this.hasCap("match")) {
      throw new XDBError(PARAMETER_ERROR, 'This collection does not support full-text search (no fields with "match" findCaps)');
    }
    if (!query) {
      throw new XDBError(PARAMETER_ERROR, "Query text is required for --match search");
    }
    if (!this.sqliteEngine) {
      throw new XDBError(PARAMETER_ERROR, "No full-text search engine available for this collection");
    }
    if (where) {
      return this.sqliteEngine.ftsWhereSearch(query, where, limit);
    }
    return this.sqliteEngine.ftsSearch(query, limit);
  }

  /**
   * Filter-only search: SQLite when present, otherwise LanceDB.
   * @throws {XDBError} PARAMETER_ERROR when neither engine is available.
   */
  async handleWhereOnly(where, limit) {
    if (this.sqliteEngine) {
      return this.sqliteEngine.whereSearch(where, limit);
    }
    if (this.lanceEngine) {
      return this.lanceEngine.filterSearch(where, limit);
    }
    throw new XDBError(PARAMETER_ERROR, "No search engine available for this collection");
  }

  /**
   * @param {string} cap - findCap name, e.g. "similar" or "match".
   * @returns {boolean} True when any policy field declares that findCap.
   */
  hasCap(cap) {
    return Object.values(this.policy.fields).some((cfg) => cfg.findCaps.includes(cap));
  }

  /** @returns {string[]} Names of fields whose findCaps include "similar". */
  getSimilarFields() {
    return Object.entries(this.policy.fields)
      .filter(([, cfg]) => cfg.findCaps.includes("similar"))
      .map(([name]) => name);
  }
};
1566
+
1567
+ // src/commands/find.ts
1568
/**
 * Resolve the xdb data directory under the current user's home.
 * @returns {string} `<home>/.local/share/xdb`.
 */
function getDataRoot3() {
  const home = homedir4();
  return join7(home, ".local", "share", "xdb");
}
1571
/**
 * Collect everything piped to standard input and decode it as UTF-8.
 * @returns {Promise<string>} The piped text, or "" when stdin is a TTY.
 *   Rejects when the stream emits an error.
 */
function readStdin2() {
  return new Promise((resolve, reject) => {
    const source = process.stdin;
    if (source.isTTY) {
      resolve("");
      return;
    }
    const buffers = [];
    const finish = () => resolve(Buffer.concat(buffers).toString("utf-8"));
    source.on("data", (piece) => buffers.push(piece));
    source.on("end", finish);
    source.on("error", reject);
  });
}
1583
/**
 * @param {{main: string}} policy2 - Collection policy.
 * @returns {boolean} True for "hybrid" and "vector" policies.
 */
function needsLance2(policy2) {
  switch (policy2.main) {
    case "hybrid":
    case "vector":
      return true;
    default:
      return false;
  }
}
1586
/**
 * @param {{main: string}} policy2 - Collection policy.
 * @returns {boolean} True for "hybrid" and "relational" policies.
 */
function needsSqlite2(policy2) {
  switch (policy2.main) {
    case "hybrid":
    case "relational":
      return true;
    default:
      return false;
  }
}
1589
/**
 * Attach the `find <collection> [query]` subcommand to the CLI program.
 * Failures of the action are passed to `handleError`.
 *
 * @param {object} program2 - Commander-style program object.
 */
function registerFindCommand(program2) {
  const runFind = async (collection, query, opts) => {
    try {
      await executeFind(getDataRoot3(), collection, query, opts);
    } catch (err) {
      handleError(err);
    }
  };
  program2
    .command("find <collection> [query]")
    .description("Search data in a collection")
    .option("-s, --similar", "Semantic similarity search")
    .option("-m, --match", "Full-text search")
    .option("-w, --where <sql>", "SQL WHERE clause for filtering")
    .option("-l, --limit <n>", "Maximum number of results", "10")
    .option("--json", "Output as JSONL (machine-readable)")
    .action(runFind);
}
1600
/**
 * Search a collection and print the hits, either as JSONL on stdout or as a
 * short one-line preview per hit.
 *
 * @param {string} dataRoot - Root data directory.
 * @param {string} collection - Collection name.
 * @param {string|undefined} query - Query text; when undefined and a
 *   similar/match search is requested, it is read from stdin.
 * @param {{similar?: boolean, match?: boolean, where?: string, limit: string, json?: boolean}} opts -
 *   Parsed CLI options.
 * @throws {XDBError} PARAMETER_ERROR when `opts.limit` is not a positive integer.
 */
async function executeFind(dataRoot, collection, query, opts) {
  const manager = new CollectionManager(dataRoot);
  const { policy: policy2 } = await manager.load(collection);
  const colPath = join7(dataRoot, "collections", collection);

  const limit = Number.parseInt(opts.limit, 10);
  if (Number.isNaN(limit) || limit <= 0) {
    throw new XDBError(PARAMETER_ERROR, `Invalid limit value: ${opts.limit}`);
  }

  let searchText = query;
  if (searchText === void 0 && (opts.similar || opts.match)) {
    const piped = (await readStdin2()).trim();
    if (piped.length > 0) {
      searchText = piped;
    }
  }

  // One preview line per hit: id, score, and the first three other fields.
  const describeHit = (hit) => {
    const scoreText = typeof hit._score === "number" ? ` (score: ${hit._score.toFixed(4)})` : "";
    const idText = hit.data.id ? `[${hit.data.id}]` : "";
    const otherKeys = Object.keys(hit.data).filter((key) => key !== "id");
    const fragments = [];
    for (const key of otherKeys.slice(0, 3)) {
      const raw = hit.data[key];
      const text = typeof raw === "string" ? raw : JSON.stringify(raw);
      const shown = text != null && text.length > 60 ? text.substring(0, 57) + "..." : text;
      fragments.push(`${key}=${shown}`);
    }
    const overflow = otherKeys.length > 3 ? ` (+${otherKeys.length - 3} more)` : "";
    return `${idText}${scoreText} ${fragments.join(" ")}${overflow}\n`;
  };

  let lanceEngine;
  let sqliteEngine;
  try {
    if (needsLance2(policy2)) {
      lanceEngine = await LanceDBEngine.open(colPath);
    }
    if (needsSqlite2(policy2)) {
      sqliteEngine = SQLiteEngine.open(colPath);
      sqliteEngine.initSchema(policy2);
    }
    const embedder = new Embedder();
    const finder = new DataFinder(policy2, embedder, lanceEngine, sqliteEngine);

    // Only forward the intent flags the user actually supplied.
    const findOptions = {};
    for (const flag of ["similar", "match", "where"]) {
      if (opts[flag] !== void 0) {
        findOptions[flag] = opts[flag];
      }
    }
    findOptions.limit = limit;

    const results = await finder.find(searchText, findOptions);
    if (results.length === 0) {
      if (!opts.json) {
        process.stderr.write("No results found.\n");
      }
      return;
    }

    if (opts.json) {
      for (const hit of results) {
        const line = { ...hit.data, _score: hit._score, _engine: hit._engine };
        process.stdout.write(JSON.stringify(line) + "\n");
      }
    } else {
      for (const hit of results) {
        process.stdout.write(describeHit(hit));
      }
      process.stderr.write(`${results.length} result(s) found.\n`);
    }
  } finally {
    if (lanceEngine) await lanceEngine.close();
    if (sqliteEngine) sqliteEngine.close();
  }
}
1672
+
1673
+ // src/commands/config.ts
1674
/** Human-readable engine description for each policy `main` kind. */
var ENGINE_DESC2 = {
  hybrid: "LanceDB + SQLite",
  relational: "SQLite",
  vector: "LanceDB"
};

/**
 * Shorten an API key for display.
 * @param {string} key - Full API key.
 * @returns {string} "****" for keys of 7 characters or fewer; otherwise the
 *   first 3 and last 4 characters joined by "...".
 */
function maskApiKey(key) {
  const tooShortToReveal = key.length <= 7;
  if (tooShortToReveal) {
    return "****";
  }
  const head = key.slice(0, 3);
  const tail = key.slice(-4);
  return `${head}...${tail}`;
}
1683
/**
 * Print the embed configuration and the available policies, either as one
 * JSON document (`opts.json`) or as a plain-text listing.
 *
 * In JSON mode the API key is never printed; only `hasApiKey` reports
 * whether resolving it succeeded. In text mode the key is shown masked.
 *
 * @param {{json?: boolean}} opts - Parsed CLI options.
 * @param {object} [manager] - Config manager providing `load()` and
 *   `resolveApiKey(provider)`.
 */
async function executeConfig(opts, manager = new XdbConfigManager()) {
  const cfg = await manager.load();
  const policies = new PolicyRegistry().listPolicies();
  const emit = (text) => process.stdout.write(text);

  if (opts.json) {
    const providerName = cfg.defaultEmbedProvider;
    let hasApiKey = false;
    if (providerName) {
      try {
        await manager.resolveApiKey(providerName);
        hasApiKey = true;
      } catch {
        hasApiKey = false;
      }
    }
    const providerEntry = cfg.providers.find((p) => p.name === providerName);
    const report = {
      embed: {
        provider: cfg.defaultEmbedProvider ?? null,
        model: cfg.defaultEmbedModel ?? null,
        baseUrl: providerEntry?.baseUrl ?? null,
        hasApiKey
      },
      policies
    };
    emit(JSON.stringify(report, null, 2) + "\n");
    return;
  }

  const provider = cfg.defaultEmbedProvider ?? "(not set)";
  const model = cfg.defaultEmbedModel ?? "(not set)";
  const providerConfig = cfg.providers.find((p) => p.name === cfg.defaultEmbedProvider);
  const baseUrl = providerConfig?.baseUrl ?? "(default)";
  let apiKeyDisplay = "(not set)";
  if (cfg.defaultEmbedProvider) {
    try {
      apiKeyDisplay = maskApiKey(await manager.resolveApiKey(cfg.defaultEmbedProvider));
    } catch {
      apiKeyDisplay = "(not set)";
    }
  }

  emit("Embed Configuration:\n");
  emit(` provider: ${provider}\n`);
  emit(` model: ${model}\n`);
  emit(` base-url: ${baseUrl}\n`);
  emit(` api-key: ${apiKeyDisplay}\n`);
  emit("\n");
  emit("Available Policies:\n");
  for (const p of policies) {
    const engines = ENGINE_DESC2[p.main] ?? p.main;
    const fieldNames = Object.keys(p.fields);
    let fieldsStr = "(none)";
    if (fieldNames.length > 0) {
      fieldsStr = fieldNames
        .map((f) => `${f} [${p.fields[f].findCaps.join(", ")}]`)
        .join("; ");
    }
    emit(` ${p.main}/${p.minor}\n`);
    emit(` engines: ${engines}\n`);
    emit(` fields: ${fieldsStr}\n`);
    emit(` autoIndex: ${p.autoIndex ? "yes" : "no"}\n`);
  }
}
1754
/**
 * Apply `config embed --set-*` options to the stored configuration. Each
 * supplied option is saved and confirmed on stdout, in the order provider,
 * model, key, base URL.
 *
 * @param {{setProvider?: string, setModel?: string, setKey?: string, setBaseUrl?: string}} opts -
 *   Parsed CLI options.
 * @param {object} [manager] - Config manager providing `load()` and `save(cfg)`.
 * @throws {XDBError} PARAMETER_ERROR when a key or base URL is set while no
 *   embed provider is configured.
 */
async function executeConfigEmbed(opts, manager = new XdbConfigManager()) {
  const cfg = await manager.load();

  // Write one property onto the current provider's entry, creating it if absent.
  const assignToCurrentProvider = (prop, value) => {
    if (!cfg.defaultEmbedProvider) {
      throw new XDBError(
        PARAMETER_ERROR,
        "No embed provider configured. Run: xdb config embed --set-provider <name>"
      );
    }
    const entry = cfg.providers.find((p) => p.name === cfg.defaultEmbedProvider);
    if (entry) {
      entry[prop] = value;
    } else {
      cfg.providers.push({ name: cfg.defaultEmbedProvider, [prop]: value });
    }
  };

  if (opts.setProvider !== void 0) {
    cfg.defaultEmbedProvider = opts.setProvider;
    await manager.save(cfg);
    process.stdout.write(`Embed provider set to: ${opts.setProvider}\n`);
  }
  if (opts.setModel !== void 0) {
    cfg.defaultEmbedModel = opts.setModel;
    await manager.save(cfg);
    process.stdout.write(`Embed model set to: ${opts.setModel}\n`);
  }
  if (opts.setKey !== void 0) {
    assignToCurrentProvider("apiKey", opts.setKey);
    await manager.save(cfg);
    process.stdout.write(`API key set for provider: ${cfg.defaultEmbedProvider}\n`);
  }
  if (opts.setBaseUrl !== void 0) {
    assignToCurrentProvider("baseUrl", opts.setBaseUrl);
    await manager.save(cfg);
    process.stdout.write(`Base URL set for provider: ${cfg.defaultEmbedProvider}\n`);
  }
}
1803
/**
 * Wire up the `config` command: its own `--json` listing action and the
 * `embed` subcommand with its `--set-*` options. Failures of either action
 * are passed to `handleError`.
 *
 * @param {object} config2 - Commander-style command object for `config`.
 */
function registerConfigCommands(config2) {
  // Shared wrapper so both actions report failures the same way.
  const guarded = (task) => async (opts) => {
    try {
      await task(opts);
    } catch (err) {
      handleError(err);
    }
  };

  config2
    .option("--json", "Output as JSON")
    .action(guarded((opts) => executeConfig(opts)));

  config2
    .command("embed")
    .description("Manage embed service configuration")
    .option("--set-provider <name>", "Set the default embed provider")
    .option("--set-model <model>", "Set the default embed model")
    .option("--set-key <apiKey>", "Set the API key for the current provider")
    .option("--set-base-url <url>", "Set the base URL for the current provider")
    .action(guarded((opts) => executeConfigEmbed(opts)));
}
1819
+
1820
+ // src/commands/embed.ts
1821
+ import { readFile as readFile3 } from "fs/promises";
1822
+
1823
+ // src/embed-io.ts
1824
/**
 * Encode every component of a numeric vector as hexadecimal text.
 *
 * Each value is stored as a big-endian 32-bit float and rendered as eight
 * lowercase hex digits (two per byte, zero-padded).
 *
 * @param vec - Indexable sequence of numbers with a `length`.
 * @returns {string[]} One 8-character hex string per input component.
 */
function vectorToHex(vec) {
  const scratch = new DataView(new ArrayBuffer(4));
  return Array.from({ length: vec.length }, (_, index) => {
    scratch.setFloat32(0, vec[index], false);
    // Reading the same four bytes back as one big-endian uint32 yields the
    // byte-by-byte hex concatenation in a single step.
    return scratch.getUint32(0, false).toString(16).padStart(8, "0");
  });
}
1839
/**
 * Parse the raw text given to a batch command into a list of strings.
 *
 * @param {string} raw - JSON text expected to contain an array of strings.
 * @returns {string[]} The parsed array.
 * @throws {XDBError} With PARAMETER_ERROR when the text is not JSON, is not an
 *   array, or holds a non-string element (the first offending index is named).
 */
function parseBatchInput(raw) {
  const reject = (message) => {
    throw new XDBError(PARAMETER_ERROR, message);
  };

  let candidate;
  try {
    candidate = JSON.parse(raw);
  } catch {
    reject("Invalid batch input: not valid JSON");
  }

  if (!Array.isArray(candidate)) {
    reject("Invalid batch input: expected a JSON array of strings");
  }

  const offender = candidate.findIndex((item) => typeof item !== "string");
  if (offender !== -1) {
    reject(`Invalid batch input: element at index ${offender} is not a string`);
  }
  return candidate;
}
1865
/**
 * Render an embedding response as the text printed by the CLI.
 *
 * - Without `options.json`: one line per embedding, each a JSON array of hex
 *   strings produced by `vectorToHex`.
 * - With `options.json` and `options.batch`: a JSON object with `embeddings`,
 *   `model` and `usage`.
 * - With `options.json` only: a JSON object with `embedding` (first vector
 *   only), `model` and `usage`.
 *
 * @param result - Response with `embeddings`, `model` and
 *   `usage.{promptTokens,totalTokens}`.
 * @param options - `{ json, batch }` flags.
 * @returns {string} The formatted output, without a trailing newline.
 */
function formatEmbeddingOutput(result, options) {
  if (!options.json) {
    const lines = [];
    for (const emb of result.embeddings) {
      lines.push(JSON.stringify(vectorToHex(emb)));
    }
    return lines.join("\n");
  }

  const usage = {
    prompt_tokens: result.usage.promptTokens,
    total_tokens: result.usage.totalTokens
  };
  // Key order matters for the serialized text: vectors, then model, then usage.
  const vectors = options.batch
    ? { embeddings: result.embeddings.map((emb) => vectorToHex(emb)) }
    : { embedding: vectorToHex(result.embeddings[0]) };
  return JSON.stringify({ ...vectors, model: result.model, usage });
}
1886
+
1887
+ // src/embedding-models.ts
1888
/** Input limits, in tokens, keyed by embedding model name. */
var EMBEDDING_MODEL_LIMITS = {
  // OpenAI
  "text-embedding-3-small": 8191,
  "text-embedding-3-large": 8191,
  "text-embedding-ada-002": 8191,
  // Google
  "text-embedding-004": 2048,
  // Cohere
  "embed-english-v3.0": 512,
  "embed-multilingual-v3.0": 512,
  "embed-english-light-v3.0": 512,
  "embed-multilingual-light-v3.0": 512
};

/** Characters assumed per token when estimating token counts. */
var CHARS_PER_TOKEN = 4;

/**
 * Estimate how many tokens a text occupies, from its UTF-16 length.
 *
 * @param {string} text
 * @returns {number} `ceil(text.length / CHARS_PER_TOKEN)`.
 */
function estimateTokens(text) {
  return Math.ceil(text.length / CHARS_PER_TOKEN);
}

/**
 * Shorten `text` so that its estimated token count fits the limit of `model`.
 *
 * Models that are not listed in EMBEDDING_MODEL_LIMITS are never truncated.
 *
 * @param {string} text - Input text.
 * @param {string} model - Embedding model name.
 * @returns {{text: string, truncated: boolean, originalTokens: number}}
 *   The (possibly shortened) text, whether it was shortened, and the token
 *   estimate of the original text.
 */
function truncateText(text, model) {
  const originalTokens = estimateTokens(text);
  // Own-property check: a model name such as "constructor" must not resolve
  // to something inherited from Object.prototype and be treated as a limit.
  const limit = Object.prototype.hasOwnProperty.call(EMBEDDING_MODEL_LIMITS, model)
    ? EMBEDDING_MODEL_LIMITS[model]
    : undefined;
  if (limit === undefined || originalTokens <= limit) {
    return { text, truncated: false, originalTokens };
  }

  let maxChars = limit * CHARS_PER_TOKEN;
  // Never cut between the two halves of a surrogate pair: if the last kept
  // code unit is a high surrogate whose low surrogate would be dropped, drop
  // the high surrogate as well so the result stays well-formed.
  const lastKept = text.charCodeAt(maxChars - 1);
  const firstDropped = text.charCodeAt(maxChars);
  const splitsPair =
    lastKept >= 0xd800 && lastKept <= 0xdbff &&
    firstDropped >= 0xdc00 && firstDropped <= 0xdfff;
  if (splitsPair) {
    maxChars -= 1;
  }

  return {
    text: text.slice(0, maxChars),
    truncated: true,
    originalTokens
  };
}
1921
+
1922
+ // src/commands/embed.ts
1923
/**
 * Read everything available on standard input as UTF-8 text.
 *
 * @returns {Promise<string>} Resolves with the concatenated input once stdin
 *   emits `end`; rejects with an XDBError (RUNTIME_ERROR) if stdin emits
 *   `error`.
 */
async function readStdin3() {
  const { stdin } = process;
  return new Promise((resolve, reject) => {
    const pieces = [];
    stdin.setEncoding("utf-8");
    stdin.on("data", (piece) => {
      pieces.push(piece);
    });
    stdin.on("end", () => {
      resolve(pieces.join(""));
    });
    stdin.on("error", (err) => {
      reject(new XDBError(RUNTIME_ERROR, `Failed to read from stdin: ${err.message}`));
    });
  });
}
1938
/**
 * Run the `embed` command: gather the input text, truncate it to the model
 * limit, request embeddings and print them on stdout.
 *
 * Input comes from exactly one of: the positional `text`, `opts.inputFile`,
 * or piped stdin (used only when neither of the other two was given).
 * Truncation warnings go to stderr, as a JSON line when `opts.json` is set.
 *
 * @param {string|undefined} text - Positional text argument, if any.
 * @param opts - Command options: `batch`, `json`, `inputFile`.
 * @param manager - Configuration manager providing `resolveEmbedConfig()`.
 * @param clientFactory - Optional factory `(clientConfig) => client`; when
 *   omitted an `EmbeddingClient` is constructed.
 * @throws {XDBError} PARAMETER_ERROR when more than one or no input source is
 *   given; RUNTIME_ERROR when the input file cannot be read.
 */
async function executeEmbed(text, opts, manager = new XdbConfigManager(), clientFactory) {
  const { provider, model, providerConfig, apiKey } = await manager.resolveEmbedConfig();

  const hasText = text !== undefined;
  const hasInputFile = opts.inputFile !== undefined;
  // Piped stdin only counts as a source when nothing was passed explicitly.
  const stdinAvailable = !process.stdin.isTTY && !hasText && !hasInputFile;
  const sourceCount = [hasText, stdinAvailable, hasInputFile].filter(Boolean).length;
  if (sourceCount > 1) {
    throw new XDBError(
      PARAMETER_ERROR,
      "Multiple input sources specified. Provide input via argument, stdin, or --input-file (only one)."
    );
  }

  let rawInput;
  if (hasText) {
    rawInput = text;
  } else if (hasInputFile) {
    // Same `!== undefined` test as the source detection above, so an empty
    // path is reported as a read failure rather than as "no input".
    try {
      rawInput = await readFile3(opts.inputFile, "utf-8");
    } catch (err) {
      throw new XDBError(
        RUNTIME_ERROR,
        `Failed to read input file: ${err instanceof Error ? err.message : String(err)}`
      );
    }
  } else if (stdinAvailable) {
    rawInput = await readStdin3();
  } else {
    throw new XDBError(
      PARAMETER_ERROR,
      "No input text provided. Provide input via argument, stdin, or --input-file."
    );
  }

  const inputs = opts.batch ? parseBatchInput(rawInput) : [rawInput];
  const texts = [];
  for (const input of inputs) {
    const result = truncateText(input, model);
    if (result.truncated) {
      // Use the shared estimator so the reported figure stays in step with
      // the one that decided to truncate.
      const truncatedTokens = estimateTokens(result.text);
      const modelLimit = EMBEDDING_MODEL_LIMITS[model] ?? truncatedTokens;
      const message = `Input text truncated from ~${result.originalTokens} tokens to ${truncatedTokens} tokens (model limit: ${modelLimit})`;
      if (opts.json) {
        const warning = {
          type: "warning",
          data: {
            message,
            originalTokens: result.originalTokens,
            truncatedTokens
          }
        };
        process.stderr.write(JSON.stringify(warning) + "\n");
      } else {
        process.stderr.write(`[Warning] ${message}\n`);
      }
    }
    texts.push(result.text);
  }

  const clientConfig = { provider, apiKey, model };
  if (providerConfig.baseUrl) {
    clientConfig.baseUrl = providerConfig.baseUrl;
  }
  if (providerConfig.api) {
    clientConfig.api = providerConfig.api;
  }
  const client = clientFactory ? clientFactory(clientConfig) : new EmbeddingClient(clientConfig);
  const response = await client.embed({ texts, model });
  const output = formatEmbeddingOutput(response, {
    json: opts.json ?? false,
    batch: opts.batch ?? false
  });
  process.stdout.write(output + "\n");
}
2018
/**
 * Register the `embed [text]` command on the given program.
 *
 * The command accepts `--batch`, `--json` and `--input-file <path>`; its
 * action runs `executeEmbed` and forwards any error to `handleError`.
 *
 * @param program2 - Program object exposing `command()`.
 */
function registerEmbedCommand(program2) {
  const runEmbed = async (text, opts) => {
    try {
      await executeEmbed(text, opts);
    } catch (err) {
      handleError(err);
    }
  };

  // Flags in registration order.
  const flagTable = [
    ["--batch", "Parse input as a JSON string array for batch embedding"],
    ["--json", "Output as JSON"],
    ["--input-file <path>", "Read input from a file"]
  ];
  const described = program2
    .command("embed [text]")
    .description("Embed text using the configured embedding provider");
  const configured = flagTable.reduce(
    (cmd, [flags, summary]) => cmd.option(flags, summary),
    described
  );
  configured.action(runEmbed);
}
2027
+
2028
+ // src/help.ts
2029
/** Text appended after the top-level help: examples, prerequisites, data directory. */
var MAIN_EXAMPLES = `
Examples:
$ xdb col init my-docs --policy hybrid/knowledge-base
$ xdb put my-docs '{"content":"How to use tar"}'
$ xdb find my-docs "compress files" --similar
$ xdb col list
$ xdb col info my-docs
$ xdb policy list

Prerequisites:
\u5411\u91CF\u5316\u529F\u80FD\u4F9D\u8D56 pai \u547D\u4EE4\u3002\u8BF7\u786E\u4FDD pai \u5DF2\u5B89\u88C5\u5E76\u914D\u7F6E\u4E86 embedding provider:
pai model default --embed-provider openai --embed-model text-embedding-3-small

Data:
\u6570\u636E\u76EE\u5F55: ~/.local/share/xdb/`;

/** Extra help shown only in verbose mode: policies, storage layout, exit codes. */
var MAIN_VERBOSE = `
Policies:
hybrid/knowledge-base \u5411\u91CF + \u5168\u6587\u68C0\u7D22\uFF08\u6700\u5E38\u7528\uFF09
relational/structured-logs \u7ED3\u6784\u5316\u65E5\u5FD7
relational/simple-kv \u7B80\u5355\u952E\u503C\u5BF9
vector/feature-store \u7279\u5F81\u5B58\u50A8

Storage:
~/.local/share/xdb/collections/<name>/
collection_meta.json Policy \u5FEB\u7167 + \u5143\u6570\u636E
vector.lance/ LanceDB \u5411\u91CF\u6570\u636E
relational.db SQLite \u5173\u7CFB\u6570\u636E + FTS

Exit Codes:
0 \u6210\u529F
2 \u53C2\u6570\u9519\u8BEF / \u96C6\u5408\u4E0D\u5B58\u5728 / \u80FD\u529B\u4E0D\u5339\u914D
1 \u8FD0\u884C\u65F6\u9519\u8BEF\uFF08\u5F15\u64CE\u6545\u969C\u3001pai \u8C03\u7528\u5931\u8D25\u7B49\uFF09`;

/** Help footer for `col init`. */
var COL_INIT_EXAMPLES = `
Examples:
$ xdb col init my-docs --policy hybrid/knowledge-base
$ xdb col init logs --policy relational
$ xdb col init my-col --policy hybrid --params '{"fields":{"title":{"findCaps":["match"]}}}'`;

/** Help footer for `col list`. */
var COL_LIST_EXAMPLES = `
Examples:
$ xdb col list
$ xdb col list --json # JSON array \u8F93\u51FA`;

/** Help footer for `col rm`. */
var COL_RM_EXAMPLES = `
Examples:
$ xdb col rm my-docs

Warning: \u6B64\u64CD\u4F5C\u4E0D\u53EF\u9006\uFF0C\u5C06\u7269\u7406\u5220\u9664\u96C6\u5408\u76EE\u5F55\u53CA\u6240\u6709\u7D22\u5F15\u6587\u4EF6\u3002`;

/** Help footer for `col info`. */
var COL_INFO_EXAMPLES = `
Examples:
$ xdb col info my-docs # \u4EBA\u7C7B\u53EF\u8BFB
$ xdb col info my-docs --json # JSON \u8F93\u51FA`;

/** Help footer for `policy list`. */
var POLICY_LIST_EXAMPLES = `
Examples:
$ xdb policy list # \u4EBA\u7C7B\u53EF\u8BFB
$ xdb policy list --json # JSON \u8F93\u51FA`;

/** Help footer for `put`. */
var PUT_EXAMPLES = `
Examples:
$ xdb put my-docs '{"content":"How to use tar"}' # \u4F4D\u7F6E\u53C2\u6570
$ echo '{"content":"Git branching"}' | xdb put my-docs # stdin \u8F93\u5165
$ cat data.jsonl | xdb put my-docs --batch # \u6279\u91CF\u5199\u5165

Stdin:
\u652F\u6301\u901A\u8FC7\u7BA1\u9053\u4F20\u5165 JSON \u6216 JSONL \u6570\u636E\u3002

Note:
\u76F8\u540C id \u7684\u8BB0\u5F55\u6267\u884C upsert\uFF08\u5E42\u7B49\u64CD\u4F5C\uFF09\u3002
--batch --json \u8F93\u51FA: {"inserted":N,"updated":N,"errors":N}`;

/** Help footer for `find`. */
var FIND_EXAMPLES = `
Examples:
$ xdb find my-docs "compress files" --similar # \u8BED\u4E49\u641C\u7D22
$ xdb find my-docs "tar compression" --match # \u5168\u6587\u68C0\u7D22
$ xdb find my-docs --where "json_extract(data, '$.category') = 'network'"
$ echo "database optimization" | xdb find my-docs --similar # stdin \u67E5\u8BE2

Stdin:
\u652F\u6301\u901A\u8FC7\u7BA1\u9053\u4F20\u5165\u67E5\u8BE2\u6587\u672C\uFF08\u7528\u4E8E --similar \u548C --match\uFF09\u3002

JSON output (--json):
JSONL \u683C\u5F0F\uFF0C\u6BCF\u884C\u4E00\u4E2A\u7ED3\u679C\uFF0C\u542B _score \u548C _engine \u5143\u6570\u636E\u3002`;

/**
 * Add the main examples footer and the verbose-help machinery to the program.
 */
function installHelp(program2) {
  program2.addHelpText("after", MAIN_EXAMPLES);
  installVerboseHelp(program2);
}

/**
 * Append the matching examples footer to each known subcommand of `col`
 * (`init`, `list`, `rm`, `info`); other subcommands are left untouched.
 */
function addColExamples(col2) {
  const footerFor = new Map([
    ["init", COL_INIT_EXAMPLES],
    ["list", COL_LIST_EXAMPLES],
    ["rm", COL_RM_EXAMPLES],
    ["info", COL_INFO_EXAMPLES]
  ]);
  for (const sub of col2.commands) {
    const footer = footerFor.get(sub.name());
    if (footer !== undefined) {
      sub.addHelpText("after", footer);
    }
  }
}

/** Append the examples footer to the `list` subcommand of `policy`. */
function addPolicyExamples(policy2) {
  for (const sub of policy2.commands) {
    if (sub.name() !== "list") {
      continue;
    }
    sub.addHelpText("after", POLICY_LIST_EXAMPLES);
  }
}

/** Append the `put` examples footer to the given command. */
function addPutExamples(cmd) {
  cmd.addHelpText("after", PUT_EXAMPLES);
}

/** Append the `find` examples footer to the given command. */
function addFindExamples(cmd) {
  cmd.addHelpText("after", FIND_EXAMPLES);
}

/**
 * Register a `--verbose` option that, once seen, makes the program's
 * "afterAll" help text return MAIN_VERBOSE instead of an empty string.
 * The flag is remembered on the program object as `__verboseHelp`.
 */
function installVerboseHelp(program2) {
  program2.option("--verbose", "(\u4E0E --help \u4E00\u8D77\u4F7F\u7528) \u663E\u793A\u5B8C\u6574\u5E2E\u52A9\u4FE1\u606F");
  program2.on("option:verbose", () => {
    program2.__verboseHelp = true;
  });
  program2.addHelpText("afterAll", () => (program2.__verboseHelp ? MAIN_VERBOSE : ""));
}
2144
+
2145
+ // src/cli.ts
2146
// A closed pipe on either output stream (e.g. `xdb ... | head`) ends the
// process with exit code 0; any other stream error is rethrown.
for (const stream of [process.stdout, process.stderr]) {
  stream.on("error", (err) => {
    if (err.code === "EPIPE") process.exit(0);
    throw err;
  });
}
2154
// Directory of this module; package.json is read from its parent directory
// to obtain the version string.
var __dirname = dirname2(fileURLToPath(import.meta.url));
var pkgVersion = JSON.parse(readFileSync(join8(__dirname, "../package.json"), "utf8")).version;

// Root command. exitOverride() makes Commander throw instead of exiting, so
// the entry point below decides the exit code.
var program = new Command();
program.name("xdb");
program.description("Intent-driven data hub CLI for AI agents");
program.version(`xdb ${pkgVersion}`);
program.showHelpAfterError(true);
program.exitOverride();
installHelp(program);

// `col` command group.
var col = program.command("col").description("Manage collections");
registerColCommands(col);
addColExamples(col);

// `policy` command group; with no subcommand it prints its own help.
var policy = program.command("policy").description("Discover available policies");
registerPolicyCommands(policy);
addPolicyExamples(policy);
policy.action(() => {
  policy.outputHelp();
});

// `put` and `find`: register each, then look it up by name to attach examples.
{
  const commandNamed = (wanted) => program.commands.find((cmd) => cmd.name() === wanted);
  registerPutCommand(program);
  addPutExamples(commandNamed("put"));
  registerFindCommand(program);
  addFindExamples(commandNamed("find"));
}

// `config` and `embed`.
var config = program.command("config").description("Manage xdb configuration");
registerConfigCommands(config);
registerEmbedCommand(program);

program.configureOutput({
  writeErr: (str) => process.stderr.write(str),
  writeOut: (str) => process.stdout.write(str)
});

// `col` with no subcommand prints its own help.
col.action(() => {
  col.outputHelp();
});
2183
// Entry point: parse argv and translate failures into a process exit code.
// Errors carrying an `exitCode` keep it, except that 1 is remapped to 2;
// anything else is printed to stderr and exits with 1.
(async () => {
  try {
    await program.parseAsync(process.argv);
  } catch (err) {
    const carriesExitCode = Boolean(err) && typeof err === "object" && "exitCode" in err;
    if (!carriesExitCode) {
      const detail = err instanceof Error ? err.message : String(err);
      process.stderr.write(`Error: ${detail}\n`);
      process.exitCode = 1;
      return;
    }
    const { exitCode } = err;
    process.exitCode = exitCode === 1 ? 2 : exitCode;
  }
})();
2197
+ //# sourceMappingURL=cli.js.map