@kweaver-ai/kweaver-sdk 0.7.2 → 0.7.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +6 -1
  2. package/dist/api/datasources.d.ts +7 -0
  3. package/dist/api/datasources.js +8 -0
  4. package/dist/api/toolboxes.d.ts +2 -0
  5. package/dist/api/toolboxes.js +2 -1
  6. package/dist/cli.js +18 -2
  7. package/dist/commands/auth.js +42 -7
  8. package/dist/commands/bkn-ops.d.ts +2 -1
  9. package/dist/commands/bkn-ops.js +69 -34
  10. package/dist/commands/bkn-utils.d.ts +26 -2
  11. package/dist/commands/bkn-utils.js +66 -9
  12. package/dist/commands/dataflow.js +194 -20
  13. package/dist/commands/ds.d.ts +0 -1
  14. package/dist/commands/ds.js +19 -9
  15. package/dist/commands/import-csv.d.ts +0 -2
  16. package/dist/commands/import-csv.js +2 -4
  17. package/dist/commands/tool.d.ts +1 -0
  18. package/dist/commands/tool.js +12 -0
  19. package/dist/config/store.d.ts +1 -0
  20. package/dist/config/store.js +17 -0
  21. package/dist/resources/toolboxes.d.ts +2 -0
  22. package/dist/templates/bkn/document/manifest.json +12 -0
  23. package/dist/templates/bkn/document/template.json +757 -0
  24. package/dist/templates/dataflow/unstructured/manifest.json +11 -0
  25. package/dist/templates/dataflow/unstructured/template.json +63 -0
  26. package/dist/templates/dataset/document/manifest.json +10 -0
  27. package/dist/templates/dataset/document/template.json +23 -0
  28. package/dist/templates/dataset/document-content/manifest.json +10 -0
  29. package/dist/templates/dataset/document-content/template.json +29 -0
  30. package/dist/templates/dataset/document-element/manifest.json +10 -0
  31. package/dist/templates/dataset/document-element/template.json +21 -0
  32. package/dist/utils/template-loader.d.ts +40 -0
  33. package/dist/utils/template-loader.js +129 -0
  34. package/package.json +1 -1
package/README.md CHANGED
@@ -193,7 +193,7 @@ kweaver context-loader search-schema|tool-call|kn-search|kn-schema-search <kn-id
193
193
  kweaver context-loader query-object-instance|query-instance-subgraph|get-logic-properties|get-action-info|find-skills <kn-id> ...
194
194
  kweaver context-loader config set/use/list/show (deprecated; <kn-id> may be omitted to fall back to saved config)
195
195
  kweaver toolbox create/list/publish/unpublish/delete
196
- kweaver tool upload/list/enable/disable
196
+ kweaver tool upload/list/enable/disable/execute/debug (execute and debug accept --path for OpenAPI path params)
197
197
  kweaver call <path> [-X METHOD] [-d BODY] [-H header] [-F key=value]
198
198
  ```
199
199
 
@@ -242,6 +242,11 @@ kweaver tool upload --toolbox <BOX_ID> ./openapi.json
242
242
  # 3. Publish the toolbox and enable the tool
243
243
  kweaver toolbox publish <BOX_ID>
244
244
  kweaver tool enable --toolbox <BOX_ID> <TOOL_ID>
245
+
246
+ # Invoke / debug: envelope supports `--header`, `--query`, `--body`, and **`--path`**
247
+ # for OpenAPI `{param}` placeholders (required for paths like `/data-views/{id}`).
248
+ kweaver tool debug --toolbox <BOX_ID> <TOOL_ID> \
249
+ --path '{"id":"<DATA_VIEW_UUID>"}' [--body '<json>']
245
250
  ```
246
251
 
247
252
  **No-auth platforms:** If OAuth is not enabled, use `kweaver auth <url> --no-auth` (or run a normal `auth login`; a **404** on `POST /oauth2/clients` switches to no-auth automatically). Credentials are still saved under `~/.kweaver/` and work with `auth use` / `auth list`. Optional: `KWEAVER_NO_AUTH=1` with `KWEAVER_BASE_URL` when no token env is set. SDK: `new KWeaverClient({ baseUrl, auth: false })` or `kweaver.configure({ baseUrl, auth: false })`.
@@ -71,3 +71,10 @@ export interface ScanMetadataOptions {
71
71
  businessDomain?: string;
72
72
  }
73
73
  export declare function scanMetadata(options: ScanMetadataOptions): Promise<string>;
74
+ export interface ScanDatasourceMetadataOptions {
75
+ baseUrl: string;
76
+ accessToken: string;
77
+ id: string;
78
+ businessDomain?: string;
79
+ }
80
+ export declare function scanDatasourceMetadata(options: ScanDatasourceMetadataOptions): Promise<string>;
@@ -208,3 +208,11 @@ export async function scanMetadata(options) {
208
208
  }
209
209
  return taskId;
210
210
  }
211
+ // Looks up a datasource's type then triggers a metadata scan, so callers
212
+ // don't have to repeat the GET-then-scan dance whenever a flow needs the
213
+ // platform catalog refreshed (after import-csv, before discovering tables).
214
+ export async function scanDatasourceMetadata(options) {
215
+ const dsBody = await getDatasource(options);
216
+ const dsType = JSON.parse(dsBody).type ?? "mysql";
217
+ return scanMetadata({ ...options, dsType });
218
+ }
@@ -63,6 +63,8 @@ export interface InvokeToolOptions extends BaseOpts {
63
63
  header?: Record<string, unknown>;
64
64
  /** Optional query params to forward. */
65
65
  query?: Record<string, unknown>;
66
+ /** Path parameter map for OpenAPI `{param}` placeholders (e.g. `{ id: "<uuid>" }`). */
67
+ path?: Record<string, unknown>;
66
68
  /** JSON body forwarded to the downstream tool. */
67
69
  body?: unknown;
68
70
  /** Per-call timeout in seconds; backend default applies when omitted. */
@@ -20,7 +20,7 @@ import { buildHeaders } from "./headers.js";
20
20
  // POST /tool-box/{box}/tool/{tool}/debug debug tool (envelope JSON)
21
21
  //
22
22
  // Envelope shape required by /proxy and /debug:
23
- // { "timeout": <s>, "header": {...}, "query": {...}, "body": {...} }
23
+ // { "timeout": <s>, "header": {...}, "query": {...}, "body": {...}, "path": {...} }
24
24
  // Flat-shape requests cause the forwarder to drop downstream Authorization
25
25
  // headers, which manifests as 401 "token expired" from the underlying tool.
26
26
  const PATH = "/api/agent-operator-integration/v1/tool-box";
@@ -145,6 +145,7 @@ function buildEnvelope(opts) {
145
145
  envelope.timeout = opts.timeout;
146
146
  envelope.header = opts.header ?? {};
147
147
  envelope.query = opts.query ?? {};
148
+ envelope.path = opts.path ?? {};
148
149
  envelope.body = opts.body ?? {};
149
150
  return JSON.stringify(envelope);
150
151
  }
package/dist/cli.js CHANGED
@@ -62,6 +62,10 @@ Usage:
62
62
  kweaver ds connect <db_type> <host> <port> <database> --account X --password Y [--schema S] [--name N]
63
63
  [--reuse-existing|--force-new]
64
64
 
65
+ kweaver dataflow templates [--json]
66
+ kweaver dataflow create-dataset --template <name> --set "key=value" [--json] [-bd value]
67
+ kweaver dataflow create-bkn --template <name> --set "key=value" [--json] [-bd value]
68
+ kweaver dataflow create (--template <name> --set "key=value" | <json>) [-bd value]
65
69
  kweaver dataflow list [-bd value]
66
70
  kweaver dataflow run <dagId> (--file <path> | --url <remote-url> --name <filename>) [-bd value]
67
71
  kweaver dataflow runs <dagId> [--since <date-like>] [-bd value]
@@ -116,7 +120,7 @@ Usage:
116
120
  kweaver tool enable|disable --toolbox <box-id> <tool-id>... [-bd value]
117
121
  kweaver tool execute|debug --toolbox <box-id> <tool-id>
118
122
  [--body '<json>'|--body-file <path>]
119
- [--header '<json>'] [--query '<json>'] [--timeout <s>]
123
+ [--header '<json>'] [--query '<json>'] [--path '<json>'] [--timeout <s>]
120
124
 
121
125
  kweaver vega health|stats|inspect
122
126
  kweaver vega catalog list|get|health|test-connection|discover|resources [options]
@@ -139,7 +143,19 @@ Usage:
139
143
  Global options:
140
144
  --base-url <url> Override platform base URL for this command (env: KWEAVER_BASE_URL)
141
145
  --token <value> Override access token for this command (env: KWEAVER_TOKEN; disables write-to-disk commands)
142
- --user <id|name> Use a specific user's credentials for this command (env: KWEAVER_USER)
146
+ --user <id|name> Use a specific user's credentials for this command, transient (env: KWEAVER_USER)
147
+
148
+ Multi-shell account isolation:
149
+ KWEAVER_PROFILE=<name> Scope state.json (active platform / active user) to a named
150
+ profile. Tokens under platforms/ are still shared, so each
151
+ profile reuses logins. Required for \`auth switch\` and
152
+ \`auth use\` (use --global to override). Name must match
153
+ [A-Za-z0-9_-]{1,64}.
154
+ KWEAVERC_CONFIG_DIR=<dir> Override the entire config root (~/.kweaver by default).
155
+ Use this for hard isolation (separate token store per shell).
156
+
157
+ For agents / multi-terminal scripts: prefer \`--user <id>\` (transient, no persistence)
158
+ over \`auth switch\` (persistent, requires KWEAVER_PROFILE).
143
159
  --pretty / --compact
144
160
  Toggle pretty-printed JSON output. Supported by every
145
161
  command that prints a JSON payload (default: pretty).
@@ -1,9 +1,31 @@
1
1
  import { isNoAuth } from "../config/no-auth.js";
2
2
  import { assertNotStatelessForWrite } from "../config/stateless.js";
3
- import { autoSelectBusinessDomain, clearPlatformSession, deletePlatform, deleteUser, getActiveUser, getConfigDir, getCurrentPlatform, getPlatformAlias, hasPlatform, listPlatforms, listUserProfiles, loadClientConfig, loadTokenConfig, loadUserTokenConfig, resolveBusinessDomain, resolvePlatformIdentifier, resolveUserId, saveNoAuthPlatform, setActiveUser, setCurrentPlatform, setPlatformAlias, } from "../config/store.js";
3
+ import { autoSelectBusinessDomain, clearPlatformSession, deletePlatform, deleteUser, getActiveUser, getConfigDir, getCurrentPlatform, getPlatformAlias, getProfileName, hasPlatform, listPlatforms, listUserProfiles, loadClientConfig, loadTokenConfig, loadUserTokenConfig, resolveBusinessDomain, resolvePlatformIdentifier, resolveUserId, saveNoAuthPlatform, setActiveUser, setCurrentPlatform, setPlatformAlias, } from "../config/store.js";
4
4
  import { decodeJwtPayload } from "../config/jwt.js";
5
5
  import { eacpModifyPassword } from "../auth/eacp-modify-password.js";
6
6
  import { buildCopyCommand, fetchEacpUserInfo, formatHttpError, InitialPasswordChangeRequiredError, normalizeBaseUrl, oauth2Login, oauth2PasswordSigninLogin, promptForUsername, promptForPassword, refreshTokenLogin, resolveActivePlatform, } from "../auth/oauth.js";
7
+ function consumeGlobalFlag(args) {
8
+ const idx = args.indexOf("--global");
9
+ if (idx === -1)
10
+ return { args, isGlobal: false };
11
+ return { args: [...args.slice(0, idx), ...args.slice(idx + 1)], isGlobal: true };
12
+ }
13
+ function requireProfileOrGlobal(command, isGlobal) {
14
+ if (isGlobal)
15
+ return null;
16
+ try {
17
+ if (getProfileName())
18
+ return null;
19
+ }
20
+ catch (err) {
21
+ return err instanceof Error ? err.message : String(err);
22
+ }
23
+ return (`kweaver auth ${command} mutates the active account globally and would affect every shell using ~/.kweaver.\n` +
24
+ `Pick one:\n` +
25
+ ` - Transient: prepend \`--user <id|name>\` (or \`KWEAVER_USER=<id>\`) to the command you actually want to run; no persistent switch.\n` +
26
+ ` - Persistent (this shell only): \`export KWEAVER_PROFILE=<name>\`, then re-run.\n` +
27
+ ` - Intentionally global (CI / single-user setup): re-run with \`--global\`.`);
28
+ }
7
29
  export async function runAuthCommand(args) {
8
30
  const target = args[0];
9
31
  const rest = args.slice(1);
@@ -357,10 +379,16 @@ Login options:
357
379
  return 0;
358
380
  }
359
381
  if (target === "use") {
360
- const resolvedTarget = args[1] ? resolvePlatformIdentifier(args[1]) : "";
382
+ const { args: useArgs, isGlobal } = consumeGlobalFlag(args);
383
+ const refusal = requireProfileOrGlobal("use", isGlobal);
384
+ if (refusal !== null) {
385
+ console.error(refusal);
386
+ return 1;
387
+ }
388
+ const resolvedTarget = useArgs[1] ? resolvePlatformIdentifier(useArgs[1]) : "";
361
389
  const useTarget = resolvedTarget && /^https?:\/\//.test(resolvedTarget) ? normalizeBaseUrl(resolvedTarget) : resolvedTarget;
362
390
  if (!useTarget) {
363
- console.error("Usage: kweaver auth use <platform-url|alias>");
391
+ console.error("Usage: kweaver auth use [--global] <platform-url|alias>");
364
392
  return 1;
365
393
  }
366
394
  if (!hasPlatform(useTarget)) {
@@ -490,18 +518,25 @@ You can use either userId or username with --user in switch/logout/delete.`);
490
518
  }
491
519
  function runAuthSwitchCommand(args) {
492
520
  if (args[0] === "--help" || args[0] === "-h") {
493
- console.log(`kweaver auth switch [platform-url|alias] --user <userId|username>
521
+ console.log(`kweaver auth switch [--global] [platform-url|alias] --user <userId|username>
494
522
 
495
523
  Switch the active user for a platform.
496
524
  You can specify either the userId (sub claim) or the username (preferred_username from id_token).`);
497
525
  return 0;
498
526
  }
499
- const userArg = readOption(args, "--user");
527
+ const { args: switchArgs, isGlobal } = consumeGlobalFlag(args);
528
+ const refusal = requireProfileOrGlobal("switch", isGlobal);
529
+ if (refusal !== null) {
530
+ console.error(refusal);
531
+ return 1;
532
+ }
533
+ const cleanedArgs = switchArgs;
534
+ const userArg = readOption(cleanedArgs, "--user") ?? process.env.KWEAVER_USER;
500
535
  if (!userArg) {
501
- console.error("Usage: kweaver auth switch [platform-url|alias] --user <userId|username>");
536
+ console.error("Usage: kweaver auth switch [--global] [platform-url|alias] --user <userId|username>");
502
537
  return 1;
503
538
  }
504
- const filteredArgs = args.filter((a) => a !== "--user" && a !== userArg);
539
+ const filteredArgs = cleanedArgs.filter((a) => a !== "--user" && a !== userArg);
505
540
  const platform = resolvePlatformArg(filteredArgs);
506
541
  if (!platform) {
507
542
  console.error("No active platform. Run `kweaver auth login <platform-url>` first.");
@@ -32,6 +32,7 @@ export declare function parseKnCreateFromDsArgs(args: string[]): {
32
32
  dsId: string;
33
33
  name: string;
34
34
  tables: string[];
35
+ pkMap: Record<string, string>;
35
36
  build: boolean;
36
37
  timeout: number;
37
38
  businessDomain: string;
@@ -51,8 +52,8 @@ export declare function parseKnCreateFromCsvArgs(args: string[]): {
51
52
  tablePrefix: string;
52
53
  batchSize: number;
53
54
  tables: string[];
55
+ pkMap: Record<string, string>;
54
56
  build: boolean;
55
- recreate: boolean;
56
57
  timeout: number;
57
58
  businessDomain: string;
58
59
  noRollback: boolean;
@@ -5,7 +5,7 @@ import { loadNetwork, allObjects, allRelations, allActions, generateChecksum, va
5
5
  import { prepareBknDirectoryForImport, stripBknEncodingCliArgs, } from "../utils/bkn-encoding.js";
6
6
  import { ensureValidToken, formatHttpError } from "../auth/oauth.js";
7
7
  import { createKnowledgeNetwork, createObjectTypes, deleteKnowledgeNetwork, buildKnowledgeNetwork, getBuildStatus, } from "../api/knowledge-networks.js";
8
- import { listTablesWithColumns, scanMetadata, getDatasource } from "../api/datasources.js";
8
+ import { listTablesWithColumns, scanDatasourceMetadata } from "../api/datasources.js";
9
9
  import { createDataView, findDataView } from "../api/dataviews.js";
10
10
  import { resolveFiles } from "./ds.js";
11
11
  import { buildTableName } from "./import-csv.js";
@@ -13,7 +13,7 @@ import { downloadBkn, uploadBkn, listActionSchedules, getActionSchedule, createA
13
13
  import { formatCallOutput } from "./call.js";
14
14
  import { resolveBusinessDomain } from "../config/store.js";
15
15
  import { runDsImportCsv } from "./ds.js";
16
- import { pollWithBackoff, detectPrimaryKey, detectDisplayKey, confirmYes, } from "./bkn-utils.js";
16
+ import { pollWithBackoff, detectPrimaryKey, detectDisplayKey, formatPkDetectionError, parsePkMap, confirmYes, } from "./bkn-utils.js";
17
17
  // ── BKN object name validation ──────────────────────────────────────────────
18
18
  // Mirrors bkn-backend OBJECT_NAME_MAX_LENGTH (interfaces/common.go:28) and
19
19
  // validateObjectName (driveradapters/validate.go:85). 40 utf-8 codepoints,
@@ -480,6 +480,8 @@ Create a knowledge network from a datasource (dataviews + object types + optiona
480
480
  Options:
481
481
  --name <s> Knowledge network name (required)
482
482
  --tables <a,b> Comma-separated table names (default: all)
483
+ --pk-map <s> Explicit primary keys: <table>:<field>[,<table>:<field>...]
484
+ Required when auto-detection fails (no unique column in sample)
483
485
  --build (default) Build after creation
484
486
  --no-build Skip build after creation
485
487
  --timeout <n> Build timeout in seconds (default: 300)
@@ -490,6 +492,7 @@ export function parseKnCreateFromDsArgs(args) {
490
492
  let dsId = "";
491
493
  let name = "";
492
494
  let tablesStr = "";
495
+ let pkMapStr = "";
493
496
  let build = true;
494
497
  let timeout = 300;
495
498
  let businessDomain = "";
@@ -507,6 +510,10 @@ export function parseKnCreateFromDsArgs(args) {
507
510
  tablesStr = args[++i];
508
511
  continue;
509
512
  }
513
+ if (arg === "--pk-map" && args[i + 1]) {
514
+ pkMapStr = args[++i];
515
+ continue;
516
+ }
510
517
  if (arg === "--build") {
511
518
  build = true;
512
519
  continue;
@@ -541,9 +548,10 @@ export function parseKnCreateFromDsArgs(args) {
541
548
  if (!dsId || !name) {
542
549
  throw new Error("Usage: kweaver bkn create-from-ds <ds-id> --name X [options]");
543
550
  }
551
+ const pkMap = pkMapStr ? parsePkMap(pkMapStr) : {};
544
552
  if (!businessDomain)
545
553
  businessDomain = resolveBusinessDomain();
546
- return { dsId, name, tables, build, timeout, businessDomain, pretty, noRollback };
554
+ return { dsId, name, tables, pkMap, build, timeout, businessDomain, pretty, noRollback };
547
555
  }
548
556
  /** Sanitize a table name into a BKN-safe ID (alphanumeric + underscore). */
549
557
  function sanitizeBknId(name) {
@@ -587,6 +595,7 @@ export async function runKnCreateFromDsCommand(args, sampleRows) {
587
595
  const tableRetryDelayMs = 4000;
588
596
  let allTables = [];
589
597
  let targetTables = [];
598
+ let scanAttempted = false;
590
599
  for (let attempt = 1; attempt <= maxTableListAttempts; attempt += 1) {
591
600
  const tablesBody = await listTablesWithColumns({ ...base, id: options.dsId });
592
601
  allTables = JSON.parse(tablesBody);
@@ -596,8 +605,24 @@ export async function runKnCreateFromDsCommand(args, sampleRows) {
596
605
  if (targetTables.length > 0)
597
606
  break;
598
607
  if (attempt < maxTableListAttempts) {
599
- console.error(`No tables available (attempt ${attempt}/${maxTableListAttempts}); retrying in ${tableRetryDelayMs / 1000}s...`);
600
- await new Promise((r) => setTimeout(r, tableRetryDelayMs));
608
+ // First miss: the catalog often hasn't picked up tables created
609
+ // out-of-band (e.g. ds import-csv from an older SDK that didn't
610
+ // self-scan). Trigger a scan once before falling back to plain
611
+ // sleep-retries.
612
+ if (!scanAttempted) {
613
+ scanAttempted = true;
614
+ console.error(`No tables available (attempt ${attempt}/${maxTableListAttempts}); scanning datasource metadata before retry...`);
615
+ try {
616
+ await scanDatasourceMetadata({ ...base, id: options.dsId });
617
+ }
618
+ catch (err) {
619
+ console.error(`Scan warning (continuing): ${formatHttpError(err)}`);
620
+ }
621
+ }
622
+ else {
623
+ console.error(`No tables available (attempt ${attempt}/${maxTableListAttempts}); retrying in ${tableRetryDelayMs / 1000}s...`);
624
+ await new Promise((r) => setTimeout(r, tableRetryDelayMs));
625
+ }
601
626
  }
602
627
  }
603
628
  if (targetTables.length === 0) {
@@ -608,6 +633,31 @@ export async function runKnCreateFromDsCommand(args, sampleRows) {
608
633
  // Backend rejects the whole batch on first violation (validate.go:90),
609
634
  // so retroactive rollback is wasted work if we can fail fast here.
610
635
  assertValidBknObjectNames(targetTables.map((t) => t.name), "Object type names derived from table names");
636
+ // Pre-flight: resolve PK for every table BEFORE any side effect.
637
+ // Auto-detection silently picking the wrong column was the cause of
638
+ // issue #97 (KN built with ~5 indexed docs out of 2036 source rows).
639
+ // Resolve order: --pk-map override → cardinality-based detection → fail-fast.
640
+ const tablePks = {};
641
+ const unknownPkMapTables = Object.keys(options.pkMap).filter((name) => !targetTables.some((t) => t.name === name));
642
+ if (unknownPkMapTables.length > 0) {
643
+ throw new Error(`--pk-map references unknown table(s): ${unknownPkMapTables.join(", ")}`);
644
+ }
645
+ for (const t of targetTables) {
646
+ const override = options.pkMap[t.name];
647
+ if (override) {
648
+ if (!t.columns.some((c) => c.name === override)) {
649
+ throw new Error(`--pk-map specifies '${override}' for table '${t.name}', but no such column. ` +
650
+ `Columns: ${t.columns.map((c) => c.name).join(", ")}`);
651
+ }
652
+ tablePks[t.name] = override;
653
+ continue;
654
+ }
655
+ const result = detectPrimaryKey(t, sampleRows?.[t.name]);
656
+ if (!result.pk) {
657
+ throw new Error(formatPkDetectionError(t.name, result));
658
+ }
659
+ tablePks[t.name] = result.pk;
660
+ }
611
661
  // Phase 1: Create DataViews for each table. findDataView is idempotent;
612
662
  // not tracked for rollback so a retry can reuse what's already there.
613
663
  console.error(`Creating data views for ${targetTables.length} table(s) ...`);
@@ -653,7 +703,7 @@ export async function runKnCreateFromDsCommand(args, sampleRows) {
653
703
  // (object_type_service.go:213-355) — all-or-nothing.
654
704
  console.error(`Creating ${targetTables.length} object type(s) ...`);
655
705
  const entries = targetTables.map((t) => {
656
- const pk = detectPrimaryKey(t, sampleRows?.[t.name]);
706
+ const pk = tablePks[t.name];
657
707
  const dk = detectDisplayKey(t, pk);
658
708
  return {
659
709
  branch: "main",
@@ -759,7 +809,7 @@ Options:
759
809
  --tables <a,b> Tables to include in KN (default: all imported)
760
810
  --build (default) Build after creation
761
811
  --no-build Skip build
762
- --recreate Use "insert" mode on first batch (only effective for new tables)
812
+ --pk-map <s> Explicit primary keys: <table>:<field>[,<table>:<field>...]
763
813
  --timeout <n> Build timeout in seconds (default: 300)
764
814
  --no-rollback Keep partially-created KN on failure (debug; default: rollback)
765
815
  -bd, --biz-domain Business domain (default: bd_public)`;
@@ -770,8 +820,8 @@ export function parseKnCreateFromCsvArgs(args) {
770
820
  let tablePrefix = "";
771
821
  let batchSize = 500;
772
822
  let tablesStr = "";
823
+ let pkMapStr = "";
773
824
  let build = true;
774
- let recreate = false;
775
825
  let timeout = 300;
776
826
  let businessDomain = "";
777
827
  let noRollback = false;
@@ -809,8 +859,8 @@ export function parseKnCreateFromCsvArgs(args) {
809
859
  build = false;
810
860
  continue;
811
861
  }
812
- if (arg === "--recreate") {
813
- recreate = true;
862
+ if (arg === "--pk-map" && args[i + 1]) {
863
+ pkMapStr = args[++i];
814
864
  continue;
815
865
  }
816
866
  if (arg === "--no-rollback") {
@@ -835,9 +885,10 @@ export function parseKnCreateFromCsvArgs(args) {
835
885
  if (!dsId || !files || !name) {
836
886
  throw new Error("Usage: kweaver bkn create-from-csv <ds-id> --files <glob> --name X [options]");
837
887
  }
888
+ const pkMap = pkMapStr ? parsePkMap(pkMapStr) : {};
838
889
  if (!businessDomain)
839
890
  businessDomain = resolveBusinessDomain();
840
- return { dsId, files, name, tablePrefix, batchSize, tables, build, recreate, timeout, businessDomain, noRollback };
891
+ return { dsId, files, name, tablePrefix, batchSize, tables, pkMap, build, timeout, businessDomain, noRollback };
841
892
  }
842
893
  export async function runKnCreateFromCsvCommand(args) {
843
894
  let options;
@@ -874,35 +925,15 @@ export async function runKnCreateFromCsvCommand(args) {
874
925
  "--table-prefix", options.tablePrefix,
875
926
  "--batch-size", String(options.batchSize),
876
927
  "-bd", options.businessDomain,
877
- ...(options.recreate ? ["--recreate"] : []),
878
928
  ];
879
929
  const importResult = await runDsImportCsv(importArgs);
880
930
  if (importResult.code !== 0) {
881
931
  console.error("CSV import failed — aborting KN creation");
882
932
  return importResult.code;
883
933
  }
884
- // Phase 1.5: Scan datasource metadata so platform discovers newly imported tables
885
- console.error("Scanning datasource metadata ...");
886
- try {
887
- const token = await ensureValidToken();
888
- const dsBody = await getDatasource({
889
- baseUrl: token.baseUrl,
890
- accessToken: token.accessToken,
891
- id: options.dsId,
892
- businessDomain: options.businessDomain,
893
- });
894
- const dsParsed = JSON.parse(dsBody);
895
- await scanMetadata({
896
- baseUrl: token.baseUrl,
897
- accessToken: token.accessToken,
898
- id: options.dsId,
899
- dsType: dsParsed.type ?? "mysql",
900
- businessDomain: options.businessDomain,
901
- });
902
- }
903
- catch (err) {
904
- console.error(`Scan warning (continuing): ${String(err)}`);
905
- }
934
+ // (Phase 1.5 metadata scan removed: runDsImportCsv now self-scans on
935
+ // success, and runKnCreateFromDsCommand's table-discovery retry triggers
936
+ // a scan if the catalog still lags. Two layers of fallback are enough.)
906
937
  // Phase 2: Create KN from datasource
907
938
  console.error("Phase 2: Creating knowledge network ...");
908
939
  const tableNames = options.tables.length > 0 ? options.tables : importResult.tables;
@@ -910,6 +941,7 @@ export async function runKnCreateFromCsvCommand(args) {
910
941
  console.error("No tables available for KN creation — aborting");
911
942
  return 1;
912
943
  }
944
+ const pkMapEntries = Object.entries(options.pkMap);
913
945
  const knArgs = [
914
946
  options.dsId,
915
947
  "--name", options.name,
@@ -917,6 +949,9 @@ export async function runKnCreateFromCsvCommand(args) {
917
949
  options.build ? "--build" : "--no-build",
918
950
  "--timeout", String(options.timeout),
919
951
  "-bd", options.businessDomain,
952
+ ...(pkMapEntries.length > 0
953
+ ? ["--pk-map", pkMapEntries.map(([t, f]) => `${t}:${f}`).join(",")]
954
+ : []),
920
955
  ...(options.noRollback ? ["--no-rollback"] : []),
921
956
  ];
922
957
  return runKnCreateFromDsCommand(knArgs, importResult.sampleRows);
@@ -18,14 +18,38 @@ export declare function parseOntologyQueryFlags(args: string[]): {
18
18
  businessDomain: string;
19
19
  };
20
20
  export declare const DISPLAY_HINTS: string[];
21
- /** Detect primary key: first column (left-to-right) with all unique values in the sample. */
21
+ export interface PkCandidate {
22
+ name: string;
23
+ cardinality: number;
24
+ }
25
+ export interface PkDetectionResult {
26
+ /** Detected PK column name, or null when detection is not confident. */
27
+ pk: string | null;
28
+ /** All columns sorted by cardinality desc. Empty when no sample. */
29
+ candidates: PkCandidate[];
30
+ /** 0 when no sample data was provided. */
31
+ sampleSize: number;
32
+ }
33
+ export declare const PK_NAME_HINTS: string[];
34
+ /**
35
+ * Detect primary key from a row sample. Returns null pk when no column has
36
+ * unique values across the sample, or when no sample rows are given — caller must fail-fast and prompt for --pk-map.
37
+ * Among columns that ARE fully unique, prefers PK-like names (id, *_id, pk, *_pk).
38
+ */
22
39
  export declare function detectPrimaryKey(table: {
23
40
  name: string;
24
41
  columns: Array<{
25
42
  name: string;
26
43
  type: string;
27
44
  }>;
28
- }, rows?: Array<Record<string, string | null>>): string;
45
+ }, rows?: Array<Record<string, string | null>>): PkDetectionResult;
46
+ /** Format a user-facing error message when PK auto-detection fails. */
47
+ export declare function formatPkDetectionError(tableName: string, result: PkDetectionResult): string;
48
+ /**
49
+ * Parse --pk-map string into a Record<table, field>.
50
+ * Format: "<table>:<field>[,<table>:<field>...]". Throws on invalid input.
51
+ */
52
+ export declare function parsePkMap(input: string): Record<string, string>;
29
53
  export declare function detectDisplayKey(table: {
30
54
  name: string;
31
55
  columns: Array<{
@@ -68,18 +68,75 @@ export function parseOntologyQueryFlags(args) {
68
68
  }
69
69
  // ── Schema detection helpers ─────────────────────────────────────────────────
70
70
  export const DISPLAY_HINTS = ["name", "title", "label", "display_name", "description"];
71
- /** Detect primary key: first column (left-to-right) with all unique values in the sample. */
71
+ export const PK_NAME_HINTS = ["id", "_id", "pk"];
72
+ /**
73
+ * Detect primary key from a row sample. Returns null pk when no column has
74
+ * unique values across the sample, or when no sample rows are given — caller must fail-fast and prompt for --pk-map.
75
+ * Among columns that ARE fully unique, prefers PK-like names (id, *_id, pk, *_pk).
76
+ */
72
77
  export function detectPrimaryKey(table, rows) {
73
- if (rows && rows.length > 0) {
74
- for (const col of table.columns) {
75
- const values = rows.map((r) => r[col.name]);
76
- const unique = new Set(values);
77
- if (unique.size === rows.length)
78
- return col.name;
78
+ if (!rows || rows.length === 0) {
79
+ return { pk: null, candidates: [], sampleSize: 0 };
80
+ }
81
+ const candidates = table.columns
82
+ .map((col) => {
83
+ const unique = new Set(rows.map((r) => r[col.name]));
84
+ return { name: col.name, cardinality: unique.size };
85
+ })
86
+ .sort((a, b) => b.cardinality - a.cardinality);
87
+ const fullCardinality = candidates.filter((c) => c.cardinality === rows.length);
88
+ if (fullCardinality.length === 0) {
89
+ return { pk: null, candidates, sampleSize: rows.length };
90
+ }
91
+ const named = fullCardinality.find((c) => {
92
+ const lower = c.name.toLowerCase();
93
+ return PK_NAME_HINTS.some((h) => lower === h || lower.endsWith(`_${h}`));
94
+ });
95
+ return {
96
+ pk: named?.name ?? fullCardinality[0].name,
97
+ candidates,
98
+ sampleSize: rows.length,
99
+ };
100
+ }
101
+ /** Format a user-facing error message when PK auto-detection fails. */
102
+ export function formatPkDetectionError(tableName, result) {
103
+ const lines = [`Cannot auto-detect primary key for table '${tableName}'.`];
104
+ if (result.sampleSize === 0) {
105
+ lines.push(` No sample data available — chain with 'kweaver ds import-csv' or use --pk-map.`);
106
+ }
107
+ else {
108
+ lines.push(` No column has unique values in the ${result.sampleSize}-row sample.`);
109
+ lines.push(` Top candidates by cardinality:`);
110
+ const top = result.candidates.slice(0, 5);
111
+ const maxNameLen = Math.max(...top.map((c) => c.name.length));
112
+ for (const c of top) {
113
+ lines.push(` ${c.name.padEnd(maxNameLen)} ${c.cardinality} unique`);
114
+ }
115
+ }
116
+ lines.push(``);
117
+ lines.push(` Re-run with --pk-map to specify explicitly:`);
118
+ lines.push(` --pk-map ${tableName}:<column>`);
119
+ return lines.join("\n");
120
+ }
121
+ /**
122
+ * Parse --pk-map string into a Record<table, field>.
123
+ * Format: "<table>:<field>[,<table>:<field>...]". Throws on invalid input.
124
+ */
125
+ export function parsePkMap(input) {
126
+ const result = {};
127
+ for (const pair of input.split(",").map((s) => s.trim()).filter(Boolean)) {
128
+ const idx = pair.indexOf(":");
129
+ if (idx <= 0 || idx >= pair.length - 1) {
130
+ throw new Error(`Invalid --pk-map entry '${pair}'. Expected '<table>:<field>[,<table>:<field>...]'`);
131
+ }
132
+ const table = pair.slice(0, idx).trim();
133
+ const field = pair.slice(idx + 1).trim();
134
+ if (!table || !field) {
135
+ throw new Error(`Invalid --pk-map entry '${pair}'. Expected '<table>:<field>[,<table>:<field>...]'`);
79
136
  }
137
+ result[table] = field;
80
138
  }
81
- // Fallback: first column
82
- return table.columns[0]?.name ?? "id";
139
+ return result;
83
140
  }
84
141
  export function detectDisplayKey(table, primaryKey) {
85
142
  for (const col of table.columns) {