@kweaver-ai/kweaver-sdk 0.7.1 → 0.7.3

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +34 -4
  2. package/README.zh.md +27 -2
  3. package/dist/api/datasources.d.ts +7 -0
  4. package/dist/api/datasources.js +8 -0
  5. package/dist/api/skills.js +10 -8
  6. package/dist/api/toolboxes.d.ts +2 -0
  7. package/dist/api/toolboxes.js +2 -1
  8. package/dist/cli.js +65 -17
  9. package/dist/commands/auth.js +85 -10
  10. package/dist/commands/bkn-ops.d.ts +6 -1
  11. package/dist/commands/bkn-ops.js +202 -93
  12. package/dist/commands/bkn-utils.d.ts +26 -2
  13. package/dist/commands/bkn-utils.js +66 -9
  14. package/dist/commands/config.js +8 -0
  15. package/dist/commands/context-loader.js +112 -36
  16. package/dist/commands/dataflow.js +194 -20
  17. package/dist/commands/ds.d.ts +23 -1
  18. package/dist/commands/ds.js +135 -27
  19. package/dist/commands/import-csv.d.ts +0 -2
  20. package/dist/commands/import-csv.js +2 -4
  21. package/dist/commands/skill.js +26 -6
  22. package/dist/commands/tool.d.ts +1 -0
  23. package/dist/commands/tool.js +12 -0
  24. package/dist/config/stateless.d.ts +13 -0
  25. package/dist/config/stateless.js +20 -0
  26. package/dist/config/store.d.ts +1 -0
  27. package/dist/config/store.js +17 -0
  28. package/dist/resources/toolboxes.d.ts +2 -0
  29. package/dist/templates/bkn/document/manifest.json +12 -0
  30. package/dist/templates/bkn/document/template.json +757 -0
  31. package/dist/templates/dataflow/unstructured/manifest.json +11 -0
  32. package/dist/templates/dataflow/unstructured/template.json +63 -0
  33. package/dist/templates/dataset/document/manifest.json +10 -0
  34. package/dist/templates/dataset/document/template.json +23 -0
  35. package/dist/templates/dataset/document-content/manifest.json +10 -0
  36. package/dist/templates/dataset/document-content/template.json +29 -0
  37. package/dist/templates/dataset/document-element/manifest.json +10 -0
  38. package/dist/templates/dataset/document-element/template.json +21 -0
  39. package/dist/utils/skill-bundle.d.ts +5 -0
  40. package/dist/utils/skill-bundle.js +74 -0
  41. package/dist/utils/template-loader.d.ts +40 -0
  42. package/dist/utils/template-loader.js +129 -0
  43. package/package.json +2 -1
package/dist/commands/bkn-ops.js
@@ -4,14 +4,36 @@ import { resolve } from "node:path";
4
4
  import { loadNetwork, allObjects, allRelations, allActions, generateChecksum, validateNetwork } from "@kweaver-ai/bkn";
5
5
  import { prepareBknDirectoryForImport, stripBknEncodingCliArgs, } from "../utils/bkn-encoding.js";
6
6
  import { ensureValidToken, formatHttpError } from "../auth/oauth.js";
7
- import { createKnowledgeNetwork, createObjectTypes, buildKnowledgeNetwork, getBuildStatus, } from "../api/knowledge-networks.js";
8
- import { listTablesWithColumns, scanMetadata, getDatasource } from "../api/datasources.js";
7
+ import { createKnowledgeNetwork, createObjectTypes, deleteKnowledgeNetwork, buildKnowledgeNetwork, getBuildStatus, } from "../api/knowledge-networks.js";
8
+ import { listTablesWithColumns, scanDatasourceMetadata } from "../api/datasources.js";
9
9
  import { createDataView, findDataView } from "../api/dataviews.js";
10
+ import { resolveFiles } from "./ds.js";
11
+ import { buildTableName } from "./import-csv.js";
10
12
  import { downloadBkn, uploadBkn, listActionSchedules, getActionSchedule, createActionSchedule, updateActionSchedule, setActionScheduleStatus, deleteActionSchedules, listJobs, getJob, getJobTasks, deleteJobs, } from "../api/bkn-backend.js";
11
13
  import { formatCallOutput } from "./call.js";
12
14
  import { resolveBusinessDomain } from "../config/store.js";
13
15
  import { runDsImportCsv } from "./ds.js";
14
- import { pollWithBackoff, detectPrimaryKey, detectDisplayKey, confirmYes, } from "./bkn-utils.js";
16
+ import { pollWithBackoff, detectPrimaryKey, detectDisplayKey, formatPkDetectionError, parsePkMap, confirmYes, } from "./bkn-utils.js";
17
+ // ── BKN object name validation ──────────────────────────────────────────────
18
+ // Mirrors bkn-backend OBJECT_NAME_MAX_LENGTH (interfaces/common.go:28) and
19
+ // validateObjectName (driveradapters/validate.go:85). 40 utf-8 codepoints,
20
+ // non-empty. Backend rejects the whole batch on first violation, so we surface
21
+ // every offender locally before any side-effecting call.
22
+ export const BKN_OBJECT_NAME_MAX_LENGTH = 40;
23
+ export function assertValidBknObjectNames(names, context) {
24
+ const offenders = [];
25
+ for (const name of names) {
26
+ const len = [...name].length;
27
+ if (len === 0 || len > BKN_OBJECT_NAME_MAX_LENGTH) {
28
+ offenders.push({ name, length: len });
29
+ }
30
+ }
31
+ if (offenders.length === 0)
32
+ return;
33
+ const lines = offenders.map((o) => ` - ${o.name} (${o.length} chars)`);
34
+ throw new Error(`${context}: ${offenders.length} name(s) violate BKN object-name limit ` +
35
+ `(1..${BKN_OBJECT_NAME_MAX_LENGTH} utf-8 chars):\n${lines.join("\n")}`);
36
+ }
15
37
  // ── Build ───────────────────────────────────────────────────────────────────
16
38
  const KN_BUILD_HELP = `kweaver bkn build <kn-id> [options]
17
39
 
@@ -458,19 +480,24 @@ Create a knowledge network from a datasource (dataviews + object types + optiona
458
480
  Options:
459
481
  --name <s> Knowledge network name (required)
460
482
  --tables <a,b> Comma-separated table names (default: all)
483
+ --pk-map <s> Explicit primary keys: <table>:<field>[,<table>:<field>...]
484
+ Required when auto-detection fails (no unique column in sample)
461
485
  --build (default) Build after creation
462
486
  --no-build Skip build after creation
463
487
  --timeout <n> Build timeout in seconds (default: 300)
488
+ --no-rollback Keep partially-created KN on failure (debug; default: rollback)
464
489
  -bd, --biz-domain Business domain (default: bd_public)
465
490
  --pretty Pretty-print output (default)`;
466
491
  export function parseKnCreateFromDsArgs(args) {
467
492
  let dsId = "";
468
493
  let name = "";
469
494
  let tablesStr = "";
495
+ let pkMapStr = "";
470
496
  let build = true;
471
497
  let timeout = 300;
472
498
  let businessDomain = "";
473
499
  let pretty = true;
500
+ let noRollback = false;
474
501
  for (let i = 0; i < args.length; i += 1) {
475
502
  const arg = args[i];
476
503
  if (arg === "--help" || arg === "-h")
@@ -483,6 +510,10 @@ export function parseKnCreateFromDsArgs(args) {
483
510
  tablesStr = args[++i];
484
511
  continue;
485
512
  }
513
+ if (arg === "--pk-map" && args[i + 1]) {
514
+ pkMapStr = args[++i];
515
+ continue;
516
+ }
486
517
  if (arg === "--build") {
487
518
  build = true;
488
519
  continue;
@@ -491,6 +522,10 @@ export function parseKnCreateFromDsArgs(args) {
491
522
  build = false;
492
523
  continue;
493
524
  }
525
+ if (arg === "--no-rollback") {
526
+ noRollback = true;
527
+ continue;
528
+ }
494
529
  if (arg === "--timeout" && args[i + 1]) {
495
530
  timeout = parseInt(args[++i], 10);
496
531
  if (Number.isNaN(timeout) || timeout < 1)
@@ -513,9 +548,10 @@ export function parseKnCreateFromDsArgs(args) {
513
548
  if (!dsId || !name) {
514
549
  throw new Error("Usage: kweaver bkn create-from-ds <ds-id> --name X [options]");
515
550
  }
551
+ const pkMap = pkMapStr ? parsePkMap(pkMapStr) : {};
516
552
  if (!businessDomain)
517
553
  businessDomain = resolveBusinessDomain();
518
- return { dsId, name, tables, build, timeout, businessDomain, pretty };
554
+ return { dsId, name, tables, pkMap, build, timeout, businessDomain, pretty, noRollback };
519
555
  }
520
556
  /** Sanitize a table name into a BKN-safe ID (alphanumeric + underscore). */
521
557
  function sanitizeBknId(name) {
@@ -559,6 +595,7 @@ export async function runKnCreateFromDsCommand(args, sampleRows) {
559
595
  const tableRetryDelayMs = 4000;
560
596
  let allTables = [];
561
597
  let targetTables = [];
598
+ let scanAttempted = false;
562
599
  for (let attempt = 1; attempt <= maxTableListAttempts; attempt += 1) {
563
600
  const tablesBody = await listTablesWithColumns({ ...base, id: options.dsId });
564
601
  allTables = JSON.parse(tablesBody);
@@ -568,15 +605,61 @@ export async function runKnCreateFromDsCommand(args, sampleRows) {
568
605
  if (targetTables.length > 0)
569
606
  break;
570
607
  if (attempt < maxTableListAttempts) {
571
- console.error(`No tables available (attempt ${attempt}/${maxTableListAttempts}); retrying in ${tableRetryDelayMs / 1000}s...`);
572
- await new Promise((r) => setTimeout(r, tableRetryDelayMs));
608
+ // First miss: the catalog often hasn't picked up tables created
609
+ // out-of-band (e.g. ds import-csv from an older SDK that didn't
610
+ // self-scan). Trigger a scan once before falling back to plain
611
+ // sleep-retries.
612
+ if (!scanAttempted) {
613
+ scanAttempted = true;
614
+ console.error(`No tables available (attempt ${attempt}/${maxTableListAttempts}); scanning datasource metadata before retry...`);
615
+ try {
616
+ await scanDatasourceMetadata({ ...base, id: options.dsId });
617
+ }
618
+ catch (err) {
619
+ console.error(`Scan warning (continuing): ${formatHttpError(err)}`);
620
+ }
621
+ }
622
+ else {
623
+ console.error(`No tables available (attempt ${attempt}/${maxTableListAttempts}); retrying in ${tableRetryDelayMs / 1000}s...`);
624
+ await new Promise((r) => setTimeout(r, tableRetryDelayMs));
625
+ }
573
626
  }
574
627
  }
575
628
  if (targetTables.length === 0) {
576
629
  console.error("No tables available");
577
630
  return 1;
578
631
  }
579
- // Phase 1: Create DataViews for each table
632
+ // Pre-flight: catch every offending OT name before any side effect.
633
+ // Backend rejects the whole batch on first violation (validate.go:90),
634
+ // so retroactive rollback is wasted work if we can fail fast here.
635
+ assertValidBknObjectNames(targetTables.map((t) => t.name), "Object type names derived from table names");
636
+ // Pre-flight: resolve PK for every table BEFORE any side effect.
637
+ // Auto-detection silently picking the wrong column was the cause of
638
+ // issue #97 (KN built with ~5 indexed docs out of 2036 source rows).
639
+ // Resolve order: --pk-map override → cardinality-based detection → fail-fast.
640
+ const tablePks = {};
641
+ const unknownPkMapTables = Object.keys(options.pkMap).filter((name) => !targetTables.some((t) => t.name === name));
642
+ if (unknownPkMapTables.length > 0) {
643
+ throw new Error(`--pk-map references unknown table(s): ${unknownPkMapTables.join(", ")}`);
644
+ }
645
+ for (const t of targetTables) {
646
+ const override = options.pkMap[t.name];
647
+ if (override) {
648
+ if (!t.columns.some((c) => c.name === override)) {
649
+ throw new Error(`--pk-map specifies '${override}' for table '${t.name}', but no such column. ` +
650
+ `Columns: ${t.columns.map((c) => c.name).join(", ")}`);
651
+ }
652
+ tablePks[t.name] = override;
653
+ continue;
654
+ }
655
+ const result = detectPrimaryKey(t, sampleRows?.[t.name]);
656
+ if (!result.pk) {
657
+ throw new Error(formatPkDetectionError(t.name, result));
658
+ }
659
+ tablePks[t.name] = result.pk;
660
+ }
661
+ // Phase 1: Create DataViews for each table. findDataView is idempotent;
662
+ // not tracked for rollback so a retry can reuse what's already there.
580
663
  console.error(`Creating data views for ${targetTables.length} table(s) ...`);
581
664
  const viewMap = {};
582
665
  for (const t of targetTables) {
@@ -597,7 +680,8 @@ export async function runKnCreateFromDsCommand(args, sampleRows) {
597
680
  }));
598
681
  viewMap[t.name] = dvId;
599
682
  }
600
- // Phase 2: Create the KN record
683
+ // Phase 2: Create the KN. If any subsequent step fails we DELETE this
684
+ // KN — backend cascades to OTs (knowledge_network_service.go:917-969).
601
685
  const knBody = JSON.stringify({
602
686
  name: options.name,
603
687
  branch: "main",
@@ -611,72 +695,91 @@ export async function runKnCreateFromDsCommand(args, sampleRows) {
611
695
  const knItem = Array.isArray(knParsed) ? knParsed[0] : knParsed;
612
696
  const knId = String(knItem?.id ?? "");
613
697
  console.error(`Knowledge network created: ${knId}`);
614
- // Phase 3: Create object types via REST API
615
- console.error(`Creating ${targetTables.length} object type(s) ...`);
698
+ let createdKnId = knId;
616
699
  const otResults = [];
617
- for (const t of targetTables) {
618
- const pk = detectPrimaryKey(t, sampleRows?.[t.name]);
619
- const dk = detectDisplayKey(t, pk);
620
- const uniqueProps = [pk, dk].filter((x, i, a) => a.indexOf(x) === i);
621
- const entry = {
622
- branch: "main",
623
- name: t.name,
624
- data_source: { type: "data_view", id: viewMap[t.name] },
625
- primary_keys: [pk],
626
- display_key: dk,
627
- data_properties: t.columns.map((c) => ({
628
- name: c.name,
629
- display_name: c.name,
630
- type: "string",
631
- mapped_field: { name: c.name, type: c.type || "varchar" },
632
- })),
633
- };
634
- const otBody = JSON.stringify({ entries: [entry] });
700
+ let statusStr = "skipped";
701
+ try {
702
+ // Phase 3: Single batched POST. Backend wraps all entries in one tx
703
+ // (object_type_service.go:213-355) all-or-nothing.
704
+ console.error(`Creating ${targetTables.length} object type(s) ...`);
705
+ const entries = targetTables.map((t) => {
706
+ const pk = tablePks[t.name];
707
+ const dk = detectDisplayKey(t, pk);
708
+ return {
709
+ branch: "main",
710
+ name: t.name,
711
+ data_source: { type: "data_view", id: viewMap[t.name] },
712
+ primary_keys: [pk],
713
+ display_key: dk,
714
+ data_properties: t.columns.map((c) => ({
715
+ name: c.name,
716
+ display_name: c.name,
717
+ type: "string",
718
+ mapped_field: { name: c.name, type: c.type || "varchar" },
719
+ })),
720
+ _meta: { pk, dk },
721
+ };
722
+ });
723
+ const wireEntries = entries.map(({ _meta: _, ...rest }) => rest);
724
+ const otBody = JSON.stringify({ entries: wireEntries });
635
725
  const otResponse = await createObjectTypes({
636
726
  ...base,
637
727
  knId,
638
728
  body: otBody,
639
729
  });
640
730
  const otParsed = JSON.parse(otResponse);
641
- const otItem = otParsed.entries?.[0];
642
- otResults.push({
643
- name: t.name,
644
- id: otItem?.id ?? "",
645
- field_count: t.columns.length,
646
- });
647
- console.error(` Created: ${t.name} (${t.columns.length} fields, pk=${pk}, dk=${dk})`);
648
- }
649
- if (otResults.length === 0) {
650
- const errorOutput = {
651
- kn_id: knId,
652
- kn_name: options.name,
653
- error: "No object types were created",
654
- };
655
- console.log(JSON.stringify(errorOutput, null, options.pretty ? 2 : 0));
656
- return 1;
657
- }
658
- let statusStr = "skipped";
659
- if (options.build) {
660
- console.error("Building ...");
661
- await buildKnowledgeNetwork({ ...base, knId });
662
- const TERMINAL = ["completed", "failed", "success"];
663
- try {
664
- statusStr = await pollWithBackoff({
665
- fn: async () => {
666
- const statusBody = await getBuildStatus({ ...base, knId });
667
- const statusParsed = JSON.parse(statusBody);
668
- const jobs = Array.isArray(statusParsed) ? statusParsed : (statusParsed.entries ?? []);
669
- const state = (jobs[0]?.state ?? "running").toLowerCase();
670
- if (TERMINAL.includes(state))
671
- return { done: true, value: state };
672
- return { done: false, value: "running" };
673
- },
674
- interval: 2000,
675
- timeout: options.timeout * 1000,
731
+ const otItems = Array.isArray(otParsed) ? otParsed : (otParsed.entries ?? []);
732
+ for (let i = 0; i < entries.length; i += 1) {
733
+ const t = targetTables[i];
734
+ const meta = entries[i]._meta;
735
+ otResults.push({
736
+ name: t.name,
737
+ id: otItems[i]?.id ?? "",
738
+ field_count: t.columns.length,
676
739
  });
740
+ console.error(` Created: ${t.name} (${t.columns.length} fields, pk=${meta.pk}, dk=${meta.dk})`);
677
741
  }
678
- catch {
679
- // timeout — statusStr remains "skipped"
742
+ if (options.build) {
743
+ console.error("Building ...");
744
+ await buildKnowledgeNetwork({ ...base, knId });
745
+ const TERMINAL = ["completed", "failed", "success"];
746
+ try {
747
+ statusStr = await pollWithBackoff({
748
+ fn: async () => {
749
+ const statusBody = await getBuildStatus({ ...base, knId });
750
+ const statusParsed = JSON.parse(statusBody);
751
+ const jobs = Array.isArray(statusParsed) ? statusParsed : (statusParsed.entries ?? []);
752
+ const state = (jobs[0]?.state ?? "running").toLowerCase();
753
+ if (TERMINAL.includes(state))
754
+ return { done: true, value: state };
755
+ return { done: false, value: "running" };
756
+ },
757
+ interval: 2000,
758
+ timeout: options.timeout * 1000,
759
+ });
760
+ }
761
+ catch {
762
+ // build timeout — KN itself is fine, just mark skipped
763
+ }
764
+ }
765
+ // Reached the end without throwing — clear the rollback handle.
766
+ createdKnId = undefined;
767
+ }
768
+ finally {
769
+ if (createdKnId !== undefined) {
770
+ if (options.noRollback) {
771
+ console.error(`Leaving partial KN ${createdKnId} in place (--no-rollback)`);
772
+ }
773
+ else {
774
+ console.error(`Rolling back KN ${createdKnId} ...`);
775
+ try {
776
+ await deleteKnowledgeNetwork({ ...base, knId: createdKnId });
777
+ console.error(`Rolled back KN ${createdKnId}`);
778
+ }
779
+ catch (rollbackErr) {
780
+ console.error(`Rollback failed for KN ${createdKnId}: ${formatHttpError(rollbackErr)}`);
781
+ }
782
+ }
680
783
  }
681
784
  }
682
785
  const output = {
@@ -706,8 +809,9 @@ Options:
706
809
  --tables <a,b> Tables to include in KN (default: all imported)
707
810
  --build (default) Build after creation
708
811
  --no-build Skip build
709
- --recreate Use "insert" mode on first batch (only effective for new tables)
812
+ --pk-map <s> Explicit primary keys: <table>:<field>[,<table>:<field>...]
710
813
  --timeout <n> Build timeout in seconds (default: 300)
814
+ --no-rollback Keep partially-created KN on failure (debug; default: rollback)
711
815
  -bd, --biz-domain Business domain (default: bd_public)`;
712
816
  export function parseKnCreateFromCsvArgs(args) {
713
817
  let dsId = "";
@@ -716,10 +820,11 @@ export function parseKnCreateFromCsvArgs(args) {
716
820
  let tablePrefix = "";
717
821
  let batchSize = 500;
718
822
  let tablesStr = "";
823
+ let pkMapStr = "";
719
824
  let build = true;
720
- let recreate = false;
721
825
  let timeout = 300;
722
826
  let businessDomain = "";
827
+ let noRollback = false;
723
828
  for (let i = 0; i < args.length; i += 1) {
724
829
  const arg = args[i];
725
830
  if (arg === "--help" || arg === "-h")
@@ -754,8 +859,12 @@ export function parseKnCreateFromCsvArgs(args) {
754
859
  build = false;
755
860
  continue;
756
861
  }
757
- if (arg === "--recreate") {
758
- recreate = true;
862
+ if (arg === "--pk-map" && args[i + 1]) {
863
+ pkMapStr = args[++i];
864
+ continue;
865
+ }
866
+ if (arg === "--no-rollback") {
867
+ noRollback = true;
759
868
  continue;
760
869
  }
761
870
  if (arg === "--timeout" && args[i + 1]) {
@@ -776,9 +885,10 @@ export function parseKnCreateFromCsvArgs(args) {
776
885
  if (!dsId || !files || !name) {
777
886
  throw new Error("Usage: kweaver bkn create-from-csv <ds-id> --files <glob> --name X [options]");
778
887
  }
888
+ const pkMap = pkMapStr ? parsePkMap(pkMapStr) : {};
779
889
  if (!businessDomain)
780
890
  businessDomain = resolveBusinessDomain();
781
- return { dsId, files, name, tablePrefix, batchSize, tables, build, recreate, timeout, businessDomain };
891
+ return { dsId, files, name, tablePrefix, batchSize, tables, pkMap, build, timeout, businessDomain, noRollback };
782
892
  }
783
893
  export async function runKnCreateFromCsvCommand(args) {
784
894
  let options;
@@ -793,6 +903,20 @@ export async function runKnCreateFromCsvCommand(args) {
793
903
  console.error(formatHttpError(error));
794
904
  return 1;
795
905
  }
906
+ // Pre-flight: predict OT names from (table-prefix + csv basename) and
907
+ // reject before any CSV is imported. CSV import is expensive; failing
908
+ // here saves the user a multi-minute round trip.
909
+ try {
910
+ const filePaths = await resolveFiles(options.files);
911
+ const predictedNames = options.tables.length > 0
912
+ ? options.tables
913
+ : filePaths.map((p) => buildTableName(p, options.tablePrefix));
914
+ assertValidBknObjectNames(predictedNames, "Object type names derived from CSV file names");
915
+ }
916
+ catch (error) {
917
+ console.error(formatHttpError(error));
918
+ return 1;
919
+ }
796
920
  // Phase 1: Import CSVs
797
921
  console.error("Phase 1: Importing CSVs ...");
798
922
  const importArgs = [
@@ -801,35 +925,15 @@ export async function runKnCreateFromCsvCommand(args) {
801
925
  "--table-prefix", options.tablePrefix,
802
926
  "--batch-size", String(options.batchSize),
803
927
  "-bd", options.businessDomain,
804
- ...(options.recreate ? ["--recreate"] : []),
805
928
  ];
806
929
  const importResult = await runDsImportCsv(importArgs);
807
930
  if (importResult.code !== 0) {
808
931
  console.error("CSV import failed — aborting KN creation");
809
932
  return importResult.code;
810
933
  }
811
- // Phase 1.5: Scan datasource metadata so platform discovers newly imported tables
812
- console.error("Scanning datasource metadata ...");
813
- try {
814
- const token = await ensureValidToken();
815
- const dsBody = await getDatasource({
816
- baseUrl: token.baseUrl,
817
- accessToken: token.accessToken,
818
- id: options.dsId,
819
- businessDomain: options.businessDomain,
820
- });
821
- const dsParsed = JSON.parse(dsBody);
822
- await scanMetadata({
823
- baseUrl: token.baseUrl,
824
- accessToken: token.accessToken,
825
- id: options.dsId,
826
- dsType: dsParsed.type ?? "mysql",
827
- businessDomain: options.businessDomain,
828
- });
829
- }
830
- catch (err) {
831
- console.error(`Scan warning (continuing): ${String(err)}`);
832
- }
934
+ // (Phase 1.5 metadata scan removed runDsImportCsv now self-scans on
935
+ // success, and runKnCreateFromDsCommand's table-discovery retry triggers
936
+ // a scan if the catalog still lags. Two layers of fallback are enough.)
833
937
  // Phase 2: Create KN from datasource
834
938
  console.error("Phase 2: Creating knowledge network ...");
835
939
  const tableNames = options.tables.length > 0 ? options.tables : importResult.tables;
@@ -837,6 +941,7 @@ export async function runKnCreateFromCsvCommand(args) {
837
941
  console.error("No tables available for KN creation — aborting");
838
942
  return 1;
839
943
  }
944
+ const pkMapEntries = Object.entries(options.pkMap);
840
945
  const knArgs = [
841
946
  options.dsId,
842
947
  "--name", options.name,
@@ -844,6 +949,10 @@ export async function runKnCreateFromCsvCommand(args) {
844
949
  options.build ? "--build" : "--no-build",
845
950
  "--timeout", String(options.timeout),
846
951
  "-bd", options.businessDomain,
952
+ ...(pkMapEntries.length > 0
953
+ ? ["--pk-map", pkMapEntries.map(([t, f]) => `${t}:${f}`).join(",")]
954
+ : []),
955
+ ...(options.noRollback ? ["--no-rollback"] : []),
847
956
  ];
848
957
  return runKnCreateFromDsCommand(knArgs, importResult.sampleRows);
849
958
  }
package/dist/commands/bkn-utils.d.ts
@@ -18,14 +18,38 @@ export declare function parseOntologyQueryFlags(args: string[]): {
18
18
  businessDomain: string;
19
19
  };
20
20
  export declare const DISPLAY_HINTS: string[];
21
- /** Detect primary key: first column (left-to-right) with all unique values in the sample. */
21
+ export interface PkCandidate {
22
+ name: string;
23
+ cardinality: number;
24
+ }
25
+ export interface PkDetectionResult {
26
+ /** Detected PK column name, or null when detection is not confident. */
27
+ pk: string | null;
28
+ /** All columns sorted by cardinality desc. Empty when no sample. */
29
+ candidates: PkCandidate[];
30
+ /** 0 when no sample data was provided. */
31
+ sampleSize: number;
32
+ }
33
+ export declare const PK_NAME_HINTS: string[];
34
+ /**
35
+ * Detect primary key from a row sample. Returns null pk when no column has
36
+ * unique values across the sample — caller must fail-fast and prompt for --pk-map.
37
+ * Among columns that ARE fully unique, prefers PK-like names (id, *_id, pk).
38
+ */
22
39
  export declare function detectPrimaryKey(table: {
23
40
  name: string;
24
41
  columns: Array<{
25
42
  name: string;
26
43
  type: string;
27
44
  }>;
28
- }, rows?: Array<Record<string, string | null>>): string;
45
+ }, rows?: Array<Record<string, string | null>>): PkDetectionResult;
46
+ /** Format a user-facing error message when PK auto-detection fails. */
47
+ export declare function formatPkDetectionError(tableName: string, result: PkDetectionResult): string;
48
+ /**
49
+ * Parse --pk-map string into a Record<table, field>.
50
+ * Format: "<table>:<field>[,<table>:<field>...]". Throws on invalid input.
51
+ */
52
+ export declare function parsePkMap(input: string): Record<string, string>;
29
53
  export declare function detectDisplayKey(table: {
30
54
  name: string;
31
55
  columns: Array<{
package/dist/commands/bkn-utils.js
@@ -68,18 +68,75 @@ export function parseOntologyQueryFlags(args) {
68
68
  }
69
69
  // ── Schema detection helpers ─────────────────────────────────────────────────
70
70
  export const DISPLAY_HINTS = ["name", "title", "label", "display_name", "description"];
71
- /** Detect primary key: first column (left-to-right) with all unique values in the sample. */
71
+ export const PK_NAME_HINTS = ["id", "_id", "pk"];
72
+ /**
73
+ * Detect primary key from a row sample. Returns null pk when no column has
74
+ * unique values across the sample — caller must fail-fast and prompt for --pk-map.
75
+ * Among columns that ARE fully unique, prefers PK-like names (id, *_id, pk).
76
+ */
72
77
  export function detectPrimaryKey(table, rows) {
73
- if (rows && rows.length > 0) {
74
- for (const col of table.columns) {
75
- const values = rows.map((r) => r[col.name]);
76
- const unique = new Set(values);
77
- if (unique.size === rows.length)
78
- return col.name;
78
+ if (!rows || rows.length === 0) {
79
+ return { pk: null, candidates: [], sampleSize: 0 };
80
+ }
81
+ const candidates = table.columns
82
+ .map((col) => {
83
+ const unique = new Set(rows.map((r) => r[col.name]));
84
+ return { name: col.name, cardinality: unique.size };
85
+ })
86
+ .sort((a, b) => b.cardinality - a.cardinality);
87
+ const fullCardinality = candidates.filter((c) => c.cardinality === rows.length);
88
+ if (fullCardinality.length === 0) {
89
+ return { pk: null, candidates, sampleSize: rows.length };
90
+ }
91
+ const named = fullCardinality.find((c) => {
92
+ const lower = c.name.toLowerCase();
93
+ return PK_NAME_HINTS.some((h) => lower === h || lower.endsWith(`_${h}`));
94
+ });
95
+ return {
96
+ pk: named?.name ?? fullCardinality[0].name,
97
+ candidates,
98
+ sampleSize: rows.length,
99
+ };
100
+ }
101
+ /** Format a user-facing error message when PK auto-detection fails. */
102
+ export function formatPkDetectionError(tableName, result) {
103
+ const lines = [`Cannot auto-detect primary key for table '${tableName}'.`];
104
+ if (result.sampleSize === 0) {
105
+ lines.push(` No sample data available — chain with 'kweaver ds import-csv' or use --pk-map.`);
106
+ }
107
+ else {
108
+ lines.push(` No column has unique values in the ${result.sampleSize}-row sample.`);
109
+ lines.push(` Top candidates by cardinality:`);
110
+ const top = result.candidates.slice(0, 5);
111
+ const maxNameLen = Math.max(...top.map((c) => c.name.length));
112
+ for (const c of top) {
113
+ lines.push(` ${c.name.padEnd(maxNameLen)} ${c.cardinality} unique`);
114
+ }
115
+ }
116
+ lines.push(``);
117
+ lines.push(` Re-run with --pk-map to specify explicitly:`);
118
+ lines.push(` --pk-map ${tableName}:<column>`);
119
+ return lines.join("\n");
120
+ }
121
+ /**
122
+ * Parse --pk-map string into a Record<table, field>.
123
+ * Format: "<table>:<field>[,<table>:<field>...]". Throws on invalid input.
124
+ */
125
+ export function parsePkMap(input) {
126
+ const result = {};
127
+ for (const pair of input.split(",").map((s) => s.trim()).filter(Boolean)) {
128
+ const idx = pair.indexOf(":");
129
+ if (idx <= 0 || idx >= pair.length - 1) {
130
+ throw new Error(`Invalid --pk-map entry '${pair}'. Expected '<table>:<field>[,<table>:<field>...]'`);
131
+ }
132
+ const table = pair.slice(0, idx).trim();
133
+ const field = pair.slice(idx + 1).trim();
134
+ if (!table || !field) {
135
+ throw new Error(`Invalid --pk-map entry '${pair}'. Expected '<table>:<field>[,<table>:<field>...]'`);
79
136
  }
137
+ result[table] = field;
80
138
  }
81
- // Fallback: first column
82
- return table.columns[0]?.name ?? "id";
139
+ return result;
83
140
  }
84
141
  export function detectDisplayKey(table, primaryKey) {
85
142
  for (const col of table.columns) {
package/dist/commands/config.js
@@ -2,6 +2,7 @@ import { listBusinessDomains } from "../api/business-domains.js";
2
2
  import { fetchEacpUserInfo, resolveActivePlatform, withTokenRetry } from "../auth/oauth.js";
3
3
  import { HttpError } from "../utils/http.js";
4
4
  import { loadPlatformBusinessDomain, resolveBusinessDomain, savePlatformBusinessDomain, } from "../config/store.js";
5
+ import { assertNotStatelessForWrite } from "../config/stateless.js";
5
6
  const HELP = `kweaver config
6
7
 
7
8
  Subcommands:
@@ -50,6 +51,13 @@ export async function runConfigCommand(args) {
50
51
  return 1;
51
52
  }
52
53
  const platform = active.url;
54
+ try {
55
+ assertNotStatelessForWrite("config set-bd");
56
+ }
57
+ catch (err) {
58
+ console.error(err instanceof Error ? err.message : String(err));
59
+ return 1;
60
+ }
53
61
  savePlatformBusinessDomain(platform, value);
54
62
  const provenance = active.source === "env" ? `${platform} via KWEAVER_BASE_URL` : platform;
55
63
  console.log(`Business domain set to: ${value} (${provenance})`);