@kweaver-ai/kweaver-sdk 0.4.9 → 0.4.11

@@ -66,6 +66,14 @@ export interface KnObjectTypeQueryOptions {
  }
  export declare function parseKnObjectTypeQueryArgs(args: string[]): KnObjectTypeQueryOptions;
  export declare function runKnCommand(args: string[]): Promise<number>;
+ /** Parse object-type create args: --name --dataview-id --primary-key --display-key [--property '<json>' ...] */
+ export declare function parseObjectTypeCreateArgs(args: string[]): {
+ knId: string;
+ body: string;
+ businessDomain: string;
+ branch: string;
+ pretty: boolean;
+ };
  /** Fields merged via GET → modify → PUT (not raw body mode). */
  export interface ObjectTypeMergeFields {
  name?: string;
@@ -112,6 +120,14 @@ export interface KnActionTypeExecuteOptions {
  timeout: number;
  }
  export declare function parseKnActionTypeExecuteArgs(args: string[]): KnActionTypeExecuteOptions;
+ /** Parse relation-type create args: --name --source --target [--mapping src:tgt ...] */
+ export declare function parseRelationTypeCreateArgs(args: string[]): {
+ knId: string;
+ body: string;
+ businessDomain: string;
+ branch: string;
+ pretty: boolean;
+ };
  export declare function parseKnBuildArgs(args: string[]): {
  knId: string;
  wait: boolean;
@@ -8,11 +8,12 @@ import { ensureValidToken, formatHttpError, with401RefreshRetry } from "../auth/
  import { listKnowledgeNetworks, getKnowledgeNetwork, createKnowledgeNetwork, updateKnowledgeNetwork, deleteKnowledgeNetwork, listObjectTypes, listRelationTypes, listActionTypes, getObjectType, createObjectTypes, updateObjectType, deleteObjectTypes, getRelationType, createRelationTypes, updateRelationType, deleteRelationTypes, buildKnowledgeNetwork, getBuildStatus, } from "../api/knowledge-networks.js";
  import { objectTypeQuery, objectTypeProperties, subgraph, actionTypeQuery, actionTypeExecute, actionExecutionGet, actionLogsList, actionLogGet, actionLogCancel, } from "../api/ontology-query.js";
  import { semanticSearch } from "../api/semantic-search.js";
- import { listTablesWithColumns } from "../api/datasources.js";
- import { createDataView } from "../api/dataviews.js";
+ import { listTablesWithColumns, scanMetadata, getDatasource } from "../api/datasources.js";
+ import { createDataView } from "../api/dataviews.js"; // used by runKnCreateFromDsCommand
  import { downloadBkn, uploadBkn } from "../api/bkn-backend.js";
  import { formatCallOutput } from "./call.js";
  import { resolveBusinessDomain } from "../config/store.js";
+ import { runDsImportCsv } from "./ds.js";
  export function formatSimpleKnList(text, pretty, includeDetail = false) {
  const parsed = JSON.parse(text);
  const entries = Array.isArray(parsed.entries) ? parsed.entries : [];
@@ -600,11 +601,13 @@ Subcommands:
  get <kn-id> [options] Get knowledge network detail (use --stats or --export)
  create [options] Create a knowledge network (empty or from --body-file)
  create-from-ds <ds-id> --name X [--tables a,b] [--build] Create KN from datasource
+ create-from-csv <ds-id> --files <glob> --name X [--table-prefix P] [--build]
+ Import CSVs then create knowledge network
  update <kn-id> [options] Update a knowledge network
  delete <kn-id> Delete a knowledge network
  build <kn-id> [--wait|--no-wait] [--timeout n] Trigger full build
- validate <directory> Validate a local BKN directory (no upload)
- push <directory> [--branch main] Upload BKN directory as tar
+ validate <directory> [--detect-encoding|--no-detect-encoding] [--source-encoding n] Validate local BKN (no upload)
+ push <directory> [--branch main] [--detect-encoding|--no-detect-encoding] [--source-encoding n] Upload BKN as tar
  pull <kn-id> [<directory>] [--branch main] Download BKN tar and extract
  export <kn-id> Export knowledge network (alias for get --export)
  stats <kn-id> Get statistics (alias for get --stats)
@@ -646,6 +649,8 @@ export async function runKnCommand(args) {
  return runKnCreateCommand(rest);
  if (subcommand === "create-from-ds")
  return runKnCreateFromDsCommand(rest);
+ if (subcommand === "create-from-csv")
+ return runKnCreateFromCsvCommand(rest);
  if (subcommand === "update")
  return runKnUpdateCommand(rest);
  if (subcommand === "delete")
@@ -694,7 +699,7 @@ export async function runKnCommand(args) {
  }
  }
  /** Parse object-type create args: --name --dataview-id --primary-key --display-key [--property '<json>' ...] */
- function parseObjectTypeCreateArgs(args) {
+ export function parseObjectTypeCreateArgs(args) {
  let name = "";
  let dataviewId = "";
  let primaryKey = "";
@@ -748,6 +753,7 @@ function parseObjectTypeCreateArgs(args) {
  throw new Error("Usage: kweaver bkn object-type create <kn-id> --name X --dataview-id Y --primary-key Z --display-key W");
  }
  const entry = {
+ branch,
  name,
  data_source: { type: "data_view", id: dataviewId },
  primary_keys: [primaryKey],
@@ -764,7 +770,7 @@ function parseObjectTypeCreateArgs(args) {
  type: "string",
  }));
  }
- const body = JSON.stringify({ entries: [entry], branch });
+ const body = JSON.stringify({ entries: [entry] });
  if (!businessDomain)
  businessDomain = resolveBusinessDomain();
  return { knId, body, businessDomain, branch, pretty };
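
As of 0.4.11, parseObjectTypeCreateArgs and parseRelationTypeCreateArgs place branch inside each entry instead of at the top level of the serialized body. A minimal TypeScript sketch of the resulting payload, with placeholder values (not taken from the package):

    // Placeholder values for illustration only.
    const entry = {
        branch: "main",
        name: "customer",
        data_source: { type: "data_view", id: "dv_123" },
        primary_keys: ["id"],
        display_key: "name",
    };
    // 0.4.9:  JSON.stringify({ entries: [entry], branch: "main" })
    // 0.4.11: JSON.stringify({ entries: [entry] })
    const body = JSON.stringify({ entries: [entry] });
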
@@ -1073,20 +1079,18 @@ export function parseKnActionTypeExecuteArgs(args) {
  timeout,
  };
  }
- const PK_CANDIDATES = new Set(["id", "pk", "key"]);
- const PK_TYPES = new Set(["integer", "unsigned integer", "string", "varchar", "bigint", "int"]);
  const DISPLAY_HINTS = ["name", "title", "label", "display_name", "description"];
- function detectPrimaryKey(table) {
- for (const col of table.columns) {
- if (PK_CANDIDATES.has(col.name.toLowerCase()) && PK_TYPES.has(col.type.toLowerCase())) {
- return col.name;
- }
- }
- for (const col of table.columns) {
- if (PK_TYPES.has(col.type.toLowerCase())) {
- return col.name;
+ /** Detect primary key: first column (left-to-right) with all unique values in the sample. */
+ function detectPrimaryKey(table, rows) {
+ if (rows && rows.length > 0) {
+ for (const col of table.columns) {
+ const values = rows.map((r) => r[col.name]);
+ const unique = new Set(values);
+ if (unique.size === rows.length)
+ return col.name;
  }
  }
+ // Fallback: first column
  return table.columns[0]?.name ?? "id";
  }
  function detectDisplayKey(table, primaryKey) {
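
The 0.4.11 detectPrimaryKey drops the name/type heuristics and instead scans columns left to right, picking the first whose sampled values are all distinct; if no sample is available it falls back to the first column. A self-contained TypeScript sketch of the same idea, with assumed Table and Row shapes:

    // Table/Row shapes are assumptions for this sketch, not the package's own types.
    interface Column { name: string; type?: string; }
    interface Table { columns: Column[]; }
    type Row = Record<string, string | null>;

    function detectPrimaryKeySketch(table: Table, rows?: Row[]): string {
        if (rows && rows.length > 0) {
            for (const col of table.columns) {
                const unique = new Set(rows.map((r) => r[col.name]));
                if (unique.size === rows.length) return col.name; // all sampled values distinct
            }
        }
        return table.columns[0]?.name ?? "id"; // fallback: first column
    }

    // Example: with rows [{ region: "east", order_id: "1" }, { region: "east", order_id: "2" }]
    // the region column repeats, so order_id is chosen.
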
@@ -1287,7 +1291,7 @@ kweaver bkn object-type delete <kn-id> <ot-ids> [-y]`);
  }
  }
  /** Parse relation-type create args: --name --source --target [--mapping src:tgt ...] */
- function parseRelationTypeCreateArgs(args) {
+ export function parseRelationTypeCreateArgs(args) {
  let name = "";
  let source = "";
  let target = "";
@@ -1341,6 +1345,7 @@ function parseRelationTypeCreateArgs(args) {
  throw new Error("Usage: kweaver bkn relation-type create <kn-id> --name X --source <ot-id> --target <ot-id> [--mapping src:tgt ...]");
  }
  const entry = {
+ branch,
  name,
  source_object_type_id: source,
  target_object_type_id: target,
@@ -1350,7 +1355,7 @@ function parseRelationTypeCreateArgs(args) {
  target_property: { name: t },
  })),
  };
- const body = JSON.stringify({ entries: [entry], branch });
+ const body = JSON.stringify({ entries: [entry] });
  if (!businessDomain)
  businessDomain = resolveBusinessDomain();
  return { knId, body, businessDomain, branch, pretty };
@@ -2139,7 +2144,25 @@ function parseKnCreateFromDsArgs(args) {
  businessDomain = resolveBusinessDomain();
  return { dsId, name, tables, build, timeout, businessDomain, pretty };
  }
- async function runKnCreateFromDsCommand(args) {
+ /** Sanitize a table name into a BKN-safe ID (alphanumeric + underscore). */
+ function sanitizeBknId(name) {
+ return name.replace(/[^a-zA-Z0-9_]/g, "_").replace(/^(\d)/, "_$1");
+ }
+ /** Generate a BKN ObjectType YAML markdown file for a table. */
+ function generateObjectTypeBkn(tableName, dvId, pk, dk, columns) {
+ const safeId = sanitizeBknId(tableName);
+ const header = `## ObjectType: ${safeId}\n\n**${tableName}**\n`;
+ const dsTable = `### Data Source\n\n| Type | ID | Name |\n|------|-----|------|\n| data_view | ${dvId} | ${tableName} |\n`;
+ const dpHeader = `### Data Properties\n\n| Property | Display Name | Type | Primary Key | Display Key |\n|----------|-------------|------|-------------|-------------|\n`;
+ const dpRows = columns.map((c) => {
+ const isPk = c.name === pk ? "yes" : "no";
+ const isDk = c.name === dk ? "yes" : "no";
+ return `| ${c.name} | ${c.name} | string | ${isPk} | ${isDk} |`;
+ }).join("\n");
+ const frontmatter = `---\ntype: object_type\nid: ${safeId}\nname: ${tableName}\n---\n\n`;
+ return `${frontmatter}${header}\n${dsTable}\n${dpHeader}${dpRows}\n`;
+ }
+ async function runKnCreateFromDsCommand(args, sampleRows) {
  let options;
  try {
  options = parseKnCreateFromDsArgs(args);
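
sanitizeBknId keeps only letters, digits, and underscores and prefixes a leading digit with an underscore, so generated ObjectType IDs stay valid; generateObjectTypeBkn then assembles frontmatter plus Data Source and Data Properties tables from that sanitized ID. A quick TypeScript sketch of the expected sanitization, on made-up table names (the helper itself is module-internal; the calls are shown only to illustrate the mapping):

    // Hypothetical inputs; results follow the two replace() calls above.
    sanitizeBknId("sales-2024.q1"); // "sales_2024_q1"
    sanitizeBknId("2024_orders");   // "_2024_orders" (leading digit prefixed)
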
@@ -2168,6 +2191,8 @@ async function runKnCreateFromDsCommand(args) {
  console.error("No tables available");
  return 1;
  }
+ // Phase 1: Create DataViews for each table
+ console.error(`Creating data views for ${targetTables.length} table(s) ...`);
  const viewMap = {};
  for (const t of targetTables) {
  const dvId = await createDataView({
@@ -2179,6 +2204,7 @@ async function runKnCreateFromDsCommand(args) {
  });
  viewMap[t.name] = dvId;
  }
+ // Phase 2: Create the KN record
  const knBody = JSON.stringify({
  name: options.name,
  branch: "main",
@@ -2191,22 +2217,28 @@ async function runKnCreateFromDsCommand(args) {
  const knParsed = JSON.parse(knResponse);
  const knItem = Array.isArray(knParsed) ? knParsed[0] : knParsed;
  const knId = String(knItem?.id ?? "");
+ console.error(`Knowledge network created: ${knId}`);
+ // Phase 3: Create object types via REST API
+ console.error(`Creating ${targetTables.length} object type(s) ...`);
  const otResults = [];
  for (const t of targetTables) {
- const pk = detectPrimaryKey(t);
+ const pk = detectPrimaryKey(t, sampleRows?.[t.name]);
  const dk = detectDisplayKey(t, pk);
+ const uniqueProps = [pk, dk].filter((x, i, a) => a.indexOf(x) === i);
  const entry = {
+ branch: "main",
  name: t.name,
  data_source: { type: "data_view", id: viewMap[t.name] },
  primary_keys: [pk],
  display_key: dk,
- data_properties: [pk, dk].filter((x, i, a) => a.indexOf(x) === i).map((n) => ({
- name: n,
- display_name: n,
+ data_properties: t.columns.map((c) => ({
+ name: c.name,
+ display_name: c.name,
  type: "string",
+ mapped_field: { name: c.name, type: c.type || "varchar" },
  })),
  };
- const otBody = JSON.stringify({ entries: [entry], branch: "main" });
+ const otBody = JSON.stringify({ entries: [entry] });
  const otResponse = await createObjectTypes({
  ...base,
  knId,
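
Compared with 0.4.9, each generated object type now carries every source column as a data property, each with a mapped_field naming the underlying column and its database type (defaulting to varchar). An illustrative entry for one table, with placeholder names:

    // Placeholder table "orders" with two columns; the shape follows the loop above.
    const entry = {
        branch: "main",
        name: "orders",
        data_source: { type: "data_view", id: "dv_orders" },
        primary_keys: ["order_id"],
        display_key: "customer_name",
        data_properties: [
            { name: "order_id", display_name: "order_id", type: "string",
              mapped_field: { name: "order_id", type: "bigint" } },
            { name: "customer_name", display_name: "customer_name", type: "string",
              mapped_field: { name: "customer_name", type: "varchar" } },
        ],
    };
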
@@ -2219,6 +2251,16 @@ async function runKnCreateFromDsCommand(args) {
  id: otItem?.id ?? "",
  field_count: t.columns.length,
  });
+ console.error(` Created: ${t.name} (${t.columns.length} fields, pk=${pk}, dk=${dk})`);
+ }
+ if (otResults.length === 0) {
+ const errorOutput = {
+ kn_id: knId,
+ kn_name: options.name,
+ error: "No object types were created",
+ };
+ console.log(JSON.stringify(errorOutput, null, options.pretty ? 2 : 0));
+ return 1;
  }
  let statusStr = "skipped";
  if (options.build) {
@@ -2746,3 +2788,149 @@ async function runKnSearchCommand(args) {
  return 1;
  }
  }
+ const KN_CREATE_FROM_CSV_HELP = `kweaver bkn create-from-csv <ds-id> --files <glob> --name X [options]
+
+ Import CSV files into datasource, then create a knowledge network.
+
+ Options:
+ --files <s> CSV file paths (comma-separated or glob, required)
+ --name <s> Knowledge network name (required)
+ --table-prefix <s> Table name prefix (default: none)
+ --batch-size <n> Rows per batch (default: 500)
+ --tables <a,b> Tables to include in KN (default: all imported)
+ --build (default) Build after creation
+ --no-build Skip build
+ --timeout <n> Build timeout in seconds (default: 300)
+ -bd, --biz-domain Business domain (default: bd_public)`;
+ function parseKnCreateFromCsvArgs(args) {
+ let dsId = "";
+ let files = "";
+ let name = "";
+ let tablePrefix = "";
+ let batchSize = 500;
+ let tablesStr = "";
+ let build = true;
+ let timeout = 300;
+ let businessDomain = "";
+ for (let i = 0; i < args.length; i += 1) {
+ const arg = args[i];
+ if (arg === "--help" || arg === "-h")
+ throw new Error("help");
+ if (arg === "--files" && args[i + 1]) {
+ files = args[++i];
+ continue;
+ }
+ if (arg === "--name" && args[i + 1]) {
+ name = args[++i];
+ continue;
+ }
+ if (arg === "--table-prefix" && args[i + 1]) {
+ tablePrefix = args[++i];
+ continue;
+ }
+ if (arg === "--batch-size" && args[i + 1]) {
+ batchSize = parseInt(args[++i], 10);
+ if (Number.isNaN(batchSize) || batchSize < 1)
+ batchSize = 500;
+ continue;
+ }
+ if (arg === "--tables" && args[i + 1]) {
+ tablesStr = args[++i];
+ continue;
+ }
+ if (arg === "--build") {
+ build = true;
+ continue;
+ }
+ if (arg === "--no-build") {
+ build = false;
+ continue;
+ }
+ if (arg === "--timeout" && args[i + 1]) {
+ timeout = parseInt(args[++i], 10);
+ if (Number.isNaN(timeout) || timeout < 1)
+ timeout = 300;
+ continue;
+ }
+ if ((arg === "-bd" || arg === "--biz-domain") && args[i + 1]) {
+ businessDomain = args[++i];
+ continue;
+ }
+ if (!arg.startsWith("-") && !dsId) {
+ dsId = arg;
+ }
+ }
+ const tables = tablesStr ? tablesStr.split(",").map((s) => s.trim()).filter(Boolean) : [];
+ if (!dsId || !files || !name) {
+ throw new Error("Usage: kweaver bkn create-from-csv <ds-id> --files <glob> --name X [options]");
+ }
+ if (!businessDomain)
+ businessDomain = resolveBusinessDomain();
+ return { dsId, files, name, tablePrefix, batchSize, tables, build, timeout, businessDomain };
+ }
+ async function runKnCreateFromCsvCommand(args) {
+ let options;
+ try {
+ options = parseKnCreateFromCsvArgs(args);
+ }
+ catch (error) {
+ if (error instanceof Error && error.message === "help") {
+ console.log(KN_CREATE_FROM_CSV_HELP);
+ return 0;
+ }
+ console.error(formatHttpError(error));
+ return 1;
+ }
+ // Phase 1: Import CSVs
+ console.error("Phase 1: Importing CSVs ...");
+ const importArgs = [
+ options.dsId,
+ "--files", options.files,
+ "--table-prefix", options.tablePrefix,
+ "--batch-size", String(options.batchSize),
+ "-bd", options.businessDomain,
+ ];
+ const importResult = await runDsImportCsv(importArgs);
+ if (importResult.code !== 0) {
+ console.error("CSV import failed — aborting KN creation");
+ return importResult.code;
+ }
+ // Phase 1.5: Scan datasource metadata so platform discovers newly imported tables
+ console.error("Scanning datasource metadata ...");
+ try {
+ const token = await ensureValidToken();
+ const dsBody = await getDatasource({
+ baseUrl: token.baseUrl,
+ accessToken: token.accessToken,
+ id: options.dsId,
+ businessDomain: options.businessDomain,
+ });
+ const dsParsed = JSON.parse(dsBody);
+ await scanMetadata({
+ baseUrl: token.baseUrl,
+ accessToken: token.accessToken,
+ id: options.dsId,
+ dsType: dsParsed.type ?? "mysql",
+ businessDomain: options.businessDomain,
+ });
+ }
+ catch (err) {
+ console.error(`Scan warning (continuing): ${String(err)}`);
+ }
+ // Phase 2: Create KN from datasource
+ console.error("Phase 2: Creating knowledge network ...");
+ const tableNames = options.tables.length > 0 ? options.tables : importResult.tables;
+ if (tableNames.length === 0) {
+ console.error("No tables available for KN creation — aborting");
+ return 1;
+ }
+ const knArgs = [
+ options.dsId,
+ "--name", options.name,
+ "--tables", tableNames.join(","),
+ options.build ? "--build" : "--no-build",
+ "--timeout", String(options.timeout),
+ "-bd", options.businessDomain,
+ ];
+ return runKnCreateFromDsCommand(knArgs, importResult.sampleRows);
+ }
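
create-from-csv chains three existing pieces: runDsImportCsv to load the files, scanMetadata so the platform discovers the new tables, and runKnCreateFromDsCommand, which is now fed the CSV sample rows for primary-key detection. A hedged sketch of driving it programmatically, assuming runKnCommand is reachable from your import path and using placeholder IDs and paths:

    // Placeholder datasource ID, file glob, and KN name.
    import { runKnCommand } from "@kweaver-ai/kweaver-sdk"; // adjust to however the package exposes it
    const exitCode = await runKnCommand([
        "create-from-csv", "ds_42",
        "--files", "./data/*.csv",
        "--name", "Sales KN",
        "--no-build",
    ]);
    console.log(`kn create-from-csv exited with ${exitCode}`);
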
@@ -5,3 +5,19 @@ export declare function parseDsListArgs(args: string[]): {
  businessDomain: string;
  pretty: boolean;
  };
+ export declare function parseImportCsvArgs(args: string[]): {
+ datasourceId: string;
+ files: string;
+ tablePrefix: string;
+ batchSize: number;
+ businessDomain: string;
+ };
+ export declare function resolveFiles(pattern: string): Promise<string[]>;
+ export interface ImportCsvResult {
+ code: number;
+ tables: string[];
+ tableColumns: Record<string, string[]>;
+ sampleRows: Record<string, Array<Record<string, string | null>>>;
+ }
+ export declare function runDsImportCsv(args: string[]): Promise<ImportCsvResult>;
+ export declare function runDsImportCsvCommand(args: string[]): Promise<number>;
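
ImportCsvResult is what lets create-from-csv reuse the import outcome: tables lists what landed, tableColumns maps each table to its header row, and sampleRows keeps up to the first 100 rows per table for downstream key detection. A short consumer sketch, with a placeholder datasource ID and file path and an assumed module path for the compiled ds command file:

    import { runDsImportCsv, type ImportCsvResult } from "./ds.js"; // path is an assumption
    const result: ImportCsvResult = await runDsImportCsv(["ds_42", "--files", "./orders.csv"]);
    if (result.code === 0) {
        for (const table of result.tables) {
            const cols = result.tableColumns[table] ?? [];
            const sample = result.sampleRows[table] ?? [];
            console.log(`${table}: ${cols.length} columns, ${sample.length} sampled rows`);
        }
    }
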
@@ -1,8 +1,13 @@
  import { createInterface } from "node:readline";
+ import { statSync } from "node:fs";
+ import { glob } from "node:fs/promises";
+ import { resolve as resolvePath } from "node:path";
  import { ensureValidToken, formatHttpError, with401RefreshRetry } from "../auth/oauth.js";
  import { testDatasource, createDatasource, listDatasources, getDatasource, deleteDatasource, listTablesWithColumns, } from "../api/datasources.js";
  import { formatCallOutput } from "./call.js";
  import { resolveBusinessDomain } from "../config/store.js";
+ import { parseCsvFile, buildTableName, splitBatches, buildFieldMappings, buildDagBody, } from "./import-csv.js";
+ import { executeDataflow } from "../api/dataflow.js";
  function confirmYes(prompt) {
  return new Promise((resolve) => {
  const rl = createInterface({ input: process.stdin, output: process.stdout });
@@ -32,7 +37,9 @@ Subcommands:
  delete <id> [-y] Delete a datasource
  tables <id> [--keyword X] List tables with columns
  connect <db_type> <host> <port> <database> --account X --password Y [--schema Z] [--name N]
- Test connectivity, register datasource, and discover tables.`);
+ Test connectivity, register datasource, and discover tables.
+ import-csv <ds-id> --files <glob_or_list> [--table-prefix X] [--batch-size N]
+ Import CSV files into datasource tables via dataflow API.`);
  return 0;
  }
  const dispatch = () => {
@@ -46,6 +53,8 @@ Subcommands:
  return runDsTablesCommand(rest);
  if (subcommand === "connect")
  return runDsConnectCommand(rest);
+ if (subcommand === "import-csv")
+ return runDsImportCsvCommand(rest);
  return Promise.resolve(-1);
  };
  try {
@@ -288,3 +297,197 @@ async function runDsConnectCommand(args) {
  console.log(JSON.stringify(output, null, 2));
  return 0;
  }
+ // ── import-csv ────────────────────────────────────────────────────────────────
+ const IMPORT_CSV_HELP = `kweaver ds import-csv <ds-id> --files <glob_or_list> [options]
+
+ Import CSV files into datasource tables via dataflow API.
+
+ Options:
+ --files <s> CSV file paths (comma-separated or glob pattern, required)
+ --table-prefix <s> Table name prefix (default: none)
+ --batch-size <n> Rows per batch (default: 500, range: 1-10000)
+ -bd, --biz-domain Business domain (default: bd_public)`;
+ export function parseImportCsvArgs(args) {
+ let datasourceId = "";
+ let files = "";
+ let tablePrefix = "";
+ let batchSize = 500;
+ let businessDomain = "";
+ for (let i = 0; i < args.length; i += 1) {
+ const arg = args[i];
+ if (arg === "--help" || arg === "-h")
+ throw new Error("help");
+ if (arg === "--files" && args[i + 1]) {
+ files = args[++i];
+ continue;
+ }
+ if (arg === "--table-prefix" && args[i + 1]) {
+ tablePrefix = args[++i];
+ continue;
+ }
+ if (arg === "--batch-size" && args[i + 1]) {
+ const n = parseInt(args[++i], 10);
+ if (Number.isNaN(n) || n < 1 || n > 10000) {
+ throw new Error("--batch-size must be between 1 and 10000");
+ }
+ batchSize = n;
+ continue;
+ }
+ if ((arg === "-bd" || arg === "--biz-domain") && args[i + 1]) {
+ businessDomain = args[++i];
+ continue;
+ }
+ if (!arg.startsWith("-") && !datasourceId) {
+ datasourceId = arg;
+ }
+ }
+ if (!businessDomain)
+ businessDomain = resolveBusinessDomain();
+ return { datasourceId, files, tablePrefix, batchSize, businessDomain };
+ }
+ export async function resolveFiles(pattern) {
+ const parts = pattern.split(",").map((p) => p.trim()).filter(Boolean);
+ const result = [];
+ for (const part of parts) {
+ if (part.includes("*") || part.includes("?")) {
+ const matched = [];
+ for await (const entry of glob(part)) {
+ const p = String(entry);
+ if (/\.csv$/i.test(p)) {
+ matched.push(resolvePath(p));
+ }
+ }
+ result.push(...matched);
+ }
+ else {
+ const abs = resolvePath(part);
+ statSync(abs); // throws if file does not exist
+ result.push(abs);
+ }
+ }
+ if (result.length === 0) {
+ throw new Error(`No CSV files matched: ${pattern}`);
+ }
+ return result;
+ }
+ export async function runDsImportCsv(args) {
+ let options;
+ try {
+ options = parseImportCsvArgs(args);
+ }
+ catch (error) {
+ if (error instanceof Error && error.message === "help") {
+ console.log(IMPORT_CSV_HELP);
+ return { code: 0, tables: [], tableColumns: {}, sampleRows: {} };
+ }
+ throw error;
+ }
+ if (!options.datasourceId) {
+ console.error("Usage: kweaver ds import-csv <ds-id> --files <glob_or_list> [options]");
+ return { code: 1, tables: [], tableColumns: {}, sampleRows: {} };
+ }
+ if (!options.files) {
+ console.error("Error: --files is required");
+ return { code: 1, tables: [], tableColumns: {}, sampleRows: {} };
+ }
+ // 1. Get credentials
+ const token = await ensureValidToken();
+ const base = { baseUrl: token.baseUrl, accessToken: token.accessToken };
+ // 2. Resolve glob / file list
+ const filePaths = await resolveFiles(options.files);
+ // 3. Get datasource type
+ const dsBody = await getDatasource({ ...base, id: options.datasourceId, businessDomain: options.businessDomain });
+ const dsData = JSON.parse(dsBody);
+ const datasourceType = String(dsData.type ?? dsData.ds_type ?? dsData.data_type ?? "mysql");
+ const parsed = [];
+ for (const filePath of filePaths) {
+ const tableName = buildTableName(filePath, options.tablePrefix);
+ let csvData;
+ try {
+ csvData = await parseCsvFile(filePath);
+ }
+ catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ console.error(`[${tableName}] skipping — parse error: ${msg}`);
+ continue;
+ }
+ if (csvData.headers.length === 0) {
+ console.error(`[${tableName}] skipping — no headers`);
+ continue;
+ }
+ if (csvData.rows.length === 0) {
+ console.error(`[${tableName}] skipping — no rows`);
+ continue;
+ }
+ parsed.push({ filePath, tableName, headers: csvData.headers, rows: csvData.rows });
+ }
+ if (parsed.length === 0) {
+ console.error("All files were skipped — nothing to import");
+ return { code: 1, tables: [], tableColumns: {}, sampleRows: {} };
+ }
+ // Phase 2: Import each file in batches
+ const succeeded = [];
+ const failed = [];
+ const tableColumns = {};
+ const sampleRows = {};
+ for (const { tableName, headers, rows } of parsed) {
+ const batches = splitBatches(rows, options.batchSize);
+ const fieldMappings = buildFieldMappings(headers);
+ let batchFailed = false;
+ for (let bIdx = 0; bIdx < batches.length; bIdx += 1) {
+ const batch = batches[bIdx];
+ const tableExist = bIdx > 0;
+ const batchLabel = `${bIdx + 1}/${batches.length}`;
+ const rowCount = batch.length;
+ const dagBody = buildDagBody({
+ datasourceId: options.datasourceId,
+ datasourceType,
+ tableName,
+ tableExist,
+ data: batch,
+ fieldMappings,
+ });
+ const t0 = Date.now();
+ process.stderr.write(`[${tableName}] batch ${batchLabel} (${rowCount} rows)... `);
+ try {
+ await executeDataflow({
+ ...base,
+ businessDomain: options.businessDomain,
+ body: dagBody,
+ });
+ const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
+ process.stderr.write(`${elapsed}s\n`);
+ }
+ catch (err) {
+ const msg = err instanceof Error ? err.message : String(err);
+ process.stderr.write(`FAILED\n`);
+ console.error(`[${tableName}] batch ${batchLabel} error: ${msg}`);
+ batchFailed = true;
+ break;
+ }
+ }
+ if (batchFailed) {
+ failed.push(tableName);
+ }
+ else {
+ succeeded.push(tableName);
+ tableColumns[tableName] = headers;
+ sampleRows[tableName] = parsed.find((p) => p.tableName === tableName)?.rows.slice(0, 100) ?? [];
+ }
+ }
+ // Summary
+ console.error(`\nImport complete: ${succeeded.length} succeeded, ${failed.length} failed.`);
+ if (failed.length > 0) {
+ console.error(`Failed tables: ${failed.join(", ")}`);
+ }
+ console.log(JSON.stringify({
+ tables: succeeded,
+ failed,
+ summary: { succeeded: succeeded.length, failed: failed.length },
+ }, null, 2));
+ return { code: failed.length > 0 ? 1 : 0, tables: succeeded, tableColumns, sampleRows };
+ }
+ export async function runDsImportCsvCommand(args) {
+ const result = await runDsImportCsv(args);
+ return result.code;
+ }
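
resolveFiles accepts a comma-separated mix of plain paths and glob patterns: globs are expanded through the glob helper from node:fs/promises (available from Node 22) and filtered to .csv, while plain paths are resolved to absolute paths and must exist (statSync throws otherwise). A small usage sketch with placeholder paths and an assumed module path:

    import { resolveFiles } from "./ds.js"; // module path is an assumption
    const files = await resolveFiles("./exports/*.csv, ./extra/customers.csv");
    // -> absolute paths of every matched .csv file; throws if nothing matches
    console.log(files);
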