@kweaver-ai/kweaver-sdk 0.4.10 → 0.4.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +28 -1
- package/README.zh.md +28 -1
- package/dist/api/dataflow.d.ts +78 -0
- package/dist/api/dataflow.js +135 -0
- package/dist/api/dataviews.js +49 -0
- package/dist/auth/oauth.d.ts +6 -1
- package/dist/auth/oauth.js +240 -166
- package/dist/cli.js +3 -1
- package/dist/client.js +2 -0
- package/dist/commands/auth.js +36 -16
- package/dist/commands/bkn.js +203 -18
- package/dist/commands/ds.d.ts +16 -0
- package/dist/commands/ds.js +204 -1
- package/dist/commands/import-csv.d.ts +47 -0
- package/dist/commands/import-csv.js +111 -0
- package/dist/config/store.d.ts +2 -0
- package/dist/config/tls-env.d.ts +8 -0
- package/dist/config/tls-env.js +22 -0
- package/package.json +2 -1
package/dist/commands/bkn.js
CHANGED
|
@@ -8,11 +8,12 @@ import { ensureValidToken, formatHttpError, with401RefreshRetry } from "../auth/
|
|
|
8
8
|
import { listKnowledgeNetworks, getKnowledgeNetwork, createKnowledgeNetwork, updateKnowledgeNetwork, deleteKnowledgeNetwork, listObjectTypes, listRelationTypes, listActionTypes, getObjectType, createObjectTypes, updateObjectType, deleteObjectTypes, getRelationType, createRelationTypes, updateRelationType, deleteRelationTypes, buildKnowledgeNetwork, getBuildStatus, } from "../api/knowledge-networks.js";
|
|
9
9
|
import { objectTypeQuery, objectTypeProperties, subgraph, actionTypeQuery, actionTypeExecute, actionExecutionGet, actionLogsList, actionLogGet, actionLogCancel, } from "../api/ontology-query.js";
|
|
10
10
|
import { semanticSearch } from "../api/semantic-search.js";
|
|
11
|
-
import { listTablesWithColumns } from "../api/datasources.js";
|
|
12
|
-
import { createDataView } from "../api/dataviews.js";
|
|
11
|
+
import { listTablesWithColumns, scanMetadata, getDatasource } from "../api/datasources.js";
|
|
12
|
+
import { createDataView } from "../api/dataviews.js"; // used by runKnCreateFromDsCommand
|
|
13
13
|
import { downloadBkn, uploadBkn } from "../api/bkn-backend.js";
|
|
14
14
|
import { formatCallOutput } from "./call.js";
|
|
15
15
|
import { resolveBusinessDomain } from "../config/store.js";
|
|
16
|
+
import { runDsImportCsv } from "./ds.js";
|
|
16
17
|
export function formatSimpleKnList(text, pretty, includeDetail = false) {
|
|
17
18
|
const parsed = JSON.parse(text);
|
|
18
19
|
const entries = Array.isArray(parsed.entries) ? parsed.entries : [];
|
|
@@ -600,6 +601,8 @@ Subcommands:
|
|
|
600
601
|
get <kn-id> [options] Get knowledge network detail (use --stats or --export)
|
|
601
602
|
create [options] Create a knowledge network (empty or from --body-file)
|
|
602
603
|
create-from-ds <ds-id> --name X [--tables a,b] [--build] Create KN from datasource
|
|
604
|
+
create-from-csv <ds-id> --files <glob> --name X [--table-prefix P] [--build]
|
|
605
|
+
Import CSVs then create knowledge network
|
|
603
606
|
update <kn-id> [options] Update a knowledge network
|
|
604
607
|
delete <kn-id> Delete a knowledge network
|
|
605
608
|
build <kn-id> [--wait|--no-wait] [--timeout n] Trigger full build
|
|
@@ -646,6 +649,8 @@ export async function runKnCommand(args) {
|
|
|
646
649
|
return runKnCreateCommand(rest);
|
|
647
650
|
if (subcommand === "create-from-ds")
|
|
648
651
|
return runKnCreateFromDsCommand(rest);
|
|
652
|
+
if (subcommand === "create-from-csv")
|
|
653
|
+
return runKnCreateFromCsvCommand(rest);
|
|
649
654
|
if (subcommand === "update")
|
|
650
655
|
return runKnUpdateCommand(rest);
|
|
651
656
|
if (subcommand === "delete")
|
|
@@ -1074,20 +1079,18 @@ export function parseKnActionTypeExecuteArgs(args) {
|
|
|
1074
1079
|
timeout,
|
|
1075
1080
|
};
|
|
1076
1081
|
}
|
|
1077
|
-
const PK_CANDIDATES = new Set(["id", "pk", "key"]);
|
|
1078
|
-
const PK_TYPES = new Set(["integer", "unsigned integer", "string", "varchar", "bigint", "int"]);
|
|
1079
1082
|
const DISPLAY_HINTS = ["name", "title", "label", "display_name", "description"];
|
|
1080
|
-
|
|
1081
|
-
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
|
|
1088
|
-
return col.name;
|
|
1083
|
+
/**
 * Detect the primary-key column of a table from a sample of its rows.
 *
 * Columns are scanned left-to-right; the first column whose sampled values are
 * all present (not null, undefined or empty) and pairwise distinct is returned.
 * A key must identify every row, so a column with a missing value is never
 * selected even when its remaining values happen to be unique.
 *
 * @param table Table descriptor exposing `columns`, each with a `name`.
 * @param rows  Optional sample rows keyed by column name.
 * @returns The detected column name; falls back to the first column's name,
 *          or "id" when the table has no columns.
 */
function detectPrimaryKey(table, rows) {
    if (rows && rows.length > 0) {
        for (const col of table.columns) {
            const seen = new Set();
            let isCandidate = true;
            for (const row of rows) {
                const value = row[col.name];
                // Missing values and repeats both disqualify the column; stop at the first one.
                if (value === null || value === undefined || value === "" || seen.has(value)) {
                    isCandidate = false;
                    break;
                }
                seen.add(value);
            }
            if (isCandidate)
                return col.name;
        }
    }
    // Fallback: first column
    return table.columns[0]?.name ?? "id";
}
|
|
1093
1096
|
function detectDisplayKey(table, primaryKey) {
|
|
@@ -2141,7 +2144,25 @@ function parseKnCreateFromDsArgs(args) {
|
|
|
2141
2144
|
businessDomain = resolveBusinessDomain();
|
|
2142
2145
|
return { dsId, name, tables, build, timeout, businessDomain, pretty };
|
|
2143
2146
|
}
|
|
2144
|
-
|
|
2147
|
+
/**
 * Turn a table name into an identifier made only of ASCII letters, digits and
 * underscores. Every other character becomes "_", and a leading digit is
 * prefixed with "_".
 */
function sanitizeBknId(name) {
    const unsafeChars = /[^a-zA-Z0-9_]/g;
    const leadingDigit = /^(\d)/;
    const underscored = name.replace(unsafeChars, "_");
    return underscored.replace(leadingDigit, "_$1");
}
|
|
2151
|
+
/**
 * Render the BKN ObjectType markdown document for one table.
 *
 * The output is a YAML front matter block followed by an ObjectType heading,
 * a "Data Source" table pointing at the data view, and a "Data Properties"
 * table with one row per column (every property is typed as string).
 *
 * @param tableName Original table name; also used as the display name.
 * @param dvId      ID of the data view backing the object type.
 * @param pk        Name of the primary-key column.
 * @param dk        Name of the display-key column.
 * @param columns   Column descriptors; only `name` is read.
 * @returns The markdown document as a single string.
 */
function generateObjectTypeBkn(tableName, dvId, pk, dk, columns) {
    const objectTypeId = sanitizeBknId(tableName);
    const yesNo = (flag) => (flag ? "yes" : "no");
    const propertyLines = [];
    for (const column of columns) {
        const pkCell = yesNo(column.name === pk);
        const dkCell = yesNo(column.name === dk);
        propertyLines.push(`| ${column.name} | ${column.name} | string | ${pkCell} | ${dkCell} |`);
    }
    const frontmatter = `---\ntype: object_type\nid: ${objectTypeId}\nname: ${tableName}\n---\n\n`;
    const header = `## ObjectType: ${objectTypeId}\n\n**${tableName}**\n`;
    const dsTable = `### Data Source\n\n| Type | ID | Name |\n|------|-----|------|\n| data_view | ${dvId} | ${tableName} |\n`;
    const dpHeader = `### Data Properties\n\n| Property | Display Name | Type | Primary Key | Display Key |\n|----------|-------------|------|-------------|-------------|\n`;
    const dpRows = propertyLines.join("\n");
    return `${frontmatter}${header}\n${dsTable}\n${dpHeader}${dpRows}\n`;
}
|
|
2165
|
+
async function runKnCreateFromDsCommand(args, sampleRows) {
|
|
2145
2166
|
let options;
|
|
2146
2167
|
try {
|
|
2147
2168
|
options = parseKnCreateFromDsArgs(args);
|
|
@@ -2170,6 +2191,8 @@ async function runKnCreateFromDsCommand(args) {
|
|
|
2170
2191
|
console.error("No tables available");
|
|
2171
2192
|
return 1;
|
|
2172
2193
|
}
|
|
2194
|
+
// Phase 1: Create DataViews for each table
|
|
2195
|
+
console.error(`Creating data views for ${targetTables.length} table(s) ...`);
|
|
2173
2196
|
const viewMap = {};
|
|
2174
2197
|
for (const t of targetTables) {
|
|
2175
2198
|
const dvId = await createDataView({
|
|
@@ -2181,6 +2204,7 @@ async function runKnCreateFromDsCommand(args) {
|
|
|
2181
2204
|
});
|
|
2182
2205
|
viewMap[t.name] = dvId;
|
|
2183
2206
|
}
|
|
2207
|
+
// Phase 2: Create the KN record
|
|
2184
2208
|
const knBody = JSON.stringify({
|
|
2185
2209
|
name: options.name,
|
|
2186
2210
|
branch: "main",
|
|
@@ -2193,20 +2217,25 @@ async function runKnCreateFromDsCommand(args) {
|
|
|
2193
2217
|
const knParsed = JSON.parse(knResponse);
|
|
2194
2218
|
const knItem = Array.isArray(knParsed) ? knParsed[0] : knParsed;
|
|
2195
2219
|
const knId = String(knItem?.id ?? "");
|
|
2220
|
+
console.error(`Knowledge network created: ${knId}`);
|
|
2221
|
+
// Phase 3: Create object types via REST API
|
|
2222
|
+
console.error(`Creating ${targetTables.length} object type(s) ...`);
|
|
2196
2223
|
const otResults = [];
|
|
2197
2224
|
for (const t of targetTables) {
|
|
2198
|
-
const pk = detectPrimaryKey(t);
|
|
2225
|
+
const pk = detectPrimaryKey(t, sampleRows?.[t.name]);
|
|
2199
2226
|
const dk = detectDisplayKey(t, pk);
|
|
2227
|
+
const uniqueProps = [pk, dk].filter((x, i, a) => a.indexOf(x) === i);
|
|
2200
2228
|
const entry = {
|
|
2201
2229
|
branch: "main",
|
|
2202
2230
|
name: t.name,
|
|
2203
2231
|
data_source: { type: "data_view", id: viewMap[t.name] },
|
|
2204
2232
|
primary_keys: [pk],
|
|
2205
2233
|
display_key: dk,
|
|
2206
|
-
data_properties:
|
|
2207
|
-
name:
|
|
2208
|
-
display_name:
|
|
2234
|
+
data_properties: t.columns.map((c) => ({
|
|
2235
|
+
name: c.name,
|
|
2236
|
+
display_name: c.name,
|
|
2209
2237
|
type: "string",
|
|
2238
|
+
mapped_field: { name: c.name, type: c.type || "varchar" },
|
|
2210
2239
|
})),
|
|
2211
2240
|
};
|
|
2212
2241
|
const otBody = JSON.stringify({ entries: [entry] });
|
|
@@ -2222,6 +2251,16 @@ async function runKnCreateFromDsCommand(args) {
|
|
|
2222
2251
|
id: otItem?.id ?? "",
|
|
2223
2252
|
field_count: t.columns.length,
|
|
2224
2253
|
});
|
|
2254
|
+
console.error(` Created: ${t.name} (${t.columns.length} fields, pk=${pk}, dk=${dk})`);
|
|
2255
|
+
}
|
|
2256
|
+
if (otResults.length === 0) {
|
|
2257
|
+
const errorOutput = {
|
|
2258
|
+
kn_id: knId,
|
|
2259
|
+
kn_name: options.name,
|
|
2260
|
+
error: "No object types were created",
|
|
2261
|
+
};
|
|
2262
|
+
console.log(JSON.stringify(errorOutput, null, options.pretty ? 2 : 0));
|
|
2263
|
+
return 1;
|
|
2225
2264
|
}
|
|
2226
2265
|
let statusStr = "skipped";
|
|
2227
2266
|
if (options.build) {
|
|
@@ -2749,3 +2788,149 @@ async function runKnSearchCommand(args) {
|
|
|
2749
2788
|
return 1;
|
|
2750
2789
|
}
|
|
2751
2790
|
}
|
|
2791
|
+
const KN_CREATE_FROM_CSV_HELP = `kweaver bkn create-from-csv <ds-id> --files <glob> --name X [options]
|
|
2792
|
+
|
|
2793
|
+
Import CSV files into datasource, then create a knowledge network.
|
|
2794
|
+
|
|
2795
|
+
Options:
|
|
2796
|
+
--files <s> CSV file paths (comma-separated or glob, required)
|
|
2797
|
+
--name <s> Knowledge network name (required)
|
|
2798
|
+
--table-prefix <s> Table name prefix (default: none)
|
|
2799
|
+
--batch-size <n> Rows per batch (default: 500)
|
|
2800
|
+
--tables <a,b> Tables to include in KN (default: all imported)
|
|
2801
|
+
--build (default) Build after creation
|
|
2802
|
+
--no-build Skip build
|
|
2803
|
+
--timeout <n> Build timeout in seconds (default: 300)
|
|
2804
|
+
-bd, --biz-domain Business domain (default: bd_public)`;
|
|
2805
|
+
/**
 * Parse the arguments of `kweaver bkn create-from-csv`.
 *
 * The first bare (non-flag) argument is the datasource ID. A flag that expects
 * a value but is not followed by one is ignored.
 *
 * @param args Raw CLI arguments following the subcommand.
 * @returns Parsed options: dsId, files, name, tablePrefix, batchSize, tables,
 *          build, timeout and businessDomain.
 * @throws Error("help") when `--help` / `-h` is present.
 * @throws Error when `--batch-size` exceeds 10000, or when the datasource ID,
 *         `--files` or `--name` is missing.
 */
function parseKnCreateFromCsvArgs(args) {
    let dsId = "";
    let files = "";
    let name = "";
    let tablePrefix = "";
    let batchSize = 500;
    let tablesStr = "";
    let build = true;
    let timeout = 300;
    let businessDomain = "";
    for (let i = 0; i < args.length; i += 1) {
        const arg = args[i];
        if (arg === "--help" || arg === "-h")
            throw new Error("help");
        if (arg === "--files" && args[i + 1]) {
            files = args[++i];
            continue;
        }
        if (arg === "--name" && args[i + 1]) {
            name = args[++i];
            continue;
        }
        if (arg === "--table-prefix" && args[i + 1]) {
            tablePrefix = args[++i];
            continue;
        }
        if (arg === "--batch-size" && args[i + 1]) {
            batchSize = parseInt(args[++i], 10);
            // Unparseable or non-positive values fall back to the default.
            if (Number.isNaN(batchSize) || batchSize < 1)
                batchSize = 500;
            // The CSV import step rejects batch sizes above 10000 with this same
            // message; fail here so the caller's argument error handling reports it.
            if (batchSize > 10000)
                throw new Error("--batch-size must be between 1 and 10000");
            continue;
        }
        if (arg === "--tables" && args[i + 1]) {
            tablesStr = args[++i];
            continue;
        }
        if (arg === "--build") {
            build = true;
            continue;
        }
        if (arg === "--no-build") {
            build = false;
            continue;
        }
        if (arg === "--timeout" && args[i + 1]) {
            timeout = parseInt(args[++i], 10);
            if (Number.isNaN(timeout) || timeout < 1)
                timeout = 300;
            continue;
        }
        if ((arg === "-bd" || arg === "--biz-domain") && args[i + 1]) {
            businessDomain = args[++i];
            continue;
        }
        // Only the first bare argument is taken as the datasource ID.
        if (!arg.startsWith("-") && !dsId) {
            dsId = arg;
        }
    }
    const tables = tablesStr ? tablesStr.split(",").map((s) => s.trim()).filter(Boolean) : [];
    if (!dsId || !files || !name) {
        throw new Error("Usage: kweaver bkn create-from-csv <ds-id> --files <glob> --name X [options]");
    }
    if (!businessDomain)
        businessDomain = resolveBusinessDomain();
    return { dsId, files, name, tablePrefix, batchSize, tables, build, timeout, businessDomain };
}
|
|
2871
|
+
/**
 * `kweaver bkn create-from-csv`: import CSV files into a datasource, ask the
 * platform to rescan the datasource metadata, then create the knowledge
 * network by delegating to `runKnCreateFromDsCommand`.
 *
 * @param args Raw CLI arguments following the subcommand.
 * @returns Process exit code: 0 on success or when help was printed,
 *          non-zero when parsing, the import or the KN creation fails.
 */
async function runKnCreateFromCsvCommand(args) {
    let options;
    try {
        options = parseKnCreateFromCsvArgs(args);
    }
    catch (error) {
        if (error instanceof Error && error.message === "help") {
            console.log(KN_CREATE_FROM_CSV_HELP);
            return 0;
        }
        console.error(formatHttpError(error));
        return 1;
    }
    // Phase 1: Import CSVs
    console.error("Phase 1: Importing CSVs ...");
    const importArgs = [
        options.dsId,
        "--files", options.files,
        "--batch-size", String(options.batchSize),
        "-bd", options.businessDomain,
    ];
    // Forward the prefix only when one was given, so no empty flag value is passed on.
    if (options.tablePrefix) {
        importArgs.push("--table-prefix", options.tablePrefix);
    }
    let importResult;
    try {
        importResult = await runDsImportCsv(importArgs);
    }
    catch (error) {
        // runDsImportCsv rethrows argument errors and does not catch failures from
        // file resolution, authentication or the datasource lookup; report them
        // here instead of letting the rejection escape this command.
        console.error(formatHttpError(error));
        console.error("CSV import failed — aborting KN creation");
        return 1;
    }
    if (importResult.code !== 0) {
        console.error("CSV import failed — aborting KN creation");
        return importResult.code;
    }
    // Phase 1.5: Scan datasource metadata so platform discovers newly imported tables
    console.error("Scanning datasource metadata ...");
    try {
        const token = await ensureValidToken();
        const dsBody = await getDatasource({
            baseUrl: token.baseUrl,
            accessToken: token.accessToken,
            id: options.dsId,
            businessDomain: options.businessDomain,
        });
        const dsParsed = JSON.parse(dsBody);
        await scanMetadata({
            baseUrl: token.baseUrl,
            accessToken: token.accessToken,
            id: options.dsId,
            // Same field fallback order the CSV import step uses to find the datasource type.
            dsType: dsParsed.type ?? dsParsed.ds_type ?? dsParsed.data_type ?? "mysql",
            businessDomain: options.businessDomain,
        });
    }
    catch (err) {
        // Best effort: a failed scan is reported but does not stop KN creation.
        console.error(`Scan warning (continuing): ${String(err)}`);
    }
    // Phase 2: Create KN from datasource
    console.error("Phase 2: Creating knowledge network ...");
    const tableNames = options.tables.length > 0 ? options.tables : importResult.tables;
    if (tableNames.length === 0) {
        console.error("No tables available for KN creation — aborting");
        return 1;
    }
    const knArgs = [
        options.dsId,
        "--name", options.name,
        "--tables", tableNames.join(","),
        options.build ? "--build" : "--no-build",
        "--timeout", String(options.timeout),
        "-bd", options.businessDomain,
    ];
    // Sample rows from the import let the KN step detect primary keys from real data.
    return runKnCreateFromDsCommand(knArgs, importResult.sampleRows);
}
|
package/dist/commands/ds.d.ts
CHANGED
|
@@ -5,3 +5,19 @@ export declare function parseDsListArgs(args: string[]): {
|
|
|
5
5
|
businessDomain: string;
|
|
6
6
|
pretty: boolean;
|
|
7
7
|
};
|
|
8
|
+
/**
 * Parse the arguments of `kweaver ds import-csv`.
 *
 * @param args Raw CLI arguments following the subcommand.
 * @returns The datasource ID (first bare argument), the `--files` value, the
 *          table-name prefix, the batch size and the business domain.
 */
export declare function parseImportCsvArgs(args: string[]): {
    /** First non-flag argument; empty when none was given. */
    datasourceId: string;
    /** Raw `--files` value (comma-separated paths and/or glob patterns). */
    files: string;
    /** Value of `--table-prefix`; empty when not given. */
    tablePrefix: string;
    /** Rows per batch. */
    batchSize: number;
    /** Business domain from `-bd` / `--biz-domain`, or the resolved default. */
    businessDomain: string;
};
|
|
15
|
+
/**
 * Expand a `--files` value into absolute file paths.
 *
 * The value is split on commas. Parts containing `*` or `?` are expanded as
 * glob patterns and only matches ending in `.csv` (case-insensitive) are kept.
 * Any other part is treated as a literal path that must exist.
 *
 * @throws When a literal path does not exist or when nothing matched.
 */
export declare function resolveFiles(pattern: string): Promise<string[]>;
|
|
16
|
+
/** Outcome of a CSV import run, as returned by `runDsImportCsv`. */
export interface ImportCsvResult {
    /** Exit code of the run: 0 on success, non-zero on failure. */
    code: number;
    /** Names of the tables that were imported successfully. */
    tables: string[];
    /** CSV header names per imported table, keyed by table name. */
    tableColumns: Record<string, string[]>;
    /** Sample of parsed rows per imported table, keyed by table name. */
    sampleRows: Record<string, Record<string, string | null>[]>;
}
|
|
22
|
+
/**
 * Import CSV files into datasource tables and report what was imported.
 *
 * Resolves with an exit code, the names of the successfully imported tables,
 * their headers and up to 100 sample rows per table. Argument errors other
 * than the help request are rethrown rather than turned into an exit code.
 */
export declare function runDsImportCsv(args: string[]): Promise<ImportCsvResult>;
|
|
23
|
+
/** CLI entry point for `ds import-csv`: runs `runDsImportCsv` and resolves with only its exit code. */
export declare function runDsImportCsvCommand(args: string[]): Promise<number>;
|
package/dist/commands/ds.js
CHANGED
|
@@ -1,8 +1,13 @@
|
|
|
1
1
|
import { createInterface } from "node:readline";
|
|
2
|
+
import { statSync } from "node:fs";
|
|
3
|
+
import { glob } from "node:fs/promises";
|
|
4
|
+
import { resolve as resolvePath } from "node:path";
|
|
2
5
|
import { ensureValidToken, formatHttpError, with401RefreshRetry } from "../auth/oauth.js";
|
|
3
6
|
import { testDatasource, createDatasource, listDatasources, getDatasource, deleteDatasource, listTablesWithColumns, } from "../api/datasources.js";
|
|
4
7
|
import { formatCallOutput } from "./call.js";
|
|
5
8
|
import { resolveBusinessDomain } from "../config/store.js";
|
|
9
|
+
import { parseCsvFile, buildTableName, splitBatches, buildFieldMappings, buildDagBody, } from "./import-csv.js";
|
|
10
|
+
import { executeDataflow } from "../api/dataflow.js";
|
|
6
11
|
function confirmYes(prompt) {
|
|
7
12
|
return new Promise((resolve) => {
|
|
8
13
|
const rl = createInterface({ input: process.stdin, output: process.stdout });
|
|
@@ -32,7 +37,9 @@ Subcommands:
|
|
|
32
37
|
delete <id> [-y] Delete a datasource
|
|
33
38
|
tables <id> [--keyword X] List tables with columns
|
|
34
39
|
connect <db_type> <host> <port> <database> --account X --password Y [--schema Z] [--name N]
|
|
35
|
-
Test connectivity, register datasource, and discover tables
|
|
40
|
+
Test connectivity, register datasource, and discover tables.
|
|
41
|
+
import-csv <ds-id> --files <glob_or_list> [--table-prefix X] [--batch-size N]
|
|
42
|
+
Import CSV files into datasource tables via dataflow API.`);
|
|
36
43
|
return 0;
|
|
37
44
|
}
|
|
38
45
|
const dispatch = () => {
|
|
@@ -46,6 +53,8 @@ Subcommands:
|
|
|
46
53
|
return runDsTablesCommand(rest);
|
|
47
54
|
if (subcommand === "connect")
|
|
48
55
|
return runDsConnectCommand(rest);
|
|
56
|
+
if (subcommand === "import-csv")
|
|
57
|
+
return runDsImportCsvCommand(rest);
|
|
49
58
|
return Promise.resolve(-1);
|
|
50
59
|
};
|
|
51
60
|
try {
|
|
@@ -288,3 +297,197 @@ async function runDsConnectCommand(args) {
|
|
|
288
297
|
console.log(JSON.stringify(output, null, 2));
|
|
289
298
|
return 0;
|
|
290
299
|
}
|
|
300
|
+
// ── import-csv ────────────────────────────────────────────────────────────────
|
|
301
|
+
const IMPORT_CSV_HELP = `kweaver ds import-csv <ds-id> --files <glob_or_list> [options]
|
|
302
|
+
|
|
303
|
+
Import CSV files into datasource tables via dataflow API.
|
|
304
|
+
|
|
305
|
+
Options:
|
|
306
|
+
--files <s> CSV file paths (comma-separated or glob pattern, required)
|
|
307
|
+
--table-prefix <s> Table name prefix (default: none)
|
|
308
|
+
--batch-size <n> Rows per batch (default: 500, range: 1-10000)
|
|
309
|
+
-bd, --biz-domain Business domain (default: bd_public)`;
|
|
310
|
+
/**
 * Parse the arguments of `kweaver ds import-csv`.
 *
 * The first bare (non-flag) argument becomes the datasource ID; later bare
 * arguments are ignored. A value-taking flag with no value after it is ignored.
 *
 * @param args Raw CLI arguments following the subcommand.
 * @returns `{ datasourceId, files, tablePrefix, batchSize, businessDomain }`.
 * @throws Error("help") for `--help` / `-h`.
 * @throws Error when `--batch-size` is not an integer in 1..10000.
 */
export function parseImportCsvArgs(args) {
    const parsed = {
        datasourceId: "",
        files: "",
        tablePrefix: "",
        batchSize: 500,
        businessDomain: "",
    };
    let cursor = 0;
    while (cursor < args.length) {
        const flag = args[cursor];
        const value = args[cursor + 1];
        cursor += 1;
        switch (flag) {
            case "--help":
            case "-h":
                throw new Error("help");
            case "--files":
                if (value) {
                    parsed.files = value;
                    cursor += 1;
                }
                break;
            case "--table-prefix":
                if (value) {
                    parsed.tablePrefix = value;
                    cursor += 1;
                }
                break;
            case "--batch-size":
                if (value) {
                    cursor += 1;
                    const size = parseInt(value, 10);
                    // NaN fails both comparisons, so it is rejected here as well.
                    if (!(size >= 1 && size <= 10000)) {
                        throw new Error("--batch-size must be between 1 and 10000");
                    }
                    parsed.batchSize = size;
                }
                break;
            case "-bd":
            case "--biz-domain":
                if (value) {
                    parsed.businessDomain = value;
                    cursor += 1;
                }
                break;
            default:
                if (!flag.startsWith("-") && !parsed.datasourceId) {
                    parsed.datasourceId = flag;
                }
        }
    }
    if (!parsed.businessDomain) {
        parsed.businessDomain = resolveBusinessDomain();
    }
    return parsed;
}
|
|
348
|
+
export async function resolveFiles(pattern) {
|
|
349
|
+
const parts = pattern.split(",").map((p) => p.trim()).filter(Boolean);
|
|
350
|
+
const result = [];
|
|
351
|
+
for (const part of parts) {
|
|
352
|
+
if (part.includes("*") || part.includes("?")) {
|
|
353
|
+
const matched = [];
|
|
354
|
+
for await (const entry of glob(part)) {
|
|
355
|
+
const p = String(entry);
|
|
356
|
+
if (/\.csv$/i.test(p)) {
|
|
357
|
+
matched.push(resolvePath(p));
|
|
358
|
+
}
|
|
359
|
+
}
|
|
360
|
+
result.push(...matched);
|
|
361
|
+
}
|
|
362
|
+
else {
|
|
363
|
+
const abs = resolvePath(part);
|
|
364
|
+
statSync(abs); // throws if file does not exist
|
|
365
|
+
result.push(abs);
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
if (result.length === 0) {
|
|
369
|
+
throw new Error(`No CSV files matched: ${pattern}`);
|
|
370
|
+
}
|
|
371
|
+
return result;
|
|
372
|
+
}
|
|
373
|
+
/**
 * Import CSV files into datasource tables through the dataflow API.
 *
 * Steps: parse arguments, obtain credentials, resolve the file list, look up
 * the datasource type, parse every CSV (skipping unreadable, header-less or
 * empty files), then write each file in batches. The first batch of a table is
 * sent with `tableExist: false` and the following ones with `tableExist: true`.
 * A table is abandoned at its first failing batch.
 *
 * Progress goes to stderr; a JSON summary goes to stdout.
 *
 * @param args Raw CLI arguments following the subcommand.
 * @returns Exit code (1 if any table failed or nothing could be imported), the
 *          imported table names, their headers, and up to 100 sample rows per
 *          imported table.
 * @throws Argument errors other than the help request; errors from credential
 *         lookup, file resolution and the datasource lookup are not caught here.
 */
export async function runDsImportCsv(args) {
    const emptyResult = (code) => ({ code, tables: [], tableColumns: {}, sampleRows: {} });
    let options;
    try {
        options = parseImportCsvArgs(args);
    }
    catch (error) {
        if (error instanceof Error && error.message === "help") {
            console.log(IMPORT_CSV_HELP);
            return emptyResult(0);
        }
        throw error;
    }
    if (!options.datasourceId) {
        console.error("Usage: kweaver ds import-csv <ds-id> --files <glob_or_list> [options]");
        return emptyResult(1);
    }
    if (!options.files) {
        console.error("Error: --files is required");
        return emptyResult(1);
    }
    // 1. Get credentials
    const token = await ensureValidToken();
    const base = { baseUrl: token.baseUrl, accessToken: token.accessToken };
    // 2. Resolve glob / file list
    const filePaths = await resolveFiles(options.files);
    // 3. Get datasource type
    const dsBody = await getDatasource({ ...base, id: options.datasourceId, businessDomain: options.businessDomain });
    const dsData = JSON.parse(dsBody);
    const datasourceType = String(dsData.type ?? dsData.ds_type ?? dsData.data_type ?? "mysql");
    // Phase 1: Parse every file up front so unusable files are reported before any write
    const parsed = [];
    for (const filePath of filePaths) {
        const tableName = buildTableName(filePath, options.tablePrefix);
        let csvData;
        try {
            csvData = await parseCsvFile(filePath);
        }
        catch (err) {
            const msg = err instanceof Error ? err.message : String(err);
            console.error(`[${tableName}] skipping — parse error: ${msg}`);
            continue;
        }
        if (csvData.headers.length === 0) {
            console.error(`[${tableName}] skipping — no headers`);
            continue;
        }
        if (csvData.rows.length === 0) {
            console.error(`[${tableName}] skipping — no rows`);
            continue;
        }
        parsed.push({ filePath, tableName, headers: csvData.headers, rows: csvData.rows });
    }
    if (parsed.length === 0) {
        console.error("All files were skipped — nothing to import");
        return emptyResult(1);
    }
    // Phase 2: Import each file in batches
    const succeeded = [];
    const failed = [];
    const tableColumns = {};
    const sampleRows = {};
    for (const { tableName, headers, rows } of parsed) {
        const batches = splitBatches(rows, options.batchSize);
        const fieldMappings = buildFieldMappings(headers);
        let batchFailed = false;
        for (let bIdx = 0; bIdx < batches.length; bIdx += 1) {
            const batch = batches[bIdx];
            // Only the first batch may need to create the table.
            const tableExist = bIdx > 0;
            const batchLabel = `${bIdx + 1}/${batches.length}`;
            const rowCount = batch.length;
            const dagBody = buildDagBody({
                datasourceId: options.datasourceId,
                datasourceType,
                tableName,
                tableExist,
                data: batch,
                fieldMappings,
            });
            const t0 = Date.now();
            process.stderr.write(`[${tableName}] batch ${batchLabel} (${rowCount} rows)... `);
            try {
                await executeDataflow({
                    ...base,
                    businessDomain: options.businessDomain,
                    body: dagBody,
                });
                const elapsed = ((Date.now() - t0) / 1000).toFixed(1);
                process.stderr.write(`${elapsed}s\n`);
            }
            catch (err) {
                const msg = err instanceof Error ? err.message : String(err);
                process.stderr.write(`FAILED\n`);
                console.error(`[${tableName}] batch ${batchLabel} error: ${msg}`);
                batchFailed = true;
                break;
            }
        }
        if (batchFailed) {
            failed.push(tableName);
        }
        else {
            succeeded.push(tableName);
            tableColumns[tableName] = headers;
            // Sample the rows of the file just imported. Looking the entry up again by
            // table name would return another file's rows when two CSVs share a name.
            sampleRows[tableName] = rows.slice(0, 100);
        }
    }
    // Summary
    console.error(`\nImport complete: ${succeeded.length} succeeded, ${failed.length} failed.`);
    if (failed.length > 0) {
        console.error(`Failed tables: ${failed.join(", ")}`);
    }
    console.log(JSON.stringify({
        tables: succeeded,
        failed,
        summary: { succeeded: succeeded.length, failed: failed.length },
    }, null, 2));
    return { code: failed.length > 0 ? 1 : 0, tables: succeeded, tableColumns, sampleRows };
}
|
|
490
|
+
/** CLI entry point for `ds import-csv`: runs the import and resolves with only its exit code. */
export async function runDsImportCsvCommand(args) {
    const { code } = await runDsImportCsv(args);
    return code;
}
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import type { DataflowCreateBody } from "../api/dataflow.js";
|
|
2
|
+
/** Parsed contents of one CSV file. */
export interface CsvData {
    /** Column names, in file order. */
    headers: string[];
    /** One record per data row, keyed by column name; `null` marks an empty cell. */
    rows: Record<string, string | null>[];
}
|
|
6
|
+
/** Maps one source field to one target column. */
export interface FieldMapping {
    /** Field to read from. */
    source: { name: string };
    /** Column to write to, with its data type. */
    target: { name: string; data_type: string };
}
|
|
15
|
+
/** Inputs for `buildDagBody`: one batch of rows destined for one table. */
export interface DagBodyOptions {
    /** ID of the target datasource. */
    datasourceId: string;
    /** Type of the target datasource. */
    datasourceType: string;
    /** Name of the table to write to. */
    tableName: string;
    /** Whether the table already exists. */
    tableExist: boolean;
    /** Rows to write, keyed by column name. */
    data: Record<string, string | null>[];
    /** Source-to-target field mappings. */
    fieldMappings: FieldMapping[];
}
|
|
23
|
+
/**
 * Parse a CSV file into its header names and data rows.
 *
 * A leading UTF-8 BOM is removed, empty cells are returned as `null`, and a
 * row whose column count does not match causes an error.
 *
 * @param filePath Path of the CSV file to read.
 * @returns The parsed headers and rows.
 */
export declare function parseCsvFile(filePath: string): Promise<CsvData>;
|
|
30
|
+
/**
 * Build the target table name for a CSV file: the `.csv` extension is removed
 * (matched case-insensitively) and `prefix` is put in front.
 *
 * @param filePath Path of the CSV file.
 * @param prefix   Prefix to prepend; may be empty.
 */
export declare function buildTableName(filePath: string, prefix: string): string;
|
|
34
|
+
/**
 * Partition `rows` into consecutive chunks holding no more than `batchSize`
 * elements each.
 *
 * @param rows      Items to partition.
 * @param batchSize Maximum number of items per chunk.
 */
export declare function splitBatches<T>(rows: T[], batchSize: number): T[][];
|
|
38
|
+
/**
 * Create one field mapping per CSV header. Every target field defaults to
 * the `VARCHAR(512)` data type.
 *
 * @param headers CSV column names.
 */
export declare function buildFieldMappings(headers: string[]): FieldMapping[];
|
|
43
|
+
/**
 * Build the dataflow request body that writes CSV rows into a database table.
 * The resulting DAG consists of two steps: a manual trigger followed by the
 * database write.
 *
 * @param options Target datasource and table, the rows to write and their field mappings.
 */
export declare function buildDagBody(options: DagBodyOptions): DataflowCreateBody;
|