@kweaver-ai/kweaver-sdk 0.7.2 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -1
- package/dist/api/datasources.d.ts +7 -0
- package/dist/api/datasources.js +8 -0
- package/dist/api/toolboxes.d.ts +2 -0
- package/dist/api/toolboxes.js +2 -1
- package/dist/cli.js +18 -2
- package/dist/commands/auth.js +42 -7
- package/dist/commands/bkn-ops.d.ts +2 -1
- package/dist/commands/bkn-ops.js +69 -34
- package/dist/commands/bkn-utils.d.ts +26 -2
- package/dist/commands/bkn-utils.js +66 -9
- package/dist/commands/dataflow.js +194 -20
- package/dist/commands/ds.d.ts +0 -1
- package/dist/commands/ds.js +19 -9
- package/dist/commands/import-csv.d.ts +0 -2
- package/dist/commands/import-csv.js +2 -4
- package/dist/commands/tool.d.ts +1 -0
- package/dist/commands/tool.js +12 -0
- package/dist/config/store.d.ts +1 -0
- package/dist/config/store.js +17 -0
- package/dist/resources/toolboxes.d.ts +2 -0
- package/dist/templates/bkn/document/manifest.json +12 -0
- package/dist/templates/bkn/document/template.json +757 -0
- package/dist/templates/dataflow/unstructured/manifest.json +11 -0
- package/dist/templates/dataflow/unstructured/template.json +63 -0
- package/dist/templates/dataset/document/manifest.json +10 -0
- package/dist/templates/dataset/document/template.json +23 -0
- package/dist/templates/dataset/document-content/manifest.json +10 -0
- package/dist/templates/dataset/document-content/template.json +29 -0
- package/dist/templates/dataset/document-element/manifest.json +10 -0
- package/dist/templates/dataset/document-element/template.json +21 -0
- package/dist/utils/template-loader.d.ts +40 -0
- package/dist/utils/template-loader.js +129 -0
- package/package.json +1 -1
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "unstructured",
|
|
3
|
+
"type": "dataflow",
|
|
4
|
+
"description": "非结构化文档处理流程",
|
|
5
|
+
"arguments": [
|
|
6
|
+
{ "name": "title", "required": true, "description": "数据流标题", "type": "string" },
|
|
7
|
+
{ "name": "content_dataset_id", "required": true, "description": "内容数据集 ID", "type": "string" },
|
|
8
|
+
{ "name": "document_dataset_id", "required": true, "description": "文档数据集 ID", "type": "string" },
|
|
9
|
+
{ "name": "element_dataset_id", "required": true, "description": "元素数据集 ID", "type": "string" }
|
|
10
|
+
]
|
|
11
|
+
}
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
{
|
|
2
|
+
"title": "{{title}}",
|
|
3
|
+
"steps": [
|
|
4
|
+
{
|
|
5
|
+
"id": "0",
|
|
6
|
+
"title": "",
|
|
7
|
+
"operator": "@trigger/dataflow-doc"
|
|
8
|
+
},
|
|
9
|
+
{
|
|
10
|
+
"id": "1",
|
|
11
|
+
"title": "",
|
|
12
|
+
"operator": "@content/file_parse",
|
|
13
|
+
"parameters": {
|
|
14
|
+
"docid": "{{__0.id}}",
|
|
15
|
+
"model": "embedding",
|
|
16
|
+
"slice_vector": "slice_vector",
|
|
17
|
+
"source_type": "docid",
|
|
18
|
+
"version": "{{__0.rev}}"
|
|
19
|
+
}
|
|
20
|
+
},
|
|
21
|
+
{
|
|
22
|
+
"id": "1001",
|
|
23
|
+
"title": "写入向量",
|
|
24
|
+
"operator": "@dataset/write-docs",
|
|
25
|
+
"parameters": {
|
|
26
|
+
"dataset_id": "{{content_dataset_id}}",
|
|
27
|
+
"documents": "{{__1.chunks}}"
|
|
28
|
+
}
|
|
29
|
+
},
|
|
30
|
+
{
|
|
31
|
+
"id": "1002",
|
|
32
|
+
"title": "写入元素",
|
|
33
|
+
"operator": "@dataset/write-docs",
|
|
34
|
+
"parameters": {
|
|
35
|
+
"dataset_id": "{{element_dataset_id}}",
|
|
36
|
+
"documents": "{{__1.content_list}}"
|
|
37
|
+
}
|
|
38
|
+
},
|
|
39
|
+
{
|
|
40
|
+
"id": "1003",
|
|
41
|
+
"title": "写入文件元信息",
|
|
42
|
+
"operator": "@dataset/write-docs",
|
|
43
|
+
"parameters": {
|
|
44
|
+
"dataset_id": "{{document_dataset_id}}",
|
|
45
|
+
"documents": [
|
|
46
|
+
{
|
|
47
|
+
"document_id": "{{__0.id}}",
|
|
48
|
+
"doc_name": "{{__0.name}}"
|
|
49
|
+
}
|
|
50
|
+
]
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
],
|
|
54
|
+
"trigger_config": {
|
|
55
|
+
"operator": "@trigger/manual",
|
|
56
|
+
"dataSource": {
|
|
57
|
+
"operator": "",
|
|
58
|
+
"parameters": {
|
|
59
|
+
"accessorid": "00000000-0000-0000-0000-000000000000"
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "document",
|
|
3
|
+
"type": "dataset",
|
|
4
|
+
"description": "文档元信息数据集",
|
|
5
|
+
"arguments": [
|
|
6
|
+
{ "name": "name", "required": true, "description": "数据集名称", "type": "string" },
|
|
7
|
+
{ "name": "catalog_id", "required": false, "default": "adp_bkn_catalog", "description": "所属目录ID", "type": "string" },
|
|
8
|
+
{ "name": "source_identifier", "required": false, "default": "", "description": "数据源标识符,为空时自动生成", "type": "string" }
|
|
9
|
+
]
|
|
10
|
+
}
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
{
|
|
2
|
+
"catalog_id": "{{catalog_id}}",
|
|
3
|
+
"name": "{{name}}",
|
|
4
|
+
"category": "dataset",
|
|
5
|
+
"status": "active",
|
|
6
|
+
"description": "文档元信息数据集",
|
|
7
|
+
"source_identifier": "{{source_identifier}}",
|
|
8
|
+
"schema_definition": [
|
|
9
|
+
{ "name": "id", "type": "keyword" },
|
|
10
|
+
{ "name": "document_id", "type": "keyword" },
|
|
11
|
+
{ "name": "doc_name", "type": "text", "features": [
|
|
12
|
+
{ "name": "doc_name_keyword", "feature_type": "keyword", "ref_property": "doc_name" },
|
|
13
|
+
{ "name": "doc_name_fulltext", "feature_type": "fulltext", "ref_property": "doc_name", "config": { "analyzer": "standard" } }
|
|
14
|
+
]},
|
|
15
|
+
{ "name": "doc_md5", "type": "keyword" },
|
|
16
|
+
{ "name": "pages", "type": "integer" },
|
|
17
|
+
{ "name": "file_type", "type": "keyword" },
|
|
18
|
+
{ "name": "creator_id", "type": "keyword" },
|
|
19
|
+
{ "name": "created_at", "type": "text" },
|
|
20
|
+
{ "name": "updated_at", "type": "text" },
|
|
21
|
+
{ "name": "@timestamp", "type": "long" }
|
|
22
|
+
]
|
|
23
|
+
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "document-content",
|
|
3
|
+
"type": "dataset",
|
|
4
|
+
"description": "文档切片及向量数据集",
|
|
5
|
+
"arguments": [
|
|
6
|
+
{ "name": "name", "required": true, "description": "数据集名称", "type": "string" },
|
|
7
|
+
{ "name": "catalog_id", "required": false, "default": "adp_bkn_catalog", "description": "所属目录ID", "type": "string" },
|
|
8
|
+
{ "name": "source_identifier", "required": false, "default": "", "description": "数据源标识符,为空时自动生成", "type": "string" }
|
|
9
|
+
]
|
|
10
|
+
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
{
|
|
2
|
+
"catalog_id": "{{catalog_id}}",
|
|
3
|
+
"name": "{{name}}",
|
|
4
|
+
"category": "dataset",
|
|
5
|
+
"status": "active",
|
|
6
|
+
"description": "文档切片及向量数据集",
|
|
7
|
+
"source_identifier": "{{source_identifier}}",
|
|
8
|
+
"schema_definition": [
|
|
9
|
+
{ "name": "id", "type": "keyword" },
|
|
10
|
+
{ "name": "document_id", "type": "keyword" },
|
|
11
|
+
{ "name": "slice_md5", "type": "keyword" },
|
|
12
|
+
{ "name": "deduplication_id", "type": "keyword" },
|
|
13
|
+
{ "name": "segment_id", "type": "integer" },
|
|
14
|
+
{ "name": "slice_type", "type": "integer" },
|
|
15
|
+
{ "name": "slice_content", "type": "text", "features": [
|
|
16
|
+
{ "name": "slice_content_fulltext", "feature_type": "fulltext", "ref_property": "slice_content", "config": { "analyzer": "standard" } }
|
|
17
|
+
]},
|
|
18
|
+
{ "name": "text_vector", "type": "vector", "features": [
|
|
19
|
+
{ "name": "text_vector", "feature_type": "vector", "ref_property": "text_vector", "config": { "dimension": 768, "method": { "name": "hnsw", "engine": "lucene", "parameters": { "ef_construction": 256 } } } }
|
|
20
|
+
]},
|
|
21
|
+
{ "name": "img_path", "type": "keyword" },
|
|
22
|
+
{ "name": "image_vector", "type": "vector", "features": [
|
|
23
|
+
{ "name": "image_vector", "feature_type": "vector", "ref_property": "image_vector", "config": { "dimension": 512, "method": { "name": "hnsw", "engine": "lucene", "parameters": { "ef_construction": 256 } } } }
|
|
24
|
+
]},
|
|
25
|
+
{ "name": "created_at", "type": "text" },
|
|
26
|
+
{ "name": "updated_at", "type": "text" },
|
|
27
|
+
{ "name": "@timestamp", "type": "long" }
|
|
28
|
+
]
|
|
29
|
+
}
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "document-element",
|
|
3
|
+
"type": "dataset",
|
|
4
|
+
"description": "文档结构化元素数据集",
|
|
5
|
+
"arguments": [
|
|
6
|
+
{ "name": "name", "required": true, "description": "数据集名称", "type": "string" },
|
|
7
|
+
{ "name": "catalog_id", "required": false, "default": "adp_bkn_catalog", "description": "所属目录ID", "type": "string" },
|
|
8
|
+
{ "name": "source_identifier", "required": false, "default": "", "description": "数据源标识符,为空时自动生成", "type": "string" }
|
|
9
|
+
]
|
|
10
|
+
}
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
{
|
|
2
|
+
"catalog_id": "{{catalog_id}}",
|
|
3
|
+
"name": "{{name}}",
|
|
4
|
+
"category": "dataset",
|
|
5
|
+
"status": "active",
|
|
6
|
+
"description": "文档结构化元素数据集",
|
|
7
|
+
"source_identifier": "{{source_identifier}}",
|
|
8
|
+
"schema_definition": [
|
|
9
|
+
{ "name": "id", "type": "keyword" },
|
|
10
|
+
{ "name": "element_id", "type": "keyword" },
|
|
11
|
+
{ "name": "document_id", "type": "keyword" },
|
|
12
|
+
{ "name": "element_type", "type": "keyword" },
|
|
13
|
+
{ "name": "parent_id", "type": "keyword" },
|
|
14
|
+
{ "name": "level", "type": "integer" },
|
|
15
|
+
{ "name": "content", "type": "text", "features": [
|
|
16
|
+
{ "name": "content_fulltext", "feature_type": "fulltext", "ref_property": "content", "config": { "analyzer": "standard" } }
|
|
17
|
+
]},
|
|
18
|
+
{ "name": "metadata", "type": "object" },
|
|
19
|
+
{ "name": "@timestamp", "type": "long" }
|
|
20
|
+
]
|
|
21
|
+
}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
/**
 * Contents of a template's `manifest.json`: identifies the template and
 * declares the arguments that may be supplied when rendering it.
 */
export interface TemplateManifest {
    /** Template name. */
    name: string;
    /** Category the template belongs to. */
    type: "dataset" | "bkn" | "dataflow";
    /** Human-readable summary of the template. */
    description: string;
    /** Arguments accepted by the template, in declaration order. */
    arguments: {
        /** Argument name; also the `{{name}}` placeholder it fills. */
        name: string;
        /** Whether rendering fails when no value and no default is available. */
        required: boolean;
        /** Human-readable summary of the argument. */
        description: string;
        /** Declared value kind of the argument. */
        type: "string" | "integer" | "boolean" | "array";
        /** Value used when the caller does not supply one. */
        default?: unknown;
    }[];
}
|
|
13
|
+
/**
 * Result of loading a template from disk: the parsed template body together
 * with its parsed manifest.
 */
export interface LoadedTemplate {
    /** Parsed contents of `template.json`. */
    template: {
        [key: string]: unknown;
    };
    /** Parsed contents of `manifest.json`. */
    manifest: TemplateManifest;
    /** Path of the directory the template was loaded from. */
    templatePath: string;
}
|
|
18
|
+
/**
 * Generate a source identifier of the form `<prefix>_<timestamp>_<random>`,
 * where the timestamp and random parts are base-36 strings.
 *
 * @param prefix Text placed at the start of the identifier.
 * @returns The generated identifier.
 */
|
|
21
|
+
export declare function generateSourceIdentifier(prefix: string): string;
|
|
22
|
+
/**
 * Render a template by substituting `{{name}}` placeholders in every string
 * it contains.
 *
 * For each argument declared in the manifest, the caller-supplied value is
 * used when present, otherwise the manifest default. Arguments not declared
 * in the manifest are ignored.
 *
 * @param template Parsed template body; it is copied, not modified.
 * @param manifest Manifest declaring the accepted arguments and defaults.
 * @param args Caller-supplied argument values keyed by argument name.
 * @returns A new object with placeholders replaced.
 * @throws Error when a required argument has neither a value nor a default.
 */
|
|
25
|
+
export declare function renderTemplate(template: Record<string, unknown>, manifest: TemplateManifest, args: Record<string, unknown>): Record<string, unknown>;
|
|
26
|
+
/**
 * Load `template.json` and `manifest.json` from
 * `<templatesDir>/<templateType>/<templateName>`.
 *
 * @param templateName Name of the template directory.
 * @param templateType Template category; used as the parent directory name.
 * @param templatesDir Root directory that holds all templates.
 * @returns The parsed template and manifest, or `null` when the directory is
 *   not readable or either file cannot be read or parsed.
 */
|
|
29
|
+
export declare function loadTemplate(templateName: string, templateType: "dataset" | "bkn" | "dataflow", templatesDir: string): Promise<LoadedTemplate | null>;
|
|
30
|
+
/**
 * List the templates available for one template type.
 *
 * Each subdirectory of `<templatesDir>/<templateType>` that loads
 * successfully contributes the `name` and `description` from its manifest.
 *
 * @param templateType Template category to list.
 * @param templatesDir Root directory that holds all templates.
 * @returns The discovered templates; an empty array when the type directory
 *   cannot be read.
 */
|
|
33
|
+
export declare function listTemplates(templateType: "dataset" | "bkn" | "dataflow", templatesDir: string): Promise<Array<{
|
|
34
|
+
name: string;
|
|
35
|
+
description: string;
|
|
36
|
+
}>>;
|
|
37
|
+
/**
 * Get the path of the bundled `templates` directory, derived from the
 * location of this module (for example `dist/templates` when running from
 * `dist`, or `src/templates` when running from `src`).
 *
 * @returns Path of the templates directory.
 */
|
|
40
|
+
export declare function getTemplatesDir(): string;
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { readFile, access } from "node:fs/promises";
import { join } from "node:path";
import { constants } from "node:fs";
import { fileURLToPath } from "node:url";
|
|
4
|
+
/**
 * Build a source identifier shaped like `<prefix>_<timestamp>_<random>`.
 *
 * Both generated segments are base-36 strings: the current `Date.now()` value
 * and up to 13 characters taken from a `Math.random()` fraction. The random
 * segment is not cryptographically secure and its length can vary.
 *
 * @param {string} prefix - Text placed in front of the generated segments.
 * @returns {string} The assembled identifier.
 */
export function generateSourceIdentifier(prefix) {
    // Skip the leading "0." of the base-36 fraction and keep at most 13 chars.
    const randomSegment = Math.random().toString(36).slice(2, 15);
    const timeSegment = Date.now().toString(36);
    return "".concat(prefix, "_", timeSegment, "_", randomSegment);
}
|
|
12
|
+
/**
 * Replace `{{name}}` placeholders in a string with values from a lookup object.
 *
 * Only placeholders made entirely of word characters (`\w+`) are considered,
 * so tokens such as `{{__0.id}}` never match and pass through untouched. A
 * placeholder whose name is not an own property of `values`, or whose value
 * is `undefined`, is left in the text unchanged.
 *
 * @param {string} str - Text that may contain placeholders.
 * @param {Record<string, unknown>} values - Replacement values keyed by name.
 * @returns {string} The text with every resolvable placeholder replaced by
 *   `String(value)`.
 */
function replacePlaceholders(str, values) {
    return str.replace(/\{\{(\w+)\}\}/g, (placeholder, key) => {
        // Own-property check: inherited members such as `constructor` or
        // `toString` must not be mistaken for supplied values.
        const isSupplied = Object.prototype.hasOwnProperty.call(values, key);
        if (isSupplied && values[key] !== undefined) {
            return String(values[key]);
        }
        // Unresolved placeholders are kept verbatim.
        return placeholder;
    });
}
|
|
23
|
+
/**
 * Recursively substitute placeholders in every string inside a value.
 *
 * Strings are passed through `replacePlaceholders`; arrays and objects are
 * rebuilt as fresh copies with each element or property value processed the
 * same way; any other value (numbers, booleans, `null`, `undefined`) is
 * returned as is. Property keys are never rewritten, only values.
 *
 * @param {unknown} obj - Value to process.
 * @param {Record<string, unknown>} values - Replacement values keyed by name.
 * @returns {unknown} A processed copy for strings, arrays and objects; the
 *   input itself for anything else.
 */
function deepReplace(obj, values) {
    if (typeof obj === "string") {
        return replacePlaceholders(obj, values);
    }
    if (Array.isArray(obj)) {
        return obj.map((item) => deepReplace(item, values));
    }
    if (obj !== null && typeof obj === "object") {
        // Object.fromEntries defines own data properties, so a key named
        // "__proto__" (which JSON.parse can produce) is copied like any other
        // key instead of replacing the prototype of the result.
        const replacedEntries = Object.entries(obj).map(([key, value]) => [key, deepReplace(value, values)]);
        return Object.fromEntries(replacedEntries);
    }
    return obj;
}
|
|
42
|
+
/**
 * Render a template: resolve the manifest's arguments, then substitute the
 * resulting values into every `{{name}}` placeholder of the template.
 *
 * For each argument declared in `manifest.arguments`, the value from `args`
 * wins when it is not `undefined`; otherwise the manifest default is used
 * when it is not `undefined`. Keys of `args` that the manifest does not
 * declare are ignored.
 *
 * @param {Record<string, unknown>} template - Parsed template body.
 * @param {{arguments: Array<{name: string, required: boolean, default?: unknown}>}} manifest
 *   Manifest declaring the accepted arguments.
 * @param {Record<string, unknown>} args - Caller-supplied argument values.
 * @returns {Record<string, unknown>} Copy of the template with placeholders
 *   replaced.
 * @throws {Error} When one or more required arguments have neither a supplied
 *   value nor a default; the message lists all of them.
 */
export function renderTemplate(template, manifest, args) {
    const resolved = {};
    const absent = [];
    for (const { name, required, default: fallback } of manifest.arguments) {
        const supplied = args[name];
        if (supplied !== undefined) {
            resolved[name] = supplied;
            continue;
        }
        if (fallback !== undefined) {
            resolved[name] = fallback;
            continue;
        }
        if (required) {
            absent.push(name);
        }
    }
    // Report every missing argument at once rather than only the first.
    if (absent.length !== 0) {
        throw new Error(`Missing required argument(s): ${absent.join(", ")}`);
    }
    return deepReplace(template, resolved);
}
|
|
66
|
+
/**
 * Load a template and its manifest from
 * `<templatesDir>/<templateType>/<templateName>`.
 *
 * The directory must be readable and must contain `template.json` and
 * `manifest.json`, both holding valid JSON. Any failure along the way
 * (directory not accessible, file unreadable, JSON syntax error) yields
 * `null` instead of an exception.
 *
 * @param {string} templateName - Name of the template directory.
 * @param {string} templateType - Template category directory name.
 * @param {string} templatesDir - Root directory that holds all templates.
 * @returns {Promise<{template: object, manifest: object, templatePath: string} | null>}
 *   The parsed files plus `templatePath`, which is the template's directory
 *   (not the path of `template.json`), or `null` on failure.
 */
export async function loadTemplate(templateName, templateType, templatesDir) {
    const directory = join(templatesDir, templateType, templateName);
    const filePaths = ["template.json", "manifest.json"].map((fileName) => join(directory, fileName));
    try {
        await access(directory, constants.R_OK);
        // Both files are read concurrently, then parsed in template/manifest order.
        const contents = await Promise.all(filePaths.map((filePath) => readFile(filePath, "utf-8")));
        const [template, manifest] = contents.map((text) => JSON.parse(text));
        return { template, manifest, templatePath: directory };
    }
    catch {
        return null;
    }
}
|
|
94
|
+
/**
 * Enumerate the templates available under `<templatesDir>/<templateType>`.
 *
 * Every subdirectory is loaded with `loadTemplate`, one after another;
 * those that load contribute the `name` and `description` from their
 * manifest. Non-directory entries and templates that fail to load are
 * skipped. If the type directory cannot be read, or anything else fails
 * while listing, an empty array is returned.
 *
 * @param {string} templateType - Template category directory name.
 * @param {string} templatesDir - Root directory that holds all templates.
 * @returns {Promise<Array<{name: string, description: string}>>} Summaries of
 *   the templates found, in directory listing order.
 */
export async function listTemplates(templateType, templatesDir) {
    const fsPromises = await import("node:fs/promises");
    const typeDir = join(templatesDir, templateType);
    try {
        const dirents = await fsPromises.readdir(typeDir, { withFileTypes: true });
        const summaries = [];
        for (const dirent of dirents.filter((candidate) => candidate.isDirectory())) {
            const loaded = await loadTemplate(dirent.name, templateType, templatesDir);
            if (!loaded) {
                continue;
            }
            const { name, description } = loaded.manifest;
            summaries.push({ name, description });
        }
        return summaries;
    }
    catch {
        return [];
    }
}
|
|
120
|
+
/**
 * Get the path of the bundled `templates` directory.
 *
 * The path is derived from this module's own location: the `templates`
 * directory that sits beside the module's parent directory. When running
 * from `dist` that is `dist/templates`; when running from `src` (tsx) it is
 * `src/templates`.
 *
 * @returns {string} Absolute filesystem path of the templates directory.
 */
export function getTemplatesDir() {
    // fileURLToPath (rather than URL#pathname) decodes percent-escapes such as
    // "%20" and yields a valid drive-letter path on Windows.
    const modulePath = fileURLToPath(import.meta.url);
    // First ".." strips the file name, second ".." leaves the module's directory.
    return join(modulePath, "..", "..", "templates");
}
|
package/package.json
CHANGED