@ontos-ai/knowhere-claw 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,7 +1,7 @@
1
1
  # Knowhere for OpenClaw
2
2
 
3
3
  Knowhere is an OpenClaw plugin that parses documents and URLs with Knowhere,
4
- stores the extracted result package in OpenClaw state, and gives agents a
4
+ stores extracted Knowhere files in OpenClaw state, and gives agents a
5
5
  browse-first toolset for grounded document work.
6
6
 
7
7
  Quick mental model:
@@ -22,7 +22,7 @@ the machine running that Gateway, then restart that Gateway.
22
22
  ## What You Get
23
23
 
24
24
  - Ingest local files or document URLs with Knowhere
25
- - Store parsed result packages inside OpenClaw-managed state
25
+ - Store parsed documents inside OpenClaw-managed state
26
26
  - Preview document structure, search chunks, and inspect raw result files
27
27
  - Reuse stored documents across `session`, `agent`, or `global` scope
28
28
  - Ship bundled `knowhere` and `knowhere_memory` skills so agents prefer this
@@ -73,7 +73,7 @@ Config notes:
73
73
  - `pollIntervalMs`, `pollTimeoutMs`, `requestTimeoutMs`, `uploadTimeoutMs`:
74
74
  optional tuning for job polling, API calls, and large uploads.
75
75
  - An explicit `storageDir` such as
76
- `/home/<user>/.openclaw/plugin-state/knowhere` makes stored result packages
76
+ `/home/<user>/.openclaw/plugin-state/knowhere` makes stored documents
77
77
  easier to inspect, back up, or clean up.
78
78
 
79
79
  ## How OpenClaw Uses It
@@ -110,8 +110,8 @@ actually call the plugin tools.
110
110
  Within each scope, the plugin keeps:
111
111
 
112
112
  - an `index.json` cache of stored document summaries
113
- - per-document metadata and browse indexes
114
- - the extracted Knowhere result package under `result/`
113
+ - a `metadata/` directory with one JSON record per stored document
114
+ - the extracted Knowhere result files directly inside each document directory
115
115
 
116
116
  ## Common Workflow
117
117
 
@@ -12,6 +12,8 @@ export interface ChunkData {
12
12
  chunk_id: string;
13
13
  path: string;
14
14
  content: string;
15
+ /** Document directory name (e.g. "report.pdf"). Injected by kg-service at load time. */
16
+ fileKey: string;
15
17
  metadata: {
16
18
  keywords?: string[];
17
19
  tokens?: string[];
@@ -8,15 +8,14 @@ function normalizeKeyword(keyword) {
8
8
  return keyword.toLowerCase().trim();
9
9
  }
10
10
  /**
11
- * Extract file key from a chunk path
12
- * Equivalent to Python: _extract_file_key
13
- *
14
- * Example: "Default_Root/report.docx/Chapter 1" -> "report.docx"
11
+ * Extract file key from a chunk.
12
+ * Prefers the explicit fileKey field; falls back to path-based extraction for backward compatibility.
15
13
  */
16
- function extractFileKey(path) {
17
- const parts = path.split("/");
14
+ function getFileKey(chunk) {
15
+ if (chunk.fileKey) return chunk.fileKey;
16
+ const parts = chunk.path.replace(/-->/g, "/").split("/");
18
17
  if (parts.length >= 2) return parts[1];
19
- return path;
18
+ return chunk.path;
20
19
  }
21
20
  /**
22
21
  * Build inverted keyword index: keyword -> [chunk_ids]
@@ -97,7 +96,7 @@ function buildConnections(chunks, config, logger) {
97
96
  for (const chunk of chunks) chunkById.set(chunk.chunk_id, chunk);
98
97
  const chunksByFile = /* @__PURE__ */ new Map();
99
98
  for (const chunk of chunks) {
100
- const fileKey = extractFileKey(chunk.path);
99
+ const fileKey = getFileKey(chunk);
101
100
  if (!chunksByFile.has(fileKey)) chunksByFile.set(fileKey, /* @__PURE__ */ new Set());
102
101
  chunksByFile.get(fileKey).add(chunk.chunk_id);
103
102
  }
@@ -120,14 +119,14 @@ function buildConnections(chunks, config, logger) {
120
119
  const sourceChunk = chunkById.get(sourceId);
121
120
  if (!sourceChunk) continue;
122
121
  const sourceKeywords = getKeywords(sourceChunk);
123
- const sourceFileKey = extractFileKey(sourceChunk.path);
122
+ const sourceFileKey = getFileKey(sourceChunk);
124
123
  for (const targetId of targetIds) {
125
124
  const targetChunk = chunkById.get(targetId);
126
125
  if (!targetChunk) continue;
127
126
  const pairKey = sourceId < targetId ? `${sourceId}::${targetId}` : `${targetId}::${sourceId}`;
128
127
  if (seenPairs.has(pairKey)) continue;
129
128
  seenPairs.add(pairKey);
130
- const targetFileKey = extractFileKey(targetChunk.path);
129
+ const targetFileKey = getFileKey(targetChunk);
131
130
  if (config.crossFileOnly && sourceFileKey === targetFileKey) continue;
132
131
  const contentRatio = sequenceMatcherRatio(sourceChunk.content.slice(0, 500), targetChunk.content.slice(0, 500));
133
132
  if (contentRatio >= config.maxContentOverlap) {
@@ -32,12 +32,15 @@ interface FileMetadata {
32
32
  top_keywords: string[];
33
33
  top_summary: string;
34
34
  importance: number;
35
+ created_at: string;
35
36
  }
36
37
  /**
37
38
  * Complete knowledge graph structure
38
39
  */
39
40
  export interface KnowledgeGraph {
40
41
  version: string;
42
+ updated_at: string;
43
+ kb_id: string;
41
44
  stats: {
42
45
  total_files: number;
43
46
  total_chunks: number;
@@ -59,7 +62,7 @@ export interface ChunkStats {
59
62
  * Main function to build knowledge graph
60
63
  * Equivalent to Python: build_knowledge_graph
61
64
  */
62
- export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, jiebaInitialized: boolean, logger?: PluginLogger): KnowledgeGraph;
65
+ export declare function buildKnowledgeGraph(chunks: ChunkData[], connections: Connection[], chunkStats: ChunkStats, jiebaInitialized: boolean, logger?: PluginLogger, kbId?: string): KnowledgeGraph;
63
66
  /**
64
67
  * Incremental update: match new chunks against existing chunks
65
68
  * Equivalent to Python: _incremental_connections
@@ -7,12 +7,14 @@ import * as nodejieba from "nodejieba";
7
7
  * Builds file-level knowledge graphs from chunk connections with TF-IDF and importance scoring.
8
8
  */
9
9
  /**
10
- * Extract file key from chunk path
10
+ * Extract file key from a chunk.
11
+ * Prefers the explicit fileKey field; falls back to path-based extraction for backward compatibility.
11
12
  */
12
- function extractFileKey(path) {
13
- const parts = path.split("/");
13
+ function getFileKey(chunk) {
14
+ if (chunk.fileKey) return chunk.fileKey;
15
+ const parts = chunk.path.replace(/-->/g, "/").split("/");
14
16
  if (parts.length >= 2) return parts[1];
15
- return path;
17
+ return chunk.path;
16
18
  }
17
19
  /**
18
20
  * Extract label from chunk path (last segment)
@@ -107,7 +109,7 @@ function computeFileImportance(fileKey, fileChunks, allChunks, chunkStats, decay
107
109
  function getAllChunkCountsByFile(chunks) {
108
110
  const countsByFile = /* @__PURE__ */ new Map();
109
111
  for (const chunk of chunks) {
110
- const fileKey = extractFileKey(chunk.path);
112
+ const fileKey = getFileKey(chunk);
111
113
  countsByFile.set(fileKey, (countsByFile.get(fileKey) || 0) + 1);
112
114
  }
113
115
  return Array.from(countsByFile.values());
@@ -122,8 +124,8 @@ function aggregateFileLevelEdges(connections, chunkById, topN = 5) {
122
124
  const sourceChunk = chunkById.get(conn.source);
123
125
  const targetChunk = chunkById.get(conn.target);
124
126
  if (!sourceChunk || !targetChunk) continue;
125
- const sourceFile = extractFileKey(sourceChunk.path);
126
- const targetFile = extractFileKey(targetChunk.path);
127
+ const sourceFile = getFileKey(sourceChunk);
128
+ const targetFile = getFileKey(targetChunk);
127
129
  if (sourceFile === targetFile) continue;
128
130
  const pairKey = sourceFile < targetFile ? `${sourceFile}::${targetFile}` : `${targetFile}::${sourceFile}`;
129
131
  if (!filePairs.has(pairKey)) filePairs.set(pairKey, []);
@@ -159,13 +161,13 @@ function aggregateFileLevelEdges(connections, chunkById, topN = 5) {
159
161
  * Main function to build knowledge graph
160
162
  * Equivalent to Python: build_knowledge_graph
161
163
  */
162
- function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized, logger) {
164
+ function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized, logger, kbId) {
163
165
  logger?.info(`Building knowledge graph from ${chunks.length} chunks and ${connections.length} connections`);
164
166
  const chunkById = /* @__PURE__ */ new Map();
165
167
  for (const chunk of chunks) chunkById.set(chunk.chunk_id, chunk);
166
168
  const chunksByFile = /* @__PURE__ */ new Map();
167
169
  for (const chunk of chunks) {
168
- const fileKey = extractFileKey(chunk.path);
170
+ const fileKey = getFileKey(chunk);
169
171
  if (!chunksByFile.has(fileKey)) chunksByFile.set(fileKey, []);
170
172
  chunksByFile.get(fileKey).push(chunk);
171
173
  }
@@ -188,13 +190,16 @@ function buildKnowledgeGraph(chunks, connections, chunkStats, jiebaInitialized,
188
190
  types: typeCount,
189
191
  top_keywords: topKeywords,
190
192
  top_summary: topSummary,
191
- importance
193
+ importance,
194
+ created_at: (/* @__PURE__ */ new Date()).toISOString()
192
195
  };
193
196
  }
194
197
  const fileEdges = aggregateFileLevelEdges(connections, chunkById, 5);
195
198
  logger?.info(`Created graph with ${Object.keys(filesMetadata).length} files and ${fileEdges.length} edges`);
196
199
  return {
197
200
  version: "2.0",
201
+ updated_at: (/* @__PURE__ */ new Date()).toISOString(),
202
+ kb_id: kbId || "",
198
203
  stats: {
199
204
  total_files: Object.keys(filesMetadata).length,
200
205
  total_chunks: chunks.length,
package/dist/index.js CHANGED
@@ -6,7 +6,7 @@ import { KnowledgeGraphService } from "./kg-service.js";
6
6
  const plugin = {
7
7
  id: "knowhere-claw",
8
8
  name: "Knowhere",
9
- description: "Knowhere document ingestion and direct stored-result access for OpenClaw.",
9
+ description: "Knowhere document ingestion, job management, and knowledge graph tools for OpenClaw.",
10
10
  configSchema: knowherePluginConfigSchema,
11
11
  register(api) {
12
12
  const config = resolveKnowhereConfig(api);
@@ -41,12 +41,6 @@ const plugin = {
41
41
  "knowhere_list_jobs",
42
42
  "knowhere_get_job_status",
43
43
  "knowhere_import_completed_job",
44
- "knowhere_grep",
45
- "knowhere_read_result_file",
46
- "knowhere_preview_document",
47
- "knowhere_list_documents",
48
- "knowhere_remove_document",
49
- "knowhere_clear_scope",
50
44
  "knowhere_set_api_key",
51
45
  "knowhere_kg_list",
52
46
  "knowhere_kg_query"
@@ -1,3 +1,4 @@
1
+ import { resolveStoredKnowhereResultRoot } from "./parser.js";
1
2
  import { buildConnections, init_connect_builder } from "./connect-builder.js";
2
3
  import { buildKnowledgeGraph } from "./graph-builder.js";
3
4
  import path from "node:path";
@@ -143,7 +144,8 @@ var KnowledgeGraphService = class {
143
144
  const kbPath = await this.ensureKbDirectory(params.kbId);
144
145
  const docDir = path.join(kbPath, params.docId);
145
146
  await fs.ensureDir(docDir);
146
- await fs.copy(params.sourcePath, docDir, { overwrite: true });
147
+ const sourceResultRoot = await resolveStoredKnowhereResultRoot(params.sourcePath);
148
+ await fs.copy(sourceResultRoot, docDir, { overwrite: true });
147
149
  const keywordsPath = path.join(docDir, "keywords.json");
148
150
  await fs.writeJSON(keywordsPath, params.keywords, { spaces: 2 });
149
151
  const metadataPath = path.join(docDir, "metadata.json");
@@ -185,7 +187,10 @@ var KnowledgeGraphService = class {
185
187
  const chunksPath = path.join(kbPath, docDir, "chunks.json");
186
188
  if (await fs.pathExists(chunksPath)) {
187
189
  const chunksData = await fs.readJSON(chunksPath);
188
- if (chunksData.chunks && Array.isArray(chunksData.chunks)) allChunks.push(...chunksData.chunks);
190
+ if (chunksData.chunks && Array.isArray(chunksData.chunks)) allChunks.push(...chunksData.chunks.map((c) => ({
191
+ ...c,
192
+ fileKey: docDir
193
+ })));
189
194
  }
190
195
  }
191
196
  if (allChunks.length === 0) {
@@ -198,7 +203,7 @@ var KnowledgeGraphService = class {
198
203
  const chunkStatsPath = path.join(kbPath, "chunk_stats.json");
199
204
  let chunkStats = {};
200
205
  if (await fs.pathExists(chunkStatsPath)) chunkStats = await fs.readJSON(chunkStatsPath);
201
- const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, this.jiebaInitialized, this.logger);
206
+ const knowledgeGraph = buildKnowledgeGraph(allChunks, connections, chunkStats, this.jiebaInitialized, this.logger, kbId);
202
207
  const graphFile = path.join(kbPath, "knowledge_graph.json");
203
208
  await fs.writeJSON(graphFile, knowledgeGraph, { spaces: 2 });
204
209
  this.logger.info(`Knowledge graph saved to ${graphFile}`);
package/dist/parser.d.ts CHANGED
@@ -1,16 +1,12 @@
1
- import type { KnowhereDownloadedResult, KnowhereManifest, KnowhereParseResult, KnowhereStatistics, StoredBrowseIndex, StoredChunk } from "./types";
1
+ import type { KnowhereDownloadedResult, KnowhereManifest, KnowhereStatistics } from "./types";
2
2
  type KnowhereStoredResultSummary = {
3
3
  manifest: KnowhereManifest;
4
4
  chunkCount: number;
5
5
  statistics: KnowhereStatistics;
6
6
  };
7
- export declare const STORED_BROWSE_INDEX_VERSION = 2;
8
7
  export declare function resolveResultEntryPath(rootDir: string, entryPath: string): string;
9
- export declare function buildStoredPathPrefixes(storedPath: string): string[];
10
- export declare function isStoredBrowseIndex(value: unknown): value is StoredBrowseIndex;
11
- export declare function buildStoredBrowseIndex(resultDir: string, manifest: KnowhereManifest, chunks: StoredChunk[]): Promise<StoredBrowseIndex>;
12
8
  export declare function extractKnowhereResultArchive(downloadedResult: KnowhereDownloadedResult, targetDir: string): Promise<void>;
13
- export declare function readStoredKnowhereResultSummary(resultDir: string): Promise<KnowhereStoredResultSummary>;
14
- export declare function readStoredKnowhereResultContent(resultDir: string): Promise<Omit<KnowhereParseResult, "browseIndex">>;
15
- export declare function readStoredKnowhereParseResult(resultDir: string): Promise<KnowhereParseResult>;
9
+ export declare function resolveStoredKnowhereResultRoot(documentDir: string): Promise<string>;
10
+ export declare function resolveStoredKnowhereArtifactPath(documentDir: string, entryPath: string): Promise<string>;
11
+ export declare function readStoredKnowhereResultSummary(documentDir: string): Promise<KnowhereStoredResultSummary>;
16
12
  export {};
package/dist/parser.js CHANGED
@@ -3,11 +3,9 @@ import fs from "node:fs/promises";
3
3
  import path from "node:path";
4
4
  import { createHash } from "node:crypto";
5
5
  import { strFromU8, unzipSync } from "fflate";
6
+ //#region src/parser.ts
6
7
  const CHUNKS_FILE_NAME = "chunks.json";
7
- const FULL_MARKDOWN_FILE_NAME = "full.md";
8
- const HIERARCHY_FILE_NAME = "hierarchy.json";
9
- const HIERARCHY_VIEW_FILE_NAME = "hierarchy_view.html";
10
- const KB_CSV_FILE_NAME = "kb.csv";
8
+ const LEGACY_RESULT_DIRECTORY_NAME = "result";
11
9
  const MANIFEST_FILE_NAME = "manifest.json";
12
10
  function readZipText(entries, fileName) {
13
11
  const entry = entries[fileName];
@@ -16,6 +14,15 @@ function readZipText(entries, fileName) {
16
14
  async function ensureDir(targetPath) {
17
15
  await fs.mkdir(targetPath, { recursive: true });
18
16
  }
17
+ async function pathExists(targetPath) {
18
+ try {
19
+ await fs.access(targetPath);
20
+ return true;
21
+ } catch (error) {
22
+ if (isNodeError(error) && error.code === "ENOENT") return false;
23
+ throw error;
24
+ }
25
+ }
19
26
  async function readTextFile(targetPath) {
20
27
  try {
21
28
  return await fs.readFile(targetPath, "utf-8");
@@ -39,36 +46,6 @@ function resolveResultEntryPath(rootDir, entryPath) {
39
46
  function normalizeRelativePath(value) {
40
47
  return value.replace(/\\/g, "/").replace(/^\/+/, "");
41
48
  }
42
- function normalizeStoredPath(value) {
43
- if (typeof value !== "string") return null;
44
- return value.trim() || null;
45
- }
46
- function readChunkNumber(rawChunk, metadata, key) {
47
- const metadataValue = metadata[key];
48
- if (typeof metadataValue === "number" && Number.isFinite(metadataValue)) return metadataValue;
49
- const rawValue = rawChunk[key];
50
- if (typeof rawValue === "number" && Number.isFinite(rawValue)) return rawValue;
51
- return null;
52
- }
53
- function readChunkStringArray(rawChunk, metadata, key) {
54
- const metadataValue = metadata[key];
55
- if (Array.isArray(metadataValue)) return metadataValue.filter((entry) => typeof entry === "string");
56
- const rawValue = rawChunk[key];
57
- if (Array.isArray(rawValue)) return rawValue.filter((entry) => typeof entry === "string");
58
- return [];
59
- }
60
- function readChunkArray(rawChunk, metadata, key) {
61
- const metadataValue = metadata[key];
62
- if (Array.isArray(metadataValue)) return metadataValue;
63
- const rawValue = rawChunk[key];
64
- if (Array.isArray(rawValue)) return rawValue;
65
- return [];
66
- }
67
- function extractAssetFilePath(rawChunk, metadata) {
68
- const candidates = [rawChunk.file_path, metadata.file_path];
69
- for (const candidate of candidates) if (typeof candidate === "string" && candidate.trim()) return normalizeRelativePath(candidate.trim());
70
- return null;
71
- }
72
49
  function parseRawChunks(value) {
73
50
  if (Array.isArray(value)) return value.filter((entry) => isRecord(entry));
74
51
  if (isRecord(value) && Array.isArray(value.chunks)) return value.chunks.filter((entry) => isRecord(entry));
@@ -77,24 +54,6 @@ function parseRawChunks(value) {
77
54
  function parseManifest(value) {
78
55
  return isRecord(value) ? value : {};
79
56
  }
80
- function buildChunk(rawChunk) {
81
- const metadata = isRecord(rawChunk.metadata) ? rawChunk.metadata : {};
82
- const type = rawChunk.type === "image" || rawChunk.type === "table" || rawChunk.type === "text" ? rawChunk.type : "text";
83
- return {
84
- chunkId: typeof rawChunk.chunk_id === "string" ? rawChunk.chunk_id : "",
85
- type,
86
- path: normalizeStoredPath(rawChunk.path),
87
- summary: typeof metadata.summary === "string" ? metadata.summary : typeof rawChunk.summary === "string" ? rawChunk.summary : "",
88
- content: typeof rawChunk.content === "string" ? rawChunk.content : "",
89
- tokens: readChunkNumber(rawChunk, metadata, "tokens"),
90
- keywords: readChunkStringArray(rawChunk, metadata, "keywords"),
91
- relationships: readChunkArray(rawChunk, metadata, "relationships"),
92
- metadata,
93
- assetFilePath: extractAssetFilePath(rawChunk, metadata),
94
- originalName: typeof metadata.original_name === "string" ? metadata.original_name : typeof rawChunk.original_name === "string" ? rawChunk.original_name : null,
95
- tableType: typeof metadata.table_type === "string" ? metadata.table_type : typeof rawChunk.table_type === "string" ? rawChunk.table_type : null
96
- };
97
- }
98
57
  function normalizeStatistics(manifest, rawChunks) {
99
58
  if (manifest.statistics) return manifest.statistics;
100
59
  return {
@@ -110,185 +69,6 @@ function validateKnowhereResultChecksum(zipBuffer, manifest) {
110
69
  if (typeof checksum !== "string" || !checksum) return;
111
70
  if (createHash("sha256").update(zipBuffer).digest("hex") !== checksum) throw new Error("Knowhere result ZIP checksum mismatch.");
112
71
  }
113
- function tokenizeStoredPath(storedPath) {
114
- const slashSegments = storedPath.split("/").map((segment) => segment.trim()).filter(Boolean);
115
- const tokens = [];
116
- for (const slashSegment of slashSegments) {
117
- const arrowSegments = slashSegment.split("-->").map((segment) => segment.trim()).filter(Boolean);
118
- if (arrowSegments.length === 0) continue;
119
- tokens.push({
120
- delimiter: tokens.length === 0 ? null : "/",
121
- segment: arrowSegments[0] || ""
122
- });
123
- for (const arrowSegment of arrowSegments.slice(1)) tokens.push({
124
- delimiter: "-->",
125
- segment: arrowSegment
126
- });
127
- }
128
- return tokens;
129
- }
130
- function buildStoredPathPrefixes(storedPath) {
131
- const tokens = tokenizeStoredPath(storedPath);
132
- const prefixes = [];
133
- let currentPath = "";
134
- for (const token of tokens) {
135
- currentPath = token.delimiter ? `${currentPath}${token.delimiter}${token.segment}` : token.segment;
136
- prefixes.push(currentPath);
137
- }
138
- return prefixes;
139
- }
140
- function ensurePathAccumulator(accumulators, pathValue, parentPath, depth) {
141
- const existing = accumulators.get(pathValue);
142
- if (existing) {
143
- if (parentPath && !existing.parentPath) existing.parentPath = parentPath;
144
- return existing;
145
- }
146
- const next = {
147
- childPaths: /* @__PURE__ */ new Set(),
148
- chunkCount: 0,
149
- chunkIds: [],
150
- depth,
151
- directChunkCount: 0,
152
- imageChunkCount: 0,
153
- parentPath,
154
- path: pathValue,
155
- tableChunkCount: 0,
156
- textChunkCount: 0
157
- };
158
- accumulators.set(pathValue, next);
159
- return next;
160
- }
161
- function incrementPathCounters(accumulator, chunkType) {
162
- accumulator.chunkCount += 1;
163
- if (chunkType === "image") {
164
- accumulator.imageChunkCount += 1;
165
- return;
166
- }
167
- if (chunkType === "table") {
168
- accumulator.tableChunkCount += 1;
169
- return;
170
- }
171
- accumulator.textChunkCount += 1;
172
- }
173
- function buildPathRecords(chunks) {
174
- const accumulators = /* @__PURE__ */ new Map();
175
- for (const chunk of chunks) {
176
- if (!chunk.path) continue;
177
- const prefixes = buildStoredPathPrefixes(chunk.path);
178
- for (const [index, prefix] of prefixes.entries()) {
179
- const parentPath = index > 0 ? prefixes[index - 1] || null : null;
180
- const accumulator = ensurePathAccumulator(accumulators, prefix, parentPath, index + 1);
181
- incrementPathCounters(accumulator, chunk.type);
182
- if (parentPath) ensurePathAccumulator(accumulators, parentPath, index > 1 ? prefixes[index - 2] || null : null, index).childPaths.add(prefix);
183
- if (index === prefixes.length - 1) {
184
- accumulator.directChunkCount += 1;
185
- if (chunk.chunkId) accumulator.chunkIds.push(chunk.chunkId);
186
- }
187
- }
188
- }
189
- return [...accumulators.values()].sort((left, right) => left.depth - right.depth || left.path.localeCompare(right.path)).map((entry) => ({
190
- path: entry.path,
191
- parentPath: entry.parentPath,
192
- depth: entry.depth,
193
- childPaths: [...entry.childPaths].sort((left, right) => left.localeCompare(right)),
194
- chunkIds: [...entry.chunkIds],
195
- directChunkCount: entry.directChunkCount,
196
- chunkCount: entry.chunkCount,
197
- textChunkCount: entry.textChunkCount,
198
- imageChunkCount: entry.imageChunkCount,
199
- tableChunkCount: entry.tableChunkCount
200
- }));
201
- }
202
- function readManifestAssetEntries(manifest, key) {
203
- const rawEntries = (isRecord(manifest.files) ? manifest.files : {})[key];
204
- if (!Array.isArray(rawEntries)) return [];
205
- return rawEntries.filter((entry) => isRecord(entry));
206
- }
207
- function buildResultFileChunkLookup(manifest, chunks) {
208
- const entries = /* @__PURE__ */ new Map();
209
- for (const key of ["images", "tables"]) {
210
- const assetEntries = readManifestAssetEntries(manifest, key);
211
- for (const entry of assetEntries) {
212
- const filePath = typeof entry.file_path === "string" && entry.file_path.trim() ? normalizeRelativePath(entry.file_path.trim()) : null;
213
- if (!filePath) continue;
214
- entries.set(filePath, {
215
- chunkId: typeof entry.id === "string" ? entry.id : null,
216
- format: typeof entry.format === "string" ? entry.format : null
217
- });
218
- }
219
- }
220
- for (const chunk of chunks) {
221
- if (!chunk.assetFilePath || entries.has(chunk.assetFilePath)) continue;
222
- entries.set(chunk.assetFilePath, {
223
- chunkId: chunk.chunkId || null,
224
- format: null
225
- });
226
- }
227
- return entries;
228
- }
229
- function inferResultFileKind(relativePath) {
230
- if (relativePath === MANIFEST_FILE_NAME) return "manifest";
231
- if (relativePath === CHUNKS_FILE_NAME) return "chunks";
232
- if (relativePath === FULL_MARKDOWN_FILE_NAME) return "fullMarkdown";
233
- if (relativePath === KB_CSV_FILE_NAME) return "kbCsv";
234
- if (relativePath === HIERARCHY_FILE_NAME) return "hierarchy";
235
- if (relativePath === HIERARCHY_VIEW_FILE_NAME) return "hierarchyView";
236
- if (relativePath.startsWith("images/")) return "image";
237
- if (relativePath.startsWith("tables/")) return "table";
238
- return "other";
239
- }
240
- function inferResultFileFormat(relativePath) {
241
- return path.posix.extname(relativePath).replace(/^\./, "").trim() || null;
242
- }
243
- function isStringArray(value) {
244
- return Array.isArray(value) && value.every((entry) => typeof entry === "string");
245
- }
246
- function isStoredBrowseIndex(value) {
247
- if (!isRecord(value)) return false;
248
- if (value.version !== 2) return false;
249
- if (!isStringArray(value.chunkOrder)) return false;
250
- if (!Array.isArray(value.paths) || !Array.isArray(value.resultFiles)) return false;
251
- if (!value.paths.every((entry) => isRecord(entry) && typeof entry.path === "string" && (entry.parentPath === null || typeof entry.parentPath === "string") && typeof entry.depth === "number" && Number.isFinite(entry.depth) && isStringArray(entry.childPaths) && isStringArray(entry.chunkIds) && typeof entry.directChunkCount === "number" && typeof entry.chunkCount === "number" && typeof entry.textChunkCount === "number" && typeof entry.imageChunkCount === "number" && typeof entry.tableChunkCount === "number")) return false;
252
- return value.resultFiles.every((entry) => isRecord(entry) && typeof entry.relativePath === "string" && typeof entry.kind === "string" && (entry.chunkId === null || typeof entry.chunkId === "string") && (entry.format === null || typeof entry.format === "string") && (entry.sizeBytes === null || typeof entry.sizeBytes === "number" && Number.isFinite(entry.sizeBytes)));
253
- }
254
- async function listResultFiles(rootDir, currentDir = rootDir) {
255
- const entries = await fs.readdir(currentDir, { withFileTypes: true });
256
- const files = [];
257
- for (const entry of entries) {
258
- const absolutePath = path.join(currentDir, entry.name);
259
- if (entry.isDirectory()) {
260
- files.push(...await listResultFiles(rootDir, absolutePath));
261
- continue;
262
- }
263
- if (!entry.isFile()) continue;
264
- files.push(normalizeRelativePath(path.relative(rootDir, absolutePath)));
265
- }
266
- return files.sort((left, right) => left.localeCompare(right));
267
- }
268
- async function buildResultFileRecords(resultDir, manifest, chunks) {
269
- const lookup = buildResultFileChunkLookup(manifest, chunks);
270
- const relativePaths = await listResultFiles(resultDir);
271
- return Promise.all(relativePaths.map(async (relativePath) => {
272
- const absolutePath = resolveResultEntryPath(resultDir, relativePath);
273
- const stats = await fs.stat(absolutePath);
274
- const manifestEntry = lookup.get(relativePath);
275
- return {
276
- relativePath,
277
- kind: inferResultFileKind(relativePath),
278
- chunkId: manifestEntry?.chunkId ?? null,
279
- format: manifestEntry?.format ?? inferResultFileFormat(relativePath),
280
- sizeBytes: stats.isFile() ? stats.size : null
281
- };
282
- }));
283
- }
284
- async function buildStoredBrowseIndex(resultDir, manifest, chunks) {
285
- return {
286
- version: 2,
287
- paths: buildPathRecords(chunks),
288
- chunkOrder: chunks.map((chunk) => chunk.chunkId).filter((chunkId) => chunkId.length > 0),
289
- resultFiles: await buildResultFileRecords(resultDir, manifest, chunks)
290
- };
291
- }
292
72
  async function extractKnowhereResultArchive(downloadedResult, targetDir) {
293
73
  const zipBuffer = Buffer.isBuffer(downloadedResult.zipBytes) ? downloadedResult.zipBytes : Buffer.from(downloadedResult.zipBytes);
294
74
  const entries = unzipSync(new Uint8Array(zipBuffer));
@@ -302,22 +82,24 @@ async function extractKnowhereResultArchive(downloadedResult, targetDir) {
302
82
  await fs.writeFile(outputPath, entryBytes);
303
83
  }
304
84
  }
305
- async function readStoredKnowhereResultSummary(resultDir) {
306
- const manifest = parseManifest(await readJsonFile(path.join(resultDir, MANIFEST_FILE_NAME)));
307
- const rawChunks = parseRawChunks(await readJsonFile(path.join(resultDir, CHUNKS_FILE_NAME)));
85
+ async function resolveStoredKnowhereResultRoot(documentDir) {
86
+ if (await pathExists(path.join(documentDir, MANIFEST_FILE_NAME))) return documentDir;
87
+ const legacyResultDir = path.join(documentDir, LEGACY_RESULT_DIRECTORY_NAME);
88
+ if (await pathExists(path.join(legacyResultDir, MANIFEST_FILE_NAME))) return legacyResultDir;
89
+ return documentDir;
90
+ }
91
+ async function resolveStoredKnowhereArtifactPath(documentDir, entryPath) {
92
+ return resolveResultEntryPath(await resolveStoredKnowhereResultRoot(documentDir), entryPath);
93
+ }
94
+ async function readStoredKnowhereResultSummary(documentDir) {
95
+ const resultRoot = await resolveStoredKnowhereResultRoot(documentDir);
96
+ const manifest = parseManifest(await readJsonFile(path.join(resultRoot, MANIFEST_FILE_NAME)));
97
+ const rawChunks = parseRawChunks(await readJsonFile(path.join(resultRoot, CHUNKS_FILE_NAME)));
308
98
  return {
309
99
  manifest,
310
100
  chunkCount: rawChunks.length,
311
101
  statistics: normalizeStatistics(manifest, rawChunks)
312
102
  };
313
103
  }
314
- async function readStoredKnowhereResultContent(resultDir) {
315
- return {
316
- manifest: parseManifest(await readJsonFile(path.join(resultDir, MANIFEST_FILE_NAME))),
317
- chunks: parseRawChunks(await readJsonFile(path.join(resultDir, CHUNKS_FILE_NAME))).map((rawChunk) => buildChunk(rawChunk)),
318
- fullMarkdown: await readTextFile(path.join(resultDir, FULL_MARKDOWN_FILE_NAME)) || "",
319
- hierarchy: await readJsonFile(path.join(resultDir, HIERARCHY_FILE_NAME))
320
- };
321
- }
322
104
  //#endregion
323
- export { buildStoredBrowseIndex, extractKnowhereResultArchive, isStoredBrowseIndex, readStoredKnowhereResultContent, readStoredKnowhereResultSummary, resolveResultEntryPath };
105
+ export { extractKnowhereResultArchive, readStoredKnowhereResultSummary, resolveStoredKnowhereArtifactPath, resolveStoredKnowhereResultRoot };
package/dist/store.d.ts CHANGED
@@ -1,10 +1,10 @@
1
- import type { ChannelRouteRecord, KnowhereScope, PluginLogger, SaveStoredDocumentPayload, ScopeMode, StoredDocumentPayload, StoredDocumentRecord } from "./types";
1
+ import type { ChannelRouteRecord, KnowhereScope, PluginLogger, SaveStoredDocumentPayload, ScopeMode, StoredDocumentMetadata, StoredDocumentRecord } from "./types";
2
+ type StoredDocumentScopePaths = Pick<KnowhereScope, "documentsDir" | "metadataDir">;
2
3
  export declare class KnowhereStore {
3
4
  private readonly rootDir;
4
5
  private readonly scopeMode;
5
6
  private readonly logger;
6
7
  private readonly indexCache;
7
- private readonly documentPayloadCache;
8
8
  private readonly scopeAccessChains;
9
9
  private readonly scopeKeyAliases;
10
10
  private readonly sessionScopeKeysBySessionId;
@@ -48,14 +48,8 @@ export declare class KnowhereStore {
48
48
  sessionKey?: string;
49
49
  sessionId?: string;
50
50
  }): KnowhereScope;
51
+ readDocumentMetadata(scope: StoredDocumentScopePaths, docId: string): Promise<StoredDocumentMetadata | null>;
51
52
  listDocuments(scope: KnowhereScope): Promise<StoredDocumentRecord[]>;
52
- loadDocumentPayload(scope: KnowhereScope, docId: string): Promise<StoredDocumentPayload | null>;
53
- getResultFileAbsolutePath(scope: KnowhereScope, docId: string, relativePath: string): string;
54
- readResultFile(scope: KnowhereScope, docId: string, relativePath: string): Promise<{
55
- document: StoredDocumentRecord;
56
- relativePath: string;
57
- text: string | null;
58
- } | null>;
59
53
  saveDownloadedDocument(scope: KnowhereScope, payload: SaveStoredDocumentPayload, options?: {
60
54
  overwrite?: boolean;
61
55
  }): Promise<StoredDocumentRecord>;
@@ -65,11 +59,6 @@ export declare class KnowhereStore {
65
59
  private persistIndex;
66
60
  private runWithScopeAccessLock;
67
61
  private removeDocumentArtifacts;
68
- private buildDocumentPayloadCacheKey;
69
- private touchDocumentPayloadCache;
70
- private deleteDocumentPayloadCache;
71
- private deleteScopeDocumentPayloadCaches;
72
- private loadOrBuildBrowseIndex;
73
62
  private buildRouteKey;
74
63
  private ensureRoutesLoaded;
75
64
  private persistRoutes;
@@ -77,3 +66,4 @@ export declare class KnowhereStore {
77
66
  private resolveKnownScopeKey;
78
67
  private rebuildIndex;
79
68
  }
69
+ export {};