@xdarkicex/openclaw-memory-libravdb 1.5.3 → 1.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context-engine.js +17 -1
- package/dist/index.js +277 -48
- package/dist/ingest-queue.d.ts +17 -1
- package/dist/ingest-queue.js +38 -19
- package/dist/markdown-ingest.d.ts +8 -0
- package/dist/markdown-ingest.js +225 -25
- package/dist/types.d.ts +2 -0
- package/openclaw.plugin.json +15 -1
- package/package.json +1 -1
package/dist/context-engine.js
CHANGED
|
@@ -559,6 +559,19 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
|
|
|
559
559
|
: {}),
|
|
560
560
|
};
|
|
561
561
|
}
|
|
562
|
+
function isGrpcAuthConfigured() {
|
|
563
|
+
const secret = process.env.LIBRAVDB_AUTH_SECRET?.trim();
|
|
564
|
+
const secretFile = process.env.LIBRAVDB_AUTH_SECRET_FILE?.trim();
|
|
565
|
+
return (typeof secret === "string" && secret.length > 0) || (typeof secretFile === "string" && secretFile.length > 0);
|
|
566
|
+
}
|
|
567
|
+
function buildGrpcAuthInitializationError(error) {
|
|
568
|
+
const code = typeof error?.code === "number" ||
|
|
569
|
+
typeof error?.code === "string"
|
|
570
|
+
? ` code=${String(error.code)}`
|
|
571
|
+
: "";
|
|
572
|
+
return new Error(`LibraVDB gRPC auth initialization failed${code}; ` +
|
|
573
|
+
`check LIBRAVDB_AUTH_SECRET and daemon auth configuration`);
|
|
574
|
+
}
|
|
562
575
|
async function runCompaction(args) {
|
|
563
576
|
const request = buildCompactSessionRequest(args);
|
|
564
577
|
const kernel = await getKernelOrNull("compact");
|
|
@@ -644,7 +657,10 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
|
|
|
644
657
|
});
|
|
645
658
|
}
|
|
646
659
|
catch (error) {
|
|
647
|
-
|
|
660
|
+
if (isGrpcAuthConfigured()) {
|
|
661
|
+
throw buildGrpcAuthInitializationError(error);
|
|
662
|
+
}
|
|
663
|
+
// Proceed when the kernel does not require auth and the init call is unavailable.
|
|
648
664
|
}
|
|
649
665
|
return await kernel.bootstrapSession({
|
|
650
666
|
sessionId,
|
package/dist/index.js
CHANGED
|
@@ -839,14 +839,14 @@ var require_binary_encoding = __commonJS({
|
|
|
839
839
|
};
|
|
840
840
|
exports2.BinaryWriter = BinaryWriter;
|
|
841
841
|
var BinaryReader = class {
|
|
842
|
-
constructor(buf,
|
|
842
|
+
constructor(buf, textDecoder2) {
|
|
843
843
|
this.varint64 = varint_js_1.varint64read;
|
|
844
844
|
this.uint32 = varint_js_1.varint32read;
|
|
845
845
|
this.buf = buf;
|
|
846
846
|
this.len = buf.length;
|
|
847
847
|
this.pos = 0;
|
|
848
848
|
this.view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
|
|
849
|
-
this.textDecoder =
|
|
849
|
+
this.textDecoder = textDecoder2 !== null && textDecoder2 !== void 0 ? textDecoder2 : new TextDecoder();
|
|
850
850
|
}
|
|
851
851
|
/**
|
|
852
852
|
* Reads a tag - field number and wire type.
|
|
@@ -34240,6 +34240,17 @@ function buildContextEngineFactory(runtime, cfg, logger = console) {
|
|
|
34240
34240
|
...typeof cfg.continuityPriorContextTokens === "number" ? { continuityPriorContextTokens: cfg.continuityPriorContextTokens } : {}
|
|
34241
34241
|
};
|
|
34242
34242
|
}
|
|
34243
|
+
function isGrpcAuthConfigured() {
|
|
34244
|
+
const secret = process.env.LIBRAVDB_AUTH_SECRET?.trim();
|
|
34245
|
+
const secretFile = process.env.LIBRAVDB_AUTH_SECRET_FILE?.trim();
|
|
34246
|
+
return typeof secret === "string" && secret.length > 0 || typeof secretFile === "string" && secretFile.length > 0;
|
|
34247
|
+
}
|
|
34248
|
+
function buildGrpcAuthInitializationError(error) {
|
|
34249
|
+
const code = typeof error?.code === "number" || typeof error?.code === "string" ? ` code=${String(error.code)}` : "";
|
|
34250
|
+
return new Error(
|
|
34251
|
+
`LibraVDB gRPC auth initialization failed${code}; check LIBRAVDB_AUTH_SECRET and daemon auth configuration`
|
|
34252
|
+
);
|
|
34253
|
+
}
|
|
34243
34254
|
async function runCompaction(args) {
|
|
34244
34255
|
const request = buildCompactSessionRequest(args);
|
|
34245
34256
|
const kernel = await getKernelOrNull("compact");
|
|
@@ -34322,6 +34333,9 @@ function buildContextEngineFactory(runtime, cfg, logger = console) {
|
|
|
34322
34333
|
clientCapabilities: [{ name: "grpc", version: "1.0" }]
|
|
34323
34334
|
});
|
|
34324
34335
|
} catch (error) {
|
|
34336
|
+
if (isGrpcAuthConfigured()) {
|
|
34337
|
+
throw buildGrpcAuthInitializationError(error);
|
|
34338
|
+
}
|
|
34325
34339
|
}
|
|
34326
34340
|
return await kernel.bootstrapSession({
|
|
34327
34341
|
sessionId,
|
|
@@ -38523,37 +38537,55 @@ var IngestQueue = class {
|
|
|
38523
38537
|
this.options.chunkTokens = DEFAULT_OPTIONS.chunkTokens;
|
|
38524
38538
|
}
|
|
38525
38539
|
}
|
|
38526
|
-
async enqueueIngest(sourceDoc, text, baseParams) {
|
|
38540
|
+
async enqueueIngest(sourceDoc, text, baseParams, maxChunkTokens) {
|
|
38527
38541
|
if (this.options.chunkTokens === Infinity) {
|
|
38528
|
-
|
|
38542
|
+
const resp = await this.ingestWithRetry({
|
|
38529
38543
|
...baseParams,
|
|
38530
38544
|
sourceDoc,
|
|
38531
38545
|
text,
|
|
38532
38546
|
mode: IngestMode.REPLACE
|
|
38533
38547
|
});
|
|
38534
|
-
|
|
38535
|
-
|
|
38536
|
-
|
|
38537
|
-
|
|
38538
|
-
|
|
38539
|
-
|
|
38540
|
-
|
|
38541
|
-
|
|
38542
|
-
|
|
38543
|
-
|
|
38544
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
38545
|
-
const isFirst = i === 0;
|
|
38548
|
+
return resp.feedback;
|
|
38549
|
+
}
|
|
38550
|
+
let currentLimit = maxChunkTokens && maxChunkTokens > 0 ? maxChunkTokens : this.options.chunkTokens;
|
|
38551
|
+
let offset = 0;
|
|
38552
|
+
let isFirst = true;
|
|
38553
|
+
let lastFeedback;
|
|
38554
|
+
while (offset < text.length) {
|
|
38555
|
+
const remainingText = text.slice(offset);
|
|
38556
|
+
const chunks = splitIntoChunks(remainingText, currentLimit);
|
|
38557
|
+
const chunkText = chunks[0].text;
|
|
38546
38558
|
const chunkParams = {
|
|
38547
38559
|
...baseParams,
|
|
38548
38560
|
sourceDoc,
|
|
38549
|
-
text:
|
|
38561
|
+
text: chunkText,
|
|
38550
38562
|
mode: isFirst ? IngestMode.REPLACE : IngestMode.APPEND
|
|
38551
38563
|
};
|
|
38552
|
-
await this.ingestWithRetry(chunkParams);
|
|
38564
|
+
const resp = await this.ingestWithRetry(chunkParams);
|
|
38565
|
+
lastFeedback = resp.feedback;
|
|
38566
|
+
if (lastFeedback && lastFeedback.nodesAccepted === 0 && lastFeedback.tokenBurstLimit && lastFeedback.tokenBurstLimit > 0 && lastFeedback.tokenBurstLimit < currentLimit) {
|
|
38567
|
+
currentLimit = lastFeedback.tokenBurstLimit;
|
|
38568
|
+
continue;
|
|
38569
|
+
}
|
|
38570
|
+
if (lastFeedback && lastFeedback.nodesAccepted === 0) {
|
|
38571
|
+
this.logger.warn?.(
|
|
38572
|
+
`[ingest-queue] Chunk permanently rejected for ${sourceDoc} at offset=${offset} length=${chunkText.length} tokenBurstLimit=${lastFeedback.tokenBurstLimit ?? "unset"}`
|
|
38573
|
+
);
|
|
38574
|
+
}
|
|
38575
|
+
if (this.options.onChunkFeedback && lastFeedback) {
|
|
38576
|
+
this.options.onChunkFeedback(lastFeedback);
|
|
38577
|
+
}
|
|
38578
|
+
offset += chunkText.length;
|
|
38579
|
+
isFirst = false;
|
|
38580
|
+
if (lastFeedback && !lastFeedback.acceptMore && offset < text.length) {
|
|
38581
|
+
const delay = lastFeedback.retryAfterMs || 1e3;
|
|
38582
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
38583
|
+
}
|
|
38553
38584
|
}
|
|
38585
|
+
return lastFeedback;
|
|
38554
38586
|
}
|
|
38555
38587
|
async ingestWithRetry(params) {
|
|
38556
|
-
|
|
38588
|
+
return withRetry(
|
|
38557
38589
|
() => this.rpcCall("ingest_markdown_document", params),
|
|
38558
38590
|
this.options.maxRetries,
|
|
38559
38591
|
this.options.retryBaseDelayMs,
|
|
@@ -38644,6 +38676,7 @@ var DEFAULT_DEBOUNCE_MS2 = 150;
|
|
|
38644
38676
|
var DEFAULT_TOKENIZER_ID = "markdown-ingest:v1";
|
|
38645
38677
|
var MARKDOWN_INGEST_VERSION = 3;
|
|
38646
38678
|
var HASH_BACKEND = "wasm-fnv1a64";
|
|
38679
|
+
var STREAM_CHUNK_BYTES = 64 * 1024;
|
|
38647
38680
|
function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = createRealFsApi2()) {
|
|
38648
38681
|
const adapters = [];
|
|
38649
38682
|
const genericRoots = normalizeMarkdownRoots(cfg.markdownIngestionRoots);
|
|
@@ -38656,7 +38689,9 @@ function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = cr
|
|
|
38656
38689
|
include: cfg.markdownIngestionInclude,
|
|
38657
38690
|
exclude: cfg.markdownIngestionExclude,
|
|
38658
38691
|
debounceMs: cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS2,
|
|
38659
|
-
snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath)
|
|
38692
|
+
snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath),
|
|
38693
|
+
priorityMode: cfg.markdownIngestionPriorityMode,
|
|
38694
|
+
maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile
|
|
38660
38695
|
},
|
|
38661
38696
|
getRpc,
|
|
38662
38697
|
logger,
|
|
@@ -38674,7 +38709,9 @@ function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = cr
|
|
|
38674
38709
|
include: cfg.markdownIngestionObsidianInclude,
|
|
38675
38710
|
exclude: cfg.markdownIngestionObsidianExclude,
|
|
38676
38711
|
debounceMs: cfg.markdownIngestionObsidianDebounceMs ?? cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS2,
|
|
38677
|
-
snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath)
|
|
38712
|
+
snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath),
|
|
38713
|
+
priorityMode: cfg.markdownIngestionPriorityMode,
|
|
38714
|
+
maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile
|
|
38678
38715
|
},
|
|
38679
38716
|
getRpc,
|
|
38680
38717
|
logger,
|
|
@@ -38731,6 +38768,8 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38731
38768
|
getRpc;
|
|
38732
38769
|
logger;
|
|
38733
38770
|
snapshotPath;
|
|
38771
|
+
priorityMode;
|
|
38772
|
+
maxTokensPerFile;
|
|
38734
38773
|
states = /* @__PURE__ */ new Map();
|
|
38735
38774
|
fileStates = /* @__PURE__ */ new Map();
|
|
38736
38775
|
activeScans = /* @__PURE__ */ new Set();
|
|
@@ -38739,6 +38778,17 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38739
38778
|
started = false;
|
|
38740
38779
|
ingestQueue = null;
|
|
38741
38780
|
stopping = false;
|
|
38781
|
+
lastAcceptMore = true;
|
|
38782
|
+
lastRetryAfterMs = 0;
|
|
38783
|
+
lastQueueDepth = 0;
|
|
38784
|
+
lastQueueCapacity = 0;
|
|
38785
|
+
lastProcessingTimeUs = 0;
|
|
38786
|
+
lastNodesAccepted = 0;
|
|
38787
|
+
lastNodesRejected = 0;
|
|
38788
|
+
lastTokensIngested = 0;
|
|
38789
|
+
lastTokenBurstLimit = 512;
|
|
38790
|
+
lastWalDepth = 0;
|
|
38791
|
+
lastWalCapacity = 0;
|
|
38742
38792
|
snapshotLoaded = false;
|
|
38743
38793
|
snapshotDirty = false;
|
|
38744
38794
|
constructor(kind, config, getRpc, logger, fsApi) {
|
|
@@ -38751,6 +38801,8 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38751
38801
|
this.getRpc = getRpc;
|
|
38752
38802
|
this.logger = logger;
|
|
38753
38803
|
this.snapshotPath = config.snapshotPath ?? resolveMarkdownSnapshotPath(kind);
|
|
38804
|
+
this.priorityMode = config.priorityMode ?? "mtime";
|
|
38805
|
+
this.maxTokensPerFile = Math.max(1, Math.trunc(config.maxTokensPerFile ?? 128e3));
|
|
38754
38806
|
this.tokenizerId = DEFAULT_TOKENIZER_ID;
|
|
38755
38807
|
this.coreDoc = true;
|
|
38756
38808
|
}
|
|
@@ -38803,7 +38855,8 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38803
38855
|
scanState: {
|
|
38804
38856
|
scanning: false,
|
|
38805
38857
|
dirty: false,
|
|
38806
|
-
timer: null
|
|
38858
|
+
timer: null,
|
|
38859
|
+
resumeFromPath: null
|
|
38807
38860
|
},
|
|
38808
38861
|
knownFiles: this.snapshotFilesForRoot(resolved),
|
|
38809
38862
|
directoryWatchers: /* @__PURE__ */ new Map()
|
|
@@ -38821,12 +38874,16 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38821
38874
|
return;
|
|
38822
38875
|
}
|
|
38823
38876
|
rootState.scanState.scanning = true;
|
|
38877
|
+
this.lastAcceptMore = true;
|
|
38878
|
+
this.lastRetryAfterMs = 0;
|
|
38824
38879
|
const scan = (async () => {
|
|
38825
38880
|
const stats = createScanStats();
|
|
38826
38881
|
const startedAt = Date.now();
|
|
38827
38882
|
try {
|
|
38828
38883
|
const currentFiles = /* @__PURE__ */ new Set();
|
|
38829
|
-
|
|
38884
|
+
const candidates = [];
|
|
38885
|
+
await this.walkDirectory(rootState, rootState.root, currentFiles, stats, candidates);
|
|
38886
|
+
await this.syncCandidates(rootState, candidates, stats);
|
|
38830
38887
|
if (!this.stopping) {
|
|
38831
38888
|
await this.pruneDeletedFiles(rootState, currentFiles, stats);
|
|
38832
38889
|
rootState.knownFiles = currentFiles;
|
|
@@ -38850,7 +38907,7 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38850
38907
|
this.activeScans.delete(scan);
|
|
38851
38908
|
}
|
|
38852
38909
|
}
|
|
38853
|
-
scheduleRootScan(rootState) {
|
|
38910
|
+
scheduleRootScan(rootState, delayMs) {
|
|
38854
38911
|
if (!this.started || this.stopping) {
|
|
38855
38912
|
return;
|
|
38856
38913
|
}
|
|
@@ -38866,9 +38923,9 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38866
38923
|
void this.scanRoot(rootState.root).catch((error) => {
|
|
38867
38924
|
this.logger.warn?.(`[markdown-ingest] root scan failed for ${rootState.root}: ${formatError(error)}`);
|
|
38868
38925
|
});
|
|
38869
|
-
}, this.debounceMs);
|
|
38926
|
+
}, Math.max(this.debounceMs, delayMs ?? 0));
|
|
38870
38927
|
}
|
|
38871
|
-
async walkDirectory(rootState, dir, currentFiles, stats) {
|
|
38928
|
+
async walkDirectory(rootState, dir, currentFiles, stats, candidates) {
|
|
38872
38929
|
if (this.shouldPruneDirectory(rootState.root, dir)) {
|
|
38873
38930
|
stats.directoriesPruned++;
|
|
38874
38931
|
return;
|
|
@@ -38891,7 +38948,7 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38891
38948
|
}
|
|
38892
38949
|
const child = path2.join(dir, entry.name);
|
|
38893
38950
|
if (entry.isDirectory()) {
|
|
38894
|
-
await this.walkDirectory(rootState, child, currentFiles, stats);
|
|
38951
|
+
await this.walkDirectory(rootState, child, currentFiles, stats, candidates);
|
|
38895
38952
|
continue;
|
|
38896
38953
|
}
|
|
38897
38954
|
if (!entry.isFile() || !isMarkdownFile(entry.name)) {
|
|
@@ -38904,16 +38961,71 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38904
38961
|
}
|
|
38905
38962
|
stats.filesIncluded++;
|
|
38906
38963
|
currentFiles.add(child);
|
|
38964
|
+
const stat = await this.safeStatWithCtime(child);
|
|
38965
|
+
if (!stat) {
|
|
38966
|
+
continue;
|
|
38967
|
+
}
|
|
38968
|
+
candidates.push({ path: child, size: stat.size, mtimeMs: stat.mtimeMs, ctimeMs: stat.ctimeMs, ordinal: candidates.length });
|
|
38969
|
+
}
|
|
38970
|
+
}
|
|
38971
|
+
async syncCandidates(rootState, candidates, stats) {
|
|
38972
|
+
const sorted = sortCandidates(candidates, this.priorityMode);
|
|
38973
|
+
let skipping = false;
|
|
38974
|
+
if (rootState.scanState.resumeFromPath) {
|
|
38975
|
+
const targetExists = sorted.some((c) => c.path === rootState.scanState.resumeFromPath);
|
|
38976
|
+
if (targetExists) {
|
|
38977
|
+
skipping = true;
|
|
38978
|
+
this.lastAcceptMore = true;
|
|
38979
|
+
this.lastRetryAfterMs = 0;
|
|
38980
|
+
} else {
|
|
38981
|
+
rootState.scanState.resumeFromPath = null;
|
|
38982
|
+
}
|
|
38983
|
+
}
|
|
38984
|
+
for (const candidate of sorted) {
|
|
38985
|
+
if (skipping) {
|
|
38986
|
+
if (candidate.path === rootState.scanState.resumeFromPath) {
|
|
38987
|
+
skipping = false;
|
|
38988
|
+
} else {
|
|
38989
|
+
continue;
|
|
38990
|
+
}
|
|
38991
|
+
}
|
|
38992
|
+
if (this.stopping) {
|
|
38993
|
+
return;
|
|
38994
|
+
}
|
|
38995
|
+
if (!this.lastAcceptMore) {
|
|
38996
|
+
if (!this.stopping) {
|
|
38997
|
+
rootState.scanState.resumeFromPath = candidate.path;
|
|
38998
|
+
this.scheduleRootScan(rootState, this.lastRetryAfterMs);
|
|
38999
|
+
}
|
|
39000
|
+
return;
|
|
39001
|
+
}
|
|
39002
|
+
if (this.lastWalCapacity > 0 && this.lastWalDepth > this.lastWalCapacity * 0.8) {
|
|
39003
|
+
rootState.scanState.resumeFromPath = candidate.path;
|
|
39004
|
+
if (!this.stopping) {
|
|
39005
|
+
this.scheduleRootScan(rootState, 2e3);
|
|
39006
|
+
}
|
|
39007
|
+
return;
|
|
39008
|
+
}
|
|
39009
|
+
const estimatedTokens = estimateTokens(candidate.size);
|
|
39010
|
+
if (estimatedTokens > this.maxTokensPerFile) {
|
|
39011
|
+
stats.filesDeferred++;
|
|
39012
|
+
continue;
|
|
39013
|
+
}
|
|
38907
39014
|
try {
|
|
38908
|
-
const result = await this.syncMarkdownFile(rootState,
|
|
39015
|
+
const result = await this.syncMarkdownFile(rootState, candidate.path, {
|
|
39016
|
+
size: candidate.size,
|
|
39017
|
+
mtimeMs: candidate.mtimeMs,
|
|
39018
|
+
ctimeMs: candidate.ctimeMs
|
|
39019
|
+
});
|
|
38909
39020
|
recordSyncResult(stats, result);
|
|
38910
39021
|
} catch (error) {
|
|
38911
39022
|
stats.syncErrors++;
|
|
38912
39023
|
if (!this.stopping) {
|
|
38913
|
-
this.logger.warn?.(`[markdown-ingest] sync failed for ${
|
|
39024
|
+
this.logger.warn?.(`[markdown-ingest] sync failed for ${candidate.path}: ${formatError(error)}`);
|
|
38914
39025
|
}
|
|
38915
39026
|
}
|
|
38916
39027
|
}
|
|
39028
|
+
rootState.scanState.resumeFromPath = null;
|
|
38917
39029
|
}
|
|
38918
39030
|
shouldPruneDirectory(root, dir) {
|
|
38919
39031
|
const relative = toPosixPath(path2.relative(root, dir));
|
|
@@ -38934,6 +39046,11 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38934
39046
|
try {
|
|
38935
39047
|
const watcher = this.fsApi.watch(dir, () => {
|
|
38936
39048
|
if (!this.stopping) {
|
|
39049
|
+
rootState.scanState.resumeFromPath = null;
|
|
39050
|
+
if (rootState.scanState.timer) {
|
|
39051
|
+
clearTimeout(rootState.scanState.timer);
|
|
39052
|
+
rootState.scanState.timer = null;
|
|
39053
|
+
}
|
|
38937
39054
|
this.scheduleRootScan(rootState);
|
|
38938
39055
|
}
|
|
38939
39056
|
});
|
|
@@ -38984,10 +39101,10 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38984
39101
|
stats.filesDeleted++;
|
|
38985
39102
|
}
|
|
38986
39103
|
}
|
|
38987
|
-
async syncMarkdownFile(rootState, filePath) {
|
|
39104
|
+
async syncMarkdownFile(rootState, filePath, initialStat) {
|
|
38988
39105
|
const sourceDoc = filePath;
|
|
38989
39106
|
const relativePath = toPosixPath(path2.relative(rootState.root, filePath));
|
|
38990
|
-
const stat = await this.
|
|
39107
|
+
const stat = initialStat ?? await this.safeStatWithCtime(filePath);
|
|
38991
39108
|
if (!stat) {
|
|
38992
39109
|
await this.deleteSourceDocument(sourceDoc);
|
|
38993
39110
|
this.fileStates.delete(sourceDoc);
|
|
@@ -38998,14 +39115,18 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38998
39115
|
if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) {
|
|
38999
39116
|
return "unchanged";
|
|
39000
39117
|
}
|
|
39001
|
-
const
|
|
39002
|
-
|
|
39118
|
+
const maxBytes = this.maxTokensPerFile * 4 + 3;
|
|
39119
|
+
const streamed = await this.safeReadFileStreamed(filePath, maxBytes);
|
|
39120
|
+
if (streamed === "too_large") {
|
|
39121
|
+
return "skipped";
|
|
39122
|
+
}
|
|
39123
|
+
if (!streamed) {
|
|
39003
39124
|
await this.deleteSourceDocument(sourceDoc);
|
|
39004
39125
|
this.fileStates.delete(sourceDoc);
|
|
39005
39126
|
this.snapshotDirty = true;
|
|
39006
39127
|
return "deleted";
|
|
39007
39128
|
}
|
|
39008
|
-
const fileHash =
|
|
39129
|
+
const { text, fileHash } = streamed;
|
|
39009
39130
|
if (cached && cached.fileHash === fileHash) {
|
|
39010
39131
|
this.setFileState(sourceDoc, {
|
|
39011
39132
|
root: rootState.root,
|
|
@@ -39017,14 +39138,13 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39017
39138
|
});
|
|
39018
39139
|
return "unchanged";
|
|
39019
39140
|
}
|
|
39020
|
-
const text = textDecoder2.decode(bytes);
|
|
39021
39141
|
if (this.kind === "obsidian" && this.includePatterns.length === 0 && !looksLikeObsidianNote(filePath, text)) {
|
|
39022
39142
|
await this.deleteSourceDocument(sourceDoc);
|
|
39023
39143
|
this.fileStates.delete(sourceDoc);
|
|
39024
39144
|
this.snapshotDirty = true;
|
|
39025
39145
|
return "skipped";
|
|
39026
39146
|
}
|
|
39027
|
-
await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs);
|
|
39147
|
+
await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs, stat.ctimeMs);
|
|
39028
39148
|
this.setFileState(sourceDoc, {
|
|
39029
39149
|
root: rootState.root,
|
|
39030
39150
|
sourceDoc,
|
|
@@ -39039,9 +39159,9 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39039
39159
|
this.fileStates.set(sourceDoc, state);
|
|
39040
39160
|
this.snapshotDirty = true;
|
|
39041
39161
|
}
|
|
39042
|
-
async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs) {
|
|
39162
|
+
async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs, sourceCtimeMs) {
|
|
39043
39163
|
const queue = await this.getIngestQueue();
|
|
39044
|
-
await queue.enqueueIngest(
|
|
39164
|
+
const feedback = await queue.enqueueIngest(
|
|
39045
39165
|
sourceDoc,
|
|
39046
39166
|
text,
|
|
39047
39167
|
{
|
|
@@ -39054,11 +39174,44 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39054
39174
|
fileHash,
|
|
39055
39175
|
sourceSize,
|
|
39056
39176
|
sourceMtimeMs: Math.trunc(sourceMtimeMs),
|
|
39177
|
+
sourceCtimeMs: Math.trunc(sourceCtimeMs),
|
|
39057
39178
|
ingestVersion: MARKDOWN_INGEST_VERSION,
|
|
39058
39179
|
hashBackend: HASH_BACKEND
|
|
39059
39180
|
}
|
|
39060
|
-
}
|
|
39181
|
+
},
|
|
39182
|
+
this.lastTokenBurstLimit
|
|
39061
39183
|
);
|
|
39184
|
+
this.applyIngestFeedback(feedback);
|
|
39185
|
+
}
|
|
39186
|
+
applyIngestFeedback(feedback) {
|
|
39187
|
+
if (feedback && typeof feedback.acceptMore === "boolean") {
|
|
39188
|
+
this.lastAcceptMore = feedback.acceptMore;
|
|
39189
|
+
this.lastQueueDepth = feedback.queueDepth ?? 0;
|
|
39190
|
+
this.lastQueueCapacity = feedback.queueCapacity ?? 0;
|
|
39191
|
+
this.lastProcessingTimeUs = feedback.processingTimeUs ?? 0;
|
|
39192
|
+
this.lastNodesAccepted = feedback.nodesAccepted ?? 0;
|
|
39193
|
+
this.lastNodesRejected = feedback.nodesRejected ?? 0;
|
|
39194
|
+
this.lastTokensIngested = feedback.tokensIngested ?? 0;
|
|
39195
|
+
if (feedback.tokenBurstLimit && feedback.tokenBurstLimit > 0) {
|
|
39196
|
+
this.lastTokenBurstLimit = feedback.tokenBurstLimit;
|
|
39197
|
+
}
|
|
39198
|
+
this.lastWalDepth = feedback.walDepth ?? 0;
|
|
39199
|
+
this.lastWalCapacity = feedback.walCapacity ?? 0;
|
|
39200
|
+
if (feedback.acceptMore) {
|
|
39201
|
+
this.lastRetryAfterMs = 0;
|
|
39202
|
+
} else {
|
|
39203
|
+
this.lastRetryAfterMs = feedback.retryAfterMs || 1e3;
|
|
39204
|
+
}
|
|
39205
|
+
} else {
|
|
39206
|
+
this.lastAcceptMore = true;
|
|
39207
|
+
this.lastRetryAfterMs = 0;
|
|
39208
|
+
this.lastQueueDepth = 0;
|
|
39209
|
+
this.lastQueueCapacity = 0;
|
|
39210
|
+
this.lastProcessingTimeUs = 0;
|
|
39211
|
+
this.lastNodesAccepted = 0;
|
|
39212
|
+
this.lastNodesRejected = 0;
|
|
39213
|
+
this.lastTokensIngested = 0;
|
|
39214
|
+
}
|
|
39062
39215
|
}
|
|
39063
39216
|
async deleteSourceDocument(sourceDoc) {
|
|
39064
39217
|
const queue = await this.getIngestQueue();
|
|
@@ -39067,7 +39220,9 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39067
39220
|
async getIngestQueue() {
|
|
39068
39221
|
if (!this.ingestQueue) {
|
|
39069
39222
|
const rpc = await this.getRpc();
|
|
39070
|
-
this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger
|
|
39223
|
+
this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger, {
|
|
39224
|
+
onChunkFeedback: (feedback) => this.applyIngestFeedback(feedback)
|
|
39225
|
+
});
|
|
39071
39226
|
}
|
|
39072
39227
|
return this.ingestQueue;
|
|
39073
39228
|
}
|
|
@@ -39078,13 +39233,49 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39078
39233
|
return null;
|
|
39079
39234
|
}
|
|
39080
39235
|
}
|
|
39081
|
-
async
|
|
39236
|
+
async safeStatWithCtime(filePath) {
|
|
39082
39237
|
try {
|
|
39083
|
-
return await this.fsApi.
|
|
39238
|
+
return await this.fsApi.stat(filePath);
|
|
39084
39239
|
} catch {
|
|
39085
39240
|
return null;
|
|
39086
39241
|
}
|
|
39087
39242
|
}
|
|
39243
|
+
async safeReadFileStreamed(filePath, maxBytes) {
|
|
39244
|
+
let stream = null;
|
|
39245
|
+
try {
|
|
39246
|
+
stream = await this.fsApi.openReadStream(filePath);
|
|
39247
|
+
const decoder = new TextDecoder();
|
|
39248
|
+
const chunks = [];
|
|
39249
|
+
let hash = 0xcbf29ce484222325n;
|
|
39250
|
+
let total = 0;
|
|
39251
|
+
const buffer = Buffer.allocUnsafe(STREAM_CHUNK_BYTES);
|
|
39252
|
+
while (true) {
|
|
39253
|
+
const { bytesRead } = await stream.read(buffer);
|
|
39254
|
+
if (bytesRead === 0) {
|
|
39255
|
+
break;
|
|
39256
|
+
}
|
|
39257
|
+
total += bytesRead;
|
|
39258
|
+
if (total > maxBytes) {
|
|
39259
|
+
return "too_large";
|
|
39260
|
+
}
|
|
39261
|
+
const chunk = buffer.subarray(0, bytesRead);
|
|
39262
|
+
hash = updateFnv1a64(hash, chunk);
|
|
39263
|
+
chunks.push(decoder.decode(chunk, { stream: true }));
|
|
39264
|
+
}
|
|
39265
|
+
chunks.push(decoder.decode());
|
|
39266
|
+
return {
|
|
39267
|
+
text: chunks.join(""),
|
|
39268
|
+
fileHash: hash.toString(16).padStart(16, "0")
|
|
39269
|
+
};
|
|
39270
|
+
} catch {
|
|
39271
|
+
return null;
|
|
39272
|
+
} finally {
|
|
39273
|
+
if (stream) {
|
|
39274
|
+
await stream.close().catch(() => {
|
|
39275
|
+
});
|
|
39276
|
+
}
|
|
39277
|
+
}
|
|
39278
|
+
}
|
|
39088
39279
|
snapshotFilesForRoot(root) {
|
|
39089
39280
|
const files = /* @__PURE__ */ new Set();
|
|
39090
39281
|
for (const state of this.fileStates.values()) {
|
|
@@ -39147,7 +39338,7 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39147
39338
|
}
|
|
39148
39339
|
logScanStats(root, stats, durationMs) {
|
|
39149
39340
|
this.logger.info?.(
|
|
39150
|
-
`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} errors=${stats.syncErrors} durationMs=${durationMs}`
|
|
39341
|
+
`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} deferred=${stats.filesDeferred} errors=${stats.syncErrors} durationMs=${durationMs}`
|
|
39151
39342
|
);
|
|
39152
39343
|
}
|
|
39153
39344
|
};
|
|
@@ -39161,9 +39352,27 @@ function createScanStats() {
|
|
|
39161
39352
|
filesUnchanged: 0,
|
|
39162
39353
|
filesIngested: 0,
|
|
39163
39354
|
filesDeleted: 0,
|
|
39164
|
-
syncErrors: 0
|
|
39355
|
+
syncErrors: 0,
|
|
39356
|
+
filesDeferred: 0
|
|
39165
39357
|
};
|
|
39166
39358
|
}
|
|
39359
|
+
function estimateTokens(size) {
|
|
39360
|
+
return Math.max(1, Math.floor(size / 4));
|
|
39361
|
+
}
|
|
39362
|
+
function sortCandidates(candidates, mode) {
|
|
39363
|
+
return [...candidates].sort((left, right) => {
|
|
39364
|
+
if (mode === "size") {
|
|
39365
|
+
return right.size - left.size || left.ordinal - right.ordinal;
|
|
39366
|
+
}
|
|
39367
|
+
if (mode === "ctime") {
|
|
39368
|
+
return right.ctimeMs - left.ctimeMs || left.ordinal - right.ordinal;
|
|
39369
|
+
}
|
|
39370
|
+
if (mode === "fifo") {
|
|
39371
|
+
return left.ordinal - right.ordinal;
|
|
39372
|
+
}
|
|
39373
|
+
return right.mtimeMs - left.mtimeMs || left.ordinal - right.ordinal;
|
|
39374
|
+
});
|
|
39375
|
+
}
|
|
39167
39376
|
function recordSyncResult(stats, result) {
|
|
39168
39377
|
if (result === "ingested") {
|
|
39169
39378
|
stats.filesIngested++;
|
|
@@ -39178,7 +39387,6 @@ function recordSyncResult(stats, result) {
|
|
|
39178
39387
|
function toPosixPath(value) {
|
|
39179
39388
|
return value.split(path2.sep).join("/");
|
|
39180
39389
|
}
|
|
39181
|
-
var textDecoder2 = new TextDecoder();
|
|
39182
39390
|
function normalizeMarkdownRoots(roots) {
|
|
39183
39391
|
if (!roots?.length) {
|
|
39184
39392
|
return [];
|
|
@@ -39193,6 +39401,15 @@ function normalizeMarkdownRoots(roots) {
|
|
|
39193
39401
|
}
|
|
39194
39402
|
return [...resolved];
|
|
39195
39403
|
}
|
|
39404
|
+
function updateFnv1a64(seed, bytes) {
|
|
39405
|
+
let hash = seed;
|
|
39406
|
+
const prime = 0x100000001b3n;
|
|
39407
|
+
for (let i = 0; i < bytes.length; i++) {
|
|
39408
|
+
hash ^= BigInt(bytes[i] ?? 0);
|
|
39409
|
+
hash = BigInt.asUintN(64, hash * prime);
|
|
39410
|
+
}
|
|
39411
|
+
return hash;
|
|
39412
|
+
}
|
|
39196
39413
|
function resolveMarkdownSnapshotPath(kind, configuredPath) {
|
|
39197
39414
|
const trimmed = configuredPath?.trim();
|
|
39198
39415
|
if (trimmed) {
|
|
@@ -39209,10 +39426,22 @@ function createRealFsApi2() {
|
|
|
39209
39426
|
readdir: async (dir) => fsp2.readdir(dir, { withFileTypes: true }),
|
|
39210
39427
|
readFile: async (file) => fsp2.readFile(file),
|
|
39211
39428
|
stat: async (file) => {
|
|
39212
|
-
const
|
|
39213
|
-
return { size:
|
|
39429
|
+
const s = await fsp2.stat(file);
|
|
39430
|
+
return { size: s.size, mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs };
|
|
39214
39431
|
},
|
|
39215
|
-
watch: (dir, onChange) => fs2.watch(dir, onChange)
|
|
39432
|
+
watch: (dir, onChange) => fs2.watch(dir, onChange),
|
|
39433
|
+
openReadStream: async (file) => {
|
|
39434
|
+
const handle = await fsp2.open(file, "r");
|
|
39435
|
+
return {
|
|
39436
|
+
read: async (buffer) => {
|
|
39437
|
+
const { bytesRead } = await handle.read(buffer, 0, buffer.length, null);
|
|
39438
|
+
return { bytesRead };
|
|
39439
|
+
},
|
|
39440
|
+
close: async () => {
|
|
39441
|
+
await handle.close();
|
|
39442
|
+
}
|
|
39443
|
+
};
|
|
39444
|
+
}
|
|
39216
39445
|
};
|
|
39217
39446
|
}
|
|
39218
39447
|
function isMarkdownFile(fileName) {
|
package/dist/ingest-queue.d.ts
CHANGED
|
@@ -7,6 +7,8 @@ export interface IngestQueueOptions {
|
|
|
7
7
|
retryBaseDelayMs: number;
|
|
8
8
|
/** Max retries per chunk. */
|
|
9
9
|
maxRetries: number;
|
|
10
|
+
/** Called after each chunk is accepted so scan-level state stays current. */
|
|
11
|
+
onChunkFeedback?: (feedback: IngestFeedback) => void;
|
|
10
12
|
}
|
|
11
13
|
interface IngestMarkdownDocumentParams {
|
|
12
14
|
sourceDoc: string;
|
|
@@ -20,11 +22,25 @@ interface IngestMarkdownDocumentParams {
|
|
|
20
22
|
fileHash: string;
|
|
21
23
|
sourceSize: number;
|
|
22
24
|
sourceMtimeMs: number;
|
|
25
|
+
sourceCtimeMs: number;
|
|
23
26
|
ingestVersion: number;
|
|
24
27
|
hashBackend: string;
|
|
25
28
|
};
|
|
26
29
|
mode?: IngestMode;
|
|
27
30
|
}
|
|
31
|
+
interface IngestFeedback {
|
|
32
|
+
queueDepth: number;
|
|
33
|
+
queueCapacity: number;
|
|
34
|
+
acceptMore: boolean;
|
|
35
|
+
retryAfterMs: number;
|
|
36
|
+
processingTimeUs: number;
|
|
37
|
+
nodesAccepted: number;
|
|
38
|
+
nodesRejected: number;
|
|
39
|
+
tokensIngested: number;
|
|
40
|
+
tokenBurstLimit: number;
|
|
41
|
+
walDepth?: number;
|
|
42
|
+
walCapacity?: number;
|
|
43
|
+
}
|
|
28
44
|
export declare class IngestQueue {
|
|
29
45
|
private readonly queue;
|
|
30
46
|
private readonly rpcCall;
|
|
@@ -32,7 +48,7 @@ export declare class IngestQueue {
|
|
|
32
48
|
private readonly options;
|
|
33
49
|
private running;
|
|
34
50
|
constructor(rpcCall: <T>(method: string, params: unknown) => Promise<T>, logger: LoggerLike, options?: Partial<IngestQueueOptions>);
|
|
35
|
-
enqueueIngest(sourceDoc: string, text: string, baseParams: Omit<IngestMarkdownDocumentParams, "sourceDoc" | "text" | "mode"
|
|
51
|
+
enqueueIngest(sourceDoc: string, text: string, baseParams: Omit<IngestMarkdownDocumentParams, "sourceDoc" | "text" | "mode">, maxChunkTokens?: number): Promise<IngestFeedback | undefined>;
|
|
36
52
|
private ingestWithRetry;
|
|
37
53
|
enqueueDelete(sourceDoc: string): Promise<void>;
|
|
38
54
|
}
|
package/dist/ingest-queue.js
CHANGED
|
@@ -19,40 +19,59 @@ export class IngestQueue {
|
|
|
19
19
|
this.options.chunkTokens = DEFAULT_OPTIONS.chunkTokens;
|
|
20
20
|
}
|
|
21
21
|
}
|
|
22
|
-
async enqueueIngest(sourceDoc, text, baseParams) {
|
|
22
|
+
async enqueueIngest(sourceDoc, text, baseParams, maxChunkTokens) {
|
|
23
23
|
if (this.options.chunkTokens === Infinity) {
|
|
24
|
-
|
|
25
|
-
return this.ingestWithRetry({
|
|
24
|
+
const resp = await this.ingestWithRetry({
|
|
26
25
|
...baseParams,
|
|
27
26
|
sourceDoc,
|
|
28
27
|
text,
|
|
29
28
|
mode: IngestMode.REPLACE,
|
|
30
29
|
});
|
|
30
|
+
return resp.feedback;
|
|
31
31
|
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
}
|
|
41
|
-
// Multiple chunks: clear the source once, then append the remaining chunks.
|
|
42
|
-
// Sending REPLACE last deletes the earlier chunks from the same source_doc.
|
|
43
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
44
|
-
const isFirst = i === 0;
|
|
32
|
+
let currentLimit = maxChunkTokens && maxChunkTokens > 0 ? maxChunkTokens : this.options.chunkTokens;
|
|
33
|
+
let offset = 0;
|
|
34
|
+
let isFirst = true;
|
|
35
|
+
let lastFeedback;
|
|
36
|
+
while (offset < text.length) {
|
|
37
|
+
const remainingText = text.slice(offset);
|
|
38
|
+
const chunks = splitIntoChunks(remainingText, currentLimit);
|
|
39
|
+
const chunkText = chunks[0].text;
|
|
45
40
|
const chunkParams = {
|
|
46
41
|
...baseParams,
|
|
47
42
|
sourceDoc,
|
|
48
|
-
text:
|
|
43
|
+
text: chunkText,
|
|
49
44
|
mode: isFirst ? IngestMode.REPLACE : IngestMode.APPEND,
|
|
50
45
|
};
|
|
51
|
-
await this.ingestWithRetry(chunkParams);
|
|
46
|
+
const resp = await this.ingestWithRetry(chunkParams);
|
|
47
|
+
lastFeedback = resp.feedback;
|
|
48
|
+
if (lastFeedback &&
|
|
49
|
+
lastFeedback.nodesAccepted === 0 &&
|
|
50
|
+
lastFeedback.tokenBurstLimit &&
|
|
51
|
+
lastFeedback.tokenBurstLimit > 0 &&
|
|
52
|
+
lastFeedback.tokenBurstLimit < currentLimit) {
|
|
53
|
+
currentLimit = lastFeedback.tokenBurstLimit;
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
if (lastFeedback && lastFeedback.nodesAccepted === 0) {
|
|
57
|
+
this.logger.warn?.(`[ingest-queue] Chunk permanently rejected for ${sourceDoc} ` +
|
|
58
|
+
`at offset=${offset} length=${chunkText.length} ` +
|
|
59
|
+
`tokenBurstLimit=${lastFeedback.tokenBurstLimit ?? "unset"}`);
|
|
60
|
+
}
|
|
61
|
+
if (this.options.onChunkFeedback && lastFeedback) {
|
|
62
|
+
this.options.onChunkFeedback(lastFeedback);
|
|
63
|
+
}
|
|
64
|
+
offset += chunkText.length;
|
|
65
|
+
isFirst = false;
|
|
66
|
+
if (lastFeedback && !lastFeedback.acceptMore && offset < text.length) {
|
|
67
|
+
const delay = lastFeedback.retryAfterMs || 1000;
|
|
68
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
69
|
+
}
|
|
52
70
|
}
|
|
71
|
+
return lastFeedback;
|
|
53
72
|
}
|
|
54
73
|
async ingestWithRetry(params) {
|
|
55
|
-
|
|
74
|
+
return withRetry(() => this.rpcCall("ingest_markdown_document", params), this.options.maxRetries, this.options.retryBaseDelayMs, this.logger, `ingest_markdown_document(${params.sourceDoc})`);
|
|
56
75
|
}
|
|
57
76
|
async enqueueDelete(sourceDoc) {
|
|
58
77
|
await withRetry(() => this.rpcCall("delete_authored_document", { sourceDoc }), this.options.maxRetries, this.options.retryBaseDelayMs, this.logger, `delete_authored_document(${sourceDoc})`);
|
|
@@ -14,14 +14,22 @@ interface FsDirentLike {
|
|
|
14
14
|
interface FsWatcherLike extends Disposable {
|
|
15
15
|
on(event: "error", handler: (error: Error) => void): void;
|
|
16
16
|
}
|
|
17
|
+
interface FsReadStream {
|
|
18
|
+
read(buffer: Uint8Array): Promise<{
|
|
19
|
+
bytesRead: number;
|
|
20
|
+
}>;
|
|
21
|
+
close(): Promise<void>;
|
|
22
|
+
}
|
|
17
23
|
interface FsApi {
|
|
18
24
|
readdir(dir: string): Promise<FsDirentLike[]>;
|
|
19
25
|
readFile(file: string): Promise<Uint8Array>;
|
|
20
26
|
stat(file: string): Promise<{
|
|
21
27
|
size: number;
|
|
22
28
|
mtimeMs: number;
|
|
29
|
+
ctimeMs: number;
|
|
23
30
|
}>;
|
|
24
31
|
watch(dir: string, onChange: (event: string, filename: string | Buffer | null) => void): FsWatcherLike;
|
|
32
|
+
openReadStream(file: string): Promise<FsReadStream>;
|
|
25
33
|
}
|
|
26
34
|
export interface MarkdownSourceAdapter {
|
|
27
35
|
kind: string;
|
package/dist/markdown-ingest.js
CHANGED
|
@@ -2,13 +2,13 @@ import fs from "node:fs";
|
|
|
2
2
|
import fsp from "node:fs/promises";
|
|
3
3
|
import os from "node:os";
|
|
4
4
|
import path from "node:path";
|
|
5
|
-
import { hashBytes } from "./markdown-hash.js";
|
|
6
5
|
import { formatError } from "./format-error.js";
|
|
7
6
|
import { IngestQueue } from "./ingest-queue.js";
|
|
8
7
|
const DEFAULT_DEBOUNCE_MS = 150;
|
|
9
8
|
const DEFAULT_TOKENIZER_ID = "markdown-ingest:v1";
|
|
10
9
|
const MARKDOWN_INGEST_VERSION = 3;
|
|
11
10
|
const HASH_BACKEND = "wasm-fnv1a64";
|
|
11
|
+
const STREAM_CHUNK_BYTES = 64 * 1024;
|
|
12
12
|
export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = createRealFsApi()) {
|
|
13
13
|
const adapters = [];
|
|
14
14
|
const genericRoots = normalizeMarkdownRoots(cfg.markdownIngestionRoots);
|
|
@@ -19,6 +19,8 @@ export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsA
|
|
|
19
19
|
exclude: cfg.markdownIngestionExclude,
|
|
20
20
|
debounceMs: cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS,
|
|
21
21
|
snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath),
|
|
22
|
+
priorityMode: cfg.markdownIngestionPriorityMode,
|
|
23
|
+
maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile,
|
|
22
24
|
}, getRpc, logger, fsApi));
|
|
23
25
|
}
|
|
24
26
|
const obsidianRoots = normalizeMarkdownRoots(cfg.markdownIngestionObsidianRoots);
|
|
@@ -29,6 +31,8 @@ export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsA
|
|
|
29
31
|
exclude: cfg.markdownIngestionObsidianExclude,
|
|
30
32
|
debounceMs: cfg.markdownIngestionObsidianDebounceMs ?? cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS,
|
|
31
33
|
snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath),
|
|
34
|
+
priorityMode: cfg.markdownIngestionPriorityMode,
|
|
35
|
+
maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile,
|
|
32
36
|
}, getRpc, logger, fsApi));
|
|
33
37
|
}
|
|
34
38
|
if (adapters.length === 0) {
|
|
@@ -77,6 +81,8 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
77
81
|
getRpc;
|
|
78
82
|
logger;
|
|
79
83
|
snapshotPath;
|
|
84
|
+
priorityMode;
|
|
85
|
+
maxTokensPerFile;
|
|
80
86
|
states = new Map();
|
|
81
87
|
fileStates = new Map();
|
|
82
88
|
activeScans = new Set();
|
|
@@ -85,6 +91,17 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
85
91
|
started = false;
|
|
86
92
|
ingestQueue = null;
|
|
87
93
|
stopping = false;
|
|
94
|
+
lastAcceptMore = true;
|
|
95
|
+
lastRetryAfterMs = 0;
|
|
96
|
+
lastQueueDepth = 0;
|
|
97
|
+
lastQueueCapacity = 0;
|
|
98
|
+
lastProcessingTimeUs = 0;
|
|
99
|
+
lastNodesAccepted = 0;
|
|
100
|
+
lastNodesRejected = 0;
|
|
101
|
+
lastTokensIngested = 0;
|
|
102
|
+
lastTokenBurstLimit = 512;
|
|
103
|
+
lastWalDepth = 0;
|
|
104
|
+
lastWalCapacity = 0;
|
|
88
105
|
snapshotLoaded = false;
|
|
89
106
|
snapshotDirty = false;
|
|
90
107
|
constructor(kind, config, getRpc, logger, fsApi) {
|
|
@@ -97,6 +114,8 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
97
114
|
this.getRpc = getRpc;
|
|
98
115
|
this.logger = logger;
|
|
99
116
|
this.snapshotPath = config.snapshotPath ?? resolveMarkdownSnapshotPath(kind);
|
|
117
|
+
this.priorityMode = config.priorityMode ?? "mtime";
|
|
118
|
+
this.maxTokensPerFile = Math.max(1, Math.trunc(config.maxTokensPerFile ?? 128_000));
|
|
100
119
|
this.tokenizerId = DEFAULT_TOKENIZER_ID;
|
|
101
120
|
this.coreDoc = true;
|
|
102
121
|
}
|
|
@@ -150,6 +169,7 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
150
169
|
scanning: false,
|
|
151
170
|
dirty: false,
|
|
152
171
|
timer: null,
|
|
172
|
+
resumeFromPath: null,
|
|
153
173
|
},
|
|
154
174
|
knownFiles: this.snapshotFilesForRoot(resolved),
|
|
155
175
|
directoryWatchers: new Map(),
|
|
@@ -167,12 +187,16 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
167
187
|
return;
|
|
168
188
|
}
|
|
169
189
|
rootState.scanState.scanning = true;
|
|
190
|
+
this.lastAcceptMore = true;
|
|
191
|
+
this.lastRetryAfterMs = 0;
|
|
170
192
|
const scan = (async () => {
|
|
171
193
|
const stats = createScanStats();
|
|
172
194
|
const startedAt = Date.now();
|
|
173
195
|
try {
|
|
174
196
|
const currentFiles = new Set();
|
|
175
|
-
|
|
197
|
+
const candidates = [];
|
|
198
|
+
await this.walkDirectory(rootState, rootState.root, currentFiles, stats, candidates);
|
|
199
|
+
await this.syncCandidates(rootState, candidates, stats);
|
|
176
200
|
if (!this.stopping) {
|
|
177
201
|
await this.pruneDeletedFiles(rootState, currentFiles, stats);
|
|
178
202
|
rootState.knownFiles = currentFiles;
|
|
@@ -198,7 +222,7 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
198
222
|
this.activeScans.delete(scan);
|
|
199
223
|
}
|
|
200
224
|
}
|
|
201
|
-
scheduleRootScan(rootState) {
|
|
225
|
+
scheduleRootScan(rootState, delayMs) {
|
|
202
226
|
if (!this.started || this.stopping) {
|
|
203
227
|
return;
|
|
204
228
|
}
|
|
@@ -214,9 +238,9 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
214
238
|
void this.scanRoot(rootState.root).catch((error) => {
|
|
215
239
|
this.logger.warn?.(`[markdown-ingest] root scan failed for ${rootState.root}: ${formatError(error)}`);
|
|
216
240
|
});
|
|
217
|
-
}, this.debounceMs);
|
|
241
|
+
}, Math.max(this.debounceMs, delayMs ?? 0));
|
|
218
242
|
}
|
|
219
|
-
async walkDirectory(rootState, dir, currentFiles, stats) {
|
|
243
|
+
async walkDirectory(rootState, dir, currentFiles, stats, candidates) {
|
|
220
244
|
if (this.shouldPruneDirectory(rootState.root, dir)) {
|
|
221
245
|
stats.directoriesPruned++;
|
|
222
246
|
return;
|
|
@@ -240,7 +264,7 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
240
264
|
}
|
|
241
265
|
const child = path.join(dir, entry.name);
|
|
242
266
|
if (entry.isDirectory()) {
|
|
243
|
-
await this.walkDirectory(rootState, child, currentFiles, stats);
|
|
267
|
+
await this.walkDirectory(rootState, child, currentFiles, stats, candidates);
|
|
244
268
|
continue;
|
|
245
269
|
}
|
|
246
270
|
if (!entry.isFile() || !isMarkdownFile(entry.name)) {
|
|
@@ -253,17 +277,74 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
253
277
|
}
|
|
254
278
|
stats.filesIncluded++;
|
|
255
279
|
currentFiles.add(child);
|
|
280
|
+
const stat = await this.safeStatWithCtime(child);
|
|
281
|
+
if (!stat) {
|
|
282
|
+
continue;
|
|
283
|
+
}
|
|
284
|
+
candidates.push({ path: child, size: stat.size, mtimeMs: stat.mtimeMs, ctimeMs: stat.ctimeMs, ordinal: candidates.length });
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
async syncCandidates(rootState, candidates, stats) {
|
|
288
|
+
const sorted = sortCandidates(candidates, this.priorityMode);
|
|
289
|
+
let skipping = false;
|
|
290
|
+
if (rootState.scanState.resumeFromPath) {
|
|
291
|
+
const targetExists = sorted.some((c) => c.path === rootState.scanState.resumeFromPath);
|
|
292
|
+
if (targetExists) {
|
|
293
|
+
skipping = true;
|
|
294
|
+
this.lastAcceptMore = true;
|
|
295
|
+
this.lastRetryAfterMs = 0;
|
|
296
|
+
}
|
|
297
|
+
else {
|
|
298
|
+
rootState.scanState.resumeFromPath = null;
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
for (const candidate of sorted) {
|
|
302
|
+
if (skipping) {
|
|
303
|
+
if (candidate.path === rootState.scanState.resumeFromPath) {
|
|
304
|
+
skipping = false;
|
|
305
|
+
}
|
|
306
|
+
else {
|
|
307
|
+
continue;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
if (this.stopping) {
|
|
311
|
+
return;
|
|
312
|
+
}
|
|
313
|
+
if (!this.lastAcceptMore) {
|
|
314
|
+
if (!this.stopping) {
|
|
315
|
+
rootState.scanState.resumeFromPath = candidate.path;
|
|
316
|
+
this.scheduleRootScan(rootState, this.lastRetryAfterMs);
|
|
317
|
+
}
|
|
318
|
+
return;
|
|
319
|
+
}
|
|
320
|
+
if (this.lastWalCapacity > 0 && this.lastWalDepth > this.lastWalCapacity * 0.8) {
|
|
321
|
+
rootState.scanState.resumeFromPath = candidate.path;
|
|
322
|
+
if (!this.stopping) {
|
|
323
|
+
this.scheduleRootScan(rootState, 2000);
|
|
324
|
+
}
|
|
325
|
+
return;
|
|
326
|
+
}
|
|
327
|
+
const estimatedTokens = estimateTokens(candidate.size);
|
|
328
|
+
if (estimatedTokens > this.maxTokensPerFile) {
|
|
329
|
+
stats.filesDeferred++;
|
|
330
|
+
continue;
|
|
331
|
+
}
|
|
256
332
|
try {
|
|
257
|
-
const result = await this.syncMarkdownFile(rootState,
|
|
333
|
+
const result = await this.syncMarkdownFile(rootState, candidate.path, {
|
|
334
|
+
size: candidate.size,
|
|
335
|
+
mtimeMs: candidate.mtimeMs,
|
|
336
|
+
ctimeMs: candidate.ctimeMs,
|
|
337
|
+
});
|
|
258
338
|
recordSyncResult(stats, result);
|
|
259
339
|
}
|
|
260
340
|
catch (error) {
|
|
261
341
|
stats.syncErrors++;
|
|
262
342
|
if (!this.stopping) {
|
|
263
|
-
this.logger.warn?.(`[markdown-ingest] sync failed for ${
|
|
343
|
+
this.logger.warn?.(`[markdown-ingest] sync failed for ${candidate.path}: ${formatError(error)}`);
|
|
264
344
|
}
|
|
265
345
|
}
|
|
266
346
|
}
|
|
347
|
+
rootState.scanState.resumeFromPath = null;
|
|
267
348
|
}
|
|
268
349
|
shouldPruneDirectory(root, dir) {
|
|
269
350
|
const relative = toPosixPath(path.relative(root, dir));
|
|
@@ -284,6 +365,11 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
284
365
|
try {
|
|
285
366
|
const watcher = this.fsApi.watch(dir, () => {
|
|
286
367
|
if (!this.stopping) {
|
|
368
|
+
rootState.scanState.resumeFromPath = null;
|
|
369
|
+
if (rootState.scanState.timer) {
|
|
370
|
+
clearTimeout(rootState.scanState.timer);
|
|
371
|
+
rootState.scanState.timer = null;
|
|
372
|
+
}
|
|
287
373
|
this.scheduleRootScan(rootState);
|
|
288
374
|
}
|
|
289
375
|
});
|
|
@@ -335,10 +421,10 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
335
421
|
stats.filesDeleted++;
|
|
336
422
|
}
|
|
337
423
|
}
|
|
338
|
-
async syncMarkdownFile(rootState, filePath) {
|
|
424
|
+
async syncMarkdownFile(rootState, filePath, initialStat) {
|
|
339
425
|
const sourceDoc = filePath;
|
|
340
426
|
const relativePath = toPosixPath(path.relative(rootState.root, filePath));
|
|
341
|
-
const stat = await this.
|
|
427
|
+
const stat = initialStat ?? (await this.safeStatWithCtime(filePath));
|
|
342
428
|
if (!stat) {
|
|
343
429
|
await this.deleteSourceDocument(sourceDoc);
|
|
344
430
|
this.fileStates.delete(sourceDoc);
|
|
@@ -349,14 +435,18 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
349
435
|
if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) {
|
|
350
436
|
return "unchanged";
|
|
351
437
|
}
|
|
352
|
-
const
|
|
353
|
-
|
|
438
|
+
const maxBytes = this.maxTokensPerFile * 4 + 3;
|
|
439
|
+
const streamed = await this.safeReadFileStreamed(filePath, maxBytes);
|
|
440
|
+
if (streamed === "too_large") {
|
|
441
|
+
return "skipped";
|
|
442
|
+
}
|
|
443
|
+
if (!streamed) {
|
|
354
444
|
await this.deleteSourceDocument(sourceDoc);
|
|
355
445
|
this.fileStates.delete(sourceDoc);
|
|
356
446
|
this.snapshotDirty = true;
|
|
357
447
|
return "deleted";
|
|
358
448
|
}
|
|
359
|
-
const fileHash =
|
|
449
|
+
const { text, fileHash } = streamed;
|
|
360
450
|
if (cached && cached.fileHash === fileHash) {
|
|
361
451
|
this.setFileState(sourceDoc, {
|
|
362
452
|
root: rootState.root,
|
|
@@ -368,14 +458,13 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
368
458
|
});
|
|
369
459
|
return "unchanged";
|
|
370
460
|
}
|
|
371
|
-
const text = textDecoder.decode(bytes);
|
|
372
461
|
if (this.kind === "obsidian" && this.includePatterns.length === 0 && !looksLikeObsidianNote(filePath, text)) {
|
|
373
462
|
await this.deleteSourceDocument(sourceDoc);
|
|
374
463
|
this.fileStates.delete(sourceDoc);
|
|
375
464
|
this.snapshotDirty = true;
|
|
376
465
|
return "skipped";
|
|
377
466
|
}
|
|
378
|
-
await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs);
|
|
467
|
+
await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs, stat.ctimeMs);
|
|
379
468
|
this.setFileState(sourceDoc, {
|
|
380
469
|
root: rootState.root,
|
|
381
470
|
sourceDoc,
|
|
@@ -390,9 +479,9 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
390
479
|
this.fileStates.set(sourceDoc, state);
|
|
391
480
|
this.snapshotDirty = true;
|
|
392
481
|
}
|
|
393
|
-
async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs) {
|
|
482
|
+
async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs, sourceCtimeMs) {
|
|
394
483
|
const queue = await this.getIngestQueue();
|
|
395
|
-
await queue.enqueueIngest(sourceDoc, text, {
|
|
484
|
+
const feedback = await queue.enqueueIngest(sourceDoc, text, {
|
|
396
485
|
tokenizerId: this.tokenizerId,
|
|
397
486
|
coreDoc: this.coreDoc,
|
|
398
487
|
sourceMeta: {
|
|
@@ -402,10 +491,44 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
402
491
|
fileHash,
|
|
403
492
|
sourceSize,
|
|
404
493
|
sourceMtimeMs: Math.trunc(sourceMtimeMs),
|
|
494
|
+
sourceCtimeMs: Math.trunc(sourceCtimeMs),
|
|
405
495
|
ingestVersion: MARKDOWN_INGEST_VERSION,
|
|
406
496
|
hashBackend: HASH_BACKEND,
|
|
407
497
|
},
|
|
408
|
-
});
|
|
498
|
+
}, this.lastTokenBurstLimit);
|
|
499
|
+
this.applyIngestFeedback(feedback);
|
|
500
|
+
}
|
|
501
|
+
applyIngestFeedback(feedback) {
|
|
502
|
+
if (feedback && typeof feedback.acceptMore === "boolean") {
|
|
503
|
+
this.lastAcceptMore = feedback.acceptMore;
|
|
504
|
+
this.lastQueueDepth = feedback.queueDepth ?? 0;
|
|
505
|
+
this.lastQueueCapacity = feedback.queueCapacity ?? 0;
|
|
506
|
+
this.lastProcessingTimeUs = feedback.processingTimeUs ?? 0;
|
|
507
|
+
this.lastNodesAccepted = feedback.nodesAccepted ?? 0;
|
|
508
|
+
this.lastNodesRejected = feedback.nodesRejected ?? 0;
|
|
509
|
+
this.lastTokensIngested = feedback.tokensIngested ?? 0;
|
|
510
|
+
if (feedback.tokenBurstLimit && feedback.tokenBurstLimit > 0) {
|
|
511
|
+
this.lastTokenBurstLimit = feedback.tokenBurstLimit;
|
|
512
|
+
}
|
|
513
|
+
this.lastWalDepth = feedback.walDepth ?? 0;
|
|
514
|
+
this.lastWalCapacity = feedback.walCapacity ?? 0;
|
|
515
|
+
if (feedback.acceptMore) {
|
|
516
|
+
this.lastRetryAfterMs = 0;
|
|
517
|
+
}
|
|
518
|
+
else {
|
|
519
|
+
this.lastRetryAfterMs = feedback.retryAfterMs || 1000;
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
else {
|
|
523
|
+
this.lastAcceptMore = true;
|
|
524
|
+
this.lastRetryAfterMs = 0;
|
|
525
|
+
this.lastQueueDepth = 0;
|
|
526
|
+
this.lastQueueCapacity = 0;
|
|
527
|
+
this.lastProcessingTimeUs = 0;
|
|
528
|
+
this.lastNodesAccepted = 0;
|
|
529
|
+
this.lastNodesRejected = 0;
|
|
530
|
+
this.lastTokensIngested = 0;
|
|
531
|
+
}
|
|
409
532
|
}
|
|
410
533
|
async deleteSourceDocument(sourceDoc) {
|
|
411
534
|
const queue = await this.getIngestQueue();
|
|
@@ -414,7 +537,9 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
414
537
|
async getIngestQueue() {
|
|
415
538
|
if (!this.ingestQueue) {
|
|
416
539
|
const rpc = await this.getRpc();
|
|
417
|
-
this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger
|
|
540
|
+
this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger, {
|
|
541
|
+
onChunkFeedback: (feedback) => this.applyIngestFeedback(feedback),
|
|
542
|
+
});
|
|
418
543
|
}
|
|
419
544
|
return this.ingestQueue;
|
|
420
545
|
}
|
|
@@ -426,14 +551,51 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
426
551
|
return null;
|
|
427
552
|
}
|
|
428
553
|
}
|
|
429
|
-
async
|
|
554
|
+
async safeStatWithCtime(filePath) {
|
|
430
555
|
try {
|
|
431
|
-
return await this.fsApi.
|
|
556
|
+
return await this.fsApi.stat(filePath);
|
|
432
557
|
}
|
|
433
558
|
catch {
|
|
434
559
|
return null;
|
|
435
560
|
}
|
|
436
561
|
}
|
|
562
|
+
async safeReadFileStreamed(filePath, maxBytes) {
|
|
563
|
+
let stream = null;
|
|
564
|
+
try {
|
|
565
|
+
stream = await this.fsApi.openReadStream(filePath);
|
|
566
|
+
const decoder = new TextDecoder();
|
|
567
|
+
const chunks = [];
|
|
568
|
+
let hash = 0xcbf29ce484222325n;
|
|
569
|
+
let total = 0;
|
|
570
|
+
const buffer = Buffer.allocUnsafe(STREAM_CHUNK_BYTES);
|
|
571
|
+
while (true) {
|
|
572
|
+
const { bytesRead } = await stream.read(buffer);
|
|
573
|
+
if (bytesRead === 0) {
|
|
574
|
+
break;
|
|
575
|
+
}
|
|
576
|
+
total += bytesRead;
|
|
577
|
+
if (total > maxBytes) {
|
|
578
|
+
return "too_large";
|
|
579
|
+
}
|
|
580
|
+
const chunk = buffer.subarray(0, bytesRead);
|
|
581
|
+
hash = updateFnv1a64(hash, chunk);
|
|
582
|
+
chunks.push(decoder.decode(chunk, { stream: true }));
|
|
583
|
+
}
|
|
584
|
+
chunks.push(decoder.decode());
|
|
585
|
+
return {
|
|
586
|
+
text: chunks.join(""),
|
|
587
|
+
fileHash: hash.toString(16).padStart(16, "0"),
|
|
588
|
+
};
|
|
589
|
+
}
|
|
590
|
+
catch {
|
|
591
|
+
return null;
|
|
592
|
+
}
|
|
593
|
+
finally {
|
|
594
|
+
if (stream) {
|
|
595
|
+
await stream.close().catch(() => { });
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
}
|
|
437
599
|
snapshotFilesForRoot(root) {
|
|
438
600
|
const files = new Set();
|
|
439
601
|
for (const state of this.fileStates.values()) {
|
|
@@ -497,7 +659,7 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
497
659
|
}
|
|
498
660
|
}
|
|
499
661
|
logScanStats(root, stats, durationMs) {
|
|
500
|
-
this.logger.info?.(`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} errors=${stats.syncErrors} durationMs=${durationMs}`);
|
|
662
|
+
this.logger.info?.(`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} deferred=${stats.filesDeferred} errors=${stats.syncErrors} durationMs=${durationMs}`);
|
|
501
663
|
}
|
|
502
664
|
}
|
|
503
665
|
function createScanStats() {
|
|
@@ -511,8 +673,26 @@ function createScanStats() {
|
|
|
511
673
|
filesIngested: 0,
|
|
512
674
|
filesDeleted: 0,
|
|
513
675
|
syncErrors: 0,
|
|
676
|
+
filesDeferred: 0,
|
|
514
677
|
};
|
|
515
678
|
}
|
|
679
|
+
function estimateTokens(size) {
|
|
680
|
+
return Math.max(1, Math.floor(size / 4));
|
|
681
|
+
}
|
|
682
|
+
function sortCandidates(candidates, mode) {
|
|
683
|
+
return [...candidates].sort((left, right) => {
|
|
684
|
+
if (mode === "size") {
|
|
685
|
+
return right.size - left.size || left.ordinal - right.ordinal;
|
|
686
|
+
}
|
|
687
|
+
if (mode === "ctime") {
|
|
688
|
+
return right.ctimeMs - left.ctimeMs || left.ordinal - right.ordinal;
|
|
689
|
+
}
|
|
690
|
+
if (mode === "fifo") {
|
|
691
|
+
return left.ordinal - right.ordinal;
|
|
692
|
+
}
|
|
693
|
+
return right.mtimeMs - left.mtimeMs || left.ordinal - right.ordinal;
|
|
694
|
+
});
|
|
695
|
+
}
|
|
516
696
|
function recordSyncResult(stats, result) {
|
|
517
697
|
if (result === "ingested") {
|
|
518
698
|
stats.filesIngested++;
|
|
@@ -530,7 +710,6 @@ function recordSyncResult(stats, result) {
|
|
|
530
710
|
function toPosixPath(value) {
|
|
531
711
|
return value.split(path.sep).join("/");
|
|
532
712
|
}
|
|
533
|
-
const textDecoder = new TextDecoder();
|
|
534
713
|
function normalizeMarkdownRoots(roots) {
|
|
535
714
|
if (!roots?.length) {
|
|
536
715
|
return [];
|
|
@@ -545,6 +724,15 @@ function normalizeMarkdownRoots(roots) {
|
|
|
545
724
|
}
|
|
546
725
|
return [...resolved];
|
|
547
726
|
}
|
|
727
|
+
function updateFnv1a64(seed, bytes) {
|
|
728
|
+
let hash = seed;
|
|
729
|
+
const prime = 0x100000001b3n;
|
|
730
|
+
for (let i = 0; i < bytes.length; i++) {
|
|
731
|
+
hash ^= BigInt(bytes[i] ?? 0);
|
|
732
|
+
hash = BigInt.asUintN(64, hash * prime);
|
|
733
|
+
}
|
|
734
|
+
return hash;
|
|
735
|
+
}
|
|
548
736
|
function resolveMarkdownSnapshotPath(kind, configuredPath) {
|
|
549
737
|
const trimmed = configuredPath?.trim();
|
|
550
738
|
if (trimmed) {
|
|
@@ -561,10 +749,22 @@ function createRealFsApi() {
|
|
|
561
749
|
readdir: async (dir) => fsp.readdir(dir, { withFileTypes: true }),
|
|
562
750
|
readFile: async (file) => fsp.readFile(file),
|
|
563
751
|
stat: async (file) => {
|
|
564
|
-
const
|
|
565
|
-
return { size:
|
|
752
|
+
const s = await fsp.stat(file);
|
|
753
|
+
return { size: s.size, mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs };
|
|
566
754
|
},
|
|
567
755
|
watch: (dir, onChange) => fs.watch(dir, onChange),
|
|
756
|
+
openReadStream: async (file) => {
|
|
757
|
+
const handle = await fsp.open(file, "r");
|
|
758
|
+
return {
|
|
759
|
+
read: async (buffer) => {
|
|
760
|
+
const { bytesRead } = await handle.read(buffer, 0, buffer.length, null);
|
|
761
|
+
return { bytesRead };
|
|
762
|
+
},
|
|
763
|
+
close: async () => {
|
|
764
|
+
await handle.close();
|
|
765
|
+
},
|
|
766
|
+
};
|
|
767
|
+
},
|
|
568
768
|
};
|
|
569
769
|
}
|
|
570
770
|
function isMarkdownFile(fileName) {
|
package/dist/types.d.ts
CHANGED
|
@@ -48,6 +48,8 @@ export interface PluginConfig {
|
|
|
48
48
|
markdownIngestionInclude?: string[];
|
|
49
49
|
markdownIngestionExclude?: string[];
|
|
50
50
|
markdownIngestionDebounceMs?: number;
|
|
51
|
+
markdownIngestionPriorityMode?: "mtime" | "ctime" | "size" | "fifo";
|
|
52
|
+
markdownIngestionMaxTokensPerFile?: number;
|
|
51
53
|
markdownIngestionSnapshotPath?: string;
|
|
52
54
|
markdownIngestionObsidianSnapshotPath?: string;
|
|
53
55
|
dreamPromotionEnabled?: boolean;
|
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "libravdb-memory",
|
|
3
3
|
"name": "LibraVDB Memory",
|
|
4
4
|
"description": "Persistent vector memory with three-tier hybrid scoring",
|
|
5
|
-
"version": "1.5.
|
|
5
|
+
"version": "1.5.5",
|
|
6
6
|
"kind": [
|
|
7
7
|
"memory",
|
|
8
8
|
"context-engine"
|
|
@@ -263,6 +263,20 @@
|
|
|
263
263
|
"type": "number",
|
|
264
264
|
"default": 150
|
|
265
265
|
},
|
|
266
|
+
"markdownIngestionPriorityMode": {
|
|
267
|
+
"type": "string",
|
|
268
|
+
"enum": [
|
|
269
|
+
"mtime",
|
|
270
|
+
"ctime",
|
|
271
|
+
"size",
|
|
272
|
+
"fifo"
|
|
273
|
+
],
|
|
274
|
+
"default": "mtime"
|
|
275
|
+
},
|
|
276
|
+
"markdownIngestionMaxTokensPerFile": {
|
|
277
|
+
"type": "number",
|
|
278
|
+
"default": 128000
|
|
279
|
+
},
|
|
266
280
|
"markdownIngestionSnapshotPath": {
|
|
267
281
|
"type": "string"
|
|
268
282
|
},
|