@xdarkicex/openclaw-memory-libravdb 1.5.4 → 1.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/context-engine.js +3 -3
- package/dist/index.js +266 -49
- package/dist/ingest-queue.d.ts +17 -1
- package/dist/ingest-queue.js +38 -19
- package/dist/markdown-ingest.d.ts +8 -0
- package/dist/markdown-ingest.js +225 -25
- package/dist/types.d.ts +2 -0
- package/openclaw.plugin.json +15 -1
- package/package.json +1 -1
package/dist/context-engine.js
CHANGED
|
@@ -560,9 +560,9 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
|
|
|
560
560
|
};
|
|
561
561
|
}
|
|
562
562
|
/**
 * Reports whether gRPC auth material was supplied through the environment.
 *
 * Either `LIBRAVDB_AUTH_SECRET` or `LIBRAVDB_AUTH_SECRET_FILE` counts, as long
 * as the value is not empty once surrounding whitespace is trimmed.
 *
 * @returns {boolean} `true` when at least one of the two variables is usable.
 */
function isGrpcAuthConfigured() {
  const { LIBRAVDB_AUTH_SECRET, LIBRAVDB_AUTH_SECRET_FILE } = process.env;
  const isUsable = (value) => typeof value === "string" && value.trim().length > 0;
  return isUsable(LIBRAVDB_AUTH_SECRET) || isUsable(LIBRAVDB_AUTH_SECRET_FILE);
}
|
|
567
567
|
function buildGrpcAuthInitializationError(error) {
|
|
568
568
|
const code = typeof error?.code === "number" ||
|
package/dist/index.js
CHANGED
|
@@ -839,14 +839,14 @@ var require_binary_encoding = __commonJS({
|
|
|
839
839
|
};
|
|
840
840
|
exports2.BinaryWriter = BinaryWriter;
|
|
841
841
|
var BinaryReader = class {
|
|
842
|
-
constructor(buf,
|
|
842
|
+
constructor(buf, textDecoder2) {
|
|
843
843
|
this.varint64 = varint_js_1.varint64read;
|
|
844
844
|
this.uint32 = varint_js_1.varint32read;
|
|
845
845
|
this.buf = buf;
|
|
846
846
|
this.len = buf.length;
|
|
847
847
|
this.pos = 0;
|
|
848
848
|
this.view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
|
|
849
|
-
this.textDecoder =
|
|
849
|
+
this.textDecoder = textDecoder2 !== null && textDecoder2 !== void 0 ? textDecoder2 : new TextDecoder();
|
|
850
850
|
}
|
|
851
851
|
/**
|
|
852
852
|
* Reads a tag - field number and wire type.
|
|
@@ -34241,7 +34241,9 @@ function buildContextEngineFactory(runtime, cfg, logger = console) {
|
|
|
34241
34241
|
};
|
|
34242
34242
|
}
|
|
34243
34243
|
/**
 * Tells whether a gRPC auth secret is configured via environment variables.
 *
 * Checks `LIBRAVDB_AUTH_SECRET` and `LIBRAVDB_AUTH_SECRET_FILE`; a variable
 * only counts when its trimmed value is non-empty.
 *
 * @returns {boolean} whether either variable carries a non-blank value.
 */
function isGrpcAuthConfigured() {
  const candidates = [
    process.env.LIBRAVDB_AUTH_SECRET,
    process.env.LIBRAVDB_AUTH_SECRET_FILE
  ];
  return candidates.some((raw) => typeof raw === "string" && raw.trim().length > 0);
}
|
|
34246
34248
|
function buildGrpcAuthInitializationError(error) {
|
|
34247
34249
|
const code = typeof error?.code === "number" || typeof error?.code === "string" ? ` code=${String(error.code)}` : "";
|
|
@@ -38535,37 +38537,55 @@ var IngestQueue = class {
|
|
|
38535
38537
|
this.options.chunkTokens = DEFAULT_OPTIONS.chunkTokens;
|
|
38536
38538
|
}
|
|
38537
38539
|
}
|
|
38538
|
-
async enqueueIngest(sourceDoc, text, baseParams) {
|
|
38540
|
+
async enqueueIngest(sourceDoc, text, baseParams, maxChunkTokens) {
|
|
38539
38541
|
if (this.options.chunkTokens === Infinity) {
|
|
38540
|
-
|
|
38542
|
+
const resp = await this.ingestWithRetry({
|
|
38541
38543
|
...baseParams,
|
|
38542
38544
|
sourceDoc,
|
|
38543
38545
|
text,
|
|
38544
38546
|
mode: IngestMode.REPLACE
|
|
38545
38547
|
});
|
|
38546
|
-
|
|
38547
|
-
|
|
38548
|
-
|
|
38549
|
-
|
|
38550
|
-
|
|
38551
|
-
|
|
38552
|
-
|
|
38553
|
-
|
|
38554
|
-
|
|
38555
|
-
|
|
38556
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
38557
|
-
const isFirst = i === 0;
|
|
38548
|
+
return resp.feedback;
|
|
38549
|
+
}
|
|
38550
|
+
let currentLimit = maxChunkTokens && maxChunkTokens > 0 ? maxChunkTokens : this.options.chunkTokens;
|
|
38551
|
+
let offset = 0;
|
|
38552
|
+
let isFirst = true;
|
|
38553
|
+
let lastFeedback;
|
|
38554
|
+
while (offset < text.length) {
|
|
38555
|
+
const remainingText = text.slice(offset);
|
|
38556
|
+
const chunks = splitIntoChunks(remainingText, currentLimit);
|
|
38557
|
+
const chunkText = chunks[0].text;
|
|
38558
38558
|
const chunkParams = {
|
|
38559
38559
|
...baseParams,
|
|
38560
38560
|
sourceDoc,
|
|
38561
|
-
text:
|
|
38561
|
+
text: chunkText,
|
|
38562
38562
|
mode: isFirst ? IngestMode.REPLACE : IngestMode.APPEND
|
|
38563
38563
|
};
|
|
38564
|
-
await this.ingestWithRetry(chunkParams);
|
|
38564
|
+
const resp = await this.ingestWithRetry(chunkParams);
|
|
38565
|
+
lastFeedback = resp.feedback;
|
|
38566
|
+
if (lastFeedback && lastFeedback.nodesAccepted === 0 && lastFeedback.tokenBurstLimit && lastFeedback.tokenBurstLimit > 0 && lastFeedback.tokenBurstLimit < currentLimit) {
|
|
38567
|
+
currentLimit = lastFeedback.tokenBurstLimit;
|
|
38568
|
+
continue;
|
|
38569
|
+
}
|
|
38570
|
+
if (lastFeedback && lastFeedback.nodesAccepted === 0) {
|
|
38571
|
+
this.logger.warn?.(
|
|
38572
|
+
`[ingest-queue] Chunk permanently rejected for ${sourceDoc} at offset=${offset} length=${chunkText.length} tokenBurstLimit=${lastFeedback.tokenBurstLimit ?? "unset"}`
|
|
38573
|
+
);
|
|
38574
|
+
}
|
|
38575
|
+
if (this.options.onChunkFeedback && lastFeedback) {
|
|
38576
|
+
this.options.onChunkFeedback(lastFeedback);
|
|
38577
|
+
}
|
|
38578
|
+
offset += chunkText.length;
|
|
38579
|
+
isFirst = false;
|
|
38580
|
+
if (lastFeedback && !lastFeedback.acceptMore && offset < text.length) {
|
|
38581
|
+
const delay = lastFeedback.retryAfterMs || 1e3;
|
|
38582
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
38583
|
+
}
|
|
38565
38584
|
}
|
|
38585
|
+
return lastFeedback;
|
|
38566
38586
|
}
|
|
38567
38587
|
async ingestWithRetry(params) {
|
|
38568
|
-
|
|
38588
|
+
return withRetry(
|
|
38569
38589
|
() => this.rpcCall("ingest_markdown_document", params),
|
|
38570
38590
|
this.options.maxRetries,
|
|
38571
38591
|
this.options.retryBaseDelayMs,
|
|
@@ -38656,6 +38676,7 @@ var DEFAULT_DEBOUNCE_MS2 = 150;
|
|
|
38656
38676
|
var DEFAULT_TOKENIZER_ID = "markdown-ingest:v1";
|
|
38657
38677
|
var MARKDOWN_INGEST_VERSION = 3;
|
|
38658
38678
|
var HASH_BACKEND = "wasm-fnv1a64";
|
|
38679
|
+
var STREAM_CHUNK_BYTES = 64 * 1024;
|
|
38659
38680
|
function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = createRealFsApi2()) {
|
|
38660
38681
|
const adapters = [];
|
|
38661
38682
|
const genericRoots = normalizeMarkdownRoots(cfg.markdownIngestionRoots);
|
|
@@ -38668,7 +38689,9 @@ function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = cr
|
|
|
38668
38689
|
include: cfg.markdownIngestionInclude,
|
|
38669
38690
|
exclude: cfg.markdownIngestionExclude,
|
|
38670
38691
|
debounceMs: cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS2,
|
|
38671
|
-
snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath)
|
|
38692
|
+
snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath),
|
|
38693
|
+
priorityMode: cfg.markdownIngestionPriorityMode,
|
|
38694
|
+
maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile
|
|
38672
38695
|
},
|
|
38673
38696
|
getRpc,
|
|
38674
38697
|
logger,
|
|
@@ -38686,7 +38709,9 @@ function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = cr
|
|
|
38686
38709
|
include: cfg.markdownIngestionObsidianInclude,
|
|
38687
38710
|
exclude: cfg.markdownIngestionObsidianExclude,
|
|
38688
38711
|
debounceMs: cfg.markdownIngestionObsidianDebounceMs ?? cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS2,
|
|
38689
|
-
snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath)
|
|
38712
|
+
snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath),
|
|
38713
|
+
priorityMode: cfg.markdownIngestionPriorityMode,
|
|
38714
|
+
maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile
|
|
38690
38715
|
},
|
|
38691
38716
|
getRpc,
|
|
38692
38717
|
logger,
|
|
@@ -38743,6 +38768,8 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38743
38768
|
getRpc;
|
|
38744
38769
|
logger;
|
|
38745
38770
|
snapshotPath;
|
|
38771
|
+
priorityMode;
|
|
38772
|
+
maxTokensPerFile;
|
|
38746
38773
|
states = /* @__PURE__ */ new Map();
|
|
38747
38774
|
fileStates = /* @__PURE__ */ new Map();
|
|
38748
38775
|
activeScans = /* @__PURE__ */ new Set();
|
|
@@ -38751,6 +38778,17 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38751
38778
|
started = false;
|
|
38752
38779
|
ingestQueue = null;
|
|
38753
38780
|
stopping = false;
|
|
38781
|
+
lastAcceptMore = true;
|
|
38782
|
+
lastRetryAfterMs = 0;
|
|
38783
|
+
lastQueueDepth = 0;
|
|
38784
|
+
lastQueueCapacity = 0;
|
|
38785
|
+
lastProcessingTimeUs = 0;
|
|
38786
|
+
lastNodesAccepted = 0;
|
|
38787
|
+
lastNodesRejected = 0;
|
|
38788
|
+
lastTokensIngested = 0;
|
|
38789
|
+
lastTokenBurstLimit = 512;
|
|
38790
|
+
lastWalDepth = 0;
|
|
38791
|
+
lastWalCapacity = 0;
|
|
38754
38792
|
snapshotLoaded = false;
|
|
38755
38793
|
snapshotDirty = false;
|
|
38756
38794
|
constructor(kind, config, getRpc, logger, fsApi) {
|
|
@@ -38763,6 +38801,8 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38763
38801
|
this.getRpc = getRpc;
|
|
38764
38802
|
this.logger = logger;
|
|
38765
38803
|
this.snapshotPath = config.snapshotPath ?? resolveMarkdownSnapshotPath(kind);
|
|
38804
|
+
this.priorityMode = config.priorityMode ?? "mtime";
|
|
38805
|
+
this.maxTokensPerFile = Math.max(1, Math.trunc(config.maxTokensPerFile ?? 128e3));
|
|
38766
38806
|
this.tokenizerId = DEFAULT_TOKENIZER_ID;
|
|
38767
38807
|
this.coreDoc = true;
|
|
38768
38808
|
}
|
|
@@ -38815,7 +38855,8 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38815
38855
|
scanState: {
|
|
38816
38856
|
scanning: false,
|
|
38817
38857
|
dirty: false,
|
|
38818
|
-
timer: null
|
|
38858
|
+
timer: null,
|
|
38859
|
+
resumeFromPath: null
|
|
38819
38860
|
},
|
|
38820
38861
|
knownFiles: this.snapshotFilesForRoot(resolved),
|
|
38821
38862
|
directoryWatchers: /* @__PURE__ */ new Map()
|
|
@@ -38833,12 +38874,16 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38833
38874
|
return;
|
|
38834
38875
|
}
|
|
38835
38876
|
rootState.scanState.scanning = true;
|
|
38877
|
+
this.lastAcceptMore = true;
|
|
38878
|
+
this.lastRetryAfterMs = 0;
|
|
38836
38879
|
const scan = (async () => {
|
|
38837
38880
|
const stats = createScanStats();
|
|
38838
38881
|
const startedAt = Date.now();
|
|
38839
38882
|
try {
|
|
38840
38883
|
const currentFiles = /* @__PURE__ */ new Set();
|
|
38841
|
-
|
|
38884
|
+
const candidates = [];
|
|
38885
|
+
await this.walkDirectory(rootState, rootState.root, currentFiles, stats, candidates);
|
|
38886
|
+
await this.syncCandidates(rootState, candidates, stats);
|
|
38842
38887
|
if (!this.stopping) {
|
|
38843
38888
|
await this.pruneDeletedFiles(rootState, currentFiles, stats);
|
|
38844
38889
|
rootState.knownFiles = currentFiles;
|
|
@@ -38862,7 +38907,7 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38862
38907
|
this.activeScans.delete(scan);
|
|
38863
38908
|
}
|
|
38864
38909
|
}
|
|
38865
|
-
scheduleRootScan(rootState) {
|
|
38910
|
+
scheduleRootScan(rootState, delayMs) {
|
|
38866
38911
|
if (!this.started || this.stopping) {
|
|
38867
38912
|
return;
|
|
38868
38913
|
}
|
|
@@ -38878,9 +38923,9 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38878
38923
|
void this.scanRoot(rootState.root).catch((error) => {
|
|
38879
38924
|
this.logger.warn?.(`[markdown-ingest] root scan failed for ${rootState.root}: ${formatError(error)}`);
|
|
38880
38925
|
});
|
|
38881
|
-
}, this.debounceMs);
|
|
38926
|
+
}, Math.max(this.debounceMs, delayMs ?? 0));
|
|
38882
38927
|
}
|
|
38883
|
-
async walkDirectory(rootState, dir, currentFiles, stats) {
|
|
38928
|
+
async walkDirectory(rootState, dir, currentFiles, stats, candidates) {
|
|
38884
38929
|
if (this.shouldPruneDirectory(rootState.root, dir)) {
|
|
38885
38930
|
stats.directoriesPruned++;
|
|
38886
38931
|
return;
|
|
@@ -38903,7 +38948,7 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38903
38948
|
}
|
|
38904
38949
|
const child = path2.join(dir, entry.name);
|
|
38905
38950
|
if (entry.isDirectory()) {
|
|
38906
|
-
await this.walkDirectory(rootState, child, currentFiles, stats);
|
|
38951
|
+
await this.walkDirectory(rootState, child, currentFiles, stats, candidates);
|
|
38907
38952
|
continue;
|
|
38908
38953
|
}
|
|
38909
38954
|
if (!entry.isFile() || !isMarkdownFile(entry.name)) {
|
|
@@ -38916,16 +38961,71 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38916
38961
|
}
|
|
38917
38962
|
stats.filesIncluded++;
|
|
38918
38963
|
currentFiles.add(child);
|
|
38964
|
+
const stat = await this.safeStatWithCtime(child);
|
|
38965
|
+
if (!stat) {
|
|
38966
|
+
continue;
|
|
38967
|
+
}
|
|
38968
|
+
candidates.push({ path: child, size: stat.size, mtimeMs: stat.mtimeMs, ctimeMs: stat.ctimeMs, ordinal: candidates.length });
|
|
38969
|
+
}
|
|
38970
|
+
}
|
|
38971
|
+
async syncCandidates(rootState, candidates, stats) {
|
|
38972
|
+
const sorted = sortCandidates(candidates, this.priorityMode);
|
|
38973
|
+
let skipping = false;
|
|
38974
|
+
if (rootState.scanState.resumeFromPath) {
|
|
38975
|
+
const targetExists = sorted.some((c) => c.path === rootState.scanState.resumeFromPath);
|
|
38976
|
+
if (targetExists) {
|
|
38977
|
+
skipping = true;
|
|
38978
|
+
this.lastAcceptMore = true;
|
|
38979
|
+
this.lastRetryAfterMs = 0;
|
|
38980
|
+
} else {
|
|
38981
|
+
rootState.scanState.resumeFromPath = null;
|
|
38982
|
+
}
|
|
38983
|
+
}
|
|
38984
|
+
for (const candidate of sorted) {
|
|
38985
|
+
if (skipping) {
|
|
38986
|
+
if (candidate.path === rootState.scanState.resumeFromPath) {
|
|
38987
|
+
skipping = false;
|
|
38988
|
+
} else {
|
|
38989
|
+
continue;
|
|
38990
|
+
}
|
|
38991
|
+
}
|
|
38992
|
+
if (this.stopping) {
|
|
38993
|
+
return;
|
|
38994
|
+
}
|
|
38995
|
+
if (!this.lastAcceptMore) {
|
|
38996
|
+
if (!this.stopping) {
|
|
38997
|
+
rootState.scanState.resumeFromPath = candidate.path;
|
|
38998
|
+
this.scheduleRootScan(rootState, this.lastRetryAfterMs);
|
|
38999
|
+
}
|
|
39000
|
+
return;
|
|
39001
|
+
}
|
|
39002
|
+
if (this.lastWalCapacity > 0 && this.lastWalDepth > this.lastWalCapacity * 0.8) {
|
|
39003
|
+
rootState.scanState.resumeFromPath = candidate.path;
|
|
39004
|
+
if (!this.stopping) {
|
|
39005
|
+
this.scheduleRootScan(rootState, 2e3);
|
|
39006
|
+
}
|
|
39007
|
+
return;
|
|
39008
|
+
}
|
|
39009
|
+
const estimatedTokens = estimateTokens(candidate.size);
|
|
39010
|
+
if (estimatedTokens > this.maxTokensPerFile) {
|
|
39011
|
+
stats.filesDeferred++;
|
|
39012
|
+
continue;
|
|
39013
|
+
}
|
|
38919
39014
|
try {
|
|
38920
|
-
const result = await this.syncMarkdownFile(rootState,
|
|
39015
|
+
const result = await this.syncMarkdownFile(rootState, candidate.path, {
|
|
39016
|
+
size: candidate.size,
|
|
39017
|
+
mtimeMs: candidate.mtimeMs,
|
|
39018
|
+
ctimeMs: candidate.ctimeMs
|
|
39019
|
+
});
|
|
38921
39020
|
recordSyncResult(stats, result);
|
|
38922
39021
|
} catch (error) {
|
|
38923
39022
|
stats.syncErrors++;
|
|
38924
39023
|
if (!this.stopping) {
|
|
38925
|
-
this.logger.warn?.(`[markdown-ingest] sync failed for ${
|
|
39024
|
+
this.logger.warn?.(`[markdown-ingest] sync failed for ${candidate.path}: ${formatError(error)}`);
|
|
38926
39025
|
}
|
|
38927
39026
|
}
|
|
38928
39027
|
}
|
|
39028
|
+
rootState.scanState.resumeFromPath = null;
|
|
38929
39029
|
}
|
|
38930
39030
|
shouldPruneDirectory(root, dir) {
|
|
38931
39031
|
const relative = toPosixPath(path2.relative(root, dir));
|
|
@@ -38946,6 +39046,11 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38946
39046
|
try {
|
|
38947
39047
|
const watcher = this.fsApi.watch(dir, () => {
|
|
38948
39048
|
if (!this.stopping) {
|
|
39049
|
+
rootState.scanState.resumeFromPath = null;
|
|
39050
|
+
if (rootState.scanState.timer) {
|
|
39051
|
+
clearTimeout(rootState.scanState.timer);
|
|
39052
|
+
rootState.scanState.timer = null;
|
|
39053
|
+
}
|
|
38949
39054
|
this.scheduleRootScan(rootState);
|
|
38950
39055
|
}
|
|
38951
39056
|
});
|
|
@@ -38996,10 +39101,10 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
38996
39101
|
stats.filesDeleted++;
|
|
38997
39102
|
}
|
|
38998
39103
|
}
|
|
38999
|
-
async syncMarkdownFile(rootState, filePath) {
|
|
39104
|
+
async syncMarkdownFile(rootState, filePath, initialStat) {
|
|
39000
39105
|
const sourceDoc = filePath;
|
|
39001
39106
|
const relativePath = toPosixPath(path2.relative(rootState.root, filePath));
|
|
39002
|
-
const stat = await this.
|
|
39107
|
+
const stat = initialStat ?? await this.safeStatWithCtime(filePath);
|
|
39003
39108
|
if (!stat) {
|
|
39004
39109
|
await this.deleteSourceDocument(sourceDoc);
|
|
39005
39110
|
this.fileStates.delete(sourceDoc);
|
|
@@ -39010,14 +39115,18 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39010
39115
|
if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) {
|
|
39011
39116
|
return "unchanged";
|
|
39012
39117
|
}
|
|
39013
|
-
const
|
|
39014
|
-
|
|
39118
|
+
const maxBytes = this.maxTokensPerFile * 4 + 3;
|
|
39119
|
+
const streamed = await this.safeReadFileStreamed(filePath, maxBytes);
|
|
39120
|
+
if (streamed === "too_large") {
|
|
39121
|
+
return "skipped";
|
|
39122
|
+
}
|
|
39123
|
+
if (!streamed) {
|
|
39015
39124
|
await this.deleteSourceDocument(sourceDoc);
|
|
39016
39125
|
this.fileStates.delete(sourceDoc);
|
|
39017
39126
|
this.snapshotDirty = true;
|
|
39018
39127
|
return "deleted";
|
|
39019
39128
|
}
|
|
39020
|
-
const fileHash =
|
|
39129
|
+
const { text, fileHash } = streamed;
|
|
39021
39130
|
if (cached && cached.fileHash === fileHash) {
|
|
39022
39131
|
this.setFileState(sourceDoc, {
|
|
39023
39132
|
root: rootState.root,
|
|
@@ -39029,14 +39138,13 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39029
39138
|
});
|
|
39030
39139
|
return "unchanged";
|
|
39031
39140
|
}
|
|
39032
|
-
const text = textDecoder2.decode(bytes);
|
|
39033
39141
|
if (this.kind === "obsidian" && this.includePatterns.length === 0 && !looksLikeObsidianNote(filePath, text)) {
|
|
39034
39142
|
await this.deleteSourceDocument(sourceDoc);
|
|
39035
39143
|
this.fileStates.delete(sourceDoc);
|
|
39036
39144
|
this.snapshotDirty = true;
|
|
39037
39145
|
return "skipped";
|
|
39038
39146
|
}
|
|
39039
|
-
await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs);
|
|
39147
|
+
await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs, stat.ctimeMs);
|
|
39040
39148
|
this.setFileState(sourceDoc, {
|
|
39041
39149
|
root: rootState.root,
|
|
39042
39150
|
sourceDoc,
|
|
@@ -39051,9 +39159,9 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39051
39159
|
this.fileStates.set(sourceDoc, state);
|
|
39052
39160
|
this.snapshotDirty = true;
|
|
39053
39161
|
}
|
|
39054
|
-
async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs) {
|
|
39162
|
+
async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs, sourceCtimeMs) {
|
|
39055
39163
|
const queue = await this.getIngestQueue();
|
|
39056
|
-
await queue.enqueueIngest(
|
|
39164
|
+
const feedback = await queue.enqueueIngest(
|
|
39057
39165
|
sourceDoc,
|
|
39058
39166
|
text,
|
|
39059
39167
|
{
|
|
@@ -39066,11 +39174,44 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39066
39174
|
fileHash,
|
|
39067
39175
|
sourceSize,
|
|
39068
39176
|
sourceMtimeMs: Math.trunc(sourceMtimeMs),
|
|
39177
|
+
sourceCtimeMs: Math.trunc(sourceCtimeMs),
|
|
39069
39178
|
ingestVersion: MARKDOWN_INGEST_VERSION,
|
|
39070
39179
|
hashBackend: HASH_BACKEND
|
|
39071
39180
|
}
|
|
39072
|
-
}
|
|
39181
|
+
},
|
|
39182
|
+
this.lastTokenBurstLimit
|
|
39073
39183
|
);
|
|
39184
|
+
this.applyIngestFeedback(feedback);
|
|
39185
|
+
}
|
|
39186
|
+
applyIngestFeedback(feedback) {
|
|
39187
|
+
if (feedback && typeof feedback.acceptMore === "boolean") {
|
|
39188
|
+
this.lastAcceptMore = feedback.acceptMore;
|
|
39189
|
+
this.lastQueueDepth = feedback.queueDepth ?? 0;
|
|
39190
|
+
this.lastQueueCapacity = feedback.queueCapacity ?? 0;
|
|
39191
|
+
this.lastProcessingTimeUs = feedback.processingTimeUs ?? 0;
|
|
39192
|
+
this.lastNodesAccepted = feedback.nodesAccepted ?? 0;
|
|
39193
|
+
this.lastNodesRejected = feedback.nodesRejected ?? 0;
|
|
39194
|
+
this.lastTokensIngested = feedback.tokensIngested ?? 0;
|
|
39195
|
+
if (feedback.tokenBurstLimit && feedback.tokenBurstLimit > 0) {
|
|
39196
|
+
this.lastTokenBurstLimit = feedback.tokenBurstLimit;
|
|
39197
|
+
}
|
|
39198
|
+
this.lastWalDepth = feedback.walDepth ?? 0;
|
|
39199
|
+
this.lastWalCapacity = feedback.walCapacity ?? 0;
|
|
39200
|
+
if (feedback.acceptMore) {
|
|
39201
|
+
this.lastRetryAfterMs = 0;
|
|
39202
|
+
} else {
|
|
39203
|
+
this.lastRetryAfterMs = feedback.retryAfterMs || 1e3;
|
|
39204
|
+
}
|
|
39205
|
+
} else {
|
|
39206
|
+
this.lastAcceptMore = true;
|
|
39207
|
+
this.lastRetryAfterMs = 0;
|
|
39208
|
+
this.lastQueueDepth = 0;
|
|
39209
|
+
this.lastQueueCapacity = 0;
|
|
39210
|
+
this.lastProcessingTimeUs = 0;
|
|
39211
|
+
this.lastNodesAccepted = 0;
|
|
39212
|
+
this.lastNodesRejected = 0;
|
|
39213
|
+
this.lastTokensIngested = 0;
|
|
39214
|
+
}
|
|
39074
39215
|
}
|
|
39075
39216
|
async deleteSourceDocument(sourceDoc) {
|
|
39076
39217
|
const queue = await this.getIngestQueue();
|
|
@@ -39079,7 +39220,9 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39079
39220
|
async getIngestQueue() {
|
|
39080
39221
|
if (!this.ingestQueue) {
|
|
39081
39222
|
const rpc = await this.getRpc();
|
|
39082
|
-
this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger
|
|
39223
|
+
this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger, {
|
|
39224
|
+
onChunkFeedback: (feedback) => this.applyIngestFeedback(feedback)
|
|
39225
|
+
});
|
|
39083
39226
|
}
|
|
39084
39227
|
return this.ingestQueue;
|
|
39085
39228
|
}
|
|
@@ -39090,13 +39233,49 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39090
39233
|
return null;
|
|
39091
39234
|
}
|
|
39092
39235
|
}
|
|
39093
|
-
async
|
|
39236
|
+
async safeStatWithCtime(filePath) {
|
|
39094
39237
|
try {
|
|
39095
|
-
return await this.fsApi.
|
|
39238
|
+
return await this.fsApi.stat(filePath);
|
|
39096
39239
|
} catch {
|
|
39097
39240
|
return null;
|
|
39098
39241
|
}
|
|
39099
39242
|
}
|
|
39243
|
+
async safeReadFileStreamed(filePath, maxBytes) {
|
|
39244
|
+
let stream = null;
|
|
39245
|
+
try {
|
|
39246
|
+
stream = await this.fsApi.openReadStream(filePath);
|
|
39247
|
+
const decoder = new TextDecoder();
|
|
39248
|
+
const chunks = [];
|
|
39249
|
+
let hash = 0xcbf29ce484222325n;
|
|
39250
|
+
let total = 0;
|
|
39251
|
+
const buffer = Buffer.allocUnsafe(STREAM_CHUNK_BYTES);
|
|
39252
|
+
while (true) {
|
|
39253
|
+
const { bytesRead } = await stream.read(buffer);
|
|
39254
|
+
if (bytesRead === 0) {
|
|
39255
|
+
break;
|
|
39256
|
+
}
|
|
39257
|
+
total += bytesRead;
|
|
39258
|
+
if (total > maxBytes) {
|
|
39259
|
+
return "too_large";
|
|
39260
|
+
}
|
|
39261
|
+
const chunk = buffer.subarray(0, bytesRead);
|
|
39262
|
+
hash = updateFnv1a64(hash, chunk);
|
|
39263
|
+
chunks.push(decoder.decode(chunk, { stream: true }));
|
|
39264
|
+
}
|
|
39265
|
+
chunks.push(decoder.decode());
|
|
39266
|
+
return {
|
|
39267
|
+
text: chunks.join(""),
|
|
39268
|
+
fileHash: hash.toString(16).padStart(16, "0")
|
|
39269
|
+
};
|
|
39270
|
+
} catch {
|
|
39271
|
+
return null;
|
|
39272
|
+
} finally {
|
|
39273
|
+
if (stream) {
|
|
39274
|
+
await stream.close().catch(() => {
|
|
39275
|
+
});
|
|
39276
|
+
}
|
|
39277
|
+
}
|
|
39278
|
+
}
|
|
39100
39279
|
snapshotFilesForRoot(root) {
|
|
39101
39280
|
const files = /* @__PURE__ */ new Set();
|
|
39102
39281
|
for (const state of this.fileStates.values()) {
|
|
@@ -39159,7 +39338,7 @@ var DirectoryMarkdownSourceAdapter = class {
|
|
|
39159
39338
|
}
|
|
39160
39339
|
logScanStats(root, stats, durationMs) {
|
|
39161
39340
|
this.logger.info?.(
|
|
39162
|
-
`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} errors=${stats.syncErrors} durationMs=${durationMs}`
|
|
39341
|
+
`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} deferred=${stats.filesDeferred} errors=${stats.syncErrors} durationMs=${durationMs}`
|
|
39163
39342
|
);
|
|
39164
39343
|
}
|
|
39165
39344
|
};
|
|
@@ -39173,9 +39352,27 @@ function createScanStats() {
|
|
|
39173
39352
|
filesUnchanged: 0,
|
|
39174
39353
|
filesIngested: 0,
|
|
39175
39354
|
filesDeleted: 0,
|
|
39176
|
-
syncErrors: 0
|
|
39355
|
+
syncErrors: 0,
|
|
39356
|
+
filesDeferred: 0
|
|
39177
39357
|
};
|
|
39178
39358
|
}
|
|
39359
|
+
/**
 * Estimates a token count from a byte size at four bytes per token.
 *
 * @param {number} size size in bytes.
 * @returns {number} `floor(size / 4)`, but never less than 1.
 */
function estimateTokens(size) {
  const wholeTokens = Math.floor(size / 4);
  return wholeTokens < 1 ? 1 : wholeTokens;
}
|
|
39362
|
+
/**
 * Returns a sorted copy of the scan candidates; the input is not modified.
 *
 * - "size": largest first
 * - "ctime": newest ctimeMs first
 * - "fifo": discovery order (ascending ordinal)
 * - anything else: newest mtimeMs first
 *
 * Ties in every keyed mode fall back to ascending ordinal.
 *
 * @param candidates entries with size, mtimeMs, ctimeMs and ordinal.
 * @param mode priority mode name.
 * @returns a new array in priority order.
 */
function sortCandidates(candidates, mode) {
  const byOrdinal = (a, b) => a.ordinal - b.ordinal;
  const descendingBy = (key) => (a, b) => b[key] - a[key] || byOrdinal(a, b);
  let comparator;
  switch (mode) {
    case "size":
      comparator = descendingBy("size");
      break;
    case "ctime":
      comparator = descendingBy("ctimeMs");
      break;
    case "fifo":
      comparator = byOrdinal;
      break;
    default:
      comparator = descendingBy("mtimeMs");
  }
  return Array.from(candidates).sort(comparator);
}
|
|
39179
39376
|
function recordSyncResult(stats, result) {
|
|
39180
39377
|
if (result === "ingested") {
|
|
39181
39378
|
stats.filesIngested++;
|
|
@@ -39190,7 +39387,6 @@ function recordSyncResult(stats, result) {
|
|
|
39190
39387
|
function toPosixPath(value) {
|
|
39191
39388
|
return value.split(path2.sep).join("/");
|
|
39192
39389
|
}
|
|
39193
|
-
var textDecoder2 = new TextDecoder();
|
|
39194
39390
|
function normalizeMarkdownRoots(roots) {
|
|
39195
39391
|
if (!roots?.length) {
|
|
39196
39392
|
return [];
|
|
@@ -39205,6 +39401,15 @@ function normalizeMarkdownRoots(roots) {
|
|
|
39205
39401
|
}
|
|
39206
39402
|
return [...resolved];
|
|
39207
39403
|
}
|
|
39404
|
+
/**
 * Folds a run of bytes into a running 64-bit FNV-1a hash.
 *
 * Each byte is XORed into the state, which is then multiplied by the FNV
 * prime and truncated to 64 bits. Feeding the result back in as `seed`
 * continues the same hash over further bytes.
 *
 * @param {bigint} seed current hash state.
 * @param bytes indexable byte sequence; missing entries count as 0.
 * @returns {bigint} the updated hash state.
 */
function updateFnv1a64(seed, bytes) {
  const FNV_PRIME = 0x100000001b3n;
  let state = seed;
  for (let index = 0; index < bytes.length; index += 1) {
    const octet = BigInt(bytes[index] ?? 0);
    state = BigInt.asUintN(64, (state ^ octet) * FNV_PRIME);
  }
  return state;
}
|
|
39208
39413
|
function resolveMarkdownSnapshotPath(kind, configuredPath) {
|
|
39209
39414
|
const trimmed = configuredPath?.trim();
|
|
39210
39415
|
if (trimmed) {
|
|
@@ -39221,10 +39426,22 @@ function createRealFsApi2() {
|
|
|
39221
39426
|
readdir: async (dir) => fsp2.readdir(dir, { withFileTypes: true }),
|
|
39222
39427
|
readFile: async (file) => fsp2.readFile(file),
|
|
39223
39428
|
stat: async (file) => {
|
|
39224
|
-
const
|
|
39225
|
-
return { size:
|
|
39429
|
+
const s = await fsp2.stat(file);
|
|
39430
|
+
return { size: s.size, mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs };
|
|
39226
39431
|
},
|
|
39227
|
-
watch: (dir, onChange) => fs2.watch(dir, onChange)
|
|
39432
|
+
watch: (dir, onChange) => fs2.watch(dir, onChange),
|
|
39433
|
+
openReadStream: async (file) => {
|
|
39434
|
+
const handle = await fsp2.open(file, "r");
|
|
39435
|
+
return {
|
|
39436
|
+
read: async (buffer) => {
|
|
39437
|
+
const { bytesRead } = await handle.read(buffer, 0, buffer.length, null);
|
|
39438
|
+
return { bytesRead };
|
|
39439
|
+
},
|
|
39440
|
+
close: async () => {
|
|
39441
|
+
await handle.close();
|
|
39442
|
+
}
|
|
39443
|
+
};
|
|
39444
|
+
}
|
|
39228
39445
|
};
|
|
39229
39446
|
}
|
|
39230
39447
|
function isMarkdownFile(fileName) {
|
package/dist/ingest-queue.d.ts
CHANGED
|
@@ -7,6 +7,8 @@ export interface IngestQueueOptions {
|
|
|
7
7
|
retryBaseDelayMs: number;
|
|
8
8
|
/** Max retries per chunk. */
|
|
9
9
|
maxRetries: number;
|
|
10
|
+
/** Called after each chunk is accepted so scan-level state stays current. */
|
|
11
|
+
onChunkFeedback?: (feedback: IngestFeedback) => void;
|
|
10
12
|
}
|
|
11
13
|
interface IngestMarkdownDocumentParams {
|
|
12
14
|
sourceDoc: string;
|
|
@@ -20,11 +22,25 @@ interface IngestMarkdownDocumentParams {
|
|
|
20
22
|
fileHash: string;
|
|
21
23
|
sourceSize: number;
|
|
22
24
|
sourceMtimeMs: number;
|
|
25
|
+
sourceCtimeMs: number;
|
|
23
26
|
ingestVersion: number;
|
|
24
27
|
hashBackend: string;
|
|
25
28
|
};
|
|
26
29
|
mode?: IngestMode;
|
|
27
30
|
}
|
|
31
|
+
interface IngestFeedback {
|
|
32
|
+
queueDepth: number;
|
|
33
|
+
queueCapacity: number;
|
|
34
|
+
acceptMore: boolean;
|
|
35
|
+
retryAfterMs: number;
|
|
36
|
+
processingTimeUs: number;
|
|
37
|
+
nodesAccepted: number;
|
|
38
|
+
nodesRejected: number;
|
|
39
|
+
tokensIngested: number;
|
|
40
|
+
tokenBurstLimit: number;
|
|
41
|
+
walDepth?: number;
|
|
42
|
+
walCapacity?: number;
|
|
43
|
+
}
|
|
28
44
|
export declare class IngestQueue {
|
|
29
45
|
private readonly queue;
|
|
30
46
|
private readonly rpcCall;
|
|
@@ -32,7 +48,7 @@ export declare class IngestQueue {
|
|
|
32
48
|
private readonly options;
|
|
33
49
|
private running;
|
|
34
50
|
constructor(rpcCall: <T>(method: string, params: unknown) => Promise<T>, logger: LoggerLike, options?: Partial<IngestQueueOptions>);
|
|
35
|
-
enqueueIngest(sourceDoc: string, text: string, baseParams: Omit<IngestMarkdownDocumentParams, "sourceDoc" | "text" | "mode"
|
|
51
|
+
enqueueIngest(sourceDoc: string, text: string, baseParams: Omit<IngestMarkdownDocumentParams, "sourceDoc" | "text" | "mode">, maxChunkTokens?: number): Promise<IngestFeedback | undefined>;
|
|
36
52
|
private ingestWithRetry;
|
|
37
53
|
enqueueDelete(sourceDoc: string): Promise<void>;
|
|
38
54
|
}
|
package/dist/ingest-queue.js
CHANGED
|
@@ -19,40 +19,59 @@ export class IngestQueue {
|
|
|
19
19
|
this.options.chunkTokens = DEFAULT_OPTIONS.chunkTokens;
|
|
20
20
|
}
|
|
21
21
|
}
|
|
22
|
-
async enqueueIngest(sourceDoc, text, baseParams) {
|
|
22
|
+
async enqueueIngest(sourceDoc, text, baseParams, maxChunkTokens) {
|
|
23
23
|
if (this.options.chunkTokens === Infinity) {
|
|
24
|
-
|
|
25
|
-
return this.ingestWithRetry({
|
|
24
|
+
const resp = await this.ingestWithRetry({
|
|
26
25
|
...baseParams,
|
|
27
26
|
sourceDoc,
|
|
28
27
|
text,
|
|
29
28
|
mode: IngestMode.REPLACE,
|
|
30
29
|
});
|
|
30
|
+
return resp.feedback;
|
|
31
31
|
}
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
}
|
|
41
|
-
// Multiple chunks: clear the source once, then append the remaining chunks.
|
|
42
|
-
// Sending REPLACE last deletes the earlier chunks from the same source_doc.
|
|
43
|
-
for (let i = 0; i < chunks.length; i++) {
|
|
44
|
-
const isFirst = i === 0;
|
|
32
|
+
let currentLimit = maxChunkTokens && maxChunkTokens > 0 ? maxChunkTokens : this.options.chunkTokens;
|
|
33
|
+
let offset = 0;
|
|
34
|
+
let isFirst = true;
|
|
35
|
+
let lastFeedback;
|
|
36
|
+
while (offset < text.length) {
|
|
37
|
+
const remainingText = text.slice(offset);
|
|
38
|
+
const chunks = splitIntoChunks(remainingText, currentLimit);
|
|
39
|
+
const chunkText = chunks[0].text;
|
|
45
40
|
const chunkParams = {
|
|
46
41
|
...baseParams,
|
|
47
42
|
sourceDoc,
|
|
48
|
-
text:
|
|
43
|
+
text: chunkText,
|
|
49
44
|
mode: isFirst ? IngestMode.REPLACE : IngestMode.APPEND,
|
|
50
45
|
};
|
|
51
|
-
await this.ingestWithRetry(chunkParams);
|
|
46
|
+
const resp = await this.ingestWithRetry(chunkParams);
|
|
47
|
+
lastFeedback = resp.feedback;
|
|
48
|
+
if (lastFeedback &&
|
|
49
|
+
lastFeedback.nodesAccepted === 0 &&
|
|
50
|
+
lastFeedback.tokenBurstLimit &&
|
|
51
|
+
lastFeedback.tokenBurstLimit > 0 &&
|
|
52
|
+
lastFeedback.tokenBurstLimit < currentLimit) {
|
|
53
|
+
currentLimit = lastFeedback.tokenBurstLimit;
|
|
54
|
+
continue;
|
|
55
|
+
}
|
|
56
|
+
if (lastFeedback && lastFeedback.nodesAccepted === 0) {
|
|
57
|
+
this.logger.warn?.(`[ingest-queue] Chunk permanently rejected for ${sourceDoc} ` +
|
|
58
|
+
`at offset=${offset} length=${chunkText.length} ` +
|
|
59
|
+
`tokenBurstLimit=${lastFeedback.tokenBurstLimit ?? "unset"}`);
|
|
60
|
+
}
|
|
61
|
+
if (this.options.onChunkFeedback && lastFeedback) {
|
|
62
|
+
this.options.onChunkFeedback(lastFeedback);
|
|
63
|
+
}
|
|
64
|
+
offset += chunkText.length;
|
|
65
|
+
isFirst = false;
|
|
66
|
+
if (lastFeedback && !lastFeedback.acceptMore && offset < text.length) {
|
|
67
|
+
const delay = lastFeedback.retryAfterMs || 1000;
|
|
68
|
+
await new Promise((resolve) => setTimeout(resolve, delay));
|
|
69
|
+
}
|
|
52
70
|
}
|
|
71
|
+
return lastFeedback;
|
|
53
72
|
}
|
|
54
73
|
async ingestWithRetry(params) {
|
|
55
|
-
|
|
74
|
+
return withRetry(() => this.rpcCall("ingest_markdown_document", params), this.options.maxRetries, this.options.retryBaseDelayMs, this.logger, `ingest_markdown_document(${params.sourceDoc})`);
|
|
56
75
|
}
|
|
57
76
|
async enqueueDelete(sourceDoc) {
|
|
58
77
|
await withRetry(() => this.rpcCall("delete_authored_document", { sourceDoc }), this.options.maxRetries, this.options.retryBaseDelayMs, this.logger, `delete_authored_document(${sourceDoc})`);
|
|
@@ -14,14 +14,22 @@ interface FsDirentLike {
|
|
|
14
14
|
interface FsWatcherLike extends Disposable {
|
|
15
15
|
on(event: "error", handler: (error: Error) => void): void;
|
|
16
16
|
}
|
|
17
|
+
interface FsReadStream {
|
|
18
|
+
read(buffer: Uint8Array): Promise<{
|
|
19
|
+
bytesRead: number;
|
|
20
|
+
}>;
|
|
21
|
+
close(): Promise<void>;
|
|
22
|
+
}
|
|
17
23
|
interface FsApi {
|
|
18
24
|
readdir(dir: string): Promise<FsDirentLike[]>;
|
|
19
25
|
readFile(file: string): Promise<Uint8Array>;
|
|
20
26
|
stat(file: string): Promise<{
|
|
21
27
|
size: number;
|
|
22
28
|
mtimeMs: number;
|
|
29
|
+
ctimeMs: number;
|
|
23
30
|
}>;
|
|
24
31
|
watch(dir: string, onChange: (event: string, filename: string | Buffer | null) => void): FsWatcherLike;
|
|
32
|
+
openReadStream(file: string): Promise<FsReadStream>;
|
|
25
33
|
}
|
|
26
34
|
export interface MarkdownSourceAdapter {
|
|
27
35
|
kind: string;
|
package/dist/markdown-ingest.js
CHANGED
|
@@ -2,13 +2,13 @@ import fs from "node:fs";
|
|
|
2
2
|
import fsp from "node:fs/promises";
|
|
3
3
|
import os from "node:os";
|
|
4
4
|
import path from "node:path";
|
|
5
|
-
import { hashBytes } from "./markdown-hash.js";
|
|
6
5
|
import { formatError } from "./format-error.js";
|
|
7
6
|
import { IngestQueue } from "./ingest-queue.js";
|
|
8
7
|
const DEFAULT_DEBOUNCE_MS = 150;
|
|
9
8
|
const DEFAULT_TOKENIZER_ID = "markdown-ingest:v1";
|
|
10
9
|
const MARKDOWN_INGEST_VERSION = 3;
|
|
11
10
|
const HASH_BACKEND = "wasm-fnv1a64";
|
|
11
|
+
const STREAM_CHUNK_BYTES = 64 * 1024;
|
|
12
12
|
export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = createRealFsApi()) {
|
|
13
13
|
const adapters = [];
|
|
14
14
|
const genericRoots = normalizeMarkdownRoots(cfg.markdownIngestionRoots);
|
|
@@ -19,6 +19,8 @@ export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsA
|
|
|
19
19
|
exclude: cfg.markdownIngestionExclude,
|
|
20
20
|
debounceMs: cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS,
|
|
21
21
|
snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath),
|
|
22
|
+
priorityMode: cfg.markdownIngestionPriorityMode,
|
|
23
|
+
maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile,
|
|
22
24
|
}, getRpc, logger, fsApi));
|
|
23
25
|
}
|
|
24
26
|
const obsidianRoots = normalizeMarkdownRoots(cfg.markdownIngestionObsidianRoots);
|
|
@@ -29,6 +31,8 @@ export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsA
|
|
|
29
31
|
exclude: cfg.markdownIngestionObsidianExclude,
|
|
30
32
|
debounceMs: cfg.markdownIngestionObsidianDebounceMs ?? cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS,
|
|
31
33
|
snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath),
|
|
34
|
+
priorityMode: cfg.markdownIngestionPriorityMode,
|
|
35
|
+
maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile,
|
|
32
36
|
}, getRpc, logger, fsApi));
|
|
33
37
|
}
|
|
34
38
|
if (adapters.length === 0) {
|
|
@@ -77,6 +81,8 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
77
81
|
getRpc;
|
|
78
82
|
logger;
|
|
79
83
|
snapshotPath;
|
|
84
|
+
priorityMode;
|
|
85
|
+
maxTokensPerFile;
|
|
80
86
|
states = new Map();
|
|
81
87
|
fileStates = new Map();
|
|
82
88
|
activeScans = new Set();
|
|
@@ -85,6 +91,17 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
85
91
|
started = false;
|
|
86
92
|
ingestQueue = null;
|
|
87
93
|
stopping = false;
|
|
94
|
+
lastAcceptMore = true;
|
|
95
|
+
lastRetryAfterMs = 0;
|
|
96
|
+
lastQueueDepth = 0;
|
|
97
|
+
lastQueueCapacity = 0;
|
|
98
|
+
lastProcessingTimeUs = 0;
|
|
99
|
+
lastNodesAccepted = 0;
|
|
100
|
+
lastNodesRejected = 0;
|
|
101
|
+
lastTokensIngested = 0;
|
|
102
|
+
lastTokenBurstLimit = 512;
|
|
103
|
+
lastWalDepth = 0;
|
|
104
|
+
lastWalCapacity = 0;
|
|
88
105
|
snapshotLoaded = false;
|
|
89
106
|
snapshotDirty = false;
|
|
90
107
|
constructor(kind, config, getRpc, logger, fsApi) {
|
|
@@ -97,6 +114,8 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
97
114
|
this.getRpc = getRpc;
|
|
98
115
|
this.logger = logger;
|
|
99
116
|
this.snapshotPath = config.snapshotPath ?? resolveMarkdownSnapshotPath(kind);
|
|
117
|
+
this.priorityMode = config.priorityMode ?? "mtime";
|
|
118
|
+
this.maxTokensPerFile = Math.max(1, Math.trunc(config.maxTokensPerFile ?? 128_000));
|
|
100
119
|
this.tokenizerId = DEFAULT_TOKENIZER_ID;
|
|
101
120
|
this.coreDoc = true;
|
|
102
121
|
}
|
|
@@ -150,6 +169,7 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
150
169
|
scanning: false,
|
|
151
170
|
dirty: false,
|
|
152
171
|
timer: null,
|
|
172
|
+
resumeFromPath: null,
|
|
153
173
|
},
|
|
154
174
|
knownFiles: this.snapshotFilesForRoot(resolved),
|
|
155
175
|
directoryWatchers: new Map(),
|
|
@@ -167,12 +187,16 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
167
187
|
return;
|
|
168
188
|
}
|
|
169
189
|
rootState.scanState.scanning = true;
|
|
190
|
+
this.lastAcceptMore = true;
|
|
191
|
+
this.lastRetryAfterMs = 0;
|
|
170
192
|
const scan = (async () => {
|
|
171
193
|
const stats = createScanStats();
|
|
172
194
|
const startedAt = Date.now();
|
|
173
195
|
try {
|
|
174
196
|
const currentFiles = new Set();
|
|
175
|
-
|
|
197
|
+
const candidates = [];
|
|
198
|
+
await this.walkDirectory(rootState, rootState.root, currentFiles, stats, candidates);
|
|
199
|
+
await this.syncCandidates(rootState, candidates, stats);
|
|
176
200
|
if (!this.stopping) {
|
|
177
201
|
await this.pruneDeletedFiles(rootState, currentFiles, stats);
|
|
178
202
|
rootState.knownFiles = currentFiles;
|
|
@@ -198,7 +222,7 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
198
222
|
this.activeScans.delete(scan);
|
|
199
223
|
}
|
|
200
224
|
}
|
|
201
|
-
scheduleRootScan(rootState) {
|
|
225
|
+
scheduleRootScan(rootState, delayMs) {
|
|
202
226
|
if (!this.started || this.stopping) {
|
|
203
227
|
return;
|
|
204
228
|
}
|
|
@@ -214,9 +238,9 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
214
238
|
void this.scanRoot(rootState.root).catch((error) => {
|
|
215
239
|
this.logger.warn?.(`[markdown-ingest] root scan failed for ${rootState.root}: ${formatError(error)}`);
|
|
216
240
|
});
|
|
217
|
-
}, this.debounceMs);
|
|
241
|
+
}, Math.max(this.debounceMs, delayMs ?? 0));
|
|
218
242
|
}
|
|
219
|
-
async walkDirectory(rootState, dir, currentFiles, stats) {
|
|
243
|
+
async walkDirectory(rootState, dir, currentFiles, stats, candidates) {
|
|
220
244
|
if (this.shouldPruneDirectory(rootState.root, dir)) {
|
|
221
245
|
stats.directoriesPruned++;
|
|
222
246
|
return;
|
|
@@ -240,7 +264,7 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
240
264
|
}
|
|
241
265
|
const child = path.join(dir, entry.name);
|
|
242
266
|
if (entry.isDirectory()) {
|
|
243
|
-
await this.walkDirectory(rootState, child, currentFiles, stats);
|
|
267
|
+
await this.walkDirectory(rootState, child, currentFiles, stats, candidates);
|
|
244
268
|
continue;
|
|
245
269
|
}
|
|
246
270
|
if (!entry.isFile() || !isMarkdownFile(entry.name)) {
|
|
@@ -253,17 +277,74 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
253
277
|
}
|
|
254
278
|
stats.filesIncluded++;
|
|
255
279
|
currentFiles.add(child);
|
|
280
|
+
const stat = await this.safeStatWithCtime(child);
|
|
281
|
+
if (!stat) {
|
|
282
|
+
continue;
|
|
283
|
+
}
|
|
284
|
+
candidates.push({ path: child, size: stat.size, mtimeMs: stat.mtimeMs, ctimeMs: stat.ctimeMs, ordinal: candidates.length });
|
|
285
|
+
}
|
|
286
|
+
}
|
|
287
|
+
async syncCandidates(rootState, candidates, stats) {
|
|
288
|
+
const sorted = sortCandidates(candidates, this.priorityMode);
|
|
289
|
+
let skipping = false;
|
|
290
|
+
if (rootState.scanState.resumeFromPath) {
|
|
291
|
+
const targetExists = sorted.some((c) => c.path === rootState.scanState.resumeFromPath);
|
|
292
|
+
if (targetExists) {
|
|
293
|
+
skipping = true;
|
|
294
|
+
this.lastAcceptMore = true;
|
|
295
|
+
this.lastRetryAfterMs = 0;
|
|
296
|
+
}
|
|
297
|
+
else {
|
|
298
|
+
rootState.scanState.resumeFromPath = null;
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
for (const candidate of sorted) {
|
|
302
|
+
if (skipping) {
|
|
303
|
+
if (candidate.path === rootState.scanState.resumeFromPath) {
|
|
304
|
+
skipping = false;
|
|
305
|
+
}
|
|
306
|
+
else {
|
|
307
|
+
continue;
|
|
308
|
+
}
|
|
309
|
+
}
|
|
310
|
+
if (this.stopping) {
|
|
311
|
+
return;
|
|
312
|
+
}
|
|
313
|
+
if (!this.lastAcceptMore) {
|
|
314
|
+
if (!this.stopping) {
|
|
315
|
+
rootState.scanState.resumeFromPath = candidate.path;
|
|
316
|
+
this.scheduleRootScan(rootState, this.lastRetryAfterMs);
|
|
317
|
+
}
|
|
318
|
+
return;
|
|
319
|
+
}
|
|
320
|
+
if (this.lastWalCapacity > 0 && this.lastWalDepth > this.lastWalCapacity * 0.8) {
|
|
321
|
+
rootState.scanState.resumeFromPath = candidate.path;
|
|
322
|
+
if (!this.stopping) {
|
|
323
|
+
this.scheduleRootScan(rootState, 2000);
|
|
324
|
+
}
|
|
325
|
+
return;
|
|
326
|
+
}
|
|
327
|
+
const estimatedTokens = estimateTokens(candidate.size);
|
|
328
|
+
if (estimatedTokens > this.maxTokensPerFile) {
|
|
329
|
+
stats.filesDeferred++;
|
|
330
|
+
continue;
|
|
331
|
+
}
|
|
256
332
|
try {
|
|
257
|
-
const result = await this.syncMarkdownFile(rootState,
|
|
333
|
+
const result = await this.syncMarkdownFile(rootState, candidate.path, {
|
|
334
|
+
size: candidate.size,
|
|
335
|
+
mtimeMs: candidate.mtimeMs,
|
|
336
|
+
ctimeMs: candidate.ctimeMs,
|
|
337
|
+
});
|
|
258
338
|
recordSyncResult(stats, result);
|
|
259
339
|
}
|
|
260
340
|
catch (error) {
|
|
261
341
|
stats.syncErrors++;
|
|
262
342
|
if (!this.stopping) {
|
|
263
|
-
this.logger.warn?.(`[markdown-ingest] sync failed for ${
|
|
343
|
+
this.logger.warn?.(`[markdown-ingest] sync failed for ${candidate.path}: ${formatError(error)}`);
|
|
264
344
|
}
|
|
265
345
|
}
|
|
266
346
|
}
|
|
347
|
+
rootState.scanState.resumeFromPath = null;
|
|
267
348
|
}
|
|
268
349
|
shouldPruneDirectory(root, dir) {
|
|
269
350
|
const relative = toPosixPath(path.relative(root, dir));
|
|
@@ -284,6 +365,11 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
284
365
|
try {
|
|
285
366
|
const watcher = this.fsApi.watch(dir, () => {
|
|
286
367
|
if (!this.stopping) {
|
|
368
|
+
rootState.scanState.resumeFromPath = null;
|
|
369
|
+
if (rootState.scanState.timer) {
|
|
370
|
+
clearTimeout(rootState.scanState.timer);
|
|
371
|
+
rootState.scanState.timer = null;
|
|
372
|
+
}
|
|
287
373
|
this.scheduleRootScan(rootState);
|
|
288
374
|
}
|
|
289
375
|
});
|
|
@@ -335,10 +421,10 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
335
421
|
stats.filesDeleted++;
|
|
336
422
|
}
|
|
337
423
|
}
|
|
338
|
-
async syncMarkdownFile(rootState, filePath) {
|
|
424
|
+
async syncMarkdownFile(rootState, filePath, initialStat) {
|
|
339
425
|
const sourceDoc = filePath;
|
|
340
426
|
const relativePath = toPosixPath(path.relative(rootState.root, filePath));
|
|
341
|
-
const stat = await this.
|
|
427
|
+
const stat = initialStat ?? (await this.safeStatWithCtime(filePath));
|
|
342
428
|
if (!stat) {
|
|
343
429
|
await this.deleteSourceDocument(sourceDoc);
|
|
344
430
|
this.fileStates.delete(sourceDoc);
|
|
@@ -349,14 +435,18 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
349
435
|
if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) {
|
|
350
436
|
return "unchanged";
|
|
351
437
|
}
|
|
352
|
-
const
|
|
353
|
-
|
|
438
|
+
const maxBytes = this.maxTokensPerFile * 4 + 3;
|
|
439
|
+
const streamed = await this.safeReadFileStreamed(filePath, maxBytes);
|
|
440
|
+
if (streamed === "too_large") {
|
|
441
|
+
return "skipped";
|
|
442
|
+
}
|
|
443
|
+
if (!streamed) {
|
|
354
444
|
await this.deleteSourceDocument(sourceDoc);
|
|
355
445
|
this.fileStates.delete(sourceDoc);
|
|
356
446
|
this.snapshotDirty = true;
|
|
357
447
|
return "deleted";
|
|
358
448
|
}
|
|
359
|
-
const fileHash =
|
|
449
|
+
const { text, fileHash } = streamed;
|
|
360
450
|
if (cached && cached.fileHash === fileHash) {
|
|
361
451
|
this.setFileState(sourceDoc, {
|
|
362
452
|
root: rootState.root,
|
|
@@ -368,14 +458,13 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
368
458
|
});
|
|
369
459
|
return "unchanged";
|
|
370
460
|
}
|
|
371
|
-
const text = textDecoder.decode(bytes);
|
|
372
461
|
if (this.kind === "obsidian" && this.includePatterns.length === 0 && !looksLikeObsidianNote(filePath, text)) {
|
|
373
462
|
await this.deleteSourceDocument(sourceDoc);
|
|
374
463
|
this.fileStates.delete(sourceDoc);
|
|
375
464
|
this.snapshotDirty = true;
|
|
376
465
|
return "skipped";
|
|
377
466
|
}
|
|
378
|
-
await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs);
|
|
467
|
+
await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs, stat.ctimeMs);
|
|
379
468
|
this.setFileState(sourceDoc, {
|
|
380
469
|
root: rootState.root,
|
|
381
470
|
sourceDoc,
|
|
@@ -390,9 +479,9 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
390
479
|
this.fileStates.set(sourceDoc, state);
|
|
391
480
|
this.snapshotDirty = true;
|
|
392
481
|
}
|
|
393
|
-
async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs) {
|
|
482
|
+
async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs, sourceCtimeMs) {
|
|
394
483
|
const queue = await this.getIngestQueue();
|
|
395
|
-
await queue.enqueueIngest(sourceDoc, text, {
|
|
484
|
+
const feedback = await queue.enqueueIngest(sourceDoc, text, {
|
|
396
485
|
tokenizerId: this.tokenizerId,
|
|
397
486
|
coreDoc: this.coreDoc,
|
|
398
487
|
sourceMeta: {
|
|
@@ -402,10 +491,44 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
402
491
|
fileHash,
|
|
403
492
|
sourceSize,
|
|
404
493
|
sourceMtimeMs: Math.trunc(sourceMtimeMs),
|
|
494
|
+
sourceCtimeMs: Math.trunc(sourceCtimeMs),
|
|
405
495
|
ingestVersion: MARKDOWN_INGEST_VERSION,
|
|
406
496
|
hashBackend: HASH_BACKEND,
|
|
407
497
|
},
|
|
408
|
-
});
|
|
498
|
+
}, this.lastTokenBurstLimit);
|
|
499
|
+
this.applyIngestFeedback(feedback);
|
|
500
|
+
}
|
|
501
|
+
applyIngestFeedback(feedback) {
|
|
502
|
+
if (feedback && typeof feedback.acceptMore === "boolean") {
|
|
503
|
+
this.lastAcceptMore = feedback.acceptMore;
|
|
504
|
+
this.lastQueueDepth = feedback.queueDepth ?? 0;
|
|
505
|
+
this.lastQueueCapacity = feedback.queueCapacity ?? 0;
|
|
506
|
+
this.lastProcessingTimeUs = feedback.processingTimeUs ?? 0;
|
|
507
|
+
this.lastNodesAccepted = feedback.nodesAccepted ?? 0;
|
|
508
|
+
this.lastNodesRejected = feedback.nodesRejected ?? 0;
|
|
509
|
+
this.lastTokensIngested = feedback.tokensIngested ?? 0;
|
|
510
|
+
if (feedback.tokenBurstLimit && feedback.tokenBurstLimit > 0) {
|
|
511
|
+
this.lastTokenBurstLimit = feedback.tokenBurstLimit;
|
|
512
|
+
}
|
|
513
|
+
this.lastWalDepth = feedback.walDepth ?? 0;
|
|
514
|
+
this.lastWalCapacity = feedback.walCapacity ?? 0;
|
|
515
|
+
if (feedback.acceptMore) {
|
|
516
|
+
this.lastRetryAfterMs = 0;
|
|
517
|
+
}
|
|
518
|
+
else {
|
|
519
|
+
this.lastRetryAfterMs = feedback.retryAfterMs || 1000;
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
else {
|
|
523
|
+
this.lastAcceptMore = true;
|
|
524
|
+
this.lastRetryAfterMs = 0;
|
|
525
|
+
this.lastQueueDepth = 0;
|
|
526
|
+
this.lastQueueCapacity = 0;
|
|
527
|
+
this.lastProcessingTimeUs = 0;
|
|
528
|
+
this.lastNodesAccepted = 0;
|
|
529
|
+
this.lastNodesRejected = 0;
|
|
530
|
+
this.lastTokensIngested = 0;
|
|
531
|
+
}
|
|
409
532
|
}
|
|
410
533
|
async deleteSourceDocument(sourceDoc) {
|
|
411
534
|
const queue = await this.getIngestQueue();
|
|
@@ -414,7 +537,9 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
414
537
|
async getIngestQueue() {
|
|
415
538
|
if (!this.ingestQueue) {
|
|
416
539
|
const rpc = await this.getRpc();
|
|
417
|
-
this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger
|
|
540
|
+
this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger, {
|
|
541
|
+
onChunkFeedback: (feedback) => this.applyIngestFeedback(feedback),
|
|
542
|
+
});
|
|
418
543
|
}
|
|
419
544
|
return this.ingestQueue;
|
|
420
545
|
}
|
|
@@ -426,14 +551,51 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
426
551
|
return null;
|
|
427
552
|
}
|
|
428
553
|
}
|
|
429
|
-
async
|
|
554
|
+
async safeStatWithCtime(filePath) {
|
|
430
555
|
try {
|
|
431
|
-
return await this.fsApi.
|
|
556
|
+
return await this.fsApi.stat(filePath);
|
|
432
557
|
}
|
|
433
558
|
catch {
|
|
434
559
|
return null;
|
|
435
560
|
}
|
|
436
561
|
}
|
|
562
|
+
async safeReadFileStreamed(filePath, maxBytes) {
|
|
563
|
+
let stream = null;
|
|
564
|
+
try {
|
|
565
|
+
stream = await this.fsApi.openReadStream(filePath);
|
|
566
|
+
const decoder = new TextDecoder();
|
|
567
|
+
const chunks = [];
|
|
568
|
+
let hash = 0xcbf29ce484222325n;
|
|
569
|
+
let total = 0;
|
|
570
|
+
const buffer = Buffer.allocUnsafe(STREAM_CHUNK_BYTES);
|
|
571
|
+
while (true) {
|
|
572
|
+
const { bytesRead } = await stream.read(buffer);
|
|
573
|
+
if (bytesRead === 0) {
|
|
574
|
+
break;
|
|
575
|
+
}
|
|
576
|
+
total += bytesRead;
|
|
577
|
+
if (total > maxBytes) {
|
|
578
|
+
return "too_large";
|
|
579
|
+
}
|
|
580
|
+
const chunk = buffer.subarray(0, bytesRead);
|
|
581
|
+
hash = updateFnv1a64(hash, chunk);
|
|
582
|
+
chunks.push(decoder.decode(chunk, { stream: true }));
|
|
583
|
+
}
|
|
584
|
+
chunks.push(decoder.decode());
|
|
585
|
+
return {
|
|
586
|
+
text: chunks.join(""),
|
|
587
|
+
fileHash: hash.toString(16).padStart(16, "0"),
|
|
588
|
+
};
|
|
589
|
+
}
|
|
590
|
+
catch {
|
|
591
|
+
return null;
|
|
592
|
+
}
|
|
593
|
+
finally {
|
|
594
|
+
if (stream) {
|
|
595
|
+
await stream.close().catch(() => { });
|
|
596
|
+
}
|
|
597
|
+
}
|
|
598
|
+
}
|
|
437
599
|
snapshotFilesForRoot(root) {
|
|
438
600
|
const files = new Set();
|
|
439
601
|
for (const state of this.fileStates.values()) {
|
|
@@ -497,7 +659,7 @@ class DirectoryMarkdownSourceAdapter {
|
|
|
497
659
|
}
|
|
498
660
|
}
|
|
499
661
|
logScanStats(root, stats, durationMs) {
|
|
500
|
-
this.logger.info?.(`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} errors=${stats.syncErrors} durationMs=${durationMs}`);
|
|
662
|
+
this.logger.info?.(`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} deferred=${stats.filesDeferred} errors=${stats.syncErrors} durationMs=${durationMs}`);
|
|
501
663
|
}
|
|
502
664
|
}
|
|
503
665
|
function createScanStats() {
|
|
@@ -511,8 +673,26 @@ function createScanStats() {
|
|
|
511
673
|
filesIngested: 0,
|
|
512
674
|
filesDeleted: 0,
|
|
513
675
|
syncErrors: 0,
|
|
676
|
+
filesDeferred: 0,
|
|
514
677
|
};
|
|
515
678
|
}
|
|
679
|
+
function estimateTokens(size) {
|
|
680
|
+
return Math.max(1, Math.floor(size / 4));
|
|
681
|
+
}
|
|
682
|
+
function sortCandidates(candidates, mode) {
|
|
683
|
+
return [...candidates].sort((left, right) => {
|
|
684
|
+
if (mode === "size") {
|
|
685
|
+
return right.size - left.size || left.ordinal - right.ordinal;
|
|
686
|
+
}
|
|
687
|
+
if (mode === "ctime") {
|
|
688
|
+
return right.ctimeMs - left.ctimeMs || left.ordinal - right.ordinal;
|
|
689
|
+
}
|
|
690
|
+
if (mode === "fifo") {
|
|
691
|
+
return left.ordinal - right.ordinal;
|
|
692
|
+
}
|
|
693
|
+
return right.mtimeMs - left.mtimeMs || left.ordinal - right.ordinal;
|
|
694
|
+
});
|
|
695
|
+
}
|
|
516
696
|
function recordSyncResult(stats, result) {
|
|
517
697
|
if (result === "ingested") {
|
|
518
698
|
stats.filesIngested++;
|
|
@@ -530,7 +710,6 @@ function recordSyncResult(stats, result) {
|
|
|
530
710
|
function toPosixPath(value) {
|
|
531
711
|
return value.split(path.sep).join("/");
|
|
532
712
|
}
|
|
533
|
-
const textDecoder = new TextDecoder();
|
|
534
713
|
function normalizeMarkdownRoots(roots) {
|
|
535
714
|
if (!roots?.length) {
|
|
536
715
|
return [];
|
|
@@ -545,6 +724,15 @@ function normalizeMarkdownRoots(roots) {
|
|
|
545
724
|
}
|
|
546
725
|
return [...resolved];
|
|
547
726
|
}
|
|
727
|
+
function updateFnv1a64(seed, bytes) {
|
|
728
|
+
let hash = seed;
|
|
729
|
+
const prime = 0x100000001b3n;
|
|
730
|
+
for (let i = 0; i < bytes.length; i++) {
|
|
731
|
+
hash ^= BigInt(bytes[i] ?? 0);
|
|
732
|
+
hash = BigInt.asUintN(64, hash * prime);
|
|
733
|
+
}
|
|
734
|
+
return hash;
|
|
735
|
+
}
|
|
548
736
|
function resolveMarkdownSnapshotPath(kind, configuredPath) {
|
|
549
737
|
const trimmed = configuredPath?.trim();
|
|
550
738
|
if (trimmed) {
|
|
@@ -561,10 +749,22 @@ function createRealFsApi() {
|
|
|
561
749
|
readdir: async (dir) => fsp.readdir(dir, { withFileTypes: true }),
|
|
562
750
|
readFile: async (file) => fsp.readFile(file),
|
|
563
751
|
stat: async (file) => {
|
|
564
|
-
const
|
|
565
|
-
return { size:
|
|
752
|
+
const s = await fsp.stat(file);
|
|
753
|
+
return { size: s.size, mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs };
|
|
566
754
|
},
|
|
567
755
|
watch: (dir, onChange) => fs.watch(dir, onChange),
|
|
756
|
+
openReadStream: async (file) => {
|
|
757
|
+
const handle = await fsp.open(file, "r");
|
|
758
|
+
return {
|
|
759
|
+
read: async (buffer) => {
|
|
760
|
+
const { bytesRead } = await handle.read(buffer, 0, buffer.length, null);
|
|
761
|
+
return { bytesRead };
|
|
762
|
+
},
|
|
763
|
+
close: async () => {
|
|
764
|
+
await handle.close();
|
|
765
|
+
},
|
|
766
|
+
};
|
|
767
|
+
},
|
|
568
768
|
};
|
|
569
769
|
}
|
|
570
770
|
function isMarkdownFile(fileName) {
|
package/dist/types.d.ts
CHANGED
|
@@ -48,6 +48,8 @@ export interface PluginConfig {
|
|
|
48
48
|
markdownIngestionInclude?: string[];
|
|
49
49
|
markdownIngestionExclude?: string[];
|
|
50
50
|
markdownIngestionDebounceMs?: number;
|
|
51
|
+
markdownIngestionPriorityMode?: "mtime" | "ctime" | "size" | "fifo";
|
|
52
|
+
markdownIngestionMaxTokensPerFile?: number;
|
|
51
53
|
markdownIngestionSnapshotPath?: string;
|
|
52
54
|
markdownIngestionObsidianSnapshotPath?: string;
|
|
53
55
|
dreamPromotionEnabled?: boolean;
|
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "libravdb-memory",
|
|
3
3
|
"name": "LibraVDB Memory",
|
|
4
4
|
"description": "Persistent vector memory with three-tier hybrid scoring",
|
|
5
|
-
"version": "1.5.
|
|
5
|
+
"version": "1.5.5",
|
|
6
6
|
"kind": [
|
|
7
7
|
"memory",
|
|
8
8
|
"context-engine"
|
|
@@ -263,6 +263,20 @@
|
|
|
263
263
|
"type": "number",
|
|
264
264
|
"default": 150
|
|
265
265
|
},
|
|
266
|
+
"markdownIngestionPriorityMode": {
|
|
267
|
+
"type": "string",
|
|
268
|
+
"enum": [
|
|
269
|
+
"mtime",
|
|
270
|
+
"ctime",
|
|
271
|
+
"size",
|
|
272
|
+
"fifo"
|
|
273
|
+
],
|
|
274
|
+
"default": "mtime"
|
|
275
|
+
},
|
|
276
|
+
"markdownIngestionMaxTokensPerFile": {
|
|
277
|
+
"type": "number",
|
|
278
|
+
"default": 128000
|
|
279
|
+
},
|
|
266
280
|
"markdownIngestionSnapshotPath": {
|
|
267
281
|
"type": "string"
|
|
268
282
|
},
|