@xdarkicex/openclaw-memory-libravdb 1.5.4 → 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -560,9 +560,9 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
560
560
  };
561
561
  }
562
562
  function isGrpcAuthConfigured() {
563
- return (typeof process.env.LIBRAVDB_AUTH_SECRET === "string" &&
564
- process.env.LIBRAVDB_AUTH_SECRET.trim().length > 0) || (typeof process.env.LIBRAVDB_AUTH_SECRET_FILE === "string" &&
565
- process.env.LIBRAVDB_AUTH_SECRET_FILE.trim().length > 0);
563
+ const secret = process.env.LIBRAVDB_AUTH_SECRET?.trim();
564
+ const secretFile = process.env.LIBRAVDB_AUTH_SECRET_FILE?.trim();
565
+ return (typeof secret === "string" && secret.length > 0) || (typeof secretFile === "string" && secretFile.length > 0);
566
566
  }
567
567
  function buildGrpcAuthInitializationError(error) {
568
568
  const code = typeof error?.code === "number" ||
package/dist/index.js CHANGED
@@ -839,14 +839,14 @@ var require_binary_encoding = __commonJS({
839
839
  };
840
840
  exports2.BinaryWriter = BinaryWriter;
841
841
  var BinaryReader = class {
842
- constructor(buf, textDecoder3) {
842
+ constructor(buf, textDecoder2) {
843
843
  this.varint64 = varint_js_1.varint64read;
844
844
  this.uint32 = varint_js_1.varint32read;
845
845
  this.buf = buf;
846
846
  this.len = buf.length;
847
847
  this.pos = 0;
848
848
  this.view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
849
- this.textDecoder = textDecoder3 !== null && textDecoder3 !== void 0 ? textDecoder3 : new TextDecoder();
849
+ this.textDecoder = textDecoder2 !== null && textDecoder2 !== void 0 ? textDecoder2 : new TextDecoder();
850
850
  }
851
851
  /**
852
852
  * Reads a tag - field number and wire type.
@@ -34241,7 +34241,9 @@ function buildContextEngineFactory(runtime, cfg, logger = console) {
34241
34241
  };
34242
34242
  }
34243
34243
  function isGrpcAuthConfigured() {
34244
- return typeof process.env.LIBRAVDB_AUTH_SECRET === "string" && process.env.LIBRAVDB_AUTH_SECRET.trim().length > 0 || typeof process.env.LIBRAVDB_AUTH_SECRET_FILE === "string" && process.env.LIBRAVDB_AUTH_SECRET_FILE.trim().length > 0;
34244
+ const secret = process.env.LIBRAVDB_AUTH_SECRET?.trim();
34245
+ const secretFile = process.env.LIBRAVDB_AUTH_SECRET_FILE?.trim();
34246
+ return typeof secret === "string" && secret.length > 0 || typeof secretFile === "string" && secretFile.length > 0;
34245
34247
  }
34246
34248
  function buildGrpcAuthInitializationError(error) {
34247
34249
  const code = typeof error?.code === "number" || typeof error?.code === "string" ? ` code=${String(error.code)}` : "";
@@ -38535,37 +38537,55 @@ var IngestQueue = class {
38535
38537
  this.options.chunkTokens = DEFAULT_OPTIONS.chunkTokens;
38536
38538
  }
38537
38539
  }
38538
- async enqueueIngest(sourceDoc, text, baseParams) {
38540
+ async enqueueIngest(sourceDoc, text, baseParams, maxChunkTokens) {
38539
38541
  if (this.options.chunkTokens === Infinity) {
38540
- return this.ingestWithRetry({
38542
+ const resp = await this.ingestWithRetry({
38541
38543
  ...baseParams,
38542
38544
  sourceDoc,
38543
38545
  text,
38544
38546
  mode: IngestMode.REPLACE
38545
38547
  });
38546
- }
38547
- const chunks = splitIntoChunks(text, this.options.chunkTokens);
38548
- if (chunks.length === 1) {
38549
- return this.ingestWithRetry({
38550
- ...baseParams,
38551
- sourceDoc,
38552
- text: chunks[0].text,
38553
- mode: IngestMode.REPLACE
38554
- });
38555
- }
38556
- for (let i = 0; i < chunks.length; i++) {
38557
- const isFirst = i === 0;
38548
+ return resp.feedback;
38549
+ }
38550
+ let currentLimit = maxChunkTokens && maxChunkTokens > 0 ? maxChunkTokens : this.options.chunkTokens;
38551
+ let offset = 0;
38552
+ let isFirst = true;
38553
+ let lastFeedback;
38554
+ while (offset < text.length) {
38555
+ const remainingText = text.slice(offset);
38556
+ const chunks = splitIntoChunks(remainingText, currentLimit);
38557
+ const chunkText = chunks[0].text;
38558
38558
  const chunkParams = {
38559
38559
  ...baseParams,
38560
38560
  sourceDoc,
38561
- text: chunks[i].text,
38561
+ text: chunkText,
38562
38562
  mode: isFirst ? IngestMode.REPLACE : IngestMode.APPEND
38563
38563
  };
38564
- await this.ingestWithRetry(chunkParams);
38564
+ const resp = await this.ingestWithRetry(chunkParams);
38565
+ lastFeedback = resp.feedback;
38566
+ if (lastFeedback && lastFeedback.nodesAccepted === 0 && lastFeedback.tokenBurstLimit && lastFeedback.tokenBurstLimit > 0 && lastFeedback.tokenBurstLimit < currentLimit) {
38567
+ currentLimit = lastFeedback.tokenBurstLimit;
38568
+ continue;
38569
+ }
38570
+ if (lastFeedback && lastFeedback.nodesAccepted === 0) {
38571
+ this.logger.warn?.(
38572
+ `[ingest-queue] Chunk permanently rejected for ${sourceDoc} at offset=${offset} length=${chunkText.length} tokenBurstLimit=${lastFeedback.tokenBurstLimit ?? "unset"}`
38573
+ );
38574
+ }
38575
+ if (this.options.onChunkFeedback && lastFeedback) {
38576
+ this.options.onChunkFeedback(lastFeedback);
38577
+ }
38578
+ offset += chunkText.length;
38579
+ isFirst = false;
38580
+ if (lastFeedback && !lastFeedback.acceptMore && offset < text.length) {
38581
+ const delay = lastFeedback.retryAfterMs || 1e3;
38582
+ await new Promise((resolve) => setTimeout(resolve, delay));
38583
+ }
38565
38584
  }
38585
+ return lastFeedback;
38566
38586
  }
38567
38587
  async ingestWithRetry(params) {
38568
- await withRetry(
38588
+ return withRetry(
38569
38589
  () => this.rpcCall("ingest_markdown_document", params),
38570
38590
  this.options.maxRetries,
38571
38591
  this.options.retryBaseDelayMs,
@@ -38656,6 +38676,7 @@ var DEFAULT_DEBOUNCE_MS2 = 150;
38656
38676
  var DEFAULT_TOKENIZER_ID = "markdown-ingest:v1";
38657
38677
  var MARKDOWN_INGEST_VERSION = 3;
38658
38678
  var HASH_BACKEND = "wasm-fnv1a64";
38679
+ var STREAM_CHUNK_BYTES = 64 * 1024;
38659
38680
  function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = createRealFsApi2()) {
38660
38681
  const adapters = [];
38661
38682
  const genericRoots = normalizeMarkdownRoots(cfg.markdownIngestionRoots);
@@ -38668,7 +38689,9 @@ function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = cr
38668
38689
  include: cfg.markdownIngestionInclude,
38669
38690
  exclude: cfg.markdownIngestionExclude,
38670
38691
  debounceMs: cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS2,
38671
- snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath)
38692
+ snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath),
38693
+ priorityMode: cfg.markdownIngestionPriorityMode,
38694
+ maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile
38672
38695
  },
38673
38696
  getRpc,
38674
38697
  logger,
@@ -38686,7 +38709,9 @@ function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = cr
38686
38709
  include: cfg.markdownIngestionObsidianInclude,
38687
38710
  exclude: cfg.markdownIngestionObsidianExclude,
38688
38711
  debounceMs: cfg.markdownIngestionObsidianDebounceMs ?? cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS2,
38689
- snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath)
38712
+ snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath),
38713
+ priorityMode: cfg.markdownIngestionPriorityMode,
38714
+ maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile
38690
38715
  },
38691
38716
  getRpc,
38692
38717
  logger,
@@ -38743,6 +38768,8 @@ var DirectoryMarkdownSourceAdapter = class {
38743
38768
  getRpc;
38744
38769
  logger;
38745
38770
  snapshotPath;
38771
+ priorityMode;
38772
+ maxTokensPerFile;
38746
38773
  states = /* @__PURE__ */ new Map();
38747
38774
  fileStates = /* @__PURE__ */ new Map();
38748
38775
  activeScans = /* @__PURE__ */ new Set();
@@ -38751,6 +38778,17 @@ var DirectoryMarkdownSourceAdapter = class {
38751
38778
  started = false;
38752
38779
  ingestQueue = null;
38753
38780
  stopping = false;
38781
+ lastAcceptMore = true;
38782
+ lastRetryAfterMs = 0;
38783
+ lastQueueDepth = 0;
38784
+ lastQueueCapacity = 0;
38785
+ lastProcessingTimeUs = 0;
38786
+ lastNodesAccepted = 0;
38787
+ lastNodesRejected = 0;
38788
+ lastTokensIngested = 0;
38789
+ lastTokenBurstLimit = 512;
38790
+ lastWalDepth = 0;
38791
+ lastWalCapacity = 0;
38754
38792
  snapshotLoaded = false;
38755
38793
  snapshotDirty = false;
38756
38794
  constructor(kind, config, getRpc, logger, fsApi) {
@@ -38763,6 +38801,8 @@ var DirectoryMarkdownSourceAdapter = class {
38763
38801
  this.getRpc = getRpc;
38764
38802
  this.logger = logger;
38765
38803
  this.snapshotPath = config.snapshotPath ?? resolveMarkdownSnapshotPath(kind);
38804
+ this.priorityMode = config.priorityMode ?? "mtime";
38805
+ this.maxTokensPerFile = Math.max(1, Math.trunc(config.maxTokensPerFile ?? 128e3));
38766
38806
  this.tokenizerId = DEFAULT_TOKENIZER_ID;
38767
38807
  this.coreDoc = true;
38768
38808
  }
@@ -38815,7 +38855,8 @@ var DirectoryMarkdownSourceAdapter = class {
38815
38855
  scanState: {
38816
38856
  scanning: false,
38817
38857
  dirty: false,
38818
- timer: null
38858
+ timer: null,
38859
+ resumeFromPath: null
38819
38860
  },
38820
38861
  knownFiles: this.snapshotFilesForRoot(resolved),
38821
38862
  directoryWatchers: /* @__PURE__ */ new Map()
@@ -38833,12 +38874,16 @@ var DirectoryMarkdownSourceAdapter = class {
38833
38874
  return;
38834
38875
  }
38835
38876
  rootState.scanState.scanning = true;
38877
+ this.lastAcceptMore = true;
38878
+ this.lastRetryAfterMs = 0;
38836
38879
  const scan = (async () => {
38837
38880
  const stats = createScanStats();
38838
38881
  const startedAt = Date.now();
38839
38882
  try {
38840
38883
  const currentFiles = /* @__PURE__ */ new Set();
38841
- await this.walkDirectory(rootState, rootState.root, currentFiles, stats);
38884
+ const candidates = [];
38885
+ await this.walkDirectory(rootState, rootState.root, currentFiles, stats, candidates);
38886
+ await this.syncCandidates(rootState, candidates, stats);
38842
38887
  if (!this.stopping) {
38843
38888
  await this.pruneDeletedFiles(rootState, currentFiles, stats);
38844
38889
  rootState.knownFiles = currentFiles;
@@ -38862,7 +38907,7 @@ var DirectoryMarkdownSourceAdapter = class {
38862
38907
  this.activeScans.delete(scan);
38863
38908
  }
38864
38909
  }
38865
- scheduleRootScan(rootState) {
38910
+ scheduleRootScan(rootState, delayMs) {
38866
38911
  if (!this.started || this.stopping) {
38867
38912
  return;
38868
38913
  }
@@ -38878,9 +38923,9 @@ var DirectoryMarkdownSourceAdapter = class {
38878
38923
  void this.scanRoot(rootState.root).catch((error) => {
38879
38924
  this.logger.warn?.(`[markdown-ingest] root scan failed for ${rootState.root}: ${formatError(error)}`);
38880
38925
  });
38881
- }, this.debounceMs);
38926
+ }, Math.max(this.debounceMs, delayMs ?? 0));
38882
38927
  }
38883
- async walkDirectory(rootState, dir, currentFiles, stats) {
38928
+ async walkDirectory(rootState, dir, currentFiles, stats, candidates) {
38884
38929
  if (this.shouldPruneDirectory(rootState.root, dir)) {
38885
38930
  stats.directoriesPruned++;
38886
38931
  return;
@@ -38903,7 +38948,7 @@ var DirectoryMarkdownSourceAdapter = class {
38903
38948
  }
38904
38949
  const child = path2.join(dir, entry.name);
38905
38950
  if (entry.isDirectory()) {
38906
- await this.walkDirectory(rootState, child, currentFiles, stats);
38951
+ await this.walkDirectory(rootState, child, currentFiles, stats, candidates);
38907
38952
  continue;
38908
38953
  }
38909
38954
  if (!entry.isFile() || !isMarkdownFile(entry.name)) {
@@ -38916,16 +38961,71 @@ var DirectoryMarkdownSourceAdapter = class {
38916
38961
  }
38917
38962
  stats.filesIncluded++;
38918
38963
  currentFiles.add(child);
38964
+ const stat = await this.safeStatWithCtime(child);
38965
+ if (!stat) {
38966
+ continue;
38967
+ }
38968
+ candidates.push({ path: child, size: stat.size, mtimeMs: stat.mtimeMs, ctimeMs: stat.ctimeMs, ordinal: candidates.length });
38969
+ }
38970
+ }
38971
+ async syncCandidates(rootState, candidates, stats) {
38972
+ const sorted = sortCandidates(candidates, this.priorityMode);
38973
+ let skipping = false;
38974
+ if (rootState.scanState.resumeFromPath) {
38975
+ const targetExists = sorted.some((c) => c.path === rootState.scanState.resumeFromPath);
38976
+ if (targetExists) {
38977
+ skipping = true;
38978
+ this.lastAcceptMore = true;
38979
+ this.lastRetryAfterMs = 0;
38980
+ } else {
38981
+ rootState.scanState.resumeFromPath = null;
38982
+ }
38983
+ }
38984
+ for (const candidate of sorted) {
38985
+ if (skipping) {
38986
+ if (candidate.path === rootState.scanState.resumeFromPath) {
38987
+ skipping = false;
38988
+ } else {
38989
+ continue;
38990
+ }
38991
+ }
38992
+ if (this.stopping) {
38993
+ return;
38994
+ }
38995
+ if (!this.lastAcceptMore) {
38996
+ if (!this.stopping) {
38997
+ rootState.scanState.resumeFromPath = candidate.path;
38998
+ this.scheduleRootScan(rootState, this.lastRetryAfterMs);
38999
+ }
39000
+ return;
39001
+ }
39002
+ if (this.lastWalCapacity > 0 && this.lastWalDepth > this.lastWalCapacity * 0.8) {
39003
+ rootState.scanState.resumeFromPath = candidate.path;
39004
+ if (!this.stopping) {
39005
+ this.scheduleRootScan(rootState, 2e3);
39006
+ }
39007
+ return;
39008
+ }
39009
+ const estimatedTokens = estimateTokens(candidate.size);
39010
+ if (estimatedTokens > this.maxTokensPerFile) {
39011
+ stats.filesDeferred++;
39012
+ continue;
39013
+ }
38919
39014
  try {
38920
- const result = await this.syncMarkdownFile(rootState, child);
39015
+ const result = await this.syncMarkdownFile(rootState, candidate.path, {
39016
+ size: candidate.size,
39017
+ mtimeMs: candidate.mtimeMs,
39018
+ ctimeMs: candidate.ctimeMs
39019
+ });
38921
39020
  recordSyncResult(stats, result);
38922
39021
  } catch (error) {
38923
39022
  stats.syncErrors++;
38924
39023
  if (!this.stopping) {
38925
- this.logger.warn?.(`[markdown-ingest] sync failed for ${child}: ${formatError(error)}`);
39024
+ this.logger.warn?.(`[markdown-ingest] sync failed for ${candidate.path}: ${formatError(error)}`);
38926
39025
  }
38927
39026
  }
38928
39027
  }
39028
+ rootState.scanState.resumeFromPath = null;
38929
39029
  }
38930
39030
  shouldPruneDirectory(root, dir) {
38931
39031
  const relative = toPosixPath(path2.relative(root, dir));
@@ -38946,6 +39046,11 @@ var DirectoryMarkdownSourceAdapter = class {
38946
39046
  try {
38947
39047
  const watcher = this.fsApi.watch(dir, () => {
38948
39048
  if (!this.stopping) {
39049
+ rootState.scanState.resumeFromPath = null;
39050
+ if (rootState.scanState.timer) {
39051
+ clearTimeout(rootState.scanState.timer);
39052
+ rootState.scanState.timer = null;
39053
+ }
38949
39054
  this.scheduleRootScan(rootState);
38950
39055
  }
38951
39056
  });
@@ -38996,10 +39101,10 @@ var DirectoryMarkdownSourceAdapter = class {
38996
39101
  stats.filesDeleted++;
38997
39102
  }
38998
39103
  }
38999
- async syncMarkdownFile(rootState, filePath) {
39104
+ async syncMarkdownFile(rootState, filePath, initialStat) {
39000
39105
  const sourceDoc = filePath;
39001
39106
  const relativePath = toPosixPath(path2.relative(rootState.root, filePath));
39002
- const stat = await this.safeStat(filePath);
39107
+ const stat = initialStat ?? await this.safeStatWithCtime(filePath);
39003
39108
  if (!stat) {
39004
39109
  await this.deleteSourceDocument(sourceDoc);
39005
39110
  this.fileStates.delete(sourceDoc);
@@ -39010,14 +39115,18 @@ var DirectoryMarkdownSourceAdapter = class {
39010
39115
  if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) {
39011
39116
  return "unchanged";
39012
39117
  }
39013
- const bytes = await this.safeReadFile(filePath);
39014
- if (!bytes) {
39118
+ const maxBytes = this.maxTokensPerFile * 4 + 3;
39119
+ const streamed = await this.safeReadFileStreamed(filePath, maxBytes);
39120
+ if (streamed === "too_large") {
39121
+ return "skipped";
39122
+ }
39123
+ if (!streamed) {
39015
39124
  await this.deleteSourceDocument(sourceDoc);
39016
39125
  this.fileStates.delete(sourceDoc);
39017
39126
  this.snapshotDirty = true;
39018
39127
  return "deleted";
39019
39128
  }
39020
- const fileHash = hashBytes(bytes);
39129
+ const { text, fileHash } = streamed;
39021
39130
  if (cached && cached.fileHash === fileHash) {
39022
39131
  this.setFileState(sourceDoc, {
39023
39132
  root: rootState.root,
@@ -39029,14 +39138,13 @@ var DirectoryMarkdownSourceAdapter = class {
39029
39138
  });
39030
39139
  return "unchanged";
39031
39140
  }
39032
- const text = textDecoder2.decode(bytes);
39033
39141
  if (this.kind === "obsidian" && this.includePatterns.length === 0 && !looksLikeObsidianNote(filePath, text)) {
39034
39142
  await this.deleteSourceDocument(sourceDoc);
39035
39143
  this.fileStates.delete(sourceDoc);
39036
39144
  this.snapshotDirty = true;
39037
39145
  return "skipped";
39038
39146
  }
39039
- await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs);
39147
+ await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs, stat.ctimeMs);
39040
39148
  this.setFileState(sourceDoc, {
39041
39149
  root: rootState.root,
39042
39150
  sourceDoc,
@@ -39051,9 +39159,9 @@ var DirectoryMarkdownSourceAdapter = class {
39051
39159
  this.fileStates.set(sourceDoc, state);
39052
39160
  this.snapshotDirty = true;
39053
39161
  }
39054
- async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs) {
39162
+ async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs, sourceCtimeMs) {
39055
39163
  const queue = await this.getIngestQueue();
39056
- await queue.enqueueIngest(
39164
+ const feedback = await queue.enqueueIngest(
39057
39165
  sourceDoc,
39058
39166
  text,
39059
39167
  {
@@ -39066,11 +39174,44 @@ var DirectoryMarkdownSourceAdapter = class {
39066
39174
  fileHash,
39067
39175
  sourceSize,
39068
39176
  sourceMtimeMs: Math.trunc(sourceMtimeMs),
39177
+ sourceCtimeMs: Math.trunc(sourceCtimeMs),
39069
39178
  ingestVersion: MARKDOWN_INGEST_VERSION,
39070
39179
  hashBackend: HASH_BACKEND
39071
39180
  }
39072
- }
39181
+ },
39182
+ this.lastTokenBurstLimit
39073
39183
  );
39184
+ this.applyIngestFeedback(feedback);
39185
+ }
39186
+ applyIngestFeedback(feedback) {
39187
+ if (feedback && typeof feedback.acceptMore === "boolean") {
39188
+ this.lastAcceptMore = feedback.acceptMore;
39189
+ this.lastQueueDepth = feedback.queueDepth ?? 0;
39190
+ this.lastQueueCapacity = feedback.queueCapacity ?? 0;
39191
+ this.lastProcessingTimeUs = feedback.processingTimeUs ?? 0;
39192
+ this.lastNodesAccepted = feedback.nodesAccepted ?? 0;
39193
+ this.lastNodesRejected = feedback.nodesRejected ?? 0;
39194
+ this.lastTokensIngested = feedback.tokensIngested ?? 0;
39195
+ if (feedback.tokenBurstLimit && feedback.tokenBurstLimit > 0) {
39196
+ this.lastTokenBurstLimit = feedback.tokenBurstLimit;
39197
+ }
39198
+ this.lastWalDepth = feedback.walDepth ?? 0;
39199
+ this.lastWalCapacity = feedback.walCapacity ?? 0;
39200
+ if (feedback.acceptMore) {
39201
+ this.lastRetryAfterMs = 0;
39202
+ } else {
39203
+ this.lastRetryAfterMs = feedback.retryAfterMs || 1e3;
39204
+ }
39205
+ } else {
39206
+ this.lastAcceptMore = true;
39207
+ this.lastRetryAfterMs = 0;
39208
+ this.lastQueueDepth = 0;
39209
+ this.lastQueueCapacity = 0;
39210
+ this.lastProcessingTimeUs = 0;
39211
+ this.lastNodesAccepted = 0;
39212
+ this.lastNodesRejected = 0;
39213
+ this.lastTokensIngested = 0;
39214
+ }
39074
39215
  }
39075
39216
  async deleteSourceDocument(sourceDoc) {
39076
39217
  const queue = await this.getIngestQueue();
@@ -39079,7 +39220,9 @@ var DirectoryMarkdownSourceAdapter = class {
39079
39220
  async getIngestQueue() {
39080
39221
  if (!this.ingestQueue) {
39081
39222
  const rpc = await this.getRpc();
39082
- this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger);
39223
+ this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger, {
39224
+ onChunkFeedback: (feedback) => this.applyIngestFeedback(feedback)
39225
+ });
39083
39226
  }
39084
39227
  return this.ingestQueue;
39085
39228
  }
@@ -39090,13 +39233,49 @@ var DirectoryMarkdownSourceAdapter = class {
39090
39233
  return null;
39091
39234
  }
39092
39235
  }
39093
- async safeReadFile(filePath) {
39236
+ async safeStatWithCtime(filePath) {
39094
39237
  try {
39095
- return await this.fsApi.readFile(filePath);
39238
+ return await this.fsApi.stat(filePath);
39096
39239
  } catch {
39097
39240
  return null;
39098
39241
  }
39099
39242
  }
39243
+ async safeReadFileStreamed(filePath, maxBytes) {
39244
+ let stream = null;
39245
+ try {
39246
+ stream = await this.fsApi.openReadStream(filePath);
39247
+ const decoder = new TextDecoder();
39248
+ const chunks = [];
39249
+ let hash = 0xcbf29ce484222325n;
39250
+ let total = 0;
39251
+ const buffer = Buffer.allocUnsafe(STREAM_CHUNK_BYTES);
39252
+ while (true) {
39253
+ const { bytesRead } = await stream.read(buffer);
39254
+ if (bytesRead === 0) {
39255
+ break;
39256
+ }
39257
+ total += bytesRead;
39258
+ if (total > maxBytes) {
39259
+ return "too_large";
39260
+ }
39261
+ const chunk = buffer.subarray(0, bytesRead);
39262
+ hash = updateFnv1a64(hash, chunk);
39263
+ chunks.push(decoder.decode(chunk, { stream: true }));
39264
+ }
39265
+ chunks.push(decoder.decode());
39266
+ return {
39267
+ text: chunks.join(""),
39268
+ fileHash: hash.toString(16).padStart(16, "0")
39269
+ };
39270
+ } catch {
39271
+ return null;
39272
+ } finally {
39273
+ if (stream) {
39274
+ await stream.close().catch(() => {
39275
+ });
39276
+ }
39277
+ }
39278
+ }
39100
39279
  snapshotFilesForRoot(root) {
39101
39280
  const files = /* @__PURE__ */ new Set();
39102
39281
  for (const state of this.fileStates.values()) {
@@ -39159,7 +39338,7 @@ var DirectoryMarkdownSourceAdapter = class {
39159
39338
  }
39160
39339
  logScanStats(root, stats, durationMs) {
39161
39340
  this.logger.info?.(
39162
- `[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} errors=${stats.syncErrors} durationMs=${durationMs}`
39341
+ `[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} deferred=${stats.filesDeferred} errors=${stats.syncErrors} durationMs=${durationMs}`
39163
39342
  );
39164
39343
  }
39165
39344
  };
@@ -39173,9 +39352,27 @@ function createScanStats() {
39173
39352
  filesUnchanged: 0,
39174
39353
  filesIngested: 0,
39175
39354
  filesDeleted: 0,
39176
- syncErrors: 0
39355
+ syncErrors: 0,
39356
+ filesDeferred: 0
39177
39357
  };
39178
39358
  }
39359
+ function estimateTokens(size) {
39360
+ return Math.max(1, Math.floor(size / 4));
39361
+ }
39362
+ function sortCandidates(candidates, mode) {
39363
+ return [...candidates].sort((left, right) => {
39364
+ if (mode === "size") {
39365
+ return right.size - left.size || left.ordinal - right.ordinal;
39366
+ }
39367
+ if (mode === "ctime") {
39368
+ return right.ctimeMs - left.ctimeMs || left.ordinal - right.ordinal;
39369
+ }
39370
+ if (mode === "fifo") {
39371
+ return left.ordinal - right.ordinal;
39372
+ }
39373
+ return right.mtimeMs - left.mtimeMs || left.ordinal - right.ordinal;
39374
+ });
39375
+ }
39179
39376
  function recordSyncResult(stats, result) {
39180
39377
  if (result === "ingested") {
39181
39378
  stats.filesIngested++;
@@ -39190,7 +39387,6 @@ function recordSyncResult(stats, result) {
39190
39387
  function toPosixPath(value) {
39191
39388
  return value.split(path2.sep).join("/");
39192
39389
  }
39193
- var textDecoder2 = new TextDecoder();
39194
39390
  function normalizeMarkdownRoots(roots) {
39195
39391
  if (!roots?.length) {
39196
39392
  return [];
@@ -39205,6 +39401,15 @@ function normalizeMarkdownRoots(roots) {
39205
39401
  }
39206
39402
  return [...resolved];
39207
39403
  }
39404
+ function updateFnv1a64(seed, bytes) {
39405
+ let hash = seed;
39406
+ const prime = 0x100000001b3n;
39407
+ for (let i = 0; i < bytes.length; i++) {
39408
+ hash ^= BigInt(bytes[i] ?? 0);
39409
+ hash = BigInt.asUintN(64, hash * prime);
39410
+ }
39411
+ return hash;
39412
+ }
39208
39413
  function resolveMarkdownSnapshotPath(kind, configuredPath) {
39209
39414
  const trimmed = configuredPath?.trim();
39210
39415
  if (trimmed) {
@@ -39221,10 +39426,22 @@ function createRealFsApi2() {
39221
39426
  readdir: async (dir) => fsp2.readdir(dir, { withFileTypes: true }),
39222
39427
  readFile: async (file) => fsp2.readFile(file),
39223
39428
  stat: async (file) => {
39224
- const stat = await fsp2.stat(file);
39225
- return { size: stat.size, mtimeMs: stat.mtimeMs };
39429
+ const s = await fsp2.stat(file);
39430
+ return { size: s.size, mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs };
39226
39431
  },
39227
- watch: (dir, onChange) => fs2.watch(dir, onChange)
39432
+ watch: (dir, onChange) => fs2.watch(dir, onChange),
39433
+ openReadStream: async (file) => {
39434
+ const handle = await fsp2.open(file, "r");
39435
+ return {
39436
+ read: async (buffer) => {
39437
+ const { bytesRead } = await handle.read(buffer, 0, buffer.length, null);
39438
+ return { bytesRead };
39439
+ },
39440
+ close: async () => {
39441
+ await handle.close();
39442
+ }
39443
+ };
39444
+ }
39228
39445
  };
39229
39446
  }
39230
39447
  function isMarkdownFile(fileName) {
@@ -7,6 +7,8 @@ export interface IngestQueueOptions {
7
7
  retryBaseDelayMs: number;
8
8
  /** Max retries per chunk. */
9
9
  maxRetries: number;
10
+ /** Called after each chunk is accepted so scan-level state stays current. */
11
+ onChunkFeedback?: (feedback: IngestFeedback) => void;
10
12
  }
11
13
  interface IngestMarkdownDocumentParams {
12
14
  sourceDoc: string;
@@ -20,11 +22,25 @@ interface IngestMarkdownDocumentParams {
20
22
  fileHash: string;
21
23
  sourceSize: number;
22
24
  sourceMtimeMs: number;
25
+ sourceCtimeMs: number;
23
26
  ingestVersion: number;
24
27
  hashBackend: string;
25
28
  };
26
29
  mode?: IngestMode;
27
30
  }
31
+ interface IngestFeedback {
32
+ queueDepth: number;
33
+ queueCapacity: number;
34
+ acceptMore: boolean;
35
+ retryAfterMs: number;
36
+ processingTimeUs: number;
37
+ nodesAccepted: number;
38
+ nodesRejected: number;
39
+ tokensIngested: number;
40
+ tokenBurstLimit: number;
41
+ walDepth?: number;
42
+ walCapacity?: number;
43
+ }
28
44
  export declare class IngestQueue {
29
45
  private readonly queue;
30
46
  private readonly rpcCall;
@@ -32,7 +48,7 @@ export declare class IngestQueue {
32
48
  private readonly options;
33
49
  private running;
34
50
  constructor(rpcCall: <T>(method: string, params: unknown) => Promise<T>, logger: LoggerLike, options?: Partial<IngestQueueOptions>);
35
- enqueueIngest(sourceDoc: string, text: string, baseParams: Omit<IngestMarkdownDocumentParams, "sourceDoc" | "text" | "mode">): Promise<void>;
51
+ enqueueIngest(sourceDoc: string, text: string, baseParams: Omit<IngestMarkdownDocumentParams, "sourceDoc" | "text" | "mode">, maxChunkTokens?: number): Promise<IngestFeedback | undefined>;
36
52
  private ingestWithRetry;
37
53
  enqueueDelete(sourceDoc: string): Promise<void>;
38
54
  }
@@ -19,40 +19,59 @@ export class IngestQueue {
19
19
  this.options.chunkTokens = DEFAULT_OPTIONS.chunkTokens;
20
20
  }
21
21
  }
22
- async enqueueIngest(sourceDoc, text, baseParams) {
22
+ async enqueueIngest(sourceDoc, text, baseParams, maxChunkTokens) {
23
23
  if (this.options.chunkTokens === Infinity) {
24
- // Retry-only mode: send full text as single chunk
25
- return this.ingestWithRetry({
24
+ const resp = await this.ingestWithRetry({
26
25
  ...baseParams,
27
26
  sourceDoc,
28
27
  text,
29
28
  mode: IngestMode.REPLACE,
30
29
  });
30
+ return resp.feedback;
31
31
  }
32
- const chunks = splitIntoChunks(text, this.options.chunkTokens);
33
- if (chunks.length === 1) {
34
- return this.ingestWithRetry({
35
- ...baseParams,
36
- sourceDoc,
37
- text: chunks[0].text,
38
- mode: IngestMode.REPLACE,
39
- });
40
- }
41
- // Multiple chunks: clear the source once, then append the remaining chunks.
42
- // Sending REPLACE last deletes the earlier chunks from the same source_doc.
43
- for (let i = 0; i < chunks.length; i++) {
44
- const isFirst = i === 0;
32
+ let currentLimit = maxChunkTokens && maxChunkTokens > 0 ? maxChunkTokens : this.options.chunkTokens;
33
+ let offset = 0;
34
+ let isFirst = true;
35
+ let lastFeedback;
36
+ while (offset < text.length) {
37
+ const remainingText = text.slice(offset);
38
+ const chunks = splitIntoChunks(remainingText, currentLimit);
39
+ const chunkText = chunks[0].text;
45
40
  const chunkParams = {
46
41
  ...baseParams,
47
42
  sourceDoc,
48
- text: chunks[i].text,
43
+ text: chunkText,
49
44
  mode: isFirst ? IngestMode.REPLACE : IngestMode.APPEND,
50
45
  };
51
- await this.ingestWithRetry(chunkParams);
46
+ const resp = await this.ingestWithRetry(chunkParams);
47
+ lastFeedback = resp.feedback;
48
+ if (lastFeedback &&
49
+ lastFeedback.nodesAccepted === 0 &&
50
+ lastFeedback.tokenBurstLimit &&
51
+ lastFeedback.tokenBurstLimit > 0 &&
52
+ lastFeedback.tokenBurstLimit < currentLimit) {
53
+ currentLimit = lastFeedback.tokenBurstLimit;
54
+ continue;
55
+ }
56
+ if (lastFeedback && lastFeedback.nodesAccepted === 0) {
57
+ this.logger.warn?.(`[ingest-queue] Chunk permanently rejected for ${sourceDoc} ` +
58
+ `at offset=${offset} length=${chunkText.length} ` +
59
+ `tokenBurstLimit=${lastFeedback.tokenBurstLimit ?? "unset"}`);
60
+ }
61
+ if (this.options.onChunkFeedback && lastFeedback) {
62
+ this.options.onChunkFeedback(lastFeedback);
63
+ }
64
+ offset += chunkText.length;
65
+ isFirst = false;
66
+ if (lastFeedback && !lastFeedback.acceptMore && offset < text.length) {
67
+ const delay = lastFeedback.retryAfterMs || 1000;
68
+ await new Promise((resolve) => setTimeout(resolve, delay));
69
+ }
52
70
  }
71
+ return lastFeedback;
53
72
  }
54
73
  async ingestWithRetry(params) {
55
- await withRetry(() => this.rpcCall("ingest_markdown_document", params), this.options.maxRetries, this.options.retryBaseDelayMs, this.logger, `ingest_markdown_document(${params.sourceDoc})`);
74
+ return withRetry(() => this.rpcCall("ingest_markdown_document", params), this.options.maxRetries, this.options.retryBaseDelayMs, this.logger, `ingest_markdown_document(${params.sourceDoc})`);
56
75
  }
57
76
  async enqueueDelete(sourceDoc) {
58
77
  await withRetry(() => this.rpcCall("delete_authored_document", { sourceDoc }), this.options.maxRetries, this.options.retryBaseDelayMs, this.logger, `delete_authored_document(${sourceDoc})`);
@@ -14,14 +14,22 @@ interface FsDirentLike {
14
14
  interface FsWatcherLike extends Disposable {
15
15
  on(event: "error", handler: (error: Error) => void): void;
16
16
  }
17
+ interface FsReadStream {
18
+ read(buffer: Uint8Array): Promise<{
19
+ bytesRead: number;
20
+ }>;
21
+ close(): Promise<void>;
22
+ }
17
23
  interface FsApi {
18
24
  readdir(dir: string): Promise<FsDirentLike[]>;
19
25
  readFile(file: string): Promise<Uint8Array>;
20
26
  stat(file: string): Promise<{
21
27
  size: number;
22
28
  mtimeMs: number;
29
+ ctimeMs: number;
23
30
  }>;
24
31
  watch(dir: string, onChange: (event: string, filename: string | Buffer | null) => void): FsWatcherLike;
32
+ openReadStream(file: string): Promise<FsReadStream>;
25
33
  }
26
34
  export interface MarkdownSourceAdapter {
27
35
  kind: string;
@@ -2,13 +2,13 @@ import fs from "node:fs";
2
2
  import fsp from "node:fs/promises";
3
3
  import os from "node:os";
4
4
  import path from "node:path";
5
- import { hashBytes } from "./markdown-hash.js";
6
5
  import { formatError } from "./format-error.js";
7
6
  import { IngestQueue } from "./ingest-queue.js";
8
7
  const DEFAULT_DEBOUNCE_MS = 150;
9
8
  const DEFAULT_TOKENIZER_ID = "markdown-ingest:v1";
10
9
  const MARKDOWN_INGEST_VERSION = 3;
11
10
  const HASH_BACKEND = "wasm-fnv1a64";
11
+ const STREAM_CHUNK_BYTES = 64 * 1024;
12
12
  export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = createRealFsApi()) {
13
13
  const adapters = [];
14
14
  const genericRoots = normalizeMarkdownRoots(cfg.markdownIngestionRoots);
@@ -19,6 +19,8 @@ export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsA
19
19
  exclude: cfg.markdownIngestionExclude,
20
20
  debounceMs: cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS,
21
21
  snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath),
22
+ priorityMode: cfg.markdownIngestionPriorityMode,
23
+ maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile,
22
24
  }, getRpc, logger, fsApi));
23
25
  }
24
26
  const obsidianRoots = normalizeMarkdownRoots(cfg.markdownIngestionObsidianRoots);
@@ -29,6 +31,8 @@ export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsA
29
31
  exclude: cfg.markdownIngestionObsidianExclude,
30
32
  debounceMs: cfg.markdownIngestionObsidianDebounceMs ?? cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS,
31
33
  snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath),
34
+ priorityMode: cfg.markdownIngestionPriorityMode,
35
+ maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile,
32
36
  }, getRpc, logger, fsApi));
33
37
  }
34
38
  if (adapters.length === 0) {
@@ -77,6 +81,8 @@ class DirectoryMarkdownSourceAdapter {
77
81
  getRpc;
78
82
  logger;
79
83
  snapshotPath;
84
+ priorityMode;
85
+ maxTokensPerFile;
80
86
  states = new Map();
81
87
  fileStates = new Map();
82
88
  activeScans = new Set();
@@ -85,6 +91,17 @@ class DirectoryMarkdownSourceAdapter {
85
91
  started = false;
86
92
  ingestQueue = null;
87
93
  stopping = false;
94
+ lastAcceptMore = true;
95
+ lastRetryAfterMs = 0;
96
+ lastQueueDepth = 0;
97
+ lastQueueCapacity = 0;
98
+ lastProcessingTimeUs = 0;
99
+ lastNodesAccepted = 0;
100
+ lastNodesRejected = 0;
101
+ lastTokensIngested = 0;
102
+ lastTokenBurstLimit = 512;
103
+ lastWalDepth = 0;
104
+ lastWalCapacity = 0;
88
105
  snapshotLoaded = false;
89
106
  snapshotDirty = false;
90
107
  constructor(kind, config, getRpc, logger, fsApi) {
@@ -97,6 +114,8 @@ class DirectoryMarkdownSourceAdapter {
97
114
  this.getRpc = getRpc;
98
115
  this.logger = logger;
99
116
  this.snapshotPath = config.snapshotPath ?? resolveMarkdownSnapshotPath(kind);
117
+ this.priorityMode = config.priorityMode ?? "mtime";
118
+ this.maxTokensPerFile = Math.max(1, Math.trunc(config.maxTokensPerFile ?? 128_000));
100
119
  this.tokenizerId = DEFAULT_TOKENIZER_ID;
101
120
  this.coreDoc = true;
102
121
  }
@@ -150,6 +169,7 @@ class DirectoryMarkdownSourceAdapter {
150
169
  scanning: false,
151
170
  dirty: false,
152
171
  timer: null,
172
+ resumeFromPath: null,
153
173
  },
154
174
  knownFiles: this.snapshotFilesForRoot(resolved),
155
175
  directoryWatchers: new Map(),
@@ -167,12 +187,16 @@ class DirectoryMarkdownSourceAdapter {
167
187
  return;
168
188
  }
169
189
  rootState.scanState.scanning = true;
190
+ this.lastAcceptMore = true;
191
+ this.lastRetryAfterMs = 0;
170
192
  const scan = (async () => {
171
193
  const stats = createScanStats();
172
194
  const startedAt = Date.now();
173
195
  try {
174
196
  const currentFiles = new Set();
175
- await this.walkDirectory(rootState, rootState.root, currentFiles, stats);
197
+ const candidates = [];
198
+ await this.walkDirectory(rootState, rootState.root, currentFiles, stats, candidates);
199
+ await this.syncCandidates(rootState, candidates, stats);
176
200
  if (!this.stopping) {
177
201
  await this.pruneDeletedFiles(rootState, currentFiles, stats);
178
202
  rootState.knownFiles = currentFiles;
@@ -198,7 +222,7 @@ class DirectoryMarkdownSourceAdapter {
198
222
  this.activeScans.delete(scan);
199
223
  }
200
224
  }
201
- scheduleRootScan(rootState) {
225
+ scheduleRootScan(rootState, delayMs) {
202
226
  if (!this.started || this.stopping) {
203
227
  return;
204
228
  }
@@ -214,9 +238,9 @@ class DirectoryMarkdownSourceAdapter {
214
238
  void this.scanRoot(rootState.root).catch((error) => {
215
239
  this.logger.warn?.(`[markdown-ingest] root scan failed for ${rootState.root}: ${formatError(error)}`);
216
240
  });
217
- }, this.debounceMs);
241
+ }, Math.max(this.debounceMs, delayMs ?? 0));
218
242
  }
219
- async walkDirectory(rootState, dir, currentFiles, stats) {
243
+ async walkDirectory(rootState, dir, currentFiles, stats, candidates) {
220
244
  if (this.shouldPruneDirectory(rootState.root, dir)) {
221
245
  stats.directoriesPruned++;
222
246
  return;
@@ -240,7 +264,7 @@ class DirectoryMarkdownSourceAdapter {
240
264
  }
241
265
  const child = path.join(dir, entry.name);
242
266
  if (entry.isDirectory()) {
243
- await this.walkDirectory(rootState, child, currentFiles, stats);
267
+ await this.walkDirectory(rootState, child, currentFiles, stats, candidates);
244
268
  continue;
245
269
  }
246
270
  if (!entry.isFile() || !isMarkdownFile(entry.name)) {
@@ -253,17 +277,74 @@ class DirectoryMarkdownSourceAdapter {
253
277
  }
254
278
  stats.filesIncluded++;
255
279
  currentFiles.add(child);
280
+ const stat = await this.safeStatWithCtime(child);
281
+ if (!stat) {
282
+ continue;
283
+ }
284
+ candidates.push({ path: child, size: stat.size, mtimeMs: stat.mtimeMs, ctimeMs: stat.ctimeMs, ordinal: candidates.length });
285
+ }
286
+ }
287
+ async syncCandidates(rootState, candidates, stats) {
288
+ const sorted = sortCandidates(candidates, this.priorityMode);
289
+ let skipping = false;
290
+ if (rootState.scanState.resumeFromPath) {
291
+ const targetExists = sorted.some((c) => c.path === rootState.scanState.resumeFromPath);
292
+ if (targetExists) {
293
+ skipping = true;
294
+ this.lastAcceptMore = true;
295
+ this.lastRetryAfterMs = 0;
296
+ }
297
+ else {
298
+ rootState.scanState.resumeFromPath = null;
299
+ }
300
+ }
301
+ for (const candidate of sorted) {
302
+ if (skipping) {
303
+ if (candidate.path === rootState.scanState.resumeFromPath) {
304
+ skipping = false;
305
+ }
306
+ else {
307
+ continue;
308
+ }
309
+ }
310
+ if (this.stopping) {
311
+ return;
312
+ }
313
+ if (!this.lastAcceptMore) {
314
+ if (!this.stopping) {
315
+ rootState.scanState.resumeFromPath = candidate.path;
316
+ this.scheduleRootScan(rootState, this.lastRetryAfterMs);
317
+ }
318
+ return;
319
+ }
320
+ if (this.lastWalCapacity > 0 && this.lastWalDepth > this.lastWalCapacity * 0.8) {
321
+ rootState.scanState.resumeFromPath = candidate.path;
322
+ if (!this.stopping) {
323
+ this.scheduleRootScan(rootState, 2000);
324
+ }
325
+ return;
326
+ }
327
+ const estimatedTokens = estimateTokens(candidate.size);
328
+ if (estimatedTokens > this.maxTokensPerFile) {
329
+ stats.filesDeferred++;
330
+ continue;
331
+ }
256
332
  try {
257
- const result = await this.syncMarkdownFile(rootState, child);
333
+ const result = await this.syncMarkdownFile(rootState, candidate.path, {
334
+ size: candidate.size,
335
+ mtimeMs: candidate.mtimeMs,
336
+ ctimeMs: candidate.ctimeMs,
337
+ });
258
338
  recordSyncResult(stats, result);
259
339
  }
260
340
  catch (error) {
261
341
  stats.syncErrors++;
262
342
  if (!this.stopping) {
263
- this.logger.warn?.(`[markdown-ingest] sync failed for ${child}: ${formatError(error)}`);
343
+ this.logger.warn?.(`[markdown-ingest] sync failed for ${candidate.path}: ${formatError(error)}`);
264
344
  }
265
345
  }
266
346
  }
347
+ rootState.scanState.resumeFromPath = null;
267
348
  }
268
349
  shouldPruneDirectory(root, dir) {
269
350
  const relative = toPosixPath(path.relative(root, dir));
@@ -284,6 +365,11 @@ class DirectoryMarkdownSourceAdapter {
284
365
  try {
285
366
  const watcher = this.fsApi.watch(dir, () => {
286
367
  if (!this.stopping) {
368
+ rootState.scanState.resumeFromPath = null;
369
+ if (rootState.scanState.timer) {
370
+ clearTimeout(rootState.scanState.timer);
371
+ rootState.scanState.timer = null;
372
+ }
287
373
  this.scheduleRootScan(rootState);
288
374
  }
289
375
  });
@@ -335,10 +421,10 @@ class DirectoryMarkdownSourceAdapter {
335
421
  stats.filesDeleted++;
336
422
  }
337
423
  }
338
- async syncMarkdownFile(rootState, filePath) {
424
+ async syncMarkdownFile(rootState, filePath, initialStat) {
339
425
  const sourceDoc = filePath;
340
426
  const relativePath = toPosixPath(path.relative(rootState.root, filePath));
341
- const stat = await this.safeStat(filePath);
427
+ const stat = initialStat ?? (await this.safeStatWithCtime(filePath));
342
428
  if (!stat) {
343
429
  await this.deleteSourceDocument(sourceDoc);
344
430
  this.fileStates.delete(sourceDoc);
@@ -349,14 +435,18 @@ class DirectoryMarkdownSourceAdapter {
349
435
  if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) {
350
436
  return "unchanged";
351
437
  }
352
- const bytes = await this.safeReadFile(filePath);
353
- if (!bytes) {
438
+ const maxBytes = this.maxTokensPerFile * 4 + 3;
439
+ const streamed = await this.safeReadFileStreamed(filePath, maxBytes);
440
+ if (streamed === "too_large") {
441
+ return "skipped";
442
+ }
443
+ if (!streamed) {
354
444
  await this.deleteSourceDocument(sourceDoc);
355
445
  this.fileStates.delete(sourceDoc);
356
446
  this.snapshotDirty = true;
357
447
  return "deleted";
358
448
  }
359
- const fileHash = hashBytes(bytes);
449
+ const { text, fileHash } = streamed;
360
450
  if (cached && cached.fileHash === fileHash) {
361
451
  this.setFileState(sourceDoc, {
362
452
  root: rootState.root,
@@ -368,14 +458,13 @@ class DirectoryMarkdownSourceAdapter {
368
458
  });
369
459
  return "unchanged";
370
460
  }
371
- const text = textDecoder.decode(bytes);
372
461
  if (this.kind === "obsidian" && this.includePatterns.length === 0 && !looksLikeObsidianNote(filePath, text)) {
373
462
  await this.deleteSourceDocument(sourceDoc);
374
463
  this.fileStates.delete(sourceDoc);
375
464
  this.snapshotDirty = true;
376
465
  return "skipped";
377
466
  }
378
- await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs);
467
+ await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs, stat.ctimeMs);
379
468
  this.setFileState(sourceDoc, {
380
469
  root: rootState.root,
381
470
  sourceDoc,
@@ -390,9 +479,9 @@ class DirectoryMarkdownSourceAdapter {
390
479
  this.fileStates.set(sourceDoc, state);
391
480
  this.snapshotDirty = true;
392
481
  }
393
- async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs) {
482
+ async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs, sourceCtimeMs) {
394
483
  const queue = await this.getIngestQueue();
395
- await queue.enqueueIngest(sourceDoc, text, {
484
+ const feedback = await queue.enqueueIngest(sourceDoc, text, {
396
485
  tokenizerId: this.tokenizerId,
397
486
  coreDoc: this.coreDoc,
398
487
  sourceMeta: {
@@ -402,10 +491,44 @@ class DirectoryMarkdownSourceAdapter {
402
491
  fileHash,
403
492
  sourceSize,
404
493
  sourceMtimeMs: Math.trunc(sourceMtimeMs),
494
+ sourceCtimeMs: Math.trunc(sourceCtimeMs),
405
495
  ingestVersion: MARKDOWN_INGEST_VERSION,
406
496
  hashBackend: HASH_BACKEND,
407
497
  },
408
- });
498
+ }, this.lastTokenBurstLimit);
499
+ this.applyIngestFeedback(feedback);
500
+ }
501
+ applyIngestFeedback(feedback) {
502
+ if (feedback && typeof feedback.acceptMore === "boolean") {
503
+ this.lastAcceptMore = feedback.acceptMore;
504
+ this.lastQueueDepth = feedback.queueDepth ?? 0;
505
+ this.lastQueueCapacity = feedback.queueCapacity ?? 0;
506
+ this.lastProcessingTimeUs = feedback.processingTimeUs ?? 0;
507
+ this.lastNodesAccepted = feedback.nodesAccepted ?? 0;
508
+ this.lastNodesRejected = feedback.nodesRejected ?? 0;
509
+ this.lastTokensIngested = feedback.tokensIngested ?? 0;
510
+ if (feedback.tokenBurstLimit && feedback.tokenBurstLimit > 0) {
511
+ this.lastTokenBurstLimit = feedback.tokenBurstLimit;
512
+ }
513
+ this.lastWalDepth = feedback.walDepth ?? 0;
514
+ this.lastWalCapacity = feedback.walCapacity ?? 0;
515
+ if (feedback.acceptMore) {
516
+ this.lastRetryAfterMs = 0;
517
+ }
518
+ else {
519
+ this.lastRetryAfterMs = feedback.retryAfterMs || 1000;
520
+ }
521
+ }
522
+ else {
523
+ this.lastAcceptMore = true;
524
+ this.lastRetryAfterMs = 0;
525
+ this.lastQueueDepth = 0;
526
+ this.lastQueueCapacity = 0;
527
+ this.lastProcessingTimeUs = 0;
528
+ this.lastNodesAccepted = 0;
529
+ this.lastNodesRejected = 0;
530
+ this.lastTokensIngested = 0;
531
+ }
409
532
  }
410
533
  async deleteSourceDocument(sourceDoc) {
411
534
  const queue = await this.getIngestQueue();
@@ -414,7 +537,9 @@ class DirectoryMarkdownSourceAdapter {
414
537
  async getIngestQueue() {
415
538
  if (!this.ingestQueue) {
416
539
  const rpc = await this.getRpc();
417
- this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger);
540
+ this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger, {
541
+ onChunkFeedback: (feedback) => this.applyIngestFeedback(feedback),
542
+ });
418
543
  }
419
544
  return this.ingestQueue;
420
545
  }
@@ -426,14 +551,51 @@ class DirectoryMarkdownSourceAdapter {
426
551
  return null;
427
552
  }
428
553
  }
429
- async safeReadFile(filePath) {
554
+ async safeStatWithCtime(filePath) {
430
555
  try {
431
- return await this.fsApi.readFile(filePath);
556
+ return await this.fsApi.stat(filePath);
432
557
  }
433
558
  catch {
434
559
  return null;
435
560
  }
436
561
  }
562
+ async safeReadFileStreamed(filePath, maxBytes) {
563
+ let stream = null;
564
+ try {
565
+ stream = await this.fsApi.openReadStream(filePath);
566
+ const decoder = new TextDecoder();
567
+ const chunks = [];
568
+ let hash = 0xcbf29ce484222325n;
569
+ let total = 0;
570
+ const buffer = Buffer.allocUnsafe(STREAM_CHUNK_BYTES);
571
+ while (true) {
572
+ const { bytesRead } = await stream.read(buffer);
573
+ if (bytesRead === 0) {
574
+ break;
575
+ }
576
+ total += bytesRead;
577
+ if (total > maxBytes) {
578
+ return "too_large";
579
+ }
580
+ const chunk = buffer.subarray(0, bytesRead);
581
+ hash = updateFnv1a64(hash, chunk);
582
+ chunks.push(decoder.decode(chunk, { stream: true }));
583
+ }
584
+ chunks.push(decoder.decode());
585
+ return {
586
+ text: chunks.join(""),
587
+ fileHash: hash.toString(16).padStart(16, "0"),
588
+ };
589
+ }
590
+ catch {
591
+ return null;
592
+ }
593
+ finally {
594
+ if (stream) {
595
+ await stream.close().catch(() => { });
596
+ }
597
+ }
598
+ }
437
599
  snapshotFilesForRoot(root) {
438
600
  const files = new Set();
439
601
  for (const state of this.fileStates.values()) {
@@ -497,7 +659,7 @@ class DirectoryMarkdownSourceAdapter {
497
659
  }
498
660
  }
499
661
  logScanStats(root, stats, durationMs) {
500
- this.logger.info?.(`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} errors=${stats.syncErrors} durationMs=${durationMs}`);
662
+ this.logger.info?.(`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} deferred=${stats.filesDeferred} errors=${stats.syncErrors} durationMs=${durationMs}`);
501
663
  }
502
664
  }
503
665
  function createScanStats() {
@@ -511,8 +673,26 @@ function createScanStats() {
511
673
  filesIngested: 0,
512
674
  filesDeleted: 0,
513
675
  syncErrors: 0,
676
+ filesDeferred: 0,
514
677
  };
515
678
  }
679
+ function estimateTokens(size) {
680
+ return Math.max(1, Math.floor(size / 4));
681
+ }
682
+ function sortCandidates(candidates, mode) {
683
+ return [...candidates].sort((left, right) => {
684
+ if (mode === "size") {
685
+ return right.size - left.size || left.ordinal - right.ordinal;
686
+ }
687
+ if (mode === "ctime") {
688
+ return right.ctimeMs - left.ctimeMs || left.ordinal - right.ordinal;
689
+ }
690
+ if (mode === "fifo") {
691
+ return left.ordinal - right.ordinal;
692
+ }
693
+ return right.mtimeMs - left.mtimeMs || left.ordinal - right.ordinal;
694
+ });
695
+ }
516
696
  function recordSyncResult(stats, result) {
517
697
  if (result === "ingested") {
518
698
  stats.filesIngested++;
@@ -530,7 +710,6 @@ function recordSyncResult(stats, result) {
530
710
  function toPosixPath(value) {
531
711
  return value.split(path.sep).join("/");
532
712
  }
533
- const textDecoder = new TextDecoder();
534
713
  function normalizeMarkdownRoots(roots) {
535
714
  if (!roots?.length) {
536
715
  return [];
@@ -545,6 +724,15 @@ function normalizeMarkdownRoots(roots) {
545
724
  }
546
725
  return [...resolved];
547
726
  }
727
+ function updateFnv1a64(seed, bytes) {
728
+ let hash = seed;
729
+ const prime = 0x100000001b3n;
730
+ for (let i = 0; i < bytes.length; i++) {
731
+ hash ^= BigInt(bytes[i] ?? 0);
732
+ hash = BigInt.asUintN(64, hash * prime);
733
+ }
734
+ return hash;
735
+ }
548
736
  function resolveMarkdownSnapshotPath(kind, configuredPath) {
549
737
  const trimmed = configuredPath?.trim();
550
738
  if (trimmed) {
@@ -561,10 +749,22 @@ function createRealFsApi() {
561
749
  readdir: async (dir) => fsp.readdir(dir, { withFileTypes: true }),
562
750
  readFile: async (file) => fsp.readFile(file),
563
751
  stat: async (file) => {
564
- const stat = await fsp.stat(file);
565
- return { size: stat.size, mtimeMs: stat.mtimeMs };
752
+ const s = await fsp.stat(file);
753
+ return { size: s.size, mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs };
566
754
  },
567
755
  watch: (dir, onChange) => fs.watch(dir, onChange),
756
+ openReadStream: async (file) => {
757
+ const handle = await fsp.open(file, "r");
758
+ return {
759
+ read: async (buffer) => {
760
+ const { bytesRead } = await handle.read(buffer, 0, buffer.length, null);
761
+ return { bytesRead };
762
+ },
763
+ close: async () => {
764
+ await handle.close();
765
+ },
766
+ };
767
+ },
568
768
  };
569
769
  }
570
770
  function isMarkdownFile(fileName) {
package/dist/types.d.ts CHANGED
@@ -48,6 +48,8 @@ export interface PluginConfig {
48
48
  markdownIngestionInclude?: string[];
49
49
  markdownIngestionExclude?: string[];
50
50
  markdownIngestionDebounceMs?: number;
51
+ markdownIngestionPriorityMode?: "mtime" | "ctime" | "size" | "fifo";
52
+ markdownIngestionMaxTokensPerFile?: number;
51
53
  markdownIngestionSnapshotPath?: string;
52
54
  markdownIngestionObsidianSnapshotPath?: string;
53
55
  dreamPromotionEnabled?: boolean;
@@ -2,7 +2,7 @@
2
2
  "id": "libravdb-memory",
3
3
  "name": "LibraVDB Memory",
4
4
  "description": "Persistent vector memory with three-tier hybrid scoring",
5
- "version": "1.5.4",
5
+ "version": "1.5.5",
6
6
  "kind": [
7
7
  "memory",
8
8
  "context-engine"
@@ -263,6 +263,20 @@
263
263
  "type": "number",
264
264
  "default": 150
265
265
  },
266
+ "markdownIngestionPriorityMode": {
267
+ "type": "string",
268
+ "enum": [
269
+ "mtime",
270
+ "ctime",
271
+ "size",
272
+ "fifo"
273
+ ],
274
+ "default": "mtime"
275
+ },
276
+ "markdownIngestionMaxTokensPerFile": {
277
+ "type": "number",
278
+ "default": 128000
279
+ },
266
280
  "markdownIngestionSnapshotPath": {
267
281
  "type": "string"
268
282
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xdarkicex/openclaw-memory-libravdb",
3
- "version": "1.5.4",
3
+ "version": "1.5.5",
4
4
  "type": "module",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",