@xdarkicex/openclaw-memory-libravdb 1.5.3 → 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -559,6 +559,19 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
559
559
  : {}),
560
560
  };
561
561
  }
562
/**
 * Reports whether gRPC auth has been configured through the environment.
 *
 * Auth counts as configured when LIBRAVDB_AUTH_SECRET or
 * LIBRAVDB_AUTH_SECRET_FILE holds a value that is non-empty after trimming.
 *
 * @returns {boolean} true when at least one of the two variables is non-blank.
 */
function isGrpcAuthConfigured() {
    const isNonBlank = (raw) => {
        const trimmed = raw?.trim();
        return typeof trimmed === "string" && trimmed.length > 0;
    };
    const { LIBRAVDB_AUTH_SECRET, LIBRAVDB_AUTH_SECRET_FILE } = process.env;
    return isNonBlank(LIBRAVDB_AUTH_SECRET) || isNonBlank(LIBRAVDB_AUTH_SECRET_FILE);
}
567
/**
 * Wraps a failed gRPC auth/session initialization in a descriptive Error.
 *
 * The message carries the original `code` when it is a number or a string.
 * The original failure is attached as `cause` so its stack and details are
 * not lost when this wrapper is thrown in its place.
 *
 * @param {unknown} error - The value caught from the initialization call.
 * @returns {Error} A new Error describing the auth initialization failure.
 */
function buildGrpcAuthInitializationError(error) {
    const rawCode = error?.code;
    const codeSuffix = typeof rawCode === "number" || typeof rawCode === "string"
        ? ` code=${String(rawCode)}`
        : "";
    const message = `LibraVDB gRPC auth initialization failed${codeSuffix}; ` +
        `check LIBRAVDB_AUTH_SECRET and daemon auth configuration`;
    // Only attach a cause when there is one, so `"cause" in err` stays meaningful.
    return error === undefined ? new Error(message) : new Error(message, { cause: error });
}
562
575
  async function runCompaction(args) {
563
576
  const request = buildCompactSessionRequest(args);
564
577
  const kernel = await getKernelOrNull("compact");
@@ -644,7 +657,10 @@ export function buildContextEngineFactory(runtime, cfg, logger = console) {
644
657
  });
645
658
  }
646
659
  catch (error) {
647
- // Proceed even if initialize session fails or doesn't return nonce if secret optional
660
+ if (isGrpcAuthConfigured()) {
661
+ throw buildGrpcAuthInitializationError(error);
662
+ }
663
+ // Proceed when the kernel does not require auth and the init call is unavailable.
648
664
  }
649
665
  return await kernel.bootstrapSession({
650
666
  sessionId,
package/dist/index.js CHANGED
@@ -839,14 +839,14 @@ var require_binary_encoding = __commonJS({
839
839
  };
840
840
  exports2.BinaryWriter = BinaryWriter;
841
841
  var BinaryReader = class {
842
- constructor(buf, textDecoder3) {
842
+ constructor(buf, textDecoder2) {
843
843
  this.varint64 = varint_js_1.varint64read;
844
844
  this.uint32 = varint_js_1.varint32read;
845
845
  this.buf = buf;
846
846
  this.len = buf.length;
847
847
  this.pos = 0;
848
848
  this.view = new DataView(buf.buffer, buf.byteOffset, buf.byteLength);
849
- this.textDecoder = textDecoder3 !== null && textDecoder3 !== void 0 ? textDecoder3 : new TextDecoder();
849
+ this.textDecoder = textDecoder2 !== null && textDecoder2 !== void 0 ? textDecoder2 : new TextDecoder();
850
850
  }
851
851
  /**
852
852
  * Reads a tag - field number and wire type.
@@ -34240,6 +34240,17 @@ function buildContextEngineFactory(runtime, cfg, logger = console) {
34240
34240
  ...typeof cfg.continuityPriorContextTokens === "number" ? { continuityPriorContextTokens: cfg.continuityPriorContextTokens } : {}
34241
34241
  };
34242
34242
  }
34243
/**
 * Reports whether gRPC auth has been configured through the environment.
 *
 * Auth counts as configured when LIBRAVDB_AUTH_SECRET or
 * LIBRAVDB_AUTH_SECRET_FILE holds a value that is non-empty after trimming.
 *
 * @returns {boolean} true when at least one of the two variables is non-blank.
 */
function isGrpcAuthConfigured() {
  const isNonBlank = (raw) => {
    const trimmed = raw?.trim();
    return typeof trimmed === "string" && trimmed.length > 0;
  };
  const { LIBRAVDB_AUTH_SECRET, LIBRAVDB_AUTH_SECRET_FILE } = process.env;
  return isNonBlank(LIBRAVDB_AUTH_SECRET) || isNonBlank(LIBRAVDB_AUTH_SECRET_FILE);
}
34248
/**
 * Wraps a failed gRPC auth/session initialization in a descriptive Error.
 *
 * The message carries the original `code` when it is a number or a string.
 * The original failure is attached as `cause` so its stack and details are
 * not lost when this wrapper is thrown in its place.
 *
 * @param {unknown} error - The value caught from the initialization call.
 * @returns {Error} A new Error describing the auth initialization failure.
 */
function buildGrpcAuthInitializationError(error) {
  const rawCode = error?.code;
  const codeSuffix = typeof rawCode === "number" || typeof rawCode === "string" ? ` code=${String(rawCode)}` : "";
  const message = `LibraVDB gRPC auth initialization failed${codeSuffix}; check LIBRAVDB_AUTH_SECRET and daemon auth configuration`;
  // Only attach a cause when there is one, so `"cause" in err` stays meaningful.
  return error === undefined ? new Error(message) : new Error(message, { cause: error });
}
34243
34254
  async function runCompaction(args) {
34244
34255
  const request = buildCompactSessionRequest(args);
34245
34256
  const kernel = await getKernelOrNull("compact");
@@ -34322,6 +34333,9 @@ function buildContextEngineFactory(runtime, cfg, logger = console) {
34322
34333
  clientCapabilities: [{ name: "grpc", version: "1.0" }]
34323
34334
  });
34324
34335
  } catch (error) {
34336
+ if (isGrpcAuthConfigured()) {
34337
+ throw buildGrpcAuthInitializationError(error);
34338
+ }
34325
34339
  }
34326
34340
  return await kernel.bootstrapSession({
34327
34341
  sessionId,
@@ -38523,37 +38537,55 @@ var IngestQueue = class {
38523
38537
  this.options.chunkTokens = DEFAULT_OPTIONS.chunkTokens;
38524
38538
  }
38525
38539
  }
38526
- async enqueueIngest(sourceDoc, text, baseParams) {
38540
+ async enqueueIngest(sourceDoc, text, baseParams, maxChunkTokens) {
38527
38541
  if (this.options.chunkTokens === Infinity) {
38528
- return this.ingestWithRetry({
38542
+ const resp = await this.ingestWithRetry({
38529
38543
  ...baseParams,
38530
38544
  sourceDoc,
38531
38545
  text,
38532
38546
  mode: IngestMode.REPLACE
38533
38547
  });
38534
- }
38535
- const chunks = splitIntoChunks(text, this.options.chunkTokens);
38536
- if (chunks.length === 1) {
38537
- return this.ingestWithRetry({
38538
- ...baseParams,
38539
- sourceDoc,
38540
- text: chunks[0].text,
38541
- mode: IngestMode.REPLACE
38542
- });
38543
- }
38544
- for (let i = 0; i < chunks.length; i++) {
38545
- const isFirst = i === 0;
38548
+ return resp.feedback;
38549
+ }
38550
+ let currentLimit = maxChunkTokens && maxChunkTokens > 0 ? maxChunkTokens : this.options.chunkTokens;
38551
+ let offset = 0;
38552
+ let isFirst = true;
38553
+ let lastFeedback;
38554
+ while (offset < text.length) {
38555
+ const remainingText = text.slice(offset);
38556
+ const chunks = splitIntoChunks(remainingText, currentLimit);
38557
+ const chunkText = chunks[0].text;
38546
38558
  const chunkParams = {
38547
38559
  ...baseParams,
38548
38560
  sourceDoc,
38549
- text: chunks[i].text,
38561
+ text: chunkText,
38550
38562
  mode: isFirst ? IngestMode.REPLACE : IngestMode.APPEND
38551
38563
  };
38552
- await this.ingestWithRetry(chunkParams);
38564
+ const resp = await this.ingestWithRetry(chunkParams);
38565
+ lastFeedback = resp.feedback;
38566
+ if (lastFeedback && lastFeedback.nodesAccepted === 0 && lastFeedback.tokenBurstLimit && lastFeedback.tokenBurstLimit > 0 && lastFeedback.tokenBurstLimit < currentLimit) {
38567
+ currentLimit = lastFeedback.tokenBurstLimit;
38568
+ continue;
38569
+ }
38570
+ if (lastFeedback && lastFeedback.nodesAccepted === 0) {
38571
+ this.logger.warn?.(
38572
+ `[ingest-queue] Chunk permanently rejected for ${sourceDoc} at offset=${offset} length=${chunkText.length} tokenBurstLimit=${lastFeedback.tokenBurstLimit ?? "unset"}`
38573
+ );
38574
+ }
38575
+ if (this.options.onChunkFeedback && lastFeedback) {
38576
+ this.options.onChunkFeedback(lastFeedback);
38577
+ }
38578
+ offset += chunkText.length;
38579
+ isFirst = false;
38580
+ if (lastFeedback && !lastFeedback.acceptMore && offset < text.length) {
38581
+ const delay = lastFeedback.retryAfterMs || 1e3;
38582
+ await new Promise((resolve) => setTimeout(resolve, delay));
38583
+ }
38553
38584
  }
38585
+ return lastFeedback;
38554
38586
  }
38555
38587
  async ingestWithRetry(params) {
38556
- await withRetry(
38588
+ return withRetry(
38557
38589
  () => this.rpcCall("ingest_markdown_document", params),
38558
38590
  this.options.maxRetries,
38559
38591
  this.options.retryBaseDelayMs,
@@ -38644,6 +38676,7 @@ var DEFAULT_DEBOUNCE_MS2 = 150;
38644
38676
  var DEFAULT_TOKENIZER_ID = "markdown-ingest:v1";
38645
38677
  var MARKDOWN_INGEST_VERSION = 3;
38646
38678
  var HASH_BACKEND = "wasm-fnv1a64";
38679
+ var STREAM_CHUNK_BYTES = 64 * 1024;
38647
38680
  function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = createRealFsApi2()) {
38648
38681
  const adapters = [];
38649
38682
  const genericRoots = normalizeMarkdownRoots(cfg.markdownIngestionRoots);
@@ -38656,7 +38689,9 @@ function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = cr
38656
38689
  include: cfg.markdownIngestionInclude,
38657
38690
  exclude: cfg.markdownIngestionExclude,
38658
38691
  debounceMs: cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS2,
38659
- snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath)
38692
+ snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath),
38693
+ priorityMode: cfg.markdownIngestionPriorityMode,
38694
+ maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile
38660
38695
  },
38661
38696
  getRpc,
38662
38697
  logger,
@@ -38674,7 +38709,9 @@ function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = cr
38674
38709
  include: cfg.markdownIngestionObsidianInclude,
38675
38710
  exclude: cfg.markdownIngestionObsidianExclude,
38676
38711
  debounceMs: cfg.markdownIngestionObsidianDebounceMs ?? cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS2,
38677
- snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath)
38712
+ snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath),
38713
+ priorityMode: cfg.markdownIngestionPriorityMode,
38714
+ maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile
38678
38715
  },
38679
38716
  getRpc,
38680
38717
  logger,
@@ -38731,6 +38768,8 @@ var DirectoryMarkdownSourceAdapter = class {
38731
38768
  getRpc;
38732
38769
  logger;
38733
38770
  snapshotPath;
38771
+ priorityMode;
38772
+ maxTokensPerFile;
38734
38773
  states = /* @__PURE__ */ new Map();
38735
38774
  fileStates = /* @__PURE__ */ new Map();
38736
38775
  activeScans = /* @__PURE__ */ new Set();
@@ -38739,6 +38778,17 @@ var DirectoryMarkdownSourceAdapter = class {
38739
38778
  started = false;
38740
38779
  ingestQueue = null;
38741
38780
  stopping = false;
38781
+ lastAcceptMore = true;
38782
+ lastRetryAfterMs = 0;
38783
+ lastQueueDepth = 0;
38784
+ lastQueueCapacity = 0;
38785
+ lastProcessingTimeUs = 0;
38786
+ lastNodesAccepted = 0;
38787
+ lastNodesRejected = 0;
38788
+ lastTokensIngested = 0;
38789
+ lastTokenBurstLimit = 512;
38790
+ lastWalDepth = 0;
38791
+ lastWalCapacity = 0;
38742
38792
  snapshotLoaded = false;
38743
38793
  snapshotDirty = false;
38744
38794
  constructor(kind, config, getRpc, logger, fsApi) {
@@ -38751,6 +38801,8 @@ var DirectoryMarkdownSourceAdapter = class {
38751
38801
  this.getRpc = getRpc;
38752
38802
  this.logger = logger;
38753
38803
  this.snapshotPath = config.snapshotPath ?? resolveMarkdownSnapshotPath(kind);
38804
+ this.priorityMode = config.priorityMode ?? "mtime";
38805
+ this.maxTokensPerFile = Math.max(1, Math.trunc(config.maxTokensPerFile ?? 128e3));
38754
38806
  this.tokenizerId = DEFAULT_TOKENIZER_ID;
38755
38807
  this.coreDoc = true;
38756
38808
  }
@@ -38803,7 +38855,8 @@ var DirectoryMarkdownSourceAdapter = class {
38803
38855
  scanState: {
38804
38856
  scanning: false,
38805
38857
  dirty: false,
38806
- timer: null
38858
+ timer: null,
38859
+ resumeFromPath: null
38807
38860
  },
38808
38861
  knownFiles: this.snapshotFilesForRoot(resolved),
38809
38862
  directoryWatchers: /* @__PURE__ */ new Map()
@@ -38821,12 +38874,16 @@ var DirectoryMarkdownSourceAdapter = class {
38821
38874
  return;
38822
38875
  }
38823
38876
  rootState.scanState.scanning = true;
38877
+ this.lastAcceptMore = true;
38878
+ this.lastRetryAfterMs = 0;
38824
38879
  const scan = (async () => {
38825
38880
  const stats = createScanStats();
38826
38881
  const startedAt = Date.now();
38827
38882
  try {
38828
38883
  const currentFiles = /* @__PURE__ */ new Set();
38829
- await this.walkDirectory(rootState, rootState.root, currentFiles, stats);
38884
+ const candidates = [];
38885
+ await this.walkDirectory(rootState, rootState.root, currentFiles, stats, candidates);
38886
+ await this.syncCandidates(rootState, candidates, stats);
38830
38887
  if (!this.stopping) {
38831
38888
  await this.pruneDeletedFiles(rootState, currentFiles, stats);
38832
38889
  rootState.knownFiles = currentFiles;
@@ -38850,7 +38907,7 @@ var DirectoryMarkdownSourceAdapter = class {
38850
38907
  this.activeScans.delete(scan);
38851
38908
  }
38852
38909
  }
38853
- scheduleRootScan(rootState) {
38910
+ scheduleRootScan(rootState, delayMs) {
38854
38911
  if (!this.started || this.stopping) {
38855
38912
  return;
38856
38913
  }
@@ -38866,9 +38923,9 @@ var DirectoryMarkdownSourceAdapter = class {
38866
38923
  void this.scanRoot(rootState.root).catch((error) => {
38867
38924
  this.logger.warn?.(`[markdown-ingest] root scan failed for ${rootState.root}: ${formatError(error)}`);
38868
38925
  });
38869
- }, this.debounceMs);
38926
+ }, Math.max(this.debounceMs, delayMs ?? 0));
38870
38927
  }
38871
- async walkDirectory(rootState, dir, currentFiles, stats) {
38928
+ async walkDirectory(rootState, dir, currentFiles, stats, candidates) {
38872
38929
  if (this.shouldPruneDirectory(rootState.root, dir)) {
38873
38930
  stats.directoriesPruned++;
38874
38931
  return;
@@ -38891,7 +38948,7 @@ var DirectoryMarkdownSourceAdapter = class {
38891
38948
  }
38892
38949
  const child = path2.join(dir, entry.name);
38893
38950
  if (entry.isDirectory()) {
38894
- await this.walkDirectory(rootState, child, currentFiles, stats);
38951
+ await this.walkDirectory(rootState, child, currentFiles, stats, candidates);
38895
38952
  continue;
38896
38953
  }
38897
38954
  if (!entry.isFile() || !isMarkdownFile(entry.name)) {
@@ -38904,16 +38961,71 @@ var DirectoryMarkdownSourceAdapter = class {
38904
38961
  }
38905
38962
  stats.filesIncluded++;
38906
38963
  currentFiles.add(child);
38964
+ const stat = await this.safeStatWithCtime(child);
38965
+ if (!stat) {
38966
+ continue;
38967
+ }
38968
+ candidates.push({ path: child, size: stat.size, mtimeMs: stat.mtimeMs, ctimeMs: stat.ctimeMs, ordinal: candidates.length });
38969
+ }
38970
+ }
38971
+ async syncCandidates(rootState, candidates, stats) {
38972
+ const sorted = sortCandidates(candidates, this.priorityMode);
38973
+ let skipping = false;
38974
+ if (rootState.scanState.resumeFromPath) {
38975
+ const targetExists = sorted.some((c) => c.path === rootState.scanState.resumeFromPath);
38976
+ if (targetExists) {
38977
+ skipping = true;
38978
+ this.lastAcceptMore = true;
38979
+ this.lastRetryAfterMs = 0;
38980
+ } else {
38981
+ rootState.scanState.resumeFromPath = null;
38982
+ }
38983
+ }
38984
+ for (const candidate of sorted) {
38985
+ if (skipping) {
38986
+ if (candidate.path === rootState.scanState.resumeFromPath) {
38987
+ skipping = false;
38988
+ } else {
38989
+ continue;
38990
+ }
38991
+ }
38992
+ if (this.stopping) {
38993
+ return;
38994
+ }
38995
+ if (!this.lastAcceptMore) {
38996
+ if (!this.stopping) {
38997
+ rootState.scanState.resumeFromPath = candidate.path;
38998
+ this.scheduleRootScan(rootState, this.lastRetryAfterMs);
38999
+ }
39000
+ return;
39001
+ }
39002
+ if (this.lastWalCapacity > 0 && this.lastWalDepth > this.lastWalCapacity * 0.8) {
39003
+ rootState.scanState.resumeFromPath = candidate.path;
39004
+ if (!this.stopping) {
39005
+ this.scheduleRootScan(rootState, 2e3);
39006
+ }
39007
+ return;
39008
+ }
39009
+ const estimatedTokens = estimateTokens(candidate.size);
39010
+ if (estimatedTokens > this.maxTokensPerFile) {
39011
+ stats.filesDeferred++;
39012
+ continue;
39013
+ }
38907
39014
  try {
38908
- const result = await this.syncMarkdownFile(rootState, child);
39015
+ const result = await this.syncMarkdownFile(rootState, candidate.path, {
39016
+ size: candidate.size,
39017
+ mtimeMs: candidate.mtimeMs,
39018
+ ctimeMs: candidate.ctimeMs
39019
+ });
38909
39020
  recordSyncResult(stats, result);
38910
39021
  } catch (error) {
38911
39022
  stats.syncErrors++;
38912
39023
  if (!this.stopping) {
38913
- this.logger.warn?.(`[markdown-ingest] sync failed for ${child}: ${formatError(error)}`);
39024
+ this.logger.warn?.(`[markdown-ingest] sync failed for ${candidate.path}: ${formatError(error)}`);
38914
39025
  }
38915
39026
  }
38916
39027
  }
39028
+ rootState.scanState.resumeFromPath = null;
38917
39029
  }
38918
39030
  shouldPruneDirectory(root, dir) {
38919
39031
  const relative = toPosixPath(path2.relative(root, dir));
@@ -38934,6 +39046,11 @@ var DirectoryMarkdownSourceAdapter = class {
38934
39046
  try {
38935
39047
  const watcher = this.fsApi.watch(dir, () => {
38936
39048
  if (!this.stopping) {
39049
+ rootState.scanState.resumeFromPath = null;
39050
+ if (rootState.scanState.timer) {
39051
+ clearTimeout(rootState.scanState.timer);
39052
+ rootState.scanState.timer = null;
39053
+ }
38937
39054
  this.scheduleRootScan(rootState);
38938
39055
  }
38939
39056
  });
@@ -38984,10 +39101,10 @@ var DirectoryMarkdownSourceAdapter = class {
38984
39101
  stats.filesDeleted++;
38985
39102
  }
38986
39103
  }
38987
- async syncMarkdownFile(rootState, filePath) {
39104
+ async syncMarkdownFile(rootState, filePath, initialStat) {
38988
39105
  const sourceDoc = filePath;
38989
39106
  const relativePath = toPosixPath(path2.relative(rootState.root, filePath));
38990
- const stat = await this.safeStat(filePath);
39107
+ const stat = initialStat ?? await this.safeStatWithCtime(filePath);
38991
39108
  if (!stat) {
38992
39109
  await this.deleteSourceDocument(sourceDoc);
38993
39110
  this.fileStates.delete(sourceDoc);
@@ -38998,14 +39115,18 @@ var DirectoryMarkdownSourceAdapter = class {
38998
39115
  if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) {
38999
39116
  return "unchanged";
39000
39117
  }
39001
- const bytes = await this.safeReadFile(filePath);
39002
- if (!bytes) {
39118
+ const maxBytes = this.maxTokensPerFile * 4 + 3;
39119
+ const streamed = await this.safeReadFileStreamed(filePath, maxBytes);
39120
+ if (streamed === "too_large") {
39121
+ return "skipped";
39122
+ }
39123
+ if (!streamed) {
39003
39124
  await this.deleteSourceDocument(sourceDoc);
39004
39125
  this.fileStates.delete(sourceDoc);
39005
39126
  this.snapshotDirty = true;
39006
39127
  return "deleted";
39007
39128
  }
39008
- const fileHash = hashBytes(bytes);
39129
+ const { text, fileHash } = streamed;
39009
39130
  if (cached && cached.fileHash === fileHash) {
39010
39131
  this.setFileState(sourceDoc, {
39011
39132
  root: rootState.root,
@@ -39017,14 +39138,13 @@ var DirectoryMarkdownSourceAdapter = class {
39017
39138
  });
39018
39139
  return "unchanged";
39019
39140
  }
39020
- const text = textDecoder2.decode(bytes);
39021
39141
  if (this.kind === "obsidian" && this.includePatterns.length === 0 && !looksLikeObsidianNote(filePath, text)) {
39022
39142
  await this.deleteSourceDocument(sourceDoc);
39023
39143
  this.fileStates.delete(sourceDoc);
39024
39144
  this.snapshotDirty = true;
39025
39145
  return "skipped";
39026
39146
  }
39027
- await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs);
39147
+ await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs, stat.ctimeMs);
39028
39148
  this.setFileState(sourceDoc, {
39029
39149
  root: rootState.root,
39030
39150
  sourceDoc,
@@ -39039,9 +39159,9 @@ var DirectoryMarkdownSourceAdapter = class {
39039
39159
  this.fileStates.set(sourceDoc, state);
39040
39160
  this.snapshotDirty = true;
39041
39161
  }
39042
- async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs) {
39162
+ async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs, sourceCtimeMs) {
39043
39163
  const queue = await this.getIngestQueue();
39044
- await queue.enqueueIngest(
39164
+ const feedback = await queue.enqueueIngest(
39045
39165
  sourceDoc,
39046
39166
  text,
39047
39167
  {
@@ -39054,11 +39174,44 @@ var DirectoryMarkdownSourceAdapter = class {
39054
39174
  fileHash,
39055
39175
  sourceSize,
39056
39176
  sourceMtimeMs: Math.trunc(sourceMtimeMs),
39177
+ sourceCtimeMs: Math.trunc(sourceCtimeMs),
39057
39178
  ingestVersion: MARKDOWN_INGEST_VERSION,
39058
39179
  hashBackend: HASH_BACKEND
39059
39180
  }
39060
- }
39181
+ },
39182
+ this.lastTokenBurstLimit
39061
39183
  );
39184
+ this.applyIngestFeedback(feedback);
39185
+ }
39186
+ applyIngestFeedback(feedback) {
39187
+ if (feedback && typeof feedback.acceptMore === "boolean") {
39188
+ this.lastAcceptMore = feedback.acceptMore;
39189
+ this.lastQueueDepth = feedback.queueDepth ?? 0;
39190
+ this.lastQueueCapacity = feedback.queueCapacity ?? 0;
39191
+ this.lastProcessingTimeUs = feedback.processingTimeUs ?? 0;
39192
+ this.lastNodesAccepted = feedback.nodesAccepted ?? 0;
39193
+ this.lastNodesRejected = feedback.nodesRejected ?? 0;
39194
+ this.lastTokensIngested = feedback.tokensIngested ?? 0;
39195
+ if (feedback.tokenBurstLimit && feedback.tokenBurstLimit > 0) {
39196
+ this.lastTokenBurstLimit = feedback.tokenBurstLimit;
39197
+ }
39198
+ this.lastWalDepth = feedback.walDepth ?? 0;
39199
+ this.lastWalCapacity = feedback.walCapacity ?? 0;
39200
+ if (feedback.acceptMore) {
39201
+ this.lastRetryAfterMs = 0;
39202
+ } else {
39203
+ this.lastRetryAfterMs = feedback.retryAfterMs || 1e3;
39204
+ }
39205
+ } else {
39206
+ this.lastAcceptMore = true;
39207
+ this.lastRetryAfterMs = 0;
39208
+ this.lastQueueDepth = 0;
39209
+ this.lastQueueCapacity = 0;
39210
+ this.lastProcessingTimeUs = 0;
39211
+ this.lastNodesAccepted = 0;
39212
+ this.lastNodesRejected = 0;
39213
+ this.lastTokensIngested = 0;
39214
+ }
39062
39215
  }
39063
39216
  async deleteSourceDocument(sourceDoc) {
39064
39217
  const queue = await this.getIngestQueue();
@@ -39067,7 +39220,9 @@ var DirectoryMarkdownSourceAdapter = class {
39067
39220
  async getIngestQueue() {
39068
39221
  if (!this.ingestQueue) {
39069
39222
  const rpc = await this.getRpc();
39070
- this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger);
39223
+ this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger, {
39224
+ onChunkFeedback: (feedback) => this.applyIngestFeedback(feedback)
39225
+ });
39071
39226
  }
39072
39227
  return this.ingestQueue;
39073
39228
  }
@@ -39078,13 +39233,49 @@ var DirectoryMarkdownSourceAdapter = class {
39078
39233
  return null;
39079
39234
  }
39080
39235
  }
39081
- async safeReadFile(filePath) {
39236
+ async safeStatWithCtime(filePath) {
39082
39237
  try {
39083
- return await this.fsApi.readFile(filePath);
39238
+ return await this.fsApi.stat(filePath);
39084
39239
  } catch {
39085
39240
  return null;
39086
39241
  }
39087
39242
  }
39243
+ async safeReadFileStreamed(filePath, maxBytes) {
39244
+ let stream = null;
39245
+ try {
39246
+ stream = await this.fsApi.openReadStream(filePath);
39247
+ const decoder = new TextDecoder();
39248
+ const chunks = [];
39249
+ let hash = 0xcbf29ce484222325n;
39250
+ let total = 0;
39251
+ const buffer = Buffer.allocUnsafe(STREAM_CHUNK_BYTES);
39252
+ while (true) {
39253
+ const { bytesRead } = await stream.read(buffer);
39254
+ if (bytesRead === 0) {
39255
+ break;
39256
+ }
39257
+ total += bytesRead;
39258
+ if (total > maxBytes) {
39259
+ return "too_large";
39260
+ }
39261
+ const chunk = buffer.subarray(0, bytesRead);
39262
+ hash = updateFnv1a64(hash, chunk);
39263
+ chunks.push(decoder.decode(chunk, { stream: true }));
39264
+ }
39265
+ chunks.push(decoder.decode());
39266
+ return {
39267
+ text: chunks.join(""),
39268
+ fileHash: hash.toString(16).padStart(16, "0")
39269
+ };
39270
+ } catch {
39271
+ return null;
39272
+ } finally {
39273
+ if (stream) {
39274
+ await stream.close().catch(() => {
39275
+ });
39276
+ }
39277
+ }
39278
+ }
39088
39279
  snapshotFilesForRoot(root) {
39089
39280
  const files = /* @__PURE__ */ new Set();
39090
39281
  for (const state of this.fileStates.values()) {
@@ -39147,7 +39338,7 @@ var DirectoryMarkdownSourceAdapter = class {
39147
39338
  }
39148
39339
  logScanStats(root, stats, durationMs) {
39149
39340
  this.logger.info?.(
39150
- `[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} errors=${stats.syncErrors} durationMs=${durationMs}`
39341
+ `[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} deferred=${stats.filesDeferred} errors=${stats.syncErrors} durationMs=${durationMs}`
39151
39342
  );
39152
39343
  }
39153
39344
  };
@@ -39161,9 +39352,27 @@ function createScanStats() {
39161
39352
  filesUnchanged: 0,
39162
39353
  filesIngested: 0,
39163
39354
  filesDeleted: 0,
39164
- syncErrors: 0
39355
+ syncErrors: 0,
39356
+ filesDeferred: 0
39165
39357
  };
39166
39358
  }
39359
/**
 * Estimates a token count from a byte size at four bytes per token.
 *
 * @param {number} size - Size in bytes.
 * @returns {number} floor(size / 4), but never less than 1.
 */
function estimateTokens(size) {
  const wholeTokens = Math.floor(size / 4);
  return wholeTokens < 1 ? 1 : wholeTokens;
}
39362
/**
 * Returns a sorted copy of `candidates`; the input array is not modified.
 *
 * Modes:
 *  - "size":  largest `size` first
 *  - "ctime": largest `ctimeMs` first
 *  - "fifo":  ascending `ordinal`
 *  - anything else: largest `mtimeMs` first
 * Ties in the descending modes are broken by ascending `ordinal`.
 *
 * @param {Array<object>} candidates - Entries with size, mtimeMs, ctimeMs and ordinal.
 * @param {string} mode - Priority mode.
 * @returns {Array<object>} New array in priority order.
 */
function sortCandidates(candidates, mode) {
  const byOrdinal = (left, right) => left.ordinal - right.ordinal;
  const descendingBy = (field) => (left, right) => right[field] - left[field] || byOrdinal(left, right);
  let compare;
  switch (mode) {
    case "size":
      compare = descendingBy("size");
      break;
    case "ctime":
      compare = descendingBy("ctimeMs");
      break;
    case "fifo":
      compare = byOrdinal;
      break;
    default:
      compare = descendingBy("mtimeMs");
  }
  return Array.from(candidates).sort(compare);
}
39167
39376
  function recordSyncResult(stats, result) {
39168
39377
  if (result === "ingested") {
39169
39378
  stats.filesIngested++;
@@ -39178,7 +39387,6 @@ function recordSyncResult(stats, result) {
39178
39387
  function toPosixPath(value) {
39179
39388
  return value.split(path2.sep).join("/");
39180
39389
  }
39181
- var textDecoder2 = new TextDecoder();
39182
39390
  function normalizeMarkdownRoots(roots) {
39183
39391
  if (!roots?.length) {
39184
39392
  return [];
@@ -39193,6 +39401,15 @@ function normalizeMarkdownRoots(roots) {
39193
39401
  }
39194
39402
  return [...resolved];
39195
39403
  }
39404
/**
 * Folds `bytes` into a running 64-bit FNV-1a hash.
 *
 * For each byte the state is XORed with the byte, multiplied by the FNV prime
 * and truncated to 64 bits. Calling it repeatedly with consecutive slices
 * gives the same result as one call over the concatenation.
 *
 * @param {bigint} seed - Current hash state.
 * @param {ArrayLike<number>} bytes - Bytes to mix in.
 * @returns {bigint} Updated hash state as an unsigned 64-bit BigInt.
 */
function updateFnv1a64(seed, bytes) {
  const FNV_PRIME = 0x100000001b3n;
  let state = seed;
  let index = 0;
  while (index < bytes.length) {
    const octet = BigInt(bytes[index] ?? 0);
    state = BigInt.asUintN(64, (state ^ octet) * FNV_PRIME);
    index += 1;
  }
  return state;
}
39196
39413
  function resolveMarkdownSnapshotPath(kind, configuredPath) {
39197
39414
  const trimmed = configuredPath?.trim();
39198
39415
  if (trimmed) {
@@ -39209,10 +39426,22 @@ function createRealFsApi2() {
39209
39426
  readdir: async (dir) => fsp2.readdir(dir, { withFileTypes: true }),
39210
39427
  readFile: async (file) => fsp2.readFile(file),
39211
39428
  stat: async (file) => {
39212
- const stat = await fsp2.stat(file);
39213
- return { size: stat.size, mtimeMs: stat.mtimeMs };
39429
+ const s = await fsp2.stat(file);
39430
+ return { size: s.size, mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs };
39214
39431
  },
39215
- watch: (dir, onChange) => fs2.watch(dir, onChange)
39432
+ watch: (dir, onChange) => fs2.watch(dir, onChange),
39433
+ openReadStream: async (file) => {
39434
+ const handle = await fsp2.open(file, "r");
39435
+ return {
39436
+ read: async (buffer) => {
39437
+ const { bytesRead } = await handle.read(buffer, 0, buffer.length, null);
39438
+ return { bytesRead };
39439
+ },
39440
+ close: async () => {
39441
+ await handle.close();
39442
+ }
39443
+ };
39444
+ }
39216
39445
  };
39217
39446
  }
39218
39447
  function isMarkdownFile(fileName) {
@@ -7,6 +7,8 @@ export interface IngestQueueOptions {
7
7
  retryBaseDelayMs: number;
8
8
  /** Max retries per chunk. */
9
9
  maxRetries: number;
10
+ /** Called after each chunk is accepted so scan-level state stays current. */
11
+ onChunkFeedback?: (feedback: IngestFeedback) => void;
10
12
  }
11
13
  interface IngestMarkdownDocumentParams {
12
14
  sourceDoc: string;
@@ -20,11 +22,25 @@ interface IngestMarkdownDocumentParams {
20
22
  fileHash: string;
21
23
  sourceSize: number;
22
24
  sourceMtimeMs: number;
25
+ sourceCtimeMs: number;
23
26
  ingestVersion: number;
24
27
  hashBackend: string;
25
28
  };
26
29
  mode?: IngestMode;
27
30
  }
31
/**
 * Feedback attached to an ingest response and used by IngestQueue to pace
 * and resize the chunks it sends.
 */
interface IngestFeedback {
    queueDepth: number;
    queueCapacity: number;
    /** When false and text remains, the queue waits before sending the next chunk. */
    acceptMore: boolean;
    /** Wait in milliseconds used when `acceptMore` is false; the queue falls back to 1000 when this is 0. */
    retryAfterMs: number;
    processingTimeUs: number;
    /** The queue treats 0 as a rejection of the chunk just sent. */
    nodesAccepted: number;
    nodesRejected: number;
    tokensIngested: number;
    /** On a rejected chunk, a positive value below the current chunk limit makes the queue retry at this limit. */
    tokenBurstLimit: number;
    walDepth?: number;
    walCapacity?: number;
}
28
44
  export declare class IngestQueue {
29
45
  private readonly queue;
30
46
  private readonly rpcCall;
@@ -32,7 +48,7 @@ export declare class IngestQueue {
32
48
  private readonly options;
33
49
  private running;
34
50
  constructor(rpcCall: <T>(method: string, params: unknown) => Promise<T>, logger: LoggerLike, options?: Partial<IngestQueueOptions>);
35
- enqueueIngest(sourceDoc: string, text: string, baseParams: Omit<IngestMarkdownDocumentParams, "sourceDoc" | "text" | "mode">): Promise<void>;
51
+ enqueueIngest(sourceDoc: string, text: string, baseParams: Omit<IngestMarkdownDocumentParams, "sourceDoc" | "text" | "mode">, maxChunkTokens?: number): Promise<IngestFeedback | undefined>;
36
52
  private ingestWithRetry;
37
53
  enqueueDelete(sourceDoc: string): Promise<void>;
38
54
  }
@@ -19,40 +19,59 @@ export class IngestQueue {
19
19
  this.options.chunkTokens = DEFAULT_OPTIONS.chunkTokens;
20
20
  }
21
21
  }
22
- async enqueueIngest(sourceDoc, text, baseParams) {
22
+ async enqueueIngest(sourceDoc, text, baseParams, maxChunkTokens) {
23
23
  if (this.options.chunkTokens === Infinity) {
24
- // Retry-only mode: send full text as single chunk
25
- return this.ingestWithRetry({
24
+ const resp = await this.ingestWithRetry({
26
25
  ...baseParams,
27
26
  sourceDoc,
28
27
  text,
29
28
  mode: IngestMode.REPLACE,
30
29
  });
30
+ return resp.feedback;
31
31
  }
32
- const chunks = splitIntoChunks(text, this.options.chunkTokens);
33
- if (chunks.length === 1) {
34
- return this.ingestWithRetry({
35
- ...baseParams,
36
- sourceDoc,
37
- text: chunks[0].text,
38
- mode: IngestMode.REPLACE,
39
- });
40
- }
41
- // Multiple chunks: clear the source once, then append the remaining chunks.
42
- // Sending REPLACE last deletes the earlier chunks from the same source_doc.
43
- for (let i = 0; i < chunks.length; i++) {
44
- const isFirst = i === 0;
32
+ let currentLimit = maxChunkTokens && maxChunkTokens > 0 ? maxChunkTokens : this.options.chunkTokens;
33
+ let offset = 0;
34
+ let isFirst = true;
35
+ let lastFeedback;
36
+ while (offset < text.length) {
37
+ const remainingText = text.slice(offset);
38
+ const chunks = splitIntoChunks(remainingText, currentLimit);
39
+ const chunkText = chunks[0].text;
45
40
  const chunkParams = {
46
41
  ...baseParams,
47
42
  sourceDoc,
48
- text: chunks[i].text,
43
+ text: chunkText,
49
44
  mode: isFirst ? IngestMode.REPLACE : IngestMode.APPEND,
50
45
  };
51
- await this.ingestWithRetry(chunkParams);
46
+ const resp = await this.ingestWithRetry(chunkParams);
47
+ lastFeedback = resp.feedback;
48
+ if (lastFeedback &&
49
+ lastFeedback.nodesAccepted === 0 &&
50
+ lastFeedback.tokenBurstLimit &&
51
+ lastFeedback.tokenBurstLimit > 0 &&
52
+ lastFeedback.tokenBurstLimit < currentLimit) {
53
+ currentLimit = lastFeedback.tokenBurstLimit;
54
+ continue;
55
+ }
56
+ if (lastFeedback && lastFeedback.nodesAccepted === 0) {
57
+ this.logger.warn?.(`[ingest-queue] Chunk permanently rejected for ${sourceDoc} ` +
58
+ `at offset=${offset} length=${chunkText.length} ` +
59
+ `tokenBurstLimit=${lastFeedback.tokenBurstLimit ?? "unset"}`);
60
+ }
61
+ if (this.options.onChunkFeedback && lastFeedback) {
62
+ this.options.onChunkFeedback(lastFeedback);
63
+ }
64
+ offset += chunkText.length;
65
+ isFirst = false;
66
+ if (lastFeedback && !lastFeedback.acceptMore && offset < text.length) {
67
+ const delay = lastFeedback.retryAfterMs || 1000;
68
+ await new Promise((resolve) => setTimeout(resolve, delay));
69
+ }
52
70
  }
71
+ return lastFeedback;
53
72
  }
54
73
  async ingestWithRetry(params) {
55
- await withRetry(() => this.rpcCall("ingest_markdown_document", params), this.options.maxRetries, this.options.retryBaseDelayMs, this.logger, `ingest_markdown_document(${params.sourceDoc})`);
74
+ return withRetry(() => this.rpcCall("ingest_markdown_document", params), this.options.maxRetries, this.options.retryBaseDelayMs, this.logger, `ingest_markdown_document(${params.sourceDoc})`);
56
75
  }
57
76
  async enqueueDelete(sourceDoc) {
58
77
  await withRetry(() => this.rpcCall("delete_authored_document", { sourceDoc }), this.options.maxRetries, this.options.retryBaseDelayMs, this.logger, `delete_authored_document(${sourceDoc})`);
@@ -14,14 +14,22 @@ interface FsDirentLike {
14
14
  interface FsWatcherLike extends Disposable {
15
15
  on(event: "error", handler: (error: Error) => void): void;
16
16
  }
17
+ interface FsReadStream {
18
+ read(buffer: Uint8Array): Promise<{
19
+ bytesRead: number;
20
+ }>;
21
+ close(): Promise<void>;
22
+ }
17
23
  interface FsApi {
18
24
  readdir(dir: string): Promise<FsDirentLike[]>;
19
25
  readFile(file: string): Promise<Uint8Array>;
20
26
  stat(file: string): Promise<{
21
27
  size: number;
22
28
  mtimeMs: number;
29
+ ctimeMs: number;
23
30
  }>;
24
31
  watch(dir: string, onChange: (event: string, filename: string | Buffer | null) => void): FsWatcherLike;
32
+ openReadStream(file: string): Promise<FsReadStream>;
25
33
  }
26
34
  export interface MarkdownSourceAdapter {
27
35
  kind: string;
@@ -2,13 +2,13 @@ import fs from "node:fs";
2
2
  import fsp from "node:fs/promises";
3
3
  import os from "node:os";
4
4
  import path from "node:path";
5
- import { hashBytes } from "./markdown-hash.js";
6
5
  import { formatError } from "./format-error.js";
7
6
  import { IngestQueue } from "./ingest-queue.js";
8
7
  const DEFAULT_DEBOUNCE_MS = 150;
9
8
  const DEFAULT_TOKENIZER_ID = "markdown-ingest:v1";
10
9
  const MARKDOWN_INGEST_VERSION = 3;
11
10
  const HASH_BACKEND = "wasm-fnv1a64";
11
+ const STREAM_CHUNK_BYTES = 64 * 1024;
12
12
  export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsApi = createRealFsApi()) {
13
13
  const adapters = [];
14
14
  const genericRoots = normalizeMarkdownRoots(cfg.markdownIngestionRoots);
@@ -19,6 +19,8 @@ export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsA
19
19
  exclude: cfg.markdownIngestionExclude,
20
20
  debounceMs: cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS,
21
21
  snapshotPath: resolveMarkdownSnapshotPath("generic", cfg.markdownIngestionSnapshotPath),
22
+ priorityMode: cfg.markdownIngestionPriorityMode,
23
+ maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile,
22
24
  }, getRpc, logger, fsApi));
23
25
  }
24
26
  const obsidianRoots = normalizeMarkdownRoots(cfg.markdownIngestionObsidianRoots);
@@ -29,6 +31,8 @@ export function createMarkdownIngestionHandle(cfg, getRpc, logger = console, fsA
29
31
  exclude: cfg.markdownIngestionObsidianExclude,
30
32
  debounceMs: cfg.markdownIngestionObsidianDebounceMs ?? cfg.markdownIngestionDebounceMs ?? DEFAULT_DEBOUNCE_MS,
31
33
  snapshotPath: resolveMarkdownSnapshotPath("obsidian", cfg.markdownIngestionObsidianSnapshotPath),
34
+ priorityMode: cfg.markdownIngestionPriorityMode,
35
+ maxTokensPerFile: cfg.markdownIngestionMaxTokensPerFile,
32
36
  }, getRpc, logger, fsApi));
33
37
  }
34
38
  if (adapters.length === 0) {
@@ -77,6 +81,8 @@ class DirectoryMarkdownSourceAdapter {
77
81
  getRpc;
78
82
  logger;
79
83
  snapshotPath;
84
+ priorityMode;
85
+ maxTokensPerFile;
80
86
  states = new Map();
81
87
  fileStates = new Map();
82
88
  activeScans = new Set();
@@ -85,6 +91,17 @@ class DirectoryMarkdownSourceAdapter {
85
91
  started = false;
86
92
  ingestQueue = null;
87
93
  stopping = false;
94
+ lastAcceptMore = true;
95
+ lastRetryAfterMs = 0;
96
+ lastQueueDepth = 0;
97
+ lastQueueCapacity = 0;
98
+ lastProcessingTimeUs = 0;
99
+ lastNodesAccepted = 0;
100
+ lastNodesRejected = 0;
101
+ lastTokensIngested = 0;
102
+ lastTokenBurstLimit = 512;
103
+ lastWalDepth = 0;
104
+ lastWalCapacity = 0;
88
105
  snapshotLoaded = false;
89
106
  snapshotDirty = false;
90
107
  constructor(kind, config, getRpc, logger, fsApi) {
@@ -97,6 +114,8 @@ class DirectoryMarkdownSourceAdapter {
97
114
  this.getRpc = getRpc;
98
115
  this.logger = logger;
99
116
  this.snapshotPath = config.snapshotPath ?? resolveMarkdownSnapshotPath(kind);
117
+ this.priorityMode = config.priorityMode ?? "mtime";
118
+ this.maxTokensPerFile = Math.max(1, Math.trunc(config.maxTokensPerFile ?? 128_000));
100
119
  this.tokenizerId = DEFAULT_TOKENIZER_ID;
101
120
  this.coreDoc = true;
102
121
  }
@@ -150,6 +169,7 @@ class DirectoryMarkdownSourceAdapter {
150
169
  scanning: false,
151
170
  dirty: false,
152
171
  timer: null,
172
+ resumeFromPath: null,
153
173
  },
154
174
  knownFiles: this.snapshotFilesForRoot(resolved),
155
175
  directoryWatchers: new Map(),
@@ -167,12 +187,16 @@ class DirectoryMarkdownSourceAdapter {
167
187
  return;
168
188
  }
169
189
  rootState.scanState.scanning = true;
190
+ this.lastAcceptMore = true;
191
+ this.lastRetryAfterMs = 0;
170
192
  const scan = (async () => {
171
193
  const stats = createScanStats();
172
194
  const startedAt = Date.now();
173
195
  try {
174
196
  const currentFiles = new Set();
175
- await this.walkDirectory(rootState, rootState.root, currentFiles, stats);
197
+ const candidates = [];
198
+ await this.walkDirectory(rootState, rootState.root, currentFiles, stats, candidates);
199
+ await this.syncCandidates(rootState, candidates, stats);
176
200
  if (!this.stopping) {
177
201
  await this.pruneDeletedFiles(rootState, currentFiles, stats);
178
202
  rootState.knownFiles = currentFiles;
@@ -198,7 +222,7 @@ class DirectoryMarkdownSourceAdapter {
198
222
  this.activeScans.delete(scan);
199
223
  }
200
224
  }
201
- scheduleRootScan(rootState) {
225
+ scheduleRootScan(rootState, delayMs) {
202
226
  if (!this.started || this.stopping) {
203
227
  return;
204
228
  }
@@ -214,9 +238,9 @@ class DirectoryMarkdownSourceAdapter {
214
238
  void this.scanRoot(rootState.root).catch((error) => {
215
239
  this.logger.warn?.(`[markdown-ingest] root scan failed for ${rootState.root}: ${formatError(error)}`);
216
240
  });
217
- }, this.debounceMs);
241
+ }, Math.max(this.debounceMs, delayMs ?? 0));
218
242
  }
219
- async walkDirectory(rootState, dir, currentFiles, stats) {
243
+ async walkDirectory(rootState, dir, currentFiles, stats, candidates) {
220
244
  if (this.shouldPruneDirectory(rootState.root, dir)) {
221
245
  stats.directoriesPruned++;
222
246
  return;
@@ -240,7 +264,7 @@ class DirectoryMarkdownSourceAdapter {
240
264
  }
241
265
  const child = path.join(dir, entry.name);
242
266
  if (entry.isDirectory()) {
243
- await this.walkDirectory(rootState, child, currentFiles, stats);
267
+ await this.walkDirectory(rootState, child, currentFiles, stats, candidates);
244
268
  continue;
245
269
  }
246
270
  if (!entry.isFile() || !isMarkdownFile(entry.name)) {
@@ -253,17 +277,74 @@ class DirectoryMarkdownSourceAdapter {
253
277
  }
254
278
  stats.filesIncluded++;
255
279
  currentFiles.add(child);
280
+ const stat = await this.safeStatWithCtime(child);
281
+ if (!stat) {
282
+ continue;
283
+ }
284
+ candidates.push({ path: child, size: stat.size, mtimeMs: stat.mtimeMs, ctimeMs: stat.ctimeMs, ordinal: candidates.length });
285
+ }
286
+ }
287
+ async syncCandidates(rootState, candidates, stats) {
288
+ const sorted = sortCandidates(candidates, this.priorityMode);
289
+ let skipping = false;
290
+ if (rootState.scanState.resumeFromPath) {
291
+ const targetExists = sorted.some((c) => c.path === rootState.scanState.resumeFromPath);
292
+ if (targetExists) {
293
+ skipping = true;
294
+ this.lastAcceptMore = true;
295
+ this.lastRetryAfterMs = 0;
296
+ }
297
+ else {
298
+ rootState.scanState.resumeFromPath = null;
299
+ }
300
+ }
301
+ for (const candidate of sorted) {
302
+ if (skipping) {
303
+ if (candidate.path === rootState.scanState.resumeFromPath) {
304
+ skipping = false;
305
+ }
306
+ else {
307
+ continue;
308
+ }
309
+ }
310
+ if (this.stopping) {
311
+ return;
312
+ }
313
+ if (!this.lastAcceptMore) {
314
+ if (!this.stopping) {
315
+ rootState.scanState.resumeFromPath = candidate.path;
316
+ this.scheduleRootScan(rootState, this.lastRetryAfterMs);
317
+ }
318
+ return;
319
+ }
320
+ if (this.lastWalCapacity > 0 && this.lastWalDepth > this.lastWalCapacity * 0.8) {
321
+ rootState.scanState.resumeFromPath = candidate.path;
322
+ if (!this.stopping) {
323
+ this.scheduleRootScan(rootState, 2000);
324
+ }
325
+ return;
326
+ }
327
+ const estimatedTokens = estimateTokens(candidate.size);
328
+ if (estimatedTokens > this.maxTokensPerFile) {
329
+ stats.filesDeferred++;
330
+ continue;
331
+ }
256
332
  try {
257
- const result = await this.syncMarkdownFile(rootState, child);
333
+ const result = await this.syncMarkdownFile(rootState, candidate.path, {
334
+ size: candidate.size,
335
+ mtimeMs: candidate.mtimeMs,
336
+ ctimeMs: candidate.ctimeMs,
337
+ });
258
338
  recordSyncResult(stats, result);
259
339
  }
260
340
  catch (error) {
261
341
  stats.syncErrors++;
262
342
  if (!this.stopping) {
263
- this.logger.warn?.(`[markdown-ingest] sync failed for ${child}: ${formatError(error)}`);
343
+ this.logger.warn?.(`[markdown-ingest] sync failed for ${candidate.path}: ${formatError(error)}`);
264
344
  }
265
345
  }
266
346
  }
347
+ rootState.scanState.resumeFromPath = null;
267
348
  }
268
349
  shouldPruneDirectory(root, dir) {
269
350
  const relative = toPosixPath(path.relative(root, dir));
@@ -284,6 +365,11 @@ class DirectoryMarkdownSourceAdapter {
284
365
  try {
285
366
  const watcher = this.fsApi.watch(dir, () => {
286
367
  if (!this.stopping) {
368
+ rootState.scanState.resumeFromPath = null;
369
+ if (rootState.scanState.timer) {
370
+ clearTimeout(rootState.scanState.timer);
371
+ rootState.scanState.timer = null;
372
+ }
287
373
  this.scheduleRootScan(rootState);
288
374
  }
289
375
  });
@@ -335,10 +421,10 @@ class DirectoryMarkdownSourceAdapter {
335
421
  stats.filesDeleted++;
336
422
  }
337
423
  }
338
- async syncMarkdownFile(rootState, filePath) {
424
+ async syncMarkdownFile(rootState, filePath, initialStat) {
339
425
  const sourceDoc = filePath;
340
426
  const relativePath = toPosixPath(path.relative(rootState.root, filePath));
341
- const stat = await this.safeStat(filePath);
427
+ const stat = initialStat ?? (await this.safeStatWithCtime(filePath));
342
428
  if (!stat) {
343
429
  await this.deleteSourceDocument(sourceDoc);
344
430
  this.fileStates.delete(sourceDoc);
@@ -349,14 +435,18 @@ class DirectoryMarkdownSourceAdapter {
349
435
  if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) {
350
436
  return "unchanged";
351
437
  }
352
- const bytes = await this.safeReadFile(filePath);
353
- if (!bytes) {
438
+ const maxBytes = this.maxTokensPerFile * 4 + 3;
439
+ const streamed = await this.safeReadFileStreamed(filePath, maxBytes);
440
+ if (streamed === "too_large") {
441
+ return "skipped";
442
+ }
443
+ if (!streamed) {
354
444
  await this.deleteSourceDocument(sourceDoc);
355
445
  this.fileStates.delete(sourceDoc);
356
446
  this.snapshotDirty = true;
357
447
  return "deleted";
358
448
  }
359
- const fileHash = hashBytes(bytes);
449
+ const { text, fileHash } = streamed;
360
450
  if (cached && cached.fileHash === fileHash) {
361
451
  this.setFileState(sourceDoc, {
362
452
  root: rootState.root,
@@ -368,14 +458,13 @@ class DirectoryMarkdownSourceAdapter {
368
458
  });
369
459
  return "unchanged";
370
460
  }
371
- const text = textDecoder.decode(bytes);
372
461
  if (this.kind === "obsidian" && this.includePatterns.length === 0 && !looksLikeObsidianNote(filePath, text)) {
373
462
  await this.deleteSourceDocument(sourceDoc);
374
463
  this.fileStates.delete(sourceDoc);
375
464
  this.snapshotDirty = true;
376
465
  return "skipped";
377
466
  }
378
- await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs);
467
+ await this.ingestMarkdownDocument(sourceDoc, text, rootState.root, relativePath, fileHash, stat.size, stat.mtimeMs, stat.ctimeMs);
379
468
  this.setFileState(sourceDoc, {
380
469
  root: rootState.root,
381
470
  sourceDoc,
@@ -390,9 +479,9 @@ class DirectoryMarkdownSourceAdapter {
390
479
  this.fileStates.set(sourceDoc, state);
391
480
  this.snapshotDirty = true;
392
481
  }
393
- async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs) {
482
+ async ingestMarkdownDocument(sourceDoc, text, sourceRoot, sourcePath, fileHash, sourceSize, sourceMtimeMs, sourceCtimeMs) {
394
483
  const queue = await this.getIngestQueue();
395
- await queue.enqueueIngest(sourceDoc, text, {
484
+ const feedback = await queue.enqueueIngest(sourceDoc, text, {
396
485
  tokenizerId: this.tokenizerId,
397
486
  coreDoc: this.coreDoc,
398
487
  sourceMeta: {
@@ -402,10 +491,44 @@ class DirectoryMarkdownSourceAdapter {
402
491
  fileHash,
403
492
  sourceSize,
404
493
  sourceMtimeMs: Math.trunc(sourceMtimeMs),
494
+ sourceCtimeMs: Math.trunc(sourceCtimeMs),
405
495
  ingestVersion: MARKDOWN_INGEST_VERSION,
406
496
  hashBackend: HASH_BACKEND,
407
497
  },
408
- });
498
+ }, this.lastTokenBurstLimit);
499
+ this.applyIngestFeedback(feedback);
500
+ }
501
+ applyIngestFeedback(feedback) {
502
+ if (feedback && typeof feedback.acceptMore === "boolean") {
503
+ this.lastAcceptMore = feedback.acceptMore;
504
+ this.lastQueueDepth = feedback.queueDepth ?? 0;
505
+ this.lastQueueCapacity = feedback.queueCapacity ?? 0;
506
+ this.lastProcessingTimeUs = feedback.processingTimeUs ?? 0;
507
+ this.lastNodesAccepted = feedback.nodesAccepted ?? 0;
508
+ this.lastNodesRejected = feedback.nodesRejected ?? 0;
509
+ this.lastTokensIngested = feedback.tokensIngested ?? 0;
510
+ if (feedback.tokenBurstLimit && feedback.tokenBurstLimit > 0) {
511
+ this.lastTokenBurstLimit = feedback.tokenBurstLimit;
512
+ }
513
+ this.lastWalDepth = feedback.walDepth ?? 0;
514
+ this.lastWalCapacity = feedback.walCapacity ?? 0;
515
+ if (feedback.acceptMore) {
516
+ this.lastRetryAfterMs = 0;
517
+ }
518
+ else {
519
+ this.lastRetryAfterMs = feedback.retryAfterMs || 1000;
520
+ }
521
+ }
522
+ else {
523
+ this.lastAcceptMore = true;
524
+ this.lastRetryAfterMs = 0;
525
+ this.lastQueueDepth = 0;
526
+ this.lastQueueCapacity = 0;
527
+ this.lastProcessingTimeUs = 0;
528
+ this.lastNodesAccepted = 0;
529
+ this.lastNodesRejected = 0;
530
+ this.lastTokensIngested = 0;
531
+ }
409
532
  }
410
533
  async deleteSourceDocument(sourceDoc) {
411
534
  const queue = await this.getIngestQueue();
@@ -414,7 +537,9 @@ class DirectoryMarkdownSourceAdapter {
414
537
  async getIngestQueue() {
415
538
  if (!this.ingestQueue) {
416
539
  const rpc = await this.getRpc();
417
- this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger);
540
+ this.ingestQueue = new IngestQueue(rpc.call.bind(rpc), this.logger, {
541
+ onChunkFeedback: (feedback) => this.applyIngestFeedback(feedback),
542
+ });
418
543
  }
419
544
  return this.ingestQueue;
420
545
  }
@@ -426,14 +551,51 @@ class DirectoryMarkdownSourceAdapter {
426
551
  return null;
427
552
  }
428
553
  }
429
- async safeReadFile(filePath) {
554
+ async safeStatWithCtime(filePath) {
430
555
  try {
431
- return await this.fsApi.readFile(filePath);
556
+ return await this.fsApi.stat(filePath);
432
557
  }
433
558
  catch {
434
559
  return null;
435
560
  }
436
561
  }
562
+ async safeReadFileStreamed(filePath, maxBytes) {
563
+ let stream = null;
564
+ try {
565
+ stream = await this.fsApi.openReadStream(filePath);
566
+ const decoder = new TextDecoder();
567
+ const chunks = [];
568
+ let hash = 0xcbf29ce484222325n;
569
+ let total = 0;
570
+ const buffer = Buffer.allocUnsafe(STREAM_CHUNK_BYTES);
571
+ while (true) {
572
+ const { bytesRead } = await stream.read(buffer);
573
+ if (bytesRead === 0) {
574
+ break;
575
+ }
576
+ total += bytesRead;
577
+ if (total > maxBytes) {
578
+ return "too_large";
579
+ }
580
+ const chunk = buffer.subarray(0, bytesRead);
581
+ hash = updateFnv1a64(hash, chunk);
582
+ chunks.push(decoder.decode(chunk, { stream: true }));
583
+ }
584
+ chunks.push(decoder.decode());
585
+ return {
586
+ text: chunks.join(""),
587
+ fileHash: hash.toString(16).padStart(16, "0"),
588
+ };
589
+ }
590
+ catch {
591
+ return null;
592
+ }
593
+ finally {
594
+ if (stream) {
595
+ await stream.close().catch(() => { });
596
+ }
597
+ }
598
+ }
437
599
  snapshotFilesForRoot(root) {
438
600
  const files = new Set();
439
601
  for (const state of this.fileStates.values()) {
@@ -497,7 +659,7 @@ class DirectoryMarkdownSourceAdapter {
497
659
  }
498
660
  }
499
661
  logScanStats(root, stats, durationMs) {
500
- this.logger.info?.(`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} errors=${stats.syncErrors} durationMs=${durationMs}`);
662
+ this.logger.info?.(`[markdown-ingest] ${this.kind} scan complete root=${root} dirs=${stats.directoriesScanned} prunedDirs=${stats.directoriesPruned} markdown=${stats.markdownFilesSeen} included=${stats.filesIncluded} skipped=${stats.filesSkipped} unchanged=${stats.filesUnchanged} ingested=${stats.filesIngested} deleted=${stats.filesDeleted} deferred=${stats.filesDeferred} errors=${stats.syncErrors} durationMs=${durationMs}`);
501
663
  }
502
664
  }
503
665
  function createScanStats() {
@@ -511,8 +673,26 @@ function createScanStats() {
511
673
  filesIngested: 0,
512
674
  filesDeleted: 0,
513
675
  syncErrors: 0,
676
+ filesDeferred: 0,
514
677
  };
515
678
  }
679
+ function estimateTokens(size) {
680
+ return Math.max(1, Math.floor(size / 4));
681
+ }
682
+ function sortCandidates(candidates, mode) {
683
+ return [...candidates].sort((left, right) => {
684
+ if (mode === "size") {
685
+ return right.size - left.size || left.ordinal - right.ordinal;
686
+ }
687
+ if (mode === "ctime") {
688
+ return right.ctimeMs - left.ctimeMs || left.ordinal - right.ordinal;
689
+ }
690
+ if (mode === "fifo") {
691
+ return left.ordinal - right.ordinal;
692
+ }
693
+ return right.mtimeMs - left.mtimeMs || left.ordinal - right.ordinal;
694
+ });
695
+ }
516
696
  function recordSyncResult(stats, result) {
517
697
  if (result === "ingested") {
518
698
  stats.filesIngested++;
@@ -530,7 +710,6 @@ function recordSyncResult(stats, result) {
530
710
  function toPosixPath(value) {
531
711
  return value.split(path.sep).join("/");
532
712
  }
533
- const textDecoder = new TextDecoder();
534
713
  function normalizeMarkdownRoots(roots) {
535
714
  if (!roots?.length) {
536
715
  return [];
@@ -545,6 +724,15 @@ function normalizeMarkdownRoots(roots) {
545
724
  }
546
725
  return [...resolved];
547
726
  }
727
+ function updateFnv1a64(seed, bytes) {
728
+ let hash = seed;
729
+ const prime = 0x100000001b3n;
730
+ for (let i = 0; i < bytes.length; i++) {
731
+ hash ^= BigInt(bytes[i] ?? 0);
732
+ hash = BigInt.asUintN(64, hash * prime);
733
+ }
734
+ return hash;
735
+ }
548
736
  function resolveMarkdownSnapshotPath(kind, configuredPath) {
549
737
  const trimmed = configuredPath?.trim();
550
738
  if (trimmed) {
@@ -561,10 +749,22 @@ function createRealFsApi() {
561
749
  readdir: async (dir) => fsp.readdir(dir, { withFileTypes: true }),
562
750
  readFile: async (file) => fsp.readFile(file),
563
751
  stat: async (file) => {
564
- const stat = await fsp.stat(file);
565
- return { size: stat.size, mtimeMs: stat.mtimeMs };
752
+ const s = await fsp.stat(file);
753
+ return { size: s.size, mtimeMs: s.mtimeMs, ctimeMs: s.ctimeMs };
566
754
  },
567
755
  watch: (dir, onChange) => fs.watch(dir, onChange),
756
+ openReadStream: async (file) => {
757
+ const handle = await fsp.open(file, "r");
758
+ return {
759
+ read: async (buffer) => {
760
+ const { bytesRead } = await handle.read(buffer, 0, buffer.length, null);
761
+ return { bytesRead };
762
+ },
763
+ close: async () => {
764
+ await handle.close();
765
+ },
766
+ };
767
+ },
568
768
  };
569
769
  }
570
770
  function isMarkdownFile(fileName) {
package/dist/types.d.ts CHANGED
@@ -48,6 +48,8 @@ export interface PluginConfig {
48
48
  markdownIngestionInclude?: string[];
49
49
  markdownIngestionExclude?: string[];
50
50
  markdownIngestionDebounceMs?: number;
51
+ markdownIngestionPriorityMode?: "mtime" | "ctime" | "size" | "fifo";
52
+ markdownIngestionMaxTokensPerFile?: number;
51
53
  markdownIngestionSnapshotPath?: string;
52
54
  markdownIngestionObsidianSnapshotPath?: string;
53
55
  dreamPromotionEnabled?: boolean;
@@ -2,7 +2,7 @@
2
2
  "id": "libravdb-memory",
3
3
  "name": "LibraVDB Memory",
4
4
  "description": "Persistent vector memory with three-tier hybrid scoring",
5
- "version": "1.5.3",
5
+ "version": "1.5.5",
6
6
  "kind": [
7
7
  "memory",
8
8
  "context-engine"
@@ -263,6 +263,20 @@
263
263
  "type": "number",
264
264
  "default": 150
265
265
  },
266
+ "markdownIngestionPriorityMode": {
267
+ "type": "string",
268
+ "enum": [
269
+ "mtime",
270
+ "ctime",
271
+ "size",
272
+ "fifo"
273
+ ],
274
+ "default": "mtime"
275
+ },
276
+ "markdownIngestionMaxTokensPerFile": {
277
+ "type": "number",
278
+ "default": 128000
279
+ },
266
280
  "markdownIngestionSnapshotPath": {
267
281
  "type": "string"
268
282
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@xdarkicex/openclaw-memory-libravdb",
3
- "version": "1.5.3",
3
+ "version": "1.5.5",
4
4
  "type": "module",
5
5
  "main": "./dist/index.js",
6
6
  "types": "./dist/index.d.ts",