agenr 0.7.16 → 0.7.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,20 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.7.18] - 2026-02-21
4
+
5
+ ### Fixed
6
+ - fix(lockfile): suppress false-positive "Another agenr process is writing" warning during multi-worker ingest; `isDbLocked` now returns false when the lock is held by the current process (#121)
7
+
8
+ ## [0.7.17] - 2026-02-21
9
+
10
+ ### Performance
11
+ - perf(ingest): two-phase extract+write pipeline eliminates SQLite write-lock contention; extraction workers run in parallel while a single background writer drains entries in batched transactions (#107)
12
+ - feat(ingest): add `--workers` flag (default 10) for file-level parallelism; previously hardcoded to 1
13
+ - The write queue retries each write sub-batch once on transient failure (2s delay) before surfacing the error to the outer file-level retry loop. Use `--no-retry` to disable all retries including the inner write retry.
14
+
15
+ ### Changed
16
+ - ingest: `entriesStored` now counts `added + superseded` (previously only `added`); superseded entries are written before the previous entry is marked superseded
17
+
3
18
  ## [0.7.16] - 2026-02-21
4
19
 
5
20
  ### Fixed
package/dist/cli-main.js CHANGED
@@ -4272,6 +4272,9 @@ function isDbLocked(lockDir) {
4272
4272
  if (!pid) {
4273
4273
  return false;
4274
4274
  }
4275
+ if (pid === process.pid) {
4276
+ return false;
4277
+ }
4275
4278
  return isPidAlive(pid);
4276
4279
  }
4277
4280
  function warnIfLocked() {
@@ -10202,7 +10205,7 @@ async function extractKnowledgeFromChunks(params) {
10202
10205
  );
10203
10206
  let dynamicDelay = baseDelay;
10204
10207
  let lastThrottleNoticeDelayMs = null;
10205
- const sleep3 = params.sleepImpl ?? sleepMs2;
10208
+ const sleep4 = params.sleepImpl ?? sleepMs2;
10206
10209
  const llmConcurrency = Math.max(1, Math.trunc(params.llmConcurrency ?? 1));
10207
10210
  const bufferStreamDeltas = llmConcurrency > 1 && Boolean(params.onStreamDelta);
10208
10211
  let cursor = 0;
@@ -10276,7 +10279,7 @@ async function extractKnowledgeFromChunks(params) {
10276
10279
  warnings.push(
10277
10280
  `Chunk ${chunk.chunk_index + 1}: attempt ${attempt} failed (${error instanceof Error ? error.message : String(error)}), retrying in ${backoffMs}ms.`
10278
10281
  );
10279
- await sleep3(backoffMs);
10282
+ await sleep4(backoffMs);
10280
10283
  continue;
10281
10284
  }
10282
10285
  break;
@@ -10328,9 +10331,9 @@ async function extractKnowledgeFromChunks(params) {
10328
10331
  if (dynamicDelay > 0 && cursor < params.chunks.length && !isShutdownRequested()) {
10329
10332
  if (llmConcurrency > 1) {
10330
10333
  const jitterMs = Math.max(0, Math.trunc(dynamicDelay * (0.5 + Math.random())));
10331
- await sleep3(jitterMs);
10334
+ await sleep4(jitterMs);
10332
10335
  } else if (currentIndex < params.chunks.length - 1) {
10333
- await sleep3(dynamicDelay);
10336
+ await sleep4(dynamicDelay);
10334
10337
  }
10335
10338
  }
10336
10339
  }
@@ -12389,6 +12392,375 @@ async function parseTranscriptFile(filePath, options) {
12389
12392
  };
12390
12393
  }
12391
12394
 
12395
+ // src/ingest/write-queue.ts
12396
// Error used to reject a queued (not-yet-dispatched) write item when
// WriteQueue.cancel(fileKey) removes it from the queue. Callers treat this
// distinctly from real failures (cancelled tickets are counted, not thrown).
var CancelledError = class extends Error {
  constructor(message = "Write queue item was cancelled.") {
    super(message);
    this.name = "CancelledError";
  }
};
12402
// Error used to reject queued items when the WriteQueue is destroyed or a
// process shutdown is requested before the item could be written.
var ShutdownError = class extends Error {
  constructor(message = "Write queue is shutting down.") {
    super(message);
    this.name = "ShutdownError";
  }
};
12408
// All-zero batch result. Spread-copied ({ ...ZERO_RESULT }) as the identity
// value for mergeBatchResult accumulation and as the returned result for
// empty pushes — never handed out by reference, so it stays immutable.
var ZERO_RESULT = {
  added: 0,
  updated: 0,
  skipped: 0,
  superseded: 0,
  llm_dedup_calls: 0
};
12415
/**
 * Resolve after roughly `ms` milliseconds.
 * Used for the push() backpressure poll and the write-retry delay.
 * @param {number} ms - delay in milliseconds
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
12418
/**
 * Field-wise sum of two batch results (added/updated/skipped/superseded/
 * llm_dedup_calls). Pure: neither input is mutated; a fresh object with
 * exactly the five result keys is returned.
 * @param {object} a - first batch result
 * @param {object} b - second batch result
 * @returns {object} combined batch result
 */
function mergeBatchResult(a, b) {
  const sum = (key) => a[key] + b[key];
  return {
    added: sum("added"),
    updated: sum("updated"),
    skipped: sum("skipped"),
    superseded: sum("superseded"),
    llm_dedup_calls: sum("llm_dedup_calls")
  };
}
12427
/**
 * Split `entries` into sub-arrays of at most `size` elements.
 * Fast path: when the input already fits in one chunk, the ORIGINAL array
 * is returned (wrapped in a new outer array) rather than a copy — callers
 * must not rely on receiving fresh inner arrays. An empty input yields [[]].
 * @param {Array} entries - items to chunk
 * @param {number} size - maximum chunk length
 * @returns {Array<Array>} list of chunks in original order
 */
function chunkEntries2(entries, size) {
  if (entries.length <= size) {
    return [entries];
  }
  const result = [];
  let start = 0;
  while (start < entries.length) {
    result.push(entries.slice(start, start + size));
    start += size;
  }
  return result;
}
12437
/**
 * Normalize an arbitrary thrown value into an Error instance.
 * Existing Error objects pass through untouched (stack preserved);
 * anything else is stringified into a new Error's message.
 * @param {unknown} error - caught value of any type
 * @returns {Error}
 */
function toError(error) {
  return error instanceof Error ? error : new Error(String(error));
}
12443
// Single-writer queue that serializes database writes while extraction
// workers run in parallel. Producers push() batches of entries; one
// background writer loop (started in the constructor) drains the queue,
// splitting each item into sub-batches of `batchSize` and calling
// storeEntriesFn for each. NOTE(review): storeEntriesFn is injected by the
// caller — presumably it writes a batch to SQLite in one transaction, but
// that contract lives outside this class; verify against the caller.
// Also supports exclusive (non-write) operations via runExclusive(), which
// run on the writer loop and therefore never overlap with writes.
var WriteQueue = class {
  db;
  storeEntriesFn;
  apiKey;
  llmClient;
  dbPath;
  batchSize;
  highWatermark;
  retryOnFailure;
  isShutdownRequested;
  // FIFO of pending work items: { kind: "write", entries, fileKey, fileHash,
  // resolve, reject } or { kind: "exclusive", fn, resolve, reject }.
  queue = [];
  // Total entries sitting in `queue` (not yet dispatched); drives the
  // push() backpressure loop against `highWatermark`.
  pendingEntries = 0;
  destroyed = false;
  writerStopping = false;
  // storeEntriesFn calls currently awaiting completion.
  inflightWrites = 0;
  // Items dispatched to the writer but not yet resolved/rejected.
  activeWorkItems = 0;
  // fileKey -> count of dispatched (in-flight) write items for that file;
  // lets cancel() wait for a file's in-flight writes to settle.
  activeByFileKey = /* @__PURE__ */ new Map();
  // fileKey -> resolvers parked in waitForFileIdle().
  fileWaiters = /* @__PURE__ */ new Map();
  // Resolvers parked in drain(), released when the queue is fully idle.
  drainWaiters = [];
  // Resolvers parking the writer loop until wakeWriter() is called.
  writerWaiters = [];
  constructor(options) {
    this.db = options.db;
    this.storeEntriesFn = options.storeEntriesFn;
    this.apiKey = options.apiKey;
    this.llmClient = options.llmClient;
    this.dbPath = options.dbPath;
    // Sub-batch size per storeEntriesFn call (default 40, clamped to >= 1).
    this.batchSize = Math.max(1, Math.floor(options.batchSize ?? 40));
    // Backpressure threshold on queued (undispatched) entries (default 500).
    this.highWatermark = Math.max(1, Math.floor(options.highWatermark ?? 500));
    // Retries are on unless explicitly disabled (options.retryOnFailure === false).
    this.retryOnFailure = options.retryOnFailure !== false;
    this.isShutdownRequested = options.isShutdownRequested;
    // Fire-and-forget: the writer loop runs until destroy()/shutdown.
    void this.runWriterLoop();
  }
  // Queued + dispatched work still outstanding (used for progress display).
  get pendingCount() {
    return this.pendingEntries + this.activeWorkItems;
  }
  // Enqueue a batch of entries for `fileKey`; resolves with the merged store
  // result once the writer has persisted every sub-batch, or rejects with
  // CancelledError / ShutdownError / the write error.
  async push(entries, fileKey, fileHash) {
    if (this.destroyed) {
      throw new ShutdownError("WriteQueue has been destroyed and cannot accept new items.");
    }
    // Backpressure: poll until the queue has room. The first clause lets a
    // single oversized batch through when the queue is empty (no deadlock).
    while (this.pendingEntries > 0 && this.pendingEntries + entries.length > this.highWatermark) {
      if (this.destroyed) {
        throw new ShutdownError("WriteQueue has been destroyed and cannot accept new items.");
      }
      await sleep(50);
    }
    if (entries.length === 0) {
      return { ...ZERO_RESULT };
    }
    return await new Promise((resolve, reject) => {
      // Re-check: destroy() may have run while we awaited backpressure.
      if (this.destroyed) {
        reject(new ShutdownError("WriteQueue has been destroyed and cannot accept new items."));
        return;
      }
      this.pendingEntries += entries.length;
      this.queue.push({
        kind: "write",
        entries,
        fileKey,
        fileHash,
        resolve,
        reject
      });
      this.wakeWriter();
    });
  }
  // Reject all still-queued write items for `fileKey`, then wait for any
  // already-dispatched writes for that file to finish. In-flight writes are
  // NOT aborted — they run to completion.
  async cancel(fileKey) {
    const kept = [];
    const cancelled = [];
    for (const item of this.queue) {
      if (item.kind === "write" && item.fileKey === fileKey) {
        cancelled.push(item);
        this.pendingEntries = Math.max(0, this.pendingEntries - item.entries.length);
      } else {
        kept.push(item);
      }
    }
    if (cancelled.length > 0) {
      // Replace queue contents in place (other code holds no reference, but
      // splice keeps the same array identity used by the writer loop).
      this.queue.splice(0, this.queue.length, ...kept);
      for (const item of cancelled) {
        item.reject(new CancelledError(`Cancelled queued write for ${fileKey}.`));
      }
      this.resolveDrainIfIdle();
    }
    await this.waitForFileIdle(fileKey);
  }
  // Run `fn` on the writer loop, serialized with all writes. Used for DB
  // operations that must not interleave with batch writes (cleanup, logging).
  async runExclusive(fn) {
    if (this.destroyed) {
      throw new ShutdownError("WriteQueue has been destroyed and cannot accept new items.");
    }
    return await new Promise((resolve, reject) => {
      if (this.destroyed) {
        reject(new ShutdownError("WriteQueue has been destroyed and cannot accept new items."));
        return;
      }
      this.queue.push({
        kind: "exclusive",
        fn,
        resolve,
        reject
      });
      this.wakeWriter();
    });
  }
  // Resolve once the queue is fully idle (nothing queued, dispatched, or
  // in flight). Never rejects; waiters are released by resolveDrainIfIdle().
  async drain() {
    if (this.isIdle()) {
      return;
    }
    await new Promise((resolve) => {
      this.drainWaiters.push(resolve);
    });
  }
  // Tear down: reject everything still queued and stop the writer loop.
  // Already-dispatched items are allowed to finish. Idempotent.
  destroy() {
    if (this.destroyed) {
      return;
    }
    this.destroyed = true;
    this.writerStopping = true;
    const pending = this.queue.splice(0, this.queue.length);
    this.pendingEntries = 0;
    for (const item of pending) {
      item.reject(new ShutdownError("WriteQueue destroyed before item was processed."));
    }
    // Wake the parked writer loop so it can observe the stop flags and exit.
    this.wakeWriter();
    this.resolveDrainIfIdle();
  }
  // Background writer: repeatedly drains the whole queue as one batch.
  // Checks the injected shutdown signal between batches and converts it
  // into a full teardown when write items are still pending.
  async runWriterLoop() {
    while (true) {
      if (this.isShutdownRequested?.() && this.hasPendingWriteItems()) {
        this.shutdownFromSignal();
      }
      if ((this.writerStopping || this.destroyed) && this.queue.length === 0) {
        break;
      }
      if (this.queue.length === 0) {
        await this.waitForWork();
        continue;
      }
      // Take everything currently queued; new pushes go into the next batch.
      const batch = this.queue.splice(0, this.queue.length);
      this.markBatchAsDispatched(batch);
      await this.processBatch(batch);
    }
    this.resolveDrainIfIdle();
  }
  hasPendingWriteItems() {
    return this.queue.some((item) => item.kind === "write");
  }
  // Process a dispatched batch in order, alternating between exclusive items
  // and contiguous runs ("segments") of write items.
  async processBatch(batch) {
    let index = 0;
    while (index < batch.length) {
      const item = batch[index];
      if (!item) {
        index += 1;
        continue;
      }
      if (item.kind === "exclusive") {
        await this.processExclusive(item);
        index += 1;
        continue;
      }
      // Collect the maximal run of consecutive write items.
      const segment = [];
      while (index < batch.length) {
        const segmentItem = batch[index];
        if (!segmentItem || segmentItem.kind !== "write") {
          break;
        }
        segment.push(segmentItem);
        index += 1;
      }
      await this.processWriteSegment(segment);
    }
  }
  async processExclusive(item) {
    try {
      const result = await item.fn();
      item.resolve(result);
    } catch (error) {
      item.reject(toError(error));
    } finally {
      this.activeWorkItems = Math.max(0, this.activeWorkItems - 1);
      this.resolveDrainIfIdle();
    }
  }
  // Group a write segment by fileKey so each file's items are written
  // contiguously, then process every item sequentially.
  async processWriteSegment(segment) {
    const grouped = /* @__PURE__ */ new Map();
    for (const item of segment) {
      const group = grouped.get(item.fileKey);
      if (group) {
        group.push(item);
      } else {
        grouped.set(item.fileKey, [item]);
      }
    }
    for (const items of grouped.values()) {
      for (const item of items) {
        await this.processWriteItem(item);
      }
    }
  }
  // Write one item: split into sub-batches, store each (with retry), merge
  // the per-sub-batch results, and settle the item's promise.
  async processWriteItem(item) {
    let result = { ...ZERO_RESULT };
    const subBatches = chunkEntries2(item.entries, this.batchSize);
    try {
      for (const subBatch of subBatches) {
        const writeResult = await this.writeSubBatchWithRetry(subBatch, item.fileKey, item.fileHash);
        result = mergeBatchResult(result, writeResult);
      }
      item.resolve(result);
    } catch (error) {
      item.reject(toError(error));
    } finally {
      this.activeWorkItems = Math.max(0, this.activeWorkItems - 1);
      this.decrementActiveFile(item.fileKey);
      this.resolveDrainIfIdle();
    }
  }
  // Store one sub-batch; when retryOnFailure is set, retry once after a 2s
  // delay before surfacing the last error to processWriteItem.
  async writeSubBatchWithRetry(entries, fileKey, fileHash) {
    let lastError = null;
    const maxAttempts = this.retryOnFailure ? 2 : 1;
    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
      this.inflightWrites += 1;
      try {
        const result = await this.storeEntriesFn(this.db, entries, this.apiKey, {
          sourceFile: fileKey,
          ingestContentHash: fileHash,
          skipIngestLog: true,
          onlineDedup: true,
          skipLlmDedup: false,
          llmClient: this.llmClient,
          dbPath: this.dbPath
        });
        // Project only the five counter fields the queue reports on.
        return {
          added: result.added,
          updated: result.updated,
          skipped: result.skipped,
          superseded: result.superseded,
          llm_dedup_calls: result.llm_dedup_calls
        };
      } catch (error) {
        lastError = toError(error);
        if (attempt < maxAttempts) {
          await sleep(2e3);
        }
      } finally {
        this.inflightWrites = Math.max(0, this.inflightWrites - 1);
        this.resolveDrainIfIdle();
      }
    }
    throw lastError ?? new Error("Write queue failed to store sub-batch.");
  }
  // Move accounting from "queued" to "dispatched" for every item in a batch:
  // bump activeWorkItems (and per-file counts for writes), release the
  // entries from the backpressure total.
  markBatchAsDispatched(batch) {
    for (const item of batch) {
      this.activeWorkItems += 1;
      if (item.kind === "write") {
        this.pendingEntries = Math.max(0, this.pendingEntries - item.entries.length);
        this.activeByFileKey.set(item.fileKey, (this.activeByFileKey.get(item.fileKey) ?? 0) + 1);
      }
    }
    this.resolveDrainIfIdle();
  }
  // Decrement the per-file dispatch count; when it hits zero, release every
  // waiter parked in waitForFileIdle() for that file.
  decrementActiveFile(fileKey) {
    const current = this.activeByFileKey.get(fileKey) ?? 0;
    if (current <= 1) {
      this.activeByFileKey.delete(fileKey);
      const waiters = this.fileWaiters.get(fileKey) ?? [];
      this.fileWaiters.delete(fileKey);
      for (const waiter of waiters) {
        waiter();
      }
      return;
    }
    this.activeByFileKey.set(fileKey, current - 1);
  }
  async waitForFileIdle(fileKey) {
    if ((this.activeByFileKey.get(fileKey) ?? 0) === 0) {
      return;
    }
    await new Promise((resolve) => {
      const existing = this.fileWaiters.get(fileKey);
      if (existing) {
        existing.push(resolve);
      } else {
        this.fileWaiters.set(fileKey, [resolve]);
      }
    });
  }
  // Park the writer loop until wakeWriter() releases it.
  waitForWork() {
    return new Promise((resolve) => {
      this.writerWaiters.push(resolve);
    });
  }
  wakeWriter() {
    while (this.writerWaiters.length > 0) {
      const waiter = this.writerWaiters.shift();
      waiter?.();
    }
  }
  // Shutdown-signal variant of destroy(): same teardown, different message.
  shutdownFromSignal() {
    this.destroyed = true;
    this.writerStopping = true;
    const pending = this.queue.splice(0, this.queue.length);
    this.pendingEntries = 0;
    for (const item of pending) {
      item.reject(new ShutdownError("Shutdown requested. Dropping queued writes."));
    }
    this.wakeWriter();
    this.resolveDrainIfIdle();
  }
  isIdle() {
    return this.pendingEntries === 0 && this.activeWorkItems === 0 && this.inflightWrites === 0;
  }
  // Release all drain() waiters once nothing is queued, dispatched, or in
  // flight. Safe to call opportunistically — it no-ops when busy.
  resolveDrainIfIdle() {
    if (!this.isIdle()) {
      return;
    }
    while (this.drainWaiters.length > 0) {
      const waiter = this.drainWaiters.shift();
      waiter?.();
    }
  }
};
12763
+
12392
12764
  // src/watch/pid.ts
12393
12765
  import fs26 from "fs/promises";
12394
12766
  import path25 from "path";
@@ -12565,7 +12937,7 @@ function parsePositiveInt3(value, fallback, label) {
12565
12937
  }
12566
12938
  return Math.floor(parsed);
12567
12939
  }
12568
- async function sleep(ms) {
12940
+ async function sleep2(ms) {
12569
12941
  await new Promise((resolve) => setTimeout(resolve, ms));
12570
12942
  }
12571
12943
  function retryBackoffMs(attempt) {
@@ -12744,8 +13116,9 @@ async function runIngestCommand(inputPaths, options, deps) {
12744
13116
  readWatcherPidFn: deps?.readWatcherPidFn ?? readWatcherPid,
12745
13117
  resolveWatcherPidPathFn: deps?.resolveWatcherPidPathFn ?? resolveWatcherPidPath,
12746
13118
  nowFn: deps?.nowFn ?? (() => /* @__PURE__ */ new Date()),
12747
- sleepFn: deps?.sleepFn ?? sleep,
12748
- shouldShutdownFn: deps?.shouldShutdownFn ?? isShutdownRequested
13119
+ sleepFn: deps?.sleepFn ?? sleep2,
13120
+ shouldShutdownFn: deps?.shouldShutdownFn ?? isShutdownRequested,
13121
+ createWriteQueueFn: deps?.createWriteQueueFn ?? ((opts) => new WriteQueue(opts))
12749
13122
  };
12750
13123
  const clackOutput = { output: process.stderr };
12751
13124
  clack4.intro(banner(), clackOutput);
@@ -12788,6 +13161,7 @@ async function runIngestCommand(inputPaths, options, deps) {
12788
13161
  const skipIngested = force ? false : options.skipIngested !== false;
12789
13162
  const globPattern = options.glob?.trim() || DEFAULT_GLOB;
12790
13163
  const llmConcurrency = parsePositiveInt3(options.concurrency, 5, "--concurrency");
13164
+ const requestedFileWorkers = parsePositiveInt3(options.workers, 10, "--workers");
12791
13165
  const retryEnabled = options.retry !== false;
12792
13166
  const maxRetries = retryEnabled ? parsePositiveInt3(options.maxRetries, 3, "--max-retries") : 0;
12793
13167
  const platformRaw = options.platform?.trim();
@@ -12818,8 +13192,9 @@ async function runIngestCommand(inputPaths, options, deps) {
12818
13192
  })
12819
13193
  );
12820
13194
  const sortedTargets = targetsWithSizes.sort((a, b) => a.size - b.size || a.file.localeCompare(b.file)).map((item, index) => ({ ...item, index }));
13195
+ const fileWorkerCount = Math.min(requestedFileWorkers, Math.max(1, sortedTargets.length));
12821
13196
  clack4.log.info(
12822
- `Ingesting: ${ui.bold(String(sortedTargets.length))} file(s) | Glob: ${globPattern} | Chunk concurrency: ${ui.bold(String(llmConcurrency))} | Skip ingested: ${skipIngested ? "yes" : "no"}`,
13197
+ `Ingesting: ${ui.bold(String(sortedTargets.length))} file(s) | Glob: ${globPattern} | File workers: ${ui.bold(String(fileWorkerCount))} | Chunk concurrency: ${ui.bold(String(llmConcurrency))} | Skip ingested: ${skipIngested ? "yes" : "no"}`,
12823
13198
  clackOutput
12824
13199
  );
12825
13200
  if (sortedTargets.length === 0) {
@@ -12865,6 +13240,42 @@ async function runIngestCommand(inputPaths, options, deps) {
12865
13240
  }
12866
13241
  const db = resolvedDeps.getDbFn(dbPath);
12867
13242
  await resolvedDeps.initDbFn(db);
13243
+ const cleanupDbResources = () => {
13244
+ if (shouldLockDb) {
13245
+ releaseDbLock();
13246
+ }
13247
+ resolvedDeps.closeDbFn(db);
13248
+ };
13249
+ let embeddingApiKey = null;
13250
+ try {
13251
+ if (!embeddingApiKey) {
13252
+ embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
13253
+ }
13254
+ } catch (error) {
13255
+ cleanupDbResources();
13256
+ throw error;
13257
+ }
13258
+ if ((!embeddingApiKey || embeddingApiKey.trim().length === 0) && !dryRun) {
13259
+ cleanupDbResources();
13260
+ throw new Error("Embedding API key is required for ingest. Run 'agenr setup' to configure.");
13261
+ }
13262
+ let queue;
13263
+ try {
13264
+ queue = resolvedDeps.createWriteQueueFn({
13265
+ db,
13266
+ storeEntriesFn: resolvedDeps.storeEntriesFn,
13267
+ apiKey: embeddingApiKey ?? "",
13268
+ llmClient: client,
13269
+ dbPath,
13270
+ batchSize: 40,
13271
+ highWatermark: 500,
13272
+ retryOnFailure: retryEnabled,
13273
+ isShutdownRequested: resolvedDeps.shouldShutdownFn
13274
+ });
13275
+ } catch (error) {
13276
+ cleanupDbResources();
13277
+ throw error;
13278
+ }
12868
13279
  const results = new Array(sortedTargets.length);
12869
13280
  let totalEntriesExtracted = 0;
12870
13281
  let totalEntriesStored = 0;
@@ -12878,22 +13289,6 @@ async function runIngestCommand(inputPaths, options, deps) {
12878
13289
  let forceDeletedEntryRows = 0;
12879
13290
  let forceDeletedEntrySourceRows = 0;
12880
13291
  let completed = 0;
12881
- let embeddingApiKey = null;
12882
- if (!options.noPreFetch) {
12883
- try {
12884
- embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
12885
- } catch (error) {
12886
- embeddingApiKey = null;
12887
- if (verbose) {
12888
- clack4.log.warn(
12889
- formatWarn(
12890
- `Pre-fetch disabled - embedding API key not available: ${error instanceof Error ? error.message : String(error)}`
12891
- ),
12892
- clackOutput
12893
- );
12894
- }
12895
- }
12896
- }
12897
13292
  let watchStateLoaded = false;
12898
13293
  let watchState = createEmptyWatchState();
12899
13294
  let cursor = 0;
@@ -12901,26 +13296,11 @@ async function runIngestCommand(inputPaths, options, deps) {
12901
13296
  let filesWithChunkFailures = 0;
12902
13297
  const chunkStatsByFile = /* @__PURE__ */ new Map();
12903
13298
  let firstPassFailedIndexSet = /* @__PURE__ */ new Set();
12904
- let dbChain = Promise.resolve();
12905
- const withDbLock = async (fn) => {
12906
- const previous = dbChain;
12907
- let release;
12908
- dbChain = new Promise((resolve) => {
12909
- release = resolve;
12910
- });
12911
- await previous;
12912
- try {
12913
- return await fn();
12914
- } finally {
12915
- release();
12916
- }
12917
- };
12918
13299
  const updateProgress = (completedCount, totalCount, verb) => {
12919
13300
  if (verbose) {
12920
13301
  return;
12921
13302
  }
12922
- const suffix = llmConcurrency > 1 ? ` (${llmConcurrency} chunks active)...` : "...";
12923
- process.stderr.write(`\r${ui.dim(`${verb} ${completedCount}/${totalCount}${suffix}`)}`);
13303
+ process.stderr.write(`\r${ui.dim(`${verb} ${completedCount}/${totalCount} (queue: ${queue.pendingCount})...`)}`);
12924
13304
  };
12925
13305
  const clearProgressLine = () => {
12926
13306
  if (verbose) {
@@ -12988,8 +13368,9 @@ async function runIngestCommand(inputPaths, options, deps) {
12988
13368
  const rawContent = await fs27.readFile(target.file, "utf8");
12989
13369
  const ingestByteOffset = Buffer.byteLength(rawContent, "utf8");
12990
13370
  fileHash = resolvedDeps.hashTextFn(rawContent);
13371
+ const chunkTickets = [];
12991
13372
  if (skipIngested && !force) {
12992
- const alreadyIngested = await withDbLock(() => isAlreadyIngested(db, target.file, fileHash));
13373
+ const alreadyIngested = await isAlreadyIngested(db, target.file, fileHash);
12993
13374
  if (alreadyIngested) {
12994
13375
  fileResult.skipped = true;
12995
13376
  fileResult.skipReason = "already ingested";
@@ -12997,12 +13378,12 @@ async function runIngestCommand(inputPaths, options, deps) {
12997
13378
  }
12998
13379
  }
12999
13380
  if (force) {
13000
- const cleanupStats = await withDbLock(() => cleanupForForceReingest(db, target.file, dryRun));
13381
+ const cleanupStats = await queue.runExclusive(() => cleanupForForceReingest(db, target.file, dryRun));
13001
13382
  forceDeletedIngestLogRows += cleanupStats.ingestLogRows;
13002
13383
  forceDeletedEntryRows += cleanupStats.entryRows;
13003
13384
  forceDeletedEntrySourceRows += cleanupStats.entrySourceRows;
13004
13385
  } else {
13005
- baselineEntryIds = await withDbLock(() => getSourceEntryIds(db, target.file));
13386
+ baselineEntryIds = await getSourceEntryIds(db, target.file);
13006
13387
  }
13007
13388
  const parsed = await resolvedDeps.parseTranscriptFileFn(target.file, { raw: options.raw === true, verbose });
13008
13389
  if (verbose && parsed.warnings.length > 0) {
@@ -13014,8 +13395,8 @@ async function runIngestCommand(inputPaths, options, deps) {
13014
13395
  }
13015
13396
  }
13016
13397
  }
13017
- const processChunkEntries = async (chunkEntries2) => {
13018
- const normalizedEntries = chunkEntries2.map((entry) => ({
13398
+ const processChunkEntries = async (chunkEntries3) => {
13399
+ const normalizedEntries = chunkEntries3.map((entry) => ({
13019
13400
  ...entry,
13020
13401
  ...platform ? { platform } : {},
13021
13402
  ...project ? { project } : {},
@@ -13026,41 +13407,32 @@ async function runIngestCommand(inputPaths, options, deps) {
13026
13407
  }));
13027
13408
  fileResult.entriesExtracted += normalizedEntries.length;
13028
13409
  totalEntriesExtracted += normalizedEntries.length;
13029
- const deduped = resolvedDeps.deduplicateEntriesFn(normalizedEntries);
13030
- if (dryRun || deduped.length === 0) {
13410
+ if (dryRun || normalizedEntries.length === 0) {
13411
+ chunkTickets.push(
13412
+ Promise.resolve({
13413
+ added: 0,
13414
+ updated: 0,
13415
+ skipped: 0,
13416
+ superseded: 0,
13417
+ llm_dedup_calls: 0
13418
+ })
13419
+ );
13031
13420
  return;
13032
13421
  }
13033
- if (!embeddingApiKey) {
13034
- embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
13422
+ const deduped = resolvedDeps.deduplicateEntriesFn(normalizedEntries);
13423
+ if (deduped.length === 0) {
13424
+ chunkTickets.push(
13425
+ Promise.resolve({
13426
+ added: 0,
13427
+ updated: 0,
13428
+ skipped: 0,
13429
+ superseded: 0,
13430
+ llm_dedup_calls: 0
13431
+ })
13432
+ );
13433
+ return;
13035
13434
  }
13036
- const storeResult = await withDbLock(
13037
- () => resolvedDeps.storeEntriesFn(db, deduped, embeddingApiKey ?? "", {
13038
- sourceFile: target.file,
13039
- ingestContentHash: fileHash,
13040
- skipIngestLog: true,
13041
- onlineDedup: true,
13042
- skipLlmDedup: false,
13043
- llmClient: client,
13044
- dbPath
13045
- })
13046
- );
13047
- const reinforced = storeResult.updated;
13048
- const stored = storeResult.added + storeResult.superseded;
13049
- fileResult.entriesStored += stored;
13050
- fileResult.entriesSkippedDuplicate += storeResult.skipped;
13051
- fileResult.entriesReinforced += reinforced;
13052
- totalEntriesStored += stored;
13053
- totalEntriesAdded += storeResult.added;
13054
- totalEntriesUpdated += 0;
13055
- totalEntriesSkipped += storeResult.skipped;
13056
- totalEntriesReinforced += reinforced;
13057
- totalEntriesSuperseded += storeResult.superseded;
13058
- totalDedupLlmCalls += storeResult.llm_dedup_calls;
13059
- fileStoreStats.added += storeResult.added;
13060
- fileStoreStats.updated += storeResult.updated;
13061
- fileStoreStats.skipped += storeResult.skipped;
13062
- fileStoreStats.superseded += storeResult.superseded;
13063
- fileStoreStats.llmDedupCalls += storeResult.llm_dedup_calls;
13435
+ chunkTickets.push(queue.push(deduped, target.file, fileHash));
13064
13436
  };
13065
13437
  const extracted = await resolvedDeps.extractKnowledgeFromChunksFn({
13066
13438
  file: target.file,
@@ -13093,10 +13465,49 @@ async function runIngestCommand(inputPaths, options, deps) {
13093
13465
  `All chunks failed during extraction (${chunkLabel}). This is often caused by API rate limits or timeouts; check provider limits/logs and re-run ingest.`
13094
13466
  );
13095
13467
  }
13096
- await withDbLock(() => syncWatchStateOffset(target.file, ingestByteOffset));
13468
+ const writeResults = await Promise.allSettled(chunkTickets);
13469
+ const writeErrors = [];
13470
+ let cancelledTickets = 0;
13471
+ for (const writeResult of writeResults) {
13472
+ if (writeResult.status === "fulfilled") {
13473
+ const result = writeResult.value;
13474
+ const stored = result.added + result.superseded;
13475
+ fileResult.entriesStored += stored;
13476
+ fileResult.entriesSkippedDuplicate += result.skipped;
13477
+ fileResult.entriesReinforced += result.updated;
13478
+ totalEntriesStored += stored;
13479
+ totalEntriesAdded += result.added;
13480
+ totalEntriesSuperseded += result.superseded;
13481
+ totalEntriesSkipped += result.skipped;
13482
+ totalEntriesReinforced += result.updated;
13483
+ totalDedupLlmCalls += result.llm_dedup_calls;
13484
+ fileStoreStats.added += result.added;
13485
+ fileStoreStats.updated += result.updated;
13486
+ fileStoreStats.skipped += result.skipped;
13487
+ fileStoreStats.superseded += result.superseded;
13488
+ fileStoreStats.llmDedupCalls += result.llm_dedup_calls;
13489
+ continue;
13490
+ }
13491
+ const reason = writeResult.reason;
13492
+ if (reason instanceof CancelledError) {
13493
+ cancelledTickets += 1;
13494
+ continue;
13495
+ }
13496
+ writeErrors.push(errorMessage2(reason));
13497
+ }
13498
+ if (writeErrors.length > 0) {
13499
+ throw new Error(writeErrors.join(" | "));
13500
+ }
13501
+ if (cancelledTickets > 0 && verbose) {
13502
+ clack4.log.warn(
13503
+ formatWarn(`Cancelled ${cancelledTickets} pending write chunk(s) for ${path26.basename(target.file)}.`),
13504
+ clackOutput
13505
+ );
13506
+ }
13507
+ await syncWatchStateOffset(target.file, ingestByteOffset);
13097
13508
  if (!dryRun) {
13098
13509
  const fileDurationMs = Math.max(0, resolvedDeps.nowFn().getTime() - fileStartedAt.getTime());
13099
- await withDbLock(
13510
+ await queue.runExclusive(
13100
13511
  () => insertIngestLogForFile(db, {
13101
13512
  filePath: target.file,
13102
13513
  contentHash: fileHash,
@@ -13109,7 +13520,8 @@ async function runIngestCommand(inputPaths, options, deps) {
13109
13520
  } catch (error) {
13110
13521
  if (fileHash.length > 0) {
13111
13522
  try {
13112
- await withDbLock(() => cleanupFailedFileIngest(db, target.file, fileHash, baselineEntryIds, dryRun));
13523
+ await queue.cancel(target.file);
13524
+ await queue.runExclusive(() => cleanupFailedFileIngest(db, target.file, fileHash, baselineEntryIds, dryRun));
13113
13525
  } catch (cleanupError) {
13114
13526
  fileResult.error = `${errorMessage2(error)} | cleanup failed: ${errorMessage2(cleanupError)}`;
13115
13527
  return fileResult;
@@ -13129,7 +13541,7 @@ async function runIngestCommand(inputPaths, options, deps) {
13129
13541
  cursor = 0;
13130
13542
  completed = 0;
13131
13543
  const total = targets.length;
13132
- const workerCount = 1;
13544
+ const workerCount = Math.min(fileWorkerCount, total);
13133
13545
  await Promise.all(
13134
13546
  Array.from({ length: workerCount }, async () => {
13135
13547
  while (true) {
@@ -13247,6 +13659,17 @@ async function runIngestCommand(inputPaths, options, deps) {
13247
13659
  }
13248
13660
  } finally {
13249
13661
  clearProgressLine();
13662
+ try {
13663
+ await queue.drain();
13664
+ } catch (error) {
13665
+ if (error instanceof ShutdownError) {
13666
+ clack4.log.warn(formatWarn(`Write queue shutdown before full drain: ${error.message}`), clackOutput);
13667
+ } else {
13668
+ clack4.log.warn(formatWarn(`Write queue drain failed: ${errorMessage2(error)}`), clackOutput);
13669
+ }
13670
+ } finally {
13671
+ queue.destroy();
13672
+ }
13250
13673
  if (!dryRun) {
13251
13674
  try {
13252
13675
  await walCheckpoint(db);
@@ -15844,7 +16267,7 @@ var DEFAULT_WAL_CHECKPOINT_INTERVAL_MS = 3e4;
15844
16267
  function isFileNotFound(error) {
15845
16268
  return error.code === "ENOENT";
15846
16269
  }
15847
- async function sleep2(ms) {
16270
+ async function sleep3(ms) {
15848
16271
  await new Promise((resolve) => setTimeout(resolve, ms));
15849
16272
  }
15850
16273
  function formatError2(error) {
@@ -15931,7 +16354,7 @@ async function runWatcher(options, deps) {
15931
16354
  rmFn: deps?.rmFn ?? fs33.rm,
15932
16355
  watchFn: deps?.watchFn ?? watchFs,
15933
16356
  nowFn: deps?.nowFn ?? (() => /* @__PURE__ */ new Date()),
15934
- sleepFn: deps?.sleepFn ?? sleep2,
16357
+ sleepFn: deps?.sleepFn ?? sleep3,
15935
16358
  shouldShutdownFn: deps?.shouldShutdownFn ?? isShutdownRequested
15936
16359
  };
15937
16360
  const directoryMode = options.directoryMode === true;
@@ -16192,13 +16615,13 @@ async function runWatcher(options, deps) {
16192
16615
  options.onWarn?.(warning);
16193
16616
  }
16194
16617
  }
16195
- const processChunkEntries = async (chunkEntries2) => {
16618
+ const processChunkEntries = async (chunkEntries3) => {
16196
16619
  const platformTag = currentPlatform && currentPlatform !== "mtime" ? normalizeKnowledgePlatform(currentPlatform) ?? void 0 : void 0;
16197
- const taggedEntries = platformTag || cachedProject ? chunkEntries2.map((entry) => ({
16620
+ const taggedEntries = platformTag || cachedProject ? chunkEntries3.map((entry) => ({
16198
16621
  ...entry,
16199
16622
  ...platformTag ? { platform: platformTag } : {},
16200
16623
  ...cachedProject ? { project: cachedProject } : {}
16201
- })) : chunkEntries2;
16624
+ })) : chunkEntries3;
16202
16625
  cycleResult.entriesExtracted += taggedEntries.length;
16203
16626
  const deduped = resolvedDeps.deduplicateEntriesFn(taggedEntries);
16204
16627
  if (options.dryRun || deduped.length === 0) {
@@ -17677,7 +18100,12 @@ function createProgram() {
17677
18100
  const result = await runTodoCommand(subcommand, subject, { db: opts.db });
17678
18101
  process.exitCode = result.exitCode;
17679
18102
  });
17680
- program.command("ingest").description("Bulk-ingest knowledge from files and directories").argument("<paths...>", "Files or directories to process").option("--glob <pattern>", "File filter glob", "**/*.{jsonl,md,txt}").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--platform <name>", "Platform tag: openclaw, claude-code, codex").option("--project <name>", "Project tag (lowercase).", (val, prev) => [...prev, val], []).option("--verbose", "Show per-file details", false).option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--dry-run", "Extract without storing", false).option("--json", "Output JSON results", false).option("--concurrency <n>", "Parallel chunk extractions", parseIntOption, 5).option("--skip-ingested", "Skip already-ingested files", true).option("--no-retry", "Disable auto-retry for failed files").option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--max-retries <n>", "Maximum auto-retry attempts", parseIntOption, 3).option("--force", "Clean re-ingest: delete previous rows for each file before processing", false).action(async (paths, opts) => {
18103
+ program.command("ingest").description("Bulk-ingest knowledge from files and directories").argument("<paths...>", "Files or directories to process").option("--glob <pattern>", "File filter glob", "**/*.{jsonl,md,txt}").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--platform <name>", "Platform tag: openclaw, claude-code, codex").option("--project <name>", "Project tag (lowercase).", (val, prev) => [...prev, val], []).option("--verbose", "Show per-file details", false).option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--dry-run", "Extract without storing", false).option("--json", "Output JSON results", false).option("--concurrency <n>", "Parallel chunk extractions", parseIntOption, 5).option(
18104
+ "--workers <n>",
18105
+ "Number of files to process in parallel (default: 10). Each worker uses --concurrency chunk parallelism. Total concurrent LLM calls = workers x concurrency. Reduce if hitting rate limits. Writes retry once per sub-batch unless --no-retry is set.",
18106
+ parseIntOption,
18107
+ 10
18108
+ ).option("--skip-ingested", "Skip already-ingested files", true).option("--no-retry", "Disable auto-retry for failed files").option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--max-retries <n>", "Maximum auto-retry attempts", parseIntOption, 3).option("--force", "Clean re-ingest: delete previous rows for each file before processing", false).action(async (paths, opts) => {
17681
18109
  const result = await runIngestCommand(paths, {
17682
18110
  ...opts,
17683
18111
  noPreFetch: opts.noPreFetch === true
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agenr",
3
- "version": "0.7.16",
3
+ "version": "0.7.18",
4
4
  "openclaw": {
5
5
  "extensions": [
6
6
  "dist/openclaw-plugin/index.js"
@@ -11,6 +11,13 @@
11
11
  "bin": {
12
12
  "agenr": "dist/cli.js"
13
13
  },
14
+ "scripts": {
15
+ "build": "tsup src/cli.ts src/cli-main.ts src/openclaw-plugin/index.ts --format esm --dts",
16
+ "dev": "tsup src/cli.ts src/cli-main.ts --format esm --watch",
17
+ "test": "vitest run",
18
+ "test:watch": "vitest",
19
+ "typecheck": "tsc --noEmit"
20
+ },
14
21
  "dependencies": {
15
22
  "@clack/prompts": "^1.0.1",
16
23
  "@libsql/client": "^0.17.0",
@@ -54,11 +61,9 @@
54
61
  "README.md"
55
62
  ],
56
63
  "author": "agenr-ai",
57
- "scripts": {
58
- "build": "tsup src/cli.ts src/cli-main.ts src/openclaw-plugin/index.ts --format esm --dts",
59
- "dev": "tsup src/cli.ts src/cli-main.ts --format esm --watch",
60
- "test": "vitest run",
61
- "test:watch": "vitest",
62
- "typecheck": "tsc --noEmit"
64
+ "pnpm": {
65
+ "overrides": {
66
+ "fast-xml-parser": "^5.3.6"
67
+ }
63
68
  }
64
- }
69
+ }