agenr 0.7.16 → 0.7.17

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,15 @@
  # Changelog
 
+ ## [0.7.17] - 2026-02-21
+
+ ### Performance
+ - perf(ingest): two-phase extract+write pipeline eliminates SQLite write-lock contention; extraction workers run in parallel while a single background writer drains entries in batched transactions (#107)
+ - feat(ingest): add `--workers` flag (default 10) for file-level parallelism; previously hardcoded to 1
+ - The write queue retries each write sub-batch once on transient failure (2s delay) before surfacing the error to the outer file-level retry loop. Use `--no-retry` to disable all retries including the inner write retry.
+
+ ### Changed
+ - ingest: `entriesStored` now counts `added + superseded` (previously only `added`); superseded entries are written before the previous entry is marked superseded
+
  ## [0.7.16] - 2026-02-21
 
  ### Fixed
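
Taken together, the 0.7.17 entries describe a two-phase pipeline: extraction fans out across files and chunks, while a single background writer owns every SQLite write. A usage sketch of the new flag alongside the existing `--concurrency` option (the `./transcripts` path is hypothetical):

```
# 4 files in parallel x 5 chunks each => up to 20 concurrent LLM calls
agenr ingest ./transcripts --workers 4 --concurrency 5

# disable the outer file-level retry and the inner write retry
agenr ingest ./transcripts --no-retry
```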
package/dist/cli-main.js CHANGED
@@ -10202,7 +10202,7 @@ async function extractKnowledgeFromChunks(params) {
  );
  let dynamicDelay = baseDelay;
  let lastThrottleNoticeDelayMs = null;
- const sleep3 = params.sleepImpl ?? sleepMs2;
+ const sleep4 = params.sleepImpl ?? sleepMs2;
  const llmConcurrency = Math.max(1, Math.trunc(params.llmConcurrency ?? 1));
  const bufferStreamDeltas = llmConcurrency > 1 && Boolean(params.onStreamDelta);
  let cursor = 0;
@@ -10276,7 +10276,7 @@ async function extractKnowledgeFromChunks(params) {
  warnings.push(
  `Chunk ${chunk.chunk_index + 1}: attempt ${attempt} failed (${error instanceof Error ? error.message : String(error)}), retrying in ${backoffMs}ms.`
  );
- await sleep3(backoffMs);
+ await sleep4(backoffMs);
  continue;
  }
  break;
@@ -10328,9 +10328,9 @@ async function extractKnowledgeFromChunks(params) {
  if (dynamicDelay > 0 && cursor < params.chunks.length && !isShutdownRequested()) {
  if (llmConcurrency > 1) {
  const jitterMs = Math.max(0, Math.trunc(dynamicDelay * (0.5 + Math.random())));
- await sleep3(jitterMs);
+ await sleep4(jitterMs);
  } else if (currentIndex < params.chunks.length - 1) {
- await sleep3(dynamicDelay);
+ await sleep4(dynamicDelay);
  }
  }
  }
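
The `sleep3` → `sleep4` rename above (and the `sleep` → `sleep2`, `sleep2` → `sleep3` renames in later hunks) is mechanical bundler fallout: the new `src/ingest/write-queue.ts` module in the next hunk defines its own `sleep`, so every later numbered sleep helper in the bundle shifts up by one suffix. Behavior is unchanged.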
@@ -12389,6 +12389,374 @@ async function parseTranscriptFile(filePath, options) {
  };
  }
 
+ // src/ingest/write-queue.ts
+ var CancelledError = class extends Error {
+ constructor(message = "Write queue item was cancelled.") {
+ super(message);
+ this.name = "CancelledError";
+ }
+ };
+ var ShutdownError = class extends Error {
+ constructor(message = "Write queue is shutting down.") {
+ super(message);
+ this.name = "ShutdownError";
+ }
+ };
+ var ZERO_RESULT = {
+ added: 0,
+ updated: 0,
+ skipped: 0,
+ superseded: 0,
+ llm_dedup_calls: 0
+ };
+ function sleep(ms) {
+ return new Promise((resolve) => setTimeout(resolve, ms));
+ }
+ function mergeBatchResult(a, b) {
+ return {
+ added: a.added + b.added,
+ updated: a.updated + b.updated,
+ skipped: a.skipped + b.skipped,
+ superseded: a.superseded + b.superseded,
+ llm_dedup_calls: a.llm_dedup_calls + b.llm_dedup_calls
+ };
+ }
+ function chunkEntries2(entries, size) {
+ if (entries.length <= size) {
+ return [entries];
+ }
+ const chunks = [];
+ for (let i = 0; i < entries.length; i += size) {
+ chunks.push(entries.slice(i, i + size));
+ }
+ return chunks;
+ }
+ function toError(error) {
+ if (error instanceof Error) {
+ return error;
+ }
+ return new Error(String(error));
+ }
+ var WriteQueue = class {
+ db;
+ storeEntriesFn;
+ apiKey;
+ llmClient;
+ dbPath;
+ batchSize;
+ highWatermark;
+ isShutdownRequested;
+ queue = [];
+ pendingEntries = 0;
+ destroyed = false;
+ writerStopping = false;
+ writerStopped = false;
+ inflightWrites = 0;
+ activeWorkItems = 0;
+ activeByFileKey = /* @__PURE__ */ new Map();
+ fileWaiters = /* @__PURE__ */ new Map();
+ drainWaiters = [];
+ writerWaiters = [];
+ constructor(options) {
+ this.db = options.db;
+ this.storeEntriesFn = options.storeEntriesFn;
+ this.apiKey = options.apiKey;
+ this.llmClient = options.llmClient;
+ this.dbPath = options.dbPath;
+ this.batchSize = Math.max(1, Math.floor(options.batchSize ?? 40));
+ this.highWatermark = Math.max(1, Math.floor(options.highWatermark ?? 500));
+ this.isShutdownRequested = options.isShutdownRequested;
+ void this.runWriterLoop();
+ }
+ get pendingCount() {
+ return this.pendingEntries;
+ }
+ async push(entries, fileKey, fileHash) {
+ if (this.destroyed) {
+ throw new ShutdownError("WriteQueue has been destroyed and cannot accept new items.");
+ }
+ while (this.pendingEntries >= this.highWatermark) {
+ if (this.destroyed) {
+ throw new ShutdownError("WriteQueue has been destroyed and cannot accept new items.");
+ }
+ await sleep(50);
+ }
+ if (entries.length === 0) {
+ return { ...ZERO_RESULT };
+ }
+ return await new Promise((resolve, reject) => {
+ if (this.destroyed) {
+ reject(new ShutdownError("WriteQueue has been destroyed and cannot accept new items."));
+ return;
+ }
+ this.pendingEntries += entries.length;
+ this.queue.push({
+ kind: "write",
+ entries,
+ fileKey,
+ fileHash,
+ resolve,
+ reject
+ });
+ this.wakeWriter();
+ });
+ }
+ async cancel(fileKey) {
+ const kept = [];
+ const cancelled = [];
+ for (const item of this.queue) {
+ if (item.kind === "write" && item.fileKey === fileKey) {
+ cancelled.push(item);
+ this.pendingEntries = Math.max(0, this.pendingEntries - item.entries.length);
+ } else {
+ kept.push(item);
+ }
+ }
+ if (cancelled.length > 0) {
+ this.queue.splice(0, this.queue.length, ...kept);
+ for (const item of cancelled) {
+ item.reject(new CancelledError(`Cancelled queued write for ${fileKey}.`));
+ }
+ this.resolveDrainIfIdle();
+ }
+ await this.waitForFileIdle(fileKey);
+ }
+ async runExclusive(fn) {
+ if (this.destroyed) {
+ throw new ShutdownError("WriteQueue has been destroyed and cannot accept new items.");
+ }
+ return await new Promise((resolve, reject) => {
+ if (this.destroyed) {
+ reject(new ShutdownError("WriteQueue has been destroyed and cannot accept new items."));
+ return;
+ }
+ this.queue.push({
+ kind: "exclusive",
+ fn,
+ resolve,
+ reject
+ });
+ this.wakeWriter();
+ });
+ }
+ async drain() {
+ if (this.isIdle()) {
+ return;
+ }
+ await new Promise((resolve) => {
+ this.drainWaiters.push(resolve);
+ });
+ }
+ destroy() {
+ if (this.destroyed) {
+ return;
+ }
+ this.destroyed = true;
+ this.writerStopping = true;
+ const pending = this.queue.splice(0, this.queue.length);
+ this.pendingEntries = 0;
+ for (const item of pending) {
+ item.reject(new ShutdownError("WriteQueue destroyed before item was processed."));
+ }
+ this.wakeWriter();
+ this.resolveDrainIfIdle();
+ }
+ async runWriterLoop() {
+ while (true) {
+ if (this.isShutdownRequested?.() && this.hasPendingWriteItems()) {
+ this.shutdownFromSignal();
+ }
+ if ((this.writerStopping || this.destroyed) && this.queue.length === 0) {
+ break;
+ }
+ if (this.queue.length === 0) {
+ await this.waitForWork();
+ continue;
+ }
+ const batch = this.queue.splice(0, this.queue.length);
+ this.markBatchAsDispatched(batch);
+ await this.processBatch(batch);
+ }
+ this.writerStopped = true;
+ this.resolveDrainIfIdle();
+ }
+ hasPendingWriteItems() {
+ return this.queue.some((item) => item.kind === "write");
+ }
+ async processBatch(batch) {
+ let index = 0;
+ while (index < batch.length) {
+ const item = batch[index];
+ if (!item) {
+ index += 1;
+ continue;
+ }
+ if (item.kind === "exclusive") {
+ await this.processExclusive(item);
+ index += 1;
+ continue;
+ }
+ const segment = [];
+ while (index < batch.length) {
+ const segmentItem = batch[index];
+ if (!segmentItem || segmentItem.kind !== "write") {
+ break;
+ }
+ segment.push(segmentItem);
+ index += 1;
+ }
+ await this.processWriteSegment(segment);
+ }
+ }
+ async processExclusive(item) {
+ try {
+ const result = await item.fn();
+ item.resolve(result);
+ } catch (error) {
+ item.reject(toError(error));
+ } finally {
+ this.activeWorkItems = Math.max(0, this.activeWorkItems - 1);
+ this.resolveDrainIfIdle();
+ }
+ }
+ async processWriteSegment(segment) {
+ const grouped = /* @__PURE__ */ new Map();
+ for (const item of segment) {
+ const group = grouped.get(item.fileKey);
+ if (group) {
+ group.push(item);
+ } else {
+ grouped.set(item.fileKey, [item]);
+ }
+ }
+ for (const items of grouped.values()) {
+ for (const item of items) {
+ await this.processWriteItem(item);
+ }
+ }
+ }
+ async processWriteItem(item) {
+ let result = { ...ZERO_RESULT };
+ const subBatches = chunkEntries2(item.entries, this.batchSize);
+ try {
+ for (const subBatch of subBatches) {
+ const writeResult = await this.writeSubBatchWithRetry(subBatch, item.fileKey, item.fileHash);
+ result = mergeBatchResult(result, writeResult);
+ }
+ item.resolve(result);
+ } catch (error) {
+ item.reject(toError(error));
+ } finally {
+ this.activeWorkItems = Math.max(0, this.activeWorkItems - 1);
+ this.decrementActiveFile(item.fileKey);
+ this.resolveDrainIfIdle();
+ }
+ }
+ async writeSubBatchWithRetry(entries, fileKey, fileHash) {
+ let lastError = null;
+ for (let attempt = 1; attempt <= 2; attempt += 1) {
+ this.inflightWrites += 1;
+ try {
+ const result = await this.storeEntriesFn(this.db, entries, this.apiKey, {
+ sourceFile: fileKey,
+ ingestContentHash: fileHash,
+ skipIngestLog: true,
+ onlineDedup: true,
+ skipLlmDedup: false,
+ llmClient: this.llmClient,
+ dbPath: this.dbPath
+ });
+ return {
+ added: result.added,
+ updated: result.updated,
+ skipped: result.skipped,
+ superseded: result.superseded,
+ llm_dedup_calls: result.llm_dedup_calls
+ };
+ } catch (error) {
+ lastError = toError(error);
+ if (attempt < 2) {
+ await sleep(2e3);
+ }
+ } finally {
+ this.inflightWrites = Math.max(0, this.inflightWrites - 1);
+ this.resolveDrainIfIdle();
+ }
+ }
+ throw lastError ?? new Error("Write queue failed to store sub-batch.");
+ }
+ markBatchAsDispatched(batch) {
+ for (const item of batch) {
+ this.activeWorkItems += 1;
+ if (item.kind === "write") {
+ this.pendingEntries = Math.max(0, this.pendingEntries - item.entries.length);
+ this.activeByFileKey.set(item.fileKey, (this.activeByFileKey.get(item.fileKey) ?? 0) + 1);
+ }
+ }
+ this.resolveDrainIfIdle();
+ }
+ decrementActiveFile(fileKey) {
+ const current = this.activeByFileKey.get(fileKey) ?? 0;
+ if (current <= 1) {
+ this.activeByFileKey.delete(fileKey);
+ const waiters = this.fileWaiters.get(fileKey) ?? [];
+ this.fileWaiters.delete(fileKey);
+ for (const waiter of waiters) {
+ waiter();
+ }
+ return;
+ }
+ this.activeByFileKey.set(fileKey, current - 1);
+ }
+ async waitForFileIdle(fileKey) {
+ if ((this.activeByFileKey.get(fileKey) ?? 0) === 0) {
+ return;
+ }
+ await new Promise((resolve) => {
+ const existing = this.fileWaiters.get(fileKey);
+ if (existing) {
+ existing.push(resolve);
+ } else {
+ this.fileWaiters.set(fileKey, [resolve]);
+ }
+ });
+ }
+ waitForWork() {
+ return new Promise((resolve) => {
+ this.writerWaiters.push(resolve);
+ });
+ }
+ wakeWriter() {
+ while (this.writerWaiters.length > 0) {
+ const waiter = this.writerWaiters.shift();
+ waiter?.();
+ }
+ }
+ shutdownFromSignal() {
+ this.destroyed = true;
+ this.writerStopping = true;
+ const pending = this.queue.splice(0, this.queue.length);
+ this.pendingEntries = 0;
+ for (const item of pending) {
+ item.reject(new ShutdownError("Shutdown requested. Dropping queued writes."));
+ }
+ this.wakeWriter();
+ this.resolveDrainIfIdle();
+ }
+ isIdle() {
+ return this.pendingEntries === 0 && this.activeWorkItems === 0 && this.inflightWrites === 0;
+ }
+ resolveDrainIfIdle() {
+ if (!this.isIdle()) {
+ return;
+ }
+ while (this.drainWaiters.length > 0) {
+ const waiter = this.drainWaiters.shift();
+ waiter?.();
+ }
+ }
+ };
+
  // src/watch/pid.ts
  import fs26 from "fs/promises";
  import path25 from "path";
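
The bundled `WriteQueue` above combines three ideas: producers `push()` entries and receive a promise for their batch result, a lone writer loop drains the queue into batched `storeEntriesFn` calls, and `push()` applies backpressure by polling while pending entries sit at or above the high watermark (500 by default, written in 40-entry sub-batches). A minimal standalone sketch of that single-writer pattern, with illustrative names — not the package's API:

```ts
type Job<T> = { payload: T; resolve: () => void; reject: (err: Error) => void };

// Minimal single-writer queue: many producers, one consumer, polling backpressure.
class SingleWriter<T> {
  private queue: Job<T>[] = [];
  private wake: (() => void) | null = null;
  private stopped = false;

  constructor(
    private readonly write: (batch: T[]) => Promise<void>,
    private readonly highWatermark = 500,
  ) {
    void this.loop(); // start the lone consumer in the background
  }

  // Producers wait (async poll) while the queue is over the watermark.
  async push(payload: T): Promise<void> {
    while (this.queue.length >= this.highWatermark) {
      await new Promise((r) => setTimeout(r, 50));
    }
    return new Promise<void>((resolve, reject) => {
      this.queue.push({ payload, resolve, reject });
      this.wake?.(); // rouse the writer if it is parked on an empty queue
    });
  }

  stop(): void {
    this.stopped = true;
    this.wake?.();
  }

  private async loop(): Promise<void> {
    while (true) {
      if (this.queue.length === 0) {
        if (this.stopped) return;
        await new Promise<void>((r) => (this.wake = r)); // park until push()/stop()
        this.wake = null;
        continue;
      }
      const batch = this.queue.splice(0, this.queue.length); // drain all queued jobs
      try {
        await this.write(batch.map((j) => j.payload)); // one write call per drained batch
        for (const job of batch) job.resolve();
      } catch (err) {
        const e = err instanceof Error ? err : new Error(String(err));
        for (const job of batch) job.reject(e);
      }
    }
  }
}
```

Because only this loop ever touches the database, concurrent extraction workers never contend for SQLite's single write lock, which is the contention the changelog entry refers to.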
@@ -12565,7 +12933,7 @@ function parsePositiveInt3(value, fallback, label) {
  }
  return Math.floor(parsed);
  }
- async function sleep(ms) {
+ async function sleep2(ms) {
  await new Promise((resolve) => setTimeout(resolve, ms));
  }
  function retryBackoffMs(attempt) {
@@ -12744,8 +13112,9 @@ async function runIngestCommand(inputPaths, options, deps) {
  readWatcherPidFn: deps?.readWatcherPidFn ?? readWatcherPid,
  resolveWatcherPidPathFn: deps?.resolveWatcherPidPathFn ?? resolveWatcherPidPath,
  nowFn: deps?.nowFn ?? (() => /* @__PURE__ */ new Date()),
- sleepFn: deps?.sleepFn ?? sleep,
- shouldShutdownFn: deps?.shouldShutdownFn ?? isShutdownRequested
+ sleepFn: deps?.sleepFn ?? sleep2,
+ shouldShutdownFn: deps?.shouldShutdownFn ?? isShutdownRequested,
+ createWriteQueueFn: deps?.createWriteQueueFn ?? ((opts) => new WriteQueue(opts))
  };
  const clackOutput = { output: process.stderr };
  clack4.intro(banner(), clackOutput);
@@ -12788,6 +13157,7 @@ async function runIngestCommand(inputPaths, options, deps) {
  const skipIngested = force ? false : options.skipIngested !== false;
  const globPattern = options.glob?.trim() || DEFAULT_GLOB;
  const llmConcurrency = parsePositiveInt3(options.concurrency, 5, "--concurrency");
+ const requestedFileWorkers = parsePositiveInt3(options.workers, 10, "--workers");
  const retryEnabled = options.retry !== false;
  const maxRetries = retryEnabled ? parsePositiveInt3(options.maxRetries, 3, "--max-retries") : 0;
  const platformRaw = options.platform?.trim();
@@ -12818,8 +13188,9 @@ async function runIngestCommand(inputPaths, options, deps) {
  })
  );
  const sortedTargets = targetsWithSizes.sort((a, b) => a.size - b.size || a.file.localeCompare(b.file)).map((item, index) => ({ ...item, index }));
+ const fileWorkerCount = Math.min(requestedFileWorkers, Math.max(1, sortedTargets.length));
  clack4.log.info(
- `Ingesting: ${ui.bold(String(sortedTargets.length))} file(s) | Glob: ${globPattern} | Chunk concurrency: ${ui.bold(String(llmConcurrency))} | Skip ingested: ${skipIngested ? "yes" : "no"}`,
+ `Ingesting: ${ui.bold(String(sortedTargets.length))} file(s) | Glob: ${globPattern} | File workers: ${ui.bold(String(fileWorkerCount))} | Chunk concurrency: ${ui.bold(String(llmConcurrency))} | Skip ingested: ${skipIngested ? "yes" : "no"}`,
  clackOutput
  );
  if (sortedTargets.length === 0) {
@@ -12865,6 +13236,41 @@ async function runIngestCommand(inputPaths, options, deps) {
  }
  const db = resolvedDeps.getDbFn(dbPath);
  await resolvedDeps.initDbFn(db);
+ const cleanupDbResources = () => {
+ if (shouldLockDb) {
+ releaseDbLock();
+ }
+ resolvedDeps.closeDbFn(db);
+ };
+ let embeddingApiKey = null;
+ try {
+ if (!embeddingApiKey && !options.noPreFetch) {
+ embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
+ }
+ } catch (error) {
+ cleanupDbResources();
+ throw error;
+ }
+ if ((!embeddingApiKey || embeddingApiKey.trim().length === 0) && !dryRun) {
+ cleanupDbResources();
+ throw new Error("Embedding API key is required for ingest. Run 'agenr setup' to configure.");
+ }
+ let queue;
+ try {
+ queue = resolvedDeps.createWriteQueueFn({
+ db,
+ storeEntriesFn: resolvedDeps.storeEntriesFn,
+ apiKey: embeddingApiKey ?? "",
+ llmClient: client,
+ dbPath,
+ batchSize: 40,
+ highWatermark: 500,
+ isShutdownRequested: resolvedDeps.shouldShutdownFn
+ });
+ } catch (error) {
+ cleanupDbResources();
+ throw error;
+ }
  const results = new Array(sortedTargets.length);
  let totalEntriesExtracted = 0;
  let totalEntriesStored = 0;
@@ -12878,22 +13284,6 @@ async function runIngestCommand(inputPaths, options, deps) {
  let forceDeletedEntryRows = 0;
  let forceDeletedEntrySourceRows = 0;
  let completed = 0;
- let embeddingApiKey = null;
- if (!options.noPreFetch) {
- try {
- embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
- } catch (error) {
- embeddingApiKey = null;
- if (verbose) {
- clack4.log.warn(
- formatWarn(
- `Pre-fetch disabled - embedding API key not available: ${error instanceof Error ? error.message : String(error)}`
- ),
- clackOutput
- );
- }
- }
- }
  let watchStateLoaded = false;
  let watchState = createEmptyWatchState();
  let cursor = 0;
@@ -12901,26 +13291,11 @@ async function runIngestCommand(inputPaths, options, deps) {
  let filesWithChunkFailures = 0;
  const chunkStatsByFile = /* @__PURE__ */ new Map();
  let firstPassFailedIndexSet = /* @__PURE__ */ new Set();
- let dbChain = Promise.resolve();
- const withDbLock = async (fn) => {
- const previous = dbChain;
- let release;
- dbChain = new Promise((resolve) => {
- release = resolve;
- });
- await previous;
- try {
- return await fn();
- } finally {
- release();
- }
- };
  const updateProgress = (completedCount, totalCount, verb) => {
  if (verbose) {
  return;
  }
- const suffix = llmConcurrency > 1 ? ` (${llmConcurrency} chunks active)...` : "...";
- process.stderr.write(`\r${ui.dim(`${verb} ${completedCount}/${totalCount}${suffix}`)}`);
+ process.stderr.write(`\r${ui.dim(`${verb} ${completedCount}/${totalCount} (queue: ${queue.pendingCount})...`)}`);
  };
  const clearProgressLine = () => {
  if (verbose) {
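
The `withDbLock` helper removed in this hunk was a promise-chain mutex: each caller awaited the previous tail promise and installed its own completion as the new tail, so database work ran strictly one operation at a time. A standalone sketch of that pattern (generic names, illustrative only):

```ts
// Promise-chain mutex: async callers run strictly one after another.
function createChainLock() {
  let tail: Promise<void> = Promise.resolve();
  return async function withLock<T>(fn: () => Promise<T>): Promise<T> {
    const previous = tail;
    let release!: () => void;
    tail = new Promise<void>((resolve) => (release = resolve));
    await previous; // wait for every earlier caller to finish
    try {
      return await fn(); // run the critical section
    } finally {
      release(); // unblock the next caller, even when fn throws
    }
  };
}
```

In 0.7.17 this ad-hoc lock becomes redundant: serialization moves into the `WriteQueue`, whose `runExclusive` plays the same role for the remaining one-off database operations.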
@@ -12988,8 +13363,9 @@ async function runIngestCommand(inputPaths, options, deps) {
  const rawContent = await fs27.readFile(target.file, "utf8");
  const ingestByteOffset = Buffer.byteLength(rawContent, "utf8");
  fileHash = resolvedDeps.hashTextFn(rawContent);
+ const chunkTickets = [];
  if (skipIngested && !force) {
- const alreadyIngested = await withDbLock(() => isAlreadyIngested(db, target.file, fileHash));
+ const alreadyIngested = await isAlreadyIngested(db, target.file, fileHash);
  if (alreadyIngested) {
  fileResult.skipped = true;
  fileResult.skipReason = "already ingested";
@@ -12997,12 +13373,12 @@ async function runIngestCommand(inputPaths, options, deps) {
  }
  }
  if (force) {
- const cleanupStats = await withDbLock(() => cleanupForForceReingest(db, target.file, dryRun));
+ const cleanupStats = await queue.runExclusive(() => cleanupForForceReingest(db, target.file, dryRun));
  forceDeletedIngestLogRows += cleanupStats.ingestLogRows;
  forceDeletedEntryRows += cleanupStats.entryRows;
  forceDeletedEntrySourceRows += cleanupStats.entrySourceRows;
  } else {
- baselineEntryIds = await withDbLock(() => getSourceEntryIds(db, target.file));
+ baselineEntryIds = await getSourceEntryIds(db, target.file);
  }
  const parsed = await resolvedDeps.parseTranscriptFileFn(target.file, { raw: options.raw === true, verbose });
  if (verbose && parsed.warnings.length > 0) {
@@ -13014,8 +13390,8 @@ async function runIngestCommand(inputPaths, options, deps) {
  }
  }
  }
- const processChunkEntries = async (chunkEntries2) => {
- const normalizedEntries = chunkEntries2.map((entry) => ({
+ const processChunkEntries = async (chunkEntries3) => {
+ const normalizedEntries = chunkEntries3.map((entry) => ({
  ...entry,
  ...platform ? { platform } : {},
  ...project ? { project } : {},
@@ -13026,41 +13402,32 @@ async function runIngestCommand(inputPaths, options, deps) {
  }));
  fileResult.entriesExtracted += normalizedEntries.length;
  totalEntriesExtracted += normalizedEntries.length;
- const deduped = resolvedDeps.deduplicateEntriesFn(normalizedEntries);
- if (dryRun || deduped.length === 0) {
+ if (dryRun || normalizedEntries.length === 0) {
+ chunkTickets.push(
+ Promise.resolve({
+ added: 0,
+ updated: 0,
+ skipped: 0,
+ superseded: 0,
+ llm_dedup_calls: 0
+ })
+ );
  return;
  }
- if (!embeddingApiKey) {
- embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
+ const deduped = resolvedDeps.deduplicateEntriesFn(normalizedEntries);
+ if (deduped.length === 0) {
+ chunkTickets.push(
+ Promise.resolve({
+ added: 0,
+ updated: 0,
+ skipped: 0,
+ superseded: 0,
+ llm_dedup_calls: 0
+ })
+ );
+ return;
  }
- const storeResult = await withDbLock(
- () => resolvedDeps.storeEntriesFn(db, deduped, embeddingApiKey ?? "", {
- sourceFile: target.file,
- ingestContentHash: fileHash,
- skipIngestLog: true,
- onlineDedup: true,
- skipLlmDedup: false,
- llmClient: client,
- dbPath
- })
- );
- const reinforced = storeResult.updated;
- const stored = storeResult.added + storeResult.superseded;
- fileResult.entriesStored += stored;
- fileResult.entriesSkippedDuplicate += storeResult.skipped;
- fileResult.entriesReinforced += reinforced;
- totalEntriesStored += stored;
- totalEntriesAdded += storeResult.added;
- totalEntriesUpdated += 0;
- totalEntriesSkipped += storeResult.skipped;
- totalEntriesReinforced += reinforced;
- totalEntriesSuperseded += storeResult.superseded;
- totalDedupLlmCalls += storeResult.llm_dedup_calls;
- fileStoreStats.added += storeResult.added;
- fileStoreStats.updated += storeResult.updated;
- fileStoreStats.skipped += storeResult.skipped;
- fileStoreStats.superseded += storeResult.superseded;
- fileStoreStats.llmDedupCalls += storeResult.llm_dedup_calls;
+ chunkTickets.push(queue.push(deduped, target.file, fileHash));
  };
  const extracted = await resolvedDeps.extractKnowledgeFromChunksFn({
  file: target.file,
@@ -13093,10 +13460,49 @@ async function runIngestCommand(inputPaths, options, deps) {
  `All chunks failed during extraction (${chunkLabel}). This is often caused by API rate limits or timeouts; check provider limits/logs and re-run ingest.`
  );
  }
- await withDbLock(() => syncWatchStateOffset(target.file, ingestByteOffset));
+ const writeResults = await Promise.allSettled(chunkTickets);
+ const writeErrors = [];
+ let cancelledTickets = 0;
+ for (const writeResult of writeResults) {
+ if (writeResult.status === "fulfilled") {
+ const result = writeResult.value;
+ const stored = result.added + result.superseded;
+ fileResult.entriesStored += stored;
+ fileResult.entriesSkippedDuplicate += result.skipped;
+ fileResult.entriesReinforced += result.updated;
+ totalEntriesStored += stored;
+ totalEntriesAdded += result.added;
+ totalEntriesSuperseded += result.superseded;
+ totalEntriesSkipped += result.skipped;
+ totalEntriesReinforced += result.updated;
+ totalDedupLlmCalls += result.llm_dedup_calls;
+ fileStoreStats.added += result.added;
+ fileStoreStats.updated += result.updated;
+ fileStoreStats.skipped += result.skipped;
+ fileStoreStats.superseded += result.superseded;
+ fileStoreStats.llmDedupCalls += result.llm_dedup_calls;
+ continue;
+ }
+ const reason = writeResult.reason;
+ if (reason instanceof CancelledError) {
+ cancelledTickets += 1;
+ continue;
+ }
+ writeErrors.push(errorMessage2(reason));
+ }
+ if (writeErrors.length > 0) {
+ throw new Error(writeErrors.join(" | "));
+ }
+ if (cancelledTickets > 0 && verbose) {
+ clack4.log.warn(
+ formatWarn(`Cancelled ${cancelledTickets} pending write chunk(s) for ${path26.basename(target.file)}.`),
+ clackOutput
+ );
+ }
+ await syncWatchStateOffset(target.file, ingestByteOffset);
  if (!dryRun) {
  const fileDurationMs = Math.max(0, resolvedDeps.nowFn().getTime() - fileStartedAt.getTime());
- await withDbLock(
+ await queue.runExclusive(
  () => insertIngestLogForFile(db, {
  filePath: target.file,
  contentHash: fileHash,
@@ -13109,7 +13515,8 @@ async function runIngestCommand(inputPaths, options, deps) {
  } catch (error) {
  if (fileHash.length > 0) {
  try {
- await withDbLock(() => cleanupFailedFileIngest(db, target.file, fileHash, baselineEntryIds, dryRun));
+ await queue.cancel(target.file);
+ await queue.runExclusive(() => cleanupFailedFileIngest(db, target.file, fileHash, baselineEntryIds, dryRun));
  } catch (cleanupError) {
  fileResult.error = `${errorMessage2(error)} | cleanup failed: ${errorMessage2(cleanupError)}`;
  return fileResult;
@@ -13129,7 +13536,7 @@ async function runIngestCommand(inputPaths, options, deps) {
  cursor = 0;
  completed = 0;
  const total = targets.length;
- const workerCount = 1;
+ const workerCount = Math.min(fileWorkerCount, total);
  await Promise.all(
  Array.from({ length: workerCount }, async () => {
  while (true) {
@@ -13247,6 +13654,17 @@ async function runIngestCommand(inputPaths, options, deps) {
  }
  } finally {
  clearProgressLine();
+ try {
+ await queue.drain();
+ } catch (error) {
+ if (error instanceof ShutdownError) {
+ clack4.log.warn(formatWarn(`Write queue shutdown before full drain: ${error.message}`), clackOutput);
+ } else {
+ clack4.log.warn(formatWarn(`Write queue drain failed: ${errorMessage2(error)}`), clackOutput);
+ }
+ } finally {
+ queue.destroy();
+ }
  if (!dryRun) {
  try {
  await walCheckpoint(db);
@@ -15844,7 +16262,7 @@ var DEFAULT_WAL_CHECKPOINT_INTERVAL_MS = 3e4;
  function isFileNotFound(error) {
  return error.code === "ENOENT";
  }
- async function sleep2(ms) {
+ async function sleep3(ms) {
  await new Promise((resolve) => setTimeout(resolve, ms));
  }
  function formatError2(error) {
@@ -15931,7 +16349,7 @@ async function runWatcher(options, deps) {
  rmFn: deps?.rmFn ?? fs33.rm,
  watchFn: deps?.watchFn ?? watchFs,
  nowFn: deps?.nowFn ?? (() => /* @__PURE__ */ new Date()),
- sleepFn: deps?.sleepFn ?? sleep2,
+ sleepFn: deps?.sleepFn ?? sleep3,
  shouldShutdownFn: deps?.shouldShutdownFn ?? isShutdownRequested
  };
  const directoryMode = options.directoryMode === true;
@@ -16192,13 +16610,13 @@ async function runWatcher(options, deps) {
  options.onWarn?.(warning);
  }
  }
- const processChunkEntries = async (chunkEntries2) => {
+ const processChunkEntries = async (chunkEntries3) => {
  const platformTag = currentPlatform && currentPlatform !== "mtime" ? normalizeKnowledgePlatform(currentPlatform) ?? void 0 : void 0;
- const taggedEntries = platformTag || cachedProject ? chunkEntries2.map((entry) => ({
+ const taggedEntries = platformTag || cachedProject ? chunkEntries3.map((entry) => ({
  ...entry,
  ...platformTag ? { platform: platformTag } : {},
  ...cachedProject ? { project: cachedProject } : {}
- })) : chunkEntries2;
+ })) : chunkEntries3;
  cycleResult.entriesExtracted += taggedEntries.length;
  const deduped = resolvedDeps.deduplicateEntriesFn(taggedEntries);
  if (options.dryRun || deduped.length === 0) {
@@ -17677,7 +18095,12 @@ function createProgram() {
  const result = await runTodoCommand(subcommand, subject, { db: opts.db });
  process.exitCode = result.exitCode;
  });
- program.command("ingest").description("Bulk-ingest knowledge from files and directories").argument("<paths...>", "Files or directories to process").option("--glob <pattern>", "File filter glob", "**/*.{jsonl,md,txt}").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--platform <name>", "Platform tag: openclaw, claude-code, codex").option("--project <name>", "Project tag (lowercase).", (val, prev) => [...prev, val], []).option("--verbose", "Show per-file details", false).option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--dry-run", "Extract without storing", false).option("--json", "Output JSON results", false).option("--concurrency <n>", "Parallel chunk extractions", parseIntOption, 5).option("--skip-ingested", "Skip already-ingested files", true).option("--no-retry", "Disable auto-retry for failed files").option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--max-retries <n>", "Maximum auto-retry attempts", parseIntOption, 3).option("--force", "Clean re-ingest: delete previous rows for each file before processing", false).action(async (paths, opts) => {
+ program.command("ingest").description("Bulk-ingest knowledge from files and directories").argument("<paths...>", "Files or directories to process").option("--glob <pattern>", "File filter glob", "**/*.{jsonl,md,txt}").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--platform <name>", "Platform tag: openclaw, claude-code, codex").option("--project <name>", "Project tag (lowercase).", (val, prev) => [...prev, val], []).option("--verbose", "Show per-file details", false).option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--dry-run", "Extract without storing", false).option("--json", "Output JSON results", false).option("--concurrency <n>", "Parallel chunk extractions", parseIntOption, 5).option(
+ "--workers <n>",
+ "Number of files to process in parallel (default: 10). Each worker uses --concurrency chunk parallelism. Total concurrent LLM calls = workers x concurrency. Reduce if hitting rate limits.",
+ parseIntOption,
+ 10
+ ).option("--skip-ingested", "Skip already-ingested files", true).option("--no-retry", "Disable auto-retry for failed files").option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--max-retries <n>", "Maximum auto-retry attempts", parseIntOption, 3).option("--force", "Clean re-ingest: delete previous rows for each file before processing", false).action(async (paths, opts) => {
  const result = await runIngestCommand(paths, {
  ...opts,
  noPreFetch: opts.noPreFetch === true
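
With the defaults registered here, `--workers 10` combines with `--concurrency 5` (earlier in the same option chain) for up to 10 × 5 = 50 LLM calls in flight at once; that product is why the help text advises lowering `--workers` when rate limits bite.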
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "agenr",
- "version": "0.7.16",
+ "version": "0.7.17",
  "openclaw": {
  "extensions": [
  "dist/openclaw-plugin/index.js"
@@ -11,6 +11,13 @@
  "bin": {
  "agenr": "dist/cli.js"
  },
+ "scripts": {
+ "build": "tsup src/cli.ts src/cli-main.ts src/openclaw-plugin/index.ts --format esm --dts",
+ "dev": "tsup src/cli.ts src/cli-main.ts --format esm --watch",
+ "test": "vitest run",
+ "test:watch": "vitest",
+ "typecheck": "tsc --noEmit"
+ },
  "dependencies": {
  "@clack/prompts": "^1.0.1",
  "@libsql/client": "^0.17.0",
@@ -54,11 +61,9 @@
  "README.md"
  ],
  "author": "agenr-ai",
- "scripts": {
- "build": "tsup src/cli.ts src/cli-main.ts src/openclaw-plugin/index.ts --format esm --dts",
- "dev": "tsup src/cli.ts src/cli-main.ts --format esm --watch",
- "test": "vitest run",
- "test:watch": "vitest",
- "typecheck": "tsc --noEmit"
+ "pnpm": {
+ "overrides": {
+ "fast-xml-parser": "^5.3.6"
+ }
  }
- }
+ }