agenr 0.7.16 → 0.7.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/dist/cli-main.js +515 -87
- package/package.json +13 -8
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.7.18] - 2026-02-21
|
|
4
|
+
|
|
5
|
+
### Fixed
|
|
6
|
+
- fix(lockfile): suppress false-positive "Another agenr process is writing" warning during multi-worker ingest; `isDbLocked` now returns false when the lock is held by the current process (#121)
|
|
7
|
+
|
|
8
|
+
## [0.7.17] - 2026-02-21
|
|
9
|
+
|
|
10
|
+
### Performance
|
|
11
|
+
- perf(ingest): two-phase extract+write pipeline eliminates SQLite write-lock contention; extraction workers run in parallel while a single background writer drains entries in batched transactions (#107)
|
|
12
|
+
- feat(ingest): add `--workers` flag (default 10) for file-level parallelism; previously hardcoded to 1
|
|
13
|
+
- The write queue retries each write sub-batch once on transient failure (2s delay) before surfacing the error to the outer file-level retry loop. Use `--no-retry` to disable all retries including the inner write retry.
|
|
14
|
+
|
|
15
|
+
### Changed
|
|
16
|
+
- ingest: `entriesStored` now counts `added + superseded` (previously only `added`); superseded entries are written before the previous entry is marked superseded
|
|
17
|
+
|
|
3
18
|
## [0.7.16] - 2026-02-21
|
|
4
19
|
|
|
5
20
|
### Fixed
|
package/dist/cli-main.js
CHANGED
|
@@ -4272,6 +4272,9 @@ function isDbLocked(lockDir) {
|
|
|
4272
4272
|
if (!pid) {
|
|
4273
4273
|
return false;
|
|
4274
4274
|
}
|
|
4275
|
+
if (pid === process.pid) {
|
|
4276
|
+
return false;
|
|
4277
|
+
}
|
|
4275
4278
|
return isPidAlive(pid);
|
|
4276
4279
|
}
|
|
4277
4280
|
function warnIfLocked() {
|
|
@@ -10202,7 +10205,7 @@ async function extractKnowledgeFromChunks(params) {
|
|
|
10202
10205
|
);
|
|
10203
10206
|
let dynamicDelay = baseDelay;
|
|
10204
10207
|
let lastThrottleNoticeDelayMs = null;
|
|
10205
|
-
const
|
|
10208
|
+
const sleep4 = params.sleepImpl ?? sleepMs2;
|
|
10206
10209
|
const llmConcurrency = Math.max(1, Math.trunc(params.llmConcurrency ?? 1));
|
|
10207
10210
|
const bufferStreamDeltas = llmConcurrency > 1 && Boolean(params.onStreamDelta);
|
|
10208
10211
|
let cursor = 0;
|
|
@@ -10276,7 +10279,7 @@ async function extractKnowledgeFromChunks(params) {
|
|
|
10276
10279
|
warnings.push(
|
|
10277
10280
|
`Chunk ${chunk.chunk_index + 1}: attempt ${attempt} failed (${error instanceof Error ? error.message : String(error)}), retrying in ${backoffMs}ms.`
|
|
10278
10281
|
);
|
|
10279
|
-
await
|
|
10282
|
+
await sleep4(backoffMs);
|
|
10280
10283
|
continue;
|
|
10281
10284
|
}
|
|
10282
10285
|
break;
|
|
@@ -10328,9 +10331,9 @@ async function extractKnowledgeFromChunks(params) {
|
|
|
10328
10331
|
if (dynamicDelay > 0 && cursor < params.chunks.length && !isShutdownRequested()) {
|
|
10329
10332
|
if (llmConcurrency > 1) {
|
|
10330
10333
|
const jitterMs = Math.max(0, Math.trunc(dynamicDelay * (0.5 + Math.random())));
|
|
10331
|
-
await
|
|
10334
|
+
await sleep4(jitterMs);
|
|
10332
10335
|
} else if (currentIndex < params.chunks.length - 1) {
|
|
10333
|
-
await
|
|
10336
|
+
await sleep4(dynamicDelay);
|
|
10334
10337
|
}
|
|
10335
10338
|
}
|
|
10336
10339
|
}
|
|
@@ -12389,6 +12392,375 @@ async function parseTranscriptFile(filePath, options) {
|
|
|
12389
12392
|
};
|
|
12390
12393
|
}
|
|
12391
12394
|
|
|
12395
|
+
// src/ingest/write-queue.ts
|
|
12396
|
+
/**
 * Error used to reject a queued write item that was cancelled before the
 * writer picked it up.
 */
var CancelledError = class extends Error {
  // Own `name` property so the error is identifiable by name as well as by instanceof.
  name = "CancelledError";

  /**
   * @param {string} [message] - Human-readable reason; a generic text is used when omitted.
   */
  constructor(message = "Write queue item was cancelled.") {
    super(message);
  }
};
|
|
12402
|
+
/**
 * Error used to reject work that cannot run because the write queue is
 * shutting down or has already been destroyed.
 */
var ShutdownError = class extends Error {
  // Own `name` property so the error is identifiable by name as well as by instanceof.
  name = "ShutdownError";

  /**
   * @param {string} [message] - Human-readable reason; a generic text is used when omitted.
   */
  constructor(message = "Write queue is shutting down.") {
    super(message);
  }
};
|
|
12408
|
+
/**
 * All-zero store result used as the starting point when accumulating
 * per-batch counters. It is only ever copied (`{ ...ZERO_RESULT }`), so it is
 * frozen to make sure an accidental write can never leak into later results.
 */
var ZERO_RESULT = Object.freeze({
  added: 0,
  updated: 0,
  skipped: 0,
  superseded: 0,
  llm_dedup_calls: 0
});
|
|
12415
|
+
/**
 * Returns a promise that settles after roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Resolves (with no value) once the timer fires.
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
|
|
12418
|
+
/**
 * Adds two store-result counter objects field by field.
 *
 * @param {object} a - First result (`added`, `updated`, `skipped`, `superseded`, `llm_dedup_calls`).
 * @param {object} b - Second result with the same fields.
 * @returns {object} A new object holding the per-field sums; neither input is modified.
 */
function mergeBatchResult(a, b) {
  const totals = {};
  // Fixed field list keeps the key order of the returned object stable.
  for (const field of ["added", "updated", "skipped", "superseded", "llm_dedup_calls"]) {
    totals[field] = a[field] + b[field];
  }
  return totals;
}
|
|
12427
|
+
/**
 * Splits `entries` into consecutive slices of at most `size` elements.
 *
 * @param {Array} entries - Items to split.
 * @param {number} size - Maximum slice length; must be >= 1 whenever a split is needed.
 * @returns {Array<Array>} `[entries]` (the same array, not a copy) when it already
 *   fits in one slice, otherwise freshly sliced arrays in original order.
 * @throws {RangeError} When a split is needed but `size` is not a number >= 1.
 */
function chunkEntries2(entries, size) {
  // Fast path: everything fits in a single slice, so hand back the input untouched.
  if (entries.length <= size) {
    return [entries];
  }
  // A zero/negative step would never advance the loop below (infinite loop),
  // and NaN would silently yield a single empty slice, dropping every entry.
  if (!(size >= 1)) {
    throw new RangeError(`chunkEntries2: size must be a number >= 1, received ${String(size)}.`);
  }
  const chunks = [];
  for (let start = 0; start < entries.length; start += size) {
    chunks.push(entries.slice(start, start + size));
  }
  return chunks;
}
|
|
12437
|
+
/**
 * Normalizes an arbitrary thrown value into an `Error` instance.
 *
 * @param {unknown} error - Any caught value.
 * @returns {Error} The value itself when it already is an `Error`; otherwise a
 *   new `Error` whose message is `String(error)`.
 */
function toError(error) {
  return error instanceof Error ? error : new Error(String(error));
}
|
|
12443
|
+
/**
 * Single-writer work queue.
 *
 * Callers enqueue either "write" items (a list of entries belonging to one
 * file) or "exclusive" items (an arbitrary callback). One background loop,
 * started from the constructor, drains the queue and runs every item
 * sequentially, so at most one item touches `storeEntriesFn` / the exclusive
 * callbacks at a time.
 *
 * Each enqueue call returns a promise that settles when its item has been
 * processed, cancelled, or dropped during shutdown.
 */
var WriteQueue = class {
  db;
  storeEntriesFn;
  apiKey;
  llmClient;
  dbPath;
  batchSize;
  highWatermark;
  retryOnFailure;
  isShutdownRequested;
  // Items waiting to be picked up by the writer loop (write and exclusive).
  queue = [];
  // Number of entries (not items) in queued write items; drives backpressure.
  pendingEntries = 0;
  destroyed = false;
  writerStopping = false;
  // Number of storeEntriesFn calls (including the retry wait) currently in progress.
  inflightWrites = 0;
  // Items taken off the queue whose processing has not finished yet.
  activeWorkItems = 0;
  // fileKey -> count of dispatched, unfinished write items for that file.
  activeByFileKey = /* @__PURE__ */ new Map();
  // fileKey -> resolvers waiting for that file's dispatched items to finish.
  fileWaiters = /* @__PURE__ */ new Map();
  drainWaiters = [];
  writerWaiters = [];

  /**
   * @param {object} options
   * @param {*} options.db - Handle passed through to `storeEntriesFn`.
   * @param {Function} options.storeEntriesFn - Called as `(db, entries, apiKey, storeOptions)`.
   * @param {string} options.apiKey - Passed through to `storeEntriesFn`.
   * @param {*} [options.llmClient] - Forwarded in the store options.
   * @param {string} [options.dbPath] - Forwarded in the store options.
   * @param {number} [options.batchSize=40] - Max entries per store call (floored, min 1).
   * @param {number} [options.highWatermark=500] - Pending-entry limit for backpressure (floored, min 1).
   * @param {boolean} [options.retryOnFailure=true] - Set to `false` to disable the single write retry.
   * @param {Function} [options.isShutdownRequested] - Polled by the writer loop between batches.
   */
  constructor(options) {
    this.db = options.db;
    this.storeEntriesFn = options.storeEntriesFn;
    this.apiKey = options.apiKey;
    this.llmClient = options.llmClient;
    this.dbPath = options.dbPath;
    this.batchSize = Math.max(1, Math.floor(options.batchSize ?? 40));
    this.highWatermark = Math.max(1, Math.floor(options.highWatermark ?? 500));
    this.retryOnFailure = options.retryOnFailure !== false;
    this.isShutdownRequested = options.isShutdownRequested;
    // Deliberately not awaited: the loop runs for the lifetime of the queue and
    // reports per-item failures by rejecting that item's promise.
    void this.runWriterLoop();
  }

  /** Queued entry count plus the number of dispatched, unfinished items. */
  get pendingCount() {
    return this.pendingEntries + this.activeWorkItems;
  }

  /**
   * Enqueues `entries` to be stored for `fileKey`.
   *
   * While accepting the entries would lift the pending-entry count above
   * `highWatermark` (and something is already pending) the call polls every
   * 50 ms before enqueueing.
   *
   * @param {Array} entries - Entries to store; an empty list resolves immediately with zero counters.
   * @param {string} fileKey - Source file identifier, forwarded as `sourceFile`.
   * @param {string} fileHash - Content hash, forwarded as `ingestContentHash`.
   * @returns {Promise<object>} Summed counters of all sub-batch store calls.
   * @throws {ShutdownError} When the queue is (or becomes) destroyed before the item is accepted.
   */
  async push(entries, fileKey, fileHash) {
    if (this.destroyed) {
      throw new ShutdownError("WriteQueue has been destroyed and cannot accept new items.");
    }
    // Nothing to write: answer right away instead of waiting on backpressure first.
    if (entries.length === 0) {
      return { ...ZERO_RESULT };
    }
    while (this.pendingEntries > 0 && this.pendingEntries + entries.length > this.highWatermark) {
      if (this.destroyed) {
        throw new ShutdownError("WriteQueue has been destroyed and cannot accept new items.");
      }
      await sleep(50);
    }
    return await new Promise((resolve, reject) => {
      // Re-check: the queue may have been destroyed while waiting above.
      if (this.destroyed) {
        reject(new ShutdownError("WriteQueue has been destroyed and cannot accept new items."));
        return;
      }
      this.pendingEntries += entries.length;
      this.queue.push({
        kind: "write",
        entries,
        fileKey,
        fileHash,
        resolve,
        reject
      });
      this.wakeWriter();
    });
  }

  /**
   * Rejects every still-queued write item for `fileKey` with `CancelledError`,
   * then waits until no dispatched item for that file is active any more.
   * Items already taken by the writer loop are not interrupted.
   *
   * @param {string} fileKey
   * @returns {Promise<void>}
   */
  async cancel(fileKey) {
    const kept = [];
    const cancelled = [];
    for (const item of this.queue) {
      if (item.kind === "write" && item.fileKey === fileKey) {
        cancelled.push(item);
        this.pendingEntries = Math.max(0, this.pendingEntries - item.entries.length);
      } else {
        kept.push(item);
      }
    }
    if (cancelled.length > 0) {
      // Replace the queue contents in place so the array identity is preserved.
      this.queue.splice(0, this.queue.length, ...kept);
      for (const item of cancelled) {
        item.reject(new CancelledError(`Cancelled queued write for ${fileKey}.`));
      }
      this.resolveDrainIfIdle();
    }
    await this.waitForFileIdle(fileKey);
  }

  /**
   * Enqueues `fn` to run on the writer loop, in queue order relative to
   * other items, so it never overlaps a store call or another exclusive item.
   *
   * @param {Function} fn - Callback (sync or async) taking no arguments.
   * @returns {Promise<*>} Resolves with `fn`'s result; rejects with its error
   *   (wrapped in an `Error` when a non-Error value was thrown).
   * @throws {ShutdownError} When the queue has been destroyed.
   */
  async runExclusive(fn) {
    if (this.destroyed) {
      throw new ShutdownError("WriteQueue has been destroyed and cannot accept new items.");
    }
    return await new Promise((resolve, reject) => {
      if (this.destroyed) {
        reject(new ShutdownError("WriteQueue has been destroyed and cannot accept new items."));
        return;
      }
      this.queue.push({
        kind: "exclusive",
        fn,
        resolve,
        reject
      });
      this.wakeWriter();
    });
  }

  /**
   * Resolves once the queue is idle: nothing queued, nothing dispatched and
   * no store call in progress.
   *
   * @returns {Promise<void>}
   */
  async drain() {
    if (this.isIdle()) {
      return;
    }
    await new Promise((resolve) => {
      this.drainWaiters.push(resolve);
    });
  }

  /**
   * Stops the queue: later `push`/`runExclusive` calls fail and every item
   * still queued is rejected with `ShutdownError`. Items already dispatched
   * keep running. Calling it again is a no-op.
   */
  destroy() {
    if (this.destroyed) {
      return;
    }
    this.stopAndRejectQueued("WriteQueue destroyed before item was processed.");
  }

  /**
   * Background loop: waits for work, takes everything currently queued as one
   * batch and processes it, until the queue is stopped and empty.
   */
  async runWriterLoop() {
    while (true) {
      if (this.isShutdownRequested?.() && this.hasPendingWriteItems()) {
        this.shutdownFromSignal();
      }
      if ((this.writerStopping || this.destroyed) && this.queue.length === 0) {
        break;
      }
      if (this.queue.length === 0) {
        await this.waitForWork();
        continue;
      }
      const batch = this.queue.splice(0, this.queue.length);
      this.markBatchAsDispatched(batch);
      await this.processBatch(batch);
    }
    this.resolveDrainIfIdle();
  }

  /** True when at least one write item is still queued. */
  hasPendingWriteItems() {
    return this.queue.some((item) => item.kind === "write");
  }

  /**
   * Processes a dispatched batch in order. Exclusive items run one by one;
   * each run of consecutive write items is handled as one segment.
   */
  async processBatch(batch) {
    let index = 0;
    while (index < batch.length) {
      const item = batch[index];
      if (!item) {
        index += 1;
        continue;
      }
      if (item.kind === "exclusive") {
        await this.processExclusive(item);
        index += 1;
        continue;
      }
      // Collect the consecutive write items starting at `index`.
      const segment = [];
      while (index < batch.length) {
        const segmentItem = batch[index];
        if (!segmentItem || segmentItem.kind !== "write") {
          break;
        }
        segment.push(segmentItem);
        index += 1;
      }
      await this.processWriteSegment(segment);
    }
  }

  /** Runs one exclusive callback and settles its promise with the outcome. */
  async processExclusive(item) {
    try {
      const result = await item.fn();
      item.resolve(result);
    } catch (error) {
      item.reject(toError(error));
    } finally {
      this.activeWorkItems = Math.max(0, this.activeWorkItems - 1);
      this.resolveDrainIfIdle();
    }
  }

  /**
   * Writes a segment of write items grouped by `fileKey` (groups in order of
   * first appearance), so items of the same file are written back to back.
   */
  async processWriteSegment(segment) {
    const grouped = /* @__PURE__ */ new Map();
    for (const item of segment) {
      const group = grouped.get(item.fileKey);
      if (group) {
        group.push(item);
      } else {
        grouped.set(item.fileKey, [item]);
      }
    }
    for (const items of grouped.values()) {
      for (const item of items) {
        await this.processWriteItem(item);
      }
    }
  }

  /**
   * Stores one write item in sub-batches of at most `batchSize` entries and
   * settles the item's promise with the summed counters or the first failure.
   */
  async processWriteItem(item) {
    let result = { ...ZERO_RESULT };
    const subBatches = chunkEntries2(item.entries, this.batchSize);
    try {
      for (const subBatch of subBatches) {
        const writeResult = await this.writeSubBatchWithRetry(subBatch, item.fileKey, item.fileHash);
        result = mergeBatchResult(result, writeResult);
      }
      item.resolve(result);
    } catch (error) {
      item.reject(toError(error));
    } finally {
      this.activeWorkItems = Math.max(0, this.activeWorkItems - 1);
      this.decrementActiveFile(item.fileKey);
      this.resolveDrainIfIdle();
    }
  }

  /**
   * Calls `storeEntriesFn` for one sub-batch. With `retryOnFailure` enabled a
   * failed call is retried once after a 2 s pause; the last error is thrown
   * when every attempt fails.
   *
   * @returns {Promise<object>} The counters reported by the store call.
   */
  async writeSubBatchWithRetry(entries, fileKey, fileHash) {
    let lastError = null;
    const maxAttempts = this.retryOnFailure ? 2 : 1;
    for (let attempt = 1; attempt <= maxAttempts; attempt += 1) {
      this.inflightWrites += 1;
      try {
        const result = await this.storeEntriesFn(this.db, entries, this.apiKey, {
          sourceFile: fileKey,
          ingestContentHash: fileHash,
          skipIngestLog: true,
          onlineDedup: true,
          skipLlmDedup: false,
          llmClient: this.llmClient,
          dbPath: this.dbPath
        });
        // Copy only the counter fields so callers get a plain result object.
        return {
          added: result.added,
          updated: result.updated,
          skipped: result.skipped,
          superseded: result.superseded,
          llm_dedup_calls: result.llm_dedup_calls
        };
      } catch (error) {
        lastError = toError(error);
        if (attempt < maxAttempts) {
          await sleep(2e3);
        }
      } finally {
        this.inflightWrites = Math.max(0, this.inflightWrites - 1);
        this.resolveDrainIfIdle();
      }
    }
    throw lastError ?? new Error("Write queue failed to store sub-batch.");
  }

  /**
   * Moves a batch from "queued" to "active" bookkeeping: bumps the active item
   * count and, for write items, releases their entries from the backpressure
   * counter and records the file as active.
   */
  markBatchAsDispatched(batch) {
    for (const item of batch) {
      this.activeWorkItems += 1;
      if (item.kind === "write") {
        this.pendingEntries = Math.max(0, this.pendingEntries - item.entries.length);
        this.activeByFileKey.set(item.fileKey, (this.activeByFileKey.get(item.fileKey) ?? 0) + 1);
      }
    }
    this.resolveDrainIfIdle();
  }

  /**
   * Records that one dispatched write item for `fileKey` finished; when it was
   * the last one, wakes everybody waiting in `waitForFileIdle(fileKey)`.
   */
  decrementActiveFile(fileKey) {
    const current = this.activeByFileKey.get(fileKey) ?? 0;
    if (current <= 1) {
      this.activeByFileKey.delete(fileKey);
      const waiters = this.fileWaiters.get(fileKey) ?? [];
      this.fileWaiters.delete(fileKey);
      for (const waiter of waiters) {
        waiter();
      }
      return;
    }
    this.activeByFileKey.set(fileKey, current - 1);
  }

  /** Resolves once no dispatched write item for `fileKey` is active. */
  async waitForFileIdle(fileKey) {
    if ((this.activeByFileKey.get(fileKey) ?? 0) === 0) {
      return;
    }
    await new Promise((resolve) => {
      const existing = this.fileWaiters.get(fileKey);
      if (existing) {
        existing.push(resolve);
      } else {
        this.fileWaiters.set(fileKey, [resolve]);
      }
    });
  }

  /** Promise the writer loop parks on until `wakeWriter()` is called. */
  waitForWork() {
    return new Promise((resolve) => {
      this.writerWaiters.push(resolve);
    });
  }

  /** Releases every parked `waitForWork()` promise. */
  wakeWriter() {
    while (this.writerWaiters.length > 0) {
      const waiter = this.writerWaiters.shift();
      waiter?.();
    }
  }

  /**
   * Shutdown path taken by the writer loop when `isShutdownRequested()` is
   * true while write items are queued: stops the queue and drops everything
   * that has not been dispatched yet.
   */
  shutdownFromSignal() {
    this.stopAndRejectQueued("Shutdown requested. Dropping queued writes.");
  }

  /**
   * Shared teardown for `destroy()` and `shutdownFromSignal()`: marks the queue
   * as stopped, rejects every queued item with a `ShutdownError` carrying
   * `message`, then wakes the writer loop and any drain waiters.
   *
   * @param {string} message - Text for the `ShutdownError` given to dropped items.
   */
  stopAndRejectQueued(message) {
    this.destroyed = true;
    this.writerStopping = true;
    const dropped = this.queue.splice(0, this.queue.length);
    this.pendingEntries = 0;
    for (const item of dropped) {
      item.reject(new ShutdownError(message));
    }
    this.wakeWriter();
    this.resolveDrainIfIdle();
  }

  /**
   * True when there is no outstanding work of any kind. The queue length is
   * checked explicitly because exclusive items do not contribute to
   * `pendingEntries`; without it a queued exclusive callback would be invisible
   * here and `drain()` could resolve before it ran.
   */
  isIdle() {
    return this.queue.length === 0 && this.pendingEntries === 0 && this.activeWorkItems === 0 && this.inflightWrites === 0;
  }

  /** Resolves all pending `drain()` calls if the queue is idle right now. */
  resolveDrainIfIdle() {
    if (!this.isIdle()) {
      return;
    }
    while (this.drainWaiters.length > 0) {
      const waiter = this.drainWaiters.shift();
      waiter?.();
    }
  }
};
|
|
12763
|
+
|
|
12392
12764
|
// src/watch/pid.ts
|
|
12393
12765
|
import fs26 from "fs/promises";
|
|
12394
12766
|
import path25 from "path";
|
|
@@ -12565,7 +12937,7 @@ function parsePositiveInt3(value, fallback, label) {
|
|
|
12565
12937
|
}
|
|
12566
12938
|
return Math.floor(parsed);
|
|
12567
12939
|
}
|
|
12568
|
-
async function
|
|
12940
|
+
/**
 * Pauses the caller for roughly `ms` milliseconds.
 *
 * @param {number} ms - Delay handed to `setTimeout`.
 * @returns {Promise<void>} Settles (with no value) after the timer fires.
 */
async function sleep2(ms) {
  await new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
12571
12943
|
function retryBackoffMs(attempt) {
|
|
@@ -12744,8 +13116,9 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
12744
13116
|
readWatcherPidFn: deps?.readWatcherPidFn ?? readWatcherPid,
|
|
12745
13117
|
resolveWatcherPidPathFn: deps?.resolveWatcherPidPathFn ?? resolveWatcherPidPath,
|
|
12746
13118
|
nowFn: deps?.nowFn ?? (() => /* @__PURE__ */ new Date()),
|
|
12747
|
-
sleepFn: deps?.sleepFn ??
|
|
12748
|
-
shouldShutdownFn: deps?.shouldShutdownFn ?? isShutdownRequested
|
|
13119
|
+
sleepFn: deps?.sleepFn ?? sleep2,
|
|
13120
|
+
shouldShutdownFn: deps?.shouldShutdownFn ?? isShutdownRequested,
|
|
13121
|
+
createWriteQueueFn: deps?.createWriteQueueFn ?? ((opts) => new WriteQueue(opts))
|
|
12749
13122
|
};
|
|
12750
13123
|
const clackOutput = { output: process.stderr };
|
|
12751
13124
|
clack4.intro(banner(), clackOutput);
|
|
@@ -12788,6 +13161,7 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
12788
13161
|
const skipIngested = force ? false : options.skipIngested !== false;
|
|
12789
13162
|
const globPattern = options.glob?.trim() || DEFAULT_GLOB;
|
|
12790
13163
|
const llmConcurrency = parsePositiveInt3(options.concurrency, 5, "--concurrency");
|
|
13164
|
+
const requestedFileWorkers = parsePositiveInt3(options.workers, 10, "--workers");
|
|
12791
13165
|
const retryEnabled = options.retry !== false;
|
|
12792
13166
|
const maxRetries = retryEnabled ? parsePositiveInt3(options.maxRetries, 3, "--max-retries") : 0;
|
|
12793
13167
|
const platformRaw = options.platform?.trim();
|
|
@@ -12818,8 +13192,9 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
12818
13192
|
})
|
|
12819
13193
|
);
|
|
12820
13194
|
const sortedTargets = targetsWithSizes.sort((a, b) => a.size - b.size || a.file.localeCompare(b.file)).map((item, index) => ({ ...item, index }));
|
|
13195
|
+
const fileWorkerCount = Math.min(requestedFileWorkers, Math.max(1, sortedTargets.length));
|
|
12821
13196
|
clack4.log.info(
|
|
12822
|
-
`Ingesting: ${ui.bold(String(sortedTargets.length))} file(s) | Glob: ${globPattern} | Chunk concurrency: ${ui.bold(String(llmConcurrency))} | Skip ingested: ${skipIngested ? "yes" : "no"}`,
|
|
13197
|
+
`Ingesting: ${ui.bold(String(sortedTargets.length))} file(s) | Glob: ${globPattern} | File workers: ${ui.bold(String(fileWorkerCount))} | Chunk concurrency: ${ui.bold(String(llmConcurrency))} | Skip ingested: ${skipIngested ? "yes" : "no"}`,
|
|
12823
13198
|
clackOutput
|
|
12824
13199
|
);
|
|
12825
13200
|
if (sortedTargets.length === 0) {
|
|
@@ -12865,6 +13240,42 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
12865
13240
|
}
|
|
12866
13241
|
const db = resolvedDeps.getDbFn(dbPath);
|
|
12867
13242
|
await resolvedDeps.initDbFn(db);
|
|
13243
|
+
const cleanupDbResources = () => {
|
|
13244
|
+
if (shouldLockDb) {
|
|
13245
|
+
releaseDbLock();
|
|
13246
|
+
}
|
|
13247
|
+
resolvedDeps.closeDbFn(db);
|
|
13248
|
+
};
|
|
13249
|
+
let embeddingApiKey = null;
|
|
13250
|
+
try {
|
|
13251
|
+
if (!embeddingApiKey) {
|
|
13252
|
+
embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
|
|
13253
|
+
}
|
|
13254
|
+
} catch (error) {
|
|
13255
|
+
cleanupDbResources();
|
|
13256
|
+
throw error;
|
|
13257
|
+
}
|
|
13258
|
+
if ((!embeddingApiKey || embeddingApiKey.trim().length === 0) && !dryRun) {
|
|
13259
|
+
cleanupDbResources();
|
|
13260
|
+
throw new Error("Embedding API key is required for ingest. Run 'agenr setup' to configure.");
|
|
13261
|
+
}
|
|
13262
|
+
let queue;
|
|
13263
|
+
try {
|
|
13264
|
+
queue = resolvedDeps.createWriteQueueFn({
|
|
13265
|
+
db,
|
|
13266
|
+
storeEntriesFn: resolvedDeps.storeEntriesFn,
|
|
13267
|
+
apiKey: embeddingApiKey ?? "",
|
|
13268
|
+
llmClient: client,
|
|
13269
|
+
dbPath,
|
|
13270
|
+
batchSize: 40,
|
|
13271
|
+
highWatermark: 500,
|
|
13272
|
+
retryOnFailure: retryEnabled,
|
|
13273
|
+
isShutdownRequested: resolvedDeps.shouldShutdownFn
|
|
13274
|
+
});
|
|
13275
|
+
} catch (error) {
|
|
13276
|
+
cleanupDbResources();
|
|
13277
|
+
throw error;
|
|
13278
|
+
}
|
|
12868
13279
|
const results = new Array(sortedTargets.length);
|
|
12869
13280
|
let totalEntriesExtracted = 0;
|
|
12870
13281
|
let totalEntriesStored = 0;
|
|
@@ -12878,22 +13289,6 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
12878
13289
|
let forceDeletedEntryRows = 0;
|
|
12879
13290
|
let forceDeletedEntrySourceRows = 0;
|
|
12880
13291
|
let completed = 0;
|
|
12881
|
-
let embeddingApiKey = null;
|
|
12882
|
-
if (!options.noPreFetch) {
|
|
12883
|
-
try {
|
|
12884
|
-
embeddingApiKey = resolvedDeps.resolveEmbeddingApiKeyFn(config, process.env);
|
|
12885
|
-
} catch (error) {
|
|
12886
|
-
embeddingApiKey = null;
|
|
12887
|
-
if (verbose) {
|
|
12888
|
-
clack4.log.warn(
|
|
12889
|
-
formatWarn(
|
|
12890
|
-
`Pre-fetch disabled - embedding API key not available: ${error instanceof Error ? error.message : String(error)}`
|
|
12891
|
-
),
|
|
12892
|
-
clackOutput
|
|
12893
|
-
);
|
|
12894
|
-
}
|
|
12895
|
-
}
|
|
12896
|
-
}
|
|
12897
13292
|
let watchStateLoaded = false;
|
|
12898
13293
|
let watchState = createEmptyWatchState();
|
|
12899
13294
|
let cursor = 0;
|
|
@@ -12901,26 +13296,11 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
12901
13296
|
let filesWithChunkFailures = 0;
|
|
12902
13297
|
const chunkStatsByFile = /* @__PURE__ */ new Map();
|
|
12903
13298
|
let firstPassFailedIndexSet = /* @__PURE__ */ new Set();
|
|
12904
|
-
let dbChain = Promise.resolve();
|
|
12905
|
-
const withDbLock = async (fn) => {
|
|
12906
|
-
const previous = dbChain;
|
|
12907
|
-
let release;
|
|
12908
|
-
dbChain = new Promise((resolve) => {
|
|
12909
|
-
release = resolve;
|
|
12910
|
-
});
|
|
12911
|
-
await previous;
|
|
12912
|
-
try {
|
|
12913
|
-
return await fn();
|
|
12914
|
-
} finally {
|
|
12915
|
-
release();
|
|
12916
|
-
}
|
|
12917
|
-
};
|
|
12918
13299
|
const updateProgress = (completedCount, totalCount, verb) => {
|
|
12919
13300
|
if (verbose) {
|
|
12920
13301
|
return;
|
|
12921
13302
|
}
|
|
12922
|
-
|
|
12923
|
-
process.stderr.write(`\r${ui.dim(`${verb} ${completedCount}/${totalCount}${suffix}`)}`);
|
|
13303
|
+
process.stderr.write(`\r${ui.dim(`${verb} ${completedCount}/${totalCount} (queue: ${queue.pendingCount})...`)}`);
|
|
12924
13304
|
};
|
|
12925
13305
|
const clearProgressLine = () => {
|
|
12926
13306
|
if (verbose) {
|
|
@@ -12988,8 +13368,9 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
12988
13368
|
const rawContent = await fs27.readFile(target.file, "utf8");
|
|
12989
13369
|
const ingestByteOffset = Buffer.byteLength(rawContent, "utf8");
|
|
12990
13370
|
fileHash = resolvedDeps.hashTextFn(rawContent);
|
|
13371
|
+
const chunkTickets = [];
|
|
12991
13372
|
if (skipIngested && !force) {
|
|
12992
|
-
const alreadyIngested = await
|
|
13373
|
+
const alreadyIngested = await isAlreadyIngested(db, target.file, fileHash);
|
|
12993
13374
|
if (alreadyIngested) {
|
|
12994
13375
|
fileResult.skipped = true;
|
|
12995
13376
|
fileResult.skipReason = "already ingested";
|
|
@@ -12997,12 +13378,12 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
12997
13378
|
}
|
|
12998
13379
|
}
|
|
12999
13380
|
if (force) {
|
|
13000
|
-
const cleanupStats = await
|
|
13381
|
+
const cleanupStats = await queue.runExclusive(() => cleanupForForceReingest(db, target.file, dryRun));
|
|
13001
13382
|
forceDeletedIngestLogRows += cleanupStats.ingestLogRows;
|
|
13002
13383
|
forceDeletedEntryRows += cleanupStats.entryRows;
|
|
13003
13384
|
forceDeletedEntrySourceRows += cleanupStats.entrySourceRows;
|
|
13004
13385
|
} else {
|
|
13005
|
-
baselineEntryIds = await
|
|
13386
|
+
baselineEntryIds = await getSourceEntryIds(db, target.file);
|
|
13006
13387
|
}
|
|
13007
13388
|
const parsed = await resolvedDeps.parseTranscriptFileFn(target.file, { raw: options.raw === true, verbose });
|
|
13008
13389
|
if (verbose && parsed.warnings.length > 0) {
|
|
@@ -13014,8 +13395,8 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
13014
13395
|
}
|
|
13015
13396
|
}
|
|
13016
13397
|
}
|
|
13017
|
-
const processChunkEntries = async (
|
|
13018
|
-
const normalizedEntries =
|
|
13398
|
+
const processChunkEntries = async (chunkEntries3) => {
|
|
13399
|
+
const normalizedEntries = chunkEntries3.map((entry) => ({
|
|
13019
13400
|
...entry,
|
|
13020
13401
|
...platform ? { platform } : {},
|
|
13021
13402
|
...project ? { project } : {},
|
|
@@ -13026,41 +13407,32 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
13026
13407
|
}));
|
|
13027
13408
|
fileResult.entriesExtracted += normalizedEntries.length;
|
|
13028
13409
|
totalEntriesExtracted += normalizedEntries.length;
|
|
13029
|
-
|
|
13030
|
-
|
|
13410
|
+
if (dryRun || normalizedEntries.length === 0) {
|
|
13411
|
+
chunkTickets.push(
|
|
13412
|
+
Promise.resolve({
|
|
13413
|
+
added: 0,
|
|
13414
|
+
updated: 0,
|
|
13415
|
+
skipped: 0,
|
|
13416
|
+
superseded: 0,
|
|
13417
|
+
llm_dedup_calls: 0
|
|
13418
|
+
})
|
|
13419
|
+
);
|
|
13031
13420
|
return;
|
|
13032
13421
|
}
|
|
13033
|
-
|
|
13034
|
-
|
|
13422
|
+
const deduped = resolvedDeps.deduplicateEntriesFn(normalizedEntries);
|
|
13423
|
+
if (deduped.length === 0) {
|
|
13424
|
+
chunkTickets.push(
|
|
13425
|
+
Promise.resolve({
|
|
13426
|
+
added: 0,
|
|
13427
|
+
updated: 0,
|
|
13428
|
+
skipped: 0,
|
|
13429
|
+
superseded: 0,
|
|
13430
|
+
llm_dedup_calls: 0
|
|
13431
|
+
})
|
|
13432
|
+
);
|
|
13433
|
+
return;
|
|
13035
13434
|
}
|
|
13036
|
-
|
|
13037
|
-
() => resolvedDeps.storeEntriesFn(db, deduped, embeddingApiKey ?? "", {
|
|
13038
|
-
sourceFile: target.file,
|
|
13039
|
-
ingestContentHash: fileHash,
|
|
13040
|
-
skipIngestLog: true,
|
|
13041
|
-
onlineDedup: true,
|
|
13042
|
-
skipLlmDedup: false,
|
|
13043
|
-
llmClient: client,
|
|
13044
|
-
dbPath
|
|
13045
|
-
})
|
|
13046
|
-
);
|
|
13047
|
-
const reinforced = storeResult.updated;
|
|
13048
|
-
const stored = storeResult.added + storeResult.superseded;
|
|
13049
|
-
fileResult.entriesStored += stored;
|
|
13050
|
-
fileResult.entriesSkippedDuplicate += storeResult.skipped;
|
|
13051
|
-
fileResult.entriesReinforced += reinforced;
|
|
13052
|
-
totalEntriesStored += stored;
|
|
13053
|
-
totalEntriesAdded += storeResult.added;
|
|
13054
|
-
totalEntriesUpdated += 0;
|
|
13055
|
-
totalEntriesSkipped += storeResult.skipped;
|
|
13056
|
-
totalEntriesReinforced += reinforced;
|
|
13057
|
-
totalEntriesSuperseded += storeResult.superseded;
|
|
13058
|
-
totalDedupLlmCalls += storeResult.llm_dedup_calls;
|
|
13059
|
-
fileStoreStats.added += storeResult.added;
|
|
13060
|
-
fileStoreStats.updated += storeResult.updated;
|
|
13061
|
-
fileStoreStats.skipped += storeResult.skipped;
|
|
13062
|
-
fileStoreStats.superseded += storeResult.superseded;
|
|
13063
|
-
fileStoreStats.llmDedupCalls += storeResult.llm_dedup_calls;
|
|
13435
|
+
chunkTickets.push(queue.push(deduped, target.file, fileHash));
|
|
13064
13436
|
};
|
|
13065
13437
|
const extracted = await resolvedDeps.extractKnowledgeFromChunksFn({
|
|
13066
13438
|
file: target.file,
|
|
@@ -13093,10 +13465,49 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
13093
13465
|
`All chunks failed during extraction (${chunkLabel}). This is often caused by API rate limits or timeouts; check provider limits/logs and re-run ingest.`
|
|
13094
13466
|
);
|
|
13095
13467
|
}
|
|
13096
|
-
|
|
13468
|
+
const writeResults = await Promise.allSettled(chunkTickets);
|
|
13469
|
+
const writeErrors = [];
|
|
13470
|
+
let cancelledTickets = 0;
|
|
13471
|
+
for (const writeResult of writeResults) {
|
|
13472
|
+
if (writeResult.status === "fulfilled") {
|
|
13473
|
+
const result = writeResult.value;
|
|
13474
|
+
const stored = result.added + result.superseded;
|
|
13475
|
+
fileResult.entriesStored += stored;
|
|
13476
|
+
fileResult.entriesSkippedDuplicate += result.skipped;
|
|
13477
|
+
fileResult.entriesReinforced += result.updated;
|
|
13478
|
+
totalEntriesStored += stored;
|
|
13479
|
+
totalEntriesAdded += result.added;
|
|
13480
|
+
totalEntriesSuperseded += result.superseded;
|
|
13481
|
+
totalEntriesSkipped += result.skipped;
|
|
13482
|
+
totalEntriesReinforced += result.updated;
|
|
13483
|
+
totalDedupLlmCalls += result.llm_dedup_calls;
|
|
13484
|
+
fileStoreStats.added += result.added;
|
|
13485
|
+
fileStoreStats.updated += result.updated;
|
|
13486
|
+
fileStoreStats.skipped += result.skipped;
|
|
13487
|
+
fileStoreStats.superseded += result.superseded;
|
|
13488
|
+
fileStoreStats.llmDedupCalls += result.llm_dedup_calls;
|
|
13489
|
+
continue;
|
|
13490
|
+
}
|
|
13491
|
+
const reason = writeResult.reason;
|
|
13492
|
+
if (reason instanceof CancelledError) {
|
|
13493
|
+
cancelledTickets += 1;
|
|
13494
|
+
continue;
|
|
13495
|
+
}
|
|
13496
|
+
writeErrors.push(errorMessage2(reason));
|
|
13497
|
+
}
|
|
13498
|
+
if (writeErrors.length > 0) {
|
|
13499
|
+
throw new Error(writeErrors.join(" | "));
|
|
13500
|
+
}
|
|
13501
|
+
if (cancelledTickets > 0 && verbose) {
|
|
13502
|
+
clack4.log.warn(
|
|
13503
|
+
formatWarn(`Cancelled ${cancelledTickets} pending write chunk(s) for ${path26.basename(target.file)}.`),
|
|
13504
|
+
clackOutput
|
|
13505
|
+
);
|
|
13506
|
+
}
|
|
13507
|
+
await syncWatchStateOffset(target.file, ingestByteOffset);
|
|
13097
13508
|
if (!dryRun) {
|
|
13098
13509
|
const fileDurationMs = Math.max(0, resolvedDeps.nowFn().getTime() - fileStartedAt.getTime());
|
|
13099
|
-
await
|
|
13510
|
+
await queue.runExclusive(
|
|
13100
13511
|
() => insertIngestLogForFile(db, {
|
|
13101
13512
|
filePath: target.file,
|
|
13102
13513
|
contentHash: fileHash,
|
|
@@ -13109,7 +13520,8 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
13109
13520
|
} catch (error) {
|
|
13110
13521
|
if (fileHash.length > 0) {
|
|
13111
13522
|
try {
|
|
13112
|
-
await
|
|
13523
|
+
await queue.cancel(target.file);
|
|
13524
|
+
await queue.runExclusive(() => cleanupFailedFileIngest(db, target.file, fileHash, baselineEntryIds, dryRun));
|
|
13113
13525
|
} catch (cleanupError) {
|
|
13114
13526
|
fileResult.error = `${errorMessage2(error)} | cleanup failed: ${errorMessage2(cleanupError)}`;
|
|
13115
13527
|
return fileResult;
|
|
@@ -13129,7 +13541,7 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
13129
13541
|
cursor = 0;
|
|
13130
13542
|
completed = 0;
|
|
13131
13543
|
const total = targets.length;
|
|
13132
|
-
const workerCount =
|
|
13544
|
+
const workerCount = Math.min(fileWorkerCount, total);
|
|
13133
13545
|
await Promise.all(
|
|
13134
13546
|
Array.from({ length: workerCount }, async () => {
|
|
13135
13547
|
while (true) {
|
|
@@ -13247,6 +13659,17 @@ async function runIngestCommand(inputPaths, options, deps) {
|
|
|
13247
13659
|
}
|
|
13248
13660
|
} finally {
|
|
13249
13661
|
clearProgressLine();
|
|
13662
|
+
try {
|
|
13663
|
+
await queue.drain();
|
|
13664
|
+
} catch (error) {
|
|
13665
|
+
if (error instanceof ShutdownError) {
|
|
13666
|
+
clack4.log.warn(formatWarn(`Write queue shutdown before full drain: ${error.message}`), clackOutput);
|
|
13667
|
+
} else {
|
|
13668
|
+
clack4.log.warn(formatWarn(`Write queue drain failed: ${errorMessage2(error)}`), clackOutput);
|
|
13669
|
+
}
|
|
13670
|
+
} finally {
|
|
13671
|
+
queue.destroy();
|
|
13672
|
+
}
|
|
13250
13673
|
if (!dryRun) {
|
|
13251
13674
|
try {
|
|
13252
13675
|
await walCheckpoint(db);
|
|
@@ -15844,7 +16267,7 @@ var DEFAULT_WAL_CHECKPOINT_INTERVAL_MS = 3e4;
|
|
|
15844
16267
|
function isFileNotFound(error) {
|
|
15845
16268
|
return error.code === "ENOENT";
|
|
15846
16269
|
}
|
|
15847
|
-
async function
|
|
16270
|
+
async function sleep3(ms) {
|
|
15848
16271
|
await new Promise((resolve) => setTimeout(resolve, ms));
|
|
15849
16272
|
}
|
|
15850
16273
|
function formatError2(error) {
|
|
@@ -15931,7 +16354,7 @@ async function runWatcher(options, deps) {
|
|
|
15931
16354
|
rmFn: deps?.rmFn ?? fs33.rm,
|
|
15932
16355
|
watchFn: deps?.watchFn ?? watchFs,
|
|
15933
16356
|
nowFn: deps?.nowFn ?? (() => /* @__PURE__ */ new Date()),
|
|
15934
|
-
sleepFn: deps?.sleepFn ??
|
|
16357
|
+
sleepFn: deps?.sleepFn ?? sleep3,
|
|
15935
16358
|
shouldShutdownFn: deps?.shouldShutdownFn ?? isShutdownRequested
|
|
15936
16359
|
};
|
|
15937
16360
|
const directoryMode = options.directoryMode === true;
|
|
@@ -16192,13 +16615,13 @@ async function runWatcher(options, deps) {
|
|
|
16192
16615
|
options.onWarn?.(warning);
|
|
16193
16616
|
}
|
|
16194
16617
|
}
|
|
16195
|
-
const processChunkEntries = async (
|
|
16618
|
+
const processChunkEntries = async (chunkEntries3) => {
|
|
16196
16619
|
const platformTag = currentPlatform && currentPlatform !== "mtime" ? normalizeKnowledgePlatform(currentPlatform) ?? void 0 : void 0;
|
|
16197
|
-
const taggedEntries = platformTag || cachedProject ?
|
|
16620
|
+
const taggedEntries = platformTag || cachedProject ? chunkEntries3.map((entry) => ({
|
|
16198
16621
|
...entry,
|
|
16199
16622
|
...platformTag ? { platform: platformTag } : {},
|
|
16200
16623
|
...cachedProject ? { project: cachedProject } : {}
|
|
16201
|
-
})) :
|
|
16624
|
+
})) : chunkEntries3;
|
|
16202
16625
|
cycleResult.entriesExtracted += taggedEntries.length;
|
|
16203
16626
|
const deduped = resolvedDeps.deduplicateEntriesFn(taggedEntries);
|
|
16204
16627
|
if (options.dryRun || deduped.length === 0) {
|
|
@@ -17677,7 +18100,12 @@ function createProgram() {
|
|
|
17677
18100
|
const result = await runTodoCommand(subcommand, subject, { db: opts.db });
|
|
17678
18101
|
process.exitCode = result.exitCode;
|
|
17679
18102
|
});
|
|
17680
|
-
program.command("ingest").description("Bulk-ingest knowledge from files and directories").argument("<paths...>", "Files or directories to process").option("--glob <pattern>", "File filter glob", "**/*.{jsonl,md,txt}").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--platform <name>", "Platform tag: openclaw, claude-code, codex").option("--project <name>", "Project tag (lowercase).", (val, prev) => [...prev, val], []).option("--verbose", "Show per-file details", false).option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--dry-run", "Extract without storing", false).option("--json", "Output JSON results", false).option("--concurrency <n>", "Parallel chunk extractions", parseIntOption, 5).option(
|
|
18103
|
+
program.command("ingest").description("Bulk-ingest knowledge from files and directories").argument("<paths...>", "Files or directories to process").option("--glob <pattern>", "File filter glob", "**/*.{jsonl,md,txt}").option("--db <path>", "Database path override").option("--model <model>", "LLM model to use").option("--provider <name>", "LLM provider: anthropic, openai, openai-codex").option("--platform <name>", "Platform tag: openclaw, claude-code, codex").option("--project <name>", "Project tag (lowercase).", (val, prev) => [...prev, val], []).option("--verbose", "Show per-file details", false).option("--raw", "Bypass adapter filtering (pass transcripts through unmodified)", false).option("--dry-run", "Extract without storing", false).option("--json", "Output JSON results", false).option("--concurrency <n>", "Parallel chunk extractions", parseIntOption, 5).option(
|
|
18104
|
+
"--workers <n>",
|
|
18105
|
+
"Number of files to process in parallel (default: 10). Each worker uses --concurrency chunk parallelism. Total concurrent LLM calls = workers x concurrency. Reduce if hitting rate limits. Writes retry once per sub-batch unless --no-retry is set.",
|
|
18106
|
+
parseIntOption,
|
|
18107
|
+
10
|
|
18108
|
+
).option("--skip-ingested", "Skip already-ingested files", true).option("--no-retry", "Disable auto-retry for failed files").option("--no-pre-fetch", "Disable elaborative encoding pre-fetch").option("--max-retries <n>", "Maximum auto-retry attempts", parseIntOption, 3).option("--force", "Clean re-ingest: delete previous rows for each file before processing", false).action(async (paths, opts) => {
|
|
17681
18109
|
const result = await runIngestCommand(paths, {
|
|
17682
18110
|
...opts,
|
|
17683
18111
|
noPreFetch: opts.noPreFetch === true
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agenr",
|
|
3
|
-
"version": "0.7.
|
|
3
|
+
"version": "0.7.18",
|
|
4
4
|
"openclaw": {
|
|
5
5
|
"extensions": [
|
|
6
6
|
"dist/openclaw-plugin/index.js"
|
|
@@ -11,6 +11,13 @@
|
|
|
11
11
|
"bin": {
|
|
12
12
|
"agenr": "dist/cli.js"
|
|
13
13
|
},
|
|
14
|
+
"scripts": {
|
|
15
|
+
"build": "tsup src/cli.ts src/cli-main.ts src/openclaw-plugin/index.ts --format esm --dts",
|
|
16
|
+
"dev": "tsup src/cli.ts src/cli-main.ts --format esm --watch",
|
|
17
|
+
"test": "vitest run",
|
|
18
|
+
"test:watch": "vitest",
|
|
19
|
+
"typecheck": "tsc --noEmit"
|
|
20
|
+
},
|
|
14
21
|
"dependencies": {
|
|
15
22
|
"@clack/prompts": "^1.0.1",
|
|
16
23
|
"@libsql/client": "^0.17.0",
|
|
@@ -54,11 +61,9 @@
|
|
|
54
61
|
"README.md"
|
|
55
62
|
],
|
|
56
63
|
"author": "agenr-ai",
|
|
57
|
-
"
|
|
58
|
-
"
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
"test:watch": "vitest",
|
|
62
|
-
"typecheck": "tsc --noEmit"
|
|
64
|
+
"pnpm": {
|
|
65
|
+
"overrides": {
|
|
66
|
+
"fast-xml-parser": "^5.3.6"
|
|
67
|
+
}
|
|
63
68
|
}
|
|
64
|
-
}
|
|
69
|
+
}
|