agenr 0.9.15 → 0.9.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/cli-main.js +225 -53
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 0.9.17 - 2026-02-27
|
|
4
|
+
|
|
5
|
+
### Changed
|
|
6
|
+
- Optimized LLM dedup in consolidate clustering: batch up to 10 pairs per API
|
|
7
|
+
call with 5 concurrent batches. Reduces a 2400-pair dedup queue from ~60min
|
|
8
|
+
(sequential, 1 call per pair) to ~2min.
|
|
9
|
+
|
|
10
|
+
## 0.9.16 - 2026-02-27
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- Added progress logging throughout the consolidate pipeline. Pairwise
|
|
14
|
+
similarity scan, rules phases, cluster processing, LLM dedup checks, and
|
|
15
|
+
LLM merge calls now report progress so users can see the system is working.
|
|
16
|
+
Phase-level progress logs are always shown (not gated behind `--verbose`).
|
|
17
|
+
- Added live cluster progress updates with ETA during consolidation phases so
|
|
18
|
+
long-running Phase 1, Phase 2, and Phase 3 work shows continuous terminal
|
|
19
|
+
activity.
|
|
20
|
+
|
|
3
21
|
## 0.9.14 - 2026-02-27
|
|
4
22
|
|
|
5
23
|
### Fixed
|
package/dist/cli-main.js
CHANGED
|
@@ -3741,6 +3741,13 @@ function collapsePreview(text4, maxLength = 80) {
|
|
|
3741
3741
|
}
|
|
3742
3742
|
return `${collapsed.slice(0, maxLength - 3)}...`;
|
|
3743
3743
|
}
|
|
3744
|
+
/**
 * Format a millisecond duration as whole seconds with an "s" suffix.
 *
 * The duration is rounded to the nearest second and negative results are
 * clamped to zero.
 *
 * @param {number} ms - Duration in milliseconds.
 * @returns {string} The rounded, non-negative second count followed by "s".
 */
function formatSeconds(ms) {
  const roundedSeconds = Math.round(ms / 1000);
  const nonNegativeSeconds = Math.max(0, roundedSeconds);
  return `${nonNegativeSeconds}s`;
}
|
|
3748
|
+
/**
 * Format a count for log output using en-US digit grouping.
 *
 * @param {number} value - The count to format.
 * @returns {string} The result of `value.toLocaleString("en-US")`.
 */
function formatCount(value) {
  const grouped = value.toLocaleString("en-US");
  return grouped;
}
|
|
3744
3751
|
function forgettingScore(entry, now) {
|
|
3745
3752
|
const ageDays2 = parseDaysBetween(now, entry.created_at);
|
|
3746
3753
|
const recallCount = entry.recall_count ?? 0;
|
|
@@ -3932,6 +3939,13 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
3932
3939
|
if (entries.length < 2) {
|
|
3933
3940
|
return 0;
|
|
3934
3941
|
}
|
|
3942
|
+
const totalComparisons = entries.length * (entries.length - 1) / 2;
|
|
3943
|
+
options.onLog(
|
|
3944
|
+
`[merge] Computing pairwise similarity for ${entries.length} entries (${formatCount(totalComparisons)} comparisons)...`
|
|
3945
|
+
);
|
|
3946
|
+
const scanStartedAt = Date.now();
|
|
3947
|
+
let checkedPairs = 0;
|
|
3948
|
+
let nextProgressPercent = 10;
|
|
3935
3949
|
const entryById = new Map(entries.map((entry) => [entry.id, entry]));
|
|
3936
3950
|
const unionFind = new UnionFind();
|
|
3937
3951
|
for (const entry of entries) {
|
|
@@ -3941,6 +3955,7 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
3941
3955
|
const entry = entries[i];
|
|
3942
3956
|
for (let j = i + 1; j < entries.length; j += 1) {
|
|
3943
3957
|
const candidate = entries[j];
|
|
3958
|
+
checkedPairs += 1;
|
|
3944
3959
|
const similarity = cosineSim(entry.embedding, candidate.embedding);
|
|
3945
3960
|
if (similarity <= MERGE_SIMILARITY_THRESHOLD) {
|
|
3946
3961
|
continue;
|
|
@@ -3956,6 +3971,14 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
3956
3971
|
}
|
|
3957
3972
|
unionFind.union(entry.id, candidate.id);
|
|
3958
3973
|
}
|
|
3974
|
+
while (nextProgressPercent <= 100 && i + 1 >= Math.ceil(entries.length * nextProgressPercent / 100)) {
|
|
3975
|
+
const elapsedMs = Date.now() - scanStartedAt;
|
|
3976
|
+
const remainingPairs = Math.max(totalComparisons - checkedPairs, 0);
|
|
3977
|
+
const etaMs = checkedPairs > 0 ? elapsedMs / checkedPairs * remainingPairs : 0;
|
|
3978
|
+
const etaSuffix = checkedPairs > 0 ? ` ~${formatSeconds(etaMs)} remaining` : "";
|
|
3979
|
+
options.onLog(`[merge] ...${nextProgressPercent}% (${formatCount(checkedPairs)} pairs checked)${etaSuffix}`);
|
|
3980
|
+
nextProgressPercent += 10;
|
|
3981
|
+
}
|
|
3959
3982
|
}
|
|
3960
3983
|
const groups = /* @__PURE__ */ new Map();
|
|
3961
3984
|
for (const entry of entries) {
|
|
@@ -3964,6 +3987,9 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
3964
3987
|
current.push(entry);
|
|
3965
3988
|
groups.set(root, current);
|
|
3966
3989
|
}
|
|
3990
|
+
const groupedCount = Array.from(groups.values()).filter((group) => group.length >= 2).length;
|
|
3991
|
+
const scanSeconds = ((Date.now() - scanStartedAt) / 1e3).toFixed(1);
|
|
3992
|
+
options.onLog(`[merge] Similarity scan complete: ${groupedCount} groups found in ${scanSeconds}s`);
|
|
3967
3993
|
let mergedCount = 0;
|
|
3968
3994
|
for (const rawGroup of groups.values()) {
|
|
3969
3995
|
if (rawGroup.length < 2) {
|
|
@@ -4126,17 +4152,44 @@ async function consolidateRules(db, dbPath, options = {}) {
|
|
|
4126
4152
|
let expiredCount = 0;
|
|
4127
4153
|
let mergedCount = 0;
|
|
4128
4154
|
let orphanedRelationsCleaned = 0;
|
|
4155
|
+
const mergePassCount = 1;
|
|
4156
|
+
const runRulePasses = async () => {
|
|
4157
|
+
for (let pass = 1; pass <= mergePassCount; pass += 1) {
|
|
4158
|
+
onLog("[rules] Pruning expired entries...");
|
|
4159
|
+
expiredCount += await expireDecayedEntries(db, now, {
|
|
4160
|
+
dryRun,
|
|
4161
|
+
verbose,
|
|
4162
|
+
onLog,
|
|
4163
|
+
platform,
|
|
4164
|
+
project,
|
|
4165
|
+
excludeProject
|
|
4166
|
+
});
|
|
4167
|
+
onLog(`[rules] Pass ${pass}: merging near-exact duplicates...`);
|
|
4168
|
+
mergedCount += await mergeNearExactDuplicates(db, {
|
|
4169
|
+
dryRun,
|
|
4170
|
+
verbose,
|
|
4171
|
+
onLog,
|
|
4172
|
+
platform,
|
|
4173
|
+
project,
|
|
4174
|
+
excludeProject
|
|
4175
|
+
});
|
|
4176
|
+
}
|
|
4177
|
+
};
|
|
4129
4178
|
if (dryRun) {
|
|
4130
|
-
|
|
4131
|
-
|
|
4132
|
-
|
|
4179
|
+
await runRulePasses();
|
|
4180
|
+
if (!skipOrphanCleanup) {
|
|
4181
|
+
onLog("[rules] Cleaning orphaned relations...");
|
|
4182
|
+
orphanedRelationsCleaned = await cleanOrphanedRelations(db, true);
|
|
4183
|
+
}
|
|
4133
4184
|
} else {
|
|
4134
4185
|
await db.execute("BEGIN");
|
|
4135
4186
|
try {
|
|
4136
4187
|
await ensureExpiredSentinel(db);
|
|
4137
|
-
|
|
4138
|
-
|
|
4139
|
-
|
|
4188
|
+
await runRulePasses();
|
|
4189
|
+
if (!skipOrphanCleanup) {
|
|
4190
|
+
onLog("[rules] Cleaning orphaned relations...");
|
|
4191
|
+
orphanedRelationsCleaned = await cleanOrphanedRelations(db, false);
|
|
4192
|
+
}
|
|
4140
4193
|
await db.execute("COMMIT");
|
|
4141
4194
|
} catch (error) {
|
|
4142
4195
|
try {
|
|
@@ -4201,15 +4254,26 @@ var DEFAULT_IDEMPOTENCY_DAYS = 7;
|
|
|
4201
4254
|
// Module-level constants. `var` is kept deliberately: these look like
// bundler-emitted top-level bindings, and switching to `const` would change
// their hoisting behavior — NOTE(review): confirm before modernizing.
var DEFAULT_NEIGHBOR_LIMIT = 20;
var MAX_ACTIVE_EMBEDDED_ENTRIES2 = 20000;
var MILLISECONDS_PER_DAY = 24 * 60 * 60 * 1000;

// Name of the tool offered to the model for batched dedup answers.
var LLM_DEDUP_BATCH_TOOL_NAME = "batch_dedup_check";
// Queued pairs are split into batches of this many pairs per LLM call.
var LLM_DEDUP_BATCH_SIZE = 10;
// This many batches are dispatched together before their results are awaited.
var LLM_DEDUP_CONCURRENCY = 5;

// Schema for a single dedup verdict, built with the `Type` schema builder that
// is in scope in this file (presumably TypeBox — confirm against the imports).
var LLM_DEDUP_TOOL_SCHEMA = Type.Object({
  same: Type.Boolean(),
  reason: Type.String()
});

// Schema for the batched tool call: one result object per numbered pair.
var LLM_DEDUP_BATCH_RESULT_ITEM = {
  pair: Type.Number(),
  same: Type.Boolean(),
  reason: Type.Optional(Type.String())
};
var LLM_DEDUP_BATCH_TOOL_SCHEMA = Type.Object({
  results: Type.Array(Type.Object(LLM_DEDUP_BATCH_RESULT_ITEM))
});

// Tool definition passed to the model in the batched dedup request.
var LLM_DEDUP_BATCH_TOOL = {
  name: LLM_DEDUP_BATCH_TOOL_NAME,
  description: "Return dedup results for all pairs",
  parameters: LLM_DEDUP_BATCH_TOOL_SCHEMA
};
|
|
4214
4278
|
function toNumber5(value) {
|
|
4215
4279
|
if (typeof value === "number") {
|
|
@@ -4250,71 +4314,99 @@ function normalizeSubject2(value) {
|
|
|
4250
4314
|
/**
 * Build an order-independent key for a pair of identifiers.
 *
 * The smaller operand (per the `<` comparison) always comes first, so
 * `pairKey(a, b)` and `pairKey(b, a)` yield the same string.
 *
 * @param {string} a - First identifier.
 * @param {string} b - Second identifier.
 * @returns {string} The two identifiers joined by "|", smaller one first.
 */
function pairKey(a, b) {
  if (a < b) {
    return `${a}|${b}`;
  }
  return `${b}|${a}`;
}
|
|
4253
|
-
function
|
|
4317
|
+
/**
 * Build the LLM request context for a batched dedup check.
 *
 * Each pair is rendered as a numbered block ("Pair 1:", "Pair 2:", ...). The
 * numbering is 1-based and follows the order of `pairs`, so a caller can map
 * the model's reported pair numbers back to array positions.
 *
 * @param {Array<{entry: {content: string}, candidate: {content: string}}>} pairs
 *   Pairs of entries to compare; only `content` is read from each side.
 * @returns {{systemPrompt: string, messages: Array<object>, tools: Array<object>}}
 *   A context with the system prompt, one user message holding all pair
 *   blocks, and the batch dedup tool.
 */
function buildLlmDedupBatchContext(pairs) {
  // Take the tool name from the tool we actually offer, so the instruction in
  // the prompt cannot drift from the tool definition.
  const toolName = LLM_DEDUP_BATCH_TOOL.name;
  const systemPrompt = [
    "You are a deduplication assistant for knowledge entries.",
    "For each numbered pair, decide if they express the same knowledge.",
    `Call ${toolName} once with your results.`
  ].join("\n");
  const pairBlocks = pairs.map(
    (pair, index) => [
      `Pair ${index + 1}:`,
      ` Entry A: ${pair.entry.content}`,
      ` Entry B: ${pair.candidate.content}`
    ].join("\n")
  ).join("\n\n");
  return {
    systemPrompt,
    messages: [
      {
        role: "user",
        content: pairBlocks,
        timestamp: Date.now()
      }
    ],
    tools: [LLM_DEDUP_BATCH_TOOL]
  };
}
|
|
4277
|
-
function
|
|
4342
|
+
/**
 * Extract batched dedup results from the first usable tool call in a message.
 *
 * Scans `message.content` for a block of type "toolCall" whose name matches
 * LLM_DEDUP_BATCH_TOOL_NAME and whose arguments carry a `results` array.
 * Malformed result items are dropped rather than failing the whole batch.
 *
 * @param {{content: Iterable<object>}} message - Assistant message to inspect.
 * @returns {Array<{pair: number, same: boolean, reason: (string|undefined)}>|null}
 *   The validated results of the first matching tool call, or null when no
 *   matching tool call with a `results` array is present.
 */
function extractLlmDedupBatchFromToolCall(message) {
  for (const block of message.content) {
    if (block.type !== "toolCall" || block.name !== LLM_DEDUP_BATCH_TOOL_NAME) {
      continue;
    }
    const args = block.arguments;
    if (!args || !Array.isArray(args.results)) {
      continue;
    }
    // Number.isFinite rejects non-numbers as well as NaN/±Infinity, which
    // `typeof x === "number"` would let through even though they can never
    // identify a pair.
    const isUsable = (result) => typeof result === "object" && result !== null && Number.isFinite(result.pair) && typeof result.same === "boolean";
    return args.results.filter(isUsable).map((result) => ({
      pair: result.pair,
      same: result.same,
      reason: typeof result.reason === "string" ? result.reason : void 0
    }));
  }
  return null;
}
|
|
4293
|
-
async function
|
|
4362
|
+
/**
 * Ask the LLM, in a single call, which of the given pairs are duplicates.
 *
 * The call is best-effort: a timeout, a thrown error, an error response, or a
 * response without a usable tool call all yield "no match" for every pair, so
 * a failed check never causes entries to be merged.
 *
 * @param {object} llmClient - Client providing `resolvedModel.model` and
 *   `credentials.apiKey` for the request.
 * @param {Array<{entry: object, candidate: object}>} pairs - Pairs to check.
 * @returns {Promise<boolean[]>} One boolean per input pair, in input order;
 *   true only where the model reported the pair as the same knowledge.
 */
async function llmDedupCheckBatch(llmClient, pairs) {
  if (pairs.length === 0) {
    return [];
  }
  const fallback = Array.from({ length: pairs.length }, () => false);
  try {
    const timeoutMs = 3e4;
    let timer;
    // Race the request against a timeout; the timer is always cleared so it
    // cannot keep the process alive after the request settles.
    const response = await Promise.race([
      runSimpleStream({
        model: llmClient.resolvedModel.model,
        context: buildLlmDedupBatchContext(pairs),
        options: {
          apiKey: llmClient.credentials.apiKey
        },
        verbose: false
      }),
      new Promise((_, reject) => {
        timer = setTimeout(() => reject(new Error("llmDedupCheckBatch timed out")), timeoutMs);
      })
    ]).finally(() => clearTimeout(timer));
    if (response.stopReason === "error" || response.errorMessage) {
      return fallback;
    }
    const parsed = extractLlmDedupBatchFromToolCall(response);
    if (!parsed) {
      return fallback;
    }
    const results = [...fallback];
    for (const result of parsed) {
      // Pair numbers in the prompt are 1-based.
      const pairIndex = Math.trunc(result.pair) - 1;
      // The integer check also rejects NaN, which would slip past both range
      // comparisons and add a stray "NaN" property to the results array.
      if (!Number.isInteger(pairIndex) || pairIndex < 0 || pairIndex >= results.length) {
        continue;
      }
      results[pairIndex] = result.same;
    }
    return results;
  } catch {
    // Deliberate best-effort: any failure means "treat all pairs as distinct".
    return fallback;
  }
}
|
|
4403
|
+
/**
 * Split an array into consecutive chunks of at most `size` items.
 *
 * Order is preserved and the last chunk may be shorter. An empty input yields
 * an empty array.
 *
 * @param {Array<*>} items - Items to split; not modified.
 * @param {number} size - Maximum chunk length; must be a positive integer.
 * @returns {Array<Array<*>>} The chunks, in input order.
 * @throws {RangeError} If `size` is not a positive integer. Without this
 *   guard a zero or negative size would loop forever and NaN would return a
 *   single empty chunk.
 */
function chunkPairs(items, size) {
  if (!Number.isInteger(size) || size < 1) {
    throw new RangeError(`chunkPairs: size must be a positive integer, got ${size}`);
  }
  const chunks = [];
  for (let start = 0; start < items.length; start += size) {
    chunks.push(items.slice(start, start + size));
  }
  return chunks;
}
|
|
4318
4410
|
function parseDaysSince(value, now) {
|
|
4319
4411
|
const parsed = new Date(value);
|
|
4320
4412
|
if (Number.isNaN(parsed.getTime())) {
|
|
@@ -4421,7 +4513,9 @@ async function buildClusters(db, options = {}) {
|
|
|
4421
4513
|
const entryById = new Map(candidates.map((entry) => [entry.id, entry]));
|
|
4422
4514
|
const unionFind = new UnionFind();
|
|
4423
4515
|
const looseUnionPairs = /* @__PURE__ */ new Set();
|
|
4516
|
+
const llmDedupQueue = [];
|
|
4424
4517
|
let llmDedupCalls = 0;
|
|
4518
|
+
let llmDedupCheckedPairs = 0;
|
|
4425
4519
|
let llmDedupMatches = 0;
|
|
4426
4520
|
for (const entry of candidates) {
|
|
4427
4521
|
unionFind.add(entry.id);
|
|
@@ -4450,13 +4544,47 @@ async function buildClusters(db, options = {}) {
|
|
|
4450
4544
|
if (!llmClient) {
|
|
4451
4545
|
continue;
|
|
4452
4546
|
}
|
|
4453
|
-
|
|
4454
|
-
|
|
4455
|
-
|
|
4456
|
-
|
|
4457
|
-
|
|
4458
|
-
|
|
4547
|
+
llmDedupQueue.push({ entry, candidate, key });
|
|
4548
|
+
}
|
|
4549
|
+
}
|
|
4550
|
+
if (llmClient && llmDedupQueue.length > 0) {
|
|
4551
|
+
const startedAt = Date.now();
|
|
4552
|
+
const batches = chunkPairs(llmDedupQueue, LLM_DEDUP_BATCH_SIZE);
|
|
4553
|
+
for (let i = 0; i < batches.length; i += LLM_DEDUP_CONCURRENCY) {
|
|
4554
|
+
const batchGroup = batches.slice(i, i + LLM_DEDUP_CONCURRENCY);
|
|
4555
|
+
const settled = await Promise.allSettled(
|
|
4556
|
+
batchGroup.map(
|
|
4557
|
+
(batch) => llmDedupCheckBatch(
|
|
4558
|
+
llmClient,
|
|
4559
|
+
batch.map((pair) => ({
|
|
4560
|
+
entry: pair.entry,
|
|
4561
|
+
candidate: pair.candidate
|
|
4562
|
+
}))
|
|
4563
|
+
)
|
|
4564
|
+
)
|
|
4565
|
+
);
|
|
4566
|
+
llmDedupCalls += batchGroup.length;
|
|
4567
|
+
for (let batchIndex = 0; batchIndex < batchGroup.length; batchIndex += 1) {
|
|
4568
|
+
const batch = batchGroup[batchIndex];
|
|
4569
|
+
const result2 = settled[batchIndex];
|
|
4570
|
+
const matches = result2 && result2.status === "fulfilled" ? result2.value : Array.from({ length: batch.length }, () => false);
|
|
4571
|
+
llmDedupCheckedPairs += batch.length;
|
|
4572
|
+
for (let pairIndex = 0; pairIndex < batch.length; pairIndex += 1) {
|
|
4573
|
+
const pair = batch[pairIndex];
|
|
4574
|
+
if (matches[pairIndex] !== true) {
|
|
4575
|
+
continue;
|
|
4576
|
+
}
|
|
4577
|
+
llmDedupMatches += 1;
|
|
4578
|
+
looseUnionPairs.add(pair.key);
|
|
4579
|
+
unionFind.union(pair.entry.id, pair.candidate.id);
|
|
4580
|
+
}
|
|
4459
4581
|
}
|
|
4582
|
+
const elapsedSeconds = Math.max(1, Math.floor((Date.now() - startedAt) / 1e3));
|
|
4583
|
+
const remainingPairs = Math.max(0, llmDedupQueue.length - llmDedupCheckedPairs);
|
|
4584
|
+
const estimatedRemainingSeconds = Math.round(elapsedSeconds / llmDedupCheckedPairs * remainingPairs);
|
|
4585
|
+
onLog(
|
|
4586
|
+
`[dedup] Checked ${llmDedupCheckedPairs}/${llmDedupQueue.length} pairs (${llmDedupMatches} matched) ~${estimatedRemainingSeconds}s remaining`
|
|
4587
|
+
);
|
|
4460
4588
|
}
|
|
4461
4589
|
}
|
|
4462
4590
|
const groups = /* @__PURE__ */ new Map();
|
|
@@ -7916,6 +8044,10 @@ async function mergeCluster(db, cluster, llmClient, apiKey, options = {}) {
|
|
|
7916
8044
|
);
|
|
7917
8045
|
}
|
|
7918
8046
|
}
|
|
8047
|
+
const subjectPreview = truncateContent(cluster.entries[0]?.subject ?? "", 60);
|
|
8048
|
+
onLog(
|
|
8049
|
+
`[merge-llm] Merging cluster of ${cluster.entries.length} entries (subject: "${subjectPreview || "unknown"}")`
|
|
8050
|
+
);
|
|
7919
8051
|
let mergeResult = null;
|
|
7920
8052
|
try {
|
|
7921
8053
|
const response = await runSimpleStream({
|
|
@@ -8376,6 +8508,41 @@ async function runFinalization(db, dryRun, onWarn, deps) {
|
|
|
8376
8508
|
async function processPhaseClusters(params, deps) {
|
|
8377
8509
|
const stats = defaultClusterStats();
|
|
8378
8510
|
stats.clustersFound = params.clusters.length;
|
|
8511
|
+
const onLog = params.options.onLog ?? (() => void 0);
|
|
8512
|
+
const showLiveProgress = process.stderr.isTTY && params.options.verbose !== true;
|
|
8513
|
+
const liveLineWidth = 120;
|
|
8514
|
+
const phaseStartedAt = Date.now();
|
|
8515
|
+
const formatEta = (ms) => {
|
|
8516
|
+
const seconds = Math.max(0, Math.round(ms / 1e3));
|
|
8517
|
+
if (seconds < 60) {
|
|
8518
|
+
return `${seconds}s`;
|
|
8519
|
+
}
|
|
8520
|
+
const minutes = Math.floor(seconds / 60);
|
|
8521
|
+
const remainingSeconds = seconds % 60;
|
|
8522
|
+
return remainingSeconds === 0 ? `${minutes}m` : `${minutes}m ${remainingSeconds}s`;
|
|
8523
|
+
};
|
|
8524
|
+
const updateLiveProgress = (clusterIndex, totalClusters) => {
|
|
8525
|
+
if (!showLiveProgress) {
|
|
8526
|
+
return;
|
|
8527
|
+
}
|
|
8528
|
+
const completedClusters = clusterIndex - 1;
|
|
8529
|
+
let etaSuffix = "";
|
|
8530
|
+
if (completedClusters >= 2) {
|
|
8531
|
+
const elapsedMs = Date.now() - phaseStartedAt;
|
|
8532
|
+
const etaMs = (totalClusters - completedClusters) * (elapsedMs / completedClusters);
|
|
8533
|
+
etaSuffix = ` ~${formatEta(etaMs)} remaining`;
|
|
8534
|
+
}
|
|
8535
|
+
process.stderr.write(
|
|
8536
|
+
`\rPhase ${params.phase}: Processing cluster ${clusterIndex}/${totalClusters} (${params.type}) ...${etaSuffix}`
|
|
8537
|
+
);
|
|
8538
|
+
};
|
|
8539
|
+
const clearLiveProgress = () => {
|
|
8540
|
+
if (!showLiveProgress) {
|
|
8541
|
+
return;
|
|
8542
|
+
}
|
|
8543
|
+
process.stderr.write("\r");
|
|
8544
|
+
process.stderr.write(`${" ".repeat(liveLineWidth)}\r`);
|
|
8545
|
+
};
|
|
8379
8546
|
const pending = [];
|
|
8380
8547
|
for (let i = 0; i < params.clusters.length; i += 1) {
|
|
8381
8548
|
const cluster = params.clusters[i];
|
|
@@ -8395,10 +8562,14 @@ async function processPhaseClusters(params, deps) {
|
|
|
8395
8562
|
params.context.batchReached = true;
|
|
8396
8563
|
break;
|
|
8397
8564
|
}
|
|
8565
|
+
clearLiveProgress();
|
|
8566
|
+
const clusterNumber = stats.clustersProcessed + 1;
|
|
8567
|
+
onLog(`[phase ${params.phase}] Processing cluster ${clusterNumber}/${pending.length}...`);
|
|
8568
|
+
updateLiveProgress(clusterNumber, pending.length);
|
|
8398
8569
|
const outcome = await deps.mergeClusterFn(params.db, item.cluster, params.llmClient, params.embeddingApiKey, {
|
|
8399
8570
|
dryRun: params.options.dryRun,
|
|
8400
8571
|
verbose: params.options.verbose,
|
|
8401
|
-
onLog
|
|
8572
|
+
onLog
|
|
8402
8573
|
});
|
|
8403
8574
|
stats.clustersProcessed += 1;
|
|
8404
8575
|
stats.llmCalls += 1;
|
|
@@ -8425,6 +8596,7 @@ async function processPhaseClusters(params, deps) {
|
|
|
8425
8596
|
break;
|
|
8426
8597
|
}
|
|
8427
8598
|
}
|
|
8599
|
+
clearLiveProgress();
|
|
8428
8600
|
return stats;
|
|
8429
8601
|
}
|
|
8430
8602
|
async function runConsolidationOrchestrator(db, dbPath, llmClient, embeddingApiKey, options = {}, deps = {}) {
|
|
@@ -8577,7 +8749,7 @@ async function runConsolidationOrchestrator(db, dbPath, llmClient, embeddingApiK
|
|
|
8577
8749
|
skipBackup: projectIndex > 0,
|
|
8578
8750
|
backupPath: projectIndex > 0 ? sharedBackupPath : void 0,
|
|
8579
8751
|
skipOrphanCleanup: projectIndex > 0,
|
|
8580
|
-
onLog
|
|
8752
|
+
onLog
|
|
8581
8753
|
});
|
|
8582
8754
|
if (projectIndex === 0) {
|
|
8583
8755
|
sharedBackupPath = phase0Stats.backupPath;
|
|
@@ -8636,7 +8808,7 @@ async function runConsolidationOrchestrator(db, dbPath, llmClient, embeddingApiK
|
|
|
8636
8808
|
looseThreshold: options.looseThreshold,
|
|
8637
8809
|
idempotencyDays: options.idempotencyDays,
|
|
8638
8810
|
verbose: options.verbose,
|
|
8639
|
-
onLog
|
|
8811
|
+
onLog,
|
|
8640
8812
|
onStats: (stats) => {
|
|
8641
8813
|
phase1ClusterStats = stats;
|
|
8642
8814
|
}
|
|
@@ -8671,7 +8843,7 @@ async function runConsolidationOrchestrator(db, dbPath, llmClient, embeddingApiK
|
|
|
8671
8843
|
looseThreshold: options.looseThreshold,
|
|
8672
8844
|
idempotencyDays: options.idempotencyDays,
|
|
8673
8845
|
verbose: options.verbose,
|
|
8674
|
-
onLog
|
|
8846
|
+
onLog,
|
|
8675
8847
|
onStats: (stats) => {
|
|
8676
8848
|
phase2ClusterStats = stats;
|
|
8677
8849
|
}
|
|
@@ -8834,7 +9006,7 @@ async function runConsolidationOrchestrator(db, dbPath, llmClient, embeddingApiK
|
|
|
8834
9006
|
looseThreshold: options.looseThreshold,
|
|
8835
9007
|
idempotencyDays: 0,
|
|
8836
9008
|
verbose: options.verbose,
|
|
8837
|
-
onLog
|
|
9009
|
+
onLog,
|
|
8838
9010
|
onStats: (stats) => {
|
|
8839
9011
|
phase3ClusterStats = stats;
|
|
8840
9012
|
}
|
|
@@ -15865,7 +16037,7 @@ function parsePositiveInt2(value, fallback, label) {
|
|
|
15865
16037
|
/**
 * Format a metric with exactly two digits after the decimal point.
 *
 * @param {number} value - The metric to format.
 * @returns {string} The result of `value.toFixed(2)`.
 */
function formatMetric(value) {
  const twoDecimals = value.toFixed(2);
  return twoDecimals;
}
|
|
15868
|
-
function
|
|
16040
|
+
function formatCount2(value) {
|
|
15869
16041
|
const rounded = Math.round(value);
|
|
15870
16042
|
if (Math.abs(value - rounded) < 1e-9) {
|
|
15871
16043
|
return String(rounded);
|
|
@@ -16294,7 +16466,7 @@ function renderSummary(result, rangesBySession) {
|
|
|
16294
16466
|
for (const session of result.sessions) {
|
|
16295
16467
|
const range = rangesBySession.get(session.session);
|
|
16296
16468
|
const avgEntryCount = mean(session.runs.map((run) => run.total_entries));
|
|
16297
|
-
const countLabel = range !== void 0 ? `${
|
|
16469
|
+
const countLabel = range !== void 0 ? `${formatCount2(avgEntryCount)}/${range.min}-${range.max}` : `${formatCount2(avgEntryCount)}/-`;
|
|
16298
16470
|
const passCount = session.runs.filter((run) => run.pass).length;
|
|
16299
16471
|
lines.push(
|
|
16300
16472
|
`${session.session.padEnd(sessionWidth)} ${formatMetric(session.mean_recall).padEnd(6)} ${formatMetric(session.mean_partial_recall).padEnd(8)} ${formatMetric(session.mean_precision).padEnd(9)} ${formatMetric(session.mean_composite).padEnd(9)} ${countLabel.padEnd(9)} ${passCount}/${result.runs}`
|