agenr 0.9.16 → 0.9.17

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,12 @@
1
1
  # Changelog
2
2
 
3
+ ## 0.9.17 - 2026-02-27
4
+
5
+ ### Changed
6
+ - Optimized LLM dedup in consolidate clustering: batch up to 10 pairs per API
7
+ call with 5 concurrent batches. Reduces a 2400-pair dedup queue from ~60min
8
+ (sequential, 1 call per pair) to ~2min.
9
+
3
10
  ## 0.9.16 - 2026-02-27
4
11
 
5
12
  ### Added
package/dist/cli-main.js CHANGED
@@ -4254,15 +4254,26 @@ var DEFAULT_IDEMPOTENCY_DAYS = 7;
4254
4254
  var DEFAULT_NEIGHBOR_LIMIT = 20;
4255
4255
  var MAX_ACTIVE_EMBEDDED_ENTRIES2 = 2e4;
4256
4256
  var MILLISECONDS_PER_DAY = 1e3 * 60 * 60 * 24;
4257
- var LLM_DEDUP_TOOL_NAME = "dedup_check";
4257
+ var LLM_DEDUP_BATCH_TOOL_NAME = "batch_dedup_check";
4258
+ var LLM_DEDUP_BATCH_SIZE = 10;
4259
+ var LLM_DEDUP_CONCURRENCY = 5;
4258
4260
  var LLM_DEDUP_TOOL_SCHEMA = Type.Object({
4259
4261
  same: Type.Boolean(),
4260
4262
  reason: Type.String()
4261
4263
  });
4262
- var LLM_DEDUP_TOOL = {
4263
- name: LLM_DEDUP_TOOL_NAME,
4264
- description: "Decide whether two knowledge entries express the same knowledge.",
4265
- parameters: LLM_DEDUP_TOOL_SCHEMA
4264
+ var LLM_DEDUP_BATCH_TOOL_SCHEMA = Type.Object({
4265
+ results: Type.Array(
4266
+ Type.Object({
4267
+ pair: Type.Number(),
4268
+ same: Type.Boolean(),
4269
+ reason: Type.Optional(Type.String())
4270
+ })
4271
+ )
4272
+ });
4273
+ var LLM_DEDUP_BATCH_TOOL = {
4274
+ name: LLM_DEDUP_BATCH_TOOL_NAME,
4275
+ description: "Return dedup results for all pairs",
4276
+ parameters: LLM_DEDUP_BATCH_TOOL_SCHEMA
4266
4277
  };
4267
4278
  function toNumber5(value) {
4268
4279
  if (typeof value === "number") {
@@ -4303,70 +4314,98 @@ function normalizeSubject2(value) {
4303
4314
  function pairKey(a, b) {
4304
4315
  return a < b ? `${a}|${b}` : `${b}|${a}`;
4305
4316
  }
4306
- function buildLlmDedupContext(entryA, entryB) {
4317
+ function buildLlmDedupBatchContext(pairs) {
4307
4318
  const systemPrompt = [
4308
4319
  "You are a deduplication assistant for knowledge entries.",
4309
- "Decide if two entries express the same knowledge in different wording.",
4310
- "Return your answer by calling dedup_check."
4311
- ].join("\n");
4312
- const userPrompt = [
4313
- "Are these two knowledge entries expressing the same fact or genuinely distinct?",
4314
- `Entry A: ${entryA.content}`,
4315
- `Entry B: ${entryB.content}`,
4316
- 'Set "same" to true only when both entries represent the same knowledge.'
4320
+ "For each numbered pair, decide if they express the same knowledge.",
4321
+ "Call batch_dedup_check once with your results."
4317
4322
  ].join("\n");
4323
+ const pairBlocks = pairs.map(
4324
+ (pair, index) => [
4325
+ `Pair ${index + 1}:`,
4326
+ ` Entry A: ${pair.entry.content}`,
4327
+ ` Entry B: ${pair.candidate.content}`
4328
+ ].join("\n")
4329
+ ).join("\n\n");
4318
4330
  return {
4319
4331
  systemPrompt,
4320
4332
  messages: [
4321
4333
  {
4322
4334
  role: "user",
4323
- content: userPrompt,
4335
+ content: pairBlocks,
4324
4336
  timestamp: Date.now()
4325
4337
  }
4326
4338
  ],
4327
- tools: [LLM_DEDUP_TOOL]
4339
+ tools: [LLM_DEDUP_BATCH_TOOL]
4328
4340
  };
4329
4341
  }
4330
- function extractLlmDedupFromToolCall(message) {
4342
+ function extractLlmDedupBatchFromToolCall(message) {
4331
4343
  for (const block of message.content) {
4332
- if (block.type !== "toolCall" || block.name !== LLM_DEDUP_TOOL_NAME) {
4344
+ if (block.type !== "toolCall" || block.name !== LLM_DEDUP_BATCH_TOOL_NAME) {
4333
4345
  continue;
4334
4346
  }
4335
4347
  const args = block.arguments;
4336
- if (!args || typeof args.same !== "boolean") {
4348
+ if (!args || !Array.isArray(args.results)) {
4337
4349
  continue;
4338
4350
  }
4339
- return {
4340
- same: args.same,
4341
- reason: typeof args.reason === "string" ? args.reason : ""
4342
- };
4351
+ const parsed = args.results.filter(
4352
+ (result) => typeof result === "object" && result !== null && typeof result.pair === "number" && typeof result.same === "boolean"
4353
+ ).map((result) => ({
4354
+ pair: result.pair,
4355
+ same: result.same,
4356
+ reason: typeof result.reason === "string" ? result.reason : void 0
4357
+ }));
4358
+ return parsed;
4343
4359
  }
4344
4360
  return null;
4345
4361
  }
4346
- async function llmDedupCheck(llmClient, entryA, entryB) {
4362
+ async function llmDedupCheckBatch(llmClient, pairs) {
4363
+ if (pairs.length === 0) {
4364
+ return [];
4365
+ }
4366
+ const fallback = Array.from({ length: pairs.length }, () => false);
4347
4367
  try {
4348
- const timeoutMs = 15e3;
4368
+ const timeoutMs = 3e4;
4369
+ let timer;
4349
4370
  const response = await Promise.race([
4350
4371
  runSimpleStream({
4351
4372
  model: llmClient.resolvedModel.model,
4352
- context: buildLlmDedupContext(entryA, entryB),
4373
+ context: buildLlmDedupBatchContext(pairs),
4353
4374
  options: {
4354
4375
  apiKey: llmClient.credentials.apiKey
4355
4376
  },
4356
4377
  verbose: false
4357
4378
  }),
4358
- new Promise(
4359
- (_, reject) => setTimeout(() => reject(new Error("llmDedupCheck timed out")), timeoutMs)
4360
- )
4361
- ]);
4379
+ new Promise((_, reject) => {
4380
+ timer = setTimeout(() => reject(new Error("llmDedupCheckBatch timed out")), timeoutMs);
4381
+ })
4382
+ ]).finally(() => clearTimeout(timer));
4362
4383
  if (response.stopReason === "error" || response.errorMessage) {
4363
- return false;
4384
+ return fallback;
4385
+ }
4386
+ const parsed = extractLlmDedupBatchFromToolCall(response);
4387
+ if (!parsed) {
4388
+ return fallback;
4389
+ }
4390
+ const results = [...fallback];
4391
+ for (const result of parsed) {
4392
+ const pairIndex = Math.trunc(result.pair) - 1;
4393
+ if (pairIndex < 0 || pairIndex >= results.length) {
4394
+ continue;
4395
+ }
4396
+ results[pairIndex] = result.same;
4364
4397
  }
4365
- const parsed = extractLlmDedupFromToolCall(response);
4366
- return parsed?.same === true;
4398
+ return results;
4367
4399
  } catch {
4368
- return false;
4400
+ return fallback;
4401
+ }
4402
+ }
4403
+ function chunkPairs(items, size) {
4404
+ const chunks = [];
4405
+ for (let i = 0; i < items.length; i += size) {
4406
+ chunks.push(items.slice(i, i + size));
4369
4407
  }
4408
+ return chunks;
4370
4409
  }
4371
4410
  function parseDaysSince(value, now) {
4372
4411
  const parsed = new Date(value);
@@ -4476,6 +4515,7 @@ async function buildClusters(db, options = {}) {
4476
4515
  const looseUnionPairs = /* @__PURE__ */ new Set();
4477
4516
  const llmDedupQueue = [];
4478
4517
  let llmDedupCalls = 0;
4518
+ let llmDedupCheckedPairs = 0;
4479
4519
  let llmDedupMatches = 0;
4480
4520
  for (const entry of candidates) {
4481
4521
  unionFind.add(entry.id);
@@ -4507,18 +4547,45 @@ async function buildClusters(db, options = {}) {
4507
4547
  llmDedupQueue.push({ entry, candidate, key });
4508
4548
  }
4509
4549
  }
4510
- for (const pair of llmDedupQueue) {
4511
- if (!llmClient) {
4512
- break;
4513
- }
4514
- llmDedupCalls += 1;
4515
- const isSame = await llmDedupCheck(llmClient, pair.entry, pair.candidate);
4516
- if (isSame) {
4517
- llmDedupMatches += 1;
4518
- looseUnionPairs.add(pair.key);
4519
- unionFind.union(pair.entry.id, pair.candidate.id);
4550
+ if (llmClient && llmDedupQueue.length > 0) {
4551
+ const startedAt = Date.now();
4552
+ const batches = chunkPairs(llmDedupQueue, LLM_DEDUP_BATCH_SIZE);
4553
+ for (let i = 0; i < batches.length; i += LLM_DEDUP_CONCURRENCY) {
4554
+ const batchGroup = batches.slice(i, i + LLM_DEDUP_CONCURRENCY);
4555
+ const settled = await Promise.allSettled(
4556
+ batchGroup.map(
4557
+ (batch) => llmDedupCheckBatch(
4558
+ llmClient,
4559
+ batch.map((pair) => ({
4560
+ entry: pair.entry,
4561
+ candidate: pair.candidate
4562
+ }))
4563
+ )
4564
+ )
4565
+ );
4566
+ llmDedupCalls += batchGroup.length;
4567
+ for (let batchIndex = 0; batchIndex < batchGroup.length; batchIndex += 1) {
4568
+ const batch = batchGroup[batchIndex];
4569
+ const result2 = settled[batchIndex];
4570
+ const matches = result2 && result2.status === "fulfilled" ? result2.value : Array.from({ length: batch.length }, () => false);
4571
+ llmDedupCheckedPairs += batch.length;
4572
+ for (let pairIndex = 0; pairIndex < batch.length; pairIndex += 1) {
4573
+ const pair = batch[pairIndex];
4574
+ if (matches[pairIndex] !== true) {
4575
+ continue;
4576
+ }
4577
+ llmDedupMatches += 1;
4578
+ looseUnionPairs.add(pair.key);
4579
+ unionFind.union(pair.entry.id, pair.candidate.id);
4580
+ }
4581
+ }
4582
+ const elapsedSeconds = Math.max(1, Math.floor((Date.now() - startedAt) / 1e3));
4583
+ const remainingPairs = Math.max(0, llmDedupQueue.length - llmDedupCheckedPairs);
4584
+ const estimatedRemainingSeconds = Math.round(elapsedSeconds / llmDedupCheckedPairs * remainingPairs);
4585
+ onLog(
4586
+ `[dedup] Checked ${llmDedupCheckedPairs}/${llmDedupQueue.length} pairs (${llmDedupMatches} matched) ~${estimatedRemainingSeconds}s remaining`
4587
+ );
4520
4588
  }
4521
- onLog(`[dedup] Checked ${llmDedupCalls}/${llmDedupQueue.length} pairs (${llmDedupMatches} matched)`);
4522
4589
  }
4523
4590
  const groups = /* @__PURE__ */ new Map();
4524
4591
  for (const entry of candidates) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agenr",
3
- "version": "0.9.16",
3
+ "version": "0.9.17",
4
4
  "openclaw": {
5
5
  "extensions": [
6
6
  "dist/openclaw-plugin/index.js"