agenr 1.8.0 → 1.8.1
This diff compares publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- package/CHANGELOG.md +18 -0
- package/dist/adapters/openclaw/index.js +2 -2
- package/dist/{chunk-6CEKKEFZ.js → chunk-O45JQ6O3.js} +56 -20
- package/dist/cli.js +123 -30
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,24 @@
 
 ## [Unreleased]
 
+## [1.8.1] - 2026-04-11
+
+Ingest concurrency and progress reporting patch release.
+
+### Fixed
+
+- **Dedup now honors bounded concurrency.** Multi-entry similarity clusters are now arbitrated in parallel with deterministic result ordering, and ingest paths explicitly thread configured/default concurrency into dedup instead of leaving arbitration serial.
+- **Claim extraction now uses real batch concurrency without violating ordered semantics.** Batch claim-key extraction now honors configured concurrency, preserves past-only hint visibility via per-entry frozen hint snapshots, and propagates sensible defaults through the relevant ingest/store paths instead of falling back to historical hardcoded single-worker behavior.
+- **Ingest spinner now reports real post-extraction stages.** Non-verbose `agenr ingest entries` runs now surface dedup, claim-key extraction, store pipeline, and bulk-write index preparation/finalization stages instead of looking stuck after `(N/N extracted)`.
+
+### Validation
+
+Changes since last push to `origin/master`:
+
+- Enhance ingestion process with stage progress events
+- Refactor deduplication process to support configurable concurrency
+- Refactor concurrency handling in ingestion process
+
 ## [1.8.0] - 2026-04-11
 
 Claim-centric trust loop close-out release.
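The trickiest of the three fixes is the claim-extraction one: when several entries are extracted at once, an entry must not see claim-key hints produced by its peers in the same parallel stage, otherwise results would differ from a serial run. The chunk diff further below does this by slicing entries into fixed-size stages and freezing a per-entry hint snapshot before the stage starts. A minimal, generic sketch of that pattern in plain JavaScript (all names here are illustrative, not part of agenr's API):

```js
// Illustrative sketch only: stage-batched work with per-entry frozen hint snapshots.
// Hints recorded during one stage become visible to the next stage, never to peers
// within the same stage, so the outcome matches what a serial run would produce.
async function processInStages(entries, stageSize, extract) {
  const hints = [];   // shared, mutable hint state
  const results = [];
  for (let start = 0; start < entries.length; start += stageSize) {
    const stage = entries.slice(start, start + stageSize);
    // Freeze what each entry is allowed to see before any work in this stage begins.
    const requests = stage.map((entry) => ({ entry, hintSnapshot: [...hints] }));
    const stageResults = await Promise.all(
      requests.map(({ entry, hintSnapshot }) => extract(entry, hintSnapshot))
    );
    for (const result of stageResults) {
      results.push(result);
      if (result.claimKey) {
        hints.push(result.claimKey); // visible from the next stage onward
      }
    }
  }
  return results;
}
```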
package/dist/adapters/openclaw/index.js
CHANGED
@@ -7,7 +7,7 @@ import {
   parseTuiSessionKey,
   readOpenClawSessionsStore,
   storeEntriesDetailed
-} from "../../chunk-6CEKKEFZ.js";
+} from "../../chunk-O45JQ6O3.js";
 import {
   EMBEDDING_DIMENSIONS,
   ENTRY_TYPES,
@@ -1055,7 +1055,7 @@ function registerAgenrOpenClawTools(api, servicesPromise, logger) {
 var openclaw_plugin_default = {
   id: "agenr",
   name: "agenr",
-  version: "1.8.0",
+  version: "1.8.1",
   description: "agenr memory plugin for OpenClaw",
   kind: "memory",
   contracts: {
package/dist/{chunk-6CEKKEFZ.js → chunk-O45JQ6O3.js}
CHANGED
@@ -2737,7 +2737,7 @@ async function extractClaimKeyDecision(entry, llm, config, options = {}) {
 async function getEntityHints(db) {
   return db.getDistinctClaimKeyPrefixes();
 }
-async function runBatchClaimExtraction(results, ports, config, _concurrency = 10, onWarning, onDiagnostic) {
+async function runBatchClaimExtraction(results, ports, config, concurrency = 10, onWarning, onDiagnostic) {
   if (!config.enabled) {
     return /* @__PURE__ */ new Map();
   }
@@ -2746,8 +2746,12 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
   const extractedEntries = /* @__PURE__ */ new Map();
   const diagnostics = /* @__PURE__ */ new Map();
   const retryEntries = [];
-
-
+  const stageSize = normalizeClaimExtractionConcurrency(concurrency);
+  const orderedEntries = results.flatMap((result) => result.entries);
+  for (let stageStart = 0; stageStart < orderedEntries.length; stageStart += stageSize) {
+    const stageEntries = orderedEntries.slice(stageStart, stageStart + stageSize);
+    const stageRequests = [];
+    for (const entry of stageEntries) {
       if (entry.claim_key) {
         recordClaimKeyHint(hintState, entry.claim_key);
         continue;
@@ -2765,7 +2769,18 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
         });
         continue;
       }
-
+      stageRequests.push({
+        entry,
+        hintSnapshot: buildClaimExtractionHintSnapshot(hintState, entry)
+      });
+    }
+    const stageDecisions = await Promise.all(
+      stageRequests.map(async ({ entry, hintSnapshot }) => ({
+        entry,
+        decision: await extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning)
+      }))
+    );
+    for (const { entry, decision } of stageDecisions) {
       diagnostics.set(entry, decision.diagnostic);
       if (decision.result?.claimKey) {
         applyClaimExtractionResultToEntry(entry, decision.result);
@@ -2777,18 +2792,26 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
    }
  }
  if (retryEntries.length > 0 && extractedEntries.size > 0) {
-    for (
-
-
-
-
-
-
-
+    for (let stageStart = 0; stageStart < retryEntries.length; stageStart += stageSize) {
+      const stageRequests = retryEntries.slice(stageStart, stageStart + stageSize).filter((entry) => !entry.claim_key).map((entry) => ({
+        entry,
+        hintSnapshot: buildClaimExtractionHintSnapshot(hintState, entry)
+      }));
+      const stageDecisions = await Promise.all(
+        stageRequests.map(async ({ entry, hintSnapshot }) => ({
+          entry,
+          decision: await extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning)
+        }))
+      );
+      for (const { entry, decision } of stageDecisions) {
+        diagnostics.set(entry, decision.diagnostic);
+        if (!decision.result?.claimKey) {
+          continue;
+        }
+        applyClaimExtractionResultToEntry(entry, decision.result);
+        recordClaimKeyHint(hintState, decision.result.claimKey);
+        extractedEntries.set(entry, decision.result);
       }
-      applyClaimExtractionResultToEntry(entry, decision.result);
-      recordClaimKeyHint(hintState, decision.result.claimKey);
-      extractedEntries.set(entry, decision.result);
     }
   }
   for (const result of results) {
@@ -2801,7 +2824,20 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
   }
   return extractedEntries;
 }
-async function extractBatchClaimKeyDecision(entry, llm, config, hintState, onWarning) {
+function normalizeClaimExtractionConcurrency(value) {
+  if (!Number.isInteger(value) || value <= 0) {
+    return 10;
+  }
+  return value;
+}
+function buildClaimExtractionHintSnapshot(hintState, entry) {
+  return {
+    hints: buildEntryHints(hintState, entry),
+    supportClaimKeys: [...hintState.supportClaimKeys],
+    entityPrefixStats: hintState.entityPrefixStats
+  };
+}
+async function extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning) {
   try {
     return await extractClaimKeyDecision(
       {
@@ -2814,10 +2850,10 @@ async function extractBatchClaimKeyDecision(entry, llm, config, hintState, onWar
       llm,
       config,
       {
-        hints:
+        hints: hintSnapshot.hints,
         onWarning,
-        supportClaimKeys:
-        entityPrefixStats:
+        supportClaimKeys: hintSnapshot.supportClaimKeys,
+        entityPrefixStats: hintSnapshot.entityPrefixStats
       }
     );
   } catch {
@@ -3735,7 +3771,7 @@ async function maybeExtractClaimKeys(preparedEntries, options) {
       db: claimExtraction.db
     },
     claimExtraction.config,
-
+    claimExtraction.config.concurrency ?? 10,
     options.onWarning,
     (entry, diagnostic) => {
       const preparedEntry = preparedEntries.find((candidate) => candidate.input === entry);
package/dist/cli.js
CHANGED
@@ -27,7 +27,7 @@ import {
   tokenizeGroundingText,
   validateEntriesWithIndexes,
   validateSupersessionRules
-} from "./chunk-6CEKKEFZ.js";
+} from "./chunk-O45JQ6O3.js";
 import {
   DEFAULT_CLAIM_EXTRACTION_CONCURRENCY,
   DEFAULT_SURGEON_CONTEXT_LIMIT,
@@ -1053,11 +1053,13 @@ async function sleep(durationMs) {
 
 // src/core/ingestion/dedup.ts
 var DEFAULT_SIMILARITY_THRESHOLD = 0.75;
+var DEFAULT_DEDUP_CONCURRENCY = 10;
 function getDefaultDedupSimilarityThreshold() {
   return DEFAULT_SIMILARITY_THRESHOLD;
 }
 async function dedupBatch(entries, llm, embedding, options = {}) {
   const similarityThreshold = options.similarityThreshold ?? DEFAULT_SIMILARITY_THRESHOLD;
+  const concurrency = normalizeDedupConcurrency(options.concurrency);
   if (entries.length === 0) {
     return {
       survivors: [],
@@ -1085,8 +1087,8 @@ async function dedupBatch(entries, llm, embedding, options = {}) {
   const survivorByIndex = /* @__PURE__ */ new Map();
   const clusterDetails = [];
   const warnings = [];
+  const arbitrationTasks = [];
   let singletonsPassedThrough = 0;
-  let llmCalls = 0;
   for (const [clusterIndex, cluster] of clusters.entries()) {
     if (cluster.length === 1) {
       const entryIndex = cluster[0];
@@ -1096,15 +1098,24 @@ async function dedupBatch(entries, llm, embedding, options = {}) {
       }
       continue;
     }
-
-
-
+    arbitrationTasks.push({
+      clusterIndex,
+      cluster,
+      maxSimilarity: calculateClusterMaxSimilarity(cluster, embeddings)
+    });
+  }
+  const arbitrationResults = await runBoundedArbitrations(
+    arbitrationTasks,
+    concurrency,
+    async (task) => arbitrateCluster(task.clusterIndex, task.cluster, entries, llm, task.maxSimilarity)
+  );
+  for (const arbitration of arbitrationResults) {
     clusterDetails.push(arbitration.detail);
     if (arbitration.warning) {
       warnings.push(arbitration.warning);
     }
     for (const keptIndex of arbitration.detail.kept) {
-      const updatedEntry = arbitration.detail.merged === true && arbitration.detail.mergedContent && keptIndex === arbitration.detail.mergeTarget ? mergeClusterEntry(
+      const updatedEntry = arbitration.detail.merged === true && arbitration.detail.mergedContent && keptIndex === arbitration.detail.mergeTarget ? mergeClusterEntry(arbitration.detail.entryIndices, keptIndex, arbitration.detail.mergedContent, entries) : entries[keptIndex];
       survivorByIndex.set(keptIndex, updatedEntry);
     }
   }
@@ -1128,12 +1139,43 @@ async function dedupBatch(entries, llm, embedding, options = {}) {
     removedCount: entries.length - survivors.length,
     clustersArbitrated: clusterDetails.length,
     singletonsPassedThrough,
-    llmCalls,
+    llmCalls: arbitrationTasks.length,
     clusterDetails,
     warnings,
     similarityThreshold
   };
 }
+function normalizeDedupConcurrency(value) {
+  if (!Number.isInteger(value) || value === void 0 || value <= 0) {
+    return DEFAULT_DEDUP_CONCURRENCY;
+  }
+  return value;
+}
+async function runBoundedArbitrations(tasks, concurrency, worker) {
+  if (tasks.length === 0) {
+    return [];
+  }
+  const results = new Array(tasks.length);
+  const workerCount = Math.min(concurrency, tasks.length);
+  let nextTaskIndex = 0;
+  await Promise.all(
+    Array.from({ length: workerCount }, async () => {
+      while (true) {
+        const taskIndex = nextTaskIndex;
+        nextTaskIndex += 1;
+        if (taskIndex >= tasks.length) {
+          return;
+        }
+        const task = tasks[taskIndex];
+        if (task === void 0) {
+          return;
+        }
+        results[taskIndex] = await worker(task, taskIndex);
+      }
+    })
+  );
+  return results;
+}
 async function arbitrateCluster(clusterIndex, cluster, entries, llm, maxSimilarity) {
   const systemPrompt = buildDedupSystemPrompt();
   const userPrompt = buildDedupUserPrompt(cluster, entries);
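The `runBoundedArbitrations` helper above is what backs the changelog's "deterministic result ordering" claim: each worker grabs the next task index and writes its result into that same slot, so output order follows input order even when tasks complete out of order. A self-contained sketch of the same worker-pool shape with a toy async worker (the `runBounded` and `slowDouble` names are illustrative, not agenr exports):

```js
// Illustrative only: a generic bounded worker pool in the style of runBoundedArbitrations.
async function runBounded(tasks, concurrency, worker) {
  const results = new Array(tasks.length);
  let next = 0;
  const workers = Array.from({ length: Math.min(concurrency, tasks.length) }, async () => {
    while (next < tasks.length) {
      const index = next;
      next += 1;
      // Result is stored at the task's original index, so ordering is deterministic.
      results[index] = await worker(tasks[index], index);
    }
  });
  await Promise.all(workers);
  return results;
}

// Hypothetical worker: finishes in arbitrary order, but output stays aligned with input.
const slowDouble = (n) => new Promise((resolve) => setTimeout(() => resolve(n * 2), Math.random() * 50));

runBounded([1, 2, 3, 4, 5], 2, slowDouble).then((out) => {
  console.log(out); // [2, 4, 6, 8, 10] regardless of completion order
});
```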
@@ -1901,12 +1943,19 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
   let precomputedEmbeddings;
   const claimKeyDiagnostics = /* @__PURE__ */ new Map();
   if (taggedEntries.length > 0) {
+    if (options.skipDedup !== true) {
+      options.onStageProgress?.({
+        phase: "dedup_start",
+        totalEntries: taggedEntries.length
+      });
+    }
     const dedupLlm = options.skipDedup === true ? createNoopLlmPort() : ports.createDedupLlm?.() ?? ports.createExtractionLlm();
     dedupResult = await dedupBatch(
       taggedEntries.map((taggedEntry) => taggedEntry.entry),
       dedupLlm,
       ports.embedding,
       {
+        concurrency: options.concurrency ?? DEFAULT_INGEST_CONCURRENCY,
         skip: options.skipDedup,
         verbose: options.verbose
       }
@@ -1928,6 +1977,12 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
     confidenceThreshold: 0.8,
     eligibleTypes: ["fact", "preference", "decision", "lesson"]
   };
+  if (claimConfig.enabled) {
+    options.onStageProgress?.({
+      phase: "claim_extraction_start",
+      totalEntries: flattenEntries(resultsToStore).length
+    });
+  }
   const extractedClaimKeys = await runBatchClaimExtraction(
     resultsToStore,
     {
@@ -1935,7 +1990,7 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
       db: ports.db
     },
     claimConfig,
-    options.concurrency ?? DEFAULT_INGEST_CONCURRENCY,
+    claimConfig.concurrency ?? options.concurrency ?? DEFAULT_INGEST_CONCURRENCY,
     options.onWarning,
     (entry, diagnostic) => {
       const flattenedIndex = findFlattenedEntryIndex(resultsToStore, entry);
@@ -1957,20 +2012,26 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
       eligibleTypes: ["fact", "preference", "decision", "lesson"]
     }).eligibleTypes
   ) : null;
-  const storeResults = resultsToStore.length === 0 ? /* @__PURE__ */ new Map() : await
-
-
-
-
-
-
-
-
-
-
-
-
-
+  const storeResults = resultsToStore.length === 0 ? /* @__PURE__ */ new Map() : await (async () => {
+    options.onStageProgress?.({
+      phase: "store_start",
+      totalEntries: flattenEntries(resultsToStore).length
+    });
+    return storeExtractedResults(
+      resultsToStore,
+      {
+        db: ports.db,
+        embedding: ports.embedding
+      },
+      {
+        dryRun: options.dryRun,
+        verbose: options.verbose,
+        precomputedEmbeddings,
+        onWarning: options.onWarning,
+        onBulkWriteProgress: options.onBulkWriteProgress
+      }
+    );
+  })();
   return {
     files,
     extractionRuns,
@@ -3217,7 +3278,7 @@ function registerIngestCommand(program2) {
   registerIngestEpisodesCommand(ingestCommand);
 }
 function registerIngestEntriesCommand(parent) {
-  const ingestCommand = parent.command("entries <path>", { isDefault: true }).description("Ingest OpenClaw session files into the knowledge database").option("--verbose", "Show detailed progress").option("--dry-run", "Parse and extract without storing").addOption(new Option2("--whole-file <mode>", "Whole-file mode: auto|force|never").choices(["auto", "force", "never"]).default("auto")).option("--skip-dedup", "Skip within-batch semantic dedup").addOption(new Option2("--concurrency <n>", "Max files to extract in parallel").argParser(parseConcurrency)
+  const ingestCommand = parent.command("entries <path>", { isDefault: true }).description("Ingest OpenClaw session files into the knowledge database").option("--verbose", "Show detailed progress").option("--dry-run", "Parse and extract without storing").addOption(new Option2("--whole-file <mode>", "Whole-file mode: auto|force|never").choices(["auto", "force", "never"]).default("auto")).option("--skip-dedup", "Skip within-batch semantic dedup").addOption(new Option2("--concurrency <n>", "Max files to extract in parallel").argParser(parseConcurrency));
   ingestCommand.action(async (targetPath, options) => {
     const startedAt = Date.now();
     let db = null;
@@ -3231,6 +3292,11 @@ function registerIngestEntriesCommand(parent) {
     const { provider, modelId } = resolveModel(config, "extraction");
     const { provider: dedupProvider, modelId: dedupModelId } = resolveModel(config, "dedup");
     const claimExtractionConfig = resolveClaimExtractionConfig(config);
+    const effectiveConcurrency = commandInput.concurrency ?? claimExtractionConfig.concurrency ?? DEFAULT_INGEST_CONCURRENCY;
+    const cliClaimExtractionConfig = {
+      ...claimExtractionConfig,
+      concurrency: effectiveConcurrency
+    };
    const claimModel = claimExtractionConfig.enabled ? resolveModel(config, "claim") : null;
    const llmApiKey = resolveLlmApiKey(config, provider);
    const dedupApiKey = resolveLlmApiKey(config, dedupProvider);
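The nullish-coalescing chain above is the concurrency precedence the release notes describe: an explicit `--concurrency` flag wins, then the claim-extraction config value, then the built-in default. A tiny illustration of that precedence with hypothetical inputs, assuming the default is 10 in line with the other defaults visible in this diff:

```js
// Hypothetical inputs; only the precedence logic mirrors the diff.
const DEFAULT_INGEST_CONCURRENCY = 10; // assumed value for illustration

function resolveEffectiveConcurrency(cliFlag, configValue) {
  // CLI flag wins, then the claim-extraction config, then the default.
  return cliFlag ?? configValue ?? DEFAULT_INGEST_CONCURRENCY;
}

console.log(resolveEffectiveConcurrency(4, 8));                 // 4  (flag wins)
console.log(resolveEffectiveConcurrency(undefined, 8));         // 8  (config wins)
console.log(resolveEffectiveConcurrency(undefined, undefined)); // 10 (default)
```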
@@ -3254,7 +3320,7 @@ function registerIngestEntriesCommand(parent) {
         formatLabel("Whole-file", commandInput.wholeFile),
         formatLabel("Within-batch dedup", commandInput.skipDedup ? "skipped" : "enabled"),
         formatLabel("Embeddings", "stored"),
-        formatLabel("Concurrency", `${
+        formatLabel("Concurrency", `${effectiveConcurrency}`)
       ].join("\n")
     );
     if (commandInput.dryRun) {
@@ -3277,8 +3343,8 @@ function registerIngestEntriesCommand(parent) {
         } : {}
       },
       {
-        concurrency:
-        claimExtractionConfig,
+        concurrency: effectiveConcurrency,
+        claimExtractionConfig: cliClaimExtractionConfig,
         dryRun: commandInput.dryRun,
         verbose: commandInput.verbose,
         wholeFile: commandInput.wholeFile,
@@ -3287,7 +3353,12 @@ function registerIngestEntriesCommand(parent) {
         onExtractionProgress: (completed, total) => {
           spinner5?.message(`Processing transcripts... (${completed}/${total} extracted)`);
         },
-
+        onStageProgress: (event) => {
+          spinner5?.message(progressMessageForIngestStage(event, files.length));
+        },
+        onBulkWriteProgress: useVerboseBulkWriteProgress ? reportBulkWriteProgress : (event) => {
+          spinner5?.message(progressMessageForBulkWrite(event.phase));
+        }
       }
     );
     spinner5?.stop("Ingest pipeline complete.");
@@ -3396,7 +3467,7 @@ function normalizeIngestEntriesCommand(targetPath, options) {
     dryRun: options.dryRun === true,
     wholeFile: options.wholeFile ?? "auto",
     skipDedup: options.skipDedup === true,
-    concurrency: options.concurrency
+    concurrency: options.concurrency
   };
 }
 function formatClaimKeyHealthSummary(summary) {
@@ -3678,6 +3749,28 @@ function emptyStoreResult2() {
     rejected: 0
   };
 }
+function progressMessageForIngestStage(event, totalFiles) {
+  switch (event.phase) {
+    case "dedup_start":
+      return `Deduplicating ${event.totalEntries} ${pluralize2(event.totalEntries, "entry", "entries")} from ${totalFiles} ${pluralize2(totalFiles, "file")}...`;
+    case "claim_extraction_start":
+      return `Extracting claim keys for ${event.totalEntries} ${pluralize2(event.totalEntries, "entry", "entries")}...`;
+    case "store_start":
+      return `Running store pipeline for ${event.totalEntries} ${pluralize2(event.totalEntries, "entry", "entries")}...`;
+  }
+}
+function progressMessageForBulkWrite(phase) {
+  switch (phase) {
+    case "prepare_start":
+      return "Preparing database indexes for bulk ingest...";
+    case "store_complete":
+      return "Bulk ingest store phase complete...";
+    case "finalize_start":
+      return "Rebuilding indexes after bulk ingest...";
+    case "finalize_complete":
+      return "Bulk ingest finalization complete...";
+  }
+}
 function reportBulkWriteProgress(event) {
   switch (event.phase) {
     case "prepare_start":
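For callers driving ingest programmatically rather than through the CLI spinner, the same `onStageProgress` and `onBulkWriteProgress` callbacks can feed any reporter. A rough sketch, assuming only the event shapes visible in this diff (stage events carry `phase` and `totalEntries`, bulk-write events carry at least `phase`); the reporter itself is hypothetical:

```js
// Hypothetical reporter wired to the callbacks shown in the diff.
// Stage events: { phase, totalEntries }. Bulk-write events: at least { phase }.
function makeIngestReporters(log = console.error) {
  return {
    onStageProgress: (event) => {
      log(`[ingest] stage=${event.phase} entries=${event.totalEntries}`);
    },
    onBulkWriteProgress: (event) => {
      log(`[ingest] bulk-write=${event.phase}`);
    }
  };
}

// Example: spread into the ingest options alongside concurrency, dryRun, etc.
const reporters = makeIngestReporters();
// await ingestDiscoveredFiles(files, ports, { concurrency: 8, ...reporters });
```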
@@ -5334,7 +5427,7 @@ async function runBulkIngest(files, config, prompts) {
         spinner5.message(`Ingesting sessions... (${completed}/${total} extracted)`);
       },
       onBulkWriteProgress: (event) => {
-        spinner5.message(
+        spinner5.message(progressMessageForBulkWrite2(event.phase));
       }
     }
   );
@@ -5433,7 +5526,7 @@ function normalizeSetupProvider(provider) {
 function hasMeteredIngestCost(auth) {
   return auth !== "openai-subscription" && auth !== "anthropic-oauth" && auth !== "anthropic-token";
 }
-function
+function progressMessageForBulkWrite2(phase) {
   switch (phase) {
     case "prepare_start":
       return "Preparing database indexes for bulk ingest...";