agenr 1.8.0 → 1.8.1
This diff compares publicly available package versions as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- package/CHANGELOG.md +18 -0
- package/dist/adapters/openclaw/index.js +2 -2
- package/dist/{chunk-6CEKKEFZ.js → chunk-O45JQ6O3.js} +56 -20
- package/dist/cli.js +123 -30
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
@@ -2,6 +2,24 @@
 
 ## [Unreleased]
 
+## [1.8.1] - 2026-04-11
+
+Ingest concurrency and progress reporting patch release.
+
+### Fixed
+
+- **Dedup now honors bounded concurrency.** Multi-entry similarity clusters are now arbitrated in parallel with deterministic result ordering, and ingest paths explicitly thread configured/default concurrency into dedup instead of leaving arbitration serial.
+- **Claim extraction now uses real batch concurrency without violating ordered semantics.** Batch claim-key extraction now honors configured concurrency, preserves past-only hint visibility via per-entry frozen hint snapshots, and propagates sensible defaults through the relevant ingest/store paths instead of falling back to historical hardcoded single-worker behavior.
+- **Ingest spinner now reports real post-extraction stages.** Non-verbose `agenr ingest entries` runs now surface dedup, claim-key extraction, store pipeline, and bulk-write index preparation/finalization stages instead of looking stuck after `(N/N extracted)`.
+
+### Validation
+
+Changes since last push to `origin/master`:
+
+- Enhance ingestion process with stage progress events
+- Refactor deduplication process to support configurable concurrency
+- Refactor concurrency handling in ingestion process
+
 ## [1.8.0] - 2026-04-11
 
 Claim-centric trust loop close-out release.
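The trickiest of the three fixes is the claim-extraction one: when several entries are extracted at once, an entry must not see claim-key hints produced by its peers in the same parallel stage, otherwise results would differ from a serial run. The chunk diff further below does this by slicing entries into fixed-size stages and freezing a per-entry hint snapshot before the stage starts. A minimal, generic sketch of that pattern in plain JavaScript (all names here are illustrative, not part of agenr's API):

```js
// Illustrative sketch only: stage-batched work with per-entry frozen hint snapshots.
// Hints recorded during one stage become visible to the next stage, never to peers
// within the same stage, so the outcome matches what a serial run would produce.
async function processInStages(entries, stageSize, extract) {
  const hints = [];   // shared, mutable hint state
  const results = [];
  for (let start = 0; start < entries.length; start += stageSize) {
    const stage = entries.slice(start, start + stageSize);
    // Freeze what each entry is allowed to see before any work in this stage begins.
    const requests = stage.map((entry) => ({ entry, hintSnapshot: [...hints] }));
    const stageResults = await Promise.all(
      requests.map(({ entry, hintSnapshot }) => extract(entry, hintSnapshot))
    );
    for (const result of stageResults) {
      results.push(result);
      if (result.claimKey) {
        hints.push(result.claimKey); // visible from the next stage onward
      }
    }
  }
  return results;
}
```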
package/dist/adapters/openclaw/index.js
CHANGED
@@ -7,7 +7,7 @@ import {
   parseTuiSessionKey,
   readOpenClawSessionsStore,
   storeEntriesDetailed
-} from "../../chunk-6CEKKEFZ.js";
+} from "../../chunk-O45JQ6O3.js";
 import {
   EMBEDDING_DIMENSIONS,
   ENTRY_TYPES,
@@ -1055,7 +1055,7 @@ function registerAgenrOpenClawTools(api, servicesPromise, logger) {
 var openclaw_plugin_default = {
   id: "agenr",
   name: "agenr",
-  version: "1.8.0",
+  version: "1.8.1",
   description: "agenr memory plugin for OpenClaw",
   kind: "memory",
   contracts: {
package/dist/{chunk-6CEKKEFZ.js → chunk-O45JQ6O3.js}
CHANGED
@@ -2737,7 +2737,7 @@ async function extractClaimKeyDecision(entry, llm, config, options = {}) {
 async function getEntityHints(db) {
   return db.getDistinctClaimKeyPrefixes();
 }
-async function runBatchClaimExtraction(results, ports, config, _concurrency = 10, onWarning, onDiagnostic) {
+async function runBatchClaimExtraction(results, ports, config, concurrency = 10, onWarning, onDiagnostic) {
   if (!config.enabled) {
     return /* @__PURE__ */ new Map();
   }
@@ -2746,8 +2746,12 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
   const extractedEntries = /* @__PURE__ */ new Map();
   const diagnostics = /* @__PURE__ */ new Map();
   const retryEntries = [];
-
-
+  const stageSize = normalizeClaimExtractionConcurrency(concurrency);
+  const orderedEntries = results.flatMap((result) => result.entries);
+  for (let stageStart = 0; stageStart < orderedEntries.length; stageStart += stageSize) {
+    const stageEntries = orderedEntries.slice(stageStart, stageStart + stageSize);
+    const stageRequests = [];
+    for (const entry of stageEntries) {
       if (entry.claim_key) {
         recordClaimKeyHint(hintState, entry.claim_key);
         continue;
@@ -2765,7 +2769,18 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
         });
         continue;
       }
-
+      stageRequests.push({
+        entry,
+        hintSnapshot: buildClaimExtractionHintSnapshot(hintState, entry)
+      });
+    }
+    const stageDecisions = await Promise.all(
+      stageRequests.map(async ({ entry, hintSnapshot }) => ({
+        entry,
+        decision: await extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning)
+      }))
+    );
+    for (const { entry, decision } of stageDecisions) {
       diagnostics.set(entry, decision.diagnostic);
       if (decision.result?.claimKey) {
         applyClaimExtractionResultToEntry(entry, decision.result);
@@ -2777,18 +2792,26 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
    }
  }
  if (retryEntries.length > 0 && extractedEntries.size > 0) {
-    for (
-
-
-
-
-
-
-
+    for (let stageStart = 0; stageStart < retryEntries.length; stageStart += stageSize) {
+      const stageRequests = retryEntries.slice(stageStart, stageStart + stageSize).filter((entry) => !entry.claim_key).map((entry) => ({
+        entry,
+        hintSnapshot: buildClaimExtractionHintSnapshot(hintState, entry)
+      }));
+      const stageDecisions = await Promise.all(
+        stageRequests.map(async ({ entry, hintSnapshot }) => ({
+          entry,
+          decision: await extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning)
+        }))
+      );
+      for (const { entry, decision } of stageDecisions) {
+        diagnostics.set(entry, decision.diagnostic);
+        if (!decision.result?.claimKey) {
+          continue;
+        }
+        applyClaimExtractionResultToEntry(entry, decision.result);
+        recordClaimKeyHint(hintState, decision.result.claimKey);
+        extractedEntries.set(entry, decision.result);
       }
-      applyClaimExtractionResultToEntry(entry, decision.result);
-      recordClaimKeyHint(hintState, decision.result.claimKey);
-      extractedEntries.set(entry, decision.result);
     }
   }
   for (const result of results) {
@@ -2801,7 +2824,20 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
   }
   return extractedEntries;
 }
-async function extractBatchClaimKeyDecision(entry, llm, config, hintState, onWarning) {
+function normalizeClaimExtractionConcurrency(value) {
+  if (!Number.isInteger(value) || value <= 0) {
+    return 10;
+  }
+  return value;
+}
+function buildClaimExtractionHintSnapshot(hintState, entry) {
+  return {
+    hints: buildEntryHints(hintState, entry),
+    supportClaimKeys: [...hintState.supportClaimKeys],
+    entityPrefixStats: hintState.entityPrefixStats
+  };
+}
+async function extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning) {
   try {
     return await extractClaimKeyDecision(
       {
@@ -2814,10 +2850,10 @@ async function extractBatchClaimKeyDecision(entry, llm, config, hintState, onWar
       llm,
       config,
       {
-        hints:
+        hints: hintSnapshot.hints,
         onWarning,
-        supportClaimKeys:
-        entityPrefixStats:
+        supportClaimKeys: hintSnapshot.supportClaimKeys,
+        entityPrefixStats: hintSnapshot.entityPrefixStats
       }
     );
   } catch {
@@ -3735,7 +3771,7 @@ async function maybeExtractClaimKeys(preparedEntries, options) {
       db: claimExtraction.db
     },
     claimExtraction.config,
-
+    claimExtraction.config.concurrency ?? 10,
     options.onWarning,
     (entry, diagnostic) => {
       const preparedEntry = preparedEntries.find((candidate) => candidate.input === entry);
package/dist/cli.js
CHANGED
@@ -27,7 +27,7 @@ import {
   tokenizeGroundingText,
   validateEntriesWithIndexes,
   validateSupersessionRules
-} from "./chunk-6CEKKEFZ.js";
+} from "./chunk-O45JQ6O3.js";
 import {
   DEFAULT_CLAIM_EXTRACTION_CONCURRENCY,
   DEFAULT_SURGEON_CONTEXT_LIMIT,
@@ -1053,11 +1053,13 @@ async function sleep(durationMs) {
 
 // src/core/ingestion/dedup.ts
 var DEFAULT_SIMILARITY_THRESHOLD = 0.75;
+var DEFAULT_DEDUP_CONCURRENCY = 10;
 function getDefaultDedupSimilarityThreshold() {
   return DEFAULT_SIMILARITY_THRESHOLD;
 }
 async function dedupBatch(entries, llm, embedding, options = {}) {
   const similarityThreshold = options.similarityThreshold ?? DEFAULT_SIMILARITY_THRESHOLD;
+  const concurrency = normalizeDedupConcurrency(options.concurrency);
   if (entries.length === 0) {
     return {
       survivors: [],
@@ -1085,8 +1087,8 @@ async function dedupBatch(entries, llm, embedding, options = {}) {
   const survivorByIndex = /* @__PURE__ */ new Map();
   const clusterDetails = [];
   const warnings = [];
+  const arbitrationTasks = [];
   let singletonsPassedThrough = 0;
-  let llmCalls = 0;
   for (const [clusterIndex, cluster] of clusters.entries()) {
     if (cluster.length === 1) {
       const entryIndex = cluster[0];
@@ -1096,15 +1098,24 @@ async function dedupBatch(entries, llm, embedding, options = {}) {
       }
       continue;
     }
-
-
-
+    arbitrationTasks.push({
+      clusterIndex,
+      cluster,
+      maxSimilarity: calculateClusterMaxSimilarity(cluster, embeddings)
+    });
+  }
+  const arbitrationResults = await runBoundedArbitrations(
+    arbitrationTasks,
+    concurrency,
+    async (task) => arbitrateCluster(task.clusterIndex, task.cluster, entries, llm, task.maxSimilarity)
+  );
+  for (const arbitration of arbitrationResults) {
     clusterDetails.push(arbitration.detail);
     if (arbitration.warning) {
       warnings.push(arbitration.warning);
     }
     for (const keptIndex of arbitration.detail.kept) {
-      const updatedEntry = arbitration.detail.merged === true && arbitration.detail.mergedContent && keptIndex === arbitration.detail.mergeTarget ? mergeClusterEntry(
+      const updatedEntry = arbitration.detail.merged === true && arbitration.detail.mergedContent && keptIndex === arbitration.detail.mergeTarget ? mergeClusterEntry(arbitration.detail.entryIndices, keptIndex, arbitration.detail.mergedContent, entries) : entries[keptIndex];
       survivorByIndex.set(keptIndex, updatedEntry);
     }
   }
@@ -1128,12 +1139,43 @@ async function dedupBatch(entries, llm, embedding, options = {}) {
     removedCount: entries.length - survivors.length,
     clustersArbitrated: clusterDetails.length,
     singletonsPassedThrough,
-    llmCalls,
+    llmCalls: arbitrationTasks.length,
     clusterDetails,
     warnings,
     similarityThreshold
   };
 }
+function normalizeDedupConcurrency(value) {
+  if (!Number.isInteger(value) || value === void 0 || value <= 0) {
+    return DEFAULT_DEDUP_CONCURRENCY;
+  }
+  return value;
+}
+async function runBoundedArbitrations(tasks, concurrency, worker) {
+  if (tasks.length === 0) {
+    return [];
+  }
+  const results = new Array(tasks.length);
+  const workerCount = Math.min(concurrency, tasks.length);
+  let nextTaskIndex = 0;
+  await Promise.all(
+    Array.from({ length: workerCount }, async () => {
+      while (true) {
+        const taskIndex = nextTaskIndex;
+        nextTaskIndex += 1;
+        if (taskIndex >= tasks.length) {
+          return;
+        }
+        const task = tasks[taskIndex];
+        if (task === void 0) {
+          return;
+        }
+        results[taskIndex] = await worker(task, taskIndex);
+      }
+    })
+  );
+  return results;
+}
 async function arbitrateCluster(clusterIndex, cluster, entries, llm, maxSimilarity) {
   const systemPrompt = buildDedupSystemPrompt();
   const userPrompt = buildDedupUserPrompt(cluster, entries);
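The `runBoundedArbitrations` helper above is what backs the changelog's "deterministic result ordering" claim: each worker grabs the next task index and writes its result into that same slot, so output order follows input order even when tasks complete out of order. A self-contained sketch of the same worker-pool shape with a toy async worker (the `runBounded` and `slowDouble` names are illustrative, not agenr exports):

```js
// Illustrative only: a generic bounded worker pool in the style of runBoundedArbitrations.
async function runBounded(tasks, concurrency, worker) {
  const results = new Array(tasks.length);
  let next = 0;
  const workers = Array.from({ length: Math.min(concurrency, tasks.length) }, async () => {
    while (next < tasks.length) {
      const index = next;
      next += 1;
      // Result is stored at the task's original index, so ordering is deterministic.
      results[index] = await worker(tasks[index], index);
    }
  });
  await Promise.all(workers);
  return results;
}

// Hypothetical worker: finishes in arbitrary order, but output stays aligned with input.
const slowDouble = (n) => new Promise((resolve) => setTimeout(() => resolve(n * 2), Math.random() * 50));

runBounded([1, 2, 3, 4, 5], 2, slowDouble).then((out) => {
  console.log(out); // [2, 4, 6, 8, 10] regardless of completion order
});
```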
@@ -1901,12 +1943,19 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
   let precomputedEmbeddings;
   const claimKeyDiagnostics = /* @__PURE__ */ new Map();
   if (taggedEntries.length > 0) {
+    if (options.skipDedup !== true) {
+      options.onStageProgress?.({
+        phase: "dedup_start",
+        totalEntries: taggedEntries.length
+      });
+    }
     const dedupLlm = options.skipDedup === true ? createNoopLlmPort() : ports.createDedupLlm?.() ?? ports.createExtractionLlm();
     dedupResult = await dedupBatch(
       taggedEntries.map((taggedEntry) => taggedEntry.entry),
       dedupLlm,
       ports.embedding,
       {
+        concurrency: options.concurrency ?? DEFAULT_INGEST_CONCURRENCY,
         skip: options.skipDedup,
         verbose: options.verbose
       }
@@ -1928,6 +1977,12 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
     confidenceThreshold: 0.8,
     eligibleTypes: ["fact", "preference", "decision", "lesson"]
   };
+  if (claimConfig.enabled) {
+    options.onStageProgress?.({
+      phase: "claim_extraction_start",
+      totalEntries: flattenEntries(resultsToStore).length
+    });
+  }
   const extractedClaimKeys = await runBatchClaimExtraction(
     resultsToStore,
     {
@@ -1935,7 +1990,7 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
       db: ports.db
     },
     claimConfig,
-    options.concurrency ?? DEFAULT_INGEST_CONCURRENCY,
+    claimConfig.concurrency ?? options.concurrency ?? DEFAULT_INGEST_CONCURRENCY,
     options.onWarning,
     (entry, diagnostic) => {
       const flattenedIndex = findFlattenedEntryIndex(resultsToStore, entry);
@@ -1957,20 +2012,26 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
       eligibleTypes: ["fact", "preference", "decision", "lesson"]
     }).eligibleTypes
   ) : null;
-  const storeResults = resultsToStore.length === 0 ? /* @__PURE__ */ new Map() : await
-
-
-
-
-
-
-
-
-
-
-
-
-
+  const storeResults = resultsToStore.length === 0 ? /* @__PURE__ */ new Map() : await (async () => {
+    options.onStageProgress?.({
+      phase: "store_start",
+      totalEntries: flattenEntries(resultsToStore).length
+    });
+    return storeExtractedResults(
+      resultsToStore,
+      {
+        db: ports.db,
+        embedding: ports.embedding
+      },
+      {
+        dryRun: options.dryRun,
+        verbose: options.verbose,
+        precomputedEmbeddings,
+        onWarning: options.onWarning,
+        onBulkWriteProgress: options.onBulkWriteProgress
+      }
+    );
+  })();
   return {
     files,
     extractionRuns,
@@ -3217,7 +3278,7 @@ function registerIngestCommand(program2) {
   registerIngestEpisodesCommand(ingestCommand);
 }
 function registerIngestEntriesCommand(parent) {
-  const ingestCommand = parent.command("entries <path>", { isDefault: true }).description("Ingest OpenClaw session files into the knowledge database").option("--verbose", "Show detailed progress").option("--dry-run", "Parse and extract without storing").addOption(new Option2("--whole-file <mode>", "Whole-file mode: auto|force|never").choices(["auto", "force", "never"]).default("auto")).option("--skip-dedup", "Skip within-batch semantic dedup").addOption(new Option2("--concurrency <n>", "Max files to extract in parallel").argParser(parseConcurrency)
+  const ingestCommand = parent.command("entries <path>", { isDefault: true }).description("Ingest OpenClaw session files into the knowledge database").option("--verbose", "Show detailed progress").option("--dry-run", "Parse and extract without storing").addOption(new Option2("--whole-file <mode>", "Whole-file mode: auto|force|never").choices(["auto", "force", "never"]).default("auto")).option("--skip-dedup", "Skip within-batch semantic dedup").addOption(new Option2("--concurrency <n>", "Max files to extract in parallel").argParser(parseConcurrency));
   ingestCommand.action(async (targetPath, options) => {
     const startedAt = Date.now();
     let db = null;
@@ -3231,6 +3292,11 @@ function registerIngestEntriesCommand(parent) {
     const { provider, modelId } = resolveModel(config, "extraction");
     const { provider: dedupProvider, modelId: dedupModelId } = resolveModel(config, "dedup");
     const claimExtractionConfig = resolveClaimExtractionConfig(config);
+    const effectiveConcurrency = commandInput.concurrency ?? claimExtractionConfig.concurrency ?? DEFAULT_INGEST_CONCURRENCY;
+    const cliClaimExtractionConfig = {
+      ...claimExtractionConfig,
+      concurrency: effectiveConcurrency
+    };
    const claimModel = claimExtractionConfig.enabled ? resolveModel(config, "claim") : null;
    const llmApiKey = resolveLlmApiKey(config, provider);
    const dedupApiKey = resolveLlmApiKey(config, dedupProvider);
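The nullish-coalescing chain above is the concurrency precedence the release notes describe: an explicit `--concurrency` flag wins, then the claim-extraction config value, then the built-in default. A tiny illustration of that precedence with hypothetical inputs, assuming the default is 10 in line with the other defaults visible in this diff:

```js
// Hypothetical inputs; only the precedence logic mirrors the diff.
const DEFAULT_INGEST_CONCURRENCY = 10; // assumed value for illustration

function resolveEffectiveConcurrency(cliFlag, configValue) {
  // CLI flag wins, then the claim-extraction config, then the default.
  return cliFlag ?? configValue ?? DEFAULT_INGEST_CONCURRENCY;
}

console.log(resolveEffectiveConcurrency(4, 8));                 // 4  (flag wins)
console.log(resolveEffectiveConcurrency(undefined, 8));         // 8  (config wins)
console.log(resolveEffectiveConcurrency(undefined, undefined)); // 10 (default)
```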
@@ -3254,7 +3320,7 @@ function registerIngestEntriesCommand(parent) {
         formatLabel("Whole-file", commandInput.wholeFile),
         formatLabel("Within-batch dedup", commandInput.skipDedup ? "skipped" : "enabled"),
         formatLabel("Embeddings", "stored"),
-        formatLabel("Concurrency", `${
+        formatLabel("Concurrency", `${effectiveConcurrency}`)
       ].join("\n")
     );
     if (commandInput.dryRun) {
@@ -3277,8 +3343,8 @@ function registerIngestEntriesCommand(parent) {
         } : {}
       },
       {
-        concurrency:
-        claimExtractionConfig,
+        concurrency: effectiveConcurrency,
+        claimExtractionConfig: cliClaimExtractionConfig,
         dryRun: commandInput.dryRun,
         verbose: commandInput.verbose,
         wholeFile: commandInput.wholeFile,
@@ -3287,7 +3353,12 @@ function registerIngestEntriesCommand(parent) {
         onExtractionProgress: (completed, total) => {
           spinner5?.message(`Processing transcripts... (${completed}/${total} extracted)`);
         },
-
+        onStageProgress: (event) => {
+          spinner5?.message(progressMessageForIngestStage(event, files.length));
+        },
+        onBulkWriteProgress: useVerboseBulkWriteProgress ? reportBulkWriteProgress : (event) => {
+          spinner5?.message(progressMessageForBulkWrite(event.phase));
+        }
       }
     );
     spinner5?.stop("Ingest pipeline complete.");
@@ -3396,7 +3467,7 @@ function normalizeIngestEntriesCommand(targetPath, options) {
     dryRun: options.dryRun === true,
     wholeFile: options.wholeFile ?? "auto",
     skipDedup: options.skipDedup === true,
-    concurrency: options.concurrency
+    concurrency: options.concurrency
   };
 }
 function formatClaimKeyHealthSummary(summary) {
@@ -3678,6 +3749,28 @@ function emptyStoreResult2() {
     rejected: 0
   };
 }
+function progressMessageForIngestStage(event, totalFiles) {
+  switch (event.phase) {
+    case "dedup_start":
+      return `Deduplicating ${event.totalEntries} ${pluralize2(event.totalEntries, "entry", "entries")} from ${totalFiles} ${pluralize2(totalFiles, "file")}...`;
+    case "claim_extraction_start":
+      return `Extracting claim keys for ${event.totalEntries} ${pluralize2(event.totalEntries, "entry", "entries")}...`;
+    case "store_start":
+      return `Running store pipeline for ${event.totalEntries} ${pluralize2(event.totalEntries, "entry", "entries")}...`;
+  }
+}
+function progressMessageForBulkWrite(phase) {
+  switch (phase) {
+    case "prepare_start":
+      return "Preparing database indexes for bulk ingest...";
+    case "store_complete":
+      return "Bulk ingest store phase complete...";
+    case "finalize_start":
+      return "Rebuilding indexes after bulk ingest...";
+    case "finalize_complete":
+      return "Bulk ingest finalization complete...";
+  }
+}
 function reportBulkWriteProgress(event) {
   switch (event.phase) {
     case "prepare_start":
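For callers driving ingest programmatically rather than through the CLI spinner, the same `onStageProgress` and `onBulkWriteProgress` callbacks can feed any reporter. A rough sketch, assuming only the event shapes visible in this diff (stage events carry `phase` and `totalEntries`, bulk-write events carry at least `phase`); the reporter itself is hypothetical:

```js
// Hypothetical reporter wired to the callbacks shown in the diff.
// Stage events: { phase, totalEntries }. Bulk-write events: at least { phase }.
function makeIngestReporters(log = console.error) {
  return {
    onStageProgress: (event) => {
      log(`[ingest] stage=${event.phase} entries=${event.totalEntries}`);
    },
    onBulkWriteProgress: (event) => {
      log(`[ingest] bulk-write=${event.phase}`);
    }
  };
}

// Example: spread into the ingest options alongside concurrency, dryRun, etc.
const reporters = makeIngestReporters();
// await ingestDiscoveredFiles(files, ports, { concurrency: 8, ...reporters });
```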
@@ -5334,7 +5427,7 @@ async function runBulkIngest(files, config, prompts) {
         spinner5.message(`Ingesting sessions... (${completed}/${total} extracted)`);
       },
       onBulkWriteProgress: (event) => {
-        spinner5.message(
+        spinner5.message(progressMessageForBulkWrite2(event.phase));
       }
     }
   );
@@ -5433,7 +5526,7 @@ function normalizeSetupProvider(provider) {
 function hasMeteredIngestCost(auth) {
   return auth !== "openai-subscription" && auth !== "anthropic-oauth" && auth !== "anthropic-token";
 }
-function
+function progressMessageForBulkWrite2(phase) {
   switch (phase) {
     case "prepare_start":
       return "Preparing database indexes for bulk ingest...";