agenr 1.8.0 → 1.8.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +39 -0
- package/dist/adapters/openclaw/index.js +3 -3
- package/dist/{chunk-6CEKKEFZ.js → chunk-PVYS6BMG.js} +98 -21
- package/dist/{chunk-LVDQXSHP.js → chunk-SQARNOYD.js} +22 -2
- package/dist/cli.js +159 -33
- package/dist/internal-recall-eval-server.js +1 -1
- package/package.json +21 -21
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,45 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [1.8.2] - 2026-04-12
|
|
6
|
+
|
|
7
|
+
Temporal parsing and ingest workflow polish patch release.
|
|
8
|
+
|
|
9
|
+
### Changed
|
|
10
|
+
|
|
11
|
+
- **Relative time parsing now accepts small spelled-out amounts.** Episode temporal-window parsing now recognizes natural language queries such as "two days ago" for small relative ranges, improving recall routing for conversational phrasing.
|
|
12
|
+
- **Local Cursor scaffolding stays out of repo status.** `.gitignore` now excludes local Cursor rules and skills directories so release work and day-to-day development stay focused on product changes.
|
|
13
|
+
|
|
14
|
+
### Fixed
|
|
15
|
+
|
|
16
|
+
- **Ingest progress propagation now reaches the CLI consistently.** The ingestion app and CLI layers now forward progress events end-to-end so long-running ingest runs surface stage updates reliably.
|
|
17
|
+
|
|
18
|
+
### Validation
|
|
19
|
+
|
|
20
|
+
Changes since last push to `origin/master`:
|
|
21
|
+
|
|
22
|
+
- Update `.gitignore` to exclude cursor rules and skills directories
|
|
23
|
+
- Enhance temporal parsing with small spelled-out relative amounts
|
|
24
|
+
- Enhance ingestion process with progress event propagation
|
|
25
|
+
|
|
26
|
+
## [1.8.1] - 2026-04-11
|
|
27
|
+
|
|
28
|
+
Ingest concurrency and progress reporting patch release.
|
|
29
|
+
|
|
30
|
+
### Fixed
|
|
31
|
+
|
|
32
|
+
- **Dedup now honors bounded concurrency.** Multi-entry similarity clusters are now arbitrated in parallel with deterministic result ordering, and ingest paths explicitly thread configured/default concurrency into dedup instead of leaving arbitration serial.
|
|
33
|
+
- **Claim extraction now uses real batch concurrency without violating ordered semantics.** Batch claim-key extraction now honors configured concurrency, preserves past-only hint visibility via per-entry frozen hint snapshots, and propagates sensible defaults through the relevant ingest/store paths instead of falling back to historical hardcoded single-worker behavior.
|
|
34
|
+
- **Ingest spinner now reports real post-extraction stages.** Non-verbose `agenr ingest entries` runs now surface dedup, claim-key extraction, store pipeline, and bulk-write index preparation/finalization stages instead of looking stuck after `(N/N extracted)`.
|
|
35
|
+
|
|
36
|
+
### Validation
|
|
37
|
+
|
|
38
|
+
Changes since last push to `origin/master`:
|
|
39
|
+
|
|
40
|
+
- Enhance ingestion process with stage progress events
|
|
41
|
+
- Refactor deduplication process to support configurable concurrency
|
|
42
|
+
- Refactor concurrency handling in ingestion process
|
|
43
|
+
|
|
5
44
|
## [1.8.0] - 2026-04-11
|
|
6
45
|
|
|
7
46
|
Claim-centric trust loop close-out release.
|
|
@@ -7,7 +7,7 @@ import {
|
|
|
7
7
|
parseTuiSessionKey,
|
|
8
8
|
readOpenClawSessionsStore,
|
|
9
9
|
storeEntriesDetailed
|
|
10
|
-
} from "../../chunk-6CEKKEFZ.js";
|
|
10
|
+
} from "../../chunk-PVYS6BMG.js";
|
|
11
11
|
import {
|
|
12
12
|
EMBEDDING_DIMENSIONS,
|
|
13
13
|
ENTRY_TYPES,
|
|
@@ -24,7 +24,7 @@ import {
|
|
|
24
24
|
resolveEmbeddingModel,
|
|
25
25
|
runUnifiedRecall,
|
|
26
26
|
validateTemporalValidityRange
|
|
27
|
-
} from "../../chunk-LVDQXSHP.js";
|
|
27
|
+
} from "../../chunk-SQARNOYD.js";
|
|
28
28
|
import {
|
|
29
29
|
resolveClaimSlotPolicy
|
|
30
30
|
} from "../../chunk-GUDCFFRV.js";
|
|
@@ -1055,7 +1055,7 @@ function registerAgenrOpenClawTools(api, servicesPromise, logger) {
|
|
|
1055
1055
|
var openclaw_plugin_default = {
|
|
1056
1056
|
id: "agenr",
|
|
1057
1057
|
name: "agenr",
|
|
1058
|
-
version: "1.8.
|
|
1058
|
+
version: "1.8.1",
|
|
1059
1059
|
description: "agenr memory plugin for OpenClaw",
|
|
1060
1060
|
kind: "memory",
|
|
1061
1061
|
contracts: {
|
|
@@ -22,7 +22,7 @@ import {
|
|
|
22
22
|
readOptionalString,
|
|
23
23
|
readRequiredString,
|
|
24
24
|
validateTemporalValidityRange
|
|
25
|
-
} from "./chunk-LVDQXSHP.js";
|
|
25
|
+
} from "./chunk-SQARNOYD.js";
|
|
26
26
|
import {
|
|
27
27
|
compactClaimKey,
|
|
28
28
|
describeClaimKeyNormalizationFailure,
|
|
@@ -2737,7 +2737,7 @@ async function extractClaimKeyDecision(entry, llm, config, options = {}) {
|
|
|
2737
2737
|
async function getEntityHints(db) {
|
|
2738
2738
|
return db.getDistinctClaimKeyPrefixes();
|
|
2739
2739
|
}
|
|
2740
|
-
async function runBatchClaimExtraction(results, ports, config,
|
|
2740
|
+
async function runBatchClaimExtraction(results, ports, config, concurrency = 10, onWarning, onDiagnostic, onProgress) {
|
|
2741
2741
|
if (!config.enabled) {
|
|
2742
2742
|
return /* @__PURE__ */ new Map();
|
|
2743
2743
|
}
|
|
@@ -2746,8 +2746,14 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
|
|
|
2746
2746
|
const extractedEntries = /* @__PURE__ */ new Map();
|
|
2747
2747
|
const diagnostics = /* @__PURE__ */ new Map();
|
|
2748
2748
|
const retryEntries = [];
|
|
2749
|
-
|
|
2750
|
-
|
|
2749
|
+
const stageSize = normalizeClaimExtractionConcurrency(concurrency);
|
|
2750
|
+
const orderedEntries = results.flatMap((result) => result.entries);
|
|
2751
|
+
const totalEligibleEntries = orderedEntries.filter((entry) => !entry.claim_key && config.eligibleTypes.includes(entry.type)).length;
|
|
2752
|
+
let completedPrimaryEntries = 0;
|
|
2753
|
+
for (let stageStart = 0; stageStart < orderedEntries.length; stageStart += stageSize) {
|
|
2754
|
+
const stageEntries = orderedEntries.slice(stageStart, stageStart + stageSize);
|
|
2755
|
+
const stageRequests = [];
|
|
2756
|
+
for (const entry of stageEntries) {
|
|
2751
2757
|
if (entry.claim_key) {
|
|
2752
2758
|
recordClaimKeyHint(hintState, entry.claim_key);
|
|
2753
2759
|
continue;
|
|
@@ -2765,7 +2771,29 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
|
|
|
2765
2771
|
});
|
|
2766
2772
|
continue;
|
|
2767
2773
|
}
|
|
2768
|
-
|
|
2774
|
+
stageRequests.push({
|
|
2775
|
+
entry,
|
|
2776
|
+
hintSnapshot: buildClaimExtractionHintSnapshot(hintState, entry)
|
|
2777
|
+
});
|
|
2778
|
+
}
|
|
2779
|
+
const stageDecisions = await executeClaimExtractionStageRequests(
|
|
2780
|
+
stageRequests,
|
|
2781
|
+
llm,
|
|
2782
|
+
config,
|
|
2783
|
+
onWarning,
|
|
2784
|
+
completedPrimaryEntries,
|
|
2785
|
+
totalEligibleEntries,
|
|
2786
|
+
(completedEntries, totalEntries) => {
|
|
2787
|
+
completedPrimaryEntries = completedEntries;
|
|
2788
|
+
onProgress?.({
|
|
2789
|
+
phase: "primary",
|
|
2790
|
+
completedEntries,
|
|
2791
|
+
totalEntries,
|
|
2792
|
+
totalEligibleEntries
|
|
2793
|
+
});
|
|
2794
|
+
}
|
|
2795
|
+
);
|
|
2796
|
+
for (const { entry, decision } of stageDecisions) {
|
|
2769
2797
|
diagnostics.set(entry, decision.diagnostic);
|
|
2770
2798
|
if (decision.result?.claimKey) {
|
|
2771
2799
|
applyClaimExtractionResultToEntry(entry, decision.result);
|
|
@@ -2777,18 +2805,40 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
|
|
|
2777
2805
|
}
|
|
2778
2806
|
}
|
|
2779
2807
|
if (retryEntries.length > 0 && extractedEntries.size > 0) {
|
|
2780
|
-
|
|
2781
|
-
|
|
2782
|
-
|
|
2783
|
-
|
|
2784
|
-
const
|
|
2785
|
-
|
|
2786
|
-
|
|
2787
|
-
|
|
2808
|
+
const retryEligibleEntries = retryEntries.filter((entry) => !entry.claim_key);
|
|
2809
|
+
const totalRetryEntries = retryEligibleEntries.length;
|
|
2810
|
+
let completedRetryEntries = 0;
|
|
2811
|
+
for (let stageStart = 0; stageStart < retryEligibleEntries.length; stageStart += stageSize) {
|
|
2812
|
+
const stageRequests = retryEligibleEntries.slice(stageStart, stageStart + stageSize).map((entry) => ({
|
|
2813
|
+
entry,
|
|
2814
|
+
hintSnapshot: buildClaimExtractionHintSnapshot(hintState, entry)
|
|
2815
|
+
}));
|
|
2816
|
+
const stageDecisions = await executeClaimExtractionStageRequests(
|
|
2817
|
+
stageRequests,
|
|
2818
|
+
llm,
|
|
2819
|
+
config,
|
|
2820
|
+
onWarning,
|
|
2821
|
+
completedRetryEntries,
|
|
2822
|
+
totalRetryEntries,
|
|
2823
|
+
(completedEntries, totalEntries) => {
|
|
2824
|
+
completedRetryEntries = completedEntries;
|
|
2825
|
+
onProgress?.({
|
|
2826
|
+
phase: "retry",
|
|
2827
|
+
completedEntries,
|
|
2828
|
+
totalEntries,
|
|
2829
|
+
totalEligibleEntries
|
|
2830
|
+
});
|
|
2831
|
+
}
|
|
2832
|
+
);
|
|
2833
|
+
for (const { entry, decision } of stageDecisions) {
|
|
2834
|
+
diagnostics.set(entry, decision.diagnostic);
|
|
2835
|
+
if (!decision.result?.claimKey) {
|
|
2836
|
+
continue;
|
|
2837
|
+
}
|
|
2838
|
+
applyClaimExtractionResultToEntry(entry, decision.result);
|
|
2839
|
+
recordClaimKeyHint(hintState, decision.result.claimKey);
|
|
2840
|
+
extractedEntries.set(entry, decision.result);
|
|
2788
2841
|
}
|
|
2789
|
-
applyClaimExtractionResultToEntry(entry, decision.result);
|
|
2790
|
-
recordClaimKeyHint(hintState, decision.result.claimKey);
|
|
2791
|
-
extractedEntries.set(entry, decision.result);
|
|
2792
2842
|
}
|
|
2793
2843
|
}
|
|
2794
2844
|
for (const result of results) {
|
|
@@ -2801,7 +2851,34 @@ async function runBatchClaimExtraction(results, ports, config, _concurrency = 10
|
|
|
2801
2851
|
}
|
|
2802
2852
|
return extractedEntries;
|
|
2803
2853
|
}
|
|
2804
|
-
async function extractBatchClaimKeyDecision(entry, llm, config, hintState, onWarning) {
|
|
2854
|
+
async function executeClaimExtractionStageRequests(stageRequests, llm, config, onWarning, initialCompletedEntries, totalEntries, onProgress) {
|
|
2855
|
+
let completedEntries = initialCompletedEntries;
|
|
2856
|
+
return Promise.all(
|
|
2857
|
+
stageRequests.map(async ({ entry, hintSnapshot }) => {
|
|
2858
|
+
const decision = await extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning);
|
|
2859
|
+
completedEntries += 1;
|
|
2860
|
+
onProgress(completedEntries, totalEntries);
|
|
2861
|
+
return {
|
|
2862
|
+
entry,
|
|
2863
|
+
decision
|
|
2864
|
+
};
|
|
2865
|
+
})
|
|
2866
|
+
);
|
|
2867
|
+
}
|
|
2868
|
+
function normalizeClaimExtractionConcurrency(value) {
|
|
2869
|
+
if (!Number.isInteger(value) || value <= 0) {
|
|
2870
|
+
return 10;
|
|
2871
|
+
}
|
|
2872
|
+
return value;
|
|
2873
|
+
}
|
|
2874
|
+
function buildClaimExtractionHintSnapshot(hintState, entry) {
|
|
2875
|
+
return {
|
|
2876
|
+
hints: buildEntryHints(hintState, entry),
|
|
2877
|
+
supportClaimKeys: [...hintState.supportClaimKeys],
|
|
2878
|
+
entityPrefixStats: hintState.entityPrefixStats
|
|
2879
|
+
};
|
|
2880
|
+
}
|
|
2881
|
+
async function extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning) {
|
|
2805
2882
|
try {
|
|
2806
2883
|
return await extractClaimKeyDecision(
|
|
2807
2884
|
{
|
|
@@ -2814,10 +2891,10 @@ async function extractBatchClaimKeyDecision(entry, llm, config, hintState, onWar
|
|
|
2814
2891
|
llm,
|
|
2815
2892
|
config,
|
|
2816
2893
|
{
|
|
2817
|
-
hints:
|
|
2894
|
+
hints: hintSnapshot.hints,
|
|
2818
2895
|
onWarning,
|
|
2819
|
-
supportClaimKeys:
|
|
2820
|
-
entityPrefixStats:
|
|
2896
|
+
supportClaimKeys: hintSnapshot.supportClaimKeys,
|
|
2897
|
+
entityPrefixStats: hintSnapshot.entityPrefixStats
|
|
2821
2898
|
}
|
|
2822
2899
|
);
|
|
2823
2900
|
} catch {
|
|
@@ -3735,7 +3812,7 @@ async function maybeExtractClaimKeys(preparedEntries, options) {
|
|
|
3735
3812
|
db: claimExtraction.db
|
|
3736
3813
|
},
|
|
3737
3814
|
claimExtraction.config,
|
|
3738
|
-
|
|
3815
|
+
claimExtraction.config.concurrency ?? 10,
|
|
3739
3816
|
options.onWarning,
|
|
3740
3817
|
(entry, diagnostic) => {
|
|
3741
3818
|
const preparedEntry = preparedEntries.find((candidate) => candidate.input === entry);
|
|
@@ -4094,6 +4094,20 @@ function parseTimestamp(value) {
|
|
|
4094
4094
|
// src/core/episode/temporal-window.ts
|
|
4095
4095
|
var DAY_IN_MILLISECONDS = 24 * 60 * 60 * 1e3;
|
|
4096
4096
|
var DEFAULT_ANCHOR_RADIUS_DAYS = 3;
|
|
4097
|
+
var RELATIVE_NUMBER_WORDS = /* @__PURE__ */ new Map([
|
|
4098
|
+
["one", 1],
|
|
4099
|
+
["two", 2],
|
|
4100
|
+
["three", 3],
|
|
4101
|
+
["four", 4],
|
|
4102
|
+
["five", 5],
|
|
4103
|
+
["six", 6],
|
|
4104
|
+
["seven", 7],
|
|
4105
|
+
["eight", 8],
|
|
4106
|
+
["nine", 9],
|
|
4107
|
+
["ten", 10],
|
|
4108
|
+
["eleven", 11],
|
|
4109
|
+
["twelve", 12]
|
|
4110
|
+
]);
|
|
4097
4111
|
var MONTH_INDEX = /* @__PURE__ */ new Map([
|
|
4098
4112
|
["january", 0],
|
|
4099
4113
|
["february", 1],
|
|
@@ -4243,9 +4257,9 @@ function parseTemporalWindow(text, now = /* @__PURE__ */ new Date()) {
|
|
|
4243
4257
|
now: referenceNow
|
|
4244
4258
|
});
|
|
4245
4259
|
}
|
|
4246
|
-
const relativeMatch = lower.match(/\b(\d
|
|
4260
|
+
const relativeMatch = lower.match(/\b(\d+|one|two|three|four|five|six|seven|eight|nine|ten|eleven|twelve)\s+(day|days|week|weeks|month|months)\s+ago\b/);
|
|
4247
4261
|
if (relativeMatch?.[1] && relativeMatch[2]) {
|
|
4248
|
-
const amount =
|
|
4262
|
+
const amount = parseRelativeAmount(relativeMatch[1]);
|
|
4249
4263
|
if (Number.isFinite(amount) && amount > 0) {
|
|
4250
4264
|
const unit = relativeMatch[2];
|
|
4251
4265
|
if (unit.startsWith("day")) {
|
|
@@ -4446,6 +4460,12 @@ function buildLocalDateAtNoon(year, month, day) {
|
|
|
4446
4460
|
}
|
|
4447
4461
|
return parsed;
|
|
4448
4462
|
}
|
|
4463
|
+
function parseRelativeAmount(value) {
|
|
4464
|
+
if (/^\d+$/u.test(value)) {
|
|
4465
|
+
return Number(value);
|
|
4466
|
+
}
|
|
4467
|
+
return RELATIVE_NUMBER_WORDS.get(value) ?? Number.NaN;
|
|
4468
|
+
}
|
|
4449
4469
|
function resolveWeekStartDay() {
|
|
4450
4470
|
try {
|
|
4451
4471
|
const locale = Intl.DateTimeFormat().resolvedOptions().locale;
|
package/dist/cli.js
CHANGED
|
@@ -27,7 +27,7 @@ import {
|
|
|
27
27
|
tokenizeGroundingText,
|
|
28
28
|
validateEntriesWithIndexes,
|
|
29
29
|
validateSupersessionRules
|
|
30
|
-
} from "./chunk-6CEKKEFZ.js";
|
|
30
|
+
} from "./chunk-PVYS6BMG.js";
|
|
31
31
|
import {
|
|
32
32
|
DEFAULT_CLAIM_EXTRACTION_CONCURRENCY,
|
|
33
33
|
DEFAULT_SURGEON_CONTEXT_LIMIT,
|
|
@@ -86,7 +86,7 @@ import {
|
|
|
86
86
|
updateEntry,
|
|
87
87
|
validateTemporalValidityRange,
|
|
88
88
|
writeConfig
|
|
89
|
-
} from "./chunk-LVDQXSHP.js";
|
|
89
|
+
} from "./chunk-SQARNOYD.js";
|
|
90
90
|
import {
|
|
91
91
|
compactClaimKey,
|
|
92
92
|
describeClaimKeyNormalizationFailure,
|
|
@@ -1053,11 +1053,13 @@ async function sleep(durationMs) {
|
|
|
1053
1053
|
|
|
1054
1054
|
// src/core/ingestion/dedup.ts
|
|
1055
1055
|
var DEFAULT_SIMILARITY_THRESHOLD = 0.75;
|
|
1056
|
+
var DEFAULT_DEDUP_CONCURRENCY = 10;
|
|
1056
1057
|
function getDefaultDedupSimilarityThreshold() {
|
|
1057
1058
|
return DEFAULT_SIMILARITY_THRESHOLD;
|
|
1058
1059
|
}
|
|
1059
1060
|
async function dedupBatch(entries, llm, embedding, options = {}) {
|
|
1060
1061
|
const similarityThreshold = options.similarityThreshold ?? DEFAULT_SIMILARITY_THRESHOLD;
|
|
1062
|
+
const concurrency = normalizeDedupConcurrency(options.concurrency);
|
|
1061
1063
|
if (entries.length === 0) {
|
|
1062
1064
|
return {
|
|
1063
1065
|
survivors: [],
|
|
@@ -1085,8 +1087,8 @@ async function dedupBatch(entries, llm, embedding, options = {}) {
|
|
|
1085
1087
|
const survivorByIndex = /* @__PURE__ */ new Map();
|
|
1086
1088
|
const clusterDetails = [];
|
|
1087
1089
|
const warnings = [];
|
|
1090
|
+
const arbitrationTasks = [];
|
|
1088
1091
|
let singletonsPassedThrough = 0;
|
|
1089
|
-
let llmCalls = 0;
|
|
1090
1092
|
for (const [clusterIndex, cluster] of clusters.entries()) {
|
|
1091
1093
|
if (cluster.length === 1) {
|
|
1092
1094
|
const entryIndex = cluster[0];
|
|
@@ -1096,15 +1098,37 @@ async function dedupBatch(entries, llm, embedding, options = {}) {
|
|
|
1096
1098
|
}
|
|
1097
1099
|
continue;
|
|
1098
1100
|
}
|
|
1099
|
-
|
|
1100
|
-
|
|
1101
|
-
|
|
1101
|
+
arbitrationTasks.push({
|
|
1102
|
+
clusterIndex,
|
|
1103
|
+
cluster,
|
|
1104
|
+
maxSimilarity: calculateClusterMaxSimilarity(cluster, embeddings)
|
|
1105
|
+
});
|
|
1106
|
+
}
|
|
1107
|
+
const totalArbitratedEntries = arbitrationTasks.reduce((sum, task) => sum + task.cluster.length, 0);
|
|
1108
|
+
let completedClusters = 0;
|
|
1109
|
+
let completedEntries = 0;
|
|
1110
|
+
const arbitrationResults = await runBoundedArbitrations(
|
|
1111
|
+
arbitrationTasks,
|
|
1112
|
+
concurrency,
|
|
1113
|
+
async (task) => arbitrateCluster(task.clusterIndex, task.cluster, entries, llm, task.maxSimilarity),
|
|
1114
|
+
(task) => {
|
|
1115
|
+
completedClusters += 1;
|
|
1116
|
+
completedEntries += task.cluster.length;
|
|
1117
|
+
options.onProgress?.({
|
|
1118
|
+
completedClusters,
|
|
1119
|
+
totalClusters: arbitrationTasks.length,
|
|
1120
|
+
completedEntries,
|
|
1121
|
+
totalEntries: totalArbitratedEntries
|
|
1122
|
+
});
|
|
1123
|
+
}
|
|
1124
|
+
);
|
|
1125
|
+
for (const arbitration of arbitrationResults) {
|
|
1102
1126
|
clusterDetails.push(arbitration.detail);
|
|
1103
1127
|
if (arbitration.warning) {
|
|
1104
1128
|
warnings.push(arbitration.warning);
|
|
1105
1129
|
}
|
|
1106
1130
|
for (const keptIndex of arbitration.detail.kept) {
|
|
1107
|
-
const updatedEntry = arbitration.detail.merged === true && arbitration.detail.mergedContent && keptIndex === arbitration.detail.mergeTarget ? mergeClusterEntry(
|
|
1131
|
+
const updatedEntry = arbitration.detail.merged === true && arbitration.detail.mergedContent && keptIndex === arbitration.detail.mergeTarget ? mergeClusterEntry(arbitration.detail.entryIndices, keptIndex, arbitration.detail.mergedContent, entries) : entries[keptIndex];
|
|
1108
1132
|
survivorByIndex.set(keptIndex, updatedEntry);
|
|
1109
1133
|
}
|
|
1110
1134
|
}
|
|
@@ -1128,12 +1152,44 @@ async function dedupBatch(entries, llm, embedding, options = {}) {
|
|
|
1128
1152
|
removedCount: entries.length - survivors.length,
|
|
1129
1153
|
clustersArbitrated: clusterDetails.length,
|
|
1130
1154
|
singletonsPassedThrough,
|
|
1131
|
-
llmCalls,
|
|
1155
|
+
llmCalls: arbitrationTasks.length,
|
|
1132
1156
|
clusterDetails,
|
|
1133
1157
|
warnings,
|
|
1134
1158
|
similarityThreshold
|
|
1135
1159
|
};
|
|
1136
1160
|
}
|
|
1161
|
+
function normalizeDedupConcurrency(value) {
|
|
1162
|
+
if (!Number.isInteger(value) || value === void 0 || value <= 0) {
|
|
1163
|
+
return DEFAULT_DEDUP_CONCURRENCY;
|
|
1164
|
+
}
|
|
1165
|
+
return value;
|
|
1166
|
+
}
|
|
1167
|
+
async function runBoundedArbitrations(tasks, concurrency, worker, onTaskComplete) {
|
|
1168
|
+
if (tasks.length === 0) {
|
|
1169
|
+
return [];
|
|
1170
|
+
}
|
|
1171
|
+
const results = new Array(tasks.length);
|
|
1172
|
+
const workerCount = Math.min(concurrency, tasks.length);
|
|
1173
|
+
let nextTaskIndex = 0;
|
|
1174
|
+
await Promise.all(
|
|
1175
|
+
Array.from({ length: workerCount }, async () => {
|
|
1176
|
+
while (true) {
|
|
1177
|
+
const taskIndex = nextTaskIndex;
|
|
1178
|
+
nextTaskIndex += 1;
|
|
1179
|
+
if (taskIndex >= tasks.length) {
|
|
1180
|
+
return;
|
|
1181
|
+
}
|
|
1182
|
+
const task = tasks[taskIndex];
|
|
1183
|
+
if (task === void 0) {
|
|
1184
|
+
return;
|
|
1185
|
+
}
|
|
1186
|
+
results[taskIndex] = await worker(task, taskIndex);
|
|
1187
|
+
onTaskComplete?.(task, taskIndex);
|
|
1188
|
+
}
|
|
1189
|
+
})
|
|
1190
|
+
);
|
|
1191
|
+
return results;
|
|
1192
|
+
}
|
|
1137
1193
|
async function arbitrateCluster(clusterIndex, cluster, entries, llm, maxSimilarity) {
|
|
1138
1194
|
const systemPrompt = buildDedupSystemPrompt();
|
|
1139
1195
|
const userPrompt = buildDedupUserPrompt(cluster, entries);
|
|
@@ -1901,14 +1957,22 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
|
|
|
1901
1957
|
let precomputedEmbeddings;
|
|
1902
1958
|
const claimKeyDiagnostics = /* @__PURE__ */ new Map();
|
|
1903
1959
|
if (taggedEntries.length > 0) {
|
|
1960
|
+
if (options.skipDedup !== true) {
|
|
1961
|
+
options.onStageProgress?.({
|
|
1962
|
+
phase: "dedup_start",
|
|
1963
|
+
totalEntries: taggedEntries.length
|
|
1964
|
+
});
|
|
1965
|
+
}
|
|
1904
1966
|
const dedupLlm = options.skipDedup === true ? createNoopLlmPort() : ports.createDedupLlm?.() ?? ports.createExtractionLlm();
|
|
1905
1967
|
dedupResult = await dedupBatch(
|
|
1906
1968
|
taggedEntries.map((taggedEntry) => taggedEntry.entry),
|
|
1907
1969
|
dedupLlm,
|
|
1908
1970
|
ports.embedding,
|
|
1909
1971
|
{
|
|
1972
|
+
concurrency: options.concurrency ?? DEFAULT_INGEST_CONCURRENCY,
|
|
1910
1973
|
skip: options.skipDedup,
|
|
1911
|
-
verbose: options.verbose
|
|
1974
|
+
verbose: options.verbose,
|
|
1975
|
+
onProgress: options.onDedupProgress
|
|
1912
1976
|
}
|
|
1913
1977
|
);
|
|
1914
1978
|
const preservedDedupResult = {
|
|
@@ -1928,6 +1992,12 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
|
|
|
1928
1992
|
confidenceThreshold: 0.8,
|
|
1929
1993
|
eligibleTypes: ["fact", "preference", "decision", "lesson"]
|
|
1930
1994
|
};
|
|
1995
|
+
if (claimConfig.enabled) {
|
|
1996
|
+
options.onStageProgress?.({
|
|
1997
|
+
phase: "claim_extraction_start",
|
|
1998
|
+
totalEntries: flattenEntries(resultsToStore).length
|
|
1999
|
+
});
|
|
2000
|
+
}
|
|
1931
2001
|
const extractedClaimKeys = await runBatchClaimExtraction(
|
|
1932
2002
|
resultsToStore,
|
|
1933
2003
|
{
|
|
@@ -1935,14 +2005,15 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
|
|
|
1935
2005
|
db: ports.db
|
|
1936
2006
|
},
|
|
1937
2007
|
claimConfig,
|
|
1938
|
-
options.concurrency ?? DEFAULT_INGEST_CONCURRENCY,
|
|
2008
|
+
claimConfig.concurrency ?? options.concurrency ?? DEFAULT_INGEST_CONCURRENCY,
|
|
1939
2009
|
options.onWarning,
|
|
1940
2010
|
(entry, diagnostic) => {
|
|
1941
2011
|
const flattenedIndex = findFlattenedEntryIndex(resultsToStore, entry);
|
|
1942
2012
|
if (flattenedIndex >= 0) {
|
|
1943
2013
|
claimKeyDiagnostics.set(flattenedIndex, diagnostic);
|
|
1944
2014
|
}
|
|
1945
|
-
}
|
|
2015
|
+
},
|
|
2016
|
+
options.onClaimExtractionProgress
|
|
1946
2017
|
);
|
|
1947
2018
|
for (const [entry, extractedClaimKey] of extractedClaimKeys) {
|
|
1948
2019
|
applyClaimExtractionResultToEntry(entry, extractedClaimKey);
|
|
@@ -1957,20 +2028,26 @@ async function ingestDiscoveredFiles(files, ports, options = {}) {
|
|
|
1957
2028
|
eligibleTypes: ["fact", "preference", "decision", "lesson"]
|
|
1958
2029
|
}).eligibleTypes
|
|
1959
2030
|
) : null;
|
|
1960
|
-
const storeResults = resultsToStore.length === 0 ? /* @__PURE__ */ new Map() : await
|
|
1961
|
-
|
|
1962
|
-
|
|
1963
|
-
|
|
1964
|
-
|
|
1965
|
-
|
|
1966
|
-
|
|
1967
|
-
|
|
1968
|
-
|
|
1969
|
-
|
|
1970
|
-
|
|
1971
|
-
|
|
1972
|
-
|
|
1973
|
-
|
|
2031
|
+
const storeResults = resultsToStore.length === 0 ? /* @__PURE__ */ new Map() : await (async () => {
|
|
2032
|
+
options.onStageProgress?.({
|
|
2033
|
+
phase: "store_start",
|
|
2034
|
+
totalEntries: flattenEntries(resultsToStore).length
|
|
2035
|
+
});
|
|
2036
|
+
return storeExtractedResults(
|
|
2037
|
+
resultsToStore,
|
|
2038
|
+
{
|
|
2039
|
+
db: ports.db,
|
|
2040
|
+
embedding: ports.embedding
|
|
2041
|
+
},
|
|
2042
|
+
{
|
|
2043
|
+
dryRun: options.dryRun,
|
|
2044
|
+
verbose: options.verbose,
|
|
2045
|
+
precomputedEmbeddings,
|
|
2046
|
+
onWarning: options.onWarning,
|
|
2047
|
+
onBulkWriteProgress: options.onBulkWriteProgress
|
|
2048
|
+
}
|
|
2049
|
+
);
|
|
2050
|
+
})();
|
|
1974
2051
|
return {
|
|
1975
2052
|
files,
|
|
1976
2053
|
extractionRuns,
|
|
@@ -3217,7 +3294,7 @@ function registerIngestCommand(program2) {
|
|
|
3217
3294
|
registerIngestEpisodesCommand(ingestCommand);
|
|
3218
3295
|
}
|
|
3219
3296
|
function registerIngestEntriesCommand(parent) {
|
|
3220
|
-
const ingestCommand = parent.command("entries <path>", { isDefault: true }).description("Ingest OpenClaw session files into the knowledge database").option("--verbose", "Show detailed progress").option("--dry-run", "Parse and extract without storing").addOption(new Option2("--whole-file <mode>", "Whole-file mode: auto|force|never").choices(["auto", "force", "never"]).default("auto")).option("--skip-dedup", "Skip within-batch semantic dedup").addOption(new Option2("--concurrency <n>", "Max files to extract in parallel").argParser(parseConcurrency)
|
|
3297
|
+
const ingestCommand = parent.command("entries <path>", { isDefault: true }).description("Ingest OpenClaw session files into the knowledge database").option("--verbose", "Show detailed progress").option("--dry-run", "Parse and extract without storing").addOption(new Option2("--whole-file <mode>", "Whole-file mode: auto|force|never").choices(["auto", "force", "never"]).default("auto")).option("--skip-dedup", "Skip within-batch semantic dedup").addOption(new Option2("--concurrency <n>", "Max files to extract in parallel").argParser(parseConcurrency));
|
|
3221
3298
|
ingestCommand.action(async (targetPath, options) => {
|
|
3222
3299
|
const startedAt = Date.now();
|
|
3223
3300
|
let db = null;
|
|
@@ -3231,6 +3308,11 @@ function registerIngestEntriesCommand(parent) {
|
|
|
3231
3308
|
const { provider, modelId } = resolveModel(config, "extraction");
|
|
3232
3309
|
const { provider: dedupProvider, modelId: dedupModelId } = resolveModel(config, "dedup");
|
|
3233
3310
|
const claimExtractionConfig = resolveClaimExtractionConfig(config);
|
|
3311
|
+
const effectiveConcurrency = commandInput.concurrency ?? claimExtractionConfig.concurrency ?? DEFAULT_INGEST_CONCURRENCY;
|
|
3312
|
+
const cliClaimExtractionConfig = {
|
|
3313
|
+
...claimExtractionConfig,
|
|
3314
|
+
concurrency: effectiveConcurrency
|
|
3315
|
+
};
|
|
3234
3316
|
const claimModel = claimExtractionConfig.enabled ? resolveModel(config, "claim") : null;
|
|
3235
3317
|
const llmApiKey = resolveLlmApiKey(config, provider);
|
|
3236
3318
|
const dedupApiKey = resolveLlmApiKey(config, dedupProvider);
|
|
@@ -3254,7 +3336,7 @@ function registerIngestEntriesCommand(parent) {
|
|
|
3254
3336
|
formatLabel("Whole-file", commandInput.wholeFile),
|
|
3255
3337
|
formatLabel("Within-batch dedup", commandInput.skipDedup ? "skipped" : "enabled"),
|
|
3256
3338
|
formatLabel("Embeddings", "stored"),
|
|
3257
|
-
formatLabel("Concurrency", `${
|
|
3339
|
+
formatLabel("Concurrency", `${effectiveConcurrency}`)
|
|
3258
3340
|
].join("\n")
|
|
3259
3341
|
);
|
|
3260
3342
|
if (commandInput.dryRun) {
|
|
@@ -3277,8 +3359,8 @@ function registerIngestEntriesCommand(parent) {
|
|
|
3277
3359
|
} : {}
|
|
3278
3360
|
},
|
|
3279
3361
|
{
|
|
3280
|
-
concurrency:
|
|
3281
|
-
claimExtractionConfig,
|
|
3362
|
+
concurrency: effectiveConcurrency,
|
|
3363
|
+
claimExtractionConfig: cliClaimExtractionConfig,
|
|
3282
3364
|
dryRun: commandInput.dryRun,
|
|
3283
3365
|
verbose: commandInput.verbose,
|
|
3284
3366
|
wholeFile: commandInput.wholeFile,
|
|
@@ -3287,7 +3369,18 @@ function registerIngestEntriesCommand(parent) {
|
|
|
3287
3369
|
onExtractionProgress: (completed, total) => {
|
|
3288
3370
|
spinner5?.message(`Processing transcripts... (${completed}/${total} extracted)`);
|
|
3289
3371
|
},
|
|
3290
|
-
|
|
3372
|
+
onStageProgress: (event) => {
|
|
3373
|
+
spinner5?.message(progressMessageForIngestStage(event));
|
|
3374
|
+
},
|
|
3375
|
+
onDedupProgress: (event) => {
|
|
3376
|
+
spinner5?.message(progressMessageForDedup(event));
|
|
3377
|
+
},
|
|
3378
|
+
onClaimExtractionProgress: (event) => {
|
|
3379
|
+
spinner5?.message(progressMessageForClaimExtraction(event));
|
|
3380
|
+
},
|
|
3381
|
+
onBulkWriteProgress: useVerboseBulkWriteProgress ? reportBulkWriteProgress : (event) => {
|
|
3382
|
+
spinner5?.message(progressMessageForBulkWrite(event.phase));
|
|
3383
|
+
}
|
|
3291
3384
|
}
|
|
3292
3385
|
);
|
|
3293
3386
|
spinner5?.stop("Ingest pipeline complete.");
|
|
@@ -3396,7 +3489,7 @@ function normalizeIngestEntriesCommand(targetPath, options) {
|
|
|
3396
3489
|
dryRun: options.dryRun === true,
|
|
3397
3490
|
wholeFile: options.wholeFile ?? "auto",
|
|
3398
3491
|
skipDedup: options.skipDedup === true,
|
|
3399
|
-
concurrency: options.concurrency
|
|
3492
|
+
concurrency: options.concurrency
|
|
3400
3493
|
};
|
|
3401
3494
|
}
|
|
3402
3495
|
function formatClaimKeyHealthSummary(summary) {
|
|
@@ -3678,6 +3771,39 @@ function emptyStoreResult2() {
|
|
|
3678
3771
|
rejected: 0
|
|
3679
3772
|
};
|
|
3680
3773
|
}
|
|
3774
|
+
function progressMessageForIngestStage(event) {
|
|
3775
|
+
switch (event.phase) {
|
|
3776
|
+
case "dedup_start":
|
|
3777
|
+
return "Deduplicating entries...";
|
|
3778
|
+
case "claim_extraction_start":
|
|
3779
|
+
return "Extracting claim keys...";
|
|
3780
|
+
case "store_start":
|
|
3781
|
+
return `Running store pipeline for ${event.totalEntries} ${pluralize2(event.totalEntries, "entry", "entries")}...`;
|
|
3782
|
+
}
|
|
3783
|
+
}
|
|
3784
|
+
function progressMessageForDedup(event) {
|
|
3785
|
+
return `Deduplicating entries... ${event.completedClusters}/${event.totalClusters} ${pluralize2(event.totalClusters, "cluster")} arbitrated (${event.completedEntries}/${event.totalEntries} entries covered)`;
|
|
3786
|
+
}
|
|
3787
|
+
function progressMessageForClaimExtraction(event) {
|
|
3788
|
+
switch (event.phase) {
|
|
3789
|
+
case "primary":
|
|
3790
|
+
return `Extracting claim keys... ${event.completedEntries}/${event.totalEntries} entries`;
|
|
3791
|
+
case "retry":
|
|
3792
|
+
return `Retrying unresolved claim keys... ${event.completedEntries}/${event.totalEntries} entries`;
|
|
3793
|
+
}
|
|
3794
|
+
}
|
|
3795
|
+
function progressMessageForBulkWrite(phase) {
|
|
3796
|
+
switch (phase) {
|
|
3797
|
+
case "prepare_start":
|
|
3798
|
+
return "Preparing database indexes for bulk ingest...";
|
|
3799
|
+
case "store_complete":
|
|
3800
|
+
return "Bulk ingest store phase complete...";
|
|
3801
|
+
case "finalize_start":
|
|
3802
|
+
return "Rebuilding indexes after bulk ingest...";
|
|
3803
|
+
case "finalize_complete":
|
|
3804
|
+
return "Bulk ingest finalization complete...";
|
|
3805
|
+
}
|
|
3806
|
+
}
|
|
3681
3807
|
function reportBulkWriteProgress(event) {
|
|
3682
3808
|
switch (event.phase) {
|
|
3683
3809
|
case "prepare_start":
|
|
@@ -5334,7 +5460,7 @@ async function runBulkIngest(files, config, prompts) {
|
|
|
5334
5460
|
spinner5.message(`Ingesting sessions... (${completed}/${total} extracted)`);
|
|
5335
5461
|
},
|
|
5336
5462
|
onBulkWriteProgress: (event) => {
|
|
5337
|
-
spinner5.message(
|
|
5463
|
+
spinner5.message(progressMessageForBulkWrite2(event.phase));
|
|
5338
5464
|
}
|
|
5339
5465
|
}
|
|
5340
5466
|
);
|
|
@@ -5433,7 +5559,7 @@ function normalizeSetupProvider(provider) {
|
|
|
5433
5559
|
function hasMeteredIngestCost(auth) {
|
|
5434
5560
|
return auth !== "openai-subscription" && auth !== "anthropic-oauth" && auth !== "anthropic-token";
|
|
5435
5561
|
}
|
|
5436
|
-
function
|
|
5562
|
+
function progressMessageForBulkWrite2(phase) {
|
|
5437
5563
|
switch (phase) {
|
|
5438
5564
|
case "prepare_start":
|
|
5439
5565
|
return "Preparing database indexes for bulk ingest...";
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "agenr",
|
|
3
|
-
"version": "1.8.0",
|
|
3
|
+
"version": "1.8.2",
|
|
4
4
|
"description": "Agent memory - local-first knowledge infrastructure for AI agents",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
@@ -12,24 +12,6 @@
|
|
|
12
12
|
"LICENSE",
|
|
13
13
|
"CHANGELOG.md"
|
|
14
14
|
],
|
|
15
|
-
"scripts": {
|
|
16
|
-
"build": "pnpm run build:root && pnpm run build:plugin",
|
|
17
|
-
"build:root": "tsup",
|
|
18
|
-
"build:plugin": "pnpm --filter @agenr/agenr-plugin build",
|
|
19
|
-
"build:debug": "pnpm run build:root:debug && pnpm run build:plugin:debug",
|
|
20
|
-
"build:root:debug": "tsup --sourcemap",
|
|
21
|
-
"build:plugin:debug": "pnpm --filter @agenr/agenr-plugin build:debug",
|
|
22
|
-
"dev": "tsup --watch",
|
|
23
|
-
"internal:recall-eval-server": "pnpm run build:root && node dist/internal-recall-eval-server.js",
|
|
24
|
-
"check": "pnpm format:check && pnpm lint && pnpm typecheck && pnpm test",
|
|
25
|
-
"typecheck": "tsc --noEmit",
|
|
26
|
-
"typecheck:tests": "tsc --noEmit -p tsconfig.tests.json",
|
|
27
|
-
"lint": "eslint .",
|
|
28
|
-
"format": "prettier --write .",
|
|
29
|
-
"format:check": "prettier --check .",
|
|
30
|
-
"test": "vitest run",
|
|
31
|
-
"test:watch": "vitest"
|
|
32
|
-
},
|
|
33
15
|
"dependencies": {
|
|
34
16
|
"@mariozechner/pi-agent-core": "^0.63.1",
|
|
35
17
|
"@mariozechner/pi-ai": "^0.63.2",
|
|
@@ -55,5 +37,23 @@
|
|
|
55
37
|
"engines": {
|
|
56
38
|
"node": ">=24"
|
|
57
39
|
},
|
|
58
|
-
"license": "AGPL-3.0"
|
|
59
|
-
|
|
40
|
+
"license": "AGPL-3.0",
|
|
41
|
+
"scripts": {
|
|
42
|
+
"build": "pnpm run build:root && pnpm run build:plugin",
|
|
43
|
+
"build:root": "tsup",
|
|
44
|
+
"build:plugin": "pnpm --filter @agenr/agenr-plugin build",
|
|
45
|
+
"build:debug": "pnpm run build:root:debug && pnpm run build:plugin:debug",
|
|
46
|
+
"build:root:debug": "tsup --sourcemap",
|
|
47
|
+
"build:plugin:debug": "pnpm --filter @agenr/agenr-plugin build:debug",
|
|
48
|
+
"dev": "tsup --watch",
|
|
49
|
+
"internal:recall-eval-server": "pnpm run build:root && node dist/internal-recall-eval-server.js",
|
|
50
|
+
"check": "pnpm format:check && pnpm lint && pnpm typecheck && pnpm test",
|
|
51
|
+
"typecheck": "tsc --noEmit",
|
|
52
|
+
"typecheck:tests": "tsc --noEmit -p tsconfig.tests.json",
|
|
53
|
+
"lint": "eslint .",
|
|
54
|
+
"format": "prettier --write .",
|
|
55
|
+
"format:check": "prettier --check .",
|
|
56
|
+
"test": "vitest run",
|
|
57
|
+
"test:watch": "vitest"
|
|
58
|
+
}
|
|
59
|
+
}
|