openclaw-cortex-memory 0.1.0-Alpha.8 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +347 -299
- package/SIGNATURE.md +7 -0
- package/SKILL.md +96 -350
- package/dist/index.d.ts +93 -23
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1234 -1318
- package/dist/index.js.map +1 -1
- package/dist/openclaw.plugin.json +377 -18
- package/dist/src/dedup/three_stage_deduplicator.d.ts.map +1 -1
- package/dist/src/dedup/three_stage_deduplicator.js +13 -3
- package/dist/src/dedup/three_stage_deduplicator.js.map +1 -1
- package/dist/src/engine/memory_engine.d.ts +6 -1
- package/dist/src/engine/memory_engine.d.ts.map +1 -1
- package/dist/src/engine/ts_engine.d.ts +208 -0
- package/dist/src/engine/ts_engine.d.ts.map +1 -1
- package/dist/src/engine/ts_engine.js +1353 -84
- package/dist/src/engine/ts_engine.js.map +1 -1
- package/dist/src/engine/types.d.ts +27 -0
- package/dist/src/engine/types.d.ts.map +1 -1
- package/dist/src/graph/ontology.d.ts +87 -15
- package/dist/src/graph/ontology.d.ts.map +1 -1
- package/dist/src/graph/ontology.js +999 -12
- package/dist/src/graph/ontology.js.map +1 -1
- package/dist/src/net/http_post.d.ts +17 -0
- package/dist/src/net/http_post.d.ts.map +1 -0
- package/dist/src/net/http_post.js +56 -0
- package/dist/src/net/http_post.js.map +1 -0
- package/dist/src/quality/llm_output_validator.d.ts +65 -0
- package/dist/src/quality/llm_output_validator.d.ts.map +1 -0
- package/dist/src/quality/llm_output_validator.js +635 -0
- package/dist/src/quality/llm_output_validator.js.map +1 -0
- package/dist/src/reflect/reflector.d.ts.map +1 -1
- package/dist/src/reflect/reflector.js +296 -26
- package/dist/src/reflect/reflector.js.map +1 -1
- package/dist/src/rules/rule_store.d.ts.map +1 -1
- package/dist/src/rules/rule_store.js +75 -16
- package/dist/src/rules/rule_store.js.map +1 -1
- package/dist/src/session/session_end.d.ts +20 -42
- package/dist/src/session/session_end.d.ts.map +1 -1
- package/dist/src/session/session_end.js +31 -214
- package/dist/src/session/session_end.js.map +1 -1
- package/dist/src/store/archive_store.d.ts +52 -7
- package/dist/src/store/archive_store.d.ts.map +1 -1
- package/dist/src/store/archive_store.js +526 -96
- package/dist/src/store/archive_store.js.map +1 -1
- package/dist/src/store/embedding_utils.d.ts +32 -0
- package/dist/src/store/embedding_utils.d.ts.map +1 -0
- package/dist/src/store/embedding_utils.js +173 -0
- package/dist/src/store/embedding_utils.js.map +1 -0
- package/dist/src/store/graph_memory_store.d.ts +115 -0
- package/dist/src/store/graph_memory_store.d.ts.map +1 -0
- package/dist/src/store/graph_memory_store.js +1061 -0
- package/dist/src/store/graph_memory_store.js.map +1 -0
- package/dist/src/store/read_store.d.ts +95 -0
- package/dist/src/store/read_store.d.ts.map +1 -1
- package/dist/src/store/read_store.js +2108 -268
- package/dist/src/store/read_store.js.map +1 -1
- package/dist/src/store/vector_store.d.ts +15 -0
- package/dist/src/store/vector_store.d.ts.map +1 -1
- package/dist/src/store/vector_store.js +75 -1
- package/dist/src/store/vector_store.js.map +1 -1
- package/dist/src/store/write_store.d.ts +46 -0
- package/dist/src/store/write_store.d.ts.map +1 -1
- package/dist/src/store/write_store.js +399 -50
- package/dist/src/store/write_store.js.map +1 -1
- package/dist/src/sync/session_sync.d.ts +115 -2
- package/dist/src/sync/session_sync.d.ts.map +1 -1
- package/dist/src/sync/session_sync.js +2497 -44
- package/dist/src/sync/session_sync.js.map +1 -1
- package/dist/src/utils/runtime_env.d.ts +4 -0
- package/dist/src/utils/runtime_env.d.ts.map +1 -0
- package/dist/src/utils/runtime_env.js +51 -0
- package/dist/src/utils/runtime_env.js.map +1 -0
- package/dist/src/wiki/wiki_linter.d.ts +26 -0
- package/dist/src/wiki/wiki_linter.d.ts.map +1 -0
- package/dist/src/wiki/wiki_linter.js +339 -0
- package/dist/src/wiki/wiki_linter.js.map +1 -0
- package/dist/src/wiki/wiki_logger.d.ts +10 -0
- package/dist/src/wiki/wiki_logger.d.ts.map +1 -0
- package/dist/src/wiki/wiki_logger.js +78 -0
- package/dist/src/wiki/wiki_logger.js.map +1 -0
- package/dist/src/wiki/wiki_maintainer.d.ts +39 -0
- package/dist/src/wiki/wiki_maintainer.d.ts.map +1 -0
- package/dist/src/wiki/wiki_maintainer.js +38 -0
- package/dist/src/wiki/wiki_maintainer.js.map +1 -0
- package/dist/src/wiki/wiki_projector.d.ts +35 -0
- package/dist/src/wiki/wiki_projector.d.ts.map +1 -0
- package/dist/src/wiki/wiki_projector.js +1151 -0
- package/dist/src/wiki/wiki_projector.js.map +1 -0
- package/dist/src/wiki/wiki_queue.d.ts +29 -0
- package/dist/src/wiki/wiki_queue.d.ts.map +1 -0
- package/dist/src/wiki/wiki_queue.js +137 -0
- package/dist/src/wiki/wiki_queue.js.map +1 -0
- package/openclaw.plugin.json +377 -18
- package/package.json +52 -5
- package/schema/graph.schema.yaml +330 -0
- package/scripts/cli.js +80 -26
- package/scripts/repair-memory.js +321 -0
- package/scripts/uninstall.js +7 -1
- package/skills/cortex-memory/SKILL.md +83 -0
- package/skills/cortex-memory/references/agent-manual.md +127 -0
- package/skills/cortex-memory/references/configuration.md +109 -0
- package/skills/cortex-memory/references/publish-checklist.md +45 -0
- package/skills/cortex-memory/references/system-prompt-template.md +27 -0
- package/skills/cortex-memory/references/tools.md +191 -0
|
@@ -36,22 +36,197 @@ Object.defineProperty(exports, "__esModule", { value: true });
|
|
|
36
36
|
exports.createArchiveStore = createArchiveStore;
|
|
37
37
|
const fs = __importStar(require("fs"));
|
|
38
38
|
const path = __importStar(require("path"));
|
|
39
|
+
const http_post_1 = require("../net/http_post");
|
|
39
40
|
const ontology_1 = require("../graph/ontology");
|
|
41
|
+
const llm_output_validator_1 = require("../quality/llm_output_validator");
|
|
40
42
|
/**
 * Normalizes a configured API base URL so a path can be appended with a single "/".
 *
 * @param {unknown} value - Base URL taken from configuration; may be missing or not a string.
 * @returns {string} The URL with surrounding whitespace and all trailing slashes removed,
 *   or "" when no usable string was supplied.
 */
function normalizeBaseUrl(value) {
    // Configuration values are untyped at runtime; anything but a string is treated as "not set".
    if (typeof value !== "string") {
        return "";
    }
    const trimmed = value.trim();
    let end = trimmed.length;
    // Remove every trailing slash (not only one) so "host//" never becomes "host//embeddings".
    while (end > 0 && trimmed[end - 1] === "/") {
        end -= 1;
    }
    return trimmed.slice(0, end);
}
|
|
45
|
-
function
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
if (score >= 0.75) {
|
|
49
|
-
return { score, level: "high" };
|
|
47
|
+
/**
 * Resolves the maximum number of source-text characters kept per archive record.
 *
 * @param {unknown} value - Configured limit; only finite numbers are honoured.
 * @returns {number} The configured limit rounded down and raised to at least 1000,
 *   or 500000 when the setting is absent or not a finite number.
 */
function resolveArchiveSourceCharLimit(value) {
    const fallbackLimit = 500000;
    const lowestLimit = 1000;
    const usable = typeof value === "number" && Number.isFinite(value);
    if (!usable) {
        return fallbackLimit;
    }
    const wholeChars = Math.floor(value);
    return wholeChars < lowestLimit ? lowestLimit : wholeChars;
}
|
|
53
|
+
/**
 * Keeps at most `maxChars` UTF-16 code units from the END of a text.
 *
 * @param {unknown} text - Text to clamp; non-string values are treated as empty.
 * @param {number} maxChars - Maximum length; a non-finite or non-positive value means "no limit".
 * @returns {string} The trimmed text, or its trimmed tail when it exceeds the limit.
 */
function clampTailText(text, maxChars) {
    const source = typeof text === "string" ? text.trim() : "";
    if (!source) {
        return "";
    }
    if (!Number.isFinite(maxChars) || maxChars <= 0 || source.length <= maxChars) {
        return source;
    }
    const limit = Math.floor(maxChars);
    if (limit <= 0) {
        // A fractional limit below 1 floors to 0, which is handled like "no limit".
        return source;
    }
    let start = source.length - limit;
    const lead = source.charCodeAt(start);
    const before = source.charCodeAt(start - 1);
    const leadIsLowSurrogate = lead >= 0xdc00 && lead <= 0xdfff;
    const beforeIsHighSurrogate = before >= 0xd800 && before <= 0xdbff;
    // Never start the tail in the middle of a surrogate pair: that would leave a lone low
    // surrogate (an invalid character) at the front of the stored text.
    if (leadIsLowSurrogate && beforeIsHighSurrogate) {
        start += 1;
    }
    return source.slice(start).trim();
}
|
|
62
|
+
// Matches an acknowledgement word or phrase at the START of a line. On its own it says nothing
// about the rest of the line, so it must not be used alone to decide that a line is noise.
const ARCHIVE_LOW_INFORMATION_LINE = /^(ok|okay|got it|roger|noted|sure|thanks|thank you|received|copy that|understood|好的|收到|明白|了解|谢谢|感谢|可以|行|嗯|嗯嗯|没问题)(?:\b|$)/i;
// Whitespace, punctuation and symbols (including emoji) that may separate acknowledgements.
const ARCHIVE_ACK_SEPARATOR = /^[\s\p{P}\p{S}]+/u;
/**
 * Reports whether a line consists only of acknowledgement phrases and separators,
 * e.g. "ok", "OK, thanks!" — but not "Sure, the root cause was a missing index".
 *
 * @param {string} content - A non-empty line with any leading "[...]" tag already removed.
 * @returns {boolean} True when nothing but acknowledgements remains.
 */
function isArchiveAcknowledgementOnly(content) {
    let rest = content;
    while (rest) {
        const ack = ARCHIVE_LOW_INFORMATION_LINE.exec(rest);
        if (!ack) {
            return false;
        }
        // Every alternative is non-empty, so `rest` shrinks on each pass and the loop ends.
        rest = rest.slice(ack[0].length).replace(ARCHIVE_ACK_SEPARATOR, "");
    }
    return true;
}
/**
 * Removes low-value lines from a transcript before it is archived.
 *
 * A line is dropped when it carries no "signal" (URL, domain/file-like token, one of the
 * characters ` # / : \, an @mention, or a number of two or more digits) and is either a pure
 * acknowledgement or a case-insensitive repeat of an earlier line. A leading "[...]" tag is
 * ignored for these checks but preserved in the output.
 *
 * @param {unknown} text - Raw source text; non-string values are treated as empty.
 * @returns {string} The kept lines joined by "\n"; the trimmed input when every line would be
 *   dropped; "" for empty input.
 */
function denoiseArchiveSourceText(text) {
    const raw = typeof text === "string" ? text.trim() : "";
    if (!raw) {
        return "";
    }
    const output = [];
    const seen = new Set();
    for (const line of raw.split(/\r?\n/)) {
        const trimmed = line.trim();
        if (!trimmed) {
            continue;
        }
        const content = trimmed.replace(/^\[[^\]]+\]\s*/, "").trim();
        if (!content) {
            continue;
        }
        const hasSignal = /(https?:\/\/|www\.|[A-Za-z0-9._-]+\.[A-Za-z]{2,}|[`#/:\\]|@\w+|\b\d{2,}\b)/.test(content);
        // Only discard the line when the WHOLE line is acknowledgement filler; a line that
        // merely starts with "ok"/"sure" may still carry the actual answer.
        if (!hasSignal && isArchiveAcknowledgementOnly(content)) {
            continue;
        }
        const dedupKey = content.toLowerCase();
        if (!hasSignal && seen.has(dedupKey)) {
            continue;
        }
        seen.add(dedupKey);
        output.push(trimmed);
    }
    // If filtering removed everything, keep the original text rather than archive nothing.
    return output.length > 0 ? output.join("\n") : raw;
}
|
|
89
|
+
/**
 * Flattens a value into single-line text.
 *
 * @param {unknown} value - Any value; falsy values are treated as an empty string.
 * @returns {string} The string form with every whitespace run collapsed to one space
 *   and no leading or trailing whitespace.
 */
function normalizeOneLineText(value) {
    const words = String(value || "").split(/\s+/);
    // Splitting leaves empty entries at the edges when the text starts or ends with whitespace.
    return words.filter(Boolean).join(" ");
}
|
|
92
|
+
// Keyword heuristics (Chinese and English) used to score archive candidates.
// All patterns are plain substring alternations; none of them is global or sticky.

// Wording that suggests the text contains a request or task assignment.
const ARCHIVE_TASK_INSTRUCTION_PATTERNS = [
    /请|需要|帮我|麻烦|任务|需求|实现|修复|排查|优化|上线|部署|整理|编写|启用|查看|检查|分析|导入|生成|提高|改进/,
    /please|can you|need to|task|implement|fix|investigate|optimi[sz]e|deploy|enable|review/i,
];
// Wording that suggests a report that work was finished.
const ARCHIVE_COMPLETION_REPORT_PATTERNS = [
    /已完成|完成了|处理完|搞定|已修复|修复了|已实现|已上线|已部署|结果|汇报|完成情况|报告|已通过|验证通过|测试通过/,
    /done|completed|fixed|implemented|deployed|resolved|report|summary|finished/i,
];
// Wording that suggests the user accepted or confirmed the outcome.
const ARCHIVE_USER_ACCEPTANCE_PATTERNS = [
    /确认|认可|通过|验收|OK|可以|好的|收到|辛苦|谢谢|没问题|就这样|接受|效果可以/,
    /approved|accepted|looks good|great|works|thank you|confirmed/i,
];
// Wording that describes a concrete action being taken.
const ARCHIVE_ACTION_PATTERNS = [
    /决定|完成|修复|发布|上线|部署|提交|交付|验证|关闭|推进|落地|实施|启用|导入|生成|优化|改进/,
    /decide|complete|fix|release|deploy|ship|deliver|verify|close|implement|enable|migrate/i,
];
// Wording that indicates a failure or blocker.
const ARCHIVE_FAILURE_PATTERNS = [
    /失败|报错|错误|异常|阻塞|卡住|不行|超时|回滚|故障|不通过|无法/,
    /failed|error|exception|blocked|timeout|rollback|incident/i,
];
// Wording that indicates a successful outcome.
const ARCHIVE_SUCCESS_PATTERNS = [
    /成功|完成|修复|解决|通过|已上线|稳定|正常|恢复|可用|生效/,
    /success|completed|fixed|resolved|passed|stable|recovered|works/i,
];
/**
 * Reports whether at least one pattern matches the text.
 *
 * @param {string} text - Text to inspect.
 * @param {RegExp[]} patterns - Patterns to try, in order.
 * @returns {boolean} True as soon as any pattern matches.
 */
function matchesAnyPattern(text, patterns) {
    return patterns.some(pattern => pattern.test(text));
}
/**
 * Finds the smallest index at which any of the patterns matches.
 *
 * @param {string} text - Text to search.
 * @param {RegExp[]} patterns - Patterns to try.
 * @returns {number} The earliest match index, or -1 when nothing matches.
 */
function firstMatchIndex(text, patterns) {
    let minIndex = -1;
    for (const pattern of patterns) {
        const idx = text.search(pattern);
        if (idx < 0) {
            continue;
        }
        if (minIndex < 0 || idx < minIndex) {
            minIndex = idx;
        }
    }
    return minIndex;
}
/**
 * Finds where the earliest match of any pattern ENDS.
 *
 * @param {string} text - Text to search.
 * @param {RegExp[]} patterns - Patterns to try.
 * @returns {number} Index just past the earliest match (the longest one on a tie),
 *   or -1 when nothing matches.
 */
function findEarliestArchiveMatchEnd(text, patterns) {
    let bestStart = -1;
    let bestEnd = -1;
    for (const pattern of patterns) {
        const start = text.search(pattern);
        if (start < 0) {
            continue;
        }
        // Re-match from the found position to learn how long the match is.
        const hit = text.slice(start).match(pattern);
        const end = start + (hit ? hit[0].length : 0);
        if (bestStart < 0 || start < bestStart || (start === bestStart && end > bestEnd)) {
            bestStart = start;
            bestEnd = end;
        }
    }
    return bestEnd;
}
/**
 * Scores how complete and useful an event looks for archiving, using keyword heuristics.
 *
 * @param {object} args - Event fields; each may be missing. Non-string values count as empty.
 * @param {string} [args.summary] - One-line summary; longer summaries score higher.
 * @param {string} [args.cause] - What triggered the work.
 * @param {string} [args.process] - What was done.
 * @param {string} [args.result] - What came out of it.
 * @param {string} [args.outcome] - Free-form outcome text.
 * @param {string} [args.sourceText] - Raw source text the event was derived from.
 * @returns {{score: number, level: "high"|"medium"|"low", signals: object}} `score` is in
 *   [0, 1] rounded to two decimals; `level` is "high" from 0.75 and "medium" from 0.4;
 *   `signals` exposes the boolean findings behind the score.
 */
function scoreQuality(args) {
    const fields = args ?? {};
    const clean = value => (typeof value === "string" ? value.trim() : "");
    const summary = clean(fields.summary);
    const cause = clean(fields.cause);
    const process = clean(fields.process);
    const result = clean(fields.result);
    const outcome = clean(fields.outcome);
    const sourceText = clean(fields.sourceText);
    const mergedText = [summary, cause, process, result, outcome, sourceText].filter(Boolean).join("\n");
    const hasStructuredTriplet = cause.length > 0 && process.length > 0 && result.length > 0;
    const hasTaskInstruction = matchesAnyPattern(mergedText, ARCHIVE_TASK_INSTRUCTION_PATTERNS);
    const hasCompletionReport = matchesAnyPattern(mergedText, ARCHIVE_COMPLETION_REPORT_PATTERNS);
    const hasUserAcceptance = matchesAnyPattern(mergedText, ARCHIVE_USER_ACCEPTANCE_PATTERNS);
    const hasAction = matchesAnyPattern(mergedText, ARCHIVE_ACTION_PATTERNS);
    const hasFailure = matchesAnyPattern(mergedText, ARCHIVE_FAILURE_PATTERNS);
    const hasSuccess = matchesAnyPattern(mergedText, ARCHIVE_SUCCESS_PATTERNS);
    const hasOutcome = outcome.length >= 6 || hasSuccess;
    // "Failure then success" means a success word appears AFTER the first failure word has
    // ended. Looking only past the end of the failure match avoids two mistakes: counting the
    // "通过" inside "不通过" as a success, and missing a recovery because an unrelated success
    // word (often in the summary) happens to come before the failure.
    const failureEnd = hasFailure && hasSuccess
        ? findEarliestArchiveMatchEnd(mergedText, ARCHIVE_FAILURE_PATTERNS)
        : -1;
    const failThenSuccess = failureEnd >= 0
        && matchesAnyPattern(mergedText.slice(failureEnd), ARCHIVE_SUCCESS_PATTERNS);
    const workflowComplete = hasStructuredTriplet || (hasTaskInstruction && hasCompletionReport && hasUserAcceptance);
    // Weighted checklist; the order is kept stable so rounding stays reproducible.
    const weightedSignals = [
        [summary.length >= 24, 0.1],
        [summary.length >= 60, 0.1],
        [summary.length >= 120, 0.06],
        [summary.length >= 180, 0.04],
        [hasStructuredTriplet, 0.22],
        [hasAction, 0.14],
        [hasOutcome, 0.12],
        [hasTaskInstruction, 0.12],
        [hasCompletionReport, 0.12],
        [hasUserAcceptance, 0.14],
        [workflowComplete, 0.12],
        [failThenSuccess, 0.1],
    ];
    let score = 0;
    for (const [present, weight] of weightedSignals) {
        if (present) {
            score += weight;
        }
    }
    const normalizedScore = Math.max(0, Math.min(1, Number(score.toFixed(2))));
    let level = "low";
    if (normalizedScore >= 0.75) {
        level = "high";
    }
    else if (normalizedScore >= 0.4) {
        level = "medium";
    }
    return {
        score: normalizedScore,
        level,
        signals: { hasStructuredTriplet, hasTaskInstruction, hasCompletionReport, hasUserAcceptance, workflowComplete, failThenSuccess },
    };
}
|
|
198
|
+
/**
 * Canonicalizes text for duplicate detection.
 *
 * @param {unknown} value - Any value; falsy values are treated as empty.
 * @returns {string} Lower-cased text in which every run of characters other than letters,
 *   digits and `: . / \ # @ _ -` is replaced by one space, with the ends trimmed.
 */
function normalizeArchiveDedupText(value) {
    return String(value || "")
        .toLowerCase()
        .replace(/[^\p{L}\p{N}:./\\#@_-]+/gu, " ")
        .replace(/\s+/g, " ")
        .trim();
}
/**
 * Builds the multi-line text that represents an event for duplicate detection.
 *
 * Each line is a "label:value" pair (canonical id, event type, summary, cause, process,
 * result, then entities and relations when present), normalized with
 * normalizeArchiveDedupText. Entities and relations are sorted so their input order does
 * not matter.
 *
 * @param {object} args - Event fields.
 * @param {string} [args.canonicalId]
 * @param {string} args.eventType
 * @param {string} args.summary
 * @param {string} [args.cause]
 * @param {string} [args.process]
 * @param {string} [args.result] - Falls back to `args.outcome` when empty.
 * @param {string} [args.outcome]
 * @param {unknown[]} [args.entities] - Entity names; ignored unless an array.
 * @param {unknown[]} [args.relations] - Objects with `source`, `type`, `target`;
 *   ignored unless an array.
 * @returns {string} The normalized lines joined by "\n".
 */
function buildArchiveDedupText(args) {
    const relationText = Array.isArray(args.relations)
        ? args.relations
            // Relations may arrive unvalidated and can contain null or other non-object
            // entries; optional chaining turns those into an empty triple, which the filter
            // below discards, instead of throwing.
            .map(relation => `${relation?.source || ""}|${relation?.type || ""}|${relation?.target || ""}`)
            .filter(item => item.replace(/\|/g, "").trim().length > 0)
            .sort()
            .join("\n")
        : "";
    const entityText = Array.isArray(args.entities)
        ? args.entities.map(item => String(item || "").trim()).filter(Boolean).sort().join("\n")
        : "";
    return [
        `canonical:${args.canonicalId || ""}`,
        `event_type:${args.eventType}`,
        `summary:${args.summary}`,
        `cause:${args.cause || ""}`,
        `process:${args.process || ""}`,
        `result:${args.result || args.outcome || ""}`,
        entityText ? `entities:${entityText}` : "",
        relationText ? `relations:${relationText}` : "",
    ]
        .filter(Boolean)
        .map(normalizeArchiveDedupText)
        .filter(Boolean)
        .join("\n");
}
|
|
56
231
|
async function requestEmbedding(args) {
|
|
57
232
|
const endpoint = args.baseUrl.endsWith("/embeddings") ? args.baseUrl : `${args.baseUrl}/embeddings`;
|
|
@@ -70,24 +245,18 @@ async function requestEmbedding(args) {
|
|
|
70
245
|
: 4;
|
|
71
246
|
let lastError = null;
|
|
72
247
|
for (let attempt = 0; attempt < maxRetries; attempt += 1) {
|
|
73
|
-
const
|
|
74
|
-
|
|
248
|
+
const response = await (0, http_post_1.postJsonWithTimeout)({
|
|
249
|
+
endpoint,
|
|
250
|
+
apiKey: args.apiKey,
|
|
251
|
+
body,
|
|
252
|
+
timeoutMs,
|
|
253
|
+
});
|
|
254
|
+
if (!response.ok) {
|
|
255
|
+
lastError = new Error(response.status > 0 ? `embedding_http_${response.status}` : (response.error || "embedding_network_error"));
|
|
256
|
+
continue;
|
|
257
|
+
}
|
|
75
258
|
try {
|
|
76
|
-
const
|
|
77
|
-
method: "POST",
|
|
78
|
-
headers: {
|
|
79
|
-
"content-type": "application/json",
|
|
80
|
-
authorization: `Bearer ${args.apiKey}`,
|
|
81
|
-
},
|
|
82
|
-
body: JSON.stringify(body),
|
|
83
|
-
signal: controller.signal,
|
|
84
|
-
});
|
|
85
|
-
clearTimeout(timeoutId);
|
|
86
|
-
if (!response.ok) {
|
|
87
|
-
lastError = new Error(`embedding_http_${response.status}`);
|
|
88
|
-
continue;
|
|
89
|
-
}
|
|
90
|
-
const json = await response.json();
|
|
259
|
+
const json = (response.json || {});
|
|
91
260
|
const embedding = json?.data?.[0]?.embedding;
|
|
92
261
|
if (Array.isArray(embedding) && embedding.length > 0) {
|
|
93
262
|
return embedding.filter(item => Number.isFinite(item));
|
|
@@ -95,7 +264,6 @@ async function requestEmbedding(args) {
|
|
|
95
264
|
lastError = new Error("embedding_empty");
|
|
96
265
|
}
|
|
97
266
|
catch (error) {
|
|
98
|
-
clearTimeout(timeoutId);
|
|
99
267
|
lastError = error;
|
|
100
268
|
}
|
|
101
269
|
if (attempt < maxRetries - 1) {
|
|
@@ -113,10 +281,126 @@ function ensureDirForFile(filePath) {
|
|
|
113
281
|
fs.mkdirSync(dir, { recursive: true });
|
|
114
282
|
}
|
|
115
283
|
}
|
|
284
|
+
/**
 * Gives a rough token count for mixed Latin/CJK text.
 *
 * Words separated by whitespace or punctuation count as one token each. Chinese, Japanese
 * and Korean characters are written without spaces, so each of them counts as one token;
 * otherwise a whole CJK clause would be counted as a single token.
 *
 * @param {unknown} text - Text to measure; non-string values count as empty.
 * @returns {number} Estimated number of tokens (0 for empty input).
 */
function estimateTokenCount(text) {
    if (typeof text !== "string" || text.length === 0) {
        return 0;
    }
    const cjkChar = /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/gu;
    // ASCII separators plus their fullwidth/CJK counterparts, written as escapes so they
    // survive any Unicode normalization of this file:
    // U+FF0C comma, U+3002 full stop, U+FF1B semicolon, U+FF1A colon, U+FF01 exclamation mark,
    // U+FF1F question mark, U+3001 enumeration comma, U+FF08 / U+FF09 parentheses.
    const separators = /[\s,.;:!?\uFF0C\u3002\uFF1B\uFF1A\uFF01\uFF1F\u3001()\uFF08\uFF09[\]{}"'`~]+/;
    const cjkMatches = text.match(cjkChar);
    const cjkCount = cjkMatches ? cjkMatches.length : 0;
    // Replace counted CJK characters with spaces so neighbouring Latin words stay separate.
    const wordCount = text
        .replace(cjkChar, " ")
        .split(separators)
        .filter(Boolean)
        .length;
    return cjkCount + wordCount;
}
|
|
291
|
+
/**
 * Classifies where an event came from, based on its `source_file` and `actor` fields.
 *
 * @param {{source_file?: string, actor?: string}} event - Event being archived.
 * @returns {"session_end"|"sync"|"manual"} "session_end" when either field mentions
 *   "session_end" (checked first), "sync" when either mentions "sync", otherwise "manual".
 *   Matching is case-insensitive.
 */
function inferGateSource(event) {
    const origins = [event.source_file, event.actor].map(field => (field || "").toLowerCase());
    const mentions = marker => origins.some(origin => origin.includes(marker));
    if (mentions("session_end")) {
        return "session_end";
    }
    return mentions("sync") ? "sync" : "manual";
}
|
|
302
|
+
/**
 * Splits text into overlapping chunks, preferring to end each chunk right after sentence
 * punctuation.
 *
 * For each chunk the ideal end is `chunkSize` characters after the start. If that is not the
 * end of the text, the nearest sentence boundary is looked for first backwards (but not
 * before 45% of the chunk size) and then forwards (up to 20% beyond the chunk size). The
 * next chunk starts `chunkOverlap` characters before the previous end.
 *
 * @param {unknown} text - Text to split; non-string or empty input yields no chunks.
 * @param {number} chunkSize - Target chunk length; values that are not finite or below 200
 *   fall back to 600.
 * @param {number} chunkOverlap - Desired overlap; values that are not finite or negative fall
 *   back to 100, and the overlap is capped at `chunkSize - 50`.
 * @returns {{index: number, start: number, end: number, text: string}[]} Chunks in order.
 *   `start`/`end` are offsets into the input; `text` is the trimmed slice. Slices that are
 *   empty after trimming are omitted.
 */
function splitTextChunks(text, chunkSize, chunkOverlap) {
    if (typeof text !== "string" || text.length === 0) {
        return [];
    }
    const normalizedSize = Number.isFinite(chunkSize) && chunkSize >= 200 ? Math.floor(chunkSize) : 600;
    const normalizedOverlap = Number.isFinite(chunkOverlap) && chunkOverlap >= 0
        ? Math.floor(chunkOverlap)
        : 100;
    const overlap = Math.min(normalizedOverlap, Math.max(0, normalizedSize - 50));
    // Same 50-character floor that the overlap cap above implies for full-size chunks.
    const minStride = 50;
    // Sentence terminators: CJK full stop (U+3002), fullwidth ! ? ; (U+FF01, U+FF1F, U+FF1B),
    // their ASCII forms, and newline. Escapes keep the fullwidth forms intact even if this
    // file is Unicode-normalized.
    const boundaryChars = new Set(["\u3002", "\uFF01", "\uFF1F", ".", "!", "?", "\n", "\uFF1B", ";"]);
    const output = [];
    let cursor = 0;
    let index = 0;
    while (cursor < text.length) {
        const rawEnd = Math.min(text.length, cursor + normalizedSize);
        let end = rawEnd;
        if (rawEnd < text.length) {
            const backwardStart = Math.max(cursor + Math.floor(normalizedSize * 0.45), cursor + 1);
            let found = -1;
            // Prefer the last boundary inside the chunk...
            for (let i = rawEnd - 1; i >= backwardStart; i -= 1) {
                if (boundaryChars.has(text[i])) {
                    found = i + 1;
                    break;
                }
            }
            // ...otherwise accept the first boundary slightly past the target size.
            if (found < 0) {
                const forwardEnd = Math.min(text.length, rawEnd + Math.floor(normalizedSize * 0.2));
                for (let i = rawEnd; i < forwardEnd; i += 1) {
                    if (boundaryChars.has(text[i])) {
                        found = i + 1;
                        break;
                    }
                }
            }
            if (found > cursor) {
                end = found;
            }
        }
        const chunkText = text.slice(cursor, end).trim();
        if (chunkText) {
            output.push({ index, start: cursor, end, text: chunkText });
            index += 1;
        }
        if (end >= text.length) {
            break;
        }
        // A chunk cut short at punctuation can be shorter than the overlap. Stepping back by
        // the full overlap would then move the cursor by a single character and emit many
        // near-identical chunks, so always advance by at least `minStride` characters (or by
        // the whole chunk when it is shorter than that).
        const stride = Math.min(minStride, end - cursor);
        cursor = Math.max(cursor + stride, end - overlap);
    }
    return output;
}
|
|
353
|
+
/**
 * Selects up to `maxCount` chunks that represent the whole text.
 *
 * The first chunk is always taken, then the middle chunk (when `maxCount` is at least 2) and
 * the last chunk (when it is at least 3); any remaining slots are filled from the start of
 * the list. Chunks are identified by their `index` property.
 *
 * @param {{index: number}[]} chunks - Candidate chunks.
 * @param {number} maxCount - Maximum number of chunks to return.
 * @returns {{index: number}[]} The input array itself when it already fits; otherwise the
 *   selected chunks sorted by `index`. Empty when there is nothing to pick.
 */
function pickEvidenceChunks(chunks, maxCount) {
    if (!chunks.length || maxCount <= 0) {
        return [];
    }
    if (chunks.length <= maxCount) {
        return chunks;
    }
    // Anchor chunks are always included, in this priority order.
    const anchors = [chunks[0]];
    if (maxCount >= 2) {
        anchors.push(chunks[Math.floor(chunks.length / 2)]);
    }
    if (maxCount >= 3) {
        anchors.push(chunks[chunks.length - 1]);
    }
    const chosen = new Map();
    for (const anchor of anchors) {
        chosen.set(anchor.index, anchor);
    }
    // Top up with the earliest chunks that are not selected yet.
    for (let position = 0; chosen.size < maxCount && position < chunks.length; position += 1) {
        const candidate = chunks[position];
        if (!chosen.has(candidate.index)) {
            chosen.set(candidate.index, candidate);
        }
    }
    const ordered = Array.from(chosen.values());
    ordered.sort((left, right) => left.index - right.index);
    return ordered.slice(0, maxCount);
}
|
|
379
|
+
/**
 * Maps items through an async function while running at most `maxConcurrency` calls at once.
 *
 * @template T, R
 * @param {T[]} items - Items to process.
 * @param {number} maxConcurrency - Upper bound on simultaneous `mapper` calls. Values below 1
 *   are raised to 1; a value that is not a number (NaN, undefined) falls back to 1.
 * @param {(item: T, index: number) => Promise<R> | R} mapper - Called once per item.
 * @returns {Promise<R[]>} Results in the same order as `items`. Rejects with the first error
 *   thrown by `mapper`.
 */
async function mapWithConcurrency(items, maxConcurrency, mapper) {
    if (items.length === 0) {
        return [];
    }
    // Math.min/Math.max propagate NaN, which would start zero workers and hand back an
    // array with no results, so a non-numeric limit is replaced by sequential processing.
    const requested = typeof maxConcurrency === "number" && !Number.isNaN(maxConcurrency)
        ? Math.floor(maxConcurrency)
        : 1;
    const concurrency = Math.max(1, Math.min(requested, items.length));
    const results = new Array(items.length);
    let cursor = 0;
    // Each worker repeatedly claims the next unprocessed index. Claiming is synchronous, so
    // two workers never take the same item.
    async function worker() {
        while (true) {
            const current = cursor;
            cursor += 1;
            if (current >= items.length) {
                break;
            }
            results[current] = await mapper(items[current], current);
        }
    }
    await Promise.all(Array.from({ length: concurrency }, () => worker()));
    return results;
}
|
|
116
399
|
function createArchiveStore(options) {
|
|
117
400
|
const archivePath = path.join(options.memoryRoot, "sessions", "archive", "sessions.jsonl");
|
|
118
|
-
const mutationLogPath = path.join(options.memoryRoot, "
|
|
401
|
+
const mutationLogPath = path.join(options.memoryRoot, "sessions", "archive", "mutation_log.jsonl");
|
|
119
402
|
const graphSchema = (0, ontology_1.loadGraphSchema)(options.projectRoot);
|
|
403
|
+
const archiveSourceTextMaxChars = resolveArchiveSourceCharLimit(options.writePolicy?.archiveSourceTextMaxChars);
|
|
120
404
|
async function storeEvents(events) {
|
|
121
405
|
const stored = [];
|
|
122
406
|
const skipped = [];
|
|
@@ -126,92 +410,208 @@ function createArchiveStore(options) {
|
|
|
126
410
|
const lines = [];
|
|
127
411
|
const mutationLines = [];
|
|
128
412
|
for (const event of events) {
|
|
129
|
-
const
|
|
130
|
-
if (!
|
|
413
|
+
const rawSummary = normalizeOneLineText(event.summary || "");
|
|
414
|
+
if (!rawSummary) {
|
|
131
415
|
skipped.push({ summary: "", reason: "empty_summary" });
|
|
416
|
+
options.logger.info("archive_skip reason=empty_summary");
|
|
132
417
|
continue;
|
|
133
418
|
}
|
|
419
|
+
const cause = normalizeOneLineText(event.cause || "");
|
|
420
|
+
const process = normalizeOneLineText(event.process || "");
|
|
421
|
+
const result = normalizeOneLineText(event.result || event.outcome || "");
|
|
422
|
+
const summary = rawSummary;
|
|
134
423
|
const confidence = typeof event.confidence === "number"
|
|
135
424
|
? Math.max(0, Math.min(1, event.confidence))
|
|
136
425
|
: undefined;
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
continue;
|
|
145
|
-
}
|
|
146
|
-
const normalizedEventType = (0, ontology_1.normalizeEventType)(event.event_type || "insight", graphSchema);
|
|
147
|
-
const entities = Array.isArray(event.entities)
|
|
148
|
-
? [...new Set(event.entities.map(item => (typeof item === "string" ? item.trim() : "")).filter(Boolean))]
|
|
149
|
-
: [];
|
|
150
|
-
const relationValidation = (0, ontology_1.validateRelations)({
|
|
151
|
-
relations: Array.isArray(event.relations) ? event.relations : [],
|
|
152
|
-
entities,
|
|
153
|
-
entityTypes: event.entity_types,
|
|
154
|
-
schema: graphSchema,
|
|
426
|
+
const quality = scoreQuality({
|
|
427
|
+
summary,
|
|
428
|
+
cause,
|
|
429
|
+
process,
|
|
430
|
+
result,
|
|
431
|
+
outcome: event.outcome,
|
|
432
|
+
sourceText: event.source_text,
|
|
155
433
|
});
|
|
156
|
-
|
|
157
|
-
|
|
434
|
+
const gateSource = inferGateSource(event);
|
|
435
|
+
const lifecycleComplete = quality.signals.workflowComplete;
|
|
436
|
+
if (gateSource === "sync" && !quality.signals.hasStructuredTriplet) {
|
|
437
|
+
skipped.push({ summary, reason: "incomplete_cause_process_result" });
|
|
438
|
+
options.logger.info("archive_skip reason=incomplete_cause_process_result gate_source=sync");
|
|
158
439
|
continue;
|
|
159
440
|
}
|
|
160
|
-
const
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
text: summary,
|
|
169
|
-
model: embeddingModel,
|
|
170
|
-
apiKey: embeddingApiKey,
|
|
171
|
-
baseUrl: embeddingBaseUrl,
|
|
172
|
-
dimensions: options.embedding?.dimensions,
|
|
173
|
-
timeoutMs: options.embedding?.timeoutMs,
|
|
174
|
-
maxRetries: options.embedding?.maxRetries,
|
|
175
|
-
}) || undefined;
|
|
441
|
+
const archiveMinConfidence = typeof options.writePolicy?.archiveMinConfidence === "number"
|
|
442
|
+
? Math.max(0, Math.min(1, options.writePolicy.archiveMinConfidence))
|
|
443
|
+
: 0.35;
|
|
444
|
+
if (typeof confidence === "number" && confidence < archiveMinConfidence) {
|
|
445
|
+
if (!lifecycleComplete) {
|
|
446
|
+
skipped.push({ summary, reason: "low_confidence" });
|
|
447
|
+
options.logger.info("archive_skip reason=filtered_low_quality detail=low_confidence");
|
|
448
|
+
continue;
|
|
176
449
|
}
|
|
177
|
-
|
|
178
|
-
|
|
450
|
+
options.logger.info(`archive_confidence_override reason=workflow_complete confidence=${confidence.toFixed(2)} threshold=${archiveMinConfidence.toFixed(2)}`);
|
|
451
|
+
}
|
|
452
|
+
const archiveMinQualityScore = typeof options.writePolicy?.archiveMinQualityScore === "number"
|
|
453
|
+
? Math.max(0, Math.min(1, options.writePolicy.archiveMinQualityScore))
|
|
454
|
+
: 0.4;
|
|
455
|
+
if (quality.score < archiveMinQualityScore) {
|
|
456
|
+
if (!lifecycleComplete) {
|
|
457
|
+
skipped.push({ summary, reason: "low_quality" });
|
|
458
|
+
options.logger.info("archive_skip reason=filtered_low_quality detail=low_quality");
|
|
459
|
+
continue;
|
|
179
460
|
}
|
|
461
|
+
options.logger.info(`archive_quality_override reason=workflow_complete quality=${quality.score.toFixed(2)} threshold=${archiveMinQualityScore.toFixed(2)}`);
|
|
180
462
|
}
|
|
463
|
+
const normalizedEventType = (0, ontology_1.normalizeEventType)(event.event_type || "insight", graphSchema);
|
|
464
|
+
const id = `evt_${Date.now().toString(36)}_${Math.random().toString(36).slice(2, 8)}`;
|
|
465
|
+
const sourceTextRaw = typeof event.source_text === "string" ? event.source_text : "";
|
|
466
|
+
const sourceText = clampTailText(denoiseArchiveSourceText(sourceTextRaw), archiveSourceTextMaxChars);
|
|
467
|
+
const canonicalId = event.canonical_id || (0, ontology_1.buildCanonicalId)({
|
|
468
|
+
eventType: normalizedEventType,
|
|
469
|
+
summary,
|
|
470
|
+
outcome: event.outcome,
|
|
471
|
+
});
|
|
472
|
+
const dedupText = buildArchiveDedupText({
|
|
473
|
+
eventType: normalizedEventType,
|
|
474
|
+
summary,
|
|
475
|
+
cause,
|
|
476
|
+
process,
|
|
477
|
+
result,
|
|
478
|
+
outcome: event.outcome,
|
|
479
|
+
canonicalId,
|
|
480
|
+
entities: event.entities,
|
|
481
|
+
relations: event.relations,
|
|
482
|
+
});
|
|
181
483
|
const dedup = options.deduplicator.check({
|
|
182
484
|
id,
|
|
183
|
-
summary: `${normalizedEventType}: ${summary}`,
|
|
184
|
-
embedding,
|
|
485
|
+
summary: dedupText || `${normalizedEventType}: ${summary}`,
|
|
185
486
|
});
|
|
186
487
|
if (dedup.duplicate) {
|
|
187
488
|
skipped.push({ summary, reason: `duplicate_${dedup.stage || "unknown"}` });
|
|
489
|
+
options.logger.info(`archive_skip reason=duplicate_dedup_stage_${dedup.stage || "unknown"}`);
|
|
188
490
|
continue;
|
|
189
491
|
}
|
|
190
492
|
const record = {
|
|
191
|
-
...event,
|
|
192
|
-
event_type: normalizedEventType,
|
|
193
|
-
entities,
|
|
194
|
-
relations: relationValidation.accepted,
|
|
195
|
-
canonical_id: event.canonical_id || (0, ontology_1.buildCanonicalId)({
|
|
196
|
-
eventType: normalizedEventType,
|
|
197
|
-
summary,
|
|
198
|
-
entities,
|
|
199
|
-
relations: relationValidation.accepted,
|
|
200
|
-
outcome: event.outcome,
|
|
201
|
-
}),
|
|
202
|
-
actor: event.actor || "system",
|
|
203
|
-
confidence,
|
|
204
493
|
id,
|
|
205
494
|
timestamp: new Date().toISOString(),
|
|
495
|
+
layer: "archive",
|
|
496
|
+
event_type: normalizedEventType,
|
|
497
|
+
summary,
|
|
498
|
+
cause,
|
|
499
|
+
process,
|
|
500
|
+
result,
|
|
501
|
+
source_text: sourceText || undefined,
|
|
502
|
+
outcome: event.outcome,
|
|
503
|
+
session_id: event.session_id,
|
|
504
|
+
source_file: event.source_file,
|
|
505
|
+
gate_source: gateSource,
|
|
506
|
+
embedding_status: "pending",
|
|
206
507
|
quality_score: quality.score,
|
|
207
508
|
quality_level: quality.level,
|
|
208
|
-
|
|
509
|
+
char_count: (sourceText || summary).length,
|
|
510
|
+
token_count: estimateTokenCount(sourceText || summary),
|
|
511
|
+
vector_chunks_total: 0,
|
|
512
|
+
vector_chunks_ok: 0,
|
|
513
|
+
confidence,
|
|
514
|
+
source_event_id: event.source_event_id,
|
|
515
|
+
actor: event.actor || "system",
|
|
516
|
+
canonical_id: canonicalId,
|
|
209
517
|
};
|
|
518
|
+
let embedding = undefined;
|
|
519
|
+
const vectorUpsertRows = [];
|
|
520
|
+
const embeddingModel = options.embedding?.model || "";
|
|
521
|
+
const embeddingApiKey = options.embedding?.apiKey || "";
|
|
522
|
+
const embeddingBaseUrl = normalizeBaseUrl(options.embedding?.baseURL || options.embedding?.baseUrl);
|
|
523
|
+
const maxParallel = 6;
|
|
524
|
+
if (embeddingModel && embeddingApiKey && embeddingBaseUrl) {
|
|
525
|
+
const chunkSize = options.vectorChunking?.chunkSize ?? 600;
|
|
526
|
+
const chunkOverlap = options.vectorChunking?.chunkOverlap ?? 100;
|
|
527
|
+
const evidenceMaxChunks = typeof options.vectorChunking?.evidenceMaxChunks === "number"
|
|
528
|
+
? Math.max(0, Math.min(8, Math.floor(options.vectorChunking.evidenceMaxChunks)))
|
|
529
|
+
: 2;
|
|
530
|
+
const summaryText = (record.summary || "").trim();
|
|
531
|
+
const evidenceChunks = record.source_text
|
|
532
|
+
? pickEvidenceChunks(splitTextChunks(record.source_text, chunkSize, chunkOverlap), evidenceMaxChunks)
|
|
533
|
+
: [];
|
|
534
|
+
const summaryChunk = summaryText
|
|
535
|
+
? [
|
|
536
|
+
{
|
|
537
|
+
text: summaryText,
|
|
538
|
+
source_field: "summary",
|
|
539
|
+
index: 0,
|
|
540
|
+
total: 1 + evidenceChunks.length,
|
|
541
|
+
start: 0,
|
|
542
|
+
end: summaryText.length,
|
|
543
|
+
},
|
|
544
|
+
]
|
|
545
|
+
: [];
|
|
546
|
+
const embeddingInputs = [
|
|
547
|
+
...summaryChunk,
|
|
548
|
+
...evidenceChunks.map((chunk, idx) => ({
|
|
549
|
+
text: chunk.text,
|
|
550
|
+
source_field: "evidence",
|
|
551
|
+
index: idx + summaryChunk.length,
|
|
552
|
+
total: summaryChunk.length + evidenceChunks.length,
|
|
553
|
+
start: chunk.start,
|
|
554
|
+
end: chunk.end,
|
|
555
|
+
})),
|
|
556
|
+
];
|
|
557
|
+
record.vector_chunks_total = embeddingInputs.length;
|
|
558
|
+
const chunkEmbeddings = await mapWithConcurrency(embeddingInputs, maxParallel, async (chunk) => {
|
|
559
|
+
try {
|
|
560
|
+
const chunkEmbedding = await requestEmbedding({
|
|
561
|
+
text: chunk.text,
|
|
562
|
+
model: embeddingModel,
|
|
563
|
+
apiKey: embeddingApiKey,
|
|
564
|
+
baseUrl: embeddingBaseUrl,
|
|
565
|
+
dimensions: options.embedding?.dimensions,
|
|
566
|
+
timeoutMs: options.embedding?.timeoutMs,
|
|
567
|
+
maxRetries: options.embedding?.maxRetries,
|
|
568
|
+
}) || undefined;
|
|
569
|
+
if (chunkEmbedding && chunkEmbedding.length > 0) {
|
|
570
|
+
return {
|
|
571
|
+
chunk,
|
|
572
|
+
embedding: chunkEmbedding,
|
|
573
|
+
};
|
|
574
|
+
}
|
|
575
|
+
return null;
|
|
576
|
+
}
|
|
577
|
+
catch (error) {
|
|
578
|
+
options.logger.warn(`Archive chunk embedding failed id=${id} chunk=${chunk.index} field=${chunk.source_field} error=${error}`);
|
|
579
|
+
return null;
|
|
580
|
+
}
|
|
581
|
+
});
|
|
582
|
+
const validEmbeddings = chunkEmbeddings
|
|
583
|
+
.filter((item) => Boolean(item))
|
|
584
|
+
.sort((a, b) => a.chunk.index - b.chunk.index);
|
|
585
|
+
const primary = validEmbeddings.find(item => item.chunk.source_field === "summary");
|
|
586
|
+
if (primary) {
|
|
587
|
+
embedding = primary.embedding;
|
|
588
|
+
}
|
|
589
|
+
else if (validEmbeddings.length > 0) {
|
|
590
|
+
embedding = validEmbeddings[0].embedding;
|
|
591
|
+
}
|
|
592
|
+
for (const item of validEmbeddings) {
|
|
593
|
+
vectorUpsertRows.push({
|
|
594
|
+
id: `${id}_c${item.chunk.index}`,
|
|
595
|
+
summary: item.chunk.text,
|
|
596
|
+
embedding: item.embedding,
|
|
597
|
+
source_field: item.chunk.source_field,
|
|
598
|
+
chunk_index: item.chunk.index,
|
|
599
|
+
chunk_total: item.chunk.total,
|
|
600
|
+
chunk_start: item.chunk.start,
|
|
601
|
+
chunk_end: item.chunk.end,
|
|
602
|
+
});
|
|
603
|
+
}
|
|
604
|
+
record.vector_chunks_ok = validEmbeddings.length;
|
|
605
|
+
record.embedding_status = record.vector_chunks_total > 0 && record.vector_chunks_ok === record.vector_chunks_total
|
|
606
|
+
? "ok"
|
|
607
|
+
: "failed";
|
|
608
|
+
}
|
|
609
|
+
record.embedding = embedding;
|
|
210
610
|
lines.push(JSON.stringify(record));
|
|
211
611
|
stored.push(record);
|
|
212
612
|
options.deduplicator.append({
|
|
213
613
|
id: record.id,
|
|
214
|
-
summary: `${record.event_type}: ${summary}`,
|
|
614
|
+
summary: dedupText || `${record.event_type}: ${summary}`,
|
|
215
615
|
embedding: embedding,
|
|
216
616
|
});
|
|
217
617
|
mutationLines.push(JSON.stringify({
|
|
@@ -224,24 +624,54 @@ function createArchiveStore(options) {
|
|
|
224
624
|
event_type: record.event_type,
|
|
225
625
|
summary: record.summary,
|
|
226
626
|
}));
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
627
|
+
options.logger.info(`archive_write reason=archived_success gate_source=${record.gate_source} id=${record.id}`);
|
|
628
|
+
if (vectorUpsertRows.length > 0) {
|
|
629
|
+
await options.vectorStore.deleteBySourceMemory({ layer: "archive", sourceMemoryId: record.id });
|
|
630
|
+
const upsertResults = await mapWithConcurrency(vectorUpsertRows, maxParallel, async (chunkRow) => {
|
|
631
|
+
try {
|
|
632
|
+
await options.vectorStore.upsert({
|
|
633
|
+
id: chunkRow.id,
|
|
634
|
+
session_id: record.session_id,
|
|
635
|
+
event_type: record.event_type,
|
|
636
|
+
summary: chunkRow.summary,
|
|
637
|
+
timestamp: record.timestamp,
|
|
638
|
+
outcome: record.outcome,
|
|
639
|
+
embedding: chunkRow.embedding,
|
|
640
|
+
quality_score: record.quality_score,
|
|
641
|
+
layer: "archive",
|
|
642
|
+
source_memory_id: record.id,
|
|
643
|
+
source_memory_canonical_id: record.canonical_id,
|
|
644
|
+
source_event_id: record.source_event_id || record.id,
|
|
645
|
+
source_field: chunkRow.source_field,
|
|
646
|
+
char_count: chunkRow.summary.length,
|
|
647
|
+
token_count: estimateTokenCount(chunkRow.summary),
|
|
648
|
+
chunk_index: chunkRow.chunk_index,
|
|
649
|
+
chunk_total: chunkRow.chunk_total,
|
|
650
|
+
chunk_start: chunkRow.chunk_start,
|
|
651
|
+
chunk_end: chunkRow.chunk_end,
|
|
652
|
+
});
|
|
653
|
+
return true;
|
|
654
|
+
}
|
|
655
|
+
catch (error) {
|
|
656
|
+
options.logger.warn(`Archive chunk upsert failed id=${record.id} chunk=${chunkRow.chunk_index} error=${error}`);
|
|
657
|
+
return false;
|
|
658
|
+
}
|
|
239
659
|
});
|
|
660
|
+
const upsertOk = upsertResults.filter(Boolean).length;
|
|
661
|
+
if (upsertOk !== vectorUpsertRows.length) {
|
|
662
|
+
options.logger.warn(`archive_vector_upsert_partial id=${record.id} ok=${upsertOk}/${vectorUpsertRows.length}`);
|
|
663
|
+
}
|
|
240
664
|
}
|
|
241
665
|
}
|
|
242
666
|
if (lines.length > 0) {
|
|
243
667
|
ensureDirForFile(archivePath);
|
|
244
668
|
fs.appendFileSync(archivePath, `${lines.join("\n")}\n`, "utf-8");
|
|
669
|
+
for (let i = 0; i < lines.length; i++) {
|
|
670
|
+
const validation = (0, llm_output_validator_1.validateJsonlLine)(lines[i]);
|
|
671
|
+
if (!validation.valid && validation.errors.length > 0) {
|
|
672
|
+
options.logger.warn(`archive_write_integrity_check_failed line=${i} errors=${validation.errors.join("|")}`);
|
|
673
|
+
}
|
|
674
|
+
}
|
|
245
675
|
ensureDirForFile(mutationLogPath);
|
|
246
676
|
fs.appendFileSync(mutationLogPath, `${mutationLines.join("\n")}\n`, "utf-8");
|
|
247
677
|
}
|