openclaw-cortex-memory 0.1.0-Alpha.8 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +347 -299
- package/SIGNATURE.md +7 -0
- package/SKILL.md +96 -350
- package/dist/index.d.ts +93 -23
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1234 -1318
- package/dist/index.js.map +1 -1
- package/dist/openclaw.plugin.json +377 -18
- package/dist/src/dedup/three_stage_deduplicator.d.ts.map +1 -1
- package/dist/src/dedup/three_stage_deduplicator.js +13 -3
- package/dist/src/dedup/three_stage_deduplicator.js.map +1 -1
- package/dist/src/engine/memory_engine.d.ts +6 -1
- package/dist/src/engine/memory_engine.d.ts.map +1 -1
- package/dist/src/engine/ts_engine.d.ts +208 -0
- package/dist/src/engine/ts_engine.d.ts.map +1 -1
- package/dist/src/engine/ts_engine.js +1353 -84
- package/dist/src/engine/ts_engine.js.map +1 -1
- package/dist/src/engine/types.d.ts +27 -0
- package/dist/src/engine/types.d.ts.map +1 -1
- package/dist/src/graph/ontology.d.ts +87 -15
- package/dist/src/graph/ontology.d.ts.map +1 -1
- package/dist/src/graph/ontology.js +999 -12
- package/dist/src/graph/ontology.js.map +1 -1
- package/dist/src/net/http_post.d.ts +17 -0
- package/dist/src/net/http_post.d.ts.map +1 -0
- package/dist/src/net/http_post.js +56 -0
- package/dist/src/net/http_post.js.map +1 -0
- package/dist/src/quality/llm_output_validator.d.ts +65 -0
- package/dist/src/quality/llm_output_validator.d.ts.map +1 -0
- package/dist/src/quality/llm_output_validator.js +635 -0
- package/dist/src/quality/llm_output_validator.js.map +1 -0
- package/dist/src/reflect/reflector.d.ts.map +1 -1
- package/dist/src/reflect/reflector.js +296 -26
- package/dist/src/reflect/reflector.js.map +1 -1
- package/dist/src/rules/rule_store.d.ts.map +1 -1
- package/dist/src/rules/rule_store.js +75 -16
- package/dist/src/rules/rule_store.js.map +1 -1
- package/dist/src/session/session_end.d.ts +20 -42
- package/dist/src/session/session_end.d.ts.map +1 -1
- package/dist/src/session/session_end.js +31 -214
- package/dist/src/session/session_end.js.map +1 -1
- package/dist/src/store/archive_store.d.ts +52 -7
- package/dist/src/store/archive_store.d.ts.map +1 -1
- package/dist/src/store/archive_store.js +526 -96
- package/dist/src/store/archive_store.js.map +1 -1
- package/dist/src/store/embedding_utils.d.ts +32 -0
- package/dist/src/store/embedding_utils.d.ts.map +1 -0
- package/dist/src/store/embedding_utils.js +173 -0
- package/dist/src/store/embedding_utils.js.map +1 -0
- package/dist/src/store/graph_memory_store.d.ts +115 -0
- package/dist/src/store/graph_memory_store.d.ts.map +1 -0
- package/dist/src/store/graph_memory_store.js +1061 -0
- package/dist/src/store/graph_memory_store.js.map +1 -0
- package/dist/src/store/read_store.d.ts +95 -0
- package/dist/src/store/read_store.d.ts.map +1 -1
- package/dist/src/store/read_store.js +2108 -268
- package/dist/src/store/read_store.js.map +1 -1
- package/dist/src/store/vector_store.d.ts +15 -0
- package/dist/src/store/vector_store.d.ts.map +1 -1
- package/dist/src/store/vector_store.js +75 -1
- package/dist/src/store/vector_store.js.map +1 -1
- package/dist/src/store/write_store.d.ts +46 -0
- package/dist/src/store/write_store.d.ts.map +1 -1
- package/dist/src/store/write_store.js +399 -50
- package/dist/src/store/write_store.js.map +1 -1
- package/dist/src/sync/session_sync.d.ts +115 -2
- package/dist/src/sync/session_sync.d.ts.map +1 -1
- package/dist/src/sync/session_sync.js +2497 -44
- package/dist/src/sync/session_sync.js.map +1 -1
- package/dist/src/utils/runtime_env.d.ts +4 -0
- package/dist/src/utils/runtime_env.d.ts.map +1 -0
- package/dist/src/utils/runtime_env.js +51 -0
- package/dist/src/utils/runtime_env.js.map +1 -0
- package/dist/src/wiki/wiki_linter.d.ts +26 -0
- package/dist/src/wiki/wiki_linter.d.ts.map +1 -0
- package/dist/src/wiki/wiki_linter.js +339 -0
- package/dist/src/wiki/wiki_linter.js.map +1 -0
- package/dist/src/wiki/wiki_logger.d.ts +10 -0
- package/dist/src/wiki/wiki_logger.d.ts.map +1 -0
- package/dist/src/wiki/wiki_logger.js +78 -0
- package/dist/src/wiki/wiki_logger.js.map +1 -0
- package/dist/src/wiki/wiki_maintainer.d.ts +39 -0
- package/dist/src/wiki/wiki_maintainer.d.ts.map +1 -0
- package/dist/src/wiki/wiki_maintainer.js +38 -0
- package/dist/src/wiki/wiki_maintainer.js.map +1 -0
- package/dist/src/wiki/wiki_projector.d.ts +35 -0
- package/dist/src/wiki/wiki_projector.d.ts.map +1 -0
- package/dist/src/wiki/wiki_projector.js +1151 -0
- package/dist/src/wiki/wiki_projector.js.map +1 -0
- package/dist/src/wiki/wiki_queue.d.ts +29 -0
- package/dist/src/wiki/wiki_queue.d.ts.map +1 -0
- package/dist/src/wiki/wiki_queue.js +137 -0
- package/dist/src/wiki/wiki_queue.js.map +1 -0
- package/openclaw.plugin.json +377 -18
- package/package.json +52 -5
- package/schema/graph.schema.yaml +330 -0
- package/scripts/cli.js +80 -26
- package/scripts/repair-memory.js +321 -0
- package/scripts/uninstall.js +7 -1
- package/skills/cortex-memory/SKILL.md +83 -0
- package/skills/cortex-memory/references/agent-manual.md +127 -0
- package/skills/cortex-memory/references/configuration.md +109 -0
- package/skills/cortex-memory/references/publish-checklist.md +45 -0
- package/skills/cortex-memory/references/system-prompt-template.md +27 -0
- package/skills/cortex-memory/references/tools.md +191 -0
|
@@ -8,6 +8,8 @@ export interface WriteMemoryArgs {
|
|
|
8
8
|
role: string;
|
|
9
9
|
source: string;
|
|
10
10
|
sessionId: string;
|
|
11
|
+
summary?: string;
|
|
12
|
+
sourceText?: string;
|
|
11
13
|
}
|
|
12
14
|
export interface WriteMemoryResult {
|
|
13
15
|
status: "ok" | "skipped";
|
|
@@ -33,6 +35,50 @@ interface WriteStoreOptions {
|
|
|
33
35
|
timeoutMs?: number;
|
|
34
36
|
maxRetries?: number;
|
|
35
37
|
};
|
|
38
|
+
vectorChunking?: {
|
|
39
|
+
chunkSize?: number;
|
|
40
|
+
chunkOverlap?: number;
|
|
41
|
+
};
|
|
42
|
+
writePolicy?: {
|
|
43
|
+
activeMinQualityScore?: number;
|
|
44
|
+
activeDedupTailLines?: number;
|
|
45
|
+
activeTextMaxChars?: number;
|
|
46
|
+
};
|
|
47
|
+
vectorStore?: {
|
|
48
|
+
upsert(record: {
|
|
49
|
+
id: string;
|
|
50
|
+
session_id: string;
|
|
51
|
+
event_type: string;
|
|
52
|
+
summary: string;
|
|
53
|
+
timestamp: string;
|
|
54
|
+
layer: "active" | "archive";
|
|
55
|
+
source_memory_id: string;
|
|
56
|
+
source_memory_canonical_id?: string;
|
|
57
|
+
source_event_id?: string;
|
|
58
|
+
source_field?: "summary" | "evidence";
|
|
59
|
+
outcome?: string;
|
|
60
|
+
entities?: string[];
|
|
61
|
+
relations?: Array<{
|
|
62
|
+
source: string;
|
|
63
|
+
target: string;
|
|
64
|
+
type: string;
|
|
65
|
+
evidence_span?: string;
|
|
66
|
+
confidence?: number;
|
|
67
|
+
}>;
|
|
68
|
+
embedding: number[];
|
|
69
|
+
quality_score: number;
|
|
70
|
+
char_count: number;
|
|
71
|
+
token_count: number;
|
|
72
|
+
chunk_index?: number;
|
|
73
|
+
chunk_total?: number;
|
|
74
|
+
chunk_start?: number;
|
|
75
|
+
chunk_end?: number;
|
|
76
|
+
}): Promise<void>;
|
|
77
|
+
deleteBySourceMemory(args: {
|
|
78
|
+
layer: "active" | "archive";
|
|
79
|
+
sourceMemoryId: string;
|
|
80
|
+
}): Promise<void>;
|
|
81
|
+
};
|
|
36
82
|
}
|
|
37
83
|
export declare function createWriteStore(options: WriteStoreOptions): {
|
|
38
84
|
writeMemory(args: WriteMemoryArgs): Promise<WriteMemoryResult>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"write_store.d.ts","sourceRoot":"","sources":["../../../src/store/write_store.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"write_store.d.ts","sourceRoot":"","sources":["../../../src/store/write_store.ts"],"names":[],"mappings":"AAMA,UAAU,UAAU;IAClB,KAAK,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;IACrD,IAAI,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;IACpD,IAAI,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;CACrD;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,IAAI,GAAG,SAAS,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE;QACR,KAAK,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QACjC,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;CACH;AAED,UAAU,iBAAiB;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,UAAU,CAAC;IACnB,SAAS,CAAC,EAAE;QACV,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;IACF,cAAc,CAAC,EAAE;QACf,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,CAAC;IACF,WAAW,CAAC,EAAE;QACZ,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,oBAAoB,CAAC,EAAE,MAAM,CAAC;QAC9B,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,WAAW,CAAC,EAAE;QACZ,MAAM,CAAC,MAAM,EAAE;YACb,EAAE,EAAE,MAAM,CAAC;YACX,UAAU,EAAE,MAAM,CAAC;YACnB,UAAU,EAAE,MAAM,CAAC;YACnB,OAAO,EAAE,MAAM,CAAC;YAChB,SAAS,EAAE,MAAM,CAAC;YAClB,KAAK,EAAE,QAAQ,GAAG,SAAS,CAAC;YAC5B,gBAAgB,EAAE,MAAM,CAAC;YACzB,0BAA0B,CAAC,EAAE,MAAM,CAAC;YACpC,eAAe,CAAC,EAAE,MAAM,CAAC;YACzB,YAAY,CAAC,EAAE,SAAS,GAAG,UAAU,CAAC;YACtC,OAAO,CAAC,EAAE,MAAM,CAAC;YACjB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;YACpB,SAAS,CAAC,EAAE,KAAK,CAAC;gBAAE,MAAM,EAAE,MAAM,CAAC;gBAAC,MAAM,EAAE,MAAM,CAAC;gBAAC,IAAI,EAAE,MAAM,CAAC;gBAAC,aAAa,CAAC,EAAE,MAAM,CAAC;gBAAC,UAAU,CAAC,EAAE,MAAM
,CAAA;aAAE,CAAC,CAAC;YACjH,SAAS,EAAE,MAAM,EAAE,CAAC;YACpB,aAAa,EAAE,MAAM,CAAC;YACtB,UAAU,EAAE,MAAM,CAAC;YACnB,WAAW,EAAE,MAAM,CAAC;YACpB,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,SAAS,CAAC,EAAE,MAAM,CAAC;SACpB,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAClB,oBAAoB,CAAC,IAAI,EAAE;YAAE,KAAK,EAAE,QAAQ,GAAG,SAAS,CAAC;YAAC,cAAc,EAAE,MAAM,CAAA;SAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;KACpG,CAAC;CACH;AAuYD,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,iBAAiB,GAAG;IAAE,WAAW,CAAC,IAAI,EAAE,eAAe,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAA;CAAE,CA2L/H"}
|
|
@@ -37,25 +37,184 @@ exports.createWriteStore = createWriteStore;
|
|
|
37
37
|
const crypto = __importStar(require("crypto"));
|
|
38
38
|
const fs = __importStar(require("fs"));
|
|
39
39
|
const path = __importStar(require("path"));
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
const http_post_1 = require("../net/http_post");
|
|
41
|
+
const llm_output_validator_1 = require("../quality/llm_output_validator");
|
|
42
|
+
// Matches a line that *starts* with a short acknowledgement (English or Chinese), case-insensitively.
// denoiseActiveText() drops a line matching this pattern when the line carries no other signal.
const ACTIVE_LOW_INFORMATION_LINE = /^(ok|okay|got it|roger|noted|sure|thanks|thank you|received|copy that|understood|好的|收到|明白|了解|谢谢|感谢|可以|行|嗯|嗯嗯|没问题)(?:\b|$)/i;
|
|
43
|
+
// Matches text that consists of nothing but one acknowledgement plus optional trailing
// whitespace/punctuation (anchored at both ends). scoreQuality() returns score 0 / level "low"
// when the normalized merged text matches this pattern.
const ACTIVE_LOW_VALUE_ONLY = /^(ok|okay|got it|roger|noted|thanks|thank you|received|copy that|understood|sounds good|好的|收到|明白|了解|谢谢|感谢|可以|行|嗯|嗯嗯|没问题|辛苦了)[\s.!?,。!?、]*$/i;
|
|
44
|
+
// Unanchored keyword list (English and Chinese). scoreQuality() adds 0.26 when it matches.
// NOTE(review): there are no word boundaries, so short entries such as "pr" or "eta" also match
// inside longer words — confirm this is intended.
const ACTIVE_SEMANTIC_SIGNAL = /(decision|trade-?off|constraint|requirement|fix|error|exception|blocked|rollback|deploy|progress|milestone|action item|owner|next step|todo|deadline|eta|issue|bug|metric|latency|error rate|cost|url|link|path|file|config|parameter|version|commit|pr|ticket|test|verify|passed|failed|success|import|memory|wiki|决策|决定|取舍|约束|需求|要求|修复|错误|异常|阻塞|回滚|部署|进展|里程碑|行动项|负责人|下一步|待办|截止|问题|缺陷|指标|延迟|成本|链接|路径|文件|配置|参数|版本|提交|工单|测试|验证|通过|失败|成功|优化|导入|记忆|wiki)/i;
|
|
45
|
+
// Matches concrete "evidence" tokens: URLs, path-like characters, numbers/percentages,
// "#123"-style references and common CLI prefixes. scoreQuality() adds 0.16 when it matches.
const ACTIVE_EVIDENCE_SIGNAL = /(https?:\/\/|www\.|[`#/:\\]|[A-Za-z]:\\|\/[A-Za-z0-9._\-\/]+|\b\d+(?:\.\d+)?%?\b|#\d{1,8}|npm run|pnpm |yarn |node |git )/i;
|
|
46
|
+
// Matches completion/approval wording (English and Chinese). scoreQuality() adds 0.12 when it matches.
const ACTIVE_WORKFLOW_SIGNAL = /(done|completed|fixed|implemented|resolved|passed|verified|accepted|approved|已完成|完成了|修复了|实现了|已修复|已实现|已通过|通过了|验证通过|测试通过|接受|确认)/i;
|
|
47
|
+
/**
 * Removes low-information and repeated lines from a multi-line message.
 *
 * Each line is trimmed; an optional leading "[tag]" prefix is ignored when
 * judging the line but is kept in the output. A line is dropped when it has
 * no "signal" (URL, domain, path-like character, @mention, number with two
 * or more digits) and either starts with an acknowledgement phrase or
 * repeats (case-insensitively) a line that was already kept.
 *
 * @param {string} input Raw message text; falsy values are treated as "".
 * @returns {string} Surviving lines joined with "\n". If every line was
 *   dropped, the trimmed input is returned unchanged.
 */
function denoiseActiveText(input) {
    const original = (input || "").trim();
    if (!original) {
        return "";
    }
    const signalPattern = /(https?:\/\/|www\.|[A-Za-z0-9._-]+\.[A-Za-z]{2,}|[`#/:\\]|@\w+|\b\d{2,}\b)/;
    const keptKeys = new Set();
    const keptLines = original
        .split(/\r?\n/)
        .map(rawLine => rawLine.trim())
        .filter(lineText => {
            if (!lineText) {
                return false;
            }
            // Judge the line without its "[speaker]" style prefix.
            const body = lineText.replace(/^\[[^\]]+\]\s*/, "").trim();
            if (!body) {
                return false;
            }
            const key = body.toLowerCase();
            if (!signalPattern.test(body)) {
                // Signal-free lines are dropped when they are mere acknowledgements or repeats.
                if (ACTIVE_LOW_INFORMATION_LINE.test(body) || keptKeys.has(key)) {
                    return false;
                }
            }
            keptKeys.add(key);
            return true;
        });
    return keptLines.length === 0 ? original : keptLines.join("\n");
}
|
|
73
|
+
/**
 * Denoises the text and, when a positive finite limit is given, keeps only
 * the trailing `maxChars` characters (the most recent content).
 *
 * @param {string} input Raw message text.
 * @param {number} maxChars Character budget; non-finite or non-positive values disable truncation.
 * @returns {string} Denoised, possibly tail-truncated and trimmed text; "" when nothing remains.
 */
function normalizeText(input, maxChars) {
    const denoised = denoiseActiveText(input);
    if (!denoised) {
        return "";
    }
    const limitIsUsable = Number.isFinite(maxChars) && maxChars > 0;
    const exceedsLimit = limitIsUsable && denoised.length > maxChars;
    if (!exceedsLimit) {
        return denoised;
    }
    // Keep the tail rather than the head of the text.
    const keep = Math.floor(maxChars);
    const tail = denoised.slice(-keep);
    return tail.trim();
}
|
|
82
|
+
/**
 * Collapses every run of whitespace into a single space and trims the ends.
 *
 * @param {*} input Value to normalize; falsy values become "".
 * @returns {string} Single-line, single-spaced text.
 */
function normalizeSummary(input) {
    const asText = String(input || "");
    const words = asText.split(/\s+/).filter(word => word.length > 0);
    return words.join(" ");
}
|
|
43
|
-
function
|
|
85
|
+
/**
 * Produces a canonical form of the text for semantic comparison.
 *
 * Steps: strip a leading "[tag]" prefix on every line, lower-case, replace
 * every run of characters that are not letters, digits or one of
 * `: . / \ # @ _ -` with a space, collapse whitespace and trim.
 *
 * @param {*} input Value to normalize; falsy values become "".
 * @returns {string} Canonical lower-case text.
 */
function normalizeSemanticText(input) {
    const source = String(input || "");
    const withoutTags = source.replace(/^\[[^\]]+\]\s*/gm, "");
    const lowered = withoutTags.toLowerCase();
    const symbolsRemoved = lowered.replace(/[^\p{L}\p{N}:./\\#@_-]+/gu, " ");
    const singleSpaced = symbolsRemoved.replace(/\s+/g, " ");
    return singleSpaced.trim();
}
|
|
93
|
+
/**
 * Tokenizes text for similarity hashing.
 *
 * The text is normalized with normalizeSemanticText() and split on
 * whitespace. Every token is emitted; in addition, for a token containing
 * two or more characters in the U+3400–U+9FFF range, each pair of
 * consecutive such characters is emitted as a bigram.
 *
 * @param {*} input Text to tokenize.
 * @returns {string[]} Unique tokens in first-seen order; [] for empty text.
 */
function buildSimilarityTokens(input) {
    const canonical = normalizeSemanticText(input);
    if (!canonical) {
        return [];
    }
    const unique = new Set();
    canonical.split(/\s+/).forEach(word => {
        if (!word) {
            return;
        }
        unique.add(word);
        const ideographs = word.match(/[\u3400-\u9fff]/g) || [];
        // Pair each ideograph with the one that precedes it in the filtered list.
        ideographs.slice(1).forEach((current, offset) => {
            unique.add(ideographs[offset] + current);
        });
    });
    return Array.from(unique);
}
|
|
110
|
+
/**
 * Hashes a token to an unsigned 64-bit integer.
 *
 * @param {string} value Token to hash.
 * @returns {bigint} The first eight bytes of the token's SHA-1 digest, read big-endian.
 */
function hashToken64(value) {
    const sha1Bytes = crypto.createHash("sha1").update(value).digest();
    return sha1Bytes.readBigUInt64BE(0);
}
|
|
118
|
+
/**
 * Computes a 64-bit SimHash fingerprint of the text.
 *
 * Every token from buildSimilarityTokens() votes +1 or -1 on each of the 64
 * bit positions according to its hashToken64() value; a bit is set in the
 * fingerprint when its tally is zero or positive.
 *
 * @param {string} text Text to fingerprint.
 * @returns {string} Fingerprint as 16 lower-case hex digits (zero-padded).
 */
function computeSimhashHex(text) {
    const BIT_COUNT = 64;
    const tally = new Array(BIT_COUNT).fill(0);
    buildSimilarityTokens(text).forEach(token => {
        let remaining = hashToken64(token);
        for (let position = 0; position < BIT_COUNT; position += 1) {
            tally[position] += (remaining & 1n) === 1n ? 1 : -1;
            remaining >>= 1n;
        }
    });
    // Assemble from the most significant bit down so each step is a shift-and-or.
    let fingerprint = 0n;
    for (let position = BIT_COUNT - 1; position >= 0; position -= 1) {
        fingerprint = (fingerprint << 1n) | (tally[position] >= 0 ? 1n : 0n);
    }
    return fingerprint.toString(16).padStart(16, "0");
}
|
|
135
|
+
/**
 * Counts the bits in which two hexadecimal fingerprints differ.
 *
 * @param {string} left Hex digits without "0x" prefix; falsy is treated as "0".
 * @param {string} right Hex digits without "0x" prefix; falsy is treated as "0".
 * @returns {number} Number of differing bits.
 */
function hammingDistanceHex(left, right) {
    const parseHex = hex => BigInt(`0x${hex || "0"}`);
    let differing = 0;
    // Each iteration clears the lowest set bit of the XOR, so the loop runs once per differing bit.
    for (let diff = parseHex(left) ^ parseHex(right); diff > 0n; diff &= diff - 1n) {
        differing += 1;
    }
    return differing;
}
|
|
144
|
+
/**
 * Seeded 32-bit hash of a token.
 *
 * @param {number} seed Seed mixed into the hash input.
 * @param {string} token Token to hash.
 * @returns {number} First four bytes of SHA-1("<seed>:<token>") as an unsigned big-endian integer.
 */
function seedHash(seed, token) {
    const hasher = crypto.createHash("sha1");
    hasher.update(`${seed}:${token}`);
    const digestBytes = hasher.digest();
    return digestBytes.readUInt32BE(0);
}
|
|
147
|
+
/**
 * Computes a MinHash signature of the text.
 *
 * For every seed in [0, signatures) the smallest seedHash() value over all
 * tokens from buildSimilarityTokens() is recorded.
 *
 * @param {string} text Text to sign.
 * @param {number} [signatures=48] Number of signature slots.
 * @returns {number[]} The signature; all zeros when the text yields no tokens.
 */
function computeMinhash(text, signatures = 48) {
    const tokenList = buildSimilarityTokens(text);
    if (tokenList.length === 0) {
        return Array.from({ length: signatures }).fill(0);
    }
    const signature = [];
    let seed = 0;
    while (seed < signatures) {
        let lowest = Number.MAX_SAFE_INTEGER;
        for (const token of tokenList) {
            const candidate = seedHash(seed, token);
            lowest = candidate < lowest ? candidate : lowest;
        }
        // The sentinel is never left in the output.
        signature.push(lowest === Number.MAX_SAFE_INTEGER ? 0 : lowest);
        seed += 1;
    }
    return signature;
}
|
|
163
|
+
/**
 * Compares two MinHash signatures slot by slot.
 *
 * @param {number[]} left First signature.
 * @param {number[]} right Second signature.
 * @returns {number} Fraction of equal slots over the shorter signature's length; 0 when either is empty.
 */
function minhashSimilarity(left, right) {
    const comparable = Math.min(left.length, right.length);
    if (comparable === 0) {
        return 0;
    }
    const matching = left
        .slice(0, comparable)
        .filter((slot, position) => slot === right[position])
        .length;
    return matching / comparable;
}
|
|
174
|
+
/**
 * Picks the text used for semantic de-duplication.
 *
 * @param {{summary?: string, text?: string, sourceText?: string}} args Candidate fields.
 * @returns {string} The first non-empty value among the whitespace-normalized
 *   summary, text and sourceText (in that order), or "" when all are empty.
 */
function buildActiveDedupText(args) {
    const candidates = [args.summary, args.text, args.sourceText];
    for (const candidate of candidates) {
        const normalized = normalizeSummary(candidate || "");
        if (normalized) {
            return normalized;
        }
    }
    return "";
}
|
|
181
|
+
function scoreQuality(args) {
|
|
182
|
+
const text = args.text;
|
|
183
|
+
const summary = normalizeSummary(args.summary || "");
|
|
184
|
+
const sourceText = normalizeSummary(args.sourceText || "");
|
|
185
|
+
const merged = [summary, text, sourceText].filter(Boolean).join("\n");
|
|
186
|
+
const normalized = normalizeSemanticText(merged);
|
|
187
|
+
if (!normalized || ACTIVE_LOW_VALUE_ONLY.test(normalized)) {
|
|
188
|
+
return { score: 0, level: "low" };
|
|
189
|
+
}
|
|
44
190
|
const length = text.length;
|
|
45
|
-
const uniqueChars = new Set(
|
|
191
|
+
const uniqueChars = new Set(normalized).size;
|
|
192
|
+
const hasSemanticSignal = ACTIVE_SEMANTIC_SIGNAL.test(merged);
|
|
193
|
+
const hasEvidence = ACTIVE_EVIDENCE_SIGNAL.test(merged);
|
|
194
|
+
const hasWorkflowSignal = ACTIVE_WORKFLOW_SIGNAL.test(merged);
|
|
46
195
|
let score = 0;
|
|
47
196
|
if (length >= 20)
|
|
48
|
-
score += 0.
|
|
197
|
+
score += 0.18;
|
|
49
198
|
if (length >= 60)
|
|
50
|
-
score += 0.
|
|
199
|
+
score += 0.14;
|
|
51
200
|
if (length >= 120)
|
|
52
|
-
score += 0.
|
|
201
|
+
score += 0.1;
|
|
53
202
|
if (uniqueChars >= 10)
|
|
54
|
-
score += 0.
|
|
55
|
-
if (
|
|
56
|
-
score += 0.
|
|
57
|
-
if (
|
|
58
|
-
score += 0.
|
|
203
|
+
score += 0.08;
|
|
204
|
+
if (uniqueChars >= 24)
|
|
205
|
+
score += 0.04;
|
|
206
|
+
if (hasSemanticSignal)
|
|
207
|
+
score += 0.26;
|
|
208
|
+
if (hasEvidence)
|
|
209
|
+
score += 0.16;
|
|
210
|
+
if (hasWorkflowSignal)
|
|
211
|
+
score += 0.12;
|
|
212
|
+
if (summary && sourceText && sourceText.toLowerCase().includes(summary.toLowerCase().slice(0, Math.min(32, summary.length)))) {
|
|
213
|
+
score += 0.06;
|
|
214
|
+
}
|
|
215
|
+
if (!hasSemanticSignal && !hasEvidence && !hasWorkflowSignal) {
|
|
216
|
+
score = Math.min(score, 0.35);
|
|
217
|
+
}
|
|
59
218
|
const normalizedScore = Math.max(0, Math.min(1, Number(score.toFixed(2))));
|
|
60
219
|
if (normalizedScore >= 0.75) {
|
|
61
220
|
return { score: normalizedScore, level: "high" };
|
|
@@ -85,11 +244,97 @@ function safeReadTailLines(filePath, maxLines) {
|
|
|
85
244
|
/**
 * @param {string} text Text to hash.
 * @returns {string} SHA-256 digest of the text as lower-case hex.
 */
function computeHash(text) {
    const hasher = crypto.createHash("sha256");
    hasher.update(text);
    return hasher.digest("hex");
}
|
|
247
|
+
/**
 * Rough token estimate: the number of non-empty pieces left after splitting
 * the text on runs of whitespace, punctuation, brackets and quotes.
 *
 * @param {string} text Text to measure.
 * @returns {number} Count of non-empty pieces.
 */
function estimateTokenCount(text) {
    const separatorRun = /[\s,.;:!?,。;:!?、()()[\]{}"'`~]+/;
    let pieces = 0;
    for (const fragment of text.split(separatorRun)) {
        if (fragment.trim()) {
            pieces += 1;
        }
    }
    return pieces;
}
|
|
88
254
|
/**
 * Removes one trailing "/" from a base URL.
 *
 * @param {string|undefined} value Base URL; falsy values yield "".
 * @returns {string} The URL without a single trailing slash.
 */
function normalizeBaseUrl(value) {
    if (value && value.endsWith("/")) {
        return value.slice(0, value.length - 1);
    }
    return value ? value : "";
}
|
|
259
|
+
function splitTextChunks(text, chunkSize, chunkOverlap) {
|
|
260
|
+
const normalizedSize = Number.isFinite(chunkSize) && chunkSize >= 200 ? Math.floor(chunkSize) : 600;
|
|
261
|
+
const normalizedOverlap = Number.isFinite(chunkOverlap) && chunkOverlap >= 0
|
|
262
|
+
? Math.floor(chunkOverlap)
|
|
263
|
+
: 100;
|
|
264
|
+
const overlap = Math.min(normalizedOverlap, Math.max(0, normalizedSize - 50));
|
|
265
|
+
const output = [];
|
|
266
|
+
if (!text.trim()) {
|
|
267
|
+
return output;
|
|
268
|
+
}
|
|
269
|
+
let cursor = 0;
|
|
270
|
+
let index = 0;
|
|
271
|
+
const punctuationSet = new Set(["。", "!", "?", ".", "!", "?", "\n", ";", ";"]);
|
|
272
|
+
while (cursor < text.length) {
|
|
273
|
+
const rawEnd = Math.min(text.length, cursor + normalizedSize);
|
|
274
|
+
let end = rawEnd;
|
|
275
|
+
if (rawEnd < text.length) {
|
|
276
|
+
const backwardStart = Math.max(cursor + Math.floor(normalizedSize * 0.45), cursor + 1);
|
|
277
|
+
let found = -1;
|
|
278
|
+
for (let i = rawEnd - 1; i >= backwardStart; i -= 1) {
|
|
279
|
+
if (punctuationSet.has(text[i])) {
|
|
280
|
+
found = i + 1;
|
|
281
|
+
break;
|
|
282
|
+
}
|
|
283
|
+
}
|
|
284
|
+
if (found < 0) {
|
|
285
|
+
const forwardEnd = Math.min(text.length, rawEnd + Math.floor(normalizedSize * 0.2));
|
|
286
|
+
for (let i = rawEnd; i < forwardEnd; i += 1) {
|
|
287
|
+
if (punctuationSet.has(text[i])) {
|
|
288
|
+
found = i + 1;
|
|
289
|
+
break;
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
if (found > cursor) {
|
|
294
|
+
end = found;
|
|
295
|
+
}
|
|
296
|
+
}
|
|
297
|
+
if (end <= cursor) {
|
|
298
|
+
end = Math.min(text.length, cursor + normalizedSize);
|
|
299
|
+
}
|
|
300
|
+
const chunkText = text.slice(cursor, end).trim();
|
|
301
|
+
if (chunkText) {
|
|
302
|
+
output.push({ index, start: cursor, end, text: chunkText });
|
|
303
|
+
index += 1;
|
|
304
|
+
}
|
|
305
|
+
if (end >= text.length) {
|
|
306
|
+
break;
|
|
307
|
+
}
|
|
308
|
+
const nextCursor = Math.max(cursor + 1, end - overlap);
|
|
309
|
+
if (nextCursor <= cursor) {
|
|
310
|
+
cursor = end;
|
|
311
|
+
}
|
|
312
|
+
else {
|
|
313
|
+
cursor = nextCursor;
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
return output;
|
|
317
|
+
}
|
|
318
|
+
async function mapWithConcurrency(items, maxConcurrency, mapper) {
|
|
319
|
+
if (items.length === 0) {
|
|
320
|
+
return [];
|
|
321
|
+
}
|
|
322
|
+
const concurrency = Math.max(1, Math.min(maxConcurrency, items.length));
|
|
323
|
+
const results = new Array(items.length);
|
|
324
|
+
let cursor = 0;
|
|
325
|
+
async function worker() {
|
|
326
|
+
while (true) {
|
|
327
|
+
const current = cursor;
|
|
328
|
+
cursor += 1;
|
|
329
|
+
if (current >= items.length) {
|
|
330
|
+
break;
|
|
331
|
+
}
|
|
332
|
+
results[current] = await mapper(items[current], current);
|
|
333
|
+
}
|
|
334
|
+
}
|
|
335
|
+
await Promise.all(Array.from({ length: concurrency }, () => worker()));
|
|
336
|
+
return results;
|
|
337
|
+
}
|
|
93
338
|
async function requestEmbedding(args) {
|
|
94
339
|
const endpoint = args.baseUrl.endsWith("/embeddings") ? args.baseUrl : `${args.baseUrl}/embeddings`;
|
|
95
340
|
const body = {
|
|
@@ -107,24 +352,18 @@ async function requestEmbedding(args) {
|
|
|
107
352
|
: 4;
|
|
108
353
|
let lastError = null;
|
|
109
354
|
for (let attempt = 0; attempt < maxRetries; attempt += 1) {
|
|
110
|
-
const
|
|
111
|
-
|
|
355
|
+
const response = await (0, http_post_1.postJsonWithTimeout)({
|
|
356
|
+
endpoint,
|
|
357
|
+
apiKey: args.apiKey,
|
|
358
|
+
body,
|
|
359
|
+
timeoutMs,
|
|
360
|
+
});
|
|
361
|
+
if (!response.ok) {
|
|
362
|
+
lastError = new Error(response.status > 0 ? `embedding_http_${response.status}` : (response.error || "embedding_network_error"));
|
|
363
|
+
continue;
|
|
364
|
+
}
|
|
112
365
|
try {
|
|
113
|
-
const
|
|
114
|
-
method: "POST",
|
|
115
|
-
headers: {
|
|
116
|
-
"content-type": "application/json",
|
|
117
|
-
authorization: `Bearer ${args.apiKey}`,
|
|
118
|
-
},
|
|
119
|
-
body: JSON.stringify(body),
|
|
120
|
-
signal: controller.signal,
|
|
121
|
-
});
|
|
122
|
-
clearTimeout(timeoutId);
|
|
123
|
-
if (!response.ok) {
|
|
124
|
-
lastError = new Error(`embedding_http_${response.status}`);
|
|
125
|
-
continue;
|
|
126
|
-
}
|
|
127
|
-
const json = await response.json();
|
|
366
|
+
const json = (response.json || {});
|
|
128
367
|
const embedding = json?.data?.[0]?.embedding;
|
|
129
368
|
if (Array.isArray(embedding) && embedding.length > 0) {
|
|
130
369
|
return embedding.filter(item => Number.isFinite(item));
|
|
@@ -132,7 +371,6 @@ async function requestEmbedding(args) {
|
|
|
132
371
|
lastError = new Error("embedding_empty");
|
|
133
372
|
}
|
|
134
373
|
catch (error) {
|
|
135
|
-
clearTimeout(timeoutId);
|
|
136
374
|
lastError = error;
|
|
137
375
|
}
|
|
138
376
|
if (attempt < maxRetries - 1) {
|
|
@@ -148,17 +386,32 @@ function createWriteStore(options) {
|
|
|
148
386
|
const memoryRoot = options.dbPath ? path.resolve(options.dbPath) : path.join(options.projectRoot, "data", "memory");
|
|
149
387
|
const activeSessionsPath = path.join(memoryRoot, "sessions", "active", "sessions.jsonl");
|
|
150
388
|
async function writeMemory(args) {
|
|
151
|
-
const
|
|
389
|
+
const activeTextMaxChars = typeof options.writePolicy?.activeTextMaxChars === "number" && Number.isFinite(options.writePolicy.activeTextMaxChars)
|
|
390
|
+
? Math.max(500, Math.floor(options.writePolicy.activeTextMaxChars))
|
|
391
|
+
: 200000;
|
|
392
|
+
const cleaned = normalizeText(args.text || "", activeTextMaxChars);
|
|
152
393
|
if (!cleaned) {
|
|
153
394
|
return { status: "skipped", reason: "empty_text", error_code: "E204" };
|
|
154
395
|
}
|
|
155
|
-
const
|
|
156
|
-
|
|
396
|
+
const sourceTextRaw = typeof args.sourceText === "string" ? args.sourceText.trim() : "";
|
|
397
|
+
const sourceText = sourceTextRaw || cleaned;
|
|
398
|
+
const quality = scoreQuality({ text: cleaned, summary: args.summary, sourceText });
|
|
399
|
+
const activeMinQualityScore = typeof options.writePolicy?.activeMinQualityScore === "number"
|
|
400
|
+
? Math.max(0, Math.min(1, options.writePolicy.activeMinQualityScore))
|
|
401
|
+
: 0.45;
|
|
402
|
+
if (quality.score < activeMinQualityScore) {
|
|
157
403
|
return { status: "skipped", reason: "low_quality", error_code: "E204", quality };
|
|
158
404
|
}
|
|
159
405
|
const textHash = computeHash(cleaned);
|
|
406
|
+
const semanticDedupText = buildActiveDedupText({ text: cleaned, summary: args.summary, sourceText });
|
|
407
|
+
const semanticHash = computeHash(normalizeSemanticText(semanticDedupText));
|
|
408
|
+
const semanticSimhash = computeSimhashHex(semanticDedupText);
|
|
409
|
+
const semanticMinhash = computeMinhash(semanticDedupText);
|
|
160
410
|
try {
|
|
161
|
-
const
|
|
411
|
+
const dedupTailLines = typeof options.writePolicy?.activeDedupTailLines === "number"
|
|
412
|
+
? Math.max(20, Math.min(5000, Math.floor(options.writePolicy.activeDedupTailLines)))
|
|
413
|
+
: 200;
|
|
414
|
+
const tailLines = safeReadTailLines(activeSessionsPath, dedupTailLines);
|
|
162
415
|
for (const line of tailLines) {
|
|
163
416
|
try {
|
|
164
417
|
const parsed = JSON.parse(line);
|
|
@@ -167,6 +420,28 @@ function createWriteStore(options) {
|
|
|
167
420
|
parsed.text_hash === textHash) {
|
|
168
421
|
return { status: "skipped", reason: "duplicate", error_code: "E203", quality };
|
|
169
422
|
}
|
|
423
|
+
const existingSemanticHash = typeof parsed.semantic_hash === "string"
|
|
424
|
+
? parsed.semantic_hash
|
|
425
|
+
: computeHash(normalizeSemanticText(String(parsed.summary || parsed.source_text || "")));
|
|
426
|
+
if (existingSemanticHash && existingSemanticHash === semanticHash) {
|
|
427
|
+
return { status: "skipped", reason: "duplicate_semantic", error_code: "E203", quality };
|
|
428
|
+
}
|
|
429
|
+
const existingSemanticText = buildActiveDedupText({
|
|
430
|
+
text: String(parsed.summary || parsed.source_text || ""),
|
|
431
|
+
summary: typeof parsed.summary === "string" ? parsed.summary : undefined,
|
|
432
|
+
sourceText: typeof parsed.source_text === "string" ? parsed.source_text : undefined,
|
|
433
|
+
});
|
|
434
|
+
const existingSimhash = typeof parsed.semantic_simhash === "string"
|
|
435
|
+
? parsed.semantic_simhash
|
|
436
|
+
: computeSimhashHex(existingSemanticText);
|
|
437
|
+
if (normalizeSemanticText(semanticDedupText).length >= 24 &&
|
|
438
|
+
normalizeSemanticText(existingSemanticText).length >= 24 &&
|
|
439
|
+
hammingDistanceHex(semanticSimhash, existingSimhash) <= 3) {
|
|
440
|
+
return { status: "skipped", reason: "duplicate_simhash", error_code: "E203", quality };
|
|
441
|
+
}
|
|
442
|
+
if (minhashSimilarity(semanticMinhash, computeMinhash(existingSemanticText)) >= 0.92) {
|
|
443
|
+
return { status: "skipped", reason: "duplicate_minhash", error_code: "E203", quality };
|
|
444
|
+
}
|
|
170
445
|
}
|
|
171
446
|
catch { }
|
|
172
447
|
}
|
|
@@ -181,35 +456,109 @@ function createWriteStore(options) {
|
|
|
181
456
|
session_id: args.sessionId,
|
|
182
457
|
role: args.role || "user",
|
|
183
458
|
source: args.source || "message",
|
|
184
|
-
|
|
459
|
+
summary: normalizeSummary(args.summary || "") || normalizeSummary(cleaned),
|
|
460
|
+
source_text: sourceText || undefined,
|
|
461
|
+
layer: "active",
|
|
462
|
+
source_memory_id: id,
|
|
463
|
+
source_memory_canonical_id: id,
|
|
464
|
+
source_event_id: id,
|
|
465
|
+
canonical_id: id,
|
|
466
|
+
embedding_status: "pending",
|
|
467
|
+
llm_gate_decision: "active_only",
|
|
185
468
|
quality_level: quality.level,
|
|
186
469
|
quality_score: quality.score,
|
|
187
470
|
text_hash: textHash,
|
|
471
|
+
semantic_hash: semanticHash,
|
|
472
|
+
semantic_simhash: semanticSimhash,
|
|
473
|
+
char_count: sourceText.length,
|
|
474
|
+
token_count: estimateTokenCount(sourceText),
|
|
188
475
|
};
|
|
189
476
|
const embeddingModel = options.embedding?.model || "";
|
|
190
477
|
const embeddingApiKey = options.embedding?.apiKey || "";
|
|
191
478
|
const embeddingBaseUrl = normalizeBaseUrl(options.embedding?.baseURL || options.embedding?.baseUrl);
|
|
192
|
-
|
|
479
|
+
const chunkSize = options.vectorChunking?.chunkSize ?? 600;
|
|
480
|
+
const chunkOverlap = options.vectorChunking?.chunkOverlap ?? 100;
|
|
481
|
+
const maxParallel = 6;
|
|
482
|
+
const vectorStore = options.vectorStore;
|
|
483
|
+
if (embeddingModel && embeddingApiKey && embeddingBaseUrl && vectorStore) {
|
|
484
|
+
const chunks = splitTextChunks(sourceText, chunkSize, chunkOverlap);
|
|
485
|
+
record.vector_chunks_total = chunks.length;
|
|
486
|
+
record.vector_chunks_ok = 0;
|
|
193
487
|
try {
|
|
194
|
-
|
|
195
|
-
text: cleaned,
|
|
196
|
-
model: embeddingModel,
|
|
197
|
-
apiKey: embeddingApiKey,
|
|
198
|
-
baseUrl: embeddingBaseUrl,
|
|
199
|
-
dimensions: options.embedding?.dimensions,
|
|
200
|
-
timeoutMs: options.embedding?.timeoutMs,
|
|
201
|
-
maxRetries: options.embedding?.maxRetries,
|
|
202
|
-
});
|
|
203
|
-
if (embedding && embedding.length > 0) {
|
|
204
|
-
record.embedding = embedding;
|
|
205
|
-
}
|
|
488
|
+
await vectorStore.deleteBySourceMemory({ layer: "active", sourceMemoryId: record.id });
|
|
206
489
|
}
|
|
207
490
|
catch (error) {
|
|
208
|
-
options.logger.warn(`
|
|
491
|
+
options.logger.warn(`Active vector cleanup failed before upsert: ${error}`);
|
|
209
492
|
}
|
|
493
|
+
const chunkEmbeddings = await mapWithConcurrency(chunks, maxParallel, async (chunk) => {
|
|
494
|
+
try {
|
|
495
|
+
const embedding = await requestEmbedding({
|
|
496
|
+
text: chunk.text,
|
|
497
|
+
model: embeddingModel,
|
|
498
|
+
apiKey: embeddingApiKey,
|
|
499
|
+
baseUrl: embeddingBaseUrl,
|
|
500
|
+
dimensions: options.embedding?.dimensions,
|
|
501
|
+
timeoutMs: options.embedding?.timeoutMs,
|
|
502
|
+
maxRetries: options.embedding?.maxRetries,
|
|
503
|
+
});
|
|
504
|
+
if (!embedding || embedding.length === 0) {
|
|
505
|
+
return null;
|
|
506
|
+
}
|
|
507
|
+
return { chunk, embedding };
|
|
508
|
+
}
|
|
509
|
+
catch (error) {
|
|
510
|
+
options.logger.warn(`Active chunk embedding failed id=${record.id} chunk=${chunk.index} error=${error}`);
|
|
511
|
+
return null;
|
|
512
|
+
}
|
|
513
|
+
});
|
|
514
|
+
const validEmbeddings = chunkEmbeddings
|
|
515
|
+
.filter((item) => Boolean(item))
|
|
516
|
+
.sort((a, b) => a.chunk.index - b.chunk.index);
|
|
517
|
+
const upsertStatus = await mapWithConcurrency(validEmbeddings, maxParallel, async (item) => {
|
|
518
|
+
const { chunk, embedding } = item;
|
|
519
|
+
try {
|
|
520
|
+
await vectorStore.upsert({
|
|
521
|
+
id: `vec_${record.id}_c${chunk.index}`,
|
|
522
|
+
session_id: record.session_id,
|
|
523
|
+
event_type: "message",
|
|
524
|
+
summary: chunk.text,
|
|
525
|
+
timestamp: record.timestamp,
|
|
526
|
+
layer: "active",
|
|
527
|
+
source_memory_id: record.id,
|
|
528
|
+
source_memory_canonical_id: record.id,
|
|
529
|
+
source_event_id: record.id,
|
|
530
|
+
source_field: "summary",
|
|
531
|
+
embedding,
|
|
532
|
+
quality_score: record.quality_score,
|
|
533
|
+
char_count: chunk.text.length,
|
|
534
|
+
token_count: estimateTokenCount(chunk.text),
|
|
535
|
+
chunk_index: chunk.index,
|
|
536
|
+
chunk_total: chunks.length,
|
|
537
|
+
chunk_start: chunk.start,
|
|
538
|
+
chunk_end: chunk.end,
|
|
539
|
+
});
|
|
540
|
+
return true;
|
|
541
|
+
}
|
|
542
|
+
catch (error) {
|
|
543
|
+
options.logger.warn(`Active chunk embedding failed id=${record.id} chunk=${chunk.index} error=${error}`);
|
|
544
|
+
return false;
|
|
545
|
+
}
|
|
546
|
+
});
|
|
547
|
+
record.vector_chunks_ok = upsertStatus.filter(Boolean).length;
|
|
548
|
+
record.embedding_status = record.vector_chunks_total > 0 && record.vector_chunks_ok === record.vector_chunks_total
|
|
549
|
+
? "ok"
|
|
550
|
+
: "failed";
|
|
210
551
|
}
|
|
211
552
|
ensureDirForFile(activeSessionsPath);
|
|
212
|
-
|
|
553
|
+
const recordLine = JSON.stringify(record);
|
|
554
|
+
fs.appendFileSync(activeSessionsPath, `${recordLine}\n`, "utf-8");
|
|
555
|
+
const validation = (0, llm_output_validator_1.validateJsonlLine)(recordLine);
|
|
556
|
+
if (!validation.valid && validation.errors.length > 0) {
|
|
557
|
+
options.logger.warn(`active_write_integrity_check_failed errors=${validation.errors.join("|")}`);
|
|
558
|
+
}
|
|
559
|
+
if (record.vector_chunks_total && record.vector_chunks_total > 0) {
|
|
560
|
+
options.logger.info(`active_vector_chunks source=${record.id} ok=${record.vector_chunks_ok || 0}/${record.vector_chunks_total}`);
|
|
561
|
+
}
|
|
213
562
|
options.logger.info(`TS write stored message for session ${args.sessionId}`);
|
|
214
563
|
return { status: "ok", memory_id: id, quality };
|
|
215
564
|
}
|