openclaw-cortex-memory 0.1.0-Alpha.8 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/LICENSE +21 -0
  2. package/README.md +347 -299
  3. package/SIGNATURE.md +7 -0
  4. package/SKILL.md +96 -350
  5. package/dist/index.d.ts +93 -23
  6. package/dist/index.d.ts.map +1 -1
  7. package/dist/index.js +1234 -1318
  8. package/dist/index.js.map +1 -1
  9. package/dist/openclaw.plugin.json +377 -18
  10. package/dist/src/dedup/three_stage_deduplicator.d.ts.map +1 -1
  11. package/dist/src/dedup/three_stage_deduplicator.js +13 -3
  12. package/dist/src/dedup/three_stage_deduplicator.js.map +1 -1
  13. package/dist/src/engine/memory_engine.d.ts +6 -1
  14. package/dist/src/engine/memory_engine.d.ts.map +1 -1
  15. package/dist/src/engine/ts_engine.d.ts +208 -0
  16. package/dist/src/engine/ts_engine.d.ts.map +1 -1
  17. package/dist/src/engine/ts_engine.js +1353 -84
  18. package/dist/src/engine/ts_engine.js.map +1 -1
  19. package/dist/src/engine/types.d.ts +27 -0
  20. package/dist/src/engine/types.d.ts.map +1 -1
  21. package/dist/src/graph/ontology.d.ts +87 -15
  22. package/dist/src/graph/ontology.d.ts.map +1 -1
  23. package/dist/src/graph/ontology.js +999 -12
  24. package/dist/src/graph/ontology.js.map +1 -1
  25. package/dist/src/net/http_post.d.ts +17 -0
  26. package/dist/src/net/http_post.d.ts.map +1 -0
  27. package/dist/src/net/http_post.js +56 -0
  28. package/dist/src/net/http_post.js.map +1 -0
  29. package/dist/src/quality/llm_output_validator.d.ts +65 -0
  30. package/dist/src/quality/llm_output_validator.d.ts.map +1 -0
  31. package/dist/src/quality/llm_output_validator.js +635 -0
  32. package/dist/src/quality/llm_output_validator.js.map +1 -0
  33. package/dist/src/reflect/reflector.d.ts.map +1 -1
  34. package/dist/src/reflect/reflector.js +296 -26
  35. package/dist/src/reflect/reflector.js.map +1 -1
  36. package/dist/src/rules/rule_store.d.ts.map +1 -1
  37. package/dist/src/rules/rule_store.js +75 -16
  38. package/dist/src/rules/rule_store.js.map +1 -1
  39. package/dist/src/session/session_end.d.ts +20 -42
  40. package/dist/src/session/session_end.d.ts.map +1 -1
  41. package/dist/src/session/session_end.js +31 -214
  42. package/dist/src/session/session_end.js.map +1 -1
  43. package/dist/src/store/archive_store.d.ts +52 -7
  44. package/dist/src/store/archive_store.d.ts.map +1 -1
  45. package/dist/src/store/archive_store.js +526 -96
  46. package/dist/src/store/archive_store.js.map +1 -1
  47. package/dist/src/store/embedding_utils.d.ts +32 -0
  48. package/dist/src/store/embedding_utils.d.ts.map +1 -0
  49. package/dist/src/store/embedding_utils.js +173 -0
  50. package/dist/src/store/embedding_utils.js.map +1 -0
  51. package/dist/src/store/graph_memory_store.d.ts +115 -0
  52. package/dist/src/store/graph_memory_store.d.ts.map +1 -0
  53. package/dist/src/store/graph_memory_store.js +1061 -0
  54. package/dist/src/store/graph_memory_store.js.map +1 -0
  55. package/dist/src/store/read_store.d.ts +95 -0
  56. package/dist/src/store/read_store.d.ts.map +1 -1
  57. package/dist/src/store/read_store.js +2108 -268
  58. package/dist/src/store/read_store.js.map +1 -1
  59. package/dist/src/store/vector_store.d.ts +15 -0
  60. package/dist/src/store/vector_store.d.ts.map +1 -1
  61. package/dist/src/store/vector_store.js +75 -1
  62. package/dist/src/store/vector_store.js.map +1 -1
  63. package/dist/src/store/write_store.d.ts +46 -0
  64. package/dist/src/store/write_store.d.ts.map +1 -1
  65. package/dist/src/store/write_store.js +399 -50
  66. package/dist/src/store/write_store.js.map +1 -1
  67. package/dist/src/sync/session_sync.d.ts +115 -2
  68. package/dist/src/sync/session_sync.d.ts.map +1 -1
  69. package/dist/src/sync/session_sync.js +2497 -44
  70. package/dist/src/sync/session_sync.js.map +1 -1
  71. package/dist/src/utils/runtime_env.d.ts +4 -0
  72. package/dist/src/utils/runtime_env.d.ts.map +1 -0
  73. package/dist/src/utils/runtime_env.js +51 -0
  74. package/dist/src/utils/runtime_env.js.map +1 -0
  75. package/dist/src/wiki/wiki_linter.d.ts +26 -0
  76. package/dist/src/wiki/wiki_linter.d.ts.map +1 -0
  77. package/dist/src/wiki/wiki_linter.js +339 -0
  78. package/dist/src/wiki/wiki_linter.js.map +1 -0
  79. package/dist/src/wiki/wiki_logger.d.ts +10 -0
  80. package/dist/src/wiki/wiki_logger.d.ts.map +1 -0
  81. package/dist/src/wiki/wiki_logger.js +78 -0
  82. package/dist/src/wiki/wiki_logger.js.map +1 -0
  83. package/dist/src/wiki/wiki_maintainer.d.ts +39 -0
  84. package/dist/src/wiki/wiki_maintainer.d.ts.map +1 -0
  85. package/dist/src/wiki/wiki_maintainer.js +38 -0
  86. package/dist/src/wiki/wiki_maintainer.js.map +1 -0
  87. package/dist/src/wiki/wiki_projector.d.ts +35 -0
  88. package/dist/src/wiki/wiki_projector.d.ts.map +1 -0
  89. package/dist/src/wiki/wiki_projector.js +1151 -0
  90. package/dist/src/wiki/wiki_projector.js.map +1 -0
  91. package/dist/src/wiki/wiki_queue.d.ts +29 -0
  92. package/dist/src/wiki/wiki_queue.d.ts.map +1 -0
  93. package/dist/src/wiki/wiki_queue.js +137 -0
  94. package/dist/src/wiki/wiki_queue.js.map +1 -0
  95. package/openclaw.plugin.json +377 -18
  96. package/package.json +52 -5
  97. package/schema/graph.schema.yaml +330 -0
  98. package/scripts/cli.js +80 -26
  99. package/scripts/repair-memory.js +321 -0
  100. package/scripts/uninstall.js +7 -1
  101. package/skills/cortex-memory/SKILL.md +83 -0
  102. package/skills/cortex-memory/references/agent-manual.md +127 -0
  103. package/skills/cortex-memory/references/configuration.md +109 -0
  104. package/skills/cortex-memory/references/publish-checklist.md +45 -0
  105. package/skills/cortex-memory/references/system-prompt-template.md +27 -0
  106. package/skills/cortex-memory/references/tools.md +191 -0
@@ -8,6 +8,8 @@ export interface WriteMemoryArgs {
8
8
  role: string;
9
9
  source: string;
10
10
  sessionId: string;
11
+ summary?: string;
12
+ sourceText?: string;
11
13
  }
12
14
  export interface WriteMemoryResult {
13
15
  status: "ok" | "skipped";
@@ -33,6 +35,50 @@ interface WriteStoreOptions {
33
35
  timeoutMs?: number;
34
36
  maxRetries?: number;
35
37
  };
38
+ vectorChunking?: {
39
+ chunkSize?: number;
40
+ chunkOverlap?: number;
41
+ };
42
+ writePolicy?: {
43
+ activeMinQualityScore?: number;
44
+ activeDedupTailLines?: number;
45
+ activeTextMaxChars?: number;
46
+ };
47
+ vectorStore?: {
48
+ upsert(record: {
49
+ id: string;
50
+ session_id: string;
51
+ event_type: string;
52
+ summary: string;
53
+ timestamp: string;
54
+ layer: "active" | "archive";
55
+ source_memory_id: string;
56
+ source_memory_canonical_id?: string;
57
+ source_event_id?: string;
58
+ source_field?: "summary" | "evidence";
59
+ outcome?: string;
60
+ entities?: string[];
61
+ relations?: Array<{
62
+ source: string;
63
+ target: string;
64
+ type: string;
65
+ evidence_span?: string;
66
+ confidence?: number;
67
+ }>;
68
+ embedding: number[];
69
+ quality_score: number;
70
+ char_count: number;
71
+ token_count: number;
72
+ chunk_index?: number;
73
+ chunk_total?: number;
74
+ chunk_start?: number;
75
+ chunk_end?: number;
76
+ }): Promise<void>;
77
+ deleteBySourceMemory(args: {
78
+ layer: "active" | "archive";
79
+ sourceMemoryId: string;
80
+ }): Promise<void>;
81
+ };
36
82
  }
37
83
  export declare function createWriteStore(options: WriteStoreOptions): {
38
84
  writeMemory(args: WriteMemoryArgs): Promise<WriteMemoryResult>;
@@ -1 +1 @@
1
- {"version":3,"file":"write_store.d.ts","sourceRoot":"","sources":["../../../src/store/write_store.ts"],"names":[],"mappings":"AAIA,UAAU,UAAU;IAClB,KAAK,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;IACrD,IAAI,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;IACpD,IAAI,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;CACrD;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;CACnB;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,IAAI,GAAG,SAAS,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE;QACR,KAAK,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QACjC,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;CACH;AAED,UAAU,iBAAiB;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,UAAU,CAAC;IACnB,SAAS,CAAC,EAAE;QACV,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;CACH;AAiID,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,iBAAiB,GAAG;IAAE,WAAW,CAAC,IAAI,EAAE,eAAe,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAA;CAAE,CA4E/H"}
1
+ {"version":3,"file":"write_store.d.ts","sourceRoot":"","sources":["../../../src/store/write_store.ts"],"names":[],"mappings":"AAMA,UAAU,UAAU;IAClB,KAAK,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;IACrD,IAAI,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;IACpD,IAAI,EAAE,CAAC,OAAO,EAAE,MAAM,EAAE,GAAG,IAAI,EAAE,OAAO,EAAE,KAAK,IAAI,CAAC;CACrD;AAED,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;IACb,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,iBAAiB;IAChC,MAAM,EAAE,IAAI,GAAG,SAAS,CAAC;IACzB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,OAAO,CAAC,EAAE;QACR,KAAK,EAAE,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QACjC,KAAK,EAAE,MAAM,CAAC;KACf,CAAC;CACH;AAED,UAAU,iBAAiB;IACzB,WAAW,EAAE,MAAM,CAAC;IACpB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,MAAM,EAAE,UAAU,CAAC;IACnB,SAAS,CAAC,EAAE;QACV,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,EAAE,MAAM,CAAC;QACd,MAAM,CAAC,EAAE,MAAM,CAAC;QAChB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,OAAO,CAAC,EAAE,MAAM,CAAC;QACjB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;KACrB,CAAC;IACF,cAAc,CAAC,EAAE;QACf,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,CAAC;IACF,WAAW,CAAC,EAAE;QACZ,qBAAqB,CAAC,EAAE,MAAM,CAAC;QAC/B,oBAAoB,CAAC,EAAE,MAAM,CAAC;QAC9B,kBAAkB,CAAC,EAAE,MAAM,CAAC;KAC7B,CAAC;IACF,WAAW,CAAC,EAAE;QACZ,MAAM,CAAC,MAAM,EAAE;YACb,EAAE,EAAE,MAAM,CAAC;YACX,UAAU,EAAE,MAAM,CAAC;YACnB,UAAU,EAAE,MAAM,CAAC;YACnB,OAAO,EAAE,MAAM,CAAC;YAChB,SAAS,EAAE,MAAM,CAAC;YAClB,KAAK,EAAE,QAAQ,GAAG,SAAS,CAAC;YAC5B,gBAAgB,EAAE,MAAM,CAAC;YACzB,0BAA0B,CAAC,EAAE,MAAM,CAAC;YACpC,eAAe,CAAC,EAAE,MAAM,CAAC;YACzB,YAAY,CAAC,EAAE,SAAS,GAAG,UAAU,CAAC;YACtC,OAAO,CAAC,EAAE,MAAM,CAAC;YACjB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;YACpB,SAAS,CAAC,EAAE,KAAK,CAAC;gBAAE,MAAM,EAAE,MAAM,CAAC;gBAAC,MAAM,EAAE,MAAM,CAAC;gBAAC,IAAI,EAAE,MAAM,CAAC;gBAAC,aAAa,CAAC,EAAE,MAAM,CAAC;gBAAC,UAAU,CAAC,EAAE,MA
AM,CAAA;aAAE,CAAC,CAAC;YACjH,SAAS,EAAE,MAAM,EAAE,CAAC;YACpB,aAAa,EAAE,MAAM,CAAC;YACtB,UAAU,EAAE,MAAM,CAAC;YACnB,WAAW,EAAE,MAAM,CAAC;YACpB,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,WAAW,CAAC,EAAE,MAAM,CAAC;YACrB,SAAS,CAAC,EAAE,MAAM,CAAC;SACpB,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;QAClB,oBAAoB,CAAC,IAAI,EAAE;YAAE,KAAK,EAAE,QAAQ,GAAG,SAAS,CAAC;YAAC,cAAc,EAAE,MAAM,CAAA;SAAE,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;KACpG,CAAC;CACH;AAuYD,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,iBAAiB,GAAG;IAAE,WAAW,CAAC,IAAI,EAAE,eAAe,GAAG,OAAO,CAAC,iBAAiB,CAAC,CAAA;CAAE,CA2L/H"}
@@ -37,25 +37,184 @@ exports.createWriteStore = createWriteStore;
37
37
  const crypto = __importStar(require("crypto"));
38
38
  const fs = __importStar(require("fs"));
39
39
  const path = __importStar(require("path"));
40
- function normalizeText(input) {
41
- return input.replace(/\s+/g, " ").trim();
40
+ const http_post_1 = require("../net/http_post");
41
+ const llm_output_validator_1 = require("../quality/llm_output_validator");
42
+ const ACTIVE_LOW_INFORMATION_LINE = /^(ok|okay|got it|roger|noted|sure|thanks|thank you|received|copy that|understood|好的|收到|明白|了解|谢谢|感谢|可以|行|嗯|嗯嗯|没问题)(?:\b|$)/i;
43
+ const ACTIVE_LOW_VALUE_ONLY = /^(ok|okay|got it|roger|noted|thanks|thank you|received|copy that|understood|sounds good|好的|收到|明白|了解|谢谢|感谢|可以|行|嗯|嗯嗯|没问题|辛苦了)[\s.!?,。!?、]*$/i;
44
+ const ACTIVE_SEMANTIC_SIGNAL = /(decision|trade-?off|constraint|requirement|fix|error|exception|blocked|rollback|deploy|progress|milestone|action item|owner|next step|todo|deadline|eta|issue|bug|metric|latency|error rate|cost|url|link|path|file|config|parameter|version|commit|pr|ticket|test|verify|passed|failed|success|import|memory|wiki|决策|决定|取舍|约束|需求|要求|修复|错误|异常|阻塞|回滚|部署|进展|里程碑|行动项|负责人|下一步|待办|截止|问题|缺陷|指标|延迟|成本|链接|路径|文件|配置|参数|版本|提交|工单|测试|验证|通过|失败|成功|优化|导入|记忆|wiki)/i;
45
+ const ACTIVE_EVIDENCE_SIGNAL = /(https?:\/\/|www\.|[`#/:\\]|[A-Za-z]:\\|\/[A-Za-z0-9._\-\/]+|\b\d+(?:\.\d+)?%?\b|#\d{1,8}|npm run|pnpm |yarn |node |git )/i;
46
+ const ACTIVE_WORKFLOW_SIGNAL = /(done|completed|fixed|implemented|resolved|passed|verified|accepted|approved|已完成|完成了|修复了|实现了|已修复|已实现|已通过|通过了|验证通过|测试通过|接受|确认)/i;
47
+ function denoiseActiveText(input) {
48
+ const raw = (input || "").trim();
49
+ if (!raw)
50
+ return "";
51
+ const output = [];
52
+ const seen = new Set();
53
+ for (const line of raw.split(/\r?\n/)) {
54
+ const trimmed = line.trim();
55
+ if (!trimmed)
56
+ continue;
57
+ const content = trimmed.replace(/^\[[^\]]+\]\s*/, "").trim();
58
+ if (!content)
59
+ continue;
60
+ const hasSignal = /(https?:\/\/|www\.|[A-Za-z0-9._-]+\.[A-Za-z]{2,}|[`#/:\\]|@\w+|\b\d{2,}\b)/.test(content);
61
+ if (!hasSignal && ACTIVE_LOW_INFORMATION_LINE.test(content)) {
62
+ continue;
63
+ }
64
+ const dedupKey = content.toLowerCase();
65
+ if (!hasSignal && seen.has(dedupKey)) {
66
+ continue;
67
+ }
68
+ seen.add(dedupKey);
69
+ output.push(trimmed);
70
+ }
71
+ return output.length > 0 ? output.join("\n") : raw;
72
+ }
73
/**
 * Denoises the input and, when a positive finite limit is given, keeps only
 * the trailing `maxChars` characters of the result.
 *
 * @param {string} input - Raw text, passed through denoiseActiveText.
 * @param {number} [maxChars] - Character budget; a non-finite or
 *   non-positive value disables truncation.
 * @returns {string} The cleaned (and possibly tail-truncated, re-trimmed) text.
 */
function normalizeText(input, maxChars) {
    const cleaned = denoiseActiveText(input);
    if (!cleaned) {
        return "";
    }
    const hasLimit = Number.isFinite(maxChars) && maxChars > 0;
    if (!hasLimit || cleaned.length <= maxChars) {
        return cleaned;
    }
    // Keep the end of the text; a negative slice index counts from the tail.
    const keep = Math.floor(maxChars);
    return cleaned.slice(-keep).trim();
}
82
/**
 * Collapses every run of whitespace into a single space and trims the ends.
 *
 * @param {*} input - Value to normalize; any falsy value yields "".
 * @returns {string} Single-line, whitespace-normalized text.
 */
function normalizeSummary(input) {
    const raw = input ? String(input) : "";
    const singleSpaced = raw.replace(/\s+/g, " ");
    return singleSpaced.trim();
}
43
- function scoreQuality(text) {
85
/**
 * Reduces text to a canonical form for semantic comparison.
 *
 * Steps, in order: strip a leading "[...]" prefix from every line,
 * lower-case, replace each run of characters that are not letters, digits or
 * one of `: . / \ # @ _ -` with a space, collapse whitespace, trim.
 *
 * @param {*} input - Value to normalize; any falsy value yields "".
 * @returns {string} Canonical lower-case text.
 */
function normalizeSemanticText(input) {
    const raw = String(input || "");
    const withoutPrefixes = raw.replace(/^\[[^\]]+\]\s*/gm, "");
    const lowered = withoutPrefixes.toLowerCase();
    const symbolsRemoved = lowered.replace(/[^\p{L}\p{N}:./\\#@_-]+/gu, " ");
    const singleSpaced = symbolsRemoved.replace(/\s+/g, " ");
    return singleSpaced.trim();
}
93
/**
 * Builds the de-duplicated token list used by the similarity hashes.
 *
 * The input is canonicalized with normalizeSemanticText and split on
 * whitespace. Every word is a token; in addition, the characters of a word
 * that fall in U+3400..U+9FFF are paired up into consecutive two-character
 * tokens, so unsegmented CJK text still yields overlapping features.
 *
 * @param {*} input - Text to tokenize.
 * @returns {string[]} Unique tokens in first-seen order; [] for empty input.
 */
function buildSimilarityTokens(input) {
    const normalized = normalizeSemanticText(input);
    if (!normalized) {
        return [];
    }
    const cjkPattern = /[\u3400-\u9fff]/;
    const unique = new Set();
    normalized.split(/\s+/).forEach((word) => {
        if (!word) {
            return;
        }
        unique.add(word);
        // Pairs are formed over the CJK characters only, after filtering.
        const cjkChars = Array.from(word).filter((char) => cjkPattern.test(char));
        for (let position = 1; position < cjkChars.length; position += 1) {
            unique.add(`${cjkChars[position - 1]}${cjkChars[position]}`);
        }
    });
    return Array.from(unique);
}
110
/**
 * Hashes a token to an unsigned 64-bit integer.
 *
 * @param {string} value - Token to hash.
 * @returns {bigint} The first eight bytes of the SHA-1 digest, read
 *   big-endian.
 */
function hashToken64(value) {
    const digest = crypto.createHash("sha1").update(value).digest();
    return digest.readBigUInt64BE(0);
}
118
/**
 * Computes a 64-bit SimHash fingerprint of the text.
 *
 * Every token votes +1 or -1 on each bit position according to its 64-bit
 * hash; a bit of the fingerprint is set when its tally is zero or positive.
 * Text that produces no tokens therefore yields "ffffffffffffffff".
 *
 * @param {*} text - Text to fingerprint.
 * @returns {string} Fingerprint as 16 lower-case hex digits.
 */
function computeSimhashHex(text) {
    const BIT_COUNT = 64;
    const tallies = new Array(BIT_COUNT).fill(0);
    for (const token of buildSimilarityTokens(text)) {
        const hash = hashToken64(token);
        for (let bit = 0; bit < BIT_COUNT; bit += 1) {
            const isSet = ((hash >> BigInt(bit)) & 1n) === 1n;
            tallies[bit] += isSet ? 1 : -1;
        }
    }
    // Ties (tally of exactly zero) count as a set bit.
    const fingerprint = tallies.reduce(
        (bits, tally, bit) => (tally >= 0 ? bits | (1n << BigInt(bit)) : bits),
        0n,
    );
    return fingerprint.toString(16).padStart(16, "0");
}
135
/**
 * Counts the bit positions at which two hex-encoded integers differ.
 *
 * @param {string} left - Hex digits without a "0x" prefix; falsy means 0.
 * @param {string} right - Hex digits without a "0x" prefix; falsy means 0.
 * @returns {number} Number of differing bits.
 * @throws {SyntaxError} When either argument is not valid hexadecimal.
 */
function hammingDistanceHex(left, right) {
    const leftBits = BigInt(`0x${left || "0"}`);
    const rightBits = BigInt(`0x${right || "0"}`);
    // XOR leaves a 1 exactly where the operands differ; count those digits.
    const differing = (leftBits ^ rightBits).toString(2);
    let distance = 0;
    for (const digit of differing) {
        if (digit === "1") {
            distance += 1;
        }
    }
    return distance;
}
144
/**
 * Produces the seeded 32-bit hash used for one MinHash permutation.
 *
 * @param {number} seed - Permutation index mixed into the hashed string.
 * @param {string} token - Token to hash.
 * @returns {number} The first four bytes of SHA-1("<seed>:<token>") as an
 *   unsigned big-endian integer.
 */
function seedHash(seed, token) {
    const hexDigest = crypto.createHash("sha1").update(`${seed}:${token}`).digest("hex");
    // Eight hex digits are the leading four bytes of the digest.
    return Number.parseInt(hexDigest.slice(0, 8), 16);
}
147
/**
 * Computes a MinHash signature for the text.
 *
 * For each seed in [0, signatures) the signature holds the smallest
 * seedHash value over all tokens of the text.
 *
 * @param {*} text - Text to sign.
 * @param {number} [signatures=48] - Number of signature slots.
 * @returns {number[]} The signature; all zeros when the text has no tokens.
 */
function computeMinhash(text, signatures = 48) {
    const tokens = buildSimilarityTokens(text);
    if (tokens.length === 0) {
        return Array.from({ length: signatures }, () => 0);
    }
    const signature = [];
    let seed = 0;
    while (seed < signatures) {
        const lowest = tokens.reduce((best, token) => {
            const candidate = seedHash(seed, token);
            return candidate < best ? candidate : best;
        }, Number.MAX_SAFE_INTEGER);
        // An untouched sentinel means no token produced a smaller value.
        signature.push(lowest === Number.MAX_SAFE_INTEGER ? 0 : lowest);
        seed += 1;
    }
    return signature;
}
163
/**
 * Estimates similarity as the fraction of matching MinHash slots.
 *
 * Only the first min(left.length, right.length) slots are compared.
 *
 * @param {number[]} left - First signature.
 * @param {number[]} right - Second signature.
 * @returns {number} Value in [0, 1]; 0 when either signature is empty.
 */
function minhashSimilarity(left, right) {
    const comparable = Math.min(left.length, right.length);
    if (comparable === 0) {
        return 0;
    }
    let matches = 0;
    let slot = 0;
    while (slot < comparable) {
        if (left[slot] === right[slot]) {
            matches += 1;
        }
        slot += 1;
    }
    return matches / comparable;
}
174
/**
 * Picks the text that represents a record for semantic de-duplication.
 *
 * @param {{summary?: string, text?: string, sourceText?: string}} args -
 *   Candidate fields, in descending priority: summary, text, sourceText.
 * @returns {string} The first candidate that is non-empty after
 *   normalizeSummary, or "" when all are empty.
 */
function buildActiveDedupText(args) {
    const candidates = [args.summary, args.text, args.sourceText];
    for (const candidate of candidates) {
        const normalized = normalizeSummary(candidate || "");
        if (normalized) {
            return normalized;
        }
    }
    return "";
}
181
+ function scoreQuality(args) {
182
+ const text = args.text;
183
+ const summary = normalizeSummary(args.summary || "");
184
+ const sourceText = normalizeSummary(args.sourceText || "");
185
+ const merged = [summary, text, sourceText].filter(Boolean).join("\n");
186
+ const normalized = normalizeSemanticText(merged);
187
+ if (!normalized || ACTIVE_LOW_VALUE_ONLY.test(normalized)) {
188
+ return { score: 0, level: "low" };
189
+ }
44
190
  const length = text.length;
45
- const uniqueChars = new Set(text.toLowerCase()).size;
191
+ const uniqueChars = new Set(normalized).size;
192
+ const hasSemanticSignal = ACTIVE_SEMANTIC_SIGNAL.test(merged);
193
+ const hasEvidence = ACTIVE_EVIDENCE_SIGNAL.test(merged);
194
+ const hasWorkflowSignal = ACTIVE_WORKFLOW_SIGNAL.test(merged);
46
195
  let score = 0;
47
196
  if (length >= 20)
48
- score += 0.35;
197
+ score += 0.18;
49
198
  if (length >= 60)
50
- score += 0.2;
199
+ score += 0.14;
51
200
  if (length >= 120)
52
- score += 0.2;
201
+ score += 0.1;
53
202
  if (uniqueChars >= 10)
54
- score += 0.15;
55
- if (/\d/.test(text))
56
- score += 0.05;
57
- if (/[a-zA-Z\u4e00-\u9fa5]/.test(text))
58
- score += 0.05;
203
+ score += 0.08;
204
+ if (uniqueChars >= 24)
205
+ score += 0.04;
206
+ if (hasSemanticSignal)
207
+ score += 0.26;
208
+ if (hasEvidence)
209
+ score += 0.16;
210
+ if (hasWorkflowSignal)
211
+ score += 0.12;
212
+ if (summary && sourceText && sourceText.toLowerCase().includes(summary.toLowerCase().slice(0, Math.min(32, summary.length)))) {
213
+ score += 0.06;
214
+ }
215
+ if (!hasSemanticSignal && !hasEvidence && !hasWorkflowSignal) {
216
+ score = Math.min(score, 0.35);
217
+ }
59
218
  const normalizedScore = Math.max(0, Math.min(1, Number(score.toFixed(2))));
60
219
  if (normalizedScore >= 0.75) {
61
220
  return { score: normalizedScore, level: "high" };
@@ -85,11 +244,97 @@ function safeReadTailLines(filePath, maxLines) {
85
244
  function computeHash(text) {
86
245
  return crypto.createHash("sha256").update(text).digest("hex");
87
246
  }
247
/**
 * Roughly estimates a token count by counting the non-empty pieces left
 * after splitting on whitespace, common punctuation, brackets and quotes.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Number of non-empty pieces; 0 for empty text.
 */
function estimateTokenCount(text) {
    const separators = /[\s,.;:!?,。;:!?、()()[\]{}"'`~]+/;
    let count = 0;
    for (const piece of text.split(separators)) {
        if (piece.trim()) {
            count += 1;
        }
    }
    return count;
}
88
254
  function normalizeBaseUrl(value) {
89
255
  if (!value)
90
256
  return "";
91
257
  return value.endsWith("/") ? value.slice(0, -1) : value;
92
258
  }
259
/**
 * Splits text into overlapping chunks, preferring to cut after punctuation.
 *
 * A chunk nominally spans `chunkSize` characters. When that cut would fall
 * inside the text, the cut is moved to just after the nearest sentence
 * punctuation found by scanning backwards (no further than 45% of the chunk
 * size from the chunk start) and, failing that, forwards (up to 20% of the
 * chunk size past the nominal cut). The next chunk starts `chunkOverlap`
 * characters before the previous cut, always advancing by at least one
 * character.
 *
 * @param {string} text - Text to split.
 * @param {number} chunkSize - Target chunk length; values that are not
 *   finite or are below 200 fall back to 600.
 * @param {number} chunkOverlap - Overlap between consecutive chunks; values
 *   that are not finite or are negative fall back to 100. Capped at
 *   chunk size minus 50.
 * @returns {Array<{index: number, start: number, end: number, text: string}>}
 *   Chunks in order. `start`/`end` are offsets into `text`; `text` is the
 *   trimmed slice. Whitespace-only input yields [].
 */
function splitTextChunks(text, chunkSize, chunkOverlap) {
    const size = Number.isFinite(chunkSize) && chunkSize >= 200 ? Math.floor(chunkSize) : 600;
    const requestedOverlap = Number.isFinite(chunkOverlap) && chunkOverlap >= 0
        ? Math.floor(chunkOverlap)
        : 100;
    const overlap = Math.min(requestedOverlap, Math.max(0, size - 50));
    const chunks = [];
    if (!text.trim()) {
        return chunks;
    }
    const punctuationSet = new Set(["。", "!", "?", ".", "!", "?", "\n", ";", ";"]);
    const total = text.length;
    const backwardReach = Math.floor(size * 0.45);
    const forwardReach = Math.floor(size * 0.2);
    // Returns the cut position for a chunk starting at `from` whose nominal
    // end is `hardEnd`: just after a punctuation mark when one is in range,
    // otherwise `hardEnd` itself.
    const pickBoundary = (from, hardEnd) => {
        const lowest = Math.max(from + backwardReach, from + 1);
        for (let position = hardEnd - 1; position >= lowest; position -= 1) {
            if (punctuationSet.has(text[position])) {
                return position + 1;
            }
        }
        const limit = Math.min(total, hardEnd + forwardReach);
        for (let position = hardEnd; position < limit; position += 1) {
            if (punctuationSet.has(text[position])) {
                return position + 1;
            }
        }
        return hardEnd;
    };
    let start = 0;
    while (start < total) {
        const hardEnd = Math.min(total, start + size);
        // The final chunk simply runs to the end of the text.
        const end = hardEnd < total ? pickBoundary(start, hardEnd) : hardEnd;
        const body = text.slice(start, end).trim();
        if (body) {
            chunks.push({ index: chunks.length, start, end, text: body });
        }
        if (end >= total) {
            break;
        }
        // Step back by the overlap, but always move forward at least one char.
        start = Math.max(start + 1, end - overlap);
    }
    return chunks;
}
318
/**
 * Maps `items` through an async `mapper` with a bounded number of calls in
 * flight, returning results in input order.
 *
 * @param {Array} items - Values to process.
 * @param {number} maxConcurrency - Upper bound on simultaneous mapper calls.
 *   Clamped to [1, items.length]. A value that does not convert to a number
 *   (NaN, undefined) falls back to 1; previously such a value started no
 *   workers at all, so the function resolved to an array of empty slots
 *   without ever calling the mapper.
 * @param {(item: *, index: number) => Promise<*>} mapper - Called once per
 *   item with the item and its index.
 * @returns {Promise<Array>} Mapper results, positioned by input index.
 *   Rejects with the first mapper rejection; workers already running are not
 *   cancelled.
 */
async function mapWithConcurrency(items, maxConcurrency, mapper) {
    if (items.length === 0) {
        return [];
    }
    const numericLimit = Number(maxConcurrency);
    const requested = Number.isNaN(numericLimit) ? 1 : Math.floor(numericLimit);
    const concurrency = Math.max(1, Math.min(requested, items.length));
    const results = new Array(items.length);
    let cursor = 0;
    // Each worker claims the next unprocessed index until none remain. The
    // claim is synchronous, so no index is handed to two workers.
    async function worker() {
        while (cursor < items.length) {
            const current = cursor;
            cursor += 1;
            results[current] = await mapper(items[current], current);
        }
    }
    await Promise.all(Array.from({ length: concurrency }, () => worker()));
    return results;
}
93
338
  async function requestEmbedding(args) {
94
339
  const endpoint = args.baseUrl.endsWith("/embeddings") ? args.baseUrl : `${args.baseUrl}/embeddings`;
95
340
  const body = {
@@ -107,24 +352,18 @@ async function requestEmbedding(args) {
107
352
  : 4;
108
353
  let lastError = null;
109
354
  for (let attempt = 0; attempt < maxRetries; attempt += 1) {
110
- const controller = new AbortController();
111
- const timeoutId = setTimeout(() => controller.abort(), timeoutMs);
355
+ const response = await (0, http_post_1.postJsonWithTimeout)({
356
+ endpoint,
357
+ apiKey: args.apiKey,
358
+ body,
359
+ timeoutMs,
360
+ });
361
+ if (!response.ok) {
362
+ lastError = new Error(response.status > 0 ? `embedding_http_${response.status}` : (response.error || "embedding_network_error"));
363
+ continue;
364
+ }
112
365
  try {
113
- const response = await fetch(endpoint, {
114
- method: "POST",
115
- headers: {
116
- "content-type": "application/json",
117
- authorization: `Bearer ${args.apiKey}`,
118
- },
119
- body: JSON.stringify(body),
120
- signal: controller.signal,
121
- });
122
- clearTimeout(timeoutId);
123
- if (!response.ok) {
124
- lastError = new Error(`embedding_http_${response.status}`);
125
- continue;
126
- }
127
- const json = await response.json();
366
+ const json = (response.json || {});
128
367
  const embedding = json?.data?.[0]?.embedding;
129
368
  if (Array.isArray(embedding) && embedding.length > 0) {
130
369
  return embedding.filter(item => Number.isFinite(item));
@@ -132,7 +371,6 @@ async function requestEmbedding(args) {
132
371
  lastError = new Error("embedding_empty");
133
372
  }
134
373
  catch (error) {
135
- clearTimeout(timeoutId);
136
374
  lastError = error;
137
375
  }
138
376
  if (attempt < maxRetries - 1) {
@@ -148,17 +386,32 @@ function createWriteStore(options) {
148
386
  const memoryRoot = options.dbPath ? path.resolve(options.dbPath) : path.join(options.projectRoot, "data", "memory");
149
387
  const activeSessionsPath = path.join(memoryRoot, "sessions", "active", "sessions.jsonl");
150
388
  async function writeMemory(args) {
151
- const cleaned = normalizeText(args.text || "");
389
+ const activeTextMaxChars = typeof options.writePolicy?.activeTextMaxChars === "number" && Number.isFinite(options.writePolicy.activeTextMaxChars)
390
+ ? Math.max(500, Math.floor(options.writePolicy.activeTextMaxChars))
391
+ : 200000;
392
+ const cleaned = normalizeText(args.text || "", activeTextMaxChars);
152
393
  if (!cleaned) {
153
394
  return { status: "skipped", reason: "empty_text", error_code: "E204" };
154
395
  }
155
- const quality = scoreQuality(cleaned);
156
- if (quality.level === "low") {
396
+ const sourceTextRaw = typeof args.sourceText === "string" ? args.sourceText.trim() : "";
397
+ const sourceText = sourceTextRaw || cleaned;
398
+ const quality = scoreQuality({ text: cleaned, summary: args.summary, sourceText });
399
+ const activeMinQualityScore = typeof options.writePolicy?.activeMinQualityScore === "number"
400
+ ? Math.max(0, Math.min(1, options.writePolicy.activeMinQualityScore))
401
+ : 0.45;
402
+ if (quality.score < activeMinQualityScore) {
157
403
  return { status: "skipped", reason: "low_quality", error_code: "E204", quality };
158
404
  }
159
405
  const textHash = computeHash(cleaned);
406
+ const semanticDedupText = buildActiveDedupText({ text: cleaned, summary: args.summary, sourceText });
407
+ const semanticHash = computeHash(normalizeSemanticText(semanticDedupText));
408
+ const semanticSimhash = computeSimhashHex(semanticDedupText);
409
+ const semanticMinhash = computeMinhash(semanticDedupText);
160
410
  try {
161
- const tailLines = safeReadTailLines(activeSessionsPath, 200);
411
+ const dedupTailLines = typeof options.writePolicy?.activeDedupTailLines === "number"
412
+ ? Math.max(20, Math.min(5000, Math.floor(options.writePolicy.activeDedupTailLines)))
413
+ : 200;
414
+ const tailLines = safeReadTailLines(activeSessionsPath, dedupTailLines);
162
415
  for (const line of tailLines) {
163
416
  try {
164
417
  const parsed = JSON.parse(line);
@@ -167,6 +420,28 @@ function createWriteStore(options) {
167
420
  parsed.text_hash === textHash) {
168
421
  return { status: "skipped", reason: "duplicate", error_code: "E203", quality };
169
422
  }
423
+ const existingSemanticHash = typeof parsed.semantic_hash === "string"
424
+ ? parsed.semantic_hash
425
+ : computeHash(normalizeSemanticText(String(parsed.summary || parsed.source_text || "")));
426
+ if (existingSemanticHash && existingSemanticHash === semanticHash) {
427
+ return { status: "skipped", reason: "duplicate_semantic", error_code: "E203", quality };
428
+ }
429
+ const existingSemanticText = buildActiveDedupText({
430
+ text: String(parsed.summary || parsed.source_text || ""),
431
+ summary: typeof parsed.summary === "string" ? parsed.summary : undefined,
432
+ sourceText: typeof parsed.source_text === "string" ? parsed.source_text : undefined,
433
+ });
434
+ const existingSimhash = typeof parsed.semantic_simhash === "string"
435
+ ? parsed.semantic_simhash
436
+ : computeSimhashHex(existingSemanticText);
437
+ if (normalizeSemanticText(semanticDedupText).length >= 24 &&
438
+ normalizeSemanticText(existingSemanticText).length >= 24 &&
439
+ hammingDistanceHex(semanticSimhash, existingSimhash) <= 3) {
440
+ return { status: "skipped", reason: "duplicate_simhash", error_code: "E203", quality };
441
+ }
442
+ if (minhashSimilarity(semanticMinhash, computeMinhash(existingSemanticText)) >= 0.92) {
443
+ return { status: "skipped", reason: "duplicate_minhash", error_code: "E203", quality };
444
+ }
170
445
  }
171
446
  catch { }
172
447
  }
@@ -181,35 +456,109 @@ function createWriteStore(options) {
181
456
  session_id: args.sessionId,
182
457
  role: args.role || "user",
183
458
  source: args.source || "message",
184
- content: cleaned,
459
+ summary: normalizeSummary(args.summary || "") || normalizeSummary(cleaned),
460
+ source_text: sourceText || undefined,
461
+ layer: "active",
462
+ source_memory_id: id,
463
+ source_memory_canonical_id: id,
464
+ source_event_id: id,
465
+ canonical_id: id,
466
+ embedding_status: "pending",
467
+ llm_gate_decision: "active_only",
185
468
  quality_level: quality.level,
186
469
  quality_score: quality.score,
187
470
  text_hash: textHash,
471
+ semantic_hash: semanticHash,
472
+ semantic_simhash: semanticSimhash,
473
+ char_count: sourceText.length,
474
+ token_count: estimateTokenCount(sourceText),
188
475
  };
189
476
  const embeddingModel = options.embedding?.model || "";
190
477
  const embeddingApiKey = options.embedding?.apiKey || "";
191
478
  const embeddingBaseUrl = normalizeBaseUrl(options.embedding?.baseURL || options.embedding?.baseUrl);
192
- if (embeddingModel && embeddingApiKey && embeddingBaseUrl) {
479
+ const chunkSize = options.vectorChunking?.chunkSize ?? 600;
480
+ const chunkOverlap = options.vectorChunking?.chunkOverlap ?? 100;
481
+ const maxParallel = 6;
482
+ const vectorStore = options.vectorStore;
483
+ if (embeddingModel && embeddingApiKey && embeddingBaseUrl && vectorStore) {
484
+ const chunks = splitTextChunks(sourceText, chunkSize, chunkOverlap);
485
+ record.vector_chunks_total = chunks.length;
486
+ record.vector_chunks_ok = 0;
193
487
  try {
194
- const embedding = await requestEmbedding({
195
- text: cleaned,
196
- model: embeddingModel,
197
- apiKey: embeddingApiKey,
198
- baseUrl: embeddingBaseUrl,
199
- dimensions: options.embedding?.dimensions,
200
- timeoutMs: options.embedding?.timeoutMs,
201
- maxRetries: options.embedding?.maxRetries,
202
- });
203
- if (embedding && embedding.length > 0) {
204
- record.embedding = embedding;
205
- }
488
+ await vectorStore.deleteBySourceMemory({ layer: "active", sourceMemoryId: record.id });
206
489
  }
207
490
  catch (error) {
208
- options.logger.warn(`Embedding request failed, fallback to lexical store: ${error}`);
491
+ options.logger.warn(`Active vector cleanup failed before upsert: ${error}`);
209
492
  }
493
+ const chunkEmbeddings = await mapWithConcurrency(chunks, maxParallel, async (chunk) => {
494
+ try {
495
+ const embedding = await requestEmbedding({
496
+ text: chunk.text,
497
+ model: embeddingModel,
498
+ apiKey: embeddingApiKey,
499
+ baseUrl: embeddingBaseUrl,
500
+ dimensions: options.embedding?.dimensions,
501
+ timeoutMs: options.embedding?.timeoutMs,
502
+ maxRetries: options.embedding?.maxRetries,
503
+ });
504
+ if (!embedding || embedding.length === 0) {
505
+ return null;
506
+ }
507
+ return { chunk, embedding };
508
+ }
509
+ catch (error) {
510
+ options.logger.warn(`Active chunk embedding failed id=${record.id} chunk=${chunk.index} error=${error}`);
511
+ return null;
512
+ }
513
+ });
514
+ const validEmbeddings = chunkEmbeddings
515
+ .filter((item) => Boolean(item))
516
+ .sort((a, b) => a.chunk.index - b.chunk.index);
517
+ const upsertStatus = await mapWithConcurrency(validEmbeddings, maxParallel, async (item) => {
518
+ const { chunk, embedding } = item;
519
+ try {
520
+ await vectorStore.upsert({
521
+ id: `vec_${record.id}_c${chunk.index}`,
522
+ session_id: record.session_id,
523
+ event_type: "message",
524
+ summary: chunk.text,
525
+ timestamp: record.timestamp,
526
+ layer: "active",
527
+ source_memory_id: record.id,
528
+ source_memory_canonical_id: record.id,
529
+ source_event_id: record.id,
530
+ source_field: "summary",
531
+ embedding,
532
+ quality_score: record.quality_score,
533
+ char_count: chunk.text.length,
534
+ token_count: estimateTokenCount(chunk.text),
535
+ chunk_index: chunk.index,
536
+ chunk_total: chunks.length,
537
+ chunk_start: chunk.start,
538
+ chunk_end: chunk.end,
539
+ });
540
+ return true;
541
+ }
542
+ catch (error) {
543
+ options.logger.warn(`Active chunk embedding failed id=${record.id} chunk=${chunk.index} error=${error}`);
544
+ return false;
545
+ }
546
+ });
547
+ record.vector_chunks_ok = upsertStatus.filter(Boolean).length;
548
+ record.embedding_status = record.vector_chunks_total > 0 && record.vector_chunks_ok === record.vector_chunks_total
549
+ ? "ok"
550
+ : "failed";
210
551
  }
211
552
  ensureDirForFile(activeSessionsPath);
212
- fs.appendFileSync(activeSessionsPath, `${JSON.stringify(record)}\n`, "utf-8");
553
+ const recordLine = JSON.stringify(record);
554
+ fs.appendFileSync(activeSessionsPath, `${recordLine}\n`, "utf-8");
555
+ const validation = (0, llm_output_validator_1.validateJsonlLine)(recordLine);
556
+ if (!validation.valid && validation.errors.length > 0) {
557
+ options.logger.warn(`active_write_integrity_check_failed errors=${validation.errors.join("|")}`);
558
+ }
559
+ if (record.vector_chunks_total && record.vector_chunks_total > 0) {
560
+ options.logger.info(`active_vector_chunks source=${record.id} ok=${record.vector_chunks_ok || 0}/${record.vector_chunks_total}`);
561
+ }
213
562
  options.logger.info(`TS write stored message for session ${args.sessionId}`);
214
563
  return { status: "ok", memory_id: id, quality };
215
564
  }