openclaw-cortex-memory 0.1.0-Alpha.30 → 0.1.0-Alpha.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +46 -12
- package/SIGNATURE.md +7 -0
- package/SKILL.md +18 -3
- package/dist/index.d.ts +8 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +148 -6
- package/dist/index.js.map +1 -1
- package/dist/openclaw.plugin.json +120 -4
- package/dist/src/engine/memory_engine.d.ts +5 -1
- package/dist/src/engine/memory_engine.d.ts.map +1 -1
- package/dist/src/engine/ts_engine.d.ts +116 -0
- package/dist/src/engine/ts_engine.d.ts.map +1 -1
- package/dist/src/engine/ts_engine.js +417 -102
- package/dist/src/engine/ts_engine.js.map +1 -1
- package/dist/src/engine/types.d.ts +17 -0
- package/dist/src/engine/types.d.ts.map +1 -1
- package/dist/src/graph/ontology.d.ts +23 -1
- package/dist/src/graph/ontology.d.ts.map +1 -1
- package/dist/src/graph/ontology.js +743 -70
- package/dist/src/graph/ontology.js.map +1 -1
- package/dist/src/quality/llm_output_validator.d.ts +20 -2
- package/dist/src/quality/llm_output_validator.d.ts.map +1 -1
- package/dist/src/quality/llm_output_validator.js +296 -41
- package/dist/src/quality/llm_output_validator.js.map +1 -1
- package/dist/src/store/archive_store.d.ts +8 -0
- package/dist/src/store/archive_store.d.ts.map +1 -1
- package/dist/src/store/archive_store.js +244 -84
- package/dist/src/store/archive_store.js.map +1 -1
- package/dist/src/store/graph_memory_store.d.ts +72 -2
- package/dist/src/store/graph_memory_store.d.ts.map +1 -1
- package/dist/src/store/graph_memory_store.js +723 -50
- package/dist/src/store/graph_memory_store.js.map +1 -1
- package/dist/src/store/read_store.d.ts +3 -0
- package/dist/src/store/read_store.d.ts.map +1 -1
- package/dist/src/store/read_store.js +1004 -209
- package/dist/src/store/read_store.js.map +1 -1
- package/dist/src/store/vector_store.d.ts +1 -0
- package/dist/src/store/vector_store.d.ts.map +1 -1
- package/dist/src/store/vector_store.js +1 -0
- package/dist/src/store/vector_store.js.map +1 -1
- package/dist/src/store/write_store.d.ts +2 -0
- package/dist/src/store/write_store.d.ts.map +1 -1
- package/dist/src/store/write_store.js +45 -3
- package/dist/src/store/write_store.js.map +1 -1
- package/dist/src/sync/session_sync.d.ts +20 -1
- package/dist/src/sync/session_sync.d.ts.map +1 -1
- package/dist/src/sync/session_sync.js +1810 -161
- package/dist/src/sync/session_sync.js.map +1 -1
- package/dist/src/wiki/wiki_linter.d.ts +25 -0
- package/dist/src/wiki/wiki_linter.d.ts.map +1 -0
- package/dist/src/wiki/wiki_linter.js +268 -0
- package/dist/src/wiki/wiki_linter.js.map +1 -0
- package/dist/src/wiki/wiki_logger.d.ts +10 -0
- package/dist/src/wiki/wiki_logger.d.ts.map +1 -0
- package/dist/src/wiki/wiki_logger.js +78 -0
- package/dist/src/wiki/wiki_logger.js.map +1 -0
- package/dist/src/wiki/wiki_maintainer.d.ts +36 -0
- package/dist/src/wiki/wiki_maintainer.d.ts.map +1 -0
- package/dist/src/wiki/wiki_maintainer.js +38 -0
- package/dist/src/wiki/wiki_maintainer.js.map +1 -0
- package/dist/src/wiki/wiki_projector.d.ts +33 -0
- package/dist/src/wiki/wiki_projector.d.ts.map +1 -0
- package/dist/src/wiki/wiki_projector.js +633 -0
- package/dist/src/wiki/wiki_projector.js.map +1 -0
- package/dist/src/wiki/wiki_queue.d.ts +29 -0
- package/dist/src/wiki/wiki_queue.d.ts.map +1 -0
- package/dist/src/wiki/wiki_queue.js +137 -0
- package/dist/src/wiki/wiki_queue.js.map +1 -0
- package/openclaw.plugin.json +120 -4
- package/package.json +8 -4
- package/schema/graph.schema.yaml +188 -33
- package/skills/cortex-memory/SKILL.md +49 -0
- package/skills/cortex-memory/references/agent-manual.md +115 -0
- package/skills/cortex-memory/references/configuration.md +92 -0
- package/skills/cortex-memory/references/publish-checklist.md +46 -0
- package/skills/cortex-memory/references/system-prompt-template.md +27 -0
- package/skills/cortex-memory/references/tools.md +181 -0
- package/skills/cortex-memory/scripts/smoke-check.ps1 +56 -0
|
@@ -34,9 +34,13 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.normalizeEntityName = normalizeEntityName;
|
|
37
|
+
exports.getEntityMatchKeys = getEntityMatchKeys;
|
|
37
38
|
exports.loadGraphSchema = loadGraphSchema;
|
|
38
39
|
exports.normalizeEventType = normalizeEventType;
|
|
39
40
|
exports.normalizeRelationType = normalizeRelationType;
|
|
41
|
+
exports.isCanonicalRelationType = isCanonicalRelationType;
|
|
42
|
+
exports.getDefaultGraphSchema = getDefaultGraphSchema;
|
|
43
|
+
exports.buildRelationPromptHint = buildRelationPromptHint;
|
|
40
44
|
exports.buildCanonicalId = buildCanonicalId;
|
|
41
45
|
exports.validateRelations = validateRelations;
|
|
42
46
|
exports.normalizeEntityType = normalizeEntityType;
|
|
@@ -113,20 +117,40 @@ const DEFAULT_SCHEMA = {
|
|
|
113
117
|
"Concept",
|
|
114
118
|
"Resource",
|
|
115
119
|
"Document",
|
|
120
|
+
"ConfigFile",
|
|
121
|
+
"Preference",
|
|
122
|
+
"Case",
|
|
123
|
+
"Pattern",
|
|
124
|
+
"Date",
|
|
116
125
|
],
|
|
117
126
|
entityAliases: {
|
|
118
127
|
"OpenClaw": ["openclaw", "插件", "该项目", "本项目"],
|
|
119
128
|
"FamilyMember": ["家人", "家庭成员", "亲人"],
|
|
120
129
|
"Friend": ["朋友", "好友"],
|
|
130
|
+
"Team": ["团队", "小组", "组", "班组"],
|
|
121
131
|
"Location": ["地点", "位置", "住址", "地址"],
|
|
122
132
|
"Event": ["活动", "事情", "事项"],
|
|
123
133
|
"Schedule": ["日程", "安排", "计划表"],
|
|
124
134
|
"Habit": ["习惯", "作息"],
|
|
125
135
|
"HealthItem": ["健康", "体检", "药物", "锻炼"],
|
|
126
136
|
"FinanceItem": ["账单", "支出", "收入", "预算"],
|
|
127
|
-
"
|
|
128
|
-
"
|
|
129
|
-
"
|
|
137
|
+
"Plan": ["计划", "方案", "路线图"],
|
|
138
|
+
"Preference": ["偏好", "习惯选择"],
|
|
139
|
+
"Document": ["文档", "说明文档", "手册", "wiki", "README", "PRD", "方案文档"],
|
|
140
|
+
"Resource": ["资源", "物品", "物件", "设备", "工具", "素材", "资产"],
|
|
141
|
+
"ConfigFile": ["配置文件", "config", "配置"],
|
|
142
|
+
"Decision": ["决策", "决定", "拍板"],
|
|
143
|
+
"Action": ["动作", "操作", "执行"],
|
|
144
|
+
"Risk": ["风险", "隐患"],
|
|
145
|
+
"Blocker": ["阻塞", "卡点", "障碍"],
|
|
146
|
+
"Assumption": ["假设", "前提"],
|
|
147
|
+
"Concept": ["概念", "术语"],
|
|
148
|
+
"Case": ["案例", "case"],
|
|
149
|
+
"Pattern": ["模式", "pattern"],
|
|
150
|
+
"Date": ["日期", "时间", "时间点"],
|
|
151
|
+
"Person": ["我", "自己", "本人", "同事", "客户", "用户", "姓名", "名字", "人名", "成员", "联系人"],
|
|
152
|
+
"Project": ["项目", "工程", "项目线"],
|
|
153
|
+
"Task": ["任务", "待办", "todo", "工单", "事项"],
|
|
130
154
|
"Milestone": ["里程碑", "节点"],
|
|
131
155
|
"Issue": ["问题", "故障", "报错"],
|
|
132
156
|
"Fix": ["修复", "解决方案"],
|
|
@@ -134,43 +158,100 @@ const DEFAULT_SCHEMA = {
|
|
|
134
158
|
relationTypes: [
|
|
135
159
|
"depends_on",
|
|
136
160
|
"blocks",
|
|
137
|
-
"
|
|
161
|
+
"unblocks",
|
|
138
162
|
"causes",
|
|
163
|
+
"impacts",
|
|
139
164
|
"resolves",
|
|
165
|
+
"encountered_bug",
|
|
166
|
+
"solved_with",
|
|
167
|
+
"uses_tech",
|
|
168
|
+
"integrates_with",
|
|
169
|
+
"migrates_to",
|
|
170
|
+
"replaced_by",
|
|
171
|
+
"has_subtask",
|
|
172
|
+
"belongs_to",
|
|
173
|
+
"owned_by",
|
|
174
|
+
"implements",
|
|
175
|
+
"requires",
|
|
140
176
|
"plans_to",
|
|
177
|
+
"planned_for",
|
|
141
178
|
"scheduled_for",
|
|
179
|
+
"references",
|
|
180
|
+
"documents",
|
|
181
|
+
"defined_in",
|
|
182
|
+
"configured_in",
|
|
183
|
+
"supports",
|
|
184
|
+
"conflicts_with",
|
|
185
|
+
"duplicates",
|
|
186
|
+
"supersedes",
|
|
187
|
+
"assigned_to",
|
|
188
|
+
"reviewed_by",
|
|
189
|
+
"approved_by",
|
|
190
|
+
"rejected_by",
|
|
191
|
+
"reported_by",
|
|
142
192
|
"lives_in",
|
|
143
193
|
"cares_for",
|
|
144
194
|
"pays_for",
|
|
145
|
-
"supports",
|
|
146
|
-
"conflicts_with",
|
|
147
|
-
"belongs_to",
|
|
148
|
-
"owned_by",
|
|
149
|
-
"references",
|
|
150
195
|
"prefers",
|
|
151
|
-
"
|
|
152
|
-
"
|
|
196
|
+
"has_spouse",
|
|
197
|
+
"has_child",
|
|
198
|
+
"birthday_on",
|
|
199
|
+
"anniversary_on",
|
|
153
200
|
],
|
|
154
201
|
relationTypeAliases: {
|
|
155
202
|
dependency: "depends_on",
|
|
156
203
|
blocked_by: "blocks",
|
|
157
|
-
|
|
204
|
+
unblock: "unblocks",
|
|
205
|
+
impact: "impacts",
|
|
158
206
|
plan_to: "plans_to",
|
|
207
|
+
plan_for: "planned_for",
|
|
159
208
|
schedule_for: "scheduled_for",
|
|
160
209
|
located_in: "lives_in",
|
|
161
210
|
care_for: "cares_for",
|
|
162
211
|
pay_for: "pays_for",
|
|
163
212
|
support: "supports",
|
|
164
213
|
conflict_with: "conflicts_with",
|
|
214
|
+
use_tech: "uses_tech",
|
|
215
|
+
tech_stack: "uses_tech",
|
|
216
|
+
integrate_with: "integrates_with",
|
|
217
|
+
migrate_to: "migrates_to",
|
|
218
|
+
replace_by: "replaced_by",
|
|
219
|
+
replace_with: "replaced_by",
|
|
220
|
+
bug: "encountered_bug",
|
|
221
|
+
bug_on: "encountered_bug",
|
|
222
|
+
fix_with: "solved_with",
|
|
223
|
+
solve_with: "solved_with",
|
|
224
|
+
solved_by: "solved_with",
|
|
225
|
+
subtask_of: "has_subtask",
|
|
226
|
+
child_task: "has_subtask",
|
|
227
|
+
documented_by: "documents",
|
|
228
|
+
defined_by: "defined_in",
|
|
229
|
+
config_in: "configured_in",
|
|
230
|
+
duplicate_of: "duplicates",
|
|
231
|
+
superseded_by: "supersedes",
|
|
232
|
+
assign_to: "assigned_to",
|
|
233
|
+
review_by: "reviewed_by",
|
|
234
|
+
approve_by: "approved_by",
|
|
235
|
+
reject_by: "rejected_by",
|
|
236
|
+
report_by: "reported_by",
|
|
165
237
|
"依赖于": "depends_on",
|
|
166
238
|
"依赖": "depends_on",
|
|
167
239
|
"取决于": "depends_on",
|
|
168
240
|
"阻塞": "blocks",
|
|
169
241
|
"卡住": "blocks",
|
|
242
|
+
"解除阻塞": "unblocks",
|
|
170
243
|
"导致": "causes",
|
|
171
244
|
"引起": "causes",
|
|
245
|
+
"影响": "impacts",
|
|
172
246
|
"解决": "resolves",
|
|
173
247
|
"修复": "resolves",
|
|
248
|
+
"遇到报错": "encountered_bug",
|
|
249
|
+
"通过": "solved_with",
|
|
250
|
+
"使用技术": "uses_tech",
|
|
251
|
+
"集成": "integrates_with",
|
|
252
|
+
"迁移到": "migrates_to",
|
|
253
|
+
"被替代": "replaced_by",
|
|
254
|
+
"子任务": "has_subtask",
|
|
174
255
|
"属于": "belongs_to",
|
|
175
256
|
"归属": "belongs_to",
|
|
176
257
|
"负责": "owned_by",
|
|
@@ -183,6 +264,7 @@ const DEFAULT_SCHEMA = {
|
|
|
183
264
|
"需要": "requires",
|
|
184
265
|
"计划做": "plans_to",
|
|
185
266
|
"打算": "plans_to",
|
|
267
|
+
"计划于": "planned_for",
|
|
186
268
|
"安排在": "scheduled_for",
|
|
187
269
|
"约在": "scheduled_for",
|
|
188
270
|
"住在": "lives_in",
|
|
@@ -194,14 +276,33 @@ const DEFAULT_SCHEMA = {
|
|
|
194
276
|
"支持": "supports",
|
|
195
277
|
"冲突": "conflicts_with",
|
|
196
278
|
"矛盾": "conflicts_with",
|
|
197
|
-
"
|
|
198
|
-
"
|
|
279
|
+
"记录": "documents",
|
|
280
|
+
"定义于": "defined_in",
|
|
281
|
+
"配置于": "configured_in",
|
|
282
|
+
"重复": "duplicates",
|
|
283
|
+
"取代": "supersedes",
|
|
284
|
+
"分配给": "assigned_to",
|
|
285
|
+
"评审": "reviewed_by",
|
|
286
|
+
"批准": "approved_by",
|
|
287
|
+
"拒绝": "rejected_by",
|
|
288
|
+
"报告": "reported_by",
|
|
199
289
|
belongs: "belongs_to",
|
|
200
290
|
owner_of: "owned_by",
|
|
201
291
|
refer_to: "references",
|
|
202
292
|
preference_for: "prefers",
|
|
203
293
|
implement: "implements",
|
|
204
294
|
need: "requires",
|
|
295
|
+
technology: "uses_tech",
|
|
296
|
+
encountered_issue: "encountered_bug",
|
|
297
|
+
spouse: "has_spouse",
|
|
298
|
+
wife_of: "has_spouse",
|
|
299
|
+
husband_of: "has_spouse",
|
|
300
|
+
child_of: "has_child",
|
|
301
|
+
parent_of: "has_child",
|
|
302
|
+
birthday: "birthday_on",
|
|
303
|
+
born_on: "birthday_on",
|
|
304
|
+
anniversary: "anniversary_on",
|
|
305
|
+
married_on: "anniversary_on",
|
|
205
306
|
},
|
|
206
307
|
relationRules: [
|
|
207
308
|
{ type: "depends_on", fromTypes: ["Task", "Plan", "Milestone"], toTypes: ["Task", "Plan", "Milestone"], allowSelfLoop: false },
|
|
@@ -210,10 +311,35 @@ const DEFAULT_SCHEMA = {
|
|
|
210
311
|
{ type: "resolves", fromTypes: ["Fix", "Decision", "Action"], toTypes: ["Issue", "Blocker"], allowSelfLoop: false },
|
|
211
312
|
{ type: "belongs_to", fromTypes: ["Task", "Issue", "Fix", "Decision"], toTypes: ["Project", "Plan", "Milestone"], allowSelfLoop: false },
|
|
212
313
|
{ type: "owned_by", fromTypes: ["Task", "Plan", "Project", "Issue"], toTypes: ["Person", "Team"], allowSelfLoop: false },
|
|
314
|
+
{ type: "uses_tech", fromTypes: ["Project", "Task", "Fix", "Action"], toTypes: ["Resource", "Document", "Concept", "Project"], allowSelfLoop: false },
|
|
315
|
+
{ type: "encountered_bug", fromTypes: ["Project", "Task", "Action"], toTypes: ["Issue", "Blocker"], allowSelfLoop: false },
|
|
316
|
+
{ type: "solved_with", fromTypes: ["Issue", "Blocker"], toTypes: ["Fix", "Action", "Decision", "Resource"], allowSelfLoop: false },
|
|
317
|
+
{ type: "has_subtask", fromTypes: ["Project", "Plan", "Milestone", "Task"], toTypes: ["Task"], allowSelfLoop: false },
|
|
318
|
+
{ type: "planned_for", fromTypes: ["Task", "Plan", "Milestone"], toTypes: ["Date", "Schedule", "Milestone"], allowSelfLoop: false },
|
|
213
319
|
],
|
|
214
|
-
highValueRelationTypes: [
|
|
215
|
-
|
|
216
|
-
|
|
320
|
+
highValueRelationTypes: [
|
|
321
|
+
"depends_on",
|
|
322
|
+
"blocks",
|
|
323
|
+
"unblocks",
|
|
324
|
+
"causes",
|
|
325
|
+
"impacts",
|
|
326
|
+
"resolves",
|
|
327
|
+
"encountered_bug",
|
|
328
|
+
"solved_with",
|
|
329
|
+
"uses_tech",
|
|
330
|
+
"integrates_with",
|
|
331
|
+
"migrates_to",
|
|
332
|
+
"replaced_by",
|
|
333
|
+
"has_subtask",
|
|
334
|
+
"belongs_to",
|
|
335
|
+
"owned_by",
|
|
336
|
+
"implements",
|
|
337
|
+
"requires",
|
|
338
|
+
"planned_for",
|
|
339
|
+
"scheduled_for",
|
|
340
|
+
],
|
|
341
|
+
relatedToMaxRatio: 0,
|
|
342
|
+
relatedToMaxAbsolute: 0,
|
|
217
343
|
minRelationConfidence: 0.35,
|
|
218
344
|
evidenceSpanRequired: true,
|
|
219
345
|
endpointMentionRequired: true,
|
|
@@ -258,28 +384,127 @@ function sanitizeEntityAliases(input) {
|
|
|
258
384
|
}
|
|
259
385
|
return Object.keys(output).length > 0 ? output : DEFAULT_SCHEMA.entityAliases;
|
|
260
386
|
}
|
|
387
|
+
/**
 * Builds the comparison key used when matching entity names and aliases.
 *
 * The value is NFKC-normalized and lower-cased, quote characters are removed,
 * brackets / separators / basic punctuation are turned into spaces, and the
 * resulting whitespace is collapsed and trimmed.
 *
 * @param {string} value - Raw entity name or alias.
 * @returns {string} Normalized key (may be empty).
 */
function normalizeAliasKey(value) {
    // Applied strictly in this order; later steps rely on earlier ones
    // (e.g. whitespace collapsing must run after the space-producing rewrites).
    const rewrites = [
        [/[`"'“”‘’]/g, ""],
        [/[【】[\]{}()<>]/g, " "],
        [/[_\-\/\\|]+/g, " "],
        [/[,:;!?]+/g, " "],
        [/\s+/g, " "],
    ];
    let key = value.normalize("NFKC").toLowerCase();
    for (const [pattern, replacement] of rewrites) {
        key = key.replace(pattern, replacement);
    }
    return key.trim();
}
|
|
398
|
+
/**
 * Lists the distinct lookup keys under which an entity name can be found.
 *
 * Keys are produced in a fixed order: the lower-cased trimmed input, the
 * normalizeAliasKey() form, and that form with all whitespace removed.
 * Empty variants are skipped and duplicates are dropped.
 *
 * @param {string} value - Raw entity name or alias.
 * @returns {string[]} Unique keys; empty when the input is blank.
 */
function buildEntityLookupKeys(value) {
    const base = value.trim();
    if (base.length === 0) {
        return [];
    }
    const spaced = normalizeAliasKey(base);
    const squeezed = spaced.replace(/\s+/g, "");
    // The lower-cased raw form is always kept; the derived forms only when non-empty.
    const variants = [base.toLowerCase()];
    for (const derived of [spaced, squeezed]) {
        if (derived) {
            variants.push(derived);
        }
    }
    return Array.from(new Set(variants));
}
|
|
415
|
+
/**
 * Picks which of two names should act as the canonical one.
 *
 * Rules, in order: a blank side loses to the other side; a name containing
 * at least one ASCII letter wins over a name containing none; otherwise the
 * longer name wins, with ties going to the left name.
 *
 * @param {string} leftRaw - First candidate.
 * @param {string} rightRaw - Second candidate.
 * @returns {string} The trimmed winner.
 */
function chooseCanonicalAlias(leftRaw, rightRaw) {
    const first = leftRaw.trim();
    const second = rightRaw.trim();
    if (first === "" || second === "") {
        return first === "" ? second : first;
    }
    const hasLatinLetter = (text) => /[A-Za-z]/.test(text);
    const firstLatin = hasLatinLetter(first);
    const secondLatin = hasLatinLetter(second);
    if (firstLatin !== secondLatin) {
        return firstLatin ? first : second;
    }
    return second.length > first.length ? second : first;
}
|
|
430
|
+
/**
 * Scans free text for "name (other name)" pairs and builds a lookup that maps
 * every lookup key of both names to the canonical one of the pair.
 *
 * Both ASCII parentheses and full-width parentheses (U+FF08 / U+FF09) are
 * recognised, so pairs written in CJK text are detected as well.
 *
 * @param {string} [sourceText] - Text to scan; blank or missing yields an empty map.
 * @returns {Map<string, string>} Lookup key -> canonical name.
 */
function buildRuntimeAliasLookup(sourceText) {
    const lookup = new Map();
    const text = (sourceText || "").trim();
    if (!text) {
        return lookup;
    }
    // Each side is limited to 80 characters and may not contain a newline or
    // any parenthesis, so a pair never spans lines or nests.
    const pairPattern = /([^()\uFF08\uFF09\n]{1,80})\s*[(\uFF08]\s*([^()\uFF08\uFF09\n]{1,80})\s*[)\uFF09]/g;
    // matchAll works on its own copy of the regex, so no lastIndex state leaks.
    for (const matched of text.matchAll(pairPattern)) {
        const left = (matched[1] || "").trim();
        const right = (matched[2] || "").trim();
        if (!left || !right || left === right) {
            continue;
        }
        const canonical = chooseCanonicalAlias(left, right);
        const alias = canonical === left ? right : left;
        // Alias keys first, canonical keys second: if both share a key the
        // canonical registration is the one that sticks.
        for (const name of [alias, canonical]) {
            for (const key of buildEntityLookupKeys(name)) {
                lookup.set(key, canonical);
            }
        }
    }
    return lookup;
}
|
|
261
455
|
/**
 * Builds a lookup from every key of each canonical entity name and each of
 * its aliases (as produced by buildEntityLookupKeys) to the canonical name.
 *
 * Entries whose canonical name is blank after trimming are ignored. Later
 * registrations overwrite earlier ones that share a key.
 *
 * @param {{ entityAliases?: Record<string, string[]> }} schema - Graph schema.
 * @returns {Map<string, string>} Lookup key -> canonical name.
 */
function buildAliasLookup(schema) {
    const lookup = new Map();
    const register = (name, canonical) => {
        for (const key of buildEntityLookupKeys(name)) {
            lookup.set(key, canonical);
        }
    };
    const table = schema.entityAliases || {};
    for (const rawCanonical of Object.keys(table)) {
        const canonical = rawCanonical.trim();
        if (canonical === "") {
            continue;
        }
        register(canonical, canonical);
        for (const alias of table[rawCanonical] || []) {
            register(alias, canonical);
        }
    }
    return lookup;
}
|
|
277
|
-
function normalizeEntityName(raw, schema) {
|
|
472
|
+
/**
 * Resolves a raw entity name to its canonical form.
 *
 * Each lookup key of the trimmed name is tried in order; for a given key the
 * optional runtime alias lookup is consulted before the schema alias lookup.
 * The first hit is returned. Without a hit the trimmed input is returned
 * unchanged.
 *
 * @param {string} raw - Raw entity name.
 * @param {object} schema - Graph schema providing entityAliases.
 * @param {Map<string, string>} [runtimeAliasLookup] - Aliases discovered at runtime.
 * @returns {string} Canonical name, the trimmed input, or "" for blank input.
 */
function normalizeEntityName(raw, schema, runtimeAliasLookup) {
    const name = raw.trim();
    if (name === "") {
        return "";
    }
    const schemaLookup = buildAliasLookup(schema);
    for (const key of buildEntityLookupKeys(name)) {
        const resolved = runtimeAliasLookup?.get(key) || schemaLookup.get(key);
        if (resolved) {
            return resolved;
        }
    }
    return name;
}
|
|
489
|
+
/**
 * Collects every lookup key under which an entity may appear: the keys of the
 * raw name, of its canonical name, and of each alias the schema lists for
 * that canonical name.
 *
 * @param {string} raw - Raw entity name.
 * @param {{ entityAliases?: Record<string, string[]> }} schema - Graph schema.
 * @returns {string[]} Unique keys in discovery order; empty for blank input.
 */
function getEntityMatchKeys(raw, schema) {
    const value = raw.trim();
    if (!value) {
        return [];
    }
    const canonical = normalizeEntityName(value, schema);
    // Own-property check: entity names are arbitrary text, and a name such as
    // "constructor" or "toString" must not resolve to an inherited
    // Object.prototype member (iterating that would throw).
    const aliasTable = schema.entityAliases || {};
    const aliases = Object.prototype.hasOwnProperty.call(aliasTable, canonical)
        ? aliasTable[canonical] || []
        : [];
    const keys = new Set();
    for (const name of [value, canonical, ...aliases]) {
        for (const key of buildEntityLookupKeys(name)) {
            keys.add(key);
        }
    }
    return [...keys];
}
|
|
284
509
|
function tokenizeForMatch(value) {
|
|
285
510
|
return value.toLowerCase().replace(/[_-]+/g, " ").replace(/\s+/g, " ").trim();
|
|
@@ -288,11 +513,12 @@ function entityMentionedInText(entity, sourceText, schema) {
|
|
|
288
513
|
const text = tokenizeForMatch(sourceText || "");
|
|
289
514
|
if (!text)
|
|
290
515
|
return false;
|
|
291
|
-
const
|
|
516
|
+
const canonical = normalizeEntityName(entity, schema);
|
|
517
|
+
const target = tokenizeForMatch(canonical || entity);
|
|
292
518
|
if (target && text.includes(target)) {
|
|
293
519
|
return true;
|
|
294
520
|
}
|
|
295
|
-
const aliases = schema.entityAliases[
|
|
521
|
+
const aliases = schema.entityAliases[canonical] || [];
|
|
296
522
|
for (const alias of aliases) {
|
|
297
523
|
const normalized = tokenizeForMatch(alias);
|
|
298
524
|
if (normalized && text.includes(normalized)) {
|
|
@@ -352,6 +578,9 @@ function normalizeEventType(raw, schema) {
|
|
|
352
578
|
}
|
|
353
579
|
function normalizeRelationType(raw, schema) {
|
|
354
580
|
const value = raw.trim().toLowerCase();
|
|
581
|
+
if (!value) {
|
|
582
|
+
return "";
|
|
583
|
+
}
|
|
355
584
|
const relationTypes = new Set(schema.relationTypes.map(item => item.toLowerCase()));
|
|
356
585
|
const aliases = toLowerMap(schema.relationTypeAliases);
|
|
357
586
|
if (relationTypes.has(value)) {
|
|
@@ -361,7 +590,346 @@ function normalizeRelationType(raw, schema) {
|
|
|
361
590
|
if (mapped) {
|
|
362
591
|
return mapped;
|
|
363
592
|
}
|
|
364
|
-
|
|
593
|
+
const snakeCase = value.replace(/[^a-z0-9]+/g, "_").replace(/^_+|_+$/g, "");
|
|
594
|
+
if (/^[a-z][a-z0-9_]*$/.test(snakeCase)) {
|
|
595
|
+
return snakeCase;
|
|
596
|
+
}
|
|
597
|
+
return "";
|
|
598
|
+
}
|
|
599
|
+
/**
 * Reports whether a relation type is one of the schema's declared relation
 * types. The comparison ignores case and surrounding whitespace of `type`.
 *
 * @param {string} type - Relation type to check.
 * @param {{ relationTypes: string[] }} schema - Graph schema.
 * @returns {boolean} True when the type is declared; false for blank input.
 */
function isCanonicalRelationType(type, schema) {
    const wanted = type.trim().toLowerCase();
    if (wanted === "") {
        return false;
    }
    const declared = schema.relationTypes.map((item) => item.toLowerCase());
    return declared.includes(wanted);
}
|
|
607
|
+
/**
 * Exposes the built-in graph schema.
 *
 * The module-level DEFAULT_SCHEMA object itself is returned, not a copy.
 *
 * @returns {object} The built-in schema object.
 */
function getDefaultGraphSchema() {
    const builtInSchema = DEFAULT_SCHEMA;
    return builtInSchema;
}
|
|
610
|
+
/**
 * Renders the relation-type guidance sentence block for a prompt: the list of
 * declared relation types, a ban on `related_to`, and the rule for custom
 * relations. The sentences are joined with single spaces.
 *
 * @param {{ relationTypes: string[] }} schema - Graph schema.
 * @returns {string} Prompt hint text.
 */
function buildRelationPromptHint(schema) {
    const allowedTypes = schema.relationTypes.join(", ");
    const sentences = [];
    sentences.push(`Allowed canonical relation types: ${allowedTypes}.`);
    sentences.push("Never use related_to.");
    sentences.push("If no canonical relation fits, create a snake_case custom relation, set relation_origin=llm_custom, and include relation_definition.");
    return sentences.join(" ");
}
|
|
617
|
+
// Entity names treated as generic; consulted by isGenericEntityName().
// Entries are stored in already-normalized (lower-case) form.
const GENERIC_ENTITY_BLOCKLIST = new Set([
    // Chinese pronouns and catch-all nouns.
    ...["用户", "我", "我们", "你", "你们", "他", "她", "他们"],
    ...["问题", "方案", "实体", "系统"],
    // English catch-all nouns.
    ...["task", "issue", "solution", "system", "person", "user", "thing"],
]);
|
|
638
|
+
/**
 * Reports whether a name, after normalizeAliasKey(), is one of the generic
 * names in GENERIC_ENTITY_BLOCKLIST. Falsy input is treated as an empty
 * string and is never generic.
 *
 * @param {*} raw - Candidate entity name; coerced to a string.
 * @returns {boolean} True when the normalized name is blocklisted.
 */
function isGenericEntityName(raw) {
    const key = normalizeAliasKey(String(raw || ""));
    if (!key) {
        return false;
    }
    return GENERIC_ENTITY_BLOCKLIST.has(key);
}
|
|
642
|
+
/**
 * Gathers the distinct canonical entity names that appear as the source or
 * target of the given relations, in first-seen order. Endpoints that
 * normalize to an empty string are skipped.
 *
 * @param {Iterable<{ source?: string, target?: string }>} relations - Relations to scan.
 * @param {object} schema - Graph schema used for name normalization.
 * @param {Map<string, string>} [runtimeAliasLookup] - Aliases discovered at runtime.
 * @returns {string[]} Unique canonical entity names.
 */
function collectEntitiesFromRelations(relations, schema, runtimeAliasLookup) {
    const seen = new Set();
    const canonicalize = (name) => normalizeEntityName(name || "", schema, runtimeAliasLookup);
    for (const relation of relations) {
        const endpoints = [canonicalize(relation.source), canonicalize(relation.target)];
        for (const name of endpoints) {
            if (name) {
                seen.add(name);
            }
        }
    }
    return Array.from(seen);
}
|
|
654
|
+
/**
 * Extracts URL and file-path references from free text.
 *
 * URLs (http://, https:// or www.) are collected first, then Windows-style
 * and slash-separated paths that end in a file extension. A path that is
 * already part of a collected URL is not reported again. At most 12 distinct
 * references are returned, URLs before paths.
 *
 * Sentence punctuation directly after a URL is not treated as part of it:
 * trailing ASCII punctuation (. , ; : ! ?) is stripped, and a URL stops at
 * full-width CJK punctuation.
 *
 * @param {string} [sourceText] - Text to scan.
 * @returns {string[]} Distinct references, capped at 12.
 */
function extractResourceReferences(sourceText) {
    const text = (sourceText || "").trim();
    if (!text) {
        return [];
    }
    const output = new Set();
    // Besides whitespace, closing brackets and quotes, a URL also ends at
    // full-width punctuation (， 。 ； ： ！ ？ 、 ）), which commonly follows a
    // URL without any space in CJK text.
    const urlPattern = /https?:\/\/[^\s)>"'`\uFF0C\u3002\uFF1B\uFF1A\uFF01\uFF1F\u3001\uFF09]+|www\.[^\s)>"'`\uFF0C\u3002\uFF1B\uFF1A\uFF01\uFF1F\u3001\uFF09]+/gi;
    const urls = [];
    for (const rawUrl of text.match(urlPattern) || []) {
        const cleaned = rawUrl.replace(/[.,;:!?]+$/, "");
        // Drop matches that are nothing but the scheme/prefix after cleanup.
        if (/^(?:https?:\/\/|www\.)./i.test(cleaned)) {
            urls.push(cleaned);
            output.add(cleaned);
        }
    }
    const pathMatches = text.match(/[A-Za-z]:\\[^\s"']+|(?:\.{0,2}\/)?(?:[\w.-]+\/)+[\w.-]+\.[A-Za-z0-9]{1,12}/g) || [];
    for (const value of pathMatches) {
        // Compare without a leading "./" or "/" as well, so relative and
        // absolute spellings of a URL's path are both recognised as covered.
        const compact = value.replace(/^\.\/+/, "").replace(/^\/+/, "");
        const coveredByUrl = urls.some(url => url.includes(value) || (compact && url.includes(compact)));
        if (coveredByUrl) {
            continue;
        }
        if (value.length >= 4) {
            output.add(value);
        }
    }
    return [...output].slice(0, 12);
}
|
|
679
|
+
/**
 * Guesses an entity type from the shape of its name.
 *
 * Checks run from most to least specific, and a type is only returned when
 * the schema declares it: URL -> "Resource", file path / known document
 * extension -> "Document", date-like text -> "Date", team keywords -> "Team",
 * project keywords -> "Project". Anything else gets schema.defaultEntityType.
 *
 * URL and document checks run before the date check because the date pattern
 * is unanchored: a URL or file name that merely contains a date segment
 * (e.g. "notes/2024-01-15.md") must not be classified as a Date.
 *
 * @param {string} entity - Entity name.
 * @param {{ entityTypes: string[], defaultEntityType: string }} schema - Graph schema.
 * @returns {string} Inferred entity type.
 */
function inferEntityTypeFromName(entity, schema) {
    const valid = new Set(schema.entityTypes);
    const value = entity.trim();
    if (!value) {
        return schema.defaultEntityType;
    }
    if (valid.has("Resource") && /^(https?:\/\/|www\.)/i.test(value)) {
        return "Resource";
    }
    if (valid.has("Document")
        && (/([/\\].+\.[A-Za-z0-9]{1,12})$/.test(value) || /\.(md|txt|pdf|docx?|pptx?|xlsx?|json|yaml|yml|xml|html?)$/i.test(value))) {
        return "Document";
    }
    if (valid.has("Date") && /(?:\d{4}-\d{2}-\d{2}|\d{1,2}月\d{1,2}日|\d{1,2}[/-]\d{1,2})/.test(value)) {
        return "Date";
    }
    if (valid.has("Team") && /(team|org|organization|团队|组织|公司)/i.test(value)) {
        return "Team";
    }
    if (valid.has("Project") && /(project|repo|仓库|项目|工程)/i.test(value)) {
        return "Project";
    }
    return schema.defaultEntityType;
}
|
|
703
|
+
/**
 * Picks an evidence span for a relation: the longest candidate string whose
 * tokenizeForMatch() form occurs in the tokenizeForMatch() form of the
 * source text.
 *
 * Candidates are trimmed, blank ones dropped, and duplicates removed before
 * being tried from longest to shortest.
 *
 * @param {string} [sourceText] - Text the relation was extracted from.
 * @param {string[]} candidates - Possible evidence strings.
 * @returns {string|undefined} The matching (trimmed) candidate, or undefined.
 */
function inferEvidenceSpanFromSource(sourceText, candidates) {
    const rawText = (sourceText || "").trim();
    if (rawText === "") {
        return undefined;
    }
    const haystack = tokenizeForMatch(rawText);
    const distinct = new Set();
    for (const item of candidates) {
        const trimmed = item.trim();
        if (trimmed) {
            distinct.add(trimmed);
        }
    }
    const longestFirst = Array.from(distinct).sort((a, b) => b.length - a.length);
    return longestFirst.find((candidate) => {
        const needle = tokenizeForMatch(candidate);
        return Boolean(needle) && haystack.includes(needle);
    });
}
|
|
719
|
+
/**
 * Cuts a short context window out of the source text around the first anchor
 * that occurs in it.
 *
 * The text and the anchors are whitespace-collapsed the same way before
 * searching, so an anchor containing a newline or repeated spaces can still
 * be located. Anchors are tried in the given order. When none is found, the
 * first 100 characters of the text are returned instead.
 *
 * The window is 80 characters long with the anchor roughly centred; an
 * anchor longer than the window is shown from its first character. Near the
 * end of the text the window start is pulled back so that at least 50
 * characters are returned when the text is long enough.
 *
 * @param {string} [sourceText] - Text to cut the window from.
 * @param {Array<*>} anchors - Strings to centre on, in priority order.
 * @returns {string|undefined} The trimmed window, or undefined for blank text.
 */
function inferContextChunkFromSource(sourceText, anchors) {
    const collapse = (value) => String(value || "").trim().replace(/\s+/g, " ");
    const text = collapse(sourceText);
    if (!text) {
        return undefined;
    }
    const normalizedAnchors = (anchors || []).map(collapse).filter(Boolean);
    let hitIndex = -1;
    let hitAnchor = "";
    for (const anchor of normalizedAnchors) {
        const idx = text.indexOf(anchor);
        if (idx >= 0) {
            hitIndex = idx;
            hitAnchor = anchor;
            break;
        }
    }
    if (hitIndex < 0) {
        const fallback = text.slice(0, Math.min(text.length, 100)).trim();
        return fallback || undefined;
    }
    const targetLength = 80;
    const minLength = 50;
    // Characters of leading context; clamped at zero so that an anchor longer
    // than the window cannot push the start past the anchor's first character.
    const lead = Math.floor(Math.max(0, targetLength - hitAnchor.length) / 2);
    let start = Math.max(0, hitIndex - lead);
    const end = Math.min(text.length, start + targetLength);
    // The window can only come up short when it was cut off by the end of the
    // text; extend it backwards in that case.
    if ((end - start) < minLength) {
        start = Math.max(0, end - minLength);
    }
    const chunk = text.slice(start, end).trim();
    return chunk || undefined;
}
|
|
755
|
+
/**
 * Reports whether a summary mentions an entity, either by the name as given,
 * by its canonical name, or by any alias the schema lists for the canonical
 * name. Comparison is done on tokenizeForMatch() forms using substring
 * matching.
 *
 * @param {string} [summary] - Summary text.
 * @param {string} entity - Entity name to look for.
 * @param {{ entityAliases?: Record<string, string[]> }} schema - Graph schema.
 * @param {Map<string, string>} [runtimeAliasLookup] - Aliases discovered at runtime.
 * @returns {boolean} True when any form of the entity occurs in the summary.
 */
function summaryMentionsEntity(summary, entity, schema, runtimeAliasLookup) {
    const normalizedSummary = tokenizeForMatch(summary || "");
    if (!normalizedSummary) {
        return false;
    }
    const canonical = normalizeEntityName(entity, schema, runtimeAliasLookup);
    // Own-property check: entity names are arbitrary text, and a name such as
    // "constructor" or "toString" must not resolve to an inherited
    // Object.prototype member (spreading that would throw).
    const aliasTable = schema.entityAliases || {};
    const aliases = Object.prototype.hasOwnProperty.call(aliasTable, canonical)
        ? aliasTable[canonical] || []
        : [];
    const candidates = new Set([entity, canonical, ...aliases]);
    for (const candidateRaw of candidates) {
        const candidate = tokenizeForMatch(candidateRaw || "");
        if (candidate && normalizedSummary.includes(candidate)) {
            return true;
        }
    }
    return false;
}
|
|
774
|
+
/**
 * Lists the entities that the summary does not mention, preserving the order
 * of args.entities.
 *
 * @param {{ entities: Iterable<string>, summary: string, schema: object, runtimeAliasLookup?: Map<string, string> }} args
 *   Entities to check, the summary text, the graph schema, and optional runtime aliases.
 * @returns {string[]} Entities for which summaryMentionsEntity() is false.
 */
function missingEntitiesInSummary(args) {
    const isMentioned = (entity) => summaryMentionsEntity(args.summary, entity, args.schema, args.runtimeAliasLookup);
    return Array.from(args.entities).filter((entity) => !isMentioned(entity));
}
|
|
783
|
+
/**
 * Builds a normalized source-text navigation record from an optional raw nav
 * object plus fallback values supplied by the caller.
 *
 * Resolution per field (all strings are trimmed; non-string values count as
 * missing instead of raising):
 *  - layer: nav.layer when it is "archive_event" or "active_only", otherwise args.sourceLayer
 *  - source_event_id: nav.source_event_id, then args.sourceEventId, then args.archiveEventId
 *  - source_memory_id: nav.source_memory_id, then the resolved source_event_id
 *  - session_id: nav.session_id, then args.sessionId
 *  - source_file: nav.source_file, then args.sourceFile
 *  - fulltext_anchor: nav.fulltext_anchor, or undefined when blank
 *
 * @param {{ sourceTextNav?: object, sourceLayer?: string, sourceEventId?: string,
 *           archiveEventId?: string, sessionId?: string, sourceFile?: string }} args
 * @returns {object|null} The record, or null when layer, session_id,
 *   source_file, source_event_id or source_memory_id cannot be resolved.
 */
function normalizeSourceTextNav(args) {
    const trimmedString = (value) => (typeof value === "string" ? value.trim() : "");
    const nav = args.sourceTextNav || {};
    const layerRaw = trimmedString(nav.layer);
    const layer = layerRaw === "archive_event" || layerRaw === "active_only"
        ? layerRaw
        : args.sourceLayer;
    const sourceEventId = trimmedString(nav.source_event_id)
        || trimmedString(args.sourceEventId)
        || trimmedString(args.archiveEventId);
    const sourceMemoryId = trimmedString(nav.source_memory_id) || sourceEventId;
    const sessionId = trimmedString(nav.session_id) || trimmedString(args.sessionId);
    const sourceFile = trimmedString(nav.source_file) || trimmedString(args.sourceFile);
    const fulltextAnchor = trimmedString(nav.fulltext_anchor);
    if (!layer || !sessionId || !sourceFile || !sourceEventId || !sourceMemoryId) {
        return null;
    }
    return {
        layer,
        session_id: sessionId,
        source_file: sourceFile,
        source_memory_id: sourceMemoryId,
        source_event_id: sourceEventId,
        fulltext_anchor: fulltextAnchor || undefined,
    };
}
|
|
811
|
+
/**
 * Decides whether a retry with fallback relations should be attempted.
 *
 * Returns false as soon as any rejection reason is one of
 * "missing_relation_confidence", "missing_evidence_span",
 * "low_relation_confidence" or "empty_edge"; returns true otherwise,
 * including when there are no reasons at all.
 *
 * @param {Iterable<string>} rejectedReasons - Reasons relations were rejected.
 * @returns {boolean} True when none of the reasons is a hard stop.
 */
function shouldRetryWithFallbackRelations(rejectedReasons) {
    const hardStops = [
        "missing_relation_confidence",
        "missing_evidence_span",
        "low_relation_confidence",
        "empty_edge",
    ];
    return Array.from(rejectedReasons).every((reason) => !hardStops.includes(reason));
}
|
|
825
|
+
/**
 * Builds a replacement relation list for a graph payload, in three stages.
 * Each later stage only runs when the earlier ones produced nothing:
 *
 *   1. Re-admit the caller-supplied relations, filling a missing evidence span
 *      via `inferEvidenceSpanFromSource`.
 *   2. Link the first non-resource, non-"Date" entity to up to three entities
 *      typed "Resource" or "Document" with a `references` relation.
 *   3. Link the first non-"Date" entity to another non-"Date" entity with a
 *      custom `co_occurs_with` relation (stops after the first one admitted).
 *
 * A candidate is admitted only when both endpoints normalize to distinct
 * members of `args.entities`, its type is non-empty and not `related_to`,
 * custom (`llm_custom`) relations carry a definition, an evidence span is
 * present, and the `source|type|target` triple has not been emitted yet.
 *
 * @param {object} args
 * @param {string[]} args.entities - Entity names; compared against normalized
 *   endpoints, so presumably already normalized by the caller.
 * @param {Object<string, string>} args.entityTypes - Entity name to type name.
 * @param {object[]} args.relations - Raw candidate relations.
 * @param {string} [args.sourceText] - Text used to infer evidence and context.
 * @param {object} args.schema - Ontology schema; `minRelationConfidence` is read here.
 * @param {Map} [args.runtimeAliasLookup] - Forwarded to `normalizeEntityName`.
 * @returns {object[]} Admitted relations (possibly empty).
 */
function buildFallbackRelations(args) {
    const output = [];
    const dedupe = new Set();
    const entitySet = new Set(args.entities);
    const sourceText = (args.sourceText || "").trim();
    const fallbackConfidence = Math.max(args.schema.minRelationConfidence + 0.05, 0.55);
    const trimmedString = (value) => (typeof value === "string" ? value.trim() : "");
    const entityTypeOf = (entity) => (args.entityTypes[entity] || "").trim();
    const pushRelation = (relation) => {
        const source = normalizeEntityName(relation.source || "", args.schema, args.runtimeAliasLookup);
        const target = normalizeEntityName(relation.target || "", args.schema, args.runtimeAliasLookup);
        const type = normalizeRelationType(relation.type || "", args.schema);
        const isCanonical = isCanonicalRelationType(type, args.schema);
        // Only the two known origins are trusted; anything else is derived from
        // the type, exactly as validateRelations does. Otherwise an unrecognized
        // origin would slip past the definition guard below, be rejected later,
        // and still suppress the heuristic stages because output was non-empty.
        const relationOriginRaw = trimmedString(relation.relation_origin);
        const relationOrigin = relationOriginRaw === "canonical" || relationOriginRaw === "llm_custom"
            ? relationOriginRaw
            : (isCanonical ? "canonical" : "llm_custom");
        const relationDefinition = trimmedString(relation.relation_definition);
        const evidenceSpan = trimmedString(relation.evidence_span);
        const contextChunk = trimmedString(relation.context_chunk)
            || inferContextChunkFromSource(sourceText, [evidenceSpan, source, target].filter(Boolean));
        const confidence = typeof relation.confidence === "number"
            ? Math.max(0, Math.min(1, relation.confidence))
            : fallbackConfidence;
        if (!source || !target || source === target) {
            return;
        }
        if (!entitySet.has(source) || !entitySet.has(target)) {
            return;
        }
        if (!type || type === "related_to") {
            return;
        }
        if (relationOrigin === "llm_custom" && !relationDefinition) {
            return;
        }
        if (!evidenceSpan) {
            return;
        }
        const key = `${source}|${type}|${target}`;
        if (dedupe.has(key)) {
            return;
        }
        dedupe.add(key);
        output.push({
            source,
            target,
            type,
            relation_origin: relationOrigin,
            relation_definition: relationDefinition || undefined,
            mapping_hint: trimmedString(relation.mapping_hint) || undefined,
            evidence_span: evidenceSpan,
            context_chunk: contextChunk,
            confidence,
        });
    };
    // Stage 1: retry the caller-supplied relations with inferred evidence.
    for (const relation of args.relations) {
        if (!relation || typeof relation !== "object") {
            continue;
        }
        // Endpoints come from model output; a non-string value is treated as
        // missing instead of throwing on `.trim()`.
        const sourceRaw = trimmedString(relation.source);
        const targetRaw = trimmedString(relation.target);
        const evidence = trimmedString(relation.evidence_span)
            || inferEvidenceSpanFromSource(sourceText, [sourceRaw, targetRaw])
            || "";
        pushRelation({
            source: sourceRaw,
            target: targetRaw,
            type: relation.type || "",
            relation_origin: relation.relation_origin,
            relation_definition: relation.relation_definition,
            mapping_hint: relation.mapping_hint,
            evidence_span: evidence,
            context_chunk: inferContextChunkFromSource(sourceText, [evidence, sourceRaw, targetRaw].filter(Boolean)),
            confidence: typeof relation.confidence === "number" ? relation.confidence : fallbackConfidence,
        });
    }
    // Stage 2: anchor entity -> resource/document entities via "references".
    if (output.length === 0) {
        const resources = args.entities.filter(entity => {
            const type = entityTypeOf(entity);
            return type === "Resource" || type === "Document";
        });
        const resourceSet = new Set(resources);
        const anchor = args.entities.find(entity => !resourceSet.has(entity) && entityTypeOf(entity) !== "Date");
        if (anchor) {
            for (const resource of resources.slice(0, 3)) {
                const evidence = inferEvidenceSpanFromSource(sourceText, [resource, anchor]) || "";
                pushRelation({
                    source: anchor,
                    target: resource,
                    type: "references",
                    evidence_span: evidence,
                    context_chunk: inferContextChunkFromSource(sourceText, [evidence, anchor, resource].filter(Boolean)),
                    confidence: fallbackConfidence,
                });
            }
        }
    }
    // Stage 3: a single custom co-occurrence edge from the first non-date entity.
    if (output.length === 0) {
        const nonDateEntities = args.entities.filter(entity => entityTypeOf(entity) !== "Date");
        if (nonDateEntities.length >= 2) {
            const source = nonDateEntities[0];
            for (const target of nonDateEntities.slice(1)) {
                const evidence = inferEvidenceSpanFromSource(sourceText, [source, target]) || "";
                pushRelation({
                    source,
                    target,
                    type: "co_occurs_with",
                    relation_origin: "llm_custom",
                    relation_definition: "Source and target are explicitly co-mentioned within the same source chunk.",
                    mapping_hint: "references",
                    evidence_span: evidence,
                    context_chunk: inferContextChunkFromSource(sourceText, [evidence, source, target].filter(Boolean)),
                    confidence: fallbackConfidence,
                });
                if (output.length > 0) {
                    break;
                }
            }
        }
    }
    return output;
}
|
|
366
934
|
function buildCanonicalId(args) {
|
|
367
935
|
const entities = (args.entities || []).map(item => item.trim().toLowerCase()).filter(Boolean).sort();
|
|
@@ -382,32 +950,78 @@ function validateRelations(args) {
|
|
|
382
950
|
const accepted = [];
|
|
383
951
|
const rejected = [];
|
|
384
952
|
const warnings = [];
|
|
385
|
-
const
|
|
953
|
+
const runtimeAliasLookup = args.runtimeAliasLookup || new Map();
|
|
954
|
+
const normalizedSourceText = (args.sourceText || "").trim().replace(/\s+/g, " ");
|
|
955
|
+
const entitySet = new Set(args.entities
|
|
956
|
+
.map(item => normalizeEntityName(item, args.schema, runtimeAliasLookup))
|
|
957
|
+
.filter(Boolean));
|
|
386
958
|
const rules = toKeyedRules(args.schema.relationRules);
|
|
387
959
|
const typeMap = {};
|
|
388
960
|
for (const [name, type] of Object.entries(args.entityTypes || {})) {
|
|
389
961
|
if (typeof name === "string" && typeof type === "string" && name.trim() && type.trim()) {
|
|
390
|
-
|
|
962
|
+
const normalizedName = normalizeEntityName(name, args.schema, runtimeAliasLookup);
|
|
963
|
+
if (normalizedName) {
|
|
964
|
+
typeMap[normalizedName] = type.trim();
|
|
965
|
+
}
|
|
391
966
|
}
|
|
392
967
|
}
|
|
393
968
|
for (const relation of args.relations) {
|
|
394
|
-
const source = normalizeEntityName(relation.source || "", args.schema);
|
|
395
|
-
const target = normalizeEntityName(relation.target || "", args.schema);
|
|
969
|
+
const source = normalizeEntityName(relation.source || "", args.schema, runtimeAliasLookup);
|
|
970
|
+
const target = normalizeEntityName(relation.target || "", args.schema, runtimeAliasLookup);
|
|
396
971
|
const type = normalizeRelationType(relation.type, args.schema);
|
|
972
|
+
const typeIsCanonical = isCanonicalRelationType(type, args.schema);
|
|
973
|
+
const relationOriginRaw = typeof relation.relation_origin === "string" ? relation.relation_origin.trim() : "";
|
|
974
|
+
const relationOrigin = relationOriginRaw === "canonical" || relationOriginRaw === "llm_custom"
|
|
975
|
+
? relationOriginRaw
|
|
976
|
+
: (typeIsCanonical ? "canonical" : "llm_custom");
|
|
977
|
+
const relationDefinition = typeof relation.relation_definition === "string" ? relation.relation_definition.trim() : "";
|
|
978
|
+
const mappingHint = typeof relation.mapping_hint === "string" ? relation.mapping_hint.trim() : "";
|
|
397
979
|
const confidence = typeof relation.confidence === "number"
|
|
398
980
|
? Math.max(0, Math.min(1, relation.confidence))
|
|
399
981
|
: undefined;
|
|
400
982
|
const evidenceSpan = typeof relation.evidence_span === "string" ? relation.evidence_span.trim() : "";
|
|
401
|
-
const
|
|
983
|
+
const contextChunkRaw = typeof relation.context_chunk === "string" ? relation.context_chunk.trim() : "";
|
|
984
|
+
const contextChunk = contextChunkRaw || inferContextChunkFromSource(normalizedSourceText, [evidenceSpan, source, target].filter(Boolean));
|
|
985
|
+
const normalized = {
|
|
986
|
+
source,
|
|
987
|
+
target,
|
|
988
|
+
type,
|
|
989
|
+
relation_origin: relationOrigin,
|
|
990
|
+
relation_definition: relationDefinition || undefined,
|
|
991
|
+
mapping_hint: mappingHint || undefined,
|
|
992
|
+
confidence,
|
|
993
|
+
evidence_span: evidenceSpan || undefined,
|
|
994
|
+
context_chunk: contextChunk || undefined,
|
|
995
|
+
};
|
|
402
996
|
if (!source || !target) {
|
|
403
997
|
rejected.push({ reason: "empty_edge", relation: normalized });
|
|
404
998
|
continue;
|
|
405
999
|
}
|
|
1000
|
+
if (!type) {
|
|
1001
|
+
rejected.push({ reason: "invalid_relation_type", relation: normalized });
|
|
1002
|
+
continue;
|
|
1003
|
+
}
|
|
1004
|
+
if (type === "related_to") {
|
|
1005
|
+
rejected.push({ reason: "related_to_detected", relation: normalized });
|
|
1006
|
+
continue;
|
|
1007
|
+
}
|
|
1008
|
+
if (relationOrigin === "canonical" && !typeIsCanonical) {
|
|
1009
|
+
rejected.push({ reason: "relation_origin_mismatch", relation: normalized });
|
|
1010
|
+
continue;
|
|
1011
|
+
}
|
|
1012
|
+
if (relationOrigin === "llm_custom" && typeIsCanonical) {
|
|
1013
|
+
rejected.push({ reason: "relation_origin_mismatch", relation: normalized });
|
|
1014
|
+
continue;
|
|
1015
|
+
}
|
|
1016
|
+
if (relationOrigin === "llm_custom" && !relationDefinition) {
|
|
1017
|
+
rejected.push({ reason: "llm_custom_missing_definition", relation: normalized });
|
|
1018
|
+
continue;
|
|
1019
|
+
}
|
|
406
1020
|
if (!entitySet.has(source) || !entitySet.has(target)) {
|
|
407
1021
|
rejected.push({ reason: "edge_entity_missing", relation: normalized });
|
|
408
1022
|
continue;
|
|
409
1023
|
}
|
|
410
|
-
const rule = rules.get(type);
|
|
1024
|
+
const rule = typeIsCanonical ? rules.get(type) : undefined;
|
|
411
1025
|
if (source === target && !(rule?.allowSelfLoop ?? false)) {
|
|
412
1026
|
rejected.push({ reason: "self_loop_blocked", relation: normalized });
|
|
413
1027
|
continue;
|
|
@@ -430,15 +1044,16 @@ function validateRelations(args) {
|
|
|
430
1044
|
rejected.push({ reason: "low_relation_confidence", relation: normalized });
|
|
431
1045
|
continue;
|
|
432
1046
|
}
|
|
1047
|
+
if (typeof confidence !== "number") {
|
|
1048
|
+
rejected.push({ reason: "missing_relation_confidence", relation: normalized });
|
|
1049
|
+
continue;
|
|
1050
|
+
}
|
|
1051
|
+
if (args.schema.evidenceSpanRequired && !evidenceSpan) {
|
|
1052
|
+
rejected.push({ reason: "missing_evidence_span", relation: normalized });
|
|
1053
|
+
continue;
|
|
1054
|
+
}
|
|
433
1055
|
if (mode !== "off" && args.schema.evidenceSpanRequired && args.sourceText) {
|
|
434
|
-
if (!evidenceSpan) {
|
|
435
|
-
if (mode === "strict") {
|
|
436
|
-
rejected.push({ reason: "missing_evidence_span", relation: normalized });
|
|
437
|
-
continue;
|
|
438
|
-
}
|
|
439
|
-
warnings.push({ reason: "missing_evidence_span", relation: normalized });
|
|
440
|
-
}
|
|
441
|
-
else if (!tokenizeForMatch(args.sourceText).includes(tokenizeForMatch(evidenceSpan))) {
|
|
1056
|
+
if (evidenceSpan && !tokenizeForMatch(args.sourceText).includes(tokenizeForMatch(evidenceSpan))) {
|
|
442
1057
|
if (mode === "strict") {
|
|
443
1058
|
rejected.push({ reason: "evidence_span_not_in_source", relation: normalized });
|
|
444
1059
|
continue;
|
|
@@ -446,6 +1061,20 @@ function validateRelations(args) {
|
|
|
446
1061
|
warnings.push({ reason: "evidence_span_not_in_source", relation: normalized });
|
|
447
1062
|
}
|
|
448
1063
|
}
|
|
1064
|
+
if (mode !== "off") {
|
|
1065
|
+
if (!contextChunk) {
|
|
1066
|
+
warnings.push({ reason: "missing_context_chunk", relation: normalized });
|
|
1067
|
+
}
|
|
1068
|
+
else {
|
|
1069
|
+
const length = contextChunk.length;
|
|
1070
|
+
if (length < 50 || length > 120) {
|
|
1071
|
+
warnings.push({ reason: "context_chunk_length_out_of_range", relation: normalized });
|
|
1072
|
+
}
|
|
1073
|
+
if (normalizedSourceText && !normalizedSourceText.includes(contextChunk)) {
|
|
1074
|
+
warnings.push({ reason: "context_chunk_not_in_source", relation: normalized });
|
|
1075
|
+
}
|
|
1076
|
+
}
|
|
1077
|
+
}
|
|
449
1078
|
if (mode !== "off" && args.schema.endpointMentionRequired && args.sourceText) {
|
|
450
1079
|
const sourceHit = entityMentionedInText(source, args.sourceText, args.schema);
|
|
451
1080
|
const targetHit = entityMentionedInText(target, args.sourceText, args.schema);
|
|
@@ -459,27 +1088,6 @@ function validateRelations(args) {
|
|
|
459
1088
|
}
|
|
460
1089
|
accepted.push(normalized);
|
|
461
1090
|
}
|
|
462
|
-
if (accepted.length > 0) {
|
|
463
|
-
const highValueSet = new Set((args.schema.highValueRelationTypes || []).map(item => item.toLowerCase()));
|
|
464
|
-
const relatedTo = accepted.filter(item => item.type === "related_to");
|
|
465
|
-
const highValueCount = accepted.filter(item => highValueSet.has(item.type)).length;
|
|
466
|
-
const maxByRatio = Math.max(1, Math.ceil(Math.max(1, highValueCount) * args.schema.relatedToMaxRatio));
|
|
467
|
-
const maxAllowed = Math.max(0, Math.min(args.schema.relatedToMaxAbsolute, maxByRatio));
|
|
468
|
-
if (relatedTo.length > maxAllowed) {
|
|
469
|
-
const sorted = [...relatedTo].sort((a, b) => (b.confidence || 0.5) - (a.confidence || 0.5));
|
|
470
|
-
const keepSet = new Set(sorted.slice(0, maxAllowed).map(item => `${item.source}|${item.type}|${item.target}`));
|
|
471
|
-
const filtered = accepted.filter(item => item.type !== "related_to" || keepSet.has(`${item.source}|${item.type}|${item.target}`));
|
|
472
|
-
if (filtered.length !== accepted.length) {
|
|
473
|
-
for (const item of accepted) {
|
|
474
|
-
if (item.type === "related_to" && !keepSet.has(`${item.source}|${item.type}|${item.target}`)) {
|
|
475
|
-
rejected.push({ reason: "related_to_throttled", relation: item });
|
|
476
|
-
}
|
|
477
|
-
}
|
|
478
|
-
}
|
|
479
|
-
accepted.length = 0;
|
|
480
|
-
accepted.push(...filtered);
|
|
481
|
-
}
|
|
482
|
-
}
|
|
483
1091
|
return { accepted, rejected, warnings };
|
|
484
1092
|
}
|
|
485
1093
|
function normalizeEntityType(raw, schema) {
|
|
@@ -495,20 +1103,50 @@ function validateGraphPayload(args) {
|
|
|
495
1103
|
if (!sourceEventId) {
|
|
496
1104
|
return { valid: false, reason: "source_event_id_empty" };
|
|
497
1105
|
}
|
|
498
|
-
const
|
|
499
|
-
|
|
1106
|
+
const summary = typeof args.summary === "string" ? args.summary.trim() : "";
|
|
1107
|
+
if (!summary) {
|
|
1108
|
+
return { valid: false, reason: "missing_summary" };
|
|
1109
|
+
}
|
|
1110
|
+
const sourceTextNav = normalizeSourceTextNav({
|
|
1111
|
+
sourceTextNav: args.source_text_nav,
|
|
1112
|
+
sourceLayer: args.sourceLayer,
|
|
1113
|
+
sourceEventId,
|
|
1114
|
+
archiveEventId: args.archiveEventId,
|
|
1115
|
+
sessionId: args.sessionId,
|
|
1116
|
+
sourceFile: args.sourceFile,
|
|
1117
|
+
});
|
|
1118
|
+
if (!sourceTextNav) {
|
|
1119
|
+
return { valid: false, reason: "fulltext_navigation_missing" };
|
|
1120
|
+
}
|
|
1121
|
+
const baseWarnings = [];
|
|
1122
|
+
const runtimeAliasLookup = buildRuntimeAliasLookup(args.sourceText);
|
|
1123
|
+
const normalizedInputEntities = Array.isArray(args.entities)
|
|
1124
|
+
? args.entities
|
|
1125
|
+
.map(item => normalizeEntityName(typeof item === "string" ? item : "", args.schema, runtimeAliasLookup))
|
|
1126
|
+
.filter(Boolean)
|
|
500
1127
|
: [];
|
|
1128
|
+
const relationEndpoints = collectEntitiesFromRelations(Array.isArray(args.relations) ? args.relations : [], args.schema, runtimeAliasLookup);
|
|
1129
|
+
const resourceEntities = extractResourceReferences(args.sourceText)
|
|
1130
|
+
.map(item => normalizeEntityName(item, args.schema, runtimeAliasLookup))
|
|
1131
|
+
.filter(Boolean);
|
|
1132
|
+
const dedupedEntities = [...new Set([...normalizedInputEntities, ...relationEndpoints, ...resourceEntities])];
|
|
1133
|
+
const entities = dedupedEntities.filter(entity => !isGenericEntityName(entity));
|
|
1134
|
+
if (entities.length !== dedupedEntities.length) {
|
|
1135
|
+
baseWarnings.push("generic_entity_rejected");
|
|
1136
|
+
}
|
|
501
1137
|
if (entities.length === 0) {
|
|
502
1138
|
return { valid: false, reason: "entities_empty" };
|
|
503
1139
|
}
|
|
1140
|
+
if (missingEntitiesInSummary({ summary, entities, schema: args.schema, runtimeAliasLookup }).length > 0) {
|
|
1141
|
+
return { valid: false, reason: "summary_missing_entities" };
|
|
1142
|
+
}
|
|
504
1143
|
const entityTypes = args.entity_types || {};
|
|
505
1144
|
const validEntityTypes = new Set(args.schema.entityTypes);
|
|
506
1145
|
const normalizedEntityTypes = {};
|
|
507
|
-
const aliasLookup = buildAliasLookup(args.schema);
|
|
508
1146
|
for (const [nameRaw, typeRaw] of Object.entries(entityTypes)) {
|
|
509
1147
|
if (typeof typeRaw !== "string")
|
|
510
1148
|
continue;
|
|
511
|
-
const normalizedName =
|
|
1149
|
+
const normalizedName = normalizeEntityName(nameRaw.trim(), args.schema, runtimeAliasLookup);
|
|
512
1150
|
if (!normalizedName)
|
|
513
1151
|
continue;
|
|
514
1152
|
normalizedEntityTypes[normalizedName] = typeRaw.trim();
|
|
@@ -519,7 +1157,7 @@ function validateGraphPayload(args) {
|
|
|
519
1157
|
normalizedEntityTypes[entity] = providedType;
|
|
520
1158
|
}
|
|
521
1159
|
else {
|
|
522
|
-
|
|
1160
|
+
normalizedEntityTypes[entity] = inferEntityTypeFromName(entity, args.schema);
|
|
523
1161
|
}
|
|
524
1162
|
}
|
|
525
1163
|
const relationValidation = validateRelations({
|
|
@@ -529,16 +1167,51 @@ function validateGraphPayload(args) {
|
|
|
529
1167
|
schema: args.schema,
|
|
530
1168
|
sourceText: args.sourceText,
|
|
531
1169
|
qualityMode: args.qualityMode,
|
|
1170
|
+
runtimeAliasLookup,
|
|
532
1171
|
});
|
|
533
|
-
|
|
1172
|
+
let acceptedRelations = relationValidation.accepted;
|
|
1173
|
+
const warnings = [...baseWarnings, ...relationValidation.warnings.map(item => item.reason)];
|
|
1174
|
+
if (acceptedRelations.length === 0) {
|
|
1175
|
+
const rejectedReasons = new Set(relationValidation.rejected.map(item => item.reason));
|
|
1176
|
+
if (!shouldRetryWithFallbackRelations(rejectedReasons)) {
|
|
1177
|
+
return { valid: false, reason: "relations_empty_or_invalid" };
|
|
1178
|
+
}
|
|
1179
|
+
const fallbackRelations = buildFallbackRelations({
|
|
1180
|
+
entities,
|
|
1181
|
+
entityTypes: normalizedEntityTypes,
|
|
1182
|
+
relations: Array.isArray(args.relations) ? args.relations : [],
|
|
1183
|
+
sourceText: args.sourceText,
|
|
1184
|
+
schema: args.schema,
|
|
1185
|
+
runtimeAliasLookup,
|
|
1186
|
+
});
|
|
1187
|
+
if (fallbackRelations.length > 0) {
|
|
1188
|
+
const fallbackValidation = validateRelations({
|
|
1189
|
+
relations: fallbackRelations,
|
|
1190
|
+
entities,
|
|
1191
|
+
entityTypes: normalizedEntityTypes,
|
|
1192
|
+
schema: args.schema,
|
|
1193
|
+
sourceText: args.sourceText,
|
|
1194
|
+
qualityMode: args.qualityMode,
|
|
1195
|
+
runtimeAliasLookup,
|
|
1196
|
+
});
|
|
1197
|
+
if (fallbackValidation.accepted.length > 0) {
|
|
1198
|
+
acceptedRelations = fallbackValidation.accepted;
|
|
1199
|
+
warnings.push("fallback_relations_applied");
|
|
1200
|
+
warnings.push(...fallbackValidation.warnings.map(item => item.reason));
|
|
1201
|
+
}
|
|
1202
|
+
}
|
|
1203
|
+
}
|
|
1204
|
+
if (acceptedRelations.length === 0) {
|
|
534
1205
|
return { valid: false, reason: "relations_empty_or_invalid" };
|
|
535
1206
|
}
|
|
536
1207
|
const id = `gph_${Date.now().toString(36)}_${crypto.randomBytes(4).toString("hex")}`;
|
|
537
1208
|
return {
|
|
538
1209
|
valid: true,
|
|
539
|
-
warnings:
|
|
1210
|
+
warnings: [...new Set(warnings)],
|
|
540
1211
|
normalized: {
|
|
541
1212
|
id,
|
|
1213
|
+
summary,
|
|
1214
|
+
source_text_nav: sourceTextNav,
|
|
542
1215
|
source_event_id: args.sourceEventId.trim(),
|
|
543
1216
|
source_layer: args.sourceLayer,
|
|
544
1217
|
archive_event_id: typeof args.archiveEventId === "string" && args.archiveEventId.trim()
|
|
@@ -551,7 +1224,7 @@ function validateGraphPayload(args) {
|
|
|
551
1224
|
timestamp: new Date().toISOString(),
|
|
552
1225
|
entities,
|
|
553
1226
|
entity_types: normalizedEntityTypes,
|
|
554
|
-
relations:
|
|
1227
|
+
relations: acceptedRelations,
|
|
555
1228
|
gate_source: args.gateSource,
|
|
556
1229
|
event_type: typeof args.eventType === "string" && args.eventType.trim()
|
|
557
1230
|
? normalizeEventType(args.eventType, args.schema)
|