@steno-ai/engine 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"structured-cross-linker.d.ts","sourceRoot":"","sources":["../../src/extraction/structured-cross-linker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AACjE,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAOrD;;;;;GAKG;AACH,wBAAsB,yBAAyB,CAC7C,OAAO,EAAE,cAAc,EACvB,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAAI,2BAA2B;AAChE,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC,CA4CjB;AAMD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,eAAe,EAAE,MAAM,CAAC;IACxB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;;;GAIG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,EAAE,cAAc,EACvB,SAAS,EAAE,gBAAgB,EAC3B,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE;IAAE,aAAa,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,MAAM,CAAA;CAAE,GACtG,OAAO,CAAC,gBAAgB,EAAE,CAAC,
|
|
1
|
+
{"version":3,"file":"structured-cross-linker.d.ts","sourceRoot":"","sources":["../../src/extraction/structured-cross-linker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAEH,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,wBAAwB,CAAC;AAC7D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,0BAA0B,CAAC;AACjE,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,oBAAoB,CAAC;AAOrD;;;;;GAKG;AACH,wBAAsB,yBAAyB,CAC7C,OAAO,EAAE,cAAc,EACvB,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,GAAG,CAAC,MAAM,EAAE,MAAM,CAAC,EAAI,2BAA2B;AAChE,SAAS,EAAE,MAAM,GAChB,OAAO,CAAC,MAAM,CAAC,CA4CjB;AAMD,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,MAAM,CAAC;IACjB,UAAU,EAAE,MAAM,CAAC;IACnB,MAAM,EAAE,MAAM,CAAC;IACf,WAAW,EAAE,MAAM,CAAC;IACpB,UAAU,EAAE,MAAM,CAAC;IACnB,iBAAiB,EAAE,MAAM,CAAC;IAC1B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,eAAe,EAAE,MAAM,CAAC;IACxB,oBAAoB,EAAE,MAAM,CAAC;IAC7B,mBAAmB,EAAE,MAAM,CAAC;IAC5B,UAAU,EAAE,MAAM,CAAC;CACpB;AAED;;;;GAIG;AACH,wBAAsB,qBAAqB,CACzC,OAAO,EAAE,cAAc,EACvB,SAAS,EAAE,gBAAgB,EAC3B,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE;IAAE,aAAa,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAC;IAAC,aAAa,CAAC,EAAE,MAAM,CAAA;CAAE,GACtG,OAAO,CAAC,gBAAgB,EAAE,CAAC,CAmE7B;AAED;;;;;GAKG;AACH,wBAAsB,wBAAwB,CAC5C,OAAO,EAAE,cAAc,EACvB,SAAS,EAAE,gBAAgB,EAC3B,GAAG,EAAE,UAAU,EACf,QAAQ,EAAE,MAAM,EAChB,KAAK,EAAE,MAAM,EACb,OAAO,EAAE,MAAM,EACf,OAAO,CAAC,EAAE;IAAE,aAAa,CAAC,EAAE,MAAM,CAAC;IAAC,QAAQ,CAAC,EAAE,MAAM,CAAA;CAAE,GACtD,OAAO,CAAC;IAAE,SAAS,EAAE,MAAM,CAAC;IAAC,YAAY,EAAE,MAAM,CAAA;CAAE,CAAC,CA2FtD"}
|
|
@@ -71,64 +71,61 @@ export async function findPendingCrossLinks(storage, embedding, tenantId, scope,
|
|
|
71
71
|
const maxFacts = options?.maxFacts ?? 10;
|
|
72
72
|
const maxDurationMs = options?.maxDurationMs ?? 20000;
|
|
73
73
|
const startTime = Date.now();
|
|
74
|
-
//
|
|
75
|
-
//
|
|
76
|
-
|
|
77
|
-
const
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
];
|
|
74
|
+
// Get actual structured facts (decrypted by storage adapter), then use their
|
|
75
|
+
// content to find cross-source matches via vector search.
|
|
76
|
+
const allFacts = await storage.getFactsByScope(tenantId, scope, scopeId, { limit: 100 });
|
|
77
|
+
const structuredFacts = (allFacts.data ?? [])
|
|
78
|
+
.filter((f) => f.sourceType?.startsWith('structured_'))
|
|
79
|
+
.slice(0, maxFacts);
|
|
80
|
+
console.error(`[steno-structured-xlink] Found ${structuredFacts.length} structured facts to cross-link`);
|
|
81
|
+
if (structuredFacts.length === 0)
|
|
82
|
+
return [];
|
|
84
83
|
const candidates = [];
|
|
85
|
-
for (
|
|
84
|
+
for (const fact of structuredFacts) {
|
|
86
85
|
if (Date.now() - startTime > maxDurationMs) {
|
|
87
86
|
console.error(`[steno-structured-xlink] Time budget exhausted after ${candidates.length} candidates`);
|
|
88
87
|
break;
|
|
89
88
|
}
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
// Embed the
|
|
93
|
-
const
|
|
94
|
-
const
|
|
95
|
-
embedding:
|
|
89
|
+
if (candidates.length >= maxCandidates)
|
|
90
|
+
break;
|
|
91
|
+
// Embed the decrypted fact content to find similar facts from other sources
|
|
92
|
+
const factEmbedding = await embedding.embed(fact.content);
|
|
93
|
+
const similar = await storage.vectorSearch({
|
|
94
|
+
embedding: factEmbedding,
|
|
96
95
|
tenantId,
|
|
97
96
|
scope,
|
|
98
97
|
scopeId,
|
|
99
|
-
limit:
|
|
100
|
-
minSimilarity:
|
|
98
|
+
limit: 5,
|
|
99
|
+
minSimilarity: minSim,
|
|
101
100
|
});
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
if (candidates.length >= maxCandidates)
|
|
126
|
-
break;
|
|
127
|
-
}
|
|
101
|
+
for (const match of similar) {
|
|
102
|
+
// Skip self-matches
|
|
103
|
+
if (match.fact.id === fact.id)
|
|
104
|
+
continue;
|
|
105
|
+
// Skip same source type — we want CROSS-source links
|
|
106
|
+
if (match.fact.sourceType === fact.sourceType)
|
|
107
|
+
continue;
|
|
108
|
+
// Must be a structured fact from a different source
|
|
109
|
+
if (!match.fact.sourceType?.startsWith('structured_'))
|
|
110
|
+
continue;
|
|
111
|
+
candidates.push({
|
|
112
|
+
entityId: '',
|
|
113
|
+
entityName: '',
|
|
114
|
+
factId: fact.id,
|
|
115
|
+
factContent: fact.content,
|
|
116
|
+
sourceType: fact.sourceType,
|
|
117
|
+
candidateEntityId: '',
|
|
118
|
+
candidateEntityName: '',
|
|
119
|
+
candidateFactId: match.fact.id,
|
|
120
|
+
candidateFactContent: match.fact.content,
|
|
121
|
+
candidateSourceType: match.fact.sourceType,
|
|
122
|
+
similarity: match.similarity,
|
|
123
|
+
});
|
|
128
124
|
if (candidates.length >= maxCandidates)
|
|
129
125
|
break;
|
|
130
126
|
}
|
|
131
127
|
}
|
|
128
|
+
console.error(`[steno-structured-xlink] Found ${candidates.length} cross-link candidates`);
|
|
132
129
|
return candidates;
|
|
133
130
|
}
|
|
134
131
|
/**
|
|
@@ -163,15 +160,26 @@ ${pairsText}`;
|
|
|
163
160
|
const response = await llm.complete([{ role: 'user', content: prompt }], { temperature: 0, responseFormat: 'json' });
|
|
164
161
|
const parsed = JSON.parse(response.content);
|
|
165
162
|
const classifications = Array.isArray(parsed) ? parsed : parsed.pairs ?? parsed.results ?? [];
|
|
163
|
+
console.error(`[steno-structured-xlink] LLM classified ${classifications.length} pairs: ${JSON.stringify(classifications)}`);
|
|
166
164
|
for (const classification of classifications) {
|
|
167
165
|
const idx = (classification.pair ?? classification.index ?? 0) - 1;
|
|
168
166
|
const relation = classification.relation ?? classification.type;
|
|
169
167
|
const candidate = candidates[idx];
|
|
170
|
-
if (!candidate || relation === 'unrelated')
|
|
168
|
+
if (!candidate || relation === 'unrelated') {
|
|
169
|
+
console.error(`[steno-structured-xlink] Pair ${idx + 1}: ${relation ?? 'no candidate'}`);
|
|
171
170
|
continue;
|
|
171
|
+
}
|
|
172
172
|
// Get entities for both facts to create the edge
|
|
173
|
-
|
|
174
|
-
|
|
173
|
+
let entitiesA = [];
|
|
174
|
+
let entitiesB = [];
|
|
175
|
+
try {
|
|
176
|
+
entitiesA = await storage.getEntitiesForFact(candidate.factId);
|
|
177
|
+
entitiesB = await storage.getEntitiesForFact(candidate.candidateFactId);
|
|
178
|
+
}
|
|
179
|
+
catch (err) {
|
|
180
|
+
console.error(`[steno-structured-xlink] getEntitiesForFact failed:`, err instanceof Error ? err.message : err);
|
|
181
|
+
}
|
|
182
|
+
console.error(`[steno-structured-xlink] Pair ${idx + 1} (${relation}): entitiesA=${entitiesA.length}, entitiesB=${entitiesB.length}`);
|
|
175
183
|
if (entitiesA.length > 0 && entitiesB.length > 0) {
|
|
176
184
|
const edgeType = relation === 'same_as' ? 'same_as' : 'associative';
|
|
177
185
|
try {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"structured-cross-linker.js","sourceRoot":"","sources":["../../src/extraction/structured-cross-linker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAOH,8EAA8E;AAC9E,oCAAoC;AACpC,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,OAAuB,EACvB,QAAgB,EAChB,YAAiC,EAAI,2BAA2B;AAChE,SAAiB;IAEjB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,CAAC,aAAa,EAAE,QAAQ,CAAC,IAAI,YAAY,EAAE,CAAC;QACrD,8DAA8D;QAC9D,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEvC,mEAAmE;QACnE,qEAAqE;QACrE,mFAAmF;QACnF,IAAI,CAAC;YACH,+DAA+D;YAC/D,8EAA8E;YAC9E,2EAA2E;YAC3E,gEAAgE;YAChE,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;YACvF,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;YAErE,kFAAkF;YAClF,4EAA4E;YAC5E,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC/D,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC5D,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC5D,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,iBAAwB,CAAC,CAAC;YAE1D,MAAM,gBAAgB,GAAG,CAAC,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;YAC3F,IAAI,gBAAgB,IAAI,CAAC,EAAE,CAAC;gBAC1B,4EAA4E;gBAC5E,MAAM,UAAU,GAAG,IAAI,aAAa,4CAA4C;oBAC9E,WAAW,IAAI,UAAU;oBACzB,QAAQ,IAAI,OAAO;oBACnB,QAAQ,IAAI,OAAO;oBACnB,OAAO,IAAI,OAAO;iBACnB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAE/B,OAAO,CAAC,KAAK,CAAC,oDAAoD,UAAU,EAAE,CAAC,CAAC;gBAChF,YAAY,EAAE,CAAC;YACjB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,kDAAkD,aAAa,GAAG,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC9H,CAAC;IACH,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAoBD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAuB,EACvB,SAA2B,EAC3B,QAAgB,EAChB,KAAa,EACb,OAAe,EACf,OAAuG;IAEvG,MAAM,MAAM,GAAG,OAAO,EAAE,aAAa,IAAI,GAAG,CAAC;IAC7C,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,EAAE,CAAC;IACnD,MAAM,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,EAAE,CAAC;IACzC,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,KAAK,CAAC;IACtD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,
|
|
1
|
+
{"version":3,"file":"structured-cross-linker.js","sourceRoot":"","sources":["../../src/extraction/structured-cross-linker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAOH,8EAA8E;AAC9E,oCAAoC;AACpC,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,OAAuB,EACvB,QAAgB,EAChB,YAAiC,EAAI,2BAA2B;AAChE,SAAiB;IAEjB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,CAAC,aAAa,EAAE,QAAQ,CAAC,IAAI,YAAY,EAAE,CAAC;QACrD,8DAA8D;QAC9D,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEvC,mEAAmE;QACnE,qEAAqE;QACrE,mFAAmF;QACnF,IAAI,CAAC;YACH,+DAA+D;YAC/D,8EAA8E;YAC9E,2EAA2E;YAC3E,gEAAgE;YAChE,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;YACvF,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;YAErE,kFAAkF;YAClF,4EAA4E;YAC5E,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC/D,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC5D,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC5D,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,iBAAwB,CAAC,CAAC;YAE1D,MAAM,gBAAgB,GAAG,CAAC,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;YAC3F,IAAI,gBAAgB,IAAI,CAAC,EAAE,CAAC;gBAC1B,4EAA4E;gBAC5E,MAAM,UAAU,GAAG,IAAI,aAAa,4CAA4C;oBAC9E,WAAW,IAAI,UAAU;oBACzB,QAAQ,IAAI,OAAO;oBACnB,QAAQ,IAAI,OAAO;oBACnB,OAAO,IAAI,OAAO;iBACnB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAE/B,OAAO,CAAC,KAAK,CAAC,oDAAoD,UAAU,EAAE,CAAC,CAAC;gBAChF,YAAY,EAAE,CAAC;YACjB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,kDAAkD,aAAa,GAAG,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC9H,CAAC;IACH,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAoBD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAuB,EACvB,SAA2B,EAC3B,QAAgB,EAChB,KAAa,EACb,OAAe,EACf,OAAuG;IAEvG,MAAM,MAAM,GAAG,OAAO,EAAE,aAAa,IAAI,GAAG,CAAC;IAC7C,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,EAAE,CAAC;IACnD,MAAM,QAAQ,GAAG,OAAO,EAAE,QAAQ,IAAI,EAAE,CAAC;IACzC,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,KAAK,CAAC;IACtD,MAAM,SAAS,GAAG,IAAI,CAAC,GAAG,EAAE,CAAC;IAE7B,6EAA6E;IAC7E,0DAA0D;IAC1D,MAAM,QAAQ,GAAG,MAAM,OAAO,CAAC,eAAe,CAAC,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,KAAK,EAAE,GAAG,EAAE,CAAC,CAAC;IACzF,MAAM,eAAe,GAAG,CAAC,QAAQ,CAAC,IAAI,IAAI,EAAE,CAAC;SAC1C,MAAM,CAAC,CAAC,CAAM,EAAE,EAAE,CAAC,CAAC,CAAC,UAAU,EAAE,UAAU,CAAC,aAAa,CAAC,CAAC;SAC3D,KAAK,CAAC,CAAC,EAAE,QAAQ,CAAC,CAAC;IAEtB,OAAO,CAAC,KAAK,CAAC,kCAAkC,eAAe,CAAC,MAAM,iCAAiC,CAAC,CAAC;IAEzG,IAAI,eAAe,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAE5C,MAAM,UAAU,GAAuB,EAAE,CAAC;IAE1C,KAAK,MAAM,IAAI,IAAI,eAAe,EAAE,CAAC;QACnC,IAAI,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,GAAG,aAAa,EAAE,CAAC;YAC3C,OAAO,CAAC,KAAK,CAAC,wDAAwD,UAAU,CAAC,MAAM,aAAa,CAAC,CAAC;YACtG,MAAM;QACR,CAAC;QACD,IAAI,UAAU,CAAC,MAAM,IAAI,aAAa;YAAE,MAAM;QAE9C,4EAA4E;QAC5E,MAAM,aAAa,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAE1D,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,YAAY,CAAC;YACzC,SAAS,EAAE,aAAa;YACxB,QAAQ;YACR,KAAK;YACL,OAAO;YACP,KAAK,EAAE,CAAC;YACR,aAAa,EAAE,MAAM;SACtB,CAAC,CAAC;QAEH,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,oBAAoB;YACpB,IAAI,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,EAAE;gBAAE,SAAS;YACxC,qDAAqD;YACrD,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,IAAI,CAAC,UAAU;gBAAE,SAAS;YACxD,oDAAoD;YACpD,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,UAAU,EAAE,UAAU,CAAC,aAAa,CAAC;gBAAE,SAAS;YAEhE,UAAU,CAAC,IAAI,CAAC;gBACd,QAAQ,EAAE,EAAE;gBACZ,UAAU,EAAE,EAAE;gBACd,MAAM,EAAE,IAAI,CAAC,EAAE;gBACf,WAAW,EAAE,IAAI,CAAC,OAAO;gBACzB,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,iBAAiB,EAAE,EAAE;gBACrB,mBAAmB,EAAE,EAAE;gBACvB,eAAe,EAAE,KAAK,CAAC,IAAI,CAAC,EAAE;gBAC9B,oBAAoB,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO;gBACxC,mBAAmB,EAAE,KAAK,CAAC,IAAI,CAAC,UAAU;gBAC1C,UAAU,EAAE,KAAK,CAAC,UAAU;aAC7B,CAAC,CAAC;YAEH,IAAI,UAAU,CAAC,MAAM,IAAI,aAAa;gBAAE,MAAM;QAChD,CAAC;IACH,CAAC;IAED,OAAO,CAAC,KAAK,CAAC,kCAAkC,UAAU,CAAC,MAAM,wBAAwB,CAAC,CAAC;IAC3F,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC5C,OAAuB,EACvB,SAA2B,EAC3B,GAAe,EACf,QAAgB,EAChB,KAAa,EACb,OAAe,EACf,OAAuD;IAEvD,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,KAAK,CAAC;IACtD,MAAM,UAAU,GAAG,MAAM,qBAAqB,CAAC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE;QAC3F,QAAQ,EAAE,OAAO,EAAE,QAAQ,IAAI,EAAE;QACjC,aAAa;QACb,aAAa,EAAE,EAAE;KAClB,CAAC,CAAC;IAEH,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;IAEtE,qDAAqD;IACrD,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACxC,GAAG,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,UAAU,OAAO,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,iBAAiB,CAAC,CAAC,mBAAmB,OAAO,CAAC,CAAC,oBAAoB,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CACxJ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEf,MAAM,MAAM,GAAG;;;;;;;;;EASf,SAAS,EAAE,CAAC;IAEZ,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,GAAG,CAAC,QAAQ,CACjC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,EACnC,EAAE,WAAW,EAAE,CAAC,EAAE,cAAc,EAAE,MAAM,EAAE,CAC3C,CAAC;QAEF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,eAAe,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;QAE9F,OAAO,CAAC,KAAK,CAAC,2CAA2C,eAAe,CAAC,MAAM,WAAW,IAAI,CAAC,SAAS,CAAC,eAAe,CAAC,EAAE,CAAC,CAAC;QAE7H,KAAK,MAAM,cAAc,IAAI,eAAe,EAAE,CAAC;YAC7C,MAAM,GAAG,GAAG,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACnE,MAAM,QAAQ,GAAG,cAAc,CAAC,QAAQ,IAAI,cAAc,CAAC,IAAI,CAAC;YAChE,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;YAElC,IAAI,CAAC,SAAS,IAAI,QAAQ,KAAK,WAAW,EAAE,CAAC;gBAC3C,OAAO,CAAC,KAAK,CAAC,iCAAiC,GAAG,GAAG,CAAC,KAAK,QAAQ,IAAI,cAAc,EAAE,CAAC,CAAC;gBACzF,SAAS;YACX,CAAC;YAED,iDAAiD;YACjD,IAAI,SAAS,GAAU,EAAE,CAAC;YAC1B,IAAI,SAAS,GAAU,EAAE,CAAC;YAC1B,IAAI,CAAC;gBACH,SAAS,GAAG,MAAM,OAAO,CAAC,kBAAkB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;gBAC/D,SAAS,GAAG,MAAM,OAAO,CAAC,kBAAkB,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;YAC1E,CAAC;YAAC,OAAO,GAAG,EAAE,CAAC;gBACb,OAAO,CAAC,KAAK,CAAC,qDAAqD,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;YACjH,CAAC;YAED,OAAO,CAAC,KAAK,CAAC,iCAAiC,GAAG,GAAG,CAAC,KAAK,QAAQ,gBAAgB,SAAS,CAAC,MAAM,eAAe,SAAS,CAAC,MAAM,EAAE,CAAC,CAAC;YAEtI,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACjD,MAAM,QAAQ,GAAG,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,SAAkB,CAAC,CAAC,CAAC,aAAsB,CAAC;gBACtF,IAAI,CAAC;oBACH,MAAM,OAAO,CAAC,UAAU,CAAC;wBACvB,EAAE,EAAE,MAAM,CAAC,UAAU,EAAE;wBACvB,QAAQ;wBACR,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAE,CAAC,EAAE;wBAC1B,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAE,CAAC,EAAE;wBAC1B,QAAQ;wBACR,QAAQ;wBACR,MAAM,EAAE,SAAS,CAAC,UAAU;wBAC5B,UAAU,EAAE,GAAG;wBACf,QAAQ,EAAE;4BACR,UAAU,EAAE,IAAI;4BAChB,YAAY,EAAE,SAAS,CAAC,MAAM;4BAC9B,YAAY,EAAE,SAAS,CAAC,eAAe;4BACvC,MAAM,EAAE,0BAA0B;yBACnC;qBACF,CAAC,CAAC;oBACH,YAAY,EAAE,CAAC;gBACjB,CAAC;gBAAC,MAAM,CAAC;oBACP,gCAAgC;gBAClC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,2DAA2D,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACvH,CAAC;IAED,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,MAAM,EAAE,YAAY,EAAE,CAAC;AACxD,CAAC"}
|
package/package.json
CHANGED
|
@@ -113,73 +113,65 @@ export async function findPendingCrossLinks(
|
|
|
113
113
|
const maxDurationMs = options?.maxDurationMs ?? 20000;
|
|
114
114
|
const startTime = Date.now();
|
|
115
115
|
|
|
116
|
-
//
|
|
117
|
-
//
|
|
118
|
-
|
|
119
|
-
const
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
];
|
|
116
|
+
// Get actual structured facts (decrypted by storage adapter), then use their
|
|
117
|
+
// content to find cross-source matches via vector search.
|
|
118
|
+
const allFacts = await storage.getFactsByScope(tenantId, scope, scopeId, { limit: 100 });
|
|
119
|
+
const structuredFacts = (allFacts.data ?? [])
|
|
120
|
+
.filter((f: any) => f.sourceType?.startsWith('structured_'))
|
|
121
|
+
.slice(0, maxFacts);
|
|
122
|
+
|
|
123
|
+
console.error(`[steno-structured-xlink] Found ${structuredFacts.length} structured facts to cross-link`);
|
|
124
|
+
|
|
125
|
+
if (structuredFacts.length === 0) return [];
|
|
126
126
|
|
|
127
127
|
const candidates: PendingCrossLink[] = [];
|
|
128
128
|
|
|
129
|
-
for (
|
|
129
|
+
for (const fact of structuredFacts) {
|
|
130
130
|
if (Date.now() - startTime > maxDurationMs) {
|
|
131
131
|
console.error(`[steno-structured-xlink] Time budget exhausted after ${candidates.length} candidates`);
|
|
132
132
|
break;
|
|
133
133
|
}
|
|
134
|
+
if (candidates.length >= maxCandidates) break;
|
|
134
135
|
|
|
135
|
-
|
|
136
|
-
const
|
|
137
|
-
|
|
138
|
-
// Embed the query to find facts of this type
|
|
139
|
-
const queryEmbedding = await embedding.embed(queryText);
|
|
136
|
+
// Embed the decrypted fact content to find similar facts from other sources
|
|
137
|
+
const factEmbedding = await embedding.embed(fact.content);
|
|
140
138
|
|
|
141
|
-
const
|
|
142
|
-
embedding:
|
|
139
|
+
const similar = await storage.vectorSearch({
|
|
140
|
+
embedding: factEmbedding,
|
|
143
141
|
tenantId,
|
|
144
142
|
scope,
|
|
145
143
|
scopeId,
|
|
146
|
-
limit:
|
|
147
|
-
minSimilarity:
|
|
144
|
+
limit: 5,
|
|
145
|
+
minSimilarity: minSim,
|
|
148
146
|
});
|
|
149
147
|
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
148
|
+
for (const match of similar) {
|
|
149
|
+
// Skip self-matches
|
|
150
|
+
if (match.fact.id === fact.id) continue;
|
|
151
|
+
// Skip same source type — we want CROSS-source links
|
|
152
|
+
if (match.fact.sourceType === fact.sourceType) continue;
|
|
153
|
+
// Must be a structured fact from a different source
|
|
154
|
+
if (!match.fact.sourceType?.startsWith('structured_')) continue;
|
|
155
|
+
|
|
156
|
+
candidates.push({
|
|
157
|
+
entityId: '',
|
|
158
|
+
entityName: '',
|
|
159
|
+
factId: fact.id,
|
|
160
|
+
factContent: fact.content,
|
|
161
|
+
sourceType: fact.sourceType,
|
|
162
|
+
candidateEntityId: '',
|
|
163
|
+
candidateEntityName: '',
|
|
164
|
+
candidateFactId: match.fact.id,
|
|
165
|
+
candidateFactContent: match.fact.content,
|
|
166
|
+
candidateSourceType: match.fact.sourceType,
|
|
167
|
+
similarity: match.similarity,
|
|
168
|
+
});
|
|
157
169
|
|
|
158
|
-
// Create candidate pairs
|
|
159
|
-
for (const src of sourceFacts) {
|
|
160
|
-
for (const other of otherFacts) {
|
|
161
|
-
if (src.fact.id === other.fact.id) continue;
|
|
162
|
-
|
|
163
|
-
candidates.push({
|
|
164
|
-
entityId: '',
|
|
165
|
-
entityName: '',
|
|
166
|
-
factId: src.fact.id,
|
|
167
|
-
factContent: src.fact.content,
|
|
168
|
-
sourceType: src.fact.sourceType,
|
|
169
|
-
candidateEntityId: '',
|
|
170
|
-
candidateEntityName: '',
|
|
171
|
-
candidateFactId: other.fact.id,
|
|
172
|
-
candidateFactContent: other.fact.content,
|
|
173
|
-
candidateSourceType: other.fact.sourceType,
|
|
174
|
-
similarity: Math.min(src.similarity, other.similarity),
|
|
175
|
-
});
|
|
176
|
-
|
|
177
|
-
if (candidates.length >= maxCandidates) break;
|
|
178
|
-
}
|
|
179
170
|
if (candidates.length >= maxCandidates) break;
|
|
180
171
|
}
|
|
181
172
|
}
|
|
182
173
|
|
|
174
|
+
console.error(`[steno-structured-xlink] Found ${candidates.length} cross-link candidates`);
|
|
183
175
|
return candidates;
|
|
184
176
|
}
|
|
185
177
|
|
|
@@ -234,16 +226,29 @@ ${pairsText}`;
|
|
|
234
226
|
const parsed = JSON.parse(response.content);
|
|
235
227
|
const classifications = Array.isArray(parsed) ? parsed : parsed.pairs ?? parsed.results ?? [];
|
|
236
228
|
|
|
229
|
+
console.error(`[steno-structured-xlink] LLM classified ${classifications.length} pairs: ${JSON.stringify(classifications)}`);
|
|
230
|
+
|
|
237
231
|
for (const classification of classifications) {
|
|
238
232
|
const idx = (classification.pair ?? classification.index ?? 0) - 1;
|
|
239
233
|
const relation = classification.relation ?? classification.type;
|
|
240
234
|
const candidate = candidates[idx];
|
|
241
235
|
|
|
242
|
-
if (!candidate || relation === 'unrelated')
|
|
236
|
+
if (!candidate || relation === 'unrelated') {
|
|
237
|
+
console.error(`[steno-structured-xlink] Pair ${idx + 1}: ${relation ?? 'no candidate'}`);
|
|
238
|
+
continue;
|
|
239
|
+
}
|
|
243
240
|
|
|
244
241
|
// Get entities for both facts to create the edge
|
|
245
|
-
|
|
246
|
-
|
|
242
|
+
let entitiesA: any[] = [];
|
|
243
|
+
let entitiesB: any[] = [];
|
|
244
|
+
try {
|
|
245
|
+
entitiesA = await storage.getEntitiesForFact(candidate.factId);
|
|
246
|
+
entitiesB = await storage.getEntitiesForFact(candidate.candidateFactId);
|
|
247
|
+
} catch (err) {
|
|
248
|
+
console.error(`[steno-structured-xlink] getEntitiesForFact failed:`, err instanceof Error ? err.message : err);
|
|
249
|
+
}
|
|
250
|
+
|
|
251
|
+
console.error(`[steno-structured-xlink] Pair ${idx + 1} (${relation}): entitiesA=${entitiesA.length}, entitiesB=${entitiesB.length}`);
|
|
247
252
|
|
|
248
253
|
if (entitiesA.length > 0 && entitiesB.length > 0) {
|
|
249
254
|
const edgeType = relation === 'same_as' ? 'same_as' as const : 'associative' as const;
|