@steno-ai/engine 0.1.15 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/storage.d.ts +29 -2
- package/dist/adapters/storage.d.ts.map +1 -1
- package/dist/config.d.ts +3 -3
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +9 -0
- package/dist/config.js.map +1 -1
- package/dist/extraction/index.d.ts +2 -0
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/index.js +2 -0
- package/dist/extraction/index.js.map +1 -1
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +48 -1
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/structured-cross-linker.d.ts +55 -0
- package/dist/extraction/structured-cross-linker.d.ts.map +1 -0
- package/dist/extraction/structured-cross-linker.js +195 -0
- package/dist/extraction/structured-cross-linker.js.map +1 -0
- package/dist/extraction/structured-extractor.d.ts +59 -0
- package/dist/extraction/structured-extractor.d.ts.map +1 -0
- package/dist/extraction/structured-extractor.js +389 -0
- package/dist/extraction/structured-extractor.js.map +1 -0
- package/dist/extraction/types.d.ts +3 -1
- package/dist/extraction/types.d.ts.map +1 -1
- package/dist/identity/index.d.ts +2 -0
- package/dist/identity/index.d.ts.map +1 -0
- package/dist/identity/index.js +2 -0
- package/dist/identity/index.js.map +1 -0
- package/dist/identity/resolver.d.ts +31 -0
- package/dist/identity/resolver.d.ts.map +1 -0
- package/dist/identity/resolver.js +122 -0
- package/dist/identity/resolver.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/models/edge.d.ts +6 -6
- package/dist/models/entity.d.ts +32 -0
- package/dist/models/entity.d.ts.map +1 -1
- package/dist/models/entity.js +11 -0
- package/dist/models/entity.js.map +1 -1
- package/dist/models/extraction.d.ts +6 -6
- package/dist/models/fact.d.ts +6 -6
- package/dist/retrieval/graph-traversal.d.ts +4 -1
- package/dist/retrieval/graph-traversal.d.ts.map +1 -1
- package/dist/retrieval/graph-traversal.js +6 -3
- package/dist/retrieval/graph-traversal.js.map +1 -1
- package/dist/retrieval/search.d.ts.map +1 -1
- package/dist/retrieval/search.js +56 -3
- package/dist/retrieval/search.js.map +1 -1
- package/dist/retrieval/types.d.ts +1 -0
- package/dist/retrieval/types.d.ts.map +1 -1
- package/dist/retrieval/types.js.map +1 -1
- package/package.json +1 -1
- package/src/adapters/storage.ts +35 -2
- package/src/config.ts +9 -0
- package/src/extraction/index.ts +2 -0
- package/src/extraction/pipeline.ts +63 -1
- package/src/extraction/structured-cross-linker.ts +259 -0
- package/src/extraction/structured-extractor.ts +463 -0
- package/src/extraction/types.ts +3 -1
- package/src/identity/index.ts +1 -0
- package/src/identity/resolver.ts +149 -0
- package/src/index.ts +1 -0
- package/src/models/entity.ts +13 -0
- package/src/retrieval/graph-traversal.ts +7 -4
- package/src/retrieval/search.ts +58 -3
- package/src/retrieval/types.ts +1 -0
- package/src/adapters/cache.d.ts +0 -9
- package/src/adapters/cache.d.ts.map +0 -1
- package/src/adapters/cache.js +0 -2
- package/src/adapters/cache.js.map +0 -1
- package/src/adapters/embedding.d.ts +0 -7
- package/src/adapters/embedding.d.ts.map +0 -1
- package/src/adapters/embedding.js +0 -2
- package/src/adapters/embedding.js.map +0 -1
- package/src/adapters/llm.d.ts +0 -19
- package/src/adapters/llm.d.ts.map +0 -1
- package/src/adapters/llm.js +0 -2
- package/src/adapters/llm.js.map +0 -1
- package/src/adapters/perplexity-embedding.d.ts +0 -24
- package/src/adapters/perplexity-embedding.d.ts.map +0 -1
- package/src/adapters/perplexity-embedding.js +0 -78
- package/src/adapters/perplexity-embedding.js.map +0 -1
- package/src/adapters/storage.d.ts +0 -173
- package/src/adapters/storage.d.ts.map +0 -1
- package/src/adapters/storage.js +0 -2
- package/src/adapters/storage.js.map +0 -1
- package/src/config.d.ts +0 -296
- package/src/config.d.ts.map +0 -1
- package/src/config.js +0 -92
- package/src/config.js.map +0 -1
- package/src/extraction/contradiction.d.ts +0 -15
- package/src/extraction/contradiction.d.ts.map +0 -1
- package/src/extraction/contradiction.js +0 -23
- package/src/extraction/contradiction.js.map +0 -1
- package/src/extraction/cross-linker.d.ts +0 -23
- package/src/extraction/cross-linker.d.ts.map +0 -1
- package/src/extraction/cross-linker.js +0 -146
- package/src/extraction/cross-linker.js.map +0 -1
- package/src/extraction/dedup.d.ts +0 -12
- package/src/extraction/dedup.d.ts.map +0 -1
- package/src/extraction/dedup.js +0 -93
- package/src/extraction/dedup.js.map +0 -1
- package/src/extraction/entity-extractor.d.ts +0 -30
- package/src/extraction/entity-extractor.d.ts.map +0 -1
- package/src/extraction/entity-extractor.js +0 -145
- package/src/extraction/entity-extractor.js.map +0 -1
- package/src/extraction/hasher.d.ts +0 -5
- package/src/extraction/hasher.d.ts.map +0 -1
- package/src/extraction/hasher.js +0 -8
- package/src/extraction/hasher.js.map +0 -1
- package/src/extraction/heuristic.d.ts +0 -3
- package/src/extraction/heuristic.d.ts.map +0 -1
- package/src/extraction/heuristic.js +0 -282
- package/src/extraction/heuristic.js.map +0 -1
- package/src/extraction/llm-extractor.d.ts +0 -23
- package/src/extraction/llm-extractor.d.ts.map +0 -1
- package/src/extraction/llm-extractor.js +0 -240
- package/src/extraction/llm-extractor.js.map +0 -1
- package/src/extraction/pipeline.d.ts +0 -30
- package/src/extraction/pipeline.d.ts.map +0 -1
- package/src/extraction/pipeline.js +0 -413
- package/src/extraction/pipeline.js.map +0 -1
- package/src/extraction/prompts.d.ts +0 -28
- package/src/extraction/prompts.d.ts.map +0 -1
- package/src/extraction/prompts.js +0 -205
- package/src/extraction/prompts.js.map +0 -1
- package/src/extraction/sliding-window.d.ts +0 -41
- package/src/extraction/sliding-window.d.ts.map +0 -1
- package/src/extraction/sliding-window.js +0 -84
- package/src/extraction/sliding-window.js.map +0 -1
- package/src/extraction/types.d.ts +0 -80
- package/src/extraction/types.d.ts.map +0 -1
- package/src/extraction/types.js +0 -2
- package/src/extraction/types.js.map +0 -1
- package/src/feedback/tracker.d.ts +0 -25
- package/src/feedback/tracker.d.ts.map +0 -1
- package/src/feedback/tracker.js +0 -90
- package/src/feedback/tracker.js.map +0 -1
- package/src/models/api-key.d.ts +0 -54
- package/src/models/api-key.d.ts.map +0 -1
- package/src/models/api-key.js +0 -21
- package/src/models/api-key.js.map +0 -1
- package/src/models/edge.d.ts +0 -78
- package/src/models/edge.d.ts.map +0 -1
- package/src/models/edge.js +0 -29
- package/src/models/edge.js.map +0 -1
- package/src/models/entity.d.ts +0 -60
- package/src/models/entity.d.ts.map +0 -1
- package/src/models/entity.js +0 -22
- package/src/models/entity.js.map +0 -1
- package/src/models/extraction.d.ts +0 -111
- package/src/models/extraction.d.ts.map +0 -1
- package/src/models/extraction.js +0 -40
- package/src/models/extraction.js.map +0 -1
- package/src/models/fact-entity.d.ts +0 -33
- package/src/models/fact-entity.d.ts.map +0 -1
- package/src/models/fact-entity.js +0 -14
- package/src/models/fact-entity.js.map +0 -1
- package/src/models/fact.d.ts +0 -191
- package/src/models/fact.d.ts.map +0 -1
- package/src/models/fact.js +0 -72
- package/src/models/fact.js.map +0 -1
- package/src/models/index.d.ts +0 -13
- package/src/models/index.d.ts.map +0 -1
- package/src/models/index.js +0 -13
- package/src/models/index.js.map +0 -1
- package/src/models/memory-access.d.ts +0 -89
- package/src/models/memory-access.d.ts.map +0 -1
- package/src/models/memory-access.js +0 -33
- package/src/models/memory-access.js.map +0 -1
- package/src/models/session.d.ts +0 -60
- package/src/models/session.d.ts.map +0 -1
- package/src/models/session.js +0 -23
- package/src/models/session.js.map +0 -1
- package/src/models/tenant.d.ts +0 -448
- package/src/models/tenant.d.ts.map +0 -1
- package/src/models/tenant.js +0 -23
- package/src/models/tenant.js.map +0 -1
- package/src/models/trigger.d.ts +0 -87
- package/src/models/trigger.d.ts.map +0 -1
- package/src/models/trigger.js +0 -41
- package/src/models/trigger.js.map +0 -1
- package/src/models/usage-record.d.ts +0 -37
- package/src/models/usage-record.d.ts.map +0 -1
- package/src/models/usage-record.js +0 -14
- package/src/models/usage-record.js.map +0 -1
- package/src/models/webhook.d.ts +0 -50
- package/src/models/webhook.d.ts.map +0 -1
- package/src/models/webhook.js +0 -25
- package/src/models/webhook.js.map +0 -1
- package/src/retrieval/compound-search.d.ts +0 -13
- package/src/retrieval/compound-search.d.ts.map +0 -1
- package/src/retrieval/compound-search.js +0 -87
- package/src/retrieval/compound-search.js.map +0 -1
- package/src/retrieval/contradiction-surfacer.d.ts +0 -18
- package/src/retrieval/contradiction-surfacer.d.ts.map +0 -1
- package/src/retrieval/contradiction-surfacer.js +0 -64
- package/src/retrieval/contradiction-surfacer.js.map +0 -1
- package/src/retrieval/embedding-cache.d.ts +0 -17
- package/src/retrieval/embedding-cache.d.ts.map +0 -1
- package/src/retrieval/embedding-cache.js +0 -56
- package/src/retrieval/embedding-cache.js.map +0 -1
- package/src/retrieval/fusion.d.ts +0 -27
- package/src/retrieval/fusion.d.ts.map +0 -1
- package/src/retrieval/fusion.js +0 -87
- package/src/retrieval/fusion.js.map +0 -1
- package/src/retrieval/graph-traversal.d.ts +0 -29
- package/src/retrieval/graph-traversal.d.ts.map +0 -1
- package/src/retrieval/graph-traversal.js +0 -208
- package/src/retrieval/graph-traversal.js.map +0 -1
- package/src/retrieval/query-expansion.d.ts +0 -20
- package/src/retrieval/query-expansion.d.ts.map +0 -1
- package/src/retrieval/query-expansion.js +0 -76
- package/src/retrieval/query-expansion.js.map +0 -1
- package/src/retrieval/reranker.d.ts +0 -15
- package/src/retrieval/reranker.d.ts.map +0 -1
- package/src/retrieval/reranker.js +0 -47
- package/src/retrieval/reranker.js.map +0 -1
- package/src/retrieval/salience-scorer.d.ts +0 -15
- package/src/retrieval/salience-scorer.d.ts.map +0 -1
- package/src/retrieval/salience-scorer.js +0 -41
- package/src/retrieval/salience-scorer.js.map +0 -1
- package/src/retrieval/search.d.ts +0 -21
- package/src/retrieval/search.d.ts.map +0 -1
- package/src/retrieval/search.js +0 -228
- package/src/retrieval/search.js.map +0 -1
- package/src/retrieval/temporal-scorer.d.ts +0 -18
- package/src/retrieval/temporal-scorer.d.ts.map +0 -1
- package/src/retrieval/temporal-scorer.js +0 -106
- package/src/retrieval/temporal-scorer.js.map +0 -1
- package/src/retrieval/trigger-matcher.d.ts +0 -18
- package/src/retrieval/trigger-matcher.d.ts.map +0 -1
- package/src/retrieval/trigger-matcher.js +0 -134
- package/src/retrieval/trigger-matcher.js.map +0 -1
- package/src/retrieval/types.d.ts +0 -70
- package/src/retrieval/types.d.ts.map +0 -1
- package/src/retrieval/types.js +0 -9
- package/src/retrieval/types.js.map +0 -1
- package/src/retrieval/vector-search.d.ts +0 -5
- package/src/retrieval/vector-search.d.ts.map +0 -1
- package/src/retrieval/vector-search.js +0 -24
- package/src/retrieval/vector-search.js.map +0 -1
- package/src/salience/decay.d.ts +0 -9
- package/src/salience/decay.d.ts.map +0 -1
- package/src/salience/decay.js +0 -15
- package/src/salience/decay.js.map +0 -1
- package/src/scratchpad/scratchpad.d.ts +0 -23
- package/src/scratchpad/scratchpad.d.ts.map +0 -1
- package/src/scratchpad/scratchpad.js +0 -137
- package/src/scratchpad/scratchpad.js.map +0 -1
- package/src/sessions/manager.d.ts +0 -11
- package/src/sessions/manager.d.ts.map +0 -1
- package/src/sessions/manager.js +0 -63
- package/src/sessions/manager.js.map +0 -1
|
@@ -0,0 +1,463 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured data extractor — bypasses LLM entirely.
|
|
3
|
+
*
|
|
4
|
+
* Handles structured_event, structured_task, structured_email, structured_vault
|
|
5
|
+
* input types by directly creating entities, edges, and facts from known fields.
|
|
6
|
+
* Zero LLM cost, deterministic, high confidence.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
import type { ExtractionResult, ExtractedFact, ExtractedEntity, ExtractedEdge } from './types.js';
|
|
10
|
+
import type { SourceType, EdgeType } from '../config.js';
|
|
11
|
+
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// Structured input schemas
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
|
|
16
|
+
/**
 * Pre-structured event payload accepted by `extractStructuredEvent`.
 *
 * Only `title` and `startTime` are required; every other field is optional
 * and is simply omitted from the derived entities/fact when absent.
 */
export interface StructuredEvent {
  /** Event title. */
  title: string;
  /** Start of the event, ISO 8601. */
  startTime: string;
  /** End of the event (presumably ISO 8601 like `startTime` — confirm). */
  endTime?: string;
  /** Free-text location. */
  location?: string;
  /** Free-text description. */
  description?: string;
  /** Org/person names. */
  organizers?: string[];
  /** Attendee names. */
  attendees?: string[];
  /** Link associated with the event. */
  url?: string;
  /** 'google' | 'microsoft' | 'partiful' etc. */
  provider?: string;
  /** Calendar event ID, vault item ID, etc. */
  externalId?: string;
  /** Where the event record came from. */
  sourceType?: 'calendar' | 'vault';
}
|
|
29
|
+
|
|
30
|
+
/**
 * Pre-structured task payload accepted by `extractStructuredTask`.
 *
 * Only `title` is required.
 */
export interface StructuredTask {
  /** Task title. */
  title: string;
  /** Free-text description. */
  description?: string;
  /** Free-form status label. */
  status?: string;
  /** Free-form priority label. */
  priority?: string;
  /** Free-form category label. */
  category?: string;
  /** Due date, ISO 8601. */
  dueDate?: string;
  /** Extra tags for the task. */
  tags?: string[];
  /** Identifier of the task in its originating system. */
  externalId?: string;
}
|
|
40
|
+
|
|
41
|
+
/**
 * Pre-structured email payload accepted by `extractStructuredEmail`.
 *
 * `subject`, `from` and `date` are required.
 */
export interface StructuredEmail {
  /** Subject line. */
  subject: string;
  /** Sender. */
  from: string;
  /** Recipients. */
  to?: string[];
  /** Message body (truncated). */
  body?: string;
  /** Message date, ISO 8601. */
  date: string;
  /** Whether the message is still unread. */
  isUnread?: boolean;
  /** Identifier of the thread the message belongs to. */
  threadId?: string;
  /** 'gmail' | 'outlook' */
  provider?: string;
  /** Identifier of the message in its originating system. */
  externalId?: string;
}
|
|
52
|
+
|
|
53
|
+
/**
 * Pre-structured saved-item ("vault") payload accepted by
 * `extractStructuredVault`.
 *
 * `title`, `contentType` and `savedAt` are required.
 */
export interface StructuredVault {
  /** Title of the saved item. */
  title: string;
  /** 'event', 'article', 'job', 'recipe', etc. */
  contentType: string;
  /** Link to the saved item. */
  url?: string;
  /** Domain the item was saved from. */
  source?: string;
  /** When the item was saved, ISO 8601. */
  savedAt: string;
  /** Truncated page content. */
  content?: string;
  /** Arbitrary extra key/value data attached to the item. */
  metadata?: Record<string, unknown>;
  /** Identifier of the item in its originating system. */
  externalId?: string;
}
|
|
63
|
+
|
|
64
|
+
// ---------------------------------------------------------------------------
|
|
65
|
+
// Helpers
|
|
66
|
+
// ---------------------------------------------------------------------------
|
|
67
|
+
|
|
68
|
+
/**
 * Derive the canonical key used to refer to an entity by name.
 *
 * Lowercases the input, removes every character that is not an ASCII
 * letter, digit, whitespace, dot or hyphen, collapses each whitespace run to
 * a single space, and trims both ends.
 *
 * @param name - Display name as supplied by the caller.
 * @returns The normalized key; may be empty if nothing survives the filter.
 */
function canonicalize(name: string): string {
  const lowered = name.toLowerCase();
  const allowedOnly = lowered.replace(/[^a-z0-9\s.-]/g, '');
  const singleSpaced = allowedOnly.replace(/\s+/g, ' ');
  return singleSpaced.trim();
}
|
|
71
|
+
|
|
72
|
+
/**
 * Render a date string as a long en-US date, e.g.
 * "Monday, January 15, 2024".
 *
 * Formatting uses the runtime's local time zone.
 *
 * @param iso - Date string (ISO 8601 expected).
 * @returns The formatted date, or `iso` unchanged when it cannot be parsed
 *   or formatted.
 */
function formatDate(iso: string): string {
  try {
    const parsed = new Date(iso);
    // An unparseable string does not throw: it produces an Invalid Date,
    // which would otherwise format as the literal text "Invalid Date".
    if (Number.isNaN(parsed.getTime())) return iso;
    return parsed.toLocaleDateString('en-US', {
      weekday: 'long', year: 'numeric', month: 'long', day: 'numeric',
    });
  } catch {
    return iso;
  }
}
|
|
81
|
+
|
|
82
|
+
/**
 * Render the time-of-day part of a date string in 12-hour en-US form,
 * e.g. "3:30 PM".
 *
 * Formatting uses the runtime's local time zone.
 *
 * @param iso - Date string (ISO 8601 expected).
 * @returns The formatted time, or an empty string when `iso` cannot be
 *   parsed or formatted.
 */
function formatTime(iso: string): string {
  try {
    const parsed = new Date(iso);
    // An unparseable string does not throw: it produces an Invalid Date,
    // which would otherwise format as the literal text "Invalid Date".
    if (Number.isNaN(parsed.getTime())) return '';
    return parsed.toLocaleTimeString('en-US', {
      hour: 'numeric', minute: '2-digit', hour12: true,
    });
  } catch {
    return '';
  }
}
|
|
91
|
+
|
|
92
|
+
// ---------------------------------------------------------------------------
|
|
93
|
+
// Extractors
|
|
94
|
+
// ---------------------------------------------------------------------------
|
|
95
|
+
|
|
96
|
+
/**
 * Build an extraction result for a structured event without using an LLM.
 *
 * Produces:
 *  - one `event` entity for the event itself (carrying the raw fields as
 *    properties),
 *  - one `location` entity plus an `event -located_at-> location` edge when a
 *    location is given,
 *  - one `organization` entity plus an `event -hosted_by-> organizer` edge
 *    per organizer,
 *  - one `person` entity plus an `attendee -attends-> event` edge per
 *    attendee,
 *  - a single fact summarizing the event in prose.
 *
 * Blank organizer/attendee names are ignored: they would otherwise produce
 * entities and edges keyed by an empty canonical name.
 *
 * @param data - The structured event payload.
 * @returns An `ExtractionResult` reported as tier `'heuristic'` with
 *   confidence 1.0, zero token usage and `model: null`.
 */
export function extractStructuredEvent(data: StructuredEvent): ExtractionResult {
  const entities: ExtractedEntity[] = [];
  const edges: ExtractedEdge[] = [];

  // Drop blank (or, defensively, non-string) names up front so the entity
  // list, the edges and the fact text all agree on who is involved.
  const nonBlank = (names: string[] | undefined): string[] =>
    (names ?? []).filter((n) => typeof n === 'string' && n.trim() !== '');
  const organizers = nonBlank(data.organizers);
  const attendees = nonBlank(data.attendees);

  // Main event entity
  const eventCanonical = canonicalize(data.title);
  entities.push({
    name: data.title,
    entityType: 'event',
    canonicalName: eventCanonical,
    properties: {
      startTime: data.startTime,
      endTime: data.endTime,
      location: data.location,
      url: data.url,
      provider: data.provider,
      externalId: data.externalId,
      sourceType: data.sourceType,
    },
  });

  // Location entity
  if (data.location) {
    const locCanonical = canonicalize(data.location);
    entities.push({
      name: data.location,
      entityType: 'location',
      canonicalName: locCanonical,
      properties: {},
    });
    edges.push({
      sourceName: eventCanonical,
      targetName: locCanonical,
      relation: 'located_at',
      edgeType: 'associative',
      confidence: 1.0,
    });
  }

  // Organizer entities
  for (const org of organizers) {
    const orgCanonical = canonicalize(org);
    entities.push({
      name: org,
      entityType: 'organization',
      canonicalName: orgCanonical,
      properties: {},
    });
    edges.push({
      sourceName: eventCanonical,
      targetName: orgCanonical,
      relation: 'hosted_by',
      edgeType: 'associative',
      confidence: 1.0,
    });
  }

  // Attendee entities (edge points from the person to the event)
  for (const attendee of attendees) {
    const attCanonical = canonicalize(attendee);
    entities.push({
      name: attendee,
      entityType: 'person',
      canonicalName: attCanonical,
      properties: {},
    });
    edges.push({
      sourceName: attCanonical,
      targetName: eventCanonical,
      relation: 'attends',
      edgeType: 'associative',
      confidence: 1.0,
    });
  }

  // Build fact content
  let factContent = `Event: "${data.title}" on ${formatDate(data.startTime)}`;
  if (data.startTime) factContent += ` at ${formatTime(data.startTime)}`;
  if (data.endTime) factContent += ` - ${formatTime(data.endTime)}`;
  if (data.location) factContent += ` at ${data.location}`;
  if (organizers.length) factContent += `. Hosted by ${organizers.join(', ')}`;
  // Descriptions are capped at 300 characters to keep the fact short.
  if (data.description) factContent += `. ${data.description.slice(0, 300)}`;

  // Event first, then every other entity in creation order. The Set removes
  // repeats (e.g. the same person listed twice) while preserving that order.
  const entityCanonicalNames = [
    ...new Set([eventCanonical, ...entities.map((e) => e.canonicalName)]),
  ];

  const fact: ExtractedFact = {
    content: factContent,
    importance: 0.8,
    confidence: 1.0,
    sourceType: (data.sourceType === 'vault' ? 'structured_vault' : 'structured_event') as SourceType,
    modality: 'text',
    tags: ['structured', 'event', ...(data.provider ? [data.provider] : [])],
    originalContent: JSON.stringify(data),
    entityCanonicalNames,
    eventDate: new Date(data.startTime),
    documentDate: new Date(),
  };

  return {
    facts: [fact],
    entities,
    edges,
    tier: 'heuristic',
    confidence: 1.0,
    tokensInput: 0,
    tokensOutput: 0,
    model: null,
  };
}
|
|
203
|
+
|
|
204
|
+
/**
 * Build an extraction result for a structured task without using an LLM.
 *
 * Emits a `task` entity, an optional `topic` entity for the category (linked
 * by a hierarchical `categorized_as` edge), and one fact summarizing the
 * task. The fact references only the task entity.
 *
 * @param data - The structured task payload.
 * @returns An `ExtractionResult` reported as tier `'heuristic'` with
 *   confidence 1.0, zero token usage and `model: null`.
 */
export function extractStructuredTask(data: StructuredTask): ExtractionResult {
  const taskCanonical = canonicalize(data.title);

  const taskEntity: ExtractedEntity = {
    name: data.title,
    entityType: 'task',
    canonicalName: taskCanonical,
    properties: {
      status: data.status,
      priority: data.priority,
      category: data.category,
      dueDate: data.dueDate,
      externalId: data.externalId,
    },
  };
  const entities: ExtractedEntity[] = [taskEntity];
  const edges: ExtractedEdge[] = [];

  // Category becomes its own topic node so tasks sharing one can be related.
  if (data.category) {
    const catCanonical = canonicalize(data.category);
    entities.push({
      name: data.category,
      entityType: 'topic',
      canonicalName: catCanonical,
      properties: {},
    });
    edges.push({
      sourceName: taskCanonical,
      targetName: catCanonical,
      relation: 'categorized_as',
      edgeType: 'hierarchical',
      confidence: 1.0,
    });
  }

  // Assemble the prose summary piece by piece; absent fields are skipped.
  const pieces: string[] = [`Task: "${data.title}"`];
  if (data.status) pieces.push(` (${data.status})`);
  if (data.priority) pieces.push(`, priority: ${data.priority}`);
  if (data.dueDate) pieces.push(`, due ${formatDate(data.dueDate)}`);
  // Descriptions are capped at 200 characters.
  if (data.description) pieces.push(`. ${data.description.slice(0, 200)}`);

  // 'high' and 'urgent' tasks are weighted above everything else.
  const isPressing = data.priority === 'high' || data.priority === 'urgent';

  const fact: ExtractedFact = {
    content: pieces.join(''),
    importance: isPressing ? 0.9 : 0.7,
    confidence: 1.0,
    sourceType: 'structured_task' as SourceType,
    modality: 'text',
    tags: ['structured', 'task', ...(data.tags ?? [])],
    originalContent: JSON.stringify(data),
    entityCanonicalNames: [taskCanonical],
    eventDate: data.dueDate ? new Date(data.dueDate) : undefined,
    documentDate: new Date(),
  };

  return {
    facts: [fact],
    entities,
    edges,
    tier: 'heuristic',
    confidence: 1.0,
    tokensInput: 0,
    tokensOutput: 0,
    model: null,
  };
}
|
|
270
|
+
|
|
271
|
+
/**
 * Build an extraction result for a structured email without using an LLM.
 *
 * Emits a `person` entity for the sender, a `topic` entity for the subject
 * when it is longer than five characters (linked by a
 * `sender -authored-> subject` edge), one `person` entity per recipient (no
 * edges), and one fact summarizing the message. The fact references only the
 * sender entity.
 *
 * @param data - The structured email payload.
 * @returns An `ExtractionResult` reported as tier `'heuristic'` with
 *   confidence 1.0, zero token usage and `model: null`.
 */
export function extractStructuredEmail(data: StructuredEmail): ExtractionResult {
  // Sender entity
  const senderCanonical = canonicalize(data.from);
  const sender: ExtractedEntity = {
    name: data.from,
    entityType: 'person',
    canonicalName: senderCanonical,
    properties: { email: data.from },
  };
  const entities: ExtractedEntity[] = [sender];
  const edges: ExtractedEdge[] = [];

  // Very short subjects are not turned into topic entities.
  const subjectIsSubstantial = Boolean(data.subject) && data.subject.length > 5;
  if (subjectIsSubstantial) {
    const subjectCanonical = canonicalize(data.subject);
    entities.push({
      name: data.subject,
      entityType: 'topic',
      canonicalName: subjectCanonical,
      properties: { threadId: data.threadId, provider: data.provider },
    });
    edges.push({
      sourceName: senderCanonical,
      targetName: subjectCanonical,
      relation: 'authored',
      edgeType: 'associative',
      confidence: 1.0,
    });
  }

  // Recipients
  const recipients = data.to ?? [];
  entities.push(
    ...recipients.map((to): ExtractedEntity => ({
      name: to,
      entityType: 'person',
      canonicalName: canonicalize(to),
      properties: { email: to },
    })),
  );

  const datePart = data.date ? ` on ${formatDate(data.date)}` : '';
  // Bodies are capped at 300 characters.
  const bodyPart = data.body ? `. ${data.body.slice(0, 300)}` : '';
  const factContent = `Email from ${data.from}: "${data.subject}"${datePart}${bodyPart}`;

  const tags: string[] = ['structured', 'email'];
  if (data.provider) tags.push(data.provider);
  if (data.isUnread) tags.push('unread');

  const fact: ExtractedFact = {
    content: factContent,
    // Unread mail is weighted higher than mail already read.
    importance: data.isUnread ? 0.8 : 0.5,
    confidence: 1.0,
    sourceType: 'structured_email' as SourceType,
    modality: 'text',
    tags,
    originalContent: JSON.stringify(data),
    entityCanonicalNames: [senderCanonical],
    eventDate: new Date(data.date),
    documentDate: new Date(),
  };

  return {
    facts: [fact],
    entities,
    edges,
    tier: 'heuristic',
    confidence: 1.0,
    tokensInput: 0,
    tokensOutput: 0,
    model: null,
  };
}
|
|
341
|
+
|
|
342
|
+
/**
 * Build an extraction result for a saved ("vault") item without using an LLM.
 *
 * Produces:
 *  - one entity for the item itself (`event` when `contentType` is
 *    `'event'`, otherwise `topic`),
 *  - one `source` entity plus an `item -saved_from-> source` edge when a
 *    source domain is given,
 *  - one `organization` entity plus an `item -hosted_by-> organizer` edge per
 *    organizer found in `metadata.organizer` / `metadata.organizers`,
 *  - a single fact summarizing the item in prose.
 *
 * `metadata` is untyped, so organizer entries that are not strings are
 * ignored and a `metadata.date` that does not parse to a valid date leaves
 * `eventDate` undefined rather than storing an Invalid Date.
 *
 * @param data - The structured vault payload.
 * @returns An `ExtractionResult` reported as tier `'heuristic'` with
 *   confidence 1.0, zero token usage and `model: null`.
 */
export function extractStructuredVault(data: StructuredVault): ExtractionResult {
  const entities: ExtractedEntity[] = [];
  const edges: ExtractedEdge[] = [];

  const vaultCanonical = canonicalize(data.title);
  entities.push({
    name: data.title,
    entityType: data.contentType === 'event' ? 'event' : 'topic',
    canonicalName: vaultCanonical,
    properties: {
      contentType: data.contentType,
      url: data.url,
      source: data.source,
      savedAt: data.savedAt,
      externalId: data.externalId,
      // NOTE(review): metadata is spread last, so a metadata key named e.g.
      // `url` or `contentType` overrides the field above — confirm intended.
      ...(data.metadata ?? {}),
    },
  });

  // Source domain entity
  if (data.source) {
    const sourceCanonical = canonicalize(data.source);
    entities.push({
      name: data.source,
      entityType: 'source',
      canonicalName: sourceCanonical,
      properties: {},
    });
    edges.push({
      sourceName: vaultCanonical,
      targetName: sourceCanonical,
      relation: 'saved_from',
      edgeType: 'associative',
      confidence: 1.0,
    });
  }

  // Organizers listed in metadata. This runs for every content type, not
  // only events. NOTE(review): confirm whether it should be limited to
  // contentType === 'event'.
  const rawOrganizers = data.metadata?.organizer || data.metadata?.organizers;
  const organizerNames: string[] =
    typeof rawOrganizers === 'string'
      // A single string may list several names: "A, B and C".
      ? rawOrganizers.split(/,\s*|(?:\s+and\s+)/)
      : Array.isArray(rawOrganizers)
        // metadata is Record<string, unknown>; keep only real strings so
        // that .trim() below cannot throw.
        ? rawOrganizers.filter((o): o is string => typeof o === 'string')
        : [];
  for (const org of organizerNames) {
    const trimmed = org.trim();
    if (!trimmed) continue;
    const orgCanonical = canonicalize(trimmed);
    entities.push({
      name: trimmed,
      entityType: 'organization',
      canonicalName: orgCanonical,
      properties: {},
    });
    edges.push({
      sourceName: vaultCanonical,
      targetName: orgCanonical,
      relation: 'hosted_by',
      edgeType: 'associative',
      confidence: 1.0,
    });
  }

  let factContent = `Saved to vault: "${data.title}" (${data.contentType})`;
  if (data.source) factContent += ` from ${data.source}`;
  if (data.savedAt) factContent += ` on ${formatDate(data.savedAt)}`;
  // Page content is capped at 300 characters.
  if (data.content) factContent += `. ${data.content.slice(0, 300)}`;

  // Only accept a truthy metadata.date that actually parses.
  const rawDate = data.metadata?.date;
  let eventDate: Date | undefined;
  if (
    rawDate &&
    (typeof rawDate === 'string' || typeof rawDate === 'number' || rawDate instanceof Date)
  ) {
    const parsed = new Date(rawDate);
    if (!Number.isNaN(parsed.getTime())) eventDate = parsed;
  }

  // Item first, then every other entity in creation order. The Set removes
  // repeats while preserving that order.
  const entityCanonicalNames = [
    ...new Set([vaultCanonical, ...entities.map((e) => e.canonicalName)]),
  ];

  const fact: ExtractedFact = {
    content: factContent,
    importance: 0.7,
    confidence: 1.0,
    sourceType: 'structured_vault' as SourceType,
    modality: 'text',
    tags: ['structured', 'vault', data.contentType],
    originalContent: JSON.stringify(data),
    entityCanonicalNames,
    eventDate,
    documentDate: new Date(data.savedAt),
  };

  return {
    facts: [fact],
    entities,
    edges,
    tier: 'heuristic',
    confidence: 1.0,
    tokensInput: 0,
    tokensOutput: 0,
    model: null,
  };
}
|
|
434
|
+
|
|
435
|
+
// ---------------------------------------------------------------------------
|
|
436
|
+
// Router — picks the right extractor based on inputType
|
|
437
|
+
// ---------------------------------------------------------------------------
|
|
438
|
+
|
|
439
|
+
/** Input type identifiers that are handled by the structured extractors. */
const STRUCTURED_INPUT_TYPES = new Set<string>([
  'structured_event',
  'structured_task',
  'structured_email',
  'structured_vault',
]);

/**
 * Tell whether an input type is one of the structured input types.
 *
 * @param inputType - Input type identifier (matched exactly, case-sensitive).
 * @returns `true` when `inputType` is a known structured type.
 */
export function isStructuredInput(inputType: string): boolean {
  const known = STRUCTURED_INPUT_TYPES.has(inputType);
  return known;
}
|
|
449
|
+
|
|
450
|
+
/**
 * Dispatch a structured payload to the extractor matching its input type.
 *
 * `data` is cast to the payload interface for the chosen extractor; it is
 * not validated here.
 *
 * @param inputType - One of `'structured_event'`, `'structured_task'`,
 *   `'structured_email'`, `'structured_vault'`.
 * @param data - The payload for that input type.
 * @returns The result produced by the selected extractor.
 * @throws Error when `inputType` is not one of the four known types.
 */
export function extractStructured(inputType: string, data: unknown): ExtractionResult {
  if (inputType === 'structured_event') {
    return extractStructuredEvent(data as StructuredEvent);
  }
  if (inputType === 'structured_task') {
    return extractStructuredTask(data as StructuredTask);
  }
  if (inputType === 'structured_email') {
    return extractStructuredEmail(data as StructuredEmail);
  }
  if (inputType === 'structured_vault') {
    return extractStructuredVault(data as StructuredVault);
  }
  throw new Error(`Unknown structured input type: ${inputType}`);
}
|
package/src/extraction/types.ts
CHANGED
|
@@ -60,11 +60,13 @@ export interface ExtractionInput {
|
|
|
60
60
|
scope: Scope;
|
|
61
61
|
scopeId: string;
|
|
62
62
|
sessionId?: string;
|
|
63
|
-
inputType: 'conversation' | 'document' | 'url' | 'raw_text' | 'image' | 'audio' | 'code' | 'codebase_scan' | 'file_change' | 'architecture_doc';
|
|
63
|
+
inputType: 'conversation' | 'document' | 'url' | 'raw_text' | 'image' | 'audio' | 'code' | 'codebase_scan' | 'file_change' | 'architecture_doc' | 'structured_event' | 'structured_task' | 'structured_email' | 'structured_vault';
|
|
64
64
|
data: unknown;
|
|
65
65
|
existingFacts?: Array<{ id: string; lineageId: string; content: string; embedding?: number[] }>;
|
|
66
66
|
/** Source provider for provenance tracking — where did this data come from? */
|
|
67
67
|
sourceProvider?: 'user_direct' | 'email' | 'calendar' | 'slack' | 'github' | 'linear' | 'jira' | 'code' | 'document';
|
|
68
|
+
/** Opt-in: run cross-scope identity resolution after entity creation */
|
|
69
|
+
resolveIdentities?: boolean;
|
|
68
70
|
}
|
|
69
71
|
|
|
70
72
|
/** Final output of the full pipeline */
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export { resolveIdentities, type IdentityCandidate, type AliasCandidate } from './resolver.js';
|
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
/** An entity offered to identity resolution as a possible alias of another. */
export interface IdentityCandidate {
  /** Identifier of the entity. */
  id: string;
  /** Canonical name of the entity. */
  canonicalName: string;
  /** Entity type label. */
  entityType: string;
  /** Arbitrary properties attached to the entity. */
  properties: Record<string, unknown>;
}
|
|
7
|
+
|
|
8
|
+
/** A proposed alias link between two entities. */
export interface AliasCandidate {
  /** Identifier of the entity treated as the primary identity. */
  primaryEntityId: string;
  /** Identifier of the entity proposed as an alias of the primary. */
  aliasEntityId: string;
  /** Confidence of the match (presumably in the range 0–1 — confirm). */
  confidence: number;
  /** Which kind of evidence produced the match. */
  matchReason: 'email_match' | 'fuzzy_name' | 'temporal_cooccurrence' | 'manual';
}
|
|
14
|
+
|
|
15
|
+
/**
 * Jaro-Winkler string similarity.
 *
 * Compares the two strings position by position (UTF-16 code units) and
 * returns a score between 0 (no similarity) and 1 (identical). A shared
 * prefix of up to four characters raises the plain Jaro score.
 *
 * @param s1 - First string.
 * @param s2 - Second string.
 * @returns Similarity in the range 0 to 1.
 */
export function jaroWinkler(s1: string, s2: string): number {
  if (s1 === s2) return 1.0;

  const len1 = s1.length;
  const len2 = s2.length;
  if (len1 === 0 || len2 === 0) return 0.0;

  // Two equal characters only count as a match when their positions are at
  // most `reach` apart.
  const reach = Math.floor(Math.max(len1, len2) / 2) - 1;

  const used1: boolean[] = Array.from({ length: len1 }, () => false);
  const used2: boolean[] = Array.from({ length: len2 }, () => false);

  // Pass 1: pair each character of s1 with the first unused equal character
  // of s2 inside its window.
  let matchCount = 0;
  for (let i = 0; i < len1; i++) {
    const lo = Math.max(0, i - reach);
    const hi = Math.min(i + reach + 1, len2);
    for (let j = lo; j < hi; j++) {
      if (!used2[j] && s1[i] === s2[j]) {
        used1[i] = true;
        used2[j] = true;
        matchCount++;
        break;
      }
    }
  }

  if (matchCount === 0) return 0.0;

  // Pass 2: walk the matched characters of both strings in order; every
  // position where they disagree is half of a transposition.
  let halfTranspositions = 0;
  let cursor = 0;
  for (let i = 0; i < len1; i++) {
    if (!used1[i]) continue;
    while (!used2[cursor]) cursor++;
    if (s1[i] !== s2[cursor]) halfTranspositions++;
    cursor++;
  }

  const jaro =
    (matchCount / len1 + matchCount / len2 + (matchCount - halfTranspositions / 2) / matchCount) / 3;

  // Winkler bonus: length of the common prefix, capped at four characters.
  const prefixLimit = Math.min(4, Math.min(len1, len2));
  let prefix = 0;
  while (prefix < prefixLimit && s1[prefix] === s2[prefix]) prefix++;

  return jaro + prefix * 0.1 * (1 - jaro);
}
|
|
68
|
+
|
|
69
|
+
/**
 * Prepare a name for fuzzy comparison.
 *
 * The name is lowercased and every `.`, `_`, `-` and `@` is removed.
 * Whitespace and all other characters are left untouched.
 *
 * @param name - Raw name to normalize.
 * @returns The lowercased name with the separator characters stripped.
 */
export function normalizeName(name: string): string {
  const lowered = name.toLowerCase();
  // Splitting on the separator class and re-joining drops every separator.
  return lowered.split(/[._\-@]/).join('');
}
|
|
76
|
+
|
|
77
|
+
/**
 * Resolve identities across a list of entity candidates using a two-tier strategy:
 *   1. Exact email match (confidence 1.0). Emails are compared after trimming
 *      surrounding whitespace and lowercasing; entity type is not considered.
 *   2. Fuzzy canonical-name match via Jaro-Winkler (confidence 0.6–0.8),
 *      restricted to entities of the same type.
 *
 * Each unordered pair of entity ids is reported at most once, and an entity is
 * never aliased to itself. A pair already linked by email is not re-reported
 * by the fuzzy tier.
 *
 * @param entities - Candidates to compare against each other.
 * @returns Alias pairs: email matches first, then fuzzy matches in input order.
 */
export function resolveIdentities(entities: IdentityCandidate[]): AliasCandidate[] {
  const aliases: AliasCandidate[] = [];
  const seen = new Set<string>();

  // Order-independent key so (a, b) and (b, a) are treated as the same pair.
  const pairKey = (idA: string, idB: string): string => [idA, idB].sort().join(':');

  const addAlias = (
    primaryEntityId: string,
    aliasEntityId: string,
    confidence: number,
    matchReason: AliasCandidate['matchReason'],
  ): void => {
    // The same entity appearing twice in the input must not alias itself.
    if (primaryEntityId === aliasEntityId) return;
    const key = pairKey(primaryEntityId, aliasEntityId);
    if (seen.has(key)) return;
    seen.add(key);
    aliases.push({ primaryEntityId, aliasEntityId, confidence, matchReason });
  };

  // Tier 1: Email match (confidence 1.0)
  const emailGroups = new Map<string, IdentityCandidate[]>();
  for (const entity of entities) {
    const email = entity.properties['email'];
    if (typeof email !== 'string') continue;
    // Trim before keying so " a@b.com " and "a@b.com" land in the same group.
    const normalizedEmail = email.trim().toLowerCase();
    if (normalizedEmail === '') continue;
    const group = emailGroups.get(normalizedEmail);
    if (group === undefined) {
      emailGroups.set(normalizedEmail, [entity]);
    } else {
      group.push(entity);
    }
  }

  for (const group of emailGroups.values()) {
    // The first entity seen with a given email is the primary of its group.
    const [primary, ...others] = group;
    if (primary === undefined) continue;
    for (const other of others) {
      addAlias(primary.id, other.id, 1.0, 'email_match');
    }
  }

  // Tier 2: Fuzzy name match (confidence 0.6–0.8)
  // Threshold: Jaro-Winkler >= 0.85, same entity type required
  const JARO_WINKLER_THRESHOLD = 0.85;

  // Normalize each name once instead of once per compared pair.
  const normalizedNames = entities.map((entity) => normalizeName(entity.canonicalName));

  for (let i = 0; i < entities.length; i++) {
    const a = entities[i];
    const normA = normalizedNames[i];
    // An empty normalized name carries no signal; two of them would otherwise
    // compare as identical and be aliased with the maximum fuzzy confidence.
    if (a === undefined || normA === undefined || normA === '') continue;

    for (let j = i + 1; j < entities.length; j++) {
      const b = entities[j];
      const normB = normalizedNames[j];
      if (b === undefined || normB === undefined || normB === '') continue;

      // Must be same entity type
      if (a.entityType !== b.entityType) continue;

      // Skip self-pairs and pairs already aliased (e.g. by email)
      if (a.id === b.id || seen.has(pairKey(a.id, b.id))) continue;

      const similarity = jaroWinkler(normA, normB);
      if (similarity < JARO_WINKLER_THRESHOLD) continue;

      // Map similarity [0.85, 1.0] → confidence [0.6, 0.8]
      const confidence = 0.6 + ((similarity - JARO_WINKLER_THRESHOLD) / 0.15) * 0.2;
      const clampedConfidence = Math.min(0.8, Math.max(0.6, confidence));
      addAlias(a.id, b.id, clampedConfidence, 'fuzzy_name');
    }
  }

  return aliases;
}
|
package/src/index.ts
CHANGED
package/src/models/entity.ts
CHANGED
|
@@ -16,6 +16,19 @@ export const EntitySchema = z.object({
|
|
|
16
16
|
|
|
17
17
|
export type Entity = z.infer<typeof EntitySchema>;
|
|
18
18
|
|
|
19
|
+
/**
 * Validation schema for an alias record linking two entities.
 */
export const AliasSchema = z.object({
  // Identifiers: all must be UUID strings.
  id: z.string().uuid(),
  tenantId: z.string().uuid(),
  primaryEntityId: z.string().uuid(),
  aliasEntityId: z.string().uuid(),

  // Match strength, constrained to the closed interval [0, 1].
  confidence: z.number().min(0).max(1),

  // How the link was established.
  matchReason: z.enum([
    'email_match',
    'fuzzy_name',
    'temporal_cooccurrence',
    'manual',
  ]),

  // Confirmation metadata; both fields are required but may be null.
  confirmedBy: z.string().nullable(),
  confirmedAt: z.coerce.date().nullable(),

  // Creation timestamp, coerced to a Date.
  createdAt: z.coerce.date(),
});

/** Type inferred from {@link AliasSchema}. */
export type Alias = z.infer<typeof AliasSchema>;
|
|
31
|
+
|
|
19
32
|
export const CreateEntitySchema = z.object({
|
|
20
33
|
tenantId: z.string().uuid(),
|
|
21
34
|
name: z.string().min(1).max(500),
|
|
@@ -41,14 +41,17 @@ export function tokenizeQuery(query: string): string[] {
|
|
|
41
41
|
* - 1-hop = 0.5
|
|
42
42
|
* - 2-hop = 0.25
|
|
43
43
|
* - 3-hop = 0.125
|
|
44
|
+
*
|
|
45
|
+
* Note: Entity lookups are tenant-scoped.
|
|
46
|
+
* Fact retrieval is scope-filtered to prevent cross-user data leakage.
|
|
44
47
|
*/
|
|
45
48
|
export async function graphSearch(
|
|
46
49
|
storage: StorageAdapter,
|
|
47
50
|
embedding: EmbeddingAdapter,
|
|
48
51
|
query: string,
|
|
49
52
|
tenantId: string,
|
|
50
|
-
|
|
51
|
-
|
|
53
|
+
scope: string,
|
|
54
|
+
scopeId: string,
|
|
52
55
|
limit: number,
|
|
53
56
|
config?: Partial<GraphSearchConfig>,
|
|
54
57
|
): Promise<Candidate[]> {
|
|
@@ -183,7 +186,7 @@ export async function graphSearch(
|
|
|
183
186
|
// Single query: get all facts linked to any of these entities
|
|
184
187
|
try {
|
|
185
188
|
const batchResult = await storage.getFactsForEntities(
|
|
186
|
-
tenantId, entityIds, PER_ENTITY_LIMIT
|
|
189
|
+
tenantId, entityIds, PER_ENTITY_LIMIT, scope, scopeId
|
|
187
190
|
);
|
|
188
191
|
|
|
189
192
|
for (const { entityId, fact } of batchResult) {
|
|
@@ -214,7 +217,7 @@ export async function graphSearch(
|
|
|
214
217
|
const hopDepth = entityHopMap.get(entity.id) ?? maxDepth;
|
|
215
218
|
const graphScore = 1 / Math.pow(2, hopDepth);
|
|
216
219
|
try {
|
|
217
|
-
const factsResult = await storage.getFactsForEntity(tenantId, entity.id, { limit: 3 });
|
|
220
|
+
const factsResult = await storage.getFactsForEntity(tenantId, entity.id, { limit: 3 }, scope, scopeId);
|
|
218
221
|
for (const fact of factsResult.data) {
|
|
219
222
|
if (!candidateMap.has(fact.id)) {
|
|
220
223
|
candidateMap.set(fact.id, {
|