@steno-ai/engine 0.1.15 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/storage.d.ts +29 -2
- package/dist/adapters/storage.d.ts.map +1 -1
- package/dist/config.d.ts +3 -3
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +9 -0
- package/dist/config.js.map +1 -1
- package/dist/extraction/index.d.ts +2 -0
- package/dist/extraction/index.d.ts.map +1 -1
- package/dist/extraction/index.js +2 -0
- package/dist/extraction/index.js.map +1 -1
- package/dist/extraction/pipeline.d.ts.map +1 -1
- package/dist/extraction/pipeline.js +48 -1
- package/dist/extraction/pipeline.js.map +1 -1
- package/dist/extraction/structured-cross-linker.d.ts +55 -0
- package/dist/extraction/structured-cross-linker.d.ts.map +1 -0
- package/dist/extraction/structured-cross-linker.js +195 -0
- package/dist/extraction/structured-cross-linker.js.map +1 -0
- package/dist/extraction/structured-extractor.d.ts +59 -0
- package/dist/extraction/structured-extractor.d.ts.map +1 -0
- package/dist/extraction/structured-extractor.js +389 -0
- package/dist/extraction/structured-extractor.js.map +1 -0
- package/dist/extraction/types.d.ts +3 -1
- package/dist/extraction/types.d.ts.map +1 -1
- package/dist/identity/index.d.ts +2 -0
- package/dist/identity/index.d.ts.map +1 -0
- package/dist/identity/index.js +2 -0
- package/dist/identity/index.js.map +1 -0
- package/dist/identity/resolver.d.ts +31 -0
- package/dist/identity/resolver.d.ts.map +1 -0
- package/dist/identity/resolver.js +122 -0
- package/dist/identity/resolver.js.map +1 -0
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/index.js.map +1 -1
- package/dist/models/edge.d.ts +6 -6
- package/dist/models/entity.d.ts +32 -0
- package/dist/models/entity.d.ts.map +1 -1
- package/dist/models/entity.js +11 -0
- package/dist/models/entity.js.map +1 -1
- package/dist/models/extraction.d.ts +6 -6
- package/dist/models/fact.d.ts +6 -6
- package/dist/retrieval/graph-traversal.d.ts +4 -1
- package/dist/retrieval/graph-traversal.d.ts.map +1 -1
- package/dist/retrieval/graph-traversal.js +6 -3
- package/dist/retrieval/graph-traversal.js.map +1 -1
- package/dist/retrieval/search.d.ts.map +1 -1
- package/dist/retrieval/search.js +56 -3
- package/dist/retrieval/search.js.map +1 -1
- package/dist/retrieval/types.d.ts +1 -0
- package/dist/retrieval/types.d.ts.map +1 -1
- package/dist/retrieval/types.js.map +1 -1
- package/package.json +1 -1
- package/src/adapters/storage.ts +35 -2
- package/src/config.ts +9 -0
- package/src/extraction/index.ts +2 -0
- package/src/extraction/pipeline.ts +63 -1
- package/src/extraction/structured-cross-linker.ts +259 -0
- package/src/extraction/structured-extractor.ts +463 -0
- package/src/extraction/types.ts +3 -1
- package/src/identity/index.ts +1 -0
- package/src/identity/resolver.ts +149 -0
- package/src/index.ts +1 -0
- package/src/models/entity.ts +13 -0
- package/src/retrieval/graph-traversal.ts +7 -4
- package/src/retrieval/search.ts +58 -3
- package/src/retrieval/types.ts +1 -0
- package/src/adapters/cache.d.ts +0 -9
- package/src/adapters/cache.d.ts.map +0 -1
- package/src/adapters/cache.js +0 -2
- package/src/adapters/cache.js.map +0 -1
- package/src/adapters/embedding.d.ts +0 -7
- package/src/adapters/embedding.d.ts.map +0 -1
- package/src/adapters/embedding.js +0 -2
- package/src/adapters/embedding.js.map +0 -1
- package/src/adapters/llm.d.ts +0 -19
- package/src/adapters/llm.d.ts.map +0 -1
- package/src/adapters/llm.js +0 -2
- package/src/adapters/llm.js.map +0 -1
- package/src/adapters/perplexity-embedding.d.ts +0 -24
- package/src/adapters/perplexity-embedding.d.ts.map +0 -1
- package/src/adapters/perplexity-embedding.js +0 -78
- package/src/adapters/perplexity-embedding.js.map +0 -1
- package/src/adapters/storage.d.ts +0 -173
- package/src/adapters/storage.d.ts.map +0 -1
- package/src/adapters/storage.js +0 -2
- package/src/adapters/storage.js.map +0 -1
- package/src/config.d.ts +0 -296
- package/src/config.d.ts.map +0 -1
- package/src/config.js +0 -92
- package/src/config.js.map +0 -1
- package/src/extraction/contradiction.d.ts +0 -15
- package/src/extraction/contradiction.d.ts.map +0 -1
- package/src/extraction/contradiction.js +0 -23
- package/src/extraction/contradiction.js.map +0 -1
- package/src/extraction/cross-linker.d.ts +0 -23
- package/src/extraction/cross-linker.d.ts.map +0 -1
- package/src/extraction/cross-linker.js +0 -146
- package/src/extraction/cross-linker.js.map +0 -1
- package/src/extraction/dedup.d.ts +0 -12
- package/src/extraction/dedup.d.ts.map +0 -1
- package/src/extraction/dedup.js +0 -93
- package/src/extraction/dedup.js.map +0 -1
- package/src/extraction/entity-extractor.d.ts +0 -30
- package/src/extraction/entity-extractor.d.ts.map +0 -1
- package/src/extraction/entity-extractor.js +0 -145
- package/src/extraction/entity-extractor.js.map +0 -1
- package/src/extraction/hasher.d.ts +0 -5
- package/src/extraction/hasher.d.ts.map +0 -1
- package/src/extraction/hasher.js +0 -8
- package/src/extraction/hasher.js.map +0 -1
- package/src/extraction/heuristic.d.ts +0 -3
- package/src/extraction/heuristic.d.ts.map +0 -1
- package/src/extraction/heuristic.js +0 -282
- package/src/extraction/heuristic.js.map +0 -1
- package/src/extraction/llm-extractor.d.ts +0 -23
- package/src/extraction/llm-extractor.d.ts.map +0 -1
- package/src/extraction/llm-extractor.js +0 -240
- package/src/extraction/llm-extractor.js.map +0 -1
- package/src/extraction/pipeline.d.ts +0 -30
- package/src/extraction/pipeline.d.ts.map +0 -1
- package/src/extraction/pipeline.js +0 -413
- package/src/extraction/pipeline.js.map +0 -1
- package/src/extraction/prompts.d.ts +0 -28
- package/src/extraction/prompts.d.ts.map +0 -1
- package/src/extraction/prompts.js +0 -205
- package/src/extraction/prompts.js.map +0 -1
- package/src/extraction/sliding-window.d.ts +0 -41
- package/src/extraction/sliding-window.d.ts.map +0 -1
- package/src/extraction/sliding-window.js +0 -84
- package/src/extraction/sliding-window.js.map +0 -1
- package/src/extraction/types.d.ts +0 -80
- package/src/extraction/types.d.ts.map +0 -1
- package/src/extraction/types.js +0 -2
- package/src/extraction/types.js.map +0 -1
- package/src/feedback/tracker.d.ts +0 -25
- package/src/feedback/tracker.d.ts.map +0 -1
- package/src/feedback/tracker.js +0 -90
- package/src/feedback/tracker.js.map +0 -1
- package/src/models/api-key.d.ts +0 -54
- package/src/models/api-key.d.ts.map +0 -1
- package/src/models/api-key.js +0 -21
- package/src/models/api-key.js.map +0 -1
- package/src/models/edge.d.ts +0 -78
- package/src/models/edge.d.ts.map +0 -1
- package/src/models/edge.js +0 -29
- package/src/models/edge.js.map +0 -1
- package/src/models/entity.d.ts +0 -60
- package/src/models/entity.d.ts.map +0 -1
- package/src/models/entity.js +0 -22
- package/src/models/entity.js.map +0 -1
- package/src/models/extraction.d.ts +0 -111
- package/src/models/extraction.d.ts.map +0 -1
- package/src/models/extraction.js +0 -40
- package/src/models/extraction.js.map +0 -1
- package/src/models/fact-entity.d.ts +0 -33
- package/src/models/fact-entity.d.ts.map +0 -1
- package/src/models/fact-entity.js +0 -14
- package/src/models/fact-entity.js.map +0 -1
- package/src/models/fact.d.ts +0 -191
- package/src/models/fact.d.ts.map +0 -1
- package/src/models/fact.js +0 -72
- package/src/models/fact.js.map +0 -1
- package/src/models/index.d.ts +0 -13
- package/src/models/index.d.ts.map +0 -1
- package/src/models/index.js +0 -13
- package/src/models/index.js.map +0 -1
- package/src/models/memory-access.d.ts +0 -89
- package/src/models/memory-access.d.ts.map +0 -1
- package/src/models/memory-access.js +0 -33
- package/src/models/memory-access.js.map +0 -1
- package/src/models/session.d.ts +0 -60
- package/src/models/session.d.ts.map +0 -1
- package/src/models/session.js +0 -23
- package/src/models/session.js.map +0 -1
- package/src/models/tenant.d.ts +0 -448
- package/src/models/tenant.d.ts.map +0 -1
- package/src/models/tenant.js +0 -23
- package/src/models/tenant.js.map +0 -1
- package/src/models/trigger.d.ts +0 -87
- package/src/models/trigger.d.ts.map +0 -1
- package/src/models/trigger.js +0 -41
- package/src/models/trigger.js.map +0 -1
- package/src/models/usage-record.d.ts +0 -37
- package/src/models/usage-record.d.ts.map +0 -1
- package/src/models/usage-record.js +0 -14
- package/src/models/usage-record.js.map +0 -1
- package/src/models/webhook.d.ts +0 -50
- package/src/models/webhook.d.ts.map +0 -1
- package/src/models/webhook.js +0 -25
- package/src/models/webhook.js.map +0 -1
- package/src/retrieval/compound-search.d.ts +0 -13
- package/src/retrieval/compound-search.d.ts.map +0 -1
- package/src/retrieval/compound-search.js +0 -87
- package/src/retrieval/compound-search.js.map +0 -1
- package/src/retrieval/contradiction-surfacer.d.ts +0 -18
- package/src/retrieval/contradiction-surfacer.d.ts.map +0 -1
- package/src/retrieval/contradiction-surfacer.js +0 -64
- package/src/retrieval/contradiction-surfacer.js.map +0 -1
- package/src/retrieval/embedding-cache.d.ts +0 -17
- package/src/retrieval/embedding-cache.d.ts.map +0 -1
- package/src/retrieval/embedding-cache.js +0 -56
- package/src/retrieval/embedding-cache.js.map +0 -1
- package/src/retrieval/fusion.d.ts +0 -27
- package/src/retrieval/fusion.d.ts.map +0 -1
- package/src/retrieval/fusion.js +0 -87
- package/src/retrieval/fusion.js.map +0 -1
- package/src/retrieval/graph-traversal.d.ts +0 -29
- package/src/retrieval/graph-traversal.d.ts.map +0 -1
- package/src/retrieval/graph-traversal.js +0 -208
- package/src/retrieval/graph-traversal.js.map +0 -1
- package/src/retrieval/query-expansion.d.ts +0 -20
- package/src/retrieval/query-expansion.d.ts.map +0 -1
- package/src/retrieval/query-expansion.js +0 -76
- package/src/retrieval/query-expansion.js.map +0 -1
- package/src/retrieval/reranker.d.ts +0 -15
- package/src/retrieval/reranker.d.ts.map +0 -1
- package/src/retrieval/reranker.js +0 -47
- package/src/retrieval/reranker.js.map +0 -1
- package/src/retrieval/salience-scorer.d.ts +0 -15
- package/src/retrieval/salience-scorer.d.ts.map +0 -1
- package/src/retrieval/salience-scorer.js +0 -41
- package/src/retrieval/salience-scorer.js.map +0 -1
- package/src/retrieval/search.d.ts +0 -21
- package/src/retrieval/search.d.ts.map +0 -1
- package/src/retrieval/search.js +0 -228
- package/src/retrieval/search.js.map +0 -1
- package/src/retrieval/temporal-scorer.d.ts +0 -18
- package/src/retrieval/temporal-scorer.d.ts.map +0 -1
- package/src/retrieval/temporal-scorer.js +0 -106
- package/src/retrieval/temporal-scorer.js.map +0 -1
- package/src/retrieval/trigger-matcher.d.ts +0 -18
- package/src/retrieval/trigger-matcher.d.ts.map +0 -1
- package/src/retrieval/trigger-matcher.js +0 -134
- package/src/retrieval/trigger-matcher.js.map +0 -1
- package/src/retrieval/types.d.ts +0 -70
- package/src/retrieval/types.d.ts.map +0 -1
- package/src/retrieval/types.js +0 -9
- package/src/retrieval/types.js.map +0 -1
- package/src/retrieval/vector-search.d.ts +0 -5
- package/src/retrieval/vector-search.d.ts.map +0 -1
- package/src/retrieval/vector-search.js +0 -24
- package/src/retrieval/vector-search.js.map +0 -1
- package/src/salience/decay.d.ts +0 -9
- package/src/salience/decay.d.ts.map +0 -1
- package/src/salience/decay.js +0 -15
- package/src/salience/decay.js.map +0 -1
- package/src/scratchpad/scratchpad.d.ts +0 -23
- package/src/scratchpad/scratchpad.d.ts.map +0 -1
- package/src/scratchpad/scratchpad.js +0 -137
- package/src/scratchpad/scratchpad.js.map +0 -1
- package/src/sessions/manager.d.ts +0 -11
- package/src/sessions/manager.d.ts.map +0 -1
- package/src/sessions/manager.js +0 -63
- package/src/sessions/manager.js.map +0 -1
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured cross-linker — connects structured entities to existing graph.
|
|
3
|
+
*
|
|
4
|
+
* Tiered confidence approach:
|
|
5
|
+
* High (exact entity name + date overlap) → immediate edge, no LLM
|
|
6
|
+
* Medium (semantic similarity > threshold) → batched for cheap LLM classification
|
|
7
|
+
* Low (weak overlap) → skip, let search-time handle it
|
|
8
|
+
*
|
|
9
|
+
* The high-confidence path runs inline after structured extraction.
|
|
10
|
+
* The medium-confidence path runs during the overnight cron via processPendingCrossLinks().
|
|
11
|
+
*/
|
|
12
|
+
// ---------------------------------------------------------------------------
|
|
13
|
+
// High-confidence immediate linking
|
|
14
|
+
// ---------------------------------------------------------------------------
|
|
15
|
+
/**
 * Count newly mapped entities that bridge two or more structured data sources.
 *
 * For every `[canonicalName, entityId]` pair, up to 10 facts attached to the
 * entity are loaded and their `sourceType` values inspected. When the facts
 * come from at least two of the structured sources (calendar, vault, email,
 * tasks) the entity is logged as a high-confidence bridge and counted.
 *
 * NOTE: no edge is written here. Both sources already resolve to the same
 * entity id, so the returned number is a count of bridging entities, not of
 * rows created in storage.
 *
 * Call this inline after buildEntityIdMap() for structured inputs.
 *
 * @param storage      Storage adapter; only `getFactsForEntity` is called.
 * @param tenantId     Tenant whose facts are inspected.
 * @param newEntityIds Iterable of `[canonicalName, entityId]` pairs
 *                     (canonicalName → entityId). Nullish is treated as empty.
 * @param inputType    Accepted for interface compatibility; currently unused.
 * @returns Number of entities whose facts span two or more structured sources.
 */
export async function linkHighConfidenceMatches(storage, tenantId, newEntityIds, // canonicalName → entityId
inputType) {
    // Structured source types paired with the label used in the log line.
    // The order here fixes the order of the labels in the bridge message.
    const structuredSources = [
        ['structured_event', 'calendar'],
        ['structured_vault', 'vault'],
        ['structured_email', 'email'],
        ['structured_task', 'tasks'],
    ];
    let edgesCreated = 0;
    for (const [canonicalName, entityId] of newEntityIds ?? []) {
        // Skip names shorter than four characters (e.g., "hr", "q3") — too generic.
        if (canonicalName.length < 4) {
            continue;
        }
        try {
            // Only a sample of the entity's facts is inspected (limit: 10), so a
            // source type that appears only in later facts will not be seen.
            const factsResult = await storage.getFactsForEntity(tenantId, entityId, { limit: 10 });
            const sourceTypes = new Set(factsResult.data.map((f) => f.sourceType));
            const labels = structuredSources
                .filter(([sourceType]) => sourceTypes.has(sourceType))
                .map(([, label]) => label);
            if (labels.length >= 2) {
                const bridgeFact = `"${canonicalName}" appears in multiple user data sources: ${labels.join(', ')}`;
                console.error(`[steno-structured-xlink] High-confidence bridge: ${bridgeFact}`);
                edgesCreated++;
            }
        }
        catch (err) {
            // Best effort: a failure on one entity must not stop the others.
            console.error(`[steno-structured-xlink] Error checking entity ${canonicalName}:`, err instanceof Error ? err.message : err);
        }
    }
    return edgesCreated;
}
|
|
63
|
+
/**
 * Find medium-confidence cross-link candidates across the tenant.
 *
 * Runs a keyword search to collect facts, keeps those tagged 'structured',
 * embeds each one and asks the vector index for its nearest neighbours. A
 * neighbour becomes a candidate when it is a different fact with a different
 * `sourceType`. Each unordered fact pair is reported at most once, even when
 * it is discovered from both sides.
 *
 * No check for an existing edge is made here; entity ids and names in the
 * returned records are left as empty strings for the caller to fill in.
 *
 * @param storage   Storage adapter; `keywordSearch` and `vectorSearch` are called.
 * @param embedding Embedding adapter; `embed(text)` is called once per structured fact.
 * @param tenantId  Tenant to search in.
 * @param scope     Scope passed through to both searches.
 * @param scopeId   Scope id passed through to both searches.
 * @param options   Optional `{ minSimilarity = 0.6, maxCandidates = 50 }`.
 * @returns Array of candidate pair records (at most `maxCandidates`).
 */
export async function findPendingCrossLinks(storage, embedding, tenantId, scope, scopeId, options) {
    const minSim = options?.minSimilarity ?? 0.6;
    const maxCandidates = options?.maxCandidates ?? 50;
    // There is no listFacts method, so structured facts are located with a
    // keyword search and then filtered by tag below.
    const recentStructuredFacts = await storage.keywordSearch({
        query: 'structured event task email vault',
        tenantId,
        scope,
        scopeId,
        limit: 100,
    });
    if (recentStructuredFacts.length === 0) {
        return [];
    }
    const candidates = [];
    // Unordered fact-id pairs already emitted, so A↔B is not reported twice.
    const seenPairs = new Set();
    for (const { fact } of recentStructuredFacts) {
        // Checked before embedding so no embedding call is spent once the cap is hit.
        if (candidates.length >= maxCandidates) {
            break;
        }
        if (!fact.tags?.includes('structured')) {
            continue;
        }
        const factEmbedding = await embedding.embed(fact.content);
        const similar = await storage.vectorSearch({
            embedding: factEmbedding,
            tenantId,
            scope,
            scopeId,
            limit: 5,
            minSimilarity: minSim,
        });
        for (const neighbour of similar) {
            const other = neighbour.fact;
            // Skip self-matches and same-source matches.
            if (other.id === fact.id || other.sourceType === fact.sourceType) {
                continue;
            }
            const pairKey = [fact.id, other.id].sort().join('\u0000');
            if (seenPairs.has(pairKey)) {
                continue;
            }
            seenPairs.add(pairKey);
            candidates.push({
                entityId: '', // filled by caller
                entityName: '',
                factId: fact.id,
                factContent: fact.content,
                sourceType: fact.sourceType,
                candidateEntityId: '',
                candidateEntityName: '',
                candidateFactId: other.id,
                candidateFactContent: other.content,
                candidateSourceType: other.sourceType,
                similarity: neighbour.similarity,
            });
            if (candidates.length >= maxCandidates) {
                break;
            }
        }
    }
    return candidates;
}
|
|
127
|
+
/**
 * Process pending cross-links with a single cheap LLM call.
 *
 * Collects candidate fact pairs via findPendingCrossLinks(), sends them all
 * to the model in one prompt, and creates an edge between the first entity of
 * each fact for every pair the model labels "same_as" or "related_to".
 *
 * Safeguards applied to the model output:
 *  - any relation other than "same_as" / "related_to" is ignored;
 *  - pairs whose facts resolve to the same entity are ignored (no self-loops);
 *  - the same entity pair + relation is linked at most once per batch;
 *  - a failing createEdge call is logged and skipped, the batch continues.
 *
 * Called by the overnight cron.
 *
 * @param storage   Storage adapter; `getEntitiesForFact` and `createEdge` are called.
 * @param embedding Embedding adapter, forwarded to findPendingCrossLinks().
 * @param llm       LLM adapter; `complete(messages, options)` is called once.
 * @param tenantId  Tenant to process.
 * @param scope     Scope forwarded to findPendingCrossLinks().
 * @param scopeId   Scope id forwarded to findPendingCrossLinks().
 * @returns `{ processed, edgesCreated }` — number of candidate pairs sent to
 *          the model and number of edges successfully created.
 */
export async function processPendingCrossLinks(storage, embedding, llm, tenantId, scope, scopeId) {
    const candidates = await findPendingCrossLinks(storage, embedding, tenantId, scope, scopeId);
    if (candidates.length === 0) {
        return { processed: 0, edgesCreated: 0 };
    }
    // Model answers that produce an edge, mapped to the stored edge type.
    // A Map (not an object) so answers like "constructor" cannot match.
    const edgeTypeByRelation = new Map([
        ['same_as', 'same_as'],
        ['related_to', 'associative'],
    ]);
    // Build a single LLM prompt with all candidate pairs
    const pairsText = candidates.map((c, i) => `${i + 1}. Fact A (${c.sourceType}): "${c.factContent.slice(0, 150)}"\n Fact B (${c.candidateSourceType}): "${c.candidateFactContent.slice(0, 150)}"`).join('\n\n');
    const prompt = `You are analyzing pairs of user data items to determine if they are related.
For each pair, respond with ONE of:
- "same_as" — they refer to the same real-world thing (e.g., a vault save and a calendar event for the same event)
- "related_to" — they are topically connected but not the same thing
- "unrelated" — no meaningful connection

Respond as JSON array: [{"pair": 1, "relation": "same_as"}, ...]

Pairs:
${pairsText}`;
    let edgesCreated = 0;
    try {
        const response = await llm.complete([{ role: 'user', content: prompt }], { temperature: 0, responseFormat: 'json' });
        const parsed = JSON.parse(response.content);
        // JSON mode may wrap the array in an object; accept the common wrappers.
        const listed = Array.isArray(parsed) ? parsed : parsed?.pairs ?? parsed?.results ?? [];
        const classifications = Array.isArray(listed) ? listed : [];
        const linkedPairs = new Set();
        for (const classification of classifications) {
            // "pair" is 1-based in the prompt; anything that does not map to a
            // candidate (0, NaN, out of range) yields undefined and is skipped.
            const idx = Number(classification?.pair ?? classification?.index ?? 0) - 1;
            const relation = classification?.relation ?? classification?.type;
            const candidate = candidates[idx];
            const edgeType = edgeTypeByRelation.get(relation);
            if (!candidate || edgeType === undefined) {
                continue;
            }
            // Get entities for both facts to create the edge
            const [entitiesA, entitiesB] = await Promise.all([
                storage.getEntitiesForFact(candidate.factId),
                storage.getEntitiesForFact(candidate.candidateFactId),
            ]);
            if (entitiesA.length === 0 || entitiesB.length === 0) {
                continue;
            }
            const sourceId = entitiesA[0].id;
            const targetId = entitiesB[0].id;
            // Both facts already hang off the same entity — nothing to link.
            if (sourceId === targetId) {
                continue;
            }
            const pairKey = `${[sourceId, targetId].sort().join('\u0000')}\u0000${relation}`;
            if (linkedPairs.has(pairKey)) {
                continue;
            }
            linkedPairs.add(pairKey);
            try {
                await storage.createEdge({
                    id: crypto.randomUUID(),
                    tenantId,
                    sourceId,
                    targetId,
                    relation,
                    edgeType,
                    weight: candidate.similarity,
                    confidence: 0.7,
                    metadata: {
                        autoLinked: true,
                        sourceFactId: candidate.factId,
                        targetFactId: candidate.candidateFactId,
                        method: 'batch_llm_classification',
                    },
                });
                edgesCreated++;
            }
            catch (err) {
                // Edge may already exist — skip it, but leave a trace so real
                // storage failures are not invisible.
                console.error(`[steno-structured-xlink] Skipped ${relation} edge ${sourceId} -> ${targetId}:`, err instanceof Error ? err.message : err);
            }
        }
    }
    catch (err) {
        console.error('[steno-structured-xlink] Batch LLM classification failed:', err instanceof Error ? err.message : err);
    }
    return { processed: candidates.length, edgesCreated };
}
|
|
195
|
+
//# sourceMappingURL=structured-cross-linker.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"structured-cross-linker.js","sourceRoot":"","sources":["../../src/extraction/structured-cross-linker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAOH,8EAA8E;AAC9E,oCAAoC;AACpC,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,OAAuB,EACvB,QAAgB,EAChB,YAAiC,EAAI,2BAA2B;AAChE,SAAiB;IAEjB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,CAAC,aAAa,EAAE,QAAQ,CAAC,IAAI,YAAY,EAAE,CAAC;QACrD,8DAA8D;QAC9D,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEvC,mEAAmE;QACnE,qEAAqE;QACrE,mFAAmF;QACnF,IAAI,CAAC;YACH,+DAA+D;YAC/D,8EAA8E;YAC9E,2EAA2E;YAC3E,gEAAgE;YAChE,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;YACvF,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;YAErE,kFAAkF;YAClF,4EAA4E;YAC5E,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC/D,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC5D,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC5D,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,iBAAwB,CAAC,CAAC;YAE1D,MAAM,gBAAgB,GAAG,CAAC,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;YAC3F,IAAI,gBAAgB,IAAI,CAAC,EAAE,CAAC;gBAC1B,4EAA4E;gBAC5E,MAAM,UAAU,GAAG,IAAI,aAAa,4CAA4C;oBAC9E,WAAW,IAAI,UAAU;oBACzB,QAAQ,IAAI,OAAO;oBACnB,QAAQ,IAAI,OAAO;oBACnB,OAAO,IAAI,OAAO;iBACnB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAE/B,OAAO,CAAC,KAAK,CAAC,oDAAoD,UAAU,EAAE,CAAC,CAAC;gBAChF,YAAY,EAAE,CAAC;YACjB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,kDAAkD,aAAa,GAAG,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC9H,CAAC;IACH,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAoBD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAuB,EACvB,SAA2B,EAC3B,QAAgB,EAChB,KAAa,EACb,OAAe,EACf,OAA4D;IAE5D,MAAM,MAAM,GAAG,OAAO,EAAE,aAAa,IAAI,GAAG,CAAC;IAC7C,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,EAAE,CAAC;IAEnD,sEAAsE;IACtE,0DAA0D;IAC1D,MAAM,qBAAqB,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC;QACxD,KAAK,E
AAE,mCAAmC;QAC1C,QAAQ;QACR,KAAK;QACL,OAAO;QACP,KAAK,EAAE,GAAG;KACX,CAAC,CAAC;IAEH,IAAI,qBAAqB,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElD,MAAM,UAAU,GAAuB,EAAE,CAAC;IAE1C,wFAAwF;IACxF,KAAK,MAAM,KAAK,IAAI,qBAAqB,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;QACxB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,QAAQ,CAAC,YAAY,CAAC;YAAE,SAAS;QAEjD,+CAA+C;QAC/C,MAAM,aAAa,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAE1D,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,YAAY,CAAC;YACzC,SAAS,EAAE,aAAa;YACxB,QAAQ;YACR,KAAK;YACL,OAAO;YACP,KAAK,EAAE,CAAC;YACR,aAAa,EAAE,MAAM;SACtB,CAAC,CAAC;QAEH,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,4CAA4C;YAC5C,IAAI,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,EAAE;gBAAE,SAAS;YACxC,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,IAAI,CAAC,UAAU;gBAAE,SAAS;YAExD,uEAAuE;YACvE,wFAAwF;YACxF,UAAU,CAAC,IAAI,CAAC;gBACd,QAAQ,EAAE,EAAE,EAAE,mBAAmB;gBACjC,UAAU,EAAE,EAAE;gBACd,MAAM,EAAE,IAAI,CAAC,EAAE;gBACf,WAAW,EAAE,IAAI,CAAC,OAAO;gBACzB,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,iBAAiB,EAAE,EAAE;gBACrB,mBAAmB,EAAE,EAAE;gBACvB,eAAe,EAAE,KAAK,CAAC,IAAI,CAAC,EAAE;gBAC9B,oBAAoB,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO;gBACxC,mBAAmB,EAAE,KAAK,CAAC,IAAI,CAAC,UAAU;gBAC1C,UAAU,EAAE,KAAK,CAAC,UAAU;aAC7B,CAAC,CAAC;YAEH,IAAI,UAAU,CAAC,MAAM,IAAI,aAAa;gBAAE,MAAM;QAChD,CAAC;QACD,IAAI,UAAU,CAAC,MAAM,IAAI,aAAa;YAAE,MAAM;IAChD,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC5C,OAAuB,EACvB,SAA2B,EAC3B,GAAe,EACf,QAAgB,EAChB,KAAa,EACb,OAAe;IAEf,MAAM,UAAU,GAAG,MAAM,qBAAqB,CAAC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;IAE7F,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;IAEtE,qDAAqD;IACrD,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACxC,GAAG,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,UAAU,OAAO,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,iBAAiB,CAAC,CAAC,mBAAmB,OAAO,CAAC,CAAC,oBAAoB,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CACxJ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEf,MAAM,MAAM,GAAG;;;;;;;;;EASf,SAAS,EAAE,CAAC;IAEZ,IAAI,YAAY,GAAG
,CAAC,CAAC;IAErB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,GAAG,CAAC,QAAQ,CACjC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,EACnC,EAAE,WAAW,EAAE,CAAC,EAAE,cAAc,EAAE,MAAM,EAAE,CAC3C,CAAC;QAEF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,eAAe,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;QAE9F,KAAK,MAAM,cAAc,IAAI,eAAe,EAAE,CAAC;YAC7C,MAAM,GAAG,GAAG,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACnE,MAAM,QAAQ,GAAG,cAAc,CAAC,QAAQ,IAAI,cAAc,CAAC,IAAI,CAAC;YAChE,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;YAElC,IAAI,CAAC,SAAS,IAAI,QAAQ,KAAK,WAAW;gBAAE,SAAS;YAErD,iDAAiD;YACjD,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,kBAAkB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,kBAAkB,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;YAE9E,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACjD,MAAM,QAAQ,GAAG,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,SAAkB,CAAC,CAAC,CAAC,aAAsB,CAAC;gBACtF,IAAI,CAAC;oBACH,MAAM,OAAO,CAAC,UAAU,CAAC;wBACvB,EAAE,EAAE,MAAM,CAAC,UAAU,EAAE;wBACvB,QAAQ;wBACR,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAE,CAAC,EAAE;wBAC1B,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAE,CAAC,EAAE;wBAC1B,QAAQ;wBACR,QAAQ;wBACR,MAAM,EAAE,SAAS,CAAC,UAAU;wBAC5B,UAAU,EAAE,GAAG;wBACf,QAAQ,EAAE;4BACR,UAAU,EAAE,IAAI;4BAChB,YAAY,EAAE,SAAS,CAAC,MAAM;4BAC9B,YAAY,EAAE,SAAS,CAAC,eAAe;4BACvC,MAAM,EAAE,0BAA0B;yBACnC;qBACF,CAAC,CAAC;oBACH,YAAY,EAAE,CAAC;gBACjB,CAAC;gBAAC,MAAM,CAAC;oBACP,gCAAgC;gBAClC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,2DAA2D,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACvH,CAAC;IAED,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,MAAM,EAAE,YAAY,EAAE,CAAC;AACxD,CAAC"}
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured data extractor — bypasses LLM entirely.
|
|
3
|
+
*
|
|
4
|
+
* Handles structured_event, structured_task, structured_email, structured_vault
|
|
5
|
+
* input types by directly creating entities, edges, and facts from known fields.
|
|
6
|
+
* Zero LLM cost, deterministic, high confidence.
|
|
7
|
+
*/
|
|
8
|
+
import type { ExtractionResult } from './types.js';
|
|
9
|
+
/**
 * Payload describing a single event; this is the argument type of
 * extractStructuredEvent().
 *
 * Only `title` and `startTime` are required. `sourceType`, when present, is
 * restricted to 'calendar' or 'vault'.
 * NOTE(review): `startTime` / `endTime` are presumably ISO-8601 timestamps — confirm against callers.
 */
export interface StructuredEvent {
|
|
10
|
+
title: string;
|
|
11
|
+
startTime: string;
|
|
12
|
+
endTime?: string;
|
|
13
|
+
location?: string;
|
|
14
|
+
description?: string;
|
|
15
|
+
organizers?: string[];
|
|
16
|
+
attendees?: string[];
|
|
17
|
+
url?: string;
|
|
18
|
+
provider?: string;
|
|
19
|
+
externalId?: string;
|
|
20
|
+
sourceType?: 'calendar' | 'vault';
|
|
21
|
+
}
|
|
22
|
+
/**
 * Payload describing a single task; this is the argument type of
 * extractStructuredTask().
 *
 * Only `title` is required; every other field is optional.
 * NOTE(review): `dueDate` is presumably an ISO-8601 date string, and the
 * accepted values of `status` / `priority` are not constrained by this type —
 * confirm against callers.
 */
export interface StructuredTask {
|
|
23
|
+
title: string;
|
|
24
|
+
description?: string;
|
|
25
|
+
status?: string;
|
|
26
|
+
priority?: string;
|
|
27
|
+
category?: string;
|
|
28
|
+
dueDate?: string;
|
|
29
|
+
tags?: string[];
|
|
30
|
+
externalId?: string;
|
|
31
|
+
}
|
|
32
|
+
/**
 * Payload describing a single email message; this is the argument type of
 * extractStructuredEmail().
 *
 * `subject`, `from` and `date` are required; the remaining fields are optional.
 * NOTE(review): `date` is presumably an ISO-8601 timestamp and `from` / `to`
 * presumably hold addresses or display names — confirm against callers.
 */
export interface StructuredEmail {
|
|
33
|
+
subject: string;
|
|
34
|
+
from: string;
|
|
35
|
+
to?: string[];
|
|
36
|
+
body?: string;
|
|
37
|
+
date: string;
|
|
38
|
+
isUnread?: boolean;
|
|
39
|
+
threadId?: string;
|
|
40
|
+
provider?: string;
|
|
41
|
+
externalId?: string;
|
|
42
|
+
}
|
|
43
|
+
/**
 * Payload describing a single saved vault item; this is the argument type of
 * extractStructuredVault().
 *
 * `title`, `contentType` and `savedAt` are required; the remaining fields are
 * optional. `metadata` is an open string-keyed record of unknown values.
 * NOTE(review): `savedAt` is presumably an ISO-8601 timestamp, and the set of
 * valid `contentType` values is not constrained by this type — confirm against callers.
 */
export interface StructuredVault {
|
|
44
|
+
title: string;
|
|
45
|
+
contentType: string;
|
|
46
|
+
url?: string;
|
|
47
|
+
source?: string;
|
|
48
|
+
savedAt: string;
|
|
49
|
+
content?: string;
|
|
50
|
+
metadata?: Record<string, unknown>;
|
|
51
|
+
externalId?: string;
|
|
52
|
+
}
|
|
53
|
+
/**
 * Builds an ExtractionResult from a StructuredEvent payload.
 * Per this module's header comment, structured inputs are turned into
 * entities, edges and facts directly from known fields, without an LLM call.
 */
export declare function extractStructuredEvent(data: StructuredEvent): ExtractionResult;
|
|
54
|
+
/**
 * Builds an ExtractionResult from a StructuredTask payload.
 * Per this module's header comment, structured inputs are turned into
 * entities, edges and facts directly from known fields, without an LLM call.
 */
export declare function extractStructuredTask(data: StructuredTask): ExtractionResult;
|
|
55
|
+
/**
 * Builds an ExtractionResult from a StructuredEmail payload.
 * Per this module's header comment, structured inputs are turned into
 * entities, edges and facts directly from known fields, without an LLM call.
 */
export declare function extractStructuredEmail(data: StructuredEmail): ExtractionResult;
|
|
56
|
+
/**
 * Builds an ExtractionResult from a StructuredVault payload.
 * Per this module's header comment, structured inputs are turned into
 * entities, edges and facts directly from known fields, without an LLM call.
 */
export declare function extractStructuredVault(data: StructuredVault): ExtractionResult;
|
|
57
|
+
/**
 * Takes an input-type string and returns a boolean.
 * NOTE(review): presumably true for the structured_event / structured_task /
 * structured_email / structured_vault input types named in the module header —
 * confirm against the implementation.
 */
export declare function isStructuredInput(inputType: string): boolean;
|
|
58
|
+
/**
 * Generic entry point: takes an input-type string plus an untyped payload and
 * returns an ExtractionResult.
 * NOTE(review): presumably dispatches to the per-type extractStructured*
 * functions based on `inputType`; how an unknown type or a malformed payload
 * is handled is not visible from this declaration — confirm against the implementation.
 */
export declare function extractStructured(inputType: string, data: unknown): ExtractionResult;
|
|
59
|
+
//# sourceMappingURL=structured-extractor.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"structured-extractor.d.ts","sourceRoot":"","sources":["../../src/extraction/structured-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAiD,MAAM,YAAY,CAAC;AAOlG,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,UAAU,GAAG,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAkCD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,eAAe,GAAG,gBAAgB,CA0G9E;AAED,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,cAAc,GAAG,gBAAgB,CAiE5E;AAED,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,eAAe,GAAG,gBAAgB,CAqE9E;AAED,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,eAAe,GAAG,gBAAgB,CA2F9E;AAaD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAE5D;AAED,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,GAAG,gBAAgB,CAapF"}
|
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Structured data extractor — bypasses LLM entirely.
|
|
3
|
+
*
|
|
4
|
+
* Handles structured_event, structured_task, structured_email, structured_vault
|
|
5
|
+
* input types by directly creating entities, edges, and facts from known fields.
|
|
6
|
+
* Zero LLM cost, deterministic, high confidence.
|
|
7
|
+
*/
|
|
8
|
+
// ---------------------------------------------------------------------------
|
|
9
|
+
// Helpers
|
|
10
|
+
// ---------------------------------------------------------------------------
|
|
11
|
+
/**
 * Derive the canonical lookup key for an entity name.
 *
 * The name is lower-cased, every character other than an ASCII letter, digit,
 * whitespace, dot or hyphen is removed, whitespace runs are squeezed to one
 * space, and the ends are trimmed.
 *
 * @param {string} name - Display name to canonicalize.
 * @returns {string} Canonical form; empty when nothing survives the filter.
 */
function canonicalize(name) {
    const lowered = name.toLowerCase();
    const allowedOnly = lowered.replace(/[^a-z0-9\s.-]/g, '');
    const singleSpaced = allowedOnly.replace(/\s+/g, ' ');
    return singleSpaced.trim();
}
|
|
14
|
+
/**
 * Render a date/time string as a long en-US calendar date,
 * e.g. "Friday, March 15, 2024".
 *
 * Input that cannot be parsed is returned unchanged. The explicit validity
 * check is required because `toLocaleDateString` does not throw on an invalid
 * Date; it returns the literal text "Invalid Date".
 *
 * @param {string} iso - Date or date-time string accepted by `new Date()`.
 * @returns {string} The formatted date, or `iso` itself when it is not a
 *   parseable date.
 */
function formatDate(iso) {
    try {
        const parsed = new Date(iso);
        if (Number.isNaN(parsed.getTime())) {
            return iso;
        }
        const options = {
            weekday: 'long', year: 'numeric', month: 'long', day: 'numeric',
        };
        // A date-only string ("YYYY-MM-DD") is parsed as midnight UTC. Formatting
        // it in the host time zone would show the previous day anywhere west of
        // UTC, so render such values in UTC to keep the calendar day intact.
        if (typeof iso === 'string' && /^\d{4}-\d{2}-\d{2}$/.test(iso)) {
            options.timeZone = 'UTC';
        }
        return parsed.toLocaleDateString('en-US', options);
    }
    catch {
        // Best-effort formatter: never let a formatting failure break extraction.
        return iso;
    }
}
|
|
24
|
+
/**
 * Render the time-of-day part of a date-time string in 12-hour en-US form,
 * e.g. "2:30 PM", using the host time zone.
 *
 * Returns an empty string for input that cannot be parsed. The explicit
 * validity check is required because `toLocaleTimeString` does not throw on
 * an invalid Date; it returns the literal text "Invalid Date".
 *
 * @param {string} iso - Date-time string accepted by `new Date()`.
 * @returns {string} The formatted time, or `''` when `iso` is not parseable.
 */
function formatTime(iso) {
    try {
        const parsed = new Date(iso);
        if (Number.isNaN(parsed.getTime())) {
            return '';
        }
        return parsed.toLocaleTimeString('en-US', {
            hour: 'numeric', minute: '2-digit', hour12: true,
        });
    }
    catch {
        // Best-effort formatter: never let a formatting failure break extraction.
        return '';
    }
}
|
|
34
|
+
// ---------------------------------------------------------------------------
|
|
35
|
+
// Extractors
|
|
36
|
+
// ---------------------------------------------------------------------------
|
|
37
|
+
/**
 * Turn a structured calendar event into facts, entities and edges without
 * calling an LLM.
 *
 * Emits one `event` entity for the event itself, a `location` entity when
 * `data.location` is set, one `organization` entity per organizer and one
 * `person` entity per attendee. Each secondary entity is linked to the event
 * by an edge with confidence 1.0, and a single summary fact references every
 * entity.
 *
 * Organizer and attendee lists are cleaned first: non-string and blank
 * entries are skipped, names are trimmed, and exact duplicates are collapsed
 * so the same name does not yield repeated entities or edges.
 *
 * @param {object} data - Structured event payload. Reads `title`,
 *   `startTime`, `endTime`, `location`, `description`, `organizers`,
 *   `attendees`, `url`, `provider`, `externalId` and `sourceType`.
 * @returns {object} Result with `facts` (one entry), `entities`, `edges`,
 *   `tier: 'heuristic'`, `confidence: 1.0`, zero token counts and
 *   `model: null`. The fact's `eventDate` is `undefined` when `startTime`
 *   cannot be parsed.
 */
export function extractStructuredEvent(data) {
    // Trimmed, de-duplicated, string-only copy of a name list (order preserved).
    const cleanNames = (values) => {
        const unique = new Set();
        for (const value of values ?? []) {
            if (typeof value !== 'string') {
                continue;
            }
            const trimmed = value.trim();
            if (trimmed) {
                unique.add(trimmed);
            }
        }
        return [...unique];
    };
    const organizers = cleanNames(data.organizers);
    const attendees = cleanNames(data.attendees);
    const entities = [];
    const edges = [];
    // Main event entity
    const eventCanonical = canonicalize(data.title);
    entities.push({
        name: data.title,
        entityType: 'event',
        canonicalName: eventCanonical,
        properties: {
            startTime: data.startTime,
            endTime: data.endTime,
            location: data.location,
            url: data.url,
            provider: data.provider,
            externalId: data.externalId,
            sourceType: data.sourceType,
        },
    });
    // Location entity
    if (data.location) {
        const locCanonical = canonicalize(data.location);
        entities.push({
            name: data.location,
            entityType: 'location',
            canonicalName: locCanonical,
            properties: {},
        });
        edges.push({
            sourceName: eventCanonical,
            targetName: locCanonical,
            relation: 'located_at',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    // Organizer entities: the event points at each organizer.
    for (const org of organizers) {
        const orgCanonical = canonicalize(org);
        entities.push({
            name: org,
            entityType: 'organization',
            canonicalName: orgCanonical,
            properties: {},
        });
        edges.push({
            sourceName: eventCanonical,
            targetName: orgCanonical,
            relation: 'hosted_by',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    // Attendee entities: each attendee points at the event.
    for (const attendee of attendees) {
        const attCanonical = canonicalize(attendee);
        entities.push({
            name: attendee,
            entityType: 'person',
            canonicalName: attCanonical,
            properties: {},
        });
        edges.push({
            sourceName: attCanonical,
            targetName: eventCanonical,
            relation: 'attends',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    // Build fact content
    let factContent = `Event: "${data.title}" on ${formatDate(data.startTime)}`;
    // Only append a clock time when the formatter actually produced one, so the
    // sentence never ends in a dangling " at " or " - ".
    const startClock = data.startTime ? formatTime(data.startTime) : '';
    if (startClock) {
        factContent += ` at ${startClock}`;
    }
    const endClock = data.endTime ? formatTime(data.endTime) : '';
    if (endClock) {
        factContent += ` - ${endClock}`;
    }
    if (data.location) {
        factContent += ` at ${data.location}`;
    }
    if (organizers.length) {
        factContent += `. Hosted by ${organizers.join(', ')}`;
    }
    if (data.description) {
        factContent += `. ${data.description.slice(0, 300)}`;
    }
    // An unparseable start time must not leak an Invalid Date into the fact.
    const startDate = new Date(data.startTime);
    const fact = {
        content: factContent,
        importance: 0.8,
        confidence: 1.0,
        sourceType: (data.sourceType === 'vault' ? 'structured_vault' : 'structured_event'),
        modality: 'text',
        tags: ['structured', 'event', ...(data.provider ? [data.provider] : [])],
        originalContent: JSON.stringify(data),
        // Event first, then every other entity once (Set keeps insertion order).
        entityCanonicalNames: [...new Set([eventCanonical, ...entities.map((e) => e.canonicalName)])],
        eventDate: Number.isNaN(startDate.getTime()) ? undefined : startDate,
        documentDate: new Date(),
    };
    return {
        facts: [fact],
        entities,
        edges,
        tier: 'heuristic',
        confidence: 1.0,
        tokensInput: 0,
        tokensOutput: 0,
        model: null,
    };
}
|
|
142
|
+
/**
 * Turn a structured task into facts, entities and edges without calling an
 * LLM.
 *
 * Emits one `task` entity, plus a `topic` entity and a `categorized_as` edge
 * when `data.category` is set, and a single summary fact that references the
 * task entity.
 *
 * @param {object} data - Structured task payload. Reads `title`, `status`,
 *   `priority`, `category`, `dueDate`, `description`, `tags` and
 *   `externalId`.
 * @returns {object} Result with `facts` (one entry), `entities`, `edges`,
 *   `tier: 'heuristic'`, `confidence: 1.0`, zero token counts and
 *   `model: null`. The fact's importance is 0.9 for high/urgent priority
 *   (matched case-insensitively) and 0.7 otherwise; its `eventDate` is
 *   `undefined` when `dueDate` is missing or cannot be parsed.
 */
export function extractStructuredTask(data) {
    const entities = [];
    const edges = [];
    const taskCanonical = canonicalize(data.title);
    entities.push({
        name: data.title,
        entityType: 'task',
        canonicalName: taskCanonical,
        properties: {
            status: data.status,
            priority: data.priority,
            category: data.category,
            dueDate: data.dueDate,
            externalId: data.externalId,
        },
    });
    // Category entity
    if (data.category) {
        const catCanonical = canonicalize(data.category);
        entities.push({
            name: data.category,
            entityType: 'topic',
            canonicalName: catCanonical,
            properties: {},
        });
        edges.push({
            sourceName: taskCanonical,
            targetName: catCanonical,
            relation: 'categorized_as',
            edgeType: 'hierarchical',
            confidence: 1.0,
        });
    }
    let factContent = `Task: "${data.title}"`;
    if (data.status) {
        factContent += ` (${data.status})`;
    }
    if (data.priority) {
        factContent += `, priority: ${data.priority}`;
    }
    if (data.dueDate) {
        factContent += `, due ${formatDate(data.dueDate)}`;
    }
    if (data.description) {
        factContent += `. ${data.description.slice(0, 200)}`;
    }
    // Providers differ in casing ("High", "URGENT"); compare a normalized copy
    // while keeping the original spelling in the fact text and properties.
    const priorityKey = typeof data.priority === 'string'
        ? data.priority.trim().toLowerCase()
        : data.priority;
    const isPressing = priorityKey === 'high' || priorityKey === 'urgent';
    // An unparseable due date must not leak an Invalid Date into the fact.
    const dueDate = data.dueDate ? new Date(data.dueDate) : undefined;
    const hasValidDueDate = dueDate !== undefined && !Number.isNaN(dueDate.getTime());
    const fact = {
        content: factContent,
        importance: isPressing ? 0.9 : 0.7,
        confidence: 1.0,
        sourceType: 'structured_task',
        modality: 'text',
        // Guard the spread: a non-array `tags` (e.g. a string) would otherwise be
        // exploded into single characters.
        tags: ['structured', 'task', ...(Array.isArray(data.tags) ? data.tags : [])],
        originalContent: JSON.stringify(data),
        entityCanonicalNames: [taskCanonical],
        eventDate: hasValidDueDate ? dueDate : undefined,
        documentDate: new Date(),
    };
    return {
        facts: [fact],
        entities,
        edges,
        tier: 'heuristic',
        confidence: 1.0,
        tokensInput: 0,
        tokensOutput: 0,
        model: null,
    };
}
|
|
207
|
+
/**
 * Turn a structured email into facts, entities and edges without calling an
 * LLM.
 *
 * Emits a `person` entity for the sender, a `topic` entity for the subject
 * when it is longer than five characters (linked from the sender by an
 * `authored` edge), one `person` entity per recipient, and a single summary
 * fact that references the sender.
 *
 * The recipient list is cleaned first: non-string and blank entries are
 * skipped, addresses are trimmed, and exact duplicates are collapsed.
 *
 * @param {object} data - Structured email payload. Reads `from`, `subject`,
 *   `to`, `date`, `body`, `isUnread`, `threadId` and `provider`.
 * @returns {object} Result with `facts` (one entry), `entities`, `edges`,
 *   `tier: 'heuristic'`, `confidence: 1.0`, zero token counts and
 *   `model: null`. The fact's `eventDate` is `undefined` when `date` is
 *   missing or cannot be parsed.
 */
export function extractStructuredEmail(data) {
    const entities = [];
    const edges = [];
    // Sender entity
    const senderCanonical = canonicalize(data.from);
    entities.push({
        name: data.from,
        entityType: 'person',
        canonicalName: senderCanonical,
        properties: { email: data.from },
    });
    // Subject as topic entity if substantial
    if (data.subject && data.subject.length > 5) {
        const subjectCanonical = canonicalize(data.subject);
        entities.push({
            name: data.subject,
            entityType: 'topic',
            canonicalName: subjectCanonical,
            properties: { threadId: data.threadId, provider: data.provider },
        });
        edges.push({
            sourceName: senderCanonical,
            targetName: subjectCanonical,
            relation: 'authored',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    // Recipients: keep each distinct, non-blank string address once. Non-string
    // entries would otherwise throw inside canonicalize().
    const recipients = new Set();
    for (const to of data.to ?? []) {
        if (typeof to !== 'string') {
            continue;
        }
        const address = to.trim();
        if (address) {
            recipients.add(address);
        }
    }
    for (const address of recipients) {
        entities.push({
            name: address,
            entityType: 'person',
            canonicalName: canonicalize(address),
            properties: { email: address },
        });
    }
    let factContent = `Email from ${data.from}: "${data.subject}"`;
    if (data.date) {
        factContent += ` on ${formatDate(data.date)}`;
    }
    if (data.body) {
        factContent += `. ${data.body.slice(0, 300)}`;
    }
    // A missing or unparseable date must not leak an Invalid Date into the fact.
    const sentAt = new Date(data.date);
    const fact = {
        content: factContent,
        importance: data.isUnread ? 0.8 : 0.5,
        confidence: 1.0,
        sourceType: 'structured_email',
        modality: 'text',
        tags: ['structured', 'email', ...(data.provider ? [data.provider] : []), ...(data.isUnread ? ['unread'] : [])],
        originalContent: JSON.stringify(data),
        entityCanonicalNames: [senderCanonical],
        eventDate: Number.isNaN(sentAt.getTime()) ? undefined : sentAt,
        documentDate: new Date(),
    };
    return {
        facts: [fact],
        entities,
        edges,
        tier: 'heuristic',
        confidence: 1.0,
        tokensInput: 0,
        tokensOutput: 0,
        model: null,
    };
}
|
|
273
|
+
/**
 * Turn a saved vault item into facts, entities and edges without calling an
 * LLM.
 *
 * Emits a main entity (`event` when `contentType` is 'event', otherwise
 * `topic`), a `source` entity with a `saved_from` edge when `data.source` is
 * set, one `organization` entity with a `hosted_by` edge per organizer found
 * in `metadata.organizer` / `metadata.organizers`, and a single summary fact
 * referencing every entity.
 *
 * Organizers may be given as an array or as one string; a string is split on
 * commas and the word "and". Blank and non-string entries are skipped.
 *
 * @param {object} data - Vault payload. Reads `title`, `contentType`, `url`,
 *   `source`, `savedAt`, `content`, `externalId` and `metadata`. All
 *   `metadata` keys are copied onto the main entity's properties and, being
 *   spread last, win over the explicit fields on a name clash.
 * @returns {object} Result with `facts` (one entry), `entities`, `edges`,
 *   `tier: 'heuristic'`, `confidence: 1.0`, zero token counts and
 *   `model: null`. The fact's `eventDate` is `undefined` when `metadata.date`
 *   is missing or unparseable; `documentDate` falls back to the current time
 *   when `savedAt` is unparseable.
 */
export function extractStructuredVault(data) {
    // Parse a date-like value, yielding undefined instead of an Invalid Date.
    const toValidDate = (value) => {
        const parsed = new Date(value);
        return Number.isNaN(parsed.getTime()) ? undefined : parsed;
    };
    const entities = [];
    const edges = [];
    const vaultCanonical = canonicalize(data.title);
    entities.push({
        name: data.title,
        entityType: data.contentType === 'event' ? 'event' : 'topic',
        canonicalName: vaultCanonical,
        properties: {
            contentType: data.contentType,
            url: data.url,
            source: data.source,
            savedAt: data.savedAt,
            externalId: data.externalId,
            ...(data.metadata ?? {}),
        },
    });
    // Source domain entity
    if (data.source) {
        const sourceCanonical = canonicalize(data.source);
        entities.push({
            name: data.source,
            entityType: 'source',
            canonicalName: sourceCanonical,
            properties: {},
        });
        edges.push({
            sourceName: vaultCanonical,
            targetName: sourceCanonical,
            relation: 'saved_from',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    // If event type, extract organizers from metadata
    const organizers = data.metadata?.organizer || data.metadata?.organizers;
    if (organizers) {
        const orgList = typeof organizers === 'string'
            ? organizers.split(/,\s*|(?:\s+and\s+)/)
            : Array.isArray(organizers) ? organizers : [];
        for (const org of orgList) {
            // metadata is free-form, so an array may hold non-string values that
            // would throw on .trim(); skip them.
            if (typeof org !== 'string') {
                continue;
            }
            const trimmed = org.trim();
            if (!trimmed) {
                continue;
            }
            const orgCanonical = canonicalize(trimmed);
            entities.push({
                name: trimmed,
                entityType: 'organization',
                canonicalName: orgCanonical,
                properties: {},
            });
            edges.push({
                sourceName: vaultCanonical,
                targetName: orgCanonical,
                relation: 'hosted_by',
                edgeType: 'associative',
                confidence: 1.0,
            });
        }
    }
    let factContent = `Saved to vault: "${data.title}" (${data.contentType})`;
    if (data.source) {
        factContent += ` from ${data.source}`;
    }
    if (data.savedAt) {
        factContent += ` on ${formatDate(data.savedAt)}`;
    }
    if (data.content) {
        factContent += `. ${data.content.slice(0, 300)}`;
    }
    const fact = {
        content: factContent,
        importance: 0.7,
        confidence: 1.0,
        sourceType: 'structured_vault',
        modality: 'text',
        tags: ['structured', 'vault', data.contentType],
        originalContent: JSON.stringify(data),
        // Main entity first, then every other entity once (Set keeps insertion order).
        entityCanonicalNames: [...new Set([vaultCanonical, ...entities.map((e) => e.canonicalName)])],
        eventDate: data.metadata?.date ? toValidDate(data.metadata.date) : undefined,
        documentDate: toValidDate(data.savedAt) ?? new Date(),
    };
    return {
        facts: [fact],
        entities,
        edges,
        tier: 'heuristic',
        confidence: 1.0,
        tokensInput: 0,
        tokensOutput: 0,
        model: null,
    };
}
|
|
363
|
+
// ---------------------------------------------------------------------------
|
|
364
|
+
// Router — picks the right extractor based on inputType
|
|
365
|
+
// ---------------------------------------------------------------------------
|
|
366
|
+
/** Input type identifiers that the structured extractors handle. */
const STRUCTURED_INPUT_TYPES = new Set(['structured_event', 'structured_task', 'structured_email', 'structured_vault']);
/**
 * Report whether an input type is one of the structured types above.
 *
 * @param {string} inputType - Input type identifier to check.
 * @returns {boolean} `true` when `inputType` is a known structured type.
 */
export function isStructuredInput(inputType) {
    const recognized = STRUCTURED_INPUT_TYPES.has(inputType);
    return recognized;
}
|
|
375
|
+
// Maps each structured input type to the extractor that handles it.
const STRUCTURED_EXTRACTORS = new Map([
    ['structured_event', extractStructuredEvent],
    ['structured_task', extractStructuredTask],
    ['structured_email', extractStructuredEmail],
    ['structured_vault', extractStructuredVault],
]);
/**
 * Dispatch a structured payload to the extractor registered for its input
 * type.
 *
 * @param {string} inputType - One of 'structured_event', 'structured_task',
 *   'structured_email' or 'structured_vault'.
 * @param {object} data - Payload forwarded unchanged to the chosen extractor.
 * @returns {object} Whatever the chosen extractor returns.
 * @throws {Error} When `inputType` is not one of the known structured types.
 */
export function extractStructured(inputType, data) {
    const extractor = STRUCTURED_EXTRACTORS.get(inputType);
    if (extractor === undefined) {
        throw new Error(`Unknown structured input type: ${inputType}`);
    }
    return extractor(data);
}
|
|
389
|
+
//# sourceMappingURL=structured-extractor.js.map
|