@steno-ai/engine 0.1.15 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (255) hide show
  1. package/dist/adapters/storage.d.ts +29 -2
  2. package/dist/adapters/storage.d.ts.map +1 -1
  3. package/dist/config.d.ts +3 -3
  4. package/dist/config.d.ts.map +1 -1
  5. package/dist/config.js +9 -0
  6. package/dist/config.js.map +1 -1
  7. package/dist/extraction/index.d.ts +2 -0
  8. package/dist/extraction/index.d.ts.map +1 -1
  9. package/dist/extraction/index.js +2 -0
  10. package/dist/extraction/index.js.map +1 -1
  11. package/dist/extraction/pipeline.d.ts.map +1 -1
  12. package/dist/extraction/pipeline.js +48 -1
  13. package/dist/extraction/pipeline.js.map +1 -1
  14. package/dist/extraction/structured-cross-linker.d.ts +55 -0
  15. package/dist/extraction/structured-cross-linker.d.ts.map +1 -0
  16. package/dist/extraction/structured-cross-linker.js +195 -0
  17. package/dist/extraction/structured-cross-linker.js.map +1 -0
  18. package/dist/extraction/structured-extractor.d.ts +59 -0
  19. package/dist/extraction/structured-extractor.d.ts.map +1 -0
  20. package/dist/extraction/structured-extractor.js +389 -0
  21. package/dist/extraction/structured-extractor.js.map +1 -0
  22. package/dist/extraction/types.d.ts +3 -1
  23. package/dist/extraction/types.d.ts.map +1 -1
  24. package/dist/identity/index.d.ts +2 -0
  25. package/dist/identity/index.d.ts.map +1 -0
  26. package/dist/identity/index.js +2 -0
  27. package/dist/identity/index.js.map +1 -0
  28. package/dist/identity/resolver.d.ts +31 -0
  29. package/dist/identity/resolver.d.ts.map +1 -0
  30. package/dist/identity/resolver.js +122 -0
  31. package/dist/identity/resolver.js.map +1 -0
  32. package/dist/index.d.ts +1 -0
  33. package/dist/index.d.ts.map +1 -1
  34. package/dist/index.js +1 -0
  35. package/dist/index.js.map +1 -1
  36. package/dist/models/edge.d.ts +6 -6
  37. package/dist/models/entity.d.ts +32 -0
  38. package/dist/models/entity.d.ts.map +1 -1
  39. package/dist/models/entity.js +11 -0
  40. package/dist/models/entity.js.map +1 -1
  41. package/dist/models/extraction.d.ts +6 -6
  42. package/dist/models/fact.d.ts +6 -6
  43. package/dist/retrieval/graph-traversal.d.ts +4 -1
  44. package/dist/retrieval/graph-traversal.d.ts.map +1 -1
  45. package/dist/retrieval/graph-traversal.js +6 -3
  46. package/dist/retrieval/graph-traversal.js.map +1 -1
  47. package/dist/retrieval/search.d.ts.map +1 -1
  48. package/dist/retrieval/search.js +56 -3
  49. package/dist/retrieval/search.js.map +1 -1
  50. package/dist/retrieval/types.d.ts +1 -0
  51. package/dist/retrieval/types.d.ts.map +1 -1
  52. package/dist/retrieval/types.js.map +1 -1
  53. package/package.json +1 -1
  54. package/src/adapters/storage.ts +35 -2
  55. package/src/config.ts +9 -0
  56. package/src/extraction/index.ts +2 -0
  57. package/src/extraction/pipeline.ts +63 -1
  58. package/src/extraction/structured-cross-linker.ts +259 -0
  59. package/src/extraction/structured-extractor.ts +463 -0
  60. package/src/extraction/types.ts +3 -1
  61. package/src/identity/index.ts +1 -0
  62. package/src/identity/resolver.ts +149 -0
  63. package/src/index.ts +1 -0
  64. package/src/models/entity.ts +13 -0
  65. package/src/retrieval/graph-traversal.ts +7 -4
  66. package/src/retrieval/search.ts +58 -3
  67. package/src/retrieval/types.ts +1 -0
  68. package/src/adapters/cache.d.ts +0 -9
  69. package/src/adapters/cache.d.ts.map +0 -1
  70. package/src/adapters/cache.js +0 -2
  71. package/src/adapters/cache.js.map +0 -1
  72. package/src/adapters/embedding.d.ts +0 -7
  73. package/src/adapters/embedding.d.ts.map +0 -1
  74. package/src/adapters/embedding.js +0 -2
  75. package/src/adapters/embedding.js.map +0 -1
  76. package/src/adapters/llm.d.ts +0 -19
  77. package/src/adapters/llm.d.ts.map +0 -1
  78. package/src/adapters/llm.js +0 -2
  79. package/src/adapters/llm.js.map +0 -1
  80. package/src/adapters/perplexity-embedding.d.ts +0 -24
  81. package/src/adapters/perplexity-embedding.d.ts.map +0 -1
  82. package/src/adapters/perplexity-embedding.js +0 -78
  83. package/src/adapters/perplexity-embedding.js.map +0 -1
  84. package/src/adapters/storage.d.ts +0 -173
  85. package/src/adapters/storage.d.ts.map +0 -1
  86. package/src/adapters/storage.js +0 -2
  87. package/src/adapters/storage.js.map +0 -1
  88. package/src/config.d.ts +0 -296
  89. package/src/config.d.ts.map +0 -1
  90. package/src/config.js +0 -92
  91. package/src/config.js.map +0 -1
  92. package/src/extraction/contradiction.d.ts +0 -15
  93. package/src/extraction/contradiction.d.ts.map +0 -1
  94. package/src/extraction/contradiction.js +0 -23
  95. package/src/extraction/contradiction.js.map +0 -1
  96. package/src/extraction/cross-linker.d.ts +0 -23
  97. package/src/extraction/cross-linker.d.ts.map +0 -1
  98. package/src/extraction/cross-linker.js +0 -146
  99. package/src/extraction/cross-linker.js.map +0 -1
  100. package/src/extraction/dedup.d.ts +0 -12
  101. package/src/extraction/dedup.d.ts.map +0 -1
  102. package/src/extraction/dedup.js +0 -93
  103. package/src/extraction/dedup.js.map +0 -1
  104. package/src/extraction/entity-extractor.d.ts +0 -30
  105. package/src/extraction/entity-extractor.d.ts.map +0 -1
  106. package/src/extraction/entity-extractor.js +0 -145
  107. package/src/extraction/entity-extractor.js.map +0 -1
  108. package/src/extraction/hasher.d.ts +0 -5
  109. package/src/extraction/hasher.d.ts.map +0 -1
  110. package/src/extraction/hasher.js +0 -8
  111. package/src/extraction/hasher.js.map +0 -1
  112. package/src/extraction/heuristic.d.ts +0 -3
  113. package/src/extraction/heuristic.d.ts.map +0 -1
  114. package/src/extraction/heuristic.js +0 -282
  115. package/src/extraction/heuristic.js.map +0 -1
  116. package/src/extraction/llm-extractor.d.ts +0 -23
  117. package/src/extraction/llm-extractor.d.ts.map +0 -1
  118. package/src/extraction/llm-extractor.js +0 -240
  119. package/src/extraction/llm-extractor.js.map +0 -1
  120. package/src/extraction/pipeline.d.ts +0 -30
  121. package/src/extraction/pipeline.d.ts.map +0 -1
  122. package/src/extraction/pipeline.js +0 -413
  123. package/src/extraction/pipeline.js.map +0 -1
  124. package/src/extraction/prompts.d.ts +0 -28
  125. package/src/extraction/prompts.d.ts.map +0 -1
  126. package/src/extraction/prompts.js +0 -205
  127. package/src/extraction/prompts.js.map +0 -1
  128. package/src/extraction/sliding-window.d.ts +0 -41
  129. package/src/extraction/sliding-window.d.ts.map +0 -1
  130. package/src/extraction/sliding-window.js +0 -84
  131. package/src/extraction/sliding-window.js.map +0 -1
  132. package/src/extraction/types.d.ts +0 -80
  133. package/src/extraction/types.d.ts.map +0 -1
  134. package/src/extraction/types.js +0 -2
  135. package/src/extraction/types.js.map +0 -1
  136. package/src/feedback/tracker.d.ts +0 -25
  137. package/src/feedback/tracker.d.ts.map +0 -1
  138. package/src/feedback/tracker.js +0 -90
  139. package/src/feedback/tracker.js.map +0 -1
  140. package/src/models/api-key.d.ts +0 -54
  141. package/src/models/api-key.d.ts.map +0 -1
  142. package/src/models/api-key.js +0 -21
  143. package/src/models/api-key.js.map +0 -1
  144. package/src/models/edge.d.ts +0 -78
  145. package/src/models/edge.d.ts.map +0 -1
  146. package/src/models/edge.js +0 -29
  147. package/src/models/edge.js.map +0 -1
  148. package/src/models/entity.d.ts +0 -60
  149. package/src/models/entity.d.ts.map +0 -1
  150. package/src/models/entity.js +0 -22
  151. package/src/models/entity.js.map +0 -1
  152. package/src/models/extraction.d.ts +0 -111
  153. package/src/models/extraction.d.ts.map +0 -1
  154. package/src/models/extraction.js +0 -40
  155. package/src/models/extraction.js.map +0 -1
  156. package/src/models/fact-entity.d.ts +0 -33
  157. package/src/models/fact-entity.d.ts.map +0 -1
  158. package/src/models/fact-entity.js +0 -14
  159. package/src/models/fact-entity.js.map +0 -1
  160. package/src/models/fact.d.ts +0 -191
  161. package/src/models/fact.d.ts.map +0 -1
  162. package/src/models/fact.js +0 -72
  163. package/src/models/fact.js.map +0 -1
  164. package/src/models/index.d.ts +0 -13
  165. package/src/models/index.d.ts.map +0 -1
  166. package/src/models/index.js +0 -13
  167. package/src/models/index.js.map +0 -1
  168. package/src/models/memory-access.d.ts +0 -89
  169. package/src/models/memory-access.d.ts.map +0 -1
  170. package/src/models/memory-access.js +0 -33
  171. package/src/models/memory-access.js.map +0 -1
  172. package/src/models/session.d.ts +0 -60
  173. package/src/models/session.d.ts.map +0 -1
  174. package/src/models/session.js +0 -23
  175. package/src/models/session.js.map +0 -1
  176. package/src/models/tenant.d.ts +0 -448
  177. package/src/models/tenant.d.ts.map +0 -1
  178. package/src/models/tenant.js +0 -23
  179. package/src/models/tenant.js.map +0 -1
  180. package/src/models/trigger.d.ts +0 -87
  181. package/src/models/trigger.d.ts.map +0 -1
  182. package/src/models/trigger.js +0 -41
  183. package/src/models/trigger.js.map +0 -1
  184. package/src/models/usage-record.d.ts +0 -37
  185. package/src/models/usage-record.d.ts.map +0 -1
  186. package/src/models/usage-record.js +0 -14
  187. package/src/models/usage-record.js.map +0 -1
  188. package/src/models/webhook.d.ts +0 -50
  189. package/src/models/webhook.d.ts.map +0 -1
  190. package/src/models/webhook.js +0 -25
  191. package/src/models/webhook.js.map +0 -1
  192. package/src/retrieval/compound-search.d.ts +0 -13
  193. package/src/retrieval/compound-search.d.ts.map +0 -1
  194. package/src/retrieval/compound-search.js +0 -87
  195. package/src/retrieval/compound-search.js.map +0 -1
  196. package/src/retrieval/contradiction-surfacer.d.ts +0 -18
  197. package/src/retrieval/contradiction-surfacer.d.ts.map +0 -1
  198. package/src/retrieval/contradiction-surfacer.js +0 -64
  199. package/src/retrieval/contradiction-surfacer.js.map +0 -1
  200. package/src/retrieval/embedding-cache.d.ts +0 -17
  201. package/src/retrieval/embedding-cache.d.ts.map +0 -1
  202. package/src/retrieval/embedding-cache.js +0 -56
  203. package/src/retrieval/embedding-cache.js.map +0 -1
  204. package/src/retrieval/fusion.d.ts +0 -27
  205. package/src/retrieval/fusion.d.ts.map +0 -1
  206. package/src/retrieval/fusion.js +0 -87
  207. package/src/retrieval/fusion.js.map +0 -1
  208. package/src/retrieval/graph-traversal.d.ts +0 -29
  209. package/src/retrieval/graph-traversal.d.ts.map +0 -1
  210. package/src/retrieval/graph-traversal.js +0 -208
  211. package/src/retrieval/graph-traversal.js.map +0 -1
  212. package/src/retrieval/query-expansion.d.ts +0 -20
  213. package/src/retrieval/query-expansion.d.ts.map +0 -1
  214. package/src/retrieval/query-expansion.js +0 -76
  215. package/src/retrieval/query-expansion.js.map +0 -1
  216. package/src/retrieval/reranker.d.ts +0 -15
  217. package/src/retrieval/reranker.d.ts.map +0 -1
  218. package/src/retrieval/reranker.js +0 -47
  219. package/src/retrieval/reranker.js.map +0 -1
  220. package/src/retrieval/salience-scorer.d.ts +0 -15
  221. package/src/retrieval/salience-scorer.d.ts.map +0 -1
  222. package/src/retrieval/salience-scorer.js +0 -41
  223. package/src/retrieval/salience-scorer.js.map +0 -1
  224. package/src/retrieval/search.d.ts +0 -21
  225. package/src/retrieval/search.d.ts.map +0 -1
  226. package/src/retrieval/search.js +0 -228
  227. package/src/retrieval/search.js.map +0 -1
  228. package/src/retrieval/temporal-scorer.d.ts +0 -18
  229. package/src/retrieval/temporal-scorer.d.ts.map +0 -1
  230. package/src/retrieval/temporal-scorer.js +0 -106
  231. package/src/retrieval/temporal-scorer.js.map +0 -1
  232. package/src/retrieval/trigger-matcher.d.ts +0 -18
  233. package/src/retrieval/trigger-matcher.d.ts.map +0 -1
  234. package/src/retrieval/trigger-matcher.js +0 -134
  235. package/src/retrieval/trigger-matcher.js.map +0 -1
  236. package/src/retrieval/types.d.ts +0 -70
  237. package/src/retrieval/types.d.ts.map +0 -1
  238. package/src/retrieval/types.js +0 -9
  239. package/src/retrieval/types.js.map +0 -1
  240. package/src/retrieval/vector-search.d.ts +0 -5
  241. package/src/retrieval/vector-search.d.ts.map +0 -1
  242. package/src/retrieval/vector-search.js +0 -24
  243. package/src/retrieval/vector-search.js.map +0 -1
  244. package/src/salience/decay.d.ts +0 -9
  245. package/src/salience/decay.d.ts.map +0 -1
  246. package/src/salience/decay.js +0 -15
  247. package/src/salience/decay.js.map +0 -1
  248. package/src/scratchpad/scratchpad.d.ts +0 -23
  249. package/src/scratchpad/scratchpad.d.ts.map +0 -1
  250. package/src/scratchpad/scratchpad.js +0 -137
  251. package/src/scratchpad/scratchpad.js.map +0 -1
  252. package/src/sessions/manager.d.ts +0 -11
  253. package/src/sessions/manager.d.ts.map +0 -1
  254. package/src/sessions/manager.js +0 -63
  255. package/src/sessions/manager.js.map +0 -1
@@ -0,0 +1,195 @@
1
+ /**
2
+ * Structured cross-linker — connects structured entities to existing graph.
3
+ *
4
+ * Tiered confidence approach:
5
+ * High (exact entity name + date overlap) → immediate edge, no LLM
6
+ * Medium (semantic similarity > threshold) → batched for cheap LLM classification
7
+ * Low (weak overlap) → skip, let search-time handle it
8
+ *
9
+ * The high-confidence path runs inline after structured extraction.
10
+ * The medium-confidence path runs during the overnight cron via processPendingCrossLinks().
11
+ */
12
+ // ---------------------------------------------------------------------------
13
+ // High-confidence immediate linking
14
+ // ---------------------------------------------------------------------------
15
/**
 * Counts structured entities whose facts come from more than one structured
 * data source (calendar, vault, email, tasks).
 *
 * For every `[canonicalName, entityId]` pair, up to 10 facts linked to the
 * entity are fetched and their `sourceType` values collected. When at least two
 * of the four structured source types are present, the entity is logged as a
 * high-confidence bridge and counted.
 *
 * NOTE: despite its name and the `edgesCreated` counter, this function does not
 * write anything to storage — it only logs each bridge and returns how many
 * were found.
 *
 * @param {object} storage - Storage adapter; only
 *   `getFactsForEntity(tenantId, entityId, { limit })` is called, and its result
 *   is expected to expose a `data` array of facts with a `sourceType` field.
 * @param {string} tenantId - Tenant whose entities are inspected.
 * @param {Iterable<[string, string]>} newEntityIds - canonicalName → entityId
 *   pairs (for example a Map).
 * @param {string} inputType - Currently unused; kept so existing callers keep working.
 * @returns {Promise<number>} Number of bridging entities detected.
 */
export async function linkHighConfidenceMatches(storage, tenantId, newEntityIds, inputType) {
    // Structured source type → human-readable label, in the order used in the log line.
    const sourceLabels = [
        ['structured_event', 'calendar'],
        ['structured_vault', 'vault'],
        ['structured_email', 'email'],
        ['structured_task', 'tasks'],
    ];
    let edgesCreated = 0;
    for (const [canonicalName, entityId] of newEntityIds) {
        // Names shorter than 4 characters are treated as too generic to bridge on.
        // (Four-letter-and-longer names such as "task" or "email" are NOT skipped.)
        if (canonicalName.length < 4)
            continue;
        try {
            // Only the first 10 facts are sampled, so a source type that appears
            // solely in later facts will not be seen here.
            const factsResult = await storage.getFactsForEntity(tenantId, entityId, { limit: 10 });
            const sourceTypes = new Set(factsResult.data.map((f) => f.sourceType));
            const presentLabels = sourceLabels
                .filter(([sourceType]) => sourceTypes.has(sourceType))
                .map(([, label]) => label);
            if (presentLabels.length >= 2) {
                const bridgeFact = `"${canonicalName}" appears in multiple user data sources: ${presentLabels.join(', ')}`;
                console.error(`[steno-structured-xlink] High-confidence bridge: ${bridgeFact}`);
                edgesCreated++;
            }
        }
        catch (err) {
            // Best effort: a failure on one entity must not stop the remaining ones.
            console.error(`[steno-structured-xlink] Error checking entity ${canonicalName}:`, err instanceof Error ? err.message : err);
        }
    }
    return edgesCreated;
}
63
/**
 * Collects medium-confidence cross-link candidates: pairs of facts that are
 * semantically similar but come from different source types.
 *
 * Seed facts are obtained with a keyword search (limit 100) and kept only when
 * their `tags` include `'structured'`. Each seed's content is embedded and its
 * 5 nearest neighbours above `minSimilarity` are fetched; neighbours that are
 * the seed itself or share the seed's `sourceType` are dropped.
 *
 * The entity fields of every returned candidate (`entityId`, `entityName`,
 * `candidateEntityId`, `candidateEntityName`) are left as empty strings for the
 * caller to fill in.
 *
 * @param {object} storage - Storage adapter; uses `keywordSearch` (returns
 *   `{ fact }` items) and `vectorSearch` (returns `{ fact, similarity }` items).
 * @param {object} embedding - Embedding adapter; uses `embed(text)`.
 * @param {string} tenantId
 * @param {string} scope
 * @param {string} scopeId
 * @param {{ minSimilarity?: number, maxCandidates?: number }} [options] -
 *   Defaults: `minSimilarity` 0.6, `maxCandidates` 50.
 * @returns {Promise<object[]>} At most `maxCandidates` candidate pairs.
 */
export async function findPendingCrossLinks(storage, embedding, tenantId, scope, scopeId, options) {
    const minSim = options?.minSimilarity ?? 0.6;
    const maxCandidates = options?.maxCandidates ?? 50;
    // A non-positive cap means "no candidates"; bail out before paying for any
    // search or embedding call (the cap below is only checked after a push).
    if (maxCandidates <= 0)
        return [];
    // There is no listFacts method, so structured facts are located by keyword search.
    const recentStructuredFacts = await storage.keywordSearch({
        query: 'structured event task email vault',
        tenantId,
        scope,
        scopeId,
        limit: 100,
    });
    if (recentStructuredFacts.length === 0)
        return [];
    const candidates = [];
    for (const { fact: seed } of recentStructuredFacts) {
        if (!seed.tags?.includes('structured'))
            continue;
        const seedEmbedding = await embedding.embed(seed.content);
        const neighbors = await storage.vectorSearch({
            embedding: seedEmbedding,
            tenantId,
            scope,
            scopeId,
            limit: 5,
            minSimilarity: minSim,
        });
        for (const neighbor of neighbors) {
            const other = neighbor.fact;
            // Skip self-matches and same-source matches.
            if (other.id === seed.id || other.sourceType === seed.sourceType)
                continue;
            // No "already linked" check happens here; existing edges are dealt
            // with when the candidates are processed.
            candidates.push({
                entityId: '', // filled by caller
                entityName: '',
                factId: seed.id,
                factContent: seed.content,
                sourceType: seed.sourceType,
                candidateEntityId: '',
                candidateEntityName: '',
                candidateFactId: other.id,
                candidateFactContent: other.content,
                candidateSourceType: other.sourceType,
                similarity: neighbor.similarity,
            });
            if (candidates.length >= maxCandidates)
                return candidates;
        }
    }
    return candidates;
}
127
+ /**
128
+ * Process pending cross-links with a single cheap LLM call.
129
+ * Classifies relationship type for each candidate pair.
130
+ *
131
+ * Called by the overnight cron.
132
+ */
133
+ export async function processPendingCrossLinks(storage, embedding, llm, tenantId, scope, scopeId) {
134
+ const candidates = await findPendingCrossLinks(storage, embedding, tenantId, scope, scopeId);
135
+ if (candidates.length === 0)
136
+ return { processed: 0, edgesCreated: 0 };
137
+ // Build a single LLM prompt with all candidate pairs
138
+ const pairsText = candidates.map((c, i) => `${i + 1}. Fact A (${c.sourceType}): "${c.factContent.slice(0, 150)}"\n Fact B (${c.candidateSourceType}): "${c.candidateFactContent.slice(0, 150)}"`).join('\n\n');
139
+ const prompt = `You are analyzing pairs of user data items to determine if they are related.
140
+ For each pair, respond with ONE of:
141
+ - "same_as" — they refer to the same real-world thing (e.g., a vault save and a calendar event for the same event)
142
+ - "related_to" — they are topically connected but not the same thing
143
+ - "unrelated" — no meaningful connection
144
+
145
+ Respond as JSON array: [{"pair": 1, "relation": "same_as"}, ...]
146
+
147
+ Pairs:
148
+ ${pairsText}`;
149
+ let edgesCreated = 0;
150
+ try {
151
+ const response = await llm.complete([{ role: 'user', content: prompt }], { temperature: 0, responseFormat: 'json' });
152
+ const parsed = JSON.parse(response.content);
153
+ const classifications = Array.isArray(parsed) ? parsed : parsed.pairs ?? parsed.results ?? [];
154
+ for (const classification of classifications) {
155
+ const idx = (classification.pair ?? classification.index ?? 0) - 1;
156
+ const relation = classification.relation ?? classification.type;
157
+ const candidate = candidates[idx];
158
+ if (!candidate || relation === 'unrelated')
159
+ continue;
160
+ // Get entities for both facts to create the edge
161
+ const entitiesA = await storage.getEntitiesForFact(candidate.factId);
162
+ const entitiesB = await storage.getEntitiesForFact(candidate.candidateFactId);
163
+ if (entitiesA.length > 0 && entitiesB.length > 0) {
164
+ const edgeType = relation === 'same_as' ? 'same_as' : 'associative';
165
+ try {
166
+ await storage.createEdge({
167
+ id: crypto.randomUUID(),
168
+ tenantId,
169
+ sourceId: entitiesA[0].id,
170
+ targetId: entitiesB[0].id,
171
+ relation,
172
+ edgeType,
173
+ weight: candidate.similarity,
174
+ confidence: 0.7,
175
+ metadata: {
176
+ autoLinked: true,
177
+ sourceFactId: candidate.factId,
178
+ targetFactId: candidate.candidateFactId,
179
+ method: 'batch_llm_classification',
180
+ },
181
+ });
182
+ edgesCreated++;
183
+ }
184
+ catch {
185
+ // Edge may already exist — skip
186
+ }
187
+ }
188
+ }
189
+ }
190
+ catch (err) {
191
+ console.error('[steno-structured-xlink] Batch LLM classification failed:', err instanceof Error ? err.message : err);
192
+ }
193
+ return { processed: candidates.length, edgesCreated };
194
+ }
195
+ //# sourceMappingURL=structured-cross-linker.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"structured-cross-linker.js","sourceRoot":"","sources":["../../src/extraction/structured-cross-linker.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;GAUG;AAOH,8EAA8E;AAC9E,oCAAoC;AACpC,8EAA8E;AAE9E;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,yBAAyB,CAC7C,OAAuB,EACvB,QAAgB,EAChB,YAAiC,EAAI,2BAA2B;AAChE,SAAiB;IAEjB,IAAI,YAAY,GAAG,CAAC,CAAC;IAErB,KAAK,MAAM,CAAC,aAAa,EAAE,QAAQ,CAAC,IAAI,YAAY,EAAE,CAAC;QACrD,8DAA8D;QAC9D,IAAI,aAAa,CAAC,MAAM,GAAG,CAAC;YAAE,SAAS;QAEvC,mEAAmE;QACnE,qEAAqE;QACrE,mFAAmF;QACnF,IAAI,CAAC;YACH,+DAA+D;YAC/D,8EAA8E;YAC9E,2EAA2E;YAC3E,gEAAgE;YAChE,MAAM,WAAW,GAAG,MAAM,OAAO,CAAC,iBAAiB,CAAC,QAAQ,EAAE,QAAQ,EAAE,EAAE,KAAK,EAAE,EAAE,EAAE,CAAC,CAAC;YACvF,MAAM,WAAW,GAAG,IAAI,GAAG,CAAC,WAAW,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC;YAErE,kFAAkF;YAClF,4EAA4E;YAC5E,MAAM,WAAW,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC/D,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC5D,MAAM,QAAQ,GAAG,WAAW,CAAC,GAAG,CAAC,kBAAyB,CAAC,CAAC;YAC5D,MAAM,OAAO,GAAG,WAAW,CAAC,GAAG,CAAC,iBAAwB,CAAC,CAAC;YAE1D,MAAM,gBAAgB,GAAG,CAAC,WAAW,EAAE,QAAQ,EAAE,QAAQ,EAAE,OAAO,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,MAAM,CAAC;YAC3F,IAAI,gBAAgB,IAAI,CAAC,EAAE,CAAC;gBAC1B,4EAA4E;gBAC5E,MAAM,UAAU,GAAG,IAAI,aAAa,4CAA4C;oBAC9E,WAAW,IAAI,UAAU;oBACzB,QAAQ,IAAI,OAAO;oBACnB,QAAQ,IAAI,OAAO;oBACnB,OAAO,IAAI,OAAO;iBACnB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,EAAE,CAAC;gBAE/B,OAAO,CAAC,KAAK,CAAC,oDAAoD,UAAU,EAAE,CAAC,CAAC;gBAChF,YAAY,EAAE,CAAC;YACjB,CAAC;QACH,CAAC;QAAC,OAAO,GAAG,EAAE,CAAC;YACb,OAAO,CAAC,KAAK,CAAC,kDAAkD,aAAa,GAAG,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;QAC9H,CAAC;IACH,CAAC;IAED,OAAO,YAAY,CAAC;AACtB,CAAC;AAoBD;;;;GAIG;AACH,MAAM,CAAC,KAAK,UAAU,qBAAqB,CACzC,OAAuB,EACvB,SAA2B,EAC3B,QAAgB,EAChB,KAAa,EACb,OAAe,EACf,OAA4D;IAE5D,MAAM,MAAM,GAAG,OAAO,EAAE,aAAa,IAAI,GAAG,CAAC;IAC7C,MAAM,aAAa,GAAG,OAAO,EAAE,aAAa,IAAI,EAAE,CAAC;IAEnD,sEAAsE;IACtE,0DAA0D;IAC1D,MAAM,qBAAqB,GAAG,MAAM,OAAO,CAAC,aAAa,CAAC;QACxD,KAAK
,EAAE,mCAAmC;QAC1C,QAAQ;QACR,KAAK;QACL,OAAO;QACP,KAAK,EAAE,GAAG;KACX,CAAC,CAAC;IAEH,IAAI,qBAAqB,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,CAAC;IAElD,MAAM,UAAU,GAAuB,EAAE,CAAC;IAE1C,wFAAwF;IACxF,KAAK,MAAM,KAAK,IAAI,qBAAqB,EAAE,CAAC;QAC1C,MAAM,IAAI,GAAG,KAAK,CAAC,IAAI,CAAC;QACxB,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,QAAQ,CAAC,YAAY,CAAC;YAAE,SAAS;QAEjD,+CAA+C;QAC/C,MAAM,aAAa,GAAG,MAAM,SAAS,CAAC,KAAK,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAE1D,MAAM,OAAO,GAAG,MAAM,OAAO,CAAC,YAAY,CAAC;YACzC,SAAS,EAAE,aAAa;YACxB,QAAQ;YACR,KAAK;YACL,OAAO;YACP,KAAK,EAAE,CAAC;YACR,aAAa,EAAE,MAAM;SACtB,CAAC,CAAC;QAEH,KAAK,MAAM,KAAK,IAAI,OAAO,EAAE,CAAC;YAC5B,4CAA4C;YAC5C,IAAI,KAAK,CAAC,IAAI,CAAC,EAAE,KAAK,IAAI,CAAC,EAAE;gBAAE,SAAS;YACxC,IAAI,KAAK,CAAC,IAAI,CAAC,UAAU,KAAK,IAAI,CAAC,UAAU;gBAAE,SAAS;YAExD,uEAAuE;YACvE,wFAAwF;YACxF,UAAU,CAAC,IAAI,CAAC;gBACd,QAAQ,EAAE,EAAE,EAAE,mBAAmB;gBACjC,UAAU,EAAE,EAAE;gBACd,MAAM,EAAE,IAAI,CAAC,EAAE;gBACf,WAAW,EAAE,IAAI,CAAC,OAAO;gBACzB,UAAU,EAAE,IAAI,CAAC,UAAU;gBAC3B,iBAAiB,EAAE,EAAE;gBACrB,mBAAmB,EAAE,EAAE;gBACvB,eAAe,EAAE,KAAK,CAAC,IAAI,CAAC,EAAE;gBAC9B,oBAAoB,EAAE,KAAK,CAAC,IAAI,CAAC,OAAO;gBACxC,mBAAmB,EAAE,KAAK,CAAC,IAAI,CAAC,UAAU;gBAC1C,UAAU,EAAE,KAAK,CAAC,UAAU;aAC7B,CAAC,CAAC;YAEH,IAAI,UAAU,CAAC,MAAM,IAAI,aAAa;gBAAE,MAAM;QAChD,CAAC;QACD,IAAI,UAAU,CAAC,MAAM,IAAI,aAAa;YAAE,MAAM;IAChD,CAAC;IAED,OAAO,UAAU,CAAC;AACpB,CAAC;AAED;;;;;GAKG;AACH,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAC5C,OAAuB,EACvB,SAA2B,EAC3B,GAAe,EACf,QAAgB,EAChB,KAAa,EACb,OAAe;IAEf,MAAM,UAAU,GAAG,MAAM,qBAAqB,CAAC,OAAO,EAAE,SAAS,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,CAAC,CAAC;IAE7F,IAAI,UAAU,CAAC,MAAM,KAAK,CAAC;QAAE,OAAO,EAAE,SAAS,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE,CAAC;IAEtE,qDAAqD;IACrD,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,EAAE,EAAE,CACxC,GAAG,CAAC,GAAG,CAAC,aAAa,CAAC,CAAC,UAAU,OAAO,CAAC,CAAC,WAAW,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,iBAAiB,CAAC,CAAC,mBAAmB,OAAO,CAAC,CAAC,oBAAoB,CAAC,KAAK,CAAC,CAAC,EAAE,GAAG,CAAC,GAAG,CACxJ,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAEf,MAAM,MAAM,GAAG;;;;;;;;;EASf,SAAS,EAAE,CAAC;IAEZ,IAAI,YAAY,GA
AG,CAAC,CAAC;IAErB,IAAI,CAAC;QACH,MAAM,QAAQ,GAAG,MAAM,GAAG,CAAC,QAAQ,CACjC,CAAC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,MAAM,EAAE,CAAC,EACnC,EAAE,WAAW,EAAE,CAAC,EAAE,cAAc,EAAE,MAAM,EAAE,CAC3C,CAAC;QAEF,MAAM,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC;QAC5C,MAAM,eAAe,GAAG,KAAK,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,IAAI,MAAM,CAAC,OAAO,IAAI,EAAE,CAAC;QAE9F,KAAK,MAAM,cAAc,IAAI,eAAe,EAAE,CAAC;YAC7C,MAAM,GAAG,GAAG,CAAC,cAAc,CAAC,IAAI,IAAI,cAAc,CAAC,KAAK,IAAI,CAAC,CAAC,GAAG,CAAC,CAAC;YACnE,MAAM,QAAQ,GAAG,cAAc,CAAC,QAAQ,IAAI,cAAc,CAAC,IAAI,CAAC;YAChE,MAAM,SAAS,GAAG,UAAU,CAAC,GAAG,CAAC,CAAC;YAElC,IAAI,CAAC,SAAS,IAAI,QAAQ,KAAK,WAAW;gBAAE,SAAS;YAErD,iDAAiD;YACjD,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,kBAAkB,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;YACrE,MAAM,SAAS,GAAG,MAAM,OAAO,CAAC,kBAAkB,CAAC,SAAS,CAAC,eAAe,CAAC,CAAC;YAE9E,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,IAAI,SAAS,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBACjD,MAAM,QAAQ,GAAG,QAAQ,KAAK,SAAS,CAAC,CAAC,CAAC,SAAkB,CAAC,CAAC,CAAC,aAAsB,CAAC;gBACtF,IAAI,CAAC;oBACH,MAAM,OAAO,CAAC,UAAU,CAAC;wBACvB,EAAE,EAAE,MAAM,CAAC,UAAU,EAAE;wBACvB,QAAQ;wBACR,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAE,CAAC,EAAE;wBAC1B,QAAQ,EAAE,SAAS,CAAC,CAAC,CAAE,CAAC,EAAE;wBAC1B,QAAQ;wBACR,QAAQ;wBACR,MAAM,EAAE,SAAS,CAAC,UAAU;wBAC5B,UAAU,EAAE,GAAG;wBACf,QAAQ,EAAE;4BACR,UAAU,EAAE,IAAI;4BAChB,YAAY,EAAE,SAAS,CAAC,MAAM;4BAC9B,YAAY,EAAE,SAAS,CAAC,eAAe;4BACvC,MAAM,EAAE,0BAA0B;yBACnC;qBACF,CAAC,CAAC;oBACH,YAAY,EAAE,CAAC;gBACjB,CAAC;gBAAC,MAAM,CAAC;oBACP,gCAAgC;gBAClC,CAAC;YACH,CAAC;QACH,CAAC;IACH,CAAC;IAAC,OAAO,GAAG,EAAE,CAAC;QACb,OAAO,CAAC,KAAK,CAAC,2DAA2D,EAAE,GAAG,YAAY,KAAK,CAAC,CAAC,CAAC,GAAG,CAAC,OAAO,CAAC,CAAC,CAAC,GAAG,CAAC,CAAC;IACvH,CAAC;IAED,OAAO,EAAE,SAAS,EAAE,UAAU,CAAC,MAAM,EAAE,YAAY,EAAE,CAAC;AACxD,CAAC"}
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Structured data extractor — bypasses LLM entirely.
3
+ *
4
+ * Handles structured_event, structured_task, structured_email, structured_vault
5
+ * input types by directly creating entities, edges, and facts from known fields.
6
+ * Zero LLM cost, deterministic, high confidence.
7
+ */
8
+ import type { ExtractionResult } from './types.js';
9
/**
 * Input record for a structured event; this is the parameter type of
 * `extractStructuredEvent`. Only `title` and `startTime` are required, every
 * other field is optional. `sourceType`, when present, is limited to
 * 'calendar' or 'vault'.
 * NOTE(review): `startTime` / `endTime` are plain strings here — presumably
 * ISO-8601 timestamps; confirm against the producer.
 */
+ export interface StructuredEvent {
10
+ title: string;
11
+ startTime: string;
12
+ endTime?: string;
13
+ location?: string;
14
+ description?: string;
15
+ organizers?: string[];
16
+ attendees?: string[];
17
+ url?: string;
18
+ provider?: string;
19
+ externalId?: string;
20
+ sourceType?: 'calendar' | 'vault';
21
+ }
22
/**
 * Input record for a structured task; this is the parameter type of
 * `extractStructuredTask`. Only `title` is required, every other field is
 * optional. `status`, `priority` and `category` are free-form strings at the
 * type level (no enum is declared here).
 * NOTE(review): `dueDate` is a plain string — presumably an ISO-8601 date;
 * confirm against the producer.
 */
+ export interface StructuredTask {
23
+ title: string;
24
+ description?: string;
25
+ status?: string;
26
+ priority?: string;
27
+ category?: string;
28
+ dueDate?: string;
29
+ tags?: string[];
30
+ externalId?: string;
31
+ }
32
/**
 * Input record for a structured email; this is the parameter type of
 * `extractStructuredEmail`. `subject`, `from` and `date` are required, every
 * other field is optional. `to` is a list of strings, `from` a single string.
 * NOTE(review): `date` is a plain string — presumably an ISO-8601 timestamp;
 * confirm against the producer.
 */
+ export interface StructuredEmail {
33
+ subject: string;
34
+ from: string;
35
+ to?: string[];
36
+ body?: string;
37
+ date: string;
38
+ isUnread?: boolean;
39
+ threadId?: string;
40
+ provider?: string;
41
+ externalId?: string;
42
+ }
43
/**
 * Input record for a structured vault item; this is the parameter type of
 * `extractStructuredVault`. `title`, `contentType` and `savedAt` are required,
 * every other field is optional. `metadata` is an open string-keyed bag of
 * unknown values.
 * NOTE(review): `savedAt` is a plain string — presumably an ISO-8601
 * timestamp; confirm against the producer.
 */
+ export interface StructuredVault {
44
+ title: string;
45
+ contentType: string;
46
+ url?: string;
47
+ source?: string;
48
+ savedAt: string;
49
+ content?: string;
50
+ metadata?: Record<string, unknown>;
51
+ externalId?: string;
52
+ }
53
/**
 * Exported functions of the structured extractor.
 * - The four `extractStructured*` functions each take one typed record
 *   (event, task, email, vault) and return an `ExtractionResult`.
 * - `isStructuredInput` takes an input-type string and returns a boolean.
 * - `extractStructured` takes an input-type string plus untyped `data` and
 *   returns an `ExtractionResult`; presumably it dispatches to one of the four
 *   typed extractors — confirm in the implementation.
 */
+ export declare function extractStructuredEvent(data: StructuredEvent): ExtractionResult;
54
+ export declare function extractStructuredTask(data: StructuredTask): ExtractionResult;
55
+ export declare function extractStructuredEmail(data: StructuredEmail): ExtractionResult;
56
+ export declare function extractStructuredVault(data: StructuredVault): ExtractionResult;
57
+ export declare function isStructuredInput(inputType: string): boolean;
58
+ export declare function extractStructured(inputType: string, data: unknown): ExtractionResult;
59
+ //# sourceMappingURL=structured-extractor.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"structured-extractor.d.ts","sourceRoot":"","sources":["../../src/extraction/structured-extractor.ts"],"names":[],"mappings":"AAAA;;;;;;GAMG;AAEH,OAAO,KAAK,EAAE,gBAAgB,EAAiD,MAAM,YAAY,CAAC;AAOlG,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,SAAS,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,UAAU,CAAC,EAAE,MAAM,EAAE,CAAC;IACtB,SAAS,CAAC,EAAE,MAAM,EAAE,CAAC;IACrB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,UAAU,CAAC,EAAE,UAAU,GAAG,OAAO,CAAC;CACnC;AAED,MAAM,WAAW,cAAc;IAC7B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,IAAI,CAAC,EAAE,MAAM,EAAE,CAAC;IAChB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;IACb,EAAE,CAAC,EAAE,MAAM,EAAE,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,CAAC,EAAE,OAAO,CAAC;IACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,EAAE,MAAM,CAAC;IACd,WAAW,EAAE,MAAM,CAAC;IACpB,GAAG,CAAC,EAAE,MAAM,CAAC;IACb,MAAM,CAAC,EAAE,MAAM,CAAC;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,OAAO,CAAC,CAAC;IACnC,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAkCD,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,eAAe,GAAG,gBAAgB,CA0G9E;AAED,wBAAgB,qBAAqB,CAAC,IAAI,EAAE,cAAc,GAAG,gBAAgB,CAiE5E;AAED,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,eAAe,GAAG,gBAAgB,CAqE9E;AAED,wBAAgB,sBAAsB,CAAC,IAAI,EAAE,eAAe,GAAG,gBAAgB,CA2F9E;AAaD,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,GAAG,OAAO,CAE5D;AAED,wBAAgB,iBAAiB,CAAC,SAAS,EAAE,MAAM,EAAE,IAAI,EAAE,OAAO,GAAG,gBAAgB,CAapF"}
@@ -0,0 +1,389 @@
1
+ /**
2
+ * Structured data extractor — bypasses LLM entirely.
3
+ *
4
+ * Handles structured_event, structured_task, structured_email, structured_vault
5
+ * input types by directly creating entities, edges, and facts from known fields.
6
+ * Zero LLM cost, deterministic, high confidence.
7
+ */
8
+ // ---------------------------------------------------------------------------
9
+ // Helpers
10
+ // ---------------------------------------------------------------------------
11
/**
 * Turns a display name into the normalized string used as `canonicalName`
 * and as edge endpoints in this module.
 *
 * Steps: lower-case, drop every character that is not `a-z`, `0-9`,
 * whitespace, `.` or `-`, collapse whitespace runs to one space, trim.
 *
 * @param {string} name - Raw display name.
 * @returns {string} Normalized name (may be empty).
 */
function canonicalize(name) {
    const lowered = name.toLowerCase();
    const stripped = lowered.replace(/[^a-z0-9\s.-]/g, '');
    const collapsed = stripped.replace(/\s+/g, ' ');
    return collapsed.trim();
}
14
/**
 * Formats a date string as a long en-US date (weekday, month, day, year)
 * in the runtime's local time zone.
 *
 * `toLocaleDateString` does not throw for an unparseable date; it returns
 * the literal string "Invalid Date". The validity check below makes the
 * fallback to the raw input actually take effect in that case.
 *
 * @param {string} iso - Date string accepted by the `Date` constructor.
 * @returns {string} Human-readable date, or `iso` unchanged when it cannot be parsed.
 */
function formatDate(iso) {
    try {
        const parsed = new Date(iso);
        if (Number.isNaN(parsed.getTime())) {
            return iso;
        }
        return parsed.toLocaleDateString('en-US', {
            weekday: 'long', year: 'numeric', month: 'long', day: 'numeric',
        });
    }
    catch {
        // Best-effort formatting: never let a formatting failure abort extraction.
        return iso;
    }
}
24
/**
 * Formats the time-of-day part of a date string as a 12-hour en-US time
 * (for example "3:05 PM") in the runtime's local time zone.
 *
 * `toLocaleTimeString` returns "Invalid Date" rather than throwing for an
 * unparseable date, so validity is checked explicitly to honour the
 * empty-string fallback.
 *
 * @param {string} iso - Date string accepted by the `Date` constructor.
 * @returns {string} Formatted time, or `''` when `iso` cannot be parsed.
 */
function formatTime(iso) {
    try {
        const parsed = new Date(iso);
        if (Number.isNaN(parsed.getTime())) {
            return '';
        }
        return parsed.toLocaleTimeString('en-US', {
            hour: 'numeric', minute: '2-digit', hour12: true,
        });
    }
    catch {
        // Best-effort formatting: never let a formatting failure abort extraction.
        return '';
    }
}
34
+ // ---------------------------------------------------------------------------
35
+ // Extractors
36
+ // ---------------------------------------------------------------------------
37
/**
 * Builds an extraction result for an event payload without calling an LLM.
 *
 * Produces one `event` entity for the title, an optional `location` entity,
 * one `organization` entity per organizer and one `person` entity per
 * attendee, each linked to the event by an edge with confidence 1.0, plus a
 * single fact summarizing the event in prose.
 *
 * @param {object} data - Event payload. `title` must be a string; the fields
 *   read here are `startTime`, `endTime`, `location`, `description`,
 *   `organizers`, `attendees`, `url`, `provider`, `externalId`, `sourceType`.
 * @returns {object} Result with `facts`, `entities`, `edges`, `tier`
 *   ('heuristic'), `confidence`, zero token counts and `model: null`.
 */
export function extractStructuredEvent(data) {
    const entities = [];
    const edges = [];
    // Main event entity
    const eventCanonical = canonicalize(data.title);
    entities.push({
        name: data.title,
        entityType: 'event',
        canonicalName: eventCanonical,
        properties: {
            startTime: data.startTime,
            endTime: data.endTime,
            location: data.location,
            url: data.url,
            provider: data.provider,
            externalId: data.externalId,
            sourceType: data.sourceType,
        },
    });
    // Location entity
    if (data.location) {
        const locCanonical = canonicalize(data.location);
        entities.push({
            name: data.location,
            entityType: 'location',
            canonicalName: locCanonical,
            properties: {},
        });
        edges.push({
            sourceName: eventCanonical,
            targetName: locCanonical,
            relation: 'located_at',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    // Organizer entities
    for (const org of data.organizers ?? []) {
        const orgCanonical = canonicalize(org);
        entities.push({
            name: org,
            entityType: 'organization',
            canonicalName: orgCanonical,
            properties: {},
        });
        edges.push({
            sourceName: eventCanonical,
            targetName: orgCanonical,
            relation: 'hosted_by',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    // Attendee entities (edge points from the person to the event)
    for (const attendee of data.attendees ?? []) {
        const attCanonical = canonicalize(attendee);
        entities.push({
            name: attendee,
            entityType: 'person',
            canonicalName: attCanonical,
            properties: {},
        });
        edges.push({
            sourceName: attCanonical,
            targetName: eventCanonical,
            relation: 'attends',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    // Build fact content. The date/time fragments are only appended when
    // there is something to show, so a missing start time no longer yields
    // "on Invalid Date".
    let factContent = `Event: "${data.title}"`;
    if (data.startTime) {
        factContent += ` on ${formatDate(data.startTime)}`;
        const startClock = formatTime(data.startTime);
        if (startClock) {
            factContent += ` at ${startClock}`;
        }
    }
    if (data.endTime) {
        const endClock = formatTime(data.endTime);
        if (endClock) {
            factContent += ` - ${endClock}`;
        }
    }
    if (data.location) {
        factContent += ` at ${data.location}`;
    }
    if (data.organizers?.length) {
        factContent += `. Hosted by ${data.organizers.join(', ')}`;
    }
    if (data.description) {
        factContent += `. ${data.description.slice(0, 300)}`;
    }
    // An unparseable or missing start time becomes `undefined` instead of an
    // Invalid Date object.
    const parsedStart = new Date(data.startTime);
    const eventDate = Number.isNaN(parsedStart.getTime()) ? undefined : parsedStart;
    // Event name first, then every other entity in insertion order; the Set
    // drops repeats (e.g. someone listed as both organizer and attendee).
    const entityCanonicalNames = [
        ...new Set([eventCanonical, ...entities.map((e) => e.canonicalName)]),
    ];
    const fact = {
        content: factContent,
        importance: 0.8,
        confidence: 1.0,
        sourceType: (data.sourceType === 'vault' ? 'structured_vault' : 'structured_event'),
        modality: 'text',
        tags: ['structured', 'event', ...(data.provider ? [data.provider] : [])],
        originalContent: JSON.stringify(data),
        entityCanonicalNames,
        eventDate,
        documentDate: new Date(),
    };
    return {
        facts: [fact],
        entities,
        edges,
        tier: 'heuristic',
        confidence: 1.0,
        tokensInput: 0,
        tokensOutput: 0,
        model: null,
    };
}
142
/**
 * Converts a task payload directly into entities, edges and one fact,
 * with no LLM involved.
 *
 * Emits a `task` entity for the title and, when a category is present, a
 * `topic` entity joined to the task by a hierarchical `categorized_as` edge.
 * Fact importance is 0.9 for priority 'high' or 'urgent', otherwise 0.7.
 *
 * @param {object} data - Task payload. `title` must be a string; the fields
 *   read here are `description`, `status`, `priority`, `category`, `dueDate`,
 *   `tags`, `externalId`.
 * @returns {object} Result with `facts`, `entities`, `edges`, `tier`
 *   ('heuristic'), `confidence`, zero token counts and `model: null`.
 */
export function extractStructuredTask(data) {
    const taskKey = canonicalize(data.title);
    const entities = [
        {
            name: data.title,
            entityType: 'task',
            canonicalName: taskKey,
            properties: {
                status: data.status,
                priority: data.priority,
                category: data.category,
                dueDate: data.dueDate,
                externalId: data.externalId,
            },
        },
    ];
    const edges = [];
    // Category becomes a topic node that the task hangs under.
    if (data.category) {
        const topicKey = canonicalize(data.category);
        entities.push({
            name: data.category,
            entityType: 'topic',
            canonicalName: topicKey,
            properties: {},
        });
        edges.push({
            sourceName: taskKey,
            targetName: topicKey,
            relation: 'categorized_as',
            edgeType: 'hierarchical',
            confidence: 1.0,
        });
    }
    // Assemble the sentence piece by piece; only populated fields contribute.
    const pieces = [`Task: "${data.title}"`];
    if (data.status) {
        pieces.push(` (${data.status})`);
    }
    if (data.priority) {
        pieces.push(`, priority: ${data.priority}`);
    }
    if (data.dueDate) {
        pieces.push(`, due ${formatDate(data.dueDate)}`);
    }
    if (data.description) {
        pieces.push(`. ${data.description.slice(0, 200)}`);
    }
    const isPressing = ['high', 'urgent'].includes(data.priority);
    return {
        facts: [
            {
                content: pieces.join(''),
                importance: isPressing ? 0.9 : 0.7,
                confidence: 1.0,
                sourceType: 'structured_task',
                modality: 'text',
                tags: ['structured', 'task', ...(data.tags ?? [])],
                originalContent: JSON.stringify(data),
                entityCanonicalNames: [taskKey],
                eventDate: data.dueDate ? new Date(data.dueDate) : undefined,
                documentDate: new Date(),
            },
        ],
        entities,
        edges,
        tier: 'heuristic',
        confidence: 1.0,
        tokensInput: 0,
        tokensOutput: 0,
        model: null,
    };
}
207
/**
 * Builds an extraction result for an email payload without calling an LLM.
 *
 * Produces a `person` entity for the sender, a `topic` entity for the
 * subject when it is longer than five characters (linked from the sender by
 * an `authored` edge), and a `person` entity per recipient. The single fact
 * is linked to the sender only; importance is 0.8 for unread mail, else 0.5.
 *
 * NOTE(review): recipients get entities but no edges and are not listed in
 * the fact's `entityCanonicalNames` — confirm this is intentional.
 *
 * @param {object} data - Email payload. `from` must be a string; the fields
 *   read here are `subject`, `to`, `body`, `date`, `isUnread`, `threadId`,
 *   `provider`.
 * @returns {object} Result with `facts`, `entities`, `edges`, `tier`
 *   ('heuristic'), `confidence`, zero token counts and `model: null`.
 */
export function extractStructuredEmail(data) {
    const entities = [];
    const edges = [];
    // Sender entity
    const senderCanonical = canonicalize(data.from);
    entities.push({
        name: data.from,
        entityType: 'person',
        canonicalName: senderCanonical,
        properties: { email: data.from },
    });
    // Subject as topic entity if substantial
    if (data.subject && data.subject.length > 5) {
        const subjectCanonical = canonicalize(data.subject);
        entities.push({
            name: data.subject,
            entityType: 'topic',
            canonicalName: subjectCanonical,
            properties: { threadId: data.threadId, provider: data.provider },
        });
        edges.push({
            sourceName: senderCanonical,
            targetName: subjectCanonical,
            relation: 'authored',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    // Recipients
    for (const to of data.to ?? []) {
        const toCanonical = canonicalize(to);
        entities.push({
            name: to,
            entityType: 'person',
            canonicalName: toCanonical,
            properties: { email: to },
        });
    }
    let factContent = `Email from ${data.from}: "${data.subject}"`;
    if (data.date) {
        factContent += ` on ${formatDate(data.date)}`;
    }
    if (data.body) {
        factContent += `. ${data.body.slice(0, 300)}`;
    }
    // The fact text already tolerates a missing date; do the same for
    // `eventDate` so a missing or unparseable value yields `undefined`
    // rather than an Invalid Date object.
    const parsedDate = data.date ? new Date(data.date) : undefined;
    const eventDate = parsedDate && !Number.isNaN(parsedDate.getTime()) ? parsedDate : undefined;
    const fact = {
        content: factContent,
        importance: data.isUnread ? 0.8 : 0.5,
        confidence: 1.0,
        sourceType: 'structured_email',
        modality: 'text',
        tags: ['structured', 'email', ...(data.provider ? [data.provider] : []), ...(data.isUnread ? ['unread'] : [])],
        originalContent: JSON.stringify(data),
        entityCanonicalNames: [senderCanonical],
        eventDate,
        documentDate: new Date(),
    };
    return {
        facts: [fact],
        entities,
        edges,
        tier: 'heuristic',
        confidence: 1.0,
        tokensInput: 0,
        tokensOutput: 0,
        model: null,
    };
}
273
/**
 * Builds an extraction result for a saved vault item without calling an LLM.
 *
 * Produces a main entity for the title (`event` when `contentType` is
 * 'event', otherwise `topic`), an optional `source` entity linked by a
 * `saved_from` edge, and one `organization` entity per organizer found in
 * `metadata.organizer` / `metadata.organizers`, linked by `hosted_by` edges.
 *
 * @param {object} data - Vault payload. `title` must be a string; the fields
 *   read here are `contentType`, `content`, `url`, `source`, `savedAt`,
 *   `externalId`, `metadata`.
 * @returns {object} Result with `facts`, `entities`, `edges`, `tier`
 *   ('heuristic'), `confidence`, zero token counts and `model: null`.
 */
export function extractStructuredVault(data) {
    const entities = [];
    const edges = [];
    const vaultCanonical = canonicalize(data.title);
    entities.push({
        name: data.title,
        entityType: data.contentType === 'event' ? 'event' : 'topic',
        canonicalName: vaultCanonical,
        properties: {
            contentType: data.contentType,
            url: data.url,
            source: data.source,
            savedAt: data.savedAt,
            externalId: data.externalId,
            // Spread last: metadata keys override the named fields above.
            ...(data.metadata ?? {}),
        },
    });
    // Source domain entity
    if (data.source) {
        const sourceCanonical = canonicalize(data.source);
        entities.push({
            name: data.source,
            entityType: 'source',
            canonicalName: sourceCanonical,
            properties: {},
        });
        edges.push({
            sourceName: vaultCanonical,
            targetName: sourceCanonical,
            relation: 'saved_from',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    // Organizers from metadata (applied for every content type, not only
    // events). Accepts a "A, B and C" style string or an array.
    const organizers = data.metadata?.organizer || data.metadata?.organizers;
    let orgList = [];
    if (typeof organizers === 'string') {
        orgList = organizers.split(/,\s*|(?:\s+and\s+)/);
    }
    else if (Array.isArray(organizers)) {
        orgList = organizers;
    }
    for (const org of orgList) {
        // Metadata values are untyped; ignore anything that is not a string
        // instead of crashing on `.trim()`.
        if (typeof org !== 'string') {
            continue;
        }
        const trimmed = org.trim();
        if (!trimmed) {
            continue;
        }
        const orgCanonical = canonicalize(trimmed);
        entities.push({
            name: trimmed,
            entityType: 'organization',
            canonicalName: orgCanonical,
            properties: {},
        });
        edges.push({
            sourceName: vaultCanonical,
            targetName: orgCanonical,
            relation: 'hosted_by',
            edgeType: 'associative',
            confidence: 1.0,
        });
    }
    let factContent = `Saved to vault: "${data.title}" (${data.contentType})`;
    if (data.source) {
        factContent += ` from ${data.source}`;
    }
    if (data.savedAt) {
        factContent += ` on ${formatDate(data.savedAt)}`;
    }
    if (data.content) {
        factContent += `. ${data.content.slice(0, 300)}`;
    }
    // Never hand an Invalid Date downstream: an unparseable metadata date is
    // dropped, and an unparseable/missing save time falls back to "now".
    const parsedEvent = data.metadata?.date ? new Date(data.metadata.date) : undefined;
    const eventDate = parsedEvent && !Number.isNaN(parsedEvent.getTime()) ? parsedEvent : undefined;
    const parsedSaved = new Date(data.savedAt);
    const documentDate = Number.isNaN(parsedSaved.getTime()) ? new Date() : parsedSaved;
    // Main entity first, then the rest in insertion order, without repeats.
    const entityCanonicalNames = [
        ...new Set([vaultCanonical, ...entities.map((e) => e.canonicalName)]),
    ];
    const fact = {
        content: factContent,
        importance: 0.7,
        confidence: 1.0,
        sourceType: 'structured_vault',
        modality: 'text',
        tags: ['structured', 'vault', data.contentType],
        originalContent: JSON.stringify(data),
        entityCanonicalNames,
        eventDate,
        documentDate,
    };
    return {
        facts: [fact],
        entities,
        edges,
        tier: 'heuristic',
        confidence: 1.0,
        tokensInput: 0,
        tokensOutput: 0,
        model: null,
    };
}
363
+ // ---------------------------------------------------------------------------
364
+ // Router — picks the right extractor based on inputType
365
+ // ---------------------------------------------------------------------------
366
// Input types that are handled by the deterministic extractors in this module.
const STRUCTURED_INPUT_TYPES = new Set(['structured_event', 'structured_task', 'structured_email', 'structured_vault']);
/**
 * Reports whether an input type is one of the structured types listed in
 * `STRUCTURED_INPUT_TYPES`.
 *
 * @param {string} inputType - Input type identifier.
 * @returns {boolean} `true` when the type is in the set.
 */
export function isStructuredInput(inputType) {
    const recognized = STRUCTURED_INPUT_TYPES.has(inputType);
    return recognized;
}
375
/**
 * Dispatches a structured payload to the extractor matching `inputType`.
 *
 * @param {string} inputType - One of 'structured_event', 'structured_task',
 *   'structured_email', 'structured_vault'.
 * @param {unknown} data - Payload passed through unchanged to the extractor.
 * @returns {object} The chosen extractor's result.
 * @throws {Error} When `inputType` is not one of the four known types.
 */
export function extractStructured(inputType, data) {
    if (inputType === 'structured_event') {
        return extractStructuredEvent(data);
    }
    if (inputType === 'structured_task') {
        return extractStructuredTask(data);
    }
    if (inputType === 'structured_email') {
        return extractStructuredEmail(data);
    }
    if (inputType === 'structured_vault') {
        return extractStructuredVault(data);
    }
    throw new Error(`Unknown structured input type: ${inputType}`);
}
389
+ //# sourceMappingURL=structured-extractor.js.map