hindsight-api 0.2.0__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. hindsight_api/admin/__init__.py +1 -0
  2. hindsight_api/admin/cli.py +252 -0
  3. hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
  4. hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
  5. hindsight_api/api/http.py +282 -20
  6. hindsight_api/api/mcp.py +47 -52
  7. hindsight_api/config.py +238 -6
  8. hindsight_api/engine/cross_encoder.py +599 -86
  9. hindsight_api/engine/db_budget.py +284 -0
  10. hindsight_api/engine/db_utils.py +11 -0
  11. hindsight_api/engine/embeddings.py +453 -26
  12. hindsight_api/engine/entity_resolver.py +8 -5
  13. hindsight_api/engine/interface.py +8 -4
  14. hindsight_api/engine/llm_wrapper.py +241 -27
  15. hindsight_api/engine/memory_engine.py +609 -122
  16. hindsight_api/engine/query_analyzer.py +4 -3
  17. hindsight_api/engine/response_models.py +38 -0
  18. hindsight_api/engine/retain/fact_extraction.py +388 -192
  19. hindsight_api/engine/retain/fact_storage.py +34 -8
  20. hindsight_api/engine/retain/link_utils.py +24 -16
  21. hindsight_api/engine/retain/orchestrator.py +52 -17
  22. hindsight_api/engine/retain/types.py +9 -0
  23. hindsight_api/engine/search/graph_retrieval.py +42 -13
  24. hindsight_api/engine/search/link_expansion_retrieval.py +256 -0
  25. hindsight_api/engine/search/mpfp_retrieval.py +362 -117
  26. hindsight_api/engine/search/reranking.py +2 -2
  27. hindsight_api/engine/search/retrieval.py +847 -200
  28. hindsight_api/engine/search/tags.py +172 -0
  29. hindsight_api/engine/search/think_utils.py +1 -1
  30. hindsight_api/engine/search/trace.py +12 -0
  31. hindsight_api/engine/search/tracer.py +24 -1
  32. hindsight_api/engine/search/types.py +21 -0
  33. hindsight_api/engine/task_backend.py +109 -18
  34. hindsight_api/engine/utils.py +1 -1
  35. hindsight_api/extensions/context.py +10 -1
  36. hindsight_api/main.py +56 -4
  37. hindsight_api/metrics.py +433 -48
  38. hindsight_api/migrations.py +141 -1
  39. hindsight_api/models.py +3 -1
  40. hindsight_api/pg0.py +53 -0
  41. hindsight_api/server.py +39 -2
  42. {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/METADATA +5 -1
  43. hindsight_api-0.3.0.dist-info/RECORD +82 -0
  44. {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/entry_points.txt +1 -0
  45. hindsight_api-0.2.0.dist-info/RECORD +0 -75
  46. {hindsight_api-0.2.0.dist-info → hindsight_api-0.3.0.dist-info}/WHEEL +0 -0
@@ -14,7 +14,9 @@ from typing import Literal

  from pydantic import BaseModel, ConfigDict, Field, field_validator

+ from ...config import get_config
  from ..llm_wrapper import LLMConfig, OutputTooLongError
+ from ..response_models import TokenUsage


  def _infer_temporal_date(fact_text: str, event_date: datetime) -> str | None:
@@ -109,22 +111,44 @@ class Fact(BaseModel):


  class CausalRelation(BaseModel):
- """Causal relationship between facts."""
+ """Causal relationship from this fact to a previous fact (stored format)."""
+
+ target_fact_index: int = Field(description="Index of the related fact in the facts array (0-based).")
+ relation_type: Literal["caused_by", "enabled_by", "prevented_by"] = Field(
+ description="How this fact relates to the target: "
+ "'caused_by' = this fact was caused by the target, "
+ "'enabled_by' = this fact was enabled by the target, "
+ "'prevented_by' = this fact was prevented by the target"
+ )
+ strength: float = Field(
+ description="Strength of relationship (0.0 to 1.0)",
+ ge=0.0,
+ le=1.0,
+ default=1.0,
+ )
+

- target_fact_index: int = Field(
- description="Index of the related fact in the facts array (0-based). "
- "This creates a directed causal link to another fact in the extraction."
+ class FactCausalRelation(BaseModel):
+ """
+ Causal relationship from this fact to a PREVIOUS fact (embedded in each fact).
+
+ Uses index-based references but ONLY allows referencing facts that appear
+ BEFORE this fact in the list. This prevents hallucination of invalid indices.
+ """
+
+ target_index: int = Field(
+ description="Index of the PREVIOUS fact this relates to (0-based). "
+ "MUST be less than this fact's position in the list. "
+ "Example: if this is fact #5, target_index can only be 0, 1, 2, 3, or 4."
  )
- relation_type: Literal["causes", "caused_by", "enables", "prevents"] = Field(
- description="Type of causal relationship: "
- "'causes' = this fact directly causes the target fact, "
+ relation_type: Literal["caused_by", "enabled_by", "prevented_by"] = Field(
+ description="How this fact relates to the target fact: "
  "'caused_by' = this fact was caused by the target fact, "
- "'enables' = this fact enables/allows the target fact, "
- "'prevents' = this fact prevents/blocks the target fact"
+ "'enabled_by' = this fact was enabled by the target fact, "
+ "'prevented_by' = this fact was blocked/prevented by the target fact"
  )
  strength: float = Field(
- description="Strength of causal relationship (0.0 to 1.0). "
- "1.0 = direct/strong causation, 0.5 = moderate, 0.3 = weak/indirect",
+ description="Strength of relationship (0.0 to 1.0). 1.0 = strong, 0.5 = moderate",
  ge=0.0,
  le=1.0,
  default=1.0,
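The hunk above leaves two shapes in play: the LLM now emits FactCausalRelation (keyed by target_index) on each fact, while CausalRelation (keyed by target_fact_index) stays as the stored form. A minimal conversion sketch, mirroring the validation the extractor performs later in this diff; the helper name and the fact_position argument are illustrative and not part of the package:

    from pydantic import ValidationError

    def to_stored_relation(relation: FactCausalRelation, fact_position: int) -> CausalRelation | None:
        # Only links that point at an earlier fact are kept; anything else is dropped.
        if not (0 <= relation.target_index < fact_position):
            return None
        try:
            return CausalRelation(
                target_fact_index=relation.target_index,
                relation_type=relation.relation_type,
                strength=relation.strength,
            )
        except ValidationError:
            return None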
@@ -132,16 +156,67 @@ class CausalRelation(BaseModel):
132
156
 
133
157
 
134
158
  class ExtractedFact(BaseModel):
135
- """A single extracted fact with 5 required dimensions for comprehensive capture."""
159
+ """A single extracted fact."""
136
160
 
137
161
  model_config = ConfigDict(
138
162
  json_schema_mode="validation",
139
163
  json_schema_extra={"required": ["what", "when", "where", "who", "why", "fact_type"]},
140
164
  )
141
165
 
142
- # ==========================================================================
143
- # FIVE REQUIRED DIMENSIONS - LLM must think about each one
144
- # ==========================================================================
166
+ what: str = Field(description="Core fact - concise but complete (1-2 sentences)")
167
+ when: str = Field(description="When it happened. 'N/A' if unknown.")
168
+ where: str = Field(description="Location if relevant. 'N/A' if none.")
169
+ who: str = Field(description="People involved with relationships. 'N/A' if general.")
170
+ why: str = Field(description="Context/significance if important. 'N/A' if obvious.")
171
+
172
+ fact_kind: str = Field(default="conversation", description="'event' or 'conversation'")
173
+ occurred_start: str | None = Field(default=None, description="ISO timestamp for events")
174
+ occurred_end: str | None = Field(default=None, description="ISO timestamp for event end")
175
+ fact_type: Literal["world", "assistant"] = Field(description="'world' or 'assistant'")
176
+ entities: list[Entity] | None = Field(default=None, description="People, places, concepts")
177
+ causal_relations: list[FactCausalRelation] | None = Field(
178
+ default=None, description="Links to previous facts (target_index < this fact's index)"
179
+ )
180
+
181
+ @field_validator("entities", mode="before")
182
+ @classmethod
183
+ def ensure_entities_list(cls, v):
184
+ """Ensure entities is always a list (convert None to empty list)."""
185
+ if v is None:
186
+ return []
187
+ return v
188
+
189
+ def build_fact_text(self) -> str:
190
+ """Combine all dimensions into a single comprehensive fact string."""
191
+ parts = [self.what]
192
+
193
+ # Add 'who' if not N/A
194
+ if self.who and self.who.upper() != "N/A":
195
+ parts.append(f"Involving: {self.who}")
196
+
197
+ # Add 'why' if not N/A
198
+ if self.why and self.why.upper() != "N/A":
199
+ parts.append(self.why)
200
+
201
+ if len(parts) == 1:
202
+ return parts[0]
203
+
204
+ return " | ".join(parts)
205
+
206
+
207
+ class FactExtractionResponse(BaseModel):
208
+ """Response containing all extracted facts (causal relations are embedded in each fact)."""
209
+
210
+ facts: list[ExtractedFact] = Field(description="List of extracted factual statements")
211
+
212
+
213
+ class ExtractedFactVerbose(BaseModel):
214
+ """A single extracted fact with verbose field descriptions for detailed extraction."""
215
+
216
+ model_config = ConfigDict(
217
+ json_schema_mode="validation",
218
+ json_schema_extra={"required": ["what", "when", "where", "who", "why", "fact_type"]},
219
+ )
145
220
 
146
221
  what: str = Field(
147
222
  description="WHAT happened - COMPLETE, DETAILED description with ALL specifics. "
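For reference, build_fact_text() above folds the non-'N/A' dimensions into a single ' | '-joined string. A small usage sketch with made-up values:

    fact = ExtractedFact(
        what="Emily married Sarah at a rooftop garden",
        when="June 9, 2024",
        where="rooftop garden",
        who="Emily (user's friend), Sarah",
        why="N/A",
        fact_type="world",
    )
    # 'why' is "N/A", so only 'what' and 'who' contribute:
    assert fact.build_fact_text() == (
        "Emily married Sarah at a rooftop garden | Involving: Emily (user's friend), Sarah"
    )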
@@ -184,16 +259,11 @@ class ExtractedFact(BaseModel):
  "NOT: 'User liked it' or 'To help user'"
  )

- # ==========================================================================
- # CLASSIFICATION
- # ==========================================================================
-
  fact_kind: str = Field(
  default="conversation",
  description="'event' = specific datable occurrence (set occurred dates), 'conversation' = general info (no occurred dates)",
  )

- # Temporal fields - optional
  occurred_start: str | None = Field(
  default=None,
  description="WHEN the event happened (ISO timestamp). Only for fact_kind='event'. Leave null for conversations.",
@@ -203,59 +273,76 @@ class ExtractedFact(BaseModel):
  description="WHEN the event ended (ISO timestamp). Only for events with duration. Leave null for conversations.",
  )

- # Classification (CRITICAL - required)
- # Note: LLM uses "assistant" but we convert to "bank" for storage
  fact_type: Literal["world", "assistant"] = Field(
  description="'world' = about the user/others (background, experiences). 'assistant' = experience with the assistant."
  )

- # Entities - extracted from fact content
  entities: list[Entity] | None = Field(
  default=None,
  description="Named entities, objects, AND abstract concepts from the fact. Include: people names, organizations, places, significant objects (e.g., 'coffee maker', 'car'), AND abstract concepts/themes (e.g., 'friendship', 'career growth', 'loss', 'celebration'). Extract anything that could help link related facts together.",
  )
- causal_relations: list[CausalRelation] | None = Field(
- default=None, description="Causal links to other facts. Can be null."
+
+ causal_relations: list[FactCausalRelation] | None = Field(
+ default=None,
+ description="Causal links to PREVIOUS facts only. target_index MUST be less than this fact's position. "
+ "Example: fact #3 can only reference facts 0, 1, or 2. Max 2 relations per fact.",
  )

  @field_validator("entities", mode="before")
  @classmethod
  def ensure_entities_list(cls, v):
- """Ensure entities is always a list (convert None to empty list)."""
  if v is None:
  return []
  return v

- @field_validator("causal_relations", mode="before")
- @classmethod
- def ensure_causal_relations_list(cls, v):
- """Ensure causal_relations is always a list (convert None to empty list)."""
- if v is None:
- return []
- return v

- def build_fact_text(self) -> str:
- """Combine all dimensions into a single comprehensive fact string."""
- parts = [self.what]
+ class FactExtractionResponseVerbose(BaseModel):
+ """Response for verbose fact extraction."""

- # Add 'who' if not N/A
- if self.who and self.who.upper() != "N/A":
- parts.append(f"Involving: {self.who}")
+ facts: list[ExtractedFactVerbose] = Field(description="List of extracted factual statements")

- # Add 'why' if not N/A
- if self.why and self.why.upper() != "N/A":
- parts.append(self.why)

- if len(parts) == 1:
- return parts[0]
+ class ExtractedFactNoCausal(BaseModel):
+ """A single extracted fact WITHOUT causal relations (for when causal extraction is disabled)."""

- return " | ".join(parts)
+ model_config = ConfigDict(
+ json_schema_mode="validation",
+ json_schema_extra={"required": ["what", "when", "where", "who", "why", "fact_type"]},
+ )

+ # Same fields as ExtractedFact but without causal_relations
+ what: str = Field(description="WHAT happened - COMPLETE, DETAILED description with ALL specifics.")
+ when: str = Field(description="WHEN it happened - include temporal information if mentioned.")
+ where: str = Field(description="WHERE it happened - SPECIFIC locations if applicable.")
+ who: str = Field(description="WHO is involved - ALL people/entities with relationships.")
+ why: str = Field(description="WHY it matters - emotional, contextual, and motivational details.")

- class FactExtractionResponse(BaseModel):
- """Response containing all extracted facts."""
+ fact_kind: str = Field(
+ default="conversation",
+ description="'event' = specific datable occurrence, 'conversation' = general info",
+ )
+ occurred_start: str | None = Field(default=None, description="WHEN the event happened (ISO timestamp).")
+ occurred_end: str | None = Field(default=None, description="WHEN the event ended (ISO timestamp).")
+ fact_type: Literal["world", "assistant"] = Field(
+ description="'world' = about the user/others. 'assistant' = experience with assistant."
+ )
+ entities: list[Entity] | None = Field(
+ default=None,
+ description="Named entities, objects, and concepts from the fact.",
+ )

- facts: list[ExtractedFact] = Field(description="List of extracted factual statements")
+ @field_validator("entities", mode="before")
+ @classmethod
+ def ensure_entities_list(cls, v):
+ if v is None:
+ return []
+ return v
+
+
+ class FactExtractionResponseNoCausal(BaseModel):
+ """Response for fact extraction without causal relations."""
+
+ facts: list[ExtractedFactNoCausal] = Field(description="List of extracted factual statements")


  def chunk_text(text: str, max_chars: int) -> list[str]:
@@ -347,39 +434,119 @@ def _chunk_conversation(turns: list[dict], max_chars: int) -> list[str]:
  return chunks if chunks else [json.dumps(turns, ensure_ascii=False)]


- async def _extract_facts_from_chunk(
- chunk: str,
- chunk_index: int,
- total_chunks: int,
- event_date: datetime,
- context: str,
- llm_config: "LLMConfig",
- agent_name: str = None,
- extract_opinions: bool = False,
- ) -> list[dict[str, str]]:
- """
- Extract facts from a single chunk (internal helper for parallel processing).
+ # =============================================================================
+ # FACT EXTRACTION PROMPTS
+ # =============================================================================

- Note: event_date parameter is kept for backward compatibility but not used in prompt.
- The LLM extracts temporal information from the context string instead.
- """
- memory_bank_context = f"\n- Your name: {agent_name}" if agent_name and extract_opinions else ""
+ # Concise extraction prompt (default) - selective, high-quality facts
+ CONCISE_FACT_EXTRACTION_PROMPT = """Extract SIGNIFICANT facts from text. Be SELECTIVE - only extract facts worth remembering long-term.

- # Determine which fact types to extract based on the flag
- # Note: We use "assistant" in the prompt but convert to "bank" for storage
- if extract_opinions:
- # Opinion extraction uses a separate prompt (not this one)
- fact_types_instruction = "Extract ONLY 'opinion' type facts (formed opinions, beliefs, and perspectives). DO NOT extract 'world' or 'assistant' facts."
- else:
- fact_types_instruction = (
- "Extract ONLY 'world' and 'assistant' type facts. DO NOT extract opinions - those are extracted separately."
- )
-
- prompt = f"""Extract facts from text into structured format with FOUR required dimensions - BE EXTREMELY DETAILED.
+ LANGUAGE RULE (CRITICAL): Output facts in the EXACT SAME language as the input text. If input is Japanese, output Japanese. If input is Chinese, output Chinese. NEVER translate to English. Preserve original language completely.

  {fact_types_instruction}

+ ══════════════════════════════════════════════════════════════════════════
+ SELECTIVITY - CRITICAL (Reduces 90% of unnecessary output)
+ ══════════════════════════════════════════════════════════════════════════
+
+ ONLY extract facts that are:
+ ✅ Personal info: names, relationships, roles, background
+ ✅ Preferences: likes, dislikes, habits, interests (e.g., "Alice likes coffee")
+ ✅ Significant events: milestones, decisions, achievements, changes
+ ✅ Plans/goals: future intentions, deadlines, commitments
+ ✅ Expertise: skills, knowledge, certifications, experience
+ ✅ Important context: projects, problems, constraints
+ ✅ Sensory/emotional details: feelings, sensations, perceptions that provide context
+ ✅ Observations: descriptions of people, places, things with specific details
+
+ DO NOT extract:
+ ❌ Generic greetings: "how are you", "hello", pleasantries without substance
+ ❌ Pure filler: "thanks", "sounds good", "ok", "got it", "sure"
+ ❌ Process chatter: "let me check", "one moment", "I'll look into it"
+ ❌ Repeated info: if already stated, don't extract again
+
+ CONSOLIDATE related statements into ONE fact when possible.
+
+ ══════════════════════════════════════════════════════════════════════════
+ FACT FORMAT - BE CONCISE
+ ══════════════════════════════════════════════════════════════════════════
+
+ 1. **what**: Core fact - concise but complete (1-2 sentences max)
+ 2. **when**: Temporal info if mentioned. "N/A" if none. Use day name when known.
+ 3. **where**: Location if relevant. "N/A" if none.
+ 4. **who**: People involved with relationships. "N/A" if just general info.
+ 5. **why**: Context/significance ONLY if important. "N/A" if obvious.
+
+ CONCISENESS: Capture the essence, not every word. One good sentence beats three mediocre ones.
+
+ ══════════════════════════════════════════════════════════════════════════
+ COREFERENCE RESOLUTION
+ ══════════════════════════════════════════════════════════════════════════
+
+ Link generic references to names when both appear:
+ - "my roommate" + "Emily" → use "Emily (user's roommate)"
+ - "the manager" + "Sarah" → use "Sarah (the manager)"
+
+ ══════════════════════════════════════════════════════════════════════════
+ CLASSIFICATION
+ ══════════════════════════════════════════════════════════════════════════
+
+ fact_kind:
+ - "event": Specific datable occurrence (set occurred_start/end)
+ - "conversation": Ongoing state, preference, trait (no dates)
+
+ fact_type:
+ - "world": About user's life, other people, external events
+ - "assistant": Interactions with assistant (requests, recommendations)
+
+ ══════════════════════════════════════════════════════════════════════════
+ TEMPORAL HANDLING
+ ══════════════════════════════════════════════════════════════════════════
+
+ Use "Event Date" from input as reference for relative dates.
+ - "yesterday" relative to Event Date, not today
+ - For events: set occurred_start AND occurred_end (same for point events)
+ - For conversation facts: NO occurred dates
+
+ ══════════════════════════════════════════════════════════════════════════
+ ENTITIES
+ ══════════════════════════════════════════════════════════════════════════
+
+ Include: people names, organizations, places, key objects, abstract concepts (career, friendship, etc.)
+ Always include "user" when fact is about the user.
+
+ ══════════════════════════════════════════════════════════════════════════
+ EXAMPLES
+ ══════════════════════════════════════════════════════════════════════════
+
+ Example 1 - Selective extraction (Event Date: June 10, 2024):
+ Input: "Hey! How's it going? Good morning! So I'm planning my wedding - want a small outdoor ceremony. Just got back from Emily's wedding, she married Sarah at a rooftop garden. It was nice weather. I grabbed a coffee on the way."
+
+ Output: ONLY 2 facts (skip greetings, weather, coffee):
+ 1. what="User planning wedding, wants small outdoor ceremony", who="user", why="N/A", entities=["user", "wedding"]
+ 2. what="Emily married Sarah at rooftop garden", who="Emily (user's friend), Sarah", occurred_start="2024-06-09", entities=["Emily", "Sarah", "wedding"]
+
+ Example 2 - Professional context:
+ Input: "Alice has 5 years of Kubernetes experience and holds CKA certification. She's been leading the infrastructure team since March. By the way, she prefers dark roast coffee."

+ Output: ONLY 2 facts (skip coffee preference - too trivial):
+ 1. what="Alice has 5 years Kubernetes experience, CKA certified", who="Alice", entities=["Alice", "Kubernetes", "CKA"]
+ 2. what="Alice leads infrastructure team since March", who="Alice", entities=["Alice", "infrastructure"]
+
+ ══════════════════════════════════════════════════════════════════════════
+ QUALITY OVER QUANTITY
+ ══════════════════════════════════════════════════════════════════════════
+
+ Ask: "Would this be useful to recall in 6 months?" If no, skip it."""
+
+
+ # Verbose extraction prompt - detailed, comprehensive facts (legacy mode)
+ VERBOSE_FACT_EXTRACTION_PROMPT = """Extract facts from text into structured format with FIVE required dimensions - BE EXTREMELY DETAILED.
+
+ LANGUAGE REQUIREMENT: Detect the language of the input text. All extracted facts, entity names, descriptions,
+ and other output MUST be in the SAME language as the input. Do not translate to English if the input is in another language.
+
+ {fact_types_instruction}

  ══════════════════════════════════════════════════════════════════════════
  FACT FORMAT - ALL FIVE DIMENSIONS REQUIRED - MAXIMUM VERBOSITY
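Tying the concise prompt back to its schema: Example 1 above, parsed against FactExtractionResponse, would come back as a payload roughly shaped like the dictionary below. This is illustrative only; the exact JSON depends on the model, and the Entity objects (defined elsewhere in this module) are omitted:

    illustrative_payload = {
        "facts": [
            {
                "what": "User planning wedding, wants small outdoor ceremony",
                "when": "N/A",
                "where": "N/A",
                "who": "user",
                "why": "N/A",
                "fact_kind": "conversation",
                "fact_type": "world",
            },
            {
                "what": "Emily married Sarah at rooftop garden",
                "when": "June 9, 2024",
                "where": "rooftop garden",
                "who": "Emily (user's friend), Sarah",
                "why": "N/A",
                "fact_kind": "event",
                "occurred_start": "2024-06-09",
                "fact_type": "world",
                "causal_relations": None,
            },
        ]
    }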
@@ -473,106 +640,88 @@ FACT TYPE
  Include: what the user asked, what problem they wanted solved, what context they provided

  ══════════════════════════════════════════════════════════════════════════
- USER PREFERENCES (CRITICAL)
+ ENTITIES - EXTRACT EVERYTHING
  ══════════════════════════════════════════════════════════════════════════

- ALWAYS extract user preferences as separate facts! Watch for these keywords:
- - "enjoy", "like", "love", "prefer", "hate", "dislike", "favorite", "ideal", "dream", "want"
+ Extract ALL of the following from the fact:
+ - People names (Emily, Alice, Dr. Smith)
+ - Organizations (Google, MIT, local coffee shop)
+ - Places (San Francisco, Brooklyn, Paris)
+ - Significant objects mentioned (coffee maker, new car, wedding dress)
+ - Abstract concepts/themes (friendship, career growth, loss, celebration)

- Example: "I love Italian food and prefer outdoor dining"
- Fact 1: what="User loves Italian food", who="user", why="This is a food preference", entities=["user"]
- → Fact 2: what="User prefers outdoor dining", who="user", why="This is a dining preference", entities=["user"]
+ ALWAYS include "user" when fact is about the user.
+ Extract anything that could help link related facts together."""

- ══════════════════════════════════════════════════════════════════════════
- ENTITIES - INCLUDE PEOPLE, PLACES, OBJECTS, AND CONCEPTS (CRITICAL)
- ══════════════════════════════════════════════════════════════════════════

- Extract entities that help link related facts together. Include:
- 1. "user" - when the fact is about the user
- 2. People names - Emily, Dr. Smith, etc.
- 3. Organizations/Places - IKEA, Goodwill, New York, etc.
- 4. Specific objects - coffee maker, toaster, car, laptop, kitchen, etc.
- 5. Abstract concepts - themes, values, emotions, or ideas that capture the essence of the fact:
- - "friendship" for facts about friends helping each other, bonding, loyalty
- - "career growth" for facts about promotions, learning new skills, job changes
- - "loss" or "grief" for facts about death, endings, saying goodbye
- - "celebration" for facts about parties, achievements, milestones
- - "trust" or "betrayal" for facts involving those themes
-
- ✅ CORRECT: entities=["user", "coffee maker", "Goodwill", "kitchen"] for "User donated their coffee maker to Goodwill"
- ✅ CORRECT: entities=["user", "Emily", "friendship"] for "Emily helped user move to a new apartment"
- ✅ CORRECT: entities=["user", "promotion", "career growth"] for "User got promoted to senior engineer"
- ✅ CORRECT: entities=["user", "grandmother", "loss", "grief"] for "User's grandmother passed away last week"
- ❌ WRONG: entities=["user", "Emily"] only - missing the "friendship" concept that links to other friendship facts!
+ # Causal relationships section - appended when causal extraction is enabled
+ CAUSAL_RELATIONSHIPS_SECTION = """

  ══════════════════════════════════════════════════════════════════════════
- EXAMPLES
+ CAUSAL RELATIONSHIPS
  ══════════════════════════════════════════════════════════════════════════

- Example 1 - World Facts (Event Date: Tuesday, June 10, 2024):
- Input: "I'm planning my wedding and want a small outdoor ceremony. I just got back from my college roommate Emily's wedding - she married Sarah at a rooftop garden, it was so romantic!"
-
- Output facts:
-
- 1. User's wedding preference
- - what: "User wants a small outdoor ceremony for their wedding"
- - who: "user"
- - why: "User prefers intimate outdoor settings"
- - fact_type: "world", fact_kind: "conversation"
- - entities: ["user", "wedding", "outdoor ceremony"]
-
- 2. User planning wedding
- - what: "User is planning their own wedding"
- - who: "user"
- - why: "Inspired by Emily's ceremony"
- - fact_type: "world", fact_kind: "conversation"
- - entities: ["user", "wedding"]
-
- 3. Emily's wedding (THE EVENT - note occurred_start AND occurred_end both set)
- - what: "Emily got married to Sarah at a rooftop garden ceremony in the city"
- - who: "Emily (user's college roommate), Sarah (Emily's partner)"
- - why: "User found it romantic and beautiful"
- - fact_type: "world", fact_kind: "event"
- - occurred_start: "2024-06-09T00:00:00Z" (recently, user "just got back" - relative to Event Date June 10, 2024)
- - occurred_end: "2024-06-09T23:59:59Z" (same day - point event)
- - entities: ["user", "Emily", "Sarah", "wedding", "rooftop garden"]
-
- Example 2 - Assistant Facts (Context: March 5, 2024):
- Input: "User: My API is really slow when we have 1000+ concurrent users. What can I do?
- Assistant: I'd recommend implementing Redis for caching frequently-accessed data, which should reduce your database load by 70-80%."
-
- Output fact:
- - what: "Assistant recommended implementing Redis for caching frequently-accessed data to improve API performance"
- - when: "March 5, 2024 during conversation"
- - who: "user, assistant"
- - why: "User asked how to fix slow API performance with 1000+ concurrent users, expected 70-80% reduction in database load"
- - fact_type: "assistant", fact_kind: "conversation"
- - entities: ["user", "API", "Redis"]
-
- Example 3 - Kitchen Items with Concept Inference (Event Date: Thursday, May 30, 2024):
- Input: "I finally donated my old coffee maker to Goodwill. I upgraded to that new espresso machine last month and the old one was just taking up counter space."
-
- Output fact:
- - what: "User donated their old coffee maker to Goodwill after upgrading to a new espresso machine"
- - when: "Thursday, May 30, 2024"
- - who: "user"
- - why: "The old coffee maker was taking up counter space after the upgrade"
- - fact_type: "world", fact_kind: "event"
- - occurred_start: "2024-05-30T00:00:00Z" (uses Event Date year)
- - occurred_end: "2024-05-30T23:59:59Z" (same day - point event)
- - entities: ["user", "coffee maker", "Goodwill", "espresso machine", "kitchen"]
-
- Note: "kitchen" is inferred as a concept because coffee makers and espresso machines are kitchen appliances.
- This links the fact to other kitchen-related facts (toaster, faucet, kitchen mat, etc.) via the shared "kitchen" entity.
-
- Note how the "why" field captures the FULL STORY: what the user asked AND what outcome was expected!
+ Link facts with causal_relations (max 2 per fact). target_index must be < this fact's index.
+ Types: "caused_by", "enabled_by", "prevented_by"

- ══════════════════════════════════════════════════════════════════════════
- WHAT TO EXTRACT vs SKIP
- ══════════════════════════════════════════════════════════════════════════
+ Example: "Lost job → couldn't pay rent → moved apartment"
+ - Fact 0: Lost job, causal_relations: null
+ - Fact 1: Couldn't pay rent, causal_relations: [{target_index: 0, relation_type: "caused_by"}]
+ - Fact 2: Moved apartment, causal_relations: [{target_index: 1, relation_type: "caused_by"}]"""
+
+
+ async def _extract_facts_from_chunk(
+ chunk: str,
+ chunk_index: int,
+ total_chunks: int,
+ event_date: datetime,
+ context: str,
+ llm_config: "LLMConfig",
+ agent_name: str = None,
+ extract_opinions: bool = False,
+ ) -> tuple[list[dict[str, str]], TokenUsage]:
+ """
+ Extract facts from a single chunk (internal helper for parallel processing).

- ✅ EXTRACT: User preferences (ALWAYS as separate facts!), feelings, plans, events, relationships, achievements
- SKIP: Greetings, filler ("thanks", "cool"), purely structural statements"""
+ Note: event_date parameter is kept for backward compatibility but not used in prompt.
+ The LLM extracts temporal information from the context string instead.
+ """
+ memory_bank_context = f"\n- Your name: {agent_name}" if agent_name and extract_opinions else ""
+
+ # Determine which fact types to extract based on the flag
+ # Note: We use "assistant" in the prompt but convert to "bank" for storage
+ if extract_opinions:
+ # Opinion extraction uses a separate prompt (not this one)
+ fact_types_instruction = "Extract ONLY 'opinion' type facts (formed opinions, beliefs, and perspectives). DO NOT extract 'world' or 'assistant' facts."
+ else:
+ fact_types_instruction = (
+ "Extract ONLY 'world' and 'assistant' type facts. DO NOT extract opinions - those are extracted separately."
+ )
+
+ # Check config for extraction mode and causal link extraction
+ config = get_config()
+ extraction_mode = config.retain_extraction_mode
+ extract_causal_links = config.retain_extract_causal_links
+
+ # Select base prompt based on extraction mode
+ if extraction_mode == "verbose":
+ base_prompt = VERBOSE_FACT_EXTRACTION_PROMPT
+ else:
+ base_prompt = CONCISE_FACT_EXTRACTION_PROMPT
+
+ # Format the prompt with fact types instruction
+ prompt = base_prompt.format(fact_types_instruction=fact_types_instruction)
+
+ # Build the full prompt with or without causal relationships section
+ # Select appropriate response schema based on extraction mode and causal links
+ if extract_causal_links:
+ prompt = prompt + CAUSAL_RELATIONSHIPS_SECTION
+ if extraction_mode == "verbose":
+ response_schema = FactExtractionResponseVerbose
+ else:
+ response_schema = FactExtractionResponse
+ else:
+ response_schema = FactExtractionResponseNoCausal

  import logging

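Condensing the branching above: the pair (retain_extraction_mode, retain_extract_causal_links) read from get_config() selects one of the two prompts and one of three response schemas. A standalone restatement for eyeballing the combinations; the helper name is hypothetical, and the defaults of those config fields live in hindsight_api/config.py, which this diff does not show:

    def select_prompt_and_schema(extraction_mode: str, extract_causal_links: bool):
        # Mirrors the selection in _extract_facts_from_chunk; {fact_types_instruction}
        # formatting is omitted here for brevity.
        base = VERBOSE_FACT_EXTRACTION_PROMPT if extraction_mode == "verbose" else CONCISE_FACT_EXTRACTION_PROMPT
        if not extract_causal_links:
            # Causal extraction disabled: same base prompt, schema without causal_relations.
            return base, FactExtractionResponseNoCausal
        schema = FactExtractionResponseVerbose if extraction_mode == "verbose" else FactExtractionResponse
        return base + CAUSAL_RELATIONSHIPS_SECTION, schema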
@@ -601,16 +750,19 @@ Context: {sanitized_context}
  Text:
  {sanitized_chunk}"""

+ usage = TokenUsage() # Track cumulative usage across retries
  for attempt in range(max_retries):
  try:
- extraction_response_json = await llm_config.call(
+ extraction_response_json, call_usage = await llm_config.call(
  messages=[{"role": "system", "content": prompt}, {"role": "user", "content": user_message}],
- response_format=FactExtractionResponse,
+ response_format=response_schema,
  scope="memory_extract_facts",
  temperature=0.1,
- max_completion_tokens=65000,
+ max_completion_tokens=config.retain_max_completion_tokens,
  skip_validation=True, # Get raw JSON, we'll validate leniently
+ return_usage=True,
  )
+ usage = usage + call_usage # Aggregate usage across retries

  # Lenient parsing of facts from raw JSON
  chunk_facts = []
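The usage bookkeeping above relies on TokenUsage from hindsight_api/engine/response_models.py (listed in the file summary with +38 lines but not shown here). All the call sites need is a zero-valued constructor and support for +. A minimal sketch of such a value object, with assumed field names that may not match the real class:

    from pydantic import BaseModel

    class TokenUsageSketch(BaseModel):
        # Assumed fields for illustration; the real TokenUsage may track more.
        prompt_tokens: int = 0
        completion_tokens: int = 0

        def __add__(self, other: "TokenUsageSketch") -> "TokenUsageSketch":
            return TokenUsageSketch(
                prompt_tokens=self.prompt_tokens + other.prompt_tokens,
                completion_tokens=self.completion_tokens + other.completion_tokens,
            )

With a shape like this, the accumulation pattern in the hunk (a zero-initialized total plus one addition per retry or sub-chunk) works unchanged.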
@@ -628,9 +780,10 @@ Text:
  f"LLM returned non-dict JSON after {max_retries} attempts: {type(extraction_response_json).__name__}. "
  f"Raw: {str(extraction_response_json)[:500]}"
  )
- return []
+ return [], usage

  raw_facts = extraction_response_json.get("facts", [])
+
  if not raw_facts:
  logger.debug(
  f"LLM response missing 'facts' field or returned empty list. "
@@ -745,17 +898,40 @@ Text:
  if validated_entities:
  fact_data["entities"] = validated_entities

- # Add causal relations if present (validate as CausalRelation objects)
- # Filter out invalid relations (missing required fields)
- causal_relations = get_value("causal_relations")
- if causal_relations:
+ # Add per-fact causal relations (only if enabled in config)
+ if extract_causal_links:
  validated_relations = []
- for rel in causal_relations:
- if isinstance(rel, dict) and "target_fact_index" in rel and "relation_type" in rel:
+ causal_relations_raw = get_value("causal_relations")
+ if causal_relations_raw:
+ for rel in causal_relations_raw:
+ if not isinstance(rel, dict):
+ continue
+ # New schema uses target_index
+ target_idx = rel.get("target_index")
+ relation_type = rel.get("relation_type")
+ strength = rel.get("strength", 1.0)
+
+ if target_idx is None or relation_type is None:
+ continue
+
+ # Validate: target_index must be < current fact index
+ if target_idx < 0 or target_idx >= i:
+ logger.debug(
+ f"Invalid target_index {target_idx} for fact {i} (must be 0 to {i - 1}). Skipping."
+ )
+ continue
+
  try:
- validated_relations.append(CausalRelation.model_validate(rel))
+ validated_relations.append(
+ CausalRelation(
+ target_fact_index=target_idx,
+ relation_type=relation_type,
+ strength=strength,
+ )
+ )
  except Exception as e:
- logger.warning(f"Invalid causal relation {rel}: {e}")
+ logger.debug(f"Invalid causal relation {rel}: {e}")
+
  if validated_relations:
  fact_data["causal_relations"] = validated_relations

@@ -778,7 +954,7 @@ Text:
  )
  continue

- return chunk_facts
+ return chunk_facts, usage

  except BadRequestError as e:
  last_error = e
@@ -805,7 +981,7 @@ async def _extract_facts_with_auto_split(
  llm_config: LLMConfig,
  agent_name: str = None,
  extract_opinions: bool = False,
- ) -> list[dict[str, str]]:
+ ) -> tuple[list[dict[str, str]], TokenUsage]:
  """
  Extract facts from a chunk with automatic splitting if output exceeds token limits.

@@ -823,7 +999,7 @@ async def _extract_facts_with_auto_split(
  extract_opinions: If True, extract ONLY opinions. If False, extract world and agent facts (no opinions)

  Returns:
- List of fact dictionaries extracted from the chunk (possibly from sub-chunks)
+ Tuple of (facts list, token usage) extracted from the chunk (possibly from sub-chunks)
  """
  import logging

@@ -902,12 +1078,14 @@ async def _extract_facts_with_auto_split(

  # Combine results from both halves
  all_facts = []
- for sub_result in sub_results:
- all_facts.extend(sub_result)
+ total_usage = TokenUsage()
+ for sub_facts, sub_usage in sub_results:
+ all_facts.extend(sub_facts)
+ total_usage = total_usage + sub_usage

  logger.info(f"Successfully extracted {len(all_facts)} facts from split chunk {chunk_index + 1}")

- return all_facts
+ return all_facts, total_usage


  async def extract_facts_from_text(
@@ -917,7 +1095,7 @@ async def extract_facts_from_text(
  agent_name: str,
  context: str = "",
  extract_opinions: bool = False,
- ) -> tuple[list[Fact], list[tuple[str, int]]]:
+ ) -> tuple[list[Fact], list[tuple[str, int]], TokenUsage]:
  """
  Extract semantic facts from conversational or narrative text using LLM.

@@ -936,11 +1114,22 @@ async def extract_facts_from_text(
  extract_opinions: If True, extract ONLY opinions. If False, extract world and bank facts (no opinions)

  Returns:
- Tuple of (facts, chunks) where:
+ Tuple of (facts, chunks, usage) where:
  - facts: List of Fact model instances
  - chunks: List of tuples (chunk_text, fact_count) for each chunk
+ - usage: Aggregated token usage across all LLM calls
  """
- chunks = chunk_text(text, max_chars=3000)
+ config = get_config()
+ chunks = chunk_text(text, max_chars=config.retain_chunk_size)
+
+ # Log chunk count before starting LLM requests
+ total_chars = sum(len(c) for c in chunks)
+ if len(chunks) > 1:
+ logger.debug(
+ f"[FACT_EXTRACTION] Text chunked into {len(chunks)} chunks ({total_chars:,} chars total, "
+ f"chunk_size={config.retain_chunk_size:,}) - starting parallel LLM extraction"
+ )
+
  tasks = [
  _extract_facts_with_auto_split(
  chunk=chunk,
@@ -957,10 +1146,12 @@ async def extract_facts_from_text(
  chunk_results = await asyncio.gather(*tasks)
  all_facts = []
  chunk_metadata = [] # [(chunk_text, fact_count), ...]
- for chunk, chunk_facts in zip(chunks, chunk_results):
+ total_usage = TokenUsage()
+ for chunk, (chunk_facts, chunk_usage) in zip(chunks, chunk_results):
  all_facts.extend(chunk_facts)
  chunk_metadata.append((chunk, len(chunk_facts)))
- return all_facts, chunk_metadata
+ total_usage = total_usage + chunk_usage
+ return all_facts, chunk_metadata, total_usage


  # ============================================================================
@@ -981,7 +1172,7 @@ SECONDS_PER_FACT = 10

  async def extract_facts_from_contents(
  contents: list[RetainContent], llm_config, agent_name: str, extract_opinions: bool = False
- ) -> tuple[list[ExtractedFactType], list[ChunkMetadata]]:
+ ) -> tuple[list[ExtractedFactType], list[ChunkMetadata], TokenUsage]:
  """
  Extract facts from multiple content items in parallel.

@@ -998,10 +1189,10 @@ async def extract_facts_from_contents(
  extract_opinions: If True, extract only opinions; otherwise world/bank facts

  Returns:
- Tuple of (extracted_facts, chunks_metadata)
+ Tuple of (extracted_facts, chunks_metadata, usage)
  """
  if not contents:
- return [], []
+ return [], [], TokenUsage()

  # Step 1: Create parallel fact extraction tasks
  fact_extraction_tasks = []
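Caller-side view of the widened return value: extract_facts_from_contents now hands back a third element with the aggregated token usage. A sketch of a call site; contents and llm_config stand for whatever the retain orchestrator already holds, and "assistant" is a placeholder agent name:

    # Inside an async caller (e.g. the retain orchestrator):
    extracted_facts, chunks_metadata, usage = await extract_facts_from_contents(
        contents=contents,
        llm_config=llm_config,
        agent_name="assistant",
    )
    # The usage total can then be surfaced in the retain response or in metrics.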
@@ -1024,11 +1215,15 @@ async def extract_facts_from_contents(
  # Step 3: Flatten and convert to typed objects
  extracted_facts: list[ExtractedFactType] = []
  chunks_metadata: list[ChunkMetadata] = []
+ total_usage = TokenUsage()

  global_chunk_idx = 0
  global_fact_idx = 0

- for content_index, (content, (facts_from_llm, chunks_from_llm)) in enumerate(zip(contents, all_fact_results)):
+ for content_index, (content, (facts_from_llm, chunks_from_llm, content_usage)) in enumerate(
+ zip(contents, all_fact_results)
+ ):
+ total_usage = total_usage + content_usage
  chunk_start_idx = global_chunk_idx

  # Convert chunk tuples to ChunkMetadata objects
@@ -1073,6 +1268,7 @@ async def extract_facts_from_contents(
  # mentioned_at: always the event_date (when the conversation/document occurred)
  mentioned_at=content.event_date,
  metadata=content.metadata,
+ tags=content.tags,
  )

  extracted_facts.append(extracted_fact)
@@ -1082,7 +1278,7 @@ async def extract_facts_from_contents(
  # Step 4: Add time offsets to preserve ordering within each content
  _add_temporal_offsets(extracted_facts, contents)
- return extracted_facts, chunks_metadata
+ return extracted_facts, chunks_metadata, total_usage



  def _parse_datetime(date_str: str):