hindsight-api 0.0.21__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. hindsight_api/__init__.py +10 -2
  2. hindsight_api/alembic/README +1 -0
  3. hindsight_api/alembic/env.py +146 -0
  4. hindsight_api/alembic/script.py.mako +28 -0
  5. hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
  6. hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
  7. hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
  8. hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
  9. hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
  10. hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
  11. hindsight_api/api/__init__.py +2 -4
  12. hindsight_api/api/http.py +112 -164
  13. hindsight_api/api/mcp.py +2 -1
  14. hindsight_api/config.py +154 -0
  15. hindsight_api/engine/__init__.py +7 -2
  16. hindsight_api/engine/cross_encoder.py +225 -16
  17. hindsight_api/engine/embeddings.py +198 -19
  18. hindsight_api/engine/entity_resolver.py +56 -29
  19. hindsight_api/engine/llm_wrapper.py +147 -106
  20. hindsight_api/engine/memory_engine.py +337 -192
  21. hindsight_api/engine/response_models.py +15 -17
  22. hindsight_api/engine/retain/bank_utils.py +25 -35
  23. hindsight_api/engine/retain/entity_processing.py +5 -5
  24. hindsight_api/engine/retain/fact_extraction.py +86 -24
  25. hindsight_api/engine/retain/fact_storage.py +1 -1
  26. hindsight_api/engine/retain/link_creation.py +12 -6
  27. hindsight_api/engine/retain/link_utils.py +50 -56
  28. hindsight_api/engine/retain/observation_regeneration.py +264 -0
  29. hindsight_api/engine/retain/orchestrator.py +31 -44
  30. hindsight_api/engine/retain/types.py +14 -0
  31. hindsight_api/engine/search/reranking.py +6 -10
  32. hindsight_api/engine/search/retrieval.py +2 -2
  33. hindsight_api/engine/search/think_utils.py +59 -30
  34. hindsight_api/engine/search/tracer.py +1 -1
  35. hindsight_api/main.py +201 -0
  36. hindsight_api/migrations.py +61 -39
  37. hindsight_api/models.py +1 -2
  38. hindsight_api/pg0.py +17 -36
  39. hindsight_api/server.py +43 -0
  40. {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +2 -3
  41. hindsight_api-0.1.1.dist-info/RECORD +60 -0
  42. hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
  43. hindsight_api/cli.py +0 -128
  44. hindsight_api/web/__init__.py +0 -12
  45. hindsight_api/web/server.py +0 -109
  46. hindsight_api-0.0.21.dist-info/RECORD +0 -50
  47. hindsight_api-0.0.21.dist-info/entry_points.txt +0 -2
  48. {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
@@ -10,27 +10,28 @@ from typing import Optional, List, Dict, Any
10
10
  from pydantic import BaseModel, Field, ConfigDict
11
11
 
12
12
 
13
+ # Valid fact types for recall operations (excludes 'observation' which is internal)
14
+ VALID_RECALL_FACT_TYPES = frozenset(["world", "experience", "opinion"])
15
+
16
+
13
17
  class DispositionTraits(BaseModel):
14
18
  """
15
- Disposition traits for a bank using the Big Five model.
19
+ Disposition traits for a memory bank.
16
20
 
17
- All traits are scored 0.0-1.0 where higher values indicate stronger presence of the trait.
21
+ All traits are scored 1-5 where:
22
+ - skepticism: 1=trusting, 5=skeptical (how much to doubt or question information)
23
+ - literalism: 1=flexible interpretation, 5=literal interpretation (how strictly to interpret information)
24
+ - empathy: 1=detached, 5=empathetic (how much to consider emotional context)
18
25
  """
19
- openness: float = Field(description="Openness to experience (0.0-1.0)")
20
- conscientiousness: float = Field(description="Conscientiousness and organization (0.0-1.0)")
21
- extraversion: float = Field(description="Extraversion and sociability (0.0-1.0)")
22
- agreeableness: float = Field(description="Agreeableness and cooperation (0.0-1.0)")
23
- neuroticism: float = Field(description="Emotional sensitivity and neuroticism (0.0-1.0)")
24
- bias_strength: float = Field(description="How strongly disposition influences thinking (0.0-1.0)")
26
+ skepticism: int = Field(ge=1, le=5, description="How skeptical vs trusting (1=trusting, 5=skeptical)")
27
+ literalism: int = Field(ge=1, le=5, description="How literally to interpret information (1=flexible, 5=literal)")
28
+ empathy: int = Field(ge=1, le=5, description="How much to consider emotional context (1=detached, 5=empathetic)")
25
29
 
26
30
  model_config = ConfigDict(json_schema_extra={
27
31
  "example": {
28
- "openness": 0.8,
29
- "conscientiousness": 0.6,
30
- "extraversion": 0.4,
31
- "agreeableness": 0.7,
32
- "neuroticism": 0.3,
33
- "bias_strength": 0.5
32
+ "skepticism": 3,
33
+ "literalism": 3,
34
+ "empathy": 3
34
35
  }
35
36
  })
36
37
 
@@ -71,9 +72,6 @@ class MemoryFact(BaseModel):
71
72
  metadata: Optional[Dict[str, str]] = Field(None, description="User-defined metadata")
72
73
  chunk_id: Optional[str] = Field(None, description="ID of the chunk this fact was extracted from (format: bank_id_document_id_chunk_index)")
73
74
 
74
- # Internal metrics (used by system but may not be exposed in API)
75
- activation: Optional[float] = Field(None, description="Internal activation score")
76
-
77
75
 
78
76
  class ChunkInfo(BaseModel):
79
77
  """Information about a chunk."""
@@ -13,12 +13,9 @@ from ..response_models import DispositionTraits
13
13
  logger = logging.getLogger(__name__)
14
14
 
15
15
  DEFAULT_DISPOSITION = {
16
- "openness": 0.5,
17
- "conscientiousness": 0.5,
18
- "extraversion": 0.5,
19
- "agreeableness": 0.5,
20
- "neuroticism": 0.5,
21
- "bias_strength": 0.5,
16
+ "skepticism": 3,
17
+ "literalism": 3,
18
+ "empathy": 3,
22
19
  }
23
20
 
24
21
 
@@ -32,7 +29,7 @@ class BankProfile(TypedDict):
32
29
  class BackgroundMergeResponse(BaseModel):
33
30
  """LLM response for background merge with disposition inference."""
34
31
  background: str = Field(description="Merged background in first person perspective")
35
- disposition: DispositionTraits = Field(description="Inferred Big Five disposition traits")
32
+ disposition: DispositionTraits = Field(description="Inferred disposition traits (skepticism, literalism, empathy)")
36
33
 
37
34
 
38
35
  async def get_bank_profile(pool, bank_id: str) -> BankProfile:
@@ -92,7 +89,7 @@ async def get_bank_profile(pool, bank_id: str) -> BankProfile:
92
89
  async def update_bank_disposition(
93
90
  pool,
94
91
  bank_id: str,
95
- disposition: Dict[str, float]
92
+ disposition: Dict[str, int]
96
93
  ) -> None:
97
94
  """
98
95
  Update bank disposition traits.
@@ -100,7 +97,7 @@ async def update_bank_disposition(
100
97
  Args:
101
98
  pool: Database connection pool
102
99
  bank_id: bank IDentifier
103
- disposition: Dict with Big Five traits + bias_strength (all 0-1)
100
+ disposition: Dict with skepticism, literalism, empathy (all 1-5)
104
101
  """
105
102
  # Ensure bank exists first
106
103
  await get_bank_profile(pool, bank_id)
@@ -223,13 +220,10 @@ Instructions:
223
220
  3. Keep additions that don't conflict
224
221
  4. Output in FIRST PERSON ("I") perspective
225
222
  5. Be concise - keep merged background under 500 characters
226
- 6. Infer Big Five disposition traits from the merged background:
227
- - Openness: 0.0-1.0 (creativity, curiosity, openness to new ideas)
228
- - Conscientiousness: 0.0-1.0 (organization, discipline, goal-directed)
229
- - Extraversion: 0.0-1.0 (sociability, assertiveness, energy from others)
230
- - Agreeableness: 0.0-1.0 (cooperation, empathy, consideration)
231
- - Neuroticism: 0.0-1.0 (emotional sensitivity, anxiety, stress response)
232
- - Bias Strength: 0.0-1.0 (how much disposition influences opinions)
223
+ 6. Infer disposition traits from the merged background (each 1-5 integer):
224
+ - Skepticism: 1-5 (1=trusting, takes things at face value; 5=skeptical, questions everything)
225
+ - Literalism: 1-5 (1=flexible interpretation, reads between lines; 5=literal, exact interpretation)
226
+ - Empathy: 1-5 (1=detached, focuses on facts; 5=empathetic, considers emotional context)
233
227
 
234
228
  CRITICAL: You MUST respond with ONLY a valid JSON object. No markdown, no code blocks, no explanations. Just the JSON.
235
229
 
@@ -237,22 +231,19 @@ Format:
237
231
  {{
238
232
  "background": "the merged background text in first person",
239
233
  "disposition": {{
240
- "openness": 0.7,
241
- "conscientiousness": 0.6,
242
- "extraversion": 0.5,
243
- "agreeableness": 0.8,
244
- "neuroticism": 0.4,
245
- "bias_strength": 0.6
234
+ "skepticism": 3,
235
+ "literalism": 3,
236
+ "empathy": 3
246
237
  }}
247
238
  }}
248
239
 
249
240
  Trait inference examples:
250
- - "creative artist" → openness: 0.8+, bias_strength: 0.6
251
- - "organized engineer" → conscientiousness: 0.8+, openness: 0.5-0.6
252
- - "startup founder" → openness: 0.8+, extraversion: 0.7+, neuroticism: 0.3-0.4
253
- - "risk-averse analyst" → openness: 0.3-0.4, conscientiousness: 0.8+, neuroticism: 0.6+
254
- - "rational and diligent" → conscientiousness: 0.7+, openness: 0.6+
255
- - "passionate and dramatic" → extraversion: 0.7+, neuroticism: 0.6+, openness: 0.7+"""
241
+ - "I'm a lawyer" → skepticism: 4, literalism: 5, empathy: 2
242
+ - "I'm a therapist" → skepticism: 2, literalism: 2, empathy: 5
243
+ - "I'm an engineer" → skepticism: 3, literalism: 4, empathy: 3
244
+ - "I've been burned before by trusting people" → skepticism: 5, literalism: 3, empathy: 3
245
+ - "I try to understand what people really mean" → skepticism: 3, literalism: 2, empathy: 4
246
+ - "I take contracts very seriously" → skepticism: 4, literalism: 5, empathy: 2"""
256
247
  else:
257
248
  prompt = f"""You are helping maintain a memory bank's background/profile.
258
249
 
@@ -282,7 +273,7 @@ Merged background:"""
282
273
  response_format=BackgroundMergeResponse,
283
274
  scope="bank_background",
284
275
  temperature=0.3,
285
- max_tokens=8192
276
+ max_completion_tokens=8192
286
277
  )
287
278
  logger.info(f"Successfully got structured response: background={parsed.background[:100]}")
288
279
 
@@ -300,7 +291,7 @@ Merged background:"""
300
291
  messages=messages,
301
292
  scope="bank_background",
302
293
  temperature=0.3,
303
- max_tokens=8192
294
+ max_completion_tokens=8192
304
295
  )
305
296
 
306
297
  logger.info(f"LLM response for background merge (first 500 chars): {content[:500]}")
@@ -349,13 +340,12 @@ Merged background:"""
349
340
 
350
341
  # Validate disposition values
351
342
  disposition = result.get("disposition", {})
352
- for key in ["openness", "conscientiousness", "extraversion",
353
- "agreeableness", "neuroticism", "bias_strength"]:
343
+ for key in ["skepticism", "literalism", "empathy"]:
354
344
  if key not in disposition:
355
- disposition[key] = 0.5 # Default to neutral
345
+ disposition[key] = 3 # Default to neutral
356
346
  else:
357
- # Clamp to [0, 1]
358
- disposition[key] = max(0.0, min(1.0, float(disposition[key])))
347
+ # Clamp to [1, 5] and convert to int
348
+ disposition[key] = max(1, min(5, int(disposition[key])))
359
349
 
360
350
  result["disposition"] = disposition
361
351
 
@@ -7,7 +7,7 @@ import logging
7
7
  from typing import List, Tuple, Dict, Any
8
8
  from uuid import UUID
9
9
 
10
- from .types import ProcessedFact, EntityRef
10
+ from .types import ProcessedFact, EntityRef, EntityLink
11
11
  from . import link_utils
12
12
 
13
13
  logger = logging.getLogger(__name__)
@@ -20,7 +20,7 @@ async def process_entities_batch(
20
20
  unit_ids: List[str],
21
21
  facts: List[ProcessedFact],
22
22
  log_buffer: List[str] = None
23
- ) -> List[Tuple[str, str, float]]:
23
+ ) -> List[EntityLink]:
24
24
  """
25
25
  Process entities for all facts and create entity links.
26
26
 
@@ -39,7 +39,7 @@ async def process_entities_batch(
39
39
  log_buffer: Optional buffer for detailed logging
40
40
 
41
41
  Returns:
42
- List of entity link tuples: (unit_id, entity_id, confidence)
42
+ List of EntityLink objects for batch insertion
43
43
  """
44
44
  if not unit_ids or not facts:
45
45
  return []
@@ -75,14 +75,14 @@ async def process_entities_batch(
75
75
 
76
76
  async def insert_entity_links_batch(
77
77
  conn,
78
- entity_links: List[Tuple[str, str, float]]
78
+ entity_links: List[EntityLink]
79
79
  ) -> None:
80
80
  """
81
81
  Insert entity links in batch.
82
82
 
83
83
  Args:
84
84
  conn: Database connection
85
- entity_links: List of (unit_id, entity_id, confidence) tuples
85
+ entity_links: List of EntityLink objects
86
86
  """
87
87
  if not entity_links:
88
88
  return
@@ -167,10 +167,10 @@ class ExtractedFact(BaseModel):
167
167
  description="'world' = about the user/others (background, experiences). 'assistant' = experience with the assistant."
168
168
  )
169
169
 
170
- # Entities - extracted from 'who' field
170
+ # Entities - extracted from fact content
171
171
  entities: Optional[List[Entity]] = Field(
172
172
  default=None,
173
- description="Named entities from 'who': people names, organizations, places. NOT generic relations."
173
+ description="Named entities, objects, AND abstract concepts from the fact. Include: people names, organizations, places, significant objects (e.g., 'coffee maker', 'car'), AND abstract concepts/themes (e.g., 'friendship', 'career growth', 'loss', 'celebration'). Extract anything that could help link related facts together."
174
174
  )
175
175
  causal_relations: Optional[List[CausalRelation]] = Field(
176
176
  default=None,
@@ -325,7 +325,7 @@ async def _extract_facts_from_chunk(
325
325
  Note: event_date parameter is kept for backward compatibility but not used in prompt.
326
326
  The LLM extracts temporal information from the context string instead.
327
327
  """
328
- agent_context = f"\n- Your name: {agent_name}" if agent_name else ""
328
+ memory_bank_context = f"\n- Your name: {agent_name}" if agent_name and extract_opinions else ""
329
329
 
330
330
  # Determine which fact types to extract based on the flag
331
331
  # Note: We use "assistant" in the prompt but convert to "bank" for storage
@@ -339,7 +339,7 @@ async def _extract_facts_from_chunk(
339
339
 
340
340
  {fact_types_instruction}
341
341
 
342
- Context: {context if context else 'none'}{agent_context}
342
+
343
343
 
344
344
  ══════════════════════════════════════════════════════════════════════════
345
345
  FACT FORMAT - ALL FIVE DIMENSIONS REQUIRED - MAXIMUM VERBOSITY
@@ -382,13 +382,42 @@ WRONG output:
382
382
  - where: (missing) ← WRONG - include the location!
383
383
 
384
384
  ══════════════════════════════════════════════════════════════════════════
385
- TEMPORAL HANDLING
385
+ FACT_KIND CLASSIFICATION (CRITICAL FOR TEMPORAL HANDLING)
386
386
  ══════════════════════════════════════════════════════════════════════════
387
387
 
388
- For EVENTS (fact_kind="event"):
389
- - Convert relative dates → absolute WITH DAY OF WEEK: "yesterday" on Saturday March 15 → "Friday, March 14, 2024"
388
+ ⚠️ MUST set fact_kind correctly - this determines whether occurred_start/end are set!
389
+
390
+ fact_kind="event" - USE FOR:
391
+ - Actions that happened at a specific time: "went to", "attended", "visited", "bought", "made"
392
+ - Past events: "yesterday I...", "last week...", "in March 2020..."
393
+ - Future plans with dates: "will go to", "scheduled for"
394
+ - Examples: "I went to a pottery workshop" → event
395
+ "Alice visited Paris in February" → event
396
+ "I bought a new car yesterday" → event
397
+ "The user graduated from MIT in March 2020" → event
398
+
399
+ fact_kind="conversation" - USE FOR:
400
+ - Ongoing states: "works as", "lives in", "is married to"
401
+ - Preferences: "loves", "prefers", "enjoys"
402
+ - Traits/abilities: "speaks fluent French", "knows Python"
403
+ - Examples: "I love Italian food" → conversation
404
+ "Alice works at Google" → conversation
405
+ "I prefer outdoor dining" → conversation
406
+
407
+ ══════════════════════════════════════════════════════════════════════════
408
+ TEMPORAL HANDLING (CRITICAL - USE EVENT DATE AS REFERENCE)
409
+ ══════════════════════════════════════════════════════════════════════════
410
+
411
+ ⚠️ IMPORTANT: Use the "Event Date" provided in the input as your reference point!
412
+ All relative dates ("yesterday", "last week", "recently") must be resolved relative to the Event Date, NOT today's date.
413
+
414
+ For EVENTS (fact_kind="event") - MUST SET BOTH occurred_start AND occurred_end:
415
+ - Convert relative dates → absolute using Event Date as reference
416
+ - If Event Date is "Saturday, March 15, 2020", then "yesterday" = Friday, March 14, 2020
417
+ - Dates mentioned in text (e.g., "in March 2020") should use THAT year, not current year
390
418
  - Always include the day name (Monday, Tuesday, etc.) in the 'when' field
391
- - Set occurred_start/occurred_end to WHEN IT HAPPENED (not when mentioned)
419
+ - Set occurred_start AND occurred_end to WHEN IT HAPPENED (not when mentioned)
420
+ - For single-day/point events: set occurred_end = occurred_start (same timestamp)
392
421
 
393
422
  For CONVERSATIONS (fact_kind="conversation"):
394
423
  - General info, preferences, ongoing states → NO occurred dates
@@ -415,20 +444,32 @@ Example: "I love Italian food and prefer outdoor dining"
415
444
  → Fact 2: what="User prefers outdoor dining", who="user", why="This is a dining preference", entities=["user"]
416
445
 
417
446
  ══════════════════════════════════════════════════════════════════════════
418
- ENTITIES - INCLUDE "user" (CRITICAL)
447
+ ENTITIES - INCLUDE PEOPLE, PLACES, OBJECTS, AND CONCEPTS (CRITICAL)
419
448
  ══════════════════════════════════════════════════════════════════════════
420
449
 
421
- When a fact is ABOUT the user (their preferences, plans, experiences), ALWAYS include "user" in entities!
422
-
423
- CORRECT: entities=["user"] for "User loves coffee"
424
- CORRECT: entities=["user", "Emily"] for "User attended Emily's wedding"
425
- WRONG: entities=[] for facts about the user
450
+ Extract entities that help link related facts together. Include:
451
+ 1. "user" - when the fact is about the user
452
+ 2. People names - Emily, Dr. Smith, etc.
453
+ 3. Organizations/Places - IKEA, Goodwill, New York, etc.
454
+ 4. Specific objects - coffee maker, toaster, car, laptop, kitchen, etc.
455
+ 5. Abstract concepts - themes, values, emotions, or ideas that capture the essence of the fact:
456
+ - "friendship" for facts about friends helping each other, bonding, loyalty
457
+ - "career growth" for facts about promotions, learning new skills, job changes
458
+ - "loss" or "grief" for facts about death, endings, saying goodbye
459
+ - "celebration" for facts about parties, achievements, milestones
460
+ - "trust" or "betrayal" for facts involving those themes
461
+
462
+ ✅ CORRECT: entities=["user", "coffee maker", "Goodwill", "kitchen"] for "User donated their coffee maker to Goodwill"
463
+ ✅ CORRECT: entities=["user", "Emily", "friendship"] for "Emily helped user move to a new apartment"
464
+ ✅ CORRECT: entities=["user", "promotion", "career growth"] for "User got promoted to senior engineer"
465
+ ✅ CORRECT: entities=["user", "grandmother", "loss", "grief"] for "User's grandmother passed away last week"
466
+ ❌ WRONG: entities=["user", "Emily"] only - missing the "friendship" concept that links to other friendship facts!
426
467
 
427
468
  ══════════════════════════════════════════════════════════════════════════
428
469
  EXAMPLES
429
470
  ══════════════════════════════════════════════════════════════════════════
430
471
 
431
- Example 1 - World Facts (Context: June 10, 2024):
472
+ Example 1 - World Facts (Event Date: Tuesday, June 10, 2024):
432
473
  Input: "I'm planning my wedding and want a small outdoor ceremony. I just got back from my college roommate Emily's wedding - she married Sarah at a rooftop garden, it was so romantic!"
433
474
 
434
475
  Output facts:
@@ -438,22 +479,23 @@ Output facts:
438
479
  - who: "user"
439
480
  - why: "User prefers intimate outdoor settings"
440
481
  - fact_type: "world", fact_kind: "conversation"
441
- - entities: ["user"]
482
+ - entities: ["user", "wedding", "outdoor ceremony"]
442
483
 
443
484
  2. User planning wedding
444
485
  - what: "User is planning their own wedding"
445
486
  - who: "user"
446
487
  - why: "Inspired by Emily's ceremony"
447
488
  - fact_type: "world", fact_kind: "conversation"
448
- - entities: ["user"]
489
+ - entities: ["user", "wedding"]
449
490
 
450
- 3. Emily's wedding (THE EVENT)
491
+ 3. Emily's wedding (THE EVENT - note occurred_start AND occurred_end both set)
451
492
  - what: "Emily got married to Sarah at a rooftop garden ceremony in the city"
452
493
  - who: "Emily (user's college roommate), Sarah (Emily's partner)"
453
494
  - why: "User found it romantic and beautiful"
454
495
  - fact_type: "world", fact_kind: "event"
455
- - occurred_start: "2024-06-09T00:00:00Z" (recently, user "just got back")
456
- - entities: ["user", "Emily", "Sarah"]
496
+ - occurred_start: "2024-06-09T00:00:00Z" (recently, user "just got back" - relative to Event Date June 10, 2024)
497
+ - occurred_end: "2024-06-09T23:59:59Z" (same day - point event)
498
+ - entities: ["user", "Emily", "Sarah", "wedding", "rooftop garden"]
457
499
 
458
500
  Example 2 - Assistant Facts (Context: March 5, 2024):
459
501
  Input: "User: My API is really slow when we have 1000+ concurrent users. What can I do?
@@ -465,7 +507,23 @@ Output fact:
465
507
  - who: "user, assistant"
466
508
  - why: "User asked how to fix slow API performance with 1000+ concurrent users, expected 70-80% reduction in database load"
467
509
  - fact_type: "assistant", fact_kind: "conversation"
468
- - entities: ["user"]
510
+ - entities: ["user", "API", "Redis"]
511
+
512
+ Example 3 - Kitchen Items with Concept Inference (Event Date: Thursday, May 30, 2024):
513
+ Input: "I finally donated my old coffee maker to Goodwill. I upgraded to that new espresso machine last month and the old one was just taking up counter space."
514
+
515
+ Output fact:
516
+ - what: "User donated their old coffee maker to Goodwill after upgrading to a new espresso machine"
517
+ - when: "Thursday, May 30, 2024"
518
+ - who: "user"
519
+ - why: "The old coffee maker was taking up counter space after the upgrade"
520
+ - fact_type: "world", fact_kind: "event"
521
+ - occurred_start: "2024-05-30T00:00:00Z" (uses Event Date year)
522
+ - occurred_end: "2024-05-30T23:59:59Z" (same day - point event)
523
+ - entities: ["user", "coffee maker", "Goodwill", "espresso machine", "kitchen"]
524
+
525
+ Note: "kitchen" is inferred as a concept because coffee makers and espresso machines are kitchen appliances.
526
+ This links the fact to other kitchen-related facts (toaster, faucet, kitchen mat, etc.) via the shared "kitchen" entity.
469
527
 
470
528
  Note how the "why" field captures the FULL STORY: what the user asked AND what outcome was expected!
471
529
 
@@ -496,6 +554,7 @@ WHAT TO EXTRACT vs SKIP
496
554
  # Format event_date with day of week for better temporal reasoning
497
555
  event_date_formatted = event_date.strftime('%A, %B %d, %Y') # e.g., "Monday, June 10, 2024"
498
556
  user_message = f"""Extract facts from the following text chunk.
557
+ {memory_bank_context}
499
558
 
500
559
  Chunk: {chunk_index + 1}/{total_chunks}
501
560
  Event Date: {event_date_formatted} ({event_date.isoformat()})
@@ -520,7 +579,7 @@ Text:
520
579
  response_format=FactExtractionResponse,
521
580
  scope="memory_extract_facts",
522
581
  temperature=0.1,
523
- max_tokens=65000,
582
+ max_completion_tokens=65000,
524
583
  skip_validation=True, # Get raw JSON, we'll validate leniently
525
584
  )
526
585
 
@@ -628,8 +687,11 @@ Text:
628
687
  occurred_end = get_value('occurred_end')
629
688
  if occurred_start:
630
689
  fact_data['occurred_start'] = occurred_start
631
- if occurred_end:
632
- fact_data['occurred_end'] = occurred_end
690
+ # For point events: if occurred_end not set, default to occurred_start
691
+ if occurred_end:
692
+ fact_data['occurred_end'] = occurred_end
693
+ else:
694
+ fact_data['occurred_end'] = occurred_start
633
695
 
634
696
  # Add entities if present (validate as Entity objects)
635
697
  # LLM sometimes returns strings instead of {"text": "..."} format
@@ -118,7 +118,7 @@ async def ensure_bank_exists(conn, bank_id: str) -> None:
118
118
  SET updated_at = NOW()
119
119
  """,
120
120
  bank_id,
121
- '{"openness": 0.5, "conscientiousness": 0.5, "extraversion": 0.5, "agreeableness": 0.5, "neuroticism": 0.5, "bias_strength": 0.5}',
121
+ '{"skepticism": 3, "literalism": 3, "empathy": 3}',
122
122
  ""
123
123
  )
124
124
 
@@ -16,7 +16,7 @@ async def create_temporal_links_batch(
16
16
  conn,
17
17
  bank_id: str,
18
18
  unit_ids: List[str]
19
- ) -> None:
19
+ ) -> int:
20
20
  """
21
21
  Create temporal links between facts.
22
22
 
@@ -26,11 +26,14 @@ async def create_temporal_links_batch(
26
26
  conn: Database connection
27
27
  bank_id: Bank identifier
28
28
  unit_ids: List of unit IDs to create links for
29
+
30
+ Returns:
31
+ Number of temporal links created
29
32
  """
30
33
  if not unit_ids:
31
- return
34
+ return 0
32
35
 
33
- await link_utils.create_temporal_links_batch_per_fact(
36
+ return await link_utils.create_temporal_links_batch_per_fact(
34
37
  conn,
35
38
  bank_id,
36
39
  unit_ids,
@@ -43,7 +46,7 @@ async def create_semantic_links_batch(
43
46
  bank_id: str,
44
47
  unit_ids: List[str],
45
48
  embeddings: List[List[float]]
46
- ) -> None:
49
+ ) -> int:
47
50
  """
48
51
  Create semantic links between facts.
49
52
 
@@ -54,14 +57,17 @@ async def create_semantic_links_batch(
54
57
  bank_id: Bank identifier
55
58
  unit_ids: List of unit IDs to create links for
56
59
  embeddings: List of embedding vectors (same length as unit_ids)
60
+
61
+ Returns:
62
+ Number of semantic links created
57
63
  """
58
64
  if not unit_ids or not embeddings:
59
- return
65
+ return 0
60
66
 
61
67
  if len(unit_ids) != len(embeddings):
62
68
  raise ValueError(f"Mismatch between unit_ids ({len(unit_ids)}) and embeddings ({len(embeddings)})")
63
69
 
64
- await link_utils.create_semantic_links_batch(
70
+ return await link_utils.create_semantic_links_batch(
65
71
  conn,
66
72
  bank_id,
67
73
  unit_ids,