hindsight-api 0.0.21__py3-none-any.whl → 0.1.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/__init__.py +10 -2
- hindsight_api/alembic/README +1 -0
- hindsight_api/alembic/env.py +146 -0
- hindsight_api/alembic/script.py.mako +28 -0
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +274 -0
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +70 -0
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +39 -0
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +48 -0
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +62 -0
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +65 -0
- hindsight_api/api/__init__.py +2 -4
- hindsight_api/api/http.py +112 -164
- hindsight_api/api/mcp.py +2 -1
- hindsight_api/config.py +154 -0
- hindsight_api/engine/__init__.py +7 -2
- hindsight_api/engine/cross_encoder.py +225 -16
- hindsight_api/engine/embeddings.py +198 -19
- hindsight_api/engine/entity_resolver.py +56 -29
- hindsight_api/engine/llm_wrapper.py +147 -106
- hindsight_api/engine/memory_engine.py +337 -192
- hindsight_api/engine/response_models.py +15 -17
- hindsight_api/engine/retain/bank_utils.py +25 -35
- hindsight_api/engine/retain/entity_processing.py +5 -5
- hindsight_api/engine/retain/fact_extraction.py +86 -24
- hindsight_api/engine/retain/fact_storage.py +1 -1
- hindsight_api/engine/retain/link_creation.py +12 -6
- hindsight_api/engine/retain/link_utils.py +50 -56
- hindsight_api/engine/retain/observation_regeneration.py +264 -0
- hindsight_api/engine/retain/orchestrator.py +31 -44
- hindsight_api/engine/retain/types.py +14 -0
- hindsight_api/engine/search/reranking.py +6 -10
- hindsight_api/engine/search/retrieval.py +2 -2
- hindsight_api/engine/search/think_utils.py +59 -30
- hindsight_api/engine/search/tracer.py +1 -1
- hindsight_api/main.py +201 -0
- hindsight_api/migrations.py +61 -39
- hindsight_api/models.py +1 -2
- hindsight_api/pg0.py +17 -36
- hindsight_api/server.py +43 -0
- {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/METADATA +2 -3
- hindsight_api-0.1.1.dist-info/RECORD +60 -0
- hindsight_api-0.1.1.dist-info/entry_points.txt +2 -0
- hindsight_api/cli.py +0 -128
- hindsight_api/web/__init__.py +0 -12
- hindsight_api/web/server.py +0 -109
- hindsight_api-0.0.21.dist-info/RECORD +0 -50
- hindsight_api-0.0.21.dist-info/entry_points.txt +0 -2
- {hindsight_api-0.0.21.dist-info → hindsight_api-0.1.1.dist-info}/WHEEL +0 -0
|
@@ -10,27 +10,28 @@ from typing import Optional, List, Dict, Any
|
|
|
10
10
|
from pydantic import BaseModel, Field, ConfigDict
|
|
11
11
|
|
|
12
12
|
|
|
13
|
+
# Valid fact types for recall operations (excludes 'observation' which is internal)
|
|
14
|
+
VALID_RECALL_FACT_TYPES = frozenset(["world", "experience", "opinion"])
|
|
15
|
+
|
|
16
|
+
|
|
13
17
|
class DispositionTraits(BaseModel):
|
|
14
18
|
"""
|
|
15
|
-
Disposition traits for a bank
|
|
19
|
+
Disposition traits for a memory bank.
|
|
16
20
|
|
|
17
|
-
All traits are scored
|
|
21
|
+
All traits are scored 1-5 where:
|
|
22
|
+
- skepticism: 1=trusting, 5=skeptical (how much to doubt or question information)
|
|
23
|
+
- literalism: 1=flexible interpretation, 5=literal interpretation (how strictly to interpret information)
|
|
24
|
+
- empathy: 1=detached, 5=empathetic (how much to consider emotional context)
|
|
18
25
|
"""
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
agreeableness: float = Field(description="Agreeableness and cooperation (0.0-1.0)")
|
|
23
|
-
neuroticism: float = Field(description="Emotional sensitivity and neuroticism (0.0-1.0)")
|
|
24
|
-
bias_strength: float = Field(description="How strongly disposition influences thinking (0.0-1.0)")
|
|
26
|
+
skepticism: int = Field(ge=1, le=5, description="How skeptical vs trusting (1=trusting, 5=skeptical)")
|
|
27
|
+
literalism: int = Field(ge=1, le=5, description="How literally to interpret information (1=flexible, 5=literal)")
|
|
28
|
+
empathy: int = Field(ge=1, le=5, description="How much to consider emotional context (1=detached, 5=empathetic)")
|
|
25
29
|
|
|
26
30
|
model_config = ConfigDict(json_schema_extra={
|
|
27
31
|
"example": {
|
|
28
|
-
"
|
|
29
|
-
"
|
|
30
|
-
"
|
|
31
|
-
"agreeableness": 0.7,
|
|
32
|
-
"neuroticism": 0.3,
|
|
33
|
-
"bias_strength": 0.5
|
|
32
|
+
"skepticism": 3,
|
|
33
|
+
"literalism": 3,
|
|
34
|
+
"empathy": 3
|
|
34
35
|
}
|
|
35
36
|
})
|
|
36
37
|
|
|
@@ -71,9 +72,6 @@ class MemoryFact(BaseModel):
|
|
|
71
72
|
metadata: Optional[Dict[str, str]] = Field(None, description="User-defined metadata")
|
|
72
73
|
chunk_id: Optional[str] = Field(None, description="ID of the chunk this fact was extracted from (format: bank_id_document_id_chunk_index)")
|
|
73
74
|
|
|
74
|
-
# Internal metrics (used by system but may not be exposed in API)
|
|
75
|
-
activation: Optional[float] = Field(None, description="Internal activation score")
|
|
76
|
-
|
|
77
75
|
|
|
78
76
|
class ChunkInfo(BaseModel):
|
|
79
77
|
"""Information about a chunk."""
|
|
@@ -13,12 +13,9 @@ from ..response_models import DispositionTraits
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
15
15
|
DEFAULT_DISPOSITION = {
|
|
16
|
-
"
|
|
17
|
-
"
|
|
18
|
-
"
|
|
19
|
-
"agreeableness": 0.5,
|
|
20
|
-
"neuroticism": 0.5,
|
|
21
|
-
"bias_strength": 0.5,
|
|
16
|
+
"skepticism": 3,
|
|
17
|
+
"literalism": 3,
|
|
18
|
+
"empathy": 3,
|
|
22
19
|
}
|
|
23
20
|
|
|
24
21
|
|
|
@@ -32,7 +29,7 @@ class BankProfile(TypedDict):
|
|
|
32
29
|
class BackgroundMergeResponse(BaseModel):
|
|
33
30
|
"""LLM response for background merge with disposition inference."""
|
|
34
31
|
background: str = Field(description="Merged background in first person perspective")
|
|
35
|
-
disposition: DispositionTraits = Field(description="Inferred
|
|
32
|
+
disposition: DispositionTraits = Field(description="Inferred disposition traits (skepticism, literalism, empathy)")
|
|
36
33
|
|
|
37
34
|
|
|
38
35
|
async def get_bank_profile(pool, bank_id: str) -> BankProfile:
|
|
@@ -92,7 +89,7 @@ async def get_bank_profile(pool, bank_id: str) -> BankProfile:
|
|
|
92
89
|
async def update_bank_disposition(
|
|
93
90
|
pool,
|
|
94
91
|
bank_id: str,
|
|
95
|
-
disposition: Dict[str,
|
|
92
|
+
disposition: Dict[str, int]
|
|
96
93
|
) -> None:
|
|
97
94
|
"""
|
|
98
95
|
Update bank disposition traits.
|
|
@@ -100,7 +97,7 @@ async def update_bank_disposition(
|
|
|
100
97
|
Args:
|
|
101
98
|
pool: Database connection pool
|
|
102
99
|
bank_id: Bank identifier
|
|
103
|
-
disposition: Dict with
|
|
100
|
+
disposition: Dict with skepticism, literalism, empathy (all 1-5)
|
|
104
101
|
"""
|
|
105
102
|
# Ensure bank exists first
|
|
106
103
|
await get_bank_profile(pool, bank_id)
|
|
@@ -223,13 +220,10 @@ Instructions:
|
|
|
223
220
|
3. Keep additions that don't conflict
|
|
224
221
|
4. Output in FIRST PERSON ("I") perspective
|
|
225
222
|
5. Be concise - keep merged background under 500 characters
|
|
226
|
-
6. Infer
|
|
227
|
-
-
|
|
228
|
-
-
|
|
229
|
-
-
|
|
230
|
-
- Agreeableness: 0.0-1.0 (cooperation, empathy, consideration)
|
|
231
|
-
- Neuroticism: 0.0-1.0 (emotional sensitivity, anxiety, stress response)
|
|
232
|
-
- Bias Strength: 0.0-1.0 (how much disposition influences opinions)
|
|
223
|
+
6. Infer disposition traits from the merged background (each 1-5 integer):
|
|
224
|
+
- Skepticism: 1-5 (1=trusting, takes things at face value; 5=skeptical, questions everything)
|
|
225
|
+
- Literalism: 1-5 (1=flexible interpretation, reads between lines; 5=literal, exact interpretation)
|
|
226
|
+
- Empathy: 1-5 (1=detached, focuses on facts; 5=empathetic, considers emotional context)
|
|
233
227
|
|
|
234
228
|
CRITICAL: You MUST respond with ONLY a valid JSON object. No markdown, no code blocks, no explanations. Just the JSON.
|
|
235
229
|
|
|
@@ -237,22 +231,19 @@ Format:
|
|
|
237
231
|
{{
|
|
238
232
|
"background": "the merged background text in first person",
|
|
239
233
|
"disposition": {{
|
|
240
|
-
"
|
|
241
|
-
"
|
|
242
|
-
"
|
|
243
|
-
"agreeableness": 0.8,
|
|
244
|
-
"neuroticism": 0.4,
|
|
245
|
-
"bias_strength": 0.6
|
|
234
|
+
"skepticism": 3,
|
|
235
|
+
"literalism": 3,
|
|
236
|
+
"empathy": 3
|
|
246
237
|
}}
|
|
247
238
|
}}
|
|
248
239
|
|
|
249
240
|
Trait inference examples:
|
|
250
|
-
- "
|
|
251
|
-
- "
|
|
252
|
-
- "
|
|
253
|
-
- "
|
|
254
|
-
- "
|
|
255
|
-
- "
|
|
241
|
+
- "I'm a lawyer" → skepticism: 4, literalism: 5, empathy: 2
|
|
242
|
+
- "I'm a therapist" → skepticism: 2, literalism: 2, empathy: 5
|
|
243
|
+
- "I'm an engineer" → skepticism: 3, literalism: 4, empathy: 3
|
|
244
|
+
- "I've been burned before by trusting people" → skepticism: 5, literalism: 3, empathy: 3
|
|
245
|
+
- "I try to understand what people really mean" → skepticism: 3, literalism: 2, empathy: 4
|
|
246
|
+
- "I take contracts very seriously" → skepticism: 4, literalism: 5, empathy: 2"""
|
|
256
247
|
else:
|
|
257
248
|
prompt = f"""You are helping maintain a memory bank's background/profile.
|
|
258
249
|
|
|
@@ -282,7 +273,7 @@ Merged background:"""
|
|
|
282
273
|
response_format=BackgroundMergeResponse,
|
|
283
274
|
scope="bank_background",
|
|
284
275
|
temperature=0.3,
|
|
285
|
-
|
|
276
|
+
max_completion_tokens=8192
|
|
286
277
|
)
|
|
287
278
|
logger.info(f"Successfully got structured response: background={parsed.background[:100]}")
|
|
288
279
|
|
|
@@ -300,7 +291,7 @@ Merged background:"""
|
|
|
300
291
|
messages=messages,
|
|
301
292
|
scope="bank_background",
|
|
302
293
|
temperature=0.3,
|
|
303
|
-
|
|
294
|
+
max_completion_tokens=8192
|
|
304
295
|
)
|
|
305
296
|
|
|
306
297
|
logger.info(f"LLM response for background merge (first 500 chars): {content[:500]}")
|
|
@@ -349,13 +340,12 @@ Merged background:"""
|
|
|
349
340
|
|
|
350
341
|
# Validate disposition values
|
|
351
342
|
disposition = result.get("disposition", {})
|
|
352
|
-
for key in ["
|
|
353
|
-
"agreeableness", "neuroticism", "bias_strength"]:
|
|
343
|
+
for key in ["skepticism", "literalism", "empathy"]:
|
|
354
344
|
if key not in disposition:
|
|
355
|
-
disposition[key] =
|
|
345
|
+
disposition[key] = 3 # Default to neutral
|
|
356
346
|
else:
|
|
357
|
-
# Clamp to [
|
|
358
|
-
disposition[key] = max(
|
|
347
|
+
# Clamp to [1, 5] and convert to int
|
|
348
|
+
disposition[key] = max(1, min(5, int(disposition[key])))
|
|
359
349
|
|
|
360
350
|
result["disposition"] = disposition
|
|
361
351
|
|
|
@@ -7,7 +7,7 @@ import logging
|
|
|
7
7
|
from typing import List, Tuple, Dict, Any
|
|
8
8
|
from uuid import UUID
|
|
9
9
|
|
|
10
|
-
from .types import ProcessedFact, EntityRef
|
|
10
|
+
from .types import ProcessedFact, EntityRef, EntityLink
|
|
11
11
|
from . import link_utils
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
@@ -20,7 +20,7 @@ async def process_entities_batch(
|
|
|
20
20
|
unit_ids: List[str],
|
|
21
21
|
facts: List[ProcessedFact],
|
|
22
22
|
log_buffer: List[str] = None
|
|
23
|
-
) -> List[
|
|
23
|
+
) -> List[EntityLink]:
|
|
24
24
|
"""
|
|
25
25
|
Process entities for all facts and create entity links.
|
|
26
26
|
|
|
@@ -39,7 +39,7 @@ async def process_entities_batch(
|
|
|
39
39
|
log_buffer: Optional buffer for detailed logging
|
|
40
40
|
|
|
41
41
|
Returns:
|
|
42
|
-
List of
|
|
42
|
+
List of EntityLink objects for batch insertion
|
|
43
43
|
"""
|
|
44
44
|
if not unit_ids or not facts:
|
|
45
45
|
return []
|
|
@@ -75,14 +75,14 @@ async def process_entities_batch(
|
|
|
75
75
|
|
|
76
76
|
async def insert_entity_links_batch(
|
|
77
77
|
conn,
|
|
78
|
-
entity_links: List[
|
|
78
|
+
entity_links: List[EntityLink]
|
|
79
79
|
) -> None:
|
|
80
80
|
"""
|
|
81
81
|
Insert entity links in batch.
|
|
82
82
|
|
|
83
83
|
Args:
|
|
84
84
|
conn: Database connection
|
|
85
|
-
entity_links: List of
|
|
85
|
+
entity_links: List of EntityLink objects
|
|
86
86
|
"""
|
|
87
87
|
if not entity_links:
|
|
88
88
|
return
|
|
@@ -167,10 +167,10 @@ class ExtractedFact(BaseModel):
|
|
|
167
167
|
description="'world' = about the user/others (background, experiences). 'assistant' = experience with the assistant."
|
|
168
168
|
)
|
|
169
169
|
|
|
170
|
-
# Entities - extracted from
|
|
170
|
+
# Entities - extracted from fact content
|
|
171
171
|
entities: Optional[List[Entity]] = Field(
|
|
172
172
|
default=None,
|
|
173
|
-
description="Named entities from
|
|
173
|
+
description="Named entities, objects, AND abstract concepts from the fact. Include: people names, organizations, places, significant objects (e.g., 'coffee maker', 'car'), AND abstract concepts/themes (e.g., 'friendship', 'career growth', 'loss', 'celebration'). Extract anything that could help link related facts together."
|
|
174
174
|
)
|
|
175
175
|
causal_relations: Optional[List[CausalRelation]] = Field(
|
|
176
176
|
default=None,
|
|
@@ -325,7 +325,7 @@ async def _extract_facts_from_chunk(
|
|
|
325
325
|
Note: event_date parameter is kept for backward compatibility but not used in prompt.
|
|
326
326
|
The LLM extracts temporal information from the context string instead.
|
|
327
327
|
"""
|
|
328
|
-
|
|
328
|
+
memory_bank_context = f"\n- Your name: {agent_name}" if agent_name and extract_opinions else ""
|
|
329
329
|
|
|
330
330
|
# Determine which fact types to extract based on the flag
|
|
331
331
|
# Note: We use "assistant" in the prompt but convert to "bank" for storage
|
|
@@ -339,7 +339,7 @@ async def _extract_facts_from_chunk(
|
|
|
339
339
|
|
|
340
340
|
{fact_types_instruction}
|
|
341
341
|
|
|
342
|
-
|
|
342
|
+
|
|
343
343
|
|
|
344
344
|
══════════════════════════════════════════════════════════════════════════
|
|
345
345
|
FACT FORMAT - ALL FIVE DIMENSIONS REQUIRED - MAXIMUM VERBOSITY
|
|
@@ -382,13 +382,42 @@ WRONG output:
|
|
|
382
382
|
- where: (missing) ← WRONG - include the location!
|
|
383
383
|
|
|
384
384
|
══════════════════════════════════════════════════════════════════════════
|
|
385
|
-
TEMPORAL HANDLING
|
|
385
|
+
FACT_KIND CLASSIFICATION (CRITICAL FOR TEMPORAL HANDLING)
|
|
386
386
|
══════════════════════════════════════════════════════════════════════════
|
|
387
387
|
|
|
388
|
-
|
|
389
|
-
|
|
388
|
+
⚠️ MUST set fact_kind correctly - this determines whether occurred_start/end are set!
|
|
389
|
+
|
|
390
|
+
fact_kind="event" - USE FOR:
|
|
391
|
+
- Actions that happened at a specific time: "went to", "attended", "visited", "bought", "made"
|
|
392
|
+
- Past events: "yesterday I...", "last week...", "in March 2020..."
|
|
393
|
+
- Future plans with dates: "will go to", "scheduled for"
|
|
394
|
+
- Examples: "I went to a pottery workshop" → event
|
|
395
|
+
"Alice visited Paris in February" → event
|
|
396
|
+
"I bought a new car yesterday" → event
|
|
397
|
+
"The user graduated from MIT in March 2020" → event
|
|
398
|
+
|
|
399
|
+
fact_kind="conversation" - USE FOR:
|
|
400
|
+
- Ongoing states: "works as", "lives in", "is married to"
|
|
401
|
+
- Preferences: "loves", "prefers", "enjoys"
|
|
402
|
+
- Traits/abilities: "speaks fluent French", "knows Python"
|
|
403
|
+
- Examples: "I love Italian food" → conversation
|
|
404
|
+
"Alice works at Google" → conversation
|
|
405
|
+
"I prefer outdoor dining" → conversation
|
|
406
|
+
|
|
407
|
+
══════════════════════════════════════════════════════════════════════════
|
|
408
|
+
TEMPORAL HANDLING (CRITICAL - USE EVENT DATE AS REFERENCE)
|
|
409
|
+
══════════════════════════════════════════════════════════════════════════
|
|
410
|
+
|
|
411
|
+
⚠️ IMPORTANT: Use the "Event Date" provided in the input as your reference point!
|
|
412
|
+
All relative dates ("yesterday", "last week", "recently") must be resolved relative to the Event Date, NOT today's date.
|
|
413
|
+
|
|
414
|
+
For EVENTS (fact_kind="event") - MUST SET BOTH occurred_start AND occurred_end:
|
|
415
|
+
- Convert relative dates → absolute using Event Date as reference
|
|
416
|
+
- If Event Date is "Sunday, March 15, 2020", then "yesterday" = Saturday, March 14, 2020
|
|
417
|
+
- Dates mentioned in text (e.g., "in March 2020") should use THAT year, not current year
|
|
390
418
|
- Always include the day name (Monday, Tuesday, etc.) in the 'when' field
|
|
391
|
-
- Set occurred_start
|
|
419
|
+
- Set occurred_start AND occurred_end to WHEN IT HAPPENED (not when mentioned)
|
|
420
|
+
- For single-day/point events: set occurred_end = occurred_start (same timestamp)
|
|
392
421
|
|
|
393
422
|
For CONVERSATIONS (fact_kind="conversation"):
|
|
394
423
|
- General info, preferences, ongoing states → NO occurred dates
|
|
@@ -415,20 +444,32 @@ Example: "I love Italian food and prefer outdoor dining"
|
|
|
415
444
|
→ Fact 2: what="User prefers outdoor dining", who="user", why="This is a dining preference", entities=["user"]
|
|
416
445
|
|
|
417
446
|
══════════════════════════════════════════════════════════════════════════
|
|
418
|
-
ENTITIES - INCLUDE
|
|
447
|
+
ENTITIES - INCLUDE PEOPLE, PLACES, OBJECTS, AND CONCEPTS (CRITICAL)
|
|
419
448
|
══════════════════════════════════════════════════════════════════════════
|
|
420
449
|
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
450
|
+
Extract entities that help link related facts together. Include:
|
|
451
|
+
1. "user" - when the fact is about the user
|
|
452
|
+
2. People names - Emily, Dr. Smith, etc.
|
|
453
|
+
3. Organizations/Places - IKEA, Goodwill, New York, etc.
|
|
454
|
+
4. Specific objects - coffee maker, toaster, car, laptop, kitchen, etc.
|
|
455
|
+
5. Abstract concepts - themes, values, emotions, or ideas that capture the essence of the fact:
|
|
456
|
+
- "friendship" for facts about friends helping each other, bonding, loyalty
|
|
457
|
+
- "career growth" for facts about promotions, learning new skills, job changes
|
|
458
|
+
- "loss" or "grief" for facts about death, endings, saying goodbye
|
|
459
|
+
- "celebration" for facts about parties, achievements, milestones
|
|
460
|
+
- "trust" or "betrayal" for facts involving those themes
|
|
461
|
+
|
|
462
|
+
✅ CORRECT: entities=["user", "coffee maker", "Goodwill", "kitchen"] for "User donated their coffee maker to Goodwill"
|
|
463
|
+
✅ CORRECT: entities=["user", "Emily", "friendship"] for "Emily helped user move to a new apartment"
|
|
464
|
+
✅ CORRECT: entities=["user", "promotion", "career growth"] for "User got promoted to senior engineer"
|
|
465
|
+
✅ CORRECT: entities=["user", "grandmother", "loss", "grief"] for "User's grandmother passed away last week"
|
|
466
|
+
❌ WRONG: entities=["user", "Emily"] only - missing the "friendship" concept that links to other friendship facts!
|
|
426
467
|
|
|
427
468
|
══════════════════════════════════════════════════════════════════════════
|
|
428
469
|
EXAMPLES
|
|
429
470
|
══════════════════════════════════════════════════════════════════════════
|
|
430
471
|
|
|
431
|
-
Example 1 - World Facts (
|
|
472
|
+
Example 1 - World Facts (Event Date: Monday, June 10, 2024):
|
|
432
473
|
Input: "I'm planning my wedding and want a small outdoor ceremony. I just got back from my college roommate Emily's wedding - she married Sarah at a rooftop garden, it was so romantic!"
|
|
433
474
|
|
|
434
475
|
Output facts:
|
|
@@ -438,22 +479,23 @@ Output facts:
|
|
|
438
479
|
- who: "user"
|
|
439
480
|
- why: "User prefers intimate outdoor settings"
|
|
440
481
|
- fact_type: "world", fact_kind: "conversation"
|
|
441
|
-
- entities: ["user"]
|
|
482
|
+
- entities: ["user", "wedding", "outdoor ceremony"]
|
|
442
483
|
|
|
443
484
|
2. User planning wedding
|
|
444
485
|
- what: "User is planning their own wedding"
|
|
445
486
|
- who: "user"
|
|
446
487
|
- why: "Inspired by Emily's ceremony"
|
|
447
488
|
- fact_type: "world", fact_kind: "conversation"
|
|
448
|
-
- entities: ["user"]
|
|
489
|
+
- entities: ["user", "wedding"]
|
|
449
490
|
|
|
450
|
-
3. Emily's wedding (THE EVENT)
|
|
491
|
+
3. Emily's wedding (THE EVENT - note occurred_start AND occurred_end both set)
|
|
451
492
|
- what: "Emily got married to Sarah at a rooftop garden ceremony in the city"
|
|
452
493
|
- who: "Emily (user's college roommate), Sarah (Emily's partner)"
|
|
453
494
|
- why: "User found it romantic and beautiful"
|
|
454
495
|
- fact_type: "world", fact_kind: "event"
|
|
455
|
-
- occurred_start: "2024-06-09T00:00:00Z" (recently, user "just got back")
|
|
456
|
-
-
|
|
496
|
+
- occurred_start: "2024-06-09T00:00:00Z" (recently, user "just got back" - relative to Event Date June 10, 2024)
|
|
497
|
+
- occurred_end: "2024-06-09T23:59:59Z" (same day - point event)
|
|
498
|
+
- entities: ["user", "Emily", "Sarah", "wedding", "rooftop garden"]
|
|
457
499
|
|
|
458
500
|
Example 2 - Assistant Facts (Context: March 5, 2024):
|
|
459
501
|
Input: "User: My API is really slow when we have 1000+ concurrent users. What can I do?
|
|
@@ -465,7 +507,23 @@ Output fact:
|
|
|
465
507
|
- who: "user, assistant"
|
|
466
508
|
- why: "User asked how to fix slow API performance with 1000+ concurrent users, expected 70-80% reduction in database load"
|
|
467
509
|
- fact_type: "assistant", fact_kind: "conversation"
|
|
468
|
-
- entities: ["user"]
|
|
510
|
+
- entities: ["user", "API", "Redis"]
|
|
511
|
+
|
|
512
|
+
Example 3 - Kitchen Items with Concept Inference (Event Date: Thursday, May 30, 2024):
|
|
513
|
+
Input: "I finally donated my old coffee maker to Goodwill. I upgraded to that new espresso machine last month and the old one was just taking up counter space."
|
|
514
|
+
|
|
515
|
+
Output fact:
|
|
516
|
+
- what: "User donated their old coffee maker to Goodwill after upgrading to a new espresso machine"
|
|
517
|
+
- when: "Thursday, May 30, 2024"
|
|
518
|
+
- who: "user"
|
|
519
|
+
- why: "The old coffee maker was taking up counter space after the upgrade"
|
|
520
|
+
- fact_type: "world", fact_kind: "event"
|
|
521
|
+
- occurred_start: "2024-05-30T00:00:00Z" (uses Event Date year)
|
|
522
|
+
- occurred_end: "2024-05-30T23:59:59Z" (same day - point event)
|
|
523
|
+
- entities: ["user", "coffee maker", "Goodwill", "espresso machine", "kitchen"]
|
|
524
|
+
|
|
525
|
+
Note: "kitchen" is inferred as a concept because coffee makers and espresso machines are kitchen appliances.
|
|
526
|
+
This links the fact to other kitchen-related facts (toaster, faucet, kitchen mat, etc.) via the shared "kitchen" entity.
|
|
469
527
|
|
|
470
528
|
Note how the "why" field captures the FULL STORY: what the user asked AND what outcome was expected!
|
|
471
529
|
|
|
@@ -496,6 +554,7 @@ WHAT TO EXTRACT vs SKIP
|
|
|
496
554
|
# Format event_date with day of week for better temporal reasoning
|
|
497
555
|
event_date_formatted = event_date.strftime('%A, %B %d, %Y') # e.g., "Monday, June 10, 2024"
|
|
498
556
|
user_message = f"""Extract facts from the following text chunk.
|
|
557
|
+
{memory_bank_context}
|
|
499
558
|
|
|
500
559
|
Chunk: {chunk_index + 1}/{total_chunks}
|
|
501
560
|
Event Date: {event_date_formatted} ({event_date.isoformat()})
|
|
@@ -520,7 +579,7 @@ Text:
|
|
|
520
579
|
response_format=FactExtractionResponse,
|
|
521
580
|
scope="memory_extract_facts",
|
|
522
581
|
temperature=0.1,
|
|
523
|
-
|
|
582
|
+
max_completion_tokens=65000,
|
|
524
583
|
skip_validation=True, # Get raw JSON, we'll validate leniently
|
|
525
584
|
)
|
|
526
585
|
|
|
@@ -628,8 +687,11 @@ Text:
|
|
|
628
687
|
occurred_end = get_value('occurred_end')
|
|
629
688
|
if occurred_start:
|
|
630
689
|
fact_data['occurred_start'] = occurred_start
|
|
631
|
-
|
|
632
|
-
|
|
690
|
+
# For point events: if occurred_end not set, default to occurred_start
|
|
691
|
+
if occurred_end:
|
|
692
|
+
fact_data['occurred_end'] = occurred_end
|
|
693
|
+
else:
|
|
694
|
+
fact_data['occurred_end'] = occurred_start
|
|
633
695
|
|
|
634
696
|
# Add entities if present (validate as Entity objects)
|
|
635
697
|
# LLM sometimes returns strings instead of {"text": "..."} format
|
|
@@ -118,7 +118,7 @@ async def ensure_bank_exists(conn, bank_id: str) -> None:
|
|
|
118
118
|
SET updated_at = NOW()
|
|
119
119
|
""",
|
|
120
120
|
bank_id,
|
|
121
|
-
'{"
|
|
121
|
+
'{"skepticism": 3, "literalism": 3, "empathy": 3}',
|
|
122
122
|
""
|
|
123
123
|
)
|
|
124
124
|
|
|
@@ -16,7 +16,7 @@ async def create_temporal_links_batch(
|
|
|
16
16
|
conn,
|
|
17
17
|
bank_id: str,
|
|
18
18
|
unit_ids: List[str]
|
|
19
|
-
) ->
|
|
19
|
+
) -> int:
|
|
20
20
|
"""
|
|
21
21
|
Create temporal links between facts.
|
|
22
22
|
|
|
@@ -26,11 +26,14 @@ async def create_temporal_links_batch(
|
|
|
26
26
|
conn: Database connection
|
|
27
27
|
bank_id: Bank identifier
|
|
28
28
|
unit_ids: List of unit IDs to create links for
|
|
29
|
+
|
|
30
|
+
Returns:
|
|
31
|
+
Number of temporal links created
|
|
29
32
|
"""
|
|
30
33
|
if not unit_ids:
|
|
31
|
-
return
|
|
34
|
+
return 0
|
|
32
35
|
|
|
33
|
-
await link_utils.create_temporal_links_batch_per_fact(
|
|
36
|
+
return await link_utils.create_temporal_links_batch_per_fact(
|
|
34
37
|
conn,
|
|
35
38
|
bank_id,
|
|
36
39
|
unit_ids,
|
|
@@ -43,7 +46,7 @@ async def create_semantic_links_batch(
|
|
|
43
46
|
bank_id: str,
|
|
44
47
|
unit_ids: List[str],
|
|
45
48
|
embeddings: List[List[float]]
|
|
46
|
-
) ->
|
|
49
|
+
) -> int:
|
|
47
50
|
"""
|
|
48
51
|
Create semantic links between facts.
|
|
49
52
|
|
|
@@ -54,14 +57,17 @@ async def create_semantic_links_batch(
|
|
|
54
57
|
bank_id: Bank identifier
|
|
55
58
|
unit_ids: List of unit IDs to create links for
|
|
56
59
|
embeddings: List of embedding vectors (same length as unit_ids)
|
|
60
|
+
|
|
61
|
+
Returns:
|
|
62
|
+
Number of semantic links created
|
|
57
63
|
"""
|
|
58
64
|
if not unit_ids or not embeddings:
|
|
59
|
-
return
|
|
65
|
+
return 0
|
|
60
66
|
|
|
61
67
|
if len(unit_ids) != len(embeddings):
|
|
62
68
|
raise ValueError(f"Mismatch between unit_ids ({len(unit_ids)}) and embeddings ({len(embeddings)})")
|
|
63
69
|
|
|
64
|
-
await link_utils.create_semantic_links_batch(
|
|
70
|
+
return await link_utils.create_semantic_links_batch(
|
|
65
71
|
conn,
|
|
66
72
|
bank_id,
|
|
67
73
|
unit_ids,
|