hindsight-api 0.1.5__py3-none-any.whl → 0.1.7__py3-none-any.whl
This diff shows the changes between publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- hindsight_api/__init__.py +10 -9
- hindsight_api/alembic/env.py +5 -8
- hindsight_api/alembic/versions/5a366d414dce_initial_schema.py +266 -180
- hindsight_api/alembic/versions/b7c4d8e9f1a2_add_chunks_table.py +32 -32
- hindsight_api/alembic/versions/c8e5f2a3b4d1_add_retain_params_to_documents.py +11 -11
- hindsight_api/alembic/versions/d9f6a3b4c5e2_rename_bank_to_interactions.py +7 -12
- hindsight_api/alembic/versions/e0a1b2c3d4e5_disposition_to_3_traits.py +23 -15
- hindsight_api/alembic/versions/rename_personality_to_disposition.py +30 -21
- hindsight_api/api/__init__.py +10 -10
- hindsight_api/api/http.py +575 -593
- hindsight_api/api/mcp.py +30 -28
- hindsight_api/banner.py +13 -6
- hindsight_api/config.py +9 -13
- hindsight_api/engine/__init__.py +9 -9
- hindsight_api/engine/cross_encoder.py +22 -21
- hindsight_api/engine/db_utils.py +5 -4
- hindsight_api/engine/embeddings.py +22 -21
- hindsight_api/engine/entity_resolver.py +81 -75
- hindsight_api/engine/llm_wrapper.py +61 -79
- hindsight_api/engine/memory_engine.py +603 -625
- hindsight_api/engine/query_analyzer.py +100 -97
- hindsight_api/engine/response_models.py +105 -106
- hindsight_api/engine/retain/__init__.py +9 -16
- hindsight_api/engine/retain/bank_utils.py +34 -58
- hindsight_api/engine/retain/chunk_storage.py +4 -12
- hindsight_api/engine/retain/deduplication.py +9 -28
- hindsight_api/engine/retain/embedding_processing.py +4 -11
- hindsight_api/engine/retain/embedding_utils.py +3 -4
- hindsight_api/engine/retain/entity_processing.py +7 -17
- hindsight_api/engine/retain/fact_extraction.py +155 -165
- hindsight_api/engine/retain/fact_storage.py +11 -23
- hindsight_api/engine/retain/link_creation.py +11 -39
- hindsight_api/engine/retain/link_utils.py +166 -95
- hindsight_api/engine/retain/observation_regeneration.py +39 -52
- hindsight_api/engine/retain/orchestrator.py +72 -62
- hindsight_api/engine/retain/types.py +49 -43
- hindsight_api/engine/search/__init__.py +5 -5
- hindsight_api/engine/search/fusion.py +6 -15
- hindsight_api/engine/search/graph_retrieval.py +22 -23
- hindsight_api/engine/search/mpfp_retrieval.py +76 -92
- hindsight_api/engine/search/observation_utils.py +9 -16
- hindsight_api/engine/search/reranking.py +4 -7
- hindsight_api/engine/search/retrieval.py +87 -66
- hindsight_api/engine/search/scoring.py +5 -7
- hindsight_api/engine/search/temporal_extraction.py +8 -11
- hindsight_api/engine/search/think_utils.py +115 -39
- hindsight_api/engine/search/trace.py +68 -39
- hindsight_api/engine/search/tracer.py +44 -35
- hindsight_api/engine/search/types.py +20 -17
- hindsight_api/engine/task_backend.py +21 -26
- hindsight_api/engine/utils.py +25 -10
- hindsight_api/main.py +21 -40
- hindsight_api/mcp_local.py +190 -0
- hindsight_api/metrics.py +44 -30
- hindsight_api/migrations.py +10 -8
- hindsight_api/models.py +60 -72
- hindsight_api/pg0.py +22 -23
- hindsight_api/server.py +3 -6
- hindsight_api-0.1.7.dist-info/METADATA +178 -0
- hindsight_api-0.1.7.dist-info/RECORD +64 -0
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.7.dist-info}/entry_points.txt +1 -0
- hindsight_api-0.1.5.dist-info/METADATA +0 -42
- hindsight_api-0.1.5.dist-info/RECORD +0 -63
- {hindsight_api-0.1.5.dist-info → hindsight_api-0.1.7.dist-info}/WHEEL +0 -0
hindsight_api/engine/search/think_utils.py (+115 -39)

@@ -2,41 +2,35 @@
 Think operation utilities for formulating answers based on agent and world facts.
 """
 
-import asyncio
 import logging
 import re
-from datetime import datetime
-
+from datetime import datetime
+
 from pydantic import BaseModel, Field
 
-from ..response_models import
+from ..response_models import DispositionTraits, MemoryFact
 
 logger = logging.getLogger(__name__)
 
 
 class Opinion(BaseModel):
     """An opinion formed by the bank."""
+
     opinion: str = Field(description="The opinion or perspective with reasoning included")
     confidence: float = Field(description="Confidence score for this opinion (0.0 to 1.0, where 1.0 is very confident)")
 
 
 class OpinionExtractionResponse(BaseModel):
     """Response containing extracted opinions."""
-
-
-        description="List of opinions formed with their supporting reasons and confidence scores"
+
+    opinions: list[Opinion] = Field(
+        default_factory=list, description="List of opinions formed with their supporting reasons and confidence scores"
     )
 
 
 def describe_trait_level(value: int) -> str:
     """Convert trait value (1-5) to descriptive text."""
-    levels = {
-        1: "very low",
-        2: "low",
-        3: "moderate",
-        4: "high",
-        5: "very high"
-    }
+    levels = {1: "very low", 2: "low", 3: "moderate", 4: "high", 5: "very high"}
     return levels.get(value, "moderate")
 
 
@@ -47,7 +41,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
         2: "You tend to trust information but may question obvious inconsistencies.",
         3: "You have a balanced approach to information, neither too trusting nor too skeptical.",
         4: "You are somewhat skeptical and often question the reliability of information.",
-        5: "You are highly skeptical and critically examine all information for accuracy and hidden motives."
+        5: "You are highly skeptical and critically examine all information for accuracy and hidden motives.",
     }
 
     literalism_desc = {
@@ -55,7 +49,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
         2: "You tend to consider context and implied meaning alongside literal statements.",
         3: "You balance literal interpretation with contextual understanding.",
         4: "You prefer to interpret information more literally and precisely.",
-        5: "You interpret information very literally and focus on exact wording and commitments."
+        5: "You interpret information very literally and focus on exact wording and commitments.",
     }
 
     empathy_desc = {
@@ -63,7 +57,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
         2: "You consider facts first but acknowledge emotional factors exist.",
         3: "You balance factual analysis with emotional understanding.",
         4: "You give significant weight to emotional context and human factors.",
-        5: "You strongly consider the emotional state and circumstances of others when forming memories."
+        5: "You strongly consider the emotional state and circumstances of others when forming memories.",
     }
 
     return f"""Your disposition traits:
@@ -72,7 +66,7 @@ def build_disposition_description(disposition: DispositionTraits) -> str:
 - Empathy ({describe_trait_level(disposition.empathy)}): {empathy_desc.get(disposition.empathy, empathy_desc[3])}"""
 
 
-def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
+def format_facts_for_prompt(facts: list[MemoryFact]) -> str:
     """Format facts as JSON for LLM prompt."""
     import json
 
@@ -80,9 +74,7 @@ def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
         return "[]"
     formatted = []
     for fact in facts:
-        fact_obj = {
-            "text": fact.text
-        }
+        fact_obj = {"text": fact.text}
 
         # Add context if available
         if fact.context:
@@ -94,7 +86,7 @@ def format_facts_for_prompt(facts: List[MemoryFact]) -> str:
         if isinstance(occurred_start, str):
             fact_obj["occurred_start"] = occurred_start
         elif isinstance(occurred_start, datetime):
-            fact_obj["occurred_start"] = occurred_start.strftime(
+            fact_obj["occurred_start"] = occurred_start.strftime("%Y-%m-%d %H:%M:%S")
 
         formatted.append(fact_obj)
 
@@ -176,16 +168,14 @@ def get_system_message(disposition: DispositionTraits) -> str:
     elif disposition.empathy <= 2:
         instructions.append("Focus on facts and outcomes rather than emotional context.")
 
-    disposition_instruction =
+    disposition_instruction = (
+        " ".join(instructions) if instructions else "Balance your disposition traits when interpreting information."
+    )
 
     return f"You are a person with your own thoughts, experiences, opinions, and disposition. When asked a question, respond naturally like a human would - share what you genuinely think based on what you know and have experienced. {disposition_instruction} Be direct, express your views confidently, and use 'I think', 'I believe', 'in my view', etc. Respond in plain text without markdown formatting."
 
 
-async def extract_opinions_from_text(
-    llm_config,
-    text: str,
-    query: str
-) -> List[Opinion]:
+async def extract_opinions_from_text(llm_config, text: str, query: str) -> list[Opinion]:
     """
     Extract opinions with reasons and confidence from text using LLM.
 
@@ -238,11 +228,14 @@ If no genuine opinions are expressed (e.g., the response just says "I don't know
     try:
         result = await llm_config.call(
             messages=[
-                {
-
+                {
+                    "role": "system",
+                    "content": "You are converting opinions from text into first-person statements. Always use 'I think', 'I believe', 'I feel', etc. NEVER use third-person like 'The speaker' or 'They'.",
+                },
+                {"role": "user", "content": extraction_prompt},
             ],
             response_format=OpinionExtractionResponse,
-            scope="memory_extract_opinion"
+            scope="memory_extract_opinion",
         )
 
         # Format opinions with confidence score and convert to first-person
@@ -253,14 +246,18 @@ If no genuine opinions are expressed (e.g., the response just says "I don't know
 
             # Replace common third-person patterns with first-person
             def singularize_verb(verb):
-                if verb.endswith(
+                if verb.endswith("es"):
                     return verb[:-1]  # believes -> believe
-                elif verb.endswith(
+                elif verb.endswith("s"):
                     return verb[:-1]  # thinks -> think
                 return verb
 
             # Pattern: "The speaker/user [verb]..." -> "I [verb]..."
-            match = re.match(
+            match = re.match(
+                r"^(The speaker|The user|They|It is believed) (believes?|thinks?|feels?|says|asserts?|considers?)(\s+that)?(.*)$",
+                opinion_text,
+                re.IGNORECASE,
+            )
             if match:
                 verb = singularize_verb(match.group(2))
                 that_part = match.group(3) or ""  # Keep " that" if present
@@ -268,17 +265,96 @@ If no genuine opinions are expressed (e.g., the response just says "I don't know
                 opinion_text = f"I {verb}{that_part}{rest}"
 
             # If still doesn't start with first-person, prepend "I believe that "
-            first_person_starters = [
+            first_person_starters = [
+                "I think",
+                "I believe",
+                "I feel",
+                "In my view",
+                "I've come to believe",
+                "Previously I",
+            ]
             if not any(opinion_text.startswith(starter) for starter in first_person_starters):
                 opinion_text = "I believe that " + opinion_text[0].lower() + opinion_text[1:]
 
-            formatted_opinions.append(Opinion(
-                opinion=opinion_text,
-                confidence=op.confidence
-            ))
+            formatted_opinions.append(Opinion(opinion=opinion_text, confidence=op.confidence))
 
         return formatted_opinions
 
     except Exception as e:
         logger.warning(f"Failed to extract opinions: {str(e)}")
         return []
+
+
+async def reflect(
+    llm_config,
+    query: str,
+    experience_facts: list[str] = None,
+    world_facts: list[str] = None,
+    opinion_facts: list[str] = None,
+    name: str = "Assistant",
+    disposition: DispositionTraits = None,
+    background: str = "",
+    context: str = None,
+) -> str:
+    """
+    Standalone reflect function for generating answers based on facts.
+
+    This is a static version of the reflect operation that can be called
+    without a MemoryEngine instance, useful for testing.
+
+    Args:
+        llm_config: LLM provider instance
+        query: Question to answer
+        experience_facts: List of experience/agent fact strings
+        world_facts: List of world fact strings
+        opinion_facts: List of opinion fact strings
+        name: Name of the agent/persona
+        disposition: Disposition traits (defaults to neutral)
+        background: Background information
+        context: Additional context for the prompt
+
+    Returns:
+        Generated answer text
+    """
+    # Default disposition if not provided
+    if disposition is None:
+        disposition = DispositionTraits(skepticism=3, literalism=3, empathy=3)
+
+    # Convert string lists to MemoryFact format for formatting
+    def to_memory_facts(facts: list[str], fact_type: str) -> list[MemoryFact]:
+        if not facts:
+            return []
+        return [MemoryFact(id=f"test-{i}", text=f, fact_type=fact_type) for i, f in enumerate(facts)]
+
+    agent_results = to_memory_facts(experience_facts or [], "experience")
+    world_results = to_memory_facts(world_facts or [], "world")
+    opinion_results = to_memory_facts(opinion_facts or [], "opinion")
+
+    # Format facts for prompt
+    agent_facts_text = format_facts_for_prompt(agent_results)
+    world_facts_text = format_facts_for_prompt(world_results)
+    opinion_facts_text = format_facts_for_prompt(opinion_results)
+
+    # Build prompt
+    prompt = build_think_prompt(
+        agent_facts_text=agent_facts_text,
+        world_facts_text=world_facts_text,
+        opinion_facts_text=opinion_facts_text,
+        query=query,
+        name=name,
+        disposition=disposition,
+        background=background,
+        context=context,
+    )
+
+    system_message = get_system_message(disposition)
+
+    # Call LLM
+    answer_text = await llm_config.call(
+        messages=[{"role": "system", "content": system_message}, {"role": "user", "content": prompt}],
+        scope="memory_think",
+        temperature=0.9,
+        max_completion_tokens=1000,
+    )
+
+    return answer_text.strip()
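For orientation, here is a minimal sketch of how the new standalone reflect() helper added above might be called. The StubLLM class is a hypothetical stand-in for a real provider (anything exposing the async call() method used throughout this module), and the fact strings and values are illustrative only, not part of the package.

import asyncio

from hindsight_api.engine.response_models import DispositionTraits
from hindsight_api.engine.search.think_utils import reflect


class StubLLM:
    """Hypothetical provider stand-in: any object with an async call() like the one used above."""

    async def call(self, messages, scope, temperature=None, max_completion_tokens=None, **kwargs):
        # A real provider would send `messages` to a model; here we just return a canned answer.
        return "I think a phased rollout is the safer option."


async def main() -> None:
    # Illustrative inputs; reflect() wraps them into MemoryFact objects internally.
    answer = await reflect(
        StubLLM(),
        query="What do you think about the rollout plan?",
        experience_facts=["I reviewed the rollout plan last week."],
        world_facts=["The rollout is scheduled for Q3."],
        opinion_facts=["I believe phased rollouts reduce risk."],
        name="Assistant",
        disposition=DispositionTraits(skepticism=3, literalism=3, empathy=3),
    )
    print(answer)


asyncio.run(main())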
hindsight_api/engine/search/trace.py (+68 -39)

@@ -4,15 +4,18 @@ Search trace models for debugging and visualization.
 These Pydantic models define the structure of search traces, capturing
 every step of the spreading activation search process for analysis.
 """
+
 from datetime import datetime
-from typing import
+from typing import Any, Literal
+
 from pydantic import BaseModel, Field
 
 
 class QueryInfo(BaseModel):
     """Information about the search query."""
+
     query_text: str = Field(description="Original query text")
-    query_embedding:
+    query_embedding: list[float] = Field(description="Generated query embedding vector")
     timestamp: datetime = Field(description="When the query was executed")
     budget: int = Field(description="Maximum nodes to explore")
     max_tokens: int = Field(description="Maximum tokens to return in results")
@@ -20,6 +23,7 @@ class QueryInfo(BaseModel):
 
 class EntryPoint(BaseModel):
     """An entry point node selected for search."""
+
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     similarity_score: float = Field(description="Cosine similarity to query", ge=0.0, le=1.0)
@@ -28,6 +32,7 @@ class EntryPoint(BaseModel):
 
 class WeightComponents(BaseModel):
     """Breakdown of weight calculation components."""
+
     activation: float = Field(description="Activation from spreading (can exceed 1.0 through accumulation)", ge=0.0)
     semantic_similarity: float = Field(description="Semantic similarity to query", ge=0.0, le=1.0)
     recency: float = Field(description="Recency weight", ge=0.0, le=1.0)
@@ -43,99 +48,120 @@ class WeightComponents(BaseModel):
 
 class LinkInfo(BaseModel):
     """Information about a link to a neighbor."""
+
     to_node_id: str = Field(description="Target node ID")
     link_type: Literal["temporal", "semantic", "entity"] = Field(description="Type of link")
-    link_weight: float = Field(
-
-
+    link_weight: float = Field(
+        description="Weight of the link (can exceed 1.0 when aggregating multiple connections)", ge=0.0
+    )
+    entity_id: str | None = Field(default=None, description="Entity ID if link_type is 'entity'")
+    new_activation: float | None = Field(
+        default=None, description="Activation that would be passed to neighbor (None for supplementary links)"
+    )
     followed: bool = Field(description="Whether this link was followed (or pruned)")
-    prune_reason:
-    is_supplementary: bool = Field(
+    prune_reason: str | None = Field(default=None, description="Why link was not followed (if not followed)")
+    is_supplementary: bool = Field(
+        default=False, description="Whether this is a supplementary link (multiple connections to same node)"
+    )
 
 
 class NodeVisit(BaseModel):
     """Information about visiting a node during search."""
+
     step: int = Field(description="Step number in search (1-based)")
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     context: str = Field(description="Memory unit context")
-    event_date:
+    event_date: datetime | None = Field(default=None, description="When the memory occurred")
     access_count: int = Field(description="Number of times accessed before this search")
 
     # How this node was reached
     is_entry_point: bool = Field(description="Whether this is an entry point")
-    parent_node_id:
-    link_type:
-
+    parent_node_id: str | None = Field(default=None, description="Node that led to this one")
+    link_type: Literal["temporal", "semantic", "entity"] | None = Field(
+        default=None, description="Type of link from parent"
+    )
+    link_weight: float | None = Field(default=None, description="Weight of link from parent")
 
     # Weights
     weights: WeightComponents = Field(description="Weight calculation breakdown")
 
     # Neighbors discovered from this node
-    neighbors_explored:
+    neighbors_explored: list[LinkInfo] = Field(default_factory=list, description="Links explored from this node")
 
     # Ranking
-    final_rank:
+    final_rank: int | None = Field(default=None, description="Final rank in results (1-based, None if not in top-k)")
 
 
 class PruningDecision(BaseModel):
     """Records when a node was considered but not visited."""
+
     node_id: str = Field(description="Node that was pruned")
-    reason: Literal["already_visited", "activation_too_low", "budget_exhausted"] = Field(
+    reason: Literal["already_visited", "activation_too_low", "budget_exhausted"] = Field(
+        description="Why it was pruned"
+    )
     activation: float = Field(description="Activation value when pruned")
     would_have_been_step: int = Field(description="What step it would have been if visited")
 
 
 class SearchPhaseMetrics(BaseModel):
     """Performance metrics for a search phase."""
+
     phase_name: str = Field(description="Name of the phase")
     duration_seconds: float = Field(description="Time taken in seconds")
-    details:
+    details: dict[str, Any] = Field(default_factory=dict, description="Additional phase-specific metrics")
 
 
 class RetrievalResult(BaseModel):
     """A single result from a retrieval method."""
+
     rank: int = Field(description="Rank in this retrieval method (1-based)")
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     context: str = Field(default="", description="Memory unit context")
-    event_date:
-    fact_type:
+    event_date: datetime | None = Field(default=None, description="When the memory occurred")
+    fact_type: str | None = Field(default=None, description="Fact type (world, experience, opinion)")
    score: float = Field(description="Score from this retrieval method")
     score_name: str = Field(description="Name of the score (e.g., 'similarity', 'bm25_score', 'activation')")
 
 
 class RetrievalMethodResults(BaseModel):
     """Results from a single retrieval method."""
+
     method_name: Literal["semantic", "bm25", "graph", "temporal"] = Field(description="Name of retrieval method")
-    fact_type:
-
+    fact_type: str | None = Field(
+        default=None, description="Fact type this retrieval was for (world, experience, opinion)"
+    )
+    results: list[RetrievalResult] = Field(description="Retrieved results with ranks")
     duration_seconds: float = Field(description="Time taken for this retrieval")
-    metadata:
+    metadata: dict[str, Any] = Field(default_factory=dict, description="Method-specific metadata")
 
 
 class RRFMergeResult(BaseModel):
     """A result after RRF merging."""
+
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     rrf_score: float = Field(description="Reciprocal Rank Fusion score")
-    source_ranks:
+    source_ranks: dict[str, int] = Field(description="Rank in each source that contributed (method_name -> rank)")
     final_rrf_rank: int = Field(description="Rank after RRF merge (1-based)")
 
 
 class RerankedResult(BaseModel):
     """A result after reranking."""
+
     node_id: str = Field(description="Memory unit ID")
     text: str = Field(description="Memory unit text content")
     rerank_score: float = Field(description="Final reranking score")
     rerank_rank: int = Field(description="Rank after reranking (1-based)")
     rrf_rank: int = Field(description="Original RRF rank before reranking")
     rank_change: int = Field(description="Change in rank (positive = moved up)")
-    score_components:
+    score_components: dict[str, float] = Field(default_factory=dict, description="Score breakdown")
 
 
 class SearchSummary(BaseModel):
     """Summary statistics about the search."""
+
     total_nodes_visited: int = Field(description="Total nodes visited")
     total_nodes_pruned: int = Field(description="Total nodes pruned")
     entry_points_found: int = Field(description="Number of entry points")
@@ -150,33 +176,36 @@ class SearchSummary(BaseModel):
     entity_links_followed: int = Field(default=0, description="Entity links followed")
 
     # Phase timings
-    phase_metrics:
+    phase_metrics: list[SearchPhaseMetrics] = Field(default_factory=list, description="Metrics for each phase")
 
 
 class SearchTrace(BaseModel):
     """Complete trace of a search operation."""
+
     query: QueryInfo = Field(description="Query information")
 
     # New 4-way retrieval architecture
-    retrieval_results:
-
-
+    retrieval_results: list[RetrievalMethodResults] = Field(
+        default_factory=list, description="Results from each retrieval method"
+    )
+    rrf_merged: list[RRFMergeResult] = Field(default_factory=list, description="Results after RRF merging")
+    reranked: list[RerankedResult] = Field(default_factory=list, description="Results after reranking")
 
     # Legacy fields (kept for backward compatibility with graph/temporal visualizations)
-    entry_points:
-
-
+    entry_points: list[EntryPoint] = Field(
+        default_factory=list, description="Entry points selected for search (legacy)"
+    )
+    visits: list[NodeVisit] = Field(
+        default_factory=list, description="All nodes visited during search (legacy, for graph viz)"
+    )
+    pruned: list[PruningDecision] = Field(default_factory=list, description="Nodes that were pruned (legacy)")
 
     summary: SearchSummary = Field(description="Summary statistics")
 
     # Final results (for comparison with visits)
-    final_results:
+    final_results: list[dict[str, Any]] = Field(description="Final ranked results returned to user")
 
-    model_config = {
-        "json_encoders": {
-            datetime: lambda v: v.isoformat()
-        }
-    }
+    model_config = {"json_encoders": {datetime: lambda v: v.isoformat()}}
 
     def to_json(self, **kwargs) -> str:
         """Export trace as JSON string."""
@@ -186,14 +215,14 @@ class SearchTrace(BaseModel):
         """Export trace as dictionary."""
         return self.model_dump()
 
-    def get_visit_by_node_id(self, node_id: str) ->
+    def get_visit_by_node_id(self, node_id: str) -> NodeVisit | None:
         """Find a visit by node ID."""
         for visit in self.visits:
             if visit.node_id == node_id:
                 return visit
         return None
 
-    def get_search_path_to_node(self, node_id: str) ->
+    def get_search_path_to_node(self, node_id: str) -> list[NodeVisit]:
         """Get the path from entry point to a specific node."""
         path = []
         current_visit = self.get_visit_by_node_id(node_id)
@@ -207,10 +236,10 @@ class SearchTrace(BaseModel):
 
         return path
 
-    def get_nodes_by_link_type(self, link_type: Literal["temporal", "semantic", "entity"]) ->
+    def get_nodes_by_link_type(self, link_type: Literal["temporal", "semantic", "entity"]) -> list[NodeVisit]:
         """Get all nodes reached via a specific link type."""
         return [v for v in self.visits if v.link_type == link_type]
 
-    def get_entry_point_nodes(self) ->
+    def get_entry_point_nodes(self) -> list[NodeVisit]:
         """Get all entry point visits."""
         return [v for v in self.visits if v.is_entry_point]