hindsight-api 0.2.1__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- hindsight_api/admin/__init__.py +1 -0
- hindsight_api/admin/cli.py +311 -0
- hindsight_api/alembic/versions/f1a2b3c4d5e6_add_memory_links_composite_index.py +44 -0
- hindsight_api/alembic/versions/g2a3b4c5d6e7_add_tags_column.py +48 -0
- hindsight_api/alembic/versions/h3c4d5e6f7g8_mental_models_v4.py +112 -0
- hindsight_api/alembic/versions/i4d5e6f7g8h9_delete_opinions.py +41 -0
- hindsight_api/alembic/versions/j5e6f7g8h9i0_mental_model_versions.py +95 -0
- hindsight_api/alembic/versions/k6f7g8h9i0j1_add_directive_subtype.py +58 -0
- hindsight_api/alembic/versions/l7g8h9i0j1k2_add_worker_columns.py +109 -0
- hindsight_api/alembic/versions/m8h9i0j1k2l3_mental_model_id_to_text.py +41 -0
- hindsight_api/alembic/versions/n9i0j1k2l3m4_learnings_and_pinned_reflections.py +134 -0
- hindsight_api/alembic/versions/o0j1k2l3m4n5_migrate_mental_models_data.py +113 -0
- hindsight_api/alembic/versions/p1k2l3m4n5o6_new_knowledge_architecture.py +194 -0
- hindsight_api/alembic/versions/q2l3m4n5o6p7_fix_mental_model_fact_type.py +50 -0
- hindsight_api/alembic/versions/r3m4n5o6p7q8_add_reflect_response_to_reflections.py +47 -0
- hindsight_api/alembic/versions/s4n5o6p7q8r9_add_consolidated_at_to_memory_units.py +53 -0
- hindsight_api/alembic/versions/t5o6p7q8r9s0_rename_mental_models_to_observations.py +134 -0
- hindsight_api/alembic/versions/u6p7q8r9s0t1_mental_models_text_id.py +41 -0
- hindsight_api/alembic/versions/v7q8r9s0t1u2_add_max_tokens_to_mental_models.py +50 -0
- hindsight_api/api/http.py +1406 -118
- hindsight_api/api/mcp.py +11 -196
- hindsight_api/config.py +359 -27
- hindsight_api/engine/consolidation/__init__.py +5 -0
- hindsight_api/engine/consolidation/consolidator.py +859 -0
- hindsight_api/engine/consolidation/prompts.py +69 -0
- hindsight_api/engine/cross_encoder.py +706 -88
- hindsight_api/engine/db_budget.py +284 -0
- hindsight_api/engine/db_utils.py +11 -0
- hindsight_api/engine/directives/__init__.py +5 -0
- hindsight_api/engine/directives/models.py +37 -0
- hindsight_api/engine/embeddings.py +553 -29
- hindsight_api/engine/entity_resolver.py +8 -5
- hindsight_api/engine/interface.py +40 -17
- hindsight_api/engine/llm_wrapper.py +744 -68
- hindsight_api/engine/memory_engine.py +2505 -1017
- hindsight_api/engine/mental_models/__init__.py +14 -0
- hindsight_api/engine/mental_models/models.py +53 -0
- hindsight_api/engine/query_analyzer.py +4 -3
- hindsight_api/engine/reflect/__init__.py +18 -0
- hindsight_api/engine/reflect/agent.py +933 -0
- hindsight_api/engine/reflect/models.py +109 -0
- hindsight_api/engine/reflect/observations.py +186 -0
- hindsight_api/engine/reflect/prompts.py +483 -0
- hindsight_api/engine/reflect/tools.py +437 -0
- hindsight_api/engine/reflect/tools_schema.py +250 -0
- hindsight_api/engine/response_models.py +168 -4
- hindsight_api/engine/retain/bank_utils.py +79 -201
- hindsight_api/engine/retain/fact_extraction.py +424 -195
- hindsight_api/engine/retain/fact_storage.py +35 -12
- hindsight_api/engine/retain/link_utils.py +29 -24
- hindsight_api/engine/retain/orchestrator.py +24 -43
- hindsight_api/engine/retain/types.py +11 -2
- hindsight_api/engine/search/graph_retrieval.py +43 -14
- hindsight_api/engine/search/link_expansion_retrieval.py +391 -0
- hindsight_api/engine/search/mpfp_retrieval.py +362 -117
- hindsight_api/engine/search/reranking.py +2 -2
- hindsight_api/engine/search/retrieval.py +848 -201
- hindsight_api/engine/search/tags.py +172 -0
- hindsight_api/engine/search/think_utils.py +42 -141
- hindsight_api/engine/search/trace.py +12 -1
- hindsight_api/engine/search/tracer.py +26 -6
- hindsight_api/engine/search/types.py +21 -3
- hindsight_api/engine/task_backend.py +113 -106
- hindsight_api/engine/utils.py +1 -152
- hindsight_api/extensions/__init__.py +10 -1
- hindsight_api/extensions/builtin/tenant.py +5 -1
- hindsight_api/extensions/context.py +10 -1
- hindsight_api/extensions/operation_validator.py +81 -4
- hindsight_api/extensions/tenant.py +26 -0
- hindsight_api/main.py +69 -6
- hindsight_api/mcp_local.py +12 -53
- hindsight_api/mcp_tools.py +494 -0
- hindsight_api/metrics.py +433 -48
- hindsight_api/migrations.py +141 -1
- hindsight_api/models.py +3 -3
- hindsight_api/pg0.py +53 -0
- hindsight_api/server.py +39 -2
- hindsight_api/worker/__init__.py +11 -0
- hindsight_api/worker/main.py +296 -0
- hindsight_api/worker/poller.py +486 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/METADATA +16 -6
- hindsight_api-0.4.0.dist-info/RECORD +112 -0
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/entry_points.txt +2 -0
- hindsight_api/engine/retain/observation_regeneration.py +0 -254
- hindsight_api/engine/search/observation_utils.py +0 -125
- hindsight_api/engine/search/scoring.py +0 -159
- hindsight_api-0.2.1.dist-info/RECORD +0 -75
- {hindsight_api-0.2.1.dist-info → hindsight_api-0.4.0.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic models for the reflect agent.
|
|
3
|
+
"""
|
|
4
|
+
|
|
5
|
+
from typing import Any, Literal
|
|
6
|
+
|
|
7
|
+
from pydantic import BaseModel, Field
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class ObservationSection(BaseModel):
    """A section within an observation with its supporting memories."""

    # Section header; an empty string is allowed for an untitled intro section.
    title: str = Field(description="Section header (can be empty for intro)")
    # Body text; the prompt convention forbids markdown headers inside the text.
    text: str = Field(description="Section content - no headers, use lists/tables/bold")
    # IDs of the memory units cited as evidence for this section.
    memory_ids: list[str] = Field(default_factory=list, description="Memory IDs supporting this section")
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ReflectAction(BaseModel):
    """Single action the reflect agent can take.

    One instance describes a single tool invocation (or the terminal ``done``
    action). Only the parameters relevant to the chosen ``tool`` are expected
    to be populated; all parameter fields default to ``None``.
    """

    tool: Literal["list_observations", "get_observation", "recall", "expand", "done"] = Field(
        description="Tool to invoke: list_observations, get_observation, recall, expand, or done"
    )
    # Tool-specific parameters
    observation_id: str | None = Field(default=None, description="Observation ID for get_observation")
    query: str | None = Field(default=None, description="Search query for recall")
    max_tokens: int | None = Field(default=None, description="Max tokens for recall results (default 2048)")
    memory_ids: list[str] | None = Field(default=None, description="Memory unit IDs for expand (batched)")
    depth: Literal["chunk", "document"] | None = Field(default=None, description="Expansion depth for expand")
    observation_sections: list[ObservationSection] | None = Field(
        default=None, description="Observation sections for done action (when output_mode=observations)"
    )
    # Plain text answer fields (for output_mode=answer)
    answer: str | None = Field(default=None, description="Plain text answer for done action (no markdown)")
    # NOTE(review): alias "memory_ids" collides with the `memory_ids` field above.
    # A payload key "memory_ids" can only populate one of the two, so the answer
    # citations may be silently dropped depending on validation settings
    # (populate_by_name / field order) — confirm intended precedence against the
    # agent's actual output format.
    answer_memory_ids: list[str] | None = Field(
        default=None, description="Memory IDs supporting the answer", alias="memory_ids"
    )
    answer_model_ids: list[str] | None = Field(
        default=None, description="Mental model IDs supporting the answer", alias="model_ids"
    )
    reasoning: str | None = Field(default=None, description="Brief reasoning for this action")
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class ReflectActionBatch(BaseModel):
    """Batch of actions for parallel execution.

    The agent emits one batch per iteration; every action in the batch is
    executed concurrently before the next iteration begins.
    """

    actions: list[ReflectAction] = Field(description="List of actions to execute in parallel")
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
class ToolCall(BaseModel):
    """A single tool call made during reflect.

    Used for tracing/observability: records what was called, why, with what
    arguments, what came back, and how long it took.
    """

    # NOTE(review): this description lists "lookup, recall, expand" while
    # ReflectAction.tool allows list_observations/get_observation/recall/expand/done —
    # confirm which set is current; the mismatch suggests a stale docstring.
    tool: str = Field(description="Tool name: lookup, recall, expand")
    reason: str | None = Field(default=None, description="Agent's reasoning for making this tool call")
    input: dict = Field(description="Tool input parameters")
    output: dict = Field(description="Tool output/result")
    duration_ms: int = Field(description="Execution time in milliseconds")
    iteration: int = Field(default=0, description="Iteration number (1-based) when this tool was called")
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class LLMCall(BaseModel):
    """A single LLM call made during reflect.

    One entry per model invocation; aggregated into TokenUsageSummary.
    """

    scope: str = Field(description="Call scope: agent_1, agent_2, final, etc.")
    duration_ms: int = Field(description="Execution time in milliseconds")
    input_tokens: int = Field(default=0, description="Input tokens used")
    output_tokens: int = Field(default=0, description="Output tokens used")
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
class DirectiveInfo(BaseModel):
    """Information about a directive that was applied during reflect."""

    # The directive is stored as a mental model; `id` is that model's ID.
    id: str = Field(description="Directive mental model ID")
    name: str = Field(description="Directive name")
    content: str = Field(description="Directive content")
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
class TokenUsageSummary(BaseModel):
    """Total token usage across all LLM calls.

    NOTE(review): `total_tokens` is a stored field, not computed from
    input + output — callers are responsible for keeping it consistent.
    """

    input_tokens: int = Field(default=0, description="Total input tokens used")
    output_tokens: int = Field(default=0, description="Total output tokens used")
    total_tokens: int = Field(default=0, description="Total tokens (input + output)")
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
class ReflectAgentResult(BaseModel):
    """Result from the reflect agent.

    Bundles the final answer with full execution traces (tool calls, LLM
    calls, token usage) and the validated sets of IDs that actually
    contributed to the answer.
    """

    text: str = Field(description="Final answer text")
    structured_output: dict[str, Any] | None = Field(
        default=None, description="Structured output parsed according to provided response_schema"
    )
    iterations: int = Field(default=0, description="Number of iterations taken")
    tools_called: int = Field(default=0, description="Total number of tool calls made")
    tool_trace: list[ToolCall] = Field(default_factory=list, description="Trace of all tool calls made")
    llm_trace: list[LLMCall] = Field(default_factory=list, description="Trace of all LLM calls made")
    usage: TokenUsageSummary = Field(
        default_factory=TokenUsageSummary, description="Total token usage across all LLM calls"
    )
    # "Validated" here means the IDs were checked against actually-retrieved
    # items, not just echoed by the model.
    used_memory_ids: list[str] = Field(default_factory=list, description="Validated memory IDs actually used in answer")
    used_mental_model_ids: list[str] = Field(
        default_factory=list, description="Validated mental model IDs actually used in answer"
    )
    used_observation_ids: list[str] = Field(
        default_factory=list, description="Validated observation IDs actually used in answer"
    )
    directives_applied: list[DirectiveInfo] = Field(
        default_factory=list, description="Directive mental models that affected this reflection"
    )
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Models and utilities for evidence-grounded observations with computed trends.
|
|
3
|
+
|
|
4
|
+
Observations are part of mental models and represent patterns/beliefs derived
|
|
5
|
+
from memories. Each observation must be grounded in specific evidence (quotes)
|
|
6
|
+
from memories, and trends are computed algorithmically from evidence timestamps.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from datetime import datetime, timedelta, timezone
|
|
10
|
+
from enum import Enum
|
|
11
|
+
|
|
12
|
+
from pydantic import BaseModel, Field, computed_field, field_validator
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class Trend(str, Enum):
    """Computed trend for an observation based on evidence timestamps.

    Trends indicate how an observation's evidence is distributed over time:
    - STABLE: Evidence spread across time, continues to present
    - STRENGTHENING: More/denser evidence recently than before
    - WEAKENING: Evidence mostly old, sparse recently
    - NEW: All evidence within recent window
    - STALE: No evidence in recent window (may no longer apply)

    See `compute_trend` for the exact thresholds used.
    """

    STABLE = "stable"
    STRENGTHENING = "strengthening"
    WEAKENING = "weakening"
    NEW = "new"
    STALE = "stale"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class ObservationEvidence(BaseModel):
    """One piece of evidence backing an observation.

    Every item carries an exact quote from its source memory so that
    observations stay grounded and independently verifiable.
    """

    memory_id: str = Field(description="ID of the memory unit this evidence comes from")
    quote: str = Field(description="Exact quote from the memory supporting the observation")
    relevance: str = Field(default="", description="Brief explanation of how this quote supports the observation")
    timestamp: datetime = Field(description="When the source memory was created")

    @field_validator("timestamp", mode="before")
    @classmethod
    def ensure_timezone_aware(cls, v: datetime | str | None) -> datetime:
        """Coerce the incoming value to a timezone-aware UTC datetime."""
        # Missing timestamp: fall back to "now" in UTC.
        if v is None:
            return datetime.now(timezone.utc)
        # ISO-8601 strings are accepted; map the 'Z' suffix to an explicit offset.
        if isinstance(v, str):
            v = datetime.fromisoformat(v.replace("Z", "+00:00"))
        # Anything that is not a datetime by now is rejected.
        if not isinstance(v, datetime):
            raise ValueError(f"Invalid timestamp type: {type(v)}")
        # Naive datetimes are assumed to already be UTC.
        return v if v.tzinfo is not None else v.replace(tzinfo=timezone.utc)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class Observation(BaseModel):
    """A single observation within a mental model.

    An observation captures a pattern, preference, belief, or other insight
    derived from memories. It must be grounded in evidence items that quote
    the source memories verbatim.
    """

    title: str = Field(description="Short summary title for the observation (5-10 words)")
    content: str = Field(description="The observation content - detailed explanation of what we believe to be true")
    evidence: list[ObservationEvidence] = Field(default_factory=list, description="Supporting evidence with quotes")
    created_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc), description="When this observation was first created"
    )

    @field_validator("created_at", mode="before")
    @classmethod
    def ensure_created_at_timezone_aware(cls, v: datetime | str | None) -> datetime:
        """Coerce ``created_at`` to a timezone-aware UTC datetime."""
        if v is None:
            return datetime.now(timezone.utc)
        # Accept ISO-8601 strings; normalize a trailing 'Z' to a numeric offset.
        if isinstance(v, str):
            v = datetime.fromisoformat(v.replace("Z", "+00:00"))
        if not isinstance(v, datetime):
            raise ValueError(f"Invalid created_at type: {type(v)}")
        # Naive datetimes are assumed to already be UTC.
        return v if v.tzinfo is not None else v.replace(tzinfo=timezone.utc)

    @computed_field
    @property
    def trend(self) -> Trend:
        """Trend derived algorithmically from the evidence timestamps."""
        return compute_trend(self.evidence)

    @computed_field
    @property
    def evidence_span(self) -> dict[str, str | None]:
        """Earliest/latest evidence timestamps as ISO strings (None when empty)."""
        if not self.evidence:
            return {"from": None, "to": None}
        earliest = min(e.timestamp for e in self.evidence)
        latest = max(e.timestamp for e in self.evidence)
        return {"from": earliest.isoformat(), "to": latest.isoformat()}

    @computed_field
    @property
    def evidence_count(self) -> int:
        """How many evidence items back this observation."""
        return len(self.evidence)
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def compute_trend(
    evidence: list[ObservationEvidence],
    now: datetime | None = None,
    recent_days: int = 30,
    old_days: int = 90,
) -> Trend:
    """Compute the trend for an observation based on evidence timestamps.

    The trend reflects how the evidence is distributed over time:
    - STABLE: Evidence spread across time, continues to present
    - STRENGTHENING: More evidence recently than historically
    - WEAKENING: Evidence mostly old, sparse recently
    - NEW: All evidence is recent (within recent_days)
    - STALE: No evidence in recent window

    Args:
        evidence: List of evidence items with timestamps
        now: Reference time for calculations (defaults to current UTC time)
        recent_days: Number of days to consider "recent" (default 30)
        old_days: Number of days to consider "old" (default 90)

    Returns:
        Computed Trend enum value
    """
    reference = datetime.now(timezone.utc) if now is None else now
    # Comparisons below require an aware datetime; assume UTC if naive.
    if reference.tzinfo is None:
        reference = reference.replace(tzinfo=timezone.utc)

    if not evidence:
        return Trend.STALE

    recent_cutoff = reference - timedelta(days=recent_days)
    old_cutoff = reference - timedelta(days=old_days)

    def as_utc(ts: datetime) -> datetime:
        # Naive evidence timestamps are treated as UTC for comparison.
        return ts if ts.tzinfo is not None else ts.replace(tzinfo=timezone.utc)

    # Single pass: bucket each item as recent (> recent_cutoff),
    # old (< old_cutoff), or middle (inclusive in-between).
    n_recent = n_old = n_middle = 0
    for item in evidence:
        ts = as_utc(item.timestamp)
        if ts > recent_cutoff:
            n_recent += 1
        elif ts < old_cutoff:
            n_old += 1
        else:
            n_middle += 1

    # Nothing in the recent window: the observation may no longer apply.
    if n_recent == 0:
        return Trend.STALE

    # Everything falls inside the recent window.
    if n_old == 0 and n_middle == 0:
        return Trend.NEW

    # Compare evidence density (items per day) in the recent window vs the
    # older window spanning recent_days..old_days before the reference time.
    older_period = old_days - recent_days
    older_density = (n_old + n_middle) / older_period if older_period > 0 else 0
    if older_density == 0:
        return Trend.NEW

    recent_density = n_recent / recent_days if recent_days > 0 else 0
    ratio = recent_density / older_density

    if ratio > 1.5:
        return Trend.STRENGTHENING
    if ratio < 0.5:
        return Trend.WEAKENING
    return Trend.STABLE
|