remdb 0.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +566 -0
  44. rem/cli/commands/configure.py +497 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1302 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +96 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +801 -0
  104. rem/services/content/service.py +676 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +336 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.3.7.dist-info/METADATA +1473 -0
  185. remdb-0.3.7.dist-info/RECORD +187 -0
  186. remdb-0.3.7.dist-info/WHEEL +4 -0
  187. remdb-0.3.7.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,155 @@
1
+ # REM Agents
2
+
3
+ Built-in agents for REM system operations.
4
+
5
+ ## Overview
6
+
7
+ This folder contains specialized agents that provide high-level interfaces for REM operations. These agents use LLMs to interpret natural language and convert it to structured REM queries.
8
+
9
+ ## REM Query Agent
10
+
11
+ **File**: `rem_query_agent.py`
12
+
13
+ Converts natural language questions into structured REM queries with PostgreSQL dialect awareness.
14
+
15
+ ### Features
16
+
17
+ - **Query Type Selection**: Automatically chooses optimal query type (LOOKUP, FUZZY, SEARCH, SQL, TRAVERSE)
18
+ - **PostgreSQL Dialect Aware**: Knows when to use KV_STORE vs primary tables
19
+ - **Token Optimized**: Minimal output fields for fast generation and low cost
20
+ - **Confidence Scoring**: Returns confidence (0-1) with reasoning for low scores
21
+ - **Multi-Step Planning**: Can break complex queries into multiple REM calls
22
+
23
+ ### Usage
24
+
25
+ #### Simple Query
26
+
27
+ ```python
28
+ from rem.agentic.agents import ask_rem
29
+
30
+ # Convert natural language to REM query
31
+ result = await ask_rem("Show me Sarah Chen")
32
+
33
+ print(result.query_type) # QueryType.LOOKUP
34
+ print(result.parameters) # {"entity_key": "sarah-chen"}
35
+ print(result.confidence) # 1.0
36
+ ```
37
+
38
+ #### With Custom Model
39
+
40
+ ```python
41
+ # Use fast, cheap model for query generation
42
+ result = await ask_rem(
43
+ "Find documents about databases",
44
+ llm_model="gpt-4o-mini"
45
+ )
46
+
47
+ print(result.query_type) # QueryType.SEARCH
48
+ print(result.parameters)
49
+ # {
50
+ # "query_text": "database",
51
+ # "table_name": "resources",
52
+ # "field_name": "content",
53
+ # "limit": 10
54
+ # }
55
+ ```
56
+
57
+ #### Integration with RemService
58
+
59
+ ```python
60
+ from rem.services.rem import RemService
61
+
62
+ # RemService automatically uses REM Query Agent
63
+ result = await rem_service.ask_rem(
64
+ natural_query="What does Sarah manage?",
65
+ tenant_id="acme-corp"
66
+ )
67
+
68
+ # Returns:
69
+ # {
70
+ # "query_output": {
71
+ # "query_type": "TRAVERSE",
72
+ # "parameters": {"start_key": "sarah-chen", "max_depth": 1, "rel_type": "manages"},
73
+ # "confidence": 0.85,
74
+ # "reasoning": "TRAVERSE query to find entities Sarah manages via graph edges"
75
+ # },
76
+ # "results": [...], # Executed query results (if confidence >= 0.7)
77
+ # "natural_query": "What does Sarah manage?"
78
+ # }
79
+ ```
80
+
81
+ ### Query Types
82
+
83
+ | Type | Description | When to Use | Example |
84
+ |------|-------------|-------------|---------|
85
+ | `LOOKUP` | O(1) entity lookup by natural key | User references specific entity by name | "Show me Sarah Chen" |
86
+ | `FUZZY` | Trigram text similarity (pg_trgm) | Partial/misspelled names, approximate matches | "Find people named Sara" |
87
+ | `SEARCH` | Semantic vector similarity | Conceptual questions, semantic similarity | "Documents about databases" |
88
+ | `SQL` | Direct table queries with WHERE | Temporal, filtered, or aggregate queries | "Meetings in Q4 2024" |
89
+ | `TRAVERSE` | Recursive graph traversal | Relationships, connections, "what's related" | "What does Sarah manage?" |
90
+
91
+ ### Configuration
92
+
93
+ Set the model for REM Query Agent in your environment:
94
+
95
+ ```bash
96
+ # .env
97
+ LLM__QUERY_AGENT_MODEL=gpt-4o-mini # Fast, cheap model recommended
98
+ ```
99
+
100
+ If not set, uses `settings.llm.default_model`.
101
+
102
+ ### Output Schema
103
+
104
+ ```python
105
+ class REMQueryOutput(BaseModel):
106
+ query_type: QueryType # Selected query type
107
+ parameters: dict # Query parameters
108
+ confidence: float # 0.0-1.0 confidence score
109
+ reasoning: str | None # Only if confidence < 0.7 or multi-step
110
+ multi_step: list[dict] | None # For complex queries
111
+ ```
112
+
113
+ ### Design Philosophy
114
+
115
+ 1. **Token Efficiency**: Output is concise by design
116
+ - Reasoning only included when needed (low confidence or multi-step)
117
+ - Minimal fields to reduce generation time and cost
118
+
119
+ 2. **PostgreSQL Awareness**: Agent knows the database schema
120
+ - LOOKUP/FUZZY use UNLOGGED KV_STORE (fast cache)
121
+ - SEARCH joins KV_STORE + embeddings_<table>
122
+ - SQL queries primary tables directly
123
+ - TRAVERSE follows graph_edges JSONB field
124
+
125
+ 3. **Progressive Complexity**: Prefer simple queries over complex
126
+ - LOOKUP is fastest (O(1))
127
+ - FUZZY uses indexed trigrams
128
+ - SEARCH requires embedding generation
129
+ - SQL scans tables (filtered)
130
+ - TRAVERSE is recursive (most complex)
131
+
132
+ 4. **Confidence-Based Execution**: RemService auto-executes if confidence >= 0.7
133
+ - High confidence: Execute immediately
134
+ - Low confidence: Return query + reasoning for review
135
+
136
+ ### Testing
137
+
138
+ See `tests/unit/agentic/agents/test_rem_query_agent.py` for unit tests.
139
+
140
+ Tests cover:
141
+ - Schema structure validation
142
+ - Output model creation
143
+ - Confidence validation
144
+ - Multi-step query support
145
+
146
+ Integration tests with actual LLM execution require API keys and are in `tests/integration/`.
147
+
148
+ ## Future Agents
149
+
150
+ Additional agents can be added following the same pattern:
151
+
152
+ - **Entity Summarization Agent**: Summarize entity relationships
153
+ - **Query Explanation Agent**: Explain REM query results in natural language
154
+ - **Schema Discovery Agent**: Discover available tables and fields
155
+ - **Data Quality Agent**: Identify data quality issues in entities
@@ -0,0 +1,8 @@
1
+ """
2
+ REM Agents - Specialized agents for REM operations.
3
+
4
+ All agents are defined as YAML schemas in src/rem/schemas/agents/.
5
+ Use create_agent_from_schema_file() to instantiate agents.
6
+ """
7
+
8
+ __all__ = []
rem/agentic/context.py ADDED
@@ -0,0 +1,148 @@
1
+ """
2
+ Agent execution context and configuration.
3
+
4
+ Design pattern for session context that can be constructed from:
5
+ - HTTP headers (X-User-Id, X-Tenant-Id, X-Session-Id, X-Model-Name, X-Agent-Schema)
6
+ - Direct instantiation for testing/CLI
7
+
8
+ Key Design Pattern
9
+ - AgentContext is passed to agent factory, not stored in agents
10
+ - Enables session tracking across API, CLI, and test execution
11
+ - Supports header-based configuration override (model, schema URI)
12
+ - Clean separation: context (who/what) vs agent (how)
13
+ """
14
+
15
+ from loguru import logger
16
+ from pydantic import BaseModel, Field
17
+
18
+ from ..settings import settings
19
+
20
+
21
class AgentContext(BaseModel):
    """
    Session and configuration context for agent execution.

    Provides session identifiers (user_id, tenant_id, session_id) and
    configuration defaults (model) for agent factory and execution.

    Design Pattern
    - Construct from HTTP headers via from_headers()
    - Pass to agent factory, not stored in agent
    - Enables header-based model/schema override
    - Supports observability (user tracking, session continuity)

    Example:
        # From HTTP request
        context = AgentContext.from_headers(request.headers)
        agent = await create_agent(context)

        # Direct construction for testing
        context = AgentContext(user_id="test-user", tenant_id="test-tenant")
        agent = await create_agent(context)
    """

    user_id: str | None = Field(
        default=None,
        description="User identifier for tracking and personalization",
    )

    tenant_id: str = Field(
        default="default",
        description="Tenant identifier for multi-tenancy isolation (REM requirement)",
    )

    session_id: str | None = Field(
        default=None,
        description="Session/conversation identifier for continuity",
    )

    # Resolved lazily so the value reflects settings at instantiation time.
    default_model: str = Field(
        default_factory=lambda: settings.llm.default_model,
        description="Default LLM model (can be overridden via headers)",
    )

    agent_schema_uri: str | None = Field(
        default=None,
        description="Agent schema URI (e.g., 'rem-agents-query-agent')",
    )

    model_config = {"populate_by_name": True}

    @staticmethod
    def get_user_id_or_default(
        user_id: str | None,
        source: str = "context",
        default: str = "default",
    ) -> str:
        """
        Get user_id or fallback to default with logging.

        Centralized helper for consistent user_id fallback behavior across
        API endpoints, MCP tools, CLI commands, and services.

        Args:
            user_id: User identifier (may be None)
            source: Source of the call (only used in the debug log message)
            default: Value returned when user_id is None (default: "default")

        Returns:
            user_id if it is not None, otherwise default

        Example:
            # In MCP tool
            user_id = AgentContext.get_user_id_or_default(
                user_id, source="ask_rem_agent"
            )

            # In CLI command
            user_id = AgentContext.get_user_id_or_default(
                args.user_id, source="rem ask"
            )
        """
        if user_id is None:
            logger.debug(f"No user_id provided from {source}, using '{default}'")
            return default
        return user_id

    @classmethod
    def from_headers(cls, headers: dict[str, str]) -> "AgentContext":
        """
        Construct AgentContext from HTTP headers.

        Reads standard headers:
        - X-User-Id: User identifier
        - X-Tenant-Id: Tenant identifier
        - X-Session-Id: Session identifier
        - X-Model-Name: Model override
        - X-Agent-Schema: Agent schema URI

        A header that is present but empty is treated the same as a missing
        header, so a blank X-Tenant-Id falls back to "default" (matching how
        a blank X-Model-Name already falls back to the configured model) and
        blank identifiers become None instead of "".

        Args:
            headers: Mapping of HTTP header names to values. Names are
                matched case-insensitively; only ``.items()`` is required.

        Returns:
            AgentContext with values from headers

        Example:
            headers = {
                "X-User-Id": "user123",
                "X-Tenant-Id": "acme-corp",
                "X-Session-Id": "sess-456",
                "X-Model-Name": "anthropic:claude-opus-4-20250514"
            }
            context = AgentContext.from_headers(headers)
        """
        # Normalize header keys to lowercase for case-insensitive lookup
        normalized = {name.lower(): value for name, value in headers.items()}

        def header(name: str) -> str | None:
            # Collapse "missing" and "present but empty" into None.
            return normalized.get(name) or None

        return cls(
            user_id=header("x-user-id"),
            tenant_id=header("x-tenant-id") or "default",
            session_id=header("x-session-id"),
            default_model=header("x-model-name") or settings.llm.default_model,
            agent_schema_uri=header("x-agent-schema"),
        )
@@ -0,0 +1,329 @@
1
+ """
2
+ Centralized context builder for agent execution.
3
+
4
+ Session History (ALWAYS loaded with compression):
5
+ - Each chat request is a single message, so session history MUST be recovered
6
+ - Uses SessionMessageStore with compression to keep context efficient
7
+ - Long assistant responses include REM LOOKUP hints: "... [REM LOOKUP session-{id}-msg-{index}] ..."
8
+ - Agent can retrieve full content on-demand using REM LOOKUP
9
+ - Prevents context window bloat while maintaining conversation continuity
10
+
11
+ User Context (on-demand by default):
12
+ - System message includes REM LOOKUP hint for user profile
13
+ - Agent decides whether to load profile based on query
14
+ - More efficient for queries that don't need personalization
15
+ - Example: "User ID: sarah@example.com. To load user profile: Use REM LOOKUP users/sarah@example.com"
16
+
17
+ User Context (auto-inject when enabled):
18
+ - Set CHAT__AUTO_INJECT_USER_CONTEXT=true
19
+ - User profile automatically loaded from database and injected into system message
20
+ - Simpler for basic chatbots that always need context
21
+
22
+ Design Pattern:
23
+ 1. Extract AgentContext from headers (user_id, tenant_id, session_id)
24
+ 2. If auto-inject enabled: Load User/Session from database
25
+ 3. If auto-inject disabled: Provide REM LOOKUP hints in system message
26
+ 4. Construct system message with date + context (injected or hints)
27
+ 5. Return complete context ready for agent execution
28
+
29
+ Integration Points:
30
+ - API endpoints: build_from_headers() extracts user context from JWT/session headers
31
+ - Tests: build_from_test() creates minimal test context without DB
32
+ - Settings: CHAT__AUTO_INJECT_* controls auto-inject vs on-demand behavior
33
+
34
+ Usage (on-demand, default):
35
+ # From FastAPI endpoint
36
+ context, messages = await ContextBuilder.build_from_headers(
37
+ headers=request.headers,
38
+ new_messages=[{"role": "user", "content": "What's next for the API migration?"}]
39
+ )
40
+
41
+ # Messages list structure (on-demand):
42
+ # [
43
+ # {"role": "system", "content": "Today's date: 2025-11-22.\n\nUser ID: sarah@example.com\nTo load user profile: Use REM LOOKUP users/sarah@example.com"},
44
+ # {"role": "user", "content": "What's next for the API migration?"}
45
+ # ]
46
+
47
+ # Agent receives hints and can decide to load context if needed
48
+ agent = await create_agent(context=context, ...)
49
+ prompt = "\n".join(msg.content for msg in messages)
50
+ result = await agent.run(prompt)
51
+
52
+ Usage (auto-inject, CHAT__AUTO_INJECT_USER_CONTEXT=true):
53
+ # Messages list structure (auto-inject):
54
+ # [
55
+ # {"role": "system", "content": "Today's date: 2025-11-22.\n\nUser Context (auto-injected):\nSummary: ...\nInterests: ..."},
56
+ # {"role": "user", "content": "Previous message"},
57
+ # {"role": "assistant", "content": "Previous response"},
58
+ # {"role": "user", "content": "What's next for the API migration?"}
59
+ # ]
60
+
61
+ Testing:
62
+ # From CLI/test (no database)
63
+ context, messages = await ContextBuilder.build_from_test(
64
+ user_id="test@rem.ai",
65
+ tenant_id="test-tenant",
66
+ message="Hello"
67
+ )
68
+ """
69
+
70
+ from datetime import datetime, timezone
71
+ from typing import Any
72
+
73
+ from loguru import logger
74
+ from pydantic import BaseModel
75
+
76
+ from .context import AgentContext
77
+ from ..models.entities.user import User
78
+ from ..models.entities.message import Message
79
+ from ..services.postgres.repository import Repository
80
+ from ..services.postgres.service import PostgresService
81
+
82
+
83
class ContextMessage(BaseModel):
    """A single chat message (role plus text) handed to the LLM."""

    # Speaker of the message: "system", "user", or "assistant".
    role: str
    # Plain-text body of the message.
    content: str
88
+
89
+
90
class ContextBuilder:
    """
    Centralized builder for agent execution context.

    Handles:
    - User profile loading from database
    - Session history recovery
    - Context message construction
    - Test context generation
    """

    @staticmethod
    async def build_from_headers(
        headers: dict[str, str],
        new_messages: list[dict[str, str]] | None = None,
        db: PostgresService | None = None,
    ) -> tuple[AgentContext, list[ContextMessage]]:
        """
        Build complete context from HTTP headers.

        The returned message list is assembled in this order:

        1. One system message. It always starts with "Today's date: <UTC date>."
           and is followed, after a blank line, by one of:
           - the auto-injected user profile (when
             CHAT__AUTO_INJECT_USER_CONTEXT is enabled, a user id is present
             and a database service is available), or a note that no user
             context is available;
           - otherwise, when a user id is present, the user id plus a
             "To load user profile: Use REM LOOKUP users/<user_id>" hint.
        2. Stored session history, loaded through SessionMessageStore with
           decompress=False, when a session id is present and
           settings.postgres.enabled is true.
        3. The new messages from the current request.

        Args:
            headers: HTTP request headers (case-insensitive)
            new_messages: New messages from current request; each dict is
                expanded into a ContextMessage
            db: Optional PostgresService. When omitted, one is obtained via
                get_postgres_service() only if auto-inject is enabled or a
                session id is present; a service connected here is also
                disconnected here before returning.

        Returns:
            Tuple of (AgentContext, messages list)

        Example:
            headers = {"X-User-Id": "sarah@example.com", "X-Session-Id": "sess-123"}
            context, messages = await ContextBuilder.build_from_headers(headers, new_messages)
        """
        from ..settings import settings
        from ..services.session.compression import SessionMessageStore

        # Extract AgentContext from headers
        context = AgentContext.from_headers(headers)

        # Initialize DB if not provided and needed (for user context or session history)
        close_db = False
        if db is None and (settings.chat.auto_inject_user_context or context.session_id):
            from ..services.postgres import get_postgres_service

            db = get_postgres_service()
            if db:
                await db.connect()
                # Only a connection opened here is closed in the finally below;
                # a caller-supplied service is left untouched.
                close_db = True

        try:
            messages: list[ContextMessage] = []

            # Build context hint message
            today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
            context_hint = f"Today's date: {today}."

            # Add user context (auto-inject or on-demand hint)
            if settings.chat.auto_inject_user_context and context.user_id and db:
                # Auto-inject: Load and include user profile
                user_context_content = await ContextBuilder._load_user_context(
                    user_id=context.user_id,
                    tenant_id=context.tenant_id,
                    db=db,
                )
                if user_context_content:
                    context_hint += f"\n\nUser Context (auto-injected):\n{user_context_content}"
                else:
                    context_hint += "\n\nNo user context available (anonymous or new user)."
            elif context.user_id:
                # On-demand: Provide hint to use REM LOOKUP
                context_hint += f"\n\nUser ID: {context.user_id}"
                context_hint += f"\nTo load user profile: Use REM LOOKUP users/{context.user_id}"

            # Add system context hint
            messages.append(ContextMessage(role="system", content=context_hint))

            # Load session history (if session_id provided) with compression
            if context.session_id and settings.postgres.enabled:
                store = SessionMessageStore(user_id=context.user_id or "default")
                session_history = await store.load_session_messages(
                    session_id=context.session_id,
                    user_id=context.user_id,
                    decompress=False,  # Use compressed versions with REM LOOKUP hints
                )

                # Convert to ContextMessage format
                for msg_dict in session_history:
                    messages.append(
                        ContextMessage(
                            role=msg_dict["role"],
                            content=msg_dict["content"],
                        )
                    )

                logger.debug(f"Loaded {len(session_history)} compressed messages for session {context.session_id}")

            # Add new messages from request
            if new_messages:
                for msg in new_messages:
                    messages.append(ContextMessage(**msg))

            return context, messages

        finally:
            if close_db and db:
                await db.disconnect()

    @staticmethod
    async def _load_user_context(
        user_id: str | None,
        tenant_id: str,
        db: PostgresService,
    ) -> str | None:
        """
        Load user profile from database and format as context.

        The result is a newline-joined string built from whichever of these
        are present on the user record:
        - "Summary: ..." from user.summary
        - "Interests: ..." from the first five user.interests
        - "Topics: ..." from the first five user.preferred_topics
        - "Current Projects: ..." from the first three entries of
          user.metadata["profile"]["current_projects"]

        Malformed profile metadata (a non-dict profile, a non-list project
        collection, or a non-dict project entry) is tolerated so that it does
        not discard the summary/interests/topics already collected.

        Args:
            user_id: User to look up; falsy values short-circuit to None
            tenant_id: Tenant passed to the repository lookup
            db: Database service used by the users repository

        Returns:
            Formatted context string, or None if user_id is not provided,
            the user is not found, nothing could be formatted, or the lookup
            raised (the error is logged).
        """
        if not user_id:
            return None

        try:
            user_repo = Repository(User, "users", db=db)
            user = await user_repo.get_by_id(user_id, tenant_id)

            if not user:
                logger.debug(f"User {user_id} not found in tenant {tenant_id}")
                return None

            # Build user context string
            parts = []

            if user.summary:
                parts.append(f"Summary: {user.summary}")

            if user.interests:
                parts.append(f"Interests: {', '.join(user.interests[:5])}")

            if user.preferred_topics:
                parts.append(f"Topics: {', '.join(user.preferred_topics[:5])}")

            # Add full profile from metadata if available
            metadata = user.metadata
            profile = metadata["profile"] if metadata and "profile" in metadata else None

            if isinstance(profile, dict):
                projects = profile.get("current_projects")
                if isinstance(projects, list) and projects:
                    # Entries without a usable name are shown as "Unnamed"
                    # instead of raising inside the join.
                    project_names = [
                        str(p.get("name") or "Unnamed") if isinstance(p, dict) else "Unnamed"
                        for p in projects[:3]
                    ]
                    parts.append(f"Current Projects: {', '.join(project_names)}")

            if not parts:
                return None

            return "\n".join(parts)

        except Exception as e:
            # Best-effort: a failed profile load must not fail the request.
            logger.error(f"Failed to load user context: {e}")
            return None

    @staticmethod
    async def build_from_test(
        user_id: str = "test@rem.ai",
        tenant_id: str = "test-tenant",
        session_id: str | None = None,
        message: str = "Hello",
        model: str | None = None,
    ) -> tuple[AgentContext, list[ContextMessage]]:
        """
        Build context for testing (no database lookup).

        Creates minimal context with:
        - Test user (test@rem.ai)
        - Test tenant
        - Context hint with date
        - Single user message

        Args:
            user_id: Test user identifier (default: test@rem.ai)
            tenant_id: Test tenant identifier
            session_id: Optional session ID
            message: User message content
            model: Optional model override; falls back to
                settings.llm.default_model when not given

        Returns:
            Tuple of (AgentContext, messages list), where the list holds one
            system message and one user message

        Example:
            context, messages = await ContextBuilder.build_from_test(
                user_id="test@rem.ai",
                message="What's the weather like?"
            )
        """
        from ..settings import settings

        # Create test context
        context = AgentContext(
            user_id=user_id,
            tenant_id=tenant_id,
            session_id=session_id,
            default_model=model or settings.llm.default_model,
        )

        # Build minimal messages
        today = datetime.now(timezone.utc).strftime("%Y-%m-%d")
        context_hint = f"Today's date: {today}.\n\nTest user context: {user_id} (test mode, no profile loaded)."

        messages = [
            ContextMessage(role="system", content=context_hint),
            ContextMessage(role="user", content=message),
        ]

        return context, messages
File without changes