remdb 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +566 -0
- rem/cli/commands/configure.py +497 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1302 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +96 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +806 -0
- rem/services/content/service.py +676 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +336 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.3.0.dist-info/METADATA +1455 -0
- remdb-0.3.0.dist-info/RECORD +187 -0
- remdb-0.3.0.dist-info/WHEEL +4 -0
- remdb-0.3.0.dist-info/entry_points.txt +2 -0
rem/__init__.py
ADDED
rem/agentic/README.md
ADDED
|
@@ -0,0 +1,650 @@
|
|
|
1
|
+
# REM Agentic Framework
|
|
2
|
+
|
|
3
|
+
This folder contains the core agentic framework for REM, providing a provider-agnostic abstraction for building and executing AI agents with structured outputs, MCP tool integration, and observability.
|
|
4
|
+
|
|
5
|
+
## Architecture Overview
|
|
6
|
+
|
|
7
|
+
```
|
|
8
|
+
agentic/
|
|
9
|
+
├── README.md # This file
|
|
10
|
+
├── context.py # AgentContext - session and configuration
|
|
11
|
+
├── query.py # AgentQuery - structured agent input
|
|
12
|
+
├── agents/ # Built-in agents
|
|
13
|
+
│ └── rem_query_agent.py # Natural language to REM query conversion
|
|
14
|
+
└── providers/
|
|
15
|
+
└── pydantic_ai.py # Pydantic AI agent factory
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Core Concepts
|
|
19
|
+
|
|
20
|
+
### 1. Agent Schemas (JSON Schema)
|
|
21
|
+
|
|
22
|
+
Agent schemas are **JSON Schema documents** that define both the agent's behavior AND structured output format. This design pattern comes from the `carrier` project and provides several key advantages:
|
|
23
|
+
|
|
24
|
+
- **Single source of truth**: One schema defines prompt, output structure, and tool configuration
|
|
25
|
+
- **Framework-agnostic**: JSON Schema can be converted to any framework's format
|
|
26
|
+
- **Tooling support**: JSON Schema validators, editors, and documentation generators work out of the box
|
|
27
|
+
|
|
28
|
+
#### Agent Schema Structure
|
|
29
|
+
|
|
30
|
+
```json
|
|
31
|
+
{
|
|
32
|
+
"type": "object",
|
|
33
|
+
"description": "SYSTEM PROMPT: This description becomes the agent's system prompt. Explain the agent's purpose, capabilities, and behavioral guidelines here.",
|
|
34
|
+
"properties": {
|
|
35
|
+
"answer": {
|
|
36
|
+
"type": "string",
|
|
37
|
+
"description": "The query answer with supporting evidence"
|
|
38
|
+
},
|
|
39
|
+
"confidence": {
|
|
40
|
+
"type": "number",
|
|
41
|
+
"minimum": 0,
|
|
42
|
+
"maximum": 1,
|
|
43
|
+
"description": "Confidence score for the answer"
|
|
44
|
+
},
|
|
45
|
+
"sources": {
|
|
46
|
+
"type": "array",
|
|
47
|
+
"items": {"type": "string"},
|
|
48
|
+
"description": "Entity keys used as evidence"
|
|
49
|
+
}
|
|
50
|
+
},
|
|
51
|
+
"required": ["answer", "confidence"],
|
|
52
|
+
"json_schema_extra": {
|
|
53
|
+
"fully_qualified_name": "rem.agents.QueryAgent",
|
|
54
|
+
"tools": [
|
|
55
|
+
{"name": "lookup_entity", "mcp_server": "rem"},
|
|
56
|
+
{"name": "search_knowledge", "mcp_server": "rem"}
|
|
57
|
+
],
|
|
58
|
+
"resources": [
|
|
59
|
+
{"uri_pattern": "rem://resources/.*", "mcp_server": "rem"}
|
|
60
|
+
]
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
#### Key Schema Sections
|
|
66
|
+
|
|
67
|
+
##### `description` - System Prompt
|
|
68
|
+
The top-level `description` field contains the **system prompt** for the agent. This is where you define:
|
|
69
|
+
- Agent's role and purpose
|
|
70
|
+
- Behavioral guidelines
|
|
71
|
+
- Task-specific instructions
|
|
72
|
+
- Output formatting requirements
|
|
73
|
+
|
|
74
|
+
##### `properties` - Structured Output Fields
|
|
75
|
+
The `properties` section defines the **structured output schema**. Each property:
|
|
76
|
+
- Has a JSON Schema type (`string`, `number`, `array`, `object`, etc.)
|
|
77
|
+
- Has a `description` explaining what the field should contain
|
|
78
|
+
- May have validation constraints (`minimum`, `maximum`, `pattern`, etc.)
|
|
79
|
+
|
|
80
|
+
##### `json_schema_extra` - REM Extensions (see `schema.py`)
|
|
81
|
+
|
|
82
|
+
The `json_schema_extra` section contains REM-specific metadata. See `agentic/schema.py` for the complete Pydantic model documentation with detailed field descriptions.
|
|
83
|
+
|
|
84
|
+
**Core Fields:**
|
|
85
|
+
|
|
86
|
+
- **`fully_qualified_name`** (required): Python module path (e.g., `rem.agents.QueryAgent`)
|
|
87
|
+
- **`name`** (optional): Human-readable agent name (e.g., "Query Agent")
|
|
88
|
+
- **`short_name`** (optional): URL-safe identifier (e.g., "query-agent")
|
|
89
|
+
- **`version`** (optional): Semantic version (e.g., "1.0.0")
|
|
90
|
+
- **`tools`** (optional): MCP tools available to the agent
|
|
91
|
+
```json
|
|
92
|
+
{
|
|
93
|
+
"name": "lookup_entity",
|
|
94
|
+
"mcp_server": "rem",
|
|
95
|
+
"description": "Optional override description"
|
|
96
|
+
}
|
|
97
|
+
```
|
|
98
|
+
- **`resources`** (optional): MCP resources accessible to the agent
|
|
99
|
+
```json
|
|
100
|
+
{
|
|
101
|
+
"uri_pattern": "rem://resources/.*",
|
|
102
|
+
"mcp_server": "rem"
|
|
103
|
+
}
|
|
104
|
+
```
|
|
105
|
+
- **`tags`** (optional): Categorization tags (e.g., `["query", "knowledge-graph"]`)
|
|
106
|
+
- **`author`** (optional): Agent author or team
|
|
107
|
+
|
|
108
|
+
**Complete Protocol Documentation:**
|
|
109
|
+
|
|
110
|
+
See `rem/agentic/schema.py` for:
|
|
111
|
+
- `AgentSchema`: Complete schema structure with validation
|
|
112
|
+
- `AgentSchemaMetadata`: REM-specific metadata fields
|
|
113
|
+
- `MCPToolReference`: Tool configuration structure
|
|
114
|
+
- `MCPResourceReference`: Resource pattern structure
|
|
115
|
+
- Helper functions: `validate_agent_schema()`, `create_agent_schema()`
|
|
116
|
+
|
|
117
|
+
#### Working with Agent Schemas
|
|
118
|
+
|
|
119
|
+
**Validate a schema:**
|
|
120
|
+
|
|
121
|
+
```python
|
|
122
|
+
from rem.agentic.schema import validate_agent_schema
|
|
123
|
+
|
|
124
|
+
# Load schema from file
|
|
125
|
+
import json
|
|
126
|
+
with open("agents/query_agent.json") as f:
|
|
127
|
+
schema_dict = json.load(f)
|
|
128
|
+
|
|
129
|
+
# Validate structure
|
|
130
|
+
validated = validate_agent_schema(schema_dict)
|
|
131
|
+
|
|
132
|
+
# Access metadata
|
|
133
|
+
print(validated.json_schema_extra.fully_qualified_name)
|
|
134
|
+
# "rem.agents.QueryAgent"
|
|
135
|
+
|
|
136
|
+
print(validated.json_schema_extra.version)
|
|
137
|
+
# "1.0.0"
|
|
138
|
+
|
|
139
|
+
print(validated.json_schema_extra.tools[0].name)
|
|
140
|
+
# "lookup_entity"
|
|
141
|
+
```
|
|
142
|
+
|
|
143
|
+
**Create a schema programmatically:**
|
|
144
|
+
|
|
145
|
+
```python
|
|
146
|
+
from rem.agentic.schema import create_agent_schema
|
|
147
|
+
|
|
148
|
+
schema = create_agent_schema(
|
|
149
|
+
description=(
|
|
150
|
+
"You are a helpful assistant that answers questions.\n\n"
|
|
151
|
+
"Guidelines:\n"
|
|
152
|
+
"- Be concise and accurate\n"
|
|
153
|
+
"- Cite sources when available\n"
|
|
154
|
+
"- Acknowledge uncertainty"
|
|
155
|
+
),
|
|
156
|
+
properties={
|
|
157
|
+
"answer": {
|
|
158
|
+
"type": "string",
|
|
159
|
+
"description": "Clear, concise answer to the question"
|
|
160
|
+
},
|
|
161
|
+
"sources": {
|
|
162
|
+
"type": "array",
|
|
163
|
+
"items": {"type": "string"},
|
|
164
|
+
"description": "List of source entity keys"
|
|
165
|
+
},
|
|
166
|
+
"confidence": {
|
|
167
|
+
"type": "number",
|
|
168
|
+
"minimum": 0,
|
|
169
|
+
"maximum": 1,
|
|
170
|
+
"description": "Confidence score (0-1)"
|
|
171
|
+
}
|
|
172
|
+
},
|
|
173
|
+
required=["answer", "confidence"],
|
|
174
|
+
fully_qualified_name="rem.agents.Assistant",
|
|
175
|
+
tools=[
|
|
176
|
+
{"name": "search", "mcp_server": "rem"},
|
|
177
|
+
{"name": "lookup", "mcp_server": "rem"}
|
|
178
|
+
],
|
|
179
|
+
resources=[
|
|
180
|
+
{"uri_pattern": "rem://.*", "mcp_server": "rem"}
|
|
181
|
+
],
|
|
182
|
+
version="1.0.0",
|
|
183
|
+
tags=["assistant", "general"],
|
|
184
|
+
author="REM Team"
|
|
185
|
+
)
|
|
186
|
+
|
|
187
|
+
# Convert to dict for JSON serialization
|
|
188
|
+
schema_dict = schema.model_dump(exclude_none=True)
|
|
189
|
+
|
|
190
|
+
# Save to file
|
|
191
|
+
import json
|
|
192
|
+
with open("agents/assistant.json", "w") as f:
|
|
193
|
+
json.dump(schema_dict, f, indent=2)
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
**Schema versioning best practices:**
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
# Version format: MAJOR.MINOR.PATCH
|
|
200
|
+
|
|
201
|
+
# MAJOR: Breaking changes to schema structure
|
|
202
|
+
# - Removing required fields
|
|
203
|
+
# - Changing field types
|
|
204
|
+
# - Removing tools that agents depend on
|
|
205
|
+
# Example: "1.0.0" → "2.0.0"
|
|
206
|
+
|
|
207
|
+
# MINOR: Backward-compatible additions
|
|
208
|
+
# - Adding optional fields
|
|
209
|
+
# - Adding new tools
|
|
210
|
+
# - Expanding allowed values
|
|
211
|
+
# Example: "1.0.0" → "1.1.0"
|
|
212
|
+
|
|
213
|
+
# PATCH: Bug fixes and clarifications
|
|
214
|
+
# - Fixing typos in descriptions
|
|
215
|
+
# - Clarifying field documentation
|
|
216
|
+
# - Updating examples
|
|
217
|
+
# Example: "1.0.0" → "1.0.1"
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
### 2. Pydantic AI Integration
|
|
221
|
+
|
|
222
|
+
The `providers/pydantic_ai.py` module converts JSON Schema agents to Pydantic AI format.
|
|
223
|
+
|
|
224
|
+
#### JSON Schema → Pydantic Model Conversion
|
|
225
|
+
|
|
226
|
+
Pydantic AI requires a Pydantic `BaseModel` for structured output. We convert JSON Schema to Pydantic using the `json-schema-to-pydantic` library:
|
|
227
|
+
|
|
228
|
+
```python
|
|
229
|
+
from json_schema_to_pydantic import PydanticModelBuilder
|
|
230
|
+
|
|
231
|
+
builder = PydanticModelBuilder()
|
|
232
|
+
OutputModel = builder.create_pydantic_model(
|
|
233
|
+
agent_schema,
|
|
234
|
+
root_schema=agent_schema
|
|
235
|
+
)
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
This handles:
|
|
239
|
+
- Nested objects
|
|
240
|
+
- Arrays and validation constraints
|
|
241
|
+
- Required fields
|
|
242
|
+
- Type conversions (JSON Schema types → Python types)
|
|
243
|
+
|
|
244
|
+
#### Pydantic Docstrings vs JSON Schema Descriptions
|
|
245
|
+
|
|
246
|
+
**Important Design Pattern**: Pydantic converts docstrings to JSON Schema `description` fields:
|
|
247
|
+
|
|
248
|
+
```python
|
|
249
|
+
class QueryOutput(BaseModel):
|
|
250
|
+
"""Agent output with answer and confidence.""" # ← This becomes schema description
|
|
251
|
+
answer: str = Field(description="The query answer")
|
|
252
|
+
confidence: float = Field(ge=0, le=1)
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
When sent to the LLM, the Pydantic model's **docstring duplicates the system prompt** (already in `schema.description`). To avoid redundancy and reduce token usage:
|
|
256
|
+
|
|
257
|
+
**We STRIP the model-level description from the response schema**:
|
|
258
|
+
|
|
259
|
+
```python
|
|
260
|
+
def _create_schema_wrapper(result_type: type[BaseModel], strip_description: bool = True):
|
|
261
|
+
"""Strip model docstring from schema sent to LLM."""
|
|
262
|
+
if not strip_description:
|
|
263
|
+
return result_type
|
|
264
|
+
|
|
265
|
+
class SchemaWrapper(result_type):
|
|
266
|
+
@classmethod
|
|
267
|
+
def model_json_schema(cls, **kwargs):
|
|
268
|
+
schema = super().model_json_schema(**kwargs)
|
|
269
|
+
schema.pop("description", None) # Remove top-level description
|
|
270
|
+
return schema
|
|
271
|
+
|
|
272
|
+
return SchemaWrapper
|
|
273
|
+
```
|
|
274
|
+
|
|
275
|
+
**Why this matters**:
|
|
276
|
+
- JSON Schema `description` → System prompt (tells agent WHAT to do)
|
|
277
|
+
- Pydantic model schema → Response format (tells agent HOW to structure output)
|
|
278
|
+
- Stripping the model description prevents sending the same instructions twice
|
|
279
|
+
|
|
280
|
+
### 3. MCP Tool Integration
|
|
281
|
+
|
|
282
|
+
REM agents **only support MCP (Model Context Protocol) tools and resources**. This design decision provides:
|
|
283
|
+
|
|
284
|
+
- **Standardization**: All tools use the same protocol
|
|
285
|
+
- **Interoperability**: MCP tools work across different agent frameworks
|
|
286
|
+
- **Simplicity**: Single integration point instead of framework-specific tool APIs
|
|
287
|
+
|
|
288
|
+
#### Tool Configuration
|
|
289
|
+
|
|
290
|
+
Tools are declared in `json_schema_extra.tools`:
|
|
291
|
+
|
|
292
|
+
```json
|
|
293
|
+
{
|
|
294
|
+
"tools": [
|
|
295
|
+
{
|
|
296
|
+
"name": "lookup_entity",
|
|
297
|
+
"mcp_server": "rem",
|
|
298
|
+
"description": "Optional override for tool description"
|
|
299
|
+
}
|
|
300
|
+
]
|
|
301
|
+
}
|
|
302
|
+
```
|
|
303
|
+
|
|
304
|
+
The `mcp_server` value maps to environment variables:
|
|
305
|
+
- `rem` → `MCP_SERVER_REM` (e.g., `http://rem-mcp:8000`)
|
|
306
|
+
- `search` → `MCP_SERVER_SEARCH`
|
|
307
|
+
|
|
308
|
+
#### Resource Configuration
|
|
309
|
+
|
|
310
|
+
Resources are MCP's way of exposing retrievable content:
|
|
311
|
+
|
|
312
|
+
```json
|
|
313
|
+
{
|
|
314
|
+
"resources": [
|
|
315
|
+
{
|
|
316
|
+
"uri_pattern": "rem://resources/.*",
|
|
317
|
+
"mcp_server": "rem"
|
|
318
|
+
}
|
|
319
|
+
]
|
|
320
|
+
}
|
|
321
|
+
```
|
|
322
|
+
|
|
323
|
+
Resources are converted to tool calls by the provider factory.
|
|
324
|
+
|
|
325
|
+
### 4. Agent Context
|
|
326
|
+
|
|
327
|
+
`AgentContext` (defined in `context.py`) provides session and configuration for agent execution:
|
|
328
|
+
|
|
329
|
+
```python
|
|
330
|
+
class AgentContext(BaseModel):
|
|
331
|
+
user_id: str | None # User identifier
|
|
332
|
+
tenant_id: str # Tenant for multi-tenancy (REM requirement)
|
|
333
|
+
session_id: str | None # Conversation/session ID
|
|
334
|
+
default_model: str # LLM model name
|
|
335
|
+
agent_schema_uri: str | None # Schema reference (e.g., "rem-agents-query-agent")
|
|
336
|
+
```
|
|
337
|
+
|
|
338
|
+
#### Context Headers
|
|
339
|
+
|
|
340
|
+
Contexts can be constructed from HTTP headers:
|
|
341
|
+
|
|
342
|
+
| Header | Field | Example |
|
|
343
|
+
|--------|-------|---------|
|
|
344
|
+
| `X-User-Id` | `user_id` | `user123` |
|
|
345
|
+
| `X-Tenant-Id` | `tenant_id` | `acme-corp` |
|
|
346
|
+
| `X-Session-Id` | `session_id` | `sess-456` |
|
|
347
|
+
| `X-Model-Name` | `default_model` | `anthropic:claude-sonnet-4-5-20250929` |
|
|
348
|
+
| `X-Agent-Schema` | `agent_schema_uri` | `rem-agents-query-agent` |
|
|
349
|
+
|
|
350
|
+
```python
|
|
351
|
+
context = AgentContext.from_headers(request.headers)
|
|
352
|
+
agent = await create_pydantic_ai_agent(context)
|
|
353
|
+
```
|
|
354
|
+
|
|
355
|
+
### 5. Agent Query
|
|
356
|
+
|
|
357
|
+
`AgentQuery` (defined in `query.py`) provides structured input for agent execution:
|
|
358
|
+
|
|
359
|
+
```python
|
|
360
|
+
class AgentQuery(BaseModel):
|
|
361
|
+
query: str # Primary user question/task
|
|
362
|
+
knowledge: str # Retrieved context (markdown + fenced JSON)
|
|
363
|
+
scratchpad: str | dict # Working memory for multi-turn reasoning
|
|
364
|
+
```
|
|
365
|
+
|
|
366
|
+
Example usage:
|
|
367
|
+
|
|
368
|
+
```python
|
|
369
|
+
query = AgentQuery(
|
|
370
|
+
query="Find all documents Sarah authored",
|
|
371
|
+
knowledge="""
|
|
372
|
+
# Entity: sarah-chen
|
|
373
|
+
Type: person/employee
|
|
374
|
+
Role: Senior Engineer
|
|
375
|
+
Projects: [Project Alpha, TiDB Migration]
|
|
376
|
+
""",
|
|
377
|
+
scratchpad={"stage": "entity_lookup", "visited": ["sarah-chen"]}
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
prompt = query.to_prompt()
|
|
381
|
+
result = await agent.run(prompt)
|
|
382
|
+
```
|
|
383
|
+
|
|
384
|
+
## Provider Factories
|
|
385
|
+
|
|
386
|
+
### Pydantic AI Provider
|
|
387
|
+
|
|
388
|
+
The `providers/pydantic_ai.py` module is the reference implementation:
|
|
389
|
+
|
|
390
|
+
```python
|
|
391
|
+
agent = await create_pydantic_ai_agent(
|
|
392
|
+
context=context, # Optional: AgentContext with session info
|
|
393
|
+
agent_schema_override=schema, # Optional: Explicit schema (bypasses context.agent_schema_uri)
|
|
394
|
+
model_override="claude-opus-4", # Optional: Model override
|
|
395
|
+
result_type=OutputModel, # Optional: Pydantic model for output
|
|
396
|
+
strip_model_description=True # Strip docstring from response schema
|
|
397
|
+
)
|
|
398
|
+
```
|
|
399
|
+
|
|
400
|
+
**Key Implementation Details**:
|
|
401
|
+
|
|
402
|
+
1. **Schema Loading**: Loads agent schema from `context.agent_schema_uri` or uses `agent_schema_override`
|
|
403
|
+
2. **System Prompt**: Extracted from `schema.description`
|
|
404
|
+
3. **Dynamic Model**: Converts `schema.properties` to a Pydantic model using `json-schema-to-pydantic`
|
|
405
|
+
4. **MCP Tools**: Loads tools from `schema.json_schema_extra.tools`
|
|
406
|
+
5. **OTEL Instrumentation**: Conditionally enabled via `settings.otel.enabled`
|
|
407
|
+
|
|
408
|
+
### Provider Interface
|
|
409
|
+
|
|
410
|
+
The framework is designed to support multiple agent frameworks if needed. Each provider implements the same interface:
|
|
411
|
+
- Input: JSON Schema + AgentContext
|
|
412
|
+
- Output: Framework-specific agent instance
|
|
413
|
+
- Responsibilities: Schema conversion, tool loading, model configuration
|
|
414
|
+
|
|
415
|
+
## Evaluators
|
|
416
|
+
|
|
417
|
+
Evaluators follow the **same pattern as agents** - they are JSON Schema documents that define:
|
|
418
|
+
- Evaluation criteria (in `description`)
|
|
419
|
+
- Structured evaluation output (in `properties`)
|
|
420
|
+
- Tools needed for evaluation (in `json_schema_extra`)
|
|
421
|
+
|
|
422
|
+
### Evaluator Schema Example
|
|
423
|
+
|
|
424
|
+
```json
|
|
425
|
+
{
|
|
426
|
+
"type": "object",
|
|
427
|
+
"description": "Evaluate agent responses for accuracy and completeness. Check if the answer correctly uses the provided evidence and addresses all aspects of the query.",
|
|
428
|
+
"properties": {
|
|
429
|
+
"accuracy_score": {
|
|
430
|
+
"type": "number",
|
|
431
|
+
"minimum": 0,
|
|
432
|
+
"maximum": 1,
|
|
433
|
+
"description": "How accurate is the answer based on evidence"
|
|
434
|
+
},
|
|
435
|
+
"completeness_score": {
|
|
436
|
+
"type": "number",
|
|
437
|
+
"minimum": 0,
|
|
438
|
+
"maximum": 1,
|
|
439
|
+
"description": "Does the answer address all parts of the query"
|
|
440
|
+
},
|
|
441
|
+
"reasoning": {
|
|
442
|
+
"type": "string",
|
|
443
|
+
"description": "Explanation of scores"
|
|
444
|
+
}
|
|
445
|
+
},
|
|
446
|
+
"required": ["accuracy_score", "completeness_score", "reasoning"],
|
|
447
|
+
"json_schema_extra": {
|
|
448
|
+
"fully_qualified_name": "rem.evaluators.AccuracyEvaluator",
|
|
449
|
+
"evaluation_type": "accuracy"
|
|
450
|
+
}
|
|
451
|
+
}
|
|
452
|
+
```
|
|
453
|
+
|
|
454
|
+
### Phoenix Integration (TODO)
|
|
455
|
+
|
|
456
|
+
Arize Phoenix supports OpenTelemetry-based LLM observability. Evaluators can be registered with Phoenix to:
|
|
457
|
+
|
|
458
|
+
1. **Trace Evaluation**: Automatically evaluate traced agent runs
|
|
459
|
+
2. **Dataset Evaluation**: Batch evaluate against test datasets
|
|
460
|
+
3. **Dashboard Integration**: View evaluation metrics in Phoenix UI
|
|
461
|
+
|
|
462
|
+
Example integration pattern:
|
|
463
|
+
|
|
464
|
+
```python
|
|
465
|
+
# TODO: Implement in future
|
|
466
|
+
from phoenix.evals import run_evals
|
|
467
|
+
from .providers.pydantic_ai import create_pydantic_ai_agent
|
|
468
|
+
|
|
469
|
+
# Load evaluator schema
|
|
470
|
+
evaluator_schema = load_schema("rem-evaluators-accuracy")
|
|
471
|
+
|
|
472
|
+
# Create evaluator agent
|
|
473
|
+
evaluator = await create_pydantic_ai_agent(
|
|
474
|
+
agent_schema_override=evaluator_schema
|
|
475
|
+
)
|
|
476
|
+
|
|
477
|
+
# Register with Phoenix
|
|
478
|
+
run_evals(
|
|
479
|
+
dataframe=traces_df,
|
|
480
|
+
evaluators=[evaluator],
|
|
481
|
+
provide_explanation=True
|
|
482
|
+
)
|
|
483
|
+
```
|
|
484
|
+
|
|
485
|
+
## Testing
|
|
486
|
+
|
|
487
|
+
### Example Agent Schemas
|
|
488
|
+
|
|
489
|
+
Test agent schemas live in `tests/data/agents/`:
|
|
490
|
+
|
|
491
|
+
```
|
|
492
|
+
tests/
|
|
493
|
+
└── data/
|
|
494
|
+
└── agents/
|
|
495
|
+
├── query_agent.json # Example query agent
|
|
496
|
+
├── summarization_agent.json # Example summarization agent
|
|
497
|
+
└── evaluators/
|
|
498
|
+
└── accuracy_evaluator.json
|
|
499
|
+
```
|
|
500
|
+
|
|
501
|
+
### Unit Tests
|
|
502
|
+
|
|
503
|
+
Unit tests for agent conversion are in `tests/unit/agentic/providers/`:
|
|
504
|
+
|
|
505
|
+
```python
|
|
506
|
+
# tests/unit/agentic/providers/test_pydantic_ai.py
|
|
507
|
+
import pytest
|
|
508
|
+
from rem.agentic.providers.pydantic_ai import create_pydantic_ai_agent
|
|
509
|
+
|
|
510
|
+
@pytest.mark.asyncio
|
|
511
|
+
async def test_agent_from_schema(query_agent_schema):
|
|
512
|
+
"""Test creating Pydantic AI agent from JSON Schema."""
|
|
513
|
+
agent = await create_pydantic_ai_agent(
|
|
514
|
+
agent_schema_override=query_agent_schema
|
|
515
|
+
)
|
|
516
|
+
|
|
517
|
+
assert agent is not None
|
|
518
|
+
assert agent.system_prompt == query_agent_schema["description"]
|
|
519
|
+
# Test structured output, tools, etc.
|
|
520
|
+
```
|
|
521
|
+
|
|
522
|
+
## Design Principles
|
|
523
|
+
|
|
524
|
+
1. **JSON Schema as Source of Truth**: All agent definitions start as JSON Schema
|
|
525
|
+
2. **Provider Agnostic**: Framework-specific implementations in `providers/`
|
|
526
|
+
3. **MCP Only**: Standardize on MCP for tool/resource integration
|
|
527
|
+
4. **Description Stripping**: Prevent prompt duplication in structured output schemas
|
|
528
|
+
5. **Evaluators = Agents**: Use same pattern for evaluation as for agent execution
|
|
529
|
+
6. **Observable by Default**: OTEL instrumentation built into agent factories
|
|
530
|
+
|
|
531
|
+
## Usage Example
|
|
532
|
+
|
|
533
|
+
```python
|
|
534
|
+
from rem.agentic.context import AgentContext
|
|
535
|
+
from rem.agentic.query import AgentQuery
|
|
536
|
+
from rem.agentic.providers.pydantic_ai import create_pydantic_ai_agent
|
|
537
|
+
|
|
538
|
+
# 1. Load agent schema
|
|
539
|
+
schema = {
|
|
540
|
+
"type": "object",
|
|
541
|
+
"description": "Answer REM queries using LOOKUP and TRAVERSE operations...",
|
|
542
|
+
"properties": {
|
|
543
|
+
"answer": {"type": "string"},
|
|
544
|
+
"confidence": {"type": "number", "minimum": 0, "maximum": 1}
|
|
545
|
+
},
|
|
546
|
+
"required": ["answer", "confidence"],
|
|
547
|
+
"json_schema_extra": {
|
|
548
|
+
"tools": [{"name": "lookup_entity", "mcp_server": "rem"}]
|
|
549
|
+
}
|
|
550
|
+
}
|
|
551
|
+
|
|
552
|
+
# 2. Create context
|
|
553
|
+
context = AgentContext(
|
|
554
|
+
user_id="user123",
|
|
555
|
+
tenant_id="acme-corp",
|
|
556
|
+
session_id="sess-456"
|
|
557
|
+
)
|
|
558
|
+
|
|
559
|
+
# 3. Create agent
|
|
560
|
+
agent = await create_pydantic_ai_agent(
|
|
561
|
+
context=context,
|
|
562
|
+
agent_schema_override=schema
|
|
563
|
+
)
|
|
564
|
+
|
|
565
|
+
# 4. Build query
|
|
566
|
+
query = AgentQuery(
|
|
567
|
+
query="Who manages Project Alpha?",
|
|
568
|
+
knowledge="# Recent LOOKUP results\n...",
|
|
569
|
+
scratchpad={"stage": "initial_lookup"}
|
|
570
|
+
)
|
|
571
|
+
|
|
572
|
+
# 5. Execute
|
|
573
|
+
result = await agent.run(query.to_prompt())
|
|
574
|
+
|
|
575
|
+
print(result.data.answer)
|
|
576
|
+
print(f"Confidence: {result.data.confidence}")
|
|
577
|
+
```
|
|
578
|
+
|
|
579
|
+
## Built-in Agents
|
|
580
|
+
|
|
581
|
+
### REM Query Agent
|
|
582
|
+
|
|
583
|
+
The REM Query Agent converts natural language questions into structured REM queries. It's a specialized agent that understands the PostgreSQL dialect and knows when to use different query types.
|
|
584
|
+
|
|
585
|
+
**Location**: `rem/agentic/query_helper.py` (provides `ask_rem` and `REMQueryOutput`, re-exported from `rem.agentic`)
|
|
586
|
+
|
|
587
|
+
**Usage**:
|
|
588
|
+
|
|
589
|
+
```python
|
|
590
|
+
from rem.agentic import ask_rem
|
|
591
|
+
|
|
592
|
+
# Simple usage
|
|
593
|
+
result = await ask_rem("Show me Sarah Chen")
|
|
594
|
+
# REMQueryOutput(
|
|
595
|
+
# query_type=QueryType.LOOKUP,
|
|
596
|
+
# parameters={"entity_key": "sarah-chen"},
|
|
597
|
+
# confidence=1.0,
|
|
598
|
+
# reasoning=None
|
|
599
|
+
# )
|
|
600
|
+
|
|
601
|
+
# With custom model
|
|
602
|
+
result = await ask_rem(
|
|
603
|
+
"Find documents about databases",
|
|
604
|
+
llm_model="gpt-4o-mini"
|
|
605
|
+
)
|
|
606
|
+
# REMQueryOutput(
|
|
607
|
+
# query_type=QueryType.SEARCH,
|
|
608
|
+
# parameters={
|
|
609
|
+
# "query_text": "database",
|
|
610
|
+
# "table_name": "resources",
|
|
611
|
+
# "field_name": "content",
|
|
612
|
+
# "limit": 10
|
|
613
|
+
# },
|
|
614
|
+
# confidence=0.95,
|
|
615
|
+
# reasoning=None
|
|
616
|
+
# )
|
|
617
|
+
```
|
|
618
|
+
|
|
619
|
+
**Query Types**:
|
|
620
|
+
|
|
621
|
+
- `LOOKUP` - O(1) entity lookup by natural key (fastest)
|
|
622
|
+
- `FUZZY` - Trigram text similarity (pg_trgm) for partial/misspelled names
|
|
623
|
+
- `SEARCH` - Semantic vector similarity using embeddings
|
|
624
|
+
- `SQL` - Direct table queries with WHERE clauses for temporal/filtered queries
|
|
625
|
+
- `TRAVERSE` - Recursive graph traversal for relationship exploration
|
|
626
|
+
|
|
627
|
+
**Settings**:
|
|
628
|
+
|
|
629
|
+
```bash
|
|
630
|
+
# In .env or environment variables
|
|
631
|
+
LLM__QUERY_AGENT_MODEL=gpt-4o-mini # Fast, cheap model for query generation
|
|
632
|
+
```
|
|
633
|
+
|
|
634
|
+
If `query_agent_model` is not set, the agent uses `settings.llm.default_model`.
|
|
635
|
+
|
|
636
|
+
**Design**:
|
|
637
|
+
|
|
638
|
+
- Token-optimized output (minimal fields)
|
|
639
|
+
- Reasoning is provided only when confidence < 0.7 or for multi-step queries
|
|
640
|
+
- PostgreSQL dialect aware (knows about KV_STORE, embeddings tables)
|
|
641
|
+
- Can generate multi-step query plans for complex questions
|
|
642
|
+
|
|
643
|
+
## Future Work
|
|
644
|
+
|
|
645
|
+
- [ ] Phoenix evaluator integration
|
|
646
|
+
- [ ] Agent schema registry (load schemas by URI)
|
|
647
|
+
- [ ] Schema validation and versioning
|
|
648
|
+
- [ ] Multi-turn conversation management
|
|
649
|
+
- [ ] Agent composition (agents calling agents)
|
|
650
|
+
- [ ] Alternative provider implementations (if needed)
|
rem/agentic/__init__.py
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
"""
|
|
2
|
+
REM Agentic Framework.
|
|
3
|
+
|
|
4
|
+
Provider-agnostic agent orchestration with JSON Schema agents,
|
|
5
|
+
MCP tool integration, and structured output.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from .context import AgentContext
|
|
9
|
+
from .query import AgentQuery
|
|
10
|
+
from .schema import (
|
|
11
|
+
AgentSchema,
|
|
12
|
+
AgentSchemaMetadata,
|
|
13
|
+
MCPToolReference,
|
|
14
|
+
MCPResourceReference,
|
|
15
|
+
validate_agent_schema,
|
|
16
|
+
create_agent_schema,
|
|
17
|
+
)
|
|
18
|
+
from .providers.pydantic_ai import create_agent_from_schema_file, create_agent, AgentRuntime
|
|
19
|
+
from .query_helper import ask_rem, REMQueryOutput
|
|
20
|
+
|
|
21
|
+
__all__ = [
|
|
22
|
+
# Context and Query
|
|
23
|
+
"AgentContext",
|
|
24
|
+
"AgentQuery",
|
|
25
|
+
# Schema Protocol
|
|
26
|
+
"AgentSchema",
|
|
27
|
+
"AgentSchemaMetadata",
|
|
28
|
+
"MCPToolReference",
|
|
29
|
+
"MCPResourceReference",
|
|
30
|
+
"validate_agent_schema",
|
|
31
|
+
"create_agent_schema",
|
|
32
|
+
# Agent Factories
|
|
33
|
+
"create_agent_from_schema_file",
|
|
34
|
+
"create_agent",
|
|
35
|
+
"AgentRuntime",
|
|
36
|
+
# REM Query Helpers
|
|
37
|
+
"ask_rem",
|
|
38
|
+
"REMQueryOutput",
|
|
39
|
+
]
|