remdb 0.3.242__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/__init__.py +129 -0
- rem/agentic/README.md +760 -0
- rem/agentic/__init__.py +54 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +38 -0
- rem/agentic/agents/agent_manager.py +311 -0
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +425 -0
- rem/agentic/context_builder.py +360 -0
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +273 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +240 -0
- rem/agentic/providers/phoenix.py +926 -0
- rem/agentic/providers/pydantic_ai.py +854 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +737 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +242 -0
- rem/api/README.md +657 -0
- rem/api/deps.py +253 -0
- rem/api/main.py +460 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +820 -0
- rem/api/mcp_router/server.py +243 -0
- rem/api/mcp_router/tools.py +1605 -0
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +520 -0
- rem/api/routers/auth.py +898 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/child_streaming.py +394 -0
- rem/api/routers/chat/completions.py +702 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +202 -0
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +546 -0
- rem/api/routers/chat/streaming.py +950 -0
- rem/api/routers/chat/streaming_utils.py +327 -0
- rem/api/routers/common.py +18 -0
- rem/api/routers/dev.py +87 -0
- rem/api/routers/feedback.py +276 -0
- rem/api/routers/messages.py +620 -0
- rem/api/routers/models.py +86 -0
- rem/api/routers/query.py +362 -0
- rem/api/routers/shared_sessions.py +422 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +36 -0
- rem/auth/jwt.py +367 -0
- rem/auth/middleware.py +318 -0
- rem/auth/providers/__init__.py +16 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/email.py +215 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +517 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +299 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +549 -0
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +495 -0
- rem/cli/commands/db.py +828 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1698 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +388 -0
- rem/cli/commands/query.py +109 -0
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +230 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/commands/session.py +453 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +123 -0
- rem/config.py +244 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +70 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +672 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +246 -0
- rem/models/entities/__init__.py +68 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +64 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +181 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/session.py +84 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/subscriber.py +175 -0
- rem/models/entities/user.py +93 -0
- rem/py.typed +0 -0
- rem/registry.py +373 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/agent-builder.yaml +235 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +132 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +18 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +760 -0
- rem/services/content/service.py +762 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +322 -0
- rem/services/dreaming/moment_service.py +251 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/email/__init__.py +10 -0
- rem/services/email/service.py +522 -0
- rem/services/email/templates.py +360 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +127 -0
- rem/services/embeddings/worker.py +435 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +960 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +757 -0
- rem/services/postgres/__init__.py +49 -0
- rem/services/postgres/diff_service.py +599 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/programmable_diff_service.py +635 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
- rem/services/postgres/register_type.py +353 -0
- rem/services/postgres/repository.py +481 -0
- rem/services/postgres/schema_generator.py +661 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +355 -0
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +318 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +180 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +608 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +13 -0
- rem/services/session/compression.py +488 -0
- rem/services/session/pydantic_messages.py +310 -0
- rem/services/session/reload.py +85 -0
- rem/services/user_service.py +130 -0
- rem/settings.py +1877 -0
- rem/sql/background_indexes.sql +52 -0
- rem/sql/migrations/001_install.sql +983 -0
- rem/sql/migrations/002_install_models.sql +3157 -0
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +282 -0
- rem/sql/migrations/005_schema_update.sql +145 -0
- rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +628 -0
- rem/utils/__init__.py +61 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +436 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/files.py +323 -0
- rem/utils/markdown.py +16 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +492 -0
- rem/utils/schema_loader.py +649 -0
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +350 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +325 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +7 -0
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- rem/workers/unlogged_maintainer.py +463 -0
- remdb-0.3.242.dist-info/METADATA +1632 -0
- remdb-0.3.242.dist-info/RECORD +235 -0
- remdb-0.3.242.dist-info/WHEEL +4 -0
- remdb-0.3.242.dist-info/entry_points.txt +2 -0
rem/cli/README.md
ADDED
|
@@ -0,0 +1,517 @@
|
|
|
1
|
+
# REM CLI - Agent Testing Guide
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The `rem ask` command provides a CLI interface for testing Pydantic AI agents with YAML-based schemas. It supports both streaming and non-streaming modes, structured output, and optional OTEL/Phoenix instrumentation.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# Install REM with all dependencies
|
|
11
|
+
cd /path/to/remstack/rem  # your local checkout of the REM repository
|
|
12
|
+
uv pip install -e .
|
|
13
|
+
|
|
14
|
+
# Verify installation
|
|
15
|
+
rem --help
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Basic Usage
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# Simple question (non-streaming by default)
|
|
22
|
+
rem ask simple "What is 2+2?"
|
|
23
|
+
|
|
24
|
+
# Streaming mode for real-time output
|
|
25
|
+
rem ask simple "What is 2+2?" --stream
|
|
26
|
+
|
|
27
|
+
# With specific model
|
|
28
|
+
rem ask simple "What is 2+2?" --model openai:gpt-4o-mini
|
|
29
|
+
|
|
30
|
+
# Structured output
|
|
31
|
+
rem ask query "Find all documents by Sarah" --model openai:gpt-4o-mini
|
|
32
|
+
|
|
33
|
+
# Process file and save output
|
|
34
|
+
rem ask contract-analyzer -i rem/tests/data/content-examples/service_agreement.txt -o output.yaml
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## File Processing
|
|
38
|
+
|
|
39
|
+
The `--input-file` option allows you to process files directly instead of providing a text query:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Extract data from contract (text file)
|
|
43
|
+
rem ask contract-analyzer \
|
|
44
|
+
-i rem/tests/data/content-examples/service_agreement.txt \
|
|
45
|
+
-o output.yaml
|
|
46
|
+
|
|
47
|
+
# Extract from PDF contract
|
|
48
|
+
rem ask contract-analyzer \
|
|
49
|
+
-i rem/tests/data/content-examples/pdf/service_contract.pdf \
|
|
50
|
+
-o output.yaml
|
|
51
|
+
|
|
52
|
+
# With specific model
|
|
53
|
+
rem ask contract-analyzer \
|
|
54
|
+
-i rem/tests/data/content-examples/service_agreement.txt \
|
|
55
|
+
-o output.yaml \
|
|
56
|
+
-m anthropic:claude-sonnet-4-5-20250929
|
|
57
|
+
|
|
58
|
+
# Output to console (default)
|
|
59
|
+
rem ask contract-analyzer -i rem/tests/data/content-examples/service_agreement.txt
|
|
60
|
+
|
|
61
|
+
# Stream output in real-time
|
|
62
|
+
rem ask contract-analyzer -i rem/tests/data/content-examples/service_agreement.txt --stream
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
**Schema name resolution:**
|
|
66
|
+
- Short names: `contract-analyzer` → `schemas/agents/examples/contract-analyzer.yaml`
|
|
67
|
+
- With folder: `examples/contract-analyzer` → `schemas/agents/examples/contract-analyzer.yaml`
|
|
68
|
+
- Core agents: `moment-builder` → `schemas/agents/core/moment-builder.yaml`
|
|
69
|
+
- Full paths: `schemas/agents/examples/contract-analyzer.yaml` (as-is)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
**Supported file types:**
|
|
73
|
+
- Documents: PDF, DOCX, PPTX, XLSX (via Kreuzberg)
|
|
74
|
+
- Text: TXT, MD, Markdown, code files
|
|
75
|
+
- Schemas: YAML, JSON
|
|
76
|
+
- Audio: MP3, WAV, M4A (via Whisper API)
|
|
77
|
+
|
|
78
|
+
See [examples/README.md](../../../examples/README.md) for complete contract extraction examples.
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
## Command Options
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
rem ask NAME [QUERY] [OPTIONS]
|
|
85
|
+
|
|
86
|
+
Arguments:
|
|
87
|
+
NAME Agent schema name (YAML files in schemas/agents/)
|
|
88
|
+
- Short name: contract-analyzer → schemas/agents/examples/contract-analyzer.yaml
|
|
89
|
+
- With folder: examples/contract-analyzer → schemas/agents/examples/contract-analyzer.yaml
|
|
90
|
+
- Core agent: moment-builder → schemas/agents/core/moment-builder.yaml
|
|
91
|
+
- Full path: schemas/agents/examples/contract-analyzer.yaml
|
|
92
|
+
|
|
93
|
+
QUERY User query to send to the agent (optional if --input-file is used)
|
|
94
|
+
|
|
95
|
+
Options:
|
|
96
|
+
--model, -m TEXT LLM model (default: from settings)
|
|
97
|
+
--temperature, -t FLOAT Temperature 0.0-1.0 (not yet implemented)
|
|
98
|
+
--max-turns INTEGER Maximum turns for execution (default: 10)
|
|
99
|
+
--version, -v TEXT Schema version for registry lookup
|
|
100
|
+
--stream / --no-stream Enable/disable streaming (default: disabled)
|
|
101
|
+
--input-file, -i PATH Read input from file (PDF, TXT, Markdown, etc.)
|
|
102
|
+
--output-file, -o PATH Write output to file (YAML format)
|
|
103
|
+
--user-id TEXT User ID for context (default: cli-user)
|
|
104
|
+
--session-id TEXT Session ID for context (default: auto-generated)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Agent Schema Format
|
|
108
|
+
|
|
109
|
+
Agent schemas are YAML files following JSON Schema with embedded metadata:
|
|
110
|
+
|
|
111
|
+
```yaml
|
|
112
|
+
type: object
|
|
113
|
+
description: |
|
|
114
|
+
System prompt for the agent.
|
|
115
|
+
|
|
116
|
+
This describes what the agent does and how it should behave.
|
|
117
|
+
|
|
118
|
+
properties:
|
|
119
|
+
answer:
|
|
120
|
+
type: string
|
|
121
|
+
description: The response to the user's query
|
|
122
|
+
|
|
123
|
+
confidence:
|
|
124
|
+
type: number
|
|
125
|
+
minimum: 0
|
|
126
|
+
maximum: 1
|
|
127
|
+
description: Confidence score for the response
|
|
128
|
+
|
|
129
|
+
required:
|
|
130
|
+
- answer
|
|
131
|
+
|
|
132
|
+
json_schema_extra:
|
|
133
|
+
fully_qualified_name: "rem.agents.SimpleAgent"
|
|
134
|
+
version: "1.0.0"
|
|
135
|
+
tools: [] # MCP tool configurations (future)
|
|
136
|
+
resources: [] # MCP resource configurations (future)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Example Schemas
|
|
140
|
+
|
|
141
|
+
### Simple Agent (`schemas/agents/examples/simple.yaml`)
|
|
142
|
+
|
|
143
|
+
A basic conversational agent that returns simple text answers:
|
|
144
|
+
|
|
145
|
+
```yaml
|
|
146
|
+
type: object
|
|
147
|
+
description: |
|
|
148
|
+
A simple conversational agent that provides helpful, friendly responses.
|
|
149
|
+
|
|
150
|
+
You are a helpful AI assistant. Answer questions clearly and concisely.
|
|
151
|
+
If you don't know something, say so. Be friendly and professional.
|
|
152
|
+
|
|
153
|
+
properties:
|
|
154
|
+
answer:
|
|
155
|
+
type: string
|
|
156
|
+
description: The response to the user's query
|
|
157
|
+
|
|
158
|
+
required:
|
|
159
|
+
- answer
|
|
160
|
+
|
|
161
|
+
json_schema_extra:
|
|
162
|
+
fully_qualified_name: "rem.agents.SimpleAgent"
|
|
163
|
+
version: "1.0.0"
|
|
164
|
+
tools: []
|
|
165
|
+
resources: []
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Query Agent (`schemas/agents/examples/query.yaml`)
|
|
169
|
+
|
|
170
|
+
An agent that provides structured output with confidence scores:
|
|
171
|
+
|
|
172
|
+
```yaml
|
|
173
|
+
type: object
|
|
174
|
+
description: |
|
|
175
|
+
REM Query Agent - Converts natural language questions to REM queries.
|
|
176
|
+
|
|
177
|
+
You are a specialized agent that understands REM (Resources Entities Moments) queries.
|
|
178
|
+
Your job is to interpret user questions and provide answers with confidence scores.
|
|
179
|
+
|
|
180
|
+
properties:
|
|
181
|
+
answer:
|
|
182
|
+
type: string
|
|
183
|
+
description: The answer to the user's query with supporting details
|
|
184
|
+
|
|
185
|
+
confidence:
|
|
186
|
+
type: number
|
|
187
|
+
minimum: 0
|
|
188
|
+
maximum: 1
|
|
189
|
+
description: Confidence score (0.0-1.0) for this answer
|
|
190
|
+
|
|
191
|
+
query_type:
|
|
192
|
+
type: string
|
|
193
|
+
enum:
|
|
194
|
+
- LOOKUP
|
|
195
|
+
- FUZZY
|
|
196
|
+
- TRAVERSE
|
|
197
|
+
- UNKNOWN
|
|
198
|
+
description: The type of REM query that would best answer this question
|
|
199
|
+
|
|
200
|
+
required:
|
|
201
|
+
- answer
|
|
202
|
+
- confidence
|
|
203
|
+
- query_type
|
|
204
|
+
|
|
205
|
+
json_schema_extra:
|
|
206
|
+
fully_qualified_name: "rem.agents.QueryAgent"
|
|
207
|
+
version: "1.0.0"
|
|
208
|
+
tools: []
|
|
209
|
+
resources: []
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Streaming vs Non-Streaming
|
|
213
|
+
|
|
214
|
+
### Non-Streaming Mode (default)
|
|
215
|
+
|
|
216
|
+
Uses `agent.run()` to return the complete structured result at once:
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
rem ask simple "Explain quantum computing"
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Output:
|
|
223
|
+
```json
|
|
224
|
+
{
|
|
225
|
+
"answer": "Quantum computing uses quantum mechanical phenomena..."
|
|
226
|
+
}
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
**Best for:**
|
|
230
|
+
- Saving output to files
|
|
231
|
+
- Structured data extraction
|
|
232
|
+
- Processing files with complex schemas
|
|
233
|
+
- Programmatic usage
|
|
234
|
+
|
|
235
|
+
### Streaming Mode
|
|
236
|
+
|
|
237
|
+
Uses `agent.iter()` to stream events in real-time:
|
|
238
|
+
- Tool call markers: `[Calling: tool_name]`
|
|
239
|
+
- Text content deltas as they arrive
|
|
240
|
+
- Final structured result after completion
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
rem ask simple "Explain quantum computing" --stream
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Output:
|
|
247
|
+
```
|
|
248
|
+
[Calling: final_result]
|
|
249
|
+
Quantum computing uses quantum mechanical phenomena like superposition...
|
|
250
|
+
|
|
251
|
+
{
|
|
252
|
+
"answer": "Quantum computing uses quantum mechanical phenomena..."
|
|
253
|
+
}
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
**Best for:**
|
|
257
|
+
- Interactive conversations
|
|
258
|
+
- Long-running queries where you want to see progress
|
|
259
|
+
- Debugging agent behavior
|
|
260
|
+
|
|
261
|
+
## Implementation Details
|
|
262
|
+
|
|
263
|
+
### Architecture
|
|
264
|
+
|
|
265
|
+
```
|
|
266
|
+
CLI (ask.py)
|
|
267
|
+
├── load_schema_from_file() - YAML file loading
|
|
268
|
+
├── load_schema_from_registry() - TODO: Database/cache lookup
|
|
269
|
+
├── run_agent_streaming() - agent.iter() with event streaming
|
|
270
|
+
└── run_agent_non_streaming() - agent.run() for complete result
|
|
271
|
+
|
|
272
|
+
Agent Factory (providers/pydantic_ai.py)
|
|
273
|
+
├── create_pydantic_ai_agent() - Main factory
|
|
274
|
+
├── _create_model_from_schema() - JSON Schema → Pydantic model
|
|
275
|
+
└── _create_schema_wrapper() - Strip description for LLM
|
|
276
|
+
|
|
277
|
+
OTEL (otel/setup.py)
|
|
278
|
+
├── setup_instrumentation() - Initialize OTLP exporters
|
|
279
|
+
└── set_agent_resource_attributes() - Set span attributes
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### Design Patterns
|
|
283
|
+
|
|
284
|
+
1. **JSON Schema to Pydantic Pattern**
|
|
285
|
+
- Agent schemas are JSON Schema with embedded metadata
|
|
286
|
+
- `description` field becomes system prompt
|
|
287
|
+
- `properties` field becomes Pydantic output model
|
|
288
|
+
- Dynamic model creation using `json-schema-to-pydantic`
|
|
289
|
+
|
|
290
|
+
2. **Streaming with agent.iter() Pattern**
|
|
291
|
+
- Use `agent.iter()` for complete execution (not `run_stream()`)
|
|
292
|
+
- `agent.iter()` captures tool calls, `run_stream()` stops after first output
|
|
293
|
+
- Stream tool call events with `[Calling: tool_name]` markers
|
|
294
|
+
- Stream text content deltas as they arrive
|
|
295
|
+
|
|
296
|
+
3. **Conditional OTEL Instrumentation**
|
|
297
|
+
- OTEL disabled by default for local development
|
|
298
|
+
- Enabled in production via `OTEL__ENABLED=true`
|
|
299
|
+
- Applied at agent creation time: `Agent(..., instrument=settings.otel.enabled)`
|
|
300
|
+
|
|
301
|
+
## Environment Variables
|
|
302
|
+
|
|
303
|
+
Set API keys for LLM providers:
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
# In ~/.bash_profile or ~/.zshrc
|
|
307
|
+
export OPENAI_API_KEY="sk-..."
|
|
308
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
309
|
+
|
|
310
|
+
# Optional: OTEL/Phoenix configuration
|
|
311
|
+
export OTEL__ENABLED=true
|
|
312
|
+
export OTEL__SERVICE_NAME=rem-cli
|
|
313
|
+
export OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
|
|
314
|
+
export PHOENIX__ENABLED=true
|
|
315
|
+
export PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
## Observability (Optional)
|
|
319
|
+
|
|
320
|
+
### OTEL Configuration
|
|
321
|
+
|
|
322
|
+
Enable distributed tracing with OpenTelemetry:
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
# Enable OTEL
|
|
326
|
+
export OTEL__ENABLED=true
|
|
327
|
+
export OTEL__SERVICE_NAME=rem-cli
|
|
328
|
+
export OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
|
|
329
|
+
export OTEL__PROTOCOL=http
|
|
330
|
+
|
|
331
|
+
# Run agent with tracing
|
|
332
|
+
rem ask query "Find documents" --model openai:gpt-4o-mini
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
### Phoenix Integration
|
|
336
|
+
|
|
337
|
+
Enable LLM observability with Arize Phoenix:
|
|
338
|
+
|
|
339
|
+
```bash
|
|
340
|
+
# Start Phoenix locally
|
|
341
|
+
docker run -p 6006:6006 arizephoenix/phoenix:latest
|
|
342
|
+
|
|
343
|
+
# Enable Phoenix
|
|
344
|
+
export PHOENIX__ENABLED=true
|
|
345
|
+
export PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
|
|
346
|
+
export PHOENIX__PROJECT_NAME=rem-cli
|
|
347
|
+
|
|
348
|
+
# Run agent with Phoenix tracing
|
|
349
|
+
rem ask query "Find documents" --model openai:gpt-4o-mini
|
|
350
|
+
|
|
351
|
+
# View traces at http://localhost:6006
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
## Schema Registry (TODO)
|
|
355
|
+
|
|
356
|
+
The schema registry is stubbed but not yet implemented. To implement:
|
|
357
|
+
|
|
358
|
+
1. **Database Schema**:
|
|
359
|
+
```sql
|
|
360
|
+
CREATE TABLE agent_schemas (
|
|
361
|
+
id UUID PRIMARY KEY,
|
|
362
|
+
name TEXT NOT NULL,
|
|
363
|
+
version TEXT NOT NULL,
|
|
364
|
+
schema_json JSONB NOT NULL,
|
|
365
|
+
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
366
|
+
UNIQUE(name, version)
|
|
367
|
+
);
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
2. **Cache Layer**:
|
|
371
|
+
- Redis for fast lookups
|
|
372
|
+
- In-memory cache for CLI
|
|
373
|
+
|
|
374
|
+
3. **Versioning**:
|
|
375
|
+
- Semantic versioning (1.0.0, 1.1.0, etc.)
|
|
376
|
+
- Latest version fallback
|
|
377
|
+
|
|
378
|
+
Once implemented, you can load agents by name:
|
|
379
|
+
|
|
380
|
+
```bash
|
|
381
|
+
# Load latest version
|
|
382
|
+
rem ask query "Find documents"
|
|
383
|
+
|
|
384
|
+
# Load specific version
|
|
385
|
+
rem ask query "Find documents" --version 1.2.0
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
## Testing
|
|
389
|
+
|
|
390
|
+
```bash
|
|
391
|
+
# Test simple agent (default non-streaming)
|
|
392
|
+
rem ask simple "What is 2+2?" --model openai:gpt-4o-mini
|
|
393
|
+
|
|
394
|
+
# Test simple agent (streaming)
|
|
395
|
+
rem ask simple "What is 2+2?" --stream --model openai:gpt-4o-mini
|
|
396
|
+
|
|
397
|
+
# Test structured output
|
|
398
|
+
rem ask query "Find all documents by Sarah" --model openai:gpt-4o-mini
|
|
399
|
+
|
|
400
|
+
# Test file processing
|
|
401
|
+
rem ask contract-analyzer -i examples/contract.pdf -o output.yaml
|
|
402
|
+
|
|
403
|
+
# Test with different models
|
|
404
|
+
rem ask simple "Hello" --model openai:gpt-4o
|
|
405
|
+
rem ask simple "Hello" --model anthropic:claude-sonnet-4-5-20250929
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
## Troubleshooting
|
|
409
|
+
|
|
410
|
+
### API Key Not Found
|
|
411
|
+
|
|
412
|
+
```bash
|
|
413
|
+
# Set API key in environment
|
|
414
|
+
export OPENAI_API_KEY="sk-..."
|
|
415
|
+
|
|
416
|
+
# Or source your profile
|
|
417
|
+
source ~/.bash_profile
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
### Schema Registry Not Implemented
|
|
421
|
+
|
|
422
|
+
```
|
|
423
|
+
Schema registry not implemented yet. Please use a file path instead.
|
|
424
|
+
```
|
|
425
|
+
|
|
426
|
+
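Until the registry is implemented, use a schema name or path that resolves to a local YAML file (short names such as `simple` resolve under `schemas/agents/`):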
|
|
427
|
+
```bash
|
|
428
|
+
rem ask simple "query"
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
### Model Not Found
|
|
432
|
+
|
|
433
|
+
Ensure you're using the correct model format:
|
|
434
|
+
- OpenAI: `openai:gpt-4o-mini`, `openai:gpt-4o`
|
|
435
|
+
- Anthropic: `anthropic:claude-sonnet-4-5-20250929`
|
|
436
|
+
|
|
437
|
+
## Data Visibility: PUBLIC vs PRIVATE
|
|
438
|
+
|
|
439
|
+
**IMPORTANT: All ingested data is PUBLIC by default.** This is the correct behavior
|
|
440
|
+
for shared knowledge bases (ontologies, procedures, reference data).
|
|
441
|
+
|
|
442
|
+
### Why PUBLIC by Default?
|
|
443
|
+
|
|
444
|
+
Most data in REM should be searchable by all users:
|
|
445
|
+
- Clinical ontologies (disorders, symptoms, drugs)
|
|
446
|
+
- Procedures and protocols (SCID-5, PHQ-9, etc.)
|
|
447
|
+
- Reference documentation
|
|
448
|
+
- Shared domain knowledge
|
|
449
|
+
|
|
450
|
+
The `rem_lookup()` function searches for data where `user_id IS NULL`, which means
|
|
451
|
+
public data. If you set `user_id` on data, it becomes invisible to other users.
|
|
452
|
+
|
|
453
|
+
### Ingesting Public Data (Default)
|
|
454
|
+
|
|
455
|
+
```bash
|
|
456
|
+
# Standard ingestion - data is PUBLIC
|
|
457
|
+
rem process ingest ontology/procedures/ --table ontologies
|
|
458
|
+
|
|
459
|
+
# From S3 - also PUBLIC
|
|
460
|
+
rem process ingest s3://bucket/docs/reference.pdf
|
|
461
|
+
```
|
|
462
|
+
|
|
463
|
+
### Ingesting Private Data (Rare)
|
|
464
|
+
|
|
465
|
+
Private data requires the explicit `--make-private` flag together with `--user-id`:
|
|
466
|
+
|
|
467
|
+
```bash
|
|
468
|
+
# Private user data - requires --make-private and --user-id
|
|
469
|
+
rem process ingest personal-notes.md --make-private --user-id user-123
|
|
470
|
+
```
|
|
471
|
+
|
|
472
|
+
**When to use private data:**
|
|
473
|
+
- User-uploaded personal documents
|
|
474
|
+
- Session-specific content
|
|
475
|
+
- User notes and annotations
|
|
476
|
+
|
|
477
|
+
**NEVER use private data for:**
|
|
478
|
+
- Ontologies and reference material
|
|
479
|
+
- Clinical procedures and protocols
|
|
480
|
+
- Shared knowledge bases
|
|
481
|
+
- Anything that should be searchable by agents
|
|
482
|
+
|
|
483
|
+
### Common Mistake
|
|
484
|
+
|
|
485
|
+
If agents can't find data via `search_rem`, the most common cause is that the data
|
|
486
|
+
was ingested with a `user_id` set. Check with:
|
|
487
|
+
|
|
488
|
+
```sql
|
|
489
|
+
SELECT name, user_id FROM ontologies WHERE name = 'phq-9-procedure';
|
|
490
|
+
-- user_id should be NULL for public data
|
|
491
|
+
```
|
|
492
|
+
|
|
493
|
+
Fix by setting user_id to NULL:
|
|
494
|
+
```sql
|
|
495
|
+
UPDATE ontologies SET user_id = NULL WHERE user_id IS NOT NULL;
|
|
496
|
+
UPDATE kv_store SET user_id = NULL WHERE entity_type = 'ontologies' AND user_id IS NOT NULL;
|
|
497
|
+
```
|
|
498
|
+
|
|
499
|
+
## Next Steps
|
|
500
|
+
|
|
501
|
+
1. **Implement Schema Registry**
|
|
502
|
+
- PostgreSQL table for schema storage
|
|
503
|
+
- Redis cache for fast lookups
|
|
504
|
+
- Version management
|
|
505
|
+
|
|
506
|
+
2. **Add MCP Tool Support**
|
|
507
|
+
- Dynamic tool loading from schema
|
|
508
|
+
- MCP server configuration
|
|
509
|
+
|
|
510
|
+
3. **Temperature Override**
|
|
511
|
+
- Pass temperature to agent.run()
|
|
512
|
+
- Model-specific settings
|
|
513
|
+
|
|
514
|
+
4. **CLI Improvements**
|
|
515
|
+
- Interactive mode
|
|
516
|
+
- Multi-turn conversations
|
|
517
|
+
- Session management
|