remdb 0.3.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/__init__.py +2 -0
- rem/agentic/README.md +650 -0
- rem/agentic/__init__.py +39 -0
- rem/agentic/agents/README.md +155 -0
- rem/agentic/agents/__init__.py +8 -0
- rem/agentic/context.py +148 -0
- rem/agentic/context_builder.py +329 -0
- rem/agentic/mcp/__init__.py +0 -0
- rem/agentic/mcp/tool_wrapper.py +107 -0
- rem/agentic/otel/__init__.py +5 -0
- rem/agentic/otel/setup.py +151 -0
- rem/agentic/providers/phoenix.py +674 -0
- rem/agentic/providers/pydantic_ai.py +572 -0
- rem/agentic/query.py +117 -0
- rem/agentic/query_helper.py +89 -0
- rem/agentic/schema.py +396 -0
- rem/agentic/serialization.py +245 -0
- rem/agentic/tools/__init__.py +5 -0
- rem/agentic/tools/rem_tools.py +231 -0
- rem/api/README.md +420 -0
- rem/api/main.py +324 -0
- rem/api/mcp_router/prompts.py +182 -0
- rem/api/mcp_router/resources.py +536 -0
- rem/api/mcp_router/server.py +213 -0
- rem/api/mcp_router/tools.py +584 -0
- rem/api/routers/auth.py +229 -0
- rem/api/routers/chat/__init__.py +5 -0
- rem/api/routers/chat/completions.py +281 -0
- rem/api/routers/chat/json_utils.py +76 -0
- rem/api/routers/chat/models.py +124 -0
- rem/api/routers/chat/streaming.py +185 -0
- rem/auth/README.md +258 -0
- rem/auth/__init__.py +26 -0
- rem/auth/middleware.py +100 -0
- rem/auth/providers/__init__.py +13 -0
- rem/auth/providers/base.py +376 -0
- rem/auth/providers/google.py +163 -0
- rem/auth/providers/microsoft.py +237 -0
- rem/cli/README.md +455 -0
- rem/cli/__init__.py +8 -0
- rem/cli/commands/README.md +126 -0
- rem/cli/commands/__init__.py +3 -0
- rem/cli/commands/ask.py +566 -0
- rem/cli/commands/configure.py +497 -0
- rem/cli/commands/db.py +493 -0
- rem/cli/commands/dreaming.py +324 -0
- rem/cli/commands/experiments.py +1302 -0
- rem/cli/commands/mcp.py +66 -0
- rem/cli/commands/process.py +245 -0
- rem/cli/commands/schema.py +183 -0
- rem/cli/commands/serve.py +106 -0
- rem/cli/dreaming.py +363 -0
- rem/cli/main.py +96 -0
- rem/config.py +237 -0
- rem/mcp_server.py +41 -0
- rem/models/core/__init__.py +49 -0
- rem/models/core/core_model.py +64 -0
- rem/models/core/engram.py +333 -0
- rem/models/core/experiment.py +628 -0
- rem/models/core/inline_edge.py +132 -0
- rem/models/core/rem_query.py +243 -0
- rem/models/entities/__init__.py +43 -0
- rem/models/entities/file.py +57 -0
- rem/models/entities/image_resource.py +88 -0
- rem/models/entities/message.py +35 -0
- rem/models/entities/moment.py +123 -0
- rem/models/entities/ontology.py +191 -0
- rem/models/entities/ontology_config.py +131 -0
- rem/models/entities/resource.py +95 -0
- rem/models/entities/schema.py +87 -0
- rem/models/entities/user.py +85 -0
- rem/py.typed +0 -0
- rem/schemas/README.md +507 -0
- rem/schemas/__init__.py +6 -0
- rem/schemas/agents/README.md +92 -0
- rem/schemas/agents/core/moment-builder.yaml +178 -0
- rem/schemas/agents/core/rem-query-agent.yaml +226 -0
- rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
- rem/schemas/agents/core/simple-assistant.yaml +19 -0
- rem/schemas/agents/core/user-profile-builder.yaml +163 -0
- rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
- rem/schemas/agents/examples/contract-extractor.yaml +134 -0
- rem/schemas/agents/examples/cv-parser.yaml +263 -0
- rem/schemas/agents/examples/hello-world.yaml +37 -0
- rem/schemas/agents/examples/query.yaml +54 -0
- rem/schemas/agents/examples/simple.yaml +21 -0
- rem/schemas/agents/examples/test.yaml +29 -0
- rem/schemas/agents/rem.yaml +128 -0
- rem/schemas/evaluators/hello-world/default.yaml +77 -0
- rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
- rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
- rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
- rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
- rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
- rem/services/__init__.py +16 -0
- rem/services/audio/INTEGRATION.md +308 -0
- rem/services/audio/README.md +376 -0
- rem/services/audio/__init__.py +15 -0
- rem/services/audio/chunker.py +354 -0
- rem/services/audio/transcriber.py +259 -0
- rem/services/content/README.md +1269 -0
- rem/services/content/__init__.py +5 -0
- rem/services/content/providers.py +801 -0
- rem/services/content/service.py +676 -0
- rem/services/dreaming/README.md +230 -0
- rem/services/dreaming/__init__.py +53 -0
- rem/services/dreaming/affinity_service.py +336 -0
- rem/services/dreaming/moment_service.py +264 -0
- rem/services/dreaming/ontology_service.py +54 -0
- rem/services/dreaming/user_model_service.py +297 -0
- rem/services/dreaming/utils.py +39 -0
- rem/services/embeddings/__init__.py +11 -0
- rem/services/embeddings/api.py +120 -0
- rem/services/embeddings/worker.py +421 -0
- rem/services/fs/README.md +662 -0
- rem/services/fs/__init__.py +62 -0
- rem/services/fs/examples.py +206 -0
- rem/services/fs/examples_paths.py +204 -0
- rem/services/fs/git_provider.py +935 -0
- rem/services/fs/local_provider.py +760 -0
- rem/services/fs/parsing-hooks-examples.md +172 -0
- rem/services/fs/paths.py +276 -0
- rem/services/fs/provider.py +460 -0
- rem/services/fs/s3_provider.py +1042 -0
- rem/services/fs/service.py +186 -0
- rem/services/git/README.md +1075 -0
- rem/services/git/__init__.py +17 -0
- rem/services/git/service.py +469 -0
- rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
- rem/services/phoenix/README.md +453 -0
- rem/services/phoenix/__init__.py +46 -0
- rem/services/phoenix/client.py +686 -0
- rem/services/phoenix/config.py +88 -0
- rem/services/phoenix/prompt_labels.py +477 -0
- rem/services/postgres/README.md +575 -0
- rem/services/postgres/__init__.py +23 -0
- rem/services/postgres/migration_service.py +427 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
- rem/services/postgres/register_type.py +352 -0
- rem/services/postgres/repository.py +337 -0
- rem/services/postgres/schema_generator.py +379 -0
- rem/services/postgres/service.py +802 -0
- rem/services/postgres/sql_builder.py +354 -0
- rem/services/rem/README.md +304 -0
- rem/services/rem/__init__.py +23 -0
- rem/services/rem/exceptions.py +71 -0
- rem/services/rem/executor.py +293 -0
- rem/services/rem/parser.py +145 -0
- rem/services/rem/queries.py +196 -0
- rem/services/rem/query.py +371 -0
- rem/services/rem/service.py +527 -0
- rem/services/session/README.md +374 -0
- rem/services/session/__init__.py +6 -0
- rem/services/session/compression.py +360 -0
- rem/services/session/reload.py +77 -0
- rem/settings.py +1235 -0
- rem/sql/002_install_models.sql +1068 -0
- rem/sql/background_indexes.sql +42 -0
- rem/sql/install_models.sql +1038 -0
- rem/sql/migrations/001_install.sql +503 -0
- rem/sql/migrations/002_install_models.sql +1202 -0
- rem/utils/AGENTIC_CHUNKING.md +597 -0
- rem/utils/README.md +583 -0
- rem/utils/__init__.py +43 -0
- rem/utils/agentic_chunking.py +622 -0
- rem/utils/batch_ops.py +343 -0
- rem/utils/chunking.py +108 -0
- rem/utils/clip_embeddings.py +276 -0
- rem/utils/dict_utils.py +98 -0
- rem/utils/embeddings.py +423 -0
- rem/utils/examples/embeddings_example.py +305 -0
- rem/utils/examples/sql_types_example.py +202 -0
- rem/utils/markdown.py +16 -0
- rem/utils/model_helpers.py +236 -0
- rem/utils/schema_loader.py +336 -0
- rem/utils/sql_types.py +348 -0
- rem/utils/user_id.py +81 -0
- rem/utils/vision.py +330 -0
- rem/workers/README.md +506 -0
- rem/workers/__init__.py +5 -0
- rem/workers/dreaming.py +502 -0
- rem/workers/engram_processor.py +312 -0
- rem/workers/sqs_file_processor.py +193 -0
- remdb-0.3.7.dist-info/METADATA +1473 -0
- remdb-0.3.7.dist-info/RECORD +187 -0
- remdb-0.3.7.dist-info/WHEEL +4 -0
- remdb-0.3.7.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,237 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Microsoft Entra ID (Azure AD) OAuth Provider.
|
|
3
|
+
|
|
4
|
+
Implements OAuth 2.1 / OIDC for Microsoft authentication.
|
|
5
|
+
|
|
6
|
+
Configuration:
|
|
7
|
+
1. Register application at https://portal.azure.com/#view/Microsoft_AAD_RegisteredApps
|
|
8
|
+
2. Create client secret under "Certificates & secrets"
|
|
9
|
+
3. Add redirect URI: http://localhost:8000/api/auth/callback (dev)
|
|
10
|
+
4. Set API permissions:
|
|
11
|
+
- Microsoft Graph: User.Read (delegated)
|
|
12
|
+
- Optional: email, profile, openid (automatically included)
|
|
13
|
+
5. Set environment variables:
|
|
14
|
+
- AUTH__MICROSOFT__CLIENT_ID (Application ID)
|
|
15
|
+
- AUTH__MICROSOFT__CLIENT_SECRET
|
|
16
|
+
- AUTH__MICROSOFT__TENANT_ID (or "common" for multi-tenant)
|
|
17
|
+
- AUTH__MICROSOFT__REDIRECT_URI
|
|
18
|
+
|
|
19
|
+
Microsoft-specific features:
|
|
20
|
+
- Multi-tenant support (common, organizations, consumers)
|
|
21
|
+
- Azure AD B2C support
|
|
22
|
+
- Conditional access policies
|
|
23
|
+
- Token caching with MSAL
|
|
24
|
+
|
|
25
|
+
Tenant options:
|
|
26
|
+
- common: Multi-tenant + personal Microsoft accounts
|
|
27
|
+
- organizations: Multi-tenant (work/school only)
|
|
28
|
+
- consumers: Personal Microsoft accounts only
|
|
29
|
+
- {tenant-id}: Single tenant (specific organization)
|
|
30
|
+
|
|
31
|
+
References:
|
|
32
|
+
- Microsoft identity platform: https://learn.microsoft.com/en-us/entra/identity-platform/
|
|
33
|
+
- OAuth 2.0 flow: https://learn.microsoft.com/en-us/entra/identity-platform/v2-oauth2-auth-code-flow
|
|
34
|
+
- OIDC: https://learn.microsoft.com/en-us/entra/identity-platform/v2-protocols-oidc
|
|
35
|
+
- Scopes: https://learn.microsoft.com/en-us/graph/permissions-reference
|
|
36
|
+
"""
|
|
37
|
+
|
|
38
|
+
from typing import Any
|
|
39
|
+
|
|
40
|
+
from .base import OAuthProvider, OAuthUserInfo
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class MicrosoftOAuthProvider(OAuthProvider):
    """
    Microsoft Entra ID (Azure AD) OAuth 2.1 / OIDC provider.

    Supports multi-tenant authentication and Microsoft Graph API access.
    Uses Microsoft identity platform v2.0 endpoints; the tenant segment of
    each login URL is filled in from ``self.tenant``.
    """

    # Microsoft identity platform v2.0 endpoints
    # {tenant} is replaced with:
    # - "common" for multi-tenant + personal accounts
    # - "organizations" for work/school accounts only
    # - "consumers" for personal Microsoft accounts only
    # - Tenant ID/domain for single-tenant
    AUTHORIZATION_ENDPOINT_TEMPLATE = (
        "https://login.microsoftonline.com/{tenant}/oauth2/v2.0/authorize"
    )
    TOKEN_ENDPOINT_TEMPLATE = "https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token"
    USERINFO_ENDPOINT = "https://graph.microsoft.com/v1.0/me"
    JWKS_URI_TEMPLATE = "https://login.microsoftonline.com/{tenant}/discovery/v2.0/keys"

    # Microsoft Graph scopes requested by default
    # openid: Required for OIDC
    # email: User email address
    # profile: User profile information
    # User.Read: Read user profile via Microsoft Graph
    # (offline_access, which requests a refresh token, is NOT part of the
    # defaults; generate_auth_url_with_prompt() appends it on demand.)
    DEFAULT_SCOPES = [
        "openid",
        "email",
        "profile",
        "User.Read",  # Microsoft Graph: read user profile
    ]

    def __init__(
        self,
        client_id: str,
        client_secret: str,
        redirect_uri: str,
        tenant: str = "common",
    ) -> None:
        """
        Initialize Microsoft OAuth provider.

        Args:
            client_id: Application (client) ID from Azure portal
            client_secret: Client secret from Azure portal
            redirect_uri: Redirect URI registered in Azure portal
            tenant: Tenant ID or "common"/"organizations"/"consumers"
        """
        super().__init__(client_id, client_secret, redirect_uri)
        self.tenant = tenant

    @property
    def authorization_endpoint(self) -> str:
        """Microsoft authorization endpoint for the configured tenant."""
        return self.AUTHORIZATION_ENDPOINT_TEMPLATE.format(tenant=self.tenant)

    @property
    def token_endpoint(self) -> str:
        """Microsoft token endpoint for the configured tenant."""
        return self.TOKEN_ENDPOINT_TEMPLATE.format(tenant=self.tenant)

    @property
    def userinfo_endpoint(self) -> str:
        """Microsoft Graph /me endpoint for user info (tenant-independent)."""
        return self.USERINFO_ENDPOINT

    @property
    def jwks_uri(self) -> str:
        """Microsoft JWKS URI for token validation for the configured tenant."""
        return self.JWKS_URI_TEMPLATE.format(tenant=self.tenant)

    @property
    def default_scopes(self) -> list[str]:
        """Default scopes for Microsoft OAuth.

        Returns a fresh copy on every access so callers can modify the result
        without touching the class-level ``DEFAULT_SCOPES`` list.
        """
        return self.DEFAULT_SCOPES.copy()

    def normalize_user_info(self, claims: dict[str, Any]) -> OAuthUserInfo:
        """
        Normalize Microsoft claims to OAuthUserInfo.

        The payload is treated as a Microsoft Graph /me response when it has
        an "id" key, and as OIDC ID token claims otherwise.

        Microsoft Graph /me response:
        - id: Unique user ID (stable identifier)
        - userPrincipalName: User principal name (UPN)
        - mail: Primary email (may be null)
        - displayName: Display name
        - givenName: First name
        - surname: Last name
        - preferredLanguage: User locale

        Microsoft ID token claims:
        - sub: Subject (unique user ID, different from Graph ID)
        - email: User email
        - name: Full name
        - given_name: First name
        - family_name: Last name
        - preferred_username: UPN or email

        Args:
            claims: Raw claims from ID token or Microsoft Graph /me

        Returns:
            Normalized user information

        Raises:
            KeyError: If ``claims`` contains neither "id" nor "sub".
        """
        # Handle both ID token claims and Graph API response
        # Graph API uses different field names than OIDC claims
        if "id" in claims:
            # Microsoft Graph /me response
            sub = claims["id"]
            # "mail" may be null, so fall back to the UPN
            email = claims.get("mail") or claims.get("userPrincipalName")
            name = claims.get("displayName")
            given_name = claims.get("givenName")
            family_name = claims.get("surname")
            locale = claims.get("preferredLanguage")
        else:
            # OIDC ID token claims
            sub = claims["sub"]
            email = claims.get("email") or claims.get("preferred_username")
            name = claims.get("name")
            given_name = claims.get("given_name")
            family_name = claims.get("family_name")
            locale = claims.get("locale")

        return OAuthUserInfo(
            sub=sub,
            email=email,
            # NOTE(review): hard-coded; no verification claim is inspected in
            # either branch, and the fallbacks above (userPrincipalName /
            # preferred_username) are sign-in names that may not be a
            # deliverable mailbox. Confirm this is acceptable before relying
            # on `email` for account linking; prefer `sub` as the identity key.
            email_verified=True,
            name=name,
            given_name=given_name,
            family_name=family_name,
            picture=None,  # Microsoft Graph requires separate photo endpoint
            locale=locale,
            provider="microsoft",
            raw_claims=claims,
        )

    def generate_auth_url_with_prompt(
        self,
        state: str,
        code_challenge: str,
        prompt: str | None = None,
        domain_hint: str | None = None,
        login_hint: str | None = None,
        scopes: list[str] | None = None,
        nonce: str | None = None,
    ) -> str:
        """
        Generate authorization URL with Microsoft-specific parameters.

        ``offline_access`` is always added to the requested scopes (if not
        already present) so that a refresh token is requested. The ``scopes``
        list passed by the caller is never modified.

        Args:
            state: CSRF protection state
            code_challenge: PKCE code challenge
            prompt: Authentication behavior (none, login, consent, select_account)
            domain_hint: Domain hint for faster login (e.g., "contoso.com")
            login_hint: Login hint (email) to pre-fill sign-in form
            scopes: OAuth scopes (uses default_scopes if None or empty)
            nonce: OIDC nonce for ID token replay protection

        Returns:
            Authorization URL

        Microsoft-specific parameters:
        - prompt: Authentication behavior
            - none: Silent authentication (fails if interaction required)
            - login: Force user to re-authenticate
            - consent: Force consent screen
            - select_account: Show account picker
        - domain_hint: Domain hint for faster login (skip domain discovery)
        - login_hint: Email to pre-fill sign-in form

        ``response_mode`` (query, form_post, fragment) is not configurable
        through this method.
        """
        # Only forward the optional hints that were actually supplied
        optional_params = {
            "prompt": prompt,
            "domain_hint": domain_hint,
            "login_hint": login_hint,
        }
        extra_params: dict[str, str] = {
            key: value for key, value in optional_params.items() if value
        }

        # Add offline_access scope for refresh token. Always work on a fresh
        # list so the caller's `scopes` argument is not mutated by the append.
        requested_scopes = list(scopes or self.default_scopes)
        if "offline_access" not in requested_scopes:
            requested_scopes.append("offline_access")

        return self.generate_auth_url(
            state=state,
            code_challenge=code_challenge,
            scopes=requested_scopes,
            nonce=nonce,
            extra_params=extra_params,
        )
|
rem/cli/README.md
ADDED
|
@@ -0,0 +1,455 @@
|
|
|
1
|
+
# REM CLI - Agent Testing Guide
|
|
2
|
+
|
|
3
|
+
## Overview
|
|
4
|
+
|
|
5
|
+
The `rem ask` command provides a CLI interface for testing Pydantic AI agents with YAML-based schemas. It supports both streaming and non-streaming modes, structured output, and optional OTEL/Phoenix instrumentation.
|
|
6
|
+
|
|
7
|
+
## Installation
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
# Install REM with all dependencies
|
|
11
|
+
cd /path/to/remstack/rem  # your local checkout of the REM repository
|
|
12
|
+
uv pip install -e .
|
|
13
|
+
|
|
14
|
+
# Verify installation
|
|
15
|
+
rem --help
|
|
16
|
+
```
|
|
17
|
+
|
|
18
|
+
## Basic Usage
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# Simple question (non-streaming by default)
|
|
22
|
+
rem ask simple "What is 2+2?"
|
|
23
|
+
|
|
24
|
+
# Streaming mode for real-time output
|
|
25
|
+
rem ask simple "What is 2+2?" --stream
|
|
26
|
+
|
|
27
|
+
# With specific model
|
|
28
|
+
rem ask simple "What is 2+2?" --model openai:gpt-4o-mini
|
|
29
|
+
|
|
30
|
+
# Structured output
|
|
31
|
+
rem ask query "Find all documents by Sarah" --model openai:gpt-4o-mini
|
|
32
|
+
|
|
33
|
+
# Process file and save output
|
|
34
|
+
rem ask contract-analyzer -i rem/tests/data/content-examples/service_agreement.txt -o output.yaml
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## File Processing
|
|
38
|
+
|
|
39
|
+
The `--input-file` option allows you to process files directly instead of providing a text query:
|
|
40
|
+
|
|
41
|
+
```bash
|
|
42
|
+
# Extract data from contract (text file)
|
|
43
|
+
rem ask contract-analyzer \
|
|
44
|
+
-i rem/tests/data/content-examples/service_agreement.txt \
|
|
45
|
+
-o output.yaml
|
|
46
|
+
|
|
47
|
+
# Extract from PDF contract
|
|
48
|
+
rem ask contract-analyzer \
|
|
49
|
+
-i rem/tests/data/content-examples/pdf/service_contract.pdf \
|
|
50
|
+
-o output.yaml
|
|
51
|
+
|
|
52
|
+
# With specific model
|
|
53
|
+
rem ask contract-analyzer \
|
|
54
|
+
-i rem/tests/data/content-examples/service_agreement.txt \
|
|
55
|
+
-o output.yaml \
|
|
56
|
+
-m anthropic:claude-sonnet-4-5-20250929
|
|
57
|
+
|
|
58
|
+
# Output to console (default)
|
|
59
|
+
rem ask contract-analyzer -i rem/tests/data/content-examples/service_agreement.txt
|
|
60
|
+
|
|
61
|
+
# Stream output in real-time
|
|
62
|
+
rem ask contract-analyzer -i rem/tests/data/content-examples/service_agreement.txt --stream
|
|
63
|
+
```
|
|
64
|
+
|
|
65
|
+
**Schema name resolution:**
|
|
66
|
+
- Short names: `contract-analyzer` → `schemas/agents/examples/contract-analyzer.yaml`
|
|
67
|
+
- With folder: `examples/contract-analyzer` → `schemas/agents/examples/contract-analyzer.yaml`
|
|
68
|
+
- Core agents: `moment-builder` → `schemas/agents/core/moment-builder.yaml`
|
|
69
|
+
- Full paths: `schemas/agents/examples/contract-analyzer.yaml` (as-is)
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
**Supported file types:**
|
|
73
|
+
- Documents: PDF, DOCX, PPTX, XLSX (via Kreuzberg)
|
|
74
|
+
- Text: TXT, MD, Markdown, code files
|
|
75
|
+
- Schemas: YAML, JSON
|
|
76
|
+
- Audio: MP3, WAV, M4A (via Whisper API)
|
|
77
|
+
|
|
78
|
+
See [examples/README.md](../../../examples/README.md) for complete contract extraction examples.
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
## Command Options
|
|
82
|
+
|
|
83
|
+
```
|
|
84
|
+
rem ask NAME [QUERY] [OPTIONS]
|
|
85
|
+
|
|
86
|
+
Arguments:
|
|
87
|
+
NAME Agent schema name (YAML files in schemas/agents/)
|
|
88
|
+
- Short name: contract-analyzer → schemas/agents/examples/contract-analyzer.yaml
|
|
89
|
+
- With folder: examples/contract-analyzer → schemas/agents/examples/contract-analyzer.yaml
|
|
90
|
+
- Core agent: moment-builder → schemas/agents/core/moment-builder.yaml
|
|
91
|
+
- Full path: schemas/agents/examples/contract-analyzer.yaml
|
|
92
|
+
|
|
93
|
+
QUERY User query to send to the agent (optional if --input-file is used)
|
|
94
|
+
|
|
95
|
+
Options:
|
|
96
|
+
--model, -m TEXT LLM model (default: from settings)
|
|
97
|
+
--temperature, -t FLOAT Temperature 0.0-1.0 (not yet implemented)
|
|
98
|
+
--max-turns INTEGER Maximum turns for execution (default: 10)
|
|
99
|
+
--version, -v TEXT Schema version for registry lookup
|
|
100
|
+
--stream / --no-stream Enable/disable streaming (default: disabled)
|
|
101
|
+
--input-file, -i PATH Read input from file (PDF, TXT, Markdown, etc.)
|
|
102
|
+
--output-file, -o PATH Write output to file (YAML format)
|
|
103
|
+
--user-id TEXT User ID for context (default: cli-user)
|
|
104
|
+
--session-id TEXT Session ID for context (default: auto-generated)
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Agent Schema Format
|
|
108
|
+
|
|
109
|
+
Agent schemas are YAML files following JSON Schema with embedded metadata:
|
|
110
|
+
|
|
111
|
+
```yaml
|
|
112
|
+
type: object
|
|
113
|
+
description: |
|
|
114
|
+
System prompt for the agent.
|
|
115
|
+
|
|
116
|
+
This describes what the agent does and how it should behave.
|
|
117
|
+
|
|
118
|
+
properties:
|
|
119
|
+
answer:
|
|
120
|
+
type: string
|
|
121
|
+
description: The response to the user's query
|
|
122
|
+
|
|
123
|
+
confidence:
|
|
124
|
+
type: number
|
|
125
|
+
minimum: 0
|
|
126
|
+
maximum: 1
|
|
127
|
+
description: Confidence score for the response
|
|
128
|
+
|
|
129
|
+
required:
|
|
130
|
+
- answer
|
|
131
|
+
|
|
132
|
+
json_schema_extra:
|
|
133
|
+
fully_qualified_name: "rem.agents.SimpleAgent"
|
|
134
|
+
version: "1.0.0"
|
|
135
|
+
tools: [] # MCP tool configurations (future)
|
|
136
|
+
resources: [] # MCP resource configurations (future)
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Example Schemas
|
|
140
|
+
|
|
141
|
+
### Simple Agent (`schemas/agents/examples/simple.yaml`)
|
|
142
|
+
|
|
143
|
+
A basic conversational agent that returns simple text answers:
|
|
144
|
+
|
|
145
|
+
```yaml
|
|
146
|
+
type: object
|
|
147
|
+
description: |
|
|
148
|
+
A simple conversational agent that provides helpful, friendly responses.
|
|
149
|
+
|
|
150
|
+
You are a helpful AI assistant. Answer questions clearly and concisely.
|
|
151
|
+
If you don't know something, say so. Be friendly and professional.
|
|
152
|
+
|
|
153
|
+
properties:
|
|
154
|
+
answer:
|
|
155
|
+
type: string
|
|
156
|
+
description: The response to the user's query
|
|
157
|
+
|
|
158
|
+
required:
|
|
159
|
+
- answer
|
|
160
|
+
|
|
161
|
+
json_schema_extra:
|
|
162
|
+
fully_qualified_name: "rem.agents.SimpleAgent"
|
|
163
|
+
version: "1.0.0"
|
|
164
|
+
tools: []
|
|
165
|
+
resources: []
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Query Agent (`schemas/agents/examples/query.yaml`)
|
|
169
|
+
|
|
170
|
+
An agent that provides structured output with confidence scores:
|
|
171
|
+
|
|
172
|
+
```yaml
|
|
173
|
+
type: object
|
|
174
|
+
description: |
|
|
175
|
+
REM Query Agent - Converts natural language questions to REM queries.
|
|
176
|
+
|
|
177
|
+
You are a specialized agent that understands REM (Resources Entities Moments) queries.
|
|
178
|
+
Your job is to interpret user questions and provide answers with confidence scores.
|
|
179
|
+
|
|
180
|
+
properties:
|
|
181
|
+
answer:
|
|
182
|
+
type: string
|
|
183
|
+
description: The answer to the user's query with supporting details
|
|
184
|
+
|
|
185
|
+
confidence:
|
|
186
|
+
type: number
|
|
187
|
+
minimum: 0
|
|
188
|
+
maximum: 1
|
|
189
|
+
description: Confidence score (0.0-1.0) for this answer
|
|
190
|
+
|
|
191
|
+
query_type:
|
|
192
|
+
type: string
|
|
193
|
+
enum:
|
|
194
|
+
- LOOKUP
|
|
195
|
+
- FUZZY
|
|
196
|
+
- TRAVERSE
|
|
197
|
+
- UNKNOWN
|
|
198
|
+
description: The type of REM query that would best answer this question
|
|
199
|
+
|
|
200
|
+
required:
|
|
201
|
+
- answer
|
|
202
|
+
- confidence
|
|
203
|
+
- query_type
|
|
204
|
+
|
|
205
|
+
json_schema_extra:
|
|
206
|
+
fully_qualified_name: "rem.agents.QueryAgent"
|
|
207
|
+
version: "1.0.0"
|
|
208
|
+
tools: []
|
|
209
|
+
resources: []
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Streaming vs Non-Streaming
|
|
213
|
+
|
|
214
|
+
### Non-Streaming Mode (default)
|
|
215
|
+
|
|
216
|
+
Uses `agent.run()` to return the complete structured result at once:
|
|
217
|
+
|
|
218
|
+
```bash
|
|
219
|
+
rem ask simple "Explain quantum computing"
|
|
220
|
+
```
|
|
221
|
+
|
|
222
|
+
Output:
|
|
223
|
+
```json
|
|
224
|
+
{
|
|
225
|
+
"answer": "Quantum computing uses quantum mechanical phenomena..."
|
|
226
|
+
}
|
|
227
|
+
```
|
|
228
|
+
|
|
229
|
+
**Best for:**
|
|
230
|
+
- Saving output to files
|
|
231
|
+
- Structured data extraction
|
|
232
|
+
- Processing files with complex schemas
|
|
233
|
+
- Programmatic usage
|
|
234
|
+
|
|
235
|
+
### Streaming Mode
|
|
236
|
+
|
|
237
|
+
Uses `agent.iter()` to stream events in real-time:
|
|
238
|
+
- Tool call markers: `[Calling: tool_name]`
|
|
239
|
+
- Text content deltas as they arrive
|
|
240
|
+
- Final structured result after completion
|
|
241
|
+
|
|
242
|
+
```bash
|
|
243
|
+
rem ask simple "Explain quantum computing" --stream
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
Output:
|
|
247
|
+
```
|
|
248
|
+
[Calling: final_result]
|
|
249
|
+
Quantum computing uses quantum mechanical phenomena like superposition...
|
|
250
|
+
|
|
251
|
+
{
|
|
252
|
+
"answer": "Quantum computing uses quantum mechanical phenomena..."
|
|
253
|
+
}
|
|
254
|
+
```
|
|
255
|
+
|
|
256
|
+
**Best for:**
|
|
257
|
+
- Interactive conversations
|
|
258
|
+
- Long-running queries where you want to see progress
|
|
259
|
+
- Debugging agent behavior
|
|
260
|
+
|
|
261
|
+
## Implementation Details
|
|
262
|
+
|
|
263
|
+
### Architecture
|
|
264
|
+
|
|
265
|
+
```
|
|
266
|
+
CLI (ask.py)
|
|
267
|
+
├── load_schema_from_file() - YAML file loading
|
|
268
|
+
├── load_schema_from_registry() - TODO: Database/cache lookup
|
|
269
|
+
├── run_agent_streaming() - agent.iter() with event streaming
|
|
270
|
+
└── run_agent_non_streaming() - agent.run() for complete result
|
|
271
|
+
|
|
272
|
+
Agent Factory (providers/pydantic_ai.py)
|
|
273
|
+
├── create_pydantic_ai_agent() - Main factory
|
|
274
|
+
├── _create_model_from_schema() - JSON Schema → Pydantic model
|
|
275
|
+
└── _create_schema_wrapper() - Strip description for LLM
|
|
276
|
+
|
|
277
|
+
OTEL (otel/setup.py)
|
|
278
|
+
├── setup_instrumentation() - Initialize OTLP exporters
|
|
279
|
+
└── set_agent_resource_attributes() - Set span attributes
|
|
280
|
+
```
|
|
281
|
+
|
|
282
|
+
### Design Patterns
|
|
283
|
+
|
|
284
|
+
1. **JsonSchema to Pydantic Pattern**
|
|
285
|
+
- Agent schemas are JSON Schema with embedded metadata
|
|
286
|
+
- `description` field becomes system prompt
|
|
287
|
+
- `properties` field becomes Pydantic output model
|
|
288
|
+
- Dynamic model creation using `json-schema-to-pydantic`
|
|
289
|
+
|
|
290
|
+
2. **Streaming with agent.iter() Pattern**
|
|
291
|
+
- Use `agent.iter()` for complete execution (not `run_stream()`)
|
|
292
|
+
- `agent.iter()` captures tool calls, `run_stream()` stops after first output
|
|
293
|
+
- Stream tool call events with `[Calling: tool_name]` markers
|
|
294
|
+
- Stream text content deltas as they arrive
|
|
295
|
+
|
|
296
|
+
3. **Conditional OTEL Instrumentation**
|
|
297
|
+
- OTEL disabled by default for local development
|
|
298
|
+
- Enabled in production via `OTEL__ENABLED=true`
|
|
299
|
+
- Applied at agent creation time: `Agent(..., instrument=settings.otel.enabled)`
|
|
300
|
+
|
|
301
|
+
## Environment Variables
|
|
302
|
+
|
|
303
|
+
Set API keys for LLM providers:
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
# In ~/.bash_profile or ~/.zshrc
|
|
307
|
+
export OPENAI_API_KEY="sk-..."
|
|
308
|
+
export ANTHROPIC_API_KEY="sk-ant-..."
|
|
309
|
+
|
|
310
|
+
# Optional: OTEL/Phoenix configuration
|
|
311
|
+
export OTEL__ENABLED=true
|
|
312
|
+
export OTEL__SERVICE_NAME=rem-cli
|
|
313
|
+
export OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
|
|
314
|
+
export PHOENIX__ENABLED=true
|
|
315
|
+
export PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
## Observability (Optional)
|
|
319
|
+
|
|
320
|
+
### OTEL Configuration
|
|
321
|
+
|
|
322
|
+
Enable distributed tracing with OpenTelemetry:
|
|
323
|
+
|
|
324
|
+
```bash
|
|
325
|
+
# Enable OTEL
|
|
326
|
+
export OTEL__ENABLED=true
|
|
327
|
+
export OTEL__SERVICE_NAME=rem-cli
|
|
328
|
+
export OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
|
|
329
|
+
export OTEL__PROTOCOL=http
|
|
330
|
+
|
|
331
|
+
# Run agent with tracing
|
|
332
|
+
rem ask query "Find documents" --model openai:gpt-4o-mini
|
|
333
|
+
```
|
|
334
|
+
|
|
335
|
+
### Phoenix Integration
|
|
336
|
+
|
|
337
|
+
Enable LLM observability with Arize Phoenix:
|
|
338
|
+
|
|
339
|
+
```bash
|
|
340
|
+
# Start Phoenix locally
|
|
341
|
+
docker run -p 6006:6006 arizephoenix/phoenix:latest
|
|
342
|
+
|
|
343
|
+
# Enable Phoenix
|
|
344
|
+
export PHOENIX__ENABLED=true
|
|
345
|
+
export PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
|
|
346
|
+
export PHOENIX__PROJECT_NAME=rem-cli
|
|
347
|
+
|
|
348
|
+
# Run agent with Phoenix tracing
|
|
349
|
+
rem ask query "Find documents" --model openai:gpt-4o-mini
|
|
350
|
+
|
|
351
|
+
# View traces at http://localhost:6006
|
|
352
|
+
```
|
|
353
|
+
|
|
354
|
+
## Schema Registry (TODO)
|
|
355
|
+
|
|
356
|
+
The schema registry is stubbed but not yet implemented. To implement:
|
|
357
|
+
|
|
358
|
+
1. **Database Schema**:
|
|
359
|
+
```sql
|
|
360
|
+
CREATE TABLE agent_schemas (
|
|
361
|
+
id UUID PRIMARY KEY,
|
|
362
|
+
name TEXT NOT NULL,
|
|
363
|
+
version TEXT NOT NULL,
|
|
364
|
+
schema_json JSONB NOT NULL,
|
|
365
|
+
created_at TIMESTAMPTZ DEFAULT NOW(),
|
|
366
|
+
UNIQUE(name, version)
|
|
367
|
+
);
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
2. **Cache Layer**:
|
|
371
|
+
- Redis for fast lookups
|
|
372
|
+
- In-memory cache for CLI
|
|
373
|
+
|
|
374
|
+
3. **Versioning**:
|
|
375
|
+
- Semantic versioning (1.0.0, 1.1.0, etc.)
|
|
376
|
+
- Latest version fallback
|
|
377
|
+
|
|
378
|
+
Once implemented, you can load agents by name:
|
|
379
|
+
|
|
380
|
+
```bash
|
|
381
|
+
# Load latest version
|
|
382
|
+
rem ask query "Find documents"
|
|
383
|
+
|
|
384
|
+
# Load specific version
|
|
385
|
+
rem ask query "Find documents" --version 1.2.0
|
|
386
|
+
```
|
|
387
|
+
|
|
388
|
+
## Testing
|
|
389
|
+
|
|
390
|
+
```bash
|
|
391
|
+
# Test simple agent (default non-streaming)
|
|
392
|
+
rem ask simple "What is 2+2?" --model openai:gpt-4o-mini
|
|
393
|
+
|
|
394
|
+
# Test simple agent (streaming)
|
|
395
|
+
rem ask simple "What is 2+2?" --stream --model openai:gpt-4o-mini
|
|
396
|
+
|
|
397
|
+
# Test structured output
|
|
398
|
+
rem ask query "Find all documents by Sarah" --model openai:gpt-4o-mini
|
|
399
|
+
|
|
400
|
+
# Test file processing
|
|
401
|
+
rem ask contract-analyzer -i examples/contract.pdf -o output.yaml
|
|
402
|
+
|
|
403
|
+
# Test with different models
|
|
404
|
+
rem ask simple "Hello" --model openai:gpt-4o
|
|
405
|
+
rem ask simple "Hello" --model anthropic:claude-sonnet-4-5-20250929
|
|
406
|
+
```
|
|
407
|
+
|
|
408
|
+
## Troubleshooting
|
|
409
|
+
|
|
410
|
+
### API Key Not Found
|
|
411
|
+
|
|
412
|
+
```bash
|
|
413
|
+
# Set API key in environment
|
|
414
|
+
export OPENAI_API_KEY="sk-..."
|
|
415
|
+
|
|
416
|
+
# Or source your profile
|
|
417
|
+
source ~/.bash_profile
|
|
418
|
+
```
|
|
419
|
+
|
|
420
|
+
### Schema Registry Not Implemented
|
|
421
|
+
|
|
422
|
+
```
|
|
423
|
+
Schema registry not implemented yet. Please use a file path instead.
|
|
424
|
+
```
|
|
425
|
+
|
|
426
|
+
Use file paths until registry is implemented:
|
|
427
|
+
```bash
|
|
428
|
+
rem ask schemas/agents/examples/simple.yaml "query"
|
|
429
|
+
```
|
|
430
|
+
|
|
431
|
+
### Model Not Found
|
|
432
|
+
|
|
433
|
+
Ensure you're using the correct model format:
|
|
434
|
+
- OpenAI: `openai:gpt-4o-mini`, `openai:gpt-4o`
|
|
435
|
+
- Anthropic: `anthropic:claude-sonnet-4-5-20250929`
|
|
436
|
+
|
|
437
|
+
## Next Steps
|
|
438
|
+
|
|
439
|
+
1. **Implement Schema Registry**
|
|
440
|
+
- PostgreSQL table for schema storage
|
|
441
|
+
- Redis cache for fast lookups
|
|
442
|
+
- Version management
|
|
443
|
+
|
|
444
|
+
2. **Add MCP Tool Support**
|
|
445
|
+
- Dynamic tool loading from schema
|
|
446
|
+
- MCP server configuration
|
|
447
|
+
|
|
448
|
+
3. **Temperature Override**
|
|
449
|
+
- Pass temperature to agent.run()
|
|
450
|
+
- Model-specific settings
|
|
451
|
+
|
|
452
|
+
4. **CLI Improvements**
|
|
453
|
+
- Interactive mode
|
|
454
|
+
- Multi-turn conversations
|
|
455
|
+
- Session management
|