remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show
  1. rem/__init__.py +2 -0
  2. rem/agentic/README.md +650 -0
  3. rem/agentic/__init__.py +39 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +8 -0
  6. rem/agentic/context.py +148 -0
  7. rem/agentic/context_builder.py +329 -0
  8. rem/agentic/mcp/__init__.py +0 -0
  9. rem/agentic/mcp/tool_wrapper.py +107 -0
  10. rem/agentic/otel/__init__.py +5 -0
  11. rem/agentic/otel/setup.py +151 -0
  12. rem/agentic/providers/phoenix.py +674 -0
  13. rem/agentic/providers/pydantic_ai.py +572 -0
  14. rem/agentic/query.py +117 -0
  15. rem/agentic/query_helper.py +89 -0
  16. rem/agentic/schema.py +396 -0
  17. rem/agentic/serialization.py +245 -0
  18. rem/agentic/tools/__init__.py +5 -0
  19. rem/agentic/tools/rem_tools.py +231 -0
  20. rem/api/README.md +420 -0
  21. rem/api/main.py +324 -0
  22. rem/api/mcp_router/prompts.py +182 -0
  23. rem/api/mcp_router/resources.py +536 -0
  24. rem/api/mcp_router/server.py +213 -0
  25. rem/api/mcp_router/tools.py +584 -0
  26. rem/api/routers/auth.py +229 -0
  27. rem/api/routers/chat/__init__.py +5 -0
  28. rem/api/routers/chat/completions.py +281 -0
  29. rem/api/routers/chat/json_utils.py +76 -0
  30. rem/api/routers/chat/models.py +124 -0
  31. rem/api/routers/chat/streaming.py +185 -0
  32. rem/auth/README.md +258 -0
  33. rem/auth/__init__.py +26 -0
  34. rem/auth/middleware.py +100 -0
  35. rem/auth/providers/__init__.py +13 -0
  36. rem/auth/providers/base.py +376 -0
  37. rem/auth/providers/google.py +163 -0
  38. rem/auth/providers/microsoft.py +237 -0
  39. rem/cli/README.md +455 -0
  40. rem/cli/__init__.py +8 -0
  41. rem/cli/commands/README.md +126 -0
  42. rem/cli/commands/__init__.py +3 -0
  43. rem/cli/commands/ask.py +565 -0
  44. rem/cli/commands/configure.py +423 -0
  45. rem/cli/commands/db.py +493 -0
  46. rem/cli/commands/dreaming.py +324 -0
  47. rem/cli/commands/experiments.py +1124 -0
  48. rem/cli/commands/mcp.py +66 -0
  49. rem/cli/commands/process.py +245 -0
  50. rem/cli/commands/schema.py +183 -0
  51. rem/cli/commands/serve.py +106 -0
  52. rem/cli/dreaming.py +363 -0
  53. rem/cli/main.py +88 -0
  54. rem/config.py +237 -0
  55. rem/mcp_server.py +41 -0
  56. rem/models/core/__init__.py +49 -0
  57. rem/models/core/core_model.py +64 -0
  58. rem/models/core/engram.py +333 -0
  59. rem/models/core/experiment.py +628 -0
  60. rem/models/core/inline_edge.py +132 -0
  61. rem/models/core/rem_query.py +243 -0
  62. rem/models/entities/__init__.py +43 -0
  63. rem/models/entities/file.py +57 -0
  64. rem/models/entities/image_resource.py +88 -0
  65. rem/models/entities/message.py +35 -0
  66. rem/models/entities/moment.py +123 -0
  67. rem/models/entities/ontology.py +191 -0
  68. rem/models/entities/ontology_config.py +131 -0
  69. rem/models/entities/resource.py +95 -0
  70. rem/models/entities/schema.py +87 -0
  71. rem/models/entities/user.py +85 -0
  72. rem/py.typed +0 -0
  73. rem/schemas/README.md +507 -0
  74. rem/schemas/__init__.py +6 -0
  75. rem/schemas/agents/README.md +92 -0
  76. rem/schemas/agents/core/moment-builder.yaml +178 -0
  77. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  78. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  79. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  80. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  81. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  82. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  83. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  84. rem/schemas/agents/examples/hello-world.yaml +37 -0
  85. rem/schemas/agents/examples/query.yaml +54 -0
  86. rem/schemas/agents/examples/simple.yaml +21 -0
  87. rem/schemas/agents/examples/test.yaml +29 -0
  88. rem/schemas/agents/rem.yaml +128 -0
  89. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  90. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  91. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  92. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  93. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  94. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  95. rem/services/__init__.py +16 -0
  96. rem/services/audio/INTEGRATION.md +308 -0
  97. rem/services/audio/README.md +376 -0
  98. rem/services/audio/__init__.py +15 -0
  99. rem/services/audio/chunker.py +354 -0
  100. rem/services/audio/transcriber.py +259 -0
  101. rem/services/content/README.md +1269 -0
  102. rem/services/content/__init__.py +5 -0
  103. rem/services/content/providers.py +806 -0
  104. rem/services/content/service.py +657 -0
  105. rem/services/dreaming/README.md +230 -0
  106. rem/services/dreaming/__init__.py +53 -0
  107. rem/services/dreaming/affinity_service.py +336 -0
  108. rem/services/dreaming/moment_service.py +264 -0
  109. rem/services/dreaming/ontology_service.py +54 -0
  110. rem/services/dreaming/user_model_service.py +297 -0
  111. rem/services/dreaming/utils.py +39 -0
  112. rem/services/embeddings/__init__.py +11 -0
  113. rem/services/embeddings/api.py +120 -0
  114. rem/services/embeddings/worker.py +421 -0
  115. rem/services/fs/README.md +662 -0
  116. rem/services/fs/__init__.py +62 -0
  117. rem/services/fs/examples.py +206 -0
  118. rem/services/fs/examples_paths.py +204 -0
  119. rem/services/fs/git_provider.py +935 -0
  120. rem/services/fs/local_provider.py +760 -0
  121. rem/services/fs/parsing-hooks-examples.md +172 -0
  122. rem/services/fs/paths.py +276 -0
  123. rem/services/fs/provider.py +460 -0
  124. rem/services/fs/s3_provider.py +1042 -0
  125. rem/services/fs/service.py +186 -0
  126. rem/services/git/README.md +1075 -0
  127. rem/services/git/__init__.py +17 -0
  128. rem/services/git/service.py +469 -0
  129. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  130. rem/services/phoenix/README.md +453 -0
  131. rem/services/phoenix/__init__.py +46 -0
  132. rem/services/phoenix/client.py +686 -0
  133. rem/services/phoenix/config.py +88 -0
  134. rem/services/phoenix/prompt_labels.py +477 -0
  135. rem/services/postgres/README.md +575 -0
  136. rem/services/postgres/__init__.py +23 -0
  137. rem/services/postgres/migration_service.py +427 -0
  138. rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
  139. rem/services/postgres/register_type.py +352 -0
  140. rem/services/postgres/repository.py +337 -0
  141. rem/services/postgres/schema_generator.py +379 -0
  142. rem/services/postgres/service.py +802 -0
  143. rem/services/postgres/sql_builder.py +354 -0
  144. rem/services/rem/README.md +304 -0
  145. rem/services/rem/__init__.py +23 -0
  146. rem/services/rem/exceptions.py +71 -0
  147. rem/services/rem/executor.py +293 -0
  148. rem/services/rem/parser.py +145 -0
  149. rem/services/rem/queries.py +196 -0
  150. rem/services/rem/query.py +371 -0
  151. rem/services/rem/service.py +527 -0
  152. rem/services/session/README.md +374 -0
  153. rem/services/session/__init__.py +6 -0
  154. rem/services/session/compression.py +360 -0
  155. rem/services/session/reload.py +77 -0
  156. rem/settings.py +1235 -0
  157. rem/sql/002_install_models.sql +1068 -0
  158. rem/sql/background_indexes.sql +42 -0
  159. rem/sql/install_models.sql +1038 -0
  160. rem/sql/migrations/001_install.sql +503 -0
  161. rem/sql/migrations/002_install_models.sql +1202 -0
  162. rem/utils/AGENTIC_CHUNKING.md +597 -0
  163. rem/utils/README.md +583 -0
  164. rem/utils/__init__.py +43 -0
  165. rem/utils/agentic_chunking.py +622 -0
  166. rem/utils/batch_ops.py +343 -0
  167. rem/utils/chunking.py +108 -0
  168. rem/utils/clip_embeddings.py +276 -0
  169. rem/utils/dict_utils.py +98 -0
  170. rem/utils/embeddings.py +423 -0
  171. rem/utils/examples/embeddings_example.py +305 -0
  172. rem/utils/examples/sql_types_example.py +202 -0
  173. rem/utils/markdown.py +16 -0
  174. rem/utils/model_helpers.py +236 -0
  175. rem/utils/schema_loader.py +229 -0
  176. rem/utils/sql_types.py +348 -0
  177. rem/utils/user_id.py +81 -0
  178. rem/utils/vision.py +330 -0
  179. rem/workers/README.md +506 -0
  180. rem/workers/__init__.py +5 -0
  181. rem/workers/dreaming.py +502 -0
  182. rem/workers/engram_processor.py +312 -0
  183. rem/workers/sqs_file_processor.py +193 -0
  184. remdb-0.2.6.dist-info/METADATA +1191 -0
  185. remdb-0.2.6.dist-info/RECORD +187 -0
  186. remdb-0.2.6.dist-info/WHEEL +4 -0
  187. remdb-0.2.6.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,237 @@
1
+ """
2
+ Microsoft Entra ID (Azure AD) OAuth Provider.
3
+
4
+ Implements OAuth 2.1 / OIDC for Microsoft authentication.
5
+
6
+ Configuration:
7
+ 1. Register application at https://portal.azure.com/#view/Microsoft_AAD_RegisteredApps
8
+ 2. Create client secret under "Certificates & secrets"
9
+ 3. Add redirect URI: http://localhost:8000/api/auth/callback (dev)
10
+ 4. Set API permissions:
11
+ - Microsoft Graph: User.Read (delegated)
12
+ - Optional: email, profile, openid (automatically included)
13
+ 5. Set environment variables:
14
+ - AUTH__MICROSOFT__CLIENT_ID (Application ID)
15
+ - AUTH__MICROSOFT__CLIENT_SECRET
16
+ - AUTH__MICROSOFT__TENANT_ID (or "common" for multi-tenant)
17
+ - AUTH__MICROSOFT__REDIRECT_URI
18
+
19
+ Microsoft-specific features:
20
+ - Multi-tenant support (common, organizations, consumers)
21
+ - Azure AD B2C support
22
+ - Conditional access policies
23
+ - Token caching with MSAL
24
+
25
+ Tenant options:
26
+ - common: Multi-tenant + personal Microsoft accounts
27
+ - organizations: Multi-tenant (work/school only)
28
+ - consumers: Personal Microsoft accounts only
29
+ - {tenant-id}: Single tenant (specific organization)
30
+
31
+ References:
32
+ - Microsoft identity platform: https://learn.microsoft.com/en-us/entra/identity-platform/
33
+ - OAuth 2.0 flow: https://learn.microsoft.com/en-us/entra/identity-platform/v2-oauth2-auth-code-flow
34
+ - OIDC: https://learn.microsoft.com/en-us/entra/identity-platform/v2-protocols-oidc
35
+ - Scopes: https://learn.microsoft.com/en-us/graph/permissions-reference
36
+ """
37
+
38
+ from typing import Any
39
+
40
+ from .base import OAuthProvider, OAuthUserInfo
41
+
42
+
43
class MicrosoftOAuthProvider(OAuthProvider):
    """
    Microsoft Entra ID (Azure AD) OAuth 2.1 / OIDC provider.

    Supports multi-tenant authentication and Microsoft Graph API access.
    Uses Microsoft identity platform v2.0 endpoints. The tenant-specific
    endpoints are derived from URL templates using the ``tenant`` value
    supplied at construction time.
    """

    # Microsoft identity platform v2.0 endpoints
    # Replace {tenant} with:
    # - "common" for multi-tenant + personal accounts
    # - "organizations" for work/school accounts only
    # - "consumers" for personal Microsoft accounts only
    # - Tenant ID/domain for single-tenant
    AUTHORIZATION_ENDPOINT_TEMPLATE = (
        "https://login.microsoftonline.com/{tenant}/oauth2/v2.0/authorize"
    )
    TOKEN_ENDPOINT_TEMPLATE = "https://login.microsoftonline.com/{tenant}/oauth2/v2.0/token"
    USERINFO_ENDPOINT = "https://graph.microsoft.com/v1.0/me"
    JWKS_URI_TEMPLATE = "https://login.microsoftonline.com/{tenant}/discovery/v2.0/keys"

    # Microsoft Graph scopes
    # openid: Required for OIDC
    # email: User email address
    # profile: User profile information
    # User.Read: Read user profile via Microsoft Graph
    # offline_access (refresh token) is intentionally NOT listed here; it is
    # appended per request by generate_auth_url_with_prompt().
    DEFAULT_SCOPES = [
        "openid",
        "email",
        "profile",
        "User.Read",  # Microsoft Graph: read user profile
    ]

    def __init__(
        self,
        client_id: str,
        client_secret: str,
        redirect_uri: str,
        tenant: str = "common",
    ):
        """
        Initialize Microsoft OAuth provider.

        Args:
            client_id: Application (client) ID from Azure portal
            client_secret: Client secret from Azure portal
            redirect_uri: Redirect URI registered in Azure portal
            tenant: Tenant ID or "common"/"organizations"/"consumers"
        """
        super().__init__(client_id, client_secret, redirect_uri)
        self.tenant = tenant

    @property
    def authorization_endpoint(self) -> str:
        """Microsoft authorization endpoint for the configured tenant."""
        return self.AUTHORIZATION_ENDPOINT_TEMPLATE.format(tenant=self.tenant)

    @property
    def token_endpoint(self) -> str:
        """Microsoft token endpoint for the configured tenant."""
        return self.TOKEN_ENDPOINT_TEMPLATE.format(tenant=self.tenant)

    @property
    def userinfo_endpoint(self) -> str:
        """Microsoft Graph /me endpoint for user info (not tenant-specific)."""
        return self.USERINFO_ENDPOINT

    @property
    def jwks_uri(self) -> str:
        """Microsoft JWKS URI for token validation for the configured tenant."""
        return self.JWKS_URI_TEMPLATE.format(tenant=self.tenant)

    @property
    def default_scopes(self) -> list[str]:
        """Default scopes for Microsoft OAuth.

        Returns a fresh copy on every access so callers may mutate the result
        without affecting the class-level ``DEFAULT_SCOPES`` list.
        """
        return self.DEFAULT_SCOPES.copy()

    def normalize_user_info(self, claims: dict[str, Any]) -> OAuthUserInfo:
        """
        Normalize Microsoft claims to OAuthUserInfo.

        Microsoft Graph /me response:
        - id: Unique user ID (stable identifier)
        - userPrincipalName: User principal name (UPN)
        - mail: Primary email (may be null)
        - displayName: Display name
        - givenName: First name
        - surname: Last name
        - preferredLanguage: User locale

        Microsoft ID token claims:
        - sub: Subject (unique user ID, different from Graph ID)
        - email: User email
        - name: Full name
        - given_name: First name
        - family_name: Last name
        - preferred_username: UPN or email

        Args:
            claims: Raw claims from ID token or Microsoft Graph /me

        Returns:
            Normalized user information

        Raises:
            KeyError: If ``claims`` contains neither ``id`` (Graph response)
                nor ``sub`` (ID token).
        """
        # Handle both ID token claims and Graph API response.
        # Graph API uses different field names than OIDC claims; the presence
        # of "id" is used to tell the two shapes apart.
        if "id" in claims:
            # Microsoft Graph /me response
            sub = claims["id"]
            # "mail" may be null, so fall back to the UPN.
            email = claims.get("mail") or claims.get("userPrincipalName")
            name = claims.get("displayName")
            given_name = claims.get("givenName")
            family_name = claims.get("surname")
            locale = claims.get("preferredLanguage")
        else:
            # OIDC ID token claims
            sub = claims["sub"]
            email = claims.get("email") or claims.get("preferred_username")
            name = claims.get("name")
            given_name = claims.get("given_name")
            family_name = claims.get("family_name")
            locale = claims.get("locale")

        return OAuthUserInfo(
            sub=sub,
            email=email,
            # NOTE(review): hard-coded. The value above may be a UPN /
            # preferred_username rather than a verified mailbox, and nothing in
            # this method inspects a verification claim. Confirm that
            # downstream code does not treat this flag as proof of email
            # ownership (e.g. for account linking).
            email_verified=True,  # Microsoft verifies emails during account creation
            name=name,
            given_name=given_name,
            family_name=family_name,
            picture=None,  # Microsoft Graph requires separate photo endpoint
            locale=locale,
            provider="microsoft",
            raw_claims=claims,
        )

    def generate_auth_url_with_prompt(
        self,
        state: str,
        code_challenge: str,
        prompt: str | None = None,
        domain_hint: str | None = None,
        login_hint: str | None = None,
        scopes: list[str] | None = None,
        nonce: str | None = None,
    ) -> str:
        """
        Generate authorization URL with Microsoft-specific parameters.

        The ``offline_access`` scope is always added (if not already present)
        so that a refresh token is requested. The caller's ``scopes`` list is
        copied first and is never modified.

        Args:
            state: CSRF protection state
            code_challenge: PKCE code challenge
            prompt: Authentication behavior (none, login, consent, select_account)
            domain_hint: Domain hint for faster login (e.g., "contoso.com")
            login_hint: Login hint (email) to pre-fill sign-in form
            scopes: OAuth scopes (uses default_scopes if None or empty)
            nonce: OIDC nonce for ID token replay protection

        Returns:
            Authorization URL

        Microsoft-specific parameters:
        - prompt: Authentication behavior
            - none: Silent authentication (fails if interaction required)
            - login: Force user to re-authenticate
            - consent: Force consent screen
            - select_account: Show account picker
        - domain_hint: Domain hint for faster login (skip domain discovery)
        - login_hint: Email to pre-fill sign-in form
        - response_mode: query (default), form_post, fragment
          (not set by this method)
        """
        # Only forward the optional parameters that were actually supplied.
        extra_params: dict[str, str] = {}

        if prompt:
            extra_params["prompt"] = prompt

        if domain_hint:
            extra_params["domain_hint"] = domain_hint

        if login_hint:
            extra_params["login_hint"] = login_hint

        # Add offline_access scope for refresh token.
        # Work on a copy: appending to the caller's own list would leak
        # "offline_access" into whatever the caller does with it afterwards.
        # (default_scopes already returns a fresh list.)
        requested_scopes = list(scopes) if scopes else self.default_scopes
        if "offline_access" not in requested_scopes:
            requested_scopes.append("offline_access")

        return self.generate_auth_url(
            state=state,
            code_challenge=code_challenge,
            scopes=requested_scopes,
            nonce=nonce,
            extra_params=extra_params,
        )
rem/cli/README.md ADDED
@@ -0,0 +1,455 @@
1
+ # REM CLI - Agent Testing Guide
2
+
3
+ ## Overview
4
+
5
+ The `rem ask` command provides a command-line interface for testing Pydantic AI agents with YAML-based schemas. It supports both streaming and non-streaming modes, structured output, and optional OTEL/Phoenix instrumentation.
6
+
7
+ ## Installation
8
+
9
+ ```bash
10
+ # Install REM with all dependencies
11
+ cd /path/to/remstack/rem  # your local checkout of the repository
12
+ uv pip install -e .
13
+
14
+ # Verify installation
15
+ rem --help
16
+ ```
17
+
18
+ ## Basic Usage
19
+
20
+ ```bash
21
+ # Simple question (non-streaming by default)
22
+ rem ask simple "What is 2+2?"
23
+
24
+ # Streaming mode for real-time output
25
+ rem ask simple "What is 2+2?" --stream
26
+
27
+ # With specific model
28
+ rem ask simple "What is 2+2?" --model openai:gpt-4o-mini
29
+
30
+ # Structured output
31
+ rem ask query "Find all documents by Sarah" --model openai:gpt-4o-mini
32
+
33
+ # Process file and save output
34
+ rem ask contract-analyzer -i rem/tests/data/content-examples/service_agreement.txt -o output.yaml
35
+ ```
36
+
37
+ ## File Processing
38
+
39
+ The `--input-file` option allows you to process files directly instead of providing a text query:
40
+
41
+ ```bash
42
+ # Extract data from contract (text file)
43
+ rem ask contract-analyzer \
44
+ -i rem/tests/data/content-examples/service_agreement.txt \
45
+ -o output.yaml
46
+
47
+ # Extract from PDF contract
48
+ rem ask contract-analyzer \
49
+ -i rem/tests/data/content-examples/pdf/service_contract.pdf \
50
+ -o output.yaml
51
+
52
+ # With specific model
53
+ rem ask contract-analyzer \
54
+ -i rem/tests/data/content-examples/service_agreement.txt \
55
+ -o output.yaml \
56
+ -m anthropic:claude-sonnet-4-5-20250929
57
+
58
+ # Output to console (default)
59
+ rem ask contract-analyzer -i rem/tests/data/content-examples/service_agreement.txt
60
+
61
+ # Stream output in real-time
62
+ rem ask contract-analyzer -i rem/tests/data/content-examples/service_agreement.txt --stream
63
+ ```
64
+
65
+ **Schema name resolution:**
66
+ - Short names: `contract-analyzer` → `schemas/agents/examples/contract-analyzer.yaml`
67
+ - With folder: `examples/contract-analyzer` → `schemas/agents/examples/contract-analyzer.yaml`
68
+ - Core agents: `moment-builder` → `schemas/agents/core/moment-builder.yaml`
69
+ - Full paths: `schemas/agents/examples/contract-analyzer.yaml` (as-is)
70
+
71
+
72
+ **Supported file types:**
73
+ - Documents: PDF, DOCX, PPTX, XLSX (via Kreuzberg)
74
+ - Text: TXT, MD, Markdown, code files
75
+ - Schemas: YAML, JSON
76
+ - Audio: MP3, WAV, M4A (via Whisper API)
77
+
78
+ See [examples/README.md](../../../examples/README.md) for complete contract extraction examples.
79
+
80
+
81
+ ## Command Options
82
+
83
+ ```
84
+ rem ask NAME [QUERY] [OPTIONS]
85
+
86
+ Arguments:
87
+ NAME Agent schema name (YAML files in schemas/agents/)
88
+ - Short name: contract-analyzer → schemas/agents/examples/contract-analyzer.yaml
89
+ - With folder: examples/contract-analyzer → schemas/agents/examples/contract-analyzer.yaml
90
+ - Core agent: moment-builder → schemas/agents/core/moment-builder.yaml
91
+ - Full path: schemas/agents/examples/contract-analyzer.yaml
92
+
93
+ QUERY User query to send to the agent (optional if --input-file is used)
94
+
95
+ Options:
96
+ --model, -m TEXT LLM model (default: from settings)
97
+ --temperature, -t FLOAT Temperature 0.0-1.0 (not yet implemented)
98
+ --max-turns INTEGER Maximum turns for execution (default: 10)
99
+ --version, -v TEXT Schema version for registry lookup
100
+ --stream / --no-stream Enable/disable streaming (default: disabled)
101
+ --input-file, -i PATH Read input from file (PDF, TXT, Markdown, etc.)
102
+ --output-file, -o PATH Write output to file (YAML format)
103
+ --user-id TEXT User ID for context (default: cli-user)
104
+ --session-id TEXT Session ID for context (default: auto-generated)
105
+ ```
106
+
107
+ ## Agent Schema Format
108
+
109
+ Agent schemas are YAML files following JSON Schema with embedded metadata:
110
+
111
+ ```yaml
112
+ type: object
113
+ description: |
114
+ System prompt for the agent.
115
+
116
+ This describes what the agent does and how it should behave.
117
+
118
+ properties:
119
+ answer:
120
+ type: string
121
+ description: The response to the user's query
122
+
123
+ confidence:
124
+ type: number
125
+ minimum: 0
126
+ maximum: 1
127
+ description: Confidence score for the response
128
+
129
+ required:
130
+ - answer
131
+
132
+ json_schema_extra:
133
+ fully_qualified_name: "rem.agents.SimpleAgent"
134
+ version: "1.0.0"
135
+ tools: [] # MCP tool configurations (future)
136
+ resources: [] # MCP resource configurations (future)
137
+ ```
138
+
139
+ ## Example Schemas
140
+
141
+ ### Simple Agent (`schemas/agents/examples/simple.yaml`)
142
+
143
+ A basic conversational agent that returns simple text answers:
144
+
145
+ ```yaml
146
+ type: object
147
+ description: |
148
+ A simple conversational agent that provides helpful, friendly responses.
149
+
150
+ You are a helpful AI assistant. Answer questions clearly and concisely.
151
+ If you don't know something, say so. Be friendly and professional.
152
+
153
+ properties:
154
+ answer:
155
+ type: string
156
+ description: The response to the user's query
157
+
158
+ required:
159
+ - answer
160
+
161
+ json_schema_extra:
162
+ fully_qualified_name: "rem.agents.SimpleAgent"
163
+ version: "1.0.0"
164
+ tools: []
165
+ resources: []
166
+ ```
167
+
168
+ ### Query Agent (`schemas/agents/examples/query.yaml`)
169
+
170
+ An agent that provides structured output with confidence scores:
171
+
172
+ ```yaml
173
+ type: object
174
+ description: |
175
+ REM Query Agent - Converts natural language questions to REM queries.
176
+
177
+ You are a specialized agent that understands REM (Resources Entities Moments) queries.
178
+ Your job is to interpret user questions and provide answers with confidence scores.
179
+
180
+ properties:
181
+ answer:
182
+ type: string
183
+ description: The answer to the user's query with supporting details
184
+
185
+ confidence:
186
+ type: number
187
+ minimum: 0
188
+ maximum: 1
189
+ description: Confidence score (0.0-1.0) for this answer
190
+
191
+ query_type:
192
+ type: string
193
+ enum:
194
+ - LOOKUP
195
+ - FUZZY
196
+ - TRAVERSE
197
+ - UNKNOWN
198
+ description: The type of REM query that would best answer this question
199
+
200
+ required:
201
+ - answer
202
+ - confidence
203
+ - query_type
204
+
205
+ json_schema_extra:
206
+ fully_qualified_name: "rem.agents.QueryAgent"
207
+ version: "1.0.0"
208
+ tools: []
209
+ resources: []
210
+ ```
211
+
212
+ ## Streaming vs Non-Streaming
213
+
214
+ ### Non-Streaming Mode (default)
215
+
216
+ Uses `agent.run()` to return the complete structured result at once:
217
+
218
+ ```bash
219
+ rem ask simple "Explain quantum computing"
220
+ ```
221
+
222
+ Output:
223
+ ```json
224
+ {
225
+ "answer": "Quantum computing uses quantum mechanical phenomena..."
226
+ }
227
+ ```
228
+
229
+ **Best for:**
230
+ - Saving output to files
231
+ - Structured data extraction
232
+ - Processing files with complex schemas
233
+ - Programmatic usage
234
+
235
+ ### Streaming Mode
236
+
237
+ Uses `agent.iter()` to stream events in real-time:
238
+ - Tool call markers: `[Calling: tool_name]`
239
+ - Text content deltas as they arrive
240
+ - Final structured result after completion
241
+
242
+ ```bash
243
+ rem ask simple "Explain quantum computing" --stream
244
+ ```
245
+
246
+ Output:
247
+ ```
248
+ [Calling: final_result]
249
+ Quantum computing uses quantum mechanical phenomena like superposition...
250
+
251
+ {
252
+ "answer": "Quantum computing uses quantum mechanical phenomena..."
253
+ }
254
+ ```
255
+
256
+ **Best for:**
257
+ - Interactive conversations
258
+ - Long-running queries where you want to see progress
259
+ - Debugging agent behavior
260
+
261
+ ## Implementation Details
262
+
263
+ ### Architecture
264
+
265
+ ```
266
+ CLI (ask.py)
267
+ ├── load_schema_from_file() - YAML file loading
268
+ ├── load_schema_from_registry() - TODO: Database/cache lookup
269
+ ├── run_agent_streaming() - agent.iter() with event streaming
270
+ └── run_agent_non_streaming() - agent.run() for complete result
271
+
272
+ Agent Factory (providers/pydantic_ai.py)
273
+ ├── create_pydantic_ai_agent() - Main factory
274
+ ├── _create_model_from_schema() - JSON Schema → Pydantic model
275
+ └── _create_schema_wrapper() - Strip description for LLM
276
+
277
+ OTEL (otel/setup.py)
278
+ ├── setup_instrumentation() - Initialize OTLP exporters
279
+ └── set_agent_resource_attributes() - Set span attributes
280
+ ```
281
+
282
+ ### Design Patterns
283
+
284
+ 1. **JSON Schema to Pydantic Pattern**
285
+ - Agent schemas are JSON Schema with embedded metadata
286
+ - `description` field becomes system prompt
287
+ - `properties` field becomes Pydantic output model
288
+ - Dynamic model creation using `json-schema-to-pydantic`
289
+
290
+ 2. **Streaming with agent.iter() Pattern**
291
+ - Use `agent.iter()` for complete execution (not `run_stream()`)
292
+ - `agent.iter()` captures tool calls, `run_stream()` stops after first output
293
+ - Stream tool call events with `[Calling: tool_name]` markers
294
+ - Stream text content deltas as they arrive
295
+
296
+ 3. **Conditional OTEL Instrumentation**
297
+ - OTEL disabled by default for local development
298
+ - Enabled in production via `OTEL__ENABLED=true`
299
+ - Applied at agent creation time: `Agent(..., instrument=settings.otel.enabled)`
300
+
301
+ ## Environment Variables
302
+
303
+ Set API keys for LLM providers:
304
+
305
+ ```bash
306
+ # In ~/.bash_profile or ~/.zshrc
307
+ export OPENAI_API_KEY="sk-..."
308
+ export ANTHROPIC_API_KEY="sk-ant-..."
309
+
310
+ # Optional: OTEL/Phoenix configuration
311
+ export OTEL__ENABLED=true
312
+ export OTEL__SERVICE_NAME=rem-cli
313
+ export OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
314
+ export PHOENIX__ENABLED=true
315
+ export PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
316
+ ```
317
+
318
+ ## Observability (Optional)
319
+
320
+ ### OTEL Configuration
321
+
322
+ Enable distributed tracing with OpenTelemetry:
323
+
324
+ ```bash
325
+ # Enable OTEL
326
+ export OTEL__ENABLED=true
327
+ export OTEL__SERVICE_NAME=rem-cli
328
+ export OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
329
+ export OTEL__PROTOCOL=http
330
+
331
+ # Run agent with tracing
332
+ rem ask query "Find documents" --model openai:gpt-4o-mini
333
+ ```
334
+
335
+ ### Phoenix Integration
336
+
337
+ Enable LLM observability with Arize Phoenix:
338
+
339
+ ```bash
340
+ # Start Phoenix locally
341
+ docker run -p 6006:6006 arizephoenix/phoenix:latest
342
+
343
+ # Enable Phoenix
344
+ export PHOENIX__ENABLED=true
345
+ export PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
346
+ export PHOENIX__PROJECT_NAME=rem-cli
347
+
348
+ # Run agent with Phoenix tracing
349
+ rem ask query "Find documents" --model openai:gpt-4o-mini
350
+
351
+ # View traces at http://localhost:6006
352
+ ```
353
+
354
+ ## Schema Registry (TODO)
355
+
356
+ The schema registry is stubbed but not yet implemented. To implement:
357
+
358
+ 1. **Database Schema**:
359
+ ```sql
360
+ CREATE TABLE agent_schemas (
361
+ id UUID PRIMARY KEY,
362
+ name TEXT NOT NULL,
363
+ version TEXT NOT NULL,
364
+ schema_json JSONB NOT NULL,
365
+ created_at TIMESTAMPTZ DEFAULT NOW(),
366
+ UNIQUE(name, version)
367
+ );
368
+ ```
369
+
370
+ 2. **Cache Layer**:
371
+ - Redis for fast lookups
372
+ - In-memory cache for CLI
373
+
374
+ 3. **Versioning**:
375
+ - Semantic versioning (1.0.0, 1.1.0, etc.)
376
+ - Latest version fallback
377
+
378
+ Once implemented, you can load agents by name:
379
+
380
+ ```bash
381
+ # Load latest version
382
+ rem ask query "Find documents"
383
+
384
+ # Load specific version
385
+ rem ask query "Find documents" --version 1.2.0
386
+ ```
387
+
388
+ ## Testing
389
+
390
+ ```bash
391
+ # Test simple agent (default non-streaming)
392
+ rem ask simple "What is 2+2?" --model openai:gpt-4o-mini
393
+
394
+ # Test simple agent (streaming)
395
+ rem ask simple "What is 2+2?" --stream --model openai:gpt-4o-mini
396
+
397
+ # Test structured output
398
+ rem ask query "Find all documents by Sarah" --model openai:gpt-4o-mini
399
+
400
+ # Test file processing
401
+ rem ask contract-analyzer -i examples/contract.pdf -o output.yaml
402
+
403
+ # Test with different models
404
+ rem ask simple "Hello" --model openai:gpt-4o
405
+ rem ask simple "Hello" --model anthropic:claude-sonnet-4-5-20250929
406
+ ```
407
+
408
+ ## Troubleshooting
409
+
410
+ ### API Key Not Found
411
+
412
+ ```bash
413
+ # Set API key in environment
414
+ export OPENAI_API_KEY="sk-..."
415
+
416
+ # Or source your profile
417
+ source ~/.bash_profile
418
+ ```
419
+
420
+ ### Schema Registry Not Implemented
421
+
422
+ ```
423
+ Schema registry not implemented yet. Please use a file path instead.
424
+ ```
425
+
426
+ Use file paths until registry is implemented:
427
+ ```bash
428
+ rem ask schemas/agents/examples/simple.yaml "query"
429
+ ```
430
+
431
+ ### Model Not Found
432
+
433
+ Ensure you're using the correct model format:
434
+ - OpenAI: `openai:gpt-4o-mini`, `openai:gpt-4o`
435
+ - Anthropic: `anthropic:claude-sonnet-4-5-20250929`
436
+
437
+ ## Next Steps
438
+
439
+ 1. **Implement Schema Registry**
440
+ - PostgreSQL table for schema storage
441
+ - Redis cache for fast lookups
442
+ - Version management
443
+
444
+ 2. **Add MCP Tool Support**
445
+ - Dynamic tool loading from schema
446
+ - MCP server configuration
447
+
448
+ 3. **Temperature Override**
449
+ - Pass temperature to agent.run()
450
+ - Model-specific settings
451
+
452
+ 4. **CLI Improvements**
453
+ - Interactive mode
454
+ - Multi-turn conversations
455
+ - Session management
rem/cli/__init__.py ADDED
@@ -0,0 +1,8 @@
1
+ """
2
+ REM CLI - Command-line interface for REM operations.
3
+
4
+ Commands:
5
+ - schema: Database schema generation and management
6
+ - migrate: Run database migrations
7
+ - dev: Development utilities
8
+ """