remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
rem/agentic/schema.py ADDED
@@ -0,0 +1,737 @@
1
+ """
2
+ Agent Schema Protocol - Pydantic models for REM agent schemas.
3
+
4
+ This module defines the structure of agent schemas used in REM.
5
+ Agent schemas are JSON Schema documents with REM-specific extensions
6
+ in the `json_schema_extra` field.
7
+
8
+ The schema protocol serves as:
9
+ 1. Documentation for agent schema structure
10
+ 2. Validation for agent schema files
11
+ 3. Type hints for schema manipulation
12
+ 4. Single source of truth for schema conventions
13
+ """
14
+
15
+ from typing import Any, Literal
16
+ from pydantic import BaseModel, Field, field_validator
17
+
18
+
19
class MCPToolReference(BaseModel):
    """
    Reference to an MCP tool available to the agent.

    Tools are functions that agents can call during execution to
    interact with external systems, retrieve data, or perform actions.

    Two usage patterns:
    1. With mcp_servers config: Just declare name + description, tools loaded from MCP servers
    2. Explicit MCP server: Specify mcp_server to load tool from specific server

    Only ``name`` is required; ``mcp_server`` and ``description`` default
    to None when omitted.

    Example (declarative with mcp_servers):
        {
            "name": "search_rem",
            "description": "Execute REM queries for entity lookup and search"
        }

    Example (explicit server):
        {
            "name": "lookup_entity",
            "mcp_server": "rem",
            "description": "Lookup entities by exact key"
        }
    """

    # Required. No default is supplied, so validation fails without it.
    name: str = Field(
        description=(
            "Tool name as defined in the MCP server. "
            "Must match the tool name exposed by the MCP server exactly."
        )
    )

    # Optional server identifier; None means "no explicit server given".
    # This model only stores the value; resolution happens elsewhere.
    mcp_server: str | None = Field(
        default=None,
        description=(
            "MCP server identifier (optional when using mcp_servers config). "
            "If not specified, tool is expected from configured mcp_servers. "
            "Resolved via environment variable: MCP_SERVER_{NAME} or MCP__{NAME}__URL."
        )
    )

    # Optional free-text description of the tool.
    description: str | None = Field(
        default=None,
        description=(
            "Tool description for the agent. Explains what the tool does "
            "and when to use it. This is visible to the LLM."
        ),
    )
67
+
68
+
69
class MCPResourceReference(BaseModel):
    """
    Reference to MCP resources accessible to the agent.

    Resources are data sources that can be read by agents, such as
    knowledge graph entities, files, or API endpoints.

    Two formats supported:
    1. uri: Exact URI or URI with query params
    2. uri_pattern: Regex pattern for flexible matching

    Every field defaults to None and this class defines no validator, so
    the model itself does not enforce that ``uri`` or ``uri_pattern`` is set.

    Example (exact URI):
        {
            "uri": "rem://agents",
            "name": "Agent Schemas",
            "description": "List all available agent schemas"
        }

    Example (pattern):
        {
            "uri_pattern": "rem://resources/.*",
            "mcp_server": "rem"
        }
    """

    # Support both exact URI and pattern.
    # `uri` carries a literal resource URI (optionally with a query string).
    uri: str | None = Field(
        default=None,
        description=(
            "Exact resource URI or URI with query parameters. "
            "Examples: 'rem://agents', 'rem://resources?category=drug.*'"
        )
    )

    # `uri_pattern` carries a regex; it is stored as a plain string and is
    # not compiled or checked by this model.
    uri_pattern: str | None = Field(
        default=None,
        description=(
            "Regex pattern matching resource URIs. "
            "Examples: 'rem://resources/.*' (all resources). "
            "Use uri for exact URIs, uri_pattern for regex matching."
        )
    )

    # Optional display name.
    name: str | None = Field(
        default=None,
        description="Human-readable name for the resource."
    )

    # Optional free-text description.
    description: str | None = Field(
        default=None,
        description="Description of what the resource provides."
    )

    # Optional server identifier; None means no explicit server was given.
    mcp_server: str | None = Field(
        default=None,
        description=(
            "MCP server identifier (optional when using mcp_servers config). "
            "Resolved via environment variable MCP_SERVER_{NAME}."
        )
    )
129
+
130
+
131
class MCPServerConfig(BaseModel):
    """
    MCP server configuration for in-process tool loading.

    Only ``module`` is required; ``type`` defaults to "local" and ``id``
    defaults to "mcp-server".

    Example:
        {
            "type": "local",
            "module": "rem.mcp_server",
            "id": "rem-local"
        }
    """

    # Constrained to the single literal "local"; any other value fails validation.
    type: Literal["local"] = Field(
        default="local",
        description="Server type. Currently only 'local' (in-process) is supported.",
    )

    # Required dotted module path. It is stored as a string; this model
    # does not import or inspect the module.
    module: str = Field(
        description=(
            "Python module path containing the MCP server. "
            "The module must export an 'mcp' object that supports get_tools(). "
            "Example: 'rem.mcp_server'"
        )
    )

    # Label for the server; falls back to "mcp-server" when omitted.
    id: str = Field(
        default="mcp-server",
        description=(
            "Server identifier for logging and debugging. "
            "Defaults to 'mcp-server' if not specified. "
            "Example: 'rem-local'"
        )
    )
164
+
165
+
166
class AgentSchemaMetadata(BaseModel):
    """
    REM-specific metadata for agent schemas.

    This is stored in the `json_schema_extra` field of the JSON Schema
    and extends standard JSON Schema with REM agent conventions.

    Only ``name`` is required. Every other field has a default (None or an
    empty list), and unknown keys are kept because ``extra`` is "allow".
    """

    # Optional routing hint; a free-form string, not restricted to the
    # values listed in the description.
    kind: str | None = Field(
        default=None,
        description=(
            "Schema kind/type. Determines how the schema is processed. "
            "Values: 'agent', 'evaluator', 'engram'. "
            "Examples: 'agent' for agents, 'evaluator' for LLM-as-a-Judge evaluators, "
            "'engram' for memory documents. "
            "Used by processors to route schemas to the correct handler."
        )
    )

    # Required identifier. The kebab-case convention is documented only;
    # no pattern is enforced here.
    name: str = Field(
        description=(
            "Unique schema identifier (kebab-case). "
            "Examples: 'query-agent', 'cv-parser', 'rem-lookup-correctness'. "
            "Used in URLs, file paths, database keys, and references. "
            "Must be unique within the kind namespace."
        ),
    )

    # Optional version string; the semver format is not validated here.
    version: str | None = Field(
        default=None,
        description=(
            "Semantic version of the agent schema. "
            "Format: 'MAJOR.MINOR.PATCH' (e.g., '1.0.0', '2.1.3'). "
            "Increment MAJOR for breaking changes, MINOR for new features, "
            "PATCH for bug fixes. Used for schema evolution and compatibility."
        ),
    )

    # Extended system prompt. get_system_prompt() in this module appends it
    # to the schema description (it does not replace the description).
    system_prompt: str | None = Field(
        default=None,
        description=(
            "Custom system prompt that overrides or extends the schema description. "
            "When present, this is combined with the main schema.description field "
            "to form the complete system prompt. Use this for detailed instructions "
            "that you don't want in the public schema description."
        ),
    )

    # Structured output toggle; None leaves the decision to a setting
    # outside this module.
    structured_output: bool | None = Field(
        default=None,
        description=(
            "Whether to enforce structured JSON output. "
            "When False, the agent produces free-form text and schema properties "
            "are converted to prompt guidance instead. "
            "Default: None (uses LLM__DEFAULT_STRUCTURED_OUTPUT setting, which defaults to False)."
        ),
    )

    # MCP server configurations (for dynamic tool loading).
    # Defaults to an empty list; any fallback server is applied by the
    # consumer, not by this model.
    mcp_servers: list[MCPServerConfig] = Field(
        default_factory=list,
        description=(
            "MCP server configurations for dynamic tool loading. "
            "Servers are loaded in-process at agent creation time. "
            "All tools from configured servers become available to the agent. "
            "If not specified, defaults to rem.mcp_server (REM's built-in tools)."
        ),
    )

    # Declared tool references; empty list when none are given.
    tools: list[MCPToolReference] = Field(
        default_factory=list,
        description=(
            "MCP tools available to the agent. "
            "Tools are loaded dynamically from MCP servers at agent creation time. "
            "The agent can call these tools during execution to retrieve data, "
            "perform actions, or interact with external systems."
        ),
    )

    # Declared resource references; empty list when none are given.
    resources: list[MCPResourceReference] = Field(
        default_factory=list,
        description=(
            "MCP resources accessible to the agent. "
            "Resources are data sources that can be read by the agent, "
            "such as knowledge graph entities, files, or API endpoints. "
            "URI patterns are matched against resource URIs to determine access."
        ),
    )

    # Free-form tags for discovery and filtering.
    tags: list[str] = Field(
        default_factory=list,
        description=(
            "Categorization tags for the agent. "
            "Examples: ['query', 'knowledge-graph'], ['summarization', 'nlp']. "
            "Used for discovery, filtering, and organization of agents."
        ),
    )

    # Optional attribution string.
    author: str | None = Field(
        default=None,
        description=(
            "Agent author or team. "
            "Examples: 'REM Team', 'john@example.com'. "
            "Used for attribution and maintenance tracking."
        ),
    )

    # Per-agent temperature override. The 0.0-1.0 range is documented only;
    # no bounds are enforced on this field.
    override_temperature: float | None = Field(
        default=None,
        description=(
            "Override default LLM temperature (0.0-1.0) for this agent. "
            "If None, uses global settings.llm.default_temperature."
        ),
    )

    # Per-agent iteration cap override; None defers to global settings.
    override_max_iterations: int | None = Field(
        default=None,
        description=(
            "Override maximum iterations for this agent. "
            "If None, uses global settings.llm.default_max_iterations."
        ),
    )

    model_config = {"extra": "allow"}  # Allow additional custom metadata
294
+
295
+
296
class AgentSchema(BaseModel):
    """
    Complete REM agent schema following JSON Schema Draft 7.

    Agent schemas are JSON Schema documents that define:
    1. System prompt (in `description` field)
    2. Structured output format (in `properties` field)
    3. REM-specific metadata (in `json_schema_extra` field)

    This is the single source of truth for agent behavior, output structure,
    and available tools/resources.

    Design Pattern:
    - JSON Schema as the schema language (framework-agnostic)
    - System prompt embedded in description (visible to LLM)
    - Output structure as standard JSON Schema properties
    - REM extensions in json_schema_extra (invisible to LLM)

    ``description`` and ``properties`` are required; all other fields have
    defaults. Unknown top-level keys are kept because ``extra`` is "allow".

    Example:
        ```json
        {
          "type": "object",
          "description": "You are a Query Agent that answers questions...",
          "properties": {
            "answer": {"type": "string", "description": "Query answer"},
            "confidence": {"type": "number", "minimum": 0, "maximum": 1}
          },
          "required": ["answer", "confidence"],
          "json_schema_extra": {
            "kind": "agent",
            "name": "query-agent",
            "version": "1.0.0",
            "tools": [{"name": "lookup_entity", "mcp_server": "rem"}]
          }
        }
        ```
    """

    # Fixed to the literal "object"; any other value fails validation.
    type: Literal["object"] = Field(
        default="object",
        description="JSON Schema type. Must be 'object' for agent schemas.",
    )

    # Required base system prompt.
    description: str = Field(
        description=(
            "System prompt for the agent. This is the primary instruction "
            "given to the LLM explaining:\n"
            "- Agent's role and purpose\n"
            "- Available capabilities\n"
            "- Workflow and reasoning steps\n"
            "- Guidelines and constraints\n"
            "- Output format expectations\n\n"
            "This field is visible to the LLM and should be comprehensive, "
            "clear, and actionable. Use markdown formatting for structure."
        )
    )

    # Required mapping of output property name -> JSON Schema fragment.
    # Values are typed as Any, so the fragments are not validated here.
    properties: dict[str, Any] = Field(
        description=(
            "Output schema properties following JSON Schema Draft 7. "
            "Each property defines:\n"
            "- type: JSON type (string, number, boolean, array, object)\n"
            "- description: Field purpose and content guidance\n"
            "- Validation: minimum, maximum, pattern, enum, etc.\n\n"
            "These properties define the structured output the agent produces. "
            "The agent must return a JSON object matching this schema."
        )
    )

    # Names of required output properties; not cross-checked against
    # `properties` by this model.
    required: list[str] = Field(
        default_factory=list,
        description=(
            "List of required property names. "
            "The agent must include these fields in its output. "
            "Optional fields can be omitted. "
            "Example: ['answer', 'confidence']"
        ),
    )

    # May hold either a validated AgentSchemaMetadata or a raw dict; the
    # get_system_prompt() and get_metadata() helpers in this module handle
    # both shapes. Defaults to an empty dict.
    json_schema_extra: AgentSchemaMetadata | dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "REM-specific metadata extending JSON Schema. "
            "Contains agent identification, versioning, and MCP configuration. "
            "This field is not visible to the LLM - it's used by the REM system "
            "for agent creation, tool loading, and resource access control."
        ),
    )

    # Additional JSON Schema fields (optional)
    title: str | None = Field(
        default=None,
        description="Schema title. If not provided, derived from name.",
    )

    # Optional reusable sub-schemas, stored as-is.
    definitions: dict[str, Any] | None = Field(
        default=None,
        description=(
            "Reusable schema definitions for complex nested types. "
            "Use JSON Schema $ref to reference definitions. "
            "Example: {'EntityKey': {'type': 'string', 'pattern': '^[a-z0-9-]+$'}}"
        ),
    )

    # camelCase on purpose: mirrors the JSON Schema keyword of the same name.
    additionalProperties: bool = Field(
        default=False,
        description=(
            "Whether to allow additional properties not defined in schema. "
            "Default: False (strict validation). Set to True for flexible schemas."
        ),
    )

    model_config = {"extra": "allow"}  # Support full JSON Schema extensions
409
+
410
+
411
# Convenience type aliases for common use cases.
# These are plain assignments used for annotations; they perform no
# validation of their own.
AgentSchemaDict = dict[str, Any]  # Raw JSON Schema dict
AgentSchemaJSON = str  # JSON-serialized schema
414
+
415
+
416
def validate_agent_schema(schema: dict[str, Any]) -> AgentSchema:
    """
    Check a raw schema dict and return it as an ``AgentSchema``.

    Args:
        schema: Raw agent schema dict

    Returns:
        The ``AgentSchema`` produced by ``AgentSchema.model_validate``

    Raises:
        ValidationError: If the dict does not satisfy the model

    Example:
        >>> schema = load_schema("agents/query_agent.json")
        >>> validated = validate_agent_schema(schema)
        >>> print(validated.json_schema_extra["name"])
        "query-agent"
    """
    validated = AgentSchema.model_validate(schema)
    return validated
436
+
437
+
438
def create_agent_schema(
    description: str,
    properties: dict[str, Any],
    required: list[str],
    name: str,
    kind: str | None = None,
    tools: list[dict[str, Any]] | None = None,
    resources: list[dict[str, Any]] | None = None,
    version: str = "1.0.0",
    override_temperature: float | None = None,
    override_max_iterations: int | None = None,
    **kwargs,
) -> AgentSchema:
    """
    Build an ``AgentSchema`` from individual pieces.

    The metadata arguments are validated through ``AgentSchemaMetadata``
    and then stored on the schema as a plain dict (``model_dump()``).

    Args:
        description: System prompt
        properties: Output schema properties
        required: Required field names
        name: Schema name in kebab-case (e.g., 'query-agent')
        kind: Schema kind (e.g. 'agent' or 'evaluator'), optional
        tools: MCP tool references as dicts; None is treated as no tools
        resources: MCP resource references as dicts; None is treated as no resources
        version: Schema version
        override_temperature: Override default LLM temperature for this agent.
        override_max_iterations: Override maximum iterations for this agent.
        **kwargs: Additional fields forwarded to the ``AgentSchema`` constructor

    Returns:
        AgentSchema instance

    Example:
        >>> schema = create_agent_schema(
        ...     description="You are a helpful assistant...",
        ...     properties={
        ...         "answer": {"type": "string", "description": "Response"},
        ...         "confidence": {"type": "number", "minimum": 0, "maximum": 1}
        ...     },
        ...     required=["answer"],
        ...     kind="agent",
        ...     name="assistant",
        ...     tools=[{"name": "search", "mcp_server": "rem"}],
        ...     version="1.0.0"
        ... )
        >>> schema.json_schema_extra["tools"][0]["name"]
        "search"
    """
    # Validate each raw reference dict up front (tools first, then resources).
    tool_refs = list(map(MCPToolReference.model_validate, tools or []))
    resource_refs = list(map(MCPResourceReference.model_validate, resources or []))

    extra = AgentSchemaMetadata(
        kind=kind,
        name=name,
        tools=tool_refs,
        resources=resource_refs,
        version=version,
        override_temperature=override_temperature,
        override_max_iterations=override_max_iterations,
    ).model_dump()

    return AgentSchema(
        description=description,
        properties=properties,
        required=required,
        json_schema_extra=extra,
        **kwargs,
    )
503
+
504
+
505
+ # =============================================================================
506
+ # YAML and Database Serialization
507
+ # =============================================================================
508
+
509
+
510
def schema_to_dict(schema: AgentSchema, exclude_none: bool = True) -> dict[str, Any]:
    """
    Convert an AgentSchema into a plain dictionary via ``model_dump``.

    This produces the canonical format used in:
    - YAML files (schemas/agents/*.yaml)
    - Database spec column (schemas table)
    - API responses

    Args:
        schema: AgentSchema instance to serialize
        exclude_none: Passed through to ``model_dump``; when True, None
            values are left out of the result

    Returns:
        Dictionary representation of the schema

    Example:
        >>> schema = AgentSchema(
        ...     description="System prompt...",
        ...     properties={"answer": {"type": "string"}},
        ...     json_schema_extra={"name": "my-agent", "structured_output": False}
        ... )
        >>> d = schema_to_dict(schema)
        >>> d["json_schema_extra"]["name"]
        "my-agent"
    """
    dumped = schema.model_dump(exclude_none=exclude_none)
    return dumped
537
+
538
+
539
def schema_from_dict(data: dict[str, Any]) -> AgentSchema:
    """
    Build an AgentSchema from a dictionary.

    Typical inputs:
    - YAML files loaded with yaml.safe_load()
    - Database spec column (JSON)
    - API request bodies

    Args:
        data: Dictionary containing schema data

    Returns:
        The ``AgentSchema`` produced by ``AgentSchema.model_validate``

    Raises:
        ValidationError: If data doesn't match schema structure

    Example:
        >>> data = {"type": "object", "description": "...", "properties": {}, "json_schema_extra": {"name": "test"}}
        >>> schema = schema_from_dict(data)
        >>> schema.json_schema_extra["name"]
        "test"
    """
    parsed = AgentSchema.model_validate(data)
    return parsed
564
+
565
+
566
def schema_to_yaml(schema: AgentSchema) -> str:
    """
    Render an AgentSchema as a YAML document.

    The schema is first converted with ``schema_to_dict`` and then dumped
    in block (non-flow) style, with unicode allowed and keys left in their
    existing order rather than sorted. The top-level layout is:
        ```yaml
        type: object
        description: ...
        properties:
          answer:
            type: string
        json_schema_extra:
          name: my-agent
        ```

    Args:
        schema: AgentSchema instance to serialize

    Returns:
        YAML string representation

    Example:
        >>> schema = create_agent_schema(
        ...     description="You are a test agent",
        ...     properties={"answer": {"type": "string"}},
        ...     required=["answer"],
        ...     name="test-agent"
        ... )
        >>> yaml_str = schema_to_yaml(schema)
        >>> "test-agent" in yaml_str
        True
    """
    import yaml

    payload = schema_to_dict(schema)
    dump_options = {
        "default_flow_style": False,
        "allow_unicode": True,
        "sort_keys": False,
    }
    return yaml.dump(payload, **dump_options)
609
+
610
+
611
def schema_from_yaml(yaml_content: str) -> AgentSchema:
    """
    Parse a YAML document into an AgentSchema.

    The text is parsed with ``yaml.safe_load`` and the result is handed to
    ``schema_from_dict`` for validation.

    Args:
        yaml_content: YAML string containing schema definition

    Returns:
        Validated AgentSchema instance

    Raises:
        yaml.YAMLError: If YAML parsing fails
        ValidationError: If schema structure is invalid

    Example:
        >>> yaml_str = '''
        ... type: object
        ... description: Test agent
        ... properties:
        ...   answer:
        ...     type: string
        ... json_schema_extra:
        ...   name: test
        ... '''
        >>> schema = schema_from_yaml(yaml_str)
        >>> schema.json_schema_extra["name"]
        "test"
    """
    import yaml

    return schema_from_dict(yaml.safe_load(yaml_content))
643
+
644
+
645
def schema_from_yaml_file(file_path: str) -> AgentSchema:
    """
    Load AgentSchema from a YAML file.

    The file is always decoded as UTF-8. schema_to_yaml() writes with
    ``allow_unicode=True``, so schema files may contain non-ASCII text;
    relying on the platform's locale encoding could mis-decode them.

    Args:
        file_path: Path to YAML file

    Returns:
        Validated AgentSchema instance

    Raises:
        FileNotFoundError: If file doesn't exist
        UnicodeDecodeError: If the file is not valid UTF-8
        yaml.YAMLError: If YAML parsing fails
        ValidationError: If schema structure is invalid

    Example:
        >>> schema = schema_from_yaml_file("schemas/agents/rem.yaml")
        >>> schema.json_schema_extra["name"]
        "rem"
    """
    # Explicit encoding: open() would otherwise use the locale default.
    with open(file_path, "r", encoding="utf-8") as f:
        return schema_from_yaml(f.read())
667
+
668
+
669
def get_system_prompt(schema: AgentSchema | dict[str, Any]) -> str:
    """
    Extract the complete system prompt from a schema.

    Combines:
    1. schema.description (base system prompt / public description)
    2. json_schema_extra.system_prompt (extended instructions if present)

    When both are non-empty they are joined with a blank line. When only
    one is non-empty, that one is returned. A missing or null description
    is treated as an empty string, so the result is always a ``str``.

    Args:
        schema: AgentSchema instance or raw dict

    Returns:
        Complete system prompt string (possibly empty)

    Example:
        >>> schema = AgentSchema(
        ...     description="Base description",
        ...     properties={},
        ...     json_schema_extra={"name": "test", "system_prompt": "Extended instructions"}
        ... )
        >>> prompt = get_system_prompt(schema)
        >>> "Base description" in prompt and "Extended instructions" in prompt
        True
    """
    if isinstance(schema, dict):
        base = schema.get("description")
        extra = schema.get("json_schema_extra")
        # Raw dicts may carry a null or non-dict json_schema_extra; ignore those.
        custom = extra.get("system_prompt") if isinstance(extra, dict) else None
    else:
        base = schema.description
        extra = schema.json_schema_extra
        if isinstance(extra, dict):
            custom = extra.get("system_prompt")
        elif isinstance(extra, AgentSchemaMetadata):
            custom = extra.system_prompt
        else:
            custom = None

    # An explicit null description (e.g. an empty YAML key) must not leak
    # out as None from a function annotated to return str.
    base = base or ""

    if custom:
        return f"{base}\n\n{custom}" if base else custom
    return base
710
+
711
+
712
def get_metadata(schema: AgentSchema | dict[str, Any]) -> AgentSchemaMetadata:
    """
    Return the schema's ``json_schema_extra`` as an AgentSchemaMetadata.

    An existing ``AgentSchemaMetadata`` instance is returned unchanged;
    anything else is passed through ``AgentSchemaMetadata.model_validate``.
    For a raw dict without the key, an empty dict is validated.

    Args:
        schema: AgentSchema instance or raw dict

    Returns:
        Validated AgentSchemaMetadata instance

    Example:
        >>> schema = {"json_schema_extra": {"name": "test", "system_prompt": "hello"}}
        >>> meta = get_metadata(schema)
        >>> meta.name
        "test"
        >>> meta.system_prompt
        "hello"
    """
    extra = (
        schema.get("json_schema_extra", {})
        if isinstance(schema, dict)
        else schema.json_schema_extra
    )

    if not isinstance(extra, AgentSchemaMetadata):
        return AgentSchemaMetadata.model_validate(extra)
    return extra