remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235) hide show
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,661 @@
1
+ """
2
+ Schema generation utility from Pydantic models.
3
+
4
+ Generates complete database schemas from:
5
+ 1. REM's core models (Resource, Moment, User, etc.)
6
+ 2. Models registered via rem.register_model() or rem.register_models()
7
+ 3. Models discovered from a directory scan
8
+
9
+ Output includes:
10
+ - Primary tables
11
+ - Embeddings tables
12
+ - KV_STORE triggers
13
+ - Indexes (foreground and background)
14
+ - Migrations
15
+ - Schema table entries (for agent-like table access)
16
+
17
+ Usage:
18
+ from rem.services.postgres.schema_generator import SchemaGenerator
19
+
20
+ # Generate from registry (includes core + registered models)
21
+ generator = SchemaGenerator()
22
+ schema = await generator.generate_from_registry()
23
+
24
+ # Or generate from directory (legacy)
25
+ schema = await generator.generate_from_directory("src/rem/models/entities")
26
+
27
+ # Write to file
28
+ with open("src/rem/sql/schema.sql", "w") as f:
29
+ f.write(schema)
30
+ """
31
+
32
+ import importlib.util
33
+ import inspect
34
+ import json
35
+ import uuid
36
+ from pathlib import Path
37
+ from typing import Any, Type
38
+
39
+ from loguru import logger
40
+ from pydantic import BaseModel
41
+
42
+ from ...settings import settings
43
+ from ...utils.sql_paths import get_package_sql_dir
44
+ from .register_type import register_type, should_embed_field
45
+
46
# Fixed namespace under which every model name is hashed. uuid5 is a pure
# function of (namespace, name), so a given model name maps to the same UUID
# on every run. The value is the standard RFC 4122 DNS namespace.
REM_SCHEMA_NAMESPACE = uuid.NAMESPACE_DNS  # 6ba7b810-9dad-11d1-80b4-00c04fd430c8


def generate_model_uuid(fully_qualified_name: str) -> uuid.UUID:
    """
    Derive a stable UUID for a model from its fully qualified name.

    The name is hashed with UUID5 (SHA-1) inside REM_SCHEMA_NAMESPACE, so
    calling this twice with the same name yields the same UUID.

    Args:
        fully_qualified_name: Full module path, e.g., "rem.models.entities.Resource"

    Returns:
        Deterministic UUID for this model
    """
    model_uuid = uuid.uuid5(REM_SCHEMA_NAMESPACE, fully_qualified_name)
    return model_uuid
65
+
66
+
67
def extract_model_schema_metadata(
    model: Type[BaseModel],
    table_name: str,
    entity_key_field: str,
    include_search_tool: bool = True,
) -> dict[str, Any]:
    """
    Build the `schemas` table payload describing a Pydantic model.

    The payload bundles a deterministic id, a JSON-schema based spec and a
    markdown document describing the table and each model field.

    Args:
        model: Pydantic model class
        table_name: Database table name
        entity_key_field: Field used as entity key in kv_store
        include_search_tool: If True, add search_rem tool for querying this table

    Returns:
        Dict with keys id, name, table_name, entity_key_field,
        embedding_fields, fqn, spec, content and category
    """
    # Identity: the fully qualified name drives the deterministic UUID.
    qualified_name = f"{model.__module__}.{model.__name__}"
    schema_id = generate_model_uuid(qualified_name)

    pydantic_schema = model.model_json_schema()

    # Fields selected for embedding, in model declaration order.
    embedding_fields = [
        field_name
        for field_name, field_info in model.model_fields.items()
        if should_embed_field(field_name, field_info)
    ]
    has_embeddings = bool(embedding_fields)

    base_description = model.__doc__ or f"Schema for {model.__name__}"
    if include_search_tool:
        search_note = (
            f"\n\nThis agent can search the `{table_name}` table using the `search_rem` tool. "
            f"Use REM query syntax: LOOKUP for exact match, FUZZY for typo-tolerant search, "
            f"SEARCH for semantic similarity, or SQL for complex queries."
        )
        tool_names = ["search_rem"]
        tools_cell = "`search_rem`"
    else:
        search_note = ""
        tool_names = []
        tools_cell = "None"

    # Note: default_search_table is used by create_agent to append a description
    # suffix to the search_rem tool when loading it dynamically
    extra = {
        "table_name": table_name,
        "entity_key_field": entity_key_field,
        "embedding_fields": embedding_fields,
        "fully_qualified_name": qualified_name,
        "tools": tool_names,
        "default_search_table": table_name,
        "has_embeddings": has_embeddings,
    }
    spec = {
        "type": "object",
        "description": base_description + search_note,
        "properties": pydantic_schema.get("properties", {}),
        "required": pydantic_schema.get("required", []),
        "json_schema_extra": extra,
    }

    # Cells that depend on whether any field is embedded.
    if embedding_fields:
        search_targets = ", ".join(embedding_fields)
        embedding_cell = ", ".join(f"`{f}`" for f in embedding_fields)
    else:
        search_targets = "content"
        embedding_cell = "None"

    overview = f"""# {model.__name__}

{base_description}

## Overview

The `{model.__name__}` entity is stored in the `{table_name}` table. Each record is uniquely
identified by its `{entity_key_field}` field for lookups and graph traversal.

## Search Capabilities

This schema includes the `search_rem` tool which supports:
- **LOOKUP**: O(1) exact match by {entity_key_field} (e.g., `LOOKUP "entity-name"`)
- **FUZZY**: Typo-tolerant search (e.g., `FUZZY "partial" THRESHOLD 0.3`)
- **SEARCH**: Semantic vector search on {search_targets} (e.g., `SEARCH "concept" FROM {table_name} LIMIT 10`)
- **SQL**: Complex queries (e.g., `SELECT * FROM {table_name} WHERE ...`)

## Table Info

| Property | Value |
|----------|-------|
| Table | `{table_name}` |
| Entity Key | `{entity_key_field}` |
| Embedding Fields | {embedding_cell} |
| Tools | {tools_cell} |

## Fields

"""

    # One markdown section per model field, appended after the overview.
    sections = []
    for field_name, field_info in model.model_fields.items():
        annotation = field_info.annotation
        type_label = str(annotation) if annotation else "Any"
        description = field_info.description or ""
        requirement = "Required" if field_info.is_required() else "Optional"
        lines = [
            f"### `{field_name}`\n",
            f"- **Type**: `{type_label}`\n",
            f"- **{requirement}**\n",
        ]
        if description:
            lines.append(f"- {description}\n")
        lines.append("\n")
        sections.append("".join(lines))
    content = overview + "".join(sections)

    return {
        "id": str(schema_id),
        "name": model.__name__,
        "table_name": table_name,
        "entity_key_field": entity_key_field,
        "embedding_fields": embedding_fields,
        "fqn": qualified_name,
        "spec": spec,
        "content": content,
        "category": "entity",
    }
181
+
182
+
183
def _sql_string_literal(value: Any) -> str:
    """Render *value* as a single-quoted SQL string literal, doubling embedded quotes."""
    return "'" + str(value).replace("'", "''") + "'"


def generate_schema_upsert_sql(schema_metadata: dict[str, Any]) -> str:
    """
    Generate SQL UPSERT statement for schemas table.

    Uses ON CONFLICT DO UPDATE for idempotency. Every value taken from
    ``schema_metadata`` is emitted as an escaped SQL string literal, and the
    ``metadata`` column is serialized with ``json.dumps`` so that quotes or
    backslashes in names cannot produce invalid JSON or break out of the
    literal.

    Args:
        schema_metadata: Dict from extract_model_schema_metadata(). Reads the
            keys id, name, table_name, entity_key_field, embedding_fields,
            fqn, spec and content; ``category`` is optional and defaults to
            'entity'.

    Returns:
        SQL INSERT ... ON CONFLICT statement (stripped of surrounding whitespace)
    """
    name = schema_metadata["name"]
    table_name = schema_metadata["table_name"]

    # Serialize the metadata column as real JSON instead of hand-assembled
    # text; for plain identifiers this yields the same text as before.
    metadata_json = json.dumps(
        {
            "table_name": table_name,
            "entity_key_field": schema_metadata["entity_key_field"],
            "embedding_fields": schema_metadata["embedding_fields"],
            "fqn": schema_metadata["fqn"],
        }
    )

    id_literal = _sql_string_literal(schema_metadata["id"])
    name_literal = _sql_string_literal(name)
    content_literal = _sql_string_literal(schema_metadata["content"])
    spec_literal = _sql_string_literal(json.dumps(schema_metadata["spec"]))
    category_literal = _sql_string_literal(schema_metadata.get("category", "entity"))
    metadata_literal = _sql_string_literal(metadata_json)

    # A line break inside the label would end the `--` comment early and turn
    # the remainder into executable SQL, so flatten it onto one line.
    comment_label = f"{name} ({table_name})".replace("\r", " ").replace("\n", " ")

    sql = f"""
-- Schema entry for {comment_label}
INSERT INTO schemas (id, tenant_id, name, content, spec, category, metadata)
VALUES (
    {id_literal}::uuid,
    'system',
    {name_literal},
    {content_literal},
    {spec_literal}::jsonb,
    {category_literal},
    {metadata_literal}::jsonb
)
ON CONFLICT (id) DO UPDATE SET
    name = EXCLUDED.name,
    content = EXCLUDED.content,
    spec = EXCLUDED.spec,
    category = EXCLUDED.category,
    metadata = EXCLUDED.metadata,
    updated_at = CURRENT_TIMESTAMP;
"""
    return sql.strip()
220
+
221
+
222
class SchemaGenerator:
    """
    Generate database schema from Pydantic models.

    Models come either from the model registry or from a directory scan.
    For every model the generator collects:
    - CREATE TABLE statements
    - Embeddings tables
    - KV_STORE triggers
    - Indexes

    Per-table results accumulate in ``self.schemas`` (keyed by table name)
    and are rendered into one SQL script by ``_generate_sql_output``.
    """

    def __init__(self, output_dir: Path | None = None):
        """
        Initialize schema generator.

        Args:
            output_dir: Optional directory for output files (defaults to package sql dir)
        """
        self.output_dir = output_dir or get_package_sql_dir()
        # table name -> schema dict produced by generate_schema_for_model()
        self.schemas: dict[str, dict] = {}

    def discover_models(self, directory: str | Path) -> dict[str, Type[BaseModel]]:
        """
        Discover all Pydantic models in a directory.

        Walks up from ``directory`` to the nearest ancestor named ``src``,
        puts it on ``sys.path``, then imports every non-underscore ``*.py``
        file below ``directory`` and collects the BaseModel subclasses that
        are defined in that module. Import failures are logged and skipped.

        Args:
            directory: Path to directory containing Python files with models

        Returns:
            Dict mapping model name to model class
        """
        import importlib
        import sys

        directory = Path(directory).resolve()
        models: dict[str, Type[BaseModel]] = {}

        logger.info(f"Discovering models in {directory}")

        # Add src directory to Python path to handle relative imports
        src_dir = directory
        while src_dir.name != "src" and src_dir.parent != src_dir:
            src_dir = src_dir.parent

        if src_dir.name == "src" and str(src_dir) not in sys.path:
            sys.path.insert(0, str(src_dir))
            logger.debug(f"Added {src_dir} to sys.path for relative imports")

        # Convert directory path to module path
        # e.g., /path/to/src/rem/models/entities -> rem.models.entities
        try:
            # Join path *parts* rather than replacing "/" so the dotted name
            # is also correct where the path separator is a backslash.
            module_path = ".".join(directory.relative_to(src_dir).parts)

            # Import the package first so a broken package fails fast
            importlib.import_module(module_path)

            # Sorted so that, when two files define a model with the same
            # name, the winner does not depend on filesystem listing order.
            for py_file in sorted(directory.rglob("*.py")):
                if py_file.name.startswith("_"):
                    continue

                try:
                    # Build module name from file path
                    rel_file = py_file.relative_to(src_dir).with_suffix("")
                    module_name = ".".join(rel_file.parts)

                    module = importlib.import_module(module_name)

                    # Find Pydantic models
                    for name, obj in inspect.getmembers(module):
                        if (
                            inspect.isclass(obj)
                            and issubclass(obj, BaseModel)
                            and obj is not BaseModel
                            and not name.startswith("_")
                            # Only include models defined in this module
                            and obj.__module__ == module_name
                        ):
                            models[name] = obj
                            logger.debug(f"Found model: {name} in {module_name}")

                except Exception as e:
                    # Best effort: one unloadable file must not abort the scan
                    logger.warning(f"Failed to load {py_file}: {e}")

        except Exception as e:
            logger.error(f"Failed to discover models in {directory}: {e}")

        logger.info(f"Discovered {len(models)} models")
        return models

    def infer_table_name(self, model: Type[BaseModel]) -> str:
        """
        Infer table name from model class name.

        Converts CamelCase to snake_case and pluralizes.

        Examples:
            Resource -> resources
            UserProfile -> user_profiles
            Category -> categories
            ApiKey -> api_keys

        Names that already end in "s" are returned unchanged.

        Args:
            model: Pydantic model class

        Returns:
            Table name
        """
        import re

        name = model.__name__

        # Convert CamelCase to snake_case
        name = re.sub(r"(.)([A-Z][a-z]+)", r"\1_\2", name)
        name = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", name).lower()

        # Simple pluralization
        if name.endswith("s"):
            return name
        if name.endswith("y"):
            # Only consonant + "y" becomes "ies"; vowel + "y" just takes "s"
            # (key -> keys, not keies).
            if len(name) > 1 and name[-2] in "aeiou":
                return name + "s"
            return name[:-1] + "ies"  # category -> categories
        return name + "s"  # resource -> resources

    def infer_entity_key_field(self, model: Type[BaseModel]) -> str:
        """
        Infer which field to use as entity_key in KV_STORE.

        Priority:
        1. Field with json_schema_extra={"entity_key": True}
        2. Field named "name" (human-readable identifier)
        3. Field named "key"
        4. Field named "uri"
        5. Field named "id" (fallback)

        Args:
            model: Pydantic model class

        Returns:
            Field name to use as entity_key
        """
        fields = model.model_fields

        # Check for explicit entity_key marker
        for field_name, field_info in fields.items():
            json_extra = getattr(field_info, "json_schema_extra", None)
            if isinstance(json_extra, dict) and json_extra.get("entity_key"):
                return field_name

        # Check for key fields in priority order: name -> key -> uri -> id
        # (matching sql_builder.get_entity_key convention)
        for candidate in ("name", "key", "uri", "id"):
            if candidate in fields:
                return candidate

        # Should never reach here for CoreModel subclasses (they all have id)
        logger.error(f"No suitable entity_key field found for {model.__name__}, using 'id'")
        return "id"

    async def generate_schema_for_model(
        self,
        model: Type[BaseModel],
        table_name: str | None = None,
        entity_key_field: str | None = None,
    ) -> dict:
        """
        Generate schema for a single model and remember it under its table name.

        Args:
            model: Pydantic model class
            table_name: Optional table name (inferred if not provided)
            entity_key_field: Optional entity key field (inferred if not provided)

        Returns:
            Dict returned by register_type(), extended with a
            "schema_metadata" entry for the schemas table
        """
        if table_name is None:
            table_name = self.infer_table_name(model)

        if entity_key_field is None:
            entity_key_field = self.infer_entity_key_field(model)

        logger.info(f"Generating schema for {model.__name__} -> {table_name}")

        schema = await register_type(
            model=model,
            table_name=table_name,
            entity_key_field=entity_key_field,
            tenant_scoped=True,
            create_embeddings=True,
            create_kv_trigger=True,
        )

        # Extract schema metadata for schemas table entry
        schema["schema_metadata"] = extract_model_schema_metadata(
            model=model,
            table_name=table_name,
            entity_key_field=entity_key_field,
        )

        self.schemas[table_name] = schema
        return schema

    async def generate_from_registry(
        self, output_file: str | None = None, include_core: bool = True
    ) -> str:
        """
        Generate complete schema from the model registry.

        Includes:
        1. REM's core models (if include_core=True)
        2. Models registered via rem.register_model() or rem.register_models()

        Args:
            output_file: Optional output file path (relative to output_dir)
            include_core: If True, include REM's core models (default: True)

        Returns:
            Complete SQL schema as string

        Example:
            import rem
            from rem.models.core import CoreModel

            # Register custom model
            @rem.register_model
            class CustomEntity(CoreModel):
                name: str

            # Generate schema (includes core + custom)
            generator = SchemaGenerator()
            schema = await generator.generate_from_registry()
        """
        from ...registry import get_model_registry

        registry = get_model_registry()
        models = registry.get_models(include_core=include_core)

        logger.info(f"Generating schema from registry: {len(models)} models")

        # Each registry entry carries the model plus its table/key overrides
        for ext in models.values():
            await self.generate_schema_for_model(
                ext.model,
                table_name=ext.table_name,
                entity_key_field=ext.entity_key_field,
            )

        return self._generate_sql_output(
            source="model registry",
            output_file=output_file,
        )

    async def generate_from_directory(
        self, directory: str | Path, output_file: str | None = None
    ) -> str:
        """
        Generate complete schema from all models in a directory.

        Note: For most use cases, prefer generate_from_registry() which uses
        the model registry pattern.

        Args:
            directory: Path to directory with Pydantic models
            output_file: Optional output file path (relative to output_dir)

        Returns:
            Complete SQL schema as string
        """
        # Table name and entity key are inferred for every discovered model
        for model in self.discover_models(directory).values():
            await self.generate_schema_for_model(model)

        return self._generate_sql_output(
            source=f"directory: {directory}",
            output_file=output_file,
        )

    def _generate_sql_output(
        self, source: str, output_file: str | None = None
    ) -> str:
        """
        Generate SQL output from accumulated schemas.

        The script consists of a header, a prerequisites check, per-table
        DDL, the schemas-table upserts, a migration record and a closing
        notice block.

        Args:
            source: Description of schema source (for header comment)
            output_file: Optional output file path (relative to output_dir)

        Returns:
            Complete SQL schema as string
        """
        import datetime

        banner = "-- ============================================================================"
        notice_rule = " RAISE NOTICE '============================================================';"

        sql_parts = [
            "-- REM Model Schema (install_models.sql)",
            "-- Generated from Pydantic models",
            f"-- Source: {source}",
            f"-- Generated at: {datetime.datetime.now().isoformat()}",
            "--",
            "-- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate",
            "--",
            "-- This script creates:",
            "-- 1. Primary entity tables",
            "-- 2. Embeddings tables (embeddings_<table>)",
            "-- 3. KV_STORE triggers for cache maintenance",
            "-- 4. Indexes (foreground only, background indexes separate)",
            "-- 5. Schema table entries (for agent-like table access)",
            "",
            banner,
            "-- PREREQUISITES CHECK",
            banner,
            "",
            "DO $$",
            "BEGIN",
            " -- Check that install.sql has been run",
            " IF NOT EXISTS (SELECT 1 FROM pg_tables WHERE tablename = 'kv_store') THEN",
            " RAISE EXCEPTION 'KV_STORE table not found. Run migrations/001_install.sql first.';",
            " END IF;",
            "",
            " IF NOT EXISTS (SELECT 1 FROM pg_extension WHERE extname = 'vector') THEN",
            " RAISE EXCEPTION 'pgvector extension not found. Run migrations/001_install.sql first.';",
            " END IF;",
            "",
            " RAISE NOTICE 'Prerequisites check passed';",
            "END $$;",
            "",
        ]

        # Add each table schema
        for table_name, schema in self.schemas.items():
            statements = schema["sql"]
            sql_parts.extend(
                [
                    "-- " + "=" * 70,
                    f"-- {table_name.upper()} (Model: {schema['model']})",
                    "-- " + "=" * 70,
                    "",
                ]
            )

            # Primary table
            if "table" in statements:
                sql_parts.extend([statements["table"], ""])

            # Embeddings table (skipped when absent or empty)
            if statements.get("embeddings"):
                sql_parts.extend(
                    [f"-- Embeddings for {table_name}", statements["embeddings"], ""]
                )

            # KV_STORE trigger
            if "kv_trigger" in statements:
                sql_parts.extend(
                    [f"-- KV_STORE trigger for {table_name}", statements["kv_trigger"], ""]
                )

        # Add schema table entries (every entity table is also an "agent")
        sql_parts.extend(
            [
                banner,
                "-- SCHEMA TABLE ENTRIES",
                "-- Every entity table gets a schemas entry for agent-like access",
                banner,
                "",
            ]
        )
        for schema in self.schemas.values():
            if "schema_metadata" in schema:
                sql_parts.append(generate_schema_upsert_sql(schema["schema_metadata"]))
                sql_parts.append("")

        # Add migration record
        sql_parts.extend(
            [
                banner,
                "-- RECORD MIGRATION",
                banner,
                "",
                "INSERT INTO rem_migrations (name, type, version)",
                "VALUES ('install_models.sql', 'models', '1.0.0')",
                "ON CONFLICT (name) DO UPDATE",
                "SET applied_at = CURRENT_TIMESTAMP,",
                " applied_by = CURRENT_USER;",
                "",
            ]
        )

        # Completion message
        sql_parts.extend(
            [
                "DO $$",
                "BEGIN",
                notice_rule,
                f" RAISE NOTICE 'REM Model Schema Applied: {len(self.schemas)} tables';",
                notice_rule,
            ]
        )
        for table_name in sorted(self.schemas):
            embeddable = len(self.schemas[table_name].get("embeddable_fields", []))
            embed_info = f" ({embeddable} embeddable fields)" if embeddable else ""
            sql_parts.append(f" RAISE NOTICE ' ✓ {table_name}{embed_info}';")
        sql_parts.extend(
            [
                " RAISE NOTICE '';",
                " RAISE NOTICE 'Next: Run background indexes if needed';",
                " RAISE NOTICE ' rem db migrate --background-indexes';",
                notice_rule,
                "END $$;",
            ]
        )

        complete_sql = "\n".join(sql_parts)

        # Write to file if specified
        if output_file:
            output_path = Path(self.output_dir) / output_file
            output_path.parent.mkdir(parents=True, exist_ok=True)
            # The script contains non-ASCII text (the check mark in the
            # notices), so the encoding must not depend on the platform default.
            output_path.write_text(complete_sql, encoding="utf-8")
            logger.info(f"Schema written to {output_path}")

        return complete_sql

    def generate_background_indexes(self) -> str:
        """
        Generate SQL for background index creation.

        Emits one CREATE INDEX CONCURRENTLY statement (HNSW, cosine ops) per
        accumulated table that has embeddable fields. Intended to be run
        after initial data load.

        Returns:
            SQL for background index creation
        """
        sql_parts = [
            "-- Background index creation",
            "-- Run AFTER initial data load to avoid blocking writes",
            "",
        ]

        for table_name, schema in self.schemas.items():
            # Tables without embeddable fields need no vector index
            if not schema.get("embeddable_fields"):
                continue

            embeddings_table = f"embeddings_{table_name}"
            sql_parts.extend(
                [
                    f"-- HNSW vector index for {embeddings_table}",
                    f"CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_{embeddings_table}_vector_hnsw",
                    f"ON {embeddings_table}",
                    "USING hnsw (embedding vector_cosine_ops);",
                    "",
                ]
            )

        return "\n".join(sql_parts)