remdb 0.3.242__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (235)
  1. rem/__init__.py +129 -0
  2. rem/agentic/README.md +760 -0
  3. rem/agentic/__init__.py +54 -0
  4. rem/agentic/agents/README.md +155 -0
  5. rem/agentic/agents/__init__.py +38 -0
  6. rem/agentic/agents/agent_manager.py +311 -0
  7. rem/agentic/agents/sse_simulator.py +502 -0
  8. rem/agentic/context.py +425 -0
  9. rem/agentic/context_builder.py +360 -0
  10. rem/agentic/llm_provider_models.py +301 -0
  11. rem/agentic/mcp/__init__.py +0 -0
  12. rem/agentic/mcp/tool_wrapper.py +273 -0
  13. rem/agentic/otel/__init__.py +5 -0
  14. rem/agentic/otel/setup.py +240 -0
  15. rem/agentic/providers/phoenix.py +926 -0
  16. rem/agentic/providers/pydantic_ai.py +854 -0
  17. rem/agentic/query.py +117 -0
  18. rem/agentic/query_helper.py +89 -0
  19. rem/agentic/schema.py +737 -0
  20. rem/agentic/serialization.py +245 -0
  21. rem/agentic/tools/__init__.py +5 -0
  22. rem/agentic/tools/rem_tools.py +242 -0
  23. rem/api/README.md +657 -0
  24. rem/api/deps.py +253 -0
  25. rem/api/main.py +460 -0
  26. rem/api/mcp_router/prompts.py +182 -0
  27. rem/api/mcp_router/resources.py +820 -0
  28. rem/api/mcp_router/server.py +243 -0
  29. rem/api/mcp_router/tools.py +1605 -0
  30. rem/api/middleware/tracking.py +172 -0
  31. rem/api/routers/admin.py +520 -0
  32. rem/api/routers/auth.py +898 -0
  33. rem/api/routers/chat/__init__.py +5 -0
  34. rem/api/routers/chat/child_streaming.py +394 -0
  35. rem/api/routers/chat/completions.py +702 -0
  36. rem/api/routers/chat/json_utils.py +76 -0
  37. rem/api/routers/chat/models.py +202 -0
  38. rem/api/routers/chat/otel_utils.py +33 -0
  39. rem/api/routers/chat/sse_events.py +546 -0
  40. rem/api/routers/chat/streaming.py +950 -0
  41. rem/api/routers/chat/streaming_utils.py +327 -0
  42. rem/api/routers/common.py +18 -0
  43. rem/api/routers/dev.py +87 -0
  44. rem/api/routers/feedback.py +276 -0
  45. rem/api/routers/messages.py +620 -0
  46. rem/api/routers/models.py +86 -0
  47. rem/api/routers/query.py +362 -0
  48. rem/api/routers/shared_sessions.py +422 -0
  49. rem/auth/README.md +258 -0
  50. rem/auth/__init__.py +36 -0
  51. rem/auth/jwt.py +367 -0
  52. rem/auth/middleware.py +318 -0
  53. rem/auth/providers/__init__.py +16 -0
  54. rem/auth/providers/base.py +376 -0
  55. rem/auth/providers/email.py +215 -0
  56. rem/auth/providers/google.py +163 -0
  57. rem/auth/providers/microsoft.py +237 -0
  58. rem/cli/README.md +517 -0
  59. rem/cli/__init__.py +8 -0
  60. rem/cli/commands/README.md +299 -0
  61. rem/cli/commands/__init__.py +3 -0
  62. rem/cli/commands/ask.py +549 -0
  63. rem/cli/commands/cluster.py +1808 -0
  64. rem/cli/commands/configure.py +495 -0
  65. rem/cli/commands/db.py +828 -0
  66. rem/cli/commands/dreaming.py +324 -0
  67. rem/cli/commands/experiments.py +1698 -0
  68. rem/cli/commands/mcp.py +66 -0
  69. rem/cli/commands/process.py +388 -0
  70. rem/cli/commands/query.py +109 -0
  71. rem/cli/commands/scaffold.py +47 -0
  72. rem/cli/commands/schema.py +230 -0
  73. rem/cli/commands/serve.py +106 -0
  74. rem/cli/commands/session.py +453 -0
  75. rem/cli/dreaming.py +363 -0
  76. rem/cli/main.py +123 -0
  77. rem/config.py +244 -0
  78. rem/mcp_server.py +41 -0
  79. rem/models/core/__init__.py +49 -0
  80. rem/models/core/core_model.py +70 -0
  81. rem/models/core/engram.py +333 -0
  82. rem/models/core/experiment.py +672 -0
  83. rem/models/core/inline_edge.py +132 -0
  84. rem/models/core/rem_query.py +246 -0
  85. rem/models/entities/__init__.py +68 -0
  86. rem/models/entities/domain_resource.py +38 -0
  87. rem/models/entities/feedback.py +123 -0
  88. rem/models/entities/file.py +57 -0
  89. rem/models/entities/image_resource.py +88 -0
  90. rem/models/entities/message.py +64 -0
  91. rem/models/entities/moment.py +123 -0
  92. rem/models/entities/ontology.py +181 -0
  93. rem/models/entities/ontology_config.py +131 -0
  94. rem/models/entities/resource.py +95 -0
  95. rem/models/entities/schema.py +87 -0
  96. rem/models/entities/session.py +84 -0
  97. rem/models/entities/shared_session.py +180 -0
  98. rem/models/entities/subscriber.py +175 -0
  99. rem/models/entities/user.py +93 -0
  100. rem/py.typed +0 -0
  101. rem/registry.py +373 -0
  102. rem/schemas/README.md +507 -0
  103. rem/schemas/__init__.py +6 -0
  104. rem/schemas/agents/README.md +92 -0
  105. rem/schemas/agents/core/agent-builder.yaml +235 -0
  106. rem/schemas/agents/core/moment-builder.yaml +178 -0
  107. rem/schemas/agents/core/rem-query-agent.yaml +226 -0
  108. rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
  109. rem/schemas/agents/core/simple-assistant.yaml +19 -0
  110. rem/schemas/agents/core/user-profile-builder.yaml +163 -0
  111. rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
  112. rem/schemas/agents/examples/contract-extractor.yaml +134 -0
  113. rem/schemas/agents/examples/cv-parser.yaml +263 -0
  114. rem/schemas/agents/examples/hello-world.yaml +37 -0
  115. rem/schemas/agents/examples/query.yaml +54 -0
  116. rem/schemas/agents/examples/simple.yaml +21 -0
  117. rem/schemas/agents/examples/test.yaml +29 -0
  118. rem/schemas/agents/rem.yaml +132 -0
  119. rem/schemas/evaluators/hello-world/default.yaml +77 -0
  120. rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
  121. rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
  122. rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
  123. rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
  124. rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
  125. rem/services/__init__.py +18 -0
  126. rem/services/audio/INTEGRATION.md +308 -0
  127. rem/services/audio/README.md +376 -0
  128. rem/services/audio/__init__.py +15 -0
  129. rem/services/audio/chunker.py +354 -0
  130. rem/services/audio/transcriber.py +259 -0
  131. rem/services/content/README.md +1269 -0
  132. rem/services/content/__init__.py +5 -0
  133. rem/services/content/providers.py +760 -0
  134. rem/services/content/service.py +762 -0
  135. rem/services/dreaming/README.md +230 -0
  136. rem/services/dreaming/__init__.py +53 -0
  137. rem/services/dreaming/affinity_service.py +322 -0
  138. rem/services/dreaming/moment_service.py +251 -0
  139. rem/services/dreaming/ontology_service.py +54 -0
  140. rem/services/dreaming/user_model_service.py +297 -0
  141. rem/services/dreaming/utils.py +39 -0
  142. rem/services/email/__init__.py +10 -0
  143. rem/services/email/service.py +522 -0
  144. rem/services/email/templates.py +360 -0
  145. rem/services/embeddings/__init__.py +11 -0
  146. rem/services/embeddings/api.py +127 -0
  147. rem/services/embeddings/worker.py +435 -0
  148. rem/services/fs/README.md +662 -0
  149. rem/services/fs/__init__.py +62 -0
  150. rem/services/fs/examples.py +206 -0
  151. rem/services/fs/examples_paths.py +204 -0
  152. rem/services/fs/git_provider.py +935 -0
  153. rem/services/fs/local_provider.py +760 -0
  154. rem/services/fs/parsing-hooks-examples.md +172 -0
  155. rem/services/fs/paths.py +276 -0
  156. rem/services/fs/provider.py +460 -0
  157. rem/services/fs/s3_provider.py +1042 -0
  158. rem/services/fs/service.py +186 -0
  159. rem/services/git/README.md +1075 -0
  160. rem/services/git/__init__.py +17 -0
  161. rem/services/git/service.py +469 -0
  162. rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
  163. rem/services/phoenix/README.md +453 -0
  164. rem/services/phoenix/__init__.py +46 -0
  165. rem/services/phoenix/client.py +960 -0
  166. rem/services/phoenix/config.py +88 -0
  167. rem/services/phoenix/prompt_labels.py +477 -0
  168. rem/services/postgres/README.md +757 -0
  169. rem/services/postgres/__init__.py +49 -0
  170. rem/services/postgres/diff_service.py +599 -0
  171. rem/services/postgres/migration_service.py +427 -0
  172. rem/services/postgres/programmable_diff_service.py +635 -0
  173. rem/services/postgres/pydantic_to_sqlalchemy.py +562 -0
  174. rem/services/postgres/register_type.py +353 -0
  175. rem/services/postgres/repository.py +481 -0
  176. rem/services/postgres/schema_generator.py +661 -0
  177. rem/services/postgres/service.py +802 -0
  178. rem/services/postgres/sql_builder.py +355 -0
  179. rem/services/rate_limit.py +113 -0
  180. rem/services/rem/README.md +318 -0
  181. rem/services/rem/__init__.py +23 -0
  182. rem/services/rem/exceptions.py +71 -0
  183. rem/services/rem/executor.py +293 -0
  184. rem/services/rem/parser.py +180 -0
  185. rem/services/rem/queries.py +196 -0
  186. rem/services/rem/query.py +371 -0
  187. rem/services/rem/service.py +608 -0
  188. rem/services/session/README.md +374 -0
  189. rem/services/session/__init__.py +13 -0
  190. rem/services/session/compression.py +488 -0
  191. rem/services/session/pydantic_messages.py +310 -0
  192. rem/services/session/reload.py +85 -0
  193. rem/services/user_service.py +130 -0
  194. rem/settings.py +1877 -0
  195. rem/sql/background_indexes.sql +52 -0
  196. rem/sql/migrations/001_install.sql +983 -0
  197. rem/sql/migrations/002_install_models.sql +3157 -0
  198. rem/sql/migrations/003_optional_extensions.sql +326 -0
  199. rem/sql/migrations/004_cache_system.sql +282 -0
  200. rem/sql/migrations/005_schema_update.sql +145 -0
  201. rem/sql/migrations/migrate_session_id_to_uuid.sql +45 -0
  202. rem/utils/AGENTIC_CHUNKING.md +597 -0
  203. rem/utils/README.md +628 -0
  204. rem/utils/__init__.py +61 -0
  205. rem/utils/agentic_chunking.py +622 -0
  206. rem/utils/batch_ops.py +343 -0
  207. rem/utils/chunking.py +108 -0
  208. rem/utils/clip_embeddings.py +276 -0
  209. rem/utils/constants.py +97 -0
  210. rem/utils/date_utils.py +228 -0
  211. rem/utils/dict_utils.py +98 -0
  212. rem/utils/embeddings.py +436 -0
  213. rem/utils/examples/embeddings_example.py +305 -0
  214. rem/utils/examples/sql_types_example.py +202 -0
  215. rem/utils/files.py +323 -0
  216. rem/utils/markdown.py +16 -0
  217. rem/utils/mime_types.py +158 -0
  218. rem/utils/model_helpers.py +492 -0
  219. rem/utils/schema_loader.py +649 -0
  220. rem/utils/sql_paths.py +146 -0
  221. rem/utils/sql_types.py +350 -0
  222. rem/utils/user_id.py +81 -0
  223. rem/utils/vision.py +325 -0
  224. rem/workers/README.md +506 -0
  225. rem/workers/__init__.py +7 -0
  226. rem/workers/db_listener.py +579 -0
  227. rem/workers/db_maintainer.py +74 -0
  228. rem/workers/dreaming.py +502 -0
  229. rem/workers/engram_processor.py +312 -0
  230. rem/workers/sqs_file_processor.py +193 -0
  231. rem/workers/unlogged_maintainer.py +463 -0
  232. remdb-0.3.242.dist-info/METADATA +1632 -0
  233. remdb-0.3.242.dist-info/RECORD +235 -0
  234. remdb-0.3.242.dist-info/WHEEL +4 -0
  235. remdb-0.3.242.dist-info/entry_points.txt +2 -0
rem/cli/commands/db.py ADDED
@@ -0,0 +1,828 @@
1
+ """
2
+ Database management commands.
3
+
4
+ Usage:
5
+ rem db migrate # Apply both install.sql and install_models.sql
6
+ rem db migrate --install # Apply only install.sql
7
+ rem db migrate --models # Apply only install_models.sql
8
+ rem db migrate --background-indexes # Apply background indexes
9
+ rem db status # Show migration status
10
+ rem db rebuild-cache # Rebuild KV_STORE cache
11
+ """
12
+
13
+ import asyncio
14
+ import hashlib
15
+ import subprocess
16
+ import time
17
+ from pathlib import Path
18
+ from typing import Type
19
+
20
+ import click
21
+ from loguru import logger
22
+ from pydantic import BaseModel
23
+
24
+
25
def get_connection_string() -> str:
    """
    Get a libpq-style PostgreSQL connection string from environment variables.

    Reads POSTGRES__HOST/PORT/DATABASE/USER/PASSWORD, falling back to local
    defaults. Values that are empty or contain spaces, single quotes, or
    backslashes are quoted per libpq keyword/value rules, so the resulting
    string stays valid even for passwords with special characters.

    Returns:
        Connection string for psql (e.g. "host=localhost port=5432 dbname=remdb user=postgres").
    """
    import os

    def _quote(value: str) -> str:
        # libpq keyword/value syntax: plain tokens need no quoting; anything
        # with whitespace/quotes/backslashes must be single-quoted, with
        # backslash-escaped backslashes and single quotes.
        if value and not any(ch in value for ch in " '\\"):
            return value
        escaped = value.replace("\\", "\\\\").replace("'", "\\'")
        return f"'{escaped}'"

    # Try environment variables first
    host = os.getenv("POSTGRES__HOST", "localhost")
    port = os.getenv("POSTGRES__PORT", "5432")
    database = os.getenv("POSTGRES__DATABASE", "remdb")
    user = os.getenv("POSTGRES__USER", "postgres")
    password = os.getenv("POSTGRES__PASSWORD", "")

    # Build connection string
    parts = [
        f"host={_quote(host)}",
        f"port={_quote(port)}",
        f"dbname={_quote(database)}",
        f"user={_quote(user)}",
    ]
    if password:
        parts.append(f"password={_quote(password)}")

    return " ".join(parts)
47
+
48
+
49
async def run_sql_file_async(file_path: Path, db) -> tuple[bool, str, float]:
    """
    Execute a SQL file and report success, a message, and elapsed time.

    Deliberately uses the synchronous psycopg3 driver: it handles
    multi-statement SQL scripts (as migration files are) in a single
    execute, which the async drivers do not do reliably.

    Args:
        file_path: Path to SQL file
        db: PostgresService instance (used to get connection info)

    Returns:
        Tuple of (success, output, execution_time_ms)
    """
    if not file_path.exists():
        return False, f"File not found: {file_path}", 0

    started = time.time()

    try:
        script = file_path.read_text(encoding="utf-8")

        # Synchronous psycopg3; connection details come from settings.
        import psycopg
        from ...settings import settings

        with psycopg.connect(settings.postgres.connection_string) as conn:
            with conn.cursor() as cur:
                cur.execute(script)
            conn.commit()
    except Exception as exc:
        elapsed_ms = (time.time() - started) * 1000
        return False, str(exc), elapsed_ms

    elapsed_ms = (time.time() - started) * 1000
    return True, f"Successfully executed {file_path.name}", elapsed_ms
91
+
92
+
93
def calculate_checksum(file_path: Path) -> str:
    """Return the SHA256 hex digest of *file_path*, or "" if it does not exist."""
    try:
        contents = file_path.read_bytes()
    except FileNotFoundError:
        return ""
    return hashlib.sha256(contents).hexdigest()
98
+
99
+
100
@click.command()
@click.option(
    "--background-indexes",
    is_flag=True,
    help="Also apply background HNSW indexes (run after data load)",
)
def migrate(background_indexes: bool):
    """
    Apply standard database migrations (001_install + 002_install_models).

    This is a convenience command for initial setup. It applies:
    1. 001_install.sql - Core infrastructure (extensions, kv_store)
    2. 002_install_models.sql - Entity tables from registered models

    For incremental changes, use the diff-based workflow instead:
    rem db schema generate # Regenerate from models
    rem db diff # Check what changed
    rem db apply <file> # Apply changes

    Examples:
    rem db migrate # Initial setup
    rem db migrate --background-indexes # Include HNSW indexes
    """
    # Click handlers are synchronous; bridge into the async implementation.
    asyncio.run(_migrate_async(background_indexes))
124
+
125
+
126
async def _migrate_async(background_indexes: bool):
    """
    Async implementation of the ``migrate`` command.

    Discovers all migration files (package + optional user overrides),
    optionally appends the background-index script, then applies each file
    with a fresh synchronous psycopg connection, echoing per-file timing.
    Aborts on the first failure, leaving earlier migrations applied.
    """
    from ...settings import settings
    from ...utils.sql_paths import (
        get_package_sql_dir,
        get_user_sql_dir,
        list_all_migrations,
    )

    click.echo()
    click.echo("REM Database Migration")
    click.echo("=" * 60)

    # Find package SQL directory (bundled with the installed wheel).
    try:
        package_sql_dir = get_package_sql_dir()
        click.echo(f"Package SQL: {package_sql_dir}")
    except FileNotFoundError as e:
        click.secho(f"✗ {e}", fg="red")
        raise click.Abort()

    # Check for user migrations (optional overlay directory).
    user_sql_dir = get_user_sql_dir()
    if user_sql_dir:
        click.echo(f"User SQL: {user_sql_dir}")

    # Get all migrations (package + user)
    all_migrations = list_all_migrations()

    if not all_migrations:
        click.secho("✗ No migration files found", fg="red")
        raise click.Abort()

    click.echo(f"Found {len(all_migrations)} migration(s)")
    click.echo()

    # Add background indexes if requested (applied last, after data tables).
    migrations_to_apply = [(f, f.stem) for f in all_migrations]

    if background_indexes:
        bg_indexes = package_sql_dir / "background_indexes.sql"
        if bg_indexes.exists():
            migrations_to_apply.append((bg_indexes, "Background Indexes"))
        else:
            click.secho("⚠ background_indexes.sql not found, skipping", fg="yellow")

    # Check all files exist (they should, but verify) before touching the DB,
    # so we fail fast instead of stopping mid-migration.
    for file_path, description in migrations_to_apply:
        if not file_path.exists():
            click.secho(f"✗ {file_path.name} not found", fg="red")
            if "002" in file_path.name:
                # 002 is generated from the model registry, not shipped.
                click.echo()
                click.secho("Generate it first with:", fg="yellow")
                click.secho(" rem db schema generate", fg="yellow")
            raise click.Abort()

    # Apply each migration. Synchronous psycopg is used on purpose: it runs
    # multi-statement SQL scripts in a single execute.
    import psycopg
    conn_str = settings.postgres.connection_string
    total_time = 0.0

    for file_path, description in migrations_to_apply:
        click.echo(f"Applying: {file_path.name}")

        sql_content = file_path.read_text(encoding="utf-8")
        start_time = time.time()

        try:
            # Fresh connection per file: each migration commits independently.
            with psycopg.connect(conn_str) as conn:
                with conn.cursor() as cur:
                    cur.execute(sql_content)
                conn.commit()

            exec_time = (time.time() - start_time) * 1000
            total_time += exec_time
            click.secho(f" ✓ Applied in {exec_time:.0f}ms", fg="green")

        except Exception as e:
            click.secho(f" ✗ Failed: {e}", fg="red")
            raise click.Abort()

        click.echo()

    click.echo("=" * 60)
    click.secho("✓ All migrations applied", fg="green")
    click.echo(f" Total time: {total_time:.0f}ms")
    click.echo()
    click.echo("Next: verify with 'rem db diff'")
214
+
215
+
216
@click.command()
@click.option(
    "--connection",
    "-c",
    help="PostgreSQL connection string (overrides environment)",
)
def status(connection: str | None):
    """
    Show migration status.

    Displays:
    - Applied migrations
    - Execution times
    - Last applied timestamps
    """
    # Click handlers are synchronous; bridge into the async implementation.
    asyncio.run(_status_async(connection))
232
+
233
+
234
async def _status_async(connection: str | None):
    """
    Async implementation of the ``status`` command.

    Connects via the configured PostgresService and prints one summary
    section per migration type from the ``migration_status()`` SQL function.

    NOTE(review): the ``connection`` parameter is accepted (from the
    --connection option) but never used — the service always connects with
    its own settings. Confirm whether the override should be honored here.
    """
    from ...services.postgres import get_postgres_service

    click.echo()
    click.echo("REM Migration Status")
    click.echo("=" * 60)

    db = get_postgres_service()
    if not db:
        click.secho("Error: PostgreSQL is disabled in settings.", fg="red")
        raise click.Abort()

    try:
        await db.connect()

        # Query migration status via the tracking function installed by
        # the migrations themselves.
        query = "SELECT * FROM migration_status();"

        try:
            rows = await db.fetch(query)

            if not rows:
                click.echo("No migrations found")
                click.echo()
                click.secho("Run: rem db migrate", fg="yellow")
                return

            # Display results, one section per migration type.
            click.echo()
            for row in rows:
                migration_type = row.get("migration_type", "unknown")
                count = row.get("count", 0)
                last_applied = row.get("last_applied", "never")
                total_time = row.get("total_time_ms", 0)

                click.echo(f"{migration_type.upper()}:")
                click.echo(f" Count: {count}")
                click.echo(f" Last Applied: {last_applied}")
                click.echo(f" Total Time: {total_time}ms")
                click.echo()

        except Exception as e:
            # A missing relation/function means migrations were never applied,
            # which gets a friendlier hint than a raw SQL error.
            error_str = str(e)
            if "does not exist" in error_str or "relation" in error_str or "function" in error_str:
                click.secho("✗ Migration tracking not found", fg="red")
                click.echo()
                click.secho("Run: rem db migrate", fg="yellow")
            else:
                click.secho(f"✗ Error: {error_str}", fg="red")
                raise click.Abort()

    finally:
        await db.disconnect()
288
+
289
+
290
@click.command()
@click.option(
    "--connection",
    "-c",
    help="PostgreSQL connection string (overrides environment)",
)
def rebuild_cache(connection: str | None):
    """
    Rebuild KV_STORE cache from entity tables.

    Call this after:
    - Database restart (UNLOGGED tables are cleared)
    - Manual cache invalidation
    - Bulk data imports
    """
    dsn = connection or get_connection_string()

    click.echo("Rebuilding KV_STORE cache...")

    try:
        proc = subprocess.run(
            ["psql", dsn, "-c", "SELECT rebuild_kv_store();"],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError as exc:
        detail = exc.stderr or exc.stdout or str(exc)
        click.secho(f"✗ Error: {detail}", fg="red")
        raise click.Abort()

    click.secho("✓ Cache rebuilt successfully", fg="green")

    # Surface server-side NOTICE messages emitted during the rebuild.
    for line in proc.stdout.split("\n") + proc.stderr.split("\n"):
        if "NOTICE:" in line:
            notice = line.split("NOTICE:")[-1].strip()
            if notice:
                click.echo(f" {notice}")
332
+
333
+
334
@click.command()
@click.argument("file_path", type=click.Path(exists=True, path_type=Path))
@click.option("--table", "-t", default=None, help="Target table name (required for non-YAML formats)")
@click.option("--user-id", default=None, help="User ID to scope data privately (default: public/shared)")
@click.option("--dry-run", is_flag=True, help="Show what would be loaded without loading")
def load(file_path: Path, table: str | None, user_id: str | None, dry_run: bool):
    """
    Load data from file into database.

    Supports YAML with embedded metadata, or any tabular format via Polars
    (jsonl, parquet, csv, json, arrow, etc.). For non-YAML formats, use --table.

    Examples:
    rem db load data.yaml # YAML with metadata
    rem db load data.jsonl -t resources # Any Polars-supported format
    """
    # Click handlers are synchronous; bridge into the async implementation.
    asyncio.run(_load_async(file_path, table, user_id, dry_run))
351
+
352
+
353
def _load_dataframe_from_file(file_path: Path) -> "pl.DataFrame":
    """Load any Polars-supported file format into a DataFrame."""
    import polars as pl

    # Dispatch on lowercase suffix; aliases map to the same reader.
    readers = {
        ".jsonl": pl.read_ndjson,
        ".ndjson": pl.read_ndjson,
        ".parquet": pl.read_parquet,
        ".pq": pl.read_parquet,
        ".csv": pl.read_csv,
        ".json": pl.read_json,
        ".ipc": pl.read_ipc,
        ".arrow": pl.read_ipc,
    }

    suffix = file_path.suffix.lower()
    reader = readers.get(suffix)
    if reader is None:
        raise ValueError(f"Unsupported file format: {suffix}. Use any Polars-supported format.")
    return reader(file_path)
371
+
372
+
373
async def _load_async(file_path: Path, table: str | None, user_id: str | None, dry_run: bool):
    """
    Async implementation of the ``load`` command.

    Parses the input file into a list of ``{"table": ..., "rows": [...]}``
    definitions (YAML files carry this shape directly; tabular files are
    wrapped into a single definition using --table), validates each row
    against the registered Pydantic model for its table, and upserts via the
    repository layer. Tables in DIRECT_INSERT_TABLES bypass the model layer
    and are inserted with raw SQL.

    NOTE(review): ``user_id`` is only used for the scope log line here; rows
    keep whatever tenant_id/user_id the data file sets. Confirm intended.
    NOTE(review): the ``SharedSession`` import appears unused.
    """
    import polars as pl
    import yaml
    from ...models.core.inline_edge import InlineEdge
    from ...models.entities import SharedSession
    from ...services.postgres import get_postgres_service
    from ...utils.model_helpers import get_table_name
    from ... import get_model_registry

    logger.info(f"Loading data from: {file_path}")
    scope_msg = f"user: {user_id}" if user_id else "public"
    logger.info(f"Scope: {scope_msg}")

    suffix = file_path.suffix.lower()
    is_yaml = suffix in {".yaml", ".yml"}

    # Build MODEL_MAP dynamically from registry (table name -> model class).
    registry = get_model_registry()
    registry.register_core_models()
    MODEL_MAP = {
        get_table_name(model): model
        for model in registry.get_model_classes().values()
    }

    # Non-CoreModel tables that need direct SQL insertion
    DIRECT_INSERT_TABLES = {"shared_sessions"}

    # Parse file based on format
    if is_yaml:
        # YAML with embedded metadata
        with open(file_path) as f:
            data = yaml.safe_load(f)

        if not isinstance(data, list):
            logger.error("YAML must be a list of table definitions")
            raise click.Abort()

        if dry_run:
            logger.info("DRY RUN - Would load:")
            logger.info(yaml.dump(data, default_flow_style=False))
            return

        table_defs = data
    else:
        # Polars-supported format - require --table
        if not table:
            logger.error(f"For {suffix} files, --table is required. Example: rem db load {file_path.name} -t resources")
            raise click.Abort()

        try:
            df = _load_dataframe_from_file(file_path)
        except Exception as e:
            logger.error(f"Failed to load file: {e}")
            raise click.Abort()

        rows = df.to_dicts()

        if dry_run:
            logger.info(f"DRY RUN - Would load {len(rows)} rows to table '{table}':")
            logger.info(f"Columns: {list(df.columns)}")

            # Validate first row against model if table is known
            if table in MODEL_MAP and rows:
                from ...utils.model_helpers import validate_data_for_model
                result = validate_data_for_model(MODEL_MAP[table], rows[0])
                if result.extra_fields:
                    logger.warning(f"Unknown fields (ignored): {result.extra_fields}")
                if result.valid:
                    logger.success(f"Sample row validates OK. Required: {result.required_fields or '(none)'}")
                else:
                    result.log_errors("Sample row")
            return

        # Wrap as single table definition
        table_defs = [{"table": table, "rows": rows}]

    # Connect to database
    pg = get_postgres_service()
    if not pg:
        logger.error("PostgreSQL is disabled in settings. Enable with POSTGRES__ENABLED=true")
        raise click.Abort()

    await pg.connect()

    # Start embedding worker for generating embeddings
    if pg.embedding_worker:
        await pg.embedding_worker.start()

    try:
        total_loaded = 0

        for table_def in table_defs:
            table_name = table_def["table"]
            rows = table_def.get("rows", [])

            # Handle direct insert tables (non-CoreModel)
            if table_name in DIRECT_INSERT_TABLES:
                for row_data in rows:
                    # tenant_id is optional - NULL means public/shared

                    if table_name == "shared_sessions":
                        await pg.fetch(
                            """INSERT INTO shared_sessions
                            (session_id, owner_user_id, shared_with_user_id, tenant_id)
                            VALUES ($1, $2, $3, $4)
                            ON CONFLICT DO NOTHING""",
                            row_data["session_id"],
                            row_data["owner_user_id"],
                            row_data["shared_with_user_id"],
                            row_data.get("tenant_id"),  # Optional - NULL means public
                        )
                        total_loaded += 1
                        logger.success(f"Loaded shared_session: {row_data['owner_user_id']} -> {row_data['shared_with_user_id']}")
                continue

            if table_name not in MODEL_MAP:
                logger.warning(f"Unknown table: {table_name}, skipping")
                continue

            model_class = MODEL_MAP[table_name]

            for row_idx, row_data in enumerate(rows):
                # tenant_id and user_id are optional - NULL means public/shared data
                # Data files can explicitly set tenant_id/user_id if needed

                # Convert graph_edges to InlineEdge format if present
                if "graph_edges" in row_data:
                    row_data["graph_edges"] = [
                        InlineEdge(**edge).model_dump(mode='json')
                        for edge in row_data["graph_edges"]
                    ]

                # Convert ISO timestamp strings to datetimes for *_timestamp/*_at
                # columns; non-date strings are left untouched.
                from ...utils.date_utils import parse_iso
                for key, value in list(row_data.items()):
                    if isinstance(value, str) and (key.endswith("_timestamp") or key.endswith("_at")):
                        try:
                            row_data[key] = parse_iso(value)
                        except (ValueError, TypeError):
                            pass

                from ...services.postgres.repository import Repository
                from ...utils.model_helpers import validate_data_for_model

                # Validate the row against the model; abort the whole load on
                # the first invalid row.
                result = validate_data_for_model(model_class, row_data)
                if not result.valid:
                    result.log_errors(f"Row {row_idx + 1} ({table_name})")
                    raise click.Abort()

                repo = Repository(model_class, table_name, pg)
                await repo.upsert(result.instance)  # type: ignore[arg-type]
                total_loaded += 1

                name = getattr(result.instance, 'name', getattr(result.instance, 'id', '?'))
                logger.success(f"Loaded {table_name[:-1]}: {name}")

        logger.success(f"Data loaded successfully! Total rows: {total_loaded}")

        # Wait for embeddings to complete before disconnecting, so queued
        # embedding tasks are not dropped.
        if pg.embedding_worker and pg.embedding_worker.running:
            queue_size = pg.embedding_worker.task_queue.qsize()
            if queue_size > 0:
                logger.info(f"Waiting for {queue_size} embeddings to complete...")
            await pg.embedding_worker.stop()
            logger.success("Embeddings generated successfully")

    finally:
        await pg.disconnect()
542
+
543
+
544
@click.command()
@click.option(
    "--check",
    is_flag=True,
    help="Exit with non-zero status if drift detected (for CI)",
)
@click.option(
    "--generate",
    is_flag=True,
    help="Generate incremental migration file from diff",
)
@click.option(
    "--strategy",
    "-s",
    type=click.Choice(["additive", "full", "safe"]),
    default="additive",
    help="Migration strategy: additive (no drops, default), full (all changes), safe (additive + type widenings)",
)
@click.option(
    "--models",
    "-m",
    type=click.Path(exists=True, path_type=Path),
    default=None,
    help="Directory containing Pydantic models (default: auto-detect)",
)
@click.option(
    "--output-dir",
    "-o",
    type=click.Path(path_type=Path),
    default=None,
    help="Output directory for generated migration (default: sql/migrations)",
)
@click.option(
    "--message",
    default="schema_update",
    help="Migration message/description (used in filename)",
)
def diff(
    check: bool,
    generate: bool,
    strategy: str,
    models: Path | None,
    output_dir: Path | None,
    message: str,
):
    """
    Compare database schema against Pydantic models.

    Uses Alembic autogenerate to detect differences between:
    - Your Pydantic models (the target schema)
    - The current database (what's actually deployed)

    Strategies:
    additive Only ADD columns/tables/indexes (safe, no data loss) [default]
    full All changes including DROPs (use with caution)
    safe Additive + safe column type changes (widenings only)

    Examples:
    rem db diff # Show additive changes only
    rem db diff --strategy full # Show all changes including drops
    rem db diff --generate # Create migration file
    rem db diff --check # CI mode: exit 1 if drift

    Workflow:
    1. Develop locally, modify Pydantic models
    2. Run 'rem db diff' to see changes
    3. Run 'rem db diff --generate' to create migration
    4. Review generated SQL, then 'rem db apply <file>'
    """
    # Click handlers are synchronous; bridge into the async implementation.
    asyncio.run(_diff_async(check, generate, strategy, models, output_dir, message))
614
+
615
+
616
async def _diff_async(
    check: bool,
    generate: bool,
    strategy: str,
    models: Path | None,
    output_dir: Path | None,
    message: str,
) -> None:
    """Async implementation of the ``diff`` command.

    Args:
        check: CI mode — exit with status 1 when any drift is detected.
        generate: Write a migration SQL file for the detected changes.
        strategy: Change-filtering strategy (e.g. 'additive', 'full', 'safe').
        models: Optional models directory; service default when None.
        output_dir: Destination for generated migrations; packaged
            sql/migrations directory when None.
        message: Migration description used in the generated filename.

    Raises:
        SystemExit: Exit code 1 in ``--check`` mode when drift exists.
        click.Abort: On any unexpected failure (logged with traceback).
    """
    from ...services.postgres.diff_service import DiffService

    click.echo()
    click.echo("REM Schema Diff")
    click.echo("=" * 60)
    click.echo(f"Strategy: {strategy}")

    # Initialize diff service
    diff_service = DiffService(models_dir=models, strategy=strategy)

    try:
        click.echo("Comparing Pydantic models against database...")
        click.echo()

        result = diff_service.compute_diff()

        if not result.has_changes:
            click.secho("✓ No schema drift detected", fg="green")
            click.echo(" Database matches source (tables, functions, triggers, views)")
            if result.filtered_count > 0:
                click.echo()
                click.secho(f" ({result.filtered_count} destructive change(s) hidden by '{strategy}' strategy)", fg="yellow")
                click.echo(" Use --strategy full to see all changes")
            # No drift: returning before the --check branch yields exit 0 in CI.
            return

        # Show changes
        click.secho(f"⚠ Schema drift detected: {result.change_count} change(s)", fg="yellow")
        if result.filtered_count > 0:
            click.secho(f" ({result.filtered_count} destructive change(s) hidden by '{strategy}' strategy)", fg="yellow")
        click.echo()

        # Both sections use the same +/-/~ coloring convention, so they
        # share one rendering helper instead of two duplicated loops.
        _echo_change_section("Table Changes:", result.summary)
        _echo_change_section(
            "Programmable Objects (functions/triggers/views):",
            result.programmable_summary,
        )

        # Generate migration if requested
        if generate:
            if output_dir is None:
                output_dir = _default_migrations_dir()

            click.echo(f"Generating migration to: {output_dir}")
            migration_path = diff_service.generate_migration_file(output_dir, message)

            if migration_path:
                click.secho(f"✓ Migration generated: {migration_path.name}", fg="green")
                click.echo()
                click.echo("Next steps:")
                click.echo(" 1. Review the generated SQL file")
                click.echo(" 2. Run: rem db apply <file>")
            else:
                click.echo("No migration file generated (no changes)")

        # CI check mode: drift exists at this point, so fail the build.
        if check:
            click.echo()
            click.secho("✗ Schema drift detected (--check mode)", fg="red")
            raise SystemExit(1)

    except SystemExit:
        # Deliberate exit from --check mode; don't convert to Abort.
        raise
    except Exception as e:
        click.secho(f"✗ Error: {e}", fg="red")
        logger.exception("Diff failed")
        # Chain the cause so tracebacks show the underlying failure.
        raise click.Abort() from e


def _echo_change_section(header: str, lines) -> None:
    """Print one colorized diff summary section; no-op when empty.

    Lines prefixed '+' render green (additions), '-' red (removals),
    '~' yellow (modifications); anything else is printed uncolored.
    """
    if not lines:
        return
    click.echo(header)
    for line in lines:
        if line.startswith("+"):
            click.secho(f" {line}", fg="green")
        elif line.startswith("-"):
            click.secho(f" {line}", fg="red")
        elif line.startswith("~"):
            click.secho(f" {line}", fg="yellow")
        else:
            click.echo(f" {line}")
    click.echo()


def _default_migrations_dir() -> Path:
    """Resolve the packaged sql/migrations directory.

    Prefers ``importlib.resources.files`` (Python 3.9+); falls back to a
    path derived from the installed ``rem`` package location.
    """
    import importlib.resources

    try:
        sql_ref = importlib.resources.files("rem") / "sql" / "migrations"
        return Path(str(sql_ref))
    except AttributeError:
        import rem

        # Fix: the previous fallback used Path(rem.__file__).parent.parent,
        # which resolves one level ABOVE the package (e.g. site-packages/
        # sql/migrations) and disagrees with the importlib.resources path
        # above (<rem package>/sql/migrations). Use the package dir itself.
        return Path(rem.__file__).parent / "sql" / "migrations"
722
+
723
+
724
@click.command()
@click.argument("sql_file", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--log/--no-log",
    default=True,
    help="Log migration to rem_migrations table (default: yes)",
)
@click.option(
    "--dry-run",
    is_flag=True,
    help="Show SQL that would be executed without running it",
)
def apply(sql_file: Path, log: bool, dry_run: bool) -> None:
    """
    Apply a SQL file directly to the database.

    This is the simple, code-as-source-of-truth approach:
    - Pydantic models define the schema
    - `rem db diff` detects drift
    - `rem db diff --generate` creates migration SQL
    - `rem db apply <file>` runs it

    Examples:
        rem db apply migrations/004_add_field.sql
        rem db apply --dry-run migrations/004_add_field.sql
        rem db apply --no-log migrations/004_add_field.sql
    """
    # Click callbacks are synchronous; drive the async implementation
    # to completion here so all DB work can stay async.
    asyncio.run(_apply_async(sql_file, log, dry_run))
752
+
753
+
754
async def _apply_async(sql_file: Path, log: bool, dry_run: bool) -> None:
    """Async implementation of the ``apply`` command.

    Args:
        sql_file: SQL file to execute (existence validated by click).
        dry_run: Print the SQL without executing anything.
        log: Record the application in the ``rem_migrations`` table.

    Raises:
        click.Abort: When the database is unreachable or execution fails.
    """
    from ...services.postgres import get_postgres_service

    click.echo()
    click.echo(f"Applying: {sql_file.name}")
    click.echo("=" * 60)

    # Read SQL content
    sql_content = sql_file.read_text(encoding="utf-8")

    if dry_run:
        click.echo()
        click.echo("SQL to execute (dry run):")
        click.echo("-" * 40)
        click.echo(sql_content)
        click.echo("-" * 40)
        click.echo()
        click.secho("Dry run - no changes made", fg="yellow")
        return

    # Connectivity check; the actual execution uses a raw psycopg
    # connection below so the file runs as a single script/transaction.
    db = get_postgres_service()
    if not db:
        click.secho("✗ Could not connect to database", fg="red")
        raise click.Abort()

    start_time = time.time()

    try:
        import psycopg
        from ...settings import settings

        conn_str = settings.postgres.connection_string

        with psycopg.connect(conn_str) as conn:
            with conn.cursor() as cur:
                cur.execute(sql_content)
            conn.commit()

            # Log to rem_migrations if requested (upsert keyed on name so
            # re-applying the same file refreshes its timestamp/checksum).
            if log:
                checksum = calculate_checksum(sql_file)
                with conn.cursor() as cur:
                    cur.execute(
                        """
                        INSERT INTO rem_migrations (name, type, checksum, applied_by)
                        VALUES (%s, 'diff', %s, CURRENT_USER)
                        ON CONFLICT (name) DO UPDATE SET
                            applied_at = CURRENT_TIMESTAMP,
                            checksum = EXCLUDED.checksum
                        """,
                        (sql_file.name, checksum[:16]),
                    )
                conn.commit()

        execution_time = (time.time() - start_time) * 1000
        click.secho(f"✓ Applied successfully in {execution_time:.0f}ms", fg="green")

        if log:
            click.echo(f" Logged to rem_migrations as '{sql_file.name}'")

    except Exception as e:
        click.secho(f"✗ Failed: {e}", fg="red")
        # Consistency fix: log the traceback like _diff_async does, and
        # chain the cause instead of discarding it.
        logger.exception("Apply failed")
        raise click.Abort() from e
819
+
820
+
821
def register_commands(db_group) -> None:
    """Attach all `rem db` subcommands to the given click group.

    Commands keep their own names except rebuild_cache, which is
    exposed with an explicit dashed alias.
    """
    # (command, explicit name or None to use the command's own name)
    registrations = (
        (migrate, None),
        (status, None),
        (rebuild_cache, "rebuild-cache"),
        (load, None),
        (diff, None),
        (apply, None),
    )
    for command, alias in registrations:
        db_group.add_command(command, name=alias)