remdb 0.3.14__py3-none-any.whl → 0.3.133__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. rem/agentic/README.md +76 -0
  2. rem/agentic/__init__.py +15 -0
  3. rem/agentic/agents/__init__.py +16 -2
  4. rem/agentic/agents/sse_simulator.py +502 -0
  5. rem/agentic/context.py +51 -27
  6. rem/agentic/llm_provider_models.py +301 -0
  7. rem/agentic/mcp/tool_wrapper.py +112 -17
  8. rem/agentic/otel/setup.py +93 -4
  9. rem/agentic/providers/phoenix.py +302 -109
  10. rem/agentic/providers/pydantic_ai.py +215 -26
  11. rem/agentic/schema.py +361 -21
  12. rem/agentic/tools/rem_tools.py +3 -3
  13. rem/api/README.md +215 -1
  14. rem/api/deps.py +255 -0
  15. rem/api/main.py +132 -40
  16. rem/api/mcp_router/resources.py +1 -1
  17. rem/api/mcp_router/server.py +26 -5
  18. rem/api/mcp_router/tools.py +465 -7
  19. rem/api/routers/admin.py +494 -0
  20. rem/api/routers/auth.py +70 -0
  21. rem/api/routers/chat/completions.py +402 -20
  22. rem/api/routers/chat/models.py +88 -10
  23. rem/api/routers/chat/otel_utils.py +33 -0
  24. rem/api/routers/chat/sse_events.py +542 -0
  25. rem/api/routers/chat/streaming.py +642 -45
  26. rem/api/routers/dev.py +81 -0
  27. rem/api/routers/feedback.py +268 -0
  28. rem/api/routers/messages.py +473 -0
  29. rem/api/routers/models.py +78 -0
  30. rem/api/routers/query.py +360 -0
  31. rem/api/routers/shared_sessions.py +406 -0
  32. rem/auth/middleware.py +126 -27
  33. rem/cli/commands/README.md +237 -64
  34. rem/cli/commands/cluster.py +1808 -0
  35. rem/cli/commands/configure.py +1 -3
  36. rem/cli/commands/db.py +386 -143
  37. rem/cli/commands/experiments.py +418 -27
  38. rem/cli/commands/process.py +14 -8
  39. rem/cli/commands/schema.py +97 -50
  40. rem/cli/main.py +27 -6
  41. rem/config.py +10 -3
  42. rem/models/core/core_model.py +7 -1
  43. rem/models/core/experiment.py +54 -0
  44. rem/models/core/rem_query.py +5 -2
  45. rem/models/entities/__init__.py +21 -0
  46. rem/models/entities/domain_resource.py +38 -0
  47. rem/models/entities/feedback.py +123 -0
  48. rem/models/entities/message.py +30 -1
  49. rem/models/entities/session.py +83 -0
  50. rem/models/entities/shared_session.py +180 -0
  51. rem/registry.py +10 -4
  52. rem/schemas/agents/rem.yaml +7 -3
  53. rem/services/content/service.py +92 -20
  54. rem/services/embeddings/api.py +4 -4
  55. rem/services/embeddings/worker.py +16 -16
  56. rem/services/phoenix/client.py +154 -14
  57. rem/services/postgres/README.md +159 -15
  58. rem/services/postgres/__init__.py +2 -1
  59. rem/services/postgres/diff_service.py +531 -0
  60. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  61. rem/services/postgres/repository.py +132 -0
  62. rem/services/postgres/schema_generator.py +205 -4
  63. rem/services/postgres/service.py +6 -6
  64. rem/services/rem/parser.py +44 -9
  65. rem/services/rem/service.py +36 -2
  66. rem/services/session/compression.py +24 -1
  67. rem/services/session/reload.py +1 -1
  68. rem/settings.py +324 -23
  69. rem/sql/background_indexes.sql +21 -16
  70. rem/sql/migrations/001_install.sql +387 -54
  71. rem/sql/migrations/002_install_models.sql +2320 -393
  72. rem/sql/migrations/003_optional_extensions.sql +326 -0
  73. rem/sql/migrations/004_cache_system.sql +548 -0
  74. rem/utils/__init__.py +18 -0
  75. rem/utils/date_utils.py +2 -2
  76. rem/utils/model_helpers.py +156 -1
  77. rem/utils/schema_loader.py +220 -22
  78. rem/utils/sql_paths.py +146 -0
  79. rem/utils/sql_types.py +3 -1
  80. rem/workers/__init__.py +3 -1
  81. rem/workers/db_listener.py +579 -0
  82. rem/workers/unlogged_maintainer.py +463 -0
  83. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/METADATA +335 -226
  84. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/RECORD +86 -66
  85. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
  86. rem/sql/002_install_models.sql +0 -1068
  87. rem/sql/install_models.sql +0 -1051
  88. rem/sql/migrations/003_seed_default_user.sql +0 -48
  89. {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,463 @@
1
+ """
2
+ UNLOGGED Table Maintainer.
3
+
4
+ Handles backup (snapshot) and restore of PostgreSQL UNLOGGED tables:
5
+ - kv_store: O(1) entity lookups, graph edges for REM queries
6
+ - rate_limits: Rate limiting counters
7
+
8
+ UNLOGGED tables are NOT replicated to standby servers and are truncated
9
+ on crash/restart. This worker ensures they are rebuilt after:
10
+ 1. Primary pod restart
11
+ 2. Failover to a replica (replica has empty UNLOGGED tables)
12
+ 3. Crash recovery
13
+
14
+ Modes:
15
+ --snapshot Push current state to S3 watermark
16
+ --restore Force rebuild kv_store from entity tables
17
+ --check-and-restore Check if rebuild needed, restore if so (idempotent)
18
+
19
+ Triggers:
20
+ 1. Argo Events: Watches CNPG Cluster CR for status.currentPrimary changes
21
+ 2. CronJob: Periodic check every 5 minutes (belt & suspenders)
22
+ 3. Manual: python -m rem.workers.unlogged_maintainer --restore
23
+
24
+ Usage:
25
+ python -m rem.workers.unlogged_maintainer --check-and-restore
26
+ python -m rem.workers.unlogged_maintainer --snapshot
27
+ python -m rem.workers.unlogged_maintainer --restore
28
+
29
+ # Kubernetes Job (triggered by Argo Events or CronJob):
30
+ # command: ["python", "-m", "rem.workers.unlogged_maintainer", "--check-and-restore"]
31
+ """
32
+
33
+ import asyncio
34
+ import json
35
+ import time
36
+ from typing import Any
37
+
38
+ import click
39
+ from loguru import logger
40
+
41
+ from ..services.postgres import get_postgres_service
42
+ from ..registry import get_model_registry
43
+ from ..utils.date_utils import utc_now
44
+
45
+
46
+ # Advisory lock ID for preventing concurrent rebuilds
47
+ # Using a fixed large integer that's unlikely to collide
48
+ REBUILD_LOCK_ID = 2147483647
49
+
50
+
51
class UnloggedMaintainer:
    """
    Maintains UNLOGGED tables across PostgreSQL restarts and failovers.

    UNLOGGED tables (kv_store, rate_limits) provide high-performance caching
    but are not persisted to WAL and not replicated. They are truncated:
    - On primary crash/restart
    - On failover (replicas have empty UNLOGGED tables by design)

    This class provides:
    - Detection: Check if rebuild is needed (kv_store empty but entities exist)
    - Restore: Rebuild kv_store from entity tables using rebuild_kv_store()
    - Snapshot: Push watermark to S3 for observability and future delta rebuilds

    Concurrency: rebuild_with_lock() serializes rebuilds across processes via
    a PostgreSQL advisory lock (REBUILD_LOCK_ID).
    """

    def __init__(self) -> None:
        # Postgres service is shared/process-wide; S3 provider is created
        # lazily so importing this module never touches S3 configuration.
        self.db = get_postgres_service()
        self._s3 = None  # Lazy load

    @property
    def s3(self):
        """Lazy load S3 provider on first access (cached thereafter)."""
        if self._s3 is None:
            # Imported here to avoid a hard S3 dependency at module import time.
            from ..services.fs.s3_provider import S3Provider
            self._s3 = S3Provider()
        return self._s3

    def _get_watermark_uri(self) -> str:
        """Get S3 URI for watermark state file."""
        from ..settings import settings
        # Use the main bucket with a state/ prefix
        return f"s3://{settings.s3.bucket_name}/state/unlogged-watermark.json"

    def _get_entity_tables(self) -> list[str]:
        """
        Get list of entity tables that feed into kv_store.

        These are the tables that have kv_store triggers and should
        have data if kv_store needs to be populated.

        Returns:
            Registry-derived table names, filtered to the known kv_store
            trigger tables (resources, moments, users, schemas, files,
            messages).
        """
        # Get from registry - these are the CoreModel tables
        registry = get_model_registry()
        models = registry.get_models(include_core=True)

        # Convert model names to table names (pluralize, lowercase)
        tables = []
        for name, ext in models.items():
            if ext.table_name:
                # Explicit table name on the registry entry wins.
                tables.append(ext.table_name)
            else:
                # Default: lowercase + 's' (e.g., Resource -> resources)
                table_name = name.lower()
                if not table_name.endswith('s'):
                    table_name += 's'
                tables.append(table_name)

        # Filter to tables that actually have kv_store triggers
        # These are the main entity tables
        # NOTE: this whitelist also makes the f-string SQL in
        # get_entity_counts()/push_watermark() safe from injection.
        kv_tables = ['resources', 'moments', 'users', 'schemas', 'files', 'messages']
        return [t for t in tables if t in kv_tables]

    async def is_primary(self) -> bool:
        """
        Check if we're connected to the primary (not a replica).

        UNLOGGED tables cannot be accessed on replicas - they error with:
        "cannot access temporary or unlogged relations during recovery"

        Returns:
            True only when pg_is_in_recovery() is false; False on a replica
            or when the query itself fails (fail-safe: treat unknown as
            "not primary").
        """
        try:
            result = await self.db.fetchval("SELECT NOT pg_is_in_recovery()")
            return bool(result)
        except Exception as e:
            logger.warning(f"Could not determine primary status: {e}")
            return False

    async def get_kv_store_count(self) -> int:
        """
        Get count of entries in kv_store.

        Returns:
            Row count, or -1 as a sentinel when kv_store cannot be accessed
            (typical on a replica during recovery). Other errors propagate.
        """
        try:
            count = await self.db.fetchval("SELECT count(*) FROM kv_store")
            return int(count) if count else 0
        except Exception as e:
            # If we get an error about UNLOGGED tables, we're on a replica
            if "cannot access" in str(e) or "recovery" in str(e):
                logger.warning("Cannot access kv_store (likely on replica)")
                return -1  # Signal that we can't access
            raise

    async def get_entity_counts(self) -> dict[str, int]:
        """
        Get counts from all entity tables.

        Soft-deleted rows (deleted_at set) are excluded; a table whose count
        query fails is reported as 0 rather than aborting the scan.

        Returns:
            Mapping of table name -> live row count.
        """
        counts = {}
        for table in self._get_entity_tables():
            try:
                # Table names come from the _get_entity_tables() whitelist,
                # so interpolating them here is safe.
                count = await self.db.fetchval(
                    f"SELECT count(*) FROM {table} WHERE deleted_at IS NULL"
                )
                counts[table] = int(count) if count else 0
            except Exception as e:
                logger.warning(f"Could not count {table}: {e}")
                counts[table] = 0
        return counts

    async def check_rebuild_needed(self) -> tuple[bool, str]:
        """
        Check if UNLOGGED tables need to be rebuilt.

        Returns:
            Tuple of (needs_rebuild: bool, reason: str)

        Detection logic:
        1. Must be connected to primary (replicas can't access UNLOGGED tables)
        2. kv_store is empty (count = 0)
        3. At least one entity table has data
        """
        # Check if we're on primary
        if not await self.is_primary():
            return False, "Connected to replica, skipping (UNLOGGED tables not accessible)"

        # Check kv_store count
        kv_count = await self.get_kv_store_count()
        if kv_count < 0:
            # -1 sentinel from get_kv_store_count(): table not accessible.
            return False, "Cannot access kv_store"

        if kv_count > 0:
            return False, f"kv_store has {kv_count} entries, no rebuild needed"

        # kv_store is empty - check if entities exist
        entity_counts = await self.get_entity_counts()
        total_entities = sum(entity_counts.values())

        if total_entities == 0:
            # Empty cache AND empty source tables means nothing was lost.
            return False, "kv_store empty but no entities exist (fresh database)"

        # Rebuild needed!
        tables_with_data = [t for t, c in entity_counts.items() if c > 0]
        return True, (
            f"kv_store empty but {total_entities} entities exist in "
            f"{tables_with_data}. Likely failover or restart."
        )

    async def check_and_rebuild_if_needed(self) -> bool:
        """
        Check if UNLOGGED tables need rebuild and restore if so.

        This is the main entry point for automated triggers.
        Safe to call multiple times (idempotent).

        Returns:
            True if rebuild was performed, False otherwise
        """
        needs_rebuild, reason = await self.check_rebuild_needed()

        if not needs_rebuild:
            logger.info(f"No rebuild needed: {reason}")
            return False

        logger.warning(f"Rebuild needed: {reason}")
        await self.rebuild_with_lock()
        return True

    async def rebuild_with_lock(self) -> dict[str, Any]:
        """
        Rebuild kv_store with advisory lock to prevent concurrent rebuilds.

        Uses PostgreSQL advisory locks to ensure only one rebuild runs at a time,
        even across multiple pods/processes.

        NOTE(review): pg_advisory_lock is session-scoped — the unlock in the
        finally block only works if self.db.fetchval/fetch run on the same
        pooled connection as the lock acquisition. Verify the postgres
        service pins a connection; otherwise the lock may leak until the
        acquiring session closes.

        Returns:
            Dict with rebuild statistics on success, or
            {"skipped": True, "reason": "lock_held"} when another process
            holds the lock.

        Raises:
            Exception: re-raised from the rebuild query after logging.
        """
        # Try to acquire advisory lock (non-blocking)
        locked = await self.db.fetchval(
            "SELECT pg_try_advisory_lock($1)", REBUILD_LOCK_ID
        )

        if not locked:
            logger.info("Another process is rebuilding, skipping")
            return {"skipped": True, "reason": "lock_held"}

        try:
            logger.info("Acquired rebuild lock, starting kv_store rebuild...")
            start_time = time.time()

            # Call the PostgreSQL rebuild function
            # (defined in SQL migrations; returns one row per rebuilt table).
            results = await self.db.fetch("SELECT * FROM rebuild_kv_store()")

            duration_ms = (time.time() - start_time) * 1000

            # Parse results
            tables_rebuilt = []
            total_rows = 0
            for row in results:
                table_name = row.get('table_name', 'unknown')
                rows_inserted = row.get('rows_inserted', 0)
                tables_rebuilt.append(table_name)
                total_rows += rows_inserted
                logger.info(f"  Rebuilt {rows_inserted} entries for {table_name}")

            logger.success(
                f"Rebuilt kv_store: {total_rows} entries "
                f"from {len(tables_rebuilt)} tables in {duration_ms:.0f}ms"
            )

            # Push watermark to S3
            # (best-effort: push_watermark swallows its own errors).
            await self.push_watermark()

            return {
                "success": True,
                "tables_rebuilt": tables_rebuilt,
                "total_rows": total_rows,
                "duration_ms": duration_ms,
            }

        except Exception as e:
            logger.error(f"Rebuild failed: {e}")
            raise

        finally:
            # Always release the lock
            await self.db.fetch(
                "SELECT pg_advisory_unlock($1)", REBUILD_LOCK_ID
            )
            logger.debug("Released rebuild lock")

    async def push_watermark(self) -> dict[str, Any]:
        """
        Push current state to S3 watermark for observability and delta rebuilds.

        Watermark contains:
        - Timestamp of snapshot
        - Current primary instance info
        - kv_store count
        - Per-table entity counts and max updated_at timestamps

        Best-effort: any failure is logged and reported as {"error": ...}
        instead of raising, so a rebuild never fails on the watermark push.

        Returns:
            The watermark dict that was written (or {"error": str} on failure)
        """
        try:
            # Get current state
            kv_count = await self.get_kv_store_count()

            # Get server info
            # NOTE(review): inet_server_addr() is NULL on a Unix-socket
            # connection, which would make server_info None — confirm
            # deployments always connect over TCP.
            server_info = await self.db.fetchval(
                "SELECT inet_server_addr()::text || ':' || inet_server_port()::text"
            )

            # Get per-table stats with max updated_at for delta rebuild
            tables = {}
            for table in self._get_entity_tables():
                try:
                    # Table name is whitelist-filtered; see _get_entity_tables().
                    row = await self.db.fetchrow(f"""
                        SELECT
                            count(*) as count,
                            max(updated_at) as max_updated
                        FROM {table}
                        WHERE deleted_at IS NULL
                    """)
                    tables[table] = {
                        "count": int(row['count']) if row['count'] else 0,
                        "max_updated_at": (
                            row['max_updated'].isoformat()
                            if row['max_updated'] else None
                        ),
                    }
                except Exception as e:
                    logger.warning(f"Could not get stats for {table}: {e}")
                    tables[table] = {"count": 0, "max_updated_at": None}

            watermark = {
                "snapshot_ts": utc_now().isoformat(),
                "primary_instance": server_info,
                "kv_store_count": kv_count,
                "tables": tables,
            }

            # Write to S3
            uri = self._get_watermark_uri()
            self.s3.write(uri, watermark)

            logger.info(
                f"Pushed watermark to S3: kv_store={kv_count}, "
                f"tables={list(tables.keys())}"
            )

            return watermark

        except Exception as e:
            logger.error(f"Failed to push watermark to S3: {e}")
            # Don't fail the rebuild if watermark push fails
            return {"error": str(e)}

    async def read_watermark(self) -> dict[str, Any] | None:
        """
        Read watermark from S3.

        Returns:
            Watermark dict or None if not found (or on any S3 error,
            which is logged at warning level).
        """
        try:
            uri = self._get_watermark_uri()
            if self.s3.exists(uri):
                return self.s3.read(uri)
            return None
        except Exception as e:
            logger.warning(f"Could not read watermark from S3: {e}")
            return None
356
+
357
+
358
async def _run_maintainer(
    snapshot: bool,
    restore: bool,
    check_and_restore: bool,
) -> int:
    """
    Async entry point for the maintainer.

    Connects the Postgres service, dispatches on the requested mode
    (snapshot / restore / check-and-restore, falling back to the
    idempotent check-and-restore when no mode is given), and always
    disconnects on the way out.

    Returns exit code (0 for success, 1 for error).
    """
    worker = UnloggedMaintainer()

    try:
        await worker.db.connect()

        # Each mode branch returns its own exit code; guard-style so the
        # precedence (snapshot > restore > check-and-restore) stays obvious.
        if snapshot:
            logger.info("Pushing watermark snapshot to S3...")
            outcome = await worker.push_watermark()
            if "error" in outcome:
                logger.error(f"Snapshot failed: {outcome['error']}")
                return 1
            logger.success("Watermark snapshot complete")
            return 0

        if restore:
            logger.info("Forcing kv_store rebuild...")
            outcome = await worker.rebuild_with_lock()
            if outcome.get("skipped"):
                # Another process held the rebuild lock; not an error.
                logger.warning(f"Rebuild skipped: {outcome.get('reason')}")
                return 0
            if not outcome.get("success"):
                return 1
            logger.success(
                f"Rebuild complete: {outcome['total_rows']} rows "
                f"in {outcome['duration_ms']:.0f}ms"
            )
            return 0

        if check_and_restore:
            logger.info("Checking if rebuild is needed...")
            did_rebuild = await worker.check_and_rebuild_if_needed()
            if did_rebuild:
                logger.success("Rebuild completed successfully")
            else:
                logger.info("No rebuild was needed")
            return 0

        # No explicit mode: behave like --check-and-restore.
        logger.info("No mode specified, defaulting to --check-and-restore")
        await worker.check_and_rebuild_if_needed()
        return 0

    except Exception as e:
        logger.exception(f"Maintainer failed: {e}")
        return 1

    finally:
        await worker.db.disconnect()
417
+
418
+
419
@click.command()
@click.option(
    '--snapshot',
    is_flag=True,
    help='Push current state to S3 watermark (for observability)',
)
@click.option(
    '--restore',
    is_flag=True,
    help='Force rebuild kv_store from entity tables',
)
@click.option(
    '--check-and-restore',
    'check_and_restore',
    is_flag=True,
    help='Check if rebuild needed, restore if so (idempotent, default)',
)
def main(snapshot: bool, restore: bool, check_and_restore: bool):
    """
    UNLOGGED Table Maintainer for REM.

    Ensures kv_store and other UNLOGGED tables are rebuilt after
    PostgreSQL restarts or failovers.

    \b
    Examples:
        # Check and rebuild if needed (safe to run anytime)
        python -m rem.workers.unlogged_maintainer --check-and-restore

        # Force rebuild (useful for manual recovery)
        python -m rem.workers.unlogged_maintainer --restore

        # Push snapshot to S3 (for monitoring)
        python -m rem.workers.unlogged_maintainer --snapshot
    """
    # With no flag at all, fall back to the safe idempotent mode.
    mode_selected = snapshot or restore or check_and_restore
    if not mode_selected:
        check_and_restore = True

    # Run the async worker and propagate its exit code to the shell.
    exit_code = asyncio.run(_run_maintainer(snapshot, restore, check_and_restore))
    raise SystemExit(exit_code)
460
+
461
+
462
+ if __name__ == "__main__":
463
+ main()