remdb 0.3.14__py3-none-any.whl → 0.3.133__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -27
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +112 -17
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +302 -109
- rem/agentic/providers/pydantic_ai.py +215 -26
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +215 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +132 -40
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +465 -7
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +70 -0
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +642 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +1 -3
- rem/cli/commands/db.py +386 -143
- rem/cli/commands/experiments.py +418 -27
- rem/cli/commands/process.py +14 -8
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +27 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/experiment.py +54 -0
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/registry.py +10 -4
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/service.py +92 -20
- rem/services/embeddings/api.py +4 -4
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/client.py +154 -14
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +205 -4
- rem/services/postgres/service.py +6 -6
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +24 -1
- rem/services/session/reload.py +1 -1
- rem/settings.py +324 -23
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/date_utils.py +2 -2
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +220 -22
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/METADATA +335 -226
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/RECORD +86 -66
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1051
- rem/sql/migrations/003_seed_default_user.sql +0 -48
- {remdb-0.3.14.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
rem/workers/unlogged_maintainer.py (new file, +463 −0)

@@ -0,0 +1,463 @@

```python
"""
UNLOGGED Table Maintainer.

Handles backup (snapshot) and restore of PostgreSQL UNLOGGED tables:
- kv_store: O(1) entity lookups, graph edges for REM queries
- rate_limits: Rate limiting counters

UNLOGGED tables are NOT replicated to standby servers and are truncated
on crash/restart. This worker ensures they are rebuilt after:
1. Primary pod restart
2. Failover to a replica (replica has empty UNLOGGED tables)
3. Crash recovery

Modes:
    --snapshot           Push current state to S3 watermark
    --restore            Force rebuild kv_store from entity tables
    --check-and-restore  Check if rebuild needed, restore if so (idempotent)

Triggers:
1. Argo Events: Watches CNPG Cluster CR for status.currentPrimary changes
2. CronJob: Periodic check every 5 minutes (belt & suspenders)
3. Manual: python -m rem.workers.unlogged_maintainer --restore

Usage:
    python -m rem.workers.unlogged_maintainer --check-and-restore
    python -m rem.workers.unlogged_maintainer --snapshot
    python -m rem.workers.unlogged_maintainer --restore

    # Kubernetes Job (triggered by Argo Events or CronJob):
    # command: ["python", "-m", "rem.workers.unlogged_maintainer", "--check-and-restore"]
"""

import asyncio
import json
import time
from typing import Any

import click
from loguru import logger

from ..services.postgres import get_postgres_service
from ..registry import get_model_registry
from ..utils.date_utils import utc_now


# Advisory lock ID for preventing concurrent rebuilds
# Using a fixed large integer that's unlikely to collide
REBUILD_LOCK_ID = 2147483647


class UnloggedMaintainer:
    """
    Maintains UNLOGGED tables across PostgreSQL restarts and failovers.

    UNLOGGED tables (kv_store, rate_limits) provide high-performance caching
    but are not persisted to WAL and not replicated. They are truncated:
    - On primary crash/restart
    - On failover (replicas have empty UNLOGGED tables by design)

    This class provides:
    - Detection: Check if rebuild is needed (kv_store empty but entities exist)
    - Restore: Rebuild kv_store from entity tables using rebuild_kv_store()
    - Snapshot: Push watermark to S3 for observability and future delta rebuilds
    """

    def __init__(self):
        self.db = get_postgres_service()
        self._s3 = None  # Lazy load

    @property
    def s3(self):
        """Lazy load S3 provider."""
        if self._s3 is None:
            from ..services.fs.s3_provider import S3Provider
            self._s3 = S3Provider()
        return self._s3

    def _get_watermark_uri(self) -> str:
        """Get S3 URI for watermark state file."""
        from ..settings import settings
        # Use the main bucket with a state/ prefix
        return f"s3://{settings.s3.bucket_name}/state/unlogged-watermark.json"

    def _get_entity_tables(self) -> list[str]:
        """
        Get list of entity tables that feed into kv_store.

        These are the tables that have kv_store triggers and should
        have data if kv_store needs to be populated.
        """
        # Get from registry - these are the CoreModel tables
        registry = get_model_registry()
        models = registry.get_models(include_core=True)

        # Convert model names to table names (pluralize, lowercase)
        tables = []
        for name, ext in models.items():
            if ext.table_name:
                tables.append(ext.table_name)
            else:
                # Default: lowercase + 's' (e.g., Resource -> resources)
                table_name = name.lower()
                if not table_name.endswith('s'):
                    table_name += 's'
                tables.append(table_name)

        # Filter to tables that actually have kv_store triggers
        # These are the main entity tables
        kv_tables = ['resources', 'moments', 'users', 'schemas', 'files', 'messages']
        return [t for t in tables if t in kv_tables]

    async def is_primary(self) -> bool:
        """
        Check if we're connected to the primary (not a replica).

        UNLOGGED tables cannot be accessed on replicas - they error with:
        "cannot access temporary or unlogged relations during recovery"
        """
        try:
            result = await self.db.fetchval("SELECT NOT pg_is_in_recovery()")
            return bool(result)
        except Exception as e:
            logger.warning(f"Could not determine primary status: {e}")
            return False

    async def get_kv_store_count(self) -> int:
        """Get count of entries in kv_store."""
        try:
            count = await self.db.fetchval("SELECT count(*) FROM kv_store")
            return int(count) if count else 0
        except Exception as e:
            # If we get an error about UNLOGGED tables, we're on a replica
            if "cannot access" in str(e) or "recovery" in str(e):
                logger.warning("Cannot access kv_store (likely on replica)")
                return -1  # Signal that we can't access
            raise

    async def get_entity_counts(self) -> dict[str, int]:
        """Get counts from all entity tables."""
        counts = {}
        for table in self._get_entity_tables():
            try:
                count = await self.db.fetchval(
                    f"SELECT count(*) FROM {table} WHERE deleted_at IS NULL"
                )
                counts[table] = int(count) if count else 0
            except Exception as e:
                logger.warning(f"Could not count {table}: {e}")
                counts[table] = 0
        return counts

    async def check_rebuild_needed(self) -> tuple[bool, str]:
        """
        Check if UNLOGGED tables need to be rebuilt.

        Returns:
            Tuple of (needs_rebuild: bool, reason: str)

        Detection logic:
        1. Must be connected to primary (replicas can't access UNLOGGED tables)
        2. kv_store is empty (count = 0)
        3. At least one entity table has data
        """
        # Check if we're on primary
        if not await self.is_primary():
            return False, "Connected to replica, skipping (UNLOGGED tables not accessible)"

        # Check kv_store count
        kv_count = await self.get_kv_store_count()
        if kv_count < 0:
            return False, "Cannot access kv_store"

        if kv_count > 0:
            return False, f"kv_store has {kv_count} entries, no rebuild needed"

        # kv_store is empty - check if entities exist
        entity_counts = await self.get_entity_counts()
        total_entities = sum(entity_counts.values())

        if total_entities == 0:
            return False, "kv_store empty but no entities exist (fresh database)"

        # Rebuild needed!
        tables_with_data = [t for t, c in entity_counts.items() if c > 0]
        return True, (
            f"kv_store empty but {total_entities} entities exist in "
            f"{tables_with_data}. Likely failover or restart."
        )

    async def check_and_rebuild_if_needed(self) -> bool:
        """
        Check if UNLOGGED tables need rebuild and restore if so.

        This is the main entry point for automated triggers.
        Safe to call multiple times (idempotent).

        Returns:
            True if rebuild was performed, False otherwise
        """
        needs_rebuild, reason = await self.check_rebuild_needed()

        if not needs_rebuild:
            logger.info(f"No rebuild needed: {reason}")
            return False

        logger.warning(f"Rebuild needed: {reason}")
        await self.rebuild_with_lock()
        return True

    async def rebuild_with_lock(self) -> dict[str, Any]:
        """
        Rebuild kv_store with advisory lock to prevent concurrent rebuilds.

        Uses PostgreSQL advisory locks to ensure only one rebuild runs at a time,
        even across multiple pods/processes.

        Returns:
            Dict with rebuild statistics
        """
        # Try to acquire advisory lock (non-blocking)
        locked = await self.db.fetchval(
            "SELECT pg_try_advisory_lock($1)", REBUILD_LOCK_ID
        )

        if not locked:
            logger.info("Another process is rebuilding, skipping")
            return {"skipped": True, "reason": "lock_held"}

        try:
            logger.info("Acquired rebuild lock, starting kv_store rebuild...")
            start_time = time.time()

            # Call the PostgreSQL rebuild function
            results = await self.db.fetch("SELECT * FROM rebuild_kv_store()")

            duration_ms = (time.time() - start_time) * 1000

            # Parse results
            tables_rebuilt = []
            total_rows = 0
            for row in results:
                table_name = row.get('table_name', 'unknown')
                rows_inserted = row.get('rows_inserted', 0)
                tables_rebuilt.append(table_name)
                total_rows += rows_inserted
                logger.info(f"  Rebuilt {rows_inserted} entries for {table_name}")

            logger.success(
                f"Rebuilt kv_store: {total_rows} entries "
                f"from {len(tables_rebuilt)} tables in {duration_ms:.0f}ms"
            )

            # Push watermark to S3
            await self.push_watermark()

            return {
                "success": True,
                "tables_rebuilt": tables_rebuilt,
                "total_rows": total_rows,
                "duration_ms": duration_ms,
            }

        except Exception as e:
            logger.error(f"Rebuild failed: {e}")
            raise

        finally:
            # Always release the lock
            await self.db.fetch(
                "SELECT pg_advisory_unlock($1)", REBUILD_LOCK_ID
            )
            logger.debug("Released rebuild lock")

    async def push_watermark(self) -> dict[str, Any]:
        """
        Push current state to S3 watermark for observability and delta rebuilds.

        Watermark contains:
        - Timestamp of snapshot
        - Current primary instance info
        - kv_store count
        - Per-table entity counts and max updated_at timestamps

        Returns:
            The watermark dict that was written
        """
        try:
            # Get current state
            kv_count = await self.get_kv_store_count()

            # Get server info
            server_info = await self.db.fetchval(
                "SELECT inet_server_addr()::text || ':' || inet_server_port()::text"
            )

            # Get per-table stats with max updated_at for delta rebuild
            tables = {}
            for table in self._get_entity_tables():
                try:
                    row = await self.db.fetchrow(f"""
                        SELECT
                            count(*) as count,
                            max(updated_at) as max_updated
                        FROM {table}
                        WHERE deleted_at IS NULL
                    """)
                    tables[table] = {
                        "count": int(row['count']) if row['count'] else 0,
                        "max_updated_at": (
                            row['max_updated'].isoformat()
                            if row['max_updated'] else None
                        ),
                    }
                except Exception as e:
                    logger.warning(f"Could not get stats for {table}: {e}")
                    tables[table] = {"count": 0, "max_updated_at": None}

            watermark = {
                "snapshot_ts": utc_now().isoformat(),
                "primary_instance": server_info,
                "kv_store_count": kv_count,
                "tables": tables,
            }

            # Write to S3
            uri = self._get_watermark_uri()
            self.s3.write(uri, watermark)

            logger.info(
                f"Pushed watermark to S3: kv_store={kv_count}, "
                f"tables={list(tables.keys())}"
            )

            return watermark

        except Exception as e:
            logger.error(f"Failed to push watermark to S3: {e}")
            # Don't fail the rebuild if watermark push fails
            return {"error": str(e)}

    async def read_watermark(self) -> dict[str, Any] | None:
        """
        Read watermark from S3.

        Returns:
            Watermark dict or None if not found
        """
        try:
            uri = self._get_watermark_uri()
            if self.s3.exists(uri):
                return self.s3.read(uri)
            return None
        except Exception as e:
            logger.warning(f"Could not read watermark from S3: {e}")
            return None
```
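For reference, a watermark document written by `push_watermark()` above has the shape below. The field names come from the code; the concrete values here are illustrative only.

```python
# Illustrative watermark, as stored at s3://<bucket>/state/unlogged-watermark.json.
# Field names match push_watermark() above; the values are made up.
watermark = {
    "snapshot_ts": "2025-01-07T04:12:09+00:00",
    "primary_instance": "10.0.3.17:5432",
    "kv_store_count": 184203,
    "tables": {
        "resources": {"count": 91422, "max_updated_at": "2025-01-07T04:11:58+00:00"},
        "messages": {"count": 88751, "max_updated_at": "2025-01-07T04:12:01+00:00"},
        "users": {"count": 4030, "max_updated_at": "2025-01-06T19:02:11+00:00"},
    },
}
```

The rest of the file wires the maintainer up as a module-level CLI: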
```python
async def _run_maintainer(
    snapshot: bool,
    restore: bool,
    check_and_restore: bool,
) -> int:
    """
    Async entry point for the maintainer.

    Returns exit code (0 for success, 1 for error).
    """
    maintainer = UnloggedMaintainer()

    try:
        await maintainer.db.connect()

        if snapshot:
            logger.info("Pushing watermark snapshot to S3...")
            result = await maintainer.push_watermark()
            if "error" in result:
                logger.error(f"Snapshot failed: {result['error']}")
                return 1
            logger.success("Watermark snapshot complete")
            return 0

        elif restore:
            logger.info("Forcing kv_store rebuild...")
            result = await maintainer.rebuild_with_lock()
            if result.get("skipped"):
                logger.warning(f"Rebuild skipped: {result.get('reason')}")
                return 0
            if result.get("success"):
                logger.success(
                    f"Rebuild complete: {result['total_rows']} rows "
                    f"in {result['duration_ms']:.0f}ms"
                )
                return 0
            return 1

        elif check_and_restore:
            logger.info("Checking if rebuild is needed...")
            rebuilt = await maintainer.check_and_rebuild_if_needed()
            if rebuilt:
                logger.success("Rebuild completed successfully")
            else:
                logger.info("No rebuild was needed")
            return 0

        else:
            # Default: check and restore
            logger.info("No mode specified, defaulting to --check-and-restore")
            await maintainer.check_and_rebuild_if_needed()
            return 0

    except Exception as e:
        logger.exception(f"Maintainer failed: {e}")
        return 1

    finally:
        await maintainer.db.disconnect()


@click.command()
@click.option(
    '--snapshot',
    is_flag=True,
    help='Push current state to S3 watermark (for observability)',
)
@click.option(
    '--restore',
    is_flag=True,
    help='Force rebuild kv_store from entity tables',
)
@click.option(
    '--check-and-restore',
    'check_and_restore',
    is_flag=True,
    help='Check if rebuild needed, restore if so (idempotent, default)',
)
def main(snapshot: bool, restore: bool, check_and_restore: bool):
    """
    UNLOGGED Table Maintainer for REM.

    Ensures kv_store and other UNLOGGED tables are rebuilt after
    PostgreSQL restarts or failovers.

    \b
    Examples:
        # Check and rebuild if needed (safe to run anytime)
        python -m rem.workers.unlogged_maintainer --check-and-restore

        # Force rebuild (useful for manual recovery)
        python -m rem.workers.unlogged_maintainer --restore

        # Push snapshot to S3 (for monitoring)
        python -m rem.workers.unlogged_maintainer --snapshot
    """
    # If no mode specified, default to check-and-restore
    if not any([snapshot, restore, check_and_restore]):
        check_and_restore = True

    exit_code = asyncio.run(_run_maintainer(snapshot, restore, check_and_restore))
    raise SystemExit(exit_code)


if __name__ == "__main__":
    main()
```
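Beyond the CLI, the maintainer can be driven programmatically. A minimal sketch using only the methods shown above, with the same connect/disconnect lifecycle as `_run_maintainer()`:

```python
import asyncio

from rem.workers.unlogged_maintainer import UnloggedMaintainer


async def check_once() -> None:
    """Detect and repair an empty kv_store, mirroring --check-and-restore."""
    maintainer = UnloggedMaintainer()
    await maintainer.db.connect()
    try:
        needs_rebuild, reason = await maintainer.check_rebuild_needed()
        print(f"rebuild needed: {needs_rebuild} ({reason})")
        if needs_rebuild:
            stats = await maintainer.rebuild_with_lock()
            print(f"rebuilt {stats.get('total_rows', 0)} kv_store entries")
    finally:
        await maintainer.db.disconnect()


asyncio.run(check_once())
```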
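The advisory-lock pattern in `rebuild_with_lock()` is also useful outside this worker. A standalone sketch of the same pattern, assuming asyncpg as the driver (the package's own `PostgresService` hides its driver behind `fetchval`/`fetch`, so this is an approximation, not its actual internals):

```python
import asyncio

import asyncpg  # assumption: any driver exposing fetchval()/fetch() works the same way

REBUILD_LOCK_ID = 2147483647  # same fixed lock ID as the worker above


async def rebuild_once(dsn: str) -> None:
    conn = await asyncpg.connect(dsn)
    try:
        # Non-blocking: returns false immediately if another session holds the lock
        locked = await conn.fetchval(
            "SELECT pg_try_advisory_lock($1)", REBUILD_LOCK_ID
        )
        if not locked:
            print("another process is rebuilding, skipping")
            return
        try:
            # Critical section: at most one session runs the rebuild at a time
            await conn.fetch("SELECT * FROM rebuild_kv_store()")
        finally:
            # Session-level advisory locks must be released explicitly
            await conn.fetchval("SELECT pg_advisory_unlock($1)", REBUILD_LOCK_ID)
    finally:
        await conn.close()


# Example invocation (hypothetical DSN):
# asyncio.run(rebuild_once("postgresql://rem:rem@localhost:5432/rem"))
```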