remdb 0.2.6__py3-none-any.whl → 0.3.118__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (104) hide show
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +28 -22
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/mcp/tool_wrapper.py +29 -3
  9. rem/agentic/otel/setup.py +92 -4
  10. rem/agentic/providers/phoenix.py +32 -43
  11. rem/agentic/providers/pydantic_ai.py +168 -24
  12. rem/agentic/schema.py +358 -21
  13. rem/agentic/tools/rem_tools.py +3 -3
  14. rem/api/README.md +238 -1
  15. rem/api/deps.py +255 -0
  16. rem/api/main.py +154 -37
  17. rem/api/mcp_router/resources.py +1 -1
  18. rem/api/mcp_router/server.py +26 -5
  19. rem/api/mcp_router/tools.py +454 -7
  20. rem/api/middleware/tracking.py +172 -0
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +124 -0
  23. rem/api/routers/chat/completions.py +152 -16
  24. rem/api/routers/chat/models.py +7 -3
  25. rem/api/routers/chat/sse_events.py +526 -0
  26. rem/api/routers/chat/streaming.py +608 -45
  27. rem/api/routers/dev.py +81 -0
  28. rem/api/routers/feedback.py +148 -0
  29. rem/api/routers/messages.py +473 -0
  30. rem/api/routers/models.py +78 -0
  31. rem/api/routers/query.py +360 -0
  32. rem/api/routers/shared_sessions.py +406 -0
  33. rem/auth/middleware.py +126 -27
  34. rem/cli/commands/README.md +237 -64
  35. rem/cli/commands/ask.py +15 -11
  36. rem/cli/commands/cluster.py +1300 -0
  37. rem/cli/commands/configure.py +170 -97
  38. rem/cli/commands/db.py +396 -139
  39. rem/cli/commands/experiments.py +278 -96
  40. rem/cli/commands/process.py +22 -15
  41. rem/cli/commands/scaffold.py +47 -0
  42. rem/cli/commands/schema.py +97 -50
  43. rem/cli/main.py +37 -6
  44. rem/config.py +2 -2
  45. rem/models/core/core_model.py +7 -1
  46. rem/models/core/rem_query.py +5 -2
  47. rem/models/entities/__init__.py +21 -0
  48. rem/models/entities/domain_resource.py +38 -0
  49. rem/models/entities/feedback.py +123 -0
  50. rem/models/entities/message.py +30 -1
  51. rem/models/entities/session.py +83 -0
  52. rem/models/entities/shared_session.py +180 -0
  53. rem/models/entities/user.py +10 -3
  54. rem/registry.py +373 -0
  55. rem/schemas/agents/rem.yaml +7 -3
  56. rem/services/content/providers.py +94 -140
  57. rem/services/content/service.py +115 -24
  58. rem/services/dreaming/affinity_service.py +2 -16
  59. rem/services/dreaming/moment_service.py +2 -15
  60. rem/services/embeddings/api.py +24 -17
  61. rem/services/embeddings/worker.py +16 -16
  62. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  63. rem/services/phoenix/client.py +252 -19
  64. rem/services/postgres/README.md +159 -15
  65. rem/services/postgres/__init__.py +2 -1
  66. rem/services/postgres/diff_service.py +531 -0
  67. rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
  68. rem/services/postgres/repository.py +132 -0
  69. rem/services/postgres/schema_generator.py +291 -9
  70. rem/services/postgres/service.py +6 -6
  71. rem/services/rate_limit.py +113 -0
  72. rem/services/rem/README.md +14 -0
  73. rem/services/rem/parser.py +44 -9
  74. rem/services/rem/service.py +36 -2
  75. rem/services/session/compression.py +17 -1
  76. rem/services/session/reload.py +1 -1
  77. rem/services/user_service.py +98 -0
  78. rem/settings.py +169 -22
  79. rem/sql/background_indexes.sql +21 -16
  80. rem/sql/migrations/001_install.sql +387 -54
  81. rem/sql/migrations/002_install_models.sql +2320 -393
  82. rem/sql/migrations/003_optional_extensions.sql +326 -0
  83. rem/sql/migrations/004_cache_system.sql +548 -0
  84. rem/utils/__init__.py +18 -0
  85. rem/utils/constants.py +97 -0
  86. rem/utils/date_utils.py +228 -0
  87. rem/utils/embeddings.py +17 -4
  88. rem/utils/files.py +167 -0
  89. rem/utils/mime_types.py +158 -0
  90. rem/utils/model_helpers.py +156 -1
  91. rem/utils/schema_loader.py +284 -21
  92. rem/utils/sql_paths.py +146 -0
  93. rem/utils/sql_types.py +3 -1
  94. rem/utils/vision.py +9 -14
  95. rem/workers/README.md +14 -14
  96. rem/workers/__init__.py +2 -1
  97. rem/workers/db_maintainer.py +74 -0
  98. rem/workers/unlogged_maintainer.py +463 -0
  99. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/METADATA +598 -171
  100. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/RECORD +102 -73
  101. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/WHEEL +1 -1
  102. rem/sql/002_install_models.sql +0 -1068
  103. rem/sql/install_models.sql +0 -1038
  104. {remdb-0.2.6.dist-info → remdb-0.3.118.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,172 @@
1
+ """
2
+ Anonymous User Tracking & Rate Limiting Middleware.
3
+
4
+ Handles:
5
+ 1. Anonymous Identity: Generates/Validates 'rem_anon_id' cookie.
6
+ 2. Context Injection: Sets request.state.anon_id.
7
+ 3. Rate Limiting: Enforces tenant-aware tiered limits via RateLimitService.
8
+ """
9
+
10
+ import hmac
11
+ import hashlib
12
+ import uuid
13
+ import secrets
14
+ from typing import Optional
15
+
16
+ from fastapi import Request, Response
17
+ from fastapi.responses import JSONResponse
18
+ from starlette.middleware.base import BaseHTTPMiddleware
19
+ from starlette.types import ASGIApp
20
+
21
+ from ...services.postgres.service import PostgresService
22
+ from ...services.rate_limit import RateLimitService
23
+ from ...models.entities.user import UserTier
24
+ from ...settings import settings
25
+
26
+
27
class AnonymousTrackingMiddleware(BaseHTTPMiddleware):
    """
    Middleware for anonymous user tracking and rate limiting.

    Design Pattern:
    - Uses a signed cookie for the anonymous ID, formatted as
      ``<uuid4>.<first 12 hex chars of HMAC-SHA256>``.
    - Enforces rate limits before request processing.
    - Injects the unsigned anon_id into ``request.state.anon_id``.

    NOTE(review): a request without a valid cookie always receives a freshly
    generated ID, and that new ID is the identifier passed to the rate
    limiter. A client that never sends the cookie back is therefore
    presumably never throttled unless RateLimitService keys on something
    else as well - confirm.
    """

    def __init__(self, app: ASGIApp):
        """Store the signing secret, cookie name, DB handle and excluded paths."""
        super().__init__(app)
        # Secret for signing cookies.
        # NOTE(review): when settings.auth.session_secret is unset, the
        # hard-coded fallback makes signatures forgeable by anyone who has
        # read this source. Kept for backward compatibility; configure a
        # real secret in production.
        self.secret_key = settings.auth.session_secret or "fallback-secret-change-me"
        self.cookie_name = "rem_anon_id"

        # Dedicated DB service for this middleware (one pool per app instance)
        self.db = PostgresService()
        self.rate_limiter = RateLimitService(self.db)

        # Excluded path prefixes (health checks, static assets, auth callbacks)
        self.excluded_paths = {
            "/health",
            "/docs",
            "/openapi.json",
            "/favicon.ico",
            "/api/auth",  # Don't rate limit auth flow heavily
        }

    async def dispatch(self, request: Request, call_next):
        """
        Identify the caller, apply the rate limit, then forward the request.

        Returns the downstream response (with the anonymous cookie and
        X-RateLimit-* headers added where applicable), or a 429 JSON
        response when the rate limiter rejects the request.
        """
        # 0. Skip excluded paths (prefix match against every excluded entry)
        if request.url.path.startswith(tuple(self.excluded_paths)):
            return await call_next(request)

        # 1. Lazy DB connection.
        # Concurrent first requests may each reach connect() before the pool
        # attribute is populated; the original author accepted this.
        if not self.db.pool and settings.postgres.enabled:
            await self.db.connect()

        # 2. Identification (cookie strategy): reuse a validly signed cookie,
        # otherwise mint a new signed ID.
        anon_id = request.cookies.get(self.cookie_name)
        is_new_anon = False

        if not anon_id or not self._validate_signature(anon_id):
            anon_id = self._generate_signed_id()
            is_new_anon = True

        # Strip signature for internal use
        raw_anon_id = anon_id.split(".")[0]
        request.state.anon_id = raw_anon_id

        # 3. Determine user tier and identifier for rate limiting.
        # request.state.user is only present if some earlier middleware set
        # it; whether that has happened depends on middleware registration
        # order - TODO confirm against the app setup.
        user = getattr(request.state, "user", None)
        if user:
            # Authenticated user (assumed to be dict-like)
            identifier = user.get("id")
            tier_str = user.get("tier", UserTier.FREE.value)
            try:
                tier = UserTier(tier_str)
            except ValueError:
                # Unknown tier strings fall back to the free tier
                tier = UserTier.FREE
            tenant_id = user.get("tenant_id", "default")
        else:
            # Anonymous user
            identifier = raw_anon_id
            tier = UserTier.ANONYMOUS
            # Tenant ID from header or default
            tenant_id = request.headers.get("X-Tenant-Id", "default")

        # 4. Rate limiting. `limit` stays None when no check was performed,
        # which later suppresses the X-RateLimit-* headers.
        limit: Optional[int] = None
        current = 0
        if settings.postgres.enabled:
            is_allowed, current, limit = await self.rate_limiter.check_rate_limit(
                tenant_id=tenant_id,
                identifier=identifier,
                tier=tier,
            )

            if not is_allowed:
                return JSONResponse(
                    status_code=429,
                    content={
                        "error": {
                            "code": "rate_limit_exceeded",
                            "message": "You have exceeded your rate limit. Please sign in or upgrade to continue.",
                            "details": {
                                "limit": limit,
                                "tier": tier.value,
                                "retry_after": 60,
                            },
                        }
                    },
                    headers={"Retry-After": "60"},
                )

        # 5. Process request
        response = await call_next(request)

        # 6. Set cookie if a new anonymous ID was issued
        if is_new_anon:
            response.set_cookie(
                key=self.cookie_name,
                value=anon_id,
                max_age=31536000,  # 1 year
                httponly=True,
                samesite="lax",
                secure=settings.environment == "production",
            )

        # Add rate limit headers only when a check actually ran
        if limit is not None:
            response.headers["X-RateLimit-Limit"] = str(limit)
            response.headers["X-RateLimit-Remaining"] = str(max(0, limit - current))

        return response

    def _sign(self, val: str) -> str:
        """Return the 12-hex-character HMAC-SHA256 prefix used as the signature."""
        return hmac.new(
            self.secret_key.encode(),
            val.encode(),
            hashlib.sha256,
        ).hexdigest()[:12]  # Short signature

    def _generate_signed_id(self) -> str:
        """Generate a UUID4 signed with HMAC."""
        val = str(uuid.uuid4())
        return f"{val}.{self._sign(val)}"

    def _validate_signature(self, signed_val: str) -> bool:
        """
        Validate the HMAC signature of a ``<value>.<signature>`` cookie.

        Returns False for any malformed input rather than raising.
        """
        try:
            # split() raises ValueError unless there is exactly one "."
            val, sig = signed_val.split(".")
            # Compare as bytes: compare_digest() raises TypeError on str
            # arguments containing non-ASCII characters, which a client can
            # put in the cookie. UnicodeEncodeError is a ValueError subclass.
            return secrets.compare_digest(sig.encode(), self._sign(val).encode())
        except ValueError:
            return False
@@ -0,0 +1,494 @@
1
+ """
2
+ Admin API Router.
3
+
4
+ Protected endpoints requiring admin role for system management tasks.
5
+
6
+ Endpoints:
7
+ GET /api/admin/users - List all users (admin only)
8
+ GET /api/admin/sessions - List all sessions across users (admin only)
9
+ GET /api/admin/messages - List all messages across users (admin only)
10
+ GET /api/admin/stats - System statistics (admin only)
11
+
12
+ Internal Endpoints (hidden from Swagger, secret-protected):
13
+ POST /api/admin/internal/rebuild-kv - Trigger kv_store rebuild (called by pg_net)
14
+
15
+ All endpoints except the internal ones require:
16
+ 1. Authentication (valid session)
17
+ 2. Admin role in user's roles list
18
+
19
+ Design Pattern:
20
+ - Uses require_admin dependency for role enforcement
21
+ - Cross-tenant queries (no user_id filtering)
22
+ - Audit logging for admin actions
23
+ - Internal endpoints use X-Internal-Secret header for authentication
24
+ """
25
+
26
+ import asyncio
27
+ import threading
28
+ from typing import Literal
29
+
30
+ from fastapi import APIRouter, Depends, Header, HTTPException, Query, BackgroundTasks
31
+ from loguru import logger
32
+ from pydantic import BaseModel
33
+
34
+ from ..deps import require_admin
35
+ from ...models.entities import Message, Session, SessionMode
36
+ from ...services.postgres import Repository
37
+ from ...settings import settings
38
+
39
+ router = APIRouter(prefix="/api/admin", tags=["admin"])
40
+
41
+ # =============================================================================
42
+ # Internal Router (hidden from Swagger)
43
+ # =============================================================================
44
+
45
+ internal_router = APIRouter(prefix="/internal", include_in_schema=False)
46
+
47
+
48
+ # =============================================================================
49
+ # Response Models
50
+ # =============================================================================
51
+
52
+
53
class UserSummary(BaseModel):
    """Condensed user record returned by the admin user listing."""

    # Stringified user ID.
    id: str
    email: str | None
    name: str | None
    # Tier value as a plain string; the listing endpoint substitutes "free"
    # when the stored tier is empty.
    tier: str
    role: str | None
    # ISO 8601 timestamp string, or None when the record has no creation time.
    created_at: str | None
62
+
63
+
64
class UserListResponse(BaseModel):
    """Paginated envelope for the admin user listing."""

    # Constant discriminator identifying the payload as a list.
    object: Literal["list"] = "list"
    # The current page of user summaries.
    data: list[UserSummary]
    # Count reported by the repository for the same filters, ignoring paging.
    total: int
    # True when at least one more row exists after this page.
    has_more: bool
71
+
72
+
73
class SessionListResponse(BaseModel):
    """Paginated envelope for the admin session listing."""

    # Constant discriminator identifying the payload as a list.
    object: Literal["list"] = "list"
    # The current page of sessions.
    data: list[Session]
    # Count reported by the repository for the same filters, ignoring paging.
    total: int
    # True when at least one more row exists after this page.
    has_more: bool
80
+
81
+
82
class MessageListResponse(BaseModel):
    """Paginated envelope for the admin message listing."""

    # Constant discriminator identifying the payload as a list.
    object: Literal["list"] = "list"
    # The current page of messages.
    data: list[Message]
    # Count reported by the repository for the same filters, ignoring paging.
    total: int
    # True when at least one more row exists after this page.
    has_more: bool
89
+
90
+
91
class SystemStats(BaseModel):
    """Aggregate counters shown on the admin dashboard."""

    total_users: int
    total_sessions: int
    total_messages: int
    # The two 24h counters are placeholders: the stats endpoint in this
    # module currently always reports 0 for them.
    active_sessions_24h: int
    messages_24h: int
99
+
100
+
101
+ # =============================================================================
102
+ # Admin Endpoints
103
+ # =============================================================================
104
+
105
+
106
@router.get("/users", response_model=UserListResponse)
async def list_all_users(
    user: dict = Depends(require_admin),
    limit: int = Query(default=50, ge=1, le=100),
    offset: int = Query(default=0, ge=0),
) -> UserListResponse:
    """
    Return one page of users, newest first, without any tenant filter.

    Admin-only: access is gated by the ``require_admin`` dependency.

    Raises:
        HTTPException: 503 when Postgres is disabled in settings.
    """
    if not settings.postgres.enabled:
        raise HTTPException(status_code=503, detail="Database not enabled")

    logger.info(f"Admin {user.get('email')} listing all users")

    # Imported here rather than at module level to avoid circular imports
    from ...models.entities import User

    user_repo = Repository(User, table_name="users")

    # Ask for one row more than requested; its presence signals a next page.
    fetched = await user_repo.find(
        filters={},
        order_by="created_at DESC",
        limit=limit + 1,
        offset=offset,
    )
    has_more = len(fetched) > limit
    page = fetched[:limit] if has_more else fetched

    total = await user_repo.count({})

    # Project each full record down to the summary shape
    summaries = []
    for record in page:
        created = record.created_at.isoformat() if record.created_at else None
        tier_name = record.tier.value if record.tier else "free"
        summaries.append(
            UserSummary(
                id=str(record.id),
                email=record.email,
                name=record.name,
                tier=tier_name,
                role=record.role,
                created_at=created,
            )
        )

    return UserListResponse(data=summaries, total=total, has_more=has_more)
156
+
157
+
158
@router.get("/sessions", response_model=SessionListResponse)
async def list_all_sessions(
    user: dict = Depends(require_admin),
    user_id: str | None = Query(default=None, description="Filter by user ID"),
    mode: SessionMode | None = Query(default=None, description="Filter by mode"),
    limit: int = Query(default=50, ge=1, le=100),
    offset: int = Query(default=0, ge=0),
) -> SessionListResponse:
    """
    Return one page of sessions across all users, newest first.

    Admin-only: access is gated by the ``require_admin`` dependency.
    Results can be narrowed by ``user_id`` and/or ``mode``.

    Raises:
        HTTPException: 503 when Postgres is disabled in settings.
    """
    if not settings.postgres.enabled:
        raise HTTPException(status_code=503, detail="Database not enabled")

    logger.info(
        f"Admin {user.get('email')} listing sessions "
        f"(user_id={user_id}, mode={mode})"
    )

    session_repo = Repository(Session, table_name="sessions")

    # Only supplied filters are forwarded to the repository
    filters: dict = {}
    if user_id:
        filters.update(user_id=user_id)
    if mode:
        filters.update(mode=mode.value)

    # Ask for one row more than requested; its presence signals a next page.
    fetched = await session_repo.find(
        filters=filters,
        order_by="created_at DESC",
        limit=limit + 1,
        offset=offset,
    )
    has_more = len(fetched) > limit
    page = fetched[:limit] if has_more else fetched

    total = await session_repo.count(filters)

    return SessionListResponse(data=page, total=total, has_more=has_more)
203
+
204
+
205
@router.get("/messages", response_model=MessageListResponse)
async def list_all_messages(
    user: dict = Depends(require_admin),
    user_id: str | None = Query(default=None, description="Filter by user ID"),
    session_id: str | None = Query(default=None, description="Filter by session ID"),
    message_type: str | None = Query(default=None, description="Filter by type"),
    limit: int = Query(default=50, ge=1, le=100),
    offset: int = Query(default=0, ge=0),
) -> MessageListResponse:
    """
    List all messages across all users, newest first.

    Admin-only endpoint for message auditing.
    Can filter by user_id, session_id, or message_type.

    Raises:
        HTTPException: 503 when Postgres is disabled in settings.
    """
    if not settings.postgres.enabled:
        raise HTTPException(status_code=503, detail="Database not enabled")

    # Log every filter that is applied so the audit trail matches the query
    logger.info(
        f"Admin {user.get('email')} listing messages "
        f"(user_id={user_id}, session_id={session_id}, "
        f"message_type={message_type})"
    )

    repo = Repository(Message, table_name="messages")

    # Build optional filters; unset parameters are not forwarded
    filters: dict = {}
    if user_id:
        filters["user_id"] = user_id
    if session_id:
        filters["session_id"] = session_id
    if message_type:
        filters["message_type"] = message_type

    # Fetch one extra row to detect whether another page exists
    messages = await repo.find(
        filters=filters,
        order_by="created_at DESC",
        limit=limit + 1,
        offset=offset,
    )

    has_more = len(messages) > limit
    if has_more:
        messages = messages[:limit]

    total = await repo.count(filters)

    return MessageListResponse(data=messages, total=total, has_more=has_more)
253
+
254
+
255
@router.get("/stats", response_model=SystemStats)
async def get_system_stats(
    user: dict = Depends(require_admin),
) -> SystemStats:
    """
    Get system-wide statistics.

    Admin-only endpoint for monitoring dashboard.

    Returns:
        SystemStats with total row counts for users, sessions and messages.
        ``active_sessions_24h`` and ``messages_24h`` are placeholders and
        are always 0 until the repository supports date-range filtering.

    Raises:
        HTTPException: 503 when Postgres is disabled in settings.
    """
    if not settings.postgres.enabled:
        raise HTTPException(status_code=503, detail="Database not enabled")

    logger.info(f"Admin {user.get('email')} fetching system stats")

    # Imported here rather than at module level to avoid circular imports
    from ...models.entities import User

    user_repo = Repository(User, table_name="users")
    session_repo = Repository(Session, table_name="sessions")
    message_repo = Repository(Message, table_name="messages")

    # Get totals (unfiltered counts)
    total_users = await user_repo.count({})
    total_sessions = await session_repo.count({})
    total_messages = await message_repo.count({})

    # For 24h stats, we'd need date filtering in Repository
    # For now, return totals (TODO: add date range support)
    return SystemStats(
        total_users=total_users,
        total_sessions=total_sessions,
        total_messages=total_messages,
        active_sessions_24h=0,  # TODO: implement
        messages_24h=0,  # TODO: implement
    )
290
+
291
+
292
+ # =============================================================================
293
+ # Internal Endpoints (hidden from Swagger, secret-protected)
294
+ # =============================================================================
295
+
296
+
297
class RebuildKVRequest(BaseModel):
    """Payload accepted by the internal kv_store rebuild trigger."""

    # All three fields are copied into the log line and/or the SQS message
    # body by the handlers in this module; none of them is required.
    user_id: str | None = None
    # Label for whoever initiated the rebuild; defaults to "api".
    triggered_by: str = "api"
    timestamp: str | None = None
303
+
304
+
305
class RebuildKVResponse(BaseModel):
    """Result returned by the internal kv_store rebuild trigger."""

    # "submitted" = queued on SQS, "started" = background thread launched.
    # "skipped" is accepted by the schema; the endpoint in this module does
    # not currently return it.
    status: Literal["submitted", "started", "skipped"]
    message: str
    job_method: str | None = None  # "sqs" or "thread"
311
+
312
+
313
async def _get_internal_secret() -> str | None:
    """
    Fetch the internal API secret via the ``rem_get_cache_api_secret()`` SQL function.

    Returns None when no Postgres service is available or when the lookup
    fails for any reason (the failure is logged as a warning).
    """
    from ...services.postgres import get_postgres_service

    service = get_postgres_service()
    if not service:
        return None

    result: str | None = None
    try:
        await service.connect()
        result = await service.fetchval("SELECT rem_get_cache_api_secret()")
    except Exception as exc:
        # Best-effort lookup: callers treat None as "secret not configured"
        logger.warning(f"Could not get internal API secret: {exc}")
        result = None
    finally:
        # Always release the connection opened above
        await service.disconnect()
    return result
334
+
335
+
336
async def _validate_internal_secret(x_internal_secret: str | None = Header(None)):
    """
    Dependency to validate the X-Internal-Secret header.

    Returns:
        True when the header matches the stored secret.

    Raises:
        HTTPException: 401 if the header is missing or does not match,
            503 if the expected secret cannot be retrieved.
    """
    import hmac

    if not x_internal_secret:
        logger.warning("Internal endpoint called without X-Internal-Secret header")
        raise HTTPException(status_code=401, detail="Missing X-Internal-Secret header")

    expected_secret = await _get_internal_secret()
    if not expected_secret:
        logger.error("Could not retrieve internal secret from database")
        raise HTTPException(status_code=503, detail="Internal secret not configured")

    # Constant-time comparison so response timing does not leak how much of
    # the secret matched. Compared as bytes because compare_digest() rejects
    # str arguments containing non-ASCII characters.
    secrets_match = hmac.compare_digest(
        x_internal_secret.encode("utf-8"),
        str(expected_secret).encode("utf-8"),
    )
    if not secrets_match:
        logger.warning("Internal endpoint called with invalid secret")
        raise HTTPException(status_code=401, detail="Invalid X-Internal-Secret")

    return True
356
+
357
+
358
def _run_rebuild_in_thread():
    """
    Launch the kv_store rebuild on a daemon thread and return immediately.

    Used as the fallback path when the job could not be queued on SQS.
    """

    async def _rebuild():
        """Connect, run the locked rebuild, and always disconnect afterwards."""
        from ...workers.unlogged_maintainer import UnloggedMaintainer

        maintainer = UnloggedMaintainer()
        if not maintainer.db:
            logger.error("Database not configured, cannot rebuild")
            return
        try:
            await maintainer.db.connect()
            await maintainer.rebuild_with_lock()
        except Exception as exc:
            # Failures are only logged; nobody is waiting on this thread
            logger.error(f"Background rebuild failed: {exc}")
        finally:
            await maintainer.db.disconnect()

    def _thread_main():
        """Thread entry point: drive the coroutine on a private event loop."""
        import asyncio

        # The worker thread has no event loop of its own, so create one
        worker_loop = asyncio.new_event_loop()
        asyncio.set_event_loop(worker_loop)
        try:
            worker_loop.run_until_complete(_rebuild())
        finally:
            worker_loop.close()

    thread = threading.Thread(
        target=_thread_main,
        name="kv-rebuild-worker",
        daemon=True,
    )
    thread.start()
    logger.info(f"Started background rebuild thread: {thread.name}")
395
+
396
+
397
def _submit_sqs_rebuild_job_sync(request: RebuildKVRequest) -> bool:
    """
    Submit rebuild job to SQS queue (synchronous).

    Args:
        request: Rebuild request whose fields are copied into the message body.

    Returns:
        True if the job was submitted, False if SQS is unavailable: no queue
        URL configured, boto3 not importable, or the send failed.
    """
    import json

    # Check configuration first so deployments without SQS never need boto3
    if not settings.sqs.queue_url:
        logger.debug("SQS queue URL not configured, cannot submit SQS job")
        return False

    try:
        import boto3
        from botocore.exceptions import ClientError
    except ImportError as e:
        # Treat a missing AWS SDK like any other "SQS unavailable" condition
        # so the caller can fall back to the background thread.
        logger.warning(f"SQS submission error: {e}")
        return False

    try:
        sqs = boto3.client("sqs", region_name=settings.sqs.region)

        message_body = {
            "action": "rebuild_kv_store",
            "user_id": request.user_id,
            "triggered_by": request.triggered_by,
            "timestamp": request.timestamp,
        }

        response = sqs.send_message(
            QueueUrl=settings.sqs.queue_url,
            MessageBody=json.dumps(message_body),
            MessageAttributes={
                "action": {"DataType": "String", "StringValue": "rebuild_kv_store"},
            },
        )

        message_id = response.get("MessageId")
        logger.info(f"Submitted rebuild job to SQS: {message_id}")
        return True

    except ClientError as e:
        logger.warning(f"Failed to submit SQS job: {e}")
        return False
    except Exception as e:
        # Deliberately broad: any failure here means "fall back to thread"
        logger.warning(f"SQS submission error: {e}")
        return False
440
+
441
+
442
async def _submit_sqs_rebuild_job(request: RebuildKVRequest) -> bool:
    """
    Async wrapper around the synchronous SQS submission.

    The blocking boto3 call is handed to a worker thread so the event loop
    stays free; the wrapped function's boolean result is returned unchanged.
    """
    submitted = await asyncio.to_thread(_submit_sqs_rebuild_job_sync, request)
    return submitted
451
+
452
+
453
@internal_router.post("/rebuild-kv", response_model=RebuildKVResponse)
async def trigger_kv_rebuild(
    request: RebuildKVRequest,
    _: bool = Depends(_validate_internal_secret),
) -> RebuildKVResponse:
    """
    Kick off a kv_store rebuild (internal endpoint, hidden from Swagger).

    Intended to be called by pg_net from PostgreSQL when self-healing
    detects an empty cache. The X-Internal-Secret header is checked by the
    ``_validate_internal_secret`` dependency before this body runs.

    Dispatch order:
    1. Queue the job on SQS (if configured) - scales with KEDA
    2. Otherwise run it on a background thread in this process

    The response is returned immediately; the rebuild itself is asynchronous.
    """
    logger.info(
        f"Rebuild kv_store requested by {request.triggered_by} "
        f"(user_id={request.user_id})"
    )

    queued = await _submit_sqs_rebuild_job(request)
    if not queued:
        # SQS path unavailable: fall back to the in-process worker thread
        _run_rebuild_in_thread()
        return RebuildKVResponse(
            status="started",
            message="Rebuild started in background thread (SQS unavailable)",
            job_method="thread",
        )

    return RebuildKVResponse(
        status="submitted",
        message="Rebuild job submitted to SQS queue",
        job_method="sqs",
    )
491
+
492
+
493
+ # Include internal router in main router
494
+ router.include_router(internal_router)