remdb 0.2.6__py3-none-any.whl → 0.3.103__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of remdb might be problematic.

Files changed (82)
  1. rem/__init__.py +129 -2
  2. rem/agentic/README.md +76 -0
  3. rem/agentic/__init__.py +15 -0
  4. rem/agentic/agents/__init__.py +16 -2
  5. rem/agentic/agents/sse_simulator.py +500 -0
  6. rem/agentic/context.py +7 -5
  7. rem/agentic/llm_provider_models.py +301 -0
  8. rem/agentic/providers/phoenix.py +32 -43
  9. rem/agentic/providers/pydantic_ai.py +84 -10
  10. rem/api/README.md +238 -1
  11. rem/api/deps.py +255 -0
  12. rem/api/main.py +70 -22
  13. rem/api/mcp_router/server.py +8 -1
  14. rem/api/mcp_router/tools.py +80 -0
  15. rem/api/middleware/tracking.py +172 -0
  16. rem/api/routers/admin.py +277 -0
  17. rem/api/routers/auth.py +124 -0
  18. rem/api/routers/chat/completions.py +123 -14
  19. rem/api/routers/chat/models.py +7 -3
  20. rem/api/routers/chat/sse_events.py +526 -0
  21. rem/api/routers/chat/streaming.py +468 -45
  22. rem/api/routers/dev.py +81 -0
  23. rem/api/routers/feedback.py +455 -0
  24. rem/api/routers/messages.py +473 -0
  25. rem/api/routers/models.py +78 -0
  26. rem/api/routers/shared_sessions.py +406 -0
  27. rem/auth/middleware.py +126 -27
  28. rem/cli/commands/ask.py +15 -11
  29. rem/cli/commands/configure.py +169 -94
  30. rem/cli/commands/db.py +53 -7
  31. rem/cli/commands/experiments.py +278 -96
  32. rem/cli/commands/process.py +8 -7
  33. rem/cli/commands/scaffold.py +47 -0
  34. rem/cli/commands/schema.py +9 -9
  35. rem/cli/main.py +10 -0
  36. rem/config.py +2 -2
  37. rem/models/core/core_model.py +7 -1
  38. rem/models/entities/__init__.py +21 -0
  39. rem/models/entities/domain_resource.py +38 -0
  40. rem/models/entities/feedback.py +123 -0
  41. rem/models/entities/message.py +30 -1
  42. rem/models/entities/session.py +83 -0
  43. rem/models/entities/shared_session.py +206 -0
  44. rem/models/entities/user.py +10 -3
  45. rem/registry.py +367 -0
  46. rem/schemas/agents/rem.yaml +7 -3
  47. rem/services/content/providers.py +94 -140
  48. rem/services/content/service.py +85 -16
  49. rem/services/dreaming/affinity_service.py +2 -16
  50. rem/services/dreaming/moment_service.py +2 -15
  51. rem/services/embeddings/api.py +20 -13
  52. rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
  53. rem/services/phoenix/client.py +252 -19
  54. rem/services/postgres/README.md +29 -10
  55. rem/services/postgres/repository.py +132 -0
  56. rem/services/postgres/schema_generator.py +86 -5
  57. rem/services/rate_limit.py +113 -0
  58. rem/services/rem/README.md +14 -0
  59. rem/services/session/compression.py +17 -1
  60. rem/services/user_service.py +98 -0
  61. rem/settings.py +115 -17
  62. rem/sql/background_indexes.sql +10 -0
  63. rem/sql/migrations/001_install.sql +152 -2
  64. rem/sql/migrations/002_install_models.sql +580 -231
  65. rem/sql/migrations/003_seed_default_user.sql +48 -0
  66. rem/utils/constants.py +97 -0
  67. rem/utils/date_utils.py +228 -0
  68. rem/utils/embeddings.py +17 -4
  69. rem/utils/files.py +167 -0
  70. rem/utils/mime_types.py +158 -0
  71. rem/utils/model_helpers.py +156 -1
  72. rem/utils/schema_loader.py +273 -14
  73. rem/utils/sql_types.py +3 -1
  74. rem/utils/vision.py +9 -14
  75. rem/workers/README.md +14 -14
  76. rem/workers/db_maintainer.py +74 -0
  77. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/METADATA +486 -132
  78. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/RECORD +80 -57
  79. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/WHEEL +1 -1
  80. rem/sql/002_install_models.sql +0 -1068
  81. rem/sql/install_models.sql +0 -1038
  82. {remdb-0.2.6.dist-info → remdb-0.3.103.dist-info}/entry_points.txt +0 -0
rem/services/postgres/repository.py
@@ -335,3 +335,135 @@ class Repository(Generic[T]):
             row = await conn.fetchrow(sql, *params)
 
         return row[0] if row else 0
+
+    async def find_paginated(
+        self,
+        filters: dict[str, Any],
+        page: int = 1,
+        page_size: int = 50,
+        order_by: str = "created_at DESC",
+        partition_by: str | None = None,
+    ) -> dict[str, Any]:
+        """
+        Find records with page-based pagination using CTE with ROW_NUMBER().
+
+        Uses a CTE with ROW_NUMBER() OVER (PARTITION BY ... ORDER BY ...) for
+        efficient pagination with total count in a single query.
+
+        Args:
+            filters: Dict of field -> value filters (AND-ed together)
+            page: Page number (1-indexed)
+            page_size: Number of records per page
+            order_by: ORDER BY clause for row numbering (default: "created_at DESC")
+            partition_by: Optional field to partition by (e.g., "user_id").
+                If None, uses global row numbering.
+
+        Returns:
+            Dict containing:
+            - data: List of model instances for the page
+            - total: Total count of records matching filters
+            - page: Current page number
+            - page_size: Records per page
+            - total_pages: Total number of pages
+            - has_next: Whether there are more pages
+            - has_previous: Whether there are previous pages
+
+        Example:
+            result = await repo.find_paginated(
+                {"tenant_id": "acme", "user_id": "alice"},
+                page=2,
+                page_size=20,
+                order_by="created_at DESC",
+                partition_by="user_id"
+            )
+            # result = {
+            #     "data": [...],
+            #     "total": 150,
+            #     "page": 2,
+            #     "page_size": 20,
+            #     "total_pages": 8,
+            #     "has_next": True,
+            #     "has_previous": True
+            # }
+        """
+        if not settings.postgres.enabled or not self.db:
+            logger.debug(f"Postgres disabled, returning empty {self.model_class.__name__} pagination")
+            return {
+                "data": [],
+                "total": 0,
+                "page": page,
+                "page_size": page_size,
+                "total_pages": 0,
+                "has_next": False,
+                "has_previous": False,
+            }
+
+        # Ensure connection
+        if not self.db.pool:
+            await self.db.connect()
+
+        # Type guard: ensure pool is not None after connect
+        if not self.db.pool:
+            raise RuntimeError("Failed to establish database connection")
+
+        # Build WHERE clause from filters
+        where_conditions = ["deleted_at IS NULL"]
+        params: list[Any] = []
+        param_idx = 1
+
+        for field, value in filters.items():
+            where_conditions.append(f"{field} = ${param_idx}")
+            params.append(value)
+            param_idx += 1
+
+        where_clause = " AND ".join(where_conditions)
+
+        # Build PARTITION BY clause
+        partition_clause = f"PARTITION BY {partition_by}" if partition_by else ""
+
+        # Build the CTE query with ROW_NUMBER() and COUNT() window functions
+        # This gives us pagination + total count in a single query
+        sql = f"""
+            WITH numbered AS (
+                SELECT *,
+                       ROW_NUMBER() OVER ({partition_clause} ORDER BY {order_by}) as _row_num,
+                       COUNT(*) OVER ({partition_clause}) as _total_count
+                FROM {self.table_name}
+                WHERE {where_clause}
+            )
+            SELECT * FROM numbered
+            WHERE _row_num > ${param_idx} AND _row_num <= ${param_idx + 1}
+            ORDER BY _row_num
+        """
+
+        # Calculate row range for the page
+        start_row = (page - 1) * page_size
+        end_row = page * page_size
+        params.extend([start_row, end_row])
+
+        async with self.db.pool.acquire() as conn:
+            rows = await conn.fetch(sql, *params)
+
+        # Extract total from first row (all rows have the same _total_count)
+        total = rows[0]["_total_count"] if rows else 0
+
+        # Remove internal columns and convert to models
+        data = []
+        for row in rows:
+            row_dict = dict(row)
+            row_dict.pop("_row_num", None)
+            row_dict.pop("_total_count", None)
+            data.append(self.model_class.model_validate(row_dict))
+
+        # Calculate pagination metadata
+        total_pages = (total + page_size - 1) // page_size if total > 0 else 0
+
+        return {
+            "data": data,
+            "total": total,
+            "page": page,
+            "page_size": page_size,
+            "total_pages": total_pages,
+            "has_next": page < total_pages,
+            "has_previous": page > 1,
+        }
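The CTE above returns both the page slice and the total in a single round trip: ROW_NUMBER() numbers the rows for slicing, while COUNT(*) OVER () repeats the total on every row. A minimal standalone sketch of the same pattern with asyncpg; the messages table and its columns are illustrative stand-ins, not the package's schema:

    import asyncpg

    async def fetch_page(pool: asyncpg.Pool, page: int, page_size: int):
        # ROW_NUMBER() slices the page; COUNT(*) OVER () carries the total
        # on every row, so no second COUNT(*) query is needed.
        sql = """
            WITH numbered AS (
                SELECT *,
                       ROW_NUMBER() OVER (ORDER BY created_at DESC) AS _row_num,
                       COUNT(*) OVER () AS _total_count
                FROM messages            -- hypothetical table
                WHERE deleted_at IS NULL
            )
            SELECT * FROM numbered
            WHERE _row_num > $1 AND _row_num <= $2
            ORDER BY _row_num
        """
        start, end = (page - 1) * page_size, page * page_size
        async with pool.acquire() as conn:
            rows = await conn.fetch(sql, start, end)
        total = rows[0]["_total_count"] if rows else 0
        return rows, total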
rem/services/postgres/schema_generator.py
@@ -1,7 +1,12 @@
 """
 Schema generation utility from Pydantic models.
 
-Scans a directory of Pydantic models and generates complete database schemas including:
+Generates complete database schemas from:
+1. REM's core models (Resource, Moment, User, etc.)
+2. Models registered via rem.register_model() or rem.register_models()
+3. Models discovered from a directory scan
+
+Output includes:
 - Primary tables
 - Embeddings tables
 - KV_STORE triggers
@@ -11,8 +16,12 @@ Scans a directory of Pydantic models and generates complete database schemas inc
 Usage:
     from rem.services.postgres.schema_generator import SchemaGenerator
 
+    # Generate from registry (includes core + registered models)
     generator = SchemaGenerator()
-    schema = generator.generate_from_directory("src/rem/models/entities")
+    schema = await generator.generate_from_registry()
+
+    # Or generate from directory (legacy)
+    schema = await generator.generate_from_directory("src/rem/models/entities")
 
     # Write to file
     with open("src/rem/sql/schema.sql", "w") as f:
@@ -228,12 +237,65 @@ class SchemaGenerator:
         self.schemas[table_name] = schema
         return schema
 
+    async def generate_from_registry(
+        self, output_file: str | None = None, include_core: bool = True
+    ) -> str:
+        """
+        Generate complete schema from the model registry.
+
+        Includes:
+        1. REM's core models (if include_core=True)
+        2. Models registered via rem.register_model() or rem.register_models()
+
+        Args:
+            output_file: Optional output file path (relative to output_dir)
+            include_core: If True, include REM's core models (default: True)
+
+        Returns:
+            Complete SQL schema as string
+
+        Example:
+            import rem
+            from rem.models.core import CoreModel
+
+            # Register custom model
+            @rem.register_model
+            class CustomEntity(CoreModel):
+                name: str
+
+            # Generate schema (includes core + custom)
+            generator = SchemaGenerator()
+            schema = await generator.generate_from_registry()
+        """
+        from ...registry import get_model_registry
+
+        registry = get_model_registry()
+        models = registry.get_models(include_core=include_core)
+
+        logger.info(f"Generating schema from registry: {len(models)} models")
+
+        # Generate schemas for each model
+        for model_name, ext in models.items():
+            await self.generate_schema_for_model(
+                ext.model,
+                table_name=ext.table_name,
+                entity_key_field=ext.entity_key_field,
+            )
+
+        return self._generate_sql_output(
+            source="model registry",
+            output_file=output_file,
+        )
+
     async def generate_from_directory(
         self, directory: str | Path, output_file: str | None = None
     ) -> str:
         """
         Generate complete schema from all models in a directory.
 
+        Note: For most use cases, prefer generate_from_registry() which uses
+        the model registry pattern.
+
         Args:
            directory: Path to directory with Pydantic models
            output_file: Optional output file path (relative to output_dir)
@@ -248,12 +310,31 @@ class SchemaGenerator:
         for model_name, model in models.items():
             await self.generate_schema_for_model(model)
 
-        # Combine into single SQL file
+        return self._generate_sql_output(
+            source=f"directory: {directory}",
+            output_file=output_file,
+        )
+
+    def _generate_sql_output(
+        self, source: str, output_file: str | None = None
+    ) -> str:
+        """
+        Generate SQL output from accumulated schemas.
+
+        Args:
+            source: Description of schema source (for header comment)
+            output_file: Optional output file path (relative to output_dir)
+
+        Returns:
+            Complete SQL schema as string
+        """
+        import datetime
+
         sql_parts = [
             "-- REM Model Schema (install_models.sql)",
             "-- Generated from Pydantic models",
-            f"-- Source directory: {directory}",
-            "-- Generated at: " + __import__("datetime").datetime.now().isoformat(),
+            f"-- Source: {source}",
+            f"-- Generated at: {datetime.datetime.now().isoformat()}",
             "--",
             "-- DO NOT EDIT MANUALLY - Regenerate with: rem db schema generate",
             "--",
rem/services/rate_limit.py
@@ -0,0 +1,113 @@
+"""
+Rate Limit Service - Postgres-backed rate limiting.
+
+Implements tenant-aware, tiered rate limiting using PostgreSQL UNLOGGED tables
+for high performance. Supports monthly quotas and short-term burst limits.
+"""
+
+import random
+from datetime import datetime, timezone
+from enum import Enum
+from typing import Optional
+
+from loguru import logger
+
+from ..models.entities.user import UserTier
+from .postgres.service import PostgresService
+
+
+class RateLimitService:
+    """
+    Service for tracking and enforcing API rate limits.
+
+    Uses an UNLOGGED table `rate_limits` for performance.
+    Note: Counts in UNLOGGED tables may be lost on database crash/restart.
+    """
+
+    def __init__(self, db: PostgresService):
+        self.db = db
+
+        # Rate limits configuration
+        # Format: (limit, period_seconds)
+        # This is a simple implementation. In production, move to settings.
+        self.TIER_CONFIG = {
+            UserTier.ANONYMOUS: {"limit": 1000, "period": 3600},  # 1000/hour (for testing)
+            UserTier.FREE: {"limit": 50, "period": 2592000},  # 50/month (~30 days)
+            UserTier.BASIC: {"limit": 10000, "period": 2592000},  # 10k/month
+            UserTier.PRO: {"limit": 100000, "period": 2592000},  # 100k/month
+        }
+
+    async def check_rate_limit(
+        self,
+        tenant_id: str,
+        identifier: str,
+        tier: UserTier
+    ) -> tuple[bool, int, int]:
+        """
+        Check if request is allowed under the rate limit.
+
+        Args:
+            tenant_id: Tenant identifier
+            identifier: User ID or Anonymous ID
+            tier: User subscription tier
+
+        Returns:
+            Tuple (is_allowed, current_count, limit)
+        """
+        config = self.TIER_CONFIG.get(tier, self.TIER_CONFIG[UserTier.FREE])
+        limit = config["limit"]
+        period = config["period"]
+
+        # Construct time-window key
+        now = datetime.now(timezone.utc)
+
+        if period >= 2592000:  # Monthly
+            time_key = now.strftime("%Y-%m")
+        elif period >= 86400:  # Daily
+            time_key = now.strftime("%Y-%m-%d")
+        elif period >= 3600:  # Hourly
+            time_key = now.strftime("%Y-%m-%d-%H")
+        else:  # Minute/Second (fallback)
+            time_key = int(now.timestamp() / period)
+
+        key = f"{tenant_id}:{identifier}:{tier.value}:{time_key}"
+
+        # Calculate expiry (for cleanup)
+        expires_at = now.timestamp() + period
+
+        # Atomic UPSERT to increment counter
+        # Returns the new count
+        query = """
+            INSERT INTO rate_limits (key, count, expires_at)
+            VALUES ($1, 1, to_timestamp($2))
+            ON CONFLICT (key) DO UPDATE
+            SET count = rate_limits.count + 1
+            RETURNING count;
+        """
+
+        try:
+            count = await self.db.fetchval(query, key, expires_at)
+        except Exception as e:
+            logger.error(f"Rate limit check failed: {e}")
+            # Fail open to avoid blocking users on DB error
+            return True, 0, limit
+
+        is_allowed = count <= limit
+
+        # Probabilistic cleanup (1% chance)
+        if random.random() < 0.01:
+            await self.cleanup_expired()
+
+        return is_allowed, count, limit
+
+    async def cleanup_expired(self):
+        """Remove expired rate limit keys."""
+        try:
+            # Use a small limit to avoid locking/long queries
+            query = """
+                DELETE FROM rate_limits
+                WHERE expires_at < NOW()
+            """
+            await self.db.execute(query)
+        except Exception as e:
+            logger.warning(f"Rate limit cleanup failed: {e}")
rem/services/rem/README.md
@@ -302,3 +302,17 @@ See `tests/integration/test_rem_query_evolution.py` for stage-based validation a
 * **Unified View**: The underlying SQL function `rem_traverse` uses a view `all_graph_edges` that unions `graph_edges` from all entity tables (`resources`, `moments`, `users`, etc.). This enables polymorphic traversal without complex joins in the application layer.
 * **KV Store**: Edge destinations (`dst`) are resolved to entity IDs using the `kv_store`. This requires that all traversable entities have an entry in the `kv_store` (handled automatically by database triggers).
 * **Iterated Retrieval**: REM is architected for multi-turn retrieval where LLMs conduct conversational database exploration. Each query informs the next, enabling emergent information discovery without requiring upfront schema knowledge.
+
+## Scaling & Architectural Decisions
+
+### 1. Hybrid Adjacency List
+REM implements a **Hybrid Adjacency List** pattern to balance strict relational guarantees with graph flexibility:
+* **Primary Storage (Source of Truth):** Standard PostgreSQL tables (`resources`, `moments`, etc.) enforce schema validation, constraints, and type safety.
+* **Graph Overlay:** Relationships are stored as "inline edges" within a JSONB column (`graph_edges`) on each entity.
+* **Performance Layer:** A denormalized `UNLOGGED` table (`kv_store`) acts as a high-speed cache, mapping human-readable keys to internal UUIDs and edges. This avoids the traditional "join bomb" of traversing normalized SQL tables while avoiding the operational complexity of a separate graph database (e.g., Neo4j).
+
+### 2. The Pareto Principle in Graph Algorithms
+We explicitly choose **Simplicity over Full-Scale Graph Analytics**.
+* **Hypothesis:** For LLM agent workloads, 80% of the value is derived from **local context retrieval** (1-3 hops via `LOOKUP` and `TRAVERSE`).
+* **Diminishing Returns:** Global graph algorithms (PageRank, community detection) offer diminishing returns for real-time agentic retrieval tasks. Agents typically need to answer specific questions ("Who worked on file X?"), which is a local neighborhood problem, not a global cluster analysis problem.
+* **Future Scaling:** If deeper analysis is needed, we prefer **Graph + Vector (RAG)** approaches (using semantic similarity to find implicit links) over complex explicit graph algorithms.
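To make the overlay concrete, a single hop in this layout could be expressed as below. This is a sketch only: it assumes graph_edges is a JSONB array of objects carrying a "dst" key and that kv_store exposes entity_key/entity_id columns, all inferred from the README text above; the packaged rem_traverse function and all_graph_edges view are the real implementation.

    # Hypothetical 1-hop traversal over inline edges (column names inferred,
    # not read from the package's schema).
    ONE_HOP_SQL = """
        SELECT edge->>'dst' AS dst_key, kv.entity_id
        FROM resources r
        CROSS JOIN LATERAL jsonb_array_elements(r.graph_edges) AS edge
        JOIN kv_store kv ON kv.entity_key = edge->>'dst'
        WHERE r.id = $1
    """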
rem/services/session/compression.py
@@ -14,6 +14,21 @@ from typing import Any
 
 from loguru import logger
 
+# Max length for entity keys (kv_store.entity_key is varchar(255))
+MAX_ENTITY_KEY_LENGTH = 255
+
+
+def truncate_key(key: str, max_length: int = MAX_ENTITY_KEY_LENGTH) -> str:
+    """Truncate a key to max length, preserving useful suffix if possible."""
+    if len(key) <= max_length:
+        return key
+    # Keep first part and add hash suffix for uniqueness
+    import hashlib
+    hash_suffix = hashlib.md5(key.encode()).hexdigest()[:8]
+    truncated = key[:max_length - 9] + "-" + hash_suffix
+    logger.warning(f"Truncated key from {len(key)} to {len(truncated)} chars: {key[:50]}...")
+    return truncated
+
 from rem.models.entities import Message
 from rem.services.postgres import PostgresService, Repository
 from rem.settings import settings
@@ -151,7 +166,8 @@ class SessionMessageStore:
             return f"msg-{message_index}"
 
         # Create entity key for REM LOOKUP: session-{session_id}-msg-{index}
-        entity_key = f"session-{session_id}-msg-{message_index}"
+        # Truncate to avoid exceeding kv_store.entity_key varchar(255) limit
+        entity_key = truncate_key(f"session-{session_id}-msg-{message_index}")
 
         # Create Message entity for assistant response
         msg = Message(
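The truncation scheme is easy to check in isolation. A worked example using only the stdlib pieces truncate_key itself relies on:

    import hashlib

    MAX_ENTITY_KEY_LENGTH = 255

    # A synthetic over-long key: 246 kept chars + "-" + 8-char md5 prefix = 255.
    key = "session-" + "x" * 300 + "-msg-42"
    hash_suffix = hashlib.md5(key.encode()).hexdigest()[:8]
    truncated = key[: MAX_ENTITY_KEY_LENGTH - 9] + "-" + hash_suffix

    assert len(truncated) == MAX_ENTITY_KEY_LENGTH
    # The suffix is derived from the full key, so two long keys sharing the
    # same 246-char prefix still truncate to distinct entity keys.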
rem/services/user_service.py
@@ -0,0 +1,98 @@
+"""
+User Service - User account management.
+
+Handles user creation, profile updates, and session linking.
+"""
+
+from datetime import datetime
+from typing import Optional
+
+from loguru import logger
+
+from ..models.entities.user import User, UserTier
+from .postgres.repository import Repository
+from .postgres.service import PostgresService
+
+
+class UserService:
+    """
+    Service for managing user accounts and sessions.
+    """
+
+    def __init__(self, db: PostgresService):
+        self.db = db
+        self.repo = Repository(User, "users", db=db)
+
+    async def get_or_create_user(
+        self,
+        email: str,
+        tenant_id: str = "default",
+        name: str = "New User",
+        avatar_url: Optional[str] = None,
+    ) -> User:
+        """
+        Get existing user by email or create a new one.
+        """
+        users = await self.repo.find(filters={"email": email}, limit=1)
+
+        if users:
+            user = users[0]
+            # Update profile if needed (e.g., name/avatar from OAuth)
+            updated = False
+            if name and user.name == "New User":  # Only update if placeholder
+                user.name = name
+                updated = True
+
+            # Store avatar in metadata if provided
+            if avatar_url:
+                user.metadata = user.metadata or {}
+                if user.metadata.get("avatar_url") != avatar_url:
+                    user.metadata["avatar_url"] = avatar_url
+                    updated = True
+
+            if updated:
+                user.updated_at = datetime.utcnow()
+                await self.repo.upsert(user)
+
+            return user
+
+        # Create new user
+        user = User(
+            tenant_id=tenant_id,
+            user_id=email,  # Use email as user_id for now? Or UUID?
+            # The User model has 'user_id' field but also 'id' UUID.
+            # Usually user_id is the external ID or email.
+            name=name,
+            email=email,
+            tier=UserTier.FREE,
+            created_at=datetime.utcnow(),
+            updated_at=datetime.utcnow(),
+            metadata={"avatar_url": avatar_url} if avatar_url else {},
+        )
+        await self.repo.upsert(user)
+        logger.info(f"Created new user: {email}")
+        return user
+
+    async def link_anonymous_session(self, user: User, anon_id: str) -> None:
+        """
+        Link an anonymous session ID to a user account.
+
+        This allows merging history from the anonymous session into the user's profile.
+        """
+        if not anon_id:
+            return
+
+        # Check if already linked
+        if anon_id in user.anonymous_ids:
+            return
+
+        # Add to list
+        user.anonymous_ids.append(anon_id)
+        user.updated_at = datetime.utcnow()
+
+        # Save
+        await self.repo.upsert(user)
+        logger.info(f"Linked anonymous session {anon_id} to user {user.email}")
+
+        # TODO: Migrate/Merge actual data (rate limit counts, history) if needed.
+        # For now, we just link the IDs so future queries can include data from this anon_id.
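A sketch of how the two methods compose in an OAuth-style login flow, assuming PostgresService can be constructed with default connection settings; the email, avatar URL, and anonymous ID are stand-ins:

    import asyncio

    from rem.services.postgres.service import PostgresService
    from rem.services.user_service import UserService

    async def main() -> None:
        db = PostgresService()  # assumed default connection settings
        service = UserService(db)

        # Resolve the account, then adopt the visitor's pre-login anonymous
        # ID so its history can follow the user.
        user = await service.get_or_create_user(
            email="alice@example.com",
            name="Alice",
            avatar_url="https://example.com/alice.png",
        )
        await service.link_anonymous_session(user, anon_id="anon-1234")

    asyncio.run(main())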