remdb 0.3.118__py3-none-any.whl → 0.3.146__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. See the registry's release advisory for more details.

Files changed (44)
  1. rem/agentic/agents/sse_simulator.py +2 -0
  2. rem/agentic/context.py +23 -3
  3. rem/agentic/mcp/tool_wrapper.py +126 -15
  4. rem/agentic/otel/setup.py +1 -0
  5. rem/agentic/providers/phoenix.py +371 -108
  6. rem/agentic/providers/pydantic_ai.py +122 -43
  7. rem/agentic/schema.py +4 -1
  8. rem/api/mcp_router/tools.py +13 -2
  9. rem/api/routers/chat/completions.py +250 -4
  10. rem/api/routers/chat/models.py +81 -7
  11. rem/api/routers/chat/otel_utils.py +33 -0
  12. rem/api/routers/chat/sse_events.py +17 -1
  13. rem/api/routers/chat/streaming.py +35 -1
  14. rem/api/routers/feedback.py +134 -14
  15. rem/auth/middleware.py +66 -1
  16. rem/cli/commands/cluster.py +590 -82
  17. rem/cli/commands/configure.py +3 -4
  18. rem/cli/commands/experiments.py +468 -76
  19. rem/cli/commands/session.py +336 -0
  20. rem/cli/dreaming.py +2 -2
  21. rem/cli/main.py +2 -0
  22. rem/config.py +8 -1
  23. rem/models/core/experiment.py +58 -14
  24. rem/models/entities/ontology.py +1 -1
  25. rem/models/entities/ontology_config.py +1 -1
  26. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  27. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  28. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  29. rem/services/phoenix/client.py +59 -18
  30. rem/services/postgres/pydantic_to_sqlalchemy.py +9 -12
  31. rem/services/session/compression.py +7 -0
  32. rem/settings.py +260 -17
  33. rem/sql/migrations/002_install_models.sql +91 -91
  34. rem/sql/migrations/004_cache_system.sql +1 -1
  35. rem/utils/README.md +45 -0
  36. rem/utils/files.py +157 -1
  37. rem/utils/schema_loader.py +94 -3
  38. rem/utils/vision.py +1 -1
  39. rem/workers/__init__.py +2 -1
  40. rem/workers/db_listener.py +579 -0
  41. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/METADATA +161 -147
  42. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/RECORD +44 -41
  43. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/WHEEL +0 -0
  44. {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/entry_points.txt +0 -0
@@ -793,40 +793,72 @@ class PhoenixClient:
793
793
  score: float | None = None,
794
794
  explanation: str | None = None,
795
795
  metadata: dict[str, Any] | None = None,
796
+ trace_id: str | None = None,
796
797
  ) -> str | None:
797
- """Add feedback annotation to a span.
798
+ """Add feedback annotation to a span via Phoenix REST API.
799
+
800
+ Uses direct HTTP POST to /v1/span_annotations for reliability
801
+ (Phoenix Python client API changes frequently).
798
802
 
799
803
  Args:
800
- span_id: Span ID to annotate
804
+ span_id: Span ID to annotate (hex string)
801
805
  annotation_name: Name of the annotation (e.g., "correctness", "user_feedback")
802
806
  annotator_kind: Type of annotator ("HUMAN", "LLM", "CODE")
803
807
  label: Optional label (e.g., "correct", "incorrect", "helpful")
804
808
  score: Optional numeric score (0.0-1.0)
805
809
  explanation: Optional explanation text
806
810
  metadata: Optional additional metadata dict
811
+ trace_id: Optional trace ID (used if span lookup needed)
807
812
 
808
813
  Returns:
809
814
  Annotation ID if successful, None otherwise
810
815
  """
816
+ import httpx
817
+
811
818
  try:
812
- result = self._client.add_span_annotation( # type: ignore[attr-defined]
813
- span_id=span_id,
814
- name=annotation_name,
815
- annotator_kind=annotator_kind,
816
- label=label,
817
- score=score,
818
- explanation=explanation,
819
- metadata=metadata,
820
- )
819
+ # Build annotation payload for Phoenix REST API
820
+ annotation_data = {
821
+ "span_id": span_id,
822
+ "name": annotation_name,
823
+ "annotator_kind": annotator_kind,
824
+ "result": {
825
+ "label": label,
826
+ "score": score,
827
+ "explanation": explanation,
828
+ },
829
+ "metadata": metadata or {},
830
+ }
821
831
 
822
- annotation_id = getattr(result, "id", None) if result else None
823
- logger.info(f"Added {annotator_kind} feedback to span {span_id} -> {annotation_id}")
832
+ # Add trace_id if provided
833
+ if trace_id:
834
+ annotation_data["trace_id"] = trace_id
835
+
836
+ # POST to Phoenix REST API
837
+ annotations_endpoint = f"{self.config.base_url}/v1/span_annotations"
838
+ headers = {}
839
+ if self.config.api_key:
840
+ headers["Authorization"] = f"Bearer {self.config.api_key}"
841
+
842
+ with httpx.Client(timeout=5.0) as client:
843
+ response = client.post(
844
+ annotations_endpoint,
845
+ json={"data": [annotation_data]},
846
+ headers=headers,
847
+ )
848
+ response.raise_for_status()
824
849
 
825
- return annotation_id
850
+ logger.info(f"Added {annotator_kind} feedback to span {span_id}")
851
+ return span_id # Return span_id as annotation reference
826
852
 
853
+ except httpx.HTTPStatusError as e:
854
+ logger.error(
855
+ f"Failed to add span feedback (HTTP {e.response.status_code}): "
856
+ f"{e.response.text if hasattr(e, 'response') else 'N/A'}"
857
+ )
858
+ return None
827
859
  except Exception as e:
828
860
  logger.error(f"Failed to add span feedback: {e}")
829
- raise
861
+ return None
830
862
 
831
863
  def sync_user_feedback(
832
864
  self,
@@ -835,6 +867,7 @@ class PhoenixClient:
835
867
  categories: list[str] | None = None,
836
868
  comment: str | None = None,
837
869
  feedback_id: str | None = None,
870
+ trace_id: str | None = None,
838
871
  ) -> str | None:
839
872
  """Sync user feedback to Phoenix as a span annotation.
840
873
 
@@ -847,6 +880,7 @@ class PhoenixClient:
847
880
  categories: List of feedback categories
848
881
  comment: Free-text comment
849
882
  feedback_id: Optional REM feedback ID for reference
883
+ trace_id: Optional trace ID for the span
850
884
 
851
885
  Returns:
852
886
  Phoenix annotation ID if successful
@@ -860,12 +894,18 @@ class PhoenixClient:
860
894
  ... )
861
895
  """
862
896
  # Convert rating to 0-1 score
897
+ # Rating scheme:
898
+ # -1 = thumbs down → score 0.0
899
+ # 1 = thumbs up → score 1.0
900
+ # 2-5 = star rating → normalized to 0-1 range
863
901
  score = None
864
902
  if rating is not None:
865
903
  if rating == -1:
866
904
  score = 0.0
867
- elif 1 <= rating <= 5:
868
- score = rating / 5.0
905
+ elif rating == 1:
906
+ score = 1.0 # Thumbs up
907
+ elif 2 <= rating <= 5:
908
+ score = (rating - 1) / 4.0 # 2→0.25, 3→0.5, 4→0.75, 5→1.0
869
909
 
870
910
  # Use primary category as label
871
911
  label = categories[0] if categories else None
@@ -880,7 +920,7 @@ class PhoenixClient:
880
920
  explanation = f"Categories: {cats_str}"
881
921
 
882
922
  # Build metadata
883
- metadata = {
923
+ metadata: dict[str, Any] = {
884
924
  "rating": rating,
885
925
  "categories": categories or [],
886
926
  }
@@ -895,6 +935,7 @@ class PhoenixClient:
895
935
  score=score,
896
936
  explanation=explanation,
897
937
  metadata=metadata,
938
+ trace_id=trace_id,
898
939
  )
899
940
 
900
941
  def get_span_annotations(
@@ -513,18 +513,15 @@ def get_target_metadata() -> MetaData:
513
513
  """
514
514
  Get SQLAlchemy metadata for Alembic autogenerate.
515
515
 
516
- This is the main entry point used by alembic/env.py.
516
+ This is the main entry point used by alembic/env.py and rem db diff.
517
+
518
+ Uses the model registry as the source of truth, which includes:
519
+ - Core REM models (Resource, Message, User, etc.)
520
+ - User-registered models via @rem.register_model decorator
517
521
 
518
522
  Returns:
519
- SQLAlchemy MetaData object representing current Pydantic models
523
+ SQLAlchemy MetaData object representing all registered Pydantic models
520
524
  """
521
- import rem
522
-
523
- package_root = Path(rem.__file__).parent.parent.parent
524
- models_dir = package_root / "src" / "rem" / "models" / "entities"
525
-
526
- if not models_dir.exists():
527
- logger.error(f"Models directory not found: {models_dir}")
528
- return MetaData()
529
-
530
- return build_sqlalchemy_metadata_from_pydantic(models_dir)
525
+ # build_sqlalchemy_metadata_from_pydantic uses the registry internally,
526
+ # so no directory path is needed (the parameter is kept for backwards compat)
527
+ return build_sqlalchemy_metadata_from_pydantic()
@@ -170,12 +170,16 @@ class SessionMessageStore:
170
170
  entity_key = truncate_key(f"session-{session_id}-msg-{message_index}")
171
171
 
172
172
  # Create Message entity for assistant response
173
+ # Use pre-generated id from message dict if available (for frontend feedback)
173
174
  msg = Message(
175
+ id=message.get("id"), # Use pre-generated ID if provided
174
176
  content=message.get("content", ""),
175
177
  message_type=message.get("role", "assistant"),
176
178
  session_id=session_id,
177
179
  tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
178
180
  user_id=user_id or self.user_id,
181
+ trace_id=message.get("trace_id"),
182
+ span_id=message.get("span_id"),
179
183
  metadata={
180
184
  "message_index": message_index,
181
185
  "entity_key": entity_key, # Store entity key for LOOKUP
@@ -284,11 +288,14 @@ class SessionMessageStore:
284
288
  # Short assistant messages, user messages, and system messages stored as-is
285
289
  # Store ALL messages in database for full audit trail
286
290
  msg = Message(
291
+ id=message.get("id"), # Use pre-generated ID if provided
287
292
  content=content,
288
293
  message_type=message.get("role", "user"),
289
294
  session_id=session_id,
290
295
  tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
291
296
  user_id=user_id or self.user_id,
297
+ trace_id=message.get("trace_id"),
298
+ span_id=message.get("span_id"),
292
299
  metadata={
293
300
  "message_index": idx,
294
301
  "timestamp": message.get("timestamp"),
rem/settings.py CHANGED
@@ -21,8 +21,8 @@ Example .env file:
21
21
  LLM__OPENAI_API_KEY=sk-...
22
22
  LLM__ANTHROPIC_API_KEY=sk-ant-...
23
23
 
24
- # Database (port 5050 for Docker Compose)
25
- POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5050/rem
24
+ # Database (port 5051 for Docker Compose prebuilt, 5050 for local dev)
25
+ POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5051/rem
26
26
  POSTGRES__POOL_MIN_SIZE=5
27
27
  POSTGRES__POOL_MAX_SIZE=20
28
28
  POSTGRES__STATEMENT_TIMEOUT=30000
@@ -33,14 +33,15 @@ Example .env file:
33
33
  AUTH__OIDC_CLIENT_ID=your-client-id
34
34
  AUTH__SESSION_SECRET=your-secret-key
35
35
 
36
- # OpenTelemetry (disabled by default)
36
+ # OpenTelemetry (disabled by default - enable via env var when collector available)
37
+ # Standard OTLP collector ports: 4317 (gRPC), 4318 (HTTP)
37
38
  OTEL__ENABLED=false
38
39
  OTEL__SERVICE_NAME=rem-api
39
- OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
40
- OTEL__PROTOCOL=http
40
+ OTEL__COLLECTOR_ENDPOINT=http://localhost:4317
41
+ OTEL__PROTOCOL=grpc
41
42
 
42
- # Arize Phoenix (disabled by default)
43
- PHOENIX__ENABLED=false
43
+ # Arize Phoenix (enabled by default - can be disabled via env var)
44
+ PHOENIX__ENABLED=true
44
45
  PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
45
46
  PHOENIX__PROJECT_NAME=rem
46
47
 
@@ -241,6 +242,11 @@ class OTELSettings(BaseSettings):
241
242
  description="Export timeout in milliseconds",
242
243
  )
243
244
 
245
+ insecure: bool = Field(
246
+ default=True,
247
+ description="Use insecure (non-TLS) gRPC connection (default: True for local dev)",
248
+ )
249
+
244
250
 
245
251
  class PhoenixSettings(BaseSettings):
246
252
  """
@@ -267,8 +273,8 @@ class PhoenixSettings(BaseSettings):
267
273
  )
268
274
 
269
275
  enabled: bool = Field(
270
- default=False,
271
- description="Enable Phoenix integration (disabled by default for local dev)",
276
+ default=True,
277
+ description="Enable Phoenix integration (enabled by default)",
272
278
  )
273
279
 
274
280
  base_url: str = Field(
@@ -458,10 +464,11 @@ class PostgresSettings(BaseSettings):
458
464
  )
459
465
 
460
466
  connection_string: str = Field(
461
- default="postgresql://rem:rem@localhost:5050/rem",
462
- description="PostgreSQL connection string (default uses Docker Compose port 5050)",
467
+ default="postgresql://rem:rem@localhost:5051/rem",
468
+ description="PostgreSQL connection string (default uses Docker Compose prebuilt port 5051)",
463
469
  )
464
470
 
471
+
465
472
  pool_size: int = Field(
466
473
  default=10,
467
474
  description="Connection pool size (deprecated, use pool_min_size/pool_max_size)",
@@ -686,6 +693,91 @@ class S3Settings(BaseSettings):
686
693
  )
687
694
 
688
695
 
696
+ class DataLakeSettings(BaseSettings):
697
+ """
698
+ Data lake settings for experiment and dataset storage.
699
+
700
+ Data Lake Convention:
701
+ The data lake provides a standardized structure for storing datasets,
702
+ experiments, and calibration data in S3. Users bring their own bucket
703
+ and the version is pinned by default to v0 in the path.
704
+
705
+ S3 Path Structure:
706
+ s3://{bucket}/{version}/datasets/
707
+ ├── raw/ # Raw source data + transformers
708
+ │ └── {dataset_name}/ # e.g., cns_drugs, codes, care
709
+ ├── tables/ # Database table data (JSONL)
710
+ │ ├── resources/ # → resources table
711
+ │ │ ├── drugs/{category}/ # Psychotropic drugs
712
+ │ │ ├── care/stages/ # Treatment stages
713
+ │ │ └── crisis/ # Crisis resources
714
+ │ └── codes/ # → codes table
715
+ │ ├── icd10/{category}/ # ICD-10 codes
716
+ │ └── cpt/ # CPT codes
717
+ └── calibration/ # Agent calibration
718
+ ├── experiments/ # Experiment configs + results
719
+ │ └── {agent}/{task}/ # e.g., siggy/risk-assessment
720
+ └── datasets/ # Shared evaluation datasets
721
+
722
+ Experiment Storage:
723
+ - Local: experiments/{agent}/{task}/experiment.yaml
724
+ - S3: s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
725
+
726
+ Environment variables:
727
+ DATA_LAKE__BUCKET_NAME - S3 bucket for data lake (required)
728
+ DATA_LAKE__VERSION - Path version prefix (default: v0)
729
+ DATA_LAKE__DATASETS_PREFIX - Datasets directory (default: datasets)
730
+ DATA_LAKE__EXPERIMENTS_PREFIX - Experiments subdirectory (default: experiments)
731
+ """
732
+
733
+ model_config = SettingsConfigDict(
734
+ env_prefix="DATA_LAKE__",
735
+ env_file=".env",
736
+ env_file_encoding="utf-8",
737
+ extra="ignore",
738
+ )
739
+
740
+ bucket_name: str | None = Field(
741
+ default=None,
742
+ description="S3 bucket for data lake storage (user-provided)",
743
+ )
744
+
745
+ version: str = Field(
746
+ default="v0",
747
+ description="API version for data lake paths",
748
+ )
749
+
750
+ datasets_prefix: str = Field(
751
+ default="datasets",
752
+ description="Root directory for datasets in the bucket",
753
+ )
754
+
755
+ experiments_prefix: str = Field(
756
+ default="experiments",
757
+ description="Subdirectory within calibration for experiments",
758
+ )
759
+
760
+ def get_base_uri(self) -> str | None:
761
+ """Get the base S3 URI for the data lake."""
762
+ if not self.bucket_name:
763
+ return None
764
+ return f"s3://{self.bucket_name}/{self.version}/{self.datasets_prefix}"
765
+
766
+ def get_experiment_uri(self, agent: str, task: str = "general") -> str | None:
767
+ """Get the S3 URI for an experiment."""
768
+ base = self.get_base_uri()
769
+ if not base:
770
+ return None
771
+ return f"{base}/calibration/{self.experiments_prefix}/{agent}/{task}"
772
+
773
+ def get_tables_uri(self, table: str = "resources") -> str | None:
774
+ """Get the S3 URI for a table directory."""
775
+ base = self.get_base_uri()
776
+ if not base:
777
+ return None
778
+ return f"{base}/tables/{table}"
779
+
780
+
689
781
  class ChunkingSettings(BaseSettings):
690
782
  """
691
783
  Document chunking settings for semantic text splitting.
@@ -969,6 +1061,8 @@ class APISettings(BaseSettings):
969
1061
  API__RELOAD - Enable auto-reload for development
970
1062
  API__WORKERS - Number of worker processes (production)
971
1063
  API__LOG_LEVEL - Logging level (debug, info, warning, error)
1064
+ API__API_KEY_ENABLED - Enable X-API-Key header authentication
1065
+ API__API_KEY - API key for X-API-Key authentication
972
1066
  """
973
1067
 
974
1068
  model_config = SettingsConfigDict(
@@ -1003,6 +1097,23 @@ class APISettings(BaseSettings):
1003
1097
  description="Logging level (debug, info, warning, error, critical)",
1004
1098
  )
1005
1099
 
1100
+ api_key_enabled: bool = Field(
1101
+ default=False,
1102
+ description=(
1103
+ "Enable X-API-Key header authentication for API endpoints. "
1104
+ "When enabled, requests must include X-API-Key header with valid key. "
1105
+ "This provides simple API key auth independent of OAuth."
1106
+ ),
1107
+ )
1108
+
1109
+ api_key: str | None = Field(
1110
+ default=None,
1111
+ description=(
1112
+ "API key for X-API-Key authentication. Required when api_key_enabled=true. "
1113
+ "Generate with: python -c \"import secrets; print(secrets.token_urlsafe(32))\""
1114
+ ),
1115
+ )
1116
+
1006
1117
 
1007
1118
  class ModelsSettings(BaseSettings):
1008
1119
  """
@@ -1051,10 +1162,26 @@ class ModelsSettings(BaseSettings):
1051
1162
 
1052
1163
  @property
1053
1164
  def module_list(self) -> list[str]:
1054
- """Get modules as a list, filtering empty strings."""
1055
- if not self.import_modules:
1056
- return []
1057
- return [m.strip() for m in self.import_modules.split(";") if m.strip()]
1165
+ """
1166
+ Get modules as a list, filtering empty strings.
1167
+
1168
+ Auto-detects ./models folder if it exists and is importable.
1169
+ """
1170
+ modules = []
1171
+ if self.import_modules:
1172
+ modules = [m.strip() for m in self.import_modules.split(";") if m.strip()]
1173
+
1174
+ # Auto-detect ./models if it exists and is a Python package (convention over configuration)
1175
+ from pathlib import Path
1176
+
1177
+ models_path = Path("./models")
1178
+ if models_path.exists() and models_path.is_dir():
1179
+ # Check if it's a Python package (has __init__.py)
1180
+ if (models_path / "__init__.py").exists():
1181
+ if "models" not in modules:
1182
+ modules.insert(0, "models")
1183
+
1184
+ return modules
1058
1185
 
1059
1186
 
1060
1187
  class SchemaSettings(BaseSettings):
@@ -1240,6 +1367,110 @@ class GitSettings(BaseSettings):
1240
1367
  )
1241
1368
 
1242
1369
 
1370
+ class DBListenerSettings(BaseSettings):
1371
+ """
1372
+ PostgreSQL LISTEN/NOTIFY database listener settings.
1373
+
1374
+ The DB Listener is a lightweight worker that subscribes to PostgreSQL
1375
+ NOTIFY events and dispatches them to external systems (SQS, REST, custom).
1376
+
1377
+ Architecture:
1378
+ - Single-replica deployment (to avoid duplicate processing)
1379
+ - Dedicated connection for LISTEN (not from connection pool)
1380
+ - Automatic reconnection with exponential backoff
1381
+ - Graceful shutdown on SIGTERM
1382
+
1383
+ Use Cases:
1384
+ - Sync data changes to external systems (Phoenix, webhooks)
1385
+ - Trigger async jobs without polling
1386
+ - Event-driven architectures with PostgreSQL as event source
1387
+
1388
+ Example PostgreSQL trigger:
1389
+ CREATE OR REPLACE FUNCTION notify_feedback_insert()
1390
+ RETURNS TRIGGER AS $$
1391
+ BEGIN
1392
+ PERFORM pg_notify('feedback_sync', json_build_object(
1393
+ 'id', NEW.id,
1394
+ 'table', 'feedbacks',
1395
+ 'action', 'insert'
1396
+ )::text);
1397
+ RETURN NEW;
1398
+ END;
1399
+ $$ LANGUAGE plpgsql;
1400
+
1401
+ Environment variables:
1402
+ DB_LISTENER__ENABLED - Enable the listener worker (default: false)
1403
+ DB_LISTENER__CHANNELS - Comma-separated PostgreSQL channels to listen on
1404
+ DB_LISTENER__HANDLER_TYPE - Handler type: 'sqs', 'rest', or 'custom'
1405
+ DB_LISTENER__SQS_QUEUE_URL - SQS queue URL (for handler_type=sqs)
1406
+ DB_LISTENER__REST_ENDPOINT - REST endpoint URL (for handler_type=rest)
1407
+ DB_LISTENER__RECONNECT_DELAY - Initial reconnect delay in seconds
1408
+ DB_LISTENER__MAX_RECONNECT_DELAY - Maximum reconnect delay in seconds
1409
+
1410
+ References:
1411
+ - PostgreSQL NOTIFY: https://www.postgresql.org/docs/current/sql-notify.html
1412
+ - Brandur's Notifier: https://brandur.org/notifier
1413
+ """
1414
+
1415
+ model_config = SettingsConfigDict(
1416
+ env_prefix="DB_LISTENER__",
1417
+ env_file=".env",
1418
+ env_file_encoding="utf-8",
1419
+ extra="ignore",
1420
+ )
1421
+
1422
+ enabled: bool = Field(
1423
+ default=False,
1424
+ description="Enable the DB Listener worker (disabled by default)",
1425
+ )
1426
+
1427
+ channels: str = Field(
1428
+ default="",
1429
+ description=(
1430
+ "Comma-separated list of PostgreSQL channels to LISTEN on. "
1431
+ "Example: 'feedback_sync,entity_update,user_events'"
1432
+ ),
1433
+ )
1434
+
1435
+ handler_type: str = Field(
1436
+ default="rest",
1437
+ description=(
1438
+ "Handler type for dispatching notifications. Options: "
1439
+ "'sqs' (publish to SQS), 'rest' (POST to endpoint), 'custom' (Python handlers)"
1440
+ ),
1441
+ )
1442
+
1443
+ sqs_queue_url: str = Field(
1444
+ default="",
1445
+ description="SQS queue URL for handler_type='sqs'",
1446
+ )
1447
+
1448
+ rest_endpoint: str = Field(
1449
+ default="http://localhost:8000/api/v1/internal/events",
1450
+ description=(
1451
+ "REST endpoint URL for handler_type='rest'. "
1452
+ "Receives POST with {channel, payload, source} JSON body."
1453
+ ),
1454
+ )
1455
+
1456
+ reconnect_delay: float = Field(
1457
+ default=1.0,
1458
+ description="Initial delay (seconds) between reconnection attempts",
1459
+ )
1460
+
1461
+ max_reconnect_delay: float = Field(
1462
+ default=60.0,
1463
+ description="Maximum delay (seconds) between reconnection attempts (exponential backoff cap)",
1464
+ )
1465
+
1466
+ @property
1467
+ def channel_list(self) -> list[str]:
1468
+ """Get channels as a list, filtering empty strings."""
1469
+ if not self.channels:
1470
+ return []
1471
+ return [c.strip() for c in self.channels.split(",") if c.strip()]
1472
+
1473
+
1243
1474
  class TestSettings(BaseSettings):
1244
1475
  """
1245
1476
  Test environment settings.
@@ -1347,18 +1578,30 @@ class Settings(BaseSettings):
1347
1578
  migration: MigrationSettings = Field(default_factory=MigrationSettings)
1348
1579
  storage: StorageSettings = Field(default_factory=StorageSettings)
1349
1580
  s3: S3Settings = Field(default_factory=S3Settings)
1581
+ data_lake: DataLakeSettings = Field(default_factory=DataLakeSettings)
1350
1582
  git: GitSettings = Field(default_factory=GitSettings)
1351
1583
  sqs: SQSSettings = Field(default_factory=SQSSettings)
1584
+ db_listener: DBListenerSettings = Field(default_factory=DBListenerSettings)
1352
1585
  chunking: ChunkingSettings = Field(default_factory=ChunkingSettings)
1353
1586
  content: ContentSettings = Field(default_factory=ContentSettings)
1354
1587
  schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
1355
1588
  test: TestSettings = Field(default_factory=TestSettings)
1356
1589
 
1357
1590
 
1591
+ # Auto-load .env file from current directory if it exists
1592
+ # This happens BEFORE config file loading, so .env takes precedence
1593
+ from pathlib import Path
1594
+ from dotenv import load_dotenv
1595
+
1596
+ _dotenv_path = Path(".env")
1597
+ if _dotenv_path.exists():
1598
+ load_dotenv(_dotenv_path, override=False) # Don't override existing env vars
1599
+ logger.debug(f"Loaded environment from {_dotenv_path.resolve()}")
1600
+
1358
1601
  # Load configuration from ~/.rem/config.yaml before initializing settings
1359
1602
  # This allows user configuration to be merged with environment variables
1360
- # Set REM_SKIP_CONFIG_FILE=true to disable (useful for development with .env)
1361
- if not os.getenv("REM_SKIP_CONFIG_FILE", "").lower() in ("true", "1", "yes"):
1603
+ # Set REM_SKIP_CONFIG=1 to disable (useful for development with .env)
1604
+ if not os.getenv("REM_SKIP_CONFIG", "").lower() in ("true", "1", "yes"):
1362
1605
  try:
1363
1606
  from rem.config import load_config, merge_config_to_env
1364
1607