remdb 0.3.118__py3-none-any.whl → 0.3.146__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of remdb might be problematic. Click here for more details.
- rem/agentic/agents/sse_simulator.py +2 -0
- rem/agentic/context.py +23 -3
- rem/agentic/mcp/tool_wrapper.py +126 -15
- rem/agentic/otel/setup.py +1 -0
- rem/agentic/providers/phoenix.py +371 -108
- rem/agentic/providers/pydantic_ai.py +122 -43
- rem/agentic/schema.py +4 -1
- rem/api/mcp_router/tools.py +13 -2
- rem/api/routers/chat/completions.py +250 -4
- rem/api/routers/chat/models.py +81 -7
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +17 -1
- rem/api/routers/chat/streaming.py +35 -1
- rem/api/routers/feedback.py +134 -14
- rem/auth/middleware.py +66 -1
- rem/cli/commands/cluster.py +590 -82
- rem/cli/commands/configure.py +3 -4
- rem/cli/commands/experiments.py +468 -76
- rem/cli/commands/session.py +336 -0
- rem/cli/dreaming.py +2 -2
- rem/cli/main.py +2 -0
- rem/config.py +8 -1
- rem/models/core/experiment.py +58 -14
- rem/models/entities/ontology.py +1 -1
- rem/models/entities/ontology_config.py +1 -1
- rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
- rem/schemas/agents/examples/contract-extractor.yaml +1 -1
- rem/schemas/agents/examples/cv-parser.yaml +1 -1
- rem/services/phoenix/client.py +59 -18
- rem/services/postgres/pydantic_to_sqlalchemy.py +9 -12
- rem/services/session/compression.py +7 -0
- rem/settings.py +260 -17
- rem/sql/migrations/002_install_models.sql +91 -91
- rem/sql/migrations/004_cache_system.sql +1 -1
- rem/utils/README.md +45 -0
- rem/utils/files.py +157 -1
- rem/utils/schema_loader.py +94 -3
- rem/utils/vision.py +1 -1
- rem/workers/__init__.py +2 -1
- rem/workers/db_listener.py +579 -0
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/METADATA +161 -147
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/RECORD +44 -41
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/WHEEL +0 -0
- {remdb-0.3.118.dist-info → remdb-0.3.146.dist-info}/entry_points.txt +0 -0
rem/services/phoenix/client.py
CHANGED
|
@@ -793,40 +793,72 @@ class PhoenixClient:
|
|
|
793
793
|
score: float | None = None,
|
|
794
794
|
explanation: str | None = None,
|
|
795
795
|
metadata: dict[str, Any] | None = None,
|
|
796
|
+
trace_id: str | None = None,
|
|
796
797
|
) -> str | None:
|
|
797
|
-
"""Add feedback annotation to a span.
|
|
798
|
+
"""Add feedback annotation to a span via Phoenix REST API.
|
|
799
|
+
|
|
800
|
+
Uses direct HTTP POST to /v1/span_annotations for reliability
|
|
801
|
+
(Phoenix Python client API changes frequently).
|
|
798
802
|
|
|
799
803
|
Args:
|
|
800
|
-
span_id: Span ID to annotate
|
|
804
|
+
span_id: Span ID to annotate (hex string)
|
|
801
805
|
annotation_name: Name of the annotation (e.g., "correctness", "user_feedback")
|
|
802
806
|
annotator_kind: Type of annotator ("HUMAN", "LLM", "CODE")
|
|
803
807
|
label: Optional label (e.g., "correct", "incorrect", "helpful")
|
|
804
808
|
score: Optional numeric score (0.0-1.0)
|
|
805
809
|
explanation: Optional explanation text
|
|
806
810
|
metadata: Optional additional metadata dict
|
|
811
|
+
trace_id: Optional trace ID; added to the annotation payload when provided
|
|
807
812
|
|
|
808
813
|
Returns:
|
|
809
814
|
Span ID (returned as the annotation reference) if successful, None otherwise
|
|
810
815
|
"""
|
|
816
|
+
import httpx
|
|
817
|
+
|
|
811
818
|
try:
|
|
812
|
-
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
|
|
816
|
-
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
819
|
+
# Build annotation payload for Phoenix REST API
|
|
820
|
+
annotation_data = {
|
|
821
|
+
"span_id": span_id,
|
|
822
|
+
"name": annotation_name,
|
|
823
|
+
"annotator_kind": annotator_kind,
|
|
824
|
+
"result": {
|
|
825
|
+
"label": label,
|
|
826
|
+
"score": score,
|
|
827
|
+
"explanation": explanation,
|
|
828
|
+
},
|
|
829
|
+
"metadata": metadata or {},
|
|
830
|
+
}
|
|
821
831
|
|
|
822
|
-
|
|
823
|
-
|
|
832
|
+
# Add trace_id if provided
|
|
833
|
+
if trace_id:
|
|
834
|
+
annotation_data["trace_id"] = trace_id
|
|
835
|
+
|
|
836
|
+
# POST to Phoenix REST API
|
|
837
|
+
annotations_endpoint = f"{self.config.base_url}/v1/span_annotations"
|
|
838
|
+
headers = {}
|
|
839
|
+
if self.config.api_key:
|
|
840
|
+
headers["Authorization"] = f"Bearer {self.config.api_key}"
|
|
841
|
+
|
|
842
|
+
with httpx.Client(timeout=5.0) as client:
|
|
843
|
+
response = client.post(
|
|
844
|
+
annotations_endpoint,
|
|
845
|
+
json={"data": [annotation_data]},
|
|
846
|
+
headers=headers,
|
|
847
|
+
)
|
|
848
|
+
response.raise_for_status()
|
|
824
849
|
|
|
825
|
-
|
|
850
|
+
logger.info(f"Added {annotator_kind} feedback to span {span_id}")
|
|
851
|
+
return span_id # Return span_id as annotation reference
|
|
826
852
|
|
|
853
|
+
except httpx.HTTPStatusError as e:
|
|
854
|
+
logger.error(
|
|
855
|
+
f"Failed to add span feedback (HTTP {e.response.status_code}): "
|
|
856
|
+
f"{e.response.text if hasattr(e, 'response') else 'N/A'}"
|
|
857
|
+
)
|
|
858
|
+
return None
|
|
827
859
|
except Exception as e:
|
|
828
860
|
logger.error(f"Failed to add span feedback: {e}")
|
|
829
|
-
|
|
861
|
+
return None
|
|
830
862
|
|
|
831
863
|
def sync_user_feedback(
|
|
832
864
|
self,
|
|
@@ -835,6 +867,7 @@ class PhoenixClient:
|
|
|
835
867
|
categories: list[str] | None = None,
|
|
836
868
|
comment: str | None = None,
|
|
837
869
|
feedback_id: str | None = None,
|
|
870
|
+
trace_id: str | None = None,
|
|
838
871
|
) -> str | None:
|
|
839
872
|
"""Sync user feedback to Phoenix as a span annotation.
|
|
840
873
|
|
|
@@ -847,6 +880,7 @@ class PhoenixClient:
|
|
|
847
880
|
categories: List of feedback categories
|
|
848
881
|
comment: Free-text comment
|
|
849
882
|
feedback_id: Optional REM feedback ID for reference
|
|
883
|
+
trace_id: Optional trace ID for the span
|
|
850
884
|
|
|
851
885
|
Returns:
|
|
852
886
|
Phoenix annotation ID if successful
|
|
@@ -860,12 +894,18 @@ class PhoenixClient:
|
|
|
860
894
|
... )
|
|
861
895
|
"""
|
|
862
896
|
# Convert rating to 0-1 score
|
|
897
|
+
# Rating scheme:
|
|
898
|
+
# -1 = thumbs down → score 0.0
|
|
899
|
+
# 1 = thumbs up → score 1.0
|
|
900
|
+
# 2-5 = star rating → normalized to 0-1 range
|
|
863
901
|
score = None
|
|
864
902
|
if rating is not None:
|
|
865
903
|
if rating == -1:
|
|
866
904
|
score = 0.0
|
|
867
|
-
elif
|
|
868
|
-
score =
|
|
905
|
+
elif rating == 1:
|
|
906
|
+
score = 1.0 # Thumbs up
|
|
907
|
+
elif 2 <= rating <= 5:
|
|
908
|
+
score = (rating - 1) / 4.0 # 2→0.25, 3→0.5, 4→0.75, 5→1.0
|
|
869
909
|
|
|
870
910
|
# Use primary category as label
|
|
871
911
|
label = categories[0] if categories else None
|
|
@@ -880,7 +920,7 @@ class PhoenixClient:
|
|
|
880
920
|
explanation = f"Categories: {cats_str}"
|
|
881
921
|
|
|
882
922
|
# Build metadata
|
|
883
|
-
metadata = {
|
|
923
|
+
metadata: dict[str, Any] = {
|
|
884
924
|
"rating": rating,
|
|
885
925
|
"categories": categories or [],
|
|
886
926
|
}
|
|
@@ -895,6 +935,7 @@ class PhoenixClient:
|
|
|
895
935
|
score=score,
|
|
896
936
|
explanation=explanation,
|
|
897
937
|
metadata=metadata,
|
|
938
|
+
trace_id=trace_id,
|
|
898
939
|
)
|
|
899
940
|
|
|
900
941
|
def get_span_annotations(
|
|
@@ -513,18 +513,15 @@ def get_target_metadata() -> MetaData:
|
|
|
513
513
|
"""
|
|
514
514
|
Get SQLAlchemy metadata for Alembic autogenerate.
|
|
515
515
|
|
|
516
|
-
This is the main entry point used by alembic/env.py.
|
|
516
|
+
This is the main entry point used by alembic/env.py and rem db diff.
|
|
517
|
+
|
|
518
|
+
Uses the model registry as the source of truth, which includes:
|
|
519
|
+
- Core REM models (Resource, Message, User, etc.)
|
|
520
|
+
- User-registered models via @rem.register_model decorator
|
|
517
521
|
|
|
518
522
|
Returns:
|
|
519
|
-
SQLAlchemy MetaData object representing
|
|
523
|
+
SQLAlchemy MetaData object representing all registered Pydantic models
|
|
520
524
|
"""
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
models_dir = package_root / "src" / "rem" / "models" / "entities"
|
|
525
|
-
|
|
526
|
-
if not models_dir.exists():
|
|
527
|
-
logger.error(f"Models directory not found: {models_dir}")
|
|
528
|
-
return MetaData()
|
|
529
|
-
|
|
530
|
-
return build_sqlalchemy_metadata_from_pydantic(models_dir)
|
|
525
|
+
# build_sqlalchemy_metadata_from_pydantic uses the registry internally,
|
|
526
|
+
# so no directory path is needed (the parameter is kept for backwards compat)
|
|
527
|
+
return build_sqlalchemy_metadata_from_pydantic()
|
|
@@ -170,12 +170,16 @@ class SessionMessageStore:
|
|
|
170
170
|
entity_key = truncate_key(f"session-{session_id}-msg-{message_index}")
|
|
171
171
|
|
|
172
172
|
# Create Message entity for assistant response
|
|
173
|
+
# Use pre-generated id from message dict if available (for frontend feedback)
|
|
173
174
|
msg = Message(
|
|
175
|
+
id=message.get("id"), # Use pre-generated ID if provided
|
|
174
176
|
content=message.get("content", ""),
|
|
175
177
|
message_type=message.get("role", "assistant"),
|
|
176
178
|
session_id=session_id,
|
|
177
179
|
tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
|
|
178
180
|
user_id=user_id or self.user_id,
|
|
181
|
+
trace_id=message.get("trace_id"),
|
|
182
|
+
span_id=message.get("span_id"),
|
|
179
183
|
metadata={
|
|
180
184
|
"message_index": message_index,
|
|
181
185
|
"entity_key": entity_key, # Store entity key for LOOKUP
|
|
@@ -284,11 +288,14 @@ class SessionMessageStore:
|
|
|
284
288
|
# Short assistant messages, user messages, and system messages stored as-is
|
|
285
289
|
# Store ALL messages in database for full audit trail
|
|
286
290
|
msg = Message(
|
|
291
|
+
id=message.get("id"), # Use pre-generated ID if provided
|
|
287
292
|
content=content,
|
|
288
293
|
message_type=message.get("role", "user"),
|
|
289
294
|
session_id=session_id,
|
|
290
295
|
tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
|
|
291
296
|
user_id=user_id or self.user_id,
|
|
297
|
+
trace_id=message.get("trace_id"),
|
|
298
|
+
span_id=message.get("span_id"),
|
|
292
299
|
metadata={
|
|
293
300
|
"message_index": idx,
|
|
294
301
|
"timestamp": message.get("timestamp"),
|
rem/settings.py
CHANGED
|
@@ -21,8 +21,8 @@ Example .env file:
|
|
|
21
21
|
LLM__OPENAI_API_KEY=sk-...
|
|
22
22
|
LLM__ANTHROPIC_API_KEY=sk-ant-...
|
|
23
23
|
|
|
24
|
-
# Database (port
|
|
25
|
-
POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:
|
|
24
|
+
# Database (port 5051 for Docker Compose prebuilt, 5050 for local dev)
|
|
25
|
+
POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5051/rem
|
|
26
26
|
POSTGRES__POOL_MIN_SIZE=5
|
|
27
27
|
POSTGRES__POOL_MAX_SIZE=20
|
|
28
28
|
POSTGRES__STATEMENT_TIMEOUT=30000
|
|
@@ -33,14 +33,15 @@ Example .env file:
|
|
|
33
33
|
AUTH__OIDC_CLIENT_ID=your-client-id
|
|
34
34
|
AUTH__SESSION_SECRET=your-secret-key
|
|
35
35
|
|
|
36
|
-
# OpenTelemetry (disabled by default)
|
|
36
|
+
# OpenTelemetry (disabled by default - enable via env var when collector available)
|
|
37
|
+
# Standard OTLP collector ports: 4317 (gRPC), 4318 (HTTP)
|
|
37
38
|
OTEL__ENABLED=false
|
|
38
39
|
OTEL__SERVICE_NAME=rem-api
|
|
39
|
-
OTEL__COLLECTOR_ENDPOINT=http://localhost:
|
|
40
|
-
OTEL__PROTOCOL=
|
|
40
|
+
OTEL__COLLECTOR_ENDPOINT=http://localhost:4317
|
|
41
|
+
OTEL__PROTOCOL=grpc
|
|
41
42
|
|
|
42
|
-
# Arize Phoenix (
|
|
43
|
-
PHOENIX__ENABLED=
|
|
43
|
+
# Arize Phoenix (enabled by default - can be disabled via env var)
|
|
44
|
+
PHOENIX__ENABLED=true
|
|
44
45
|
PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
|
|
45
46
|
PHOENIX__PROJECT_NAME=rem
|
|
46
47
|
|
|
@@ -241,6 +242,11 @@ class OTELSettings(BaseSettings):
|
|
|
241
242
|
description="Export timeout in milliseconds",
|
|
242
243
|
)
|
|
243
244
|
|
|
245
|
+
insecure: bool = Field(
|
|
246
|
+
default=True,
|
|
247
|
+
description="Use insecure (non-TLS) gRPC connection (default: True for local dev)",
|
|
248
|
+
)
|
|
249
|
+
|
|
244
250
|
|
|
245
251
|
class PhoenixSettings(BaseSettings):
|
|
246
252
|
"""
|
|
@@ -267,8 +273,8 @@ class PhoenixSettings(BaseSettings):
|
|
|
267
273
|
)
|
|
268
274
|
|
|
269
275
|
enabled: bool = Field(
|
|
270
|
-
default=
|
|
271
|
-
description="Enable Phoenix integration (
|
|
276
|
+
default=True,
|
|
277
|
+
description="Enable Phoenix integration (enabled by default)",
|
|
272
278
|
)
|
|
273
279
|
|
|
274
280
|
base_url: str = Field(
|
|
@@ -458,10 +464,11 @@ class PostgresSettings(BaseSettings):
|
|
|
458
464
|
)
|
|
459
465
|
|
|
460
466
|
connection_string: str = Field(
|
|
461
|
-
default="postgresql://rem:rem@localhost:
|
|
462
|
-
description="PostgreSQL connection string (default uses Docker Compose port
|
|
467
|
+
default="postgresql://rem:rem@localhost:5051/rem",
|
|
468
|
+
description="PostgreSQL connection string (default uses Docker Compose prebuilt port 5051)",
|
|
463
469
|
)
|
|
464
470
|
|
|
471
|
+
|
|
465
472
|
pool_size: int = Field(
|
|
466
473
|
default=10,
|
|
467
474
|
description="Connection pool size (deprecated, use pool_min_size/pool_max_size)",
|
|
@@ -686,6 +693,91 @@ class S3Settings(BaseSettings):
|
|
|
686
693
|
)
|
|
687
694
|
|
|
688
695
|
|
|
696
|
+
class DataLakeSettings(BaseSettings):
|
|
697
|
+
"""
|
|
698
|
+
Data lake settings for experiment and dataset storage.
|
|
699
|
+
|
|
700
|
+
Data Lake Convention:
|
|
701
|
+
The data lake provides a standardized structure for storing datasets,
|
|
702
|
+
experiments, and calibration data in S3. Users bring their own bucket
|
|
703
|
+
and the version is pinned by default to v0 in the path.
|
|
704
|
+
|
|
705
|
+
S3 Path Structure:
|
|
706
|
+
s3://{bucket}/{version}/datasets/
|
|
707
|
+
├── raw/ # Raw source data + transformers
|
|
708
|
+
│ └── {dataset_name}/ # e.g., cns_drugs, codes, care
|
|
709
|
+
├── tables/ # Database table data (JSONL)
|
|
710
|
+
│ ├── resources/ # → resources table
|
|
711
|
+
│ │ ├── drugs/{category}/ # Psychotropic drugs
|
|
712
|
+
│ │ ├── care/stages/ # Treatment stages
|
|
713
|
+
│ │ └── crisis/ # Crisis resources
|
|
714
|
+
│ └── codes/ # → codes table
|
|
715
|
+
│ ├── icd10/{category}/ # ICD-10 codes
|
|
716
|
+
│ └── cpt/ # CPT codes
|
|
717
|
+
└── calibration/ # Agent calibration
|
|
718
|
+
├── experiments/ # Experiment configs + results
|
|
719
|
+
│ └── {agent}/{task}/ # e.g., siggy/risk-assessment
|
|
720
|
+
└── datasets/ # Shared evaluation datasets
|
|
721
|
+
|
|
722
|
+
Experiment Storage:
|
|
723
|
+
- Local: experiments/{agent}/{task}/experiment.yaml
|
|
724
|
+
- S3: s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
|
|
725
|
+
|
|
726
|
+
Environment variables:
|
|
727
|
+
DATA_LAKE__BUCKET_NAME - S3 bucket for data lake (required)
|
|
728
|
+
DATA_LAKE__VERSION - Path version prefix (default: v0)
|
|
729
|
+
DATA_LAKE__DATASETS_PREFIX - Datasets directory (default: datasets)
|
|
730
|
+
DATA_LAKE__EXPERIMENTS_PREFIX - Experiments subdirectory (default: experiments)
|
|
731
|
+
"""
|
|
732
|
+
|
|
733
|
+
model_config = SettingsConfigDict(
|
|
734
|
+
env_prefix="DATA_LAKE__",
|
|
735
|
+
env_file=".env",
|
|
736
|
+
env_file_encoding="utf-8",
|
|
737
|
+
extra="ignore",
|
|
738
|
+
)
|
|
739
|
+
|
|
740
|
+
bucket_name: str | None = Field(
|
|
741
|
+
default=None,
|
|
742
|
+
description="S3 bucket for data lake storage (user-provided)",
|
|
743
|
+
)
|
|
744
|
+
|
|
745
|
+
version: str = Field(
|
|
746
|
+
default="v0",
|
|
747
|
+
description="API version for data lake paths",
|
|
748
|
+
)
|
|
749
|
+
|
|
750
|
+
datasets_prefix: str = Field(
|
|
751
|
+
default="datasets",
|
|
752
|
+
description="Root directory for datasets in the bucket",
|
|
753
|
+
)
|
|
754
|
+
|
|
755
|
+
experiments_prefix: str = Field(
|
|
756
|
+
default="experiments",
|
|
757
|
+
description="Subdirectory within calibration for experiments",
|
|
758
|
+
)
|
|
759
|
+
|
|
760
|
+
def get_base_uri(self) -> str | None:
|
|
761
|
+
"""Get the base S3 URI for the data lake."""
|
|
762
|
+
if not self.bucket_name:
|
|
763
|
+
return None
|
|
764
|
+
return f"s3://{self.bucket_name}/{self.version}/{self.datasets_prefix}"
|
|
765
|
+
|
|
766
|
+
def get_experiment_uri(self, agent: str, task: str = "general") -> str | None:
|
|
767
|
+
"""Get the S3 URI for an experiment."""
|
|
768
|
+
base = self.get_base_uri()
|
|
769
|
+
if not base:
|
|
770
|
+
return None
|
|
771
|
+
return f"{base}/calibration/{self.experiments_prefix}/{agent}/{task}"
|
|
772
|
+
|
|
773
|
+
def get_tables_uri(self, table: str = "resources") -> str | None:
|
|
774
|
+
"""Get the S3 URI for a table directory."""
|
|
775
|
+
base = self.get_base_uri()
|
|
776
|
+
if not base:
|
|
777
|
+
return None
|
|
778
|
+
return f"{base}/tables/{table}"
|
|
779
|
+
|
|
780
|
+
|
|
689
781
|
class ChunkingSettings(BaseSettings):
|
|
690
782
|
"""
|
|
691
783
|
Document chunking settings for semantic text splitting.
|
|
@@ -969,6 +1061,8 @@ class APISettings(BaseSettings):
|
|
|
969
1061
|
API__RELOAD - Enable auto-reload for development
|
|
970
1062
|
API__WORKERS - Number of worker processes (production)
|
|
971
1063
|
API__LOG_LEVEL - Logging level (debug, info, warning, error)
|
|
1064
|
+
API__API_KEY_ENABLED - Enable X-API-Key header authentication
|
|
1065
|
+
API__API_KEY - API key for X-API-Key authentication
|
|
972
1066
|
"""
|
|
973
1067
|
|
|
974
1068
|
model_config = SettingsConfigDict(
|
|
@@ -1003,6 +1097,23 @@ class APISettings(BaseSettings):
|
|
|
1003
1097
|
description="Logging level (debug, info, warning, error, critical)",
|
|
1004
1098
|
)
|
|
1005
1099
|
|
|
1100
|
+
api_key_enabled: bool = Field(
|
|
1101
|
+
default=False,
|
|
1102
|
+
description=(
|
|
1103
|
+
"Enable X-API-Key header authentication for API endpoints. "
|
|
1104
|
+
"When enabled, requests must include X-API-Key header with valid key. "
|
|
1105
|
+
"This provides simple API key auth independent of OAuth."
|
|
1106
|
+
),
|
|
1107
|
+
)
|
|
1108
|
+
|
|
1109
|
+
api_key: str | None = Field(
|
|
1110
|
+
default=None,
|
|
1111
|
+
description=(
|
|
1112
|
+
"API key for X-API-Key authentication. Required when api_key_enabled=true. "
|
|
1113
|
+
"Generate with: python -c \"import secrets; print(secrets.token_urlsafe(32))\""
|
|
1114
|
+
),
|
|
1115
|
+
)
|
|
1116
|
+
|
|
1006
1117
|
|
|
1007
1118
|
class ModelsSettings(BaseSettings):
|
|
1008
1119
|
"""
|
|
@@ -1051,10 +1162,26 @@ class ModelsSettings(BaseSettings):
|
|
|
1051
1162
|
|
|
1052
1163
|
@property
|
|
1053
1164
|
def module_list(self) -> list[str]:
|
|
1054
|
-
"""
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1165
|
+
"""
|
|
1166
|
+
Get modules as a list, filtering empty strings.
|
|
1167
|
+
|
|
1168
|
+
Auto-detects a ./models package (a directory containing __init__.py) relative to the current working directory and lists it first.
|
|
1169
|
+
"""
|
|
1170
|
+
modules = []
|
|
1171
|
+
if self.import_modules:
|
|
1172
|
+
modules = [m.strip() for m in self.import_modules.split(";") if m.strip()]
|
|
1173
|
+
|
|
1174
|
+
# Auto-detect ./models if it exists and is a Python package (convention over configuration)
|
|
1175
|
+
from pathlib import Path
|
|
1176
|
+
|
|
1177
|
+
models_path = Path("./models")
|
|
1178
|
+
if models_path.exists() and models_path.is_dir():
|
|
1179
|
+
# Check if it's a Python package (has __init__.py)
|
|
1180
|
+
if (models_path / "__init__.py").exists():
|
|
1181
|
+
if "models" not in modules:
|
|
1182
|
+
modules.insert(0, "models")
|
|
1183
|
+
|
|
1184
|
+
return modules
|
|
1058
1185
|
|
|
1059
1186
|
|
|
1060
1187
|
class SchemaSettings(BaseSettings):
|
|
@@ -1240,6 +1367,110 @@ class GitSettings(BaseSettings):
|
|
|
1240
1367
|
)
|
|
1241
1368
|
|
|
1242
1369
|
|
|
1370
|
+
class DBListenerSettings(BaseSettings):
|
|
1371
|
+
"""
|
|
1372
|
+
PostgreSQL LISTEN/NOTIFY database listener settings.
|
|
1373
|
+
|
|
1374
|
+
The DB Listener is a lightweight worker that subscribes to PostgreSQL
|
|
1375
|
+
NOTIFY events and dispatches them to external systems (SQS, REST, custom).
|
|
1376
|
+
|
|
1377
|
+
Architecture:
|
|
1378
|
+
- Single-replica deployment (to avoid duplicate processing)
|
|
1379
|
+
- Dedicated connection for LISTEN (not from connection pool)
|
|
1380
|
+
- Automatic reconnection with exponential backoff
|
|
1381
|
+
- Graceful shutdown on SIGTERM
|
|
1382
|
+
|
|
1383
|
+
Use Cases:
|
|
1384
|
+
- Sync data changes to external systems (Phoenix, webhooks)
|
|
1385
|
+
- Trigger async jobs without polling
|
|
1386
|
+
- Event-driven architectures with PostgreSQL as event source
|
|
1387
|
+
|
|
1388
|
+
Example PostgreSQL trigger:
|
|
1389
|
+
CREATE OR REPLACE FUNCTION notify_feedback_insert()
|
|
1390
|
+
RETURNS TRIGGER AS $$
|
|
1391
|
+
BEGIN
|
|
1392
|
+
PERFORM pg_notify('feedback_sync', json_build_object(
|
|
1393
|
+
'id', NEW.id,
|
|
1394
|
+
'table', 'feedbacks',
|
|
1395
|
+
'action', 'insert'
|
|
1396
|
+
)::text);
|
|
1397
|
+
RETURN NEW;
|
|
1398
|
+
END;
|
|
1399
|
+
$$ LANGUAGE plpgsql;
|
|
1400
|
+
|
|
1401
|
+
Environment variables:
|
|
1402
|
+
DB_LISTENER__ENABLED - Enable the listener worker (default: false)
|
|
1403
|
+
DB_LISTENER__CHANNELS - Comma-separated PostgreSQL channels to listen on
|
|
1404
|
+
DB_LISTENER__HANDLER_TYPE - Handler type: 'sqs', 'rest', or 'custom'
|
|
1405
|
+
DB_LISTENER__SQS_QUEUE_URL - SQS queue URL (for handler_type=sqs)
|
|
1406
|
+
DB_LISTENER__REST_ENDPOINT - REST endpoint URL (for handler_type=rest)
|
|
1407
|
+
DB_LISTENER__RECONNECT_DELAY - Initial reconnect delay in seconds
|
|
1408
|
+
DB_LISTENER__MAX_RECONNECT_DELAY - Maximum reconnect delay in seconds
|
|
1409
|
+
|
|
1410
|
+
References:
|
|
1411
|
+
- PostgreSQL NOTIFY: https://www.postgresql.org/docs/current/sql-notify.html
|
|
1412
|
+
- Brandur's Notifier: https://brandur.org/notifier
|
|
1413
|
+
"""
|
|
1414
|
+
|
|
1415
|
+
model_config = SettingsConfigDict(
|
|
1416
|
+
env_prefix="DB_LISTENER__",
|
|
1417
|
+
env_file=".env",
|
|
1418
|
+
env_file_encoding="utf-8",
|
|
1419
|
+
extra="ignore",
|
|
1420
|
+
)
|
|
1421
|
+
|
|
1422
|
+
enabled: bool = Field(
|
|
1423
|
+
default=False,
|
|
1424
|
+
description="Enable the DB Listener worker (disabled by default)",
|
|
1425
|
+
)
|
|
1426
|
+
|
|
1427
|
+
channels: str = Field(
|
|
1428
|
+
default="",
|
|
1429
|
+
description=(
|
|
1430
|
+
"Comma-separated list of PostgreSQL channels to LISTEN on. "
|
|
1431
|
+
"Example: 'feedback_sync,entity_update,user_events'"
|
|
1432
|
+
),
|
|
1433
|
+
)
|
|
1434
|
+
|
|
1435
|
+
handler_type: str = Field(
|
|
1436
|
+
default="rest",
|
|
1437
|
+
description=(
|
|
1438
|
+
"Handler type for dispatching notifications. Options: "
|
|
1439
|
+
"'sqs' (publish to SQS), 'rest' (POST to endpoint), 'custom' (Python handlers)"
|
|
1440
|
+
),
|
|
1441
|
+
)
|
|
1442
|
+
|
|
1443
|
+
sqs_queue_url: str = Field(
|
|
1444
|
+
default="",
|
|
1445
|
+
description="SQS queue URL for handler_type='sqs'",
|
|
1446
|
+
)
|
|
1447
|
+
|
|
1448
|
+
rest_endpoint: str = Field(
|
|
1449
|
+
default="http://localhost:8000/api/v1/internal/events",
|
|
1450
|
+
description=(
|
|
1451
|
+
"REST endpoint URL for handler_type='rest'. "
|
|
1452
|
+
"Receives POST with {channel, payload, source} JSON body."
|
|
1453
|
+
),
|
|
1454
|
+
)
|
|
1455
|
+
|
|
1456
|
+
reconnect_delay: float = Field(
|
|
1457
|
+
default=1.0,
|
|
1458
|
+
description="Initial delay (seconds) between reconnection attempts",
|
|
1459
|
+
)
|
|
1460
|
+
|
|
1461
|
+
max_reconnect_delay: float = Field(
|
|
1462
|
+
default=60.0,
|
|
1463
|
+
description="Maximum delay (seconds) between reconnection attempts (exponential backoff cap)",
|
|
1464
|
+
)
|
|
1465
|
+
|
|
1466
|
+
@property
|
|
1467
|
+
def channel_list(self) -> list[str]:
|
|
1468
|
+
"""Get channels as a list, filtering empty strings."""
|
|
1469
|
+
if not self.channels:
|
|
1470
|
+
return []
|
|
1471
|
+
return [c.strip() for c in self.channels.split(",") if c.strip()]
|
|
1472
|
+
|
|
1473
|
+
|
|
1243
1474
|
class TestSettings(BaseSettings):
|
|
1244
1475
|
"""
|
|
1245
1476
|
Test environment settings.
|
|
@@ -1347,18 +1578,30 @@ class Settings(BaseSettings):
|
|
|
1347
1578
|
migration: MigrationSettings = Field(default_factory=MigrationSettings)
|
|
1348
1579
|
storage: StorageSettings = Field(default_factory=StorageSettings)
|
|
1349
1580
|
s3: S3Settings = Field(default_factory=S3Settings)
|
|
1581
|
+
data_lake: DataLakeSettings = Field(default_factory=DataLakeSettings)
|
|
1350
1582
|
git: GitSettings = Field(default_factory=GitSettings)
|
|
1351
1583
|
sqs: SQSSettings = Field(default_factory=SQSSettings)
|
|
1584
|
+
db_listener: DBListenerSettings = Field(default_factory=DBListenerSettings)
|
|
1352
1585
|
chunking: ChunkingSettings = Field(default_factory=ChunkingSettings)
|
|
1353
1586
|
content: ContentSettings = Field(default_factory=ContentSettings)
|
|
1354
1587
|
schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
|
|
1355
1588
|
test: TestSettings = Field(default_factory=TestSettings)
|
|
1356
1589
|
|
|
1357
1590
|
|
|
1591
|
+
# Auto-load .env file from current directory if it exists
|
|
1592
|
+
# This happens BEFORE config file loading, so .env takes precedence
|
|
1593
|
+
from pathlib import Path
|
|
1594
|
+
from dotenv import load_dotenv
|
|
1595
|
+
|
|
1596
|
+
_dotenv_path = Path(".env")
|
|
1597
|
+
if _dotenv_path.exists():
|
|
1598
|
+
load_dotenv(_dotenv_path, override=False) # Don't override existing env vars
|
|
1599
|
+
logger.debug(f"Loaded environment from {_dotenv_path.resolve()}")
|
|
1600
|
+
|
|
1358
1601
|
# Load configuration from ~/.rem/config.yaml before initializing settings
|
|
1359
1602
|
# This allows user configuration to be merged with environment variables
|
|
1360
|
-
# Set
|
|
1361
|
-
if not os.getenv("
|
|
1603
|
+
# Set REM_SKIP_CONFIG to 1, true, or yes (case-insensitive) to disable (useful for development with .env)
|
|
1604
|
+
if not os.getenv("REM_SKIP_CONFIG", "").lower() in ("true", "1", "yes"):
|
|
1362
1605
|
try:
|
|
1363
1606
|
from rem.config import load_config, merge_config_to_env
|
|
1364
1607
|
|