remdb 0.3.7__py3-none-any.whl → 0.3.133__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- rem/__init__.py +129 -2
- rem/agentic/README.md +76 -0
- rem/agentic/__init__.py +15 -0
- rem/agentic/agents/__init__.py +16 -2
- rem/agentic/agents/sse_simulator.py +502 -0
- rem/agentic/context.py +51 -25
- rem/agentic/llm_provider_models.py +301 -0
- rem/agentic/mcp/tool_wrapper.py +112 -17
- rem/agentic/otel/setup.py +93 -4
- rem/agentic/providers/phoenix.py +314 -132
- rem/agentic/providers/pydantic_ai.py +215 -26
- rem/agentic/schema.py +361 -21
- rem/agentic/tools/rem_tools.py +3 -3
- rem/api/README.md +238 -1
- rem/api/deps.py +255 -0
- rem/api/main.py +154 -37
- rem/api/mcp_router/resources.py +1 -1
- rem/api/mcp_router/server.py +26 -5
- rem/api/mcp_router/tools.py +465 -7
- rem/api/middleware/tracking.py +172 -0
- rem/api/routers/admin.py +494 -0
- rem/api/routers/auth.py +124 -0
- rem/api/routers/chat/completions.py +402 -20
- rem/api/routers/chat/models.py +88 -10
- rem/api/routers/chat/otel_utils.py +33 -0
- rem/api/routers/chat/sse_events.py +542 -0
- rem/api/routers/chat/streaming.py +642 -45
- rem/api/routers/dev.py +81 -0
- rem/api/routers/feedback.py +268 -0
- rem/api/routers/messages.py +473 -0
- rem/api/routers/models.py +78 -0
- rem/api/routers/query.py +360 -0
- rem/api/routers/shared_sessions.py +406 -0
- rem/auth/middleware.py +126 -27
- rem/cli/commands/README.md +237 -64
- rem/cli/commands/ask.py +13 -10
- rem/cli/commands/cluster.py +1808 -0
- rem/cli/commands/configure.py +5 -6
- rem/cli/commands/db.py +396 -139
- rem/cli/commands/experiments.py +469 -74
- rem/cli/commands/process.py +22 -15
- rem/cli/commands/scaffold.py +47 -0
- rem/cli/commands/schema.py +97 -50
- rem/cli/main.py +29 -6
- rem/config.py +10 -3
- rem/models/core/core_model.py +7 -1
- rem/models/core/experiment.py +54 -0
- rem/models/core/rem_query.py +5 -2
- rem/models/entities/__init__.py +21 -0
- rem/models/entities/domain_resource.py +38 -0
- rem/models/entities/feedback.py +123 -0
- rem/models/entities/message.py +30 -1
- rem/models/entities/session.py +83 -0
- rem/models/entities/shared_session.py +180 -0
- rem/models/entities/user.py +10 -3
- rem/registry.py +373 -0
- rem/schemas/agents/rem.yaml +7 -3
- rem/services/content/providers.py +92 -133
- rem/services/content/service.py +92 -20
- rem/services/dreaming/affinity_service.py +2 -16
- rem/services/dreaming/moment_service.py +2 -15
- rem/services/embeddings/api.py +24 -17
- rem/services/embeddings/worker.py +16 -16
- rem/services/phoenix/EXPERIMENT_DESIGN.md +3 -3
- rem/services/phoenix/client.py +302 -28
- rem/services/postgres/README.md +159 -15
- rem/services/postgres/__init__.py +2 -1
- rem/services/postgres/diff_service.py +531 -0
- rem/services/postgres/pydantic_to_sqlalchemy.py +427 -129
- rem/services/postgres/repository.py +132 -0
- rem/services/postgres/schema_generator.py +291 -9
- rem/services/postgres/service.py +6 -6
- rem/services/rate_limit.py +113 -0
- rem/services/rem/README.md +14 -0
- rem/services/rem/parser.py +44 -9
- rem/services/rem/service.py +36 -2
- rem/services/session/compression.py +24 -1
- rem/services/session/reload.py +1 -1
- rem/services/user_service.py +98 -0
- rem/settings.py +399 -29
- rem/sql/background_indexes.sql +21 -16
- rem/sql/migrations/001_install.sql +387 -54
- rem/sql/migrations/002_install_models.sql +2320 -393
- rem/sql/migrations/003_optional_extensions.sql +326 -0
- rem/sql/migrations/004_cache_system.sql +548 -0
- rem/utils/__init__.py +18 -0
- rem/utils/constants.py +97 -0
- rem/utils/date_utils.py +228 -0
- rem/utils/embeddings.py +17 -4
- rem/utils/files.py +167 -0
- rem/utils/mime_types.py +158 -0
- rem/utils/model_helpers.py +156 -1
- rem/utils/schema_loader.py +282 -35
- rem/utils/sql_paths.py +146 -0
- rem/utils/sql_types.py +3 -1
- rem/utils/vision.py +9 -14
- rem/workers/README.md +14 -14
- rem/workers/__init__.py +3 -1
- rem/workers/db_listener.py +579 -0
- rem/workers/db_maintainer.py +74 -0
- rem/workers/unlogged_maintainer.py +463 -0
- {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/METADATA +460 -303
- {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/RECORD +105 -74
- {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/WHEEL +1 -1
- rem/sql/002_install_models.sql +0 -1068
- rem/sql/install_models.sql +0 -1038
- {remdb-0.3.7.dist-info → remdb-0.3.133.dist-info}/entry_points.txt +0 -0
rem/settings.py
CHANGED
@@ -15,7 +15,7 @@ Example .env file:
     API__LOG_LEVEL=info
 
     # LLM
-    LLM__DEFAULT_MODEL=
+    LLM__DEFAULT_MODEL=openai:gpt-4.1
     LLM__DEFAULT_TEMPERATURE=0.5
     LLM__MAX_RETRIES=10
     LLM__OPENAI_API_KEY=sk-...
@@ -33,14 +33,15 @@ Example .env file:
     AUTH__OIDC_CLIENT_ID=your-client-id
     AUTH__SESSION_SECRET=your-secret-key
 
-    # OpenTelemetry (disabled by default)
+    # OpenTelemetry (disabled by default - enable via env var when collector available)
+    # Standard OTLP collector ports: 4317 (gRPC), 4318 (HTTP)
     OTEL__ENABLED=false
     OTEL__SERVICE_NAME=rem-api
-    OTEL__COLLECTOR_ENDPOINT=http://localhost:
-    OTEL__PROTOCOL=
+    OTEL__COLLECTOR_ENDPOINT=http://localhost:4317
+    OTEL__PROTOCOL=grpc
 
-    # Arize Phoenix (
-    PHOENIX__ENABLED=
+    # Arize Phoenix (enabled by default - can be disabled via env var)
+    PHOENIX__ENABLED=true
     PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
     PHOENIX__PROJECT_NAME=rem
 
@@ -57,8 +58,10 @@ Example .env file:
 """
 
 import os
-
+import hashlib
+from pydantic import Field, field_validator, ValidationInfo
 from pydantic_settings import BaseSettings, SettingsConfigDict
+from loguru import logger
 
 
 class LLMSettings(BaseSettings):
@@ -72,7 +75,7 @@ class LLMSettings(BaseSettings):
         LLM__EVALUATOR_MODEL or EVALUATOR_MODEL - Model for LLM-as-judge evaluation
         LLM__OPENAI_API_KEY or OPENAI_API_KEY - OpenAI API key
         LLM__ANTHROPIC_API_KEY or ANTHROPIC_API_KEY - Anthropic API key
-        LLM__EMBEDDING_PROVIDER or EMBEDDING_PROVIDER - Default embedding provider (openai
+        LLM__EMBEDDING_PROVIDER or EMBEDDING_PROVIDER - Default embedding provider (openai)
         LLM__EMBEDDING_MODEL or EMBEDDING_MODEL - Default embedding model name
     """
 
@@ -84,7 +87,7 @@ class LLMSettings(BaseSettings):
     )
 
     default_model: str = Field(
-        default="
+        default="openai:gpt-4.1",
         description="Default LLM model (format: provider:model-id)",
     )
 
@@ -127,7 +130,7 @@ class LLMSettings(BaseSettings):
 
     embedding_provider: str = Field(
         default="openai",
-        description="Default embedding provider (
+        description="Default embedding provider (currently only openai supported)",
    )
 
     embedding_model: str = Field(
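The default_model value above follows the provider:model-id convention named in its description. A minimal sketch of splitting such a reference; the helper name is hypothetical, not something from remdb:

    def split_model_ref(ref: str) -> tuple[str, str]:
        """Split e.g. 'openai:gpt-4.1' into ('openai', 'gpt-4.1')."""
        provider, _, model_id = ref.partition(":")
        if not model_id:
            raise ValueError(f"expected 'provider:model-id', got {ref!r}")
        return provider, model_id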
@@ -239,6 +242,11 @@ class OTELSettings(BaseSettings):
         description="Export timeout in milliseconds",
     )
 
+    insecure: bool = Field(
+        default=True,
+        description="Use insecure (non-TLS) gRPC connection (default: True for local dev)",
+    )
+
 
 class PhoenixSettings(BaseSettings):
     """
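The new insecure flag only matters for the gRPC OTLP exporter. The actual wiring lives in rem/agentic/otel/setup.py (changed in this release but not shown here), so the following is only an illustrative sketch of how such settings are typically passed through:

    from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import OTLPSpanExporter

    exporter = OTLPSpanExporter(
        endpoint="http://localhost:4317",  # from OTEL__COLLECTOR_ENDPOINT
        insecure=True,                     # from OTEL__INSECURE: plain-text gRPC for a local collector
    )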
@@ -265,8 +273,8 @@ class PhoenixSettings(BaseSettings):
     )
 
     enabled: bool = Field(
-        default=
-        description="Enable Phoenix integration (
+        default=True,
+        description="Enable Phoenix integration (enabled by default)",
     )
 
     base_url: str = Field(
@@ -359,10 +367,16 @@ class AuthSettings(BaseSettings):
     - Custom OIDC provider
 
     Environment variables:
-        AUTH__ENABLED - Enable authentication (default:
+        AUTH__ENABLED - Enable authentication (default: true)
+        AUTH__ALLOW_ANONYMOUS - Allow rate-limited anonymous access (default: true)
         AUTH__SESSION_SECRET - Secret for session cookie signing
         AUTH__GOOGLE__* - Google OAuth settings
         AUTH__MICROSOFT__* - Microsoft OAuth settings
+
+    Access modes:
+        - enabled=true, allow_anonymous=true: Auth available, anonymous gets rate-limited access
+        - enabled=true, allow_anonymous=false: Auth required for all requests
+        - enabled=false: No auth, all requests treated as default user (dev mode)
     """
 
     model_config = SettingsConfigDict(
@@ -373,8 +387,26 @@ class AuthSettings(BaseSettings):
     )
 
     enabled: bool = Field(
-        default=
-        description="Enable authentication (
+        default=True,
+        description="Enable authentication (OAuth endpoints and middleware)",
+    )
+
+    allow_anonymous: bool = Field(
+        default=True,
+        description=(
+            "Allow anonymous (unauthenticated) access with rate limits. "
+            "When true, requests without auth get ANONYMOUS tier rate limits. "
+            "When false, all requests require authentication."
+        ),
+    )
+
+    mcp_requires_auth: bool = Field(
+        default=True,
+        description=(
+            "Require authentication for MCP endpoints. "
+            "MCP is a protected service and should always require login in production. "
+            "Set to false only for local development/testing."
+        ),
     )
 
     session_secret: str = Field(
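The access modes documented above reduce to a small decision table. A sketch of how request middleware might branch on them; resolve_access and its return labels are placeholders, not names taken from this diff:

    def resolve_access(auth_settings, authenticated: bool) -> str:
        # Illustrative mapping of (enabled, allow_anonymous, authenticated) to an outcome
        if not auth_settings.enabled:
            return "default-user"              # dev mode: every request acts as the default user
        if authenticated:
            return "authenticated"
        if auth_settings.allow_anonymous:
            return "anonymous-rate-limited"    # ANONYMOUS tier rate limits apply
        return "reject-401"                    # auth required for all requests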
@@ -386,6 +418,22 @@ class AuthSettings(BaseSettings):
     google: GoogleOAuthSettings = Field(default_factory=GoogleOAuthSettings)
     microsoft: MicrosoftOAuthSettings = Field(default_factory=MicrosoftOAuthSettings)
 
+    @field_validator("session_secret", mode="before")
+    @classmethod
+    def generate_dev_secret(cls, v: str | None, info: ValidationInfo) -> str:
+        # Only generate if not already set and not in production
+        if not v and info.data.get("environment") != "production":
+            # Deterministic secret for development
+            seed_string = f"{info.data.get('team', 'rem')}-{info.data.get('environment', 'development')}-auth-secret-salt"
+            logger.warning(
+                "AUTH__SESSION_SECRET not set. Generating deterministic secret for non-production environment. "
+                "DO NOT use in production."
+            )
+            return hashlib.sha256(seed_string.encode()).hexdigest()
+        elif not v and info.data.get("environment") == "production":
+            raise ValueError("AUTH__SESSION_SECRET must be set in production environment.")
+        return v
+
 
 class PostgresSettings(BaseSettings):
     """
@@ -644,6 +692,91 @@ class S3Settings(BaseSettings):
     )
 
 
+class DataLakeSettings(BaseSettings):
+    """
+    Data lake settings for experiment and dataset storage.
+
+    Data Lake Convention:
+        The data lake provides a standardized structure for storing datasets,
+        experiments, and calibration data in S3. Users bring their own bucket
+        and the version is pinned by default to v0 in the path.
+
+    S3 Path Structure:
+        s3://{bucket}/{version}/datasets/
+        ├── raw/                          # Raw source data + transformers
+        │   └── {dataset_name}/           # e.g., cns_drugs, codes, care
+        ├── tables/                       # Database table data (JSONL)
+        │   ├── resources/                # → resources table
+        │   │   ├── drugs/{category}/     # Psychotropic drugs
+        │   │   ├── care/stages/          # Treatment stages
+        │   │   └── crisis/               # Crisis resources
+        │   └── codes/                    # → codes table
+        │       ├── icd10/{category}/     # ICD-10 codes
+        │       └── cpt/                  # CPT codes
+        └── calibration/                  # Agent calibration
+            ├── experiments/              # Experiment configs + results
+            │   └── {agent}/{task}/       # e.g., siggy/risk-assessment
+            └── datasets/                 # Shared evaluation datasets
+
+    Experiment Storage:
+        - Local: experiments/{agent}/{task}/experiment.yaml
+        - S3: s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
+
+    Environment variables:
+        DATA_LAKE__BUCKET_NAME - S3 bucket for data lake (required)
+        DATA_LAKE__VERSION - Path version prefix (default: v0)
+        DATA_LAKE__DATASETS_PREFIX - Datasets directory (default: datasets)
+        DATA_LAKE__EXPERIMENTS_PREFIX - Experiments subdirectory (default: experiments)
+    """
+
+    model_config = SettingsConfigDict(
+        env_prefix="DATA_LAKE__",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+
+    bucket_name: str | None = Field(
+        default=None,
+        description="S3 bucket for data lake storage (user-provided)",
+    )
+
+    version: str = Field(
+        default="v0",
+        description="API version for data lake paths",
+    )
+
+    datasets_prefix: str = Field(
+        default="datasets",
+        description="Root directory for datasets in the bucket",
+    )
+
+    experiments_prefix: str = Field(
+        default="experiments",
+        description="Subdirectory within calibration for experiments",
+    )
+
+    def get_base_uri(self) -> str | None:
+        """Get the base S3 URI for the data lake."""
+        if not self.bucket_name:
+            return None
+        return f"s3://{self.bucket_name}/{self.version}/{self.datasets_prefix}"
+
+    def get_experiment_uri(self, agent: str, task: str = "general") -> str | None:
+        """Get the S3 URI for an experiment."""
+        base = self.get_base_uri()
+        if not base:
+            return None
+        return f"{base}/calibration/{self.experiments_prefix}/{agent}/{task}"
+
+    def get_tables_uri(self, table: str = "resources") -> str | None:
+        """Get the S3 URI for a table directory."""
+        base = self.get_base_uri()
+        if not base:
+            return None
+        return f"{base}/tables/{table}"
+
+
 class ChunkingSettings(BaseSettings):
     """
     Document chunking settings for semantic text splitting.
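The URI helpers above are pure string composition over the configured fields. A short sketch of the values they produce, assuming the import path matches this file and using an illustrative bucket name:

    from rem.settings import DataLakeSettings  # import path assumed from this diff

    lake = DataLakeSettings(bucket_name="my-lake")  # version defaults to "v0"
    lake.get_base_uri()
    # 's3://my-lake/v0/datasets'
    lake.get_experiment_uri("siggy", "risk-assessment")
    # 's3://my-lake/v0/datasets/calibration/experiments/siggy/risk-assessment'
    lake.get_tables_uri("codes")
    # 's3://my-lake/v0/datasets/tables/codes'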
@@ -962,6 +1095,123 @@ class APISettings(BaseSettings):
     )
 
 
+class ModelsSettings(BaseSettings):
+    """
+    Custom model registration settings for downstream applications.
+
+    Allows downstream apps to specify Python modules containing custom models
+    that should be imported (and thus registered) before schema generation.
+
+    This enables `rem db schema generate` to discover models registered with
+    `@rem.register_model` in downstream applications.
+
+    Environment variables:
+        MODELS__IMPORT_MODULES - Semicolon-separated list of Python modules to import
+            Example: "models;myapp.entities;myapp.custom_models"
+
+    Example:
+        # In downstream app's .env
+        MODELS__IMPORT_MODULES=models
+
+        # In downstream app's models/__init__.py
+        import rem
+        from rem.models.core import CoreModel
+
+        @rem.register_model
+        class MyCustomEntity(CoreModel):
+            name: str
+
+        # Then run schema generation
+        rem db schema generate  # Includes MyCustomEntity
+    """
+
+    model_config = SettingsConfigDict(
+        env_prefix="MODELS__",
+        extra="ignore",
+    )
+
+    import_modules: str = Field(
+        default="",
+        description=(
+            "Semicolon-separated list of Python modules to import for model registration. "
+            "These modules are imported before schema generation to ensure custom models "
+            "decorated with @rem.register_model are discovered. "
+            "Example: 'models;myapp.entities'"
+        ),
+    )
+
+    @property
+    def module_list(self) -> list[str]:
+        """
+        Get modules as a list, filtering empty strings.
+
+        Auto-detects ./models folder if it exists and is importable.
+        """
+        modules = []
+        if self.import_modules:
+            modules = [m.strip() for m in self.import_modules.split(";") if m.strip()]
+
+        # Auto-detect ./models if it exists and is a Python package (convention over configuration)
+        from pathlib import Path
+
+        models_path = Path("./models")
+        if models_path.exists() and models_path.is_dir():
+            # Check if it's a Python package (has __init__.py)
+            if (models_path / "__init__.py").exists():
+                if "models" not in modules:
+                    modules.insert(0, "models")
+
+        return modules
+
+
+class SchemaSettings(BaseSettings):
+    """
+    Schema search path settings for agent and evaluator schemas.
+
+    Allows extending REM's schema search with custom directories.
+    Custom paths are searched BEFORE built-in package schemas.
+
+    Environment variables:
+        SCHEMA__PATHS - Semicolon-separated list of directories to search
+            Example: "/app/schemas;/shared/agents;./local-schemas"
+
+    Search Order:
+        1. Exact path (if file exists)
+        2. Custom paths from SCHEMA__PATHS (in order)
+        3. Built-in package schemas (schemas/agents/, schemas/evaluators/, etc.)
+        4. Database LOOKUP (if enabled)
+
+    Example:
+        # In .env or environment
+        SCHEMA__PATHS=/app/custom-agents;/shared/evaluators
+
+        # Then in code
+        from rem.utils.schema_loader import load_agent_schema
+        schema = load_agent_schema("my-custom-agent")  # Found in /app/custom-agents/
+    """
+
+    model_config = SettingsConfigDict(
+        env_prefix="SCHEMA__",
+        extra="ignore",
+    )
+
+    paths: str = Field(
+        default="",
+        description=(
+            "Semicolon-separated list of directories to search for schemas. "
+            "These paths are searched BEFORE built-in package schemas. "
+            "Example: '/app/schemas;/shared/agents'"
+        ),
+    )
+
+    @property
+    def path_list(self) -> list[str]:
+        """Get paths as a list, filtering empty strings."""
+        if not self.paths:
+            return []
+        return [p.strip() for p in self.paths.split(";") if p.strip()]
+
+
 class GitSettings(BaseSettings):
     """
     Git repository provider settings for versioned schema/experiment syncing.
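Per the ModelsSettings docstring, the configured modules are imported before schema generation so that @rem.register_model decorators execute as a side effect. A minimal sketch of that import step; the function name is hypothetical and the settings import assumes the singleton defined later in this file:

    import importlib

    from rem.settings import settings  # singleton assumed from this diff

    def import_registered_model_modules() -> None:
        # Importing each module runs its @rem.register_model decorators
        for module_name in settings.models.module_list:
            importlib.import_module(module_name)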
@@ -1097,6 +1347,110 @@ class GitSettings(BaseSettings):
     )
 
 
+class DBListenerSettings(BaseSettings):
+    """
+    PostgreSQL LISTEN/NOTIFY database listener settings.
+
+    The DB Listener is a lightweight worker that subscribes to PostgreSQL
+    NOTIFY events and dispatches them to external systems (SQS, REST, custom).
+
+    Architecture:
+        - Single-replica deployment (to avoid duplicate processing)
+        - Dedicated connection for LISTEN (not from connection pool)
+        - Automatic reconnection with exponential backoff
+        - Graceful shutdown on SIGTERM
+
+    Use Cases:
+        - Sync data changes to external systems (Phoenix, webhooks)
+        - Trigger async jobs without polling
+        - Event-driven architectures with PostgreSQL as event source
+
+    Example PostgreSQL trigger:
+        CREATE OR REPLACE FUNCTION notify_feedback_insert()
+        RETURNS TRIGGER AS $$
+        BEGIN
+            PERFORM pg_notify('feedback_sync', json_build_object(
+                'id', NEW.id,
+                'table', 'feedbacks',
+                'action', 'insert'
+            )::text);
+            RETURN NEW;
+        END;
+        $$ LANGUAGE plpgsql;
+
+    Environment variables:
+        DB_LISTENER__ENABLED - Enable the listener worker (default: false)
+        DB_LISTENER__CHANNELS - Comma-separated PostgreSQL channels to listen on
+        DB_LISTENER__HANDLER_TYPE - Handler type: 'sqs', 'rest', or 'custom'
+        DB_LISTENER__SQS_QUEUE_URL - SQS queue URL (for handler_type=sqs)
+        DB_LISTENER__REST_ENDPOINT - REST endpoint URL (for handler_type=rest)
+        DB_LISTENER__RECONNECT_DELAY - Initial reconnect delay in seconds
+        DB_LISTENER__MAX_RECONNECT_DELAY - Maximum reconnect delay in seconds
+
+    References:
+        - PostgreSQL NOTIFY: https://www.postgresql.org/docs/current/sql-notify.html
+        - Brandur's Notifier: https://brandur.org/notifier
+    """
+
+    model_config = SettingsConfigDict(
+        env_prefix="DB_LISTENER__",
+        env_file=".env",
+        env_file_encoding="utf-8",
+        extra="ignore",
+    )
+
+    enabled: bool = Field(
+        default=False,
+        description="Enable the DB Listener worker (disabled by default)",
+    )
+
+    channels: str = Field(
+        default="",
+        description=(
+            "Comma-separated list of PostgreSQL channels to LISTEN on. "
+            "Example: 'feedback_sync,entity_update,user_events'"
+        ),
+    )
+
+    handler_type: str = Field(
+        default="rest",
+        description=(
+            "Handler type for dispatching notifications. Options: "
+            "'sqs' (publish to SQS), 'rest' (POST to endpoint), 'custom' (Python handlers)"
+        ),
+    )
+
+    sqs_queue_url: str = Field(
+        default="",
+        description="SQS queue URL for handler_type='sqs'",
+    )
+
+    rest_endpoint: str = Field(
+        default="http://localhost:8000/api/v1/internal/events",
+        description=(
+            "REST endpoint URL for handler_type='rest'. "
+            "Receives POST with {channel, payload, source} JSON body."
+        ),
+    )
+
+    reconnect_delay: float = Field(
+        default=1.0,
+        description="Initial delay (seconds) between reconnection attempts",
+    )
+
+    max_reconnect_delay: float = Field(
+        default=60.0,
+        description="Maximum delay (seconds) between reconnection attempts (exponential backoff cap)",
+    )
+
+    @property
+    def channel_list(self) -> list[str]:
+        """Get channels as a list, filtering empty strings."""
+        if not self.channels:
+            return []
+        return [c.strip() for c in self.channels.split(",") if c.strip()]
+
+
 class TestSettings(BaseSettings):
     """
     Test environment settings.
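This class only carries configuration; the worker itself is added in rem/workers/db_listener.py, which this diff summary lists but does not show. A minimal sketch of the LISTEN side using asyncpg, offered as an illustration of the pattern rather than the package's implementation (the DSN is a placeholder):

    import asyncio
    import asyncpg

    DSN = "postgresql://rem:rem@localhost:5432/rem"  # placeholder

    async def listen(channels: list[str]) -> None:
        conn = await asyncpg.connect(DSN)  # dedicated connection, not from a pool

        def on_notify(connection, pid, channel, payload):
            # The real worker would dispatch to the configured handler (sqs/rest/custom)
            print(f"{channel}: {payload}")

        for channel in channels:
            await conn.add_listener(channel, on_notify)
        await asyncio.Event().wait()  # keep listening until cancelled

    # asyncio.run(listen(["feedback_sync"]))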
@@ -1166,6 +1520,11 @@ class Settings(BaseSettings):
         extra="ignore",
     )
 
+    app_name: str = Field(
+        default="REM",
+        description="Application/API name used in docs, titles, and user-facing text",
+    )
+
     team: str = Field(
         default="rem",
         description="Team or project name for observability",
@@ -1186,16 +1545,12 @@ class Settings(BaseSettings):
         description="Root path for reverse proxy (e.g., /rem for ALB routing)",
     )
 
-    sql_dir: str = Field(
-        default="src/rem/sql",
-        description="Directory for SQL files and migrations",
-    )
-
     # Nested settings groups
     api: APISettings = Field(default_factory=APISettings)
     chat: ChatSettings = Field(default_factory=ChatSettings)
     llm: LLMSettings = Field(default_factory=LLMSettings)
     mcp: MCPSettings = Field(default_factory=MCPSettings)
+    models: ModelsSettings = Field(default_factory=ModelsSettings)
     otel: OTELSettings = Field(default_factory=OTELSettings)
     phoenix: PhoenixSettings = Field(default_factory=PhoenixSettings)
     auth: AuthSettings = Field(default_factory=AuthSettings)
@@ -1203,24 +1558,39 @@ class Settings(BaseSettings):
     migration: MigrationSettings = Field(default_factory=MigrationSettings)
     storage: StorageSettings = Field(default_factory=StorageSettings)
     s3: S3Settings = Field(default_factory=S3Settings)
+    data_lake: DataLakeSettings = Field(default_factory=DataLakeSettings)
     git: GitSettings = Field(default_factory=GitSettings)
     sqs: SQSSettings = Field(default_factory=SQSSettings)
+    db_listener: DBListenerSettings = Field(default_factory=DBListenerSettings)
     chunking: ChunkingSettings = Field(default_factory=ChunkingSettings)
     content: ContentSettings = Field(default_factory=ContentSettings)
+    schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
     test: TestSettings = Field(default_factory=TestSettings)
 
 
+# Auto-load .env file from current directory if it exists
+# This happens BEFORE config file loading, so .env takes precedence
+from pathlib import Path
+from dotenv import load_dotenv
+
+_dotenv_path = Path(".env")
+if _dotenv_path.exists():
+    load_dotenv(_dotenv_path, override=False)  # Don't override existing env vars
+    logger.debug(f"Loaded environment from {_dotenv_path.resolve()}")
+
 # Load configuration from ~/.rem/config.yaml before initializing settings
 # This allows user configuration to be merged with environment variables
-
-
-
-
-
-
-
-
-
+# Set REM_SKIP_CONFIG=1 to disable (useful for development with .env)
+if not os.getenv("REM_SKIP_CONFIG", "").lower() in ("true", "1", "yes"):
+    try:
+        from rem.config import load_config, merge_config_to_env
+
+        _config = load_config()
+        if _config:
+            merge_config_to_env(_config)
+    except ImportError:
+        # config module not available (e.g., during initial setup)
+        pass
 
 # Global settings singleton
 settings = Settings()
rem/sql/background_indexes.sql
CHANGED
@@ -1,9 +1,9 @@
 -- Background index creation
 -- Run AFTER initial data load to avoid blocking writes
 
--- HNSW vector index for
-CREATE INDEX CONCURRENTLY IF NOT EXISTS
-ON
+-- HNSW vector index for embeddings_files
+CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_files_vector_hnsw
+ON embeddings_files
 USING hnsw (embedding vector_cosine_ops);
 
 -- HNSW vector index for embeddings_image_resources
@@ -11,24 +11,14 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_image_resources_vector_hn
 ON embeddings_image_resources
 USING hnsw (embedding vector_cosine_ops);
 
--- HNSW vector index for embeddings_moments
-CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_moments_vector_hnsw
-ON embeddings_moments
-USING hnsw (embedding vector_cosine_ops);
-
--- HNSW vector index for embeddings_resources
-CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_resources_vector_hnsw
-ON embeddings_resources
-USING hnsw (embedding vector_cosine_ops);
-
 -- HNSW vector index for embeddings_messages
 CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_messages_vector_hnsw
 ON embeddings_messages
 USING hnsw (embedding vector_cosine_ops);
 
--- HNSW vector index for
-CREATE INDEX CONCURRENTLY IF NOT EXISTS
-ON
+-- HNSW vector index for embeddings_moments
+CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_moments_vector_hnsw
+ON embeddings_moments
 USING hnsw (embedding vector_cosine_ops);
 
 -- HNSW vector index for embeddings_ontology_configs
@@ -36,7 +26,22 @@ CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_ontology_configs_vector_h
 ON embeddings_ontology_configs
 USING hnsw (embedding vector_cosine_ops);
 
+-- HNSW vector index for embeddings_resources
+CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_resources_vector_hnsw
+ON embeddings_resources
+USING hnsw (embedding vector_cosine_ops);
+
 -- HNSW vector index for embeddings_schemas
 CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_schemas_vector_hnsw
 ON embeddings_schemas
 USING hnsw (embedding vector_cosine_ops);
+
+-- HNSW vector index for embeddings_sessions
+CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_sessions_vector_hnsw
+ON embeddings_sessions
+USING hnsw (embedding vector_cosine_ops);
+
+-- HNSW vector index for embeddings_users
+CREATE INDEX CONCURRENTLY IF NOT EXISTS idx_embeddings_users_vector_hnsw
+ON embeddings_users
+USING hnsw (embedding vector_cosine_ops);