remdb-0.3.118-py3-none-any.whl → remdb-0.3.141-py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic. Click here for more details.

Files changed (40)
  1. rem/agentic/agents/sse_simulator.py +2 -0
  2. rem/agentic/context.py +23 -3
  3. rem/agentic/mcp/tool_wrapper.py +126 -15
  4. rem/agentic/otel/setup.py +1 -0
  5. rem/agentic/providers/phoenix.py +371 -108
  6. rem/agentic/providers/pydantic_ai.py +122 -43
  7. rem/agentic/schema.py +4 -1
  8. rem/api/mcp_router/tools.py +13 -2
  9. rem/api/routers/chat/completions.py +250 -4
  10. rem/api/routers/chat/models.py +81 -7
  11. rem/api/routers/chat/otel_utils.py +33 -0
  12. rem/api/routers/chat/sse_events.py +17 -1
  13. rem/api/routers/chat/streaming.py +35 -1
  14. rem/api/routers/feedback.py +134 -14
  15. rem/cli/commands/cluster.py +590 -82
  16. rem/cli/commands/configure.py +3 -4
  17. rem/cli/commands/experiments.py +436 -30
  18. rem/cli/commands/session.py +336 -0
  19. rem/cli/dreaming.py +2 -2
  20. rem/cli/main.py +2 -0
  21. rem/config.py +8 -1
  22. rem/models/core/experiment.py +54 -0
  23. rem/models/entities/ontology.py +1 -1
  24. rem/models/entities/ontology_config.py +1 -1
  25. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  26. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  27. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  28. rem/services/phoenix/client.py +59 -18
  29. rem/services/session/compression.py +7 -0
  30. rem/settings.py +236 -13
  31. rem/sql/migrations/002_install_models.sql +91 -91
  32. rem/sql/migrations/004_cache_system.sql +1 -1
  33. rem/utils/schema_loader.py +94 -3
  34. rem/utils/vision.py +1 -1
  35. rem/workers/__init__.py +2 -1
  36. rem/workers/db_listener.py +579 -0
  37. {remdb-0.3.118.dist-info → remdb-0.3.141.dist-info}/METADATA +156 -144
  38. {remdb-0.3.118.dist-info → remdb-0.3.141.dist-info}/RECORD +40 -37
  39. {remdb-0.3.118.dist-info → remdb-0.3.141.dist-info}/WHEEL +0 -0
  40. {remdb-0.3.118.dist-info → remdb-0.3.141.dist-info}/entry_points.txt +0 -0
rem/services/session/compression.py CHANGED
@@ -170,12 +170,16 @@ class SessionMessageStore:
170
170
  entity_key = truncate_key(f"session-{session_id}-msg-{message_index}")
171
171
 
172
172
  # Create Message entity for assistant response
173
+ # Use pre-generated id from message dict if available (for frontend feedback)
173
174
  msg = Message(
175
+ id=message.get("id"), # Use pre-generated ID if provided
174
176
  content=message.get("content", ""),
175
177
  message_type=message.get("role", "assistant"),
176
178
  session_id=session_id,
177
179
  tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
178
180
  user_id=user_id or self.user_id,
181
+ trace_id=message.get("trace_id"),
182
+ span_id=message.get("span_id"),
179
183
  metadata={
180
184
  "message_index": message_index,
181
185
  "entity_key": entity_key, # Store entity key for LOOKUP
@@ -284,11 +288,14 @@ class SessionMessageStore:
284
288
  # Short assistant messages, user messages, and system messages stored as-is
285
289
  # Store ALL messages in database for full audit trail
286
290
  msg = Message(
291
+ id=message.get("id"), # Use pre-generated ID if provided
287
292
  content=content,
288
293
  message_type=message.get("role", "user"),
289
294
  session_id=session_id,
290
295
  tenant_id=self.user_id, # Set tenant_id to user_id (application scoped to user)
291
296
  user_id=user_id or self.user_id,
297
+ trace_id=message.get("trace_id"),
298
+ span_id=message.get("span_id"),
292
299
  metadata={
293
300
  "message_index": idx,
294
301
  "timestamp": message.get("timestamp"),
rem/settings.py CHANGED
@@ -33,14 +33,15 @@ Example .env file:
33
33
  AUTH__OIDC_CLIENT_ID=your-client-id
34
34
  AUTH__SESSION_SECRET=your-secret-key
35
35
 
36
- # OpenTelemetry (disabled by default)
36
+ # OpenTelemetry (disabled by default - enable via env var when collector available)
37
+ # Standard OTLP collector ports: 4317 (gRPC), 4318 (HTTP)
37
38
  OTEL__ENABLED=false
38
39
  OTEL__SERVICE_NAME=rem-api
39
- OTEL__COLLECTOR_ENDPOINT=http://localhost:4318
40
- OTEL__PROTOCOL=http
40
+ OTEL__COLLECTOR_ENDPOINT=http://localhost:4317
41
+ OTEL__PROTOCOL=grpc
41
42
 
42
- # Arize Phoenix (disabled by default)
43
- PHOENIX__ENABLED=false
43
+ # Arize Phoenix (enabled by default - can be disabled via env var)
44
+ PHOENIX__ENABLED=true
44
45
  PHOENIX__COLLECTOR_ENDPOINT=http://localhost:6006/v1/traces
45
46
  PHOENIX__PROJECT_NAME=rem
46
47
 
@@ -241,6 +242,11 @@ class OTELSettings(BaseSettings):
241
242
  description="Export timeout in milliseconds",
242
243
  )
243
244
 
245
+ insecure: bool = Field(
246
+ default=True,
247
+ description="Use insecure (non-TLS) gRPC connection (default: True for local dev)",
248
+ )
249
+
244
250
 
245
251
  class PhoenixSettings(BaseSettings):
246
252
  """
@@ -267,8 +273,8 @@ class PhoenixSettings(BaseSettings):
267
273
  )
268
274
 
269
275
  enabled: bool = Field(
270
- default=False,
271
- description="Enable Phoenix integration (disabled by default for local dev)",
276
+ default=True,
277
+ description="Enable Phoenix integration (enabled by default)",
272
278
  )
273
279
 
274
280
  base_url: str = Field(
@@ -686,6 +692,91 @@ class S3Settings(BaseSettings):
686
692
  )
687
693
 
688
694
 
695
+ class DataLakeSettings(BaseSettings):
696
+ """
697
+ Data lake settings for experiment and dataset storage.
698
+
699
+ Data Lake Convention:
700
+ The data lake provides a standardized structure for storing datasets,
701
+ experiments, and calibration data in S3. Users bring their own bucket
702
+ and the version is pinned by default to v0 in the path.
703
+
704
+ S3 Path Structure:
705
+ s3://{bucket}/{version}/datasets/
706
+ ├── raw/ # Raw source data + transformers
707
+ │ └── {dataset_name}/ # e.g., cns_drugs, codes, care
708
+ ├── tables/ # Database table data (JSONL)
709
+ │ ├── resources/ # → resources table
710
+ │ │ ├── drugs/{category}/ # Psychotropic drugs
711
+ │ │ ├── care/stages/ # Treatment stages
712
+ │ │ └── crisis/ # Crisis resources
713
+ │ └── codes/ # → codes table
714
+ │ ├── icd10/{category}/ # ICD-10 codes
715
+ │ └── cpt/ # CPT codes
716
+ └── calibration/ # Agent calibration
717
+ ├── experiments/ # Experiment configs + results
718
+ │ └── {agent}/{task}/ # e.g., siggy/risk-assessment
719
+ └── datasets/ # Shared evaluation datasets
720
+
721
+ Experiment Storage:
722
+ - Local: experiments/{agent}/{task}/experiment.yaml
723
+ - S3: s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
724
+
725
+ Environment variables:
726
+ DATA_LAKE__BUCKET_NAME - S3 bucket for data lake (required)
727
+ DATA_LAKE__VERSION - Path version prefix (default: v0)
728
+ DATA_LAKE__DATASETS_PREFIX - Datasets directory (default: datasets)
729
+ DATA_LAKE__EXPERIMENTS_PREFIX - Experiments subdirectory (default: experiments)
730
+ """
731
+
732
+ model_config = SettingsConfigDict(
733
+ env_prefix="DATA_LAKE__",
734
+ env_file=".env",
735
+ env_file_encoding="utf-8",
736
+ extra="ignore",
737
+ )
738
+
739
+ bucket_name: str | None = Field(
740
+ default=None,
741
+ description="S3 bucket for data lake storage (user-provided)",
742
+ )
743
+
744
+ version: str = Field(
745
+ default="v0",
746
+ description="API version for data lake paths",
747
+ )
748
+
749
+ datasets_prefix: str = Field(
750
+ default="datasets",
751
+ description="Root directory for datasets in the bucket",
752
+ )
753
+
754
+ experiments_prefix: str = Field(
755
+ default="experiments",
756
+ description="Subdirectory within calibration for experiments",
757
+ )
758
+
759
+ def get_base_uri(self) -> str | None:
760
+ """Get the base S3 URI for the data lake."""
761
+ if not self.bucket_name:
762
+ return None
763
+ return f"s3://{self.bucket_name}/{self.version}/{self.datasets_prefix}"
764
+
765
+ def get_experiment_uri(self, agent: str, task: str = "general") -> str | None:
766
+ """Get the S3 URI for an experiment."""
767
+ base = self.get_base_uri()
768
+ if not base:
769
+ return None
770
+ return f"{base}/calibration/{self.experiments_prefix}/{agent}/{task}"
771
+
772
+ def get_tables_uri(self, table: str = "resources") -> str | None:
773
+ """Get the S3 URI for a table directory."""
774
+ base = self.get_base_uri()
775
+ if not base:
776
+ return None
777
+ return f"{base}/tables/{table}"
778
+
779
+
689
780
  class ChunkingSettings(BaseSettings):
690
781
  """
691
782
  Document chunking settings for semantic text splitting.
@@ -1051,10 +1142,26 @@ class ModelsSettings(BaseSettings):
1051
1142
 
1052
1143
  @property
1053
1144
  def module_list(self) -> list[str]:
1054
- """Get modules as a list, filtering empty strings."""
1055
- if not self.import_modules:
1056
- return []
1057
- return [m.strip() for m in self.import_modules.split(";") if m.strip()]
1145
+ """
1146
+ Get modules as a list, filtering empty strings.
1147
+
1148
+ Auto-detects ./models folder if it exists and is importable.
1149
+ """
1150
+ modules = []
1151
+ if self.import_modules:
1152
+ modules = [m.strip() for m in self.import_modules.split(";") if m.strip()]
1153
+
1154
+ # Auto-detect ./models if it exists and is a Python package (convention over configuration)
1155
+ from pathlib import Path
1156
+
1157
+ models_path = Path("./models")
1158
+ if models_path.exists() and models_path.is_dir():
1159
+ # Check if it's a Python package (has __init__.py)
1160
+ if (models_path / "__init__.py").exists():
1161
+ if "models" not in modules:
1162
+ modules.insert(0, "models")
1163
+
1164
+ return modules
1058
1165
 
1059
1166
 
1060
1167
  class SchemaSettings(BaseSettings):
@@ -1240,6 +1347,110 @@ class GitSettings(BaseSettings):
1240
1347
  )
1241
1348
 
1242
1349
 
1350
+ class DBListenerSettings(BaseSettings):
1351
+ """
1352
+ PostgreSQL LISTEN/NOTIFY database listener settings.
1353
+
1354
+ The DB Listener is a lightweight worker that subscribes to PostgreSQL
1355
+ NOTIFY events and dispatches them to external systems (SQS, REST, custom).
1356
+
1357
+ Architecture:
1358
+ - Single-replica deployment (to avoid duplicate processing)
1359
+ - Dedicated connection for LISTEN (not from connection pool)
1360
+ - Automatic reconnection with exponential backoff
1361
+ - Graceful shutdown on SIGTERM
1362
+
1363
+ Use Cases:
1364
+ - Sync data changes to external systems (Phoenix, webhooks)
1365
+ - Trigger async jobs without polling
1366
+ - Event-driven architectures with PostgreSQL as event source
1367
+
1368
+ Example PostgreSQL trigger:
1369
+ CREATE OR REPLACE FUNCTION notify_feedback_insert()
1370
+ RETURNS TRIGGER AS $$
1371
+ BEGIN
1372
+ PERFORM pg_notify('feedback_sync', json_build_object(
1373
+ 'id', NEW.id,
1374
+ 'table', 'feedbacks',
1375
+ 'action', 'insert'
1376
+ )::text);
1377
+ RETURN NEW;
1378
+ END;
1379
+ $$ LANGUAGE plpgsql;
1380
+
1381
+ Environment variables:
1382
+ DB_LISTENER__ENABLED - Enable the listener worker (default: false)
1383
+ DB_LISTENER__CHANNELS - Comma-separated PostgreSQL channels to listen on
1384
+ DB_LISTENER__HANDLER_TYPE - Handler type: 'sqs', 'rest', or 'custom'
1385
+ DB_LISTENER__SQS_QUEUE_URL - SQS queue URL (for handler_type=sqs)
1386
+ DB_LISTENER__REST_ENDPOINT - REST endpoint URL (for handler_type=rest)
1387
+ DB_LISTENER__RECONNECT_DELAY - Initial reconnect delay in seconds
1388
+ DB_LISTENER__MAX_RECONNECT_DELAY - Maximum reconnect delay in seconds
1389
+
1390
+ References:
1391
+ - PostgreSQL NOTIFY: https://www.postgresql.org/docs/current/sql-notify.html
1392
+ - Brandur's Notifier: https://brandur.org/notifier
1393
+ """
1394
+
1395
+ model_config = SettingsConfigDict(
1396
+ env_prefix="DB_LISTENER__",
1397
+ env_file=".env",
1398
+ env_file_encoding="utf-8",
1399
+ extra="ignore",
1400
+ )
1401
+
1402
+ enabled: bool = Field(
1403
+ default=False,
1404
+ description="Enable the DB Listener worker (disabled by default)",
1405
+ )
1406
+
1407
+ channels: str = Field(
1408
+ default="",
1409
+ description=(
1410
+ "Comma-separated list of PostgreSQL channels to LISTEN on. "
1411
+ "Example: 'feedback_sync,entity_update,user_events'"
1412
+ ),
1413
+ )
1414
+
1415
+ handler_type: str = Field(
1416
+ default="rest",
1417
+ description=(
1418
+ "Handler type for dispatching notifications. Options: "
1419
+ "'sqs' (publish to SQS), 'rest' (POST to endpoint), 'custom' (Python handlers)"
1420
+ ),
1421
+ )
1422
+
1423
+ sqs_queue_url: str = Field(
1424
+ default="",
1425
+ description="SQS queue URL for handler_type='sqs'",
1426
+ )
1427
+
1428
+ rest_endpoint: str = Field(
1429
+ default="http://localhost:8000/api/v1/internal/events",
1430
+ description=(
1431
+ "REST endpoint URL for handler_type='rest'. "
1432
+ "Receives POST with {channel, payload, source} JSON body."
1433
+ ),
1434
+ )
1435
+
1436
+ reconnect_delay: float = Field(
1437
+ default=1.0,
1438
+ description="Initial delay (seconds) between reconnection attempts",
1439
+ )
1440
+
1441
+ max_reconnect_delay: float = Field(
1442
+ default=60.0,
1443
+ description="Maximum delay (seconds) between reconnection attempts (exponential backoff cap)",
1444
+ )
1445
+
1446
+ @property
1447
+ def channel_list(self) -> list[str]:
1448
+ """Get channels as a list, filtering empty strings."""
1449
+ if not self.channels:
1450
+ return []
1451
+ return [c.strip() for c in self.channels.split(",") if c.strip()]
1452
+
1453
+
1243
1454
  class TestSettings(BaseSettings):
1244
1455
  """
1245
1456
  Test environment settings.
@@ -1347,18 +1558,30 @@ class Settings(BaseSettings):
1347
1558
  migration: MigrationSettings = Field(default_factory=MigrationSettings)
1348
1559
  storage: StorageSettings = Field(default_factory=StorageSettings)
1349
1560
  s3: S3Settings = Field(default_factory=S3Settings)
1561
+ data_lake: DataLakeSettings = Field(default_factory=DataLakeSettings)
1350
1562
  git: GitSettings = Field(default_factory=GitSettings)
1351
1563
  sqs: SQSSettings = Field(default_factory=SQSSettings)
1564
+ db_listener: DBListenerSettings = Field(default_factory=DBListenerSettings)
1352
1565
  chunking: ChunkingSettings = Field(default_factory=ChunkingSettings)
1353
1566
  content: ContentSettings = Field(default_factory=ContentSettings)
1354
1567
  schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
1355
1568
  test: TestSettings = Field(default_factory=TestSettings)
1356
1569
 
1357
1570
 
1571
+ # Auto-load .env file from current directory if it exists
1572
+ # This happens BEFORE config file loading, so .env takes precedence
1573
+ from pathlib import Path
1574
+ from dotenv import load_dotenv
1575
+
1576
+ _dotenv_path = Path(".env")
1577
+ if _dotenv_path.exists():
1578
+ load_dotenv(_dotenv_path, override=False) # Don't override existing env vars
1579
+ logger.debug(f"Loaded environment from {_dotenv_path.resolve()}")
1580
+
1358
1581
  # Load configuration from ~/.rem/config.yaml before initializing settings
1359
1582
  # This allows user configuration to be merged with environment variables
1360
- # Set REM_SKIP_CONFIG_FILE=true to disable (useful for development with .env)
1361
- if not os.getenv("REM_SKIP_CONFIG_FILE", "").lower() in ("true", "1", "yes"):
1583
+ # Set REM_SKIP_CONFIG=1 to disable (useful for development with .env)
1584
+ if not os.getenv("REM_SKIP_CONFIG", "").lower() in ("true", "1", "yes"):
1362
1585
  try:
1363
1586
  from rem.config import load_config, merge_config_to_env
1364
1587