remdb: remdb-0.3.127-py3-none-any.whl → remdb-0.3.172-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of remdb might be problematic.

Files changed (62):
  1. rem/agentic/agents/__init__.py +16 -0
  2. rem/agentic/agents/agent_manager.py +311 -0
  3. rem/agentic/context.py +81 -3
  4. rem/agentic/context_builder.py +36 -9
  5. rem/agentic/mcp/tool_wrapper.py +132 -15
  6. rem/agentic/providers/phoenix.py +371 -108
  7. rem/agentic/providers/pydantic_ai.py +163 -45
  8. rem/agentic/schema.py +8 -4
  9. rem/api/deps.py +3 -5
  10. rem/api/main.py +22 -3
  11. rem/api/mcp_router/resources.py +15 -10
  12. rem/api/mcp_router/server.py +2 -0
  13. rem/api/mcp_router/tools.py +94 -2
  14. rem/api/middleware/tracking.py +5 -5
  15. rem/api/routers/auth.py +349 -6
  16. rem/api/routers/chat/completions.py +5 -3
  17. rem/api/routers/chat/streaming.py +95 -22
  18. rem/api/routers/messages.py +24 -15
  19. rem/auth/__init__.py +13 -3
  20. rem/auth/jwt.py +352 -0
  21. rem/auth/middleware.py +115 -10
  22. rem/auth/providers/__init__.py +4 -1
  23. rem/auth/providers/email.py +215 -0
  24. rem/cli/commands/configure.py +3 -4
  25. rem/cli/commands/experiments.py +226 -50
  26. rem/cli/commands/session.py +336 -0
  27. rem/cli/dreaming.py +2 -2
  28. rem/cli/main.py +2 -0
  29. rem/models/core/experiment.py +58 -14
  30. rem/models/entities/__init__.py +4 -0
  31. rem/models/entities/ontology.py +1 -1
  32. rem/models/entities/ontology_config.py +1 -1
  33. rem/models/entities/subscriber.py +175 -0
  34. rem/models/entities/user.py +1 -0
  35. rem/schemas/agents/core/agent-builder.yaml +235 -0
  36. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  37. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  38. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  39. rem/services/__init__.py +3 -1
  40. rem/services/content/service.py +4 -3
  41. rem/services/email/__init__.py +10 -0
  42. rem/services/email/service.py +513 -0
  43. rem/services/email/templates.py +360 -0
  44. rem/services/postgres/README.md +38 -0
  45. rem/services/postgres/diff_service.py +19 -3
  46. rem/services/postgres/pydantic_to_sqlalchemy.py +45 -13
  47. rem/services/postgres/repository.py +5 -4
  48. rem/services/session/compression.py +113 -50
  49. rem/services/session/reload.py +14 -7
  50. rem/services/user_service.py +41 -9
  51. rem/settings.py +292 -5
  52. rem/sql/migrations/001_install.sql +1 -1
  53. rem/sql/migrations/002_install_models.sql +91 -91
  54. rem/sql/migrations/005_schema_update.sql +145 -0
  55. rem/utils/README.md +45 -0
  56. rem/utils/files.py +157 -1
  57. rem/utils/schema_loader.py +45 -7
  58. rem/utils/vision.py +1 -1
  59. {remdb-0.3.127.dist-info → remdb-0.3.172.dist-info}/METADATA +7 -5
  60. {remdb-0.3.127.dist-info → remdb-0.3.172.dist-info}/RECORD +62 -52
  61. {remdb-0.3.127.dist-info → remdb-0.3.172.dist-info}/WHEEL +0 -0
  62. {remdb-0.3.127.dist-info → remdb-0.3.172.dist-info}/entry_points.txt +0 -0
rem/settings.py CHANGED
@@ -21,8 +21,8 @@ Example .env file:
21
21
  LLM__OPENAI_API_KEY=sk-...
22
22
  LLM__ANTHROPIC_API_KEY=sk-ant-...
23
23
 
24
- # Database (port 5050 for Docker Compose)
25
- POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5050/rem
24
+ # Database (port 5051 for Docker Compose prebuilt, 5050 for local dev)
25
+ POSTGRES__CONNECTION_STRING=postgresql://rem:rem@localhost:5051/rem
26
26
  POSTGRES__POOL_MIN_SIZE=5
27
27
  POSTGRES__POOL_MAX_SIZE=20
28
28
  POSTGRES__STATEMENT_TIMEOUT=30000
@@ -77,6 +77,7 @@ class LLMSettings(BaseSettings):
77
77
  LLM__ANTHROPIC_API_KEY or ANTHROPIC_API_KEY - Anthropic API key
78
78
  LLM__EMBEDDING_PROVIDER or EMBEDDING_PROVIDER - Default embedding provider (openai)
79
79
  LLM__EMBEDDING_MODEL or EMBEDDING_MODEL - Default embedding model name
80
+ LLM__DEFAULT_STRUCTURED_OUTPUT - Default structured output mode (False = streaming text)
80
81
  """
81
82
 
82
83
  model_config = SettingsConfigDict(
@@ -138,6 +139,11 @@ class LLMSettings(BaseSettings):
138
139
  description="Default embedding model (provider-specific model name)",
139
140
  )
140
141
 
142
+ default_structured_output: bool = Field(
143
+ default=False,
144
+ description="Default structured output mode for agents. False = streaming text (easier), True = JSON schema validation",
145
+ )
146
+
141
147
  @field_validator("openai_api_key", mode="before")
142
148
  @classmethod
143
149
  def validate_openai_api_key(cls, v):
@@ -464,10 +470,11 @@ class PostgresSettings(BaseSettings):
464
470
  )
465
471
 
466
472
  connection_string: str = Field(
467
- default="postgresql://rem:rem@localhost:5050/rem",
468
- description="PostgreSQL connection string (default uses Docker Compose port 5050)",
473
+ default="postgresql://rem:rem@localhost:5051/rem",
474
+ description="PostgreSQL connection string (default uses Docker Compose prebuilt port 5051)",
469
475
  )
470
476
 
477
+
471
478
  pool_size: int = Field(
472
479
  default=10,
473
480
  description="Connection pool size (deprecated, use pool_min_size/pool_max_size)",
@@ -692,6 +699,91 @@ class S3Settings(BaseSettings):
692
699
  )
693
700
 
694
701
 
702
+ class DataLakeSettings(BaseSettings):
703
+ """
704
+ Data lake settings for experiment and dataset storage.
705
+
706
+ Data Lake Convention:
707
+ The data lake provides a standardized structure for storing datasets,
708
+ experiments, and calibration data in S3. Users bring their own bucket
709
+ and the version is pinned by default to v0 in the path.
710
+
711
+ S3 Path Structure:
712
+ s3://{bucket}/{version}/datasets/
713
+ ├── raw/ # Raw source data + transformers
714
+ │ └── {dataset_name}/ # e.g., cns_drugs, codes, care
715
+ ├── tables/ # Database table data (JSONL)
716
+ │ ├── resources/ # → resources table
717
+ │ │ ├── drugs/{category}/ # Psychotropic drugs
718
+ │ │ ├── care/stages/ # Treatment stages
719
+ │ │ └── crisis/ # Crisis resources
720
+ │ └── codes/ # → codes table
721
+ │ ├── icd10/{category}/ # ICD-10 codes
722
+ │ └── cpt/ # CPT codes
723
+ └── calibration/ # Agent calibration
724
+ ├── experiments/ # Experiment configs + results
725
+ │ └── {agent}/{task}/ # e.g., siggy/risk-assessment
726
+ └── datasets/ # Shared evaluation datasets
727
+
728
+ Experiment Storage:
729
+ - Local: experiments/{agent}/{task}/experiment.yaml
730
+ - S3: s3://{bucket}/{version}/datasets/calibration/experiments/{agent}/{task}/
731
+
732
+ Environment variables:
733
+ DATA_LAKE__BUCKET_NAME - S3 bucket for data lake (required)
734
+ DATA_LAKE__VERSION - Path version prefix (default: v0)
735
+ DATA_LAKE__DATASETS_PREFIX - Datasets directory (default: datasets)
736
+ DATA_LAKE__EXPERIMENTS_PREFIX - Experiments subdirectory (default: experiments)
737
+ """
738
+
739
+ model_config = SettingsConfigDict(
740
+ env_prefix="DATA_LAKE__",
741
+ env_file=".env",
742
+ env_file_encoding="utf-8",
743
+ extra="ignore",
744
+ )
745
+
746
+ bucket_name: str | None = Field(
747
+ default=None,
748
+ description="S3 bucket for data lake storage (user-provided)",
749
+ )
750
+
751
+ version: str = Field(
752
+ default="v0",
753
+ description="API version for data lake paths",
754
+ )
755
+
756
+ datasets_prefix: str = Field(
757
+ default="datasets",
758
+ description="Root directory for datasets in the bucket",
759
+ )
760
+
761
+ experiments_prefix: str = Field(
762
+ default="experiments",
763
+ description="Subdirectory within calibration for experiments",
764
+ )
765
+
766
+ def get_base_uri(self) -> str | None:
767
+ """Get the base S3 URI for the data lake."""
768
+ if not self.bucket_name:
769
+ return None
770
+ return f"s3://{self.bucket_name}/{self.version}/{self.datasets_prefix}"
771
+
772
+ def get_experiment_uri(self, agent: str, task: str = "general") -> str | None:
773
+ """Get the S3 URI for an experiment."""
774
+ base = self.get_base_uri()
775
+ if not base:
776
+ return None
777
+ return f"{base}/calibration/{self.experiments_prefix}/{agent}/{task}"
778
+
779
+ def get_tables_uri(self, table: str = "resources") -> str | None:
780
+ """Get the S3 URI for a table directory."""
781
+ base = self.get_base_uri()
782
+ if not base:
783
+ return None
784
+ return f"{base}/tables/{table}"
785
+
786
+
695
787
  class ChunkingSettings(BaseSettings):
696
788
  """
697
789
  Document chunking settings for semantic text splitting.
@@ -942,7 +1034,7 @@ class ChatSettings(BaseSettings):
942
1034
  - Prevents context window bloat while maintaining conversation continuity
943
1035
 
944
1036
  User Context (on-demand by default):
945
- - Agent system prompt includes: "User ID: {user_id}. To load user profile: Use REM LOOKUP users/{user_id}"
1037
+ - Agent system prompt includes: "User: {email}. To load user profile: Use REM LOOKUP \"{email}\""
946
1038
  - Agent decides whether to load profile based on query
947
1039
  - More efficient for queries that don't need personalization
948
1040
 
@@ -975,6 +1067,8 @@ class APISettings(BaseSettings):
975
1067
  API__RELOAD - Enable auto-reload for development
976
1068
  API__WORKERS - Number of worker processes (production)
977
1069
  API__LOG_LEVEL - Logging level (debug, info, warning, error)
1070
+ API__API_KEY_ENABLED - Enable X-API-Key header authentication
1071
+ API__API_KEY - API key for X-API-Key authentication
978
1072
  """
979
1073
 
980
1074
  model_config = SettingsConfigDict(
@@ -1009,6 +1103,31 @@ class APISettings(BaseSettings):
1009
1103
  description="Logging level (debug, info, warning, error, critical)",
1010
1104
  )
1011
1105
 
1106
+ api_key_enabled: bool = Field(
1107
+ default=False,
1108
+ description=(
1109
+ "Enable X-API-Key header authentication for API endpoints. "
1110
+ "When enabled, requests must include X-API-Key header with valid key. "
1111
+ "This provides simple API key auth independent of OAuth."
1112
+ ),
1113
+ )
1114
+
1115
+ api_key: str | None = Field(
1116
+ default=None,
1117
+ description=(
1118
+ "API key for X-API-Key authentication. Required when api_key_enabled=true. "
1119
+ "Generate with: python -c \"import secrets; print(secrets.token_urlsafe(32))\""
1120
+ ),
1121
+ )
1122
+
1123
+ rate_limit_enabled: bool = Field(
1124
+ default=True,
1125
+ description=(
1126
+ "Enable rate limiting for API endpoints. "
1127
+ "Set to false to disable rate limiting entirely (useful for development)."
1128
+ ),
1129
+ )
1130
+
1012
1131
 
1013
1132
  class ModelsSettings(BaseSettings):
1014
1133
  """
@@ -1366,6 +1485,172 @@ class DBListenerSettings(BaseSettings):
1366
1485
  return [c.strip() for c in self.channels.split(",") if c.strip()]
1367
1486
 
1368
1487
 
1488
+ class EmailSettings(BaseSettings):
1489
+ """
1490
+ Email service settings for SMTP.
1491
+
1492
+ Supports passwordless login via email codes and transactional emails.
1493
+ Uses Gmail SMTP with App Passwords by default.
1494
+
1495
+ Generate app password at: https://myaccount.google.com/apppasswords
1496
+
1497
+ Environment variables:
1498
+ EMAIL__ENABLED - Enable email service (default: false)
1499
+ EMAIL__SMTP_HOST - SMTP server host (default: smtp.gmail.com)
1500
+ EMAIL__SMTP_PORT - SMTP server port (default: 587 for TLS)
1501
+ EMAIL__SENDER_EMAIL - Sender email address
1502
+ EMAIL__SENDER_NAME - Sender display name
1503
+ EMAIL__APP_PASSWORD - Gmail app password (from secrets)
1504
+ EMAIL__USE_TLS - Use TLS encryption (default: true)
1505
+ EMAIL__LOGIN_CODE_EXPIRY_MINUTES - Login code expiry (default: 10)
1506
+
1507
+ Branding environment variables (for email templates):
1508
+ EMAIL__APP_NAME - Application name in emails (default: REM)
1509
+ EMAIL__LOGO_URL - Logo URL for email templates (40x40 recommended)
1510
+ EMAIL__TAGLINE - Tagline shown in email footer
1511
+ EMAIL__WEBSITE_URL - Main website URL for email links
1512
+ EMAIL__PRIVACY_URL - Privacy policy URL for email footer
1513
+ EMAIL__TERMS_URL - Terms of service URL for email footer
1514
+ """
1515
+
1516
+ model_config = SettingsConfigDict(
1517
+ env_prefix="EMAIL__",
1518
+ env_file=".env",
1519
+ env_file_encoding="utf-8",
1520
+ extra="ignore",
1521
+ )
1522
+
1523
+ enabled: bool = Field(
1524
+ default=False,
1525
+ description="Enable email service (requires app_password to be set)",
1526
+ )
1527
+
1528
+ smtp_host: str = Field(
1529
+ default="smtp.gmail.com",
1530
+ description="SMTP server host",
1531
+ )
1532
+
1533
+ smtp_port: int = Field(
1534
+ default=587,
1535
+ description="SMTP server port (587 for TLS, 465 for SSL)",
1536
+ )
1537
+
1538
+ sender_email: str = Field(
1539
+ default="",
1540
+ description="Sender email address",
1541
+ )
1542
+
1543
+ sender_name: str = Field(
1544
+ default="REM",
1545
+ description="Sender display name",
1546
+ )
1547
+
1548
+ # Branding settings for email templates
1549
+ app_name: str = Field(
1550
+ default="REM",
1551
+ description="Application name shown in email templates",
1552
+ )
1553
+
1554
+ logo_url: str | None = Field(
1555
+ default=None,
1556
+ description="Logo URL for email templates (40x40 recommended)",
1557
+ )
1558
+
1559
+ tagline: str = Field(
1560
+ default="Your AI-powered platform",
1561
+ description="Tagline shown in email footer",
1562
+ )
1563
+
1564
+ website_url: str = Field(
1565
+ default="https://rem.ai",
1566
+ description="Main website URL for email links",
1567
+ )
1568
+
1569
+ privacy_url: str = Field(
1570
+ default="https://rem.ai/privacy",
1571
+ description="Privacy policy URL for email footer",
1572
+ )
1573
+
1574
+ terms_url: str = Field(
1575
+ default="https://rem.ai/terms",
1576
+ description="Terms of service URL for email footer",
1577
+ )
1578
+
1579
+ app_password: str | None = Field(
1580
+ default=None,
1581
+ description="Gmail app password for SMTP authentication",
1582
+ )
1583
+
1584
+ use_tls: bool = Field(
1585
+ default=True,
1586
+ description="Use TLS encryption for SMTP",
1587
+ )
1588
+
1589
+ login_code_expiry_minutes: int = Field(
1590
+ default=10,
1591
+ description="Login code expiry in minutes",
1592
+ )
1593
+
1594
+ trusted_email_domains: str = Field(
1595
+ default="",
1596
+ description=(
1597
+ "Comma-separated list of trusted email domains for new user registration. "
1598
+ "Existing users can always login regardless of domain. "
1599
+ "New users must have an email from a trusted domain. "
1600
+ "Empty string means all domains are allowed. "
1601
+ "Example: 'siggymd.ai,example.com'"
1602
+ ),
1603
+ )
1604
+
1605
+ @property
1606
+ def trusted_domain_list(self) -> list[str]:
1607
+ """Get trusted domains as a list, filtering empty strings."""
1608
+ if not self.trusted_email_domains:
1609
+ return []
1610
+ return [d.strip().lower() for d in self.trusted_email_domains.split(",") if d.strip()]
1611
+
1612
+ def is_domain_trusted(self, email: str) -> bool:
1613
+ """Check if an email's domain is in the trusted list.
1614
+
1615
+ Args:
1616
+ email: Email address to check
1617
+
1618
+ Returns:
1619
+ True if domain is trusted (or if no trusted domains configured)
1620
+ """
1621
+ domains = self.trusted_domain_list
1622
+ if not domains:
1623
+ # No restrictions configured
1624
+ return True
1625
+
1626
+ email_domain = email.lower().split("@")[-1].strip()
1627
+ return email_domain in domains
1628
+
1629
+ @property
1630
+ def is_configured(self) -> bool:
1631
+ """Check if email service is properly configured."""
1632
+ return bool(self.sender_email and self.app_password)
1633
+
1634
+ @property
1635
+ def template_kwargs(self) -> dict:
1636
+ """
1637
+ Get branding kwargs for email templates.
1638
+
1639
+ Returns a dict that can be passed to template functions:
1640
+ login_code_template(..., **settings.email.template_kwargs)
1641
+ """
1642
+ kwargs = {
1643
+ "app_name": self.app_name,
1644
+ "tagline": self.tagline,
1645
+ "website_url": self.website_url,
1646
+ "privacy_url": self.privacy_url,
1647
+ "terms_url": self.terms_url,
1648
+ }
1649
+ if self.logo_url:
1650
+ kwargs["logo_url"] = self.logo_url
1651
+ return kwargs
1652
+
1653
+
1369
1654
  class TestSettings(BaseSettings):
1370
1655
  """
1371
1656
  Test environment settings.
@@ -1473,12 +1758,14 @@ class Settings(BaseSettings):
1473
1758
  migration: MigrationSettings = Field(default_factory=MigrationSettings)
1474
1759
  storage: StorageSettings = Field(default_factory=StorageSettings)
1475
1760
  s3: S3Settings = Field(default_factory=S3Settings)
1761
+ data_lake: DataLakeSettings = Field(default_factory=DataLakeSettings)
1476
1762
  git: GitSettings = Field(default_factory=GitSettings)
1477
1763
  sqs: SQSSettings = Field(default_factory=SQSSettings)
1478
1764
  db_listener: DBListenerSettings = Field(default_factory=DBListenerSettings)
1479
1765
  chunking: ChunkingSettings = Field(default_factory=ChunkingSettings)
1480
1766
  content: ContentSettings = Field(default_factory=ContentSettings)
1481
1767
  schema_search: SchemaSettings = Field(default_factory=SchemaSettings)
1768
+ email: EmailSettings = Field(default_factory=EmailSettings)
1482
1769
  test: TestSettings = Field(default_factory=TestSettings)
1483
1770
 
1484
1771
 
rem/sql/migrations/001_install.sql CHANGED
@@ -657,7 +657,7 @@ BEGIN
657
657
  MIN(msg_counts.first_msg)::TIMESTAMP AS first_message_at,
658
658
  MAX(msg_counts.last_msg)::TIMESTAMP AS last_message_at
659
659
  FROM shared_sessions ss
660
- LEFT JOIN users u ON u.user_id = ss.owner_user_id AND u.tenant_id = ss.tenant_id
660
+ LEFT JOIN users u ON u.id::text = ss.owner_user_id AND u.tenant_id = ss.tenant_id
661
661
  LEFT JOIN (
662
662
  SELECT
663
663
  m.session_id,