remdb 0.3.14__py3-none-any.whl → 0.3.157__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as published to their public registry. The information is provided for informational purposes only.
Files changed (112)
  1. rem/agentic/README.md +76 -0
  2. rem/agentic/__init__.py +15 -0
  3. rem/agentic/agents/__init__.py +32 -2
  4. rem/agentic/agents/agent_manager.py +310 -0
  5. rem/agentic/agents/sse_simulator.py +502 -0
  6. rem/agentic/context.py +51 -27
  7. rem/agentic/context_builder.py +5 -3
  8. rem/agentic/llm_provider_models.py +301 -0
  9. rem/agentic/mcp/tool_wrapper.py +155 -18
  10. rem/agentic/otel/setup.py +93 -4
  11. rem/agentic/providers/phoenix.py +371 -108
  12. rem/agentic/providers/pydantic_ai.py +280 -57
  13. rem/agentic/schema.py +361 -21
  14. rem/agentic/tools/rem_tools.py +3 -3
  15. rem/api/README.md +215 -1
  16. rem/api/deps.py +255 -0
  17. rem/api/main.py +132 -40
  18. rem/api/mcp_router/resources.py +1 -1
  19. rem/api/mcp_router/server.py +28 -5
  20. rem/api/mcp_router/tools.py +555 -7
  21. rem/api/routers/admin.py +494 -0
  22. rem/api/routers/auth.py +278 -4
  23. rem/api/routers/chat/completions.py +402 -20
  24. rem/api/routers/chat/models.py +88 -10
  25. rem/api/routers/chat/otel_utils.py +33 -0
  26. rem/api/routers/chat/sse_events.py +542 -0
  27. rem/api/routers/chat/streaming.py +697 -45
  28. rem/api/routers/dev.py +81 -0
  29. rem/api/routers/feedback.py +268 -0
  30. rem/api/routers/messages.py +473 -0
  31. rem/api/routers/models.py +78 -0
  32. rem/api/routers/query.py +360 -0
  33. rem/api/routers/shared_sessions.py +406 -0
  34. rem/auth/__init__.py +13 -3
  35. rem/auth/middleware.py +186 -22
  36. rem/auth/providers/__init__.py +4 -1
  37. rem/auth/providers/email.py +215 -0
  38. rem/cli/commands/README.md +237 -64
  39. rem/cli/commands/cluster.py +1808 -0
  40. rem/cli/commands/configure.py +4 -7
  41. rem/cli/commands/db.py +386 -143
  42. rem/cli/commands/experiments.py +468 -76
  43. rem/cli/commands/process.py +14 -8
  44. rem/cli/commands/schema.py +97 -50
  45. rem/cli/commands/session.py +336 -0
  46. rem/cli/dreaming.py +2 -2
  47. rem/cli/main.py +29 -6
  48. rem/config.py +10 -3
  49. rem/models/core/core_model.py +7 -1
  50. rem/models/core/experiment.py +58 -14
  51. rem/models/core/rem_query.py +5 -2
  52. rem/models/entities/__init__.py +25 -0
  53. rem/models/entities/domain_resource.py +38 -0
  54. rem/models/entities/feedback.py +123 -0
  55. rem/models/entities/message.py +30 -1
  56. rem/models/entities/ontology.py +1 -1
  57. rem/models/entities/ontology_config.py +1 -1
  58. rem/models/entities/session.py +83 -0
  59. rem/models/entities/shared_session.py +180 -0
  60. rem/models/entities/subscriber.py +175 -0
  61. rem/models/entities/user.py +1 -0
  62. rem/registry.py +10 -4
  63. rem/schemas/agents/core/agent-builder.yaml +134 -0
  64. rem/schemas/agents/examples/contract-analyzer.yaml +1 -1
  65. rem/schemas/agents/examples/contract-extractor.yaml +1 -1
  66. rem/schemas/agents/examples/cv-parser.yaml +1 -1
  67. rem/schemas/agents/rem.yaml +7 -3
  68. rem/services/__init__.py +3 -1
  69. rem/services/content/service.py +92 -19
  70. rem/services/email/__init__.py +10 -0
  71. rem/services/email/service.py +459 -0
  72. rem/services/email/templates.py +360 -0
  73. rem/services/embeddings/api.py +4 -4
  74. rem/services/embeddings/worker.py +16 -16
  75. rem/services/phoenix/client.py +154 -14
  76. rem/services/postgres/README.md +197 -15
  77. rem/services/postgres/__init__.py +2 -1
  78. rem/services/postgres/diff_service.py +547 -0
  79. rem/services/postgres/pydantic_to_sqlalchemy.py +470 -140
  80. rem/services/postgres/repository.py +132 -0
  81. rem/services/postgres/schema_generator.py +205 -4
  82. rem/services/postgres/service.py +6 -6
  83. rem/services/rem/parser.py +44 -9
  84. rem/services/rem/service.py +36 -2
  85. rem/services/session/compression.py +137 -51
  86. rem/services/session/reload.py +15 -8
  87. rem/settings.py +515 -27
  88. rem/sql/background_indexes.sql +21 -16
  89. rem/sql/migrations/001_install.sql +387 -54
  90. rem/sql/migrations/002_install_models.sql +2304 -377
  91. rem/sql/migrations/003_optional_extensions.sql +326 -0
  92. rem/sql/migrations/004_cache_system.sql +548 -0
  93. rem/sql/migrations/005_schema_update.sql +145 -0
  94. rem/utils/README.md +45 -0
  95. rem/utils/__init__.py +18 -0
  96. rem/utils/date_utils.py +2 -2
  97. rem/utils/files.py +157 -1
  98. rem/utils/model_helpers.py +156 -1
  99. rem/utils/schema_loader.py +220 -22
  100. rem/utils/sql_paths.py +146 -0
  101. rem/utils/sql_types.py +3 -1
  102. rem/utils/vision.py +1 -1
  103. rem/workers/__init__.py +3 -1
  104. rem/workers/db_listener.py +579 -0
  105. rem/workers/unlogged_maintainer.py +463 -0
  106. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/METADATA +340 -229
  107. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/RECORD +109 -80
  108. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/WHEEL +1 -1
  109. rem/sql/002_install_models.sql +0 -1068
  110. rem/sql/install_models.sql +0 -1051
  111. rem/sql/migrations/003_seed_default_user.sql +0 -48
  112. {remdb-0.3.14.dist-info → remdb-0.3.157.dist-info}/entry_points.txt +0 -0
rem/utils/README.md CHANGED
@@ -4,6 +4,7 @@
 
 1. [SQL Types](#sql-types-sql_typespy) - Pydantic to PostgreSQL type mapping
 2. [Embeddings](#embeddings-embeddingspy) - Vector embeddings generation
+3. [Files](#files-filespy) - File utilities and DataFrame I/O
 
 ## SQL Types (`sql_types.py`)
 
@@ -581,3 +582,47 @@ This will demonstrate:
 - `sql_types.py` - Use `embedding_provider` in json_schema_extra for TEXT fields
 - OpenAI Embeddings API: https://platform.openai.com/docs/api-reference/embeddings
 - pgvector Documentation: https://github.com/pgvector/pgvector
+
+---
+
+## Files (`files.py`)
+
+File utilities including temporary file handling and DataFrame I/O with automatic format detection.
+
+### DataFrame I/O
+
+Read and write DataFrames with format auto-detected from file extension:
+
+```python
+from rem.utils.files import read_dataframe, write_dataframe
+
+# Read - format inferred from extension
+df = read_dataframe("data.csv")
+df = read_dataframe("data.parquet")
+df = read_dataframe("data.xlsx")
+
+# Read from bytes (e.g., from S3)
+df = read_dataframe(content_bytes, filename="data.csv")
+
+# Write - format inferred from extension
+write_dataframe(df, "output.parquet")
+```
+
+**Supported formats**: `.csv`, `.tsv`, `.parquet`, `.json`, `.jsonl`, `.avro`, `.xlsx`, `.xls`, `.ods`, `.ipc`, `.arrow`, `.feather`
+
+Note: Some formats require optional dependencies (e.g., `fastexcel` for Excel).
+
+### Temporary File Utilities
+
+```python
+from rem.utils.files import temp_file_from_bytes, temp_directory
+
+# Create temp file from bytes, auto-cleanup
+with temp_file_from_bytes(pdf_bytes, suffix=".pdf") as tmp_path:
+    result = process_pdf(tmp_path)
+
+# Create temp directory, auto-cleanup
+with temp_directory() as tmp_dir:
+    # Work with files in tmp_dir
+    pass
+```
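
The new README section documents the DataFrame I/O helpers and the temp-file context managers separately, but they compose naturally. A minimal round-trip sketch, assuming only the API shown above plus a Polars version that has `DataFrame.equals`; the sample data is illustrative:

```python
from pathlib import Path

import polars as pl

from rem.utils.files import read_dataframe, temp_directory, write_dataframe

df = pl.DataFrame({"id": [1, 2], "name": ["a", "b"]})

# Write into a self-cleaning temp directory; format inferred from .parquet
with temp_directory() as tmp_dir:
    out = Path(tmp_dir) / "sample.parquet"
    write_dataframe(df, out)
    assert read_dataframe(out).equals(df)
# tmp_dir (and sample.parquet) are cleaned up here
```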
rem/utils/__init__.py CHANGED
@@ -5,6 +5,7 @@ Utility functions and helpers for the REM system:
 - sql_types: Pydantic to PostgreSQL type mapping
 - embeddings: Vector embeddings generation using requests library
 - user_id: Deterministic UUID generation from email addresses
+- sql_paths: SQL file path resolution for packages and user migrations
 """
 
 from .embeddings import (
@@ -24,6 +25,15 @@ from .user_id import (
     is_valid_uuid,
     user_id_to_uuid,
 )
+from .sql_paths import (
+    USER_SQL_DIR_CONVENTION,
+    get_package_sql_dir,
+    get_package_migrations_dir,
+    get_user_sql_dir,
+    list_package_migrations,
+    list_user_migrations,
+    list_all_migrations,
+)
 
 __all__ = [
     # SQL Types
@@ -40,4 +50,12 @@ __all__ = [
     "email_to_user_id",
     "user_id_to_uuid",
     "is_valid_uuid",
+    # SQL Paths
+    "USER_SQL_DIR_CONVENTION",
+    "get_package_sql_dir",
+    "get_package_migrations_dir",
+    "get_user_sql_dir",
+    "list_package_migrations",
+    "list_user_migrations",
+    "list_all_migrations",
 ]
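
With these re-exports, the migration helpers become importable straight from `rem.utils`. A hedged sketch of usage: only the names are confirmed by this diff, so the zero-argument calls and the assumption that the `list_*` helpers return iterables of paths are illustrative, not the package's documented API:

```python
# Names are confirmed by the diff above; signatures are assumptions.
from rem.utils import USER_SQL_DIR_CONVENTION, list_all_migrations

print(USER_SQL_DIR_CONVENTION)           # conventional user SQL directory
for migration in list_all_migrations():  # assumed: iterable of migration paths
    print(migration)
```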
rem/utils/date_utils.py CHANGED
@@ -14,7 +14,7 @@ Convention:
 See CLAUDE.md Section 1 (Datetime Convention) for details.
 """
 
-from datetime import datetime, timedelta
+from datetime import UTC, datetime, timedelta
 from typing import Optional
 
 
@@ -30,7 +30,7 @@ def utc_now() -> datetime:
     >>> now.tzinfo is None
     True
     """
-    return datetime.utcnow()
+    return datetime.now(UTC).replace(tzinfo=None)
 
 
 def to_iso(dt: datetime) -> str:
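
This keeps `utc_now()` returning a naive UTC datetime (preserving the `tzinfo is None` doctest) while moving off `datetime.utcnow()`, which is deprecated as of Python 3.12. A quick equivalence check:

```python
from datetime import UTC, datetime

# Both yield a naive datetime carrying the current UTC wall-clock time;
# only the second avoids the DeprecationWarning on Python 3.12+.
old = datetime.utcnow()                       # deprecated
new = datetime.now(UTC).replace(tzinfo=None)  # replacement

assert new.tzinfo is None
assert abs((new - old).total_seconds()) < 1.0  # same instant, give or take
```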
rem/utils/files.py CHANGED
@@ -3,13 +3,18 @@ File utilities for consistent file handling throughout REM.
 
 Provides context managers and helpers for temporary file operations,
 ensuring proper cleanup and consistent patterns.
+
+Also provides DataFrame I/O utilities using Polars with automatic
+format detection based on file extension.
 """
 
 import tempfile
 from contextlib import contextmanager
+from io import BytesIO
 from pathlib import Path
-from typing import Generator, Optional
+from typing import Generator, Optional, Union
 
+import polars as pl
 from loguru import logger
 
 
@@ -165,3 +170,154 @@ def safe_delete(path: Path) -> bool:
     except Exception as e:
         logger.warning(f"Failed to delete {path}: {e}")
         return False
+
+
+# Extension to Polars reader mapping
+_EXTENSION_READERS = {
+    ".csv": pl.read_csv,
+    ".tsv": lambda p, **kw: pl.read_csv(p, separator="\t", **kw),
+    ".parquet": pl.read_parquet,
+    ".pq": pl.read_parquet,
+    ".json": pl.read_json,
+    ".jsonl": pl.read_ndjson,
+    ".ndjson": pl.read_ndjson,
+    ".avro": pl.read_avro,
+    ".xlsx": pl.read_excel,
+    ".xls": pl.read_excel,
+    ".ods": pl.read_ods,
+    ".ipc": pl.read_ipc,
+    ".arrow": pl.read_ipc,
+    ".feather": pl.read_ipc,
+}
+
+# Extension to Polars writer mapping
+_EXTENSION_WRITERS = {
+    ".csv": "write_csv",
+    ".tsv": "write_csv",  # with separator="\t"
+    ".parquet": "write_parquet",
+    ".pq": "write_parquet",
+    ".json": "write_json",
+    ".jsonl": "write_ndjson",
+    ".ndjson": "write_ndjson",
+    ".avro": "write_avro",
+    ".xlsx": "write_excel",
+    ".ipc": "write_ipc",
+    ".arrow": "write_ipc",
+    ".feather": "write_ipc",
+}
+
+
+def read_dataframe(
+    source: Union[str, Path, bytes],
+    filename: Optional[str] = None,
+    **kwargs,
+) -> pl.DataFrame:
+    """
+    Read a DataFrame from a file, inferring format from extension.
+
+    Supports all Polars-compatible formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx, .xls)
+    - OpenDocument (.ods)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        source: File path (str/Path) or bytes content
+        filename: Required when source is bytes, to determine format
+        **kwargs: Additional arguments passed to the Polars reader
+
+    Returns:
+        Polars DataFrame
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> df = read_dataframe("data.csv")
+        >>> df = read_dataframe("data.parquet")
+        >>> df = read_dataframe(csv_bytes, filename="data.csv")
+    """
+    # Determine the file extension
+    if isinstance(source, bytes):
+        if not filename:
+            raise ValueError("filename is required when source is bytes")
+        ext = Path(filename).suffix.lower()
+        # For bytes, we need to wrap in BytesIO
+        file_like = BytesIO(source)
+    else:
+        path = Path(source)
+        ext = path.suffix.lower()
+        file_like = path
+
+    # Get the appropriate reader
+    reader = _EXTENSION_READERS.get(ext)
+    if reader is None:
+        supported = ", ".join(sorted(_EXTENSION_READERS.keys()))
+        raise ValueError(
+            f"Unsupported file format: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    try:
+        return reader(file_like, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to read DataFrame from {ext} format: {e}")
+        raise
+
+
+def write_dataframe(
+    df: pl.DataFrame,
+    dest: Union[str, Path],
+    **kwargs,
+) -> None:
+    """
+    Write a DataFrame to a file, inferring format from extension.
+
+    Supports most Polars-writable formats:
+    - CSV (.csv), TSV (.tsv)
+    - Parquet (.parquet, .pq)
+    - JSON (.json), JSONL/NDJSON (.jsonl, .ndjson)
+    - Avro (.avro)
+    - Excel (.xlsx)
+    - Arrow IPC (.ipc, .arrow, .feather)
+
+    Args:
+        df: Polars DataFrame to write
+        dest: Destination file path
+        **kwargs: Additional arguments passed to the Polars writer
+
+    Raises:
+        ValueError: If format cannot be determined or is unsupported
+
+    Examples:
+        >>> write_dataframe(df, "output.csv")
+        >>> write_dataframe(df, "output.parquet")
+        >>> write_dataframe(df, "output.jsonl")
+    """
+    path = Path(dest)
+    ext = path.suffix.lower()
+
+    writer_method = _EXTENSION_WRITERS.get(ext)
+    if writer_method is None:
+        supported = ", ".join(sorted(_EXTENSION_WRITERS.keys()))
+        raise ValueError(
+            f"Unsupported file format for writing: {ext}. "
+            f"Supported formats: {supported}"
+        )
+
+    # Ensure parent directory exists
+    ensure_parent_exists(path)
+
+    # Handle TSV special case
+    if ext == ".tsv":
+        kwargs.setdefault("separator", "\t")
+
+    try:
+        writer = getattr(df, writer_method)
+        writer(path, **kwargs)
+    except Exception as e:
+        logger.error(f"Failed to write DataFrame to {ext} format: {e}")
+        raise
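
Two branches worth noting in `read_dataframe`: bytes input is only resolvable when `filename` supplies an extension, and unknown extensions fail fast with the supported list in the error. A short exercise of both, using only the API shown above:

```python
from rem.utils.files import read_dataframe

csv_bytes = b"id,name\n1,a\n2,b\n"

# Bytes need a filename so the extension (and thus the reader) can be resolved
df = read_dataframe(csv_bytes, filename="data.csv")
assert df.shape == (2, 2)

# Unknown extensions raise immediately, listing the supported formats
try:
    read_dataframe(csv_bytes, filename="data.unknown")
except ValueError as e:
    print(e)  # Unsupported file format: .unknown. Supported formats: ...
```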
rem/utils/model_helpers.py CHANGED
@@ -16,8 +16,12 @@ Embedding Field Detection:
 Table Name Inference:
 1. model_config.json_schema_extra.table_name
 2. CamelCase → snake_case + pluralization
+
+Model Resolution:
+- model_from_arbitrary_casing: Resolve model class from flexible input casing
 """
 
+import re
 from typing import Any, Type
 
 from loguru import logger
@@ -94,7 +98,9 @@ def get_table_name(model: Type[BaseModel]) -> str:
     if isinstance(model_config, dict):
         json_extra = model_config.get("json_schema_extra", {})
         if isinstance(json_extra, dict) and "table_name" in json_extra:
-            return json_extra["table_name"]
+            table_name = json_extra["table_name"]
+            if isinstance(table_name, str):
+                return table_name
 
     # Infer from class name
     name = model.__name__
@@ -234,3 +240,152 @@ def get_model_metadata(model: Type[BaseModel]) -> dict[str, Any]:
         "entity_key_field": get_entity_key_field(model),
         "embeddable_fields": get_embeddable_fields(model),
     }
+
+
+def normalize_to_title_case(name: str) -> str:
+    """
+    Normalize arbitrary casing to TitleCase (PascalCase).
+
+    Handles various input formats:
+    - kebab-case: domain-resource → DomainResource
+    - snake_case: domain_resource → DomainResource
+    - lowercase: domainresource → Domainresource (single word)
+    - TitleCase: DomainResource → DomainResource (passthrough)
+    - Mixed: Domain-Resource, DOMAIN_RESOURCE → DomainResource
+
+    Args:
+        name: Input name in any casing format
+
+    Returns:
+        TitleCase (PascalCase) version of the name
+
+    Example:
+        >>> normalize_to_title_case("domain-resource")
+        'DomainResource'
+        >>> normalize_to_title_case("domain_resources")
+        'DomainResources'
+        >>> normalize_to_title_case("DomainResource")
+        'DomainResource'
+    """
+    # If already TitleCase (starts with uppercase, has no delimiters, and has
+    # at least one lowercase letter), return as-is
+    if (
+        name
+        and name[0].isupper()
+        and '-' not in name
+        and '_' not in name
+        and any(c.islower() for c in name)
+    ):
+        return name
+
+    # Split on common delimiters (hyphen, underscore)
+    parts = re.split(r'[-_]', name)
+
+    # Capitalize first letter of each part, lowercase the rest
+    normalized_parts = [part.capitalize() for part in parts if part]
+
+    return "".join(normalized_parts)
+
+
+def model_from_arbitrary_casing(
+    name: str,
+    registry: dict[str, Type[BaseModel]] | None = None,
+) -> Type[BaseModel]:
+    """
+    Resolve a model class from arbitrary casing input.
+
+    REM entity models use strict TitleCase (PascalCase) naming. This function
+    allows flexible input formats while maintaining consistency:
+
+    Input formats supported:
+    - kebab-case: domain-resource, domain-resources
+    - snake_case: domain_resource, domain_resources
+    - lowercase: resource, domainresource
+    - TitleCase: Resource, DomainResource
+
+    Args:
+        name: Model name in any supported casing format
+        registry: Optional dict mapping TitleCase names to model classes.
+            If not provided, uses rem.models.entities module.
+
+    Returns:
+        The resolved Pydantic model class
+
+    Raises:
+        ValueError: If no model matches the normalized name
+
+    Example:
+        >>> model = model_from_arbitrary_casing("domain-resources")
+        >>> model.__name__
+        'DomainResource'
+        >>> model = model_from_arbitrary_casing("Resource")
+        >>> model.__name__
+        'Resource'
+    """
+    # Build default registry from entities module if not provided
+    if registry is None:
+        from rem.models.entities import (
+            DomainResource,
+            Feedback,
+            File,
+            ImageResource,
+            Message,
+            Moment,
+            Ontology,
+            OntologyConfig,
+            Resource,
+            Schema,
+            Session,
+            User,
+        )
+
+        registry = {
+            "Resource": Resource,
+            "Resources": Resource,  # Plural alias
+            "DomainResource": DomainResource,
+            "DomainResources": DomainResource,  # Plural alias
+            "ImageResource": ImageResource,
+            "ImageResources": ImageResource,
+            "File": File,
+            "Files": File,
+            "Message": Message,
+            "Messages": Message,
+            "Moment": Moment,
+            "Moments": Moment,
+            "Session": Session,
+            "Sessions": Session,
+            "Feedback": Feedback,
+            "User": User,
+            "Users": User,
+            "Schema": Schema,
+            "Schemas": Schema,
+            "Ontology": Ontology,
+            "Ontologies": Ontology,
+            "OntologyConfig": OntologyConfig,
+            "OntologyConfigs": OntologyConfig,
+        }
+
+    # Normalize input to TitleCase
+    normalized = normalize_to_title_case(name)
+
+    # Look up in registry
+    if normalized in registry:
+        logger.debug(f"Resolved model '{name}' → {registry[normalized].__name__}")
+        return registry[normalized]
+
+    # Try without trailing 's' (singular form)
+    if normalized.endswith("s") and normalized[:-1] in registry:
+        logger.debug(f"Resolved model '{name}' → {registry[normalized[:-1]].__name__} (singular)")
+        return registry[normalized[:-1]]
+
+    # Try with trailing 's' (plural form)
+    plural = normalized + "s"
+    if plural in registry:
+        logger.debug(f"Resolved model '{name}' → {registry[plural].__name__} (plural)")
+        return registry[plural]
+
+    available = sorted(set(m.__name__ for m in registry.values()))
+    raise ValueError(
+        f"Unknown model: '{name}' (normalized: '{normalized}'). "
+        f"Available models: {', '.join(available)}"
+    )
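
The resolution order is: normalize casing, exact lookup, then singular/plural fallbacks. A sketch exercising it with an explicit registry; the `Widget` model is hypothetical so the example does not depend on `rem.models.entities`:

```python
from pydantic import BaseModel

from rem.utils.model_helpers import (
    model_from_arbitrary_casing,
    normalize_to_title_case,
)


class Widget(BaseModel):  # hypothetical model, for illustration only
    name: str


registry = {"Widget": Widget}

# Casing is normalized to TitleCase before lookup...
assert normalize_to_title_case("widget_factory") == "WidgetFactory"

# ...then exact match, with trailing-'s' fallbacks in both directions
assert model_from_arbitrary_casing("widget", registry) is Widget
assert model_from_arbitrary_casing("widgets", registry) is Widget  # singular fallback
assert model_from_arbitrary_casing("WIDGET", registry) is Widget
```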