PyPI - remdb - Versions diffs - 0.2.6__py3-none-any.whl - Mend

remdb 0.2.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of remdb might be problematic. Click here for more details.

Files changed (187) hide show

rem/__init__.py +2 -0
rem/agentic/README.md +650 -0
rem/agentic/__init__.py +39 -0
rem/agentic/agents/README.md +155 -0
rem/agentic/agents/__init__.py +8 -0
rem/agentic/context.py +148 -0
rem/agentic/context_builder.py +329 -0
rem/agentic/mcp/__init__.py +0 -0
rem/agentic/mcp/tool_wrapper.py +107 -0
rem/agentic/otel/__init__.py +5 -0
rem/agentic/otel/setup.py +151 -0
rem/agentic/providers/phoenix.py +674 -0
rem/agentic/providers/pydantic_ai.py +572 -0
rem/agentic/query.py +117 -0
rem/agentic/query_helper.py +89 -0
rem/agentic/schema.py +396 -0
rem/agentic/serialization.py +245 -0
rem/agentic/tools/__init__.py +5 -0
rem/agentic/tools/rem_tools.py +231 -0
rem/api/README.md +420 -0
rem/api/main.py +324 -0
rem/api/mcp_router/prompts.py +182 -0
rem/api/mcp_router/resources.py +536 -0
rem/api/mcp_router/server.py +213 -0
rem/api/mcp_router/tools.py +584 -0
rem/api/routers/auth.py +229 -0
rem/api/routers/chat/__init__.py +5 -0
rem/api/routers/chat/completions.py +281 -0
rem/api/routers/chat/json_utils.py +76 -0
rem/api/routers/chat/models.py +124 -0
rem/api/routers/chat/streaming.py +185 -0
rem/auth/README.md +258 -0
rem/auth/__init__.py +26 -0
rem/auth/middleware.py +100 -0
rem/auth/providers/__init__.py +13 -0
rem/auth/providers/base.py +376 -0
rem/auth/providers/google.py +163 -0
rem/auth/providers/microsoft.py +237 -0
rem/cli/README.md +455 -0
rem/cli/__init__.py +8 -0
rem/cli/commands/README.md +126 -0
rem/cli/commands/__init__.py +3 -0
rem/cli/commands/ask.py +565 -0
rem/cli/commands/configure.py +423 -0
rem/cli/commands/db.py +493 -0
rem/cli/commands/dreaming.py +324 -0
rem/cli/commands/experiments.py +1124 -0
rem/cli/commands/mcp.py +66 -0
rem/cli/commands/process.py +245 -0
rem/cli/commands/schema.py +183 -0
rem/cli/commands/serve.py +106 -0
rem/cli/dreaming.py +363 -0
rem/cli/main.py +88 -0
rem/config.py +237 -0
rem/mcp_server.py +41 -0
rem/models/core/__init__.py +49 -0
rem/models/core/core_model.py +64 -0
rem/models/core/engram.py +333 -0
rem/models/core/experiment.py +628 -0
rem/models/core/inline_edge.py +132 -0
rem/models/core/rem_query.py +243 -0
rem/models/entities/__init__.py +43 -0
rem/models/entities/file.py +57 -0
rem/models/entities/image_resource.py +88 -0
rem/models/entities/message.py +35 -0
rem/models/entities/moment.py +123 -0
rem/models/entities/ontology.py +191 -0
rem/models/entities/ontology_config.py +131 -0
rem/models/entities/resource.py +95 -0
rem/models/entities/schema.py +87 -0
rem/models/entities/user.py +85 -0
rem/py.typed +0 -0
rem/schemas/README.md +507 -0
rem/schemas/__init__.py +6 -0
rem/schemas/agents/README.md +92 -0
rem/schemas/agents/core/moment-builder.yaml +178 -0
rem/schemas/agents/core/rem-query-agent.yaml +226 -0
rem/schemas/agents/core/resource-affinity-assessor.yaml +99 -0
rem/schemas/agents/core/simple-assistant.yaml +19 -0
rem/schemas/agents/core/user-profile-builder.yaml +163 -0
rem/schemas/agents/examples/contract-analyzer.yaml +317 -0
rem/schemas/agents/examples/contract-extractor.yaml +134 -0
rem/schemas/agents/examples/cv-parser.yaml +263 -0
rem/schemas/agents/examples/hello-world.yaml +37 -0
rem/schemas/agents/examples/query.yaml +54 -0
rem/schemas/agents/examples/simple.yaml +21 -0
rem/schemas/agents/examples/test.yaml +29 -0
rem/schemas/agents/rem.yaml +128 -0
rem/schemas/evaluators/hello-world/default.yaml +77 -0
rem/schemas/evaluators/rem/faithfulness.yaml +219 -0
rem/schemas/evaluators/rem/lookup-correctness.yaml +182 -0
rem/schemas/evaluators/rem/retrieval-precision.yaml +199 -0
rem/schemas/evaluators/rem/retrieval-recall.yaml +211 -0
rem/schemas/evaluators/rem/search-correctness.yaml +192 -0
rem/services/__init__.py +16 -0
rem/services/audio/INTEGRATION.md +308 -0
rem/services/audio/README.md +376 -0
rem/services/audio/__init__.py +15 -0
rem/services/audio/chunker.py +354 -0
rem/services/audio/transcriber.py +259 -0
rem/services/content/README.md +1269 -0
rem/services/content/__init__.py +5 -0
rem/services/content/providers.py +806 -0
rem/services/content/service.py +657 -0
rem/services/dreaming/README.md +230 -0
rem/services/dreaming/__init__.py +53 -0
rem/services/dreaming/affinity_service.py +336 -0
rem/services/dreaming/moment_service.py +264 -0
rem/services/dreaming/ontology_service.py +54 -0
rem/services/dreaming/user_model_service.py +297 -0
rem/services/dreaming/utils.py +39 -0
rem/services/embeddings/__init__.py +11 -0
rem/services/embeddings/api.py +120 -0
rem/services/embeddings/worker.py +421 -0
rem/services/fs/README.md +662 -0
rem/services/fs/__init__.py +62 -0
rem/services/fs/examples.py +206 -0
rem/services/fs/examples_paths.py +204 -0
rem/services/fs/git_provider.py +935 -0
rem/services/fs/local_provider.py +760 -0
rem/services/fs/parsing-hooks-examples.md +172 -0
rem/services/fs/paths.py +276 -0
rem/services/fs/provider.py +460 -0
rem/services/fs/s3_provider.py +1042 -0
rem/services/fs/service.py +186 -0
rem/services/git/README.md +1075 -0
rem/services/git/__init__.py +17 -0
rem/services/git/service.py +469 -0
rem/services/phoenix/EXPERIMENT_DESIGN.md +1146 -0
rem/services/phoenix/README.md +453 -0
rem/services/phoenix/__init__.py +46 -0
rem/services/phoenix/client.py +686 -0
rem/services/phoenix/config.py +88 -0
rem/services/phoenix/prompt_labels.py +477 -0
rem/services/postgres/README.md +575 -0
rem/services/postgres/__init__.py +23 -0
rem/services/postgres/migration_service.py +427 -0
rem/services/postgres/pydantic_to_sqlalchemy.py +232 -0
rem/services/postgres/register_type.py +352 -0
rem/services/postgres/repository.py +337 -0
rem/services/postgres/schema_generator.py +379 -0
rem/services/postgres/service.py +802 -0
rem/services/postgres/sql_builder.py +354 -0
rem/services/rem/README.md +304 -0
rem/services/rem/__init__.py +23 -0
rem/services/rem/exceptions.py +71 -0
rem/services/rem/executor.py +293 -0
rem/services/rem/parser.py +145 -0
rem/services/rem/queries.py +196 -0
rem/services/rem/query.py +371 -0
rem/services/rem/service.py +527 -0
rem/services/session/README.md +374 -0
rem/services/session/__init__.py +6 -0
rem/services/session/compression.py +360 -0
rem/services/session/reload.py +77 -0
rem/settings.py +1235 -0
rem/sql/002_install_models.sql +1068 -0
rem/sql/background_indexes.sql +42 -0
rem/sql/install_models.sql +1038 -0
rem/sql/migrations/001_install.sql +503 -0
rem/sql/migrations/002_install_models.sql +1202 -0
rem/utils/AGENTIC_CHUNKING.md +597 -0
rem/utils/README.md +583 -0
rem/utils/__init__.py +43 -0
rem/utils/agentic_chunking.py +622 -0
rem/utils/batch_ops.py +343 -0
rem/utils/chunking.py +108 -0
rem/utils/clip_embeddings.py +276 -0
rem/utils/dict_utils.py +98 -0
rem/utils/embeddings.py +423 -0
rem/utils/examples/embeddings_example.py +305 -0
rem/utils/examples/sql_types_example.py +202 -0
rem/utils/markdown.py +16 -0
rem/utils/model_helpers.py +236 -0
rem/utils/schema_loader.py +229 -0
rem/utils/sql_types.py +348 -0
rem/utils/user_id.py +81 -0
rem/utils/vision.py +330 -0
rem/workers/README.md +506 -0
rem/workers/__init__.py +5 -0
rem/workers/dreaming.py +502 -0
rem/workers/engram_processor.py +312 -0
rem/workers/sqs_file_processor.py +193 -0
remdb-0.2.6.dist-info/METADATA +1191 -0
remdb-0.2.6.dist-info/RECORD +187 -0
remdb-0.2.6.dist-info/WHEEL +4 -0
remdb-0.2.6.dist-info/entry_points.txt +2 -0

rem/models/core/inline_edge.py ADDED Viewed

@@ -0,0 +1,132 @@
+"""
+InlineEdge - Knowledge graph edge representation.
+REM uses human-readable entity labels instead of UUIDs for graph edges,
+enabling natural language queries without schema knowledge.
+Key Design Decision:
+- dst field contains LABELS (e.g., "sarah-chen", "tidb-migration-spec")
+- NOT UUIDs (e.g., "550e8400-e29b-41d4-a716-446655440000")
+- This enables LOOKUP operations on labels directly
+- LLMs can query "LOOKUP sarah-chen" without knowing internal IDs
+Edge Weight Guidelines:
+- 1.0: Primary/strong relationships (authored_by, owns, part_of)
+- 0.8-0.9: Important relationships (depends_on, reviewed_by, implements)
+- 0.5-0.7: Secondary relationships (references, related_to, inspired_by)
+- 0.3-0.4: Weak relationships (mentions, cites)
+Destination Entity Type Convention (CRITICAL - properties.dst_entity_type):
+Format: <table_schema>:<category>/<key>
+Where:
+- table_schema: Database table (resources, moments, users, etc.)
+- category: Optional entity category within that table
+- key: The actual entity key (must match dst field)
+Examples:
+- "resources:managers/bob" → Look up bob in resources table with category="managers"
+- "users:engineers/sarah-chen" → Look up sarah-chen in users table with category="engineers"
+- "moments:meetings/standup-2024-01" → Look up in moments table with category="meetings"
+- "resources/api-design-v2" → Look up api-design-v2 in resources table (no category)
+- "bob" → Defaults to resources table, no category (use sparingly)
+IMPORTANT - Upsert Rules:
+1. When upserting referenced entities, parse dst_entity_type to determine:
+   - table_schema → which table to upsert into
+   - category → set the 'category' field in that table
+   - key → match against entity_key_field (usually 'name' or 'id')
+2. If dst_entity_type is missing or just a type like "managers":
+   - Default table_schema to "resources"
+   - Set category to the type (e.g., "managers")
+   - Use dst as the key
+3. Agents should NEVER guess entity types
+   - If type is unknown, omit dst_entity_type or set to null
+   - Better to have no category than wrong category
+   - System will handle entities without categories
+4. Category is optional and can be null - this is perfectly fine
+   - Categories enable filtering but are not required for graph traversal
+   - Use categories when they add semantic value (roles, types, domains)
+Edge Type Format Guidelines (rel_type):
+- Use snake_case: "authored_by", "depends_on", "references"
+- Be specific but consistent: "reviewed_by" not "reviewed"
+- Use passive voice for bidirectional clarity: "authored_by" (reverse: "authors")
+"""
+from datetime import datetime, timezone
+from typing import Optional
+from pydantic import BaseModel, ConfigDict, Field
+class InlineEdge(BaseModel):
+    """
+    Knowledge graph edge with human-readable destination labels.
+    Stores relationships between entities using natural language labels
+    instead of UUIDs, enabling conversational queries.
+    """
+    dst: str = Field(
+        ...,
+        description="Human-readable destination key matching the entity's name/id field (e.g., 'tidb-migration-spec', 'sarah-chen', 'bob')",
+    )
+    rel_type: str = Field(
+        ...,
+        description="Relationship type in snake_case (e.g., 'authored_by', 'depends_on', 'references')",
+    )
+    weight: float = Field(
+        default=0.5,
+        ge=0.0,
+        le=1.0,
+        description="Relationship strength: 1.0=primary, 0.8-0.9=important, 0.5-0.7=secondary, 0.3-0.4=weak",
+    )
+    properties: dict = Field(
+        default_factory=dict,
+        description=(
+            "Rich metadata. CRITICAL field: dst_entity_type with format 'table_schema:category/key' "
+            "(e.g., 'resources:managers/bob', 'users:engineers/sarah-chen'). "
+            "Used to determine upsert target table and category. Can be null/omitted if unknown."
+        ),
+    )
+    created_at: datetime = Field(
+        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None), description="Edge creation timestamp"
+    )
+class InlineEdges(BaseModel):
+    """
+    Collection of InlineEdge objects.
+    Used for structured edge operations and batch processing.
+    """
+    edges: list[InlineEdge] = Field(
+        default_factory=list, description="List of graph edges"
+    )
+    def add_edge(
+        self,
+        dst: str,
+        rel_type: str,
+        weight: float = 0.5,
+        properties: Optional[dict] = None,
+    ) -> None:
+        """Add a new edge to the collection."""
+        edge = InlineEdge(
+            dst=dst, rel_type=rel_type, weight=weight, properties=properties or {}
+        )
+        self.edges.append(edge)
+    def filter_by_rel_type(self, rel_types: list[str]) -> list[InlineEdge]:
+        """Filter edges by relationship types."""
+        return [edge for edge in self.edges if edge.rel_type in rel_types]
+    def filter_by_weight(self, min_weight: float = 0.0) -> list[InlineEdge]:
+        """Filter edges by minimum weight threshold."""
+        return [edge for edge in self.edges if edge.weight >= min_weight]

rem/models/core/rem_query.py ADDED Viewed

@@ -0,0 +1,243 @@
+"""
+REM Query Models
+REM provides schema-agnostic query operations optimized for LLM-augmented
+iterated retrieval. Unlike traditional SQL, REM queries work with natural
+language labels instead of UUIDs and support multi-turn exploration.
+Query Types (Performance Contract):
+- LOOKUP: O(1) schema-agnostic entity resolution
+- FUZZY: Indexed fuzzy text matching across all entities
+- SEARCH: Indexed semantic vector search
+- SQL: Direct table queries (provider dialect)
+- TRAVERSE: Iterative O(1) lookups on graph edges
+Key Design Principles:
+1. Natural language surface area (labels, not UUIDs)
+2. Schema-agnostic operations (no table name required for LOOKUP/FUZZY/TRAVERSE)
+3. Multi-turn iteration with stage tracking and memos
+4. O(1) performance guarantees for entity resolution
+Iterated Retrieval Pattern:
+- Stage 1: Find entry point (LOOKUP/SEARCH)
+- Stage 2: Analyze neighborhood (TRAVERSE DEPTH 0 = PLAN mode)
+- Stage 3: Selective traversal (TRAVERSE with edge filters)
+- Stage 4: Refinement based on results
+Example Multi-Turn Query:
+```python
+# Turn 1: PLAN mode to analyze edges
+TRAVERSE WITH LOOKUP "sarah chen" DEPTH 0
+# Turn 2: Follow specific edge types
+TRAVERSE manages,mentors WITH LOOKUP "sarah chen" DEPTH 2
+# Turn 3: Refine based on results
+TRAVERSE authored_by WITH LOOKUP "api-design-v2" DEPTH 1
+```
+REM Query Contract (MANDATORY for all providers):
+| Query Type | Performance | Schema | Multi-Match | Required |
+|------------|-------------|--------|-------------|----------|
+| LOOKUP | O(1) | Agnostic | Yes | ✅ |
+| FUZZY | Indexed | Agnostic | Yes | ✅ |
+| SEARCH | Indexed | Specific | Yes | ✅ |
+| SQL | O(n) | Specific | No | ✅ |
+| TRAVERSE | O(k) | Agnostic | Yes | ✅ |
+"""
+from enum import Enum
+from typing import Any, Optional, Union
+from pydantic import BaseModel, Field
+class QueryType(str, Enum):
+    """
+    REM query types.
+    Each type has specific performance and schema requirements
+    defined in the REM contract.
+    """
+    # Mixing in str makes each member compare equal to its own string value.
+    # The one-line summaries below restate the module docstring's contract.
+    # Entity resolution by key: O(1), schema-agnostic.
+    LOOKUP = "LOOKUP"
+    # Indexed fuzzy text matching across all entities.
+    FUZZY = "FUZZY"
+    # Indexed semantic vector search.
+    SEARCH = "SEARCH"
+    # Direct table queries in the provider's SQL dialect.
+    SQL = "SQL"
+    # Iterative lookups that follow graph edges.
+    TRAVERSE = "TRAVERSE"
+class LookupParameters(BaseModel):
+    """
+    LOOKUP query parameters.
+    Performance: O(1) per key
+    Schema: Agnostic - No table name required
+    Multi-match: Returns entities from ALL tables with matching keys
+    """
+    key: Union[str, list[str]] = Field(
+        ..., description="Entity identifier(s) - single key or list of keys (natural language labels)"
+    )
+    user_id: Optional[str] = Field(
+        default=None, description="Optional user ID filter for multi-user tenants"
+    )
+class FuzzyParameters(BaseModel):
+    """
+    FUZZY query parameters.
+    Performance: Indexed - FTS or trigram index required
+    Schema: Agnostic - Searches across all entity names
+    Multi-match: Returns entities from ALL tables matching fuzzy pattern
+    """
+    query_text: str = Field(..., description="Fuzzy search text")
+    threshold: float = Field(
+        default=0.5, ge=0.0, le=1.0, description="Similarity threshold"
+    )
+    limit: int = Field(default=5, gt=0, description="Maximum results")
+class SearchParameters(BaseModel):
+    """
+    SEARCH query parameters.
+    Performance: Indexed - Vector index required (IVF, HNSW)
+    Schema: Table-specific - Requires table name
+    """
+    query_text: str = Field(..., description="Semantic search query")
+    table_name: str = Field(..., description="Table to search (resources, moments, etc.)")
+    limit: int = Field(default=10, gt=0, description="Maximum results")
+    min_similarity: float = Field(
+        default=0.7, ge=0.0, le=1.0, description="Minimum similarity score"
+    )
+class SQLParameters(BaseModel):
+    """
+    SQL query parameters.
+    Performance: O(n) - Table scan with optional indexes
+    Schema: Table-specific - Requires table name and column knowledge
+    Provider-specific: Uses native SQL dialect
+    Supports two modes:
+    1. Structured: table_name + where_clause + order_by + limit
+    2. Raw: raw_query (full SQL statement like SELECT...)
+    """
+    raw_query: Optional[str] = Field(
+        default=None, description="Raw SQL query (e.g., SELECT * FROM resources WHERE...)"
+    )
+    table_name: Optional[str] = Field(default=None, description="Table to query (structured mode)")
+    where_clause: Optional[str] = Field(
+        default=None, description="SQL WHERE clause (structured mode)"
+    )
+    order_by: Optional[str] = Field(default=None, description="SQL ORDER BY clause (structured mode)")
+    limit: Optional[int] = Field(default=None, description="SQL LIMIT (structured mode)")
+class TraverseParameters(BaseModel):
+    """
+    TRAVERSE query parameters.
+    Performance: O(k) where k = number of keys traversed
+    Schema: Agnostic - Follows graph edges across tables
+    Implementation: Iterative LOOKUP calls on edge destinations
+    Syntax: TRAVERSE {edge_filter} WITH [REM_QUERY] DEPTH [0-N]
+    Depth Modes:
+    - 0: PLAN mode (analyze edges without traversal)
+    - 1: Single-hop traversal (default)
+    - N: Multi-hop traversal (N hops from source)
+    Plan Memo:
+    Agent-maintained scratchpad for tracking multi-turn progress.
+    Kept terse for fast token generation.
+    Example: "Goal: org chart. Step 1: find CEO"
+    """
+    initial_query: str = Field(
+        ..., description="Initial query to find entry nodes (LOOKUP key, SEARCH text, etc.)"
+    )
+    edge_types: list[str] = Field(
+        default_factory=lambda: ["*"],
+        description="Edge types to follow (e.g., ['manages', 'reports-to']). Default: ['*'] (all)",
+    )
+    max_depth: int = Field(
+        default=1, ge=0, description="Maximum traversal depth. 0 = PLAN mode (no traversal)"
+    )
+    order_by: str = Field(
+        default="edge.created_at DESC",
+        description="Result ordering (edge.created_at, node.name, edge.weight)",
+    )
+    limit: int = Field(default=9, gt=0, description="Maximum nodes to return")
+    plan_memo: Optional[str] = Field(
+        default=None,
+        description="Agent's terse scratchpad for tracking multi-turn progress",
+    )
+class RemQuery(BaseModel):
+    """
+    REM query plan.
+    Combines query type with type-specific parameters.
+    Used by both direct REM queries and ask_rem() natural language interface.
+    """
+    # Required: which kind of REM operation this plan describes.
+    query_type: QueryType = Field(..., description="REM query type")
+    # Required: exactly one of the five parameter models declared in this module.
+    # NOTE(review): no validator in this class ties the chosen parameter model to
+    # query_type, and SQLParameters has no required fields - confirm the pairing
+    # is checked wherever the plan is executed.
+    parameters: (
+        LookupParameters
+        | FuzzyParameters
+        | SearchParameters
+        | SQLParameters
+        | TraverseParameters
+    ) = Field(..., description="Query parameters")
+    # Required here, whereas LookupParameters.user_id is optional.
+    user_id: str = Field(..., description="User identifier for isolation")
+class TraverseStage(BaseModel):
+    """
+    TRAVERSE execution stage information.
+    Captures query execution details for LLM interaction and multi-turn planning.
+    """
+    depth: int = Field(..., description="Traversal depth for this stage")
+    executed: str = Field(..., description="Query executed at this stage")
+    found: dict[str, int] = Field(
+        ..., description="Discovery stats (nodes, edges counts)"
+    )
+    plan_memo: Optional[str] = Field(
+        default=None, description="Agent's memo echoed from request"
+    )
+class TraverseResponse(BaseModel):
+    """
+    TRAVERSE query response.
+    Returns nodes, execution stages, and metadata for LLM-driven iteration.
+    """
+    nodes: list[dict[str, Any]] = Field(
+        default_factory=list, description="Discovered nodes"
+    )
+    stages: list[TraverseStage] = Field(
+        default_factory=list, description="Execution stage information"
+    )
+    source_nodes: list[str] = Field(
+        default_factory=list, description="Initial entry node labels"
+    )
+    edge_summary: list[tuple[str, str, str]] = Field(
+        default_factory=list,
+        description="Edge shorthand tuples (src, rel_type, dst) for analysis",
+    )
+    metadata: dict[str, Any] = Field(
+        default_factory=dict, description="Query metadata (total_nodes, max_depth_reached, etc.)"
+    )

rem/models/entities/__init__.py ADDED Viewed

@@ -0,0 +1,43 @@
+"""
+REM Entity Models
+Core entity types for the REM system:
+- Resources: Base content units (documents, conversations, artifacts)
+- ImageResources: Image-specific resources with CLIP embeddings
+- Messages: Communication content
+- Users: User entities
+- Files: File metadata and tracking
+- Moments: Temporal narratives (meetings, coding sessions, conversations)
+- Schemas: Agent schema definitions (JsonSchema specifications for Pydantic AI)
+- Ontologies: Domain-specific extracted knowledge from files
+- OntologyConfigs: User-defined rules for automatic ontology extraction
+All entities inherit from CoreModel and support:
+- Graph connectivity via InlineEdge
+- Temporal tracking
+- Flexible metadata
+- Natural language labels for conversational queries
+"""
+from .file import File
+from .image_resource import ImageResource
+from .message import Message
+from .moment import Moment
+from .ontology import Ontology
+from .ontology_config import OntologyConfig
+from .resource import Resource
+from .schema import Schema
+from .user import User, UserTier
+# Public API of the entities package: the names exported by
+# `from rem.models.entities import *`. Every entry is imported above.
+__all__ = [
+    "Resource",
+    "ImageResource",
+    "Message",
+    "User",
+    "UserTier",
+    "File",
+    "Moment",
+    "Schema",
+    "Ontology",
+    "OntologyConfig",
+]

rem/models/entities/file.py ADDED Viewed

@@ -0,0 +1,57 @@
+"""
+File - File metadata and tracking in REM.
+Files represent uploaded or referenced files (PDFs, images, audio, etc.)
+that are parsed into Resources or used as input to dreaming workflows.
+File entities track:
+- File metadata (name, size, mime type)
+- Storage location (URI)
+- Processing status
+- Relationships to derived Resources
+"""
+from typing import Optional
+from pydantic import Field
+from ..core import CoreModel
+class File(CoreModel):
+    """
+    File metadata and tracking.
+    Represents files uploaded to or referenced by the REM system,
+    tracking their metadata and processing status. Tenant isolation
+    is provided via CoreModel.tenant_id field.
+    """
+    name: str = Field(
+        ...,
+        description="File name",
+    )
+    uri: str = Field(
+        ...,
+        description="File storage URI (S3, local path, etc.)",
+    )
+    content: Optional[str] = Field(
+        default=None,
+        description="Extracted text content (if applicable)",
+    )
+    timestamp: Optional[str] = Field(
+        default=None,
+        description="File creation/modification timestamp",
+    )
+    size_bytes: Optional[int] = Field(
+        default=None,
+        description="File size in bytes",
+    )
+    mime_type: Optional[str] = Field(
+        default=None,
+        description="File MIME type",
+    )
+    processing_status: Optional[str] = Field(
+        default="pending",
+        description="File processing status (pending, processing, completed, failed)",
+    )

rem/models/entities/image_resource.py ADDED Viewed

@@ -0,0 +1,88 @@
+"""
+ImageResource - Image-specific resource with CLIP embeddings.
+ImageResources are a specialized subclass of Resource for images,
+with support for CLIP embeddings and vision LLM descriptions.
+Key differences from base Resource:
+- **Separate table**: Stored in `image_resources` table, not `resources`
+- **Different embeddings**: Uses CLIP embeddings (multimodal) instead of text embeddings
+- **Embedding provider override**: Must use CLIP-compatible provider (Jina AI, self-hosted)
+- **Vision descriptions**: Optional vision LLM descriptions (tier/sampling gated)
+- **Image metadata**: Dimensions, format, and other image-specific fields
+Why separate table?
+1. Different embedding dimensionality (512/768 vs 1536)
+2. Different embedding model (CLIP vs text-embedding-3-small)
+3. Multimodal search capabilities (text-to-image, image-to-image)
+4. Image-specific indexes and queries
+5. Cost tracking (CLIP tokens vs text tokens)
+Usage:
+- ImageProvider saves to ImageResource table with CLIP embeddings
+- Regular text Resources use standard text embeddings
+- Cross-modal search: text queries can search ImageResources via CLIP
+"""
+from typing import Optional
+from pydantic import Field
+from .resource import Resource
+class ImageResource(Resource):
+    """
+    Image-specific resource with CLIP embeddings.
+    Stored in separate `image_resources` table with CLIP embeddings
+    instead of text embeddings. This enables:
+    - Multimodal search (text-to-image, image-to-image)
+    - Proper dimensionality (512/768 for CLIP vs 1536 for text)
+    - Cost tracking (CLIP tokens separate from text tokens)
+    Embedding Strategy:
+    - Default (when JINA_API_KEY set): Jina CLIP API (jina-clip-v2)
+    - Future: Self-hosted OpenCLIP models via KEDA-scaled pods
+    - Fallback: No embeddings (images searchable by metadata only)
+    Vision LLM Strategy (tier/sampling gated):
+    - Gold tier: Always get vision descriptions
+    - Silver/Free: Probabilistic sampling (IMAGE_VLLM_SAMPLE_RATE)
+    - Fallback: Basic metadata only
+    Tenant isolation provided via CoreModel.tenant_id field.
+    """
+    # Every field added by this subclass is optional and defaults to None, so an
+    # ImageResource needs nothing beyond what Resource itself requires.
+    # --- Image metadata ---
+    image_width: Optional[int] = Field(
+        default=None,
+        description="Image width in pixels",
+    )
+    image_height: Optional[int] = Field(
+        default=None,
+        description="Image height in pixels",
+    )
+    # Free-form string; the formats listed in the description are not enforced.
+    image_format: Optional[str] = Field(
+        default=None,
+        description="Image format (PNG, JPEG, GIF, WebP)",
+    )
+    # --- Vision LLM output and its provenance ---
+    vision_description: Optional[str] = Field(
+        default=None,
+        description="Vision LLM generated description (markdown, only for gold tier or sampled images)",
+    )
+    vision_provider: Optional[str] = Field(
+        default=None,
+        description="Vision provider used (anthropic, gemini, openai)",
+    )
+    vision_model: Optional[str] = Field(
+        default=None,
+        description="Vision model used for description",
+    )
+    # --- CLIP embedding ---
+    # Plain list of floats; the vector length is not validated against
+    # clip_dimensions in this class.
+    clip_embedding: Optional[list[float]] = Field(
+        default=None,
+        description="CLIP embedding vector (512 or 768 dimensions, from Jina AI or self-hosted)",
+    )
+    # NOTE(review): confirm the per-model dimension figures quoted in this
+    # description against the embedding provider's current model documentation.
+    clip_dimensions: Optional[int] = Field(
+        default=None,
+        description="CLIP embedding dimensionality (512 for jina-clip-v2, 768 for jina-clip-v1)",
+    )

rem/models/entities/message.py ADDED Viewed

@@ -0,0 +1,35 @@
+"""
+Message - Communication content in REM.
+Messages represent individual communication units (chat messages, emails, etc.)
+that can be grouped into conversations or moments.
+Messages are simpler than Resources but share the same graph connectivity
+through CoreModel inheritance.
+"""
+from pydantic import Field
+from ..core import CoreModel
+class Message(CoreModel):
+    """
+    Communication content unit.
+    Represents individual messages in conversations, chats, or other
+    communication contexts. Tenant isolation is provided via CoreModel.tenant_id field.
+    """
+    # Required: the message body.
+    content: str = Field(
+        ...,
+        description="Message content text",
+    )
+    # Optional free-form string, None when omitted.
+    # NOTE(review): presumably the chat role of the sender - confirm with callers.
+    message_type: str | None = Field(
+        default=None,
+        description="Message type e.g role",
+    )
+    # Optional identifier of the session this message belongs to; None when omitted.
+    session_id: str | None = Field(
+        default=None,
+        description="Session identifier for tracking message context",
+    )