PyPI - dao-ai - Versions diffs - 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl - Mend

dao-ai 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30) hide show

dao_ai/cli.py +8 -3
dao_ai/config.py +513 -32
dao_ai/evaluation.py +543 -0
dao_ai/genie/cache/__init__.py +2 -0
dao_ai/genie/cache/core.py +1 -1
dao_ai/genie/cache/in_memory_semantic.py +871 -0
dao_ai/genie/cache/lru.py +15 -11
dao_ai/genie/cache/semantic.py +52 -18
dao_ai/memory/postgres.py +146 -35
dao_ai/orchestration/core.py +33 -9
dao_ai/orchestration/supervisor.py +23 -8
dao_ai/{prompts.py → prompts/__init__.py} +10 -1
dao_ai/prompts/instructed_retriever_decomposition.yaml +58 -0
dao_ai/prompts/instruction_reranker.yaml +14 -0
dao_ai/prompts/router.yaml +37 -0
dao_ai/prompts/verifier.yaml +46 -0
dao_ai/providers/databricks.py +33 -12
dao_ai/tools/genie.py +28 -3
dao_ai/tools/instructed_retriever.py +366 -0
dao_ai/tools/instruction_reranker.py +202 -0
dao_ai/tools/router.py +89 -0
dao_ai/tools/vector_search.py +441 -134
dao_ai/tools/verifier.py +159 -0
dao_ai/utils.py +182 -2
dao_ai/vector_search.py +9 -1
{dao_ai-0.1.17.dist-info → dao_ai-0.1.19.dist-info}/METADATA +4 -3
{dao_ai-0.1.17.dist-info → dao_ai-0.1.19.dist-info}/RECORD +30 -20
{dao_ai-0.1.17.dist-info → dao_ai-0.1.19.dist-info}/WHEEL +0 -0
{dao_ai-0.1.17.dist-info → dao_ai-0.1.19.dist-info}/entry_points.txt +0 -0
{dao_ai-0.1.17.dist-info → dao_ai-0.1.19.dist-info}/licenses/LICENSE +0 -0

dao_ai/cli.py CHANGED Viewed

@@ -521,14 +521,19 @@ def handle_chat_command(options: Namespace) -> None:
                             )
                             continue
+                # Normalize user_id for memory namespace compatibility (replace . with _)
+                # This matches the normalization in models.py _convert_to_context
+                if configurable.get("user_id"):
+                    configurable["user_id"] = configurable["user_id"].replace(".", "_")
                 # Create Context object from configurable dict
                 from dao_ai.state import Context
                 context = Context(**configurable)
-                # Prepare config with thread_id for checkpointer
-                # Note: thread_id is needed in config for checkpointer/memory
-                config = {"configurable": {"thread_id": options.thread_id}}
+                # Prepare config with all context fields for checkpointer/memory
+                # Note: langmem tools require user_id in config.configurable for namespace resolution
+                config = {"configurable": context.model_dump()}
                 # Invoke the graph and handle interrupts (HITL)
                 # Wrap in async function to maintain connection pool throughout

dao_ai/config.py CHANGED Viewed

@@ -1402,13 +1402,20 @@ class DatabaseModel(IsDatabricksResource):
     - Databricks Lakebase: Provide `instance_name` (authentication optional, supports ambient auth)
     - Standard PostgreSQL: Provide `host` (authentication required via user/password)
-    Note: `instance_name` and `host` are mutually exclusive. Provide one or the other.
+    Note: For Lakebase connections, `name` is optional and defaults to `instance_name`.
+    For PostgreSQL connections, `name` is required.
+    Example Databricks Lakebase (minimal):
+    ```yaml
+    databases:
+      my_lakebase:
+        instance_name: my-lakebase-instance  # name defaults to instance_name
+    ```
     Example Databricks Lakebase with Service Principal:
     ```yaml
     databases:
       my_lakebase:
-        name: my-database
         instance_name: my-lakebase-instance
         service_principal:
           client_id:
@@ -1424,7 +1431,6 @@ class DatabaseModel(IsDatabricksResource):
     ```yaml
     databases:
       my_lakebase:
-        name: my-database
         instance_name: my-lakebase-instance
         on_behalf_of_user: true
     ```
@@ -1444,7 +1450,7 @@ class DatabaseModel(IsDatabricksResource):
     """
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
-    name: str
+    name: Optional[str] = None
     instance_name: Optional[str] = None
     description: Optional[str] = None
     host: Optional[AnyVariable] = None
@@ -1493,6 +1499,17 @@ class DatabaseModel(IsDatabricksResource):
             )
         return self
+    @model_validator(mode="after")
+    def populate_name_from_instance_name(self) -> Self:
+        """Populate name from instance_name if not provided for Lakebase connections."""
+        if self.name is None and self.instance_name:
+            self.name = self.instance_name
+        elif self.name is None:
+            raise ValueError(
+                "Either 'name' or 'instance_name' must be provided for DatabaseModel."
+            )
+        return self
     @model_validator(mode="after")
     def update_user(self) -> Self:
         # Skip if using OBO (passive auth), explicit credentials, or explicit user
@@ -1590,10 +1607,10 @@ class DatabaseModel(IsDatabricksResource):
         username: str | None = None
         password_value: str | None = None
-        # Resolve host - may need to fetch at runtime for OBO mode
+        # Resolve host - fetch from API at runtime for Lakebase if not provided
         host_value: Any = self.host
-        if host_value is None and self.is_lakebase and self.on_behalf_of_user:
-            # Fetch host at runtime for OBO mode
+        if host_value is None and self.is_lakebase:
+            # Fetch host from Lakebase instance API
             existing_instance: DatabaseInstance = (
                 self.workspace_client.database.get_database_instance(
                     name=self.instance_name
@@ -1756,6 +1773,105 @@ class GenieSemanticCacheParametersModel(BaseModel):
         return self
+# Memory estimation for capacity planning:
+# - Each entry: ~20KB (8KB question embedding + 8KB context embedding + 4KB strings/overhead)
+# - 1,000 entries: ~20MB (0.4% of 8GB)
+# - 5,000 entries: ~100MB (2% of 8GB)
+# - 10,000 entries: ~200MB (4-5% of 8GB) - default for ~30 users
+# - 20,000 entries: ~400MB (8-10% of 8GB)
+# Default 10,000 entries provides ~330 queries per user for 30 users.
+class GenieInMemorySemanticCacheParametersModel(BaseModel):
+    """
+    Configuration for in-memory semantic cache (no database required).
+    This cache stores embeddings and cache entries entirely in memory, providing
+    semantic similarity matching without requiring external database dependencies
+    like PostgreSQL or Databricks Lakebase.
+    Default settings are tuned for ~30 users on an 8GB machine:
+    - Capacity: 10,000 entries (~200MB memory, ~330 queries per user)
+    - Eviction: LRU (Least Recently Used) - keeps frequently accessed queries
+    - TTL: 1 week (accommodates weekly work patterns and batch jobs)
+    - Memory overhead: ~4-5% of 8GB system
+    The LRU eviction strategy ensures hot queries stay cached while cold queries
+    are evicted, providing better hit rates than FIFO eviction.
+    For larger deployments or memory-constrained environments, adjust capacity and TTL accordingly.
+    Use this when:
+    - No external database access is available
+    - Single-instance deployments (cache not shared across instances)
+    - Cache persistence across restarts is not required
+    - Cache sizes are moderate (hundreds to low thousands of entries)
+    For multi-instance deployments or large cache sizes, use GenieSemanticCacheParametersModel
+    with PostgreSQL backend instead.
+    """
+    model_config = ConfigDict(use_enum_values=True, extra="forbid")
+    time_to_live_seconds: int | None = (
+        60 * 60 * 24 * 7
+    )  # 1 week default (604800 seconds), None or negative = never expires
+    similarity_threshold: float = 0.85  # Minimum similarity for question matching (L2 distance converted to 0-1 scale)
+    context_similarity_threshold: float = 0.80  # Minimum similarity for context matching (L2 distance converted to 0-1 scale)
+    question_weight: Optional[float] = (
+        0.6  # Weight for question similarity in combined score (0-1). If not provided, computed as 1 - context_weight
+    )
+    context_weight: Optional[float] = (
+        None  # Weight for context similarity in combined score (0-1). If not provided, computed as 1 - question_weight
+    )
+    embedding_model: str | LLMModel = "databricks-gte-large-en"
+    embedding_dims: int | None = None  # Auto-detected if None
+    warehouse: WarehouseModel
+    capacity: int | None = (
+        10000  # Maximum cache entries. ~200MB for 10000 entries (1024-dim embeddings). LRU eviction when full. None = unlimited (not recommended for production).
+    )
+    context_window_size: int = 3  # Number of previous turns to include for context
+    max_context_tokens: int = (
+        2000  # Maximum context length to prevent extremely long embeddings
+    )
+    @model_validator(mode="after")
+    def compute_and_validate_weights(self) -> Self:
+        """
+        Compute missing weight and validate that question_weight + context_weight = 1.0.
+        Either question_weight or context_weight (or both) can be provided.
+        The missing one will be computed as 1.0 - provided_weight.
+        If both are provided, they must sum to 1.0.
+        """
+        if self.question_weight is None and self.context_weight is None:
+            # Both missing - use defaults
+            self.question_weight = 0.6
+            self.context_weight = 0.4
+        elif self.question_weight is None:
+            # Compute question_weight from context_weight
+            if not (0.0 <= self.context_weight <= 1.0):
+                raise ValueError(
+                    f"context_weight must be between 0.0 and 1.0, got {self.context_weight}"
+                )
+            self.question_weight = 1.0 - self.context_weight
+        elif self.context_weight is None:
+            # Compute context_weight from question_weight
+            if not (0.0 <= self.question_weight <= 1.0):
+                raise ValueError(
+                    f"question_weight must be between 0.0 and 1.0, got {self.question_weight}"
+                )
+            self.context_weight = 1.0 - self.question_weight
+        else:
+            # Both provided - validate they sum to 1.0
+            total_weight = self.question_weight + self.context_weight
+            if not abs(total_weight - 1.0) < 0.0001:  # Allow small floating point error
+                raise ValueError(
+                    f"question_weight ({self.question_weight}) + context_weight ({self.context_weight}) "
+                    f"must equal 1.0 (got {total_weight}). These weights determine the relative importance "
+                    f"of question vs context similarity in the combined score."
+                )
+        return self
 class SearchParametersModel(BaseModel):
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
     num_results: Optional[int] = 10
@@ -1763,43 +1879,83 @@ class SearchParametersModel(BaseModel):
     query_type: Optional[str] = "ANN"
+class InstructionAwareRerankModel(BaseModel):
+    """
+    LLM-based reranking considering user instructions and constraints.
+    Use fast models (GPT-3.5, Haiku, Llama 3 8B) to minimize latency (~100ms).
+    Runs AFTER FlashRank as an additional constraint-aware reranking stage.
+    Skipped for 'standard' mode when auto_bypass=true in router config.
+    Example:
+        ```yaml
+        rerank:
+          model: ms-marco-MiniLM-L-12-v2
+          top_n: 20
+          instruction_aware:
+            model: *fast_llm
+            instructions: |
+              Prioritize results matching price and brand constraints.
+            top_n: 10
+        ```
+    """
+    model_config = ConfigDict(use_enum_values=True, extra="forbid")
+    model: Optional["LLMModel"] = Field(
+        default=None,
+        description="LLM for instruction reranking (fast model recommended)",
+    )
+    instructions: Optional[str] = Field(
+        default=None,
+        description="Custom reranking instructions for constraint prioritization",
+    )
+    top_n: Optional[int] = Field(
+        default=None,
+        description="Number of documents to return after instruction reranking",
+    )
 class RerankParametersModel(BaseModel):
     """
-    Configuration for reranking retrieved documents using FlashRank.
+    Configuration for reranking retrieved documents.
-    FlashRank provides fast, local reranking without API calls using lightweight
-    cross-encoder models. Reranking improves retrieval quality by reordering results
-    based on semantic relevance to the query.
+    Supports three reranking options that can be combined:
+    1. FlashRank (local cross-encoder) - set `model`
+    2. Databricks server-side reranking - set `columns`
+    3. LLM instruction-aware reranking - set `instruction_aware`
-    Typical workflow:
-    1. Retrieve more documents than needed (e.g., 50 via num_results)
-    2. Rerank all retrieved documents
-    3. Return top_n best matches (e.g., 5)
+    Example with Databricks columns + instruction-aware (no FlashRank):
+        ```yaml
+        rerank:
+          columns:                    # Databricks server-side reranking
+            - product_name
+            - brand_name
+          instruction_aware:          # LLM-based constraint reranking
+            model: *fast_llm
+            instructions: "Prioritize by brand preferences"
+            top_n: 10
+        ```
-    Example:
+    Example with FlashRank:
         ```yaml
-        retriever:
-          search_parameters:
-            num_results: 50  # Retrieve more candidates
-          rerank:
-            model: ms-marco-MiniLM-L-12-v2
-            top_n: 5  # Return top 5 after reranking
+        rerank:
+          model: ms-marco-MiniLM-L-12-v2  # FlashRank model
+          top_n: 10
         ```
-    Available models (see https://github.com/PrithivirajDamodaran/FlashRank):
+    Available FlashRank models (see https://github.com/PrithivirajDamodaran/FlashRank):
     - "ms-marco-TinyBERT-L-2-v2" (~4MB, fastest)
-    - "ms-marco-MiniLM-L-12-v2" (~34MB, best cross-encoder, default)
+    - "ms-marco-MiniLM-L-12-v2" (~34MB, best cross-encoder)
     - "rank-T5-flan" (~110MB, best non cross-encoder)
     - "ms-marco-MultiBERT-L-12" (~150MB, multilingual 100+ languages)
-    - "ce-esci-MiniLM-L12-v2" (e-commerce optimized, Amazon ESCI)
-    - "miniReranker_arabic_v1" (Arabic language)
     """
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
-    model: str = Field(
-        default="ms-marco-MiniLM-L-12-v2",
-        description="FlashRank model name. Default provides good balance of speed and accuracy.",
+    model: Optional[str] = Field(
+        default=None,
+        description="FlashRank model name. If None, FlashRank is not used (use columns for Databricks reranking).",
     )
     top_n: Optional[int] = Field(
         default=None,
@@ -1812,6 +1968,289 @@ class RerankParametersModel(BaseModel):
     columns: Optional[list[str]] = Field(
         default_factory=list, description="Columns to rerank using DatabricksReranker"
     )
+    instruction_aware: Optional[InstructionAwareRerankModel] = Field(
+        default=None,
+        description="Optional LLM-based reranking stage after FlashRank",
+    )
+class FilterItem(BaseModel):
+    """A metadata filter for vector search.
+    Filters constrain search results by matching column values.
+    Use column names from the provided schema description.
+    """
+    model_config = ConfigDict(extra="forbid")
+    key: str = Field(
+        description=(
+            "Column name with optional operator suffix. "
+            "Operators: (none) for equality, NOT for exclusion, "
+            "< <= > >= for numeric comparison, "
+            "LIKE for token match, NOT LIKE to exclude tokens."
+        )
+    )
+    value: Union[str, int, float, bool, list[Union[str, int, float, bool]]] = Field(
+        description=(
+            "The filter value matching the column type. "
+            "Use an array for IN-style matching multiple values."
+        )
+    )
+class SearchQuery(BaseModel):
+    """A single search query with optional metadata filters.
+    Represents one focused search intent extracted from the user's request.
+    The text should be a natural language query optimized for semantic search.
+    Filters constrain results to match specific metadata values.
+    """
+    model_config = ConfigDict(extra="forbid")
+    text: str = Field(
+        description=(
+            "Natural language search query text optimized for semantic similarity. "
+            "Should be focused on a single search intent. "
+            "Do NOT include filter criteria in the text; use the filters field instead."
+        )
+    )
+    filters: Optional[list[FilterItem]] = Field(
+        default=None,
+        description=(
+            "Metadata filters to constrain search results. "
+            "Set to null if no filters apply. "
+            "Extract filter values from explicit constraints in the user query."
+        ),
+    )
+class DecomposedQueries(BaseModel):
+    """Decomposed search queries extracted from a user request.
+    Break down complex user queries into multiple focused search queries.
+    Each query targets a distinct search intent with appropriate filters.
+    Generate 1-3 queries depending on the complexity of the user request.
+    """
+    model_config = ConfigDict(extra="forbid")
+    queries: list[SearchQuery] = Field(
+        description=(
+            "List of search queries extracted from the user request. "
+            "Each query should target a distinct search intent. "
+            "Order queries by importance, with the most relevant first."
+        )
+    )
+class ColumnInfo(BaseModel):
+    """Column metadata for dynamic schema generation in structured output.
+    When provided, column information is embedded directly into the JSON schema
+    that with_structured_output sends to the LLM, improving filter accuracy.
+    """
+    model_config = ConfigDict(extra="forbid")
+    name: str = Field(description="Column name as it appears in the database")
+    type: Literal["string", "number", "boolean", "datetime"] = Field(
+        default="string",
+        description="Column data type for value validation",
+    )
+    operators: list[str] = Field(
+        default=["", "NOT", "<", "<=", ">", ">=", "LIKE", "NOT LIKE"],
+        description="Valid filter operators for this column",
+    )
+class InstructedRetrieverModel(BaseModel):
+    """
+    Configuration for instructed retrieval with query decomposition and RRF merging.
+    Instructed retrieval decomposes user queries into multiple subqueries with
+    metadata filters, executes them in parallel, and merges results using
+    Reciprocal Rank Fusion (RRF) before reranking.
+    Example:
+        ```yaml
+        retriever:
+          vector_store: *products_vector_store
+          instructed:
+            decomposition_model: *fast_llm
+            schema_description: |
+              Products table: product_id, brand_name, category, price, updated_at
+              Filter operators: {"col": val}, {"col >": val}, {"col NOT": val}
+            columns:
+              - name: brand_name
+                type: string
+              - name: price
+                type: number
+                operators: ["", "<", "<=", ">", ">="]
+            constraints:
+              - "Prefer recent products"
+            max_subqueries: 3
+            examples:
+              - query: "cheap drills"
+                filters: {"price <": 100}
+        ```
+    """
+    model_config = ConfigDict(use_enum_values=True, extra="forbid")
+    decomposition_model: Optional["LLMModel"] = Field(
+        default=None,
+        description="LLM for query decomposition (smaller/faster model recommended)",
+    )
+    schema_description: str = Field(
+        description="Column names, types, and valid filter syntax for the LLM"
+    )
+    columns: Optional[list[ColumnInfo]] = Field(
+        default=None,
+        description=(
+            "Structured column info for dynamic schema generation. "
+            "When provided, column names are embedded in the JSON schema for better LLM accuracy."
+        ),
+    )
+    constraints: Optional[list[str]] = Field(
+        default=None, description="Default constraints to always apply"
+    )
+    max_subqueries: int = Field(
+        default=3, description="Maximum number of parallel subqueries"
+    )
+    rrf_k: int = Field(
+        default=60,
+        description="RRF constant (lower values weight top ranks more heavily)",
+    )
+    examples: Optional[list[dict[str, Any]]] = Field(
+        default=None,
+        description="Few-shot examples for domain-specific filter translation",
+    )
+    normalize_filter_case: Optional[Literal["uppercase", "lowercase"]] = Field(
+        default=None,
+        description="Auto-normalize filter string values to uppercase or lowercase",
+    )
+class RouterModel(BaseModel):
+    """
+    Select internal execution mode based on query characteristics.
+    Use fast models (GPT-3.5, Haiku, Llama 3 8B) to minimize latency (~50-100ms).
+    Routes to internal modes within the same retriever, not external retrievers.
+    Cross-index routing belongs at the agent/tool-selection level.
+    Execution Modes:
+    - "standard": Single similarity_search() for simple keyword/product searches
+    - "instructed": Decompose -> Parallel Search -> RRF for constrained queries
+    Example:
+        ```yaml
+        retriever:
+          router:
+            model: *fast_llm
+            default_mode: standard
+            auto_bypass: true
+        ```
+    """
+    model_config = ConfigDict(use_enum_values=True, extra="forbid")
+    model: Optional["LLMModel"] = Field(
+        default=None,
+        description="LLM for routing decision (fast model recommended)",
+    )
+    default_mode: Literal["standard", "instructed"] = Field(
+        default="standard",
+        description="Fallback mode if routing fails",
+    )
+    auto_bypass: bool = Field(
+        default=True,
+        description="Skip Instruction Reranker and Verifier for standard mode",
+    )
+class VerificationResult(BaseModel):
+    """Verification of whether search results satisfy the user's constraints.
+    Analyze the retrieved results against the original query and any explicit
+    constraints to determine if a retry with modified filters is needed.
+    """
+    model_config = ConfigDict(extra="forbid")
+    passed: bool = Field(
+        description="True if results satisfy the user's query intent and constraints."
+    )
+    confidence: float = Field(
+        ge=0.0,
+        le=1.0,
+        description="Confidence in the verification decision, from 0.0 (uncertain) to 1.0 (certain).",
+    )
+    feedback: Optional[str] = Field(
+        default=None,
+        description="Explanation of why verification passed or failed. Include specific issues found.",
+    )
+    suggested_filter_relaxation: Optional[dict[str, Any]] = Field(
+        default=None,
+        description=(
+            "Suggested filter modifications for retry. "
+            "Keys are column names, values indicate changes (e.g., 'REMOVE', 'WIDEN', or new values)."
+        ),
+    )
+    unmet_constraints: Optional[list[str]] = Field(
+        default=None,
+        description="List of user constraints that the results failed to satisfy.",
+    )
+class VerifierModel(BaseModel):
+    """
+    Validate results against user constraints with structured feedback.
+    Use fast models (GPT-3.5, Haiku, Llama 3 8B) to minimize latency (~50-100ms).
+    Skipped for 'standard' mode when auto_bypass=true in router config.
+    Returns structured feedback for intelligent retry, not blind retry.
+    Example:
+        ```yaml
+        retriever:
+          verifier:
+            model: *fast_llm
+            on_failure: warn_and_retry
+            max_retries: 1
+        ```
+    """
+    model_config = ConfigDict(use_enum_values=True, extra="forbid")
+    model: Optional["LLMModel"] = Field(
+        default=None,
+        description="LLM for verification (fast model recommended)",
+    )
+    on_failure: Literal["warn", "retry", "warn_and_retry"] = Field(
+        default="warn",
+        description="Behavior when verification fails",
+    )
+    max_retries: int = Field(
+        default=1,
+        description="Maximum retry attempts before returning with warning",
+    )
+class RankedDocument(BaseModel):
+    """Single ranked document."""
+    index: int = Field(description="Document index from input list")
+    score: float = Field(description="0.0-1.0 relevance score")
+    reason: str = Field(default="", description="Why this score")
+class RankingResult(BaseModel):
+    """Reranking output."""
+    rankings: list[RankedDocument] = Field(
+        default_factory=list,
+        description="Ranked documents, highest score first",
+    )
 class RetrieverModel(BaseModel):
@@ -1821,10 +2260,22 @@ class RetrieverModel(BaseModel):
     search_parameters: SearchParametersModel = Field(
         default_factory=SearchParametersModel
     )
+    router: Optional[RouterModel] = Field(
+        default=None,
+        description="Optional query router for selecting execution mode (standard vs instructed).",
+    )
     rerank: Optional[RerankParametersModel | bool] = Field(
         default=None,
         description="Optional reranking configuration. Set to true for defaults, or provide ReRankParametersModel for custom settings.",
     )
+    instructed: Optional[InstructedRetrieverModel] = Field(
+        default=None,
+        description="Optional instructed retrieval with query decomposition and RRF merging.",
+    )
+    verifier: Optional[VerifierModel] = Field(
+        default=None,
+        description="Optional result verification with structured feedback for retry.",
+    )
     @model_validator(mode="after")
     def set_default_columns(self) -> Self:
@@ -1835,9 +2286,13 @@ class RetrieverModel(BaseModel):
     @model_validator(mode="after")
     def set_default_reranker(self) -> Self:
-        """Convert bool to ReRankParametersModel with defaults."""
+        """Convert bool to ReRankParametersModel with defaults.
+        When rerank: true is used, sets the default FlashRank model
+        (ms-marco-MiniLM-L-12-v2) to enable reranking.
+        """
         if isinstance(self.rerank, bool) and self.rerank:
-            self.rerank = RerankParametersModel()
+            self.rerank = RerankParametersModel(model="ms-marco-MiniLM-L-12-v2")
         return self
@@ -2985,8 +3440,24 @@ class GuidelineModel(BaseModel):
 class EvaluationModel(BaseModel):
+    """
+    Configuration for MLflow GenAI evaluation.
+    Attributes:
+        model: LLM model used as the judge for LLM-based scorers (e.g., Guidelines, Safety).
+               This model evaluates agent responses during evaluation.
+        table: Table to store evaluation results.
+        num_evals: Number of evaluation samples to generate.
+        agent_description: Description of the agent for evaluation data generation.
+        question_guidelines: Guidelines for generating evaluation questions.
+        custom_inputs: Custom inputs to pass to the agent during evaluation.
+        guidelines: List of guideline configurations for Guidelines scorers.
+    """
     model_config = ConfigDict(use_enum_values=True, extra="forbid")
-    model: LLMModel
+    model: LLMModel = Field(
+        ..., description="LLM model used as the judge for LLM-based evaluation scorers"
+    )
     table: TableModel
     num_evals: int
     agent_description: Optional[str] = None
@@ -2994,6 +3465,16 @@ class EvaluationModel(BaseModel):
     custom_inputs: dict[str, Any] = Field(default_factory=dict)
     guidelines: list[GuidelineModel] = Field(default_factory=list)
+    @property
+    def judge_model_endpoint(self) -> str:
+        """
+        Get the judge model endpoint string for MLflow scorers.
+        Returns:
+            Endpoint string in format 'databricks:/model-name'
+        """
+        return f"databricks:/{self.model.name}"
 class EvaluationDatasetExpectationsModel(BaseModel):
     model_config = ConfigDict(use_enum_values=True, extra="forbid")

dao-ai 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl

dao-ai 0.1.17__py3-none-any.whl → 0.1.19__py3-none-any.whl