PyPI - experiment-configuration-agent - Versions diffs - 0.1.6__py3-none-any.whl - Mend

experiment-configuration-agent 0.1.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

experiment_config_agent/__init__.py +0 -0
experiment_config_agent/agent.py +121 -0
experiment_config_agent/config.py +26 -0
experiment_config_agent/constants.py +210 -0
experiment_config_agent/models.py +61 -0
experiment_configuration_agent-0.1.6.dist-info/METADATA +120 -0
experiment_configuration_agent-0.1.6.dist-info/RECORD +9 -0
experiment_configuration_agent-0.1.6.dist-info/WHEEL +5 -0
experiment_configuration_agent-0.1.6.dist-info/top_level.txt +1 -0

experiment_config_agent/__init__.py ADDED Viewed

File without changes

experiment_config_agent/agent.py ADDED Viewed

@@ -0,0 +1,121 @@
+import logging
+from typing import Dict, Any, Optional, Tuple
+from sfn_blueprint.utils.llm_handler import SFNAIHandler
+from .config import GluonConfig
+from .constants import (
+    format_autogluon_config_prompt,
+    AUTOGLUON_CONFIG_SYSTEM_PROMPT,
+    AUTOGLUON_STRUCTURING_SYSTEM_PROMPT,
+)
+from .models import AutoGluonConfig
+class AutoGluonConfigAgent:
+    """
+    Two-stage AutoGluon configuration agent:
+    Agent 1 → Reasoning / recommendation (human-style)
+    Agent 2 → Structuring / strict JSON (Pydantic enforced)
+    Both stages call the LLM through ``SFNAIHandler.route_to`` using the
+    provider, model, temperature and max_tokens stored in ``self.config``.
+    """
+    # Keys that execute_task() reads from its task_data argument.
+    _REQUIRED_TASK_KEYS = ("domain", "use_case", "methodology", "dataset_insights")
+    def __init__(self, config: Optional[GluonConfig] = None):
+        """
+        Args:
+            config: LLM settings; a default ``GluonConfig()`` is created when omitted.
+        """
+        self.config = config or GluonConfig()
+        self.logger = logging.getLogger(__name__)
+        self.ai_handler = SFNAIHandler(logger_name="AutoGluonConfigAgent")
+    def _call_llm(self, system_prompt: str, user_content: str) -> Tuple[str, Dict[str, Any]]:
+        """Send one system + user message pair to the configured LLM and return (response, cost)."""
+        response, cost = self.ai_handler.route_to(
+            llm_provider=self.config.provider,
+            model=self.config.model,
+            configuration={
+                "messages": [
+                    {"role": "system", "content": system_prompt},
+                    {"role": "user", "content": user_content},
+                ],
+                "temperature": self.config.temperature,
+                "max_tokens": self.config.max_tokens,
+            },
+        )
+        return response, cost
+    @staticmethod
+    def _extract_json_object(text: str) -> str:
+        """
+        Return the outermost ``{...}`` span of *text*.
+        LLMs frequently wrap JSON in Markdown fences or add a sentence around it
+        even when told not to. When no brace pair is found the text is returned
+        unchanged so that validation reports the real problem.
+        """
+        start = text.find("{")
+        end = text.rfind("}")
+        if start == -1 or end <= start:
+            return text
+        return text[start:end + 1]
+    def _run_reasoning_agent(
+        self,
+        domain: Dict[str, Any],
+        use_case: Dict[str, Any],
+        methodology: str,
+        dataset_insights: Dict[str, Any],
+    ) -> Tuple[str, Dict[str, Any]]:
+        """
+        Stage 1: ask the LLM for a free-text configuration recommendation.
+        Returns:
+            Tuple of (raw LLM response, cost information reported by the handler)
+        """
+        system_prompt, user_prompt = format_autogluon_config_prompt(
+            domain=domain,
+            use_case=use_case,
+            methodology=methodology,
+            dataset_insights=dataset_insights,
+        )
+        # The formatter returns AUTOGLUON_CONFIG_SYSTEM_PROMPT as its first element,
+        # so use it directly instead of discarding it and re-reading the constant.
+        return self._call_llm(system_prompt, user_prompt)
+    def _run_structuring_agent(
+        self,
+        reasoning_output: str,
+    ) -> Tuple[AutoGluonConfig, Dict[str, Any]]:
+        """
+        Stage 2: convert the stage-1 recommendation into a validated ``AutoGluonConfig``.
+        Raises:
+            pydantic.ValidationError: if the LLM output does not match the schema.
+        """
+        response, cost = self._call_llm(
+            AUTOGLUON_STRUCTURING_SYSTEM_PROMPT, reasoning_output
+        )
+        payload = self._extract_json_object(response) if isinstance(response, str) else response
+        try:
+            config = AutoGluonConfig.model_validate_json(payload)
+        except ValueError:
+            # pydantic's ValidationError is a ValueError subclass; log for diagnosis, then re-raise.
+            self.logger.exception(
+                "Structuring agent output does not match the AutoGluonConfig schema"
+            )
+            raise
+        return config, cost
+    def configure_training(
+        self,
+        domain: Dict[str, Any],
+        use_case: Dict[str, Any],
+        methodology: str,
+        dataset_insights: Dict[str, Any],
+    ) -> Tuple[AutoGluonConfig, Dict[str, Any]]:
+        """
+        Run both stages and return the validated configuration.
+        Returns:
+            Tuple of (config, cost dict keyed by "reasoning_agent" and "structuring_agent")
+        """
+        reasoning_output, cost_1 = self._run_reasoning_agent(
+            domain, use_case, methodology, dataset_insights
+        )
+        config, cost_2 = self._run_structuring_agent(reasoning_output)
+        return config, {
+            "reasoning_agent": cost_1,
+            "structuring_agent": cost_2,
+        }
+    def execute_task(self, task_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Dictionary-based entry point.
+        Args:
+            task_data: Must contain "domain", "use_case", "methodology" and "dataset_insights".
+        Returns:
+            Dict with "configuration" (the config dumped to a plain dict) and "cost_summary".
+        Raises:
+            KeyError: if any required key is absent from task_data.
+        """
+        missing = [key for key in self._REQUIRED_TASK_KEYS if key not in task_data]
+        if missing:
+            # Same exception type as a plain lookup would raise, but names every missing key.
+            raise KeyError(f"task_data is missing required key(s): {', '.join(missing)}")
+        config, cost = self.configure_training(
+            domain=task_data["domain"],
+            use_case=task_data["use_case"],
+            methodology=task_data["methodology"],
+            dataset_insights=task_data["dataset_insights"],
+        )
+        return {
+            "configuration": config.model_dump(),
+            "cost_summary": cost,
+        }
+    def __call__(self, task_data: Dict[str, Any]) -> Dict[str, Any]:
+        """Alias for ``execute_task`` so the agent instance can be called directly."""
+        return self.execute_task(task_data)

experiment_config_agent/config.py ADDED Viewed

@@ -0,0 +1,26 @@
+from pydantic import Field
+from pydantic_settings import BaseSettings, SettingsConfigDict
+import os
+class GluonConfig(BaseSettings):
+    """
+    LLM settings used by the configuration agent.
+    Built on pydantic-settings: values may be supplied through environment
+    variables or a UTF-8 `.env` file, names are matched case-insensitively and
+    unrecognised entries are ignored (see ``model_config`` below).
+    """
+    model_config = SettingsConfigDict(
+        env_file='.env',
+        env_file_encoding='utf-8',
+        case_sensitive=False,
+        extra='ignore'
+    )
+    # When no explicit value is supplied, `provider` and `model` fall back to the
+    # LM_SERVICE_PROVIDER / LLM_MODEL_NAME process environment variables.
+    # default_factory defers the os.getenv lookup until a GluonConfig is created;
+    # a plain default= would freeze whatever the environment held at import time.
+    provider: str = Field(default_factory=lambda: os.getenv('LM_SERVICE_PROVIDER', 'openai'), description="AI provider to use")
+    model: str = Field(default_factory=lambda: os.getenv('LLM_MODEL_NAME', 'gpt-5-mini'), description="AI model to use")
+    temperature: float = Field(default=0.3, ge=0.0, le=0.5, description="AI model temperature")
+    max_tokens: int = Field(default=4000, ge=0, le=8000, description="Maximum tokens for AI response")

experiment_config_agent/constants.py ADDED Viewed

@@ -0,0 +1,210 @@
+# System prompt for the reasoning stage. The preset names listed here must stay
+# in sync with the values accepted by AutoGluonConfig.preset, otherwise the
+# structured output fails validation.
+AUTOGLUON_CONFIG_SYSTEM_PROMPT = """You are an expert AutoGluon configuration advisor specializing in TabularPredictor. Your goal is to provide production-ready configurations that optimize predictive performance while strictly adhering to the defined schema.
+Your role is to analyze the business domain and dataset characteristics to recommend optimal settings.
+CORE MODEL CONCEPTS:
+====================
+1. EVALUATION METRICS (Must be one of: 'accuracy', 'log_loss', 'f1', 'roc_auc', 'precision', 'recall'):
+- Use 'roc_auc' or 'f1' for imbalanced classification.
+- Use 'precision' or 'recall' when the cost of False Positives vs. False Negatives is asymmetric.
+- Use 'log_loss' for well-calibrated probability estimates.
+2. ENSEMBLE STRATEGY:
+- BAGGING (num_bag_folds): Essential for 'best_quality'. If > 0, k-fold cross-validation is used.
+- STACKING (num_stack_levels): Uses model predictions as features for higher layers. 1-2 levels recommended for 'best_quality'.
+- WEIGHTED ENSEMBLE: Always set fit_weighted_ensemble=True for maximum accuracy.
+3. ALLOWED MODELS (Only use these aliases):
+- 'GBM': LightGBM (Gradient Boosting Machine).
+- 'CAT': CatBoost (Excellent for categorical data).
+- 'XGB': XGBoost (High-performance gradient boosting).
+- 'RF': Random Forest (Robust and stable).
+- 'XT': Extremely Randomized Trees (Reduces variance).
+- 'KNN': K-Nearest Neighbors (Simple distance-based baseline).
+4. VALIDATION STRATEGY:
+- If bagging is enabled (num_bag_folds > 0), 'split_test_size' is ignored as CV is used.
+- If bagging is 0, 'split_test_size' (e.g., 0.1 to 0.2) is mandatory to monitor overfitting.
+PRESET SELECTION LOGIC (Ordered by Quality/Complexity):
+======================================================
+- "best_quality": High accuracy with bagging/stacking. Standard for competitions.
+- "high_quality": Balance of high accuracy and reasonable training time.
+- "good_quality": Recommended default for most production use cases.
+- "medium_quality": Fast prototyping and quick iterations.
+DOMAIN GUIDANCE:
+================
+- FRAUD/HEALTHCARE: High recall focus. Use 'f1' or 'roc_auc'. Enable bagging for stability.
+- AD TECH/CLICK-THROUGH: Use 'log_loss' to optimize probability calibration.
+- CUSTOMER CHURN: Focus on 'f1' to balance identifying leavers vs. misclassifying loyalists.
+CONSTRAINTS:
+- You MUST only use the 6 allowed models ('GBM', 'CAT', 'XGB', 'RF', 'XT', 'KNN').
+- You MUST only use the 4 allowed presets ('best_quality', 'high_quality', 'good_quality', 'medium_quality').
+- You MUST only use the 6 allowed metrics for both eval_metric and additional_metrics.
+Provide three distinct scenarios: Max Accuracy (Heavy), Production-Ready (Balanced), and Fast-Track (Speed)."""
+# System prompt for the structuring stage. The schema and field rules below
+# mirror the AutoGluonConfig model; the allowed model list matches the one
+# given to the reasoning stage (including 'KNN').
+AUTOGLUON_STRUCTURING_SYSTEM_PROMPT = """
+You are a STRICT JSON CONVERSION AGENT.
+You will receive a human-written AutoGluon recommendation that may contain:
+- multiple scenarios
+- explanations
+- markdown
+- YAML blocks
+YOUR TASK:
+1. Select ONLY the **Production-Ready / Balanced** scenario
+2. Convert it into VALID JSON
+3. The JSON MUST strictly match this schema:
+{
+  "eval_metric": string,
+  "preset": string,
+  "additional_metrics": list[string],
+  "time_limit": integer,
+  "num_bag_folds": integer,
+  "num_bag_sets": integer,
+  "num_stack_levels": integer,
+  "models": list[string],
+  "fit_weighted_ensemble": boolean,
+  "split_test_size": float
+}
+FIELD RULES:
+- The 'preset' field MUST be one of: 'best_quality', 'high_quality', 'good_quality', 'medium_quality'
+- The 'models' field MUST include at least one model from the allowed list: ['GBM', 'CAT', 'XGB', 'RF', 'XT', 'KNN']
+- If num_bag_folds > 0, set split_test_size to 0.0 (as bagging handles validation)
+- If num_bag_folds = 0, set split_test_size to one of: 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, or 0.5
+STRICT RULES:
+- OUTPUT JSON ONLY
+- NO markdown
+- NO YAML
+- NO explanations
+- NO comments
+- NO extra keys
+- Start with '{' and end with '}'
+"""
+def format_autogluon_config_prompt(
+    domain: dict,
+    use_case: dict | str,
+    methodology: str,
+    dataset_insights: dict
+) -> tuple[str, str]:
+    """
+    Format the system and user prompts for AutoGluon configuration recommendation.
+    Every argument is interpolated into the user prompt as-is (its string form),
+    so dictionaries appear in their Python repr.
+    Args:
+        domain: Business domain context (for example a name and a description)
+        use_case: Description of the specific use case and problem (dict or plain text)
+        methodology: Type of ML problem (binary_classification, multiclass_classification, regression)
+        dataset_insights: Dictionary containing feature and target information
+    Returns:
+        Tuple of (system_prompt, user_prompt)
+    """
+    # The TASK list below names every field of the output schema so the
+    # recommendation covers everything the structuring stage must emit.
+    user_prompt = f"""Please recommend optimal AutoGluon TabularPredictor configuration for the following scenario:
+DOMAIN INFORMATION:
+==================
+Domain: {domain}
+USE CASE:
+=========
+{use_case}
+METHODOLOGY:
+===========
+Problem Type: {methodology}
+DATASET INSIGHTS:
+================
+{dataset_insights}
+TASK:
+=====
+Based on the above information, recommend an optimal AutoGluon configuration that includes:
+1. eval_metric: The primary metric to optimize
+2. preset: Quality/speed tradeoff preset
+3. additional_metrics: Other metrics to track (list)
+4. time_limit: Training time in seconds
+5. num_bag_folds: Number of k-fold bagging folds (0 for none, 5-10 for bagging)
+6. num_bag_sets: Number of bagging sets (1-3, only if bagging is used)
+7. num_stack_levels: Number of stacking levels
+8. models: Which of the allowed models to train (list)
+9. fit_weighted_ensemble: Whether to fit a weighted ensemble of the base models (true/false)
+10. split_test_size: Fraction of data held out for validation (only relevant when num_bag_folds is 0)
+Consider multiple scenarios:
+- Scenario A: Maximum accuracy (accepting longer training time)
+- Scenario B: Balanced accuracy and speed (production-ready)
+- Scenario C: Fast training and inference (prototyping/deployment constrained)
+"""
+    return AUTOGLUON_CONFIG_SYSTEM_PROMPT, user_prompt

experiment_config_agent/models.py ADDED Viewed

@@ -0,0 +1,61 @@
+from pydantic import BaseModel, Field
+from typing import List, Literal
+class AutoGluonConfig(BaseModel):
+    """
+    Structured AutoGluon TabularPredictor configuration produced by the agent.
+    Every field is required. Field descriptions double as guidance for the LLM
+    and are kept consistent with the values this model actually accepts.
+    """
+    eval_metric: str = Field(
+        ...,
+        description="Primary metric to optimize. Allowed: 'accuracy', 'log_loss', 'f1', 'roc_auc', 'precision', 'recall'."
+    )
+    preset: Literal[
+        'best_quality', 'high_quality', 'good_quality', 'medium_quality'
+    ] = Field(
+        ...,
+        description="Preset configuration. One of 'best_quality', 'high_quality', 'good_quality', 'medium_quality'. 'best_quality' enables bagging/stacking for maximum accuracy."
+    )
+    additional_metrics: List[str] = Field(
+        ...,
+        description="List of additional metrics to track. Allowed: 'accuracy', 'log_loss', 'f1', 'roc_auc', 'precision', 'recall'."
+    )
+    # A training budget of zero or less is meaningless, hence gt=0.
+    time_limit: int = Field(
+        ...,
+        gt=0,
+        description="Total training time in seconds. AutoGluon will distribute this across models. Small datasets: 300, Medium: 3600, Large: 7200+."
+    )
+    # Counts below cannot be negative; 0 means "feature disabled".
+    num_bag_folds: int = Field(
+        ...,
+        ge=0,
+        description="Number of folds for k-fold bagging. 0 = no bagging. 5-10 is standard for 'best_quality'. Bagging reduces variance and allows the model to be trained on all data (if refit_full=True)."
+    )
+    num_bag_sets: int = Field(
+        ...,
+        ge=0,
+        description="Number of bagging sets. Each set repeats k-fold bagging to reduce variance further. Only used if num_bag_folds > 0. Usually 1-3."
+    )
+    num_stack_levels: int = Field(
+        ...,
+        ge=0,
+        description="Levels of stacking. 0 = no stacking, 1 = one level (models trained on base model predictions). Higher values increase accuracy but exponentially increase training time."
+    )
+    models: List[str] = Field(
+        ...,
+        description="Models to train. Allowed: 'GBM', 'CAT', 'XGB', 'RF', 'XT', 'KNN'."
+    )
+    fit_weighted_ensemble: bool = Field(
+        ...,
+        description="Whether to fit an ensemble that weights predictions of base models to improve accuracy. Usually recommended to keep True."
+    )
+    # A hold-out fraction must lie in [0, 1); 0.0 is what the structuring prompt
+    # requests when bagging performs the validation.
+    split_test_size: float = Field(
+        ...,
+        ge=0.0,
+        lt=1.0,
+        description="Fraction of data held out for validation. Use 0.0 when num_bag_folds > 0 (bagging handles validation); otherwise choose exactly one value from: [0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]."
+    )

experiment_configuration_agent-0.1.6.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,120 @@
+Metadata-Version: 2.4
+Name: experiment-configuration-agent
+Version: 0.1.6
+Summary: Add your description here
+Requires-Python: >=3.10
+Description-Content-Type: text/markdown
+Requires-Dist: pydantic-settings
+Requires-Dist: sfn-blueprint>=0.7.4
+Provides-Extra: dev
+Requires-Dist: pytest; extra == "dev"
+Requires-Dist: pytest-mock; extra == "dev"
+# Experiment Configuration Agent for AutoGluon
+This agent uses a Large Language Model to recommend optimal configurations for AutoGluon's `TabularPredictor` based on your machine learning problem context. By providing details about your domain, use case, and dataset, the agent will generate a set of `TabularPredictor` parameters designed to optimize for performance and efficiency.
+## Features
+-   **Intelligent Configuration:** Leverages LLMs to recommend `eval_metric`, `presets`, `time_limit`, and ensembling parameters.
+-   **Context-Aware:** Considers the business domain, specific use case, ML methodology (e.g., classification, regression), and dataset characteristics.
+-   **Flexible Backend:** Powered by `sfn-blueprint`, allowing for a configurable LLM backend.
+-   **Multiple Scenarios:** Provides recommendations for different optimization goals, such as maximizing accuracy, balancing performance and speed, or fast prototyping.
+## Installation
+This project uses `uv` for dependency management and requires Python 3.10 or higher.
+1.  **Clone the repository:**
+    ```bash
+    git clone https://github.com/stepfnAI/experiment_config_agent.git
+    cd experiment_config_agent
+    ```
+2.  **Set up the environment and install dependencies:**
+    It is recommended to use a virtual environment. `uv` can create one for you.
+    ```bash
+    # Create a virtual environment and install dependencies
+    uv sync --extra dev
+    # Activate the virtual environment
+    source .venv/bin/activate
+    ```
+## Usage
+### Basic usage
+```bash
+python ./examples/basic_usage.py
+```
+To get a configuration recommendation, instantiate the `AutoGluonConfigAgent` and pass a dictionary containing the problem context.
+1.  **Create a `.env` file** in the project root to configure the LLM provider. See the [Configuration](#configuration) section for more details.
+    ```
+    PROVIDER="openai"
+    MODEL="gpt-4-turbo"
+    # Add your API key, e.g., OPENAI_API_KEY="sk-..."
+    ```
+2.  **Create your Python script:**
+    ```python
+    from experiment_config_agent.agent import AutoGluonConfigAgent
+    # 1. Define the problem context
+    task_data = {
+        "domain": {
+            "name": "Manufacturing",
+            "description": "An automotive parts manufacturing facility with multiple production lines."
+        },
+        "use_case": {
+            "name": "Predictive Maintenance",
+            "description": "Detect unusual temporal patterns in sensor data to predict equipment failure and prevent breakdowns."
+        },
+        "methodology": "binary_classification",
+        "dataset_insights": {
+            "num_samples": 5000,
+            "num_features": 10,
+            "target": {
+                "name": "failure_flag",
+                "imbalance_ratio": 0.05 # Highly imbalanced
+            },
+            "feature_summary": {
+                "sensor_A": {"min": 0.1, "max": 100.5, "dtype": "float"},
+                "production_line_id": {"unique_count": 3, "dtype": "category"}
+            }
+        }
+    }
+    # 2. Initialize the agent
+    agent = AutoGluonConfigAgent()
+    # 3. Get the configuration recommendation
+    result = agent(task_data)
+    # 4. Print the result
+    print("Recommended AutoGluon Configuration:")
+    print(result.get("configuration"))
+    print("\nCost Summary:")
+    print(result.get("cost_summary"))
+    ```
+## Configuration
+The agent is configured via environment variables, which can be placed in a `.env` file in the project root. The available settings are defined by the `GluonConfig` class.
+-   `PROVIDER`: The LLM provider to use (e.g., `"openai"`, `"anthropic"`). If unset, the `LM_SERVICE_PROVIDER` environment variable is used, falling back to `"openai"`.
+-   `MODEL`: The specific model to use (e.g., `"gpt-4-turbo"`, `"claude-3-opus-20240229"`). If unset, the `LLM_MODEL_NAME` environment variable is used, falling back to `"gpt-5-mini"`.
+-   `TEMPERATURE`: The model's temperature setting, between `0.0` and `0.5` (default `0.3`).
+-   `MAX_TOKENS`: The maximum number of tokens for the response, at most `8000` (default `4000`).
+You will also need to set the API key for your chosen provider, for example `OPENAI_API_KEY="your-key-here"`.
+## Testing
+This project uses `pytest`. To run the test suite, execute the following command from the project root:
+```bash
+pytest
+```

experiment_configuration_agent-0.1.6.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,9 @@
+experiment_config_agent/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+experiment_config_agent/agent.py,sha256=bjFBjyR0_0hRUePfCDICitGfG7fVuZxBhPkwZZ0gsxM,3700
+experiment_config_agent/config.py,sha256=yAS1XWdOklcpuHwq5F3u-j2zQmX-ErLowj9IqcgnqH4,1138
+experiment_config_agent/constants.py,sha256=O7fsJQXVmt8Zs-A3sYxTafyNdpQv2H4tbL3E--rJ7Ug,8167
+experiment_config_agent/models.py,sha256=u8bANPWUUE2hlH8S7ZA2N9bRKuN6vp7vGRpVRwXn-aE,2271
+experiment_configuration_agent-0.1.6.dist-info/METADATA,sha256=y30snhqZgHUF5XXxG8DCPR9-1uQ2XplYdu-h6TEfu-I,4453
+experiment_configuration_agent-0.1.6.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+experiment_configuration_agent-0.1.6.dist-info/top_level.txt,sha256=5c9CyVEjFUlvEf08vJIvi6BkzGuS4wdwtjdmCk2uL2U,24
+experiment_configuration_agent-0.1.6.dist-info/RECORD,,

experiment_configuration_agent-0.1.6.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,5 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.9.0)
+Root-Is-Purelib: true
+Tag: py3-none-any

experiment_configuration_agent-0.1.6.dist-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@
1	+ experiment_config_agent