edsl 0.1.54__py3-none-any.whl → 0.1.56__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsl/__init__.py +8 -1
- edsl/__init__original.py +134 -0
- edsl/__version__.py +1 -1
- edsl/agents/agent.py +29 -0
- edsl/agents/agent_list.py +36 -1
- edsl/base/base_class.py +281 -151
- edsl/base/data_transfer_models.py +15 -4
- edsl/buckets/__init__.py +8 -3
- edsl/buckets/bucket_collection.py +9 -3
- edsl/buckets/model_buckets.py +4 -2
- edsl/buckets/token_bucket.py +2 -2
- edsl/buckets/token_bucket_client.py +5 -3
- edsl/caching/cache.py +131 -62
- edsl/caching/cache_entry.py +70 -58
- edsl/caching/sql_dict.py +17 -0
- edsl/cli.py +99 -0
- edsl/config/config_class.py +16 -0
- edsl/conversation/__init__.py +31 -0
- edsl/coop/coop.py +276 -242
- edsl/coop/coop_jobs_objects.py +59 -0
- edsl/coop/coop_objects.py +29 -0
- edsl/coop/coop_regular_objects.py +26 -0
- edsl/coop/utils.py +24 -19
- edsl/dataset/dataset.py +338 -101
- edsl/dataset/dataset_operations_mixin.py +216 -180
- edsl/db_list/sqlite_list.py +349 -0
- edsl/inference_services/__init__.py +40 -5
- edsl/inference_services/exceptions.py +11 -0
- edsl/inference_services/services/anthropic_service.py +5 -2
- edsl/inference_services/services/aws_bedrock.py +6 -2
- edsl/inference_services/services/azure_ai.py +6 -2
- edsl/inference_services/services/google_service.py +7 -3
- edsl/inference_services/services/mistral_ai_service.py +6 -2
- edsl/inference_services/services/open_ai_service.py +6 -2
- edsl/inference_services/services/perplexity_service.py +6 -2
- edsl/inference_services/services/test_service.py +94 -5
- edsl/interviews/answering_function.py +167 -59
- edsl/interviews/interview.py +124 -72
- edsl/interviews/interview_task_manager.py +10 -0
- edsl/interviews/request_token_estimator.py +8 -0
- edsl/invigilators/invigilators.py +35 -13
- edsl/jobs/async_interview_runner.py +146 -104
- edsl/jobs/data_structures.py +6 -4
- edsl/jobs/decorators.py +61 -0
- edsl/jobs/fetch_invigilator.py +61 -18
- edsl/jobs/html_table_job_logger.py +14 -2
- edsl/jobs/jobs.py +180 -104
- edsl/jobs/jobs_component_constructor.py +2 -2
- edsl/jobs/jobs_interview_constructor.py +2 -0
- edsl/jobs/jobs_pricing_estimation.py +154 -113
- edsl/jobs/jobs_remote_inference_logger.py +4 -0
- edsl/jobs/jobs_runner_status.py +30 -25
- edsl/jobs/progress_bar_manager.py +79 -0
- edsl/jobs/remote_inference.py +35 -1
- edsl/key_management/key_lookup_builder.py +6 -1
- edsl/language_models/language_model.py +110 -12
- edsl/language_models/model.py +10 -3
- edsl/language_models/price_manager.py +176 -71
- edsl/language_models/registry.py +5 -0
- edsl/notebooks/notebook.py +77 -10
- edsl/questions/VALIDATION_README.md +134 -0
- edsl/questions/__init__.py +24 -1
- edsl/questions/exceptions.py +21 -0
- edsl/questions/question_dict.py +201 -16
- edsl/questions/question_multiple_choice_with_other.py +624 -0
- edsl/questions/question_registry.py +2 -1
- edsl/questions/templates/multiple_choice_with_other/__init__.py +0 -0
- edsl/questions/templates/multiple_choice_with_other/answering_instructions.jinja +15 -0
- edsl/questions/templates/multiple_choice_with_other/question_presentation.jinja +17 -0
- edsl/questions/validation_analysis.py +185 -0
- edsl/questions/validation_cli.py +131 -0
- edsl/questions/validation_html_report.py +404 -0
- edsl/questions/validation_logger.py +136 -0
- edsl/results/result.py +115 -46
- edsl/results/results.py +702 -171
- edsl/scenarios/construct_download_link.py +16 -3
- edsl/scenarios/directory_scanner.py +226 -226
- edsl/scenarios/file_methods.py +5 -0
- edsl/scenarios/file_store.py +150 -9
- edsl/scenarios/handlers/__init__.py +5 -1
- edsl/scenarios/handlers/mp4_file_store.py +104 -0
- edsl/scenarios/handlers/webm_file_store.py +104 -0
- edsl/scenarios/scenario.py +120 -101
- edsl/scenarios/scenario_list.py +800 -727
- edsl/scenarios/scenario_list_gc_test.py +146 -0
- edsl/scenarios/scenario_list_memory_test.py +214 -0
- edsl/scenarios/scenario_list_source_refactor.md +35 -0
- edsl/scenarios/scenario_selector.py +5 -4
- edsl/scenarios/scenario_source.py +1990 -0
- edsl/scenarios/tests/test_scenario_list_sources.py +52 -0
- edsl/surveys/survey.py +22 -0
- edsl/tasks/__init__.py +4 -2
- edsl/tasks/task_history.py +198 -36
- edsl/tests/scenarios/test_ScenarioSource.py +51 -0
- edsl/tests/scenarios/test_scenario_list_sources.py +51 -0
- edsl/utilities/__init__.py +2 -1
- edsl/utilities/decorators.py +121 -0
- edsl/utilities/memory_debugger.py +1010 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/METADATA +51 -76
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/RECORD +103 -79
- edsl/jobs/jobs_runner_asyncio.py +0 -281
- edsl/language_models/unused/fake_openai_service.py +0 -60
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/LICENSE +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/WHEEL +0 -0
- {edsl-0.1.54.dist-info → edsl-0.1.56.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,404 @@
|
|
1
|
+
"""Generate an HTML report for validation failures.
|
2
|
+
|
3
|
+
This module provides functionality to create an HTML report of validation failures,
|
4
|
+
including statistics, suggestions for improvements, and examples of common failures.
|
5
|
+
"""
|
6
|
+
|
7
|
+
import html
import json
import os
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

from ..config import CONFIG
from .validation_analysis import (
    get_validation_failure_stats,
    suggest_fix_improvements,
    export_improvements_report
)
from .validation_logger import get_validation_failure_logs
|
20
|
+
|
21
|
+
HTML_TEMPLATE = """
|
22
|
+
<!DOCTYPE html>
|
23
|
+
<html lang="en">
|
24
|
+
<head>
|
25
|
+
<meta charset="UTF-8">
|
26
|
+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
27
|
+
<title>EDSL Validation Failures Report</title>
|
28
|
+
<style>
|
29
|
+
body {
|
30
|
+
font-family: -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
|
31
|
+
line-height: 1.6;
|
32
|
+
color: #333;
|
33
|
+
max-width: 1200px;
|
34
|
+
margin: 0 auto;
|
35
|
+
padding: 20px;
|
36
|
+
}
|
37
|
+
h1, h2, h3, h4 {
|
38
|
+
color: #2c3e50;
|
39
|
+
}
|
40
|
+
.header {
|
41
|
+
border-bottom: 1px solid #eee;
|
42
|
+
padding-bottom: 10px;
|
43
|
+
margin-bottom: 20px;
|
44
|
+
display: flex;
|
45
|
+
justify-content: space-between;
|
46
|
+
align-items: center;
|
47
|
+
}
|
48
|
+
.timestamp {
|
49
|
+
color: #7f8c8d;
|
50
|
+
font-size: 0.9em;
|
51
|
+
}
|
52
|
+
.summary {
|
53
|
+
background-color: #f8f9fa;
|
54
|
+
border-radius: 5px;
|
55
|
+
padding: 15px;
|
56
|
+
margin-bottom: 20px;
|
57
|
+
}
|
58
|
+
.stats-container, .suggestions-container, .examples-container {
|
59
|
+
margin-bottom: 30px;
|
60
|
+
}
|
61
|
+
table {
|
62
|
+
width: 100%;
|
63
|
+
border-collapse: collapse;
|
64
|
+
margin-bottom: 20px;
|
65
|
+
}
|
66
|
+
th, td {
|
67
|
+
padding: 12px 15px;
|
68
|
+
text-align: left;
|
69
|
+
border-bottom: 1px solid #ddd;
|
70
|
+
}
|
71
|
+
th {
|
72
|
+
background-color: #f8f9fa;
|
73
|
+
font-weight: 600;
|
74
|
+
}
|
75
|
+
tr:hover {
|
76
|
+
background-color: #f5f5f5;
|
77
|
+
}
|
78
|
+
.suggestion {
|
79
|
+
background-color: #e3f2fd;
|
80
|
+
border-left: 4px solid #2196f3;
|
81
|
+
padding: 10px 15px;
|
82
|
+
margin-bottom: 10px;
|
83
|
+
border-radius: 0 4px 4px 0;
|
84
|
+
}
|
85
|
+
.card {
|
86
|
+
border: 1px solid #ddd;
|
87
|
+
border-radius: 4px;
|
88
|
+
padding: 15px;
|
89
|
+
margin-bottom: 20px;
|
90
|
+
box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
|
91
|
+
}
|
92
|
+
.card-header {
|
93
|
+
font-weight: 600;
|
94
|
+
margin-bottom: 10px;
|
95
|
+
padding-bottom: 10px;
|
96
|
+
border-bottom: 1px solid #eee;
|
97
|
+
}
|
98
|
+
.example {
|
99
|
+
background-color: #fff8e1;
|
100
|
+
border-left: 4px solid #ffc107;
|
101
|
+
padding: 10px 15px;
|
102
|
+
margin-bottom: 10px;
|
103
|
+
border-radius: 0 4px 4px 0;
|
104
|
+
overflow-x: auto;
|
105
|
+
}
|
106
|
+
pre {
|
107
|
+
background-color: #f5f5f5;
|
108
|
+
padding: 10px;
|
109
|
+
border-radius: 4px;
|
110
|
+
overflow-x: auto;
|
111
|
+
}
|
112
|
+
code {
|
113
|
+
font-family: SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace;
|
114
|
+
font-size: 0.9em;
|
115
|
+
}
|
116
|
+
.badge {
|
117
|
+
display: inline-block;
|
118
|
+
padding: 3px 7px;
|
119
|
+
font-size: 0.75em;
|
120
|
+
font-weight: 600;
|
121
|
+
line-height: 1;
|
122
|
+
text-align: center;
|
123
|
+
white-space: nowrap;
|
124
|
+
vertical-align: baseline;
|
125
|
+
border-radius: 10px;
|
126
|
+
background-color: #e9ecef;
|
127
|
+
margin-right: 5px;
|
128
|
+
}
|
129
|
+
.badge-warning {
|
130
|
+
background-color: #fff3cd;
|
131
|
+
color: #856404;
|
132
|
+
}
|
133
|
+
.badge-primary {
|
134
|
+
background-color: #cfe2ff;
|
135
|
+
color: #084298;
|
136
|
+
}
|
137
|
+
.badge-success {
|
138
|
+
background-color: #d1e7dd;
|
139
|
+
color: #0f5132;
|
140
|
+
}
|
141
|
+
.fix-method {
|
142
|
+
background-color: #e8f5e9;
|
143
|
+
border-left: 4px solid #4caf50;
|
144
|
+
padding: 10px 15px;
|
145
|
+
margin: 10px 0;
|
146
|
+
border-radius: 0 4px 4px 0;
|
147
|
+
}
|
148
|
+
</style>
|
149
|
+
</head>
|
150
|
+
<body>
|
151
|
+
<div class="header">
|
152
|
+
<h1>EDSL Validation Failures Report</h1>
|
153
|
+
<span class="timestamp">Generated on {{timestamp}}</span>
|
154
|
+
</div>
|
155
|
+
|
156
|
+
<div class="summary">
|
157
|
+
<h2>Summary</h2>
|
158
|
+
<p>This report analyzes validation failures that occurred when question answers didn't meet the expected format or constraints.
|
159
|
+
It provides statistics, improvement suggestions for fix methods, and examples of common failures.</p>
|
160
|
+
<p><strong>Total validation failures:</strong> {{total_failures}}</p>
|
161
|
+
<p><strong>Question types with failures:</strong> {{question_types_count}}</p>
|
162
|
+
</div>
|
163
|
+
|
164
|
+
<div class="stats-container">
|
165
|
+
<h2>Validation Failure Statistics</h2>
|
166
|
+
|
167
|
+
<div class="card">
|
168
|
+
<div class="card-header">Failures by Question Type</div>
|
169
|
+
<table>
|
170
|
+
<thead>
|
171
|
+
<tr>
|
172
|
+
<th>Question Type</th>
|
173
|
+
<th>Failure Count</th>
|
174
|
+
<th>Percentage</th>
|
175
|
+
</tr>
|
176
|
+
</thead>
|
177
|
+
<tbody>
|
178
|
+
{{type_stats_rows}}
|
179
|
+
</tbody>
|
180
|
+
</table>
|
181
|
+
</div>
|
182
|
+
|
183
|
+
<div class="card">
|
184
|
+
<div class="card-header">Top Error Messages</div>
|
185
|
+
<table>
|
186
|
+
<thead>
|
187
|
+
<tr>
|
188
|
+
<th>Error Message</th>
|
189
|
+
<th>Occurrence Count</th>
|
190
|
+
</tr>
|
191
|
+
</thead>
|
192
|
+
<tbody>
|
193
|
+
{{error_stats_rows}}
|
194
|
+
</tbody>
|
195
|
+
</table>
|
196
|
+
</div>
|
197
|
+
</div>
|
198
|
+
|
199
|
+
<div class="suggestions-container">
|
200
|
+
<h2>Fix Method Improvement Suggestions</h2>
|
201
|
+
{{suggestions_content}}
|
202
|
+
</div>
|
203
|
+
|
204
|
+
<div class="examples-container">
|
205
|
+
<h2>Example Validation Failures</h2>
|
206
|
+
{{examples_content}}
|
207
|
+
</div>
|
208
|
+
</body>
|
209
|
+
</html>
|
210
|
+
"""
|
211
|
+
|
212
|
+
|
213
|
+
def _generate_type_stats_rows(stats: Dict) -> str:
|
214
|
+
"""Generate HTML table rows for question type statistics."""
|
215
|
+
type_stats = stats.get("by_question_type", {})
|
216
|
+
total_failures = sum(type_stats.values())
|
217
|
+
|
218
|
+
rows = []
|
219
|
+
for question_type, count in sorted(type_stats.items(), key=lambda x: x[1], reverse=True):
|
220
|
+
percentage = (count / total_failures) * 100 if total_failures > 0 else 0
|
221
|
+
row = (
|
222
|
+
f"<tr>"
|
223
|
+
f"<td>{question_type}</td>"
|
224
|
+
f"<td>{count}</td>"
|
225
|
+
f"<td>{percentage:.1f}%</td>"
|
226
|
+
f"</tr>"
|
227
|
+
)
|
228
|
+
rows.append(row)
|
229
|
+
|
230
|
+
return "\n".join(rows)
|
231
|
+
|
232
|
+
|
233
|
+
def _generate_error_stats_rows(stats: Dict) -> str:
|
234
|
+
"""Generate HTML table rows for error message statistics."""
|
235
|
+
error_counts = {}
|
236
|
+
|
237
|
+
# Aggregate error counts across all question types
|
238
|
+
for question_type, errors in stats.get("by_error_message", {}).items():
|
239
|
+
for error_msg, count in errors.items():
|
240
|
+
error_counts[error_msg] = error_counts.get(error_msg, 0) + count
|
241
|
+
|
242
|
+
# Sort by count (descending)
|
243
|
+
sorted_errors = sorted(error_counts.items(), key=lambda x: x[1], reverse=True)
|
244
|
+
|
245
|
+
rows = []
|
246
|
+
for error_msg, count in sorted_errors[:10]: # Show top 10 errors
|
247
|
+
shortened_msg = error_msg[:100] + "..." if len(error_msg) > 100 else error_msg
|
248
|
+
row = (
|
249
|
+
f"<tr>"
|
250
|
+
f"<td>{shortened_msg}</td>"
|
251
|
+
f"<td>{count}</td>"
|
252
|
+
f"</tr>"
|
253
|
+
)
|
254
|
+
rows.append(row)
|
255
|
+
|
256
|
+
return "\n".join(rows)
|
257
|
+
|
258
|
+
|
259
|
+
def _generate_suggestions_content(suggestions: Dict) -> str:
|
260
|
+
"""Generate HTML content for fix method suggestions."""
|
261
|
+
if not suggestions:
|
262
|
+
return "<p>No suggestions available. Log more validation failures to generate improvement suggestions.</p>"
|
263
|
+
|
264
|
+
content = []
|
265
|
+
|
266
|
+
for question_type, question_suggestions in suggestions.items():
|
267
|
+
content.append(f"<div class='card'>")
|
268
|
+
content.append(f"<div class='card-header'>{question_type}</div>")
|
269
|
+
|
270
|
+
for suggestion in question_suggestions:
|
271
|
+
error_msg = suggestion.get("error_message", "")
|
272
|
+
occurrence_count = suggestion.get("occurrence_count", 0)
|
273
|
+
suggestion_text = suggestion.get("suggestion", "")
|
274
|
+
|
275
|
+
content.append(
|
276
|
+
f"<div class='suggestion'>"
|
277
|
+
f"<p><strong>Error:</strong> {error_msg}</p>"
|
278
|
+
f"<p><strong>Occurrences:</strong> {occurrence_count}</p>"
|
279
|
+
f"<div class='fix-method'>"
|
280
|
+
f"<p><strong>Suggested improvement:</strong></p>"
|
281
|
+
f"<p>{suggestion_text}</p>"
|
282
|
+
f"</div>"
|
283
|
+
f"</div>"
|
284
|
+
)
|
285
|
+
|
286
|
+
content.append("</div>")
|
287
|
+
|
288
|
+
return "\n".join(content)
|
289
|
+
|
290
|
+
|
291
|
+
def _generate_examples_content(logs: List[Dict]) -> str:
|
292
|
+
"""Generate HTML content for example validation failures."""
|
293
|
+
if not logs:
|
294
|
+
return "<p>No validation failure examples available.</p>"
|
295
|
+
|
296
|
+
content = []
|
297
|
+
|
298
|
+
# Group logs by question type
|
299
|
+
logs_by_type = {}
|
300
|
+
for log in logs:
|
301
|
+
question_type = log.get("question_type", "unknown")
|
302
|
+
if question_type not in logs_by_type:
|
303
|
+
logs_by_type[question_type] = []
|
304
|
+
logs_by_type[question_type].append(log)
|
305
|
+
|
306
|
+
# For each question type, show the most recent example
|
307
|
+
for question_type, type_logs in logs_by_type.items():
|
308
|
+
# Sort by timestamp (newest first)
|
309
|
+
sorted_logs = sorted(type_logs, key=lambda x: x.get("timestamp", ""), reverse=True)
|
310
|
+
example_log = sorted_logs[0]
|
311
|
+
|
312
|
+
error_message = example_log.get("error_message", "")
|
313
|
+
invalid_data = example_log.get("invalid_data", {})
|
314
|
+
model_schema = example_log.get("model_schema", {})
|
315
|
+
|
316
|
+
content.append(f"<div class='card'>")
|
317
|
+
content.append(f"<div class='card-header'>{question_type}</div>")
|
318
|
+
|
319
|
+
content.append(
|
320
|
+
f"<div class='example'>"
|
321
|
+
f"<p><strong>Error:</strong> {error_message}</p>"
|
322
|
+
f"<p><strong>Invalid Data:</strong></p>"
|
323
|
+
f"<pre><code>{json.dumps(invalid_data, indent=2)}</code></pre>"
|
324
|
+
f"<p><strong>Expected Schema:</strong></p>"
|
325
|
+
f"<pre><code>{json.dumps(model_schema, indent=2)}</code></pre>"
|
326
|
+
f"</div>"
|
327
|
+
)
|
328
|
+
|
329
|
+
content.append("</div>")
|
330
|
+
|
331
|
+
return "\n".join(content)
|
332
|
+
|
333
|
+
|
334
|
+
def generate_html_report(output_path: Optional[Path] = None) -> Path:
    """
    Generate an HTML report of validation failures.

    Args:
        output_path: Optional custom path for the report. When omitted,
            the report is written as ``validation_report.html`` in the
            EDSL log directory (the ``EDSL_LOG_DIR`` config value, falling
            back to ``~/.edsl/logs``).

    Returns:
        Path to the generated HTML report.
    """
    # Determine output path
    if output_path is None:
        default_log_dir = Path.home() / ".edsl" / "logs"
        try:
            report_dir = Path(CONFIG.get("EDSL_LOG_DIR"))
        except Exception:
            # If EDSL_LOG_DIR is not defined, use default
            report_dir = default_log_dir
        os.makedirs(report_dir, exist_ok=True)
        output_path = report_dir / "validation_report.html"

    # Get validation data
    logs = get_validation_failure_logs(n=100)  # Get up to 100 recent logs
    stats = get_validation_failure_stats()
    suggestions = suggest_fix_improvements()

    # Calculate summary statistics
    total_failures = sum(stats.get("by_question_type", {}).values())
    question_types_count = len(stats.get("by_question_type", {}))

    # Fill the template placeholders ({{name}} markers) via plain string
    # replacement -- no templating engine dependency is needed.
    replacements = {
        "{{timestamp}}": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "{{total_failures}}": str(total_failures),
        "{{question_types_count}}": str(question_types_count),
        "{{type_stats_rows}}": _generate_type_stats_rows(stats),
        "{{error_stats_rows}}": _generate_error_stats_rows(stats),
        "{{suggestions_content}}": _generate_suggestions_content(suggestions),
        "{{examples_content}}": _generate_examples_content(logs),
    }
    html_content = HTML_TEMPLATE
    for placeholder, value in replacements.items():
        html_content = html_content.replace(placeholder, value)

    # The template declares <meta charset="UTF-8">, so write the file
    # explicitly as UTF-8 instead of the platform-default encoding.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(html_content)

    return output_path
|
387
|
+
|
388
|
+
|
389
|
+
def generate_and_open_report() -> None:
    """Build the validation-failure report and try to show it in a browser."""
    report_path = generate_html_report()
    print(f"Report generated at: {report_path}")

    # Launching a browser is best-effort only (may fail on headless hosts);
    # on failure we simply tell the user where the file lives.
    try:
        import webbrowser

        file_url = f"file://{report_path}"
        webbrowser.open(file_url)
    except Exception as e:
        print(f"Could not open browser: {e}")
        print(f"Report is available at: {report_path}")
|
401
|
+
|
402
|
+
|
403
|
+
if __name__ == "__main__":
    # Allow running this module directly to produce and view the report.
    generate_and_open_report()
|
@@ -0,0 +1,136 @@
|
|
1
|
+
"""Logger for validation failures in questions.
|
2
|
+
|
3
|
+
This module provides functionality to log validation failures that occur when
|
4
|
+
question answers don't meet the expected format or constraints. The logs can be
|
5
|
+
used to improve the "fix" methods for questions.
|
6
|
+
"""
|
7
|
+
|
8
|
+
import datetime
|
9
|
+
import json
|
10
|
+
import logging
|
11
|
+
import os
|
12
|
+
import traceback
|
13
|
+
from pathlib import Path
|
14
|
+
from typing import Any, Dict, Optional
|
15
|
+
|
16
|
+
from ..config import CONFIG
|
17
|
+
|
18
|
+
# Module-level side effect: a dedicated "validation_failures" logger is
# configured (directory created, file handler attached) at import time.
logger = logging.getLogger("validation_failures")
logger.setLevel(logging.INFO)

# Determine the log directory: the EDSL_LOG_DIR config value when set,
# otherwise ~/.edsl/logs.
DEFAULT_LOG_DIR = Path.home() / ".edsl" / "logs"
try:
    LOG_DIR = Path(CONFIG.get("EDSL_LOG_DIR"))
except Exception:
    # If EDSL_LOG_DIR is not defined, use default
    LOG_DIR = DEFAULT_LOG_DIR
VALIDATION_LOG_FILE = LOG_DIR / "validation_failures.log"

# Create log directory if it doesn't exist
os.makedirs(LOG_DIR, exist_ok=True)

# Create file handler appending to the validation log file
file_handler = logging.FileHandler(VALIDATION_LOG_FILE)
file_handler.setLevel(logging.INFO)

# Formatter produces lines of the form
#   "<timestamp> - validation_failures - INFO - <message>";
# get_validation_failure_logs() relies on this layout when parsing.
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)

# Add handler to logger
logger.addHandler(file_handler)

# Touch the log file to make sure it exists even before the first failure
if not os.path.exists(VALIDATION_LOG_FILE):
    with open(VALIDATION_LOG_FILE, 'a'):
        pass
|
49
|
+
|
50
|
+
|
51
|
+
def log_validation_failure(
    question_type: str,
    question_name: str,
    error_message: str,
    invalid_data: Dict[str, Any],
    model_schema: Dict[str, Any],
    question_dict: Optional[Dict[str, Any]] = None,
) -> None:
    """
    Log a validation failure to the validation failures log file.

    The entry is serialized as a single JSON object so that
    get_validation_failure_logs() can parse it back.

    Args:
        question_type: The type of question that had a validation failure
        question_name: The name of the question
        error_message: The validation error message
        invalid_data: The data that failed validation
        model_schema: The schema of the model used for validation
        question_dict: Optional dictionary representation of the question
    """
    log_entry = {
        "timestamp": datetime.datetime.now().isoformat(),
        "question_type": question_type,
        "question_name": question_name,
        "error_message": error_message,
        "invalid_data": invalid_data,
        "model_schema": model_schema,
        "question_dict": question_dict,
        # NOTE(review): if no exception is being handled when this is
        # called, format_exc() yields the string "NoneType: None".
        "traceback": traceback.format_exc(),
    }

    # Log as JSON for easier parsing
    logger.info(json.dumps(log_entry))

    # Write directly to the file as well to ensure it's written
    # NOTE(review): this intentionally duplicates the logger's output into
    # the same file, so each failure appears twice -- readers that count
    # entries should be aware of this. Confirm whether both writes are
    # still needed.
    with open(VALIDATION_LOG_FILE, "a") as f:
        f.write(f"{datetime.datetime.now().isoformat()} - validation_failures - INFO - {json.dumps(log_entry)}\n")
        f.flush()
|
88
|
+
|
89
|
+
|
90
|
+
def get_validation_failure_logs(n: int = 10) -> list:
    """
    Get the latest n validation failure logs.

    Args:
        n: Number of logs to return (default: 10)

    Returns:
        List of validation failure log entries as dictionaries, most
        recent first (ordered by their "timestamp" field).
    """
    logs = []

    # Nothing has been logged yet.
    if not os.path.exists(VALIDATION_LOG_FILE):
        return logs

    with open(VALIDATION_LOG_FILE, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            # Each line has the form "timestamp - name - level - {json}".
            # Parse from the first "{" rather than splitting on " - ":
            # the JSON payload itself can contain " - " (e.g. inside an
            # error message), which a naive split would truncate.
            json_start = line.find("{")
            if json_start == -1:
                # Not a log entry line (e.g. stray text); skip it.
                continue

            try:
                logs.append(json.loads(line[json_start:]))
            except json.JSONDecodeError:
                # Skip malformed or truncated JSON
                continue

    # Return most recent logs first
    return sorted(logs, key=lambda x: x.get("timestamp", ""), reverse=True)[:n]
|
130
|
+
|
131
|
+
|
132
|
+
def clear_validation_logs() -> None:
    """Remove every recorded validation failure by truncating the log file."""
    # Nothing to clear when the log file was never created.
    if not os.path.exists(VALIDATION_LOG_FILE):
        return
    # Opening in "w" mode truncates the file; the empty write mirrors the
    # original behavior exactly.
    with open(VALIDATION_LOG_FILE, "w") as f:
        f.write("")