promptum 0.0.1__py3-none-any.whl → 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promptum/__init__.py +3 -18
- promptum/benchmark/__init__.py +4 -1
- promptum/benchmark/benchmark.py +8 -12
- promptum/benchmark/report.py +5 -6
- promptum/{core → benchmark}/result.py +2 -2
- promptum/{execution → benchmark}/runner.py +2 -3
- promptum/{core → benchmark}/test_case.py +1 -1
- promptum/providers/__init__.py +5 -0
- promptum/providers/openrouter.py +3 -3
- promptum/providers/protocol.py +1 -1
- promptum/validation/validators.py +0 -18
- {promptum-0.0.1.dist-info → promptum-0.0.2.dist-info}/METADATA +14 -23
- promptum-0.0.2.dist-info/RECORD +20 -0
- promptum/core/__init__.py +0 -12
- promptum/execution/__init__.py +0 -3
- promptum/serialization/__init__.py +0 -11
- promptum/serialization/base.py +0 -48
- promptum/serialization/html.py +0 -52
- promptum/serialization/json.py +0 -28
- promptum/serialization/protocol.py +0 -13
- promptum/serialization/report_template.html +0 -293
- promptum/serialization/yaml.py +0 -17
- promptum/storage/__init__.py +0 -7
- promptum/storage/file.py +0 -157
- promptum/storage/protocol.py +0 -23
- promptum-0.0.1.dist-info/RECORD +0 -32
- /promptum/{core → providers}/metrics.py +0 -0
- /promptum/{core → providers}/retry.py +0 -0
- {promptum-0.0.1.dist-info → promptum-0.0.2.dist-info}/WHEEL +0 -0
- {promptum-0.0.1.dist-info → promptum-0.0.2.dist-info}/licenses/LICENSE +0 -0
promptum/__init__.py
CHANGED
@@ -1,14 +1,5 @@
-from promptum.benchmark import Benchmark, Report
-from promptum.
-from promptum.execution import Runner
-from promptum.providers import LLMProvider, OpenRouterClient
-from promptum.serialization import (
-    HTMLSerializer,
-    JSONSerializer,
-    Serializer,
-    YAMLSerializer,
-)
-from promptum.storage import FileStorage, ResultStorage
+from promptum.benchmark import Benchmark, Report, Runner, TestCase, TestResult
+from promptum.providers import LLMProvider, Metrics, OpenRouterClient, RetryConfig, RetryStrategy
 from promptum.validation import (
     Contains,
     ExactMatch,
@@ -17,7 +8,7 @@ from promptum.validation import (
     Validator,
 )
 
-__version__ = "0.1
+__version__ = "0.0.1"
 
 __all__ = [
     "TestCase",
@@ -35,10 +26,4 @@ __all__ = [
     "Runner",
     "Benchmark",
     "Report",
-    "Serializer",
-    "JSONSerializer",
-    "YAMLSerializer",
-    "HTMLSerializer",
-    "ResultStorage",
-    "FileStorage",
 ]
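In 0.0.2 the `core`, `execution`, `serialization`, and `storage` subpackages disappear from the public surface; everything re-exported at the package root now comes from `promptum.benchmark`, `promptum.providers`, and `promptum.validation`. A minimal sketch of what imports look like against the layout shown above (illustrative only):

```python
# Top-level imports available in 0.0.2, per the rewritten __init__.py above.
from promptum import Benchmark, Report, Runner, TestCase, TestResult
from promptum import LLMProvider, Metrics, OpenRouterClient, RetryConfig, RetryStrategy

# The 0.0.1-era serializer/storage exports are gone from the package root:
# e.g. `from promptum import JSONSerializer` or `FileStorage` would now fail.
```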
promptum/benchmark/__init__.py
CHANGED
@@ -1,4 +1,7 @@
 from promptum.benchmark.benchmark import Benchmark
 from promptum.benchmark.report import Report
+from promptum.benchmark.result import TestResult
+from promptum.benchmark.runner import Runner
+from promptum.benchmark.test_case import TestCase
 
-__all__ = ["Benchmark", "Report"]
+__all__ = ["Benchmark", "Report", "Runner", "TestCase", "TestResult"]
promptum/benchmark/benchmark.py
CHANGED
@@ -1,11 +1,10 @@
 import asyncio
 from collections.abc import Callable, Sequence
-from typing import Any
 
 from promptum.benchmark.report import Report
-from promptum.
-from promptum.
-from promptum.
+from promptum.benchmark.result import TestResult
+from promptum.benchmark.runner import Runner
+from promptum.benchmark.test_case import TestCase
 from promptum.providers.protocol import LLMProvider
 
 
@@ -29,12 +28,12 @@ class Benchmark:
     def add_tests(self, test_cases: Sequence[TestCase]) -> None:
         self._test_cases.extend(test_cases)
 
-    def run(self
-        return asyncio.run(self.run_async(
+    def run(self) -> Report:
+        return asyncio.run(self.run_async())
 
-    async def run_async(self
+    async def run_async(self) -> Report:
         if not self._test_cases:
-            return Report(results=[]
+            return Report(results=[])
 
         runner = Runner(
             provider=self.provider,
@@ -44,7 +43,4 @@ class Benchmark:
 
         results = await runner.run(self._test_cases)
 
-        return Report(
-            results=results,
-            metadata=metadata or {},
-        )
+        return Report(results=results)
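`run()` and `run_async()` no longer take a metadata argument, and the `Report` they return no longer carries one. A hedged usage sketch consistent with the 0.0.2 signatures above (benchmark and provider construction elided; `benchmark` and `tests` are assumed to exist):

```python
# Assumes `benchmark` is a configured Benchmark and `tests` is a sequence of TestCase objects.
benchmark.add_tests(tests)
report = benchmark.run()        # sync wrapper; equivalent to asyncio.run(benchmark.run_async())
summary = report.get_summary()  # the per-run metadata dict is gone; summaries come from results
```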
promptum/benchmark/report.py
CHANGED
@@ -2,13 +2,12 @@ from collections.abc import Callable, Sequence
 from dataclasses import dataclass
 from typing import Any
 
-from promptum.
+from promptum.benchmark.result import TestResult
 
 
 @dataclass(frozen=True, slots=True)
 class Report:
     results: Sequence[TestResult]
-    metadata: dict[str, Any]
 
     def get_summary(self) -> dict[str, Any]:
         total = len(self.results)
@@ -49,7 +48,7 @@
         if passed is not None:
             filtered = [r for r in filtered if r.passed == passed]
 
-        return Report(results=filtered
+        return Report(results=filtered)
 
     def group_by(self, key: Callable[[TestResult], str]) -> dict[str, "Report"]:
         groups: dict[str, list[TestResult]] = {}
@@ -60,7 +59,7 @@
                 groups[group_key] = []
             groups[group_key].append(result)
 
-        return {k: Report(results=v
+        return {k: Report(results=v) for k, v in groups.items()}
 
     def compare_models(self) -> dict[str, dict[str, Any]]:
         by_model = self.group_by(lambda r: r.test_case.model)
@@ -71,5 +70,5 @@
         if not values:
             return 0
         sorted_values = sorted(values)
-        index = int(len(sorted_values) * p)
-        return sorted_values[
+        index = int((len(sorted_values) - 1) * p)
+        return sorted_values[index]
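The percentile helper previously computed `int(len(sorted_values) * p)`, which indexes one past the end for p = 1.0 and biases other percentiles upward; 0.0.2 scales by `len - 1` instead. A small illustration with hypothetical values:

```python
values = [120.0, 150.0, 300.0]
sorted_values = sorted(values)

p = 1.0
# 0.0.1 behaviour: int(3 * 1.0) == 3       -> IndexError on a 3-element list
# 0.0.2 behaviour: int((3 - 1) * 1.0) == 2 -> 300.0, the maximum, as expected for p100
index = int((len(sorted_values) - 1) * p)
assert sorted_values[index] == 300.0
```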
promptum/{core → benchmark}/result.py
CHANGED
@@ -2,8 +2,8 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from typing import Any
 
-from promptum.
-from promptum.
+from promptum.benchmark.test_case import TestCase
+from promptum.providers.metrics import Metrics
 
 
 @dataclass(frozen=True, slots=True)

promptum/{execution → benchmark}/runner.py
CHANGED
@@ -3,8 +3,8 @@ from collections.abc import Callable, Sequence
 
 import httpx
 
-from promptum.
-from promptum.
+from promptum.benchmark.result import TestResult
+from promptum.benchmark.test_case import TestCase
 from promptum.providers.protocol import LLMProvider
 
 
@@ -37,7 +37,6 @@ class Runner:
 
         results = await asyncio.gather(
             *[run_with_semaphore(tc) for tc in test_cases],
-            return_exceptions=False,
         )
 
         return list(results)
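Dropping `return_exceptions=False` is purely cosmetic: that is already `asyncio.gather`'s default, so the first failing test case still propagates its exception. A standalone sketch with hypothetical coroutines:

```python
import asyncio


async def main() -> None:
    async def work(i: int) -> int:
        return i * 2

    # Same behaviour with or without return_exceptions=False (it is the default):
    results = await asyncio.gather(*[work(i) for i in range(3)])
    print(results)  # [0, 2, 4]


asyncio.run(main())
```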
promptum/providers/__init__.py
CHANGED
@@ -1,7 +1,12 @@
+from promptum.providers.metrics import Metrics
 from promptum.providers.openrouter import OpenRouterClient
 from promptum.providers.protocol import LLMProvider
+from promptum.providers.retry import RetryConfig, RetryStrategy
 
 __all__ = [
     "LLMProvider",
+    "Metrics",
     "OpenRouterClient",
+    "RetryConfig",
+    "RetryStrategy",
 ]
promptum/providers/openrouter.py
CHANGED
@@ -4,8 +4,8 @@ from typing import Any
 
 import httpx
 
-from promptum.
-from promptum.
+from promptum.providers.metrics import Metrics
+from promptum.providers.retry import RetryConfig, RetryStrategy
 
 
 class OpenRouterClient:
@@ -61,7 +61,7 @@ class OpenRouterClient:
             "messages": messages,
             "temperature": temperature,
         }
-        if max_tokens:
+        if max_tokens is not None:
             payload["max_tokens"] = max_tokens
         payload.update(kwargs)
 
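The old truthiness check silently dropped `max_tokens=0` (or any other falsy value) from the request payload; the `is not None` test only skips the key when the caller did not set it. An illustrative comparison, not the library's actual code:

```python
payload: dict[str, object] = {}
max_tokens = 0  # an explicit, if unusual, caller-supplied value

if max_tokens:                # 0.0.1: 0 is falsy, so the key is silently omitted
    payload["max_tokens"] = max_tokens

if max_tokens is not None:    # 0.0.2: only an unset (None) value is omitted
    payload["max_tokens"] = max_tokens
```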
promptum/providers/protocol.py
CHANGED
promptum/validation/validators.py
CHANGED
@@ -88,21 +88,3 @@ class JsonSchema:
             keys = ", ".join(self.required_keys)
             return f"Valid JSON with keys: {keys}"
         return "Valid JSON object"
-
-
-@dataclass(frozen=True, slots=True)
-class PlaceholderValidator:
-    """
-    Placeholder validator for deserialized reports.
-
-    Used when original validator cannot be reconstructed from storage.
-    Always returns True. Original validator logic is not preserved.
-    """
-
-    description: str
-
-    def validate(self, response: str) -> tuple[bool, dict[str, Any]]:
-        return True, {"placeholder": True, "note": "Original validator could not be reconstructed"}
-
-    def describe(self) -> str:
-        return self.description

{promptum-0.0.1.dist-info → promptum-0.0.2.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: promptum
-Version: 0.0.
+Version: 0.0.2
 Summary: Async LLM benchmarking library with protocol-based extensibility
 Project-URL: Homepage, https://github.com/deyna256/promptum
 Project-URL: Repository, https://github.com/deyna256/promptum
@@ -36,8 +36,6 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.13
 Requires-Dist: httpx>=0.27.0
-Requires-Dist: jinja2>=3.1.0
-Requires-Dist: pyyaml>=6.0
 Description-Content-Type: text/markdown
 
 # promptum
@@ -97,15 +95,12 @@ for attempt in range(max_retries):
         break
     except Exception:
         sleep(2 ** attempt)
-
-# Export results manually
-json.dump(results, open("results.json", "w"))
 ```
 
 **After promptum:**
 ```python
 report = await benchmark.run_async()
-
+summary = report.get_summary()  # Metrics captured automatically
 ```
 
 ---
@@ -151,14 +146,13 @@ python your_script.py
 
 ## What You Get
 
-
-
-
-
-
-
-
-✅ **Zero Config** - No YAML files, no setup scripts, just Python
+- [x] **One API for 100+ Models** - OpenRouter support out of the box (OpenAI, Anthropic, Google, etc.)
+- [x] **Smart Validation** - ExactMatch, Contains, Regex, JsonSchema, or write your own
+- [x] **Automatic Retries** - Exponential/linear backoff with configurable attempts
+- [x] **Metrics Tracking** - Latency, tokens, cost - automatically captured
+- [x] **Async by Default** - Run 100 tests in parallel without breaking a sweat
+- [x] **Type Safe** - Full type hints, catches errors before runtime
+- [x] **Zero Config** - No YAML files, no setup scripts, just Python
 
 ---
 
@@ -193,14 +187,11 @@ tests = [
 benchmark.add_tests(tests)
 report = await benchmark.run_async()
 
-#
-
-
-open("comparison.html", "w").write(html)
+# Side-by-side model comparison
+for model, summary in report.compare_models().items():
+    print(f"{model}: {summary['pass_rate']:.0%} pass rate, {summary['avg_latency_ms']:.0f}ms avg")
 ```
 
-Open `comparison.html` in your browser - see side-by-side model performance with charts.
-
 ---
 
 ## Use Cases
@@ -252,7 +243,7 @@ Found a bug? Want a feature? PRs welcome!
 
 ```bash
 # Development setup
-git clone https://github.com/
+git clone https://github.com/deyna256/promptum.git
 cd promptum
 just sync # Install dependencies
 just test # Run tests
@@ -273,7 +264,7 @@ MIT - do whatever you want with it.
 
 <div align="center">
 
-**[⭐ Star on GitHub](https://github.com/
+**[⭐ Star on GitHub](https://github.com/deyna256/promptum)** | **[🐛 Report Bug](https://github.com/deyna256/promptum/issues)** | **[💡 Request Feature](https://github.com/deyna256/promptum/issues)**
 
 Made for developers who value their time.
 

promptum-0.0.2.dist-info/RECORD
ADDED
@@ -0,0 +1,20 @@
+promptum/__init__.py,sha256=8IAk_9VlnKEJIdwf-hEDkOfOCV456H2Jng-HrZfewso,582
+promptum/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+promptum/benchmark/__init__.py,sha256=0FXYDnK4SGa5ZqX2k9aVkwy3ENDlF_5nW2Mut_OCCbg,311
+promptum/benchmark/benchmark.py,sha256=hZ3557qPKqFeNNuxrRLPs-b6XBy2JCowIhRDDwatfeI,1403
+promptum/benchmark/report.py,sha256=DhY1p3n29xOSwRYUiQW6V6FhGFGGn-JF6nuNuvj9rro,2659
+promptum/benchmark/result.py,sha256=nKh-T4zlam2LxsaFoL8jeVaO6kZJ1sfB_tnp4gdNPhM,482
+promptum/benchmark/runner.py,sha256=5p6JBwjTlEHTh6jNv_iuFH1nIrI4_Gv3wmzCT0TWpvA,2407
+promptum/benchmark/test_case.py,sha256=Okypf2334ewVrvmQG7M3I3D7BzqXDsQ2ihjNw9gGF00,598
+promptum/providers/__init__.py,sha256=UprvJ4vxHqo-VTzzUmZ4wFCj6VybP9xBd7HtpPPSvbI,335
+promptum/providers/metrics.py,sha256=FnS10nHFjQ5Clj5X21C_nW6zAUJU_ZHt0s2fLgp6L28,427
+promptum/providers/openrouter.py,sha256=fOqBm4ak7szNNeKNhSI6y4WpFsUx6iQg_3jaFsXc0dQ,4623
+promptum/providers/protocol.py,sha256=g9zIH91HysBIATMHd9Z2Mpk1tKiTOkAyd-zynRaQsuk,493
+promptum/providers/retry.py,sha256=mA_RRz9_9J_mge_AUd9f1A-gACOxZLGTI8vTIstAr8s,538
+promptum/validation/__init__.py,sha256=mhykyxaIwn2PJh2RXAi0fi2NRIveFmlC5bg1nyCbfVU,252
+promptum/validation/protocol.py,sha256=xqxm23YX6eNeZHKMLMZ-Wz8iQKn4ZRzAI5Xryxg0uq4,418
+promptum/validation/validators.py,sha256=qSMva2P2miXXJJ5XeTKJsyYgh2x5wORi3dhOnBYuACE,2686
+promptum-0.0.2.dist-info/METADATA,sha256=MQcy0pxUoMpu4uZgM_Q3HEE_RnY3Krcg-_FTF9vvQ54,7845
+promptum-0.0.2.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+promptum-0.0.2.dist-info/licenses/LICENSE,sha256=Fgn285H5Vy9diOlqO1TzS3hD97WcdF6-GFHvUcFNtmg,1067
+promptum-0.0.2.dist-info/RECORD,,
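With `PlaceholderValidator` removed from validators.py (see the hunk earlier in this diff), report deserialization no longer needs a stand-in validator. The removed class still documents the validator interface: `validate()` returns a `(passed, details)` pair and `describe()` returns a label. A hypothetical user-defined validator following that shape:

```python
from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True, slots=True)
class NonEmpty:
    """Hypothetical example validator; passes when the response is not blank."""

    def validate(self, response: str) -> tuple[bool, dict[str, Any]]:
        return bool(response.strip()), {"length": len(response)}

    def describe(self) -> str:
        return "Non-empty response"
```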
promptum/core/__init__.py
DELETED
@@ -1,12 +0,0 @@
-from promptum.core.metrics import Metrics
-from promptum.core.result import TestResult
-from promptum.core.retry import RetryConfig, RetryStrategy
-from promptum.core.test_case import TestCase
-
-__all__ = [
-    "Metrics",
-    "RetryConfig",
-    "RetryStrategy",
-    "TestCase",
-    "TestResult",
-]
promptum/execution/__init__.py
DELETED
promptum/serialization/__init__.py
DELETED
@@ -1,11 +0,0 @@
-from promptum.serialization.html import HTMLSerializer
-from promptum.serialization.json import JSONSerializer
-from promptum.serialization.protocol import Serializer
-from promptum.serialization.yaml import YAMLSerializer
-
-__all__ = [
-    "Serializer",
-    "JSONSerializer",
-    "YAMLSerializer",
-    "HTMLSerializer",
-]
promptum/serialization/base.py
DELETED
@@ -1,48 +0,0 @@
-"""Base serializer with shared result serialization logic."""
-
-from typing import Any
-
-from promptum.core.result import TestResult
-
-
-class BaseSerializer:
-    """
-    Base class for serializers with common result serialization logic.
-
-    Subclasses should implement:
-    - serialize(report: Report) -> str
-    - get_file_extension() -> str
-    """
-
-    @staticmethod
-    def _serialize_result(result: TestResult) -> dict[str, Any]:
-        """Convert TestResult to dictionary representation."""
-        return {
-            "test_case": {
-                "name": result.test_case.name,
-                "prompt": result.test_case.prompt,
-                "model": result.test_case.model,
-                "tags": list(result.test_case.tags),
-                "system_prompt": result.test_case.system_prompt,
-                "temperature": result.test_case.temperature,
-                "max_tokens": result.test_case.max_tokens,
-                "metadata": result.test_case.metadata,
-                "validator": result.test_case.validator.describe(),
-            },
-            "response": result.response,
-            "passed": result.passed,
-            "metrics": {
-                "latency_ms": result.metrics.latency_ms,
-                "prompt_tokens": result.metrics.prompt_tokens,
-                "completion_tokens": result.metrics.completion_tokens,
-                "total_tokens": result.metrics.total_tokens,
-                "cost_usd": result.metrics.cost_usd,
-                "retry_delays": list(result.metrics.retry_delays),
-                "total_attempts": result.metrics.total_attempts,
-            }
-            if result.metrics
-            else None,
-            "validation_details": result.validation_details,
-            "execution_error": result.execution_error,
-            "timestamp": result.timestamp.isoformat(),
-        }
promptum/serialization/html.py
DELETED
@@ -1,52 +0,0 @@
-import json
-from pathlib import Path
-
-from jinja2 import Template
-
-from promptum.benchmark.report import Report
-
-
-class HTMLSerializer:
-    def __init__(self) -> None:
-        template_path = Path(__file__).parent / "report_template.html"
-        self._template = Template(template_path.read_text())
-
-    def serialize(self, report: Report) -> str:
-        summary = report.get_summary()
-
-        results_data = []
-        for result in report.results:
-            results_data.append(
-                {
-                    "test_case": {
-                        "name": result.test_case.name,
-                        "prompt": result.test_case.prompt,
-                        "model": result.test_case.model,
-                        "tags": list(result.test_case.tags),
-                        "system_prompt": result.test_case.system_prompt,
-                        "validator": result.test_case.validator.describe(),
-                    },
-                    "response": result.response,
-                    "passed": result.passed,
-                    "metrics": {
-                        "latency_ms": result.metrics.latency_ms,
-                        "prompt_tokens": result.metrics.prompt_tokens,
-                        "completion_tokens": result.metrics.completion_tokens,
-                        "total_tokens": result.metrics.total_tokens,
-                        "cost_usd": result.metrics.cost_usd,
-                        "total_attempts": result.metrics.total_attempts,
-                    }
-                    if result.metrics
-                    else None,
-                    "execution_error": result.execution_error,
-                }
-            )
-
-        return self._template.render(
-            summary=summary,
-            results=results_data,
-            results_json=json.dumps(results_data),
-        )
-
-    def get_file_extension(self) -> str:
-        return "html"
promptum/serialization/json.py
DELETED
@@ -1,28 +0,0 @@
-import json
-from datetime import datetime
-from typing import Any
-
-from promptum.benchmark.report import Report
-from promptum.serialization.base import BaseSerializer
-
-
-class JSONSerializer(BaseSerializer):
-    def __init__(self, indent: int = 2):
-        self.indent = indent
-
-    def serialize(self, report: Report) -> str:
-        data = {
-            "metadata": report.metadata,
-            "summary": report.get_summary(),
-            "results": [self._serialize_result(r) for r in report.results],
-        }
-        return json.dumps(data, indent=self.indent, default=self._json_default)
-
-    def get_file_extension(self) -> str:
-        return "json"
-
-    @staticmethod
-    def _json_default(obj: Any) -> Any:
-        if isinstance(obj, datetime):
-            return obj.isoformat()
-        raise TypeError(f"Object of type {type(obj)} is not JSON serializable")

promptum/serialization/protocol.py
DELETED
@@ -1,13 +0,0 @@
-from typing import Protocol
-
-from promptum.benchmark.report import Report
-
-
-class Serializer(Protocol):
-    def serialize(self, report: Report) -> str:
-        """Serializes a Report to a string format."""
-        ...
-
-    def get_file_extension(self) -> str:
-        """Returns the file extension for this format (e.g., 'json', 'html')."""
-        ...

promptum/serialization/report_template.html
DELETED
@@ -1,293 +0,0 @@
-<!DOCTYPE html>
-<html lang="en">
-<head>
-    <meta charset="UTF-8">
-    <meta name="viewport" content="width=device-width, initial-scale=1.0">
-    <title>LLM Benchmark Report</title>
-    <script src="https://cdn.jsdelivr.net/npm/chart.js@4.4.0/dist/chart.umd.js"></script>
-    <style>
-        * { margin: 0; padding: 0; box-sizing: border-box; }
-        :root {
-            --bg: #ffffff;
-            --surface: #f5f5f5;
-            --text: #1a1a1a;
-            --text-muted: #666;
-            --border: #ddd;
-            --success: #22c55e;
-            --error: #ef4444;
-            --warning: #f59e0b;
-        }
-        @media (prefers-color-scheme: dark) {
-            :root {
-                --bg: #0a0a0a;
-                --surface: #1a1a1a;
-                --text: #e5e5e5;
-                --text-muted: #a3a3a3;
-                --border: #333;
-            }
-        }
-        body {
-            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif;
-            background: var(--bg);
-            color: var(--text);
-            line-height: 1.6;
-        }
-        .container { max-width: 1400px; margin: 0 auto; padding: 2rem; }
-        h1 { font-size: 2rem; margin-bottom: 0.5rem; }
-        h2 { font-size: 1.5rem; margin: 2rem 0 1rem; }
-        .summary { display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 1rem; margin-bottom: 2rem; }
-        .card {
-            background: var(--surface);
-            border: 1px solid var(--border);
-            border-radius: 8px;
-            padding: 1.5rem;
-        }
-        .card-title { font-size: 0.875rem; color: var(--text-muted); margin-bottom: 0.5rem; }
-        .card-value { font-size: 2rem; font-weight: 700; }
-        .chart-container { height: 300px; margin-bottom: 2rem; }
-        table {
-            width: 100%;
-            border-collapse: collapse;
-            background: var(--surface);
-            border-radius: 8px;
-            overflow: hidden;
-        }
-        th, td {
-            text-align: left;
-            padding: 1rem;
-            border-bottom: 1px solid var(--border);
-        }
-        th {
-            background: var(--surface);
-            font-weight: 600;
-            position: sticky;
-            top: 0;
-        }
-        tr:hover { background: var(--bg); }
-        .badge {
-            display: inline-block;
-            padding: 0.25rem 0.75rem;
-            border-radius: 12px;
-            font-size: 0.75rem;
-            font-weight: 600;
-        }
-        .badge-success { background: var(--success); color: white; }
-        .badge-error { background: var(--error); color: white; }
-        .tag {
-            display: inline-block;
-            padding: 0.125rem 0.5rem;
-            background: var(--border);
-            border-radius: 4px;
-            font-size: 0.75rem;
-            margin-right: 0.25rem;
-        }
-        .search {
-            width: 100%;
-            padding: 0.75rem;
-            margin-bottom: 1rem;
-            background: var(--surface);
-            border: 1px solid var(--border);
-            border-radius: 8px;
-            color: var(--text);
-            font-size: 1rem;
-        }
-        .truncate {
-            max-width: 300px;
-            white-space: nowrap;
-            overflow: hidden;
-            text-overflow: ellipsis;
-        }
-        button {
-            background: var(--surface);
-            border: 1px solid var(--border);
-            color: var(--text);
-            padding: 0.5rem 1rem;
-            border-radius: 6px;
-            cursor: pointer;
-            font-size: 0.875rem;
-        }
-        button:hover { background: var(--border); }
-        .modal {
-            display: none;
-            position: fixed;
-            top: 0;
-            left: 0;
-            width: 100%;
-            height: 100%;
-            background: rgba(0, 0, 0, 0.7);
-            z-index: 1000;
-            overflow: auto;
-        }
-        .modal-content {
-            background: var(--surface);
-            margin: 2rem auto;
-            padding: 2rem;
-            max-width: 800px;
-            border-radius: 12px;
-            position: relative;
-        }
-        .modal-close {
-            position: absolute;
-            top: 1rem;
-            right: 1rem;
-            font-size: 1.5rem;
-            cursor: pointer;
-        }
-        pre {
-            background: var(--bg);
-            padding: 1rem;
-            border-radius: 6px;
-            overflow-x: auto;
-            margin: 0.5rem 0;
-        }
-        code { font-family: 'Courier New', monospace; font-size: 0.875rem; }
-    </style>
-</head>
-<body>
-    <div class="container">
-        <h1>LLM Benchmark Report</h1>
-        <p style="color: var(--text-muted); margin-bottom: 2rem;">{{ summary.total }} tests executed</p>
-
-        <div class="summary">
-            <div class="card">
-                <div class="card-title">Pass Rate</div>
-                <div class="card-value" style="color: var(--success);">{{ "%.1f"|format(summary.pass_rate * 100) }}%</div>
-            </div>
-            <div class="card">
-                <div class="card-title">Avg Latency</div>
-                <div class="card-value">{{ "%.0f"|format(summary.avg_latency_ms) }}ms</div>
-            </div>
-            <div class="card">
-                <div class="card-title">Total Cost</div>
-                <div class="card-value">${{ "%.6f"|format(summary.total_cost_usd) }}</div>
-            </div>
-            <div class="card">
-                <div class="card-title">Total Tokens</div>
-                <div class="card-value">{{ "{:,}".format(summary.total_tokens) }}</div>
-            </div>
-        </div>
-
-        <div class="card chart-container">
-            <canvas id="latencyChart"></canvas>
-        </div>
-
-        <h2>Test Results</h2>
-        <input type="text" class="search" id="searchInput" placeholder="Search tests...">
-
-        <table id="resultsTable">
-            <thead>
-                <tr>
-                    <th>Status</th>
-                    <th>Name</th>
-                    <th>Model</th>
-                    <th>Latency</th>
-                    <th>Cost</th>
-                    <th>Tags</th>
-                    <th>Actions</th>
-                </tr>
-            </thead>
-            <tbody>
-                {% for result in results %}
-                <tr class="result-row">
-                    <td>
-                        {% if result.passed %}
-                        <span class="badge badge-success">PASS</span>
-                        {% else %}
-                        <span class="badge badge-error">FAIL</span>
-                        {% endif %}
-                    </td>
-                    <td>{{ result.test_case.name }}</td>
-                    <td>{{ result.test_case.model }}</td>
-                    <td>{{ "%.0f"|format(result.metrics.latency_ms if result.metrics else 0) }}ms</td>
-                    <td>${{ "%.6f"|format(result.metrics.cost_usd if result.metrics and result.metrics.cost_usd else 0) }}</td>
-                    <td>
-                        {% for tag in result.test_case.tags %}
-                        <span class="tag">{{ tag }}</span>
-                        {% endfor %}
-                    </td>
-                    <td><button onclick="showDetails({{ loop.index0 }})">Details</button></td>
-                </tr>
-                {% endfor %}
-            </tbody>
-        </table>
-    </div>
-
-    <div id="detailsModal" class="modal">
-        <div class="modal-content">
-            <span class="modal-close" onclick="closeModal()">×</span>
-            <div id="modalBody"></div>
-        </div>
-    </div>
-
-    <script>
-        const results = {{ results_json }};
-
-        new Chart(document.getElementById('latencyChart'), {
-            type: 'bar',
-            data: {
-                labels: results.map((r, i) => r.test_case.name),
-                datasets: [{
-                    label: 'Latency (ms)',
-                    data: results.map(r => r.metrics ? r.metrics.latency_ms : 0),
-                    backgroundColor: results.map(r => r.passed ? '#22c55e' : '#ef4444')
-                }]
-            },
-            options: {
-                responsive: true,
-                maintainAspectRatio: false,
-                plugins: { legend: { display: false } }
-            }
-        });
-
-        document.getElementById('searchInput').addEventListener('input', function(e) {
-            const term = e.target.value.toLowerCase();
-            document.querySelectorAll('.result-row').forEach(row => {
-                const text = row.textContent.toLowerCase();
-                row.style.display = text.includes(term) ? '' : 'none';
-            });
-        });
-
-        function showDetails(index) {
-            const result = results[index];
-            const html = `
-                <h2>${result.test_case.name}</h2>
-                <p><strong>Status:</strong> <span class="badge ${result.passed ? 'badge-success' : 'badge-error'}">${result.passed ? 'PASS' : 'FAIL'}</span></p>
-                <p><strong>Model:</strong> ${result.test_case.model}</p>
-                <p><strong>Validator:</strong> ${result.test_case.validator}</p>
-                <h3>Prompt</h3>
-                <pre><code>${escapeHtml(result.test_case.prompt)}</code></pre>
-                ${result.test_case.system_prompt ? `<h3>System Prompt</h3><pre><code>${escapeHtml(result.test_case.system_prompt)}</code></pre>` : ''}
-                <h3>Response</h3>
-                <pre><code>${escapeHtml(result.response || 'No response')}</code></pre>
-                ${result.execution_error ? `<h3>Error</h3><pre style="color: var(--error);"><code>${escapeHtml(result.execution_error)}</code></pre>` : ''}
-                ${result.metrics ? `
-                <h3>Metrics</h3>
-                <ul>
-                    <li>Latency: ${result.metrics.latency_ms.toFixed(0)}ms</li>
-                    <li>Tokens: ${result.metrics.total_tokens || 'N/A'}</li>
-                    <li>Cost: $${(result.metrics.cost_usd || 0).toFixed(6)}</li>
-                    <li>Attempts: ${result.metrics.total_attempts}</li>
-                </ul>
-                ` : ''}
-            `;
-            document.getElementById('modalBody').innerHTML = html;
-            document.getElementById('detailsModal').style.display = 'block';
-        }
-
-        function closeModal() {
-            document.getElementById('detailsModal').style.display = 'none';
-        }
-
-        function escapeHtml(text) {
-            const div = document.createElement('div');
-            div.textContent = text;
-            return div.innerHTML;
-        }
-
-        window.onclick = function(event) {
-            const modal = document.getElementById('detailsModal');
-            if (event.target === modal) closeModal();
-        }
-    </script>
-</body>
-</html>
promptum/serialization/yaml.py
DELETED
@@ -1,17 +0,0 @@
-import yaml
-
-from promptum.benchmark.report import Report
-from promptum.serialization.base import BaseSerializer
-
-
-class YAMLSerializer(BaseSerializer):
-    def serialize(self, report: Report) -> str:
-        data = {
-            "metadata": report.metadata,
-            "summary": report.get_summary(),
-            "results": [self._serialize_result(r) for r in report.results],
-        }
-        return yaml.dump(data, default_flow_style=False, sort_keys=False)
-
-    def get_file_extension(self) -> str:
-        return "yaml"
promptum/storage/__init__.py
DELETED
promptum/storage/file.py
DELETED
@@ -1,157 +0,0 @@
-import json
-import tempfile
-from datetime import datetime
-from pathlib import Path
-from typing import Any
-
-from promptum.benchmark.report import Report
-from promptum.core.metrics import Metrics
-from promptum.core.result import TestResult
-from promptum.core.test_case import TestCase
-from promptum.validation.validators import PlaceholderValidator
-
-
-class FileStorage:
-    def __init__(self, base_dir: str = "results"):
-        self.base_dir = Path(base_dir)
-        self.reports_dir = self.base_dir / "reports"
-        self.metadata_file = self.base_dir / "metadata.json"
-
-        self.reports_dir.mkdir(parents=True, exist_ok=True)
-
-    def save(self, report: Report, name: str) -> str:
-        timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
-        identifier = f"{timestamp}_{name}"
-        filename = f"{identifier}.json"
-        filepath = self.reports_dir / filename
-
-        data = self._serialize_report(report)
-
-        with tempfile.NamedTemporaryFile(
-            mode="w", delete=False, dir=self.reports_dir, suffix=".tmp"
-        ) as tmp:
-            json.dump(data, tmp, indent=2)
-            tmp_path = Path(tmp.name)
-
-        tmp_path.replace(filepath)
-
-        self._update_metadata(identifier, name, str(filepath))
-
-        return identifier
-
-    def load(self, identifier: str) -> Report:
-        filepath = self.reports_dir / f"{identifier}.json"
-
-        if not filepath.exists():
-            raise FileNotFoundError(f"Report not found: {identifier}")
-
-        with open(filepath) as f:
-            data = json.load(f)
-
-        return self._deserialize_report(data)
-
-    def list_reports(self) -> list[dict[str, Any]]:
-        if not self.metadata_file.exists():
-            return []
-
-        with open(self.metadata_file) as f:
-            return json.load(f)
-
-    def _update_metadata(self, identifier: str, name: str, path: str) -> None:
-        metadata = self.list_reports()
-
-        metadata.append(
-            {
-                "id": identifier,
-                "name": name,
-                "path": path,
-                "timestamp": datetime.now().isoformat(),
-            }
-        )
-
-        with tempfile.NamedTemporaryFile(
-            mode="w", delete=False, dir=self.base_dir, suffix=".tmp"
-        ) as tmp:
-            json.dump(metadata, tmp, indent=2)
-            tmp_path = Path(tmp.name)
-
-        tmp_path.replace(self.metadata_file)
-
-    @staticmethod
-    def _serialize_report(report: Report) -> dict[str, Any]:
-        return {
-            "metadata": report.metadata,
-            "results": [
-                {
-                    "test_case": {
-                        "name": r.test_case.name,
-                        "prompt": r.test_case.prompt,
-                        "model": r.test_case.model,
-                        "tags": list(r.test_case.tags),
-                        "system_prompt": r.test_case.system_prompt,
-                        "temperature": r.test_case.temperature,
-                        "max_tokens": r.test_case.max_tokens,
-                        "metadata": r.test_case.metadata,
-                        "validator_description": r.test_case.validator.describe(),
-                    },
-                    "response": r.response,
-                    "passed": r.passed,
-                    "metrics": {
-                        "latency_ms": r.metrics.latency_ms,
-                        "prompt_tokens": r.metrics.prompt_tokens,
-                        "completion_tokens": r.metrics.completion_tokens,
-                        "total_tokens": r.metrics.total_tokens,
-                        "cost_usd": r.metrics.cost_usd,
-                        "retry_delays": list(r.metrics.retry_delays),
-                    }
-                    if r.metrics
-                    else None,
-                    "validation_details": r.validation_details,
-                    "execution_error": r.execution_error,
-                    "timestamp": r.timestamp.isoformat(),
-                }
-                for r in report.results
-            ],
-        }
-
-    @staticmethod
-    def _deserialize_report(data: dict[str, Any]) -> Report:
-        results = []
-        for r in data["results"]:
-            test_case = TestCase(
-                name=r["test_case"]["name"],
-                prompt=r["test_case"]["prompt"],
-                model=r["test_case"]["model"],
-                validator=PlaceholderValidator(
-                    description=r["test_case"]["validator_description"],
-                ),
-                tags=tuple(r["test_case"]["tags"]),
-                system_prompt=r["test_case"]["system_prompt"],
-                temperature=r["test_case"]["temperature"],
-                max_tokens=r["test_case"]["max_tokens"],
-                metadata=r["test_case"]["metadata"],
-            )
-
-            metrics = None
-            if r["metrics"]:
-                metrics = Metrics(
-                    latency_ms=r["metrics"]["latency_ms"],
-                    prompt_tokens=r["metrics"]["prompt_tokens"],
-                    completion_tokens=r["metrics"]["completion_tokens"],
-                    total_tokens=r["metrics"]["total_tokens"],
-                    cost_usd=r["metrics"]["cost_usd"],
-                    retry_delays=tuple(r["metrics"]["retry_delays"]),
-                )
-
-            result = TestResult(
-                test_case=test_case,
-                response=r["response"],
-                passed=r["passed"],
-                metrics=metrics,
-                validation_details=r["validation_details"],
-                execution_error=r["execution_error"],
-                timestamp=datetime.fromisoformat(r["timestamp"]),
-            )
-            results.append(result)
-
-        return Report(results=results, metadata=data["metadata"])
promptum/storage/protocol.py
DELETED
@@ -1,23 +0,0 @@
-from typing import Any, Protocol
-
-from promptum.benchmark.report import Report
-
-
-class ResultStorage(Protocol):
-    def save(self, report: Report, name: str) -> str:
-        """
-        Saves a report and returns its identifier.
-        """
-        ...
-
-    def load(self, identifier: str) -> Report:
-        """
-        Loads a report by its identifier.
-        """
-        ...
-
-    def list_reports(self) -> list[dict[str, Any]]:
-        """
-        Returns metadata for all stored reports.
-        """
-        ...
promptum-0.0.1.dist-info/RECORD
DELETED
@@ -1,32 +0,0 @@
-promptum/__init__.py,sha256=AjeGgmIbpp9Uv-0ybq6knejEJMK-Dnn_-fV9Z86Bp74,932
-promptum/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-promptum/benchmark/__init__.py,sha256=NJYiXm6wVFKMloxKNAXMY4H3bMQORTtLh6__nYWYWa0,131
-promptum/benchmark/benchmark.py,sha256=3enQSACdLwHW78fqSZj0Un3r7_Ua3V-MjfbEIIKFSWs,1589
-promptum/benchmark/report.py,sha256=ol_UO8rw43zbQxhs2o4AwYN5TP7O_Apa77V-pZKq6Uw,2754
-promptum/core/__init__.py,sha256=mqajsOdUBNJfcR2krxpwa7rM_wd88vJaAov-9SnVm68,294
-promptum/core/metrics.py,sha256=FnS10nHFjQ5Clj5X21C_nW6zAUJU_ZHt0s2fLgp6L28,427
-promptum/core/result.py,sha256=nyuVMQFY6DmZwzpgqDPsj0FaAuairpKLJ-0be5WQtTg,472
-promptum/core/retry.py,sha256=mA_RRz9_9J_mge_AUd9f1A-gACOxZLGTI8vTIstAr8s,538
-promptum/core/test_case.py,sha256=YNlVNj7FkoCyBFb2N0Dzrhce6o3DzUtke4PR6WoXhZo,593
-promptum/execution/__init__.py,sha256=fUZa7Bo7yn921sl49cS6TCGsG-lOUNVdhdeRsIa5vCc,67
-promptum/execution/runner.py,sha256=sP3uDu2VDLxFi9BkltMHwsyMuCXnz4oP1kVN28KpVZ0,2434
-promptum/providers/__init__.py,sha256=OW-CK198wOV7_bz_keOaxxQeRlFPZgINQcVJUZq_uus,169
-promptum/providers/openrouter.py,sha256=owquGxHaTB-pZ8jr06l4HouETuFj1lEg92oGX2mM5uo,4601
-promptum/providers/protocol.py,sha256=vdTGAGKN3FzThHLwyMMWicU87_LpW-gn0cM3vMcWiEY,488
-promptum/serialization/__init__.py,sha256=0dlpgF3dngaw_oR4mg7nuc4Z_VFVl2bATmhe2mHA9T4,319
-promptum/serialization/base.py,sha256=JnB4zb7D4oy44k6ndbJu3Xw1PVLpY_9-Y7k3Et2p43g,1851
-promptum/serialization/html.py,sha256=kJEd2s6fVfFHH7snJWrD5RGaUW66x3vtMKGMJ_ekmcI,1901
-promptum/serialization/json.py,sha256=koqgr5_WHmrpWUOCq6rWXoC07um3mkDDaob2k9vkEK8,870
-promptum/serialization/protocol.py,sha256=MZeMYt_HZJIYSyrRd_ZYbEJXDiXLMuJ5tosAeHLxpTM,353
-promptum/serialization/report_template.html,sha256=RC8qSLzolqWkWBIGfyhPtPkRWM7_0JkauEWPkaKiB9A,10802
-promptum/serialization/yaml.py,sha256=50A612OkX2L3EjhxTZJMZQb5zL8-2PmwcBjjNUhCWsA,528
-promptum/storage/__init__.py,sha256=QWOP5Al43WmmQ_kFCM9JGi8amXJzO_pR-x5AKDNy4ds,153
-promptum/storage/file.py,sha256=gnNBpNBQ_NeAWn7P2itsw2L99AxS7zOd8Nef6PyYxlk,5750
-promptum/storage/protocol.py,sha256=_NpkJzOQB_98Ud_TA_ZYubHf3o2DDXGMveRN3kRyYKI,517
-promptum/validation/__init__.py,sha256=mhykyxaIwn2PJh2RXAi0fi2NRIveFmlC5bg1nyCbfVU,252
-promptum/validation/protocol.py,sha256=xqxm23YX6eNeZHKMLMZ-Wz8iQKn4ZRzAI5Xryxg0uq4,418
-promptum/validation/validators.py,sha256=3lJwSMhhWb9x8BK_-S0FJBj7PFgno79II_i3Z1mCKTs,3217
-promptum-0.0.1.dist-info/METADATA,sha256=vt_PN0Ns0JuJalM7p8hJZsz-Y2hwQrbHZ4Jacy7P6L8,8083
-promptum-0.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
-promptum-0.0.1.dist-info/licenses/LICENSE,sha256=Fgn285H5Vy9diOlqO1TzS3hD97WcdF6-GFHvUcFNtmg,1067
-promptum-0.0.1.dist-info/RECORD,,

/promptum/{core → providers}/metrics.py
File without changes
/promptum/{core → providers}/retry.py
File without changes
{promptum-0.0.1.dist-info → promptum-0.0.2.dist-info}/WHEEL
File without changes
{promptum-0.0.1.dist-info → promptum-0.0.2.dist-info}/licenses/LICENSE
File without changes