promptum 0.0.1.tar.gz → 0.0.2.tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- promptum-0.0.2/CONTRIBUTING.md +78 -0
- {promptum-0.0.1 → promptum-0.0.2}/Justfile +0 -4
- {promptum-0.0.1 → promptum-0.0.2}/PKG-INFO +14 -23
- {promptum-0.0.1 → promptum-0.0.2}/README.md +13 -20
- {promptum-0.0.1 → promptum-0.0.2}/pyproject.toml +1 -3
- promptum-0.0.2/src/promptum/__init__.py +29 -0
- promptum-0.0.2/src/promptum/benchmark/__init__.py +7 -0
- {promptum-0.0.1 → promptum-0.0.2}/src/promptum/benchmark/benchmark.py +8 -12
- {promptum-0.0.1 → promptum-0.0.2}/src/promptum/benchmark/report.py +5 -6
- {promptum-0.0.1/src/promptum/core → promptum-0.0.2/src/promptum/benchmark}/result.py +2 -2
- {promptum-0.0.1/src/promptum/execution → promptum-0.0.2/src/promptum/benchmark}/runner.py +2 -3
- {promptum-0.0.1/src/promptum/core → promptum-0.0.2/src/promptum/benchmark}/test_case.py +1 -1
- {promptum-0.0.1 → promptum-0.0.2}/src/promptum/providers/__init__.py +5 -0
- {promptum-0.0.1 → promptum-0.0.2}/src/promptum/providers/openrouter.py +3 -3
- {promptum-0.0.1 → promptum-0.0.2}/src/promptum/providers/protocol.py +1 -1
- {promptum-0.0.1 → promptum-0.0.2}/src/promptum/validation/validators.py +0 -18
- {promptum-0.0.1 → promptum-0.0.2}/tests/benchmark/conftest.py +3 -3
- {promptum-0.0.1 → promptum-0.0.2}/tests/benchmark/test_report_summary.py +1 -1
- {promptum-0.0.1/tests/core → promptum-0.0.2/tests/benchmark}/test_test_case.py +1 -1
- promptum-0.0.2/tests/conftest.py +1 -0
- promptum-0.0.2/tests/providers/__init__.py +1 -0
- {promptum-0.0.1/tests/core → promptum-0.0.2/tests/providers}/conftest.py +1 -1
- {promptum-0.0.1/tests/core → promptum-0.0.2/tests/providers}/test_metrics.py +1 -1
- {promptum-0.0.1/tests/core → promptum-0.0.2/tests/providers}/test_retry.py +1 -1
- {promptum-0.0.1 → promptum-0.0.2}/uv.lock +1 -107
- promptum-0.0.1/src/promptum/__init__.py +0 -44
- promptum-0.0.1/src/promptum/benchmark/__init__.py +0 -4
- promptum-0.0.1/src/promptum/core/__init__.py +0 -12
- promptum-0.0.1/src/promptum/execution/__init__.py +0 -3
- promptum-0.0.1/src/promptum/serialization/__init__.py +0 -11
- promptum-0.0.1/src/promptum/serialization/base.py +0 -48
- promptum-0.0.1/src/promptum/serialization/html.py +0 -52
- promptum-0.0.1/src/promptum/serialization/json.py +0 -28
- promptum-0.0.1/src/promptum/serialization/protocol.py +0 -13
- promptum-0.0.1/src/promptum/serialization/report_template.html +0 -293
- promptum-0.0.1/src/promptum/serialization/yaml.py +0 -17
- promptum-0.0.1/src/promptum/storage/__init__.py +0 -7
- promptum-0.0.1/src/promptum/storage/file.py +0 -157
- promptum-0.0.1/src/promptum/storage/protocol.py +0 -23
- promptum-0.0.1/tests/conftest.py +0 -40
- promptum-0.0.1/tests/validation/__init__.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/.coveragerc +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/.github/workflows/lint.yml +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/.github/workflows/publish-test.yml +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/.github/workflows/publish.yml +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/.github/workflows/test.yml +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/.github/workflows/typecheck.yml +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/.gitignore +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/.python-version +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/LICENSE +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/pytest.ini +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/ruff.toml +0 -0
- {promptum-0.0.1/src/promptum/core → promptum-0.0.2/src/promptum/providers}/metrics.py +0 -0
- {promptum-0.0.1/src/promptum/core → promptum-0.0.2/src/promptum/providers}/retry.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/src/promptum/py.typed +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/src/promptum/validation/__init__.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/src/promptum/validation/protocol.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/tests/__init__.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/tests/benchmark/__init__.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/tests/benchmark/test_report_filtering.py +0 -0
- {promptum-0.0.1/tests/core → promptum-0.0.2/tests/validation}/__init__.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/tests/validation/conftest.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/tests/validation/test_contains.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/tests/validation/test_exact_match.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/tests/validation/test_json_schema.py +0 -0
- {promptum-0.0.1 → promptum-0.0.2}/tests/validation/test_regex.py +0 -0

--- /dev/null
+++ promptum-0.0.2/CONTRIBUTING.md
@@ -0,0 +1,78 @@
+# Contributing to Promptum
+
+Thank you for your interest in contributing to Promptum! We welcome contributions from the community.
+
+## Getting Started
+
+1. **Fork the repository** to your own GitHub account
+2. **Clone your fork** locally:
+   ```bash
+   git clone https://github.com/YOUR_USERNAME/promptum.git
+   cd promptum
+   ```
+3. **Set up the development environment**:
+   ```bash
+   just sync # Install/sync dependencies
+   ```
+
+## Making Changes
+
+### Branch Naming
+
+Create a new branch named after the issue number you're working on:
+
+```bash
+git checkout -b 42 # For issue #42
+```
+
+### One PR = One Issue
+
+Each pull request should address exactly one issue. If you want to work on multiple issues, create separate branches and PRs for each.
+
+### Work in Progress
+
+If your PR is not ready for review, add `[WIP]` to the title:
+
+```
+[WIP] #42: Fix retry logic in OpenRouterClient
+```
+
+Remove `[WIP]` when the PR is ready for review.
+
+## Submitting Changes
+
+1. **Run tests and linting** before committing:
+   ```bash
+   just lint # Lint and auto-fix
+   just typecheck # Type check
+   just test # Run tests
+   ```
+
+2. **Commit your changes** with clear, descriptive messages:
+   ```bash
+   git commit -m "#42: Fix retry logic in OpenRouterClient"
+   ```
+
+3. **Push to your fork**:
+   ```bash
+   git push origin 42
+   ```
+
+4. **Create a Pull Request** from your fork to the main repository
+
+5. **Tag the maintainer** (@deyna256) in a comment when your PR is ready for review
+
+## CI Requirements
+
+Pull requests must pass all CI checks before review. The maintainer will not review PRs with failing checks.
+
+CI runs:
+- Linting
+- Type checking
+- Tests
+
+## Questions?
+
+Feel free to ask questions in the issue comments or open a discussion.
+
+Thank you for contributing!

--- promptum-0.0.1/Justfile
+++ promptum-0.0.2/Justfile
@@ -23,10 +23,6 @@ cov-html:
     uv run pytest tests/ --cov-report=html
     xdg-open htmlcov/index.html

-# Open benchmark HTML report
-report:
-    xdg-open results/report.html
-
 # Clean up generated files and caches
 clean:
     rm -rf .pytest_cache .ruff_cache .coverage htmlcov results/

--- promptum-0.0.1/PKG-INFO
+++ promptum-0.0.2/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: promptum
-Version: 0.0.1
+Version: 0.0.2
 Summary: Async LLM benchmarking library with protocol-based extensibility
 Project-URL: Homepage, https://github.com/deyna256/promptum
 Project-URL: Repository, https://github.com/deyna256/promptum

@@ -36,8 +36,6 @@ Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Libraries :: Python Modules
 Requires-Python: >=3.13
 Requires-Dist: httpx>=0.27.0
-Requires-Dist: jinja2>=3.1.0
-Requires-Dist: pyyaml>=6.0
 Description-Content-Type: text/markdown

 # promptum

@@ -97,15 +95,12 @@ for attempt in range(max_retries):
         break
     except Exception:
         sleep(2 ** attempt)
-
-# Export results manually
-json.dump(results, open("results.json", "w"))
 ```

 **After promptum:**
 ```python
 report = await benchmark.run_async()
-
+summary = report.get_summary()  # Metrics captured automatically
 ```

 ---

@@ -151,14 +146,13 @@ python your_script.py

 ## What You Get

-
-
-
-
-
-
-
-✅ **Zero Config** - No YAML files, no setup scripts, just Python
+- [x] **One API for 100+ Models** - OpenRouter support out of the box (OpenAI, Anthropic, Google, etc.)
+- [x] **Smart Validation** - ExactMatch, Contains, Regex, JsonSchema, or write your own
+- [x] **Automatic Retries** - Exponential/linear backoff with configurable attempts
+- [x] **Metrics Tracking** - Latency, tokens, cost - automatically captured
+- [x] **Async by Default** - Run 100 tests in parallel without breaking a sweat
+- [x] **Type Safe** - Full type hints, catches errors before runtime
+- [x] **Zero Config** - No YAML files, no setup scripts, just Python

 ---

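The "Automatic Retries" bullet above names exponential and linear backoff. `RetryConfig`'s actual fields never appear in this diff, so the standalone sketch below only models the two schedules; every name in it is hypothetical.

```python
# Hypothetical sketch of the two backoff schedules the feature list names;
# promptum's real RetryConfig fields are not visible in this diff.
def backoff_delays(attempts: int, base_delay: float = 1.0, exponential: bool = True) -> list[float]:
    if exponential:
        return [base_delay * (2**i) for i in range(attempts)]  # 1s, 2s, 4s, ...
    return [base_delay * (i + 1) for i in range(attempts)]     # 1s, 2s, 3s, ...

assert backoff_delays(3) == [1.0, 2.0, 4.0]
assert backoff_delays(3, exponential=False) == [1.0, 2.0, 3.0]
```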
@@ -193,14 +187,11 @@ tests = [
 benchmark.add_tests(tests)
 report = await benchmark.run_async()

-#
-
-
-open("comparison.html", "w").write(html)
+# Side-by-side model comparison
+for model, summary in report.compare_models().items():
+    print(f"{model}: {summary['pass_rate']:.0%} pass rate, {summary['avg_latency_ms']:.0f}ms avg")
 ```

-Open `comparison.html` in your browser - see side-by-side model performance with charts.
-
 ---

 ## Use Cases

@@ -252,7 +243,7 @@ Found a bug? Want a feature? PRs welcome!

 ```bash
 # Development setup
-git clone https://github.com/
+git clone https://github.com/deyna256/promptum.git
 cd promptum
 just sync # Install dependencies
 just test # Run tests

@@ -273,7 +264,7 @@ MIT - do whatever you want with it.

 <div align="center">

-**[⭐ Star on GitHub](https://github.com/
+**[⭐ Star on GitHub](https://github.com/deyna256/promptum)** | **[🐛 Report Bug](https://github.com/deyna256/promptum/issues)** | **[💡 Request Feature](https://github.com/deyna256/promptum/issues)**

 Made for developers who value their time.

--- promptum-0.0.1/README.md
+++ promptum-0.0.2/README.md
@@ -55,15 +55,12 @@ for attempt in range(max_retries):
         break
     except Exception:
         sleep(2 ** attempt)
-
-# Export results manually
-json.dump(results, open("results.json", "w"))
 ```

 **After promptum:**
 ```python
 report = await benchmark.run_async()
-
+summary = report.get_summary()  # Metrics captured automatically
 ```

 ---

@@ -109,14 +106,13 @@ python your_script.py

 ## What You Get

-
-
-
-
-
-
-
-✅ **Zero Config** - No YAML files, no setup scripts, just Python
+- [x] **One API for 100+ Models** - OpenRouter support out of the box (OpenAI, Anthropic, Google, etc.)
+- [x] **Smart Validation** - ExactMatch, Contains, Regex, JsonSchema, or write your own
+- [x] **Automatic Retries** - Exponential/linear backoff with configurable attempts
+- [x] **Metrics Tracking** - Latency, tokens, cost - automatically captured
+- [x] **Async by Default** - Run 100 tests in parallel without breaking a sweat
+- [x] **Type Safe** - Full type hints, catches errors before runtime
+- [x] **Zero Config** - No YAML files, no setup scripts, just Python

 ---

@@ -151,14 +147,11 @@ tests = [
 benchmark.add_tests(tests)
 report = await benchmark.run_async()

-#
-
-
-open("comparison.html", "w").write(html)
+# Side-by-side model comparison
+for model, summary in report.compare_models().items():
+    print(f"{model}: {summary['pass_rate']:.0%} pass rate, {summary['avg_latency_ms']:.0f}ms avg")
 ```

-Open `comparison.html` in your browser - see side-by-side model performance with charts.
-
 ---

 ## Use Cases

@@ -210,7 +203,7 @@ Found a bug? Want a feature? PRs welcome!

 ```bash
 # Development setup
-git clone https://github.com/
+git clone https://github.com/deyna256/promptum.git
 cd promptum
 just sync # Install dependencies
 just test # Run tests

@@ -231,7 +224,7 @@ MIT - do whatever you want with it.

 <div align="center">

-**[⭐ Star on GitHub](https://github.com/
+**[⭐ Star on GitHub](https://github.com/deyna256/promptum)** | **[🐛 Report Bug](https://github.com/deyna256/promptum/issues)** | **[💡 Request Feature](https://github.com/deyna256/promptum/issues)**

 Made for developers who value their time.

--- promptum-0.0.1/pyproject.toml
+++ promptum-0.0.2/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "promptum"
-version = "0.0.1"
+version = "0.0.2"
 description = "Async LLM benchmarking library with protocol-based extensibility"
 readme = "README.md"
 requires-python = ">=3.13"

@@ -18,8 +18,6 @@ classifiers = [
 ]
 dependencies = [
     "httpx>=0.27.0",
-    "pyyaml>=6.0",
-    "jinja2>=3.1.0",
 ]

 [project.urls]

--- /dev/null
+++ promptum-0.0.2/src/promptum/__init__.py
@@ -0,0 +1,29 @@
+from promptum.benchmark import Benchmark, Report, Runner, TestCase, TestResult
+from promptum.providers import LLMProvider, Metrics, OpenRouterClient, RetryConfig, RetryStrategy
+from promptum.validation import (
+    Contains,
+    ExactMatch,
+    JsonSchema,
+    Regex,
+    Validator,
+)
+
+__version__ = "0.0.1"
+
+__all__ = [
+    "TestCase",
+    "TestResult",
+    "Metrics",
+    "RetryConfig",
+    "RetryStrategy",
+    "Validator",
+    "ExactMatch",
+    "Contains",
+    "Regex",
+    "JsonSchema",
+    "LLMProvider",
+    "OpenRouterClient",
+    "Runner",
+    "Benchmark",
+    "Report",
+]
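The new top-level `__init__.py` flattens the public API into a single import surface. A hypothetical end-to-end sketch follows; the exported names come from this diff, but the constructor arguments are assumptions, not the documented API.

```python
# Hypothetical usage sketch of the flattened 0.0.2 import surface.
# Exported names are from this diff; constructor arguments are assumptions.
from promptum import Benchmark, Contains, OpenRouterClient, TestCase

provider = OpenRouterClient(api_key="sk-or-...")  # parameter name assumed
benchmark = Benchmark(provider=provider)          # `provider` appears in benchmark.py below
benchmark.add_tests([
    TestCase(  # field names assumed; only `model` is visible (in report.py)
        prompt="Reply with the word pong.",
        model="openai/gpt-4o-mini",
        validator=Contains("pong"),
    ),
])
report = benchmark.run()     # sync wrapper over run_async(), per benchmark.py
print(report.get_summary())  # metrics captured automatically, per the README
```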
--- /dev/null
+++ promptum-0.0.2/src/promptum/benchmark/__init__.py
@@ -0,0 +1,7 @@
+from promptum.benchmark.benchmark import Benchmark
+from promptum.benchmark.report import Report
+from promptum.benchmark.result import TestResult
+from promptum.benchmark.runner import Runner
+from promptum.benchmark.test_case import TestCase
+
+__all__ = ["Benchmark", "Report", "Runner", "TestCase", "TestResult"]

--- promptum-0.0.1/src/promptum/benchmark/benchmark.py
+++ promptum-0.0.2/src/promptum/benchmark/benchmark.py
@@ -1,11 +1,10 @@
 import asyncio
 from collections.abc import Callable, Sequence
-from typing import Any

 from promptum.benchmark.report import Report
-from promptum.core.result import TestResult
-from promptum.execution.runner import Runner
-from promptum.core.test_case import TestCase
+from promptum.benchmark.result import TestResult
+from promptum.benchmark.runner import Runner
+from promptum.benchmark.test_case import TestCase
 from promptum.providers.protocol import LLMProvider

@@ -29,12 +28,12 @@ class Benchmark:
     def add_tests(self, test_cases: Sequence[TestCase]) -> None:
         self._test_cases.extend(test_cases)

-    def run(self, metadata: dict[str, Any] | None = None) -> Report:
-        return asyncio.run(self.run_async(metadata))
+    def run(self) -> Report:
+        return asyncio.run(self.run_async())

-    async def run_async(self, metadata: dict[str, Any] | None = None) -> Report:
+    async def run_async(self) -> Report:
         if not self._test_cases:
-            return Report(results=[], metadata=metadata or {})
+            return Report(results=[])

         runner = Runner(
             provider=self.provider,
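The `run()`/`run_async()` pair above is the standard sync-over-async wrapper. A minimal standalone illustration of the pattern (the `Job` class is hypothetical):

```python
import asyncio

class Job:
    """Minimal illustration of the sync-over-async pattern Benchmark uses."""

    async def run_async(self) -> str:
        await asyncio.sleep(0)  # stand-in for real async work
        return "done"

    def run(self) -> str:
        # asyncio.run() creates and owns the event loop, so this entry point
        # is for synchronous callers only; it fails inside a running loop.
        return asyncio.run(self.run_async())

print(Job().run())  # -> done
```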
@@ -44,7 +43,4 @@
 
         results = await runner.run(self._test_cases)

-        return Report(
-            results=results,
-            metadata=metadata or {},
-        )
+        return Report(results=results)

--- promptum-0.0.1/src/promptum/benchmark/report.py
+++ promptum-0.0.2/src/promptum/benchmark/report.py
@@ -2,13 +2,12 @@ from collections.abc import Callable, Sequence
 from dataclasses import dataclass
 from typing import Any

-from promptum.core.result import TestResult
+from promptum.benchmark.result import TestResult


 @dataclass(frozen=True, slots=True)
 class Report:
     results: Sequence[TestResult]
-    metadata: dict[str, Any]

     def get_summary(self) -> dict[str, Any]:
         total = len(self.results)
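With `metadata` gone, `Report` is a single-field frozen, slotted dataclass. A minimal reproduction of what that buys, immutability enforced at runtime:

```python
from collections.abc import Sequence
from dataclasses import FrozenInstanceError, dataclass

# Minimal reproduction of the Report shape after the metadata removal.
@dataclass(frozen=True, slots=True)
class Report:
    results: Sequence[str]

r = Report(results=("ok",))
try:
    r.results = ()  # frozen=True rejects any attribute assignment
except FrozenInstanceError:
    print("Report instances are immutable")
```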
@@ -49,7 +48,7 @@
         if passed is not None:
             filtered = [r for r in filtered if r.passed == passed]

-        return Report(results=filtered, metadata=self.metadata)
+        return Report(results=filtered)

     def group_by(self, key: Callable[[TestResult], str]) -> dict[str, "Report"]:
         groups: dict[str, list[TestResult]] = {}

@@ -60,7 +59,7 @@
                 groups[group_key] = []
             groups[group_key].append(result)

-        return {k: Report(results=v, metadata=self.metadata) for k, v in groups.items()}
+        return {k: Report(results=v) for k, v in groups.items()}

     def compare_models(self) -> dict[str, dict[str, Any]]:
         by_model = self.group_by(lambda r: r.test_case.model)

@@ -71,5 +70,5 @@
         if not values:
             return 0
         sorted_values = sorted(values)
-        index = int(len(sorted_values) * p)
-        return sorted_values[
+        index = int((len(sorted_values) - 1) * p)
+        return sorted_values[index]
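This hunk fixes an off-by-one: with `n` values, `int(n * p)` evaluates to `n` at `p = 1.0`, one past the last valid index, while `int((n - 1) * p)` stays within `[0, n - 1]` for any `p` in `[0, 1]`. A standalone check of the corrected formula:

```python
# Standalone check of the corrected 0.0.2 percentile index formula.
def percentile(values: list[float], p: float) -> float:
    sorted_values = sorted(values)
    index = int((len(sorted_values) - 1) * p)  # max index is len - 1, even at p = 1.0
    return sorted_values[index]

latencies = [120.0, 250.0, 310.0, 980.0]
assert percentile(latencies, 0.5) == 250.0
assert percentile(latencies, 1.0) == 980.0  # int(len * 1.0) would index past the end
```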
--- promptum-0.0.1/src/promptum/core/result.py
+++ promptum-0.0.2/src/promptum/benchmark/result.py
@@ -2,8 +2,8 @@ from dataclasses import dataclass, field
 from datetime import datetime
 from typing import Any

-from promptum.core.test_case import TestCase
-from promptum.core.metrics import Metrics
+from promptum.benchmark.test_case import TestCase
+from promptum.providers.metrics import Metrics


 @dataclass(frozen=True, slots=True)

--- promptum-0.0.1/src/promptum/execution/runner.py
+++ promptum-0.0.2/src/promptum/benchmark/runner.py
@@ -3,8 +3,8 @@ from collections.abc import Callable, Sequence

 import httpx

-from promptum.core.result import TestResult
-from promptum.core.test_case import TestCase
+from promptum.benchmark.result import TestResult
+from promptum.benchmark.test_case import TestCase
 from promptum.providers.protocol import LLMProvider

@@ -37,7 +37,6 @@ class Runner:

         results = await asyncio.gather(
             *[run_with_semaphore(tc) for tc in test_cases],
-            return_exceptions=False,
         )

         return list(results)
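Dropping `return_exceptions=False` is behavior-preserving: `False` is `asyncio.gather`'s default, so the first test-case exception still propagates to the caller. For reference:

```python
import asyncio

async def main() -> None:
    # return_exceptions defaults to False: gather re-raises the first failure
    # instead of collecting exceptions into the result list.
    results = await asyncio.gather(*(asyncio.sleep(0, result=i) for i in range(3)))
    assert results == [0, 1, 2]

asyncio.run(main())
```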
--- promptum-0.0.1/src/promptum/providers/__init__.py
+++ promptum-0.0.2/src/promptum/providers/__init__.py
@@ -1,7 +1,12 @@
+from promptum.providers.metrics import Metrics
 from promptum.providers.openrouter import OpenRouterClient
 from promptum.providers.protocol import LLMProvider
+from promptum.providers.retry import RetryConfig, RetryStrategy

 __all__ = [
     "LLMProvider",
+    "Metrics",
     "OpenRouterClient",
+    "RetryConfig",
+    "RetryStrategy",
 ]

--- promptum-0.0.1/src/promptum/providers/openrouter.py
+++ promptum-0.0.2/src/promptum/providers/openrouter.py
@@ -4,8 +4,8 @@ from typing import Any

 import httpx

-from promptum.core.metrics import Metrics
-from promptum.core.retry import RetryConfig, RetryStrategy
+from promptum.providers.metrics import Metrics
+from promptum.providers.retry import RetryConfig, RetryStrategy


 class OpenRouterClient:
@@ -61,7 +61,7 @@ class OpenRouterClient:
             "messages": messages,
             "temperature": temperature,
         }
-        if max_tokens:
+        if max_tokens is not None:
             payload["max_tokens"] = max_tokens
         payload.update(kwargs)

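The `is not None` change fixes a falsy-zero bug: a caller passing `max_tokens=0` was silently dropped by the old truthiness check. Standalone demonstration:

```python
# max_tokens=0 is falsy, so the old check dropped it from the request payload.
max_tokens = 0
payload: dict[str, object] = {}

if max_tokens:  # old check: 0 is falsy, key never set
    payload["max_tokens"] = max_tokens
assert "max_tokens" not in payload

if max_tokens is not None:  # new check: 0 survives
    payload["max_tokens"] = max_tokens
assert payload["max_tokens"] == 0
```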
--- promptum-0.0.1/src/promptum/validation/validators.py
+++ promptum-0.0.2/src/promptum/validation/validators.py
@@ -88,21 +88,3 @@ class JsonSchema:
             keys = ", ".join(self.required_keys)
             return f"Valid JSON with keys: {keys}"
         return "Valid JSON object"
-
-
-@dataclass(frozen=True, slots=True)
-class PlaceholderValidator:
-    """
-    Placeholder validator for deserialized reports.
-
-    Used when original validator cannot be reconstructed from storage.
-    Always returns True. Original validator logic is not preserved.
-    """
-
-    description: str
-
-    def validate(self, response: str) -> tuple[bool, dict[str, Any]]:
-        return True, {"placeholder": True, "note": "Original validator could not be reconstructed"}
-
-    def describe(self) -> str:
-        return self.description
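`PlaceholderValidator` existed only to back the deleted serialization layer, but its removed body is the one place this diff shows the validator interface: `validate()` returning a `(passed, details)` tuple and `describe()` returning a label. A custom-validator sketch mirroring that shape (the `MaxLength` class is hypothetical, and promptum's actual `Validator` protocol may differ):

```python
from dataclasses import dataclass
from typing import Any

# Hypothetical validator following the shape visible in the removed code:
# validate() -> (passed, details), describe() -> human-readable label.
@dataclass(frozen=True, slots=True)
class MaxLength:
    limit: int

    def validate(self, response: str) -> tuple[bool, dict[str, Any]]:
        return len(response) <= self.limit, {"length": len(response), "limit": self.limit}

    def describe(self) -> str:
        return f"Response no longer than {self.limit} characters"

ok, details = MaxLength(limit=10).validate("hello")
assert ok and details["length"] == 5
```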
--- promptum-0.0.1/tests/benchmark/conftest.py
+++ promptum-0.0.2/tests/benchmark/conftest.py
@@ -2,8 +2,8 @@ from datetime import datetime

 import pytest

-from promptum.benchmark import Report
-from promptum.core import Metrics, TestCase, TestResult
+from promptum.benchmark import Report, TestCase, TestResult
+from promptum.providers import Metrics
 from promptum.validation import Contains

@@ -57,4 +57,4 @@ def sample_results() -> list[TestResult]:

 @pytest.fixture
 def sample_report(sample_results: list[TestResult]) -> Report:
-    return Report(results=sample_results, metadata={})
+    return Report(results=sample_results)

--- /dev/null
+++ promptum-0.0.2/tests/conftest.py
@@ -0,0 +1 @@
+
--- /dev/null
+++ promptum-0.0.2/tests/providers/__init__.py
@@ -0,0 +1 @@
+