promptum 0.0.1.tar.gz → 0.0.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66)
  1. promptum-0.0.2/CONTRIBUTING.md +78 -0
  2. {promptum-0.0.1 → promptum-0.0.2}/Justfile +0 -4
  3. {promptum-0.0.1 → promptum-0.0.2}/PKG-INFO +14 -23
  4. {promptum-0.0.1 → promptum-0.0.2}/README.md +13 -20
  5. {promptum-0.0.1 → promptum-0.0.2}/pyproject.toml +1 -3
  6. promptum-0.0.2/src/promptum/__init__.py +29 -0
  7. promptum-0.0.2/src/promptum/benchmark/__init__.py +7 -0
  8. {promptum-0.0.1 → promptum-0.0.2}/src/promptum/benchmark/benchmark.py +8 -12
  9. {promptum-0.0.1 → promptum-0.0.2}/src/promptum/benchmark/report.py +5 -6
  10. {promptum-0.0.1/src/promptum/core → promptum-0.0.2/src/promptum/benchmark}/result.py +2 -2
  11. {promptum-0.0.1/src/promptum/execution → promptum-0.0.2/src/promptum/benchmark}/runner.py +2 -3
  12. {promptum-0.0.1/src/promptum/core → promptum-0.0.2/src/promptum/benchmark}/test_case.py +1 -1
  13. {promptum-0.0.1 → promptum-0.0.2}/src/promptum/providers/__init__.py +5 -0
  14. {promptum-0.0.1 → promptum-0.0.2}/src/promptum/providers/openrouter.py +3 -3
  15. {promptum-0.0.1 → promptum-0.0.2}/src/promptum/providers/protocol.py +1 -1
  16. {promptum-0.0.1 → promptum-0.0.2}/src/promptum/validation/validators.py +0 -18
  17. {promptum-0.0.1 → promptum-0.0.2}/tests/benchmark/conftest.py +3 -3
  18. {promptum-0.0.1 → promptum-0.0.2}/tests/benchmark/test_report_summary.py +1 -1
  19. {promptum-0.0.1/tests/core → promptum-0.0.2/tests/benchmark}/test_test_case.py +1 -1
  20. promptum-0.0.2/tests/conftest.py +1 -0
  21. promptum-0.0.2/tests/providers/__init__.py +1 -0
  22. {promptum-0.0.1/tests/core → promptum-0.0.2/tests/providers}/conftest.py +1 -1
  23. {promptum-0.0.1/tests/core → promptum-0.0.2/tests/providers}/test_metrics.py +1 -1
  24. {promptum-0.0.1/tests/core → promptum-0.0.2/tests/providers}/test_retry.py +1 -1
  25. {promptum-0.0.1 → promptum-0.0.2}/uv.lock +1 -107
  26. promptum-0.0.1/src/promptum/__init__.py +0 -44
  27. promptum-0.0.1/src/promptum/benchmark/__init__.py +0 -4
  28. promptum-0.0.1/src/promptum/core/__init__.py +0 -12
  29. promptum-0.0.1/src/promptum/execution/__init__.py +0 -3
  30. promptum-0.0.1/src/promptum/serialization/__init__.py +0 -11
  31. promptum-0.0.1/src/promptum/serialization/base.py +0 -48
  32. promptum-0.0.1/src/promptum/serialization/html.py +0 -52
  33. promptum-0.0.1/src/promptum/serialization/json.py +0 -28
  34. promptum-0.0.1/src/promptum/serialization/protocol.py +0 -13
  35. promptum-0.0.1/src/promptum/serialization/report_template.html +0 -293
  36. promptum-0.0.1/src/promptum/serialization/yaml.py +0 -17
  37. promptum-0.0.1/src/promptum/storage/__init__.py +0 -7
  38. promptum-0.0.1/src/promptum/storage/file.py +0 -157
  39. promptum-0.0.1/src/promptum/storage/protocol.py +0 -23
  40. promptum-0.0.1/tests/conftest.py +0 -40
  41. promptum-0.0.1/tests/validation/__init__.py +0 -0
  42. {promptum-0.0.1 → promptum-0.0.2}/.coveragerc +0 -0
  43. {promptum-0.0.1 → promptum-0.0.2}/.github/workflows/lint.yml +0 -0
  44. {promptum-0.0.1 → promptum-0.0.2}/.github/workflows/publish-test.yml +0 -0
  45. {promptum-0.0.1 → promptum-0.0.2}/.github/workflows/publish.yml +0 -0
  46. {promptum-0.0.1 → promptum-0.0.2}/.github/workflows/test.yml +0 -0
  47. {promptum-0.0.1 → promptum-0.0.2}/.github/workflows/typecheck.yml +0 -0
  48. {promptum-0.0.1 → promptum-0.0.2}/.gitignore +0 -0
  49. {promptum-0.0.1 → promptum-0.0.2}/.python-version +0 -0
  50. {promptum-0.0.1 → promptum-0.0.2}/LICENSE +0 -0
  51. {promptum-0.0.1 → promptum-0.0.2}/pytest.ini +0 -0
  52. {promptum-0.0.1 → promptum-0.0.2}/ruff.toml +0 -0
  53. {promptum-0.0.1/src/promptum/core → promptum-0.0.2/src/promptum/providers}/metrics.py +0 -0
  54. {promptum-0.0.1/src/promptum/core → promptum-0.0.2/src/promptum/providers}/retry.py +0 -0
  55. {promptum-0.0.1 → promptum-0.0.2}/src/promptum/py.typed +0 -0
  56. {promptum-0.0.1 → promptum-0.0.2}/src/promptum/validation/__init__.py +0 -0
  57. {promptum-0.0.1 → promptum-0.0.2}/src/promptum/validation/protocol.py +0 -0
  58. {promptum-0.0.1 → promptum-0.0.2}/tests/__init__.py +0 -0
  59. {promptum-0.0.1 → promptum-0.0.2}/tests/benchmark/__init__.py +0 -0
  60. {promptum-0.0.1 → promptum-0.0.2}/tests/benchmark/test_report_filtering.py +0 -0
  61. {promptum-0.0.1/tests/core → promptum-0.0.2/tests/validation}/__init__.py +0 -0
  62. {promptum-0.0.1 → promptum-0.0.2}/tests/validation/conftest.py +0 -0
  63. {promptum-0.0.1 → promptum-0.0.2}/tests/validation/test_contains.py +0 -0
  64. {promptum-0.0.1 → promptum-0.0.2}/tests/validation/test_exact_match.py +0 -0
  65. {promptum-0.0.1 → promptum-0.0.2}/tests/validation/test_json_schema.py +0 -0
  66. {promptum-0.0.1 → promptum-0.0.2}/tests/validation/test_regex.py +0 -0
promptum-0.0.2/CONTRIBUTING.md
@@ -0,0 +1,78 @@
+ # Contributing to Promptum
+
+ Thank you for your interest in contributing to Promptum! We welcome contributions from the community.
+
+ ## Getting Started
+
+ 1. **Fork the repository** to your own GitHub account
+ 2. **Clone your fork** locally:
+ ```bash
+ git clone https://github.com/YOUR_USERNAME/promptum.git
+ cd promptum
+ ```
+ 3. **Set up the development environment**:
+ ```bash
+ just sync # Install/sync dependencies
+ ```
+
+ ## Making Changes
+
+ ### Branch Naming
+
+ Create a new branch named after the issue number you're working on:
+
+ ```bash
+ git checkout -b 42 # For issue #42
+ ```
+
+ ### One PR = One Issue
+
+ Each pull request should address exactly one issue. If you want to work on multiple issues, create separate branches and PRs for each.
+
+ ### Work in Progress
+
+ If your PR is not ready for review, add `[WIP]` to the title:
+
+ ```
+ [WIP] #42: Fix retry logic in OpenRouterClient
+ ```
+
+ Remove `[WIP]` when the PR is ready for review.
+
+ ## Submitting Changes
+
+ 1. **Run tests and linting** before committing:
+ ```bash
+ just lint # Lint and auto-fix
+ just typecheck # Type check
+ just test # Run tests
+ ```
+
+ 2. **Commit your changes** with clear, descriptive messages:
+ ```bash
+ git commit -m "#42: Fix retry logic in OpenRouterClient"
+ ```
+
+ 3. **Push to your fork**:
+ ```bash
+ git push origin 42
+ ```
+
+ 4. **Create a Pull Request** from your fork to the main repository
+
+ 5. **Tag the maintainer** (@deyna256) in a comment when your PR is ready for review
+
+ ## CI Requirements
+
+ Pull requests must pass all CI checks before review. The maintainer will not review PRs with failing checks.
+
+ CI runs:
+ - Linting
+ - Type checking
+ - Tests
+
+ ## Questions?
+
+ Feel free to ask questions in the issue comments or open a discussion.
+
+ Thank you for contributing!

{promptum-0.0.1 → promptum-0.0.2}/Justfile
@@ -23,10 +23,6 @@ cov-html:
  uv run pytest tests/ --cov-report=html
  xdg-open htmlcov/index.html

- # Open benchmark HTML report
- report:
- xdg-open results/report.html
-
  # Clean up generated files and caches
  clean:
  rm -rf .pytest_cache .ruff_cache .coverage htmlcov results/

{promptum-0.0.1 → promptum-0.0.2}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: promptum
- Version: 0.0.1
+ Version: 0.0.2
  Summary: Async LLM benchmarking library with protocol-based extensibility
  Project-URL: Homepage, https://github.com/deyna256/promptum
  Project-URL: Repository, https://github.com/deyna256/promptum
@@ -36,8 +36,6 @@ Classifier: Programming Language :: Python :: 3.13
  Classifier: Topic :: Software Development :: Libraries :: Python Modules
  Requires-Python: >=3.13
  Requires-Dist: httpx>=0.27.0
- Requires-Dist: jinja2>=3.1.0
- Requires-Dist: pyyaml>=6.0
  Description-Content-Type: text/markdown

  # promptum
@@ -97,15 +95,12 @@ for attempt in range(max_retries):
  break
  except Exception:
  sleep(2 ** attempt)
-
- # Export results manually
- json.dump(results, open("results.json", "w"))
  ```

  **After promptum:**
  ```python
  report = await benchmark.run_async()
- HTMLSerializer().serialize(report) # Beautiful HTML report
+ summary = report.get_summary() # Metrics captured automatically
  ```

  ---
@@ -151,14 +146,13 @@ python your_script.py

  ## What You Get

- **One API for 100+ Models** - OpenRouter support out of the box (OpenAI, Anthropic, Google, etc.)
- **Smart Validation** - ExactMatch, Contains, Regex, JsonSchema, or write your own
- **Automatic Retries** - Exponential/linear backoff with configurable attempts
- **Metrics Tracking** - Latency, tokens, cost - automatically captured
- **Beautiful Reports** - JSON, YAML, or interactive HTML with charts
- **Async by Default** - Run 100 tests in parallel without breaking a sweat
- **Type Safe** - Full type hints, catches errors before runtime
- ✅ **Zero Config** - No YAML files, no setup scripts, just Python
+ - [x] **One API for 100+ Models** - OpenRouter support out of the box (OpenAI, Anthropic, Google, etc.)
+ - [x] **Smart Validation** - ExactMatch, Contains, Regex, JsonSchema, or write your own
+ - [x] **Automatic Retries** - Exponential/linear backoff with configurable attempts
+ - [x] **Metrics Tracking** - Latency, tokens, cost - automatically captured
+ - [x] **Async by Default** - Run 100 tests in parallel without breaking a sweat
+ - [x] **Type Safe** - Full type hints, catches errors before runtime
+ - [x] **Zero Config** - No YAML files, no setup scripts, just Python

  ---

@@ -193,14 +187,11 @@ tests = [
  benchmark.add_tests(tests)
  report = await benchmark.run_async()

- # Export as HTML
- from promptum import HTMLSerializer
- html = HTMLSerializer().serialize(report)
- open("comparison.html", "w").write(html)
+ # Side-by-side model comparison
+ for model, summary in report.compare_models().items():
+ print(f"{model}: {summary['pass_rate']:.0%} pass rate, {summary['avg_latency_ms']:.0f}ms avg")
  ```

- Open `comparison.html` in your browser - see side-by-side model performance with charts.
-
  ---

  ## Use Cases
@@ -252,7 +243,7 @@ Found a bug? Want a feature? PRs welcome!

  ```bash
  # Development setup
- git clone https://github.com/yourusername/promptum.git
+ git clone https://github.com/deyna256/promptum.git
  cd promptum
  just sync # Install dependencies
  just test # Run tests
@@ -273,7 +264,7 @@ MIT - do whatever you want with it.

  <div align="center">

- **[⭐ Star on GitHub](https://github.com/yourusername/promptum)** | **[🐛 Report Bug](https://github.com/yourusername/promptum/issues)** | **[💡 Request Feature](https://github.com/yourusername/promptum/issues)**
+ **[⭐ Star on GitHub](https://github.com/deyna256/promptum)** | **[🐛 Report Bug](https://github.com/deyna256/promptum/issues)** | **[💡 Request Feature](https://github.com/deyna256/promptum/issues)**

  Made for developers who value their time.

{promptum-0.0.1 → promptum-0.0.2}/README.md
@@ -55,15 +55,12 @@ for attempt in range(max_retries):
  break
  except Exception:
  sleep(2 ** attempt)
-
- # Export results manually
- json.dump(results, open("results.json", "w"))
  ```

  **After promptum:**
  ```python
  report = await benchmark.run_async()
- HTMLSerializer().serialize(report) # Beautiful HTML report
+ summary = report.get_summary() # Metrics captured automatically
  ```

  ---
@@ -109,14 +106,13 @@ python your_script.py

  ## What You Get

- **One API for 100+ Models** - OpenRouter support out of the box (OpenAI, Anthropic, Google, etc.)
- **Smart Validation** - ExactMatch, Contains, Regex, JsonSchema, or write your own
- **Automatic Retries** - Exponential/linear backoff with configurable attempts
- **Metrics Tracking** - Latency, tokens, cost - automatically captured
- **Beautiful Reports** - JSON, YAML, or interactive HTML with charts
- **Async by Default** - Run 100 tests in parallel without breaking a sweat
- **Type Safe** - Full type hints, catches errors before runtime
- ✅ **Zero Config** - No YAML files, no setup scripts, just Python
+ - [x] **One API for 100+ Models** - OpenRouter support out of the box (OpenAI, Anthropic, Google, etc.)
+ - [x] **Smart Validation** - ExactMatch, Contains, Regex, JsonSchema, or write your own
+ - [x] **Automatic Retries** - Exponential/linear backoff with configurable attempts
+ - [x] **Metrics Tracking** - Latency, tokens, cost - automatically captured
+ - [x] **Async by Default** - Run 100 tests in parallel without breaking a sweat
+ - [x] **Type Safe** - Full type hints, catches errors before runtime
+ - [x] **Zero Config** - No YAML files, no setup scripts, just Python

  ---

@@ -151,14 +147,11 @@ tests = [
  benchmark.add_tests(tests)
  report = await benchmark.run_async()

- # Export as HTML
- from promptum import HTMLSerializer
- html = HTMLSerializer().serialize(report)
- open("comparison.html", "w").write(html)
+ # Side-by-side model comparison
+ for model, summary in report.compare_models().items():
+ print(f"{model}: {summary['pass_rate']:.0%} pass rate, {summary['avg_latency_ms']:.0f}ms avg")
  ```

- Open `comparison.html` in your browser - see side-by-side model performance with charts.
-
  ---

  ## Use Cases
@@ -210,7 +203,7 @@ Found a bug? Want a feature? PRs welcome!

  ```bash
  # Development setup
- git clone https://github.com/yourusername/promptum.git
+ git clone https://github.com/deyna256/promptum.git
  cd promptum
  just sync # Install dependencies
  just test # Run tests
@@ -231,7 +224,7 @@ MIT - do whatever you want with it.

  <div align="center">

- **[⭐ Star on GitHub](https://github.com/yourusername/promptum)** | **[🐛 Report Bug](https://github.com/yourusername/promptum/issues)** | **[💡 Request Feature](https://github.com/yourusername/promptum/issues)**
+ **[⭐ Star on GitHub](https://github.com/deyna256/promptum)** | **[🐛 Report Bug](https://github.com/deyna256/promptum/issues)** | **[💡 Request Feature](https://github.com/deyna256/promptum/issues)**

  Made for developers who value their time.

{promptum-0.0.1 → promptum-0.0.2}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "promptum"
- version = "0.0.1"
+ version = "0.0.2"
  description = "Async LLM benchmarking library with protocol-based extensibility"
  readme = "README.md"
  requires-python = ">=3.13"
@@ -18,8 +18,6 @@ classifiers = [
  ]
  dependencies = [
  "httpx>=0.27.0",
- "pyyaml>=6.0",
- "jinja2>=3.1.0",
  ]

  [project.urls]

promptum-0.0.2/src/promptum/__init__.py
@@ -0,0 +1,29 @@
+ from promptum.benchmark import Benchmark, Report, Runner, TestCase, TestResult
+ from promptum.providers import LLMProvider, Metrics, OpenRouterClient, RetryConfig, RetryStrategy
+ from promptum.validation import (
+ Contains,
+ ExactMatch,
+ JsonSchema,
+ Regex,
+ Validator,
+ )
+
+ __version__ = "0.0.1"
+
+ __all__ = [
+ "TestCase",
+ "TestResult",
+ "Metrics",
+ "RetryConfig",
+ "RetryStrategy",
+ "Validator",
+ "ExactMatch",
+ "Contains",
+ "Regex",
+ "JsonSchema",
+ "LLMProvider",
+ "OpenRouterClient",
+ "Runner",
+ "Benchmark",
+ "Report",
+ ]

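The new top-level `__init__.py` above flattens the 0.0.1 `promptum.core` / `promptum.execution` layout into `benchmark` and `providers` re-exports (note the published 0.0.2 file still reads `__version__ = "0.0.1"`). A minimal migration sketch, inferred from these exports and from the test-import changes later in this diff; it assumes promptum 0.0.2 is installed:

```python
# Imports that worked in promptum 0.0.1 (these modules are removed in 0.0.2):
#   from promptum.core import Metrics, RetryConfig, RetryStrategy, TestCase, TestResult
#   from promptum.execution.runner import Runner

# Equivalent imports in 0.0.2 -- everything is re-exported at the top level:
from promptum import (
    Benchmark,
    Contains,
    Metrics,
    OpenRouterClient,
    Report,
    RetryConfig,
    RetryStrategy,
    Runner,
    TestCase,
    TestResult,
)

# The relocated subpackage paths also work directly:
from promptum.benchmark import TestCase as BenchmarkTestCase
from promptum.providers import Metrics as ProviderMetrics
```
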
promptum-0.0.2/src/promptum/benchmark/__init__.py
@@ -0,0 +1,7 @@
+ from promptum.benchmark.benchmark import Benchmark
+ from promptum.benchmark.report import Report
+ from promptum.benchmark.result import TestResult
+ from promptum.benchmark.runner import Runner
+ from promptum.benchmark.test_case import TestCase
+
+ __all__ = ["Benchmark", "Report", "Runner", "TestCase", "TestResult"]

{promptum-0.0.1 → promptum-0.0.2}/src/promptum/benchmark/benchmark.py
@@ -1,11 +1,10 @@
  import asyncio
  from collections.abc import Callable, Sequence
- from typing import Any

  from promptum.benchmark.report import Report
- from promptum.core.result import TestResult
- from promptum.core.test_case import TestCase
- from promptum.execution.runner import Runner
+ from promptum.benchmark.result import TestResult
+ from promptum.benchmark.runner import Runner
+ from promptum.benchmark.test_case import TestCase
  from promptum.providers.protocol import LLMProvider


@@ -29,12 +28,12 @@ class Benchmark:
  def add_tests(self, test_cases: Sequence[TestCase]) -> None:
  self._test_cases.extend(test_cases)

- def run(self, metadata: dict[str, Any] | None = None) -> Report:
- return asyncio.run(self.run_async(metadata))
+ def run(self) -> Report:
+ return asyncio.run(self.run_async())

- async def run_async(self, metadata: dict[str, Any] | None = None) -> Report:
+ async def run_async(self) -> Report:
  if not self._test_cases:
- return Report(results=[], metadata=metadata or {})
+ return Report(results=[])

  runner = Runner(
  provider=self.provider,
@@ -44,7 +43,4 @@ class Benchmark:

  results = await runner.run(self._test_cases)

- return Report(
- results=results,
- metadata=metadata or {},
- )
+ return Report(results=results)

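The hunks above remove the optional `metadata` argument from `run()` and `run_async()`, and `Report` is now built from results alone. A hedged caller-side sketch of the change; the `OpenRouterClient(api_key=...)` and `Benchmark(provider=...)` keyword arguments are assumptions, since neither constructor appears in this diff:

```python
from promptum import Benchmark, OpenRouterClient

provider = OpenRouterClient(api_key="sk-or-...")  # constructor argument assumed
benchmark = Benchmark(provider=provider)          # keyword name assumed

# 0.0.1 style (no longer accepted):
#   report = benchmark.run(metadata={"suite": "smoke"})

# 0.0.2 style -- keep any run metadata yourself, next to the report:
report = benchmark.run()
run_record = {"suite": "smoke", "summary": report.get_summary()}
```
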
{promptum-0.0.1 → promptum-0.0.2}/src/promptum/benchmark/report.py
@@ -2,13 +2,12 @@ from collections.abc import Callable, Sequence
  from dataclasses import dataclass
  from typing import Any

- from promptum.core.result import TestResult
+ from promptum.benchmark.result import TestResult


  @dataclass(frozen=True, slots=True)
  class Report:
  results: Sequence[TestResult]
- metadata: dict[str, Any]

  def get_summary(self) -> dict[str, Any]:
  total = len(self.results)
@@ -49,7 +48,7 @@ class Report:
  if passed is not None:
  filtered = [r for r in filtered if r.passed == passed]

- return Report(results=filtered, metadata=self.metadata)
+ return Report(results=filtered)

  def group_by(self, key: Callable[[TestResult], str]) -> dict[str, "Report"]:
  groups: dict[str, list[TestResult]] = {}
@@ -60,7 +59,7 @@ class Report:
  groups[group_key] = []
  groups[group_key].append(result)

- return {k: Report(results=v, metadata=self.metadata) for k, v in groups.items()}
+ return {k: Report(results=v) for k, v in groups.items()}

  def compare_models(self) -> dict[str, dict[str, Any]]:
  by_model = self.group_by(lambda r: r.test_case.model)
@@ -71,5 +70,5 @@ class Report:
  if not values:
  return 0
  sorted_values = sorted(values)
- index = int(len(sorted_values) * p)
- return sorted_values[min(index, len(sorted_values) - 1)]
+ index = int((len(sorted_values) - 1) * p)
+ return sorted_values[index]

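The last hunk above changes the percentile index from `int(len(sorted_values) * p)` (clamped to the last element) to `int((len(sorted_values) - 1) * p)`, which shifts the element chosen for interior percentiles. A small worked comparison with made-up latency values:

```python
# Ten sorted latency samples (illustrative numbers only) and p = 0.5.
values = sorted([120, 95, 240, 180, 110, 300, 150, 135, 205, 170])
p = 0.5

old_index = min(int(len(values) * p), len(values) - 1)  # 0.0.1: int(10 * 0.5) = 5
new_index = int((len(values) - 1) * p)                  # 0.0.2: int(9 * 0.5) = 4

print(values[old_index])  # 170 -- upper neighbour of the median
print(values[new_index])  # 150 -- lower neighbour of the median
```
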
{promptum-0.0.1/src/promptum/core → promptum-0.0.2/src/promptum/benchmark}/result.py
@@ -2,8 +2,8 @@ from dataclasses import dataclass, field
  from datetime import datetime
  from typing import Any

- from promptum.core.metrics import Metrics
- from promptum.core.test_case import TestCase
+ from promptum.benchmark.test_case import TestCase
+ from promptum.providers.metrics import Metrics


  @dataclass(frozen=True, slots=True)

{promptum-0.0.1/src/promptum/execution → promptum-0.0.2/src/promptum/benchmark}/runner.py
@@ -3,8 +3,8 @@ from collections.abc import Callable, Sequence

  import httpx

- from promptum.core.result import TestResult
- from promptum.core.test_case import TestCase
+ from promptum.benchmark.result import TestResult
+ from promptum.benchmark.test_case import TestCase
  from promptum.providers.protocol import LLMProvider


@@ -37,7 +37,6 @@ class Runner:

  results = await asyncio.gather(
  *[run_with_semaphore(tc) for tc in test_cases],
- return_exceptions=False,
  )

  return list(results)

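Dropping `return_exceptions=False` above is behaviour-preserving, since `False` is already the default for `asyncio.gather`: a failing task still propagates its exception out of the `gather()` call. A standalone sketch of the two modes:

```python
import asyncio


async def ok() -> str:
    return "ok"


async def boom() -> str:
    raise RuntimeError("provider call failed")


async def main() -> None:
    try:
        # Same as return_exceptions=False (the default relied on by Runner above).
        await asyncio.gather(ok(), boom())
    except RuntimeError as exc:
        print(f"raised: {exc}")

    # Opting in to return_exceptions=True collects errors instead of raising.
    results = await asyncio.gather(ok(), boom(), return_exceptions=True)
    print(results)  # ['ok', RuntimeError('provider call failed')]


asyncio.run(main())
```
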
{promptum-0.0.1/src/promptum/core → promptum-0.0.2/src/promptum/benchmark}/test_case.py
@@ -5,7 +5,7 @@ from typing import TYPE_CHECKING, Any
  if TYPE_CHECKING:
  from promptum.validation.protocol import Validator

- from promptum.core.retry import RetryConfig
+ from promptum.providers.retry import RetryConfig


  @dataclass(frozen=True, slots=True)

{promptum-0.0.1 → promptum-0.0.2}/src/promptum/providers/__init__.py
@@ -1,7 +1,12 @@
+ from promptum.providers.metrics import Metrics
  from promptum.providers.openrouter import OpenRouterClient
  from promptum.providers.protocol import LLMProvider
+ from promptum.providers.retry import RetryConfig, RetryStrategy

  __all__ = [
  "LLMProvider",
+ "Metrics",
  "OpenRouterClient",
+ "RetryConfig",
+ "RetryStrategy",
  ]

{promptum-0.0.1 → promptum-0.0.2}/src/promptum/providers/openrouter.py
@@ -4,8 +4,8 @@ from typing import Any

  import httpx

- from promptum.core.metrics import Metrics
- from promptum.core.retry import RetryConfig, RetryStrategy
+ from promptum.providers.metrics import Metrics
+ from promptum.providers.retry import RetryConfig, RetryStrategy


  class OpenRouterClient:
@@ -61,7 +61,7 @@ class OpenRouterClient:
  "messages": messages,
  "temperature": temperature,
  }
- if max_tokens:
+ if max_tokens is not None:
  payload["max_tokens"] = max_tokens
  payload.update(kwargs)

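The `max_tokens` guard above now treats only `None` as "unset", so an explicit falsy value such as `max_tokens=0` is no longer silently dropped from the request payload. A standalone illustration of the difference, independent of `OpenRouterClient` itself:

```python
from typing import Any


def build_payload_old(max_tokens: int | None) -> dict[str, Any]:
    payload: dict[str, Any] = {}
    if max_tokens:  # 0.0.1: truthiness check -- skips 0 as well as None
        payload["max_tokens"] = max_tokens
    return payload


def build_payload_new(max_tokens: int | None) -> dict[str, Any]:
    payload: dict[str, Any] = {}
    if max_tokens is not None:  # 0.0.2: only None means "not set"
        payload["max_tokens"] = max_tokens
    return payload


print(build_payload_old(0))  # {} -- the explicit value is lost
print(build_payload_new(0))  # {'max_tokens': 0}
```
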
{promptum-0.0.1 → promptum-0.0.2}/src/promptum/providers/protocol.py
@@ -1,6 +1,6 @@
  from typing import Any, Protocol

- from promptum.core.metrics import Metrics
+ from promptum.providers.metrics import Metrics


  class LLMProvider(Protocol):

{promptum-0.0.1 → promptum-0.0.2}/src/promptum/validation/validators.py
@@ -88,21 +88,3 @@ class JsonSchema:
  keys = ", ".join(self.required_keys)
  return f"Valid JSON with keys: {keys}"
  return "Valid JSON object"
-
-
- @dataclass(frozen=True, slots=True)
- class PlaceholderValidator:
- """
- Placeholder validator for deserialized reports.
-
- Used when original validator cannot be reconstructed from storage.
- Always returns True. Original validator logic is not preserved.
- """
-
- description: str
-
- def validate(self, response: str) -> tuple[bool, dict[str, Any]]:
- return True, {"placeholder": True, "note": "Original validator could not be reconstructed"}
-
- def describe(self) -> str:
- return self.description

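With `PlaceholderValidator` removed alongside the serialization layer, custom validators still follow the protocol shape the deleted class implemented: `validate()` returning a `(passed, details)` tuple and `describe()` returning a string. A hypothetical validator written against that shape (the `MaxLength` name and its fields are illustrative, not part of promptum):

```python
from dataclasses import dataclass
from typing import Any


@dataclass(frozen=True, slots=True)
class MaxLength:
    """Passes when the response is at most `limit` characters long."""

    limit: int

    def validate(self, response: str) -> tuple[bool, dict[str, Any]]:
        return len(response) <= self.limit, {"length": len(response), "limit": self.limit}

    def describe(self) -> str:
        return f"Response no longer than {self.limit} characters"
```
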
{promptum-0.0.1 → promptum-0.0.2}/tests/benchmark/conftest.py
@@ -2,8 +2,8 @@ from datetime import datetime

  import pytest

- from promptum.benchmark import Report
- from promptum.core import Metrics, TestCase, TestResult
+ from promptum.benchmark import Report, TestCase, TestResult
+ from promptum.providers import Metrics
  from promptum.validation import Contains


@@ -57,4 +57,4 @@ def sample_results() -> list[TestResult]:

  @pytest.fixture
  def sample_report(sample_results: list[TestResult]) -> Report:
- return Report(results=sample_results, metadata={"version": "1.0"})
+ return Report(results=sample_results)

{promptum-0.0.1 → promptum-0.0.2}/tests/benchmark/test_report_summary.py
@@ -13,7 +13,7 @@ def test_report_summary(sample_report: Report) -> None:


  def test_report_summary_empty() -> None:
- report = Report(results=[], metadata={})
+ report = Report(results=[])
  summary = report.get_summary()

  assert summary["total"] == 0

{promptum-0.0.1/tests/core → promptum-0.0.2/tests/benchmark}/test_test_case.py
@@ -1,4 +1,4 @@
- from promptum.core import TestCase
+ from promptum.benchmark import TestCase
  from promptum.validation import Contains


promptum-0.0.2/tests/conftest.py
@@ -0,0 +1 @@
+

promptum-0.0.2/tests/providers/__init__.py
@@ -0,0 +1 @@
+

{promptum-0.0.1/tests/core → promptum-0.0.2/tests/providers}/conftest.py
@@ -1,6 +1,6 @@
  import pytest

- from promptum.core import Metrics, RetryConfig
+ from promptum.providers import Metrics, RetryConfig


  @pytest.fixture

{promptum-0.0.1/tests/core → promptum-0.0.2/tests/providers}/test_metrics.py
@@ -1,4 +1,4 @@
- from promptum.core import Metrics
+ from promptum.providers import Metrics


  def test_metrics_creation(basic_metrics: Metrics) -> None:

{promptum-0.0.1/tests/core → promptum-0.0.2/tests/providers}/test_retry.py
@@ -1,4 +1,4 @@
- from promptum.core import RetryConfig, RetryStrategy
+ from promptum.providers import RetryConfig, RetryStrategy


  def test_retry_config_defaults(default_retry_config: RetryConfig) -> None: