promptum 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,280 @@
1
+ Metadata-Version: 2.4
2
+ Name: promptum
3
+ Version: 0.0.1
4
+ Summary: Async LLM benchmarking library with protocol-based extensibility
5
+ Project-URL: Homepage, https://github.com/deyna256/promptum
6
+ Project-URL: Repository, https://github.com/deyna256/promptum
7
+ Project-URL: Issues, https://github.com/deyna256/promptum/issues
8
+ Author-email: deyna256 <literallybugcreator@gmail.com>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 Ivan Deyna
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Keywords: anthropic,async,benchmarking,llm,openai
32
+ Classifier: Development Status :: 3 - Alpha
33
+ Classifier: Intended Audience :: Developers
34
+ Classifier: Programming Language :: Python :: 3
35
+ Classifier: Programming Language :: Python :: 3.13
36
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
37
+ Requires-Python: >=3.13
38
+ Requires-Dist: httpx>=0.27.0
39
+ Requires-Dist: jinja2>=3.1.0
40
+ Requires-Dist: pyyaml>=6.0
41
+ Description-Content-Type: text/markdown
42
+
43
+ # promptum
44
+
45
+ <div align="center">
46
+
47
+ ![Python 3.13+](https://img.shields.io/badge/Python-3.13+-blue?style=for-the-badge&logo=python)
48
+ ![Async](https://img.shields.io/badge/Async-First-green?style=for-the-badge)
49
+ ![License: MIT](https://img.shields.io/badge/License-MIT-yellow?style=for-the-badge)
50
+
51
+ **Benchmark LLMs Like a Pro. In 5 Lines of Code.**
52
+
53
+ Stop writing boilerplate to test LLMs. Start getting results.
54
+
55
+ </div>
56
+
57
+ ---
58
+
59
+ ## What's This?
60
+
61
+ A dead-simple Python library for benchmarking LLM providers. Write tests once, run them across any model, get beautiful reports.
62
+
63
+ ```python
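+ # client: any provider instance, e.g. the OpenRouterClient shown in Quick Start below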
64
+ benchmark = Benchmark(provider=client, name="my_test")
65
+ benchmark.add_test(TestCase(
66
+ prompt="What is 2+2?",
67
+ model="gpt-3.5-turbo",
68
+ validator=Contains("4")
69
+ ))
70
+ report = await benchmark.run_async()
71
+ ```
72
+
73
+ That's it. No setup. No config files. Just results.
74
+
75
+ ---
76
+
77
+ ## Why You Need This
78
+
79
+ **Before promptum:**
80
+ ```python
81
+ # Custom API client for each provider
82
+ openai_client = OpenAI(api_key=...)
83
+ anthropic_client = Anthropic(api_key=...)
84
+
85
+ # Manual validation logic
86
+ if "correct answer" not in response:
87
+     failed_tests.append(...)
88
+
89
+ # Track metrics yourself
90
+ latency = end_time - start_time
91
+ tokens = response.usage.total_tokens
92
+
93
+ # Write your own retry logic
94
+ for attempt in range(max_retries):
95
+     try:
96
+         response = client.chat.completions.create(...)
97
+         break
98
+     except Exception:
99
+         sleep(2 ** attempt)
100
+
101
+ # Export results manually
102
+ json.dump(results, open("results.json", "w"))
103
+ ```
104
+
105
+ **After promptum:**
106
+ ```python
107
+ report = await benchmark.run_async()
108
+ HTMLSerializer().serialize(report) # Beautiful HTML report
109
+ ```
110
+
111
+ ---
112
+
113
+ ## Quick Start
114
+
115
+ ```bash
116
+ pip install promptum # (or: uv pip install promptum)
117
+ export OPENROUTER_API_KEY="your-key"
118
+ ```
119
+
120
+ ```python
121
+ import asyncio
122
+ from promptum import Benchmark, TestCase, OpenRouterClient, Contains
123
+
124
+ async def main():
125
+     async with OpenRouterClient(api_key="your-key") as client:
126
+         benchmark = Benchmark(provider=client, name="quick_test")
127
+
128
+         benchmark.add_test(TestCase(
129
+             name="basic_math",
130
+             prompt="What is 15 * 7? Reply with just the number.",
131
+             model="openai/gpt-3.5-turbo",
132
+             validator=Contains("105")
133
+         ))
134
+
135
+         report = await benchmark.run_async()
136
+         summary = report.get_summary()
137
+
138
+         print(f"✓ {summary['passed']}/{summary['total']} tests passed")
139
+         print(f"⚡ {summary['avg_latency_ms']:.0f}ms average")
140
+         print(f"💰 ${summary['total_cost_usd']:.6f} total cost")
141
+
142
+ asyncio.run(main())
143
+ ```
144
+
145
+ Run it:
146
+ ```bash
147
+ python your_script.py
148
+ ```
149
+
150
+ ---
151
+
152
+ ## What You Get
153
+
154
+ ✅ **One API for 100+ Models** - OpenRouter support out of the box (OpenAI, Anthropic, Google, etc.)
155
+ ✅ **Smart Validation** - ExactMatch, Contains, Regex, JsonSchema, or write your own (see the sketch after this list)
156
+ ✅ **Automatic Retries** - Exponential/linear backoff with configurable attempts
157
+ ✅ **Metrics Tracking** - Latency, tokens, cost - automatically captured
158
+ ✅ **Beautiful Reports** - JSON, YAML, or interactive HTML with charts
159
+ ✅ **Async by Default** - Run 100 tests in parallel without breaking a sweat
160
+ ✅ **Type Safe** - Full type hints, catches errors before runtime
161
+ ✅ **Zero Config** - No YAML files, no setup scripts, just Python
162
+
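+ Because validation is protocol-based like everything else, "write your own" can be a plain class with a single check method; no inheritance. A minimal sketch, assuming the validator protocol expects a `validate(response: str) -> bool` style method (the class and method names here are illustrative, not confirmed library API; check `promptum/validation/protocol.py` for the exact signature):
+
+ ```python
+ # Hypothetical custom validator: a plain class that satisfies the assumed protocol.
+ class WordCountUnder:
+     def __init__(self, limit: int) -> None:
+         self.limit = limit
+
+     def validate(self, response: str) -> bool:
+         # Pass when the model's reply stays under the word limit.
+         return len(response.split()) < self.limit
+ ```
+
+ If the signature matches, an instance would be passed as `validator=WordCountUnder(50)` in a `TestCase`, the same way the built-ins are used above.
+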
163
+ ---
164
+
165
+ ## Real Example
166
+
167
+ Compare GPT-4 vs Claude on your tasks:
168
+
169
+ ```python
170
+ from promptum import Benchmark, TestCase, ExactMatch, Contains, Regex
171
+
172
+ tests = [
173
+     TestCase(
174
+         name="json_output",
175
+         prompt='Output JSON: {"status": "ok"}',
176
+         model="openai/gpt-4",
177
+         validator=Regex(r'\{"status":\s*"ok"\}')
178
+     ),
179
+     TestCase(
180
+         name="json_output",
181
+         prompt='Output JSON: {"status": "ok"}',
182
+         model="anthropic/claude-3-5-sonnet",
183
+         validator=Regex(r'\{"status":\s*"ok"\}')
184
+     ),
185
+     TestCase(
186
+         name="creative_writing",
187
+         prompt="Write a haiku about Python",
188
+         model="openai/gpt-4",
189
+         validator=Contains("Python", case_sensitive=False)
190
+     ),
191
+ ]
192
+
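+ # Assumes benchmark was created as in Quick Start, e.g. Benchmark(provider=client, name="comparison")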
193
+ benchmark.add_tests(tests)
194
+ report = await benchmark.run_async()
195
+
196
+ # Export as HTML
197
+ from promptum import HTMLSerializer
198
+ html = HTMLSerializer().serialize(report)
199
+ open("comparison.html", "w").write(html)
200
+ ```
201
+
202
+ Open `comparison.html` in your browser to see side-by-side model performance with charts.
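+
+ The same report object can also be exported in the machine-readable formats mentioned above (JSON, YAML) for CI or later analysis. A sketch, assuming the JSON serializer follows the `HTMLSerializer` pattern and is exported as `JSONSerializer` (name unverified; check `promptum/serialization/__init__.py`):
+
+ ```python
+ from promptum import JSONSerializer  # assumed export; see promptum/serialization/json.py
+
+ # Serialize the same report to JSON and write it next to the HTML version.
+ json_text = JSONSerializer().serialize(report)
+ with open("comparison.json", "w") as f:
+     f.write(json_text)
+ ```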
203
+
204
+ ---
205
+
206
+ ## Use Cases
207
+
208
+ **🔬 Model Evaluation** - Compare GPT-4, Claude, Gemini on your specific tasks
209
+ **🎯 Prompt Engineering** - Test 100 prompt variations, find what works (see the sketch below)
210
+ **⚡ Latency Testing** - Measure real-world response times across providers
211
+ **💰 Cost Analysis** - Track spending per model/task before production
212
+ **🔄 Regression Testing** - Ensure model updates don't break your prompts
213
+ **📊 A/B Testing** - Data-driven model selection for your product
214
+
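+ For the prompt-engineering case, the batch API shown earlier (`TestCase`, `add_tests`, `run_async`) already covers sweeping many variations in one run. A minimal sketch reusing only calls from the examples above; the variation list and model name are illustrative:
+
+ ```python
+ from promptum import Benchmark, TestCase, Contains
+
+ # Illustrative prompt variations; swap in your own.
+ variations = [
+     "What is 15 * 7?",
+     "Compute 15 * 7. Reply with only the number.",
+     "15 * 7 = ?",
+ ]
+
+ # `client` is an OpenRouterClient as in Quick Start; run this inside an async function.
+ benchmark = Benchmark(provider=client, name="prompt_sweep")
+ benchmark.add_tests([
+     TestCase(
+         name=f"variation_{i}",
+         prompt=prompt,
+         model="openai/gpt-3.5-turbo",
+         validator=Contains("105"),
+     )
+     for i, prompt in enumerate(variations)
+ ])
+
+ report = await benchmark.run_async()
+ print(report.get_summary())
+ ```
+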
215
+ ---
216
+
217
+ ## Requirements
218
+
219
+ - Python 3.13+
220
+ - An OpenRouter API key (or your own provider; see "Why Protocol-Based?" below)
221
+
222
+ That's it. No Docker, no complex setup.
223
+
224
+ ---
225
+
226
+ ## Why Protocol-Based?
227
+
228
+ Most libraries force inheritance:
229
+ ```python
230
+ class MyProvider(BaseProvider): # Tightly coupled
231
+     def generate(self): ...
232
+ ```
233
+
234
+ We use protocols (structural typing):
235
+ ```python
236
+ class MyProvider: # No inheritance needed
237
+     async def generate(self) -> tuple[str, Metrics]:
238
+         # Your implementation
239
+         return response, metrics
240
+
241
+ # It just works
242
+ benchmark = Benchmark(provider=MyProvider())
243
+ ```
244
+
245
+ Cleaner. More flexible. More Pythonic.
246
+
247
+ ---
248
+
249
+ ## Contributing
250
+
251
+ Found a bug? Want a feature? PRs welcome!
252
+
253
+ ```bash
254
+ # Development setup
255
+ git clone https://github.com/deyna256/promptum.git
256
+ cd promptum
257
+ just sync # Install dependencies
258
+ just test # Run tests
259
+
260
+ # Development commands
261
+ just lint # Check code style
262
+ just format # Format code
263
+ just typecheck # Type checking
264
+ ```
265
+
266
+ ---
267
+
268
+ ## License
269
+
270
+ MIT - do whatever you want with it.
271
+
272
+ ---
273
+
274
+ <div align="center">
275
+
276
+ **[⭐ Star on GitHub](https://github.com/deyna256/promptum)** | **[🐛 Report Bug](https://github.com/deyna256/promptum/issues)** | **[💡 Request Feature](https://github.com/deyna256/promptum/issues)**
277
+
278
+ Made for developers who value their time.
279
+
280
+ </div>
@@ -0,0 +1,32 @@
1
+ promptum/__init__.py,sha256=AjeGgmIbpp9Uv-0ybq6knejEJMK-Dnn_-fV9Z86Bp74,932
2
+ promptum/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ promptum/benchmark/__init__.py,sha256=NJYiXm6wVFKMloxKNAXMY4H3bMQORTtLh6__nYWYWa0,131
4
+ promptum/benchmark/benchmark.py,sha256=3enQSACdLwHW78fqSZj0Un3r7_Ua3V-MjfbEIIKFSWs,1589
5
+ promptum/benchmark/report.py,sha256=ol_UO8rw43zbQxhs2o4AwYN5TP7O_Apa77V-pZKq6Uw,2754
6
+ promptum/core/__init__.py,sha256=mqajsOdUBNJfcR2krxpwa7rM_wd88vJaAov-9SnVm68,294
7
+ promptum/core/metrics.py,sha256=FnS10nHFjQ5Clj5X21C_nW6zAUJU_ZHt0s2fLgp6L28,427
8
+ promptum/core/result.py,sha256=nyuVMQFY6DmZwzpgqDPsj0FaAuairpKLJ-0be5WQtTg,472
9
+ promptum/core/retry.py,sha256=mA_RRz9_9J_mge_AUd9f1A-gACOxZLGTI8vTIstAr8s,538
10
+ promptum/core/test_case.py,sha256=YNlVNj7FkoCyBFb2N0Dzrhce6o3DzUtke4PR6WoXhZo,593
11
+ promptum/execution/__init__.py,sha256=fUZa7Bo7yn921sl49cS6TCGsG-lOUNVdhdeRsIa5vCc,67
12
+ promptum/execution/runner.py,sha256=sP3uDu2VDLxFi9BkltMHwsyMuCXnz4oP1kVN28KpVZ0,2434
13
+ promptum/providers/__init__.py,sha256=OW-CK198wOV7_bz_keOaxxQeRlFPZgINQcVJUZq_uus,169
14
+ promptum/providers/openrouter.py,sha256=owquGxHaTB-pZ8jr06l4HouETuFj1lEg92oGX2mM5uo,4601
15
+ promptum/providers/protocol.py,sha256=vdTGAGKN3FzThHLwyMMWicU87_LpW-gn0cM3vMcWiEY,488
16
+ promptum/serialization/__init__.py,sha256=0dlpgF3dngaw_oR4mg7nuc4Z_VFVl2bATmhe2mHA9T4,319
17
+ promptum/serialization/base.py,sha256=JnB4zb7D4oy44k6ndbJu3Xw1PVLpY_9-Y7k3Et2p43g,1851
18
+ promptum/serialization/html.py,sha256=kJEd2s6fVfFHH7snJWrD5RGaUW66x3vtMKGMJ_ekmcI,1901
19
+ promptum/serialization/json.py,sha256=koqgr5_WHmrpWUOCq6rWXoC07um3mkDDaob2k9vkEK8,870
20
+ promptum/serialization/protocol.py,sha256=MZeMYt_HZJIYSyrRd_ZYbEJXDiXLMuJ5tosAeHLxpTM,353
21
+ promptum/serialization/report_template.html,sha256=RC8qSLzolqWkWBIGfyhPtPkRWM7_0JkauEWPkaKiB9A,10802
22
+ promptum/serialization/yaml.py,sha256=50A612OkX2L3EjhxTZJMZQb5zL8-2PmwcBjjNUhCWsA,528
23
+ promptum/storage/__init__.py,sha256=QWOP5Al43WmmQ_kFCM9JGi8amXJzO_pR-x5AKDNy4ds,153
24
+ promptum/storage/file.py,sha256=gnNBpNBQ_NeAWn7P2itsw2L99AxS7zOd8Nef6PyYxlk,5750
25
+ promptum/storage/protocol.py,sha256=_NpkJzOQB_98Ud_TA_ZYubHf3o2DDXGMveRN3kRyYKI,517
26
+ promptum/validation/__init__.py,sha256=mhykyxaIwn2PJh2RXAi0fi2NRIveFmlC5bg1nyCbfVU,252
27
+ promptum/validation/protocol.py,sha256=xqxm23YX6eNeZHKMLMZ-Wz8iQKn4ZRzAI5Xryxg0uq4,418
28
+ promptum/validation/validators.py,sha256=3lJwSMhhWb9x8BK_-S0FJBj7PFgno79II_i3Z1mCKTs,3217
29
+ promptum-0.0.1.dist-info/METADATA,sha256=vt_PN0Ns0JuJalM7p8hJZsz-Y2hwQrbHZ4Jacy7P6L8,8083
30
+ promptum-0.0.1.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
31
+ promptum-0.0.1.dist-info/licenses/LICENSE,sha256=Fgn285H5Vy9diOlqO1TzS3hD97WcdF6-GFHvUcFNtmg,1067
32
+ promptum-0.0.1.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.28.0
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Ivan Deyna
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.