hellmholtz 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. hellmholtz-0.3.0/LICENSE +21 -0
  2. hellmholtz-0.3.0/PKG-INFO +505 -0
  3. hellmholtz-0.3.0/README.md +462 -0
  4. hellmholtz-0.3.0/pyproject.toml +178 -0
  5. hellmholtz-0.3.0/src/hellmholtz/__init__.py +1 -0
  6. hellmholtz-0.3.0/src/hellmholtz/benchmark/__init__.py +22 -0
  7. hellmholtz-0.3.0/src/hellmholtz/benchmark/evaluator.py +94 -0
  8. hellmholtz-0.3.0/src/hellmholtz/benchmark/prompts.json +49 -0
  9. hellmholtz-0.3.0/src/hellmholtz/benchmark/prompts.py +203 -0
  10. hellmholtz-0.3.0/src/hellmholtz/benchmark/prompts.txt +6 -0
  11. hellmholtz-0.3.0/src/hellmholtz/benchmark/runner.py +438 -0
  12. hellmholtz-0.3.0/src/hellmholtz/cli/__init__.py +47 -0
  13. hellmholtz-0.3.0/src/hellmholtz/cli/benchmark.py +274 -0
  14. hellmholtz-0.3.0/src/hellmholtz/cli/chat.py +37 -0
  15. hellmholtz-0.3.0/src/hellmholtz/cli/common.py +277 -0
  16. hellmholtz-0.3.0/src/hellmholtz/cli/integrations.py +79 -0
  17. hellmholtz-0.3.0/src/hellmholtz/cli/models.py +116 -0
  18. hellmholtz-0.3.0/src/hellmholtz/client.py +148 -0
  19. hellmholtz-0.3.0/src/hellmholtz/core/__init__.py +5 -0
  20. hellmholtz-0.3.0/src/hellmholtz/core/config.py +47 -0
  21. hellmholtz-0.3.0/src/hellmholtz/core/prompts.py +224 -0
  22. hellmholtz-0.3.0/src/hellmholtz/evaluation_analysis.py +925 -0
  23. hellmholtz-0.3.0/src/hellmholtz/export.py +81 -0
  24. hellmholtz-0.3.0/src/hellmholtz/integrations/__init__.py +1 -0
  25. hellmholtz-0.3.0/src/hellmholtz/integrations/litellm.py +30 -0
  26. hellmholtz-0.3.0/src/hellmholtz/integrations/lm_eval.py +67 -0
  27. hellmholtz-0.3.0/src/hellmholtz/monitoring.py +556 -0
  28. hellmholtz-0.3.0/src/hellmholtz/providers/__init__.py +1 -0
  29. hellmholtz-0.3.0/src/hellmholtz/providers/blablador.py +121 -0
  30. hellmholtz-0.3.0/src/hellmholtz/providers/blablador_config.py +702 -0
  31. hellmholtz-0.3.0/src/hellmholtz/providers/blablador_provider.py +166 -0
  32. hellmholtz-0.3.0/src/hellmholtz/reporting/__init__.py +33 -0
  33. hellmholtz-0.3.0/src/hellmholtz/reporting/chart.py +304 -0
  34. hellmholtz-0.3.0/src/hellmholtz/reporting/html.py +304 -0
  35. hellmholtz-0.3.0/src/hellmholtz/reporting/markdown.py +39 -0
  36. hellmholtz-0.3.0/src/hellmholtz/reporting/stats.py +267 -0
  37. hellmholtz-0.3.0/src/hellmholtz/reporting/templates/detailed.html +302 -0
  38. hellmholtz-0.3.0/src/hellmholtz/reporting/templates/simple.html +186 -0
  39. hellmholtz-0.3.0/src/hellmholtz/reporting/utils.py +59 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Jonas Heinicke
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,505 @@
1
+ Metadata-Version: 2.4
2
+ Name: hellmholtz
3
+ Version: 0.3.0
4
+ Summary: A comprehensive Python package for unified LLM access, benchmarking, evaluation, and reporting
5
+ License: MIT
6
+ License-File: LICENSE
7
+ Keywords: llm,benchmark,aisuite,openai,anthropic,google,ollama
8
+ Author: jhe24
9
+ Author-email: jhe24@example.com
10
+ Requires-Python: >=3.10
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Environment :: Console
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
+ Provides-Extra: eval
23
+ Provides-Extra: proxy
24
+ Provides-Extra: reporting
25
+ Requires-Dist: aisuite[all] (>=0.1.6,<0.2.0)
26
+ Requires-Dist: jinja2 (>=3.1.0,<4.0.0)
27
+ Requires-Dist: litellm ; extra == "proxy"
28
+ Requires-Dist: lm-eval ; extra == "eval"
29
+ Requires-Dist: matplotlib ; extra == "reporting"
30
+ Requires-Dist: pydantic (>=2.10.3,<3.0.0)
31
+ Requires-Dist: python-dotenv (>=1.0.1,<2.0.0)
32
+ Requires-Dist: requests (>=2.32.0,<3.0.0)
33
+ Requires-Dist: scipy ; extra == "reporting"
34
+ Requires-Dist: seaborn ; extra == "reporting"
35
+ Requires-Dist: typer (>=0.15.1,<0.16.0)
36
+ Project-URL: Changelog, https://github.com/JonasHeinickeBio/HeLLMholtz/releases
37
+ Project-URL: Documentation, https://github.com/JonasHeinickeBio/HeLLMholtz#readme
38
+ Project-URL: Homepage, https://github.com/JonasHeinickeBio/HeLLMholtz
39
+ Project-URL: Issues, https://github.com/JonasHeinickeBio/HeLLMholtz/issues
40
+ Project-URL: Repository, https://github.com/JonasHeinickeBio/HeLLMholtz.git
41
+ Description-Content-Type: text/markdown
42
+
43
+ # HeLLMholtz LLM Suite
44
+
45
+ [![Python Version](https://img.shields.io/badge/python-3.10+-blue.svg)](https://www.python.org/downloads/)
46
+ [![PyPI Version](https://img.shields.io/pypi/v/hellmholtz.svg)](https://pypi.org/project/hellmholtz/)
47
+ [![License](https://img.shields.io/badge/license-MIT-green.svg)](LICENSE)
48
+ [![Code Style](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
49
+ [![Tests](https://img.shields.io/badge/tests-passing-brightgreen.svg)](https://github.com/JonasHeinickeBio/HeLLMholtz/actions)
50
+
51
+ A comprehensive Python package for unified LLM access, benchmarking, evaluation, and reporting. Built on top of `aisuite` with specialized support for Helmholtz Blablador models.
52
+
53
+ ## Features
54
+
55
+ - **Unified Client**: Single interface for OpenAI, Google, Anthropic, Ollama, and Helmholtz Blablador models
56
+ - **Centralized Configuration**: Environment-based configuration for all your projects
57
+ - **Advanced Benchmarking**: Compare model performance across temperatures, replications, and prompt categories
58
+ - **LLM-as-a-Judge Evaluation**: Automated evaluation with comprehensive statistical analysis
59
+ - **Interactive Reports**: HTML reports with Chart.js visualizations and Markdown summaries
60
+ - **Flexible Prompt System**: Support for both simple text files and structured JSON prompt collections
61
+ - **Model Monitoring**: Track Blablador model availability and configuration consistency
62
+ - **LM Evaluation Harness**: Integration with EleutherAI's comprehensive evaluation suite
63
+ - **LiteLLM Proxy**: Built-in proxy server for model routing and load balancing
64
+ - **Throughput Testing**: Performance benchmarking for high-throughput scenarios
65
+ - **Model Discovery**: Dynamic model listing and availability checking (19+ BLABLADOR models currently available)
66
+
67
+ ## Installation
68
+
69
+ ### Basic Installation
70
+
71
+ ```bash
72
+ pip install hellmholtz
73
+ ```
74
+
75
+ ### Development Installation
76
+
77
+ For development with all optional dependencies:
78
+
79
+ ```bash
80
+ git clone https://github.com/JonasHeinickeBio/HeLLMholtz.git
81
+ cd HeLLMholtz
82
+ pip install -e ".[eval,proxy]"
83
+ ```
84
+
85
+ ### Poetry Installation
86
+
87
+ ```bash
88
+ poetry install --with eval,proxy
89
+ ```
90
+
91
+ ## Configuration
92
+
93
+ 1. Copy the example environment file:
94
+ ```bash
95
+ cp .env.example .env
96
+ ```
97
+
98
+ 2. Configure your API keys in `.env`:
99
+ ```bash
100
+ # OpenAI
101
+ OPENAI_API_KEY=your_openai_key
102
+
103
+ # Anthropic
104
+ ANTHROPIC_API_KEY=your_anthropic_key
105
+
106
+ # Google
107
+ GOOGLE_API_KEY=your_google_key
108
+
109
+ # Helmholtz Blablador
110
+ BLABLADOR_API_KEY=your_blablador_key
111
+ BLABLADOR_API_BASE=https://your-blablador-instance.com
112
+
113
+ # Optional: Default models
114
+ AISUITE_DEFAULT_MODELS='{"openai": "gpt-4o", "anthropic": "claude-3-haiku"}'
115
+ ```
116
+
117
+ ## Usage
118
+
119
+ ### Python API
120
+
121
+ #### Basic Chat Interface
122
+
123
+ ```python
124
+ from hellmholtz.client import chat
125
+
126
+ # Simple chat
127
+ response = chat("openai:gpt-4o", "Hello, how are you?")
128
+ print(response)
129
+
130
+ # With conversation history
131
+ messages = [
132
+ {"role": "system", "content": "You are a helpful assistant."},
133
+ {"role": "user", "content": "Explain quantum computing in simple terms."}
134
+ ]
135
+ response = chat("anthropic:claude-3-sonnet", messages)
136
+ ```
137
+
138
+ #### Benchmarking
139
+
140
+ ```python
141
+ from hellmholtz.benchmark import run_benchmarks
142
+ from hellmholtz.core.prompts import load_prompts
143
+
144
+ # Load prompts from JSON file
145
+ prompts = load_prompts("prompts.json", category="reasoning")
146
+
147
+ # Run benchmarks
148
+ results = run_benchmarks(
149
+ models=["openai:gpt-4o", "anthropic:claude-3-haiku", "blablador:gpt-4o"],
150
+ prompts=prompts,
151
+ temperatures=[0.1, 0.7, 1.0],
152
+ replications=3
153
+ )
154
+
155
+ # Analyze results
156
+ from hellmholtz.evaluation_analysis import EvaluationAnalyzer
157
+ analyzer = EvaluationAnalyzer()
158
+ analysis = analyzer.analyze_evaluation_results("results/benchmark_latest.json")
159
+ analyzer.print_analysis_summary(analysis)
160
+ ```
161
+
162
+ ### Command Line Interface
163
+
164
+ HeLLMholtz provides a comprehensive CLI for all operations:
165
+
166
+ #### Chat Interface
167
+
168
+ ```bash
169
+ # Simple chat
170
+ hellm chat --model openai:gpt-4o "Explain the theory of relativity"
171
+
172
+ # Interactive mode
173
+ hellm chat --model anthropic:claude-3-sonnet --interactive
174
+
175
+ # With system prompt
176
+ hellm chat --model blablador:gpt-4o --system "You are a coding assistant" "Write a Python function to calculate fibonacci numbers"
177
+ ```
178
+
179
+ #### Benchmarking
180
+
181
+ ```bash
182
+ # Basic benchmark
183
+ hellm bench --models openai:gpt-4o,anthropic:claude-3-haiku --prompts-file prompts.txt
184
+
185
+ # Advanced benchmark with evaluation
186
+ hellm bench \
187
+ --models openai:gpt-4o,blablador:gpt-4o \
188
+ --prompts-file prompts.json \
189
+ --prompts-category reasoning \
190
+ --temperatures 0.1,0.7,1.0 \
191
+ --replications 3 \
192
+ --evaluate-with openai:gpt-4o \
193
+ --results-dir results/
194
+
195
+ # Throughput testing
196
+ hellm bench-throughput \
197
+ --model openai:gpt-4o \
198
+ --requests 100 \
199
+ --concurrency 10 \
200
+ --prompt "Write a short story about AI"
201
+ ```
202
+
203
+ #### Evaluation and Analysis
204
+
205
+ ```bash
206
+ # Analyze benchmark results
207
+ hellm analyze results/benchmark_latest.json --html-report analysis_report.html
208
+
209
+ # Generate reports
210
+ hellm report --results-file results/benchmark_latest.json --output report.md
211
+ ```
212
+
213
+ #### Model Management
214
+
215
+ ```bash
216
+ # List available Blablador models
217
+ hellm models
218
+
219
+ # Monitor model availability and test accessibility
220
+ hellm monitor --test-accessibility
221
+
222
+ # Check model configuration consistency
223
+ hellm monitor --check-config
224
+ ```
225
+
226
+ #### Weekly Automated Benchmarking
227
+
228
+ The repository includes a GitHub Actions workflow that automatically runs benchmarks weekly and updates reports:
229
+
230
+ - **Scheduled**: Runs every Sunday at 00:00 UTC
231
+ - **Model Discovery**: Automatically fetches latest Blablador models
232
+ - **Performance Charts**: Generates visual charts comparing model performance
233
+ - **Multiple Formats**: Creates HTML, Markdown, and PNG chart reports
234
+ - **Auto-commit**: Updates reports in the repository for public viewing
235
+
236
+ To enable automated benchmarking:
237
+
238
+ 1. Set repository secrets for API keys:
239
+ - `BLABLADOR_API_KEY`: Your Blablador API key
240
+ - `BLABLADOR_API_BASE`: Blablador API base URL (optional)
241
+
242
+ 2. The workflow will automatically:
243
+ - Run benchmarks on selected models
244
+ - Generate performance reports
245
+ - Create visual charts
246
+ - Commit updated reports to the repository
247
+
248
+ Reports are available in the `reports/` directory and include:
249
+ - `weekly_benchmark_report.html`: Interactive HTML report
250
+ - `weekly_benchmark_report.md`: Markdown summary
251
+ - `weekly_benchmark_chart.png`: Performance visualization
252
+
253
+ #### Advanced Features
254
+
255
+ ```bash
256
+ # Run LM Evaluation Harness
257
+ hellm lm-eval \
258
+ --model openai:gpt-4o \
259
+ --tasks hellaswag,winogrande \
260
+ --limit 100
261
+
262
+ # Start LiteLLM proxy server
263
+ hellm proxy \
264
+ --config litellm_config.yaml \
265
+ --port 8000
266
+ ```
267
+
268
+ ## Project Structure
269
+
270
+ ```
271
+ hellmholtz/
272
+ ├── cli.py # Command-line interface
273
+ ├── client.py # Unified LLM client
274
+ ├── monitoring.py # Model availability monitoring
275
+ ├── evaluation_analysis.py # Statistical analysis and reporting
276
+ ├── export.py # Result export utilities
277
+ ├── core/
278
+ │ ├── config.py # Configuration management
279
+ │ └── prompts.py # Prompt loading and validation
280
+ ├── benchmark/
281
+ │ ├── runner.py # Benchmark execution
282
+ │ ├── evaluator.py # LLM-as-a-Judge evaluation
283
+ │ └── prompts.py # Benchmark-specific prompts
284
+ ├── providers/
285
+ │ ├── blablador_provider.py # Custom Blablador provider
286
+ │ ├── blablador_config.py # Blablador model configuration
287
+ │ ├── blablador.py # Blablador utilities
288
+ │ └── __init__.py
289
+ ├── reporting/
290
+ │ ├── html.py # HTML report generation
291
+ │ ├── markdown.py # Markdown report generation
292
+ │ ├── stats.py # Statistical calculations
293
+ │ ├── utils.py # Reporting utilities
294
+ │ └── templates/ # HTML templates
295
+ └── integrations/
296
+ ├── lm_eval.py # LM Evaluation Harness integration
297
+ └── litellm.py # LiteLLM proxy integration
298
+ ```
299
+
300
+ ## Prompt System
301
+
302
+ HeLLMholtz supports two prompt formats:
303
+
304
+ ### Simple Text Format (`prompts.txt`)
305
+
306
+ ```
307
+ What is the capital of France?
308
+ Explain quantum computing in simple terms.
309
+ Write a Python function to reverse a string.
310
+ ```
311
+
312
+ ### Structured JSON Format (`prompts.json`)
313
+
314
+ ```json
315
+ [
316
+ {
317
+ "id": "capital-france",
318
+ "category": "knowledge",
319
+ "description": "Test basic geographical knowledge",
320
+ "messages": [
321
+ {
322
+ "role": "user",
323
+ "content": "What is the capital of France?"
324
+ }
325
+ ],
326
+ "expected_output": "Paris"
327
+ },
328
+ {
329
+ "id": "quantum-explanation",
330
+ "category": "reasoning",
331
+ "description": "Test ability to explain complex concepts simply",
332
+ "messages": [
333
+ {
334
+ "role": "user",
335
+ "content": "Explain quantum computing in simple terms."
336
+ }
337
+ ]
338
+ }
339
+ ]
340
+ ```
341
+
342
+ ## Evaluation System
343
+
344
+ The LLM-as-a-Judge evaluation system provides:
345
+
346
+ - **Automated Scoring**: AI-powered evaluation of response quality
347
+ - **Statistical Analysis**: Comprehensive metrics and distributions
348
+ - **Model Rankings**: Performance comparisons across all dimensions
349
+ - **Interactive Reports**: Web-based visualizations of results
350
+ - **Detailed Critiques**: Specific feedback for each response
351
+
352
+ ### Example Analysis Output
353
+
354
+ ```
355
+ [Monitor] EVALUATION ANALYSIS RESULTS
356
+ ══════════════════════════════════════════════════════════════
357
+
358
+ OVERVIEW
359
+ • Total Evaluations: 150
360
+ • Models Tested: 3
361
+ • Prompts Tested: 5
362
+ • Success Rate: 94.7%
363
+
364
+ 🏆 MODEL RANKINGS
365
+ 1. openai:gpt-4o - Avg Score: 8.7/10 (±0.8)
366
+ 2. anthropic:claude-3-opus - Avg Score: 8.4/10 (±0.9)
367
+ 3. blablador:gpt-4o - Avg Score: 7.9/10 (±1.1)
368
+
369
+ DETAILED METRICS
370
+ • Response Quality: 8.3/10 average
371
+ • Relevance: 8.6/10 average
372
+ • Accuracy: 9.1/10 average
373
+ • Creativity: 7.8/10 average
374
+ ```
375
+
376
+ ## Latest Benchmark Results
377
+
378
+ Recent benchmarking results from the automated weekly workflow testing BLABLADOR models:
379
+
380
+ ### Model Performance Overview
381
+
382
+ | Model | Success Rate | Avg Latency | Avg Rating (1-10) | Rating Std Dev |
383
+ |-------|-------------|-------------|-------------------|----------------|
384
+ | GPT-OSS-120b | 100.0% | 5.35s | 8.5 | ±2.38 |
385
+ | Ministral-3-14B-Instruct-2512 | 100.0% | 9.55s | 7.5 | ±3.70 |
386
+
387
+ **Overall Statistics:**
388
+ - **Total Evaluations**: 8 across 4 different prompts
389
+ - **Models Tested**: 2 BLABLADOR models
390
+ - **Overall Success Rate**: 100.0%
391
+ - **Average Rating**: 8.0/10
392
+ - **Average Latency**: 7.45s
393
+
394
+ ### Key Findings
395
+ - **Top Performer**: GPT-OSS-120b with the highest rating (8.5/10) and the fastest response time (5.35s)
396
+ - **Most Consistent**: GPT-OSS-120b with the lower rating variation (±2.38)
397
+ - **Performance Gap**: 1.0-point difference between the best- and worst-performing models
398
+ - **Model Availability**: Both tested models are fully operational with 100% success rates
399
+
400
+ ### Evaluation Details
401
+ - **Prompt Categories**: Reasoning, coding, and creative writing tasks
402
+ - **Temperature Testing**: Multiple temperature settings (0.1, 0.7, 1.0) for response variation
403
+ - **LLM-as-a-Judge**: Automated evaluation with detailed critiques and statistical analysis
404
+ - **Rating Distribution**: GPT-OSS-120b received mostly 9-10 ratings, Ministral-3-14B showed more variation
405
+
406
+ ### Reports and Visualizations
407
+
408
+ - [Interactive HTML Report](reports/evaluation_analysis.html) - Comprehensive evaluation analysis with charts
409
+ - [Markdown Summary](reports/benchmark_report_comprehensive.md) - Detailed performance metrics
410
+ - [Performance Chart](reports/benchmark_chart_comprehensive.png) - Visual model comparison
411
+ - [Basic Report](reports/benchmark_report.md) - Simple performance overview
412
+
413
+ Reports are automatically updated and include LLM-as-a-Judge evaluation with detailed statistical analysis and model rankings.
414
+
415
+ ## Development
416
+
417
+ ### Setup Development Environment
418
+
419
+ ```bash
420
+ # Clone repository
421
+ git clone https://github.com/JonasHeinickeBio/HeLLMholtz.git
422
+ cd HeLLMholtz
423
+
424
+ # Install with development dependencies
425
+ poetry install --with dev
426
+
427
+ # Install pre-commit hooks
428
+ poetry run pre-commit install
429
+ ```
430
+
431
+ ### Running Tests
432
+
433
+ ```bash
434
+ # Run all tests
435
+ poetry run pytest
436
+
437
+ # Run with coverage
438
+ poetry run pytest --cov=hellmholtz --cov-report=html
439
+
440
+ # Run specific test categories
441
+ poetry run pytest -m "slow" # Slow integration tests
442
+ poetry run pytest -m "network" # Tests requiring network access
443
+ poetry run pytest -m "model" # Tests using actual models
444
+ ```
445
+
446
+ ### Code Quality
447
+
448
+ ```bash
449
+ # Lint code
450
+ poetry run ruff check .
451
+
452
+ # Format code
453
+ poetry run ruff format .
454
+
455
+ # Type checking
456
+ poetry run mypy src/
457
+
458
+ # Security scanning
459
+ poetry run bandit -r src/
460
+ ```
461
+
462
+ ### Building Documentation
463
+
464
+ ```bash
465
+ # Generate API documentation
466
+ poetry run sphinx-build docs/ docs/_build/
467
+
468
+ # Serve documentation locally
469
+ poetry run sphinx-serve docs/_build/
470
+ ```
471
+
472
+ ## Contributing
473
+
474
+ We welcome contributions! Please see our [Contributing Guide](CONTRIBUTING.md) for details.
475
+
476
+ 1. Fork the repository
477
+ 2. Create a feature branch: `git checkout -b feature/amazing-feature`
478
+ 3. Make your changes and add tests
479
+ 4. Run the full test suite: `poetry run pytest`
480
+ 5. Ensure code quality: `poetry run ruff check . && poetry run mypy src/`
481
+ 6. Commit your changes: `git commit -m 'Add amazing feature'`
482
+ 7. Push to the branch: `git push origin feature/amazing-feature`
483
+ 8. Open a Pull Request
484
+
485
+ ## License
486
+
487
+ This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
488
+
489
+ ## Acknowledgments
490
+
491
+ - Built on top of [aisuite](https://github.com/andrewyng/aisuite) for unified LLM access
492
+ - LLM evaluation powered by [EleutherAI's LM Evaluation Harness](https://github.com/EleutherAI/lm-evaluation-harness)
493
+ - Proxy functionality via [LiteLLM](https://github.com/BerriAI/litellm)
494
+ - Special thanks to the Helmholtz Association for Blablador model access
495
+
496
+ ## Support
497
+
498
+ - Documentation: https://hellmholtz.readthedocs.io/
499
+ - Issue Tracker: https://github.com/JonasHeinickeBio/HeLLMholtz/issues
500
+ - Discussions: https://github.com/JonasHeinickeBio/HeLLMholtz/discussions
501
+
502
+ ---
503
+
504
+ <p align="center">Made with love for the scientific computing community</p>
505
+