inspect-mlflow 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (33) hide show
  1. inspect_mlflow-0.1.0/.github/ISSUE_TEMPLATE/bug_report.yml +60 -0
  2. inspect_mlflow-0.1.0/.github/ISSUE_TEMPLATE/feature_request.yml +28 -0
  3. inspect_mlflow-0.1.0/.github/dependabot.yml +15 -0
  4. inspect_mlflow-0.1.0/.github/pull_request_template.md +14 -0
  5. inspect_mlflow-0.1.0/.github/workflows/api-compat.yml +48 -0
  6. inspect_mlflow-0.1.0/.github/workflows/ci.yml +64 -0
  7. inspect_mlflow-0.1.0/.github/workflows/codeql.yml +21 -0
  8. inspect_mlflow-0.1.0/.github/workflows/release.yml +50 -0
  9. inspect_mlflow-0.1.0/.gitignore +17 -0
  10. inspect_mlflow-0.1.0/.pre-commit-config.yaml +25 -0
  11. inspect_mlflow-0.1.0/CHANGELOG.md +10 -0
  12. inspect_mlflow-0.1.0/CONTRIBUTING.md +67 -0
  13. inspect_mlflow-0.1.0/LICENSE +21 -0
  14. inspect_mlflow-0.1.0/PKG-INFO +161 -0
  15. inspect_mlflow-0.1.0/README.md +135 -0
  16. inspect_mlflow-0.1.0/SECURITY.md +17 -0
  17. inspect_mlflow-0.1.0/docs/images/inspect-tracing-01-traces-list.png +0 -0
  18. inspect_mlflow-0.1.0/docs/images/inspect-tracing-03-trace-panel.png +0 -0
  19. inspect_mlflow-0.1.0/docs/images/inspect-tracing-04-timeline.png +0 -0
  20. inspect_mlflow-0.1.0/docs/images/inspect-tracing-05-model-expanded.png +0 -0
  21. inspect_mlflow-0.1.0/docs/images/logo.png +0 -0
  22. inspect_mlflow-0.1.0/docs/images/logo.svg +37 -0
  23. inspect_mlflow-0.1.0/inspect_mlflow/__init__.py +26 -0
  24. inspect_mlflow-0.1.0/inspect_mlflow/_registry.py +9 -0
  25. inspect_mlflow-0.1.0/inspect_mlflow/py.typed +0 -0
  26. inspect_mlflow-0.1.0/inspect_mlflow/tracing.py +348 -0
  27. inspect_mlflow-0.1.0/inspect_mlflow/tracking.py +322 -0
  28. inspect_mlflow-0.1.0/inspect_mlflow/util.py +50 -0
  29. inspect_mlflow-0.1.0/pyproject.toml +94 -0
  30. inspect_mlflow-0.1.0/tests/__init__.py +0 -0
  31. inspect_mlflow-0.1.0/tests/test_tracing.py +861 -0
  32. inspect_mlflow-0.1.0/tests/test_tracking.py +667 -0
  33. inspect_mlflow-0.1.0/uv.lock +4521 -0
@@ -0,0 +1,60 @@
1
+ name: Bug Report
2
+ description: Report a bug in inspect-mlflow
3
+ labels: [bug]
4
+ body:
5
+ - type: textarea
6
+ id: description
7
+ attributes:
8
+ label: What happened?
9
+ description: Describe the bug clearly. Include error messages if any.
10
+ validations:
11
+ required: true
12
+
13
+ - type: textarea
14
+ id: reproduce
15
+ attributes:
16
+ label: Steps to reproduce
17
+ description: Minimal code or commands to reproduce the issue.
18
+ render: python
19
+ validations:
20
+ required: true
21
+
22
+ - type: textarea
23
+ id: expected
24
+ attributes:
25
+ label: Expected behavior
26
+ description: What you expected to happen.
27
+ validations:
28
+ required: true
29
+
30
+ - type: input
31
+ id: version
32
+ attributes:
33
+ label: inspect-mlflow version
34
+ placeholder: "0.1.0"
35
+ validations:
36
+ required: true
37
+
38
+ - type: input
39
+ id: inspect-version
40
+ attributes:
41
+ label: inspect-ai version
42
+ placeholder: "0.3.190"
43
+ validations:
44
+ required: true
45
+
46
+ - type: input
47
+ id: mlflow-version
48
+ attributes:
49
+ label: MLflow version
50
+ placeholder: "3.10.1"
51
+ validations:
52
+ required: true
53
+
54
+ - type: input
55
+ id: python-version
56
+ attributes:
57
+ label: Python version
58
+ placeholder: "3.12"
59
+ validations:
60
+ required: true
@@ -0,0 +1,28 @@
1
+ name: Feature Request
2
+ description: Suggest a new feature or improvement
3
+ labels: [enhancement]
4
+ body:
5
+ - type: textarea
6
+ id: problem
7
+ attributes:
8
+ label: Problem or use case
9
+ description: What problem does this solve? Why do you need it?
10
+ validations:
11
+ required: true
12
+
13
+ - type: textarea
14
+ id: solution
15
+ attributes:
16
+ label: Proposed solution
17
+ description: How should it work? Include code examples if helpful.
18
+ render: python
19
+ validations:
20
+ required: false
21
+
22
+ - type: textarea
23
+ id: alternatives
24
+ attributes:
25
+ label: Alternatives considered
26
+ description: Other approaches you thought about and why they don't work.
27
+ validations:
28
+ required: false
@@ -0,0 +1,15 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: pip
4
+ directory: /
5
+ schedule:
6
+ interval: weekly
7
+ open-pull-requests-limit: 5
8
+ labels: [dependencies]
9
+
10
+ - package-ecosystem: github-actions
11
+ directory: /
12
+ schedule:
13
+ interval: weekly
14
+ open-pull-requests-limit: 5
15
+ labels: [dependencies, ci]
@@ -0,0 +1,14 @@
1
+ ## What changed and why
2
+
3
+ ## Testing
4
+
5
+ - [ ] `uv run pytest tests/ -v` passes
6
+ - [ ] `uv run ruff check .` clean
7
+ - [ ] `uv run mypy inspect_mlflow/` clean
8
+ - [ ] New tests added for new functionality
9
+
10
+ ## Checklist
11
+
12
+ - [ ] Read [CONTRIBUTING.md](https://github.com/debu-sinha/inspect-mlflow/blob/main/CONTRIBUTING.md)
13
+ - [ ] Pre-commit hooks pass (`uv run pre-commit run --all-files`)
14
+ - [ ] No breaking changes (or documented in description)
@@ -0,0 +1,48 @@
1
+ name: API Compatibility
2
+
3
+ on:
4
+ schedule:
5
+ - cron: "0 0 * * 0" # Sunday midnight UTC
6
+ workflow_dispatch:
7
+
8
+ jobs:
9
+ test-latest:
10
+ runs-on: ubuntu-latest
11
+ steps:
12
+ - uses: actions/checkout@v4
13
+ - uses: astral-sh/setup-uv@v5
14
+ with:
15
+ enable-cache: true
16
+ - name: Install with latest deps
17
+ run: |
18
+ uv lock --upgrade-package inspect-ai --upgrade-package mlflow
19
+ uv sync --group dev
20
+ - name: Run tests
21
+ run: uv run pytest tests/ -v --tb=short
22
+ - name: Report versions
23
+ if: always()
24
+ run: |
25
+ echo "inspect-ai: $(uv run python -c 'import inspect_ai; print(inspect_ai.__version__)')"
26
+ echo "mlflow: $(uv run python -c 'import mlflow; print(mlflow.__version__)')"
27
+
28
+ - name: Create issue on failure
29
+ if: failure()
30
+ uses: actions/github-script@v7
31
+ with:
32
+ script: |
33
+ const title = `API compatibility failure: ${new Date().toISOString().slice(0, 10)}`;
34
+ const existing = await github.rest.issues.listForRepo({
35
+ owner: context.repo.owner,
36
+ repo: context.repo.repo,
37
+ state: 'open',
38
+ labels: 'api-compat',
39
+ });
40
+ if (existing.data.length === 0) {
41
+ await github.rest.issues.create({
42
+ owner: context.repo.owner,
43
+ repo: context.repo.repo,
44
+ title,
45
+ body: `Tests failed against latest inspect-ai and mlflow.\n\nSee: ${context.serverUrl}/${context.repo.owner}/${context.repo.repo}/actions/runs/${context.runId}`,
46
+ labels: ['bug', 'api-compat'],
47
+ });
48
+ }
@@ -0,0 +1,64 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ concurrency:
10
+ group: ${{ github.workflow }}-${{ github.ref }}
11
+ cancel-in-progress: true
12
+
13
+ jobs:
14
+ lint:
15
+ runs-on: ubuntu-latest
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ - uses: astral-sh/setup-uv@v5
19
+ with:
20
+ enable-cache: true
21
+ - run: uv sync --group dev
22
+ - run: uv run ruff check .
23
+ - run: uv run ruff format --check .
24
+ - run: uv run mypy inspect_mlflow/
25
+
26
+ test:
27
+ runs-on: ubuntu-latest
28
+ strategy:
29
+ fail-fast: false
30
+ matrix:
31
+ python-version: ["3.11", "3.12", "3.13"]
32
+ steps:
33
+ - uses: actions/checkout@v4
34
+ - uses: astral-sh/setup-uv@v5
35
+ with:
36
+ enable-cache: true
37
+ - run: uv python install ${{ matrix.python-version }}
38
+ - run: uv sync --group dev --python ${{ matrix.python-version }}
39
+ - name: Run tests with coverage
40
+ run: uv run pytest tests/ -v --tb=short --cov=inspect_mlflow --cov-report=xml --cov-report=term-missing
41
+ - name: Enforce coverage threshold
42
+ run: uv run coverage report --fail-under=70
43
+ - name: Upload coverage
44
+ if: matrix.python-version == '3.12' && github.event_name == 'push'
45
+ uses: codecov/codecov-action@v4
46
+ with:
47
+ file: coverage.xml
48
+ fail_ci_if_error: false
49
+
50
+ build:
51
+ runs-on: ubuntu-latest
52
+ steps:
53
+ - uses: actions/checkout@v4
54
+ - uses: hynek/build-and-inspect-python-package@v2
55
+
56
+ check:
57
+ if: always()
58
+ needs: [lint, test, build]
59
+ runs-on: ubuntu-latest
60
+ steps:
61
+ - name: All jobs passed
62
+ uses: re-actors/alls-green@release/v1
63
+ with:
64
+ jobs: ${{ toJSON(needs) }}
@@ -0,0 +1,21 @@
1
+ name: CodeQL
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+ schedule:
9
+ - cron: "0 6 * * 1"
10
+
11
+ jobs:
12
+ analyze:
13
+ runs-on: ubuntu-latest
14
+ permissions:
15
+ security-events: write
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ - uses: github/codeql-action/init@v3
19
+ with:
20
+ languages: python
21
+ - uses: github/codeql-action/analyze@v3
@@ -0,0 +1,50 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags: ["v*"]
6
+
7
+ permissions:
8
+ contents: write
9
+ id-token: write
10
+
11
+ jobs:
12
+ test:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - uses: actions/checkout@v4
16
+ - uses: astral-sh/setup-uv@v5
17
+ with:
18
+ enable-cache: true
19
+ - run: uv sync --group dev
20
+ - run: uv run pytest tests/ -v --tb=short
21
+
22
+ build:
23
+ needs: test
24
+ runs-on: ubuntu-latest
25
+ steps:
26
+ - uses: actions/checkout@v4
27
+ - uses: hynek/build-and-inspect-python-package@v2
28
+
29
+ publish:
30
+ needs: build
31
+ runs-on: ubuntu-latest
32
+ environment: release
33
+ steps:
34
+ - name: Download dist
35
+ uses: actions/download-artifact@v4
36
+ with:
37
+ name: Packages
38
+ path: dist/
39
+ - name: Publish to PyPI
40
+ uses: pypa/gh-action-pypi-publish@release/v1
41
+
42
+ github-release:
43
+ needs: publish
44
+ runs-on: ubuntu-latest
45
+ steps:
46
+ - uses: actions/checkout@v4
47
+ - name: Create GitHub Release
48
+ env:
49
+ GH_TOKEN: ${{ github.token }}
50
+ run: gh release create ${{ github.ref_name }} dist/* --generate-notes
@@ -0,0 +1,17 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ .eggs/
8
+ *.egg
9
+ .venv/
10
+ .env
11
+ .mypy_cache/
12
+ .ruff_cache/
13
+ .pytest_cache/
14
+ mlflow.db
15
+ mlartifacts/
16
+ mlruns/
17
+ .coverage
@@ -0,0 +1,25 @@
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: v0.14.0
4
+ hooks:
5
+ - id: ruff
6
+ args: [--fix]
7
+ - id: ruff-format
8
+
9
+ - repo: https://github.com/pre-commit/pre-commit-hooks
10
+ rev: v5.0.0
11
+ hooks:
12
+ - id: trailing-whitespace
13
+ - id: end-of-file-fixer
14
+ - id: check-yaml
15
+ - id: check-toml
16
+ - id: check-added-large-files
17
+ args: [--maxkb=500]
18
+ - id: no-commit-to-branch
19
+ args: [--branch=main]
20
+
21
+ - repo: https://github.com/codespell-project/codespell
22
+ rev: v2.4.1
23
+ hooks:
24
+ - id: codespell
25
+ args: [--skip=uv.lock]
@@ -0,0 +1,10 @@
1
+ # Changelog
2
+
3
+ ## 0.1.0 (2026-03-20)
4
+
5
+ Initial release.
6
+
7
+ - MLflow tracking hook: hierarchical runs, per-sample metrics, model usage, artifact logging
8
+ - MLflow tracing hook: execution span tree with model calls, tool calls, scoring
9
+ - Auto-registration via Inspect AI entry points
10
+ - 33 unit tests
@@ -0,0 +1,67 @@
1
+ # Contributing
2
+
3
+ ## Development Setup
4
+
5
+ ```bash
6
+ git clone https://github.com/debu-sinha/inspect-mlflow.git
7
+ cd inspect-mlflow
8
+ uv sync --group dev
9
+ uv run pre-commit install
10
+ ```
11
+
12
+ ## Running Tests
13
+
14
+ ```bash
15
+ uv run pytest tests/ -v
16
+ uv run pytest tests/ --cov=inspect_mlflow --cov-report=term-missing
17
+ ```
18
+
19
+ ## Linting
20
+
21
+ ```bash
22
+ uv run ruff check .
23
+ uv run ruff format .
24
+ uv run mypy inspect_mlflow/
25
+ ```
26
+
27
+ ## Pre-commit
28
+
29
+ Pre-commit hooks run automatically on `git commit`. To run manually:
30
+
31
+ ```bash
32
+ uv run pre-commit run --all-files
33
+ ```
34
+
35
+ ## Integration Testing
36
+
37
+ To test with a real MLflow server and OpenAI API:
38
+
39
+ ```bash
40
+ mlflow server --port 5556
41
+ export MLFLOW_TRACKING_URI="http://127.0.0.1:5556"
42
+ export MLFLOW_INSPECT_TRACING="true"
43
+ export OPENAI_API_KEY="sk-..."
44
+
45
+ uv run python -c "
46
+ from inspect_ai import Task, eval
47
+ from inspect_ai.dataset import Sample
48
+ from inspect_ai.scorer import match
49
+ from inspect_ai.solver import generate
50
+
51
+ task = Task(
52
+ dataset=[Sample(input='What is 2+2?', target='4')],
53
+ solver=generate(),
54
+ scorer=match(),
55
+ )
56
+ eval(task, model='openai/gpt-4o-mini')
57
+ "
58
+ ```
59
+
60
+ Open http://127.0.0.1:5556 to see runs and traces.
61
+
62
+ ## Pull Requests
63
+
64
+ - Keep PRs focused on a single change
65
+ - Include tests for new functionality
66
+ - Run `uv run pre-commit run --all-files` before pushing
67
+ - All CI checks must pass before merge
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Debu Sinha
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,161 @@
1
+ Metadata-Version: 2.4
2
+ Name: inspect-mlflow
3
+ Version: 0.1.0
4
+ Summary: MLflow integration for Inspect AI: experiment tracking, execution tracing, and Scout analysis
5
+ Project-URL: Homepage, https://github.com/debu-sinha/inspect-mlflow
6
+ Project-URL: Issues, https://github.com/debu-sinha/inspect-mlflow/issues
7
+ Project-URL: Repository, https://github.com/debu-sinha/inspect-mlflow
8
+ Author-email: Debu Sinha <debusinha2009@gmail.com>
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
20
+ Requires-Python: >=3.11
21
+ Requires-Dist: inspect-ai>=0.3.180
22
+ Requires-Dist: mlflow<4.0,>=3.0
23
+ Provides-Extra: scout
24
+ Requires-Dist: inspect-scout>=0.1.0; extra == 'scout'
25
+ Description-Content-Type: text/markdown
26
+
27
+ # inspect-mlflow
28
+
29
+ ![logo](https://raw.githubusercontent.com/debu-sinha/inspect-mlflow/main/docs/images/logo.png)
30
+
31
+ [![CI](https://github.com/debu-sinha/inspect-mlflow/actions/workflows/ci.yml/badge.svg)](https://github.com/debu-sinha/inspect-mlflow/actions/workflows/ci.yml)
32
+ [![CodeQL](https://github.com/debu-sinha/inspect-mlflow/actions/workflows/codeql.yml/badge.svg)](https://github.com/debu-sinha/inspect-mlflow/actions/workflows/codeql.yml)
33
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
34
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
35
+
36
+ MLflow integration for [Inspect AI](https://inspect.aisi.org.uk/). Provides experiment tracking, execution tracing, and artifact logging for Inspect AI evaluations.
37
+
38
+ ## Install
39
+
40
+ ```bash
41
+ pip install inspect-mlflow
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ No code changes needed. Hooks auto-register via entry points when the package is installed. Set env vars and run evals as usual.
47
+
48
+ ```bash
49
+ # Start MLflow server
50
+ mlflow server --port 5000
51
+
52
+ # Set env vars
53
+ export MLFLOW_TRACKING_URI="http://localhost:5000"
54
+ export MLFLOW_INSPECT_TRACING="true"
55
+
56
+ # Run evals. Hooks auto-activate.
57
+ inspect eval my_task.py --model openai/gpt-4o
58
+ ```
59
+
60
+ Then open http://localhost:5000 to see runs and traces.
61
+
62
+ ## What it does
63
+
64
+ ### Tracking Hook
65
+
66
+ Activated when `MLFLOW_TRACKING_URI` is set. Creates hierarchical MLflow runs mirroring the eval structure.
67
+
68
+ - Parent run per eval invocation, nested child runs per task
69
+ - Task config logged as parameters (model, dataset, solver, temperature)
70
+ - Per-sample scores as step metrics
71
+ - Model token usage (input/output/total per model)
72
+ - Real-time event counting (model calls, tool calls)
73
+ - Eval artifacts: per-sample results JSON + full eval log JSON
74
+
75
+ ### Tracing Hook
76
+
77
+ Activated when `MLFLOW_INSPECT_TRACING=true` is set in addition to `MLFLOW_TRACKING_URI`. Maps eval execution to MLflow trace spans.
78
+
79
+ ```
80
+ eval_run:6fvmKSZv (CHAIN)
81
+ task:task (CHAIN)
82
+ sample:gM9UtEAM (CHAIN)
83
+ solvers -> generate -> model:openai/gpt-4o-mini (LLM)
84
+ scorers -> match -> score (EVALUATOR)
85
+ sample:628Qbuhr (CHAIN)
86
+ ...
87
+ ```
88
+
89
+ Each span captures relevant data:
90
+
91
+ | Span Type | Data |
92
+ |-----------|------|
93
+ | LLM | model name, token counts, temperature, cache, response |
94
+ | TOOL | function name, arguments, result, errors |
95
+ | EVALUATOR | score value, explanation, target |
96
+
97
+ ## Screenshots
98
+
99
+ **Traces list** showing an eval run with execution time and status:
100
+
101
+ ![Traces list](https://raw.githubusercontent.com/debu-sinha/inspect-mlflow/main/docs/images/inspect-tracing-01-traces-list.png)
102
+
103
+ **Full span tree** showing the eval hierarchy (eval_run -> task -> samples -> solvers/scorers):
104
+
105
+ ![Span tree](https://raw.githubusercontent.com/debu-sinha/inspect-mlflow/main/docs/images/inspect-tracing-04-timeline.png)
106
+
107
+ **LLM span detail** with model name, token counts, and response text:
108
+
109
+ ![LLM detail](https://raw.githubusercontent.com/debu-sinha/inspect-mlflow/main/docs/images/inspect-tracing-05-model-expanded.png)
110
+
111
+ ## Configuration
112
+
113
+ | Env var | Required | Default | Description |
114
+ |---------|----------|---------|-------------|
115
+ | `MLFLOW_TRACKING_URI` | Yes | - | MLflow server URL |
116
+ | `MLFLOW_EXPERIMENT_NAME` | No | `inspect_ai` | Experiment name |
117
+ | `MLFLOW_INSPECT_TRACING` | No | `false` | Enable execution tracing |
118
+ | `MLFLOW_INSPECT_LOG_ARTIFACTS` | No | `true` | Log eval artifacts |
119
+
120
+ ## Example
121
+
122
+ ```python
123
+ from inspect_ai import Task, eval
124
+ from inspect_ai.dataset import Sample
125
+ from inspect_ai.scorer import match
126
+ from inspect_ai.solver import generate
127
+
128
+ # No special imports needed. Hooks auto-register on install.
129
+
130
+ task = Task(
131
+ dataset=[
132
+ Sample(input="What is 2 + 2?", target="4"),
133
+ Sample(input="What is 3 * 5?", target="15"),
134
+ Sample(input="What is 10 - 7?", target="3"),
135
+ ],
136
+ solver=generate(),
137
+ scorer=match(),
138
+ )
139
+
140
+ logs = eval(task, model="openai/gpt-4o-mini")
141
+ # Results are now in MLflow: runs with metrics + traces with spans
142
+ ```
143
+
144
+ ## Development
145
+
146
+ ```bash
147
+ git clone https://github.com/debu-sinha/inspect-mlflow.git
148
+ cd inspect-mlflow
149
+ uv sync --group dev
150
+ uv run pre-commit install
151
+ uv run pytest tests/ -v
152
+ ```
153
+
154
+ See [CONTRIBUTING.md](https://github.com/debu-sinha/inspect-mlflow/blob/main/CONTRIBUTING.md) for details.
155
+
156
+ ## Related
157
+
158
+ - [Inspect AI](https://inspect.aisi.org.uk/) - AI evaluation framework by UK AISI
159
+ - [MLflow](https://mlflow.org/) - ML experiment tracking and model management
160
+ - [Inspect AI hooks docs](https://inspect.aisi.org.uk/extensions.html#sec-hooks) - How hooks work
161
+ - [Issue #3547](https://github.com/UKGovernmentBEIS/inspect_ai/issues/3547) - Original proposal