skilltest-pytest 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,70 @@
1
+ """skilltest-pytest: run AI-skill tests and natural-language evals as pytest.
2
+
3
+ The pytest integration on top of [`skilltest-sdk`][skilltest_sdk]: drop a
4
+ ``*.skilltest.yaml`` next to your other tests and pytest collects it as a test
5
+ item. The SDK's code-level API is re-exported here for convenience, so a pytest
6
+ suite only needs one dependency:
7
+
8
+ from skilltest_pytest import assistant_text, describe_failures, run_skill
9
+
10
+ def test_greeter():
11
+ report = run_skill("cases/greet.yaml")
12
+ assert report.passed, describe_failures(report)
13
+ # Mix in a deterministic check on the transcript:
14
+ assert "Dr. Smith" in assistant_text(report.runs[0].transcript)
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ from skilltest_sdk import (
20
+ ENV_BIN,
21
+ ENV_PROVIDER,
22
+ BooleanDetail,
23
+ CaseRun,
24
+ EvalOutcome,
25
+ Message,
26
+ NumericDetail,
27
+ Report,
28
+ SkilltestError,
29
+ SkilltestProviderError,
30
+ SkilltestUsageError,
31
+ Summary,
32
+ Transcript,
33
+ Usage,
34
+ ValidationFinding,
35
+ ValidationReport,
36
+ assistant_text,
37
+ describe_failures,
38
+ failed_evals,
39
+ failed_runs,
40
+ run_skill,
41
+ validate_skill,
42
+ )
43
+
44
+ from .plugin import SkilltestFailure
45
+
46
+ __all__ = [
47
+ "ENV_BIN",
48
+ "ENV_PROVIDER",
49
+ "BooleanDetail",
50
+ "CaseRun",
51
+ "EvalOutcome",
52
+ "Message",
53
+ "NumericDetail",
54
+ "Report",
55
+ "SkilltestError",
56
+ "SkilltestFailure",
57
+ "SkilltestProviderError",
58
+ "SkilltestUsageError",
59
+ "Summary",
60
+ "Transcript",
61
+ "Usage",
62
+ "ValidationFinding",
63
+ "ValidationReport",
64
+ "assistant_text",
65
+ "describe_failures",
66
+ "failed_evals",
67
+ "failed_runs",
68
+ "run_skill",
69
+ "validate_skill",
70
+ ]
@@ -0,0 +1,86 @@
1
+ """pytest integration: collect ``*.skilltest.yaml`` files as test items.
2
+
3
+ Drop a ``greets.skilltest.yaml`` next to your other tests and `pytest` will run
4
+ it as a case, failing with the judge's reasons when an eval does not pass. For
5
+ finer control — multiple platforms/models, or deterministic mix-in assertions on
6
+ the transcript — call [`run_skill`][skilltest_sdk.runner.run_skill] from an
7
+ ordinary test function instead.
8
+
9
+ Settings come from ``pytest.ini``/``pyproject.toml`` (``skilltest_bin``,
10
+ ``skilltest_provider``, ``skilltest_platforms``, ``skilltest_models``,
11
+ ``skilltest_config``) or the ``SKILLTEST_BIN`` / ``SKILLTEST_PROVIDER`` env vars.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ from typing import TYPE_CHECKING
17
+
18
+ import pytest
19
+ from skilltest_sdk import Report, describe_failures, run_skill
20
+
21
+ if TYPE_CHECKING:
22
+ from collections.abc import Sequence
23
+
24
+ _SUFFIXES = (".skilltest.yaml", ".skilltest.yml")
25
+
26
+
27
def pytest_addoption(parser: pytest.Parser) -> None:
    """Register the plugin's ini options on the pytest parser.

    Three plain options (``skilltest_bin``, ``skilltest_provider``,
    ``skilltest_config``) default to ``None``; the two ``args``-typed list
    options (``skilltest_platforms``, ``skilltest_models``) default to an
    empty list.
    """
    # (option name, help text, extra keyword arguments), in registration order.
    ini_options = (
        ("skilltest_bin", "Path to the skilltest binary", {"default": None}),
        ("skilltest_provider", "Provider command for skilltest", {"default": None}),
        ("skilltest_platforms", "Platforms to run cases on", {"type": "args", "default": []}),
        ("skilltest_models", "Models to run cases on", {"type": "args", "default": []}),
        ("skilltest_config", "Path to a skilltest config file", {"default": None}),
    )
    for option_name, help_text, extra in ini_options:
        parser.addini(option_name, help_text, **extra)
33
+
34
+
35
def pytest_collect_file(parent: pytest.Collector, file_path) -> SkilltestFile | None:
    """Return a ``SkilltestFile`` collector for skilltest case files.

    A file qualifies when its name ends with one of ``_SUFFIXES``; for any
    other file ``None`` is returned.
    """
    # str.endswith accepts a tuple and matches if any entry is a suffix.
    if not file_path.name.endswith(_SUFFIXES):
        return None
    return SkilltestFile.from_parent(parent, path=file_path)
40
+
41
+
42
class _Settings:
    """Collector settings resolved from a pytest config's ini values.

    ``bin``, ``provider`` and ``config`` hold the ini value, or ``None`` when
    that value is falsy. ``platforms`` and ``models`` hold whatever ``getini``
    returns for those options, unchanged.
    """

    def __init__(self, config: pytest.Config) -> None:
        read = config.getini

        def optional(key: str) -> str | None:
            # Collapse an unset or empty ini value to None.
            value = read(key)
            return value if value else None

        self.bin: str | None = optional("skilltest_bin")
        self.provider: str | None = optional("skilltest_provider")
        self.platforms: Sequence[str] = read("skilltest_platforms")
        self.models: Sequence[str] = read("skilltest_models")
        self.config: str | None = optional("skilltest_config")
51
+
52
+
53
class SkilltestFailure(Exception):
    """Signal that a collected skilltest case did not pass.

    The exception message is ``describe_failures(report)``; the report itself
    is kept on ``self.report``.
    """

    def __init__(self, report: Report) -> None:
        message = describe_failures(report)
        super().__init__(message)
        self.report = report
59
+
60
+
61
class SkilltestFile(pytest.File):
    """Collector for one skilltest case file; yields a single item."""

    def collect(self):  # type: ignore[override]
        # Path.stem strips only the final suffix, so ``greet.skilltest.yaml``
        # produces an item named ``greet.skilltest``.
        item_name = self.path.stem
        yield SkilltestItem.from_parent(self, name=item_name)
64
+
65
+
66
class SkilltestItem(pytest.Item):
    """Test item that runs one skilltest case file through ``run_skill``."""

    def runtest(self) -> None:
        """Run the case file with the settings read from the pytest config.

        Raises:
            SkilltestFailure: if the returned report's ``passed`` is falsy.
        """
        opts = _Settings(self.config)
        overrides = {
            "bin": opts.bin,
            "provider": opts.provider,
            "platforms": opts.platforms,
            "models": opts.models,
            "config": opts.config,
        }
        report = run_skill(self.path, **overrides)
        if report.passed:
            return
        raise SkilltestFailure(report)

    def repr_failure(self, excinfo, style=None):  # type: ignore[override]
        """Render a ``SkilltestFailure`` as its message; defer otherwise."""
        failure = excinfo.value
        if not isinstance(failure, SkilltestFailure):
            return super().repr_failure(excinfo, style=style)
        return f"skilltest case failed:\n{failure}"

    def reportinfo(self):  # type: ignore[override]
        """Return the case file path, line 0, and a ``skilltest:`` headline."""
        location = self.path
        headline = f"skilltest: {self.name}"
        return location, 0, headline
@@ -0,0 +1,58 @@
1
+ Metadata-Version: 2.4
2
+ Name: skilltest-pytest
3
+ Version: 0.2.0
4
+ Summary: pytest integration for skilltest: auto-collect *.skilltest.yaml cases as pytest tests, built on skilltest-sdk.
5
+ Author: Nick DeRobertis
6
+ License-Expression: MIT
7
+ Requires-Python: >=3.12
8
+ Requires-Dist: pytest>=8
9
+ Requires-Dist: skilltest-sdk<0.2,>=0.1.0
10
+ Description-Content-Type: text/markdown
11
+
12
+ # skilltest-pytest
13
+
14
+ A [pytest](https://pytest.org) plugin for [skilltest](../../README.md): run
15
+ AI-skill tests and natural-language evals as ordinary pytest tests, and mix in
16
+ your own deterministic checks. Built on
17
+ [`skilltest-sdk`](../../sdks/python/README.md) — the SDK's code API is
18
+ re-exported here, so a pytest suite needs only this one dependency.
19
+
20
+ ## Two ways to use it
21
+
22
+ **Auto-collected case files.** Name a case `something.skilltest.yaml` and pytest
23
+ runs it:
24
+
25
+ ```yaml
26
+ # greet.skilltest.yaml
27
+ skill: ./skills/greeter
28
+ input: "Greet Dr. Smith."
29
+ evals:
30
+ - type: boolean
31
+ criterion: "the reply greets Dr. Smith by name"
32
+ ```
33
+
34
+ **As code**, for matrices and deterministic mix-ins:
35
+
36
+ ```python
37
+ from skilltest_pytest import assistant_text, describe_failures, run_skill
38
+
39
+ def test_greeter():
40
+ report = run_skill("cases/greet.yaml", platforms=["claude-code"], models=["claude-opus-4-8"])
41
+ assert report.passed, describe_failures(report)
42
+ assert "Dr. Smith" in assistant_text(report.runs[0].transcript)
43
+ ```
44
+
45
+ ## Configuration
46
+
47
+ The plugin shells out to the `skilltest` binary. Point it at one with the
48
+ `SKILLTEST_BIN` env var (or `bin=`), the provider with `SKILLTEST_PROVIDER` (or
49
+ `provider=`), and set defaults in `pyproject.toml`:
50
+
51
+ ```toml
52
+ [tool.pytest.ini_options]
53
+ skilltest_provider = "oneharness"
54
+ skilltest_platforms = ["claude-code"]
55
+ skilltest_models = ["claude-opus-4-8"]
56
+ ```
57
+
58
+ See the repository root for the provider protocol and the full schema.
@@ -0,0 +1,6 @@
1
+ skilltest_pytest/__init__.py,sha256=zUtKpvSZqJlJvX3LicBIJ0xB6ni0vPrWyyQ86WSTPV4,1645
2
+ skilltest_pytest/plugin.py,sha256=6MgeZLz7s_gfM0NodYxG-A_Qlm1RSD-mglt6Y7gE0i4,3374
3
+ skilltest_pytest-0.2.0.dist-info/METADATA,sha256=DmT5vH94tbSGAuiyRLTC9Vl7xMqR3uKKjFbEC9kaO98,1788
4
+ skilltest_pytest-0.2.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
5
+ skilltest_pytest-0.2.0.dist-info/entry_points.txt,sha256=r7Haj7qWnhqk2CyI8I1yttI8L7qXG9ssAZ5Nyccz4Ro,47
6
+ skilltest_pytest-0.2.0.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: hatchling 1.30.1
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,2 @@
1
+ [pytest11]
2
+ skilltest = skilltest_pytest.plugin