shipwright-kit 0.7.0__tar.gz → 0.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {shipwright_kit-0.7.0/shipwright_kit.egg-info → shipwright_kit-0.8.1}/PKG-INFO +7 -8
  2. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/README.md +6 -7
  3. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/pyproject.toml +1 -1
  4. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/__init__.py +1 -1
  5. shipwright_kit-0.8.1/shipwright_kit/eval/__init__.py +18 -0
  6. shipwright_kit-0.8.1/shipwright_kit/eval/harness.py +129 -0
  7. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1/shipwright_kit.egg-info}/PKG-INFO +7 -8
  8. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/tests/test_template_wiring.py +8 -2
  9. shipwright_kit-0.7.0/shipwright_kit/eval/__init__.py +0 -7
  10. shipwright_kit-0.7.0/shipwright_kit/eval/harness.py +0 -50
  11. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/LICENSE +0 -0
  12. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/setup.cfg +0 -0
  13. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/cli.py +0 -0
  14. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/config.py +0 -0
  15. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/design/__init__.py +0 -0
  16. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/design/banner.py +0 -0
  17. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/design/console.py +0 -0
  18. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/design/glyphs.py +0 -0
  19. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/design/output.py +0 -0
  20. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/design/palette.py +0 -0
  21. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/design/tiers.py +0 -0
  22. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/eval/corpus.py +0 -0
  23. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/eval/metrics.py +0 -0
  24. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/py.typed +0 -0
  25. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/security/__init__.py +0 -0
  26. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/security/eval.py +0 -0
  27. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/security/injection.py +0 -0
  28. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit/security/theme.py +0 -0
  29. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit.egg-info/SOURCES.txt +0 -0
  30. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit.egg-info/dependency_links.txt +0 -0
  31. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit.egg-info/entry_points.txt +0 -0
  32. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit.egg-info/requires.txt +0 -0
  33. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/shipwright_kit.egg-info/top_level.txt +0 -0
  34. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/tests/test_cli.py +0 -0
  35. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/tests/test_config.py +0 -0
  36. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/tests/test_packaging.py +0 -0
  37. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/tests/test_packs_entrypoint.py +0 -0
  38. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/tests/test_release_config.py +0 -0
  39. {shipwright_kit-0.7.0 → shipwright_kit-0.8.1}/tests/test_tooling.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: shipwright-kit
3
- Version: 0.7.0
3
+ Version: 0.8.1
4
4
  Summary: Shipwright — AI-agent dev framework + import-light design/eval/security library
5
5
  Author: Christian Huhn
6
6
  License-Expression: MIT
@@ -51,19 +51,18 @@ The library is consumed today by two real tools: **barb** and **sift** both impo
51
51
 
52
52
  ## Install
53
53
 
54
- The library is **not on PyPI** — the bare name `shipwright` belongs to an unrelated
55
- project, so the published distribution is **`shipwright-kit`** and the import name is
56
- **`shipwright_kit`**. For now, install from git:
54
+ The bare name `shipwright` belongs to an unrelated project on PyPI, so the
55
+ published distribution is **`shipwright-kit`** and the import name is
56
+ **`shipwright_kit`**.
57
57
 
58
58
  ```bash
59
- uv pip install "git+https://github.com/duathron/shipwright@main"
59
+ uv pip install "shipwright-kit>=0.7,<0.8"
60
60
  # then: import shipwright_kit
61
61
  ```
62
62
 
63
63
  > [!NOTE]
64
- > Pin a release tag instead of `@main` for reproducible builds once a tagged
65
- > release of the `shipwright-kit` distribution is cut. Do **not** `pip install
66
- > shipwright` from PyPI — that is a different, unrelated package.
64
+ > Do **not** `pip install shipwright` from PyPI — that is a different, unrelated
65
+ > package. The correct dist name is `shipwright-kit`.
67
66
 
68
67
  The security pack needs no extra — it ships with the base install and registers
69
68
  through the `shipwright_kit.packs` entry point.
@@ -21,19 +21,18 @@ The library is consumed today by two real tools: **barb** and **sift** both impo
21
21
 
22
22
  ## Install
23
23
 
24
- The library is **not on PyPI** — the bare name `shipwright` belongs to an unrelated
25
- project, so the published distribution is **`shipwright-kit`** and the import name is
26
- **`shipwright_kit`**. For now, install from git:
24
+ The bare name `shipwright` belongs to an unrelated project on PyPI, so the
25
+ published distribution is **`shipwright-kit`** and the import name is
26
+ **`shipwright_kit`**.
27
27
 
28
28
  ```bash
29
- uv pip install "git+https://github.com/duathron/shipwright@main"
29
+ uv pip install "shipwright-kit>=0.7,<0.8"
30
30
  # then: import shipwright_kit
31
31
  ```
32
32
 
33
33
  > [!NOTE]
34
- > Pin a release tag instead of `@main` for reproducible builds once a tagged
35
- > release of the `shipwright-kit` distribution is cut. Do **not** `pip install
36
- > shipwright` from PyPI — that is a different, unrelated package.
34
+ > Do **not** `pip install shipwright` from PyPI — that is a different, unrelated
35
+ > package. The correct dist name is `shipwright-kit`.
37
36
 
38
37
  The security pack needs no extra — it ships with the base install and registers
39
38
  through the `shipwright_kit.packs` entry point.
@@ -7,7 +7,7 @@ build-backend = "setuptools.build_meta"
7
7
  # PyPI distribution name. The bare `shipwright` is taken on PyPI (unrelated 6si
8
8
  # tool), so the dist is `shipwright-kit`; the IMPORT name is `shipwright_kit`.
9
9
  name = "shipwright-kit"
10
- version = "0.7.0"
10
+ version = "0.8.1"
11
11
  description = "Shipwright — AI-agent dev framework + import-light design/eval/security library"
12
12
  readme = "README.md"
13
13
  requires-python = ">=3.11"
@@ -1,3 +1,3 @@
1
1
  """Shipwright — design-token + tooling library."""
2
2
 
3
- __version__ = "0.7.0"
3
+ __version__ = "0.8.1"
@@ -0,0 +1,18 @@
1
+ """Generic classification eval harness: corpus, metrics, evaluate + gate."""
2
+
3
+ from .corpus import Sample, load_corpus
4
+ from .harness import CorpusDisagreement, CorpusVerifyReport, EvalGateError, evaluate, gate, verify_corpus
5
+ from .metrics import EVAL_SCHEMA_VERSION, EvalResult
6
+
7
+ __all__ = [
8
+ "Sample",
9
+ "load_corpus",
10
+ "EvalResult",
11
+ "EVAL_SCHEMA_VERSION",
12
+ "EvalGateError",
13
+ "evaluate",
14
+ "gate",
15
+ "CorpusDisagreement",
16
+ "CorpusVerifyReport",
17
+ "verify_corpus",
18
+ ]
@@ -0,0 +1,129 @@
1
+ """Run a predict function over a corpus and gate the result. Count-and-skip on a
2
+ predict-time exception (faithful to barb — a bad row must not abort the run)."""
3
+
4
+ from __future__ import annotations
5
+
6
+ from collections.abc import Callable
7
+ from dataclasses import dataclass
8
+
9
+ from .corpus import Sample
10
+ from .metrics import EvalResult
11
+
12
+
13
+ class EvalGateError(AssertionError):
14
+ """Raised when an eval result is below the required thresholds."""
15
+
16
+
17
+ @dataclass(frozen=True)
18
+ class CorpusDisagreement:
19
+ """A single row where the predictor's output disagrees with the human label."""
20
+
21
+ value: str # the sample input
22
+ label: str # the human-assigned label
23
+ predicted: str # what the predictor returned
24
+
25
+
26
+ @dataclass(frozen=True)
27
+ class CorpusVerifyReport:
28
+ """Result of :func:`verify_corpus`. Stdlib-only, no rich/pyfiglet import."""
29
+
30
+ disagreements: list[CorpusDisagreement]
31
+ total: int
32
+ disagreement_count: int
33
+
34
+ @property
35
+ def clean(self) -> bool:
36
+ """True when every row agrees — safe to proceed to floor-setting."""
37
+ return self.disagreement_count == 0
38
+
39
+ def summary(self) -> str:
40
+ """Single-line human-readable summary, suitable for stderr."""
41
+ if self.clean:
42
+ return f"corpus-verify: OK — {self.total} rows, 0 disagreements"
43
+ return f"corpus-verify: FAIL — {self.disagreement_count}/{self.total} rows disagree (label vs prediction)"
44
+
45
+
46
+ def verify_corpus(
47
+ corpus: list[Sample],
48
+ predictor: Callable[[str], str],
49
+ *,
50
+ eq: Callable[[str, str], bool] | None = None,
51
+ ) -> CorpusVerifyReport:
52
+ """Run *predictor* over every labeled row and report label-vs-prediction disagreements.
53
+
54
+ Use this **before** setting a precision/recall floor to catch mislabeled or
55
+ dishonest corpus rows — a predictor that is believed correct is compared
56
+ directly to the human label; rows that differ are flagged.
57
+
58
+ Parameters
59
+ ----------
60
+ corpus:
61
+ List of :class:`~shipwright_kit.eval.Sample` objects (input + label pairs).
62
+ predictor:
63
+ Callable that maps an input string to a prediction string. Must be the
64
+ same callable you intend to gate — usually the production classifier.
65
+ eq:
66
+ Optional equality function ``(label, predicted) -> bool``. Defaults to
67
+ plain string equality (``label == predicted``). Supply a custom function
68
+ when the label space differs from the prediction space (e.g. case-folding,
69
+ synonyms, or a mapping dict).
70
+
71
+ Returns
72
+ -------
73
+ CorpusVerifyReport
74
+ Structured report with the full list of disagreements plus a summary count.
75
+ Predictor exceptions on a row are treated as a disagreement (predicted value
76
+ is set to ``"<error>"``).
77
+ """
78
+ _eq: Callable[[str, str], bool] = eq if eq is not None else (lambda a, b: a == b)
79
+ disagreements: list[CorpusDisagreement] = []
80
+ for sample in corpus:
81
+ try:
82
+ pred = predictor(sample.input)
83
+ except Exception:
84
+ disagreements.append(CorpusDisagreement(sample.input, sample.label, "<error>"))
85
+ continue
86
+ if not _eq(sample.label, pred):
87
+ disagreements.append(CorpusDisagreement(sample.input, sample.label, pred))
88
+ return CorpusVerifyReport(
89
+ disagreements=disagreements,
90
+ total=len(corpus),
91
+ disagreement_count=len(disagreements),
92
+ )
93
+
94
+
95
+ def evaluate(
96
+ predict_fn: Callable[[str], str],
97
+ corpus: list[Sample],
98
+ *,
99
+ positive_pred: Callable[[str], bool],
100
+ positive_expected: Callable[[str], bool] | None = None,
101
+ ) -> EvalResult:
102
+ binarize_expected = positive_expected or positive_pred # default = same-space (Phase B)
103
+ tp = fp = tn = fn = errors = 0
104
+ for sample in corpus:
105
+ try:
106
+ pred = predict_fn(sample.input)
107
+ except Exception: # count-and-skip, surfaced via errors
108
+ errors += 1
109
+ continue
110
+ exp = binarize_expected(sample.label)
111
+ got = positive_pred(pred)
112
+ if exp and got:
113
+ tp += 1
114
+ elif got and not exp:
115
+ fp += 1
116
+ elif exp and not got:
117
+ fn += 1
118
+ else:
119
+ tn += 1
120
+ return EvalResult(tp, fp, tn, fn, errors)
121
+
122
+
123
+ def gate(result: EvalResult, *, min_precision: float, min_recall: float) -> None:
124
+ if result.precision < min_precision:
125
+ raise EvalGateError(f"precision {result.precision:.3f} < {min_precision}")
126
+ if result.recall < min_recall:
127
+ raise EvalGateError(f"recall {result.recall:.3f} < {min_recall}")
128
+ if (result.tp + result.fn) > 0 and result.recall == 0.0:
129
+ raise EvalGateError("zero recall with positives present")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: shipwright-kit
3
- Version: 0.7.0
3
+ Version: 0.8.1
4
4
  Summary: Shipwright — AI-agent dev framework + import-light design/eval/security library
5
5
  Author: Christian Huhn
6
6
  License-Expression: MIT
@@ -51,19 +51,18 @@ The library is consumed today by two real tools: **barb** and **sift** both impo
51
51
 
52
52
  ## Install
53
53
 
54
- The library is **not on PyPI** — the bare name `shipwright` belongs to an unrelated
55
- project, so the published distribution is **`shipwright-kit`** and the import name is
56
- **`shipwright_kit`**. For now, install from git:
54
+ The bare name `shipwright` belongs to an unrelated project on PyPI, so the
55
+ published distribution is **`shipwright-kit`** and the import name is
56
+ **`shipwright_kit`**.
57
57
 
58
58
  ```bash
59
- uv pip install "git+https://github.com/duathron/shipwright@main"
59
+ uv pip install "shipwright-kit>=0.7,<0.8"
60
60
  # then: import shipwright_kit
61
61
  ```
62
62
 
63
63
  > [!NOTE]
64
- > Pin a release tag instead of `@main` for reproducible builds once a tagged
65
- > release of the `shipwright-kit` distribution is cut. Do **not** `pip install
66
- > shipwright` from PyPI — that is a different, unrelated package.
64
+ > Do **not** `pip install shipwright` from PyPI — that is a different, unrelated
65
+ > package. The correct dist name is `shipwright-kit`.
67
66
 
68
67
  The security pack needs no extra — it ships with the base install and registers
69
68
  through the `shipwright_kit.packs` entry point.
@@ -56,7 +56,10 @@ def test_security_preset_installs_security_extra(tmp_path):
56
56
  text = (proj / "pyproject.toml").read_text()
57
57
  req = _shipwright_req(text)
58
58
  assert req.extras == set() # security pack ships with base (entry-point); no [security] extra exists
59
- assert "git+https://github.com/duathron/shipwright" in str(req.url)
59
+ # W2: PyPI range pin (>=0.7,<0.8) — url is None, specifier encodes the range
60
+ assert req.url is None
61
+ assert ">=0.7" in str(req.specifier)
62
+ assert "<0.8" in str(req.specifier)
60
63
  assert 'preset = "security"' in text
61
64
  banner = proj / "acme" / "banner.py"
62
65
  assert banner.exists()
@@ -68,5 +71,8 @@ def test_none_preset_core_only(tmp_path):
68
71
  text = (proj / "pyproject.toml").read_text()
69
72
  req = _shipwright_req(text)
70
73
  assert req.extras == set() # no security extra
71
- assert "git+https://github.com/duathron/shipwright" in str(req.url)
74
+ # W2: PyPI range pin (>=0.7,<0.8) — url is None, specifier encodes the range
75
+ assert req.url is None
76
+ assert ">=0.7" in str(req.specifier)
77
+ assert "<0.8" in str(req.specifier)
72
78
  assert 'preset = "none"' in text
@@ -1,7 +0,0 @@
1
- """Generic classification eval harness: corpus, metrics, evaluate + gate."""
2
-
3
- from .corpus import Sample, load_corpus
4
- from .harness import EvalGateError, evaluate, gate
5
- from .metrics import EVAL_SCHEMA_VERSION, EvalResult
6
-
7
- __all__ = ["Sample", "load_corpus", "EvalResult", "EVAL_SCHEMA_VERSION", "EvalGateError", "evaluate", "gate"]
@@ -1,50 +0,0 @@
1
- """Run a predict function over a corpus and gate the result. Count-and-skip on a
2
- predict-time exception (faithful to barb — a bad row must not abort the run)."""
3
-
4
- from __future__ import annotations
5
-
6
- from collections.abc import Callable
7
-
8
- from .corpus import Sample
9
- from .metrics import EvalResult
10
-
11
-
12
- class EvalGateError(AssertionError):
13
- """Raised when an eval result is below the required thresholds."""
14
-
15
-
16
- def evaluate(
17
- predict_fn: Callable[[str], str],
18
- corpus: list[Sample],
19
- *,
20
- positive_pred: Callable[[str], bool],
21
- positive_expected: Callable[[str], bool] | None = None,
22
- ) -> EvalResult:
23
- binarize_expected = positive_expected or positive_pred # default = same-space (Phase B)
24
- tp = fp = tn = fn = errors = 0
25
- for sample in corpus:
26
- try:
27
- pred = predict_fn(sample.input)
28
- except Exception: # count-and-skip, surfaced via errors
29
- errors += 1
30
- continue
31
- exp = binarize_expected(sample.label)
32
- got = positive_pred(pred)
33
- if exp and got:
34
- tp += 1
35
- elif got and not exp:
36
- fp += 1
37
- elif exp and not got:
38
- fn += 1
39
- else:
40
- tn += 1
41
- return EvalResult(tp, fp, tn, fn, errors)
42
-
43
-
44
- def gate(result: EvalResult, *, min_precision: float, min_recall: float) -> None:
45
- if result.precision < min_precision:
46
- raise EvalGateError(f"precision {result.precision:.3f} < {min_precision}")
47
- if result.recall < min_recall:
48
- raise EvalGateError(f"recall {result.recall:.3f} < {min_recall}")
49
- if (result.tp + result.fn) > 0 and result.recall == 0.0:
50
- raise EvalGateError("zero recall with positives present")
File without changes
File without changes