falsify 0.3.4__tar.gz → 0.3.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {falsify-0.3.4/falsify.egg-info → falsify-0.3.6}/PKG-INFO +3 -3
- {falsify-0.3.4 → falsify-0.3.6}/README.md +2 -2
- {falsify-0.3.4 → falsify-0.3.6/falsify.egg-info}/PKG-INFO +3 -3
- {falsify-0.3.4 → falsify-0.3.6}/falsify.py +1 -1
- {falsify-0.3.4 → falsify-0.3.6}/falsify_prml.py +31 -2
- {falsify-0.3.4 → falsify-0.3.6}/pyproject.toml +1 -1
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_version.py +14 -2
- {falsify-0.3.4 → falsify-0.3.6}/LICENSE +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/NOTICE +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/falsify.egg-info/SOURCES.txt +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/falsify.egg-info/dependency_links.txt +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/falsify.egg-info/entry_points.txt +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/falsify.egg-info/requires.txt +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/falsify.egg-info/top_level.txt +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/mcp_server/__init__.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/mcp_server/__main__.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/mcp_server/server.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/setup.cfg +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_adversarial_doc.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_agent_claim_auditor.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_agent_verdict_refresher.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_architecture.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_bench.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_calibration_sample.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_case_studies_doc.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_changelog.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_ci_workflow.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_claude_md.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_code_of_conduct.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_comparison_doc.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_contributing.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_demo_script.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_demo_script_doc.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_demo_shot_list.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_diff.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_docker.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_doctor.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_editorconfig.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_examples_doc.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_export.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_faq.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_github_repo_maturity.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_github_templates.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_gitignore.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_glossary_doc.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_guard.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_hook_install.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_init.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_init_templates.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_integration_e2e.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_list.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_lock.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_makefile.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_managed_agents.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_mcp.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_mcp_server.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_pitch.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_pre_commit.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_prml_cli.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_prml_v02_candidates.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_prml_vectors.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_pyproject.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_readme.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_release_check.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_release_workflow.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_replay.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_roadmap.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_run.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_score.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_self_dogfood.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_skill_author.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_skill_ci_doctor.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_skill_claim_audit.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_skill_claim_review.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_skill_falsify.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_slash_commands.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_stats.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_stats_html.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_submission.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_submission_md.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_trend.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_tutorial.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_verdict.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_verify.py +0 -0
- {falsify-0.3.4 → falsify-0.3.6}/tests/test_why.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: falsify
|
|
3
|
-
Version: 0.3.4
|
|
3
|
+
Version: 0.3.6
|
|
4
4
|
Summary: PRML reference CLI — pre-register an ML evaluation claim as a SHA-256 manifest; verify PASS/FAIL/TAMPERED.
|
|
5
5
|
Author: Cüneyt Öztürk
|
|
6
6
|
License: MIT
|
|
@@ -75,7 +75,7 @@ No install? Verify any manifest in-browser at [registry.falsify.dev](https://reg
|
|
|
75
75
|
|
|
76
76
|
**Try it without installing:** [`registry.falsify.dev`](https://registry.falsify.dev) — paste a PRML manifest, get a SHA-256 permalink and a README badge. No account, no server-side state beyond the hash.
|
|
77
77
|
|
|
78
|
-
**Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@
|
|
78
|
+
**Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@v2`](https://github.com/studio-11-co/prml-verify-action) — composite GitHub Action wrapping the falsify CLI ([listed on the GitHub Marketplace](https://github.com/marketplace/actions/prml-verify)). Block merges on tampered or regressed eval claims. Optional public registry anchor.
|
|
79
79
|
|
|
80
80
|
**Already on MLflow?** [`pip install mlflow-falsify`](https://pypi.org/project/mlflow-falsify/) — discoverable plugin that tags every MLflow run with the PRML manifest hash, version, metric, comparator, threshold, and dataset id. Zero code changes to your existing MLflow workflow. Source: [`studio-11-co/mlflow-falsify`](https://github.com/studio-11-co/mlflow-falsify).
|
|
81
81
|
|
|
@@ -128,7 +128,7 @@ See [docs/CASE_STUDIES.md](docs/CASE_STUDIES.md) for three concrete adoption sto
|
|
|
128
128
|
|
|
129
129
|
---
|
|
130
130
|
|
|
131
|
-
**Current version:** falsify 0.3.
|
|
131
|
+
**Current version:** falsify 0.3.5 (PRML CLI) · falsify-engine 0.3.5 — `falsify --version`.
|
|
132
132
|
**Working with Claude Code?** See [CLAUDE.md](CLAUDE.md).
|
|
133
133
|
|
|
134
134
|
---
|
|
@@ -43,7 +43,7 @@ No install? Verify any manifest in-browser at [registry.falsify.dev](https://reg
|
|
|
43
43
|
|
|
44
44
|
**Try it without installing:** [`registry.falsify.dev`](https://registry.falsify.dev) — paste a PRML manifest, get a SHA-256 permalink and a README badge. No account, no server-side state beyond the hash.
|
|
45
45
|
|
|
46
|
-
**Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@
|
|
46
|
+
**Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@v2`](https://github.com/studio-11-co/prml-verify-action) — composite GitHub Action wrapping the falsify CLI ([listed on the GitHub Marketplace](https://github.com/marketplace/actions/prml-verify)). Block merges on tampered or regressed eval claims. Optional public registry anchor.
|
|
47
47
|
|
|
48
48
|
**Already on MLflow?** [`pip install mlflow-falsify`](https://pypi.org/project/mlflow-falsify/) — discoverable plugin that tags every MLflow run with the PRML manifest hash, version, metric, comparator, threshold, and dataset id. Zero code changes to your existing MLflow workflow. Source: [`studio-11-co/mlflow-falsify`](https://github.com/studio-11-co/mlflow-falsify).
|
|
49
49
|
|
|
@@ -96,7 +96,7 @@ See [docs/CASE_STUDIES.md](docs/CASE_STUDIES.md) for three concrete adoption sto
|
|
|
96
96
|
|
|
97
97
|
---
|
|
98
98
|
|
|
99
|
-
**Current version:** falsify 0.3.
|
|
99
|
+
**Current version:** falsify 0.3.5 (PRML CLI) · falsify-engine 0.3.5 — `falsify --version`.
|
|
100
100
|
**Working with Claude Code?** See [CLAUDE.md](CLAUDE.md).
|
|
101
101
|
|
|
102
102
|
---
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: falsify
|
|
3
|
-
Version: 0.3.4
|
|
3
|
+
Version: 0.3.6
|
|
4
4
|
Summary: PRML reference CLI — pre-register an ML evaluation claim as a SHA-256 manifest; verify PASS/FAIL/TAMPERED.
|
|
5
5
|
Author: Cüneyt Öztürk
|
|
6
6
|
License: MIT
|
|
@@ -75,7 +75,7 @@ No install? Verify any manifest in-browser at [registry.falsify.dev](https://reg
|
|
|
75
75
|
|
|
76
76
|
**Try it without installing:** [`registry.falsify.dev`](https://registry.falsify.dev) — paste a PRML manifest, get a SHA-256 permalink and a README badge. No account, no server-side state beyond the hash.
|
|
77
77
|
|
|
78
|
-
**Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@
|
|
78
|
+
**Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@v2`](https://github.com/studio-11-co/prml-verify-action) — composite GitHub Action wrapping the falsify CLI ([listed on the GitHub Marketplace](https://github.com/marketplace/actions/prml-verify)). Block merges on tampered or regressed eval claims. Optional public registry anchor.
|
|
79
79
|
|
|
80
80
|
**Already on MLflow?** [`pip install mlflow-falsify`](https://pypi.org/project/mlflow-falsify/) — discoverable plugin that tags every MLflow run with the PRML manifest hash, version, metric, comparator, threshold, and dataset id. Zero code changes to your existing MLflow workflow. Source: [`studio-11-co/mlflow-falsify`](https://github.com/studio-11-co/mlflow-falsify).
|
|
81
81
|
|
|
@@ -128,7 +128,7 @@ See [docs/CASE_STUDIES.md](docs/CASE_STUDIES.md) for three concrete adoption sto
|
|
|
128
128
|
|
|
129
129
|
---
|
|
130
130
|
|
|
131
|
-
**Current version:** falsify 0.3.
|
|
131
|
+
**Current version:** falsify 0.3.5 (PRML CLI) · falsify-engine 0.3.5 — `falsify --version`.
|
|
132
132
|
**Working with Claude Code?** See [CLAUDE.md](CLAUDE.md).
|
|
133
133
|
|
|
134
134
|
---
|
|
@@ -8,7 +8,7 @@ anyone; edit the manifest after locking and the hash no longer matches.
|
|
|
8
8
|
Canonicalisation (PRML v0.1 §4): keys recursively sorted, block style, LF,
|
|
9
9
|
trailing whitespace stripped, exactly one trailing newline, UTF-8. This is the
|
|
10
10
|
same rule the Go / JS / Rust reference implementations use; all four produce
|
|
11
|
-
byte-identical canonical bytes on the
|
|
11
|
+
byte-identical canonical bytes on the 21 published conformance vectors.
|
|
12
12
|
|
|
13
13
|
Commands:
|
|
14
14
|
falsify lock <spec.yaml|spec.json> canonicalize, hash, write sidecar
|
|
@@ -33,7 +33,7 @@ import os
|
|
|
33
33
|
import re
|
|
34
34
|
import sys
|
|
35
35
|
|
|
36
|
-
__version__ = "0.3.4"
|
|
36
|
+
__version__ = "0.3.6"
|
|
37
37
|
|
|
38
38
|
EXIT_PASS = 0
|
|
39
39
|
EXIT_BAD = 2
|
|
@@ -50,6 +50,32 @@ REQUIRED_PRODUCER = ["id"]
|
|
|
50
50
|
VALID_COMPARATORS = {">=", "<=", ">", "<", "=="}
|
|
51
51
|
_HEX64 = re.compile(r"^[0-9a-f]{64}$")
|
|
52
52
|
|
|
53
|
+
# Characters that break canonical-byte portability across the reference impls:
|
|
54
|
+
# C0/C1 control chars (incl. U+0085 NEL, which PyYAML does not round-trip),
|
|
55
|
+
# U+007F DEL, the Unicode line/paragraph separators U+2028/U+2029, and U+FEFF
|
|
56
|
+
# (BOM / zero-width no-break space). These have no legitimate place in a PRML
|
|
57
|
+
# string field (metric, ids, etc.); a manifest containing them would canonicalize
|
|
58
|
+
# to different bytes — or fail to round-trip — across Python/JS/Go/Rust, so it is
|
|
59
|
+
# rejected at validation rather than silently producing a non-portable hash.
|
|
60
|
+
# Rejecting them is additive: no conformance vector contains these, so no valid
|
|
61
|
+
# manifest's hash changes. Printable Unicode (emoji, CJK, accents) is unaffected.
|
|
62
|
+
_FORBIDDEN_CHARS = re.compile(r"[\x00-\x1f\x7f-\x9f\u2028\u2029\ufeff]")
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
def _bad_char_fields(obj, path="") -> list[str]:
|
|
66
|
+
"""Return field paths whose string value contains a portability-breaking char."""
|
|
67
|
+
out = []
|
|
68
|
+
if isinstance(obj, str):
|
|
69
|
+
if _FORBIDDEN_CHARS.search(obj):
|
|
70
|
+
out.append(path or "(value)")
|
|
71
|
+
elif isinstance(obj, dict):
|
|
72
|
+
for k, v in obj.items():
|
|
73
|
+
out.extend(_bad_char_fields(v, f"{path}.{k}" if path else str(k)))
|
|
74
|
+
elif isinstance(obj, (list, tuple)):
|
|
75
|
+
for i, v in enumerate(obj):
|
|
76
|
+
out.extend(_bad_char_fields(v, f"{path}[{i}]"))
|
|
77
|
+
return out
|
|
78
|
+
|
|
53
79
|
|
|
54
80
|
def _require_yaml():
|
|
55
81
|
try:
|
|
@@ -133,6 +159,9 @@ def validate_manifest(m: dict) -> list[str]:
|
|
|
133
159
|
for f in REQUIRED_PRODUCER:
|
|
134
160
|
if f not in prod:
|
|
135
161
|
errors.append(f"missing required field: producer.{f}")
|
|
162
|
+
for fld in _bad_char_fields(m):
|
|
163
|
+
errors.append(f"{fld}: contains a control / non-portable character "
|
|
164
|
+
f"(C0/C1, U+007F, U+2028/U+2029, or U+FEFF) — not allowed in a PRML string field")
|
|
136
165
|
return errors
|
|
137
166
|
|
|
138
167
|
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "falsify"
|
|
7
|
-
version = "0.3.4"
|
|
7
|
+
version = "0.3.6"
|
|
8
8
|
description = "PRML reference CLI — pre-register an ML evaluation claim as a SHA-256 manifest; verify PASS/FAIL/TAMPERED."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -15,6 +15,18 @@ REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
|
15
15
|
FALSIFY = REPO_ROOT / "falsify.py"
|
|
16
16
|
|
|
17
17
|
|
|
18
|
+
def _version() -> str:
|
|
19
|
+
"""The version under test, read from falsify.py — never hardcode it here."""
|
|
20
|
+
spec = importlib.util.spec_from_file_location("_falsify_ver", FALSIFY)
|
|
21
|
+
assert spec is not None and spec.loader is not None
|
|
22
|
+
module = importlib.util.module_from_spec(spec)
|
|
23
|
+
spec.loader.exec_module(module)
|
|
24
|
+
return module.__version__
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
VERSION = _version()
|
|
28
|
+
|
|
29
|
+
|
|
18
30
|
def _run(args: list[str], cwd: Path) -> subprocess.CompletedProcess:
|
|
19
31
|
return subprocess.run(
|
|
20
32
|
[sys.executable, str(FALSIFY), *args],
|
|
@@ -35,14 +47,14 @@ class VersionTests(unittest.TestCase):
|
|
|
35
47
|
def test_version_subcommand_prints_version(self) -> None:
|
|
36
48
|
result = _run(["version"], cwd=self.cwd)
|
|
37
49
|
self.assertEqual(result.returncode, 0, msg=result.stderr)
|
|
38
|
-
self.assertIn(
|
|
50
|
+
self.assertIn(VERSION, result.stdout)
|
|
39
51
|
self.assertIn("falsify", result.stdout)
|
|
40
52
|
|
|
41
53
|
def test_version_flag_prints_version(self) -> None:
|
|
42
54
|
result = _run(["--version"], cwd=self.cwd)
|
|
43
55
|
self.assertEqual(result.returncode, 0, msg=result.stderr)
|
|
44
56
|
# argparse's `action='version'` writes to stdout on Python 3.11+.
|
|
45
|
-
self.assertIn(
|
|
57
|
+
self.assertIn(VERSION, result.stdout)
|
|
46
58
|
|
|
47
59
|
def test_version_subcommand_json_mode(self) -> None:
|
|
48
60
|
result = _run(["version", "--json"], cwd=self.cwd)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|