falsify 0.3.4__tar.gz → 0.3.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. {falsify-0.3.4/falsify.egg-info → falsify-0.3.6}/PKG-INFO +3 -3
  2. {falsify-0.3.4 → falsify-0.3.6}/README.md +2 -2
  3. {falsify-0.3.4 → falsify-0.3.6/falsify.egg-info}/PKG-INFO +3 -3
  4. {falsify-0.3.4 → falsify-0.3.6}/falsify.py +1 -1
  5. {falsify-0.3.4 → falsify-0.3.6}/falsify_prml.py +31 -2
  6. {falsify-0.3.4 → falsify-0.3.6}/pyproject.toml +1 -1
  7. {falsify-0.3.4 → falsify-0.3.6}/tests/test_version.py +14 -2
  8. {falsify-0.3.4 → falsify-0.3.6}/LICENSE +0 -0
  9. {falsify-0.3.4 → falsify-0.3.6}/NOTICE +0 -0
  10. {falsify-0.3.4 → falsify-0.3.6}/falsify.egg-info/SOURCES.txt +0 -0
  11. {falsify-0.3.4 → falsify-0.3.6}/falsify.egg-info/dependency_links.txt +0 -0
  12. {falsify-0.3.4 → falsify-0.3.6}/falsify.egg-info/entry_points.txt +0 -0
  13. {falsify-0.3.4 → falsify-0.3.6}/falsify.egg-info/requires.txt +0 -0
  14. {falsify-0.3.4 → falsify-0.3.6}/falsify.egg-info/top_level.txt +0 -0
  15. {falsify-0.3.4 → falsify-0.3.6}/mcp_server/__init__.py +0 -0
  16. {falsify-0.3.4 → falsify-0.3.6}/mcp_server/__main__.py +0 -0
  17. {falsify-0.3.4 → falsify-0.3.6}/mcp_server/server.py +0 -0
  18. {falsify-0.3.4 → falsify-0.3.6}/setup.cfg +0 -0
  19. {falsify-0.3.4 → falsify-0.3.6}/tests/test_adversarial_doc.py +0 -0
  20. {falsify-0.3.4 → falsify-0.3.6}/tests/test_agent_claim_auditor.py +0 -0
  21. {falsify-0.3.4 → falsify-0.3.6}/tests/test_agent_verdict_refresher.py +0 -0
  22. {falsify-0.3.4 → falsify-0.3.6}/tests/test_architecture.py +0 -0
  23. {falsify-0.3.4 → falsify-0.3.6}/tests/test_bench.py +0 -0
  24. {falsify-0.3.4 → falsify-0.3.6}/tests/test_calibration_sample.py +0 -0
  25. {falsify-0.3.4 → falsify-0.3.6}/tests/test_case_studies_doc.py +0 -0
  26. {falsify-0.3.4 → falsify-0.3.6}/tests/test_changelog.py +0 -0
  27. {falsify-0.3.4 → falsify-0.3.6}/tests/test_ci_workflow.py +0 -0
  28. {falsify-0.3.4 → falsify-0.3.6}/tests/test_claude_md.py +0 -0
  29. {falsify-0.3.4 → falsify-0.3.6}/tests/test_code_of_conduct.py +0 -0
  30. {falsify-0.3.4 → falsify-0.3.6}/tests/test_comparison_doc.py +0 -0
  31. {falsify-0.3.4 → falsify-0.3.6}/tests/test_contributing.py +0 -0
  32. {falsify-0.3.4 → falsify-0.3.6}/tests/test_demo_script.py +0 -0
  33. {falsify-0.3.4 → falsify-0.3.6}/tests/test_demo_script_doc.py +0 -0
  34. {falsify-0.3.4 → falsify-0.3.6}/tests/test_demo_shot_list.py +0 -0
  35. {falsify-0.3.4 → falsify-0.3.6}/tests/test_diff.py +0 -0
  36. {falsify-0.3.4 → falsify-0.3.6}/tests/test_docker.py +0 -0
  37. {falsify-0.3.4 → falsify-0.3.6}/tests/test_doctor.py +0 -0
  38. {falsify-0.3.4 → falsify-0.3.6}/tests/test_editorconfig.py +0 -0
  39. {falsify-0.3.4 → falsify-0.3.6}/tests/test_examples_doc.py +0 -0
  40. {falsify-0.3.4 → falsify-0.3.6}/tests/test_export.py +0 -0
  41. {falsify-0.3.4 → falsify-0.3.6}/tests/test_faq.py +0 -0
  42. {falsify-0.3.4 → falsify-0.3.6}/tests/test_github_repo_maturity.py +0 -0
  43. {falsify-0.3.4 → falsify-0.3.6}/tests/test_github_templates.py +0 -0
  44. {falsify-0.3.4 → falsify-0.3.6}/tests/test_gitignore.py +0 -0
  45. {falsify-0.3.4 → falsify-0.3.6}/tests/test_glossary_doc.py +0 -0
  46. {falsify-0.3.4 → falsify-0.3.6}/tests/test_guard.py +0 -0
  47. {falsify-0.3.4 → falsify-0.3.6}/tests/test_hook_install.py +0 -0
  48. {falsify-0.3.4 → falsify-0.3.6}/tests/test_init.py +0 -0
  49. {falsify-0.3.4 → falsify-0.3.6}/tests/test_init_templates.py +0 -0
  50. {falsify-0.3.4 → falsify-0.3.6}/tests/test_integration_e2e.py +0 -0
  51. {falsify-0.3.4 → falsify-0.3.6}/tests/test_list.py +0 -0
  52. {falsify-0.3.4 → falsify-0.3.6}/tests/test_lock.py +0 -0
  53. {falsify-0.3.4 → falsify-0.3.6}/tests/test_makefile.py +0 -0
  54. {falsify-0.3.4 → falsify-0.3.6}/tests/test_managed_agents.py +0 -0
  55. {falsify-0.3.4 → falsify-0.3.6}/tests/test_mcp.py +0 -0
  56. {falsify-0.3.4 → falsify-0.3.6}/tests/test_mcp_server.py +0 -0
  57. {falsify-0.3.4 → falsify-0.3.6}/tests/test_pitch.py +0 -0
  58. {falsify-0.3.4 → falsify-0.3.6}/tests/test_pre_commit.py +0 -0
  59. {falsify-0.3.4 → falsify-0.3.6}/tests/test_prml_cli.py +0 -0
  60. {falsify-0.3.4 → falsify-0.3.6}/tests/test_prml_v02_candidates.py +0 -0
  61. {falsify-0.3.4 → falsify-0.3.6}/tests/test_prml_vectors.py +0 -0
  62. {falsify-0.3.4 → falsify-0.3.6}/tests/test_pyproject.py +0 -0
  63. {falsify-0.3.4 → falsify-0.3.6}/tests/test_readme.py +0 -0
  64. {falsify-0.3.4 → falsify-0.3.6}/tests/test_release_check.py +0 -0
  65. {falsify-0.3.4 → falsify-0.3.6}/tests/test_release_workflow.py +0 -0
  66. {falsify-0.3.4 → falsify-0.3.6}/tests/test_replay.py +0 -0
  67. {falsify-0.3.4 → falsify-0.3.6}/tests/test_roadmap.py +0 -0
  68. {falsify-0.3.4 → falsify-0.3.6}/tests/test_run.py +0 -0
  69. {falsify-0.3.4 → falsify-0.3.6}/tests/test_score.py +0 -0
  70. {falsify-0.3.4 → falsify-0.3.6}/tests/test_self_dogfood.py +0 -0
  71. {falsify-0.3.4 → falsify-0.3.6}/tests/test_skill_author.py +0 -0
  72. {falsify-0.3.4 → falsify-0.3.6}/tests/test_skill_ci_doctor.py +0 -0
  73. {falsify-0.3.4 → falsify-0.3.6}/tests/test_skill_claim_audit.py +0 -0
  74. {falsify-0.3.4 → falsify-0.3.6}/tests/test_skill_claim_review.py +0 -0
  75. {falsify-0.3.4 → falsify-0.3.6}/tests/test_skill_falsify.py +0 -0
  76. {falsify-0.3.4 → falsify-0.3.6}/tests/test_slash_commands.py +0 -0
  77. {falsify-0.3.4 → falsify-0.3.6}/tests/test_stats.py +0 -0
  78. {falsify-0.3.4 → falsify-0.3.6}/tests/test_stats_html.py +0 -0
  79. {falsify-0.3.4 → falsify-0.3.6}/tests/test_submission.py +0 -0
  80. {falsify-0.3.4 → falsify-0.3.6}/tests/test_submission_md.py +0 -0
  81. {falsify-0.3.4 → falsify-0.3.6}/tests/test_trend.py +0 -0
  82. {falsify-0.3.4 → falsify-0.3.6}/tests/test_tutorial.py +0 -0
  83. {falsify-0.3.4 → falsify-0.3.6}/tests/test_verdict.py +0 -0
  84. {falsify-0.3.4 → falsify-0.3.6}/tests/test_verify.py +0 -0
  85. {falsify-0.3.4 → falsify-0.3.6}/tests/test_why.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: falsify
3
- Version: 0.3.4
3
+ Version: 0.3.6
4
4
  Summary: PRML reference CLI — pre-register an ML evaluation claim as a SHA-256 manifest; verify PASS/FAIL/TAMPERED.
5
5
  Author: Cüneyt Öztürk
6
6
  License: MIT
@@ -75,7 +75,7 @@ No install? Verify any manifest in-browser at [registry.falsify.dev](https://reg
75
75
 
76
76
  **Try it without installing:** [`registry.falsify.dev`](https://registry.falsify.dev) — paste a PRML manifest, get a SHA-256 permalink and a README badge. No account, no server-side state beyond the hash.
77
77
 
78
- **Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@v1`](https://github.com/studio-11-co/prml-verify-action) — composite GitHub Action wrapping the falsify CLI ([listed on the GitHub Marketplace](https://github.com/marketplace/actions/prml-verify)). Block merges on tampered or regressed eval claims. Optional public registry anchor.
78
+ **Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@v2`](https://github.com/studio-11-co/prml-verify-action) — composite GitHub Action wrapping the falsify CLI ([listed on the GitHub Marketplace](https://github.com/marketplace/actions/prml-verify)). Block merges on tampered or regressed eval claims. Optional public registry anchor.
79
79
 
80
80
  **Already on MLflow?** [`pip install mlflow-falsify`](https://pypi.org/project/mlflow-falsify/) — discoverable plugin that tags every MLflow run with the PRML manifest hash, version, metric, comparator, threshold, and dataset id. Zero code changes to your existing MLflow workflow. Source: [`studio-11-co/mlflow-falsify`](https://github.com/studio-11-co/mlflow-falsify).
81
81
 
@@ -128,7 +128,7 @@ See [docs/CASE_STUDIES.md](docs/CASE_STUDIES.md) for three concrete adoption sto
128
128
 
129
129
  ---
130
130
 
131
- **Current version:** falsify 0.3.4 (PRML CLI) · falsify-engine 0.3.4 — `falsify --version`.
131
+ **Current version:** falsify 0.3.5 (PRML CLI) · falsify-engine 0.3.5 — `falsify --version`.
132
132
  **Working with Claude Code?** See [CLAUDE.md](CLAUDE.md).
133
133
 
134
134
  ---
@@ -43,7 +43,7 @@ No install? Verify any manifest in-browser at [registry.falsify.dev](https://reg
43
43
 
44
44
  **Try it without installing:** [`registry.falsify.dev`](https://registry.falsify.dev) — paste a PRML manifest, get a SHA-256 permalink and a README badge. No account, no server-side state beyond the hash.
45
45
 
46
- **Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@v1`](https://github.com/studio-11-co/prml-verify-action) — composite GitHub Action wrapping the falsify CLI ([listed on the GitHub Marketplace](https://github.com/marketplace/actions/prml-verify)). Block merges on tampered or regressed eval claims. Optional public registry anchor.
46
+ **Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@v2`](https://github.com/studio-11-co/prml-verify-action) — composite GitHub Action wrapping the falsify CLI ([listed on the GitHub Marketplace](https://github.com/marketplace/actions/prml-verify)). Block merges on tampered or regressed eval claims. Optional public registry anchor.
47
47
 
48
48
  **Already on MLflow?** [`pip install mlflow-falsify`](https://pypi.org/project/mlflow-falsify/) — discoverable plugin that tags every MLflow run with the PRML manifest hash, version, metric, comparator, threshold, and dataset id. Zero code changes to your existing MLflow workflow. Source: [`studio-11-co/mlflow-falsify`](https://github.com/studio-11-co/mlflow-falsify).
49
49
 
@@ -96,7 +96,7 @@ See [docs/CASE_STUDIES.md](docs/CASE_STUDIES.md) for three concrete adoption sto
96
96
 
97
97
  ---
98
98
 
99
- **Current version:** falsify 0.3.4 (PRML CLI) · falsify-engine 0.3.4 — `falsify --version`.
99
+ **Current version:** falsify 0.3.5 (PRML CLI) · falsify-engine 0.3.5 — `falsify --version`.
100
100
  **Working with Claude Code?** See [CLAUDE.md](CLAUDE.md).
101
101
 
102
102
  ---
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: falsify
3
- Version: 0.3.4
3
+ Version: 0.3.6
4
4
  Summary: PRML reference CLI — pre-register an ML evaluation claim as a SHA-256 manifest; verify PASS/FAIL/TAMPERED.
5
5
  Author: Cüneyt Öztürk
6
6
  License: MIT
@@ -75,7 +75,7 @@ No install? Verify any manifest in-browser at [registry.falsify.dev](https://reg
75
75
 
76
76
  **Try it without installing:** [`registry.falsify.dev`](https://registry.falsify.dev) — paste a PRML manifest, get a SHA-256 permalink and a README badge. No account, no server-side state beyond the hash.
77
77
 
78
- **Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@v1`](https://github.com/studio-11-co/prml-verify-action) — composite GitHub Action wrapping the falsify CLI ([listed on the GitHub Marketplace](https://github.com/marketplace/actions/prml-verify)). Block merges on tampered or regressed eval claims. Optional public registry anchor.
78
+ **Add it to your CI in five lines:** [`studio-11-co/prml-verify-action@v2`](https://github.com/studio-11-co/prml-verify-action) — composite GitHub Action wrapping the falsify CLI ([listed on the GitHub Marketplace](https://github.com/marketplace/actions/prml-verify)). Block merges on tampered or regressed eval claims. Optional public registry anchor.
79
79
 
80
80
  **Already on MLflow?** [`pip install mlflow-falsify`](https://pypi.org/project/mlflow-falsify/) — discoverable plugin that tags every MLflow run with the PRML manifest hash, version, metric, comparator, threshold, and dataset id. Zero code changes to your existing MLflow workflow. Source: [`studio-11-co/mlflow-falsify`](https://github.com/studio-11-co/mlflow-falsify).
81
81
 
@@ -128,7 +128,7 @@ See [docs/CASE_STUDIES.md](docs/CASE_STUDIES.md) for three concrete adoption sto
128
128
 
129
129
  ---
130
130
 
131
- **Current version:** falsify 0.3.4 (PRML CLI) · falsify-engine 0.3.4 — `falsify --version`.
131
+ **Current version:** falsify 0.3.5 (PRML CLI) · falsify-engine 0.3.5 — `falsify --version`.
132
132
  **Working with Claude Code?** See [CLAUDE.md](CLAUDE.md).
133
133
 
134
134
  ---
@@ -25,7 +25,7 @@ from typing import Any, Callable
25
25
 
26
26
  import yaml
27
27
 
28
- __version__ = "0.3.4"
28
+ __version__ = "0.3.6"
29
29
 
30
30
  EXIT_PASS = 0
31
31
  EXIT_FAIL = 10
@@ -8,7 +8,7 @@ anyone; edit the manifest after locking and the hash no longer matches.
8
8
  Canonicalisation (PRML v0.1 §4): keys recursively sorted, block style, LF,
9
9
  trailing whitespace stripped, exactly one trailing newline, UTF-8. This is the
10
10
  same rule the Go / JS / Rust reference implementations use; all four produce
11
- byte-identical canonical bytes on the 20 published conformance vectors.
11
+ byte-identical canonical bytes on the 21 published conformance vectors.
12
12
 
13
13
  Commands:
14
14
  falsify lock <spec.yaml|spec.json> canonicalize, hash, write sidecar
@@ -33,7 +33,7 @@ import os
33
33
  import re
34
34
  import sys
35
35
 
36
- __version__ = "0.3.4"
36
+ __version__ = "0.3.6"
37
37
 
38
38
  EXIT_PASS = 0
39
39
  EXIT_BAD = 2
@@ -50,6 +50,32 @@ REQUIRED_PRODUCER = ["id"]
50
50
  VALID_COMPARATORS = {">=", "<=", ">", "<", "=="}
51
51
  _HEX64 = re.compile(r"^[0-9a-f]{64}$")
52
52
 
53
+ # Characters that break canonical-byte portability across the reference impls:
54
+ # C0/C1 control chars (incl. U+0085 NEL, which PyYAML does not round-trip),
55
+ # U+007F DEL, the Unicode line/paragraph separators U+2028/U+2029, and U+FEFF
56
+ # (BOM / zero-width no-break space). These have no legitimate place in a PRML
57
+ # string field (metric, ids, etc.); a manifest containing them would canonicalize
58
+ # to different bytes — or fail to round-trip — across Python/JS/Go/Rust, so it is
59
+ # rejected at validation rather than silently producing a non-portable hash.
60
+ # Rejecting them is additive: no conformance vector contains these, so no valid
61
+ # manifest's hash changes. Printable Unicode (emoji, CJK, accents) is unaffected.
62
+ _FORBIDDEN_CHARS = re.compile(r"[\x00-\x1f\x7f-\x9f\u2028\u2029\ufeff]")
63
+
64
+
65
+ def _bad_char_fields(obj, path="") -> list[str]:
66
+ """Return field paths whose string value contains a portability-breaking char."""
67
+ out = []
68
+ if isinstance(obj, str):
69
+ if _FORBIDDEN_CHARS.search(obj):
70
+ out.append(path or "(value)")
71
+ elif isinstance(obj, dict):
72
+ for k, v in obj.items():
73
+ out.extend(_bad_char_fields(v, f"{path}.{k}" if path else str(k)))
74
+ elif isinstance(obj, (list, tuple)):
75
+ for i, v in enumerate(obj):
76
+ out.extend(_bad_char_fields(v, f"{path}[{i}]"))
77
+ return out
78
+
53
79
 
54
80
  def _require_yaml():
55
81
  try:
@@ -133,6 +159,9 @@ def validate_manifest(m: dict) -> list[str]:
133
159
  for f in REQUIRED_PRODUCER:
134
160
  if f not in prod:
135
161
  errors.append(f"missing required field: producer.{f}")
162
+ for fld in _bad_char_fields(m):
163
+ errors.append(f"{fld}: contains a control / non-portable character "
164
+ f"(C0/C1, U+007F, U+2028/U+2029, or U+FEFF) — not allowed in a PRML string field")
136
165
  return errors
137
166
 
138
167
 
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "falsify"
7
- version = "0.3.4"
7
+ version = "0.3.6"
8
8
  description = "PRML reference CLI — pre-register an ML evaluation claim as a SHA-256 manifest; verify PASS/FAIL/TAMPERED."
9
9
  readme = "README.md"
10
10
  license = { text = "MIT" }
@@ -15,6 +15,18 @@ REPO_ROOT = Path(__file__).resolve().parent.parent
15
15
  FALSIFY = REPO_ROOT / "falsify.py"
16
16
 
17
17
 
18
+ def _version() -> str:
19
+ """The version under test, read from falsify.py — never hardcode it here."""
20
+ spec = importlib.util.spec_from_file_location("_falsify_ver", FALSIFY)
21
+ assert spec is not None and spec.loader is not None
22
+ module = importlib.util.module_from_spec(spec)
23
+ spec.loader.exec_module(module)
24
+ return module.__version__
25
+
26
+
27
+ VERSION = _version()
28
+
29
+
18
30
  def _run(args: list[str], cwd: Path) -> subprocess.CompletedProcess:
19
31
  return subprocess.run(
20
32
  [sys.executable, str(FALSIFY), *args],
@@ -35,14 +47,14 @@ class VersionTests(unittest.TestCase):
35
47
  def test_version_subcommand_prints_version(self) -> None:
36
48
  result = _run(["version"], cwd=self.cwd)
37
49
  self.assertEqual(result.returncode, 0, msg=result.stderr)
38
- self.assertIn("0.3.4", result.stdout)
50
+ self.assertIn(VERSION, result.stdout)
39
51
  self.assertIn("falsify", result.stdout)
40
52
 
41
53
  def test_version_flag_prints_version(self) -> None:
42
54
  result = _run(["--version"], cwd=self.cwd)
43
55
  self.assertEqual(result.returncode, 0, msg=result.stderr)
44
56
  # argparse's `action='version'` writes to stdout on Python 3.11+.
45
- self.assertIn("0.3.4", result.stdout)
57
+ self.assertIn(VERSION, result.stdout)
46
58
 
47
59
  def test_version_subcommand_json_mode(self) -> None:
48
60
  result = _run(["version", "--json"], cwd=self.cwd)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes