falsify 0.3.1__tar.gz → 0.3.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {falsify-0.3.1/falsify.egg-info → falsify-0.3.2}/PKG-INFO +10 -10
- {falsify-0.3.1 → falsify-0.3.2}/README.md +9 -9
- {falsify-0.3.1 → falsify-0.3.2/falsify.egg-info}/PKG-INFO +10 -10
- {falsify-0.3.1 → falsify-0.3.2}/falsify.py +1 -1
- {falsify-0.3.1 → falsify-0.3.2}/falsify_prml.py +1 -1
- {falsify-0.3.1 → falsify-0.3.2}/pyproject.toml +1 -1
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_prml_vectors.py +12 -6
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_version.py +2 -2
- {falsify-0.3.1 → falsify-0.3.2}/LICENSE +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/NOTICE +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/falsify.egg-info/SOURCES.txt +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/falsify.egg-info/dependency_links.txt +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/falsify.egg-info/entry_points.txt +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/falsify.egg-info/requires.txt +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/falsify.egg-info/top_level.txt +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/mcp_server/__init__.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/mcp_server/__main__.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/mcp_server/server.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/setup.cfg +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_adversarial_doc.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_agent_claim_auditor.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_agent_verdict_refresher.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_architecture.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_bench.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_calibration_sample.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_case_studies_doc.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_changelog.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_ci_workflow.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_claude_md.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_code_of_conduct.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_comparison_doc.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_contributing.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_demo_script.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_demo_script_doc.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_demo_shot_list.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_diff.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_docker.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_doctor.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_editorconfig.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_examples_doc.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_export.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_faq.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_github_repo_maturity.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_github_templates.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_gitignore.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_glossary_doc.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_guard.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_hook_install.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_init.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_init_templates.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_integration_e2e.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_list.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_lock.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_makefile.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_managed_agents.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_mcp.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_mcp_server.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_pitch.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_pre_commit.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_prml_cli.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_prml_v02_candidates.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_pyproject.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_readme.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_release_check.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_release_workflow.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_replay.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_roadmap.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_run.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_score.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_self_dogfood.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_skill_author.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_skill_ci_doctor.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_skill_claim_audit.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_skill_claim_review.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_skill_falsify.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_slash_commands.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_stats.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_stats_html.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_submission.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_submission_md.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_trend.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_tutorial.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_verdict.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_verify.py +0 -0
- {falsify-0.3.1 → falsify-0.3.2}/tests/test_why.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: falsify
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: PRML reference CLI — pre-register an ML evaluation claim as a SHA-256 manifest; verify PASS/FAIL/TAMPERED.
|
|
5
5
|
Author: Cüneyt Öztürk
|
|
6
6
|
License: MIT
|
|
@@ -52,14 +52,14 @@ TAMPERED (exit 3)
|
|
|
52
52
|
|
|
53
53
|
No install? Verify any manifest in-browser at [registry.falsify.dev](https://registry.falsify.dev). Byte-equivalent reference CLIs also ship for JS (`npm i -g falsify-js`), Go, and Rust.
|
|
54
54
|
|
|
55
|
-
4 reference implementations (Python, JavaScript, Go, Rust) byte-equivalent on all
|
|
55
|
+
4 reference implementations (Python, JavaScript, Go, Rust) byte-equivalent on all 21 conformance vectors (13 v0.1 stable + 8 v0.2). PRML v0.2 frozen 2026-05-22. The same day, Lock #2 (a public hypothesis on the spec's own distribution, target ≥3 external contributors in 14 days) resolved at 0/3. The mechanism worked; the post-mortem is at [falsify.dev/notes/lock-2-postmortem](https://falsify.dev/notes/lock-2-postmortem/). Designed for ML eval rigor. Maps to EU AI Act Article 12 evidence as a side effect.
|
|
56
56
|
|
|
57
57
|
> **Pre-registration + CI for AI-agent claims.** Lock the claim and threshold with SHA-256 *before* running the experiment — or the result doesn't count.
|
|
58
58
|
|
|
59
59
|

|
|
60
60
|

|
|
61
61
|

|
|
62
|
-

|
|
63
63
|
-brightgreen)
|
|
64
64
|

|
|
65
65
|

|
|
@@ -148,12 +148,12 @@ This repository is the home of **PRML v0.1** — Pre-Registered ML Manifest Spec
|
|
|
148
148
|
|
|
149
149
|
**Audit & compliance crosswalks** (subcategory-by-subcategory maps from major AI governance frameworks to PRML fields, FULL/PARTIAL/NONE tagged):
|
|
150
150
|
|
|
151
|
-
- **[EU AI Act Article 12](https://spec.falsify.dev/eu-ai-act/article-12/)** — code-level pattern for the 2
|
|
151
|
+
- **[EU AI Act Article 12](https://spec.falsify.dev/eu-ai-act/article-12/)** — code-level pattern for the 2 December 2027 high-risk applicability deadline
|
|
152
152
|
- **[Article 12 readiness diagnostic](https://spec.falsify.dev/article-12-readiness/)** — 10-question browser-only self-assessment
|
|
153
153
|
- **[NIST AI RMF 1.0 crosswalk](https://spec.falsify.dev/nist-ai-rmf/)** — GOVERN / MAP / MEASURE / MANAGE subcategory map (incl. AI 600-1 GenAI Profile)
|
|
154
154
|
- **[ISO/IEC 42001:2023 crosswalk](https://spec.falsify.dev/iso-42001/)** — AIMS clause-by-clause evidence map (Clauses 7-9 + Annex A controls)
|
|
155
155
|
|
|
156
|
-
**Long-form working notes** (2026-05-23, written for compliance leads, AI governance officers, and notified body assessors preparing for the 2
|
|
156
|
+
**Long-form working notes** (2026-05-23, written for compliance leads, AI governance officers, and notified body assessors preparing for the 2 December 2027 deadline; CC BY 4.0):
|
|
157
157
|
|
|
158
158
|
- **[EU AI Act readiness assessment](https://falsify.dev/eu-ai-act-readiness/)** — six binding articles, ten-question gap check, evidence shape per obligation
|
|
159
159
|
- **[2 August 2026 deadline](https://falsify.dev/ai-act-deadline-august-2026/)** — three application dates, Article 99 penalty structure, ten-week plan
|
|
@@ -162,7 +162,7 @@ This repository is the home of **PRML v0.1** — Pre-Registered ML Manifest Spec
|
|
|
162
162
|
- **[ISO/IEC 42001 readiness](https://falsify.dev/iso-42001-readiness/)** — seven clauses, EU AI Act Article 17 overlap, twelve-month certification path
|
|
163
163
|
- **[Lock #2 post-mortem](https://falsify.dev/notes/lock-2-postmortem/)** — field report on running a falsifiable spec in public
|
|
164
164
|
|
|
165
|
-
**Reference implementations** (four languages,
|
|
165
|
+
**Reference implementations** (four languages, 13 v0.1 + 8 v0.2 candidate vectors = 21 total; multi-lang CI runs all 21 byte-for-byte per push and daily at 04:00 UTC):
|
|
166
166
|
|
|
167
167
|
- **Python:** [`falsify.py`](falsify.py) — original reference, uses PyYAML
|
|
168
168
|
- **Node.js:** [`impl/js/`](impl/js/) — second reference, ~400 LOC, hand-rolled, zero deps
|
|
@@ -173,7 +173,7 @@ Hosted spec at [spec.falsify.dev/v0.1](https://spec.falsify.dev/v0.1). Public re
|
|
|
173
173
|
|
|
174
174
|
**Companion projects** (separate repos under `studio-11-co`, each MIT or CC0 licensed):
|
|
175
175
|
|
|
176
|
-
- **[`falsify-cookbook`](https://github.com/studio-11-co/falsify-cookbook)** — field manual for the spec:
|
|
176
|
+
- **[`falsify-cookbook`](https://github.com/studio-11-co/falsify-cookbook)** — field manual for the spec: 13 patterns + 4 anti-patterns, every one a single page with a runnable example, including [Pattern 11: PRML + Sigstore for execution integrity](https://github.com/studio-11-co/falsify-cookbook/blob/main/patterns/11-sigstore-execution.md) closing the §8.1 gap. CC0.
|
|
177
177
|
- **[`falsify-integrity-index`](https://github.com/studio-11-co/falsify-integrity-index)** — public scorecard of how 25+ well-known ML eval claims meet the 9 PRML falsifiability criteria. Live at [falsify.dev/integrity](https://falsify.dev/integrity). CC0 data, MIT tooling.
|
|
178
178
|
- **[`falsify-inspect`](https://github.com/studio-11-co/falsify-inspect)** — Inspect AI adapter: anchor an Inspect AI eval claim's threshold to a SHA-256 hash before the run, verify the post-run log against it. MIT.
|
|
179
179
|
- **[`prml-verify-action`](https://github.com/studio-11-co/prml-verify-action)** — composite GitHub Action ([listed on Marketplace](https://github.com/marketplace/actions/prml-verify)) for CI integration. MIT.
|
|
@@ -504,9 +504,9 @@ ln -sf "$(pwd)/hooks/commit-msg" .git/hooks/commit-msg
|
|
|
504
504
|
## Repository layout
|
|
505
505
|
|
|
506
506
|
- `falsify.py` — single-file Python CLI, stdlib + pyyaml only.
|
|
507
|
-
- `impl/js/falsify.js` — Node.js second reference implementation (
|
|
508
|
-
- `impl/go/falsify.go` — Go third reference implementation (
|
|
509
|
-
- `impl/rust/` — Rust fourth reference implementation (
|
|
507
|
+
- `impl/js/falsify.js` — Node.js second reference implementation (13/13 v0.1 + 8/8 v0.2 = 21/21 vectors). Also published to npm as [`falsify-js`](https://www.npmjs.com/package/falsify-js).
|
|
508
|
+
- `impl/go/falsify.go` — Go third reference implementation (21/21 vectors).
|
|
509
|
+
- `impl/rust/` — Rust fourth reference implementation (21/21 vectors).
|
|
510
510
|
- `spec/PRML-v0.1.md` + `spec/test-vectors/v0.1/` (13) + `spec/test-vectors/v0.2/` (8) — spec + conformance suite.
|
|
511
511
|
- `spec/analysis/` — positioning + canonicalization portability findings.
|
|
512
512
|
- `spec/compliance/` — EU AI Act mapping + compliance landing copy.
|
|
@@ -20,14 +20,14 @@ TAMPERED (exit 3)
|
|
|
20
20
|
|
|
21
21
|
No install? Verify any manifest in-browser at [registry.falsify.dev](https://registry.falsify.dev). Byte-equivalent reference CLIs also ship for JS (`npm i -g falsify-js`), Go, and Rust.
|
|
22
22
|
|
|
23
|
-
4 reference implementations (Python, JavaScript, Go, Rust) byte-equivalent on all
|
|
23
|
+
4 reference implementations (Python, JavaScript, Go, Rust) byte-equivalent on all 21 conformance vectors (13 v0.1 stable + 8 v0.2). PRML v0.2 frozen 2026-05-22. The same day, Lock #2 (a public hypothesis on the spec's own distribution, target ≥3 external contributors in 14 days) resolved at 0/3. The mechanism worked; the post-mortem is at [falsify.dev/notes/lock-2-postmortem](https://falsify.dev/notes/lock-2-postmortem/). Designed for ML eval rigor. Maps to EU AI Act Article 12 evidence as a side effect.
|
|
24
24
|
|
|
25
25
|
> **Pre-registration + CI for AI-agent claims.** Lock the claim and threshold with SHA-256 *before* running the experiment — or the result doesn't count.
|
|
26
26
|
|
|
27
27
|

|
|
28
28
|

|
|
29
29
|

|
|
30
|
-

|
|
31
31
|
-brightgreen)
|
|
32
32
|

|
|
33
33
|

|
|
@@ -116,12 +116,12 @@ This repository is the home of **PRML v0.1** — Pre-Registered ML Manifest Spec
|
|
|
116
116
|
|
|
117
117
|
**Audit & compliance crosswalks** (subcategory-by-subcategory maps from major AI governance frameworks to PRML fields, FULL/PARTIAL/NONE tagged):
|
|
118
118
|
|
|
119
|
-
- **[EU AI Act Article 12](https://spec.falsify.dev/eu-ai-act/article-12/)** — code-level pattern for the 2
|
|
119
|
+
- **[EU AI Act Article 12](https://spec.falsify.dev/eu-ai-act/article-12/)** — code-level pattern for the 2 December 2027 high-risk applicability deadline
|
|
120
120
|
- **[Article 12 readiness diagnostic](https://spec.falsify.dev/article-12-readiness/)** — 10-question browser-only self-assessment
|
|
121
121
|
- **[NIST AI RMF 1.0 crosswalk](https://spec.falsify.dev/nist-ai-rmf/)** — GOVERN / MAP / MEASURE / MANAGE subcategory map (incl. AI 600-1 GenAI Profile)
|
|
122
122
|
- **[ISO/IEC 42001:2023 crosswalk](https://spec.falsify.dev/iso-42001/)** — AIMS clause-by-clause evidence map (Clauses 7-9 + Annex A controls)
|
|
123
123
|
|
|
124
|
-
**Long-form working notes** (2026-05-23, written for compliance leads, AI governance officers, and notified body assessors preparing for the 2
|
|
124
|
+
**Long-form working notes** (2026-05-23, written for compliance leads, AI governance officers, and notified body assessors preparing for the 2 December 2027 deadline; CC BY 4.0):
|
|
125
125
|
|
|
126
126
|
- **[EU AI Act readiness assessment](https://falsify.dev/eu-ai-act-readiness/)** — six binding articles, ten-question gap check, evidence shape per obligation
|
|
127
127
|
- **[2 August 2026 deadline](https://falsify.dev/ai-act-deadline-august-2026/)** — three application dates, Article 99 penalty structure, ten-week plan
|
|
@@ -130,7 +130,7 @@ This repository is the home of **PRML v0.1** — Pre-Registered ML Manifest Spec
|
|
|
130
130
|
- **[ISO/IEC 42001 readiness](https://falsify.dev/iso-42001-readiness/)** — seven clauses, EU AI Act Article 17 overlap, twelve-month certification path
|
|
131
131
|
- **[Lock #2 post-mortem](https://falsify.dev/notes/lock-2-postmortem/)** — field report on running a falsifiable spec in public
|
|
132
132
|
|
|
133
|
-
**Reference implementations** (four languages,
|
|
133
|
+
**Reference implementations** (four languages, 13 v0.1 + 8 v0.2 candidate vectors = 21 total; multi-lang CI runs all 21 byte-for-byte per push and daily at 04:00 UTC):
|
|
134
134
|
|
|
135
135
|
- **Python:** [`falsify.py`](falsify.py) — original reference, uses PyYAML
|
|
136
136
|
- **Node.js:** [`impl/js/`](impl/js/) — second reference, ~400 LOC, hand-rolled, zero deps
|
|
@@ -141,7 +141,7 @@ Hosted spec at [spec.falsify.dev/v0.1](https://spec.falsify.dev/v0.1). Public re
|
|
|
141
141
|
|
|
142
142
|
**Companion projects** (separate repos under `studio-11-co`, each MIT or CC0 licensed):
|
|
143
143
|
|
|
144
|
-
- **[`falsify-cookbook`](https://github.com/studio-11-co/falsify-cookbook)** — field manual for the spec:
|
|
144
|
+
- **[`falsify-cookbook`](https://github.com/studio-11-co/falsify-cookbook)** — field manual for the spec: 13 patterns + 4 anti-patterns, every one a single page with a runnable example, including [Pattern 11: PRML + Sigstore for execution integrity](https://github.com/studio-11-co/falsify-cookbook/blob/main/patterns/11-sigstore-execution.md) closing the §8.1 gap. CC0.
|
|
145
145
|
- **[`falsify-integrity-index`](https://github.com/studio-11-co/falsify-integrity-index)** — public scorecard of how 25+ well-known ML eval claims meet the 9 PRML falsifiability criteria. Live at [falsify.dev/integrity](https://falsify.dev/integrity). CC0 data, MIT tooling.
|
|
146
146
|
- **[`falsify-inspect`](https://github.com/studio-11-co/falsify-inspect)** — Inspect AI adapter: anchor an Inspect AI eval claim's threshold to a SHA-256 hash before the run, verify the post-run log against it. MIT.
|
|
147
147
|
- **[`prml-verify-action`](https://github.com/studio-11-co/prml-verify-action)** — composite GitHub Action ([listed on Marketplace](https://github.com/marketplace/actions/prml-verify)) for CI integration. MIT.
|
|
@@ -472,9 +472,9 @@ ln -sf "$(pwd)/hooks/commit-msg" .git/hooks/commit-msg
|
|
|
472
472
|
## Repository layout
|
|
473
473
|
|
|
474
474
|
- `falsify.py` — single-file Python CLI, stdlib + pyyaml only.
|
|
475
|
-
- `impl/js/falsify.js` — Node.js second reference implementation (
|
|
476
|
-
- `impl/go/falsify.go` — Go third reference implementation (
|
|
477
|
-
- `impl/rust/` — Rust fourth reference implementation (
|
|
475
|
+
- `impl/js/falsify.js` — Node.js second reference implementation (13/13 v0.1 + 8/8 v0.2 = 21/21 vectors). Also published to npm as [`falsify-js`](https://www.npmjs.com/package/falsify-js).
|
|
476
|
+
- `impl/go/falsify.go` — Go third reference implementation (21/21 vectors).
|
|
477
|
+
- `impl/rust/` — Rust fourth reference implementation (21/21 vectors).
|
|
478
478
|
- `spec/PRML-v0.1.md` + `spec/test-vectors/v0.1/` (13) + `spec/test-vectors/v0.2/` (8) — spec + conformance suite.
|
|
479
479
|
- `spec/analysis/` — positioning + canonicalization portability findings.
|
|
480
480
|
- `spec/compliance/` — EU AI Act mapping + compliance landing copy.
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: falsify
|
|
3
|
-
Version: 0.3.
|
|
3
|
+
Version: 0.3.2
|
|
4
4
|
Summary: PRML reference CLI — pre-register an ML evaluation claim as a SHA-256 manifest; verify PASS/FAIL/TAMPERED.
|
|
5
5
|
Author: Cüneyt Öztürk
|
|
6
6
|
License: MIT
|
|
@@ -52,14 +52,14 @@ TAMPERED (exit 3)
|
|
|
52
52
|
|
|
53
53
|
No install? Verify any manifest in-browser at [registry.falsify.dev](https://registry.falsify.dev). Byte-equivalent reference CLIs also ship for JS (`npm i -g falsify-js`), Go, and Rust.
|
|
54
54
|
|
|
55
|
-
4 reference implementations (Python, JavaScript, Go, Rust) byte-equivalent on all
|
|
55
|
+
4 reference implementations (Python, JavaScript, Go, Rust) byte-equivalent on all 21 conformance vectors (13 v0.1 stable + 8 v0.2). PRML v0.2 frozen 2026-05-22. The same day, Lock #2 (a public hypothesis on the spec's own distribution, target ≥3 external contributors in 14 days) resolved at 0/3. The mechanism worked; the post-mortem is at [falsify.dev/notes/lock-2-postmortem](https://falsify.dev/notes/lock-2-postmortem/). Designed for ML eval rigor. Maps to EU AI Act Article 12 evidence as a side effect.
|
|
56
56
|
|
|
57
57
|
> **Pre-registration + CI for AI-agent claims.** Lock the claim and threshold with SHA-256 *before* running the experiment — or the result doesn't count.
|
|
58
58
|
|
|
59
59
|

|
|
60
60
|

|
|
61
61
|

|
|
62
|
-

|
|
63
63
|
-brightgreen)
|
|
64
64
|

|
|
65
65
|

|
|
@@ -148,12 +148,12 @@ This repository is the home of **PRML v0.1** — Pre-Registered ML Manifest Spec
|
|
|
148
148
|
|
|
149
149
|
**Audit & compliance crosswalks** (subcategory-by-subcategory maps from major AI governance frameworks to PRML fields, FULL/PARTIAL/NONE tagged):
|
|
150
150
|
|
|
151
|
-
- **[EU AI Act Article 12](https://spec.falsify.dev/eu-ai-act/article-12/)** — code-level pattern for the 2
|
|
151
|
+
- **[EU AI Act Article 12](https://spec.falsify.dev/eu-ai-act/article-12/)** — code-level pattern for the 2 December 2027 high-risk applicability deadline
|
|
152
152
|
- **[Article 12 readiness diagnostic](https://spec.falsify.dev/article-12-readiness/)** — 10-question browser-only self-assessment
|
|
153
153
|
- **[NIST AI RMF 1.0 crosswalk](https://spec.falsify.dev/nist-ai-rmf/)** — GOVERN / MAP / MEASURE / MANAGE subcategory map (incl. AI 600-1 GenAI Profile)
|
|
154
154
|
- **[ISO/IEC 42001:2023 crosswalk](https://spec.falsify.dev/iso-42001/)** — AIMS clause-by-clause evidence map (Clauses 7-9 + Annex A controls)
|
|
155
155
|
|
|
156
|
-
**Long-form working notes** (2026-05-23, written for compliance leads, AI governance officers, and notified body assessors preparing for the 2
|
|
156
|
+
**Long-form working notes** (2026-05-23, written for compliance leads, AI governance officers, and notified body assessors preparing for the 2 December 2027 deadline; CC BY 4.0):
|
|
157
157
|
|
|
158
158
|
- **[EU AI Act readiness assessment](https://falsify.dev/eu-ai-act-readiness/)** — six binding articles, ten-question gap check, evidence shape per obligation
|
|
159
159
|
- **[2 August 2026 deadline](https://falsify.dev/ai-act-deadline-august-2026/)** — three application dates, Article 99 penalty structure, ten-week plan
|
|
@@ -162,7 +162,7 @@ This repository is the home of **PRML v0.1** — Pre-Registered ML Manifest Spec
|
|
|
162
162
|
- **[ISO/IEC 42001 readiness](https://falsify.dev/iso-42001-readiness/)** — seven clauses, EU AI Act Article 17 overlap, twelve-month certification path
|
|
163
163
|
- **[Lock #2 post-mortem](https://falsify.dev/notes/lock-2-postmortem/)** — field report on running a falsifiable spec in public
|
|
164
164
|
|
|
165
|
-
**Reference implementations** (four languages,
|
|
165
|
+
**Reference implementations** (four languages, 13 v0.1 + 8 v0.2 candidate vectors = 21 total; multi-lang CI runs all 21 byte-for-byte per push and daily at 04:00 UTC):
|
|
166
166
|
|
|
167
167
|
- **Python:** [`falsify.py`](falsify.py) — original reference, uses PyYAML
|
|
168
168
|
- **Node.js:** [`impl/js/`](impl/js/) — second reference, ~400 LOC, hand-rolled, zero deps
|
|
@@ -173,7 +173,7 @@ Hosted spec at [spec.falsify.dev/v0.1](https://spec.falsify.dev/v0.1). Public re
|
|
|
173
173
|
|
|
174
174
|
**Companion projects** (separate repos under `studio-11-co`, each MIT or CC0 licensed):
|
|
175
175
|
|
|
176
|
-
- **[`falsify-cookbook`](https://github.com/studio-11-co/falsify-cookbook)** — field manual for the spec:
|
|
176
|
+
- **[`falsify-cookbook`](https://github.com/studio-11-co/falsify-cookbook)** — field manual for the spec: 13 patterns + 4 anti-patterns, every one a single page with a runnable example, including [Pattern 11: PRML + Sigstore for execution integrity](https://github.com/studio-11-co/falsify-cookbook/blob/main/patterns/11-sigstore-execution.md) closing the §8.1 gap. CC0.
|
|
177
177
|
- **[`falsify-integrity-index`](https://github.com/studio-11-co/falsify-integrity-index)** — public scorecard of how 25+ well-known ML eval claims meet the 9 PRML falsifiability criteria. Live at [falsify.dev/integrity](https://falsify.dev/integrity). CC0 data, MIT tooling.
|
|
178
178
|
- **[`falsify-inspect`](https://github.com/studio-11-co/falsify-inspect)** — Inspect AI adapter: anchor an Inspect AI eval claim's threshold to a SHA-256 hash before the run, verify the post-run log against it. MIT.
|
|
179
179
|
- **[`prml-verify-action`](https://github.com/studio-11-co/prml-verify-action)** — composite GitHub Action ([listed on Marketplace](https://github.com/marketplace/actions/prml-verify)) for CI integration. MIT.
|
|
@@ -504,9 +504,9 @@ ln -sf "$(pwd)/hooks/commit-msg" .git/hooks/commit-msg
|
|
|
504
504
|
## Repository layout
|
|
505
505
|
|
|
506
506
|
- `falsify.py` — single-file Python CLI, stdlib + pyyaml only.
|
|
507
|
-
- `impl/js/falsify.js` — Node.js second reference implementation (
|
|
508
|
-
- `impl/go/falsify.go` — Go third reference implementation (
|
|
509
|
-
- `impl/rust/` — Rust fourth reference implementation (
|
|
507
|
+
- `impl/js/falsify.js` — Node.js second reference implementation (13/13 v0.1 + 8/8 v0.2 = 21/21 vectors). Also published to npm as [`falsify-js`](https://www.npmjs.com/package/falsify-js).
|
|
508
|
+
- `impl/go/falsify.go` — Go third reference implementation (21/21 vectors).
|
|
509
|
+
- `impl/rust/` — Rust fourth reference implementation (21/21 vectors).
|
|
510
510
|
- `spec/PRML-v0.1.md` + `spec/test-vectors/v0.1/` (13) + `spec/test-vectors/v0.2/` (8) — spec + conformance suite.
|
|
511
511
|
- `spec/analysis/` — positioning + canonicalization portability findings.
|
|
512
512
|
- `spec/compliance/` — EU AI Act mapping + compliance landing copy.
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "falsify"
|
|
7
|
-
version = "0.3.
|
|
7
|
+
version = "0.3.2"
|
|
8
8
|
description = "PRML reference CLI — pre-register an ML evaluation claim as a SHA-256 manifest; verify PASS/FAIL/TAMPERED."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = { text = "MIT" }
|
|
@@ -5,7 +5,7 @@ manifest, the canonical UTF-8 bytes the canonicalizer MUST produce, and
|
|
|
5
5
|
the SHA-256 hex digest of those bytes.
|
|
6
6
|
|
|
7
7
|
If any of these fail, either:
|
|
8
|
-
(a) the canonicalizer in
|
|
8
|
+
(a) the canonicalizer in falsify_prml.canonicalize has changed in a way
|
|
9
9
|
that breaks the v0.1 contract — bump to v0.2 instead, OR
|
|
10
10
|
(b) test vectors must be regenerated by running
|
|
11
11
|
`python3 spec/test-vectors/v0.1/generate.py`.
|
|
@@ -25,7 +25,11 @@ REPO_ROOT = Path(__file__).resolve().parent.parent
|
|
|
25
25
|
VECTORS_PATH = REPO_ROOT / "spec" / "test-vectors" / "v0.1" / "test-vectors.json"
|
|
26
26
|
|
|
27
27
|
sys.path.insert(0, str(REPO_ROOT))
|
|
28
|
-
|
|
28
|
+
# The PRML conformance suite verifies the PRML manifest canonicalizer — the
|
|
29
|
+
# `falsify` command (falsify_prml), NOT the falsify-engine workflow canonicalizer
|
|
30
|
+
# (falsify.py), which canonicalizes a different schema and does not apply the
|
|
31
|
+
# PRML v0.1 threshold-float coercion.
|
|
32
|
+
import falsify_prml # noqa: E402
|
|
29
33
|
|
|
30
34
|
|
|
31
35
|
def _load_vectors():
|
|
@@ -46,8 +50,10 @@ class PRMLVectorTests(unittest.TestCase):
|
|
|
46
50
|
raise unittest.SkipTest(f"test vectors not generated yet: {VECTORS_PATH}")
|
|
47
51
|
|
|
48
52
|
def test_vector_count(self):
|
|
49
|
-
"""v0.1 ships
|
|
50
|
-
|
|
53
|
+
"""v0.1 ships 13 test vectors (TV-001..TV-013). TV-013 locks the
|
|
54
|
+
integer-threshold->float coercion; it is a non-breaking, clarifying
|
|
55
|
+
addition that leaves every prior vector's hash unchanged."""
|
|
56
|
+
self.assertEqual(len(VECTORS), 13, f"Expected 13 vectors, got {len(VECTORS)}")
|
|
51
57
|
|
|
52
58
|
def test_key_order_invariance(self):
|
|
53
59
|
"""TV-001 and TV-002 differ only in input key insertion order. Hashes MUST match."""
|
|
@@ -83,7 +89,7 @@ class PRMLVectorTests(unittest.TestCase):
|
|
|
83
89
|
|
|
84
90
|
def _make_canonical_test(vector):
|
|
85
91
|
def test(self):
|
|
86
|
-
produced =
|
|
92
|
+
produced = falsify_prml.canonicalize(vector["input"])
|
|
87
93
|
self.assertEqual(
|
|
88
94
|
produced,
|
|
89
95
|
vector["canonical"],
|
|
@@ -99,7 +105,7 @@ def _make_canonical_test(vector):
|
|
|
99
105
|
|
|
100
106
|
def _make_hash_test(vector):
|
|
101
107
|
def test(self):
|
|
102
|
-
canonical =
|
|
108
|
+
canonical = falsify_prml.canonicalize(vector["input"])
|
|
103
109
|
produced_hash = hashlib.sha256(canonical.encode("utf-8")).hexdigest()
|
|
104
110
|
self.assertEqual(
|
|
105
111
|
produced_hash,
|
|
@@ -35,14 +35,14 @@ class VersionTests(unittest.TestCase):
|
|
|
35
35
|
def test_version_subcommand_prints_version(self) -> None:
|
|
36
36
|
result = _run(["version"], cwd=self.cwd)
|
|
37
37
|
self.assertEqual(result.returncode, 0, msg=result.stderr)
|
|
38
|
-
self.assertIn("0.3.
|
|
38
|
+
self.assertIn("0.3.2", result.stdout)
|
|
39
39
|
self.assertIn("falsify", result.stdout)
|
|
40
40
|
|
|
41
41
|
def test_version_flag_prints_version(self) -> None:
|
|
42
42
|
result = _run(["--version"], cwd=self.cwd)
|
|
43
43
|
self.assertEqual(result.returncode, 0, msg=result.stderr)
|
|
44
44
|
# argparse's `action='version'` writes to stdout on Python 3.11+.
|
|
45
|
-
self.assertIn("0.3.
|
|
45
|
+
self.assertIn("0.3.2", result.stdout)
|
|
46
46
|
|
|
47
47
|
def test_version_subcommand_json_mode(self) -> None:
|
|
48
48
|
result = _run(["version", "--json"], cwd=self.cwd)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|