agent-assure 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (427)
  1. agent_assure-0.3.0/.gitignore +14 -0
  2. agent_assure-0.3.0/LICENSE +21 -0
  3. agent_assure-0.3.0/Makefile +51 -0
  4. agent_assure-0.3.0/PKG-INFO +376 -0
  5. agent_assure-0.3.0/README.md +329 -0
  6. agent_assure-0.3.0/docs/adr/0001-new-repository.md +3 -0
  7. agent_assure-0.3.0/docs/adr/0002-expectations-over-baseline.md +3 -0
  8. agent_assure-0.3.0/docs/adr/0003-fixture-vs-live-mode.md +5 -0
  9. agent_assure-0.3.0/docs/adr/0004-hashes-are-provenance.md +3 -0
  10. agent_assure-0.3.0/docs/adr/0005-otel-projection-not-duplication.md +4 -0
  11. agent_assure-0.3.0/docs/adr/0006-canonicalization-profile.md +4 -0
  12. agent_assure-0.3.0/docs/adr/0007-yaml-authoring-json-artifacts.md +3 -0
  13. agent_assure-0.3.0/docs/adr/0008-materiality-is-fixture-declared.md +4 -0
  14. agent_assure-0.3.0/docs/api_surface.md +31 -0
  15. agent_assure-0.3.0/docs/architecture.md +30 -0
  16. agent_assure-0.3.0/docs/assets/flagship_demo_transcript.txt +27 -0
  17. agent_assure-0.3.0/docs/claim_boundary.md +26 -0
  18. agent_assure-0.3.0/docs/claims_traceability_matrix.md +31 -0
  19. agent_assure-0.3.0/docs/claims_traceability_matrix.yaml +183 -0
  20. agent_assure-0.3.0/docs/cli_contract.md +243 -0
  21. agent_assure-0.3.0/docs/demo_expense.md +20 -0
  22. agent_assure-0.3.0/docs/demo_flagship.md +41 -0
  23. agent_assure-0.3.0/docs/dependency_locking.md +22 -0
  24. agent_assure-0.3.0/docs/digest_field_inventory.md +62 -0
  25. agent_assure-0.3.0/docs/documentation_alignment.md +14 -0
  26. agent_assure-0.3.0/docs/evidence_diff.md +29 -0
  27. agent_assure-0.3.0/docs/evidence_packets.md +47 -0
  28. agent_assure-0.3.0/docs/expectation_authoring.md +19 -0
  29. agent_assure-0.3.0/docs/fixture_mode.md +25 -0
  30. agent_assure-0.3.0/docs/for_ai_leaders.md +44 -0
  31. agent_assure-0.3.0/docs/for_engineers.md +84 -0
  32. agent_assure-0.3.0/docs/governance_crosswalk_iso42001.md +5 -0
  33. agent_assure-0.3.0/docs/index.md +29 -0
  34. agent_assure-0.3.0/docs/invariant_diffing.md +11 -0
  35. agent_assure-0.3.0/docs/limitations.md +153 -0
  36. agent_assure-0.3.0/docs/live_calibration.md +79 -0
  37. agent_assure-0.3.0/docs/live_mode_roadmap.md +73 -0
  38. agent_assure-0.3.0/docs/measurement/blind_review_release_evidence.md +48 -0
  39. agent_assure-0.3.0/docs/measurement/executive_one_pager.md +117 -0
  40. agent_assure-0.3.0/docs/measurement/experiment_protocol.md +583 -0
  41. agent_assure-0.3.0/docs/measurement/measurement_brief_abstract.md +27 -0
  42. agent_assure-0.3.0/docs/measurement/measurement_questions.md +7 -0
  43. agent_assure-0.3.0/docs/measurement/nist_agentic_measurement_use_case.md +191 -0
  44. agent_assure-0.3.0/docs/motivation.md +48 -0
  45. agent_assure-0.3.0/docs/otel_alignment.md +61 -0
  46. agent_assure-0.3.0/docs/posts/output_equivalence_is_not_process_equivalence.md +58 -0
  47. agent_assure-0.3.0/docs/privacy_model.md +26 -0
  48. agent_assure-0.3.0/docs/provenance_and_hashing.md +23 -0
  49. agent_assure-0.3.0/docs/reason_code_registry.md +26 -0
  50. agent_assure-0.3.0/docs/release_evidence.md +138 -0
  51. agent_assure-0.3.0/docs/release_notes/v0.1.0.md +22 -0
  52. agent_assure-0.3.0/docs/release_notes/v0.2.0.md +64 -0
  53. agent_assure-0.3.0/docs/release_notes/v0.3.0.md +40 -0
  54. agent_assure-0.3.0/docs/release_pypi.md +275 -0
  55. agent_assure-0.3.0/docs/schema_evolution.md +132 -0
  56. agent_assure-0.3.0/docs/schema_reference.md +164 -0
  57. agent_assure-0.3.0/docs/showcase.md +139 -0
  58. agent_assure-0.3.0/docs/social/demo_video_script.md +21 -0
  59. agent_assure-0.3.0/docs/standards/freshness_checklist.md +43 -0
  60. agent_assure-0.3.0/docs/standards/otel_contribution_candidate.md +66 -0
  61. agent_assure-0.3.0/docs/standards/otel_genai_gap_analysis.md +121 -0
  62. agent_assure-0.3.0/docs/standards/upstream_issue_inventory.md +15 -0
  63. agent_assure-0.3.0/docs/threat_coverage_matrix.yaml +109 -0
  64. agent_assure-0.3.0/docs/threat_model.md +70 -0
  65. agent_assure-0.3.0/docs/what_this_measures.md +14 -0
  66. agent_assure-0.3.0/examples/expense_approval_minimal/README.md +16 -0
  67. agent_assure-0.3.0/examples/expense_approval_minimal/fixtures/shared/model_outputs/exp-001.json +8 -0
  68. agent_assure-0.3.0/examples/expense_approval_minimal/fixtures/shared/model_outputs/exp-002.json +8 -0
  69. agent_assure-0.3.0/examples/expense_approval_minimal/fixtures/shared/model_outputs/exp-003.json +8 -0
  70. agent_assure-0.3.0/examples/expense_approval_minimal/fixtures/shared/requests/exp-001.json +6 -0
  71. agent_assure-0.3.0/examples/expense_approval_minimal/fixtures/shared/requests/exp-002.json +6 -0
  72. agent_assure-0.3.0/examples/expense_approval_minimal/fixtures/shared/requests/exp-003.json +6 -0
  73. agent_assure-0.3.0/examples/expense_approval_minimal/fixtures/shared/tool_outputs/exp-001.json +15 -0
  74. agent_assure-0.3.0/examples/expense_approval_minimal/fixtures/shared/tool_outputs/exp-002.json +15 -0
  75. agent_assure-0.3.0/examples/expense_approval_minimal/fixtures/shared/tool_outputs/exp-003.json +15 -0
  76. agent_assure-0.3.0/examples/expense_approval_minimal/suite.yaml +57 -0
  77. agent_assure-0.3.0/examples/expense_approval_minimal/variants/baseline.yaml +11 -0
  78. agent_assure-0.3.0/examples/expense_approval_minimal/variants/candidate_provider_policy.yaml +11 -0
  79. agent_assure-0.3.0/examples/prior_auth_synthetic/README.md +40 -0
  80. agent_assure-0.3.0/examples/prior_auth_synthetic/app/__init__.py +1 -0
  81. agent_assure-0.3.0/examples/prior_auth_synthetic/app/output_schema.py +12 -0
  82. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/model_outputs/ambiguous-case.json +8 -0
  83. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/model_outputs/conflicting-evidence.json +8 -0
  84. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/model_outputs/fake-phi-redaction.json +8 -0
  85. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/model_outputs/forbidden-provider.json +8 -0
  86. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/model_outputs/missing-documentation.json +8 -0
  87. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/model_outputs/prompt-injection-note.json +8 -0
  88. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/model_outputs/shared-source-multi-claim.json +8 -0
  89. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/model_outputs/straightforward-approval.json +8 -0
  90. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/model_outputs/straightforward-denial.json +8 -0
  91. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/model_outputs/tool-failure.json +8 -0
  92. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/requests/ambiguous-case.json +6 -0
  93. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/requests/conflicting-evidence.json +6 -0
  94. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/requests/fake-phi-redaction.json +10 -0
  95. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/requests/forbidden-provider.json +6 -0
  96. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/requests/missing-documentation.json +6 -0
  97. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/requests/prompt-injection-note.json +6 -0
  98. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/requests/shared-source-multi-claim.json +6 -0
  99. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/requests/straightforward-approval.json +6 -0
  100. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/requests/straightforward-denial.json +6 -0
  101. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/requests/tool-failure.json +6 -0
  102. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/ambiguous-case.json +15 -0
  103. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/conflicting-evidence.json +15 -0
  104. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/fake-phi-redaction.json +15 -0
  105. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/forbidden-provider.json +15 -0
  106. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/missing-documentation.json +15 -0
  107. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/prompt-injection-note.json +16 -0
  108. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/shared-source-multi-claim.json +24 -0
  109. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/straightforward-approval.json +15 -0
  110. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/straightforward-denial.json +15 -0
  111. agent_assure-0.3.0/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/tool-failure.json +15 -0
  112. agent_assure-0.3.0/examples/prior_auth_synthetic/runner.py +3 -0
  113. agent_assure-0.3.0/examples/prior_auth_synthetic/suite.yaml +134 -0
  114. agent_assure-0.3.0/examples/prior_auth_synthetic/variants/baseline.yaml +11 -0
  115. agent_assure-0.3.0/examples/prior_auth_synthetic/variants/candidate_evidence_normalization.yaml +10 -0
  116. agent_assure-0.3.0/examples/prior_auth_synthetic/variants/candidate_provider_policy.yaml +11 -0
  117. agent_assure-0.3.0/examples/prior_auth_synthetic/variants/candidate_smoke_fail.yaml +12 -0
  118. agent_assure-0.3.0/pyproject.toml +142 -0
  119. agent_assure-0.3.0/schemas/unreleased/.gitkeep +1 -0
  120. agent_assure-0.3.0/schemas/v0.1.0/agent-run-record.schema.json +819 -0
  121. agent_assure-0.3.0/schemas/v0.1.0/comparison-report.schema.json +753 -0
  122. agent_assure-0.3.0/schemas/v0.1.0/comparison-summary.schema.json +245 -0
  123. agent_assure-0.3.0/schemas/v0.1.0/compiled-suite.schema.json +323 -0
  124. agent_assure-0.3.0/schemas/v0.1.0/environment-info.schema.json +148 -0
  125. agent_assure-0.3.0/schemas/v0.1.0/evaluation-report.schema.json +476 -0
  126. agent_assure-0.3.0/schemas/v0.1.0/evaluation-summary.schema.json +286 -0
  127. agent_assure-0.3.0/schemas/v0.1.0/evidence-packet.schema.json +581 -0
  128. agent_assure-0.3.0/schemas/v0.1.0/expectation-change-record.schema.json +97 -0
  129. agent_assure-0.3.0/schemas/v0.1.0/expectation.schema.json +129 -0
  130. agent_assure-0.3.0/schemas/v0.1.0/fixture-manifest.schema.json +92 -0
  131. agent_assure-0.3.0/schemas/v0.1.0/live-comparison-report.schema.json +328 -0
  132. agent_assure-0.3.0/schemas/v0.1.0/live-evaluation-report.schema.json +860 -0
  133. agent_assure-0.3.0/schemas/v0.1.0/live-protocol-record.schema.json +353 -0
  134. agent_assure-0.3.0/schemas/v0.1.0/release-artifact-manifest.schema.json +233 -0
  135. agent_assure-0.3.0/schemas/v0.1.0/release-digest-replay.schema.json +113 -0
  136. agent_assure-0.3.0/schemas/v0.1.0/run-set.schema.json +921 -0
  137. agent_assure-0.3.0/schemas/v0.1.0/span-plan.schema.json +131 -0
  138. agent_assure-0.3.0/schemas/v0.2.0/agent-run-record.schema.json +862 -0
  139. agent_assure-0.3.0/schemas/v0.2.0/comparison-report.schema.json +753 -0
  140. agent_assure-0.3.0/schemas/v0.2.0/comparison-summary.schema.json +245 -0
  141. agent_assure-0.3.0/schemas/v0.2.0/compiled-suite.schema.json +323 -0
  142. agent_assure-0.3.0/schemas/v0.2.0/emergency-process-record.schema.json +260 -0
  143. agent_assure-0.3.0/schemas/v0.2.0/environment-info.schema.json +148 -0
  144. agent_assure-0.3.0/schemas/v0.2.0/evaluation-report.schema.json +476 -0
  145. agent_assure-0.3.0/schemas/v0.2.0/evaluation-summary.schema.json +286 -0
  146. agent_assure-0.3.0/schemas/v0.2.0/evidence-packet.schema.json +581 -0
  147. agent_assure-0.3.0/schemas/v0.2.0/expectation-change-record.schema.json +97 -0
  148. agent_assure-0.3.0/schemas/v0.2.0/expectation.schema.json +129 -0
  149. agent_assure-0.3.0/schemas/v0.2.0/fixture-manifest.schema.json +92 -0
  150. agent_assure-0.3.0/schemas/v0.2.0/live-comparison-report.schema.json +496 -0
  151. agent_assure-0.3.0/schemas/v0.2.0/live-drift-report.schema.json +1011 -0
  152. agent_assure-0.3.0/schemas/v0.2.0/live-evaluation-report.schema.json +1361 -0
  153. agent_assure-0.3.0/schemas/v0.2.0/live-protocol-record.schema.json +1097 -0
  154. agent_assure-0.3.0/schemas/v0.2.0/live-trajectory-report.schema.json +770 -0
  155. agent_assure-0.3.0/schemas/v0.2.0/release-artifact-manifest.schema.json +233 -0
  156. agent_assure-0.3.0/schemas/v0.2.0/release-digest-replay.schema.json +113 -0
  157. agent_assure-0.3.0/schemas/v0.2.0/run-set.schema.json +1230 -0
  158. agent_assure-0.3.0/schemas/v0.2.0/span-plan.schema.json +156 -0
  159. agent_assure-0.3.0/schemas/v0.3.0/agent-run-record.schema.json +862 -0
  160. agent_assure-0.3.0/schemas/v0.3.0/comparison-report.schema.json +753 -0
  161. agent_assure-0.3.0/schemas/v0.3.0/comparison-summary.schema.json +245 -0
  162. agent_assure-0.3.0/schemas/v0.3.0/compiled-suite.schema.json +323 -0
  163. agent_assure-0.3.0/schemas/v0.3.0/emergency-process-record.schema.json +260 -0
  164. agent_assure-0.3.0/schemas/v0.3.0/environment-info.schema.json +148 -0
  165. agent_assure-0.3.0/schemas/v0.3.0/evaluation-report.schema.json +476 -0
  166. agent_assure-0.3.0/schemas/v0.3.0/evaluation-summary.schema.json +286 -0
  167. agent_assure-0.3.0/schemas/v0.3.0/evidence-packet.schema.json +581 -0
  168. agent_assure-0.3.0/schemas/v0.3.0/expectation-change-record.schema.json +97 -0
  169. agent_assure-0.3.0/schemas/v0.3.0/expectation.schema.json +129 -0
  170. agent_assure-0.3.0/schemas/v0.3.0/fixture-manifest.schema.json +92 -0
  171. agent_assure-0.3.0/schemas/v0.3.0/live-comparison-report.schema.json +496 -0
  172. agent_assure-0.3.0/schemas/v0.3.0/live-drift-report.schema.json +1011 -0
  173. agent_assure-0.3.0/schemas/v0.3.0/live-evaluation-report.schema.json +1361 -0
  174. agent_assure-0.3.0/schemas/v0.3.0/live-protocol-record.schema.json +1097 -0
  175. agent_assure-0.3.0/schemas/v0.3.0/live-trajectory-report.schema.json +770 -0
  176. agent_assure-0.3.0/schemas/v0.3.0/release-artifact-manifest.schema.json +233 -0
  177. agent_assure-0.3.0/schemas/v0.3.0/release-digest-replay.schema.json +113 -0
  178. agent_assure-0.3.0/schemas/v0.3.0/run-set.schema.json +1230 -0
  179. agent_assure-0.3.0/schemas/v0.3.0/span-plan.schema.json +156 -0
  180. agent_assure-0.3.0/scripts/assert_dist_reproducible.py +156 -0
  181. agent_assure-0.3.0/scripts/build_release_bundle.py +182 -0
  182. agent_assure-0.3.0/scripts/check_claim_boundaries.py +266 -0
  183. agent_assure-0.3.0/scripts/check_docs_alignment.py +573 -0
  184. agent_assure-0.3.0/scripts/check_frozen_schemas.py +89 -0
  185. agent_assure-0.3.0/scripts/check_packaged_examples.py +119 -0
  186. agent_assure-0.3.0/scripts/check_schema_staging.py +48 -0
  187. agent_assure-0.3.0/scripts/check_version_matches_tag.py +125 -0
  188. agent_assure-0.3.0/scripts/check_wheel_contents.py +159 -0
  189. agent_assure-0.3.0/scripts/clean_dist.py +28 -0
  190. agent_assure-0.3.0/scripts/cosign_release_artifacts.py +311 -0
  191. agent_assure-0.3.0/scripts/record_demo_transcript.ps1 +10 -0
  192. agent_assure-0.3.0/scripts/record_demo_transcript.sh +8 -0
  193. agent_assure-0.3.0/scripts/reproduce_release.py +203 -0
  194. agent_assure-0.3.0/scripts/run_source_cli.py +17 -0
  195. agent_assure-0.3.0/scripts/smoke_install_wheel.py +243 -0
  196. agent_assure-0.3.0/scripts/update_golden.py +206 -0
  197. agent_assure-0.3.0/src/agent_assure/__init__.py +4 -0
  198. agent_assure-0.3.0/src/agent_assure/artifact_io.py +26 -0
  199. agent_assure-0.3.0/src/agent_assure/authoring/__init__.py +4 -0
  200. agent_assure-0.3.0/src/agent_assure/authoring/compiler.py +92 -0
  201. agent_assure-0.3.0/src/agent_assure/authoring/string_fields.py +15 -0
  202. agent_assure-0.3.0/src/agent_assure/authoring/yaml_lint.py +9 -0
  203. agent_assure-0.3.0/src/agent_assure/authoring/yaml_loader.py +4 -0
  204. agent_assure-0.3.0/src/agent_assure/authoring/yaml_nodes.py +84 -0
  205. agent_assure-0.3.0/src/agent_assure/canonical/__init__.py +11 -0
  206. agent_assure-0.3.0/src/agent_assure/canonical/digests.py +11 -0
  207. agent_assure-0.3.0/src/agent_assure/canonical/hmac_tokens.py +21 -0
  208. agent_assure-0.3.0/src/agent_assure/canonical/jcs.py +12 -0
  209. agent_assure-0.3.0/src/agent_assure/canonical/manifest.py +10 -0
  210. agent_assure-0.3.0/src/agent_assure/canonical/normalize.py +70 -0
  211. agent_assure-0.3.0/src/agent_assure/canonical/projection.py +3 -0
  212. agent_assure-0.3.0/src/agent_assure/ci.py +444 -0
  213. agent_assure-0.3.0/src/agent_assure/cli/__init__.py +1 -0
  214. agent_assure-0.3.0/src/agent_assure/cli/ci_cmd.py +116 -0
  215. agent_assure-0.3.0/src/agent_assure/cli/compare_cmd.py +161 -0
  216. agent_assure-0.3.0/src/agent_assure/cli/demo_cmd.py +73 -0
  217. agent_assure-0.3.0/src/agent_assure/cli/diff_cmd.py +176 -0
  218. agent_assure-0.3.0/src/agent_assure/cli/evaluate_cmd.py +147 -0
  219. agent_assure-0.3.0/src/agent_assure/cli/init_cmd.py +10 -0
  220. agent_assure-0.3.0/src/agent_assure/cli/live_cmd.py +222 -0
  221. agent_assure-0.3.0/src/agent_assure/cli/main.py +58 -0
  222. agent_assure-0.3.0/src/agent_assure/cli/otel_cmd.py +119 -0
  223. agent_assure-0.3.0/src/agent_assure/cli/packet_cmd.py +97 -0
  224. agent_assure-0.3.0/src/agent_assure/cli/release_cmd.py +103 -0
  225. agent_assure-0.3.0/src/agent_assure/cli/schema_cmd.py +19 -0
  226. agent_assure-0.3.0/src/agent_assure/cli/suite_cmd.py +132 -0
  227. agent_assure-0.3.0/src/agent_assure/cli/validate_cmd.py +19 -0
  228. agent_assure-0.3.0/src/agent_assure/cli/waivers.py +24 -0
  229. agent_assure-0.3.0/src/agent_assure/compare/__init__.py +1 -0
  230. agent_assure-0.3.0/src/agent_assure/compare/case_map.py +21 -0
  231. agent_assure-0.3.0/src/agent_assure/compare/classifications.py +39 -0
  232. agent_assure-0.3.0/src/agent_assure/compare/invariant_diff.py +186 -0
  233. agent_assure-0.3.0/src/agent_assure/compare/provenance_diff.py +52 -0
  234. agent_assure-0.3.0/src/agent_assure/compare/runsets.py +402 -0
  235. agent_assure-0.3.0/src/agent_assure/demo/__init__.py +2 -0
  236. agent_assure-0.3.0/src/agent_assure/demo/common.py +344 -0
  237. agent_assure-0.3.0/src/agent_assure/demo/flagship.py +459 -0
  238. agent_assure-0.3.0/src/agent_assure/evaluation/__init__.py +1 -0
  239. agent_assure-0.3.0/src/agent_assure/evaluation/aggregation.py +6 -0
  240. agent_assure-0.3.0/src/agent_assure/evaluation/applicability.py +7 -0
  241. agent_assure-0.3.0/src/agent_assure/evaluation/evaluator.py +261 -0
  242. agent_assure-0.3.0/src/agent_assure/evaluation/expectations.py +45 -0
  243. agent_assure-0.3.0/src/agent_assure/evaluation/invariants.py +219 -0
  244. agent_assure-0.3.0/src/agent_assure/evaluation/resolver.py +5 -0
  245. agent_assure-0.3.0/src/agent_assure/examples/__init__.py +1 -0
  246. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/README.md +16 -0
  247. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/__init__.py +1 -0
  248. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/fixtures/shared/model_outputs/exp-001.json +8 -0
  249. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/fixtures/shared/model_outputs/exp-002.json +8 -0
  250. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/fixtures/shared/model_outputs/exp-003.json +8 -0
  251. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/fixtures/shared/requests/exp-001.json +6 -0
  252. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/fixtures/shared/requests/exp-002.json +6 -0
  253. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/fixtures/shared/requests/exp-003.json +6 -0
  254. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/fixtures/shared/tool_outputs/exp-001.json +15 -0
  255. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/fixtures/shared/tool_outputs/exp-002.json +15 -0
  256. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/fixtures/shared/tool_outputs/exp-003.json +15 -0
  257. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/runner.py +64 -0
  258. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/suite.yaml +57 -0
  259. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/variants/baseline.yaml +11 -0
  260. agent_assure-0.3.0/src/agent_assure/examples/expense_approval_minimal/variants/candidate_provider_policy.yaml +11 -0
  261. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/README.md +40 -0
  262. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/__init__.py +1 -0
  263. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/app.py +138 -0
  264. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/model_outputs/ambiguous-case.json +8 -0
  265. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/model_outputs/conflicting-evidence.json +8 -0
  266. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/model_outputs/fake-phi-redaction.json +8 -0
  267. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/model_outputs/forbidden-provider.json +8 -0
  268. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/model_outputs/missing-documentation.json +8 -0
  269. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/model_outputs/prompt-injection-note.json +8 -0
  270. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/model_outputs/shared-source-multi-claim.json +8 -0
  271. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/model_outputs/straightforward-approval.json +8 -0
  272. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/model_outputs/straightforward-denial.json +8 -0
  273. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/model_outputs/tool-failure.json +8 -0
  274. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/requests/ambiguous-case.json +6 -0
  275. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/requests/conflicting-evidence.json +6 -0
  276. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/requests/fake-phi-redaction.json +10 -0
  277. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/requests/forbidden-provider.json +6 -0
  278. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/requests/missing-documentation.json +6 -0
  279. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/requests/prompt-injection-note.json +6 -0
  280. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/requests/shared-source-multi-claim.json +6 -0
  281. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/requests/straightforward-approval.json +6 -0
  282. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/requests/straightforward-denial.json +6 -0
  283. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/requests/tool-failure.json +6 -0
  284. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/ambiguous-case.json +15 -0
  285. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/conflicting-evidence.json +15 -0
  286. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/fake-phi-redaction.json +15 -0
  287. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/forbidden-provider.json +15 -0
  288. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/missing-documentation.json +15 -0
  289. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/prompt-injection-note.json +16 -0
  290. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/shared-source-multi-claim.json +24 -0
  291. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/straightforward-approval.json +15 -0
  292. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/straightforward-denial.json +15 -0
  293. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/fixtures/shared/tool_outputs/tool-failure.json +15 -0
  294. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/runner.py +64 -0
  295. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/suite.yaml +134 -0
  296. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/variants/baseline.yaml +11 -0
  297. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/variants/candidate_evidence_normalization.yaml +10 -0
  298. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/variants/candidate_provider_policy.yaml +11 -0
  299. agent_assure-0.3.0/src/agent_assure/examples/prior_auth_synthetic/variants/candidate_smoke_fail.yaml +12 -0
  300. agent_assure-0.3.0/src/agent_assure/fixtures/__init__.py +32 -0
  301. agent_assure-0.3.0/src/agent_assure/fixtures/loader.py +42 -0
  302. agent_assure-0.3.0/src/agent_assure/fixtures/manifest.py +137 -0
  303. agent_assure-0.3.0/src/agent_assure/fixtures/resolver.py +62 -0
  304. agent_assure-0.3.0/src/agent_assure/live/__init__.py +2 -0
  305. agent_assure-0.3.0/src/agent_assure/live/adapters.py +497 -0
  306. agent_assure-0.3.0/src/agent_assure/live/advanced.py +587 -0
  307. agent_assure-0.3.0/src/agent_assure/live/comparison.py +452 -0
  308. agent_assure-0.3.0/src/agent_assure/live/config.py +125 -0
  309. agent_assure-0.3.0/src/agent_assure/live/drift.py +866 -0
  310. agent_assure-0.3.0/src/agent_assure/live/intervals.py +230 -0
  311. agent_assure-0.3.0/src/agent_assure/live/output_contract.py +78 -0
  312. agent_assure-0.3.0/src/agent_assure/live/paths.py +19 -0
  313. agent_assure-0.3.0/src/agent_assure/live/primitives.py +73 -0
  314. agent_assure-0.3.0/src/agent_assure/live/runner.py +783 -0
  315. agent_assure-0.3.0/src/agent_assure/live/statistics.py +629 -0
  316. agent_assure-0.3.0/src/agent_assure/live/trajectory.py +773 -0
  317. agent_assure-0.3.0/src/agent_assure/policies/__init__.py +1 -0
  318. agent_assure-0.3.0/src/agent_assure/policies/base.py +170 -0
  319. agent_assure-0.3.0/src/agent_assure/policies/catalog.py +58 -0
  320. agent_assure-0.3.0/src/agent_assure/policies/evidence.py +57 -0
  321. agent_assure-0.3.0/src/agent_assure/policies/human_review.py +25 -0
  322. agent_assure-0.3.0/src/agent_assure/policies/injection.py +30 -0
  323. agent_assure-0.3.0/src/agent_assure/policies/output_schema.py +27 -0
  324. agent_assure-0.3.0/src/agent_assure/policies/privacy.py +65 -0
  325. agent_assure-0.3.0/src/agent_assure/policies/providers.py +60 -0
  326. agent_assure-0.3.0/src/agent_assure/policies/review_boundary.py +10 -0
  327. agent_assure-0.3.0/src/agent_assure/policies/runtime.py +22 -0
  328. agent_assure-0.3.0/src/agent_assure/policies/tools.py +45 -0
  329. agent_assure-0.3.0/src/agent_assure/privacy/__init__.py +18 -0
  330. agent_assure-0.3.0/src/agent_assure/privacy/detectors.py +36 -0
  331. agent_assure-0.3.0/src/agent_assure/privacy/redaction.py +121 -0
  332. agent_assure-0.3.0/src/agent_assure/privacy/safe_errors.py +51 -0
  333. agent_assure-0.3.0/src/agent_assure/release_evidence.py +487 -0
  334. agent_assure-0.3.0/src/agent_assure/reporting/__init__.py +1 -0
  335. agent_assure-0.3.0/src/agent_assure/reporting/console.py +137 -0
  336. agent_assure-0.3.0/src/agent_assure/reporting/environment.py +191 -0
  337. agent_assure-0.3.0/src/agent_assure/reporting/evidence_diff_html.py +1035 -0
  338. agent_assure-0.3.0/src/agent_assure/reporting/json_report.py +73 -0
  339. agent_assure-0.3.0/src/agent_assure/reporting/live.py +386 -0
  340. agent_assure-0.3.0/src/agent_assure/reporting/markdown.py +227 -0
  341. agent_assure-0.3.0/src/agent_assure/reporting/packet.py +186 -0
  342. agent_assure-0.3.0/src/agent_assure/reporting/sbom.py +126 -0
  343. agent_assure-0.3.0/src/agent_assure/runner/__init__.py +35 -0
  344. agent_assure-0.3.0/src/agent_assure/runner/clock.py +17 -0
  345. agent_assure-0.3.0/src/agent_assure/runner/emergency.py +1 -0
  346. agent_assure-0.3.0/src/agent_assure/runner/evidence.py +107 -0
  347. agent_assure-0.3.0/src/agent_assure/runner/fixture_runner.py +247 -0
  348. agent_assure-0.3.0/src/agent_assure/runner/fixture_values.py +20 -0
  349. agent_assure-0.3.0/src/agent_assure/runner/governance_controls.py +109 -0
  350. agent_assure-0.3.0/src/agent_assure/runner/ids.py +20 -0
  351. agent_assure-0.3.0/src/agent_assure/runner/records.py +91 -0
  352. agent_assure-0.3.0/src/agent_assure/runner/registry.py +54 -0
  353. agent_assure-0.3.0/src/agent_assure/runner/subprocess_harness.py +287 -0
  354. agent_assure-0.3.0/src/agent_assure/schema/__init__.py +98 -0
  355. agent_assure-0.3.0/src/agent_assure/schema/base.py +38 -0
  356. agent_assure-0.3.0/src/agent_assure/schema/common.py +105 -0
  357. agent_assure-0.3.0/src/agent_assure/schema/comparison.py +47 -0
  358. agent_assure-0.3.0/src/agent_assure/schema/environment.py +32 -0
  359. agent_assure-0.3.0/src/agent_assure/schema/evaluation.py +48 -0
  360. agent_assure-0.3.0/src/agent_assure/schema/expectation.py +66 -0
  361. agent_assure-0.3.0/src/agent_assure/schema/export.py +83 -0
  362. agent_assure-0.3.0/src/agent_assure/schema/live.py +1310 -0
  363. agent_assure-0.3.0/src/agent_assure/schema/packet.py +37 -0
  364. agent_assure-0.3.0/src/agent_assure/schema/provenance.py +18 -0
  365. agent_assure-0.3.0/src/agent_assure/schema/release.py +61 -0
  366. agent_assure-0.3.0/src/agent_assure/schema/run.py +237 -0
  367. agent_assure-0.3.0/src/agent_assure/schema/runtime.py +46 -0
  368. agent_assure-0.3.0/src/agent_assure/schema/suite.py +128 -0
  369. agent_assure-0.3.0/src/agent_assure/schema/telemetry.py +50 -0
  370. agent_assure-0.3.0/src/agent_assure/schema/validation.py +29 -0
  371. agent_assure-0.3.0/src/agent_assure/schema_resources/__init__.py +1 -0
  372. agent_assure-0.3.0/src/agent_assure/telemetry/__init__.py +15 -0
  373. agent_assure-0.3.0/src/agent_assure/telemetry/context.py +55 -0
  374. agent_assure-0.3.0/src/agent_assure/telemetry/otel_mapping.py +78 -0
  375. agent_assure-0.3.0/src/agent_assure/telemetry/otel_sdk.py +138 -0
  376. agent_assure-0.3.0/src/agent_assure/telemetry/privacy_filter.py +7 -0
  377. agent_assure-0.3.0/src/agent_assure/telemetry/semconv_lock.py +4 -0
  378. agent_assure-0.3.0/src/agent_assure/telemetry/span_plan.py +3 -0
  379. agent_assure-0.3.0/tests/fixtures/run_record.json +35 -0
  380. agent_assure-0.3.0/tests/golden/compiled_suites/prior_auth_synthetic.compiled.json +356 -0
  381. agent_assure-0.3.0/tests/golden/compiled_suites/prior_auth_synthetic.fixture-manifest.json +221 -0
  382. agent_assure-0.3.0/tests/golden/reports/flagship-evidence-diff.html +241 -0
  383. agent_assure-0.3.0/tests/integration/test_baseline_passes.py +36 -0
  384. agent_assure-0.3.0/tests/integration/test_ci_gate.py +150 -0
  385. agent_assure-0.3.0/tests/integration/test_compare_reports.py +148 -0
  386. agent_assure-0.3.0/tests/integration/test_demo_flagship.py +98 -0
  387. agent_assure-0.3.0/tests/integration/test_diff_render_cli.py +181 -0
  388. agent_assure-0.3.0/tests/integration/test_evaluate_reports.py +104 -0
  389. agent_assure-0.3.0/tests/integration/test_evidence_packet.py +165 -0
  390. agent_assure-0.3.0/tests/integration/test_evidence_regression.py +129 -0
  391. agent_assure-0.3.0/tests/integration/test_expense_approval_minimal.py +101 -0
  392. agent_assure-0.3.0/tests/integration/test_fixture_equivalence.py +30 -0
  393. agent_assure-0.3.0/tests/integration/test_live_cli.py +278 -0
  394. agent_assure-0.3.0/tests/integration/test_no_network.py +38 -0
  395. agent_assure-0.3.0/tests/integration/test_offline_suite.py +64 -0
  396. agent_assure-0.3.0/tests/integration/test_provider_regression.py +39 -0
  397. agent_assure-0.3.0/tests/integration/test_schema_parity.py +436 -0
  398. agent_assure-0.3.0/tests/integration/test_showcase_demo.py +157 -0
  399. agent_assure-0.3.0/tests/integration/test_smoke_failure.py +20 -0
  400. agent_assure-0.3.0/tests/unit/authoring/test_yaml_loader.py +119 -0
  401. agent_assure-0.3.0/tests/unit/canonical/test_digests.py +61 -0
  402. agent_assure-0.3.0/tests/unit/compare/test_runsets.py +101 -0
  403. agent_assure-0.3.0/tests/unit/demo/test_common.py +147 -0
  404. agent_assure-0.3.0/tests/unit/docs/test_claim_boundary_language.py +143 -0
  405. agent_assure-0.3.0/tests/unit/docs/test_docs_alignment.py +338 -0
  406. agent_assure-0.3.0/tests/unit/evaluation/test_evaluator.py +300 -0
  407. agent_assure-0.3.0/tests/unit/evaluation/test_live_primitives.py +35 -0
  408. agent_assure-0.3.0/tests/unit/evaluation/test_live_runner.py +416 -0
  409. agent_assure-0.3.0/tests/unit/evaluation/test_live_statistics.py +2040 -0
  410. agent_assure-0.3.0/tests/unit/fixtures/test_manifest_and_loader.py +174 -0
  411. agent_assure-0.3.0/tests/unit/privacy/test_hmac_and_redaction.py +111 -0
  412. agent_assure-0.3.0/tests/unit/release/test_cosign_release_artifacts.py +60 -0
  413. agent_assure-0.3.0/tests/unit/release/test_dist_reproducibility.py +80 -0
  414. agent_assure-0.3.0/tests/unit/release/test_packaged_examples.py +79 -0
  415. agent_assure-0.3.0/tests/unit/release/test_release_evidence.py +522 -0
  416. agent_assure-0.3.0/tests/unit/release/test_sbom.py +58 -0
  417. agent_assure-0.3.0/tests/unit/release/test_version_tag.py +107 -0
  418. agent_assure-0.3.0/tests/unit/release/test_wheel_content_checks.py +45 -0
  419. agent_assure-0.3.0/tests/unit/reporting/test_evidence_diff.py +63 -0
  420. agent_assure-0.3.0/tests/unit/reporting/test_evidence_diff_html.py +277 -0
  421. agent_assure-0.3.0/tests/unit/runner/test_evidence_helpers.py +48 -0
  422. agent_assure-0.3.0/tests/unit/runner/test_fixture_runner.py +125 -0
  423. agent_assure-0.3.0/tests/unit/runner/test_subprocess_harness.py +188 -0
  424. agent_assure-0.3.0/tests/unit/schema/test_frozen_schema_check.py +57 -0
  425. agent_assure-0.3.0/tests/unit/schema/test_strict_models.py +88 -0
  426. agent_assure-0.3.0/tests/unit/telemetry/test_otel_sdk.py +118 -0
  427. agent_assure-0.3.0/tests/unit/telemetry/test_span_plan.py +38 -0
@@ -0,0 +1,14 @@
1
+ .tmp/
2
+ .tmp-review/
3
+ .venv/
4
+ .mypy_cache/
5
+ .pytest_cache/
6
+ .ruff_cache/
7
+ dist/
8
+ build/
9
+ schemas/unreleased/*.schema.json
10
+ *.egg-info/
11
+ Lib/
12
+ __pycache__/
13
+ *.py[cod]
14
+ agent_assure_final_implementation_plan.md
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 acblabs
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,51 @@
1
+ PYTHON ?= $(or $(wildcard .venv/Scripts/python.exe),$(wildcard .venv/bin/python),python)
2
+ SOURCE_CLI_PYTHON := $(PYTHON)
3
+
4
+ .PHONY: test lint type clean-dist build docs-align claim-boundary examples-parity schemas schema-staging schema-check release-bundle check release-check demo
5
+
6
+ test:
7
+ $(PYTHON) -m pytest
8
+
9
+ lint:
10
+ $(PYTHON) -m ruff check .
11
+
12
+ type:
13
+ $(PYTHON) -m mypy src
14
+
15
+ clean-dist:
16
+ $(PYTHON) scripts/clean_dist.py
17
+
18
+ build: clean-dist
19
+ $(PYTHON) -m build --no-isolation
20
+
21
+ release-bundle:
22
+ $(PYTHON) scripts/build_release_bundle.py --out .tmp/release --write-digests .tmp/release/release-digest-replay.json
23
+
24
+ docs-align:
25
+ $(PYTHON) scripts/check_docs_alignment.py
26
+
27
+ claim-boundary:
28
+ $(PYTHON) scripts/check_claim_boundaries.py
29
+
30
+ examples-parity:
31
+ $(PYTHON) scripts/check_packaged_examples.py
32
+
33
+ check: lint type test docs-align claim-boundary examples-parity build
34
+
35
+ release-check: check schema-check
36
+ $(PYTHON) -m twine check dist/*
37
+ $(PYTHON) scripts/check_wheel_contents.py
38
+ $(PYTHON) scripts/smoke_install_wheel.py
39
+
40
+ demo:
41
+ $(SOURCE_CLI_PYTHON) scripts/run_source_cli.py demo flagship --out .tmp/demo/flagship --clean
42
+
43
+ schemas:
44
+ $(SOURCE_CLI_PYTHON) scripts/run_source_cli.py schema export --out schemas/v0.3.0
45
+
46
+ schema-staging:
47
+ $(PYTHON) scripts/check_schema_staging.py
48
+
49
+ schema-check:
50
+ $(PYTHON) scripts/check_frozen_schemas.py --schema-dir schemas/v0.3.0
51
+ $(PYTHON) scripts/check_schema_staging.py
@@ -0,0 +1,376 @@
1
+ Metadata-Version: 2.4
2
+ Name: agent-assure
3
+ Version: 0.3.0
4
+ Summary: Local-first process assurance for agentic AI pipelines.
5
+ Project-URL: Homepage, https://github.com/acblabs/agent-assure
6
+ Project-URL: Repository, https://github.com/acblabs/agent-assure
7
+ Project-URL: Issues, https://github.com/acblabs/agent-assure/issues
8
+ Project-URL: Documentation, https://github.com/acblabs/agent-assure#readme
9
+ Project-URL: Changelog, https://github.com/acblabs/agent-assure/releases
10
+ Author: ACB Labs
11
+ License: MIT
12
+ License-File: LICENSE
13
+ Keywords: agentic-ai,ai-assurance,ai-governance,ci-cd,llmops,mlops,opentelemetry,responsible-ai
14
+ Classifier: Development Status :: 3 - Alpha
15
+ Classifier: Environment :: Console
16
+ Classifier: Intended Audience :: Developers
17
+ Classifier: Intended Audience :: Information Technology
18
+ Classifier: License :: OSI Approved :: MIT License
19
+ Classifier: Programming Language :: Python :: 3
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Classifier: Programming Language :: Python :: 3.13
23
+ Classifier: Topic :: Software Development :: Quality Assurance
24
+ Requires-Python: >=3.11
25
+ Requires-Dist: jsonschema>=4.23
26
+ Requires-Dist: pydantic<3,>=2.12
27
+ Requires-Dist: pyyaml<7,>=6
28
+ Requires-Dist: rfc8785==0.1.4
29
+ Requires-Dist: rich<15,>=13
30
+ Requires-Dist: typer<1,>=0.12
31
+ Provides-Extra: dev
32
+ Requires-Dist: build; extra == 'dev'
33
+ Requires-Dist: hatchling>=1.27; extra == 'dev'
34
+ Requires-Dist: hypothesis>=6; extra == 'dev'
35
+ Requires-Dist: mkdocs<2,>=1.6; extra == 'dev'
36
+ Requires-Dist: mypy; extra == 'dev'
37
+ Requires-Dist: pytest-cov>=5; extra == 'dev'
38
+ Requires-Dist: pytest-socket>=0.7; extra == 'dev'
39
+ Requires-Dist: pytest>=8; extra == 'dev'
40
+ Requires-Dist: ruff; extra == 'dev'
41
+ Requires-Dist: twine>=5; extra == 'dev'
42
+ Provides-Extra: otel
43
+ Requires-Dist: opentelemetry-api<2,>=1.25; extra == 'otel'
44
+ Requires-Dist: opentelemetry-exporter-otlp-proto-http<2,>=1.25; extra == 'otel'
45
+ Requires-Dist: opentelemetry-sdk<2,>=1.25; extra == 'otel'
46
+ Description-Content-Type: text/markdown
47
+
48
+ # agent-assure
49
+
50
+ Local-first process assurance for agentic AI pipelines.
51
+
52
+ **Core thesis:** output equivalence is not process equivalence.
53
+
54
+ A candidate agent pipeline can return the same final approval, denial,
55
+ recommendation, or summary while silently changing material evidence, review
56
+ routing, provider/tool boundaries, redaction behavior, retries, or provenance.
57
+ `agent-assure` produces local evidence packets and CI gates so reviewers can
58
+ detect those observable process regressions.
59
+
60
+ ## Install
61
+
62
+ Install from PyPI and run the flagship demo:
63
+
64
+ ```bash
65
+ pip install agent-assure
66
+ agent-assure demo flagship
67
+ ```
68
+
69
+ The demo runs offline with bundled deterministic fixtures. It writes local
70
+ review artifacts under `.tmp/demo/flagship` by default.
71
+
72
+ ## One-command demo
73
+
74
+ ```text
75
+ Expected punchline:
76
+
77
+ output equivalence: preserved
78
+ missing evidence link: claim-duration
79
+ classification: new_failure
80
+ CI gate: blocked as expected
81
+ ```
82
+
83
+ The baseline and candidate both keep
84
+ `recommendation=approve; outcome=approve`. The candidate still fails because it
85
+ drops the material evidence link for `claim-duration`.
86
+
87
+ ## Claim boundary
88
+
89
+ `agent-assure` produces local review evidence, traceability, evidence mapping,
90
+ artifact digests, and CI-gate signals. It does not replace legal, regulatory,
91
+ clinical, provider-quality, model-quality, or business-impact review.
92
+
93
+ This project is not a compliance attestation. Safety review remains a separate
94
+ human and organizational responsibility.
95
+
96
+ ## Schemas
97
+
98
+ Schema changes are versioned. Development work uses `schemas/unreleased/`.
99
+ Stable releases freeze a copy into `schemas/vX.Y.Z/`.
100
+ The release gate verifies the latest frozen schema directory, while schema
101
+ staging exports the current development schema surface to `schemas/unreleased/`.
102
+
103
+ ## Local development
104
+
105
+ From a repository checkout:
106
+
107
+ ```bash
108
+ pip install -e .
109
+ ```
110
+
111
+ For validation checks, install the development extras:
112
+
113
+ ```bash
114
+ pip install -e ".[dev]"
115
+ ```
116
+
117
+ ## Five-minute fixture walkthrough
118
+
119
+ Run these commands one at a time from the repository root. The final three
120
+ commands write reports and are expected to exit `1`; the GitHub Actions snippet
121
+ below shows how to assert those expected failures in `set -e` contexts.
122
+
123
+ ```bash
124
+ pip install -e ".[dev]"
125
+ mkdir -p .tmp/showcase
126
+ agent-assure suite compile examples/prior_auth_synthetic/suite.yaml --out .tmp/showcase/prior-auth.compiled.json --manifest .tmp/showcase/prior-auth.fixtures.json
127
+ agent-assure suite run .tmp/showcase/prior-auth.compiled.json --variant examples/prior_auth_synthetic/variants/baseline.yaml --manifest .tmp/showcase/prior-auth.fixtures.json --out .tmp/showcase/prior-auth.baseline.json
128
+ agent-assure suite run .tmp/showcase/prior-auth.compiled.json --variant examples/prior_auth_synthetic/variants/candidate_evidence_normalization.yaml --manifest .tmp/showcase/prior-auth.fixtures.json --out .tmp/showcase/prior-auth.evidence-candidate.json
129
+ agent-assure evaluate .tmp/showcase/prior-auth.baseline.json --suite .tmp/showcase/prior-auth.compiled.json --out-dir .tmp/showcase/baseline-report
130
+ agent-assure evaluate .tmp/showcase/prior-auth.evidence-candidate.json --suite .tmp/showcase/prior-auth.compiled.json --out-dir .tmp/showcase/evidence-report
131
+ agent-assure compare .tmp/showcase/prior-auth.baseline.json .tmp/showcase/prior-auth.evidence-candidate.json --suite .tmp/showcase/prior-auth.compiled.json --out-dir .tmp/showcase/comparison-report
132
+ agent-assure ci .tmp/showcase/prior-auth.evidence-candidate.json --suite .tmp/showcase/prior-auth.compiled.json --baseline .tmp/showcase/prior-auth.baseline.json --out-dir .tmp/showcase/ci-report --report-mode full
133
+ ```
134
+
135
+ The baseline evaluation exits `0` and writes a `pass` summary with ten evaluated
136
+ cases and zero blocking findings. The candidate evaluation is expected to exit
137
+ `1`; its report contains one blocking finding for
138
+ `shared-source-multi-claim` with reason code
139
+ `MATERIAL_CLAIM_MISSING_EVIDENCE`.
140
+
141
+ The comparison command is also expected to exit `1`. It writes
142
+ `.tmp/showcase/comparison-report/comparison-report.md` with classification
143
+ `new_failure` and fixture-equivalence state `pass`. For the failing case, the
144
+ baseline and candidate both keep `recommendation=approve; outcome=approve`; the
145
+ material regression is the missing `claim-duration` evidence link. See
146
+ `docs/showcase.md` for the expected report fields, GitHub Actions snippet, and
147
+ artifact digest summary.
148
+
149
+ After reports exist, an evidence packet can also be built and gated from
150
+ summaries:
151
+
152
+ ```bash
153
+ agent-assure packet build .tmp/showcase/evidence-report/evaluation-summary.json --comparison .tmp/showcase/comparison-report/comparison-summary.json --out .tmp/showcase/evidence-packet.json
154
+ agent-assure ci gate .tmp/showcase/evidence-packet.json
155
+ ```
156
+
157
+ For this known failing candidate, both the CI command and packet gate are
158
+ expected to exit `1`. The CI command writes JSON/Markdown reports,
159
+ `evidence-packet.json`, `evidence-packet.md`, `dependency-inventory.json`,
160
+ `release-artifact-manifest.json`, and `ci-diagnostics.json`.
161
+
162
+ Release evidence can be bundled and replayed from raw digests for stable source
163
+ artifacts and stable JSON projection digests for environment-bearing packet
164
+ artifacts:
165
+
166
+ ```bash
167
+ python scripts/build_release_bundle.py --out .tmp/release --write-digests .tmp/release/release-digest-replay.json
168
+ agent-assure release replay .tmp/release/release-digest-replay.json --artifact-root . --require-current-commit
169
+ ```
170
+
171
+ The release bundle includes the evidence packet, release manifest, replay file,
172
+ SBOM, source distribution, wheel, manifest-listed digest cross-checks, and
173
+ exact cosign-verifiable blobs when built by the release workflow. For keyless
174
+ cosign verification of workflow-signed release blobs, see
175
+ `docs/release_evidence.md`.
176
+
177
+ ## What the demo shows
178
+
179
+ The flagship demo is intentionally narrow. It shows that a candidate can keep
180
+ the same visible answer while losing a material evidence link, and that the
181
+ evaluation report identifies the failing invariant under equivalent fixtures.
182
+ It is deterministic review evidence for a declared fixture, not a broad model
183
+ or provider assessment.
184
+
185
+ ### Flagship regression at a glance
186
+
187
+ The key idea: ordinary output comparison can miss governance regressions. In the
188
+ flagship fixture, the candidate keeps the same visible recommendation and
189
+ outcome as the baseline, but drops a material evidence link. `agent-assure`
190
+ catches the missing evidence invariant and classifies the baseline-to-candidate
191
+ comparison as a `new_failure` under passing fixture equivalence.
192
+
193
+ ```mermaid
194
+ flowchart LR
195
+ subgraph OutputCheck["Ordinary visible-output check"]
196
+ BOut["Baseline output<br/>recommendation=approve<br/>outcome=approve"]
197
+ COut["Candidate output<br/>recommendation=approve<br/>outcome=approve"]
198
+ Same["Visible answer unchanged"]
199
+ BOut --> Same
200
+ COut --> Same
201
+ end
202
+
203
+ subgraph InvariantCheck["agent-assure invariant check"]
204
+ BEv["Baseline evidence<br/>claim-duration linked"]
205
+ CEv["Candidate evidence<br/>claim-duration missing link"]
206
+ Pass["Baseline evaluation: pass"]
207
+ Fail["Candidate evaluation: fail<br/>MATERIAL_CLAIM_MISSING_EVIDENCE"]
208
+ BEv --> Pass
209
+ CEv --> Fail
210
+ end
211
+
212
+ Same --> Tension["Output unchanged<br/>but governance invariant regressed"]
213
+ Equiv["Fixture equivalence: pass"] --> Compare["Baseline-to-candidate comparison"]
214
+ Pass --> Compare
215
+ Fail --> Compare
216
+ Tension --> Compare
217
+
218
+ Compare --> NewFailure["Classification: new_failure"]
219
+
220
+ classDef pass fill:#e5f5ff,stroke:#0072b2,color:#003b5c;
221
+ classDef fail fill:#fff1e0,stroke:#d55e00,color:#5c2a00;
222
+ classDef neutral fill:#eef3ff,stroke:#3f51b5,color:#1a237e;
223
+ classDef warn fill:#fff8e1,stroke:#f9a825,color:#5d4037;
224
+
225
+ class Pass,Equiv pass;
226
+ class Fail,NewFailure fail;
227
+ class Same,Compare neutral;
228
+ class Tension warn;
229
+ ```
230
+
231
+ ## Architecture
232
+
233
+ This is the full toolkit shape. The five-minute demo exercises the fixture-mode
234
+ path and evidence outputs.
235
+
236
+ ```mermaid
237
+ flowchart LR
238
+ A[Authoring<br/>YAML suites<br/>live protocols] --> B[Compile and bind<br/>strict JSON<br/>canonical digests]
239
+ B --> C{Execution}
240
+ C -->|Fixture mode| D[Fixed local fixtures<br/>offline<br/>no token spend]
241
+ C -->|Live mode| E[Declared adapters<br/>static JSONL<br/>external script<br/>OpenAI-compatible]
242
+ D --> F[RunSet records<br/>redacted summaries<br/>provenance<br/>trace context]
243
+ E --> F
244
+ F --> G[Evaluate controls<br/>expectations<br/>policies<br/>privacy checks]
245
+ G --> H[Change review<br/>fixture equivalence<br/>verdicts<br/>provenance diffs]
246
+ G --> I[Live review<br/>cluster rates<br/>rare-event bounds<br/>drift and trajectories]
247
+ H --> J[Evidence outputs<br/>reports<br/>packets<br/>CI gates<br/>release replay]
248
+ I --> J
249
+ J --> K[Observability<br/>span plans<br/>optional SDK/OTLP]
250
+ ```
251
+
252
+ ## Small generic example
253
+
254
+ The expense-approval example is a compact non-healthcare suite that uses the
255
+ same offline fixture and expectation method. It is a generic demonstration, not
256
+ a benchmark.
257
+
258
+ ```bash
259
+ agent-assure suite compile examples/expense_approval_minimal/suite.yaml --out .tmp/expense.compiled.json --manifest .tmp/expense.fixtures.json
260
+ agent-assure suite run .tmp/expense.compiled.json --variant examples/expense_approval_minimal/variants/baseline.yaml --manifest .tmp/expense.fixtures.json --out .tmp/expense.baseline.json
261
+ agent-assure suite run .tmp/expense.compiled.json --variant examples/expense_approval_minimal/variants/candidate_provider_policy.yaml --manifest .tmp/expense.fixtures.json --out .tmp/expense.candidate.json
262
+ agent-assure evaluate .tmp/expense.baseline.json --suite .tmp/expense.compiled.json --out-dir .tmp/expense.baseline-report
263
+ agent-assure evaluate .tmp/expense.candidate.json --suite .tmp/expense.compiled.json --out-dir .tmp/expense.candidate-report
264
+ ```
265
+
266
+ The baseline evaluation exits `0`. The provider-policy candidate is expected to
267
+ exit `1` with deterministic provider, outcome, and human-review control
268
+ findings.
269
+
270
+ ## Current claim boundary
271
+
272
+ The project currently claims deterministic offline controls and
273
+ protocol-bound live operational evaluation implemented in this repository.
274
+ Public claims are tracked in
275
+ `docs/claims_traceability_matrix.yaml`.
276
+
277
+ A statistical protocol is documented in
278
+ `docs/measurement/experiment_protocol.md` for live stochastic evaluation. The
279
+ `agent-assure live` commands require a machine-readable protocol, run
280
+ explicitly configured adapters, and analyze repeated observations with
281
+ cluster-aware rates, protocol-declared comparison methods, and exploratory
282
+ guardrails for low cluster counts. Optional advanced endpoint plans bind
283
+ confirmatory/exploratory labels, Bonferroni multiplicity controls, rare-event upper
284
+ bounds, observed cluster-correlation summaries, and paired randomization-test
285
+ prerequisites to the protocol digest. Optional trajectory reports derive
286
+ privacy-filtered observable state paths, canonical transition profiles,
287
+ sequence invariants, and operational event-process summaries from structured
288
+ run artifacts. Live results remain bounded by the declared
289
+ protocol, data boundary, provider/model configuration, and execution window.
290
+ They are not general model-quality, safety, compliance, or clinical-validation
291
+ claims.
292
+
293
+ Synthetic calibration and regression coverage for the live statistical,
294
+ drift-monitoring, trajectory, and event-process paths is summarized in
295
+ `docs/live_calibration.md`.
296
+
297
+ The `external-script` live adapter runs configured scripts through a no-shell
298
+ subprocess harness and records redacted `emergency-process-record` artifacts
299
+ for process failures. It passes only declared environment allowlist entries,
300
+ explicit config variables, and runner-injected trace/request variables.
301
+ OpenTelemetry export is optional:
302
+
303
+ ```bash
304
+ pip install -e ".[otel]"
305
+ agent-assure otel export RUNSET_OR_RECORD_OR_SPAN_PLAN.json --protocol otlp-http --endpoint http://localhost:4318/v1/traces
306
+ ```
307
+
308
+ Exported spans are derived from span plans and structured run records, not from live
309
+ SDK instrumentation of provider calls; raw prompts, raw outputs, tool
310
+ arguments, and unredacted summaries are not emitted.
311
+
312
+ ## GitHub Actions snippet
313
+
314
+ ```yaml
315
+ name: agent-assure-showcase
316
+ on: [push, pull_request]
317
+ jobs:
318
+ flagship:
319
+ runs-on: ubuntu-latest
320
+ steps:
321
+ - uses: actions/checkout@v4
322
+ - uses: actions/setup-python@v5
323
+ with:
324
+ python-version: "3.11"
325
+ - run: pip install -e ".[dev]"
326
+ - run: mkdir -p .tmp/showcase
327
+ - run: agent-assure suite compile examples/prior_auth_synthetic/suite.yaml --out .tmp/showcase/prior-auth.compiled.json --manifest .tmp/showcase/prior-auth.fixtures.json
328
+ - run: agent-assure suite run .tmp/showcase/prior-auth.compiled.json --variant examples/prior_auth_synthetic/variants/baseline.yaml --manifest .tmp/showcase/prior-auth.fixtures.json --out .tmp/showcase/prior-auth.baseline.json
329
+ - run: agent-assure suite run .tmp/showcase/prior-auth.compiled.json --variant examples/prior_auth_synthetic/variants/candidate_evidence_normalization.yaml --manifest .tmp/showcase/prior-auth.fixtures.json --out .tmp/showcase/prior-auth.evidence-candidate.json
330
+ - run: agent-assure evaluate .tmp/showcase/prior-auth.baseline.json --suite .tmp/showcase/prior-auth.compiled.json --out-dir .tmp/showcase/baseline-report
331
+ - name: Evaluate evidence candidate
332
+ run: |
333
+ set +e
334
+ agent-assure evaluate .tmp/showcase/prior-auth.evidence-candidate.json --suite .tmp/showcase/prior-auth.compiled.json --out-dir .tmp/showcase/evidence-report
335
+ status=$?
336
+ set -e
337
+ if [ "$status" -ne 1 ]; then
338
+ echo "expected exit 1, got $status"
339
+ exit 1
340
+ fi
341
+ grep -q "MATERIAL_CLAIM_MISSING_EVIDENCE" .tmp/showcase/evidence-report/evaluation-report.md
342
+ - name: Compare baseline to candidate
343
+ run: |
344
+ set +e
345
+ agent-assure compare .tmp/showcase/prior-auth.baseline.json .tmp/showcase/prior-auth.evidence-candidate.json --suite .tmp/showcase/prior-auth.compiled.json --out-dir .tmp/showcase/comparison-report
346
+ status=$?
347
+ set -e
348
+ if [ "$status" -ne 1 ]; then
349
+ echo "expected exit 1, got $status"
350
+ exit 1
351
+ fi
352
+ grep -q 'Classification: `new_failure`' .tmp/showcase/comparison-report/comparison-report.md
353
+ grep -q 'Fixture-Equivalence Result' .tmp/showcase/comparison-report/comparison-report.md
354
+ grep -q 'State: `pass`' .tmp/showcase/comparison-report/comparison-report.md
355
+ ```
356
+
357
+ ## Development
358
+
359
+ ```bash
360
+ git config core.hooksPath .githooks
361
+ python scripts/check_docs_alignment.py
362
+ ruff check .
363
+ mypy src
364
+ pytest
365
+ python -m build
366
+ ```
367
+
368
+ Dependency locking for release builds is documented in
369
+ `docs/dependency_locking.md`. Release bundle reproduction, SBOM generation, and
370
+ cosign verification are documented in `docs/release_evidence.md`.
371
+
372
+ The installed package includes bundled deterministic examples for reproducible
373
+ local demos. The top-level `examples/` tree mirrors those packaged resources
374
+ for repository-oriented docs and tests; `scripts/check_packaged_examples.py`
375
+ keeps the copies aligned. The bundled examples are not a stable extension API; see
376
+ `docs/api_surface.md`.