operational-evidence-plane 0.3.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207)
  1. operational_evidence_plane-0.3.2/.gitattributes +1 -0
  2. operational_evidence_plane-0.3.2/.github/ISSUE_TEMPLATE/bug_report.yml +35 -0
  3. operational_evidence_plane-0.3.2/.github/ISSUE_TEMPLATE/config.yml +5 -0
  4. operational_evidence_plane-0.3.2/.github/ISSUE_TEMPLATE/docs_question.yml +21 -0
  5. operational_evidence_plane-0.3.2/.github/dependabot.yml +10 -0
  6. operational_evidence_plane-0.3.2/.github/workflows/release.yml +27 -0
  7. operational_evidence_plane-0.3.2/.github/workflows/verify.yml +74 -0
  8. operational_evidence_plane-0.3.2/CHANGELOG.md +293 -0
  9. operational_evidence_plane-0.3.2/CITATION.cff +21 -0
  10. operational_evidence_plane-0.3.2/CONTRIBUTING.md +76 -0
  11. operational_evidence_plane-0.3.2/LICENSE +176 -0
  12. operational_evidence_plane-0.3.2/MANIFEST.in +42 -0
  13. operational_evidence_plane-0.3.2/Makefile +238 -0
  14. operational_evidence_plane-0.3.2/PKG-INFO +422 -0
  15. operational_evidence_plane-0.3.2/README.md +388 -0
  16. operational_evidence_plane-0.3.2/SECURITY.md +18 -0
  17. operational_evidence_plane-0.3.2/demo/counterfactual/.gitkeep +0 -0
  18. operational_evidence_plane-0.3.2/demo/fixtures/diff_synthetic_001.patch +11 -0
  19. operational_evidence_plane-0.3.2/demo/model/deterministic_mock_reviewer.md +24 -0
  20. operational_evidence_plane-0.3.2/demo/prompts/code_review_agent.md +30 -0
  21. operational_evidence_plane-0.3.2/demo/pyproject.toml +20 -0
  22. operational_evidence_plane-0.3.2/demo/scripts/check_replay_state.py +90 -0
  23. operational_evidence_plane-0.3.2/demo/scripts/run_approval_escalation_counterfactual.py +37 -0
  24. operational_evidence_plane-0.3.2/demo/scripts/run_budget_per_run_counterfactual.py +38 -0
  25. operational_evidence_plane-0.3.2/demo/scripts/run_code_review_demo.py +8 -0
  26. operational_evidence_plane-0.3.2/demo/scripts/run_compound_reliability_counterfactual.py +37 -0
  27. operational_evidence_plane-0.3.2/demo/src/oep_demo/__init__.py +23 -0
  28. operational_evidence_plane-0.3.2/demo/src/oep_demo/cli.py +31 -0
  29. operational_evidence_plane-0.3.2/demo/src/oep_demo/counterfactual.py +852 -0
  30. operational_evidence_plane-0.3.2/demo/src/oep_demo/paths.py +47 -0
  31. operational_evidence_plane-0.3.2/demo/src/oep_demo/py.typed +1 -0
  32. operational_evidence_plane-0.3.2/demo/src/oep_demo/resources/fixtures/diff_synthetic_001.patch +11 -0
  33. operational_evidence_plane-0.3.2/demo/src/oep_demo/resources/model/deterministic_mock_reviewer.md +24 -0
  34. operational_evidence_plane-0.3.2/demo/src/oep_demo/resources/prompts/code_review_agent.md +30 -0
  35. operational_evidence_plane-0.3.2/demo/src/oep_demo/resources/state/replay_state_recipe.md +54 -0
  36. operational_evidence_plane-0.3.2/demo/src/oep_demo/runner.py +429 -0
  37. operational_evidence_plane-0.3.2/demo/state/.gitkeep +1 -0
  38. operational_evidence_plane-0.3.2/demo/state/replay_state_recipe.md +54 -0
  39. operational_evidence_plane-0.3.2/docs/architecture.md +118 -0
  40. operational_evidence_plane-0.3.2/docs/counterfactual_replay.md +68 -0
  41. operational_evidence_plane-0.3.2/docs/decision_log.md +73 -0
  42. operational_evidence_plane-0.3.2/docs/landscape.md +31 -0
  43. operational_evidence_plane-0.3.2/docs/oep_evidence_chain.svg +95 -0
  44. operational_evidence_plane-0.3.2/docs/public_claims.md +52 -0
  45. operational_evidence_plane-0.3.2/docs/quickstart_walkthrough.md +73 -0
  46. operational_evidence_plane-0.3.2/docs/record_keeping_reference.md +78 -0
  47. operational_evidence_plane-0.3.2/docs/release_checklist.md +99 -0
  48. operational_evidence_plane-0.3.2/docs/schema_migration_v0.3.md +89 -0
  49. operational_evidence_plane-0.3.2/docs/schema_reference.md +54 -0
  50. operational_evidence_plane-0.3.2/docs/schema_versioning.md +52 -0
  51. operational_evidence_plane-0.3.2/events/examples/code_review_agent_denied_step.v0.json +59 -0
  52. operational_evidence_plane-0.3.2/events/examples/code_review_agent_step.v0.json +58 -0
  53. operational_evidence_plane-0.3.2/events/examples/human_review/human_review_approved.v0.json +47 -0
  54. operational_evidence_plane-0.3.2/events/examples/human_review/human_review_rejected.v0.json +47 -0
  55. operational_evidence_plane-0.3.2/events/pyproject.toml +20 -0
  56. operational_evidence_plane-0.3.2/events/schema/agent_step_event.v0.schema.json +278 -0
  57. operational_evidence_plane-0.3.2/events/schema/human_review_event.v0.schema.json +186 -0
  58. operational_evidence_plane-0.3.2/events/scripts/check_agent_step_event.py +90 -0
  59. operational_evidence_plane-0.3.2/events/scripts/demo_human_review_reconstruct.py +287 -0
  60. operational_evidence_plane-0.3.2/events/src/oep_events/__init__.py +5 -0
  61. operational_evidence_plane-0.3.2/events/src/oep_events/paths.py +14 -0
  62. operational_evidence_plane-0.3.2/events/src/oep_events/py.typed +1 -0
  63. operational_evidence_plane-0.3.2/events/src/oep_events/resources/examples/code_review_agent_denied_step.v0.json +59 -0
  64. operational_evidence_plane-0.3.2/events/src/oep_events/resources/examples/code_review_agent_step.v0.json +58 -0
  65. operational_evidence_plane-0.3.2/events/src/oep_events/resources/schema/agent_step_event.v0.schema.json +278 -0
  66. operational_evidence_plane-0.3.2/integrations/__init__.py +1 -0
  67. operational_evidence_plane-0.3.2/integrations/decision-trace-reconstructor/README.md +109 -0
  68. operational_evidence_plane-0.3.2/integrations/decision-trace-reconstructor/code_review_agent.expected_feasibility.json +53 -0
  69. operational_evidence_plane-0.3.2/integrations/decision-trace-reconstructor/code_review_agent.jsonl +6 -0
  70. operational_evidence_plane-0.3.2/integrations/decision-trace-reconstructor/code_review_agent_denied.jsonl +5 -0
  71. operational_evidence_plane-0.3.2/integrations/decision-trace-reconstructor/mapping.v0.yaml +42 -0
  72. operational_evidence_plane-0.3.2/integrations/decision-trace-reconstructor/scripts/to_dtr_jsonl.py +253 -0
  73. operational_evidence_plane-0.3.2/integrations/langgraph/README.md +143 -0
  74. operational_evidence_plane-0.3.2/integrations/langgraph/__init__.py +1 -0
  75. operational_evidence_plane-0.3.2/integrations/langgraph/examples/code_review_langgraph_checkpoint.v0.json +179 -0
  76. operational_evidence_plane-0.3.2/integrations/langgraph/mapping.v0.yaml +264 -0
  77. operational_evidence_plane-0.3.2/integrations/langgraph/scripts/__init__.py +1 -0
  78. operational_evidence_plane-0.3.2/integrations/langgraph/scripts/to_oep_permission.py +186 -0
  79. operational_evidence_plane-0.3.2/integrations/mcp/README.md +76 -0
  80. operational_evidence_plane-0.3.2/integrations/mcp/__init__.py +1 -0
  81. operational_evidence_plane-0.3.2/integrations/mcp/examples/code_review_mcp_tool_call.v0.json +164 -0
  82. operational_evidence_plane-0.3.2/integrations/mcp/mapping.v0.yaml +110 -0
  83. operational_evidence_plane-0.3.2/integrations/mcp/scripts/__init__.py +1 -0
  84. operational_evidence_plane-0.3.2/integrations/mcp/scripts/to_oep_permission.py +155 -0
  85. operational_evidence_plane-0.3.2/manifest/examples/code_review_agent_release.v0.json +197 -0
  86. operational_evidence_plane-0.3.2/manifest/pyproject.toml +20 -0
  87. operational_evidence_plane-0.3.2/manifest/schema/release_manifest.v0.schema.json +303 -0
  88. operational_evidence_plane-0.3.2/manifest/scripts/check_release_manifest.py +21 -0
  89. operational_evidence_plane-0.3.2/manifest/scripts/update_manifest_digests.py +80 -0
  90. operational_evidence_plane-0.3.2/manifest/src/oep_manifest/__init__.py +5 -0
  91. operational_evidence_plane-0.3.2/manifest/src/oep_manifest/cli.py +95 -0
  92. operational_evidence_plane-0.3.2/manifest/src/oep_manifest/paths.py +13 -0
  93. operational_evidence_plane-0.3.2/manifest/src/oep_manifest/py.typed +1 -0
  94. operational_evidence_plane-0.3.2/manifest/src/oep_manifest/resources/examples/code_review_agent_release.v0.json +197 -0
  95. operational_evidence_plane-0.3.2/manifest/src/oep_manifest/resources/schema/release_manifest.v0.schema.json +303 -0
  96. operational_evidence_plane-0.3.2/oep_verify/__init__.py +1 -0
  97. operational_evidence_plane-0.3.2/oep_verify/artifacts.py +387 -0
  98. operational_evidence_plane-0.3.2/oep_verify/cli.py +533 -0
  99. operational_evidence_plane-0.3.2/oep_verify/py.typed +1 -0
  100. operational_evidence_plane-0.3.2/oep_verify/resources.py +40 -0
  101. operational_evidence_plane-0.3.2/oep_verify/scenarios.py +122 -0
  102. operational_evidence_plane-0.3.2/oep_verify/verify_support.py +443 -0
  103. operational_evidence_plane-0.3.2/operational_evidence_plane.egg-info/PKG-INFO +422 -0
  104. operational_evidence_plane-0.3.2/operational_evidence_plane.egg-info/SOURCES.txt +205 -0
  105. operational_evidence_plane-0.3.2/operational_evidence_plane.egg-info/dependency_links.txt +1 -0
  106. operational_evidence_plane-0.3.2/operational_evidence_plane.egg-info/entry_points.txt +5 -0
  107. operational_evidence_plane-0.3.2/operational_evidence_plane.egg-info/requires.txt +10 -0
  108. operational_evidence_plane-0.3.2/operational_evidence_plane.egg-info/top_level.txt +7 -0
  109. operational_evidence_plane-0.3.2/permissions/examples/code_review_tool_permission.v0.json +145 -0
  110. operational_evidence_plane-0.3.2/permissions/examples/code_review_tool_permission_denied.v0.json +57 -0
  111. operational_evidence_plane-0.3.2/permissions/policy/counterfactual/approval_per_step_escalation.rego +112 -0
  112. operational_evidence_plane-0.3.2/permissions/policy/counterfactual/approval_per_step_escalation_test.rego +80 -0
  113. operational_evidence_plane-0.3.2/permissions/policy/counterfactual/budget_per_run_cap.rego +103 -0
  114. operational_evidence_plane-0.3.2/permissions/policy/counterfactual/budget_per_run_cap_test.rego +52 -0
  115. operational_evidence_plane-0.3.2/permissions/policy/counterfactual/compound_reliability_step_bound.rego +101 -0
  116. operational_evidence_plane-0.3.2/permissions/policy/counterfactual/compound_reliability_step_bound_test.rego +48 -0
  117. operational_evidence_plane-0.3.2/permissions/policy/counterfactual/oep.rego +26 -0
  118. operational_evidence_plane-0.3.2/permissions/policy/counterfactual/oep_test.rego +14 -0
  119. operational_evidence_plane-0.3.2/permissions/policy/input/code_review_read_diff.json +29 -0
  120. operational_evidence_plane-0.3.2/permissions/policy/input/code_review_write_diff.json +28 -0
  121. operational_evidence_plane-0.3.2/permissions/policy/tool_permissions.rego +38 -0
  122. operational_evidence_plane-0.3.2/permissions/policy/tool_permissions_test.rego +75 -0
  123. operational_evidence_plane-0.3.2/permissions/pyproject.toml +26 -0
  124. operational_evidence_plane-0.3.2/permissions/schema/tool_permission_packet.v0.schema.json +478 -0
  125. operational_evidence_plane-0.3.2/permissions/scripts/check_tool_permission_packet.py +90 -0
  126. operational_evidence_plane-0.3.2/permissions/scripts/update_permission_digests.py +120 -0
  127. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/__init__.py +61 -0
  128. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/paths.py +50 -0
  129. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/py.typed +1 -0
  130. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/replay/__init__.py +187 -0
  131. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/replay/opa.py +281 -0
  132. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/replay/records.py +261 -0
  133. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/replay/storage.py +359 -0
  134. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/replay/surfaces.py +770 -0
  135. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/replay/wrapper.py +418 -0
  136. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/examples/code_review_tool_permission.v0.json +145 -0
  137. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/examples/code_review_tool_permission_denied.v0.json +57 -0
  138. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/policy/counterfactual/approval_per_step_escalation.rego +112 -0
  139. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/policy/counterfactual/budget_per_run_cap.rego +103 -0
  140. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/policy/counterfactual/compound_reliability_step_bound.rego +101 -0
  141. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/policy/counterfactual/oep.rego +26 -0
  142. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/policy/input/code_review_read_diff.json +29 -0
  143. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/policy/input/code_review_write_diff.json +28 -0
  144. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/policy/tool_permissions.rego +38 -0
  145. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/policy/tool_permissions_test.rego +75 -0
  146. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/schema/counterfactual_replay.v0.schema.json +383 -0
  147. operational_evidence_plane-0.3.2/permissions/src/oep_permissions/resources/schema/tool_permission_packet.v0.schema.json +478 -0
  148. operational_evidence_plane-0.3.2/playbooks/examples/code_review_denied_reconstruction_packet.v0.json +95 -0
  149. operational_evidence_plane-0.3.2/playbooks/examples/code_review_reconstruction_packet.v0.json +96 -0
  150. operational_evidence_plane-0.3.2/playbooks/pyproject.toml +20 -0
  151. operational_evidence_plane-0.3.2/playbooks/rollback_reconstruction.md +71 -0
  152. operational_evidence_plane-0.3.2/playbooks/schema/reconstruction_packet.v0.schema.json +132 -0
  153. operational_evidence_plane-0.3.2/playbooks/scripts/check_reconstruction_packet.py +364 -0
  154. operational_evidence_plane-0.3.2/playbooks/src/oep_playbooks/__init__.py +5 -0
  155. operational_evidence_plane-0.3.2/playbooks/src/oep_playbooks/cli.py +47 -0
  156. operational_evidence_plane-0.3.2/playbooks/src/oep_playbooks/paths.py +22 -0
  157. operational_evidence_plane-0.3.2/playbooks/src/oep_playbooks/py.typed +1 -0
  158. operational_evidence_plane-0.3.2/playbooks/src/oep_playbooks/resources/examples/code_review_denied_reconstruction_packet.v0.json +95 -0
  159. operational_evidence_plane-0.3.2/playbooks/src/oep_playbooks/resources/examples/code_review_reconstruction_packet.v0.json +96 -0
  160. operational_evidence_plane-0.3.2/playbooks/src/oep_playbooks/resources/rollback_reconstruction.md +71 -0
  161. operational_evidence_plane-0.3.2/playbooks/src/oep_playbooks/resources/schema/reconstruction_packet.v0.schema.json +132 -0
  162. operational_evidence_plane-0.3.2/pyproject.toml +132 -0
  163. operational_evidence_plane-0.3.2/replay/counterfactual_replay.v0.schema.json +383 -0
  164. operational_evidence_plane-0.3.2/replay/scripts/check_counterfactual_replay.py +288 -0
  165. operational_evidence_plane-0.3.2/replay/scripts/check_counterfactual_replay_schema.py +124 -0
  166. operational_evidence_plane-0.3.2/replay/scripts/check_v03_features.py +438 -0
  167. operational_evidence_plane-0.3.2/scripts/check_package_build.py +357 -0
  168. operational_evidence_plane-0.3.2/scripts/check_public_docs.py +84 -0
  169. operational_evidence_plane-0.3.2/scripts/sync_packaged_resources.py +59 -0
  170. operational_evidence_plane-0.3.2/setup.cfg +4 -0
  171. operational_evidence_plane-0.3.2/tests/conftest.py +16 -0
  172. operational_evidence_plane-0.3.2/tests/helpers.py +220 -0
  173. operational_evidence_plane-0.3.2/tests/test_counterfactual_replay.py +525 -0
  174. operational_evidence_plane-0.3.2/tests/test_demo_runner.py +277 -0
  175. operational_evidence_plane-0.3.2/tests/test_integrations_dtr.py +105 -0
  176. operational_evidence_plane-0.3.2/tests/test_packaging_digests.py +221 -0
  177. operational_evidence_plane-0.3.2/tests/test_replay_cli.py +302 -0
  178. operational_evidence_plane-0.3.2/tests/test_replay_opa.py +596 -0
  179. operational_evidence_plane-0.3.2/tests/test_replay_storage.py +258 -0
  180. operational_evidence_plane-0.3.2/tests/test_verify_scripts.py +258 -0
  181. operational_evidence_plane-0.3.2/tests/test_verify_support.py +244 -0
  182. operational_evidence_plane-0.3.2/traces/examples/code_review_agent_denied_eval.v0.json +41 -0
  183. operational_evidence_plane-0.3.2/traces/examples/code_review_agent_denied_trace.v0.json +54 -0
  184. operational_evidence_plane-0.3.2/traces/examples/code_review_agent_eval.v0.json +41 -0
  185. operational_evidence_plane-0.3.2/traces/examples/code_review_agent_trace.v0.json +48 -0
  186. operational_evidence_plane-0.3.2/traces/pyproject.toml +20 -0
  187. operational_evidence_plane-0.3.2/traces/schema/eval_result.v0.schema.json +112 -0
  188. operational_evidence_plane-0.3.2/traces/schema/operational_trace.v0.schema.json +173 -0
  189. operational_evidence_plane-0.3.2/traces/scripts/check_eval_result.py +115 -0
  190. operational_evidence_plane-0.3.2/traces/scripts/check_operational_trace.py +140 -0
  191. operational_evidence_plane-0.3.2/traces/src/oep_traces/__init__.py +19 -0
  192. operational_evidence_plane-0.3.2/traces/src/oep_traces/paths.py +28 -0
  193. operational_evidence_plane-0.3.2/traces/src/oep_traces/py.typed +1 -0
  194. operational_evidence_plane-0.3.2/traces/src/oep_traces/resources/examples/code_review_agent_denied_eval.v0.json +41 -0
  195. operational_evidence_plane-0.3.2/traces/src/oep_traces/resources/examples/code_review_agent_denied_trace.v0.json +54 -0
  196. operational_evidence_plane-0.3.2/traces/src/oep_traces/resources/examples/code_review_agent_eval.v0.json +41 -0
  197. operational_evidence_plane-0.3.2/traces/src/oep_traces/resources/examples/code_review_agent_trace.v0.json +48 -0
  198. operational_evidence_plane-0.3.2/traces/src/oep_traces/resources/schema/eval_result.v0.schema.json +112 -0
  199. operational_evidence_plane-0.3.2/traces/src/oep_traces/resources/schema/operational_trace.v0.schema.json +173 -0
  200. operational_evidence_plane-0.3.2/translations/bedrock/README.md +29 -0
  201. operational_evidence_plane-0.3.2/translations/bedrock/examples/code_review_bedrock_translation.v0.json +103 -0
  202. operational_evidence_plane-0.3.2/translations/bedrock/layer_mapping.md +16 -0
  203. operational_evidence_plane-0.3.2/translations/bedrock/runtime_mapping.md +24 -0
  204. operational_evidence_plane-0.3.2/translations/bedrock/schema/bedrock_translation.v0.schema.json +47 -0
  205. operational_evidence_plane-0.3.2/translations/bedrock/scripts/check_bedrock_translation.py +79 -0
  206. operational_evidence_plane-0.3.2/translations/bedrock/source_notes.md +25 -0
  207. operational_evidence_plane-0.3.2/uv.lock +767 -0
@@ -0,0 +1 @@
1
+ * text=auto eol=lf
@@ -0,0 +1,35 @@
1
+ name: Bug report
2
+ description: Report a reproducible issue in local verification or examples.
3
+ title: "[Bug]: "
4
+ labels: ["bug"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ This repository is a research preview reference implementation, not a production support channel.
10
+ - type: textarea
11
+ id: what-happened
12
+ attributes:
13
+ label: What happened?
14
+ description: Describe the observed behavior and expected behavior.
15
+ validations:
16
+ required: true
17
+ - type: textarea
18
+ id: reproduce
19
+ attributes:
20
+ label: Reproduction steps
21
+ description: Include exact commands and relevant output.
22
+ placeholder: |
23
+ 1. Run `make clean-state`
24
+ 2. Run `make verify`
25
+ 3. Observe ...
26
+ validations:
27
+ required: true
28
+ - type: input
29
+ id: environment
30
+ attributes:
31
+ label: Environment
32
+ description: OS, Python version, OPA version.
33
+ placeholder: "macOS 15, Python 3.11, OPA 1.x"
34
+ validations:
35
+ required: true
@@ -0,0 +1,5 @@
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: Security report
4
+ url: https://github.com/agent-runtime-evidence/operational-evidence-plane/security/advisories/new
5
+ about: Use GitHub Security Advisories for vulnerability reports.
@@ -0,0 +1,21 @@
1
+ name: Documentation question
2
+ description: Ask for clarification about the reference implementation or docs.
3
+ title: "[Docs]: "
4
+ labels: ["documentation"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Use this for documentation gaps, unclear examples, or claim-boundary questions.
10
+ - type: textarea
11
+ id: question
12
+ attributes:
13
+ label: Question
14
+ description: What part of the repository needs clarification?
15
+ validations:
16
+ required: true
17
+ - type: input
18
+ id: page
19
+ attributes:
20
+ label: Related file or page
21
+ placeholder: "README.md, docs/architecture.md, manifest/examples/..."
@@ -0,0 +1,10 @@
1
+ version: 2
2
+ updates:
3
+ - package-ecosystem: "github-actions"
4
+ directory: "/"
5
+ schedule:
6
+ interval: "weekly"
7
+ - package-ecosystem: "uv"
8
+ directory: "/"
9
+ schedule:
10
+ interval: "weekly"
@@ -0,0 +1,27 @@
1
+ name: Release
2
+
3
+ on:
4
+ push:
5
+ tags: ["v*"]
6
+ workflow_dispatch:
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ build:
13
+ name: Build & Publish
14
+ runs-on: ubuntu-latest
15
+ permissions:
16
+ id-token: write
17
+ steps:
18
+ - uses: actions/checkout@v6
19
+ - uses: actions/setup-python@v6
20
+ with:
21
+ python-version: "3.11"
22
+ - name: Install build tools
23
+ run: pip install build
24
+ - name: Build sdist and wheel
25
+ run: python -m build
26
+ - name: Publish to PyPI
27
+ uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,74 @@
1
+ name: Verify
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ concurrency:
10
+ group: verify-${{ github.ref }}
11
+ cancel-in-progress: ${{ github.event_name == 'pull_request' }}
12
+
13
+ jobs:
14
+ verify:
15
+ name: Python ${{ matrix.python-version }}
16
+ runs-on: ubuntu-latest
17
+ strategy:
18
+ fail-fast: false
19
+ matrix:
20
+ python-version: ["3.11", "3.12", "3.13", "3.14"]
21
+ steps:
22
+ - name: Check out repository
23
+ uses: actions/checkout@v4
24
+
25
+ - name: Set up Python
26
+ uses: actions/setup-python@v5
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+
30
+ - name: Cache uv downloads
31
+ uses: actions/cache@v4
32
+ with:
33
+ path: ~/.cache/uv
34
+ key: uv-${{ runner.os }}-py${{ matrix.python-version }}-${{ hashFiles('uv.lock') }}
35
+ restore-keys: |
36
+ uv-${{ runner.os }}-py${{ matrix.python-version }}-
37
+
38
+ - name: Install uv and locked dependencies
39
+ run: |
40
+ python -VV
41
+ python -m pip install --upgrade pip
42
+ python -m pip install uv==0.11.10
43
+ uv sync --extra dev --locked --python "$(command -v python)"
44
+
45
+ - name: Install OPA
46
+ run: |
47
+ OPA_VERSION=v1.7.1
48
+ OPA_SHA256=86cf5e8d189f5d56cc2b05a7920b557c61338aa088334ad2fb3f6de0ec931f04
49
+ curl -fsSL -o opa "https://github.com/open-policy-agent/opa/releases/download/${OPA_VERSION}/opa_linux_amd64_static"
50
+ echo "${OPA_SHA256} opa" | sha256sum -c -
51
+ chmod +x opa
52
+ sudo mv opa /usr/local/bin/opa
53
+ opa version
54
+
55
+ - name: Run lint
56
+ run: make lint PYTHON=.venv/bin/python
57
+
58
+ - name: Run typecheck
59
+ run: make typecheck PYTHON=.venv/bin/python
60
+
61
+ - name: Check manifest digests
62
+ run: make check-digests PYTHON=.venv/bin/python
63
+
64
+ - name: Run public documentation checks
65
+ run: make check-docs PYTHON=.venv/bin/python
66
+
67
+ - name: Run verification
68
+ run: make verify PYTHON=.venv/bin/python
69
+
70
+ - name: Run smoke tests
71
+ run: make test PYTHON=.venv/bin/python
72
+
73
+ - name: Run coverage gate
74
+ run: make coverage PYTHON=.venv/bin/python
@@ -0,0 +1,293 @@
1
+ # Changelog
2
+
3
+ All notable changes to this reference implementation are documented here.
4
+
5
+ ## v0.3.2 - 2026-06-13
6
+
7
+ ### Added
8
+
9
+ - Added `make check-lock`, a lockfile freshness gate wired into `make verify`
10
+ (soft warning without `uv` locally; CI enforces via `uv sync --locked`),
11
+ with `uv` carried as a locked dev dependency.
12
+ - Added a release workflow publishing the distribution to PyPI via trusted
13
+ publishing on tag push; this release is the first PyPI publication.
14
+
15
+ ### Changed
16
+
17
+ - Adopted `ruff format` across the codebase and enforced it in `make lint`
18
+ (and therefore in the pre-commit hooks).
19
+ - Recomputed the release manifest workflow digest after the formatting pass
20
+ touched bound demo sources; `release_manifest_version` is updated in the
21
+ permission packet examples, the MCP and LangGraph adapter examples, and
22
+ the packaged mirrors.
23
+
24
+ ### Notes
25
+
26
+ - public API unchanged
27
+ - schema contract unchanged
28
+ - Manifest digests changed because bound workflow-source bytes changed;
29
+ deterministic replay output remains byte-identical.
30
+
31
+ ## v0.3.1 - 2026-06-12
32
+
33
+ ### Added
34
+
35
+ - Added `make validate-human-review` to the `verify` chain: the
36
+ human-review reconstruct and tamper-evidence demo now regenerates the
37
+ committed `human_review_event.v0` examples deterministically and gates
38
+ every verification run.
39
+ - Added a shared packaged-resource loader (`oep_verify.resources`) and
40
+ shared script helpers in `oep_verify.verify_support` (`stable_json`,
41
+ `eval_opa_decision`, `required_field`, `load_json_object_or_exit`,
42
+ `read_only_sqlite_connection`), replacing duplicated copies across the
43
+ per-package paths modules and validation scripts.
44
+ - Added pre-commit hooks bound to `make lint` and `make typecheck`, a
45
+ Dependabot configuration for uv and GitHub Actions, and CI concurrency
46
+ groups plus uv download caching.
47
+
48
+ ### Changed
49
+
50
+ - Split the `oep_permissions.replay` monolith into a five-module package
51
+ (`records`, `storage`, `wrapper`, `opa`, `surfaces`) behind unchanged
52
+ import paths; all public names re-export from `oep_permissions.replay`.
53
+ - Collapsed the eight v0.3 feature gates into one `validate-v03-features`
54
+ invocation in `verify`; the narrow targets remain as focused aliases.
55
+ - Derived the `coverage` target from the same Makefile validation targets
56
+ as `verify` through a `PY_RUN` runner override, removing the manually
57
+ duplicated coverage command list.
58
+ - Hoisted repeated identifier and digest regex patterns into per-schema
59
+ `$defs` in the agent step event, human review event, tool permission
60
+ packet, and operational trace schemas; validation semantics are
61
+ unchanged.
62
+ - Recomputed the release manifest workflow and tool-schema digests after
63
+ the bound workflow-source and schema bytes changed; updated
64
+ `release_manifest_version` in the permission packet examples, the MCP
65
+ and LangGraph adapter examples, and the packaged mirrors.
66
+
67
+ ### Docs
68
+
69
+ - Restructured the README into a shorter landing page; moved the
70
+ counterfactual replay deep dive, the record-keeping reference tables,
71
+ and the landscape/prior-art sections under `docs/`.
72
+ - Added `docs/quickstart_walkthrough.md`, `docs/schema_reference.md`, and
73
+ `docs/schema_versioning.md`; linked the release checklist from the
74
+ README docs index.
75
+
76
+ ### Quality
77
+
78
+ - Split the two test monoliths into nine domain modules with shared
79
+ fixtures and helpers (`tests/conftest.py`, `tests/helpers.py`);
80
+ parametrized the byte-identical counterfactual replay check.
81
+ - Kept the coverage gate at 95% and byte-identical replay determinism
82
+ green across the refactor.
83
+
84
+ ### Notes
85
+
86
+ - public API unchanged
87
+ - schema contract unchanged
88
+ - Manifest digests changed because bound workflow-source and schema bytes
89
+ changed; deterministic replay output remains byte-identical. This
90
+ release remains a bounded reference implementation: not a
91
+ production-grade replay engine, not a compliance certification, not a
92
+ vendor replacement, and not legal or regulatory adequacy by itself.
93
+ - Version metadata (the `pyproject.toml` bump, this changelog entry, and
94
+ the citation date) landed in a follow-up commit after the v0.3.1 tag;
95
+ the tagged tree archived under DOI 10.5281/zenodo.20667482
96
+ self-identifies as 0.3.0 in `pyproject.toml`.
97
+
98
+ ## v0.3.0 - 2026-05-24
99
+
100
+ ### Added
101
+
102
+ - Added the counterfactual policy replay primitive: given a stored
103
+ decision record from the v0.2 evidence chain, substitute a different
104
+ policy bundle version retroactively and re-derive the discrete OPA
105
+ decision that would have been made under the substituted policy.
106
+ - Added `OEP_REPLAY_MODE=counterfactual` and extended `oep replay` with
107
+ `--counterfactual`, `--policy-bundle`, `--output-format`,
108
+ `--replay-timestamp-utc`, and `--strip-exclusions` options.
109
+ - Added the counterfactual replay output schema
110
+ (`replay/counterfactual_replay.v0.schema.json`) and packaged schema
111
+ resource.
112
+ - Added optional `nd_builtin_cache` capture to the tool permission
113
+ packet schema for deterministic injection of non-deterministic OPA
114
+ builtin outputs during counterfactual replay.
115
+ - Added three counterfactual demos over the existing deterministic
116
+ code-review fixture: compound reliability, budget-per-run cross-over,
117
+ and approval-per-step escalation.
118
+ - Added cross-provider drift counterfactual replay:
119
+ `oep replay --substitute-model <provider:model_version>`. Output is
120
+ labelled as evaluative replay (`replay_class: evaluative`); both
121
+ the recorded estimate and the actual are retained.
122
+ - Added cost-bounded counterfactual replay:
123
+ `oep replay --substitute-budget <policy>` re-evaluates each step
124
+ under a substituted budget policy and reports the first step that
125
+ would have been blocked. Added `per_step_cost_usd`,
126
+ `per_step_cost_tokens`, `budget_cap_active`, and
127
+ `budget_cap_source` fields.
128
+ - Added the reserve-commit-release cost-reservation lifecycle via
129
+ `oep reserve`, and the pre-session projected-cost gate via
130
+ `oep project --approve`. Added `budget_reservation_id`,
131
+ `reservation_estimated_cost_usd`,
132
+ `reservation_committed_cost_usd`,
133
+ `reservation_excess_released_usd`, `reservation_outcome`, and
134
+ `pre_session_projection_event` fields. The projection path emits
135
+ the evaluative-replay marker.
136
+ - Added the 5-surface drift attribution diff and historical replay:
137
+ `oep diff <decision_id_a> <decision_id_b> --surface
138
+ model,policy,prompt,tool,corpus`, and extended substitution via
139
+ `oep replay <id> --substitute
140
+ model=...,policy=...,prompt=...,tool=...,corpus=...`. Added
141
+ per-surface `before_version`, `after_version`, `change_class`,
142
+ and `attribution_confidence` fields.
143
+ - Added cache-substitution counterfactual replay and cache-provenance
144
+ fields: `oep replay <id> --substitute-cache-policy
145
+ <staleness|embedding_version>` plus `cache_hit_id`,
146
+ `cache_version`, `embedding_model_version`, `staleness_flag`,
147
+ `cache_correctness_status`, `similarity_score`, and
148
+ `invalidation_event_id` fields. Staleness-policy rejection is
149
+ deterministic; cache→fresh-call substitution emits the
150
+ evaluative-replay marker.
151
+ - Added the ID-JAG agent-identity integration: an agent-identity
152
+ object bound into the approval-capture record alongside scoped
153
+ credential lifetime. ID-JAG is cited as the IETF draft
154
+ `draft-ietf-oauth-identity-assertion-authz-grant`, not as an
155
+ adopted standard, and the MCP basic specification is NOT claimed
156
+ to reference ID-JAG.
157
+ - Added a unified `decision_id` composite that joins policy,
158
+ permission, cost, 5-surface drift, cache, and identity sub-objects
159
+ into a single counterfactually replayable record, together with a
160
+ composite integration test that runs a composed substitution
161
+ (policy + budget + model) over a fixture decision record carrying
162
+ all six sub-objects.
163
+ - Bumped schema to `schema_version: "0.3"` with additive, optional
164
+ field additions across cost, cache, identity, and the 5-surface
165
+ drift namespace. v0.2 records continue to validate and replay
166
+ against the v0.3 schema; absent surfaces are reported as
167
+ "not recorded" rather than as errors. Migration documented in
168
+ `docs/schema_migration_v0.3.md`.
169
+ - Added `make validate-counterfactual-replay`,
170
+ `make check-replay-determinism`, and
171
+ `make validate-counterfactual-schema`, wired into `make verify`.
172
+ - Added `replay/scripts/check_v03_features.py` and the
173
+ `validate-5surface-diff`, `validate-cost-counterfactual`,
174
+ `validate-reserve-commit-release`,
175
+ `validate-cross-provider-drift`, `validate-cache-substitution`,
176
+ `validate-identity-binding`, `validate-composite`, and
177
+ `validate-backward-compat` targets, all wired into `make verify`.
178
+ - Extended pytest coverage to policy substitution, CLI counterfactual
179
+ mode, non-deterministic builtin cache injection, all three demos,
180
+ schema validation, cross-run byte identity, denied-path replay
181
+ state discipline, the five v0.3 feature checks (reserve,
182
+ cross-provider, cache, identity, composite), and the
183
+ backward-compat regression of v0.2 fixtures against the v0.3
184
+ schema.
185
+ - Added the v0.3 documentation block: EU AI Act Articles
186
+ 19 / 26(6) / 50 / 73 mapping (education-only; no compliance
187
+ claim); AAGATE (arXiv:2510.25863) framed as complementary, not a
188
+ competitor; MCP supply-chain non-claim statement; Replay
189
+ Divergence Problem positioning hook (SDB arXiv:2605.20173);
190
+ Lusser's Law reliability-arithmetic anchor; NIST AI RMF "1.0
191
+ current (under revision); 1.1 via addenda / profiles" wording.
192
+
193
+ ### Notes
194
+
195
+ - Counterfactual replay across the five substitution axes (policy,
196
+ model, budget, cache, identity) is positioned as one inspectable
197
+ demonstration that the v0.2 evidence chain composes into a
198
+ unified, counterfactually replayable decision record. It is not a
199
+ production-grade replay engine, not a compliance certification,
200
+ not a substitute for vendor authorization-replay or observability
201
+ products, and does not constitute legal or regulatory adequacy by
202
+ itself.
203
+ - The non-determinism boundary is honest: policy / permission /
204
+ budget substitution and 5-surface config diff produce
205
+ deterministic "would / would-not have been allowed" outputs;
206
+ cross-provider model substitution, cache→fresh-call substitution,
207
+ and pre-session cost projection emit a `replay_class: evaluative`
208
+ marker and record the substitution as a counterfactual estimate,
209
+ not as a definitive re-derivation. This ties to the Replay
210
+ Divergence Problem positioning hook (SDB arXiv:2605.20173).
211
+ - The closest commercial precedents are Styra DAS log-replay and
212
+ Permit.io Audit Log Replay (policy domain); RunCycles
213
+ (https://runcycles.io, Apache 2.0) ships the closest
214
+ reserve-commit-release transactional cost model; SAFE-CACHE
215
+ (PMC12894985), Krites / AVSC (arXiv:2602.13165), and the
216
+ NDSS 2026 cache-poisoning research provide the academic substrate
217
+ for the cache-correctness primitives. OEP keeps the combined
218
+ implementation open-source, vendor-neutral, and native to agent
219
+ runtime decision records.
220
+ - Per-claim caveats applied: cost incidents (the $47K agent loop,
221
+ the $437 overnight loop, the Particula 847-step incident) are
222
+ cited as practitioner-reported, single-source; OTel GenAI
223
+ crypto-identity fields (`agent.trust_score`,
224
+ `agent.drift_score`, `agent.scan_verdict`, Ed25519 as an OTel
225
+ standard), MLflow GEPA / MIPRO / MemAlign tuning, and AWS
226
+ AgentCore "graduated budget gates 50% / 75% / 90%" as shipped
227
+ vendor features are NOT cited in this release.
228
+ - v0.2 records remain valid and replayable against the v0.3
229
+ schema.
230
+
231
+ ## v0.2.0 - 2026-05-15
232
+
233
+ ### Added
234
+
235
+ - Added v0.2 replayable permission trace fields to the OPA-backed tool
236
+ permission packet schema (`scoped_credential_lifetime`,
237
+ `approval_capture`, `policy_bundle_version`,
238
+ `release_manifest_version`, `model_alias`, `resolved_model_version`,
239
+ `model_provider`). Fields are additive and optional so v0.1 records
240
+ continue to validate; the deterministic code-review demo populates
241
+ them.
242
+ - Added the `oep` console script with a `replay <decision_id>`
243
+ subcommand. The subcommand is a read-only reader over the existing
244
+ SQLite replay store and reconstructs the recorded permission trace
245
+ for a decision id (the `pder_*` packet identifier). It does not
246
+ make live model or vendor API calls.
247
+ - Added an illustrative Model Context Protocol (MCP) adapter under
248
+ `integrations/mcp/` with a mapping reference, synthetic envelope,
249
+ and standalone projection script that translates an MCP
250
+ `tools/call` envelope into an OEP permission packet.
251
+ - Added a README record-keeping reference table mapping OEP record
252
+ fields to EU AI Act articles (Regulation (EU) 2024/1689) and
253
+ NIST AI RMF 1.0 functions (GOVERN / MAP / MEASURE / MANAGE). The
254
+ table is documentation and education only; it does not create a
255
+ compliance or audit claim.
256
+ - Added `make validate-mcp` and `make validate-replay-cli` targets,
257
+ wired into `make verify`.
258
+
259
+ ### Notes
260
+
261
+ - This release does not change the previous boundary statements. It is
262
+ still not ready for production use, not a standard, not proof of
263
+ compliance, and not a vendor replacement.
264
+ - v0.1 records remain valid against the extended permission packet
265
+ schema. The new fields are nullable / omittable for backward
266
+ compatibility.
267
+
268
+ ## v0.1.0 - 2026-05-06
269
+
270
+ Initial public release candidate for the Operational Evidence Plane reference implementation.
271
+
272
+ ### Added
273
+
274
+ - Added a release manifest schema and code-review-agent release example.
275
+ - Added resolved model, prompt, tool schema, policy, workflow, rollout, eval, and data-state layer bindings with content digests.
276
+ - Added an agent-step event profile joined to the release manifest.
277
+ - Added an OPA-backed tool permission packet and executable policy check.
278
+ - Added an operational trace bundle and deterministic eval result.
279
+ - Added a deterministic code-review demo that regenerates local SQLite replay state.
280
+ - Added a reconstruction packet playbook over the full evidence chain.
281
+ - Added Decision Trace Reconstructor JSONL projection, mapping config, and pinned feasibility output for the allowed scenario.
282
+ - Added top-level `make regen-dtr-jsonl` and optional `make validate-dtr` targets.
283
+ - Added inspectability docs, public-claim guardrails, and release checklist.
284
+ - Added optional Bedrock translation notes and mapping data as post-core documentation.
285
+ - Added root sdist/wheel release guardrails for canonical resources, package resources, tests, CI metadata, and generated-artifact exclusions.
286
+ - Added GitHub Actions verification workflow, SECURITY.md, CITATION.cff, CONTRIBUTING.md, and pytest smoke tests for public-readiness hygiene.
287
+
288
+ ### Notes
289
+
290
+ - Public API unchanged.
291
+ - Schema contract introduced.
292
+ - Public package boundary is the root `operational-evidence-plane` distribution; workspace directories are source and development boundaries for this release line.
293
+ - This release candidate is not ready for production use and does not create compliance readiness, legal-audit sufficiency, or standardization status.
@@ -0,0 +1,21 @@
1
+ cff-version: 1.2.0
2
+ message: "If you use this software, please cite it as below."
3
+ type: software
4
+ title: "Operational Evidence Plane"
5
+ authors:
6
+ - family-names: "Solozobov"
7
+ given-names: "Oleg"
8
+ version: "0.3.2"
9
+ date-released: "2026-06-13"
10
+ doi: "10.5281/zenodo.20051036"
11
+ license: "Apache-2.0"
12
+ url: "https://github.com/agent-runtime-evidence/operational-evidence-plane"
13
+ repository-code: "https://github.com/agent-runtime-evidence/operational-evidence-plane"
14
+ abstract: "Vendor-neutral reference implementation for reconstructable agent runtime evidence across release manifests, runtime events, permissioned tool calls, traces, replay state, eval results, and reconstruction packets."
15
+ keywords:
16
+ - agent runtime
17
+ - operational evidence
18
+ - release manifest
19
+ - replay
20
+ - OPA
21
+ - SQLite
@@ -0,0 +1,76 @@
1
+ # Contributing
2
+
3
+ Operational Evidence Plane is a reference implementation for inspectable agent
4
+ runtime evidence. Contributions should keep the repository small,
5
+ deterministic, and vendor-neutral.
6
+
7
+ ## Local Checks
8
+
9
+ Run the verification chain before opening a pull request:
10
+
11
+ ```bash
12
+ make clean-state
13
+ make verify
14
+ make test
15
+ make coverage
16
+ ```
17
+
18
+ Run focused checks while iterating:
19
+
20
+ ```bash
21
+ make lint
22
+ make typecheck
23
+ make check-lock
24
+ make test-policy
25
+ make check-digests
26
+ make check-dtr-jsonl
27
+ make build-check
28
+ ```
29
+
30
+ `make verify` regenerates SQLite replay state and checks cross-artifact joins.
31
+ Generated state, coverage files, build outputs, DTR fragments, and report
32
+ directories should not be committed.
33
+
34
+ Optional: install the pre-commit hooks so `make lint` and `make typecheck`
35
+ run automatically before each commit. The hooks call the Makefile targets, so
36
+ they always match CI. Set up the locked `uv` environment first (see
37
+ `README.md` Quickstart) — the hooks run the project's pinned toolchain:
38
+
39
+ ```bash
40
+ pre-commit install
41
+ ```
42
+
43
+ ## Artifact Updates
44
+
45
+ When changing any file referenced by a resolved release-manifest binding, run:
46
+
47
+ ```bash
48
+ make update-digests
49
+ ```
50
+
51
+ When changing canonical resources that are mirrored into packages, run:
52
+
53
+ ```bash
54
+ make sync-resources
55
+ make build-check
56
+ ```
57
+
58
+ The sync target copies canonical artifacts into package resource directories.
59
+ The package build check fails if canonical artifacts and packaged resources
60
+ drift, if required source files are missing from the sdist, or if generated
61
+ artifacts enter the release package.
62
+
63
+ ## Claim Boundaries
64
+
65
+ Keep public wording aligned with `docs/public_claims.md`.
66
+
67
+ Avoid claims of production readiness, audit readiness, proof of compliance,
68
+ standardization, vendor replacement, agent-framework scope, or model-quality
69
+ benchmarking.
70
+
71
+ ## Scope
72
+
73
+ The public distribution is the root `operational-evidence-plane` package.
74
+ Workspace member directories are source and development boundaries for the
75
+ reference implementation; they are not independently published packages for the
76
+ current release line.