drawbore 0.0.1.dev1__tar.gz → 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (342) hide show
  1. drawbore-0.2.0/.github/workflows/ci.yml +15 -0
  2. drawbore-0.2.0/.github/workflows/release.yml +33 -0
  3. drawbore-0.2.0/.gitignore +20 -0
  4. drawbore-0.2.0/CHANGELOG.md +207 -0
  5. drawbore-0.2.0/LICENSE +21 -0
  6. drawbore-0.2.0/PKG-INFO +104 -0
  7. drawbore-0.2.0/README.md +73 -0
  8. drawbore-0.2.0/context7.json +17 -0
  9. drawbore-0.2.0/docs/agents/common-mistakes.mdx +33 -0
  10. drawbore-0.2.0/docs/agents/context7.mdx +26 -0
  11. drawbore-0.2.0/docs/agents/index.mdx +23 -0
  12. drawbore-0.2.0/docs/agents/rules.mdx +27 -0
  13. drawbore-0.2.0/docs/agents/testing-drawbore.mdx +31 -0
  14. drawbore-0.2.0/docs/examples/index.mdx +28 -0
  15. drawbore-0.2.0/docs/examples/live-openai-smoke.mdx +34 -0
  16. drawbore-0.2.0/docs/examples/number-chain.mdx +36 -0
  17. drawbore-0.2.0/docs/examples/remittance-validation.mdx +42 -0
  18. drawbore-0.2.0/docs/examples/risk-branch.mdx +50 -0
  19. drawbore-0.2.0/docs/examples/tiny-pipeline.mdx +28 -0
  20. drawbore-0.2.0/docs/guide/agentic-tool-loop.mdx +58 -0
  21. drawbore-0.2.0/docs/guide/authority-regression.mdx +64 -0
  22. drawbore-0.2.0/docs/guide/durable-resume.mdx +96 -0
  23. drawbore-0.2.0/docs/guide/escalation.mdx +73 -0
  24. drawbore-0.2.0/docs/guide/evidence-compression.mdx +73 -0
  25. drawbore-0.2.0/docs/guide/identity-versioning.mdx +81 -0
  26. drawbore-0.2.0/docs/guide/json-config.mdx +56 -0
  27. drawbore-0.2.0/docs/guide/local-testing.mdx +60 -0
  28. drawbore-0.2.0/docs/guide/mcp-tools.mdx +134 -0
  29. drawbore-0.2.0/docs/guide/model-backed-agents.mdx +80 -0
  30. drawbore-0.2.0/docs/guide/observability-audit.mdx +72 -0
  31. drawbore-0.2.0/docs/guide/pipelines.mdx +81 -0
  32. drawbore-0.2.0/docs/guide/production-llm-gateway.mdx +81 -0
  33. drawbore-0.2.0/docs/guide/reliability.mdx +51 -0
  34. drawbore-0.2.0/docs/guide/safety-gauntlet.mdx +35 -0
  35. drawbore-0.2.0/docs/guide/taint-and-trust.mdx +47 -0
  36. drawbore-0.2.0/docs/guide/tools.mdx +66 -0
  37. drawbore-0.2.0/docs/index.mdx +47 -0
  38. drawbore-0.2.0/docs/quickstart.mdx +57 -0
  39. drawbore-0.2.0/docs.json +63 -0
  40. drawbore-0.2.0/examples/authority_regression_ci.py +28 -0
  41. drawbore-0.2.0/examples/live_openai_smoke/README.md +22 -0
  42. drawbore-0.2.0/examples/live_openai_smoke/pipeline.py +95 -0
  43. drawbore-0.2.0/examples/live_openai_smoke/test_live_openai_smoke.py +36 -0
  44. drawbore-0.2.0/examples/number_chain/README.md +27 -0
  45. drawbore-0.2.0/examples/number_chain/pipeline.py +132 -0
  46. drawbore-0.2.0/examples/number_chain/test_number_chain.py +46 -0
  47. drawbore-0.2.0/examples/remittance_validation/README.md +28 -0
  48. drawbore-0.2.0/examples/remittance_validation/pipeline.py +119 -0
  49. drawbore-0.2.0/examples/remittance_validation/test_remittance_validation.py +58 -0
  50. drawbore-0.2.0/examples/risk_branch/README.md +28 -0
  51. drawbore-0.2.0/examples/risk_branch/pipeline.py +45 -0
  52. drawbore-0.2.0/examples/risk_branch/test_risk_branch.py +39 -0
  53. drawbore-0.2.0/examples/safety_gauntlet.py +36 -0
  54. drawbore-0.2.0/examples/taint_breaker.py +55 -0
  55. drawbore-0.2.0/examples/tiny_pipeline/README.md +17 -0
  56. drawbore-0.2.0/examples/tiny_pipeline/pipeline.py +44 -0
  57. drawbore-0.2.0/examples/tiny_pipeline/test_tiny_pipeline.py +24 -0
  58. drawbore-0.2.0/pyproject.toml +68 -0
  59. drawbore-0.2.0/src/drawbore/__init__.py +9 -0
  60. drawbore-0.2.0/src/drawbore/_canon.py +34 -0
  61. drawbore-0.2.0/src/drawbore/agent/__init__.py +6 -0
  62. drawbore-0.2.0/src/drawbore/agent/decorator.py +67 -0
  63. drawbore-0.2.0/src/drawbore/agent/spec.py +32 -0
  64. drawbore-0.2.0/src/drawbore/audit/__init__.py +15 -0
  65. drawbore-0.2.0/src/drawbore/audit/recorder.py +120 -0
  66. drawbore-0.2.0/src/drawbore/audit/records.py +102 -0
  67. drawbore-0.2.0/src/drawbore/audit/sink.py +43 -0
  68. drawbore-0.2.0/src/drawbore/config/__init__.py +36 -0
  69. drawbore-0.2.0/src/drawbore/config/authority.py +138 -0
  70. drawbore-0.2.0/src/drawbore/config/catalog.py +91 -0
  71. drawbore-0.2.0/src/drawbore/config/errors.py +29 -0
  72. drawbore-0.2.0/src/drawbore/config/fingerprint.py +38 -0
  73. drawbore-0.2.0/src/drawbore/config/models.py +224 -0
  74. drawbore-0.2.0/src/drawbore/config/resolver.py +281 -0
  75. drawbore-0.2.0/src/drawbore/config/serialization.py +190 -0
  76. drawbore-0.2.0/src/drawbore/context/__init__.py +6 -0
  77. drawbore-0.2.0/src/drawbore/context/build.py +49 -0
  78. drawbore-0.2.0/src/drawbore/context/sanitize.py +48 -0
  79. drawbore-0.2.0/src/drawbore/errors/__init__.py +5 -0
  80. drawbore-0.2.0/src/drawbore/errors/errors.py +109 -0
  81. drawbore-0.2.0/src/drawbore/escalation/__init__.py +15 -0
  82. drawbore-0.2.0/src/drawbore/escalation/confidence.py +15 -0
  83. drawbore-0.2.0/src/drawbore/escalation/dispatch.py +30 -0
  84. drawbore-0.2.0/src/drawbore/escalation/package.py +77 -0
  85. drawbore-0.2.0/src/drawbore/escalation/policy.py +23 -0
  86. drawbore-0.2.0/src/drawbore/evidence/__init__.py +38 -0
  87. drawbore-0.2.0/src/drawbore/evidence/compress.py +116 -0
  88. drawbore-0.2.0/src/drawbore/evidence/errors.py +32 -0
  89. drawbore-0.2.0/src/drawbore/evidence/policy.py +30 -0
  90. drawbore-0.2.0/src/drawbore/evidence/records.py +61 -0
  91. drawbore-0.2.0/src/drawbore/evidence/retrieval.py +77 -0
  92. drawbore-0.2.0/src/drawbore/evidence/store.py +149 -0
  93. drawbore-0.2.0/src/drawbore/evidence/tokens.py +19 -0
  94. drawbore-0.2.0/src/drawbore/evidence/transforms.py +226 -0
  95. drawbore-0.2.0/src/drawbore/identity/__init__.py +14 -0
  96. drawbore-0.2.0/src/drawbore/identity/errors.py +10 -0
  97. drawbore-0.2.0/src/drawbore/identity/identity.py +68 -0
  98. drawbore-0.2.0/src/drawbore/identity/registry.py +162 -0
  99. drawbore-0.2.0/src/drawbore/llm/__init__.py +50 -0
  100. drawbore-0.2.0/src/drawbore/llm/attempts.py +60 -0
  101. drawbore-0.2.0/src/drawbore/llm/build.py +40 -0
  102. drawbore-0.2.0/src/drawbore/llm/chain.py +28 -0
  103. drawbore-0.2.0/src/drawbore/llm/classify.py +69 -0
  104. drawbore-0.2.0/src/drawbore/llm/config.py +110 -0
  105. drawbore-0.2.0/src/drawbore/llm/credentials.py +30 -0
  106. drawbore-0.2.0/src/drawbore/llm/errors.py +37 -0
  107. drawbore-0.2.0/src/drawbore/llm/gateway.py +72 -0
  108. drawbore-0.2.0/src/drawbore/llm/production.py +74 -0
  109. drawbore-0.2.0/src/drawbore/llm/request.py +38 -0
  110. drawbore-0.2.0/src/drawbore/llm/resolution.py +147 -0
  111. drawbore-0.2.0/src/drawbore/llm/runtime.py +136 -0
  112. drawbore-0.2.0/src/drawbore/mcp/__init__.py +21 -0
  113. drawbore-0.2.0/src/drawbore/mcp/auth.py +24 -0
  114. drawbore-0.2.0/src/drawbore/mcp/client.py +101 -0
  115. drawbore-0.2.0/src/drawbore/mcp/errors.py +32 -0
  116. drawbore-0.2.0/src/drawbore/mcp/live.py +169 -0
  117. drawbore-0.2.0/src/drawbore/mcp/registration.py +77 -0
  118. drawbore-0.2.0/src/drawbore/observability/__init__.py +27 -0
  119. drawbore-0.2.0/src/drawbore/observability/errors.py +12 -0
  120. drawbore-0.2.0/src/drawbore/observability/export.py +68 -0
  121. drawbore-0.2.0/src/drawbore/observability/hashing.py +17 -0
  122. drawbore-0.2.0/src/drawbore/observability/semconv.py +44 -0
  123. drawbore-0.2.0/src/drawbore/observability/tracing.py +76 -0
  124. drawbore-0.2.0/src/drawbore/orchestration/__init__.py +46 -0
  125. drawbore-0.2.0/src/drawbore/orchestration/adk_engine.py +93 -0
  126. drawbore-0.2.0/src/drawbore/orchestration/adk_loop.py +333 -0
  127. drawbore-0.2.0/src/drawbore/orchestration/adk_tools.py +222 -0
  128. drawbore-0.2.0/src/drawbore/orchestration/engine.py +79 -0
  129. drawbore-0.2.0/src/drawbore/orchestration/errors.py +15 -0
  130. drawbore-0.2.0/src/drawbore/orchestration/local.py +29 -0
  131. drawbore-0.2.0/src/drawbore/orchestration/scripted_model.py +77 -0
  132. drawbore-0.2.0/src/drawbore/pipeline/__init__.py +8 -0
  133. drawbore-0.2.0/src/drawbore/pipeline/binding.py +25 -0
  134. drawbore-0.2.0/src/drawbore/pipeline/conditions.py +67 -0
  135. drawbore-0.2.0/src/drawbore/pipeline/executor.py +210 -0
  136. drawbore-0.2.0/src/drawbore/pipeline/graph.py +47 -0
  137. drawbore-0.2.0/src/drawbore/pipeline/outcome.py +59 -0
  138. drawbore-0.2.0/src/drawbore/pipeline/pipeline.py +873 -0
  139. drawbore-0.2.0/src/drawbore/py.typed +0 -0
  140. drawbore-0.2.0/src/drawbore/schema/__init__.py +14 -0
  141. drawbore-0.2.0/src/drawbore/schema/errors.py +26 -0
  142. drawbore-0.2.0/src/drawbore/schema/runtime.py +24 -0
  143. drawbore-0.2.0/src/drawbore/schema/static.py +87 -0
  144. drawbore-0.2.0/src/drawbore/state/__init__.py +18 -0
  145. drawbore-0.2.0/src/drawbore/state/checkpoint.py +154 -0
  146. drawbore-0.2.0/src/drawbore/state/resume_ledger.py +158 -0
  147. drawbore-0.2.0/src/drawbore/state/run_state.py +16 -0
  148. drawbore-0.2.0/src/drawbore/state/step_seal.py +88 -0
  149. drawbore-0.2.0/src/drawbore/testing/__init__.py +33 -0
  150. drawbore-0.2.0/src/drawbore/testing/authority.py +32 -0
  151. drawbore-0.2.0/src/drawbore/testing/credentials.py +17 -0
  152. drawbore-0.2.0/src/drawbore/testing/engine.py +107 -0
  153. drawbore-0.2.0/src/drawbore/testing/errors.py +19 -0
  154. drawbore-0.2.0/src/drawbore/testing/gateway.py +70 -0
  155. drawbore-0.2.0/src/drawbore/testing/gauntlet/__init__.py +21 -0
  156. drawbore-0.2.0/src/drawbore/testing/gauntlet/cases.py +68 -0
  157. drawbore-0.2.0/src/drawbore/testing/gauntlet/runner.py +123 -0
  158. drawbore-0.2.0/src/drawbore/testing/harness.py +179 -0
  159. drawbore-0.2.0/src/drawbore/testing/loop.py +74 -0
  160. drawbore-0.2.0/src/drawbore/testing/models.py +61 -0
  161. drawbore-0.2.0/src/drawbore/testing/tools.py +107 -0
  162. drawbore-0.2.0/src/drawbore/tools/__init__.py +41 -0
  163. drawbore-0.2.0/src/drawbore/tools/access.py +92 -0
  164. drawbore-0.2.0/src/drawbore/tools/errors.py +29 -0
  165. drawbore-0.2.0/src/drawbore/tools/proxy/__init__.py +5 -0
  166. drawbore-0.2.0/src/drawbore/tools/proxy/proxy.py +151 -0
  167. drawbore-0.2.0/src/drawbore/tools/registry/__init__.py +5 -0
  168. drawbore-0.2.0/src/drawbore/tools/registry/registry.py +136 -0
  169. drawbore-0.2.0/src/drawbore/tools/taint.py +81 -0
  170. drawbore-0.2.0/src/drawbore/tools/tokens/__init__.py +5 -0
  171. drawbore-0.2.0/src/drawbore/tools/tokens/issuer.py +83 -0
  172. drawbore-0.2.0/src/drawbore/versioning/__init__.py +12 -0
  173. drawbore-0.2.0/src/drawbore/versioning/compat.py +70 -0
  174. drawbore-0.2.0/src/drawbore/versioning/deployment.py +114 -0
  175. drawbore-0.2.0/tests/acceptance/test_authority_regression.py +112 -0
  176. drawbore-0.2.0/tests/acceptance/test_m12_provider_resolution.py +114 -0
  177. drawbore-0.2.0/tests/acceptance/test_remittance_validation.py +539 -0
  178. drawbore-0.2.0/tests/acceptance/test_safety_gauntlet.py +89 -0
  179. drawbore-0.2.0/tests/acceptance/test_taint_breaker.py +188 -0
  180. drawbore-0.2.0/tests/agent/test_agent.py +50 -0
  181. drawbore-0.2.0/tests/agent/test_agent_tools.py +26 -0
  182. drawbore-0.2.0/tests/agent/test_model_fields.py +34 -0
  183. drawbore-0.2.0/tests/agent/test_requires_approval.py +26 -0
  184. drawbore-0.2.0/tests/agent/test_risk_tier_version.py +28 -0
  185. drawbore-0.2.0/tests/audit/__init__.py +0 -0
  186. drawbore-0.2.0/tests/audit/test_failed_step_and_turns.py +48 -0
  187. drawbore-0.2.0/tests/audit/test_recorder.py +45 -0
  188. drawbore-0.2.0/tests/audit/test_records.py +75 -0
  189. drawbore-0.2.0/tests/audit/test_sink.py +33 -0
  190. drawbore-0.2.0/tests/audit/test_skip_record.py +13 -0
  191. drawbore-0.2.0/tests/config/test_authority.py +166 -0
  192. drawbore-0.2.0/tests/config/test_catalog.py +104 -0
  193. drawbore-0.2.0/tests/config/test_config_containment.py +43 -0
  194. drawbore-0.2.0/tests/config/test_errors.py +17 -0
  195. drawbore-0.2.0/tests/config/test_fingerprint.py +64 -0
  196. drawbore-0.2.0/tests/config/test_fingerprint_golden.py +54 -0
  197. drawbore-0.2.0/tests/config/test_graph_round_trip.py +137 -0
  198. drawbore-0.2.0/tests/config/test_m12_fallback_combo.py +58 -0
  199. drawbore-0.2.0/tests/config/test_models.py +134 -0
  200. drawbore-0.2.0/tests/config/test_resolver.py +285 -0
  201. drawbore-0.2.0/tests/config/test_round_trip.py +164 -0
  202. drawbore-0.2.0/tests/config/test_serialization.py +140 -0
  203. drawbore-0.2.0/tests/conftest.py +78 -0
  204. drawbore-0.2.0/tests/context/__init__.py +0 -0
  205. drawbore-0.2.0/tests/context/test_build.py +45 -0
  206. drawbore-0.2.0/tests/context/test_sanitize.py +34 -0
  207. drawbore-0.2.0/tests/errors/test_errors.py +34 -0
  208. drawbore-0.2.0/tests/errors/test_resume_drift_error.py +12 -0
  209. drawbore-0.2.0/tests/escalation/__init__.py +0 -0
  210. drawbore-0.2.0/tests/escalation/test_agent_id.py +19 -0
  211. drawbore-0.2.0/tests/escalation/test_confidence.py +22 -0
  212. drawbore-0.2.0/tests/escalation/test_package.py +57 -0
  213. drawbore-0.2.0/tests/escalation/test_policy_dispatch.py +25 -0
  214. drawbore-0.2.0/tests/evidence/__init__.py +0 -0
  215. drawbore-0.2.0/tests/evidence/test_compress.py +107 -0
  216. drawbore-0.2.0/tests/evidence/test_errors.py +18 -0
  217. drawbore-0.2.0/tests/evidence/test_evidence_containment.py +26 -0
  218. drawbore-0.2.0/tests/evidence/test_policy.py +36 -0
  219. drawbore-0.2.0/tests/evidence/test_records.py +45 -0
  220. drawbore-0.2.0/tests/evidence/test_retrieval.py +152 -0
  221. drawbore-0.2.0/tests/evidence/test_store.py +94 -0
  222. drawbore-0.2.0/tests/evidence/test_tokens.py +22 -0
  223. drawbore-0.2.0/tests/evidence/test_transforms_json_rows.py +85 -0
  224. drawbore-0.2.0/tests/evidence/test_transforms_logs.py +80 -0
  225. drawbore-0.2.0/tests/identity/__init__.py +0 -0
  226. drawbore-0.2.0/tests/identity/test_identity.py +123 -0
  227. drawbore-0.2.0/tests/identity/test_registry.py +146 -0
  228. drawbore-0.2.0/tests/llm/__init__.py +0 -0
  229. drawbore-0.2.0/tests/llm/test_classify.py +57 -0
  230. drawbore-0.2.0/tests/llm/test_containment.py +50 -0
  231. drawbore-0.2.0/tests/llm/test_credentials.py +30 -0
  232. drawbore-0.2.0/tests/llm/test_gateway.py +113 -0
  233. drawbore-0.2.0/tests/llm/test_model_audit.py +46 -0
  234. drawbore-0.2.0/tests/llm/test_production_gateway.py +82 -0
  235. drawbore-0.2.0/tests/llm/test_profile_grammar.py +34 -0
  236. drawbore-0.2.0/tests/llm/test_request_chain_build.py +64 -0
  237. drawbore-0.2.0/tests/llm/test_resolver.py +117 -0
  238. drawbore-0.2.0/tests/llm/test_runtime.py +126 -0
  239. drawbore-0.2.0/tests/llm/test_runtime_config.py +59 -0
  240. drawbore-0.2.0/tests/llm/test_secret_hygiene.py +18 -0
  241. drawbore-0.2.0/tests/mcp/__init__.py +0 -0
  242. drawbore-0.2.0/tests/mcp/test_client.py +59 -0
  243. drawbore-0.2.0/tests/mcp/test_errors.py +27 -0
  244. drawbore-0.2.0/tests/mcp/test_live_client.py +53 -0
  245. drawbore-0.2.0/tests/mcp/test_mcp_containment.py +29 -0
  246. drawbore-0.2.0/tests/mcp/test_proxy_security.py +112 -0
  247. drawbore-0.2.0/tests/mcp/test_registration.py +80 -0
  248. drawbore-0.2.0/tests/observability/__init__.py +0 -0
  249. drawbore-0.2.0/tests/observability/test_export.py +23 -0
  250. drawbore-0.2.0/tests/observability/test_hashing.py +22 -0
  251. drawbore-0.2.0/tests/observability/test_observability_containment.py +32 -0
  252. drawbore-0.2.0/tests/observability/test_semconv.py +31 -0
  253. drawbore-0.2.0/tests/observability/test_tracing.py +46 -0
  254. drawbore-0.2.0/tests/orchestration/__init__.py +0 -0
  255. drawbore-0.2.0/tests/orchestration/test_adk_containment.py +26 -0
  256. drawbore-0.2.0/tests/orchestration/test_adk_engine.py +107 -0
  257. drawbore-0.2.0/tests/orchestration/test_adk_tools.py +57 -0
  258. drawbore-0.2.0/tests/orchestration/test_agentic_loop.py +68 -0
  259. drawbore-0.2.0/tests/orchestration/test_chat_spans.py +61 -0
  260. drawbore-0.2.0/tests/orchestration/test_engine_seam.py +71 -0
  261. drawbore-0.2.0/tests/orchestration/test_local.py +27 -0
  262. drawbore-0.2.0/tests/orchestration/test_loop_callbacks.py +66 -0
  263. drawbore-0.2.0/tests/orchestration/test_loop_engine.py +78 -0
  264. drawbore-0.2.0/tests/orchestration/test_loop_failure.py +91 -0
  265. drawbore-0.2.0/tests/orchestration/test_loop_fallback.py +298 -0
  266. drawbore-0.2.0/tests/orchestration/test_proxy_basetool.py +78 -0
  267. drawbore-0.2.0/tests/orchestration/test_scripted_model.py +78 -0
  268. drawbore-0.2.0/tests/orchestration/test_step_execution.py +70 -0
  269. drawbore-0.2.0/tests/pipeline/test_add.py +102 -0
  270. drawbore-0.2.0/tests/pipeline/test_agent_spans.py +79 -0
  271. drawbore-0.2.0/tests/pipeline/test_audit_trace.py +89 -0
  272. drawbore-0.2.0/tests/pipeline/test_branching.py +53 -0
  273. drawbore-0.2.0/tests/pipeline/test_conditions.py +44 -0
  274. drawbore-0.2.0/tests/pipeline/test_duplicate_names.py +51 -0
  275. drawbore-0.2.0/tests/pipeline/test_escalation.py +188 -0
  276. drawbore-0.2.0/tests/pipeline/test_evidence_pipeline.py +169 -0
  277. drawbore-0.2.0/tests/pipeline/test_executor.py +92 -0
  278. drawbore-0.2.0/tests/pipeline/test_halt_and_resume.py +147 -0
  279. drawbore-0.2.0/tests/pipeline/test_identity_gate.py +116 -0
  280. drawbore-0.2.0/tests/pipeline/test_join.py +209 -0
  281. drawbore-0.2.0/tests/pipeline/test_loop_evidence_compose.py +77 -0
  282. drawbore-0.2.0/tests/pipeline/test_loop_pipeline.py +103 -0
  283. drawbore-0.2.0/tests/pipeline/test_mcp_pipeline_run.py +106 -0
  284. drawbore-0.2.0/tests/pipeline/test_model_audit_recorded.py +42 -0
  285. drawbore-0.2.0/tests/pipeline/test_model_backed_run.py +158 -0
  286. drawbore-0.2.0/tests/pipeline/test_registry_override.py +99 -0
  287. drawbore-0.2.0/tests/pipeline/test_resume_drift.py +444 -0
  288. drawbore-0.2.0/tests/pipeline/test_resume_skip.py +74 -0
  289. drawbore-0.2.0/tests/pipeline/test_run.py +110 -0
  290. drawbore-0.2.0/tests/pipeline/test_run_guards.py +136 -0
  291. drawbore-0.2.0/tests/pipeline/test_skip.py +135 -0
  292. drawbore-0.2.0/tests/pipeline/test_taint_integration.py +83 -0
  293. drawbore-0.2.0/tests/schema/test_runtime.py +32 -0
  294. drawbore-0.2.0/tests/schema/test_static.py +59 -0
  295. drawbore-0.2.0/tests/state/test_checkpoint.py +32 -0
  296. drawbore-0.2.0/tests/state/test_checkpoint_seal.py +86 -0
  297. drawbore-0.2.0/tests/state/test_checkpoint_trust.py +101 -0
  298. drawbore-0.2.0/tests/state/test_resume_ledger.py +118 -0
  299. drawbore-0.2.0/tests/state/test_run_state.py +16 -0
  300. drawbore-0.2.0/tests/state/test_state_containment.py +152 -0
  301. drawbore-0.2.0/tests/state/test_step_seal.py +134 -0
  302. drawbore-0.2.0/tests/test_end_to_end.py +51 -0
  303. drawbore-0.2.0/tests/test_import_structure.py +23 -0
  304. drawbore-0.2.0/tests/test_packaging.py +33 -0
  305. drawbore-0.2.0/tests/testing/test_acceptance_config_safety.py +131 -0
  306. drawbore-0.2.0/tests/testing/test_acceptance_evidence.py +260 -0
  307. drawbore-0.2.0/tests/testing/test_acceptance_failures.py +120 -0
  308. drawbore-0.2.0/tests/testing/test_acceptance_loop.py +124 -0
  309. drawbore-0.2.0/tests/testing/test_acceptance_model.py +108 -0
  310. drawbore-0.2.0/tests/testing/test_engine.py +95 -0
  311. drawbore-0.2.0/tests/testing/test_errors.py +16 -0
  312. drawbore-0.2.0/tests/testing/test_footprint_soundness.py +45 -0
  313. drawbore-0.2.0/tests/testing/test_gateway.py +61 -0
  314. drawbore-0.2.0/tests/testing/test_gauntlet_cases.py +43 -0
  315. drawbore-0.2.0/tests/testing/test_gauntlet_mapping.py +58 -0
  316. drawbore-0.2.0/tests/testing/test_gauntlet_runner_smoke.py +35 -0
  317. drawbore-0.2.0/tests/testing/test_harness_happy_path.py +140 -0
  318. drawbore-0.2.0/tests/testing/test_loop_vocab.py +38 -0
  319. drawbore-0.2.0/tests/testing/test_m12_test_mode.py +65 -0
  320. drawbore-0.2.0/tests/testing/test_mock_key_validation.py +101 -0
  321. drawbore-0.2.0/tests/testing/test_overlay_taint.py +62 -0
  322. drawbore-0.2.0/tests/testing/test_scoped_registry.py +129 -0
  323. drawbore-0.2.0/tests/testing/test_testing_containment.py +32 -0
  324. drawbore-0.2.0/tests/tools/test_access.py +103 -0
  325. drawbore-0.2.0/tests/tools/test_errors.py +15 -0
  326. drawbore-0.2.0/tests/tools/test_pipeline_tools.py +86 -0
  327. drawbore-0.2.0/tests/tools/test_proxy.py +155 -0
  328. drawbore-0.2.0/tests/tools/test_proxy_spans.py +60 -0
  329. drawbore-0.2.0/tests/tools/test_proxy_taint.py +107 -0
  330. drawbore-0.2.0/tests/tools/test_register_mcp_tool.py +22 -0
  331. drawbore-0.2.0/tests/tools/test_registry.py +58 -0
  332. drawbore-0.2.0/tests/tools/test_registry_taint.py +64 -0
  333. drawbore-0.2.0/tests/tools/test_taint.py +83 -0
  334. drawbore-0.2.0/tests/tools/test_tokens.py +71 -0
  335. drawbore-0.2.0/tests/versioning/__init__.py +0 -0
  336. drawbore-0.2.0/tests/versioning/test_compat.py +80 -0
  337. drawbore-0.2.0/tests/versioning/test_deployment.py +114 -0
  338. drawbore-0.2.0/uv.lock +2669 -0
  339. drawbore-0.0.1.dev1/PKG-INFO +0 -27
  340. drawbore-0.0.1.dev1/README.md +0 -14
  341. drawbore-0.0.1.dev1/pyproject.toml +0 -21
  342. drawbore-0.0.1.dev1/src/drawbore/__init__.py +0 -7
@@ -0,0 +1,15 @@
1
+ name: CI
2
+ on:
3
+ push: { branches: [main] }
4
+ pull_request:
5
+ jobs:
6
+ test:
7
+ runs-on: ubuntu-latest
8
+ steps:
9
+ - uses: actions/checkout@v4
10
+ - uses: astral-sh/setup-uv@v5
11
+ - run: uv sync
12
+ - run: uv run pytest -q
13
+ - name: marker backstop
14
+ run: |
15
+ ! grep -rInE '\bD[0-9]{1,3}\b|\bM[0-9]{1,2}\b|private/' --include='*.py' --include='*.md' --include='*.toml' src/ docs/ README.md CHANGELOG.md pyproject.toml
@@ -0,0 +1,33 @@
1
+ name: Release
2
+ on:
3
+ push:
4
+ tags: ["v*"]
5
+ jobs:
6
+ build:
7
+ runs-on: ubuntu-latest
8
+ steps:
9
+ - uses: actions/checkout@v4
10
+ - uses: astral-sh/setup-uv@v5
11
+ - run: uv sync && uv run pytest -q
12
+ - run: uv build
13
+ - uses: actions/upload-artifact@v4
14
+ with: { name: dist, path: dist/ }
15
+ publish-testpypi:
16
+ needs: build
17
+ runs-on: ubuntu-latest
18
+ environment: testpypi
19
+ permissions: { id-token: write }
20
+ steps:
21
+ - uses: actions/download-artifact@v4
22
+ with: { name: dist, path: dist/ }
23
+ - uses: pypa/gh-action-pypi-publish@release/v1
24
+ with: { repository-url: "https://test.pypi.org/legacy/" }
25
+ publish-pypi:
26
+ needs: publish-testpypi
27
+ runs-on: ubuntu-latest
28
+ environment: pypi
29
+ permissions: { id-token: write }
30
+ steps:
31
+ - uses: actions/download-artifact@v4
32
+ with: { name: dist, path: dist/ }
33
+ - uses: pypa/gh-action-pypi-publish@release/v1
@@ -0,0 +1,20 @@
1
+ # macOS
2
+ .DS_Store
3
+
4
+ # Maintainer overlay — never tracked here
5
+ private/
6
+ docs/internal
7
+ docs/superpowers
8
+ docs/release-checklist.md
9
+ AGENTS.override.md
10
+ CLAUDE.local.md
11
+ .env
12
+ .env.*
13
+ .private-journal/
14
+ .repowise/
15
+ .claude/settings.local.json
16
+
17
+ # Local artifacts
18
+ __pycache__/
19
+ .vscode/
20
+ DECISIONS.md
@@ -0,0 +1,207 @@
1
+ # Changelog
2
+
3
+ All notable changes to this project will be documented in this file.
4
+
5
+ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
6
+ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
7
+
8
+ ## [Unreleased]
9
+
10
+ ## [0.2.0] - 2026-06-11
11
+
12
+ ### Added
13
+ - Refuse-on-drift resume: resuming a checkpointed run now verifies a per-step
14
+ seal of each completed step's declared contract (version, risk tier, tools,
15
+ model, instructions, schemas, evidence policy) before anything executes, and
16
+ halts with the new `resume_drift` halt code naming the changed field instead
17
+ of replaying outputs into changed logic.
18
+ - `RunResult.resume_ledger`: a regulator-readable record of every step's
19
+ resume disposition (`executed` / `restored` / `skipped` / `refused`) with a
20
+ `legible()` rendering, present on every run.
21
+ - `CheckpointStore.record_seal` / `seal_of` with safe defaults; custom durable
22
+ stores should implement both and persist output, trust, and seal atomically.
23
+ - New guide: durable resume (`docs/guide/durable-resume.mdx`).
24
+
25
+ ### Changed
26
+ - Resuming under a changed pipeline topology now halts with `resume_drift`
27
+ when the run had prior progress. Previously the stale checkpoints were
28
+ silently discarded and the pipeline re-ran from the beginning under the same
29
+ `run_id`; that silent restart could re-execute already-completed steps.
30
+ - Resumes from checkpoint stores that do not persist seals (including
31
+ checkpoints written by earlier versions) now refuse with `resume_drift`:
32
+ an unverifiable step is never restored. Re-run such flows under a fresh
33
+ `run_id`, or upgrade the store.
34
+
35
+ ## [0.1.0] - 2026-06-10
36
+
37
+ ### Changed
38
+
39
+ - Development dependencies moved out of the public `dev` extra into PEP 735 dependency-groups.
40
+ - `pipeline.test_mode()` now exposes its full typed signature (was `**kwargs`) for IDE completion.
41
+ - Internal development annotations removed from `src/` and `pyproject.toml`.
42
+ These identifiers were never intended for the public package surface;
43
+ documentation and explanatory content is preserved, only the codenames are
44
+ gone. A new automated test (`tests/docs/test_source_boundary.py`) asserts
45
+ zero matches going forward.
46
+ - `import drawbore` no longer eagerly imports provider/engine SDKs for
47
+ deterministic-only pipelines (~2.6 s → under 0.3 s); the cost moves to first
48
+ engine construction.
49
+ - Schema-violation halt reasons use the stable token `schema_violation: input|output: ...`
50
+ with a readable field-by-field summary (no raw error dumps); the audit record's
51
+ violation count now keys on the token, not prose.
52
+
53
+ ### Fixed
54
+
55
+ - `Pipeline.run()` raises a legible `TypeError` when the initial input is not a
56
+ Pydantic model (was a raw `AttributeError`).
57
+ - Checkpointed runs fail closed: `Pipeline.run(checkpoints=...)` without an explicit
58
+ `run_id` now raises instead of silently replaying a previous run's outputs.
59
+ `RunResult` gains `run_id` so callers can see which run identity executed.
60
+ - Resolver fail-closed completeness: `from_json` now surfaces malformed JSON as `ConfigResolutionError` (was a raw `json.JSONDecodeError`), so the resolver's contract — it raises only `ConfigResolutionError` — holds for parse errors too.
61
+ - In-loop tool denial no longer prints an engine-internal traceback or an OpenTelemetry context-detach error to stderr. The success/failure output of examples like the taint-gated demo is now clean; exit code and runtime behaviour are unchanged.
62
+
63
+ ### Added
64
+
65
+ - PEP 561 `py.typed` marker: type checkers now see Drawbore's annotations.
66
+ - `JoinNode` is exported from `drawbore.pipeline` (it is `Join`'s return type).
67
+ - `RunResult.halt_code`: a stable, documented halt token (`drawbore.errors.HALT_CODES`)
68
+ so callers branch on a code instead of parsing the human-readable `reason`.
69
+ - Taint-gated tool calls: declare `source_trust` / `exfil_capable` on tools and Drawbore
70
+ refuses to invoke an exfil-capable tool while a step is handling untrusted data — a
71
+ deterministic lethal-trifecta breaker (untrusted input, tool access, outward data flow). Untrusted data also cannot steer a `When` branch,
72
+ and trust survives checkpoint resume. MCP tools are untrusted-source by default; set the
73
+ pipeline's `initial_trust` for untrusted-ingest (triage) workflows.
74
+ - Safety gauntlet: a reusable test-mode corpus that drives canonical attacks (schema
75
+ violation, low-confidence escalation, unmocked-tool fail-closed, circuit-breaker trip,
76
+ parallel-call refusal, non-JSON model output) through the real safety layer and asserts
77
+ structural refusal — `drawbore.testing.run_containment` / `assert_contained` / `run_pack`
78
+ with `Containment` verdicts. `None` means the attack was not contained.
79
+ - Authority regression check: derive a capability footprint from a pipeline manifest and
80
+ fail closed when an edit grants an agent new tool or evidence authority
81
+ (`drawbore.config.check_no_new_authority`, `authority_diff`, `effective_authority`),
82
+ with a `test_mode` soundness helper (`drawbore.testing.assert_footprint_sound`).
83
+ - Native branching with serializable `When` conditions, explicit `Join` nodes
84
+ (`exactly_one` / `first_by_priority` / `all_present`), and skip-propagation —
85
+ every not-taken branch is explained in the audit trace. Pipeline JSON config
86
+ round-trips the new constructs.
87
+ - Runnable examples: `examples/tiny_pipeline/` and `examples/remittance_validation/`, each with a README and tests that run via `pipeline.test_mode` (synthetic data, no live calls), plus example pages under the docs Examples section. Examples now run in CI.
88
+ - Public documentation site skeleton: a `docs.json` navigation, a docs landing page and quickstart, coding-agent docs (`docs/agents/*`), and the guides reorganized into a clear public structure.
89
+ - MIT `LICENSE` file at the repository root (the package already declared MIT).
90
+ - Engine carrier: `StepExecution(output, model_audit, model_turns)`; `ADKEngine` consumes the one `LLMRuntime` (one-shot resolves and completes through it), with `gateway=` kept as a back-compat alias.
91
+ - Runtime LLM authority: `LLMRuntime` resolves provider chains, walks fallbacks with classification, and records a `ModelAudit`; `from_gateway(...)` keeps direct model strings working.
92
+ - Production gateway: single-call `ProductionLLMGateway` (per-provider base_url/timeout/extra; contract failures fail closed; transport exceptions propagate for the runtime to classify); `ModelResponse.audit` additive field.
93
+ - Provider classification: `classify_provider_exception` maps LiteLLM exceptions to the closed `transport`/`auth`/`contract`/`unknown` taxonomy — no broad catch-and-fallback.
94
+ - Model audit: `ModelAttemptAudit`/`ModelAudit` (declared refs, attempts, selected model, loop fallback phase) with a legible summary.
95
+ - Audit: `StepAuditRecord.model` carries the per-step `ModelAudit`; the pipeline records the model attempts (rendered in `audit_trace.legible()`) and reads `model_turns` from the `StepExecution` carrier.
96
+ - Model resolver: `resolve_chain(...)` -> `ResolvedModelChain` of `ModelAttempt`s; profile expansion, direct-string passthrough, `(provider, model)` dedup, credential preflight.
97
+ - Runtime LLM config: typed, closed `LLMRuntimeConfig`/`ModelProfile`/`ModelTarget`/`ProviderConfig` (separate from pipeline JSON).
98
+ - Failure taxonomy: `LLMConfigError` (halt reason `model_config_error`) and the `profile:` grammar.
99
+ - Credential seam: `CredentialChecker` protocol + `EnvCredentialChecker` (availability only, never the secret value).
100
+ - Loop fallback: model+tools agents resolve through the runtime; provider fallback may advance only before any tool call has run, a post-tool provider failure fails closed, and tool failures/denials always abort — all bounded by `max_llm_calls`.
101
+ - Test mode: `profile:*` resolves under test mode via a per-step `LLMRuntime`; `StaticCredentialChecker` (default available) keeps CI key-free, while `available=False` exercises `model_config_error`; missing mocks still fail closed with `TestingError` — never a provider call.
102
+ - Observability + docs: chat spans tag the declared ref (`drawbore.declared_model`) and resolved model; containment tests keep LiteLLM under `drawbore.llm` and ADK under `drawbore.orchestration`; and a new production-LLM-gateway guide (`docs/guide/production-llm-gateway.mdx`) is added.
103
+ - Acceptance tests: end-to-end provider resolution, one-shot fallback audit, missing-credential `model_config_error`, and `LocalEngine` engine-agnostic fail-closed through `pipeline.run`.
104
+ - Phase 1 release readiness for the `v0.1.0` tag.
105
+ - Validation-pipeline docs: the canonical validation-pipeline example (`docs/examples/remittance-validation.mdx`).
106
+ - Acceptance (config composition): tests proving the canonical pipeline exports to JSON config with named `AgentCatalog` refs, loads back through `from_json(data, agents=catalog, registry=registry)` against a pre-populated registry, runs config-loaded in test mode with mocks supplied outside the config, rejects version drift before test mode, and never serializes mocks/stores/seeded handles/model scripts.
107
+ - Acceptance (tool access): a test proving a declared tool with no mock fails closed in test mode (`testing_error`), halts at `transaction_retriever`, and records a failed step where the tool call was attempted — no live external is reached.
108
+ - Acceptance (schema violation): a test proving a mocked model response that violates a downstream output schema halts at `risk_scorer`, records one schema violation, and runs no step past the failed boundary.
109
+ - Acceptance (escalation): a negative case proving a low-confidence `risk_scorer` output (a `HasConfidence` model below the pipeline threshold) escalates through the configured policy with a legible reason on the run and audit record; the canonical happy path stays zero-escalation.
110
+ - Acceptance (authority-bound retrieval): tests proving the `evidence_reviewer` loop reaches `evidence://retrieve` via proxy/JIT with an explicit seeded handle — allowed search succeeds and is audited, and a policy-forbidden full retrieval fails closed and halts the step.
111
+ - Acceptance (evidence compression): a test proving the `risk_scorer` step compresses large model-bound evidence, records a compressed decision and handle in audit, and retains the original in the injected evidence store — distinct from the seeded review handle.
112
+ - Acceptance (audit legibility): a test asserting `audit_trace.legible()` carries the pipeline name/version, completed-step count, each tool call, model turns, the evidence-compression decision and handle, and the final status — durable phrases, not a snapshot.
113
+ - Acceptance (happy path): the canonical five-step remittance_confirmation pipeline (deterministic + tool-backed + model one-shot + model+tools loop + fan-in) runs end-to-end through `pipeline.test_mode` with explicit `From(...)` bindings and completes with a typed confirmation, five audited steps, and zero escalations/violations.
114
+ - Validation scenario: locked refs/models/tools for the canonical remittance-confirmation acceptance pipeline (`tests/acceptance/test_remittance_validation.py`).
115
+ - Test-mode hardening: `pipeline.test_mode(...)` now validates `mock_model_responses` and `mock_loop_scripts` keys eagerly at construction — each must name a step of the matching category (a one-shot model agent for responses, a model+tools agent for loop scripts), else a legible `TestingError`. Previously a mistyped or miscategorized agent-name key did nothing until a run-time fail-closed halt; this mirrors the existing eager tool-mock guard.
116
+ - Test-mode containment: a test enforcing that `drawbore.testing` imports no `google.adk`/`litellm`/MCP SDK — the fake loop model (a `BaseLlm`) lives under `drawbore.orchestration` and is reached only as an opaque model factory.
117
+ - Test-mode acceptance (config + safety): tests proving a `from_json` pipeline runs in test mode without weakening config drift checks, mocks never leak to the global registry, declared externals fail closed by default, and `allow_real_tools` is the only real-tool opt-in.
118
+ - Test-mode acceptance (evidence): tests proving evidence policy disabled/simulate/compress behave as designed in test mode, and `evidence://retrieve` is auto-bound to the test store, runs through the proxy for both allow and deny, and shadows any original-store binding.
119
+ - Test-mode acceptance (loop): tests proving the scripted loop drives the real agentic tool loop — happy path (tool call + final JSON, model turns audited), in-loop tool denial (immediate abort + failed-step audit), multi-call fail-closed, the loop bound, and agent-name routing for two same-`model` loop agents.
120
+ - Test-mode acceptance (model): tests proving mocked model outputs flow through the real escalation/confidence and human-approval gates, agent-name routing distinguishes two same-`model` agents, a missing response fails closed, and a fallback chain is exercised without a live provider.
121
+ - Test-mode acceptance (failures): tests proving local test mode preserves the real input/output schema gates, proxy-routed mocked tool calls and step audit, operation-scope denial, and the per-step circuit breaker.
122
+ - Test-mode harness: `Pipeline.test_mode(...)` returns an async context manager (`drawbore.testing.TestMode` -> `TestPipeline`) that runs the pipeline through the REAL `Pipeline.run` with a scoped registry overlay, a name-routing `TestEngine`, an in-memory audit sink and evidence store, and deterministic run ids. `TestPipeline.run` returns a production-shaped `RunResult`/`audit_trace`; `.audit_sink`/`.evidence_store` are exposed. No global state is mutated.
123
+ - Test engine: `drawbore.testing.engine.TestEngine` — an `OrchestratorEngine` that routes by `AgentSpec.name` and delegates each step to a per-step `ADKEngine` (agent-scoped `FakeGateway` for one-shot, agent-scoped scripted `model_factory` for the real agentic loop; deterministic agents run their real `fn`); missing model/loop mocks fail closed.
124
+ - Loop-script vocabulary: `drawbore.testing.{call,final,text,multi_call}` describe scripted model turns for a model+tools agent; `to_turns` normalizes them to the orchestration scripted-model tuple form (`final` becomes a JSON text turn).
125
+ - Scripted loop model: `drawbore.orchestration.make_scripted_model_factory` builds an ADK-contained fake `BaseLlm` (scripted call/multicall/text turns) handed to `ADKEngine(model_factory=...)` so local test mode drives the real agentic loop with no network; `drawbore.testing` reaches it only as an opaque callable.
126
+ - Fake gateway: `drawbore.testing.gateway.FakeGateway` — a pure `LLMGateway` (no ADK) that returns scripted one-shot `ModelResponse`s for a single named agent (dict/model/sequence/callable(ModelRequest)); missing/exhausted fails closed with `TestingError`; never infers the agent from prompt or model name.
127
+ - Scoped registry: `drawbore.testing.tools.build_scoped_registry` builds a per-test `ToolRegistry` overlay of only the declared tools, preserving each tool's allowed-operations/schema/kind; mock handlers still run through `ToolProxy`; unmocked declared tools fail closed at invoke; `allow_real_tools` opts a tool into its real handler; `evidence://retrieve` rebinds to the test evidence store. Mock value forms: static / sync / async / sequence (`drawbore.testing.models`).
128
+ - Runtime seam: an internal `registry_override` keyword on `Pipeline.run`/`_run_inner` (default `None`, backward-compatible) routes a single scoped `ToolRegistry` to BOTH the `ToolProxy` and the `ToolLoopBundle` for a run — the seam local test mode uses; the public mock surface remains `Pipeline.test_mode`.
129
+ - Test-mode foundation: `drawbore.testing.TestingError` (a `DrawboreError` with a self-declared `halt_reason="testing_error"`) and decisions for local test mode.
130
+ - Test-mode docs: a local-testing guide (`docs/guide/local-testing.mdx`).
131
+ - JSON-config docs: a JSON-config guide (`docs/guide/json-config.mdx`).
132
+ - Config containment: a test enforcing that `drawbore.config` imports no `google.adk`, `litellm`, MCP SDK, or `drawbore.orchestration`/`llm`/`mcp`/`audit`/`observability` — the config layer is an ADK-free construction layer over existing declarations that computes its own `schema_fingerprint` rather than reaching for the observability/exporter surface.
133
+ - Config round-trip coverage: linear and fan-in (`From("agent.field")`) pipelines, escalation policy + confidence threshold, per-step `EvidencePolicy` (applied to `Step.evidence`), one-shot model metadata, and model+tools-without-fallback all round-trip through `to_config`/`from_config` without importing ADK or constructing an engine; `to_json` is deterministic.
134
+ - Config import: `from_config(config, *, agents, registry=None)` / `from_json(data, *, agents, registry=None)` resolve a manifest into a live `Pipeline` — fail-closed on unknown `schema_version`, missing/ambiguous refs, declaration drift (version/risk-tier/tools/model/fallback/instructions/schema), duplicate names, `model+tools+fallback_model` and literal bindings, invalid tagged input modes / lying `depends_on`, and unregistered tools; static compatibility still runs through `Pipeline.add` and surfaces as `ConfigResolutionError`.
135
+ - Config export: `to_config(pipeline, *, agents)` / `to_json(...)` walk a live `Pipeline` to a typed manifest — agent declarations (ref via the bidirectional catalog, schema evidence + full SHA-256 hashes), tagged input modes derived from runtime semantics, serialized per-step `EvidencePolicy`. Export fails closed on an uncataloged/ambiguous agent and on any live step whose `depends_on` lies about the derived input mode. `to_json` is deterministic (sorted canonical JSON).
136
+ - Agent catalog: `drawbore.config.AgentCatalog` (bidirectional `ref <-> Agent`) plus `resolve_ref`/`ref_for` helpers that also accept a plain `Mapping[str, Agent]`, inverting it by object identity and failing closed on uncataloged (zero) or ambiguous (multiple) refs.
137
+ - Config models: the `extra="forbid"` manifest models in `drawbore.config.models` (`PipelineConfig`, `PipelineMetaConfig`, `OnFailureConfig`, `AgentConfig`, `StepConfig`, `StepInputConfig`, `BindingConfig`, `EvidencePolicyConfig`) — tagged input modes (`initial`/`previous`/`bindings`), literal bindings rejected with a legible message, unknown fields rejected at every level.
138
+ - Config foundation: `drawbore.config.ConfigResolutionError` (a `DrawboreError` with a self-declared `halt_reason="config_resolution_error"`) and `config.fingerprint.schema_fingerprint` (canonical-JSON + full SHA-256 schema-drift fingerprint).
139
+ - Agentic-tool-loop docs: an agentic-tool-loop guide (`docs/guide/agentic-tool-loop.mdx`); a pipeline test proving the agentic tool loop composes with evidence compression (a compressed model+tools step retrieves the original mid-loop via the proxy-scoped `evidence://retrieve` tool).
140
+ - Agentic pipeline wiring: for a model-backed step that declares tools the pipeline builds a `ToolLoopBundle` (proxy/issuer/registry/declared/run-ctx) and passes it to `engine.run_step(..., tool_loop=)`, so the engine drives the proxy-scoped loop; every step records `StepAuditRecord.model_turns` (0/1/N), and a step that halts after running tools (the loop's denied-tool case) is recorded as a failed step with its tool calls and reason. Evidence compression still runs before the loop and `evidence://retrieve` is callable in it.
141
+ - Agentic engine routing: `ADKEngine` routes a model-backed agent that declares tools through the hidden ADK Runner loop; one-shot model agents keep the gateway path and deterministic agents the in-process path (unchanged). `ADKEngine(gateway, *, model_factory=…, max_llm_calls=8)` — `model_factory` is duck-typed (default builds ADK's `LiteLlm`; the test seam), `max_llm_calls` is validated `>= 1`. A loop agent that also sets `fallback_model` fails closed with a legible reason.
142
+ - Agentic loop failure semantics: any in-loop tool failure aborts immediately (the engine hard-breaks the event stream and the loop re-raises the precise tool error — no further model turn or tool call runs); a runaway loop is bounded by `max_llm_calls` and halts as `LLMError`; a non-JSON / non-object final answer fails closed; and more than one function call in a single turn fails closed (one-call-per-turn).
143
+ - Agentic loop: `run_agentic_loop(spec, payload, *, tool_loop, run_id, model_factory, max_llm_calls)` drives a hidden ADK `Runner`/`LlmAgent` for a model-backed agent that declares tools — proxy-backed schema tools, the prompt from `build_model_request`, each tool call through the proxy + JIT, model turns traced and counted. Returns `(final_json_dict, model_turns)`; the pipeline validates the dict. Tested with a fake ADK `BaseLlm` (no network).
144
+ - Agentic loop callbacks: `make_loop_model_callbacks` traces each in-loop model turn as a `chat` span nested under `invoke_agent` and counts turns, and short-circuits (returns a terminating response) once a tool failure is recorded; `make_loop_before_tool_callback` blocks undeclared tools and blocks any tool after a failure — the secondary guard behind the engine's hard-break.
145
+ - Proxy-backed ADK tool: `proxy_backed_tool(ref, proxy=, issuer=, run_ctx=, schema=, failures=)` wraps a declared Drawbore tool as a custom ADK `BaseTool` whose declaration is built from the registry tool's JSON schema (so the model sees its parameters) and whose execution routes through `proxy.invoke` + a single-use JIT token; any failure is recorded in the bundle's `failures` and re-raised (immediate-abort signal). ADK stays under `drawbore.orchestration`.
146
+ - Agentic audit: `StepAuditRecord.model_turns` (0 deterministic / 1 one-shot model / N loop) makes every model turn auditable, not only traced; `AuditRecorder.record_failed_step` records a step that halted after doing work (e.g. a denied in-loop tool call) with its tool calls and reason, present in the trail but not counted in `steps` (success-only) — so the loop's headline failure mode is legible in the audit, not just the spans.
147
+ - Engine seam: `OrchestratorEngine.run_step` gains an optional `tool_loop` (a `ToolLoopBundle` carrying the proxy/issuer/registry/declared-refs/run-ctx plus mutable `failures`/`turns` accumulators) for the in-step tool loop; `LocalEngine` now fails closed with an engine-agnostic `EngineError` when asked to run a model-backed agent (it has no model path) instead of silently calling `spec.fn`.
148
+ - Evidence pipeline wiring: `Pipeline.add(agent, ..., evidence=EvidencePolicy(...))` (per-step) and `Pipeline.run(..., evidence_store=...)`. For a model-backed step with an enabled policy, the pipeline compresses the model view before the engine runs, re-validates the bounded view against the input model (never sends an invalid view — strict halts, else passthrough), stores the original, records the decision on the step's audit record (`StepAuditRecord.evidence`, rendered in `legible()`), and tags the `invoke_agent` span with `drawbore.evidence.*`. A fail-closed evidence failure halts-and-escalates with `evidence_error`. Deterministic and policy-less steps are unchanged.
149
+ - Evidence retrieval tool: `register_evidence_tool(registry, store=...)` registers a declared, proxy-backed builtin tool (`evidence://retrieve`) that serves full/search retrieval from the store, gated by per-handle policy (default search-allowed/full-denied) and expiry, failing closed. Search is bounded (capped result count) so it stays scoped and cannot reconstruct the full original. Reached only through the proxy + single-use JIT token — a handle visible in context does not by itself authorize retrieval (the proxy is authoritative). The registry is duck-typed so `evidence` imports nothing from `tools`.
150
+ - Evidence compression entrypoint: `compress_for_model(payload, policy, *, store, run_id, step, source_agent)` — routes to a transform, honours the mode (disabled/simulate/compress), gates on `min_tokens`, stores the original with a deterministic handle, and returns `(model_view, decision, handle)`. Disabled = byte-identical passthrough; simulate records the decision without changing the payload; compress fails closed (`EvidenceStoreError`) if the original is required but can't be stored. Pure (no pipeline/audit/observability import).
151
+ - Evidence logs transform: a deterministic `EvidenceTransform` that compresses long log text to errors, warnings, stack traces, and head/tail context with a dropped-line summary, keying on line-level severity markers (so a benign mid-sentence "error" is not a false ERROR line). Same input + policy gives byte-identical output.
152
+ - Evidence json_rows transform: a deterministic `EvidenceTransform` (protocol + name→transform registry) that compresses large record lists to a bounded, order-preserving view — head + tail + structurally-notable rows (flagged by a signal field, not benign prose) + an evenly-spaced sample, optionally capped to an output-token budget, with warnings. Same input + policy gives byte-identical output. `EvidencePolicy` gains `max_output_tokens`.
153
+ - Initial Phase 1 SDK scaffold: package layout for all modules, the `OrchestratorEngine` abstraction, and project metadata.
154
+ - Foundation: strict runtime schema validation, structural static compatibility, the `@agent` decorator + `AgentSpec`, `RunState`, the `OrchestratorEngine` ABC with an in-process `LocalEngine`, and `Pipeline` with `From` data bindings and a run loop that validates every boundary and halts on violation. Deterministic fan-in pipelines run end-to-end with no ADK.
155
+ - Tool access layer: `tools` package with a tool registry (custom + built-in), single-use opaque non-serializable capability tokens, an in-path tool proxy that enforces tool-level scope, logs calls, and trips a per-run circuit breaker, and a `ToolContext` that exposes only an agent's declared tools (run context flows via a contextvar, never through the agent). Agents declare tools via `@agent(tools=[...])`; `Pipeline(registry=...)` validates declared tools at registration. Still no ADK.
156
+ - Context isolation and errors/checkpointing: a `context` module (`build_input` for orchestrator-constructed isolated payloads; `sanitize` for structural bounds on external input), an `errors` module (`DrawboreError` + `halt_reason_for` taxonomy), and a `CheckpointStore` ABC with an in-memory default. The pipeline now halts-and-escalates on every failure — tool-misuse and agent errors return a halted `RunResult` instead of propagating — and a halted run resumes from the last successful step when given a stable `run_id` and a checkpoint store.
157
+ - Human escalation primitives: an `escalation` module with `EscalationPackage` (carrying a legible, non-engineer-readable summary via `legible()`), `EscalationPolicy` (sync/async delivery modes), an `EscalationDispatcher` interface plus an in-process `RecordingDispatcher` default, and the opt-in `HasConfidence` marker (only models that explicitly inherit it are confidence-checked). Agents may declare `@agent(requires_human_approval=True)`, recorded on `AgentSpec` (default `False`). The pipeline now turns any halt into a dispatched escalation when an `on_failure` policy is configured (`RunResult.status="escalated"`, `RunResult.escalations`), escalates on a `HasConfidence` output below the declared `confidence_threshold` (synchronously, or async = dispatch-and-continue), and treats `@agent(requires_human_approval=True)` as a synchronous approval gate. Without a policy, failures still halt as before (`status="halted"`).
158
+ - Identity and versioning (agent fields): `AgentSpec` and `@agent` now carry a `risk_tier` (`low`/`medium`/`high`/`critical`, default `low`) and a `version` (default `0.0.0`) — the inputs the identity and versioning layers consume.
159
+ - Identity value object: an `identity` module with `AgentIdentity` (the immutable id/ttl/purpose/risk_tier/sponsor/delegation/created-at record) and `attestation_surface` (the tool + input/output schema + risk-tier fingerprint whose change forces re-attestation).
160
+ - Identity registry: an in-process `IdentityRegistry` enforcing the identity lifecycle invariants — registration requires a human sponsor (no agent without an owner), a strict `draft → active → suspended → decommissioned` lifecycle, single-action atomic decommission, and a re-attestation block (`run_block_reason`) when an agent's attestation surface (tools/input/output/risk-tier) changes until the human sponsor re-attests.
161
+ - Versioning classifier: a `versioning` module with `classify_change(old_input, old_output, new_input, new_output)` that labels a change `breaking` (removed field, changed type, modified required-ness, or an added required field) or `non_breaking` (added optional field, or schema-invisible prompt/model swaps), via Pydantic field introspection.
162
+ - Versioning deployment: `RolloutPlan` + `Deployment` — the in-core staging state machine for the "never big-bang deploy" rule (shadow→canary→full for breaking changes, canary→full for non-breaking; a stage must pass before advancing; instant rollback to the last stable version is always available and a failed gate routes to rollback, not forward). Live traffic-splitting and the rollback dashboard are managed-service.
163
+ - Identity gate (pipeline): `Pipeline.run(..., identities=IdentityRegistry)` blocks a registered agent that is suspended, decommissioned, or pending re-attestation, halting-and-escalating through the escalation path (`reason="identity_<state>"`); an unregistered (draft) agent runs exactly as before. The `EscalationPackage` now carries the `agent_id` (the slot escalation reserved), populated on identity-gated halts and shown in `legible()`.
164
+ - Agent model fields: `AgentSpec`/`@agent` now carry `model`, `fallback_model`, and free-text `instructions` (all default `None`). An agent with `model=None` is a deterministic agent and is unchanged; an agent with a `model` is backed by the LLM gateway via the ADK engine.
165
+ - LLM boundary (core): a `llm` module with explicit `ModelRequest`/`ModelResponse` (Drawbore-owned, not ADK/LiteLLM internals), `resolve_model_chain` (primary→fallback), and `build_model_request` — the single, centralised model-input assembly point and the documented seam for the future pre-model evidence transform. No ADK.
166
+ - LLM gateway: `LLMGateway` (ABC) + `LiteLLMGateway` — non-streaming completion with a request-time fallback chain via `litellm.acompletion`. The gateway parses the model's JSON output into a `dict` (the pipeline validates it), falls back model-by-model on provider failure, and fails closed with `ModelUnavailableError` when the whole chain fails or with `LLMError` when a model returns non-JSON. Bifrost can later implement the same ABC.
167
+ - ADK tool guardrail: `drawbore.orchestration.adk_tools` exposes a declared Drawbore tool to ADK as a `FunctionTool` whose callable is the proxy-backed invocation (authoritative) plus a `before_tool_callback` that hard-blocks any undeclared tool (the second, independent guardrail). `google.adk` is imported only under `orchestration` — enforced by a test.
168
+ - ADK engine: `drawbore.orchestration.ADKEngine` (an `OrchestratorEngine`). Deterministic agents (no `model`) run in-process identically to `LocalEngine`; model-backed agents have their completion performed by the injected `LLMGateway` with the prompt built by `build_model_request`, returning the raw structured output for the pipeline to validate. A `ModelUnavailableError` from the gateway propagates for the pipeline to halt-and-escalate.
169
+ - Model-backed pipelines: a model-backed agent runs end-to-end through `Pipeline.run(engine=ADKEngine(gateway=...))` with every Drawbore guarantee intact — the model's structured output is validated against the agent's output model at the boundary, a schema violation halts, and a model-unavailable failure halts-and-escalates through the escalation path. Deterministic and model-backed agents compose in one pipeline.
170
+ - MCP errors and extra: an `mcp` module with `MCPError`/`MCPAuthError`/`MCPToolNotFoundError`; `MCPError` self-declares `halt_reason="mcp_error"` so a failed MCP tool call escalates legibly. The MCP SDK ships opt-in via the `drawbore[mcp]` extra (`google-adk[mcp]`) — the core dependency set is unchanged.
171
+ - MCP transport seam: `MCPClient` (ABC — connect/list_tools/call_tool/close), `MCPServerConfig`, `MCPToolSpec`, `OAuthConfig` (server-level OAuth 2.1/PKCE), and a dependency-free `FakeMCPClient` test double. The seam keeps the MCP security model unit-testable without a live server or the `mcp` SDK.
172
+ - MCP registration: `drawbore.mcp.register_mcp_server(registry, name=, url=, allowed_tools=, auth=, client=)` connects (server-level OAuth), validates each declared tool exists, and registers ONLY the declared tools as proxy-backed `Tool`s (`kind="mcp"`) under `mcp://<server>/<tool>` — registering a server does not expose its other tools. A declared tool the server doesn't advertise raises `MCPToolNotFoundError`. The registry gains a generic, MCP-agnostic `register_mcp_tool` (no `mcp` import).
173
+ - MCP security separation (proven): an MCP tool is reached only through the existing in-path proxy with a single-use JIT token scoped to that exact `mcp://<server>/<tool>` ref; an agent cannot reach an undeclared tool on the same server (blocked at both registration scope and proxy scope); server-level OAuth is consumed only at registration and never flows through the invocation path; the per-step circuit breaker applies to MCP tools. No new proxy authority was added.
174
+ - MCP-backed pipelines: a deterministic agent declaring `@agent(tools=["mcp://<server>/<tool>"])` calls the MCP tool through the existing `ToolContext`/proxy path and runs end-to-end through `Pipeline.run(...)` with every Drawbore guarantee intact — an MCP-tool failure halts-and-escalates with the legible `mcp_error` reason, and declaring an unregistered MCP tool is rejected at pipeline registration. `pipeline.py` is unchanged; the engine/proxy abstractions already carry MCP tools.
175
+ - Live MCP transport: `drawbore.mcp.live.LiveMCPClient` talks to real MCP servers over Streamable HTTP using the `mcp` SDK (the `drawbore[mcp]` extra), with server-level OAuth. It is import-guarded — using it without the extra raises a legible `MCPError` (`pip install drawbore[mcp]`) — and is the only `mcp`-SDK user, enforced by a containment test. A real server round-trip is not unit-tested (no live server in CI); the security model is proven against `FakeMCPClient`.
176
+ - Observability core: a leaf `observability` module — OTel GenAI-semconv span helper (`genai_span`), the Drawbore-owned GenAI attribute-name constants (`semconv`, so the framework depends on the spec, not the private `opentelemetry.semconv._incubating` path), a shared `payload_hash` identity scheme, a test-isolated tracer-provider override (`use_tracer_provider`/`reset_tracer_provider`), and `ObservabilityError`. No new dependency — `opentelemetry-api`/`-sdk` are already core.
177
+ - Tool-call spans: `ToolProxy.invoke` now emits an `execute_tool {tool}` OTel span tagged with `gen_ai.tool.name`, the run id, step, operation, and input/output hashes, with ERROR status and the denial label (`denied:token`/`denied:scope`/`denied:breaker`) on a blocked call. The proxy's input/output hashing now routes through the shared `payload_hash`; `self.log` is unchanged (the span is additive).
178
+ - Agent-invocation spans: the pipeline emits an `invoke_agent {name}` OTel span around each step, tagged with `gen_ai.agent.name`/`.version`/`.id` (when identity-gated), risk tier, run id, step, tenant id (new optional `Pipeline.run(..., tenant_id=...)`), and the input hash; a failed step marks the span ERROR. The step's `execute_tool` and `chat` spans nest under it; spans are correlated across a run by the `drawbore.run_id` attribute.
179
+ - Model-call spans: the ADK engine's model path emits a `chat {model}` OTel span around the gateway completion, tagged with `gen_ai.request.model` (the resolved primary) and `gen_ai.response.model` (the model actually used after any fallback). It nests under the step's `invoke_agent` span; deterministic agents emit no `chat` span.
180
+ - Audit primitives: an `audit` module — an immutable, regulator-legible `AuditRecord` (with `StepAuditRecord` per-step detail and a `legible()` rendering), an append-only `AuditSink` ABC + in-process `InMemoryAuditSink` default (queryable by run id), and an `AuditRecorder` that builds the record during a run. The core is the basic readable/exportable log; tamper-evidence/crypto-signing/compliance export are managed-service.
181
+ - Audit wiring: every `Pipeline.run(...)` now produces an `AuditRecord` on `RunResult.audit_trace` (always built — a completed run reports `audit_trace.steps`/`.escalations`/`.schema_violations`) and writes it to an injected `AuditSink` via the new `run(..., audit=...)` parameter. Each successful step's record carries the agent/version/id, input/output hashes, and a summary of the tool calls it made; a halt records the stopping reason. Finalization happens at a single point, so all the existing halt-and-escalate exits are unchanged.
182
+ - OTLP export: `drawbore.observability.configure_otlp_export(endpoint, *, headers=, set_global=)` wires a `TracerProvider` that batches Drawbore's spans to any OTLP drain (Axiom/Datadog/Grafana/Honeycomb). The exporter ships in the optional `drawbore[otlp]` extra — calling it without the extra fails closed with a legible `ObservabilityError` (`pip install drawbore[otlp]`). Spans always emit to the configured provider, so the core needs only `opentelemetry-api`/`-sdk`.
183
+ - Observability containment and docs: a containment test locks the layering invariant — `observability`/`audit` import no `google.adk` and no `mcp` SDK, and `observability` stays a leaf (no `agent`/`pipeline`/`tools`/`orchestration`/`llm`/`mcp` imports). Added the observability/audit user guide (`docs/guide/observability-audit.mdx`).
184
+ - Evidence primitives: a new `evidence` extension module — `EvidencePolicy` (opt-in, per-step; default disabled = passthrough), immutable `EvidenceHandle`/`EvidenceDecision` records (with `legible()`), a dependency-free deterministic `estimate_tokens` (char/4 heuristic — no tokenizer dependency), and `EvidenceError`/subclasses that self-declare `halt_reason="evidence_error"` so a fail-closed evidence failure escalates legibly.
185
+ - Evidence store: `EvidenceStore` (ABC) + `InMemoryEvidenceStore` — retains the original alongside the compressed view keyed by a deterministic handle id, serves metadata (no content leak), full retrieval, and bounded deterministic search within the original, and fails closed on an unknown or expired handle (injectable clock for expiry). Durable stores are managed-service.
186
+
187
+ ### Changed
188
+
189
+ - Config amendment: `from_config`/`from_json` no longer reject a `model + tools + fallback_model` manifest; execution enforces the safe loop fallback rules. Import stays string-preserving and runtime-free.
190
+ - Optional dependencies: added the `otlp` extra to `pyproject.toml` for OTLP span export; core dependencies remain exactly the four runtime libraries.
191
+ - Runtime hardening: `Pipeline.add(...)` now rejects a second step that reuses an existing agent name (`ValueError`) instead of silently overwriting `_by_name`/`outputs` state — the runtime keys steps and outputs by agent name, so names must be unique within a pipeline.
192
+
193
+ ### Fixed
194
+
195
+ - Tool-access layer security hardening: closed a proxy bypass where an agent could reach the raw tool handler and self-mint tokens for undeclared tools — agents now receive only proxy-backed shims with no reference to the proxy/issuer/registry. The circuit breaker is now per-agent (not per-run) and counts only authorised calls; the proxy logs every call including denials — token/scope/breaker — and handler errors; capability tokens enforce operation scope on consume; `register_tool` accepts a `schema` and tools record `kind` provenance; `get_run_context` raises a typed `ToolAccessError`.
196
+ - Escalation: a `HasConfidence`-marked output that omits the required numeric `confidence` field now fails closed (halts, or escalates under a policy, with reason `confidence_marker_without_value`) instead of letting an `AttributeError` propagate out of `Pipeline.run()` — preserving the halt-and-escalate invariant.
197
+ - LLM gateway hardening: a model returning a 200 with an unexpected response shape (missing `choices`/`message`/`content`) now fails closed as a legible `LLMError` instead of escaping as a raw `KeyError`/`IndexError` — consistent with the non-JSON contract-violation path and the legibility principle. The non-object-JSON fail-closed branch is now test-covered.
198
+ - LLM halt-reason legibility: a model-backed agent whose fallback chain is exhausted now halts/escalates with the legible reason `model_unavailable` (and a model returning unusable content with `model_error`) instead of the generic `agent_error` — so an escalation reader can tell the LLM provider chain failed, not the agent's own code. `LLMError`/`ModelUnavailableError` self-declare a `halt_reason` class attribute, which `halt_reason_for` honours after the curated `_HALT_REASONS` registry — keeping `drawbore.errors` free of any `drawbore.llm` import (no cycle).
199
+ - Live MCP transport resource safety: `LiveMCPClient.connect` now closes the transport if `ClientSession` setup/`initialize` fails after the connection opened, `LiveMCPClient.close` tears the transport down even if the session exit raises (try/finally), and `register_mcp_server` closes the session if a declared tool is not advertised — so a failed connect/registration on a real server cannot leak a socket. (No effect on `FakeMCPClient`; the live path needs the `drawbore[mcp]` extra.)
200
+
201
+ ### Documentation
202
+
203
+ - Fixed the remittance example snippet (missing required bindings) and the
204
+ agentic-tool-loop snippet (undefined variable, superseded constructor form);
205
+ added the risk-branch example page; normalized imports to the top-level surface.
206
+ - Agent guidance no longer references a partial-results option that does not exist.
207
+ - README rewritten to describe the implemented SDK (was a stale scaffolding notice).
drawbore-0.2.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 the Drawbore authors
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,104 @@
1
+ Metadata-Version: 2.4
2
+ Name: drawbore
3
+ Version: 0.2.0
4
+ Summary: Constrained, composable agents that hold by construction — a safety-first SDK for high-stakes, deterministic agent workflows.
5
+ Project-URL: Repository, https://github.com/daviesayo/drawbore
6
+ Project-URL: Documentation, https://github.com/daviesayo/drawbore/tree/main/docs
7
+ Project-URL: Changelog, https://github.com/daviesayo/drawbore/blob/main/CHANGELOG.md
8
+ Author: Davies Ayo
9
+ License: MIT
10
+ License-File: LICENSE
11
+ Keywords: agents,ai,compliance,fintech,observability,pipelines,safety
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Typing :: Typed
20
+ Requires-Python: >=3.11
21
+ Requires-Dist: google-adk
22
+ Requires-Dist: litellm
23
+ Requires-Dist: opentelemetry-api
24
+ Requires-Dist: opentelemetry-sdk
25
+ Requires-Dist: pydantic>=2
26
+ Provides-Extra: mcp
27
+ Requires-Dist: google-adk[mcp]; extra == 'mcp'
28
+ Provides-Extra: otlp
29
+ Requires-Dist: opentelemetry-exporter-otlp; extra == 'otlp'
30
+ Description-Content-Type: text/markdown
31
+
32
+ # Drawbore
33
+
34
+ > Composable agents that hold by construction.
35
+
36
+ Drawbore is a Python SDK for building constrained, composable agent pipelines
37
+ with safety guarantees baked in as architecture — for high-stakes, deterministic
38
+ workflows in fintech, healthcare, legal, compliance, and beyond.
39
+
40
+ The thesis: **capability is a liability in high-stakes workflows; constraint is
41
+ the feature.** You compose tightly scoped, typed, schema-enforced, audit-logged
42
+ agents; the framework enforces the contracts and you write the business logic.
43
+
44
+ ## Install
45
+
46
+ ```bash
47
+ pip install drawbore # core
48
+ pip install "drawbore[mcp]" # + MCP server tools
49
+ pip install "drawbore[otlp]" # + OTLP span export
50
+ ```
51
+
52
+ ## Sixty seconds
53
+
54
+ ```python
55
+ from pydantic import BaseModel
56
+ from drawbore import agent, Pipeline
57
+
58
+ class Payment(BaseModel):
59
+ cents: int
60
+
61
+ class Normalized(BaseModel):
62
+ dollars: float
63
+
64
+ @agent(name="normalize", input=Payment, output=Normalized)
65
+ async def normalize(p: Payment) -> Normalized:
66
+ return Normalized(dollars=p.cents / 100)
67
+
68
+ pipeline = Pipeline(name="payments", version="1.0.0")
69
+ pipeline.add(normalize)
70
+
71
+ async def main():
72
+ async with pipeline.test_mode() as tp:
73
+ result = await tp.run(Payment(cents=2500))
74
+ assert result.status == "completed"
75
+ print(result.audit_trace.legible()) # a regulator-readable run record
76
+ ```
77
+
78
+ Every step is schema-checked at both edges. Every tool call passes through a
79
+ proxy with a single-use permission token. Every run produces a legible audit
80
+ record. None of it is configurable off.
81
+
82
+ ## What the framework guarantees
83
+
84
+ - **Typed contracts at every edge** — Pydantic-validated inputs and outputs;
85
+ a wrong shape halts the run with a readable reason; it never propagates.
86
+ - **Least-privilege tools** — agents call only the tools they declared, through
87
+ one audited chokepoint, with single-use scoped tokens and circuit breakers.
88
+ - **Halt-and-escalate by default** — failures stop the pipeline and produce an
89
+ escalation package a human can read; there is no silent degradation.
90
+ - **Audit as a side effect** — `result.audit_trace.legible()` renders what ran,
91
+ what was called, and why it stopped, with no tracing setup.
92
+ - **Test mode with the real safety layer** — `pipeline.test_mode(...)` mocks the
93
+ outside world (models, tools, MCP) while every control runs for real.
94
+
95
+ ## Learn more
96
+
97
+ - [Quickstart](docs/quickstart.mdx) — install to first run.
98
+ - [Guides](docs/guide/) — pipelines, tools, model-backed agents, escalation,
99
+ testing, observability.
100
+ - [Examples](examples/) — runnable, tested pipelines from tiny to high-stakes.
101
+
102
+ ## License
103
+
104
+ MIT
@@ -0,0 +1,73 @@
1
+ # Drawbore
2
+
3
+ > Composable agents that hold by construction.
4
+
5
+ Drawbore is a Python SDK for building constrained, composable agent pipelines
6
+ with safety guarantees baked in as architecture — for high-stakes, deterministic
7
+ workflows in fintech, healthcare, legal, compliance, and beyond.
8
+
9
+ The thesis: **capability is a liability in high-stakes workflows; constraint is
10
+ the feature.** You compose tightly scoped, typed, schema-enforced, audit-logged
11
+ agents; the framework enforces the contracts and you write the business logic.
12
+
13
+ ## Install
14
+
15
+ ```bash
16
+ pip install drawbore # core
17
+ pip install "drawbore[mcp]" # + MCP server tools
18
+ pip install "drawbore[otlp]" # + OTLP span export
19
+ ```
20
+
21
+ ## Sixty seconds
22
+
23
+ ```python
24
+ from pydantic import BaseModel
25
+ from drawbore import agent, Pipeline
26
+
27
+ class Payment(BaseModel):
28
+ cents: int
29
+
30
+ class Normalized(BaseModel):
31
+ dollars: float
32
+
33
+ @agent(name="normalize", input=Payment, output=Normalized)
34
+ async def normalize(p: Payment) -> Normalized:
35
+ return Normalized(dollars=p.cents / 100)
36
+
37
+ pipeline = Pipeline(name="payments", version="1.0.0")
38
+ pipeline.add(normalize)
39
+
40
+ async def main():
41
+ async with pipeline.test_mode() as tp:
42
+ result = await tp.run(Payment(cents=2500))
43
+ assert result.status == "completed"
44
+ print(result.audit_trace.legible()) # a regulator-readable run record
45
+ ```
46
+
47
+ Every step is schema-checked at both edges. Every tool call passes through a
48
+ proxy with a single-use permission token. Every run produces a legible audit
49
+ record. None of it is configurable off.
50
+
51
+ ## What the framework guarantees
52
+
53
+ - **Typed contracts at every edge** — Pydantic-validated inputs and outputs;
54
+ a wrong shape halts the run with a readable reason; it never propagates.
55
+ - **Least-privilege tools** — agents call only the tools they declared, through
56
+ one audited chokepoint, with single-use scoped tokens and circuit breakers.
57
+ - **Halt-and-escalate by default** — failures stop the pipeline and produce an
58
+ escalation package a human can read; there is no silent degradation.
59
+ - **Audit as a side effect** — `result.audit_trace.legible()` renders what ran,
60
+ what was called, and why it stopped, with no tracing setup.
61
+ - **Test mode with the real safety layer** — `pipeline.test_mode(...)` mocks the
62
+ outside world (models, tools, MCP) while every control runs for real.
63
+
64
+ ## Learn more
65
+
66
+ - [Quickstart](docs/quickstart.mdx) — install to first run.
67
+ - [Guides](docs/guide/) — pipelines, tools, model-backed agents, escalation,
68
+ testing, observability.
69
+ - [Examples](examples/) — runnable, tested pipelines from tiny to high-stakes.
70
+
71
+ ## License
72
+
73
+ MIT
@@ -0,0 +1,17 @@
1
+ {
2
+ "$schema": "https://context7.com/schema/context7.json",
3
+ "url": "https://context7.com/daviesayo/drawbore",
4
+ "public_key": "pk_QLkcrofppIBgPn98X7UzC",
5
+ "projectTitle": "Drawbore",
6
+ "description": "A safety-first Python SDK for constrained, composable agent pipelines.",
7
+ "folders": ["docs", "examples"],
8
+ "excludeFolders": [],
9
+ "excludeFiles": ["CHANGELOG.md"],
10
+ "rules": [
11
+ "Prefer the documented Drawbore public APIs shown in the guides and examples.",
12
+ "Use pipeline.test_mode for local tests that mock LLMs, tools, MCP servers, databases, and external APIs.",
13
+ "Use explicit From(...) bindings when a step consumes fields from earlier steps.",
14
+ "Treat pipeline JSON as workflow shape and policy, not as runtime credentials or live objects.",
15
+ "Assert on RunResult and audit_trace when testing pipeline behavior."
16
+ ]
17
+ }
@@ -0,0 +1,33 @@
1
+ ---
2
+ title: "Common mistakes"
3
+ description: "Mistakes coding agents make with Drawbore, and the correct pattern for each."
4
+ ---
5
+
6
+ Each entry names a mistake and the correct approach.
7
+
8
+ ### Catching validation errors to "keep going"
9
+ Wrong: wrapping a step in try/except to swallow a schema violation. Right: let the
10
+ boundary halt; fix the data or the schema. Drawbore halts on purpose.
11
+
12
+ ### Calling a tool that wasn't declared
13
+ Wrong: invoking a tool the agent didn't list. Right: declare it in
14
+ `@agent(tools=[...])`; the proxy blocks undeclared calls.
15
+
16
+ ### Relying on implicit data flow for fan-in
17
+ Wrong: assuming a later step automatically sees an earlier step's output. Right:
18
+ use `From("agent.field")` bindings. See [Pipelines](/docs/guide/pipelines).
19
+
20
+ ### Putting provider keys in pipeline JSON
21
+ Wrong: serializing credentials or a gateway into the manifest. Right: keep
22
+ `model="profile:..."` in the manifest; configure providers in the runtime LLM
23
+ config. See [Model-backed agents](/docs/guide/model-backed-agents).
24
+
25
+ ### Testing against live models or tools
26
+ Wrong: hitting a real provider in tests. Right: use `pipeline.test_mode(...)` with
27
+ mocked models and tools — the safety layer runs for real. See
28
+ [Testing Drawbore](/docs/agents/testing-drawbore).
29
+
30
+ ### Producing partial results on failure
31
+ Wrong: returning a half-finished result on failure. Right: let the run halt and
32
+ escalate — Drawbore has no partial-results mode; a halted run's `halt_code` and
33
+ escalation package tell a human exactly what stopped and why.
@@ -0,0 +1,26 @@
1
+ ---
2
+ title: "Retrieving Drawbore docs with Context7"
3
+ description: "How a coding agent fetches current Drawbore documentation through Context7 and llms.txt."
4
+ ---
5
+
6
+ Drawbore publishes its public documentation for retrieval. A coding agent can pull
7
+ current docs through Context7 or the site's `llms.txt`, rather than relying on
8
+ training data.
9
+
10
+ ## When to use this
11
+
12
+ Before writing Drawbore code, fetch the current guides and reference so you use the
13
+ documented public APIs and the latest patterns.
14
+
15
+ ## How
16
+
17
+ - **Context7:** resolve the Drawbore library, then query the docs for the topic you
18
+ need (e.g. "pipeline.test_mode usage", "From bindings", "model profiles").
19
+ - **llms.txt:** the docs site exposes `llms.txt` / `llms-full.txt` listing the
20
+ public pages; fetch the page whose title matches your topic.
21
+
22
+ Each public page states its topic in the title, says when to use it in the first
23
+ paragraph, and names common mistakes — so a retrieved page is usable on its own.
24
+
25
+ Prefer the documented public APIs shown in the [guides](/docs/guide/pipelines) and
26
+ examples.
@@ -0,0 +1,23 @@
1
+ ---
2
+ title: "Drawbore for coding agents"
3
+ description: "How a coding agent in a consumer repository should use Drawbore correctly."
4
+ ---
5
+
6
+ This section is for **coding agents working in a user's application repository**
7
+ after Drawbore is installed. If you are an agent generating Drawbore code, read
8
+ these pages before writing a pipeline.
9
+
10
+ Drawbore's public APIs are the contract. Prefer the patterns shown in the
11
+ [guides](/docs/guide/pipelines) and examples over inventing your own.
12
+
13
+ ## Pages
14
+
15
+ - [Rules](/docs/agents/rules) — the short, load-bearing rules to follow.
16
+ - [Common mistakes](/docs/agents/common-mistakes) — what agents get wrong and the
17
+ correct pattern.
18
+ - [Testing Drawbore](/docs/agents/testing-drawbore) — write tests with
19
+ `pipeline.test_mode(...)`.
20
+ - [Context7](/docs/agents/context7) — retrieve current Drawbore docs.
21
+
22
+ These pages link to the same human-facing guides and reference; they are not a
23
+ separate source of truth.