armature-agents 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (290) hide show
  1. armature_agents-0.2.0/.gitignore +42 -0
  2. armature_agents-0.2.0/LICENSE +21 -0
  3. armature_agents-0.2.0/PKG-INFO +560 -0
  4. armature_agents-0.2.0/README.md +496 -0
  5. armature_agents-0.2.0/armature/__init__.py +9 -0
  6. armature_agents-0.2.0/armature/cache/__init__.py +0 -0
  7. armature_agents-0.2.0/armature/cache/llm_cache.py +59 -0
  8. armature_agents-0.2.0/armature/channels/__init__.py +0 -0
  9. armature_agents-0.2.0/armature/channels/models.py +22 -0
  10. armature_agents-0.2.0/armature/channels/router.py +19 -0
  11. armature_agents-0.2.0/armature/cli.py +1273 -0
  12. armature_agents-0.2.0/armature/cli_wizard.py +968 -0
  13. armature_agents-0.2.0/armature/config.py +150 -0
  14. armature_agents-0.2.0/armature/demo.py +273 -0
  15. armature_agents-0.2.0/armature/emitters/__init__.py +0 -0
  16. armature_agents-0.2.0/armature/emitters/hermes.py +168 -0
  17. armature_agents-0.2.0/armature/hooks/__init__.py +0 -0
  18. armature_agents-0.2.0/armature/hooks/lifecycle.py +232 -0
  19. armature_agents-0.2.0/armature/mcp/__init__.py +0 -0
  20. armature_agents-0.2.0/armature/mcp/client.py +98 -0
  21. armature_agents-0.2.0/armature/nodes/__init__.py +0 -0
  22. armature_agents-0.2.0/armature/nodes/base.py +9 -0
  23. armature_agents-0.2.0/armature/nodes/gate.py +29 -0
  24. armature_agents-0.2.0/armature/nodes/llm.py +496 -0
  25. armature_agents-0.2.0/armature/nodes/script.py +35 -0
  26. armature_agents-0.2.0/armature/nodes/subagent.py +127 -0
  27. armature_agents-0.2.0/armature/nodes/tool_call.py +52 -0
  28. armature_agents-0.2.0/armature/optimizer/__init__.py +0 -0
  29. armature_agents-0.2.0/armature/optimizer/history.py +85 -0
  30. armature_agents-0.2.0/armature/optimizer/runner.py +248 -0
  31. armature_agents-0.2.0/armature/optimizer/workflow.yaml +100 -0
  32. armature_agents-0.2.0/armature/permissions/__init__.py +0 -0
  33. armature_agents-0.2.0/armature/permissions/permissions.py +41 -0
  34. armature_agents-0.2.0/armature/registry/__init__.py +0 -0
  35. armature_agents-0.2.0/armature/registry/builtins.py +86 -0
  36. armature_agents-0.2.0/armature/registry/registry.py +48 -0
  37. armature_agents-0.2.0/armature/report/__init__.py +0 -0
  38. armature_agents-0.2.0/armature/report/aggregator.py +205 -0
  39. armature_agents-0.2.0/armature/report/layout.py +111 -0
  40. armature_agents-0.2.0/armature/report/loader.py +84 -0
  41. armature_agents-0.2.0/armature/report/panels.py +217 -0
  42. armature_agents-0.2.0/armature/report/run_report.py +361 -0
  43. armature_agents-0.2.0/armature/report/sparkline.py +20 -0
  44. armature_agents-0.2.0/armature/reporting.py +348 -0
  45. armature_agents-0.2.0/armature/runtime/__init__.py +0 -0
  46. armature_agents-0.2.0/armature/runtime/checkpoint.py +41 -0
  47. armature_agents-0.2.0/armature/runtime/context.py +36 -0
  48. armature_agents-0.2.0/armature/runtime/dag.py +87 -0
  49. armature_agents-0.2.0/armature/runtime/engine.py +976 -0
  50. armature_agents-0.2.0/armature/runtime/loop.py +25 -0
  51. armature_agents-0.2.0/armature/runtime/prompt.py +130 -0
  52. armature_agents-0.2.0/armature/runtime/truncation.py +52 -0
  53. armature_agents-0.2.0/armature/sandbox/__init__.py +0 -0
  54. armature_agents-0.2.0/armature/sandbox/docker.py +121 -0
  55. armature_agents-0.2.0/armature/service/__init__.py +0 -0
  56. armature_agents-0.2.0/armature/service/app.py +186 -0
  57. armature_agents-0.2.0/armature/service/jobs.py +61 -0
  58. armature_agents-0.2.0/armature/service/models.py +16 -0
  59. armature_agents-0.2.0/armature/service/registry.py +32 -0
  60. armature_agents-0.2.0/armature/service/triggers.py +82 -0
  61. armature_agents-0.2.0/armature/skills/__init__.py +0 -0
  62. armature_agents-0.2.0/armature/spec/__init__.py +0 -0
  63. armature_agents-0.2.0/armature/spec/loader.py +61 -0
  64. armature_agents-0.2.0/armature/spec/models.py +311 -0
  65. armature_agents-0.2.0/armature/spec/risk.py +79 -0
  66. armature_agents-0.2.0/armature/spec/validator.py +328 -0
  67. armature_agents-0.2.0/armature/state/__init__.py +0 -0
  68. armature_agents-0.2.0/armature/state/artifacts.py +39 -0
  69. armature_agents-0.2.0/armature/state/bootstrap.py +25 -0
  70. armature_agents-0.2.0/armature/state/diagnostics.py +155 -0
  71. armature_agents-0.2.0/armature/state/embedder.py +76 -0
  72. armature_agents-0.2.0/armature/state/evaluator.py +228 -0
  73. armature_agents-0.2.0/armature/state/export.py +216 -0
  74. armature_agents-0.2.0/armature/state/extractor.py +86 -0
  75. armature_agents-0.2.0/armature/state/knowledge.py +176 -0
  76. armature_agents-0.2.0/armature/state/memory.py +112 -0
  77. armature_agents-0.2.0/armature/state/session.py +36 -0
  78. armature_agents-0.2.0/armature/state/traces.py +246 -0
  79. armature_agents-0.2.0/armature/synthesis/__init__.py +0 -0
  80. armature_agents-0.2.0/armature/synthesis/autoharness.py +147 -0
  81. armature_agents-0.2.0/armature/synthesis/improve.py +703 -0
  82. armature_agents-0.2.0/armature/telemetry/__init__.py +88 -0
  83. armature_agents-0.2.0/armature/telemetry/langfuse.py +67 -0
  84. armature_agents-0.2.0/armature/telemetry/langsmith.py +66 -0
  85. armature_agents-0.2.0/armature/templates/six_thinking_hats.yml +327 -0
  86. armature_agents-0.2.0/docs/ARCHITECTURE.md +392 -0
  87. armature_agents-0.2.0/docs/ARMATURE-IN-PRODUCTION.md +417 -0
  88. armature_agents-0.2.0/docs/ARMATURE-PHILOSOPHY.md +848 -0
  89. armature_agents-0.2.0/docs/ARMATURE-SPEC-REF.md +330 -0
  90. armature_agents-0.2.0/docs/BUILD_FIRST_WORKFLOW.md +558 -0
  91. armature_agents-0.2.0/docs/CHATBOT-AND-STREAMING.md +491 -0
  92. armature_agents-0.2.0/docs/CHECKPOINT-AND-RESUME.md +306 -0
  93. armature_agents-0.2.0/docs/CONTEXT-ISOLATION.md +126 -0
  94. armature_agents-0.2.0/docs/DAG-vs-LANGGRAPH.md +139 -0
  95. armature_agents-0.2.0/docs/DECLARATIVE-CONTROL-FLOW.md +531 -0
  96. armature_agents-0.2.0/docs/FAN-IN_FAN-OUT.md +296 -0
  97. armature_agents-0.2.0/docs/FAQ.md +959 -0
  98. armature_agents-0.2.0/docs/HUMAN-IN-THE-LOOP.md +315 -0
  99. armature_agents-0.2.0/docs/IHR-AND-SELF-IMPROVEMENT.md +427 -0
  100. armature_agents-0.2.0/docs/INTEGRATION.md +213 -0
  101. armature_agents-0.2.0/docs/JUDGE-PATTERN.md +483 -0
  102. armature_agents-0.2.0/docs/MEMORY-AND-CONTEXT.md +408 -0
  103. armature_agents-0.2.0/docs/MISSION-AS-CONTEXT.md +169 -0
  104. armature_agents-0.2.0/docs/MODEL-TIERS.md +364 -0
  105. armature_agents-0.2.0/docs/QUORUM-SCORING.md +191 -0
  106. armature_agents-0.2.0/docs/ROLE-TAXONOMY.md +307 -0
  107. armature_agents-0.2.0/docs/SAFETY-AND-GOVERNANCE.md +334 -0
  108. armature_agents-0.2.0/docs/SANDBOX-AND-ISOLATION.md +415 -0
  109. armature_agents-0.2.0/docs/SUBAGENT-COMPOSITION.md +270 -0
  110. armature_agents-0.2.0/docs/USER-GUIDE.md +4007 -0
  111. armature_agents-0.2.0/examples/01_hello_world.yml +31 -0
  112. armature_agents-0.2.0/examples/02_research_pipeline.yml +164 -0
  113. armature_agents-0.2.0/examples/03_deliberation_standard.yml +265 -0
  114. armature_agents-0.2.0/examples/starter_template.yml +446 -0
  115. armature_agents-0.2.0/pyproject.toml +84 -0
  116. armature_agents-0.2.0/run_tests.sh +15 -0
  117. armature_agents-0.2.0/site/node_modules/@next/env/README.md +3 -0
  118. armature_agents-0.2.0/site/node_modules/@next/swc-darwin-arm64/README.md +3 -0
  119. armature_agents-0.2.0/site/node_modules/@swc/counter/README.md +7 -0
  120. armature_agents-0.2.0/site/node_modules/@swc/helpers/LICENSE +201 -0
  121. armature_agents-0.2.0/site/node_modules/@types/node/LICENSE +21 -0
  122. armature_agents-0.2.0/site/node_modules/@types/node/README.md +15 -0
  123. armature_agents-0.2.0/site/node_modules/@types/prop-types/LICENSE +21 -0
  124. armature_agents-0.2.0/site/node_modules/@types/prop-types/README.md +15 -0
  125. armature_agents-0.2.0/site/node_modules/@types/react/LICENSE +21 -0
  126. armature_agents-0.2.0/site/node_modules/@types/react/README.md +15 -0
  127. armature_agents-0.2.0/site/node_modules/@types/react-dom/LICENSE +21 -0
  128. armature_agents-0.2.0/site/node_modules/@types/react-dom/README.md +16 -0
  129. armature_agents-0.2.0/site/node_modules/busboy/LICENSE +19 -0
  130. armature_agents-0.2.0/site/node_modules/busboy/README.md +191 -0
  131. armature_agents-0.2.0/site/node_modules/caniuse-lite/LICENSE +395 -0
  132. armature_agents-0.2.0/site/node_modules/caniuse-lite/README.md +6 -0
  133. armature_agents-0.2.0/site/node_modules/csstype/LICENSE +19 -0
  134. armature_agents-0.2.0/site/node_modules/csstype/README.md +291 -0
  135. armature_agents-0.2.0/site/node_modules/graceful-fs/LICENSE +15 -0
  136. armature_agents-0.2.0/site/node_modules/graceful-fs/README.md +143 -0
  137. armature_agents-0.2.0/site/node_modules/js-tokens/LICENSE +21 -0
  138. armature_agents-0.2.0/site/node_modules/js-tokens/README.md +240 -0
  139. armature_agents-0.2.0/site/node_modules/loose-envify/LICENSE +21 -0
  140. armature_agents-0.2.0/site/node_modules/loose-envify/README.md +45 -0
  141. armature_agents-0.2.0/site/node_modules/nanoid/LICENSE +20 -0
  142. armature_agents-0.2.0/site/node_modules/nanoid/README.md +39 -0
  143. armature_agents-0.2.0/site/node_modules/next/README.md +68 -0
  144. armature_agents-0.2.0/site/node_modules/picocolors/LICENSE +15 -0
  145. armature_agents-0.2.0/site/node_modules/picocolors/README.md +21 -0
  146. armature_agents-0.2.0/site/node_modules/postcss/LICENSE +20 -0
  147. armature_agents-0.2.0/site/node_modules/postcss/README.md +28 -0
  148. armature_agents-0.2.0/site/node_modules/react/LICENSE +21 -0
  149. armature_agents-0.2.0/site/node_modules/react/README.md +37 -0
  150. armature_agents-0.2.0/site/node_modules/react-dom/LICENSE +21 -0
  151. armature_agents-0.2.0/site/node_modules/react-dom/README.md +60 -0
  152. armature_agents-0.2.0/site/node_modules/scheduler/LICENSE +21 -0
  153. armature_agents-0.2.0/site/node_modules/scheduler/README.md +9 -0
  154. armature_agents-0.2.0/site/node_modules/source-map-js/LICENSE +28 -0
  155. armature_agents-0.2.0/site/node_modules/source-map-js/README.md +765 -0
  156. armature_agents-0.2.0/site/node_modules/streamsearch/LICENSE +19 -0
  157. armature_agents-0.2.0/site/node_modules/streamsearch/README.md +95 -0
  158. armature_agents-0.2.0/site/node_modules/tslib/README.md +164 -0
  159. armature_agents-0.2.0/site/node_modules/typescript/README.md +50 -0
  160. armature_agents-0.2.0/site/node_modules/undici-types/LICENSE +21 -0
  161. armature_agents-0.2.0/site/node_modules/undici-types/README.md +6 -0
  162. armature_agents-0.2.0/templates/langgraph-sidecar/.env.example +1 -0
  163. armature_agents-0.2.0/templates/langgraph-sidecar/bot/Dockerfile +6 -0
  164. armature_agents-0.2.0/templates/langgraph-sidecar/bot/__init__.py +0 -0
  165. armature_agents-0.2.0/templates/langgraph-sidecar/bot/app.py +112 -0
  166. armature_agents-0.2.0/templates/langgraph-sidecar/bot/armature_client.py +61 -0
  167. armature_agents-0.2.0/templates/langgraph-sidecar/bot/graph.py +39 -0
  168. armature_agents-0.2.0/templates/langgraph-sidecar/bot/nodes.py +75 -0
  169. armature_agents-0.2.0/templates/langgraph-sidecar/bot/requirements.txt +6 -0
  170. armature_agents-0.2.0/templates/langgraph-sidecar/bot/state.py +14 -0
  171. armature_agents-0.2.0/templates/langgraph-sidecar/docker-compose.yml +39 -0
  172. armature_agents-0.2.0/templates/langgraph-sidecar/workflows/research.yml +80 -0
  173. armature_agents-0.2.0/tests/__init__.py +0 -0
  174. armature_agents-0.2.0/tests/cache/__init__.py +0 -0
  175. armature_agents-0.2.0/tests/cache/test_llm_cache.py +45 -0
  176. armature_agents-0.2.0/tests/channels/__init__.py +0 -0
  177. armature_agents-0.2.0/tests/channels/test_channels.py +356 -0
  178. armature_agents-0.2.0/tests/cli/test_validate_warnings.py +81 -0
  179. armature_agents-0.2.0/tests/cli/test_watch_command.py +18 -0
  180. armature_agents-0.2.0/tests/conftest.py +1 -0
  181. armature_agents-0.2.0/tests/emitters/__init__.py +0 -0
  182. armature_agents-0.2.0/tests/emitters/test_hermes_emitter.py +217 -0
  183. armature_agents-0.2.0/tests/fixtures/child-workflow.yaml +13 -0
  184. armature_agents-0.2.0/tests/fixtures/echo-workflow.yaml +21 -0
  185. armature_agents-0.2.0/tests/fixtures/guided-json-workflow.yaml +21 -0
  186. armature_agents-0.2.0/tests/fixtures/isolated-child.yaml +13 -0
  187. armature_agents-0.2.0/tests/fixtures/minimal.yaml +10 -0
  188. armature_agents-0.2.0/tests/hooks/__init__.py +0 -0
  189. armature_agents-0.2.0/tests/hooks/test_behaviors.py +81 -0
  190. armature_agents-0.2.0/tests/hooks/test_lifecycle.py +378 -0
  191. armature_agents-0.2.0/tests/hooks/test_rogue_signals.py +88 -0
  192. armature_agents-0.2.0/tests/hooks/test_safety_rules.py +120 -0
  193. armature_agents-0.2.0/tests/integration/__init__.py +0 -0
  194. armature_agents-0.2.0/tests/integration/test_end_to_end.py +94 -0
  195. armature_agents-0.2.0/tests/integration/test_phase2.py +156 -0
  196. armature_agents-0.2.0/tests/mcp/__init__.py +0 -0
  197. armature_agents-0.2.0/tests/mcp/test_mcp_client.py +196 -0
  198. armature_agents-0.2.0/tests/nodes/__init__.py +0 -0
  199. armature_agents-0.2.0/tests/nodes/test_gate.py +76 -0
  200. armature_agents-0.2.0/tests/nodes/test_llm.py +871 -0
  201. armature_agents-0.2.0/tests/nodes/test_llm_cache_integration.py +64 -0
  202. armature_agents-0.2.0/tests/nodes/test_llm_mission.py +78 -0
  203. armature_agents-0.2.0/tests/nodes/test_llm_streaming.py +77 -0
  204. armature_agents-0.2.0/tests/nodes/test_llm_tool_dispatch.py +420 -0
  205. armature_agents-0.2.0/tests/nodes/test_llm_tools_tracked.py +134 -0
  206. armature_agents-0.2.0/tests/nodes/test_script.py +102 -0
  207. armature_agents-0.2.0/tests/nodes/test_subagent.py +202 -0
  208. armature_agents-0.2.0/tests/nodes/test_tool_call.py +235 -0
  209. armature_agents-0.2.0/tests/optimizer/__init__.py +0 -0
  210. armature_agents-0.2.0/tests/optimizer/test_history.py +70 -0
  211. armature_agents-0.2.0/tests/optimizer/test_optimizer.py +514 -0
  212. armature_agents-0.2.0/tests/permissions/__init__.py +0 -0
  213. armature_agents-0.2.0/tests/permissions/test_permissions.py +108 -0
  214. armature_agents-0.2.0/tests/registry/__init__.py +0 -0
  215. armature_agents-0.2.0/tests/registry/test_builtins.py +348 -0
  216. armature_agents-0.2.0/tests/registry/test_registry.py +90 -0
  217. armature_agents-0.2.0/tests/report/__init__.py +0 -0
  218. armature_agents-0.2.0/tests/report/test_aggregator.py +376 -0
  219. armature_agents-0.2.0/tests/report/test_dashboard_cli.py +129 -0
  220. armature_agents-0.2.0/tests/report/test_panels.py +423 -0
  221. armature_agents-0.2.0/tests/report/test_sparkline.py +46 -0
  222. armature_agents-0.2.0/tests/runtime/__init__.py +0 -0
  223. armature_agents-0.2.0/tests/runtime/test_checkpoint.py +321 -0
  224. armature_agents-0.2.0/tests/runtime/test_condition.py +199 -0
  225. armature_agents-0.2.0/tests/runtime/test_context.py +95 -0
  226. armature_agents-0.2.0/tests/runtime/test_continuation.py +120 -0
  227. armature_agents-0.2.0/tests/runtime/test_contract_inputs.py +157 -0
  228. armature_agents-0.2.0/tests/runtime/test_contract_limits.py +213 -0
  229. armature_agents-0.2.0/tests/runtime/test_contract_outputs.py +166 -0
  230. armature_agents-0.2.0/tests/runtime/test_dag.py +205 -0
  231. armature_agents-0.2.0/tests/runtime/test_engine.py +748 -0
  232. armature_agents-0.2.0/tests/runtime/test_engine_response_stage.py +91 -0
  233. armature_agents-0.2.0/tests/runtime/test_escalation_warning.py +85 -0
  234. armature_agents-0.2.0/tests/runtime/test_events.py +323 -0
  235. armature_agents-0.2.0/tests/runtime/test_fan_in.py +407 -0
  236. armature_agents-0.2.0/tests/runtime/test_inject_file_as.py +219 -0
  237. armature_agents-0.2.0/tests/runtime/test_loop.py +98 -0
  238. armature_agents-0.2.0/tests/runtime/test_mission_block.py +54 -0
  239. armature_agents-0.2.0/tests/runtime/test_output_truncation.py +227 -0
  240. armature_agents-0.2.0/tests/runtime/test_prompt.py +233 -0
  241. armature_agents-0.2.0/tests/runtime/test_prompt_bootstrap.py +259 -0
  242. armature_agents-0.2.0/tests/runtime/test_recovery.py +171 -0
  243. armature_agents-0.2.0/tests/runtime/test_retry_backoff.py +233 -0
  244. armature_agents-0.2.0/tests/runtime/test_retry_until.py +235 -0
  245. armature_agents-0.2.0/tests/runtime/test_skip_if.py +207 -0
  246. armature_agents-0.2.0/tests/runtime/test_timeout_and_failure.py +227 -0
  247. armature_agents-0.2.0/tests/sandbox/__init__.py +0 -0
  248. armature_agents-0.2.0/tests/sandbox/test_docker.py +837 -0
  249. armature_agents-0.2.0/tests/service/__init__.py +0 -0
  250. armature_agents-0.2.0/tests/service/test_app.py +291 -0
  251. armature_agents-0.2.0/tests/service/test_registry.py +126 -0
  252. armature_agents-0.2.0/tests/service/test_response_stage_sse.py +76 -0
  253. armature_agents-0.2.0/tests/service/test_triggers.py +73 -0
  254. armature_agents-0.2.0/tests/skills/__init__.py +0 -0
  255. armature_agents-0.2.0/tests/spec/__init__.py +0 -0
  256. armature_agents-0.2.0/tests/spec/test_continuation.py +38 -0
  257. armature_agents-0.2.0/tests/spec/test_loader.py +167 -0
  258. armature_agents-0.2.0/tests/spec/test_mission.py +26 -0
  259. armature_agents-0.2.0/tests/spec/test_models.py +371 -0
  260. armature_agents-0.2.0/tests/spec/test_response_stage.py +20 -0
  261. armature_agents-0.2.0/tests/spec/test_signature_validation.py +361 -0
  262. armature_agents-0.2.0/tests/spec/test_spec_risk.py +129 -0
  263. armature_agents-0.2.0/tests/spec/test_triggers.py +47 -0
  264. armature_agents-0.2.0/tests/spec/test_validator.py +592 -0
  265. armature_agents-0.2.0/tests/state/__init__.py +0 -0
  266. armature_agents-0.2.0/tests/state/test_artifacts.py +92 -0
  267. armature_agents-0.2.0/tests/state/test_diagnostics.py +237 -0
  268. armature_agents-0.2.0/tests/state/test_embedder.py +100 -0
  269. armature_agents-0.2.0/tests/state/test_evaluation.py +378 -0
  270. armature_agents-0.2.0/tests/state/test_export.py +295 -0
  271. armature_agents-0.2.0/tests/state/test_hfr.py +121 -0
  272. armature_agents-0.2.0/tests/state/test_knowledge.py +423 -0
  273. armature_agents-0.2.0/tests/state/test_memory.py +166 -0
  274. armature_agents-0.2.0/tests/state/test_replay.py +90 -0
  275. armature_agents-0.2.0/tests/state/test_session.py +78 -0
  276. armature_agents-0.2.0/tests/state/test_slr.py +83 -0
  277. armature_agents-0.2.0/tests/state/test_traces.py +468 -0
  278. armature_agents-0.2.0/tests/synthesis/__init__.py +0 -0
  279. armature_agents-0.2.0/tests/synthesis/test_autoharness.py +321 -0
  280. armature_agents-0.2.0/tests/synthesis/test_cheap_evolver.py +26 -0
  281. armature_agents-0.2.0/tests/synthesis/test_improve.py +1406 -0
  282. armature_agents-0.2.0/tests/telemetry/__init__.py +0 -0
  283. armature_agents-0.2.0/tests/telemetry/test_langfuse.py +148 -0
  284. armature_agents-0.2.0/tests/telemetry/test_langsmith.py +99 -0
  285. armature_agents-0.2.0/tests/test_cli.py +494 -0
  286. armature_agents-0.2.0/tests/test_config.py +250 -0
  287. armature_agents-0.2.0/tests/test_report.py +517 -0
  288. armature_agents-0.2.0/tests/test_telemetry.py +143 -0
  289. armature_agents-0.2.0/tests/test_tool_modules.py +129 -0
  290. armature_agents-0.2.0/tests/test_wizard.py +201 -0
@@ -0,0 +1,42 @@
1
+ .venv/
2
+ __pycache__/
3
+ *.egg-info/
4
+ .pytest_cache/
5
+ dist/
6
+ *.pyc
7
+ .coverage
8
+ htmlcov/
9
+ .env
10
+
11
+ # Auto-memory (local only, never public)
12
+ memory/
13
+
14
+ # Internal architecture and scratch files (not for public repo)
15
+ examples/dangerous-pretzel/
16
+ VISION.md
17
+ taking_stock.txt
18
+ TODO.md
19
+ .playwright-mcp/
20
+ armature-*.png
21
+ docs/superpowers/
22
+ docs/plan-*.md
23
+ docs/deferred-research.md
24
+ docs/use-case-ad-campaign.md
25
+ docs/ARMATURE-AGENTCORE.md
26
+ docs/plan-gaps-agentcore.md
27
+
28
+ # Generated HTML docs (regenerate with: bash docs/regen-guide.sh)
29
+ USER-GUIDE.html
30
+ BUILD_FIRST_WORKFLOW.html
31
+
32
+ # Launch/marketing internals (not for public repo)
33
+ LAUNCH_CONTENT.md
34
+ LAUNCH_CONTENT_ENHANCED.md
35
+ LAUNCH_CHECKLIST.md
36
+ LINKEDIN_LAUNCH_STRATEGY.md
37
+ PRESS_KIT.md
38
+ CONTINUATION_PLAN.md
39
+
40
+ # Local tooling artifacts
41
+ .benchmarks/
42
+ PRE-LAUNCH-READINESS.md
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Bryan Sparks
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,560 @@
1
+ Metadata-Version: 2.4
2
+ Name: armature-agents
3
+ Version: 0.2.0
4
+ Summary: Agent execution harness — wraps LLMs in structured, inspectable workflow specs
5
+ Project-URL: Repository, https://github.com/bryansparks/armature
6
+ Project-URL: Documentation, https://github.com/bryansparks/armature/blob/main/docs/USER-GUIDE.md
7
+ Project-URL: Changelog, https://github.com/bryansparks/armature/blob/main/CHANGELOG.md
8
+ Author-email: Bryan Sparks <bryan@elftech.ai>
9
+ License: MIT License
10
+
11
+ Copyright (c) 2026 Bryan Sparks
12
+
13
+ Permission is hereby granted, free of charge, to any person obtaining a copy
14
+ of this software and associated documentation files (the "Software"), to deal
15
+ in the Software without restriction, including without limitation the rights
16
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
17
+ copies of the Software, and to permit persons to whom the Software is
18
+ furnished to do so, subject to the following conditions:
19
+
20
+ The above copyright notice and this permission notice shall be included in all
21
+ copies or substantial portions of the Software.
22
+
23
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
26
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
27
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
28
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
29
+ SOFTWARE.
30
+ License-File: LICENSE
31
+ Requires-Python: >=3.11
32
+ Requires-Dist: aiosqlite>=0.20
33
+ Requires-Dist: croniter>=2.0
34
+ Requires-Dist: httpx>=0.27
35
+ Requires-Dist: jinja2>=3.1
36
+ Requires-Dist: litellm>=1.40
37
+ Requires-Dist: pydantic>=2.0
38
+ Requires-Dist: rich>=13.0
39
+ Requires-Dist: ruamel-yaml>=0.18
40
+ Requires-Dist: typer>=0.12
41
+ Provides-Extra: dev
42
+ Requires-Dist: opentelemetry-sdk>=1.24; extra == 'dev'
43
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
44
+ Requires-Dist: pytest-cov>=5.0; extra == 'dev'
45
+ Requires-Dist: pytest>=8.0; extra == 'dev'
46
+ Requires-Dist: ruff>=0.4; extra == 'dev'
47
+ Provides-Extra: embeddings
48
+ Requires-Dist: sentence-transformers>=2.7; extra == 'embeddings'
49
+ Provides-Extra: langfuse
50
+ Requires-Dist: langfuse>=3.0; extra == 'langfuse'
51
+ Provides-Extra: langsmith
52
+ Requires-Dist: langsmith>=0.2; extra == 'langsmith'
53
+ Provides-Extra: mcp
54
+ Requires-Dist: mcp>=1.0; extra == 'mcp'
55
+ Provides-Extra: service
56
+ Requires-Dist: fastapi>=0.111; extra == 'service'
57
+ Requires-Dist: uvicorn[standard]>=0.30; extra == 'service'
58
+ Provides-Extra: telemetry
59
+ Requires-Dist: opentelemetry-exporter-otlp-proto-grpc>=1.24; extra == 'telemetry'
60
+ Requires-Dist: opentelemetry-sdk>=1.24; extra == 'telemetry'
61
+ Provides-Extra: wizard
62
+ Requires-Dist: questionary>=2.0; extra == 'wizard'
63
+ Description-Content-Type: text/markdown
64
+
65
+ # Armature
66
+
67
+ [![CI](https://github.com/bryansparks/armature/actions/workflows/ci.yml/badge.svg)](https://github.com/bryansparks/armature/actions/workflows/ci.yml)
68
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](LICENSE)
69
+ [![Python 3.11+](https://img.shields.io/badge/python-3.11+-blue.svg)](https://www.python.org/downloads/)
70
+
71
+ A lightweight, declarative agent execution harness. Define multi-agent workflows as YAML specs. Run them with a single Python call or from the CLI.
72
+
73
+ No framework dependency. No prescribed team structure. Just a DAG executor, an LLM adapter, and your workflow spec.
74
+
75
+ Armature is the execution engine for **Reasoning Automation** — end-to-end business processes where multi-agent deliberation replaces brittle rule-based logic. The harness owns orchestration, retries, safety, telemetry, and human approval gates. You supply the domain logic as YAML workflow specs and Python tool modules. The same engine that runs a code-review pipeline can run a contract risk assessment, a social media creative chain, or a compliance audit — without any changes to Armature itself.
76
+
77
+ > **Part of a larger vision.** Armature is one component of a broader autonomous-organization platform I'm building under the working name ElfTech — a stack of AI systems covering reasoning, deliberation, code generation, deployment, and coordination. More details to come. Stay tuned.
78
+
79
+ ---
80
+
81
+ ## What it does
82
+
83
+ Armature reads a YAML spec that defines a **workflow** as a directed acyclic graph (DAG) of **stages**. Each stage is one of five things:
84
+
85
+ - An **LLM call** — a role with a system prompt, model tier, and output format
86
+ - A **script/adapter** — a Python function or shell command
87
+ - A **human gate** — pauses execution for human approval
88
+ - A **direct tool call** — invokes a registered tool deterministically, no LLM involved
89
+ - A **subagent** — spawns a child workflow (with optional fan-out/fan-in for parallelism)
90
+
91
+ Stages declare `depends_on` relationships. The engine resolves execution order automatically, passes accumulated results downstream as context, and handles retries, safety hooks, and telemetry.
92
+
93
+ ---
94
+
95
+ ## Installation
96
+
97
+ ```bash
98
+ pip install armature-agents
99
+ ```
100
+
101
+ With optional extras:
102
+
103
+ ```bash
104
+ pip install "armature-agents[service]" # FastAPI HTTP service
105
+ pip install "armature-agents[telemetry]" # OpenTelemetry export
106
+ ```
107
+
108
+ Verify:
109
+
110
+ ```bash
111
+ armature --version
112
+ ```
113
+
114
+ Set your LLM provider key:
115
+
116
+ ```bash
117
+ export ANTHROPIC_API_KEY=sk-...
118
+ # or OPENAI_API_KEY, or configure any litellm-supported provider
119
+ ```
120
+
121
+ ---
122
+
123
+ ## Quick start
124
+
125
+ **1. Write a spec** (`my_workflow.yml`):
126
+
127
+ ```yaml
128
+ name: summarize
129
+ version: "1.0"
130
+
131
+ model_tiers:
132
+ small:
133
+ provider: anthropic
134
+ model: claude-haiku-4-5-20251001
135
+
136
+ # Optional: map role types to tiers so stages don't need explicit model_tier
137
+ role_type_defaults:
138
+ worker: small
139
+ judge: small
140
+
141
+ stages:
142
+ - id: summarizer
143
+ role:
144
+ name: Summarizer
145
+ type: worker # picks up "small" from role_type_defaults
146
+ description: |
147
+ Summarize the provided text in 3 bullet points.
148
+ Be concise and capture the key ideas.
149
+ output_mode: text
150
+ depends_on: []
151
+ ```
152
+
153
+ **2. Run it from Python:**
154
+
155
+ ```python
156
+ import asyncio
157
+ from armature import Harness
158
+
159
+ async def main():
160
+ harness = Harness.from_spec("my_workflow.yml")
161
+ result = await harness.run({"text": "Your content here..."})
162
+ print(result["summarizer"]["content"])
163
+
164
+ asyncio.run(main())
165
+ ```
166
+
167
+ **3. Or from the CLI:**
168
+
169
+ ```bash
170
+ armature run my_workflow.yml --input text="Your content here..."
171
+ ```
172
+
173
+ ---
174
+
175
+ ## CLI
176
+
177
+ ```bash
178
+ armature run <spec> # execute a workflow
179
+ armature run <spec> --no-cache # run without LLM response cache
180
+ armature run <spec> --auto-improve # run then auto-apply spec improvements when IHR < 0.75
181
+ armature validate <spec> # validate spec + show KYA-inspired risk score (LOW/MEDIUM/HIGH/CRITICAL)
182
+ armature new [output] # interactive spec creation wizard
183
+ armature doctor # environment health check
184
+ armature serve # start HTTP service (requires armature-agents[service])
185
+ armature serve --specs-dir ./specs/ # serve with named workflow registry (/workflows API)
186
+ armature optimize <spec> # single-shot meta-harness optimizer
187
+ armature improve <spec> # analyze traces, auto-apply spec improvements
188
+ armature improve <spec> --apply-pending # promote a staged pending.yaml revision
189
+ armature report --run-id <id> # per-run text report with failure signatures
190
+ armature replay <run_id> # display a recorded run stage-by-stage
191
+ armature dashboard <spec> # Rich 4-panel aggregate health dashboard
192
+ armature dashboard <spec> --watch # auto-refresh every 5 seconds
193
+ armature dashboard <spec> --format json # machine-readable JSON output
194
+ armature export-traces # export traces as SFT/DPO training data
195
+ armature channels start # messaging channel connectors
196
+ armature watch <spec> # listen for cron/webhook triggers and fire runs
197
+ ```
198
+
199
+ ---
200
+
201
+ ## Built-in tools
202
+
203
+ Armature ships with a tool registry pre-loaded with the following tools. Any stage can invoke them via `tool_call` or by listing them in `role.tools`.
204
+
205
+ | Tool name | Permission | Description |
206
+ |-----------|-----------|-------------|
207
+ | `file_read` | READ_ONLY | Read a file from disk |
208
+ | `file_write` | WORKSPACE | Write content to a file |
209
+ | `shell` | WORKSPACE | Run a shell command; returns stdout, stderr, exit_code |
210
+ | `http_get` | NETWORK | HTTP GET request; returns status and body |
211
+ | `http_post` | NETWORK | Authenticated HTTP POST with JSON body and custom headers; returns status and body |
212
+
213
+ `http_post` is the general-purpose adapter for any external API — image generation, ad platforms, analytics services, webhooks, etc. Pass auth credentials in `headers`:
214
+
215
+ ```yaml
216
+ - id: generate_image
217
+ tool_call:
218
+ name: http_post
219
+ args:
220
+ url: "https://api.openai.com/v1/images/generations"
221
+ headers:
222
+ Authorization: "Bearer {{ env.OPENAI_API_KEY }}"
223
+ Content-Type: "application/json"
224
+ body:
225
+ model: "dall-e-3"
226
+ prompt: "{{ visual_prompt }}"
227
+ size: "1024x1024"
228
+ n: 1
229
+ ```
230
+
231
+ ---
232
+
233
+ ## Reasoning Automation
234
+
235
+ Armature's `tools:` spec section lets any workflow load external Python modules that register additional tools. This is the primary extension point for building **Reasoning Automation** applications — end-to-end processes that connect LLM reasoning to real external systems.
236
+
237
+ ### The pattern
238
+
239
+ Create a Python package alongside your workflows. Each module exposes a `register(registry)` function:
240
+
241
+ ```python
242
+ # myapp/tools/dalle.py
243
+ import openai
244
+ from armature.registry.registry import ToolRegistry, ToolDescriptor, PermissionLevel
245
+
246
+ _client = openai.AsyncOpenAI()
247
+
248
+ async def generate_image(args: dict) -> dict:
249
+ response = await _client.images.generate(
250
+ model="dall-e-3",
251
+ prompt=args["prompt"],
252
+ size=args.get("size", "1024x1024"),
253
+ n=1,
254
+ )
255
+ return {"url": response.data[0].url, "revised_prompt": response.data[0].revised_prompt}
256
+
257
+ def register(registry: ToolRegistry) -> None:
258
+ registry.register(ToolDescriptor(
259
+ name="dalle.generate_image",
260
+ description="Generate an image using DALL-E 3",
261
+ permission=PermissionLevel.NETWORK,
262
+ handler=generate_image,
263
+ parameters={
264
+ "prompt": {"type": "string"},
265
+ "size": {"type": "string", "optional": True},
266
+ },
267
+ ))
268
+ ```
269
+
270
+ Declare it in your workflow spec:
271
+
272
+ ```yaml
273
+ tools:
274
+ - module: myapp.tools.dalle
275
+ - module: myapp.tools.meta_publisher
276
+ - module: myapp.tools.analytics
277
+
278
+ stages:
279
+ - id: generate_image
280
+ tool_call:
281
+ name: dalle.generate_image
282
+ args:
283
+ prompt: "{{ visual_director.prompt_a }}"
284
+ ```
285
+
286
+ The tool modules live entirely in your application project. Armature imports them at startup. No changes to Armature are required.
287
+
288
+ ### What you can build
289
+
290
+ | Use case | Tool modules needed |
291
+ |----------|-------------------|
292
+ | Social ad campaign automation | Image gen (DALL-E 3), platform publishers (Meta, TikTok), analytics collectors |
293
+ | Contract risk review | Document extractor, clause classifier, risk scorer |
294
+ | Vendor assessment | Web search, company lookup, scoring rubric |
295
+ | Compliance documentation | Regulatory corpus retrieval, template filler, diff checker |
296
+ | Code review pipeline | GitHub API, static analysis runner, security scanner |
297
+
298
+ Each use case is a YAML workflow spec + a small set of Python tool modules. The Armature engine is the shared execution layer across all of them.
299
+
300
+ ---
301
+
302
+ ## Research foundation
303
+
304
+ Armature is built from nine academic papers, one industry governance framework, and one open-source agent architecture project, most of them published between February and June 2026 (AgentSpec and the Agent Governance Toolkit date from 2025). Every major design decision traces to an experimentally validated finding: **the harness matters more than the model.**
305
+
306
+ ### The papers
307
+
308
+ **[NLAH] Natural-Language Agent Harnesses** — Tsinghua University, March 2026 ([arXiv:2603.25723](https://arxiv.org/abs/2603.25723))
309
+
310
+ Establishes the architectural model. NLAH defines seven mandatory harness components (Contracts, Roles, Stages, Adapters, State, Failure Taxonomy, File-backed State) and shows that workflows defined in structured natural language outperform code-based equivalents on complex benchmark tasks (47.2% vs. 30.4% on OSWorld). It also defines IHR (Implicit Harness Rating), a composite quality metric for scoring run quality objectively, and specifies parallel fan-out as a core orchestration primitive.
311
+
312
+ **[Meta-Harness] Automated Optimization End-to-End** — Stanford University, March 2026 ([arXiv:2603.28052](https://arxiv.org/abs/2603.28052))
313
+
314
+ The paper behind the optimizer. Meta-Harness introduces an outer optimization loop where a frontier model reads execution traces and proposes improvements to the harness spec itself. Key finding: giving the optimizer access to the *history* of prior proposals — what was tried, whether it was accepted, and what score it achieved — improves accuracy from 41% to 57% by enabling causal reasoning. Implemented in `ProposalStore` and `run_loop()`.
315
+
316
+ **[AutoHarness] LLM-Synthesized Harnesses** — February 2026 ([arXiv:2603.03329](https://arxiv.org/abs/2603.03329))
317
+
318
+ Demonstrates that LLMs can iteratively write their own harness code and produce systems that outperform larger models without harnesses. The concept most directly applied: the **harness-as-verifier**, where the harness validates that outputs meet domain-specific legality constraints before accepting them — the ancestor of the `judge` role type and `SpecDrafter`.
319
+
320
+ **[AgentSpec] Runtime Enforcement for Safe Agents** — March 2025 ([arXiv:2503.18666](https://arxiv.org/abs/2503.18666))
321
+
322
+ Introduces a declarative rule language for constraining agent behavior at runtime. Rules are composable, lightweight (sub-millisecond evaluation), and LLM-generatable. Armature implements the full enforcement architecture: pre/post-tool hooks wired into the engine and a declarative condition DSL (`ToolSafetyRule` + `SafetyCondition`) written directly in YAML.
323
+
324
+ **[Continual Harness] Reset-Free Self-Improvement** — May 2026 ([arXiv:2605.09998](https://arxiv.org/abs/2605.09998))
325
+
326
+ Formalizes the two-loop self-improvement design: an inner loop (a `post_run` refiner stage that sees the full transcript after the DAG completes) and an outer loop (`SelfImproveRunner` — load traces → diagnose → propose YAML revision → auto-apply). Introduces the 4-code failure taxonomy (`stage_failed`, `output_invalid`, `low_confidence`, `high_escalation`) and the fine-tuning bridge: high-quality judge traces exported as SFT/DPO training data.
327
+
328
+ **[AHE] Agentic Harness Engineering** — April 2026 ([arXiv:2604.25850](https://arxiv.org/abs/2604.25850))
329
+
330
+ The accountability paper. AHE introduces the prediction-verification loop: every proposed spec revision carries a falsifiable contract (`predicted_fixes`, `predicted_regressions`), and the next cycle verifies those predictions against the observed diagnostic shift. Implements component-level improvement targeting — long-term memory evolution alone yielded +5.6pp; system prompt evolution *alone* caused a 2.3pp regression, validating the "one component at a time" discipline.
331
+
332
+ **[System Scaling] From Model Scaling to System Scaling** — May 2026 ([arXiv:2605.26112](https://arxiv.org/abs/2605.26112))
333
+
334
+ Identifies three system-level failure modes: stale memory reaching LLMs without warning, context values flowing between stages without provenance, and tool side effects going unverified. Adds drift score (regression detection across improvement cycles) and component governance (auto-apply vs. human-review classification for spec changes).
335
+
336
+ **[AGT] Microsoft Agent Governance Toolkit** — 2025
337
+
338
+ Five governance primitives borrowed directly: reversibility classification for every tool call (`FULL / PARTIAL / NONE`), tamper-evident SHA-256 hashing of trace inputs, SHA-256 hashing of the governing policy (policy version), a `require_approval` gate wired into the tool-call path, and `safety_mode: strict` (fail-closed — deny on no-match).
339
+
340
+ **[ActiveGraph]** — yoheinakajima, May 2026 ([arXiv:2605.21997](https://arxiv.org/abs/2605.21997))
341
+
342
+ Graph-memory agent architecture introducing content-addressed caching of LLM responses and event-triggered reactive behaviors. Adopted concepts: SHA-256 cache keying by model + messages + kwargs (`LLMCache`), audit replay from the trace store (`armature replay`), and the `BehaviorRule`/`BehaviorRegistry` hook layer for pattern-triggered post-run behaviors.
343
+
344
+ **[KYA] Know Your Agents** — Veldt Labs, May 2026 ([arXiv:2605.25376](https://arxiv.org/abs/2605.25376))
345
+
346
+ Governance layer operating at definition-time (static risk scoring), runtime-trust (anomaly counting), and composition (only-tighten). Adopted: five-factor static spec risk score surfaced by `armature validate`, `RogueSignalCounter` wired into safety hooks and the run summary, and `CONFLICTING_SAFETY_RULES` validation enforcing the only-tighten composition principle.
347
+
348
+ ---
349
+
350
+ ### What's implemented
351
+
352
+ | Source | Concept | Status |
353
+ |---|---|---|
354
+ | NLAH | 7-component spec, four role types, IHR, fan-out/fan-in | ✅ |
355
+ | Meta-Harness | Single-shot + multi-iteration optimizer, proposal history, prompt bootstrapping | ✅ |
356
+ | AutoHarness | Harness-as-verifier, NL-to-spec synthesis (`SpecDrafter`), `AutoHarness` loop | ✅ |
357
+ | AgentSpec | Pre/post-tool hooks, declarative safety DSL (6 operators, 5 actions) | ✅ |
358
+ | Continual Harness | 4-code failure taxonomy, inner refiner loop, `SelfImproveRunner`, `TraceExporter` | ✅ |
359
+ | Harness Benefit ([arXiv:2605.30621v1](https://arxiv.org/abs/2605.30621v1)) | Cheap-evolver (medium-tier `SpecRefiner`), HFR as 5th IHR component, SLR `low_skill_activation` diagnostic | ✅ |
360
+ | AHE | Falsifiable improvement contract, prediction-verification, `_verify_predictions()` | ✅ |
361
+ | System Scaling | Memory staleness, context provenance, drift score, postcondition verification, consensus fan-in, component governance | ✅ |
362
+ | AGT | Reversibility classification, trace hashing, policy version, `require_approval`, strict mode | ✅ |
363
+ | ActiveGraph | LLM response caching, audit replay, trace-triggered behaviors (`BehaviorRule`), `--auto-improve` | ✅ |
364
+ | KYA | Static spec risk score, rogue signal counter, only-tighten safety rule validation | ✅ |
365
+
366
+ ---
367
+
368
+ ## The self-improvement flywheel
369
+
370
+ Armature is the **execution layer** — the first component in a larger system designed to improve itself the more it runs. The chart below shows where the current implementation stands and where the flywheel leads aspirationally.
371
+
372
+ ```
373
+ TODAY NEAR-TERM ASPIRATIONAL
374
+ ─────────────────────────────────────────────────────────────────────────
375
+
376
+ ┌──────────────────┐
377
+ │ Armature │ ─── every run records ──► ┌─────────────────────┐
378
+ │ Harness │ │ TraceStore │
379
+ │ │ ◄── optimizer proposes ─── │ (SQLite, per run) │
380
+ │ • DAG executor │ spec improvements └──────────┬──────────┘
381
+ │ • Role routing │ │
382
+ │ • Safety hooks │ ┌──────────▼──────────┐
383
+ │ • IHR scoring │ │ Loop 1: │
384
+ │ • Session log │ │ Harness Optimizer │
385
+ └──────────────────┘ │ │
386
+ │ Reads traces + │
387
+ │ proposal history │
388
+ │ → proposes YAML │
389
+ │ spec improvements │
390
+ │ → A/B tests by IHR │
391
+ └──────────┬──────────┘
392
+ │ accepted diffs
393
+ ┌──────────▼──────────┐
394
+ │ Loop 2: │
395
+ │ SLM Fine-Tuning │
396
+ │ │
397
+ │ High-quality │
398
+ │ traces → LoRA │
399
+ │ fine-tune workers │
400
+ │ → register as │
401
+ │ new model tier │
402
+ └──────────┬──────────┘
403
+ │ better workers
404
+ ┌──────────▼──────────┐
405
+ │ Loop 3: │
406
+ │ RAG │
407
+ │ │
408
+ │ Trace failures │
409
+ │ reveal knowledge │
410
+ │ gaps → improve │
411
+ │ retrieval index │
412
+ └──────────┬──────────┘
413
+ │ richer context
414
+ ┌──────────▼──────────┐
415
+ │ Loop 4: │
416
+ │ Consensus │
417
+ │ deliberation │
418
+ │ │
419
+ │ Calibrate │
420
+ │ deliberation │
421
+ │ priors from │
422
+ │ outcomes → │
423
+ │ cleaner quality │
424
+ │ signal back to │
425
+ │ Loop 1 │
426
+ └─────────────────────┘
427
+
428
+ ─────────────────────────────────────────────────────────────────────────
429
+ All four loops are implemented. 1,388 tests passing.
430
+ ```
431
+
432
+ **The compounding property:** Each loop feeds the next. Better traces → better optimizer proposals → better specs → better traces. Fine-tuned worker models produce better outputs → fewer judge rejections → cleaner quality signal. The harness measurably improves the more it runs, without engineering effort after initial deployment.
433
+
434
+ ---
435
+
436
+ ## Key concepts
437
+
438
+ | Concept | Description |
439
+ |---|---|
440
+ | **Spec** | YAML file defining the complete workflow — model tiers, stages, safety rules, memory |
441
+ | **Stage** | One unit of work: an LLM call, script, gate, direct tool call, or subagent |
442
+ | **DAG** | Stages declare `depends_on`; the engine resolves execution order |
443
+ | **Context** | Shared dict that accumulates stage outputs; every stage sees all upstream results |
444
+ | **Model tiers** | Named model slots (`tiny`, `small`, `medium`, `large`, `frontier`); the consuming application defines what each name maps to (provider, model, temperature, max_tokens) |
445
+ | **Role type defaults** | Maps role types to tiers automatically (`worker → small`, `judge → frontier`, etc.); stages can omit `model_tier` and inherit from this mapping |
446
+ | **Native tool calling** | Stages declare `role.tools` to scope which registry tools they can call; the engine runs a ReAct dispatch loop — tool calls returned by the model are executed and results fed back until a final response is produced |
447
+ | **Direct tool call** | A `tool_call` stage invokes a registered tool without an LLM — deterministic, zero-latency, no JSON hallucination. Args are Jinja2-rendered against context. |
448
+ | **Mission context** | A `mission:` field on the spec is automatically injected into every LLM stage's system prompt, together with a compact prior-stage breadcrumb, anchoring agents to the stated goal across long-running workflows |
449
+ | **Continuation** | A `continuation:` block carries selected stage outputs from a prior run into the next activation via `carry_forward` key references; the merged values arrive under an `inject_as` context key (default: `prior_run`). Enables long-horizon workflows that accumulate state across repeated executions without custom code. |
450
+ | **Triggers** | A `triggers:` list declares `cron` (schedule expression) and `webhook` (HTTP path) trigger sources. `armature watch <spec>` runs a persistent dispatcher that fires `Harness.run()` on every matching event. |
451
+ | **Response stage** | Mark one text-mode LLM stage as `response_stage: true` to enable token streaming; the HTTP service forwards each token to the SSE stream immediately and fires a `response_stage_complete` event so clients can render the answer before background stages finish |
452
+ | **Context filtering** | A stage's `signature.input` declares which context keys appear in its prompt — keeps prompts focused, hides internal state from irrelevant stages |
453
+ | **Cross-run memory** | The `memory:` spec section captures stage outputs across runs and injects them into subsequent runs — lets workflows accumulate knowledge without code changes |
454
+ | **IHR** | Implicit Harness Rating — 5-component quality score: output validity (35%), success rate (25%), quorum score (20%), latency (10%), harness-following rate / HFR (10%). HFR = fraction of stages that succeed without escalation, per [arXiv:2605.30621v1](https://arxiv.org/abs/2605.30621v1) |
455
+ | **Sandbox isolation** | `sandbox.mode: docker` routes shell, file_write, and file_read tool calls through ephemeral Docker containers — network-isolated, CPU/memory bounded, workspace-scoped. Per-stage image overrides with `sandbox_image`. Image content digest recorded on every trace for audit. |
456
+ | **Templates** | Pre-built spec files for common patterns (Six Thinking Hats deliberation, etc.) |
457
+
458
+ ---
459
+
460
+ ## Examples
461
+
462
+ `examples/` — annotated workflows you can copy and modify:
463
+
464
+ | File | What it demonstrates |
465
+ |---|---|
466
+ | `01_hello_world.yml` | Minimal single-stage LLM workflow |
467
+ | `02_research_pipeline.yml` | Multi-stage pipeline with dependencies |
468
+ | `03_deliberation_standard.yml` | Judge/evaluator pattern with quality scoring |
469
+ | `starter_template.yml` | **Full-featured reference** — every section documented inline, showing model tiers, context filtering, cross-run memory, safety rules, guided JSON, and a human gate |
470
+
471
+ ## Templates
472
+
473
+ Ready-to-use deliberation patterns in `armature/templates/`:
474
+
475
+ | Template | Pattern |
476
+ |---|---|
477
+ | `six_thinking_hats.yml` | Edward de Bono's Six Thinking Hats — structured multi-perspective deliberation |
478
+
479
+ ---
480
+
481
+ ## Project layout
482
+
483
+ ```
484
+ armature/
485
+ ├── nodes/ # Stage executors (LLMNode, ScriptNode, HumanGateNode, SubagentNode)
486
+ ├── registry/ # Tool registry, built-in tools, ToolDescriptor, reversibility
487
+ ├── runtime/ # DAG executor, engine, prompt assembler, context manager
488
+ ├── spec/ # YAML loader, Pydantic models (HarnessSpec, Stage, SandboxConfig, ...)
489
+ ├── hooks/ # Lifecycle hooks, safety rule evaluation, PostconditionFailed
490
+ ├── permissions/ # PermissionLevel, PermissionChecker
491
+ ├── optimizer/ # Meta-Harness: trace-driven spec optimization, ProposalStore
492
+ ├── synthesis/ # SelfImproveRunner, SpecRefiner, DiagnosticAnalyzer, TraceExporter
493
+ ├── state/ # TraceStore, MemoryStore, SessionLog, ArtifactStore (SQLite + JSONL)
494
+ ├── report/ # Rich dashboard, sparkline, aggregator, panels
495
+ ├── sandbox/ # DockerSandboxProvider — shell/file tool sandboxing
496
+ ├── emitters/ # HermesEmitter — agent bundle generation
497
+ ├── adapters/ # Observability adapters (LangFuse, LangSmith)
498
+ ├── templates/ # Reusable workflow spec templates
499
+ ├── service/ # FastAPI HTTP service — WorkflowRegistry, build_app(), /workflows API
500
+ └── cli.py # CLI entry point
501
+
502
+ examples/ # Annotated workflow YAML specs (copy and modify)
503
+ docs/ # Full documentation (see index below)
504
+ ```
505
+
506
+ ## Documentation
507
+
508
+ ### Getting started
509
+
510
+ | Document | Purpose |
511
+ |---|---|
512
+ | [BUILD_FIRST_WORKFLOW](docs/BUILD_FIRST_WORKFLOW.md) | Hands-on tutorial — build a working workflow from scratch |
513
+ | [USER-GUIDE](docs/USER-GUIDE.md) | Full spec reference — every field, every option, worked examples |
514
+ | [ARMATURE-SPEC-REF](docs/ARMATURE-SPEC-REF.md) | All spec fields and valid values on one page |
515
+ | [FAQ](docs/FAQ.md) | Common questions — positioning, capabilities, comparisons |
516
+
517
+ ### Design & philosophy
518
+
519
+ | Document | Purpose |
520
+ |---|---|
521
+ | [ARCHITECTURE](docs/ARCHITECTURE.md) | Design rationale, research foundation, implementation table |
522
+ | [ARMATURE-PHILOSOPHY](docs/ARMATURE-PHILOSOPHY.md) | Why a harness — philosophy, research papers, architecture deep-dive |
523
+ | [DECLARATIVE-CONTROL-FLOW](docs/DECLARATIVE-CONTROL-FLOW.md) | YAML-first control flow — branching, loops, conditions |
524
+ | [DAG-vs-LANGGRAPH](docs/DAG-vs-LANGGRAPH.md) | How Armature's DAG model compares to LangGraph |
525
+ | [MISSION-AS-CONTEXT](docs/MISSION-AS-CONTEXT.md) | Mission statements as persistent agent context |
526
+ | [ROLE-TAXONOMY](docs/ROLE-TAXONOMY.md) | Agent role definitions and the role system |
527
+ | [MODEL-TIERS](docs/MODEL-TIERS.md) | Routing work across SLM workers and frontier orchestrators |
528
+
529
+ ### Patterns & features
530
+
531
+ | Document | Purpose |
532
+ |---|---|
533
+ | [JUDGE-PATTERN](docs/JUDGE-PATTERN.md) | Output validation with judge agents |
534
+ | [QUORUM-SCORING](docs/QUORUM-SCORING.md) | Deliberative quality scoring across agents |
535
+ | [FAN-IN_FAN-OUT](docs/FAN-IN_FAN-OUT.md) | Parallel fan-out and aggregation patterns |
536
+ | [SUBAGENT-COMPOSITION](docs/SUBAGENT-COMPOSITION.md) | Composing workflows from subagent stages |
537
+ | [CONTEXT-ISOLATION](docs/CONTEXT-ISOLATION.md) | Isolating subagent context for focus and safety |
538
+ | [MEMORY-AND-CONTEXT](docs/MEMORY-AND-CONTEXT.md) | Memory persistence and context management |
539
+ | [CHECKPOINT-AND-RESUME](docs/CHECKPOINT-AND-RESUME.md) | Execution state persistence and resumption |
540
+ | [CHATBOT-AND-STREAMING](docs/CHATBOT-AND-STREAMING.md) | Chat applications and streaming responses |
541
+ | [HUMAN-IN-THE-LOOP](docs/HUMAN-IN-THE-LOOP.md) | Approval gates and human decision points |
542
+ | [IHR-AND-SELF-IMPROVEMENT](docs/IHR-AND-SELF-IMPROVEMENT.md) | The IHR formula and self-improvement loop |
543
+
544
+ ### Operations & safety
545
+
546
+ | Document | Purpose |
547
+ |---|---|
548
+ | [ARMATURE-IN-PRODUCTION](docs/ARMATURE-IN-PRODUCTION.md) | Running Armature in production — patterns and case studies |
549
+ | [SAFETY-AND-GOVERNANCE](docs/SAFETY-AND-GOVERNANCE.md) | Safety rules, governance, and guardrails |
550
+ | [SANDBOX-AND-ISOLATION](docs/SANDBOX-AND-ISOLATION.md) | Sandboxed tool execution (Docker isolation) |
551
+ | [INTEGRATION](docs/INTEGRATION.md) | LangGraph sidecar pattern, HTTP endpoint reference |
552
+
553
+ ### Project
554
+
555
+ | Document | Purpose |
556
+ |---|---|
557
+ | [CONTRIBUTING](CONTRIBUTING.md) | How to run tests, PR conventions, adding tools and commands |
558
+ | [CHANGELOG](CHANGELOG.md) | Release history |
559
+ | [ROADMAP](ROADMAP.md) | Where Armature is headed |
560
+ | [SECURITY](SECURITY.md) | Reporting vulnerabilities |