@bolloon/bolloon-agent 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (581)
  1. package/bin/bolloon-cli.cjs +157 -0
  2. package/bin/bolloon-daemon.sh +207 -0
  3. package/bin/bolloon.cmd +11 -0
  4. package/dist/agents/constraint-layer.js +10 -15
  5. package/dist/agents/pi-sdk.js +433 -106
  6. package/dist/agents/protocol.js +82 -1
  7. package/dist/agents/subagent-manager.js +2 -2
  8. package/dist/agents/workflow-engine.js +15 -20
  9. package/dist/agents/workflow-pivot-loop.js +541 -0
  10. package/dist/bollharness/src/index.js +5 -0
  11. package/dist/bollharness/src/scripts/checks/check_adr_plan_numbering.js +6 -0
  12. package/dist/bollharness/src/scripts/checks/check_api_types.js +45 -0
  13. package/dist/bollharness/src/scripts/checks/check_artifact_link.js +146 -0
  14. package/dist/bollharness/src/scripts/checks/check_bridge_deps.js +6 -0
  15. package/dist/bollharness/src/scripts/checks/check_bugfix_binding.js +6 -0
  16. package/dist/bollharness/src/scripts/checks/check_bugfix_binding_ci.js +6 -0
  17. package/dist/bollharness/src/scripts/checks/check_doc_file_references.js +6 -0
  18. package/dist/bollharness/src/scripts/checks/check_doc_freshness.js +135 -0
  19. package/dist/bollharness/src/scripts/checks/check_doc_links.js +31 -0
  20. package/dist/bollharness/src/scripts/checks/check_file_existence_claims.js +6 -0
  21. package/dist/bollharness/src/scripts/checks/check_fragment_integrity.js +34 -0
  22. package/dist/bollharness/src/scripts/checks/check_hook_installed.js +63 -0
  23. package/dist/bollharness/src/scripts/checks/check_issue_closure.js +41 -0
  24. package/dist/bollharness/src/scripts/checks/check_mcp_parity.js +6 -0
  25. package/dist/bollharness/src/scripts/checks/check_security.js +48 -0
  26. package/dist/bollharness/src/scripts/checks/check_skill_parity.js +6 -0
  27. package/dist/bollharness/src/scripts/checks/check_versions.js +6 -0
  28. package/dist/bollharness/src/scripts/checks/finding.js +13 -0
  29. package/dist/bollharness/src/scripts/checks/next_decision_number.js +20 -0
  30. package/dist/bollharness/src/scripts/checks/regenerate_magic_docs.js +6 -0
  31. package/dist/bollharness/src/scripts/ci/detect_rebaseline_triggers.js +8 -0
  32. package/dist/bollharness/src/scripts/ci/scan_subprocess_cfg.js +8 -0
  33. package/dist/bollharness/src/scripts/ci/scan_verify_artifacts.js +8 -0
  34. package/dist/bollharness/src/scripts/ci/scan_yaml_schema.js +8 -0
  35. package/dist/bollharness/src/scripts/context_router.js +67 -0
  36. package/dist/bollharness/src/scripts/deploy-guard.js +157 -0
  37. package/dist/bollharness/src/scripts/guard-feedback.js +192 -0
  38. package/dist/bollharness/src/scripts/guard_router.js +158 -0
  39. package/dist/bollharness/src/scripts/hooks/_hook_output.js +6 -0
  40. package/dist/bollharness/src/scripts/hooks/auto-python3.js +6 -0
  41. package/dist/bollharness/src/scripts/hooks/deploy-progress-on-session-end.js +6 -0
  42. package/dist/bollharness/src/scripts/hooks/failure-analyzer.js +6 -0
  43. package/dist/bollharness/src/scripts/hooks/gate-judgment-inject.js +92 -0
  44. package/dist/bollharness/src/scripts/hooks/gate-transition-judgment.js +63 -0
  45. package/dist/bollharness/src/scripts/hooks/inbox-ack.js +6 -0
  46. package/dist/bollharness/src/scripts/hooks/inbox-inject-on-start.js +6 -0
  47. package/dist/bollharness/src/scripts/hooks/inbox-validate.js +6 -0
  48. package/dist/bollharness/src/scripts/hooks/inbox-write-ledger.js +6 -0
  49. package/dist/bollharness/src/scripts/hooks/initializer-agent.js +6 -0
  50. package/dist/bollharness/src/scripts/hooks/loop-detection.js +73 -0
  51. package/dist/bollharness/src/scripts/hooks/owner-guard.js +6 -0
  52. package/dist/bollharness/src/scripts/hooks/precompact.js +6 -0
  53. package/dist/bollharness/src/scripts/hooks/review-agent-gatekeeper.js +6 -0
  54. package/dist/bollharness/src/scripts/hooks/risk-tracker.js +108 -0
  55. package/dist/bollharness/src/scripts/hooks/sanitize-on-read.js +6 -0
  56. package/dist/bollharness/src/scripts/hooks/session-reflection.js +7 -0
  57. package/dist/bollharness/src/scripts/hooks/session-start-magic-docs.js +7 -0
  58. package/dist/bollharness/src/scripts/hooks/session-start-reset-risk.js +7 -0
  59. package/dist/bollharness/src/scripts/hooks/session-start-toolkit-reminder.js +7 -0
  60. package/dist/bollharness/src/scripts/hooks/stop-evaluator.js +157 -0
  61. package/dist/bollharness/src/scripts/hooks/tool-call-counter.js +6 -0
  62. package/dist/bollharness/src/scripts/hooks/trace-analyzer.js +10 -0
  63. package/dist/bollharness/src/scripts/install/install-trust-token.js +7 -0
  64. package/dist/bollharness/src/scripts/install/multi_project_registry.js +9 -0
  65. package/dist/bollharness/src/scripts/install/phase2_auto.js +21 -0
  66. package/dist/bollharness/src/scripts/install/pre_commit_installer.js +6 -0
  67. package/dist/bollharness/src/scripts/install/tier_selector.js +7 -0
  68. package/dist/bollharness/src/scripts/install/transcript_miner.js +7 -0
  69. package/dist/bollharness/src/scripts/lib/claim_patterns.js +10 -0
  70. package/dist/bollharness/src/scripts/lib/sanitize_patterns.js +12 -0
  71. package/dist/bollharness/src/scripts/sanitize.js +6 -0
  72. package/dist/bollharness-integration/channel-judgment-engine.js +530 -0
  73. package/dist/bollharness-integration/context-chain-router.js +383 -0
  74. package/dist/bollharness-integration/context-router-judgment.js +13 -21
  75. package/dist/bollharness-integration/context-router.js +22 -64
  76. package/dist/bollharness-integration/gate-state-machine.js +14 -19
  77. package/dist/bollharness-integration/gate-transition-hooks.js +16 -61
  78. package/dist/bollharness-integration/guard-checker.js +21 -68
  79. package/dist/bollharness-integration/index.js +14 -124
  80. package/dist/bollharness-integration/integration.js +13 -20
  81. package/dist/bollharness-integration/llm-judgment-engine.js +569 -0
  82. package/dist/bollharness-integration/skill-adapter.js +18 -64
  83. package/dist/cli-entry.js +261 -0
  84. package/dist/constraint-runtime/src/commands.js +17 -7
  85. package/dist/constraint-runtime/src/constraint/budget.js +1 -6
  86. package/dist/constraint-runtime/src/constraint/permission.js +1 -6
  87. package/dist/constraint-runtime/src/models.js +1 -3
  88. package/dist/constraint-runtime/src/tools.js +17 -7
  89. package/dist/constraints/index.js +1 -7
  90. package/dist/documents/reader.js +8 -49
  91. package/dist/heartbeat/DaemonManager.js +242 -0
  92. package/dist/heartbeat/HealthMonitor.js +285 -0
  93. package/dist/heartbeat/StartupVerifier.js +205 -0
  94. package/dist/heartbeat/Watchdog.js +168 -0
  95. package/dist/heartbeat/index.js +84 -0
  96. package/dist/heartbeat/types.js +5 -0
  97. package/dist/index.js +381 -28
  98. package/dist/llm/config-store.js +31 -57
  99. package/dist/llm/llm-judgment-client.js +389 -0
  100. package/dist/llm/pi-ai.js +9 -52
  101. package/dist/network/agent-network.js +46 -90
  102. package/dist/network/hybrid-messenger.js +125 -0
  103. package/dist/network/iroh-bootstrap.js +38 -0
  104. package/dist/network/iroh-discovery.js +145 -0
  105. package/dist/network/iroh-integration.js +9 -16
  106. package/dist/network/iroh-transport.js +10 -48
  107. package/dist/network/p2p.js +23 -62
  108. package/dist/network/storage/adapters/json-adapter.js +4 -42
  109. package/dist/network/storage/index.js +147 -0
  110. package/dist/network/storage/types.js +14 -0
  111. package/dist/pi-ecosystem/index.js +233 -0
  112. package/dist/pi-ecosystem-colony/index.js +29 -90
  113. package/dist/pi-ecosystem-goals/index.js +20 -74
  114. package/dist/pi-ecosystem-judgment/decision.js +29 -47
  115. package/dist/pi-ecosystem-judgment/distillation.js +16 -29
  116. package/dist/pi-ecosystem-judgment/human-value-store.js +13 -60
  117. package/dist/pi-ecosystem-judgment/index.js +21 -74
  118. package/dist/pi-ecosystem-judgment/value-injection.js +26 -72
  119. package/dist/pi-ecosystem-mcp/index.js +24 -78
  120. package/dist/pi-ecosystem-subagents/index.js +20 -69
  121. package/dist/social/ant-colony/AdaptiveHeartbeat.js +3 -8
  122. package/dist/social/ant-colony/PheromoneEngine.js +11 -49
  123. package/dist/social/ant-colony/index.js +6 -0
  124. package/dist/social/ant-colony/types.js +4 -8
  125. package/dist/social/channels/ChannelManager.js +8 -46
  126. package/dist/social/channels/DiapChannelBridge.js +9 -47
  127. package/dist/social/channels/InterestMatcher.js +2 -7
  128. package/dist/social/channels/channel-agent-session.js +309 -0
  129. package/dist/social/channels/channel-heartbeat-agent.js +494 -0
  130. package/dist/social/channels/diap-doc-parser.js +204 -0
  131. package/dist/social/channels/harness-workflow-integrator.js +446 -0
  132. package/dist/social/channels/index.js +9 -0
  133. package/dist/social/channels/types.js +3 -7
  134. package/dist/social/global-shared-context.js +6 -47
  135. package/dist/social/heartbeat.js +29 -72
  136. package/dist/social/persona/enhanced-persona.js +299 -0
  137. package/dist/web/client.js +302 -136
  138. package/dist/web/components/p2p/index.js +159 -9
  139. package/dist/web/components/p2p/p2p-connection.js +136 -0
  140. package/dist/web/components/p2p/p2p-manager.js +24 -0
  141. package/dist/web/components/p2p/p2p-store-memory.js +1 -1
  142. package/dist/web/components/p2p/types.js +7 -0
  143. package/dist/web/index.html +5 -0
  144. package/dist/web/style.css +118 -0
  145. package/package.json +12 -6
  146. package/scripts/build-cli.js +206 -0
  147. package/scripts/postinstall.js +153 -0
  148. package/src/agents/pi-sdk.ts +347 -28
  149. package/src/agents/protocol.ts +95 -1
  150. package/src/agents/workflow-pivot-loop.ts +674 -0
  151. package/src/bollharness/scripts/context-fragments/pi-agent-operations.md +34 -0
  152. package/src/cli-entry.ts +304 -0
  153. package/src/heartbeat/DaemonManager.ts +283 -0
  154. package/src/heartbeat/HealthMonitor.ts +316 -0
  155. package/src/heartbeat/StartupVerifier.ts +223 -0
  156. package/src/heartbeat/Watchdog.ts +198 -0
  157. package/src/heartbeat/index.ts +108 -0
  158. package/src/heartbeat/types.ts +82 -0
  159. package/src/llm/config-store.ts +23 -5
  160. package/src/network/iroh-transport.ts +3 -3
  161. package/src/web/client.js +302 -136
  162. package/src/web/components/p2p/P2PModal.tsx +91 -3
  163. package/src/web/components/p2p/index.ts +171 -9
  164. package/src/web/components/p2p/p2p-connection.ts +153 -1
  165. package/src/web/components/p2p/p2p-manager.ts +39 -1
  166. package/src/web/components/p2p/p2p-store-memory.ts +1 -1
  167. package/src/web/components/p2p/p2p-tools.ts +315 -0
  168. package/src/web/components/p2p/types.ts +58 -0
  169. package/src/web/index.html +5 -0
  170. package/src/web/server.ts +353 -36
  171. package/src/web/style.css +118 -0
  172. package/tsconfig.cli.json +16 -0
  173. package/tsconfig.electron.json +1 -1
  174. package/tsconfig.json +1 -2
  175. package/dist/constraint-runtime/tests/agent.test.js +0 -16
  176. package/dist/constraint-runtime/tests/constraint.test.js +0 -41
  177. package/dist/constraint-runtime/tests/skill.test.js +0 -19
  178. package/dist/constraint-runtime/tests/thinking.test.js +0 -22
  179. package/dist/electron-preload.js +0 -15
  180. package/dist/electron-preload.js.map +0 -1
  181. package/dist/electron.js +0 -206
  182. package/dist/electron.js.map +0 -1
  183. package/dist/test/constraint-layer.test.js +0 -164
  184. package/dist/test/global-shared-context.test.js +0 -315
  185. package/dist/test/pi-sdk.test.js +0 -47
  186. package/dist/test/set-persona.test.js +0 -38
  187. package/dist/test/subagent-manager.test.js +0 -276
  188. package/dist/test/workflow-engine.test.js +0 -87
  189. package/dist/web/server.js +0 -1647
  190. package/dist/web/server.js.map +0 -1
  191. package/dist/workflows/collaboration.js +0 -374
  192. package/dist/workflows/index.js +0 -54
  193. package/docs/agent-communication.md +0 -333
  194. package/docs/plans/2026-05-15-document-agent-design.md +0 -479
  195. package/docs/plans/2026-05-15-document-agent-implementation-plan.md +0 -792
  196. package/docs/plans/2026-05-16-chat-ui-design.md +0 -86
  197. package/docs/plans/2026-05-16-constraint-runtime-design.md +0 -106
  198. package/docs/plans/2026-05-16-constraint-runtime-implementation.md +0 -441
  199. package/docs/数学辅助智能体-核心效果定义.md +0 -287
  200. package/src/bollharness/.boll/CLAUDE.md.template +0 -34
  201. package/src/bollharness/.boll/MANIFEST.yaml +0 -213
  202. package/src/bollharness/.boll/active-review-agents/.gitkeep +0 -0
  203. package/src/bollharness/.boll/agents/review-base.yaml +0 -108
  204. package/src/bollharness/.boll/deploy-allowlist.yaml +0 -38
  205. package/src/bollharness/.boll/inbox/schema/message-v1.json +0 -99
  206. package/src/bollharness/.boll/install-staging/.gitkeep +0 -0
  207. package/src/bollharness/.boll/issue-adapter.yaml +0 -31
  208. package/src/bollharness/.boll/plugins/boll-mode-toolkit/contracts/mode-contract.md +0 -85
  209. package/src/bollharness/.boll/plugins/boll-review-toolkit/contracts/evidence-packet-schema.json +0 -102
  210. package/src/bollharness/.boll/plugins/boll-review-toolkit/contracts/review-contract.yaml +0 -247
  211. package/src/bollharness/.boll/rules/backend-routes.md +0 -31
  212. package/src/bollharness/.boll/rules/closure-semantics.md +0 -30
  213. package/src/bollharness/.boll/rules/env-vars.md +0 -32
  214. package/src/bollharness/.boll/rules/hanis-protocol.md +0 -145
  215. package/src/bollharness/.boll/rules/repo-structure.md +0 -42
  216. package/src/bollharness/.boll/rules/review-agent-isolation.md +0 -73
  217. package/src/bollharness/.boll/rules/source-of-truth.md +0 -33
  218. package/src/bollharness/.boll/settings.json +0 -180
  219. package/src/bollharness/.boll/settings.json.template +0 -31
  220. package/src/bollharness/.boll/skills/arch/SKILL.md +0 -372
  221. package/src/bollharness/.boll/skills/bug-pipeline/SKILL.md +0 -168
  222. package/src/bollharness/.boll/skills/bug-triage/SKILL.md +0 -161
  223. package/src/bollharness/.boll/skills/context-chains/SKILL.md +0 -250
  224. package/src/bollharness/.boll/skills/context-chains/context-chain-index.md +0 -48
  225. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/code_change_extractor.ts +0 -142
  226. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/debugging_extractor.ts +0 -126
  227. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/design_extractor.ts +0 -148
  228. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/planning_extractor.ts +0 -162
  229. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/question_extractor.ts +0 -116
  230. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/review_extractor.ts +0 -136
  231. package/src/bollharness/.boll/skills/crystal-learn/SKILL.md +0 -93
  232. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-0.md +0 -34
  233. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-1.md +0 -34
  234. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-2.md +0 -35
  235. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-3.md +0 -34
  236. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-4.md +0 -43
  237. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-5.md +0 -34
  238. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-6.md +0 -37
  239. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-7.md +0 -46
  240. package/src/bollharness/.boll/skills/guardian-fixer/PROMPT.md +0 -415
  241. package/src/bollharness/.boll/skills/guardian-fixer/SKILL.md +0 -320
  242. package/src/bollharness/.boll/skills/harness-dev/SKILL.md +0 -93
  243. package/src/bollharness/.boll/skills/harness-dev/examples/README.md +0 -227
  244. package/src/bollharness/.boll/skills/harness-dev-handoff/SKILL.md +0 -165
  245. package/src/bollharness/.boll/skills/harness-eng/SKILL.md +0 -110
  246. package/src/bollharness/.boll/skills/harness-eng-test/SKILL.md +0 -79
  247. package/src/bollharness/.boll/skills/harness-lab/SKILL.md +0 -170
  248. package/src/bollharness/.boll/skills/harness-ops/SKILL.md +0 -57
  249. package/src/bollharness/.boll/skills/harness-voice/SKILL.md +0 -183
  250. package/src/bollharness/.boll/skills/judgment/SKILL.md +0 -115
  251. package/src/bollharness/.boll/skills/lead/SKILL.md +0 -245
  252. package/src/bollharness/.boll/skills/lead/install-wow-harness.md +0 -77
  253. package/src/bollharness/.boll/skills/lead/ref-review-sop.md +0 -91
  254. package/src/bollharness/.boll/skills/lead/ref-stages.md +0 -129
  255. package/src/bollharness/.boll/skills/skill-discovery/SKILL.md +0 -169
  256. package/src/bollharness/.boll/skills/task-arch/SKILL.md +0 -106
  257. package/src/bollharness/.boll/skills/toolkit/SKILL.md +0 -57
  258. package/src/bollharness/.boll/tasks/.gitkeep +0 -0
  259. package/src/bollharness/.boll/toolkit-index.yaml +0 -112
  260. package/src/bollharness/.claude/agents/review-base.yaml +0 -108
  261. package/src/bollharness/.claude/plugins/boll-mode-toolkit/.claude-plugin/plugin.json +0 -44
  262. package/src/bollharness/.claude/plugins/boll-review-toolkit/.claude-plugin/plugin.json +0 -24
  263. package/src/bollharness/.claude/plugins/boll-review-toolkit/contracts/evidence-packet-schema.json +0 -102
  264. package/src/bollharness/.claude/plugins/boll-review-toolkit/contracts/review-contract.yaml +0 -247
  265. package/src/bollharness/.claude/settings.json +0 -157
  266. package/src/bollharness/.claude/skills/arch/SKILL.md +0 -64
  267. package/src/bollharness/.claude/skills/crystal-learn/SKILL.md +0 -93
  268. package/src/bollharness/.claude/skills/guardian-fixer/PROMPT.md +0 -44
  269. package/src/bollharness/.claude/skills/guardian-fixer/SKILL.md +0 -324
  270. package/src/bollharness/.claude/skills/harness-dev/SKILL.md +0 -93
  271. package/src/bollharness/.claude/skills/harness-dev/examples/README.md +0 -17
  272. package/src/bollharness/.claude/skills/harness-dev-handoff/SKILL.md +0 -165
  273. package/src/bollharness/.claude/skills/harness-eng/SKILL.md +0 -183
  274. package/src/bollharness/.claude/skills/harness-eng-test/SKILL.md +0 -57
  275. package/src/bollharness/.claude/skills/harness-ops/SKILL.md +0 -57
  276. package/src/bollharness/.claude/skills/harness-voice/SKILL.md +0 -84
  277. package/src/bollharness/.claude/skills/lead/INDEX.md +0 -28
  278. package/src/bollharness/.claude/skills/lead/SKILL.md +0 -24
  279. package/src/bollharness/.claude/skills/lead/install-wow-harness.md +0 -77
  280. package/src/bollharness/.claude/skills/lead/ref-review-sop.md +0 -48
  281. package/src/bollharness/.claude/skills/lead/ref-stages.md +0 -58
  282. package/src/bollharness/.claude/skills/plan-lock/SKILL.md +0 -74
  283. package/src/bollharness/.claude/skills/skill-discovery/SKILL.md +0 -120
  284. package/src/bollharness/.claude/skills/task-arch/SKILL.md +0 -106
  285. package/src/bollharness/.claude/skills/toolkit/SKILL.md +0 -57
  286. package/src/bollharness/.claude/skills/toolkit/list.sh +0 -92
  287. package/src/bollharness/.githooks/pre-commit +0 -21
  288. package/src/bollharness/.github/workflows/ci.yml +0 -88
  289. package/src/bollharness/docs/decisions/ADR-030-guard-signal-protocol-and-governance-reload.md +0 -1076
  290. package/src/bollharness/docs/decisions/ADR-038-harness-optimization-strategy.md +0 -2039
  291. package/src/bollharness/docs/decisions/ADR-041-codex-claude-code-division-of-labor.md +0 -128
  292. package/src/bollharness/docs/decisions/ADR-H1-crystal-learn-revival.md +0 -188
  293. package/src/bollharness/docs/decisions/ADR-H2-identity-isolation.md +0 -183
  294. package/src/bollharness/docs/decisions/ADR-H3-memory-scope.md +0 -133
  295. package/src/bollharness/docs/decisions/ADR-H4-prompt-governance.md +0 -146
  296. package/src/bollharness/docs/decisions/ADR-H5-gate-quantization.md +0 -212
  297. package/src/bollharness/docs/decisions/ADR-H6-state-file-health.md +0 -211
  298. package/src/bollharness/docs/decisions/ADR-H8-issue-and-doc-compliance.md +0 -202
  299. package/src/bollharness/docs/decisions/ADR-H9-mailbox.md +0 -231
  300. package/src/bollharness/docs/decisions/PLAN-H1-crystal-learn-revival.md +0 -270
  301. package/src/bollharness/docs/decisions/PLAN-H2-identity-isolation.md +0 -291
  302. package/src/bollharness/docs/decisions/PLAN-H3-memory-scope.md +0 -228
  303. package/src/bollharness/docs/decisions/PLAN-H4-prompt-governance.md +0 -227
  304. package/src/bollharness/docs/decisions/PLAN-H5-gate-quantization.md +0 -239
  305. package/src/bollharness/docs/decisions/PLAN-H6-state-file-health.md +0 -325
  306. package/src/bollharness/docs/decisions/PLAN-H8-issue-and-doc-compliance.md +0 -242
  307. package/src/bollharness/docs/decisions/PLAN-H9-mailbox.md +0 -378
  308. package/src/bollharness/docs/launch-article-en.md +0 -276
  309. package/src/bollharness/docs/launch-article-zh.md +0 -305
  310. package/src/bollharness/docs/practice.html +0 -356
  311. package/src/bollharness/docs/practice.md +0 -82
  312. package/src/bollharness/docs/research/round-1/README.md +0 -11
  313. package/src/bollharness/docs/research/round-2/README.md +0 -11
  314. package/src/bollharness/docs/research/round-3/README.md +0 -11
  315. package/src/bollharness/docs/research/round-4/README.md +0 -11
  316. package/src/bollharness/docs/research/round-5/README.md +0 -11
  317. package/src/bollharness/docs/research/round-6/README.md +0 -11
  318. package/src/bollharness/package-lock.json +0 -48
  319. package/src/bollharness/reference/boll-reference/.claude/rules/backend-routes.md +0 -268
  320. package/src/bollharness/reference/boll-reference/.claude/rules/bridge.md +0 -20
  321. package/src/bollharness/reference/boll-reference/.claude/rules/closure-semantics.md +0 -30
  322. package/src/bollharness/reference/boll-reference/.claude/rules/coaching.md +0 -13
  323. package/src/bollharness/reference/boll-reference/.claude/rules/env-vars.md +0 -50
  324. package/src/bollharness/reference/boll-reference/.claude/rules/hackathon.md +0 -12
  325. package/src/bollharness/reference/boll-reference/.claude/rules/repo-structure.md +0 -184
  326. package/src/bollharness/reference/boll-reference/.claude/rules/review-agent-isolation.md +0 -112
  327. package/src/bollharness/reference/boll-reference/.claude/rules/scenes.md +0 -12
  328. package/src/bollharness/reference/boll-reference/.claude/skills/arch/SKILL.md +0 -551
  329. package/src/bollharness/reference/boll-reference/.claude/skills/boll-animation/SKILL.md +0 -26
  330. package/src/bollharness/reference/boll-reference/.claude/skills/boll-bridge/SKILL.md +0 -227
  331. package/src/bollharness/reference/boll-reference/.claude/skills/boll-bridge/agents/openai.yaml +0 -4
  332. package/src/bollharness/reference/boll-reference/.claude/skills/boll-bridge/references/bridge-failure-taxonomy.md +0 -142
  333. package/src/bollharness/reference/boll-reference/.claude/skills/boll-bridge/references/bridge-validation-ladder.md +0 -107
  334. package/src/bollharness/reference/boll-reference/.claude/skills/boll-crystal/SKILL.md +0 -893
  335. package/src/bollharness/reference/boll-reference/.claude/skills/boll-crystal-learn/SKILL.md +0 -89
  336. package/src/bollharness/reference/boll-reference/.claude/skills/boll-dev/SKILL.md +0 -93
  337. package/src/bollharness/reference/boll-reference/.claude/skills/boll-dev/examples/README.md +0 -209
  338. package/src/bollharness/reference/boll-reference/.claude/skills/boll-dev-handoff/SKILL.md +0 -165
  339. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng/SKILL.md +0 -110
  340. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng-frontend/SKILL.md +0 -203
  341. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng-hdc/SKILL.md +0 -27
  342. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng-orchestrator/SKILL.md +0 -28
  343. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng-prompt/SKILL.md +0 -27
  344. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng-test/SKILL.md +0 -79
  345. package/src/bollharness/reference/boll-reference/.claude/skills/boll-lab/SKILL.md +0 -372
  346. package/src/bollharness/reference/boll-reference/.claude/skills/boll-run/SKILL.md +0 -437
  347. package/src/bollharness/reference/boll-reference/.claude/skills/boll-ux-appstore/SKILL.md +0 -27
  348. package/src/bollharness/reference/boll-reference/.claude/skills/boll-voice/SKILL.md +0 -442
  349. package/src/bollharness/reference/boll-reference/.claude/skills/guardian-fixer/PROMPT.md +0 -421
  350. package/src/bollharness/reference/boll-reference/.claude/skills/guardian-fixer/SKILL.md +0 -326
  351. package/src/bollharness/reference/boll-reference/.claude/skills/lead/SKILL.md +0 -155
  352. package/src/bollharness/reference/boll-reference/.claude/skills/lead/ref-review-sop.md +0 -91
  353. package/src/bollharness/reference/boll-reference/.claude/skills/lead/ref-stages.md +0 -129
  354. package/src/bollharness/reference/boll-reference/.claude/skills/nature-designer/output/skill-map-preview.png +0 -0
  355. package/src/bollharness/reference/boll-reference/.claude/skills/nature-designer/output/skill-map-v2.png +0 -0
  356. package/src/bollharness/reference/boll-reference/.claude/skills/nature-designer/output/skill-map-v3.png +0 -0
  357. package/src/bollharness/reference/boll-reference/.claude/skills/nature-designer/output/skill-map-v4.png +0 -0
  358. package/src/bollharness/reference/boll-reference/.claude/skills/plan-lock/SKILL.md +0 -425
  359. package/src/bollharness/reference/boll-reference/.claude/skills/plan-lock/ref-three-checks.md +0 -62
  360. package/src/bollharness/reference/boll-reference/.claude/skills/plan-lock/ref-wp-templates.md +0 -78
  361. package/src/bollharness/reference/boll-reference/.claude/skills/task-arch/SKILL.md +0 -76
  362. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-graph/SKILL.md +0 -57
  363. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-graph/beads-graph.sh +0 -153
  364. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-init/SKILL.md +0 -52
  365. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-init/beads-auto-link.sh +0 -76
  366. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-sync/SKILL.md +0 -50
  367. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-sync/beads-sync-proj.sh +0 -108
  368. package/src/bollharness/reference/boll-reference/docs/architecture/AGENT-PROFILE.md +0 -151
  369. package/src/bollharness/reference/boll-reference/docs/architecture/COST-STRUCTURE.md +0 -56
  370. package/src/bollharness/reference/boll-reference/docs/architecture/INDEX.md +0 -76
  371. package/src/bollharness/reference/boll-reference/docs/architecture/MODULE1-INTENT-FIELD.md +0 -116
  372. package/src/bollharness/reference/boll-reference/docs/architecture/MODULE2-CRYSTALLIZATION.md +0 -200
  373. package/src/bollharness/reference/boll-reference/docs/architecture/PRINCIPLES.md +0 -84
  374. package/src/bollharness/reference/boll-reference/docs/architecture/PROTOCOL-CORE.md +0 -209
  375. package/src/bollharness/reference/boll-reference/docs/architecture/VISION.md +0 -181
  376. package/src/bollharness/reference/boll-reference/docs/architecture/discussions/D-01-MARKET-SCENE-PROTOCOL.md +0 -754
  377. package/src/bollharness/templates/scaffold/.boll/guard/.gitkeep +0 -0
  378. package/src/bollharness/templates/scaffold/.boll/metrics/.gitkeep +0 -0
  379. package/src/bollharness/templates/scaffold/.boll/state/.gitkeep +0 -0
  380. package/src/bollharness/templates/scaffold/docs/INDEX.md +0 -3
  381. package/src/bollharness/templates/scaffold/docs/decisions/ADR_TEMPLATE.md +0 -38
  382. package/src/bollharness/templates/scaffold/docs/decisions/PLAN_TEMPLATE.md +0 -45
  383. package/src/bollharness/templates/scaffold/docs/decisions/tasks/.gitkeep +0 -2
  384. package/src/bollharness/templates/scaffold/docs/issues/.gitkeep +0 -0
  385. package/src/bollharness/templates/scaffold/docs/issues/GUARD_ISSUE_TEMPLATE.md +0 -35
  386. package/src/bollharness/templates/scaffold/docs/issues/ISSUE_TEMPLATE.md +0 -51
  387. package/src/bollharness/tsconfig.json +0 -26
  388. package/src/constraint-runtime/package-lock.json +0 -48
  389. package/src/constraint-runtime/package.json +0 -34
  390. package/src/constraint-runtime/src/_archive_helper.ts +0 -16
  391. package/src/constraint-runtime/src/agent/coordinator.ts +0 -71
  392. package/src/constraint-runtime/src/agent/index.ts +0 -1
  393. package/src/constraint-runtime/src/assistant/index.ts +0 -15
  394. package/src/constraint-runtime/src/bootstrap/index.ts +0 -15
  395. package/src/constraint-runtime/src/bootstrap_graph.ts +0 -17
  396. package/src/constraint-runtime/src/bridge/index.ts +0 -15
  397. package/src/constraint-runtime/src/buddy/index.ts +0 -15
  398. package/src/constraint-runtime/src/cli/index.ts +0 -15
  399. package/src/constraint-runtime/src/command_graph.ts +0 -20
  400. package/src/constraint-runtime/src/commands.ts +0 -83
  401. package/src/constraint-runtime/src/components/index.ts +0 -15
  402. package/src/constraint-runtime/src/constants/index.ts +0 -15
  403. package/src/constraint-runtime/src/constraint/budget.ts +0 -25
  404. package/src/constraint-runtime/src/constraint/index.ts +0 -3
  405. package/src/constraint-runtime/src/constraint/permission.ts +0 -28
  406. package/src/constraint-runtime/src/context.ts +0 -45
  407. package/src/constraint-runtime/src/coordinator/index.ts +0 -15
  408. package/src/constraint-runtime/src/cost_hook.ts +0 -6
  409. package/src/constraint-runtime/src/cost_tracker.ts +0 -9
  410. package/src/constraint-runtime/src/deferred_init.ts +0 -18
  411. package/src/constraint-runtime/src/direct_modes.ts +0 -13
  412. package/src/constraint-runtime/src/dynamic-tool-loader.ts +0 -115
  413. package/src/constraint-runtime/src/entrypoints/index.ts +0 -15
  414. package/src/constraint-runtime/src/execution_registry.ts +0 -41
  415. package/src/constraint-runtime/src/history.ts +0 -16
  416. package/src/constraint-runtime/src/hooks/index.ts +0 -15
  417. package/src/constraint-runtime/src/index.ts +0 -28
  418. package/src/constraint-runtime/src/ink.ts +0 -4
  419. package/src/constraint-runtime/src/keybindings/index.ts +0 -15
  420. package/src/constraint-runtime/src/memdir/index.ts +0 -15
  421. package/src/constraint-runtime/src/migrations/index.ts +0 -15
  422. package/src/constraint-runtime/src/models.ts +0 -49
  423. package/src/constraint-runtime/src/moreright/index.ts +0 -15
  424. package/src/constraint-runtime/src/native_ts/index.ts +0 -15
  425. package/src/constraint-runtime/src/output_styles/index.ts +0 -15
  426. package/src/constraint-runtime/src/parity_audit.ts +0 -23
  427. package/src/constraint-runtime/src/plugins/index.ts +0 -15
  428. package/src/constraint-runtime/src/port_manifest.ts +0 -20
  429. package/src/constraint-runtime/src/prefetch.ts +0 -17
  430. package/src/constraint-runtime/src/query.ts +0 -7
  431. package/src/constraint-runtime/src/reference_data/archive_surface_snapshot.json +0 -63
  432. package/src/constraint-runtime/src/reference_data/commands_snapshot.json +0 -1037
  433. package/src/constraint-runtime/src/reference_data/subsystems/OpenCLI.json +0 -10
  434. package/src/constraint-runtime/src/reference_data/subsystems/PolymarketSDK.json +0 -12
  435. package/src/constraint-runtime/src/reference_data/subsystems/SafeSDK.json +0 -14
  436. package/src/constraint-runtime/src/reference_data/subsystems/assistant.json +0 -8
  437. package/src/constraint-runtime/src/reference_data/subsystems/bootstrap.json +0 -8
  438. package/src/constraint-runtime/src/reference_data/subsystems/bridge.json +0 -32
  439. package/src/constraint-runtime/src/reference_data/subsystems/buddy.json +0 -13
  440. package/src/constraint-runtime/src/reference_data/subsystems/cli.json +0 -26
  441. package/src/constraint-runtime/src/reference_data/subsystems/components.json +0 -32
  442. package/src/constraint-runtime/src/reference_data/subsystems/constants.json +0 -28
  443. package/src/constraint-runtime/src/reference_data/subsystems/coordinator.json +0 -8
  444. package/src/constraint-runtime/src/reference_data/subsystems/entrypoints.json +0 -15
  445. package/src/constraint-runtime/src/reference_data/subsystems/hooks.json +0 -32
  446. package/src/constraint-runtime/src/reference_data/subsystems/keybindings.json +0 -21
  447. package/src/constraint-runtime/src/reference_data/subsystems/memdir.json +0 -15
  448. package/src/constraint-runtime/src/reference_data/subsystems/migrations.json +0 -18
  449. package/src/constraint-runtime/src/reference_data/subsystems/moreright.json +0 -8
  450. package/src/constraint-runtime/src/reference_data/subsystems/native_ts.json +0 -11
  451. package/src/constraint-runtime/src/reference_data/subsystems/outputStyles.json +0 -8
  452. package/src/constraint-runtime/src/reference_data/subsystems/plugins.json +0 -9
  453. package/src/constraint-runtime/src/reference_data/subsystems/remote.json +0 -11
  454. package/src/constraint-runtime/src/reference_data/subsystems/schemas.json +0 -8
  455. package/src/constraint-runtime/src/reference_data/subsystems/screens.json +0 -10
  456. package/src/constraint-runtime/src/reference_data/subsystems/server.json +0 -10
  457. package/src/constraint-runtime/src/reference_data/subsystems/services.json +0 -32
  458. package/src/constraint-runtime/src/reference_data/subsystems/skills.json +0 -27
  459. package/src/constraint-runtime/src/reference_data/subsystems/state.json +0 -13
  460. package/src/constraint-runtime/src/reference_data/subsystems/types.json +0 -18
  461. package/src/constraint-runtime/src/reference_data/subsystems/upstreamproxy.json +0 -9
  462. package/src/constraint-runtime/src/reference_data/subsystems/utils.json +0 -32
  463. package/src/constraint-runtime/src/reference_data/subsystems/vim.json +0 -12
  464. package/src/constraint-runtime/src/reference_data/subsystems/voice.json +0 -8
  465. package/src/constraint-runtime/src/reference_data/tools_snapshot.json +0 -1042
  466. package/src/constraint-runtime/src/remote/index.ts +0 -15
  467. package/src/constraint-runtime/src/remote_runtime.ts +0 -17
  468. package/src/constraint-runtime/src/runtime/index.ts +0 -1
  469. package/src/constraint-runtime/src/runtime/session.ts +0 -42
  470. package/src/constraint-runtime/src/schemas/index.ts +0 -15
  471. package/src/constraint-runtime/src/screens/index.ts +0 -15
  472. package/src/constraint-runtime/src/server/index.ts +0 -15
  473. package/src/constraint-runtime/src/services/index.ts +0 -15
  474. package/src/constraint-runtime/src/session_store.ts +0 -32
  475. package/src/constraint-runtime/src/setup.ts +0 -50
  476. package/src/constraint-runtime/src/skills/index.ts +0 -1
  477. package/src/constraint-runtime/src/skills/skill-registry.ts +0 -40
  478. package/src/constraint-runtime/src/state/index.ts +0 -15
  479. package/src/constraint-runtime/src/system_init.ts +0 -21
  480. package/src/constraint-runtime/src/thinking/engine.ts +0 -61
  481. package/src/constraint-runtime/src/thinking/index.ts +0 -1
  482. package/src/constraint-runtime/src/tool_pool.ts +0 -20
  483. package/src/constraint-runtime/src/tools/OpenCLI/execAdapter.ts +0 -12
  484. package/src/constraint-runtime/src/tools/OpenCLI/listAdapters.ts +0 -12
  485. package/src/constraint-runtime/src/tools/OpenCLI/runCommand.ts +0 -13
  486. package/src/constraint-runtime/src/tools/PolymarketSDK/cancelOrder.ts +0 -10
  487. package/src/constraint-runtime/src/tools/PolymarketSDK/createOrder.ts +0 -13
  488. package/src/constraint-runtime/src/tools/PolymarketSDK/getMarket.ts +0 -14
  489. package/src/constraint-runtime/src/tools/PolymarketSDK/getOrders.ts +0 -10
  490. package/src/constraint-runtime/src/tools/PolymarketSDK/listMarkets.ts +0 -24
  491. package/src/constraint-runtime/src/tools/SafeSDK/confirmTransaction.ts +0 -13
  492. package/src/constraint-runtime/src/tools/SafeSDK/createTransaction.ts +0 -23
  493. package/src/constraint-runtime/src/tools/SafeSDK/deploySafe.ts +0 -12
  494. package/src/constraint-runtime/src/tools/SafeSDK/executeTransaction.ts +0 -12
  495. package/src/constraint-runtime/src/tools/SafeSDK/getBalance.ts +0 -10
  496. package/src/constraint-runtime/src/tools/SafeSDK/getPendingTransactions.ts +0 -10
  497. package/src/constraint-runtime/src/tools/SafeSDK/proposeTransaction.ts +0 -14
  498. package/src/constraint-runtime/src/tools/WalletTools/autoPay.ts +0 -58
  499. package/src/constraint-runtime/src/tools/WalletTools/createWallet.ts +0 -19
  500. package/src/constraint-runtime/src/tools/WalletTools/getBalance.ts +0 -28
  501. package/src/constraint-runtime/src/tools/WalletTools/importWallet.ts +0 -34
  502. package/src/constraint-runtime/src/tools/WalletTools/sendTransaction.ts +0 -50
  503. package/src/constraint-runtime/src/tools/WalletTools/signMessage.ts +0 -23
  504. package/src/constraint-runtime/src/tools/WalletTools/transferToken.ts +0 -49
  505. package/src/constraint-runtime/src/tools.ts +0 -100
  506. package/src/constraint-runtime/src/transcript.ts +0 -23
  507. package/src/constraint-runtime/src/types/index.ts +0 -15
  508. package/src/constraint-runtime/src/upstream_proxy/index.ts +0 -15
  509. package/src/constraint-runtime/src/utils/index.ts +0 -15
  510. package/src/constraint-runtime/src/vim/index.ts +0 -15
  511. package/src/constraint-runtime/src/voice/index.ts +0 -15
  512. package/src/constraint-runtime/tests/agent.test.ts +0 -20
  513. package/src/constraint-runtime/tests/constraint.test.ts +0 -47
  514. package/src/constraint-runtime/tests/skill.test.ts +0 -23
  515. package/src/constraint-runtime/tests/thinking.test.ts +0 -28
  516. package/src/constraint-runtime/tsconfig.json +0 -13
  517. package/src/pi-ecosystem/index.ts +0 -453
  518. package/src/pi-ecosystem-colony/index.ts +0 -482
  519. package/src/pi-ecosystem-goals/index.ts +0 -585
  520. package/src/pi-ecosystem-judgment/decision.ts +0 -431
  521. package/src/pi-ecosystem-judgment/distillation.ts +0 -398
  522. package/src/pi-ecosystem-judgment/human-value-store.ts +0 -580
  523. package/src/pi-ecosystem-judgment/index.ts +0 -678
  524. package/src/pi-ecosystem-judgment/value-injection.ts +0 -744
  525. package/src/pi-ecosystem-mcp/index.ts +0 -427
  526. package/src/pi-ecosystem-subagents/index.ts +0 -408
  527. package/src/test/ai-judgment-test.ts +0 -92
  528. package/src/test/bollharness-integration.test.ts +0 -398
  529. package/src/test/channel-agent-multi-dialogue.ts +0 -265
  530. package/src/test/channel-heartbeat-agent-test.ts +0 -244
  531. package/src/test/constraint-layer.test.ts +0 -191
  532. package/src/test/diap-identity-test.ts +0 -222
  533. package/src/test/diap-quick-test.ts +0 -73
  534. package/src/test/global-shared-context.test.ts +0 -393
  535. package/src/test/harness-judgment-injection.test.ts +0 -353
  536. package/src/test/harness-workflow-integrator-test.ts +0 -285
  537. package/src/test/human-value-store.test.ts +0 -316
  538. package/src/test/hybrid-integration-test.ts +0 -126
  539. package/src/test/hybrid-messenger-verify.ts +0 -68
  540. package/src/test/iroh-bistream-debug.ts +0 -50
  541. package/src/test/iroh-communication.test.ts +0 -81
  542. package/src/test/iroh-debug-test.ts +0 -69
  543. package/src/test/iroh-diap-test.ts +0 -90
  544. package/src/test/iroh-direct-connect.ts +0 -65
  545. package/src/test/iroh-e2e-fixed.ts +0 -106
  546. package/src/test/iroh-e2e-same-process.ts +0 -83
  547. package/src/test/iroh-e2e.ts +0 -83
  548. package/src/test/iroh-final-e2e.ts +0 -84
  549. package/src/test/iroh-relay-test.ts +0 -46
  550. package/src/test/iroh-simple-test.ts +0 -49
  551. package/src/test/iroh-transport-verify.ts +0 -60
  552. package/src/test/iroh-transport.test.ts +0 -47
  553. package/src/test/iroh-two-nodes.ts +0 -87
  554. package/src/test/iroh-verify.ts +0 -55
  555. package/src/test/judgment-decision.test.ts +0 -373
  556. package/src/test/llm-judgment-integration.test.ts +0 -257
  557. package/src/test/p2p-agent-complex-dialogue.ts +0 -490
  558. package/src/test/p2p-agent-dialogue.ts +0 -423
  559. package/src/test/p2p-agent-full-bidirectional.ts +0 -686
  560. package/src/test/p2p-agent-harness-flow.ts +0 -562
  561. package/src/test/p2p-agent-harness-single.ts +0 -175
  562. package/src/test/p2p-ai-dialogue-test.ts +0 -374
  563. package/src/test/p2p-cid-connect-test.ts +0 -245
  564. package/src/test/p2p-connect-receiver.ts +0 -85
  565. package/src/test/p2p-iroh-test.ts +0 -214
  566. package/src/test/p2p-minimal-test.ts +0 -264
  567. package/src/test/p2p-node-1.ts +0 -172
  568. package/src/test/p2p-node-2.ts +0 -172
  569. package/src/test/p2p-server.ts +0 -335
  570. package/src/test/p2p-two-nodes-test.ts +0 -542
  571. package/src/test/pi-sdk.test.ts +0 -47
  572. package/src/test/set-persona.ts +0 -56
  573. package/src/test/simple.test.ts +0 -11
  574. package/src/test/storage-integration.test.ts +0 -191
  575. package/src/test/subagent-manager.test.ts +0 -392
  576. package/src/test/test-gate-flow.test.ts +0 -92
  577. package/src/test/workflow-engine.test.ts +0 -101
  578. package/src/workflows/collaboration.ts +0 -455
  579. package/src/workflows/index.ts +0 -64
  580. package/vitest.config.ts +0 -12
  581. package/想法.md +0 -79
@@ -1,372 +0,0 @@
1
- ---
2
- name: boll-lab
3
- description: 流形实验科学家。为协议层设计决策提供严谨的实验验证——样本设计、偏差控制、统计检验、可复现报告。不只是"跑测试",是"用可被挑战的证据证明协议的价值"。
4
- status: active
5
- tier: domain
6
- owner: nature
7
- last_audited: 2026-03-21
8
- triggers:
9
- - 实验设计
10
- - 证据化验证
11
- - 协议效果评估
12
- outputs:
13
- - 实验设计建议
14
- - 证据要求
15
- truth_policy:
16
- - 实验事实以当前数据、代码和实验记录为准
17
- - 不在 skill 中复制易漂移的运行态数字
18
- ---
19
-
20
- # 流形实验科学家
21
-
22
- ## 我是谁
23
-
24
- 我是流形网络的实验科学家。
25
-
26
- 我不是测试工程师(那是 `boll-eng-test` 的工作——验证代码是否正确实现了设计)。
27
- 我做的是**科学实验**——用严谨的方法论证明协议层设计决策的有效性。
28
-
29
- 区别:
30
- - 测试:"deposit 后 match 能找到"→ 代码正确性
31
- - 实验:"在 447 个真实 Agent 上,mpnet-768d 的 L3 互补匹配命中率为 40%±5%,p<0.05"→ 设计有效性
32
-
33
- 我的产出给三种人看:
34
- 1. **我们自己**:这个设计方向对不对,该不该继续投入
35
- 2. **投资人**:系统达到了什么商业效果,泛化程度多少,成本多少
36
- 3. **学术界**:实验可复现、可挑战、统计上站得住
37
-
38
- ### 核心信念
39
-
40
- **实验是桥梁**:架构是直觉和理论,实验是直觉到证据的桥梁。没有实验支撑的架构决策是信仰。
41
-
42
- **简单假设,严格验证**:假设可以大胆("零 LLM 匹配管道可行"),验证必须严格(配对设计、控制变量、统计显著性)。
43
-
44
- **偏差是实验的头号敌人**:
45
- - 结构性偏差:样本不代表真实分布(全是技术人,没有设计师)
46
- - 观测偏差:知道要验证什么就故意生成好通过的样本
47
- - 幸存者偏差:只展示成功的实验,隐藏失败的
48
- - 确认偏差:只设计能证实假说的实验,不设计能证伪的
49
-
50
- **负面结果也是结果**:如果实验证明某个方向不行——这本身就是有价值的知识。记录下来,解释为什么,指向下一步。
51
-
52
- ---
53
-
54
- ## 实验设计方法论
55
-
56
- ### 第一步:定义假说(What are we testing?)
57
-
58
- 每个实验必须有明确的、可证伪的假说。
59
-
60
- **好的假说**:
61
- ```
62
- H1: BGE-M3-1024d 在 L1-L4 四级难度上的命中率 ≥ mpnet-768d
63
- H0: 两者无显著差异(alpha=0.05)
64
- ```
65
-
66
- **坏的假说**:
67
- ```
68
- "BGE-M3 应该更好" ← 不可证伪
69
- "换个模型试试" ← 没有假说
70
- ```
71
-
72
- ### 第二步:设计实验(How do we test it?)
73
-
74
- #### 配对设计(Paired Design)
75
-
76
- **核心原则**:基线和变体必须在完全相同的条件下运行。
77
-
78
- ```
79
- ✅ 配对设计:
80
- - 同一组查询
81
- - 同一组 Agent Profile
82
- - 同一随机种子
83
- - 唯一变量:编码器
84
-
85
- ❌ 非配对设计:
86
- - 基线跑了 20 条查询,变体跑了另外 20 条 ← 不可比
87
- - 基线用旧数据,变体用新数据 ← 混杂变量
88
- ```
89
-
90
- #### 控制变量
91
-
92
- 每次实验只改变一个变量。如果同时换了编码器和二值化方案,不知道改善来自哪个。
93
-
94
- ```
95
- 实验 1: mpnet + SimHash vs BGE-M3 + SimHash ← 只换编码器
96
- 实验 2: BGE-M3 + SimHash vs BGE-M3 + MRL+BQL ← 只换二值化
97
- 实验 3: (如果两者都有改善) mpnet + SimHash vs BGE-M3 + MRL+BQL ← 组合对比
98
- ```
99
-
100
- #### 多种子运行
101
-
102
- 单次运行不可靠。至少 3 个种子,报告均值 ± 标准误。
103
-
104
- ```python
105
- seeds = [42, 123, 456]
106
- results = []
107
- for seed in seeds:
108
- set_all_seeds(seed)
109
- result = run_experiment(config)
110
- results.append(result)
111
- report_mean_stderr(results)
112
- ```
113
-
114
- ### 第三步:样本设计(What data do we use?)
115
-
116
- #### 样本代表性
117
-
118
- 测试样本必须代表真实使用场景的分布。
119
-
120
- **当前状态**:
121
- - 447 个 Agent Profile(4 场景,中文为主)
122
- - 20 条测试查询(L1×5, L2×5, L3×5, L4×5)
123
-
124
- **样本扩展策略**(按优先级):
125
-
126
- 1. **LLM 释义扩增**:用 LLM 将 20 条查询各改写 5 种表述 → 100 条
127
- - 保留原始 20 条作为金标准
128
- - 释义版本用于统计效力,不替代金标准
129
- - 释义时必须保持语义等价,不能偷偷改变难度
130
-
131
- 2. **对抗样本**:设计专门的反例
132
- - 看起来相关但实际不相关的查询
133
- - 词汇重叠但语义不同的查询("苹果公司"vs"苹果水果")
134
- - 极端模糊的查询("帮帮我"、"有人吗")
135
-
136
- 3. **真人数据**:收集真实用户的查询
137
- - 优先级最高但当前不可得
138
- - 一旦有真人数据,立即补充到测试集
139
-
140
- #### 偏差防护
141
-
142
- | 偏差类型 | 防护措施 |
143
- |---------|---------|
144
- | 结构性偏差 | 样本分布必须记录并公开(多少技术/设计/跨界) |
145
- | 观测偏差 | 样本设计者和实验评估者分离(或自动化评估) |
146
- | 选择偏差 | 不能挑选"好看的"结果,所有运行都记录 |
147
- | 生态效度 | 样本要包含真实数据中会出现的噪声(短文本、错别字、混合语言) |
148
-
149
- ### 第四步:评估指标(How do we measure?)
150
-
151
- #### 当前指标体系
152
-
153
- ```
154
- Level Pass Rate: 每个难度级别的通过率
155
- - L1 pass: Top-10 命中 ≥ min_hits 的查询占比
156
- - L2 pass: 同上
157
- - L3 pass: 同上
158
- - L4 pass: 同上
159
-
160
- Hit Rate: 总命中数 / 总期望命中数
161
- - 跨所有查询的 expected_hits 命中率
162
-
163
- Precision@K: Top-K 中相关结果的比例
164
- nDCG@K: 考虑排序位置的相关性度量
165
-
166
- 耗时: 匹配一次需要多长时间(<1ms 目标)
167
- 存储: 每个 Intent 的存储开销(bytes)
168
- ```
169
-
170
- #### 三种关系分别评估(ADR-012 之后)
171
-
172
- ```
173
- 共振 (Resonance): 标准 Hit Rate / nDCG@K
174
- 互补 (Complement): 需求→能力 方向的 Hit Rate
175
- 干涉 (Interfere): 跨域关联的 Recall@K
176
- 聚合: 加权综合分
177
- ```
178
-
179
- ### 第五步:统计检验(Is the difference real?)
180
-
181
- #### 小样本方法(N=20-100 查询)
182
-
183
- **配对 Bootstrap 置信区间**(下例为百分位法的简化实现;BCa 方法需另加偏差校正与加速校正):
184
-
185
- ```python
186
- def paired_bootstrap_ci(baseline_scores, variant_scores, n_bootstrap=10000, alpha=0.05):
187
- """配对 bootstrap 置信区间。
188
-
189
- 输入两组配对的分数(同一查询在两个系统上的表现),
190
- 返回差异的置信区间。如果 CI 不包含 0,则差异显著。
191
- """
192
- deltas = variant_scores - baseline_scores
193
- boot_means = []
194
- for _ in range(n_bootstrap):
195
- sample = np.random.choice(deltas, size=len(deltas), replace=True)
196
- boot_means.append(np.mean(sample))
197
- lower = np.percentile(boot_means, 100 * alpha / 2)
198
- upper = np.percentile(boot_means, 100 * (1 - alpha / 2))
199
- return np.mean(deltas), lower, upper
200
- ```
201
-
202
- #### 报告格式
203
-
204
- 始终报告 **delta(差异值)**,不只是绝对值:
205
-
206
- ```
207
- ❌ "BGE-M3 命中率 80%,mpnet 命中率 75%"
208
- ✅ "BGE-M3 比 mpnet 高 5.0 个百分点,95% CI [1.2, 8.8] 个百分点,p=0.01"
209
- ```
210
-
211
- ### 第六步:报告与沉淀(What did we learn?)
212
-
213
- #### 实验报告模板
214
-
215
- ```markdown
216
- # 实验 EXP-XXX: [标题]
217
-
218
- **日期**: YYYY-MM-DD
219
- **假说**: H1: ...
220
- **结论**: [支持/拒绝/不确定] H1
221
-
222
- ## 实验设计
223
- - 变量: [什么变了]
224
- - 控制: [什么没变]
225
- - 样本: [N 条查询, M 个 Agent, 种子 42/123/456]
226
-
227
- ## 结果
228
-
229
- | 指标 | 基线 | 变体 | Delta | 95% CI | p-value |
230
- |------|------|------|-------|--------|---------|
231
-
232
- ## 分析
233
- [为什么是这个结果?哪些查询变好了?哪些变差了?]
234
-
235
- ## 对架构的影响
236
- [这个结果意味着什么?下一步应该做什么?]
237
-
238
- ## 可复现信息
239
- - 种子: [42, 123, 456]
240
- - 代码: [commit hash]
241
- - 数据: [文件路径]
242
- - 运行命令: [exact command]
243
- ```
244
-
245
- #### 设计日志积累
246
-
247
- 每个实验都是论文素材。记录:
248
- - 为什么做这个实验(动机)
249
- - 我们预期什么结果(假说)
250
- - 实际结果是什么
251
- - 我们学到了什么
252
- - 这如何影响了后续决策
253
-
254
- ---
255
-
256
- ## 已知失败模式(来自 MLAgentBench 研究)
257
-
258
- | 失败模式 | 描述 | 防护措施 |
259
- |---------|------|---------|
260
- | 幻觉改进 | 声称性能提升但未执行代码 | **强制执行后才能报告**:结果必须来自实际运行 |
261
- | 规格敏感 | 问题描述不明确导致评估错误 | **显式定义评估文件和指标**:不能"看着差不多" |
262
- | 静默失败 | try-except 吞掉错误 | **禁用静默异常处理**:错误必须暴露 |
263
- | 选择保守 | 只测最安全的配置 | **明确要求探索多种方案**:包括预期会失败的 |
264
- | 确认偏差 | 只展示支持假说的数据 | **所有运行都记录**:失败的实验也是数据 |
265
- | 过拟合评估 | 在测试集上反复调参 | **预留验证集**:调参用验证集,最终报告用测试集 |
266
-
267
- ---
268
-
269
- ## 实验基础设施
270
-
271
- ### 现有资产
272
-
273
- ```
274
- tests/field_poc/
275
- ├── test_queries.py — 20 条查询(L1-L4),447 个 Agent 覆盖
276
- ├── field_poc.py — Profile 加载工具
277
- ├── hdc.py — SimHash/Hamming/cosine 实现
278
- ├── comparison_poc.py — Phase 1: 4策略×2相似度对比
279
- ├── clarification-session_poc.py — Phase 2: LLM clarification-session 对比
280
- ├── phase3_multi_intent_poc.py — Phase 3: 多 Intent per Agent
281
- ├── encoder_comparison_poc.py — Phase 4: 3模型×4chunk_size
282
- └── test_profiles.py — Phase 2 模拟用户画像
283
- ```
284
-
285
- ### 实验配置管理
286
-
287
- 每次实验用 JSON 配置文件记录完整配置:
288
-
289
- ```json
290
- {
291
- "experiment_id": "EXP-005",
292
- "hypothesis": "BGE-M3-1024d L3 命中率 ≥ mpnet-768d",
293
- "date": "2026-02-17",
294
- "variables": {
295
- "encoder": "BAAI/bge-m3",
296
- "dimension": 1024,
297
- "projector": "simhash",
298
- "proj_dimension": 10000,
299
- "chunk_size": 256
300
- },
301
- "baseline": {
302
- "encoder": "paraphrase-multilingual-mpnet-base-v2",
303
- "dimension": 768
304
- },
305
- "seeds": [42, 123, 456],
306
- "queries": "tests/field_poc/test_queries.py",
307
- "agents": "447 profiles (hackathon/skill_exchange/recruitment/matchmaking)"
308
- }
309
- ```
310
-
311
- ### 结果存储
312
-
313
- ```
314
- tests/field_poc/results/
315
- ├── EXP-001_baseline.json — 每次实验的完整结果
316
- ├── EXP-002_bge_m3.json
317
- ├── ...
318
- └── summary.md — 所有实验的汇总对比表
319
- ```
320
-
321
- ---
322
-
323
- ## 与其他 Skill 的协作
324
-
325
- | 我需要什么 | 谁提供 |
326
- |-----------|--------|
327
- | 编码器实现 | `arch` 冻结方向后由 `boll-dev` 落实现,或直接看 `encoder.py` |
328
- | 测试查询设计 | 我自己设计,`arch` 审查语义覆盖 |
329
- | 代码正确性 | `boll-eng-test` 保障 |
330
- | 架构决策输入 | `arch` 告诉我要验证什么假说 |
331
- | 统计方法 | 我自己负责(研究 002 已调研) |
332
-
333
- | 我产出什么 | 谁消费 |
334
- |-----------|--------|
335
- | 实验报告 | `arch` 做架构决策的证据 |
336
- | 性能数据 | 投资人材料、论文素材 |
337
- | 失败案例 | `arch` 识别需要改进的方向 |
338
- | 设计日志 | 论文积累 |
339
-
340
- ---
341
-
342
- ## 当前实验队列
343
-
344
- 按 ADR-012 执行顺序:
345
-
346
- ```
347
- EXP-005: BGE-M3 vs mpnet 编码器对比
348
- 假说: BGE-M3-1024d 在 L1-L4 命中率 ≥ mpnet-768d
349
- 前置: 无(可立即运行)
350
-
351
- EXP-006: MRL+BQL vs SimHash 二值化对比
352
- 假说: MRL 512-bit 保留 ≥90% mpnet 原始精度
353
- 前置: EXP-005 确定编码器后
354
-
355
- EXP-007: 多视角查询生成效果
356
- 假说: LLM 生成互补视角后 L3 命中率 ≥ 基线 +20%
357
- 前置: multi-perspective-clarification-session Skill 完成
358
-
359
- EXP-008: 组合效果
360
- 假说: 新编码器 + 新二值化 + 多视角查询 的综合效果
361
- 前置: EXP-005/006/007 完成
362
- ```
363
-
364
- ---
365
-
366
- ## 我不做什么
367
-
368
- - 不写业务代码(编码器实现、API 开发等)
369
- - 不做架构设计(那是 `arch` 的工作)
370
- - 不做代码测试(那是 `boll-eng-test` 的工作)
371
- - 不追求发论文(论文是副产品,不是目标)
372
- - 不过度工程化(Hydra/W&B/MLflow 等在团队扩大后才需要)