@bolloon/bolloon-agent 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (431) hide show
  1. package/package.json +1 -1
  2. package/dist/constraint-runtime/tests/agent.test.js +0 -16
  3. package/dist/constraint-runtime/tests/constraint.test.js +0 -41
  4. package/dist/constraint-runtime/tests/skill.test.js +0 -19
  5. package/dist/constraint-runtime/tests/thinking.test.js +0 -22
  6. package/dist/electron-preload.js +0 -15
  7. package/dist/electron-preload.js.map +0 -1
  8. package/dist/electron.js +0 -206
  9. package/dist/electron.js.map +0 -1
  10. package/dist/test/constraint-layer.test.js +0 -164
  11. package/dist/test/global-shared-context.test.js +0 -315
  12. package/dist/test/pi-sdk.test.js +0 -47
  13. package/dist/test/set-persona.test.js +0 -38
  14. package/dist/test/subagent-manager.test.js +0 -276
  15. package/dist/test/workflow-engine.test.js +0 -87
  16. package/dist/workflows/collaboration.js +0 -374
  17. package/dist/workflows/index.js +0 -54
  18. package/docs/agent-communication.md +0 -333
  19. package/docs/plans/2026-05-15-document-agent-design.md +0 -479
  20. package/docs/plans/2026-05-15-document-agent-implementation-plan.md +0 -792
  21. package/docs/plans/2026-05-16-chat-ui-design.md +0 -86
  22. package/docs/plans/2026-05-16-constraint-runtime-design.md +0 -106
  23. package/docs/plans/2026-05-16-constraint-runtime-implementation.md +0 -441
  24. package/docs//346/225/260/345/255/246/350/276/205/345/212/251/346/231/272/350/203/275/344/275/223-/346/240/270/345/277/203/346/225/210/346/236/234/345/256/232/344/271/211.md +0 -287
  25. package/src/bollharness/.boll/CLAUDE.md.template +0 -34
  26. package/src/bollharness/.boll/MANIFEST.yaml +0 -213
  27. package/src/bollharness/.boll/active-review-agents/.gitkeep +0 -0
  28. package/src/bollharness/.boll/agents/review-base.yaml +0 -108
  29. package/src/bollharness/.boll/deploy-allowlist.yaml +0 -38
  30. package/src/bollharness/.boll/inbox/schema/message-v1.json +0 -99
  31. package/src/bollharness/.boll/install-staging/.gitkeep +0 -0
  32. package/src/bollharness/.boll/issue-adapter.yaml +0 -31
  33. package/src/bollharness/.boll/plugins/boll-mode-toolkit/contracts/mode-contract.md +0 -85
  34. package/src/bollharness/.boll/plugins/boll-review-toolkit/contracts/evidence-packet-schema.json +0 -102
  35. package/src/bollharness/.boll/plugins/boll-review-toolkit/contracts/review-contract.yaml +0 -247
  36. package/src/bollharness/.boll/rules/backend-routes.md +0 -31
  37. package/src/bollharness/.boll/rules/closure-semantics.md +0 -30
  38. package/src/bollharness/.boll/rules/env-vars.md +0 -32
  39. package/src/bollharness/.boll/rules/hanis-protocol.md +0 -145
  40. package/src/bollharness/.boll/rules/repo-structure.md +0 -42
  41. package/src/bollharness/.boll/rules/review-agent-isolation.md +0 -73
  42. package/src/bollharness/.boll/rules/source-of-truth.md +0 -33
  43. package/src/bollharness/.boll/settings.json +0 -180
  44. package/src/bollharness/.boll/settings.json.template +0 -31
  45. package/src/bollharness/.boll/skills/arch/SKILL.md +0 -372
  46. package/src/bollharness/.boll/skills/bug-pipeline/SKILL.md +0 -168
  47. package/src/bollharness/.boll/skills/bug-triage/SKILL.md +0 -161
  48. package/src/bollharness/.boll/skills/context-chains/SKILL.md +0 -250
  49. package/src/bollharness/.boll/skills/context-chains/context-chain-index.md +0 -48
  50. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/code_change_extractor.ts +0 -142
  51. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/debugging_extractor.ts +0 -126
  52. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/design_extractor.ts +0 -148
  53. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/planning_extractor.ts +0 -162
  54. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/question_extractor.ts +0 -116
  55. package/src/bollharness/.boll/skills/context-chains/work-type-extractors/review_extractor.ts +0 -136
  56. package/src/bollharness/.boll/skills/crystal-learn/SKILL.md +0 -93
  57. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-0.md +0 -34
  58. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-1.md +0 -34
  59. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-2.md +0 -35
  60. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-3.md +0 -34
  61. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-4.md +0 -43
  62. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-5.md +0 -34
  63. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-6.md +0 -37
  64. package/src/bollharness/.boll/skills/crystal-learn/invariants/INV-7.md +0 -46
  65. package/src/bollharness/.boll/skills/guardian-fixer/PROMPT.md +0 -415
  66. package/src/bollharness/.boll/skills/guardian-fixer/SKILL.md +0 -320
  67. package/src/bollharness/.boll/skills/harness-dev/SKILL.md +0 -93
  68. package/src/bollharness/.boll/skills/harness-dev/examples/README.md +0 -227
  69. package/src/bollharness/.boll/skills/harness-dev-handoff/SKILL.md +0 -165
  70. package/src/bollharness/.boll/skills/harness-eng/SKILL.md +0 -110
  71. package/src/bollharness/.boll/skills/harness-eng-test/SKILL.md +0 -79
  72. package/src/bollharness/.boll/skills/harness-lab/SKILL.md +0 -170
  73. package/src/bollharness/.boll/skills/harness-ops/SKILL.md +0 -57
  74. package/src/bollharness/.boll/skills/harness-voice/SKILL.md +0 -183
  75. package/src/bollharness/.boll/skills/judgment/SKILL.md +0 -115
  76. package/src/bollharness/.boll/skills/lead/SKILL.md +0 -245
  77. package/src/bollharness/.boll/skills/lead/install-wow-harness.md +0 -77
  78. package/src/bollharness/.boll/skills/lead/ref-review-sop.md +0 -91
  79. package/src/bollharness/.boll/skills/lead/ref-stages.md +0 -129
  80. package/src/bollharness/.boll/skills/skill-discovery/SKILL.md +0 -169
  81. package/src/bollharness/.boll/skills/task-arch/SKILL.md +0 -106
  82. package/src/bollharness/.boll/skills/toolkit/SKILL.md +0 -57
  83. package/src/bollharness/.boll/tasks/.gitkeep +0 -0
  84. package/src/bollharness/.boll/toolkit-index.yaml +0 -112
  85. package/src/bollharness/.claude/agents/review-base.yaml +0 -108
  86. package/src/bollharness/.claude/plugins/boll-mode-toolkit/.claude-plugin/plugin.json +0 -44
  87. package/src/bollharness/.claude/plugins/boll-review-toolkit/.claude-plugin/plugin.json +0 -24
  88. package/src/bollharness/.claude/plugins/boll-review-toolkit/contracts/evidence-packet-schema.json +0 -102
  89. package/src/bollharness/.claude/plugins/boll-review-toolkit/contracts/review-contract.yaml +0 -247
  90. package/src/bollharness/.claude/settings.json +0 -157
  91. package/src/bollharness/.claude/skills/arch/SKILL.md +0 -64
  92. package/src/bollharness/.claude/skills/crystal-learn/SKILL.md +0 -93
  93. package/src/bollharness/.claude/skills/guardian-fixer/PROMPT.md +0 -44
  94. package/src/bollharness/.claude/skills/guardian-fixer/SKILL.md +0 -324
  95. package/src/bollharness/.claude/skills/harness-dev/SKILL.md +0 -93
  96. package/src/bollharness/.claude/skills/harness-dev/examples/README.md +0 -17
  97. package/src/bollharness/.claude/skills/harness-dev-handoff/SKILL.md +0 -165
  98. package/src/bollharness/.claude/skills/harness-eng/SKILL.md +0 -183
  99. package/src/bollharness/.claude/skills/harness-eng-test/SKILL.md +0 -57
  100. package/src/bollharness/.claude/skills/harness-ops/SKILL.md +0 -57
  101. package/src/bollharness/.claude/skills/harness-voice/SKILL.md +0 -84
  102. package/src/bollharness/.claude/skills/lead/INDEX.md +0 -28
  103. package/src/bollharness/.claude/skills/lead/SKILL.md +0 -24
  104. package/src/bollharness/.claude/skills/lead/install-wow-harness.md +0 -77
  105. package/src/bollharness/.claude/skills/lead/ref-review-sop.md +0 -48
  106. package/src/bollharness/.claude/skills/lead/ref-stages.md +0 -58
  107. package/src/bollharness/.claude/skills/plan-lock/SKILL.md +0 -74
  108. package/src/bollharness/.claude/skills/skill-discovery/SKILL.md +0 -120
  109. package/src/bollharness/.claude/skills/task-arch/SKILL.md +0 -106
  110. package/src/bollharness/.claude/skills/toolkit/SKILL.md +0 -57
  111. package/src/bollharness/.claude/skills/toolkit/list.sh +0 -92
  112. package/src/bollharness/.githooks/pre-commit +0 -21
  113. package/src/bollharness/.github/workflows/ci.yml +0 -88
  114. package/src/bollharness/CLAUDE.md +0 -73
  115. package/src/bollharness/README.md +0 -143
  116. package/src/bollharness/README.zh-CN.md +0 -131
  117. package/src/bollharness/docs/decisions/ADR-030-guard-signal-protocol-and-governance-reload.md +0 -1076
  118. package/src/bollharness/docs/decisions/ADR-038-harness-optimization-strategy.md +0 -2039
  119. package/src/bollharness/docs/decisions/ADR-041-codex-claude-code-division-of-labor.md +0 -128
  120. package/src/bollharness/docs/decisions/ADR-H1-crystal-learn-revival.md +0 -188
  121. package/src/bollharness/docs/decisions/ADR-H2-identity-isolation.md +0 -183
  122. package/src/bollharness/docs/decisions/ADR-H3-memory-scope.md +0 -133
  123. package/src/bollharness/docs/decisions/ADR-H4-prompt-governance.md +0 -146
  124. package/src/bollharness/docs/decisions/ADR-H5-gate-quantization.md +0 -212
  125. package/src/bollharness/docs/decisions/ADR-H6-state-file-health.md +0 -211
  126. package/src/bollharness/docs/decisions/ADR-H8-issue-and-doc-compliance.md +0 -202
  127. package/src/bollharness/docs/decisions/ADR-H9-mailbox.md +0 -231
  128. package/src/bollharness/docs/decisions/PLAN-H1-crystal-learn-revival.md +0 -270
  129. package/src/bollharness/docs/decisions/PLAN-H2-identity-isolation.md +0 -291
  130. package/src/bollharness/docs/decisions/PLAN-H3-memory-scope.md +0 -228
  131. package/src/bollharness/docs/decisions/PLAN-H4-prompt-governance.md +0 -227
  132. package/src/bollharness/docs/decisions/PLAN-H5-gate-quantization.md +0 -239
  133. package/src/bollharness/docs/decisions/PLAN-H6-state-file-health.md +0 -325
  134. package/src/bollharness/docs/decisions/PLAN-H8-issue-and-doc-compliance.md +0 -242
  135. package/src/bollharness/docs/decisions/PLAN-H9-mailbox.md +0 -378
  136. package/src/bollharness/docs/launch-article-en.md +0 -276
  137. package/src/bollharness/docs/launch-article-zh.md +0 -305
  138. package/src/bollharness/docs/practice.html +0 -356
  139. package/src/bollharness/docs/practice.md +0 -82
  140. package/src/bollharness/docs/research/round-1/README.md +0 -11
  141. package/src/bollharness/docs/research/round-2/README.md +0 -11
  142. package/src/bollharness/docs/research/round-3/README.md +0 -11
  143. package/src/bollharness/docs/research/round-4/README.md +0 -11
  144. package/src/bollharness/docs/research/round-5/README.md +0 -11
  145. package/src/bollharness/docs/research/round-6/README.md +0 -11
  146. package/src/bollharness/package-lock.json +0 -48
  147. package/src/bollharness/reference/boll-reference/.claude/rules/backend-routes.md +0 -268
  148. package/src/bollharness/reference/boll-reference/.claude/rules/bridge.md +0 -20
  149. package/src/bollharness/reference/boll-reference/.claude/rules/closure-semantics.md +0 -30
  150. package/src/bollharness/reference/boll-reference/.claude/rules/coaching.md +0 -13
  151. package/src/bollharness/reference/boll-reference/.claude/rules/env-vars.md +0 -50
  152. package/src/bollharness/reference/boll-reference/.claude/rules/hackathon.md +0 -12
  153. package/src/bollharness/reference/boll-reference/.claude/rules/repo-structure.md +0 -184
  154. package/src/bollharness/reference/boll-reference/.claude/rules/review-agent-isolation.md +0 -112
  155. package/src/bollharness/reference/boll-reference/.claude/rules/scenes.md +0 -12
  156. package/src/bollharness/reference/boll-reference/.claude/skills/arch/SKILL.md +0 -551
  157. package/src/bollharness/reference/boll-reference/.claude/skills/boll-animation/SKILL.md +0 -26
  158. package/src/bollharness/reference/boll-reference/.claude/skills/boll-bridge/SKILL.md +0 -227
  159. package/src/bollharness/reference/boll-reference/.claude/skills/boll-bridge/agents/openai.yaml +0 -4
  160. package/src/bollharness/reference/boll-reference/.claude/skills/boll-bridge/references/bridge-failure-taxonomy.md +0 -142
  161. package/src/bollharness/reference/boll-reference/.claude/skills/boll-bridge/references/bridge-validation-ladder.md +0 -107
  162. package/src/bollharness/reference/boll-reference/.claude/skills/boll-crystal/SKILL.md +0 -893
  163. package/src/bollharness/reference/boll-reference/.claude/skills/boll-crystal-learn/SKILL.md +0 -89
  164. package/src/bollharness/reference/boll-reference/.claude/skills/boll-dev/SKILL.md +0 -93
  165. package/src/bollharness/reference/boll-reference/.claude/skills/boll-dev/examples/README.md +0 -209
  166. package/src/bollharness/reference/boll-reference/.claude/skills/boll-dev-handoff/SKILL.md +0 -165
  167. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng/SKILL.md +0 -110
  168. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng-frontend/SKILL.md +0 -203
  169. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng-hdc/SKILL.md +0 -27
  170. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng-orchestrator/SKILL.md +0 -28
  171. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng-prompt/SKILL.md +0 -27
  172. package/src/bollharness/reference/boll-reference/.claude/skills/boll-eng-test/SKILL.md +0 -79
  173. package/src/bollharness/reference/boll-reference/.claude/skills/boll-lab/SKILL.md +0 -372
  174. package/src/bollharness/reference/boll-reference/.claude/skills/boll-run/SKILL.md +0 -437
  175. package/src/bollharness/reference/boll-reference/.claude/skills/boll-ux-appstore/SKILL.md +0 -27
  176. package/src/bollharness/reference/boll-reference/.claude/skills/boll-voice/SKILL.md +0 -442
  177. package/src/bollharness/reference/boll-reference/.claude/skills/guardian-fixer/PROMPT.md +0 -421
  178. package/src/bollharness/reference/boll-reference/.claude/skills/guardian-fixer/SKILL.md +0 -326
  179. package/src/bollharness/reference/boll-reference/.claude/skills/lead/SKILL.md +0 -155
  180. package/src/bollharness/reference/boll-reference/.claude/skills/lead/ref-review-sop.md +0 -91
  181. package/src/bollharness/reference/boll-reference/.claude/skills/lead/ref-stages.md +0 -129
  182. package/src/bollharness/reference/boll-reference/.claude/skills/nature-designer/output/skill-map-preview.png +0 -0
  183. package/src/bollharness/reference/boll-reference/.claude/skills/nature-designer/output/skill-map-v2.png +0 -0
  184. package/src/bollharness/reference/boll-reference/.claude/skills/nature-designer/output/skill-map-v3.png +0 -0
  185. package/src/bollharness/reference/boll-reference/.claude/skills/nature-designer/output/skill-map-v4.png +0 -0
  186. package/src/bollharness/reference/boll-reference/.claude/skills/plan-lock/SKILL.md +0 -425
  187. package/src/bollharness/reference/boll-reference/.claude/skills/plan-lock/ref-three-checks.md +0 -62
  188. package/src/bollharness/reference/boll-reference/.claude/skills/plan-lock/ref-wp-templates.md +0 -78
  189. package/src/bollharness/reference/boll-reference/.claude/skills/task-arch/SKILL.md +0 -76
  190. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-graph/SKILL.md +0 -57
  191. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-graph/beads-graph.sh +0 -153
  192. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-init/SKILL.md +0 -52
  193. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-init/beads-auto-link.sh +0 -76
  194. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-sync/SKILL.md +0 -50
  195. package/src/bollharness/reference/boll-reference/.claude/skills/vibedevteam-sync/beads-sync-proj.sh +0 -108
  196. package/src/bollharness/reference/boll-reference/docs/architecture/AGENT-PROFILE.md +0 -151
  197. package/src/bollharness/reference/boll-reference/docs/architecture/COST-STRUCTURE.md +0 -56
  198. package/src/bollharness/reference/boll-reference/docs/architecture/INDEX.md +0 -76
  199. package/src/bollharness/reference/boll-reference/docs/architecture/MODULE1-INTENT-FIELD.md +0 -116
  200. package/src/bollharness/reference/boll-reference/docs/architecture/MODULE2-CRYSTALLIZATION.md +0 -200
  201. package/src/bollharness/reference/boll-reference/docs/architecture/PRINCIPLES.md +0 -84
  202. package/src/bollharness/reference/boll-reference/docs/architecture/PROTOCOL-CORE.md +0 -209
  203. package/src/bollharness/reference/boll-reference/docs/architecture/VISION.md +0 -181
  204. package/src/bollharness/reference/boll-reference/docs/architecture/discussions/D-01-MARKET-SCENE-PROTOCOL.md +0 -754
  205. package/src/bollharness/reference/boll-reference/scripts/hooks/stop-evaluator.md +0 -57
  206. package/src/bollharness/scripts/context-fragments/artifact-linkage.md +0 -14
  207. package/src/bollharness/scripts/context-fragments/auth-consumers.md +0 -17
  208. package/src/bollharness/scripts/context-fragments/bridge-constitution.md +0 -13
  209. package/src/bollharness/scripts/context-fragments/catalyst-distributed.md +0 -18
  210. package/src/bollharness/scripts/context-fragments/closure-checklist.md +0 -13
  211. package/src/bollharness/scripts/context-fragments/contract-consumers.md +0 -15
  212. package/src/bollharness/scripts/context-fragments/db-shared-structures.md +0 -15
  213. package/src/bollharness/scripts/context-fragments/fixed-three-layers.md +0 -19
  214. package/src/bollharness/scripts/context-fragments/general-dev-principles.md +0 -11
  215. package/src/bollharness/scripts/context-fragments/issue-first.md +0 -8
  216. package/src/bollharness/scripts/context-fragments/mcp-parity.md +0 -16
  217. package/src/bollharness/scripts/context-fragments/pi-agent-operations.md +0 -74
  218. package/src/bollharness/scripts/context-fragments/protocol-consumers.md +0 -15
  219. package/src/bollharness/scripts/context-fragments/run-events-consumers.md +0 -15
  220. package/src/bollharness/scripts/context-fragments/scene-fidelity.md +0 -13
  221. package/src/bollharness/scripts/context-fragments/truth-source-hierarchy.md +0 -15
  222. package/src/bollharness/scripts/context-fragments/two-language.md +0 -15
  223. package/src/bollharness/scripts/context-fragments/version-sources.md +0 -14
  224. package/src/bollharness/scripts/hooks/stop-evaluator.md +0 -83
  225. package/src/bollharness/templates/scaffold/.boll/guard/.gitkeep +0 -0
  226. package/src/bollharness/templates/scaffold/.boll/metrics/.gitkeep +0 -0
  227. package/src/bollharness/templates/scaffold/.boll/state/.gitkeep +0 -0
  228. package/src/bollharness/templates/scaffold/CLAUDE.md +0 -89
  229. package/src/bollharness/templates/scaffold/docs/INDEX.md +0 -3
  230. package/src/bollharness/templates/scaffold/docs/decisions/ADR_TEMPLATE.md +0 -38
  231. package/src/bollharness/templates/scaffold/docs/decisions/PLAN_TEMPLATE.md +0 -45
  232. package/src/bollharness/templates/scaffold/docs/decisions/tasks/.gitkeep +0 -2
  233. package/src/bollharness/templates/scaffold/docs/issues/.gitkeep +0 -0
  234. package/src/bollharness/templates/scaffold/docs/issues/GUARD_ISSUE_TEMPLATE.md +0 -35
  235. package/src/bollharness/templates/scaffold/docs/issues/ISSUE_TEMPLATE.md +0 -51
  236. package/src/bollharness/tsconfig.json +0 -26
  237. package/src/constraint-runtime/package-lock.json +0 -48
  238. package/src/constraint-runtime/package.json +0 -34
  239. package/src/constraint-runtime/src/_archive_helper.ts +0 -16
  240. package/src/constraint-runtime/src/agent/coordinator.ts +0 -71
  241. package/src/constraint-runtime/src/agent/index.ts +0 -1
  242. package/src/constraint-runtime/src/assistant/index.ts +0 -15
  243. package/src/constraint-runtime/src/bootstrap/index.ts +0 -15
  244. package/src/constraint-runtime/src/bootstrap_graph.ts +0 -17
  245. package/src/constraint-runtime/src/bridge/index.ts +0 -15
  246. package/src/constraint-runtime/src/buddy/index.ts +0 -15
  247. package/src/constraint-runtime/src/cli/index.ts +0 -15
  248. package/src/constraint-runtime/src/command_graph.ts +0 -20
  249. package/src/constraint-runtime/src/commands.ts +0 -83
  250. package/src/constraint-runtime/src/components/index.ts +0 -15
  251. package/src/constraint-runtime/src/constants/index.ts +0 -15
  252. package/src/constraint-runtime/src/constraint/budget.ts +0 -25
  253. package/src/constraint-runtime/src/constraint/index.ts +0 -3
  254. package/src/constraint-runtime/src/constraint/permission.ts +0 -28
  255. package/src/constraint-runtime/src/context.ts +0 -45
  256. package/src/constraint-runtime/src/coordinator/index.ts +0 -15
  257. package/src/constraint-runtime/src/cost_hook.ts +0 -6
  258. package/src/constraint-runtime/src/cost_tracker.ts +0 -9
  259. package/src/constraint-runtime/src/deferred_init.ts +0 -18
  260. package/src/constraint-runtime/src/direct_modes.ts +0 -13
  261. package/src/constraint-runtime/src/dynamic-tool-loader.ts +0 -115
  262. package/src/constraint-runtime/src/entrypoints/index.ts +0 -15
  263. package/src/constraint-runtime/src/execution_registry.ts +0 -41
  264. package/src/constraint-runtime/src/history.ts +0 -16
  265. package/src/constraint-runtime/src/hooks/index.ts +0 -15
  266. package/src/constraint-runtime/src/index.ts +0 -28
  267. package/src/constraint-runtime/src/ink.ts +0 -4
  268. package/src/constraint-runtime/src/keybindings/index.ts +0 -15
  269. package/src/constraint-runtime/src/memdir/index.ts +0 -15
  270. package/src/constraint-runtime/src/migrations/index.ts +0 -15
  271. package/src/constraint-runtime/src/models.ts +0 -49
  272. package/src/constraint-runtime/src/moreright/index.ts +0 -15
  273. package/src/constraint-runtime/src/native_ts/index.ts +0 -15
  274. package/src/constraint-runtime/src/output_styles/index.ts +0 -15
  275. package/src/constraint-runtime/src/parity_audit.ts +0 -23
  276. package/src/constraint-runtime/src/plugins/index.ts +0 -15
  277. package/src/constraint-runtime/src/port_manifest.ts +0 -20
  278. package/src/constraint-runtime/src/prefetch.ts +0 -17
  279. package/src/constraint-runtime/src/query.ts +0 -7
  280. package/src/constraint-runtime/src/reference_data/archive_surface_snapshot.json +0 -63
  281. package/src/constraint-runtime/src/reference_data/commands_snapshot.json +0 -1037
  282. package/src/constraint-runtime/src/reference_data/subsystems/OpenCLI.json +0 -10
  283. package/src/constraint-runtime/src/reference_data/subsystems/PolymarketSDK.json +0 -12
  284. package/src/constraint-runtime/src/reference_data/subsystems/SafeSDK.json +0 -14
  285. package/src/constraint-runtime/src/reference_data/subsystems/assistant.json +0 -8
  286. package/src/constraint-runtime/src/reference_data/subsystems/bootstrap.json +0 -8
  287. package/src/constraint-runtime/src/reference_data/subsystems/bridge.json +0 -32
  288. package/src/constraint-runtime/src/reference_data/subsystems/buddy.json +0 -13
  289. package/src/constraint-runtime/src/reference_data/subsystems/cli.json +0 -26
  290. package/src/constraint-runtime/src/reference_data/subsystems/components.json +0 -32
  291. package/src/constraint-runtime/src/reference_data/subsystems/constants.json +0 -28
  292. package/src/constraint-runtime/src/reference_data/subsystems/coordinator.json +0 -8
  293. package/src/constraint-runtime/src/reference_data/subsystems/entrypoints.json +0 -15
  294. package/src/constraint-runtime/src/reference_data/subsystems/hooks.json +0 -32
  295. package/src/constraint-runtime/src/reference_data/subsystems/keybindings.json +0 -21
  296. package/src/constraint-runtime/src/reference_data/subsystems/memdir.json +0 -15
  297. package/src/constraint-runtime/src/reference_data/subsystems/migrations.json +0 -18
  298. package/src/constraint-runtime/src/reference_data/subsystems/moreright.json +0 -8
  299. package/src/constraint-runtime/src/reference_data/subsystems/native_ts.json +0 -11
  300. package/src/constraint-runtime/src/reference_data/subsystems/outputStyles.json +0 -8
  301. package/src/constraint-runtime/src/reference_data/subsystems/plugins.json +0 -9
  302. package/src/constraint-runtime/src/reference_data/subsystems/remote.json +0 -11
  303. package/src/constraint-runtime/src/reference_data/subsystems/schemas.json +0 -8
  304. package/src/constraint-runtime/src/reference_data/subsystems/screens.json +0 -10
  305. package/src/constraint-runtime/src/reference_data/subsystems/server.json +0 -10
  306. package/src/constraint-runtime/src/reference_data/subsystems/services.json +0 -32
  307. package/src/constraint-runtime/src/reference_data/subsystems/skills.json +0 -27
  308. package/src/constraint-runtime/src/reference_data/subsystems/state.json +0 -13
  309. package/src/constraint-runtime/src/reference_data/subsystems/types.json +0 -18
  310. package/src/constraint-runtime/src/reference_data/subsystems/upstreamproxy.json +0 -9
  311. package/src/constraint-runtime/src/reference_data/subsystems/utils.json +0 -32
  312. package/src/constraint-runtime/src/reference_data/subsystems/vim.json +0 -12
  313. package/src/constraint-runtime/src/reference_data/subsystems/voice.json +0 -8
  314. package/src/constraint-runtime/src/reference_data/tools_snapshot.json +0 -1042
  315. package/src/constraint-runtime/src/remote/index.ts +0 -15
  316. package/src/constraint-runtime/src/remote_runtime.ts +0 -17
  317. package/src/constraint-runtime/src/runtime/index.ts +0 -1
  318. package/src/constraint-runtime/src/runtime/session.ts +0 -42
  319. package/src/constraint-runtime/src/schemas/index.ts +0 -15
  320. package/src/constraint-runtime/src/screens/index.ts +0 -15
  321. package/src/constraint-runtime/src/server/index.ts +0 -15
  322. package/src/constraint-runtime/src/services/index.ts +0 -15
  323. package/src/constraint-runtime/src/session_store.ts +0 -32
  324. package/src/constraint-runtime/src/setup.ts +0 -50
  325. package/src/constraint-runtime/src/skills/index.ts +0 -1
  326. package/src/constraint-runtime/src/skills/skill-registry.ts +0 -40
  327. package/src/constraint-runtime/src/state/index.ts +0 -15
  328. package/src/constraint-runtime/src/system_init.ts +0 -21
  329. package/src/constraint-runtime/src/thinking/engine.ts +0 -61
  330. package/src/constraint-runtime/src/thinking/index.ts +0 -1
  331. package/src/constraint-runtime/src/tool_pool.ts +0 -20
  332. package/src/constraint-runtime/src/tools/OpenCLI/execAdapter.ts +0 -12
  333. package/src/constraint-runtime/src/tools/OpenCLI/listAdapters.ts +0 -12
  334. package/src/constraint-runtime/src/tools/OpenCLI/runCommand.ts +0 -13
  335. package/src/constraint-runtime/src/tools/PolymarketSDK/cancelOrder.ts +0 -10
  336. package/src/constraint-runtime/src/tools/PolymarketSDK/createOrder.ts +0 -13
  337. package/src/constraint-runtime/src/tools/PolymarketSDK/getMarket.ts +0 -14
  338. package/src/constraint-runtime/src/tools/PolymarketSDK/getOrders.ts +0 -10
  339. package/src/constraint-runtime/src/tools/PolymarketSDK/listMarkets.ts +0 -24
  340. package/src/constraint-runtime/src/tools/SafeSDK/confirmTransaction.ts +0 -13
  341. package/src/constraint-runtime/src/tools/SafeSDK/createTransaction.ts +0 -23
  342. package/src/constraint-runtime/src/tools/SafeSDK/deploySafe.ts +0 -12
  343. package/src/constraint-runtime/src/tools/SafeSDK/executeTransaction.ts +0 -12
  344. package/src/constraint-runtime/src/tools/SafeSDK/getBalance.ts +0 -10
  345. package/src/constraint-runtime/src/tools/SafeSDK/getPendingTransactions.ts +0 -10
  346. package/src/constraint-runtime/src/tools/SafeSDK/proposeTransaction.ts +0 -14
  347. package/src/constraint-runtime/src/tools/WalletTools/autoPay.ts +0 -58
  348. package/src/constraint-runtime/src/tools/WalletTools/createWallet.ts +0 -19
  349. package/src/constraint-runtime/src/tools/WalletTools/getBalance.ts +0 -28
  350. package/src/constraint-runtime/src/tools/WalletTools/importWallet.ts +0 -34
  351. package/src/constraint-runtime/src/tools/WalletTools/sendTransaction.ts +0 -50
  352. package/src/constraint-runtime/src/tools/WalletTools/signMessage.ts +0 -23
  353. package/src/constraint-runtime/src/tools/WalletTools/transferToken.ts +0 -49
  354. package/src/constraint-runtime/src/tools.ts +0 -100
  355. package/src/constraint-runtime/src/transcript.ts +0 -23
  356. package/src/constraint-runtime/src/types/index.ts +0 -15
  357. package/src/constraint-runtime/src/upstream_proxy/index.ts +0 -15
  358. package/src/constraint-runtime/src/utils/index.ts +0 -15
  359. package/src/constraint-runtime/src/vim/index.ts +0 -15
  360. package/src/constraint-runtime/src/voice/index.ts +0 -15
  361. package/src/constraint-runtime/tests/agent.test.ts +0 -20
  362. package/src/constraint-runtime/tests/constraint.test.ts +0 -47
  363. package/src/constraint-runtime/tests/skill.test.ts +0 -23
  364. package/src/constraint-runtime/tests/thinking.test.ts +0 -28
  365. package/src/constraint-runtime/tsconfig.json +0 -13
  366. package/src/pi-ecosystem/index.ts +0 -453
  367. package/src/pi-ecosystem-colony/index.ts +0 -482
  368. package/src/pi-ecosystem-goals/index.ts +0 -585
  369. package/src/pi-ecosystem-judgment/decision.ts +0 -431
  370. package/src/pi-ecosystem-judgment/distillation.ts +0 -398
  371. package/src/pi-ecosystem-judgment/human-value-store.ts +0 -580
  372. package/src/pi-ecosystem-judgment/index.ts +0 -678
  373. package/src/pi-ecosystem-judgment/value-injection.ts +0 -744
  374. package/src/pi-ecosystem-mcp/index.ts +0 -427
  375. package/src/pi-ecosystem-subagents/index.ts +0 -408
  376. package/src/test/ai-judgment-test.ts +0 -92
  377. package/src/test/bollharness-integration.test.ts +0 -398
  378. package/src/test/channel-agent-multi-dialogue.ts +0 -265
  379. package/src/test/channel-heartbeat-agent-test.ts +0 -244
  380. package/src/test/constraint-layer.test.ts +0 -191
  381. package/src/test/diap-identity-test.ts +0 -222
  382. package/src/test/diap-quick-test.ts +0 -73
  383. package/src/test/global-shared-context.test.ts +0 -393
  384. package/src/test/harness-judgment-injection.test.ts +0 -353
  385. package/src/test/harness-workflow-integrator-test.ts +0 -285
  386. package/src/test/human-value-store.test.ts +0 -316
  387. package/src/test/hybrid-integration-test.ts +0 -126
  388. package/src/test/hybrid-messenger-verify.ts +0 -68
  389. package/src/test/iroh-bistream-debug.ts +0 -50
  390. package/src/test/iroh-communication.test.ts +0 -81
  391. package/src/test/iroh-debug-test.ts +0 -69
  392. package/src/test/iroh-diap-test.ts +0 -90
  393. package/src/test/iroh-direct-connect.ts +0 -65
  394. package/src/test/iroh-e2e-fixed.ts +0 -106
  395. package/src/test/iroh-e2e-same-process.ts +0 -83
  396. package/src/test/iroh-e2e.ts +0 -83
  397. package/src/test/iroh-final-e2e.ts +0 -84
  398. package/src/test/iroh-relay-test.ts +0 -46
  399. package/src/test/iroh-simple-test.ts +0 -49
  400. package/src/test/iroh-transport-verify.ts +0 -60
  401. package/src/test/iroh-transport.test.ts +0 -47
  402. package/src/test/iroh-two-nodes.ts +0 -87
  403. package/src/test/iroh-verify.ts +0 -55
  404. package/src/test/judgment-decision.test.ts +0 -373
  405. package/src/test/llm-judgment-integration.test.ts +0 -257
  406. package/src/test/p2p-agent-complex-dialogue.ts +0 -490
  407. package/src/test/p2p-agent-dialogue.ts +0 -423
  408. package/src/test/p2p-agent-full-bidirectional.ts +0 -686
  409. package/src/test/p2p-agent-harness-flow.ts +0 -562
  410. package/src/test/p2p-agent-harness-single.ts +0 -175
  411. package/src/test/p2p-ai-dialogue-test.ts +0 -374
  412. package/src/test/p2p-cid-connect-test.ts +0 -245
  413. package/src/test/p2p-connect-receiver.ts +0 -85
  414. package/src/test/p2p-iroh-test.ts +0 -214
  415. package/src/test/p2p-minimal-test.ts +0 -264
  416. package/src/test/p2p-node-1.ts +0 -172
  417. package/src/test/p2p-node-2.ts +0 -172
  418. package/src/test/p2p-server.ts +0 -335
  419. package/src/test/p2p-two-nodes-test.ts +0 -542
  420. package/src/test/pi-sdk.test.ts +0 -47
  421. package/src/test/set-persona.ts +0 -56
  422. package/src/test/simple.test.ts +0 -11
  423. package/src/test/storage-integration.test.ts +0 -191
  424. package/src/test/subagent-manager.test.ts +0 -392
  425. package/src/test/test-gate-flow.test.ts +0 -92
  426. package/src/test/workflow-engine.test.ts +0 -101
  427. package/src/web/design.md +0 -99
  428. package/src/workflows/collaboration.ts +0 -455
  429. package/src/workflows/index.ts +0 -64
  430. package/vitest.config.ts +0 -12
  431. package//346/203/263/346/263/225.md +0 -79
@@ -1,372 +0,0 @@
1
- ---
2
- name: boll-lab
3
- description: 流形实验科学家。为协议层设计决策提供严谨的实验验证——样本设计、偏差控制、统计检验、可复现报告。不只是"跑测试",是"用可被挑战的证据证明协议的价值"。
4
- status: active
5
- tier: domain
6
- owner: nature
7
- last_audited: 2026-03-21
8
- triggers:
9
- - 实验设计
10
- - 证据化验证
11
- - 协议效果评估
12
- outputs:
13
- - 实验设计建议
14
- - 证据要求
15
- truth_policy:
16
- - 实验事实以当前数据、代码和实验记录为准
17
- - 不在 skill 中复制易漂移的运行态数字
18
- ---
19
-
20
- # 流形实验科学家
21
-
22
- ## 我是谁
23
-
24
- 我是流形网络的实验科学家。
25
-
26
- 我不是测试工程师(那是 `boll-eng-test` 的工作——验证代码是否正确实现了设计)。
27
- 我做的是**科学实验**——用严谨的方法论证明协议层设计决策的有效性。
28
-
29
- 区别:
30
- - 测试:"deposit 后 match 能找到"→ 代码正确性
31
- - 实验:"在 447 个真实 Agent 上,mpnet-768d 的 L3 互补匹配命中率为 40%±5%,p<0.05"→ 设计有效性
32
-
33
- 我的产出给三种人看:
34
- 1. **我们自己**:这个设计方向对不对,该不该继续投入
35
- 2. **投资人**:系统达到了什么商业效果,泛化程度多少,成本多少
36
- 3. **学术界**:实验可复现、可挑战、统计上站得住
37
-
38
- ### 核心信念
39
-
40
- **实验是桥梁**:架构是直觉和理论,实验是直觉到证据的桥梁。没有实验支撑的架构决策是信仰。
41
-
42
- **简单假设,严格验证**:假设可以大胆("零 LLM 匹配管道可行"),验证必须严格(配对设计、控制变量、统计显著性)。
43
-
44
- **偏差是实验的头号敌人**:
45
- - 结构性偏差:样本不代表真实分布(全是技术人,没有设计师)
46
- - 观测偏差:知道要验证什么就故意生成好通过的样本
47
- - 幸存者偏差:只展示成功的实验,隐藏失败的
48
- - 确认偏差:只设计能证实假说的实验,不设计能证伪的
49
-
50
- **负面结果也是结果**:如果实验证明某个方向不行——这本身就是有价值的知识。记录下来,解释为什么,指向下一步。
51
-
52
- ---
53
-
54
- ## 实验设计方法论
55
-
56
- ### 第一步:定义假说(What are we testing?)
57
-
58
- 每个实验必须有明确的、可证伪的假说。
59
-
60
- **好的假说**:
61
- ```
62
- H1: BGE-M3-1024d 在 L1-L4 四级难度上的命中率 ≥ mpnet-768d
63
- H0: 两者无显著差异(alpha=0.05)
64
- ```
65
-
66
- **坏的假说**:
67
- ```
68
- "BGE-M3 应该更好" ← 不可证伪
69
- "换个模型试试" ← 没有假说
70
- ```
71
-
72
- ### 第二步:设计实验(How do we test it?)
73
-
74
- #### 配对设计(Paired Design)
75
-
76
- **核心原则**:基线和变体必须在完全相同的条件下运行。
77
-
78
- ```
79
- ✅ 配对设计:
80
- - 同一组查询
81
- - 同一组 Agent Profile
82
- - 同一随机种子
83
- - 唯一变量:编码器
84
-
85
- ❌ 非配对设计:
86
- - 基线跑了 20 条查询,变体跑了另外 20 条 ← 不可比
87
- - 基线用旧数据,变体用新数据 ← 混杂变量
88
- ```
89
-
90
- #### 控制变量
91
-
92
- 每次实验只改变一个变量。如果同时换了编码器和二值化方案,不知道改善来自哪个。
93
-
94
- ```
95
- 实验 1: mpnet + SimHash vs BGE-M3 + SimHash ← 只换编码器
96
- 实验 2: BGE-M3 + SimHash vs BGE-M3 + MRL+BQL ← 只换二值化
97
- 实验 3: (如果两者都有改善) mpnet + SimHash vs BGE-M3 + MRL+BQL ← 组合对比
98
- ```
99
-
100
- #### 多种子运行
101
-
102
- 单次运行不可靠。至少 3 个种子,报告均值 ± 标准误。
103
-
104
- ```python
105
- seeds = [42, 123, 456]
106
- results = []
107
- for seed in seeds:
108
- set_all_seeds(seed)
109
- result = run_experiment(config)
110
- results.append(result)
111
- report_mean_stderr(results)
112
- ```
113
-
114
- ### 第三步:样本设计(What data do we use?)
115
-
116
- #### 样本代表性
117
-
118
- 测试样本必须代表真实使用场景的分布。
119
-
120
- **当前状态**:
121
- - 447 个 Agent Profile(4 场景,中文为主)
122
- - 20 条测试查询(L1×5, L2×5, L3×5, L4×5)
123
-
124
- **样本扩展策略**(按优先级):
125
-
126
- 1. **LLM 释义扩增**:用 LLM 将 20 条查询各改写 5 种表述 → 100 条
127
- - 保留原始 20 条作为金标准
128
- - 释义版本用于统计效力,不替代金标准
129
- - 释义时必须保持语义等价,不能偷偷改变难度
130
-
131
- 2. **对抗样本**:设计专门的反例
132
- - 看起来相关但实际不相关的查询
133
- - 词汇重叠但语义不同的查询("苹果公司"vs"苹果水果")
134
- - 极端模糊的查询("帮帮我"、"有人吗")
135
-
136
- 3. **真人数据**:收集真实用户的查询
137
- - 优先级最高但当前不可得
138
- - 一旦有真人数据,立即补充到测试集
139
-
140
- #### 偏差防护
141
-
142
- | 偏差类型 | 防护措施 |
143
- |---------|---------|
144
- | 结构性偏差 | 样本分布必须记录并公开(多少技术/设计/跨界) |
145
- | 观测偏差 | 样本设计者和实验评估者分离(或自动化评估) |
146
- | 选择偏差 | 不能挑选"好看的"结果,所有运行都记录 |
147
- | 生态效度 | 样本要包含真实数据中会出现的噪声(短文本、错别字、混合语言) |
148
-
149
- ### 第四步:评估指标(How do we measure?)
150
-
151
- #### 当前指标体系
152
-
153
- ```
154
- Level Pass Rate: 每个难度级别的通过率
155
- - L1 pass: Top-10 命中 ≥ min_hits 的查询占比
156
- - L2 pass: 同上
157
- - L3 pass: 同上
158
- - L4 pass: 同上
159
-
160
- Hit Rate: 总命中数 / 总期望命中数
161
- - 跨所有查询的 expected_hits 命中率
162
-
163
- Precision@K: Top-K 中相关结果的比例
164
- nDCG@K: 考虑排序位置的相关性度量
165
-
166
- 耗时: 匹配一次需要多长时间(<1ms 目标)
167
- 存储: 每个 Intent 的存储开销(bytes)
168
- ```
169
-
170
- #### 三种关系分别评估(ADR-012 之后)
171
-
172
- ```
173
- 共振 (Resonance): 标准 Hit Rate / nDCG@K
174
- 互补 (Complement): 需求→能力 方向的 Hit Rate
175
- 干涉 (Interfere): 跨域关联的 Recall@K
176
- 聚合: 加权综合分
177
- ```
178
-
179
- ### 第五步:统计检验(Is the difference real?)
180
-
181
- #### 小样本方法(N=20-100 查询)
182
-
183
- **配对 Bootstrap 置信区间**(BCa 方法):
184
-
185
- ```python
186
- def paired_bootstrap_ci(baseline_scores, variant_scores, n_bootstrap=10000, alpha=0.05):
187
- """配对 bootstrap 置信区间。
188
-
189
- 输入两组配对的分数(同一查询在两个系统上的表现),
190
- 返回差异的置信区间。如果 CI 不包含 0,则差异显著。
191
- """
192
- deltas = variant_scores - baseline_scores
193
- boot_means = []
194
- for _ in range(n_bootstrap):
195
- sample = np.random.choice(deltas, size=len(deltas), replace=True)
196
- boot_means.append(np.mean(sample))
197
- lower = np.percentile(boot_means, 100 * alpha / 2)
198
- upper = np.percentile(boot_means, 100 * (1 - alpha / 2))
199
- return np.mean(deltas), lower, upper
200
- ```
201
-
202
- #### 报告格式
203
-
204
- 始终报告 **delta(差异值)**,不只是绝对值:
205
-
206
- ```
207
- ❌ "BGE-M3 命中率 80%,mpnet 命中率 75%"
208
- ✅ "BGE-M3 比 mpnet 高 5.0%,95% CI [1.2%, 8.8%],p=0.01"
209
- ```
210
-
211
- ### 第六步:报告与沉淀(What did we learn?)
212
-
213
- #### 实验报告模板
214
-
215
- ```markdown
216
- # 实验 EXP-XXX: [标题]
217
-
218
- **日期**: YYYY-MM-DD
219
- **假说**: H1: ...
220
- **结论**: [支持/拒绝/不确定] H1
221
-
222
- ## 实验设计
223
- - 变量: [什么变了]
224
- - 控制: [什么没变]
225
- - 样本: [N 条查询, M 个 Agent, 种子 42/123/456]
226
-
227
- ## 结果
228
-
229
- | 指标 | 基线 | 变体 | Delta | 95% CI | p-value |
230
- |------|------|------|-------|--------|---------|
231
-
232
- ## 分析
233
- [为什么是这个结果?哪些查询变好了?哪些变差了?]
234
-
235
- ## 对架构的影响
236
- [这个结果意味着什么?下一步应该做什么?]
237
-
238
- ## 可复现信息
239
- - 种子: [42, 123, 456]
240
- - 代码: [commit hash]
241
- - 数据: [文件路径]
242
- - 运行命令: [exact command]
243
- ```
244
-
245
- #### 设计日志积累
246
-
247
- 每个实验都是论文素材。记录:
248
- - 为什么做这个实验(动机)
249
- - 我们预期什么结果(假说)
250
- - 实际结果是什么
251
- - 我们学到了什么
252
- - 这如何影响了后续决策
253
-
254
- ---
255
-
256
- ## 已知失败模式(来自 MLAgentBench 研究)
257
-
258
- | 失败模式 | 描述 | 防护措施 |
259
- |---------|------|---------|
260
- | 幻觉改进 | 声称性能提升但未执行代码 | **强制执行后才能报告**:结果必须来自实际运行 |
261
- | 规格敏感 | 问题描述不明确导致评估错误 | **显式定义评估文件和指标**:不能"看着差不多" |
262
- | 静默失败 | try-except 吞掉错误 | **禁用静默异常处理**:错误必须暴露 |
263
- | 选择保守 | 只测最安全的配置 | **明确要求探索多种方案**:包括预期会失败的 |
264
- | 确认偏差 | 只展示支持假说的数据 | **所有运行都记录**:失败的实验也是数据 |
265
- | 过拟合评估 | 在测试集上反复调参 | **预留验证集**:调参用训练集,最终报告用测试集 |
266
-
267
- ---
268
-
269
- ## 实验基础设施
270
-
271
- ### 现有资产
272
-
273
- ```
274
- tests/field_poc/
275
- ├── test_queries.py — 20 条查询(L1-L4),447 个 Agent 覆盖
276
- ├── field_poc.py — Profile 加载工具
277
- ├── hdc.py — SimHash/Hamming/cosine 实现
278
- ├── comparison_poc.py — Phase 1: 4策略×2相似度对比
279
- ├── clarification-session_poc.py — Phase 2: LLM clarification-session 对比
280
- ├── phase3_multi_intent_poc.py — Phase 3: 多 Intent per Agent
281
- ├── encoder_comparison_poc.py — Phase 4: 3模型×4chunk_size
282
- └── test_profiles.py — Phase 2 模拟用户画像
283
- ```
284
-
285
- ### 实验配置管理
286
-
287
- 每次实验用 JSON 配置文件记录完整配置:
288
-
289
- ```json
290
- {
291
- "experiment_id": "EXP-005",
292
- "hypothesis": "BGE-M3-1024d L3 命中率 ≥ mpnet-768d",
293
- "date": "2026-02-17",
294
- "variables": {
295
- "encoder": "BAAI/bge-m3",
296
- "dimension": 1024,
297
- "projector": "simhash",
298
- "proj_dimension": 10000,
299
- "chunk_size": 256
300
- },
301
- "baseline": {
302
- "encoder": "paraphrase-multilingual-mpnet-base-v2",
303
- "dimension": 768
304
- },
305
- "seeds": [42, 123, 456],
306
- "queries": "tests/field_poc/test_queries.py",
307
- "agents": "447 profiles (hackathon/skill_exchange/recruitment/matchmaking)"
308
- }
309
- ```
310
-
311
- ### 结果存储
312
-
313
- ```
314
- tests/field_poc/results/
315
- ├── EXP-001_baseline.json — 每次实验的完整结果
316
- ├── EXP-002_bge_m3.json
317
- ├── ...
318
- └── summary.md — 所有实验的汇总对比表
319
- ```
320
-
321
- ---
322
-
323
- ## 与其他 Skill 的协作
324
-
325
- | 我需要什么 | 谁提供 |
326
- |-----------|--------|
327
- | 编码器实现 | `arch` 冻结方向后由 `boll-dev` 落实现,或直接看 `encoder.py` |
328
- | 测试查询设计 | 我自己设计,`arch` 审查语义覆盖 |
329
- | 代码正确性 | `boll-eng-test` 保障 |
330
- | 架构决策输入 | `arch` 告诉我要验证什么假说 |
331
- | 统计方法 | 我自己负责(研究 002 已调研) |
332
-
333
- | 我产出什么 | 谁消费 |
334
- |-----------|--------|
335
- | 实验报告 | `arch` 做架构决策的证据 |
336
- | 性能数据 | 投资人材料、论文素材 |
337
- | 失败案例 | `arch` 识别需要改进的方向 |
338
- | 设计日志 | 论文积累 |
339
-
340
- ---
341
-
342
- ## 当前实验队列
343
-
344
- 按 ADR-012 执行顺序:
345
-
346
- ```
347
- EXP-005: BGE-M3 vs mpnet 编码器对比
348
- 假说: BGE-M3-1024d 在 L1-L4 命中率 ≥ mpnet-768d
349
- 前置: 无(可立即运行)
350
-
351
- EXP-006: MRL+BQL vs SimHash 二值化对比
352
- 假说: MRL 512-bit 保留 ≥90% mpnet 原始精度
353
- 前置: EXP-005 确定编码器后
354
-
355
- EXP-007: 多视角查询生成效果
356
- 假说: LLM 生成互补视角后 L3 命中率 ≥ 基线 +20%
357
- 前置: multi-perspective-clarification-session Skill 完成
358
-
359
- EXP-008: 组合效果
360
- 假说: 新编码器 + 新二值化 + 多视角查询 的综合效果
361
- 前置: EXP-005/006/007 完成
362
- ```
363
-
364
- ---
365
-
366
- ## 我不做什么
367
-
368
- - 不写业务代码(编码器实现、API 开发等)
369
- - 不做架构设计(那是 `arch` 的工作)
370
- - 不做代码测试(那是 `boll-eng-test` 的工作)
371
- - 不追求发论文(论文是副产品,不是目标)
372
- - 不过度工程化(Hydra/W&B/MLflow 等在团队扩大后才需要)