@bastani/atomic 0.6.8 → 0.7.0-2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (765) hide show
  1. package/bin/atomic +65 -0
  2. package/package.json +17 -82
  3. package/postinstall.mjs +47 -0
  4. package/.agents/skills/ado-commit/SKILL.md +0 -94
  5. package/.agents/skills/ado-create-pr/SKILL.md +0 -211
  6. package/.agents/skills/advanced-evaluation/SKILL.md +0 -404
  7. package/.agents/skills/advanced-evaluation/references/bias-mitigation.md +0 -288
  8. package/.agents/skills/advanced-evaluation/references/evaluation-pipeline.md +0 -43
  9. package/.agents/skills/advanced-evaluation/references/implementation-patterns.md +0 -315
  10. package/.agents/skills/advanced-evaluation/references/metrics-guide.md +0 -331
  11. package/.agents/skills/advanced-evaluation/scripts/evaluation_example.py +0 -392
  12. package/.agents/skills/ast-grep/SKILL.md +0 -325
  13. package/.agents/skills/ast-grep/references/rule_reference.md +0 -297
  14. package/.agents/skills/bdi-mental-states/SKILL.md +0 -313
  15. package/.agents/skills/bdi-mental-states/references/bdi-ontology-core.md +0 -207
  16. package/.agents/skills/bdi-mental-states/references/framework-integration.md +0 -582
  17. package/.agents/skills/bdi-mental-states/references/rdf-examples.md +0 -315
  18. package/.agents/skills/bdi-mental-states/references/sparql-competency.md +0 -420
  19. package/.agents/skills/bun/SKILL.md +0 -233
  20. package/.agents/skills/context-compression/SKILL.md +0 -274
  21. package/.agents/skills/context-compression/references/evaluation-framework.md +0 -213
  22. package/.agents/skills/context-compression/scripts/compression_evaluator.py +0 -862
  23. package/.agents/skills/context-compression/tests/test_compression_evaluator.py +0 -56
  24. package/.agents/skills/context-degradation/SKILL.md +0 -208
  25. package/.agents/skills/context-degradation/references/patterns.md +0 -314
  26. package/.agents/skills/context-degradation/scripts/degradation_detector.py +0 -614
  27. package/.agents/skills/context-fundamentals/SKILL.md +0 -203
  28. package/.agents/skills/context-fundamentals/references/context-components.md +0 -283
  29. package/.agents/skills/context-fundamentals/scripts/context_manager.py +0 -533
  30. package/.agents/skills/context-optimization/SKILL.md +0 -197
  31. package/.agents/skills/context-optimization/references/optimization_techniques.md +0 -272
  32. package/.agents/skills/context-optimization/scripts/compaction.py +0 -562
  33. package/.agents/skills/create-spec/SKILL.md +0 -249
  34. package/.agents/skills/docx/LICENSE.txt +0 -30
  35. package/.agents/skills/docx/SKILL.md +0 -592
  36. package/.agents/skills/docx/scripts/__init__.py +0 -1
  37. package/.agents/skills/docx/scripts/accept_changes.py +0 -135
  38. package/.agents/skills/docx/scripts/comment.py +0 -318
  39. package/.agents/skills/docx/scripts/office/helpers/__init__.py +0 -0
  40. package/.agents/skills/docx/scripts/office/helpers/merge_runs.py +0 -199
  41. package/.agents/skills/docx/scripts/office/helpers/simplify_redlines.py +0 -197
  42. package/.agents/skills/docx/scripts/office/pack.py +0 -159
  43. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  44. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  45. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  46. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  47. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  48. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  49. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  50. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  51. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  52. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  53. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  54. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  55. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  56. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  57. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  58. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  59. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  60. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  61. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  62. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  63. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  64. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  65. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  66. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  67. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  68. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  69. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  70. package/.agents/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  71. package/.agents/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  72. package/.agents/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  73. package/.agents/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  74. package/.agents/skills/docx/scripts/office/schemas/mce/mc.xsd +0 -75
  75. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +0 -560
  76. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +0 -67
  77. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +0 -14
  78. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  79. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  80. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  81. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  82. package/.agents/skills/docx/scripts/office/soffice.py +0 -183
  83. package/.agents/skills/docx/scripts/office/unpack.py +0 -132
  84. package/.agents/skills/docx/scripts/office/validate.py +0 -111
  85. package/.agents/skills/docx/scripts/office/validators/__init__.py +0 -15
  86. package/.agents/skills/docx/scripts/office/validators/base.py +0 -847
  87. package/.agents/skills/docx/scripts/office/validators/docx.py +0 -446
  88. package/.agents/skills/docx/scripts/office/validators/pptx.py +0 -275
  89. package/.agents/skills/docx/scripts/office/validators/redlining.py +0 -247
  90. package/.agents/skills/docx/scripts/templates/comments.xml +0 -3
  91. package/.agents/skills/docx/scripts/templates/commentsExtended.xml +0 -3
  92. package/.agents/skills/docx/scripts/templates/commentsExtensible.xml +0 -3
  93. package/.agents/skills/docx/scripts/templates/commentsIds.xml +0 -3
  94. package/.agents/skills/docx/scripts/templates/people.xml +0 -3
  95. package/.agents/skills/evaluation/SKILL.md +0 -253
  96. package/.agents/skills/evaluation/references/metrics.md +0 -339
  97. package/.agents/skills/evaluation/scripts/evaluator.py +0 -627
  98. package/.agents/skills/explain-code/SKILL.md +0 -232
  99. package/.agents/skills/filesystem-context/SKILL.md +0 -289
  100. package/.agents/skills/filesystem-context/references/implementation-patterns.md +0 -549
  101. package/.agents/skills/filesystem-context/scripts/filesystem_context.py +0 -425
  102. package/.agents/skills/find-skills/SKILL.md +0 -144
  103. package/.agents/skills/gh-commit/SKILL.md +0 -245
  104. package/.agents/skills/gh-create-pr/SKILL.md +0 -95
  105. package/.agents/skills/hosted-agents/SKILL.md +0 -262
  106. package/.agents/skills/hosted-agents/references/infrastructure-patterns.md +0 -700
  107. package/.agents/skills/hosted-agents/scripts/sandbox_manager.py +0 -590
  108. package/.agents/skills/impeccable/SKILL.md +0 -178
  109. package/.agents/skills/impeccable/agents/openai.yaml +0 -4
  110. package/.agents/skills/impeccable/reference/adapt.md +0 -190
  111. package/.agents/skills/impeccable/reference/animate.md +0 -175
  112. package/.agents/skills/impeccable/reference/audit.md +0 -134
  113. package/.agents/skills/impeccable/reference/bolder.md +0 -113
  114. package/.agents/skills/impeccable/reference/brand.md +0 -114
  115. package/.agents/skills/impeccable/reference/clarify.md +0 -174
  116. package/.agents/skills/impeccable/reference/cognitive-load.md +0 -106
  117. package/.agents/skills/impeccable/reference/color-and-contrast.md +0 -105
  118. package/.agents/skills/impeccable/reference/colorize.md +0 -154
  119. package/.agents/skills/impeccable/reference/craft.md +0 -193
  120. package/.agents/skills/impeccable/reference/critique.md +0 -213
  121. package/.agents/skills/impeccable/reference/delight.md +0 -302
  122. package/.agents/skills/impeccable/reference/distill.md +0 -111
  123. package/.agents/skills/impeccable/reference/document.md +0 -427
  124. package/.agents/skills/impeccable/reference/extract.md +0 -70
  125. package/.agents/skills/impeccable/reference/harden.md +0 -347
  126. package/.agents/skills/impeccable/reference/heuristics-scoring.md +0 -234
  127. package/.agents/skills/impeccable/reference/interaction-design.md +0 -195
  128. package/.agents/skills/impeccable/reference/layout.md +0 -141
  129. package/.agents/skills/impeccable/reference/live.md +0 -594
  130. package/.agents/skills/impeccable/reference/motion-design.md +0 -109
  131. package/.agents/skills/impeccable/reference/onboard.md +0 -234
  132. package/.agents/skills/impeccable/reference/optimize.md +0 -258
  133. package/.agents/skills/impeccable/reference/overdrive.md +0 -130
  134. package/.agents/skills/impeccable/reference/personas.md +0 -178
  135. package/.agents/skills/impeccable/reference/polish.md +0 -232
  136. package/.agents/skills/impeccable/reference/product.md +0 -62
  137. package/.agents/skills/impeccable/reference/quieter.md +0 -99
  138. package/.agents/skills/impeccable/reference/responsive-design.md +0 -114
  139. package/.agents/skills/impeccable/reference/shape.md +0 -151
  140. package/.agents/skills/impeccable/reference/spatial-design.md +0 -100
  141. package/.agents/skills/impeccable/reference/teach.md +0 -156
  142. package/.agents/skills/impeccable/reference/typeset.md +0 -124
  143. package/.agents/skills/impeccable/reference/typography.md +0 -159
  144. package/.agents/skills/impeccable/reference/ux-writing.md +0 -107
  145. package/.agents/skills/impeccable/scripts/cleanup-deprecated.mjs +0 -284
  146. package/.agents/skills/impeccable/scripts/command-metadata.json +0 -94
  147. package/.agents/skills/impeccable/scripts/design-parser.mjs +0 -820
  148. package/.agents/skills/impeccable/scripts/detect-csp.mjs +0 -198
  149. package/.agents/skills/impeccable/scripts/is-generated.mjs +0 -69
  150. package/.agents/skills/impeccable/scripts/live-accept.mjs +0 -595
  151. package/.agents/skills/impeccable/scripts/live-browser.js +0 -4781
  152. package/.agents/skills/impeccable/scripts/live-inject.mjs +0 -445
  153. package/.agents/skills/impeccable/scripts/live-poll.mjs +0 -186
  154. package/.agents/skills/impeccable/scripts/live-server.mjs +0 -694
  155. package/.agents/skills/impeccable/scripts/live-wrap.mjs +0 -571
  156. package/.agents/skills/impeccable/scripts/live.mjs +0 -247
  157. package/.agents/skills/impeccable/scripts/load-context.mjs +0 -141
  158. package/.agents/skills/impeccable/scripts/modern-screenshot.umd.js +0 -14
  159. package/.agents/skills/impeccable/scripts/pin.mjs +0 -214
  160. package/.agents/skills/init/SKILL.md +0 -140
  161. package/.agents/skills/liteparse/SKILL.md +0 -223
  162. package/.agents/skills/memory-systems/SKILL.md +0 -221
  163. package/.agents/skills/memory-systems/references/implementation.md +0 -551
  164. package/.agents/skills/memory-systems/scripts/memory_store.py +0 -616
  165. package/.agents/skills/multi-agent-patterns/SKILL.md +0 -259
  166. package/.agents/skills/multi-agent-patterns/references/frameworks.md +0 -433
  167. package/.agents/skills/multi-agent-patterns/scripts/coordination.py +0 -613
  168. package/.agents/skills/opentui/SKILL.md +0 -202
  169. package/.agents/skills/opentui/references/animation/REFERENCE.md +0 -431
  170. package/.agents/skills/opentui/references/components/REFERENCE.md +0 -144
  171. package/.agents/skills/opentui/references/components/code-diff.md +0 -672
  172. package/.agents/skills/opentui/references/components/containers.md +0 -417
  173. package/.agents/skills/opentui/references/components/inputs.md +0 -531
  174. package/.agents/skills/opentui/references/components/text-display.md +0 -386
  175. package/.agents/skills/opentui/references/core/REFERENCE.md +0 -145
  176. package/.agents/skills/opentui/references/core/api.md +0 -543
  177. package/.agents/skills/opentui/references/core/configuration.md +0 -168
  178. package/.agents/skills/opentui/references/core/gotchas.md +0 -393
  179. package/.agents/skills/opentui/references/core/patterns.md +0 -449
  180. package/.agents/skills/opentui/references/keyboard/REFERENCE.md +0 -617
  181. package/.agents/skills/opentui/references/layout/REFERENCE.md +0 -337
  182. package/.agents/skills/opentui/references/layout/patterns.md +0 -444
  183. package/.agents/skills/opentui/references/react/REFERENCE.md +0 -174
  184. package/.agents/skills/opentui/references/react/api.md +0 -436
  185. package/.agents/skills/opentui/references/react/configuration.md +0 -302
  186. package/.agents/skills/opentui/references/react/gotchas.md +0 -443
  187. package/.agents/skills/opentui/references/react/patterns.md +0 -501
  188. package/.agents/skills/opentui/references/solid/REFERENCE.md +0 -201
  189. package/.agents/skills/opentui/references/solid/api.md +0 -564
  190. package/.agents/skills/opentui/references/solid/configuration.md +0 -316
  191. package/.agents/skills/opentui/references/solid/gotchas.md +0 -427
  192. package/.agents/skills/opentui/references/solid/patterns.md +0 -560
  193. package/.agents/skills/opentui/references/testing/REFERENCE.md +0 -614
  194. package/.agents/skills/pdf/LICENSE.txt +0 -30
  195. package/.agents/skills/pdf/SKILL.md +0 -316
  196. package/.agents/skills/pdf/forms.md +0 -294
  197. package/.agents/skills/pdf/reference.md +0 -612
  198. package/.agents/skills/pdf/scripts/check_bounding_boxes.py +0 -65
  199. package/.agents/skills/pdf/scripts/check_fillable_fields.py +0 -11
  200. package/.agents/skills/pdf/scripts/convert_pdf_to_images.py +0 -33
  201. package/.agents/skills/pdf/scripts/create_validation_image.py +0 -37
  202. package/.agents/skills/pdf/scripts/extract_form_field_info.py +0 -122
  203. package/.agents/skills/pdf/scripts/extract_form_structure.py +0 -115
  204. package/.agents/skills/pdf/scripts/fill_fillable_fields.py +0 -98
  205. package/.agents/skills/pdf/scripts/fill_pdf_form_with_annotations.py +0 -107
  206. package/.agents/skills/playwright-cli/SKILL.md +0 -390
  207. package/.agents/skills/playwright-cli/references/element-attributes.md +0 -23
  208. package/.agents/skills/playwright-cli/references/playwright-tests.md +0 -39
  209. package/.agents/skills/playwright-cli/references/request-mocking.md +0 -87
  210. package/.agents/skills/playwright-cli/references/running-code.md +0 -241
  211. package/.agents/skills/playwright-cli/references/session-management.md +0 -225
  212. package/.agents/skills/playwright-cli/references/spec-driven-testing.md +0 -305
  213. package/.agents/skills/playwright-cli/references/storage-state.md +0 -275
  214. package/.agents/skills/playwright-cli/references/test-generation.md +0 -134
  215. package/.agents/skills/playwright-cli/references/tracing.md +0 -139
  216. package/.agents/skills/playwright-cli/references/video-recording.md +0 -143
  217. package/.agents/skills/pptx/LICENSE.txt +0 -30
  218. package/.agents/skills/pptx/SKILL.md +0 -234
  219. package/.agents/skills/pptx/editing.md +0 -205
  220. package/.agents/skills/pptx/pptxgenjs.md +0 -420
  221. package/.agents/skills/pptx/scripts/__init__.py +0 -0
  222. package/.agents/skills/pptx/scripts/add_slide.py +0 -195
  223. package/.agents/skills/pptx/scripts/clean.py +0 -286
  224. package/.agents/skills/pptx/scripts/office/helpers/__init__.py +0 -0
  225. package/.agents/skills/pptx/scripts/office/helpers/merge_runs.py +0 -199
  226. package/.agents/skills/pptx/scripts/office/helpers/simplify_redlines.py +0 -197
  227. package/.agents/skills/pptx/scripts/office/pack.py +0 -159
  228. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  229. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  230. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  231. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  232. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  233. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  234. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  235. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  236. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  237. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  238. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  239. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  240. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  241. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  242. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  243. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  244. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  245. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  246. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  247. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  248. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  249. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  250. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  251. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  252. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  253. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  254. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  255. package/.agents/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  256. package/.agents/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  257. package/.agents/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  258. package/.agents/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  259. package/.agents/skills/pptx/scripts/office/schemas/mce/mc.xsd +0 -75
  260. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd +0 -560
  261. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd +0 -67
  262. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd +0 -14
  263. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  264. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  265. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  266. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  267. package/.agents/skills/pptx/scripts/office/soffice.py +0 -183
  268. package/.agents/skills/pptx/scripts/office/unpack.py +0 -132
  269. package/.agents/skills/pptx/scripts/office/validate.py +0 -111
  270. package/.agents/skills/pptx/scripts/office/validators/__init__.py +0 -15
  271. package/.agents/skills/pptx/scripts/office/validators/base.py +0 -847
  272. package/.agents/skills/pptx/scripts/office/validators/docx.py +0 -446
  273. package/.agents/skills/pptx/scripts/office/validators/pptx.py +0 -275
  274. package/.agents/skills/pptx/scripts/office/validators/redlining.py +0 -247
  275. package/.agents/skills/pptx/scripts/thumbnail.py +0 -289
  276. package/.agents/skills/project-development/SKILL.md +0 -293
  277. package/.agents/skills/project-development/references/case-studies.md +0 -388
  278. package/.agents/skills/project-development/references/pipeline-patterns.md +0 -610
  279. package/.agents/skills/project-development/scripts/pipeline_template.py +0 -796
  280. package/.agents/skills/prompt-engineer/SKILL.md +0 -265
  281. package/.agents/skills/prompt-engineer/references/advanced_patterns.md +0 -271
  282. package/.agents/skills/prompt-engineer/references/core_prompting.md +0 -137
  283. package/.agents/skills/prompt-engineer/references/quality_improvement.md +0 -193
  284. package/.agents/skills/research-codebase/SKILL.md +0 -229
  285. package/.agents/skills/ripgrep/SKILL.md +0 -384
  286. package/.agents/skills/skill-creator/LICENSE.txt +0 -202
  287. package/.agents/skills/skill-creator/SKILL.md +0 -487
  288. package/.agents/skills/skill-creator/agents/analyzer.md +0 -274
  289. package/.agents/skills/skill-creator/agents/comparator.md +0 -202
  290. package/.agents/skills/skill-creator/agents/grader.md +0 -223
  291. package/.agents/skills/skill-creator/assets/eval_review.html +0 -146
  292. package/.agents/skills/skill-creator/eval-viewer/generate_review.py +0 -471
  293. package/.agents/skills/skill-creator/eval-viewer/viewer.html +0 -1325
  294. package/.agents/skills/skill-creator/references/schemas.md +0 -430
  295. package/.agents/skills/skill-creator/scripts/__init__.py +0 -0
  296. package/.agents/skills/skill-creator/scripts/aggregate_benchmark.py +0 -401
  297. package/.agents/skills/skill-creator/scripts/generate_report.py +0 -326
  298. package/.agents/skills/skill-creator/scripts/improve_description.py +0 -247
  299. package/.agents/skills/skill-creator/scripts/package_skill.py +0 -136
  300. package/.agents/skills/skill-creator/scripts/quick_validate.py +0 -103
  301. package/.agents/skills/skill-creator/scripts/run_eval.py +0 -310
  302. package/.agents/skills/skill-creator/scripts/run_loop.py +0 -328
  303. package/.agents/skills/skill-creator/scripts/utils.py +0 -47
  304. package/.agents/skills/sl-commit/SKILL.md +0 -53
  305. package/.agents/skills/sl-submit-diff/SKILL.md +0 -57
  306. package/.agents/skills/tdd/SKILL.md +0 -111
  307. package/.agents/skills/tdd/deep-modules.md +0 -33
  308. package/.agents/skills/tdd/interface-design.md +0 -31
  309. package/.agents/skills/tdd/mocking.md +0 -59
  310. package/.agents/skills/tdd/refactoring.md +0 -10
  311. package/.agents/skills/tdd/tests.md +0 -61
  312. package/.agents/skills/tool-design/SKILL.md +0 -273
  313. package/.agents/skills/tool-design/references/architectural_reduction.md +0 -210
  314. package/.agents/skills/tool-design/references/best_practices.md +0 -176
  315. package/.agents/skills/tool-design/scripts/description_generator.py +0 -528
  316. package/.agents/skills/typescript-advanced-types/SKILL.md +0 -720
  317. package/.agents/skills/typescript-expert/SKILL.md +0 -434
  318. package/.agents/skills/typescript-expert/references/tsconfig-strict.json +0 -92
  319. package/.agents/skills/typescript-expert/references/typescript-cheatsheet.md +0 -383
  320. package/.agents/skills/typescript-expert/references/utility-types.ts +0 -335
  321. package/.agents/skills/typescript-expert/scripts/ts_diagnostic.py +0 -203
  322. package/.agents/skills/typescript-react-reviewer/SKILL.md +0 -201
  323. package/.agents/skills/typescript-react-reviewer/references/antipatterns.md +0 -510
  324. package/.agents/skills/typescript-react-reviewer/references/checklist.md +0 -267
  325. package/.agents/skills/typescript-react-reviewer/references/react19-patterns.md +0 -305
  326. package/.agents/skills/workflow-creator/SKILL.md +0 -553
  327. package/.agents/skills/workflow-creator/references/agent-sessions.md +0 -891
  328. package/.agents/skills/workflow-creator/references/agent-setup-recipe.md +0 -266
  329. package/.agents/skills/workflow-creator/references/computation-and-validation.md +0 -201
  330. package/.agents/skills/workflow-creator/references/control-flow.md +0 -470
  331. package/.agents/skills/workflow-creator/references/failure-modes.md +0 -1014
  332. package/.agents/skills/workflow-creator/references/getting-started.md +0 -392
  333. package/.agents/skills/workflow-creator/references/registry-and-validation.md +0 -141
  334. package/.agents/skills/workflow-creator/references/running-workflows.md +0 -418
  335. package/.agents/skills/workflow-creator/references/session-config.md +0 -431
  336. package/.agents/skills/workflow-creator/references/state-and-data-flow.md +0 -356
  337. package/.agents/skills/workflow-creator/references/user-input.md +0 -234
  338. package/.agents/skills/workflow-creator/references/workflow-inputs.md +0 -392
  339. package/.agents/skills/xlsx/LICENSE.txt +0 -30
  340. package/.agents/skills/xlsx/SKILL.md +0 -294
  341. package/.agents/skills/xlsx/scripts/office/helpers/__init__.py +0 -0
  342. package/.agents/skills/xlsx/scripts/office/helpers/merge_runs.py +0 -199
  343. package/.agents/skills/xlsx/scripts/office/helpers/simplify_redlines.py +0 -197
  344. package/.agents/skills/xlsx/scripts/office/pack.py +0 -159
  345. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  346. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  347. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  348. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  349. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  350. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  351. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  352. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  353. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  354. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  355. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  356. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  357. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  358. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  359. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  360. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  361. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  362. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  363. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  364. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  365. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  366. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  367. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  368. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  369. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  370. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  371. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  372. package/.agents/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  373. package/.agents/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  374. package/.agents/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  375. package/.agents/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  376. package/.agents/skills/xlsx/scripts/office/schemas/mce/mc.xsd +0 -75
  377. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +0 -560
  378. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +0 -67
  379. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +0 -14
  380. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  381. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  382. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  383. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  384. package/.agents/skills/xlsx/scripts/office/soffice.py +0 -183
  385. package/.agents/skills/xlsx/scripts/office/unpack.py +0 -132
  386. package/.agents/skills/xlsx/scripts/office/validate.py +0 -111
  387. package/.agents/skills/xlsx/scripts/office/validators/__init__.py +0 -15
  388. package/.agents/skills/xlsx/scripts/office/validators/base.py +0 -847
  389. package/.agents/skills/xlsx/scripts/office/validators/docx.py +0 -446
  390. package/.agents/skills/xlsx/scripts/office/validators/pptx.py +0 -275
  391. package/.agents/skills/xlsx/scripts/office/validators/redlining.py +0 -247
  392. package/.agents/skills/xlsx/scripts/recalc.py +0 -184
  393. package/.claude/agents/code-simplifier.md +0 -52
  394. package/.claude/agents/codebase-analyzer.md +0 -166
  395. package/.claude/agents/codebase-locator.md +0 -122
  396. package/.claude/agents/codebase-online-researcher.md +0 -148
  397. package/.claude/agents/codebase-pattern-finder.md +0 -247
  398. package/.claude/agents/codebase-research-analyzer.md +0 -179
  399. package/.claude/agents/codebase-research-locator.md +0 -145
  400. package/.claude/agents/debugger.md +0 -91
  401. package/.claude/agents/orchestrator.md +0 -19
  402. package/.claude/agents/planner.md +0 -295
  403. package/.claude/agents/reviewer.md +0 -98
  404. package/.claude/agents/worker.md +0 -165
  405. package/.claude/settings.json +0 -27
  406. package/.github/agents/code-simplifier.md +0 -52
  407. package/.github/agents/codebase-analyzer.md +0 -166
  408. package/.github/agents/codebase-locator.md +0 -122
  409. package/.github/agents/codebase-online-researcher.md +0 -146
  410. package/.github/agents/codebase-pattern-finder.md +0 -247
  411. package/.github/agents/codebase-research-analyzer.md +0 -179
  412. package/.github/agents/codebase-research-locator.md +0 -145
  413. package/.github/agents/debugger.md +0 -98
  414. package/.github/agents/orchestrator.md +0 -27
  415. package/.github/agents/planner.md +0 -305
  416. package/.github/agents/reviewer.md +0 -95
  417. package/.github/agents/worker.md +0 -237
  418. package/.github/lsp.json +0 -93
  419. package/.mcp.json +0 -20
  420. package/.opencode/agents/code-simplifier.md +0 -62
  421. package/.opencode/agents/codebase-analyzer.md +0 -171
  422. package/.opencode/agents/codebase-locator.md +0 -127
  423. package/.opencode/agents/codebase-online-researcher.md +0 -152
  424. package/.opencode/agents/codebase-pattern-finder.md +0 -252
  425. package/.opencode/agents/codebase-research-analyzer.md +0 -183
  426. package/.opencode/agents/codebase-research-locator.md +0 -149
  427. package/.opencode/agents/debugger.md +0 -99
  428. package/.opencode/agents/orchestrator.md +0 -27
  429. package/.opencode/agents/planner.md +0 -309
  430. package/.opencode/agents/reviewer.md +0 -103
  431. package/.opencode/agents/worker.md +0 -165
  432. package/.opencode/opencode.json +0 -25
  433. package/README.md +0 -1624
  434. package/assets/settings.schema.json +0 -51
  435. package/dist/commands/cli/claude-inflight-hook.d.ts +0 -100
  436. package/dist/commands/cli/claude-inflight-hook.d.ts.map +0 -1
  437. package/dist/commands/cli/claude-stop-hook.d.ts +0 -80
  438. package/dist/commands/cli/claude-stop-hook.d.ts.map +0 -1
  439. package/dist/lib/atomic-temp.d.ts +0 -8
  440. package/dist/lib/atomic-temp.d.ts.map +0 -1
  441. package/dist/lib/path-root-guard.d.ts +0 -4
  442. package/dist/lib/path-root-guard.d.ts.map +0 -1
  443. package/dist/lib/spawn.d.ts +0 -102
  444. package/dist/lib/spawn.d.ts.map +0 -1
  445. package/dist/lib/terminal-env.d.ts +0 -9
  446. package/dist/lib/terminal-env.d.ts.map +0 -1
  447. package/dist/sdk/components/attached-statusline.d.ts +0 -26
  448. package/dist/sdk/components/attached-statusline.d.ts.map +0 -1
  449. package/dist/sdk/components/color-utils.d.ts +0 -4
  450. package/dist/sdk/components/color-utils.d.ts.map +0 -1
  451. package/dist/sdk/components/compact-switcher.d.ts +0 -10
  452. package/dist/sdk/components/compact-switcher.d.ts.map +0 -1
  453. package/dist/sdk/components/connectors.d.ts +0 -16
  454. package/dist/sdk/components/connectors.d.ts.map +0 -1
  455. package/dist/sdk/components/edge.d.ts +0 -4
  456. package/dist/sdk/components/edge.d.ts.map +0 -1
  457. package/dist/sdk/components/error-boundary.d.ts +0 -23
  458. package/dist/sdk/components/error-boundary.d.ts.map +0 -1
  459. package/dist/sdk/components/graph-theme.d.ts +0 -18
  460. package/dist/sdk/components/graph-theme.d.ts.map +0 -1
  461. package/dist/sdk/components/header.d.ts +0 -3
  462. package/dist/sdk/components/header.d.ts.map +0 -1
  463. package/dist/sdk/components/hooks.d.ts +0 -15
  464. package/dist/sdk/components/hooks.d.ts.map +0 -1
  465. package/dist/sdk/components/layout.d.ts +0 -27
  466. package/dist/sdk/components/layout.d.ts.map +0 -1
  467. package/dist/sdk/components/node-card.d.ts +0 -10
  468. package/dist/sdk/components/node-card.d.ts.map +0 -1
  469. package/dist/sdk/components/orchestrator-panel-contexts.d.ts +0 -16
  470. package/dist/sdk/components/orchestrator-panel-contexts.d.ts.map +0 -1
  471. package/dist/sdk/components/orchestrator-panel-store.d.ts +0 -52
  472. package/dist/sdk/components/orchestrator-panel-store.d.ts.map +0 -1
  473. package/dist/sdk/components/orchestrator-panel-types.d.ts +0 -18
  474. package/dist/sdk/components/orchestrator-panel-types.d.ts.map +0 -1
  475. package/dist/sdk/components/orchestrator-panel.d.ts +0 -86
  476. package/dist/sdk/components/orchestrator-panel.d.ts.map +0 -1
  477. package/dist/sdk/components/renderer-background.d.ts +0 -9
  478. package/dist/sdk/components/renderer-background.d.ts.map +0 -1
  479. package/dist/sdk/components/session-graph-panel.d.ts +0 -7
  480. package/dist/sdk/components/session-graph-panel.d.ts.map +0 -1
  481. package/dist/sdk/components/status-helpers.d.ts +0 -6
  482. package/dist/sdk/components/status-helpers.d.ts.map +0 -1
  483. package/dist/sdk/components/statusline.d.ts +0 -5
  484. package/dist/sdk/components/statusline.d.ts.map +0 -1
  485. package/dist/sdk/components/tui-diagnostics.d.ts +0 -56
  486. package/dist/sdk/components/tui-diagnostics.d.ts.map +0 -1
  487. package/dist/sdk/components/workflow-picker-panel.d.ts +0 -126
  488. package/dist/sdk/components/workflow-picker-panel.d.ts.map +0 -1
  489. package/dist/sdk/define-workflow.d.ts +0 -107
  490. package/dist/sdk/define-workflow.d.ts.map +0 -1
  491. package/dist/sdk/errors.d.ts +0 -46
  492. package/dist/sdk/errors.d.ts.map +0 -1
  493. package/dist/sdk/index.d.ts +0 -26
  494. package/dist/sdk/index.d.ts.map +0 -1
  495. package/dist/sdk/primitives/inputs.d.ts +0 -36
  496. package/dist/sdk/primitives/inputs.d.ts.map +0 -1
  497. package/dist/sdk/primitives/metadata.d.ts +0 -40
  498. package/dist/sdk/primitives/metadata.d.ts.map +0 -1
  499. package/dist/sdk/primitives/run.d.ts +0 -57
  500. package/dist/sdk/primitives/run.d.ts.map +0 -1
  501. package/dist/sdk/primitives/sessions.d.ts +0 -128
  502. package/dist/sdk/primitives/sessions.d.ts.map +0 -1
  503. package/dist/sdk/providers/claude.d.ts +0 -392
  504. package/dist/sdk/providers/claude.d.ts.map +0 -1
  505. package/dist/sdk/providers/copilot.d.ts +0 -55
  506. package/dist/sdk/providers/copilot.d.ts.map +0 -1
  507. package/dist/sdk/providers/opencode.d.ts +0 -27
  508. package/dist/sdk/providers/opencode.d.ts.map +0 -1
  509. package/dist/sdk/registry.d.ts +0 -27
  510. package/dist/sdk/registry.d.ts.map +0 -1
  511. package/dist/sdk/runtime/attached-footer.d.ts +0 -31
  512. package/dist/sdk/runtime/attached-footer.d.ts.map +0 -1
  513. package/dist/sdk/runtime/cc-debounce.d.ts +0 -29
  514. package/dist/sdk/runtime/cc-debounce.d.ts.map +0 -1
  515. package/dist/sdk/runtime/executor-env.d.ts +0 -20
  516. package/dist/sdk/runtime/executor-env.d.ts.map +0 -1
  517. package/dist/sdk/runtime/executor.d.ts +0 -265
  518. package/dist/sdk/runtime/executor.d.ts.map +0 -1
  519. package/dist/sdk/runtime/graph-inference.d.ts +0 -35
  520. package/dist/sdk/runtime/graph-inference.d.ts.map +0 -1
  521. package/dist/sdk/runtime/orchestrator-entry.d.ts +0 -26
  522. package/dist/sdk/runtime/orchestrator-entry.d.ts.map +0 -1
  523. package/dist/sdk/runtime/panel.d.ts +0 -9
  524. package/dist/sdk/runtime/panel.d.ts.map +0 -1
  525. package/dist/sdk/runtime/port-discovery.d.ts +0 -71
  526. package/dist/sdk/runtime/port-discovery.d.ts.map +0 -1
  527. package/dist/sdk/runtime/status-writer.d.ts +0 -101
  528. package/dist/sdk/runtime/status-writer.d.ts.map +0 -1
  529. package/dist/sdk/runtime/theme.d.ts +0 -33
  530. package/dist/sdk/runtime/theme.d.ts.map +0 -1
  531. package/dist/sdk/runtime/tmux.d.ts +0 -307
  532. package/dist/sdk/runtime/tmux.d.ts.map +0 -1
  533. package/dist/sdk/runtime/version-compat.d.ts +0 -28
  534. package/dist/sdk/runtime/version-compat.d.ts.map +0 -1
  535. package/dist/sdk/types.d.ts +0 -435
  536. package/dist/sdk/types.d.ts.map +0 -1
  537. package/dist/sdk/worker-shared.d.ts +0 -42
  538. package/dist/sdk/worker-shared.d.ts.map +0 -1
  539. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts +0 -81
  540. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts.map +0 -1
  541. package/dist/sdk/workflows/builtin/deep-research-codebase/copilot/index.d.ts +0 -37
  542. package/dist/sdk/workflows/builtin/deep-research-codebase/copilot/index.d.ts.map +0 -1
  543. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/batching.d.ts +0 -43
  544. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/batching.d.ts.map +0 -1
  545. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.d.ts +0 -14
  546. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.d.ts.map +0 -1
  547. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.d.ts +0 -136
  548. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.d.ts.map +0 -1
  549. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scout.d.ts +0 -58
  550. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scout.d.ts.map +0 -1
  551. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scratch.d.ts +0 -43
  552. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scratch.d.ts.map +0 -1
  553. package/dist/sdk/workflows/builtin/deep-research-codebase/opencode/index.d.ts +0 -37
  554. package/dist/sdk/workflows/builtin/deep-research-codebase/opencode/index.d.ts.map +0 -1
  555. package/dist/sdk/workflows/builtin/open-claude-design/claude/index.d.ts +0 -68
  556. package/dist/sdk/workflows/builtin/open-claude-design/claude/index.d.ts.map +0 -1
  557. package/dist/sdk/workflows/builtin/open-claude-design/copilot/index.d.ts +0 -56
  558. package/dist/sdk/workflows/builtin/open-claude-design/copilot/index.d.ts.map +0 -1
  559. package/dist/sdk/workflows/builtin/open-claude-design/helpers/constants.d.ts +0 -72
  560. package/dist/sdk/workflows/builtin/open-claude-design/helpers/constants.d.ts.map +0 -1
  561. package/dist/sdk/workflows/builtin/open-claude-design/helpers/design-system.d.ts +0 -46
  562. package/dist/sdk/workflows/builtin/open-claude-design/helpers/design-system.d.ts.map +0 -1
  563. package/dist/sdk/workflows/builtin/open-claude-design/helpers/export.d.ts +0 -32
  564. package/dist/sdk/workflows/builtin/open-claude-design/helpers/export.d.ts.map +0 -1
  565. package/dist/sdk/workflows/builtin/open-claude-design/helpers/import.d.ts +0 -33
  566. package/dist/sdk/workflows/builtin/open-claude-design/helpers/import.d.ts.map +0 -1
  567. package/dist/sdk/workflows/builtin/open-claude-design/helpers/prompts.d.ts +0 -106
  568. package/dist/sdk/workflows/builtin/open-claude-design/helpers/prompts.d.ts.map +0 -1
  569. package/dist/sdk/workflows/builtin/open-claude-design/helpers/scan.d.ts +0 -50
  570. package/dist/sdk/workflows/builtin/open-claude-design/helpers/scan.d.ts.map +0 -1
  571. package/dist/sdk/workflows/builtin/open-claude-design/helpers/validation.d.ts +0 -12
  572. package/dist/sdk/workflows/builtin/open-claude-design/helpers/validation.d.ts.map +0 -1
  573. package/dist/sdk/workflows/builtin/open-claude-design/opencode/index.d.ts +0 -58
  574. package/dist/sdk/workflows/builtin/open-claude-design/opencode/index.d.ts.map +0 -1
  575. package/dist/sdk/workflows/builtin/ralph/claude/index.d.ts +0 -37
  576. package/dist/sdk/workflows/builtin/ralph/claude/index.d.ts.map +0 -1
  577. package/dist/sdk/workflows/builtin/ralph/copilot/index.d.ts +0 -34
  578. package/dist/sdk/workflows/builtin/ralph/copilot/index.d.ts.map +0 -1
  579. package/dist/sdk/workflows/builtin/ralph/helpers/copilot-reviewer.d.ts +0 -25
  580. package/dist/sdk/workflows/builtin/ralph/helpers/copilot-reviewer.d.ts.map +0 -1
  581. package/dist/sdk/workflows/builtin/ralph/helpers/git.d.ts +0 -69
  582. package/dist/sdk/workflows/builtin/ralph/helpers/git.d.ts.map +0 -1
  583. package/dist/sdk/workflows/builtin/ralph/helpers/prompts.d.ts +0 -266
  584. package/dist/sdk/workflows/builtin/ralph/helpers/prompts.d.ts.map +0 -1
  585. package/dist/sdk/workflows/builtin/ralph/helpers/review.d.ts +0 -24
  586. package/dist/sdk/workflows/builtin/ralph/helpers/review.d.ts.map +0 -1
  587. package/dist/sdk/workflows/builtin/ralph/opencode/index.d.ts +0 -33
  588. package/dist/sdk/workflows/builtin/ralph/opencode/index.d.ts.map +0 -1
  589. package/dist/sdk/workflows/index.d.ts +0 -32
  590. package/dist/sdk/workflows/index.d.ts.map +0 -1
  591. package/dist/services/config/additional-instructions.d.ts +0 -67
  592. package/dist/services/config/additional-instructions.d.ts.map +0 -1
  593. package/dist/services/config/atomic-config.d.ts +0 -42
  594. package/dist/services/config/atomic-config.d.ts.map +0 -1
  595. package/dist/services/config/definitions.d.ts +0 -52
  596. package/dist/services/config/definitions.d.ts.map +0 -1
  597. package/dist/services/config/index.d.ts +0 -7
  598. package/dist/services/config/index.d.ts.map +0 -1
  599. package/dist/services/config/scm-sync.d.ts +0 -37
  600. package/dist/services/config/scm-sync.d.ts.map +0 -1
  601. package/dist/services/config/settings-schema.d.ts +0 -2
  602. package/dist/services/config/settings-schema.d.ts.map +0 -1
  603. package/dist/services/system/copy.d.ts +0 -84
  604. package/dist/services/system/copy.d.ts.map +0 -1
  605. package/dist/services/system/detect.d.ts +0 -75
  606. package/dist/services/system/detect.d.ts.map +0 -1
  607. package/dist/theme/colors.d.ts +0 -35
  608. package/dist/theme/colors.d.ts.map +0 -1
  609. package/src/cli.ts +0 -397
  610. package/src/commands/builtin-registry.ts +0 -37
  611. package/src/commands/cli/chat/index.test.ts +0 -252
  612. package/src/commands/cli/chat/index.ts +0 -430
  613. package/src/commands/cli/chat.ts +0 -8
  614. package/src/commands/cli/claude-ask-hook.test.ts +0 -128
  615. package/src/commands/cli/claude-ask-hook.ts +0 -84
  616. package/src/commands/cli/claude-inflight-hook.test.ts +0 -598
  617. package/src/commands/cli/claude-inflight-hook.ts +0 -359
  618. package/src/commands/cli/claude-session-start-hook.ts +0 -61
  619. package/src/commands/cli/claude-stop-hook.test.ts +0 -317
  620. package/src/commands/cli/claude-stop-hook.ts +0 -441
  621. package/src/commands/cli/completions.ts +0 -24
  622. package/src/commands/cli/config.ts +0 -80
  623. package/src/commands/cli/footer.tsx +0 -248
  624. package/src/commands/cli/init/index.ts +0 -41
  625. package/src/commands/cli/init/onboarding.ts +0 -61
  626. package/src/commands/cli/init.ts +0 -8
  627. package/src/commands/cli/management-commands.ts +0 -112
  628. package/src/commands/cli/session.test.ts +0 -830
  629. package/src/commands/cli/session.ts +0 -447
  630. package/src/commands/cli/workflow-command.test.ts +0 -618
  631. package/src/commands/cli/workflow-inputs.test.ts +0 -353
  632. package/src/commands/cli/workflow-inputs.ts +0 -266
  633. package/src/commands/cli/workflow-list.test.ts +0 -235
  634. package/src/commands/cli/workflow-list.ts +0 -0
  635. package/src/commands/cli/workflow-status.test.ts +0 -451
  636. package/src/commands/cli/workflow-status.ts +0 -330
  637. package/src/commands/cli/workflow.ts +0 -196
  638. package/src/completions/bash.ts +0 -102
  639. package/src/completions/fish.ts +0 -136
  640. package/src/completions/index.ts +0 -7
  641. package/src/completions/powershell.ts +0 -195
  642. package/src/completions/zsh.ts +0 -150
  643. package/src/lib/atomic-temp.test.ts +0 -86
  644. package/src/lib/atomic-temp.ts +0 -62
  645. package/src/lib/common-ignore.ts +0 -46
  646. package/src/lib/merge.ts +0 -103
  647. package/src/lib/path-root-guard.ts +0 -38
  648. package/src/lib/spawn.test.ts +0 -109
  649. package/src/lib/spawn.ts +0 -678
  650. package/src/lib/terminal-env.test.ts +0 -343
  651. package/src/lib/terminal-env.ts +0 -100
  652. package/src/scripts/bump-version.ts +0 -94
  653. package/src/scripts/bundle-configs.ts +0 -116
  654. package/src/scripts/clean-dist.test.ts +0 -53
  655. package/src/scripts/clean-dist.ts +0 -37
  656. package/src/scripts/constants-base.ts +0 -14
  657. package/src/scripts/constants.ts +0 -35
  658. package/src/sdk/components/attached-statusline.tsx +0 -86
  659. package/src/sdk/components/color-utils.ts +0 -20
  660. package/src/sdk/components/compact-switcher.tsx +0 -78
  661. package/src/sdk/components/connectors.test.ts +0 -707
  662. package/src/sdk/components/connectors.ts +0 -160
  663. package/src/sdk/components/edge.tsx +0 -13
  664. package/src/sdk/components/error-boundary.tsx +0 -38
  665. package/src/sdk/components/graph-theme.ts +0 -37
  666. package/src/sdk/components/header.tsx +0 -85
  667. package/src/sdk/components/hooks.ts +0 -21
  668. package/src/sdk/components/layout.test.ts +0 -1245
  669. package/src/sdk/components/layout.ts +0 -223
  670. package/src/sdk/components/node-card.tsx +0 -91
  671. package/src/sdk/components/orchestrator-panel-contexts.ts +0 -35
  672. package/src/sdk/components/orchestrator-panel-store.test.ts +0 -847
  673. package/src/sdk/components/orchestrator-panel-store.ts +0 -187
  674. package/src/sdk/components/orchestrator-panel-types.ts +0 -23
  675. package/src/sdk/components/orchestrator-panel.tsx +0 -262
  676. package/src/sdk/components/renderer-background.ts +0 -49
  677. package/src/sdk/components/session-graph-panel.tsx +0 -471
  678. package/src/sdk/components/status-helpers.ts +0 -33
  679. package/src/sdk/components/statusline.tsx +0 -68
  680. package/src/sdk/components/tui-diagnostics.ts +0 -273
  681. package/src/sdk/components/workflow-picker-panel.tsx +0 -1613
  682. package/src/sdk/define-workflow.test.ts +0 -354
  683. package/src/sdk/define-workflow.ts +0 -275
  684. package/src/sdk/errors.test.ts +0 -83
  685. package/src/sdk/errors.ts +0 -77
  686. package/src/sdk/index.test.ts +0 -92
  687. package/src/sdk/index.ts +0 -101
  688. package/src/sdk/primitives/inputs.ts +0 -48
  689. package/src/sdk/primitives/metadata.ts +0 -63
  690. package/src/sdk/primitives/run.ts +0 -81
  691. package/src/sdk/primitives/sessions.test.ts +0 -594
  692. package/src/sdk/primitives/sessions.ts +0 -328
  693. package/src/sdk/providers/claude.ts +0 -1450
  694. package/src/sdk/providers/copilot.test.ts +0 -365
  695. package/src/sdk/providers/copilot.ts +0 -185
  696. package/src/sdk/providers/headless-hil-policy.test.ts +0 -211
  697. package/src/sdk/providers/opencode.ts +0 -88
  698. package/src/sdk/registry.ts +0 -132
  699. package/src/sdk/runtime/attached-footer.ts +0 -155
  700. package/src/sdk/runtime/cc-debounce.ts +0 -104
  701. package/src/sdk/runtime/executor-env.ts +0 -45
  702. package/src/sdk/runtime/executor.test.ts +0 -1321
  703. package/src/sdk/runtime/executor.ts +0 -2136
  704. package/src/sdk/runtime/graph-inference.ts +0 -50
  705. package/src/sdk/runtime/orchestrator-entry.ts +0 -110
  706. package/src/sdk/runtime/panel.tsx +0 -9
  707. package/src/sdk/runtime/port-discovery.test.ts +0 -573
  708. package/src/sdk/runtime/port-discovery.ts +0 -496
  709. package/src/sdk/runtime/status-writer.test.ts +0 -245
  710. package/src/sdk/runtime/status-writer.ts +0 -201
  711. package/src/sdk/runtime/theme.ts +0 -71
  712. package/src/sdk/runtime/tmux.conf +0 -112
  713. package/src/sdk/runtime/tmux.ts +0 -785
  714. package/src/sdk/runtime/version-compat.ts +0 -68
  715. package/src/sdk/types.ts +0 -548
  716. package/src/sdk/worker-shared.test.ts +0 -163
  717. package/src/sdk/worker-shared.ts +0 -155
  718. package/src/sdk/workflows/builtin/deep-research-codebase/claude/index.ts +0 -569
  719. package/src/sdk/workflows/builtin/deep-research-codebase/copilot/index.ts +0 -481
  720. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/batching.ts +0 -65
  721. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.ts +0 -24
  722. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/ignore-by-default.d.ts +0 -8
  723. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.ts +0 -958
  724. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/scout.ts +0 -505
  725. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/scratch.ts +0 -115
  726. package/src/sdk/workflows/builtin/deep-research-codebase/opencode/index.ts +0 -530
  727. package/src/sdk/workflows/builtin/open-claude-design/claude/index.ts +0 -500
  728. package/src/sdk/workflows/builtin/open-claude-design/copilot/index.ts +0 -508
  729. package/src/sdk/workflows/builtin/open-claude-design/helpers/constants.ts +0 -159
  730. package/src/sdk/workflows/builtin/open-claude-design/helpers/design-system.ts +0 -88
  731. package/src/sdk/workflows/builtin/open-claude-design/helpers/export.ts +0 -193
  732. package/src/sdk/workflows/builtin/open-claude-design/helpers/import.ts +0 -52
  733. package/src/sdk/workflows/builtin/open-claude-design/helpers/prompts.ts +0 -1110
  734. package/src/sdk/workflows/builtin/open-claude-design/helpers/scan.ts +0 -117
  735. package/src/sdk/workflows/builtin/open-claude-design/helpers/validation.ts +0 -38
  736. package/src/sdk/workflows/builtin/open-claude-design/opencode/index.ts +0 -610
  737. package/src/sdk/workflows/builtin/ralph/claude/index.ts +0 -272
  738. package/src/sdk/workflows/builtin/ralph/copilot/index.ts +0 -298
  739. package/src/sdk/workflows/builtin/ralph/helpers/copilot-reviewer.ts +0 -105
  740. package/src/sdk/workflows/builtin/ralph/helpers/git.ts +0 -201
  741. package/src/sdk/workflows/builtin/ralph/helpers/prompts.ts +0 -1108
  742. package/src/sdk/workflows/builtin/ralph/helpers/review.ts +0 -33
  743. package/src/sdk/workflows/builtin/ralph/opencode/index.ts +0 -290
  744. package/src/sdk/workflows/index.ts +0 -116
  745. package/src/services/config/additional-instructions.ts +0 -273
  746. package/src/services/config/atomic-config.ts +0 -210
  747. package/src/services/config/atomic-global-config.ts +0 -348
  748. package/src/services/config/config-path.ts +0 -19
  749. package/src/services/config/definitions.ts +0 -125
  750. package/src/services/config/index.ts +0 -7
  751. package/src/services/config/scm-sync.ts +0 -185
  752. package/src/services/config/settings-schema.ts +0 -2
  753. package/src/services/config/settings.ts +0 -144
  754. package/src/services/system/agents.ts +0 -95
  755. package/src/services/system/auth.test.ts +0 -343
  756. package/src/services/system/auth.ts +0 -140
  757. package/src/services/system/auto-sync.ts +0 -128
  758. package/src/services/system/copy.ts +0 -392
  759. package/src/services/system/detect.ts +0 -161
  760. package/src/services/system/file-lock.ts +0 -289
  761. package/src/services/system/install-ui.ts +0 -296
  762. package/src/services/system/skills.ts +0 -58
  763. package/src/theme/colors.ts +0 -96
  764. package/src/theme/logo.ts +0 -123
  765. package/src/version.ts +0 -7
@@ -1,43 +0,0 @@
1
- # Evaluation Pipeline Diagram
2
-
3
- Visual layout of a production evaluation pipeline.
4
-
5
- ```
6
- ┌─────────────────────────────────────────────────┐
7
- │ Evaluation Pipeline │
8
- ├─────────────────────────────────────────────────┤
9
- │ │
10
- │ Input: Response + Prompt + Context │
11
- │ │ │
12
- │ ▼ │
13
- │ ┌─────────────────────┐ │
14
- │ │ Criteria Loader │ ◄── Rubrics, weights │
15
- │ └──────────┬──────────┘ │
16
- │ │ │
17
- │ ▼ │
18
- │ ┌─────────────────────┐ │
19
- │ │ Primary Scorer │ ◄── Direct or Pairwise │
20
- │ └──────────┬──────────┘ │
21
- │ │ │
22
- │ ▼ │
23
- │ ┌─────────────────────┐ │
24
- │ │ Bias Mitigation │ ◄── Position swap, etc. │
25
- │ └──────────┬──────────┘ │
26
- │ │ │
27
- │ ▼ │
28
- │ ┌─────────────────────┐ │
29
- │ │ Confidence Scoring │ ◄── Calibration │
30
- │ └──────────┬──────────┘ │
31
- │ │ │
32
- │ ▼ │
33
- │ Output: Scores + Justifications + Confidence │
34
- │ │
35
- └─────────────────────────────────────────────────┘
36
- ```
37
-
38
- ## Pipeline Stages
39
-
40
- 1. **Criteria Loader**: Loads rubrics and criterion weights from configuration
41
- 2. **Primary Scorer**: Applies direct scoring or pairwise comparison
42
- 3. **Bias Mitigation**: Runs position swaps, length normalization, and other debiasing steps
43
- 4. **Confidence Scoring**: Calibrates confidence based on position consistency and evidence strength
@@ -1,315 +0,0 @@
1
- # LLM-as-Judge Implementation Patterns
2
-
3
- This reference provides detailed implementation patterns for building production-grade LLM evaluation systems.
4
-
5
- ## Pattern 1: Structured Evaluation Pipeline
6
-
7
- The most reliable evaluation systems follow a structured pipeline that separates concerns:
8
-
9
- ```
10
- Input Validation → Criteria Loading → Scoring → Bias Mitigation → Output Formatting
11
- ```
12
-
13
- ### Input Validation Layer
14
-
15
- Before evaluation begins, validate:
16
-
17
- 1. **Response presence**: Non-empty response to evaluate
18
- 2. **Prompt presence**: Original prompt for context
19
- 3. **Criteria validity**: At least one criterion with name and description
20
- 4. **Weight normalization**: Weights sum to 1.0 (if they do not, normalize them so that they do)
21
-
22
- ```python
23
- def validate_input(response, prompt, criteria):
24
- if not response or not response.strip():
25
- raise ValueError("Response cannot be empty")
26
- if not prompt or not prompt.strip():
27
- raise ValueError("Prompt cannot be empty")
28
- if not criteria or len(criteria) == 0:
29
- raise ValueError("At least one criterion required")
30
-
31
- # Normalize weights
32
- total_weight = sum(c.get('weight', 1) for c in criteria)
33
- for c in criteria:
34
- c['weight'] = c.get('weight', 1) / total_weight
35
- ```
36
-
37
- ### Criteria Loading Layer
38
-
39
- Criteria should be loaded from configuration, not hardcoded:
40
-
41
- ```python
42
- class CriteriaLoader:
43
- def __init__(self, rubric_path=None):
44
- self.rubrics = self._load_rubrics(rubric_path)
45
-
46
- def get_criteria(self, task_type):
47
- return self.rubrics.get(task_type, self.default_criteria)
48
-
49
- def get_rubric(self, criterion_name):
50
- return self.rubrics.get(criterion_name, {}).get('levels', [])
51
- ```
52
-
53
- ### Scoring Layer
54
-
55
- The scoring layer handles the actual LLM call:
56
-
57
- ```python
58
- async def score_response(response, prompt, criteria, rubric, model):
59
- system_prompt = build_system_prompt(criteria, rubric)
60
- user_prompt = build_user_prompt(response, prompt, criteria)
61
-
62
- result = await generate_text(
63
- model=model,
64
- system=system_prompt,
65
- prompt=user_prompt,
66
- temperature=0.3 # Lower temperature for consistency
67
- )
68
-
69
- return parse_scores(result.text)
70
- ```
71
-
72
- ### Bias Mitigation Layer
73
-
74
- For pairwise comparison, always include position swapping:
75
-
76
- ```python
77
- async def compare_with_bias_mitigation(response_a, response_b, prompt, criteria, model):
78
- # First pass: A first
79
- pass1 = await compare_pair(response_a, response_b, prompt, criteria, model)
80
-
81
- # Second pass: B first
82
- pass2 = await compare_pair(response_b, response_a, prompt, criteria, model)
83
-
84
- # Map pass2 winner back
85
- pass2_mapped = map_winner(pass2.winner) # A→B, B→A, TIE→TIE
86
-
87
- # Check consistency
88
- if pass1.winner == pass2_mapped:
89
- return {
90
- 'winner': pass1.winner,
91
- 'confidence': (pass1.confidence + pass2.confidence) / 2,
92
- 'consistent': True
93
- }
94
- else:
95
- return {
96
- 'winner': 'TIE',
97
- 'confidence': 0.5,
98
- 'consistent': False
99
- }
100
- ```
101
-
102
- ## Pattern 2: Hierarchical Evaluation
103
-
104
- For complex evaluations, use a hierarchical approach:
105
-
106
- ```
107
- Quick Screen (cheap model) → Detailed Evaluation (expensive model) → Human Review (edge cases)
108
- ```
109
-
110
- ### Quick Screen Implementation
111
-
112
- ```python
113
- async def quick_screen(response, prompt, threshold=0.7):
114
- """Fast, cheap screening for obvious passes/fails."""
115
- result = await generate_text(
116
- model='gpt-5.2', # Cheaper model
117
- prompt=f"Rate 0-1 if this response adequately addresses the prompt:\n\nPrompt: {prompt}\n\nResponse: {response}",
118
- temperature=0
119
- )
120
- score = float(result.text.strip())
121
- return score, score > threshold
122
- ```
123
-
124
- ### Detailed Evaluation
125
-
126
- ```python
127
- async def detailed_evaluation(response, prompt, criteria):
128
- """Full evaluation for borderline or important cases."""
129
- result = await generate_text(
130
- model='gpt-5.2', # More capable model
131
- system=DETAILED_EVALUATION_PROMPT,
132
- prompt=build_detailed_prompt(response, prompt, criteria),
133
- temperature=0.3
134
- )
135
- return parse_detailed_scores(result.text)
136
- ```
137
-
138
- ## Pattern 3: Panel of LLM Judges (PoLL)
139
-
140
- For high-stakes evaluation, use multiple models:
141
-
142
- ```python
143
- async def poll_evaluation(response, prompt, criteria, models):
144
- """Aggregate judgments from multiple LLM judges."""
145
- results = await asyncio.gather(*[
146
- score_with_model(response, prompt, criteria, model)
147
- for model in models
148
- ])
149
-
150
- # Aggregate scores
151
- aggregated = aggregate_scores(results)
152
-
153
- # Calculate agreement
154
- agreement = calculate_agreement(results)
155
-
156
- return {
157
- 'scores': aggregated,
158
- 'agreement': agreement,
159
- 'individual_results': results
160
- }
161
-
162
- def aggregate_scores(results):
163
- """Aggregate scores using median (robust to outliers)."""
164
- scores = {}
165
- for criterion in results[0]['scores'].keys():
166
- criterion_scores = [r['scores'][criterion] for r in results]
167
- scores[criterion] = {
168
- 'score': statistics.median(criterion_scores),
169
- 'std': statistics.stdev(criterion_scores) if len(criterion_scores) > 1 else 0
170
- }
171
- return scores
172
- ```
173
-
174
- ## Pattern 4: Confidence Calibration
175
-
176
- Confidence scores should be calibrated to actual reliability:
177
-
178
- ```python
179
- def calibrate_confidence(raw_confidence, position_consistent, evidence_count):
180
- """Calibrate confidence based on multiple signals."""
181
-
182
- # Base confidence from model output
183
- calibrated = raw_confidence
184
-
185
- # Position consistency is a strong signal
186
- if not position_consistent:
187
- calibrated *= 0.6 # Significant reduction
188
-
189
- # More evidence = higher confidence
190
- evidence_factor = min(evidence_count / 3, 1.0) # Cap at 3 pieces
191
- calibrated *= (0.7 + 0.3 * evidence_factor)
192
-
193
- return min(calibrated, 0.99) # Never 100% confident
194
- ```
195
-
196
- ## Pattern 5: Output Formatting
197
-
198
- Always return structured outputs with consistent schemas:
199
-
200
- ```python
201
- @dataclass
202
- class ScoreResult:
203
- criterion: str
204
- score: float
205
- max_score: float
206
- justification: str
207
- evidence: List[str]
208
- improvement: str
209
-
210
- @dataclass
211
- class EvaluationResult:
212
- success: bool
213
- scores: List[ScoreResult]
214
- overall_score: float
215
- weighted_score: float
216
- summary: Dict[str, Any]
217
- metadata: Dict[str, Any]
218
-
219
- def format_output(scores, metadata) -> EvaluationResult:
220
- """Format evaluation results consistently."""
221
- return EvaluationResult(
222
- success=True,
223
- scores=scores,
224
- overall_score=sum(s.score for s in scores) / len(scores),
225
- weighted_score=calculate_weighted_score(scores),
226
- summary=generate_summary(scores),
227
- metadata=metadata
228
- )
229
- ```
230
-
231
- ## Error Handling Patterns
232
-
233
- ### Graceful Degradation
234
-
235
- ```python
236
- async def evaluate_with_fallback(response, prompt, criteria):
237
- try:
238
- return await full_evaluation(response, prompt, criteria)
239
- except RateLimitError:
240
- # Fall back to simpler evaluation
241
- return await simple_evaluation(response, prompt, criteria)
242
- except ParseError as e:
243
- # Return partial results with error flag
244
- return {
245
- 'success': False,
246
- 'partial_results': e.partial_data,
247
- 'error': str(e)
248
- }
249
- ```
250
-
251
- ### Retry Logic
252
-
253
- ```python
254
- async def evaluate_with_retry(response, prompt, criteria, max_retries=3):
255
- for attempt in range(max_retries):
256
- try:
257
- result = await evaluate(response, prompt, criteria)
258
- if is_valid_result(result):
259
- return result
260
- except TransientError:
261
- await asyncio.sleep(2 ** attempt) # Exponential backoff
262
-
263
- raise EvaluationError("Max retries exceeded")
264
- ```
265
-
266
- ## Testing Patterns
267
-
268
- ### Unit Tests for Parsing
269
-
270
- ```python
271
- def test_score_parsing():
272
- raw_output = '{"scores": [{"criterion": "Accuracy", "score": 4}]}'
273
- result = parse_scores(raw_output)
274
- assert result.scores[0].criterion == "Accuracy"
275
- assert result.scores[0].score == 4
276
-
277
- def test_malformed_output():
278
- raw_output = 'Invalid JSON'
279
- with pytest.raises(ParseError):
280
- parse_scores(raw_output)
281
- ```
282
-
283
- ### Integration Tests with Real API
284
-
285
- ```python
286
- @pytest.mark.integration
287
- async def test_full_evaluation_pipeline():
288
- result = await evaluate(
289
- response="Water boils at 100°C at sea level.",
290
- prompt="At what temperature does water boil?",
291
- criteria=[{"name": "Accuracy", "description": "Factual correctness", "weight": 1}]
292
- )
293
-
294
- assert result.success
295
- assert len(result.scores) == 1
296
- assert result.scores[0].score >= 4 # Should score high for accurate response
297
- ```
298
-
299
- ### Bias Detection Tests
300
-
301
- ```python
302
- async def test_position_bias_mitigation():
303
- # Same response in both positions should tie
304
- result = await compare(
305
- response_a="Same response",
306
- response_b="Same response",
307
- prompt="Test prompt",
308
- criteria=["quality"],
309
- swap_positions=True
310
- )
311
-
312
- assert result.winner == "TIE"
313
- assert result.consistent == True
314
- ```
315
-
@@ -1,331 +0,0 @@
1
- # Metric Selection Guide for LLM Evaluation
2
-
3
- This reference provides guidance on selecting appropriate metrics for different evaluation scenarios.
4
-
5
- ## Metric Categories
6
-
7
- ### Classification Metrics
8
-
9
- Use for binary or multi-class evaluation tasks (pass/fail, correct/incorrect).
10
-
11
- #### Precision
12
-
13
- ```
14
- Precision = True Positives / (True Positives + False Positives)
15
- ```
16
-
17
- **Interpretation**: Of all responses the judge said were good, what fraction were actually good?
18
-
19
- **Use when**: False positives are costly (e.g., approving unsafe content)
20
-
21
- ```python
22
- def precision(predictions, ground_truth):
23
- true_positives = sum(1 for p, g in zip(predictions, ground_truth) if p == 1 and g == 1)
24
- predicted_positives = sum(predictions)
25
- return true_positives / predicted_positives if predicted_positives > 0 else 0
26
- ```
27
-
28
- #### Recall
29
-
30
- ```
31
- Recall = True Positives / (True Positives + False Negatives)
32
- ```
33
-
34
- **Interpretation**: Of all actually good responses, what fraction did the judge identify?
35
-
36
- **Use when**: False negatives are costly (e.g., missing good content in filtering)
37
-
38
- ```python
39
- def recall(predictions, ground_truth):
40
- true_positives = sum(1 for p, g in zip(predictions, ground_truth) if p == 1 and g == 1)
41
- actual_positives = sum(ground_truth)
42
- return true_positives / actual_positives if actual_positives > 0 else 0
43
- ```
44
-
45
- #### F1 Score
46
-
47
- ```
48
- F1 = 2 * (Precision * Recall) / (Precision + Recall)
49
- ```
50
-
51
- **Interpretation**: Harmonic mean of precision and recall
52
-
53
- **Use when**: You need a single number balancing both concerns
54
-
55
- ```python
56
- def f1_score(predictions, ground_truth):
57
- p = precision(predictions, ground_truth)
58
- r = recall(predictions, ground_truth)
59
- return 2 * p * r / (p + r) if (p + r) > 0 else 0
60
- ```
61
-
62
- ### Agreement Metrics
63
-
64
- Use for comparing automated evaluation with human judgment.
65
-
66
- #### Cohen's Kappa (κ)
67
-
68
- ```
69
- κ = (Observed Agreement - Expected Agreement) / (1 - Expected Agreement)
70
- ```
71
-
72
- **Interpretation**: Agreement adjusted for chance
73
- - κ > 0.8: Almost perfect agreement
74
- - κ 0.6-0.8: Substantial agreement
75
- - κ 0.4-0.6: Moderate agreement
76
- - κ < 0.4: Fair to poor agreement
77
-
78
- **Use for**: Binary or categorical judgments
79
-
80
- ```python
81
- def cohens_kappa(judge1, judge2):
82
- from sklearn.metrics import cohen_kappa_score
83
- return cohen_kappa_score(judge1, judge2)
84
- ```
85
-
86
- #### Weighted Kappa
87
-
88
- For ordinal scales where disagreement severity matters:
89
-
90
- ```python
91
- def weighted_kappa(judge1, judge2):
92
- from sklearn.metrics import cohen_kappa_score
93
- return cohen_kappa_score(judge1, judge2, weights='quadratic')
94
- ```
95
-
96
- **Interpretation**: Penalizes large disagreements more than small ones
97
-
98
- ### Correlation Metrics
99
-
100
- Use for ordinal/continuous scores.
101
-
102
- #### Spearman's Rank Correlation (ρ)
103
-
104
- **Interpretation**: Correlation between rankings, not absolute values
105
- - ρ > 0.9: Very strong correlation
106
- - ρ 0.7-0.9: Strong correlation
107
- - ρ 0.5-0.7: Moderate correlation
108
- - ρ < 0.5: Weak correlation
109
-
110
- **Use when**: Order matters more than exact values
111
-
112
- ```python
113
- def spearmans_rho(scores1, scores2):
114
- from scipy.stats import spearmanr
115
- rho, p_value = spearmanr(scores1, scores2)
116
- return {'rho': rho, 'p_value': p_value}
117
- ```
118
-
119
- #### Kendall's Tau (τ)
120
-
121
- **Interpretation**: Similar to Spearman but based on pairwise concordance
122
-
123
- **Use when**: You have many tied values
124
-
125
- ```python
126
- def kendalls_tau(scores1, scores2):
127
- from scipy.stats import kendalltau
128
- tau, p_value = kendalltau(scores1, scores2)
129
- return {'tau': tau, 'p_value': p_value}
130
- ```
131
-
132
- #### Pearson Correlation (r)
133
-
134
- **Interpretation**: Linear correlation between scores
135
-
136
- **Use when**: Exact score values matter, not just order
137
-
138
- ```python
139
- def pearsons_r(scores1, scores2):
140
- from scipy.stats import pearsonr
141
- r, p_value = pearsonr(scores1, scores2)
142
- return {'r': r, 'p_value': p_value}
143
- ```
144
-
145
- ### Pairwise Comparison Metrics
146
-
147
- #### Agreement Rate
148
-
149
- ```
150
- Agreement = (Matching Decisions) / (Total Comparisons)
151
- ```
152
-
153
- **Interpretation**: Simple percentage of agreement
154
-
155
- ```python
156
- def pairwise_agreement(decisions1, decisions2):
157
- matches = sum(1 for d1, d2 in zip(decisions1, decisions2) if d1 == d2)
158
- return matches / len(decisions1)
159
- ```
160
-
161
- #### Position Consistency
162
-
163
- ```
164
- Consistency = (Consistent across position swaps) / (Total comparisons)
165
- ```
166
-
167
- **Interpretation**: How often does the decision stay the same when the response positions are swapped? Higher is better; low values indicate position bias.
168
-
169
- ```python
170
- def position_consistency(results):
171
- consistent = sum(1 for r in results if r['position_consistent'])
172
- return consistent / len(results)
173
- ```
174
-
175
- ## Selection Decision Tree
176
-
177
- ```
178
- What type of evaluation task?
179
-
180
- ├── Binary classification (pass/fail)
181
- │ └── Use: Precision, Recall, F1, Cohen's κ
182
-
183
- ├── Ordinal scale (1-5 rating)
184
- │ ├── Comparing to human judgments?
185
- │ │ └── Use: Spearman's ρ, Weighted κ
186
- │ └── Comparing two automated judges?
187
- │ └── Use: Kendall's τ, Spearman's ρ
188
-
189
- ├── Pairwise preference
190
- │ └── Use: Agreement rate, Position consistency
191
-
192
- └── Multi-label classification
193
- └── Use: Macro-F1, Micro-F1, Per-label metrics
194
- ```
195
-
196
- ## Metric Selection by Use Case
197
-
198
- ### Use Case 1: Validating Automated Evaluation
199
-
200
- **Goal**: Ensure automated evaluation correlates with human judgment
201
-
202
- **Recommended Metrics**:
203
- 1. Primary: Spearman's ρ (for ordinal scales) or Cohen's κ (for categorical)
204
- 2. Secondary: Per-criterion agreement
205
- 3. Diagnostic: Confusion matrix for systematic errors
206
-
207
- ```python
208
- def validate_automated_eval(automated_scores, human_scores, criteria):
209
- results = {}
210
-
211
- # Overall correlation
212
- results['overall_spearman'] = spearmans_rho(automated_scores, human_scores)
213
-
214
- # Per-criterion agreement
215
- for criterion in criteria:
216
- auto_crit = [s[criterion] for s in automated_scores]
217
- human_crit = [s[criterion] for s in human_scores]
218
- results[f'{criterion}_spearman'] = spearmans_rho(auto_crit, human_crit)
219
-
220
- return results
221
- ```
222
-
223
- ### Use Case 2: Comparing Two Models
224
-
225
- **Goal**: Determine which model produces better outputs
226
-
227
- **Recommended Metrics**:
228
- 1. Primary: Win rate (from pairwise comparison)
229
- 2. Secondary: Position consistency (bias check)
230
- 3. Diagnostic: Per-criterion breakdown
231
-
232
- ```python
233
- async def compare_models(model_a_outputs, model_b_outputs, prompts):
234
- results = []
235
- for a, b, p in zip(model_a_outputs, model_b_outputs, prompts):
236
- comparison = await compare_with_position_swap(a, b, p)
237
- results.append(comparison)
238
-
239
- return {
240
- 'a_wins': sum(1 for r in results if r['winner'] == 'A'),
241
- 'b_wins': sum(1 for r in results if r['winner'] == 'B'),
242
- 'ties': sum(1 for r in results if r['winner'] == 'TIE'),
243
- 'position_consistency': position_consistency(results)
244
- }
245
- ```
246
-
247
- ### Use Case 3: Quality Monitoring
248
-
249
- **Goal**: Track evaluation quality over time
250
-
251
- **Recommended Metrics**:
252
- 1. Primary: Rolling agreement with human spot-checks
253
- 2. Secondary: Score distribution stability
254
- 3. Diagnostic: Bias indicators (position, length)
255
-
256
- ```python
257
- class QualityMonitor:
258
- def __init__(self, window_size=100):
259
- self.window = deque(maxlen=window_size)
260
-
261
- def add_evaluation(self, automated, human_spot_check=None):
262
- self.window.append({
263
- 'automated': automated,
264
- 'human': human_spot_check,
265
- 'length': len(automated['response'])
266
- })
267
-
268
- def get_metrics(self):
269
- # Filter to evaluations with human spot-checks
270
- with_human = [e for e in self.window if e['human'] is not None]
271
-
272
- if len(with_human) < 10:
273
- return {'insufficient_data': True}
274
-
275
- auto_scores = [e['automated']['score'] for e in with_human]
276
- human_scores = [e['human']['score'] for e in with_human]
277
-
278
- return {
279
- 'correlation': spearmans_rho(auto_scores, human_scores),
280
- 'mean_difference': np.mean([a - h for a, h in zip(auto_scores, human_scores)]),
281
- 'length_correlation': spearmans_rho(
282
- [e['length'] for e in self.window],
283
- [e['automated']['score'] for e in self.window]
284
- )
285
- }
286
- ```
287
-
288
- ## Interpreting Metric Results
289
-
290
- ### Good Evaluation System Indicators
291
-
292
- | Metric | Good | Acceptable | Concerning |
293
- |--------|------|------------|------------|
294
- | Spearman's ρ | > 0.8 | 0.6-0.8 | < 0.6 |
295
- | Cohen's κ | > 0.7 | 0.5-0.7 | < 0.5 |
296
- | Position consistency | > 0.9 | 0.8-0.9 | < 0.8 |
297
- | Length correlation (absolute value) | < 0.2 | 0.2-0.4 | > 0.4 |
298
-
299
- ### Warning Signs
300
-
301
- 1. **High agreement but low correlation**: May indicate calibration issues
302
- 2. **Low position consistency**: Position bias affecting results
303
- 3. **High length correlation**: Length bias inflating scores
304
- 4. **Per-criterion variance**: Some criteria may be poorly defined
305
-
306
- ## Reporting Template
307
-
308
- ```markdown
309
- ## Evaluation System Metrics Report
310
-
311
- ### Human Agreement
312
- - Spearman's ρ: 0.82 (p < 0.001)
313
- - Cohen's κ: 0.74
314
- - Sample size: 500 evaluations
315
-
316
- ### Bias Indicators
317
- - Position consistency: 91%
318
- - Length-score correlation: 0.12
319
-
320
- ### Per-Criterion Performance
321
- | Criterion | Spearman's ρ | κ |
322
- |-----------|--------------|---|
323
- | Accuracy | 0.88 | 0.79 |
324
- | Clarity | 0.76 | 0.68 |
325
- | Completeness | 0.81 | 0.72 |
326
-
327
- ### Recommendations
328
- - All metrics within acceptable ranges
329
- - Monitor "Clarity" criterion - lower agreement may indicate need for rubric refinement
330
- ```
331
-