@bastani/atomic 0.6.8 → 0.7.0-2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (765) hide show
  1. package/bin/atomic +65 -0
  2. package/package.json +17 -82
  3. package/postinstall.mjs +47 -0
  4. package/.agents/skills/ado-commit/SKILL.md +0 -94
  5. package/.agents/skills/ado-create-pr/SKILL.md +0 -211
  6. package/.agents/skills/advanced-evaluation/SKILL.md +0 -404
  7. package/.agents/skills/advanced-evaluation/references/bias-mitigation.md +0 -288
  8. package/.agents/skills/advanced-evaluation/references/evaluation-pipeline.md +0 -43
  9. package/.agents/skills/advanced-evaluation/references/implementation-patterns.md +0 -315
  10. package/.agents/skills/advanced-evaluation/references/metrics-guide.md +0 -331
  11. package/.agents/skills/advanced-evaluation/scripts/evaluation_example.py +0 -392
  12. package/.agents/skills/ast-grep/SKILL.md +0 -325
  13. package/.agents/skills/ast-grep/references/rule_reference.md +0 -297
  14. package/.agents/skills/bdi-mental-states/SKILL.md +0 -313
  15. package/.agents/skills/bdi-mental-states/references/bdi-ontology-core.md +0 -207
  16. package/.agents/skills/bdi-mental-states/references/framework-integration.md +0 -582
  17. package/.agents/skills/bdi-mental-states/references/rdf-examples.md +0 -315
  18. package/.agents/skills/bdi-mental-states/references/sparql-competency.md +0 -420
  19. package/.agents/skills/bun/SKILL.md +0 -233
  20. package/.agents/skills/context-compression/SKILL.md +0 -274
  21. package/.agents/skills/context-compression/references/evaluation-framework.md +0 -213
  22. package/.agents/skills/context-compression/scripts/compression_evaluator.py +0 -862
  23. package/.agents/skills/context-compression/tests/test_compression_evaluator.py +0 -56
  24. package/.agents/skills/context-degradation/SKILL.md +0 -208
  25. package/.agents/skills/context-degradation/references/patterns.md +0 -314
  26. package/.agents/skills/context-degradation/scripts/degradation_detector.py +0 -614
  27. package/.agents/skills/context-fundamentals/SKILL.md +0 -203
  28. package/.agents/skills/context-fundamentals/references/context-components.md +0 -283
  29. package/.agents/skills/context-fundamentals/scripts/context_manager.py +0 -533
  30. package/.agents/skills/context-optimization/SKILL.md +0 -197
  31. package/.agents/skills/context-optimization/references/optimization_techniques.md +0 -272
  32. package/.agents/skills/context-optimization/scripts/compaction.py +0 -562
  33. package/.agents/skills/create-spec/SKILL.md +0 -249
  34. package/.agents/skills/docx/LICENSE.txt +0 -30
  35. package/.agents/skills/docx/SKILL.md +0 -592
  36. package/.agents/skills/docx/scripts/__init__.py +0 -1
  37. package/.agents/skills/docx/scripts/accept_changes.py +0 -135
  38. package/.agents/skills/docx/scripts/comment.py +0 -318
  39. package/.agents/skills/docx/scripts/office/helpers/__init__.py +0 -0
  40. package/.agents/skills/docx/scripts/office/helpers/merge_runs.py +0 -199
  41. package/.agents/skills/docx/scripts/office/helpers/simplify_redlines.py +0 -197
  42. package/.agents/skills/docx/scripts/office/pack.py +0 -159
  43. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  44. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  45. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  46. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  47. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  48. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  49. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  50. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  51. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  52. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  53. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  54. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  55. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  56. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  57. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  58. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  59. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  60. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  61. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  62. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  63. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  64. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  65. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  66. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  67. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  68. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  69. package/.agents/skills/docx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  70. package/.agents/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  71. package/.agents/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  72. package/.agents/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  73. package/.agents/skills/docx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  74. package/.agents/skills/docx/scripts/office/schemas/mce/mc.xsd +0 -75
  75. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-2010.xsd +0 -560
  76. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-2012.xsd +0 -67
  77. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-2018.xsd +0 -14
  78. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  79. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  80. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  81. package/.agents/skills/docx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  82. package/.agents/skills/docx/scripts/office/soffice.py +0 -183
  83. package/.agents/skills/docx/scripts/office/unpack.py +0 -132
  84. package/.agents/skills/docx/scripts/office/validate.py +0 -111
  85. package/.agents/skills/docx/scripts/office/validators/__init__.py +0 -15
  86. package/.agents/skills/docx/scripts/office/validators/base.py +0 -847
  87. package/.agents/skills/docx/scripts/office/validators/docx.py +0 -446
  88. package/.agents/skills/docx/scripts/office/validators/pptx.py +0 -275
  89. package/.agents/skills/docx/scripts/office/validators/redlining.py +0 -247
  90. package/.agents/skills/docx/scripts/templates/comments.xml +0 -3
  91. package/.agents/skills/docx/scripts/templates/commentsExtended.xml +0 -3
  92. package/.agents/skills/docx/scripts/templates/commentsExtensible.xml +0 -3
  93. package/.agents/skills/docx/scripts/templates/commentsIds.xml +0 -3
  94. package/.agents/skills/docx/scripts/templates/people.xml +0 -3
  95. package/.agents/skills/evaluation/SKILL.md +0 -253
  96. package/.agents/skills/evaluation/references/metrics.md +0 -339
  97. package/.agents/skills/evaluation/scripts/evaluator.py +0 -627
  98. package/.agents/skills/explain-code/SKILL.md +0 -232
  99. package/.agents/skills/filesystem-context/SKILL.md +0 -289
  100. package/.agents/skills/filesystem-context/references/implementation-patterns.md +0 -549
  101. package/.agents/skills/filesystem-context/scripts/filesystem_context.py +0 -425
  102. package/.agents/skills/find-skills/SKILL.md +0 -144
  103. package/.agents/skills/gh-commit/SKILL.md +0 -245
  104. package/.agents/skills/gh-create-pr/SKILL.md +0 -95
  105. package/.agents/skills/hosted-agents/SKILL.md +0 -262
  106. package/.agents/skills/hosted-agents/references/infrastructure-patterns.md +0 -700
  107. package/.agents/skills/hosted-agents/scripts/sandbox_manager.py +0 -590
  108. package/.agents/skills/impeccable/SKILL.md +0 -178
  109. package/.agents/skills/impeccable/agents/openai.yaml +0 -4
  110. package/.agents/skills/impeccable/reference/adapt.md +0 -190
  111. package/.agents/skills/impeccable/reference/animate.md +0 -175
  112. package/.agents/skills/impeccable/reference/audit.md +0 -134
  113. package/.agents/skills/impeccable/reference/bolder.md +0 -113
  114. package/.agents/skills/impeccable/reference/brand.md +0 -114
  115. package/.agents/skills/impeccable/reference/clarify.md +0 -174
  116. package/.agents/skills/impeccable/reference/cognitive-load.md +0 -106
  117. package/.agents/skills/impeccable/reference/color-and-contrast.md +0 -105
  118. package/.agents/skills/impeccable/reference/colorize.md +0 -154
  119. package/.agents/skills/impeccable/reference/craft.md +0 -193
  120. package/.agents/skills/impeccable/reference/critique.md +0 -213
  121. package/.agents/skills/impeccable/reference/delight.md +0 -302
  122. package/.agents/skills/impeccable/reference/distill.md +0 -111
  123. package/.agents/skills/impeccable/reference/document.md +0 -427
  124. package/.agents/skills/impeccable/reference/extract.md +0 -70
  125. package/.agents/skills/impeccable/reference/harden.md +0 -347
  126. package/.agents/skills/impeccable/reference/heuristics-scoring.md +0 -234
  127. package/.agents/skills/impeccable/reference/interaction-design.md +0 -195
  128. package/.agents/skills/impeccable/reference/layout.md +0 -141
  129. package/.agents/skills/impeccable/reference/live.md +0 -594
  130. package/.agents/skills/impeccable/reference/motion-design.md +0 -109
  131. package/.agents/skills/impeccable/reference/onboard.md +0 -234
  132. package/.agents/skills/impeccable/reference/optimize.md +0 -258
  133. package/.agents/skills/impeccable/reference/overdrive.md +0 -130
  134. package/.agents/skills/impeccable/reference/personas.md +0 -178
  135. package/.agents/skills/impeccable/reference/polish.md +0 -232
  136. package/.agents/skills/impeccable/reference/product.md +0 -62
  137. package/.agents/skills/impeccable/reference/quieter.md +0 -99
  138. package/.agents/skills/impeccable/reference/responsive-design.md +0 -114
  139. package/.agents/skills/impeccable/reference/shape.md +0 -151
  140. package/.agents/skills/impeccable/reference/spatial-design.md +0 -100
  141. package/.agents/skills/impeccable/reference/teach.md +0 -156
  142. package/.agents/skills/impeccable/reference/typeset.md +0 -124
  143. package/.agents/skills/impeccable/reference/typography.md +0 -159
  144. package/.agents/skills/impeccable/reference/ux-writing.md +0 -107
  145. package/.agents/skills/impeccable/scripts/cleanup-deprecated.mjs +0 -284
  146. package/.agents/skills/impeccable/scripts/command-metadata.json +0 -94
  147. package/.agents/skills/impeccable/scripts/design-parser.mjs +0 -820
  148. package/.agents/skills/impeccable/scripts/detect-csp.mjs +0 -198
  149. package/.agents/skills/impeccable/scripts/is-generated.mjs +0 -69
  150. package/.agents/skills/impeccable/scripts/live-accept.mjs +0 -595
  151. package/.agents/skills/impeccable/scripts/live-browser.js +0 -4781
  152. package/.agents/skills/impeccable/scripts/live-inject.mjs +0 -445
  153. package/.agents/skills/impeccable/scripts/live-poll.mjs +0 -186
  154. package/.agents/skills/impeccable/scripts/live-server.mjs +0 -694
  155. package/.agents/skills/impeccable/scripts/live-wrap.mjs +0 -571
  156. package/.agents/skills/impeccable/scripts/live.mjs +0 -247
  157. package/.agents/skills/impeccable/scripts/load-context.mjs +0 -141
  158. package/.agents/skills/impeccable/scripts/modern-screenshot.umd.js +0 -14
  159. package/.agents/skills/impeccable/scripts/pin.mjs +0 -214
  160. package/.agents/skills/init/SKILL.md +0 -140
  161. package/.agents/skills/liteparse/SKILL.md +0 -223
  162. package/.agents/skills/memory-systems/SKILL.md +0 -221
  163. package/.agents/skills/memory-systems/references/implementation.md +0 -551
  164. package/.agents/skills/memory-systems/scripts/memory_store.py +0 -616
  165. package/.agents/skills/multi-agent-patterns/SKILL.md +0 -259
  166. package/.agents/skills/multi-agent-patterns/references/frameworks.md +0 -433
  167. package/.agents/skills/multi-agent-patterns/scripts/coordination.py +0 -613
  168. package/.agents/skills/opentui/SKILL.md +0 -202
  169. package/.agents/skills/opentui/references/animation/REFERENCE.md +0 -431
  170. package/.agents/skills/opentui/references/components/REFERENCE.md +0 -144
  171. package/.agents/skills/opentui/references/components/code-diff.md +0 -672
  172. package/.agents/skills/opentui/references/components/containers.md +0 -417
  173. package/.agents/skills/opentui/references/components/inputs.md +0 -531
  174. package/.agents/skills/opentui/references/components/text-display.md +0 -386
  175. package/.agents/skills/opentui/references/core/REFERENCE.md +0 -145
  176. package/.agents/skills/opentui/references/core/api.md +0 -543
  177. package/.agents/skills/opentui/references/core/configuration.md +0 -168
  178. package/.agents/skills/opentui/references/core/gotchas.md +0 -393
  179. package/.agents/skills/opentui/references/core/patterns.md +0 -449
  180. package/.agents/skills/opentui/references/keyboard/REFERENCE.md +0 -617
  181. package/.agents/skills/opentui/references/layout/REFERENCE.md +0 -337
  182. package/.agents/skills/opentui/references/layout/patterns.md +0 -444
  183. package/.agents/skills/opentui/references/react/REFERENCE.md +0 -174
  184. package/.agents/skills/opentui/references/react/api.md +0 -436
  185. package/.agents/skills/opentui/references/react/configuration.md +0 -302
  186. package/.agents/skills/opentui/references/react/gotchas.md +0 -443
  187. package/.agents/skills/opentui/references/react/patterns.md +0 -501
  188. package/.agents/skills/opentui/references/solid/REFERENCE.md +0 -201
  189. package/.agents/skills/opentui/references/solid/api.md +0 -564
  190. package/.agents/skills/opentui/references/solid/configuration.md +0 -316
  191. package/.agents/skills/opentui/references/solid/gotchas.md +0 -427
  192. package/.agents/skills/opentui/references/solid/patterns.md +0 -560
  193. package/.agents/skills/opentui/references/testing/REFERENCE.md +0 -614
  194. package/.agents/skills/pdf/LICENSE.txt +0 -30
  195. package/.agents/skills/pdf/SKILL.md +0 -316
  196. package/.agents/skills/pdf/forms.md +0 -294
  197. package/.agents/skills/pdf/reference.md +0 -612
  198. package/.agents/skills/pdf/scripts/check_bounding_boxes.py +0 -65
  199. package/.agents/skills/pdf/scripts/check_fillable_fields.py +0 -11
  200. package/.agents/skills/pdf/scripts/convert_pdf_to_images.py +0 -33
  201. package/.agents/skills/pdf/scripts/create_validation_image.py +0 -37
  202. package/.agents/skills/pdf/scripts/extract_form_field_info.py +0 -122
  203. package/.agents/skills/pdf/scripts/extract_form_structure.py +0 -115
  204. package/.agents/skills/pdf/scripts/fill_fillable_fields.py +0 -98
  205. package/.agents/skills/pdf/scripts/fill_pdf_form_with_annotations.py +0 -107
  206. package/.agents/skills/playwright-cli/SKILL.md +0 -390
  207. package/.agents/skills/playwright-cli/references/element-attributes.md +0 -23
  208. package/.agents/skills/playwright-cli/references/playwright-tests.md +0 -39
  209. package/.agents/skills/playwright-cli/references/request-mocking.md +0 -87
  210. package/.agents/skills/playwright-cli/references/running-code.md +0 -241
  211. package/.agents/skills/playwright-cli/references/session-management.md +0 -225
  212. package/.agents/skills/playwright-cli/references/spec-driven-testing.md +0 -305
  213. package/.agents/skills/playwright-cli/references/storage-state.md +0 -275
  214. package/.agents/skills/playwright-cli/references/test-generation.md +0 -134
  215. package/.agents/skills/playwright-cli/references/tracing.md +0 -139
  216. package/.agents/skills/playwright-cli/references/video-recording.md +0 -143
  217. package/.agents/skills/pptx/LICENSE.txt +0 -30
  218. package/.agents/skills/pptx/SKILL.md +0 -234
  219. package/.agents/skills/pptx/editing.md +0 -205
  220. package/.agents/skills/pptx/pptxgenjs.md +0 -420
  221. package/.agents/skills/pptx/scripts/__init__.py +0 -0
  222. package/.agents/skills/pptx/scripts/add_slide.py +0 -195
  223. package/.agents/skills/pptx/scripts/clean.py +0 -286
  224. package/.agents/skills/pptx/scripts/office/helpers/__init__.py +0 -0
  225. package/.agents/skills/pptx/scripts/office/helpers/merge_runs.py +0 -199
  226. package/.agents/skills/pptx/scripts/office/helpers/simplify_redlines.py +0 -197
  227. package/.agents/skills/pptx/scripts/office/pack.py +0 -159
  228. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  229. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  230. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  231. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  232. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  233. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  234. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  235. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  236. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  237. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  238. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  239. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  240. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  241. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  242. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  243. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  244. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  245. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  246. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  247. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  248. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  249. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  250. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  251. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  252. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  253. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  254. package/.agents/skills/pptx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  255. package/.agents/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  256. package/.agents/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  257. package/.agents/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  258. package/.agents/skills/pptx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  259. package/.agents/skills/pptx/scripts/office/schemas/mce/mc.xsd +0 -75
  260. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-2010.xsd +0 -560
  261. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-2012.xsd +0 -67
  262. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-2018.xsd +0 -14
  263. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  264. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  265. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  266. package/.agents/skills/pptx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  267. package/.agents/skills/pptx/scripts/office/soffice.py +0 -183
  268. package/.agents/skills/pptx/scripts/office/unpack.py +0 -132
  269. package/.agents/skills/pptx/scripts/office/validate.py +0 -111
  270. package/.agents/skills/pptx/scripts/office/validators/__init__.py +0 -15
  271. package/.agents/skills/pptx/scripts/office/validators/base.py +0 -847
  272. package/.agents/skills/pptx/scripts/office/validators/docx.py +0 -446
  273. package/.agents/skills/pptx/scripts/office/validators/pptx.py +0 -275
  274. package/.agents/skills/pptx/scripts/office/validators/redlining.py +0 -247
  275. package/.agents/skills/pptx/scripts/thumbnail.py +0 -289
  276. package/.agents/skills/project-development/SKILL.md +0 -293
  277. package/.agents/skills/project-development/references/case-studies.md +0 -388
  278. package/.agents/skills/project-development/references/pipeline-patterns.md +0 -610
  279. package/.agents/skills/project-development/scripts/pipeline_template.py +0 -796
  280. package/.agents/skills/prompt-engineer/SKILL.md +0 -265
  281. package/.agents/skills/prompt-engineer/references/advanced_patterns.md +0 -271
  282. package/.agents/skills/prompt-engineer/references/core_prompting.md +0 -137
  283. package/.agents/skills/prompt-engineer/references/quality_improvement.md +0 -193
  284. package/.agents/skills/research-codebase/SKILL.md +0 -229
  285. package/.agents/skills/ripgrep/SKILL.md +0 -384
  286. package/.agents/skills/skill-creator/LICENSE.txt +0 -202
  287. package/.agents/skills/skill-creator/SKILL.md +0 -487
  288. package/.agents/skills/skill-creator/agents/analyzer.md +0 -274
  289. package/.agents/skills/skill-creator/agents/comparator.md +0 -202
  290. package/.agents/skills/skill-creator/agents/grader.md +0 -223
  291. package/.agents/skills/skill-creator/assets/eval_review.html +0 -146
  292. package/.agents/skills/skill-creator/eval-viewer/generate_review.py +0 -471
  293. package/.agents/skills/skill-creator/eval-viewer/viewer.html +0 -1325
  294. package/.agents/skills/skill-creator/references/schemas.md +0 -430
  295. package/.agents/skills/skill-creator/scripts/__init__.py +0 -0
  296. package/.agents/skills/skill-creator/scripts/aggregate_benchmark.py +0 -401
  297. package/.agents/skills/skill-creator/scripts/generate_report.py +0 -326
  298. package/.agents/skills/skill-creator/scripts/improve_description.py +0 -247
  299. package/.agents/skills/skill-creator/scripts/package_skill.py +0 -136
  300. package/.agents/skills/skill-creator/scripts/quick_validate.py +0 -103
  301. package/.agents/skills/skill-creator/scripts/run_eval.py +0 -310
  302. package/.agents/skills/skill-creator/scripts/run_loop.py +0 -328
  303. package/.agents/skills/skill-creator/scripts/utils.py +0 -47
  304. package/.agents/skills/sl-commit/SKILL.md +0 -53
  305. package/.agents/skills/sl-submit-diff/SKILL.md +0 -57
  306. package/.agents/skills/tdd/SKILL.md +0 -111
  307. package/.agents/skills/tdd/deep-modules.md +0 -33
  308. package/.agents/skills/tdd/interface-design.md +0 -31
  309. package/.agents/skills/tdd/mocking.md +0 -59
  310. package/.agents/skills/tdd/refactoring.md +0 -10
  311. package/.agents/skills/tdd/tests.md +0 -61
  312. package/.agents/skills/tool-design/SKILL.md +0 -273
  313. package/.agents/skills/tool-design/references/architectural_reduction.md +0 -210
  314. package/.agents/skills/tool-design/references/best_practices.md +0 -176
  315. package/.agents/skills/tool-design/scripts/description_generator.py +0 -528
  316. package/.agents/skills/typescript-advanced-types/SKILL.md +0 -720
  317. package/.agents/skills/typescript-expert/SKILL.md +0 -434
  318. package/.agents/skills/typescript-expert/references/tsconfig-strict.json +0 -92
  319. package/.agents/skills/typescript-expert/references/typescript-cheatsheet.md +0 -383
  320. package/.agents/skills/typescript-expert/references/utility-types.ts +0 -335
  321. package/.agents/skills/typescript-expert/scripts/ts_diagnostic.py +0 -203
  322. package/.agents/skills/typescript-react-reviewer/SKILL.md +0 -201
  323. package/.agents/skills/typescript-react-reviewer/references/antipatterns.md +0 -510
  324. package/.agents/skills/typescript-react-reviewer/references/checklist.md +0 -267
  325. package/.agents/skills/typescript-react-reviewer/references/react19-patterns.md +0 -305
  326. package/.agents/skills/workflow-creator/SKILL.md +0 -553
  327. package/.agents/skills/workflow-creator/references/agent-sessions.md +0 -891
  328. package/.agents/skills/workflow-creator/references/agent-setup-recipe.md +0 -266
  329. package/.agents/skills/workflow-creator/references/computation-and-validation.md +0 -201
  330. package/.agents/skills/workflow-creator/references/control-flow.md +0 -470
  331. package/.agents/skills/workflow-creator/references/failure-modes.md +0 -1014
  332. package/.agents/skills/workflow-creator/references/getting-started.md +0 -392
  333. package/.agents/skills/workflow-creator/references/registry-and-validation.md +0 -141
  334. package/.agents/skills/workflow-creator/references/running-workflows.md +0 -418
  335. package/.agents/skills/workflow-creator/references/session-config.md +0 -431
  336. package/.agents/skills/workflow-creator/references/state-and-data-flow.md +0 -356
  337. package/.agents/skills/workflow-creator/references/user-input.md +0 -234
  338. package/.agents/skills/workflow-creator/references/workflow-inputs.md +0 -392
  339. package/.agents/skills/xlsx/LICENSE.txt +0 -30
  340. package/.agents/skills/xlsx/SKILL.md +0 -294
  341. package/.agents/skills/xlsx/scripts/office/helpers/__init__.py +0 -0
  342. package/.agents/skills/xlsx/scripts/office/helpers/merge_runs.py +0 -199
  343. package/.agents/skills/xlsx/scripts/office/helpers/simplify_redlines.py +0 -197
  344. package/.agents/skills/xlsx/scripts/office/pack.py +0 -159
  345. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chart.xsd +0 -1499
  346. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-chartDrawing.xsd +0 -146
  347. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-diagram.xsd +0 -1085
  348. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-lockedCanvas.xsd +0 -11
  349. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-main.xsd +0 -3081
  350. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-picture.xsd +0 -23
  351. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-spreadsheetDrawing.xsd +0 -185
  352. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/dml-wordprocessingDrawing.xsd +0 -287
  353. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/pml.xsd +0 -1676
  354. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-additionalCharacteristics.xsd +0 -28
  355. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-bibliography.xsd +0 -144
  356. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-commonSimpleTypes.xsd +0 -174
  357. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlDataProperties.xsd +0 -25
  358. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-customXmlSchemaProperties.xsd +0 -18
  359. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesCustom.xsd +0 -59
  360. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesExtended.xsd +0 -56
  361. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-documentPropertiesVariantTypes.xsd +0 -195
  362. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-math.xsd +0 -582
  363. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/shared-relationshipReference.xsd +0 -25
  364. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/sml.xsd +0 -4439
  365. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-main.xsd +0 -570
  366. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-officeDrawing.xsd +0 -509
  367. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-presentationDrawing.xsd +0 -12
  368. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-spreadsheetDrawing.xsd +0 -108
  369. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/vml-wordprocessingDrawing.xsd +0 -96
  370. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/wml.xsd +0 -3646
  371. package/.agents/skills/xlsx/scripts/office/schemas/ISO-IEC29500-4_2016/xml.xsd +0 -116
  372. package/.agents/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-contentTypes.xsd +0 -42
  373. package/.agents/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-coreProperties.xsd +0 -50
  374. package/.agents/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-digSig.xsd +0 -49
  375. package/.agents/skills/xlsx/scripts/office/schemas/ecma/fouth-edition/opc-relationships.xsd +0 -33
  376. package/.agents/skills/xlsx/scripts/office/schemas/mce/mc.xsd +0 -75
  377. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-2010.xsd +0 -560
  378. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-2012.xsd +0 -67
  379. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-2018.xsd +0 -14
  380. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-cex-2018.xsd +0 -20
  381. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-cid-2016.xsd +0 -13
  382. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-sdtdatahash-2020.xsd +0 -4
  383. package/.agents/skills/xlsx/scripts/office/schemas/microsoft/wml-symex-2015.xsd +0 -8
  384. package/.agents/skills/xlsx/scripts/office/soffice.py +0 -183
  385. package/.agents/skills/xlsx/scripts/office/unpack.py +0 -132
  386. package/.agents/skills/xlsx/scripts/office/validate.py +0 -111
  387. package/.agents/skills/xlsx/scripts/office/validators/__init__.py +0 -15
  388. package/.agents/skills/xlsx/scripts/office/validators/base.py +0 -847
  389. package/.agents/skills/xlsx/scripts/office/validators/docx.py +0 -446
  390. package/.agents/skills/xlsx/scripts/office/validators/pptx.py +0 -275
  391. package/.agents/skills/xlsx/scripts/office/validators/redlining.py +0 -247
  392. package/.agents/skills/xlsx/scripts/recalc.py +0 -184
  393. package/.claude/agents/code-simplifier.md +0 -52
  394. package/.claude/agents/codebase-analyzer.md +0 -166
  395. package/.claude/agents/codebase-locator.md +0 -122
  396. package/.claude/agents/codebase-online-researcher.md +0 -148
  397. package/.claude/agents/codebase-pattern-finder.md +0 -247
  398. package/.claude/agents/codebase-research-analyzer.md +0 -179
  399. package/.claude/agents/codebase-research-locator.md +0 -145
  400. package/.claude/agents/debugger.md +0 -91
  401. package/.claude/agents/orchestrator.md +0 -19
  402. package/.claude/agents/planner.md +0 -295
  403. package/.claude/agents/reviewer.md +0 -98
  404. package/.claude/agents/worker.md +0 -165
  405. package/.claude/settings.json +0 -27
  406. package/.github/agents/code-simplifier.md +0 -52
  407. package/.github/agents/codebase-analyzer.md +0 -166
  408. package/.github/agents/codebase-locator.md +0 -122
  409. package/.github/agents/codebase-online-researcher.md +0 -146
  410. package/.github/agents/codebase-pattern-finder.md +0 -247
  411. package/.github/agents/codebase-research-analyzer.md +0 -179
  412. package/.github/agents/codebase-research-locator.md +0 -145
  413. package/.github/agents/debugger.md +0 -98
  414. package/.github/agents/orchestrator.md +0 -27
  415. package/.github/agents/planner.md +0 -305
  416. package/.github/agents/reviewer.md +0 -95
  417. package/.github/agents/worker.md +0 -237
  418. package/.github/lsp.json +0 -93
  419. package/.mcp.json +0 -20
  420. package/.opencode/agents/code-simplifier.md +0 -62
  421. package/.opencode/agents/codebase-analyzer.md +0 -171
  422. package/.opencode/agents/codebase-locator.md +0 -127
  423. package/.opencode/agents/codebase-online-researcher.md +0 -152
  424. package/.opencode/agents/codebase-pattern-finder.md +0 -252
  425. package/.opencode/agents/codebase-research-analyzer.md +0 -183
  426. package/.opencode/agents/codebase-research-locator.md +0 -149
  427. package/.opencode/agents/debugger.md +0 -99
  428. package/.opencode/agents/orchestrator.md +0 -27
  429. package/.opencode/agents/planner.md +0 -309
  430. package/.opencode/agents/reviewer.md +0 -103
  431. package/.opencode/agents/worker.md +0 -165
  432. package/.opencode/opencode.json +0 -25
  433. package/README.md +0 -1624
  434. package/assets/settings.schema.json +0 -51
  435. package/dist/commands/cli/claude-inflight-hook.d.ts +0 -100
  436. package/dist/commands/cli/claude-inflight-hook.d.ts.map +0 -1
  437. package/dist/commands/cli/claude-stop-hook.d.ts +0 -80
  438. package/dist/commands/cli/claude-stop-hook.d.ts.map +0 -1
  439. package/dist/lib/atomic-temp.d.ts +0 -8
  440. package/dist/lib/atomic-temp.d.ts.map +0 -1
  441. package/dist/lib/path-root-guard.d.ts +0 -4
  442. package/dist/lib/path-root-guard.d.ts.map +0 -1
  443. package/dist/lib/spawn.d.ts +0 -102
  444. package/dist/lib/spawn.d.ts.map +0 -1
  445. package/dist/lib/terminal-env.d.ts +0 -9
  446. package/dist/lib/terminal-env.d.ts.map +0 -1
  447. package/dist/sdk/components/attached-statusline.d.ts +0 -26
  448. package/dist/sdk/components/attached-statusline.d.ts.map +0 -1
  449. package/dist/sdk/components/color-utils.d.ts +0 -4
  450. package/dist/sdk/components/color-utils.d.ts.map +0 -1
  451. package/dist/sdk/components/compact-switcher.d.ts +0 -10
  452. package/dist/sdk/components/compact-switcher.d.ts.map +0 -1
  453. package/dist/sdk/components/connectors.d.ts +0 -16
  454. package/dist/sdk/components/connectors.d.ts.map +0 -1
  455. package/dist/sdk/components/edge.d.ts +0 -4
  456. package/dist/sdk/components/edge.d.ts.map +0 -1
  457. package/dist/sdk/components/error-boundary.d.ts +0 -23
  458. package/dist/sdk/components/error-boundary.d.ts.map +0 -1
  459. package/dist/sdk/components/graph-theme.d.ts +0 -18
  460. package/dist/sdk/components/graph-theme.d.ts.map +0 -1
  461. package/dist/sdk/components/header.d.ts +0 -3
  462. package/dist/sdk/components/header.d.ts.map +0 -1
  463. package/dist/sdk/components/hooks.d.ts +0 -15
  464. package/dist/sdk/components/hooks.d.ts.map +0 -1
  465. package/dist/sdk/components/layout.d.ts +0 -27
  466. package/dist/sdk/components/layout.d.ts.map +0 -1
  467. package/dist/sdk/components/node-card.d.ts +0 -10
  468. package/dist/sdk/components/node-card.d.ts.map +0 -1
  469. package/dist/sdk/components/orchestrator-panel-contexts.d.ts +0 -16
  470. package/dist/sdk/components/orchestrator-panel-contexts.d.ts.map +0 -1
  471. package/dist/sdk/components/orchestrator-panel-store.d.ts +0 -52
  472. package/dist/sdk/components/orchestrator-panel-store.d.ts.map +0 -1
  473. package/dist/sdk/components/orchestrator-panel-types.d.ts +0 -18
  474. package/dist/sdk/components/orchestrator-panel-types.d.ts.map +0 -1
  475. package/dist/sdk/components/orchestrator-panel.d.ts +0 -86
  476. package/dist/sdk/components/orchestrator-panel.d.ts.map +0 -1
  477. package/dist/sdk/components/renderer-background.d.ts +0 -9
  478. package/dist/sdk/components/renderer-background.d.ts.map +0 -1
  479. package/dist/sdk/components/session-graph-panel.d.ts +0 -7
  480. package/dist/sdk/components/session-graph-panel.d.ts.map +0 -1
  481. package/dist/sdk/components/status-helpers.d.ts +0 -6
  482. package/dist/sdk/components/status-helpers.d.ts.map +0 -1
  483. package/dist/sdk/components/statusline.d.ts +0 -5
  484. package/dist/sdk/components/statusline.d.ts.map +0 -1
  485. package/dist/sdk/components/tui-diagnostics.d.ts +0 -56
  486. package/dist/sdk/components/tui-diagnostics.d.ts.map +0 -1
  487. package/dist/sdk/components/workflow-picker-panel.d.ts +0 -126
  488. package/dist/sdk/components/workflow-picker-panel.d.ts.map +0 -1
  489. package/dist/sdk/define-workflow.d.ts +0 -107
  490. package/dist/sdk/define-workflow.d.ts.map +0 -1
  491. package/dist/sdk/errors.d.ts +0 -46
  492. package/dist/sdk/errors.d.ts.map +0 -1
  493. package/dist/sdk/index.d.ts +0 -26
  494. package/dist/sdk/index.d.ts.map +0 -1
  495. package/dist/sdk/primitives/inputs.d.ts +0 -36
  496. package/dist/sdk/primitives/inputs.d.ts.map +0 -1
  497. package/dist/sdk/primitives/metadata.d.ts +0 -40
  498. package/dist/sdk/primitives/metadata.d.ts.map +0 -1
  499. package/dist/sdk/primitives/run.d.ts +0 -57
  500. package/dist/sdk/primitives/run.d.ts.map +0 -1
  501. package/dist/sdk/primitives/sessions.d.ts +0 -128
  502. package/dist/sdk/primitives/sessions.d.ts.map +0 -1
  503. package/dist/sdk/providers/claude.d.ts +0 -392
  504. package/dist/sdk/providers/claude.d.ts.map +0 -1
  505. package/dist/sdk/providers/copilot.d.ts +0 -55
  506. package/dist/sdk/providers/copilot.d.ts.map +0 -1
  507. package/dist/sdk/providers/opencode.d.ts +0 -27
  508. package/dist/sdk/providers/opencode.d.ts.map +0 -1
  509. package/dist/sdk/registry.d.ts +0 -27
  510. package/dist/sdk/registry.d.ts.map +0 -1
  511. package/dist/sdk/runtime/attached-footer.d.ts +0 -31
  512. package/dist/sdk/runtime/attached-footer.d.ts.map +0 -1
  513. package/dist/sdk/runtime/cc-debounce.d.ts +0 -29
  514. package/dist/sdk/runtime/cc-debounce.d.ts.map +0 -1
  515. package/dist/sdk/runtime/executor-env.d.ts +0 -20
  516. package/dist/sdk/runtime/executor-env.d.ts.map +0 -1
  517. package/dist/sdk/runtime/executor.d.ts +0 -265
  518. package/dist/sdk/runtime/executor.d.ts.map +0 -1
  519. package/dist/sdk/runtime/graph-inference.d.ts +0 -35
  520. package/dist/sdk/runtime/graph-inference.d.ts.map +0 -1
  521. package/dist/sdk/runtime/orchestrator-entry.d.ts +0 -26
  522. package/dist/sdk/runtime/orchestrator-entry.d.ts.map +0 -1
  523. package/dist/sdk/runtime/panel.d.ts +0 -9
  524. package/dist/sdk/runtime/panel.d.ts.map +0 -1
  525. package/dist/sdk/runtime/port-discovery.d.ts +0 -71
  526. package/dist/sdk/runtime/port-discovery.d.ts.map +0 -1
  527. package/dist/sdk/runtime/status-writer.d.ts +0 -101
  528. package/dist/sdk/runtime/status-writer.d.ts.map +0 -1
  529. package/dist/sdk/runtime/theme.d.ts +0 -33
  530. package/dist/sdk/runtime/theme.d.ts.map +0 -1
  531. package/dist/sdk/runtime/tmux.d.ts +0 -307
  532. package/dist/sdk/runtime/tmux.d.ts.map +0 -1
  533. package/dist/sdk/runtime/version-compat.d.ts +0 -28
  534. package/dist/sdk/runtime/version-compat.d.ts.map +0 -1
  535. package/dist/sdk/types.d.ts +0 -435
  536. package/dist/sdk/types.d.ts.map +0 -1
  537. package/dist/sdk/worker-shared.d.ts +0 -42
  538. package/dist/sdk/worker-shared.d.ts.map +0 -1
  539. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts +0 -81
  540. package/dist/sdk/workflows/builtin/deep-research-codebase/claude/index.d.ts.map +0 -1
  541. package/dist/sdk/workflows/builtin/deep-research-codebase/copilot/index.d.ts +0 -37
  542. package/dist/sdk/workflows/builtin/deep-research-codebase/copilot/index.d.ts.map +0 -1
  543. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/batching.d.ts +0 -43
  544. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/batching.d.ts.map +0 -1
  545. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.d.ts +0 -14
  546. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.d.ts.map +0 -1
  547. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.d.ts +0 -136
  548. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.d.ts.map +0 -1
  549. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scout.d.ts +0 -58
  550. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scout.d.ts.map +0 -1
  551. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scratch.d.ts +0 -43
  552. package/dist/sdk/workflows/builtin/deep-research-codebase/helpers/scratch.d.ts.map +0 -1
  553. package/dist/sdk/workflows/builtin/deep-research-codebase/opencode/index.d.ts +0 -37
  554. package/dist/sdk/workflows/builtin/deep-research-codebase/opencode/index.d.ts.map +0 -1
  555. package/dist/sdk/workflows/builtin/open-claude-design/claude/index.d.ts +0 -68
  556. package/dist/sdk/workflows/builtin/open-claude-design/claude/index.d.ts.map +0 -1
  557. package/dist/sdk/workflows/builtin/open-claude-design/copilot/index.d.ts +0 -56
  558. package/dist/sdk/workflows/builtin/open-claude-design/copilot/index.d.ts.map +0 -1
  559. package/dist/sdk/workflows/builtin/open-claude-design/helpers/constants.d.ts +0 -72
  560. package/dist/sdk/workflows/builtin/open-claude-design/helpers/constants.d.ts.map +0 -1
  561. package/dist/sdk/workflows/builtin/open-claude-design/helpers/design-system.d.ts +0 -46
  562. package/dist/sdk/workflows/builtin/open-claude-design/helpers/design-system.d.ts.map +0 -1
  563. package/dist/sdk/workflows/builtin/open-claude-design/helpers/export.d.ts +0 -32
  564. package/dist/sdk/workflows/builtin/open-claude-design/helpers/export.d.ts.map +0 -1
  565. package/dist/sdk/workflows/builtin/open-claude-design/helpers/import.d.ts +0 -33
  566. package/dist/sdk/workflows/builtin/open-claude-design/helpers/import.d.ts.map +0 -1
  567. package/dist/sdk/workflows/builtin/open-claude-design/helpers/prompts.d.ts +0 -106
  568. package/dist/sdk/workflows/builtin/open-claude-design/helpers/prompts.d.ts.map +0 -1
  569. package/dist/sdk/workflows/builtin/open-claude-design/helpers/scan.d.ts +0 -50
  570. package/dist/sdk/workflows/builtin/open-claude-design/helpers/scan.d.ts.map +0 -1
  571. package/dist/sdk/workflows/builtin/open-claude-design/helpers/validation.d.ts +0 -12
  572. package/dist/sdk/workflows/builtin/open-claude-design/helpers/validation.d.ts.map +0 -1
  573. package/dist/sdk/workflows/builtin/open-claude-design/opencode/index.d.ts +0 -58
  574. package/dist/sdk/workflows/builtin/open-claude-design/opencode/index.d.ts.map +0 -1
  575. package/dist/sdk/workflows/builtin/ralph/claude/index.d.ts +0 -37
  576. package/dist/sdk/workflows/builtin/ralph/claude/index.d.ts.map +0 -1
  577. package/dist/sdk/workflows/builtin/ralph/copilot/index.d.ts +0 -34
  578. package/dist/sdk/workflows/builtin/ralph/copilot/index.d.ts.map +0 -1
  579. package/dist/sdk/workflows/builtin/ralph/helpers/copilot-reviewer.d.ts +0 -25
  580. package/dist/sdk/workflows/builtin/ralph/helpers/copilot-reviewer.d.ts.map +0 -1
  581. package/dist/sdk/workflows/builtin/ralph/helpers/git.d.ts +0 -69
  582. package/dist/sdk/workflows/builtin/ralph/helpers/git.d.ts.map +0 -1
  583. package/dist/sdk/workflows/builtin/ralph/helpers/prompts.d.ts +0 -266
  584. package/dist/sdk/workflows/builtin/ralph/helpers/prompts.d.ts.map +0 -1
  585. package/dist/sdk/workflows/builtin/ralph/helpers/review.d.ts +0 -24
  586. package/dist/sdk/workflows/builtin/ralph/helpers/review.d.ts.map +0 -1
  587. package/dist/sdk/workflows/builtin/ralph/opencode/index.d.ts +0 -33
  588. package/dist/sdk/workflows/builtin/ralph/opencode/index.d.ts.map +0 -1
  589. package/dist/sdk/workflows/index.d.ts +0 -32
  590. package/dist/sdk/workflows/index.d.ts.map +0 -1
  591. package/dist/services/config/additional-instructions.d.ts +0 -67
  592. package/dist/services/config/additional-instructions.d.ts.map +0 -1
  593. package/dist/services/config/atomic-config.d.ts +0 -42
  594. package/dist/services/config/atomic-config.d.ts.map +0 -1
  595. package/dist/services/config/definitions.d.ts +0 -52
  596. package/dist/services/config/definitions.d.ts.map +0 -1
  597. package/dist/services/config/index.d.ts +0 -7
  598. package/dist/services/config/index.d.ts.map +0 -1
  599. package/dist/services/config/scm-sync.d.ts +0 -37
  600. package/dist/services/config/scm-sync.d.ts.map +0 -1
  601. package/dist/services/config/settings-schema.d.ts +0 -2
  602. package/dist/services/config/settings-schema.d.ts.map +0 -1
  603. package/dist/services/system/copy.d.ts +0 -84
  604. package/dist/services/system/copy.d.ts.map +0 -1
  605. package/dist/services/system/detect.d.ts +0 -75
  606. package/dist/services/system/detect.d.ts.map +0 -1
  607. package/dist/theme/colors.d.ts +0 -35
  608. package/dist/theme/colors.d.ts.map +0 -1
  609. package/src/cli.ts +0 -397
  610. package/src/commands/builtin-registry.ts +0 -37
  611. package/src/commands/cli/chat/index.test.ts +0 -252
  612. package/src/commands/cli/chat/index.ts +0 -430
  613. package/src/commands/cli/chat.ts +0 -8
  614. package/src/commands/cli/claude-ask-hook.test.ts +0 -128
  615. package/src/commands/cli/claude-ask-hook.ts +0 -84
  616. package/src/commands/cli/claude-inflight-hook.test.ts +0 -598
  617. package/src/commands/cli/claude-inflight-hook.ts +0 -359
  618. package/src/commands/cli/claude-session-start-hook.ts +0 -61
  619. package/src/commands/cli/claude-stop-hook.test.ts +0 -317
  620. package/src/commands/cli/claude-stop-hook.ts +0 -441
  621. package/src/commands/cli/completions.ts +0 -24
  622. package/src/commands/cli/config.ts +0 -80
  623. package/src/commands/cli/footer.tsx +0 -248
  624. package/src/commands/cli/init/index.ts +0 -41
  625. package/src/commands/cli/init/onboarding.ts +0 -61
  626. package/src/commands/cli/init.ts +0 -8
  627. package/src/commands/cli/management-commands.ts +0 -112
  628. package/src/commands/cli/session.test.ts +0 -830
  629. package/src/commands/cli/session.ts +0 -447
  630. package/src/commands/cli/workflow-command.test.ts +0 -618
  631. package/src/commands/cli/workflow-inputs.test.ts +0 -353
  632. package/src/commands/cli/workflow-inputs.ts +0 -266
  633. package/src/commands/cli/workflow-list.test.ts +0 -235
  634. package/src/commands/cli/workflow-list.ts +0 -0
  635. package/src/commands/cli/workflow-status.test.ts +0 -451
  636. package/src/commands/cli/workflow-status.ts +0 -330
  637. package/src/commands/cli/workflow.ts +0 -196
  638. package/src/completions/bash.ts +0 -102
  639. package/src/completions/fish.ts +0 -136
  640. package/src/completions/index.ts +0 -7
  641. package/src/completions/powershell.ts +0 -195
  642. package/src/completions/zsh.ts +0 -150
  643. package/src/lib/atomic-temp.test.ts +0 -86
  644. package/src/lib/atomic-temp.ts +0 -62
  645. package/src/lib/common-ignore.ts +0 -46
  646. package/src/lib/merge.ts +0 -103
  647. package/src/lib/path-root-guard.ts +0 -38
  648. package/src/lib/spawn.test.ts +0 -109
  649. package/src/lib/spawn.ts +0 -678
  650. package/src/lib/terminal-env.test.ts +0 -343
  651. package/src/lib/terminal-env.ts +0 -100
  652. package/src/scripts/bump-version.ts +0 -94
  653. package/src/scripts/bundle-configs.ts +0 -116
  654. package/src/scripts/clean-dist.test.ts +0 -53
  655. package/src/scripts/clean-dist.ts +0 -37
  656. package/src/scripts/constants-base.ts +0 -14
  657. package/src/scripts/constants.ts +0 -35
  658. package/src/sdk/components/attached-statusline.tsx +0 -86
  659. package/src/sdk/components/color-utils.ts +0 -20
  660. package/src/sdk/components/compact-switcher.tsx +0 -78
  661. package/src/sdk/components/connectors.test.ts +0 -707
  662. package/src/sdk/components/connectors.ts +0 -160
  663. package/src/sdk/components/edge.tsx +0 -13
  664. package/src/sdk/components/error-boundary.tsx +0 -38
  665. package/src/sdk/components/graph-theme.ts +0 -37
  666. package/src/sdk/components/header.tsx +0 -85
  667. package/src/sdk/components/hooks.ts +0 -21
  668. package/src/sdk/components/layout.test.ts +0 -1245
  669. package/src/sdk/components/layout.ts +0 -223
  670. package/src/sdk/components/node-card.tsx +0 -91
  671. package/src/sdk/components/orchestrator-panel-contexts.ts +0 -35
  672. package/src/sdk/components/orchestrator-panel-store.test.ts +0 -847
  673. package/src/sdk/components/orchestrator-panel-store.ts +0 -187
  674. package/src/sdk/components/orchestrator-panel-types.ts +0 -23
  675. package/src/sdk/components/orchestrator-panel.tsx +0 -262
  676. package/src/sdk/components/renderer-background.ts +0 -49
  677. package/src/sdk/components/session-graph-panel.tsx +0 -471
  678. package/src/sdk/components/status-helpers.ts +0 -33
  679. package/src/sdk/components/statusline.tsx +0 -68
  680. package/src/sdk/components/tui-diagnostics.ts +0 -273
  681. package/src/sdk/components/workflow-picker-panel.tsx +0 -1613
  682. package/src/sdk/define-workflow.test.ts +0 -354
  683. package/src/sdk/define-workflow.ts +0 -275
  684. package/src/sdk/errors.test.ts +0 -83
  685. package/src/sdk/errors.ts +0 -77
  686. package/src/sdk/index.test.ts +0 -92
  687. package/src/sdk/index.ts +0 -101
  688. package/src/sdk/primitives/inputs.ts +0 -48
  689. package/src/sdk/primitives/metadata.ts +0 -63
  690. package/src/sdk/primitives/run.ts +0 -81
  691. package/src/sdk/primitives/sessions.test.ts +0 -594
  692. package/src/sdk/primitives/sessions.ts +0 -328
  693. package/src/sdk/providers/claude.ts +0 -1450
  694. package/src/sdk/providers/copilot.test.ts +0 -365
  695. package/src/sdk/providers/copilot.ts +0 -185
  696. package/src/sdk/providers/headless-hil-policy.test.ts +0 -211
  697. package/src/sdk/providers/opencode.ts +0 -88
  698. package/src/sdk/registry.ts +0 -132
  699. package/src/sdk/runtime/attached-footer.ts +0 -155
  700. package/src/sdk/runtime/cc-debounce.ts +0 -104
  701. package/src/sdk/runtime/executor-env.ts +0 -45
  702. package/src/sdk/runtime/executor.test.ts +0 -1321
  703. package/src/sdk/runtime/executor.ts +0 -2136
  704. package/src/sdk/runtime/graph-inference.ts +0 -50
  705. package/src/sdk/runtime/orchestrator-entry.ts +0 -110
  706. package/src/sdk/runtime/panel.tsx +0 -9
  707. package/src/sdk/runtime/port-discovery.test.ts +0 -573
  708. package/src/sdk/runtime/port-discovery.ts +0 -496
  709. package/src/sdk/runtime/status-writer.test.ts +0 -245
  710. package/src/sdk/runtime/status-writer.ts +0 -201
  711. package/src/sdk/runtime/theme.ts +0 -71
  712. package/src/sdk/runtime/tmux.conf +0 -112
  713. package/src/sdk/runtime/tmux.ts +0 -785
  714. package/src/sdk/runtime/version-compat.ts +0 -68
  715. package/src/sdk/types.ts +0 -548
  716. package/src/sdk/worker-shared.test.ts +0 -163
  717. package/src/sdk/worker-shared.ts +0 -155
  718. package/src/sdk/workflows/builtin/deep-research-codebase/claude/index.ts +0 -569
  719. package/src/sdk/workflows/builtin/deep-research-codebase/copilot/index.ts +0 -481
  720. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/batching.ts +0 -65
  721. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/heuristic.ts +0 -24
  722. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/ignore-by-default.d.ts +0 -8
  723. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/prompts.ts +0 -958
  724. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/scout.ts +0 -505
  725. package/src/sdk/workflows/builtin/deep-research-codebase/helpers/scratch.ts +0 -115
  726. package/src/sdk/workflows/builtin/deep-research-codebase/opencode/index.ts +0 -530
  727. package/src/sdk/workflows/builtin/open-claude-design/claude/index.ts +0 -500
  728. package/src/sdk/workflows/builtin/open-claude-design/copilot/index.ts +0 -508
  729. package/src/sdk/workflows/builtin/open-claude-design/helpers/constants.ts +0 -159
  730. package/src/sdk/workflows/builtin/open-claude-design/helpers/design-system.ts +0 -88
  731. package/src/sdk/workflows/builtin/open-claude-design/helpers/export.ts +0 -193
  732. package/src/sdk/workflows/builtin/open-claude-design/helpers/import.ts +0 -52
  733. package/src/sdk/workflows/builtin/open-claude-design/helpers/prompts.ts +0 -1110
  734. package/src/sdk/workflows/builtin/open-claude-design/helpers/scan.ts +0 -117
  735. package/src/sdk/workflows/builtin/open-claude-design/helpers/validation.ts +0 -38
  736. package/src/sdk/workflows/builtin/open-claude-design/opencode/index.ts +0 -610
  737. package/src/sdk/workflows/builtin/ralph/claude/index.ts +0 -272
  738. package/src/sdk/workflows/builtin/ralph/copilot/index.ts +0 -298
  739. package/src/sdk/workflows/builtin/ralph/helpers/copilot-reviewer.ts +0 -105
  740. package/src/sdk/workflows/builtin/ralph/helpers/git.ts +0 -201
  741. package/src/sdk/workflows/builtin/ralph/helpers/prompts.ts +0 -1108
  742. package/src/sdk/workflows/builtin/ralph/helpers/review.ts +0 -33
  743. package/src/sdk/workflows/builtin/ralph/opencode/index.ts +0 -290
  744. package/src/sdk/workflows/index.ts +0 -116
  745. package/src/services/config/additional-instructions.ts +0 -273
  746. package/src/services/config/atomic-config.ts +0 -210
  747. package/src/services/config/atomic-global-config.ts +0 -348
  748. package/src/services/config/config-path.ts +0 -19
  749. package/src/services/config/definitions.ts +0 -125
  750. package/src/services/config/index.ts +0 -7
  751. package/src/services/config/scm-sync.ts +0 -185
  752. package/src/services/config/settings-schema.ts +0 -2
  753. package/src/services/config/settings.ts +0 -144
  754. package/src/services/system/agents.ts +0 -95
  755. package/src/services/system/auth.test.ts +0 -343
  756. package/src/services/system/auth.ts +0 -140
  757. package/src/services/system/auto-sync.ts +0 -128
  758. package/src/services/system/copy.ts +0 -392
  759. package/src/services/system/detect.ts +0 -161
  760. package/src/services/system/file-lock.ts +0 -289
  761. package/src/services/system/install-ui.ts +0 -296
  762. package/src/services/system/skills.ts +0 -58
  763. package/src/theme/colors.ts +0 -96
  764. package/src/theme/logo.ts +0 -123
  765. package/src/version.ts +0 -7
@@ -1,404 +0,0 @@
1
- ---
2
- name: advanced-evaluation
3
- description: This skill should be used when the user asks to "implement LLM-as-judge", "compare model outputs", "create evaluation rubrics", "mitigate evaluation bias", or mentions direct scoring, pairwise comparison, position bias, evaluation pipelines, or automated quality assessment. Part of the context engineering skill suite — also activates when the user mentions "context engineering" or "context-engineering" in the context of evaluating LLM output quality.
4
- metadata:
5
- provider: atomic
6
- ---
7
-
8
- # Advanced Evaluation
9
-
10
- This skill covers production-grade techniques for evaluating LLM outputs using LLMs as judges. It synthesizes research from academic papers, industry practices, and practical implementation experience into actionable patterns for building reliable evaluation systems.
11
-
12
- **Key insight**: LLM-as-a-Judge is not a single technique but a family of approaches, each suited to different evaluation contexts. Choosing the right approach and mitigating known biases is the core competency this skill develops.
13
-
14
- ## When to Activate
15
-
16
- Activate this skill when:
17
-
18
- - Building automated evaluation pipelines for LLM outputs
19
- - Comparing multiple model responses to select the best one
20
- - Establishing consistent quality standards across evaluation teams
21
- - Debugging evaluation systems that show inconsistent results
22
- - Designing A/B tests for prompt or model changes
23
- - Creating rubrics for human or automated evaluation
24
- - Analyzing correlation between automated and human judgments
25
-
26
- ## Core Concepts
27
-
28
- ### The Evaluation Taxonomy
29
-
30
- Select between two primary approaches based on whether ground truth exists:
31
-
32
- **Direct Scoring** — Use when objective criteria exist (factual accuracy, instruction following, toxicity). A single LLM rates one response on a defined scale. Achieves moderate-to-high reliability for well-defined criteria. Watch for score calibration drift and inconsistent scale interpretation.
33
-
34
- **Pairwise Comparison** — Use for subjective preferences (tone, style, persuasiveness). An LLM compares two responses and selects the better one. Achieves higher human-judge agreement than direct scoring for preference tasks (Zheng et al., 2023). Watch for position bias and length bias.
35
-
36
- ### The Bias Landscape
37
-
38
- Mitigate these systematic biases in every evaluation system:
39
-
40
- **Position Bias**: First-position responses get preferential treatment. Mitigate by evaluating twice with swapped positions, then apply majority vote or consistency check.
41
-
42
- **Length Bias**: Longer responses score higher regardless of quality. Mitigate by explicitly prompting to ignore length and applying length-normalized scoring.
43
-
44
- **Self-Enhancement Bias**: Models rate their own outputs higher. Mitigate by using different models for generation and evaluation.
45
-
46
- **Verbosity Bias**: Excessive detail scores higher even when unnecessary. Mitigate with criteria-specific rubrics that penalize irrelevant detail.
47
-
48
- **Authority Bias**: Confident tone scores higher regardless of accuracy. Mitigate by requiring evidence citation and adding a fact-checking layer.
49
-
50
- ### Metric Selection Framework
51
-
52
- Match metrics to the evaluation task structure:
53
-
54
- | Task Type | Primary Metrics | Secondary Metrics |
55
- |-----------|-----------------|-------------------|
56
- | Binary classification (pass/fail) | Recall, Precision, F1 | Cohen's kappa |
57
- | Ordinal scale (1-5 rating) | Spearman's rho, Kendall's tau | Cohen's kappa (weighted) |
58
- | Pairwise preference | Agreement rate, Position consistency | Confidence calibration |
59
- | Multi-label | Macro-F1, Micro-F1 | Per-label precision/recall |
60
-
61
- Prioritize systematic disagreement patterns over absolute agreement rates because a judge that consistently disagrees with humans on specific criteria is more problematic than one with random noise.
62
-
63
- ## Evaluation Approaches
64
-
65
- ### Direct Scoring Implementation
66
-
67
- Build direct scoring with three components: clear criteria, a calibrated scale, and structured output format.
68
-
69
- **Criteria Definition Pattern**:
70
- ```
71
- Criterion: [Name]
72
- Description: [What this criterion measures]
73
- Weight: [Relative importance, 0-1]
74
- ```
75
-
76
- **Scale Calibration** — Choose scale granularity based on rubric detail:
77
- - 1-3: Binary with neutral option, lowest cognitive load
78
- - 1-5: Standard Likert, best balance of granularity and reliability
79
- - 1-10: Use only with detailed per-level rubrics because calibration is harder
80
-
81
- **Prompt Structure for Direct Scoring**:
82
- ```
83
- You are an expert evaluator assessing response quality.
84
-
85
- ## Task
86
- Evaluate the following response against each criterion.
87
-
88
- ## Original Prompt
89
- {prompt}
90
-
91
- ## Response to Evaluate
92
- {response}
93
-
94
- ## Criteria
95
- {for each criterion: name, description, weight}
96
-
97
- ## Instructions
98
- For each criterion:
99
- 1. Find specific evidence in the response
100
- 2. Score according to the rubric (1-{max} scale)
101
- 3. Justify your score with evidence
102
- 4. Suggest one specific improvement
103
-
104
- ## Output Format
105
- Respond with structured JSON containing scores, justifications, and summary.
106
- ```
107
-
108
- Always require justification before the score in all scoring prompts because research shows this improves reliability by 15-25% compared to score-first approaches.
109
-
110
- ### Pairwise Comparison Implementation
111
-
112
- Apply position bias mitigation in every pairwise evaluation:
113
-
114
- 1. First pass: Response A in first position, Response B in second
115
- 2. Second pass: Response B in first position, Response A in second
116
- 3. Consistency check: If passes disagree, return TIE with reduced confidence
117
- 4. Final verdict: Consistent winner with averaged confidence
118
-
119
- **Prompt Structure for Pairwise Comparison**:
120
- ```
121
- You are an expert evaluator comparing two AI responses.
122
-
123
- ## Critical Instructions
124
- - Do NOT prefer responses because they are longer
125
- - Do NOT prefer responses based on position (first vs second)
126
- - Focus ONLY on quality according to the specified criteria
127
- - Ties are acceptable when responses are genuinely equivalent
128
-
129
- ## Original Prompt
130
- {prompt}
131
-
132
- ## Response A
133
- {response_a}
134
-
135
- ## Response B
136
- {response_b}
137
-
138
- ## Comparison Criteria
139
- {criteria list}
140
-
141
- ## Instructions
142
- 1. Analyze each response independently first
143
- 2. Compare them on each criterion
144
- 3. Determine overall winner with confidence level
145
-
146
- ## Output Format
147
- JSON with per-criterion comparison, overall winner, confidence (0-1), and reasoning.
148
- ```
149
-
150
- **Confidence Calibration** — Map confidence to position consistency:
151
- - Both passes agree: confidence = average of individual confidences
152
- - Passes disagree: confidence = 0.5, verdict = TIE
153
-
154
- ### Rubric Generation
155
-
156
- Generate rubrics to reduce evaluation variance by 40-60% compared to open-ended scoring.
157
-
158
- **Include these rubric components**:
159
- 1. **Level descriptions**: Clear boundaries for each score level
160
- 2. **Characteristics**: Observable features that define each level
161
- 3. **Examples**: Representative text for each level (optional but valuable)
162
- 4. **Edge cases**: Guidance for ambiguous situations
163
- 5. **Scoring guidelines**: General principles for consistent application
164
-
165
- **Set strictness calibration** for the use case:
166
- - **Lenient**: Lower passing bar, appropriate for encouraging iteration
167
- - **Balanced**: Typical production expectations
168
- - **Strict**: High standards for safety-critical or high-stakes evaluation
169
-
170
- Adapt rubrics to the domain — use domain-specific terminology. A code readability rubric mentions variables, functions, and comments. A medical accuracy rubric references clinical terminology and evidence standards.
171
-
172
- ## Practical Guidance
173
-
174
- ### Evaluation Pipeline Design
175
-
176
- Build production evaluation systems with these layers: Criteria Loader (rubrics + weights) -> Primary Scorer (direct or pairwise) -> Bias Mitigation (position swap, etc.) -> Confidence Scoring (calibration) -> Output (scores + justifications + confidence). See [Evaluation Pipeline Diagram](./references/evaluation-pipeline.md) for the full visual layout.
177
-
178
- ### Decision Framework: Direct vs. Pairwise
179
-
180
- Apply this decision tree:
181
-
182
- ```
183
- Is there an objective ground truth?
184
- +-- Yes -> Direct Scoring
185
- | Examples: factual accuracy, instruction following, format compliance
186
- |
187
- +-- No -> Is it a preference or quality judgment?
188
- +-- Yes -> Pairwise Comparison
189
- | Examples: tone, style, persuasiveness, creativity
190
- |
191
- +-- No -> Consider reference-based evaluation
192
- Examples: summarization (compare to source), translation (compare to reference)
193
- ```
194
-
195
- ### Scaling Evaluation
196
-
197
- For high-volume evaluation, apply one of these strategies:
198
-
199
- 1. **Panel of LLMs (PoLL)**: Use multiple models as judges and aggregate votes to reduce individual model bias. More expensive but more reliable for high-stakes decisions.
200
-
201
- 2. **Hierarchical evaluation**: Use a fast cheap model for screening and an expensive model for edge cases. Requires calibration of the screening threshold.
202
-
203
- 3. **Human-in-the-loop**: Automate clear cases and route low-confidence decisions to human review. Design feedback loops to improve automated evaluation over time.
204
-
205
- ## Examples
206
-
207
- ### Example 1: Direct Scoring for Accuracy
208
-
209
- **Input**:
210
- ```
211
- Prompt: "What causes seasons on Earth?"
212
- Response: "Seasons are caused by Earth's tilted axis. As Earth orbits the Sun,
213
- different hemispheres receive more direct sunlight at different times of year."
214
- Criterion: Factual Accuracy (weight: 1.0)
215
- Scale: 1-5
216
- ```
217
-
218
- **Output**:
219
- ```json
220
- {
221
- "criterion": "Factual Accuracy",
222
- "score": 5,
223
- "evidence": [
224
- "Correctly identifies axial tilt as primary cause",
225
- "Correctly explains differential sunlight by hemisphere",
226
- "No factual errors present"
227
- ],
228
- "justification": "Response accurately explains the cause of seasons with correct
229
- scientific reasoning. Both the axial tilt and its effect on sunlight distribution
230
- are correctly described.",
231
- "improvement": "Could add the specific tilt angle (23.5 degrees) for completeness."
232
- }
233
- ```
234
-
235
- ### Example 2: Pairwise Comparison with Position Swap
236
-
237
- **Input**:
238
- ```
239
- Prompt: "Explain machine learning to a beginner"
240
- Response A: [Technical explanation with jargon]
241
- Response B: [Simple analogy-based explanation]
242
- Criteria: ["clarity", "accessibility"]
243
- ```
244
-
245
- **First Pass (A first)**:
246
- ```json
247
- { "winner": "B", "confidence": 0.8 }
248
- ```
249
-
250
- **Second Pass (B first)**:
251
- ```json
252
- { "winner": "A", "confidence": 0.6 }
253
- ```
254
- (Note: Winner is A because B was in first position)
255
-
256
- **Mapped Second Pass**:
257
- ```json
258
- { "winner": "B", "confidence": 0.6 }
259
- ```
260
-
261
- **Final Result**:
262
- ```json
263
- {
264
- "winner": "B",
265
- "confidence": 0.7,
266
- "positionConsistency": {
267
- "consistent": true,
268
- "firstPassWinner": "B",
269
- "secondPassWinner": "B"
270
- }
271
- }
272
- ```
273
-
274
- ### Example 3: Rubric Generation
275
-
276
- **Input**:
277
- ```
278
- criterionName: "Code Readability"
279
- criterionDescription: "How easy the code is to understand and maintain"
280
- domain: "software engineering"
281
- scale: "1-5"
282
- strictness: "balanced"
283
- ```
284
-
285
- **Output** (abbreviated):
286
- ```json
287
- {
288
- "levels": [
289
- {
290
- "score": 1,
291
- "label": "Poor",
292
- "description": "Code is difficult to understand without significant effort",
293
- "characteristics": [
294
- "No meaningful variable or function names",
295
- "No comments or documentation",
296
- "Deeply nested or convoluted logic"
297
- ]
298
- },
299
- {
300
- "score": 3,
301
- "label": "Adequate",
302
- "description": "Code is understandable with some effort",
303
- "characteristics": [
304
- "Most variables have meaningful names",
305
- "Basic comments present for complex sections",
306
- "Logic is followable but could be cleaner"
307
- ]
308
- },
309
- {
310
- "score": 5,
311
- "label": "Excellent",
312
- "description": "Code is immediately clear and maintainable",
313
- "characteristics": [
314
- "All names are descriptive and consistent",
315
- "Comprehensive documentation",
316
- "Clean, modular structure"
317
- ]
318
- }
319
- ],
320
- "edgeCases": [
321
- {
322
- "situation": "Code is well-structured but uses domain-specific abbreviations",
323
- "guidance": "Score based on readability for domain experts, not general audience"
324
- }
325
- ]
326
- }
327
- ```
328
-
329
- ## Guidelines
330
-
331
- 1. **Always require justification before scores** - Chain-of-thought prompting improves reliability by 15-25%
332
-
333
- 2. **Always swap positions in pairwise comparison** - Single-pass comparison is corrupted by position bias
334
-
335
- 3. **Match scale granularity to rubric specificity** - Don't use 1-10 without detailed level descriptions
336
-
337
- 4. **Separate objective and subjective criteria** - Use direct scoring for objective, pairwise for subjective
338
-
339
- 5. **Include confidence scores** - Calibrate to position consistency and evidence strength
340
-
341
- 6. **Define edge cases explicitly** - Ambiguous situations cause the most evaluation variance
342
-
343
- 7. **Use domain-specific rubrics** - Generic rubrics produce generic (less useful) evaluations
344
-
345
- 8. **Validate against human judgments** - Automated evaluation is only valuable if it correlates with human assessment
346
-
347
- 9. **Monitor for systematic bias** - Track disagreement patterns by criterion, response type, model
348
-
349
- 10. **Design for iteration** - Evaluation systems improve with feedback loops
350
-
351
- ## Gotchas
352
-
353
- 1. **Scoring without justification**: Scores lack grounding and are difficult to debug. Always require evidence-based justification before the score.
354
-
355
- 2. **Single-pass pairwise comparison**: Position bias corrupts results when positions are not swapped. Always evaluate twice with swapped positions and check consistency.
356
-
357
- 3. **Overloaded criteria**: Criteria that measure multiple things at once produce unreliable scores. Enforce one criterion = one measurable aspect.
358
-
359
- 4. **Missing edge case guidance**: Evaluators handle ambiguous cases inconsistently without explicit instructions. Include edge cases in rubrics with clear resolution rules.
360
-
361
- 5. **Ignoring confidence calibration**: High-confidence wrong judgments are worse than low-confidence ones. Calibrate confidence to position consistency and evidence strength.
362
-
363
- 6. **Rubric drift**: Rubrics become miscalibrated as quality standards evolve or model capabilities improve. Schedule periodic rubric reviews and re-anchor score levels against fresh human-annotated examples.
364
-
365
- 7. **Evaluation prompt sensitivity**: Minor wording changes in evaluation prompts (e.g., reordering instructions, changing phrasing) can cause 10-20% score swings. Version-control evaluation prompts and run regression tests before deploying prompt changes.
366
-
367
- 8. **Uncontrolled length bias**: Longer responses systematically score higher even when conciseness is preferred. Add explicit length-neutrality instructions to evaluation prompts and validate with length-controlled test pairs.
368
-
369
- ## Integration
370
-
371
- This skill integrates with:
372
-
373
- - **context-fundamentals** - Evaluation prompts require effective context structure
374
- - **tool-design** - Evaluation tools need proper schemas and error handling
375
- - **context-optimization** - Evaluation prompts can be optimized for token efficiency
376
- - **evaluation** (foundational) - This skill extends the foundational evaluation concepts
377
-
378
- ## References
379
-
380
- Internal reference:
381
- - [LLM-as-Judge Implementation Patterns](./references/implementation-patterns.md) - Read when: building an evaluation pipeline from scratch or integrating LLM judges into CI/CD
382
- - [Bias Mitigation Techniques](./references/bias-mitigation.md) - Read when: evaluation results show inconsistent or suspicious scoring patterns
383
- - [Metric Selection Guide](./references/metrics-guide.md) - Read when: choosing statistical metrics to validate evaluation reliability
384
- - [Evaluation Pipeline Diagram](./references/evaluation-pipeline.md) - Read when: designing the architecture of a multi-stage evaluation system
385
-
386
- External research:
387
- - [Eugene Yan: Evaluating the Effectiveness of LLM-Evaluators](https://eugeneyan.com/writing/llm-evaluators/) - Read when: surveying the state of the art in LLM evaluation
388
- - [Judging LLM-as-a-Judge (Zheng et al., 2023)](https://arxiv.org/abs/2306.05685) - Read when: understanding position bias and MT-Bench methodology
389
- - [G-Eval: NLG Evaluation using GPT-4 (Liu et al., 2023)](https://arxiv.org/abs/2303.16634) - Read when: implementing chain-of-thought evaluation scoring
390
- - [Large Language Models are not Fair Evaluators (Wang et al., 2023)](https://arxiv.org/abs/2305.17926) - Read when: diagnosing systematic bias in evaluation outputs
391
-
392
- Related skills in this collection:
393
- - evaluation - Foundational evaluation concepts
394
- - context-fundamentals - Context structure for evaluation prompts
395
- - tool-design - Building evaluation tools
396
-
397
- ---
398
-
399
- ## Skill Metadata
400
-
401
- **Created**: 2025-12-24
402
- **Last Updated**: 2026-03-17
403
- **Author**: Agent Skills for Context Engineering Contributors
404
- **Version**: 2.0.0
@@ -1,288 +0,0 @@
1
- # Bias Mitigation Techniques for LLM Evaluation
2
-
3
- This reference details specific techniques for mitigating known biases in LLM-as-a-Judge systems.
4
-
5
- ## Position Bias
6
-
7
- ### The Problem
8
-
9
- In pairwise comparison, LLMs systematically prefer responses in certain positions. Research shows:
10
- - GPT has mild first-position bias (~55% preference for first position in ties)
11
- - Claude shows similar patterns
12
- - Smaller models often show stronger bias
13
-
14
- ### Mitigation: Position Swapping Protocol
15
-
16
- ```python
17
- async def position_swap_comparison(response_a, response_b, prompt, criteria):
18
- # Pass 1: Original order
19
- result_ab = await compare(response_a, response_b, prompt, criteria)
20
-
21
- # Pass 2: Swapped order
22
- result_ba = await compare(response_b, response_a, prompt, criteria)
23
-
24
- # Map second result (A in second position → B in first)
25
- result_ba_mapped = {
26
- 'winner': {'A': 'B', 'B': 'A', 'TIE': 'TIE'}[result_ba['winner']],
27
- 'confidence': result_ba['confidence']
28
- }
29
-
30
- # Consistency check
31
- if result_ab['winner'] == result_ba_mapped['winner']:
32
- return {
33
- 'winner': result_ab['winner'],
34
- 'confidence': (result_ab['confidence'] + result_ba_mapped['confidence']) / 2,
35
- 'position_consistent': True
36
- }
37
- else:
38
- # Disagreement indicates position bias was a factor
39
- return {
40
- 'winner': 'TIE',
41
- 'confidence': 0.5,
42
- 'position_consistent': False,
43
- 'bias_detected': True
44
- }
45
- ```
46
-
47
- ### Alternative: Multiple Shuffles
48
-
49
- For higher reliability, use multiple position orderings:
50
-
51
- ```python
52
- async def multi_shuffle_comparison(response_a, response_b, prompt, criteria, n_shuffles=3):
53
- results = []
54
- for i in range(n_shuffles):
55
- if i % 2 == 0:
56
- r = await compare(response_a, response_b, prompt, criteria)
57
- else:
58
- r = await compare(response_b, response_a, prompt, criteria)
59
- r['winner'] = {'A': 'B', 'B': 'A', 'TIE': 'TIE'}[r['winner']]
60
- results.append(r)
61
-
62
- # Majority vote
63
- winners = [r['winner'] for r in results]
64
- final_winner = max(set(winners), key=winners.count)
65
- agreement = winners.count(final_winner) / len(winners)
66
-
67
- return {
68
- 'winner': final_winner,
69
- 'confidence': agreement,
70
- 'n_shuffles': n_shuffles
71
- }
72
- ```
73
-
74
- ## Length Bias
75
-
76
- ### The Problem
77
-
78
- LLMs tend to rate longer responses higher, regardless of quality. This manifests as:
79
- - Verbose responses receiving inflated scores
80
- - Concise but complete responses penalized
81
- - Padding and repetition being rewarded
82
-
83
- ### Mitigation: Explicit Prompting
84
-
85
- Include anti-length-bias instructions in the prompt:
86
-
87
- ```
88
- CRITICAL EVALUATION GUIDELINES:
89
- - Do NOT prefer responses because they are longer
90
- - Concise, complete answers are as valuable as detailed ones
91
- - Penalize unnecessary verbosity or repetition
92
- - Focus on information density, not word count
93
- ```
94
-
95
- ### Mitigation: Length-Normalized Scoring
96
-
97
- ```python
98
- def length_normalized_score(score, response_length, target_length=500):
99
- """Adjust score based on response length."""
100
- length_ratio = response_length / target_length
101
-
102
- if length_ratio > 2.0:
103
- # Penalize excessively long responses
104
- penalty = (length_ratio - 2.0) * 0.1
105
- return max(score - penalty, 1)
106
- elif length_ratio < 0.3:
107
- # Penalize excessively short responses
108
- penalty = (0.3 - length_ratio) * 0.5
109
- return max(score - penalty, 1)
110
- else:
111
- return score
112
- ```
113
-
114
- ### Mitigation: Separate Length Criterion
115
-
116
- Make length a separate, explicit criterion so it's not implicitly rewarded:
117
-
118
- ```python
119
- criteria = [
120
- {"name": "Accuracy", "description": "Factual correctness", "weight": 0.4},
121
- {"name": "Completeness", "description": "Covers key points", "weight": 0.3},
122
- {"name": "Conciseness", "description": "No unnecessary content", "weight": 0.3} # Explicit
123
- ]
124
- ```
125
-
126
- ## Self-Enhancement Bias
127
-
128
- ### The Problem
129
-
130
- Models rate outputs generated by themselves (or similar models) higher than outputs from different models.
131
-
132
- ### Mitigation: Cross-Model Evaluation
133
-
134
- Use a different model family for evaluation than generation:
135
-
136
- ```python
137
- def get_evaluator_model(generator_model):
138
- """Select evaluator to avoid self-enhancement bias."""
139
- if 'gpt' in generator_model.lower():
140
- return 'claude-4-5-sonnet'
141
- elif 'claude' in generator_model.lower():
142
- return 'gpt-5.2'
143
- else:
144
- return 'gpt-5.2' # Default
145
- ```
146
-
147
- ### Mitigation: Blind Evaluation
148
-
149
- Remove model attribution from responses before evaluation:
150
-
151
- ```python
152
- def anonymize_response(response, model_name):
153
- """Remove model-identifying patterns."""
154
- patterns = [
155
- f"As {model_name}",
156
- "I am an AI",
157
- "I don't have personal opinions",
158
- # Model-specific patterns
159
- ]
160
- anonymized = response
161
- for pattern in patterns:
162
- anonymized = anonymized.replace(pattern, "[REDACTED]")
163
- return anonymized
164
- ```
165
-
166
- ## Verbosity Bias
167
-
168
- ### The Problem
169
-
170
- Detailed explanations receive higher scores even when the extra detail is irrelevant or incorrect.
171
-
172
- ### Mitigation: Relevance-Weighted Scoring
173
-
174
- ```python
175
- async def relevance_weighted_evaluation(response, prompt, criteria):
176
- # First, assess relevance of each segment
177
- relevance_scores = await assess_relevance(response, prompt)
178
-
179
- # Weight evaluation by relevance
180
- segments = split_into_segments(response)
181
- weighted_scores = []
182
- for segment, relevance in zip(segments, relevance_scores):
183
- if relevance > 0.5: # Only count relevant segments
184
- score = await evaluate_segment(segment, prompt, criteria)
185
- weighted_scores.append(score * relevance)
186
-
187
- return sum(weighted_scores) / len(weighted_scores)
188
- ```
189
-
190
- ### Mitigation: Rubric with Verbosity Penalty
191
-
192
- Include explicit verbosity penalties in rubrics:
193
-
194
- ```python
195
- rubric_levels = [
196
- {
197
- "score": 5,
198
- "description": "Complete and concise. All necessary information, nothing extraneous.",
199
- "characteristics": ["Every sentence adds value", "No repetition", "Appropriately scoped"]
200
- },
201
- {
202
- "score": 3,
203
- "description": "Complete but verbose. Contains unnecessary detail or repetition.",
204
- "characteristics": ["Main points covered", "Some tangents", "Could be more concise"]
205
- },
206
- # ... etc
207
- ]
208
- ```
209
-
210
- ## Authority Bias
211
-
212
- ### The Problem
213
-
214
- Confident, authoritative tone is rated higher regardless of accuracy.
215
-
216
- ### Mitigation: Evidence Requirement
217
-
218
- Require explicit evidence for claims:
219
-
220
- ```
221
- For each claim in the response:
222
- 1. Identify whether it's a factual claim
223
- 2. Note if evidence or sources are provided
224
- 3. Score based on verifiability, not confidence
225
-
226
- IMPORTANT: Confident claims without evidence should NOT receive higher scores than
227
- hedged claims with evidence.
228
- ```
229
-
230
- ### Mitigation: Fact-Checking Layer
231
-
232
- Add a fact-checking step before scoring:
233
-
234
- ```python
235
- async def fact_checked_evaluation(response, prompt, criteria):
236
- # Extract claims
237
- claims = await extract_claims(response)
238
-
239
- # Fact-check each claim
240
- fact_check_results = await asyncio.gather(*[
241
- verify_claim(claim) for claim in claims
242
- ])
243
-
244
- # Adjust score based on fact-check results
245
- accuracy_factor = sum(r['verified'] for r in fact_check_results) / len(fact_check_results)
246
-
247
- base_score = await evaluate(response, prompt, criteria)
248
- return base_score * (0.7 + 0.3 * accuracy_factor) # At least 70% of score
249
- ```
250
-
251
- ## Aggregate Bias Detection
252
-
253
- Monitor for systematic biases in production:
254
-
255
- ```python
256
- class BiasMonitor:
257
- def __init__(self):
258
- self.evaluations = []
259
-
260
- def record(self, evaluation):
261
- self.evaluations.append(evaluation)
262
-
263
- def detect_position_bias(self):
264
- """Detect if first position wins more often than expected."""
265
- first_wins = sum(1 for e in self.evaluations if e['first_position_winner'])
266
- expected = len(self.evaluations) * 0.5
267
- z_score = (first_wins - expected) / (expected * 0.5) ** 0.5
268
- return {'bias_detected': abs(z_score) > 2, 'z_score': z_score}
269
-
270
- def detect_length_bias(self):
271
- """Detect if longer responses score higher."""
272
- from scipy.stats import spearmanr
273
- lengths = [e['response_length'] for e in self.evaluations]
274
- scores = [e['score'] for e in self.evaluations]
275
- corr, p_value = spearmanr(lengths, scores)
276
- return {'bias_detected': corr > 0.3 and p_value < 0.05, 'correlation': corr}
277
- ```
278
-
279
- ## Summary Table
280
-
281
- | Bias | Primary Mitigation | Secondary Mitigation | Detection Method |
282
- |------|-------------------|---------------------|------------------|
283
- | Position | Position swapping | Multiple shuffles | Consistency check |
284
- | Length | Explicit prompting | Length normalization | Length-score correlation |
285
- | Self-enhancement | Cross-model evaluation | Anonymization | Model comparison study |
286
- | Verbosity | Relevance weighting | Rubric penalties | Relevance scoring |
287
- | Authority | Evidence requirement | Fact-checking layer | Confidence-accuracy correlation |
288
-