@shakudo/kaji-setup-external 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (411) hide show
  1. package/README.md +155 -0
  2. package/assets/skills/ci-cd/.claude-plugin/plugin.json +8 -0
  3. package/assets/skills/ci-cd/SKILL.md +573 -0
  4. package/assets/skills/ci-cd/assets/templates/github-actions/docker-build.yml +164 -0
  5. package/assets/skills/ci-cd/assets/templates/github-actions/go-ci.yml +420 -0
  6. package/assets/skills/ci-cd/assets/templates/github-actions/node-ci.yml +313 -0
  7. package/assets/skills/ci-cd/assets/templates/github-actions/python-ci.yml +388 -0
  8. package/assets/skills/ci-cd/assets/templates/github-actions/security-scan.yml +416 -0
  9. package/assets/skills/ci-cd/assets/templates/gitlab-ci/docker-build.yml +298 -0
  10. package/assets/skills/ci-cd/assets/templates/gitlab-ci/go-ci.yml +548 -0
  11. package/assets/skills/ci-cd/assets/templates/gitlab-ci/node-ci.yml +334 -0
  12. package/assets/skills/ci-cd/assets/templates/gitlab-ci/python-ci.yml +472 -0
  13. package/assets/skills/ci-cd/assets/templates/gitlab-ci/security-scan.yml +479 -0
  14. package/assets/skills/ci-cd/references/best_practices.md +675 -0
  15. package/assets/skills/ci-cd/references/devsecops.md +862 -0
  16. package/assets/skills/ci-cd/references/optimization.md +651 -0
  17. package/assets/skills/ci-cd/references/security.md +611 -0
  18. package/assets/skills/ci-cd/references/troubleshooting.md +656 -0
  19. package/assets/skills/ci-cd/scripts/ci_health.py +301 -0
  20. package/assets/skills/ci-cd/scripts/pipeline_analyzer.py +440 -0
  21. package/assets/skills/context-optimization/CONTRIBUTING.md +78 -0
  22. package/assets/skills/context-optimization/LICENSE +22 -0
  23. package/assets/skills/context-optimization/README.md +228 -0
  24. package/assets/skills/context-optimization/SKILL.md +104 -0
  25. package/assets/skills/context-optimization/docs/agentskills.md +1264 -0
  26. package/assets/skills/context-optimization/docs/blogs.md +1230 -0
  27. package/assets/skills/context-optimization/docs/claude_research.md +85 -0
  28. package/assets/skills/context-optimization/docs/compression.md +298 -0
  29. package/assets/skills/context-optimization/docs/gemini_research.md +22 -0
  30. package/assets/skills/context-optimization/docs/hncapsule.md +92 -0
  31. package/assets/skills/context-optimization/docs/netflix_context.md +10 -0
  32. package/assets/skills/context-optimization/docs/vercel_tool.md +140 -0
  33. package/assets/skills/context-optimization/examples/book-sft-pipeline/README.md +78 -0
  34. package/assets/skills/context-optimization/examples/book-sft-pipeline/SKILL.md +380 -0
  35. package/assets/skills/context-optimization/examples/book-sft-pipeline/examples/gertrude-stein/README.md +168 -0
  36. package/assets/skills/context-optimization/examples/book-sft-pipeline/examples/gertrude-stein/dataset_sample.jsonl +5 -0
  37. package/assets/skills/context-optimization/examples/book-sft-pipeline/examples/gertrude-stein/pangram/Screenshot 2025-12-27 at 3.05.04/342/200/257AM.png +0 -0
  38. package/assets/skills/context-optimization/examples/book-sft-pipeline/examples/gertrude-stein/pangram/Screenshot 2025-12-27 at 3.05.36/342/200/257AM.png +0 -0
  39. package/assets/skills/context-optimization/examples/book-sft-pipeline/examples/gertrude-stein/pangram/Screenshot 2025-12-27 at 3.07.18/342/200/257AM.png +0 -0
  40. package/assets/skills/context-optimization/examples/book-sft-pipeline/examples/gertrude-stein/sample_outputs.md +63 -0
  41. package/assets/skills/context-optimization/examples/book-sft-pipeline/examples/gertrude-stein/training_config.json +80 -0
  42. package/assets/skills/context-optimization/examples/book-sft-pipeline/references/segmentation-strategies.md +324 -0
  43. package/assets/skills/context-optimization/examples/book-sft-pipeline/references/tinker-format.md +211 -0
  44. package/assets/skills/context-optimization/examples/book-sft-pipeline/references/tinker.txt +3176 -0
  45. package/assets/skills/context-optimization/examples/book-sft-pipeline/scripts/pipeline_example.py +187 -0
  46. package/assets/skills/context-optimization/examples/digital-brain-skill/AGENT.md +35 -0
  47. package/assets/skills/context-optimization/examples/digital-brain-skill/HOW-SKILLS-BUILT-THIS.md +407 -0
  48. package/assets/skills/context-optimization/examples/digital-brain-skill/README.md +209 -0
  49. package/assets/skills/context-optimization/examples/digital-brain-skill/SKILL.md +203 -0
  50. package/assets/skills/context-optimization/examples/digital-brain-skill/SKILLS-MAPPING.md +219 -0
  51. package/assets/skills/context-optimization/examples/digital-brain-skill/agents/AGENTS.md +82 -0
  52. package/assets/skills/context-optimization/examples/digital-brain-skill/agents/scripts/content_ideas.py +132 -0
  53. package/assets/skills/context-optimization/examples/digital-brain-skill/agents/scripts/idea_to_draft.py +181 -0
  54. package/assets/skills/context-optimization/examples/digital-brain-skill/agents/scripts/stale_contacts.py +139 -0
  55. package/assets/skills/context-optimization/examples/digital-brain-skill/agents/scripts/weekly_review.py +121 -0
  56. package/assets/skills/context-optimization/examples/digital-brain-skill/content/CONTENT.md +88 -0
  57. package/assets/skills/context-optimization/examples/digital-brain-skill/content/calendar.md +108 -0
  58. package/assets/skills/context-optimization/examples/digital-brain-skill/content/engagement.jsonl +2 -0
  59. package/assets/skills/context-optimization/examples/digital-brain-skill/content/ideas.jsonl +2 -0
  60. package/assets/skills/context-optimization/examples/digital-brain-skill/content/posts.jsonl +2 -0
  61. package/assets/skills/context-optimization/examples/digital-brain-skill/content/templates/linkedin-post.md +102 -0
  62. package/assets/skills/context-optimization/examples/digital-brain-skill/content/templates/newsletter.md +92 -0
  63. package/assets/skills/context-optimization/examples/digital-brain-skill/content/templates/thread.md +73 -0
  64. package/assets/skills/context-optimization/examples/digital-brain-skill/examples/content-workflow.md +204 -0
  65. package/assets/skills/context-optimization/examples/digital-brain-skill/examples/meeting-prep.md +243 -0
  66. package/assets/skills/context-optimization/examples/digital-brain-skill/identity/IDENTITY.md +46 -0
  67. package/assets/skills/context-optimization/examples/digital-brain-skill/identity/bio-variants.md +101 -0
  68. package/assets/skills/context-optimization/examples/digital-brain-skill/identity/brand.md +165 -0
  69. package/assets/skills/context-optimization/examples/digital-brain-skill/identity/prompts/content-generation.xml +46 -0
  70. package/assets/skills/context-optimization/examples/digital-brain-skill/identity/prompts/reply-generator.xml +40 -0
  71. package/assets/skills/context-optimization/examples/digital-brain-skill/identity/values.yaml +60 -0
  72. package/assets/skills/context-optimization/examples/digital-brain-skill/identity/voice.md +165 -0
  73. package/assets/skills/context-optimization/examples/digital-brain-skill/knowledge/KNOWLEDGE.md +85 -0
  74. package/assets/skills/context-optimization/examples/digital-brain-skill/knowledge/bookmarks.jsonl +2 -0
  75. package/assets/skills/context-optimization/examples/digital-brain-skill/knowledge/competitors.md +117 -0
  76. package/assets/skills/context-optimization/examples/digital-brain-skill/knowledge/learning.yaml +74 -0
  77. package/assets/skills/context-optimization/examples/digital-brain-skill/knowledge/research/_template.md +79 -0
  78. package/assets/skills/context-optimization/examples/digital-brain-skill/network/NETWORK.md +110 -0
  79. package/assets/skills/context-optimization/examples/digital-brain-skill/network/circles.yaml +80 -0
  80. package/assets/skills/context-optimization/examples/digital-brain-skill/network/contacts.jsonl +2 -0
  81. package/assets/skills/context-optimization/examples/digital-brain-skill/network/interactions.jsonl +2 -0
  82. package/assets/skills/context-optimization/examples/digital-brain-skill/network/intros.md +92 -0
  83. package/assets/skills/context-optimization/examples/digital-brain-skill/operations/OPERATIONS.md +75 -0
  84. package/assets/skills/context-optimization/examples/digital-brain-skill/operations/goals.yaml +83 -0
  85. package/assets/skills/context-optimization/examples/digital-brain-skill/operations/meetings.jsonl +2 -0
  86. package/assets/skills/context-optimization/examples/digital-brain-skill/operations/metrics.jsonl +2 -0
  87. package/assets/skills/context-optimization/examples/digital-brain-skill/operations/reviews/_weekly_template.md +114 -0
  88. package/assets/skills/context-optimization/examples/digital-brain-skill/operations/todos.md +76 -0
  89. package/assets/skills/context-optimization/examples/digital-brain-skill/package.json +41 -0
  90. package/assets/skills/context-optimization/examples/digital-brain-skill/references/file-formats.md +386 -0
  91. package/assets/skills/context-optimization/examples/digital-brain-skill/scripts/install.sh +79 -0
  92. package/assets/skills/context-optimization/examples/interleaved_thinking/README.md +620 -0
  93. package/assets/skills/context-optimization/examples/interleaved_thinking/SKILL.md +221 -0
  94. package/assets/skills/context-optimization/examples/interleaved_thinking/docs/agentthinking.md +63 -0
  95. package/assets/skills/context-optimization/examples/interleaved_thinking/docs/interleavedthinking.md +610 -0
  96. package/assets/skills/context-optimization/examples/interleaved_thinking/docs/m2-1.md +224 -0
  97. package/assets/skills/context-optimization/examples/interleaved_thinking/examples/01_basic_capture.py +76 -0
  98. package/assets/skills/context-optimization/examples/interleaved_thinking/examples/02_tool_usage.py +187 -0
  99. package/assets/skills/context-optimization/examples/interleaved_thinking/examples/03_full_optimization.py +1222 -0
  100. package/assets/skills/context-optimization/examples/interleaved_thinking/generated_skills/comprehensive-research-agent/SKILL.md +90 -0
  101. package/assets/skills/context-optimization/examples/interleaved_thinking/generated_skills/comprehensive-research-agent/references/optimization_summary.json +9 -0
  102. package/assets/skills/context-optimization/examples/interleaved_thinking/generated_skills/comprehensive-research-agent/references/optimized_prompt.txt +1 -0
  103. package/assets/skills/context-optimization/examples/interleaved_thinking/generated_skills/comprehensive-research-agent/references/patterns_found.json +205 -0
  104. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/final_prompt.txt +67 -0
  105. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_1/analysis.txt +48 -0
  106. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_1/optimization.txt +15 -0
  107. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_1/optimized_prompt.txt +1 -0
  108. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_1/trace.txt +178 -0
  109. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_10/analysis.txt +47 -0
  110. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_10/trace.txt +162 -0
  111. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_2/analysis.txt +48 -0
  112. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_2/optimization.txt +130 -0
  113. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_2/optimized_prompt.txt +72 -0
  114. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_2/trace.txt +156 -0
  115. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_3/analysis.txt +46 -0
  116. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_3/optimization.txt +147 -0
  117. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_3/optimized_prompt.txt +84 -0
  118. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_3/trace.txt +159 -0
  119. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_4/analysis.txt +46 -0
  120. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_4/optimization.txt +134 -0
  121. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_4/optimized_prompt.txt +67 -0
  122. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_4/trace.txt +165 -0
  123. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_5/analysis.txt +50 -0
  124. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_5/optimization.txt +135 -0
  125. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_5/optimized_prompt.txt +71 -0
  126. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_5/trace.txt +146 -0
  127. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_6/analysis.txt +15 -0
  128. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_6/optimization.txt +15 -0
  129. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_6/optimized_prompt.txt +1 -0
  130. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_6/trace.txt +147 -0
  131. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_7/analysis.txt +46 -0
  132. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_7/optimization.txt +103 -0
  133. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_7/optimized_prompt.txt +45 -0
  134. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_7/trace.txt +134 -0
  135. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_8/analysis.txt +47 -0
  136. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_8/optimization.txt +114 -0
  137. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_8/optimized_prompt.txt +60 -0
  138. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_8/trace.txt +135 -0
  139. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_9/analysis.txt +44 -0
  140. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_9/optimization.txt +106 -0
  141. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_9/optimized_prompt.txt +51 -0
  142. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/iteration_9/trace.txt +170 -0
  143. package/assets/skills/context-optimization/examples/interleaved_thinking/optimization_artifacts/summary.json +11 -0
  144. package/assets/skills/context-optimization/examples/interleaved_thinking/pyproject.toml +70 -0
  145. package/assets/skills/context-optimization/examples/interleaved_thinking/reasoning_trace_optimizer/__init__.py +53 -0
  146. package/assets/skills/context-optimization/examples/interleaved_thinking/reasoning_trace_optimizer/analyzer.py +465 -0
  147. package/assets/skills/context-optimization/examples/interleaved_thinking/reasoning_trace_optimizer/capture.py +417 -0
  148. package/assets/skills/context-optimization/examples/interleaved_thinking/reasoning_trace_optimizer/cli.py +271 -0
  149. package/assets/skills/context-optimization/examples/interleaved_thinking/reasoning_trace_optimizer/loop.py +468 -0
  150. package/assets/skills/context-optimization/examples/interleaved_thinking/reasoning_trace_optimizer/models.py +193 -0
  151. package/assets/skills/context-optimization/examples/interleaved_thinking/reasoning_trace_optimizer/optimizer.py +449 -0
  152. package/assets/skills/context-optimization/examples/interleaved_thinking/reasoning_trace_optimizer/skill_generator.py +502 -0
  153. package/assets/skills/context-optimization/examples/interleaved_thinking/tests/__init__.py +1 -0
  154. package/assets/skills/context-optimization/examples/interleaved_thinking/tests/test_models.py +144 -0
  155. package/assets/skills/context-optimization/examples/llm-as-judge-skills/.prettierrc +8 -0
  156. package/assets/skills/context-optimization/examples/llm-as-judge-skills/CONTRIBUTING.md +78 -0
  157. package/assets/skills/context-optimization/examples/llm-as-judge-skills/LICENSE +21 -0
  158. package/assets/skills/context-optimization/examples/llm-as-judge-skills/README.md +659 -0
  159. package/assets/skills/context-optimization/examples/llm-as-judge-skills/agents/evaluator-agent/evaluator-agent.md +177 -0
  160. package/assets/skills/context-optimization/examples/llm-as-judge-skills/agents/index.md +114 -0
  161. package/assets/skills/context-optimization/examples/llm-as-judge-skills/agents/orchestrator-agent/orchestrator-agent.md +205 -0
  162. package/assets/skills/context-optimization/examples/llm-as-judge-skills/agents/research-agent/research-agent.md +183 -0
  163. package/assets/skills/context-optimization/examples/llm-as-judge-skills/env.example +6 -0
  164. package/assets/skills/context-optimization/examples/llm-as-judge-skills/eslint.config.js +18 -0
  165. package/assets/skills/context-optimization/examples/llm-as-judge-skills/examples/basic-evaluation.ts +89 -0
  166. package/assets/skills/context-optimization/examples/llm-as-judge-skills/examples/full-evaluation-workflow.ts +136 -0
  167. package/assets/skills/context-optimization/examples/llm-as-judge-skills/examples/generate-rubric.ts +67 -0
  168. package/assets/skills/context-optimization/examples/llm-as-judge-skills/examples/pairwise-comparison.ts +97 -0
  169. package/assets/skills/context-optimization/examples/llm-as-judge-skills/package.json +79 -0
  170. package/assets/skills/context-optimization/examples/llm-as-judge-skills/prompts/agent-system/orchestrator-prompt.md +197 -0
  171. package/assets/skills/context-optimization/examples/llm-as-judge-skills/prompts/evaluation/direct-scoring-prompt.md +153 -0
  172. package/assets/skills/context-optimization/examples/llm-as-judge-skills/prompts/evaluation/pairwise-comparison-prompt.md +200 -0
  173. package/assets/skills/context-optimization/examples/llm-as-judge-skills/prompts/index.md +138 -0
  174. package/assets/skills/context-optimization/examples/llm-as-judge-skills/prompts/research/research-synthesis-prompt.md +171 -0
  175. package/assets/skills/context-optimization/examples/llm-as-judge-skills/skills/context-fundamentals/context-fundamentals.md +114 -0
  176. package/assets/skills/context-optimization/examples/llm-as-judge-skills/skills/index.md +79 -0
  177. package/assets/skills/context-optimization/examples/llm-as-judge-skills/skills/llm-evaluator/llm-evaluator.md +77 -0
  178. package/assets/skills/context-optimization/examples/llm-as-judge-skills/skills/tool-design/tool-design.md +198 -0
  179. package/assets/skills/context-optimization/examples/llm-as-judge-skills/src/agents/evaluator.ts +112 -0
  180. package/assets/skills/context-optimization/examples/llm-as-judge-skills/src/agents/index.ts +3 -0
  181. package/assets/skills/context-optimization/examples/llm-as-judge-skills/src/config/index.ts +18 -0
  182. package/assets/skills/context-optimization/examples/llm-as-judge-skills/src/index.ts +19 -0
  183. package/assets/skills/context-optimization/examples/llm-as-judge-skills/src/tools/evaluation/direct-score.ts +164 -0
  184. package/assets/skills/context-optimization/examples/llm-as-judge-skills/src/tools/evaluation/generate-rubric.ts +161 -0
  185. package/assets/skills/context-optimization/examples/llm-as-judge-skills/src/tools/evaluation/index.ts +9 -0
  186. package/assets/skills/context-optimization/examples/llm-as-judge-skills/src/tools/evaluation/pairwise-compare.ts +255 -0
  187. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tests/evaluation.test.ts +233 -0
  188. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tests/setup.ts +27 -0
  189. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tests/skills.test.ts +213 -0
  190. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tools/evaluation/direct-score.md +159 -0
  191. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tools/evaluation/generate-rubric.md +189 -0
  192. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tools/evaluation/pairwise-compare.md +182 -0
  193. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tools/index.md +141 -0
  194. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tools/orchestration/delegate-to-agent.md +171 -0
  195. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tools/research/read-url.md +162 -0
  196. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tools/research/web-search.md +128 -0
  197. package/assets/skills/context-optimization/examples/llm-as-judge-skills/tsconfig.json +26 -0
  198. package/assets/skills/context-optimization/examples/llm-as-judge-skills/vitest.config.ts +20 -0
  199. package/assets/skills/context-optimization/examples/x-to-book-system/PRD.md +644 -0
  200. package/assets/skills/context-optimization/examples/x-to-book-system/README.md +181 -0
  201. package/assets/skills/context-optimization/examples/x-to-book-system/SKILLS-MAPPING.md +187 -0
  202. package/assets/skills/context-optimization/researcher/example_output.md +75 -0
  203. package/assets/skills/context-optimization/researcher/llm-as-a-judge.md +362 -0
  204. package/assets/skills/context-optimization/skills/advanced-evaluation/SKILL.md +454 -0
  205. package/assets/skills/context-optimization/skills/advanced-evaluation/references/bias-mitigation.md +288 -0
  206. package/assets/skills/context-optimization/skills/advanced-evaluation/references/implementation-patterns.md +315 -0
  207. package/assets/skills/context-optimization/skills/advanced-evaluation/references/metrics-guide.md +331 -0
  208. package/assets/skills/context-optimization/skills/advanced-evaluation/scripts/evaluation_example.py +337 -0
  209. package/assets/skills/context-optimization/skills/bdi-mental-states/SKILL.md +295 -0
  210. package/assets/skills/context-optimization/skills/bdi-mental-states/references/bdi-ontology-core.md +207 -0
  211. package/assets/skills/context-optimization/skills/bdi-mental-states/references/framework-integration.md +582 -0
  212. package/assets/skills/context-optimization/skills/bdi-mental-states/references/rdf-examples.md +315 -0
  213. package/assets/skills/context-optimization/skills/bdi-mental-states/references/sparql-competency.md +420 -0
  214. package/assets/skills/context-optimization/skills/context-compression/SKILL.md +265 -0
  215. package/assets/skills/context-optimization/skills/context-compression/references/evaluation-framework.md +213 -0
  216. package/assets/skills/context-optimization/skills/context-compression/scripts/compression_evaluator.py +658 -0
  217. package/assets/skills/context-optimization/skills/context-degradation/SKILL.md +231 -0
  218. package/assets/skills/context-optimization/skills/context-degradation/references/patterns.md +314 -0
  219. package/assets/skills/context-optimization/skills/context-degradation/scripts/degradation_detector.py +419 -0
  220. package/assets/skills/context-optimization/skills/context-fundamentals/SKILL.md +185 -0
  221. package/assets/skills/context-optimization/skills/context-fundamentals/references/context-components.md +283 -0
  222. package/assets/skills/context-optimization/skills/context-fundamentals/scripts/context_manager.py +370 -0
  223. package/assets/skills/context-optimization/skills/context-optimization/SKILL.md +179 -0
  224. package/assets/skills/context-optimization/skills/context-optimization/references/optimization_techniques.md +272 -0
  225. package/assets/skills/context-optimization/skills/context-optimization/scripts/compaction.py +379 -0
  226. package/assets/skills/context-optimization/skills/evaluation/SKILL.md +231 -0
  227. package/assets/skills/context-optimization/skills/evaluation/references/metrics.md +339 -0
  228. package/assets/skills/context-optimization/skills/evaluation/scripts/evaluator.py +474 -0
  229. package/assets/skills/context-optimization/skills/filesystem-context/SKILL.md +321 -0
  230. package/assets/skills/context-optimization/skills/filesystem-context/references/implementation-patterns.md +549 -0
  231. package/assets/skills/context-optimization/skills/filesystem-context/scripts/filesystem_context.py +353 -0
  232. package/assets/skills/context-optimization/skills/hosted-agents/SKILL.md +279 -0
  233. package/assets/skills/context-optimization/skills/hosted-agents/references/infrastructure-patterns.md +700 -0
  234. package/assets/skills/context-optimization/skills/hosted-agents/scripts/sandbox_manager.py +495 -0
  235. package/assets/skills/context-optimization/skills/memory-systems/SKILL.md +221 -0
  236. package/assets/skills/context-optimization/skills/memory-systems/references/implementation.md +458 -0
  237. package/assets/skills/context-optimization/skills/memory-systems/scripts/memory_store.py +396 -0
  238. package/assets/skills/context-optimization/skills/multi-agent-patterns/SKILL.md +255 -0
  239. package/assets/skills/context-optimization/skills/multi-agent-patterns/references/frameworks.md +433 -0
  240. package/assets/skills/context-optimization/skills/multi-agent-patterns/scripts/coordination.py +439 -0
  241. package/assets/skills/context-optimization/skills/project-development/SKILL.md +342 -0
  242. package/assets/skills/context-optimization/skills/project-development/references/case-studies.md +388 -0
  243. package/assets/skills/context-optimization/skills/project-development/references/pipeline-patterns.md +610 -0
  244. package/assets/skills/context-optimization/skills/project-development/scripts/pipeline_template.py +677 -0
  245. package/assets/skills/context-optimization/skills/tool-design/SKILL.md +311 -0
  246. package/assets/skills/context-optimization/skills/tool-design/references/architectural_reduction.md +210 -0
  247. package/assets/skills/context-optimization/skills/tool-design/references/best_practices.md +176 -0
  248. package/assets/skills/context-optimization/skills/tool-design/scripts/description_generator.py +237 -0
  249. package/assets/skills/context-optimization/template/SKILL.md +98 -0
  250. package/assets/skills/dremio-analytics/SKILL.md +287 -0
  251. package/assets/skills/elevenlabs-voice/SKILL.md +269 -0
  252. package/assets/skills/git-workflow/SKILL.md +266 -0
  253. package/assets/skills/gitops-workflows/.claude-plugin/plugin.json +8 -0
  254. package/assets/skills/gitops-workflows/SKILL.md +568 -0
  255. package/assets/skills/gitops-workflows/assets/applicationsets/cluster-generator.yaml +32 -0
  256. package/assets/skills/gitops-workflows/assets/argocd/install-argocd-3.x.yaml +92 -0
  257. package/assets/skills/gitops-workflows/assets/flux/flux-bootstrap-github.sh +49 -0
  258. package/assets/skills/gitops-workflows/assets/flux/oci-helmrelease.yaml +38 -0
  259. package/assets/skills/gitops-workflows/assets/progressive-delivery/argo-rollouts-canary.yaml +62 -0
  260. package/assets/skills/gitops-workflows/assets/secrets/sops-age-config.yaml +33 -0
  261. package/assets/skills/gitops-workflows/references/argocd_vs_flux.md +243 -0
  262. package/assets/skills/gitops-workflows/references/best_practices.md +160 -0
  263. package/assets/skills/gitops-workflows/references/multi_cluster.md +80 -0
  264. package/assets/skills/gitops-workflows/references/oci_artifacts.md +290 -0
  265. package/assets/skills/gitops-workflows/references/progressive_delivery.md +94 -0
  266. package/assets/skills/gitops-workflows/references/repo_patterns.md +184 -0
  267. package/assets/skills/gitops-workflows/references/secret_management.md +213 -0
  268. package/assets/skills/gitops-workflows/references/troubleshooting.md +134 -0
  269. package/assets/skills/gitops-workflows/scripts/applicationset_generator.py +156 -0
  270. package/assets/skills/gitops-workflows/scripts/check_argocd_health.py +275 -0
  271. package/assets/skills/gitops-workflows/scripts/check_flux_health.py +418 -0
  272. package/assets/skills/gitops-workflows/scripts/oci_artifact_checker.py +150 -0
  273. package/assets/skills/gitops-workflows/scripts/promotion_validator.py +88 -0
  274. package/assets/skills/gitops-workflows/scripts/secret_audit.py +178 -0
  275. package/assets/skills/gitops-workflows/scripts/sync_drift_detector.py +144 -0
  276. package/assets/skills/gitops-workflows/scripts/validate_gitops_repo.py +299 -0
  277. package/assets/skills/iac-terraform/.claude-plugin/plugin.json +8 -0
  278. package/assets/skills/iac-terraform/SKILL.md +653 -0
  279. package/assets/skills/iac-terraform/assets/templates/MODULE_TEMPLATE.md +386 -0
  280. package/assets/skills/iac-terraform/assets/workflows/github-actions-terraform.yml +224 -0
  281. package/assets/skills/iac-terraform/assets/workflows/github-actions-terragrunt.yml +236 -0
  282. package/assets/skills/iac-terraform/assets/workflows/gitlab-ci-terraform.yml +184 -0
  283. package/assets/skills/iac-terraform/references/best_practices.md +709 -0
  284. package/assets/skills/iac-terraform/references/cost_optimization.md +665 -0
  285. package/assets/skills/iac-terraform/references/troubleshooting.md +635 -0
  286. package/assets/skills/iac-terraform/scripts/init_module.py +319 -0
  287. package/assets/skills/iac-terraform/scripts/inspect_state.py +232 -0
  288. package/assets/skills/iac-terraform/scripts/validate_module.py +227 -0
  289. package/assets/skills/k8s-troubleshooter/.claude-plugin/plugin.json +8 -0
  290. package/assets/skills/k8s-troubleshooter/SKILL.md +336 -0
  291. package/assets/skills/k8s-troubleshooter/references/common_issues.md +582 -0
  292. package/assets/skills/k8s-troubleshooter/references/helm_troubleshooting.md +708 -0
  293. package/assets/skills/k8s-troubleshooter/references/incident_response.md +466 -0
  294. package/assets/skills/k8s-troubleshooter/references/performance_troubleshooting.md +687 -0
  295. package/assets/skills/k8s-troubleshooter/scripts/check_namespace.py +500 -0
  296. package/assets/skills/k8s-troubleshooter/scripts/cluster_health.py +223 -0
  297. package/assets/skills/k8s-troubleshooter/scripts/diagnose_pod.py +157 -0
  298. package/assets/skills/mattermost-notify/SKILL.md +248 -0
  299. package/assets/skills/monitoring-observability/SKILL.md +869 -0
  300. package/assets/skills/monitoring-observability/assets/templates/otel-config/collector-config.yaml +227 -0
  301. package/assets/skills/monitoring-observability/assets/templates/prometheus-alerts/kubernetes-alerts.yml +293 -0
  302. package/assets/skills/monitoring-observability/assets/templates/prometheus-alerts/webapp-alerts.yml +243 -0
  303. package/assets/skills/monitoring-observability/assets/templates/runbooks/incident-runbook-template.md +409 -0
  304. package/assets/skills/monitoring-observability/monitoring-observability.skill +0 -0
  305. package/assets/skills/monitoring-observability/references/alerting_best_practices.md +609 -0
  306. package/assets/skills/monitoring-observability/references/datadog_migration.md +649 -0
  307. package/assets/skills/monitoring-observability/references/dql_promql_translation.md +756 -0
  308. package/assets/skills/monitoring-observability/references/logging_guide.md +775 -0
  309. package/assets/skills/monitoring-observability/references/metrics_design.md +406 -0
  310. package/assets/skills/monitoring-observability/references/slo_sla_guide.md +652 -0
  311. package/assets/skills/monitoring-observability/references/tool_comparison.md +697 -0
  312. package/assets/skills/monitoring-observability/references/tracing_guide.md +663 -0
  313. package/assets/skills/monitoring-observability/scripts/alert_quality_checker.py +315 -0
  314. package/assets/skills/monitoring-observability/scripts/analyze_metrics.py +279 -0
  315. package/assets/skills/monitoring-observability/scripts/dashboard_generator.py +395 -0
  316. package/assets/skills/monitoring-observability/scripts/datadog_cost_analyzer.py +477 -0
  317. package/assets/skills/monitoring-observability/scripts/health_check_validator.py +297 -0
  318. package/assets/skills/monitoring-observability/scripts/log_analyzer.py +321 -0
  319. package/assets/skills/monitoring-observability/scripts/slo_calculator.py +365 -0
  320. package/assets/skills/neo4j-graph-rag/SKILL.md +258 -0
  321. package/assets/skills/pagerduty-ops/SKILL.md +380 -0
  322. package/assets/skills/playwright/API_REFERENCE.md +653 -0
  323. package/assets/skills/playwright/SKILL.md +453 -0
  324. package/assets/skills/playwright/lib/helpers.js +441 -0
  325. package/assets/skills/playwright/package.json +26 -0
  326. package/assets/skills/playwright/run.js +228 -0
  327. package/assets/skills/project-memory/README.md +687 -0
  328. package/assets/skills/project-memory/SKILL.md +298 -0
  329. package/assets/skills/project-memory/references/bugs_template.md +41 -0
  330. package/assets/skills/project-memory/references/decisions_template.md +92 -0
  331. package/assets/skills/project-memory/references/issues_template.md +76 -0
  332. package/assets/skills/project-memory/references/key_facts_template.md +158 -0
  333. package/assets/skills/recruit-workflow/SKILL.md +276 -0
  334. package/assets/skills/recruit-workflow/references/email-templates.md +347 -0
  335. package/assets/skills/recruit-workflow/references/workflow-stages.md +395 -0
  336. package/assets/skills/recruit-workflow/scripts/clay_client.py +188 -0
  337. package/assets/skills/recruit-workflow/scripts/lever_client.py +197 -0
  338. package/assets/skills/recruit-workflow/scripts/mailgun_client.py +245 -0
  339. package/assets/skills/recruit-workflow/scripts/minio_client.py +426 -0
  340. package/assets/skills/shakudo-microservice/SKILL.md +215 -0
  341. package/assets/skills/tmux/SKILL.md +631 -0
  342. package/assets/skills/tmux/references/direct-socket-control.md +108 -0
  343. package/assets/skills/tmux/references/session-lifecycle.md +503 -0
  344. package/assets/skills/tmux/references/session-registry.md +1484 -0
  345. package/assets/skills/tmux/tools/cleanup-sessions.sh +263 -0
  346. package/assets/skills/tmux/tools/create-session.sh +224 -0
  347. package/assets/skills/tmux/tools/find-sessions.sh +262 -0
  348. package/assets/skills/tmux/tools/kill-session.sh +308 -0
  349. package/assets/skills/tmux/tools/lib/registry.sh +437 -0
  350. package/assets/skills/tmux/tools/lib/time_utils.sh +54 -0
  351. package/assets/skills/tmux/tools/list-sessions.sh +255 -0
  352. package/assets/skills/tmux/tools/pane-health.sh +424 -0
  353. package/assets/skills/tmux/tools/safe-send.sh +503 -0
  354. package/assets/skills/tmux/tools/wait-for-text.sh +260 -0
  355. package/assets/skills/twilio-sms/SKILL.md +508 -0
  356. package/assets/skills/zellij/SKILL.md +274 -0
  357. package/assets/skills/zellij/references/actions.md +558 -0
  358. package/assets/skills/zellij/references/layouts.md +424 -0
  359. package/bin/cli.ts +46 -0
  360. package/package.json +43 -0
  361. package/src/alias.ts +108 -0
  362. package/src/backup.ts +51 -0
  363. package/src/config.ts +115 -0
  364. package/src/dependencies.ts +163 -0
  365. package/src/errors.ts +77 -0
  366. package/src/index.ts +207 -0
  367. package/src/prompts.ts +142 -0
  368. package/src/schemas.ts +21 -0
  369. package/src/skills.ts +45 -0
  370. package/src/speckit.ts +116 -0
  371. package/src/types.ts +106 -0
  372. package/src/utils.ts +110 -0
  373. package/src/vibe-git.ts +50 -0
  374. package/templates/.specify/memory/constitution.md +109 -0
  375. package/templates/.specify/scripts/bash/check-prerequisites.sh +262 -0
  376. package/templates/.specify/scripts/bash/common.sh +670 -0
  377. package/templates/.specify/scripts/bash/create-new-feature.sh +594 -0
  378. package/templates/.specify/scripts/bash/create-worktree-feature.sh +401 -0
  379. package/templates/.specify/scripts/bash/init-workspace.sh +433 -0
  380. package/templates/.specify/scripts/bash/list-spec-worktrees.sh +198 -0
  381. package/templates/.specify/scripts/bash/setup-plan.sh +105 -0
  382. package/templates/.specify/scripts/bash/test-workspace-rollup.sh +175 -0
  383. package/templates/.specify/scripts/bash/update-agent-context.sh +799 -0
  384. package/templates/.specify/templates/agent-file-template.md +28 -0
  385. package/templates/.specify/templates/checklist-template.md +40 -0
  386. package/templates/.specify/templates/commands/analyze.md +197 -0
  387. package/templates/.specify/templates/commands/checklist.md +306 -0
  388. package/templates/.specify/templates/commands/clarify.md +194 -0
  389. package/templates/.specify/templates/commands/constitution.md +97 -0
  390. package/templates/.specify/templates/commands/implement.md +149 -0
  391. package/templates/.specify/templates/commands/plan.md +123 -0
  392. package/templates/.specify/templates/commands/projects.md +48 -0
  393. package/templates/.specify/templates/commands/rollup.md +66 -0
  394. package/templates/.specify/templates/commands/specify.md +275 -0
  395. package/templates/.specify/templates/commands/specs.md +71 -0
  396. package/templates/.specify/templates/commands/tasks.md +151 -0
  397. package/templates/.specify/templates/commands/taskstoissues.md +35 -0
  398. package/templates/.specify/templates/commands/workspace.md +128 -0
  399. package/templates/.specify/templates/plan-template.md +104 -0
  400. package/templates/.specify/templates/spec-template.md +115 -0
  401. package/templates/.specify/templates/tasks-template.md +251 -0
  402. package/templates/.specify/templates/workspace.yaml +110 -0
  403. package/templates/.specify/workspace.yaml +95 -0
  404. package/templates/AGENTS.md +460 -0
  405. package/templates/oh-my-opencode.json +27 -0
  406. package/templates/opencode.json +383 -0
  407. package/templates/package.json +10 -0
  408. package/templates/project-memory/bugs.md +16 -0
  409. package/templates/project-memory/decisions.md +22 -0
  410. package/templates/project-memory/issues.md +15 -0
  411. package/templates/project-memory/key_facts.md +26 -0
@@ -0,0 +1,339 @@
1
+ # Evaluation Reference: Metrics and Implementation
2
+
3
+ This document provides implementation details for evaluation metrics and evaluation systems.
4
+
5
+ ## Core Metric Definitions
6
+
7
+ ### Factual Accuracy
8
+
9
+ Factual accuracy measures whether claims in agent output match ground truth.
10
+
11
+ ```
12
+ Excellent (1.0): All claims verified against ground truth, no errors
13
+ Good (0.8): Minor errors that do not affect main conclusions
14
+ Acceptable (0.6): Major claims correct, minor inaccuracies present
15
+ Poor (0.3): Significant factual errors in key claims
16
+ Failed (0.0): Fundamental factual errors that invalidate output
17
+ ```
18
+
19
+ Calculation approach:
20
+ - Extract claims from output
21
+ - Verify each claim against ground truth
22
+ - Weight claims by importance (major claims more weight)
23
+ - Calculate weighted average of claim accuracy
24
+
25
+ ### Completeness
26
+
27
+ Completeness measures whether output covers all requested aspects.
28
+
29
+ ```
30
+ Excellent (1.0): All requested aspects thoroughly covered
31
+ Good (0.8): Most aspects covered with minor gaps
32
+ Acceptable (0.6): Key aspects covered, some gaps
33
+ Poor (0.3): Major aspects missing from output
34
+ Failed (0.0): Fundamental aspects not addressed
35
+ ```
36
+
37
+ ### Citation Accuracy
38
+
39
+ Citation accuracy measures whether cited sources match claimed sources.
40
+
41
+ ```
42
+ Excellent (1.0): All citations accurate and complete
43
+ Good (0.8): Minor citation formatting issues
44
+ Acceptable (0.6): Major citations accurate
45
+ Poor (0.3): Significant citation problems
46
+ Failed (0.0): Citations missing or completely incorrect
47
+ ```
48
+
49
+ ### Source Quality
50
+
51
+ Source quality measures whether appropriate primary sources were used.
52
+
53
+ ```
54
+ Excellent (1.0): Primary authoritative sources
55
+ Good (0.8): Mostly primary sources with some secondary
56
+ Acceptable (0.6): Mix of primary and secondary sources
57
+ Poor (0.3): Mostly secondary or unreliable sources
58
+ Failed (0.0): No credible sources cited
59
+ ```
60
+
61
+ ### Tool Efficiency
62
+
63
+ Tool efficiency measures whether the agent used appropriate tools a reasonable number of times.
64
+
65
+ ```
66
+ Excellent (1.0): Optimal tool selection and call count
67
+ Good (0.8): Good tool selection with minor inefficiencies
68
+ Acceptable (0.6): Appropriate tools with some redundancy
69
+ Poor (0.3): Wrong tools or excessive call counts
70
+ Failed (0.0): Severe tool misuse or extremely excessive calls
71
+ ```
72
+
73
+ ## Rubric Implementation
74
+
75
+ ```python
76
+ EVALUATION_DIMENSIONS = {
77
+ "factual_accuracy": {
78
+ "weight": 0.30,
79
+ "description": "Claims match ground truth",
80
+ "levels": {
81
+ "excellent": 1.0,
82
+ "good": 0.8,
83
+ "acceptable": 0.6,
84
+ "poor": 0.3,
85
+ "failed": 0.0
86
+ }
87
+ },
88
+ "completeness": {
89
+ "weight": 0.25,
90
+ "description": "All requested aspects covered",
91
+ "levels": {
92
+ "excellent": 1.0,
93
+ "good": 0.8,
94
+ "acceptable": 0.6,
95
+ "poor": 0.3,
96
+ "failed": 0.0
97
+ }
98
+ },
99
+ "citation_accuracy": {
100
+ "weight": 0.15,
101
+ "description": "Citations match sources",
102
+ "levels": {
103
+ "excellent": 1.0,
104
+ "good": 0.8,
105
+ "acceptable": 0.6,
106
+ "poor": 0.3,
107
+ "failed": 0.0
108
+ }
109
+ },
110
+ "source_quality": {
111
+ "weight": 0.10,
112
+ "description": "Appropriate primary sources used",
113
+ "levels": {
114
+ "excellent": 1.0,
115
+ "good": 0.8,
116
+ "acceptable": 0.6,
117
+ "poor": 0.3,
118
+ "failed": 0.0
119
+ }
120
+ },
121
+ "tool_efficiency": {
122
+ "weight": 0.20,
123
+ "description": "Right tools used reasonably",
124
+ "levels": {
125
+ "excellent": 1.0,
126
+ "good": 0.8,
127
+ "acceptable": 0.6,
128
+ "poor": 0.3,
129
+ "failed": 0.0
130
+ }
131
+ }
132
+ }
133
+
134
+ def calculate_overall_score(dimension_scores, rubric):
135
+ """Calculate weighted overall score from dimension scores."""
136
+ total_weight = 0
137
+ weighted_sum = 0
138
+
139
+ for dimension, score in dimension_scores.items():
140
+ if dimension in rubric:
141
+ weight = rubric[dimension]["weight"]
142
+ weighted_sum += score * weight
143
+ total_weight += weight
144
+
145
+ return weighted_sum / total_weight if total_weight > 0 else 0
146
+ ```
147
+
148
+ ## Test Set Management
149
+
150
+ ```python
151
+ class TestSet:
152
+ def __init__(self, name):
153
+ self.name = name
154
+ self.tests = []
155
+ self.tags = {}
156
+
157
+ def add_test(self, test_case):
158
+ """Add test case to test set."""
159
+ self.tests.append(test_case)
160
+
161
+ # Index by tags
162
+ for tag in test_case.get("tags", []):
163
+ if tag not in self.tags:
164
+ self.tags[tag] = []
165
+ self.tags[tag].append(len(self.tests) - 1)
166
+
167
+ def filter(self, **criteria):
168
+ """Filter tests by criteria."""
169
+ filtered = []
170
+ for test in self.tests:
171
+ match = True
172
+ for key, value in criteria.items():
173
+ if test.get(key) != value:
174
+ match = False
175
+ break
176
+ if match:
177
+ filtered.append(test)
178
+ return filtered
179
+
180
+ def get_complexity_distribution(self):
181
+ """Get distribution of tests by complexity."""
182
+ distribution = {}
183
+ for test in self.tests:
184
+ complexity = test.get("complexity", "medium")
185
+ distribution[complexity] = distribution.get(complexity, 0) + 1
186
+ return distribution
187
+ ```
188
+
189
+ ## Evaluation Runner
190
+
191
+ ```python
192
+ class EvaluationRunner:
193
+ def __init__(self, test_set, rubric, agent):
194
+ self.test_set = test_set
195
+ self.rubric = rubric
196
+ self.agent = agent
197
+ self.results = []
198
+
199
+ def run_all(self, verbose=False):
200
+ """Run evaluation on all tests."""
201
+ self.results = []
202
+
203
+ for i, test in enumerate(self.test_set.tests):
204
+ if verbose:
205
+ print(f"Running test {i+1}/{len(self.test_set.tests)}")
206
+
207
+ result = self.run_test(test)
208
+ self.results.append(result)
209
+
210
+ return self.summarize()
211
+
212
+ def run_test(self, test):
213
+ """Run single evaluation test."""
214
+ # Get agent output
215
+ output = self.agent.run(test["input"])
216
+
217
+ # Evaluate
218
+ evaluation = self.evaluate_output(output, test)
219
+
220
+ return {
221
+ "test": test,
222
+ "output": output,
223
+ "evaluation": evaluation
224
+ }
225
+
226
+ def evaluate_output(self, output, test):
227
+ """Evaluate agent output against test."""
228
+ ground_truth = test.get("expected", {})
229
+
230
+ dimension_scores = {}
231
+ for dimension, config in self.rubric.items():
232
+ score = self.evaluate_dimension(
233
+ output, ground_truth, dimension, config
234
+ )
235
+ dimension_scores[dimension] = score
236
+
237
+ overall = calculate_overall_score(dimension_scores, self.rubric)
238
+
239
+ return {
240
+ "overall_score": overall,
241
+ "dimension_scores": dimension_scores,
242
+ "passed": overall >= 0.7
243
+ }
244
+
245
+ def summarize(self):
246
+ """Summarize evaluation results."""
247
+ if not self.results:
248
+ return {"error": "No results"}
249
+
250
+ passed = sum(1 for r in self.results if r["evaluation"]["passed"])
251
+
252
+ dimension_totals = {}
253
+ for dimension in self.rubric.keys():
254
+ dimension_totals[dimension] = {
255
+ "total": 0,
256
+ "count": 0
257
+ }
258
+
259
+ for result in self.results:
260
+ for dimension, score in result["evaluation"]["dimension_scores"].items():
261
+ if dimension in dimension_totals:
262
+ dimension_totals[dimension]["total"] += score
263
+ dimension_totals[dimension]["count"] += 1
264
+
265
+ dimension_averages = {}
266
+ for dimension, data in dimension_totals.items():
267
+ if data["count"] > 0:
268
+ dimension_averages[dimension] = data["total"] / data["count"]
269
+
270
+ return {
271
+ "total_tests": len(self.results),
272
+ "passed": passed,
273
+ "failed": len(self.results) - passed,
274
+ "pass_rate": passed / len(self.results) if self.results else 0,
275
+ "dimension_averages": dimension_averages,
276
+ "failures": [
277
+ r for r in self.results
278
+ if not r["evaluation"]["passed"]
279
+ ]
280
+ }
281
+ ```
282
+
283
+ ## Production Monitoring
284
+
285
+ ```python
286
+ class ProductionMonitor:
287
+ def __init__(self, sample_rate=0.01):
288
+ self.sample_rate = sample_rate
289
+ self.samples = []
290
+ self.alert_thresholds = {
291
+ "pass_rate_warning": 0.85,
292
+ "pass_rate_critical": 0.70
293
+ }
294
+
295
+ def sample_and_evaluate(self, query, output):
296
+ """Sample production interaction for evaluation."""
297
+ if random.random() > self.sample_rate:
298
+ return None
299
+
300
+ evaluation = evaluate_output(output, {}, EVALUATION_RUBRIC)
301
+
302
+ sample = {
303
+ "query": query[:200],
304
+ "output_preview": output[:200],
305
+ "score": evaluation["overall_score"],
306
+ "passed": evaluation["passed"],
307
+ "timestamp": current_timestamp()
308
+ }
309
+
310
+ self.samples.append(sample)
311
+ return sample
312
+
313
+ def get_metrics(self):
314
+ """Calculate current metrics from samples."""
315
+ if not self.samples:
316
+ return {"status": "insufficient_data"}
317
+
318
+ passed = sum(1 for s in self.samples if s["passed"])
319
+ pass_rate = passed / len(self.samples)
320
+
321
+ avg_score = sum(s["score"] for s in self.samples) / len(self.samples)
322
+
323
+ return {
324
+ "sample_count": len(self.samples),
325
+ "pass_rate": pass_rate,
326
+ "average_score": avg_score,
327
+ "status": self._get_status(pass_rate)
328
+ }
329
+
330
+ def _get_status(self, pass_rate):
331
+ """Get status based on pass rate."""
332
+ if pass_rate < self.alert_thresholds["pass_rate_critical"]:
333
+ return "critical"
334
+ elif pass_rate < self.alert_thresholds["pass_rate_warning"]:
335
+ return "warning"
336
+ else:
337
+ return "healthy"
338
+ ```
339
+