gkt-node 3.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (758)
  1. package/README.md +89 -0
  2. package/assets/.agent/brain/agent_index.json +208 -0
  3. package/assets/.agent/brain/project_context.json +31 -0
  4. package/assets/.agent/skills/ab-test-setup/SKILL.md +232 -0
  5. package/assets/.agent/skills/agent-evaluation/SKILL.md +64 -0
  6. package/assets/.agent/skills/agent-memory-mcp/SKILL.md +82 -0
  7. package/assets/.agent/skills/agent-memory-systems/SKILL.md +67 -0
  8. package/assets/.agent/skills/agent-orchestration-multi-agent-optimize/SKILL.md +239 -0
  9. package/assets/.agent/skills/agent-tool-builder/SKILL.md +53 -0
  10. package/assets/.agent/skills/ai-agents-architect/SKILL.md +90 -0
  11. package/assets/.agent/skills/ai-engineer/SKILL.md +171 -0
  12. package/assets/.agent/skills/ai-product/SKILL.md +54 -0
  13. package/assets/.agent/skills/ai-wrapper-product/SKILL.md +273 -0
  14. package/assets/.agent/skills/airflow-dag-patterns/SKILL.md +41 -0
  15. package/assets/.agent/skills/airflow-dag-patterns/resources/implementation-playbook.md +509 -0
  16. package/assets/.agent/skills/airtable-automation/SKILL.md +170 -0
  17. package/assets/.agent/skills/analytics-tracking/SKILL.md +404 -0
  18. package/assets/.agent/skills/api-design-principles/SKILL.md +37 -0
  19. package/assets/.agent/skills/api-design-principles/assets/__pycache__/rest-api-template.cpython-314.pyc +0 -0
  20. package/assets/.agent/skills/api-design-principles/assets/api-design-checklist.md +155 -0
  21. package/assets/.agent/skills/api-design-principles/assets/rest-api-template.py +182 -0
  22. package/assets/.agent/skills/api-design-principles/references/graphql-schema-design.md +583 -0
  23. package/assets/.agent/skills/api-design-principles/references/rest-best-practices.md +408 -0
  24. package/assets/.agent/skills/api-design-principles/resources/implementation-playbook.md +513 -0
  25. package/assets/.agent/skills/api-designer/SKILL.md +22 -0
  26. package/assets/.agent/skills/api-designer/data/api_patterns.json +107 -0
  27. package/assets/.agent/skills/api-designer/scripts/__pycache__/api_gen.cpython-314.pyc +0 -0
  28. package/assets/.agent/skills/api-designer/scripts/api_gen.py +80 -0
  29. package/assets/.agent/skills/api-documentation-generator/SKILL.md +484 -0
  30. package/assets/.agent/skills/api-documenter/SKILL.md +184 -0
  31. package/assets/.agent/skills/api-fuzzing-bug-bounty/SKILL.md +433 -0
  32. package/assets/.agent/skills/api-patterns/SKILL.md +81 -0
  33. package/assets/.agent/skills/api-patterns/api-style.md +42 -0
  34. package/assets/.agent/skills/api-patterns/auth.md +24 -0
  35. package/assets/.agent/skills/api-patterns/documentation.md +26 -0
  36. package/assets/.agent/skills/api-patterns/graphql.md +41 -0
  37. package/assets/.agent/skills/api-patterns/rate-limiting.md +31 -0
  38. package/assets/.agent/skills/api-patterns/response.md +37 -0
  39. package/assets/.agent/skills/api-patterns/rest.md +40 -0
  40. package/assets/.agent/skills/api-patterns/scripts/__pycache__/api_validator.cpython-314.pyc +0 -0
  41. package/assets/.agent/skills/api-patterns/scripts/api_validator.py +211 -0
  42. package/assets/.agent/skills/api-patterns/security-testing.md +122 -0
  43. package/assets/.agent/skills/api-patterns/trpc.md +41 -0
  44. package/assets/.agent/skills/api-patterns/versioning.md +22 -0
  45. package/assets/.agent/skills/api-security-best-practices/SKILL.md +907 -0
  46. package/assets/.agent/skills/api-testing-observability-api-mock/SKILL.md +46 -0
  47. package/assets/.agent/skills/api-testing-observability-api-mock/resources/implementation-playbook.md +1327 -0
  48. package/assets/.agent/skills/app-store-optimization/HOW_TO_USE.md +281 -0
  49. package/assets/.agent/skills/app-store-optimization/README.md +430 -0
  50. package/assets/.agent/skills/app-store-optimization/SKILL.md +403 -0
  51. package/assets/.agent/skills/app-store-optimization/__pycache__/ab_test_planner.cpython-314.pyc +0 -0
  52. package/assets/.agent/skills/app-store-optimization/__pycache__/aso_scorer.cpython-314.pyc +0 -0
  53. package/assets/.agent/skills/app-store-optimization/__pycache__/competitor_analyzer.cpython-314.pyc +0 -0
  54. package/assets/.agent/skills/app-store-optimization/__pycache__/keyword_analyzer.cpython-314.pyc +0 -0
  55. package/assets/.agent/skills/app-store-optimization/__pycache__/launch_checklist.cpython-314.pyc +0 -0
  56. package/assets/.agent/skills/app-store-optimization/__pycache__/localization_helper.cpython-314.pyc +0 -0
  57. package/assets/.agent/skills/app-store-optimization/__pycache__/metadata_optimizer.cpython-314.pyc +0 -0
  58. package/assets/.agent/skills/app-store-optimization/__pycache__/review_analyzer.cpython-314.pyc +0 -0
  59. package/assets/.agent/skills/app-store-optimization/ab_test_planner.py +662 -0
  60. package/assets/.agent/skills/app-store-optimization/aso_scorer.py +482 -0
  61. package/assets/.agent/skills/app-store-optimization/competitor_analyzer.py +577 -0
  62. package/assets/.agent/skills/app-store-optimization/expected_output.json +170 -0
  63. package/assets/.agent/skills/app-store-optimization/keyword_analyzer.py +406 -0
  64. package/assets/.agent/skills/app-store-optimization/launch_checklist.py +739 -0
  65. package/assets/.agent/skills/app-store-optimization/localization_helper.py +588 -0
  66. package/assets/.agent/skills/app-store-optimization/metadata_optimizer.py +581 -0
  67. package/assets/.agent/skills/app-store-optimization/review_analyzer.py +714 -0
  68. package/assets/.agent/skills/app-store-optimization/sample_input.json +30 -0
  69. package/assets/.agent/skills/architecture-auditor/SKILL.md +35 -0
  70. package/assets/.agent/skills/architecture-auditor/data/standards.json +45 -0
  71. package/assets/.agent/skills/architecture-auditor/scripts/__pycache__/auditor.cpython-314.pyc +0 -0
  72. package/assets/.agent/skills/architecture-auditor/scripts/auditor.py +69 -0
  73. package/assets/.agent/skills/architecture-decision-records/SKILL.md +441 -0
  74. package/assets/.agent/skills/architecture-patterns/SKILL.md +37 -0
  75. package/assets/.agent/skills/architecture-patterns/resources/implementation-playbook.md +479 -0
  76. package/assets/.agent/skills/asana-automation/SKILL.md +171 -0
  77. package/assets/.agent/skills/attack-tree-construction/SKILL.md +38 -0
  78. package/assets/.agent/skills/attack-tree-construction/resources/implementation-playbook.md +671 -0
  79. package/assets/.agent/skills/auth-implementation-patterns/SKILL.md +39 -0
  80. package/assets/.agent/skills/auth-implementation-patterns/resources/implementation-playbook.md +618 -0
  81. package/assets/.agent/skills/autonomous-agent-patterns/SKILL.md +761 -0
  82. package/assets/.agent/skills/aws-penetration-testing/SKILL.md +405 -0
  83. package/assets/.agent/skills/aws-penetration-testing/references/advanced-aws-pentesting.md +469 -0
  84. package/assets/.agent/skills/aws-serverless/SKILL.md +323 -0
  85. package/assets/.agent/skills/backend-architect/SKILL.md +333 -0
  86. package/assets/.agent/skills/billing-automation/SKILL.md +42 -0
  87. package/assets/.agent/skills/billing-automation/resources/implementation-playbook.md +544 -0
  88. package/assets/.agent/skills/broken-authentication/SKILL.md +476 -0
  89. package/assets/.agent/skills/bullmq-specialist/SKILL.md +57 -0
  90. package/assets/.agent/skills/business-analyst/SKILL.md +182 -0
  91. package/assets/.agent/skills/cc-skill-backend-patterns/SKILL.md +584 -0
  92. package/assets/.agent/skills/cc-skill-clickhouse-io/SKILL.md +431 -0
  93. package/assets/.agent/skills/cc-skill-coding-standards/SKILL.md +522 -0
  94. package/assets/.agent/skills/cc-skill-continuous-learning/SKILL.md +10 -0
  95. package/assets/.agent/skills/cc-skill-continuous-learning/config.json +18 -0
  96. package/assets/.agent/skills/cc-skill-continuous-learning/evaluate-session.sh +60 -0
  97. package/assets/.agent/skills/cc-skill-frontend-patterns/SKILL.md +633 -0
  98. package/assets/.agent/skills/cc-skill-project-guidelines-example/SKILL.md +352 -0
  99. package/assets/.agent/skills/cc-skill-security-review/SKILL.md +496 -0
  100. package/assets/.agent/skills/cc-skill-strategic-compact/SKILL.md +10 -0
  101. package/assets/.agent/skills/cc-skill-strategic-compact/suggest-compact.sh +52 -0
  102. package/assets/.agent/skills/ci-cd-setup/SKILL.md +14 -0
  103. package/assets/.agent/skills/ci-cd-setup/data/pipeline_templates.json +42 -0
  104. package/assets/.agent/skills/ci-cd-setup/scripts/__pycache__/ci_gen.cpython-314.pyc +0 -0
  105. package/assets/.agent/skills/ci-cd-setup/scripts/ci_gen.py +75 -0
  106. package/assets/.agent/skills/claude-code-guide/SKILL.md +68 -0
  107. package/assets/.agent/skills/clean-code/SKILL.md +94 -0
  108. package/assets/.agent/skills/clickup-automation/SKILL.md +234 -0
  109. package/assets/.agent/skills/cloud-architect/SKILL.md +135 -0
  110. package/assets/.agent/skills/cloud-penetration-testing/SKILL.md +501 -0
  111. package/assets/.agent/skills/cloud-penetration-testing/references/advanced-cloud-scripts.md +318 -0
  112. package/assets/.agent/skills/code-reviewer/SKILL.md +31 -0
  113. package/assets/.agent/skills/code-reviewer/data/review_rules.json +172 -0
  114. package/assets/.agent/skills/code-reviewer/scripts/__pycache__/reviewer.cpython-314.pyc +0 -0
  115. package/assets/.agent/skills/code-reviewer/scripts/reviewer.py +192 -0
  116. package/assets/.agent/skills/codebase-navigator/SKILL.md +37 -0
  117. package/assets/.agent/skills/codebase-navigator/data/codebase_index.json +577 -0
  118. package/assets/.agent/skills/codebase-navigator/scripts/__pycache__/navigator.cpython-314.pyc +0 -0
  119. package/assets/.agent/skills/codebase-navigator/scripts/navigator.py +240 -0
  120. package/assets/.agent/skills/color-palette-generator/SKILL.md +26 -0
  121. package/assets/.agent/skills/color-palette-generator/data/palettes.json +37 -0
  122. package/assets/.agent/skills/color-palette-generator/scripts/__pycache__/palette.cpython-314.pyc +0 -0
  123. package/assets/.agent/skills/color-palette-generator/scripts/palette.py +57 -0
  124. package/assets/.agent/skills/competitive-landscape/SKILL.md +34 -0
  125. package/assets/.agent/skills/competitive-landscape/resources/implementation-playbook.md +494 -0
  126. package/assets/.agent/skills/competitor-alternatives/SKILL.md +750 -0
  127. package/assets/.agent/skills/computer-use-agents/SKILL.md +315 -0
  128. package/assets/.agent/skills/content-creator/SKILL.md +248 -0
  129. package/assets/.agent/skills/content-creator/assets/content_calendar_template.md +99 -0
  130. package/assets/.agent/skills/content-creator/references/brand_guidelines.md +199 -0
  131. package/assets/.agent/skills/content-creator/references/content_frameworks.md +534 -0
  132. package/assets/.agent/skills/content-creator/references/social_media_optimization.md +317 -0
  133. package/assets/.agent/skills/content-creator/scripts/__pycache__/brand_voice_analyzer.cpython-314.pyc +0 -0
  134. package/assets/.agent/skills/content-creator/scripts/__pycache__/seo_optimizer.cpython-314.pyc +0 -0
  135. package/assets/.agent/skills/content-creator/scripts/brand_voice_analyzer.py +185 -0
  136. package/assets/.agent/skills/content-creator/scripts/seo_optimizer.py +419 -0
  137. package/assets/.agent/skills/content-marketer/SKILL.md +170 -0
  138. package/assets/.agent/skills/context-compression/SKILL.md +266 -0
  139. package/assets/.agent/skills/context-degradation/SKILL.md +238 -0
  140. package/assets/.agent/skills/context-driven-development/SKILL.md +400 -0
  141. package/assets/.agent/skills/context-fundamentals/SKILL.md +192 -0
  142. package/assets/.agent/skills/context-manager/SKILL.md +26 -0
  143. package/assets/.agent/skills/context-manager/scripts/__pycache__/minify.cpython-314.pyc +0 -0
  144. package/assets/.agent/skills/context-manager/scripts/minify.py +82 -0
  145. package/assets/.agent/skills/context-optimization/SKILL.md +186 -0
  146. package/assets/.agent/skills/context-router/SKILL.md +42 -0
  147. package/assets/.agent/skills/context-router/scripts/__pycache__/context_router.cpython-314.pyc +0 -0
  148. package/assets/.agent/skills/context-router/scripts/context_router.py +185 -0
  149. package/assets/.agent/skills/context-window-management/SKILL.md +53 -0
  150. package/assets/.agent/skills/conversation-memory/SKILL.md +61 -0
  151. package/assets/.agent/skills/copywriting/SKILL.md +225 -0
  152. package/assets/.agent/skills/cost-optimization/SKILL.md +286 -0
  153. package/assets/.agent/skills/cqrs-implementation/SKILL.md +35 -0
  154. package/assets/.agent/skills/cqrs-implementation/resources/implementation-playbook.md +540 -0
  155. package/assets/.agent/skills/crewai/SKILL.md +243 -0
  156. package/assets/.agent/skills/daily-news-report/SKILL.md +356 -0
  157. package/assets/.agent/skills/daily-news-report/cache.json +41 -0
  158. package/assets/.agent/skills/daily-news-report/sources.json +183 -0
  159. package/assets/.agent/skills/data-engineer/SKILL.md +224 -0
  160. package/assets/.agent/skills/data-engineering-data-driven-feature/SKILL.md +182 -0
  161. package/assets/.agent/skills/data-engineering-data-pipeline/SKILL.md +201 -0
  162. package/assets/.agent/skills/data-quality-frameworks/SKILL.md +40 -0
  163. package/assets/.agent/skills/data-quality-frameworks/resources/implementation-playbook.md +573 -0
  164. package/assets/.agent/skills/data-scientist/SKILL.md +199 -0
  165. package/assets/.agent/skills/data-storytelling/SKILL.md +465 -0
  166. package/assets/.agent/skills/database-admin/SKILL.md +165 -0
  167. package/assets/.agent/skills/database-architect/SKILL.md +268 -0
  168. package/assets/.agent/skills/database-cloud-optimization-cost-optimize/SKILL.md +44 -0
  169. package/assets/.agent/skills/database-cloud-optimization-cost-optimize/resources/implementation-playbook.md +1441 -0
  170. package/assets/.agent/skills/database-design/SKILL.md +52 -0
  171. package/assets/.agent/skills/database-design/database-selection.md +43 -0
  172. package/assets/.agent/skills/database-design/indexing.md +39 -0
  173. package/assets/.agent/skills/database-design/migrations.md +48 -0
  174. package/assets/.agent/skills/database-design/optimization.md +36 -0
  175. package/assets/.agent/skills/database-design/orm-selection.md +30 -0
  176. package/assets/.agent/skills/database-design/schema-design.md +56 -0
  177. package/assets/.agent/skills/database-design/scripts/__pycache__/schema_validator.cpython-314.pyc +0 -0
  178. package/assets/.agent/skills/database-design/scripts/schema_validator.py +172 -0
  179. package/assets/.agent/skills/database-migration/SKILL.md +436 -0
  180. package/assets/.agent/skills/database-migrations-migration-observability/SKILL.md +420 -0
  181. package/assets/.agent/skills/database-migrations-sql-migrations/SKILL.md +53 -0
  182. package/assets/.agent/skills/database-migrations-sql-migrations/resources/implementation-playbook.md +499 -0
  183. package/assets/.agent/skills/database-optimizer/SKILL.md +167 -0
  184. package/assets/.agent/skills/datadog-automation/SKILL.md +235 -0
  185. package/assets/.agent/skills/db-designer/SKILL.md +22 -0
  186. package/assets/.agent/skills/db-designer/data/schema_patterns.json +156 -0
  187. package/assets/.agent/skills/db-designer/scripts/__pycache__/sql_gen.cpython-314.pyc +0 -0
  188. package/assets/.agent/skills/db-designer/scripts/sql_gen.py +90 -0
  189. package/assets/.agent/skills/dbt-transformation-patterns/SKILL.md +34 -0
  190. package/assets/.agent/skills/dbt-transformation-patterns/resources/implementation-playbook.md +547 -0
  191. package/assets/.agent/skills/debugger/SKILL.md +49 -0
  192. package/assets/.agent/skills/debugging-strategies/SKILL.md +34 -0
  193. package/assets/.agent/skills/debugging-strategies/resources/implementation-playbook.md +511 -0
  194. package/assets/.agent/skills/deep-research/SKILL.md +114 -0
  195. package/assets/.agent/skills/deployment-engineer/SKILL.md +170 -0
  196. package/assets/.agent/skills/deployment-pipeline-design/SKILL.md +371 -0
  197. package/assets/.agent/skills/deployment-procedures/SKILL.md +241 -0
  198. package/assets/.agent/skills/diff-applier/SKILL.md +34 -0
  199. package/assets/.agent/skills/diff-applier/scripts/__pycache__/apply_patch.cpython-314.pyc +0 -0
  200. package/assets/.agent/skills/diff-applier/scripts/apply_patch.py +137 -0
  201. package/assets/.agent/skills/discord-automation/SKILL.md +187 -0
  202. package/assets/.agent/skills/distributed-debugging-debug-trace/SKILL.md +44 -0
  203. package/assets/.agent/skills/distributed-debugging-debug-trace/resources/implementation-playbook.md +1307 -0
  204. package/assets/.agent/skills/distributed-tracing/SKILL.md +450 -0
  205. package/assets/.agent/skills/doc-generator/SKILL.md +14 -0
  206. package/assets/.agent/skills/doc-generator/data/doc_templates.json +40 -0
  207. package/assets/.agent/skills/doc-generator/scripts/__pycache__/doc_gen.cpython-314.pyc +0 -0
  208. package/assets/.agent/skills/doc-generator/scripts/doc_gen.py +48 -0
  209. package/assets/.agent/skills/docker-expert/SKILL.md +409 -0
  210. package/assets/.agent/skills/docker-wizard/SKILL.md +14 -0
  211. package/assets/.agent/skills/docker-wizard/data/docker_templates.json +93 -0
  212. package/assets/.agent/skills/docker-wizard/scripts/__pycache__/docker_gen.cpython-314.pyc +0 -0
  213. package/assets/.agent/skills/docker-wizard/scripts/docker_gen.py +86 -0
  214. package/assets/.agent/skills/docs-architect/SKILL.md +98 -0
  215. package/assets/.agent/skills/e2e-testing-patterns/SKILL.md +41 -0
  216. package/assets/.agent/skills/e2e-testing-patterns/resources/implementation-playbook.md +531 -0
  217. package/assets/.agent/skills/email-sequence/SKILL.md +925 -0
  218. package/assets/.agent/skills/embedding-strategies/SKILL.md +491 -0
  219. package/assets/.agent/skills/employment-contract-templates/SKILL.md +39 -0
  220. package/assets/.agent/skills/employment-contract-templates/resources/implementation-playbook.md +493 -0
  221. package/assets/.agent/skills/env-manager/SKILL.md +33 -0
  222. package/assets/.agent/skills/env-manager/scripts/__pycache__/env_scanner.cpython-314.pyc +0 -0
  223. package/assets/.agent/skills/env-manager/scripts/env_scanner.py +181 -0
  224. package/assets/.agent/skills/error-detective/SKILL.md +53 -0
  225. package/assets/.agent/skills/error-diagnostics-error-analysis/SKILL.md +47 -0
  226. package/assets/.agent/skills/error-diagnostics-error-analysis/resources/implementation-playbook.md +1143 -0
  227. package/assets/.agent/skills/error-diagnostics-error-trace/SKILL.md +48 -0
  228. package/assets/.agent/skills/error-diagnostics-error-trace/resources/implementation-playbook.md +1371 -0
  229. package/assets/.agent/skills/error-handling-patterns/SKILL.md +35 -0
  230. package/assets/.agent/skills/error-handling-patterns/resources/implementation-playbook.md +635 -0
  231. package/assets/.agent/skills/ethical-hacking-methodology/SKILL.md +466 -0
  232. package/assets/.agent/skills/event-sourcing-architect/SKILL.md +58 -0
  233. package/assets/.agent/skills/event-store-design/SKILL.md +449 -0
  234. package/assets/.agent/skills/exa-search/SKILL.md +36 -0
  235. package/assets/.agent/skills/fastapi-pro/SKILL.md +192 -0
  236. package/assets/.agent/skills/fastapi-router-py/SKILL.md +52 -0
  237. package/assets/.agent/skills/fastapi-templates/SKILL.md +32 -0
  238. package/assets/.agent/skills/fastapi-templates/resources/implementation-playbook.md +566 -0
  239. package/assets/.agent/skills/free-tool-strategy/SKILL.md +576 -0
  240. package/assets/.agent/skills/freshdesk-automation/SKILL.md +219 -0
  241. package/assets/.agent/skills/frontend-developer/SKILL.md +171 -0
  242. package/assets/.agent/skills/gcp-cloud-run/SKILL.md +288 -0
  243. package/assets/.agent/skills/gemini-api-dev/SKILL.md +127 -0
  244. package/assets/.agent/skills/geo-fundamentals/SKILL.md +156 -0
  245. package/assets/.agent/skills/geo-fundamentals/scripts/__pycache__/geo_checker.cpython-314.pyc +0 -0
  246. package/assets/.agent/skills/geo-fundamentals/scripts/geo_checker.py +289 -0
  247. package/assets/.agent/skills/git-advanced-workflows/SKILL.md +412 -0
  248. package/assets/.agent/skills/git-manager/SKILL.md +22 -0
  249. package/assets/.agent/skills/git-manager/data/git_conventions.json +84 -0
  250. package/assets/.agent/skills/git-manager/scripts/__pycache__/commit.cpython-314.pyc +0 -0
  251. package/assets/.agent/skills/git-manager/scripts/__pycache__/log.cpython-314.pyc +0 -0
  252. package/assets/.agent/skills/git-manager/scripts/commit.py +70 -0
  253. package/assets/.agent/skills/git-manager/scripts/log.py +29 -0
  254. package/assets/.agent/skills/github-actions-templates/SKILL.md +345 -0
  255. package/assets/.agent/skills/github-automation/SKILL.md +227 -0
  256. package/assets/.agent/skills/gitlab-ci-patterns/SKILL.md +283 -0
  257. package/assets/.agent/skills/gitops-workflow/SKILL.md +303 -0
  258. package/assets/.agent/skills/gitops-workflow/references/argocd-setup.md +134 -0
  259. package/assets/.agent/skills/gitops-workflow/references/sync-policies.md +131 -0
  260. package/assets/.agent/skills/gmail-automation/SKILL.md +270 -0
  261. package/assets/.agent/skills/google-calendar-automation/SKILL.md +176 -0
  262. package/assets/.agent/skills/google-drive-automation/SKILL.md +193 -0
  263. package/assets/.agent/skills/googlesheets-automation/SKILL.md +197 -0
  264. package/assets/.agent/skills/grafana-dashboards/SKILL.md +381 -0
  265. package/assets/.agent/skills/graphql/SKILL.md +68 -0
  266. package/assets/.agent/skills/graphql-architect/SKILL.md +182 -0
  267. package/assets/.agent/skills/helm-chart-scaffolding/SKILL.md +34 -0
  268. package/assets/.agent/skills/helm-chart-scaffolding/assets/Chart.yaml.template +42 -0
  269. package/assets/.agent/skills/helm-chart-scaffolding/assets/values.yaml.template +185 -0
  270. package/assets/.agent/skills/helm-chart-scaffolding/references/chart-structure.md +500 -0
  271. package/assets/.agent/skills/helm-chart-scaffolding/resources/implementation-playbook.md +543 -0
  272. package/assets/.agent/skills/helm-chart-scaffolding/scripts/validate-chart.sh +244 -0
  273. package/assets/.agent/skills/hr-pro/SKILL.md +126 -0
  274. package/assets/.agent/skills/hubspot-automation/SKILL.md +178 -0
  275. package/assets/.agent/skills/hubspot-integration/SKILL.md +42 -0
  276. package/assets/.agent/skills/hybrid-search-implementation/SKILL.md +32 -0
  277. package/assets/.agent/skills/hybrid-search-implementation/resources/implementation-playbook.md +567 -0
  278. package/assets/.agent/skills/incident-responder/SKILL.md +213 -0
  279. package/assets/.agent/skills/incident-response-incident-response/SKILL.md +168 -0
  280. package/assets/.agent/skills/incident-response-smart-fix/SKILL.md +29 -0
  281. package/assets/.agent/skills/incident-response-smart-fix/resources/implementation-playbook.md +838 -0
  282. package/assets/.agent/skills/incident-runbook-templates/SKILL.md +395 -0
  283. package/assets/.agent/skills/infinite-gratitude/SKILL.md +26 -0
  284. package/assets/.agent/skills/inngest/SKILL.md +55 -0
  285. package/assets/.agent/skills/instagram-automation/SKILL.md +192 -0
  286. package/assets/.agent/skills/intercom-automation/SKILL.md +248 -0
  287. package/assets/.agent/skills/javascript-mastery/SKILL.md +645 -0
  288. package/assets/.agent/skills/javascript-testing-patterns/SKILL.md +35 -0
  289. package/assets/.agent/skills/javascript-testing-patterns/resources/implementation-playbook.md +1024 -0
  290. package/assets/.agent/skills/jira-automation/SKILL.md +185 -0
  291. package/assets/.agent/skills/k8s-manifest-generator/SKILL.md +35 -0
  292. package/assets/.agent/skills/k8s-manifest-generator/assets/configmap-template.yaml +296 -0
  293. package/assets/.agent/skills/k8s-manifest-generator/assets/deployment-template.yaml +203 -0
  294. package/assets/.agent/skills/k8s-manifest-generator/assets/service-template.yaml +171 -0
  295. package/assets/.agent/skills/k8s-manifest-generator/references/deployment-spec.md +753 -0
  296. package/assets/.agent/skills/k8s-manifest-generator/references/service-spec.md +724 -0
  297. package/assets/.agent/skills/k8s-manifest-generator/resources/implementation-playbook.md +510 -0
  298. package/assets/.agent/skills/k8s-security-policies/SKILL.md +346 -0
  299. package/assets/.agent/skills/k8s-security-policies/assets/network-policy-template.yaml +177 -0
  300. package/assets/.agent/skills/k8s-security-policies/references/rbac-patterns.md +187 -0
  301. package/assets/.agent/skills/knowledge-guide/SKILL.md +34 -0
  302. package/assets/.agent/skills/knowledge-guide/scripts/__pycache__/note_taker.cpython-314.pyc +0 -0
  303. package/assets/.agent/skills/knowledge-guide/scripts/note_taker.py +50 -0
  304. package/assets/.agent/skills/kpi-dashboard-design/SKILL.md +440 -0
  305. package/assets/.agent/skills/kubernetes-architect/SKILL.md +170 -0
  306. package/assets/.agent/skills/langchain-architecture/SKILL.md +350 -0
  307. package/assets/.agent/skills/langfuse/SKILL.md +238 -0
  308. package/assets/.agent/skills/langgraph/SKILL.md +287 -0
  309. package/assets/.agent/skills/launch-strategy/SKILL.md +344 -0
  310. package/assets/.agent/skills/legal-advisor/SKILL.md +70 -0
  311. package/assets/.agent/skills/linkedin-automation/SKILL.md +175 -0
  312. package/assets/.agent/skills/linux-privilege-escalation/SKILL.md +504 -0
  313. package/assets/.agent/skills/llm-app-patterns/SKILL.md +760 -0
  314. package/assets/.agent/skills/llm-evaluation/SKILL.md +483 -0
  315. package/assets/.agent/skills/mailchimp-automation/SKILL.md +231 -0
  316. package/assets/.agent/skills/market-sizing-analysis/SKILL.md +425 -0
  317. package/assets/.agent/skills/market-sizing-analysis/examples/saas-market-sizing.md +349 -0
  318. package/assets/.agent/skills/market-sizing-analysis/references/data-sources.md +360 -0
  319. package/assets/.agent/skills/marketing-ideas/SKILL.md +221 -0
  320. package/assets/.agent/skills/marketing-psychology/SKILL.md +255 -0
  321. package/assets/.agent/skills/mcp-builder/LICENSE.txt +202 -0
  322. package/assets/.agent/skills/mcp-builder/SKILL.md +236 -0
  323. package/assets/.agent/skills/mcp-builder/reference/evaluation.md +602 -0
  324. package/assets/.agent/skills/mcp-builder/reference/mcp_best_practices.md +249 -0
  325. package/assets/.agent/skills/mcp-builder/reference/node_mcp_server.md +970 -0
  326. package/assets/.agent/skills/mcp-builder/reference/python_mcp_server.md +719 -0
  327. package/assets/.agent/skills/mcp-builder/scripts/__pycache__/connections.cpython-314.pyc +0 -0
  328. package/assets/.agent/skills/mcp-builder/scripts/__pycache__/evaluation.cpython-314.pyc +0 -0
  329. package/assets/.agent/skills/mcp-builder/scripts/connections.py +151 -0
  330. package/assets/.agent/skills/mcp-builder/scripts/evaluation.py +373 -0
  331. package/assets/.agent/skills/mcp-builder/scripts/example_evaluation.xml +22 -0
  332. package/assets/.agent/skills/mcp-builder/scripts/requirements.txt +2 -0
  333. package/assets/.agent/skills/mermaid-expert/SKILL.md +59 -0
  334. package/assets/.agent/skills/meta-thinker/SKILL.md +41 -0
  335. package/assets/.agent/skills/meta-thinker/data/brainstorm_frameworks.json +614 -0
  336. package/assets/.agent/skills/meta-thinker/data/feature_ideas.json +352 -0
  337. package/assets/.agent/skills/meta-thinker/data/industry_database.json +1693 -0
  338. package/assets/.agent/skills/meta-thinker/data/monetization_models.json +570 -0
  339. package/assets/.agent/skills/meta-thinker/data/platform_guide.json +361 -0
  340. package/assets/.agent/skills/meta-thinker/data/product_archetypes.json +998 -0
  341. package/assets/.agent/skills/meta-thinker/scripts/__pycache__/idea_engine.cpython-314.pyc +0 -0
  342. package/assets/.agent/skills/meta-thinker/scripts/idea_engine.py +246 -0
  343. package/assets/.agent/skills/metasploit-framework/SKILL.md +478 -0
  344. package/assets/.agent/skills/micro-saas-launcher/SKILL.md +212 -0
  345. package/assets/.agent/skills/monday-automation/SKILL.md +233 -0
  346. package/assets/.agent/skills/multi-agent-patterns/SKILL.md +262 -0
  347. package/assets/.agent/skills/n8n-code-python/SKILL.md +750 -0
  348. package/assets/.agent/skills/n8n-mcp-tools-expert/SKILL.md +654 -0
  349. package/assets/.agent/skills/n8n-node-configuration/SKILL.md +796 -0
  350. package/assets/.agent/skills/neon-postgres/SKILL.md +56 -0
  351. package/assets/.agent/skills/nestjs-expert/SKILL.md +552 -0
  352. package/assets/.agent/skills/nextjs-best-practices/SKILL.md +203 -0
  353. package/assets/.agent/skills/nocobase-plugin-developer/SKILL.md +1289 -0
  354. package/assets/.agent/skills/nodejs-backend-patterns/SKILL.md +35 -0
  355. package/assets/.agent/skills/nodejs-backend-patterns/resources/implementation-playbook.md +1019 -0
  356. package/assets/.agent/skills/nodejs-best-practices/SKILL.md +333 -0
  357. package/assets/.agent/skills/nosql-expert/SKILL.md +111 -0
  358. package/assets/.agent/skills/notion-automation/SKILL.md +215 -0
  359. package/assets/.agent/skills/observability-engineer/SKILL.md +237 -0
  360. package/assets/.agent/skills/observability-monitoring-monitor-setup/SKILL.md +48 -0
  361. package/assets/.agent/skills/observability-monitoring-monitor-setup/resources/implementation-playbook.md +505 -0
  362. package/assets/.agent/skills/observability-monitoring-slo-implement/SKILL.md +43 -0
  363. package/assets/.agent/skills/observability-monitoring-slo-implement/resources/implementation-playbook.md +1077 -0
  364. package/assets/.agent/skills/on-call-handoff-patterns/SKILL.md +453 -0
  365. package/assets/.agent/skills/onboarding-cro/SKILL.md +433 -0
  366. package/assets/.agent/skills/openapi-spec-generation/SKILL.md +33 -0
  367. package/assets/.agent/skills/openapi-spec-generation/resources/implementation-playbook.md +1027 -0
  368. package/assets/.agent/skills/page-cro/SKILL.md +343 -0
  369. package/assets/.agent/skills/paid-ads/SKILL.md +551 -0
  370. package/assets/.agent/skills/payment-integration/SKILL.md +77 -0
  371. package/assets/.agent/skills/paypal-integration/SKILL.md +479 -0
  372. package/assets/.agent/skills/paywall-upgrade-cro/SKILL.md +570 -0
  373. package/assets/.agent/skills/pentest-checklist/SKILL.md +334 -0
  374. package/assets/.agent/skills/pentest-commands/SKILL.md +438 -0
  375. package/assets/.agent/skills/pipedrive-automation/SKILL.md +224 -0
  376. package/assets/.agent/skills/plaid-fintech/SKILL.md +50 -0
  377. package/assets/.agent/skills/popup-cro/SKILL.md +346 -0
  378. package/assets/.agent/skills/postgresql/SKILL.md +230 -0
  379. package/assets/.agent/skills/postmortem-writing/SKILL.md +386 -0
  380. package/assets/.agent/skills/pricing-strategy/SKILL.md +356 -0
  381. package/assets/.agent/skills/prisma-expert/SKILL.md +355 -0
  382. package/assets/.agent/skills/product-manager-toolkit/SKILL.md +351 -0
  383. package/assets/.agent/skills/product-manager-toolkit/references/prd_templates.md +317 -0
  384. package/assets/.agent/skills/product-manager-toolkit/scripts/__pycache__/customer_interview_analyzer.cpython-314.pyc +0 -0
  385. package/assets/.agent/skills/product-manager-toolkit/scripts/__pycache__/rice_prioritizer.cpython-314.pyc +0 -0
  386. package/assets/.agent/skills/product-manager-toolkit/scripts/customer_interview_analyzer.py +441 -0
  387. package/assets/.agent/skills/product-manager-toolkit/scripts/rice_prioritizer.py +296 -0
  388. package/assets/.agent/skills/programmatic-seo/SKILL.md +351 -0
  389. package/assets/.agent/skills/project-scaffolder/SKILL.md +26 -0
  390. package/assets/.agent/skills/project-scaffolder/data/scaffold_templates.json +150 -0
  391. package/assets/.agent/skills/project-scaffolder/scripts/__pycache__/scaffold.cpython-314.pyc +0 -0
  392. package/assets/.agent/skills/project-scaffolder/scripts/scaffold.py +249 -0
  393. package/assets/.agent/skills/projection-patterns/SKILL.md +33 -0
  394. package/assets/.agent/skills/projection-patterns/resources/implementation-playbook.md +501 -0
  395. package/assets/.agent/skills/prometheus-configuration/SKILL.md +404 -0
  396. package/assets/.agent/skills/prompt-caching/SKILL.md +61 -0
  397. package/assets/.agent/skills/prompt-engineer/README.md +659 -0
  398. package/assets/.agent/skills/prompt-engineer/SKILL.md +252 -0
  399. package/assets/.agent/skills/prompt-engineering/SKILL.md +171 -0
  400. package/assets/.agent/skills/prompt-engineering-patterns/SKILL.md +213 -0
  401. package/assets/.agent/skills/prompt-engineering-patterns/assets/few-shot-examples.json +106 -0
  402. package/assets/.agent/skills/prompt-engineering-patterns/assets/prompt-template-library.md +246 -0
  403. package/assets/.agent/skills/prompt-engineering-patterns/references/chain-of-thought.md +399 -0
  404. package/assets/.agent/skills/prompt-engineering-patterns/references/few-shot-learning.md +369 -0
  405. package/assets/.agent/skills/prompt-engineering-patterns/references/prompt-optimization.md +414 -0
  406. package/assets/.agent/skills/prompt-engineering-patterns/references/prompt-templates.md +470 -0
  407. package/assets/.agent/skills/prompt-engineering-patterns/references/system-prompts.md +189 -0
  408. package/assets/.agent/skills/prompt-engineering-patterns/scripts/__pycache__/optimize-prompt.cpython-314.pyc +0 -0
  409. package/assets/.agent/skills/prompt-engineering-patterns/scripts/optimize-prompt.py +279 -0
  410. package/assets/.agent/skills/prompt-library/SKILL.md +322 -0
  411. package/assets/.agent/skills/rag-engineer/SKILL.md +90 -0
  412. package/assets/.agent/skills/rag-implementation/SKILL.md +421 -0
  413. package/assets/.agent/skills/react-patterns/SKILL.md +198 -0
  414. package/assets/.agent/skills/react-state-management/SKILL.md +441 -0
  415. package/assets/.agent/skills/react-ui-patterns/SKILL.md +289 -0
  416. package/assets/.agent/skills/readme-generator/SKILL.md +14 -0
  417. package/assets/.agent/skills/readme-generator/data/readme_templates.json +22 -0
  418. package/assets/.agent/skills/readme-generator/scripts/__pycache__/readme_gen.cpython-314.pyc +0 -0
  419. package/assets/.agent/skills/readme-generator/scripts/readme_gen.py +81 -0
  420. package/assets/.agent/skills/red-team-tactics/SKILL.md +199 -0
  421. package/assets/.agent/skills/red-team-tools/SKILL.md +310 -0
  422. package/assets/.agent/skills/reference-builder/SKILL.md +188 -0
  423. package/assets/.agent/skills/referral-program/SKILL.md +602 -0
  424. package/assets/.agent/skills/release-manager/SKILL.md +30 -0
  425. package/assets/.agent/skills/release-manager/scripts/__pycache__/release.cpython-314.pyc +0 -0
  426. package/assets/.agent/skills/release-manager/scripts/release.py +210 -0
  427. package/assets/.agent/skills/reliability-engineer/SKILL.md +30 -0
  428. package/assets/.agent/skills/reliability-engineer/data/reliability.json +57 -0
  429. package/assets/.agent/skills/reliability-engineer/scripts/__pycache__/sre.cpython-314.pyc +0 -0
  430. package/assets/.agent/skills/reliability-engineer/scripts/sre.py +94 -0
  431. package/assets/.agent/skills/saga-orchestration/SKILL.md +496 -0
  432. package/assets/.agent/skills/sales-automator/SKILL.md +55 -0
  433. package/assets/.agent/skills/salesforce-automation/SKILL.md +190 -0
  434. package/assets/.agent/skills/sast-configuration/SKILL.md +212 -0
  435. package/assets/.agent/skills/scanning-tools/SKILL.md +589 -0
  436. package/assets/.agent/skills/schema-markup/SKILL.md +360 -0
  437. package/assets/.agent/skills/search-specialist/SKILL.md +80 -0
  438. package/assets/.agent/skills/secrets-management/SKILL.md +364 -0
  439. package/assets/.agent/skills/security-auditor/SKILL.md +169 -0
  440. package/assets/.agent/skills/security-bluebook-builder/SKILL.md +22 -0
  441. package/assets/.agent/skills/security-requirement-extraction/SKILL.md +33 -0
  442. package/assets/.agent/skills/security-requirement-extraction/resources/implementation-playbook.md +676 -0
  443. package/assets/.agent/skills/security-scanner/SKILL.md +21 -0
  444. package/assets/.agent/skills/security-scanner/data/security_patterns.json +101 -0
  445. package/assets/.agent/skills/security-scanner/scripts/__pycache__/checklist_gen.cpython-314.pyc +0 -0
  446. package/assets/.agent/skills/security-scanner/scripts/__pycache__/vuln_scan.cpython-314.pyc +0 -0
  447. package/assets/.agent/skills/security-scanner/scripts/checklist_gen.py +49 -0
  448. package/assets/.agent/skills/security-scanner/scripts/vuln_scan.py +81 -0
  449. package/assets/.agent/skills/sendgrid-automation/SKILL.md +228 -0
  450. package/assets/.agent/skills/seo-audit/SKILL.md +487 -0
  451. package/assets/.agent/skills/seo-authority-builder/SKILL.md +136 -0
  452. package/assets/.agent/skills/seo-cannibalization-detector/SKILL.md +123 -0
  453. package/assets/.agent/skills/seo-content-auditor/SKILL.md +83 -0
  454. package/assets/.agent/skills/seo-content-planner/SKILL.md +108 -0
  455. package/assets/.agent/skills/seo-content-refresher/SKILL.md +118 -0
  456. package/assets/.agent/skills/seo-content-writer/SKILL.md +96 -0
  457. package/assets/.agent/skills/seo-fundamentals/SKILL.md +173 -0
  458. package/assets/.agent/skills/seo-fundamentals/scripts/__pycache__/seo_checker.cpython-314.pyc +0 -0
  459. package/assets/.agent/skills/seo-fundamentals/scripts/seo_checker.py +219 -0
  460. package/assets/.agent/skills/seo-keyword-strategist/SKILL.md +95 -0
  461. package/assets/.agent/skills/seo-meta-optimizer/SKILL.md +92 -0
  462. package/assets/.agent/skills/seo-snippet-hunter/SKILL.md +114 -0
  463. package/assets/.agent/skills/seo-structure-architect/SKILL.md +108 -0
  464. package/assets/.agent/skills/service-mesh-observability/SKILL.md +395 -0
  465. package/assets/.agent/skills/shodan-reconnaissance/SKILL.md +503 -0
  466. package/assets/.agent/skills/shopify-apps/SKILL.md +42 -0
  467. package/assets/.agent/skills/shopify-automation/SKILL.md +168 -0
  468. package/assets/.agent/skills/signup-flow-cro/SKILL.md +355 -0
  469. package/assets/.agent/skills/similarity-search-patterns/SKILL.md +33 -0
  470. package/assets/.agent/skills/similarity-search-patterns/resources/implementation-playbook.md +557 -0
  471. package/assets/.agent/skills/skill-creator/LICENSE.txt +202 -0
  472. package/assets/.agent/skills/skill-creator/README.md +270 -0
  473. package/assets/.agent/skills/skill-creator/SKILL.md +593 -0
  474. package/assets/.agent/skills/skill-creator/references/output-patterns.md +82 -0
  475. package/assets/.agent/skills/skill-creator/references/workflows.md +28 -0
  476. package/assets/.agent/skills/skill-creator/scripts/__pycache__/init_skill.cpython-314.pyc +0 -0
  477. package/assets/.agent/skills/skill-creator/scripts/__pycache__/package_skill.cpython-314.pyc +0 -0
  478. package/assets/.agent/skills/skill-creator/scripts/__pycache__/quick_validate.cpython-314.pyc +0 -0
  479. package/assets/.agent/skills/skill-creator/scripts/init_skill.py +303 -0
  480. package/assets/.agent/skills/skill-creator/scripts/package_skill.py +110 -0
  481. package/assets/.agent/skills/skill-creator/scripts/quick_validate.py +95 -0
  482. package/assets/.agent/skills/skill-developer/ADVANCED.md +197 -0
  483. package/assets/.agent/skills/skill-developer/HOOK_MECHANISMS.md +306 -0
  484. package/assets/.agent/skills/skill-developer/PATTERNS_LIBRARY.md +152 -0
  485. package/assets/.agent/skills/skill-developer/SKILL.md +426 -0
  486. package/assets/.agent/skills/skill-developer/SKILL_RULES_REFERENCE.md +315 -0
  487. package/assets/.agent/skills/skill-developer/TRIGGER_TYPES.md +305 -0
  488. package/assets/.agent/skills/skill-developer/TROUBLESHOOTING.md +514 -0
  489. package/assets/.agent/skills/slack-automation/SKILL.md +189 -0
  490. package/assets/.agent/skills/slo-implementation/SKILL.md +341 -0
  491. package/assets/.agent/skills/social-content/SKILL.md +807 -0
  492. package/assets/.agent/skills/spark-optimization/SKILL.md +427 -0
  493. package/assets/.agent/skills/sql-injection-testing/SKILL.md +448 -0
  494. package/assets/.agent/skills/sql-optimization-patterns/SKILL.md +35 -0
  495. package/assets/.agent/skills/sql-optimization-patterns/resources/implementation-playbook.md +504 -0
  496. package/assets/.agent/skills/sql-pro/SKILL.md +173 -0
  497. package/assets/.agent/skills/ssh-penetration-testing/SKILL.md +488 -0
  498. package/assets/.agent/skills/startup-analyst/SKILL.md +328 -0
  499. package/assets/.agent/skills/startup-business-analyst-business-case/SKILL.md +487 -0
  500. package/assets/.agent/skills/startup-business-analyst-financial-projections/SKILL.md +353 -0
  501. package/assets/.agent/skills/startup-business-analyst-market-opportunity/SKILL.md +240 -0
  502. package/assets/.agent/skills/startup-financial-modeling/SKILL.md +467 -0
  503. package/assets/.agent/skills/startup-metrics-framework/SKILL.md +34 -0
  504. package/assets/.agent/skills/startup-metrics-framework/resources/implementation-playbook.md +500 -0
  505. package/assets/.agent/skills/stride-analysis-patterns/SKILL.md +33 -0
  506. package/assets/.agent/skills/stride-analysis-patterns/resources/implementation-playbook.md +655 -0
  507. package/assets/.agent/skills/stripe-automation/SKILL.md +198 -0
  508. package/assets/.agent/skills/stripe-integration/SKILL.md +454 -0
  509. package/assets/.agent/skills/supabase-postgres-best-practices/AGENTS.md +1490 -0
  510. package/assets/.agent/skills/supabase-postgres-best-practices/README.md +119 -0
  511. package/assets/.agent/skills/supabase-postgres-best-practices/SKILL.md +57 -0
  512. package/assets/.agent/skills/supabase-postgres-best-practices/metadata.json +13 -0
  513. package/assets/.agent/skills/supabase-postgres-best-practices/rules/_contributing.md +171 -0
  514. package/assets/.agent/skills/supabase-postgres-best-practices/rules/_sections.md +39 -0
  515. package/assets/.agent/skills/supabase-postgres-best-practices/rules/_template.md +34 -0
  516. package/assets/.agent/skills/supabase-postgres-best-practices/rules/advanced-full-text-search.md +55 -0
  517. package/assets/.agent/skills/supabase-postgres-best-practices/rules/advanced-jsonb-indexing.md +49 -0
  518. package/assets/.agent/skills/supabase-postgres-best-practices/rules/conn-idle-timeout.md +46 -0
  519. package/assets/.agent/skills/supabase-postgres-best-practices/rules/conn-limits.md +44 -0
  520. package/assets/.agent/skills/supabase-postgres-best-practices/rules/conn-pooling.md +41 -0
  521. package/assets/.agent/skills/supabase-postgres-best-practices/rules/conn-prepared-statements.md +46 -0
  522. package/assets/.agent/skills/supabase-postgres-best-practices/rules/data-batch-inserts.md +54 -0
  523. package/assets/.agent/skills/supabase-postgres-best-practices/rules/data-n-plus-one.md +53 -0
  524. package/assets/.agent/skills/supabase-postgres-best-practices/rules/data-pagination.md +50 -0
  525. package/assets/.agent/skills/supabase-postgres-best-practices/rules/data-upsert.md +50 -0
  526. package/assets/.agent/skills/supabase-postgres-best-practices/rules/lock-advisory.md +56 -0
  527. package/assets/.agent/skills/supabase-postgres-best-practices/rules/lock-deadlock-prevention.md +68 -0
  528. package/assets/.agent/skills/supabase-postgres-best-practices/rules/lock-short-transactions.md +50 -0
  529. package/assets/.agent/skills/supabase-postgres-best-practices/rules/lock-skip-locked.md +54 -0
  530. package/assets/.agent/skills/supabase-postgres-best-practices/rules/monitor-explain-analyze.md +45 -0
  531. package/assets/.agent/skills/supabase-postgres-best-practices/rules/monitor-pg-stat-statements.md +55 -0
  532. package/assets/.agent/skills/supabase-postgres-best-practices/rules/monitor-vacuum-analyze.md +55 -0
  533. package/assets/.agent/skills/supabase-postgres-best-practices/rules/query-composite-indexes.md +44 -0
  534. package/assets/.agent/skills/supabase-postgres-best-practices/rules/query-covering-indexes.md +40 -0
  535. package/assets/.agent/skills/supabase-postgres-best-practices/rules/query-index-types.md +45 -0
  536. package/assets/.agent/skills/supabase-postgres-best-practices/rules/query-missing-indexes.md +43 -0
  537. package/assets/.agent/skills/supabase-postgres-best-practices/rules/query-partial-indexes.md +45 -0
  538. package/assets/.agent/skills/supabase-postgres-best-practices/rules/schema-data-types.md +46 -0
  539. package/assets/.agent/skills/supabase-postgres-best-practices/rules/schema-foreign-key-indexes.md +59 -0
  540. package/assets/.agent/skills/supabase-postgres-best-practices/rules/schema-lowercase-identifiers.md +55 -0
  541. package/assets/.agent/skills/supabase-postgres-best-practices/rules/schema-partitioning.md +55 -0
  542. package/assets/.agent/skills/supabase-postgres-best-practices/rules/schema-primary-keys.md +61 -0
  543. package/assets/.agent/skills/supabase-postgres-best-practices/rules/security-privileges.md +54 -0
  544. package/assets/.agent/skills/supabase-postgres-best-practices/rules/security-rls-basics.md +50 -0
  545. package/assets/.agent/skills/supabase-postgres-best-practices/rules/security-rls-performance.md +57 -0
  546. package/assets/.agent/skills/system-diagrammer/SKILL.md +25 -0
  547. package/assets/.agent/skills/system-diagrammer/data/diagram_templates.json +69 -0
  548. package/assets/.agent/skills/system-diagrammer/scripts/__pycache__/diagram.cpython-314.pyc +0 -0
  549. package/assets/.agent/skills/system-diagrammer/scripts/diagram.py +81 -0
  550. package/assets/.agent/skills/system-strategist/SKILL.md +35 -0
  551. package/assets/.agent/skills/system-strategist/data/strategy_patterns.json +170 -0
  552. package/assets/.agent/skills/system-strategist/scripts/__pycache__/strategist.cpython-314.pyc +0 -0
  553. package/assets/.agent/skills/system-strategist/scripts/strategist.py +136 -0
  554. package/assets/.agent/skills/tavily-web/SKILL.md +36 -0
  555. package/assets/.agent/skills/team-composition-analysis/SKILL.md +413 -0
  556. package/assets/.agent/skills/tech-stack-advisor/SKILL.md +59 -0
  557. package/assets/.agent/skills/tech-stack-advisor/data/stacks.json +727 -0
  558. package/assets/.agent/skills/tech-stack-advisor/data/tech_data.json +1297 -0
  559. package/assets/.agent/skills/tech-stack-advisor/scripts/__pycache__/advisor.cpython-314.pyc +0 -0
  560. package/assets/.agent/skills/tech-stack-advisor/scripts/__pycache__/scanner.cpython-314.pyc +0 -0
  561. package/assets/.agent/skills/tech-stack-advisor/scripts/advisor.py +211 -0
  562. package/assets/.agent/skills/tech-stack-advisor/scripts/scanner.py +102 -0
  563. package/assets/.agent/skills/terraform-module-library/SKILL.md +261 -0
  564. package/assets/.agent/skills/terraform-module-library/references/aws-modules.md +63 -0
  565. package/assets/.agent/skills/terraform-skill/SKILL.md +517 -0
  566. package/assets/.agent/skills/terraform-specialist/SKILL.md +166 -0
  567. package/assets/.agent/skills/test-generator/SKILL.md +14 -0
  568. package/assets/.agent/skills/test-generator/data/test_patterns.json +39 -0
  569. package/assets/.agent/skills/test-generator/scripts/__pycache__/gen_skeleton.cpython-314.pyc +0 -0
  570. package/assets/.agent/skills/test-generator/scripts/gen_skeleton.py +61 -0
  571. package/assets/.agent/skills/testing-patterns/SKILL.md +259 -0
  572. package/assets/.agent/skills/threat-mitigation-mapping/SKILL.md +33 -0
  573. package/assets/.agent/skills/threat-mitigation-mapping/resources/implementation-playbook.md +744 -0
  574. package/assets/.agent/skills/threat-modeling-expert/SKILL.md +60 -0
  575. package/assets/.agent/skills/tool-design/SKILL.md +318 -0
  576. package/assets/.agent/skills/top-web-vulnerabilities/SKILL.md +543 -0
  577. package/assets/.agent/skills/trello-automation/SKILL.md +181 -0
  578. package/assets/.agent/skills/trigger-dev/SKILL.md +67 -0
  579. package/assets/.agent/skills/tutorial-engineer/SKILL.md +139 -0
  580. package/assets/.agent/skills/twitter-automation/SKILL.md +231 -0
  581. package/assets/.agent/skills/typescript-pro/SKILL.md +55 -0
  582. package/assets/.agent/skills/ui-ux-pro-max/SKILL.md +30 -0
  583. package/assets/.agent/skills/ui-ux-pro-max/data/charts.csv +26 -0
  584. package/assets/.agent/skills/ui-ux-pro-max/data/colors.csv +97 -0
  585. package/assets/.agent/skills/ui-ux-pro-max/data/icons.csv +101 -0
  586. package/assets/.agent/skills/ui-ux-pro-max/data/landing.csv +31 -0
  587. package/assets/.agent/skills/ui-ux-pro-max/data/products.csv +97 -0
  588. package/assets/.agent/skills/ui-ux-pro-max/data/react-performance.csv +45 -0
  589. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/astro.csv +54 -0
  590. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
  591. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
  592. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -0
  593. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
  594. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -0
  595. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -0
  596. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
  597. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/react.csv +54 -0
  598. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/shadcn.csv +61 -0
  599. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
  600. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
  601. package/assets/.agent/skills/ui-ux-pro-max/data/stacks/vue.csv +50 -0
  602. package/assets/.agent/skills/ui-ux-pro-max/data/styles.csv +68 -0
  603. package/assets/.agent/skills/ui-ux-pro-max/data/typography.csv +58 -0
  604. package/assets/.agent/skills/ui-ux-pro-max/data/ui-reasoning.csv +101 -0
  605. package/assets/.agent/skills/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
  606. package/assets/.agent/skills/ui-ux-pro-max/data/web-interface.csv +31 -0
  607. package/assets/.agent/skills/ui-ux-pro-max/scripts/__pycache__/core.cpython-314.pyc +0 -0
  608. package/assets/.agent/skills/ui-ux-pro-max/scripts/__pycache__/design_system.cpython-314.pyc +0 -0
  609. package/assets/.agent/skills/ui-ux-pro-max/scripts/__pycache__/search.cpython-314.pyc +0 -0
  610. package/assets/.agent/skills/ui-ux-pro-max/scripts/core.py +253 -0
  611. package/assets/.agent/skills/ui-ux-pro-max/scripts/design_system.py +1110 -0
  612. package/assets/.agent/skills/ui-ux-pro-max/scripts/search.py +162 -0
  613. package/assets/.agent/skills/using-neon/SKILL.md +84 -0
  614. package/assets/.agent/skills/vector-database-engineer/SKILL.md +60 -0
  615. package/assets/.agent/skills/vector-index-tuning/SKILL.md +42 -0
  616. package/assets/.agent/skills/vector-index-tuning/resources/implementation-playbook.md +507 -0
  617. package/assets/.agent/skills/vercel-deployment/SKILL.md +79 -0
  618. package/assets/.agent/skills/voice-agents/SKILL.md +68 -0
  619. package/assets/.agent/skills/vulnerability-scanner/SKILL.md +276 -0
  620. package/assets/.agent/skills/vulnerability-scanner/checklists.md +121 -0
  621. package/assets/.agent/skills/vulnerability-scanner/scripts/__pycache__/security_scan.cpython-314.pyc +0 -0
  622. package/assets/.agent/skills/vulnerability-scanner/scripts/security_scan.py +458 -0
  623. package/assets/.agent/skills/whatsapp-automation/SKILL.md +214 -0
  624. package/assets/.agent/skills/wiki-architect/SKILL.md +60 -0
  625. package/assets/.agent/skills/wiki-onboarding/SKILL.md +77 -0
  626. package/assets/.agent/skills/wiki-page-writer/SKILL.md +65 -0
  627. package/assets/.agent/skills/wiki-researcher/SKILL.md +65 -0
  628. package/assets/.agent/skills/windows-privilege-escalation/SKILL.md +496 -0
  629. package/assets/.agent/skills/wireshark-analysis/SKILL.md +497 -0
  630. package/assets/.agent/skills/wordpress-penetration-testing/SKILL.md +485 -0
  631. package/assets/.agent/skills/workflow-automation/SKILL.md +68 -0
  632. package/assets/.agent/skills/xss-html-injection/SKILL.md +499 -0
  633. package/assets/.agent/skills/zapier-make-patterns/SKILL.md +67 -0
  634. package/assets/.agent/skills/zendesk-automation/SKILL.md +215 -0
  635. package/assets/.agent/workflows/ai-agent-builder.md +93 -0
  636. package/assets/.agent/workflows/api-graphql-dev.md +45 -0
  637. package/assets/.agent/workflows/architect.md +43 -0
  638. package/assets/.agent/workflows/backend-dev.md +43 -0
  639. package/assets/.agent/workflows/claude-code-dev.md +51 -0
  640. package/assets/.agent/workflows/cloud-deployer.md +422 -0
  641. package/assets/.agent/workflows/code-reviewer.md +62 -0
  642. package/assets/.agent/workflows/context-data-eng.md +58 -0
  643. package/assets/.agent/workflows/database-eng.md +60 -0
  644. package/assets/.agent/workflows/deep-researcher.md +175 -0
  645. package/assets/.agent/workflows/designer.md +36 -0
  646. package/assets/.agent/workflows/devops.md +37 -0
  647. package/assets/.agent/workflows/doc-writer.md +211 -0
  648. package/assets/.agent/workflows/frontend-dev.md +63 -0
  649. package/assets/.agent/workflows/fullstack-coder.md +141 -0
  650. package/assets/.agent/workflows/image-creator.md +186 -0
  651. package/assets/.agent/workflows/knowledge-guide.md +48 -0
  652. package/assets/.agent/workflows/leader.md +238 -0
  653. package/assets/.agent/workflows/meta-thinker.md +204 -0
  654. package/assets/.agent/workflows/mobile-dev.md +26 -0
  655. package/assets/.agent/workflows/n8n-automator.md +114 -0
  656. package/assets/.agent/workflows/nocobase-plugin-build.md +291 -0
  657. package/assets/.agent/workflows/nocobase-plugin-expert.md +250 -0
  658. package/assets/.agent/workflows/observability-eng.md +54 -0
  659. package/assets/.agent/workflows/planner.md +48 -0
  660. package/assets/.agent/workflows/prompt-engineer.md +303 -0
  661. package/assets/.agent/workflows/qa-engineer.md +65 -0
  662. package/assets/.agent/workflows/quality-guardian.md +126 -0
  663. package/assets/.agent/workflows/quickstart.md +107 -0
  664. package/assets/.agent/workflows/release-manager.md +56 -0
  665. package/assets/.agent/workflows/research-analyst.md +483 -0
  666. package/assets/.agent/workflows/researcher.md +98 -0
  667. package/assets/.agent/workflows/saas-connector.md +86 -0
  668. package/assets/.agent/workflows/security-auditor.md +82 -0
  669. package/assets/.agent/workflows/security-engineer.md +48 -0
  670. package/assets/.agent/workflows/seo-marketer.md +91 -0
  671. package/assets/.agent/workflows/seo-specialist.md +27 -0
  672. package/assets/.agent/workflows/solution-architect.md +164 -0
  673. package/assets/.agent/workflows/startup-advisor.md +94 -0
  674. package/assets/.agent/workflows/tech-writer.md +43 -0
  675. package/assets/ide-adapters/cline/architect.md +43 -0
  676. package/assets/ide-adapters/cline/backend-dev.md +43 -0
  677. package/assets/ide-adapters/cline/designer.md +36 -0
  678. package/assets/ide-adapters/cline/devops.md +37 -0
  679. package/assets/ide-adapters/cline/frontend-dev.md +63 -0
  680. package/assets/ide-adapters/cline/knowledge-guide.md +48 -0
  681. package/assets/ide-adapters/cline/leader.md +88 -0
  682. package/assets/ide-adapters/cline/meta-thinker.md +87 -0
  683. package/assets/ide-adapters/cline/mobile-dev.md +26 -0
  684. package/assets/ide-adapters/cline/planner.md +48 -0
  685. package/assets/ide-adapters/cline/qa-engineer.md +65 -0
  686. package/assets/ide-adapters/cline/security-engineer.md +48 -0
  687. package/assets/ide-adapters/cline/seo-specialist.md +27 -0
  688. package/assets/ide-adapters/cline/tech-writer.md +43 -0
  689. package/assets/ide-adapters/copilot/architect.instructions.md +43 -0
  690. package/assets/ide-adapters/copilot/backend-dev.instructions.md +43 -0
  691. package/assets/ide-adapters/copilot/designer.instructions.md +36 -0
  692. package/assets/ide-adapters/copilot/devops.instructions.md +37 -0
  693. package/assets/ide-adapters/copilot/frontend-dev.instructions.md +63 -0
  694. package/assets/ide-adapters/copilot/knowledge-guide.instructions.md +48 -0
  695. package/assets/ide-adapters/copilot/leader.instructions.md +88 -0
  696. package/assets/ide-adapters/copilot/meta-thinker.instructions.md +87 -0
  697. package/assets/ide-adapters/copilot/mobile-dev.instructions.md +26 -0
  698. package/assets/ide-adapters/copilot/planner.instructions.md +48 -0
  699. package/assets/ide-adapters/copilot/qa-engineer.instructions.md +65 -0
  700. package/assets/ide-adapters/copilot/security-engineer.instructions.md +48 -0
  701. package/assets/ide-adapters/copilot/seo-specialist.instructions.md +27 -0
  702. package/assets/ide-adapters/copilot/tech-writer.instructions.md +43 -0
  703. package/assets/ide-adapters/cursor/architect.mdc +44 -0
  704. package/assets/ide-adapters/cursor/backend-dev.mdc +44 -0
  705. package/assets/ide-adapters/cursor/designer.mdc +37 -0
  706. package/assets/ide-adapters/cursor/devops.mdc +38 -0
  707. package/assets/ide-adapters/cursor/frontend-dev.mdc +64 -0
  708. package/assets/ide-adapters/cursor/knowledge-guide.mdc +49 -0
  709. package/assets/ide-adapters/cursor/leader.mdc +89 -0
  710. package/assets/ide-adapters/cursor/meta-thinker.mdc +88 -0
  711. package/assets/ide-adapters/cursor/mobile-dev.mdc +27 -0
  712. package/assets/ide-adapters/cursor/planner.mdc +49 -0
  713. package/assets/ide-adapters/cursor/qa-engineer.mdc +66 -0
  714. package/assets/ide-adapters/cursor/security-engineer.mdc +49 -0
  715. package/assets/ide-adapters/cursor/seo-specialist.mdc +28 -0
  716. package/assets/ide-adapters/cursor/tech-writer.mdc +44 -0
  717. package/assets/ide-adapters/kilocode/architect.md +43 -0
  718. package/assets/ide-adapters/kilocode/backend-dev.md +43 -0
  719. package/assets/ide-adapters/kilocode/designer.md +36 -0
  720. package/assets/ide-adapters/kilocode/devops.md +37 -0
  721. package/assets/ide-adapters/kilocode/frontend-dev.md +63 -0
  722. package/assets/ide-adapters/kilocode/knowledge-guide.md +48 -0
  723. package/assets/ide-adapters/kilocode/leader.md +88 -0
  724. package/assets/ide-adapters/kilocode/meta-thinker.md +87 -0
  725. package/assets/ide-adapters/kilocode/mobile-dev.md +26 -0
  726. package/assets/ide-adapters/kilocode/planner.md +48 -0
  727. package/assets/ide-adapters/kilocode/qa-engineer.md +65 -0
  728. package/assets/ide-adapters/kilocode/security-engineer.md +48 -0
  729. package/assets/ide-adapters/kilocode/seo-specialist.md +27 -0
  730. package/assets/ide-adapters/kilocode/tech-writer.md +43 -0
  731. package/assets/ide-adapters/kiro/hooks/auto-lint.json +8 -0
  732. package/assets/ide-adapters/kiro/hooks/auto-test.json +8 -0
  733. package/assets/ide-adapters/kiro/specs/.gitkeep +0 -0
  734. package/assets/ide-adapters/kiro/steering/product.md +23 -0
  735. package/assets/ide-adapters/kiro/steering/structure.md +23 -0
  736. package/assets/ide-adapters/kiro/steering/tech.md +27 -0
  737. package/assets/ide-adapters/windsurf/architect.md +41 -0
  738. package/assets/ide-adapters/windsurf/backend-dev.md +41 -0
  739. package/assets/ide-adapters/windsurf/designer.md +34 -0
  740. package/assets/ide-adapters/windsurf/devops.md +35 -0
  741. package/assets/ide-adapters/windsurf/frontend-dev.md +61 -0
  742. package/assets/ide-adapters/windsurf/knowledge-guide.md +46 -0
  743. package/assets/ide-adapters/windsurf/leader.md +86 -0
  744. package/assets/ide-adapters/windsurf/meta-thinker.md +85 -0
  745. package/assets/ide-adapters/windsurf/mobile-dev.md +24 -0
  746. package/assets/ide-adapters/windsurf/planner.md +46 -0
  747. package/assets/ide-adapters/windsurf/qa-engineer.md +63 -0
  748. package/assets/ide-adapters/windsurf/security-engineer.md +46 -0
  749. package/assets/ide-adapters/windsurf/seo-specialist.md +25 -0
  750. package/assets/ide-adapters/windsurf/tech-writer.md +41 -0
  751. package/assets/skill_groups.json +574 -0
  752. package/bin/cli.js +101 -0
  753. package/lib/commands.js +133 -0
  754. package/lib/constants.js +76 -0
  755. package/lib/download.js +165 -0
  756. package/lib/init.js +294 -0
  757. package/package.json +45 -0
  758. package/scripts/build-assets.js +177 -0
@@ -0,0 +1,483 @@
1
+ ---
2
+ name: llm-evaluation
3
+ description: Implement comprehensive evaluation strategies for LLM applications using automated metrics, human feedback, and benchmarking. Use when testing LLM performance, measuring AI application quality, or establishing evaluation frameworks.
4
+ ---
5
+
6
+ # LLM Evaluation
7
+
8
+ Master comprehensive evaluation strategies for LLM applications, from automated metrics to human evaluation and A/B testing.
9
+
10
+ ## Do not use this skill when
11
+
12
+ - The task is unrelated to LLM evaluation
13
+ - The task requires a different domain or a tool outside this scope
14
+
15
+ ## Instructions
16
+
17
+ - Clarify goals, constraints, and required inputs.
18
+ - Apply relevant best practices and validate outcomes.
19
+ - Provide actionable steps and verification.
20
+ - If detailed examples are required, open `resources/implementation-playbook.md`.
21
+
22
+ ## Use this skill when
23
+
24
+ - Measuring LLM application performance systematically
25
+ - Comparing different models or prompts
26
+ - Detecting performance regressions before deployment
27
+ - Validating improvements from prompt changes
28
+ - Building confidence in production systems
29
+ - Establishing baselines and tracking progress over time
30
+ - Debugging unexpected model behavior
31
+
32
+ ## Core Evaluation Types
33
+
34
+ ### 1. Automated Metrics
35
+ Fast, repeatable, scalable evaluation using computed scores.
36
+
37
+ **Text Generation:**
38
+ - **BLEU**: N-gram overlap (translation)
39
+ - **ROUGE**: Recall-oriented (summarization)
40
+ - **METEOR**: Unigram matching with stemming and synonyms (translation)
41
+ - **BERTScore**: Embedding-based similarity
42
+ - **Perplexity**: Language model confidence
43
+
44
+ **Classification:**
45
+ - **Accuracy**: Percentage correct
46
+ - **Precision/Recall/F1**: Class-specific performance
47
+ - **Confusion Matrix**: Error patterns
48
+ - **AUC-ROC**: Ranking quality
49
+
50
+ **Retrieval (RAG):**
51
+ - **MRR**: Mean Reciprocal Rank
52
+ - **NDCG**: Normalized Discounted Cumulative Gain
53
+ - **Precision@K**: Relevant in top K
54
+ - **Recall@K**: Coverage in top K
55
+
56
+ ### 2. Human Evaluation
57
+ Manual assessment for quality aspects difficult to automate.
58
+
59
+ **Dimensions:**
60
+ - **Accuracy**: Factual correctness
61
+ - **Coherence**: Logical flow
62
+ - **Relevance**: Answers the question
63
+ - **Fluency**: Natural language quality
64
+ - **Safety**: No harmful content
65
+ - **Helpfulness**: Useful to the user
66
+
67
+ ### 3. LLM-as-Judge
68
+ Use stronger LLMs to evaluate weaker model outputs.
69
+
70
+ **Approaches:**
71
+ - **Pointwise**: Score individual responses
72
+ - **Pairwise**: Compare two responses
73
+ - **Reference-based**: Compare to gold standard
74
+ - **Reference-free**: Judge without ground truth
75
+
76
+ ## Quick Start
77
+
78
+ ```python
79
+ from llm_eval import EvaluationSuite, Metric
80
+
81
# Define evaluation suite
suite = EvaluationSuite([
    Metric.accuracy(),
    Metric.bleu(),
    Metric.bertscore(),
    # fn points at calculate_groundedness, the NLI-based check defined under
    # "Custom Metrics" in this guide; the previously referenced name
    # check_groundedness is not defined anywhere in the document.
    Metric.custom(name="groundedness", fn=calculate_groundedness)
])

# Prepare test cases
test_cases = [
    {
        "input": "What is the capital of France?",
        "expected": "Paris",
        "context": "France is a country in Europe. Paris is its capital."
    },
    # ... more test cases
]

# Run evaluation
# NOTE: your_model is a placeholder for the model under test.
results = suite.evaluate(
    model=your_model,
    test_cases=test_cases
)

print(f"Overall Accuracy: {results.metrics['accuracy']}")
print(f"BLEU Score: {results.metrics['bleu']}")
107
+ ```
108
+
109
+ ## Automated Metrics Implementation
110
+
111
+ ### BLEU Score
112
+ ```python
113
+ from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
114
+
115
def calculate_bleu(reference, hypothesis):
    """Return the smoothed sentence-level BLEU of *hypothesis* against *reference*.

    Both arguments are strings and are tokenized with ``str.split()``.
    ``reference`` is used as the single reference for NLTK's
    ``sentence_bleu``, with ``SmoothingFunction().method4`` as smoothing.
    """
    smoothing = SmoothingFunction().method4
    reference_tokens = reference.split()
    hypothesis_tokens = hypothesis.split()

    # sentence_bleu expects a list of references, hence the wrapping list.
    return sentence_bleu(
        [reference_tokens],
        hypothesis_tokens,
        smoothing_function=smoothing,
    )
124
+
125
+ # Usage
126
+ bleu = calculate_bleu(
127
+ reference="The cat sat on the mat",
128
+ hypothesis="A cat is sitting on the mat"
129
+ )
130
+ ```
131
+
132
+ ### ROUGE Score
133
+ ```python
134
+ from rouge_score import rouge_scorer
135
+
136
def calculate_rouge(reference, hypothesis):
    """Score *hypothesis* against *reference* with ROUGE-1, ROUGE-2 and ROUGE-L.

    Returns:
        A dict with keys ``'rouge1'``, ``'rouge2'`` and ``'rougeL'``, each
        holding the F-measure of the corresponding score. The scorer is
        created with stemming enabled.
    """
    variants = ['rouge1', 'rouge2', 'rougeL']
    all_scores = rouge_scorer.RougeScorer(variants, use_stemmer=True).score(
        reference, hypothesis
    )
    return {variant: all_scores[variant].fmeasure for variant in variants}
146
+ ```
147
+
148
+ ### BERTScore
149
+ ```python
150
+ from bert_score import score
151
+
152
def calculate_bertscore(references, hypotheses):
    """Compute BERTScore for *hypotheses* against *references*.

    Scoring uses the ``microsoft/deberta-xlarge-mnli`` model with
    ``lang='en'``. Note that ``score`` takes the candidates first and the
    references second.

    Returns:
        A dict with the ``.mean()`` of the precision, recall and F1 values
        returned by ``score``, under the keys ``'precision'``, ``'recall'``
        and ``'f1'``.
    """
    precision, recall, f1 = score(
        hypotheses,
        references,
        lang='en',
        model_type='microsoft/deberta-xlarge-mnli',
    )

    averaged = {}
    for label, values in (('precision', precision), ('recall', recall), ('f1', f1)):
        averaged[label] = values.mean().item()
    return averaged
166
+ ```
167
+
168
+ ### Custom Metrics
169
+ ```python
170
_NLI_PIPELINE = None


def _get_nli_pipeline():
    """Build the MNLI text-classification pipeline once and reuse it afterwards."""
    global _NLI_PIPELINE
    if _NLI_PIPELINE is None:
        # Imported lazily so transformers is only needed when the metric is used.
        from transformers import pipeline

        _NLI_PIPELINE = pipeline("text-classification", model="microsoft/deberta-large-mnli")
    return _NLI_PIPELINE


def calculate_groundedness(response, context):
    """Check if response is grounded in provided context.

    The string ``"{context} [SEP] {response}"`` is classified with the
    ``microsoft/deberta-large-mnli`` model. The pipeline is created on the
    first call and cached, so the model is not reloaded for every response.

    Returns:
        The classifier's score when its predicted label is ``'ENTAILMENT'``,
        otherwise ``0.0``.
    """
    nli = _get_nli_pipeline()

    result = nli(f"{context} [SEP] {response}")[0]

    # Return confidence that response is entailed by context
    return result['score'] if result['label'] == 'ENTAILMENT' else 0.0
181
+
182
def calculate_toxicity(text):
    """Return the largest score Detoxify's ``'original'`` model reports for *text*.

    ``predict`` returns a mapping of scores; only the maximum of its values
    is returned, without the key it belongs to.
    """
    from detoxify import Detoxify

    detector = Detoxify('original')
    category_scores = detector.predict(text)
    highest = max(category_scores.values())
    return highest
188
+
189
def calculate_factuality(claim, knowledge_base):
    """Placeholder for verifying a factual claim against a knowledge base.

    Nothing is verified here: the function ignores its arguments and
    always returns ``None``.
    """
    # Left unimplemented because the approach depends on the knowledge base
    # in use, e.g. retrieval followed by NLI, or a fact-checking API.
    return None
194
+ ```
195
+
196
+ ## LLM-as-Judge Patterns
197
+
198
+ ### Single Output Evaluation
199
+ ```python
200
def llm_judge_quality(response, question):
    """Use GPT-5 to judge response quality.

    Sends a single-turn prompt asking the ``gpt-5`` model to rate *response*
    (an answer to *question*) from 1 to 10 on accuracy, helpfulness and
    clarity, and to reply in JSON.

    Returns:
        The judge's reply parsed with ``json.loads``. The prompt requests the
        keys ``accuracy``, ``helpfulness``, ``clarity`` and ``reasoning``, but
        the reply is not validated against that shape.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    # Local imports keep the snippet self-contained; it previously relied on
    # json and openai without importing them.
    import json

    import openai

    prompt = f"""Rate the following response on a scale of 1-10 for:
1. Accuracy (factually correct)
2. Helpfulness (answers the question)
3. Clarity (well-written and understandable)

Question: {question}
Response: {response}

Provide ratings in JSON format:
{{
"accuracy": <1-10>,
"helpfulness": <1-10>,
"clarity": <1-10>,
"reasoning": "<brief explanation>"
}}
"""

    # openai>=1.0 removed openai.ChatCompletion; chat.completions.create is
    # the equivalent call on the library's module-level client.
    result = openai.chat.completions.create(
        model="gpt-5",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )

    return json.loads(result.choices[0].message.content)
226
+ ```
227
+
228
+ ### Pairwise Comparison
229
+ ```python
230
def compare_responses(question, response_a, response_b):
    """Compare two responses using LLM judge.

    Sends a single-turn prompt asking the ``gpt-5`` model which of
    *response_a* and *response_b* better answers *question*, and to reply
    in JSON.

    Returns:
        The judge's reply parsed with ``json.loads``. The prompt requests the
        keys ``winner`` ("A", "B" or "tie"), ``reasoning`` and ``confidence``,
        but the reply is not validated against that shape.

    Raises:
        json.JSONDecodeError: If the reply is not valid JSON.
    """
    # Local imports keep the snippet self-contained; it previously relied on
    # json and openai without importing them.
    import json

    import openai

    prompt = f"""Compare these two responses to the question and determine which is better.

Question: {question}

Response A: {response_a}

Response B: {response_b}

Which response is better and why? Consider accuracy, helpfulness, and clarity.

Answer with JSON:
{{
"winner": "A" or "B" or "tie",
"reasoning": "<explanation>",
"confidence": <1-10>
}}
"""

    # openai>=1.0 removed openai.ChatCompletion; chat.completions.create is
    # the equivalent call on the library's module-level client.
    result = openai.chat.completions.create(
        model="gpt-5",
        messages=[{"role": "user", "content": prompt}],
        temperature=0
    )

    return json.loads(result.choices[0].message.content)
257
+ ```
258
+
259
+ ## Human Evaluation Frameworks
260
+
261
+ ### Annotation Guidelines
262
+ ```python
263
class AnnotationTask:
    """One item to be rated by a human annotator.

    Holds the response under review, the question it answers, and an
    optional context passage.
    """

    def __init__(self, response, question, context=None):
        self.response, self.question, self.context = response, question, context

    def get_annotation_form(self):
        """Build a fresh, blank annotation form for this task.

        Returns:
            A dict with the task's question, context and response, three
            1-5 rating dimensions with their guiding questions, four issue
            flags initialised to ``False``, and an empty feedback string.
        """
        rating_prompts = (
            ("accuracy", "Is the response factually correct?"),
            ("relevance", "Does it answer the question?"),
            ("coherence", "Is it logically consistent?"),
        )
        issue_flags = ("factual_error", "hallucination", "off_topic", "unsafe_content")

        form = {
            "question": self.question,
            "context": self.context,
            "response": self.response,
        }
        form["ratings"] = {
            dimension: {"scale": "1-5", "description": prompt}
            for dimension, prompt in rating_prompts
        }
        form["issues"] = dict.fromkeys(issue_flags, False)
        form["feedback"] = ""
        return form
298
+ ```
299
+
300
+ ### Inter-Rater Agreement
301
+ ```python
302
+ from sklearn.metrics import cohen_kappa_score
303
+
304
def _interpret_kappa(kappa):
    """Map a Cohen's kappa value to a qualitative agreement label."""
    import math

    # kappa can be NaN (0/0) when agreement is undefined; report that
    # explicitly instead of letting it fall through to a label.
    if math.isnan(kappa):
        return "Undefined"

    # Upper bounds are exclusive and checked in ascending order, so the
    # first band that contains kappa wins.
    bands = (
        (0.0, "Poor"),
        (0.2, "Slight"),
        (0.4, "Fair"),
        (0.6, "Moderate"),
        (0.8, "Substantial"),
    )
    for upper_bound, label in bands:
        if kappa < upper_bound:
            return label
    return "Almost Perfect"


def calculate_agreement(rater1_scores, rater2_scores):
    """Calculate inter-rater agreement.

    Computes Cohen's kappa for the two raters' scores with
    ``cohen_kappa_score`` and attaches a qualitative label.

    The label was previously looked up in a dict keyed by the boolean
    results of the comparisons. Duplicate ``True`` keys collapse to the last
    one, so every kappa was reported as "Almost Perfect"; the bands are now
    checked in order.

    Returns:
        A dict with ``"kappa"`` (the raw statistic) and ``"interpretation"``
        ("Poor", "Slight", "Fair", "Moderate", "Substantial",
        "Almost Perfect", or "Undefined" when kappa is NaN).
    """
    kappa = cohen_kappa_score(rater1_scores, rater2_scores)

    return {
        "kappa": kappa,
        "interpretation": _interpret_kappa(kappa)
    }
321
+ ```
322
+
323
+ ## A/B Testing
324
+
325
+ ### Statistical Testing Framework
326
+ ```python
327
+ from scipy import stats
328
+ import numpy as np
329
+
330
class ABTest:
    """Collect scores for two variants and compare them statistically.

    Scores are accumulated with ``add_result`` and compared with ``analyze``,
    which runs an independent two-sample t-test and computes Cohen's d.
    """

    def __init__(self, variant_a_name="A", variant_b_name="B"):
        self.variant_a = {"name": variant_a_name, "scores": []}
        self.variant_b = {"name": variant_b_name, "scores": []}

    def add_result(self, variant, score):
        """Add evaluation result for a variant.

        Args:
            variant: ``"A"`` or ``"B"``, or the name given to the matching
                variant in the constructor.
            score: The score to record.

        Raises:
            ValueError: If *variant* matches neither variant. Previously any
                value other than ``"A"`` was silently recorded under B, so
                custom names for A and typos ended up in the wrong bucket.
        """
        # The literal "A"/"B" are checked first so existing callers keep
        # their behaviour even when custom names are configured.
        if variant == "A":
            target = self.variant_a
        elif variant == "B":
            target = self.variant_b
        elif variant == self.variant_a["name"]:
            target = self.variant_a
        elif variant == self.variant_b["name"]:
            target = self.variant_b
        else:
            raise ValueError(
                f"Unknown variant {variant!r}; expected 'A', 'B', "
                f"{self.variant_a['name']!r} or {self.variant_b['name']!r}"
            )
        target["scores"].append(score)

    def analyze(self, alpha=0.05):
        """Perform statistical analysis.

        Args:
            alpha: Significance level the t-test p-value is compared against.

        Returns:
            A dict of plain Python floats, bools and strings:
            ``variant_a_mean``, ``variant_b_mean``, ``difference`` (B minus
            A), ``relative_improvement`` (difference divided by A's mean, NaN
            when that mean is zero), ``p_value``,
            ``statistically_significant``, ``cohens_d``, ``effect_size`` and
            ``winner`` ("B" if its mean is strictly higher, else "A",
            regardless of significance).

        Raises:
            ValueError: If either variant has fewer than two scores, since
                neither the t-test nor the pooled standard deviation is
                defined in that case.
        """
        a_scores = np.asarray(self.variant_a["scores"], dtype=float)
        b_scores = np.asarray(self.variant_b["scores"], dtype=float)
        n_a, n_b = a_scores.size, b_scores.size
        if n_a < 2 or n_b < 2:
            raise ValueError("Each variant needs at least two scores to analyze")

        mean_a = float(a_scores.mean())
        mean_b = float(b_scores.mean())
        difference = mean_b - mean_a

        # T-test
        _, p_value = stats.ttest_ind(a_scores, b_scores)
        p_value = float(p_value)

        # Effect size (Cohen's d): pooled *sample* standard deviation, with
        # each variance weighted by its degrees of freedom so unequal group
        # sizes are handled correctly.
        pooled_var = (
            (n_a - 1) * a_scores.var(ddof=1) + (n_b - 1) * b_scores.var(ddof=1)
        ) / (n_a + n_b - 2)
        pooled_std = float(np.sqrt(pooled_var))
        if pooled_std > 0:
            cohens_d = difference / pooled_std
        elif difference == 0:
            # Identical constant scores: no effect, rather than 0/0 = NaN.
            cohens_d = 0.0
        else:
            # Zero variance but different means: unbounded effect.
            cohens_d = float("inf") if difference > 0 else float("-inf")

        # Improvement relative to a zero baseline is undefined.
        relative_improvement = difference / mean_a if mean_a != 0 else float("nan")

        return {
            "variant_a_mean": mean_a,
            "variant_b_mean": mean_b,
            "difference": difference,
            "relative_improvement": relative_improvement,
            "p_value": p_value,
            "statistically_significant": bool(p_value < alpha),
            "cohens_d": cohens_d,
            "effect_size": self.interpret_cohens_d(cohens_d),
            "winner": "B" if mean_b > mean_a else "A"
        }

    @staticmethod
    def interpret_cohens_d(d):
        """Interpret Cohen's d effect size.

        Returns "negligible", "small", "medium" or "large" from the absolute
        value of *d*, or "undefined" when *d* is NaN (which would otherwise
        fail every comparison and be reported as "large").
        """
        if np.isnan(d):
            return "undefined"
        abs_d = abs(d)
        if abs_d < 0.2:
            return "negligible"
        elif abs_d < 0.5:
            return "small"
        elif abs_d < 0.8:
            return "medium"
        else:
            return "large"
378
+ ```
379
+
380
+ ## Regression Testing
381
+
382
+ ### Regression Detection
383
+ ```python
384
class RegressionDetector:
    """Flag metrics that got worse than a stored baseline by more than a threshold."""

    def __init__(self, baseline_results, threshold=0.05, lower_is_better=()):
        """Store the baseline and the comparison settings.

        Args:
            baseline_results: Mapping of metric name to baseline score.
            threshold: How much a metric may worsen before it is flagged
                (a fraction of the baseline, e.g. 0.05 for 5%).
            lower_is_better: Names of metrics for which an increase is the
                regression (e.g. toxicity or latency). All other metrics are
                treated as higher-is-better, which is the original behaviour.
        """
        self.baseline = baseline_results
        self.threshold = threshold
        self.lower_is_better = frozenset(lower_is_better)

    def check_for_regression(self, new_results):
        """Detect if new results show regression.

        Metrics missing from *new_results* (or mapped to ``None``) are
        skipped.

        Returns:
            A dict with ``"has_regression"`` (bool) and ``"regressions"``, a
            list of dicts with ``metric``, ``baseline``, ``current`` and
            ``change``. ``change`` is the signed change relative to the
            magnitude of the baseline, or the absolute change when the
            baseline is zero.
        """
        regressions = []

        for metric, baseline_score in self.baseline.items():
            new_score = new_results.get(metric)

            if new_score is None:
                continue

            delta = new_score - baseline_score
            if baseline_score != 0:
                # Divide by the magnitude so a negative baseline does not
                # flip the sign of the change.
                change = delta / abs(baseline_score)
            else:
                # A relative change from zero is undefined (it used to raise
                # ZeroDivisionError); fall back to the absolute change.
                change = delta

            # Flag if significant movement in the "worse" direction
            worsening = change if metric in self.lower_is_better else -change
            if worsening > self.threshold:
                regressions.append({
                    "metric": metric,
                    "baseline": baseline_score,
                    "current": new_score,
                    "change": change
                })

        return {
            "has_regression": bool(regressions),
            "regressions": regressions
        }
416
+ ```
417
+
418
+ ## Benchmarking
419
+
420
+ ### Running Benchmarks
421
+ ```python
422
class BenchmarkRunner:
    """Score a model on a fixed dataset and summarise each metric."""

    def __init__(self, benchmark_dataset):
        self.dataset = benchmark_dataset

    def run_benchmark(self, model, metrics):
        """Run *model* over the dataset and aggregate every metric's scores.

        For each example, ``model.predict`` is called on ``example["input"]``
        and each metric's ``calculate`` receives the prediction, the
        example's ``"reference"`` and its optional ``"context"``.

        Returns:
            A dict keyed by metric name; each value holds the ``mean``,
            ``std``, ``min`` and ``max`` of that metric's per-example scores.
        """
        collected = {m.name: [] for m in metrics}

        for sample in self.dataset:
            output = model.predict(sample["input"])

            for m in metrics:
                collected[m.name].append(
                    m.calculate(
                        prediction=output,
                        reference=sample["reference"],
                        context=sample.get("context"),
                    )
                )

        def summarise(values):
            return {
                "mean": np.mean(values),
                "std": np.std(values),
                "min": min(values),
                "max": max(values),
            }

        summary = {}
        for name, values in collected.items():
            summary[name] = summarise(values)
        return summary
453
+ ```
454
+
455
+ ## Resources
456
+
457
+ - **references/metrics.md**: Comprehensive metric guide
458
+ - **references/human-evaluation.md**: Annotation best practices
459
+ - **references/benchmarking.md**: Standard benchmarks
460
+ - **references/a-b-testing.md**: Statistical testing guide
461
+ - **references/regression-testing.md**: CI/CD integration
462
+ - **assets/evaluation-framework.py**: Complete evaluation harness
463
+ - **assets/benchmark-dataset.jsonl**: Example datasets
464
+ - **scripts/evaluate-model.py**: Automated evaluation runner
465
+
466
+ ## Best Practices
467
+
468
+ 1. **Multiple Metrics**: Use diverse metrics for comprehensive view
469
+ 2. **Representative Data**: Test on real-world, diverse examples
470
+ 3. **Baselines**: Always compare against baseline performance
471
+ 4. **Statistical Rigor**: Use proper statistical tests for comparisons
472
+ 5. **Continuous Evaluation**: Integrate into CI/CD pipeline
473
+ 6. **Human Validation**: Combine automated metrics with human judgment
474
+ 7. **Error Analysis**: Investigate failures to understand weaknesses
475
+ 8. **Version Control**: Track evaluation results over time
476
+
477
+ ## Common Pitfalls
478
+
479
+ - **Single Metric Obsession**: Optimizing for one metric at the expense of others
480
+ - **Small Sample Size**: Drawing conclusions from too few examples
481
+ - **Data Contamination**: Testing on training data
482
+ - **Ignoring Variance**: Not accounting for statistical uncertainty
483
+ - **Metric Mismatch**: Using metrics not aligned with business goals