aigroup-workflow 2.1.2 → 2.2.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (916)
  1. package/.codex/AGENTS.md +1 -1
  2. package/CLAUDE.md +1 -4
  3. package/README.md +333 -333
  4. package/cli/commands/init.mjs +20 -6
  5. package/cli/utils/scaffold.mjs +39 -9
  6. package/docs/red-flags.md +1 -1
  7. package/docs/rules/entropy.md +1 -1
  8. package/docs/rules/performance.md +1 -1
  9. package/docs/workflow-pipeline.md +8 -6
  10. package/manifests/install-modules.json +223 -133
  11. package/package.json +39 -39
  12. package/scripts/hooks/checks/orchestration-artifacts.cjs +28 -23
  13. package/scripts/hooks/checks/workflow-state.cjs +4 -5
  14. package/scripts/orchestration/lib/orchestrator.cjs +353 -92
  15. package/scripts/orchestration/lib/validate.cjs +145 -0
  16. package/scripts/orchestration/session.cjs +100 -33
  17. package/skills/ai-ml/fine-tuning-expert/SKILL.md +162 -0
  18. package/skills/ai-ml/fine-tuning-expert/references/dataset-preparation.md +540 -0
  19. package/skills/ai-ml/fine-tuning-expert/references/deployment-optimization.md +673 -0
  20. package/skills/ai-ml/fine-tuning-expert/references/evaluation-metrics.md +597 -0
  21. package/skills/ai-ml/fine-tuning-expert/references/hyperparameter-tuning.md +565 -0
  22. package/skills/ai-ml/fine-tuning-expert/references/lora-peft.md +347 -0
  23. package/skills/ai-ml/ml-pipeline/SKILL.md +159 -0
  24. package/skills/ai-ml/ml-pipeline/references/experiment-tracking.md +833 -0
  25. package/skills/ai-ml/ml-pipeline/references/feature-engineering.md +631 -0
  26. package/skills/ai-ml/ml-pipeline/references/model-validation.md +978 -0
  27. package/skills/ai-ml/ml-pipeline/references/pipeline-orchestration.md +907 -0
  28. package/skills/ai-ml/ml-pipeline/references/training-pipelines.md +782 -0
  29. package/skills/ai-ml/rag-architect/SKILL.md +194 -0
  30. package/skills/ai-ml/rag-architect/references/chunking-strategies.md +878 -0
  31. package/skills/ai-ml/rag-architect/references/embedding-models.md +561 -0
  32. package/skills/ai-ml/rag-architect/references/rag-evaluation.md +833 -0
  33. package/skills/ai-ml/rag-architect/references/retrieval-optimization.md +795 -0
  34. package/skills/ai-ml/rag-architect/references/vector-databases.md +589 -0
  35. package/skills/ai-ml/spark-engineer/SKILL.md +148 -0
  36. package/skills/ai-ml/spark-engineer/references/partitioning-caching.md +543 -0
  37. package/skills/ai-ml/spark-engineer/references/performance-tuning.md +544 -0
  38. package/skills/ai-ml/spark-engineer/references/rdd-operations.md +599 -0
  39. package/skills/ai-ml/spark-engineer/references/spark-sql-dataframes.md +474 -0
  40. package/skills/ai-ml/spark-engineer/references/streaming-patterns.md +786 -0
  41. package/skills/backend/api-designer/SKILL.md +217 -0
  42. package/skills/backend/api-designer/references/error-handling.md +541 -0
  43. package/skills/backend/api-designer/references/openapi.md +824 -0
  44. package/skills/backend/api-designer/references/pagination.md +494 -0
  45. package/skills/backend/api-designer/references/rest-patterns.md +335 -0
  46. package/skills/backend/api-designer/references/versioning.md +391 -0
  47. package/skills/backend/architecture-designer/SKILL.md +117 -0
  48. package/skills/backend/architecture-designer/references/adr-template.md +116 -0
  49. package/skills/backend/architecture-designer/references/architecture-patterns.md +111 -0
  50. package/skills/backend/architecture-designer/references/database-selection.md +102 -0
  51. package/skills/backend/architecture-designer/references/nfr-checklist.md +112 -0
  52. package/skills/backend/architecture-designer/references/system-design.md +100 -0
  53. package/skills/backend/code-documenter/SKILL.md +147 -0
  54. package/skills/backend/code-documenter/references/api-docs-fastapi-django.md +166 -0
  55. package/skills/backend/code-documenter/references/api-docs-nestjs-express.md +220 -0
  56. package/skills/backend/code-documenter/references/coverage-reports.md +125 -0
  57. package/skills/backend/code-documenter/references/documentation-systems.md +333 -0
  58. package/skills/backend/code-documenter/references/interactive-api-docs.md +531 -0
  59. package/skills/backend/code-documenter/references/python-docstrings.md +121 -0
  60. package/skills/backend/code-documenter/references/typescript-jsdoc.md +145 -0
  61. package/skills/backend/code-documenter/references/user-guides-tutorials.md +530 -0
  62. package/skills/backend/debugging-wizard/SKILL.md +105 -0
  63. package/skills/backend/debugging-wizard/references/common-patterns.md +132 -0
  64. package/skills/backend/debugging-wizard/references/debugging-tools.md +140 -0
  65. package/skills/backend/debugging-wizard/references/quick-fixes.md +177 -0
  66. package/skills/backend/debugging-wizard/references/strategies.md +142 -0
  67. package/skills/backend/debugging-wizard/references/systematic-debugging.md +367 -0
  68. package/skills/backend/feature-forge/SKILL.md +98 -0
  69. package/skills/backend/feature-forge/references/acceptance-criteria.md +104 -0
  70. package/skills/backend/feature-forge/references/ears-syntax.md +99 -0
  71. package/skills/backend/feature-forge/references/interview-questions.md +150 -0
  72. package/skills/backend/feature-forge/references/pre-discovery-subagents.md +54 -0
  73. package/skills/backend/feature-forge/references/specification-template.md +103 -0
  74. package/skills/backend/fullstack-guardian/SKILL.md +105 -0
  75. package/skills/backend/fullstack-guardian/references/api-design-standards.md +307 -0
  76. package/skills/backend/fullstack-guardian/references/architecture-decisions.md +350 -0
  77. package/skills/backend/fullstack-guardian/references/backend-patterns.md +237 -0
  78. package/skills/backend/fullstack-guardian/references/common-patterns.md +134 -0
  79. package/skills/backend/fullstack-guardian/references/deliverables-checklist.md +354 -0
  80. package/skills/backend/fullstack-guardian/references/design-template.md +91 -0
  81. package/skills/backend/fullstack-guardian/references/error-handling.md +135 -0
  82. package/skills/backend/fullstack-guardian/references/frontend-patterns.md +340 -0
  83. package/skills/backend/fullstack-guardian/references/integration-patterns.md +333 -0
  84. package/skills/backend/fullstack-guardian/references/security-checklist.md +106 -0
  85. package/skills/backend/graphql-architect/SKILL.md +146 -0
  86. package/skills/backend/graphql-architect/references/federation.md +418 -0
  87. package/skills/backend/graphql-architect/references/migration-from-rest.md +1141 -0
  88. package/skills/backend/graphql-architect/references/resolvers.md +425 -0
  89. package/skills/backend/graphql-architect/references/schema-design.md +393 -0
  90. package/skills/backend/graphql-architect/references/security.md +569 -0
  91. package/skills/backend/graphql-architect/references/subscriptions.md +510 -0
  92. package/skills/backend/legacy-modernizer/SKILL.md +137 -0
  93. package/skills/backend/legacy-modernizer/references/legacy-testing.md +381 -0
  94. package/skills/backend/legacy-modernizer/references/migration-strategies.md +423 -0
  95. package/skills/backend/legacy-modernizer/references/refactoring-patterns.md +395 -0
  96. package/skills/backend/legacy-modernizer/references/strangler-fig-pattern.md +281 -0
  97. package/skills/backend/legacy-modernizer/references/system-assessment.md +487 -0
  98. package/skills/backend/microservices-architect/SKILL.md +164 -0
  99. package/skills/backend/microservices-architect/references/communication.md +499 -0
  100. package/skills/backend/microservices-architect/references/data.md +721 -0
  101. package/skills/backend/microservices-architect/references/decomposition.md +344 -0
  102. package/skills/backend/microservices-architect/references/observability.md +805 -0
  103. package/skills/backend/microservices-architect/references/patterns.md +603 -0
  104. package/skills/database/database-optimizer/SKILL.md +147 -0
  105. package/skills/database/database-optimizer/references/index-strategies.md +331 -0
  106. package/skills/database/database-optimizer/references/monitoring-analysis.md +501 -0
  107. package/skills/database/database-optimizer/references/mysql-tuning.md +452 -0
  108. package/skills/database/database-optimizer/references/postgresql-tuning.md +413 -0
  109. package/skills/database/database-optimizer/references/query-optimization.md +251 -0
  110. package/skills/database/postgres-pro/SKILL.md +152 -0
  111. package/skills/database/postgres-pro/references/extensions.md +404 -0
  112. package/skills/database/postgres-pro/references/jsonb.md +321 -0
  113. package/skills/database/postgres-pro/references/maintenance.md +481 -0
  114. package/skills/database/postgres-pro/references/performance.md +265 -0
  115. package/skills/database/postgres-pro/references/replication.md +446 -0
  116. package/skills/database/sql-pro/SKILL.md +129 -0
  117. package/skills/database/sql-pro/references/database-design.md +402 -0
  118. package/skills/database/sql-pro/references/dialect-differences.md +419 -0
  119. package/skills/database/sql-pro/references/optimization.md +384 -0
  120. package/skills/database/sql-pro/references/query-patterns.md +285 -0
  121. package/skills/database/sql-pro/references/window-functions.md +328 -0
  122. package/skills/dotnet/csharp-developer/SKILL.md +125 -0
  123. package/skills/dotnet/csharp-developer/references/aspnet-core.md +394 -0
  124. package/skills/dotnet/csharp-developer/references/blazor.md +553 -0
  125. package/skills/dotnet/csharp-developer/references/entity-framework.md +409 -0
  126. package/skills/dotnet/csharp-developer/references/modern-csharp.md +248 -0
  127. package/skills/dotnet/csharp-developer/references/performance.md +498 -0
  128. package/skills/dotnet/dotnet-core-expert/SKILL.md +138 -0
  129. package/skills/dotnet/dotnet-core-expert/references/authentication.md +546 -0
  130. package/skills/dotnet/dotnet-core-expert/references/clean-architecture.md +455 -0
  131. package/skills/dotnet/dotnet-core-expert/references/cloud-native.md +548 -0
  132. package/skills/dotnet/dotnet-core-expert/references/entity-framework.md +440 -0
  133. package/skills/dotnet/dotnet-core-expert/references/minimal-apis.md +319 -0
  134. package/skills/frontend/angular-architect/SKILL.md +152 -0
  135. package/skills/frontend/angular-architect/references/components.md +297 -0
  136. package/skills/frontend/angular-architect/references/ngrx.md +401 -0
  137. package/skills/frontend/angular-architect/references/routing.md +361 -0
  138. package/skills/frontend/angular-architect/references/rxjs.md +319 -0
  139. package/skills/frontend/angular-architect/references/testing.md +405 -0
  140. package/skills/frontend/flutter-expert/SKILL.md +138 -0
  141. package/skills/frontend/flutter-expert/references/bloc-state.md +259 -0
  142. package/skills/frontend/flutter-expert/references/gorouter-navigation.md +119 -0
  143. package/skills/frontend/flutter-expert/references/performance.md +99 -0
  144. package/skills/frontend/flutter-expert/references/project-structure.md +118 -0
  145. package/skills/frontend/flutter-expert/references/riverpod-state.md +130 -0
  146. package/skills/frontend/flutter-expert/references/widget-patterns.md +123 -0
  147. package/skills/frontend/nextjs-developer/SKILL.md +143 -0
  148. package/skills/frontend/nextjs-developer/references/app-router.md +311 -0
  149. package/skills/frontend/nextjs-developer/references/data-fetching.md +482 -0
  150. package/skills/frontend/nextjs-developer/references/deployment.md +545 -0
  151. package/skills/frontend/nextjs-developer/references/server-actions.md +462 -0
  152. package/skills/frontend/nextjs-developer/references/server-components.md +384 -0
  153. package/skills/frontend/react-expert/SKILL.md +149 -0
  154. package/skills/frontend/react-expert/references/hooks-patterns.md +162 -0
  155. package/skills/frontend/react-expert/references/migration-class-to-modern.md +1119 -0
  156. package/skills/frontend/react-expert/references/performance.md +168 -0
  157. package/skills/frontend/react-expert/references/react-19-features.md +174 -0
  158. package/skills/frontend/react-expert/references/server-components.md +143 -0
  159. package/skills/frontend/react-expert/references/state-management.md +171 -0
  160. package/skills/frontend/react-expert/references/testing-react.md +174 -0
  161. package/skills/frontend/react-native-expert/SKILL.md +185 -0
  162. package/skills/frontend/react-native-expert/references/expo-router.md +187 -0
  163. package/skills/frontend/react-native-expert/references/list-optimization.md +204 -0
  164. package/skills/frontend/react-native-expert/references/platform-handling.md +188 -0
  165. package/skills/frontend/react-native-expert/references/project-structure.md +171 -0
  166. package/skills/frontend/react-native-expert/references/storage-hooks.md +173 -0
  167. package/skills/frontend/vue-expert/SKILL.md +98 -0
  168. package/skills/frontend/vue-expert/references/build-tooling.md +480 -0
  169. package/skills/frontend/vue-expert/references/components.md +448 -0
  170. package/skills/frontend/vue-expert/references/composition-api.md +299 -0
  171. package/skills/frontend/vue-expert/references/mobile-hybrid.md +636 -0
  172. package/skills/frontend/vue-expert/references/nuxt.md +669 -0
  173. package/skills/frontend/vue-expert/references/state-management.md +449 -0
  174. package/skills/frontend/vue-expert/references/typescript.md +584 -0
  175. package/skills/frontend/vue-expert-js/SKILL.md +167 -0
  176. package/skills/frontend/vue-expert-js/references/component-architecture.md +219 -0
  177. package/skills/frontend/vue-expert-js/references/composables-patterns.md +183 -0
  178. package/skills/frontend/vue-expert-js/references/jsdoc-typing.md +535 -0
  179. package/skills/frontend/vue-expert-js/references/state-management.md +249 -0
  180. package/skills/frontend/vue-expert-js/references/testing-patterns.md +237 -0
  181. package/skills/go-rust-cpp/cpp-pro/SKILL.md +115 -0
  182. package/skills/go-rust-cpp/cpp-pro/references/build-tooling.md +440 -0
  183. package/skills/go-rust-cpp/cpp-pro/references/concurrency.md +437 -0
  184. package/skills/go-rust-cpp/cpp-pro/references/memory-performance.md +397 -0
  185. package/skills/go-rust-cpp/cpp-pro/references/modern-cpp.md +304 -0
  186. package/skills/go-rust-cpp/cpp-pro/references/templates.md +357 -0
  187. package/skills/go-rust-cpp/golang-pro/SKILL.md +122 -0
  188. package/skills/go-rust-cpp/golang-pro/references/concurrency.md +329 -0
  189. package/skills/go-rust-cpp/golang-pro/references/generics.md +442 -0
  190. package/skills/go-rust-cpp/golang-pro/references/interfaces.md +432 -0
  191. package/skills/go-rust-cpp/golang-pro/references/project-structure.md +477 -0
  192. package/skills/go-rust-cpp/golang-pro/references/testing.md +451 -0
  193. package/skills/go-rust-cpp/rust-engineer/SKILL.md +167 -0
  194. package/skills/go-rust-cpp/rust-engineer/references/async.md +458 -0
  195. package/skills/go-rust-cpp/rust-engineer/references/error-handling.md +334 -0
  196. package/skills/go-rust-cpp/rust-engineer/references/ownership.md +278 -0
  197. package/skills/go-rust-cpp/rust-engineer/references/testing.md +470 -0
  198. package/skills/go-rust-cpp/rust-engineer/references/traits.md +413 -0
  199. package/skills/infra/cli-developer/SKILL.md +113 -0
  200. package/skills/infra/cli-developer/references/design-patterns.md +221 -0
  201. package/skills/infra/cli-developer/references/go-cli.md +540 -0
  202. package/skills/infra/cli-developer/references/node-cli.md +383 -0
  203. package/skills/infra/cli-developer/references/python-cli.md +422 -0
  204. package/skills/infra/cli-developer/references/ux-patterns.md +448 -0
  205. package/skills/infra/cloud-architect/SKILL.md +216 -0
  206. package/skills/infra/cloud-architect/references/aws.md +394 -0
  207. package/skills/infra/cloud-architect/references/azure.md +562 -0
  208. package/skills/infra/cloud-architect/references/cost.md +582 -0
  209. package/skills/infra/cloud-architect/references/gcp.md +633 -0
  210. package/skills/infra/cloud-architect/references/multi-cloud.md +483 -0
  211. package/skills/infra/devops-engineer/SKILL.md +144 -0
  212. package/skills/infra/devops-engineer/references/deployment-strategies.md +241 -0
  213. package/skills/infra/devops-engineer/references/docker-patterns.md +113 -0
  214. package/skills/infra/devops-engineer/references/github-actions.md +139 -0
  215. package/skills/infra/devops-engineer/references/incident-response.md +331 -0
  216. package/skills/infra/devops-engineer/references/kubernetes.md +154 -0
  217. package/skills/infra/devops-engineer/references/platform-engineering.md +417 -0
  218. package/skills/infra/devops-engineer/references/release-automation.md +527 -0
  219. package/skills/infra/devops-engineer/references/terraform-iac.md +141 -0
  220. package/skills/infra/kubernetes-specialist/SKILL.md +241 -0
  221. package/skills/infra/kubernetes-specialist/references/configuration.md +452 -0
  222. package/skills/infra/kubernetes-specialist/references/cost-optimization.md +458 -0
  223. package/skills/infra/kubernetes-specialist/references/custom-operators.md +563 -0
  224. package/skills/infra/kubernetes-specialist/references/gitops.md +530 -0
  225. package/skills/infra/kubernetes-specialist/references/helm-charts.md +912 -0
  226. package/skills/infra/kubernetes-specialist/references/multi-cluster.md +507 -0
  227. package/skills/infra/kubernetes-specialist/references/networking.md +447 -0
  228. package/skills/infra/kubernetes-specialist/references/service-mesh.md +459 -0
  229. package/skills/infra/kubernetes-specialist/references/storage.md +535 -0
  230. package/skills/infra/kubernetes-specialist/references/troubleshooting.md +414 -0
  231. package/skills/infra/kubernetes-specialist/references/workloads.md +377 -0
  232. package/skills/infra/mcp-developer/SKILL.md +143 -0
  233. package/skills/infra/mcp-developer/references/protocol.md +244 -0
  234. package/skills/infra/mcp-developer/references/python-sdk.md +367 -0
  235. package/skills/infra/mcp-developer/references/resources.md +554 -0
  236. package/skills/infra/mcp-developer/references/tools.md +480 -0
  237. package/skills/infra/mcp-developer/references/typescript-sdk.md +350 -0
  238. package/skills/infra/monitoring-expert/SKILL.md +176 -0
  239. package/skills/infra/monitoring-expert/references/alerting-rules.md +141 -0
  240. package/skills/infra/monitoring-expert/references/application-profiling.md +331 -0
  241. package/skills/infra/monitoring-expert/references/capacity-planning.md +344 -0
  242. package/skills/infra/monitoring-expert/references/dashboards.md +126 -0
  243. package/skills/infra/monitoring-expert/references/opentelemetry.md +123 -0
  244. package/skills/infra/monitoring-expert/references/performance-testing.md +269 -0
  245. package/skills/infra/monitoring-expert/references/prometheus-metrics.md +136 -0
  246. package/skills/infra/monitoring-expert/references/structured-logging.md +142 -0
  247. package/skills/infra/sre-engineer/SKILL.md +181 -0
  248. package/skills/infra/sre-engineer/references/automation-toil.md +492 -0
  249. package/skills/infra/sre-engineer/references/error-budget-policy.md +334 -0
  250. package/skills/infra/sre-engineer/references/incident-chaos.md +576 -0
  251. package/skills/infra/sre-engineer/references/monitoring-alerting.md +424 -0
  252. package/skills/infra/sre-engineer/references/slo-sli-management.md +238 -0
  253. package/skills/infra/terraform-engineer/SKILL.md +143 -0
  254. package/skills/infra/terraform-engineer/references/best-practices.md +583 -0
  255. package/skills/infra/terraform-engineer/references/module-patterns.md +297 -0
  256. package/skills/infra/terraform-engineer/references/providers.md +452 -0
  257. package/skills/infra/terraform-engineer/references/state-management.md +371 -0
  258. package/skills/infra/terraform-engineer/references/testing.md +486 -0
  259. package/skills/infra/websocket-engineer/SKILL.md +168 -0
  260. package/skills/infra/websocket-engineer/references/alternatives.md +391 -0
  261. package/skills/infra/websocket-engineer/references/patterns.md +400 -0
  262. package/skills/infra/websocket-engineer/references/protocol.md +195 -0
  263. package/skills/infra/websocket-engineer/references/scaling.md +333 -0
  264. package/skills/infra/websocket-engineer/references/security.md +474 -0
  265. package/skills/java/java-architect/SKILL.md +132 -0
  266. package/skills/java/java-architect/references/jpa-optimization.md +393 -0
  267. package/skills/java/java-architect/references/reactive-webflux.md +356 -0
  268. package/skills/java/java-architect/references/spring-boot-setup.md +269 -0
  269. package/skills/java/java-architect/references/spring-security.md +445 -0
  270. package/skills/java/java-architect/references/testing-patterns.md +500 -0
  271. package/skills/java/kotlin-specialist/SKILL.md +147 -0
  272. package/skills/java/kotlin-specialist/references/android-compose.md +419 -0
  273. package/skills/java/kotlin-specialist/references/coroutines-flow.md +276 -0
  274. package/skills/java/kotlin-specialist/references/dsl-idioms.md +421 -0
  275. package/skills/java/kotlin-specialist/references/ktor-server.md +426 -0
  276. package/skills/java/kotlin-specialist/references/multiplatform-kmp.md +380 -0
  277. package/skills/java/spring-boot-engineer/SKILL.md +195 -0
  278. package/skills/java/spring-boot-engineer/references/cloud.md +498 -0
  279. package/skills/java/spring-boot-engineer/references/data.md +381 -0
  280. package/skills/java/spring-boot-engineer/references/security.md +459 -0
  281. package/skills/java/spring-boot-engineer/references/testing.md +545 -0
  282. package/skills/java/spring-boot-engineer/references/web.md +295 -0
  283. package/skills/javascript/javascript-pro/SKILL.md +132 -0
  284. package/skills/javascript/javascript-pro/references/async-patterns.md +334 -0
  285. package/skills/javascript/javascript-pro/references/browser-apis.md +398 -0
  286. package/skills/javascript/javascript-pro/references/modern-syntax.md +272 -0
  287. package/skills/javascript/javascript-pro/references/modules.md +357 -0
  288. package/skills/javascript/javascript-pro/references/node-essentials.md +471 -0
  289. package/skills/javascript/nestjs-expert/SKILL.md +206 -0
  290. package/skills/javascript/nestjs-expert/references/authentication.md +166 -0
  291. package/skills/javascript/nestjs-expert/references/controllers-routing.md +111 -0
  292. package/skills/javascript/nestjs-expert/references/dtos-validation.md +153 -0
  293. package/skills/javascript/nestjs-expert/references/migration-from-express.md +1237 -0
  294. package/skills/javascript/nestjs-expert/references/services-di.md +140 -0
  295. package/skills/javascript/nestjs-expert/references/testing-patterns.md +186 -0
  296. package/skills/javascript/typescript-pro/SKILL.md +145 -0
  297. package/skills/javascript/typescript-pro/references/advanced-types.md +259 -0
  298. package/skills/javascript/typescript-pro/references/configuration.md +445 -0
  299. package/skills/javascript/typescript-pro/references/patterns.md +484 -0
  300. package/skills/javascript/typescript-pro/references/type-guards.md +352 -0
  301. package/skills/javascript/typescript-pro/references/utility-types.md +329 -0
  302. package/skills/php/laravel-specialist/SKILL.md +262 -0
  303. package/skills/php/laravel-specialist/references/eloquent.md +351 -0
  304. package/skills/php/laravel-specialist/references/livewire.md +512 -0
  305. package/skills/php/laravel-specialist/references/queues.md +423 -0
  306. package/skills/php/laravel-specialist/references/routing.md +362 -0
  307. package/skills/php/laravel-specialist/references/testing.md +522 -0
  308. package/skills/php/php-pro/SKILL.md +206 -0
  309. package/skills/php/php-pro/references/async-patterns.md +412 -0
  310. package/skills/php/php-pro/references/laravel-patterns.md +377 -0
  311. package/skills/php/php-pro/references/modern-php-features.md +323 -0
  312. package/skills/php/php-pro/references/symfony-patterns.md +466 -0
  313. package/skills/php/php-pro/references/testing-quality.md +466 -0
  314. package/skills/python/django-expert/SKILL.md +162 -0
  315. package/skills/python/django-expert/references/authentication.md +145 -0
  316. package/skills/python/django-expert/references/drf-serializers.md +148 -0
  317. package/skills/python/django-expert/references/models-orm.md +151 -0
  318. package/skills/python/django-expert/references/testing-django.md +204 -0
  319. package/skills/python/django-expert/references/viewsets-views.md +153 -0
  320. package/skills/python/fastapi-expert/SKILL.md +185 -0
  321. package/skills/python/fastapi-expert/references/async-sqlalchemy.md +146 -0
  322. package/skills/python/fastapi-expert/references/authentication.md +159 -0
  323. package/skills/python/fastapi-expert/references/endpoints-routing.md +142 -0
  324. package/skills/python/fastapi-expert/references/migration-from-django.md +997 -0
  325. package/skills/python/fastapi-expert/references/pydantic-v2.md +135 -0
  326. package/skills/python/fastapi-expert/references/testing-async.md +159 -0
  327. package/skills/python/pandas-pro/SKILL.md +178 -0
  328. package/skills/python/pandas-pro/references/aggregation-groupby.md +545 -0
  329. package/skills/python/pandas-pro/references/data-cleaning.md +500 -0
  330. package/skills/python/pandas-pro/references/dataframe-operations.md +420 -0
  331. package/skills/python/pandas-pro/references/merging-joining.md +596 -0
  332. package/skills/python/pandas-pro/references/performance-optimization.md +597 -0
  333. package/skills/python/python-pro/SKILL.md +177 -0
  334. package/skills/python/python-pro/references/async-patterns.md +356 -0
  335. package/skills/python/python-pro/references/packaging.md +460 -0
  336. package/skills/python/python-pro/references/standard-library.md +378 -0
  337. package/skills/python/python-pro/references/testing.md +404 -0
  338. package/skills/python/python-pro/references/type-system.md +290 -0
  339. package/skills/quality/chaos-engineer/SKILL.md +182 -0
  340. package/skills/quality/chaos-engineer/references/chaos-tools.md +511 -0
  341. package/skills/quality/chaos-engineer/references/experiment-design.md +229 -0
  342. package/skills/quality/chaos-engineer/references/game-days.md +434 -0
  343. package/skills/quality/chaos-engineer/references/infrastructure-chaos.md +348 -0
  344. package/skills/quality/chaos-engineer/references/kubernetes-chaos.md +432 -0
  345. package/skills/quality/code-reviewer/SKILL.md +119 -0
  346. package/skills/quality/code-reviewer/references/common-issues.md +142 -0
  347. package/skills/quality/code-reviewer/references/feedback-examples.md +144 -0
  348. package/skills/quality/code-reviewer/references/receiving-feedback.md +238 -0
  349. package/skills/quality/code-reviewer/references/report-template.md +109 -0
  350. package/skills/quality/code-reviewer/references/review-checklist.md +88 -0
  351. package/skills/quality/code-reviewer/references/spec-compliance-review.md +258 -0
  352. package/skills/quality/playwright-expert/SKILL.md +169 -0
  353. package/skills/quality/playwright-expert/references/api-mocking.md +140 -0
  354. package/skills/quality/playwright-expert/references/configuration.md +155 -0
  355. package/skills/quality/playwright-expert/references/debugging-flaky.md +150 -0
  356. package/skills/quality/playwright-expert/references/page-object-model.md +152 -0
  357. package/skills/quality/playwright-expert/references/selectors-locators.md +119 -0
  358. package/skills/quality/secure-code-guardian/SKILL.md +191 -0
  359. package/skills/quality/secure-code-guardian/references/authentication.md +136 -0
  360. package/skills/quality/secure-code-guardian/references/input-validation.md +146 -0
  361. package/skills/quality/secure-code-guardian/references/owasp-prevention.md +135 -0
  362. package/skills/quality/secure-code-guardian/references/security-headers.md +133 -0
  363. package/skills/quality/secure-code-guardian/references/xss-csrf.md +157 -0
  364. package/skills/quality/security-reviewer/SKILL.md +103 -0
  365. package/skills/quality/security-reviewer/references/infrastructure-security.md +268 -0
  366. package/skills/quality/security-reviewer/references/penetration-testing.md +268 -0
  367. package/skills/quality/security-reviewer/references/report-template.md +170 -0
  368. package/skills/quality/security-reviewer/references/sast-tools.md +117 -0
  369. package/skills/quality/security-reviewer/references/secret-scanning.md +125 -0
  370. package/skills/quality/security-reviewer/references/vulnerability-patterns.md +152 -0
  371. package/skills/quality/tdd-guide/assets/sample_coverage_report.lcov +0 -0
  372. package/skills/quality/test-master/SKILL.md +94 -0
  373. package/skills/quality/test-master/references/automation-frameworks.md +294 -0
  374. package/skills/quality/test-master/references/e2e-testing.md +128 -0
  375. package/skills/quality/test-master/references/integration-testing.md +120 -0
  376. package/skills/quality/test-master/references/performance-testing.md +118 -0
  377. package/skills/quality/test-master/references/qa-methodology.md +247 -0
  378. package/skills/quality/test-master/references/security-testing.md +127 -0
  379. package/skills/quality/test-master/references/tdd-iron-laws.md +174 -0
  380. package/skills/quality/test-master/references/test-reports.md +104 -0
  381. package/skills/quality/test-master/references/testing-anti-patterns.md +231 -0
  382. package/skills/quality/test-master/references/unit-testing.md +113 -0
  383. package/skills/ruby/rails-expert/SKILL.md +154 -0
  384. package/skills/ruby/rails-expert/references/active-record.md +244 -0
  385. package/skills/ruby/rails-expert/references/api-development.md +401 -0
  386. package/skills/ruby/rails-expert/references/background-jobs.md +272 -0
  387. package/skills/ruby/rails-expert/references/hotwire-turbo.md +228 -0
  388. package/skills/ruby/rails-expert/references/rspec-testing.md +367 -0
  389. package/skills/swift/swift-expert/SKILL.md +163 -0
  390. package/skills/swift/swift-expert/references/async-concurrency.md +360 -0
  391. package/skills/swift/swift-expert/references/memory-performance.md +377 -0
  392. package/skills/swift/swift-expert/references/protocol-oriented.md +354 -0
  393. package/skills/swift/swift-expert/references/swiftui-patterns.md +291 -0
  394. package/skills/swift/swift-expert/references/testing-patterns.md +399 -0
  395. package/skills/workflow/brainstorming/SKILL.md +164 -0
  396. package/skills/workflow/brainstorming/scripts/helper.js +88 -0
  397. package/skills/workflow/brainstorming/scripts/start-server.sh +148 -0
  398. package/skills/workflow/brainstorming/scripts/stop-server.sh +56 -0
  399. package/skills/workflow/brainstorming/spec-document-reviewer-prompt.md +49 -0
  400. package/skills/workflow/brainstorming/visual-companion.md +287 -0
  401. package/skills/workflow/documentation/SKILL.md +45 -0
  402. package/skills/workflow/entropy-management/SKILL.md +115 -0
  403. package/skills/workflow/executing-plans/SKILL.md +70 -0
  404. package/skills/workflow/finishing-a-development-branch/SKILL.md +200 -0
  405. package/skills/workflow/receiving-code-review/SKILL.md +213 -0
  406. package/skills/workflow/requesting-code-review/SKILL.md +105 -0
  407. package/skills/workflow/requesting-code-review/code-reviewer.md +146 -0
  408. package/skills/workflow/requirement-engineering/SKILL.md +111 -0
  409. package/skills/workflow/systematic-debugging/CREATION-LOG.md +119 -0
  410. package/skills/workflow/systematic-debugging/SKILL.md +296 -0
  411. package/skills/workflow/systematic-debugging/condition-based-waiting-example.ts +158 -0
  412. package/skills/workflow/systematic-debugging/condition-based-waiting.md +115 -0
  413. package/skills/workflow/systematic-debugging/defense-in-depth.md +122 -0
  414. package/skills/workflow/systematic-debugging/find-polluter.sh +63 -0
  415. package/skills/workflow/systematic-debugging/root-cause-tracing.md +169 -0
  416. package/skills/workflow/systematic-debugging/test-academic.md +14 -0
  417. package/skills/workflow/systematic-debugging/test-pressure-1.md +58 -0
  418. package/skills/workflow/systematic-debugging/test-pressure-2.md +68 -0
  419. package/skills/workflow/systematic-debugging/test-pressure-3.md +69 -0
  420. package/skills/workflow/using-git-worktrees/SKILL.md +218 -0
  421. package/skills/workflow/verification-before-completion/SKILL.md +139 -0
  422. package/skills/workflow/writing-plans/SKILL.md +151 -0
  423. package/skills/workflow/writing-plans/plan-document-reviewer-prompt.md +49 -0
  424. package/skills/workflow/writing-skills/SKILL.md +655 -0
  425. package/skills/workflow/writing-skills/anthropic-best-practices.md +1150 -0
  426. package/skills/workflow/writing-skills/examples/CLAUDE_MD_TESTING.md +189 -0
  427. package/skills/workflow/writing-skills/graphviz-conventions.dot +0 -0
  428. package/skills/workflow/writing-skills/persuasion-principles.md +187 -0
  429. package/skills/workflow/writing-skills/render-graphs.js +168 -0
  430. package/skills/workflow/writing-skills/testing-skills-with-subagents.md +384 -0
  431. package/skills/angular-architect/SKILL.md +0 -152
  432. package/skills/angular-architect/references/components.md +0 -297
  433. package/skills/angular-architect/references/ngrx.md +0 -401
  434. package/skills/angular-architect/references/routing.md +0 -361
  435. package/skills/angular-architect/references/rxjs.md +0 -319
  436. package/skills/angular-architect/references/testing.md +0 -405
  437. package/skills/api-designer/SKILL.md +0 -217
  438. package/skills/api-designer/references/error-handling.md +0 -541
  439. package/skills/api-designer/references/openapi.md +0 -824
  440. package/skills/api-designer/references/pagination.md +0 -494
  441. package/skills/api-designer/references/rest-patterns.md +0 -335
  442. package/skills/api-designer/references/versioning.md +0 -391
  443. package/skills/architecture-designer/SKILL.md +0 -117
  444. package/skills/architecture-designer/references/adr-template.md +0 -116
  445. package/skills/architecture-designer/references/architecture-patterns.md +0 -111
  446. package/skills/architecture-designer/references/database-selection.md +0 -102
  447. package/skills/architecture-designer/references/nfr-checklist.md +0 -112
  448. package/skills/architecture-designer/references/system-design.md +0 -100
  449. package/skills/brainstorming/SKILL.md +0 -164
  450. package/skills/brainstorming/scripts/helper.js +0 -88
  451. package/skills/brainstorming/scripts/start-server.sh +0 -148
  452. package/skills/brainstorming/scripts/stop-server.sh +0 -56
  453. package/skills/brainstorming/spec-document-reviewer-prompt.md +0 -49
  454. package/skills/brainstorming/visual-companion.md +0 -287
  455. package/skills/chaos-engineer/SKILL.md +0 -182
  456. package/skills/chaos-engineer/references/chaos-tools.md +0 -511
  457. package/skills/chaos-engineer/references/experiment-design.md +0 -229
  458. package/skills/chaos-engineer/references/game-days.md +0 -434
  459. package/skills/chaos-engineer/references/infrastructure-chaos.md +0 -348
  460. package/skills/chaos-engineer/references/kubernetes-chaos.md +0 -432
  461. package/skills/cli-developer/SKILL.md +0 -113
  462. package/skills/cli-developer/references/design-patterns.md +0 -221
  463. package/skills/cli-developer/references/go-cli.md +0 -540
  464. package/skills/cli-developer/references/node-cli.md +0 -383
  465. package/skills/cli-developer/references/python-cli.md +0 -422
  466. package/skills/cli-developer/references/ux-patterns.md +0 -448
  467. package/skills/cloud-architect/SKILL.md +0 -216
  468. package/skills/cloud-architect/references/aws.md +0 -394
  469. package/skills/cloud-architect/references/azure.md +0 -562
  470. package/skills/cloud-architect/references/cost.md +0 -582
  471. package/skills/cloud-architect/references/gcp.md +0 -633
  472. package/skills/cloud-architect/references/multi-cloud.md +0 -483
  473. package/skills/code-documenter/SKILL.md +0 -147
  474. package/skills/code-documenter/references/api-docs-fastapi-django.md +0 -166
  475. package/skills/code-documenter/references/api-docs-nestjs-express.md +0 -220
  476. package/skills/code-documenter/references/coverage-reports.md +0 -125
  477. package/skills/code-documenter/references/documentation-systems.md +0 -333
  478. package/skills/code-documenter/references/interactive-api-docs.md +0 -531
  479. package/skills/code-documenter/references/python-docstrings.md +0 -121
  480. package/skills/code-documenter/references/typescript-jsdoc.md +0 -145
  481. package/skills/code-documenter/references/user-guides-tutorials.md +0 -530
  482. package/skills/code-reviewer/SKILL.md +0 -119
  483. package/skills/code-reviewer/references/common-issues.md +0 -142
  484. package/skills/code-reviewer/references/feedback-examples.md +0 -144
  485. package/skills/code-reviewer/references/receiving-feedback.md +0 -238
  486. package/skills/code-reviewer/references/report-template.md +0 -109
  487. package/skills/code-reviewer/references/review-checklist.md +0 -88
  488. package/skills/code-reviewer/references/spec-compliance-review.md +0 -258
  489. package/skills/cpp-pro/SKILL.md +0 -115
  490. package/skills/cpp-pro/references/build-tooling.md +0 -440
  491. package/skills/cpp-pro/references/concurrency.md +0 -437
  492. package/skills/cpp-pro/references/memory-performance.md +0 -397
  493. package/skills/cpp-pro/references/modern-cpp.md +0 -304
  494. package/skills/cpp-pro/references/templates.md +0 -357
  495. package/skills/csharp-developer/SKILL.md +0 -125
  496. package/skills/csharp-developer/references/aspnet-core.md +0 -394
  497. package/skills/csharp-developer/references/blazor.md +0 -553
  498. package/skills/csharp-developer/references/entity-framework.md +0 -409
  499. package/skills/csharp-developer/references/modern-csharp.md +0 -248
  500. package/skills/csharp-developer/references/performance.md +0 -498
  501. package/skills/database-optimizer/SKILL.md +0 -147
  502. package/skills/database-optimizer/references/index-strategies.md +0 -331
  503. package/skills/database-optimizer/references/monitoring-analysis.md +0 -501
  504. package/skills/database-optimizer/references/mysql-tuning.md +0 -452
  505. package/skills/database-optimizer/references/postgresql-tuning.md +0 -413
  506. package/skills/database-optimizer/references/query-optimization.md +0 -251
  507. package/skills/debugging-wizard/SKILL.md +0 -105
  508. package/skills/debugging-wizard/references/common-patterns.md +0 -132
  509. package/skills/debugging-wizard/references/debugging-tools.md +0 -140
  510. package/skills/debugging-wizard/references/quick-fixes.md +0 -177
  511. package/skills/debugging-wizard/references/strategies.md +0 -142
  512. package/skills/debugging-wizard/references/systematic-debugging.md +0 -367
  513. package/skills/devops-engineer/SKILL.md +0 -144
  514. package/skills/devops-engineer/references/deployment-strategies.md +0 -241
  515. package/skills/devops-engineer/references/docker-patterns.md +0 -113
  516. package/skills/devops-engineer/references/github-actions.md +0 -139
  517. package/skills/devops-engineer/references/incident-response.md +0 -331
  518. package/skills/devops-engineer/references/kubernetes.md +0 -154
  519. package/skills/devops-engineer/references/platform-engineering.md +0 -417
  520. package/skills/devops-engineer/references/release-automation.md +0 -527
  521. package/skills/devops-engineer/references/terraform-iac.md +0 -141
  522. package/skills/django-expert/SKILL.md +0 -162
  523. package/skills/django-expert/references/authentication.md +0 -145
  524. package/skills/django-expert/references/drf-serializers.md +0 -148
  525. package/skills/django-expert/references/models-orm.md +0 -151
  526. package/skills/django-expert/references/testing-django.md +0 -204
  527. package/skills/django-expert/references/viewsets-views.md +0 -153
  528. package/skills/documentation/SKILL.md +0 -45
  529. package/skills/dotnet-core-expert/SKILL.md +0 -138
  530. package/skills/dotnet-core-expert/references/authentication.md +0 -546
  531. package/skills/dotnet-core-expert/references/clean-architecture.md +0 -455
  532. package/skills/dotnet-core-expert/references/cloud-native.md +0 -548
  533. package/skills/dotnet-core-expert/references/entity-framework.md +0 -440
  534. package/skills/dotnet-core-expert/references/minimal-apis.md +0 -319
  535. package/skills/entropy-management/SKILL.md +0 -115
  536. package/skills/executing-plans/SKILL.md +0 -70
  537. package/skills/fastapi-expert/SKILL.md +0 -185
  538. package/skills/fastapi-expert/references/async-sqlalchemy.md +0 -146
  539. package/skills/fastapi-expert/references/authentication.md +0 -159
  540. package/skills/fastapi-expert/references/endpoints-routing.md +0 -142
  541. package/skills/fastapi-expert/references/migration-from-django.md +0 -997
  542. package/skills/fastapi-expert/references/pydantic-v2.md +0 -135
  543. package/skills/fastapi-expert/references/testing-async.md +0 -159
  544. package/skills/feature-forge/SKILL.md +0 -98
  545. package/skills/feature-forge/references/acceptance-criteria.md +0 -104
  546. package/skills/feature-forge/references/ears-syntax.md +0 -99
  547. package/skills/feature-forge/references/interview-questions.md +0 -150
  548. package/skills/feature-forge/references/pre-discovery-subagents.md +0 -54
  549. package/skills/feature-forge/references/specification-template.md +0 -103
  550. package/skills/fine-tuning-expert/SKILL.md +0 -162
  551. package/skills/fine-tuning-expert/references/dataset-preparation.md +0 -540
  552. package/skills/fine-tuning-expert/references/deployment-optimization.md +0 -673
  553. package/skills/fine-tuning-expert/references/evaluation-metrics.md +0 -597
  554. package/skills/fine-tuning-expert/references/hyperparameter-tuning.md +0 -565
  555. package/skills/fine-tuning-expert/references/lora-peft.md +0 -347
  556. package/skills/finishing-a-development-branch/SKILL.md +0 -200
  557. package/skills/flutter-expert/SKILL.md +0 -138
  558. package/skills/flutter-expert/references/bloc-state.md +0 -259
  559. package/skills/flutter-expert/references/gorouter-navigation.md +0 -119
  560. package/skills/flutter-expert/references/performance.md +0 -99
  561. package/skills/flutter-expert/references/project-structure.md +0 -118
  562. package/skills/flutter-expert/references/riverpod-state.md +0 -130
  563. package/skills/flutter-expert/references/widget-patterns.md +0 -123
  564. package/skills/fullstack-guardian/SKILL.md +0 -105
  565. package/skills/fullstack-guardian/references/api-design-standards.md +0 -307
  566. package/skills/fullstack-guardian/references/architecture-decisions.md +0 -350
  567. package/skills/fullstack-guardian/references/backend-patterns.md +0 -237
  568. package/skills/fullstack-guardian/references/common-patterns.md +0 -134
  569. package/skills/fullstack-guardian/references/deliverables-checklist.md +0 -354
  570. package/skills/fullstack-guardian/references/design-template.md +0 -91
  571. package/skills/fullstack-guardian/references/error-handling.md +0 -135
  572. package/skills/fullstack-guardian/references/frontend-patterns.md +0 -340
  573. package/skills/fullstack-guardian/references/integration-patterns.md +0 -333
  574. package/skills/fullstack-guardian/references/security-checklist.md +0 -106
  575. package/skills/golang-pro/SKILL.md +0 -122
  576. package/skills/golang-pro/references/concurrency.md +0 -329
  577. package/skills/golang-pro/references/generics.md +0 -442
  578. package/skills/golang-pro/references/interfaces.md +0 -432
  579. package/skills/golang-pro/references/project-structure.md +0 -477
  580. package/skills/golang-pro/references/testing.md +0 -451
  581. package/skills/graphql-architect/SKILL.md +0 -146
  582. package/skills/graphql-architect/references/federation.md +0 -418
  583. package/skills/graphql-architect/references/migration-from-rest.md +0 -1141
  584. package/skills/graphql-architect/references/resolvers.md +0 -425
  585. package/skills/graphql-architect/references/schema-design.md +0 -393
  586. package/skills/graphql-architect/references/security.md +0 -569
  587. package/skills/graphql-architect/references/subscriptions.md +0 -510
  588. package/skills/java-architect/SKILL.md +0 -132
  589. package/skills/java-architect/references/jpa-optimization.md +0 -393
  590. package/skills/java-architect/references/reactive-webflux.md +0 -356
  591. package/skills/java-architect/references/spring-boot-setup.md +0 -269
  592. package/skills/java-architect/references/spring-security.md +0 -445
  593. package/skills/java-architect/references/testing-patterns.md +0 -500
  594. package/skills/javascript-pro/SKILL.md +0 -132
  595. package/skills/javascript-pro/references/async-patterns.md +0 -334
  596. package/skills/javascript-pro/references/browser-apis.md +0 -398
  597. package/skills/javascript-pro/references/modern-syntax.md +0 -272
  598. package/skills/javascript-pro/references/modules.md +0 -357
  599. package/skills/javascript-pro/references/node-essentials.md +0 -471
  600. package/skills/kotlin-specialist/SKILL.md +0 -147
  601. package/skills/kotlin-specialist/references/android-compose.md +0 -419
  602. package/skills/kotlin-specialist/references/coroutines-flow.md +0 -276
  603. package/skills/kotlin-specialist/references/dsl-idioms.md +0 -421
  604. package/skills/kotlin-specialist/references/ktor-server.md +0 -426
  605. package/skills/kotlin-specialist/references/multiplatform-kmp.md +0 -380
  606. package/skills/kubernetes-specialist/SKILL.md +0 -241
  607. package/skills/kubernetes-specialist/references/configuration.md +0 -452
  608. package/skills/kubernetes-specialist/references/cost-optimization.md +0 -458
  609. package/skills/kubernetes-specialist/references/custom-operators.md +0 -563
  610. package/skills/kubernetes-specialist/references/gitops.md +0 -530
  611. package/skills/kubernetes-specialist/references/helm-charts.md +0 -912
  612. package/skills/kubernetes-specialist/references/multi-cluster.md +0 -507
  613. package/skills/kubernetes-specialist/references/networking.md +0 -447
  614. package/skills/kubernetes-specialist/references/service-mesh.md +0 -459
  615. package/skills/kubernetes-specialist/references/storage.md +0 -535
  616. package/skills/kubernetes-specialist/references/troubleshooting.md +0 -414
  617. package/skills/kubernetes-specialist/references/workloads.md +0 -377
  618. package/skills/laravel-specialist/SKILL.md +0 -262
  619. package/skills/laravel-specialist/references/eloquent.md +0 -351
  620. package/skills/laravel-specialist/references/livewire.md +0 -512
  621. package/skills/laravel-specialist/references/queues.md +0 -423
  622. package/skills/laravel-specialist/references/routing.md +0 -362
  623. package/skills/laravel-specialist/references/testing.md +0 -522
  624. package/skills/legacy-modernizer/SKILL.md +0 -137
  625. package/skills/legacy-modernizer/references/legacy-testing.md +0 -381
  626. package/skills/legacy-modernizer/references/migration-strategies.md +0 -423
  627. package/skills/legacy-modernizer/references/refactoring-patterns.md +0 -395
  628. package/skills/legacy-modernizer/references/strangler-fig-pattern.md +0 -281
  629. package/skills/legacy-modernizer/references/system-assessment.md +0 -487
  630. package/skills/mcp-developer/SKILL.md +0 -143
  631. package/skills/mcp-developer/references/protocol.md +0 -244
  632. package/skills/mcp-developer/references/python-sdk.md +0 -367
  633. package/skills/mcp-developer/references/resources.md +0 -554
  634. package/skills/mcp-developer/references/tools.md +0 -480
  635. package/skills/mcp-developer/references/typescript-sdk.md +0 -350
  636. package/skills/microservices-architect/SKILL.md +0 -164
  637. package/skills/microservices-architect/references/communication.md +0 -499
  638. package/skills/microservices-architect/references/data.md +0 -721
  639. package/skills/microservices-architect/references/decomposition.md +0 -344
  640. package/skills/microservices-architect/references/observability.md +0 -805
  641. package/skills/microservices-architect/references/patterns.md +0 -603
  642. package/skills/ml-pipeline/SKILL.md +0 -159
  643. package/skills/ml-pipeline/references/experiment-tracking.md +0 -833
  644. package/skills/ml-pipeline/references/feature-engineering.md +0 -631
  645. package/skills/ml-pipeline/references/model-validation.md +0 -978
  646. package/skills/ml-pipeline/references/pipeline-orchestration.md +0 -907
  647. package/skills/ml-pipeline/references/training-pipelines.md +0 -782
  648. package/skills/monitoring-expert/SKILL.md +0 -176
  649. package/skills/monitoring-expert/references/alerting-rules.md +0 -141
  650. package/skills/monitoring-expert/references/application-profiling.md +0 -331
  651. package/skills/monitoring-expert/references/capacity-planning.md +0 -344
  652. package/skills/monitoring-expert/references/dashboards.md +0 -126
  653. package/skills/monitoring-expert/references/opentelemetry.md +0 -123
  654. package/skills/monitoring-expert/references/performance-testing.md +0 -269
  655. package/skills/monitoring-expert/references/prometheus-metrics.md +0 -136
  656. package/skills/monitoring-expert/references/structured-logging.md +0 -142
  657. package/skills/nestjs-expert/SKILL.md +0 -206
  658. package/skills/nestjs-expert/references/authentication.md +0 -166
  659. package/skills/nestjs-expert/references/controllers-routing.md +0 -111
  660. package/skills/nestjs-expert/references/dtos-validation.md +0 -153
  661. package/skills/nestjs-expert/references/migration-from-express.md +0 -1237
  662. package/skills/nestjs-expert/references/services-di.md +0 -140
  663. package/skills/nestjs-expert/references/testing-patterns.md +0 -186
  664. package/skills/nextjs-developer/SKILL.md +0 -143
  665. package/skills/nextjs-developer/references/app-router.md +0 -311
  666. package/skills/nextjs-developer/references/data-fetching.md +0 -482
  667. package/skills/nextjs-developer/references/deployment.md +0 -545
  668. package/skills/nextjs-developer/references/server-actions.md +0 -462
  669. package/skills/nextjs-developer/references/server-components.md +0 -384
  670. package/skills/pandas-pro/SKILL.md +0 -178
  671. package/skills/pandas-pro/references/aggregation-groupby.md +0 -545
  672. package/skills/pandas-pro/references/data-cleaning.md +0 -500
  673. package/skills/pandas-pro/references/dataframe-operations.md +0 -420
  674. package/skills/pandas-pro/references/merging-joining.md +0 -596
  675. package/skills/pandas-pro/references/performance-optimization.md +0 -597
  676. package/skills/php-pro/SKILL.md +0 -206
  677. package/skills/php-pro/references/async-patterns.md +0 -412
  678. package/skills/php-pro/references/laravel-patterns.md +0 -377
  679. package/skills/php-pro/references/modern-php-features.md +0 -323
  680. package/skills/php-pro/references/symfony-patterns.md +0 -466
  681. package/skills/php-pro/references/testing-quality.md +0 -466
  682. package/skills/playwright-expert/SKILL.md +0 -169
  683. package/skills/playwright-expert/references/api-mocking.md +0 -140
  684. package/skills/playwright-expert/references/configuration.md +0 -155
  685. package/skills/playwright-expert/references/debugging-flaky.md +0 -150
  686. package/skills/playwright-expert/references/page-object-model.md +0 -152
  687. package/skills/playwright-expert/references/selectors-locators.md +0 -119
  688. package/skills/postgres-pro/SKILL.md +0 -152
  689. package/skills/postgres-pro/references/extensions.md +0 -404
  690. package/skills/postgres-pro/references/jsonb.md +0 -321
  691. package/skills/postgres-pro/references/maintenance.md +0 -481
  692. package/skills/postgres-pro/references/performance.md +0 -265
  693. package/skills/postgres-pro/references/replication.md +0 -446
  694. package/skills/python-pro/SKILL.md +0 -177
  695. package/skills/python-pro/references/async-patterns.md +0 -356
  696. package/skills/python-pro/references/packaging.md +0 -460
  697. package/skills/python-pro/references/standard-library.md +0 -378
  698. package/skills/python-pro/references/testing.md +0 -404
  699. package/skills/python-pro/references/type-system.md +0 -290
  700. package/skills/rag-architect/SKILL.md +0 -194
  701. package/skills/rag-architect/references/chunking-strategies.md +0 -878
  702. package/skills/rag-architect/references/embedding-models.md +0 -561
  703. package/skills/rag-architect/references/rag-evaluation.md +0 -833
  704. package/skills/rag-architect/references/retrieval-optimization.md +0 -795
  705. package/skills/rag-architect/references/vector-databases.md +0 -589
  706. package/skills/rails-expert/SKILL.md +0 -154
  707. package/skills/rails-expert/references/active-record.md +0 -244
  708. package/skills/rails-expert/references/api-development.md +0 -401
  709. package/skills/rails-expert/references/background-jobs.md +0 -272
  710. package/skills/rails-expert/references/hotwire-turbo.md +0 -228
  711. package/skills/rails-expert/references/rspec-testing.md +0 -367
  712. package/skills/react-expert/SKILL.md +0 -149
  713. package/skills/react-expert/references/hooks-patterns.md +0 -162
  714. package/skills/react-expert/references/migration-class-to-modern.md +0 -1119
  715. package/skills/react-expert/references/performance.md +0 -168
  716. package/skills/react-expert/references/react-19-features.md +0 -174
  717. package/skills/react-expert/references/server-components.md +0 -143
  718. package/skills/react-expert/references/state-management.md +0 -171
  719. package/skills/react-expert/references/testing-react.md +0 -174
  720. package/skills/react-native-expert/SKILL.md +0 -185
  721. package/skills/react-native-expert/references/expo-router.md +0 -187
  722. package/skills/react-native-expert/references/list-optimization.md +0 -204
  723. package/skills/react-native-expert/references/platform-handling.md +0 -188
  724. package/skills/react-native-expert/references/project-structure.md +0 -171
  725. package/skills/react-native-expert/references/storage-hooks.md +0 -173
  726. package/skills/receiving-code-review/SKILL.md +0 -213
  727. package/skills/requesting-code-review/SKILL.md +0 -105
  728. package/skills/requesting-code-review/code-reviewer.md +0 -146
  729. package/skills/requirement-engineering/SKILL.md +0 -111
  730. package/skills/rust-engineer/SKILL.md +0 -167
  731. package/skills/rust-engineer/references/async.md +0 -458
  732. package/skills/rust-engineer/references/error-handling.md +0 -334
  733. package/skills/rust-engineer/references/ownership.md +0 -278
  734. package/skills/rust-engineer/references/testing.md +0 -470
  735. package/skills/rust-engineer/references/traits.md +0 -413
  736. package/skills/secure-code-guardian/SKILL.md +0 -191
  737. package/skills/secure-code-guardian/references/authentication.md +0 -136
  738. package/skills/secure-code-guardian/references/input-validation.md +0 -146
  739. package/skills/secure-code-guardian/references/owasp-prevention.md +0 -135
  740. package/skills/secure-code-guardian/references/security-headers.md +0 -133
  741. package/skills/secure-code-guardian/references/xss-csrf.md +0 -157
  742. package/skills/security-reviewer/SKILL.md +0 -103
  743. package/skills/security-reviewer/references/infrastructure-security.md +0 -268
  744. package/skills/security-reviewer/references/penetration-testing.md +0 -268
  745. package/skills/security-reviewer/references/report-template.md +0 -170
  746. package/skills/security-reviewer/references/sast-tools.md +0 -117
  747. package/skills/security-reviewer/references/secret-scanning.md +0 -125
  748. package/skills/security-reviewer/references/vulnerability-patterns.md +0 -152
  749. package/skills/spark-engineer/SKILL.md +0 -148
  750. package/skills/spark-engineer/references/partitioning-caching.md +0 -543
  751. package/skills/spark-engineer/references/performance-tuning.md +0 -544
  752. package/skills/spark-engineer/references/rdd-operations.md +0 -599
  753. package/skills/spark-engineer/references/spark-sql-dataframes.md +0 -474
  754. package/skills/spark-engineer/references/streaming-patterns.md +0 -786
  755. package/skills/spring-boot-engineer/SKILL.md +0 -195
  756. package/skills/spring-boot-engineer/references/cloud.md +0 -498
  757. package/skills/spring-boot-engineer/references/data.md +0 -381
  758. package/skills/spring-boot-engineer/references/security.md +0 -459
  759. package/skills/spring-boot-engineer/references/testing.md +0 -545
  760. package/skills/spring-boot-engineer/references/web.md +0 -295
  761. package/skills/sql-pro/SKILL.md +0 -129
  762. package/skills/sql-pro/references/database-design.md +0 -402
  763. package/skills/sql-pro/references/dialect-differences.md +0 -419
  764. package/skills/sql-pro/references/optimization.md +0 -384
  765. package/skills/sql-pro/references/query-patterns.md +0 -285
  766. package/skills/sql-pro/references/window-functions.md +0 -328
  767. package/skills/sre-engineer/SKILL.md +0 -181
  768. package/skills/sre-engineer/references/automation-toil.md +0 -492
  769. package/skills/sre-engineer/references/error-budget-policy.md +0 -334
  770. package/skills/sre-engineer/references/incident-chaos.md +0 -576
  771. package/skills/sre-engineer/references/monitoring-alerting.md +0 -424
  772. package/skills/sre-engineer/references/slo-sli-management.md +0 -238
  773. package/skills/swift-expert/SKILL.md +0 -163
  774. package/skills/swift-expert/references/async-concurrency.md +0 -360
  775. package/skills/swift-expert/references/memory-performance.md +0 -377
  776. package/skills/swift-expert/references/protocol-oriented.md +0 -354
  777. package/skills/swift-expert/references/swiftui-patterns.md +0 -291
  778. package/skills/swift-expert/references/testing-patterns.md +0 -399
  779. package/skills/systematic-debugging/CREATION-LOG.md +0 -119
  780. package/skills/systematic-debugging/SKILL.md +0 -296
  781. package/skills/systematic-debugging/condition-based-waiting-example.ts +0 -158
  782. package/skills/systematic-debugging/condition-based-waiting.md +0 -115
  783. package/skills/systematic-debugging/defense-in-depth.md +0 -122
  784. package/skills/systematic-debugging/find-polluter.sh +0 -63
  785. package/skills/systematic-debugging/root-cause-tracing.md +0 -169
  786. package/skills/systematic-debugging/test-academic.md +0 -14
  787. package/skills/systematic-debugging/test-pressure-1.md +0 -58
  788. package/skills/systematic-debugging/test-pressure-2.md +0 -68
  789. package/skills/systematic-debugging/test-pressure-3.md +0 -69
  790. package/skills/tdd-guide/assets/sample_coverage_report.lcov +0 -56
  791. package/skills/terraform-engineer/SKILL.md +0 -143
  792. package/skills/terraform-engineer/references/best-practices.md +0 -583
  793. package/skills/terraform-engineer/references/module-patterns.md +0 -297
  794. package/skills/terraform-engineer/references/providers.md +0 -452
  795. package/skills/terraform-engineer/references/state-management.md +0 -371
  796. package/skills/terraform-engineer/references/testing.md +0 -486
  797. package/skills/test-master/SKILL.md +0 -94
  798. package/skills/test-master/references/automation-frameworks.md +0 -294
  799. package/skills/test-master/references/e2e-testing.md +0 -128
  800. package/skills/test-master/references/integration-testing.md +0 -120
  801. package/skills/test-master/references/performance-testing.md +0 -118
  802. package/skills/test-master/references/qa-methodology.md +0 -247
  803. package/skills/test-master/references/security-testing.md +0 -127
  804. package/skills/test-master/references/tdd-iron-laws.md +0 -174
  805. package/skills/test-master/references/test-reports.md +0 -104
  806. package/skills/test-master/references/testing-anti-patterns.md +0 -231
  807. package/skills/test-master/references/unit-testing.md +0 -113
  808. package/skills/typescript-pro/SKILL.md +0 -145
  809. package/skills/typescript-pro/references/advanced-types.md +0 -259
  810. package/skills/typescript-pro/references/configuration.md +0 -445
  811. package/skills/typescript-pro/references/patterns.md +0 -484
  812. package/skills/typescript-pro/references/type-guards.md +0 -352
  813. package/skills/typescript-pro/references/utility-types.md +0 -329
  814. package/skills/using-git-worktrees/SKILL.md +0 -218
  815. package/skills/verification-before-completion/SKILL.md +0 -139
  816. package/skills/vue-expert/SKILL.md +0 -98
  817. package/skills/vue-expert/references/build-tooling.md +0 -480
  818. package/skills/vue-expert/references/components.md +0 -448
  819. package/skills/vue-expert/references/composition-api.md +0 -299
  820. package/skills/vue-expert/references/mobile-hybrid.md +0 -636
  821. package/skills/vue-expert/references/nuxt.md +0 -669
  822. package/skills/vue-expert/references/state-management.md +0 -449
  823. package/skills/vue-expert/references/typescript.md +0 -584
  824. package/skills/vue-expert-js/SKILL.md +0 -167
  825. package/skills/vue-expert-js/references/component-architecture.md +0 -219
  826. package/skills/vue-expert-js/references/composables-patterns.md +0 -183
  827. package/skills/vue-expert-js/references/jsdoc-typing.md +0 -535
  828. package/skills/vue-expert-js/references/state-management.md +0 -249
  829. package/skills/vue-expert-js/references/testing-patterns.md +0 -237
  830. package/skills/websocket-engineer/SKILL.md +0 -168
  831. package/skills/websocket-engineer/references/alternatives.md +0 -391
  832. package/skills/websocket-engineer/references/patterns.md +0 -400
  833. package/skills/websocket-engineer/references/protocol.md +0 -195
  834. package/skills/websocket-engineer/references/scaling.md +0 -333
  835. package/skills/websocket-engineer/references/security.md +0 -474
  836. package/skills/writing-plans/SKILL.md +0 -151
  837. package/skills/writing-plans/plan-document-reviewer-prompt.md +0 -49
  838. package/skills/writing-skills/SKILL.md +0 -655
  839. package/skills/writing-skills/anthropic-best-practices.md +0 -1150
  840. package/skills/writing-skills/examples/CLAUDE_MD_TESTING.md +0 -189
  841. package/skills/writing-skills/graphviz-conventions.dot +0 -172
  842. package/skills/writing-skills/persuasion-principles.md +0 -187
  843. package/skills/writing-skills/render-graphs.js +0 -168
  844. package/skills/writing-skills/testing-skills-with-subagents.md +0 -384
  845. /package/skills/{design-commands → frontend/design-commands}/design.md +0 -0
  846. /package/skills/{design-commands → frontend/design-commands}/handoff.md +0 -0
  847. /package/skills/{design-commands → frontend/design-commands}/prototype.md +0 -0
  848. /package/skills/{design-commands → frontend/design-commands}/spec.md +0 -0
  849. /package/skills/{design-commands → frontend/design-commands}/style.md +0 -0
  850. /package/skills/{senior-frontend → frontend/senior-frontend}/SKILL.md +0 -0
  851. /package/skills/{senior-frontend → frontend/senior-frontend}/references/frontend_best_practices.md +0 -0
  852. /package/skills/{senior-frontend → frontend/senior-frontend}/references/nextjs_optimization_guide.md +0 -0
  853. /package/skills/{senior-frontend → frontend/senior-frontend}/references/react_patterns.md +0 -0
  854. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/bundle_analyzer.py +0 -0
  855. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/component_generator.py +0 -0
  856. /package/skills/{senior-frontend → frontend/senior-frontend}/scripts/frontend_scaffolder.py +0 -0
  857. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/SKILL.md +0 -0
  858. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/charts.csv +0 -0
  859. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/colors.csv +0 -0
  860. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/icons.csv +0 -0
  861. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/landing.csv +0 -0
  862. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/products.csv +0 -0
  863. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/react-performance.csv +0 -0
  864. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/astro.csv +0 -0
  865. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/flutter.csv +0 -0
  866. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/html-tailwind.csv +0 -0
  867. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/jetpack-compose.csv +0 -0
  868. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nextjs.csv +0 -0
  869. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxt-ui.csv +0 -0
  870. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/nuxtjs.csv +0 -0
  871. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react-native.csv +0 -0
  872. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/react.csv +0 -0
  873. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/shadcn.csv +0 -0
  874. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/svelte.csv +0 -0
  875. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/swiftui.csv +0 -0
  876. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/stacks/vue.csv +0 -0
  877. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/styles.csv +0 -0
  878. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/typography.csv +0 -0
  879. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ui-reasoning.csv +0 -0
  880. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/ux-guidelines.csv +0 -0
  881. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/data/web-interface.csv +0 -0
  882. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/core.py +0 -0
  883. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/design_system.py +0 -0
  884. /package/skills/{ui-ux-pro-max → frontend/ui-ux-pro-max}/scripts/search.py +0 -0
  885. /package/skills/{competitive-analysis → product/competitive-analysis}/SKILL.md +0 -0
  886. /package/skills/{meeting-notes → product/meeting-notes}/SKILL.md +0 -0
  887. /package/skills/{prd-template → product/prd-template}/SKILL.md +0 -0
  888. /package/skills/{stakeholder-update → product/stakeholder-update}/SKILL.md +0 -0
  889. /package/skills/{user-research-synthesis → product/user-research-synthesis}/SKILL.md +0 -0
  890. /package/skills/{senior-qa → quality/senior-qa}/README.md +0 -0
  891. /package/skills/{senior-qa → quality/senior-qa}/SKILL.md +0 -0
  892. /package/skills/{senior-qa → quality/senior-qa}/references/qa_best_practices.md +0 -0
  893. /package/skills/{senior-qa → quality/senior-qa}/references/test_automation_patterns.md +0 -0
  894. /package/skills/{senior-qa → quality/senior-qa}/references/testing_strategies.md +0 -0
  895. /package/skills/{senior-qa → quality/senior-qa}/scripts/coverage_analyzer.py +0 -0
  896. /package/skills/{senior-qa → quality/senior-qa}/scripts/e2e_test_scaffolder.py +0 -0
  897. /package/skills/{senior-qa → quality/senior-qa}/scripts/test_suite_generator.py +0 -0
  898. /package/skills/{tdd-guide → quality/tdd-guide}/HOW_TO_USE.md +0 -0
  899. /package/skills/{tdd-guide → quality/tdd-guide}/README.md +0 -0
  900. /package/skills/{tdd-guide → quality/tdd-guide}/SKILL.md +0 -0
  901. /package/skills/{tdd-guide → quality/tdd-guide}/assets/expected_output.json +0 -0
  902. /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_python.json +0 -0
  903. /package/skills/{tdd-guide → quality/tdd-guide}/assets/sample_input_typescript.json +0 -0
  904. /package/skills/{tdd-guide → quality/tdd-guide}/references/ci-integration.md +0 -0
  905. /package/skills/{tdd-guide → quality/tdd-guide}/references/framework-guide.md +0 -0
  906. /package/skills/{tdd-guide → quality/tdd-guide}/references/tdd-best-practices.md +0 -0
  907. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/coverage_analyzer.py +0 -0
  908. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/fixture_generator.py +0 -0
  909. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/format_detector.py +0 -0
  910. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/framework_adapter.py +0 -0
  911. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/metrics_calculator.py +0 -0
  912. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/output_formatter.py +0 -0
  913. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/tdd_workflow.py +0 -0
  914. /package/skills/{tdd-guide → quality/tdd-guide}/scripts/test_generator.py +0 -0
  915. /package/skills/{brainstorming → workflow/brainstorming}/scripts/frame-template.html +0 -0
  916. /package/skills/{brainstorming → workflow/brainstorming}/scripts/server.cjs +0 -0
@@ -0,0 +1,597 @@
1
+ # Evaluation Metrics for Fine-Tuned Models
2
+
3
+ ---
4
+
5
+ ## Overview
6
+
7
+ Proper evaluation is critical for understanding fine-tuning success. This reference covers metrics, benchmarking strategies, and evaluation frameworks for fine-tuned language models.
8
+
9
+ ## Core Metrics
10
+
11
+ ### Perplexity
12
+
13
+ ```python
14
+ import torch
15
+ import math
16
+ from transformers import AutoModelForCausalLM, AutoTokenizer
17
+ from torch.utils.data import DataLoader
18
+ from tqdm import tqdm
19
+
20
def calculate_perplexity(
    model,
    tokenizer,
    texts: list[str],
    batch_size: int = 8,
    max_length: int = 2048
) -> float:
    """
    Calculate token-level perplexity on a test set.

    Lower perplexity = better language modeling performance.

    Args:
        model: Causal language model that returns an object with a ``loss``
            attribute when called with ``labels``.
        tokenizer: Tokenizer matching the model. It is called with
            ``padding=True``, so it must have a padding token configured.
        texts: Evaluation texts.
        batch_size: Number of texts tokenized and scored per forward pass.
        max_length: Texts are truncated to this many tokens.

    Returns:
        exp(mean negative log-likelihood per predicted token).

    Raises:
        ValueError: If ``texts`` is empty or yields no predictable tokens
            (every sequence has fewer than two tokens).
    """
    if not texts:
        raise ValueError("texts must contain at least one string")

    model.eval()
    total_loss = 0.0
    total_tokens = 0

    with torch.no_grad():
        # Tokenize per batch so each batch is padded only to its own longest
        # sequence instead of the longest sequence in the whole test set.
        for start in tqdm(range(0, len(texts), batch_size), desc="Calculating perplexity"):
            encodings = tokenizer(
                texts[start:start + batch_size],
                truncation=True,
                max_length=max_length,
                padding=True,
                return_tensors="pt"
            )
            input_ids = encodings["input_ids"].to(model.device)
            attention_mask = encodings["attention_mask"].to(model.device)

            # Padding positions must not contribute to the loss. -100 is the
            # ignore index used by Hugging Face causal-LM heads.
            labels = input_ids.masked_fill(attention_mask == 0, -100)

            outputs = model(
                input_ids=input_ids,
                attention_mask=attention_mask,
                labels=labels
            )

            # The model's loss is a mean over *predicted* tokens: labels are
            # shifted by one, so the first token of each sequence is never a
            # target. Weight the batch mean by that count, not by the raw
            # attention-mask sum.
            num_tokens = (labels[:, 1:] != -100).sum().item()
            if num_tokens == 0:
                # Nothing to predict in this batch; its mean loss is undefined.
                continue

            total_loss += outputs.loss.item() * num_tokens
            total_tokens += num_tokens

    if total_tokens == 0:
        raise ValueError("no predictable tokens: every text has fewer than two tokens")

    avg_loss = total_loss / total_tokens
    return math.exp(avg_loss)
71
+
72
+ # Usage
73
+ # perplexity = calculate_perplexity(model, tokenizer, test_texts)
74
+ # print(f"Perplexity: {perplexity:.2f}")
75
+ ```
76
+
77
+ ### Generation-Based Metrics
78
+
79
+ ```python
80
+ from evaluate import load
81
+ import numpy as np
82
+
83
def evaluate_generation(
    model,
    tokenizer,
    test_examples: list[dict],
    max_new_tokens: int = 256
) -> dict:
    """
    Evaluate model generation quality with BLEU, ROUGE and BERTScore.

    Args:
        model: Model exposing ``generate``, ``eval`` and ``device``.
        tokenizer: Tokenizer matching the model.
        test_examples: List of {"input": str, "reference": str}
        max_new_tokens: Generation budget per example.

    Returns:
        Dict with keys ``bleu`` (scaled to 0-100), ``rouge1``, ``rouge2``,
        ``rougeL`` and ``bertscore_f1`` (mean F1 over all examples).
    """
    # Load metrics
    bleu = load("bleu")
    rouge = load("rouge")
    bertscore = load("bertscore")

    predictions = []
    references = []

    # Decoder-only models return prompt + continuation from generate().
    # Encoder-decoder models are assumed to return only the generated
    # sequence, so nothing is stripped for them.
    # NOTE(review): confirm for the model families you evaluate.
    echoes_prompt = not getattr(getattr(model, "config", None), "is_encoder_decoder", False)

    model.eval()
    for example in tqdm(test_examples, desc="Generating"):
        inputs = tokenizer(example["input"], return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=max_new_tokens,
                do_sample=False,  # Greedy for reproducibility
                pad_token_id=tokenizer.pad_token_id
            )

        # Strip the prompt by token count, not by character count: decoding
        # does not always reproduce the input string exactly (whitespace
        # normalisation, dropped special tokens), so slicing the decoded text
        # by len(input) can cut into the answer.
        generated = outputs[0]
        if echoes_prompt:
            generated = generated[inputs["input_ids"].shape[1]:]
        prediction = tokenizer.decode(generated, skip_special_tokens=True).strip()

        predictions.append(prediction)
        references.append(example["reference"])

    # Calculate metrics
    results = {}

    # BLEU (0-100, higher is better)
    bleu_result = bleu.compute(predictions=predictions, references=[[r] for r in references])
    results["bleu"] = bleu_result["bleu"] * 100

    # ROUGE (0-1, higher is better)
    rouge_result = rouge.compute(predictions=predictions, references=references)
    results["rouge1"] = rouge_result["rouge1"]
    results["rouge2"] = rouge_result["rouge2"]
    results["rougeL"] = rouge_result["rougeL"]

    # BERTScore (0-1, higher is better)
    bertscore_result = bertscore.compute(
        predictions=predictions,
        references=references,
        lang="en"
    )
    # Plain float keeps the result JSON-friendly.
    results["bertscore_f1"] = float(np.mean(bertscore_result["f1"]))

    return results
144
+
145
+ # Example
146
+ # metrics = evaluate_generation(model, tokenizer, test_data)
147
+ # print(f"BLEU: {metrics['bleu']:.2f}, ROUGE-L: {metrics['rougeL']:.4f}")
148
+ ```
149
+
150
+ ### Task-Specific Metrics
151
+
152
+ ```python
153
+ from sklearn.metrics import accuracy_score, f1_score, classification_report
154
+ import re
155
+
156
def evaluate_classification(
    model,
    tokenizer,
    test_examples: list[dict],
    labels: list[str]
) -> dict:
    """
    Evaluate fine-tuned model on classification task.

    The model generates up to 20 new tokens per example; the predicted label
    is the valid label that appears earliest in the lower-cased output.

    Args:
        model: Model exposing ``generate``, ``eval`` and ``device``.
        tokenizer: Tokenizer matching the model.
        test_examples: List of {"input": str, "label": str}
        labels: List of valid label strings

    Returns:
        Dict with ``accuracy``, ``f1_macro``, ``f1_weighted``,
        ``classification_report`` and ``num_unparsed`` (how many outputs
        contained no valid label and were counted as ``labels[0]``).
    """
    predictions = []
    true_labels = []
    num_unparsed = 0

    # Decoder-only models return prompt + continuation from generate().
    # Encoder-decoder models are assumed to return only the generated
    # sequence. NOTE(review): confirm for the model families you evaluate.
    echoes_prompt = not getattr(getattr(model, "config", None), "is_encoder_decoder", False)

    model.eval()
    for example in tqdm(test_examples, desc="Classifying"):
        inputs = tokenizer(example["input"], return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=20,
                do_sample=False,
                pad_token_id=tokenizer.pad_token_id
            )

        # Strip the prompt by token count; the decoded text is not guaranteed
        # to start with the exact input string.
        generated = outputs[0]
        if echoes_prompt:
            generated = generated[inputs["input_ids"].shape[1]:]
        prediction = tokenizer.decode(generated, skip_special_tokens=True).strip().lower()

        # Extract label from prediction: earliest occurrence wins, and on a
        # tie the longest label wins. This keeps "unhappy" from being read as
        # "happy" just because "happy" comes first in `labels`.
        predicted_label = None
        best_rank = None
        for label in labels:
            position = prediction.find(label.lower())
            if position == -1:
                continue
            rank = (position, -len(label))
            if best_rank is None or rank < best_rank:
                best_rank = rank
                predicted_label = label

        if predicted_label is None:
            # Default to first label, but count it so the caller can see how
            # much of the score rests on this fallback.
            predicted_label = labels[0]
            num_unparsed += 1

        predictions.append(predicted_label)
        true_labels.append(example["label"])

    return {
        "accuracy": accuracy_score(true_labels, predictions),
        "f1_macro": f1_score(true_labels, predictions, average="macro"),
        "f1_weighted": f1_score(true_labels, predictions, average="weighted"),
        "classification_report": classification_report(true_labels, predictions),
        "num_unparsed": num_unparsed
    }
206
+
207
def evaluate_extraction(
    model,
    tokenizer,
    test_examples: list[dict]
) -> dict:
    """
    Evaluate information extraction tasks.

    Entities are pulled from the generated text with a capitalised-word
    regex and compared, per example, against the expected set. Precision,
    recall and F1 are averaged over examples.

    Args:
        model: Model exposing ``generate``, ``eval`` and ``device``.
        tokenizer: Tokenizer matching the model.
        test_examples: List of {"input": str, "expected_entities": list[str]}

    Returns:
        Dict with averaged ``precision``, ``recall`` and ``f1``. All three are
        0.0 when ``test_examples`` is empty.
    """
    if not test_examples:
        # Avoid dividing by zero on an empty evaluation set.
        return {"precision": 0.0, "recall": 0.0, "f1": 0.0}

    total_precision = 0
    total_recall = 0
    total_f1 = 0

    # Decoder-only models return prompt + continuation from generate().
    # Encoder-decoder models are assumed to return only the generated
    # sequence. NOTE(review): confirm for the model families you evaluate.
    echoes_prompt = not getattr(getattr(model, "config", None), "is_encoder_decoder", False)

    model.eval()
    for example in test_examples:
        inputs = tokenizer(example["input"], return_tensors="pt").to(model.device)

        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_new_tokens=256,
                do_sample=False,
                pad_token_id=tokenizer.pad_token_id
            )

        # Strip the prompt by token count; the decoded text is not guaranteed
        # to start with the exact input string.
        generated = outputs[0]
        if echoes_prompt:
            generated = generated[inputs["input_ids"].shape[1]:]
        prediction = tokenizer.decode(generated, skip_special_tokens=True).strip()

        # Extract entities (customize based on output format)
        predicted_entities = set(re.findall(r'\b[A-Z][a-z]+(?:\s[A-Z][a-z]+)*\b', prediction))
        expected_entities = set(example["expected_entities"])
        overlap = len(predicted_entities & expected_entities)

        # Calculate metrics
        if predicted_entities:
            precision = overlap / len(predicted_entities)
        else:
            # Predicting nothing is only correct when nothing was expected.
            precision = 0.0 if expected_entities else 1.0

        if expected_entities:
            recall = overlap / len(expected_entities)
        else:
            recall = 1.0

        if precision + recall > 0:
            f1 = 2 * precision * recall / (precision + recall)
        else:
            f1 = 0

        total_precision += precision
        total_recall += recall
        total_f1 += f1

    n = len(test_examples)
    return {
        "precision": total_precision / n,
        "recall": total_recall / n,
        "f1": total_f1 / n
    }
261
+ ```
262
+
263
+ ## Evaluation Framework
264
+
265
+ ```python
266
+ from dataclasses import dataclass, field
267
+ from typing import Callable, Any
268
+ import json
269
+ from datetime import datetime
270
+
271
@dataclass
class EvaluationSuite:
    """Complete evaluation suite for fine-tuned models.

    Holds a registry of named metric callables, runs them against a model,
    and stores the outcome in ``results`` for saving or comparison.
    """
    name: str
    # Metric name -> callable invoked as metric_fn(model, tokenizer, data).
    metrics: dict[str, Callable] = field(default_factory=dict)
    # Populated by run(): {"model_name", "timestamp", "metrics"}.
    results: dict[str, Any] = field(default_factory=dict)

    def add_metric(self, name: str, metric_fn: Callable):
        """Add a metric to the suite (replaces any metric with the same name)."""
        self.metrics[name] = metric_fn

    def run(self, model, tokenizer, test_data: dict) -> dict:
        """Run all metrics and return results.

        Each metric receives ``test_data[metric_name]`` when that key exists,
        otherwise the whole ``test_data`` dict. A metric that raises is
        recorded as ``{"error": "<message>"}`` and does not stop the others.
        """
        self.results = {
            "model_name": getattr(model.config, "_name_or_path", "unknown"),
            "timestamp": datetime.now().isoformat(),
            "metrics": {}
        }

        for metric_name, metric_fn in self.metrics.items():
            print(f"Running {metric_name}...")
            try:
                result = metric_fn(model, tokenizer, test_data.get(metric_name, test_data))
                self.results["metrics"][metric_name] = result
            except Exception as e:
                # Best-effort: keep going so one failing metric does not
                # discard the results of the rest.
                self.results["metrics"][metric_name] = {"error": str(e)}

        return self.results

    def save_results(self, path: str):
        """Save results to JSON file (non-serialisable values are stringified)."""
        with open(path, "w", encoding="utf-8") as f:
            json.dump(self.results, f, indent=2, default=str)

    @staticmethod
    def _compare_values(current, baseline):
        """Return a comparison entry for two metric values, or None if not comparable."""
        if isinstance(current, dict) and isinstance(baseline, dict):
            # Metrics such as generation return a dict of sub-metrics;
            # compare them key by key.
            nested = {}
            for key, value in current.items():
                if key in baseline:
                    entry = EvaluationSuite._compare_values(value, baseline[key])
                    if entry is not None:
                        nested[key] = entry
            return nested or None

        if isinstance(current, (int, float)) and isinstance(baseline, (int, float)):
            delta = current - baseline
            return {
                "current": current,
                "baseline": baseline,
                "delta": delta,
                # A zero baseline has no meaningful percentage change.
                "delta_pct": (delta / baseline * 100) if baseline != 0 else 0
            }

        return None

    def compare_with(self, other_results: dict) -> dict:
        """Compare results with another evaluation.

        Args:
            other_results: A results dict in the shape produced by ``run``.

        Returns:
            Mapping of metric name to ``{"current", "baseline", "delta",
            "delta_pct"}`` for numeric metrics present in both. Dict-valued
            metrics produce a nested mapping of the same entries. Returns an
            empty dict when ``run`` has not been called yet.
        """
        comparison = {}
        baseline_metrics = other_results.get("metrics", {})
        for metric_name, value in self.results.get("metrics", {}).items():
            if metric_name in baseline_metrics:
                entry = self._compare_values(value, baseline_metrics[metric_name])
                if entry is not None:
                    comparison[metric_name] = entry
        return comparison
320
+
321
+ # Usage example
322
def create_evaluation_suite() -> EvaluationSuite:
    """Build an EvaluationSuite with the perplexity and generation metrics.

    Expects ``test_data`` passed to ``suite.run`` to look like
    ``{"texts": [...], "generation": [...]}``.
    """
    suite = EvaluationSuite(name="fine_tuning_eval")

    def _select(data, key):
        # EvaluationSuite.run passes test_data[metric_name] when that key
        # exists and the whole dict otherwise. Accept both shapes so the
        # "generation" metric (whose name matches its data key) does not try
        # to index a list with a string.
        return data[key] if isinstance(data, dict) else data

    # Add perplexity
    suite.add_metric(
        "perplexity",
        lambda m, t, d: calculate_perplexity(m, t, _select(d, "texts"))
    )

    # Add generation metrics
    suite.add_metric(
        "generation",
        lambda m, t, d: evaluate_generation(m, t, _select(d, "generation"))
    )

    return suite
332
+
333
+ # Run evaluation
334
+ # suite = create_evaluation_suite()
335
+ # results = suite.run(model, tokenizer, test_data)
336
+ # suite.save_results("eval_results.json")
337
+ ```
338
+
339
+ ## Model Comparison
340
+
341
+ ```python
342
+ import pandas as pd
343
+ from typing import Optional
344
+
345
class ModelComparison:
    """Registry of models plus helpers to score them side by side."""

    def __init__(self):
        # name -> {"model", "tokenizer", "adapter_path"}
        self.models = {}
        # name -> metric row produced by evaluate_all
        self.results = {}

    def add_model(self, name: str, model, tokenizer, adapter_path: Optional[str] = None):
        """Store a model and its tokenizer under ``name``."""
        entry = {}
        entry["model"] = model
        entry["tokenizer"] = tokenizer
        entry["adapter_path"] = adapter_path
        self.models[name] = entry

    def evaluate_all(self, test_data: dict, metrics: list[str]) -> pd.DataFrame:
        """Score every registered model and return one DataFrame row per model.

        Recognised metric names are "perplexity" (reads ``test_data["texts"]``)
        and "generation" (reads ``test_data["generation"]``); any other name
        is ignored.
        """
        rows = []

        for label, registered in self.models.items():
            net = registered["model"]
            tok = registered["tokenizer"]
            row = {"model": label}

            for requested in metrics:
                if requested == "generation":
                    row.update(evaluate_generation(net, tok, test_data["generation"]))
                elif requested == "perplexity":
                    row["perplexity"] = calculate_perplexity(net, tok, test_data["texts"])

            rows.append(row)
            self.results[label] = row

        return pd.DataFrame(rows)

    def get_best_model(self, metric: str, higher_is_better: bool = True) -> str:
        """Name of the model with the best stored value for ``metric``.

        Models lacking the metric are treated as worst possible.

        Raises:
            ValueError: If no results have been stored yet.
        """
        if not self.results:
            raise ValueError("No results available. Run evaluate_all first.")

        worst = float('-inf') if higher_is_better else float('inf')
        scores = {}
        for label, row in self.results.items():
            scores[label] = row.get(metric, worst)

        choose = max if higher_is_better else min
        return choose(scores, key=scores.get)
398
+
399
+ # Usage
400
+ # comparison = ModelComparison()
401
+ # comparison.add_model("base", base_model, tokenizer)
402
+ # comparison.add_model("lora_r8", lora_model_r8, tokenizer)
403
+ # comparison.add_model("lora_r16", lora_model_r16, tokenizer)
404
+ # df = comparison.evaluate_all(test_data, ["perplexity", "generation"])
405
+ # print(df)
406
+ ```
407
+
408
+ ## LLM-as-Judge Evaluation
409
+
410
+ ```python
411
+ from openai import OpenAI
412
+ import json
413
+
414
def _extract_first_json_object(text: str):
    """Return the first JSON object embedded in ``text``, or None if there is none."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            # raw_decode parses a complete JSON value starting at `start`, so
            # nested objects are handled (a `{[^}]+}` regex stops at the first
            # closing brace and breaks on them).
            value, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            value = None
        if isinstance(value, dict):
            return value
        start = text.find("{", start + 1)
    return None


def llm_judge_evaluation(
    predictions: list[str],
    references: list[str],
    inputs: list[str],
    judge_model: str = "gpt-4o",
    criteria: list[str] = None
) -> dict:
    """
    Use LLM as judge to evaluate generation quality.

    Args:
        predictions: Model outputs
        references: Reference/gold outputs
        inputs: Original inputs
        judge_model: Model to use as judge
        criteria: Evaluation criteria (default: helpfulness, accuracy,
            coherence, relevance)

    Returns:
        Dict with:
          - ``individual_results``: one dict per example with ``input``,
            ``prediction``, ``reference``, ``scores``, ``raw_response`` and
            ``parsed`` (False when no JSON object was found and ``scores``
            holds the neutral default of 3 per criterion).
          - ``summary``: mean numeric score per criterion over successfully
            parsed responses (0 when there are none).
          - ``num_parse_failures``: number of responses without usable JSON.

    Raises:
        ValueError: If the three input lists differ in length.
    """
    if not (len(predictions) == len(references) == len(inputs)):
        raise ValueError("predictions, references and inputs must have the same length")

    if criteria is None:
        criteria = ["helpfulness", "accuracy", "coherence", "relevance"]

    client = OpenAI()

    judge_prompt = """You are an expert evaluator. Rate the following model response on a scale of 1-5 for each criterion.

Input: {input}

Reference Response: {reference}

Model Response: {prediction}

Rate the model response on these criteria (1=poor, 5=excellent):
{criteria_list}

Return your ratings as JSON: {{"criterion_name": score, ...}}
Also include a brief explanation for each rating."""

    results = []

    for input_text, pred, ref in zip(inputs, predictions, references):
        prompt = judge_prompt.format(
            input=input_text,
            reference=ref,
            prediction=pred,
            criteria_list="\n".join(f"- {c}" for c in criteria)
        )

        response = client.chat.completions.create(
            model=judge_model,
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )

        # Parse response. `content` is bound before any parsing so it is
        # always available for the result record below.
        content = response.choices[0].message.content
        scores = _extract_first_json_object(content or "")
        parsed = scores is not None
        if not parsed:
            scores = {c: 3 for c in criteria}  # Default scores

        results.append({
            "input": input_text,
            "prediction": pred,
            "reference": ref,
            "scores": scores,
            "raw_response": content,
            "parsed": parsed
        })

    # Aggregate scores. Default scores from unparsed responses are excluded so
    # parse failures do not pull every average towards 3.
    aggregated = {c: [] for c in criteria}
    for r in results:
        if not r["parsed"]:
            continue
        for c in criteria:
            value = r["scores"].get(c)
            if isinstance(value, (int, float)) and not isinstance(value, bool):
                aggregated[c].append(value)

    summary = {c: sum(scores) / len(scores) if scores else 0
               for c, scores in aggregated.items()}

    return {
        "individual_results": results,
        "summary": summary,
        "num_parse_failures": sum(1 for r in results if not r["parsed"])
    }
500
+ ```
501
+
502
+ ## Benchmark Suites
503
+
504
+ ```python
505
+ from lm_eval import evaluator
506
+ from lm_eval.models.huggingface import HFLM
507
+
508
def _primary_accuracy(task_results: dict):
    """Pick the headline accuracy value from one task's result dict, or None."""
    # Standard-error entries also contain "acc" in their key; they are not scores.
    candidates = {
        key: value for key, value in task_results.items()
        if "acc" in key and "stderr" not in key
    }
    # Keys may carry a filter suffix after a comma (e.g. "acc,none").
    # NOTE(review): exact key format depends on the lm-eval version.
    for preferred in ("acc", "acc_norm"):
        for key, value in candidates.items():
            if key.split(",")[0] == preferred:
                return value
    return next(iter(candidates.values()), None)


def run_standard_benchmarks(
    model,
    tokenizer,
    tasks: list[str] = None,
    num_fewshot: int = 0
) -> dict:
    """
    Run standard LLM benchmarks using lm-evaluation-harness.

    Args:
        model: HuggingFace model
        tokenizer: Tokenizer
        tasks: List of tasks (default: common benchmarks)
        num_fewshot: Number of few-shot examples

    Returns:
        Dict with ``full_results`` (the harness output unchanged) and
        ``summary`` (task name -> primary accuracy, for tasks that report one).
    """
    if tasks is None:
        tasks = [
            "hellaswag",       # Commonsense reasoning
            "arc_easy",        # Science questions
            "arc_challenge",   # Harder science questions
            "winogrande",      # Commonsense reasoning
            "mmlu",            # Multi-task language understanding
            # lm-eval 0.4 (the HFLM API used below) names the multiple-choice
            # TruthfulQA tasks truthfulqa_mc1 / truthfulqa_mc2.
            "truthfulqa_mc2",  # Truthfulness
        ]

    # Wrap model for lm-eval
    lm = HFLM(pretrained=model, tokenizer=tokenizer)

    results = evaluator.simple_evaluate(
        model=lm,
        tasks=tasks,
        num_fewshot=num_fewshot,
        batch_size="auto"
    )

    # Extract key metrics
    summary = {}
    for task in tasks:
        if task in results["results"]:
            value = _primary_accuracy(results["results"][task])
            if value is not None:
                summary[task] = value

    return {
        "full_results": results,
        "summary": summary
    }
558
+
559
+ # Usage with common benchmarks
560
# Task groups for lm-evaluation-harness, keyed by capability area.
# NOTE(review): task identifiers depend on the installed lm-eval version;
# list the available names with the harness before relying on these.
BENCHMARK_TASKS = {
    "reasoning": ["hellaswag", "winogrande", "arc_easy", "arc_challenge"],
    "knowledge": ["mmlu", "triviaqa"],
    "code": ["humaneval", "mbpp"],
    # NOTE(review): "math" may need a harness-specific name — confirm.
    "math": ["gsm8k", "math"],
    # lm-eval 0.4 splits multiple-choice TruthfulQA into truthfulqa_mc1 and
    # truthfulqa_mc2.
    "safety": ["truthfulqa_mc2", "toxigen"]
}
567
+ ```
568
+
569
+ ## Quick Reference
570
+
571
+ ### Metric Selection by Task
572
+
573
+ | Task Type | Primary Metrics | Secondary Metrics |
574
+ |-----------|-----------------|-------------------|
575
+ | General Fine-Tuning | Perplexity, Loss | ROUGE, BLEU |
576
+ | Classification | Accuracy, F1 | Precision, Recall |
577
+ | Generation | ROUGE-L, BERTScore | Human eval, LLM-as-judge |
578
+ | Summarization | ROUGE-1/2/L | BERTScore, factuality |
579
+ | Translation | BLEU, chrF | TER, COMET |
580
+ | Code | pass@k, HumanEval | CodeBLEU |
581
+ | Chat/Assistant | LLM-as-judge | User preference |
582
+
583
+ ### Interpreting Results
584
+
585
+ | Metric | Poor | Acceptable | Good | Excellent |
586
+ |--------|------|------------|------|-----------|
587
+ | Perplexity | >50 | 20-50 | 10-20 | <10 |
588
+ | BLEU | <20 | 20-40 | 40-60 | >60 |
589
+ | ROUGE-L | <0.3 | 0.3-0.5 | 0.5-0.7 | >0.7 |
590
+ | BERTScore F1 | <0.7 | 0.7-0.85 | 0.85-0.92 | >0.92 |
591
+ | Accuracy | <0.6 | 0.6-0.8 | 0.8-0.9 | >0.9 |
592
+
593
+ ## Related References
594
+
595
+ - `hyperparameter-tuning.md` - Adjusting training based on eval results
596
+ - `dataset-preparation.md` - Creating evaluation sets
597
+ - `deployment-optimization.md` - Production evaluation considerations