@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,470 @@
1
+ # Production Serving Guide
2
+
3
+ Comprehensive guide to deploying TensorRT-LLM in production environments.
4
+
5
+ ## Server Modes
6
+
7
+ ### trtllm-serve (Recommended)
8
+
9
+ **Features**:
10
+ - OpenAI-compatible API
11
+ - Automatic model download and compilation
12
+ - Built-in load balancing
13
+ - Prometheus metrics
14
+ - Health checks
15
+
16
+ **Basic usage**:
17
+ ```bash
18
+ trtllm-serve meta-llama/Meta-Llama-3-8B \
19
+ --tp_size 1 \
20
+ --max_batch_size 256 \
21
+ --port 8000
22
+ ```
23
+
24
+ **Advanced configuration**:
25
+ ```bash
26
+ trtllm-serve meta-llama/Meta-Llama-3-70B \
27
+ --tp_size 4 \
28
+ --dtype fp8 \
29
+ --max_batch_size 256 \
30
+ --max_num_tokens 4096 \
31
+ --enable_chunked_context \
32
+ --scheduler_policy max_utilization \
33
+ --port 8000 \
34
+ --api_key $API_KEY # Optional authentication
35
+ ```
36
+
37
+ ### Python LLM API (For embedding in your application)
38
+
39
+ ```python
40
+ from tensorrt_llm import LLM
41
+
42
+ class LLMService:
43
+ def __init__(self):
44
+ self.llm = LLM(
45
+ model="meta-llama/Meta-Llama-3-8B",
46
+ dtype="fp8"
47
+ )
48
+
49
+ def generate(self, prompt, max_tokens=100):
50
+ from tensorrt_llm import SamplingParams
51
+
52
+ params = SamplingParams(
53
+ max_tokens=max_tokens,
54
+ temperature=0.7
55
+ )
56
+ outputs = self.llm.generate([prompt], params)
57
+ return outputs[0].outputs[0].text
58
+
59
+ # Use in FastAPI, Flask, etc.
60
+ from fastapi import FastAPI
61
+ app = FastAPI()
62
+ service = LLMService()
63
+
64
+ @app.post("/generate")
65
+ def generate(prompt: str):
66
+ return {"response": service.generate(prompt)}
67
+ ```
68
+
69
+ ## OpenAI-Compatible API
70
+
71
+ ### Chat Completions
72
+
73
+ ```bash
74
+ curl -X POST http://localhost:8000/v1/chat/completions \
75
+ -H "Content-Type: application/json" \
76
+ -d '{
77
+ "model": "meta-llama/Meta-Llama-3-8B",
78
+ "messages": [
79
+ {"role": "system", "content": "You are a helpful assistant."},
80
+ {"role": "user", "content": "Explain quantum computing"}
81
+ ],
82
+ "temperature": 0.7,
83
+ "max_tokens": 500,
84
+ "stream": false
85
+ }'
86
+ ```
87
+
88
+ **Response**:
89
+ ```json
90
+ {
91
+ "id": "chat-abc123",
92
+ "object": "chat.completion",
93
+ "created": 1234567890,
94
+ "model": "meta-llama/Meta-Llama-3-8B",
95
+ "choices": [{
96
+ "index": 0,
97
+ "message": {
98
+ "role": "assistant",
99
+ "content": "Quantum computing is..."
100
+ },
101
+ "finish_reason": "stop"
102
+ }],
103
+ "usage": {
104
+ "prompt_tokens": 25,
105
+ "completion_tokens": 150,
106
+ "total_tokens": 175
107
+ }
108
+ }
109
+ ```
110
+
111
+ ### Streaming
112
+
113
+ ```bash
114
+ curl -X POST http://localhost:8000/v1/chat/completions \
115
+ -H "Content-Type: application/json" \
116
+ -d '{
117
+ "model": "meta-llama/Meta-Llama-3-8B",
118
+ "messages": [{"role": "user", "content": "Count to 10"}],
119
+ "stream": true
120
+ }'
121
+ ```
122
+
123
+ **Response** (SSE stream):
124
+ ```
125
+ data: {"choices":[{"delta":{"content":"1"}}]}
126
+
127
+ data: {"choices":[{"delta":{"content":", 2"}}]}
128
+
129
+ data: {"choices":[{"delta":{"content":", 3"}}]}
130
+
131
+ data: [DONE]
132
+ ```
133
+
134
+ ### Completions
135
+
136
+ ```bash
137
+ curl -X POST http://localhost:8000/v1/completions \
138
+ -H "Content-Type: application/json" \
139
+ -d '{
140
+ "model": "meta-llama/Meta-Llama-3-8B",
141
+ "prompt": "The capital of France is",
142
+ "max_tokens": 10,
143
+ "temperature": 0.0
144
+ }'
145
+ ```
146
+
147
+ ## Monitoring
148
+
149
+ ### Prometheus Metrics
150
+
151
+ **Enable metrics**:
152
+ ```bash
153
+ trtllm-serve meta-llama/Meta-Llama-3-8B \
154
+ --enable_metrics \
155
+ --metrics_port 9090
156
+ ```
157
+
158
+ **Key metrics**:
159
+ ```bash
160
+ # Scrape metrics
161
+ curl http://localhost:9090/metrics
162
+
163
+ # Important metrics:
164
+ # - trtllm_request_success_total - Total successful requests
165
+ # - trtllm_request_latency_seconds - Request latency histogram
166
+ # - trtllm_tokens_generated_total - Total tokens generated
167
+ # - trtllm_active_requests - Current active requests
168
+ # - trtllm_queue_size - Requests waiting in queue
169
+ # - trtllm_gpu_memory_usage_bytes - GPU memory usage
170
+ # - trtllm_kv_cache_usage_ratio - KV cache utilization
171
+ ```
172
+
173
+ ### Health Checks
174
+
175
+ ```bash
176
+ # Readiness probe
177
+ curl http://localhost:8000/health/ready
178
+
179
+ # Liveness probe
180
+ curl http://localhost:8000/health/live
181
+
182
+ # Model info
183
+ curl http://localhost:8000/v1/models
184
+ ```
185
+
186
+ **Kubernetes probes**:
187
+ ```yaml
188
+ livenessProbe:
189
+ httpGet:
190
+ path: /health/live
191
+ port: 8000
192
+ initialDelaySeconds: 60
193
+ periodSeconds: 10
194
+
195
+ readinessProbe:
196
+ httpGet:
197
+ path: /health/ready
198
+ port: 8000
199
+ initialDelaySeconds: 30
200
+ periodSeconds: 5
201
+ ```
202
+
203
+ ## Production Deployment
204
+
205
+ ### Docker Deployment
206
+
207
+ **Dockerfile**:
208
+ ```dockerfile
209
+ FROM nvidia/tensorrt_llm:latest
210
+
211
+ # Copy any custom configs
212
+ COPY config.yaml /app/config.yaml
213
+
214
+ # Expose ports
215
+ EXPOSE 8000 9090
216
+
217
+ # Start server
218
+ CMD ["trtllm-serve", "meta-llama/Meta-Llama-3-8B", \
219
+ "--tp_size", "4", \
220
+ "--dtype", "fp8", \
221
+ "--max_batch_size", "256", \
222
+ "--enable_metrics", \
223
+ "--metrics_port", "9090"]
224
+ ```
225
+
226
+ **Run container**:
227
+ ```bash
228
+ docker run --gpus all -p 8000:8000 -p 9090:9090 \
229
+ tensorrt-llm:latest
230
+ ```
231
+
232
+ ### Kubernetes Deployment
233
+
234
+ **Complete deployment**:
235
+ ```yaml
236
+ apiVersion: apps/v1
237
+ kind: Deployment
238
+ metadata:
239
+ name: tensorrt-llm
240
+ spec:
241
+ replicas: 2 # Multiple replicas for HA
242
+ selector:
243
+ matchLabels:
244
+ app: tensorrt-llm
245
+ template:
246
+ metadata:
247
+ labels:
248
+ app: tensorrt-llm
249
+ spec:
250
+ containers:
251
+ - name: trtllm
252
+ image: nvidia/tensorrt_llm:latest
253
+ command:
254
+ - trtllm-serve
255
+ - meta-llama/Meta-Llama-3-70B
256
+ - --tp_size=4
257
+ - --dtype=fp8
258
+ - --max_batch_size=256
259
+ - --enable_metrics
260
+ ports:
261
+ - containerPort: 8000
262
+ name: http
263
+ - containerPort: 9090
264
+ name: metrics
265
+ resources:
266
+ limits:
267
+ nvidia.com/gpu: 4
268
+ livenessProbe:
269
+ httpGet:
270
+ path: /health/live
271
+ port: 8000
272
+ readinessProbe:
273
+ httpGet:
274
+ path: /health/ready
275
+ port: 8000
276
+ ---
277
+ apiVersion: v1
278
+ kind: Service
279
+ metadata:
280
+ name: tensorrt-llm
281
+ spec:
282
+ selector:
283
+ app: tensorrt-llm
284
+ ports:
285
+ - name: http
286
+ port: 80
287
+ targetPort: 8000
288
+ - name: metrics
289
+ port: 9090
290
+ targetPort: 9090
291
+ type: LoadBalancer
292
+ ```
293
+
294
+ ### Load Balancing
295
+
296
+ **NGINX configuration**:
297
+ ```nginx
298
+ upstream tensorrt_llm {
299
+ least_conn; # Route to least busy server
300
+ server trtllm-1:8000 max_fails=3 fail_timeout=30s;
301
+ server trtllm-2:8000 max_fails=3 fail_timeout=30s;
302
+ server trtllm-3:8000 max_fails=3 fail_timeout=30s;
303
+ }
304
+
305
+ server {
306
+ listen 80;
307
+ location / {
308
+ proxy_pass http://tensorrt_llm;
309
+ proxy_read_timeout 300s; # Long timeout for slow generations
310
+ proxy_connect_timeout 10s;
311
+ }
312
+ }
313
+ ```
314
+
315
+ ## Autoscaling
316
+
317
+ ### Horizontal Pod Autoscaler (HPA)
318
+
319
+ ```yaml
320
+ apiVersion: autoscaling/v2
321
+ kind: HorizontalPodAutoscaler
322
+ metadata:
323
+ name: tensorrt-llm-hpa
324
+ spec:
325
+ scaleTargetRef:
326
+ apiVersion: apps/v1
327
+ kind: Deployment
328
+ name: tensorrt-llm
329
+ minReplicas: 2
330
+ maxReplicas: 10
331
+ metrics:
332
+ - type: Pods
333
+ pods:
334
+ metric:
335
+ name: trtllm_active_requests
336
+ target:
337
+ type: AverageValue
338
+ averageValue: "50" # Scale when avg >50 active requests
339
+ ```
340
+
341
+ ### Custom Metrics
342
+
343
+ ```yaml
344
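+ # Extra entry for the HPA's spec.metrics list: scale based on queue size (Pods metrics require a custom metrics adapter, e.g. Prometheus Adapter)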
345
+ - type: Pods
346
+ pods:
347
+ metric:
348
+ name: trtllm_queue_size
349
+ target:
350
+ type: AverageValue
351
+ averageValue: "10"
352
+ ```
353
+
354
+ ## Cost Optimization
355
+
356
+ ### GPU Selection
357
+
358
+ **A100 80GB** ($3-4/hour):
359
+ - Use for: 70B models with INT8/INT4 quantization (A100 has no native FP8 support)
360
+ - Throughput: 10,000-15,000 tok/s (TP=4)
361
+ - Cost per 1M tokens: $0.20-0.30
362
+
363
+ **H100 80GB** ($6-8/hour):
364
+ - Use for: 70B models with FP8, 405B models
365
+ - Throughput: 20,000-30,000 tok/s (TP=4)
366
+ - Cost per 1M tokens: $0.15-0.25 (roughly 2× the throughput outweighs the higher hourly price)
367
+
368
+ **L4** ($0.50-1/hour):
369
+ - Use for: 7-8B models
370
+ - Throughput: 1,000-2,000 tok/s
371
+ - Cost per 1M tokens: $0.25-0.50
372
+
373
+ ### Batch Size Tuning
374
+
375
+ **Impact on cost**:
376
+ - Batch size 1: 1,000 tok/s ≈ 3.6M tokens/hour → $3/hour ÷ 3.6M ≈ $0.83/M tokens
377
+ - Batch size 64: 5,000 tok/s ≈ 18M tokens/hour → $3/hour ÷ 18M ≈ $0.17/M tokens
378
+ - **5× cost reduction** with batching
379
+
380
+ **Recommendation**: Target batch size 32-128 for cost efficiency.
381
+
382
+ ## Security
383
+
384
+ ### API Authentication
385
+
386
+ ```bash
387
+ # Generate API key
388
+ export API_KEY=$(openssl rand -hex 32)
389
+
390
+ # Start server with authentication
391
+ trtllm-serve meta-llama/Meta-Llama-3-8B \
392
+ --api_key $API_KEY
393
+
394
+ # Client request
395
+ curl -X POST http://localhost:8000/v1/chat/completions \
396
+ -H "Authorization: Bearer $API_KEY" \
397
+ -H "Content-Type: application/json" \
398
+ -d '{"model": "...", "messages": [...]}'
399
+ ```
400
+
401
+ ### Network Policies
402
+
403
+ ```yaml
404
+ apiVersion: networking.k8s.io/v1
405
+ kind: NetworkPolicy
406
+ metadata:
407
+ name: tensorrt-llm-policy
408
+ spec:
409
+ podSelector:
410
+ matchLabels:
411
+ app: tensorrt-llm
412
+ policyTypes:
413
+ - Ingress
414
+ ingress:
415
+ - from:
416
+ - podSelector:
417
+ matchLabels:
418
+ app: api-gateway # Only allow from gateway
419
+ ports:
420
+ - protocol: TCP
421
+ port: 8000
422
+ ```
423
+
424
+ ## Troubleshooting
425
+
426
+ ### High latency
427
+
428
+ **Diagnosis**:
429
+ ```bash
430
+ # Check queue size
431
+ curl http://localhost:9090/metrics | grep queue_size
432
+
433
+ # Check active requests
434
+ curl http://localhost:9090/metrics | grep active_requests
435
+ ```
436
+
437
+ **Solutions**:
438
+ - Scale horizontally (more replicas)
439
+ - Increase batch size (if GPU underutilized)
440
+ - Enable chunked context (if long prompts)
441
+ - Use FP8 quantization
442
+
443
+ ### OOM crashes
444
+
445
+ **Solutions**:
446
+ - Reduce `max_batch_size`
447
+ - Reduce `max_num_tokens`
448
+ - Enable FP8 or INT4 quantization
449
+ - Increase `tensor_parallel_size` (`--tp_size`) to shard the model across more GPUs
450
+
451
+ ### Timeout errors
452
+
453
+ **Solution**: raise the NGINX proxy timeouts so long generations are not cut off:
454
+ ```nginx
455
+ proxy_read_timeout 600s; # 10 minutes for very long generations
456
+ proxy_send_timeout 600s;
457
+ ```
458
+
459
+ ## Best Practices
460
+
461
+ 1. **Use FP8 on H100** for 2× speedup and 50% cost reduction
462
+ 2. **Monitor metrics** - Set up Prometheus + Grafana
463
+ 3. **Set readiness probes** - Prevent routing to unhealthy pods
464
+ 4. **Use load balancing** - Distribute load across replicas
465
+ 5. **Tune batch size** - Balance latency and throughput
466
+ 6. **Enable streaming** - Better UX for chat applications
467
+ 7. **Set up autoscaling** - Handle traffic spikes
468
+ 8. **Use persistent volumes** - Cache compiled models
469
+ 9. **Implement retries** - Handle transient failures
470
+ 10. **Monitor costs** - Track cost per token