@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373)
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,475 @@
1
+ # Troubleshooting Guide
2
+
3
+ Common issues and solutions for Hugging Face Jobs.
4
+
5
+ ## Authentication Issues
6
+
7
+ ### Error: 401 Unauthorized
8
+
9
+ **Symptoms:**
10
+ ```
11
+ 401 Client Error: Unauthorized for url: https://huggingface.co/api/...
12
+ ```
13
+
14
+ **Causes:**
15
+ - Token missing from job
16
+ - Token invalid or expired
17
+ - Token not passed correctly
18
+
19
+ **Solutions:**
20
+ 1. Add `secrets={"HF_TOKEN": "$HF_TOKEN"}` to job config
21
+ 2. Verify `hf_whoami()` works locally
22
+ 3. Re-login: `hf auth login`
23
+ 4. Check token hasn't expired
24
+
25
+ **Verification:**
26
+ ```python
27
+ # In your script
28
+ import os
29
+ assert "HF_TOKEN" in os.environ, "HF_TOKEN missing!"
30
+ ```
31
+
32
+ ### Error: 403 Forbidden
33
+
34
+ **Symptoms:**
35
+ ```
36
+ 403 Client Error: Forbidden for url: https://huggingface.co/api/...
37
+ ```
38
+
39
+ **Causes:**
40
+ - Token lacks required permissions
41
+ - No access to private repository
42
+ - Organization permissions insufficient
43
+
44
+ **Solutions:**
45
+ 1. Ensure token has write permissions
46
+ 2. Check token type at https://huggingface.co/settings/tokens
47
+ 3. Verify access to target repository
48
+ 4. Use organization token if needed
49
+
50
+ ### Error: Token not found in environment
51
+
52
+ **Symptoms:**
53
+ ```
54
+ KeyError: 'HF_TOKEN'
55
+ ValueError: HF_TOKEN not found
56
+ ```
57
+
58
+ **Causes:**
59
+ - `secrets` not passed in job config
60
+ - Wrong key name (should be `HF_TOKEN`)
61
+ - Using `env` instead of `secrets`
62
+
63
+ **Solutions:**
64
+ 1. Use `secrets={"HF_TOKEN": "$HF_TOKEN"}` (not `env`)
65
+ 2. Verify key name is exactly `HF_TOKEN`
66
+ 3. Check job config syntax
67
+
68
+ ## Job Execution Issues
69
+
70
+ ### Error: Job Timeout
71
+
72
+ **Symptoms:**
73
+ - Job stops unexpectedly
74
+ - Status shows "TIMEOUT"
75
+ - Partial results only
76
+
77
+ **Causes:**
78
+ - Default 30-minute timeout exceeded
79
+ - Job takes longer than expected
80
+ - No timeout specified
81
+
82
+ **Solutions:**
83
+ 1. Check logs for actual runtime
84
+ 2. Increase timeout with buffer: `"timeout": "3h"`
85
+ 3. Optimize code for faster execution
86
+ 4. Process data in chunks
87
+ 5. Add 20-30% buffer to estimated time
88
+
89
+ **MCP Tool Example:**
90
+ ```python
91
+ hf_jobs("uv", {
92
+     "script": "...",
93
+     "timeout": "2h" # Set appropriate timeout
94
+ })
95
+ ```
96
+
97
+ **Python API Example:**
98
+ ```python
99
+ from huggingface_hub import run_uv_job, inspect_job, fetch_job_logs
100
+
101
+ job = run_uv_job("script.py", timeout="4h")
102
+
103
+ # Check if job failed
104
+ job_info = inspect_job(job_id=job.id)
105
+ if job_info.status.stage == "ERROR":
106
+     print(f"Job failed: {job_info.status.message}")
106
+     # Check logs for details
107
+     for log in fetch_job_logs(job_id=job.id):
108
+         print(log)
110
+ ```
111
+
112
+ ### Error: Out of Memory (OOM)
113
+
114
+ **Symptoms:**
115
+ ```
116
+ RuntimeError: CUDA out of memory
117
+ MemoryError: Unable to allocate array
118
+ ```
119
+
120
+ **Causes:**
121
+ - Batch size too large
122
+ - Model too large for hardware
123
+ - Insufficient GPU memory
124
+
125
+ **Solutions:**
126
+ 1. Reduce batch size
127
+ 2. Process data in smaller chunks
128
+ 3. Upgrade hardware: cpu → t4 → a10g → a100
129
+ 4. Use smaller models or quantization
130
+ 5. Enable gradient checkpointing (for training)
131
+
132
+ **Example:**
133
+ ```python
134
+ # Reduce batch size
135
+ batch_size = 1
136
+
137
+ # Process in chunks
138
+ for chunk in chunks:
139
+     process(chunk)
140
+ ```
141
+
142
+ ### Error: Missing Dependencies
143
+
144
+ **Symptoms:**
145
+ ```
146
+ ModuleNotFoundError: No module named 'package_name'
147
+ ImportError: cannot import name 'X'
148
+ ```
149
+
150
+ **Causes:**
151
+ - Package not in dependencies
152
+ - Wrong package name
153
+ - Version mismatch
154
+
155
+ **Solutions:**
156
+ 1. Add to PEP 723 header:
157
+ ```python
158
+ # /// script
159
+ # dependencies = ["package-name>=1.0.0"]
160
+ # ///
161
+ ```
162
+ 2. Check package name spelling
163
+ 3. Specify version if needed
164
+ 4. Check package availability
165
+
166
+ ### Error: Script Not Found
167
+
168
+ **Symptoms:**
169
+ ```
170
+ FileNotFoundError: script.py not found
171
+ ```
172
+
173
+ **Causes:**
174
+ - Local file path used (not supported)
175
+ - URL incorrect
176
+ - Script not accessible
177
+
178
+ **Solutions:**
179
+ 1. Use inline script (recommended)
180
+ 2. Use publicly accessible URL
181
+ 3. Upload script to Hub first
182
+ 4. Check URL is correct
183
+
184
+ **Correct approaches:**
185
+ ```python
186
+ # ✅ Inline code
187
+ hf_jobs("uv", {"script": "# /// script\n# dependencies = [...]\n# ///\n\n<code>"})
188
+
189
+ # ✅ From URL
190
+ hf_jobs("uv", {"script": "https://huggingface.co/user/repo/resolve/main/script.py"})
191
+ ```
192
+
193
+ ## Hub Push Issues
194
+
195
+ ### Error: Push Failed
196
+
197
+ **Symptoms:**
198
+ ```
199
+ Error pushing to Hub
200
+ Upload failed
201
+ ```
202
+
203
+ **Causes:**
204
+ - Network issues
205
+ - Token missing or invalid
206
+ - Repository access denied
207
+ - File too large
208
+
209
+ **Solutions:**
210
+ 1. Check token: `assert "HF_TOKEN" in os.environ`
211
+ 2. Verify repository exists or can be created
212
+ 3. Check network connectivity in logs
213
+ 4. Retry push operation
214
+ 5. Split large files into chunks
215
+
216
+ ### Error: Repository Not Found
217
+
218
+ **Symptoms:**
219
+ ```
220
+ 404 Client Error: Not Found
221
+ Repository not found
222
+ ```
223
+
224
+ **Causes:**
225
+ - Repository doesn't exist
226
+ - Wrong repository name
227
+ - No access to private repo
228
+
229
+ **Solutions:**
230
+ 1. Create repository first:
231
+ ```python
232
+ from huggingface_hub import HfApi
233
+ api = HfApi()
234
+ api.create_repo("username/repo-name", repo_type="dataset")
235
+ ```
236
+ 2. Check repository name format
237
+ 3. Verify namespace exists
238
+ 4. Check repository visibility
239
+
240
+ ### Error: Results Not Saved
241
+
242
+ **Symptoms:**
243
+ - Job completes successfully
244
+ - No results visible on Hub
245
+ - Files not persisted
246
+
247
+ **Causes:**
248
+ - No persistence code in script
249
+ - Push code not executed
250
+ - Push failed silently
251
+
252
+ **Solutions:**
253
+ 1. Add persistence code to script
254
+ 2. Verify push executes successfully
255
+ 3. Check logs for push errors
256
+ 4. Add error handling around push
257
+
258
+ **Example:**
259
+ ```python
260
+ try:
261
+     dataset.push_to_hub("username/dataset")
262
+     print("✅ Push successful")
263
+ except Exception as e:
264
+     print(f"❌ Push failed: {e}")
265
+     raise
266
+ ```
267
+
268
+ ## Hardware Issues
269
+
270
+ ### Error: GPU Not Available
271
+
272
+ **Symptoms:**
273
+ ```
274
+ CUDA not available
275
+ No GPU found
276
+ ```
277
+
278
+ **Causes:**
279
+ - CPU flavor used instead of GPU
280
+ - GPU not requested
281
+ - CUDA not installed in image
282
+
283
+ **Solutions:**
284
+ 1. Use GPU flavor: `"flavor": "a10g-large"`
285
+ 2. Check image has CUDA support
286
+ 3. Verify GPU availability in logs
287
+
288
+ ### Error: Slow Performance
289
+
290
+ **Symptoms:**
291
+ - Job takes longer than expected
292
+ - Low GPU utilization
293
+ - CPU bottleneck
294
+
295
+ **Causes:**
296
+ - Wrong hardware selected
297
+ - Inefficient code
298
+ - Data loading bottleneck
299
+
300
+ **Solutions:**
301
+ 1. Upgrade hardware
302
+ 2. Optimize code
303
+ 3. Use batch processing
304
+ 4. Profile code to find bottlenecks
305
+
306
+ ## General Issues
307
+
308
+ ### Error: Job Status Unknown
309
+
310
+ **Symptoms:**
311
+ - Can't check job status
312
+ - Status API returns error
313
+
314
+ **Solutions:**
315
+ 1. Use job URL: `https://huggingface.co/jobs/username/job-id`
316
+ 2. Check logs: `hf_jobs("logs", {"job_id": "..."})`
317
+ 3. Inspect job: `hf_jobs("inspect", {"job_id": "..."})`
318
+
319
+ ### Error: Logs Not Available
320
+
321
+ **Symptoms:**
322
+ - No logs visible
323
+ - Logs delayed
324
+
325
+ **Causes:**
326
+ - Job just started (logs delayed 30-60s)
327
+ - Job failed before logging
328
+ - Logs not yet generated
329
+
330
+ **Solutions:**
331
+ 1. Wait 30-60 seconds after job start
332
+ 2. Check job status first
333
+ 3. Use job URL for web interface
334
+
335
+ ### Error: Cost Unexpectedly High
336
+
337
+ **Symptoms:**
338
+ - Job costs more than expected
339
+ - Longer runtime than estimated
340
+
341
+ **Causes:**
342
+ - Job ran longer than expected (timeout not set or set too high)
343
+ - Wrong hardware selected
344
+ - Inefficient code
345
+
346
+ **Solutions:**
347
+ 1. Monitor job runtime
348
+ 2. Set appropriate timeout
349
+ 3. Optimize code
350
+ 4. Choose right hardware
351
+ 5. Check cost estimates before running
352
+
353
+ ## Debugging Tips
354
+
355
+ ### 1. Add Logging
356
+
357
+ ```python
358
+ import logging
359
+ logging.basicConfig(level=logging.INFO)
360
+ logger = logging.getLogger(__name__)
361
+
362
+ logger.info("Starting processing...")
363
+ logger.info(f"Processed {count} items")
364
+ ```
365
+
366
+ ### 2. Verify Environment
367
+
368
+ ```python
369
+ import os
+ import sys
+ import torch
369
+ print(f"Python version: {sys.version}")
370
+ print(f"CUDA available: {torch.cuda.is_available()}")
372
+ print(f"HF_TOKEN present: {'HF_TOKEN' in os.environ}")
373
+ ```
374
+
375
+ ### 3. Test Locally First
376
+
377
+ Run script locally before submitting to catch errors early:
378
+ ```bash
379
+ python script.py
380
+ # Or with uv
381
+ uv run script.py
382
+ ```
383
+
384
+ ### 4. Check Job Logs
385
+
386
+ **MCP Tool:**
387
+ ```python
388
+ # View logs
389
+ hf_jobs("logs", {"job_id": "your-job-id"})
390
+ ```
391
+
392
+ **CLI:**
393
+ ```bash
394
+ hf jobs logs <job-id>
395
+ ```
396
+
397
+ **Python API:**
398
+ ```python
399
+ from huggingface_hub import fetch_job_logs
400
+ for log in fetch_job_logs(job_id="your-job-id"):
401
+     print(log)
402
+ ```
403
+
404
+ **Or use job URL:** `https://huggingface.co/jobs/username/job-id`
405
+
406
+ ### 5. Add Error Handling
407
+
408
+ ```python
409
+ try:
410
+     # Your code
411
+     process_data()
412
+ except Exception as e:
413
+     print(f"Error: {e}")
414
+     import traceback
415
+     traceback.print_exc()
416
+     raise
417
+ ```
418
+
419
+ ### 6. Check Job Status Programmatically
420
+
421
+ ```python
422
+ from huggingface_hub import inspect_job, fetch_job_logs
423
+
424
+ job_info = inspect_job(job_id="your-job-id")
425
+ print(f"Status: {job_info.status.stage}")
426
+ print(f"Message: {job_info.status.message}")
427
+
428
+ if job_info.status.stage == "ERROR":
429
+     print("Job failed! Logs:")
430
+     for log in fetch_job_logs(job_id="your-job-id"):
431
+         print(log)
432
+ ```
433
+
434
+ ## Quick Reference
435
+
436
+ ### Common Error Codes
437
+
438
+ | Code | Meaning | Solution |
439
+ |------|---------|----------|
440
+ | 401 | Unauthorized | Add `secrets={"HF_TOKEN": "$HF_TOKEN"}` |
441
+ | 403 | Forbidden | Check token permissions |
442
+ | 404 | Not Found | Verify repository exists |
443
+ | 500 | Server Error | Retry or contact support |
444
+
445
+ ### Checklist Before Submitting
446
+
447
+ - [ ] Token configured: `secrets={"HF_TOKEN": "$HF_TOKEN"}`
448
+ - [ ] Script checks for token: `assert "HF_TOKEN" in os.environ`
449
+ - [ ] Timeout set appropriately
450
+ - [ ] Hardware selected correctly
451
+ - [ ] Dependencies listed in PEP 723 header
452
+ - [ ] Persistence code included
453
+ - [ ] Error handling added
454
+ - [ ] Logging added for debugging
455
+
456
+ ## Getting Help
457
+
458
+ If issues persist:
459
+
460
+ 1. **Check logs** - Most errors include detailed messages
461
+ 2. **Review documentation** - See main SKILL.md
462
+ 3. **Check Hub status** - https://status.huggingface.co
463
+ 4. **Community forums** - https://discuss.huggingface.co
464
+ 5. **GitHub issues** - For bugs in huggingface_hub
465
+
466
+ ## Key Takeaways
467
+
468
+ 1. **Always include token** - `secrets={"HF_TOKEN": "$HF_TOKEN"}`
469
+ 2. **Set appropriate timeout** - Default 30min may be insufficient
470
+ 3. **Verify persistence** - Results won't persist without code
471
+ 4. **Check logs** - Most issues visible in job logs
472
+ 5. **Test locally** - Catch errors before submitting
473
+ 6. **Add error handling** - Better debugging information
474
+ 7. **Monitor costs** - Set timeouts to avoid unexpected charges
475
+