@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,326 @@
1
+ ---
2
+ name: ray-data
3
+ description: Scalable data processing for ML workloads. Streaming execution across CPU/GPU, supports Parquet/CSV/JSON/images. Integrates with Ray Train, PyTorch, TensorFlow. Scales from single machine to 100s of nodes. Use for batch inference, data preprocessing, multi-modal data loading, or distributed ETL pipelines.
4
+ version: 1.0.0
5
+ author: Synthetic Sciences
6
+ license: MIT
7
+ tags: [Data Processing, Ray Data, Distributed Computing, ML Pipelines, Batch Inference, ETL, Scalable, Ray, PyTorch, TensorFlow]
8
+ dependencies: ["ray[data]", pyarrow, pandas]
9
+ ---
10
+
11
+ # Ray Data - Scalable ML Data Processing
12
+
13
+ Distributed data processing library for ML and AI workloads.
14
+
15
+ ## When to use Ray Data
16
+
17
+ **Use Ray Data when:**
18
+ - Processing large datasets (>100GB) for ML training
19
+ - Distributing data preprocessing across a cluster
20
+ - Building batch inference pipelines
21
+ - Loading multi-modal data (images, audio, video)
22
+ - Scaling data processing from laptop to cluster
23
+
24
+ **Key features**:
25
+ - **Streaming execution**: Process data larger than memory
26
+ - **GPU support**: Accelerate transforms with GPUs
27
+ - **Framework integration**: PyTorch, TensorFlow, HuggingFace
28
+ - **Multi-modal**: Images, Parquet, CSV, JSON, audio, video
29
+
30
+ **Use alternatives instead**:
31
+ - **Pandas**: Small data (<1GB) on single machine
32
+ - **Dask**: Tabular data, SQL-like operations
33
+ - **Spark**: Enterprise ETL, SQL queries
34
+
35
+ ## Quick start
36
+
37
+ ### Installation
38
+
39
+ ```bash
40
+ pip install -U 'ray[data]'
41
+ ```
42
+
43
+ ### Load and transform data
44
+
45
+ ```python
46
+ import ray
47
+
48
+ # Read Parquet files
49
+ ds = ray.data.read_parquet("s3://bucket/data/*.parquet")
50
+
51
+ # Transform data (lazy execution)
52
+ ds = ds.map_batches(lambda batch: {"processed": batch["text"].str.lower()}, batch_format="pandas")
53
+
54
+ # Consume data
55
+ for batch in ds.iter_batches(batch_size=100):
56
+ print(batch)
57
+ ```
58
+
59
+ ### Integration with Ray Train
60
+
61
+ ```python
62
+ import ray
63
+ from ray.train import ScalingConfig
64
+ from ray.train.torch import TorchTrainer
65
+
66
+ # Create dataset
67
+ train_ds = ray.data.read_parquet("s3://bucket/train/*.parquet")
68
+
69
+ def train_func(config):
70
+ # Access dataset in training
71
+ train_ds = ray.train.get_dataset_shard("train")
72
+
73
+ for epoch in range(10):
74
+ for batch in train_ds.iter_batches(batch_size=32):
75
+ # Train on batch
76
+ pass
77
+
78
+ # Train with Ray
79
+ trainer = TorchTrainer(
80
+ train_func,
81
+ datasets={"train": train_ds},
82
+ scaling_config=ScalingConfig(num_workers=4, use_gpu=True)
83
+ )
84
+ trainer.fit()
85
+ ```
86
+
87
+ ## Reading data
88
+
89
+ ### From cloud storage
90
+
91
+ ```python
92
+ import ray
93
+
94
+ # Parquet (recommended for ML)
95
+ ds = ray.data.read_parquet("s3://bucket/data/*.parquet")
96
+
97
+ # CSV
98
+ ds = ray.data.read_csv("s3://bucket/data/*.csv")
99
+
100
+ # JSON
101
+ ds = ray.data.read_json("gs://bucket/data/*.json")
102
+
103
+ # Images
104
+ ds = ray.data.read_images("s3://bucket/images/")
105
+ ```
106
+
107
+ ### From Python objects
108
+
109
+ ```python
110
+ # From list
111
+ ds = ray.data.from_items([{"id": i, "value": i * 2} for i in range(1000)])
112
+
113
+ # From range
114
+ ds = ray.data.range(1000000) # Synthetic data
115
+
116
+ # From pandas
117
+ import pandas as pd
118
+ df = pd.DataFrame({"col1": [1, 2, 3], "col2": [4, 5, 6]})
119
+ ds = ray.data.from_pandas(df)
120
+ ```
121
+
122
+ ## Transformations
123
+
124
+ ### Map batches (vectorized)
125
+
126
+ ```python
127
+ # Batch transformation (fast)
128
+ def process_batch(batch):
129
+ batch["doubled"] = batch["value"] * 2
130
+ return batch
131
+
132
+ ds = ds.map_batches(process_batch, batch_size=1000)
133
+ ```
134
+
135
+ ### Row transformations
136
+
137
+ ```python
138
+ # Row-by-row (slower)
139
+ def process_row(row):
140
+ row["squared"] = row["value"] ** 2
141
+ return row
142
+
143
+ ds = ds.map(process_row)
144
+ ```
145
+
146
+ ### Filter
147
+
148
+ ```python
149
+ # Filter rows
150
+ ds = ds.filter(lambda row: row["value"] > 100)
151
+ ```
152
+
153
+ ### Group by and aggregate
154
+
155
+ ```python
156
+ # Group by column
157
+ ds = ds.groupby("category").count()
158
+
159
+ # Custom aggregation
160
+ ds = ds.groupby("category").map_groups(lambda group: {"sum": group["value"].sum()})
161
+ ```
162
+
163
+ ## GPU-accelerated transforms
164
+
165
+ ```python
166
+ # Use GPU for preprocessing
167
+ def preprocess_images_gpu(batch):
168
+ import torch
169
+ images = torch.tensor(batch["image"]).cuda()
170
+ # GPU preprocessing
171
+ processed = images * 255
172
+ return {"processed": processed.cpu().numpy()}
173
+
174
+ ds = ds.map_batches(
175
+ preprocess_images_gpu,
176
+ batch_size=64,
177
+ num_gpus=1 # Request GPU
178
+ )
179
+ ```
180
+
181
+ ## Writing data
182
+
183
+ ```python
184
+ # Write to Parquet
185
+ ds.write_parquet("s3://bucket/output/")
186
+
187
+ # Write to CSV
188
+ ds.write_csv("output/")
189
+
190
+ # Write to JSON
191
+ ds.write_json("output/")
192
+ ```
193
+
194
+ ## Performance optimization
195
+
196
+ ### Repartition
197
+
198
+ ```python
199
+ # Control parallelism
200
+ ds = ds.repartition(100) # 100 blocks for 100-core cluster
201
+ ```
202
+
203
+ ### Batch size tuning
204
+
205
+ ```python
206
+ # Larger batches = faster vectorized ops
207
+ ds.map_batches(process_fn, batch_size=10000) # vs batch_size=100
208
+ ```
209
+
210
+ ### Streaming execution
211
+
212
+ ```python
213
+ # Process data larger than memory
214
+ ds = ray.data.read_parquet("s3://huge-dataset/")
215
+ for batch in ds.iter_batches(batch_size=1000):
216
+ process(batch) # Streamed, not loaded to memory
217
+ ```
218
+
219
+ ## Common patterns
220
+
221
+ ### Batch inference
222
+
223
+ ```python
224
+ import ray
225
+
226
+ # Load model
227
+ def load_model():
228
+ # Load once per worker
229
+ return MyModel()
230
+
231
+ # Inference function
232
+ class BatchInference:
233
+ def __init__(self):
234
+ self.model = load_model()
235
+
236
+ def __call__(self, batch):
237
+ predictions = self.model(batch["input"])
238
+ return {"prediction": predictions}
239
+
240
+ # Run distributed inference
241
+ ds = ray.data.read_parquet("s3://data/")
242
+ predictions = ds.map_batches(BatchInference, batch_size=32, num_gpus=1)
243
+ predictions.write_parquet("s3://output/")
244
+ ```
245
+
246
+ ### Data preprocessing pipeline
247
+
248
+ ```python
249
+ # Multi-step pipeline
250
+ (  # no assignment: write_parquet() returns None
251
+ ray.data.read_parquet("s3://raw/")
252
+ .map_batches(clean_data)
253
+ .map_batches(tokenize)
254
+ .map_batches(augment)
255
+ .write_parquet("s3://processed/")
256
+ )
257
+ ```
258
+
259
+ ## Integration with ML frameworks
260
+
261
+ ### PyTorch
262
+
263
+ ```python
264
+ # Convert to PyTorch
265
+ torch_ds = ds.iter_torch_batches(batch_size=32)
266
+
267
+ for batch in torch_ds:
268
+ # batch is dict with tensors
269
+ inputs, labels = batch["features"], batch["label"]
270
+ ```
271
+
272
+ ### TensorFlow
273
+
274
+ ```python
275
+ # Convert to TensorFlow
276
+ tf_ds = ds.to_tf(feature_columns=["image"], label_column="label", batch_size=32)
277
+
278
+ for features, labels in tf_ds:
279
+ # Train model
280
+ pass
281
+ ```
282
+
283
+ ## Supported data formats
284
+
285
+ | Format | Read | Write | Use Case |
286
+ |--------|------|-------|----------|
287
+ | Parquet | ✅ | ✅ | ML data (recommended) |
288
+ | CSV | ✅ | ✅ | Tabular data |
289
+ | JSON | ✅ | ✅ | Semi-structured |
290
+ | Images | ✅ | ❌ | Computer vision |
291
+ | NumPy | ✅ | ✅ | Arrays |
292
+ | Pandas | ✅ | ❌ | DataFrames |
293
+
294
+ ## Performance benchmarks
295
+
296
+ **Scaling** (processing 100GB data):
297
+ - 1 node (16 cores): ~30 minutes
298
+ - 4 nodes (64 cores): ~8 minutes
299
+ - 16 nodes (256 cores): ~2 minutes
300
+
301
+ **GPU acceleration** (image preprocessing):
302
+ - CPU only: 1,000 images/sec
303
+ - 1 GPU: 5,000 images/sec
304
+ - 4 GPUs: 18,000 images/sec
305
+
306
+ ## Use cases
307
+
308
+ **Production deployments**:
309
+ - **Pinterest**: Last-mile data processing for model training
310
+ - **ByteDance**: Scaling offline inference with multi-modal LLMs
311
+ - **Spotify**: ML platform for batch inference
312
+
313
+ ## References
314
+
315
+ - **[Transformations Guide](references/transformations.md)** - Map, filter, groupby operations
316
+ - **[Integration Guide](references/integration.md)** - Ray Train, PyTorch, TensorFlow
317
+
318
+ ## Resources
319
+
320
+ - **Docs**: https://docs.ray.io/en/latest/data/data.html
321
+ - **GitHub**: https://github.com/ray-project/ray ⭐ 36,000+
322
+ - **Version**: Ray 2.40.0+
323
+ - **Examples**: https://docs.ray.io/en/latest/data/examples/overview.html
324
+
325
+
326
+
@@ -0,0 +1,82 @@
1
+ # Ray Data Integration Guide
2
+
3
+ Integration with Ray Train and ML frameworks.
4
+
5
+ ## Ray Train integration
6
+
7
+ ### Basic training with datasets
8
+
9
+ ```python
10
+ import ray
11
+ from ray.train import ScalingConfig
12
+ from ray.train.torch import TorchTrainer
13
+
14
+ # Create datasets
15
+ train_ds = ray.data.read_parquet("s3://data/train/")
16
+ val_ds = ray.data.read_parquet("s3://data/val/")
17
+
18
+ def train_func(config):
19
+ # Get dataset shards
20
+ train_ds = ray.train.get_dataset_shard("train")
21
+ val_ds = ray.train.get_dataset_shard("val")
22
+
23
+ for epoch in range(config["epochs"]):
24
+ # Iterate over batches
25
+ for batch in train_ds.iter_batches(batch_size=32):
26
+ # Train on batch
27
+ pass
28
+
29
+ # Launch training
30
+ trainer = TorchTrainer(
31
+ train_func,
32
+ train_loop_config={"epochs": 10},
33
+ datasets={"train": train_ds, "val": val_ds},
34
+ scaling_config=ScalingConfig(num_workers=4, use_gpu=True)
35
+ )
36
+
37
+ result = trainer.fit()
38
+ ```
39
+
40
+ ## PyTorch integration
41
+
42
+ ### Convert to PyTorch Dataset
43
+
44
+ ```python
45
+ # Option 1: to_torch (legacy API; prefer iter_torch_batches in Option 2 on current Ray releases)
46
+ torch_ds = ds.to_torch(
47
+ label_column="label",
48
+ batch_size=32,
49
+ drop_last=True
50
+ )
51
+
52
+ for batch in torch_ds:
53
+ inputs = batch["features"]
54
+ labels = batch["label"]
55
+ # Train model
56
+
57
+ # Option 2: iter_torch_batches
58
+ for batch in ds.iter_torch_batches(batch_size=32):
59
+ # batch is dict of tensors
60
+ pass
61
+ ```
62
+
63
+ ## TensorFlow integration
64
+
65
+ ```python
66
+ tf_ds = ds.to_tf(
67
+ feature_columns=["image", "text"],
68
+ label_column="label",
69
+ batch_size=32
70
+ )
71
+
72
+ for features, labels in tf_ds:
73
+ # Train TensorFlow model
74
+ pass
75
+ ```
76
+
77
+ ## Best practices
78
+
79
+ 1. **Shard datasets in Ray Train** - Automatic with `get_dataset_shard()`
80
+ 2. **Use streaming** - Don't load the entire dataset into memory
81
+ 3. **Preprocess in Ray Data** - Distribute preprocessing across cluster
82
+ 4. **Cache preprocessed data** - Write to Parquet, read in training
@@ -0,0 +1,83 @@
1
+ # Ray Data Transformations
2
+
3
+ Complete guide to data transformations in Ray Data.
4
+
5
+ ## Core operations
6
+
7
+ ### Map batches (vectorized)
8
+
9
+ ```python
10
+ # Recommended for performance
11
+ def process_batch(batch):
12
+ # batch is a dict of NumPy arrays by default (a pandas DataFrame if batch_format="pandas")
13
+ batch["doubled"] = batch["value"] * 2
14
+ return batch
15
+
16
+ ds = ds.map_batches(process_batch, batch_size=1000)
17
+ ```
18
+
19
+ **Performance**: 10-100× faster than row-by-row
20
+
21
+ ### Map (row-by-row)
22
+
23
+ ```python
24
+ # Use only when vectorization is not possible
25
+ def process_row(row):
26
+ row["squared"] = row["value"] ** 2
27
+ return row
28
+
29
+ ds = ds.map(process_row)
30
+ ```
31
+
32
+ ### Filter
33
+
34
+ ```python
35
+ # Remove rows
36
+ ds = ds.filter(lambda row: row["score"] > 0.5)
37
+ ```
38
+
39
+ ### Flat map
40
+
41
+ ```python
42
+ # One row → multiple rows
43
+ def expand_row(row):
44
+ return [{"value": row["value"] + i} for i in range(3)]
45
+
46
+ ds = ds.flat_map(expand_row)
47
+ ```
48
+
49
+ ## GPU-accelerated transforms
50
+
51
+ ```python
52
+ def gpu_transform(batch):
53
+ import torch
54
+ data = torch.tensor(batch["data"]).cuda()
55
+ # GPU processing
56
+ result = data * 2
57
+ return {"processed": result.cpu().numpy()}
58
+
59
+ ds = ds.map_batches(gpu_transform, num_gpus=1, batch_size=64)
60
+ ```
61
+
62
+ ## Groupby operations
63
+
64
+ ```python
65
+ # Group by column
66
+ grouped = ds.groupby("category")
67
+
68
+ # Aggregate
69
+ result = grouped.count()
70
+
71
+ # Custom aggregation
72
+ result = grouped.map_groups(lambda group: {
73
+ "sum": group["value"].sum(),
74
+ "mean": group["value"].mean()
75
+ })
76
+ ```
77
+
78
+ ## Best practices
79
+
80
+ 1. **Use map_batches over map** - 10-100× faster
81
+ 2. **Tune batch_size** - Larger batches are usually faster but use more memory
82
+ 3. **Use GPUs for heavy compute** - Image/audio preprocessing
83
+ 4. **Stream large datasets** - Use `iter_batches` for datasets larger than memory