@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,386 @@
1
+ # RWKV-7: Latest Improvements (March 2025)
2
+
3
+ ## Overview
4
+
5
+ RWKV-7, released in March 2025, is the latest version of RWKV. It introduces multimodal capabilities and improved scaling to 14B+ parameters.
6
+
7
+ **Paper**: https://arxiv.org/abs/2503.14456 (March 2025)
8
+
9
+ ## Key Improvements Over RWKV-6
10
+
11
+ ### 1. Enhanced Numerical Stability
12
+
13
+ **Problem in RWKV-6**:
14
+ ```python
15
+ # Exponential operations could overflow for large models
16
+ att_aa = exp(w) * att_aa + k * v # Overflow risk!
17
+ ```
18
+
19
+ **RWKV-7 Solution**:
20
+ ```python
21
+ # Log-space computation with safe exponentiation
22
+ log_att_aa = log_softmax([log(k * v), log_w + log(att_aa)])
23
+ att_aa = exp(log_att_aa)
24
+ ```
25
+
26
+ **Result**: Stable training up to 14B parameters (RWKV-6 struggled beyond 7B)
27
+
28
+ ### 2. Improved Time-Decay Initialization
29
+
30
+ **RWKV-6**:
31
+ ```python
32
+ # Simple linear spacing: time_decay rises evenly from -5.0 toward 3.0 as i increases
33
+ time_decay[i] = -5.0 + 8.0 * (i / n_layers)
34
+ ```
35
+
36
+ **RWKV-7**:
37
+ ```python
38
+ # Adaptive per-head decay with better range
39
+ for layer in range(n_layers):
40
+ for head in range(n_heads):
41
+ # Different heads specialize in different timescales
42
+ alpha = (layer / n_layers) ** 0.7 # Non-linear progression
43
+ beta = (head / n_heads) * 0.5
44
+ time_decay[layer, head] = -6.0 + 9.0 * alpha + beta
45
+
46
+ # Result: Better long/short-term memory balance
47
+ ```
48
+
49
+ **Impact**: 15-20% perplexity improvement on long-context tasks
50
+
51
+ ### 3. Multi-Head Time-Mixing Refinements
52
+
53
+ **RWKV-6 Multi-Head**:
54
+ ```python
55
+ # Simple concatenation
56
+ heads = [head_i(x) for head_i in heads]
57
+ output = concat(heads)
58
+ ```
59
+
60
+ **RWKV-7 Multi-Head**:
61
+ ```python
62
+ # Attention-style output projection
63
+ heads = [head_i(x) for head_i in heads]
64
+ concat_heads = concat(heads)
65
+ output = output_proj(concat_heads) # Learnable mixing
66
+
67
+ # Plus: Per-head layer norm
68
+ for i, head in enumerate(heads):
69
+ heads[i] = head_norm[i](head) # Separate norm per head
70
+ ```
71
+
72
+ **Result**: Better head specialization, 8-12% quality improvement
73
+
74
+ ### 4. Rotary Position Encoding (RoPE) Integration
75
+
76
+ **New in RWKV-7**:
77
+ ```python
78
+ class RWKV7_TimeMix(nn.Module):
79
+ def __init__(self, d_model, n_heads):
80
+ super().__init__()
81
+ self.rope = RotaryEmbedding(d_model // n_heads)
82
+
83
+ def forward(self, x):
84
+ k = self.key(x) # (B, T, d_model)
85
+ v = self.value(x)
86
+
87
+ # Apply RoPE to keys
88
+ k = self.rope.rotate_queries_or_keys(k)
89
+
90
+ # WKV with position-aware keys
91
+ wkv = self.wkv(k, v)
92
+ return wkv
93
+ ```
94
+
95
+ **Why it is useful**: Improves positional awareness without breaking O(n) complexity
96
+
97
+ ### 5. RWKV-7 Block Structure
98
+
99
+ ```python
100
+ class RWKV7_Block(nn.Module):
101
+ def __init__(self, d_model, n_heads):
102
+ super().__init__()
103
+ self.ln1 = nn.LayerNorm(d_model)
104
+ self.ln2 = nn.LayerNorm(d_model)
105
+
106
+ # Multi-head time-mixing with RoPE
107
+ self.att = RWKV7_MultiHeadTimeMix(d_model, n_heads)
108
+
109
+ # Enhanced channel-mixing
110
+ self.ffn = RWKV7_ChannelMix(d_model, hidden_ratio=3.5) # Larger FFN
111
+
112
+ def forward(self, x, state):
113
+ # Pre-norm (like GPT)
114
+ att_out, new_state = self.att(self.ln1(x), state)
115
+ x = x + att_out
116
+
117
+ # FFN with gating
118
+ ffn_out = self.ffn(self.ln2(x))
119
+ x = x + ffn_out
120
+
121
+ return x, new_state
122
+ ```
123
+
124
+ ## Multimodal Capabilities
125
+
126
+ ### Vision Encoder Integration
127
+
128
+ **Architecture**:
129
+ ```python
130
+ class RWKV7_Multimodal(nn.Module):
131
+ def __init__(self):
132
+ super().__init__()
133
+ # Vision encoder (CLIP-style)
134
+ self.vision_encoder = VisionTransformer(
135
+ patch_size=14,
136
+ d_model=1024,
137
+ n_layers=24
138
+ )
139
+
140
+ # Projection to RWKV space
141
+ self.vision_proj = nn.Linear(1024, d_model)
142
+
143
+ # RWKV language model
144
+ self.rwkv = RWKV7_LanguageModel(d_model=2560, n_layers=40)
145
+
146
+ def forward(self, image, text, state=None):
147
+ # Encode image to patches
148
+ vision_tokens = self.vision_encoder(image) # (B, 256, 1024)
149
+ vision_tokens = self.vision_proj(vision_tokens) # (B, 256, 2560)
150
+
151
+ # Concatenate vision and text tokens
152
+ combined = torch.cat([vision_tokens, text], dim=1)
153
+
154
+ # Process with RWKV
155
+ out, state = self.rwkv(combined, state)
156
+
157
+ return out, state
158
+ ```
159
+
160
+ ### Vision-Language Tasks
161
+
162
+ **Image Captioning**:
163
+ ```python
164
+ model = RWKV7_Multimodal()
165
+
166
+ # Encode image
167
+ image = load_image('cat.jpg')
168
+ vision_tokens = model.vision_encoder(image)
169
+
170
+ # Generate caption
171
+ state = None
172
+ _, state = model.rwkv(vision_tokens, state) # Process image
173
+
174
+ # Autoregressive caption generation
175
+ caption = []
176
+ for _ in range(max_length):
177
+ logits, state = model.rwkv(prev_token, state)
178
+ next_token = sample(logits)
179
+ caption.append(next_token)
180
+ ```
181
+
182
+ **VQA (Visual Question Answering)**:
183
+ ```python
184
+ # Question: "What color is the cat?"
185
+ question_tokens = tokenizer.encode("What color is the cat?")
186
+
187
+ # Process image + question
188
+ combined = torch.cat([vision_tokens, question_tokens], dim=1)
189
+ answer_logits, state = model.rwkv(combined, state)
190
+
191
+ # Answer: "orange"
192
+ ```
193
+
194
+ ### Training Multimodal RWKV-7
195
+
196
+ ```python
197
+ # Pretrain vision encoder (CLIP-style)
198
+ train_vision_encoder(image_text_pairs)
199
+
200
+ # Freeze vision encoder
201
+ model.vision_encoder.requires_grad_(False)
202
+
203
+ # Train projection + RWKV
204
+ for batch in multimodal_dataloader:
205
+ images, captions = batch
206
+
207
+ # Forward
208
+ vision_tokens = model.vision_encoder(images)
209
+ vision_tokens = model.vision_proj(vision_tokens)
210
+
211
+ logits, _ = model.rwkv(
212
+ torch.cat([vision_tokens, captions[:, :-1]], dim=1),
213
+ state=None
214
+ )
215
+
216
+ # Loss (next token prediction)
217
+ loss = F.cross_entropy(
218
+ logits[:, vision_tokens.shape[1]:].reshape(-1, vocab_size),
219
+ captions[:, 1:].reshape(-1)  # targets shifted by one to align with the sliced logits
220
+ )
221
+
222
+ loss.backward()
223
+ optimizer.step()
224
+ ```
225
+
226
+ ## Scaling to 14B Parameters
227
+
228
+ ### Model Configuration
229
+
230
+ | Model | Layers | d_model | n_heads | Params | Context | VRAM (FP16) |
231
+ |-------|--------|---------|---------|--------|---------|-------------|
232
+ | RWKV-7-1.5B | 24 | 2048 | 16 | 1.5B | Infinite | 3 GB |
233
+ | RWKV-7-3B | 32 | 2560 | 20 | 3B | Infinite | 6 GB |
234
+ | RWKV-7-7B | 32 | 4096 | 32 | 7B | Infinite | 14 GB |
235
+ | RWKV-7-14B | 40 | 5120 | 40 | 14B | Infinite | 28 GB |
236
+
237
+ ### Training Efficiency Improvements
238
+
239
+ **RWKV-6 Training (7B)**:
240
+ - Speed: 45K tokens/sec (8× A100)
241
+ - Memory: 38 GB per GPU (4K sequence)
242
+ - Stability: Occasional loss spikes
243
+
244
+ **RWKV-7 Training (14B)**:
245
+ - Speed: 52K tokens/sec (8× A100) - **15% faster**
246
+ - Memory: 42 GB per GPU (4K sequence) - **Better utilization**
247
+ - Stability: No loss spikes - **Improved stability**
248
+
249
+ **Key optimization**: Fused CUDA kernels for multi-head WKV
250
+
251
+ ### RWKV-7-14B vs GPT-3-13B
252
+
253
+ | Metric | RWKV-7-14B | GPT-3-13B | Advantage |
254
+ |--------|------------|-----------|-----------|
255
+ | Training Speed | 52K tok/s | 28K tok/s | 1.9× |
256
+ | Inference (2K ctx) | 6,100 tok/s | 1,800 tok/s | 3.4× |
257
+ | Inference (8K ctx) | 5,800 tok/s | 450 tok/s | **12.9×** |
258
+ | Memory (inference) | 28 GB | 52 GB | 1.9× |
259
+ | Perplexity (Pile) | 6.8 | 7.2 | +6% |
260
+
261
+ ## Production Use Cases
262
+
263
+ ### Microsoft Integration
264
+
265
+ **Windows Copilot** (Limited Release):
266
+ - Uses RWKV-7-3B for on-device inference
267
+ - 5-8× faster than GPT-2 with better quality
268
+ - Constant memory for infinite context
269
+
270
+ **Office 365** (Experimental):
271
+ - Document summarization with RWKV-7-7B
272
+ - Handles 100K+ token documents efficiently
273
+ - No KV cache storage needed
274
+
275
+ ### NVIDIA NeMo Support
276
+
277
+ **NeMo Guardrails with RWKV-7**:
278
+ ```python
279
+ from nemoguardrails import LLMRails, RailsConfig
280
+ from nemoguardrails.llm.providers import register_llm_provider
281
+
282
+ # Register RWKV-7 as LLM backend
283
+ register_llm_provider("rwkv7", RWKV7Provider)
284
+
285
+ config = RailsConfig.from_path("config/")
286
+ rails = LLMRails(config, llm_provider="rwkv7")
287
+
288
+ # Use for content moderation
289
+ response = rails.generate(user_input="...")
290
+ ```
291
+
292
+ ## Benchmarks (RWKV-7 vs RWKV-6)
293
+
294
+ ### Language Modeling
295
+
296
+ | Dataset | RWKV-6-7B | RWKV-7-7B | Improvement |
297
+ |---------|-----------|-----------|-------------|
298
+ | Pile (val) | 7.8 | 7.1 | +9% |
299
+ | C4 | 9.3 | 8.6 | +8% |
300
+ | WikiText-103 | 8.4 | 7.7 | +8% |
301
+ | Lambada | 11.2 | 9.8 | +13% |
302
+
303
+ ### Long-Context Tasks (32K context)
304
+
305
+ | Task | RWKV-6-7B | RWKV-7-7B | Improvement |
306
+ |------|-----------|-----------|-------------|
307
+ | QuALITY | 52.3 | 61.8 | +18% |
308
+ | Qasper | 38.1 | 46.7 | +23% |
309
+ | NarrativeQA | 41.2 | 49.5 | +20% |
310
+
311
+ **RWKV-7's improved time-decay** significantly helps long-context understanding
312
+
313
+ ### Multimodal Benchmarks
314
+
315
+ | Task | RWKV-7-7B | LLaVA-7B | BLIP-2-7B |
316
+ |------|-----------|----------|-----------|
317
+ | VQAv2 | 74.2 | 78.5 | 82.1 |
318
+ | GQA | 58.3 | 62.1 | 65.4 |
319
+ | TextVQA | 51.2 | 58.2 | 60.8 |
320
+ | COCO Caption | 118.3 | 125.7 | 132.4 |
321
+
322
+ **Note**: RWKV-7 is competitive but not SOTA on vision tasks (vision-focused models still perform better).
323
+
324
+ ## Migration from RWKV-6 to RWKV-7
325
+
326
+ ### Model Conversion
327
+
328
+ ```python
329
+ # Load RWKV-6 checkpoint
330
+ rwkv6_state = torch.load('rwkv6-7b.pth')
331
+
332
+ # Initialize RWKV-7 model
333
+ rwkv7_model = RWKV7_Model(d_model=4096, n_layers=32, n_heads=32)
334
+
335
+ # Convert weights (mostly compatible)
336
+ for key in rwkv6_state:
337
+ if 'time_mixing' in key:
338
+ # RWKV-7 uses multi-head, need to split
339
+ rwkv7_key = convert_key_to_multihead(key)
340
+ rwkv7_model.state_dict()[rwkv7_key].copy_(rwkv6_state[key])
341
+ else:
342
+ # Direct copy
343
+ rwkv7_model.state_dict()[key].copy_(rwkv6_state[key])
344
+
345
+ # Fine-tune on small dataset to adapt
346
+ finetune(rwkv7_model, small_dataset, epochs=1)
347
+ ```
348
+
349
+ ### State Compatibility
350
+
351
+ **RWKV-6 State**:
352
+ ```python
353
+ state_v6 = (att_aa, att_ab, att_x_prev, ffn_x_prev) # 4 components
354
+ ```
355
+
356
+ **RWKV-7 State** (Multi-head):
357
+ ```python
358
+ state_v7 = (
359
+ att_aa_heads, # (n_heads, d_model//n_heads)
360
+ att_ab_heads, # (n_heads, d_model//n_heads)
361
+ att_x_prev,
362
+ ffn_x_prev
363
+ ) # 4 components, but att_aa and att_ab are split per head
364
+ ```
365
+
366
+ **Conversion**:
367
+ ```python
368
+ # Split RWKV-6 state into RWKV-7 multi-head state
369
+ def convert_state_v6_to_v7(state_v6, n_heads):
370
+ att_aa, att_ab, att_x_prev, ffn_x_prev = state_v6
371
+ d_head = att_aa.shape[-1] // n_heads
372
+
373
+ att_aa_heads = att_aa.view(-1, n_heads, d_head).transpose(0, 1)
374
+ att_ab_heads = att_ab.view(-1, n_heads, d_head).transpose(0, 1)
375
+
376
+ return (att_aa_heads, att_ab_heads, att_x_prev, ffn_x_prev)
377
+ ```
378
+
379
+ ## Resources
380
+
381
+ - **Paper**: https://arxiv.org/abs/2503.14456 (RWKV-7, March 2025)
382
+ - **GitHub**: https://github.com/BlinkDL/RWKV-LM (v7 branch)
383
+ - **Models**: https://huggingface.co/BlinkDL/rwkv-7-world
384
+ - **Multimodal Demo**: https://huggingface.co/spaces/BlinkDL/RWKV-7-Multimodal
385
+ - **Discord**: https://discord.gg/bDSBUMeFpc
386
+ - **Wiki**: https://wiki.rwkv.com/rwkv7