@synsci/cli-darwin-arm64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,615 @@
1
+ # Backend Configuration Guide
2
+
3
+ Complete guide to configuring Outlines with different model backends.
4
+
5
+ ## Table of Contents
6
+ - Local Models (Transformers, llama.cpp, vLLM)
7
+ - API Models (OpenAI)
8
+ - Performance Comparison
9
+ - Configuration Examples
10
+ - Production Deployment
11
+
12
+ ## Transformers (Hugging Face)
13
+
14
+ ### Basic Setup
15
+
16
+ ```python
17
+ import outlines
18
+
19
+ # Load model from Hugging Face
20
+ model = outlines.models.transformers("microsoft/Phi-3-mini-4k-instruct")
21
+
22
+ # Use with generator
23
+ generator = outlines.generate.json(model, YourModel)
24
+ result = generator("Your prompt")
25
+ ```
26
+
27
+ ### GPU Configuration
28
+
29
+ ```python
30
+ # Use CUDA GPU
31
+ model = outlines.models.transformers(
32
+ "microsoft/Phi-3-mini-4k-instruct",
33
+ device="cuda"
34
+ )
35
+
36
+ # Use specific GPU
37
+ model = outlines.models.transformers(
38
+ "microsoft/Phi-3-mini-4k-instruct",
39
+ device="cuda:0" # GPU 0
40
+ )
41
+
42
+ # Use CPU
43
+ model = outlines.models.transformers(
44
+ "microsoft/Phi-3-mini-4k-instruct",
45
+ device="cpu"
46
+ )
47
+
48
+ # Use Apple Silicon MPS
49
+ model = outlines.models.transformers(
50
+ "microsoft/Phi-3-mini-4k-instruct",
51
+ device="mps"
52
+ )
53
+ ```
54
+
55
+ ### Advanced Configuration
56
+
57
+ ```python
58
+ # FP16 for faster inference
59
+ model = outlines.models.transformers(
60
+ "microsoft/Phi-3-mini-4k-instruct",
61
+ device="cuda",
62
+ model_kwargs={
63
+ "torch_dtype": "float16"
64
+ }
65
+ )
66
+
67
+ # 8-bit quantization (less memory)
68
+ model = outlines.models.transformers(
69
+ "microsoft/Phi-3-mini-4k-instruct",
70
+ device="cuda",
71
+ model_kwargs={
72
+ "load_in_8bit": True,
73
+ "device_map": "auto"
74
+ }
75
+ )
76
+
77
+ # 4-bit quantization (even less memory)
78
+ model = outlines.models.transformers(
79
+ "meta-llama/Llama-3.1-70B-Instruct",
80
+ device="cuda",
81
+ model_kwargs={
82
+ "load_in_4bit": True,
83
+ "device_map": "auto",
84
+ "bnb_4bit_compute_dtype": "float16"
85
+ }
86
+ )
87
+
88
+ # Multi-GPU
89
+ model = outlines.models.transformers(
90
+ "meta-llama/Llama-3.1-70B-Instruct",
91
+ device="cuda",
92
+ model_kwargs={
93
+ "device_map": "auto", # Automatic GPU distribution
94
+ "max_memory": {0: "40GB", 1: "40GB"} # Per-GPU limits
95
+ }
96
+ )
97
+ ```
98
+
99
+ ### Popular Models
100
+
101
+ ```python
102
+ # Phi-4 (Microsoft)
103
+ model = outlines.models.transformers("microsoft/Phi-4-mini-instruct")
104
+ model = outlines.models.transformers("microsoft/Phi-3-medium-4k-instruct")
105
+
106
+ # Llama 3.1 (Meta)
107
+ model = outlines.models.transformers("meta-llama/Llama-3.1-8B-Instruct")
108
+ model = outlines.models.transformers("meta-llama/Llama-3.1-70B-Instruct")
109
+ model = outlines.models.transformers("meta-llama/Llama-3.1-405B-Instruct")
110
+
111
+ # Mistral (Mistral AI)
112
+ model = outlines.models.transformers("mistralai/Mistral-7B-Instruct-v0.3")
113
+ model = outlines.models.transformers("mistralai/Mixtral-8x7B-Instruct-v0.1")
114
+ model = outlines.models.transformers("mistralai/Mixtral-8x22B-Instruct-v0.1")
115
+
116
+ # Qwen (Alibaba)
117
+ model = outlines.models.transformers("Qwen/Qwen2.5-7B-Instruct")
118
+ model = outlines.models.transformers("Qwen/Qwen2.5-14B-Instruct")
119
+ model = outlines.models.transformers("Qwen/Qwen2.5-72B-Instruct")
120
+
121
+ # Gemma (Google)
122
+ model = outlines.models.transformers("google/gemma-2-9b-it")
123
+ model = outlines.models.transformers("google/gemma-2-27b-it")
124
+
125
+ # Llava (Vision)
126
+ model = outlines.models.transformers("llava-hf/llava-v1.6-mistral-7b-hf")
127
+ ```
128
+
129
+ ### Custom Model Loading
130
+
131
+ ```python
132
+ from transformers import AutoTokenizer, AutoModelForCausalLM
133
+ import outlines
134
+
135
+ # Load model manually
136
+ tokenizer = AutoTokenizer.from_pretrained("your-model")
137
+ model_hf = AutoModelForCausalLM.from_pretrained(
138
+ "your-model",
139
+ device_map="auto",
140
+ torch_dtype="float16"
141
+ )
142
+
143
+ # Use with Outlines
144
+ model = outlines.models.transformers(
145
+ model=model_hf,
146
+ tokenizer=tokenizer
147
+ )
148
+ ```
149
+
150
+ ## llama.cpp
151
+
152
+ ### Basic Setup
153
+
154
+ ```python
155
+ import outlines
156
+
157
+ # Load GGUF model
158
+ model = outlines.models.llamacpp(
159
+ "./models/llama-3.1-8b-instruct.Q4_K_M.gguf",
160
+ n_ctx=4096 # Context window
161
+ )
162
+
163
+ # Use with generator
164
+ generator = outlines.generate.json(model, YourModel)
165
+ ```
166
+
167
+ ### GPU Configuration
168
+
169
+ ```python
170
+ # CPU only
171
+ model = outlines.models.llamacpp(
172
+ "./models/model.gguf",
173
+ n_ctx=4096,
174
+ n_threads=8 # Use 8 CPU threads
175
+ )
176
+
177
+ # GPU offload (partial)
178
+ model = outlines.models.llamacpp(
179
+ "./models/model.gguf",
180
+ n_ctx=4096,
181
+ n_gpu_layers=35, # Offload 35 layers to GPU
182
+ n_threads=4 # CPU threads for remaining layers
183
+ )
184
+
185
+ # Full GPU offload
186
+ model = outlines.models.llamacpp(
187
+ "./models/model.gguf",
188
+ n_ctx=8192,
189
+ n_gpu_layers=-1 # All layers on GPU
190
+ )
191
+ ```
192
+
193
+ ### Advanced Configuration
194
+
195
+ ```python
196
+ model = outlines.models.llamacpp(
197
+ "./models/llama-3.1-8b.Q4_K_M.gguf",
198
+ n_ctx=8192, # Context window (tokens)
199
+ n_gpu_layers=35, # GPU layers
200
+ n_threads=8, # CPU threads
201
+ n_batch=512, # Batch size for prompt processing
202
+ use_mmap=True, # Memory-map model file (faster loading)
203
+ use_mlock=False, # Lock model in RAM (prevents swapping)
204
+ seed=42, # Random seed for reproducibility
205
+ verbose=False # Suppress verbose output
206
+ )
207
+ ```
208
+
209
+ ### Quantization Formats
210
+
211
+ ```python
212
+ # Q4_K_M (4-bit, recommended for most cases)
213
+ # - Size: ~4.5GB for 7B model
214
+ # - Quality: Good
215
+ # - Speed: Fast
216
+ model = outlines.models.llamacpp("./models/model.Q4_K_M.gguf")
217
+
218
+ # Q5_K_M (5-bit, better quality)
219
+ # - Size: ~5.5GB for 7B model
220
+ # - Quality: Very good
221
+ # - Speed: Slightly slower than Q4
222
+ model = outlines.models.llamacpp("./models/model.Q5_K_M.gguf")
223
+
224
+ # Q6_K (6-bit, high quality)
225
+ # - Size: ~6.5GB for 7B model
226
+ # - Quality: Excellent
227
+ # - Speed: Slower than Q5
228
+ model = outlines.models.llamacpp("./models/model.Q6_K.gguf")
229
+
230
+ # Q8_0 (8-bit, near-original quality)
231
+ # - Size: ~8GB for 7B model
232
+ # - Quality: Near FP16
233
+ # - Speed: Slower than Q6
234
+ model = outlines.models.llamacpp("./models/model.Q8_0.gguf")
235
+
236
+ # F16 (16-bit float, original quality)
237
+ # - Size: ~14GB for 7B model
238
+ # - Quality: Original
239
+ # - Speed: Slowest
240
+ model = outlines.models.llamacpp("./models/model.F16.gguf")
241
+ ```
242
+
243
+ ### Popular GGUF Models
244
+
245
+ ```python
246
+ # Llama 3.1
247
+ model = outlines.models.llamacpp("llama-3.1-8b-instruct.Q4_K_M.gguf")
248
+ model = outlines.models.llamacpp("llama-3.1-70b-instruct.Q4_K_M.gguf")
249
+
250
+ # Mistral
251
+ model = outlines.models.llamacpp("mistral-7b-instruct-v0.3.Q4_K_M.gguf")
252
+
253
+ # Phi-4
254
+ model = outlines.models.llamacpp("phi-4-mini-instruct.Q4_K_M.gguf")
255
+
256
+ # Qwen
257
+ model = outlines.models.llamacpp("qwen2.5-7b-instruct.Q4_K_M.gguf")
258
+ ```
259
+
260
+ ### Apple Silicon Optimization
261
+
262
+ ```python
263
+ # Optimized for M1/M2/M3 Macs
264
+ model = outlines.models.llamacpp(
265
+ "./models/llama-3.1-8b.Q4_K_M.gguf",
266
+ n_ctx=4096,
267
+ n_gpu_layers=-1, # Use Metal GPU acceleration
268
+ use_mmap=True, # Efficient memory mapping
269
+ n_threads=8 # Use performance cores
270
+ )
271
+ ```
272
+
273
+ ## vLLM (Production)
274
+
275
+ ### Basic Setup
276
+
277
+ ```python
278
+ import outlines
279
+
280
+ # Load model with vLLM
281
+ model = outlines.models.vllm("meta-llama/Llama-3.1-8B-Instruct")
282
+
283
+ # Use with generator
284
+ generator = outlines.generate.json(model, YourModel)
285
+ ```
286
+
287
+ ### Single GPU
288
+
289
+ ```python
290
+ model = outlines.models.vllm(
291
+ "meta-llama/Llama-3.1-8B-Instruct",
292
+ gpu_memory_utilization=0.9, # Use 90% of GPU memory
293
+ max_model_len=4096 # Max sequence length
294
+ )
295
+ ```
296
+
297
+ ### Multi-GPU
298
+
299
+ ```python
300
+ # Tensor parallelism (split model across GPUs)
301
+ model = outlines.models.vllm(
302
+ "meta-llama/Llama-3.1-70B-Instruct",
303
+ tensor_parallel_size=4, # Use 4 GPUs
304
+ gpu_memory_utilization=0.9
305
+ )
306
+
307
+ # Pipeline parallelism (rare, for very large models)
308
+ model = outlines.models.vllm(
309
+ "meta-llama/Llama-3.1-405B-Instruct",
310
+ pipeline_parallel_size=8, # 8-GPU pipeline
311
+ tensor_parallel_size=4 # 4-GPU tensor split
312
+ # Total: 32 GPUs
313
+ )
314
+ ```
315
+
316
+ ### Quantization
317
+
318
+ ```python
319
+ # AWQ quantization (4-bit)
320
+ model = outlines.models.vllm(
321
+ "meta-llama/Llama-3.1-8B-Instruct",
322
+ quantization="awq",
323
+ dtype="float16"
324
+ )
325
+
326
+ # GPTQ quantization (4-bit)
327
+ model = outlines.models.vllm(
328
+ "meta-llama/Llama-3.1-8B-Instruct",
329
+ quantization="gptq"
330
+ )
331
+
332
+ # SqueezeLLM quantization
333
+ model = outlines.models.vllm(
334
+ "meta-llama/Llama-3.1-8B-Instruct",
335
+ quantization="squeezellm"
336
+ )
337
+ ```
338
+
339
+ ### Advanced Configuration
340
+
341
+ ```python
342
+ model = outlines.models.vllm(
343
+ "meta-llama/Llama-3.1-8B-Instruct",
344
+ tensor_parallel_size=1,
345
+ gpu_memory_utilization=0.9,
346
+ max_model_len=8192,
347
+ max_num_seqs=256, # Max concurrent sequences
348
+ max_num_batched_tokens=8192, # Max tokens per batch
349
+ dtype="float16",
350
+ trust_remote_code=True,
351
+ enforce_eager=False, # Use CUDA graphs (faster)
352
+ swap_space=4 # CPU swap space (GB)
353
+ )
354
+ ```
355
+
356
+ ### Batch Processing
357
+
358
+ ```python
359
+ # vLLM optimized for high-throughput batch processing
360
+ model = outlines.models.vllm(
361
+ "meta-llama/Llama-3.1-8B-Instruct",
362
+ max_num_seqs=128 # Process 128 sequences in parallel
363
+ )
364
+
365
+ generator = outlines.generate.json(model, YourModel)
366
+
367
+ # Process many prompts efficiently: pass the whole list in ONE call
367
+ prompts = ["prompt1", "prompt2", ..., "prompt100"]
368
+ results = generator(prompts)
369
+ # Passing the list lets vLLM batch internally; calling generator(p) one prompt at a time in a loop would serialize requests and defeat batching
371
+ ```
372
+
373
+ ## OpenAI (Limited Support)
374
+
375
+ ### Basic Setup
376
+
377
+ ```python
378
+ import outlines
379
+
380
+ # Basic OpenAI support
381
+ model = outlines.models.openai("gpt-4o-mini", api_key="your-api-key")
382
+
383
+ # Use with generator
384
+ generator = outlines.generate.json(model, YourModel)
385
+ result = generator("Your prompt")
386
+ ```
387
+
388
+ ### Configuration
389
+
390
+ ```python
391
+ model = outlines.models.openai(
392
+ "gpt-4o-mini",
393
+ api_key="your-api-key", # Or set OPENAI_API_KEY env var
394
+ max_tokens=2048,
395
+ temperature=0.7
396
+ )
397
+ ```
398
+
399
+ ### Available Models
400
+
401
+ ```python
402
+ # GPT-4o (latest)
403
+ model = outlines.models.openai("gpt-4o")
404
+
405
+ # GPT-4o Mini (cost-effective)
406
+ model = outlines.models.openai("gpt-4o-mini")
407
+
408
+ # GPT-4 Turbo
409
+ model = outlines.models.openai("gpt-4-turbo")
410
+
411
+ # GPT-3.5 Turbo
412
+ model = outlines.models.openai("gpt-3.5-turbo")
413
+ ```
414
+
415
+ **Note**: OpenAI support is limited compared to local models. Some advanced features may not work.
416
+
417
+ ## Backend Comparison
418
+
419
+ ### Feature Matrix
420
+
421
+ | Feature | Transformers | llama.cpp | vLLM | OpenAI |
422
+ |---------|-------------|-----------|------|--------|
423
+ | Structured Generation | ✅ Full | ✅ Full | ✅ Full | ⚠️ Limited |
424
+ | FSM Optimization | ✅ Yes | ✅ Yes | ✅ Yes | ❌ No |
425
+ | GPU Support | ✅ Yes | ✅ Yes | ✅ Yes | N/A |
426
+ | Multi-GPU | ✅ Yes | ✅ Yes | ✅ Yes | N/A |
427
+ | Quantization | ✅ Yes | ✅ Yes | ✅ Yes | N/A |
428
+ | High Throughput | ⚠️ Medium | ⚠️ Medium | ✅ Excellent | ⚠️ API-limited |
429
+ | Setup Difficulty | Easy | Medium | Medium | Easy |
430
+ | Cost | Hardware | Hardware | Hardware | API usage |
431
+
432
+ ### Performance Characteristics
433
+
434
+ **Transformers:**
435
+ - **Latency**: 50-200ms (single request, GPU)
436
+ - **Throughput**: 10-50 tokens/sec (depends on hardware)
437
+ - **Memory**: 2-4GB per 1B parameters (FP16)
438
+ - **Best for**: Development, small-scale deployment, flexibility
439
+
440
+ **llama.cpp:**
441
+ - **Latency**: 30-150ms (single request)
442
+ - **Throughput**: 20-150 tokens/sec (depends on quantization)
443
+ - **Memory**: 0.5-2GB per 1B parameters (Q4-Q8)
444
+ - **Best for**: CPU inference, Apple Silicon, edge deployment, low memory
445
+
446
+ **vLLM:**
447
+ - **Latency**: 30-100ms (single request)
448
+ - **Throughput**: 100-1000+ tokens/sec (batch processing)
449
+ - **Memory**: 2-4GB per 1B parameters (FP16)
450
+ - **Best for**: Production, high-throughput, batch processing, serving
451
+
452
+ **OpenAI:**
453
+ - **Latency**: 200-500ms (API call)
454
+ - **Throughput**: Limited by API rate limits
455
+ - **Memory**: N/A (cloud-based)
456
+ - **Best for**: Quick prototyping, no infrastructure
457
+
458
+ ### Memory Requirements
459
+
460
+ **7B Model:**
461
+ - FP16: ~14GB
462
+ - 8-bit: ~7GB
463
+ - 4-bit: ~4GB
464
+ - Q4_K_M (GGUF): ~4.5GB
465
+
466
+ **13B Model:**
467
+ - FP16: ~26GB
468
+ - 8-bit: ~13GB
469
+ - 4-bit: ~7GB
470
+ - Q4_K_M (GGUF): ~8GB
471
+
472
+ **70B Model:**
473
+ - FP16: ~140GB (multi-GPU)
474
+ - 8-bit: ~70GB (multi-GPU)
475
+ - 4-bit: ~35GB (single A100/H100)
476
+ - Q4_K_M (GGUF): ~40GB
477
+
478
+ ## Performance Tuning
479
+
480
+ ### Transformers Optimization
481
+
482
+ ```python
483
+ # Use FP16
484
+ model = outlines.models.transformers(
485
+ "meta-llama/Llama-3.1-8B-Instruct",
486
+ device="cuda",
487
+ model_kwargs={"torch_dtype": "float16"}
488
+ )
489
+
490
+ # Use flash attention (2-4x faster)
491
+ model = outlines.models.transformers(
492
+ "meta-llama/Llama-3.1-8B-Instruct",
493
+ device="cuda",
494
+ model_kwargs={
495
+ "torch_dtype": "float16",
496
+ "attn_implementation": "flash_attention_2"  # (use_flash_attention_2 is deprecated)
497
+ }
498
+ )
499
+
500
+ # Use 8-bit quantization (2x less memory)
501
+ model = outlines.models.transformers(
502
+ "meta-llama/Llama-3.1-8B-Instruct",
503
+ device="cuda",
504
+ model_kwargs={
505
+ "load_in_8bit": True,  # NOTE: deprecated in newer transformers; prefer quantization_config=BitsAndBytesConfig(load_in_8bit=True)
506
+ "device_map": "auto"
507
+ }
508
+ )
509
+ ```
510
+
511
+ ### llama.cpp Optimization
512
+
513
+ ```python
514
+ # Maximize GPU usage
515
+ model = outlines.models.llamacpp(
516
+ "./models/model.Q4_K_M.gguf",
517
+ n_gpu_layers=-1, # All layers on GPU
518
+ n_ctx=8192,
519
+ n_batch=512 # Larger batch = faster
520
+ )
521
+
522
+ # Optimize for CPU (Apple Silicon)
523
+ model = outlines.models.llamacpp(
524
+ "./models/model.Q4_K_M.gguf",
525
+ n_ctx=4096,
526
+ n_threads=8, # Use all performance cores
527
+ use_mmap=True
528
+ )
529
+ ```
530
+
531
+ ### vLLM Optimization
532
+
533
+ ```python
534
+ # High throughput
535
+ model = outlines.models.vllm(
536
+ "meta-llama/Llama-3.1-8B-Instruct",
537
+ gpu_memory_utilization=0.95, # Use 95% of GPU
538
+ max_num_seqs=256, # High concurrency
539
+ enforce_eager=False # Use CUDA graphs
540
+ )
541
+
542
+ # Multi-GPU
543
+ model = outlines.models.vllm(
544
+ "meta-llama/Llama-3.1-70B-Instruct",
545
+ tensor_parallel_size=4, # 4 GPUs
546
+ gpu_memory_utilization=0.9
547
+ )
548
+ ```
549
+
550
+ ## Production Deployment
551
+
552
+ ### Docker with vLLM
553
+
554
+ ```dockerfile
555
+ FROM vllm/vllm-openai:latest
556
+
557
+ # Install outlines
558
+ RUN pip install outlines
559
+
560
+ # Copy your code
561
+ COPY app.py /app/
562
+
563
+ # Run
564
+ CMD ["python", "/app/app.py"]
565
+ ```
566
+
567
+ ### Environment Variables
568
+
569
+ ```bash
570
+ # Transformers cache
571
+ export HF_HOME="/path/to/cache"
572
+ export TRANSFORMERS_CACHE="/path/to/cache"
573
+
574
+ # GPU selection
575
+ export CUDA_VISIBLE_DEVICES=0,1,2,3
576
+
577
+ # OpenAI API key
578
+ export OPENAI_API_KEY="sk-..."
579
+
580
+ # Disable tokenizers parallelism warning
581
+ export TOKENIZERS_PARALLELISM=false
582
+ ```
583
+
584
+ ### Model Serving
585
+
586
+ ```python
587
+ # Simple HTTP server with vLLM
588
+ import outlines
589
+ from fastapi import FastAPI
590
+ from pydantic import BaseModel
591
+
592
+ app = FastAPI()
593
+
594
+ # Load model once at startup
595
+ model = outlines.models.vllm("meta-llama/Llama-3.1-8B-Instruct")
596
+
597
+ class User(BaseModel):
598
+ name: str
599
+ age: int
600
+ email: str
601
+
602
+ generator = outlines.generate.json(model, User)
603
+
604
+ @app.post("/extract")
605
+ def extract(text: str):
606
+ result = generator(f"Extract user from: {text}")
607
+ return result.model_dump()
608
+ ```
609
+
610
+ ## Resources
611
+
612
+ - **Transformers**: https://huggingface.co/docs/transformers
613
+ - **llama.cpp**: https://github.com/ggerganov/llama.cpp
614
+ - **vLLM**: https://docs.vllm.ai
615
+ - **Outlines**: https://github.com/outlines-dev/outlines