@synsci/cli-darwin-x64 1.1.49

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (373) hide show
  1. package/bin/skills/accelerate/SKILL.md +332 -0
  2. package/bin/skills/accelerate/references/custom-plugins.md +453 -0
  3. package/bin/skills/accelerate/references/megatron-integration.md +489 -0
  4. package/bin/skills/accelerate/references/performance.md +525 -0
  5. package/bin/skills/audiocraft/SKILL.md +564 -0
  6. package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
  7. package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
  8. package/bin/skills/autogpt/SKILL.md +403 -0
  9. package/bin/skills/autogpt/references/advanced-usage.md +535 -0
  10. package/bin/skills/autogpt/references/troubleshooting.md +420 -0
  11. package/bin/skills/awq/SKILL.md +310 -0
  12. package/bin/skills/awq/references/advanced-usage.md +324 -0
  13. package/bin/skills/awq/references/troubleshooting.md +344 -0
  14. package/bin/skills/axolotl/SKILL.md +158 -0
  15. package/bin/skills/axolotl/references/api.md +5548 -0
  16. package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
  17. package/bin/skills/axolotl/references/index.md +15 -0
  18. package/bin/skills/axolotl/references/other.md +3563 -0
  19. package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
  20. package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
  21. package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
  22. package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
  23. package/bin/skills/bitsandbytes/SKILL.md +411 -0
  24. package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
  25. package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
  26. package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
  27. package/bin/skills/blip-2/SKILL.md +564 -0
  28. package/bin/skills/blip-2/references/advanced-usage.md +680 -0
  29. package/bin/skills/blip-2/references/troubleshooting.md +526 -0
  30. package/bin/skills/chroma/SKILL.md +406 -0
  31. package/bin/skills/chroma/references/integration.md +38 -0
  32. package/bin/skills/clip/SKILL.md +253 -0
  33. package/bin/skills/clip/references/applications.md +207 -0
  34. package/bin/skills/constitutional-ai/SKILL.md +290 -0
  35. package/bin/skills/crewai/SKILL.md +498 -0
  36. package/bin/skills/crewai/references/flows.md +438 -0
  37. package/bin/skills/crewai/references/tools.md +429 -0
  38. package/bin/skills/crewai/references/troubleshooting.md +480 -0
  39. package/bin/skills/deepspeed/SKILL.md +141 -0
  40. package/bin/skills/deepspeed/references/08.md +17 -0
  41. package/bin/skills/deepspeed/references/09.md +173 -0
  42. package/bin/skills/deepspeed/references/2020.md +378 -0
  43. package/bin/skills/deepspeed/references/2023.md +279 -0
  44. package/bin/skills/deepspeed/references/assets.md +179 -0
  45. package/bin/skills/deepspeed/references/index.md +35 -0
  46. package/bin/skills/deepspeed/references/mii.md +118 -0
  47. package/bin/skills/deepspeed/references/other.md +1191 -0
  48. package/bin/skills/deepspeed/references/tutorials.md +6554 -0
  49. package/bin/skills/dspy/SKILL.md +590 -0
  50. package/bin/skills/dspy/references/examples.md +663 -0
  51. package/bin/skills/dspy/references/modules.md +475 -0
  52. package/bin/skills/dspy/references/optimizers.md +566 -0
  53. package/bin/skills/faiss/SKILL.md +221 -0
  54. package/bin/skills/faiss/references/index_types.md +280 -0
  55. package/bin/skills/flash-attention/SKILL.md +367 -0
  56. package/bin/skills/flash-attention/references/benchmarks.md +215 -0
  57. package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
  58. package/bin/skills/gguf/SKILL.md +427 -0
  59. package/bin/skills/gguf/references/advanced-usage.md +504 -0
  60. package/bin/skills/gguf/references/troubleshooting.md +442 -0
  61. package/bin/skills/gptq/SKILL.md +450 -0
  62. package/bin/skills/gptq/references/calibration.md +337 -0
  63. package/bin/skills/gptq/references/integration.md +129 -0
  64. package/bin/skills/gptq/references/troubleshooting.md +95 -0
  65. package/bin/skills/grpo-rl-training/README.md +97 -0
  66. package/bin/skills/grpo-rl-training/SKILL.md +572 -0
  67. package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
  68. package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
  69. package/bin/skills/guidance/SKILL.md +572 -0
  70. package/bin/skills/guidance/references/backends.md +554 -0
  71. package/bin/skills/guidance/references/constraints.md +674 -0
  72. package/bin/skills/guidance/references/examples.md +767 -0
  73. package/bin/skills/hqq/SKILL.md +445 -0
  74. package/bin/skills/hqq/references/advanced-usage.md +528 -0
  75. package/bin/skills/hqq/references/troubleshooting.md +503 -0
  76. package/bin/skills/hugging-face-cli/SKILL.md +191 -0
  77. package/bin/skills/hugging-face-cli/references/commands.md +954 -0
  78. package/bin/skills/hugging-face-cli/references/examples.md +374 -0
  79. package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
  80. package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
  81. package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
  82. package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
  83. package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
  84. package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
  85. package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
  86. package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
  87. package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
  88. package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
  89. package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
  90. package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
  91. package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
  92. package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
  93. package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
  94. package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
  95. package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
  96. package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
  97. package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
  98. package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
  99. package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
  100. package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
  101. package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
  102. package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
  103. package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
  104. package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
  105. package/bin/skills/hugging-face-jobs/index.html +216 -0
  106. package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  107. package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
  108. package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
  109. package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  110. package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  111. package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  112. package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  113. package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
  114. package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  115. package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  116. package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  117. package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  118. package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  119. package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  120. package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  121. package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  122. package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  123. package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  124. package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  125. package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  126. package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  127. package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  128. package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
  129. package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
  130. package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  131. package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
  132. package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  133. package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  134. package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  135. package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  136. package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
  137. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
  138. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
  139. package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
  140. package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
  141. package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
  142. package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
  143. package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
  144. package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
  145. package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  146. package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
  147. package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
  148. package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
  149. package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
  150. package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
  151. package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
  152. package/bin/skills/instructor/SKILL.md +740 -0
  153. package/bin/skills/instructor/references/examples.md +107 -0
  154. package/bin/skills/instructor/references/providers.md +70 -0
  155. package/bin/skills/instructor/references/validation.md +606 -0
  156. package/bin/skills/knowledge-distillation/SKILL.md +458 -0
  157. package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
  158. package/bin/skills/lambda-labs/SKILL.md +545 -0
  159. package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
  160. package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
  161. package/bin/skills/langchain/SKILL.md +480 -0
  162. package/bin/skills/langchain/references/agents.md +499 -0
  163. package/bin/skills/langchain/references/integration.md +562 -0
  164. package/bin/skills/langchain/references/rag.md +600 -0
  165. package/bin/skills/langsmith/SKILL.md +422 -0
  166. package/bin/skills/langsmith/references/advanced-usage.md +548 -0
  167. package/bin/skills/langsmith/references/troubleshooting.md +537 -0
  168. package/bin/skills/litgpt/SKILL.md +469 -0
  169. package/bin/skills/litgpt/references/custom-models.md +568 -0
  170. package/bin/skills/litgpt/references/distributed-training.md +451 -0
  171. package/bin/skills/litgpt/references/supported-models.md +336 -0
  172. package/bin/skills/litgpt/references/training-recipes.md +619 -0
  173. package/bin/skills/llama-cpp/SKILL.md +258 -0
  174. package/bin/skills/llama-cpp/references/optimization.md +89 -0
  175. package/bin/skills/llama-cpp/references/quantization.md +213 -0
  176. package/bin/skills/llama-cpp/references/server.md +125 -0
  177. package/bin/skills/llama-factory/SKILL.md +80 -0
  178. package/bin/skills/llama-factory/references/_images.md +23 -0
  179. package/bin/skills/llama-factory/references/advanced.md +1055 -0
  180. package/bin/skills/llama-factory/references/getting_started.md +349 -0
  181. package/bin/skills/llama-factory/references/index.md +19 -0
  182. package/bin/skills/llama-factory/references/other.md +31 -0
  183. package/bin/skills/llamaguard/SKILL.md +337 -0
  184. package/bin/skills/llamaindex/SKILL.md +569 -0
  185. package/bin/skills/llamaindex/references/agents.md +83 -0
  186. package/bin/skills/llamaindex/references/data_connectors.md +108 -0
  187. package/bin/skills/llamaindex/references/query_engines.md +406 -0
  188. package/bin/skills/llava/SKILL.md +304 -0
  189. package/bin/skills/llava/references/training.md +197 -0
  190. package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
  191. package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
  192. package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
  193. package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
  194. package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
  195. package/bin/skills/long-context/SKILL.md +536 -0
  196. package/bin/skills/long-context/references/extension_methods.md +468 -0
  197. package/bin/skills/long-context/references/fine_tuning.md +611 -0
  198. package/bin/skills/long-context/references/rope.md +402 -0
  199. package/bin/skills/mamba/SKILL.md +260 -0
  200. package/bin/skills/mamba/references/architecture-details.md +206 -0
  201. package/bin/skills/mamba/references/benchmarks.md +255 -0
  202. package/bin/skills/mamba/references/training-guide.md +388 -0
  203. package/bin/skills/megatron-core/SKILL.md +366 -0
  204. package/bin/skills/megatron-core/references/benchmarks.md +249 -0
  205. package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
  206. package/bin/skills/megatron-core/references/production-examples.md +473 -0
  207. package/bin/skills/megatron-core/references/training-recipes.md +547 -0
  208. package/bin/skills/miles/SKILL.md +315 -0
  209. package/bin/skills/miles/references/api-reference.md +141 -0
  210. package/bin/skills/miles/references/troubleshooting.md +352 -0
  211. package/bin/skills/mlflow/SKILL.md +704 -0
  212. package/bin/skills/mlflow/references/deployment.md +744 -0
  213. package/bin/skills/mlflow/references/model-registry.md +770 -0
  214. package/bin/skills/mlflow/references/tracking.md +680 -0
  215. package/bin/skills/modal/SKILL.md +341 -0
  216. package/bin/skills/modal/references/advanced-usage.md +503 -0
  217. package/bin/skills/modal/references/troubleshooting.md +494 -0
  218. package/bin/skills/model-merging/SKILL.md +539 -0
  219. package/bin/skills/model-merging/references/evaluation.md +462 -0
  220. package/bin/skills/model-merging/references/examples.md +428 -0
  221. package/bin/skills/model-merging/references/methods.md +352 -0
  222. package/bin/skills/model-pruning/SKILL.md +495 -0
  223. package/bin/skills/model-pruning/references/wanda.md +347 -0
  224. package/bin/skills/moe-training/SKILL.md +526 -0
  225. package/bin/skills/moe-training/references/architectures.md +432 -0
  226. package/bin/skills/moe-training/references/inference.md +348 -0
  227. package/bin/skills/moe-training/references/training.md +425 -0
  228. package/bin/skills/nanogpt/SKILL.md +290 -0
  229. package/bin/skills/nanogpt/references/architecture.md +382 -0
  230. package/bin/skills/nanogpt/references/data.md +476 -0
  231. package/bin/skills/nanogpt/references/training.md +564 -0
  232. package/bin/skills/nemo-curator/SKILL.md +383 -0
  233. package/bin/skills/nemo-curator/references/deduplication.md +87 -0
  234. package/bin/skills/nemo-curator/references/filtering.md +102 -0
  235. package/bin/skills/nemo-evaluator/SKILL.md +494 -0
  236. package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
  237. package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
  238. package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
  239. package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
  240. package/bin/skills/nemo-guardrails/SKILL.md +297 -0
  241. package/bin/skills/nnsight/SKILL.md +436 -0
  242. package/bin/skills/nnsight/references/README.md +78 -0
  243. package/bin/skills/nnsight/references/api.md +344 -0
  244. package/bin/skills/nnsight/references/tutorials.md +300 -0
  245. package/bin/skills/openrlhf/SKILL.md +249 -0
  246. package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
  247. package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
  248. package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
  249. package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
  250. package/bin/skills/outlines/SKILL.md +652 -0
  251. package/bin/skills/outlines/references/backends.md +615 -0
  252. package/bin/skills/outlines/references/examples.md +773 -0
  253. package/bin/skills/outlines/references/json_generation.md +652 -0
  254. package/bin/skills/peft/SKILL.md +431 -0
  255. package/bin/skills/peft/references/advanced-usage.md +514 -0
  256. package/bin/skills/peft/references/troubleshooting.md +480 -0
  257. package/bin/skills/phoenix/SKILL.md +475 -0
  258. package/bin/skills/phoenix/references/advanced-usage.md +619 -0
  259. package/bin/skills/phoenix/references/troubleshooting.md +538 -0
  260. package/bin/skills/pinecone/SKILL.md +358 -0
  261. package/bin/skills/pinecone/references/deployment.md +181 -0
  262. package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
  263. package/bin/skills/pytorch-fsdp/references/index.md +7 -0
  264. package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
  265. package/bin/skills/pytorch-lightning/SKILL.md +346 -0
  266. package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
  267. package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
  268. package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
  269. package/bin/skills/pyvene/SKILL.md +473 -0
  270. package/bin/skills/pyvene/references/README.md +73 -0
  271. package/bin/skills/pyvene/references/api.md +383 -0
  272. package/bin/skills/pyvene/references/tutorials.md +376 -0
  273. package/bin/skills/qdrant/SKILL.md +493 -0
  274. package/bin/skills/qdrant/references/advanced-usage.md +648 -0
  275. package/bin/skills/qdrant/references/troubleshooting.md +631 -0
  276. package/bin/skills/ray-data/SKILL.md +326 -0
  277. package/bin/skills/ray-data/references/integration.md +82 -0
  278. package/bin/skills/ray-data/references/transformations.md +83 -0
  279. package/bin/skills/ray-train/SKILL.md +406 -0
  280. package/bin/skills/ray-train/references/multi-node.md +628 -0
  281. package/bin/skills/rwkv/SKILL.md +260 -0
  282. package/bin/skills/rwkv/references/architecture-details.md +344 -0
  283. package/bin/skills/rwkv/references/rwkv7.md +386 -0
  284. package/bin/skills/rwkv/references/state-management.md +369 -0
  285. package/bin/skills/saelens/SKILL.md +386 -0
  286. package/bin/skills/saelens/references/README.md +70 -0
  287. package/bin/skills/saelens/references/api.md +333 -0
  288. package/bin/skills/saelens/references/tutorials.md +318 -0
  289. package/bin/skills/segment-anything/SKILL.md +500 -0
  290. package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
  291. package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
  292. package/bin/skills/sentence-transformers/SKILL.md +255 -0
  293. package/bin/skills/sentence-transformers/references/models.md +123 -0
  294. package/bin/skills/sentencepiece/SKILL.md +235 -0
  295. package/bin/skills/sentencepiece/references/algorithms.md +200 -0
  296. package/bin/skills/sentencepiece/references/training.md +304 -0
  297. package/bin/skills/sglang/SKILL.md +442 -0
  298. package/bin/skills/sglang/references/deployment.md +490 -0
  299. package/bin/skills/sglang/references/radix-attention.md +413 -0
  300. package/bin/skills/sglang/references/structured-generation.md +541 -0
  301. package/bin/skills/simpo/SKILL.md +219 -0
  302. package/bin/skills/simpo/references/datasets.md +478 -0
  303. package/bin/skills/simpo/references/hyperparameters.md +452 -0
  304. package/bin/skills/simpo/references/loss-functions.md +350 -0
  305. package/bin/skills/skypilot/SKILL.md +509 -0
  306. package/bin/skills/skypilot/references/advanced-usage.md +491 -0
  307. package/bin/skills/skypilot/references/troubleshooting.md +570 -0
  308. package/bin/skills/slime/SKILL.md +464 -0
  309. package/bin/skills/slime/references/api-reference.md +392 -0
  310. package/bin/skills/slime/references/troubleshooting.md +386 -0
  311. package/bin/skills/speculative-decoding/SKILL.md +467 -0
  312. package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
  313. package/bin/skills/speculative-decoding/references/medusa.md +350 -0
  314. package/bin/skills/stable-diffusion/SKILL.md +519 -0
  315. package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
  316. package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
  317. package/bin/skills/tensorboard/SKILL.md +629 -0
  318. package/bin/skills/tensorboard/references/integrations.md +638 -0
  319. package/bin/skills/tensorboard/references/profiling.md +545 -0
  320. package/bin/skills/tensorboard/references/visualization.md +620 -0
  321. package/bin/skills/tensorrt-llm/SKILL.md +187 -0
  322. package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
  323. package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
  324. package/bin/skills/tensorrt-llm/references/serving.md +470 -0
  325. package/bin/skills/tinker/SKILL.md +362 -0
  326. package/bin/skills/tinker/references/api-reference.md +168 -0
  327. package/bin/skills/tinker/references/getting-started.md +157 -0
  328. package/bin/skills/tinker/references/loss-functions.md +163 -0
  329. package/bin/skills/tinker/references/models-and-lora.md +139 -0
  330. package/bin/skills/tinker/references/recipes.md +280 -0
  331. package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
  332. package/bin/skills/tinker/references/rendering.md +243 -0
  333. package/bin/skills/tinker/references/supervised-learning.md +232 -0
  334. package/bin/skills/tinker-training-cost/SKILL.md +187 -0
  335. package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
  336. package/bin/skills/torchforge/SKILL.md +433 -0
  337. package/bin/skills/torchforge/references/api-reference.md +327 -0
  338. package/bin/skills/torchforge/references/troubleshooting.md +409 -0
  339. package/bin/skills/torchtitan/SKILL.md +358 -0
  340. package/bin/skills/torchtitan/references/checkpoint.md +181 -0
  341. package/bin/skills/torchtitan/references/custom-models.md +258 -0
  342. package/bin/skills/torchtitan/references/float8.md +133 -0
  343. package/bin/skills/torchtitan/references/fsdp.md +126 -0
  344. package/bin/skills/transformer-lens/SKILL.md +346 -0
  345. package/bin/skills/transformer-lens/references/README.md +54 -0
  346. package/bin/skills/transformer-lens/references/api.md +362 -0
  347. package/bin/skills/transformer-lens/references/tutorials.md +339 -0
  348. package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
  349. package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
  350. package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
  351. package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
  352. package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
  353. package/bin/skills/unsloth/SKILL.md +80 -0
  354. package/bin/skills/unsloth/references/index.md +7 -0
  355. package/bin/skills/unsloth/references/llms-full.md +16799 -0
  356. package/bin/skills/unsloth/references/llms-txt.md +12044 -0
  357. package/bin/skills/unsloth/references/llms.md +82 -0
  358. package/bin/skills/verl/SKILL.md +391 -0
  359. package/bin/skills/verl/references/api-reference.md +301 -0
  360. package/bin/skills/verl/references/troubleshooting.md +391 -0
  361. package/bin/skills/vllm/SKILL.md +364 -0
  362. package/bin/skills/vllm/references/optimization.md +226 -0
  363. package/bin/skills/vllm/references/quantization.md +284 -0
  364. package/bin/skills/vllm/references/server-deployment.md +255 -0
  365. package/bin/skills/vllm/references/troubleshooting.md +447 -0
  366. package/bin/skills/weights-and-biases/SKILL.md +590 -0
  367. package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
  368. package/bin/skills/weights-and-biases/references/integrations.md +700 -0
  369. package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
  370. package/bin/skills/whisper/SKILL.md +317 -0
  371. package/bin/skills/whisper/references/languages.md +189 -0
  372. package/bin/synsc +0 -0
  373. package/package.json +10 -0
@@ -0,0 +1,546 @@
1
+ # /// script
2
+ # requires-python = ">=3.12"
3
+ # dependencies = [
4
+ # "polars>=1.31.0",
5
+ # "huggingface-hub",
6
+ # "datasets",
7
+ # "ascii-graph",
8
+ # ]
9
+ # ///
10
+ """
11
+ Analyze educational quality trends across CommonCrawl dumps using Polars streaming.
12
+
13
+ Answers: "Is the web getting more educational over time?"
14
+
15
+ Demonstrates Polars HF Hub integration - process 50M+ docs without downloading 300GB+.
16
+
17
+ Example usage:
18
+ # Analyze English PDFs (default)
19
+ uv run finepdfs-stats.py
20
+
21
+ # Analyze all 70+ languages
22
+ uv run finepdfs-stats.py --all-languages
23
+
24
+ # Quick test
25
+ uv run finepdfs-stats.py --limit 10000 --show-plan
26
+
27
+ # Save results to HF Hub
28
+ uv run finepdfs-stats.py --output-repo username/finepdfs-temporal-stats
29
+
30
+ # Run on HF Jobs
31
+ hf jobs uv run \\
32
+ -s HF_TOKEN \\
33
+ -e HF_XET_HIGH_PERFORMANCE=1 \\
34
+ https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\
35
+ -- --output-repo username/stats
36
+ """
37
+
38
+ import argparse
39
+ import logging
40
+ import os
41
+ import sys
42
+ import time
43
+ from pathlib import Path
44
+
45
+ import polars as pl
46
+ from ascii_graph import Pyasciigraph
47
+ from datasets import Dataset
48
+ from huggingface_hub import HfApi, create_repo, list_repo_tree, login
49
+
50
# Configure logging once at import time: INFO level, with timestamp and
# severity in front of every message.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
)
# Module-level logger used by the functions in this script.
logger = logging.getLogger(__name__)
54
+
55
# Common language+script codes for finepdfs-edu, mapped to a readable label.
# The keys are the fallback subset list when the Hub listing fails
# (list_available_languages); the values are used as display labels.
COMMON_LANGUAGES: dict[str, str] = {
    "eng_Latn": "English (Latin script)",
    "fra_Latn": "French (Latin script)",
    "deu_Latn": "German (Latin script)",
    "spa_Latn": "Spanish (Latin script)",
    "por_Latn": "Portuguese (Latin script)",
    "ita_Latn": "Italian (Latin script)",
    "nld_Latn": "Dutch (Latin script)",
    "pol_Latn": "Polish (Latin script)",
    "rus_Cyrl": "Russian (Cyrillic script)",
    "zho_Hans": "Chinese (Simplified)",
    "zho_Hant": "Chinese (Traditional)",
    "jpn_Jpan": "Japanese",
    "kor_Hang": "Korean",
    "ara_Arab": "Arabic",
    "hin_Deva": "Hindi (Devanagari)",
}
73
+
74
+
75
def list_available_languages(dataset_id: str) -> list[str]:
    """List available language subsets in the dataset.

    Args:
        dataset_id: Hub dataset repository ID whose ``data/`` folder is listed.

    Returns:
        The sorted names of the direct children of ``data/``. If the listing
        fails for any reason, a warning is logged and the keys of
        ``COMMON_LANGUAGES`` are returned instead.
    """
    prefix = "data/"
    try:
        tree = list_repo_tree(dataset_id, path_in_repo="data", repo_type="dataset")
        languages = []
        for item in tree:
            if not item.path.startswith(prefix):
                continue
            # Strip only the leading prefix; str.replace would also remove any
            # later "data/" occurrence inside the path.
            name = item.path.removeprefix(prefix)
            if "/" not in name:  # keep direct children only
                languages.append(name)
        return sorted(languages)
    except Exception as e:
        # Deliberate best-effort: any failure while listing falls back to the
        # built-in language list instead of aborting the script.
        logger.warning(f"Could not list languages: {e}")
        return list(COMMON_LANGUAGES)
89
+
90
+
91
def compute_temporal_stats(df: pl.LazyFrame, output_path: Path) -> pl.DataFrame:
    """Single scan: compute stats grouped by dump for temporal analysis.

    Args:
        df: Lazy frame providing the ``dump``, ``token_count`` and
            ``fw_edu_scores`` columns read by the aggregation.
        output_path: Parquet file the aggregated result is written to.

    Returns:
        The aggregated frame read back from ``output_path``, one row per dump
        with ``doc_count``, ``total_tokens``, ``avg_edu_score`` and
        ``high_edu_count``.
    """
    # Per-document score: the mean of that document's list of edu scores.
    doc_edu_score = pl.col("fw_edu_scores").list.mean()

    query = df.group_by("dump").agg(
        pl.len().alias("doc_count"),
        pl.col("token_count").sum().alias("total_tokens"),
        doc_edu_score.mean().alias("avg_edu_score"),
        # Number of documents whose per-document score is at least 3.
        (doc_edu_score >= 3).sum().alias("high_edu_count"),
    )
    # Execute with the streaming engine, writing straight to disk, then load
    # the (small) aggregated result back into memory.
    query.sink_parquet(output_path, engine="streaming")
    return pl.read_parquet(output_path)
101
+
102
+
103
def compute_global_stats(temporal: pl.DataFrame) -> pl.DataFrame:
    """Compute global stats from temporal breakdown.

    Args:
        temporal: Per-dump frame with ``doc_count``, ``total_tokens``,
            ``avg_edu_score`` and ``high_edu_count`` columns.

    Returns:
        A single-row frame with ``total_docs``, ``total_tokens``,
        ``avg_edu_score`` (per-dump averages weighted by ``doc_count``),
        ``high_edu_rate`` and ``num_dumps``. When there are no documents the
        two ratios are reported as 0.0 instead of raising on division by zero.
    """
    total = temporal["doc_count"].sum()
    # Weight each dump's average by its document count so the result is the
    # average over documents rather than over dumps.
    weighted_score_sum = (temporal["avg_edu_score"] * temporal["doc_count"]).sum()
    high_edu_total = temporal["high_edu_count"].sum()
    has_docs = bool(total)  # False for an empty breakdown (total of 0)

    return pl.DataFrame(
        {
            "total_docs": [total],
            "total_tokens": [temporal["total_tokens"].sum()],
            "avg_edu_score": [weighted_score_sum / total if has_docs else 0.0],
            "high_edu_rate": [high_edu_total / total if has_docs else 0.0],
            "num_dumps": [len(temporal)],
        }
    )
117
+
118
+
119
def format_temporal_stats(temporal: pl.DataFrame) -> pl.DataFrame:
    """Format temporal stats with high_edu_rate, sorted chronologically.

    Args:
        temporal: Per-dump frame with ``dump``, ``doc_count``,
            ``avg_edu_score`` and ``high_edu_count`` columns.

    Returns:
        A frame with the columns ``dump``, ``doc_count``, ``avg_edu_score``
        and ``high_edu_rate`` (``high_edu_count / doc_count``), sorted by
        ``dump``.
    """
    high_edu_rate = (pl.col("high_edu_count") / pl.col("doc_count")).alias(
        "high_edu_rate"
    )
    return (
        temporal.with_columns(high_edu_rate)
        .select(["dump", "doc_count", "avg_edu_score", "high_edu_rate"])
        # Chronological order (CC-MAIN-2017-xx comes before CC-MAIN-2024-xx)
        .sort("dump")
    )
130
+
131
+
132
def create_ascii_charts(temporal_stats: pl.DataFrame) -> str:
    """Create ASCII bar charts showing temporal trends.

    Args:
        temporal_stats: Per-dump frame with ``dump``, ``doc_count``,
            ``avg_edu_score`` and ``high_edu_rate`` columns.

    Returns:
        Two charts joined into one newline-separated string: the yearly
        high-edu rate (as a percentage) and the yearly average edu score,
        separated by a blank line.
    """
    # Extract year from dump name (CC-MAIN-2024-42 -> 2024), then collapse the
    # dumps of each year into one row for cleaner display. The yearly score and
    # rate are plain means over that year's dumps (not weighted by doc_count).
    # NOTE(review): a dump name that does not match the pattern yields a null
    # year and therefore a None chart label - confirm inputs always match.
    yearly = (
        temporal_stats.with_columns(
            pl.col("dump").str.extract(r"CC-MAIN-(\d{4})", 1).alias("year")
        )
        .group_by("year")
        .agg(
            pl.col("doc_count").sum(),
            pl.col("avg_edu_score").mean(),
            pl.col("high_edu_rate").mean(),
        )
        .sort("year")
    )
    # Materialize the rows once; both charts are built from the same data.
    rows = yearly.to_dicts()

    lines = []

    # High edu rate chart (more dramatic differences)
    rate_data = [(row["year"], row["high_edu_rate"] * 100) for row in rows]
    rate_graph = Pyasciigraph(line_length=60, float_format="{0:.1f}%")
    lines.extend(rate_graph.graph("High Educational Content (edu >= 3)", rate_data))

    lines.append("")

    # Avg edu score chart
    score_data = [(row["year"], row["avg_edu_score"]) for row in rows]
    score_graph = Pyasciigraph(line_length=60, float_format="{0:.2f}")
    lines.extend(score_graph.graph("Average Educational Score", score_data))

    return "\n".join(lines)
169
+
170
+
171
def _trend_row(row: dict) -> str:
    """Format one yearly aggregate as a row of the "Key Finding" table."""
    return (
        f'| {row["year"]} | {row["avg_edu_score"]:.2f} '
        f'| {row["high_edu_rate"] * 100:.1f}% |'
    )


def create_readme(
    args,
    global_stats: pl.DataFrame,
    temporal_stats: pl.DataFrame,
    scan_time: float,
    ascii_charts: str,
) -> str:
    """Create README content (dataset card) for the stats dataset.

    Args:
        args: Parsed CLI namespace; ``all_languages``, ``lang`` and
            ``source_dataset`` are read.
        global_stats: Summary frame; only its first row is used.
        temporal_stats: Per-dump frame with ``dump``, ``doc_count``,
            ``avg_edu_score`` and ``high_edu_rate`` columns.
        scan_time: Scan duration in seconds, used for the throughput figure.
        ascii_charts: Pre-rendered charts embedded verbatim in a code fence.

    Returns:
        The full README text, starting with the YAML front matter.
    """
    stats = global_stats.to_dicts()[0]
    total_docs = stats.get("total_docs", 0)
    docs_per_sec = total_docs / scan_time if scan_time > 0 else 0

    # Get first and last year averages for trend (more representative than
    # single dumps).
    yearly_rows = (
        temporal_stats.with_columns(
            pl.col("dump").str.extract(r"CC-MAIN-(\d{4})", 1).alias("year")
        )
        .group_by("year")
        .agg(
            pl.col("doc_count").sum(),
            pl.col("avg_edu_score").mean(),
            pl.col("high_edu_rate").mean(),
        )
        .sort("year")
        .to_dicts()
    )
    if yearly_rows:
        # With a single year, the same row appears twice, as before.
        trend_rows = "\n".join(
            _trend_row(row) for row in (yearly_rows[0], yearly_rows[-1])
        )
    else:
        # No dumps at all: emit a placeholder row instead of raising IndexError.
        trend_rows = "| n/a | n/a | n/a |"

    scope = (
        "all languages"
        if args.all_languages
        else COMMON_LANGUAGES.get(args.lang, args.lang)
    )
    lang_flag = "--all-languages" if args.all_languages else f"--lang {args.lang}"

    # The front matter must stay valid YAML: each ``data_files`` entry is nested
    # under its ``config_name`` list item.
    return f"""---
tags:
  - uv-script
  - statistics
  - polars
  - finepdfs-edu
  - temporal-analysis
license: odc-by
configs:
  - config_name: global_stats
    data_files: global_stats/train-*.parquet
  - config_name: temporal_stats
    data_files: temporal_stats/train-*.parquet
default_viewer_config: temporal_stats
---

# Is the Web Getting More Educational?

Temporal analysis of educational quality in **{scope}** across {stats.get("num_dumps", 0)} CommonCrawl dumps.

## Trend

```
{ascii_charts}
```

## Key Finding

| Year | Avg Edu Score | High Edu Rate |
|------|---------------|---------------|
{trend_rows}

## Performance

- **{total_docs:,} documents** processed in **{scan_time:.0f} seconds**
- **{docs_per_sec:,.0f} docs/sec** using Polars streaming
- Single scan, no full dataset download required

## Summary

| Metric | Value |
|--------|-------|
| Scope | {scope} |
| Total Documents | {total_docs:,} |
| Total Tokens | {stats.get("total_tokens", 0):,} |
| Avg Edu Score | {stats.get("avg_edu_score", 0):.3f} |
| High Edu Rate | {stats.get("high_edu_rate", 0) * 100:.1f}% |
| CommonCrawl Dumps | {stats.get("num_dumps", 0)} |

## Files

- `global_stats` - Overall summary
- `temporal_stats` - Per-dump breakdown (sorted chronologically)

## Reproduce

```bash
uv run https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\
{lang_flag} --output-repo your-username/stats
```

## Source

- **Dataset**: [{args.source_dataset}](https://huggingface.co/datasets/{args.source_dataset})
- **Script**: [uv-scripts/dataset-stats](https://huggingface.co/datasets/uv-scripts/dataset-stats)
"""
272
+
273
+
274
def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the stats script."""
    parser = argparse.ArgumentParser(
        description="Analyze educational quality trends across CommonCrawl dumps",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )

    parser.add_argument(
        "--source-dataset",
        type=str,
        default="HuggingFaceFW/finepdfs-edu",
        help="Source dataset (default: HuggingFaceFW/finepdfs-edu)",
    )

    parser.add_argument(
        "--lang",
        type=str,
        default="eng_Latn",
        help="Language+script code (default: eng_Latn)",
    )

    parser.add_argument(
        "--all-languages",
        action="store_true",
        help="Analyze all languages (70+) instead of single language",
    )

    parser.add_argument(
        "--show-plan",
        action="store_true",
        help="Show Polars query plan (demonstrates optimization)",
    )

    parser.add_argument(
        "--list-languages",
        action="store_true",
        help="List available languages and exit",
    )

    parser.add_argument(
        "--limit",
        type=int,
        help="Limit to first N rows (for testing)",
    )

    parser.add_argument(
        "--output-repo",
        type=str,
        help="HuggingFace dataset repository to upload results",
    )

    parser.add_argument(
        "--output-dir",
        type=str,
        default="./stats_output",
        help="Local directory for output files",
    )

    parser.add_argument(
        "--hf-token",
        type=str,
        help="HuggingFace API token (or set HF_TOKEN env var)",
    )

    parser.add_argument(
        "--private",
        action="store_true",
        help="Make the output dataset private",
    )

    return parser


def main():
    """Run the analysis from the command line.

    Parses arguments, scans the source parquet files lazily, writes the
    global and temporal stats under ``--output-dir``, prints a report, and
    uploads the results plus a README when ``--output-repo`` is given.
    """
    parser = _build_parser()
    args = parser.parse_args()

    # A falsy check would treat "--limit 0" as "no limit" and silently start a
    # full scan; reject non-positive values up front instead.
    if args.limit is not None and args.limit <= 0:
        parser.error("--limit must be a positive integer")

    # Check for high-performance mode
    if os.environ.get("HF_XET_HIGH_PERFORMANCE"):
        logger.info("High-performance mode enabled (HF_XET_HIGH_PERFORMANCE=1)")

    # List languages mode
    if args.list_languages:
        print(f"Available language+script codes for {args.source_dataset}:\n")
        print("Common languages:")
        for code, name in COMMON_LANGUAGES.items():
            print(f" {code:12} - {name}")
        print("\nFetching full list from HF Hub...")
        all_langs = list_available_languages(args.source_dataset)
        print(f"\nAll available ({len(all_langs)} total):")
        for lang in all_langs[:30]:  # Show first 30
            name = COMMON_LANGUAGES.get(lang, "")
            print(f" {lang:12} {name}")
        if len(all_langs) > 30:
            print(f" ... and {len(all_langs) - 30} more")
        sys.exit(0)

    # Build the parquet path
    if args.all_languages:
        source_path = f"hf://datasets/{args.source_dataset}/data/*/train/*.parquet"
        scope_desc = "all languages"
    else:
        source_path = (
            f"hf://datasets/{args.source_dataset}/data/{args.lang}/train/*.parquet"
        )
        scope_desc = f"{args.lang} ({COMMON_LANGUAGES.get(args.lang, 'unknown')})"

    logger.info(f"Scanning: {source_path}")
    logger.info(f"Scope: {scope_desc}")

    # Create lazy frame - this doesn't load any data yet!
    logger.info("Creating lazy query plan...")
    df = pl.scan_parquet(source_path)

    # Apply limit if specified
    if args.limit is not None:
        logger.info(f"Limiting to first {args.limit:,} rows")
        df = df.head(args.limit)

    # Show query plan if requested
    if args.show_plan:
        # Build a sample query to show the plan
        sample_query = df.select(
            pl.len(),
            pl.col("token_count").sum(),
            pl.col("language").n_unique(),
        )
        print("\nQuery Plan (showing Polars optimization):")
        print("=" * 60)
        print(sample_query.explain())
        print("=" * 60)
        print("\nNote: Polars uses projection pushdown - only reads columns needed!")
        print("The 'text' column is never loaded, making this very fast.\n")

    # Create output directory
    output_dir = Path(args.output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    # Single scan: compute temporal stats
    logger.info("Computing temporal stats (single scan)...")
    start = time.perf_counter()
    temporal_path = output_dir / "temporal_stats.parquet"
    temporal_raw = compute_temporal_stats(df, temporal_path)
    scan_time = time.perf_counter() - start
    logger.info(f"Scan complete in {scan_time:.2f}s - {len(temporal_raw)} dumps")

    # Compute stats
    global_stats = compute_global_stats(temporal_raw)
    temporal_stats = format_temporal_stats(temporal_raw)

    # Save (the formatted temporal stats replace the file at temporal_path)
    global_stats.write_parquet(output_dir / "global_stats.parquet")
    temporal_stats.write_parquet(output_dir / "temporal_stats.parquet")

    # Print results
    total_docs = global_stats["total_docs"][0]
    docs_per_sec = total_docs / scan_time if scan_time > 0 else 0

    print("\n" + "=" * 70)
    print("IS THE WEB GETTING MORE EDUCATIONAL?")
    print("=" * 70)

    print(f"\nScope: {scope_desc}")
    print(f"Dataset: {args.source_dataset}")

    print("\n" + "-" * 70)
    print("GLOBAL STATS")
    print("-" * 70)
    print(global_stats)

    print("\n" + "-" * 70)
    print(f"TEMPORAL TREND ({len(temporal_stats)} CommonCrawl dumps)")
    print("-" * 70)
    # Show first 5 and last 5
    if len(temporal_stats) > 10:
        print("Earliest dumps:")
        print(temporal_stats.head(5))
        print("\n...")
        print("\nLatest dumps:")
        print(temporal_stats.tail(5))
    else:
        print(temporal_stats)

    # Create ASCII charts
    ascii_charts = create_ascii_charts(temporal_stats)
    print("\n" + "-" * 70)
    print("TREND VISUALIZATION")
    print("-" * 70)
    print(ascii_charts)

    print("\n" + "-" * 70)
    print("PERFORMANCE")
    print("-" * 70)
    print(f"Scan time: {scan_time:.2f}s")
    print(f"Documents: {total_docs:,}")
    print(f"Throughput: {docs_per_sec:,.0f} docs/sec")

    logger.info(f"Results saved to: {output_dir}")

    # Upload to HF Hub if requested
    if args.output_repo:
        hf_token = args.hf_token or os.environ.get("HF_TOKEN")
        if hf_token:
            login(token=hf_token)

        api = HfApi(token=hf_token)

        logger.info(f"Creating/updating dataset repository: {args.output_repo}")
        create_repo(
            args.output_repo,
            repo_type="dataset",
            private=args.private,
            token=hf_token,
            exist_ok=True,
        )

        # Upload each as a dataset config
        configs = [
            ("global_stats", global_stats),
            ("temporal_stats", temporal_stats),
        ]

        for config_name, stats_df in configs:
            logger.info(f"Uploading {config_name}...")
            ds = Dataset.from_polars(stats_df)
            ds.push_to_hub(
                args.output_repo,
                config_name=config_name,
                token=hf_token,
                private=args.private,
            )
            time.sleep(1)  # Avoid 409 conflicts

        # Upload README
        readme_content = create_readme(
            args, global_stats, temporal_stats, scan_time, ascii_charts
        )
        api.upload_file(
            path_or_fileobj=readme_content.encode(),
            path_in_repo="README.md",
            repo_id=args.output_repo,
            repo_type="dataset",
            token=hf_token,
        )

        dataset_url = f"https://huggingface.co/datasets/{args.output_repo}"
        logger.info(f"Dataset uploaded: {dataset_url}")
        print(f"\nResults uploaded to: {dataset_url}")
517
+
518
+
519
if __name__ == "__main__":
    # Invoked with no arguments: print a short usage guide and exit instead of
    # starting a scan with the defaults.
    if len(sys.argv) == 1:
        usage_lines = (
            "Is the Web Getting More Educational?",
            "=" * 40,
            "\nAnalyze educational quality trends across CommonCrawl dumps",
            "using Polars streaming - no download needed!\n",
            "Example commands:\n",
            "# Quick test:",
            "uv run finepdfs-stats.py --limit 10000\n",
            "# Analyze English PDFs:",
            "uv run finepdfs-stats.py\n",
            "# Analyze ALL 70+ languages:",
            "uv run finepdfs-stats.py --all-languages\n",
            "# Show query plan (see Polars optimization):",
            "uv run finepdfs-stats.py --show-plan --limit 1000\n",
            "# Save results to HF Hub:",
            "uv run finepdfs-stats.py --output-repo username/temporal-stats\n",
            "# Run on HF Jobs:",
            "hf jobs uv run \\",
            " -s HF_TOKEN \\",
            " -e HF_XET_HIGH_PERFORMANCE=1 \\",
            " https://huggingface.co/datasets/uv-scripts/dataset-stats/raw/main/finepdfs-stats.py \\",
            " -- --output-repo username/stats",
        )
        # One entry per output line, exactly as separate print() calls would emit.
        print(*usage_lines, sep="\n")
        sys.exit(0)

    main()