@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,522 @@
|
|
|
1
|
+
#!/usr/bin/env -S uv run
|
|
2
|
+
# /// script
|
|
3
|
+
# requires-python = ">=3.10"
|
|
4
|
+
# dependencies = [
|
|
5
|
+
# "huggingface_hub>=0.20.0",
|
|
6
|
+
# ]
|
|
7
|
+
# ///
|
|
8
|
+
"""
|
|
9
|
+
Hugging Face Dataset Manager
|
|
10
|
+
|
|
11
|
+
Enhanced dataset creation and management tool designed to work alongside
|
|
12
|
+
the HF MCP server. Provides dataset creation, configuration, and content
|
|
13
|
+
management capabilities optimized for conversational AI training data.
|
|
14
|
+
|
|
15
|
+
Version: 2.0.0
|
|
16
|
+
|
|
17
|
+
Usage:
|
|
18
|
+
uv run dataset_manager.py init --repo_id username/dataset-name
|
|
19
|
+
uv run dataset_manager.py quick_setup --repo_id username/dataset-name --template chat
|
|
20
|
+
uv run dataset_manager.py add_rows --repo_id username/dataset-name --rows_json '[{"messages": [...]}]'
|
|
21
|
+
uv run dataset_manager.py stats --repo_id username/dataset-name
|
|
22
|
+
uv run dataset_manager.py list_templates
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
import os
|
|
26
|
+
import json
|
|
27
|
+
import time
|
|
28
|
+
import argparse
|
|
29
|
+
from pathlib import Path
|
|
30
|
+
from typing import List, Dict, Any, Optional
|
|
31
|
+
from huggingface_hub import HfApi, create_repo
|
|
32
|
+
from huggingface_hub.utils import HfHubHTTPError
|
|
33
|
+
|
|
34
|
+
# Configuration
|
|
35
|
+
HF_TOKEN = os.environ.get("HF_TOKEN")
|
|
36
|
+
EXAMPLES_DIR = Path(__file__).parent.parent / "examples"
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def init_dataset(repo_id, token=None, private=True):
    """
    Initialize a new dataset repository on Hugging Face Hub.

    Creates the dataset repository (an already-existing repository is
    reported, not treated as an error) and then seeds it with a minimal
    README.md, but only when the repository does not already contain one.

    Args:
        repo_id: Repository identifier, e.g. "username/dataset-name". The part
            after the last "/" is used as the README title.
        token: Access token forwarded to HfApi and create_repo.
        private: Forwarded to create_repo as the repository visibility flag.

    Raises:
        HfHubHTTPError: If repository creation fails for any reason other than
            an HTTP 409 conflict.
    """
    api = HfApi(token=token)
    try:
        create_repo(repo_id, repo_type="dataset", private=private, token=token)
        print(f"Created dataset repository: {repo_id}")
    except HfHubHTTPError as e:
        # Prefer the HTTP status over searching the error text: the message
        # can contain "409" by coincidence (e.g. inside the repo id). The text
        # search is kept only as a fallback when no response is attached.
        status_code = getattr(getattr(e, "response", None), "status_code", None)
        if status_code is not None:
            already_exists = status_code == 409
        else:
            already_exists = "409" in str(e)
        if not already_exists:
            raise
        print(f"Repository {repo_id} already exists.")

    # Create a basic README.md with metadata if it doesn't exist
    dataset_name = repo_id.split("/")[-1]
    readme_content = f"""---
license: mit
---

# {dataset_name}

This dataset was created using the Claude Dataset Skill.
"""
    try:
        # upload_file replaces an existing file, so check first to avoid
        # clobbering a README that is already in the repository.
        if api.file_exists(repo_id, "README.md", repo_type="dataset"):
            print(f"README.md already exists in {repo_id}; leaving it unchanged.")
            return
        api.upload_file(
            path_or_fileobj=readme_content.encode("utf-8"),
            path_in_repo="README.md",
            repo_id=repo_id,
            repo_type="dataset",
            commit_message="Initialize dataset README",
        )
    except Exception as e:
        # Best effort: a README problem must not abort repository setup.
        print(f"Note: README might already exist or failed to update: {e}")
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
def define_config(repo_id, system_prompt=None, token=None):
    """
    Write the dataset configuration file to the repository.

    Builds a small JSON document holding a fixed config version and the
    current timestamp, adds the system prompt when a non-empty one is given,
    and uploads the result as config.json to the dataset repository.

    Args:
        repo_id: Target dataset repository identifier.
        system_prompt: Optional prompt text; stored under "system_prompt"
            only when truthy.
        token: Access token forwarded to HfApi.
    """
    hub = HfApi(token=token)

    payload = {"dataset_config": {"version": "1.0", "created_at": time.time()}}
    if system_prompt:
        payload["system_prompt"] = system_prompt

    # Serialize once, then hand the bytes to the Hub client.
    serialized = json.dumps(payload, indent=2).encode("utf-8")
    hub.upload_file(
        path_or_fileobj=serialized,
        path_in_repo="config.json",
        repo_id=repo_id,
        repo_type="dataset",
        commit_message="Update dataset configuration",
    )
    print(f"Configuration updated for {repo_id}")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def load_dataset_template(template_name: str) -> Dict[str, Any]:
    """Load dataset template configuration from templates directory.

    The template is read from "<EXAMPLES_DIR parent>/templates/<name>.json".

    Args:
        template_name: Template file stem, without the ".json" suffix.

    Returns:
        The parsed JSON content of the template, or an empty dict when no
        file with that name exists (the available names are printed).
    """
    templates_dir = EXAMPLES_DIR.parent / "templates"
    template_path = templates_dir / f"{template_name}.json"
    if not template_path.exists():
        # Sorted so the listing is stable; glob order is filesystem-dependent.
        available_templates = sorted(f.stem for f in templates_dir.glob("*.json"))
        print(f"❌ Template '{template_name}' not found.")
        print(f"Available templates: {', '.join(available_templates)}")
        return {}

    # Read as UTF-8 explicitly rather than relying on the locale default,
    # which can mis-decode non-ASCII template text on some platforms.
    with open(template_path, encoding="utf-8") as f:
        return json.load(f)
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def validate_by_template(rows: List[Dict[str, Any]], template: Dict[str, Any]) -> bool:
    """Validate data according to template schema.

    Each row is checked in order: it must be an object, contain every
    required field, satisfy the declared field types, and pass the
    format-specific check for "chat", "classification" or "tabular"
    templates. Missing recommended fields only produce a warning.

    Args:
        rows: Candidate dataset rows.
        template: Template definition; "validation_schema" and "type" are
            read from it.

    Returns:
        True when every row passes. False on the first failing row, or when
        the template is empty. The reason for a row failure is printed.
    """
    if not template:
        return False

    schema = template.get("validation_schema", {})
    required_fields = set(schema.get("required_fields", []))
    recommended_fields = set(schema.get("recommended_fields", []))
    field_types = schema.get("field_types", {})
    # Read leniently, like the other template keys, so a template without a
    # "type" gets only the generic checks instead of raising KeyError.
    template_type = template.get("type", "unknown")

    for i, row in enumerate(rows):
        # Rows come from user-supplied JSON; anything but an object cannot
        # be checked field by field.
        if not isinstance(row, dict):
            print(f"Row {i}: Expected object, got {type(row).__name__}")
            return False

        # Check required fields
        missing = required_fields - set(row)
        if missing:
            print(f"Row {i}: Missing required fields: {missing}")
            return False

        # Validate field types
        for field, expected_type in field_types.items():
            if field in row and not _validate_field_type(
                row[field], expected_type, f"Row {i}, field '{field}'"
            ):
                return False

        # Template-specific validation
        if template_type == "chat":
            if not _validate_chat_format(row, i):
                return False
        elif template_type == "classification":
            if not _validate_classification_format(row, i):
                return False
        elif template_type == "tabular":
            if not _validate_tabular_format(row, i):
                return False

        # Warn about missing recommended fields
        missing_recommended = recommended_fields - set(row)
        if missing_recommended:
            print(f"Row {i}: Recommended to include: {missing_recommended}")

    print(f"✓ Validated {len(rows)} examples for {template_type} dataset")
    return True
|
|
151
|
+
|
|
152
|
+
|
|
153
|
+
def _validate_field_type(value: Any, expected_type: str, context: str) -> bool:
|
|
154
|
+
"""Validate individual field type."""
|
|
155
|
+
if expected_type.startswith("enum:"):
|
|
156
|
+
valid_values = expected_type[5:].split(",")
|
|
157
|
+
if value not in valid_values:
|
|
158
|
+
print(f"{context}: Invalid value '{value}'. Must be one of: {valid_values}")
|
|
159
|
+
return False
|
|
160
|
+
elif expected_type == "array" and not isinstance(value, list):
|
|
161
|
+
print(f"{context}: Expected array, got {type(value).__name__}")
|
|
162
|
+
return False
|
|
163
|
+
elif expected_type == "object" and not isinstance(value, dict):
|
|
164
|
+
print(f"{context}: Expected object, got {type(value).__name__}")
|
|
165
|
+
return False
|
|
166
|
+
elif expected_type == "string" and not isinstance(value, str):
|
|
167
|
+
print(f"{context}: Expected string, got {type(value).__name__}")
|
|
168
|
+
return False
|
|
169
|
+
elif expected_type == "number" and not isinstance(value, (int, float)):
|
|
170
|
+
print(f"{context}: Expected number, got {type(value).__name__}")
|
|
171
|
+
return False
|
|
172
|
+
|
|
173
|
+
return True
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
def _validate_chat_format(row: Dict[str, Any], row_index: int) -> bool:
|
|
177
|
+
"""Validate chat-specific format."""
|
|
178
|
+
messages = row.get("messages", [])
|
|
179
|
+
if not isinstance(messages, list) or len(messages) == 0:
|
|
180
|
+
print(f"Row {row_index}: 'messages' must be a non-empty list")
|
|
181
|
+
return False
|
|
182
|
+
|
|
183
|
+
valid_roles = {"user", "assistant", "tool", "system"}
|
|
184
|
+
for j, msg in enumerate(messages):
|
|
185
|
+
if not isinstance(msg, dict):
|
|
186
|
+
print(f"Row {row_index}, message {j}: Must be an object")
|
|
187
|
+
return False
|
|
188
|
+
if "role" not in msg or msg["role"] not in valid_roles:
|
|
189
|
+
print(f"Row {row_index}, message {j}: Invalid role. Use: {valid_roles}")
|
|
190
|
+
return False
|
|
191
|
+
if "content" not in msg:
|
|
192
|
+
print(f"Row {row_index}, message {j}: Missing 'content' field")
|
|
193
|
+
return False
|
|
194
|
+
|
|
195
|
+
return True
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
def _validate_classification_format(row: Dict[str, Any], row_index: int) -> bool:
|
|
199
|
+
"""Validate classification-specific format."""
|
|
200
|
+
if "text" not in row:
|
|
201
|
+
print(f"Row {row_index}: Missing 'text' field")
|
|
202
|
+
return False
|
|
203
|
+
if "label" not in row:
|
|
204
|
+
print(f"Row {row_index}: Missing 'label' field")
|
|
205
|
+
return False
|
|
206
|
+
|
|
207
|
+
return True
|
|
208
|
+
|
|
209
|
+
|
|
210
|
+
def _validate_tabular_format(row: Dict[str, Any], row_index: int) -> bool:
|
|
211
|
+
"""Validate tabular-specific format."""
|
|
212
|
+
if "data" not in row:
|
|
213
|
+
print(f"Row {row_index}: Missing 'data' field")
|
|
214
|
+
return False
|
|
215
|
+
if "columns" not in row:
|
|
216
|
+
print(f"Row {row_index}: Missing 'columns' field")
|
|
217
|
+
return False
|
|
218
|
+
|
|
219
|
+
data = row["data"]
|
|
220
|
+
columns = row["columns"]
|
|
221
|
+
|
|
222
|
+
if not isinstance(data, list):
|
|
223
|
+
print(f"Row {row_index}: 'data' must be an array")
|
|
224
|
+
return False
|
|
225
|
+
if not isinstance(columns, list):
|
|
226
|
+
print(f"Row {row_index}: 'columns' must be an array")
|
|
227
|
+
return False
|
|
228
|
+
|
|
229
|
+
return True
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
def validate_training_data(rows: List[Dict[str, Any]], template_name: str = "chat") -> bool:
    """
    Validate rows against the named dataset template.

    When the template cannot be loaded, a notice is printed and only the
    basic per-row validation is applied.
    """
    loaded = load_dataset_template(template_name)
    if loaded:
        return validate_by_template(rows, loaded)

    print(f"❌ Could not load template '{template_name}', falling back to basic validation")
    return _basic_validation(rows)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def _basic_validation(rows: List[Dict[str, Any]]) -> bool:
|
|
246
|
+
"""Basic validation when no template is available."""
|
|
247
|
+
for i, row in enumerate(rows):
|
|
248
|
+
if not isinstance(row, dict):
|
|
249
|
+
print(f"Row {i}: Must be a dictionary/object")
|
|
250
|
+
return False
|
|
251
|
+
print(f"✓ Basic validation passed for {len(rows)} rows")
|
|
252
|
+
return True
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def add_rows(
    repo_id: str,
    rows: List[Dict[str, Any]],
    split: str = "train",
    validate: bool = True,
    template: str = "chat",
    token: Optional[str] = None,
) -> None:
    """
    Upload a batch of rows to the dataset repo as a new JSONL chunk.

    The rows are serialized one JSON document per line and uploaded as
    ``data/<split>-<millisecond timestamp>.jsonl``. Nothing is uploaded when
    ``rows`` is empty or when validation is requested and fails. Upload
    errors are printed rather than raised.

    Args:
        repo_id: Repository identifier (username/dataset-name)
        rows: List of training examples
        split: Dataset split name (train, test, validation)
        validate: Whether to validate data structure before upload
        template: Dataset template type (chat, classification, qa, completion, tabular, custom)
        token: HuggingFace API token
    """
    client = HfApi(token=token)

    if not rows:
        print("No rows to add.")
        return

    # Optional structural validation against the chosen template
    if validate:
        if not validate_training_data(rows, template):
            print("❌ Validation failed. Use --no-validate to skip validation.")
            return

    # One JSON document per line
    payload = "\n".join(map(json.dumps, rows))

    # The millisecond timestamp gives each chunk its own file name
    chunk_path = f"data/{split}-{int(time.time() * 1000)}.jsonl"

    try:
        client.upload_file(
            path_or_fileobj=payload.encode("utf-8"),
            path_in_repo=chunk_path,
            repo_id=repo_id,
            repo_type="dataset",
            commit_message=f"Add {len(rows)} rows to {split} split",
        )
        print(f"✅ Added {len(rows)} rows to {repo_id} (split: {split})")
    except Exception as err:
        print(f"❌ Upload failed: {err}")
        return
|
|
305
|
+
|
|
306
|
+
|
|
307
|
+
def load_template(template_name: str = "system_prompt_template.txt") -> str:
    """Load a template file from the examples directory.

    Args:
        template_name: File name resolved relative to ``EXAMPLES_DIR``.

    Returns:
        The file's text, or an empty string (after printing a warning) when
        no regular file exists at that path.
    """
    template_path = EXAMPLES_DIR / template_name
    # is_file() rather than exists(): a directory with that name would pass
    # exists() and then make read_text() raise.
    if not template_path.is_file():
        print(f"⚠️ Template {template_name} not found at {template_path}")
        return ""

    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    return template_path.read_text(encoding="utf-8")
|
|
315
|
+
|
|
316
|
+
|
|
317
|
+
def quick_setup(repo_id: str, template_type: str = "chat", token: Optional[str] = None) -> None:
    """
    Bootstrap a dataset repository from a named template.

    Loads the template, creates the repo as private, stores the template's
    system prompt (if any), uploads the template's example rows (if any),
    and prints follow-up commands. Stops early when the template cannot be
    loaded.

    Args:
        repo_id: Repository identifier
        template_type: Dataset template (chat, classification, qa, completion, tabular, custom)
        token: HuggingFace API token
    """
    print(f"🚀 Quick setup for {repo_id} with '{template_type}' template...")

    # Without a template there is nothing to set up
    cfg = load_dataset_template(template_type)
    if not cfg:
        print(f"❌ Could not load template '{template_type}'. Setup cancelled.")
        return

    # Create the repository
    init_dataset(repo_id, token=token, private=True)

    # Store the template's system prompt when it defines one
    prompt_text = cfg.get("system_prompt", "")
    if prompt_text:
        define_config(repo_id, system_prompt=prompt_text, token=token)

    # Seed the dataset with the template's bundled examples
    sample_rows = cfg.get("examples", [])
    if sample_rows:
        add_rows(repo_id, sample_rows, template=template_type, token=token)
        print(f"✅ Added {len(sample_rows)} example(s) from template")

    print(f"✅ Quick setup complete for {repo_id}")
    print(f"📊 Dataset type: {cfg.get('description', 'No description')}")

    # Follow-up hints for the user
    next_steps = (
        "\n📋 Next steps:",
        f"1. Add more data: python scripts/dataset_manager.py add_rows --repo_id {repo_id} --template {template_type} --rows_json 'your_data.json'",
        f"2. View stats: python scripts/dataset_manager.py stats --repo_id {repo_id}",
        f"3. Explore at: https://huggingface.co/datasets/{repo_id}",
    )
    for hint in next_steps:
        print(hint)
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def show_stats(repo_id: str, token: Optional[str] = None) -> None:
    """Print repository metadata, the data-file count and config presence.

    Any exception raised while querying the repository is caught and
    reported as a single failure line.
    """
    client = HfApi(token=token)

    try:
        # Repository-level metadata
        info = client.repo_info(repo_id, repo_type="dataset")
        print(f"\n📊 Dataset Stats: {repo_id}")
        for label, attribute in (
            ("Created", "created_at"),
            ("Updated", "last_modified"),
            ("Private", "private"),
        ):
            print(f"{label}: {getattr(info, attribute)}")

        # Count the files stored under data/
        repo_files = client.list_repo_files(repo_id, repo_type="dataset")
        data_file_count = sum(1 for path in repo_files if path.startswith("data/"))
        print(f"Data files: {data_file_count}")

        has_config = "config.json" in repo_files
        print("✅ Configuration present" if has_config else "⚠️ No configuration found")

    except Exception as err:
        print(f"❌ Failed to get stats: {err}")
|
|
384
|
+
|
|
385
|
+
|
|
386
|
+
def list_available_templates() -> None:
    """List all available dataset templates with descriptions.

    Reads every ``*.json`` file in the ``templates`` directory that sits next
    to ``EXAMPLES_DIR`` and prints its name, type, description and required
    fields. A file that cannot be read or parsed is reported and skipped, so
    one bad template does not hide the others.
    """
    templates_dir = EXAMPLES_DIR.parent / "templates"

    if not templates_dir.exists():
        print("❌ Templates directory not found")
        return

    print("\n📋 Available Dataset Templates:")
    print("=" * 50)

    # glob() order depends on the filesystem; sort for a stable listing.
    for template_file in sorted(templates_dir.glob("*.json")):
        try:
            # Explicit UTF-8 so decoding does not depend on the locale.
            with open(template_file, encoding="utf-8") as f:
                template = json.load(f)

            name = template_file.stem
            desc = template.get("description", "No description available")
            template_type = template.get("type", name)

            print(f"\n🏷️ {name}")
            print(f" Type: {template_type}")
            print(f" Description: {desc}")

            # Show required fields
            schema = template.get("validation_schema", {})
            required = schema.get("required_fields", [])
            if required:
                print(f" Required fields: {', '.join(required)}")

        except Exception as e:
            # Deliberately broad: listing is best-effort per file.
            print(f"❌ Error loading template {template_file.name}: {e}")

    print(
        "\n💡 Usage: python scripts/dataset_manager.py quick_setup --repo_id your-username/dataset-name --template TEMPLATE_NAME"
    )
    print(f"📚 Example templates directory: {templates_dir}")
|
|
423
|
+
|
|
424
|
+
|
|
425
|
+
if __name__ == "__main__":
    # Command-line entry point: one sub-command per dataset operation.
    parser = argparse.ArgumentParser(description="Hugging Face Dataset Manager")
    subparsers = parser.add_subparsers(dest="command", required=True)

    # Template names accepted by both add_rows and quick_setup; kept in one
    # place so the two sub-commands cannot drift apart.
    template_choices = [
        "chat",
        "classification",
        "qa",
        "completion",
        "tabular",
        "custom",
    ]

    # Init command
    init_parser = subparsers.add_parser("init", help="Initialize a new dataset")
    init_parser.add_argument("--repo_id", required=True, help="Repository ID (user/repo_name)")
    init_parser.add_argument("--private", action="store_true", help="Make repository private")

    # Config command
    config_parser = subparsers.add_parser("config", help="Setup dataset config")
    config_parser.add_argument("--repo_id", required=True, help="Repository ID")
    config_parser.add_argument("--system_prompt", help="System prompt to store in config")

    # Add rows command
    add_parser = subparsers.add_parser("add_rows", help="Add rows to the dataset")
    add_parser.add_argument("--repo_id", required=True, help="Repository ID")
    add_parser.add_argument("--split", default="train", help="Dataset split (e.g., train, test)")
    add_parser.add_argument(
        "--template",
        default="chat",
        choices=template_choices,
        help="Dataset template type for validation",
    )
    add_parser.add_argument(
        "--rows_json",
        required=True,
        help="JSON string containing a list of rows",
    )
    add_parser.add_argument(
        "--no-validate",
        dest="validate",
        action="store_false",
        help="Skip data validation",
    )

    # Quick setup command
    setup_parser = subparsers.add_parser("quick_setup", help="Quick setup with template")
    setup_parser.add_argument("--repo_id", required=True, help="Repository ID")
    setup_parser.add_argument(
        "--template",
        default="chat",
        choices=template_choices,
        help="Dataset template type",
    )

    # Stats command
    stats_parser = subparsers.add_parser("stats", help="Show dataset statistics")
    stats_parser.add_argument("--repo_id", required=True, help="Repository ID")

    # List templates command (takes no arguments)
    subparsers.add_parser("list_templates", help="List available dataset templates")

    args = parser.parse_args()

    token = HF_TOKEN
    if not token:
        print("Warning: HF_TOKEN environment variable not set.")

    if args.command == "init":
        init_dataset(args.repo_id, token=token, private=args.private)
    elif args.command == "config":
        define_config(args.repo_id, system_prompt=args.system_prompt, token=token)
    elif args.command == "add_rows":
        # Only the parse is guarded, so a JSON error raised elsewhere is not
        # misreported as a bad --rows_json value.
        try:
            rows = json.loads(args.rows_json)
        except json.JSONDecodeError:
            print("Error: Invalid JSON provided for --rows_json")
        else:
            if not isinstance(rows, list):
                # Previously an uncaught ValueError (traceback); report it
                # cleanly while still exiting with a non-zero status.
                print("Error: rows_json must be a JSON list of objects")
                raise SystemExit(1)
            add_rows(
                args.repo_id,
                rows,
                split=args.split,
                template=args.template,
                validate=args.validate,
                token=token,
            )
    elif args.command == "quick_setup":
        quick_setup(args.repo_id, template_type=args.template, token=token)
    elif args.command == "stats":
        show_stats(args.repo_id, token=token)
    elif args.command == "list_templates":
        list_available_templates()
|