@synsci/cli-darwin-x64 1.1.49
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/skills/accelerate/SKILL.md +332 -0
- package/bin/skills/accelerate/references/custom-plugins.md +453 -0
- package/bin/skills/accelerate/references/megatron-integration.md +489 -0
- package/bin/skills/accelerate/references/performance.md +525 -0
- package/bin/skills/audiocraft/SKILL.md +564 -0
- package/bin/skills/audiocraft/references/advanced-usage.md +666 -0
- package/bin/skills/audiocraft/references/troubleshooting.md +504 -0
- package/bin/skills/autogpt/SKILL.md +403 -0
- package/bin/skills/autogpt/references/advanced-usage.md +535 -0
- package/bin/skills/autogpt/references/troubleshooting.md +420 -0
- package/bin/skills/awq/SKILL.md +310 -0
- package/bin/skills/awq/references/advanced-usage.md +324 -0
- package/bin/skills/awq/references/troubleshooting.md +344 -0
- package/bin/skills/axolotl/SKILL.md +158 -0
- package/bin/skills/axolotl/references/api.md +5548 -0
- package/bin/skills/axolotl/references/dataset-formats.md +1029 -0
- package/bin/skills/axolotl/references/index.md +15 -0
- package/bin/skills/axolotl/references/other.md +3563 -0
- package/bin/skills/bigcode-evaluation-harness/SKILL.md +405 -0
- package/bin/skills/bigcode-evaluation-harness/references/benchmarks.md +393 -0
- package/bin/skills/bigcode-evaluation-harness/references/custom-tasks.md +424 -0
- package/bin/skills/bigcode-evaluation-harness/references/issues.md +394 -0
- package/bin/skills/bitsandbytes/SKILL.md +411 -0
- package/bin/skills/bitsandbytes/references/memory-optimization.md +521 -0
- package/bin/skills/bitsandbytes/references/qlora-training.md +521 -0
- package/bin/skills/bitsandbytes/references/quantization-formats.md +447 -0
- package/bin/skills/blip-2/SKILL.md +564 -0
- package/bin/skills/blip-2/references/advanced-usage.md +680 -0
- package/bin/skills/blip-2/references/troubleshooting.md +526 -0
- package/bin/skills/chroma/SKILL.md +406 -0
- package/bin/skills/chroma/references/integration.md +38 -0
- package/bin/skills/clip/SKILL.md +253 -0
- package/bin/skills/clip/references/applications.md +207 -0
- package/bin/skills/constitutional-ai/SKILL.md +290 -0
- package/bin/skills/crewai/SKILL.md +498 -0
- package/bin/skills/crewai/references/flows.md +438 -0
- package/bin/skills/crewai/references/tools.md +429 -0
- package/bin/skills/crewai/references/troubleshooting.md +480 -0
- package/bin/skills/deepspeed/SKILL.md +141 -0
- package/bin/skills/deepspeed/references/08.md +17 -0
- package/bin/skills/deepspeed/references/09.md +173 -0
- package/bin/skills/deepspeed/references/2020.md +378 -0
- package/bin/skills/deepspeed/references/2023.md +279 -0
- package/bin/skills/deepspeed/references/assets.md +179 -0
- package/bin/skills/deepspeed/references/index.md +35 -0
- package/bin/skills/deepspeed/references/mii.md +118 -0
- package/bin/skills/deepspeed/references/other.md +1191 -0
- package/bin/skills/deepspeed/references/tutorials.md +6554 -0
- package/bin/skills/dspy/SKILL.md +590 -0
- package/bin/skills/dspy/references/examples.md +663 -0
- package/bin/skills/dspy/references/modules.md +475 -0
- package/bin/skills/dspy/references/optimizers.md +566 -0
- package/bin/skills/faiss/SKILL.md +221 -0
- package/bin/skills/faiss/references/index_types.md +280 -0
- package/bin/skills/flash-attention/SKILL.md +367 -0
- package/bin/skills/flash-attention/references/benchmarks.md +215 -0
- package/bin/skills/flash-attention/references/transformers-integration.md +293 -0
- package/bin/skills/gguf/SKILL.md +427 -0
- package/bin/skills/gguf/references/advanced-usage.md +504 -0
- package/bin/skills/gguf/references/troubleshooting.md +442 -0
- package/bin/skills/gptq/SKILL.md +450 -0
- package/bin/skills/gptq/references/calibration.md +337 -0
- package/bin/skills/gptq/references/integration.md +129 -0
- package/bin/skills/gptq/references/troubleshooting.md +95 -0
- package/bin/skills/grpo-rl-training/README.md +97 -0
- package/bin/skills/grpo-rl-training/SKILL.md +572 -0
- package/bin/skills/grpo-rl-training/examples/reward_functions_library.py +393 -0
- package/bin/skills/grpo-rl-training/templates/basic_grpo_training.py +228 -0
- package/bin/skills/guidance/SKILL.md +572 -0
- package/bin/skills/guidance/references/backends.md +554 -0
- package/bin/skills/guidance/references/constraints.md +674 -0
- package/bin/skills/guidance/references/examples.md +767 -0
- package/bin/skills/hqq/SKILL.md +445 -0
- package/bin/skills/hqq/references/advanced-usage.md +528 -0
- package/bin/skills/hqq/references/troubleshooting.md +503 -0
- package/bin/skills/hugging-face-cli/SKILL.md +191 -0
- package/bin/skills/hugging-face-cli/references/commands.md +954 -0
- package/bin/skills/hugging-face-cli/references/examples.md +374 -0
- package/bin/skills/hugging-face-datasets/SKILL.md +547 -0
- package/bin/skills/hugging-face-datasets/examples/diverse_training_examples.json +239 -0
- package/bin/skills/hugging-face-datasets/examples/system_prompt_template.txt +196 -0
- package/bin/skills/hugging-face-datasets/examples/training_examples.json +176 -0
- package/bin/skills/hugging-face-datasets/scripts/dataset_manager.py +522 -0
- package/bin/skills/hugging-face-datasets/scripts/sql_manager.py +844 -0
- package/bin/skills/hugging-face-datasets/templates/chat.json +55 -0
- package/bin/skills/hugging-face-datasets/templates/classification.json +62 -0
- package/bin/skills/hugging-face-datasets/templates/completion.json +51 -0
- package/bin/skills/hugging-face-datasets/templates/custom.json +75 -0
- package/bin/skills/hugging-face-datasets/templates/qa.json +54 -0
- package/bin/skills/hugging-face-datasets/templates/tabular.json +81 -0
- package/bin/skills/hugging-face-evaluation/SKILL.md +656 -0
- package/bin/skills/hugging-face-evaluation/examples/USAGE_EXAMPLES.md +382 -0
- package/bin/skills/hugging-face-evaluation/examples/artificial_analysis_to_hub.py +141 -0
- package/bin/skills/hugging-face-evaluation/examples/example_readme_tables.md +135 -0
- package/bin/skills/hugging-face-evaluation/examples/metric_mapping.json +50 -0
- package/bin/skills/hugging-face-evaluation/requirements.txt +20 -0
- package/bin/skills/hugging-face-evaluation/scripts/evaluation_manager.py +1374 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_eval_uv.py +104 -0
- package/bin/skills/hugging-face-evaluation/scripts/inspect_vllm_uv.py +317 -0
- package/bin/skills/hugging-face-evaluation/scripts/lighteval_vllm_uv.py +303 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_eval_job.py +98 -0
- package/bin/skills/hugging-face-evaluation/scripts/run_vllm_eval_job.py +331 -0
- package/bin/skills/hugging-face-evaluation/scripts/test_extraction.py +206 -0
- package/bin/skills/hugging-face-jobs/SKILL.md +1041 -0
- package/bin/skills/hugging-face-jobs/index.html +216 -0
- package/bin/skills/hugging-face-jobs/references/hardware_guide.md +336 -0
- package/bin/skills/hugging-face-jobs/references/hub_saving.md +352 -0
- package/bin/skills/hugging-face-jobs/references/token_usage.md +546 -0
- package/bin/skills/hugging-face-jobs/references/troubleshooting.md +475 -0
- package/bin/skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
- package/bin/skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
- package/bin/skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
- package/bin/skills/hugging-face-model-trainer/SKILL.md +711 -0
- package/bin/skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
- package/bin/skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
- package/bin/skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
- package/bin/skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
- package/bin/skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
- package/bin/skills/hugging-face-model-trainer/references/training_methods.md +150 -0
- package/bin/skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
- package/bin/skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
- package/bin/skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
- package/bin/skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
- package/bin/skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
- package/bin/skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
- package/bin/skills/hugging-face-paper-publisher/SKILL.md +627 -0
- package/bin/skills/hugging-face-paper-publisher/examples/example_usage.md +327 -0
- package/bin/skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
- package/bin/skills/hugging-face-paper-publisher/scripts/paper_manager.py +508 -0
- package/bin/skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
- package/bin/skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
- package/bin/skills/hugging-face-paper-publisher/templates/modern.md +319 -0
- package/bin/skills/hugging-face-paper-publisher/templates/standard.md +201 -0
- package/bin/skills/hugging-face-tool-builder/SKILL.md +115 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.py +57 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.sh +40 -0
- package/bin/skills/hugging-face-tool-builder/references/baseline_hf_api.tsx +57 -0
- package/bin/skills/hugging-face-tool-builder/references/find_models_by_paper.sh +230 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_enrich_models.sh +96 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_card_frontmatter.sh +188 -0
- package/bin/skills/hugging-face-tool-builder/references/hf_model_papers_auth.sh +171 -0
- package/bin/skills/hugging-face-trackio/SKILL.md +65 -0
- package/bin/skills/hugging-face-trackio/references/logging_metrics.md +206 -0
- package/bin/skills/hugging-face-trackio/references/retrieving_metrics.md +223 -0
- package/bin/skills/huggingface-tokenizers/SKILL.md +516 -0
- package/bin/skills/huggingface-tokenizers/references/algorithms.md +653 -0
- package/bin/skills/huggingface-tokenizers/references/integration.md +637 -0
- package/bin/skills/huggingface-tokenizers/references/pipeline.md +723 -0
- package/bin/skills/huggingface-tokenizers/references/training.md +565 -0
- package/bin/skills/instructor/SKILL.md +740 -0
- package/bin/skills/instructor/references/examples.md +107 -0
- package/bin/skills/instructor/references/providers.md +70 -0
- package/bin/skills/instructor/references/validation.md +606 -0
- package/bin/skills/knowledge-distillation/SKILL.md +458 -0
- package/bin/skills/knowledge-distillation/references/minillm.md +334 -0
- package/bin/skills/lambda-labs/SKILL.md +545 -0
- package/bin/skills/lambda-labs/references/advanced-usage.md +611 -0
- package/bin/skills/lambda-labs/references/troubleshooting.md +530 -0
- package/bin/skills/langchain/SKILL.md +480 -0
- package/bin/skills/langchain/references/agents.md +499 -0
- package/bin/skills/langchain/references/integration.md +562 -0
- package/bin/skills/langchain/references/rag.md +600 -0
- package/bin/skills/langsmith/SKILL.md +422 -0
- package/bin/skills/langsmith/references/advanced-usage.md +548 -0
- package/bin/skills/langsmith/references/troubleshooting.md +537 -0
- package/bin/skills/litgpt/SKILL.md +469 -0
- package/bin/skills/litgpt/references/custom-models.md +568 -0
- package/bin/skills/litgpt/references/distributed-training.md +451 -0
- package/bin/skills/litgpt/references/supported-models.md +336 -0
- package/bin/skills/litgpt/references/training-recipes.md +619 -0
- package/bin/skills/llama-cpp/SKILL.md +258 -0
- package/bin/skills/llama-cpp/references/optimization.md +89 -0
- package/bin/skills/llama-cpp/references/quantization.md +213 -0
- package/bin/skills/llama-cpp/references/server.md +125 -0
- package/bin/skills/llama-factory/SKILL.md +80 -0
- package/bin/skills/llama-factory/references/_images.md +23 -0
- package/bin/skills/llama-factory/references/advanced.md +1055 -0
- package/bin/skills/llama-factory/references/getting_started.md +349 -0
- package/bin/skills/llama-factory/references/index.md +19 -0
- package/bin/skills/llama-factory/references/other.md +31 -0
- package/bin/skills/llamaguard/SKILL.md +337 -0
- package/bin/skills/llamaindex/SKILL.md +569 -0
- package/bin/skills/llamaindex/references/agents.md +83 -0
- package/bin/skills/llamaindex/references/data_connectors.md +108 -0
- package/bin/skills/llamaindex/references/query_engines.md +406 -0
- package/bin/skills/llava/SKILL.md +304 -0
- package/bin/skills/llava/references/training.md +197 -0
- package/bin/skills/lm-evaluation-harness/SKILL.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/api-evaluation.md +490 -0
- package/bin/skills/lm-evaluation-harness/references/benchmark-guide.md +488 -0
- package/bin/skills/lm-evaluation-harness/references/custom-tasks.md +602 -0
- package/bin/skills/lm-evaluation-harness/references/distributed-eval.md +519 -0
- package/bin/skills/long-context/SKILL.md +536 -0
- package/bin/skills/long-context/references/extension_methods.md +468 -0
- package/bin/skills/long-context/references/fine_tuning.md +611 -0
- package/bin/skills/long-context/references/rope.md +402 -0
- package/bin/skills/mamba/SKILL.md +260 -0
- package/bin/skills/mamba/references/architecture-details.md +206 -0
- package/bin/skills/mamba/references/benchmarks.md +255 -0
- package/bin/skills/mamba/references/training-guide.md +388 -0
- package/bin/skills/megatron-core/SKILL.md +366 -0
- package/bin/skills/megatron-core/references/benchmarks.md +249 -0
- package/bin/skills/megatron-core/references/parallelism-guide.md +404 -0
- package/bin/skills/megatron-core/references/production-examples.md +473 -0
- package/bin/skills/megatron-core/references/training-recipes.md +547 -0
- package/bin/skills/miles/SKILL.md +315 -0
- package/bin/skills/miles/references/api-reference.md +141 -0
- package/bin/skills/miles/references/troubleshooting.md +352 -0
- package/bin/skills/mlflow/SKILL.md +704 -0
- package/bin/skills/mlflow/references/deployment.md +744 -0
- package/bin/skills/mlflow/references/model-registry.md +770 -0
- package/bin/skills/mlflow/references/tracking.md +680 -0
- package/bin/skills/modal/SKILL.md +341 -0
- package/bin/skills/modal/references/advanced-usage.md +503 -0
- package/bin/skills/modal/references/troubleshooting.md +494 -0
- package/bin/skills/model-merging/SKILL.md +539 -0
- package/bin/skills/model-merging/references/evaluation.md +462 -0
- package/bin/skills/model-merging/references/examples.md +428 -0
- package/bin/skills/model-merging/references/methods.md +352 -0
- package/bin/skills/model-pruning/SKILL.md +495 -0
- package/bin/skills/model-pruning/references/wanda.md +347 -0
- package/bin/skills/moe-training/SKILL.md +526 -0
- package/bin/skills/moe-training/references/architectures.md +432 -0
- package/bin/skills/moe-training/references/inference.md +348 -0
- package/bin/skills/moe-training/references/training.md +425 -0
- package/bin/skills/nanogpt/SKILL.md +290 -0
- package/bin/skills/nanogpt/references/architecture.md +382 -0
- package/bin/skills/nanogpt/references/data.md +476 -0
- package/bin/skills/nanogpt/references/training.md +564 -0
- package/bin/skills/nemo-curator/SKILL.md +383 -0
- package/bin/skills/nemo-curator/references/deduplication.md +87 -0
- package/bin/skills/nemo-curator/references/filtering.md +102 -0
- package/bin/skills/nemo-evaluator/SKILL.md +494 -0
- package/bin/skills/nemo-evaluator/references/adapter-system.md +340 -0
- package/bin/skills/nemo-evaluator/references/configuration.md +447 -0
- package/bin/skills/nemo-evaluator/references/custom-benchmarks.md +315 -0
- package/bin/skills/nemo-evaluator/references/execution-backends.md +361 -0
- package/bin/skills/nemo-guardrails/SKILL.md +297 -0
- package/bin/skills/nnsight/SKILL.md +436 -0
- package/bin/skills/nnsight/references/README.md +78 -0
- package/bin/skills/nnsight/references/api.md +344 -0
- package/bin/skills/nnsight/references/tutorials.md +300 -0
- package/bin/skills/openrlhf/SKILL.md +249 -0
- package/bin/skills/openrlhf/references/algorithm-comparison.md +404 -0
- package/bin/skills/openrlhf/references/custom-rewards.md +530 -0
- package/bin/skills/openrlhf/references/hybrid-engine.md +287 -0
- package/bin/skills/openrlhf/references/multi-node-training.md +454 -0
- package/bin/skills/outlines/SKILL.md +652 -0
- package/bin/skills/outlines/references/backends.md +615 -0
- package/bin/skills/outlines/references/examples.md +773 -0
- package/bin/skills/outlines/references/json_generation.md +652 -0
- package/bin/skills/peft/SKILL.md +431 -0
- package/bin/skills/peft/references/advanced-usage.md +514 -0
- package/bin/skills/peft/references/troubleshooting.md +480 -0
- package/bin/skills/phoenix/SKILL.md +475 -0
- package/bin/skills/phoenix/references/advanced-usage.md +619 -0
- package/bin/skills/phoenix/references/troubleshooting.md +538 -0
- package/bin/skills/pinecone/SKILL.md +358 -0
- package/bin/skills/pinecone/references/deployment.md +181 -0
- package/bin/skills/pytorch-fsdp/SKILL.md +126 -0
- package/bin/skills/pytorch-fsdp/references/index.md +7 -0
- package/bin/skills/pytorch-fsdp/references/other.md +4249 -0
- package/bin/skills/pytorch-lightning/SKILL.md +346 -0
- package/bin/skills/pytorch-lightning/references/callbacks.md +436 -0
- package/bin/skills/pytorch-lightning/references/distributed.md +490 -0
- package/bin/skills/pytorch-lightning/references/hyperparameter-tuning.md +556 -0
- package/bin/skills/pyvene/SKILL.md +473 -0
- package/bin/skills/pyvene/references/README.md +73 -0
- package/bin/skills/pyvene/references/api.md +383 -0
- package/bin/skills/pyvene/references/tutorials.md +376 -0
- package/bin/skills/qdrant/SKILL.md +493 -0
- package/bin/skills/qdrant/references/advanced-usage.md +648 -0
- package/bin/skills/qdrant/references/troubleshooting.md +631 -0
- package/bin/skills/ray-data/SKILL.md +326 -0
- package/bin/skills/ray-data/references/integration.md +82 -0
- package/bin/skills/ray-data/references/transformations.md +83 -0
- package/bin/skills/ray-train/SKILL.md +406 -0
- package/bin/skills/ray-train/references/multi-node.md +628 -0
- package/bin/skills/rwkv/SKILL.md +260 -0
- package/bin/skills/rwkv/references/architecture-details.md +344 -0
- package/bin/skills/rwkv/references/rwkv7.md +386 -0
- package/bin/skills/rwkv/references/state-management.md +369 -0
- package/bin/skills/saelens/SKILL.md +386 -0
- package/bin/skills/saelens/references/README.md +70 -0
- package/bin/skills/saelens/references/api.md +333 -0
- package/bin/skills/saelens/references/tutorials.md +318 -0
- package/bin/skills/segment-anything/SKILL.md +500 -0
- package/bin/skills/segment-anything/references/advanced-usage.md +589 -0
- package/bin/skills/segment-anything/references/troubleshooting.md +484 -0
- package/bin/skills/sentence-transformers/SKILL.md +255 -0
- package/bin/skills/sentence-transformers/references/models.md +123 -0
- package/bin/skills/sentencepiece/SKILL.md +235 -0
- package/bin/skills/sentencepiece/references/algorithms.md +200 -0
- package/bin/skills/sentencepiece/references/training.md +304 -0
- package/bin/skills/sglang/SKILL.md +442 -0
- package/bin/skills/sglang/references/deployment.md +490 -0
- package/bin/skills/sglang/references/radix-attention.md +413 -0
- package/bin/skills/sglang/references/structured-generation.md +541 -0
- package/bin/skills/simpo/SKILL.md +219 -0
- package/bin/skills/simpo/references/datasets.md +478 -0
- package/bin/skills/simpo/references/hyperparameters.md +452 -0
- package/bin/skills/simpo/references/loss-functions.md +350 -0
- package/bin/skills/skypilot/SKILL.md +509 -0
- package/bin/skills/skypilot/references/advanced-usage.md +491 -0
- package/bin/skills/skypilot/references/troubleshooting.md +570 -0
- package/bin/skills/slime/SKILL.md +464 -0
- package/bin/skills/slime/references/api-reference.md +392 -0
- package/bin/skills/slime/references/troubleshooting.md +386 -0
- package/bin/skills/speculative-decoding/SKILL.md +467 -0
- package/bin/skills/speculative-decoding/references/lookahead.md +309 -0
- package/bin/skills/speculative-decoding/references/medusa.md +350 -0
- package/bin/skills/stable-diffusion/SKILL.md +519 -0
- package/bin/skills/stable-diffusion/references/advanced-usage.md +716 -0
- package/bin/skills/stable-diffusion/references/troubleshooting.md +555 -0
- package/bin/skills/tensorboard/SKILL.md +629 -0
- package/bin/skills/tensorboard/references/integrations.md +638 -0
- package/bin/skills/tensorboard/references/profiling.md +545 -0
- package/bin/skills/tensorboard/references/visualization.md +620 -0
- package/bin/skills/tensorrt-llm/SKILL.md +187 -0
- package/bin/skills/tensorrt-llm/references/multi-gpu.md +298 -0
- package/bin/skills/tensorrt-llm/references/optimization.md +242 -0
- package/bin/skills/tensorrt-llm/references/serving.md +470 -0
- package/bin/skills/tinker/SKILL.md +362 -0
- package/bin/skills/tinker/references/api-reference.md +168 -0
- package/bin/skills/tinker/references/getting-started.md +157 -0
- package/bin/skills/tinker/references/loss-functions.md +163 -0
- package/bin/skills/tinker/references/models-and-lora.md +139 -0
- package/bin/skills/tinker/references/recipes.md +280 -0
- package/bin/skills/tinker/references/reinforcement-learning.md +212 -0
- package/bin/skills/tinker/references/rendering.md +243 -0
- package/bin/skills/tinker/references/supervised-learning.md +232 -0
- package/bin/skills/tinker-training-cost/SKILL.md +187 -0
- package/bin/skills/tinker-training-cost/scripts/calculate_cost.py +123 -0
- package/bin/skills/torchforge/SKILL.md +433 -0
- package/bin/skills/torchforge/references/api-reference.md +327 -0
- package/bin/skills/torchforge/references/troubleshooting.md +409 -0
- package/bin/skills/torchtitan/SKILL.md +358 -0
- package/bin/skills/torchtitan/references/checkpoint.md +181 -0
- package/bin/skills/torchtitan/references/custom-models.md +258 -0
- package/bin/skills/torchtitan/references/float8.md +133 -0
- package/bin/skills/torchtitan/references/fsdp.md +126 -0
- package/bin/skills/transformer-lens/SKILL.md +346 -0
- package/bin/skills/transformer-lens/references/README.md +54 -0
- package/bin/skills/transformer-lens/references/api.md +362 -0
- package/bin/skills/transformer-lens/references/tutorials.md +339 -0
- package/bin/skills/trl-fine-tuning/SKILL.md +455 -0
- package/bin/skills/trl-fine-tuning/references/dpo-variants.md +227 -0
- package/bin/skills/trl-fine-tuning/references/online-rl.md +82 -0
- package/bin/skills/trl-fine-tuning/references/reward-modeling.md +122 -0
- package/bin/skills/trl-fine-tuning/references/sft-training.md +168 -0
- package/bin/skills/unsloth/SKILL.md +80 -0
- package/bin/skills/unsloth/references/index.md +7 -0
- package/bin/skills/unsloth/references/llms-full.md +16799 -0
- package/bin/skills/unsloth/references/llms-txt.md +12044 -0
- package/bin/skills/unsloth/references/llms.md +82 -0
- package/bin/skills/verl/SKILL.md +391 -0
- package/bin/skills/verl/references/api-reference.md +301 -0
- package/bin/skills/verl/references/troubleshooting.md +391 -0
- package/bin/skills/vllm/SKILL.md +364 -0
- package/bin/skills/vllm/references/optimization.md +226 -0
- package/bin/skills/vllm/references/quantization.md +284 -0
- package/bin/skills/vllm/references/server-deployment.md +255 -0
- package/bin/skills/vllm/references/troubleshooting.md +447 -0
- package/bin/skills/weights-and-biases/SKILL.md +590 -0
- package/bin/skills/weights-and-biases/references/artifacts.md +584 -0
- package/bin/skills/weights-and-biases/references/integrations.md +700 -0
- package/bin/skills/weights-and-biases/references/sweeps.md +847 -0
- package/bin/skills/whisper/SKILL.md +317 -0
- package/bin/skills/whisper/references/languages.md +189 -0
- package/bin/synsc +0 -0
- package/package.json +10 -0
|
@@ -0,0 +1,600 @@
|
|
|
1
|
+
# LangChain RAG Guide
|
|
2
|
+
|
|
3
|
+
Complete guide to Retrieval-Augmented Generation with LangChain.
|
|
4
|
+
|
|
5
|
+
## What is RAG?
|
|
6
|
+
|
|
7
|
+
**RAG (Retrieval-Augmented Generation)** combines:
|
|
8
|
+
1. **Retrieval**: Find relevant documents from knowledge base
|
|
9
|
+
2. **Generation**: LLM generates answer using retrieved context
|
|
10
|
+
|
|
11
|
+
**Benefits**:
|
|
12
|
+
- Reduce hallucinations
|
|
13
|
+
- Up-to-date information
|
|
14
|
+
- Domain-specific knowledge
|
|
15
|
+
- Source citations
|
|
16
|
+
|
|
17
|
+
## RAG pipeline components
|
|
18
|
+
|
|
19
|
+
### 1. Document loading
|
|
20
|
+
|
|
21
|
+
```python
|
|
22
|
+
from langchain_community.document_loaders import (
|
|
23
|
+
WebBaseLoader,
|
|
24
|
+
PyPDFLoader,
|
|
25
|
+
TextLoader,
|
|
26
|
+
DirectoryLoader,
|
|
27
|
+
CSVLoader,
|
|
28
|
+
UnstructuredMarkdownLoader
|
|
29
|
+
)
|
|
30
|
+
|
|
31
|
+
# Web pages
|
|
32
|
+
loader = WebBaseLoader("https://docs.python.org/3/tutorial/")
|
|
33
|
+
docs = loader.load()
|
|
34
|
+
|
|
35
|
+
# PDF files
|
|
36
|
+
loader = PyPDFLoader("paper.pdf")
|
|
37
|
+
docs = loader.load()
|
|
38
|
+
|
|
39
|
+
# Multiple PDFs
|
|
40
|
+
loader = DirectoryLoader("./papers/", glob="**/*.pdf", loader_cls=PyPDFLoader)
|
|
41
|
+
docs = loader.load()
|
|
42
|
+
|
|
43
|
+
# Text files
|
|
44
|
+
loader = TextLoader("data.txt")
|
|
45
|
+
docs = loader.load()
|
|
46
|
+
|
|
47
|
+
# CSV
|
|
48
|
+
loader = CSVLoader("data.csv")
|
|
49
|
+
docs = loader.load()
|
|
50
|
+
|
|
51
|
+
# Markdown
|
|
52
|
+
loader = UnstructuredMarkdownLoader("README.md")
|
|
53
|
+
docs = loader.load()
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### 2. Text splitting
|
|
57
|
+
|
|
58
|
+
```python
|
|
59
|
+
from langchain.text_splitter import (
|
|
60
|
+
RecursiveCharacterTextSplitter,
|
|
61
|
+
CharacterTextSplitter,
|
|
62
|
+
TokenTextSplitter
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
# Recommended: Recursive (tries multiple separators)
|
|
66
|
+
text_splitter = RecursiveCharacterTextSplitter(
|
|
67
|
+
chunk_size=1000, # Characters per chunk
|
|
68
|
+
chunk_overlap=200, # Overlap between chunks
|
|
69
|
+
length_function=len,
|
|
70
|
+
separators=["\n\n", "\n", " ", ""]
|
|
71
|
+
)
|
|
72
|
+
|
|
73
|
+
splits = text_splitter.split_documents(docs)
|
|
74
|
+
|
|
75
|
+
# Token-based (for precise token limits)
|
|
76
|
+
text_splitter = TokenTextSplitter(
|
|
77
|
+
chunk_size=512, # Tokens per chunk
|
|
78
|
+
chunk_overlap=50
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
# Character-based (simple)
|
|
82
|
+
text_splitter = CharacterTextSplitter(
|
|
83
|
+
chunk_size=1000,
|
|
84
|
+
chunk_overlap=200,
|
|
85
|
+
separator="\n\n"
|
|
86
|
+
)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
**Chunk size recommendations**:
|
|
90
|
+
- **Short answers**: 256-512 tokens
|
|
91
|
+
- **General Q&A**: 512-1024 tokens (recommended)
|
|
92
|
+
- **Long context**: 1024-2048 tokens
|
|
93
|
+
- **Overlap**: 10-20% of chunk_size
|
|
94
|
+
|
|
95
|
+
### 3. Embeddings
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from langchain_openai import OpenAIEmbeddings
|
|
99
|
+
from langchain_community.embeddings import (
|
|
100
|
+
HuggingFaceEmbeddings,
|
|
101
|
+
CohereEmbeddings
|
|
102
|
+
)
|
|
103
|
+
|
|
104
|
+
# OpenAI (fast, high quality)
|
|
105
|
+
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
|
|
106
|
+
|
|
107
|
+
# HuggingFace (free, local)
|
|
108
|
+
embeddings = HuggingFaceEmbeddings(
|
|
109
|
+
model_name="sentence-transformers/all-mpnet-base-v2"
|
|
110
|
+
)
|
|
111
|
+
|
|
112
|
+
# Cohere
|
|
113
|
+
embeddings = CohereEmbeddings(model="embed-english-v3.0")
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### 4. Vector stores
|
|
117
|
+
|
|
118
|
+
```python
|
|
119
|
+
from langchain_chroma import Chroma
|
|
120
|
+
from langchain_community.vectorstores import FAISS
|
|
121
|
+
from langchain_pinecone import PineconeVectorStore
|
|
122
|
+
|
|
123
|
+
# Chroma (local, persistent)
|
|
124
|
+
vectorstore = Chroma.from_documents(
|
|
125
|
+
documents=splits,
|
|
126
|
+
embedding=embeddings,
|
|
127
|
+
persist_directory="./chroma_db"
|
|
128
|
+
)
|
|
129
|
+
|
|
130
|
+
# FAISS (fast similarity search)
|
|
131
|
+
vectorstore = FAISS.from_documents(splits, embeddings)
|
|
132
|
+
vectorstore.save_local("./faiss_index")
|
|
133
|
+
|
|
134
|
+
# Pinecone (cloud, scalable)
|
|
135
|
+
vectorstore = PineconeVectorStore.from_documents(
|
|
136
|
+
documents=splits,
|
|
137
|
+
embedding=embeddings,
|
|
138
|
+
index_name="my-index"
|
|
139
|
+
)
|
|
140
|
+
```
|
|
141
|
+
|
|
142
|
+
### 5. Retrieval
|
|
143
|
+
|
|
144
|
+
```python
|
|
145
|
+
# Basic retriever (top-k similarity)
|
|
146
|
+
retriever = vectorstore.as_retriever(
|
|
147
|
+
search_type="similarity",
|
|
148
|
+
search_kwargs={"k": 4} # Return top 4 documents
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
# MMR (Maximal Marginal Relevance) - diverse results
|
|
152
|
+
retriever = vectorstore.as_retriever(
|
|
153
|
+
search_type="mmr",
|
|
154
|
+
search_kwargs={
|
|
155
|
+
"k": 4,
|
|
156
|
+
"fetch_k": 20, # Fetch 20, return diverse 4
|
|
157
|
+
"lambda_mult": 0.5 # Diversity (0=diverse, 1=similar)
|
|
158
|
+
}
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
# Similarity score threshold
|
|
162
|
+
retriever = vectorstore.as_retriever(
|
|
163
|
+
search_type="similarity_score_threshold",
|
|
164
|
+
search_kwargs={
|
|
165
|
+
"score_threshold": 0.5 # Minimum similarity score
|
|
166
|
+
}
|
|
167
|
+
)
|
|
168
|
+
|
|
169
|
+
# Query documents directly
|
|
170
|
+
docs = retriever.get_relevant_documents("What is Python?")
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
### 6. QA chain
|
|
174
|
+
|
|
175
|
+
```python
|
|
176
|
+
from langchain.chains import RetrievalQA
|
|
177
|
+
from langchain_anthropic import ChatAnthropic
|
|
178
|
+
|
|
179
|
+
llm = ChatAnthropic(model="claude-sonnet-4-5-20250929")
|
|
180
|
+
|
|
181
|
+
# Basic QA chain
|
|
182
|
+
qa_chain = RetrievalQA.from_chain_type(
|
|
183
|
+
llm=llm,
|
|
184
|
+
retriever=retriever,
|
|
185
|
+
return_source_documents=True
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
# Query
|
|
189
|
+
result = qa_chain({"query": "What are Python decorators?"})
|
|
190
|
+
print(result["result"])
|
|
191
|
+
print(f"Sources: {len(result['source_documents'])}")
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
## Advanced RAG patterns
|
|
195
|
+
|
|
196
|
+
### Conversational RAG
|
|
197
|
+
|
|
198
|
+
```python
|
|
199
|
+
from langchain.chains import ConversationalRetrievalChain
|
|
200
|
+
from langchain.memory import ConversationBufferMemory
|
|
201
|
+
|
|
202
|
+
# Add memory
|
|
203
|
+
memory = ConversationBufferMemory(
|
|
204
|
+
memory_key="chat_history",
|
|
205
|
+
return_messages=True,
|
|
206
|
+
output_key="answer"
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
# Conversational RAG chain
|
|
210
|
+
qa = ConversationalRetrievalChain.from_llm(
|
|
211
|
+
llm=llm,
|
|
212
|
+
retriever=retriever,
|
|
213
|
+
memory=memory,
|
|
214
|
+
return_source_documents=True
|
|
215
|
+
)
|
|
216
|
+
|
|
217
|
+
# Multi-turn conversation
|
|
218
|
+
result1 = qa({"question": "What is Python used for?"})
|
|
219
|
+
result2 = qa({"question": "Can you give examples?"}) # Remembers context
|
|
220
|
+
result3 = qa({"question": "What about web development?"})
|
|
221
|
+
```
|
|
222
|
+
|
|
223
|
+
### Custom prompt template
|
|
224
|
+
|
|
225
|
+
```python
|
|
226
|
+
from langchain.prompts import PromptTemplate
|
|
227
|
+
|
|
228
|
+
# Custom QA prompt
|
|
229
|
+
template = """Use the following pieces of context to answer the question.
|
|
230
|
+
If you don't know the answer, say so - don't make it up.
|
|
231
|
+
Always cite your sources using [Source N] notation.
|
|
232
|
+
|
|
233
|
+
Context: {context}
|
|
234
|
+
|
|
235
|
+
Question: {question}
|
|
236
|
+
|
|
237
|
+
Helpful Answer:"""
|
|
238
|
+
|
|
239
|
+
prompt = PromptTemplate(
|
|
240
|
+
template=template,
|
|
241
|
+
input_variables=["context", "question"]
|
|
242
|
+
)
|
|
243
|
+
|
|
244
|
+
qa_chain = RetrievalQA.from_chain_type(
|
|
245
|
+
llm=llm,
|
|
246
|
+
retriever=retriever,
|
|
247
|
+
chain_type_kwargs={"prompt": prompt}
|
|
248
|
+
)
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
### Chain types
|
|
252
|
+
|
|
253
|
+
```python
|
|
254
|
+
# 1. Stuff (default) - Put all docs in context
|
|
255
|
+
qa_chain = RetrievalQA.from_chain_type(
|
|
256
|
+
llm=llm,
|
|
257
|
+
retriever=retriever,
|
|
258
|
+
chain_type="stuff" # Fast, works if docs fit in context
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
# 2. Map-reduce - Summarize each doc, then combine
|
|
262
|
+
qa_chain = RetrievalQA.from_chain_type(
|
|
263
|
+
llm=llm,
|
|
264
|
+
retriever=retriever,
|
|
265
|
+
chain_type="map_reduce" # For many documents
|
|
266
|
+
)
|
|
267
|
+
|
|
268
|
+
# 3. Refine - Iteratively refine answer
|
|
269
|
+
qa_chain = RetrievalQA.from_chain_type(
|
|
270
|
+
llm=llm,
|
|
271
|
+
retriever=retriever,
|
|
272
|
+
chain_type="refine" # Most thorough, slowest
|
|
273
|
+
)
|
|
274
|
+
|
|
275
|
+
# 4. Map-rerank - Score answers, return best
|
|
276
|
+
qa_chain = RetrievalQA.from_chain_type(
|
|
277
|
+
llm=llm,
|
|
278
|
+
retriever=retriever,
|
|
279
|
+
chain_type="map_rerank" # Good for multiple perspectives
|
|
280
|
+
)
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### Multi-query retrieval
|
|
284
|
+
|
|
285
|
+
```python
|
|
286
|
+
from langchain.retrievers import MultiQueryRetriever
|
|
287
|
+
|
|
288
|
+
# Generate multiple queries for better recall
|
|
289
|
+
retriever = MultiQueryRetriever.from_llm(
|
|
290
|
+
retriever=vectorstore.as_retriever(),
|
|
291
|
+
llm=llm
|
|
292
|
+
)
|
|
293
|
+
|
|
294
|
+
# "What is Python?" becomes:
|
|
295
|
+
# - "What is Python programming language?"
|
|
296
|
+
# - "Python language definition"
|
|
297
|
+
# - "Overview of Python"
|
|
298
|
+
docs = retriever.get_relevant_documents("What is Python?")
|
|
299
|
+
```
|
|
300
|
+
|
|
301
|
+
### Contextual compression
|
|
302
|
+
|
|
303
|
+
```python
|
|
304
|
+
from langchain.retrievers import ContextualCompressionRetriever
|
|
305
|
+
from langchain.retrievers.document_compressors import LLMChainExtractor
|
|
306
|
+
|
|
307
|
+
# Compress retrieved docs to relevant parts only
|
|
308
|
+
compressor = LLMChainExtractor.from_llm(llm)
|
|
309
|
+
|
|
310
|
+
compression_retriever = ContextualCompressionRetriever(
|
|
311
|
+
base_compressor=compressor,
|
|
312
|
+
base_retriever=vectorstore.as_retriever()
|
|
313
|
+
)
|
|
314
|
+
|
|
315
|
+
# Returns only relevant excerpts
|
|
316
|
+
compressed_docs = compression_retriever.get_relevant_documents("Python decorators")
|
|
317
|
+
```
|
|
318
|
+
|
|
319
|
+
### Ensemble retrieval (hybrid search)
|
|
320
|
+
|
|
321
|
+
```python
|
|
322
|
+
from langchain.retrievers import EnsembleRetriever
|
|
323
|
+
from langchain.retrievers import BM25Retriever
|
|
324
|
+
|
|
325
|
+
# Vector search (semantic)
|
|
326
|
+
vector_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
|
|
327
|
+
|
|
328
|
+
# Keyword search (BM25)
|
|
329
|
+
keyword_retriever = BM25Retriever.from_documents(splits)
|
|
330
|
+
keyword_retriever.k = 5
|
|
331
|
+
|
|
332
|
+
# Combine both
|
|
333
|
+
ensemble_retriever = EnsembleRetriever(
|
|
334
|
+
retrievers=[vector_retriever, keyword_retriever],
|
|
335
|
+
weights=[0.5, 0.5] # Equal weight
|
|
336
|
+
)
|
|
337
|
+
|
|
338
|
+
docs = ensemble_retriever.get_relevant_documents("Python async")
|
|
339
|
+
```
|
|
340
|
+
|
|
341
|
+
## RAG with agents
|
|
342
|
+
|
|
343
|
+
### Agent-based RAG
|
|
344
|
+
|
|
345
|
+
```python
|
|
346
|
+
from langchain.agents import create_tool_calling_agent
|
|
347
|
+
from langchain.tools.retriever import create_retriever_tool
|
|
348
|
+
|
|
349
|
+
# Create retriever tool
|
|
350
|
+
retriever_tool = create_retriever_tool(
|
|
351
|
+
retriever=retriever,
|
|
352
|
+
name="python_docs",
|
|
353
|
+
description="Searches Python documentation for answers about Python programming"
|
|
354
|
+
)
|
|
355
|
+
|
|
356
|
+
# Create agent with retriever tool
|
|
357
|
+
agent = create_tool_calling_agent(
|
|
358
|
+
llm=llm,
|
|
359
|
+
tools=[retriever_tool, calculator, search],
|
|
360
|
+
    prompt=prompt  # ChatPromptTemplate with a system message and an "agent_scratchpad" placeholder
|
|
361
|
+
)
|
|
362
|
+
|
|
363
|
+
# Agent decides when to retrieve
|
|
364
|
+
from langchain.agents import AgentExecutor
|
|
365
|
+
agent_executor = AgentExecutor(agent=agent, tools=[retriever_tool, calculator, search])
|
|
366
|
+
|
|
367
|
+
result = agent_executor.invoke({"input": "What are Python generators?"})
|
|
368
|
+
```
|
|
369
|
+
|
|
370
|
+
### Multi-document agents
|
|
371
|
+
|
|
372
|
+
```python
|
|
373
|
+
# Multiple knowledge bases
|
|
374
|
+
python_retriever = create_retriever_tool(
|
|
375
|
+
retriever=python_vectorstore.as_retriever(),
|
|
376
|
+
name="python_docs",
|
|
377
|
+
description="Python programming documentation"
|
|
378
|
+
)
|
|
379
|
+
|
|
380
|
+
numpy_retriever = create_retriever_tool(
|
|
381
|
+
retriever=numpy_vectorstore.as_retriever(),
|
|
382
|
+
name="numpy_docs",
|
|
383
|
+
description="NumPy library documentation"
|
|
384
|
+
)
|
|
385
|
+
|
|
386
|
+
# Agent chooses which knowledge base to query
|
|
387
|
+
agent = create_agent(
|
|
388
|
+
model=llm,
|
|
389
|
+
tools=[python_retriever, numpy_retriever, search]
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
result = agent.invoke({"input": "How do I create numpy arrays?"})
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
## Metadata filtering
|
|
396
|
+
|
|
397
|
+
### Add metadata to documents
|
|
398
|
+
|
|
399
|
+
```python
|
|
400
|
+
from langchain.schema import Document
|
|
401
|
+
|
|
402
|
+
# Documents with metadata
|
|
403
|
+
docs = [
|
|
404
|
+
Document(
|
|
405
|
+
page_content="Python is a programming language",
|
|
406
|
+
metadata={"source": "tutorial.pdf", "page": 1, "category": "intro"}
|
|
407
|
+
),
|
|
408
|
+
Document(
|
|
409
|
+
page_content="Python decorators modify functions",
|
|
410
|
+
metadata={"source": "advanced.pdf", "page": 42, "category": "advanced"}
|
|
411
|
+
)
|
|
412
|
+
]
|
|
413
|
+
|
|
414
|
+
vectorstore = Chroma.from_documents(docs, embeddings)
|
|
415
|
+
```
|
|
416
|
+
|
|
417
|
+
### Filter by metadata
|
|
418
|
+
|
|
419
|
+
```python
|
|
420
|
+
# Retrieve only from specific source
|
|
421
|
+
retriever = vectorstore.as_retriever(
|
|
422
|
+
search_kwargs={
|
|
423
|
+
"k": 4,
|
|
424
|
+
"filter": {"category": "intro"} # Only intro documents
|
|
425
|
+
}
|
|
426
|
+
)
|
|
427
|
+
|
|
428
|
+
# Multiple filters
|
|
429
|
+
retriever = vectorstore.as_retriever(
|
|
430
|
+
search_kwargs={
|
|
431
|
+
"k": 4,
|
|
432
|
+
"filter": {
|
|
433
|
+
"category": "advanced",
|
|
434
|
+
"source": "advanced.pdf"
|
|
435
|
+
}
|
|
436
|
+
}
|
|
437
|
+
)
|
|
438
|
+
```
|
|
439
|
+
|
|
440
|
+
## Document preprocessing
|
|
441
|
+
|
|
442
|
+
### Clean documents
|
|
443
|
+
|
|
444
|
+
```python
|
|
445
|
+
import re

def preprocess_doc(doc):
|
|
446
|
+
"""Clean and normalize document."""
|
|
447
|
+
# Remove extra whitespace
|
|
448
|
+
doc.page_content = " ".join(doc.page_content.split())
|
|
449
|
+
|
|
450
|
+
# Remove special characters
|
|
451
|
+
doc.page_content = re.sub(r'[^\w\s]', '', doc.page_content)
|
|
452
|
+
|
|
453
|
+
# Lowercase (optional)
|
|
454
|
+
doc.page_content = doc.page_content.lower()
|
|
455
|
+
|
|
456
|
+
return doc
|
|
457
|
+
|
|
458
|
+
# Apply preprocessing
|
|
459
|
+
clean_docs = [preprocess_doc(doc) for doc in docs]
|
|
460
|
+
```
|
|
461
|
+
|
|
462
|
+
### Extract structured data
|
|
463
|
+
|
|
464
|
+
```python
|
|
465
|
+
from langchain.document_transformers import Html2TextTransformer
|
|
466
|
+
|
|
467
|
+
# HTML to clean text
|
|
468
|
+
transformer = Html2TextTransformer()
|
|
469
|
+
clean_docs = transformer.transform_documents(html_docs)
|
|
470
|
+
|
|
471
|
+
# Extract tables
|
|
472
|
+
from langchain.document_loaders import UnstructuredHTMLLoader
|
|
473
|
+
|
|
474
|
+
loader = UnstructuredHTMLLoader("data.html")
|
|
475
|
+
docs = loader.load() # Extracts tables as structured data
|
|
476
|
+
```
|
|
477
|
+
|
|
478
|
+
## Evaluation & monitoring
|
|
479
|
+
|
|
480
|
+
### Evaluate retrieval quality
|
|
481
|
+
|
|
482
|
+
```python
|
|
483
|
+
from langchain.evaluation import load_evaluator
|
|
484
|
+
|
|
485
|
+
# Relevance evaluator
|
|
486
|
+
evaluator = load_evaluator("criteria", criteria="relevance", llm=llm)
|
|
487
|
+
|
|
488
|
+
# Test retrieval
|
|
489
|
+
query = "What are Python decorators?"
|
|
490
|
+
retrieved_docs = retriever.get_relevant_documents(query)
|
|
491
|
+
|
|
492
|
+
for doc in retrieved_docs:
|
|
493
|
+
result = evaluator.evaluate_strings(
|
|
494
|
+
input=query,
|
|
495
|
+
prediction=doc.page_content
|
|
496
|
+
)
|
|
497
|
+
print(f"Relevance score: {result['score']}")
|
|
498
|
+
```
|
|
499
|
+
|
|
500
|
+
### Track sources
|
|
501
|
+
|
|
502
|
+
```python
|
|
503
|
+
# Always return sources
|
|
504
|
+
qa_chain = RetrievalQA.from_chain_type(
|
|
505
|
+
llm=llm,
|
|
506
|
+
retriever=retriever,
|
|
507
|
+
return_source_documents=True
|
|
508
|
+
)
|
|
509
|
+
|
|
510
|
+
result = qa_chain({"query": "What is Python?"})
|
|
511
|
+
|
|
512
|
+
# Show sources to user
|
|
513
|
+
print(result["result"])
|
|
514
|
+
print("\nSources:")
|
|
515
|
+
for i, doc in enumerate(result["source_documents"]):
|
|
516
|
+
print(f"[{i+1}] {doc.metadata.get('source', 'Unknown')}")
|
|
517
|
+
print(f" {doc.page_content[:100]}...")
|
|
518
|
+
```
|
|
519
|
+
|
|
520
|
+
## Best practices
|
|
521
|
+
|
|
522
|
+
1. **Chunk size matters** - 512-1024 tokens is usually optimal
|
|
523
|
+
2. **Add overlap** - 10-20% overlap prevents context loss
|
|
524
|
+
3. **Use metadata** - Track sources for citations
|
|
525
|
+
4. **Test retrieval quality** - Evaluate before using in production
|
|
526
|
+
5. **Hybrid search** - Combine vector + keyword for best results
|
|
527
|
+
6. **Compress context** - Remove irrelevant parts before LLM
|
|
528
|
+
7. **Cache embeddings** - Expensive, cache when possible
|
|
529
|
+
8. **Version your index** - Track changes to knowledge base
|
|
530
|
+
9. **Monitor failures** - Log when retrieval doesn't find answers
|
|
531
|
+
10. **Update regularly** - Keep knowledge base current
|
|
532
|
+
|
|
533
|
+
## Common pitfalls
|
|
534
|
+
|
|
535
|
+
1. **Chunks too large** - Won't fit in context
|
|
536
|
+
2. **No overlap** - Important context lost at boundaries
|
|
537
|
+
3. **No metadata** - Can't cite sources
|
|
538
|
+
4. **Poor splitting** - Breaks mid-sentence or mid-paragraph
|
|
539
|
+
5. **Wrong embedding model** - Domain mismatch hurts retrieval
|
|
540
|
+
6. **No reranking** - Lower quality results
|
|
541
|
+
7. **Ignoring failures** - No handling when retrieval fails
|
|
542
|
+
|
|
543
|
+
## Performance optimization
|
|
544
|
+
|
|
545
|
+
### Caching
|
|
546
|
+
|
|
547
|
+
```python
|
|
548
|
+
from langchain.cache import InMemoryCache, SQLiteCache
|
|
549
|
+
from langchain.globals import set_llm_cache
|
|
550
|
+
|
|
551
|
+
# In-memory cache
|
|
552
|
+
set_llm_cache(InMemoryCache())
|
|
553
|
+
|
|
554
|
+
# Persistent cache
|
|
555
|
+
set_llm_cache(SQLiteCache(database_path=".langchain.db"))
|
|
556
|
+
|
|
557
|
+
# Same query uses cache (faster + cheaper)
|
|
558
|
+
result1 = qa_chain({"query": "What is Python?"})
|
|
559
|
+
result2 = qa_chain({"query": "What is Python?"}) # Cached
|
|
560
|
+
```
|
|
561
|
+
|
|
562
|
+
### Batch processing
|
|
563
|
+
|
|
564
|
+
```python
|
|
565
|
+
# Process multiple queries efficiently
|
|
566
|
+
queries = [
|
|
567
|
+
"What is Python?",
|
|
568
|
+
"What are decorators?",
|
|
569
|
+
"How do I use async?"
|
|
570
|
+
]
|
|
571
|
+
|
|
572
|
+
# Batch retrieval
|
|
573
|
+
all_docs = [vectorstore.similarity_search(q) for q in queries]
|
|
574
|
+
|
|
575
|
+
# Batch QA
|
|
576
|
+
results = qa_chain.batch([{"query": q} for q in queries])
|
|
577
|
+
```
|
|
578
|
+
|
|
579
|
+
### Async operations
|
|
580
|
+
|
|
581
|
+
```python
|
|
582
|
+
# Async RAG for concurrent queries
|
|
583
|
+
import asyncio
|
|
584
|
+
|
|
585
|
+
async def async_qa(query):
|
|
586
|
+
return await qa_chain.ainvoke({"query": query})
|
|
587
|
+
|
|
588
|
+
# Run multiple queries concurrently (run inside an async function or via asyncio.run)
|
|
589
|
+
results = await asyncio.gather(
|
|
590
|
+
async_qa("What is Python?"),
|
|
591
|
+
async_qa("What are decorators?")
|
|
592
|
+
)
|
|
593
|
+
```
|
|
594
|
+
|
|
595
|
+
## Resources
|
|
596
|
+
|
|
597
|
+
- **LangChain RAG Docs**: https://docs.langchain.com/oss/python/langchain/rag
|
|
598
|
+
- **Vector Stores**: https://python.langchain.com/docs/integrations/vectorstores
|
|
599
|
+
- **Document Loaders**: https://python.langchain.com/docs/integrations/document_loaders
|
|
600
|
+
- **Retrievers**: https://python.langchain.com/docs/modules/data_connection/retrievers
|