opencode-skills-antigravity 1.0.40 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. package/bundled-skills/.antigravity-install-manifest.json +7 -1
  2. package/bundled-skills/docs/integrations/jetski-cortex.md +3 -3
  3. package/bundled-skills/docs/integrations/jetski-gemini-loader/README.md +1 -1
  4. package/bundled-skills/docs/maintainers/repo-growth-seo.md +3 -3
  5. package/bundled-skills/docs/maintainers/skills-update-guide.md +1 -1
  6. package/bundled-skills/docs/sources/sources.md +2 -2
  7. package/bundled-skills/docs/users/bundles.md +1 -1
  8. package/bundled-skills/docs/users/claude-code-skills.md +1 -1
  9. package/bundled-skills/docs/users/gemini-cli-skills.md +1 -1
  10. package/bundled-skills/docs/users/getting-started.md +1 -1
  11. package/bundled-skills/docs/users/kiro-integration.md +1 -1
  12. package/bundled-skills/docs/users/usage.md +4 -4
  13. package/bundled-skills/docs/users/visual-guide.md +4 -4
  14. package/bundled-skills/hugging-face-cli/SKILL.md +192 -195
  15. package/bundled-skills/hugging-face-community-evals/SKILL.md +213 -0
  16. package/bundled-skills/hugging-face-community-evals/examples/.env.example +3 -0
  17. package/bundled-skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md +101 -0
  18. package/bundled-skills/hugging-face-community-evals/scripts/inspect_eval_uv.py +104 -0
  19. package/bundled-skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py +306 -0
  20. package/bundled-skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py +297 -0
  21. package/bundled-skills/hugging-face-dataset-viewer/SKILL.md +120 -120
  22. package/bundled-skills/hugging-face-gradio/SKILL.md +304 -0
  23. package/bundled-skills/hugging-face-gradio/examples.md +613 -0
  24. package/bundled-skills/hugging-face-jobs/SKILL.md +25 -18
  25. package/bundled-skills/hugging-face-jobs/index.html +216 -0
  26. package/bundled-skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  27. package/bundled-skills/hugging-face-jobs/references/hub_saving.md +352 -0
  28. package/bundled-skills/hugging-face-jobs/references/token_usage.md +570 -0
  29. package/bundled-skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  30. package/bundled-skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  31. package/bundled-skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  32. package/bundled-skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  33. package/bundled-skills/hugging-face-model-trainer/SKILL.md +11 -12
  34. package/bundled-skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  35. package/bundled-skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  36. package/bundled-skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  37. package/bundled-skills/hugging-face-model-trainer/references/local_training_macos.md +231 -0
  38. package/bundled-skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  39. package/bundled-skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  40. package/bundled-skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  41. package/bundled-skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  42. package/bundled-skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  43. package/bundled-skills/hugging-face-model-trainer/references/unsloth.md +313 -0
  44. package/bundled-skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  45. package/bundled-skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  46. package/bundled-skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  47. package/bundled-skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  48. package/bundled-skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  49. package/bundled-skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  50. package/bundled-skills/hugging-face-model-trainer/scripts/unsloth_sft_example.py +512 -0
  51. package/bundled-skills/hugging-face-paper-publisher/SKILL.md +11 -4
  52. package/bundled-skills/hugging-face-paper-publisher/examples/example_usage.md +326 -0
  53. package/bundled-skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  54. package/bundled-skills/hugging-face-paper-publisher/scripts/paper_manager.py +606 -0
  55. package/bundled-skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  56. package/bundled-skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  57. package/bundled-skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  58. package/bundled-skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  59. package/bundled-skills/hugging-face-papers/SKILL.md +241 -0
  60. package/bundled-skills/hugging-face-trackio/.claude-plugin/plugin.json +19 -0
  61. package/bundled-skills/hugging-face-trackio/SKILL.md +117 -0
  62. package/bundled-skills/hugging-face-trackio/references/alerts.md +196 -0
  63. package/bundled-skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  64. package/bundled-skills/hugging-face-trackio/references/retrieving_metrics.md +251 -0
  65. package/bundled-skills/hugging-face-vision-trainer/SKILL.md +595 -0
  66. package/bundled-skills/hugging-face-vision-trainer/references/finetune_sam2_trainer.md +254 -0
  67. package/bundled-skills/hugging-face-vision-trainer/references/hub_saving.md +618 -0
  68. package/bundled-skills/hugging-face-vision-trainer/references/image_classification_training_notebook.md +279 -0
  69. package/bundled-skills/hugging-face-vision-trainer/references/object_detection_training_notebook.md +700 -0
  70. package/bundled-skills/hugging-face-vision-trainer/references/reliability_principles.md +310 -0
  71. package/bundled-skills/hugging-face-vision-trainer/references/timm_trainer.md +91 -0
  72. package/bundled-skills/hugging-face-vision-trainer/scripts/dataset_inspector.py +814 -0
  73. package/bundled-skills/hugging-face-vision-trainer/scripts/estimate_cost.py +217 -0
  74. package/bundled-skills/hugging-face-vision-trainer/scripts/image_classification_training.py +383 -0
  75. package/bundled-skills/hugging-face-vision-trainer/scripts/object_detection_training.py +710 -0
  76. package/bundled-skills/hugging-face-vision-trainer/scripts/sam_segmentation_training.py +382 -0
  77. package/bundled-skills/transformers-js/SKILL.md +639 -0
  78. package/bundled-skills/transformers-js/references/CACHE.md +339 -0
  79. package/bundled-skills/transformers-js/references/CONFIGURATION.md +390 -0
  80. package/bundled-skills/transformers-js/references/EXAMPLES.md +605 -0
  81. package/bundled-skills/transformers-js/references/MODEL_ARCHITECTURES.md +167 -0
  82. package/bundled-skills/transformers-js/references/PIPELINE_OPTIONS.md +545 -0
  83. package/bundled-skills/transformers-js/references/TEXT_GENERATION.md +315 -0
  84. package/package.json +1 -1
@@ -0,0 +1,101 @@
1
+ # Usage Examples
2
+
3
+ This document provides practical examples for **running evaluations locally** against Hugging Face Hub models.
4
+
5
+ ## What this skill covers
6
+
7
+ - `inspect-ai` local runs
8
+ - `inspect-ai` with `vllm` or Transformers backends
9
+ - `lighteval` local runs with `vllm` or `accelerate`
10
+ - smoke tests and backend fallback patterns
11
+
12
+ ## What this skill does NOT cover
13
+
14
+ - `model-index`
15
+ - `.eval_results`
16
+ - community eval publication workflows
17
+ - model-card PR creation
18
+ - Hugging Face Jobs orchestration
19
+
20
+ If you want to run these same scripts remotely, use the `hugging-face-jobs` skill and pass one of the scripts in `scripts/`.
21
+
22
+ ## Setup
23
+
24
+ ```bash
25
+ cd skills/hugging-face-community-evals
26
+ export HF_TOKEN=hf_xxx
27
+ uv --version
28
+ ```
29
+
30
+ For local GPU runs:
31
+
32
+ ```bash
33
+ nvidia-smi
34
+ ```
35
+
36
+ ## inspect-ai examples
37
+
38
+ ### Quick smoke test
39
+
40
+ ```bash
41
+ uv run scripts/inspect_eval_uv.py \
42
+ --model meta-llama/Llama-3.2-1B \
43
+ --task mmlu \
44
+ --limit 10
45
+ ```
46
+
47
+ ### Local GPU with vLLM
48
+
49
+ ```bash
50
+ uv run scripts/inspect_vllm_uv.py \
51
+ --model meta-llama/Llama-3.2-8B-Instruct \
52
+ --task gsm8k \
53
+ --limit 20
54
+ ```
55
+
56
+ ### Transformers fallback
57
+
58
+ ```bash
59
+ uv run scripts/inspect_vllm_uv.py \
60
+ --model microsoft/phi-2 \
61
+ --task mmlu \
62
+ --backend hf \
63
+ --trust-remote-code \
64
+ --limit 20
65
+ ```
66
+
67
+ ## lighteval examples
68
+
69
+ ### Single task
70
+
71
+ ```bash
72
+ uv run scripts/lighteval_vllm_uv.py \
73
+ --model meta-llama/Llama-3.2-3B-Instruct \
74
+ --tasks "leaderboard|mmlu|5" \
75
+ --max-samples 20
76
+ ```
77
+
78
+ ### Multiple tasks
79
+
80
+ ```bash
81
+ uv run scripts/lighteval_vllm_uv.py \
82
+ --model meta-llama/Llama-3.2-3B-Instruct \
83
+ --tasks "leaderboard|mmlu|5,leaderboard|gsm8k|5" \
84
+ --max-samples 20 \
85
+ --use-chat-template
86
+ ```
87
+
88
+ ### accelerate fallback
89
+
90
+ ```bash
91
+ uv run scripts/lighteval_vllm_uv.py \
92
+ --model microsoft/phi-2 \
93
+ --tasks "leaderboard|mmlu|5" \
94
+ --backend accelerate \
95
+ --trust-remote-code \
96
+ --max-samples 20
97
+ ```
98
+
99
+ ## Hand-off to Hugging Face Jobs
100
+
101
+ When local hardware is not enough, switch to the `hugging-face-jobs` skill and run one of these scripts remotely. Keep the script path and args; move the orchestration there.
@@ -0,0 +1,104 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "inspect-ai>=0.3.0",
5
+ # "inspect-evals",
6
+ # "openai",
7
+ # ]
8
+ # ///
9
+
10
+ """
11
+ Entry point script for running inspect-ai evaluations against Hugging Face inference providers.
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import argparse
17
+ import os
18
+ import subprocess
19
+ import sys
20
+ from pathlib import Path
21
+ from typing import Optional
22
+
23
+
24
+ def _inspect_evals_tasks_root() -> Optional[Path]:
25
+ """Return the installed inspect_evals package path if available."""
26
+ try:
27
+ import inspect_evals
28
+
29
+ return Path(inspect_evals.__file__).parent
30
+ except Exception:
31
+ return None
32
+
33
+
34
+ def _normalize_task(task: str) -> str:
35
+ """Allow lighteval-style `suite|task|shots` strings by keeping the task name."""
36
+ if "|" in task:
37
+ parts = task.split("|")
38
+ if len(parts) >= 2 and parts[1]:
39
+ return parts[1]
40
+ return task
41
+
42
+
43
def main() -> None:
    """Parse CLI arguments and run an inspect-ai evaluation via the ``inspect`` CLI.

    The model is addressed through the ``hf-inference-providers/`` provider, so
    inference happens remotely; this process only orchestrates the subprocess.

    Raises:
        SystemExit: with the child process's return code when the evaluation fails.
    """
    parser = argparse.ArgumentParser(description="Inspect-ai job runner")
    parser.add_argument("--model", required=True, help="Model ID on Hugging Face Hub")
    parser.add_argument("--task", required=True, help="inspect-ai task to execute")
    parser.add_argument("--limit", type=int, default=None, help="Limit number of samples to evaluate")
    parser.add_argument(
        "--tasks-root",
        default=None,
        help="Optional path to inspect task files. Defaults to the installed inspect_evals package.",
    )
    parser.add_argument(
        "--sandbox",
        default="local",
        help="Sandbox backend to use (default: local for HF jobs without Docker).",
    )
    args = parser.parse_args()

    # Ensure downstream libraries can read the token passed as a secret.
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        os.environ.setdefault("HUGGING_FACE_HUB_TOKEN", hf_token)
        os.environ.setdefault("HF_HUB_TOKEN", hf_token)

    task = _normalize_task(args.task)
    tasks_root = Path(args.tasks_root) if args.tasks_root else _inspect_evals_tasks_root()
    if tasks_root and not tasks_root.exists():
        tasks_root = None

    cmd = [
        "inspect",
        "eval",
        task,
        "--model",
        f"hf-inference-providers/{args.model}",
        "--log-level",
        "info",
        # Reduce batch size to avoid OOM errors (default is 32)
        "--max-connections",
        "1",
        # Set a small positive temperature (HF doesn't allow temperature=0)
        "--temperature",
        "0.001",
    ]

    if args.sandbox:
        cmd.extend(["--sandbox", args.sandbox])

    # `is not None` rather than truthiness so an explicit --limit 0 is forwarded.
    if args.limit is not None:
        cmd.extend(["--limit", str(args.limit)])

    try:
        subprocess.run(cmd, check=True, cwd=tasks_root)
    except subprocess.CalledProcessError as exc:
        location = f" (cwd={tasks_root})" if tasks_root else ""
        print(f"Evaluation failed with exit code {exc.returncode}{location}", file=sys.stderr)
        # Exit with the child's return code instead of re-raising: matches the
        # sibling vLLM script and keeps job logs free of a Python traceback.
        sys.exit(exc.returncode)
    print("Evaluation complete.")


if __name__ == "__main__":
    main()
104
+
@@ -0,0 +1,306 @@
1
+ # /// script
2
+ # requires-python = ">=3.10"
3
+ # dependencies = [
4
+ # "inspect-ai>=0.3.0",
5
+ # "inspect-evals",
6
+ # "vllm>=0.4.0",
7
+ # "torch>=2.0.0",
8
+ # "transformers>=4.40.0",
9
+ # ]
10
+ # ///
11
+
12
+ """
13
+ Entry point script for running inspect-ai evaluations with vLLM or HuggingFace Transformers backend.
14
+
15
+ This script runs evaluations on custom HuggingFace models using local GPU inference,
16
+ separate from inference provider scripts (which use external APIs).
17
+
18
+ Usage (standalone):
19
+ uv run scripts/inspect_vllm_uv.py --model "meta-llama/Llama-3.2-1B" --task "mmlu"
20
+
21
+ Model backends:
22
+ - vllm: Fast inference with vLLM (recommended for large models)
23
+ - hf: HuggingFace Transformers backend (broader model compatibility)
24
+ """
25
+
26
+ from __future__ import annotations
27
+
28
+ import argparse
29
+ import os
30
+ import subprocess
31
+ import sys
32
+ from typing import Optional
33
+
34
+
35
def setup_environment() -> None:
    """Mirror ``HF_TOKEN`` into the env vars HuggingFace libraries read.

    No-op when ``HF_TOKEN`` is unset or empty; existing values of the target
    variables are never overwritten (``setdefault`` semantics).
    """
    token = os.getenv("HF_TOKEN")
    if not token:
        return
    for var in ("HUGGING_FACE_HUB_TOKEN", "HF_HUB_TOKEN"):
        os.environ.setdefault(var, token)
41
+
42
+
43
def run_inspect_vllm(
    model_id: str,
    task: str,
    limit: Optional[int] = None,
    max_connections: int = 4,
    temperature: float = 0.0,
    tensor_parallel_size: int = 1,
    gpu_memory_utilization: float = 0.8,
    dtype: str = "auto",
    trust_remote_code: bool = False,
    log_level: str = "info",
) -> None:
    """
    Run inspect-ai evaluation with vLLM backend.

    Args:
        model_id: HuggingFace model ID
        task: inspect-ai task to execute (e.g., "mmlu", "gsm8k")
        limit: Limit number of samples to evaluate
        max_connections: Maximum concurrent connections
        temperature: Sampling temperature
        tensor_parallel_size: Number of GPUs for tensor parallelism
        gpu_memory_utilization: GPU memory fraction
        dtype: Data type (auto, float16, bfloat16)
        trust_remote_code: Allow remote code execution
        log_level: Logging level
    """
    setup_environment()

    cmd = [
        "inspect",
        "eval",
        task,
        "--model",
        f"vllm/{model_id}",
        "--log-level",
        log_level,
        "--max-connections",
        str(max_connections),
        # vLLM supports temperature=0 unlike HF inference providers
        "--temperature",
        str(temperature),
    ]

    # FIX: `inspect eval` defines no --tensor-parallel-size /
    # --gpu-memory-utilization / --dtype / --trust-remote-code options, so the
    # previous code made the CLI reject the command whenever a non-default
    # value was supplied. Model-specific settings must be forwarded to the
    # vllm provider as `-M key=value` model arguments.
    if tensor_parallel_size != 1:
        cmd.extend(["-M", f"tensor_parallel_size={tensor_parallel_size}"])
    if gpu_memory_utilization != 0.8:
        cmd.extend(["-M", f"gpu_memory_utilization={gpu_memory_utilization}"])
    if dtype != "auto":
        cmd.extend(["-M", f"dtype={dtype}"])
    if trust_remote_code:
        cmd.extend(["-M", "trust_remote_code=true"])

    # `is not None` rather than truthiness so an explicit limit=0 is forwarded.
    if limit is not None:
        cmd.extend(["--limit", str(limit)])

    print(f"Running: {' '.join(cmd)}")

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as exc:
        print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr)
        sys.exit(exc.returncode)
    print("Evaluation complete.")
110
+
111
+
112
def run_inspect_hf(
    model_id: str,
    task: str,
    limit: Optional[int] = None,
    max_connections: int = 1,
    temperature: float = 0.001,
    device: str = "auto",
    dtype: str = "auto",
    trust_remote_code: bool = False,
    log_level: str = "info",
) -> None:
    """
    Run inspect-ai evaluation with HuggingFace Transformers backend.

    Use this when vLLM doesn't support the model architecture.

    Args:
        model_id: HuggingFace model ID
        task: inspect-ai task to execute
        limit: Limit number of samples
        max_connections: Maximum concurrent connections (keep low for memory)
        temperature: Sampling temperature
        device: Device to use (auto, cuda, cpu)
        dtype: Data type
        trust_remote_code: Allow remote code execution
        log_level: Logging level
    """
    setup_environment()

    cmd = [
        "inspect",
        "eval",
        task,
        "--model",
        f"hf/{model_id}",
        "--log-level",
        log_level,
        "--max-connections",
        str(max_connections),
        "--temperature",
        str(temperature),
    ]

    # FIX: `inspect eval` defines no --device / --dtype / --trust-remote-code
    # options, so the previous code made the CLI reject the command whenever a
    # non-default value was supplied. Provider-specific settings must be
    # forwarded to the hf provider as `-M key=value` model arguments.
    if device != "auto":
        cmd.extend(["-M", f"device={device}"])
    if dtype != "auto":
        cmd.extend(["-M", f"dtype={dtype}"])
    if trust_remote_code:
        cmd.extend(["-M", "trust_remote_code=true"])

    # `is not None` rather than truthiness so an explicit limit=0 is forwarded.
    if limit is not None:
        cmd.extend(["--limit", str(limit)])

    print(f"Running: {' '.join(cmd)}")

    try:
        subprocess.run(cmd, check=True)
    except subprocess.CalledProcessError as exc:
        print(f"Evaluation failed with exit code {exc.returncode}", file=sys.stderr)
        sys.exit(exc.returncode)
    print("Evaluation complete.")
175
+
176
+
177
def _build_parser() -> argparse.ArgumentParser:
    """Build the CLI parser for the vLLM/Transformers evaluation runner."""
    parser = argparse.ArgumentParser(
        description="Run inspect-ai evaluations with vLLM or HuggingFace Transformers on custom models",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Run MMLU with vLLM backend
  uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu

  # Run with HuggingFace Transformers backend
  uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu --backend hf

  # Run with limited samples for testing
  uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-1B --task mmlu --limit 10

  # Run on multiple GPUs with tensor parallelism
  uv run scripts/inspect_vllm_uv.py --model meta-llama/Llama-3.2-70B --task mmlu --tensor-parallel-size 4

Available tasks (from inspect-evals):
  - mmlu: Massive Multitask Language Understanding
  - gsm8k: Grade School Math
  - hellaswag: Common sense reasoning
  - arc_challenge: AI2 Reasoning Challenge
  - truthfulqa: TruthfulQA benchmark
  - winogrande: Winograd Schema Challenge
  - humaneval: Code generation (HumanEval)

""",
    )
    parser.add_argument(
        "--model",
        required=True,
        help="HuggingFace model ID (e.g., meta-llama/Llama-3.2-1B)",
    )
    parser.add_argument(
        "--task",
        required=True,
        help="inspect-ai task to execute (e.g., mmlu, gsm8k)",
    )
    parser.add_argument(
        "--backend",
        choices=["vllm", "hf"],
        default="vllm",
        help="Model backend (default: vllm)",
    )
    parser.add_argument(
        "--limit",
        type=int,
        default=None,
        help="Limit number of samples to evaluate",
    )
    parser.add_argument(
        "--max-connections",
        type=int,
        default=None,
        help="Maximum concurrent connections (default: 4 for vllm, 1 for hf)",
    )
    parser.add_argument(
        "--temperature",
        type=float,
        default=None,
        help="Sampling temperature (default: 0.0 for vllm, 0.001 for hf)",
    )
    parser.add_argument(
        "--tensor-parallel-size",
        type=int,
        default=1,
        help="Number of GPUs for tensor parallelism (vLLM only, default: 1)",
    )
    parser.add_argument(
        "--gpu-memory-utilization",
        type=float,
        default=0.8,
        help="GPU memory fraction to use (vLLM only, default: 0.8)",
    )
    parser.add_argument(
        "--dtype",
        default="auto",
        choices=["auto", "float16", "bfloat16", "float32"],
        help="Data type for model weights (default: auto)",
    )
    parser.add_argument(
        "--device",
        default="auto",
        help="Device for HF backend (auto, cuda, cpu)",
    )
    parser.add_argument(
        "--trust-remote-code",
        action="store_true",
        help="Allow executing remote code from model repository",
    )
    parser.add_argument(
        "--log-level",
        default="info",
        choices=["debug", "info", "warning", "error"],
        help="Logging level (default: info)",
    )
    return parser


def main() -> None:
    """Parse arguments and dispatch to the vLLM or Transformers backend runner."""
    args = _build_parser().parse_args()

    # Arguments shared by both backends; backend-specific defaults for
    # max_connections/temperature are resolved at the call sites below.
    shared = dict(
        model_id=args.model,
        task=args.task,
        limit=args.limit,
        dtype=args.dtype,
        trust_remote_code=args.trust_remote_code,
        log_level=args.log_level,
    )

    if args.backend == "vllm":
        run_inspect_vllm(
            max_connections=args.max_connections or 4,
            temperature=0.0 if args.temperature is None else args.temperature,
            tensor_parallel_size=args.tensor_parallel_size,
            gpu_memory_utilization=args.gpu_memory_utilization,
            **shared,
        )
    else:
        run_inspect_hf(
            max_connections=args.max_connections or 1,
            temperature=0.001 if args.temperature is None else args.temperature,
            device=args.device,
            **shared,
        )


if __name__ == "__main__":
    main()