opencode-skills-antigravity 1.0.39 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91) hide show
  1. package/bundled-skills/.antigravity-install-manifest.json +10 -1
  2. package/bundled-skills/docs/integrations/jetski-cortex.md +3 -3
  3. package/bundled-skills/docs/integrations/jetski-gemini-loader/README.md +1 -1
  4. package/bundled-skills/docs/maintainers/repo-growth-seo.md +3 -3
  5. package/bundled-skills/docs/maintainers/security-findings-triage-2026-03-29-refresh.csv +34 -0
  6. package/bundled-skills/docs/maintainers/security-findings-triage-2026-03-29-refresh.md +2 -0
  7. package/bundled-skills/docs/maintainers/skills-update-guide.md +1 -1
  8. package/bundled-skills/docs/sources/sources.md +2 -2
  9. package/bundled-skills/docs/users/bundles.md +1 -1
  10. package/bundled-skills/docs/users/claude-code-skills.md +1 -1
  11. package/bundled-skills/docs/users/gemini-cli-skills.md +1 -1
  12. package/bundled-skills/docs/users/getting-started.md +1 -1
  13. package/bundled-skills/docs/users/kiro-integration.md +1 -1
  14. package/bundled-skills/docs/users/usage.md +4 -4
  15. package/bundled-skills/docs/users/visual-guide.md +4 -4
  16. package/bundled-skills/hugging-face-cli/SKILL.md +192 -195
  17. package/bundled-skills/hugging-face-community-evals/SKILL.md +213 -0
  18. package/bundled-skills/hugging-face-community-evals/examples/.env.example +3 -0
  19. package/bundled-skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md +101 -0
  20. package/bundled-skills/hugging-face-community-evals/scripts/inspect_eval_uv.py +104 -0
  21. package/bundled-skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py +306 -0
  22. package/bundled-skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py +297 -0
  23. package/bundled-skills/hugging-face-dataset-viewer/SKILL.md +120 -120
  24. package/bundled-skills/hugging-face-gradio/SKILL.md +304 -0
  25. package/bundled-skills/hugging-face-gradio/examples.md +613 -0
  26. package/bundled-skills/hugging-face-jobs/SKILL.md +25 -18
  27. package/bundled-skills/hugging-face-jobs/index.html +216 -0
  28. package/bundled-skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  29. package/bundled-skills/hugging-face-jobs/references/hub_saving.md +352 -0
  30. package/bundled-skills/hugging-face-jobs/references/token_usage.md +570 -0
  31. package/bundled-skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  32. package/bundled-skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  33. package/bundled-skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  34. package/bundled-skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  35. package/bundled-skills/hugging-face-model-trainer/SKILL.md +11 -12
  36. package/bundled-skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  37. package/bundled-skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  38. package/bundled-skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  39. package/bundled-skills/hugging-face-model-trainer/references/local_training_macos.md +231 -0
  40. package/bundled-skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  41. package/bundled-skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  42. package/bundled-skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  43. package/bundled-skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  44. package/bundled-skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  45. package/bundled-skills/hugging-face-model-trainer/references/unsloth.md +313 -0
  46. package/bundled-skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  47. package/bundled-skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  48. package/bundled-skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  49. package/bundled-skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  50. package/bundled-skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  51. package/bundled-skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  52. package/bundled-skills/hugging-face-model-trainer/scripts/unsloth_sft_example.py +512 -0
  53. package/bundled-skills/hugging-face-paper-publisher/SKILL.md +11 -4
  54. package/bundled-skills/hugging-face-paper-publisher/examples/example_usage.md +326 -0
  55. package/bundled-skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  56. package/bundled-skills/hugging-face-paper-publisher/scripts/paper_manager.py +606 -0
  57. package/bundled-skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  58. package/bundled-skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  59. package/bundled-skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  60. package/bundled-skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  61. package/bundled-skills/hugging-face-papers/SKILL.md +241 -0
  62. package/bundled-skills/hugging-face-trackio/.claude-plugin/plugin.json +19 -0
  63. package/bundled-skills/hugging-face-trackio/SKILL.md +117 -0
  64. package/bundled-skills/hugging-face-trackio/references/alerts.md +196 -0
  65. package/bundled-skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  66. package/bundled-skills/hugging-face-trackio/references/retrieving_metrics.md +251 -0
  67. package/bundled-skills/hugging-face-vision-trainer/SKILL.md +595 -0
  68. package/bundled-skills/hugging-face-vision-trainer/references/finetune_sam2_trainer.md +254 -0
  69. package/bundled-skills/hugging-face-vision-trainer/references/hub_saving.md +618 -0
  70. package/bundled-skills/hugging-face-vision-trainer/references/image_classification_training_notebook.md +279 -0
  71. package/bundled-skills/hugging-face-vision-trainer/references/object_detection_training_notebook.md +700 -0
  72. package/bundled-skills/hugging-face-vision-trainer/references/reliability_principles.md +310 -0
  73. package/bundled-skills/hugging-face-vision-trainer/references/timm_trainer.md +91 -0
  74. package/bundled-skills/hugging-face-vision-trainer/scripts/dataset_inspector.py +814 -0
  75. package/bundled-skills/hugging-face-vision-trainer/scripts/estimate_cost.py +217 -0
  76. package/bundled-skills/hugging-face-vision-trainer/scripts/image_classification_training.py +383 -0
  77. package/bundled-skills/hugging-face-vision-trainer/scripts/object_detection_training.py +710 -0
  78. package/bundled-skills/hugging-face-vision-trainer/scripts/sam_segmentation_training.py +382 -0
  79. package/bundled-skills/jq/SKILL.md +273 -0
  80. package/bundled-skills/odoo-edi-connector/SKILL.md +32 -10
  81. package/bundled-skills/odoo-woocommerce-bridge/SKILL.md +9 -5
  82. package/bundled-skills/tmux/SKILL.md +370 -0
  83. package/bundled-skills/transformers-js/SKILL.md +639 -0
  84. package/bundled-skills/transformers-js/references/CACHE.md +339 -0
  85. package/bundled-skills/transformers-js/references/CONFIGURATION.md +390 -0
  86. package/bundled-skills/transformers-js/references/EXAMPLES.md +605 -0
  87. package/bundled-skills/transformers-js/references/MODEL_ARCHITECTURES.md +167 -0
  88. package/bundled-skills/transformers-js/references/PIPELINE_OPTIONS.md +545 -0
  89. package/bundled-skills/transformers-js/references/TEXT_GENERATION.md +315 -0
  90. package/bundled-skills/viboscope/SKILL.md +64 -0
  91. package/package.json +1 -1
@@ -0,0 +1,618 @@
1
+ # Saving Vision Models to Hugging Face Hub
2
+
3
+ ## Contents
4
+ - Why Hub Push is Required
5
+ - Required Configuration (TrainingArguments, job config)
6
+ - Complete Example
7
+ - What Gets Saved
8
+ - Important: Save Image Processor
9
+ - Checkpoint Saving
10
+ - Model Card Configuration
11
+ - Saving Label Mappings
12
+ - Authentication Methods
13
+ - Verification Checklist
14
+ - Repository Setup (automatic/manual creation, naming)
15
+ - Troubleshooting (401, 403, push failures, inference issues)
16
+ - Manual Push After Training
17
+ - Example: Full Production Setup
18
+ - Inference Example
19
+
20
+ ---
21
+
22
+ **CRITICAL:** Training environments are ephemeral. ALL results are lost when a job completes unless pushed to the Hub.
23
+
24
+ ## Why Hub Push is Required
25
+
26
+ When running on Hugging Face Jobs:
27
+ - Environment is temporary
28
+ - All files deleted on job completion
29
+ - No local disk persistence
30
+ - Cannot access results after job ends
31
+
32
+ **Without Hub push, training is completely wasted.**
33
+
34
+ ## Required Configuration
35
+
36
+ ### 1. Training Configuration
37
+
38
+ In your TrainingArguments:
39
+
40
+ ```python
41
+ from transformers import TrainingArguments
42
+
43
+ training_args = TrainingArguments(
44
+ output_dir="my-object-detector",
45
+ push_to_hub=True, # Enable Hub push
46
+ hub_model_id="username/model-name", # Target repository
47
+ )
48
+ ```
49
+
50
+ ### 2. Job Configuration
51
+
52
+ When submitting the job:
53
+
54
+ ```python
55
+ hf_jobs("uv", {
56
+ "script": training_script_content, # Pass the Python script content directly as a string
57
+ "secrets": {"HF_TOKEN": "$HF_TOKEN"} # Provide authentication
58
+ })
59
+ ```
60
+
61
+ **The literal string `$HF_TOKEN` is automatically replaced at submission time with the token of your locally authenticated Hugging Face account — never paste a raw token value into the job config.**
62
+
63
+ ## Complete Example
64
+
65
+ ```python
66
+ # train_detector.py
67
+ # /// script
68
+ # dependencies = ["transformers", "torch", "torchvision", "datasets"]
69
+ # ///
70
+
71
+ from transformers import (
72
+ AutoImageProcessor,
73
+ AutoModelForObjectDetection,
74
+ TrainingArguments,
75
+ Trainer
76
+ )
77
+ from datasets import load_dataset
78
+ import os
79
+ import torch
80
+
81
+ # Load dataset
82
+ dataset = load_dataset("cppe-5", split="train")
83
+
84
+ # Load model and processor
85
+ model_name = "facebook/detr-resnet-50"
86
+ image_processor = AutoImageProcessor.from_pretrained(model_name)
87
+ model = AutoModelForObjectDetection.from_pretrained(
88
+ model_name,
89
+ num_labels=5, # Number of classes
90
+ ignore_mismatched_sizes=True
91
+ )
92
+
93
+ # Configure with Hub push
94
+ training_args = TrainingArguments(
95
+ output_dir="my-detector",
96
+ num_train_epochs=10,
97
+ per_device_train_batch_size=8,
98
+
99
+ # ✅ CRITICAL: Hub push configuration
100
+ push_to_hub=True,
101
+ hub_model_id="myusername/cppe5-detector",
102
+
103
+ # Optional: Push strategy
104
+ hub_strategy="checkpoint", # Push checkpoints during training
105
+ )
106
+
107
+ # ✅ CRITICAL: Authenticate with Hub BEFORE creating Trainer
108
+ from huggingface_hub import login
109
+ hf_token = os.environ.get("HF_TOKEN") or os.environ.get("hfjob")
110
+ if hf_token:
111
+ login(token=hf_token)
112
+ training_args.hub_token = hf_token
113
+ elif training_args.push_to_hub:
114
+ raise ValueError("HF_TOKEN not found! Add secrets={'HF_TOKEN': '$HF_TOKEN'} to job config.")
115
+
116
+ # Define collate function
117
+ def collate_fn(batch):
118
+ pixel_values = [item["pixel_values"] for item in batch]
119
+ labels = [item["labels"] for item in batch]
120
+ encoding = image_processor.pad(pixel_values, return_tensors="pt")
121
+ return {
122
+ "pixel_values": encoding["pixel_values"],
123
+ "labels": labels
124
+ }
125
+
126
+ trainer = Trainer(
127
+ model=model,
128
+ args=training_args,
129
+ train_dataset=dataset,
130
+ data_collator=collate_fn,
131
+ )
132
+
133
+ trainer.train()
134
+
135
+ # ✅ Push final model and processor
136
+ trainer.push_to_hub()
137
+ image_processor.push_to_hub("myusername/cppe5-detector")
138
+
139
+ print("✅ Model saved to: https://huggingface.co/myusername/cppe5-detector")
140
+ ```
141
+
142
+ **Submit with authentication:**
143
+
144
+ ```python
145
+ hf_jobs("uv", {
146
+ "script": training_script_content, # Pass script content as a string, NOT a filename
147
+ "flavor": "a10g-large",
148
+ "timeout": "4h",
149
+ "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Required!
150
+ })
151
+ ```
152
+
153
+ ## What Gets Saved
154
+
155
+ When `push_to_hub=True`:
156
+
157
+ 1. **Model weights** - Final trained parameters
158
+ 2. **Image processor** - Associated preprocessing configuration
159
+ 3. **Configuration** - Model config (config.json) including:
160
+ - Number of labels/classes
161
+ - Architecture details (backbone, num_queries, etc.)
162
+ - Label mappings (id2label, label2id)
163
+ 4. **Training arguments** - Hyperparameters used
164
+ 5. **Model card** - Auto-generated documentation
165
+ 6. **Checkpoints** - If `save_strategy="steps"` enabled
166
+
167
+ ## Important: Save Image Processor
168
+
169
+ **Object detection models require the image processor to be saved separately:**
170
+
171
+ ```python
172
+ # After training completes
173
+ trainer.push_to_hub()
174
+
175
+ # ✅ Also push the image processor
176
+ image_processor.push_to_hub(
177
+ repo_id="username/model-name",
178
+ commit_message="Upload image processor"
179
+ )
180
+ ```
181
+
182
+ **Why this matters:**
183
+ - Models need specific image preprocessing (resizing, normalization)
184
+ - Image processor contains critical configuration
185
+ - Without it, model cannot be used for inference
186
+
187
+ ## Checkpoint Saving
188
+
189
+ Save intermediate checkpoints during training:
190
+
191
+ ```python
192
+ TrainingArguments(
193
+ output_dir="my-detector",
194
+ push_to_hub=True,
195
+ hub_model_id="username/my-detector",
196
+
197
+ # Checkpoint configuration
198
+ save_strategy="steps",
199
+ save_steps=500, # Save every 500 steps
200
+ save_total_limit=3, # Keep only last 3 checkpoints
201
+ hub_strategy="checkpoint", # Push checkpoints to Hub
202
+ )
203
+ ```
204
+
205
+ **Benefits:**
206
+ - Resume training if job fails
207
+ - Compare checkpoint performance
208
+ - Use intermediate models
209
+ - Track training progress
210
+
211
+ **Checkpoints are pushed to:** `username/my-detector` (same repo)
212
+
213
+ ## Model Card Configuration
214
+
215
+ Add metadata for better discoverability:
216
+
217
+ ```python
218
+ # At the end of training script
219
+ model.push_to_hub(
220
+ "username/my-detector",
221
+ commit_message="Upload trained object detection model",
222
+ tags=["object-detection", "vision", "cppe-5"],
223
+ model_card_kwargs={
224
+ "license": "apache-2.0",
225
+ "dataset": "cppe-5",
226
+ "metrics": ["map", "recall", "precision"],
227
+ "pipeline_tag": "object-detection",
228
+ }
229
+ )
230
+ ```
231
+
232
+ ## Saving Label Mappings
233
+
234
+ **Critical for object detection:** Save class labels with the model:
235
+
236
+ ```python
237
+ # Define your label mappings
238
+ id2label = {0: "Coverall", 1: "Face_Shield", 2: "Gloves", 3: "Goggles", 4: "Mask"}
239
+ label2id = {v: k for k, v in id2label.items()}
240
+
241
+ # Update model config before training
242
+ model.config.id2label = id2label
243
+ model.config.label2id = label2id
244
+
245
+ # Now train and push
246
+ trainer.train()
247
+ trainer.push_to_hub()
248
+ ```
249
+
250
+ **Without label mappings:**
251
+ - Model outputs will be numeric IDs only
252
+ - No human-readable class names
253
+ - Difficult to interpret results
254
+
255
+ ## Authentication Methods
256
+
257
+ For a complete guide on token types, `$HF_TOKEN` automatic replacement, `secrets` vs `env` differences, and security best practices, see the `hugging-face-jobs` skill → *Token Usage Guide*.
258
+
259
+ **Recommended:** Always pass tokens via `secrets` (encrypted server-side):
260
+
261
+ ```python
262
+ "secrets": {"HF_TOKEN": "$HF_TOKEN"} # ✅ Automatic replacement with your logged-in token
263
+ ```
264
+
265
+ ## Verification Checklist
266
+
267
+ Before submitting any training job, verify:
268
+
269
+ - [ ] `push_to_hub=True` in TrainingArguments
270
+ - [ ] `hub_model_id` is specified (format: `username/model-name`)
271
+ - [ ] Image processor will be saved separately
272
+ - [ ] Label mappings (id2label, label2id) are configured
273
+ - [ ] Repository name doesn't conflict with existing repos
274
+ - [ ] You have write access to the target namespace
275
+
276
+ ## Repository Setup
277
+
278
+ ### Automatic Creation
279
+
280
+ If repository doesn't exist, it's created automatically when first pushing.
281
+
282
+ ### Manual Creation
283
+
284
+ Create repository before training:
285
+
286
+ ```python
287
+ from huggingface_hub import HfApi
288
+
289
+ api = HfApi()
290
+ api.create_repo(
291
+ repo_id="username/detector-name",
292
+ repo_type="model",
293
+ private=False, # or True for private repo
294
+ )
295
+ ```
296
+
297
+ ### Repository Naming
298
+
299
+ **Valid names:**
300
+ - `username/detr-cppe5`
301
+ - `username/yolos-object-detector`
302
+ - `organization/custom-detector`
303
+
304
+ **Invalid or discouraged names:**
305
+ - `detector-name` (no namespace — pushed to your own account; prefer an explicit `username/...`)
306
+ - `username/detector name` (spaces not allowed)
307
+ - `username/DETECTOR` (uppercase discouraged)
308
+
309
+ **Recommended naming:**
310
+ - Include model architecture: `detr-`, `yolos-`, `deta-`
311
+ - Include dataset: `-cppe5`, `-coco`, `-voc`
312
+ - Be descriptive: `detr-resnet50-cppe5` > `model1`
313
+
314
+ ## Troubleshooting
315
+
316
+ ### Error: 401 Unauthorized
317
+
318
+ **Cause:** HF_TOKEN not provided, invalid, or not authenticated before Trainer init
319
+
320
+ **Solutions:**
321
+ 1. Verify `secrets={"HF_TOKEN": "$HF_TOKEN"}` in job config
322
+ 2. Verify script calls `login(token=hf_token)` AND sets `training_args.hub_token = hf_token` BEFORE creating the `Trainer`
323
+ 3. Check you're logged in locally: `hf auth whoami`
324
+ 4. Re-login: `hf auth login`
325
+
326
+ **Root cause:** The `Trainer` calls `create_repo(token=self.args.hub_token)` during `__init__()` when `push_to_hub=True`. Relying on implicit env-var token resolution is unreliable in Jobs. Calling `login()` saves the token globally, and setting `training_args.hub_token` ensures the Trainer passes it explicitly to all Hub API calls.
327
+
328
+ ### Error: 403 Forbidden
329
+
330
+ **Cause:** No write access to repository
331
+
332
+ **Solutions:**
333
+ 1. Check repository namespace matches your username
334
+ 2. Verify you're a member of organization (if using org namespace)
335
+ 3. Check repository isn't private (if accessing org repo)
336
+
337
+ ### Error: Repository not found
338
+
339
+ **Cause:** Repository doesn't exist and auto-creation failed
340
+
341
+ **Solutions:**
342
+ 1. Manually create repository first
343
+ 2. Check repository name format
344
+ 3. Verify namespace exists
345
+
346
+ ### Error: Push failed during training
347
+
348
+ **Cause:** Network issues or Hub unavailable
349
+
350
+ **What this means:**
351
+ 1. Training may have completed even though the final push failed — check the job logs
352
+ 2. Checkpoints pushed earlier in the run (with `hub_strategy="checkpoint"`) may already be on the Hub
353
+ 3. Re-run the push manually while the output files still exist (see "Manual Push After Training")
354
+
355
+ ### Issue: Model loads but inference fails
356
+
357
+ **Possible causes:**
358
+ 1. Image processor not saved—verify it's pushed separately
359
+ 2. Label mappings missing—check config.json has id2label
360
+ 3. Wrong image size—verify image processor matches training config
361
+
362
+ ### Issue: Model saved but not visible
363
+
364
+ **Possible causes:**
365
+ 1. Repository is private—check https://huggingface.co/username
366
+ 2. Wrong namespace—verify `hub_model_id` matches login
367
+ 3. Push still in progress—wait a few minutes
368
+
369
+ ## Manual Push After Training
370
+
371
+ If training completes but push fails, push manually:
372
+
373
+ ```python
374
+ from transformers import AutoModelForObjectDetection, AutoImageProcessor
375
+
376
+ # Load from local checkpoint
377
+ model = AutoModelForObjectDetection.from_pretrained("./output_dir")
378
+ image_processor = AutoImageProcessor.from_pretrained("./output_dir")
379
+
380
+ # Push to Hub
381
+ model.push_to_hub("username/model-name", token="hf_abc123...")
382
+ image_processor.push_to_hub("username/model-name", token="hf_abc123...")
383
+ ```
384
+
385
+ **Note:** This requires the checkpoint files to still exist. On Hugging Face Jobs the environment is deleted when the job completes, so a manual push is only possible from a persistent machine (e.g. local training); on Jobs, rely on `hub_strategy="checkpoint"` so pushes happen during the run.
386
+
387
+ ## Best Practices
388
+
389
+ 1. **Always enable `push_to_hub=True`**
390
+ 2. **Save image processor separately** - critical for inference
391
+ 3. **Configure label mappings** before training
392
+ 4. **Use checkpoint saving** for long training runs
393
+ 5. **Verify Hub push** in logs before job completes
394
+ 6. **Set appropriate `save_total_limit`** to avoid excessive checkpoints
395
+ 7. **Use descriptive repo names** (e.g., `detr-cppe5` not `detector1`)
396
+ 8. **Add model card** with:
397
+ - Training dataset
398
+ - Evaluation metrics (mAP, IoU)
399
+ - Example usage code
400
+ - Limitations
401
+ 9. **Tag models appropriately**:
402
+ - `object-detection`
403
+ - Architecture: `detr`, `yolos`, `deta`
404
+ - Dataset: `coco`, `voc`, `cppe-5`
405
+
406
+ ## Monitoring Push Progress
407
+
408
+ Check logs for push progress:
409
+
410
+ ```python
411
+ hf_jobs("logs", {"job_id": "your-job-id"})
412
+ ```
413
+
414
+ **Look for:**
415
+ ```
416
+ Pushing model to username/detector-name...
417
+ Upload file pytorch_model.bin: 100%
418
+ ✅ Model pushed successfully
419
+ Pushing image processor...
420
+ ✅ Image processor pushed successfully
421
+ ```
422
+
423
+ ## Example: Full Production Setup
424
+
425
+ ```python
426
+ # production_detector.py
427
+ # /// script
428
+ # dependencies = [
429
+ # "transformers>=4.30.0",
430
+ # "torch>=2.0.0",
431
+ # "torchvision>=0.15.0",
432
+ # "datasets>=2.12.0",
433
+ # "evaluate>=0.4.0"
434
+ # ]
435
+ # ///
436
+
437
+ from transformers import (
438
+ AutoImageProcessor,
439
+ AutoModelForObjectDetection,
440
+ TrainingArguments,
441
+ Trainer
442
+ )
443
+ from datasets import load_dataset
444
+ import os
445
+ import torch
446
+
447
+ # Configuration
448
+ MODEL_NAME = "facebook/detr-resnet-50"
449
+ DATASET_NAME = "cppe-5"
450
+ HUB_MODEL_ID = "myusername/detr-cppe5-detector"
451
+ NUM_CLASSES = 5
452
+
453
+ # Class labels
454
+ id2label = {0: "Coverall", 1: "Face_Shield", 2: "Gloves", 3: "Goggles", 4: "Mask"}
455
+ label2id = {v: k for k, v in id2label.items()}
456
+
457
+ print(f"🔧 Loading dataset: {DATASET_NAME}")
458
+ dataset = load_dataset(DATASET_NAME, split="train")
459
+ print(f"✅ Dataset loaded: {len(dataset)} examples")
460
+
461
+ print(f"🔧 Loading model: {MODEL_NAME}")
462
+ image_processor = AutoImageProcessor.from_pretrained(MODEL_NAME)
463
+ model = AutoModelForObjectDetection.from_pretrained(
464
+ MODEL_NAME,
465
+ num_labels=NUM_CLASSES,
466
+ id2label=id2label,
467
+ label2id=label2id,
468
+ ignore_mismatched_sizes=True
469
+ )
470
+ print("✅ Model loaded")
471
+
472
+ # Configure with comprehensive Hub settings
473
+ training_args = TrainingArguments(
474
+ output_dir="detr-cppe5",
475
+
476
+ # Hub configuration
477
+ push_to_hub=True,
478
+ hub_model_id=HUB_MODEL_ID,
479
+ hub_strategy="checkpoint", # Push checkpoints
480
+
481
+ # Checkpoint configuration
482
+ save_strategy="steps",
483
+ save_steps=500,
484
+ save_total_limit=3,
485
+
486
+ # Training settings
487
+ num_train_epochs=10,
488
+ per_device_train_batch_size=8,
489
+ gradient_accumulation_steps=2,
490
+ learning_rate=1e-4,
491
+ warmup_steps=500,
492
+
493
+ # Evaluation
494
+ eval_strategy="steps",
495
+ eval_steps=500,
496
+
497
+ # Logging
498
+ logging_steps=50,
499
+ logging_first_step=True,
500
+
501
+ # Performance
502
+ fp16=True, # Mixed precision training
503
+ dataloader_num_workers=4,
504
+ )
505
+
506
+ # ✅ CRITICAL: Authenticate with Hub BEFORE creating Trainer
507
+ # login() saves the token globally so ALL hub operations can find it.
508
+ from huggingface_hub import login
509
+ hf_token = os.environ.get("HF_TOKEN") or os.environ.get("hfjob")
510
+ if hf_token:
511
+ login(token=hf_token)
512
+ training_args.hub_token = hf_token
513
+ elif training_args.push_to_hub:
514
+ raise ValueError("HF_TOKEN not found! Add secrets={'HF_TOKEN': '$HF_TOKEN'} to job config.")
515
+
516
+ # Data collator
517
+ def collate_fn(batch):
518
+ pixel_values = [item["pixel_values"] for item in batch]
519
+ labels = [item["labels"] for item in batch]
520
+ encoding = image_processor.pad(pixel_values, return_tensors="pt")
521
+ return {
522
+ "pixel_values": encoding["pixel_values"],
523
+ "labels": labels
524
+ }
525
+
526
+ # Create trainer
527
+ trainer = Trainer(
528
+ model=model,
529
+ args=training_args,
530
+ train_dataset=dataset,
531
+ data_collator=collate_fn,
532
+ )
533
+
534
+ print("🚀 Starting training...")
535
+ trainer.train()
536
+
537
+ print("💾 Pushing final model to Hub...")
538
+ trainer.push_to_hub(
539
+ commit_message="Upload trained DETR model on CPPE-5",
540
+ tags=["object-detection", "detr", "cppe-5", "vision"],
541
+ )
542
+
543
+ print("💾 Pushing image processor to Hub...")
544
+ image_processor.push_to_hub(
545
+ repo_id=HUB_MODEL_ID,
546
+ commit_message="Upload image processor"
547
+ )
548
+
549
+ print("✅ Training complete!")
550
+ print(f"Model available at: https://huggingface.co/{HUB_MODEL_ID}")
551
+ print(f"\nTo use your model:")
552
+ print(f"```python")
553
+ print(f"from transformers import AutoImageProcessor, AutoModelForObjectDetection")
554
+ print(f"")
555
+ print(f"processor = AutoImageProcessor.from_pretrained('{HUB_MODEL_ID}')")
556
+ print(f"model = AutoModelForObjectDetection.from_pretrained('{HUB_MODEL_ID}')")
557
+ print(f"```")
558
+ ```
559
+
560
+ **Submit:**
561
+
562
+ ```python
563
+ hf_jobs("uv", {
564
+ "script": training_script_content, # Pass script content as a string, NOT a filename
565
+ "flavor": "a10g-large",
566
+ "timeout": "8h",
567
+ "secrets": {"HF_TOKEN": "$HF_TOKEN"}
568
+ })
569
+ ```
570
+
571
+ ## Inference Example
572
+
573
+ After training, use your model:
574
+
575
+ ```python
576
+ from transformers import AutoImageProcessor, AutoModelForObjectDetection
577
+ from PIL import Image
578
+ import torch
579
+
580
+ # Load model from Hub
581
+ processor = AutoImageProcessor.from_pretrained("username/detr-cppe5-detector")
582
+ model = AutoModelForObjectDetection.from_pretrained("username/detr-cppe5-detector")
583
+
584
+ # Load and process image
585
+ image = Image.open("test_image.jpg")
586
+ inputs = processor(images=image, return_tensors="pt")
587
+
588
+ # Run inference
589
+ with torch.no_grad():
590
+ outputs = model(**inputs)
591
+
592
+ # Post-process results
593
+ target_sizes = torch.tensor([image.size[::-1]])
594
+ results = processor.post_process_object_detection(
595
+ outputs,
596
+ threshold=0.5,
597
+ target_sizes=target_sizes
598
+ )[0]
599
+
600
+ # Print detections
601
+ for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
602
+ box = [round(i, 2) for i in box.tolist()]
603
+ print(
604
+ f"Detected {model.config.id2label[label.item()]} with confidence "
605
+ f"{round(score.item(), 3)} at location {box}"
606
+ )
607
+ ```
608
+
609
+ ## Key Takeaway
610
+
611
+ **Without `push_to_hub=True` and `secrets={"HF_TOKEN": "$HF_TOKEN"}`, all training results are permanently lost.**
612
+
613
+ **For object detection, also remember to:**
614
+ 1. Save the image processor separately
615
+ 2. Configure label mappings (id2label, label2id)
616
+ 3. Include appropriate model card metadata
617
+
618
+ Always verify all three are configured before submitting any training job.