opencode-skills-antigravity 1.0.39 → 1.0.41

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (91)
  1. package/bundled-skills/.antigravity-install-manifest.json +10 -1
  2. package/bundled-skills/docs/integrations/jetski-cortex.md +3 -3
  3. package/bundled-skills/docs/integrations/jetski-gemini-loader/README.md +1 -1
  4. package/bundled-skills/docs/maintainers/repo-growth-seo.md +3 -3
  5. package/bundled-skills/docs/maintainers/security-findings-triage-2026-03-29-refresh.csv +34 -0
  6. package/bundled-skills/docs/maintainers/security-findings-triage-2026-03-29-refresh.md +2 -0
  7. package/bundled-skills/docs/maintainers/skills-update-guide.md +1 -1
  8. package/bundled-skills/docs/sources/sources.md +2 -2
  9. package/bundled-skills/docs/users/bundles.md +1 -1
  10. package/bundled-skills/docs/users/claude-code-skills.md +1 -1
  11. package/bundled-skills/docs/users/gemini-cli-skills.md +1 -1
  12. package/bundled-skills/docs/users/getting-started.md +1 -1
  13. package/bundled-skills/docs/users/kiro-integration.md +1 -1
  14. package/bundled-skills/docs/users/usage.md +4 -4
  15. package/bundled-skills/docs/users/visual-guide.md +4 -4
  16. package/bundled-skills/hugging-face-cli/SKILL.md +192 -195
  17. package/bundled-skills/hugging-face-community-evals/SKILL.md +213 -0
  18. package/bundled-skills/hugging-face-community-evals/examples/.env.example +3 -0
  19. package/bundled-skills/hugging-face-community-evals/examples/USAGE_EXAMPLES.md +101 -0
  20. package/bundled-skills/hugging-face-community-evals/scripts/inspect_eval_uv.py +104 -0
  21. package/bundled-skills/hugging-face-community-evals/scripts/inspect_vllm_uv.py +306 -0
  22. package/bundled-skills/hugging-face-community-evals/scripts/lighteval_vllm_uv.py +297 -0
  23. package/bundled-skills/hugging-face-dataset-viewer/SKILL.md +120 -120
  24. package/bundled-skills/hugging-face-gradio/SKILL.md +304 -0
  25. package/bundled-skills/hugging-face-gradio/examples.md +613 -0
  26. package/bundled-skills/hugging-face-jobs/SKILL.md +25 -18
  27. package/bundled-skills/hugging-face-jobs/index.html +216 -0
  28. package/bundled-skills/hugging-face-jobs/references/hardware_guide.md +336 -0
  29. package/bundled-skills/hugging-face-jobs/references/hub_saving.md +352 -0
  30. package/bundled-skills/hugging-face-jobs/references/token_usage.md +570 -0
  31. package/bundled-skills/hugging-face-jobs/references/troubleshooting.md +475 -0
  32. package/bundled-skills/hugging-face-jobs/scripts/cot-self-instruct.py +718 -0
  33. package/bundled-skills/hugging-face-jobs/scripts/finepdfs-stats.py +546 -0
  34. package/bundled-skills/hugging-face-jobs/scripts/generate-responses.py +587 -0
  35. package/bundled-skills/hugging-face-model-trainer/SKILL.md +11 -12
  36. package/bundled-skills/hugging-face-model-trainer/references/gguf_conversion.md +296 -0
  37. package/bundled-skills/hugging-face-model-trainer/references/hardware_guide.md +283 -0
  38. package/bundled-skills/hugging-face-model-trainer/references/hub_saving.md +364 -0
  39. package/bundled-skills/hugging-face-model-trainer/references/local_training_macos.md +231 -0
  40. package/bundled-skills/hugging-face-model-trainer/references/reliability_principles.md +371 -0
  41. package/bundled-skills/hugging-face-model-trainer/references/trackio_guide.md +189 -0
  42. package/bundled-skills/hugging-face-model-trainer/references/training_methods.md +150 -0
  43. package/bundled-skills/hugging-face-model-trainer/references/training_patterns.md +203 -0
  44. package/bundled-skills/hugging-face-model-trainer/references/troubleshooting.md +282 -0
  45. package/bundled-skills/hugging-face-model-trainer/references/unsloth.md +313 -0
  46. package/bundled-skills/hugging-face-model-trainer/scripts/convert_to_gguf.py +424 -0
  47. package/bundled-skills/hugging-face-model-trainer/scripts/dataset_inspector.py +417 -0
  48. package/bundled-skills/hugging-face-model-trainer/scripts/estimate_cost.py +150 -0
  49. package/bundled-skills/hugging-face-model-trainer/scripts/train_dpo_example.py +106 -0
  50. package/bundled-skills/hugging-face-model-trainer/scripts/train_grpo_example.py +89 -0
  51. package/bundled-skills/hugging-face-model-trainer/scripts/train_sft_example.py +122 -0
  52. package/bundled-skills/hugging-face-model-trainer/scripts/unsloth_sft_example.py +512 -0
  53. package/bundled-skills/hugging-face-paper-publisher/SKILL.md +11 -4
  54. package/bundled-skills/hugging-face-paper-publisher/examples/example_usage.md +326 -0
  55. package/bundled-skills/hugging-face-paper-publisher/references/quick_reference.md +216 -0
  56. package/bundled-skills/hugging-face-paper-publisher/scripts/paper_manager.py +606 -0
  57. package/bundled-skills/hugging-face-paper-publisher/templates/arxiv.md +299 -0
  58. package/bundled-skills/hugging-face-paper-publisher/templates/ml-report.md +358 -0
  59. package/bundled-skills/hugging-face-paper-publisher/templates/modern.md +319 -0
  60. package/bundled-skills/hugging-face-paper-publisher/templates/standard.md +201 -0
  61. package/bundled-skills/hugging-face-papers/SKILL.md +241 -0
  62. package/bundled-skills/hugging-face-trackio/.claude-plugin/plugin.json +19 -0
  63. package/bundled-skills/hugging-face-trackio/SKILL.md +117 -0
  64. package/bundled-skills/hugging-face-trackio/references/alerts.md +196 -0
  65. package/bundled-skills/hugging-face-trackio/references/logging_metrics.md +206 -0
  66. package/bundled-skills/hugging-face-trackio/references/retrieving_metrics.md +251 -0
  67. package/bundled-skills/hugging-face-vision-trainer/SKILL.md +595 -0
  68. package/bundled-skills/hugging-face-vision-trainer/references/finetune_sam2_trainer.md +254 -0
  69. package/bundled-skills/hugging-face-vision-trainer/references/hub_saving.md +618 -0
  70. package/bundled-skills/hugging-face-vision-trainer/references/image_classification_training_notebook.md +279 -0
  71. package/bundled-skills/hugging-face-vision-trainer/references/object_detection_training_notebook.md +700 -0
  72. package/bundled-skills/hugging-face-vision-trainer/references/reliability_principles.md +310 -0
  73. package/bundled-skills/hugging-face-vision-trainer/references/timm_trainer.md +91 -0
  74. package/bundled-skills/hugging-face-vision-trainer/scripts/dataset_inspector.py +814 -0
  75. package/bundled-skills/hugging-face-vision-trainer/scripts/estimate_cost.py +217 -0
  76. package/bundled-skills/hugging-face-vision-trainer/scripts/image_classification_training.py +383 -0
  77. package/bundled-skills/hugging-face-vision-trainer/scripts/object_detection_training.py +710 -0
  78. package/bundled-skills/hugging-face-vision-trainer/scripts/sam_segmentation_training.py +382 -0
  79. package/bundled-skills/jq/SKILL.md +273 -0
  80. package/bundled-skills/odoo-edi-connector/SKILL.md +32 -10
  81. package/bundled-skills/odoo-woocommerce-bridge/SKILL.md +9 -5
  82. package/bundled-skills/tmux/SKILL.md +370 -0
  83. package/bundled-skills/transformers-js/SKILL.md +639 -0
  84. package/bundled-skills/transformers-js/references/CACHE.md +339 -0
  85. package/bundled-skills/transformers-js/references/CONFIGURATION.md +390 -0
  86. package/bundled-skills/transformers-js/references/EXAMPLES.md +605 -0
  87. package/bundled-skills/transformers-js/references/MODEL_ARCHITECTURES.md +167 -0
  88. package/bundled-skills/transformers-js/references/PIPELINE_OPTIONS.md +545 -0
  89. package/bundled-skills/transformers-js/references/TEXT_GENERATION.md +315 -0
  90. package/bundled-skills/viboscope/SKILL.md +64 -0
  91. package/package.json +1 -1
@@ -0,0 +1,382 @@
1
+ # /// script
2
+ # dependencies = [
3
+ # "transformers>=5.2.0",
4
+ # "accelerate>=1.1.0",
5
+ # "datasets>=4.0",
6
+ # "torchvision",
7
+ # "monai",
8
+ # "trackio",
9
+ # "huggingface_hub",
10
+ # ]
11
+ # ///
12
+
13
+ """Fine-tune SAM or SAM2 for segmentation using bounding-box or point prompts with the HF Trainer API."""
14
+
15
+ import json
16
+ import logging
17
+ import math
18
+ import os
19
+ import sys
20
+ from dataclasses import dataclass, field
21
+ from typing import Any
22
+
23
+ import numpy as np
24
+ import torch
25
+ import torch.nn.functional as F
26
+ from datasets import load_dataset
27
+ from torch.utils.data import Dataset
28
+
29
+ import monai
30
+ import trackio
31
+
32
+ import transformers
33
+ from transformers import (
34
+ HfArgumentParser,
35
+ Trainer,
36
+ TrainingArguments,
37
+ )
38
+ from transformers.utils import check_min_version
39
+
40
+ logger = logging.getLogger(__name__)
41
+
42
+ check_min_version("4.57.0.dev0")
43
+
44
+
45
+ # ---------------------------------------------------------------------------
46
+ # Dataset wrapper
47
+ # ---------------------------------------------------------------------------
48
+
49
+ class SAMSegmentationDataset(Dataset):
50
+ """Wraps a HF dataset into the format expected by SAM/SAM2 processors.
51
+
52
+ Each sample must contain an image, a binary mask, and a prompt (bbox or
53
+ point). Prompts are read from a JSON-encoded ``prompt`` column or from
54
+ dedicated ``bbox`` / ``point`` columns.
55
+ """
56
+
57
+ def __init__(self, dataset, processor, prompt_type: str,
58
+ image_col: str, mask_col: str, prompt_col: str | None,
59
+ bbox_col: str | None, point_col: str | None):
60
+ self.dataset = dataset
61
+ self.processor = processor
62
+ self.prompt_type = prompt_type
63
+ self.image_col = image_col
64
+ self.mask_col = mask_col
65
+ self.prompt_col = prompt_col
66
+ self.bbox_col = bbox_col
67
+ self.point_col = point_col
68
+
69
+ def __len__(self):
70
+ return len(self.dataset)
71
+
72
+ def _extract_prompt(self, item):
73
+ if self.prompt_col and self.prompt_col in item:
74
+ raw = item[self.prompt_col]
75
+ parsed = json.loads(raw) if isinstance(raw, str) else raw
76
+ if self.prompt_type == "bbox":
77
+ return parsed.get("bbox") or parsed.get("box")
78
+ return parsed.get("point") or parsed.get("points")
79
+
80
+ if self.prompt_type == "bbox" and self.bbox_col:
81
+ return item[self.bbox_col]
82
+ if self.prompt_type == "point" and self.point_col:
83
+ return item[self.point_col]
84
+ raise ValueError("Could not extract prompt from sample")
85
+
86
+ def __getitem__(self, idx):
87
+ item = self.dataset[idx]
88
+ image = item[self.image_col]
89
+ prompt = self._extract_prompt(item)
90
+
91
+ if self.prompt_type == "bbox":
92
+ inputs = self.processor(image, input_boxes=[[prompt]], return_tensors="pt")
93
+ else:
94
+ if isinstance(prompt[0], (int, float)):
95
+ prompt = [prompt]
96
+ inputs = self.processor(image, input_points=[[prompt]], return_tensors="pt")
97
+
98
+ mask = np.array(item[self.mask_col])
99
+ if mask.ndim == 3:
100
+ mask = mask[:, :, 0]
101
+ inputs["labels"] = (mask > 0).astype(np.float32)
102
+ inputs["original_image_size"] = torch.tensor(image.size[::-1])
103
+ return inputs
104
+
105
+
106
+ def collate_fn(batch):
107
+ pixel_values = torch.cat([item["pixel_values"] for item in batch], dim=0)
108
+ original_sizes = torch.stack([item["original_sizes"] for item in batch])
109
+ original_image_size = torch.stack([item["original_image_size"] for item in batch])
110
+
111
+ has_boxes = "input_boxes" in batch[0]
112
+ has_points = "input_points" in batch[0]
113
+
114
+ labels = torch.cat(
115
+ [
116
+ F.interpolate(
117
+ torch.as_tensor(x["labels"]).unsqueeze(0).unsqueeze(0).float(),
118
+ size=(256, 256),
119
+ mode="nearest",
120
+ )
121
+ for x in batch
122
+ ],
123
+ dim=0,
124
+ ).long()
125
+
126
+ result = {
127
+ "pixel_values": pixel_values,
128
+ "original_sizes": original_sizes,
129
+ "labels": labels,
130
+ "original_image_size": original_image_size,
131
+ "multimask_output": False,
132
+ }
133
+
134
+ if has_boxes:
135
+ result["input_boxes"] = torch.cat([item["input_boxes"] for item in batch], dim=0)
136
+ if has_points:
137
+ result["input_points"] = torch.cat([item["input_points"] for item in batch], dim=0)
138
+ if "input_labels" in batch[0]:
139
+ result["input_labels"] = torch.cat([item["input_labels"] for item in batch], dim=0)
140
+
141
+ return result
142
+
143
+
144
+ # ---------------------------------------------------------------------------
145
+ # Custom loss (SAM/SAM2 don't compute loss in forward())
146
+ # ---------------------------------------------------------------------------
147
+
148
+ seg_loss = monai.losses.DiceCELoss(sigmoid=True, squared_pred=True, reduction="mean")
149
+
150
+
151
+ def compute_loss(outputs, labels, num_items_in_batch=None):
152
+ predicted_masks = outputs.pred_masks.squeeze(1)
153
+ return seg_loss(predicted_masks, labels.float())
154
+
155
+
156
+ # ---------------------------------------------------------------------------
157
+ # CLI arguments
158
+ # ---------------------------------------------------------------------------
159
+
160
+ @dataclass
161
+ class DataTrainingArguments:
162
+ dataset_name: str = field(
163
+ default="merve/MicroMat-mini",
164
+ metadata={"help": "Hub dataset ID."},
165
+ )
166
+ dataset_config_name: str | None = field(
167
+ default=None,
168
+ metadata={"help": "Dataset config name."},
169
+ )
170
+ train_val_split: float | None = field(
171
+ default=0.1,
172
+ metadata={"help": "Fraction to split off for validation (used when no validation split exists)."},
173
+ )
174
+ max_train_samples: int | None = field(
175
+ default=None,
176
+ metadata={"help": "Truncate training set (for quick tests)."},
177
+ )
178
+ max_eval_samples: int | None = field(
179
+ default=None,
180
+ metadata={"help": "Truncate evaluation set."},
181
+ )
182
+ image_column_name: str = field(
183
+ default="image",
184
+ metadata={"help": "Column containing PIL images."},
185
+ )
186
+ mask_column_name: str = field(
187
+ default="mask",
188
+ metadata={"help": "Column containing ground-truth binary masks."},
189
+ )
190
+ prompt_column_name: str | None = field(
191
+ default="prompt",
192
+ metadata={"help": "Column with JSON-encoded prompt (bbox/point). Set to '' to disable."},
193
+ )
194
+ bbox_column_name: str | None = field(
195
+ default=None,
196
+ metadata={"help": "Column with bbox prompt ([x0,y0,x1,y1]). Used when prompt_column_name is unset."},
197
+ )
198
+ point_column_name: str | None = field(
199
+ default=None,
200
+ metadata={"help": "Column with point prompt ([x,y] or [[x,y],...]). Used when prompt_column_name is unset."},
201
+ )
202
+ prompt_type: str = field(
203
+ default="bbox",
204
+ metadata={"help": "Prompt type: 'bbox' or 'point'."},
205
+ )
206
+
207
+
208
+ @dataclass
209
+ class ModelArguments:
210
+ model_name_or_path: str = field(
211
+ default="facebook/sam2.1-hiera-small",
212
+ metadata={"help": "Pretrained SAM/SAM2 model identifier."},
213
+ )
214
+ cache_dir: str | None = field(default=None, metadata={"help": "Cache directory."})
215
+ model_revision: str = field(default="main", metadata={"help": "Model revision."})
216
+ token: str | None = field(default=None, metadata={"help": "Auth token."})
217
+ trust_remote_code: bool = field(default=False, metadata={"help": "Trust remote code."})
218
+ freeze_vision_encoder: bool = field(
219
+ default=True,
220
+ metadata={"help": "Freeze vision encoder weights."},
221
+ )
222
+ freeze_prompt_encoder: bool = field(
223
+ default=True,
224
+ metadata={"help": "Freeze prompt encoder weights."},
225
+ )
226
+
227
+
228
+ # ---------------------------------------------------------------------------
229
+ # Main
230
+ # ---------------------------------------------------------------------------
231
+
232
+ def main():
233
+ parser = HfArgumentParser((ModelArguments, DataTrainingArguments, TrainingArguments))
234
+ parser.set_defaults(per_device_train_batch_size=4, num_train_epochs=30)
235
+ if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
236
+ model_args, data_args, training_args = parser.parse_json_file(
237
+ json_file=os.path.abspath(sys.argv[1])
238
+ )
239
+ else:
240
+ model_args, data_args, training_args = parser.parse_args_into_dataclasses()
241
+
242
+ from huggingface_hub import login
243
+ hf_token = os.environ.get("HF_TOKEN") or os.environ.get("hfjob")
244
+ if hf_token:
245
+ login(token=hf_token)
246
+ training_args.hub_token = hf_token
247
+ logger.info("Logged in to Hugging Face Hub")
248
+ elif training_args.push_to_hub:
249
+ logger.warning("HF_TOKEN not found in environment. Hub push will likely fail.")
250
+
251
+ trackio.init(project=training_args.output_dir, name=training_args.run_name)
252
+
253
+ logging.basicConfig(
254
+ format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
255
+ datefmt="%m/%d/%Y %H:%M:%S",
256
+ handlers=[logging.StreamHandler(sys.stdout)],
257
+ )
258
+ if training_args.should_log:
259
+ transformers.utils.logging.set_verbosity_info()
260
+
261
+ log_level = training_args.get_process_log_level()
262
+ logger.setLevel(log_level)
263
+ transformers.utils.logging.set_verbosity(log_level)
264
+ transformers.utils.logging.enable_default_handler()
265
+ transformers.utils.logging.enable_explicit_format()
266
+
267
+ logger.info(f"Training/evaluation parameters {training_args}")
268
+
269
+ # ---- Load dataset ----
270
+ dataset = load_dataset(
271
+ data_args.dataset_name,
272
+ data_args.dataset_config_name,
273
+ cache_dir=model_args.cache_dir,
274
+ trust_remote_code=model_args.trust_remote_code,
275
+ )
276
+
277
+ if "train" not in dataset:
278
+ if len(dataset.keys()) == 1:
279
+ only_split = list(dataset.keys())[0]
280
+ dataset[only_split] = dataset[only_split].shuffle(seed=training_args.seed)
281
+ dataset = dataset[only_split].train_test_split(test_size=data_args.train_val_split or 0.1)
282
+ dataset = {"train": dataset["train"], "validation": dataset["test"]}
283
+ else:
284
+ raise ValueError(f"No 'train' split found. Available: {list(dataset.keys())}")
285
+ elif "validation" not in dataset and "test" not in dataset:
286
+ dataset["train"] = dataset["train"].shuffle(seed=training_args.seed)
287
+ split = dataset["train"].train_test_split(
288
+ test_size=data_args.train_val_split or 0.1, seed=training_args.seed
289
+ )
290
+ dataset["train"] = split["train"]
291
+ dataset["validation"] = split["test"]
292
+
293
+ if data_args.max_train_samples is not None:
294
+ n = min(data_args.max_train_samples, len(dataset["train"]))
295
+ dataset["train"] = dataset["train"].select(range(n))
296
+ logger.info(f"Truncated training set to {n} samples")
297
+ eval_key = "validation" if "validation" in dataset else "test"
298
+ if data_args.max_eval_samples is not None and eval_key in dataset:
299
+ n = min(data_args.max_eval_samples, len(dataset[eval_key]))
300
+ dataset[eval_key] = dataset[eval_key].select(range(n))
301
+ logger.info(f"Truncated eval set to {n} samples")
302
+
303
+ # ---- Detect model family (SAM vs SAM2) and load processor/model ----
304
+ model_id = model_args.model_name_or_path.lower()
305
+ is_sam2 = "sam2" in model_id
306
+
307
+ if is_sam2:
308
+ from transformers import Sam2Processor, Sam2Model
309
+ processor = Sam2Processor.from_pretrained(model_args.model_name_or_path)
310
+ model = Sam2Model.from_pretrained(model_args.model_name_or_path)
311
+ else:
312
+ from transformers import SamProcessor, SamModel
313
+ processor = SamProcessor.from_pretrained(model_args.model_name_or_path)
314
+ model = SamModel.from_pretrained(model_args.model_name_or_path)
315
+
316
+ if model_args.freeze_vision_encoder:
317
+ for name, param in model.named_parameters():
318
+ if name.startswith("vision_encoder"):
319
+ param.requires_grad_(False)
320
+ if model_args.freeze_prompt_encoder:
321
+ for name, param in model.named_parameters():
322
+ if name.startswith("prompt_encoder"):
323
+ param.requires_grad_(False)
324
+
325
+ trainable = sum(p.numel() for p in model.parameters() if p.requires_grad)
326
+ total = sum(p.numel() for p in model.parameters())
327
+ logger.info(f"Trainable params: {trainable:,} / {total:,} ({100 * trainable / total:.1f}%)")
328
+
329
+ # ---- Build datasets ----
330
+ prompt_col = data_args.prompt_column_name if data_args.prompt_column_name else None
331
+ ds_kwargs = dict(
332
+ processor=processor,
333
+ prompt_type=data_args.prompt_type,
334
+ image_col=data_args.image_column_name,
335
+ mask_col=data_args.mask_column_name,
336
+ prompt_col=prompt_col,
337
+ bbox_col=data_args.bbox_column_name,
338
+ point_col=data_args.point_column_name,
339
+ )
340
+
341
+ train_dataset = SAMSegmentationDataset(dataset=dataset["train"], **ds_kwargs)
342
+ eval_dataset = None
343
+ if eval_key in dataset:
344
+ eval_dataset = SAMSegmentationDataset(dataset=dataset[eval_key], **ds_kwargs)
345
+
346
+ # ---- Train ----
347
+ trainer = Trainer(
348
+ model=model,
349
+ args=training_args,
350
+ train_dataset=train_dataset if training_args.do_train else None,
351
+ eval_dataset=eval_dataset if training_args.do_eval else None,
352
+ data_collator=collate_fn,
353
+ compute_loss_func=compute_loss,
354
+ )
355
+
356
+ if training_args.do_train:
357
+ train_result = trainer.train(resume_from_checkpoint=training_args.resume_from_checkpoint)
358
+ trainer.save_model()
359
+ trainer.log_metrics("train", train_result.metrics)
360
+ trainer.save_metrics("train", train_result.metrics)
361
+ trainer.save_state()
362
+
363
+ if training_args.do_eval and eval_dataset is not None:
364
+ metrics = trainer.evaluate()
365
+ trainer.log_metrics("eval", metrics)
366
+ trainer.save_metrics("eval", metrics)
367
+
368
+ trackio.finish()
369
+
370
+ kwargs = {
371
+ "finetuned_from": model_args.model_name_or_path,
372
+ "dataset": data_args.dataset_name,
373
+ "tags": ["image-segmentation", "vision", "sam"],
374
+ }
375
+ if training_args.push_to_hub:
376
+ trainer.push_to_hub(**kwargs)
377
+ else:
378
+ trainer.create_model_card(**kwargs)
379
+
380
+
381
+ if __name__ == "__main__":
382
+ main()
@@ -0,0 +1,273 @@
1
+ ---
2
+ name: jq
3
+ description: "Expert jq usage for JSON querying, filtering, transformation, and pipeline integration. Practical patterns for real shell workflows."
4
+ category: development
5
+ risk: safe
6
+ source: community
7
+ date_added: "2026-03-28"
8
+ author: kostakost2
9
+ tags: [jq, json, shell, cli, data-transformation, bash]
10
+ tools: [claude, cursor, gemini]
11
+ ---
12
+
13
+ # jq — JSON Querying and Transformation
14
+
15
+ ## Overview
16
+
17
+ `jq` is the standard CLI tool for querying and reshaping JSON. This skill covers practical, expert-level usage: filtering deeply nested data, transforming structures, aggregating values, and composing `jq` into shell pipelines. Every example is copy-paste ready for real workflows.
18
+
19
+ ## When to Use This Skill
20
+
21
+ - Use when parsing JSON output from APIs, CLI tools (AWS, GitHub, kubectl, docker), or log files
22
+ - Use when transforming JSON structure (rename keys, flatten arrays, group records)
23
+ - Use when the user needs `jq` inside a bash script or one-liner
24
+ - Use when explaining what a complex `jq` expression does
25
+
26
+ ## How It Works
27
+
28
+ `jq` takes a filter expression and applies it to JSON input. Filters compose with pipes (`|`), and `jq` handles arrays, objects, strings, numbers, booleans, and `null` natively.
29
+
30
+ ### Basic Selection
31
+
32
+ ```bash
33
+ # Extract a field
34
+ echo '{"name":"alice","age":30}' | jq '.name'
35
+ # "alice"
36
+
37
+ # Nested access
38
+ echo '{"user":{"email":"a@b.com"}}' | jq '.user.email'
39
+
40
+ # Array index
41
+ echo '[10, 20, 30]' | jq '.[1]'
42
+ # 20
43
+
44
+ # Array slice
45
+ echo '[1,2,3,4,5]' | jq '.[2:4]'
46
+ # [3, 4]
47
+
48
+ # All array elements
49
+ echo '[{"id":1},{"id":2}]' | jq '.[]'
50
+ ```
51
+
52
+ ### Filtering with `select`
53
+
54
+ ```bash
55
+ # Keep only matching elements
56
+ echo '[{"role":"admin"},{"role":"user"},{"role":"admin"}]' \
57
+ | jq '[.[] | select(.role == "admin")]'
58
+
59
+ # Numeric comparison
60
+ curl -s https://api.github.com/repos/owner/repo/issues \
61
+ | jq '[.[] | select(.comments > 5)]'
62
+
63
+ # Test a field exists and is non-null
64
+ jq '[.[] | select(.email != null)]'
65
+
66
+ # Combine conditions
67
+ jq '[.[] | select(.active == true and .score >= 80)]'
68
+ ```
69
+
70
+ ### Mapping and Transformation
71
+
72
+ ```bash
73
+ # Extract a field from every array element
74
+ echo '[{"name":"alice","age":30},{"name":"bob","age":25}]' \
75
+ | jq '[.[] | .name]'
76
+ # ["alice", "bob"]
77
+
78
+ # Shorthand: map()
79
+ jq 'map(.name)'
80
+
81
+ # Build a new object per element
82
+ jq '[.[] | {user: .name, years: .age}]'
83
+
84
+ # Add a computed field
85
+ jq '[.[] | . + {senior: (.age > 28)}]'
86
+
87
+ # Rename keys
88
+ jq '[.[] | {username: .name, email_address: .email}]'
89
+ ```
90
+
91
+ ### Aggregation and Reduce
92
+
93
+ ```bash
94
+ # Sum all values
95
+ echo '[1, 2, 3, 4, 5]' | jq 'add'
96
+ # 15
97
+
98
+ # Sum a field across objects
99
+ jq '[.[].price] | add'
100
+
101
+ # Count elements
102
+ jq 'length'
103
+
104
+ # Max / min
105
+ jq 'max_by(.score)'
106
+ jq 'min_by(.created_at)'
107
+
108
+ # reduce: custom accumulator
109
+ echo '[1,2,3,4,5]' | jq 'reduce .[] as $x (0; . + $x)'
110
+ # 15
111
+
112
+ # Group by field
113
+ jq 'group_by(.department)'
114
+
115
+ # Count per group
116
+ jq 'group_by(.status) | map({status: .[0].status, count: length})'
117
+ ```
118
+
119
+ ### String Interpolation and Formatting
120
+
121
+ ```bash
122
+ # String interpolation
123
+ jq -r '.[] | "\(.name) is \(.age) years old"'
124
+
125
+ # Format as CSV (no header)
126
+ jq -r '.[] | [.name, .age, .email] | @csv'
127
+
128
+ # Format as TSV
129
+ jq -r '.[] | [.name, .score] | @tsv'
130
+
131
+ # URL-encode a value
132
+ jq -r '.query | @uri'
133
+
134
+ # Base64 encode
135
+ jq -r '.data | @base64'
136
+ ```
137
+
138
+ ### Working with Keys and Paths
139
+
140
+ ```bash
141
+ # List all top-level keys
142
+ jq 'keys'
143
+
144
+ # Check if key exists
145
+ jq 'has("email")'
146
+
147
+ # Delete a key
148
+ jq 'del(.password)'
149
+
150
+ # Delete nested keys from every element
151
+ jq '[.[] | del(.internal_id, .raw_payload)]'
152
+
153
+ # Recursive descent: find all values for a key anywhere in tree
154
+ jq '.. | .id? // empty'
155
+
156
+ # Get all leaf paths
157
+ jq '[paths(scalars)]'
158
+ ```
159
+
160
+ ### Conditionals and Error Handling
161
+
162
+ ```bash
163
+ # if-then-else
164
+ jq 'if .score >= 90 then "A" elif .score >= 80 then "B" else "C" end'
165
+
166
+ # Alternative operator: use fallback if null or false
167
+ jq '.nickname // .name'
168
+
169
+ # try-catch: skip errors instead of halting
170
+ jq '[.[] | try .nested.value catch null]'
171
+
172
+ # Suppress null output with // empty
173
+ jq '.[] | .optional_field // empty'
174
+ ```
175
+
176
+ ### Practical Shell Integration
177
+
178
+ ```bash
179
+ # Read from file
180
+ jq '.users' data.json
181
+
182
+ # Compact output (no whitespace) for further piping
183
+ jq -c '.[]' records.json | while IFS= read -r record; do
184
+ echo "Processing: $record"
185
+ done
186
+
187
+ # Pass a shell variable into jq
188
+ STATUS="active"
189
+ jq --arg s "$STATUS" '[.[] | select(.status == $s)]'
190
+
191
+ # Pass a number
192
+ jq --argjson threshold 42 '[.[] | select(.value > $threshold)]'
193
+
194
+ # Slurp multiple JSON lines into an array
195
+ jq -s '.' records.ndjson
196
+
197
+ # Multiple files: slurp all into one array
198
+ jq -s 'add' file1.json file2.json
199
+
200
+ # Null-safe pipeline from a command
201
+ kubectl get pods -o json | jq '.items[] | {name: .metadata.name, status: .status.phase}'
202
+
203
+ # GitHub CLI: extract PR numbers
204
+ gh pr list --json number,title | jq -r '.[] | "\(.number)\t\(.title)"'
205
+
206
+ # AWS CLI: list running instance IDs
207
+ aws ec2 describe-instances \
208
+ | jq -r '.Reservations[].Instances[] | select(.State.Name=="running") | .InstanceId'
209
+
210
+ # Docker: show container names and images
211
+ docker inspect $(docker ps -q) | jq -r '.[] | "\(.Name)\t\(.Config.Image)"'
212
+ ```
213
+
214
+ ### Advanced Patterns
215
+
216
+ ```bash
217
+ # Transpose an object of arrays to an array of objects
218
+ # Input: {"names":["a","b"],"scores":[10,20]}
219
+ jq '[.names, .scores] | transpose | map({name: .[0], score: .[1]})'
220
+
221
+ # Flatten one level
222
+ jq 'flatten(1)'
223
+
224
+ # Unique by field
225
+ jq 'unique_by(.email)'
226
+
227
+ # Sort, deduplicate and re-index
228
+ jq '[.[] | .name] | unique | sort'
229
+
230
+ # Walk: apply transformation to every node recursively
231
+ jq 'walk(if type == "string" then ascii_downcase else . end)'
232
+
233
+ # env: read environment variables inside jq
234
+ export API_KEY=secret
235
+ jq -n 'env.API_KEY'
236
+ ```
237
+
238
+ ## Best Practices
239
+
240
+ - Always use `-r` (raw output) when passing `jq` results to shell variables or other commands to strip JSON string quotes
241
+ - Use `--arg` / `--argjson` to inject shell variables safely — never interpolate shell variables directly into filter strings
242
+ - Prefer `map(f)` over `[.[] | f]` for readability
243
+ - Use `-c` (compact) for newline-delimited JSON pipelines; omit it for human-readable debugging
244
+ - Test filters interactively with `jq -n` and literal input before embedding in scripts
245
+ - Use `empty` to drop unwanted elements rather than filtering to `null`
246
+
247
+ ## Security & Safety Notes
248
+
249
+ - `jq` is read-only by design — it cannot write files or execute commands
250
+ - Avoid embedding untrusted JSON field values directly into shell commands; always quote or use `--arg`
251
+
252
+ ## Common Pitfalls
253
+
254
+ - **Problem:** `jq` outputs `null` instead of the expected value
255
+ **Solution:** Check for typos in key names; use `keys` to inspect actual field names. Remember JSON is case-sensitive.
256
+
257
+ - **Problem:** Numbers are quoted as strings in the output
258
+ **Solution:** Use `--argjson` instead of `--arg` when injecting numeric values.
259
+
260
+ - **Problem:** Filter works in the terminal but fails in a script
261
+ **Solution:** Ensure the filter string uses single quotes in the shell to prevent variable expansion. Example: `jq '.field'` not `jq ".field"`.
262
+
263
+ - **Problem:** `add` returns `null` on an empty array
264
+ **Solution:** Use `add // 0` or `add // ""` to provide a fallback default.
265
+
266
+ - **Problem:** Streaming large files is slow
267
+ **Solution:** Use `jq --stream` or switch to `jstream`/`gron` for very large files.
268
+
269
+ ## Related Skills
270
+
271
+ - `@bash-pro` — Wrapping jq calls in robust shell scripts
272
+ - `@bash-linux` — General shell pipeline patterns
273
+ - `@github-automation` — Using jq with GitHub CLI JSON output