labelr 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
labelr/apps/datasets.py CHANGED
@@ -152,7 +152,10 @@ def export(
152
152
  label_studio_url: Optional[str] = LABEL_STUDIO_DEFAULT_URL,
153
153
  output_dir: Annotated[
154
154
  Optional[Path],
155
- typer.Option(help="Path to the output directory", file_okay=False),
155
+ typer.Option(
156
+ help="Path to the output directory. Only used if the destination (`to`) is `ultralytics`",
157
+ file_okay=False,
158
+ ),
156
159
  ] = None,
157
160
  dataset_dir: Annotated[
158
161
  Optional[Path],
@@ -200,11 +203,18 @@ def export(
200
203
  help="Use the AWS S3 cache for image downloads instead of images.openfoodfacts.org, "
201
204
  "it is ignored if the export format is not Ultralytics"
202
205
  ),
203
- ] = True,
206
+ ] = False,
204
207
  merge_labels: Annotated[
205
208
  bool,
206
209
  typer.Option(help="Merge multiple labels into a single label"),
207
210
  ] = False,
211
+ revision: Annotated[
212
+ str,
213
+ typer.Option(
214
+ help="Revision (branch, tag or commit) for the Hugging Face Datasets repository. "
215
+ "Only used when importing from or exporting to Hugging Face Datasets."
216
+ ),
217
+ ] = "main",
208
218
  ):
209
219
  """Export Label Studio annotation, either to Hugging Face Datasets or
210
220
  local files (ultralytics format)."""
@@ -261,6 +271,7 @@ def export(
261
271
  project_id=typing.cast(int, project_id),
262
272
  merge_labels=merge_labels,
263
273
  use_aws_cache=use_aws_cache,
274
+ revision=revision,
264
275
  )
265
276
  elif to == ExportDestination.ultralytics:
266
277
  export_from_ls_to_ultralytics_object_detection(
@@ -286,6 +297,7 @@ def export(
286
297
  download_images=download_images,
287
298
  error_raise=error_raise,
288
299
  use_aws_cache=use_aws_cache,
300
+ revision=revision,
289
301
  )
290
302
  else:
291
303
  raise typer.BadParameter("Unsupported export format")
labelr/apps/train.py CHANGED
@@ -6,6 +6,31 @@ from google.cloud import batch_v1
6
6
  app = typer.Typer()
7
7
 
8
8
 
9
+ AVAILABLE_OBJECT_DETECTION_MODELS = [
10
+ "yolov8n.pt",
11
+ "yolov8s.pt",
12
+ "yolov8m.pt",
13
+ "yolov8l.pt",
14
+ "yolov8x.pt",
15
+ "yolov9t.pt",
16
+ "yolov9s.pt",
17
+ "yolov9m.pt",
18
+ "yolov9c.pt",
19
+ "yolov9e.pt",
20
+ "yolov10n.pt",
21
+ "yolov10s.pt",
22
+ "yolov10m.pt",
23
+ "yolov10b.pt",
24
+ "yolov10l.pt",
25
+ "yolov10x.pt",
26
+ "yolo11n.pt",
27
+ "yolo11s.pt",
28
+ "yolo11m.pt",
29
+ "yolo11l.pt",
30
+ "yolo11x.pt",
31
+ ]
32
+
33
+
9
34
  @app.command()
10
35
  def train_object_detection(
11
36
  wandb_project: str = typer.Option(
@@ -25,9 +50,20 @@ def train_object_detection(
25
50
  ),
26
51
  epochs: int = typer.Option(100, help="Number of training epochs."),
27
52
  imgsz: int = typer.Option(640, help="Size of the image during training."),
28
- batch_size: int = typer.Option(64, help="Batch size for training."),
53
+ batch: int = typer.Option(64, help="Batch size for training."),
54
+ model_name: str = typer.Option(
55
+ "yolov8n.pt",
56
+ help="The YOLO model variant to use for training. "
57
+ "This should be a valid Ultralytics model name.",
58
+ ),
29
59
  ):
30
60
  """Train an object detection model."""
61
+
62
+ if model_name not in AVAILABLE_OBJECT_DETECTION_MODELS:
63
+ raise typer.BadParameter(
64
+ f"Invalid model name '{model_name}'. Available models are: {', '.join(AVAILABLE_OBJECT_DETECTION_MODELS)}"
65
+ )
66
+
31
67
  env_variables = {
32
68
  "HF_REPO_ID": hf_repo_id,
33
69
  "HF_TRAINED_MODEL_REPO_ID": hf_trained_model_repo_id,
@@ -37,8 +73,9 @@ def train_object_detection(
37
73
  "WANDB_API_KEY": wandb_api_key,
38
74
  "EPOCHS": str(epochs),
39
75
  "IMGSZ": str(imgsz),
40
- "BATCH_SIZE": str(batch_size),
76
+ "BATCH_SIZE": str(batch),
41
77
  "USE_AWS_IMAGE_CACHE": "False",
78
+ "YOLO_MODEL_NAME": model_name,
42
79
  }
43
80
  job_name = "train-yolo-job"
44
81
  job_name = job_name + "-" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
@@ -128,7 +165,7 @@ def launch_job(
128
165
  # will run on.
129
166
  policy = batch_v1.AllocationPolicy.InstancePolicy()
130
167
  # See list of machine types here:
131
- # https://docs.cloud.google.com/compute/docs/gpus#l4-gpus
168
+ # https://docs.cloud.google.com/compute/docs/gpus#t4-gpus
132
169
  policy.machine_type = machine_type
133
170
 
134
171
  accelerator = batch_v1.AllocationPolicy.Accelerator()
@@ -0,0 +1,31 @@
1
+ import datasets
2
+ from datasets import Features
3
+ from datasets import Image as HFImage
4
+
5
+ # dataset features for predictions.parquet used in evaluation and visualization
6
+ OBJECT_DETECTION_DS_PREDICTION_FEATURES = Features(
7
+ {
8
+ "image": HFImage(),
9
+ "image_with_prediction": HFImage(),
10
+ "image_id": datasets.Value("string"),
11
+ "detected": {
12
+ "bbox": datasets.Sequence(datasets.Sequence(datasets.Value("float32"))),
13
+ "category_id": datasets.Sequence(datasets.Value("int64")),
14
+ "category_name": datasets.Sequence(datasets.Value("string")),
15
+ "confidence": datasets.Sequence(datasets.Value("float32")),
16
+ },
17
+ "split": datasets.Value("string"),
18
+ "width": datasets.Value("int64"),
19
+ "height": datasets.Value("int64"),
20
+ "meta": {
21
+ "barcode": datasets.Value("string"),
22
+ "off_image_id": datasets.Value("string"),
23
+ "image_url": datasets.Value("string"),
24
+ },
25
+ "objects": {
26
+ "bbox": datasets.Sequence(datasets.Sequence(datasets.Value("float32"))),
27
+ "category_id": datasets.Sequence(datasets.Value("int64")),
28
+ "category_name": datasets.Sequence(datasets.Value("string")),
29
+ },
30
+ }
31
+ )
labelr/export.py CHANGED
@@ -36,11 +36,18 @@ def export_from_ls_to_hf_object_detection(
36
36
  project_id: int,
37
37
  merge_labels: bool = False,
38
38
  use_aws_cache: bool = True,
39
+ revision: str = "main",
39
40
  ):
40
41
  if merge_labels:
41
42
  label_names = ["object"]
42
43
 
43
- logger.info("Project ID: %d, label names: %s", project_id, label_names)
44
+ logger.info(
45
+ "Project ID: %d, label names: %s, repo_id: %s, revision: %s",
46
+ project_id,
47
+ label_names,
48
+ repo_id,
49
+ revision,
50
+ )
44
51
 
45
52
  for split in ["train", "val"]:
46
53
  logger.info("Processing split: %s", split)
@@ -70,7 +77,13 @@ def export_from_ls_to_hf_object_detection(
70
77
  functools.partial(_pickle_sample_generator, tmp_dir),
71
78
  features=HF_DS_OBJECT_DETECTION_FEATURES,
72
79
  )
73
- hf_ds.push_to_hub(repo_id, split=split)
80
+ hf_ds.push_to_hub(
81
+ repo_id,
82
+ split=split,
83
+ revision=revision,
84
+ # Create a PR if not pushing to main branch
85
+ create_pr=revision != "main",
86
+ )
74
87
 
75
88
 
76
89
  def export_from_ls_to_ultralytics_object_detection(
@@ -234,19 +247,28 @@ def export_from_hf_to_ultralytics_object_detection(
234
247
  is True. Defaults to True.
235
248
  revision (str): The dataset revision to load. Defaults to 'main'.
236
249
  """
237
- logger.info("Repo ID: %s", repo_id)
250
+ logger.info("Repo ID: %s, revision: %s", repo_id, revision)
238
251
  ds = datasets.load_dataset(repo_id, revision=revision)
239
252
  data_dir = output_dir / "data"
240
253
  data_dir.mkdir(parents=True, exist_ok=True)
241
254
  category_id_to_name = {}
242
255
 
256
+ split_map = {
257
+ "train": "train",
258
+ "val": "val",
259
+ }
260
+ if "val" not in ds and "test" in ds:
261
+ logger.info("val split not found, using test split instead as val")
262
+ split_map["val"] = "test"
263
+
243
264
  for split in ["train", "val"]:
265
+ split_target = split_map[split]
244
266
  split_labels_dir = data_dir / "labels" / split
245
267
  split_labels_dir.mkdir(parents=True, exist_ok=True)
246
268
  split_images_dir = data_dir / "images" / split
247
269
  split_images_dir.mkdir(parents=True, exist_ok=True)
248
270
 
249
- for sample in tqdm.tqdm(ds[split], desc="samples"):
271
+ for sample in tqdm.tqdm(ds[split_target], desc="samples"):
250
272
  image_id = sample["image_id"]
251
273
 
252
274
  if download_images:
labelr/utils.py ADDED
@@ -0,0 +1,13 @@
1
+ def parse_hf_repo_id(hf_repo_id: str) -> tuple[str, str]:
2
+ """Parse the repo_id and the revision from a hf_repo_id in the format:
3
+ `org/repo-name@revision`.
4
+
5
+ Returns a tuple (repo_id, revision), with revision = 'main' if it
6
+ was not provided.
7
+ """
8
+ if "@" in hf_repo_id:
9
+ hf_repo_id, revision = hf_repo_id.split("@", 1)
10
+ else:
11
+ revision = "main"
12
+
13
+ return hf_repo_id, revision
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: labelr
3
- Version: 0.5.0
3
+ Version: 0.7.0
4
4
  Summary: A command-line tool to manage labeling tasks with Label Studio.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -12,8 +12,11 @@ Requires-Dist: more-itertools>=10.5.0
12
12
  Requires-Dist: openfoodfacts>=2.9.0
13
13
  Requires-Dist: typer>=0.15.1
14
14
  Requires-Dist: google-cloud-batch==0.18.0
15
+ Requires-Dist: huggingface-hub
15
16
  Provides-Extra: ultralytics
16
- Requires-Dist: ultralytics>=8.3.49; extra == "ultralytics"
17
+ Requires-Dist: ultralytics==8.3.223; extra == "ultralytics"
18
+ Provides-Extra: fiftyone
19
+ Requires-Dist: fiftyone~=1.10.0; extra == "fiftyone"
17
20
  Dynamic: license-file
18
21
 
19
22
  # Labelr
@@ -3,19 +3,21 @@ labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
3
3
  labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
4
4
  labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
5
5
  labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
6
- labelr/export.py,sha256=h4_n-twXHnrd5FCL9NEeEFjWw6Fzo5b9JayXHcDLAF0,17336
6
+ labelr/dataset_features.py,sha256=ZC9QAUw9oKHqyUPla2h3xQFaRT9sHq8hkPNN4RDDwmo,1257
7
+ labelr/export.py,sha256=gjC2_RJ_yX8zVYXyo1RAgI07iXSgkeqckOTEzSscRXc,17940
7
8
  labelr/main.py,sha256=CioMPtaPoGL_5Oxwj8PfalhTyFahMbfp2kd9KdZzm3Y,2258
8
9
  labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
9
10
  labelr/sample.py,sha256=unu9AQ64FhKPgssuL7gb3qyMd1EQJvMOfqvjdefmWOU,7807
10
11
  labelr/types.py,sha256=8CHfLyifF_N94OYDhG-7IcWboOh9o0Z_0LBtQapT8TQ,313
12
+ labelr/utils.py,sha256=e0R15jePWBzRdN8LB6kBSH5Dl_P0MNEtRmeqB9eu5d8,415
11
13
  labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
- labelr/apps/datasets.py,sha256=twwH8wzbUauDWVZFObOvk0gsohAeYm0usHCigng_ucM,11262
14
+ labelr/apps/datasets.py,sha256=4PMfKS5c7Zw3-NNRBkFbZidMQUI2RBMcXFYBvWHLz3o,11688
13
15
  labelr/apps/projects.py,sha256=HpgqIaPrUQzIR7eOLn4EBbEzXRi7hoWStT4jLMQPcBg,15153
14
- labelr/apps/train.py,sha256=SdbCOPWxBkpnQ7P93flhIcnZIgfEX0Na0ce0RM0M91U,6023
16
+ labelr/apps/train.py,sha256=sI0p3h39LPXhynwl_yMuZnIPlaqlcWSO_81zPC3H3yI,6886
15
17
  labelr/apps/users.py,sha256=twQSlpHxE0hrYkgrJpEFbK8lYfWnpJr8vyfLHLtdAUU,909
16
- labelr-0.5.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
17
- labelr-0.5.0.dist-info/METADATA,sha256=kK-wEIUdt7LbkLCZ7jrPj88znftFnw6DO_bAR9l8ZDY,6881
18
- labelr-0.5.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
19
- labelr-0.5.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
20
- labelr-0.5.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
21
- labelr-0.5.0.dist-info/RECORD,,
18
+ labelr-0.7.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
19
+ labelr-0.7.0.dist-info/METADATA,sha256=NghQ_6mNj1Dkets_GlOOOyoAVEQqoPBbbJXhysOKAWI,6991
20
+ labelr-0.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
+ labelr-0.7.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
22
+ labelr-0.7.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
23
+ labelr-0.7.0.dist-info/RECORD,,
File without changes