labelr 0.3.0__tar.gz → 0.4.1__tar.gz

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (25)
  1. {labelr-0.3.0/src/labelr.egg-info → labelr-0.4.1}/PKG-INFO +1 -1
  2. {labelr-0.3.0 → labelr-0.4.1}/pyproject.toml +1 -1
  3. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/apps/datasets.py +87 -10
  4. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/apps/projects.py +3 -3
  5. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/export.py +172 -21
  6. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/sample.py +32 -10
  7. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/types.py +1 -0
  8. {labelr-0.3.0 → labelr-0.4.1/src/labelr.egg-info}/PKG-INFO +1 -1
  9. {labelr-0.3.0 → labelr-0.4.1}/LICENSE +0 -0
  10. {labelr-0.3.0 → labelr-0.4.1}/README.md +0 -0
  11. {labelr-0.3.0 → labelr-0.4.1}/setup.cfg +0 -0
  12. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/__init__.py +0 -0
  13. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/__main__.py +0 -0
  14. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/annotate.py +0 -0
  15. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/apps/__init__.py +0 -0
  16. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/apps/users.py +0 -0
  17. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/check.py +0 -0
  18. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/config.py +0 -0
  19. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/main.py +0 -0
  20. {labelr-0.3.0 → labelr-0.4.1}/src/labelr/project_config.py +0 -0
  21. {labelr-0.3.0 → labelr-0.4.1}/src/labelr.egg-info/SOURCES.txt +0 -0
  22. {labelr-0.3.0 → labelr-0.4.1}/src/labelr.egg-info/dependency_links.txt +0 -0
  23. {labelr-0.3.0 → labelr-0.4.1}/src/labelr.egg-info/entry_points.txt +0 -0
  24. {labelr-0.3.0 → labelr-0.4.1}/src/labelr.egg-info/requires.txt +0 -0
  25. {labelr-0.3.0 → labelr-0.4.1}/src/labelr.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: labelr
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: A command-line tool to manage labeling tasks with Label Studio.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "labelr"
3
- version = "0.3.0"
3
+ version = "0.4.1"
4
4
  description = "A command-line tool to manage labeling tasks with Label Studio."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.10"
@@ -6,8 +6,11 @@ from pathlib import Path
6
6
  from typing import Annotated, Optional
7
7
 
8
8
  import typer
9
+ from openfoodfacts import Flavor
9
10
  from openfoodfacts.utils import get_logger
10
11
 
12
+ from labelr.export import export_from_ultralytics_to_hf
13
+
11
14
  from ..config import LABEL_STUDIO_DEFAULT_URL
12
15
  from ..types import ExportDestination, ExportSource, TaskType
13
16
 
@@ -130,6 +133,9 @@ def export(
130
133
  from_: Annotated[ExportSource, typer.Option("--from", help="Input source to use")],
131
134
  to: Annotated[ExportDestination, typer.Option(help="Where to export the data")],
132
135
  api_key: Annotated[Optional[str], typer.Option(envvar="LABEL_STUDIO_API_KEY")],
136
+ task_type: Annotated[
137
+ TaskType, typer.Option(help="Type of task to export")
138
+ ] = TaskType.object_detection,
133
139
  repo_id: Annotated[
134
140
  Optional[str],
135
141
  typer.Option(
@@ -148,12 +154,33 @@ def export(
148
154
  Optional[Path],
149
155
  typer.Option(help="Path to the output directory", file_okay=False),
150
156
  ] = None,
157
+ dataset_dir: Annotated[
158
+ Optional[Path],
159
+ typer.Option(help="Path to the dataset directory, only for Ultralytics source"),
160
+ ] = None,
151
161
  download_images: Annotated[
152
162
  bool,
153
163
  typer.Option(
154
164
  help="if True, don't use HF images and download images from the server"
155
165
  ),
156
166
  ] = False,
167
+ is_openfoodfacts_dataset: Annotated[
168
+ bool,
169
+ typer.Option(
170
+ help="Whether the Ultralytics dataset is an OpenFoodFacts dataset, only "
171
+ "for Ultralytics source. This is used to generate the correct image URLs "
172
+ "each image name."
173
+ ),
174
+ ] = True,
175
+ openfoodfacts_flavor: Annotated[
176
+ Flavor,
177
+ typer.Option(
178
+ help="Flavor of the Open Food Facts dataset to use for image URLs, only "
179
+ "for Ultralytics source if is_openfoodfacts_dataset is True. This is used to "
180
+ "generate the correct image URLs each image name. This option is ignored if "
181
+ "is_openfoodfacts_dataset is False."
182
+ ),
183
+ ] = Flavor.off,
157
184
  train_ratio: Annotated[
158
185
  float,
159
186
  typer.Option(
@@ -167,20 +194,38 @@ def export(
167
194
  help="Raise an error if an image download fails, only for Ultralytics"
168
195
  ),
169
196
  ] = True,
197
+ use_aws_cache: Annotated[
198
+ bool,
199
+ typer.Option(
200
+ help="Use the AWS S3 cache for image downloads instead of images.openfoodfacts.org, "
201
+ "it is ignored if the export format is not Ultralytics"
202
+ ),
203
+ ] = True,
204
+ merge_labels: Annotated[
205
+ bool,
206
+ typer.Option(help="Merge multiple labels into a single label"),
207
+ ] = False,
170
208
  ):
171
209
  """Export Label Studio annotation, either to Hugging Face Datasets or
172
210
  local files (ultralytics format)."""
173
211
  from label_studio_sdk.client import LabelStudio
174
212
 
175
213
  from labelr.export import (
176
- export_from_hf_to_ultralytics,
177
- export_from_ls_to_hf,
178
- export_from_ls_to_ultralytics,
214
+ export_from_hf_to_ultralytics_object_detection,
215
+ export_from_ls_to_hf_object_detection,
216
+ export_from_ls_to_ultralytics_object_detection,
179
217
  )
180
218
 
181
219
  if (to == ExportDestination.hf or from_ == ExportSource.hf) and repo_id is None:
182
220
  raise typer.BadParameter("Repository ID is required for export/import with HF")
183
221
 
222
+ if from_ == ExportSource.ultralytics and dataset_dir is None:
223
+ raise typer.BadParameter(
224
+ "Dataset directory is required for export from Ultralytics source"
225
+ )
226
+
227
+ label_names_list: list[str] | None = None
228
+
184
229
  if label_names is None:
185
230
  if to == ExportDestination.hf:
186
231
  raise typer.BadParameter("Label names are required for HF export")
@@ -188,6 +233,9 @@ def export(
188
233
  raise typer.BadParameter(
189
234
  "Label names are required for export from LS source"
190
235
  )
236
+ else:
237
+ label_names = typing.cast(str, label_names)
238
+ label_names_list = label_names.split(",")
191
239
 
192
240
  if from_ == ExportSource.ls:
193
241
  if project_id is None:
@@ -199,31 +247,60 @@ def export(
199
247
  raise typer.BadParameter("Output directory is required for Ultralytics export")
200
248
 
201
249
  if from_ == ExportSource.ls:
250
+ if task_type != TaskType.object_detection:
251
+ raise typer.BadParameter(
252
+ "Only object detection task is currently supported with LS source"
253
+ )
202
254
  ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
203
- label_names = typing.cast(str, label_names)
204
- label_names_list = label_names.split(",")
205
255
  if to == ExportDestination.hf:
206
256
  repo_id = typing.cast(str, repo_id)
207
- export_from_ls_to_hf(
208
- ls, repo_id, label_names_list, typing.cast(int, project_id)
257
+ export_from_ls_to_hf_object_detection(
258
+ ls,
259
+ repo_id=repo_id,
260
+ label_names=typing.cast(list[str], label_names_list),
261
+ project_id=typing.cast(int, project_id),
262
+ merge_labels=merge_labels,
263
+ use_aws_cache=use_aws_cache,
209
264
  )
210
265
  elif to == ExportDestination.ultralytics:
211
- export_from_ls_to_ultralytics(
266
+ export_from_ls_to_ultralytics_object_detection(
212
267
  ls,
213
268
  typing.cast(Path, output_dir),
214
- label_names_list,
269
+ typing.cast(list[str], label_names_list),
215
270
  typing.cast(int, project_id),
216
271
  train_ratio=train_ratio,
217
272
  error_raise=error_raise,
273
+ merge_labels=merge_labels,
274
+ use_aws_cache=use_aws_cache,
218
275
  )
219
276
 
220
277
  elif from_ == ExportSource.hf:
278
+ if task_type != TaskType.object_detection:
279
+ raise typer.BadParameter(
280
+ "Only object detection task is currently supported with HF source"
281
+ )
221
282
  if to == ExportDestination.ultralytics:
222
- export_from_hf_to_ultralytics(
283
+ export_from_hf_to_ultralytics_object_detection(
223
284
  typing.cast(str, repo_id),
224
285
  typing.cast(Path, output_dir),
225
286
  download_images=download_images,
226
287
  error_raise=error_raise,
288
+ use_aws_cache=use_aws_cache,
227
289
  )
228
290
  else:
229
291
  raise typer.BadParameter("Unsupported export format")
292
+ elif from_ == ExportSource.ultralytics:
293
+ if task_type != TaskType.classification:
294
+ raise typer.BadParameter(
295
+ "Only classification task is currently supported with Ultralytics source"
296
+ )
297
+ if to == ExportDestination.hf:
298
+ export_from_ultralytics_to_hf(
299
+ task_type=task_type,
300
+ dataset_dir=typing.cast(Path, dataset_dir),
301
+ repo_id=typing.cast(str, repo_id),
302
+ merge_labels=merge_labels,
303
+ label_names=typing.cast(list[str], label_names_list),
304
+ is_openfoodfacts_dataset=is_openfoodfacts_dataset,
305
+ openfoodfacts_flavor=openfoodfacts_flavor,
306
+ )
@@ -90,6 +90,8 @@ def add_split(
90
90
  train_split: Annotated[
91
91
  float, typer.Option(help="fraction of samples to add in train split")
92
92
  ],
93
+ api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
94
+ project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
93
95
  split_name: Annotated[
94
96
  Optional[str],
95
97
  typer.Option(
@@ -97,9 +99,7 @@ def add_split(
97
99
  "with the task ID file. If --task-id-file is not provided, "
98
100
  "this field is ignored."
99
101
  ),
100
- ],
101
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
102
- project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
102
+ ] = None,
103
103
  train_split_name: Annotated[
104
104
  str,
105
105
  typer.Option(help="name of the train split"),
@@ -3,16 +3,21 @@ import logging
3
3
  import pickle
4
4
  import random
5
5
  import tempfile
6
- import typing
7
6
  from pathlib import Path
8
7
 
9
8
  import datasets
10
9
  import tqdm
11
10
  from label_studio_sdk.client import LabelStudio
12
- from openfoodfacts.images import download_image
13
- from PIL import Image
11
+ from openfoodfacts.images import download_image, generate_image_url
12
+ from openfoodfacts.types import Flavor
13
+ from PIL import Image, ImageOps
14
14
 
15
- from labelr.sample import HF_DS_FEATURES, format_object_detection_sample_to_hf
15
+ from labelr.sample import (
16
+ HF_DS_CLASSIFICATION_FEATURES,
17
+ HF_DS_OBJECT_DETECTION_FEATURES,
18
+ format_object_detection_sample_to_hf,
19
+ )
20
+ from labelr.types import TaskType
16
21
 
17
22
  logger = logging.getLogger(__name__)
18
23
 
@@ -24,13 +29,18 @@ def _pickle_sample_generator(dir: Path):
24
29
  yield pickle.load(f)
25
30
 
26
31
 
27
- def export_from_ls_to_hf(
32
+ def export_from_ls_to_hf_object_detection(
28
33
  ls: LabelStudio,
29
34
  repo_id: str,
30
- category_names: list[str],
35
+ label_names: list[str],
31
36
  project_id: int,
37
+ merge_labels: bool = False,
38
+ use_aws_cache: bool = True,
32
39
  ):
33
- logger.info("Project ID: %d, category names: %s", project_id, category_names)
40
+ if merge_labels:
41
+ label_names = ["object"]
42
+
43
+ logger.info("Project ID: %d, label names: %s", project_id, label_names)
34
44
 
35
45
  for split in ["train", "val"]:
36
46
  logger.info("Processing split: %s", split)
@@ -45,7 +55,11 @@ def export_from_ls_to_hf(
45
55
  if task.data["split"] != split:
46
56
  continue
47
57
  sample = format_object_detection_sample_to_hf(
48
- task.data, task.annotations, category_names
58
+ task_data=task.data,
59
+ annotations=task.annotations,
60
+ label_names=label_names,
61
+ merge_labels=merge_labels,
62
+ use_aws_cache=use_aws_cache,
49
63
  )
50
64
  if sample is not None:
51
65
  # Save output as pickle
@@ -54,18 +68,20 @@ def export_from_ls_to_hf(
54
68
 
55
69
  hf_ds = datasets.Dataset.from_generator(
56
70
  functools.partial(_pickle_sample_generator, tmp_dir),
57
- features=HF_DS_FEATURES,
71
+ features=HF_DS_OBJECT_DETECTION_FEATURES,
58
72
  )
59
73
  hf_ds.push_to_hub(repo_id, split=split)
60
74
 
61
75
 
62
- def export_from_ls_to_ultralytics(
76
+ def export_from_ls_to_ultralytics_object_detection(
63
77
  ls: LabelStudio,
64
78
  output_dir: Path,
65
- category_names: list[str],
79
+ label_names: list[str],
66
80
  project_id: int,
67
81
  train_ratio: float = 0.8,
68
82
  error_raise: bool = True,
83
+ merge_labels: bool = False,
84
+ use_aws_cache: bool = True,
69
85
  ):
70
86
  """Export annotations from a Label Studio project to the Ultralytics
71
87
  format.
@@ -73,7 +89,9 @@ def export_from_ls_to_ultralytics(
73
89
  The Label Studio project should be an object detection project with a
74
90
  single rectanglelabels annotation result per task.
75
91
  """
76
- logger.info("Project ID: %d, category names: %s", project_id, category_names)
92
+ if merge_labels:
93
+ label_names = ["object"]
94
+ logger.info("Project ID: %d, label names: %s", project_id, label_names)
77
95
 
78
96
  data_dir = output_dir / "data"
79
97
  data_dir.mkdir(parents=True, exist_ok=True)
@@ -146,25 +164,30 @@ def export_from_ls_to_ultralytics(
146
164
  y_min = value["y"] / 100
147
165
  width = value["width"] / 100
148
166
  height = value["height"] / 100
149
- category_name = value["rectanglelabels"][0]
150
- category_id = category_names.index(category_name)
167
+ label_name = (
168
+ label_names[0] if merge_labels else value["rectanglelabels"][0]
169
+ )
170
+ label_id = label_names.index(label_name)
151
171
 
152
172
  # Save the labels in the Ultralytics format:
153
173
  # - one label per line
154
174
  # - each line is a list of 5 elements:
155
- # - category_id
175
+ # - label_id
156
176
  # - x_center
157
177
  # - y_center
158
178
  # - width
159
179
  # - height
160
180
  x_center = x_min + width / 2
161
181
  y_center = y_min + height / 2
162
- f.write(f"{category_id} {x_center} {y_center} {width} {height}\n")
182
+ f.write(f"{label_id} {x_center} {y_center} {width} {height}\n")
163
183
  has_valid_annotation = True
164
184
 
165
185
  if has_valid_annotation:
166
186
  download_output = download_image(
167
- image_url, return_struct=True, error_raise=error_raise
187
+ image_url,
188
+ return_struct=True,
189
+ error_raise=error_raise,
190
+ use_cache=use_aws_cache,
168
191
  )
169
192
  if download_output is None:
170
193
  logger.error("Failed to download image: %s", image_url)
@@ -179,15 +202,16 @@ def export_from_ls_to_ultralytics(
179
202
  f.write("val: images/val\n")
180
203
  f.write("test:\n")
181
204
  f.write("names:\n")
182
- for i, category_name in enumerate(category_names):
183
- f.write(f" {i}: {category_name}\n")
205
+ for i, label_name in enumerate(label_names):
206
+ f.write(f" {i}: {label_name}\n")
184
207
 
185
208
 
186
- def export_from_hf_to_ultralytics(
209
+ def export_from_hf_to_ultralytics_object_detection(
187
210
  repo_id: str,
188
211
  output_dir: Path,
189
212
  download_images: bool = True,
190
213
  error_raise: bool = True,
214
+ use_aws_cache: bool = True,
191
215
  ):
192
216
  """Export annotations from a Hugging Face dataset project to the
193
217
  Ultralytics format.
@@ -213,7 +237,10 @@ def export_from_hf_to_ultralytics(
213
237
 
214
238
  if download_images:
215
239
  download_output = download_image(
216
- image_url, return_struct=True, error_raise=error_raise
240
+ image_url,
241
+ return_struct=True,
242
+ error_raise=error_raise,
243
+ use_cache=use_aws_cache,
217
244
  )
218
245
  if download_output is None:
219
246
  logger.error("Failed to download image: %s", image_url)
@@ -266,3 +293,127 @@ def export_from_hf_to_ultralytics(
266
293
  f.write("names:\n")
267
294
  for i, category_name in enumerate(category_names):
268
295
  f.write(f" {i}: {category_name}\n")
296
+
297
+
298
+ def export_from_ultralytics_to_hf(
299
+ task_type: TaskType,
300
+ dataset_dir: Path,
301
+ repo_id: str,
302
+ label_names: list[str],
303
+ merge_labels: bool = False,
304
+ is_openfoodfacts_dataset: bool = False,
305
+ openfoodfacts_flavor: Flavor = Flavor.off,
306
+ ) -> None:
307
+ if task_type != TaskType.classification:
308
+ raise NotImplementedError(
309
+ "Only classification task is currently supported for Ultralytics to HF export"
310
+ )
311
+
312
+ if task_type == TaskType.classification:
313
+ export_from_ultralytics_to_hf_classification(
314
+ dataset_dir=dataset_dir,
315
+ repo_id=repo_id,
316
+ label_names=label_names,
317
+ merge_labels=merge_labels,
318
+ is_openfoodfacts_dataset=is_openfoodfacts_dataset,
319
+ openfoodfacts_flavor=openfoodfacts_flavor,
320
+ )
321
+
322
+
323
+ def export_from_ultralytics_to_hf_classification(
324
+ dataset_dir: Path,
325
+ repo_id: str,
326
+ label_names: list[str],
327
+ merge_labels: bool = False,
328
+ is_openfoodfacts_dataset: bool = False,
329
+ openfoodfacts_flavor: Flavor = Flavor.off,
330
+ ) -> None:
331
+ """Export an Ultralytics classification dataset to a Hugging Face dataset.
332
+
333
+ The Ultralytics dataset directory should contain 'train', 'val' and/or
334
+ 'test' subdirectories, each containing subdirectories for each label.
335
+
336
+ Args:
337
+ dataset_dir (Path): Path to the Ultralytics dataset directory.
338
+ repo_id (str): Hugging Face repository ID to push the dataset to.
339
+ label_names (list[str]): List of label names.
340
+ merge_labels (bool): Whether to merge all labels into a single label
341
+ named 'object'.
342
+ is_openfoodfacts_dataset (bool): Whether the dataset is from
343
+ Open Food Facts. If True, the `off_image_id` and `image_url` will
344
+ be generated automatically. `off_image_id` is extracted from the
345
+ image filename.
346
+ openfoodfacts_flavor (Flavor): Flavor of Open Food Facts dataset. This
347
+ is ignored if `is_openfoodfacts_dataset` is False.
348
+ """
349
+ logger.info("Repo ID: %s, dataset_dir: %s", repo_id, dataset_dir)
350
+
351
+ if not any((dataset_dir / split).is_dir() for split in ["train", "val", "test"]):
352
+ raise ValueError(
353
+ f"Dataset directory {dataset_dir} does not contain 'train', 'val' or 'test' subdirectories"
354
+ )
355
+
356
+ # Save output as pickle
357
+ for split in ["train", "val", "test"]:
358
+ split_dir = dataset_dir / split
359
+
360
+ if not split_dir.is_dir():
361
+ logger.info("Skipping missing split directory: %s", split_dir)
362
+ continue
363
+
364
+ with tempfile.TemporaryDirectory() as tmp_dir_str:
365
+ tmp_dir = Path(tmp_dir_str)
366
+ for label_dir in (d for d in split_dir.iterdir() if d.is_dir()):
367
+ label_name = label_dir.name
368
+ if merge_labels:
369
+ label_name = "object"
370
+ if label_name not in label_names:
371
+ raise ValueError(
372
+ "Label name %s not in provided label names (label names: %s)"
373
+ % (label_name, label_names),
374
+ )
375
+ label_id = label_names.index(label_name)
376
+
377
+ for image_path in label_dir.glob("*"):
378
+ if is_openfoodfacts_dataset:
379
+ image_stem_parts = image_path.stem.split("_")
380
+ barcode = image_stem_parts[0]
381
+ off_image_id = image_stem_parts[1]
382
+ image_id = f"{barcode}_{off_image_id}"
383
+ image_url = generate_image_url(
384
+ barcode, off_image_id, flavor=openfoodfacts_flavor
385
+ )
386
+ else:
387
+ image_id = image_path.stem
388
+ barcode = ""
389
+ off_image_id = ""
390
+ image_url = ""
391
+ image = Image.open(image_path)
392
+ image.load()
393
+
394
+ if image.mode != "RGB":
395
+ image = image.convert("RGB")
396
+
397
+ # Rotate image according to exif orientation using Pillow
398
+ ImageOps.exif_transpose(image, in_place=True)
399
+ sample = {
400
+ "image_id": image_id,
401
+ "image": image,
402
+ "width": image.width,
403
+ "height": image.height,
404
+ "meta": {
405
+ "barcode": barcode,
406
+ "off_image_id": off_image_id,
407
+ "image_url": image_url,
408
+ },
409
+ "category_id": label_id,
410
+ "category_name": label_name,
411
+ }
412
+ with open(tmp_dir / f"{split}_{image_id}.pkl", "wb") as f:
413
+ pickle.dump(sample, f)
414
+
415
+ hf_ds = datasets.Dataset.from_generator(
416
+ functools.partial(_pickle_sample_generator, tmp_dir),
417
+ features=HF_DS_CLASSIFICATION_FEATURES,
418
+ )
419
+ hf_ds.push_to_hub(repo_id, split=split)
@@ -145,7 +145,11 @@ def format_object_detection_sample_to_ls(
145
145
 
146
146
 
147
147
  def format_object_detection_sample_to_hf(
148
- task_data: dict, annotations: list[dict], category_names: list[str]
148
+ task_data: dict,
149
+ annotations: list[dict],
150
+ label_names: list[str],
151
+ merge_labels: bool = False,
152
+ use_aws_cache: bool = True,
149
153
  ) -> dict | None:
150
154
  if len(annotations) > 1:
151
155
  logger.info("More than one annotation found, skipping")
@@ -156,8 +160,8 @@ def format_object_detection_sample_to_hf(
156
160
 
157
161
  annotation = annotations[0]
158
162
  bboxes = []
159
- bbox_category_ids = []
160
- bbox_category_names = []
163
+ bbox_label_ids = []
164
+ bbox_label_names = []
161
165
 
162
166
  for annotation_result in annotation["result"]:
163
167
  if annotation_result["type"] != "rectanglelabels":
@@ -171,12 +175,13 @@ def format_object_detection_sample_to_hf(
171
175
  x_max = x_min + width
172
176
  y_max = y_min + height
173
177
  bboxes.append([y_min, x_min, y_max, x_max])
174
- category_name = value["rectanglelabels"][0]
175
- bbox_category_names.append(category_name)
176
- bbox_category_ids.append(category_names.index(category_name))
178
+
179
+ label_name = label_names[0] if merge_labels else value["rectanglelabels"][0]
180
+ bbox_label_names.append(label_name)
181
+ bbox_label_ids.append(label_names.index(label_name))
177
182
 
178
183
  image_url = task_data["image_url"]
179
- image = download_image(image_url, error_raise=False)
184
+ image = download_image(image_url, error_raise=False, use_cache=use_aws_cache)
180
185
  if image is None:
181
186
  logger.error("Failed to download image: %s", image_url)
182
187
  return None
@@ -193,14 +198,14 @@ def format_object_detection_sample_to_hf(
193
198
  },
194
199
  "objects": {
195
200
  "bbox": bboxes,
196
- "category_id": bbox_category_ids,
197
- "category_name": bbox_category_names,
201
+ "category_id": bbox_label_ids,
202
+ "category_name": bbox_label_names,
198
203
  },
199
204
  }
200
205
 
201
206
 
202
207
  # The HuggingFace Dataset features
203
- HF_DS_FEATURES = datasets.Features(
208
+ HF_DS_OBJECT_DETECTION_FEATURES = datasets.Features(
204
209
  {
205
210
  "image_id": datasets.Value("string"),
206
211
  "image": datasets.features.Image(),
@@ -218,3 +223,20 @@ HF_DS_FEATURES = datasets.Features(
218
223
  },
219
224
  }
220
225
  )
226
+
227
+
228
+ HF_DS_CLASSIFICATION_FEATURES = datasets.Features(
229
+ {
230
+ "image_id": datasets.Value("string"),
231
+ "image": datasets.features.Image(),
232
+ "width": datasets.Value("int64"),
233
+ "height": datasets.Value("int64"),
234
+ "meta": {
235
+ "barcode": datasets.Value("string"),
236
+ "off_image_id": datasets.Value("string"),
237
+ "image_url": datasets.Value("string"),
238
+ },
239
+ "category_id": datasets.Value("int64"),
240
+ "category_name": datasets.Value("string"),
241
+ }
242
+ )
@@ -4,6 +4,7 @@ import enum
4
4
  class ExportSource(str, enum.Enum):
5
5
  hf = "hf"
6
6
  ls = "ls"
7
+ ultralytics = "ultralytics"
7
8
 
8
9
 
9
10
  class ExportDestination(str, enum.Enum):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: labelr
3
- Version: 0.3.0
3
+ Version: 0.4.1
4
4
  Summary: A command-line tool to manage labeling tasks with Label Studio.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes