labelr 0.10.0__py3-none-any.whl → 0.11.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- labelr/annotate.py +3 -54
- labelr/apps/datasets.py +140 -9
- labelr/apps/directus.py +212 -0
- labelr/apps/google_batch.py +38 -0
- labelr/apps/label_studio.py +295 -104
- labelr/apps/typer_description.py +2 -0
- labelr/check.py +68 -7
- labelr/config.py +57 -1
- labelr/export/object_detection.py +96 -18
- labelr/main.py +16 -0
- labelr/sample/object_detection.py +42 -13
- labelr-0.11.1.dist-info/METADATA +230 -0
- {labelr-0.10.0.dist-info → labelr-0.11.1.dist-info}/RECORD +17 -15
- {labelr-0.10.0.dist-info → labelr-0.11.1.dist-info}/WHEEL +1 -1
- labelr-0.10.0.dist-info/METADATA +0 -158
- {labelr-0.10.0.dist-info → labelr-0.11.1.dist-info}/entry_points.txt +0 -0
- {labelr-0.10.0.dist-info → labelr-0.11.1.dist-info}/licenses/LICENSE +0 -0
- {labelr-0.10.0.dist-info → labelr-0.11.1.dist-info}/top_level.txt +0 -0
labelr/check.py
CHANGED
|
@@ -1,30 +1,64 @@
|
|
|
1
|
+
import typing
|
|
1
2
|
from collections import defaultdict
|
|
2
3
|
from pathlib import Path
|
|
3
4
|
|
|
4
5
|
import imagehash
|
|
5
6
|
import tqdm
|
|
6
7
|
from label_studio_sdk.client import LabelStudio
|
|
7
|
-
from openfoodfacts.
|
|
8
|
+
from openfoodfacts.types import JSONType
|
|
9
|
+
from openfoodfacts.utils import ImageDownloadItem, get_image_from_url, get_logger
|
|
8
10
|
from PIL import Image
|
|
9
11
|
|
|
10
12
|
logger = get_logger(__name__)
|
|
11
13
|
|
|
12
14
|
|
|
13
|
-
def check_ls_dataset(
|
|
15
|
+
def check_ls_dataset(
|
|
16
|
+
ls: LabelStudio,
|
|
17
|
+
project_id: int,
|
|
18
|
+
view_id: int | None = None,
|
|
19
|
+
delete_missing_images: bool = False,
|
|
20
|
+
delete_duplicate_images: bool = False,
|
|
21
|
+
):
|
|
22
|
+
"""Perform sanity checks of a Label Studio dataset.
|
|
23
|
+
|
|
24
|
+
This function checks for:
|
|
25
|
+
- Tasks with missing images (404)
|
|
26
|
+
- Duplicate images based on perceptual hash (pHash)
|
|
27
|
+
- Tasks with multiple annotations
|
|
28
|
+
|
|
29
|
+
This function doesn't perform any modifications to the dataset, except
|
|
30
|
+
optionally deleting tasks with missing images if `delete_missing_images`
|
|
31
|
+
is set to True.
|
|
32
|
+
|
|
33
|
+
Args:
|
|
34
|
+
ls (LabelStudio): Label Studio client instance.
|
|
35
|
+
project_id (int): ID of the Label Studio project to check.
|
|
36
|
+
view_id (int): ID of the Label Studio view to check. If None, no
|
|
37
|
+
filtering is done.
|
|
38
|
+
delete_missing_images (bool): Whether to delete tasks with missing
|
|
39
|
+
images.
|
|
40
|
+
delete_duplicate_images (bool): Whether to delete tasks with duplicate
|
|
41
|
+
images. If one task has annotations and the other doesn't, the task
|
|
42
|
+
with annotations will be kept. Otherwise, the most recent task will
|
|
43
|
+
be kept.
|
|
44
|
+
"""
|
|
14
45
|
skipped = 0
|
|
15
46
|
not_annotated = 0
|
|
16
47
|
annotated = 0
|
|
48
|
+
deleted = 0
|
|
49
|
+
multiple_annotations = 0
|
|
17
50
|
hash_map = defaultdict(list)
|
|
18
51
|
for task in tqdm.tqdm(
|
|
19
|
-
ls.tasks.list(project=project_id, fields="all"), desc="tasks"
|
|
52
|
+
ls.tasks.list(project=project_id, fields="all", view=view_id), desc="tasks"
|
|
20
53
|
):
|
|
21
|
-
annotations = task.annotations
|
|
54
|
+
annotations = typing.cast(list[JSONType], task.annotations)
|
|
22
55
|
|
|
23
56
|
if len(annotations) == 0:
|
|
24
57
|
not_annotated += 1
|
|
25
58
|
continue
|
|
26
59
|
elif len(annotations) > 1:
|
|
27
60
|
logger.warning("Task has multiple annotations: %s", task.id)
|
|
61
|
+
multiple_annotations += 1
|
|
28
62
|
continue
|
|
29
63
|
|
|
30
64
|
annotation = annotations[0]
|
|
@@ -34,20 +68,47 @@ def check_ls_dataset(ls: LabelStudio, project_id: int):
|
|
|
34
68
|
|
|
35
69
|
annotated += 1
|
|
36
70
|
image_url = task.data["image_url"]
|
|
37
|
-
|
|
38
|
-
|
|
71
|
+
image_struct = typing.cast(
|
|
72
|
+
ImageDownloadItem,
|
|
73
|
+
get_image_from_url(image_url, return_struct=True, error_raise=False),
|
|
74
|
+
)
|
|
75
|
+
|
|
76
|
+
if image_struct.response.status_code == 404:
|
|
77
|
+
logger.warning("Image not found (404): %s", image_url)
|
|
78
|
+
|
|
79
|
+
if delete_missing_images:
|
|
80
|
+
ls.tasks.delete(task.id)
|
|
81
|
+
deleted += 1
|
|
82
|
+
logger.info("Deleted task with missing image: %s", task.id)
|
|
83
|
+
continue
|
|
84
|
+
|
|
85
|
+
if image_struct.image is None:
|
|
86
|
+
logger.warning("Could not open image: %s", image_url)
|
|
87
|
+
continue
|
|
88
|
+
|
|
89
|
+
image_hash = str(imagehash.phash(image_struct.image))
|
|
39
90
|
hash_map[image_hash].append(task.id)
|
|
40
91
|
|
|
41
92
|
for image_hash, task_ids in hash_map.items():
|
|
42
93
|
if len(task_ids) > 1:
|
|
43
94
|
logger.warning("Duplicate images: %s", task_ids)
|
|
95
|
+
if delete_duplicate_images:
|
|
96
|
+
tasks = [ls.tasks.get(id=task_id) for task_id in task_ids]
|
|
97
|
+
# We sort the tasks by the number of annotations, so that we keep the
|
|
98
|
+
# one with at least one annotation.
|
|
99
|
+
for task in sorted(tasks, key=lambda x: len(x.annotations) > 0)[:-1]:
|
|
100
|
+
logger.info("Deleting duplicate task: %s", task.id)
|
|
101
|
+
ls.tasks.delete(task.id)
|
|
102
|
+
deleted += 1
|
|
44
103
|
|
|
45
104
|
logger.info(
|
|
46
|
-
"Tasks - annotated: %d, skipped: %d, not annotated: %d",
|
|
105
|
+
"Tasks - annotated: %d, skipped: %d, not annotated: %d, multiple annotations: %d",
|
|
47
106
|
annotated,
|
|
48
107
|
skipped,
|
|
49
108
|
not_annotated,
|
|
109
|
+
multiple_annotations,
|
|
50
110
|
)
|
|
111
|
+
logger.info("Deleted tasks with missing images: %d", deleted)
|
|
51
112
|
|
|
52
113
|
|
|
53
114
|
def check_local_dataset(dataset_dir: Path, remove: bool = False):
|
labelr/config.py
CHANGED
|
@@ -1 +1,57 @@
|
|
|
1
|
-
|
|
1
|
+
from pathlib import Path
|
|
2
|
+
|
|
3
|
+
from pydantic import BaseModel, Field
|
|
4
|
+
import os
|
|
5
|
+
|
|
6
|
+
CONFIG_PATH = Path("~").expanduser() / ".config/labelr/config.json"
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
# validate_assignment allows to validate the model every time it is updated
|
|
10
|
+
class LabelrConfig(BaseModel, validate_assignment=True):
|
|
11
|
+
label_studio_url: str = Field(
|
|
12
|
+
default="http://127.0.0.1:8080",
|
|
13
|
+
description="URL of the Label Studio instance to use. Defaults to http://127.0.0.1:8080.",
|
|
14
|
+
)
|
|
15
|
+
label_studio_api_key: str | None = Field(
|
|
16
|
+
default=None,
|
|
17
|
+
description="API key for Label Studio.",
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def get_config() -> LabelrConfig:
|
|
22
|
+
"""Get labelr configuration.
|
|
23
|
+
|
|
24
|
+
The configuration can come from (by order of precedence):
|
|
25
|
+
- Environment variables
|
|
26
|
+
- JSON file (see below)
|
|
27
|
+
|
|
28
|
+
The configuration is stored in a JSON file at ~/.config/labelr/config.json.
|
|
29
|
+
|
|
30
|
+
The following environment variables are supported:
|
|
31
|
+
- LABELR_LABEL_STUDIO_URL
|
|
32
|
+
- LABELR_LABEL_STUDIO_API_KEY
|
|
33
|
+
"""
|
|
34
|
+
if CONFIG_PATH.exists():
|
|
35
|
+
config = LabelrConfig.model_validate_json(CONFIG_PATH.read_bytes())
|
|
36
|
+
|
|
37
|
+
if "LABELR_LABEL_STUDIO_URL" in os.environ:
|
|
38
|
+
config.label_studio_url = os.environ["LABELR_LABEL_STUDIO_URL"]
|
|
39
|
+
if "LABELR_LABEL_STUDIO_API_KEY" in os.environ:
|
|
40
|
+
config.label_studio_api_key = os.environ["LABELR_LABEL_STUDIO_API_KEY"]
|
|
41
|
+
return config
|
|
42
|
+
else:
|
|
43
|
+
return LabelrConfig()
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def set_file_config(key: str, value: str):
|
|
47
|
+
"""Update the labelr configuration.
|
|
48
|
+
|
|
49
|
+
The configuration is stored in a JSON file at ~/.config/labelr/config.json.
|
|
50
|
+
"""
|
|
51
|
+
config = get_config()
|
|
52
|
+
setattr(config, key, value)
|
|
53
|
+
CONFIG_PATH.parent.mkdir(parents=True, exist_ok=True)
|
|
54
|
+
CONFIG_PATH.write_text(config.model_dump_json(indent=2))
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
config = get_config()
|
|
@@ -1,18 +1,21 @@
|
|
|
1
1
|
import functools
|
|
2
2
|
import logging
|
|
3
3
|
import pickle
|
|
4
|
+
import random
|
|
4
5
|
import tempfile
|
|
6
|
+
import typing
|
|
5
7
|
from pathlib import Path
|
|
6
8
|
|
|
7
9
|
import datasets
|
|
8
10
|
import tqdm
|
|
9
11
|
from label_studio_sdk.client import LabelStudio
|
|
10
12
|
from openfoodfacts.images import download_image
|
|
13
|
+
from PIL import Image, ImageOps
|
|
11
14
|
|
|
12
15
|
from labelr.export.common import _pickle_sample_generator
|
|
13
16
|
from labelr.sample.object_detection import (
|
|
14
|
-
HF_DS_OBJECT_DETECTION_FEATURES,
|
|
15
17
|
format_object_detection_sample_to_hf,
|
|
18
|
+
get_hf_object_detection_features,
|
|
16
19
|
)
|
|
17
20
|
|
|
18
21
|
logger = logging.getLogger(__name__)
|
|
@@ -23,19 +26,47 @@ def export_from_ls_to_hf_object_detection(
|
|
|
23
26
|
repo_id: str,
|
|
24
27
|
label_names: list[str],
|
|
25
28
|
project_id: int,
|
|
29
|
+
is_openfoodfacts_dataset: bool,
|
|
30
|
+
image_max_size: int | None = None,
|
|
31
|
+
view_id: int | None = None,
|
|
26
32
|
merge_labels: bool = False,
|
|
27
33
|
use_aws_cache: bool = True,
|
|
28
34
|
revision: str = "main",
|
|
29
|
-
):
|
|
35
|
+
) -> None:
|
|
36
|
+
"""Export annotations from a Label Studio project to a Hugging Face
|
|
37
|
+
dataset.
|
|
38
|
+
|
|
39
|
+
The Label Studio project should be an object detection project.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
ls (LabelStudio): Label Studio client instance.
|
|
43
|
+
repo_id (str): Hugging Face repository ID to push the dataset to.
|
|
44
|
+
label_names (list[str]): List of label names in the project.
|
|
45
|
+
project_id (int): Label Studio project ID to export from.
|
|
46
|
+
is_openfoodfacts_dataset (bool): Whether the dataset is an Open Food
|
|
47
|
+
Facts dataset. If True, the dataset will include additional
|
|
48
|
+
metadata fields specific to Open Food Facts (`barcode` and
|
|
49
|
+
`off_image_id`).
|
|
50
|
+
image_max_size (int | None): Maximum size (in pixels) for the images.
|
|
51
|
+
If None, no resizing is performed. Defaults to None.
|
|
52
|
+
view_id (int | None): Label Studio view ID to export from. If None,
|
|
53
|
+
all tasks are exported. Defaults to None.
|
|
54
|
+
merge_labels (bool): Whether to merge all labels into a single label
|
|
55
|
+
named "object". Defaults to False.
|
|
56
|
+
use_aws_cache (bool): Whether to use the AWS image cache when
|
|
57
|
+
downloading images. Defaults to True.
|
|
58
|
+
revision (str): The dataset revision to push to. Defaults to 'main'.
|
|
59
|
+
"""
|
|
30
60
|
if merge_labels:
|
|
31
61
|
label_names = ["object"]
|
|
32
62
|
|
|
33
63
|
logger.info(
|
|
34
|
-
"Project ID: %d, label names: %s, repo_id: %s, revision: %s",
|
|
64
|
+
"Project ID: %d, label names: %s, repo_id: %s, revision: %s, view ID: %s",
|
|
35
65
|
project_id,
|
|
36
66
|
label_names,
|
|
37
67
|
repo_id,
|
|
38
68
|
revision,
|
|
69
|
+
view_id,
|
|
39
70
|
)
|
|
40
71
|
|
|
41
72
|
for split in ["train", "val"]:
|
|
@@ -45,7 +76,9 @@ def export_from_ls_to_hf_object_detection(
|
|
|
45
76
|
tmp_dir = Path(tmp_dir_str)
|
|
46
77
|
logger.info("Saving samples to temporary directory: %s", tmp_dir)
|
|
47
78
|
for i, task in tqdm.tqdm(
|
|
48
|
-
enumerate(
|
|
79
|
+
enumerate(
|
|
80
|
+
ls.tasks.list(project=project_id, fields="all", view=view_id)
|
|
81
|
+
),
|
|
49
82
|
desc="tasks",
|
|
50
83
|
):
|
|
51
84
|
if task.data["split"] != split:
|
|
@@ -56,15 +89,17 @@ def export_from_ls_to_hf_object_detection(
|
|
|
56
89
|
label_names=label_names,
|
|
57
90
|
merge_labels=merge_labels,
|
|
58
91
|
use_aws_cache=use_aws_cache,
|
|
92
|
+
image_max_size=image_max_size,
|
|
59
93
|
)
|
|
60
94
|
if sample is not None:
|
|
61
95
|
# Save output as pickle
|
|
62
96
|
with open(tmp_dir / f"{split}_{i:05}.pkl", "wb") as f:
|
|
63
97
|
pickle.dump(sample, f)
|
|
64
98
|
|
|
99
|
+
features = get_hf_object_detection_features(is_openfoodfacts_dataset)
|
|
65
100
|
hf_ds = datasets.Dataset.from_generator(
|
|
66
101
|
functools.partial(_pickle_sample_generator, tmp_dir),
|
|
67
|
-
features=
|
|
102
|
+
features=features,
|
|
68
103
|
)
|
|
69
104
|
hf_ds.push_to_hub(repo_id, split=split, revision=revision)
|
|
70
105
|
|
|
@@ -78,12 +113,32 @@ def export_from_ls_to_ultralytics_object_detection(
|
|
|
78
113
|
error_raise: bool = True,
|
|
79
114
|
merge_labels: bool = False,
|
|
80
115
|
use_aws_cache: bool = True,
|
|
116
|
+
view_id: int | None = None,
|
|
117
|
+
image_max_size: int | None = None,
|
|
81
118
|
):
|
|
82
119
|
"""Export annotations from a Label Studio project to the Ultralytics
|
|
83
120
|
format.
|
|
84
121
|
|
|
85
122
|
The Label Studio project should be an object detection project with a
|
|
86
123
|
single rectanglelabels annotation result per task.
|
|
124
|
+
|
|
125
|
+
Args:
|
|
126
|
+
ls (LabelStudio): Label Studio client instance.
|
|
127
|
+
output_dir (Path): Path to the output directory.
|
|
128
|
+
label_names (list[str]): List of label names in the project.
|
|
129
|
+
project_id (int): Label Studio project ID to export from.
|
|
130
|
+
train_ratio (float): Ratio of training samples. The rest will be used
|
|
131
|
+
for validation. Defaults to 0.8.
|
|
132
|
+
error_raise (bool): Whether to raise an error if an image fails to
|
|
133
|
+
download. If False, the image will be skipped. Defaults to True.
|
|
134
|
+
merge_labels (bool): Whether to merge all labels into a single label
|
|
135
|
+
named "object". Defaults to False.
|
|
136
|
+
use_aws_cache (bool): Whether to use the AWS image cache when
|
|
137
|
+
downloading images. Defaults to True.
|
|
138
|
+
view_id (int | None): Label Studio view ID to export from. If None,
|
|
139
|
+
all tasks are exported. Defaults to None.
|
|
140
|
+
image_max_size (int | None): Maximum size (in pixels) for the images.
|
|
141
|
+
If None, no resizing is performed. Defaults to None.
|
|
87
142
|
"""
|
|
88
143
|
if merge_labels:
|
|
89
144
|
label_names = ["object"]
|
|
@@ -101,7 +156,7 @@ def export_from_ls_to_ultralytics_object_detection(
|
|
|
101
156
|
(images_dir / split).mkdir(parents=True, exist_ok=True)
|
|
102
157
|
|
|
103
158
|
for task in tqdm.tqdm(
|
|
104
|
-
ls.tasks.list(project=project_id, fields="all"),
|
|
159
|
+
ls.tasks.list(project=project_id, fields="all", view=view_id),
|
|
105
160
|
desc="tasks",
|
|
106
161
|
):
|
|
107
162
|
split = task.data.get("split")
|
|
@@ -179,18 +234,28 @@ def export_from_ls_to_ultralytics_object_detection(
|
|
|
179
234
|
has_valid_annotation = True
|
|
180
235
|
|
|
181
236
|
if has_valid_annotation:
|
|
182
|
-
|
|
237
|
+
image = download_image(
|
|
183
238
|
image_url,
|
|
184
|
-
return_struct=
|
|
239
|
+
return_struct=False,
|
|
185
240
|
error_raise=error_raise,
|
|
186
241
|
use_cache=use_aws_cache,
|
|
187
242
|
)
|
|
188
|
-
if
|
|
243
|
+
if image is None:
|
|
189
244
|
logger.error("Failed to download image: %s", image_url)
|
|
190
245
|
continue
|
|
191
246
|
|
|
192
|
-
|
|
193
|
-
|
|
247
|
+
image = typing.cast(Image.Image, image)
|
|
248
|
+
|
|
249
|
+
# Rotate image according to exif orientation using Pillow
|
|
250
|
+
ImageOps.exif_transpose(image, in_place=True)
|
|
251
|
+
# Resize image if larger than max size
|
|
252
|
+
if image_max_size is not None and (
|
|
253
|
+
image.width > image_max_size or image.height > image_max_size
|
|
254
|
+
):
|
|
255
|
+
image.thumbnail(
|
|
256
|
+
(image_max_size, image_max_size), Image.Resampling.LANCZOS
|
|
257
|
+
)
|
|
258
|
+
image.save(images_dir / split / f"{image_id}.jpg", format="JPEG")
|
|
194
259
|
|
|
195
260
|
with (output_dir / "data.yaml").open("w") as f:
|
|
196
261
|
f.write("path: data\n")
|
|
@@ -208,6 +273,7 @@ def export_from_hf_to_ultralytics_object_detection(
|
|
|
208
273
|
download_images: bool = True,
|
|
209
274
|
error_raise: bool = True,
|
|
210
275
|
use_aws_cache: bool = True,
|
|
276
|
+
image_max_size: int | None = None,
|
|
211
277
|
revision: str = "main",
|
|
212
278
|
):
|
|
213
279
|
"""Export annotations from a Hugging Face dataset project to the
|
|
@@ -228,6 +294,8 @@ def export_from_hf_to_ultralytics_object_detection(
|
|
|
228
294
|
use_aws_cache (bool): Whether to use the AWS image cache when
|
|
229
295
|
downloading images. This option is only used if `download_images`
|
|
230
296
|
is True. Defaults to True.
|
|
297
|
+
image_max_size (int | None): Maximum size (in pixels) for the images.
|
|
298
|
+
If None, no resizing is performed. Defaults to None.
|
|
231
299
|
revision (str): The dataset revision to load. Defaults to 'main'.
|
|
232
300
|
"""
|
|
233
301
|
logger.info("Repo ID: %s, revision: %s", repo_id, revision)
|
|
@@ -263,21 +331,31 @@ def export_from_hf_to_ultralytics_object_detection(
|
|
|
263
331
|
"`download_images` to False."
|
|
264
332
|
)
|
|
265
333
|
image_url = sample["meta"]["image_url"]
|
|
266
|
-
|
|
334
|
+
image = download_image(
|
|
267
335
|
image_url,
|
|
268
|
-
return_struct=
|
|
336
|
+
return_struct=False,
|
|
269
337
|
error_raise=error_raise,
|
|
270
338
|
use_cache=use_aws_cache,
|
|
271
339
|
)
|
|
272
|
-
if
|
|
340
|
+
if image is None:
|
|
273
341
|
logger.error("Failed to download image: %s", image_url)
|
|
274
342
|
continue
|
|
275
|
-
|
|
276
|
-
with (split_images_dir / f"{image_id}.jpg").open("wb") as f:
|
|
277
|
-
f.write(download_output.image_bytes)
|
|
278
343
|
else:
|
|
279
344
|
image = sample["image"]
|
|
280
|
-
|
|
345
|
+
|
|
346
|
+
image = typing.cast(Image.Image, image)
|
|
347
|
+
# Rotate image according to exif orientation using Pillow
|
|
348
|
+
# If the image source is Hugging Face, EXIF data is not preserved,
|
|
349
|
+
# so this step is only useful when downloading images.
|
|
350
|
+
ImageOps.exif_transpose(image, in_place=True)
|
|
351
|
+
# Resize image if larger than max size
|
|
352
|
+
if image_max_size is not None and (
|
|
353
|
+
image.width > image_max_size or image.height > image_max_size
|
|
354
|
+
):
|
|
355
|
+
image.thumbnail(
|
|
356
|
+
(image_max_size, image_max_size), Image.Resampling.LANCZOS
|
|
357
|
+
)
|
|
358
|
+
image.save(split_images_dir / f"{image_id}.jpg")
|
|
281
359
|
|
|
282
360
|
objects = sample["objects"]
|
|
283
361
|
bboxes = objects["bbox"]
|
labelr/main.py
CHANGED
|
@@ -4,11 +4,13 @@ import typer
|
|
|
4
4
|
from openfoodfacts.utils import get_logger
|
|
5
5
|
|
|
6
6
|
from labelr.apps import datasets as dataset_app
|
|
7
|
+
from labelr.apps import directus as directus_app
|
|
7
8
|
from labelr.apps import evaluate as evaluate_app
|
|
8
9
|
from labelr.apps import google_batch as google_batch_app
|
|
9
10
|
from labelr.apps import hugging_face as hf_app
|
|
10
11
|
from labelr.apps import label_studio as ls_app
|
|
11
12
|
from labelr.apps import train as train_app
|
|
13
|
+
from labelr import config as _config
|
|
12
14
|
|
|
13
15
|
app = typer.Typer(pretty_exceptions_show_locals=False)
|
|
14
16
|
|
|
@@ -60,6 +62,17 @@ def predict(
|
|
|
60
62
|
typer.echo(result)
|
|
61
63
|
|
|
62
64
|
|
|
65
|
+
@app.command()
|
|
66
|
+
def config(name: str, value: str):
|
|
67
|
+
"""Set a Labelr configuration value.
|
|
68
|
+
|
|
69
|
+
The configuration is stored in a JSON file at ~/.config/labelr/config.json.
|
|
70
|
+
"""
|
|
71
|
+
typer.echo(f"Set '{name}' to '{value}'")
|
|
72
|
+
_config.set_file_config(name, value)
|
|
73
|
+
typer.echo(f"Configuration saved to {_config.CONFIG_PATH}")
|
|
74
|
+
|
|
75
|
+
|
|
63
76
|
app.add_typer(
|
|
64
77
|
ls_app.app,
|
|
65
78
|
name="ls",
|
|
@@ -90,6 +103,9 @@ app.add_typer(
|
|
|
90
103
|
name="google-batch",
|
|
91
104
|
help="Generate datasets and launch batch jobs on Google Gemini.",
|
|
92
105
|
)
|
|
106
|
+
app.add_typer(
|
|
107
|
+
directus_app.app, name="directus", help="Manage directus collections and items."
|
|
108
|
+
)
|
|
93
109
|
|
|
94
110
|
if __name__ == "__main__":
|
|
95
111
|
app()
|
|
@@ -8,7 +8,7 @@ import PIL
|
|
|
8
8
|
from openfoodfacts import Flavor
|
|
9
9
|
from openfoodfacts.barcode import normalize_barcode
|
|
10
10
|
from openfoodfacts.images import download_image, generate_image_url
|
|
11
|
-
from PIL import ImageOps
|
|
11
|
+
from PIL import Image, ImageOps
|
|
12
12
|
|
|
13
13
|
logger = logging.getLogger(__name__)
|
|
14
14
|
|
|
@@ -153,6 +153,7 @@ def format_object_detection_sample_to_hf(
|
|
|
153
153
|
label_names: list[str],
|
|
154
154
|
merge_labels: bool = False,
|
|
155
155
|
use_aws_cache: bool = False,
|
|
156
|
+
image_max_size: int | None = None,
|
|
156
157
|
) -> dict | None:
|
|
157
158
|
"""Format a Label Studio object detection sample to Hugging Face format.
|
|
158
159
|
|
|
@@ -163,6 +164,8 @@ def format_object_detection_sample_to_hf(
|
|
|
163
164
|
merge_labels: Whether to merge all labels into a single label (the
|
|
164
165
|
first label in `label_names`).
|
|
165
166
|
use_aws_cache: Whether to use AWS cache when downloading images.
|
|
167
|
+
image_max_size: Maximum size (in pixels) for the images.
|
|
168
|
+
If None, no resizing is performed. Defaults to None.
|
|
166
169
|
|
|
167
170
|
Returns:
|
|
168
171
|
The formatted sample, or None in the following cases:
|
|
@@ -184,7 +187,8 @@ def format_object_detection_sample_to_hf(
|
|
|
184
187
|
|
|
185
188
|
for annotation_result in annotation["result"]:
|
|
186
189
|
if annotation_result["type"] != "rectanglelabels":
|
|
187
|
-
|
|
190
|
+
continue
|
|
191
|
+
# raise ValueError("Invalid annotation type: %s" % annotation_result["type"])
|
|
188
192
|
|
|
189
193
|
value = annotation_result["value"]
|
|
190
194
|
x_min = value["x"] / 100
|
|
@@ -205,21 +209,34 @@ def format_object_detection_sample_to_hf(
|
|
|
205
209
|
logger.error("Failed to download image: %s", image_url)
|
|
206
210
|
return None
|
|
207
211
|
|
|
212
|
+
image = typing.cast(Image.Image, image)
|
|
208
213
|
# Correct image orientation using EXIF data
|
|
209
214
|
# Label Studio provides bounding boxes based on the displayed image (after
|
|
210
215
|
# eventual EXIF rotation), so we need to apply the same transformation to
|
|
211
216
|
# the image.
|
|
212
217
|
# Indeed, Hugging Face stores images without applying EXIF rotation, and
|
|
213
218
|
# EXIF data is not preserved in the dataset.
|
|
214
|
-
ImageOps.exif_transpose(
|
|
219
|
+
ImageOps.exif_transpose(image, in_place=True)
|
|
220
|
+
|
|
221
|
+
# Resize image if larger than max size
|
|
222
|
+
if image_max_size is not None and (
|
|
223
|
+
image.width > image_max_size or image.height > image_max_size
|
|
224
|
+
):
|
|
225
|
+
image.thumbnail((image_max_size, image_max_size), Image.Resampling.LANCZOS)
|
|
226
|
+
|
|
227
|
+
meta = task_data.get("meta", {})
|
|
228
|
+
barcode = meta.get("barcode", None)
|
|
229
|
+
off_image_id = meta.get("off_image_id", None)
|
|
230
|
+
width = image.width
|
|
231
|
+
height = image.height
|
|
215
232
|
return {
|
|
216
233
|
"image_id": task_data["image_id"],
|
|
217
234
|
"image": image,
|
|
218
|
-
"width":
|
|
219
|
-
"height":
|
|
235
|
+
"width": width,
|
|
236
|
+
"height": height,
|
|
220
237
|
"meta": {
|
|
221
|
-
"barcode":
|
|
222
|
-
"off_image_id":
|
|
238
|
+
"barcode": barcode,
|
|
239
|
+
"off_image_id": off_image_id,
|
|
223
240
|
"image_url": image_url,
|
|
224
241
|
},
|
|
225
242
|
"objects": {
|
|
@@ -230,16 +247,23 @@ def format_object_detection_sample_to_hf(
|
|
|
230
247
|
}
|
|
231
248
|
|
|
232
249
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
250
|
+
def get_hf_object_detection_features(
|
|
251
|
+
is_openfoodfacts_dataset: bool,
|
|
252
|
+
) -> datasets.Features:
|
|
253
|
+
"""Get the HuggingFace Dataset features for object detection.
|
|
254
|
+
|
|
255
|
+
Args:
|
|
256
|
+
is_openfoodfacts_dataset (bool): Whether the dataset is an Open Food
|
|
257
|
+
Facts dataset. If True, the dataset will include additional
|
|
258
|
+
metadata fields specific to Open Food Facts (`barcode` and
|
|
259
|
+
`off_image_id`).
|
|
260
|
+
"""
|
|
261
|
+
features_dict = {
|
|
236
262
|
"image_id": datasets.Value("string"),
|
|
237
263
|
"image": datasets.features.Image(),
|
|
238
264
|
"width": datasets.Value("int64"),
|
|
239
265
|
"height": datasets.Value("int64"),
|
|
240
266
|
"meta": {
|
|
241
|
-
"barcode": datasets.Value("string"),
|
|
242
|
-
"off_image_id": datasets.Value("string"),
|
|
243
267
|
"image_url": datasets.Value("string"),
|
|
244
268
|
},
|
|
245
269
|
"objects": {
|
|
@@ -248,4 +272,9 @@ HF_DS_OBJECT_DETECTION_FEATURES = datasets.Features(
|
|
|
248
272
|
"category_name": datasets.Sequence(datasets.Value("string")),
|
|
249
273
|
},
|
|
250
274
|
}
|
|
251
|
-
|
|
275
|
+
|
|
276
|
+
if is_openfoodfacts_dataset:
|
|
277
|
+
features_dict["meta"]["barcode"] = datasets.Value("string")
|
|
278
|
+
features_dict["meta"]["off_image_id"] = datasets.Value("string")
|
|
279
|
+
|
|
280
|
+
return datasets.Features(features_dict)
|