labelr-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
labelr/__init__.py ADDED
File without changes
labelr/__main__.py ADDED
@@ -0,0 +1,4 @@
+ from labelr.main import app
+
+ if __name__ == "__main__":
+     app()
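The module guard above makes the package runnable as `python -m labelr`. A minimal in-process sketch (assuming only what this file shows, namely that `labelr.main` exposes a Typer `app`):

    from typer.testing import CliRunner

    from labelr.main import app  # the same Typer app the module guard runs

    # Invoke the CLI in-process instead of via "python -m labelr".
    print(CliRunner().invoke(app, ["--help"]).output)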
labelr/annotate.py ADDED
@@ -0,0 +1,107 @@
+ import random
+ import string
+
+ from openfoodfacts.utils import get_logger
+
+ try:
+     from ultralytics.engine.results import Results
+ except ImportError:
+     pass
+
+ from labelr.triton.object_detection import ObjectDetectionResult
+
+ logger = get_logger(__name__)
+
+
+ def format_annotation_results_from_triton(
+     objects: list[ObjectDetectionResult], image_width: int, image_height: int
+ ):
+     """Format annotation results from a Triton object detection model into
+     Label Studio format."""
+     annotation_results = []
+     for object_ in objects:
+         bbox = object_.bounding_box
+         category_name = object_.label
+         # These are relative coordinates (between 0.0 and 1.0)
+         y_min, x_min, y_max, x_max = bbox
+         # Make sure the coordinates are within the image boundaries,
+         # and convert them to percentages
+         y_min = min(max(0, y_min), 1.0) * 100
+         x_min = min(max(0, x_min), 1.0) * 100
+         y_max = min(max(0, y_max), 1.0) * 100
+         x_max = min(max(0, x_max), 1.0) * 100
+         x = x_min
+         y = y_min
+         width = x_max - x_min
+         height = y_max - y_min
+
+         id_ = generate_id()
+         annotation_results.append(
+             {
+                 "id": id_,
+                 "type": "rectanglelabels",
+                 "from_name": "label",
+                 "to_name": "image",
+                 "original_width": image_width,
+                 "original_height": image_height,
+                 "image_rotation": 0,
+                 "value": {
+                     "rotation": 0,
+                     "x": x,
+                     "y": y,
+                     "width": width,
+                     "height": height,
+                     "rectanglelabels": [category_name],
+                 },
+             },
+         )
+     return annotation_results
+
+
+ def format_annotation_results_from_ultralytics(
+     results: "Results",
+     labels: list[str],
+     label_mapping: dict[str, str] | None = None,
+ ) -> list[dict]:
+     annotation_results = []
+     orig_height, orig_width = results.orig_shape
+     boxes = results.boxes
+     classes = boxes.cls.tolist()
+     for i, xyxyn in enumerate(boxes.xyxyn):
+         # Boxes found.
+         if len(xyxyn) > 0:
+             xyxyn = xyxyn.tolist()
+             x1 = xyxyn[0] * 100
+             y1 = xyxyn[1] * 100
+             x2 = xyxyn[2] * 100
+             y2 = xyxyn[3] * 100
+             width = x2 - x1
+             height = y2 - y1
+             label_id = int(classes[i])
+             label_name = labels[label_id]
+             if label_mapping:
+                 label_name = label_mapping.get(label_name, label_name)
+             annotation_results.append(
+                 {
+                     "id": generate_id(),
+                     "type": "rectanglelabels",
+                     "from_name": "label",
+                     "to_name": "image",
+                     "original_width": orig_width,
+                     "original_height": orig_height,
+                     "image_rotation": 0,
+                     "value": {
+                         "rotation": 0,
+                         "x": x1,
+                         "y": y1,
+                         "width": width,
+                         "height": height,
+                         "rectanglelabels": [label_name],
+                     },
+                 },
+             )
+     return annotation_results
+
+
+ def generate_id(length: int = 10) -> str:
+     return "".join(random.choices(string.ascii_letters + string.digits, k=length))
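A minimal arithmetic sketch of the conversion performed above (illustrative values only, not part of the package): a relative Triton box (y_min, x_min, y_max, x_max) becomes the percent-based Label Studio "value" dict.

    # Relative coordinates from a hypothetical detection labeled "product".
    y_min, x_min, y_max, x_max = 0.10, 0.20, 0.50, 0.60

    # Clamp to the image and convert to percentages, as the helper does.
    x = min(max(0, x_min), 1.0) * 100           # 20.0
    y = min(max(0, y_min), 1.0) * 100           # 10.0
    width = min(max(0, x_max), 1.0) * 100 - x   # 40.0
    height = min(max(0, y_max), 1.0) * 100 - y  # 40.0

    value = {
        "rotation": 0,
        "x": x,
        "y": y,
        "width": width,
        "height": height,
        "rectanglelabels": ["product"],
    }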
File without changes
@@ -0,0 +1,227 @@
+ import json
+ import random
+ import shutil
+ import typing
+ from pathlib import Path
+ from typing import Annotated, Optional
+
+ import typer
+ from openfoodfacts.utils import get_logger
+
+ from ..config import LABEL_STUDIO_DEFAULT_URL
+ from ..types import ExportDestination, ExportSource, TaskType
+
+ app = typer.Typer()
+
+ logger = get_logger(__name__)
+
+
+ @app.command()
+ def check(
+     api_key: Annotated[
+         Optional[str], typer.Option(envvar="LABEL_STUDIO_API_KEY")
+     ] = None,
+     project_id: Annotated[
+         Optional[int], typer.Option(help="Label Studio Project ID")
+     ] = None,
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+     dataset_dir: Annotated[
+         Optional[Path],
+         typer.Option(
+             help="Path to the dataset directory", exists=True, file_okay=False
+         ),
+     ] = None,
+     remove: Annotated[
+         bool,
+         typer.Option(
+             help="Remove duplicate images from the dataset, only for local datasets"
+         ),
+     ] = False,
+ ):
+     """Check a dataset for duplicate images."""
+     from label_studio_sdk.client import LabelStudio
+
+     from ..check import check_local_dataset, check_ls_dataset
+
+     if project_id is not None:
+         ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+         check_ls_dataset(ls, project_id)
+     elif dataset_dir is not None:
+         check_local_dataset(dataset_dir, remove=remove)
+     else:
+         raise typer.BadParameter("Either project ID or dataset directory is required")
+
+
+ @app.command()
+ def split_train_test(
+     task_type: TaskType, dataset_dir: Path, output_dir: Path, train_ratio: float = 0.8
+ ):
+     """Split a dataset into training and test sets.
+
+     Only classification tasks are supported.
+     """
+     if task_type == TaskType.classification:
+         class_dirs = [d for d in dataset_dir.iterdir() if d.is_dir()]
+         logger.info("Found classes: %s", [d.name for d in class_dirs])
+
+         output_dir.mkdir(parents=True, exist_ok=True)
+         train_dir = output_dir / "train"
+         test_dir = output_dir / "test"
+         train_dir.mkdir(parents=True, exist_ok=True)
+         test_dir.mkdir(parents=True, exist_ok=True)
+
+         for class_dir in class_dirs:
+             input_paths = list(class_dir.glob("*"))
+             random.shuffle(input_paths)
+
+             test_count = int(len(input_paths) * (1 - train_ratio))
+             if test_count == 0:
+                 logger.warning("Not enough samples, skipping class: %s", class_dir.name)
+                 continue
+
+             test_paths = input_paths[:test_count]
+             train_paths = input_paths[test_count:]
+
+             for output_dir, input_paths in (
+                 (train_dir, train_paths),
+                 (test_dir, test_paths),
+             ):
+                 output_cls_dir = output_dir / class_dir.name
+                 output_cls_dir.mkdir(parents=True, exist_ok=True)
+
+                 for path in input_paths:
+                     logger.info("Copying: %s to %s", path, output_cls_dir)
+                     shutil.copy(path, output_cls_dir / path.name)
+     else:
+         raise typer.BadParameter("Unsupported task type")
+
+
+ @app.command()
+ def convert_object_detection_dataset(
+     repo_id: Annotated[
+         str, typer.Option(help="Hugging Face Datasets repository ID to convert")
+     ],
+     output_file: Annotated[
+         Path, typer.Option(help="Path to the output JSON file", exists=False)
+     ],
+ ):
+     """Convert object detection dataset from Hugging Face Datasets to Label
+     Studio format, and save it to a JSON file."""
+     from datasets import load_dataset
+
+     from labelr.sample import format_object_detection_sample_from_hf
+
+     logger.info("Loading dataset: %s", repo_id)
+     ds = load_dataset(repo_id)
+     logger.info("Dataset loaded: %s", tuple(ds.keys()))
+
+     with output_file.open("wt") as f:
+         for split in ds.keys():
+             logger.info("Processing split: %s", split)
+             for sample in ds[split]:
+                 label_studio_sample = format_object_detection_sample_from_hf(
+                     sample, split=split
+                 )
+                 f.write(json.dumps(label_studio_sample) + "\n")
+
+
+ @app.command()
+ def export(
+     from_: Annotated[ExportSource, typer.Option("--from", help="Input source to use")],
+     to: Annotated[ExportDestination, typer.Option(help="Where to export the data")],
+     api_key: Annotated[Optional[str], typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     repo_id: Annotated[
+         Optional[str],
+         typer.Option(help="Hugging Face Datasets repository ID to convert"),
+     ] = None,
+     label_names: Annotated[
+         Optional[str],
+         typer.Option(help="Label names to use, as a comma-separated list"),
+     ] = None,
+     project_id: Annotated[
+         Optional[int], typer.Option(help="Label Studio Project ID")
+     ] = None,
+     label_studio_url: Optional[str] = LABEL_STUDIO_DEFAULT_URL,
+     output_dir: Annotated[
+         Optional[Path],
+         typer.Option(help="Path to the output directory", file_okay=False),
+     ] = None,
+     download_images: Annotated[
+         bool,
+         typer.Option(
+             help="If True, don't use HF images and download images from the server"
+         ),
+     ] = False,
+     train_ratio: Annotated[
+         float,
+         typer.Option(
+             help="Train ratio for splitting the dataset, if the split name is not "
+             "provided (typically, if the source is Label Studio)"
+         ),
+     ] = 0.8,
+     error_raise: Annotated[
+         bool,
+         typer.Option(
+             help="Raise an error if an image download fails, only for Ultralytics"
+         ),
+     ] = True,
+ ):
+     """Export Label Studio annotations, either to Hugging Face Datasets or to
+     local files (Ultralytics format)."""
+     from label_studio_sdk.client import LabelStudio
+
+     from labelr.export import (
+         export_from_hf_to_ultralytics,
+         export_from_ls_to_hf,
+         export_from_ls_to_ultralytics,
+     )
+
+     if (to == ExportDestination.hf or from_ == ExportSource.hf) and repo_id is None:
+         raise typer.BadParameter("Repository ID is required for export/import with HF")
+
+     if label_names is None:
+         if to == ExportDestination.hf:
+             raise typer.BadParameter("Label names are required for HF export")
+         if from_ == ExportSource.ls:
+             raise typer.BadParameter(
+                 "Label names are required for export from LS source"
+             )
+
+     if from_ == ExportSource.ls:
+         if project_id is None:
+             raise typer.BadParameter("Project ID is required for LS export")
+         if api_key is None:
+             raise typer.BadParameter("API key is required for LS export")
+
+     if to == ExportDestination.ultralytics and output_dir is None:
+         raise typer.BadParameter("Output directory is required for Ultralytics export")
+
+     if from_ == ExportSource.ls:
+         ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+         label_names = typing.cast(str, label_names)
+         label_names_list = label_names.split(",")
+         if to == ExportDestination.hf:
+             repo_id = typing.cast(str, repo_id)
+             export_from_ls_to_hf(
+                 ls, repo_id, label_names_list, typing.cast(int, project_id)
+             )
+         elif to == ExportDestination.ultralytics:
+             export_from_ls_to_ultralytics(
+                 ls,
+                 typing.cast(Path, output_dir),
+                 label_names_list,
+                 typing.cast(int, project_id),
+                 train_ratio=train_ratio,
+                 error_raise=error_raise,
+             )
+
+     elif from_ == ExportSource.hf:
+         if to == ExportDestination.ultralytics:
+             export_from_hf_to_ultralytics(
+                 typing.cast(str, repo_id),
+                 typing.cast(Path, output_dir),
+                 download_images=download_images,
+                 error_raise=error_raise,
+             )
+         else:
+             raise typer.BadParameter("Unsupported export format")
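A minimal sketch of the Label Studio → Ultralytics path taken by the `export` command above, calling the exporter directly. The argument order is taken from the call site above; the server URL, API key, label names and project ID are placeholder values.

    from pathlib import Path

    from label_studio_sdk.client import LabelStudio
    from labelr.export import export_from_ls_to_ultralytics

    # Placeholder credentials and IDs; argument order mirrors the CLI call above.
    ls = LabelStudio(base_url="http://localhost:8080", api_key="<LABEL_STUDIO_API_KEY>")
    export_from_ls_to_ultralytics(
        ls,
        Path("datasets/my-detector"),  # output directory
        ["product", "price-tag"],      # label names
        42,                            # Label Studio project ID
        train_ratio=0.8,
        error_raise=True,
    )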
@@ -0,0 +1,353 @@
+ import enum
+ import json
+ import typing
+ from pathlib import Path
+ from typing import Annotated, Optional
+
+ import typer
+ from openfoodfacts.utils import get_logger
+ from PIL import Image
+
+ from ..annotate import (
+     format_annotation_results_from_triton,
+     format_annotation_results_from_ultralytics,
+ )
+ from ..config import LABEL_STUDIO_DEFAULT_URL
+
+ app = typer.Typer()
+
+ logger = get_logger(__name__)
+
+
+ @app.command()
+ def create(
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     title: Annotated[str, typer.Option(help="Project title")],
+     config_file: Annotated[
+         Path, typer.Option(help="Path to label config file", file_okay=True)
+     ],
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+ ):
+     """Create a new Label Studio project."""
+     from label_studio_sdk.client import LabelStudio
+
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+     label_config = config_file.read_text()
+
+     project = ls.projects.create(title=title, label_config=label_config)
+     logger.info(f"Project created: {project}")
+
+
+ @app.command()
+ def import_data(
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
+     dataset_path: Annotated[
+         Path, typer.Option(help="Path to the Label Studio dataset file", file_okay=True)
+     ],
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+     batch_size: int = 25,
+ ):
+     """Import tasks from a dataset file to a Label Studio project.
+
+     The dataset file should contain one JSON object per line."""
+     import more_itertools
+     import tqdm
+     from label_studio_sdk.client import LabelStudio
+
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+
+     with dataset_path.open("rt") as f:
+         for batch in more_itertools.chunked(
+             tqdm.tqdm(map(json.loads, f), desc="tasks"), batch_size
+         ):
+             ls.projects.import_tasks(id=project_id, request=batch)
+
+
+ @app.command()
+ def update_prediction(
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+ ):
+     from label_studio_sdk.client import LabelStudio
+
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+
+     for task in ls.tasks.list(project=project_id, fields="all"):
+         for prediction in task.predictions:
+             prediction_id = prediction["id"]
+             if prediction["model_version"] == "":
+                 logger.info("Updating prediction: %s", prediction_id)
+                 ls.predictions.update(
+                     id=prediction_id,
+                     model_version="undefined",
+                 )
+
+
+ @app.command()
+ def add_split(
+     train_split: Annotated[
+         float, typer.Option(help="fraction of samples to add in train split")
+     ],
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+ ):
+     """Update the split field of tasks in a Label Studio project.
+
+     The split field is set to "train" with probability `train_split`, and "val"
+     otherwise. Tasks without a split field are assigned a split based on the
+     probability, and updated in the server. Tasks with a non-null split field
+     are not updated.
+     """
+     import random
+
+     from label_studio_sdk import Task
+     from label_studio_sdk.client import LabelStudio
+
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+
+     for task in ls.tasks.list(project=project_id, fields="all"):
+         task: Task
+         split = task.data.get("split")
+         if split is None:
+             split = "train" if random.random() < train_split else "val"
+             logger.info("Updating task: %s, split: %s", task.id, split)
+             ls.tasks.update(task.id, data={**task.data, "split": split})
+
+
+ @app.command()
+ def annotate_from_prediction(
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
+     updated_by: Annotated[
+         Optional[int], typer.Option(help="User ID to declare as annotator")
+     ] = None,
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+ ):
+     """Create annotations for all tasks from predictions.
+
+     This command is useful if you imported tasks with predictions, and want to
+     "validate" these predictions by creating annotations.
+     """
+     import tqdm
+     from label_studio_sdk.client import LabelStudio
+     from label_studio_sdk.types.task import Task
+
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+
+     task: Task
+     for task in tqdm.tqdm(
+         ls.tasks.list(project=project_id, fields="all"), desc="tasks"
+     ):
+         task_id = task.id
+         if task.total_annotations == 0 and task.total_predictions > 0:
+             logger.info("Creating annotation for task: %s", task_id)
+             ls.annotations.create(
+                 id=task_id,
+                 result=task.predictions[0]["result"],
+                 project=project_id,
+                 updated_by=updated_by,
+             )
+
+
+ class PredictorBackend(enum.Enum):
+     triton = "triton"
+     ultralytics = "ultralytics"
+
+
+ @app.command()
+ def add_prediction(
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
+     model_name: Annotated[
+         str,
+         typer.Option(
+             help="Name of the object detection model to run (for Triton server) or "
+             "of the Ultralytics zero-shot model to run."
+         ),
+     ] = "yolov8x-worldv2.pt",
+     triton_uri: Annotated[
+         Optional[str],
+         typer.Option(help="URI (host+port) of the Triton Inference Server"),
+     ] = None,
+     backend: Annotated[
+         PredictorBackend,
+         typer.Option(
+             help="Prediction backend: either use a Triton server to perform "
+             "the prediction or use Ultralytics."
+         ),
+     ] = PredictorBackend.ultralytics,
+     labels: Annotated[
+         Optional[list[str]],
+         typer.Option(
+             help="List of class labels to use for Yolo model. If you're using Yolo-World or other "
+             "zero-shot models, this is the list of label names that are going to be provided to the "
+             "model. In that case, you can use `label_mapping` to map the model's output to the "
+             "actual class names expected by Label Studio."
+         ),
+     ] = None,
+     label_mapping: Annotated[
+         Optional[str],
+         typer.Option(help="Mapping of model labels to class names, as a JSON string"),
+     ] = None,
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+     threshold: Annotated[
+         Optional[float],
+         typer.Option(
+             help="Confidence threshold for selecting bounding boxes. The default is 0.5 "
+             "for Triton backend and 0.1 for Ultralytics backend."
+         ),
+     ] = None,
+     max_det: Annotated[int, typer.Option(help="Maximum number of detections")] = 300,
+     dry_run: Annotated[
+         bool,
+         typer.Option(
+             help="Launch in dry run mode, without uploading annotations to Label Studio"
+         ),
+     ] = False,
+     error_raise: Annotated[
+         bool,
+         typer.Option(help="Raise an error if image download fails"),
+     ] = True,
+     model_version: Annotated[
+         Optional[str],
+         typer.Option(help="Model version to use for the prediction"),
+     ] = None,
+ ):
+     """Add predictions as pre-annotations to Label Studio tasks, using an
+     object detection model served by Triton Inference Server or Ultralytics."""
+
+     import tqdm
+     from label_studio_sdk.client import LabelStudio
+     from openfoodfacts.utils import get_image_from_url
+
+     from labelr.triton.object_detection import ObjectDetectionModelRegistry
+
+     label_mapping_dict = None
+     if label_mapping:
+         label_mapping_dict = json.loads(label_mapping)
+
+     if dry_run:
+         logger.info("** Dry run mode enabled **")
+
+     logger.info(
+         "backend: %s, model_name: %s, labels: %s, threshold: %s, label mapping: %s",
+         backend,
+         model_name,
+         labels,
+         threshold,
+         label_mapping,
+     )
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+
+     model: ObjectDetectionModelRegistry | "YOLO"
+
+     if backend == PredictorBackend.ultralytics:
+         from ultralytics import YOLO
+
+         if labels is None:
+             raise typer.BadParameter("Labels are required for Ultralytics backend")
+
+         if threshold is None:
+             threshold = 0.1
+
+         model = YOLO(model_name)
+         if hasattr(model, "set_classes"):
+             model.set_classes(labels)
+         else:
+             logger.warning("The model does not support setting classes directly.")
+     elif backend == PredictorBackend.triton:
+         if triton_uri is None:
+             raise typer.BadParameter("Triton URI is required for Triton backend")
+
+         if threshold is None:
+             threshold = 0.5
+
+         model = ObjectDetectionModelRegistry.load(model_name)
+     else:
+         raise typer.BadParameter(f"Unsupported backend: {backend}")
+
+     for task in tqdm.tqdm(ls.tasks.list(project=project_id), desc="tasks"):
+         if task.total_predictions == 0:
+             image_url = task.data["image_url"]
+             image = typing.cast(
+                 Image.Image,
+                 get_image_from_url(image_url, error_raise=error_raise),
+             )
+             if backend == PredictorBackend.ultralytics:
+                 results = model.predict(
+                     image,
+                     conf=threshold,
+                     max_det=max_det,
+                 )[0]
+                 labels = typing.cast(list[str], labels)
+                 label_studio_result = format_annotation_results_from_ultralytics(
+                     results, labels, label_mapping_dict
+                 )
+             else:
+                 output = model.detect_from_image(image, triton_uri=triton_uri)
+                 results = output.select(threshold=threshold)
+                 logger.info("Adding prediction to task: %s", task.id)
+                 label_studio_result = format_annotation_results_from_triton(
+                     results, image.width, image.height
+                 )
+             if dry_run:
+                 logger.info("image_url: %s", image_url)
+                 logger.info("result: %s", label_studio_result)
+             else:
+                 ls.predictions.create(
+                     task=task.id,
+                     result=label_studio_result,
+                     model_version=model_version,
+                 )
+
+
+ @app.command()
+ def create_dataset_file(
+     input_file: Annotated[
+         Path,
+         typer.Option(help="Path to a list of image URLs", exists=True),
+     ],
+     output_file: Annotated[
+         Path, typer.Option(help="Path to the output JSON file", exists=False)
+     ],
+ ):
+     """Create a Label Studio object detection dataset file from a list of
+     image URLs."""
+     from urllib.parse import urlparse
+
+     import tqdm
+     from openfoodfacts.images import extract_barcode_from_url, extract_source_from_url
+     from openfoodfacts.utils import get_image_from_url
+
+     from labelr.sample import format_object_detection_sample_to_ls
+
+     logger.info("Loading dataset: %s", input_file)
+
+     with output_file.open("wt") as f:
+         for line in tqdm.tqdm(input_file.open("rt"), desc="images"):
+             url = line.strip()
+             if not url:
+                 continue
+
+             extra_meta = {}
+             image_id = Path(urlparse(url).path).stem
+             if ".openfoodfacts.org" in url:
+                 barcode = extract_barcode_from_url(url)
+                 extra_meta["barcode"] = barcode
+                 off_image_id = Path(extract_source_from_url(url)).stem
+                 extra_meta["off_image_id"] = off_image_id
+                 image_id = f"{barcode}-{off_image_id}"
+
+             image = get_image_from_url(url, error_raise=False)
+
+             if image is None:
+                 logger.warning("Failed to load image: %s", url)
+                 continue
+
+             label_studio_sample = format_object_detection_sample_to_ls(
+                 image_id, url, image.width, image.height, extra_meta
+             )
+             f.write(json.dumps(label_studio_sample) + "\n")
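A minimal end-to-end sketch of the Ultralytics path of `add_prediction` above, using only calls that appear in this diff; the server URL, API key, label names, project ID and model version are illustrative placeholders.

    from label_studio_sdk.client import LabelStudio
    from openfoodfacts.utils import get_image_from_url
    from ultralytics import YOLO

    from labelr.annotate import format_annotation_results_from_ultralytics

    # Placeholder server, project and labels.
    ls = LabelStudio(base_url="http://localhost:8080", api_key="<LABEL_STUDIO_API_KEY>")
    model = YOLO("yolov8x-worldv2.pt")
    model.set_classes(["product"])  # zero-shot label names

    # Take one task, run detection on its image, and attach the result as a prediction.
    task = next(iter(ls.tasks.list(project=42)))
    image = get_image_from_url(task.data["image_url"])
    results = model.predict(image, conf=0.1, max_det=300)[0]
    result = format_annotation_results_from_ultralytics(results, ["product"])
    ls.predictions.create(task=task.id, result=result, model_version="yolov8x-worldv2")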