labelr 0.10.0__py3-none-any.whl → 0.11.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,25 +7,39 @@ from typing import Annotated, Optional
7
7
  import typer
8
8
  from openfoodfacts.utils import get_logger
9
9
 
10
- from ..config import LABEL_STUDIO_DEFAULT_URL
10
+ from . import typer_description
11
+ from ..config import config
11
12
 
12
13
  app = typer.Typer()
13
14
 
14
15
  logger = get_logger(__name__)
15
16
 
16
17
 
18
+ def check_label_studio_api_key(api_key: str | None):
19
+ if not api_key:
20
+ raise typer.BadParameter(
21
+ "Label Studio API key not provided. Please provide it with the "
22
+ "--api-key option or set the LABELR_LABEL_STUDIO_API_KEY environment variable."
23
+ )
24
+
25
+
17
26
  @app.command()
18
27
  def create(
19
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
20
28
  title: Annotated[str, typer.Option(help="Project title")],
21
29
  config_file: Annotated[
22
30
  Path, typer.Option(help="Path to label config file", file_okay=True)
23
31
  ],
24
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
32
+ api_key: Annotated[
33
+ str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
34
+ ] = config.label_studio_api_key,
35
+ label_studio_url: Annotated[
36
+ str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
37
+ ] = config.label_studio_url,
25
38
  ):
26
39
  """Create a new Label Studio project."""
27
40
  from label_studio_sdk.client import LabelStudio
28
41
 
42
+ check_label_studio_api_key(api_key)
29
43
  ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
30
44
  label_config = config_file.read_text()
31
45
 
@@ -35,7 +49,6 @@ def create(
35
49
 
36
50
  @app.command()
37
51
  def import_data(
38
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
39
52
  project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
40
53
  dataset_path: Annotated[
41
54
  Path,
@@ -43,8 +56,15 @@ def import_data(
43
56
  help="Path to the Label Studio dataset JSONL file", file_okay=True
44
57
  ),
45
58
  ],
46
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
47
- batch_size: int = 25,
59
+ api_key: Annotated[
60
+ str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
61
+ ] = config.label_studio_api_key,
62
+ label_studio_url: Annotated[
63
+ str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
64
+ ] = config.label_studio_url,
65
+ batch_size: Annotated[
66
+ int, typer.Option(help="Number of tasks to import as a single batch")
67
+ ] = 25,
48
68
  ):
49
69
  """Import tasks from a dataset file to a Label Studio project.
50
70
 
@@ -56,6 +76,7 @@ def import_data(
56
76
  import tqdm
57
77
  from label_studio_sdk.client import LabelStudio
58
78
 
79
+ check_label_studio_api_key(api_key)
59
80
  ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
60
81
 
61
82
  with dataset_path.open("rt") as f:
@@ -67,12 +88,17 @@ def import_data(
67
88
 
68
89
  @app.command()
69
90
  def update_prediction(
70
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
71
91
  project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
72
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
92
+ api_key: Annotated[
93
+ str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
94
+ ] = config.label_studio_api_key,
95
+ label_studio_url: Annotated[
96
+ str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
97
+ ] = config.label_studio_url,
73
98
  ):
74
99
  from label_studio_sdk.client import LabelStudio
75
100
 
101
+ check_label_studio_api_key(api_key)
76
102
  ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
77
103
 
78
104
  for task in ls.tasks.list(project=project_id, fields="all"):
@@ -91,16 +117,7 @@ def add_split(
91
117
  train_split: Annotated[
92
118
  float, typer.Option(help="fraction of samples to add in train split")
93
119
  ],
94
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
95
120
  project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
96
- split_name: Annotated[
97
- Optional[str],
98
- typer.Option(
99
- help="name of the split associated "
100
- "with the task ID file. If --task-id-file is not provided, "
101
- "this field is ignored."
102
- ),
103
- ] = None,
104
121
  train_split_name: Annotated[
105
122
  str,
106
123
  typer.Option(help="name of the train split"),
@@ -110,13 +127,33 @@ def add_split(
110
127
  typer.Option(help="name of the validation split"),
111
128
  ] = "val",
112
129
  task_id_file: Annotated[
113
- Optional[Path],
130
+ Path | None,
114
131
  typer.Option(help="path of a text file containing IDs of samples"),
115
132
  ] = None,
133
+ split_name: Annotated[
134
+ str | None,
135
+ typer.Option(
136
+ help="name of the split associated "
137
+ "with the task ID file. If --task-id-file is not provided, "
138
+ "this field is ignored."
139
+ ),
140
+ ] = None,
116
141
  overwrite: Annotated[
117
142
  bool, typer.Option(help="overwrite existing split field")
118
143
  ] = False,
119
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
144
+ view_id: Annotated[
145
+ int | None,
146
+ typer.Option(
147
+ help="ID of the Label Studio view, if any. This option is useful "
148
+ "to filter the task to process."
149
+ ),
150
+ ] = None,
151
+ api_key: Annotated[
152
+ str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
153
+ ] = config.label_studio_api_key,
154
+ label_studio_url: Annotated[
155
+ str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
156
+ ] = config.label_studio_url,
120
157
  ):
121
158
  """Update the split field of tasks in a Label Studio project.
122
159
 
@@ -124,7 +161,7 @@ def add_split(
124
161
 
125
162
  If `--task-id-file` is provided, it should contain a list of task IDs,
126
163
  one per line. The split field of these tasks will be updated to the value
127
- of `--split-name`.
164
+ of `--split-name`. The `--train-split` value is ignored in this case.
128
165
 
129
166
  If `--task-id-file` is not provided, the split field of all tasks in the
130
167
  project will be updated based on the `train_split` probability.
@@ -133,12 +170,16 @@ def add_split(
133
170
 
134
171
  In both cases, tasks with a non-null split field are not updated unless
135
172
  the `--overwrite` flag is provided.
173
+
174
+ The `--view-id` option can be used to only assign the split on a subset
175
+ of the tasks.
136
176
  """
137
177
  import random
138
178
 
139
179
  from label_studio_sdk import Task
140
180
  from label_studio_sdk.client import LabelStudio
141
181
 
182
+ check_label_studio_api_key(api_key)
142
183
  ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
143
184
 
144
185
  task_ids = None
@@ -149,14 +190,17 @@ def add_split(
149
190
  )
150
191
  task_ids = task_id_file.read_text().strip().split("\n")
151
192
 
152
- for task in ls.tasks.list(project=project_id, fields="all"):
193
+ for task in ls.tasks.list(project=project_id, fields="all", view=view_id):
153
194
  task: Task
154
195
  task_id = task.id
155
196
 
156
197
  split = task.data.get("split")
157
198
  if split is None or overwrite:
158
- if task_ids and str(task_id) in task_ids:
159
- split = split_name
199
+ if task_ids:
200
+ if str(task_id) in task_ids:
201
+ split = split_name
202
+ else:
203
+ continue
160
204
  else:
161
205
  split = (
162
206
  train_split_name
@@ -170,12 +214,16 @@ def add_split(
170
214
 
171
215
  @app.command()
172
216
  def annotate_from_prediction(
173
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
174
217
  project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
218
+ api_key: Annotated[
219
+ str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
220
+ ] = config.label_studio_api_key,
175
221
  updated_by: Annotated[
176
222
  Optional[int], typer.Option(help="User ID to declare as annotator")
177
223
  ] = None,
178
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
224
+ label_studio_url: Annotated[
225
+ str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
226
+ ] = config.label_studio_url,
179
227
  ):
180
228
  """Create annotations for all tasks from predictions.
181
229
 
@@ -186,6 +234,7 @@ def annotate_from_prediction(
186
234
  from label_studio_sdk.client import LabelStudio
187
235
  from label_studio_sdk.types.task import Task
188
236
 
237
+ check_label_studio_api_key(api_key)
189
238
  ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
190
239
 
191
240
  task: Task
@@ -203,42 +252,66 @@ def annotate_from_prediction(
203
252
  )
204
253
 
205
254
 
206
- class PredictorBackend(enum.Enum):
207
- ultralytics = "ultralytics"
208
- robotoff = "robotoff"
255
+ class PredictorBackend(enum.StrEnum):
256
+ ultralytics = enum.auto()
257
+ ultralytics_yolo_world = enum.auto()
258
+ ultralytics_sam3 = enum.auto()
259
+
260
+
261
+ YOLO_WORLD_MODELS = (
262
+ "yolov8s-world.pt",
263
+ "yolov8s-worldv2.pt",
264
+ "yolov8m-world.pt",
265
+ "yolov8m-worldv2.pt",
266
+ "yolov8l-world.pt",
267
+ "yolov8l-worldv2.pt",
268
+ "yolov8x-world.pt",
269
+ "yolov8x-worldv2.pt",
270
+ )
209
271
 
210
272
 
211
273
  @app.command()
212
274
  def add_prediction(
213
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
214
275
  project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
276
+ api_key: Annotated[
277
+ str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
278
+ ] = config.label_studio_api_key,
215
279
  view_id: Annotated[
216
- Optional[int],
280
+ int | None,
217
281
  typer.Option(
218
282
  help="Label Studio View ID to filter tasks. If not provided, all tasks in the "
219
283
  "project are processed."
220
284
  ),
221
285
  ] = None,
222
286
  model_name: Annotated[
223
- str,
287
+ str | None,
224
288
  typer.Option(
225
- help="Name of the object detection model to run (for Robotoff server) or "
226
- "of the Ultralytics zero-shot model to run."
289
+ "--model",
290
+ help="Name or path of the object detection model to run. How this is used depends "
291
+ "on the backend. If using `ultralytics` backend, the option is required and is the "
292
+ "name of the model to download from the Ultralytics model zoo or the path to a local "
293
+ "model. "
294
+ "If using `ultralytics_yolo_world` backend, this is optional and is the name of the "
295
+ "`yolo-world` model to download from the Ultralytics model zoo or the path to a local "
296
+ "model (Defaults: `yolov8x-worldv2.pt`). "
297
+ "If using `ultralytics_sam3` backend, this option is ignored, as there is a single model. "
298
+ "The model is downloaded automatically from Hugging Face.",
227
299
  ),
228
- ] = "yolov8x-worldv2.pt",
229
- server_url: Annotated[
230
- Optional[str],
231
- typer.Option(help="The Robotoff URL if the backend is robotoff"),
232
- ] = "https://robotoff.openfoodfacts.org",
300
+ ] = None,
301
+ skip_existing: Annotated[
302
+ bool,
303
+ typer.Option(
304
+ help="Skip tasks that already have predictions",
305
+ ),
306
+ ] = True,
233
307
  backend: Annotated[
234
308
  PredictorBackend,
235
309
  typer.Option(
236
- help="Prediction backend: either use Ultralytics to perform "
237
- "the prediction or Robotoff server."
310
+ help="The prediction backend, possible options are: `ultralytics` or `ultralytics_sam3`"
238
311
  ),
239
312
  ] = PredictorBackend.ultralytics,
240
313
  labels: Annotated[
241
- Optional[list[str]],
314
+ list[str] | None,
242
315
  typer.Option(
243
316
  help="List of class labels to use for Yolo model. If you're using Yolo-World or other "
244
317
  "zero-shot models, this is the list of label names that are going to be provided to the "
@@ -247,15 +320,19 @@ def add_prediction(
247
320
  ),
248
321
  ] = None,
249
322
  label_mapping: Annotated[
250
- Optional[str],
251
- typer.Option(help="Mapping of model labels to class names, as a JSON string"),
323
+ str | None,
324
+ typer.Option(
325
+ help='Mapping of model labels to class names, as a JSON string. Example: \'{"price tag": "price-tag"}\''
326
+ ),
252
327
  ] = None,
253
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
328
+ label_studio_url: Annotated[
329
+ str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
330
+ ] = config.label_studio_url,
254
331
  threshold: Annotated[
255
- Optional[float],
332
+ float | None,
256
333
  typer.Option(
257
- help="Confidence threshold for selecting bounding boxes. The default is 0.3 "
258
- "for robotoff backend and 0.1 for ultralytics backend."
334
+ help="Confidence threshold for selecting bounding boxes. The default is 0.1 for "
335
+ "ultralytics backend and 0.25 for ultralytics_sam3 backend."
259
336
  ),
260
337
  ] = None,
261
338
  max_det: Annotated[int, typer.Option(help="Maximum numbers of detections")] = 300,
@@ -270,22 +347,31 @@ def add_prediction(
270
347
  typer.Option(help="Raise an error if image download fails"),
271
348
  ] = True,
272
349
  model_version: Annotated[
273
- Optional[str],
274
- typer.Option(help="Model version to use for the prediction"),
350
+ str | None,
351
+ typer.Option(
352
+ help="Set the model version field of the prediction sent to Label Studio. "
353
+ "This is used to track which model generated the prediction."
354
+ ),
355
+ ] = None,
356
+ imgsz: Annotated[
357
+ int | None,
358
+ typer.Option(
359
+ help="Image size to use for Ultralytics models. If not provided, "
360
+ "the default size of the model is used."
361
+ ),
275
362
  ] = None,
276
363
  ):
277
- """Add predictions as pre-annotations to Label Studio tasks,
278
- for an object detection model running on Triton Inference Server."""
364
+ """Add predictions as pre-annotations to Label Studio tasks."""
279
365
 
280
366
  import tqdm
367
+ from huggingface_hub import hf_hub_download
281
368
  from label_studio_sdk.client import LabelStudio
282
- from openfoodfacts.utils import get_image_from_url, http_session
369
+ from openfoodfacts.utils import get_image_from_url
283
370
  from PIL import Image
284
371
 
285
- from ..annotate import (
286
- format_annotation_results_from_robotoff,
287
- format_annotation_results_from_ultralytics,
288
- )
372
+ from ..annotate import format_annotation_results_from_ultralytics
373
+
374
+ check_label_studio_api_key(api_key)
289
375
 
290
376
  label_mapping_dict = None
291
377
  if label_mapping:
@@ -294,6 +380,13 @@ def add_prediction(
294
380
  if dry_run:
295
381
  logger.info("** Dry run mode enabled **")
296
382
 
383
+ if backend == PredictorBackend.ultralytics and not Path(model_name).is_file():
384
+ raise typer.BadParameter(
385
+ f"Model file '{model_name}' not found. When the backend is `ultralytics` "
386
+ "and the --model does not refer to a YOLO-World model, --model is expected "
387
+ "to be a local Ultralytics model file (`.pt`)."
388
+ )
389
+
297
390
  logger.info(
298
391
  "backend: %s, model_name: %s, labels: %s, threshold: %s, label mapping: %s",
299
392
  backend,
@@ -304,66 +397,83 @@ def add_prediction(
304
397
  )
305
398
  ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
306
399
 
307
- if backend == PredictorBackend.ultralytics:
308
- from ultralytics import YOLO
400
+ if backend in (
401
+ PredictorBackend.ultralytics,
402
+ PredictorBackend.ultralytics_yolo_world,
403
+ ):
404
+ from ultralytics import YOLO, YOLOWorld
309
405
 
310
406
  if labels is None:
311
- raise typer.BadParameter("Labels are required for Ultralytics backend")
407
+ raise typer.BadParameter("Labels are required for `ultralytics` backend")
312
408
 
313
409
  if threshold is None:
314
410
  threshold = 0.1
315
411
 
316
- model = YOLO(model_name)
317
- if hasattr(model, "set_classes"):
412
+ if backend == PredictorBackend.ultralytics:
413
+ model = YOLO(model_name)
414
+ elif backend == PredictorBackend.ultralytics_yolo_world:
415
+ if model_name is None:
416
+ model_name = "yolov8x-worldv2.pt"
417
+ model = YOLOWorld(model_name)
318
418
  model.set_classes(labels)
319
- else:
320
- logger.warning("The model does not support setting classes directly.")
321
- elif backend == PredictorBackend.robotoff:
322
- if server_url is None:
323
- raise typer.BadParameter("--server-url is required for Robotoff backend")
419
+
420
+ elif backend == PredictorBackend.ultralytics_sam3:
421
+ from ultralytics.models.sam import SAM3SemanticPredictor
324
422
 
325
423
  if threshold is None:
326
- threshold = 0.1
327
- server_url = server_url.rstrip("/")
424
+ threshold = 0.25
425
+
426
+ # SAM3 cannot be downloaded directly due to gated access. Use a
427
+ # proxy repo.
428
+ model_path = hf_hub_download(
429
+ "1038lab/sam3",
430
+ filename="sam3.pt",
431
+ revision="f055b060a4de0a040891ba2ebac9c5cb3c1c0132",
432
+ )
433
+ overrides = dict(
434
+ task="segment",
435
+ mode="predict",
436
+ model=model_path,
437
+ save=False,
438
+ )
439
+
440
+ if imgsz is not None:
441
+ overrides["imgsz"] = imgsz
442
+ model = SAM3SemanticPredictor(overrides=overrides)
328
443
  else:
329
444
  raise typer.BadParameter(f"Unsupported backend: {backend}")
330
445
 
331
446
  for task in tqdm.tqdm(
332
447
  ls.tasks.list(project=project_id, view=view_id), desc="tasks"
333
448
  ):
334
- if task.total_predictions == 0:
449
+ if not (skip_existing and task.total_predictions > 0):
335
450
  image_url = task.data["image_url"]
336
- image = typing.cast(
337
- Image.Image,
338
- get_image_from_url(image_url, error_raise=error_raise),
339
- )
340
- if backend == PredictorBackend.ultralytics:
341
- results = model.predict(
342
- image,
343
- conf=threshold,
344
- max_det=max_det,
345
- )[0]
451
+ image = get_image_from_url(image_url, error_raise=error_raise)
452
+ if image is None:
453
+ continue
454
+ min_score = None
455
+ if backend in (
456
+ PredictorBackend.ultralytics,
457
+ PredictorBackend.ultralytics_yolo_world,
458
+ ):
459
+ predict_kwargs = {
460
+ "conf": threshold,
461
+ "max_det": max_det,
462
+ }
463
+ if imgsz is not None:
464
+ predict_kwargs["imgsz"] = imgsz
465
+ results = model.predict(image, **predict_kwargs)[0]
346
466
  labels = typing.cast(list[str], labels)
347
467
  label_studio_result = format_annotation_results_from_ultralytics(
348
468
  results, labels, label_mapping_dict
349
469
  )
350
- elif backend == PredictorBackend.robotoff:
351
- r = http_session.get(
352
- f"{server_url}/api/v1/images/predict",
353
- params={
354
- "models": model_name,
355
- "output_image": 0,
356
- "image_url": image_url,
357
- },
358
- )
359
- r.raise_for_status()
360
- response = r.json()
361
- label_studio_result = format_annotation_results_from_robotoff(
362
- response["predictions"][model_name],
363
- image.width,
364
- image.height,
365
- label_mapping_dict,
470
+ elif backend == PredictorBackend.ultralytics_sam3:
471
+ model.set_image(image)
472
+ results = model(text=labels)[0]
473
+ label_studio_result = format_annotation_results_from_ultralytics(
474
+ results, labels, label_mapping_dict
366
475
  )
476
+ min_score = min(results.boxes.conf.tolist(), default=None)
367
477
  if dry_run:
368
478
  logger.info("image_url: %s", image_url)
369
479
  logger.info("result: %s", label_studio_result)
@@ -372,7 +482,9 @@ def add_prediction(
372
482
  task=task.id,
373
483
  result=label_studio_result,
374
484
  model_version=model_version,
485
+ score=min_score,
375
486
  )
487
+ logger.info("Prediction added for task: %s", task.id)
376
488
 
377
489
 
378
490
  @app.command()
@@ -449,28 +561,61 @@ def create_config_file(
449
561
  @app.command()
450
562
  def check_dataset(
451
563
  project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
564
+ view_id: Annotated[int, typer.Option(help="Label Studio View ID, if any.")] = None,
452
565
  api_key: Annotated[
453
- Optional[str], typer.Option(envvar="LABEL_STUDIO_API_KEY")
454
- ] = None,
455
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
566
+ str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
567
+ ] = config.label_studio_api_key,
568
+ label_studio_url: Annotated[
569
+ str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
570
+ ] = config.label_studio_url,
571
+ delete_missing_images: Annotated[
572
+ bool,
573
+ typer.Option(help="Delete tasks with missing images from the dataset"),
574
+ ] = False,
575
+ delete_duplicate_images: Annotated[
576
+ bool, typer.Option(help="Delete duplicate images from the dataset")
577
+ ] = False,
456
578
  ):
457
- """Check a dataset for duplicate images on Label Studio."""
579
+ """Perform sanity checks of a Label Studio dataset.
580
+
581
+ This function checks for:
582
+ - Tasks with missing images (404)
583
+ - Duplicate images based on perceptual hash (pHash)
584
+ - Tasks with multiple annotations
585
+
586
+ This function doesn't perform any modifications to the dataset, except
587
+ optionally deleting tasks with missing images if --delete-missing-images
588
+ is provided and tasks with duplicate images if --delete-duplicate-images
589
+ is provided.
590
+ """
458
591
  from label_studio_sdk.client import LabelStudio
459
592
 
460
593
  from ..check import check_ls_dataset
461
594
 
595
+ check_label_studio_api_key(api_key)
462
596
  ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
463
- check_ls_dataset(ls, project_id)
597
+ check_ls_dataset(
598
+ ls=ls,
599
+ project_id=project_id,
600
+ view_id=view_id,
601
+ delete_missing_images=delete_missing_images,
602
+ delete_duplicate_images=delete_duplicate_images,
603
+ )
464
604
 
465
605
 
466
606
  @app.command()
467
607
  def list_users(
468
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
469
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
608
+ api_key: Annotated[
609
+ str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
610
+ ] = config.label_studio_api_key,
611
+ label_studio_url: Annotated[
612
+ str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
613
+ ] = config.label_studio_url,
470
614
  ):
471
615
  """List all users in Label Studio."""
472
616
  from label_studio_sdk.client import LabelStudio
473
617
 
618
+ check_label_studio_api_key(api_key)
474
619
  ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
475
620
 
476
621
  for user in ls.users.list():
@@ -480,11 +625,57 @@ def list_users(
480
625
  @app.command()
481
626
  def delete_user(
482
627
  user_id: int,
483
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
484
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
628
+ api_key: Annotated[
629
+ str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
630
+ ] = config.label_studio_api_key,
631
+ label_studio_url: Annotated[
632
+ str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
633
+ ] = config.label_studio_url,
485
634
  ):
486
635
  """Delete a user from Label Studio."""
487
636
  from label_studio_sdk.client import LabelStudio
488
637
 
638
+ check_label_studio_api_key(api_key)
489
639
  ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
490
640
  ls.users.delete(user_id)
641
+
642
+
643
+ @app.command()
644
+ def dump_dataset(
645
+ project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
646
+ output_file: Annotated[
647
+ Path, typer.Option(help="Path of the output file", writable=True)
648
+ ],
649
+ api_key: Annotated[
650
+ str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
651
+ ] = config.label_studio_api_key,
652
+ view_id: Annotated[
653
+ int | None,
654
+ typer.Option(
655
+ help="ID of the Label Studio view, if any. This option is useful "
656
+ "to filter the tasks to dump."
657
+ ),
658
+ ] = None,
659
+ label_studio_url: Annotated[
660
+ str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
661
+ ] = config.label_studio_url,
662
+ ):
663
+ """Dump all the tasks of a dataset in a JSONL file.
664
+
665
+ All fields of the tasks are exported. A subset of the tasks can be
666
+ selected by filtering tasks based on a view (=tab) using the `--view-id`
667
+ option.
668
+ """
669
+ import orjson
670
+ import tqdm
671
+ from label_studio_sdk.client import LabelStudio
672
+
673
+ check_label_studio_api_key(api_key)
674
+ ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
675
+
676
+ with output_file.open("wb") as f:
677
+ for task in tqdm.tqdm(
678
+ ls.tasks.list(project=project_id, view=view_id), desc="tasks"
679
+ ):
680
+ content = orjson.dumps(task.dict())
681
+ f.write(content + b"\n")
@@ -0,0 +1,2 @@
1
+ LABEL_STUDIO_API_KEY = """API Key to authenticate to the Label Studio server. Can also be set with the LABELR_LABEL_STUDIO_API_KEY environment variable."""
2
+ LABEL_STUDIO_URL = """URL of the Label Studio server. Can also be set with the LABELR_LABEL_STUDIO_URL environment variable."""