PyPI - labelr - Versions diffs - 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

labelr 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

labelr/apps/datasets.py +140 -9
labelr/apps/directus.py +212 -0
labelr/apps/google_batch.py +38 -0
labelr/apps/label_studio.py +260 -63
labelr/apps/typer_description.py +2 -0
labelr/check.py +68 -7
labelr/config.py +57 -1
labelr/export/object_detection.py +96 -18
labelr/main.py +16 -0
labelr/sample/object_detection.py +42 -13
labelr-0.11.0.dist-info/METADATA +230 -0
{labelr-0.10.0.dist-info → labelr-0.11.0.dist-info}/RECORD +16 -14
{labelr-0.10.0.dist-info → labelr-0.11.0.dist-info}/WHEEL +1 -1
labelr-0.10.0.dist-info/METADATA +0 -158
{labelr-0.10.0.dist-info → labelr-0.11.0.dist-info}/entry_points.txt +0 -0
{labelr-0.10.0.dist-info → labelr-0.11.0.dist-info}/licenses/LICENSE +0 -0
{labelr-0.10.0.dist-info → labelr-0.11.0.dist-info}/top_level.txt +0 -0

labelr/apps/label_studio.py CHANGED Viewed

@@ -7,25 +7,39 @@ from typing import Annotated, Optional
 import typer
 from openfoodfacts.utils import get_logger
-from ..config import LABEL_STUDIO_DEFAULT_URL
+from . import typer_description
+from ..config import config
 app = typer.Typer()
 logger = get_logger(__name__)
+def check_label_studio_api_key(api_key: str | None):
+    if not api_key:
+        raise typer.BadParameter(
+            "Label Studio API key not provided. Please provide it with the "
+            "--api-key option or set the LABELR_LABEL_STUDIO_API_KEY environment variable."
+        )
 @app.command()
 def create(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
     title: Annotated[str, typer.Option(help="Project title")],
     config_file: Annotated[
         Path, typer.Option(help="Path to label config file", file_okay=True)
     ],
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+    api_key: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
+    ] = config.label_studio_api_key,
+    label_studio_url: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
+    ] = config.label_studio_url,
 ):
     """Create a new Label Studio project."""
     from label_studio_sdk.client import LabelStudio
+    check_label_studio_api_key(api_key)
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
     label_config = config_file.read_text()
@@ -35,7 +49,6 @@ def create(
 @app.command()
 def import_data(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
     project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
     dataset_path: Annotated[
         Path,
@@ -43,8 +56,15 @@ def import_data(
             help="Path to the Label Studio dataset JSONL file", file_okay=True
         ),
     ],
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
-    batch_size: int = 25,
+    api_key: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
+    ] = config.label_studio_api_key,
+    label_studio_url: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
+    ] = config.label_studio_url,
+    batch_size: Annotated[
+        int, typer.Option(help="Number of tasks to import as a single batch")
+    ] = 25,
 ):
     """Import tasks from a dataset file to a Label Studio project.
@@ -56,6 +76,7 @@ def import_data(
     import tqdm
     from label_studio_sdk.client import LabelStudio
+    check_label_studio_api_key(api_key)
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
     with dataset_path.open("rt") as f:
@@ -67,12 +88,17 @@ def import_data(
 @app.command()
 def update_prediction(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
     project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+    api_key: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
+    ] = config.label_studio_api_key,
+    label_studio_url: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
+    ] = config.label_studio_url,
 ):
     from label_studio_sdk.client import LabelStudio
+    check_label_studio_api_key(api_key)
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
     for task in ls.tasks.list(project=project_id, fields="all"):
@@ -91,16 +117,7 @@ def add_split(
     train_split: Annotated[
         float, typer.Option(help="fraction of samples to add in train split")
     ],
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
     project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
-    split_name: Annotated[
-        Optional[str],
-        typer.Option(
-            help="name of the split associated "
-            "with the task ID file. If --task-id-file is not provided, "
-            "this field is ignored."
-        ),
-    ] = None,
     train_split_name: Annotated[
         str,
         typer.Option(help="name of the train split"),
@@ -110,13 +127,33 @@ def add_split(
         typer.Option(help="name of the validation split"),
     ] = "val",
     task_id_file: Annotated[
-        Optional[Path],
+        Path | None,
         typer.Option(help="path of a text file containing IDs of samples"),
     ] = None,
+    split_name: Annotated[
+        str | None,
+        typer.Option(
+            help="name of the split associated "
+            "with the task ID file. If --task-id-file is not provided, "
+            "this field is ignored."
+        ),
+    ] = None,
     overwrite: Annotated[
         bool, typer.Option(help="overwrite existing split field")
     ] = False,
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+    view_id: Annotated[
+        int | None,
+        typer.Option(
+            help="ID of the Label Studio view, if any. This option is useful "
+            "to filter the task to process."
+        ),
+    ] = None,
+    api_key: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
+    ] = config.label_studio_api_key,
+    label_studio_url: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
+    ] = config.label_studio_url,
 ):
     """Update the split field of tasks in a Label Studio project.
@@ -124,7 +161,7 @@ def add_split(
     If `--task-id-file` is provided, it should contain a list of task IDs,
     one per line. The split field of these tasks will be updated to the value
-    of `--split-name`.
+    of `--split-name`. The `--train-split` value is ignored in this case.
     If `--task-id-file` is not provided, the split field of all tasks in the
     project will be updated based on the `train_split` probability.
@@ -133,12 +170,16 @@ def add_split(
     In both cases, tasks with a non-null split field are not updated unless
     the `--overwrite` flag is provided.
+    The `--view-id` option can be used to only assign the split on a subset
+    of the tasks.
     """
     import random
     from label_studio_sdk import Task
     from label_studio_sdk.client import LabelStudio
+    check_label_studio_api_key(api_key)
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
     task_ids = None
@@ -149,14 +190,17 @@ def add_split(
             )
         task_ids = task_id_file.read_text().strip().split("\n")
-    for task in ls.tasks.list(project=project_id, fields="all"):
+    for task in ls.tasks.list(project=project_id, fields="all", view=view_id):
         task: Task
         task_id = task.id
         split = task.data.get("split")
         if split is None or overwrite:
-            if task_ids and str(task_id) in task_ids:
-                split = split_name
+            if task_ids:
+                if str(task_id) in task_ids:
+                    split = split_name
+                else:
+                    continue
             else:
                 split = (
                     train_split_name
@@ -170,12 +214,16 @@ def add_split(
 @app.command()
 def annotate_from_prediction(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
     project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
+    api_key: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
+    ] = config.label_studio_api_key,
     updated_by: Annotated[
         Optional[int], typer.Option(help="User ID to declare as annotator")
     ] = None,
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+    label_studio_url: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
+    ] = config.label_studio_url,
 ):
     """Create annotations for all tasks from predictions.
@@ -186,6 +234,7 @@ def annotate_from_prediction(
     from label_studio_sdk.client import LabelStudio
     from label_studio_sdk.types.task import Task
+    check_label_studio_api_key(api_key)
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
     task: Task
@@ -203,42 +252,55 @@ def annotate_from_prediction(
             )
-class PredictorBackend(enum.Enum):
-    ultralytics = "ultralytics"
-    robotoff = "robotoff"
+class PredictorBackend(enum.StrEnum):
+    ultralytics = enum.auto()
+    ultralytics_sam3 = enum.auto()
+    robotoff = enum.auto()
 @app.command()
 def add_prediction(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
     project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
+    api_key: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
+    ] = config.label_studio_api_key,
     view_id: Annotated[
-        Optional[int],
+        int | None,
         typer.Option(
             help="Label Studio View ID to filter tasks. If not provided, all tasks in the "
             "project are processed."
         ),
     ] = None,
     model_name: Annotated[
-        str,
+        str | None,
         typer.Option(
             help="Name of the object detection model to run (for Robotoff server) or "
-            "of the Ultralytics zero-shot model to run."
+            "of the Ultralytics zero-shot model to run. If using Ultralytics backend "
+            "and no model name is provided, the default is yolov8x-worldv2.pt. "
+            "If using ultralytics_sam3 backend, the model name is ignored."
+        ),
+    ] = None,
+    skip_existing: Annotated[
+        bool,
+        typer.Option(
+            help="Skip tasks that already have predictions",
         ),
-    ] = "yolov8x-worldv2.pt",
+    ] = True,
     server_url: Annotated[
-        Optional[str],
-        typer.Option(help="The Robotoff URL if the backend is robotoff"),
+        str | None,
+        typer.Option(
+            help="The Robotoff URL if the backend is robotoff. If the backend is "
+            "different than robotoff, this option is ignored."
+        ),
     ] = "https://robotoff.openfoodfacts.org",
     backend: Annotated[
         PredictorBackend,
         typer.Option(
-            help="Prediction backend: either use Ultralytics to perform "
-            "the prediction or Robotoff server."
+            help="The prediction backend, possible options are: `ultralytics`, `ultralytics_sam3` and `robotoff`"
         ),
     ] = PredictorBackend.ultralytics,
     labels: Annotated[
-        Optional[list[str]],
+        list[str] | None,
         typer.Option(
             help="List of class labels to use for Yolo model. If you're using Yolo-World or other "
             "zero-shot models, this is the list of label names that are going to be provided to the "
@@ -247,15 +309,20 @@ def add_prediction(
         ),
     ] = None,
     label_mapping: Annotated[
-        Optional[str],
-        typer.Option(help="Mapping of model labels to class names, as a JSON string"),
+        str | None,
+        typer.Option(
+            help='Mapping of model labels to class names, as a JSON string. Example: \'{"price tag": "price-tag"}\''
+        ),
     ] = None,
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+    label_studio_url: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
+    ] = config.label_studio_url,
     threshold: Annotated[
-        Optional[float],
+        float | None,
         typer.Option(
             help="Confidence threshold for selecting bounding boxes. The default is 0.3 "
-            "for robotoff backend and 0.1 for ultralytics backend."
+            "for robotoff backend, 0.1 for ultralytics backend and 0.25 for "
+            "ultralytics_sam3 backend."
         ),
     ] = None,
     max_det: Annotated[int, typer.Option(help="Maximum numbers of detections")] = 300,
@@ -270,14 +337,24 @@ def add_prediction(
         typer.Option(help="Raise an error if image download fails"),
     ] = True,
     model_version: Annotated[
-        Optional[str],
-        typer.Option(help="Model version to use for the prediction"),
+        str | None,
+        typer.Option(
+            help="Set the model version field of the prediction sent to Label Studio. "
+            "This is used to track which model generated the prediction."
+        ),
+    ] = None,
+    imgsz: Annotated[
+        int | None,
+        typer.Option(
+            help="Image size to use for Ultralytics models. If not provided, "
+            "the default size of the model is used."
+        ),
     ] = None,
 ):
-    """Add predictions as pre-annotations to Label Studio tasks,
-    for an object detection model running on Triton Inference Server."""
+    """Add predictions as pre-annotations to Label Studio tasks."""
     import tqdm
+    from huggingface_hub import hf_hub_download
     from label_studio_sdk.client import LabelStudio
     from openfoodfacts.utils import get_image_from_url, http_session
     from PIL import Image
@@ -287,6 +364,8 @@ def add_prediction(
         format_annotation_results_from_ultralytics,
     )
+    check_label_studio_api_key(api_key)
     label_mapping_dict = None
     if label_mapping:
         label_mapping_dict = json.loads(label_mapping)
@@ -305,7 +384,10 @@ def add_prediction(
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
     if backend == PredictorBackend.ultralytics:
-        from ultralytics import YOLO
+        from ultralytics import YOLO, YOLOWorld
+        if model_name is None:
+            model_name = "yolov8x-worldv2.pt"
         if labels is None:
             raise typer.BadParameter("Labels are required for Ultralytics backend")
@@ -315,9 +397,33 @@ def add_prediction(
         model = YOLO(model_name)
         if hasattr(model, "set_classes"):
+            model = typing.cast(YOLOWorld, model)
             model.set_classes(labels)
         else:
             logger.warning("The model does not support setting classes directly.")
+    elif backend == PredictorBackend.ultralytics_sam3:
+        from ultralytics.models.sam import SAM3SemanticPredictor
+        if threshold is None:
+            threshold = 0.25
+        # SAM3 cannot be downloaded directly due to gated access. Use a
+        # proxy repo.
+        model_path = hf_hub_download(
+            "1038lab/sam3",
+            filename="sam3.pt",
+            revision="f055b060a4de0a040891ba2ebac9c5cb3c1c0132",
+        )
+        overrides = dict(
+            task="segment",
+            mode="predict",
+            model=model_path,
+            save=False,
+        )
+        if imgsz is not None:
+            overrides["imgsz"] = imgsz
+        model = SAM3SemanticPredictor(overrides=overrides)
     elif backend == PredictorBackend.robotoff:
         if server_url is None:
             raise typer.BadParameter("--server-url is required for Robotoff backend")
@@ -331,22 +437,32 @@ def add_prediction(
     for task in tqdm.tqdm(
         ls.tasks.list(project=project_id, view=view_id), desc="tasks"
     ):
-        if task.total_predictions == 0:
+        if not (skip_existing and task.total_predictions > 0):
             image_url = task.data["image_url"]
             image = typing.cast(
                 Image.Image,
                 get_image_from_url(image_url, error_raise=error_raise),
             )
+            min_score = None
             if backend == PredictorBackend.ultralytics:
-                results = model.predict(
-                    image,
-                    conf=threshold,
-                    max_det=max_det,
-                )[0]
+                predict_kwargs = {
+                    "conf": threshold,
+                    "max_det": max_det,
+                }
+                if imgsz is not None:
+                    predict_kwargs["imgsz"] = imgsz
+                results = model.predict(image, **predict_kwargs)[0]
                 labels = typing.cast(list[str], labels)
                 label_studio_result = format_annotation_results_from_ultralytics(
                     results, labels, label_mapping_dict
                 )
+            elif backend == PredictorBackend.ultralytics_sam3:
+                model.set_image(image)
+                results = model(text=labels)[0]
+                label_studio_result = format_annotation_results_from_ultralytics(
+                    results, labels, label_mapping_dict
+                )
+                min_score = min(results.boxes.conf.tolist(), default=None)
             elif backend == PredictorBackend.robotoff:
                 r = http_session.get(
                     f"{server_url}/api/v1/images/predict",
@@ -372,7 +488,9 @@ def add_prediction(
                     task=task.id,
                     result=label_studio_result,
                     model_version=model_version,
+                    score=min_score,
                 )
+                logger.info("Prediction added for task: %s", task.id)
 @app.command()
@@ -449,28 +567,61 @@ def create_config_file(
 @app.command()
 def check_dataset(
     project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
+    view_id: Annotated[int, typer.Option(help="Label Studio View ID, if any.")] = None,
     api_key: Annotated[
-        Optional[str], typer.Option(envvar="LABEL_STUDIO_API_KEY")
-    ] = None,
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+        str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
+    ] = config.label_studio_api_key,
+    label_studio_url: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
+    ] = config.label_studio_url,
+    delete_missing_images: Annotated[
+        bool,
+        typer.Option(help="Delete tasks with missing images from the dataset"),
+    ] = False,
+    delete_duplicate_images: Annotated[
+        bool, typer.Option(help="Delete duplicate images from the dataset")
+    ] = False,
 ):
-    """Check a dataset for duplicate images on Label Studio."""
+    """Perform sanity checks of a Label Studio dataset.
+    This function checks for:
+    - Tasks with missing images (404)
+    - Duplicate images based on perceptual hash (pHash)
+    - Tasks with multiple annotations
+    This function doesn't perform any modifications to the dataset, except
+    optionally deleting tasks with missing images if --delete-missing-images
+    is provided and tasks with duplicate images if --delete-duplicate-images
+    is provided.
+    """
     from label_studio_sdk.client import LabelStudio
     from ..check import check_ls_dataset
+    check_label_studio_api_key(api_key)
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
-    check_ls_dataset(ls, project_id)
+    check_ls_dataset(
+        ls=ls,
+        project_id=project_id,
+        view_id=view_id,
+        delete_missing_images=delete_missing_images,
+        delete_duplicate_images=delete_duplicate_images,
+    )
 @app.command()
 def list_users(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+    api_key: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
+    ] = config.label_studio_api_key,
+    label_studio_url: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
+    ] = config.label_studio_url,
 ):
     """List all users in Label Studio."""
     from label_studio_sdk.client import LabelStudio
+    check_label_studio_api_key(api_key)
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
     for user in ls.users.list():
@@ -480,11 +631,57 @@ def list_users(
 @app.command()
 def delete_user(
     user_id: int,
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+    api_key: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
+    ] = config.label_studio_api_key,
+    label_studio_url: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
+    ] = config.label_studio_url,
 ):
     """Delete a user from Label Studio."""
     from label_studio_sdk.client import LabelStudio
+    check_label_studio_api_key(api_key)
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
     ls.users.delete(user_id)
+@app.command()
+def dump_dataset(
+    project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
+    output_file: Annotated[
+        Path, typer.Option(help="Path of the output file", writable=True)
+    ],
+    api_key: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_API_KEY)
+    ] = config.label_studio_api_key,
+    view_id: Annotated[
+        int | None,
+        typer.Option(
+            help="ID of the Label Studio view, if any. This option is useful "
+            "to filter the tasks to dump."
+        ),
+    ] = None,
+    label_studio_url: Annotated[
+        str, typer.Option(help=typer_description.LABEL_STUDIO_URL)
+    ] = config.label_studio_url,
+):
+    """Dump all the tasks of a dataset in a JSONL file.
+    All fields of the tasks are exported. A subset of the tasks can be
+    selected by filtering tasks based on a view (=tab) using the `--view-id`
+    option.
+    """
+    import orjson
+    import tqdm
+    from label_studio_sdk.client import LabelStudio
+    check_label_studio_api_key(api_key)
+    ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+    with output_file.open("wb") as f:
+        for task in tqdm.tqdm(
+            ls.tasks.list(project=project_id, view=view_id), desc="tasks"
+        ):
+            content = orjson.dumps(task.dict())
+            f.write(content + b"\n")

labelr/apps/typer_description.py ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ LABEL_STUDIO_API_KEY = """API Key to authenticate to the Label Studio server. Can also be set with the LABELR_LABEL_STUDIO_API_KEY environment variable."""
2	+ LABEL_STUDIO_URL = """URL of the Label Studio server. Can also be set with the LABELR_LABEL_STUDIO_URL environment variable."""

labelr 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

labelr 0.10.0__py3-none-any.whl → 0.11.0__py3-none-any.whl