labelr 0.1.0__py3-none-any.whl → 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- labelr/annotate.py +16 -15
- labelr/apps/datasets.py +3 -1
- labelr/apps/projects.py +115 -34
- labelr/export.py +5 -7
- labelr/main.py +29 -225
- labelr/project_config.py +45 -0
- labelr/sample.py +39 -5
- {labelr-0.1.0.dist-info → labelr-0.3.0.dist-info}/METADATA +24 -44
- labelr-0.3.0.dist-info/RECORD +20 -0
- {labelr-0.1.0.dist-info → labelr-0.3.0.dist-info}/WHEEL +1 -1
- labelr/triton/object_detection.py +0 -241
- labelr-0.1.0.dist-info/RECORD +0 -20
- {labelr-0.1.0.dist-info → labelr-0.3.0.dist-info}/entry_points.txt +0 -0
- {labelr-0.1.0.dist-info → labelr-0.3.0.dist-info/licenses}/LICENSE +0 -0
- {labelr-0.1.0.dist-info → labelr-0.3.0.dist-info}/top_level.txt +0 -0
labelr/annotate.py
CHANGED
````diff
@@ -1,29 +1,30 @@
 import random
 import string
 
+from openfoodfacts.types import JSONType
 from openfoodfacts.utils import get_logger
 
-try:
-    from ultralytics.engine.results import Results
-except ImportError:
-    pass
-
-from labelr.triton.object_detection import ObjectDetectionResult
-
 logger = get_logger(__name__)
 
 
-def …
-    objects: list[…
-…
-…
+def format_annotation_results_from_robotoff(
+    objects: list[JSONType],
+    image_width: int,
+    image_height: int,
+    label_mapping: dict[str, str] | None = None,
+) -> list[JSONType]:
+    """Format annotation results from Robotoff prediction endpoint into
     Label Studio format."""
     annotation_results = []
     for object_ in objects:
-        …
-        …
+        bounding_box = object_["bounding_box"]
+        label_name = object_["label"]
+
+        if label_mapping:
+            label_name = label_mapping.get(label_name, label_name)
+
         # These are relative coordinates (between 0.0 and 1.0)
-        y_min, x_min, y_max, x_max = …
+        y_min, x_min, y_max, x_max = bounding_box
         # Make sure the coordinates are within the image boundaries,
         # and convert them to percentages
         y_min = min(max(0, y_min), 1.0) * 100
@@ -51,7 +52,7 @@ def format_annotation_results_from_triton(
                 "y": y,
                 "width": width,
                 "height": height,
-                "rectanglelabels": […
+                "rectanglelabels": [label_name],
             },
         },
     )
````
labelr/apps/datasets.py
CHANGED
````diff
@@ -132,7 +132,9 @@ def export(
     api_key: Annotated[Optional[str], typer.Option(envvar="LABEL_STUDIO_API_KEY")],
     repo_id: Annotated[
         Optional[str],
-        typer.Option(…
+        typer.Option(
+            help="Hugging Face Datasets repository ID to convert (only if --from or --to is `hf`)"
+        ),
     ] = None,
     label_names: Annotated[
         Optional[str],
````
labelr/apps/projects.py
CHANGED
````diff
@@ -9,7 +9,7 @@ from openfoodfacts.utils import get_logger
 from PIL import Image
 
 from ..annotate import (
-    …
+    format_annotation_results_from_robotoff,
     format_annotation_results_from_ultralytics,
 )
 from ..config import LABEL_STUDIO_DEFAULT_URL
@@ -90,16 +90,48 @@ def add_split(
     train_split: Annotated[
         float, typer.Option(help="fraction of samples to add in train split")
     ],
+    split_name: Annotated[
+        Optional[str],
+        typer.Option(
+            help="name of the split associated "
+            "with the task ID file. If --task-id-file is not provided, "
+            "this field is ignored."
+        ),
+    ],
     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
     project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
+    train_split_name: Annotated[
+        str,
+        typer.Option(help="name of the train split"),
+    ] = "train",
+    val_split_name: Annotated[
+        str,
+        typer.Option(help="name of the validation split"),
+    ] = "val",
+    task_id_file: Annotated[
+        Optional[Path],
+        typer.Option(help="path of a text file containing IDs of samples"),
+    ] = None,
+    overwrite: Annotated[
+        bool, typer.Option(help="overwrite existing split field")
+    ] = False,
     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
 ):
     """Update the split field of tasks in a Label Studio project.
 
+    The behavior of this command depends on the `--task-id-file` option.
+
+    If `--task-id-file` is provided, it should contain a list of task IDs,
+    one per line. The split field of these tasks will be updated to the value
+    of `--split-name`.
+
+    If `--task-id-file` is not provided, the split field of all tasks in the
+    project will be updated based on the `train_split` probability.
     The split field is set to "train" with probability `train_split`, and "val"
-    otherwise.…
-
-    …are not updated…
+    otherwise.
+
+    In both cases, tasks with a non-null split field are not updated unless
+    the `--overwrite` flag is provided.
     """
     import random
 
@@ -108,11 +140,29 @@ def add_split(
 
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
 
+    task_ids = None
+    if task_id_file is not None:
+        if split_name is None or split_name not in (train_split_name, val_split_name):
+            raise typer.BadParameter(
+                "--split-name is required when using --task-id-file"
+            )
+        task_ids = task_id_file.read_text().strip().split("\n")
+
     for task in ls.tasks.list(project=project_id, fields="all"):
         task: Task
+        task_id = task.id
+
         split = task.data.get("split")
-        if split is None:
-            …
+        if split is None or overwrite:
+            if task_ids and str(task_id) in task_ids:
+                split = split_name
+            else:
+                split = (
+                    train_split_name
+                    if random.random() < train_split
+                    else val_split_name
+                )
+
         logger.info("Updating task: %s, split: %s", task.id, split)
         ls.tasks.update(task.id, data={**task.data, "split": split})
 
@@ -153,30 +203,37 @@ def annotate_from_prediction(
 
 
 class PredictorBackend(enum.Enum):
-    triton = "triton"
     ultralytics = "ultralytics"
+    robotoff = "robotoff"
 
 
 @app.command()
 def add_prediction(
     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
     project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
+    view_id: Annotated[
+        Optional[int],
+        typer.Option(
+            help="Label Studio View ID to filter tasks. If not provided, all tasks in the "
+            "project are processed."
+        ),
+    ] = None,
     model_name: Annotated[
         str,
         typer.Option(
-            help="Name of the object detection model to run (for …
+            help="Name of the object detection model to run (for Robotoff server) or "
            "of the Ultralytics zero-shot model to run."
         ),
     ] = "yolov8x-worldv2.pt",
-    …
+    server_url: Annotated[
         Optional[str],
-        typer.Option(help="…
-    ] = …
+        typer.Option(help="The Robotoff URL if the backend is robotoff"),
+    ] = "https://robotoff.openfoodfacts.org",
     backend: Annotated[
         PredictorBackend,
         typer.Option(
-            help="Prediction backend: either use …
-            "the prediction or …
+            help="Prediction backend: either use Ultralytics to perform "
+            "the prediction or Robotoff server."
         ),
     ] = PredictorBackend.ultralytics,
     labels: Annotated[
@@ -196,8 +253,8 @@ def add_prediction(
     threshold: Annotated[
         Optional[float],
         typer.Option(
-            help="Confidence threshold for selecting bounding boxes. The default is 0.…
-            "for …
+            help="Confidence threshold for selecting bounding boxes. The default is 0.3 "
+            "for robotoff backend and 0.1 for ultralytics backend."
         ),
     ] = None,
     max_det: Annotated[int, typer.Option(help="Maximum numbers of detections")] = 300,
@@ -221,9 +278,7 @@ def add_prediction(
 
     import tqdm
     from label_studio_sdk.client import LabelStudio
-    from openfoodfacts.utils import get_image_from_url
-
-    from labelr.triton.object_detection import ObjectDetectionModelRegistry
+    from openfoodfacts.utils import get_image_from_url, http_session
 
     label_mapping_dict = None
     if label_mapping:
@@ -242,8 +297,6 @@ def add_prediction(
     )
     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
 
-    model: ObjectDetectionModelRegistry | "YOLO"
-
     if backend == PredictorBackend.ultralytics:
         from ultralytics import YOLO
 
@@ -258,18 +311,19 @@ def add_prediction(
             model.set_classes(labels)
         else:
             logger.warning("The model does not support setting classes directly.")
-    elif backend == PredictorBackend.…
-        if …
-            raise typer.BadParameter("…
+    elif backend == PredictorBackend.robotoff:
+        if server_url is None:
+            raise typer.BadParameter("--server-url is required for Robotoff backend")
 
         if threshold is None:
-            threshold = 0.…
-
-        model = ObjectDetectionModelRegistry.load(model_name)
+            threshold = 0.1
+        server_url = server_url.rstrip("/")
     else:
         raise typer.BadParameter(f"Unsupported backend: {backend}")
 
-    for task in tqdm.tqdm(…
+    for task in tqdm.tqdm(
+        ls.tasks.list(project=project_id, view=view_id), desc="tasks"
+    ):
         if task.total_predictions == 0:
             image_url = task.data["image_url"]
             image = typing.cast(
@@ -286,12 +340,22 @@ def add_prediction(
                 label_studio_result = format_annotation_results_from_ultralytics(
                     results, labels, label_mapping_dict
                 )
-            …
-            …
-            …
-            …
-            …
-            …
+            elif backend == PredictorBackend.robotoff:
+                r = http_session.get(
+                    f"{server_url}/api/v1/images/predict",
+                    params={
+                        "models": model_name,
+                        "output_image": 0,
+                        "image_url": image_url,
+                    },
+                )
+                r.raise_for_status()
+                response = r.json()
+                label_studio_result = format_annotation_results_from_robotoff(
+                    response["predictions"][model_name],
+                    image.width,
+                    image.height,
+                    label_mapping_dict,
                 )
             if dry_run:
                 logger.info("image_url: %s", image_url)
@@ -339,7 +403,7 @@ def create_dataset_file(
             extra_meta["barcode"] = barcode
             off_image_id = Path(extract_source_from_url(url)).stem
             extra_meta["off_image_id"] = off_image_id
-            image_id = f"{barcode}…
+            image_id = f"{barcode}_{off_image_id}"
 
         image = get_image_from_url(url, error_raise=False)
 
@@ -351,3 +415,20 @@ def create_dataset_file(
                 image_id, url, image.width, image.height, extra_meta
             )
             f.write(json.dumps(label_studio_sample) + "\n")
+
+
+@app.command()
+def create_config_file(
+    output_file: Annotated[
+        Path, typer.Option(help="Path to the output label config file", exists=False)
+    ],
+    labels: Annotated[
+        list[str], typer.Option(help="List of class labels to use for the model")
+    ],
+):
+    """Create a Label Studio label config file for object detection tasks."""
+    from labelr.project_config import create_object_detection_label_config
+
+    config = create_object_detection_label_config(labels)
+    output_file.write_text(config)
+    logger.info("Label config file created: %s", output_file)
````
labelr/export.py
CHANGED
````diff
@@ -164,16 +164,14 @@ def export_from_ls_to_ultralytics(
 
         if has_valid_annotation:
             download_output = download_image(
-                image_url,…
+                image_url, return_struct=True, error_raise=error_raise
             )
             if download_output is None:
                 logger.error("Failed to download image: %s", image_url)
                 continue
 
-            _, image_bytes = typing.cast(tuple[Image.Image, bytes], download_output)
-
             with (images_dir / split / f"{image_id}.jpg").open("wb") as f:
-                f.write(image_bytes)
+                f.write(download_output.image_bytes)
 
     with (output_dir / "data.yaml").open("w") as f:
         f.write("path: data\n")
@@ -215,14 +213,14 @@ def export_from_hf_to_ultralytics(
 
         if download_images:
             download_output = download_image(
-                image_url,…
+                image_url, return_struct=True, error_raise=error_raise
             )
             if download_output is None:
                 logger.error("Failed to download image: %s", image_url)
                 continue
-…
+
             with (split_images_dir / f"{image_id}.jpg").open("wb") as f:
-                f.write(image_bytes)
+                f.write(download_output.image_bytes)
         else:
             image = sample["image"]
             image.save(split_images_dir / f"{image_id}.jpg")
````
labelr/main.py
CHANGED
````diff
@@ -1,4 +1,4 @@
-from typing import Annotated…
+from typing import Annotated
 
 import typer
 from openfoodfacts.utils import get_logger
@@ -6,7 +6,6 @@ from openfoodfacts.utils import get_logger
 from labelr.apps import datasets as dataset_app
 from labelr.apps import projects as project_app
 from labelr.apps import users as user_app
-from labelr.config import LABEL_STUDIO_DEFAULT_URL
 
 app = typer.Typer(pretty_exceptions_show_locals=False)
 
@@ -14,243 +13,48 @@ logger = get_logger()
 
 
 @app.command()
-def …
+def predict(
     model_name: Annotated[
         str, typer.Option(help="Name of the object detection model to run")
     ],
+    label_names: Annotated[list[str], typer.Argument(help="List of label names")],
     image_url: Annotated[str, typer.Option(help="URL of the image to process")],
     triton_uri: Annotated[
         str, typer.Option(help="URI (host+port) of the Triton Inference Server")
     ],
-…
-)…
-…
-…
-…
-…
-    model = ObjectDetectionModelRegistry.get(model_name)
-    image = get_image_from_url(image_url)
-    output = model.detect_from_image(image, triton_uri=triton_uri)
-    results = output.select(threshold=threshold)
-
-    for result in results:
-        typer.echo(result)
-
-
-# Temporary scripts
-
-
-@app.command()
-def skip_rotated_images(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
-    project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
-    updated_by: Annotated[
-        Optional[int], typer.Option(help="User ID to declare as annotator")
-    ] = None,
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
-):
-    import requests
-    import tqdm
-    from label_studio_sdk.client import LabelStudio
-    from label_studio_sdk.types.task import Task
-    from openfoodfacts.ocr import OCRResult
-
-    session = requests.Session()
-    ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
-
-    task: Task
-    for task in tqdm.tqdm(
-        ls.tasks.list(project=project_id, fields="all"), desc="tasks"
-    ):
-        if any(annotation["was_cancelled"] for annotation in task.annotations):
-            continue
-
-        assert task.total_annotations == 1, (
-            "Task has multiple annotations (%s)" % task.id
-        )
-        task_id = task.id
-
-        annotation = task.annotations[0]
-        annotation_id = annotation["id"]
-
-        ocr_url = task.data["image_url"].replace(".jpg", ".json")
-        ocr_result = OCRResult.from_url(ocr_url, session=session, error_raise=False)
-
-        if ocr_result is None:
-            logger.warning("No OCR result for task: %s", task_id)
-            continue
-
-        orientation_result = ocr_result.get_orientation()
-
-        if orientation_result is None:
-            # logger.info("No orientation for task: %s", task_id)
-            continue
-
-        orientation = orientation_result.orientation.name
-        if orientation != "up":
-            logger.info(
-                "Skipping rotated image for task: %s (orientation: %s)",
-                task_id,
-                orientation,
-            )
-            ls.annotations.update(
-                id=annotation_id,
-                was_cancelled=True,
-                updated_by=updated_by,
-            )
-        elif orientation == "up":
-            logger.debug("Keeping annotation for task: %s", task_id)
-
-
-@app.command()
-def fix_label(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
-    project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
-):
-    import tqdm
-    from label_studio_sdk.client import LabelStudio
-    from label_studio_sdk.types.task import Task
-
-    ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
-
-    task: Task
-    for task in tqdm.tqdm(
-        ls.tasks.list(project=project_id, fields="all"), desc="tasks"
-    ):
-        for prediction in task.predictions:
-            updated = False
-            if "result" in prediction:
-                for result in prediction["result"]:
-                    value = result["value"]
-                    if "rectanglelabels" in value and value["rectanglelabels"] != [
-                        "price-tag"
-                    ]:
-                        value["rectanglelabels"] = ["price-tag"]
-                        updated = True
-
-            if updated:
-                print(f"Updating prediction {prediction['id']}, task {task.id}")
-                ls.predictions.update(prediction["id"], result=prediction["result"])
-
-        for annotation in task.annotations:
-            updated = False
-            if "result" in annotation:
-                for result in annotation["result"]:
-                    value = result["value"]
-                    if "rectanglelabels" in value and value["rectanglelabels"] != [
-                        "price-tag"
-                    ]:
-                        value["rectanglelabels"] = ["price-tag"]
-                        updated = True
-
-            if updated:
-                print(f"Updating annotation {annotation['id']}, task {task.id}")
-                ls.annotations.update(annotation["id"], result=annotation["result"])
-
-
-@app.command()
-def select_price_tag_images(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
-    project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
-):
-    import typing
-    from pathlib import Path
-    from typing import Any
-    from urllib.parse import urlparse
-
-    import requests
-    import tqdm
-    from label_studio_sdk.client import LabelStudio
-    from label_studio_sdk.types.task import Task
-
-    session = requests.Session()
-    ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
-
-    proof_paths = (Path(__file__).parent / "proof.txt").read_text().splitlines()
-    task: Task
-    for task in tqdm.tqdm(
-        ls.tasks.list(project=project_id, include="data,id"), desc="tasks"
-    ):
-        data = typing.cast(dict[str, Any], task.data)
-
-        if "is_raw_product_shelf" in data:
-            continue
-        image_url = data["image_url"]
-        file_path = urlparse(image_url).path.replace("/img/", "")
-        r = session.get(
-            f"https://robotoff.openfoodfacts.org/api/v1/images/predict?image_url={image_url}&models=price_proof_classification",
-        )
-
-        if r.status_code != 200:
-            print(
-                f"Failed to get prediction for {image_url}, error: {r.text} (status: {r.status_code})"
-            )
-            continue
-
-        prediction = r.json()["predictions"]["price_proof_classification"][0]["label"]
-
-        is_raw_preduct_shelf = False
-        if prediction in ("PRICE_TAG", "SHELF"):
-            is_raw_preduct_shelf = file_path in proof_paths
-
-        ls.tasks.update(
-            task.id,
-            data={
-                **data,
-                "is_raw_product_shelf": "true" if is_raw_preduct_shelf else "false",
-            },
-        )
-
-
-@app.command()
-def add_predicted_category(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
-    project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+    image_size: Annotated[
+        int, typer.Option(help="Size of the image the model expects")
+    ] = 640,
+    threshold: Annotated[float, typer.Option(help="Detection threshold")] = 0.5,
+    triton_model_version: str = "1",
 ):
+    """Predict objects in an image using an object detection model served by
+    Triton."""
     import typing
-    from typing import Any
-
-    import requests
-    import tqdm
-    from label_studio_sdk.client import LabelStudio
-    from label_studio_sdk.types.task import Task
-
-    session = requests.Session()
-    ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
 
-    …
-    …
-    …
-    ):
-        data = typing.cast(dict[str, Any], task.data)
+    from openfoodfacts.ml.object_detection import ObjectDetector
+    from openfoodfacts.utils import get_image_from_url
+    from PIL import Image
 
-        …
-        …
-        …
-        …
-            f"https://robotoff.openfoodfacts.org/api/v1/images/predict?image_url={image_url}&models=price_proof_classification",
-        )
+    model = ObjectDetector(
+        model_name=model_name, label_names=label_names, image_size=image_size
+    )
+    image = typing.cast(Image.Image | None, get_image_from_url(image_url))
 
-        …
-        …
-        …
-            )
-            continue
+    if image is None:
+        logger.error("Failed to download image from URL: %s", image_url)
+        raise typer.Abort()
 
-        …
-        …
-        …
+    output = model.detect_from_image(
+        image,
+        triton_uri=triton_uri,
+        model_version=triton_model_version,
+        threshold=threshold,
+    )
+    results = output.to_list()
 
-        …
-        …
-            data={
-                **data,
-                "predicted_category": predicted_category,
-            },
-        )
+    for result in results:
+        typer.echo(result)
 
 
 app.add_typer(user_app.app, name="users", help="Manage Label Studio users")
````
labelr/project_config.py
ADDED
````diff
@@ -0,0 +1,45 @@
+COLORS = [
+    "blue",
+    "green",
+    "yellow",
+    "red",
+    "purple",
+    "orange",
+    "pink",
+    "brown",
+    "gray",
+    "black",
+    "white",
+]
+
+
+def create_object_detection_label_config(labels_names: list[str]) -> str:
+    """Create a Label Studio label configuration for object detection tasks.
+
+    The format is the following:
+    ```xml
+    <View>
+      <Image name="image" value="$image_url"/>
+      <RectangleLabels name="label" toName="image">
+        <Label value="nutrition-table" background="green"/>
+        <Label value="nutrition-table-small" background="blue"/>
+        <Label value="nutrition-table-small-energy" background="yellow"/>
+        <Label value="nutrition-table-text" background="red"/>
+      </RectangleLabels>
+    </View>
+    ```
+    """
+    if len(labels_names) > len(COLORS):
+        raise ValueError(
+            f"Too many labels ({len(labels_names)}) for the available colors ({len(COLORS)})."
+        )
+    labels_xml = "\n".join(
+        f'    <Label value="{label}" background="{color}"/>'
+        for label, color in zip(labels_names, COLORS[: len(labels_names)])
+    )
+    return f"""<View>
+  <Image name="image" value="$image_url"/>
+  <RectangleLabels name="label" toName="image">
+{labels_xml}
+  </RectangleLabels>
+</View>"""
````
labelr/sample.py
CHANGED
````diff
@@ -3,7 +3,9 @@ import random
 import string
 
 import datasets
-from openfoodfacts…
+from openfoodfacts import Flavor
+from openfoodfacts.barcode import normalize_barcode
+from openfoodfacts.images import download_image, generate_image_url
 
 logger = logging.getLogger(__name__)
 
@@ -62,17 +64,49 @@ def format_object_detection_sample_from_hf(hf_sample: dict, split: str) -> dict:
     annotation_results = format_annotation_results_from_hf(
         objects, image_width, image_height
     )
+    image_id = hf_sample["image_id"]
+    image_url = hf_meta["image_url"]
+    meta_kwargs = {}
+
+    if "off_image_id" in hf_meta:
+        # If `off_image_id` is present, we assume this is an Open Food Facts
+        # dataset sample.
+        # We normalize the barcode, and generate a new image URL
+        # to make sure that:
+        # - the image URL is valid with correct path
+        # - we use the images subdomain everywhere
+        off_image_id = hf_meta["off_image_id"]
+        meta_kwargs["off_image_id"] = off_image_id
+        barcode = normalize_barcode(hf_meta["barcode"])
+        meta_kwargs["barcode"] = barcode
+        image_id = f"{barcode}_{off_image_id}"
+
+        if ".openfoodfacts." in image_url:
+            flavor = Flavor.off
+        elif ".openbeautyfacts." in image_url:
+            flavor = Flavor.obf
+        elif ".openpetfoodfacts." in image_url:
+            flavor = Flavor.opf
+        elif ".openproductsfacts." in image_url:
+            flavor = Flavor.opf
+        else:
+            raise ValueError(
+                f"Unknown Open Food Facts flavor for image URL: {image_url}"
+            )
+        image_url = generate_image_url(
+            code=barcode, image_id=off_image_id, flavor=flavor
+        )
+
     return {
         "data": {
-            "image_id": …
-            "image_url": …
+            "image_id": image_id,
+            "image_url": image_url,
             "batch": "null",
             "split": split,
             "meta": {
                 "width": image_width,
                 "height": image_height,
-                …
-                "off_image_id": hf_meta["off_image_id"],
+                **meta_kwargs,
             },
         },
         "predictions": [{"result": annotation_results}],
````
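A sketch of the URL normalization the new branch performs, using the same `openfoodfacts` helpers; this is illustrative only, and the exact URL returned depends on the SDK's image-path logic:

```python
from openfoodfacts import Flavor
from openfoodfacts.barcode import normalize_barcode
from openfoodfacts.images import generate_image_url

barcode = normalize_barcode("3017620422003")  # placeholder barcode
# Regenerates a canonical image URL on the images subdomain for the
# normalized barcode, as done in format_object_detection_sample_from_hf().
image_url = generate_image_url(code=barcode, image_id="1", flavor=Flavor.off)
print(image_url)
```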
{labelr-0.1.0.dist-info → labelr-0.3.0.dist-info}/METADATA
CHANGED
````diff
@@ -1,7 +1,7 @@
-Metadata-Version: 2.…
+Metadata-Version: 2.4
 Name: labelr
-Version: 0.1.0
-Summary: …
+Version: 0.3.0
+Summary: A command-line tool to manage labeling tasks with Label Studio.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
 License-File: LICENSE
@@ -9,13 +9,11 @@ Requires-Dist: datasets>=3.2.0
 Requires-Dist: imagehash>=4.3.1
 Requires-Dist: label-studio-sdk>=1.0.8
 Requires-Dist: more-itertools>=10.5.0
-Requires-Dist: openfoodfacts>=2.…
-Requires-Dist: protobuf>=5.29.1
+Requires-Dist: openfoodfacts>=2.9.0
 Requires-Dist: typer>=0.15.1
 Provides-Extra: ultralytics
 Requires-Dist: ultralytics>=8.3.49; extra == "ultralytics"
-…
-Requires-Dist: tritonclient>=2.52.0; extra == "triton"
+Dynamic: license-file
 
 # Labelr
 
@@ -36,50 +34,22 @@ It currently allows to:
 ## Installation
 
 Python 3.10 or higher is required to run this CLI.
-You need to install the CLI manually for now, there is no project published on Pypi.
-To do so:
 
-
-
-### Pip
-
-Create the virtualenv:
-
-```bash
-python3 -m venv labelr
-source labelr/bin/activate
-```
-### Conda
+To install the CLI, simply run:
 
-With conda:
 ```bash
-…
-conda activate labelr
-```
-
-Then, clone the repository and install the requirements:
-
-```bash
-git clone git@github.com:openfoodfacts/openfoodfacts-ai.git
-```
-
-```bash
-pip install -r requirements.txt
+pip install labelr
 ```
+We recommend to install the CLI in a virtual environment. You can either use pip or conda for that.
 
-
+There are two optional dependencies that you can install to use the CLI:
+- `ultralytics`: pre-annotate object detection datasets with an ultralytics model (yolo, yolo-world)
+- `triton`: pre-annotate object detection datasets using a model served by a Triton inference server
 
-…
-alias labelr='${VIRTUALENV_DIR}/bin/python3 ${PROJECT_PATH}/main.py'
-```
-or if you are using conda:
-```bash
-alias labelr='${CONDA_PREFIX}/bin/python3 ${PROJECT_PATH}/main.py'
-```
+To install the optional dependencies, you can run:
 
-with `${VIRTUALENV_DIR}` the path to the virtual environment where you installed the CLI and `${PROJECT_PATH}` the path to the root of the project, for example:
 ```bash
-…
+pip install labelr[ultralytics,triton]
 ```
 
 ## Usage
@@ -94,7 +64,17 @@ For all the commands that interact with Label Studio, you need to provide an API
 
 #### Create a project
 
-Once you have a Label Studio instance running, you can create a project…
+Once you have a Label Studio instance running, you can create a project easily. First, you need to create a configuration file for the project. The configuration file is an XML file that defines the labeling interface and the labels to use for the project. You can find an example of a configuration file in the [Label Studio documentation](https://labelstud.io/guide/setup).
+
+For an object detection task, a command allows you to create the configuration file automatically:
+
+```bash
+labelr projects create-config --labels 'label1' --labels 'label2' --output-file label_config.xml
+```
+
+where `label1` and `label2` are the labels you want to use for the object detection task, and `label_config.xml` is the output file that will contain the configuration.
+
+Then, you can create a project on Label Studio with the following command:
 
 ```bash
 labelr projects create --title my_project --api-key API_KEY --config-file label_config.xml
````
labelr-0.3.0.dist-info/RECORD
ADDED
````diff
@@ -0,0 +1,20 @@
+labelr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
+labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
+labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
+labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
+labelr/export.py,sha256=MuU7M0H1THg3FcA6IEYPKFb58nIakNCCpcItQSSwNzM,10070
+labelr/main.py,sha256=gQ8I287mpLy3HIUWqZUyoLAfPwkphwOIzut7hEbH8tY,2135
+labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
+labelr/sample.py,sha256=WPWKJbyFDp1T-pmd1DfCpz2LWUApGJ71MvnMYkHeORU,7164
+labelr/types.py,sha256=CahqnkLnGj23Jg0X9nftK7Jiorq50WYQqR8u9Ln4E-k,281
+labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+labelr/apps/datasets.py,sha256=OihvNIaqvny9QH64kq3oUrITRppGPz6WlJUSeObS3kE,7991
+labelr/apps/projects.py,sha256=mF25efdNsNaOyMJindi60EHdKP6kR_7L6KFBEbqMlqM,15146
+labelr/apps/users.py,sha256=twQSlpHxE0hrYkgrJpEFbK8lYfWnpJr8vyfLHLtdAUU,909
+labelr-0.3.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+labelr-0.3.0.dist-info/METADATA,sha256=bxgaoKo6fCFfTtFI2YSjb_cSDbURB_JDsMA_5PV09gs,6583
+labelr-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+labelr-0.3.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
+labelr-0.3.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
+labelr-0.3.0.dist-info/RECORD,,
````
labelr/triton/object_detection.py
DELETED
````diff
@@ -1,241 +0,0 @@
-import dataclasses
-import functools
-import logging
-import time
-from typing import Any, Optional
-
-import numpy as np
-from PIL import Image
-
-try:
-    import grpc
-    from tritonclient.grpc import service_pb2, service_pb2_grpc
-    from tritonclient.grpc.service_pb2_grpc import GRPCInferenceServiceStub
-except ImportError:
-    pass
-
-logger = logging.getLogger(__name__)
-
-
-JSONType = dict[str, Any]
-
-OBJECT_DETECTION_MODEL_VERSION = {
-    "nutriscore": "tf-nutriscore-1.0",
-    "nutrition_table": "tf-nutrition-table-1.0",
-    "universal_logo_detector": "tf-universal-logo-detector-1.0",
-}
-
-LABELS = {
-    "nutriscore": [
-        "NULL",
-        "nutriscore-a",
-        "nutriscore-b",
-        "nutriscore-c",
-        "nutriscore-d",
-        "nutriscore-e",
-    ],
-}
-
-OBJECT_DETECTION_IMAGE_MAX_SIZE = (1024, 1024)
-
-
-@functools.cache
-def get_triton_inference_stub(
-    triton_uri: str,
-) -> "GRPCInferenceServiceStub":
-    """Return a gRPC stub for Triton Inference Server.
-
-    :param triton_uri: URI of the Triton Inference Server
-    :return: gRPC stub for Triton Inference Server
-    """
-    triton_uri = triton_uri
-    channel = grpc.insecure_channel(triton_uri)
-    return service_pb2_grpc.GRPCInferenceServiceStub(channel)
-
-
-def convert_image_to_array(image: Image.Image) -> np.ndarray:
-    """Convert a PIL Image into a numpy array.
-
-    The image is converted to RGB if needed before generating the array.
-
-    :param image: the input image
-    :return: the generated numpy array of shape (width, height, 3)
-    """
-    if image.mode != "RGB":
-        image = image.convert("RGB")
-
-    (im_width, im_height) = image.size
-
-    return np.array(image.getdata()).reshape((im_height, im_width, 3)).astype(np.uint8)
-
-
-@dataclasses.dataclass
-class ObjectDetectionResult:
-    bounding_box: tuple[int, int, int, int]
-    score: float
-    label: str
-
-
-@dataclasses.dataclass
-class ObjectDetectionRawResult:
-    num_detections: int
-    detection_boxes: np.ndarray
-    detection_scores: np.ndarray
-    detection_classes: np.ndarray
-    label_names: list[str]
-    detection_masks: Optional[np.ndarray] = None
-    boxed_image: Optional[Image.Image] = None
-
-    def select(self, threshold: Optional[float] = None) -> list[ObjectDetectionResult]:
-        if threshold is None:
-            threshold = 0.5
-
-        box_masks = self.detection_scores > threshold
-        selected_boxes = self.detection_boxes[box_masks]
-        selected_scores = self.detection_scores[box_masks]
-        selected_classes = self.detection_classes[box_masks]
-
-        results = []
-        for bounding_box, score, label in zip(
-            selected_boxes, selected_scores, selected_classes
-        ):
-            label_int = int(label)
-            label_str = self.label_names[label_int]
-            if label_str is not None:
-                result = ObjectDetectionResult(
-                    bounding_box=tuple(bounding_box.tolist()),  # type: ignore
-                    score=float(score),
-                    label=label_str,
-                )
-                results.append(result)
-
-        return results
-
-    def to_json(self, threshold: Optional[float] = None) -> list[JSONType]:
-        return [dataclasses.asdict(r) for r in self.select(threshold)]
-
-
-def resize_image(image: Image.Image, max_size: tuple[int, int]) -> Image.Image:
-    width, height = image.size
-    max_width, max_height = max_size
-
-    if width > max_width or height > max_height:
-        new_image = image.copy()
-        new_image.thumbnail((max_width, max_height))
-        return new_image
-
-    return image
-
-
-class RemoteModel:
-    def __init__(self, name: str, label_names: list[str]):
-        self.name: str = name
-        self.label_names = label_names
-
-    def detect_from_image(
-        self,
-        image: Image.Image,
-        triton_uri: str,
-    ) -> ObjectDetectionRawResult:
-        """Run object detection model on an image.
-
-        :param image: the input Pillow image
-        :param triton_uri: URI of the Triton Inference Server.
-        :return: the detection result
-        """
-        resized_image = resize_image(image, OBJECT_DETECTION_IMAGE_MAX_SIZE)
-        image_array = convert_image_to_array(resized_image)
-        grpc_stub = get_triton_inference_stub(triton_uri)
-        request = service_pb2.ModelInferRequest()
-        request.model_name = self.name
-
-        image_input = service_pb2.ModelInferRequest().InferInputTensor()
-        image_input.name = "inputs"
-        image_input.datatype = "UINT8"
-        image_input.shape.extend([1, image_array.shape[0], image_array.shape[1], 3])
-        request.inputs.extend([image_input])
-
-        for output_name in (
-            "num_detections",
-            "detection_classes",
-            "detection_scores",
-            "detection_boxes",
-        ):
-            output = service_pb2.ModelInferRequest().InferRequestedOutputTensor()
-            output.name = output_name
-            request.outputs.extend([output])
-
-        request.raw_input_contents.extend([image_array.tobytes()])
-        start_time = time.monotonic()
-        response = grpc_stub.ModelInfer(request)
-        logger.debug(
-            "Inference time for %s: %s", self.name, time.monotonic() - start_time
-        )
-
-        if len(response.outputs) != 4:
-            raise Exception(f"expected 4 output, got {len(response.outputs)}")
-
-        if len(response.raw_output_contents) != 4:
-            raise Exception(
-                f"expected 4 raw output content, got {len(response.raw_output_contents)}"
-            )
-
-        output_index = {output.name: i for i, output in enumerate(response.outputs)}
-        num_detections = (
-            np.frombuffer(
-                response.raw_output_contents[output_index["num_detections"]],
-                dtype=np.float32,
-            )
-            .reshape((1, 1))
-            .astype(int)[0][0]  # type: ignore
-        )
-        detection_scores = np.frombuffer(
-            response.raw_output_contents[output_index["detection_scores"]],
-            dtype=np.float32,
-        ).reshape((1, -1))[0]
-        detection_classes = (
-            np.frombuffer(
-                response.raw_output_contents[output_index["detection_classes"]],
-                dtype=np.float32,
-            )
-            .reshape((1, -1))
-            .astype(int)  # type: ignore
-        )[0]
-        detection_boxes = np.frombuffer(
-            response.raw_output_contents[output_index["detection_boxes"]],
-            dtype=np.float32,
-        ).reshape((1, -1, 4))[0]
-
-        result = ObjectDetectionRawResult(
-            num_detections=num_detections,
-            detection_classes=detection_classes,
-            detection_boxes=detection_boxes,
-            detection_scores=detection_scores,
-            detection_masks=None,
-            label_names=self.label_names,
-        )
-
-        return result
-
-
-class ObjectDetectionModelRegistry:
-    models: dict[str, RemoteModel] = {}
-    _loaded = False
-
-    @classmethod
-    def get_available_models(cls) -> list[str]:
-        cls.load_all()
-        return list(cls.models.keys())
-
-    @classmethod
-    def load(cls, name: str) -> RemoteModel:
-        label_names = LABELS[name]
-        model = RemoteModel(name, label_names)
-        cls.models[name] = model
-        return model
-
-    @classmethod
-    def get(cls, name: str) -> RemoteModel:
-        if name not in cls.models:
-            cls.load(name)
-        return cls.models[name]
````
labelr-0.1.0.dist-info/RECORD
DELETED
````diff
@@ -1,20 +0,0 @@
-labelr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
-labelr/annotate.py,sha256=8O9SO2thevo_Aa6etIUxCz2xJVXB4MwuSHj4jxz8sqQ,3441
-labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
-labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
-labelr/export.py,sha256=tcOmVnOdJidWfNouNWoQ4OJgHMbbG-bLFHkId9huiS0,10170
-labelr/main.py,sha256=1_cZoJLBMpUV-lnaKb1XaVff4XxWjpIUZbSNQh44tPE,8715
-labelr/sample.py,sha256=cpzvgZWVU6GzwD35tqGKEFVKAgqQbSHlWW6IL9FG15Q,5918
-labelr/types.py,sha256=CahqnkLnGj23Jg0X9nftK7Jiorq50WYQqR8u9Ln4E-k,281
-labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-labelr/apps/datasets.py,sha256=DXU8XZx0iEHDI5SvUeI8atCKSUmj9YJwO6xTgMZDgEI,7936
-labelr/apps/projects.py,sha256=HpulSciBVTk1sSR1uXjtHytny9t-rN8wiaQ5llNBX6Y,12420
-labelr/apps/users.py,sha256=twQSlpHxE0hrYkgrJpEFbK8lYfWnpJr8vyfLHLtdAUU,909
-labelr/triton/object_detection.py,sha256=QKUOWiYFH72omyZH4SdbA56JDiVA_e_N8YCSQarkzWQ,7409
-labelr-0.1.0.dist-info/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
-labelr-0.1.0.dist-info/METADATA,sha256=tBsu8c-LehNqjPNiCG3XjRLboQNeq2RSy9JZiv4v9Dc,6528
-labelr-0.1.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-labelr-0.1.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
-labelr-0.1.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
-labelr-0.1.0.dist-info/RECORD,,
````
{labelr-0.1.0.dist-info → labelr-0.3.0.dist-info}/entry_points.txt
RENAMED
File without changes

{labelr-0.1.0.dist-info → labelr-0.3.0.dist-info/licenses}/LICENSE
RENAMED
File without changes

{labelr-0.1.0.dist-info → labelr-0.3.0.dist-info}/top_level.txt
RENAMED
File without changes