labelr-0.1.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
labelr/__init__.py ADDED
File without changes
labelr/__main__.py ADDED
@@ -0,0 +1,4 @@
+ from labelr.main import app
+
+ if __name__ == "__main__":
+     app()
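The module guard above makes the package runnable as `python -m labelr`. A minimal in-process sketch (assuming only what this file shows, namely that `labelr.main` exposes a Typer `app`):

    from typer.testing import CliRunner

    from labelr.main import app  # the same Typer app the module guard runs

    # Invoke the CLI in-process instead of via "python -m labelr".
    print(CliRunner().invoke(app, ["--help"]).output)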
labelr/annotate.py ADDED
@@ -0,0 +1,107 @@
+ import random
+ import string
+
+ from openfoodfacts.utils import get_logger
+
+ try:
+     from ultralytics.engine.results import Results
+ except ImportError:
+     pass
+
+ from labelr.triton.object_detection import ObjectDetectionResult
+
+ logger = get_logger(__name__)
+
+
+ def format_annotation_results_from_triton(
+     objects: list[ObjectDetectionResult], image_width: int, image_height: int
+ ):
+     """Format annotation results from a Triton object detection model into
+     Label Studio format."""
+     annotation_results = []
+     for object_ in objects:
+         bbox = object_.bounding_box
+         category_name = object_.label
+         # These are relative coordinates (between 0.0 and 1.0)
+         y_min, x_min, y_max, x_max = bbox
+         # Make sure the coordinates are within the image boundaries,
+         # and convert them to percentages
+         y_min = min(max(0, y_min), 1.0) * 100
+         x_min = min(max(0, x_min), 1.0) * 100
+         y_max = min(max(0, y_max), 1.0) * 100
+         x_max = min(max(0, x_max), 1.0) * 100
+         x = x_min
+         y = y_min
+         width = x_max - x_min
+         height = y_max - y_min
+
+         id_ = generate_id()
+         annotation_results.append(
+             {
+                 "id": id_,
+                 "type": "rectanglelabels",
+                 "from_name": "label",
+                 "to_name": "image",
+                 "original_width": image_width,
+                 "original_height": image_height,
+                 "image_rotation": 0,
+                 "value": {
+                     "rotation": 0,
+                     "x": x,
+                     "y": y,
+                     "width": width,
+                     "height": height,
+                     "rectanglelabels": [category_name],
+                 },
+             },
+         )
+     return annotation_results
+
+
+ def format_annotation_results_from_ultralytics(
+     results: "Results",
+     labels: list[str],
+     label_mapping: dict[str, str] | None = None,
+ ) -> list[dict]:
+     annotation_results = []
+     orig_height, orig_width = results.orig_shape
+     boxes = results.boxes
+     classes = boxes.cls.tolist()
+     for i, xyxyn in enumerate(boxes.xyxyn):
+         # Boxes found.
+         if len(xyxyn) > 0:
+             xyxyn = xyxyn.tolist()
+             x1 = xyxyn[0] * 100
+             y1 = xyxyn[1] * 100
+             x2 = xyxyn[2] * 100
+             y2 = xyxyn[3] * 100
+             width = x2 - x1
+             height = y2 - y1
+             label_id = int(classes[i])
+             label_name = labels[label_id]
+             if label_mapping:
+                 label_name = label_mapping.get(label_name, label_name)
+             annotation_results.append(
+                 {
+                     "id": generate_id(),
+                     "type": "rectanglelabels",
+                     "from_name": "label",
+                     "to_name": "image",
+                     "original_width": orig_width,
+                     "original_height": orig_height,
+                     "image_rotation": 0,
+                     "value": {
+                         "rotation": 0,
+                         "x": x1,
+                         "y": y1,
+                         "width": width,
+                         "height": height,
+                         "rectanglelabels": [label_name],
+                     },
+                 },
+             )
+     return annotation_results
+
+
+ def generate_id(length: int = 10) -> str:
+     return "".join(random.choices(string.ascii_letters + string.digits, k=length))
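A minimal arithmetic sketch of the conversion performed above (illustrative values only, not part of the package): a relative Triton box (y_min, x_min, y_max, x_max) becomes the percent-based Label Studio "value" dict.

    # Relative coordinates from a hypothetical detection labeled "product".
    y_min, x_min, y_max, x_max = 0.10, 0.20, 0.50, 0.60

    # Clamp to the image and convert to percentages, as the helper does.
    x = min(max(0, x_min), 1.0) * 100           # 20.0
    y = min(max(0, y_min), 1.0) * 100           # 10.0
    width = min(max(0, x_max), 1.0) * 100 - x   # 40.0
    height = min(max(0, y_max), 1.0) * 100 - y  # 40.0

    value = {
        "rotation": 0,
        "x": x,
        "y": y,
        "width": width,
        "height": height,
        "rectanglelabels": ["product"],
    }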
File without changes
@@ -0,0 +1,227 @@
+ import json
+ import random
+ import shutil
+ import typing
+ from pathlib import Path
+ from typing import Annotated, Optional
+
+ import typer
+ from openfoodfacts.utils import get_logger
+
+ from ..config import LABEL_STUDIO_DEFAULT_URL
+ from ..types import ExportDestination, ExportSource, TaskType
+
+ app = typer.Typer()
+
+ logger = get_logger(__name__)
+
+
+ @app.command()
+ def check(
+     api_key: Annotated[
+         Optional[str], typer.Option(envvar="LABEL_STUDIO_API_KEY")
+     ] = None,
+     project_id: Annotated[
+         Optional[int], typer.Option(help="Label Studio Project ID")
+     ] = None,
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+     dataset_dir: Annotated[
+         Optional[Path],
+         typer.Option(
+             help="Path to the dataset directory", exists=True, file_okay=False
+         ),
+     ] = None,
+     remove: Annotated[
+         bool,
+         typer.Option(
+             help="Remove duplicate images from the dataset, only for local datasets"
+         ),
+     ] = False,
+ ):
+     """Check a dataset for duplicate images."""
+     from label_studio_sdk.client import LabelStudio
+
+     from ..check import check_local_dataset, check_ls_dataset
+
+     if project_id is not None:
+         ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+         check_ls_dataset(ls, project_id)
+     elif dataset_dir is not None:
+         check_local_dataset(dataset_dir, remove=remove)
+     else:
+         raise typer.BadParameter("Either project ID or dataset directory is required")
+
+
+ @app.command()
+ def split_train_test(
+     task_type: TaskType, dataset_dir: Path, output_dir: Path, train_ratio: float = 0.8
+ ):
+     """Split a dataset into training and test sets.
+
+     Only classification tasks are supported.
+     """
+     if task_type == TaskType.classification:
+         class_dirs = [d for d in dataset_dir.iterdir() if d.is_dir()]
+         logger.info("Found classes: %s", [d.name for d in class_dirs])
+
+         output_dir.mkdir(parents=True, exist_ok=True)
+         train_dir = output_dir / "train"
+         test_dir = output_dir / "test"
+         train_dir.mkdir(parents=True, exist_ok=True)
+         test_dir.mkdir(parents=True, exist_ok=True)
+
+         for class_dir in class_dirs:
+             input_paths = list(class_dir.glob("*"))
+             random.shuffle(input_paths)
+
+             test_count = int(len(input_paths) * (1 - train_ratio))
+             if test_count == 0:
+                 logger.warning("Not enough samples, skipping class: %s", class_dir.name)
+                 continue
+
+             test_paths = input_paths[:test_count]
+             train_paths = input_paths[test_count:]
+
+             for output_dir, input_paths in (
+                 (train_dir, train_paths),
+                 (test_dir, test_paths),
+             ):
+                 output_cls_dir = output_dir / class_dir.name
+                 output_cls_dir.mkdir(parents=True, exist_ok=True)
+
+                 for path in input_paths:
+                     logger.info("Copying: %s to %s", path, output_cls_dir)
+                     shutil.copy(path, output_cls_dir / path.name)
+     else:
+         raise typer.BadParameter("Unsupported task type")
+
+
+ @app.command()
+ def convert_object_detection_dataset(
+     repo_id: Annotated[
+         str, typer.Option(help="Hugging Face Datasets repository ID to convert")
+     ],
+     output_file: Annotated[
+         Path, typer.Option(help="Path to the output JSON file", exists=False)
+     ],
+ ):
+     """Convert object detection dataset from Hugging Face Datasets to Label
+     Studio format, and save it to a JSON file."""
+     from datasets import load_dataset
+
+     from labelr.sample import format_object_detection_sample_from_hf
+
+     logger.info("Loading dataset: %s", repo_id)
+     ds = load_dataset(repo_id)
+     logger.info("Dataset loaded: %s", tuple(ds.keys()))
+
+     with output_file.open("wt") as f:
+         for split in ds.keys():
+             logger.info("Processing split: %s", split)
+             for sample in ds[split]:
+                 label_studio_sample = format_object_detection_sample_from_hf(
+                     sample, split=split
+                 )
+                 f.write(json.dumps(label_studio_sample) + "\n")
+
+
+ @app.command()
+ def export(
+     from_: Annotated[ExportSource, typer.Option("--from", help="Input source to use")],
+     to: Annotated[ExportDestination, typer.Option(help="Where to export the data")],
+     api_key: Annotated[Optional[str], typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     repo_id: Annotated[
+         Optional[str],
+         typer.Option(help="Hugging Face Datasets repository ID to convert"),
+     ] = None,
+     label_names: Annotated[
+         Optional[str],
+         typer.Option(help="Label names to use, as a comma-separated list"),
+     ] = None,
+     project_id: Annotated[
+         Optional[int], typer.Option(help="Label Studio Project ID")
+     ] = None,
+     label_studio_url: Optional[str] = LABEL_STUDIO_DEFAULT_URL,
+     output_dir: Annotated[
+         Optional[Path],
+         typer.Option(help="Path to the output directory", file_okay=False),
+     ] = None,
+     download_images: Annotated[
+         bool,
+         typer.Option(
+             help="If True, don't use HF images and download images from the server"
+         ),
+     ] = False,
+     train_ratio: Annotated[
+         float,
+         typer.Option(
+             help="Train ratio for splitting the dataset, if the split name is not "
+             "provided (typically, if the source is Label Studio)"
+         ),
+     ] = 0.8,
+     error_raise: Annotated[
+         bool,
+         typer.Option(
+             help="Raise an error if an image download fails, only for Ultralytics"
+         ),
+     ] = True,
+ ):
+     """Export Label Studio annotations, either to Hugging Face Datasets or to
+     local files (Ultralytics format)."""
+     from label_studio_sdk.client import LabelStudio
+
+     from labelr.export import (
+         export_from_hf_to_ultralytics,
+         export_from_ls_to_hf,
+         export_from_ls_to_ultralytics,
+     )
+
+     if (to == ExportDestination.hf or from_ == ExportSource.hf) and repo_id is None:
+         raise typer.BadParameter("Repository ID is required for export/import with HF")
+
+     if label_names is None:
+         if to == ExportDestination.hf:
+             raise typer.BadParameter("Label names are required for HF export")
+         if from_ == ExportSource.ls:
+             raise typer.BadParameter(
+                 "Label names are required for export from LS source"
+             )
+
+     if from_ == ExportSource.ls:
+         if project_id is None:
+             raise typer.BadParameter("Project ID is required for LS export")
+         if api_key is None:
+             raise typer.BadParameter("API key is required for LS export")
+
+     if to == ExportDestination.ultralytics and output_dir is None:
+         raise typer.BadParameter("Output directory is required for Ultralytics export")
+
+     if from_ == ExportSource.ls:
+         ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+         label_names = typing.cast(str, label_names)
+         label_names_list = label_names.split(",")
+         if to == ExportDestination.hf:
+             repo_id = typing.cast(str, repo_id)
+             export_from_ls_to_hf(
+                 ls, repo_id, label_names_list, typing.cast(int, project_id)
+             )
+         elif to == ExportDestination.ultralytics:
+             export_from_ls_to_ultralytics(
+                 ls,
+                 typing.cast(Path, output_dir),
+                 label_names_list,
+                 typing.cast(int, project_id),
+                 train_ratio=train_ratio,
+                 error_raise=error_raise,
+             )
+
+     elif from_ == ExportSource.hf:
+         if to == ExportDestination.ultralytics:
+             export_from_hf_to_ultralytics(
+                 typing.cast(str, repo_id),
+                 typing.cast(Path, output_dir),
+                 download_images=download_images,
+                 error_raise=error_raise,
+             )
+         else:
+             raise typer.BadParameter("Unsupported export format")
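A minimal sketch of the Label Studio → Ultralytics path taken by the `export` command above, calling the exporter directly. The argument order is taken from the call site above; the server URL, API key, label names and project ID are placeholder values.

    from pathlib import Path

    from label_studio_sdk.client import LabelStudio
    from labelr.export import export_from_ls_to_ultralytics

    # Placeholder credentials and IDs; argument order mirrors the CLI call above.
    ls = LabelStudio(base_url="http://localhost:8080", api_key="<LABEL_STUDIO_API_KEY>")
    export_from_ls_to_ultralytics(
        ls,
        Path("datasets/my-detector"),  # output directory
        ["product", "price-tag"],      # label names
        42,                            # Label Studio project ID
        train_ratio=0.8,
        error_raise=True,
    )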
@@ -0,0 +1,353 @@
+ import enum
+ import json
+ import typing
+ from pathlib import Path
+ from typing import Annotated, Optional
+
+ import typer
+ from openfoodfacts.utils import get_logger
+ from PIL import Image
+
+ from ..annotate import (
+     format_annotation_results_from_triton,
+     format_annotation_results_from_ultralytics,
+ )
+ from ..config import LABEL_STUDIO_DEFAULT_URL
+
+ app = typer.Typer()
+
+ logger = get_logger(__name__)
+
+
+ @app.command()
+ def create(
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     title: Annotated[str, typer.Option(help="Project title")],
+     config_file: Annotated[
+         Path, typer.Option(help="Path to label config file", file_okay=True)
+     ],
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+ ):
+     """Create a new Label Studio project."""
+     from label_studio_sdk.client import LabelStudio
+
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+     label_config = config_file.read_text()
+
+     project = ls.projects.create(title=title, label_config=label_config)
+     logger.info(f"Project created: {project}")
+
+
+ @app.command()
+ def import_data(
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
+     dataset_path: Annotated[
+         Path, typer.Option(help="Path to the Label Studio dataset file", file_okay=True)
+     ],
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+     batch_size: int = 25,
+ ):
+     """Import tasks from a dataset file to a Label Studio project.
+
+     The dataset file should contain one JSON object per line."""
+     import more_itertools
+     import tqdm
+     from label_studio_sdk.client import LabelStudio
+
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+
+     with dataset_path.open("rt") as f:
+         for batch in more_itertools.chunked(
+             tqdm.tqdm(map(json.loads, f), desc="tasks"), batch_size
+         ):
+             ls.projects.import_tasks(id=project_id, request=batch)
+
+
+ @app.command()
+ def update_prediction(
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+ ):
+     from label_studio_sdk.client import LabelStudio
+
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+
+     for task in ls.tasks.list(project=project_id, fields="all"):
+         for prediction in task.predictions:
+             prediction_id = prediction["id"]
+             if prediction["model_version"] == "":
+                 logger.info("Updating prediction: %s", prediction_id)
+                 ls.predictions.update(
+                     id=prediction_id,
+                     model_version="undefined",
+                 )
+
+
+ @app.command()
+ def add_split(
+     train_split: Annotated[
+         float, typer.Option(help="fraction of samples to add in train split")
+     ],
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+ ):
+     """Update the split field of tasks in a Label Studio project.
+
+     The split field is set to "train" with probability `train_split`, and "val"
+     otherwise. Tasks without a split field are assigned a split based on the
+     probability, and updated in the server. Tasks with a non-null split field
+     are not updated.
+     """
+     import random
+
+     from label_studio_sdk import Task
+     from label_studio_sdk.client import LabelStudio
+
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+
+     for task in ls.tasks.list(project=project_id, fields="all"):
+         task: Task
+         split = task.data.get("split")
+         if split is None:
+             split = "train" if random.random() < train_split else "val"
+             logger.info("Updating task: %s, split: %s", task.id, split)
+             ls.tasks.update(task.id, data={**task.data, "split": split})
+
+
+ @app.command()
+ def annotate_from_prediction(
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     project_id: Annotated[int, typer.Option(help="Label Studio project ID")],
+     updated_by: Annotated[
+         Optional[int], typer.Option(help="User ID to declare as annotator")
+     ] = None,
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+ ):
+     """Create annotations for all tasks from predictions.
+
+     This command is useful if you imported tasks with predictions, and want to
+     "validate" these predictions by creating annotations.
+     """
+     import tqdm
+     from label_studio_sdk.client import LabelStudio
+     from label_studio_sdk.types.task import Task
+
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+
+     task: Task
+     for task in tqdm.tqdm(
+         ls.tasks.list(project=project_id, fields="all"), desc="tasks"
+     ):
+         task_id = task.id
+         if task.total_annotations == 0 and task.total_predictions > 0:
+             logger.info("Creating annotation for task: %s", task_id)
+             ls.annotations.create(
+                 id=task_id,
+                 result=task.predictions[0]["result"],
+                 project=project_id,
+                 updated_by=updated_by,
+             )
+
+
+ class PredictorBackend(enum.Enum):
+     triton = "triton"
+     ultralytics = "ultralytics"
+
+
+ @app.command()
+ def add_prediction(
+     api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
+     project_id: Annotated[int, typer.Option(help="Label Studio Project ID")],
+     model_name: Annotated[
+         str,
+         typer.Option(
+             help="Name of the object detection model to run (for Triton server) or "
+             "of the Ultralytics zero-shot model to run."
+         ),
+     ] = "yolov8x-worldv2.pt",
+     triton_uri: Annotated[
+         Optional[str],
+         typer.Option(help="URI (host+port) of the Triton Inference Server"),
+     ] = None,
+     backend: Annotated[
+         PredictorBackend,
+         typer.Option(
+             help="Prediction backend: either use a Triton server to perform "
+             "the prediction or use Ultralytics."
+         ),
+     ] = PredictorBackend.ultralytics,
+     labels: Annotated[
+         Optional[list[str]],
+         typer.Option(
+             help="List of class labels to use for Yolo model. If you're using Yolo-World or other "
+             "zero-shot models, this is the list of label names that are going to be provided to the "
+             "model. In that case, you can use `label_mapping` to map the model's output to the "
+             "actual class names expected by Label Studio."
+         ),
+     ] = None,
+     label_mapping: Annotated[
+         Optional[str],
+         typer.Option(help="Mapping of model labels to class names, as a JSON string"),
+     ] = None,
+     label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
+     threshold: Annotated[
+         Optional[float],
+         typer.Option(
+             help="Confidence threshold for selecting bounding boxes. The default is 0.5 "
+             "for Triton backend and 0.1 for Ultralytics backend."
+         ),
+     ] = None,
+     max_det: Annotated[int, typer.Option(help="Maximum number of detections")] = 300,
+     dry_run: Annotated[
+         bool,
+         typer.Option(
+             help="Launch in dry run mode, without uploading annotations to Label Studio"
+         ),
+     ] = False,
+     error_raise: Annotated[
+         bool,
+         typer.Option(help="Raise an error if image download fails"),
+     ] = True,
+     model_version: Annotated[
+         Optional[str],
+         typer.Option(help="Model version to use for the prediction"),
+     ] = None,
+ ):
+     """Add predictions as pre-annotations to Label Studio tasks, using an
+     object detection model served by Triton Inference Server or Ultralytics."""
+
+     import tqdm
+     from label_studio_sdk.client import LabelStudio
+     from openfoodfacts.utils import get_image_from_url
+
+     from labelr.triton.object_detection import ObjectDetectionModelRegistry
+
+     label_mapping_dict = None
+     if label_mapping:
+         label_mapping_dict = json.loads(label_mapping)
+
+     if dry_run:
+         logger.info("** Dry run mode enabled **")
+
+     logger.info(
+         "backend: %s, model_name: %s, labels: %s, threshold: %s, label mapping: %s",
+         backend,
+         model_name,
+         labels,
+         threshold,
+         label_mapping,
+     )
+     ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
+
+     model: ObjectDetectionModelRegistry | "YOLO"
+
+     if backend == PredictorBackend.ultralytics:
+         from ultralytics import YOLO
+
+         if labels is None:
+             raise typer.BadParameter("Labels are required for Ultralytics backend")
+
+         if threshold is None:
+             threshold = 0.1
+
+         model = YOLO(model_name)
+         if hasattr(model, "set_classes"):
+             model.set_classes(labels)
+         else:
+             logger.warning("The model does not support setting classes directly.")
+     elif backend == PredictorBackend.triton:
+         if triton_uri is None:
+             raise typer.BadParameter("Triton URI is required for Triton backend")
+
+         if threshold is None:
+             threshold = 0.5
+
+         model = ObjectDetectionModelRegistry.load(model_name)
+     else:
+         raise typer.BadParameter(f"Unsupported backend: {backend}")
+
+     for task in tqdm.tqdm(ls.tasks.list(project=project_id), desc="tasks"):
+         if task.total_predictions == 0:
+             image_url = task.data["image_url"]
+             image = typing.cast(
+                 Image.Image,
+                 get_image_from_url(image_url, error_raise=error_raise),
+             )
+             if backend == PredictorBackend.ultralytics:
+                 results = model.predict(
+                     image,
+                     conf=threshold,
+                     max_det=max_det,
+                 )[0]
+                 labels = typing.cast(list[str], labels)
+                 label_studio_result = format_annotation_results_from_ultralytics(
+                     results, labels, label_mapping_dict
+                 )
+             else:
+                 output = model.detect_from_image(image, triton_uri=triton_uri)
+                 results = output.select(threshold=threshold)
+                 logger.info("Adding prediction to task: %s", task.id)
+                 label_studio_result = format_annotation_results_from_triton(
+                     results, image.width, image.height
+                 )
+             if dry_run:
+                 logger.info("image_url: %s", image_url)
+                 logger.info("result: %s", label_studio_result)
+             else:
+                 ls.predictions.create(
+                     task=task.id,
+                     result=label_studio_result,
+                     model_version=model_version,
+                 )
+
+
+ @app.command()
+ def create_dataset_file(
+     input_file: Annotated[
+         Path,
+         typer.Option(help="Path to a list of image URLs", exists=True),
+     ],
+     output_file: Annotated[
+         Path, typer.Option(help="Path to the output JSON file", exists=False)
+     ],
+ ):
+     """Create a Label Studio object detection dataset file from a list of
+     image URLs."""
+     from urllib.parse import urlparse
+
+     import tqdm
+     from openfoodfacts.images import extract_barcode_from_url, extract_source_from_url
+     from openfoodfacts.utils import get_image_from_url
+
+     from labelr.sample import format_object_detection_sample_to_ls
+
+     logger.info("Loading dataset: %s", input_file)
+
+     with output_file.open("wt") as f:
+         for line in tqdm.tqdm(input_file.open("rt"), desc="images"):
+             url = line.strip()
+             if not url:
+                 continue
+
+             extra_meta = {}
+             image_id = Path(urlparse(url).path).stem
+             if ".openfoodfacts.org" in url:
+                 barcode = extract_barcode_from_url(url)
+                 extra_meta["barcode"] = barcode
+                 off_image_id = Path(extract_source_from_url(url)).stem
+                 extra_meta["off_image_id"] = off_image_id
+                 image_id = f"{barcode}-{off_image_id}"
+
+             image = get_image_from_url(url, error_raise=False)
+
+             if image is None:
+                 logger.warning("Failed to load image: %s", url)
+                 continue
+
+             label_studio_sample = format_object_detection_sample_to_ls(
+                 image_id, url, image.width, image.height, extra_meta
+             )
+             f.write(json.dumps(label_studio_sample) + "\n")
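A minimal end-to-end sketch of the Ultralytics path of `add_prediction` above, using only calls that appear in this diff; the server URL, API key, label names, project ID and model version are illustrative placeholders.

    from label_studio_sdk.client import LabelStudio
    from openfoodfacts.utils import get_image_from_url
    from ultralytics import YOLO

    from labelr.annotate import format_annotation_results_from_ultralytics

    # Placeholder server, project and labels.
    ls = LabelStudio(base_url="http://localhost:8080", api_key="<LABEL_STUDIO_API_KEY>")
    model = YOLO("yolov8x-worldv2.pt")
    model.set_classes(["product"])  # zero-shot label names

    # Take one task, run detection on its image, and attach the result as a prediction.
    task = next(iter(ls.tasks.list(project=42)))
    image = get_image_from_url(task.data["image_url"])
    results = model.predict(image, conf=0.1, max_det=300)[0]
    result = format_annotation_results_from_ultralytics(results, ["product"])
    ls.predictions.create(task=task.id, result=result, model_version="yolov8x-worldv2")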