PyPI - labelr - Versions diffs - 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl - Mend

labelr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (28) hide show

labelr/apps/datasets.py +196 -14
labelr/apps/directus.py +212 -0
labelr/apps/google_batch.py +46 -1
labelr/apps/label_studio.py +261 -64
labelr/apps/typer_description.py +2 -0
labelr/check.py +68 -7
labelr/config.py +57 -1
labelr/export/__init__.py +0 -0
labelr/export/classification.py +114 -0
labelr/export/common.py +42 -0
labelr/export/llm.py +91 -0
labelr/{export.py → export/object_detection.py} +97 -217
labelr/google_genai.py +9 -3
labelr/main.py +16 -0
labelr/sample/__init__.py +0 -0
labelr/sample/classification.py +17 -0
labelr/sample/common.py +14 -0
labelr/sample/llm.py +75 -0
labelr/{sample.py → sample/object_detection.py} +38 -68
labelr/utils.py +55 -5
labelr-0.11.0.dist-info/METADATA +230 -0
labelr-0.11.0.dist-info/RECORD +38 -0
{labelr-0.9.0.dist-info → labelr-0.11.0.dist-info}/WHEEL +1 -1
labelr-0.9.0.dist-info/METADATA +0 -159
labelr-0.9.0.dist-info/RECORD +0 -28
{labelr-0.9.0.dist-info → labelr-0.11.0.dist-info}/entry_points.txt +0 -0
{labelr-0.9.0.dist-info → labelr-0.11.0.dist-info}/licenses/LICENSE +0 -0
{labelr-0.9.0.dist-info → labelr-0.11.0.dist-info}/top_level.txt +0 -0

labelr/{export.py → export/object_detection.py} RENAMED Viewed

@@ -3,54 +3,70 @@ import logging
 import pickle
 import random
 import tempfile
-from collections.abc import Iterator
+import typing
 from pathlib import Path
 import datasets
 import tqdm
 from label_studio_sdk.client import LabelStudio
-from openfoodfacts.images import download_image, generate_image_url
-from openfoodfacts.types import Flavor
+from openfoodfacts.images import download_image
 from PIL import Image, ImageOps
-from labelr.sample import (
-    HF_DS_CLASSIFICATION_FEATURES,
-    HF_DS_LLM_IMAGE_EXTRACTION_FEATURES,
-    HF_DS_OBJECT_DETECTION_FEATURES,
-    LLMImageExtractionSample,
+from labelr.export.common import _pickle_sample_generator
+from labelr.sample.object_detection import (
     format_object_detection_sample_to_hf,
+    get_hf_object_detection_features,
 )
-from labelr.types import TaskType
-from labelr.utils import PathWithContext
 logger = logging.getLogger(__name__)
-def _pickle_sample_generator(dir: Path):
-    """Generator that yields samples from pickles in a directory."""
-    for pkl in dir.glob("*.pkl"):
-        with open(pkl, "rb") as f:
-            yield pickle.load(f)
 def export_from_ls_to_hf_object_detection(
     ls: LabelStudio,
     repo_id: str,
     label_names: list[str],
     project_id: int,
+    is_openfoodfacts_dataset: bool,
+    image_max_size: int | None = None,
+    view_id: int | None = None,
     merge_labels: bool = False,
     use_aws_cache: bool = True,
     revision: str = "main",
-):
+) -> None:
+    """Export annotations from a Label Studio project to a Hugging Face
+    dataset.
+    The Label Studio project should be an object detection project.
+    Args:
+        ls (LabelStudio): Label Studio client instance.
+        repo_id (str): Hugging Face repository ID to push the dataset to.
+        label_names (list[str]): List of label names in the project.
+        project_id (int): Label Studio project ID to export from.
+        is_openfoodfacts_dataset (bool): Whether the dataset is an Open Food
+            Facts dataset. If True, the dataset will include additional
+            metadata fields specific to Open Food Facts (`barcode` and
+            `off_image_id`).
+        image_max_size (int | None): Maximum size (in pixels) for the images.
+            If None, no resizing is performed. Defaults to None.
+        view_id (int | None): Label Studio view ID to export from. If None,
+            all tasks are exported. Defaults to None.
+        merge_labels (bool): Whether to merge all labels into a single label
+            named "object". Defaults to False.
+        use_aws_cache (bool): Whether to use the AWS image cache when
+            downloading images. Defaults to True.
+        revision (str): The dataset revision to push to. Defaults to 'main'.
+    """
     if merge_labels:
         label_names = ["object"]
     logger.info(
-        "Project ID: %d, label names: %s, repo_id: %s, revision: %s",
+        "Project ID: %d, label names: %s, repo_id: %s, revision: %s, view ID: %s",
         project_id,
         label_names,
         repo_id,
         revision,
+        view_id,
     )
     for split in ["train", "val"]:
@@ -60,7 +76,9 @@ def export_from_ls_to_hf_object_detection(
             tmp_dir = Path(tmp_dir_str)
             logger.info("Saving samples to temporary directory: %s", tmp_dir)
             for i, task in tqdm.tqdm(
-                enumerate(ls.tasks.list(project=project_id, fields="all")),
+                enumerate(
+                    ls.tasks.list(project=project_id, fields="all", view=view_id)
+                ),
                 desc="tasks",
             ):
                 if task.data["split"] != split:
@@ -71,15 +89,17 @@ def export_from_ls_to_hf_object_detection(
                     label_names=label_names,
                     merge_labels=merge_labels,
                     use_aws_cache=use_aws_cache,
+                    image_max_size=image_max_size,
                 )
                 if sample is not None:
                     # Save output as pickle
                     with open(tmp_dir / f"{split}_{i:05}.pkl", "wb") as f:
                         pickle.dump(sample, f)
+            features = get_hf_object_detection_features(is_openfoodfacts_dataset)
             hf_ds = datasets.Dataset.from_generator(
                 functools.partial(_pickle_sample_generator, tmp_dir),
-                features=HF_DS_OBJECT_DETECTION_FEATURES,
+                features=features,
             )
             hf_ds.push_to_hub(repo_id, split=split, revision=revision)
@@ -93,12 +113,32 @@ def export_from_ls_to_ultralytics_object_detection(
     error_raise: bool = True,
     merge_labels: bool = False,
     use_aws_cache: bool = True,
+    view_id: int | None = None,
+    image_max_size: int | None = None,
 ):
     """Export annotations from a Label Studio project to the Ultralytics
     format.
     The Label Studio project should be an object detection project with a
     single rectanglelabels annotation result per task.
+    Args:
+        ls (LabelStudio): Label Studio client instance.
+        output_dir (Path): Path to the output directory.
+        label_names (list[str]): List of label names in the project.
+        project_id (int): Label Studio project ID to export from.
+        train_ratio (float): Ratio of training samples. The rest will be used
+            for validation. Defaults to 0.8.
+        error_raise (bool): Whether to raise an error if an image fails to
+            download. If False, the image will be skipped. Defaults to True.
+        merge_labels (bool): Whether to merge all labels into a single label
+            named "object". Defaults to False.
+        use_aws_cache (bool): Whether to use the AWS image cache when
+            downloading images. Defaults to True.
+        view_id (int | None): Label Studio view ID to export from. If None,
+            all tasks are exported. Defaults to None.
+        image_max_size (int | None): Maximum size (in pixels) for the images.
+            If None, no resizing is performed. Defaults to None.
     """
     if merge_labels:
         label_names = ["object"]
@@ -116,7 +156,7 @@ def export_from_ls_to_ultralytics_object_detection(
         (images_dir / split).mkdir(parents=True, exist_ok=True)
     for task in tqdm.tqdm(
-        ls.tasks.list(project=project_id, fields="all"),
+        ls.tasks.list(project=project_id, fields="all", view=view_id),
         desc="tasks",
     ):
         split = task.data.get("split")
@@ -194,18 +234,28 @@ def export_from_ls_to_ultralytics_object_detection(
                     has_valid_annotation = True
         if has_valid_annotation:
-            download_output = download_image(
+            image = download_image(
                 image_url,
-                return_struct=True,
+                return_struct=False,
                 error_raise=error_raise,
                 use_cache=use_aws_cache,
             )
-            if download_output is None:
+            if image is None:
                 logger.error("Failed to download image: %s", image_url)
                 continue
-            with (images_dir / split / f"{image_id}.jpg").open("wb") as f:
-                f.write(download_output.image_bytes)
+            image = typing.cast(Image.Image, image)
+            # Rotate image according to exif orientation using Pillow
+            ImageOps.exif_transpose(image, in_place=True)
+            # Resize image if larger than max size
+            if image_max_size is not None and (
+                image.width > image_max_size or image.height > image_max_size
+            ):
+                image.thumbnail(
+                    (image_max_size, image_max_size), Image.Resampling.LANCZOS
+                )
+            image.save(images_dir / split / f"{image_id}.jpg", format="JPEG")
     with (output_dir / "data.yaml").open("w") as f:
         f.write("path: data\n")
@@ -223,6 +273,7 @@ def export_from_hf_to_ultralytics_object_detection(
     download_images: bool = True,
     error_raise: bool = True,
     use_aws_cache: bool = True,
+    image_max_size: int | None = None,
     revision: str = "main",
 ):
     """Export annotations from a Hugging Face dataset project to the
@@ -243,6 +294,8 @@ def export_from_hf_to_ultralytics_object_detection(
         use_aws_cache (bool): Whether to use the AWS image cache when
             downloading images. This option is only used if `download_images`
             is True. Defaults to True.
+        image_max_size (int | None): Maximum size (in pixels) for the images.
+            If None, no resizing is performed. Defaults to None.
         revision (str): The dataset revision to load. Defaults to 'main'.
     """
     logger.info("Repo ID: %s, revision: %s", repo_id, revision)
@@ -278,21 +331,31 @@ def export_from_hf_to_ultralytics_object_detection(
                         "`download_images` to False."
                     )
                 image_url = sample["meta"]["image_url"]
-                download_output = download_image(
+                image = download_image(
                     image_url,
-                    return_struct=True,
+                    return_struct=False,
                     error_raise=error_raise,
                     use_cache=use_aws_cache,
                 )
-                if download_output is None:
+                if image is None:
                     logger.error("Failed to download image: %s", image_url)
                     continue
-                with (split_images_dir / f"{image_id}.jpg").open("wb") as f:
-                    f.write(download_output.image_bytes)
             else:
                 image = sample["image"]
-                image.save(split_images_dir / f"{image_id}.jpg")
+            image = typing.cast(Image.Image, image)
+            # Rotate image according to exif orientation using Pillow
+            # If the image source is Hugging Face, EXIF data is not preserved,
+            # so this step is only useful when downloading images.
+            ImageOps.exif_transpose(image, in_place=True)
+            # Resize image if larger than max size
+            if image_max_size is not None and (
+                image.width > image_max_size or image.height > image_max_size
+            ):
+                image.thumbnail(
+                    (image_max_size, image_max_size), Image.Resampling.LANCZOS
+                )
+            image.save(split_images_dir / f"{image_id}.jpg")
             objects = sample["objects"]
             bboxes = objects["bbox"]
@@ -335,186 +398,3 @@ def export_from_hf_to_ultralytics_object_detection(
         f.write("names:\n")
         for i, category_name in enumerate(category_names):
             f.write(f"  {i}: {category_name}\n")
-def export_from_ultralytics_to_hf(
-    task_type: TaskType,
-    dataset_dir: Path,
-    repo_id: str,
-    label_names: list[str],
-    merge_labels: bool = False,
-    is_openfoodfacts_dataset: bool = False,
-    openfoodfacts_flavor: Flavor = Flavor.off,
-) -> None:
-    if task_type != TaskType.classification:
-        raise NotImplementedError(
-            "Only classification task is currently supported for Ultralytics to HF export"
-        )
-    if task_type == TaskType.classification:
-        export_from_ultralytics_to_hf_classification(
-            dataset_dir=dataset_dir,
-            repo_id=repo_id,
-            label_names=label_names,
-            merge_labels=merge_labels,
-            is_openfoodfacts_dataset=is_openfoodfacts_dataset,
-            openfoodfacts_flavor=openfoodfacts_flavor,
-        )
-def export_from_ultralytics_to_hf_classification(
-    dataset_dir: Path,
-    repo_id: str,
-    label_names: list[str],
-    merge_labels: bool = False,
-    is_openfoodfacts_dataset: bool = False,
-    openfoodfacts_flavor: Flavor = Flavor.off,
-) -> None:
-    """Export an Ultralytics classification dataset to a Hugging Face dataset.
-    The Ultralytics dataset directory should contain 'train', 'val' and/or
-    'test' subdirectories, each containing subdirectories for each label.
-    Args:
-        dataset_dir (Path): Path to the Ultralytics dataset directory.
-        repo_id (str): Hugging Face repository ID to push the dataset to.
-        label_names (list[str]): List of label names.
-        merge_labels (bool): Whether to merge all labels into a single label
-            named 'object'.
-        is_openfoodfacts_dataset (bool): Whether the dataset is from
-            Open Food Facts. If True, the `off_image_id` and `image_url` will
-            be generated automatically. `off_image_id` is extracted from the
-            image filename.
-        openfoodfacts_flavor (Flavor): Flavor of Open Food Facts dataset. This
-            is ignored if `is_openfoodfacts_dataset` is False.
-    """
-    logger.info("Repo ID: %s, dataset_dir: %s", repo_id, dataset_dir)
-    if not any((dataset_dir / split).is_dir() for split in ["train", "val", "test"]):
-        raise ValueError(
-            f"Dataset directory {dataset_dir} does not contain 'train', 'val' or 'test' subdirectories"
-        )
-    # Save output as pickle
-    for split in ["train", "val", "test"]:
-        split_dir = dataset_dir / split
-        if not split_dir.is_dir():
-            logger.info("Skipping missing split directory: %s", split_dir)
-            continue
-        with tempfile.TemporaryDirectory() as tmp_dir_str:
-            tmp_dir = Path(tmp_dir_str)
-            for label_dir in (d for d in split_dir.iterdir() if d.is_dir()):
-                label_name = label_dir.name
-                if merge_labels:
-                    label_name = "object"
-                if label_name not in label_names:
-                    raise ValueError(
-                        "Label name %s not in provided label names (label names: %s)"
-                        % (label_name, label_names),
-                    )
-                label_id = label_names.index(label_name)
-                for image_path in label_dir.glob("*"):
-                    if is_openfoodfacts_dataset:
-                        image_stem_parts = image_path.stem.split("_")
-                        barcode = image_stem_parts[0]
-                        off_image_id = image_stem_parts[1]
-                        image_id = f"{barcode}_{off_image_id}"
-                        image_url = generate_image_url(
-                            barcode, off_image_id, flavor=openfoodfacts_flavor
-                        )
-                    else:
-                        image_id = image_path.stem
-                        barcode = ""
-                        off_image_id = ""
-                        image_url = ""
-                    image = Image.open(image_path)
-                    image.load()
-                    if image.mode != "RGB":
-                        image = image.convert("RGB")
-                    # Rotate image according to exif orientation using Pillow
-                    ImageOps.exif_transpose(image, in_place=True)
-                    sample = {
-                        "image_id": image_id,
-                        "image": image,
-                        "width": image.width,
-                        "height": image.height,
-                        "meta": {
-                            "barcode": barcode,
-                            "off_image_id": off_image_id,
-                            "image_url": image_url,
-                        },
-                        "category_id": label_id,
-                        "category_name": label_name,
-                    }
-                    with open(tmp_dir / f"{split}_{image_id}.pkl", "wb") as f:
-                        pickle.dump(sample, f)
-            hf_ds = datasets.Dataset.from_generator(
-                functools.partial(_pickle_sample_generator, tmp_dir),
-                features=HF_DS_CLASSIFICATION_FEATURES,
-            )
-            hf_ds.push_to_hub(repo_id, split=split)
-def export_to_hf_llm_image_extraction(
-    sample_iter: Iterator[LLMImageExtractionSample],
-    split: str,
-    repo_id: str,
-    revision: str = "main",
-    tmp_dir: Path | None = None,
-) -> None:
-    """Export LLM image extraction samples to a Hugging Face dataset.
-    Args:
-        sample_iter (Iterator[LLMImageExtractionSample]): Iterator of samples
-            to export.
-        split (str): Name of the dataset split (e.g., 'train', 'val').
-        repo_id (str): Hugging Face repository ID to push the dataset to.
-        revision (str): Revision (branch, tag or commit) to use for the
-            Hugging Face Datasets repository.
-        tmp_dir (Path | None): Temporary directory to use for intermediate
-            files. If None, a temporary directory will be created
-            automatically.
-    """
-    logger.info(
-        "Repo ID: %s, revision: %s, split: %s, tmp_dir: %s",
-        repo_id,
-        revision,
-        split,
-        tmp_dir,
-    )
-    tmp_dir_with_context: PathWithContext | tempfile.TemporaryDirectory
-    if tmp_dir:
-        tmp_dir.mkdir(parents=True, exist_ok=True)
-        tmp_dir_with_context = PathWithContext(tmp_dir)
-    else:
-        tmp_dir_with_context = tempfile.TemporaryDirectory()
-    with tmp_dir_with_context as tmp_dir_str:
-        tmp_dir = Path(tmp_dir_str)
-        for sample in tqdm.tqdm(sample_iter, desc="samples"):
-            image = sample.image
-            # Rotate image according to exif orientation using Pillow
-            image = ImageOps.exif_transpose(image)
-            image_id = sample.image_id
-            sample = {
-                "image_id": image_id,
-                "image": image,
-                "meta": sample.meta.model_dump(),
-                "output": sample.output,
-            }
-            # Save output as pickle
-            with open(tmp_dir / f"{split}_{image_id}.pkl", "wb") as f:
-                pickle.dump(sample, f)
-        hf_ds = datasets.Dataset.from_generator(
-            functools.partial(_pickle_sample_generator, tmp_dir),
-            features=HF_DS_LLM_IMAGE_EXTRACTION_FEATURES,
-        )
-        hf_ds.push_to_hub(repo_id, split=split, revision=revision)

labelr/google_genai.py CHANGED Viewed

@@ -11,10 +11,11 @@ import orjson
 import typer
 from gcloud.aio.storage import Storage
 from openfoodfacts import Flavor
-from openfoodfacts.images import download_image, generate_image_url
+from openfoodfacts.images import generate_image_url
 from tqdm.asyncio import tqdm
-from labelr.sample import LLMImageExtractionSample, SampleMeta
+from labelr.sample.common import SampleMeta
+from labelr.sample.llm import LLMImageExtractionSample
 from labelr.utils import download_image_from_gcs
 try:
@@ -335,6 +336,7 @@ def generate_sample_iter(
     """
     skipped = 0
     invalid = 0
+    storage_client = storage.Client()
     with prediction_path.open("r") as f_in:
         for i, sample_str in enumerate(f_in):
             if i < skip:
@@ -349,6 +351,7 @@ def generate_sample_iter(
                     sample=sample,
                     is_openfoodfacts_dataset=is_openfoodfacts_dataset,
                     openfoodfacts_flavor=openfoodfacts_flavor,
+                    storage_client=storage_client,
                 )
             except Exception as e:
                 if raise_on_invalid_sample:
@@ -370,6 +373,7 @@ def generate_sample_from_prediction(
     sample: JSONType,
     is_openfoodfacts_dataset: bool = False,
     openfoodfacts_flavor: Flavor = Flavor.off,
+    storage_client: storage.Client | None = None,
 ) -> LLMImageExtractionSample:
     """Generate a LLMImageExtractionSample from a prediction sample.
     Args:
@@ -378,13 +382,15 @@ def generate_sample_from_prediction(
         is_openfoodfacts_dataset (bool): Whether the dataset is from Open Food
             Facts.
         openfoodfacts_flavor (Flavor): Flavor of the Open Food Facts dataset.
+        storage_client (storage.Client | None): Optional Google Cloud Storage
+            client. If not provided, a new client will be created.
     Returns:
         LLMImageExtractionSample: Generated sample.
     """
     image_id = sample["key"][len("key:") :]
     response_str = sample["response"]["candidates"][0]["content"]["parts"][0]["text"]
     image_uri = sample["request"]["contents"][0]["parts"][1]["file_data"]["file_uri"]
-    image = download_image_from_gcs(image_uri=image_uri)
+    image = download_image_from_gcs(image_uri=image_uri, client=storage_client)
     response = orjson.loads(response_str)
     jsonschema.validate(response, json_schema)

labelr/main.py CHANGED Viewed

@@ -4,11 +4,13 @@ import typer
 from openfoodfacts.utils import get_logger
 from labelr.apps import datasets as dataset_app
+from labelr.apps import directus as directus_app
 from labelr.apps import evaluate as evaluate_app
 from labelr.apps import google_batch as google_batch_app
 from labelr.apps import hugging_face as hf_app
 from labelr.apps import label_studio as ls_app
 from labelr.apps import train as train_app
+from labelr import config as _config
 app = typer.Typer(pretty_exceptions_show_locals=False)
@@ -60,6 +62,17 @@ def predict(
         typer.echo(result)
+@app.command()
+def config(name: str, value: str):
+    """Set a Labelr configuration value.
+    The configuration is stored in a JSON file at ~/.config/.labelr/config.json.
+    """
+    typer.echo(f"Set '{name}' to '{value}'")
+    _config.set_file_config(name, value)
+    typer.echo(f"Configuration saved to {_config.CONFIG_PATH}")
 app.add_typer(
     ls_app.app,
     name="ls",
@@ -90,6 +103,9 @@ app.add_typer(
     name="google-batch",
     help="Generate datasets and launch batch jobs on Google Gemini.",
 )
+app.add_typer(
+    directus_app.app, name="directus", help="Manage directus collections and items."
+)
 if __name__ == "__main__":
     app()

labelr/sample/__init__.py ADDED Viewed

File without changes

labelr/sample/classification.py ADDED Viewed

@@ -0,0 +1,17 @@
+import datasets
+HF_DS_CLASSIFICATION_FEATURES = datasets.Features(
+    {
+        "image_id": datasets.Value("string"),
+        "image": datasets.features.Image(),
+        "width": datasets.Value("int64"),
+        "height": datasets.Value("int64"),
+        "meta": {
+            "barcode": datasets.Value("string"),
+            "off_image_id": datasets.Value("string"),
+            "image_url": datasets.Value("string"),
+        },
+        "category_id": datasets.Value("int64"),
+        "category_name": datasets.Value("string"),
+    }
+)

labelr/sample/common.py ADDED Viewed

@@ -0,0 +1,14 @@
+from pydantic import BaseModel, Field
+class SampleMeta(BaseModel):
+    barcode: str | None = Field(
+        ..., description="The barcode of the product, if applicable"
+    )
+    off_image_id: str | None = Field(
+        ...,
+        description="The Open Food Facts image ID associated with the image, if applicable",
+    )
+    image_url: str | None = Field(
+        ..., description="The URL of the image, if applicable"
+    )

labelr/sample/llm.py ADDED Viewed

@@ -0,0 +1,75 @@
+import typing
+from collections.abc import Iterator
+from pathlib import Path
+import datasets
+import orjson
+from PIL import Image
+from pydantic import BaseModel, Field
+from labelr.sample.common import SampleMeta
+from labelr.utils import download_image
+class LLMImageExtractionSample(BaseModel):
+    class Config:
+        # required to allow PIL Image type
+        arbitrary_types_allowed = True
+    image_id: str = Field(
+        ...,
+        description="unique ID for the image. For Open Food Facts images, it follows the "
+        "format `barcode:imgid`",
+    )
+    image: Image.Image = Field(..., description="Image to extract information from")
+    output: str | None = Field(..., description="Expected response of the LLM")
+    meta: SampleMeta = Field(..., description="Metadata associated with the sample")
+HF_DS_LLM_IMAGE_EXTRACTION_FEATURES = datasets.Features(
+    {
+        "image_id": datasets.Value("string"),
+        "image": datasets.features.Image(),
+        "output": datasets.features.Value("string"),
+        "meta": {
+            "barcode": datasets.Value("string"),
+            "off_image_id": datasets.Value("string"),
+            "image_url": datasets.Value("string"),
+        },
+    }
+)
+def load_llm_image_extraction_dataset_from_jsonl(
+    dataset_path: Path, **kwargs
+) -> Iterator[LLMImageExtractionSample]:
+    """Load a Hugging Face dataset for LLM image extraction from a JSONL file.
+    Args:
+        dataset_path (Path): Path to the JSONL dataset file.
+        **kwargs: Additional keyword arguments to pass to the image downloader.
+    Yields:
+        Iterator[LLMImageExtractionSample]: Iterator of LLM image extraction
+            samples.
+    """
+    with dataset_path.open("r") as f:
+        for line in f:
+            item = orjson.loads(line)
+            image_id = item["image_id"]
+            image_url = item["image_url"]
+            image = typing.cast(Image.Image, download_image(image_url, **kwargs))
+            barcode = item.pop("barcode", None)
+            off_image_id = item.pop("off_image_id", None)
+            output = item.pop("output", None)
+            meta = SampleMeta(
+                barcode=barcode,
+                off_image_id=off_image_id,
+                image_url=image_url,
+            )
+            sample = LLMImageExtractionSample(
+                image_id=image_id,
+                image=image,
+                output=output,
+                meta=meta,
+            )
+            yield sample

labelr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl

labelr 0.9.0__py3-none-any.whl → 0.11.0__py3-none-any.whl