PyPI - labelr - Versions diffs - 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl - Mend

labelr 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (21) hide show

labelr/apps/datasets.py +12 -25
labelr/apps/evaluate.py +41 -0
labelr/apps/google_batch.py +289 -0
labelr/apps/hugging_face.py +57 -0
labelr/apps/{projects.py → label_studio.py} +65 -9
labelr/apps/train.py +22 -4
labelr/evaluate/__init__.py +0 -0
labelr/evaluate/object_detection.py +100 -0
labelr/export.py +64 -7
labelr/google_genai.py +415 -0
labelr/main.py +23 -8
labelr/sample.py +72 -4
labelr/utils.py +35 -0
{labelr-0.7.0.dist-info → labelr-0.9.0.dist-info}/METADATA +17 -6
labelr-0.9.0.dist-info/RECORD +28 -0
labelr/apps/users.py +0 -36
labelr-0.7.0.dist-info/RECORD +0 -23
{labelr-0.7.0.dist-info → labelr-0.9.0.dist-info}/WHEEL +0 -0
{labelr-0.7.0.dist-info → labelr-0.9.0.dist-info}/entry_points.txt +0 -0
{labelr-0.7.0.dist-info → labelr-0.9.0.dist-info}/licenses/LICENSE +0 -0
{labelr-0.7.0.dist-info → labelr-0.9.0.dist-info}/top_level.txt +0 -0

labelr/sample.py CHANGED Viewed

@@ -1,16 +1,20 @@
 import logging
 import random
 import string
+import typing
 import datasets
+import PIL
 from openfoodfacts import Flavor
 from openfoodfacts.barcode import normalize_barcode
 from openfoodfacts.images import download_image, generate_image_url
+from PIL import Image, ImageOps
+from pydantic import BaseModel, Field
 logger = logging.getLogger(__name__)
-def format_annotation_results_from_hf(
+def format_annotation_results_from_hf_to_ls(
     objects: dict, image_width: int, image_height: int
 ):
     """Format annotation results from a HF object detection dataset into Label
@@ -56,12 +60,12 @@ def format_annotation_results_from_hf(
     return annotation_results
-def format_object_detection_sample_from_hf(hf_sample: dict, split: str) -> dict:
+def format_object_detection_sample_from_hf_to_ls(hf_sample: dict, split: str) -> dict:
     hf_meta = hf_sample["meta"]
     objects = hf_sample["objects"]
     image_width = hf_sample["width"]
     image_height = hf_sample["height"]
-    annotation_results = format_annotation_results_from_hf(
+    annotation_results = format_annotation_results_from_hf_to_ls(
         objects, image_width, image_height
     )
     image_id = hf_sample["image_id"]
@@ -149,8 +153,24 @@ def format_object_detection_sample_to_hf(
     annotations: list[dict],
     label_names: list[str],
     merge_labels: bool = False,
-    use_aws_cache: bool = True,
+    use_aws_cache: bool = False,
 ) -> dict | None:
+    """Format a Label Studio object detection sample to Hugging Face format.
+    Args:
+        task_data: The task data from Label Studio.
+        annotations: The annotations from Label Studio.
+        label_names: The list of label names.
+        merge_labels: Whether to merge all labels into a single label (the
+            first label in `label_names`).
+        use_aws_cache: Whether to use AWS cache when downloading images.
+    Returns:
+        The formatted sample, or None in the following cases:
+        - More than one annotation is found
+        - No annotation is found
+        - An error occurs when downloading the image
+    """
     if len(annotations) > 1:
         logger.info("More than one annotation found, skipping")
         return None
@@ -186,6 +206,13 @@ def format_object_detection_sample_to_hf(
         logger.error("Failed to download image: %s", image_url)
         return None
+    # Correct image orientation using EXIF data
+    # Label Studio provides bounding boxes based on the displayed image (after
+    # eventual EXIF rotation), so we need to apply the same transformation to
+    # the image.
+    # Indeed, Hugging Face stores images without applying EXIF rotation, and
+    # EXIF data is not preserved in the dataset.
+    ImageOps.exif_transpose(typing.cast(PIL.Image.Image, image), in_place=True)
     return {
         "image_id": task_data["image_id"],
         "image": image,
@@ -204,6 +231,34 @@ def format_object_detection_sample_to_hf(
     }
+class SampleMeta(BaseModel):
+    barcode: str | None = Field(
+        ..., description="The barcode of the product, if applicable"
+    )
+    off_image_id: str | None = Field(
+        ...,
+        description="The Open Food Facts image ID associated with the image, if applicable",
+    )
+    image_url: str | None = Field(
+        ..., description="The URL of the image, if applicable"
+    )
+class LLMImageExtractionSample(BaseModel):
+    class Config:
+        # required to allow PIL Image type
+        arbitrary_types_allowed = True
+    image_id: str = Field(
+        ...,
+        description="unique ID for the image. For Open Food Facts images, it follows the "
+        "format `barcode:imgid`",
+    )
+    image: Image.Image = Field(..., description="Image to extract information from")
+    output: str = Field(..., description="Expected response of the LLM")
+    meta: SampleMeta = Field(..., description="Metadata associated with the sample")
 # The HuggingFace Dataset features
 HF_DS_OBJECT_DETECTION_FEATURES = datasets.Features(
     {
@@ -240,3 +295,16 @@ HF_DS_CLASSIFICATION_FEATURES = datasets.Features(
         "category_name": datasets.Value("string"),
     }
 )
+HF_DS_LLM_IMAGE_EXTRACTION_FEATURES = datasets.Features(
+    {
+        "image_id": datasets.Value("string"),
+        "image": datasets.features.Image(),
+        "output": datasets.features.Value("string"),
+        "meta": {
+            "barcode": datasets.Value("string"),
+            "off_image_id": datasets.Value("string"),
+            "image_url": datasets.Value("string"),
+        },
+    }
+)

labelr/utils.py CHANGED Viewed

@@ -1,3 +1,10 @@
+import io
+from pathlib import Path
+from google.cloud import storage
+from PIL import Image
 def parse_hf_repo_id(hf_repo_id: str) -> tuple[str, str]:
     """Parse the repo_id and the revision from a hf_repo_id in the format:
     `org/repo-name@revision`.
@@ -11,3 +18,31 @@ def parse_hf_repo_id(hf_repo_id: str) -> tuple[str, str]:
         revision = "main"
     return hf_repo_id, revision
+def download_image_from_gcs(image_uri: str) -> Image.Image:
+    """Download an image from a Google Cloud Storage URI and return it as a
+    PIL Image."""
+    storage_client = storage.Client()
+    bucket_name, blob_name = image_uri.replace("gs://", "").split("/", 1)
+    bucket = storage_client.bucket(bucket_name)
+    blob = bucket.blob(blob_name)
+    image_data = blob.download_as_bytes()
+    return Image.open(io.BytesIO(image_data))
+class PathWithContext:
+    """A context manager that yields a Path object.
+    This is useful to have a common interface with tempfile.TemporaryDirectory
+    without actually creating a temporary directory.
+    """
+    def __init__(self, path: Path):
+        self.path = path
+    def __enter__(self) -> Path:
+        return self.path
+    def __exit__(self, exc_type, exc_value, traceback) -> None:
+        pass

{labelr-0.7.0.dist-info → labelr-0.9.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: labelr
-Version: 0.7.0
+Version: 0.9.0
 Summary: A command-line tool to manage labeling tasks with Label Studio.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -13,10 +13,19 @@ Requires-Dist: openfoodfacts>=2.9.0
 Requires-Dist: typer>=0.15.1
 Requires-Dist: google-cloud-batch==0.18.0
 Requires-Dist: huggingface-hub
+Requires-Dist: deepdiff>=8.6.1
+Requires-Dist: rapidfuzz>=3.14.3
+Requires-Dist: aiohttp
+Requires-Dist: aiofiles
+Requires-Dist: orjson
 Provides-Extra: ultralytics
 Requires-Dist: ultralytics==8.3.223; extra == "ultralytics"
 Provides-Extra: fiftyone
 Requires-Dist: fiftyone~=1.10.0; extra == "fiftyone"
+Provides-Extra: google
+Requires-Dist: google-genai>=1.56.0; extra == "google"
+Requires-Dist: gcloud-aio-storage; extra == "google"
+Requires-Dist: google-cloud-storage; extra == "google"
 Dynamic: license-file
 # Labelr
@@ -73,7 +82,7 @@ Once you have a Label Studio instance running, you can create a project easily.
 For an object detection task, a command allows you to create the configuration file automatically:
 ```bash
-labelr projects create-config --labels 'label1' --labels 'label2' --output-file label_config.xml
+labelr ls create-config --labels 'label1' --labels 'label2' --output-file label_config.xml
 ```
 where `label1` and `label2` are the labels you want to use for the object detection task, and `label_config.xml` is the output file that will contain the configuration.
@@ -81,17 +90,19 @@ where `label1` and `label2` are the labels you want to use for the object detect
 Then, you can create a project on Label Studio with the following command:
 ```bash
-labelr projects create --title my_project --api-key API_KEY --config-file label_config.xml
+labelr ls create --title my_project --api-key API_KEY --config-file label_config.xml
 ```
 where `API_KEY` is the API key of the Label Studio instance (API key is available at Account page), and `label_config.xml` is the configuration file of the project.
+`ls` stands for Label Studio in the CLI.
 #### Create a dataset file
 If you have a list of images, for an object detection task, you can quickly create a dataset file with the following command:
 ```bash
-labelr projects create-dataset-file --input-file image_urls.txt --output-file dataset.json
+labelr ls create-dataset-file --input-file image_urls.txt --output-file dataset.json
 ```
 where `image_urls.txt` is a file containing the URLs of the images, one per line, and `dataset.json` is the output file.
@@ -101,7 +112,7 @@ where `image_urls.txt` is a file containing the URLs of the images, one per line
 Next, import the generated data to a project with the following command:
 ```bash
-labelr projects import-data --project-id PROJECT_ID --dataset-path dataset.json
+labelr ls import-data --project-id PROJECT_ID --dataset-path dataset.json
 ```
 where `PROJECT_ID` is the ID of the project you created.
@@ -117,7 +128,7 @@ To accelerate annotation, you can pre-annotate the images with an object detecti
 To pre-annotate the data with Triton, use the following command:
 ```bash
-labelr projects add-prediction --project-id PROJECT_ID --backend ultralytics --labels 'product' --labels 'price tag' --label-mapping '{"price tag": "price-tag"}'
+labelr ls add-prediction --project-id PROJECT_ID --backend ultralytics --labels 'product' --labels 'price tag' --label-mapping '{"price tag": "price-tag"}'
 ```
 where `labels` is the list of labels to use for the object detection task (you can add as many labels as you want).

labelr-0.9.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,28 @@
+labelr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
+labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
+labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
+labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
+labelr/dataset_features.py,sha256=ZC9QAUw9oKHqyUPla2h3xQFaRT9sHq8hkPNN4RDDwmo,1257
+labelr/export.py,sha256=aPfQ-RaK3C2WJrzbETYdC9kRe0MTpCRs0nu5l2SqiRg,20092
+labelr/google_genai.py,sha256=vn_UNQOxUDOTTTWz-emAVErjOtQmnlxM_m8yo2q01Ok,14401
+labelr/main.py,sha256=OTiJSkD_TrzQmQQm291FhknD-HQQTWfBEBgImxqL0KM,2634
+labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
+labelr/sample.py,sha256=VL-iKDvLaIeViJ0TaBY9uCbv0ey528fkaRTYE-Zr12I,10347
+labelr/types.py,sha256=8CHfLyifF_N94OYDhG-7IcWboOh9o0Z_0LBtQapT8TQ,313
+labelr/utils.py,sha256=-zLOWLbvLwtNFtzzwZ6RjJD9GstoYR-gt4wz9r6u9lE,1363
+labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+labelr/apps/datasets.py,sha256=kJQWwm3mjA2uWIA8O_DslM7OS5ht5mgWqcFC_zF4gCo,11187
+labelr/apps/evaluate.py,sha256=UC4CuSKa4vgR5xTBZ-dFgp_1pYnkM55s2IJgix0YtkI,1157
+labelr/apps/google_batch.py,sha256=BMcfBkDwfu-zOOR80bYmtEy6k_Qc70m7K7wmp4Ww0r8,9335
+labelr/apps/hugging_face.py,sha256=B0GaDZeUZj2A7nEeC1OtCANb0DqvBkhWwFWM_9Nm2kU,1608
+labelr/apps/label_studio.py,sha256=su9shoi0K9PmI8RBLipV2KQf_MRjkF5vy5-JUcbXr5A,16852
+labelr/apps/train.py,sha256=wmOSpO9JsrwCXYMgRg2srMbV5B5TvnlfhAKPqUt6wSg,7328
+labelr/evaluate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+labelr/evaluate/object_detection.py,sha256=QJIwrDY-Vsy0-It6tZSkN3qgAlmIu2W1-kGdmibiPSQ,3349
+labelr-0.9.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+labelr-0.9.0.dist-info/METADATA,sha256=cNkf4LPmbO_k3UuR7O7NtcCwRF-Z5c-yIyQRAocsjww,7322
+labelr-0.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+labelr-0.9.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
+labelr-0.9.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
+labelr-0.9.0.dist-info/RECORD,,

labelr/apps/users.py DELETED Viewed

@@ -1,36 +0,0 @@
-from typing import Annotated
-import typer
-from ..config import LABEL_STUDIO_DEFAULT_URL
-app = typer.Typer()
-# Label Studio user management
-@app.command()
-def list(
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
-):
-    """List all users in Label Studio."""
-    from label_studio_sdk.client import LabelStudio
-    ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
-    for user in ls.users.list():
-        print(f"{user.id:02d}: {user.email}")
-@app.command()
-def delete(
-    user_id: int,
-    api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
-    label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
-):
-    """Delete a user from Label Studio."""
-    from label_studio_sdk.client import LabelStudio
-    ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
-    ls.users.delete(user_id)

labelr-0.7.0.dist-info/RECORD DELETED Viewed

@@ -1,23 +0,0 @@
-labelr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
-labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
-labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
-labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
-labelr/dataset_features.py,sha256=ZC9QAUw9oKHqyUPla2h3xQFaRT9sHq8hkPNN4RDDwmo,1257
-labelr/export.py,sha256=gjC2_RJ_yX8zVYXyo1RAgI07iXSgkeqckOTEzSscRXc,17940
-labelr/main.py,sha256=CioMPtaPoGL_5Oxwj8PfalhTyFahMbfp2kd9KdZzm3Y,2258
-labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
-labelr/sample.py,sha256=unu9AQ64FhKPgssuL7gb3qyMd1EQJvMOfqvjdefmWOU,7807
-labelr/types.py,sha256=8CHfLyifF_N94OYDhG-7IcWboOh9o0Z_0LBtQapT8TQ,313
-labelr/utils.py,sha256=e0R15jePWBzRdN8LB6kBSH5Dl_P0MNEtRmeqB9eu5d8,415
-labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-labelr/apps/datasets.py,sha256=4PMfKS5c7Zw3-NNRBkFbZidMQUI2RBMcXFYBvWHLz3o,11688
-labelr/apps/projects.py,sha256=HpgqIaPrUQzIR7eOLn4EBbEzXRi7hoWStT4jLMQPcBg,15153
-labelr/apps/train.py,sha256=sI0p3h39LPXhynwl_yMuZnIPlaqlcWSO_81zPC3H3yI,6886
-labelr/apps/users.py,sha256=twQSlpHxE0hrYkgrJpEFbK8lYfWnpJr8vyfLHLtdAUU,909
-labelr-0.7.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
-labelr-0.7.0.dist-info/METADATA,sha256=NghQ_6mNj1Dkets_GlOOOyoAVEQqoPBbbJXhysOKAWI,6991
-labelr-0.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-labelr-0.7.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
-labelr-0.7.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
-labelr-0.7.0.dist-info/RECORD,,

{labelr-0.7.0.dist-info → labelr-0.9.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{labelr-0.7.0.dist-info → labelr-0.9.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{labelr-0.7.0.dist-info → labelr-0.9.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{labelr-0.7.0.dist-info → labelr-0.9.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

labelr 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

labelr 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl