labelr 0.8.0__py3-none-any.whl → 0.10.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
labelr/sample/llm.py ADDED
@@ -0,0 +1,75 @@
1
+ import typing
2
+ from collections.abc import Iterator
3
+ from pathlib import Path
4
+
5
+ import datasets
6
+ import orjson
7
+ from PIL import Image
8
+ from pydantic import BaseModel, Field
9
+
10
+ from labelr.sample.common import SampleMeta
11
+ from labelr.utils import download_image
12
+
13
+
14
# Pydantic model describing one sample of an LLM image-extraction dataset:
# the image itself, the response the LLM is expected to give for it, and the
# metadata attached to the sample.
# NOTE: documented with comments rather than a class docstring so that the
# schema pydantic generates for this model is left untouched.
class LLMImageExtractionSample(BaseModel):
    class Config:
        # `Image.Image` is not a type pydantic knows how to validate, so
        # arbitrary types have to be allowed for the `image` field.
        arbitrary_types_allowed = True

    # Identifier of the image.
    image_id: str = Field(
        ...,
        description=(
            "unique ID for the image. For Open Food Facts images, it follows the "
            "format `barcode:imgid`"
        ),
    )
    # Decoded PIL image the LLM extracts information from.
    image: Image.Image = Field(
        ...,
        description="Image to extract information from",
    )
    # Required field, but None is an accepted value.
    output: str | None = Field(
        ...,
        description="Expected response of the LLM",
    )
    # Sample-level metadata.
    meta: SampleMeta = Field(
        ...,
        description="Metadata associated with the sample",
    )
27
+
28
+
29
# Hugging Face `datasets` schema for LLM image-extraction samples. The
# top-level keys match the fields of `LLMImageExtractionSample`; every column
# except the image itself is stored as a string.
HF_DS_LLM_IMAGE_EXTRACTION_FEATURES = datasets.Features(
    {
        "image_id": datasets.Value("string"),
        "image": datasets.features.Image(),
        "output": datasets.Value("string"),
        # Nested struct holding the sample metadata.
        "meta": {
            "barcode": datasets.Value("string"),
            "off_image_id": datasets.Value("string"),
            "image_url": datasets.Value("string"),
        },
    }
)
41
+
42
+
43
def load_llm_image_extraction_dataset_from_jsonl(
    dataset_path: Path, **kwargs
) -> Iterator[LLMImageExtractionSample]:
    """Stream LLM image extraction samples from a JSONL file.

    Every non-blank line must be a JSON object with the required keys
    `image_id` and `image_url`. The keys `barcode`, `off_image_id` and
    `output` are optional and default to None. The image referenced by
    `image_url` is downloaded while the record is being read, so samples are
    produced lazily, one download at a time.

    Args:
        dataset_path (Path): Path to the JSONL dataset file.
        **kwargs: Additional keyword arguments to pass to the image downloader.

    Yields:
        LLMImageExtractionSample: One sample per record. Records whose image
            download returned None (only possible when the caller disabled
            error raising through `kwargs`) are skipped.

    Raises:
        KeyError: If a record lacks `image_id` or `image_url`.
        orjson.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Binary mode: orjson parses UTF-8 bytes directly, so decoding no longer
    # depends on the platform's default text encoding.
    with dataset_path.open("rb") as f:
        for line in f:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line) instead
                # of failing on them in the JSON parser.
                continue

            item = orjson.loads(line)
            image_id = item["image_id"]
            image_url = item["image_url"]

            image = download_image(image_url, **kwargs)
            if image is None:
                # The downloader only returns None when asked not to raise;
                # without an image no valid sample can be built.
                continue

            meta = SampleMeta(
                barcode=item.get("barcode"),
                off_image_id=item.get("off_image_id"),
                image_url=image_url,
            )
            yield LLMImageExtractionSample(
                image_id=image_id,
                image=typing.cast(Image.Image, image),
                output=item.get("output"),
                meta=meta,
            )
@@ -249,20 +249,3 @@ HF_DS_OBJECT_DETECTION_FEATURES = datasets.Features(
249
249
  },
250
250
  }
251
251
  )
252
-
253
-
254
- HF_DS_CLASSIFICATION_FEATURES = datasets.Features(
255
- {
256
- "image_id": datasets.Value("string"),
257
- "image": datasets.features.Image(),
258
- "width": datasets.Value("int64"),
259
- "height": datasets.Value("int64"),
260
- "meta": {
261
- "barcode": datasets.Value("string"),
262
- "off_image_id": datasets.Value("string"),
263
- "image_url": datasets.Value("string"),
264
- },
265
- "category_id": datasets.Value("int64"),
266
- "category_name": datasets.Value("string"),
267
- }
268
- )
labelr/utils.py CHANGED
@@ -1,3 +1,12 @@
1
+ import io
2
+ from pathlib import Path
3
+
4
+ from google.cloud import storage
5
+ from openfoodfacts.images import download_image as _download_image
6
+ from openfoodfacts.utils import ImageDownloadItem
7
+ from PIL import Image
8
+
9
+
1
10
  def parse_hf_repo_id(hf_repo_id: str) -> tuple[str, str]:
2
11
  """Parse the repo_id and the revision from a hf_repo_id in the format:
3
12
  `org/repo-name@revision`.
@@ -11,3 +20,79 @@ def parse_hf_repo_id(hf_repo_id: str) -> tuple[str, str]:
11
20
  revision = "main"
12
21
 
13
22
  return hf_repo_id, revision
23
+
24
+
25
def download_image(
    image: str | tuple[str, str],
    *,
    error_raise: bool = True,
    return_struct: bool = False,
    **kwargs,
) -> Image.Image | ImageDownloadItem | None:
    """Download an image from a URL or GCS URI and return it as a PIL Image.

    Strings starting with `gs://` are fetched with `download_image_from_gcs`;
    every other input is delegated to the `openfoodfacts` image downloader.

    Args:
        image (str | tuple[str, str]): The URL or GCS URI of the image.
        error_raise (bool): Whether to raise an error if the image cannot be
            downloaded.
        return_struct (bool): Whether to return an ImageDownloadItem struct
            instead of a PIL Image.
        **kwargs: Additional arguments to pass to the download function. For
            GCS URIs they are forwarded to `download_image_from_gcs`.

    Returns:
        Image.Image | ImageDownloadItem | None: The downloaded image as a PIL
            Image or an ImageDownloadItem struct. When `error_raise` is False
            and a GCS download fails, None is returned (or, with
            `return_struct`, an ImageDownloadItem carrying the error message
            and no image).
    """
    if isinstance(image, str) and image.startswith("gs://"):
        try:
            return download_image_from_gcs(
                image, return_struct=return_struct, **kwargs
            )
        except TypeError:
            # Most likely an unsupported keyword argument: a programming
            # error that must surface regardless of `error_raise`.
            raise
        except Exception as exc:
            # Boundary handler: honour `error_raise` for GCS downloads too,
            # instead of silently ignoring the flag on this branch.
            if error_raise:
                raise
            if return_struct:
                return ImageDownloadItem(url=image, image=None, error=str(exc))
            return None

    return _download_image(
        image,
        error_raise=error_raise,
        return_struct=return_struct,
        **kwargs,
    )
52
+
53
+
54
def download_image_from_gcs(
    image_uri: str, client: storage.Client | None = None, return_struct: bool = False
) -> Image.Image | ImageDownloadItem:
    """Download an image from a Google Cloud Storage URI and return it as a
    PIL Image.

    Args:
        image_uri (str): The GCS URI of the image
            (e.g., gs://bucket_name/path/to/image.jpg).
        client (storage.Client | None): An optional Google Cloud Storage
            client. If not provided, a new client will be created.
        return_struct (bool): Whether to return an ImageDownloadItem struct
            (with `url` set to `image_uri` and `error` set to None) instead
            of a PIL Image.

    Returns:
        Image.Image | ImageDownloadItem: The downloaded image as a PIL Image
            or an ImageDownloadItem struct.

    Raises:
        ValueError: If `image_uri` lacks a bucket name or an object path.
    """
    # Strip only the leading scheme: `str.replace` would also delete any
    # later "gs://" occurring inside the object name.
    bucket_name, _, blob_name = image_uri.removeprefix("gs://").partition("/")
    if not bucket_name or not blob_name:
        raise ValueError(
            f"invalid GCS URI, expected gs://bucket_name/path/to/object: {image_uri!r}"
        )

    # Validate the URI first so that a malformed one fails without needing
    # a storage client.
    if client is None:
        client = storage.Client()

    blob = client.bucket(bucket_name).blob(blob_name)
    image_data = blob.download_as_bytes()
    pil_image = Image.open(io.BytesIO(image_data))

    if return_struct:
        return ImageDownloadItem(
            url=image_uri,
            image=pil_image,
            error=None,
        )
    return pil_image
82
+
83
+
84
+ class PathWithContext:
85
+ """A context manager that yields a Path object.
86
+
87
+ This is useful to have a common interface with tempfile.TemporaryDirectory
88
+ without actually creating a temporary directory.
89
+ """
90
+
91
+ def __init__(self, path: Path):
92
+ self.path = path
93
+
94
+ def __enter__(self) -> Path:
95
+ return self.path
96
+
97
+ def __exit__(self, exc_type, exc_value, traceback) -> None:
98
+ pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: labelr
3
- Version: 0.8.0
3
+ Version: 0.10.0
4
4
  Summary: A command-line tool to manage labeling tasks with Label Studio.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -13,6 +13,14 @@ Requires-Dist: openfoodfacts>=2.9.0
13
13
  Requires-Dist: typer>=0.15.1
14
14
  Requires-Dist: google-cloud-batch==0.18.0
15
15
  Requires-Dist: huggingface-hub
16
+ Requires-Dist: deepdiff>=8.6.1
17
+ Requires-Dist: rapidfuzz>=3.14.3
18
+ Requires-Dist: aiohttp
19
+ Requires-Dist: aiofiles
20
+ Requires-Dist: orjson
21
+ Requires-Dist: google-cloud-storage
22
+ Requires-Dist: gcloud-aio-storage
23
+ Requires-Dist: google-genai>=1.56.0
16
24
  Provides-Extra: ultralytics
17
25
  Requires-Dist: ultralytics==8.3.223; extra == "ultralytics"
18
26
  Provides-Extra: fiftyone
@@ -0,0 +1,36 @@
1
+ labelr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
3
+ labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
4
+ labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
5
+ labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
6
+ labelr/dataset_features.py,sha256=ZC9QAUw9oKHqyUPla2h3xQFaRT9sHq8hkPNN4RDDwmo,1257
7
+ labelr/google_genai.py,sha256=x5p98eYoI887QMBDgziFxEW9WNdZ8Cw0EHjAFQ71SaE,14728
8
+ labelr/main.py,sha256=OTiJSkD_TrzQmQQm291FhknD-HQQTWfBEBgImxqL0KM,2634
9
+ labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
10
+ labelr/types.py,sha256=8CHfLyifF_N94OYDhG-7IcWboOh9o0Z_0LBtQapT8TQ,313
11
+ labelr/utils.py,sha256=8Yp0L2MCIdUYSjvmF4U5iiaBpaZJbYw4rHJOMhCCudE,3075
12
+ labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
+ labelr/apps/datasets.py,sha256=tAD6TZSnwh7uhkleSfDP0PFqztXC1S3Vx2aMSVCFfRU,12725
14
+ labelr/apps/evaluate.py,sha256=UC4CuSKa4vgR5xTBZ-dFgp_1pYnkM55s2IJgix0YtkI,1157
15
+ labelr/apps/google_batch.py,sha256=Mlz5jRVcR1XzRJg2HLte3rIhiOk4xQQjjLAJsc3lJjo,9572
16
+ labelr/apps/hugging_face.py,sha256=B0GaDZeUZj2A7nEeC1OtCANb0DqvBkhWwFWM_9Nm2kU,1608
17
+ labelr/apps/label_studio.py,sha256=lQ7K16noA4Mnr1hc0oxya1sgGgABWnpIIJTM5ENp7so,16869
18
+ labelr/apps/train.py,sha256=wmOSpO9JsrwCXYMgRg2srMbV5B5TvnlfhAKPqUt6wSg,7328
19
+ labelr/evaluate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
+ labelr/evaluate/object_detection.py,sha256=QJIwrDY-Vsy0-It6tZSkN3qgAlmIu2W1-kGdmibiPSQ,3349
21
+ labelr/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ labelr/export/classification.py,sha256=rnm99vGMJy1UkdXiZ8t_TgFe3CyLBBYowWwzaZeniIs,4699
23
+ labelr/export/common.py,sha256=lJ-ZDOMKGpC48fCuEnIrA8sZBhXGZOcghBbsLM1h66o,1252
24
+ labelr/export/llm.py,sha256=Jlopi0EQ4YUWLe_s-kTFcISTzO1QmdX-qXQxayO6E-k,3186
25
+ labelr/export/object_detection.py,sha256=91ywkPago7WgbY2COQKpwjFLYAAsXeGOu7TkGHi17OU,12338
26
+ labelr/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
27
+ labelr/sample/classification.py,sha256=7Z5hvxG6q6wfJMYj00JWbRBhfjOyhjaL8fpJjgBi9N8,539
28
+ labelr/sample/common.py,sha256=f0XDS6s0z6Vw4G2FDELJ1VQSe5Tsh0q3-3VU9unK9eY,431
29
+ labelr/sample/llm.py,sha256=zAsI3TmfGCbBPv4_hNtYR4Np3yAmUDzXGAvlQLF6V6w,2474
30
+ labelr/sample/object_detection.py,sha256=XZasR_k4AxzsiWdVMC2ZnyjfA14PKJPrx1U-XPr5tWQ,8427
31
+ labelr-0.10.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
32
+ labelr-0.10.0.dist-info/METADATA,sha256=pS2Ipq-aICU3TluuqSNocGP5-V8ztLk6X_udwwnECPk,7243
33
+ labelr-0.10.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
34
+ labelr-0.10.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
35
+ labelr-0.10.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
36
+ labelr-0.10.0.dist-info/RECORD,,
@@ -1,27 +0,0 @@
1
- labelr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
3
- labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
4
- labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
5
- labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
6
- labelr/dataset_features.py,sha256=ZC9QAUw9oKHqyUPla2h3xQFaRT9sHq8hkPNN4RDDwmo,1257
7
- labelr/export.py,sha256=HpsPT3MjWqOHbkNG8hHVns21t-2Ej2nXQXxXOKc1TFA,17771
8
- labelr/main.py,sha256=hVaaCU1voUZwr681d7wkA7HPyaDB2MaXaw20Hw2_Kz8,2439
9
- labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
10
- labelr/sample.py,sha256=Q1Itfwsis1VivaqsmtKfcwojaY0xKJNZ_88M5zOGTY0,8951
11
- labelr/types.py,sha256=8CHfLyifF_N94OYDhG-7IcWboOh9o0Z_0LBtQapT8TQ,313
12
- labelr/utils.py,sha256=e0R15jePWBzRdN8LB6kBSH5Dl_P0MNEtRmeqB9eu5d8,415
13
- labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- labelr/apps/datasets.py,sha256=kJQWwm3mjA2uWIA8O_DslM7OS5ht5mgWqcFC_zF4gCo,11187
15
- labelr/apps/evaluate.py,sha256=UC4CuSKa4vgR5xTBZ-dFgp_1pYnkM55s2IJgix0YtkI,1157
16
- labelr/apps/hugging_face.py,sha256=B0GaDZeUZj2A7nEeC1OtCANb0DqvBkhWwFWM_9Nm2kU,1608
17
- labelr/apps/label_studio.py,sha256=su9shoi0K9PmI8RBLipV2KQf_MRjkF5vy5-JUcbXr5A,16852
18
- labelr/apps/train.py,sha256=wmOSpO9JsrwCXYMgRg2srMbV5B5TvnlfhAKPqUt6wSg,7328
19
- labelr/evaluate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
- labelr/evaluate/llm.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
- labelr/evaluate/object_detection.py,sha256=QJIwrDY-Vsy0-It6tZSkN3qgAlmIu2W1-kGdmibiPSQ,3349
22
- labelr-0.8.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
23
- labelr-0.8.0.dist-info/METADATA,sha256=FNA9oU3P_BbEZ8PRidRBxvthfm8ywLt1wwo2qn2cOKo,7003
24
- labelr-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
25
- labelr-0.8.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
26
- labelr-0.8.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
27
- labelr-0.8.0.dist-info/RECORD,,
File without changes