labelr-0.8.0-py3-none-any.whl → labelr-0.10.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- labelr/apps/datasets.py +56 -5
- labelr/apps/google_batch.py +296 -0
- labelr/apps/label_studio.py +1 -1
- labelr/export/classification.py +114 -0
- labelr/export/common.py +42 -0
- labelr/export/llm.py +91 -0
- labelr/{export.py → export/object_detection.py} +3 -138
- labelr/google_genai.py +421 -0
- labelr/main.py +6 -0
- labelr/sample/__init__.py +0 -0
- labelr/sample/classification.py +17 -0
- labelr/sample/common.py +14 -0
- labelr/sample/llm.py +75 -0
- labelr/{sample.py → sample/object_detection.py} +0 -17
- labelr/utils.py +85 -0
- {labelr-0.8.0.dist-info → labelr-0.10.0.dist-info}/METADATA +9 -1
- labelr-0.10.0.dist-info/RECORD +36 -0
- labelr-0.8.0.dist-info/RECORD +0 -27
- labelr/{evaluate/llm.py → export/__init__.py} +0 -0
- {labelr-0.8.0.dist-info → labelr-0.10.0.dist-info}/WHEEL +0 -0
- {labelr-0.8.0.dist-info → labelr-0.10.0.dist-info}/entry_points.txt +0 -0
- {labelr-0.8.0.dist-info → labelr-0.10.0.dist-info}/licenses/LICENSE +0 -0
- {labelr-0.8.0.dist-info → labelr-0.10.0.dist-info}/top_level.txt +0 -0
labelr/sample/llm.py
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
import typing
|
|
2
|
+
from collections.abc import Iterator
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
|
|
5
|
+
import datasets
|
|
6
|
+
import orjson
|
|
7
|
+
from PIL import Image
|
|
8
|
+
from pydantic import BaseModel, Field
|
|
9
|
+
|
|
10
|
+
from labelr.sample.common import SampleMeta
|
|
11
|
+
from labelr.utils import download_image
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class LLMImageExtractionSample(BaseModel):
|
|
15
|
+
class Config:
|
|
16
|
+
# required to allow PIL Image type
|
|
17
|
+
arbitrary_types_allowed = True
|
|
18
|
+
|
|
19
|
+
image_id: str = Field(
|
|
20
|
+
...,
|
|
21
|
+
description="unique ID for the image. For Open Food Facts images, it follows the "
|
|
22
|
+
"format `barcode:imgid`",
|
|
23
|
+
)
|
|
24
|
+
image: Image.Image = Field(..., description="Image to extract information from")
|
|
25
|
+
output: str | None = Field(..., description="Expected response of the LLM")
|
|
26
|
+
meta: SampleMeta = Field(..., description="Metadata associated with the sample")
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
HF_DS_LLM_IMAGE_EXTRACTION_FEATURES = datasets.Features(
|
|
30
|
+
{
|
|
31
|
+
"image_id": datasets.Value("string"),
|
|
32
|
+
"image": datasets.features.Image(),
|
|
33
|
+
"output": datasets.features.Value("string"),
|
|
34
|
+
"meta": {
|
|
35
|
+
"barcode": datasets.Value("string"),
|
|
36
|
+
"off_image_id": datasets.Value("string"),
|
|
37
|
+
"image_url": datasets.Value("string"),
|
|
38
|
+
},
|
|
39
|
+
}
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def load_llm_image_extraction_dataset_from_jsonl(
|
|
44
|
+
dataset_path: Path, **kwargs
|
|
45
|
+
) -> Iterator[LLMImageExtractionSample]:
|
|
46
|
+
"""Load a Hugging Face dataset for LLM image extraction from a JSONL file.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
dataset_path (Path): Path to the JSONL dataset file.
|
|
50
|
+
**kwargs: Additional keyword arguments to pass to the image downloader.
|
|
51
|
+
Yields:
|
|
52
|
+
Iterator[LLMImageExtractionSample]: Iterator of LLM image extraction
|
|
53
|
+
samples.
|
|
54
|
+
"""
|
|
55
|
+
with dataset_path.open("r") as f:
|
|
56
|
+
for line in f:
|
|
57
|
+
item = orjson.loads(line)
|
|
58
|
+
image_id = item["image_id"]
|
|
59
|
+
image_url = item["image_url"]
|
|
60
|
+
image = typing.cast(Image.Image, download_image(image_url, **kwargs))
|
|
61
|
+
barcode = item.pop("barcode", None)
|
|
62
|
+
off_image_id = item.pop("off_image_id", None)
|
|
63
|
+
output = item.pop("output", None)
|
|
64
|
+
meta = SampleMeta(
|
|
65
|
+
barcode=barcode,
|
|
66
|
+
off_image_id=off_image_id,
|
|
67
|
+
image_url=image_url,
|
|
68
|
+
)
|
|
69
|
+
sample = LLMImageExtractionSample(
|
|
70
|
+
image_id=image_id,
|
|
71
|
+
image=image,
|
|
72
|
+
output=output,
|
|
73
|
+
meta=meta,
|
|
74
|
+
)
|
|
75
|
+
yield sample
|
|
@@ -249,20 +249,3 @@ HF_DS_OBJECT_DETECTION_FEATURES = datasets.Features(
|
|
|
249
249
|
},
|
|
250
250
|
}
|
|
251
251
|
)
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
HF_DS_CLASSIFICATION_FEATURES = datasets.Features(
|
|
255
|
-
{
|
|
256
|
-
"image_id": datasets.Value("string"),
|
|
257
|
-
"image": datasets.features.Image(),
|
|
258
|
-
"width": datasets.Value("int64"),
|
|
259
|
-
"height": datasets.Value("int64"),
|
|
260
|
-
"meta": {
|
|
261
|
-
"barcode": datasets.Value("string"),
|
|
262
|
-
"off_image_id": datasets.Value("string"),
|
|
263
|
-
"image_url": datasets.Value("string"),
|
|
264
|
-
},
|
|
265
|
-
"category_id": datasets.Value("int64"),
|
|
266
|
-
"category_name": datasets.Value("string"),
|
|
267
|
-
}
|
|
268
|
-
)
|
labelr/utils.py
CHANGED
|
@@ -1,3 +1,12 @@
|
|
|
1
|
+
import io
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from google.cloud import storage
|
|
5
|
+
from openfoodfacts.images import download_image as _download_image
|
|
6
|
+
from openfoodfacts.utils import ImageDownloadItem
|
|
7
|
+
from PIL import Image
|
|
8
|
+
|
|
9
|
+
|
|
1
10
|
def parse_hf_repo_id(hf_repo_id: str) -> tuple[str, str]:
|
|
2
11
|
"""Parse the repo_id and the revision from a hf_repo_id in the format:
|
|
3
12
|
`org/repo-name@revision`.
|
|
@@ -11,3 +20,79 @@ def parse_hf_repo_id(hf_repo_id: str) -> tuple[str, str]:
|
|
|
11
20
|
revision = "main"
|
|
12
21
|
|
|
13
22
|
return hf_repo_id, revision
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
def download_image(
|
|
26
|
+
image: str | tuple[str, str],
|
|
27
|
+
*,
|
|
28
|
+
error_raise: bool = True,
|
|
29
|
+
return_struct: bool = False,
|
|
30
|
+
**kwargs,
|
|
31
|
+
) -> Image.Image | ImageDownloadItem | None:
|
|
32
|
+
"""Download an image from a URL or GCS URI and return it as a PIL Image.
|
|
33
|
+
Args:
|
|
34
|
+
image (str | tuple[str, str]): The URL or GCS URI of the image.
|
|
35
|
+
error_raise (bool): Whether to raise an error if the image cannot be
|
|
36
|
+
downloaded.
|
|
37
|
+
return_struct (bool): Whether to return an ImageDownloadItem struct
|
|
38
|
+
instead of a PIL Image.
|
|
39
|
+
**kwargs: Additional arguments to pass to the download function.
|
|
40
|
+
Returns:
|
|
41
|
+
Image.Image | ImageDownloadItem: The downloaded image as a PIL Image
|
|
42
|
+
or an ImageDownloadItem struct.
|
|
43
|
+
"""
|
|
44
|
+
if isinstance(image, str) and image.startswith("gs://"):
|
|
45
|
+
return download_image_from_gcs(image, return_struct=return_struct, **kwargs)
|
|
46
|
+
return _download_image(
|
|
47
|
+
image,
|
|
48
|
+
error_raise=error_raise,
|
|
49
|
+
return_struct=return_struct,
|
|
50
|
+
**kwargs,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def download_image_from_gcs(
|
|
55
|
+
image_uri: str, client: storage.Client | None = None, return_struct: bool = False
|
|
56
|
+
) -> Image.Image | ImageDownloadItem:
|
|
57
|
+
"""Download an image from a Google Cloud Storage URI and return it as a
|
|
58
|
+
PIL Image.
|
|
59
|
+
|
|
60
|
+
Args:
|
|
61
|
+
image_uri (str): The GCS URI of the image
|
|
62
|
+
(e.g., gs://bucket_name/path/to/image.jpg).
|
|
63
|
+
client (storage.Client | None): An optional Google Cloud Storage
|
|
64
|
+
client. If not provided, a new client will be created.
|
|
65
|
+
"""
|
|
66
|
+
if client is None:
|
|
67
|
+
client = storage.Client()
|
|
68
|
+
|
|
69
|
+
bucket_name, blob_name = image_uri.replace("gs://", "").split("/", 1)
|
|
70
|
+
bucket = client.bucket(bucket_name)
|
|
71
|
+
blob = bucket.blob(blob_name)
|
|
72
|
+
image_data = blob.download_as_bytes()
|
|
73
|
+
pil_image = Image.open(io.BytesIO(image_data))
|
|
74
|
+
|
|
75
|
+
if return_struct:
|
|
76
|
+
return ImageDownloadItem(
|
|
77
|
+
url=image_uri,
|
|
78
|
+
image=pil_image,
|
|
79
|
+
error=None,
|
|
80
|
+
)
|
|
81
|
+
return pil_image
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class PathWithContext:
|
|
85
|
+
"""A context manager that yields a Path object.
|
|
86
|
+
|
|
87
|
+
This is useful to have a common interface with tempfile.TemporaryDirectory
|
|
88
|
+
without actually creating a temporary directory.
|
|
89
|
+
"""
|
|
90
|
+
|
|
91
|
+
def __init__(self, path: Path):
|
|
92
|
+
self.path = path
|
|
93
|
+
|
|
94
|
+
def __enter__(self) -> Path:
|
|
95
|
+
return self.path
|
|
96
|
+
|
|
97
|
+
def __exit__(self, exc_type, exc_value, traceback) -> None:
|
|
98
|
+
pass
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: labelr
|
|
3
|
-
Version: 0.8.0
|
|
3
|
+
Version: 0.10.0
|
|
4
4
|
Summary: A command-line tool to manage labeling tasks with Label Studio.
|
|
5
5
|
Requires-Python: >=3.10
|
|
6
6
|
Description-Content-Type: text/markdown
|
|
@@ -13,6 +13,14 @@ Requires-Dist: openfoodfacts>=2.9.0
|
|
|
13
13
|
Requires-Dist: typer>=0.15.1
|
|
14
14
|
Requires-Dist: google-cloud-batch==0.18.0
|
|
15
15
|
Requires-Dist: huggingface-hub
|
|
16
|
+
Requires-Dist: deepdiff>=8.6.1
|
|
17
|
+
Requires-Dist: rapidfuzz>=3.14.3
|
|
18
|
+
Requires-Dist: aiohttp
|
|
19
|
+
Requires-Dist: aiofiles
|
|
20
|
+
Requires-Dist: orjson
|
|
21
|
+
Requires-Dist: google-cloud-storage
|
|
22
|
+
Requires-Dist: gcloud-aio-storage
|
|
23
|
+
Requires-Dist: google-genai>=1.56.0
|
|
16
24
|
Provides-Extra: ultralytics
|
|
17
25
|
Requires-Dist: ultralytics==8.3.223; extra == "ultralytics"
|
|
18
26
|
Provides-Extra: fiftyone
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
labelr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
+
labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
|
|
3
|
+
labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
|
|
4
|
+
labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
|
|
5
|
+
labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
|
|
6
|
+
labelr/dataset_features.py,sha256=ZC9QAUw9oKHqyUPla2h3xQFaRT9sHq8hkPNN4RDDwmo,1257
|
|
7
|
+
labelr/google_genai.py,sha256=x5p98eYoI887QMBDgziFxEW9WNdZ8Cw0EHjAFQ71SaE,14728
|
|
8
|
+
labelr/main.py,sha256=OTiJSkD_TrzQmQQm291FhknD-HQQTWfBEBgImxqL0KM,2634
|
|
9
|
+
labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
|
|
10
|
+
labelr/types.py,sha256=8CHfLyifF_N94OYDhG-7IcWboOh9o0Z_0LBtQapT8TQ,313
|
|
11
|
+
labelr/utils.py,sha256=8Yp0L2MCIdUYSjvmF4U5iiaBpaZJbYw4rHJOMhCCudE,3075
|
|
12
|
+
labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
13
|
+
labelr/apps/datasets.py,sha256=tAD6TZSnwh7uhkleSfDP0PFqztXC1S3Vx2aMSVCFfRU,12725
|
|
14
|
+
labelr/apps/evaluate.py,sha256=UC4CuSKa4vgR5xTBZ-dFgp_1pYnkM55s2IJgix0YtkI,1157
|
|
15
|
+
labelr/apps/google_batch.py,sha256=Mlz5jRVcR1XzRJg2HLte3rIhiOk4xQQjjLAJsc3lJjo,9572
|
|
16
|
+
labelr/apps/hugging_face.py,sha256=B0GaDZeUZj2A7nEeC1OtCANb0DqvBkhWwFWM_9Nm2kU,1608
|
|
17
|
+
labelr/apps/label_studio.py,sha256=lQ7K16noA4Mnr1hc0oxya1sgGgABWnpIIJTM5ENp7so,16869
|
|
18
|
+
labelr/apps/train.py,sha256=wmOSpO9JsrwCXYMgRg2srMbV5B5TvnlfhAKPqUt6wSg,7328
|
|
19
|
+
labelr/evaluate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
+
labelr/evaluate/object_detection.py,sha256=QJIwrDY-Vsy0-It6tZSkN3qgAlmIu2W1-kGdmibiPSQ,3349
|
|
21
|
+
labelr/export/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
|
+
labelr/export/classification.py,sha256=rnm99vGMJy1UkdXiZ8t_TgFe3CyLBBYowWwzaZeniIs,4699
|
|
23
|
+
labelr/export/common.py,sha256=lJ-ZDOMKGpC48fCuEnIrA8sZBhXGZOcghBbsLM1h66o,1252
|
|
24
|
+
labelr/export/llm.py,sha256=Jlopi0EQ4YUWLe_s-kTFcISTzO1QmdX-qXQxayO6E-k,3186
|
|
25
|
+
labelr/export/object_detection.py,sha256=91ywkPago7WgbY2COQKpwjFLYAAsXeGOu7TkGHi17OU,12338
|
|
26
|
+
labelr/sample/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
|
+
labelr/sample/classification.py,sha256=7Z5hvxG6q6wfJMYj00JWbRBhfjOyhjaL8fpJjgBi9N8,539
|
|
28
|
+
labelr/sample/common.py,sha256=f0XDS6s0z6Vw4G2FDELJ1VQSe5Tsh0q3-3VU9unK9eY,431
|
|
29
|
+
labelr/sample/llm.py,sha256=zAsI3TmfGCbBPv4_hNtYR4Np3yAmUDzXGAvlQLF6V6w,2474
|
|
30
|
+
labelr/sample/object_detection.py,sha256=XZasR_k4AxzsiWdVMC2ZnyjfA14PKJPrx1U-XPr5tWQ,8427
|
|
31
|
+
labelr-0.10.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
32
|
+
labelr-0.10.0.dist-info/METADATA,sha256=pS2Ipq-aICU3TluuqSNocGP5-V8ztLk6X_udwwnECPk,7243
|
|
33
|
+
labelr-0.10.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
34
|
+
labelr-0.10.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
|
|
35
|
+
labelr-0.10.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
|
|
36
|
+
labelr-0.10.0.dist-info/RECORD,,
|
labelr-0.8.0.dist-info/RECORD
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
labelr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
2
|
-
labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
|
|
3
|
-
labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
|
|
4
|
-
labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
|
|
5
|
-
labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
|
|
6
|
-
labelr/dataset_features.py,sha256=ZC9QAUw9oKHqyUPla2h3xQFaRT9sHq8hkPNN4RDDwmo,1257
|
|
7
|
-
labelr/export.py,sha256=HpsPT3MjWqOHbkNG8hHVns21t-2Ej2nXQXxXOKc1TFA,17771
|
|
8
|
-
labelr/main.py,sha256=hVaaCU1voUZwr681d7wkA7HPyaDB2MaXaw20Hw2_Kz8,2439
|
|
9
|
-
labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
|
|
10
|
-
labelr/sample.py,sha256=Q1Itfwsis1VivaqsmtKfcwojaY0xKJNZ_88M5zOGTY0,8951
|
|
11
|
-
labelr/types.py,sha256=8CHfLyifF_N94OYDhG-7IcWboOh9o0Z_0LBtQapT8TQ,313
|
|
12
|
-
labelr/utils.py,sha256=e0R15jePWBzRdN8LB6kBSH5Dl_P0MNEtRmeqB9eu5d8,415
|
|
13
|
-
labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
-
labelr/apps/datasets.py,sha256=kJQWwm3mjA2uWIA8O_DslM7OS5ht5mgWqcFC_zF4gCo,11187
|
|
15
|
-
labelr/apps/evaluate.py,sha256=UC4CuSKa4vgR5xTBZ-dFgp_1pYnkM55s2IJgix0YtkI,1157
|
|
16
|
-
labelr/apps/hugging_face.py,sha256=B0GaDZeUZj2A7nEeC1OtCANb0DqvBkhWwFWM_9Nm2kU,1608
|
|
17
|
-
labelr/apps/label_studio.py,sha256=su9shoi0K9PmI8RBLipV2KQf_MRjkF5vy5-JUcbXr5A,16852
|
|
18
|
-
labelr/apps/train.py,sha256=wmOSpO9JsrwCXYMgRg2srMbV5B5TvnlfhAKPqUt6wSg,7328
|
|
19
|
-
labelr/evaluate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
|
-
labelr/evaluate/llm.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
|
-
labelr/evaluate/object_detection.py,sha256=QJIwrDY-Vsy0-It6tZSkN3qgAlmIu2W1-kGdmibiPSQ,3349
|
|
22
|
-
labelr-0.8.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
|
|
23
|
-
labelr-0.8.0.dist-info/METADATA,sha256=FNA9oU3P_BbEZ8PRidRBxvthfm8ywLt1wwo2qn2cOKo,7003
|
|
24
|
-
labelr-0.8.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
25
|
-
labelr-0.8.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
|
|
26
|
-
labelr-0.8.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
|
|
27
|
-
labelr-0.8.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|