labelr 0.7.0__py3-none-any.whl → 0.9.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
labelr/sample.py CHANGED
@@ -1,16 +1,20 @@
1
1
  import logging
2
2
  import random
3
3
  import string
4
+ import typing
4
5
 
5
6
  import datasets
7
+ import PIL
6
8
  from openfoodfacts import Flavor
7
9
  from openfoodfacts.barcode import normalize_barcode
8
10
  from openfoodfacts.images import download_image, generate_image_url
11
+ from PIL import Image, ImageOps
12
+ from pydantic import BaseModel, Field
9
13
 
10
14
  logger = logging.getLogger(__name__)
11
15
 
12
16
 
13
- def format_annotation_results_from_hf(
17
+ def format_annotation_results_from_hf_to_ls(
14
18
  objects: dict, image_width: int, image_height: int
15
19
  ):
16
20
  """Format annotation results from a HF object detection dataset into Label
@@ -56,12 +60,12 @@ def format_annotation_results_from_hf(
56
60
  return annotation_results
57
61
 
58
62
 
59
- def format_object_detection_sample_from_hf(hf_sample: dict, split: str) -> dict:
63
+ def format_object_detection_sample_from_hf_to_ls(hf_sample: dict, split: str) -> dict:
60
64
  hf_meta = hf_sample["meta"]
61
65
  objects = hf_sample["objects"]
62
66
  image_width = hf_sample["width"]
63
67
  image_height = hf_sample["height"]
64
- annotation_results = format_annotation_results_from_hf(
68
+ annotation_results = format_annotation_results_from_hf_to_ls(
65
69
  objects, image_width, image_height
66
70
  )
67
71
  image_id = hf_sample["image_id"]
@@ -149,8 +153,24 @@ def format_object_detection_sample_to_hf(
149
153
  annotations: list[dict],
150
154
  label_names: list[str],
151
155
  merge_labels: bool = False,
152
- use_aws_cache: bool = True,
156
+ use_aws_cache: bool = False,
153
157
  ) -> dict | None:
158
+ """Format a Label Studio object detection sample to Hugging Face format.
159
+
160
+ Args:
161
+ task_data: The task data from Label Studio.
162
+ annotations: The annotations from Label Studio.
163
+ label_names: The list of label names.
164
+ merge_labels: Whether to merge all labels into a single label (the
165
+ first label in `label_names`).
166
+ use_aws_cache: Whether to use AWS cache when downloading images.
167
+
168
+ Returns:
169
+ The formatted sample, or None in the following cases:
170
+ - More than one annotation is found
171
+ - No annotation is found
172
+ - An error occurs when downloading the image
173
+ """
154
174
  if len(annotations) > 1:
155
175
  logger.info("More than one annotation found, skipping")
156
176
  return None
@@ -186,6 +206,13 @@ def format_object_detection_sample_to_hf(
186
206
  logger.error("Failed to download image: %s", image_url)
187
207
  return None
188
208
 
209
+ # Correct image orientation using EXIF data
210
+ # Label Studio provides bounding boxes based on the displayed image (after
211
+ # any EXIF rotation), so we need to apply the same transformation to
212
+ # the image.
213
+ # Indeed, Hugging Face stores images without applying EXIF rotation, and
214
+ # EXIF data is not preserved in the dataset.
215
+ ImageOps.exif_transpose(typing.cast(PIL.Image.Image, image), in_place=True)
189
216
  return {
190
217
  "image_id": task_data["image_id"],
191
218
  "image": image,
@@ -204,6 +231,34 @@ def format_object_detection_sample_to_hf(
204
231
  }
205
232
 
206
233
 
234
+ class SampleMeta(BaseModel):
235
+ barcode: str | None = Field(
236
+ ..., description="The barcode of the product, if applicable"
237
+ )
238
+ off_image_id: str | None = Field(
239
+ ...,
240
+ description="The Open Food Facts image ID associated with the image, if applicable",
241
+ )
242
+ image_url: str | None = Field(
243
+ ..., description="The URL of the image, if applicable"
244
+ )
245
+
246
+
247
+ class LLMImageExtractionSample(BaseModel):
248
+ class Config:
249
+ # required to allow PIL Image type
250
+ arbitrary_types_allowed = True
251
+
252
+ image_id: str = Field(
253
+ ...,
254
+ description="unique ID for the image. For Open Food Facts images, it follows the "
255
+ "format `barcode:imgid`",
256
+ )
257
+ image: Image.Image = Field(..., description="Image to extract information from")
258
+ output: str = Field(..., description="Expected response of the LLM")
259
+ meta: SampleMeta = Field(..., description="Metadata associated with the sample")
260
+
261
+
207
262
  # The HuggingFace Dataset features
208
263
  HF_DS_OBJECT_DETECTION_FEATURES = datasets.Features(
209
264
  {
@@ -240,3 +295,16 @@ HF_DS_CLASSIFICATION_FEATURES = datasets.Features(
240
295
  "category_name": datasets.Value("string"),
241
296
  }
242
297
  )
298
+
299
+ HF_DS_LLM_IMAGE_EXTRACTION_FEATURES = datasets.Features(
300
+ {
301
+ "image_id": datasets.Value("string"),
302
+ "image": datasets.features.Image(),
303
+ "output": datasets.features.Value("string"),
304
+ "meta": {
305
+ "barcode": datasets.Value("string"),
306
+ "off_image_id": datasets.Value("string"),
307
+ "image_url": datasets.Value("string"),
308
+ },
309
+ }
310
+ )
labelr/utils.py CHANGED
@@ -1,3 +1,10 @@
1
+ import io
2
+ from pathlib import Path
3
+
4
+ from google.cloud import storage
5
+ from PIL import Image
6
+
7
+
1
8
  def parse_hf_repo_id(hf_repo_id: str) -> tuple[str, str]:
2
9
  """Parse the repo_id and the revision from a hf_repo_id in the format:
3
10
  `org/repo-name@revision`.
@@ -11,3 +18,31 @@ def parse_hf_repo_id(hf_repo_id: str) -> tuple[str, str]:
11
18
  revision = "main"
12
19
 
13
20
  return hf_repo_id, revision
21
+
22
+
23
+ def download_image_from_gcs(image_uri: str) -> Image.Image:
24
+ """Download an image from a Google Cloud Storage URI and return it as a
25
+ PIL Image."""
26
+ storage_client = storage.Client()
27
+ bucket_name, blob_name = image_uri.replace("gs://", "").split("/", 1)
28
+ bucket = storage_client.bucket(bucket_name)
29
+ blob = bucket.blob(blob_name)
30
+ image_data = blob.download_as_bytes()
31
+ return Image.open(io.BytesIO(image_data))
32
+
33
+
34
+ class PathWithContext:
35
+ """A context manager that yields a Path object.
36
+
37
+ This is useful to have a common interface with tempfile.TemporaryDirectory
38
+ without actually creating a temporary directory.
39
+ """
40
+
41
+ def __init__(self, path: Path):
42
+ self.path = path
43
+
44
+ def __enter__(self) -> Path:
45
+ return self.path
46
+
47
+ def __exit__(self, exc_type, exc_value, traceback) -> None:
48
+ pass
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: labelr
3
- Version: 0.7.0
3
+ Version: 0.9.0
4
4
  Summary: A command-line tool to manage labeling tasks with Label Studio.
5
5
  Requires-Python: >=3.10
6
6
  Description-Content-Type: text/markdown
@@ -13,10 +13,19 @@ Requires-Dist: openfoodfacts>=2.9.0
13
13
  Requires-Dist: typer>=0.15.1
14
14
  Requires-Dist: google-cloud-batch==0.18.0
15
15
  Requires-Dist: huggingface-hub
16
+ Requires-Dist: deepdiff>=8.6.1
17
+ Requires-Dist: rapidfuzz>=3.14.3
18
+ Requires-Dist: aiohttp
19
+ Requires-Dist: aiofiles
20
+ Requires-Dist: orjson
16
21
  Provides-Extra: ultralytics
17
22
  Requires-Dist: ultralytics==8.3.223; extra == "ultralytics"
18
23
  Provides-Extra: fiftyone
19
24
  Requires-Dist: fiftyone~=1.10.0; extra == "fiftyone"
25
+ Provides-Extra: google
26
+ Requires-Dist: google-genai>=1.56.0; extra == "google"
27
+ Requires-Dist: gcloud-aio-storage; extra == "google"
28
+ Requires-Dist: google-cloud-storage; extra == "google"
20
29
  Dynamic: license-file
21
30
 
22
31
  # Labelr
@@ -73,7 +82,7 @@ Once you have a Label Studio instance running, you can create a project easily.
73
82
  For an object detection task, a command allows you to create the configuration file automatically:
74
83
 
75
84
  ```bash
76
- labelr projects create-config --labels 'label1' --labels 'label2' --output-file label_config.xml
85
+ labelr ls create-config --labels 'label1' --labels 'label2' --output-file label_config.xml
77
86
  ```
78
87
 
79
88
  where `label1` and `label2` are the labels you want to use for the object detection task, and `label_config.xml` is the output file that will contain the configuration.
@@ -81,17 +90,19 @@ where `label1` and `label2` are the labels you want to use for the object detect
81
90
  Then, you can create a project on Label Studio with the following command:
82
91
 
83
92
  ```bash
84
- labelr projects create --title my_project --api-key API_KEY --config-file label_config.xml
93
+ labelr ls create --title my_project --api-key API_KEY --config-file label_config.xml
85
94
  ```
86
95
 
87
96
  where `API_KEY` is the API key of the Label Studio instance (the API key is available on the Account page), and `label_config.xml` is the configuration file of the project.
88
97
 
98
+ `ls` stands for Label Studio in the CLI.
99
+
89
100
  #### Create a dataset file
90
101
 
91
102
  If you have a list of images, for an object detection task, you can quickly create a dataset file with the following command:
92
103
 
93
104
  ```bash
94
- labelr projects create-dataset-file --input-file image_urls.txt --output-file dataset.json
105
+ labelr ls create-dataset-file --input-file image_urls.txt --output-file dataset.json
95
106
  ```
96
107
 
97
108
  where `image_urls.txt` is a file containing the URLs of the images, one per line, and `dataset.json` is the output file.
@@ -101,7 +112,7 @@ where `image_urls.txt` is a file containing the URLs of the images, one per line
101
112
  Next, import the generated data to a project with the following command:
102
113
 
103
114
  ```bash
104
- labelr projects import-data --project-id PROJECT_ID --dataset-path dataset.json
115
+ labelr ls import-data --project-id PROJECT_ID --dataset-path dataset.json
105
116
  ```
106
117
 
107
118
  where `PROJECT_ID` is the ID of the project you created.
@@ -117,7 +128,7 @@ To accelerate annotation, you can pre-annotate the images with an object detecti
117
128
  To pre-annotate the data with Triton, use the following command:
118
129
 
119
130
  ```bash
120
- labelr projects add-prediction --project-id PROJECT_ID --backend ultralytics --labels 'product' --labels 'price tag' --label-mapping '{"price tag": "price-tag"}'
131
+ labelr ls add-prediction --project-id PROJECT_ID --backend ultralytics --labels 'product' --labels 'price tag' --label-mapping '{"price tag": "price-tag"}'
121
132
  ```
122
133
 
123
134
  where `labels` is the list of labels to use for the object detection task (you can add as many labels as you want).
@@ -0,0 +1,28 @@
1
+ labelr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
3
+ labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
4
+ labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
5
+ labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
6
+ labelr/dataset_features.py,sha256=ZC9QAUw9oKHqyUPla2h3xQFaRT9sHq8hkPNN4RDDwmo,1257
7
+ labelr/export.py,sha256=aPfQ-RaK3C2WJrzbETYdC9kRe0MTpCRs0nu5l2SqiRg,20092
8
+ labelr/google_genai.py,sha256=vn_UNQOxUDOTTTWz-emAVErjOtQmnlxM_m8yo2q01Ok,14401
9
+ labelr/main.py,sha256=OTiJSkD_TrzQmQQm291FhknD-HQQTWfBEBgImxqL0KM,2634
10
+ labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
11
+ labelr/sample.py,sha256=VL-iKDvLaIeViJ0TaBY9uCbv0ey528fkaRTYE-Zr12I,10347
12
+ labelr/types.py,sha256=8CHfLyifF_N94OYDhG-7IcWboOh9o0Z_0LBtQapT8TQ,313
13
+ labelr/utils.py,sha256=-zLOWLbvLwtNFtzzwZ6RjJD9GstoYR-gt4wz9r6u9lE,1363
14
+ labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
15
+ labelr/apps/datasets.py,sha256=kJQWwm3mjA2uWIA8O_DslM7OS5ht5mgWqcFC_zF4gCo,11187
16
+ labelr/apps/evaluate.py,sha256=UC4CuSKa4vgR5xTBZ-dFgp_1pYnkM55s2IJgix0YtkI,1157
17
+ labelr/apps/google_batch.py,sha256=BMcfBkDwfu-zOOR80bYmtEy6k_Qc70m7K7wmp4Ww0r8,9335
18
+ labelr/apps/hugging_face.py,sha256=B0GaDZeUZj2A7nEeC1OtCANb0DqvBkhWwFWM_9Nm2kU,1608
19
+ labelr/apps/label_studio.py,sha256=su9shoi0K9PmI8RBLipV2KQf_MRjkF5vy5-JUcbXr5A,16852
20
+ labelr/apps/train.py,sha256=wmOSpO9JsrwCXYMgRg2srMbV5B5TvnlfhAKPqUt6wSg,7328
21
+ labelr/evaluate/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
+ labelr/evaluate/object_detection.py,sha256=QJIwrDY-Vsy0-It6tZSkN3qgAlmIu2W1-kGdmibiPSQ,3349
23
+ labelr-0.9.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
24
+ labelr-0.9.0.dist-info/METADATA,sha256=cNkf4LPmbO_k3UuR7O7NtcCwRF-Z5c-yIyQRAocsjww,7322
25
+ labelr-0.9.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
26
+ labelr-0.9.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
27
+ labelr-0.9.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
28
+ labelr-0.9.0.dist-info/RECORD,,
labelr/apps/users.py DELETED
@@ -1,36 +0,0 @@
1
- from typing import Annotated
2
-
3
- import typer
4
-
5
- from ..config import LABEL_STUDIO_DEFAULT_URL
6
-
7
- app = typer.Typer()
8
-
9
- # Label Studio user management
10
-
11
-
12
- @app.command()
13
- def list(
14
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
15
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
16
- ):
17
- """List all users in Label Studio."""
18
- from label_studio_sdk.client import LabelStudio
19
-
20
- ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
21
-
22
- for user in ls.users.list():
23
- print(f"{user.id:02d}: {user.email}")
24
-
25
-
26
- @app.command()
27
- def delete(
28
- user_id: int,
29
- api_key: Annotated[str, typer.Option(envvar="LABEL_STUDIO_API_KEY")],
30
- label_studio_url: str = LABEL_STUDIO_DEFAULT_URL,
31
- ):
32
- """Delete a user from Label Studio."""
33
- from label_studio_sdk.client import LabelStudio
34
-
35
- ls = LabelStudio(base_url=label_studio_url, api_key=api_key)
36
- ls.users.delete(user_id)
@@ -1,23 +0,0 @@
1
- labelr/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
3
- labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
4
- labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
5
- labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
6
- labelr/dataset_features.py,sha256=ZC9QAUw9oKHqyUPla2h3xQFaRT9sHq8hkPNN4RDDwmo,1257
7
- labelr/export.py,sha256=gjC2_RJ_yX8zVYXyo1RAgI07iXSgkeqckOTEzSscRXc,17940
8
- labelr/main.py,sha256=CioMPtaPoGL_5Oxwj8PfalhTyFahMbfp2kd9KdZzm3Y,2258
9
- labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
10
- labelr/sample.py,sha256=unu9AQ64FhKPgssuL7gb3qyMd1EQJvMOfqvjdefmWOU,7807
11
- labelr/types.py,sha256=8CHfLyifF_N94OYDhG-7IcWboOh9o0Z_0LBtQapT8TQ,313
12
- labelr/utils.py,sha256=e0R15jePWBzRdN8LB6kBSH5Dl_P0MNEtRmeqB9eu5d8,415
13
- labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
- labelr/apps/datasets.py,sha256=4PMfKS5c7Zw3-NNRBkFbZidMQUI2RBMcXFYBvWHLz3o,11688
15
- labelr/apps/projects.py,sha256=HpgqIaPrUQzIR7eOLn4EBbEzXRi7hoWStT4jLMQPcBg,15153
16
- labelr/apps/train.py,sha256=sI0p3h39LPXhynwl_yMuZnIPlaqlcWSO_81zPC3H3yI,6886
17
- labelr/apps/users.py,sha256=twQSlpHxE0hrYkgrJpEFbK8lYfWnpJr8vyfLHLtdAUU,909
18
- labelr-0.7.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
19
- labelr-0.7.0.dist-info/METADATA,sha256=NghQ_6mNj1Dkets_GlOOOyoAVEQqoPBbbJXhysOKAWI,6991
20
- labelr-0.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
21
- labelr-0.7.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
22
- labelr-0.7.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
23
- labelr-0.7.0.dist-info/RECORD,,
File without changes