labelr 0.7.0-py3-none-any.whl → 0.9.0-py3-none-any.whl

This diff represents the changes between two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package contents as they appear in their public registry.
@@ -0,0 +1,100 @@
+ import tempfile
+ from pathlib import Path
+
+ import datasets
+ import fiftyone as fo
+ from huggingface_hub import hf_hub_download
+
+ from labelr.dataset_features import OBJECT_DETECTION_DS_PREDICTION_FEATURES
+ from labelr.utils import parse_hf_repo_id
+
+
+ def convert_bbox_to_fo_format(
+     bbox: tuple[float, float, float, float],
+ ) -> tuple[float, float, float, float]:
+     # Bounding box coordinates should be relative values
+     # in [0, 1] in the following format:
+     # [top-left-x, top-left-y, width, height]
+     y_min, x_min, y_max, x_max = bbox
+     return (
+         x_min,
+         y_min,
+         (x_max - x_min),
+         (y_max - y_min),
+     )
+
+
+ def visualize(
+     hf_repo_id: str,
+     dataset_name: str,
+     persistent: bool,
+ ):
+     hf_repo_id, hf_revision = parse_hf_repo_id(hf_repo_id)
+
+     # Fetch the predictions file from the Hugging Face model repository
+     file_path = hf_hub_download(
+         hf_repo_id,
+         filename="predictions.parquet",
+         revision=hf_revision,
+         repo_type="model",
+         # local_dir="./predictions/",
+     )
+     file_path = Path(file_path).absolute()
+     prediction_dataset = datasets.load_dataset(
+         "parquet",
+         data_files=str(file_path),
+         split="train",
+         features=OBJECT_DETECTION_DS_PREDICTION_FEATURES,
+     )
+     fo_dataset = fo.Dataset(name=dataset_name, persistent=persistent)
+
+     with tempfile.TemporaryDirectory() as tmpdir_str:
+         tmp_dir = Path(tmpdir_str)
+         for i, hf_sample in enumerate(prediction_dataset):
+             image = hf_sample["image"]
+             image_path = tmp_dir / f"{i}.jpg"
+             image.save(image_path)
+             split = hf_sample["split"]
+             sample = fo.Sample(
+                 filepath=image_path,
+                 split=split,
+                 tags=[split],
+                 image=hf_sample["image_id"],
+             )
+             # Ground-truth annotations stored in the dataset
+             ground_truth_detections = [
+                 fo.Detection(
+                     label=hf_sample["objects"]["category_name"][i],
+                     bounding_box=convert_bbox_to_fo_format(
+                         bbox=hf_sample["objects"]["bbox"][i],
+                     ),
+                 )
+                 for i in range(len(hf_sample["objects"]["bbox"]))
+             ]
+             sample["ground_truth"] = fo.Detections(detections=ground_truth_detections)
+
+             # Model predictions, if any were stored for this sample
+             if hf_sample["detected"] is not None and hf_sample["detected"]["bbox"]:
+                 model_detections = [
+                     fo.Detection(
+                         label=hf_sample["detected"]["category_name"][i],
+                         bounding_box=convert_bbox_to_fo_format(
+                             bbox=hf_sample["detected"]["bbox"][i]
+                         ),
+                         confidence=hf_sample["detected"]["confidence"][i],
+                     )
+                     for i in range(len(hf_sample["detected"]["bbox"]))
+                 ]
+                 sample["model"] = fo.Detections(detections=model_detections)
+
+             fo_dataset.add_sample(sample)
+
+         # View summary info about the dataset
+         print(fo_dataset)
+
+         # Print the first few samples in the dataset
+         print(fo_dataset.head())
+
+         # Visualize the dataset in the FiftyOne App
+         session = fo.launch_app(fo_dataset)
+         fo_dataset.evaluate_detections(
+             "model", gt_field="ground_truth", eval_key="eval", compute_mAP=True
+         )
+         session.wait()
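convert_bbox_to_fo_format above turns a (y_min, x_min, y_max, x_max) box into FiftyOne's relative [top-left-x, top-left-y, width, height] layout; a quick worked example with made-up coordinates:

# (y_min, x_min, y_max, x_max) = (0.2, 0.1, 0.8, 0.5)
# -> (top-left-x, top-left-y, width, height) = (0.1, 0.2, 0.4, 0.6)
assert convert_bbox_to_fo_format((0.2, 0.1, 0.8, 0.5)) == (0.1, 0.2, 0.4, 0.6)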
labelr/export.py CHANGED
@@ -3,6 +3,7 @@ import logging
  import pickle
  import random
  import tempfile
+ from collections.abc import Iterator
  from pathlib import Path

  import datasets
@@ -14,10 +15,13 @@ from PIL import Image, ImageOps

  from labelr.sample import (
      HF_DS_CLASSIFICATION_FEATURES,
+     HF_DS_LLM_IMAGE_EXTRACTION_FEATURES,
      HF_DS_OBJECT_DETECTION_FEATURES,
+     LLMImageExtractionSample,
      format_object_detection_sample_to_hf,
  )
  from labelr.types import TaskType
+ from labelr.utils import PathWithContext

  logger = logging.getLogger(__name__)

@@ -77,13 +81,7 @@ def export_from_ls_to_hf_object_detection(
          functools.partial(_pickle_sample_generator, tmp_dir),
          features=HF_DS_OBJECT_DETECTION_FEATURES,
      )
-     hf_ds.push_to_hub(
-         repo_id,
-         split=split,
-         revision=revision,
-         # Create a PR if not pushing to main branch
-         create_pr=revision != "main",
-     )
+     hf_ds.push_to_hub(repo_id, split=split, revision=revision)


  def export_from_ls_to_ultralytics_object_detection(
@@ -461,3 +459,62 @@ def export_from_ultralytics_to_hf_classification(
          features=HF_DS_CLASSIFICATION_FEATURES,
      )
      hf_ds.push_to_hub(repo_id, split=split)
+
+
+ def export_to_hf_llm_image_extraction(
+     sample_iter: Iterator[LLMImageExtractionSample],
+     split: str,
+     repo_id: str,
+     revision: str = "main",
+     tmp_dir: Path | None = None,
+ ) -> None:
+     """Export LLM image extraction samples to a Hugging Face dataset.
+
+     Args:
+         sample_iter (Iterator[LLMImageExtractionSample]): Iterator of samples
+             to export.
+         split (str): Name of the dataset split (e.g., 'train', 'val').
+         repo_id (str): Hugging Face repository ID to push the dataset to.
+         revision (str): Revision (branch, tag or commit) to use for the
+             Hugging Face Datasets repository.
+         tmp_dir (Path | None): Temporary directory to use for intermediate
+             files. If None, a temporary directory will be created
+             automatically.
+     """
+     logger.info(
+         "Repo ID: %s, revision: %s, split: %s, tmp_dir: %s",
+         repo_id,
+         revision,
+         split,
+         tmp_dir,
+     )
+
+     tmp_dir_with_context: PathWithContext | tempfile.TemporaryDirectory
+     if tmp_dir:
+         tmp_dir.mkdir(parents=True, exist_ok=True)
+         tmp_dir_with_context = PathWithContext(tmp_dir)
+     else:
+         tmp_dir_with_context = tempfile.TemporaryDirectory()
+
+     with tmp_dir_with_context as tmp_dir_str:
+         tmp_dir = Path(tmp_dir_str)
+         for sample in tqdm.tqdm(sample_iter, desc="samples"):
+             image = sample.image
+             # Rotate the image according to its EXIF orientation using Pillow
+             image = ImageOps.exif_transpose(image)
+             image_id = sample.image_id
+             sample = {
+                 "image_id": image_id,
+                 "image": image,
+                 "meta": sample.meta.model_dump(),
+                 "output": sample.output,
+             }
+             # Save the sample as a pickle file
+             with open(tmp_dir / f"{split}_{image_id}.pkl", "wb") as f:
+                 pickle.dump(sample, f)
+
+         hf_ds = datasets.Dataset.from_generator(
+             functools.partial(_pickle_sample_generator, tmp_dir),
+             features=HF_DS_LLM_IMAGE_EXTRACTION_FEATURES,
+         )
+         hf_ds.push_to_hub(repo_id, split=split, revision=revision)
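A minimal caller sketch for the new export function; the prediction file, schema, and repository ID are hypothetical, and the sample iterator would typically come from generate_sample_iter in labelr.google_genai:

from pathlib import Path

from labelr.export import export_to_hf_llm_image_extraction
from labelr.google_genai import generate_sample_iter

samples = generate_sample_iter(
    prediction_path=Path("predictions.jsonl"),  # hypothetical batch output
    json_schema={},  # permissive schema; real callers pass the extraction schema
)
export_to_hf_llm_image_extraction(
    sample_iter=samples,
    split="train",
    repo_id="my-org/my-llm-extraction-dataset",  # hypothetical repo ID
    revision="main",
)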
labelr/google_genai.py ADDED
@@ -0,0 +1,415 @@
+ import asyncio
+ import mimetypes
+ from collections.abc import Iterator
+ from pathlib import Path
+ from typing import Literal
+ from urllib.parse import urlparse
+
+ import aiofiles
+ import jsonschema
+ import orjson
+ import typer
+ from gcloud.aio.storage import Storage
+ from openfoodfacts import Flavor
+ from openfoodfacts.images import download_image, generate_image_url
+ from tqdm.asyncio import tqdm
+
+ from labelr.sample import LLMImageExtractionSample, SampleMeta
+ from labelr.utils import download_image_from_gcs
+
+ try:
+     import google.genai  # noqa: F401
+ except ImportError:
+     raise ImportError(
+         "The 'google-genai' package is required to use this module. "
+         "Please install labelr with the 'google' extra: "
+         "`pip install labelr[google]`"
+     )
+ import aiohttp
+ from google import genai
+ from google.cloud import storage
+ from google.genai.types import CreateBatchJobConfig, HttpOptions
+ from google.genai.types import JSONSchema as GoogleJSONSchema
+ from google.genai.types import Schema as GoogleSchema
+ from openfoodfacts.types import JSONType
+ from pydantic import BaseModel
+
+
+ class RawBatchSamplePart(BaseModel):
+     type: Literal["text", "image"]
+     data: str
+
+
+ class RawBatchSample(BaseModel):
+     key: str
+     parts: list[RawBatchSamplePart]
+     meta: JSONType = {}
+
+
+ def convert_pydantic_model_to_google_schema(schema: type[BaseModel]) -> JSONType:
+     """Google doesn't natively support OpenAPI schemas, so we convert them to
+     Google `Schema` (a subset of OpenAPI)."""
+     return GoogleSchema.from_json_schema(
+         json_schema=GoogleJSONSchema.model_validate(schema.model_json_schema())
+     ).model_dump(mode="json", exclude_none=True, exclude_unset=True)
+
+
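A small sketch of how this converter could be used; the NutritionFacts model below is illustrative, not part of the package:

from pydantic import BaseModel

class NutritionFacts(BaseModel):  # hypothetical extraction target
    energy_kcal: float
    serving_size: str

# Yields a plain dict in Google's Schema subset of OpenAPI, ready to be used
# as the "response_json_schema" of a batch request's generationConfig.
google_schema = convert_pydantic_model_to_google_schema(NutritionFacts)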
+ # Note: this shadows the `download_image` imported from openfoodfacts.images above.
+ async def download_image(url: str, session: aiohttp.ClientSession) -> bytes:
+     """Download an image from a URL and return its content as bytes.
+
+     Args:
+         url (str): URL of the image to download.
+         session (aiohttp.ClientSession): HTTP session to use for the request.
+     Returns:
+         bytes: Content of the downloaded image.
+     """
+     async with session.get(url) as response:
+         response.raise_for_status()
+         return await response.read()
+
+
+ async def download_image_from_filesystem(url: str, base_dir: Path) -> bytes:
+     """Read an image from the filesystem and return its content as bytes.
+
+     Args:
+         url (str): URL of the image; its path component is resolved
+             relative to `base_dir`.
+         base_dir (Path): Base directory where images are stored.
+     Returns:
+         bytes: Content of the image file.
+     """
+     file_path = urlparse(url).path[1:]  # Remove leading '/'
+     full_file_path = base_dir / file_path
+     async with aiofiles.open(full_file_path, "rb") as f:
+         return await f.read()
+
+
+ async def upload_to_gcs(
+     image_url: str,
+     bucket_name: str,
+     blob_name: str,
+     session: aiohttp.ClientSession,
+     base_image_dir: Path | None = None,
+ ) -> dict:
+     """Download an image and upload it to Google Cloud Storage.
+     Args:
+         image_url (str): URL of the image to upload.
+         bucket_name (str): Name of the GCS bucket.
+         blob_name (str): Name of the blob (object) in the bucket.
+         session (aiohttp.ClientSession): HTTP session to use for downloading
+             the image and uploading it to GCS.
+         base_image_dir (Path | None): If provided, images will be read from
+             the filesystem under this base directory instead of downloading
+             them from their URLs.
+     Returns:
+         dict: Status of the upload operation.
+     """
+     if base_image_dir is None:
+         image_data = await download_image(image_url, session)
+     else:
+         image_data = await download_image_from_filesystem(image_url, base_image_dir)
+
+     client = Storage(session=session)
+
+     status = await client.upload(
+         bucket_name,
+         blob_name,
+         image_data,
+     )
+     return status
+
+
+ async def upload_to_gcs_format_async(
+     sample: RawBatchSample,
+     google_json_schema: JSONType,
+     instructions: str | None,
+     bucket_name: str,
+     bucket_dir_name: str,
+     session: aiohttp.ClientSession,
+     base_image_dir: Path | None = None,
+     skip_upload: bool = False,
+     thinking_level: str | None = None,
+ ) -> JSONType | None:
+     parts: list[JSONType] = []
+
+     if instructions:
+         parts.append({"text": instructions})
+
+     for part in sample.parts:
+         if part.type == "image":
+             mime_type, _ = mimetypes.guess_type(part.data)
+             if mime_type is None:
+                 raise ValueError(f"Cannot guess mimetype of file: {part.data}")
+
+             file_uri = part.data
+             image_blob_name = f"{bucket_dir_name}/{sample.key}/{Path(file_uri).name}"
+             # Fetch the image and upload it to GCS
+             if not skip_upload:
+                 try:
+                     await upload_to_gcs(
+                         image_url=file_uri,
+                         bucket_name=bucket_name,
+                         blob_name=image_blob_name,
+                         session=session,
+                         base_image_dir=base_image_dir,
+                     )
+                 except FileNotFoundError:
+                     return None
+
+             parts.append(
+                 {
+                     "file_data": {
+                         "file_uri": f"gs://{bucket_name}/{image_blob_name}",
+                         "mime_type": mime_type,
+                     }
+                 }
+             )
+         else:
+             parts.append({"text": part.data})
+
+     generation_config = {
+         "responseMimeType": "application/json",
+         "response_json_schema": google_json_schema,
+     }
+
+     if thinking_level is not None:
+         generation_config["thinkingConfig"] = {"thinkingLevel": thinking_level}
+
+     return {
+         "key": f"key:{sample.key}",
+         "request": {
+             "contents": [
+                 {
+                     "parts": parts,
+                     "role": "user",
+                 }
+             ],
+             "generationConfig": generation_config,
+         },
+     }
+
+
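For reference, each record returned by upload_to_gcs_format_async (and written as one JSONL line by generate_batch_dataset below) has roughly this shape; all values are illustrative:

record = {
    "key": "key:3017620422003_1",
    "request": {
        "contents": [
            {
                "parts": [
                    {"text": "...extraction instructions..."},
                    {
                        "file_data": {
                            "file_uri": "gs://my-bucket/my-run/3017620422003_1/1.jpg",
                            "mime_type": "image/jpeg",
                        }
                    },
                ],
                "role": "user",
            }
        ],
        "generationConfig": {
            "responseMimeType": "application/json",
            "response_json_schema": {},  # the converted Google schema goes here
        },
    },
}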
+ async def generate_batch_dataset(
+     data_path: Path,
+     output_path: Path,
+     google_json_schema: JSONType,
+     instructions: str | None,
+     bucket_name: str,
+     bucket_dir_name: str,
+     max_concurrent_uploads: int = 30,
+     base_image_dir: Path | None = None,
+     from_key: str | None = None,
+     skip_upload: bool = False,
+     thinking_level: str | None = None,
+ ):
+     limiter = asyncio.Semaphore(max_concurrent_uploads)
+     # Skip input records until `from_key` is reached (no-op when from_key is None)
+     ignore = from_key is not None
+     missing_files = 0
+     async with aiohttp.ClientSession() as session:
+         async with asyncio.TaskGroup() as tg:
+             async with (
+                 aiofiles.open(data_path, "r") as input_file,
+                 aiofiles.open(output_path, "wb") as output_file,
+             ):
+                 async with limiter:
+                     tasks = set()
+                     async for line in tqdm(input_file, desc="samples"):
+                         sample = RawBatchSample.model_validate_json(line)
+                         record_key = sample.key
+                         if from_key is not None and ignore:
+                             if record_key == from_key:
+                                 ignore = False
+                             else:
+                                 continue
+                         task = tg.create_task(
+                             upload_to_gcs_format_async(
+                                 sample=sample,
+                                 google_json_schema=google_json_schema,
+                                 instructions=instructions,
+                                 bucket_name=bucket_name,
+                                 bucket_dir_name=bucket_dir_name,
+                                 session=session,
+                                 base_image_dir=base_image_dir,
+                                 skip_upload=skip_upload,
+                                 thinking_level=thinking_level,
+                             )
+                         )
+                         tasks.add(task)
+
+                         # Flush the current batch of upload tasks
+                         if len(tasks) >= max_concurrent_uploads:
+                             for task in tasks:
+                                 await task
+                                 updated_record = task.result()
+                                 if updated_record is not None:
+                                     await output_file.write(
+                                         orjson.dumps(updated_record) + "\n".encode()
+                                     )
+                                 else:
+                                     missing_files += 1
+                             tasks.clear()
+
+                     # Drain any remaining tasks
+                     for task in tasks:
+                         await task
+                         updated_record = task.result()
+                         if updated_record is not None:
+                             await output_file.write(
+                                 orjson.dumps(updated_record) + "\n".encode()
+                             )
+                         else:
+                             missing_files += 1
+
+     typer.echo(
+         f"Upload and dataset update completed. Wrote updated dataset to {output_path}. "
+         f"Missing files: {missing_files}."
+     )
+
+
+ def launch_batch_job(
+     run_name: str,
+     dataset_path: Path,
+     model: str,
+     location: str,
+ ):
+     """Launch a Gemini Batch Inference job.
+
+     Args:
+         run_name (str): Name of the batch run.
+         dataset_path (Path): Path to the dataset file in JSONL format.
+         model (str): Model to use for the batch job. Example:
+             'gemini-2.5-flash'.
+         location (str): Location for the Vertex AI resources. Example:
+             'europe-west4'.
+     """
+     if model == "gemini-3-pro-preview" and location != "global":
+         typer.echo(
+             "Warning: only 'global' location is supported for 'gemini-3-pro-preview' model. Overriding location to 'global'."
+         )
+         location = "global"
+
+     # Upload the input dataset to a GCS bucket using the Google Cloud Storage client
+     storage_client = storage.Client()
+     bucket_name = "robotoff-batch"  # Replace with your bucket name
+     run_dir = f"gemini-batch/{run_name}"
+     input_file_blob_name = f"{run_dir}/inputs.jsonl"
+     bucket = storage_client.bucket(bucket_name)
+     blob = bucket.blob(input_file_blob_name)
+     blob.upload_from_filename(dataset_path)
+
+     client = genai.Client(
+         http_options=HttpOptions(api_version="v1"),
+         vertexai=True,
+         location=location,
+     )
+     output_uri = f"gs://{bucket_name}/{run_dir}"
+     job = client.batches.create(
+         model=model,
+         src=f"gs://{bucket_name}/{input_file_blob_name}",
+         config=CreateBatchJobConfig(dest=output_uri),
+     )
+     print(job)
+
+
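A hypothetical invocation of the launcher; the run name and dataset path are illustrative, and note that the GCS bucket is currently hardcoded to "robotoff-batch":

from pathlib import Path

from labelr.google_genai import launch_batch_job

launch_batch_job(
    run_name="ingredient-extraction-001",     # hypothetical run name
    dataset_path=Path("batch_inputs.jsonl"),  # JSONL produced by generate_batch_dataset
    model="gemini-2.5-flash",
    location="europe-west4",
)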
+ def generate_sample_iter(
+     prediction_path: Path,
+     json_schema: JSONType,
+     skip: int = 0,
+     limit: int | None = None,
+     is_openfoodfacts_dataset: bool = False,
+     openfoodfacts_flavor: Flavor = Flavor.off,
+     raise_on_invalid_sample: bool = False,
+ ) -> Iterator[LLMImageExtractionSample]:
+     """Generate training samples from a Gemini Batch Inference prediction
+     JSONL file.
+
+     Args:
+         prediction_path (Path): Path to the prediction JSONL file.
+         json_schema (JSONType): JSON schema to validate the predictions.
+         skip (int): Number of initial samples to skip.
+         limit (int | None): Maximum number of samples to generate.
+         is_openfoodfacts_dataset (bool): Whether the dataset is from Open Food
+             Facts.
+         openfoodfacts_flavor (Flavor): Flavor of the Open Food Facts dataset.
+         raise_on_invalid_sample (bool): Whether to re-raise on samples that
+             fail parsing or schema validation; if False, they are skipped.
+     Yields:
+         Iterator[LLMImageExtractionSample]: Generated samples.
+     """
+     skipped = 0
+     invalid = 0
+     with prediction_path.open("r") as f_in:
+         for i, sample_str in enumerate(f_in):
+             if i < skip:
+                 skipped += 1
+                 continue
+             if limit is not None and i >= skip + limit:
+                 break
+             sample = orjson.loads(sample_str)
+             try:
+                 yield generate_sample_from_prediction(
+                     json_schema=json_schema,
+                     sample=sample,
+                     is_openfoodfacts_dataset=is_openfoodfacts_dataset,
+                     openfoodfacts_flavor=openfoodfacts_flavor,
+                 )
+             except Exception as e:
+                 if raise_on_invalid_sample:
+                     raise
+                 else:
+                     typer.echo(
+                         f"Skipping invalid sample at line {i + 1} in {prediction_path}: {e}"
+                     )
+                     invalid += 1
+                     continue
+     if skipped > 0:
+         typer.echo(f"Skipped {skipped} samples.")
+     if invalid > 0:
+         typer.echo(f"Skipped {invalid} invalid samples.")
+
+
+ def generate_sample_from_prediction(
+     json_schema: JSONType,
+     sample: JSONType,
+     is_openfoodfacts_dataset: bool = False,
+     openfoodfacts_flavor: Flavor = Flavor.off,
+ ) -> LLMImageExtractionSample:
+     """Generate an LLMImageExtractionSample from a prediction sample.
+     Args:
+         json_schema (JSONType): JSON schema to validate the predictions.
+         sample (JSONType): Prediction sample.
+         is_openfoodfacts_dataset (bool): Whether the dataset is from Open Food
+             Facts.
+         openfoodfacts_flavor (Flavor): Flavor of the Open Food Facts dataset.
+     Returns:
+         LLMImageExtractionSample: Generated sample.
+     """
+     image_id = sample["key"][len("key:") :]
+     response_str = sample["response"]["candidates"][0]["content"]["parts"][0]["text"]
+     image_uri = sample["request"]["contents"][0]["parts"][1]["file_data"]["file_uri"]
+     image = download_image_from_gcs(image_uri=image_uri)
+     response = orjson.loads(response_str)
+     jsonschema.validate(response, json_schema)
+
+     if is_openfoodfacts_dataset:
+         # Open Food Facts image IDs are "<barcode>_<image_id>" stems
+         image_stem_parts = image_id.split("_")
+         barcode = image_stem_parts[0]
+         off_image_id = image_stem_parts[1]
+         image_id = f"{barcode}_{off_image_id}"
+         image_url = generate_image_url(
+             barcode, off_image_id, flavor=openfoodfacts_flavor
+         )
+     else:
+         barcode = ""
+         off_image_id = ""
+         image_url = ""
+
+     sample_meta = SampleMeta(
+         barcode=barcode,
+         off_image_id=off_image_id,
+         image_url=image_url,
+     )
+     return LLMImageExtractionSample(
+         image_id=image_id,
+         image=image,
+         output=orjson.dumps(response).decode("utf-8"),
+         meta=sample_meta,
+     )
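generate_sample_from_prediction relies on the field layout of Vertex AI batch prediction output; a minimal record satisfying the accesses above would look like this (all values made up):

prediction = {
    "key": "key:3017620422003_1",
    "request": {
        "contents": [
            {
                "parts": [
                    {"text": "...instructions..."},
                    {"file_data": {"file_uri": "gs://my-bucket/run/1.jpg"}},
                ]
            }
        ]
    },
    "response": {
        "candidates": [{"content": {"parts": [{"text": "{\"lang\": \"en\"}"}]}}]
    },
}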
labelr/main.py CHANGED
@@ -4,9 +4,11 @@ import typer
  from openfoodfacts.utils import get_logger

  from labelr.apps import datasets as dataset_app
- from labelr.apps import projects as project_app
+ from labelr.apps import evaluate as evaluate_app
+ from labelr.apps import google_batch as google_batch_app
+ from labelr.apps import hugging_face as hf_app
+ from labelr.apps import label_studio as ls_app
  from labelr.apps import train as train_app
- from labelr.apps import users as user_app

  app = typer.Typer(pretty_exceptions_show_locals=False)

@@ -58,22 +60,35 @@ def predict(
      typer.echo(result)


- app.add_typer(user_app.app, name="users", help="Manage Label Studio users")
  app.add_typer(
-     project_app.app,
-     name="projects",
-     help="Manage Label Studio projects (create, import data, etc.)",
+     ls_app.app,
+     name="ls",
+     help="Manage Label Studio projects (create, import data, etc.).",
+ )
+ app.add_typer(
+     hf_app.app,
+     name="hf",
+     help="Manage Hugging Face Datasets repositories.",
  )
  app.add_typer(
      dataset_app.app,
      name="datasets",
      help="Manage datasets (convert, export, check, etc.)",
  )
-
  app.add_typer(
      train_app.app,
      name="train",
-     help="Train models",
+     help="Train models.",
+ )
+ app.add_typer(
+     evaluate_app.app,
+     name="evaluate",
+     help="Visualize and evaluate trained models.",
+ )
+ app.add_typer(
+     google_batch_app.app,
+     name="google-batch",
+     help="Generate datasets and launch batch jobs on Google Gemini.",
  )

  if __name__ == "__main__":
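A quick way to inspect the reorganized command tree; this sketch assumes labelr.main is importable as shown above and uses Typer's built-in test runner:

from typer.testing import CliRunner

from labelr.main import app

runner = CliRunner()
# Top-level help should now list the sub-apps registered above:
# ls, hf, datasets, train, evaluate, google-batch
result = runner.invoke(app, ["--help"])
print(result.output)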