labelr 0.5.0__py3-none-any.whl → 0.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- labelr/apps/datasets.py +14 -2
- labelr/apps/train.py +40 -3
- labelr/dataset_features.py +31 -0
- labelr/export.py +26 -4
- labelr/utils.py +13 -0
- {labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/METADATA +5 -2
- {labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/RECORD +11 -9
- {labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/WHEEL +0 -0
- {labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/entry_points.txt +0 -0
- {labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/licenses/LICENSE +0 -0
- {labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/top_level.txt +0 -0
labelr/apps/datasets.py
CHANGED
@@ -152,7 +152,10 @@ def export(
     label_studio_url: Optional[str] = LABEL_STUDIO_DEFAULT_URL,
     output_dir: Annotated[
         Optional[Path],
-        typer.Option(
+        typer.Option(
+            help="Path to the output directory. Only used if the destintation (`to`) is `ultralytics`",
+            file_okay=False,
+        ),
     ] = None,
     dataset_dir: Annotated[
         Optional[Path],
@@ -200,11 +203,18 @@ def export(
             help="Use the AWS S3 cache for image downloads instead of images.openfoodfacts.org, "
             "it is ignored if the export format is not Ultralytics"
         ),
-    ] =
+    ] = False,
     merge_labels: Annotated[
         bool,
         typer.Option(help="Merge multiple labels into a single label"),
     ] = False,
+    revision: Annotated[
+        str,
+        typer.Option(
+            help="Revision (branch, tag or commit) for the Hugging Face Datasets repository. "
+            "Only used when importing from or exporting to Hugging Face Datasets."
+        ),
+    ] = "main",
 ):
     """Export Label Studio annotation, either to Hugging Face Datasets or
     local files (ultralytics format)."""
@@ -261,6 +271,7 @@ def export(
             project_id=typing.cast(int, project_id),
             merge_labels=merge_labels,
             use_aws_cache=use_aws_cache,
+            revision=revision,
         )
     elif to == ExportDestination.ultralytics:
         export_from_ls_to_ultralytics_object_detection(
@@ -286,6 +297,7 @@ def export(
             download_images=download_images,
             error_raise=error_raise,
             use_aws_cache=use_aws_cache,
+            revision=revision,
         )
     else:
         raise typer.BadParameter("Unsupported export format")
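The new `revision` option above follows the same `Annotated[..., typer.Option(...)]` pattern as the surrounding parameters and defaults to `main`. A minimal standalone sketch of that pattern (a toy command with illustrative help text, not the actual `export` signature):

```python
# Minimal sketch of the Annotated/typer.Option pattern used in datasets.py.
# The command body and help text here are illustrative, not taken from labelr.
from typing import Annotated

import typer

app = typer.Typer()


@app.command()
def export(
    revision: Annotated[
        str,
        typer.Option(help="Revision (branch, tag or commit) of the dataset repository."),
    ] = "main",
):
    typer.echo(f"Exporting to revision: {revision}")


if __name__ == "__main__":
    app()  # e.g. `python sketch.py --revision v2` prints "Exporting to revision: v2"
```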
labelr/apps/train.py
CHANGED
@@ -6,6 +6,31 @@ from google.cloud import batch_v1
 app = typer.Typer()
 
 
+AVAILABLE_OBJECT_DETECTION_MODELS = [
+    "yolov8n.pt",
+    "yolov8s.pt",
+    "yolov8m.pt",
+    "yolov8l.pt",
+    "yolov8x.pt",
+    "yolov9t.pt",
+    "yolov9s.pt",
+    "yolov9m.pt",
+    "yolov9c.pt",
+    "yolov9e.pt",
+    "yolov10n.pt",
+    "yolov10s.pt",
+    "yolov10m.pt",
+    "yolov10b.pt",
+    "yolov10l.pt",
+    "yolov10x.pt",
+    "yolo11n.pt",
+    "yolo11s.pt",
+    "yolo11m.pt",
+    "yolo11l.pt",
+    "yolo11x.pt",
+]
+
+
 @app.command()
 def train_object_detection(
     wandb_project: str = typer.Option(
@@ -25,9 +50,20 @@ def train_object_detection(
     ),
     epochs: int = typer.Option(100, help="Number of training epochs."),
     imgsz: int = typer.Option(640, help="Size of the image during training."),
-
+    batch: int = typer.Option(64, help="Batch size for training."),
+    model_name: str = typer.Option(
+        "yolov8n.pt",
+        help="The YOLO model variant to use for training. "
+        "This should be a valid Ultralytics model name.",
+    ),
 ):
     """Train an object detection model."""
+
+    if model_name not in AVAILABLE_OBJECT_DETECTION_MODELS:
+        raise typer.BadParameter(
+            f"Invalid model name '{model_name}'. Available models are: {', '.join(AVAILABLE_OBJECT_DETECTION_MODELS)}"
+        )
+
     env_variables = {
         "HF_REPO_ID": hf_repo_id,
         "HF_TRAINED_MODEL_REPO_ID": hf_trained_model_repo_id,
@@ -37,8 +73,9 @@ def train_object_detection(
         "WANDB_API_KEY": wandb_api_key,
         "EPOCHS": str(epochs),
         "IMGSZ": str(imgsz),
-        "BATCH_SIZE": str(
+        "BATCH_SIZE": str(batch),
         "USE_AWS_IMAGE_CACHE": "False",
+        "YOLO_MODEL_NAME": model_name,
     }
     job_name = "train-yolo-job"
     job_name = job_name + "-" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
@@ -128,7 +165,7 @@ def launch_job(
     # will run on.
     policy = batch_v1.AllocationPolicy.InstancePolicy()
     # See list of machine types here:
-    # https://docs.cloud.google.com/compute/docs/gpus#
+    # https://docs.cloud.google.com/compute/docs/gpus#t4-gpus
     policy.machine_type = machine_type
 
     accelerator = batch_v1.AllocationPolicy.Accelerator()
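The new `BATCH_SIZE` and `YOLO_MODEL_NAME` entries are handed to the Google Cloud Batch job as environment variables; the training container that consumes them is not part of this wheel. A hedged sketch of how such a container might read them (the script layout and `data.yaml` path are assumptions, only the variable names mirror the diff):

```python
# Hypothetical consumer-side sketch: reads the environment variables that
# train_object_detection now sets and starts an Ultralytics training run.
# Nothing in this snippet ships with labelr; data.yaml is a placeholder path.
import os

from ultralytics import YOLO

model_name = os.environ.get("YOLO_MODEL_NAME", "yolov8n.pt")
batch = int(os.environ.get("BATCH_SIZE", "64"))
epochs = int(os.environ.get("EPOCHS", "100"))
imgsz = int(os.environ.get("IMGSZ", "640"))

model = YOLO(model_name)
model.train(data="data.yaml", epochs=epochs, imgsz=imgsz, batch=batch)
```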

labelr/dataset_features.py
ADDED

@@ -0,0 +1,31 @@
+import datasets
+from datasets import Features
+from datasets import Image as HFImage
+
+# dataset features for predictions.parquet used in evaluation and visualization
+OBJECT_DETECTION_DS_PREDICTION_FEATURES = Features(
+    {
+        "image": HFImage(),
+        "image_with_prediction": HFImage(),
+        "image_id": datasets.Value("string"),
+        "detected": {
+            "bbox": datasets.Sequence(datasets.Sequence(datasets.Value("float32"))),
+            "category_id": datasets.Sequence(datasets.Value("int64")),
+            "category_name": datasets.Sequence(datasets.Value("string")),
+            "confidence": datasets.Sequence(datasets.Value("float32")),
+        },
+        "split": datasets.Value("string"),
+        "width": datasets.Value("int64"),
+        "height": datasets.Value("int64"),
+        "meta": {
+            "barcode": datasets.Value("string"),
+            "off_image_id": datasets.Value("string"),
+            "image_url": datasets.Value("string"),
+        },
+        "objects": {
+            "bbox": datasets.Sequence(datasets.Sequence(datasets.Value("float32"))),
+            "category_id": datasets.Sequence(datasets.Value("int64")),
+            "category_name": datasets.Sequence(datasets.Value("string")),
+        },
+    }
+)
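The new module describes the schema of the predictions parquet used for evaluation and visualization. A hedged sketch of building a one-row dataset against that schema (the sample values and category name are made up; the import assumes the module is reachable as `labelr.dataset_features`, and Pillow must be installed for the image features):

```python
# Sketch: construct a single-row predictions dataset with the new schema.
# All sample values below are placeholders; only the feature layout is real.
from datasets import Dataset
from PIL import Image

from labelr.dataset_features import OBJECT_DETECTION_DS_PREDICTION_FEATURES

dummy_image = Image.new("RGB", (8, 8))  # stand-in for a product image
sample = {
    "image": dummy_image,
    "image_with_prediction": dummy_image,
    "image_id": "0001",
    "detected": {
        "bbox": [[0.1, 0.2, 0.5, 0.6]],
        "category_id": [0],
        "category_name": ["object"],
        "confidence": [0.93],
    },
    "split": "val",
    "width": 8,
    "height": 8,
    "meta": {"barcode": "123", "off_image_id": "1", "image_url": "https://example.com/1.jpg"},
    "objects": {
        "bbox": [[0.1, 0.2, 0.5, 0.6]],
        "category_id": [0],
        "category_name": ["object"],
    },
}

ds = Dataset.from_list([sample], features=OBJECT_DETECTION_DS_PREDICTION_FEATURES)
print(ds.features)
```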
labelr/export.py
CHANGED
@@ -36,11 +36,18 @@ def export_from_ls_to_hf_object_detection(
     project_id: int,
     merge_labels: bool = False,
     use_aws_cache: bool = True,
+    revision: str = "main",
 ):
     if merge_labels:
         label_names = ["object"]
 
-    logger.info(
+    logger.info(
+        "Project ID: %d, label names: %s, repo_id: %s, revision: %s",
+        project_id,
+        label_names,
+        repo_id,
+        revision,
+    )
 
     for split in ["train", "val"]:
         logger.info("Processing split: %s", split)
@@ -70,7 +77,13 @@ def export_from_ls_to_hf_object_detection(
             functools.partial(_pickle_sample_generator, tmp_dir),
             features=HF_DS_OBJECT_DETECTION_FEATURES,
         )
-        hf_ds.push_to_hub(
+        hf_ds.push_to_hub(
+            repo_id,
+            split=split,
+            revision=revision,
+            # Create a PR if not pushing to main branch
+            create_pr=revision != "main",
+        )
 
 
 def export_from_ls_to_ultralytics_object_detection(
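With `create_pr=revision != "main"`, exporting to any revision other than `main` opens a pull request on the Hub rather than committing directly to that branch. A minimal sketch of that call pattern (placeholder repo id and branch name; a Hub token with write access is required to actually run it):

```python
# Sketch of the push behaviour added above: a non-main revision plus
# create_pr=True opens a PR on the Hub instead of a direct commit.
# The repo id and branch name are placeholders, not real repositories.
from datasets import Dataset

ds = Dataset.from_dict({"image_id": ["0001"], "split": ["train"]})
revision = "new-annotations"  # any branch other than "main"

ds.push_to_hub(
    "my-org/my-object-detection-dataset",
    split="train",
    revision=revision,
    create_pr=revision != "main",
)
```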

@@ -234,19 +247,28 @@ def export_from_hf_to_ultralytics_object_detection(
         is True. Defaults to True.
         revision (str): The dataset revision to load. Defaults to 'main'.
     """
-    logger.info("Repo ID: %s", repo_id)
+    logger.info("Repo ID: %s, revision: %s", repo_id, revision)
     ds = datasets.load_dataset(repo_id, revision=revision)
     data_dir = output_dir / "data"
     data_dir.mkdir(parents=True, exist_ok=True)
     category_id_to_name = {}
 
+    split_map = {
+        "train": "train",
+        "val": "val",
+    }
+    if "val" not in ds and "test" in ds:
+        logger.info("val split not found, using test split instead as val")
+        split_map["val"] = "test"
+
     for split in ["train", "val"]:
+        split_target = split_map[split]
         split_labels_dir = data_dir / "labels" / split
         split_labels_dir.mkdir(parents=True, exist_ok=True)
         split_images_dir = data_dir / "images" / split
         split_images_dir.mkdir(parents=True, exist_ok=True)
 
-        for sample in tqdm.tqdm(ds[
+        for sample in tqdm.tqdm(ds[split_target], desc="samples"):
             image_id = sample["image_id"]
 
             if download_images:
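The new `split_map` step means that when the Hub dataset exposes a `test` split but no `val` split, the exporter reads `test` wherever it would have read `val`, while still writing the output under `val`. A self-contained sketch of that fallback, with a plain dict standing in for the loaded `DatasetDict`:

```python
# Standalone sketch of the val -> test fallback introduced above; the dict of
# lists stands in for a datasets.DatasetDict, which supports the same
# membership test and indexing.
def resolve_split_map(ds) -> dict[str, str]:
    split_map = {"train": "train", "val": "val"}
    if "val" not in ds and "test" in ds:
        split_map["val"] = "test"
    return split_map


toy_ds = {"train": ["sample-1"], "test": ["sample-2"]}  # no "val" split
split_map = resolve_split_map(toy_ds)
for split in ["train", "val"]:
    print(split, "->", split_map[split], toy_ds[split_map[split]])
# train -> train ['sample-1']
# val -> test ['sample-2']
```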
labelr/utils.py
ADDED
@@ -0,0 +1,13 @@
+def parse_hf_repo_id(hf_repo_id: str) -> tuple[str, str]:
+    """Parse the repo_id and the revision from a hf_repo_id in the format:
+    `org/repo-name@revision`.
+
+    Returns a tuple (repo_id, revision), with revision = 'main' if it
+    was not provided.
+    """
+    if "@" in hf_repo_id:
+        hf_repo_id, revision = hf_repo_id.split("@", 1)
+    else:
+        revision = "main"
+
+    return hf_repo_id, revision
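Usage follows the docstring; a quick example with placeholder repo ids:

```python
# Quick check of parse_hf_repo_id; the repository ids are placeholders.
from labelr.utils import parse_hf_repo_id

print(parse_hf_repo_id("openfoodfacts/object-detection@v2"))
# ('openfoodfacts/object-detection', 'v2')
print(parse_hf_repo_id("openfoodfacts/object-detection"))
# ('openfoodfacts/object-detection', 'main')
```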

{labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: labelr
-Version: 0.5.0
+Version: 0.7.0
 Summary: A command-line tool to manage labeling tasks with Label Studio.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -12,8 +12,11 @@ Requires-Dist: more-itertools>=10.5.0
 Requires-Dist: openfoodfacts>=2.9.0
 Requires-Dist: typer>=0.15.1
 Requires-Dist: google-cloud-batch==0.18.0
+Requires-Dist: huggingface-hub
 Provides-Extra: ultralytics
-Requires-Dist: ultralytics
+Requires-Dist: ultralytics==8.3.223; extra == "ultralytics"
+Provides-Extra: fiftyone
+Requires-Dist: fiftyone~=1.10.0; extra == "fiftyone"
 Dynamic: license-file
 
 # Labelr

{labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/RECORD
CHANGED

@@ -3,19 +3,21 @@ labelr/__main__.py,sha256=G4e95-IfhI-lOmkOBP6kQ8wl1x_Fl7dZlLOYr90K83c,66
 labelr/annotate.py,sha256=3fJ9FYbcozcOoKuhNtzPHV8sSnp-45FsNnMc8UeBHGU,3503
 labelr/check.py,sha256=3wK6mE0UsKvoBNm0_lyWhCMq7gxkv5r50pvO70damXY,2476
 labelr/config.py,sha256=3RXF_NdkSuHvfVMGMlYmjlw45fU77zQkLX7gmZq7NxM,64
-labelr/
+labelr/dataset_features.py,sha256=ZC9QAUw9oKHqyUPla2h3xQFaRT9sHq8hkPNN4RDDwmo,1257
+labelr/export.py,sha256=gjC2_RJ_yX8zVYXyo1RAgI07iXSgkeqckOTEzSscRXc,17940
 labelr/main.py,sha256=CioMPtaPoGL_5Oxwj8PfalhTyFahMbfp2kd9KdZzm3Y,2258
 labelr/project_config.py,sha256=CIHEcgSOfXb53naHWEBkTDm2V9m3abAu8C54VSzHjAs,1260
 labelr/sample.py,sha256=unu9AQ64FhKPgssuL7gb3qyMd1EQJvMOfqvjdefmWOU,7807
 labelr/types.py,sha256=8CHfLyifF_N94OYDhG-7IcWboOh9o0Z_0LBtQapT8TQ,313
+labelr/utils.py,sha256=e0R15jePWBzRdN8LB6kBSH5Dl_P0MNEtRmeqB9eu5d8,415
 labelr/apps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-labelr/apps/datasets.py,sha256=
+labelr/apps/datasets.py,sha256=4PMfKS5c7Zw3-NNRBkFbZidMQUI2RBMcXFYBvWHLz3o,11688
 labelr/apps/projects.py,sha256=HpgqIaPrUQzIR7eOLn4EBbEzXRi7hoWStT4jLMQPcBg,15153
-labelr/apps/train.py,sha256=
+labelr/apps/train.py,sha256=sI0p3h39LPXhynwl_yMuZnIPlaqlcWSO_81zPC3H3yI,6886
 labelr/apps/users.py,sha256=twQSlpHxE0hrYkgrJpEFbK8lYfWnpJr8vyfLHLtdAUU,909
-labelr-0.
-labelr-0.
-labelr-0.
-labelr-0.
-labelr-0.
-labelr-0.
+labelr-0.7.0.dist-info/licenses/LICENSE,sha256=hIahDEOTzuHCU5J2nd07LWwkLW7Hko4UFO__ffsvB-8,34523
+labelr-0.7.0.dist-info/METADATA,sha256=NghQ_6mNj1Dkets_GlOOOyoAVEQqoPBbbJXhysOKAWI,6991
+labelr-0.7.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+labelr-0.7.0.dist-info/entry_points.txt,sha256=OACukVeR_2z54i8yQuWqqk_jdEHlyTwmTFOFBmxPp1k,43
+labelr-0.7.0.dist-info/top_level.txt,sha256=bjZo50aGZhXIcZYpYOX4sdAQcamxh8nwfEh7A9RD_Ag,7
+labelr-0.7.0.dist-info/RECORD,,

{labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/WHEEL
File without changes

{labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/entry_points.txt
File without changes

{labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/licenses/LICENSE
File without changes

{labelr-0.5.0.dist-info → labelr-0.7.0.dist-info}/top_level.txt
File without changes