labelr 0.4.1__tar.gz → 0.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {labelr-0.4.1/src/labelr.egg-info → labelr-0.5.0}/PKG-INFO +6 -1
- {labelr-0.4.1 → labelr-0.5.0}/README.md +5 -1
- {labelr-0.4.1 → labelr-0.5.0}/pyproject.toml +2 -1
- labelr-0.5.0/src/labelr/apps/train.py +158 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/export.py +24 -2
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/main.py +7 -0
- {labelr-0.4.1 → labelr-0.5.0/src/labelr.egg-info}/PKG-INFO +6 -1
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr.egg-info/SOURCES.txt +1 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr.egg-info/requires.txt +1 -0
- {labelr-0.4.1 → labelr-0.5.0}/LICENSE +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/setup.cfg +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/__init__.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/__main__.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/annotate.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/apps/__init__.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/apps/datasets.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/apps/projects.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/apps/users.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/check.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/config.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/project_config.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/sample.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr/types.py +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr.egg-info/dependency_links.txt +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr.egg-info/entry_points.txt +0 -0
- {labelr-0.4.1 → labelr-0.5.0}/src/labelr.egg-info/top_level.txt +0 -0
{labelr-0.4.1/src/labelr.egg-info → labelr-0.5.0}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: labelr
-Version: 0.4.1
+Version: 0.5.0
 Summary: A command-line tool to manage labeling tasks with Label Studio.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -11,6 +11,7 @@ Requires-Dist: label-studio-sdk>=1.0.8
 Requires-Dist: more-itertools>=10.5.0
 Requires-Dist: openfoodfacts>=2.9.0
 Requires-Dist: typer>=0.15.1
+Requires-Dist: google-cloud-batch==0.18.0
 Provides-Extra: ultralytics
 Requires-Dist: ultralytics>=8.3.49; extra == "ultralytics"
 Dynamic: license-file
@@ -138,3 +139,7 @@ labelr datasets export --project-id PROJECT_ID --from ls --to huggingface --repo
 ```
 
 where `REPO_ID` is the ID of the Hugging Face repository where the dataset will be uploaded (ex: `openfoodfacts/food-detection`).
+
+### Lauch training jobs
+
+You can also launch training jobs for YOLO object detection models using datasets hosted on Hugging Face. Please refer to the [train-yolo package README](packages/train-yolo/README.md) for more details on how to use this feature.
````
{labelr-0.4.1 → labelr-0.5.0}/README.md

````diff
@@ -120,4 +120,8 @@ To export the data to a Hugging Face dataset, use the following command:
 labelr datasets export --project-id PROJECT_ID --from ls --to huggingface --repo-id REPO_ID --label-names 'product,price-tag'
 ```
 
-where `REPO_ID` is the ID of the Hugging Face repository where the dataset will be uploaded (ex: `openfoodfacts/food-detection`).
+where `REPO_ID` is the ID of the Hugging Face repository where the dataset will be uploaded (ex: `openfoodfacts/food-detection`).
+
+### Lauch training jobs
+
+You can also launch training jobs for YOLO object detection models using datasets hosted on Hugging Face. Please refer to the [train-yolo package README](packages/train-yolo/README.md) for more details on how to use this feature.
````
{labelr-0.4.1 → labelr-0.5.0}/pyproject.toml

````diff
@@ -1,6 +1,6 @@
 [project]
 name = "labelr"
-version = "0.4.1"
+version = "0.5.0"
 description = "A command-line tool to manage labeling tasks with Label Studio."
 readme = "README.md"
 requires-python = ">=3.10"
@@ -11,6 +11,7 @@ dependencies = [
     "more-itertools>=10.5.0",
     "openfoodfacts>=2.9.0",
     "typer>=0.15.1",
+    "google-cloud-batch==0.18.0",
 ]
 
 [project.scripts]
````
labelr-0.5.0/src/labelr/apps/train.py (new file)

````diff
@@ -0,0 +1,158 @@
+import datetime
+
+import typer
+from google.cloud import batch_v1
+
+app = typer.Typer()
+
+
+@app.command()
+def train_object_detection(
+    wandb_project: str = typer.Option(
+        "train-yolo", help="The Weights & Biases project name."
+    ),
+    wandb_api_key: str = typer.Option(..., envvar="WANDB_API_KEY"),
+    hf_token: str = typer.Option(
+        ...,
+        help="The Hugging Face token, used to push the trained model to Hugging Face Hub.",
+    ),
+    run_name: str = typer.Option(..., help="A name for the training run."),
+    hf_repo_id: str = typer.Option(
+        ..., help="The Hugging Face dataset repository ID to use to train."
+    ),
+    hf_trained_model_repo_id: str = typer.Option(
+        ..., help="The Hugging Face repository ID where to push the trained model."
+    ),
+    epochs: int = typer.Option(100, help="Number of training epochs."),
+    imgsz: int = typer.Option(640, help="Size of the image during training."),
+    batch_size: int = typer.Option(64, help="Batch size for training."),
+):
+    """Train an object detection model."""
+    env_variables = {
+        "HF_REPO_ID": hf_repo_id,
+        "HF_TRAINED_MODEL_REPO_ID": hf_trained_model_repo_id,
+        "HF_TOKEN": hf_token,
+        "WANDB_PROJECT": wandb_project,
+        "RUN_NAME": run_name,
+        "WANDB_API_KEY": wandb_api_key,
+        "EPOCHS": str(epochs),
+        "IMGSZ": str(imgsz),
+        "BATCH_SIZE": str(batch_size),
+        "USE_AWS_IMAGE_CACHE": "False",
+    }
+    job_name = "train-yolo-job"
+    job_name = job_name + "-" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+    job = launch_job(
+        job_name=job_name,
+        container_image_uri="europe-west9-docker.pkg.dev/robotoff/gcf-artifacts/train-yolo",
+        env_variables=env_variables,
+    )
+    typer.echo("Job launched")
+    typer.echo(job)
+
+
+def launch_job(
+    job_name: str = typer.Argument(
+        ...,
+        help="The name of the Google Batch job that will be created. "
+        "It needs to be unique for each project and region pair.",
+    ),
+    container_image_uri: str = typer.Argument(
+        ..., help="The URI of the container image that will be run as part of the job."
+    ),
+    commands: str | None = None,
+    env_variables: dict[str, str] | None = None,
+    entrypoint: str | None = None,
+    cpu_milli: int = 4000,  # in milli-CPU units (4000 = 4 CPUs). This means the task requires 4 whole CPUs.
+    memory_mib: int = 16000,  # Make sure to have enough memory for the 2GB of shared memory set below.
+    boot_disk_mib: int = 100000,
+    max_retry_count: int = 1,
+    max_run_duration: str = "86400s",  # 24 hours
+    task_count: int = 1,
+    accelerators_type: str = "nvidia-tesla-t4",
+    machine_type: str = "n1-standard-8",
+    google_project_id: str = "robotoff",
+    accelerators_count: int = 1,
+    region: str = "europe-west4",
+    install_gpu_drivers: bool = True,
+) -> batch_v1.Job:
+    """This method creates a Batch Job on GCP.
+
+    Sources:
+    * https://github.com/GoogleCloudPlatform/python-docs-samples/tree/main/batch/create
+    * https://cloud.google.com/python/docs/reference/batch/latest/google.cloud.batch_v1.types  # noqa
+
+    :param google_batch_launch_config: Config to run a job on Google Batch.
+    :param batch_job_config: Config to run a specific job on Google Batch.
+    :return: Batch job information.
+
+    Returns:
+        Batch job information.
+    """
+    client = batch_v1.BatchServiceClient()
+
+    # Define what will be done as part of the job.
+    runnable = batch_v1.Runnable()
+    runnable.container = batch_v1.Runnable.Container()
+    runnable.container.image_uri = container_image_uri
+    runnable.container.entrypoint = entrypoint  # type: ignore
+    # By default, /dev/shm is 64MB which is not enough for Pytorch
+    runnable.container.options = "--shm-size=2048m"
+    runnable.container.commands = commands
+
+    # Jobs can be divided into tasks. In this case, we have only one task.
+    task = batch_v1.TaskSpec()
+    task.runnables = [runnable]
+
+    # Environment variables.
+    envable = batch_v1.Environment()
+    envable.variables = env_variables or {}
+    task.environment = envable
+
+    # We can specify what resources are requested by each task.
+    resources = batch_v1.ComputeResource()
+    resources.cpu_milli = cpu_milli
+    resources.memory_mib = memory_mib
+    resources.boot_disk_mib = boot_disk_mib  # type: ignore
+    task.compute_resource = resources
+
+    task.max_retry_count = max_retry_count
+    task.max_run_duration = max_run_duration  # type: ignore
+
+    # Tasks are grouped inside a job using TaskGroups.
+    group = batch_v1.TaskGroup()
+    group.task_count = task_count  # type: ignore
+    group.task_spec = task
+
+    # Policies are used to define on what kind of virtual machines the tasks
+    # will run on.
+    policy = batch_v1.AllocationPolicy.InstancePolicy()
+    # See list of machine types here:
+    # https://docs.cloud.google.com/compute/docs/gpus#l4-gpus
+    policy.machine_type = machine_type
+
+    accelerator = batch_v1.AllocationPolicy.Accelerator()
+    accelerator.type_ = accelerators_type
+    accelerator.count = accelerators_count
+
+    policy.accelerators = [accelerator]
+    instances = batch_v1.AllocationPolicy.InstancePolicyOrTemplate()
+    instances.policy = policy
+    instances.install_gpu_drivers = install_gpu_drivers
+    allocation_policy = batch_v1.AllocationPolicy()
+    allocation_policy.instances = [instances]
+
+    job = batch_v1.Job()
+    job.task_groups = [group]
+    job.allocation_policy = allocation_policy
+    # We use Cloud Logging as it's an out of the box available option
+    job.logs_policy = batch_v1.LogsPolicy()
+    job.logs_policy.destination = batch_v1.LogsPolicy.Destination.CLOUD_LOGGING  # type: ignore
+
+    create_request = batch_v1.CreateJobRequest()
+    create_request.job = job
+    create_request.job_id = job_name
+    # The job's parent is the region in which the job will run
+    create_request.parent = f"projects/{google_project_id}/locations/{region}"
+
+    return client.create_job(create_request)
````
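In this new module, `train_object_detection` is a thin wrapper: it packs its options into environment variables and delegates to `launch_job`, which builds the Google Batch job from a container runnable, compute resources, and a GPU allocation policy. A minimal sketch of calling `launch_job` directly from Python, assuming only the signature shown in the diff above; the job name, image URI, and env var below are hypothetical placeholders, and the call runs against the module's default GCP project/region unless overridden:

```python
# Hedged sketch (not shipped with the package): launching a custom Google
# Batch job via the launch_job helper added in labelr 0.5.0.
from labelr.apps.train import launch_job

job = launch_job(
    # Placeholder name; must be unique per GCP project and region pair.
    job_name="my-experiment-20250101",
    # Placeholder image URI; any container image reachable by Batch works.
    container_image_uri="europe-west9-docker.pkg.dev/my-project/my-repo/my-image",
    env_variables={"EPOCHS": "50"},  # forwarded to the container environment
    # Defaults (T4 GPU, n1-standard-8, europe-west4) can be overridden:
    accelerators_type="nvidia-tesla-t4",
    machine_type="n1-standard-8",
    region="europe-west4",
)
print(job.name)  # fully qualified job resource name returned by the Batch API
```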
{labelr-0.4.1 → labelr-0.5.0}/src/labelr/export.py

````diff
@@ -212,15 +212,30 @@ def export_from_hf_to_ultralytics_object_detection(
     download_images: bool = True,
     error_raise: bool = True,
     use_aws_cache: bool = True,
+    revision: str = "main",
 ):
     """Export annotations from a Hugging Face dataset project to the
     Ultralytics format.
 
     The Label Studio project should be an object detection project with a
     single rectanglelabels annotation result per task.
+
+    Args:
+        repo_id (str): Hugging Face repository ID to load the dataset from.
+        output_dir (Path): Path to the output directory.
+        download_images (bool): Whether to download images from URLs in the
+            dataset. If False, the dataset is expected to contain an `image`
+            field with the image data.
+        error_raise (bool): Whether to raise an error if an image fails to
+            download. If False, the image will be skipped. This option is only
+            used if `download_images` is True. Defaults to True.
+        use_aws_cache (bool): Whether to use the AWS image cache when
+            downloading images. This option is only used if `download_images`
+            is True. Defaults to True.
+        revision (str): The dataset revision to load. Defaults to 'main'.
     """
     logger.info("Repo ID: %s", repo_id)
-    ds = datasets.load_dataset(repo_id)
+    ds = datasets.load_dataset(repo_id, revision=revision)
     data_dir = output_dir / "data"
     data_dir.mkdir(parents=True, exist_ok=True)
     category_id_to_name = {}
@@ -233,9 +248,16 @@ def export_from_hf_to_ultralytics_object_detection(
 
     for sample in tqdm.tqdm(ds[split], desc="samples"):
         image_id = sample["image_id"]
-        image_url = sample["meta"]["image_url"]
 
         if download_images:
+            if "meta" not in sample or "image_url" not in sample["meta"]:
+                raise ValueError(
+                    "`meta.image_url` field not found in sample. "
+                    "Make sure the dataset contains the `meta.image_url` "
+                    "field, which should be the URL of the image, or set "
+                    "`download_images` to False."
+                )
+            image_url = sample["meta"]["image_url"]
             download_output = download_image(
                 image_url,
                 return_struct=True,
````
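The new `revision` argument lets the export be pinned to a specific dataset commit rather than always tracking `main`. A minimal sketch of calling the function, assuming it is importable from `labelr.export` and that `repo_id`/`output_dir` lead the signature as the new docstring describes; the repo ID, output path, and revision are placeholders:

```python
# Hedged sketch (not from the package): exporting a pinned dataset revision
# to the Ultralytics format using the revision parameter added in 0.5.0.
from pathlib import Path

from labelr.export import export_from_hf_to_ultralytics_object_detection

export_from_hf_to_ultralytics_object_detection(
    repo_id="openfoodfacts/food-detection",   # placeholder HF dataset repo
    output_dir=Path("datasets/food-detection"),
    download_images=True,   # requires a `meta.image_url` field per sample
    error_raise=False,      # skip images that fail to download
    use_aws_cache=True,
    revision="main",        # or a commit SHA / tag for reproducible exports
)
```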
{labelr-0.4.1 → labelr-0.5.0}/src/labelr/main.py

````diff
@@ -5,6 +5,7 @@ from openfoodfacts.utils import get_logger
 
 from labelr.apps import datasets as dataset_app
 from labelr.apps import projects as project_app
+from labelr.apps import train as train_app
 from labelr.apps import users as user_app
 
 app = typer.Typer(pretty_exceptions_show_locals=False)
@@ -69,5 +70,11 @@ app.add_typer(
     help="Manage datasets (convert, export, check, etc.)",
 )
 
+app.add_typer(
+    train_app.app,
+    name="train",
+    help="Train models",
+)
+
 if __name__ == "__main__":
     app()
````
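With the sub-app registered, the new command surfaces under `labelr train`; typer converts the function name `train_object_detection` to the command name `train-object-detection` by default. A small sketch (an assumed check, not shipped with the package) using typer's test runner to confirm the wiring:

```python
# Hedged sketch: verify the `train` sub-app added in 0.5.0 is reachable
# from the top-level CLI.
from typer.testing import CliRunner

from labelr.main import app

runner = CliRunner()
result = runner.invoke(app, ["train", "--help"])
assert result.exit_code == 0
assert "train-object-detection" in result.stdout
```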
{labelr-0.4.1 → labelr-0.5.0/src/labelr.egg-info}/PKG-INFO

````diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: labelr
-Version: 0.4.1
+Version: 0.5.0
 Summary: A command-line tool to manage labeling tasks with Label Studio.
 Requires-Python: >=3.10
 Description-Content-Type: text/markdown
@@ -11,6 +11,7 @@ Requires-Dist: label-studio-sdk>=1.0.8
 Requires-Dist: more-itertools>=10.5.0
 Requires-Dist: openfoodfacts>=2.9.0
 Requires-Dist: typer>=0.15.1
+Requires-Dist: google-cloud-batch==0.18.0
 Provides-Extra: ultralytics
 Requires-Dist: ultralytics>=8.3.49; extra == "ultralytics"
 Dynamic: license-file
@@ -138,3 +139,7 @@ labelr datasets export --project-id PROJECT_ID --from ls --to huggingface --repo
 ```
 
 where `REPO_ID` is the ID of the Hugging Face repository where the dataset will be uploaded (ex: `openfoodfacts/food-detection`).
+
+### Lauch training jobs
+
+You can also launch training jobs for YOLO object detection models using datasets hosted on Hugging Face. Please refer to the [train-yolo package README](packages/train-yolo/README.md) for more details on how to use this feature.
````