lightly-studio 0.3.2-py3-none-any.whl → 0.3.4-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of lightly-studio might be problematic.
- lightly_studio/__init__.py +1 -1
- lightly_studio/api/app.py +8 -4
- lightly_studio/api/db_tables.py +0 -3
- lightly_studio/api/routes/api/annotation.py +26 -0
- lightly_studio/api/routes/api/annotations/__init__.py +7 -0
- lightly_studio/api/routes/api/annotations/create_annotation.py +52 -0
- lightly_studio/api/routes/api/caption.py +30 -0
- lightly_studio/api/routes/api/dataset.py +3 -5
- lightly_studio/api/routes/api/embeddings2d.py +136 -0
- lightly_studio/api/routes/api/export.py +73 -0
- lightly_studio/api/routes/api/metadata.py +57 -1
- lightly_studio/api/routes/api/selection.py +87 -0
- lightly_studio/core/add_samples.py +138 -9
- lightly_studio/core/dataset.py +174 -63
- lightly_studio/core/dataset_query/dataset_query.py +5 -0
- lightly_studio/dataset/env.py +4 -0
- lightly_studio/dataset/file_utils.py +13 -2
- lightly_studio/dataset/loader.py +2 -62
- lightly_studio/dataset/mobileclip_embedding_generator.py +3 -2
- lightly_studio/db_manager.py +10 -4
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.B3oFNb6O.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/2.CkOblLn7.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/Samples.CIbricz7.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.7Ma7YdVg.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{useFeatureFlags.CV-KWLNP.css → _layout.CefECEWA.css} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/transform.2jKMtOWG.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/-DXuGN29.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Ccq4ZD0B.js → B7302SU7.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BeWf8-vJ.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bqz7dyEC.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C1FmrZbK.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DRZO-E-T.js → CSCQddQS.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CZGpyrcA.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CfQ4mGwl.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CiaNZCBa.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cqo0Vpvt.js +417 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cy4fgWTG.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D5w4xp5l.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DD63uD-T.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DQ8aZ1o-.js +3 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Df3aMO5B.js → DSxvnAMh.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D_JuJOO3.js +20 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D_ynJAfY.js +2 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Dafy4oEQ.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BqBqV92V.js → Dj4O-5se.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DmjAI-UV.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Dug7Bq1S.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Dv5BSBQG.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DzBTnFhV.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DzX_yyqb.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Frwd2CjB.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/H4l0JFh9.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/H60ATh8g.js +2 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/qIv1kPyv.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/sLqs1uaK.js +20 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/u-it74zV.js +96 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BPc0HQPq.js +2 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.SNvc2nrm.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.5jT7P06o.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/1.Cdy-7S5q.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.C_uoESTX.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.DcO8wIAc.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.BIldfkxL.js +1012 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.w9g4AcAx.js → 3.BC9z_TWM.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.BBI8KwnD.js → 4.D8X_Ch5n.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.CAXhxJu6.js +39 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{6.CrbkRPam.js → 6.DRA5Ru_2.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.WVBsruHQ.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.BuKUrCEN.js +20 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/9.CUIn1yCR.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/clustering.worker-DKqeLtG0.js +2 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/search.worker-vNSty3B0.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
- lightly_studio/dist_lightly_studio_view_app/index.html +15 -14
- lightly_studio/examples/example.py +4 -0
- lightly_studio/examples/example_coco.py +4 -0
- lightly_studio/examples/example_coco_caption.py +24 -0
- lightly_studio/examples/example_metadata.py +4 -1
- lightly_studio/examples/example_selection.py +4 -0
- lightly_studio/examples/example_split_work.py +4 -0
- lightly_studio/examples/example_yolo.py +4 -0
- lightly_studio/export/export_dataset.py +73 -0
- lightly_studio/export/lightly_studio_label_input.py +120 -0
- lightly_studio/few_shot_classifier/classifier_manager.py +5 -26
- lightly_studio/metadata/compute_typicality.py +67 -0
- lightly_studio/models/annotation/annotation_base.py +11 -12
- lightly_studio/models/caption.py +73 -0
- lightly_studio/models/dataset.py +1 -2
- lightly_studio/models/metadata.py +1 -1
- lightly_studio/models/sample.py +2 -2
- lightly_studio/resolvers/annotation_label_resolver/__init__.py +2 -1
- lightly_studio/resolvers/annotation_label_resolver/get_all.py +15 -0
- lightly_studio/resolvers/annotation_resolver/__init__.py +2 -3
- lightly_studio/resolvers/annotation_resolver/create_many.py +3 -3
- lightly_studio/resolvers/annotation_resolver/delete_annotation.py +1 -1
- lightly_studio/resolvers/annotation_resolver/delete_annotations.py +7 -3
- lightly_studio/resolvers/annotation_resolver/get_by_id.py +19 -1
- lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +0 -1
- lightly_studio/resolvers/annotations/annotations_filter.py +1 -11
- lightly_studio/resolvers/caption_resolver.py +80 -0
- lightly_studio/resolvers/dataset_resolver.py +4 -7
- lightly_studio/resolvers/metadata_resolver/__init__.py +2 -2
- lightly_studio/resolvers/metadata_resolver/sample/__init__.py +3 -3
- lightly_studio/resolvers/metadata_resolver/sample/bulk_update_metadata.py +46 -0
- lightly_studio/resolvers/samples_filter.py +18 -10
- lightly_studio/selection/mundig.py +7 -10
- lightly_studio/selection/selection_config.py +4 -1
- lightly_studio/services/annotations_service/__init__.py +8 -0
- lightly_studio/services/annotations_service/create_annotation.py +63 -0
- lightly_studio/services/annotations_service/delete_annotation.py +22 -0
- lightly_studio/type_definitions.py +2 -0
- {lightly_studio-0.3.2.dist-info → lightly_studio-0.3.4.dist-info}/METADATA +231 -41
- {lightly_studio-0.3.2.dist-info → lightly_studio-0.3.4.dist-info}/RECORD +114 -104
- lightly_studio/api/routes/api/annotation_task.py +0 -37
- lightly_studio/api/routes/api/metrics.py +0 -76
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.DenzbfeK.css +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BBm0IWdq.css +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BNTuXSAe.css +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.T-zjSUd3.css +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/2O287xak.js +0 -3
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/7YNGEs1C.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BBoGk9hq.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BRnH9v23.js +0 -92
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bg1Y5eUZ.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C0JiMuYn.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C98Hk3r5.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CG0dMCJi.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cpy-nab_.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Crk-jcvV.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cs31G8Qn.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CsKrY2zA.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cur71c3O.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CzgC3GFB.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D8GZDMNN.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DFRh-Spp.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DcGCxgpH.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DkR_EZ_B.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DqUGznj_.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/H7C68rOM.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KpAtIldw.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/M1Q1F7bw.js +0 -4
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/OH7-C_mc.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/gLNdjSzu.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/i0ZZ4z06.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BI-EA5gL.js +0 -2
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.CcsRl3cZ.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.BbO4Zc3r.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/1._I9GR805.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.J2RBFrSr.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.Cmqj25a-.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C45iKJHA.js +0 -6
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.huHuxdiF.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.FomEdhD6.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cb_ADSLk.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/9.CajIG5ce.js +0 -1
- lightly_studio/metrics/__init__.py +0 -0
- lightly_studio/metrics/detection/__init__.py +0 -0
- lightly_studio/metrics/detection/map.py +0 -268
- lightly_studio/models/annotation_task.py +0 -28
- lightly_studio/resolvers/annotation_resolver/create.py +0 -19
- lightly_studio/resolvers/annotation_task_resolver.py +0 -31
- lightly_studio/resolvers/metadata_resolver/sample/bulk_set_metadata.py +0 -48
- {lightly_studio-0.3.2.dist-info → lightly_studio-0.3.4.dist-info}/WHEEL +0 -0
lightly_studio/core/add_samples.py
CHANGED
@@ -2,6 +2,8 @@
 
 from __future__ import annotations
 
+import json
+from collections import defaultdict
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Iterable
@@ -26,10 +28,12 @@ from tqdm import tqdm
 
 from lightly_studio.models.annotation.annotation_base import AnnotationCreate
 from lightly_studio.models.annotation_label import AnnotationLabelCreate
+from lightly_studio.models.caption import CaptionCreate
 from lightly_studio.models.sample import SampleCreate, SampleTable
 from lightly_studio.resolvers import (
     annotation_label_resolver,
     annotation_resolver,
+    caption_resolver,
     sample_resolver,
 )
 
@@ -46,7 +50,6 @@ class _AnnotationProcessingContext:
     dataset_id: UUID
     sample_id: UUID
     label_map: dict[int, UUID]
-    annotation_task_id: UUID
 
 
 @dataclass
@@ -137,7 +140,6 @@ def load_into_dataset_from_labelformat(
     dataset_id: UUID,
     input_labels: ObjectDetectionInput | InstanceSegmentationInput,
     images_path: Path,
-    annotation_task_id: UUID,
 ) -> list[UUID]:
     """Load samples and their annotations from a labelformat input into the dataset.
 
@@ -146,7 +148,6 @@ def load_into_dataset_from_labelformat(
         dataset_id: The ID of the dataset to load samples into.
         input_labels: The labelformat input containing images and annotations.
         images_path: The path to the directory containing the images.
-        annotation_task_id: The ID of the annotation task to associate with the annotations.
 
     Returns:
         A list of UUIDs of the created samples.
@@ -192,7 +193,6 @@ def load_into_dataset_from_labelformat(
             image_path_to_anno_data=image_path_to_anno_data,
             dataset_id=dataset_id,
             label_map=label_map,
-            annotation_task_id=annotation_task_id,
             annotations_to_create=annotations_to_create,
         )
         samples_to_create.clear()
@@ -210,7 +210,6 @@ def load_into_dataset_from_labelformat(
         image_path_to_anno_data=image_path_to_anno_data,
         dataset_id=dataset_id,
         label_map=label_map,
-        annotation_task_id=annotation_task_id,
         annotations_to_create=annotations_to_create,
     )
 
@@ -223,6 +222,111 @@ def load_into_dataset_from_labelformat(
     return created_sample_ids
 
 
+def load_into_dataset_from_coco_captions(
+    session: Session,
+    dataset_id: UUID,
+    annotations_json: Path,
+    images_path: Path,
+) -> list[UUID]:
+    """Load samples and captions from a COCO captions file into the dataset.
+
+    Args:
+        session: Database session used for resolver operations.
+        dataset_id: Identifier of the dataset that receives the samples.
+        annotations_json: Path to the COCO captions annotations file.
+        images_path: Directory containing the referenced images.
+
+    Returns:
+        The list of newly created sample identifiers.
+    """
+    with fsspec.open(str(annotations_json), "r") as file:
+        coco_payload = json.load(file)
+
+    images: list[dict[str, object]] = coco_payload.get("images", [])
+    annotations: list[dict[str, object]] = coco_payload.get("annotations", [])
+
+    captions_by_image_id: dict[int, list[str]] = defaultdict(list)
+    for annotation in annotations:
+        image_id = annotation["image_id"]
+        caption = annotation["caption"]
+        if not isinstance(image_id, int):
+            continue
+        if not isinstance(caption, str):
+            continue
+        caption_text = caption.strip()
+        if not caption_text:
+            continue
+        captions_by_image_id[image_id].append(caption_text)
+
+    logging_context = _LoadingLoggingContext(
+        n_samples_to_be_inserted=len(images),
+        n_samples_before_loading=sample_resolver.count_by_dataset_id(
+            session=session, dataset_id=dataset_id
+        ),
+    )
+
+    captions_to_create: list[CaptionCreate] = []
+    samples_to_create: list[SampleCreate] = []
+    created_sample_ids: list[UUID] = []
+    image_path_to_captions: dict[str, list[str]] = {}
+
+    for image_info in tqdm(images, desc="Processing images", unit=" images"):
+        if isinstance(image_info["id"], int):
+            image_id_raw = image_info["id"]
+        else:
+            continue
+        file_name_raw = str(image_info["file_name"])
+
+        width = image_info["width"] if isinstance(image_info["width"], int) else 0
+        height = image_info["height"] if isinstance(image_info["height"], int) else 0
+        sample = SampleCreate(
+            file_name=file_name_raw,
+            file_path_abs=str(images_path / file_name_raw),
+            width=width,
+            height=height,
+            dataset_id=dataset_id,
+        )
+        samples_to_create.append(sample)
+        image_path_to_captions[sample.file_path_abs] = captions_by_image_id.get(image_id_raw, [])
+
+        if len(samples_to_create) >= SAMPLE_BATCH_SIZE:
+            created_samples_batch, paths_not_inserted = _create_batch_samples(
+                session=session, samples=samples_to_create
+            )
+            created_sample_ids.extend(s.sample_id for s in created_samples_batch)
+            logging_context.update_example_paths(paths_not_inserted)
+            _process_batch_captions(
+                session=session,
+                dataset_id=dataset_id,
+                stored_samples=created_samples_batch,
+                image_path_to_captions=image_path_to_captions,
+                captions_to_create=captions_to_create,
+            )
+            samples_to_create.clear()
+            image_path_to_captions.clear()
+
+    if samples_to_create:
+        created_samples_batch, paths_not_inserted = _create_batch_samples(
+            session=session, samples=samples_to_create
+        )
+        created_sample_ids.extend(s.sample_id for s in created_samples_batch)
+        logging_context.update_example_paths(paths_not_inserted)
+        _process_batch_captions(
+            session=session,
+            dataset_id=dataset_id,
+            stored_samples=created_samples_batch,
+            image_path_to_captions=image_path_to_captions,
+            captions_to_create=captions_to_create,
+        )
+
+    if captions_to_create:
+        caption_resolver.create_many(session=session, captions=captions_to_create)
+
+    _log_loading_results(session=session, dataset_id=dataset_id, logging_context=logging_context)
+
+    return created_sample_ids
+
+
 def _log_loading_results(
     session: Session, dataset_id: UUID, logging_context: _LoadingLoggingContext
 ) -> None:
@@ -304,7 +408,6 @@ def _process_object_detection_annotations(
                 width=int(width),
                 height=int(height),
                 confidence=obj.confidence,
-                annotation_task_id=context.annotation_task_id,
             )
         )
     return new_annotations
@@ -339,7 +442,6 @@ def _process_instance_segmentation_annotations(
                 width=int(width),
                 height=int(height),
                 segmentation_mask=segmentation_rle,
-                annotation_task_id=context.annotation_task_id,
             )
         )
     return new_annotations
@@ -351,7 +453,6 @@ def _process_batch_annotations(  # noqa: PLR0913
     image_path_to_anno_data: dict[str, ImageInstanceSegmentation | ImageObjectDetection],
     dataset_id: UUID,
     label_map: dict[int, UUID],
-    annotation_task_id: UUID,
     annotations_to_create: list[AnnotationCreate],
 ) -> None:
     """Process annotations for a batch of samples."""
@@ -362,7 +463,6 @@ def _process_batch_annotations(  # noqa: PLR0913
             dataset_id=dataset_id,
             sample_id=stored_sample.sample_id,
             label_map=label_map,
-            annotation_task_id=annotation_task_id,
         )
 
         if isinstance(anno_data, ImageInstanceSegmentation):
@@ -381,3 +481,32 @@ def _process_batch_annotations(  # noqa: PLR0913
         if len(annotations_to_create) >= ANNOTATION_BATCH_SIZE:
             annotation_resolver.create_many(session=session, annotations=annotations_to_create)
             annotations_to_create.clear()
+
+
+def _process_batch_captions(
+    session: Session,
+    dataset_id: UUID,
+    stored_samples: list[SampleTable],
+    image_path_to_captions: dict[str, list[str]],
+    captions_to_create: list[CaptionCreate],
+) -> None:
+    """Process captions for a batch of samples."""
+    if not stored_samples:
+        return
+
+    for stored_sample in stored_samples:
+        captions = image_path_to_captions[stored_sample.file_path_abs]
+        if not captions:
+            continue
+
+        for caption_text in captions:
+            caption = CaptionCreate(
+                dataset_id=dataset_id,
+                sample_id=stored_sample.sample_id,
+                text=caption_text,
+            )
+            captions_to_create.append(caption)
+
+        if len(captions_to_create) >= ANNOTATION_BATCH_SIZE:
+            caption_resolver.create_many(session=session, captions=captions_to_create)
+            captions_to_create.clear()
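For orientation, the payload shape the new caption loader reads follows the standard COCO captions schema: a top-level `images` list and an `annotations` list joined on `image_id`. A minimal sketch (file names and sizes here are illustrative, not taken from the package):

```python
# Minimal COCO-captions payload as parsed by load_into_dataset_from_coco_captions.
# Annotations without an int "image_id" or a non-empty str "caption" are skipped;
# non-int "width"/"height" fall back to 0.
coco_payload = {
    "images": [
        {"id": 1, "file_name": "000001.jpg", "width": 640, "height": 480},
    ],
    "annotations": [
        {"image_id": 1, "caption": "A dog running on a beach."},
        {"image_id": 1, "caption": "Two people walking along the shore."},
    ],
}
```

Samples are inserted in batches of `SAMPLE_BATCH_SIZE`, while captions accumulate and are flushed through `caption_resolver.create_many` once `ANNOTATION_BATCH_SIZE` is reached.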
lightly_studio/core/dataset.py
CHANGED
@@ -6,6 +6,7 @@ from pathlib import Path
 from typing import Iterable, Iterator
 from uuid import UUID
 
+import yaml
 from labelformat.formats import (
     COCOInstanceSegmentationInput,
     COCOObjectDetectionInput,
@@ -28,21 +29,23 @@ from lightly_studio.core.dataset_query.order_by import OrderByExpression
 from lightly_studio.core.sample import Sample
 from lightly_studio.dataset import fsspec_lister
 from lightly_studio.dataset.embedding_manager import EmbeddingManagerProvider
-from lightly_studio.
-
+from lightly_studio.metadata import compute_typicality
+from lightly_studio.models.annotation.annotation_base import (
     AnnotationType,
 )
 from lightly_studio.models.dataset import DatasetCreate, DatasetTable
 from lightly_studio.models.sample import SampleTable
 from lightly_studio.resolvers import (
-    annotation_task_resolver,
     dataset_resolver,
+    embedding_model_resolver,
     sample_resolver,
+    tag_resolver,
 )
 from lightly_studio.type_definitions import PathLike
 
 # Constants
 DEFAULT_DATASET_NAME = "default_dataset"
+ALLOWED_YOLO_SPLITS = {"train", "val", "test", "minival"}
 
 _SliceType = slice  # to avoid shadowing built-in slice in type annotations
 
@@ -68,7 +71,7 @@ class Dataset:
 
         dataset = dataset_resolver.create(
             session=db_manager.persistent_session(),
-            dataset=DatasetCreate(name=name
+            dataset=DatasetCreate(name=name),
         )
         return Dataset(dataset=dataset)
 
@@ -234,8 +237,6 @@ class Dataset:
         self,
         input_labels: ObjectDetectionInput | InstanceSegmentationInput,
         images_path: PathLike,
-        is_prediction: bool = True,
-        task_name: str | None = None,
         embed: bool = True,
     ) -> None:
         """Load a dataset from a labelformat object and store in database.
@@ -243,40 +244,17 @@ class Dataset:
         Args:
             input_labels: The labelformat input object.
             images_path: Path to the folder containing the images.
-            is_prediction: Whether the task is for prediction or labels.
-            task_name: Optional name for the annotation task. If None, a
-                default name is generated.
             embed: If True, generate embeddings for the newly added samples.
         """
         if isinstance(images_path, str):
             images_path = Path(images_path)
         images_path = images_path.absolute()
 
-        # Determine annotation type based on input.
-        # Currently, we always create BBOX tasks, even for segmentation,
-        # as segmentation data is stored alongside bounding boxes.
-        annotation_type = AnnotationType.BBOX
-
-        # Generate a default task name if none is provided.
-        if task_name is None:
-            task_name = f"Loaded from labelformat: {self.name}"
-
-        # Create annotation task.
-        new_annotation_task = annotation_task_resolver.create(
-            session=self.session,
-            annotation_task=AnnotationTaskTable(
-                name=task_name,
-                annotation_type=annotation_type,
-                is_prediction=is_prediction,
-            ),
-        )
-
         created_sample_ids = add_samples.load_into_dataset_from_labelformat(
             session=self.session,
             dataset_id=self.dataset_id,
             input_labels=input_labels,
             images_path=images_path,
-            annotation_task_id=new_annotation_task.annotation_task_id,
         )
 
         if embed:
@@ -287,17 +265,15 @@ class Dataset:
     def add_samples_from_yolo(
         self,
         data_yaml: PathLike,
-        input_split: str =
-        task_name: str | None = None,
+        input_split: str | None = None,
         embed: bool = True,
     ) -> None:
         """Load a dataset in YOLO format and store in DB.
 
         Args:
             data_yaml: Path to the YOLO data.yaml file.
-            input_split: The split to load (e.g., 'train', 'val').
-
-                default name is generated.
+            input_split: The split to load (e.g., 'train', 'val', 'test').
+                If None, all available splits will be loaded and assigned a corresponding tag.
             embed: If True, generate embeddings for the newly added samples.
         """
         if isinstance(data_yaml, str):
@@ -307,30 +283,54 @@ class Dataset:
         if not data_yaml.is_file() or data_yaml.suffix != ".yaml":
            raise FileNotFoundError(f"YOLO data yaml file not found: '{data_yaml}'")
 
-
-
+        # Determine which splits to process
+        splits_to_process = _resolve_yolo_splits(data_yaml=data_yaml, input_split=input_split)
 
-
-        label_input = YOLOv8ObjectDetectionInput(
-            input_file=data_yaml,
-            input_split=input_split,
-        )
-        images_path = label_input._images_dir()  # noqa: SLF001
+        all_created_sample_ids = []
 
-
-
-
-
-
-
-
+        # Process each split
+        for split in splits_to_process:
+            # Load the dataset using labelformat.
+            label_input = YOLOv8ObjectDetectionInput(
+                input_file=data_yaml,
+                input_split=split,
+            )
+            images_path = label_input._images_dir()  # noqa: SLF001
+
+            created_sample_ids = add_samples.load_into_dataset_from_labelformat(
+                session=self.session,
+                dataset_id=self.dataset_id,
+                input_labels=label_input,
+                images_path=images_path,
+            )
+
+            # Tag samples with split name
+            if created_sample_ids:
+                tag = tag_resolver.get_or_create_sample_tag_by_name(
+                    session=self.session,
+                    dataset_id=self.dataset_id,
+                    tag_name=split,
+                )
+                tag_resolver.add_sample_ids_to_tag_id(
+                    session=self.session,
+                    tag_id=tag.tag_id,
+                    sample_ids=created_sample_ids,
+                )
+
+            all_created_sample_ids.extend(created_sample_ids)
+
+        # Generate embeddings for all samples at once
+        if embed:
+            _generate_embeddings(
+                session=self.session, dataset_id=self.dataset_id, sample_ids=all_created_sample_ids
+            )
 
     def add_samples_from_coco(
         self,
         annotations_json: PathLike,
         images_path: PathLike,
-
-
+        annotation_type: AnnotationType = AnnotationType.OBJECT_DETECTION,
+        split: str | None = None,
         embed: bool = True,
     ) -> None:
         """Load a dataset in COCO Object Detection format and store in DB.
@@ -338,10 +338,10 @@ class Dataset:
         Args:
             annotations_json: Path to the COCO annotations JSON file.
             images_path: Path to the folder containing the images.
-            task_name: Optional name for the annotation task. If None, a
-                default name is generated.
             annotation_type: The type of annotation to be loaded (e.g., 'ObjectDetection',
                 'InstanceSegmentation').
+            split: Optional split name to tag samples (e.g., 'train', 'val').
+                If provided, all samples will be tagged with this name.
             embed: If True, generate embeddings for the newly added samples.
         """
         if isinstance(annotations_json, str):
@@ -353,30 +353,121 @@ class Dataset:
 
         label_input: COCOObjectDetectionInput | COCOInstanceSegmentationInput
 
-        if annotation_type == AnnotationType.
+        if annotation_type == AnnotationType.OBJECT_DETECTION:
             label_input = COCOObjectDetectionInput(
                 input_file=annotations_json,
             )
-            task_name_default = f"Loaded from COCO Object Detection: {annotations_json.name}"
         elif annotation_type == AnnotationType.INSTANCE_SEGMENTATION:
             label_input = COCOInstanceSegmentationInput(
                 input_file=annotations_json,
             )
-            task_name_default = f"Loaded from COCO Instance Segmentation: {annotations_json.name}"
         else:
             raise ValueError(f"Invalid annotation type: {annotation_type}")
 
-        if task_name is None:
-            task_name = task_name_default
-
         images_path = Path(images_path).absolute()
 
-
+        created_sample_ids = add_samples.load_into_dataset_from_labelformat(
+            session=self.session,
+            dataset_id=self.dataset_id,
             input_labels=label_input,
             images_path=images_path,
-
-
-
+        )
+
+        # Tag samples with split name if provided
+        if split is not None and created_sample_ids:
+            tag = tag_resolver.get_or_create_sample_tag_by_name(
+                session=self.session,
+                dataset_id=self.dataset_id,
+                tag_name=split,
+            )
+            tag_resolver.add_sample_ids_to_tag_id(
+                session=self.session,
+                tag_id=tag.tag_id,
+                sample_ids=created_sample_ids,
+            )
+
+        if embed:
+            _generate_embeddings(
+                session=self.session, dataset_id=self.dataset_id, sample_ids=created_sample_ids
+            )
+
+    def add_samples_from_coco_caption(
+        self,
+        annotations_json: PathLike,
+        images_path: PathLike,
+        split: str | None = None,
+        embed: bool = True,
+    ) -> None:
+        """Load a dataset in COCO caption format and store in DB.
+
+        Args:
+            annotations_json: Path to the COCO caption JSON file.
+            images_path: Path to the folder containing the images.
+            split: Optional split name to tag samples (e.g., 'train', 'val').
+                If provided, all samples will be tagged with this name.
+            embed: If True, generate embeddings for the newly added samples.
+        """
+        if isinstance(annotations_json, str):
+            annotations_json = Path(annotations_json)
+        annotations_json = annotations_json.absolute()
+
+        if not annotations_json.is_file() or annotations_json.suffix != ".json":
+            raise FileNotFoundError(f"COCO caption json file not found: '{annotations_json}'")
+
+        if isinstance(images_path, str):
+            images_path = Path(images_path)
+        images_path = images_path.absolute()
+
+        created_sample_ids = add_samples.load_into_dataset_from_coco_captions(
+            session=self.session,
+            dataset_id=self.dataset_id,
+            annotations_json=annotations_json,
+            images_path=images_path,
+        )
+
+        # Tag samples with split name if provided
+        if split is not None and created_sample_ids:
+            tag = tag_resolver.get_or_create_sample_tag_by_name(
+                session=self.session,
+                dataset_id=self.dataset_id,
+                tag_name=split,
+            )
+            tag_resolver.add_sample_ids_to_tag_id(
+                session=self.session,
+                tag_id=tag.tag_id,
+                sample_ids=created_sample_ids,
+            )
+
+        if embed:
+            _generate_embeddings(
+                session=self.session, dataset_id=self.dataset_id, sample_ids=created_sample_ids
+            )
+
+    def compute_typicality_metadata(
+        self,
+        embedding_model_name: str | None = None,
+        metadata_name: str = "typicality",
+    ) -> None:
+        """Computes typicality from embeddings, for K nearest neighbors.
+
+        Args:
+            embedding_model_name:
+                The name of the embedding model to use. If not given, the default
+                embedding model is used.
+            metadata_name:
+                The name of the metadata to store the typicality values in. If not
+                given, the default name "typicality" is used.
+        """
+        embedding_model_id = embedding_model_resolver.get_by_name(
+            session=self.session,
+            dataset_id=self.dataset_id,
+            embedding_model_name=embedding_model_name,
+        ).embedding_model_id
+        compute_typicality.compute_typicality_metadata(
+            session=self.session,
+            dataset_id=self.dataset_id,
+            embedding_model_id=embedding_model_id,
+            metadata_name=metadata_name,
         )
 
 
@@ -409,3 +500,23 @@ def _generate_embeddings(session: Session, dataset_id: UUID, sample_ids: list[UUID]) -> None:
     # Mark the embedding search feature as enabled.
     if "embeddingSearchEnabled" not in features.lightly_studio_active_features:
         features.lightly_studio_active_features.append("embeddingSearchEnabled")
+
+
+def _resolve_yolo_splits(data_yaml: Path, input_split: str | None) -> list[str]:
+    """Determine which YOLO splits to process for the given config."""
+    if input_split is not None:
+        if input_split not in ALLOWED_YOLO_SPLITS:
+            raise ValueError(
+                f"Split '{input_split}' not found in config file '{data_yaml}'. "
+                f"Allowed splits: {sorted(ALLOWED_YOLO_SPLITS)}"
+            )
+        return [input_split]
+
+    with data_yaml.open() as f:
+        config = yaml.safe_load(f)
+
+    config_keys = config.keys() if isinstance(config, dict) else []
+    splits = [key for key in config_keys if key in ALLOWED_YOLO_SPLITS]
+    if not splits:
+        raise ValueError(f"No splits found in config file '{data_yaml}'")
+    return splits
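Taken together, the new `Dataset` surface can be exercised as below. A hedged sketch: `dataset` is assumed to be an already-constructed `Dataset`, and all paths are illustrative.

```python
# YOLO: with input_split=None, every split found in data.yaml and present in
# ALLOWED_YOLO_SPLITS ({"train", "val", "test", "minival"}) is loaded, and each
# split's samples are tagged with the split name.
dataset.add_samples_from_yolo(data_yaml="data/data.yaml", input_split=None)

# COCO captions: loads samples plus their captions; split="val" additionally
# tags every created sample with a "val" tag.
dataset.add_samples_from_coco_caption(
    annotations_json="annotations/captions_val2017.json",
    images_path="images/val2017",
    split="val",
)

# Typicality metadata computed from the default embedding model and stored
# under the default metadata name "typicality".
dataset.compute_typicality_metadata()
```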
lightly_studio/core/dataset_query/dataset_query.py
CHANGED
@@ -10,6 +10,7 @@ from lightly_studio.core.dataset_query.match_expression import MatchExpression
 from lightly_studio.core.dataset_query.order_by import OrderByExpression, OrderByField
 from lightly_studio.core.dataset_query.sample_field import SampleField
 from lightly_studio.core.sample import Sample
+from lightly_studio.export.export_dataset import DatasetExport
 from lightly_studio.models.dataset import DatasetTable
 from lightly_studio.models.sample import SampleTable
 from lightly_studio.resolvers import tag_resolver
@@ -209,3 +210,7 @@
             session=self.session,
             input_sample_ids=input_sample_ids,
         )
+
+    def export(self) -> DatasetExport:
+        """Return a DatasetExport instance which can export the dataset in various formats."""
+        return DatasetExport(session=self.session, samples=self)
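The new `export()` hook lets a query chain straight into the exporter. A minimal sketch, assuming `query` is an existing `DatasetQuery`; the concrete output formats live in `lightly_studio/export/export_dataset.py`, which this diff does not show:

```python
# export() hands the query itself to DatasetExport as the sample source, so any
# filtering already applied to the query carries over into the export.
exporter = query.export()  # DatasetExport(session=..., samples=query)
```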
lightly_studio/dataset/env.py
CHANGED
@@ -1,5 +1,7 @@
 """Initialize environment variables for the dataset module."""
 
+from typing import Optional
+
 from environs import Env
 
 env = Env()
@@ -14,3 +16,5 @@ LIGHTLY_STUDIO_HOST: str = env.str("LIGHTLY_STUDIO_HOST", "localhost")
 LIGHTLY_STUDIO_DEBUG: str = env.bool("LIGHTLY_STUDIO_DEBUG", "false")
 
 APP_URL = f"{LIGHTLY_STUDIO_PROTOCOL}://{LIGHTLY_STUDIO_HOST}:{LIGHTLY_STUDIO_PORT}"
+
+LIGHTLY_STUDIO_LICENSE_KEY: Optional[str] = env.str("LIGHTLY_STUDIO_LICENSE_KEY", default=None)
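Like the other settings in this module, the new license key is read from the process environment at import time and stays `None` when unset. A minimal sketch (the key value is illustrative):

```python
import os

# Must be set before lightly_studio.dataset.env is first imported, since the
# module-level env.str(...) call reads the environment at import time.
os.environ["LIGHTLY_STUDIO_LICENSE_KEY"] = "example-key"

from lightly_studio.dataset import env

assert env.LIGHTLY_STUDIO_LICENSE_KEY == "example-key"
```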
lightly_studio/dataset/file_utils.py
CHANGED
@@ -13,8 +13,19 @@ def download_file_if_does_not_exist(url: str, local_filename: Path) -> None:
     """Download a file from a URL if it does not already exist locally."""
     if local_filename.exists():
         return
-
-
+
+    try:
+        print(f"Downloading {url} to {local_filename}")
+        with requests.get(url, stream=True, timeout=30) as r:
+            # Raise an error for bad status codes
+            r.raise_for_status()
+            with open(local_filename, "wb") as f:
+                shutil.copyfileobj(r.raw, f)
+    except Exception:
+        # If download fails, remove any partial file to allow retry.
+        if local_filename.exists():
+            local_filename.unlink()
+        raise
 
 
 def get_file_xxhash(file_path: Path) -> str: