lightly-studio 0.3.2__py3-none-any.whl → 0.3.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lightly-studio might be problematic. Click here for more details.

Files changed (163)
  1. lightly_studio/__init__.py +1 -1
  2. lightly_studio/api/app.py +8 -4
  3. lightly_studio/api/db_tables.py +0 -3
  4. lightly_studio/api/routes/api/annotation.py +26 -0
  5. lightly_studio/api/routes/api/annotations/__init__.py +7 -0
  6. lightly_studio/api/routes/api/annotations/create_annotation.py +52 -0
  7. lightly_studio/api/routes/api/caption.py +30 -0
  8. lightly_studio/api/routes/api/dataset.py +3 -5
  9. lightly_studio/api/routes/api/embeddings2d.py +136 -0
  10. lightly_studio/api/routes/api/export.py +73 -0
  11. lightly_studio/api/routes/api/metadata.py +57 -1
  12. lightly_studio/api/routes/api/selection.py +87 -0
  13. lightly_studio/core/add_samples.py +138 -9
  14. lightly_studio/core/dataset.py +174 -63
  15. lightly_studio/core/dataset_query/dataset_query.py +5 -0
  16. lightly_studio/dataset/env.py +4 -0
  17. lightly_studio/dataset/file_utils.py +13 -2
  18. lightly_studio/dataset/loader.py +2 -62
  19. lightly_studio/dataset/mobileclip_embedding_generator.py +3 -2
  20. lightly_studio/db_manager.py +10 -4
  21. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.B3oFNb6O.css +1 -0
  22. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/2.CkOblLn7.css +1 -0
  23. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/Samples.CIbricz7.css +1 -0
  24. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.7Ma7YdVg.css +1 -0
  25. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{useFeatureFlags.CV-KWLNP.css → _layout.CefECEWA.css} +1 -1
  26. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/transform.2jKMtOWG.css +1 -0
  27. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/-DXuGN29.js +1 -0
  28. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Ccq4ZD0B.js → B7302SU7.js} +1 -1
  29. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BeWf8-vJ.js +1 -0
  30. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bqz7dyEC.js +1 -0
  31. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C1FmrZbK.js +1 -0
  32. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DRZO-E-T.js → CSCQddQS.js} +1 -1
  33. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CZGpyrcA.js +1 -0
  34. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CfQ4mGwl.js +1 -0
  35. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CiaNZCBa.js +1 -0
  36. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cqo0Vpvt.js +417 -0
  37. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cy4fgWTG.js +1 -0
  38. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D5w4xp5l.js +1 -0
  39. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DD63uD-T.js +1 -0
  40. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DQ8aZ1o-.js +3 -0
  41. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Df3aMO5B.js → DSxvnAMh.js} +1 -1
  42. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D_JuJOO3.js +20 -0
  43. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D_ynJAfY.js +2 -0
  44. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Dafy4oEQ.js +1 -0
  45. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BqBqV92V.js → Dj4O-5se.js} +1 -1
  46. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DmjAI-UV.js +1 -0
  47. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Dug7Bq1S.js +1 -0
  48. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Dv5BSBQG.js +1 -0
  49. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DzBTnFhV.js +1 -0
  50. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DzX_yyqb.js +1 -0
  51. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Frwd2CjB.js +1 -0
  52. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/H4l0JFh9.js +1 -0
  53. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/H60ATh8g.js +2 -0
  54. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/qIv1kPyv.js +1 -0
  55. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/sLqs1uaK.js +20 -0
  56. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/u-it74zV.js +96 -0
  57. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BPc0HQPq.js +2 -0
  58. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.SNvc2nrm.js +1 -0
  59. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.5jT7P06o.js +1 -0
  60. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/1.Cdy-7S5q.js +1 -0
  61. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.C_uoESTX.js +1 -0
  62. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.DcO8wIAc.js +1 -0
  63. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.BIldfkxL.js +1012 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.w9g4AcAx.js → 3.BC9z_TWM.js} +1 -1
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.BBI8KwnD.js → 4.D8X_Ch5n.js} +1 -1
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.CAXhxJu6.js +39 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{6.CrbkRPam.js → 6.DRA5Ru_2.js} +1 -1
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.WVBsruHQ.js +1 -0
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.BuKUrCEN.js +20 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/9.CUIn1yCR.js +1 -0
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/clustering.worker-DKqeLtG0.js +2 -0
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/search.worker-vNSty3B0.js +1 -0
  73. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
  74. lightly_studio/dist_lightly_studio_view_app/index.html +15 -14
  75. lightly_studio/examples/example.py +4 -0
  76. lightly_studio/examples/example_coco.py +4 -0
  77. lightly_studio/examples/example_coco_caption.py +24 -0
  78. lightly_studio/examples/example_metadata.py +4 -1
  79. lightly_studio/examples/example_selection.py +4 -0
  80. lightly_studio/examples/example_split_work.py +4 -0
  81. lightly_studio/examples/example_yolo.py +4 -0
  82. lightly_studio/export/export_dataset.py +73 -0
  83. lightly_studio/export/lightly_studio_label_input.py +120 -0
  84. lightly_studio/few_shot_classifier/classifier_manager.py +5 -26
  85. lightly_studio/metadata/compute_typicality.py +67 -0
  86. lightly_studio/models/annotation/annotation_base.py +11 -12
  87. lightly_studio/models/caption.py +73 -0
  88. lightly_studio/models/dataset.py +1 -2
  89. lightly_studio/models/metadata.py +1 -1
  90. lightly_studio/models/sample.py +2 -2
  91. lightly_studio/resolvers/annotation_label_resolver/__init__.py +2 -1
  92. lightly_studio/resolvers/annotation_label_resolver/get_all.py +15 -0
  93. lightly_studio/resolvers/annotation_resolver/__init__.py +2 -3
  94. lightly_studio/resolvers/annotation_resolver/create_many.py +3 -3
  95. lightly_studio/resolvers/annotation_resolver/delete_annotation.py +1 -1
  96. lightly_studio/resolvers/annotation_resolver/delete_annotations.py +7 -3
  97. lightly_studio/resolvers/annotation_resolver/get_by_id.py +19 -1
  98. lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +0 -1
  99. lightly_studio/resolvers/annotations/annotations_filter.py +1 -11
  100. lightly_studio/resolvers/caption_resolver.py +80 -0
  101. lightly_studio/resolvers/dataset_resolver.py +4 -7
  102. lightly_studio/resolvers/metadata_resolver/__init__.py +2 -2
  103. lightly_studio/resolvers/metadata_resolver/sample/__init__.py +3 -3
  104. lightly_studio/resolvers/metadata_resolver/sample/bulk_update_metadata.py +46 -0
  105. lightly_studio/resolvers/samples_filter.py +18 -10
  106. lightly_studio/selection/mundig.py +7 -10
  107. lightly_studio/selection/selection_config.py +4 -1
  108. lightly_studio/services/annotations_service/__init__.py +8 -0
  109. lightly_studio/services/annotations_service/create_annotation.py +63 -0
  110. lightly_studio/services/annotations_service/delete_annotation.py +22 -0
  111. lightly_studio/type_definitions.py +2 -0
  112. {lightly_studio-0.3.2.dist-info → lightly_studio-0.3.4.dist-info}/METADATA +231 -41
  113. {lightly_studio-0.3.2.dist-info → lightly_studio-0.3.4.dist-info}/RECORD +114 -104
  114. lightly_studio/api/routes/api/annotation_task.py +0 -37
  115. lightly_studio/api/routes/api/metrics.py +0 -76
  116. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.DenzbfeK.css +0 -1
  117. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BBm0IWdq.css +0 -1
  118. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BNTuXSAe.css +0 -1
  119. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.T-zjSUd3.css +0 -1
  120. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/2O287xak.js +0 -3
  121. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/7YNGEs1C.js +0 -1
  122. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BBoGk9hq.js +0 -1
  123. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BRnH9v23.js +0 -92
  124. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bg1Y5eUZ.js +0 -1
  125. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C0JiMuYn.js +0 -1
  126. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C98Hk3r5.js +0 -1
  127. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CG0dMCJi.js +0 -1
  128. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cpy-nab_.js +0 -1
  129. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Crk-jcvV.js +0 -1
  130. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cs31G8Qn.js +0 -1
  131. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CsKrY2zA.js +0 -1
  132. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cur71c3O.js +0 -1
  133. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CzgC3GFB.js +0 -1
  134. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D8GZDMNN.js +0 -1
  135. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DFRh-Spp.js +0 -1
  136. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DcGCxgpH.js +0 -1
  137. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DkR_EZ_B.js +0 -1
  138. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DqUGznj_.js +0 -1
  139. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/H7C68rOM.js +0 -1
  140. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KpAtIldw.js +0 -1
  141. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/M1Q1F7bw.js +0 -4
  142. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/OH7-C_mc.js +0 -1
  143. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/gLNdjSzu.js +0 -1
  144. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/i0ZZ4z06.js +0 -1
  145. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BI-EA5gL.js +0 -2
  146. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.CcsRl3cZ.js +0 -1
  147. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.BbO4Zc3r.js +0 -1
  148. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/1._I9GR805.js +0 -1
  149. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.J2RBFrSr.js +0 -1
  150. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.Cmqj25a-.js +0 -1
  151. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C45iKJHA.js +0 -6
  152. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.huHuxdiF.js +0 -1
  153. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.FomEdhD6.js +0 -1
  154. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cb_ADSLk.js +0 -1
  155. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/9.CajIG5ce.js +0 -1
  156. lightly_studio/metrics/__init__.py +0 -0
  157. lightly_studio/metrics/detection/__init__.py +0 -0
  158. lightly_studio/metrics/detection/map.py +0 -268
  159. lightly_studio/models/annotation_task.py +0 -28
  160. lightly_studio/resolvers/annotation_resolver/create.py +0 -19
  161. lightly_studio/resolvers/annotation_task_resolver.py +0 -31
  162. lightly_studio/resolvers/metadata_resolver/sample/bulk_set_metadata.py +0 -48
  163. {lightly_studio-0.3.2.dist-info → lightly_studio-0.3.4.dist-info}/WHEEL +0 -0
@@ -2,6 +2,8 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import json
6
+ from collections import defaultdict
5
7
  from dataclasses import dataclass, field
6
8
  from pathlib import Path
7
9
  from typing import Iterable
@@ -26,10 +28,12 @@ from tqdm import tqdm
26
28
 
27
29
  from lightly_studio.models.annotation.annotation_base import AnnotationCreate
28
30
  from lightly_studio.models.annotation_label import AnnotationLabelCreate
31
+ from lightly_studio.models.caption import CaptionCreate
29
32
  from lightly_studio.models.sample import SampleCreate, SampleTable
30
33
  from lightly_studio.resolvers import (
31
34
  annotation_label_resolver,
32
35
  annotation_resolver,
36
+ caption_resolver,
33
37
  sample_resolver,
34
38
  )
35
39
 
@@ -46,7 +50,6 @@ class _AnnotationProcessingContext:
46
50
  dataset_id: UUID
47
51
  sample_id: UUID
48
52
  label_map: dict[int, UUID]
49
- annotation_task_id: UUID
50
53
 
51
54
 
52
55
  @dataclass
@@ -137,7 +140,6 @@ def load_into_dataset_from_labelformat(
137
140
  dataset_id: UUID,
138
141
  input_labels: ObjectDetectionInput | InstanceSegmentationInput,
139
142
  images_path: Path,
140
- annotation_task_id: UUID,
141
143
  ) -> list[UUID]:
142
144
  """Load samples and their annotations from a labelformat input into the dataset.
143
145
 
@@ -146,7 +148,6 @@ def load_into_dataset_from_labelformat(
146
148
  dataset_id: The ID of the dataset to load samples into.
147
149
  input_labels: The labelformat input containing images and annotations.
148
150
  images_path: The path to the directory containing the images.
149
- annotation_task_id: The ID of the annotation task to associate with the annotations.
150
151
 
151
152
  Returns:
152
153
  A list of UUIDs of the created samples.
@@ -192,7 +193,6 @@ def load_into_dataset_from_labelformat(
192
193
  image_path_to_anno_data=image_path_to_anno_data,
193
194
  dataset_id=dataset_id,
194
195
  label_map=label_map,
195
- annotation_task_id=annotation_task_id,
196
196
  annotations_to_create=annotations_to_create,
197
197
  )
198
198
  samples_to_create.clear()
@@ -210,7 +210,6 @@ def load_into_dataset_from_labelformat(
210
210
  image_path_to_anno_data=image_path_to_anno_data,
211
211
  dataset_id=dataset_id,
212
212
  label_map=label_map,
213
- annotation_task_id=annotation_task_id,
214
213
  annotations_to_create=annotations_to_create,
215
214
  )
216
215
 
@@ -223,6 +222,111 @@ def load_into_dataset_from_labelformat(
223
222
  return created_sample_ids
224
223
 
225
224
 
225
+ def load_into_dataset_from_coco_captions(
226
+ session: Session,
227
+ dataset_id: UUID,
228
+ annotations_json: Path,
229
+ images_path: Path,
230
+ ) -> list[UUID]:
231
+ """Load samples and captions from a COCO captions file into the dataset.
232
+
233
+ Args:
234
+ session: Database session used for resolver operations.
235
+ dataset_id: Identifier of the dataset that receives the samples.
236
+ annotations_json: Path to the COCO captions annotations file.
237
+ images_path: Directory containing the referenced images.
238
+
239
+ Returns:
240
+ The list of newly created sample identifiers.
241
+ """
242
+ with fsspec.open(str(annotations_json), "r") as file:
243
+ coco_payload = json.load(file)
244
+
245
+ images: list[dict[str, object]] = coco_payload.get("images", [])
246
+ annotations: list[dict[str, object]] = coco_payload.get("annotations", [])
247
+
248
+ captions_by_image_id: dict[int, list[str]] = defaultdict(list)
249
+ for annotation in annotations:
250
+ image_id = annotation["image_id"]
251
+ caption = annotation["caption"]
252
+ if not isinstance(image_id, int):
253
+ continue
254
+ if not isinstance(caption, str):
255
+ continue
256
+ caption_text = caption.strip()
257
+ if not caption_text:
258
+ continue
259
+ captions_by_image_id[image_id].append(caption_text)
260
+
261
+ logging_context = _LoadingLoggingContext(
262
+ n_samples_to_be_inserted=len(images),
263
+ n_samples_before_loading=sample_resolver.count_by_dataset_id(
264
+ session=session, dataset_id=dataset_id
265
+ ),
266
+ )
267
+
268
+ captions_to_create: list[CaptionCreate] = []
269
+ samples_to_create: list[SampleCreate] = []
270
+ created_sample_ids: list[UUID] = []
271
+ image_path_to_captions: dict[str, list[str]] = {}
272
+
273
+ for image_info in tqdm(images, desc="Processing images", unit=" images"):
274
+ if isinstance(image_info["id"], int):
275
+ image_id_raw = image_info["id"]
276
+ else:
277
+ continue
278
+ file_name_raw = str(image_info["file_name"])
279
+
280
+ width = image_info["width"] if isinstance(image_info["width"], int) else 0
281
+ height = image_info["height"] if isinstance(image_info["height"], int) else 0
282
+ sample = SampleCreate(
283
+ file_name=file_name_raw,
284
+ file_path_abs=str(images_path / file_name_raw),
285
+ width=width,
286
+ height=height,
287
+ dataset_id=dataset_id,
288
+ )
289
+ samples_to_create.append(sample)
290
+ image_path_to_captions[sample.file_path_abs] = captions_by_image_id.get(image_id_raw, [])
291
+
292
+ if len(samples_to_create) >= SAMPLE_BATCH_SIZE:
293
+ created_samples_batch, paths_not_inserted = _create_batch_samples(
294
+ session=session, samples=samples_to_create
295
+ )
296
+ created_sample_ids.extend(s.sample_id for s in created_samples_batch)
297
+ logging_context.update_example_paths(paths_not_inserted)
298
+ _process_batch_captions(
299
+ session=session,
300
+ dataset_id=dataset_id,
301
+ stored_samples=created_samples_batch,
302
+ image_path_to_captions=image_path_to_captions,
303
+ captions_to_create=captions_to_create,
304
+ )
305
+ samples_to_create.clear()
306
+ image_path_to_captions.clear()
307
+
308
+ if samples_to_create:
309
+ created_samples_batch, paths_not_inserted = _create_batch_samples(
310
+ session=session, samples=samples_to_create
311
+ )
312
+ created_sample_ids.extend(s.sample_id for s in created_samples_batch)
313
+ logging_context.update_example_paths(paths_not_inserted)
314
+ _process_batch_captions(
315
+ session=session,
316
+ dataset_id=dataset_id,
317
+ stored_samples=created_samples_batch,
318
+ image_path_to_captions=image_path_to_captions,
319
+ captions_to_create=captions_to_create,
320
+ )
321
+
322
+ if captions_to_create:
323
+ caption_resolver.create_many(session=session, captions=captions_to_create)
324
+
325
+ _log_loading_results(session=session, dataset_id=dataset_id, logging_context=logging_context)
326
+
327
+ return created_sample_ids
328
+
329
+
226
330
  def _log_loading_results(
227
331
  session: Session, dataset_id: UUID, logging_context: _LoadingLoggingContext
228
332
  ) -> None:
@@ -304,7 +408,6 @@ def _process_object_detection_annotations(
304
408
  width=int(width),
305
409
  height=int(height),
306
410
  confidence=obj.confidence,
307
- annotation_task_id=context.annotation_task_id,
308
411
  )
309
412
  )
310
413
  return new_annotations
@@ -339,7 +442,6 @@ def _process_instance_segmentation_annotations(
339
442
  width=int(width),
340
443
  height=int(height),
341
444
  segmentation_mask=segmentation_rle,
342
- annotation_task_id=context.annotation_task_id,
343
445
  )
344
446
  )
345
447
  return new_annotations
@@ -351,7 +453,6 @@ def _process_batch_annotations( # noqa: PLR0913
351
453
  image_path_to_anno_data: dict[str, ImageInstanceSegmentation | ImageObjectDetection],
352
454
  dataset_id: UUID,
353
455
  label_map: dict[int, UUID],
354
- annotation_task_id: UUID,
355
456
  annotations_to_create: list[AnnotationCreate],
356
457
  ) -> None:
357
458
  """Process annotations for a batch of samples."""
@@ -362,7 +463,6 @@ def _process_batch_annotations( # noqa: PLR0913
362
463
  dataset_id=dataset_id,
363
464
  sample_id=stored_sample.sample_id,
364
465
  label_map=label_map,
365
- annotation_task_id=annotation_task_id,
366
466
  )
367
467
 
368
468
  if isinstance(anno_data, ImageInstanceSegmentation):
@@ -381,3 +481,32 @@ def _process_batch_annotations( # noqa: PLR0913
381
481
  if len(annotations_to_create) >= ANNOTATION_BATCH_SIZE:
382
482
  annotation_resolver.create_many(session=session, annotations=annotations_to_create)
383
483
  annotations_to_create.clear()
484
+
485
+
486
+ def _process_batch_captions(
487
+ session: Session,
488
+ dataset_id: UUID,
489
+ stored_samples: list[SampleTable],
490
+ image_path_to_captions: dict[str, list[str]],
491
+ captions_to_create: list[CaptionCreate],
492
+ ) -> None:
493
+ """Process captions for a batch of samples."""
494
+ if not stored_samples:
495
+ return
496
+
497
+ for stored_sample in stored_samples:
498
+ captions = image_path_to_captions[stored_sample.file_path_abs]
499
+ if not captions:
500
+ continue
501
+
502
+ for caption_text in captions:
503
+ caption = CaptionCreate(
504
+ dataset_id=dataset_id,
505
+ sample_id=stored_sample.sample_id,
506
+ text=caption_text,
507
+ )
508
+ captions_to_create.append(caption)
509
+
510
+ if len(captions_to_create) >= ANNOTATION_BATCH_SIZE:
511
+ caption_resolver.create_many(session=session, captions=captions_to_create)
512
+ captions_to_create.clear()
@@ -6,6 +6,7 @@ from pathlib import Path
6
6
  from typing import Iterable, Iterator
7
7
  from uuid import UUID
8
8
 
9
+ import yaml
9
10
  from labelformat.formats import (
10
11
  COCOInstanceSegmentationInput,
11
12
  COCOObjectDetectionInput,
@@ -28,21 +29,23 @@ from lightly_studio.core.dataset_query.order_by import OrderByExpression
28
29
  from lightly_studio.core.sample import Sample
29
30
  from lightly_studio.dataset import fsspec_lister
30
31
  from lightly_studio.dataset.embedding_manager import EmbeddingManagerProvider
31
- from lightly_studio.models.annotation_task import (
32
- AnnotationTaskTable,
32
+ from lightly_studio.metadata import compute_typicality
33
+ from lightly_studio.models.annotation.annotation_base import (
33
34
  AnnotationType,
34
35
  )
35
36
  from lightly_studio.models.dataset import DatasetCreate, DatasetTable
36
37
  from lightly_studio.models.sample import SampleTable
37
38
  from lightly_studio.resolvers import (
38
- annotation_task_resolver,
39
39
  dataset_resolver,
40
+ embedding_model_resolver,
40
41
  sample_resolver,
42
+ tag_resolver,
41
43
  )
42
44
  from lightly_studio.type_definitions import PathLike
43
45
 
44
46
  # Constants
45
47
  DEFAULT_DATASET_NAME = "default_dataset"
48
+ ALLOWED_YOLO_SPLITS = {"train", "val", "test", "minival"}
46
49
 
47
50
  _SliceType = slice # to avoid shadowing built-in slice in type annotations
48
51
 
@@ -68,7 +71,7 @@ class Dataset:
68
71
 
69
72
  dataset = dataset_resolver.create(
70
73
  session=db_manager.persistent_session(),
71
- dataset=DatasetCreate(name=name, directory=""),
74
+ dataset=DatasetCreate(name=name),
72
75
  )
73
76
  return Dataset(dataset=dataset)
74
77
 
@@ -234,8 +237,6 @@ class Dataset:
234
237
  self,
235
238
  input_labels: ObjectDetectionInput | InstanceSegmentationInput,
236
239
  images_path: PathLike,
237
- is_prediction: bool = True,
238
- task_name: str | None = None,
239
240
  embed: bool = True,
240
241
  ) -> None:
241
242
  """Load a dataset from a labelformat object and store in database.
@@ -243,40 +244,17 @@ class Dataset:
243
244
  Args:
244
245
  input_labels: The labelformat input object.
245
246
  images_path: Path to the folder containing the images.
246
- is_prediction: Whether the task is for prediction or labels.
247
- task_name: Optional name for the annotation task. If None, a
248
- default name is generated.
249
247
  embed: If True, generate embeddings for the newly added samples.
250
248
  """
251
249
  if isinstance(images_path, str):
252
250
  images_path = Path(images_path)
253
251
  images_path = images_path.absolute()
254
252
 
255
- # Determine annotation type based on input.
256
- # Currently, we always create BBOX tasks, even for segmentation,
257
- # as segmentation data is stored alongside bounding boxes.
258
- annotation_type = AnnotationType.BBOX
259
-
260
- # Generate a default task name if none is provided.
261
- if task_name is None:
262
- task_name = f"Loaded from labelformat: {self.name}"
263
-
264
- # Create annotation task.
265
- new_annotation_task = annotation_task_resolver.create(
266
- session=self.session,
267
- annotation_task=AnnotationTaskTable(
268
- name=task_name,
269
- annotation_type=annotation_type,
270
- is_prediction=is_prediction,
271
- ),
272
- )
273
-
274
253
  created_sample_ids = add_samples.load_into_dataset_from_labelformat(
275
254
  session=self.session,
276
255
  dataset_id=self.dataset_id,
277
256
  input_labels=input_labels,
278
257
  images_path=images_path,
279
- annotation_task_id=new_annotation_task.annotation_task_id,
280
258
  )
281
259
 
282
260
  if embed:
@@ -287,17 +265,15 @@ class Dataset:
287
265
  def add_samples_from_yolo(
288
266
  self,
289
267
  data_yaml: PathLike,
290
- input_split: str = "train",
291
- task_name: str | None = None,
268
+ input_split: str | None = None,
292
269
  embed: bool = True,
293
270
  ) -> None:
294
271
  """Load a dataset in YOLO format and store in DB.
295
272
 
296
273
  Args:
297
274
  data_yaml: Path to the YOLO data.yaml file.
298
- input_split: The split to load (e.g., 'train', 'val').
299
- task_name: Optional name for the annotation task. If None, a
300
- default name is generated.
275
+ input_split: The split to load (e.g., 'train', 'val', 'test').
276
+ If None, all available splits will be loaded and assigned a corresponding tag.
301
277
  embed: If True, generate embeddings for the newly added samples.
302
278
  """
303
279
  if isinstance(data_yaml, str):
@@ -307,30 +283,54 @@ class Dataset:
307
283
  if not data_yaml.is_file() or data_yaml.suffix != ".yaml":
308
284
  raise FileNotFoundError(f"YOLO data yaml file not found: '{data_yaml}'")
309
285
 
310
- if task_name is None:
311
- task_name = f"Loaded from YOLO: {data_yaml.name} ({input_split} split)"
286
+ # Determine which splits to process
287
+ splits_to_process = _resolve_yolo_splits(data_yaml=data_yaml, input_split=input_split)
312
288
 
313
- # Load the dataset using labelformat.
314
- label_input = YOLOv8ObjectDetectionInput(
315
- input_file=data_yaml,
316
- input_split=input_split,
317
- )
318
- images_path = label_input._images_dir() # noqa: SLF001
289
+ all_created_sample_ids = []
319
290
 
320
- self.add_samples_from_labelformat(
321
- input_labels=label_input,
322
- images_path=images_path,
323
- is_prediction=False,
324
- task_name=task_name,
325
- embed=embed,
326
- )
291
+ # Process each split
292
+ for split in splits_to_process:
293
+ # Load the dataset using labelformat.
294
+ label_input = YOLOv8ObjectDetectionInput(
295
+ input_file=data_yaml,
296
+ input_split=split,
297
+ )
298
+ images_path = label_input._images_dir() # noqa: SLF001
299
+
300
+ created_sample_ids = add_samples.load_into_dataset_from_labelformat(
301
+ session=self.session,
302
+ dataset_id=self.dataset_id,
303
+ input_labels=label_input,
304
+ images_path=images_path,
305
+ )
306
+
307
+ # Tag samples with split name
308
+ if created_sample_ids:
309
+ tag = tag_resolver.get_or_create_sample_tag_by_name(
310
+ session=self.session,
311
+ dataset_id=self.dataset_id,
312
+ tag_name=split,
313
+ )
314
+ tag_resolver.add_sample_ids_to_tag_id(
315
+ session=self.session,
316
+ tag_id=tag.tag_id,
317
+ sample_ids=created_sample_ids,
318
+ )
319
+
320
+ all_created_sample_ids.extend(created_sample_ids)
321
+
322
+ # Generate embeddings for all samples at once
323
+ if embed:
324
+ _generate_embeddings(
325
+ session=self.session, dataset_id=self.dataset_id, sample_ids=all_created_sample_ids
326
+ )
327
327
 
328
328
  def add_samples_from_coco(
329
329
  self,
330
330
  annotations_json: PathLike,
331
331
  images_path: PathLike,
332
- task_name: str | None = None,
333
- annotation_type: AnnotationType = AnnotationType.BBOX,
332
+ annotation_type: AnnotationType = AnnotationType.OBJECT_DETECTION,
333
+ split: str | None = None,
334
334
  embed: bool = True,
335
335
  ) -> None:
336
336
  """Load a dataset in COCO Object Detection format and store in DB.
@@ -338,10 +338,10 @@ class Dataset:
338
338
  Args:
339
339
  annotations_json: Path to the COCO annotations JSON file.
340
340
  images_path: Path to the folder containing the images.
341
- task_name: Optional name for the annotation task. If None, a
342
- default name is generated.
343
341
  annotation_type: The type of annotation to be loaded (e.g., 'ObjectDetection',
344
342
  'InstanceSegmentation').
343
+ split: Optional split name to tag samples (e.g., 'train', 'val').
344
+ If provided, all samples will be tagged with this name.
345
345
  embed: If True, generate embeddings for the newly added samples.
346
346
  """
347
347
  if isinstance(annotations_json, str):
@@ -353,30 +353,121 @@ class Dataset:
353
353
 
354
354
  label_input: COCOObjectDetectionInput | COCOInstanceSegmentationInput
355
355
 
356
- if annotation_type == AnnotationType.BBOX:
356
+ if annotation_type == AnnotationType.OBJECT_DETECTION:
357
357
  label_input = COCOObjectDetectionInput(
358
358
  input_file=annotations_json,
359
359
  )
360
- task_name_default = f"Loaded from COCO Object Detection: {annotations_json.name}"
361
360
  elif annotation_type == AnnotationType.INSTANCE_SEGMENTATION:
362
361
  label_input = COCOInstanceSegmentationInput(
363
362
  input_file=annotations_json,
364
363
  )
365
- task_name_default = f"Loaded from COCO Instance Segmentation: {annotations_json.name}"
366
364
  else:
367
365
  raise ValueError(f"Invalid annotation type: {annotation_type}")
368
366
 
369
- if task_name is None:
370
- task_name = task_name_default
371
-
372
367
  images_path = Path(images_path).absolute()
373
368
 
374
- self.add_samples_from_labelformat(
369
+ created_sample_ids = add_samples.load_into_dataset_from_labelformat(
370
+ session=self.session,
371
+ dataset_id=self.dataset_id,
375
372
  input_labels=label_input,
376
373
  images_path=images_path,
377
- is_prediction=False,
378
- task_name=task_name,
379
- embed=embed,
374
+ )
375
+
376
+ # Tag samples with split name if provided
377
+ if split is not None and created_sample_ids:
378
+ tag = tag_resolver.get_or_create_sample_tag_by_name(
379
+ session=self.session,
380
+ dataset_id=self.dataset_id,
381
+ tag_name=split,
382
+ )
383
+ tag_resolver.add_sample_ids_to_tag_id(
384
+ session=self.session,
385
+ tag_id=tag.tag_id,
386
+ sample_ids=created_sample_ids,
387
+ )
388
+
389
+ if embed:
390
+ _generate_embeddings(
391
+ session=self.session, dataset_id=self.dataset_id, sample_ids=created_sample_ids
392
+ )
393
+
394
+ def add_samples_from_coco_caption(
395
+ self,
396
+ annotations_json: PathLike,
397
+ images_path: PathLike,
398
+ split: str | None = None,
399
+ embed: bool = True,
400
+ ) -> None:
401
+ """Load a dataset in COCO caption format and store in DB.
402
+
403
+ Args:
404
+ annotations_json: Path to the COCO caption JSON file.
405
+ images_path: Path to the folder containing the images.
406
+ split: Optional split name to tag samples (e.g., 'train', 'val').
407
+ If provided, all samples will be tagged with this name.
408
+ embed: If True, generate embeddings for the newly added samples.
409
+ """
410
+ if isinstance(annotations_json, str):
411
+ annotations_json = Path(annotations_json)
412
+ annotations_json = annotations_json.absolute()
413
+
414
+ if not annotations_json.is_file() or annotations_json.suffix != ".json":
415
+ raise FileNotFoundError(f"COCO caption json file not found: '{annotations_json}'")
416
+
417
+ if isinstance(images_path, str):
418
+ images_path = Path(images_path)
419
+ images_path = images_path.absolute()
420
+
421
+ created_sample_ids = add_samples.load_into_dataset_from_coco_captions(
422
+ session=self.session,
423
+ dataset_id=self.dataset_id,
424
+ annotations_json=annotations_json,
425
+ images_path=images_path,
426
+ )
427
+
428
+ # Tag samples with split name if provided
429
+ if split is not None and created_sample_ids:
430
+ tag = tag_resolver.get_or_create_sample_tag_by_name(
431
+ session=self.session,
432
+ dataset_id=self.dataset_id,
433
+ tag_name=split,
434
+ )
435
+ tag_resolver.add_sample_ids_to_tag_id(
436
+ session=self.session,
437
+ tag_id=tag.tag_id,
438
+ sample_ids=created_sample_ids,
439
+ )
440
+
441
+ if embed:
442
+ _generate_embeddings(
443
+ session=self.session, dataset_id=self.dataset_id, sample_ids=created_sample_ids
444
+ )
445
+
446
+ def compute_typicality_metadata(
447
+ self,
448
+ embedding_model_name: str | None = None,
449
+ metadata_name: str = "typicality",
450
+ ) -> None:
451
+ """Computes typicality from embeddings, for K nearest neighbors.
452
+
453
+ Args:
454
+ embedding_model_name:
455
+ The name of the embedding model to use. If not given, the default
456
+ embedding model is used.
457
+ metadata_name:
458
+ The name of the metadata to store the typicality values in. If not give, the default
459
+ name "typicality" is used.
460
+ """
461
+ embedding_model_id = embedding_model_resolver.get_by_name(
462
+ session=self.session,
463
+ dataset_id=self.dataset_id,
464
+ embedding_model_name=embedding_model_name,
465
+ ).embedding_model_id
466
+ compute_typicality.compute_typicality_metadata(
467
+ session=self.session,
468
+ dataset_id=self.dataset_id,
469
+ embedding_model_id=embedding_model_id,
470
+ metadata_name=metadata_name,
380
471
  )
381
472
 
382
473
 
@@ -409,3 +500,23 @@ def _generate_embeddings(session: Session, dataset_id: UUID, sample_ids: list[UU
409
500
  # Mark the embedding search feature as enabled.
410
501
  if "embeddingSearchEnabled" not in features.lightly_studio_active_features:
411
502
  features.lightly_studio_active_features.append("embeddingSearchEnabled")
503
+
504
+
505
+ def _resolve_yolo_splits(data_yaml: Path, input_split: str | None) -> list[str]:
506
+ """Determine which YOLO splits to process for the given config."""
507
+ if input_split is not None:
508
+ if input_split not in ALLOWED_YOLO_SPLITS:
509
+ raise ValueError(
510
+ f"Split '{input_split}' not found in config file '{data_yaml}'. "
511
+ f"Allowed splits: {sorted(ALLOWED_YOLO_SPLITS)}"
512
+ )
513
+ return [input_split]
514
+
515
+ with data_yaml.open() as f:
516
+ config = yaml.safe_load(f)
517
+
518
+ config_keys = config.keys() if isinstance(config, dict) else []
519
+ splits = [key for key in config_keys if key in ALLOWED_YOLO_SPLITS]
520
+ if not splits:
521
+ raise ValueError(f"No splits found in config file '{data_yaml}'")
522
+ return splits
@@ -10,6 +10,7 @@ from lightly_studio.core.dataset_query.match_expression import MatchExpression
10
10
  from lightly_studio.core.dataset_query.order_by import OrderByExpression, OrderByField
11
11
  from lightly_studio.core.dataset_query.sample_field import SampleField
12
12
  from lightly_studio.core.sample import Sample
13
+ from lightly_studio.export.export_dataset import DatasetExport
13
14
  from lightly_studio.models.dataset import DatasetTable
14
15
  from lightly_studio.models.sample import SampleTable
15
16
  from lightly_studio.resolvers import tag_resolver
@@ -209,3 +210,7 @@ class DatasetQuery:
209
210
  session=self.session,
210
211
  input_sample_ids=input_sample_ids,
211
212
  )
213
+
214
+ def export(self) -> DatasetExport:
215
+ """Return a DatasetExport instance which can export the dataset in various formats."""
216
+ return DatasetExport(session=self.session, samples=self)
@@ -1,5 +1,7 @@
1
1
  """Initialize environment variables for the dataset module."""
2
2
 
3
+ from typing import Optional
4
+
3
5
  from environs import Env
4
6
 
5
7
  env = Env()
@@ -14,3 +16,5 @@ LIGHTLY_STUDIO_HOST: str = env.str("LIGHTLY_STUDIO_HOST", "localhost")
14
16
  LIGHTLY_STUDIO_DEBUG: str = env.bool("LIGHTLY_STUDIO_DEBUG", "false")
15
17
 
16
18
  APP_URL = f"{LIGHTLY_STUDIO_PROTOCOL}://{LIGHTLY_STUDIO_HOST}:{LIGHTLY_STUDIO_PORT}"
19
+
20
+ LIGHTLY_STUDIO_LICENSE_KEY: Optional[str] = env.str("LIGHTLY_STUDIO_LICENSE_KEY", default=None)
@@ -13,8 +13,19 @@ def download_file_if_does_not_exist(url: str, local_filename: Path) -> None:
13
13
  """Download a file from a URL if it does not already exist locally."""
14
14
  if local_filename.exists():
15
15
  return
16
- with requests.get(url, stream=True) as r, open(local_filename, "wb") as f:
17
- shutil.copyfileobj(r.raw, f)
16
+
17
+ try:
18
+ print(f"Downloading {url} to {local_filename}")
19
+ with requests.get(url, stream=True, timeout=30) as r:
20
+ # Raise an error for bad status codes
21
+ r.raise_for_status()
22
+ with open(local_filename, "wb") as f:
23
+ shutil.copyfileobj(r.raw, f)
24
+ except Exception:
25
+ # If download fails, remove any partial file to allow retry.
26
+ if local_filename.exists():
27
+ local_filename.unlink()
28
+ raise
18
29
 
19
30
 
20
31
  def get_file_xxhash(file_path: Path) -> str: