lightly-studio 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lightly-studio might be problematic. Click here for more details.

Files changed (169)
  1. lightly_studio/__init__.py +4 -4
  2. lightly_studio/api/app.py +7 -5
  3. lightly_studio/api/db_tables.py +0 -3
  4. lightly_studio/api/routes/api/annotation.py +32 -16
  5. lightly_studio/api/routes/api/annotation_label.py +2 -5
  6. lightly_studio/api/routes/api/annotations/__init__.py +7 -0
  7. lightly_studio/api/routes/api/annotations/create_annotation.py +52 -0
  8. lightly_studio/api/routes/api/classifier.py +2 -5
  9. lightly_studio/api/routes/api/dataset.py +5 -8
  10. lightly_studio/api/routes/api/dataset_tag.py +2 -3
  11. lightly_studio/api/routes/api/embeddings2d.py +104 -0
  12. lightly_studio/api/routes/api/export.py +73 -0
  13. lightly_studio/api/routes/api/metadata.py +2 -4
  14. lightly_studio/api/routes/api/sample.py +5 -13
  15. lightly_studio/api/routes/api/selection.py +87 -0
  16. lightly_studio/api/routes/api/settings.py +2 -6
  17. lightly_studio/api/routes/images.py +6 -6
  18. lightly_studio/core/add_samples.py +374 -0
  19. lightly_studio/core/dataset.py +272 -400
  20. lightly_studio/core/dataset_query/boolean_expression.py +67 -0
  21. lightly_studio/core/dataset_query/dataset_query.py +216 -0
  22. lightly_studio/core/dataset_query/field.py +113 -0
  23. lightly_studio/core/dataset_query/field_expression.py +79 -0
  24. lightly_studio/core/dataset_query/match_expression.py +23 -0
  25. lightly_studio/core/dataset_query/order_by.py +79 -0
  26. lightly_studio/core/dataset_query/sample_field.py +28 -0
  27. lightly_studio/core/dataset_query/tags_expression.py +46 -0
  28. lightly_studio/core/sample.py +159 -32
  29. lightly_studio/core/start_gui.py +35 -0
  30. lightly_studio/dataset/edge_embedding_generator.py +13 -8
  31. lightly_studio/dataset/embedding_generator.py +2 -3
  32. lightly_studio/dataset/embedding_manager.py +74 -6
  33. lightly_studio/dataset/env.py +4 -0
  34. lightly_studio/dataset/file_utils.py +13 -2
  35. lightly_studio/dataset/fsspec_lister.py +275 -0
  36. lightly_studio/dataset/loader.py +49 -84
  37. lightly_studio/dataset/mobileclip_embedding_generator.py +9 -6
  38. lightly_studio/db_manager.py +145 -0
  39. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.CA_CXIBb.css +1 -0
  40. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.DS78jgNY.css +1 -0
  41. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/index.BVs_sZj9.css +1 -0
  42. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/transform.D487hwJk.css +1 -0
  43. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/6t3IJ0vQ.js +1 -0
  44. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D6su9Aln.js → 8NsknIT2.js} +1 -1
  45. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{x9G_hzyY.js → BND_-4Kp.js} +1 -1
  46. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BylOuP6i.js → BdfTHw61.js} +1 -1
  47. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DOlTMNyt.js → BfHVnyNT.js} +1 -1
  48. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BjkP1AHA.js +1 -0
  49. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BuuNVL9G.js +1 -0
  50. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{O-EABkf9.js → BzKGpnl4.js} +1 -1
  51. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CCx7Ho51.js +1 -0
  52. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{l7KrR96u.js → CH6P3X75.js} +1 -1
  53. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D5-A_Ffd.js → CR2upx_Q.js} +2 -2
  54. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CWPZrTTJ.js +1 -0
  55. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{C8I8rFJQ.js → Cs1XmhiF.js} +1 -1
  56. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{CDnpyLsT.js → CwPowJfP.js} +1 -1
  57. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxFKfZ9T.js +1 -0
  58. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cxevwdid.js +1 -0
  59. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DjfY96ND.js → D4whDBUi.js} +1 -1
  60. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6r9vr07.js +1 -0
  61. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DA6bFLPR.js +1 -0
  62. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DEgUu98i.js +3 -0
  63. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DGTPl6Gk.js +1 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DKGxBSlK.js +1 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DQXoLcsF.js +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DQe_kdRt.js +92 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DcY4jgG3.js +1 -0
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bu7uvVrG.js → RmD8FzRo.js} +1 -1
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/V-MnMC1X.js +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bsi3UGy5.js → keKYsoph.js} +1 -1
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BVr6DYqP.js +2 -0
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.u7zsVvqp.js +1 -0
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.Da2agmdd.js +1 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{1.B4rNYwVp.js → 1.B11tVRJV.js} +1 -1
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.l30Zud4h.js +1 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CgKPGcAP.js +1 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C8HLK8mj.js +857 -0
  78. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.CWHpKonm.js → 3.CLvg3QcJ.js} +1 -1
  79. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.OUWOLQeV.js → 4.BQhDtXUI.js} +1 -1
  80. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.-6XqWX5G.js +1 -0
  81. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.uBV1Lhat.js +1 -0
  82. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.BXsgoQZh.js +1 -0
  83. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.BkbcnUs8.js +1 -0
  84. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{9.CPu3CiBc.js → 9.Bkrv-Vww.js} +1 -1
  85. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/clustering.worker-DKqeLtG0.js +2 -0
  86. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/search.worker-vNSty3B0.js +1 -0
  87. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
  88. lightly_studio/dist_lightly_studio_view_app/index.html +14 -14
  89. lightly_studio/examples/example.py +13 -12
  90. lightly_studio/examples/example_coco.py +13 -0
  91. lightly_studio/examples/example_metadata.py +83 -98
  92. lightly_studio/examples/example_selection.py +7 -19
  93. lightly_studio/examples/example_split_work.py +12 -36
  94. lightly_studio/examples/{example_v2.py → example_yolo.py} +3 -4
  95. lightly_studio/export/export_dataset.py +65 -0
  96. lightly_studio/export/lightly_studio_label_input.py +120 -0
  97. lightly_studio/few_shot_classifier/classifier_manager.py +5 -26
  98. lightly_studio/metadata/compute_typicality.py +67 -0
  99. lightly_studio/models/annotation/annotation_base.py +18 -20
  100. lightly_studio/models/annotation/instance_segmentation.py +8 -8
  101. lightly_studio/models/annotation/object_detection.py +4 -4
  102. lightly_studio/models/dataset.py +6 -2
  103. lightly_studio/models/sample.py +10 -3
  104. lightly_studio/resolvers/annotation_label_resolver/__init__.py +2 -1
  105. lightly_studio/resolvers/annotation_label_resolver/get_all.py +15 -0
  106. lightly_studio/resolvers/annotation_resolver/__init__.py +2 -3
  107. lightly_studio/resolvers/annotation_resolver/create_many.py +3 -3
  108. lightly_studio/resolvers/annotation_resolver/delete_annotation.py +1 -1
  109. lightly_studio/resolvers/annotation_resolver/delete_annotations.py +7 -3
  110. lightly_studio/resolvers/annotation_resolver/get_by_id.py +19 -1
  111. lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +0 -1
  112. lightly_studio/resolvers/annotations/annotations_filter.py +1 -11
  113. lightly_studio/resolvers/dataset_resolver.py +10 -0
  114. lightly_studio/resolvers/embedding_model_resolver.py +22 -0
  115. lightly_studio/resolvers/sample_resolver.py +53 -9
  116. lightly_studio/resolvers/tag_resolver.py +23 -0
  117. lightly_studio/selection/mundig.py +7 -10
  118. lightly_studio/selection/select.py +55 -46
  119. lightly_studio/selection/select_via_db.py +23 -19
  120. lightly_studio/selection/selection_config.py +10 -4
  121. lightly_studio/services/annotations_service/__init__.py +12 -0
  122. lightly_studio/services/annotations_service/create_annotation.py +63 -0
  123. lightly_studio/services/annotations_service/delete_annotation.py +22 -0
  124. lightly_studio/services/annotations_service/update_annotation.py +21 -32
  125. lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
  126. lightly_studio-0.3.3.dist-info/METADATA +814 -0
  127. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.3.dist-info}/RECORD +130 -113
  128. lightly_studio/api/db.py +0 -133
  129. lightly_studio/api/routes/api/annotation_task.py +0 -38
  130. lightly_studio/api/routes/api/metrics.py +0 -80
  131. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.DenzbfeK.css +0 -1
  132. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +0 -1
  133. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +0 -1
  134. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.T-zjSUd3.css +0 -1
  135. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +0 -1
  136. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +0 -1
  137. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +0 -1
  138. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +0 -1
  139. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +0 -1
  140. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +0 -93
  141. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +0 -3
  142. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +0 -1
  143. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +0 -1
  144. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +0 -1
  145. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +0 -1
  146. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +0 -1
  147. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/hQVEETDE.js +0 -1
  148. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +0 -1
  149. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/r64xT6ao.js +0 -1
  150. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +0 -1
  151. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +0 -2
  152. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +0 -1
  153. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +0 -1
  154. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +0 -1
  155. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +0 -1
  156. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +0 -6
  157. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +0 -1
  158. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +0 -1
  159. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +0 -1
  160. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +0 -1
  161. lightly_studio/metrics/detection/__init__.py +0 -0
  162. lightly_studio/metrics/detection/map.py +0 -268
  163. lightly_studio/models/annotation_task.py +0 -28
  164. lightly_studio/resolvers/annotation_resolver/create.py +0 -19
  165. lightly_studio/resolvers/annotation_task_resolver.py +0 -31
  166. lightly_studio-0.3.1.dist-info/METADATA +0 -520
  167. /lightly_studio/{metrics → core/dataset_query}/__init__.py +0 -0
  168. /lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{OpenSans- → OpenSans-Medium.DVUZMR_6.ttf} +0 -0
  169. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.3.dist-info}/WHEEL +0 -0
@@ -0,0 +1,87 @@
1
+ """This module contains the API routes for managing selections."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Union
6
+
7
+ from fastapi import APIRouter, Depends, HTTPException
8
+ from pydantic import BaseModel, Field
9
+ from typing_extensions import Annotated
10
+
11
+ from lightly_studio.api.routes.api.dataset import get_and_validate_dataset_id
12
+ from lightly_studio.db_manager import SessionDep
13
+ from lightly_studio.models.dataset import DatasetTable
14
+ from lightly_studio.resolvers import sample_resolver
15
+ from lightly_studio.selection.select_via_db import select_via_database
16
+ from lightly_studio.selection.selection_config import (
17
+ EmbeddingDiversityStrategy,
18
+ MetadataWeightingStrategy,
19
+ SelectionConfig,
20
+ )
21
+
22
# Router on which the selection endpoint of this module is registered.
selection_router = APIRouter()

# Union of the supported selection strategies. The `strategy_name` field is
# declared as the discriminator that picks the concrete strategy model.
Strategy = Annotated[
    Union[EmbeddingDiversityStrategy, MetadataWeightingStrategy],
    Field(discriminator="strategy_name"),
]
28
+
29
+
30
class SelectionRequest(BaseModel):
    """Request model for selection.

    Used as the request body of the selection endpoint in this module.
    """

    # Number of samples to select; constrained to be greater than zero.
    n_samples_to_select: int = Field(gt=0, description="Number of samples to select")
    # Name for the result tag; constrained to be a non-empty string.
    selection_result_tag_name: str = Field(min_length=1, description="Name for the result tag")
    # Strategies to apply; each entry is one of the `Strategy` union members.
    strategies: list[Strategy]
36
+
37
+
38
@selection_router.post(
    "/datasets/{dataset_id}/selection",
    status_code=204,
    response_model=None,
)
def create_combination_selection(
    session: SessionDep,
    dataset: Annotated[
        DatasetTable,
        Depends(get_and_validate_dataset_id),
    ],
    request: SelectionRequest,
) -> None:
    """Run a selection over all samples of the dataset.

    All samples of the dataset are used as selection candidates. The requested
    sample count, result tag name and strategies are forwarded to
    ``select_via_database`` inside a ``SelectionConfig``.

    Args:
        session: Database session dependency.
        dataset: Dataset to perform selection on.
        request: Selection parameters (sample count, result tag name, strategies).

    Returns:
        None (204 No Content on success).

    Raises:
        HTTPException: 400 if the dataset holds fewer samples than requested.
    """
    dataset_id = dataset.dataset_id
    n_requested = request.n_samples_to_select

    # Every sample of the dataset is a candidate for the selection.
    candidate_ids = [
        sample.sample_id
        for sample in sample_resolver.get_all_by_dataset_id(
            session=session, dataset_id=dataset_id
        ).samples
    ]

    # Reject requests that ask for more samples than are available.
    n_available = len(candidate_ids)
    if n_requested > n_available:
        raise HTTPException(
            status_code=400,
            detail=f"Dataset has only {n_available} samples, "
            f"cannot select {n_requested}",
        )

    # Run the selection with the strategies supplied in the request.
    select_via_database(
        session=session,
        config=SelectionConfig(
            dataset_id=dataset_id,
            n_samples_to_select=n_requested,
            selection_result_tag_name=request.selection_result_tag_name,
            strategies=request.strategies,
        ),
        input_sample_ids=candidate_ids,
    )
@@ -1,17 +1,13 @@
1
1
  """This module contains the API routes for user settings."""
2
2
 
3
- from fastapi import APIRouter, Depends
4
- from sqlmodel import Session
5
- from typing_extensions import Annotated
3
+ from fastapi import APIRouter
6
4
 
7
- from lightly_studio.api.db import get_session
5
+ from lightly_studio.db_manager import SessionDep
8
6
  from lightly_studio.models.settings import SettingView
9
7
  from lightly_studio.resolvers import settings_resolver
10
8
 
11
9
  settings_router = APIRouter(tags=["settings"])
12
10
 
13
- SessionDep = Annotated[Session, Depends(get_session)]
14
-
15
11
 
16
12
  @settings_router.get("/settings")
17
13
  def get_settings(
@@ -5,12 +5,12 @@ from __future__ import annotations
5
5
  import os
6
6
  from collections.abc import Generator
7
7
 
8
- from fastapi import APIRouter, Depends, HTTPException
8
+ import fsspec
9
+ from fastapi import APIRouter, HTTPException
9
10
  from fastapi.responses import StreamingResponse
10
- from sqlmodel import Session
11
11
 
12
- from lightly_studio.api import db
13
12
  from lightly_studio.api.routes.api import status
13
+ from lightly_studio.db_manager import SessionDep
14
14
  from lightly_studio.models import sample
15
15
 
16
16
  app_router = APIRouter()
@@ -19,7 +19,7 @@ app_router = APIRouter()
19
19
  @app_router.get("/sample/{sample_id}")
20
20
  async def serve_image_by_sample_id(
21
21
  sample_id: str,
22
- session: Session = Depends(db.get_session), # noqa: B008
22
+ session: SessionDep,
23
23
  ) -> StreamingResponse:
24
24
  """Serve an image by sample ID.
25
25
 
@@ -45,8 +45,8 @@ async def serve_image_by_sample_id(
45
45
 
46
46
  try:
47
47
  # Open the file.
48
- with open(file_path, "rb") as file:
49
- content = file.read()
48
+ fs, fs_path = fsspec.core.url_to_fs(file_path)
49
+ content = fs.cat_file(fs_path)
50
50
 
51
51
  # Determine content type based on file extension.
52
52
  content_type = _get_content_type(file_path)
@@ -0,0 +1,374 @@
1
+ """Functions to add samples and their annotations to a dataset in the database."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass, field
6
+ from pathlib import Path
7
+ from typing import Iterable
8
+ from uuid import UUID
9
+
10
+ import fsspec
11
+ import PIL
12
+ from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation
13
+ from labelformat.model.bounding_box import BoundingBoxFormat
14
+ from labelformat.model.image import Image
15
+ from labelformat.model.instance_segmentation import (
16
+ ImageInstanceSegmentation,
17
+ InstanceSegmentationInput,
18
+ )
19
+ from labelformat.model.multipolygon import MultiPolygon
20
+ from labelformat.model.object_detection import (
21
+ ImageObjectDetection,
22
+ ObjectDetectionInput,
23
+ )
24
+ from sqlmodel import Session
25
+ from tqdm import tqdm
26
+
27
+ from lightly_studio.models.annotation.annotation_base import AnnotationCreate
28
+ from lightly_studio.models.annotation_label import AnnotationLabelCreate
29
+ from lightly_studio.models.sample import SampleCreate, SampleTable
30
+ from lightly_studio.resolvers import (
31
+ annotation_label_resolver,
32
+ annotation_resolver,
33
+ sample_resolver,
34
+ )
35
+
36
# Constants
# Pending annotations are written to the database once at least this many
# have accumulated.
ANNOTATION_BATCH_SIZE = 64  # Number of annotations to process in a single batch
# Pending samples are written to the database once at least this many have
# accumulated.
SAMPLE_BATCH_SIZE = 32  # Number of samples to process in a single batch
# Upper bound on the number of not-inserted example paths kept for the summary.
MAX_EXAMPLE_PATHS_TO_SHOW = 5
40
+
41
+
42
@dataclass
class _AnnotationProcessingContext:
    """Context for processing annotations for a single sample.

    Bundles the values that the annotation-processing helpers copy into every
    ``AnnotationCreate`` they build.
    """

    # ID of the dataset the annotations are created for.
    dataset_id: UUID
    # ID of the stored sample the annotations belong to.
    sample_id: UUID
    # Maps a category ID of the input labels to the ID of the stored annotation label.
    label_map: dict[int, UUID]
49
+
50
+
51
@dataclass
class _LoadingLoggingContext:
    """Bookkeeping for the summary that is printed after loading samples."""

    # Number of samples in the dataset before loading started.
    n_samples_before_loading: int
    # Number of samples the caller attempted to insert.
    n_samples_to_be_inserted: int = 0
    # Example paths that were not inserted; capped at MAX_EXAMPLE_PATHS_TO_SHOW.
    example_paths_not_inserted: list[str] = field(default_factory=list)

    def update_example_paths(self, example_paths_not_inserted: list[str]) -> None:
        """Record more not-inserted paths without exceeding the example cap.

        Args:
            example_paths_not_inserted: Paths to add; only as many as still fit
                below MAX_EXAMPLE_PATHS_TO_SHOW are kept.
        """
        free_slots = MAX_EXAMPLE_PATHS_TO_SHOW - len(self.example_paths_not_inserted)
        if free_slots > 0:
            self.example_paths_not_inserted.extend(example_paths_not_inserted[:free_slots])
64
+
65
+
66
def load_into_dataset_from_paths(
    session: Session,
    dataset_id: UUID,
    image_paths: Iterable[str],
) -> list[UUID]:
    """Load images from file paths into the dataset.

    Every path is opened through fsspec to read the image size. Paths that
    cannot be opened or decoded are skipped and reported among the example
    paths that were not added. Samples are written to the database in batches
    of ``SAMPLE_BATCH_SIZE``.

    Args:
        session: The database session.
        dataset_id: The ID of the dataset to load images into.
        image_paths: An iterable of file paths to the images to load. One-shot
            iterators such as generators are supported.

    Returns:
        A list of UUIDs of the created samples.
    """
    # A bare `import PIL` does not load the `PIL.Image` submodule; import it
    # explicitly so `PIL.Image.open` does not rely on another module's import.
    import PIL.Image

    # Materialize the paths once. They are counted for the summary and then
    # iterated; counting a one-shot iterator would leave nothing to process.
    paths = list(image_paths)

    samples_to_create: list[SampleCreate] = []
    created_sample_ids: list[UUID] = []

    logging_context = _LoadingLoggingContext(
        n_samples_to_be_inserted=len(paths),
        n_samples_before_loading=sample_resolver.count_by_dataset_id(
            session=session, dataset_id=dataset_id
        ),
    )

    def _store_batch(batch: list[SampleCreate]) -> None:
        """Insert one batch of samples and record the outcome."""
        created_samples_batch, paths_not_inserted = _create_batch_samples(
            session=session, samples=batch
        )
        created_sample_ids.extend(s.sample_id for s in created_samples_batch)
        logging_context.update_example_paths(paths_not_inserted)

    for image_path in tqdm(
        paths,
        desc="Processing images",
        unit=" images",
    ):
        try:
            # Both context managers guarantee the file and the image are
            # closed even if reading the size fails.
            with fsspec.open(image_path, "rb") as file, PIL.Image.open(file) as image:
                width, height = image.size
        except (OSError, PIL.UnidentifiedImageError):
            # Best effort: skip unreadable files, but surface them in the
            # summary instead of dropping them silently.
            logging_context.update_example_paths([str(image_path)])
            continue

        samples_to_create.append(
            SampleCreate(
                file_name=Path(image_path).name,
                file_path_abs=image_path,
                width=width,
                height=height,
                dataset_id=dataset_id,
            )
        )

        # Process batch when it reaches SAMPLE_BATCH_SIZE
        if len(samples_to_create) >= SAMPLE_BATCH_SIZE:
            _store_batch(samples_to_create)
            samples_to_create = []

    # Handle remaining samples
    if samples_to_create:
        _store_batch(samples_to_create)

    _log_loading_results(session=session, dataset_id=dataset_id, logging_context=logging_context)
    return created_sample_ids
132
+
133
+
134
def load_into_dataset_from_labelformat(
    session: Session,
    dataset_id: UUID,
    input_labels: ObjectDetectionInput | InstanceSegmentationInput,
    images_path: Path,
) -> list[UUID]:
    """Load samples and their annotations from a labelformat input into the dataset.

    Samples are stored in batches of ``SAMPLE_BATCH_SIZE``. Annotations are
    only built for the samples returned as created by the batch insert.

    Args:
        session: The database session.
        dataset_id: The ID of the dataset to load samples into.
        input_labels: The labelformat input containing images and annotations.
        images_path: The path to the directory containing the images.

    Returns:
        A list of UUIDs of the created samples.
    """
    n_labelled_images = sum(1 for _ in input_labels.get_labels())
    n_existing_samples = sample_resolver.count_by_dataset_id(
        session=session, dataset_id=dataset_id
    )
    logging_context = _LoadingLoggingContext(
        n_samples_to_be_inserted=n_labelled_images,
        n_samples_before_loading=n_existing_samples,
    )

    # Category ID of the input -> ID of the stored annotation label.
    label_map = _create_label_map(session=session, input_labels=input_labels)

    pending_samples: list[SampleCreate] = []
    pending_annotations: list[AnnotationCreate] = []
    anno_data_by_path: dict[str, ImageInstanceSegmentation | ImageObjectDetection] = {}
    created_sample_ids: list[UUID] = []

    def _store_pending_samples() -> None:
        """Insert the pending samples and queue the annotations of the stored ones."""
        stored_batch, skipped_paths = _create_batch_samples(
            session=session, samples=pending_samples
        )
        created_sample_ids.extend(stored.sample_id for stored in stored_batch)
        logging_context.update_example_paths(skipped_paths)
        _process_batch_annotations(
            session=session,
            stored_samples=stored_batch,
            image_path_to_anno_data=anno_data_by_path,
            dataset_id=dataset_id,
            label_map=label_map,
            annotations_to_create=pending_annotations,
        )

    for image_data in tqdm(input_labels.get_labels(), desc="Processing images", unit=" images"):
        image: Image = image_data.image  # type: ignore[attr-defined]
        typed_image_data: ImageInstanceSegmentation | ImageObjectDetection = image_data  # type: ignore[assignment]

        new_sample = SampleCreate(
            file_name=str(image.filename),
            file_path_abs=str(images_path / image.filename),
            width=image.width,
            height=image.height,
            dataset_id=dataset_id,
        )
        pending_samples.append(new_sample)
        anno_data_by_path[new_sample.file_path_abs] = typed_image_data

        # Keep collecting until a full batch is available.
        if len(pending_samples) < SAMPLE_BATCH_SIZE:
            continue
        _store_pending_samples()
        pending_samples.clear()
        anno_data_by_path.clear()

    # Store the last, partially filled batch.
    if pending_samples:
        _store_pending_samples()

    # Insert any remaining annotations
    if pending_annotations:
        annotation_resolver.create_many(session=session, annotations=pending_annotations)

    _log_loading_results(session=session, dataset_id=dataset_id, logging_context=logging_context)
    return created_sample_ids
219
+
220
+
221
def _log_loading_results(
    session: Session, dataset_id: UUID, logging_context: _LoadingLoggingContext
) -> None:
    """Print a summary of a loading run.

    Reports how many samples were added, computed as the difference between
    the current sample count of the dataset and the count recorded before
    loading, followed by the collected example paths that were not added.

    Args:
        session: The database session.
        dataset_id: The ID of the dataset that was loaded into.
        logging_context: Counters and example paths collected while loading.
    """
    n_samples_end = sample_resolver.count_by_dataset_id(session=session, dataset_id=dataset_id)
    n_samples_inserted = n_samples_end - logging_context.n_samples_before_loading
    # TODO(Jonas, 09/2025): Use logging instead of print
    print(
        f"Added {n_samples_inserted} out of {logging_context.n_samples_to_be_inserted}"
        " new samples to the dataset."
    )
    if logging_context.example_paths_not_inserted:
        # A single space follows the colon; the previous literals produced two.
        print(
            "Examples of paths that were not added: "
            f"{', '.join(logging_context.example_paths_not_inserted)}"
        )
236
+
237
+
238
def _create_batch_samples(
    session: Session, samples: list[SampleCreate]
) -> tuple[list[SampleTable], list[str]]:
    """Store the samples of a batch whose file path is new.

    The samples are keyed by ``file_path_abs``, so if several samples of the
    batch share a path only the last one is kept.

    Args:
        session: The database session.
        samples: The samples to create.

    Returns:
        A tuple ``(created_samples, existing_file_paths)``: the created
        SampleTable objects and the file paths that already existed in the
        database.
    """
    sample_by_path = {candidate.file_path_abs: candidate for candidate in samples}
    new_paths, existing_paths = sample_resolver.filter_new_paths(
        session=session, file_paths_abs=list(sample_by_path)
    )
    created_samples = sample_resolver.create_many(
        session=session,
        samples=[sample_by_path[new_path] for new_path in new_paths],
    )
    return created_samples, existing_paths
262
+
263
+
264
def _create_label_map(
    session: Session,
    input_labels: ObjectDetectionInput | InstanceSegmentationInput,
) -> dict[int, UUID]:
    """Create an annotation label per input category and map category IDs to label IDs.

    Args:
        session: The database session.
        input_labels: The labelformat input whose categories are stored.

    Returns:
        A mapping from category ID to the ID of the stored annotation label.
    """
    categories_with_progress = tqdm(
        input_labels.get_categories(),
        desc="Processing categories",
        unit=" categories",
    )
    label_id_by_category: dict[int, UUID] = {}
    for category in categories_with_progress:
        stored_label = annotation_label_resolver.create(
            session=session,
            label=AnnotationLabelCreate(annotation_label_name=category.name),
        )
        label_id_by_category[category.id] = stored_label.annotation_label_id
    return label_id_by_category
279
+
280
+
281
def _process_object_detection_annotations(
    context: _AnnotationProcessingContext,
    anno_data: ImageObjectDetection,
) -> list[AnnotationCreate]:
    """Build the object detection annotations of a single image.

    Args:
        context: Dataset ID, sample ID and label map used for every annotation.
        anno_data: The object detection data of the image.

    Returns:
        One ``AnnotationCreate`` per object in ``anno_data``; the box values
        are converted to XYWH format and passed through ``int()``.
    """

    def _to_annotation(detected_object) -> AnnotationCreate:  # noqa: ANN001
        left, top, box_width, box_height = detected_object.box.to_format(
            BoundingBoxFormat.XYWH
        )
        return AnnotationCreate(
            dataset_id=context.dataset_id,
            sample_id=context.sample_id,
            annotation_label_id=context.label_map[detected_object.category.id],
            annotation_type="object_detection",
            x=int(left),
            y=int(top),
            width=int(box_width),
            height=int(box_height),
            confidence=detected_object.confidence,
        )

    return [_to_annotation(detected_object) for detected_object in anno_data.objects]
305
+
306
+
307
def _process_instance_segmentation_annotations(
    context: _AnnotationProcessingContext,
    anno_data: ImageInstanceSegmentation,
) -> list[AnnotationCreate]:
    """Build the instance segmentation annotations of a single image.

    Args:
        context: Dataset ID, sample ID and label map used for every annotation.
        anno_data: The instance segmentation data of the image.

    Returns:
        One ``AnnotationCreate`` per object in ``anno_data``. A segmentation
        mask is only set for binary mask segmentations.

    Raises:
        ValueError: If an object's segmentation is neither a ``MultiPolygon``
            nor a ``BinaryMaskSegmentation``.
    """
    annotations: list[AnnotationCreate] = []
    for instance in anno_data.objects:
        segmentation = instance.segmentation
        mask_rle: None | list[int] = None

        # MultiPolygon exposes the bounding box as a method, the binary mask
        # as an attribute.
        if isinstance(segmentation, MultiPolygon):
            bounding_box = segmentation.bounding_box()
        elif isinstance(segmentation, BinaryMaskSegmentation):
            bounding_box = segmentation.bounding_box
            mask_rle = segmentation._rle_row_wise  # noqa: SLF001
        else:
            raise ValueError(f"Unsupported segmentation type: {type(segmentation)}")

        left, top, box_width, box_height = bounding_box.to_format(BoundingBoxFormat.XYWH)
        annotations.append(
            AnnotationCreate(
                dataset_id=context.dataset_id,
                sample_id=context.sample_id,
                annotation_label_id=context.label_map[instance.category.id],
                annotation_type="instance_segmentation",
                x=int(left),
                y=int(top),
                width=int(box_width),
                height=int(box_height),
                segmentation_mask=mask_rle,
            )
        )
    return annotations
339
+
340
+
341
def _process_batch_annotations(  # noqa: PLR0913
    session: Session,
    stored_samples: list[SampleTable],
    image_path_to_anno_data: dict[str, ImageInstanceSegmentation | ImageObjectDetection],
    dataset_id: UUID,
    label_map: dict[int, UUID],
    annotations_to_create: list[AnnotationCreate],
) -> None:
    """Queue the annotations of a batch of stored samples and flush full batches.

    Args:
        session: The database session.
        stored_samples: The stored samples whose annotations are processed.
        image_path_to_anno_data: Annotation data keyed by absolute file path.
        dataset_id: The ID of the dataset the annotations belong to.
        label_map: Mapping from category ID to annotation label ID.
        annotations_to_create: Pending annotations. This list is mutated in
            place: new annotations are appended, and it is cleared whenever
            its content has been written to the database.

    Raises:
        ValueError: If the annotation data of a sample has an unsupported type.
    """
    for stored_sample in stored_samples:
        sample_anno_data = image_path_to_anno_data[stored_sample.file_path_abs]
        sample_context = _AnnotationProcessingContext(
            dataset_id=dataset_id,
            sample_id=stored_sample.sample_id,
            label_map=label_map,
        )

        if isinstance(sample_anno_data, ImageInstanceSegmentation):
            annotations_to_create.extend(
                _process_instance_segmentation_annotations(
                    context=sample_context, anno_data=sample_anno_data
                )
            )
        elif isinstance(sample_anno_data, ImageObjectDetection):
            annotations_to_create.extend(
                _process_object_detection_annotations(
                    context=sample_context, anno_data=sample_anno_data
                )
            )
        else:
            raise ValueError(f"Unsupported annotation type: {type(sample_anno_data)}")

        # Write to the database only once enough annotations are pending.
        if len(annotations_to_create) < ANNOTATION_BATCH_SIZE:
            continue
        annotation_resolver.create_many(session=session, annotations=annotations_to_create)
        annotations_to_create.clear()