lightly-studio 0.3.1-py3-none-any.whl → 0.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lightly-studio has been flagged as possibly problematic by the registry.
Files changed (133)
  1. lightly_studio/__init__.py +4 -4
  2. lightly_studio/api/app.py +1 -1
  3. lightly_studio/api/routes/api/annotation.py +6 -16
  4. lightly_studio/api/routes/api/annotation_label.py +2 -5
  5. lightly_studio/api/routes/api/annotation_task.py +4 -5
  6. lightly_studio/api/routes/api/classifier.py +2 -5
  7. lightly_studio/api/routes/api/dataset.py +2 -3
  8. lightly_studio/api/routes/api/dataset_tag.py +2 -3
  9. lightly_studio/api/routes/api/metadata.py +2 -4
  10. lightly_studio/api/routes/api/metrics.py +2 -6
  11. lightly_studio/api/routes/api/sample.py +5 -13
  12. lightly_studio/api/routes/api/settings.py +2 -6
  13. lightly_studio/api/routes/images.py +6 -6
  14. lightly_studio/core/add_samples.py +383 -0
  15. lightly_studio/core/dataset.py +250 -362
  16. lightly_studio/core/dataset_query/__init__.py +0 -0
  17. lightly_studio/core/dataset_query/boolean_expression.py +67 -0
  18. lightly_studio/core/dataset_query/dataset_query.py +211 -0
  19. lightly_studio/core/dataset_query/field.py +113 -0
  20. lightly_studio/core/dataset_query/field_expression.py +79 -0
  21. lightly_studio/core/dataset_query/match_expression.py +23 -0
  22. lightly_studio/core/dataset_query/order_by.py +79 -0
  23. lightly_studio/core/dataset_query/sample_field.py +28 -0
  24. lightly_studio/core/dataset_query/tags_expression.py +46 -0
  25. lightly_studio/core/sample.py +159 -32
  26. lightly_studio/core/start_gui.py +35 -0
  27. lightly_studio/dataset/edge_embedding_generator.py +13 -8
  28. lightly_studio/dataset/embedding_generator.py +2 -3
  29. lightly_studio/dataset/embedding_manager.py +74 -6
  30. lightly_studio/dataset/fsspec_lister.py +275 -0
  31. lightly_studio/dataset/loader.py +49 -30
  32. lightly_studio/dataset/mobileclip_embedding_generator.py +6 -4
  33. lightly_studio/db_manager.py +145 -0
  34. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BBm0IWdq.css +1 -0
  35. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BNTuXSAe.css +1 -0
  36. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/2O287xak.js +3 -0
  37. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{O-EABkf9.js → 7YNGEs1C.js} +1 -1
  38. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BBoGk9hq.js +1 -0
  39. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BRnH9v23.js +92 -0
  40. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bg1Y5eUZ.js +1 -0
  41. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DOlTMNyt.js → BqBqV92V.js} +1 -1
  42. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C0JiMuYn.js +1 -0
  43. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DjfY96ND.js → C98Hk3r5.js} +1 -1
  44. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{r64xT6ao.js → CG0dMCJi.js} +1 -1
  45. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{C8I8rFJQ.js → Ccq4ZD0B.js} +1 -1
  46. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cpy-nab_.js +1 -0
  47. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bu7uvVrG.js → Crk-jcvV.js} +1 -1
  48. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cs31G8Qn.js +1 -0
  49. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CsKrY2zA.js +1 -0
  50. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{x9G_hzyY.js → Cur71c3O.js} +1 -1
  51. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CzgC3GFB.js +1 -0
  52. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D8GZDMNN.js +1 -0
  53. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DFRh-Spp.js +1 -0
  54. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BylOuP6i.js → DRZO-E-T.js} +1 -1
  55. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{l7KrR96u.js → DcGCxgpH.js} +1 -1
  56. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bsi3UGy5.js → Df3aMO5B.js} +1 -1
  57. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{hQVEETDE.js → DkR_EZ_B.js} +1 -1
  58. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DqUGznj_.js +1 -0
  59. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KpAtIldw.js +1 -0
  60. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/M1Q1F7bw.js +4 -0
  61. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{CDnpyLsT.js → OH7-C_mc.js} +1 -1
  62. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D6su9Aln.js → gLNdjSzu.js} +1 -1
  63. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/i0ZZ4z06.js +1 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BI-EA5gL.js +2 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.CcsRl3cZ.js +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.BbO4Zc3r.js +1 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{1.B4rNYwVp.js → 1._I9GR805.js} +1 -1
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.J2RBFrSr.js +1 -0
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.Cmqj25a-.js +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C45iKJHA.js +6 -0
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.CWHpKonm.js → 3.w9g4AcAx.js} +1 -1
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.OUWOLQeV.js → 4.BBI8KwnD.js} +1 -1
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.huHuxdiF.js +1 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.CrbkRPam.js +1 -0
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.FomEdhD6.js +1 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cb_ADSLk.js +1 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{9.CPu3CiBc.js → 9.CajIG5ce.js} +1 -1
  78. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
  79. lightly_studio/dist_lightly_studio_view_app/index.html +14 -14
  80. lightly_studio/examples/example.py +13 -12
  81. lightly_studio/examples/example_coco.py +13 -0
  82. lightly_studio/examples/example_metadata.py +83 -98
  83. lightly_studio/examples/example_selection.py +7 -19
  84. lightly_studio/examples/example_split_work.py +12 -36
  85. lightly_studio/examples/{example_v2.py → example_yolo.py} +3 -4
  86. lightly_studio/models/annotation/annotation_base.py +7 -8
  87. lightly_studio/models/annotation/instance_segmentation.py +8 -8
  88. lightly_studio/models/annotation/object_detection.py +4 -4
  89. lightly_studio/models/dataset.py +6 -2
  90. lightly_studio/models/sample.py +10 -3
  91. lightly_studio/resolvers/dataset_resolver.py +10 -0
  92. lightly_studio/resolvers/embedding_model_resolver.py +22 -0
  93. lightly_studio/resolvers/sample_resolver.py +53 -9
  94. lightly_studio/resolvers/tag_resolver.py +23 -0
  95. lightly_studio/selection/select.py +55 -46
  96. lightly_studio/selection/select_via_db.py +23 -19
  97. lightly_studio/selection/selection_config.py +6 -3
  98. lightly_studio/services/annotations_service/__init__.py +4 -0
  99. lightly_studio/services/annotations_service/update_annotation.py +21 -32
  100. lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
  101. lightly_studio-0.3.2.dist-info/METADATA +689 -0
  102. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.2.dist-info}/RECORD +104 -91
  103. lightly_studio/api/db.py +0 -133
  104. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +0 -1
  105. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +0 -1
  106. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +0 -1
  107. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +0 -1
  108. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +0 -1
  109. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +0 -1
  110. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +0 -1
  111. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +0 -93
  112. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +0 -3
  113. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D5-A_Ffd.js +0 -4
  114. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +0 -1
  115. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +0 -1
  116. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +0 -1
  117. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +0 -1
  118. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +0 -1
  119. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +0 -1
  120. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +0 -1
  121. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +0 -2
  122. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +0 -1
  123. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +0 -1
  124. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +0 -1
  125. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +0 -1
  126. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +0 -6
  127. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +0 -1
  128. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +0 -1
  129. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +0 -1
  130. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +0 -1
  131. lightly_studio-0.3.1.dist-info/METADATA +0 -520
  132. /lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{OpenSans- → OpenSans-Medium.DVUZMR_6.ttf} +0 -0
  133. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.2.dist-info}/WHEEL +0 -0
lightly_studio/core/add_samples.py
@@ -0,0 +1,383 @@
+ """Functions to add samples and their annotations to a dataset in the database."""
+
+ from __future__ import annotations
+
+ from dataclasses import dataclass, field
+ from pathlib import Path
+ from typing import Iterable
+ from uuid import UUID
+
+ import fsspec
+ import PIL
+ from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation
+ from labelformat.model.bounding_box import BoundingBoxFormat
+ from labelformat.model.image import Image
+ from labelformat.model.instance_segmentation import (
+     ImageInstanceSegmentation,
+     InstanceSegmentationInput,
+ )
+ from labelformat.model.multipolygon import MultiPolygon
+ from labelformat.model.object_detection import (
+     ImageObjectDetection,
+     ObjectDetectionInput,
+ )
+ from sqlmodel import Session
+ from tqdm import tqdm
+
+ from lightly_studio.models.annotation.annotation_base import AnnotationCreate
+ from lightly_studio.models.annotation_label import AnnotationLabelCreate
+ from lightly_studio.models.sample import SampleCreate, SampleTable
+ from lightly_studio.resolvers import (
+     annotation_label_resolver,
+     annotation_resolver,
+     sample_resolver,
+ )
+
+ # Constants
+ ANNOTATION_BATCH_SIZE = 64  # Number of annotations to process in a single batch
+ SAMPLE_BATCH_SIZE = 32  # Number of samples to process in a single batch
+ MAX_EXAMPLE_PATHS_TO_SHOW = 5
+
+
+ @dataclass
+ class _AnnotationProcessingContext:
+     """Context for processing annotations for a single sample."""
+
+     dataset_id: UUID
+     sample_id: UUID
+     label_map: dict[int, UUID]
+     annotation_task_id: UUID
+
+
+ @dataclass
+ class _LoadingLoggingContext:
+     """Context for the logging while loading data."""
+
+     n_samples_before_loading: int
+     n_samples_to_be_inserted: int = 0
+     example_paths_not_inserted: list[str] = field(default_factory=list)
+
+     def update_example_paths(self, example_paths_not_inserted: list[str]) -> None:
+         if len(self.example_paths_not_inserted) >= MAX_EXAMPLE_PATHS_TO_SHOW:
+             return
+         upper_limit = MAX_EXAMPLE_PATHS_TO_SHOW - len(self.example_paths_not_inserted)
+         self.example_paths_not_inserted.extend(example_paths_not_inserted[:upper_limit])
+
+
+ def load_into_dataset_from_paths(
+     session: Session,
+     dataset_id: UUID,
+     image_paths: Iterable[str],
+ ) -> list[UUID]:
+     """Load images from file paths into the dataset.
+
+     Args:
+         session: The database session.
+         dataset_id: The ID of the dataset to load images into.
+         image_paths: An iterable of file paths to the images to load.
+
+     Returns:
+         A list of UUIDs of the created samples.
+     """
+     samples_to_create: list[SampleCreate] = []
+     created_sample_ids: list[UUID] = []
+
+     logging_context = _LoadingLoggingContext(
+         n_samples_to_be_inserted=sum(1 for _ in image_paths),
+         n_samples_before_loading=sample_resolver.count_by_dataset_id(
+             session=session, dataset_id=dataset_id
+         ),
+     )
+
+     for image_path in tqdm(
+         image_paths,
+         desc="Processing images",
+         unit=" images",
+     ):
+         try:
+             with fsspec.open(image_path, "rb") as file:
+                 image = PIL.Image.open(file)
+                 width, height = image.size
+                 image.close()
+         except (FileNotFoundError, PIL.UnidentifiedImageError, OSError):
+             continue
+
+         sample = SampleCreate(
+             file_name=Path(image_path).name,
+             file_path_abs=image_path,
+             width=width,
+             height=height,
+             dataset_id=dataset_id,
+         )
+         samples_to_create.append(sample)
+
+         # Process batch when it reaches SAMPLE_BATCH_SIZE
+         if len(samples_to_create) >= SAMPLE_BATCH_SIZE:
+             created_samples_batch, paths_not_inserted = _create_batch_samples(
+                 session=session, samples=samples_to_create
+             )
+             created_sample_ids.extend(s.sample_id for s in created_samples_batch)
+             logging_context.update_example_paths(paths_not_inserted)
+             samples_to_create = []
+
+     # Handle remaining samples
+     if samples_to_create:
+         created_samples_batch, paths_not_inserted = _create_batch_samples(
+             session=session, samples=samples_to_create
+         )
+         created_sample_ids.extend(s.sample_id for s in created_samples_batch)
+         logging_context.update_example_paths(paths_not_inserted)
+
+     _log_loading_results(session=session, dataset_id=dataset_id, logging_context=logging_context)
+     return created_sample_ids
+
+
+ def load_into_dataset_from_labelformat(
+     session: Session,
+     dataset_id: UUID,
+     input_labels: ObjectDetectionInput | InstanceSegmentationInput,
+     images_path: Path,
+     annotation_task_id: UUID,
+ ) -> list[UUID]:
+     """Load samples and their annotations from a labelformat input into the dataset.
+
+     Args:
+         session: The database session.
+         dataset_id: The ID of the dataset to load samples into.
+         input_labels: The labelformat input containing images and annotations.
+         images_path: The path to the directory containing the images.
+         annotation_task_id: The ID of the annotation task to associate with the annotations.
+
+     Returns:
+         A list of UUIDs of the created samples.
+     """
+     logging_context = _LoadingLoggingContext(
+         n_samples_to_be_inserted=sum(1 for _ in input_labels.get_labels()),
+         n_samples_before_loading=sample_resolver.count_by_dataset_id(
+             session=session, dataset_id=dataset_id
+         ),
+     )
+
+     # Create label mapping
+     label_map = _create_label_map(session=session, input_labels=input_labels)
+
+     annotations_to_create: list[AnnotationCreate] = []
+     samples_to_create: list[SampleCreate] = []
+     created_sample_ids: list[UUID] = []
+     image_path_to_anno_data: dict[str, ImageInstanceSegmentation | ImageObjectDetection] = {}
+
+     for image_data in tqdm(input_labels.get_labels(), desc="Processing images", unit=" images"):
+         image: Image = image_data.image  # type: ignore[attr-defined]
+
+         typed_image_data: ImageInstanceSegmentation | ImageObjectDetection = image_data  # type: ignore[assignment]
+         sample = SampleCreate(
+             file_name=str(image.filename),
+             file_path_abs=str(images_path / image.filename),
+             width=image.width,
+             height=image.height,
+             dataset_id=dataset_id,
+         )
+         samples_to_create.append(sample)
+         image_path_to_anno_data[sample.file_path_abs] = typed_image_data
+
+         if len(samples_to_create) >= SAMPLE_BATCH_SIZE:
+             created_samples_batch, paths_not_inserted = _create_batch_samples(
+                 session=session, samples=samples_to_create
+             )
+             created_sample_ids.extend(s.sample_id for s in created_samples_batch)
+             logging_context.update_example_paths(paths_not_inserted)
+             _process_batch_annotations(
+                 session=session,
+                 stored_samples=created_samples_batch,
+                 image_path_to_anno_data=image_path_to_anno_data,
+                 dataset_id=dataset_id,
+                 label_map=label_map,
+                 annotation_task_id=annotation_task_id,
+                 annotations_to_create=annotations_to_create,
+             )
+             samples_to_create.clear()
+             image_path_to_anno_data.clear()
+
+     if samples_to_create:
+         created_samples_batch, paths_not_inserted = _create_batch_samples(
+             session=session, samples=samples_to_create
+         )
+         created_sample_ids.extend(s.sample_id for s in created_samples_batch)
+         logging_context.update_example_paths(paths_not_inserted)
+         _process_batch_annotations(
+             session=session,
+             stored_samples=created_samples_batch,
+             image_path_to_anno_data=image_path_to_anno_data,
+             dataset_id=dataset_id,
+             label_map=label_map,
+             annotation_task_id=annotation_task_id,
+             annotations_to_create=annotations_to_create,
+         )
+
+     # Insert any remaining annotations
+     if annotations_to_create:
+         annotation_resolver.create_many(session=session, annotations=annotations_to_create)
+
+     _log_loading_results(session=session, dataset_id=dataset_id, logging_context=logging_context)
+
+     return created_sample_ids
+
+
+ def _log_loading_results(
+     session: Session, dataset_id: UUID, logging_context: _LoadingLoggingContext
+ ) -> None:
+     n_samples_end = sample_resolver.count_by_dataset_id(session=session, dataset_id=dataset_id)
+     n_samples_inserted = n_samples_end - logging_context.n_samples_before_loading
+     print(
+         f"Added {n_samples_inserted} out of {logging_context.n_samples_to_be_inserted}"
+         " new samples to the dataset."
+     )
+     if logging_context.example_paths_not_inserted:
+         # TODO(Jonas, 09/2025): Use logging instead of print
+         print(
+             f"Examples of paths that were not added: "
+             f" {', '.join(logging_context.example_paths_not_inserted)}"
+         )
+
+
+ def _create_batch_samples(
+     session: Session, samples: list[SampleCreate]
+ ) -> tuple[list[SampleTable], list[str]]:
+     """Create the batch samples.
+
+     Args:
+         session: The database session.
+         samples: The samples to create.
+
+     Returns:
+         created_samples: A list of created SampleTable objects,
+         existing_file_paths: A list of file paths that already existed in the database,
+     """
+     file_paths_abs_mapping = {sample.file_path_abs: sample for sample in samples}
+     file_paths_new, file_paths_exist = sample_resolver.filter_new_paths(
+         session=session, file_paths_abs=list(file_paths_abs_mapping.keys())
+     )
+     samples_to_create_filtered = [
+         file_paths_abs_mapping[file_path_new] for file_path_new in file_paths_new
+     ]
+     return (
+         sample_resolver.create_many(session=session, samples=samples_to_create_filtered),
+         file_paths_exist,
+     )
+
+
+ def _create_label_map(
+     session: Session,
+     input_labels: ObjectDetectionInput | InstanceSegmentationInput,
+ ) -> dict[int, UUID]:
+     """Create a mapping of category IDs to annotation label IDs."""
+     label_map = {}
+     for category in tqdm(
+         input_labels.get_categories(),
+         desc="Processing categories",
+         unit=" categories",
+     ):
+         label = AnnotationLabelCreate(annotation_label_name=category.name)
+         stored_label = annotation_label_resolver.create(session=session, label=label)
+         label_map[category.id] = stored_label.annotation_label_id
+     return label_map
+
+
+ def _process_object_detection_annotations(
+     context: _AnnotationProcessingContext,
+     anno_data: ImageObjectDetection,
+ ) -> list[AnnotationCreate]:
+     """Process object detection annotations for a single image."""
+     new_annotations = []
+     for obj in anno_data.objects:
+         box = obj.box.to_format(BoundingBoxFormat.XYWH)
+         x, y, width, height = box
+
+         new_annotations.append(
+             AnnotationCreate(
+                 dataset_id=context.dataset_id,
+                 sample_id=context.sample_id,
+                 annotation_label_id=context.label_map[obj.category.id],
+                 annotation_type="object_detection",
+                 x=int(x),
+                 y=int(y),
+                 width=int(width),
+                 height=int(height),
+                 confidence=obj.confidence,
+                 annotation_task_id=context.annotation_task_id,
+             )
+         )
+     return new_annotations
+
+
+ def _process_instance_segmentation_annotations(
+     context: _AnnotationProcessingContext,
+     anno_data: ImageInstanceSegmentation,
+ ) -> list[AnnotationCreate]:
+     """Process instance segmentation annotations for a single image."""
+     new_annotations = []
+     for obj in anno_data.objects:
+         segmentation_rle: None | list[int] = None
+         if isinstance(obj.segmentation, MultiPolygon):
+             box = obj.segmentation.bounding_box().to_format(BoundingBoxFormat.XYWH)
+         elif isinstance(obj.segmentation, BinaryMaskSegmentation):
+             box = obj.segmentation.bounding_box.to_format(BoundingBoxFormat.XYWH)
+             segmentation_rle = obj.segmentation._rle_row_wise  # noqa: SLF001
+         else:
+             raise ValueError(f"Unsupported segmentation type: {type(obj.segmentation)}")
+
+         x, y, width, height = box
+
+         new_annotations.append(
+             AnnotationCreate(
+                 dataset_id=context.dataset_id,
+                 sample_id=context.sample_id,
+                 annotation_label_id=context.label_map[obj.category.id],
+                 annotation_type="instance_segmentation",
+                 x=int(x),
+                 y=int(y),
+                 width=int(width),
+                 height=int(height),
+                 segmentation_mask=segmentation_rle,
+                 annotation_task_id=context.annotation_task_id,
+             )
+         )
+     return new_annotations
+
+
+ def _process_batch_annotations(  # noqa: PLR0913
+     session: Session,
+     stored_samples: list[SampleTable],
+     image_path_to_anno_data: dict[str, ImageInstanceSegmentation | ImageObjectDetection],
+     dataset_id: UUID,
+     label_map: dict[int, UUID],
+     annotation_task_id: UUID,
+     annotations_to_create: list[AnnotationCreate],
+ ) -> None:
+     """Process annotations for a batch of samples."""
+     for stored_sample in stored_samples:
+         anno_data = image_path_to_anno_data[stored_sample.file_path_abs]
+
+         context = _AnnotationProcessingContext(
+             dataset_id=dataset_id,
+             sample_id=stored_sample.sample_id,
+             label_map=label_map,
+             annotation_task_id=annotation_task_id,
+         )
+
+         if isinstance(anno_data, ImageInstanceSegmentation):
+             new_annotations = _process_instance_segmentation_annotations(
+                 context=context, anno_data=anno_data
+             )
+         elif isinstance(anno_data, ImageObjectDetection):
+             new_annotations = _process_object_detection_annotations(
+                 context=context, anno_data=anno_data
+             )
+         else:
+             raise ValueError(f"Unsupported annotation type: {type(anno_data)}")
+
+         annotations_to_create.extend(new_annotations)
+
+         if len(annotations_to_create) >= ANNOTATION_BATCH_SIZE:
+             annotation_resolver.create_many(session=session, annotations=annotations_to_create)
+             annotations_to_create.clear()
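
For orientation, below is a minimal usage sketch of the new lightly_studio/core/add_samples.py helpers, based only on the signatures visible in the hunk above. The engine URL, the Session setup, and the pre-existing dataset_id are illustrative assumptions and are not part of this diff.

from uuid import UUID

from sqlmodel import Session, create_engine

from lightly_studio.core import add_samples

# Assumption: the application already has a database engine and an existing dataset row;
# the URL and the dataset_id below are placeholders, not values from this release.
engine = create_engine("sqlite:///lightly_studio.db")
dataset_id = UUID("00000000-0000-0000-0000-000000000000")

with Session(engine) as session:
    # Insert plain image files; unreadable or already-known paths are skipped,
    # and a summary is printed by _log_loading_results().
    created_ids = add_samples.load_into_dataset_from_paths(
        session=session,
        dataset_id=dataset_id,
        image_paths=["/data/images/0001.jpg", "/data/images/0002.jpg"],
    )
    print(f"Created {len(created_ids)} new samples")

The annotation-aware variant, load_into_dataset_from_labelformat, additionally takes a labelformat ObjectDetectionInput or InstanceSegmentationInput, an images_path directory, and an annotation_task_id, and batches both sample and annotation inserts as shown in the hunk.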