lightly-studio 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lightly-studio might be problematic. Click here for more details.

Files changed (219) hide show
  1. lightly_studio/__init__.py +11 -0
  2. lightly_studio/api/__init__.py +0 -0
  3. lightly_studio/api/app.py +110 -0
  4. lightly_studio/api/cache.py +77 -0
  5. lightly_studio/api/db.py +133 -0
  6. lightly_studio/api/db_tables.py +32 -0
  7. lightly_studio/api/features.py +7 -0
  8. lightly_studio/api/routes/api/annotation.py +233 -0
  9. lightly_studio/api/routes/api/annotation_label.py +90 -0
  10. lightly_studio/api/routes/api/annotation_task.py +38 -0
  11. lightly_studio/api/routes/api/classifier.py +387 -0
  12. lightly_studio/api/routes/api/dataset.py +182 -0
  13. lightly_studio/api/routes/api/dataset_tag.py +257 -0
  14. lightly_studio/api/routes/api/exceptions.py +96 -0
  15. lightly_studio/api/routes/api/features.py +17 -0
  16. lightly_studio/api/routes/api/metadata.py +37 -0
  17. lightly_studio/api/routes/api/metrics.py +80 -0
  18. lightly_studio/api/routes/api/sample.py +196 -0
  19. lightly_studio/api/routes/api/settings.py +45 -0
  20. lightly_studio/api/routes/api/status.py +19 -0
  21. lightly_studio/api/routes/api/text_embedding.py +48 -0
  22. lightly_studio/api/routes/api/validators.py +17 -0
  23. lightly_studio/api/routes/healthz.py +13 -0
  24. lightly_studio/api/routes/images.py +104 -0
  25. lightly_studio/api/routes/webapp.py +51 -0
  26. lightly_studio/api/server.py +82 -0
  27. lightly_studio/core/__init__.py +0 -0
  28. lightly_studio/core/dataset.py +523 -0
  29. lightly_studio/core/sample.py +77 -0
  30. lightly_studio/core/start_gui.py +15 -0
  31. lightly_studio/dataset/__init__.py +0 -0
  32. lightly_studio/dataset/edge_embedding_generator.py +144 -0
  33. lightly_studio/dataset/embedding_generator.py +91 -0
  34. lightly_studio/dataset/embedding_manager.py +163 -0
  35. lightly_studio/dataset/env.py +16 -0
  36. lightly_studio/dataset/file_utils.py +35 -0
  37. lightly_studio/dataset/loader.py +622 -0
  38. lightly_studio/dataset/mobileclip_embedding_generator.py +144 -0
  39. lightly_studio/dist_lightly_studio_view_app/_app/env.js +1 -0
  40. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.DenzbfeK.css +1 -0
  41. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/LightlyLogo.BNjCIww-.png +0 -0
  42. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans- +0 -0
  43. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Bold.DGvYQtcs.ttf +0 -0
  44. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Italic-VariableFont_wdth_wght.B4AZ-wl6.ttf +0 -0
  45. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Regular.DxJTClRG.ttf +0 -0
  46. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-SemiBold.D3TTYgdB.ttf +0 -0
  47. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-VariableFont_wdth_wght.BZBpG5Iz.ttf +0 -0
  48. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +1 -0
  49. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +1 -0
  50. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.T-zjSUd3.css +1 -0
  51. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/useFeatureFlags.CV-KWLNP.css +1 -0
  52. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/69_IOA4Y.js +1 -0
  53. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +1 -0
  54. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B90CZVMX.js +1 -0
  55. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +1 -0
  56. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +1 -0
  57. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bsi3UGy5.js +1 -0
  58. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bu7uvVrG.js +1 -0
  59. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +1 -0
  60. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BylOuP6i.js +1 -0
  61. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C8I8rFJQ.js +1 -0
  62. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CDnpyLsT.js +1 -0
  63. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CWj6FrbW.js +1 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CYgJF_JY.js +1 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +93 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +3 -0
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D5-A_Ffd.js +4 -0
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6su9Aln.js +1 -0
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +1 -0
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +1 -0
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIeogL5L.js +1 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DOlTMNyt.js +1 -0
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +1 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjfY96ND.js +1 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/H7C68rOM.js +1 -0
  78. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/O-EABkf9.js +1 -0
  79. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +1 -0
  80. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/hQVEETDE.js +1 -0
  81. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/l7KrR96u.js +1 -0
  82. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +1 -0
  83. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/r64xT6ao.js +1 -0
  84. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +1 -0
  85. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/x9G_hzyY.js +1 -0
  86. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +2 -0
  87. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +1 -0
  88. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +1 -0
  89. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/1.B4rNYwVp.js +1 -0
  90. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +1 -0
  91. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/11.CWG1ehzT.js +1 -0
  92. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +1 -0
  93. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +6 -0
  94. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/3.CWHpKonm.js +1 -0
  95. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/4.OUWOLQeV.js +1 -0
  96. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +1 -0
  97. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +1 -0
  98. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +1 -0
  99. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +1 -0
  100. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/9.CPu3CiBc.js +1 -0
  101. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -0
  102. lightly_studio/dist_lightly_studio_view_app/apple-touch-icon-precomposed.png +0 -0
  103. lightly_studio/dist_lightly_studio_view_app/apple-touch-icon.png +0 -0
  104. lightly_studio/dist_lightly_studio_view_app/favicon.png +0 -0
  105. lightly_studio/dist_lightly_studio_view_app/index.html +44 -0
  106. lightly_studio/examples/example.py +23 -0
  107. lightly_studio/examples/example_metadata.py +338 -0
  108. lightly_studio/examples/example_selection.py +39 -0
  109. lightly_studio/examples/example_split_work.py +67 -0
  110. lightly_studio/examples/example_v2.py +21 -0
  111. lightly_studio/export_schema.py +18 -0
  112. lightly_studio/few_shot_classifier/__init__.py +0 -0
  113. lightly_studio/few_shot_classifier/classifier.py +80 -0
  114. lightly_studio/few_shot_classifier/classifier_manager.py +663 -0
  115. lightly_studio/few_shot_classifier/random_forest_classifier.py +489 -0
  116. lightly_studio/metadata/complex_metadata.py +47 -0
  117. lightly_studio/metadata/gps_coordinate.py +41 -0
  118. lightly_studio/metadata/metadata_protocol.py +17 -0
  119. lightly_studio/metrics/__init__.py +0 -0
  120. lightly_studio/metrics/detection/__init__.py +0 -0
  121. lightly_studio/metrics/detection/map.py +268 -0
  122. lightly_studio/models/__init__.py +1 -0
  123. lightly_studio/models/annotation/__init__.py +0 -0
  124. lightly_studio/models/annotation/annotation_base.py +171 -0
  125. lightly_studio/models/annotation/instance_segmentation.py +56 -0
  126. lightly_studio/models/annotation/links.py +17 -0
  127. lightly_studio/models/annotation/object_detection.py +47 -0
  128. lightly_studio/models/annotation/semantic_segmentation.py +44 -0
  129. lightly_studio/models/annotation_label.py +47 -0
  130. lightly_studio/models/annotation_task.py +28 -0
  131. lightly_studio/models/classifier.py +20 -0
  132. lightly_studio/models/dataset.py +84 -0
  133. lightly_studio/models/embedding_model.py +30 -0
  134. lightly_studio/models/metadata.py +208 -0
  135. lightly_studio/models/sample.py +180 -0
  136. lightly_studio/models/sample_embedding.py +37 -0
  137. lightly_studio/models/settings.py +60 -0
  138. lightly_studio/models/tag.py +96 -0
  139. lightly_studio/py.typed +0 -0
  140. lightly_studio/resolvers/__init__.py +7 -0
  141. lightly_studio/resolvers/annotation_label_resolver/__init__.py +21 -0
  142. lightly_studio/resolvers/annotation_label_resolver/create.py +27 -0
  143. lightly_studio/resolvers/annotation_label_resolver/delete.py +28 -0
  144. lightly_studio/resolvers/annotation_label_resolver/get_all.py +22 -0
  145. lightly_studio/resolvers/annotation_label_resolver/get_by_id.py +24 -0
  146. lightly_studio/resolvers/annotation_label_resolver/get_by_ids.py +25 -0
  147. lightly_studio/resolvers/annotation_label_resolver/get_by_label_name.py +24 -0
  148. lightly_studio/resolvers/annotation_label_resolver/names_by_ids.py +25 -0
  149. lightly_studio/resolvers/annotation_label_resolver/update.py +38 -0
  150. lightly_studio/resolvers/annotation_resolver/__init__.py +33 -0
  151. lightly_studio/resolvers/annotation_resolver/count_annotations_by_dataset.py +120 -0
  152. lightly_studio/resolvers/annotation_resolver/create.py +19 -0
  153. lightly_studio/resolvers/annotation_resolver/create_many.py +96 -0
  154. lightly_studio/resolvers/annotation_resolver/delete_annotation.py +45 -0
  155. lightly_studio/resolvers/annotation_resolver/delete_annotations.py +56 -0
  156. lightly_studio/resolvers/annotation_resolver/get_all.py +74 -0
  157. lightly_studio/resolvers/annotation_resolver/get_by_id.py +18 -0
  158. lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +144 -0
  159. lightly_studio/resolvers/annotation_resolver/update_bounding_box.py +68 -0
  160. lightly_studio/resolvers/annotation_task_resolver.py +31 -0
  161. lightly_studio/resolvers/annotations/__init__.py +1 -0
  162. lightly_studio/resolvers/annotations/annotations_filter.py +89 -0
  163. lightly_studio/resolvers/dataset_resolver.py +278 -0
  164. lightly_studio/resolvers/embedding_model_resolver.py +100 -0
  165. lightly_studio/resolvers/metadata_resolver/__init__.py +15 -0
  166. lightly_studio/resolvers/metadata_resolver/metadata_filter.py +163 -0
  167. lightly_studio/resolvers/metadata_resolver/sample/__init__.py +21 -0
  168. lightly_studio/resolvers/metadata_resolver/sample/bulk_set_metadata.py +48 -0
  169. lightly_studio/resolvers/metadata_resolver/sample/get_by_sample_id.py +24 -0
  170. lightly_studio/resolvers/metadata_resolver/sample/get_metadata_info.py +104 -0
  171. lightly_studio/resolvers/metadata_resolver/sample/get_value_for_sample.py +27 -0
  172. lightly_studio/resolvers/metadata_resolver/sample/set_value_for_sample.py +53 -0
  173. lightly_studio/resolvers/sample_embedding_resolver.py +86 -0
  174. lightly_studio/resolvers/sample_resolver.py +249 -0
  175. lightly_studio/resolvers/samples_filter.py +81 -0
  176. lightly_studio/resolvers/settings_resolver.py +58 -0
  177. lightly_studio/resolvers/tag_resolver.py +276 -0
  178. lightly_studio/selection/README.md +6 -0
  179. lightly_studio/selection/mundig.py +105 -0
  180. lightly_studio/selection/select.py +96 -0
  181. lightly_studio/selection/select_via_db.py +93 -0
  182. lightly_studio/selection/selection_config.py +31 -0
  183. lightly_studio/services/annotations_service/__init__.py +21 -0
  184. lightly_studio/services/annotations_service/get_annotation_by_id.py +31 -0
  185. lightly_studio/services/annotations_service/update_annotation.py +65 -0
  186. lightly_studio/services/annotations_service/update_annotation_label.py +48 -0
  187. lightly_studio/services/annotations_service/update_annotations.py +29 -0
  188. lightly_studio/setup_logging.py +19 -0
  189. lightly_studio/type_definitions.py +19 -0
  190. lightly_studio/vendor/ACKNOWLEDGEMENTS +422 -0
  191. lightly_studio/vendor/LICENSE +31 -0
  192. lightly_studio/vendor/LICENSE_weights_data +50 -0
  193. lightly_studio/vendor/README.md +5 -0
  194. lightly_studio/vendor/__init__.py +1 -0
  195. lightly_studio/vendor/mobileclip/__init__.py +96 -0
  196. lightly_studio/vendor/mobileclip/clip.py +77 -0
  197. lightly_studio/vendor/mobileclip/configs/mobileclip_b.json +18 -0
  198. lightly_studio/vendor/mobileclip/configs/mobileclip_s0.json +18 -0
  199. lightly_studio/vendor/mobileclip/configs/mobileclip_s1.json +18 -0
  200. lightly_studio/vendor/mobileclip/configs/mobileclip_s2.json +18 -0
  201. lightly_studio/vendor/mobileclip/image_encoder.py +67 -0
  202. lightly_studio/vendor/mobileclip/logger.py +154 -0
  203. lightly_studio/vendor/mobileclip/models/__init__.py +10 -0
  204. lightly_studio/vendor/mobileclip/models/mci.py +933 -0
  205. lightly_studio/vendor/mobileclip/models/vit.py +433 -0
  206. lightly_studio/vendor/mobileclip/modules/__init__.py +4 -0
  207. lightly_studio/vendor/mobileclip/modules/common/__init__.py +4 -0
  208. lightly_studio/vendor/mobileclip/modules/common/mobileone.py +341 -0
  209. lightly_studio/vendor/mobileclip/modules/common/transformer.py +451 -0
  210. lightly_studio/vendor/mobileclip/modules/image/__init__.py +4 -0
  211. lightly_studio/vendor/mobileclip/modules/image/image_projection.py +113 -0
  212. lightly_studio/vendor/mobileclip/modules/image/replknet.py +188 -0
  213. lightly_studio/vendor/mobileclip/modules/text/__init__.py +4 -0
  214. lightly_studio/vendor/mobileclip/modules/text/repmixer.py +281 -0
  215. lightly_studio/vendor/mobileclip/modules/text/tokenizer.py +38 -0
  216. lightly_studio/vendor/mobileclip/text_encoder.py +245 -0
  217. lightly_studio-0.3.1.dist-info/METADATA +520 -0
  218. lightly_studio-0.3.1.dist-info/RECORD +219 -0
  219. lightly_studio-0.3.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,622 @@
1
+ """Dataset functionality module."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Iterator
6
+ from dataclasses import dataclass
7
+ from pathlib import Path
8
+ from typing import Iterable
9
+ from uuid import UUID
10
+
11
+ import PIL
12
+ from labelformat.formats import (
13
+ COCOInstanceSegmentationInput,
14
+ COCOObjectDetectionInput,
15
+ YOLOv8ObjectDetectionInput,
16
+ )
17
+ from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation
18
+ from labelformat.model.bounding_box import BoundingBoxFormat
19
+ from labelformat.model.image import Image
20
+ from labelformat.model.instance_segmentation import (
21
+ ImageInstanceSegmentation,
22
+ InstanceSegmentationInput,
23
+ )
24
+ from labelformat.model.multipolygon import MultiPolygon
25
+ from labelformat.model.object_detection import (
26
+ ImageObjectDetection,
27
+ ObjectDetectionInput,
28
+ )
29
+ from sqlmodel import Session
30
+ from tqdm import tqdm
31
+
32
+ from lightly_studio.api.db import db_manager
33
+ from lightly_studio.api.features import lightly_studio_active_features
34
+ from lightly_studio.api.server import Server
35
+ from lightly_studio.dataset import env
36
+ from lightly_studio.dataset.embedding_generator import EmbeddingGenerator
37
+ from lightly_studio.dataset.embedding_manager import (
38
+ EmbeddingManager,
39
+ EmbeddingManagerProvider,
40
+ )
41
+ from lightly_studio.models.annotation.annotation_base import AnnotationCreate
42
+ from lightly_studio.models.annotation_label import AnnotationLabelCreate
43
+ from lightly_studio.models.annotation_task import (
44
+ AnnotationTaskTable,
45
+ AnnotationType,
46
+ )
47
+ from lightly_studio.models.dataset import DatasetCreate, DatasetTable
48
+ from lightly_studio.models.sample import SampleCreate, SampleTable
49
+ from lightly_studio.resolvers import (
50
+ annotation_label_resolver,
51
+ annotation_resolver,
52
+ annotation_task_resolver,
53
+ dataset_resolver,
54
+ sample_resolver,
55
+ )
56
+
57
+ # Constants
58
+ ANNOTATION_BATCH_SIZE = 64 # Number of annotations to process in a single batch
59
+ SAMPLE_BATCH_SIZE = 32 # Number of samples to process in a single batch
60
+
61
+
62
@dataclass
class AnnotationProcessingContext:
    """Bundle of identifiers needed to build the annotations of one sample.

    Attributes:
        dataset_id: ID of the dataset the sample belongs to.
        sample_id: ID of the sample whose annotations are being processed.
        label_map: Mapping from an integer category ID to the UUID of the
            corresponding annotation label.
        annotation_task_id: ID of the annotation task the annotations are
            attached to.
    """

    # Field order is part of the positional constructor signature.
    dataset_id: UUID
    sample_id: UUID
    label_map: dict[int, UUID]
    annotation_task_id: UUID
70
+
71
+
72
class DatasetLoader:
    """Class responsible for loading datasets from various sources.

    Every ``from_*`` method creates a new dataset entry through
    ``dataset_resolver``, stores the samples (and annotations, where the
    source provides them) using the loader's session, and then triggers
    embedding generation for the stored samples.
    """

    def __init__(self) -> None:
        """Initialize the dataset loader."""
        self.session = db_manager.persistent_session()
        self.embedding_manager = EmbeddingManagerProvider.get_embedding_manager()

    def _store_sample_batch(  # noqa: PLR0913
        self,
        batch: list[tuple[SampleCreate, ImageInstanceSegmentation | ImageObjectDetection]],
        dataset_id: UUID,
        label_map: dict[int, UUID],
        annotation_task_id: UUID,
        annotations_to_create: list[AnnotationCreate],
        sample_ids: list[UUID],
    ) -> None:
        """Store one batch of samples and process their annotations.

        Args:
            batch: Pairs of (sample to create, label data of that image).
                The list is emptied in place once the batch is handled.
            dataset_id: ID of the dataset the samples belong to.
            label_map: Mapping from category ID to annotation label ID.
            annotation_task_id: ID of the annotation task to attach to.
            annotations_to_create: Shared accumulator handed on to
                ``_process_batch_annotations``.
            sample_ids: Shared accumulator handed on to
                ``_process_batch_annotations``.
        """
        stored_samples = sample_resolver.create_many(
            session=self.session,
            samples=[sample for sample, _ in batch],
        )
        _process_batch_annotations(
            session=self.session,
            stored_samples=stored_samples,
            samples_data=batch,
            dataset_id=dataset_id,
            label_map=label_map,
            annotation_task_id=annotation_task_id,
            annotations_to_create=annotations_to_create,
            sample_ids=sample_ids,
        )
        batch.clear()

    def _load_into_dataset(
        self,
        dataset: DatasetTable,
        input_labels: ObjectDetectionInput | InstanceSegmentationInput,
        img_dir: Path,
        annotation_task_id: UUID,
    ) -> None:
        """Store a loaded dataset in database.

        Args:
            dataset: The dataset entry the samples are added to.
            input_labels: The labelformat input providing images and labels.
            img_dir: Directory the image file names are resolved against.
            annotation_task_id: ID of the annotation task for the labels.
        """
        # Create label mapping
        label_map = _create_label_map(session=self.session, input_labels=input_labels)

        annotations_to_create: list[AnnotationCreate] = []
        sample_ids: list[UUID] = []
        # Samples waiting to be written, paired with their label data.
        pending: list[tuple[SampleCreate, ImageInstanceSegmentation | ImageObjectDetection]] = []

        for image_data in tqdm(input_labels.get_labels(), desc="Processing images", unit=" images"):
            image: Image = image_data.image  # type: ignore[attr-defined]

            typed_image_data: ImageInstanceSegmentation | ImageObjectDetection = image_data  # type: ignore[assignment]
            sample = SampleCreate(
                file_name=str(image.filename),
                file_path_abs=str(img_dir / image.filename),
                width=image.width,
                height=image.height,
                dataset_id=dataset.dataset_id,
            )
            pending.append((sample, typed_image_data))

            if len(pending) >= SAMPLE_BATCH_SIZE:
                self._store_sample_batch(
                    batch=pending,
                    dataset_id=dataset.dataset_id,
                    label_map=label_map,
                    annotation_task_id=annotation_task_id,
                    annotations_to_create=annotations_to_create,
                    sample_ids=sample_ids,
                )

        # Handle the last, partially filled batch.
        if pending:
            self._store_sample_batch(
                batch=pending,
                dataset_id=dataset.dataset_id,
                label_map=label_map,
                annotation_task_id=annotation_task_id,
                annotations_to_create=annotations_to_create,
                sample_ids=sample_ids,
            )

        # Insert any remaining annotations
        if annotations_to_create:
            annotation_resolver.create_many(session=self.session, annotations=annotations_to_create)

        # Generate embeddings for the dataset.
        _generate_embeddings(
            session=self.session,
            embedding_manager=self.embedding_manager,
            dataset_id=dataset.dataset_id,
            sample_ids=sample_ids,
        )

    def from_yolo(
        self,
        data_yaml_path: str,
        input_split: str = "train",
        task_name: str | None = None,
    ) -> DatasetTable:
        """Load a dataset in YOLO format and store in DB.

        Args:
            data_yaml_path: Path to the YOLO data.yaml file.
            input_split: The split to load (e.g., 'train', 'val').
            task_name: Optional name for the annotation task. If None, a
                default name is generated.

        Returns:
            DatasetTable: The created dataset table entry.
        """
        data_yaml = Path(data_yaml_path).absolute()
        dataset_name = data_yaml.parent.name

        if task_name is None:
            task_name = f"Loaded from YOLO: {data_yaml.name} ({input_split} split)"

        # Load the dataset using labelformat.
        label_input = YOLOv8ObjectDetectionInput(
            input_file=data_yaml,
            input_split=input_split,
        )
        img_dir = label_input._images_dir()  # noqa: SLF001

        return self.from_labelformat(
            input_labels=label_input,
            dataset_name=dataset_name,
            img_dir=str(img_dir),
            is_prediction=False,
            task_name=task_name,
        )

    def from_coco_object_detections(
        self,
        annotations_json_path: str,
        img_dir: str,
        task_name: str | None = None,
    ) -> DatasetTable:
        """Load a dataset in COCO Object Detection format and store in DB.

        Args:
            annotations_json_path: Path to the COCO annotations JSON file.
            img_dir: Path to the folder containing the images.
            task_name: Optional name for the annotation task. If None, a
                default name is generated.

        Returns:
            DatasetTable: The created dataset table entry.
        """
        # Make the path absolute first: for a bare relative file name the
        # parent is "." whose name is empty, which would yield an empty
        # dataset name.
        annotations_json = Path(annotations_json_path).absolute()
        dataset_name = annotations_json.parent.name

        if task_name is None:
            task_name = f"Loaded from COCO Object Detection: {annotations_json.name}"

        label_input = COCOObjectDetectionInput(
            input_file=annotations_json,
        )
        img_dir_path = Path(img_dir).absolute()

        return self.from_labelformat(
            input_labels=label_input,
            dataset_name=dataset_name,
            img_dir=str(img_dir_path),
            is_prediction=False,
            task_name=task_name,
        )

    def from_coco_instance_segmentations(
        self,
        annotations_json_path: str,
        img_dir: str,
        task_name: str | None = None,
    ) -> DatasetTable:
        """Load a dataset in COCO Instance Segmentation format and store in DB.

        Args:
            annotations_json_path: Path to the COCO annotations JSON file.
            img_dir: Path to the folder containing the images.
            task_name: Optional name for the annotation task. If None, a
                default name is generated.

        Returns:
            DatasetTable: The created dataset table entry.
        """
        # Absolute for the same reason as in from_coco_object_detections:
        # a bare relative file name would give an empty dataset name.
        annotations_json = Path(annotations_json_path).absolute()
        dataset_name = annotations_json.parent.name

        if task_name is None:
            task_name = f"Loaded from COCO Instance Segmentation: {annotations_json.name}"

        label_input = COCOInstanceSegmentationInput(
            input_file=annotations_json,
        )
        img_dir_path = Path(img_dir).absolute()

        return self.from_labelformat(
            input_labels=label_input,
            dataset_name=dataset_name,
            img_dir=str(img_dir_path),
            is_prediction=False,
            task_name=task_name,
        )

    def from_labelformat(
        self,
        input_labels: ObjectDetectionInput | InstanceSegmentationInput,
        dataset_name: str,
        img_dir: str,
        is_prediction: bool = True,
        task_name: str | None = None,
    ) -> DatasetTable:
        """Load a dataset from a labelformat object and store in database.

        Args:
            input_labels: The labelformat input object.
            dataset_name: The name for the new dataset.
            img_dir: Path to the folder containing the images.
            is_prediction: Whether the task is for prediction or labels.
            task_name: Optional name for the annotation task. If None, a
                default name is generated.

        Returns:
            DatasetTable: The created dataset table entry.
        """
        img_dir_path = Path(img_dir).absolute()

        # Determine annotation type based on input.
        # Currently, we always create BBOX tasks, even for segmentation,
        # as segmentation data is stored alongside bounding boxes.
        annotation_type = AnnotationType.BBOX

        # Generate a default task name if none is provided.
        if task_name is None:
            task_name = f"Loaded from labelformat: {dataset_name}"

        # Create dataset and annotation task.
        dataset = dataset_resolver.create(
            session=self.session,
            dataset=DatasetCreate(
                name=dataset_name,
                directory=str(img_dir_path),
            ),
        )
        new_annotation_task = annotation_task_resolver.create(
            session=self.session,
            annotation_task=AnnotationTaskTable(
                name=task_name,
                annotation_type=annotation_type,
                is_prediction=is_prediction,
            ),
        )

        self._load_into_dataset(
            dataset=dataset,
            input_labels=input_labels,
            img_dir=img_dir_path,
            annotation_task_id=new_annotation_task.annotation_task_id,
        )
        return dataset

    def from_directory(
        self,
        dataset_name: str,
        img_dir: str,
        recursive: bool = True,
        allowed_extensions: Iterable[str] = (
            ".png",
            ".jpg",
            ".jpeg",
            ".gif",
            ".webp",
            ".bmp",
            ".tiff",
        ),
    ) -> DatasetTable:
        """Load a dataset from a folder of images and store in database.

        Args:
            dataset_name: The name for the new dataset.
            img_dir: Path to the folder containing the images.
            recursive: If True, search for images recursively in subfolders.
            allowed_extensions: An iterable container of allowed image file
                extensions. Matching is case-insensitive.

        Returns:
            DatasetTable: The created dataset table entry.

        Raises:
            ValueError: If ``img_dir`` does not exist or is not a directory.
        """
        img_dir_path = Path(img_dir).absolute()
        # is_dir() is False for missing paths too, so one check covers both.
        if not img_dir_path.is_dir():
            raise ValueError(f"Input images folder is not a valid directory: {img_dir_path}")

        # Create dataset.
        dataset = dataset_resolver.create(
            session=self.session,
            dataset=DatasetCreate(
                name=dataset_name,
                directory=str(img_dir_path),
            ),
        )

        # Collect image file paths.
        allowed_extensions_set = {ext.lower() for ext in allowed_extensions}
        path_iter = img_dir_path.rglob("*") if recursive else img_dir_path.glob("*")
        image_paths = [
            path
            for path in path_iter
            if path.is_file() and path.suffix.lower() in allowed_extensions_set
        ]
        print(f"Found {len(image_paths)} images in {img_dir_path}.")

        # Process images.
        sample_ids = _create_samples_from_paths(
            session=self.session,
            dataset_id=dataset.dataset_id,
            image_paths=image_paths,
        )

        # Generate embeddings for the dataset.
        # list() drains the generator, which is what actually stores the samples.
        _generate_embeddings(
            session=self.session,
            embedding_manager=self.embedding_manager,
            dataset_id=dataset.dataset_id,
            sample_ids=list(sample_ids),
        )

        return dataset

    def start_gui(self) -> None:
        """Launch the web interface for the loaded dataset."""
        server = Server(host=env.LIGHTLY_STUDIO_HOST, port=env.LIGHTLY_STUDIO_PORT)

        print(f"Open the LightlyStudio GUI under: {env.APP_URL}")

        server.start()
393
+
394
+
395
def _create_samples_from_paths(
    session: Session,
    dataset_id: UUID,
    image_paths: Iterable[Path],
) -> Iterator[UUID]:
    """Create samples from a list of image paths.

    Each image is opened only to read its size. Files that cannot be opened
    as images are skipped with a warning. Samples are written in batches of
    ``SAMPLE_BATCH_SIZE``.

    This is a generator: nothing is stored until it is iterated.

    Args:
        session: The database session to use.
        dataset_id: The ID of the dataset to which samples belong.
        image_paths: An iterable of image file paths.

    Yields:
        UUIDs of created sample records.
    """
    import logging

    # ``import PIL`` alone does not load the ``PIL.Image`` submodule, so
    # import it explicitly. The alias avoids clashing with labelformat's
    # ``Image`` imported at module level.
    from PIL import Image as pil_image

    logger = logging.getLogger(__name__)
    batch: list[SampleCreate] = []

    for image_path in tqdm(
        image_paths,
        desc="Processing images",
        unit=" images",
    ):
        try:
            # The context manager closes the file even if reading size fails.
            with pil_image.open(image_path) as image:
                width, height = image.size
        except OSError as exc:
            # FileNotFoundError and PIL.UnidentifiedImageError are both
            # OSError subclasses, so this covers the original cases.
            logger.warning("Skipping unreadable image %s: %s", image_path, exc)
            continue

        batch.append(
            SampleCreate(
                file_name=image_path.name,
                file_path_abs=str(image_path),
                width=width,
                height=height,
                dataset_id=dataset_id,
            )
        )

        # Process batch when it reaches SAMPLE_BATCH_SIZE
        if len(batch) >= SAMPLE_BATCH_SIZE:
            stored_samples = sample_resolver.create_many(session=session, samples=batch)
            for stored_sample in stored_samples:
                yield stored_sample.sample_id
            batch = []

    # Handle remaining samples
    if batch:
        stored_samples = sample_resolver.create_many(session=session, samples=batch)
        for stored_sample in stored_samples:
            yield stored_sample.sample_id
445
+
446
+
447
def _create_label_map(
    session: Session,
    input_labels: ObjectDetectionInput | InstanceSegmentationInput,
) -> dict[int, UUID]:
    """Store one annotation label per input category and map their IDs.

    Args:
        session: The database session to use.
        input_labels: The labelformat input whose categories are stored.

    Returns:
        Mapping from each category's ID to the ID of the annotation label
        created for it.
    """
    categories = tqdm(
        input_labels.get_categories(),
        desc="Processing categories",
        unit=" categories",
    )
    category_to_label_id: dict[int, UUID] = {}
    for category in categories:
        created = annotation_label_resolver.create(
            session=session,
            label=AnnotationLabelCreate(annotation_label_name=category.name),
        )
        category_to_label_id[category.id] = created.annotation_label_id
    return category_to_label_id
462
+
463
+
464
def _process_object_detection_annotations(
    context: AnnotationProcessingContext,
    image_data: ImageObjectDetection,
) -> list[AnnotationCreate]:
    """Build object detection annotations for every object of one image.

    Args:
        context: IDs and label mapping of the sample being processed.
        image_data: The labelformat object detection data of the image.

    Returns:
        One ``AnnotationCreate`` per object, in the order of
        ``image_data.objects``.
    """

    def _to_annotation(detection) -> AnnotationCreate:  # noqa: ANN001
        # Boxes are stored as x, y, width, height.
        left, top, box_width, box_height = detection.box.to_format(BoundingBoxFormat.XYWH)
        return AnnotationCreate(
            dataset_id=context.dataset_id,
            sample_id=context.sample_id,
            annotation_label_id=context.label_map[detection.category.id],
            annotation_type="object_detection",
            x=left,
            y=top,
            width=box_width,
            height=box_height,
            confidence=detection.confidence,
            annotation_task_id=context.annotation_task_id,
        )

    return [_to_annotation(detection) for detection in image_data.objects]
489
+
490
+
491
def _process_instance_segmentation_annotations(
    context: AnnotationProcessingContext,
    image_data: ImageInstanceSegmentation,
) -> list[AnnotationCreate]:
    """Build the annotation records for all segmented instances of a single image.

    Args:
        context: Carries the dataset ID, sample ID, annotation task ID and the
            category-to-label map used for every annotation of this image.
        image_data: Instance segmentation data of one image.

    Returns:
        One `AnnotationCreate` per entry of `image_data.objects`, in the same order.

    Raises:
        ValueError: If an object's segmentation is neither a `MultiPolygon` nor a
            `BinaryMaskSegmentation`.
    """
    annotations: list[AnnotationCreate] = []
    for instance in image_data.objects:
        segmentation = instance.segmentation

        mask_rle: None | list[int]
        if isinstance(segmentation, MultiPolygon):
            # Polygons contribute only their bounding box; no mask is stored.
            xywh_box = segmentation.bounding_box().to_format(BoundingBoxFormat.XYWH)
            mask_rle = None
        elif isinstance(segmentation, BinaryMaskSegmentation):
            # Binary masks expose the box as an attribute and also supply the RLE data.
            xywh_box = segmentation.bounding_box.to_format(BoundingBoxFormat.XYWH)
            mask_rle = segmentation._rle_row_wise  # noqa: SLF001
        else:
            raise ValueError(f"Unsupported segmentation type: {type(segmentation)}")

        x, y, width, height = xywh_box
        annotation = AnnotationCreate(
            dataset_id=context.dataset_id,
            sample_id=context.sample_id,
            annotation_label_id=context.label_map[instance.category.id],
            annotation_type="instance_segmentation",
            x=x,
            y=y,
            width=width,
            height=height,
            segmentation_mask=mask_rle,
            annotation_task_id=context.annotation_task_id,
        )
        annotations.append(annotation)
    return annotations
524
+
525
+
526
def _process_batch_annotations(  # noqa: PLR0913
    session: Session,
    stored_samples: list[SampleTable],
    samples_data: list[tuple[SampleCreate, ImageInstanceSegmentation | ImageObjectDetection]],
    dataset_id: UUID,
    label_map: dict[int, UUID],
    annotation_task_id: UUID,
    annotations_to_create: list[AnnotationCreate],
    sample_ids: list[UUID],
) -> None:
    """Collect the annotations of a batch of stored samples and flush full batches.

    Args:
        session: Database session handed to the annotation resolver.
        stored_samples: Samples already stored; paired positionally with `samples_data`.
        samples_data: Per-sample tuples whose second element holds the image's labels.
        dataset_id: ID written into every created annotation.
        label_map: Mapping from category ID to annotation label ID.
        annotation_task_id: ID of the annotation task written into every annotation.
        annotations_to_create: Accumulator that is extended in place. It is written
            out and cleared whenever it holds at least ANNOTATION_BATCH_SIZE items;
            anything left over stays in the list when this function returns.
        sample_ids: Accumulator that receives the ID of every processed sample.

    Raises:
        ValueError: If a sample's label data is neither instance segmentation nor
            object detection data.
    """
    for stored, sample_entry in zip(stored_samples, samples_data):
        _sample_create, image_labels = sample_entry
        current_sample_id = stored.sample_id
        sample_ids.append(current_sample_id)

        processing_context = AnnotationProcessingContext(
            dataset_id=dataset_id,
            sample_id=current_sample_id,
            label_map=label_map,
            annotation_task_id=annotation_task_id,
        )

        if isinstance(image_labels, ImageInstanceSegmentation):
            annotations_to_create.extend(
                _process_instance_segmentation_annotations(
                    context=processing_context, image_data=image_labels
                )
            )
        elif isinstance(image_labels, ImageObjectDetection):
            annotations_to_create.extend(
                _process_object_detection_annotations(
                    context=processing_context, image_data=image_labels
                )
            )
        else:
            raise ValueError(f"Unsupported annotation type: {type(image_labels)}")

        # Keep accumulating until a full batch is available.
        if len(annotations_to_create) < ANNOTATION_BATCH_SIZE:
            continue
        annotation_resolver.create_many(session=session, annotations=annotations_to_create)
        annotations_to_create.clear()
563
+
564
+
565
def _generate_embeddings(
    session: Session,
    embedding_manager: EmbeddingManager,
    dataset_id: UUID,
    sample_ids: list[UUID],
) -> None:
    """Generate embeddings for the given samples when a generator can be loaded.

    If no embedding generator is available the function returns without touching
    the feature list or the embedding manager.

    Args:
        session: Database session passed to the embedding manager.
        embedding_manager: Manager used to register the model and embed the images.
        dataset_id: ID of the dataset the embedding model is registered for.
        sample_ids: IDs of the samples to embed.
    """
    embedding_generator = _load_embedding_generator()
    # Compare against None explicitly so a generator object is never skipped
    # merely because it evaluates as falsy.
    if embedding_generator is None:
        return

    # The feature list lives outside this function and survives across calls;
    # only add the flag once so repeated calls do not accumulate duplicates.
    if "embeddingSearchEnabled" not in lightly_studio_active_features:
        lightly_studio_active_features.append("embeddingSearchEnabled")

    embedding_model = embedding_manager.register_embedding_model(
        session=session,
        dataset_id=dataset_id,
        embedding_generator=embedding_generator,
    )
    embedding_manager.embed_images(
        session=session,
        sample_ids=sample_ids,
        embedding_model_id=embedding_model.embedding_model_id,
    )
587
+
588
+
589
def _load_embedding_generator() -> EmbeddingGenerator | None:
    """Load the embedding generator selected by the environment configuration.

    The model type is read from `env.LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE`:
    "EDGE" selects the LightlyEdge generator and "MOBILE_CLIP" selects the
    MobileCLIP generator. The generator modules are imported lazily.

    Returns:
        The generator instance, or None when the model type is unsupported or an
        ImportError is raised while importing or constructing the generator.
    """
    model_type = env.LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE

    if model_type not in ("EDGE", "MOBILE_CLIP"):
        print(
            f"Unsupported model type: '{model_type}'",
        )
        print("Embedding functionality is disabled.")
        return None

    # Import and construction both stay inside the try block so that an
    # ImportError from either step disables embeddings instead of propagating.
    try:
        if model_type == "EDGE":
            from lightly_studio.dataset.edge_embedding_generator import (
                EdgeSDKEmbeddingGenerator,
            )

            print("Using LightlyEdge embedding generator.")
            return EdgeSDKEmbeddingGenerator(model_path=env.LIGHTLY_STUDIO_EDGE_MODEL_FILE_PATH)

        from lightly_studio.dataset.mobileclip_embedding_generator import (
            MobileCLIPEmbeddingGenerator,
        )

        print("Using MobileCLIP embedding generator.")
        return MobileCLIPEmbeddingGenerator()
    except ImportError:
        print("Embedding functionality is disabled.")
        return None