lightly-studio 0.3.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lightly-studio might be problematic. Click here for more details.

Files changed (219) hide show
  1. lightly_studio/__init__.py +11 -0
  2. lightly_studio/api/__init__.py +0 -0
  3. lightly_studio/api/app.py +110 -0
  4. lightly_studio/api/cache.py +77 -0
  5. lightly_studio/api/db.py +133 -0
  6. lightly_studio/api/db_tables.py +32 -0
  7. lightly_studio/api/features.py +7 -0
  8. lightly_studio/api/routes/api/annotation.py +233 -0
  9. lightly_studio/api/routes/api/annotation_label.py +90 -0
  10. lightly_studio/api/routes/api/annotation_task.py +38 -0
  11. lightly_studio/api/routes/api/classifier.py +387 -0
  12. lightly_studio/api/routes/api/dataset.py +182 -0
  13. lightly_studio/api/routes/api/dataset_tag.py +257 -0
  14. lightly_studio/api/routes/api/exceptions.py +96 -0
  15. lightly_studio/api/routes/api/features.py +17 -0
  16. lightly_studio/api/routes/api/metadata.py +37 -0
  17. lightly_studio/api/routes/api/metrics.py +80 -0
  18. lightly_studio/api/routes/api/sample.py +196 -0
  19. lightly_studio/api/routes/api/settings.py +45 -0
  20. lightly_studio/api/routes/api/status.py +19 -0
  21. lightly_studio/api/routes/api/text_embedding.py +48 -0
  22. lightly_studio/api/routes/api/validators.py +17 -0
  23. lightly_studio/api/routes/healthz.py +13 -0
  24. lightly_studio/api/routes/images.py +104 -0
  25. lightly_studio/api/routes/webapp.py +51 -0
  26. lightly_studio/api/server.py +82 -0
  27. lightly_studio/core/__init__.py +0 -0
  28. lightly_studio/core/dataset.py +523 -0
  29. lightly_studio/core/sample.py +77 -0
  30. lightly_studio/core/start_gui.py +15 -0
  31. lightly_studio/dataset/__init__.py +0 -0
  32. lightly_studio/dataset/edge_embedding_generator.py +144 -0
  33. lightly_studio/dataset/embedding_generator.py +91 -0
  34. lightly_studio/dataset/embedding_manager.py +163 -0
  35. lightly_studio/dataset/env.py +16 -0
  36. lightly_studio/dataset/file_utils.py +35 -0
  37. lightly_studio/dataset/loader.py +622 -0
  38. lightly_studio/dataset/mobileclip_embedding_generator.py +144 -0
  39. lightly_studio/dist_lightly_studio_view_app/_app/env.js +1 -0
  40. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.DenzbfeK.css +1 -0
  41. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/LightlyLogo.BNjCIww-.png +0 -0
  42. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans- +0 -0
  43. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Bold.DGvYQtcs.ttf +0 -0
  44. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Italic-VariableFont_wdth_wght.B4AZ-wl6.ttf +0 -0
  45. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Regular.DxJTClRG.ttf +0 -0
  46. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-SemiBold.D3TTYgdB.ttf +0 -0
  47. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-VariableFont_wdth_wght.BZBpG5Iz.ttf +0 -0
  48. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +1 -0
  49. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +1 -0
  50. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.T-zjSUd3.css +1 -0
  51. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/useFeatureFlags.CV-KWLNP.css +1 -0
  52. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/69_IOA4Y.js +1 -0
  53. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +1 -0
  54. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B90CZVMX.js +1 -0
  55. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +1 -0
  56. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +1 -0
  57. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bsi3UGy5.js +1 -0
  58. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bu7uvVrG.js +1 -0
  59. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +1 -0
  60. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BylOuP6i.js +1 -0
  61. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C8I8rFJQ.js +1 -0
  62. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CDnpyLsT.js +1 -0
  63. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CWj6FrbW.js +1 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CYgJF_JY.js +1 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +93 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +3 -0
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D5-A_Ffd.js +4 -0
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6su9Aln.js +1 -0
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +1 -0
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +1 -0
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIeogL5L.js +1 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DOlTMNyt.js +1 -0
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +1 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjfY96ND.js +1 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/H7C68rOM.js +1 -0
  78. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/O-EABkf9.js +1 -0
  79. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +1 -0
  80. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/hQVEETDE.js +1 -0
  81. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/l7KrR96u.js +1 -0
  82. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +1 -0
  83. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/r64xT6ao.js +1 -0
  84. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +1 -0
  85. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/x9G_hzyY.js +1 -0
  86. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +2 -0
  87. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +1 -0
  88. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +1 -0
  89. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/1.B4rNYwVp.js +1 -0
  90. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +1 -0
  91. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/11.CWG1ehzT.js +1 -0
  92. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +1 -0
  93. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +6 -0
  94. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/3.CWHpKonm.js +1 -0
  95. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/4.OUWOLQeV.js +1 -0
  96. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +1 -0
  97. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +1 -0
  98. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +1 -0
  99. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +1 -0
  100. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/9.CPu3CiBc.js +1 -0
  101. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -0
  102. lightly_studio/dist_lightly_studio_view_app/apple-touch-icon-precomposed.png +0 -0
  103. lightly_studio/dist_lightly_studio_view_app/apple-touch-icon.png +0 -0
  104. lightly_studio/dist_lightly_studio_view_app/favicon.png +0 -0
  105. lightly_studio/dist_lightly_studio_view_app/index.html +44 -0
  106. lightly_studio/examples/example.py +23 -0
  107. lightly_studio/examples/example_metadata.py +338 -0
  108. lightly_studio/examples/example_selection.py +39 -0
  109. lightly_studio/examples/example_split_work.py +67 -0
  110. lightly_studio/examples/example_v2.py +21 -0
  111. lightly_studio/export_schema.py +18 -0
  112. lightly_studio/few_shot_classifier/__init__.py +0 -0
  113. lightly_studio/few_shot_classifier/classifier.py +80 -0
  114. lightly_studio/few_shot_classifier/classifier_manager.py +663 -0
  115. lightly_studio/few_shot_classifier/random_forest_classifier.py +489 -0
  116. lightly_studio/metadata/complex_metadata.py +47 -0
  117. lightly_studio/metadata/gps_coordinate.py +41 -0
  118. lightly_studio/metadata/metadata_protocol.py +17 -0
  119. lightly_studio/metrics/__init__.py +0 -0
  120. lightly_studio/metrics/detection/__init__.py +0 -0
  121. lightly_studio/metrics/detection/map.py +268 -0
  122. lightly_studio/models/__init__.py +1 -0
  123. lightly_studio/models/annotation/__init__.py +0 -0
  124. lightly_studio/models/annotation/annotation_base.py +171 -0
  125. lightly_studio/models/annotation/instance_segmentation.py +56 -0
  126. lightly_studio/models/annotation/links.py +17 -0
  127. lightly_studio/models/annotation/object_detection.py +47 -0
  128. lightly_studio/models/annotation/semantic_segmentation.py +44 -0
  129. lightly_studio/models/annotation_label.py +47 -0
  130. lightly_studio/models/annotation_task.py +28 -0
  131. lightly_studio/models/classifier.py +20 -0
  132. lightly_studio/models/dataset.py +84 -0
  133. lightly_studio/models/embedding_model.py +30 -0
  134. lightly_studio/models/metadata.py +208 -0
  135. lightly_studio/models/sample.py +180 -0
  136. lightly_studio/models/sample_embedding.py +37 -0
  137. lightly_studio/models/settings.py +60 -0
  138. lightly_studio/models/tag.py +96 -0
  139. lightly_studio/py.typed +0 -0
  140. lightly_studio/resolvers/__init__.py +7 -0
  141. lightly_studio/resolvers/annotation_label_resolver/__init__.py +21 -0
  142. lightly_studio/resolvers/annotation_label_resolver/create.py +27 -0
  143. lightly_studio/resolvers/annotation_label_resolver/delete.py +28 -0
  144. lightly_studio/resolvers/annotation_label_resolver/get_all.py +22 -0
  145. lightly_studio/resolvers/annotation_label_resolver/get_by_id.py +24 -0
  146. lightly_studio/resolvers/annotation_label_resolver/get_by_ids.py +25 -0
  147. lightly_studio/resolvers/annotation_label_resolver/get_by_label_name.py +24 -0
  148. lightly_studio/resolvers/annotation_label_resolver/names_by_ids.py +25 -0
  149. lightly_studio/resolvers/annotation_label_resolver/update.py +38 -0
  150. lightly_studio/resolvers/annotation_resolver/__init__.py +33 -0
  151. lightly_studio/resolvers/annotation_resolver/count_annotations_by_dataset.py +120 -0
  152. lightly_studio/resolvers/annotation_resolver/create.py +19 -0
  153. lightly_studio/resolvers/annotation_resolver/create_many.py +96 -0
  154. lightly_studio/resolvers/annotation_resolver/delete_annotation.py +45 -0
  155. lightly_studio/resolvers/annotation_resolver/delete_annotations.py +56 -0
  156. lightly_studio/resolvers/annotation_resolver/get_all.py +74 -0
  157. lightly_studio/resolvers/annotation_resolver/get_by_id.py +18 -0
  158. lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +144 -0
  159. lightly_studio/resolvers/annotation_resolver/update_bounding_box.py +68 -0
  160. lightly_studio/resolvers/annotation_task_resolver.py +31 -0
  161. lightly_studio/resolvers/annotations/__init__.py +1 -0
  162. lightly_studio/resolvers/annotations/annotations_filter.py +89 -0
  163. lightly_studio/resolvers/dataset_resolver.py +278 -0
  164. lightly_studio/resolvers/embedding_model_resolver.py +100 -0
  165. lightly_studio/resolvers/metadata_resolver/__init__.py +15 -0
  166. lightly_studio/resolvers/metadata_resolver/metadata_filter.py +163 -0
  167. lightly_studio/resolvers/metadata_resolver/sample/__init__.py +21 -0
  168. lightly_studio/resolvers/metadata_resolver/sample/bulk_set_metadata.py +48 -0
  169. lightly_studio/resolvers/metadata_resolver/sample/get_by_sample_id.py +24 -0
  170. lightly_studio/resolvers/metadata_resolver/sample/get_metadata_info.py +104 -0
  171. lightly_studio/resolvers/metadata_resolver/sample/get_value_for_sample.py +27 -0
  172. lightly_studio/resolvers/metadata_resolver/sample/set_value_for_sample.py +53 -0
  173. lightly_studio/resolvers/sample_embedding_resolver.py +86 -0
  174. lightly_studio/resolvers/sample_resolver.py +249 -0
  175. lightly_studio/resolvers/samples_filter.py +81 -0
  176. lightly_studio/resolvers/settings_resolver.py +58 -0
  177. lightly_studio/resolvers/tag_resolver.py +276 -0
  178. lightly_studio/selection/README.md +6 -0
  179. lightly_studio/selection/mundig.py +105 -0
  180. lightly_studio/selection/select.py +96 -0
  181. lightly_studio/selection/select_via_db.py +93 -0
  182. lightly_studio/selection/selection_config.py +31 -0
  183. lightly_studio/services/annotations_service/__init__.py +21 -0
  184. lightly_studio/services/annotations_service/get_annotation_by_id.py +31 -0
  185. lightly_studio/services/annotations_service/update_annotation.py +65 -0
  186. lightly_studio/services/annotations_service/update_annotation_label.py +48 -0
  187. lightly_studio/services/annotations_service/update_annotations.py +29 -0
  188. lightly_studio/setup_logging.py +19 -0
  189. lightly_studio/type_definitions.py +19 -0
  190. lightly_studio/vendor/ACKNOWLEDGEMENTS +422 -0
  191. lightly_studio/vendor/LICENSE +31 -0
  192. lightly_studio/vendor/LICENSE_weights_data +50 -0
  193. lightly_studio/vendor/README.md +5 -0
  194. lightly_studio/vendor/__init__.py +1 -0
  195. lightly_studio/vendor/mobileclip/__init__.py +96 -0
  196. lightly_studio/vendor/mobileclip/clip.py +77 -0
  197. lightly_studio/vendor/mobileclip/configs/mobileclip_b.json +18 -0
  198. lightly_studio/vendor/mobileclip/configs/mobileclip_s0.json +18 -0
  199. lightly_studio/vendor/mobileclip/configs/mobileclip_s1.json +18 -0
  200. lightly_studio/vendor/mobileclip/configs/mobileclip_s2.json +18 -0
  201. lightly_studio/vendor/mobileclip/image_encoder.py +67 -0
  202. lightly_studio/vendor/mobileclip/logger.py +154 -0
  203. lightly_studio/vendor/mobileclip/models/__init__.py +10 -0
  204. lightly_studio/vendor/mobileclip/models/mci.py +933 -0
  205. lightly_studio/vendor/mobileclip/models/vit.py +433 -0
  206. lightly_studio/vendor/mobileclip/modules/__init__.py +4 -0
  207. lightly_studio/vendor/mobileclip/modules/common/__init__.py +4 -0
  208. lightly_studio/vendor/mobileclip/modules/common/mobileone.py +341 -0
  209. lightly_studio/vendor/mobileclip/modules/common/transformer.py +451 -0
  210. lightly_studio/vendor/mobileclip/modules/image/__init__.py +4 -0
  211. lightly_studio/vendor/mobileclip/modules/image/image_projection.py +113 -0
  212. lightly_studio/vendor/mobileclip/modules/image/replknet.py +188 -0
  213. lightly_studio/vendor/mobileclip/modules/text/__init__.py +4 -0
  214. lightly_studio/vendor/mobileclip/modules/text/repmixer.py +281 -0
  215. lightly_studio/vendor/mobileclip/modules/text/tokenizer.py +38 -0
  216. lightly_studio/vendor/mobileclip/text_encoder.py +245 -0
  217. lightly_studio-0.3.1.dist-info/METADATA +520 -0
  218. lightly_studio-0.3.1.dist-info/RECORD +219 -0
  219. lightly_studio-0.3.1.dist-info/WHEEL +4 -0
@@ -0,0 +1,144 @@
1
+ """EdgeCLIP embedding generator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Sequence
6
+ from pathlib import Path
7
+ from typing import Tuple
8
+ from uuid import UUID
9
+
10
+ import cv2
11
+ from lightly_edge_sdk import (
12
+ InferenceDeviceType,
13
+ LightlyEdge,
14
+ LightlyEdgeConfig,
15
+ LightlyEdgeDetectorConfig,
16
+ )
17
+ from torch.utils.data import DataLoader, Dataset
18
+ from tqdm import tqdm
19
+
20
+ from lightly_studio.models.embedding_model import EmbeddingModelCreate
21
+
22
+ from .embedding_generator import EmbeddingGenerator
23
+
24
# Batch size handed to the DataLoader in EdgeSDKEmbeddingGenerator.embed_images.
# That loop only reads element 0 of every batch, so this value must stay 1.
MAX_BATCH_SIZE: int = 1
25
+
26
+
27
+ class _ImageFileDatasetEdge(Dataset[Tuple[bytes, int, int]]):
28
+ """Dataset wrapping image file paths for processing."""
29
+
30
+ def __init__(
31
+ self,
32
+ filepaths: Sequence[Path],
33
+ ) -> None:
34
+ self.filepaths = filepaths
35
+
36
+ def __len__(self) -> int:
37
+ return len(self.filepaths)
38
+
39
+ def __getitem__(self, idx: int) -> tuple[bytes, int, int]:
40
+ # Load the image.
41
+ bgr_image = cv2.imread(str(self.filepaths[idx]))
42
+ rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
43
+ rgb_bytes = rgb_image.tobytes()
44
+ height, width, _ = rgb_image.shape
45
+ return rgb_bytes, width, height
46
+
47
+
48
class EdgeSDKEmbeddingGenerator(EmbeddingGenerator):
    """Embedding generator backed by the LightlyEdge SDK runtime."""

    def __init__(self, model_path: str) -> None:
        """Create the LightlyEdge instance and cache the image model metadata.

        Args:
            model_path: Path to the model tar file.
        """
        edge_config = _create_edge_config()
        self.lightly_edge = LightlyEdge(path=model_path, config=edge_config)

        # Keep the model metadata needed later to describe the model record.
        image_model_config = self.lightly_edge.get_image_model_config()
        self._model_hash = image_model_config.model_hash
        self._embedding_size = image_model_config.embedding_size
        self._model_name = image_model_config.model_name

    def get_embedding_model_input(self, dataset_id: UUID) -> EmbeddingModelCreate:
        """Describe the loaded model as an EmbeddingModelCreate instance.

        Args:
            dataset_id: The ID of the dataset.

        Returns:
            An EmbeddingModelCreate instance carrying the model name, hash and
            embedding dimension together with the dataset ID.
        """
        model_details = EmbeddingModelCreate(
            name=self._model_name,
            embedding_model_hash=self._model_hash,
            embedding_dimension=self._embedding_size,
            dataset_id=dataset_id,
        )
        return model_details

    def embed_text(self, text: str) -> list[float]:
        """Embed a single text with the LightlyEdge text model.

        Args:
            text: The text to embed.

        Returns:
            The embedding of the text, or an empty list if the SDK returned
            no embeddings.
        """
        text_embeddings = self.lightly_edge.embed_texts([text])
        if len(text_embeddings) == 0:
            return []
        return text_embeddings[0]

    def embed_images(self, filepaths: list[Path]) -> list[list[float]]:
        """Embed images with the LightlyEdge image model.

        Args:
            filepaths: A list of file paths to the images to embed.

        Returns:
            One embedding per image, in the order the loader yields them.
        """
        image_loader = DataLoader(
            _ImageFileDatasetEdge(filepaths),
            batch_size=MAX_BATCH_SIZE,
            num_workers=0,
            pin_memory=True,
        )

        embeddings_list: list[list[float]] = []
        with tqdm(
            total=len(filepaths), desc="Generating embeddings", unit=" images"
        ) as progress_bar:
            for batch_bytes, batch_widths, batch_heights in image_loader:
                # Only the first element of each batch is embedded, which
                # relies on the loader being configured with MAX_BATCH_SIZE.
                frame_embedding = self.lightly_edge.embed_frame_rgb_bytes(
                    rgb_bytes=batch_bytes[0],
                    width=batch_widths[0].item(),
                    height=batch_heights[0].item(),
                )
                embeddings_list.append(frame_embedding)
                progress_bar.update(1)

        return embeddings_list
129
+
130
+
131
def _create_edge_config() -> LightlyEdgeConfig:
    """Build the LightlyEdge configuration used for embedding generation.

    Starts from the SDK default configuration, selects the ``Auto`` inference
    device type and installs a detector configuration with the object detector
    and classifiers switched off.

    Returns:
        Configured LightlyEdgeConfig instance.
    """
    edge_config = LightlyEdgeConfig.default()
    edge_config.inference_device_type = InferenceDeviceType.Auto

    detector_settings = LightlyEdgeDetectorConfig(
        object_detector_enable=False,
        classifiers_enable=False,
        max_classifications=0,
    )
    edge_config.detector_config = detector_settings
    return edge_config
@@ -0,0 +1,91 @@
1
+ """EmbeddingGenerator implementations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ from pathlib import Path
7
+ from typing import Protocol, runtime_checkable
8
+ from uuid import UUID
9
+
10
+ from lightly_studio.models.embedding_model import EmbeddingModelCreate
11
+
12
+
13
@runtime_checkable
class EmbeddingGenerator(Protocol):
    """Structural interface shared by all embedding models.

    Any object providing the three methods below satisfies this protocol;
    because it is ``runtime_checkable``, ``isinstance`` checks against it
    verify that the methods are present. How embeddings are actually computed
    is left to the concrete implementation.
    """

    def get_embedding_model_input(self, dataset_id: UUID) -> EmbeddingModelCreate:
        """Describe the model as an EmbeddingModelCreate instance.

        Args:
            dataset_id: The ID of the dataset.

        Returns:
            An EmbeddingModelCreate instance with the model details.
        """
        ...

    def embed_text(self, text: str) -> list[float]:
        """Compute the embedding of a single text.

        Args:
            text: The text to embed.

        Returns:
            A list of floats representing the generated embedding.
        """
        ...

    def embed_images(self, filepaths: list[Path]) -> list[list[float]]:
        """Compute embeddings for a list of image files.

        TODO(Michal, 04/2025): Use DatasetLoader as input instead.

        Args:
            filepaths: A list of file paths to the images to embed.

        Returns:
            A list of lists of floats representing the generated embeddings
            in the same order as the input file paths.
        """
        ...
56
+
57
+
58
class RandomEmbeddingGenerator(EmbeddingGenerator):
    """Embedding generator that returns random vectors of a fixed dimension."""

    def __init__(self, dimension: int = 3):
        """Store the dimension of the vectors to generate.

        Args:
            dimension: The dimension of the embedding vectors to generate.
        """
        self._dimension = dimension

    def _random_vector(self) -> list[float]:
        """Draw ``self._dimension`` values from ``random.random()``."""
        return [random.random() for _ in range(self._dimension)]

    def get_embedding_model_input(self, dataset_id: UUID) -> EmbeddingModelCreate:
        """Describe the random model as an EmbeddingModelCreate instance.

        Args:
            dataset_id: The ID of the dataset.

        Returns:
            An EmbeddingModelCreate instance with the model details.
        """
        model_details = EmbeddingModelCreate(
            name="Random",
            embedding_model_hash="random_model",
            embedding_dimension=self._dimension,
            dataset_id=dataset_id,
        )
        return model_details

    def embed_text(self, _text: str) -> list[float]:
        """Return a random embedding; the text itself is ignored."""
        return self._random_vector()

    def embed_images(self, filepaths: list[Path]) -> list[list[float]]:
        """Return one random embedding per entry of ``filepaths``."""
        num_images = len(filepaths)
        return [self._random_vector() for _ in range(num_images)]
@@ -0,0 +1,163 @@
1
+ """Embedding manager for dataset processing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from dataclasses import dataclass
6
+ from pathlib import Path
7
+ from uuid import UUID
8
+
9
+ from sqlmodel import Session
10
+
11
+ from lightly_studio.dataset.embedding_generator import EmbeddingGenerator
12
+ from lightly_studio.models.embedding_model import EmbeddingModelTable
13
+ from lightly_studio.models.sample_embedding import SampleEmbeddingCreate
14
+ from lightly_studio.resolvers import (
15
+ embedding_model_resolver,
16
+ sample_embedding_resolver,
17
+ sample_resolver,
18
+ )
19
+
20
+
21
class EmbeddingManagerProvider:
    """Holds the single shared EmbeddingManager instance."""

    # Created on the first call to get_embedding_manager and reused afterwards.
    _instance: EmbeddingManager | None = None

    @classmethod
    def get_embedding_manager(cls) -> EmbeddingManager:
        """Return the shared EmbeddingManager, creating it on first use.

        Returns:
            The singleton instance of EmbeddingManager.
        """
        manager = cls._instance
        if manager is None:
            manager = EmbeddingManager()
            cls._instance = manager
        return manager
39
+
40
+
41
@dataclass
class TextEmbedQuery:
    """Input for a text embedding request."""

    # The text to embed.
    text: str
    # ID of the embedding model to use; when None, EmbeddingManager.embed_text
    # falls back to its default model.
    embedding_model_id: UUID | None = None
47
+
48
+
49
class EmbeddingManager:
    """Registry of embedding generators that also creates and stores embeddings."""

    def __init__(self) -> None:
        """Start with no registered models and no default model."""
        self._default_model_id: UUID | None = None
        self._models: dict[UUID, EmbeddingGenerator] = {}

    def register_embedding_model(
        self,
        session: Session,
        dataset_id: UUID,
        embedding_generator: EmbeddingGenerator,
        set_as_default: bool = False,
    ) -> EmbeddingModelTable:
        """Create a database record for a model and remember its generator.

        The generator is kept in an in-memory dictionary keyed by the ID of the
        created record. It becomes the default model when requested, or when
        no default has been set yet.

        Args:
            session: Database session for resolver operations.
            dataset_id: The ID of the dataset to associate with the model.
            embedding_generator: The model implementation used for embeddings.
            set_as_default: Whether to set this model as the default.

        Returns:
            The created EmbeddingModel.
        """
        model_input = embedding_generator.get_embedding_model_input(dataset_id=dataset_id)
        model_record = embedding_model_resolver.create(
            session=session,
            embedding_model=model_input,
        )
        new_model_id = model_record.embedding_model_id

        self._models[new_model_id] = embedding_generator

        no_default_yet = self._default_model_id is None
        if set_as_default or no_default_yet:
            self._default_model_id = new_model_id

        return model_record

    def embed_text(self, text_query: TextEmbedQuery) -> list[float]:
        """Embed a text with the requested model or the default model.

        Args:
            text_query: Text embedding query containing text and model ID.

        Returns:
            A list of floats representing the generated embedding.

        Raises:
            ValueError: If the query names no model and no default is set, or
                if the selected model ID is not registered.
        """
        selected_id = text_query.embedding_model_id or self._default_model_id
        if selected_id is None:
            raise ValueError("No embedding model specified and no default model set.")

        generator = self._models.get(selected_id)
        if generator is None:
            raise ValueError(f"Embedding model with ID {selected_id} not found.")

        return generator.embed_text(text_query.text)

    def embed_images(
        self,
        session: Session,
        sample_ids: list[UUID],
        embedding_model_id: UUID | None = None,
    ) -> None:
        """Compute embeddings for the given samples and store them.

        Args:
            session: Database session for resolver operations.
            sample_ids: List of sample IDs to generate embeddings for.
            embedding_model_id: ID of the model to use. Uses default if None.

        Raises:
            ValueError: If no embedding model is registered or provided model
                ID doesn't exist.
            KeyError: If a requested sample ID is not among the samples
                returned by the sample resolver.
        """
        selected_id = embedding_model_id or self._default_model_id
        if not selected_id:
            raise ValueError("No default embedding model registered.")

        if selected_id not in self._models:
            raise ValueError(f"No embedding model found with ID {selected_id}")

        # Look up the absolute file path of every requested sample.
        samples = sample_resolver.get_many_by_id(
            session=session,
            sample_ids=sample_ids,
        )
        path_by_sample_id = {}
        for sample in samples:
            path_by_sample_id[sample.sample_id] = Path(sample.file_path_abs)

        # Index by ID so the paths follow the order of sample_ids rather than
        # whatever order the resolver returned the samples in.
        ordered_paths = [path_by_sample_id[sample_id] for sample_id in sample_ids]

        generator = self._models[selected_id]
        vectors = generator.embed_images(filepaths=ordered_paths)

        # Pair each sample ID with its embedding for storage.
        records = []
        for sample_id, vector in zip(sample_ids, vectors):
            records.append(
                SampleEmbeddingCreate(
                    sample_id=sample_id,
                    embedding_model_id=selected_id,
                    embedding=vector,
                )
            )

        sample_embedding_resolver.create_many(session=session, sample_embeddings=records)
@@ -0,0 +1,16 @@
1
+ """Initialize environment variables for the dataset module."""
2
+
3
+ from environs import Env
4
+
5
# Read a .env file (when one is present) before looking up any setting.
env = Env()
env.read_env()

# Which embedding model implementation to use.
LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE: str = env.str(
    "LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE", "MOBILE_CLIP"
)
# NOTE: unlike the other settings, this one is read from the unprefixed
# "EDGE_MODEL_PATH" variable; the name is kept for backward compatibility.
LIGHTLY_STUDIO_EDGE_MODEL_FILE_PATH: str = env.str("EDGE_MODEL_PATH", "./lightly_model.tar")

# Pieces of the application URL assembled in APP_URL below.
LIGHTLY_STUDIO_PROTOCOL: str = env.str("LIGHTLY_STUDIO_PROTOCOL", "http")
LIGHTLY_STUDIO_PORT: int = env.int("LIGHTLY_STUDIO_PORT", 8001)
LIGHTLY_STUDIO_HOST: str = env.str("LIGHTLY_STUDIO_HOST", "localhost")

# env.bool yields a bool, so annotate it as one and use a real boolean default
# rather than the string "false".
LIGHTLY_STUDIO_DEBUG: bool = env.bool("LIGHTLY_STUDIO_DEBUG", False)

APP_URL = f"{LIGHTLY_STUDIO_PROTOCOL}://{LIGHTLY_STUDIO_HOST}:{LIGHTLY_STUDIO_PORT}"
@@ -0,0 +1,35 @@
1
+ """File manipulation utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import shutil
6
+ from pathlib import Path
7
+
8
+ import requests
9
+ import xxhash
10
+
11
+
12
def download_file_if_does_not_exist(url: str, local_filename: Path) -> None:
    """Download a file from a URL if it does not already exist locally.

    The data is streamed into a temporary ``.part`` file next to the target and
    only moved into place once the download has completed, so a failed or
    interrupted download is never mistaken for a finished one on the next call.

    Args:
        url: The URL to download from.
        local_filename: Where to store the downloaded file.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        requests.RequestException: On connection problems or timeouts.
        OSError: If the file cannot be written.
    """
    if local_filename.exists():
        return

    partial_path = local_filename.with_name(local_filename.name + ".part")
    try:
        # The timeout bounds connecting and each wait for data, not the total
        # download time, so large files still download fine.
        with requests.get(url, stream=True, timeout=60) as response:
            # Without this check an HTTP error page would be saved as the file.
            response.raise_for_status()
            with partial_path.open("wb") as out_file:
                # iter_content applies content decoding (e.g. gzip), unlike
                # copying from response.raw directly.
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    out_file.write(chunk)
        partial_path.replace(local_filename)
    finally:
        # No-op after a successful move; removes leftovers after a failure.
        partial_path.unlink(missing_ok=True)
18
+
19
+
20
def get_file_xxhash(file_path: Path) -> str:
    """Compute the 64-bit xxhash digest of a file's contents.

    XXHash is a fast non-cryptographic hash function. The file is read in
    8192-byte chunks, so it is never loaded into memory as a whole.

    Args:
        file_path: Path to the file.

    Returns:
        The xxhash of the file as a hexadecimal string.
    """
    digest = xxhash.xxh64()
    with file_path.open("rb") as stream:
        # iter() with a sentinel stops at the first empty read, i.e. at EOF.
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()