lightly-studio 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (356) hide show
  1. lightly_studio/__init__.py +12 -0
  2. lightly_studio/api/__init__.py +0 -0
  3. lightly_studio/api/app.py +131 -0
  4. lightly_studio/api/cache.py +77 -0
  5. lightly_studio/api/db_tables.py +35 -0
  6. lightly_studio/api/features.py +5 -0
  7. lightly_studio/api/routes/api/annotation.py +305 -0
  8. lightly_studio/api/routes/api/annotation_label.py +87 -0
  9. lightly_studio/api/routes/api/annotations/__init__.py +7 -0
  10. lightly_studio/api/routes/api/annotations/create_annotation.py +52 -0
  11. lightly_studio/api/routes/api/caption.py +100 -0
  12. lightly_studio/api/routes/api/classifier.py +384 -0
  13. lightly_studio/api/routes/api/dataset.py +191 -0
  14. lightly_studio/api/routes/api/dataset_tag.py +266 -0
  15. lightly_studio/api/routes/api/embeddings2d.py +90 -0
  16. lightly_studio/api/routes/api/exceptions.py +114 -0
  17. lightly_studio/api/routes/api/export.py +114 -0
  18. lightly_studio/api/routes/api/features.py +17 -0
  19. lightly_studio/api/routes/api/frame.py +241 -0
  20. lightly_studio/api/routes/api/image.py +155 -0
  21. lightly_studio/api/routes/api/metadata.py +161 -0
  22. lightly_studio/api/routes/api/operator.py +75 -0
  23. lightly_studio/api/routes/api/sample.py +103 -0
  24. lightly_studio/api/routes/api/selection.py +87 -0
  25. lightly_studio/api/routes/api/settings.py +41 -0
  26. lightly_studio/api/routes/api/status.py +19 -0
  27. lightly_studio/api/routes/api/text_embedding.py +50 -0
  28. lightly_studio/api/routes/api/validators.py +17 -0
  29. lightly_studio/api/routes/api/video.py +133 -0
  30. lightly_studio/api/routes/healthz.py +13 -0
  31. lightly_studio/api/routes/images.py +104 -0
  32. lightly_studio/api/routes/video_frames_media.py +116 -0
  33. lightly_studio/api/routes/video_media.py +223 -0
  34. lightly_studio/api/routes/webapp.py +51 -0
  35. lightly_studio/api/server.py +94 -0
  36. lightly_studio/core/__init__.py +0 -0
  37. lightly_studio/core/add_samples.py +533 -0
  38. lightly_studio/core/add_videos.py +294 -0
  39. lightly_studio/core/dataset.py +780 -0
  40. lightly_studio/core/dataset_query/__init__.py +14 -0
  41. lightly_studio/core/dataset_query/boolean_expression.py +67 -0
  42. lightly_studio/core/dataset_query/dataset_query.py +317 -0
  43. lightly_studio/core/dataset_query/field.py +113 -0
  44. lightly_studio/core/dataset_query/field_expression.py +79 -0
  45. lightly_studio/core/dataset_query/match_expression.py +23 -0
  46. lightly_studio/core/dataset_query/order_by.py +79 -0
  47. lightly_studio/core/dataset_query/sample_field.py +37 -0
  48. lightly_studio/core/dataset_query/tags_expression.py +46 -0
  49. lightly_studio/core/image_sample.py +36 -0
  50. lightly_studio/core/loading_log.py +56 -0
  51. lightly_studio/core/sample.py +291 -0
  52. lightly_studio/core/start_gui.py +54 -0
  53. lightly_studio/core/video_sample.py +38 -0
  54. lightly_studio/dataset/__init__.py +0 -0
  55. lightly_studio/dataset/edge_embedding_generator.py +155 -0
  56. lightly_studio/dataset/embedding_generator.py +129 -0
  57. lightly_studio/dataset/embedding_manager.py +349 -0
  58. lightly_studio/dataset/env.py +20 -0
  59. lightly_studio/dataset/file_utils.py +49 -0
  60. lightly_studio/dataset/fsspec_lister.py +275 -0
  61. lightly_studio/dataset/mobileclip_embedding_generator.py +158 -0
  62. lightly_studio/dataset/perception_encoder_embedding_generator.py +260 -0
  63. lightly_studio/db_manager.py +166 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/env.js +1 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.GcXvs2l7.css +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/12.Dx6SXgAb.css +1 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/17.9X9_k6TP.css +1 -0
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/18.BxiimdIO.css +1 -0
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/2.CkOblLn7.css +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/ClassifierSamplesGrid.BJbCDlvs.css +1 -0
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/LightlyLogo.BNjCIww-.png +0 -0
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Bold.DGvYQtcs.ttf +0 -0
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Italic-VariableFont_wdth_wght.B4AZ-wl6.ttf +0 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Medium.DVUZMR_6.ttf +0 -0
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Regular.DxJTClRG.ttf +0 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-SemiBold.D3TTYgdB.ttf +0 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-VariableFont_wdth_wght.BZBpG5Iz.ttf +0 -0
  78. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.CefECEWA.css +1 -0
  79. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.D5tDcjY-.css +1 -0
  80. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_page.9X9_k6TP.css +1 -0
  81. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_page.BxiimdIO.css +1 -0
  82. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_page.Dx6SXgAb.css +1 -0
  83. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/transform._-1mPSEI.css +1 -0
  84. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/0dDyq72A.js +20 -0
  85. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/69_IOA4Y.js +1 -0
  86. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BK4An2kI.js +1 -0
  87. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BRmB-kJ9.js +1 -0
  88. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B_1cpokE.js +1 -0
  89. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BiqpDEr0.js +1 -0
  90. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BpLiSKgx.js +1 -0
  91. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BscxbINH.js +39 -0
  92. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C1FmrZbK.js +1 -0
  93. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C80h3dJx.js +1 -0
  94. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C8mfFM-u.js +2 -0
  95. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CGY1p9L4.js +517 -0
  96. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/COfLknXM.js +1 -0
  97. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CWj6FrbW.js +1 -0
  98. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CYgJF_JY.js +1 -0
  99. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CmLg0ys7.js +1 -0
  100. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvGjimpO.js +1 -0
  101. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D3RDXHoj.js +39 -0
  102. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D4y7iiT3.js +1 -0
  103. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D9SC3jBb.js +1 -0
  104. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DCuAdx1Q.js +20 -0
  105. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DDBy-_jD.js +1 -0
  106. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIeogL5L.js +1 -0
  107. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DL9a7v5o.js +1 -0
  108. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DSKECuqX.js +39 -0
  109. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D_FFv0Oe.js +1 -0
  110. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DiZ5o5vz.js +1 -0
  111. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DkbXUtyG.js +1 -0
  112. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DmK2hulV.js +1 -0
  113. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DqnHaLTj.js +1 -0
  114. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DtWZc_tl.js +1 -0
  115. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DuUalyFS.js +1 -0
  116. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DwIonDAZ.js +1 -0
  117. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Il-mSPmK.js +1 -0
  118. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KNLP4aJU.js +1 -0
  119. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KjYeVjkE.js +1 -0
  120. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/MErlcOXj.js +1 -0
  121. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/VRI4prUD.js +1 -0
  122. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/VYb2dkNs.js +1 -0
  123. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/VqWvU2yF.js +1 -0
  124. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/dHC3otuL.js +1 -0
  125. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/da7Oy_lO.js +1 -0
  126. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/eAy8rZzC.js +2 -0
  127. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/erjNR5MX.js +1 -0
  128. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/f1oG3eFE.js +1 -0
  129. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/rsLi1iKv.js +20 -0
  130. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/rwuuBP9f.js +1 -0
  131. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/xGHZQ1pe.js +3 -0
  132. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.DrTRUgT3.js +2 -0
  133. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.BK5EOJl2.js +1 -0
  134. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.CIvTuljF.js +4 -0
  135. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/1.UBvSzxdA.js +1 -0
  136. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.CQ_tiLJa.js +1 -0
  137. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/11.KqkAcaxW.js +1 -0
  138. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.DoYsmxQc.js +1 -0
  139. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/13.571n2LZA.js +1 -0
  140. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/14.DGs689M-.js +1 -0
  141. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/15.CWG1ehzT.js +1 -0
  142. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/16.Dpq6jbSh.js +1 -0
  143. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/17.B5AZbHUU.js +1 -0
  144. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/18.CBga8cnq.js +1 -0
  145. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.D2HXgz-8.js +1090 -0
  146. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/3.f4HAg-y3.js +1 -0
  147. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/4.BKF4xuKQ.js +1 -0
  148. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.BAE0Pm_f.js +39 -0
  149. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.CouWWpzA.js +1 -0
  150. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.UBHT0ktp.js +1 -0
  151. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.FiYNElcc.js +1 -0
  152. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/9.B3-UaT23.js +1 -0
  153. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/clustering.worker-DKqeLtG0.js +2 -0
  154. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/search.worker-vNSty3B0.js +1 -0
  155. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -0
  156. lightly_studio/dist_lightly_studio_view_app/apple-touch-icon-precomposed.png +0 -0
  157. lightly_studio/dist_lightly_studio_view_app/apple-touch-icon.png +0 -0
  158. lightly_studio/dist_lightly_studio_view_app/favicon.png +0 -0
  159. lightly_studio/dist_lightly_studio_view_app/index.html +45 -0
  160. lightly_studio/errors.py +5 -0
  161. lightly_studio/examples/example.py +25 -0
  162. lightly_studio/examples/example_coco.py +27 -0
  163. lightly_studio/examples/example_coco_caption.py +29 -0
  164. lightly_studio/examples/example_metadata.py +369 -0
  165. lightly_studio/examples/example_operators.py +111 -0
  166. lightly_studio/examples/example_selection.py +28 -0
  167. lightly_studio/examples/example_split_work.py +48 -0
  168. lightly_studio/examples/example_video.py +22 -0
  169. lightly_studio/examples/example_video_annotations.py +157 -0
  170. lightly_studio/examples/example_yolo.py +22 -0
  171. lightly_studio/export/coco_captions.py +69 -0
  172. lightly_studio/export/export_dataset.py +104 -0
  173. lightly_studio/export/lightly_studio_label_input.py +120 -0
  174. lightly_studio/export_schema.py +18 -0
  175. lightly_studio/export_version.py +57 -0
  176. lightly_studio/few_shot_classifier/__init__.py +0 -0
  177. lightly_studio/few_shot_classifier/classifier.py +80 -0
  178. lightly_studio/few_shot_classifier/classifier_manager.py +644 -0
  179. lightly_studio/few_shot_classifier/random_forest_classifier.py +495 -0
  180. lightly_studio/metadata/complex_metadata.py +47 -0
  181. lightly_studio/metadata/compute_similarity.py +84 -0
  182. lightly_studio/metadata/compute_typicality.py +67 -0
  183. lightly_studio/metadata/gps_coordinate.py +41 -0
  184. lightly_studio/metadata/metadata_protocol.py +17 -0
  185. lightly_studio/models/__init__.py +1 -0
  186. lightly_studio/models/annotation/__init__.py +0 -0
  187. lightly_studio/models/annotation/annotation_base.py +303 -0
  188. lightly_studio/models/annotation/instance_segmentation.py +56 -0
  189. lightly_studio/models/annotation/links.py +17 -0
  190. lightly_studio/models/annotation/object_detection.py +47 -0
  191. lightly_studio/models/annotation/semantic_segmentation.py +44 -0
  192. lightly_studio/models/annotation_label.py +47 -0
  193. lightly_studio/models/caption.py +49 -0
  194. lightly_studio/models/classifier.py +20 -0
  195. lightly_studio/models/dataset.py +70 -0
  196. lightly_studio/models/embedding_model.py +30 -0
  197. lightly_studio/models/image.py +96 -0
  198. lightly_studio/models/metadata.py +208 -0
  199. lightly_studio/models/range.py +17 -0
  200. lightly_studio/models/sample.py +154 -0
  201. lightly_studio/models/sample_embedding.py +36 -0
  202. lightly_studio/models/settings.py +69 -0
  203. lightly_studio/models/tag.py +96 -0
  204. lightly_studio/models/two_dim_embedding.py +16 -0
  205. lightly_studio/models/video.py +161 -0
  206. lightly_studio/plugins/__init__.py +0 -0
  207. lightly_studio/plugins/base_operator.py +60 -0
  208. lightly_studio/plugins/operator_registry.py +47 -0
  209. lightly_studio/plugins/parameter.py +70 -0
  210. lightly_studio/py.typed +0 -0
  211. lightly_studio/resolvers/__init__.py +0 -0
  212. lightly_studio/resolvers/annotation_label_resolver/__init__.py +22 -0
  213. lightly_studio/resolvers/annotation_label_resolver/create.py +27 -0
  214. lightly_studio/resolvers/annotation_label_resolver/delete.py +28 -0
  215. lightly_studio/resolvers/annotation_label_resolver/get_all.py +37 -0
  216. lightly_studio/resolvers/annotation_label_resolver/get_by_id.py +24 -0
  217. lightly_studio/resolvers/annotation_label_resolver/get_by_ids.py +25 -0
  218. lightly_studio/resolvers/annotation_label_resolver/get_by_label_name.py +24 -0
  219. lightly_studio/resolvers/annotation_label_resolver/names_by_ids.py +25 -0
  220. lightly_studio/resolvers/annotation_label_resolver/update.py +38 -0
  221. lightly_studio/resolvers/annotation_resolver/__init__.py +40 -0
  222. lightly_studio/resolvers/annotation_resolver/count_annotations_by_dataset.py +129 -0
  223. lightly_studio/resolvers/annotation_resolver/create_many.py +124 -0
  224. lightly_studio/resolvers/annotation_resolver/delete_annotation.py +87 -0
  225. lightly_studio/resolvers/annotation_resolver/delete_annotations.py +60 -0
  226. lightly_studio/resolvers/annotation_resolver/get_all.py +85 -0
  227. lightly_studio/resolvers/annotation_resolver/get_all_with_payload.py +179 -0
  228. lightly_studio/resolvers/annotation_resolver/get_by_id.py +34 -0
  229. lightly_studio/resolvers/annotation_resolver/get_by_id_with_payload.py +130 -0
  230. lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +142 -0
  231. lightly_studio/resolvers/annotation_resolver/update_bounding_box.py +68 -0
  232. lightly_studio/resolvers/annotations/__init__.py +1 -0
  233. lightly_studio/resolvers/annotations/annotations_filter.py +88 -0
  234. lightly_studio/resolvers/caption_resolver.py +129 -0
  235. lightly_studio/resolvers/dataset_resolver/__init__.py +55 -0
  236. lightly_studio/resolvers/dataset_resolver/check_dataset_type.py +29 -0
  237. lightly_studio/resolvers/dataset_resolver/create.py +20 -0
  238. lightly_studio/resolvers/dataset_resolver/delete.py +20 -0
  239. lightly_studio/resolvers/dataset_resolver/export.py +267 -0
  240. lightly_studio/resolvers/dataset_resolver/get_all.py +19 -0
  241. lightly_studio/resolvers/dataset_resolver/get_by_id.py +16 -0
  242. lightly_studio/resolvers/dataset_resolver/get_by_name.py +12 -0
  243. lightly_studio/resolvers/dataset_resolver/get_dataset_details.py +27 -0
  244. lightly_studio/resolvers/dataset_resolver/get_hierarchy.py +31 -0
  245. lightly_studio/resolvers/dataset_resolver/get_or_create_child_dataset.py +58 -0
  246. lightly_studio/resolvers/dataset_resolver/get_parent_dataset_by_sample_id.py +27 -0
  247. lightly_studio/resolvers/dataset_resolver/get_parent_dataset_id.py +22 -0
  248. lightly_studio/resolvers/dataset_resolver/get_root_dataset.py +61 -0
  249. lightly_studio/resolvers/dataset_resolver/get_root_datasets_overview.py +41 -0
  250. lightly_studio/resolvers/dataset_resolver/update.py +25 -0
  251. lightly_studio/resolvers/embedding_model_resolver.py +120 -0
  252. lightly_studio/resolvers/image_filter.py +50 -0
  253. lightly_studio/resolvers/image_resolver/__init__.py +21 -0
  254. lightly_studio/resolvers/image_resolver/create_many.py +52 -0
  255. lightly_studio/resolvers/image_resolver/delete.py +20 -0
  256. lightly_studio/resolvers/image_resolver/filter_new_paths.py +23 -0
  257. lightly_studio/resolvers/image_resolver/get_all_by_dataset_id.py +117 -0
  258. lightly_studio/resolvers/image_resolver/get_by_id.py +14 -0
  259. lightly_studio/resolvers/image_resolver/get_dimension_bounds.py +75 -0
  260. lightly_studio/resolvers/image_resolver/get_many_by_id.py +22 -0
  261. lightly_studio/resolvers/image_resolver/get_samples_excluding.py +43 -0
  262. lightly_studio/resolvers/metadata_resolver/__init__.py +15 -0
  263. lightly_studio/resolvers/metadata_resolver/metadata_filter.py +163 -0
  264. lightly_studio/resolvers/metadata_resolver/sample/__init__.py +21 -0
  265. lightly_studio/resolvers/metadata_resolver/sample/bulk_update_metadata.py +46 -0
  266. lightly_studio/resolvers/metadata_resolver/sample/get_by_sample_id.py +24 -0
  267. lightly_studio/resolvers/metadata_resolver/sample/get_metadata_info.py +104 -0
  268. lightly_studio/resolvers/metadata_resolver/sample/get_value_for_sample.py +27 -0
  269. lightly_studio/resolvers/metadata_resolver/sample/set_value_for_sample.py +53 -0
  270. lightly_studio/resolvers/sample_embedding_resolver.py +132 -0
  271. lightly_studio/resolvers/sample_resolver/__init__.py +17 -0
  272. lightly_studio/resolvers/sample_resolver/count_by_dataset_id.py +16 -0
  273. lightly_studio/resolvers/sample_resolver/create.py +16 -0
  274. lightly_studio/resolvers/sample_resolver/create_many.py +25 -0
  275. lightly_studio/resolvers/sample_resolver/get_by_id.py +14 -0
  276. lightly_studio/resolvers/sample_resolver/get_filtered_samples.py +56 -0
  277. lightly_studio/resolvers/sample_resolver/get_many_by_id.py +22 -0
  278. lightly_studio/resolvers/sample_resolver/sample_filter.py +74 -0
  279. lightly_studio/resolvers/settings_resolver.py +62 -0
  280. lightly_studio/resolvers/tag_resolver.py +299 -0
  281. lightly_studio/resolvers/twodim_embedding_resolver.py +119 -0
  282. lightly_studio/resolvers/video_frame_resolver/__init__.py +23 -0
  283. lightly_studio/resolvers/video_frame_resolver/count_video_frames_annotations.py +83 -0
  284. lightly_studio/resolvers/video_frame_resolver/create_many.py +57 -0
  285. lightly_studio/resolvers/video_frame_resolver/get_all_by_dataset_id.py +63 -0
  286. lightly_studio/resolvers/video_frame_resolver/get_by_id.py +13 -0
  287. lightly_studio/resolvers/video_frame_resolver/get_table_fields_bounds.py +44 -0
  288. lightly_studio/resolvers/video_frame_resolver/video_frame_annotations_counter_filter.py +47 -0
  289. lightly_studio/resolvers/video_frame_resolver/video_frame_filter.py +57 -0
  290. lightly_studio/resolvers/video_resolver/__init__.py +27 -0
  291. lightly_studio/resolvers/video_resolver/count_video_frame_annotations_by_video_dataset.py +86 -0
  292. lightly_studio/resolvers/video_resolver/create_many.py +58 -0
  293. lightly_studio/resolvers/video_resolver/filter_new_paths.py +33 -0
  294. lightly_studio/resolvers/video_resolver/get_all_by_dataset_id.py +181 -0
  295. lightly_studio/resolvers/video_resolver/get_by_id.py +22 -0
  296. lightly_studio/resolvers/video_resolver/get_table_fields_bounds.py +72 -0
  297. lightly_studio/resolvers/video_resolver/get_view_by_id.py +52 -0
  298. lightly_studio/resolvers/video_resolver/video_count_annotations_filter.py +50 -0
  299. lightly_studio/resolvers/video_resolver/video_filter.py +98 -0
  300. lightly_studio/selection/__init__.py +1 -0
  301. lightly_studio/selection/mundig.py +143 -0
  302. lightly_studio/selection/select.py +203 -0
  303. lightly_studio/selection/select_via_db.py +273 -0
  304. lightly_studio/selection/selection_config.py +49 -0
  305. lightly_studio/services/annotations_service/__init__.py +33 -0
  306. lightly_studio/services/annotations_service/create_annotation.py +64 -0
  307. lightly_studio/services/annotations_service/delete_annotation.py +22 -0
  308. lightly_studio/services/annotations_service/get_annotation_by_id.py +31 -0
  309. lightly_studio/services/annotations_service/update_annotation.py +54 -0
  310. lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
  311. lightly_studio/services/annotations_service/update_annotation_label.py +48 -0
  312. lightly_studio/services/annotations_service/update_annotations.py +29 -0
  313. lightly_studio/setup_logging.py +59 -0
  314. lightly_studio/type_definitions.py +31 -0
  315. lightly_studio/utils/__init__.py +3 -0
  316. lightly_studio/utils/download.py +94 -0
  317. lightly_studio/vendor/__init__.py +1 -0
  318. lightly_studio/vendor/mobileclip/ACKNOWLEDGEMENTS +422 -0
  319. lightly_studio/vendor/mobileclip/LICENSE +31 -0
  320. lightly_studio/vendor/mobileclip/LICENSE_weights_data +50 -0
  321. lightly_studio/vendor/mobileclip/README.md +5 -0
  322. lightly_studio/vendor/mobileclip/__init__.py +96 -0
  323. lightly_studio/vendor/mobileclip/clip.py +77 -0
  324. lightly_studio/vendor/mobileclip/configs/mobileclip_b.json +18 -0
  325. lightly_studio/vendor/mobileclip/configs/mobileclip_s0.json +18 -0
  326. lightly_studio/vendor/mobileclip/configs/mobileclip_s1.json +18 -0
  327. lightly_studio/vendor/mobileclip/configs/mobileclip_s2.json +18 -0
  328. lightly_studio/vendor/mobileclip/image_encoder.py +67 -0
  329. lightly_studio/vendor/mobileclip/logger.py +154 -0
  330. lightly_studio/vendor/mobileclip/models/__init__.py +10 -0
  331. lightly_studio/vendor/mobileclip/models/mci.py +933 -0
  332. lightly_studio/vendor/mobileclip/models/vit.py +433 -0
  333. lightly_studio/vendor/mobileclip/modules/__init__.py +4 -0
  334. lightly_studio/vendor/mobileclip/modules/common/__init__.py +4 -0
  335. lightly_studio/vendor/mobileclip/modules/common/mobileone.py +341 -0
  336. lightly_studio/vendor/mobileclip/modules/common/transformer.py +451 -0
  337. lightly_studio/vendor/mobileclip/modules/image/__init__.py +4 -0
  338. lightly_studio/vendor/mobileclip/modules/image/image_projection.py +113 -0
  339. lightly_studio/vendor/mobileclip/modules/image/replknet.py +188 -0
  340. lightly_studio/vendor/mobileclip/modules/text/__init__.py +4 -0
  341. lightly_studio/vendor/mobileclip/modules/text/repmixer.py +281 -0
  342. lightly_studio/vendor/mobileclip/modules/text/tokenizer.py +38 -0
  343. lightly_studio/vendor/mobileclip/text_encoder.py +245 -0
  344. lightly_studio/vendor/perception_encoder/LICENSE.PE +201 -0
  345. lightly_studio/vendor/perception_encoder/README.md +11 -0
  346. lightly_studio/vendor/perception_encoder/vision_encoder/__init__.py +0 -0
  347. lightly_studio/vendor/perception_encoder/vision_encoder/bpe_simple_vocab_16e6.txt.gz +0 -0
  348. lightly_studio/vendor/perception_encoder/vision_encoder/config.py +205 -0
  349. lightly_studio/vendor/perception_encoder/vision_encoder/config_src.py +264 -0
  350. lightly_studio/vendor/perception_encoder/vision_encoder/pe.py +766 -0
  351. lightly_studio/vendor/perception_encoder/vision_encoder/rope.py +352 -0
  352. lightly_studio/vendor/perception_encoder/vision_encoder/tokenizer.py +347 -0
  353. lightly_studio/vendor/perception_encoder/vision_encoder/transforms.py +36 -0
  354. lightly_studio-0.4.6.dist-info/METADATA +88 -0
  355. lightly_studio-0.4.6.dist-info/RECORD +356 -0
  356. lightly_studio-0.4.6.dist-info/WHEEL +4 -0
@@ -0,0 +1,155 @@
1
+ """EdgeCLIP embedding generator."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from collections.abc import Sequence
6
+ from typing import Tuple
7
+ from uuid import UUID
8
+
9
+ import cv2
10
+ import fsspec
11
+ import numpy as np
12
+ from lightly_edge_sdk import (
13
+ InferenceDeviceType,
14
+ LightlyEdge,
15
+ LightlyEdgeConfig,
16
+ LightlyEdgeDetectorConfig,
17
+ )
18
+ from numpy.typing import NDArray
19
+ from torch.utils.data import DataLoader, Dataset
20
+ from tqdm import tqdm
21
+
22
+ from lightly_studio.dataset.embedding_generator import ImageEmbeddingGenerator
23
+ from lightly_studio.models.embedding_model import EmbeddingModelCreate
24
+
25
+ MAX_BATCH_SIZE: int = 1
26
+
27
+
28
+ class _ImageFileDatasetEdge(Dataset[Tuple[bytes, int, int]]):
29
+ """Dataset wrapping image file paths for processing."""
30
+
31
+ def __init__(
32
+ self,
33
+ filepaths: Sequence[str],
34
+ ) -> None:
35
+ self.filepaths = filepaths
36
+
37
+ def __len__(self) -> int:
38
+ return len(self.filepaths)
39
+
40
+ def __getitem__(self, idx: int) -> tuple[bytes, int, int]:
41
+ # Load the image.
42
+ with fsspec.open(self.filepaths[idx], "rb") as file:
43
+ image_bytes = file.read()
44
+ # Decode image from bytes using OpenCV
45
+ nparr = np.frombuffer(image_bytes, np.uint8)
46
+ bgr_image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
47
+ rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
48
+ rgb_bytes = rgb_image.tobytes()
49
+ height, width, _ = rgb_image.shape
50
+ return rgb_bytes, width, height
51
+
52
+
53
class EdgeSDKEmbeddingGenerator(ImageEmbeddingGenerator):
    """Embedding generator using Edge SDK runtime."""

    def __init__(self, model_path: str) -> None:
        """Initialize the LightlyEdge object.

        Args:
            model_path: Path to the model tar file.
        """
        # Initialize the LightlyEdge SDK.
        config = _create_edge_config()
        self.lightly_edge = LightlyEdge(
            path=model_path,
            config=config,
        )
        # Cache the model details reported by the SDK; they are used to
        # describe the embedding model and to size the output arrays.
        model_config = self.lightly_edge.get_image_model_config()
        self._model_hash = model_config.model_hash
        self._embedding_size = model_config.embedding_size
        self._model_name = model_config.model_name

    def get_embedding_model_input(self, dataset_id: UUID) -> EmbeddingModelCreate:
        """Generate an EmbeddingModelCreate instance.

        Args:
            dataset_id: The ID of the dataset.

        Returns:
            An EmbeddingModelCreate instance with the model details.
        """
        return EmbeddingModelCreate(
            name=self._model_name,
            embedding_model_hash=self._model_hash,
            embedding_dimension=self._embedding_size,
            dataset_id=dataset_id,
        )

    def embed_text(self, text: str) -> list[float]:
        """Embed a text with EdgeCLIP.

        Args:
            text: The text to embed.

        Returns:
            A list of floats representing the generated embedding, or an
            empty list if the SDK returned no embedding.
        """
        embeddings = self.lightly_edge.embed_texts([text])
        if len(embeddings):
            return embeddings[0]
        return []

    def embed_images(self, filepaths: list[str]) -> NDArray[np.float32]:
        """Embed images with EdgeSDK.

        Args:
            filepaths: A list of file paths to the images to embed.

        Returns:
            A numpy array of shape `(len(filepaths), embedding_size)` holding
            the generated embeddings in the order the loader yields them.
        """
        total_images = len(filepaths)
        if not total_images:
            return np.empty((0, self._embedding_size), dtype=np.float32)

        dataset = _ImageFileDatasetEdge(filepaths)

        # To avoid issues with db locking and multiprocessing we set the
        # number of workers to 0 (no multiprocessing). The DataLoader is still
        # used to iterate the dataset in batches of MAX_BATCH_SIZE.
        loader = DataLoader(
            dataset,
            batch_size=MAX_BATCH_SIZE,
            num_workers=0,  # must be 0 to avoid multiprocessing issues
            pin_memory=True,
        )

        embeddings = np.empty((total_images, self._embedding_size), dtype=np.float32)
        # Running output row. It is tracked per image, not per batch, so the
        # result stays correct if MAX_BATCH_SIZE is ever raised above 1.
        row = 0
        with tqdm(total=total_images, desc="Generating embeddings", unit=" images") as progress_bar:
            for rgb_bytes_batch, widths, heights in loader:
                # The SDK embeds one frame per call, so walk every item of the
                # collated batch instead of only its first element.
                for rgb_bytes, width, height in zip(rgb_bytes_batch, widths, heights):
                    embeddings[row] = self.lightly_edge.embed_frame_rgb_bytes(
                        rgb_bytes=rgb_bytes,
                        width=width.item(),
                        height=height.item(),
                    )
                    row += 1
                    progress_bar.update(1)

        return embeddings
140
+
141
+
142
def _create_edge_config() -> LightlyEdgeConfig:
    """Build the LightlyEdge configuration used by the embedding generator.

    Starts from `LightlyEdgeConfig.default()`, sets the inference device type
    to `InferenceDeviceType.Auto` and installs a detector configuration with
    the object detector and classifiers disabled.

    Returns:
        Configured LightlyEdgeConfig instance.
    """
    edge_config = LightlyEdgeConfig.default()
    edge_config.inference_device_type = InferenceDeviceType.Auto

    # Detector configuration with the object detector and classifiers off.
    detector_config = LightlyEdgeDetectorConfig(
        object_detector_enable=False,
        classifiers_enable=False,
        max_classifications=0,
    )
    edge_config.detector_config = detector_config
    return edge_config
@@ -0,0 +1,129 @@
1
+ """EmbeddingGenerator implementations."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import random
6
+ from typing import Protocol, runtime_checkable
7
+ from uuid import UUID
8
+
9
+ import numpy as np
10
+ from numpy.typing import NDArray
11
+
12
+ from lightly_studio.models.embedding_model import EmbeddingModelCreate
13
+
14
+
15
@runtime_checkable
class EmbeddingGenerator(Protocol):
    """Structural interface shared by all embedding models.

    Any object providing the methods below satisfies this protocol; how the
    embeddings are actually computed is left to the concrete implementation.
    Because the protocol is runtime-checkable, ``isinstance`` can be used to
    test whether an object provides these methods.
    """

    def get_embedding_model_input(self, dataset_id: UUID) -> EmbeddingModelCreate:
        """Describe this model as an EmbeddingModelCreate instance.

        Args:
            dataset_id: The ID of the dataset.

        Returns:
            An EmbeddingModelCreate instance with the model details.
        """
        ...

    def embed_text(self, text: str) -> list[float]:
        """Compute the embedding of a single text.

        Args:
            text: The text to embed.

        Returns:
            A list of floats representing the generated embedding.
        """
        ...
44
+
45
+
46
@runtime_checkable
class ImageEmbeddingGenerator(EmbeddingGenerator, Protocol):
    """Structural interface for embedding models that can embed images.

    Extends EmbeddingGenerator with a batch image-embedding method. The
    technique used to produce the embeddings is up to the implementation.
    """

    def embed_images(self, filepaths: list[str]) -> NDArray[np.float32]:
        """Compute embeddings for a batch of image files.

        TODO(Michal, 04/2025): Use DatasetLoader as input instead.

        Args:
            filepaths: A list of file paths to the images to embed.

        Returns:
            A numpy array representing the generated embeddings
            in the same order as the input file paths.
        """
        ...
68
+
69
+
70
@runtime_checkable
class VideoEmbeddingGenerator(EmbeddingGenerator, Protocol):
    """Structural interface for embedding models that can embed videos.

    Extends EmbeddingGenerator with a batch video-embedding method. The
    technique used to produce the embeddings is up to the implementation.
    """

    def embed_videos(self, filepaths: list[str]) -> NDArray[np.float32]:
        """Compute embeddings for a batch of video files.

        Args:
            filepaths: A list of file paths to the videos to embed.

        Returns:
            A numpy array representing the generated embeddings
            in the same order as the input file paths.
        """
        ...
90
+
91
+
92
class RandomEmbeddingGenerator(ImageEmbeddingGenerator, VideoEmbeddingGenerator):
    """Embedding generator that returns random vectors of a fixed dimension.

    The inputs are never read: only the number of file paths (or nothing at
    all, for text) influences the shape of the result.
    """

    def __init__(self, dimension: int = 3):
        """Store the length of the vectors this generator produces.

        Args:
            dimension: The dimension of the embedding vectors to generate.
        """
        self._dimension = dimension

    def get_embedding_model_input(self, dataset_id: UUID) -> EmbeddingModelCreate:
        """Describe the random model as an EmbeddingModelCreate instance.

        Args:
            dataset_id: The ID of the dataset.

        Returns:
            An EmbeddingModelCreate instance with the model details.
        """
        model_input = EmbeddingModelCreate(
            name="Random",
            embedding_model_hash="random_model",
            embedding_dimension=self._dimension,
            dataset_id=dataset_id,
        )
        return model_input

    def embed_text(self, _text: str) -> list[float]:
        """Return a random embedding; the text itself is ignored."""
        vector: list[float] = []
        for _ in range(self._dimension):
            vector.append(random.random())
        return vector

    def embed_images(self, filepaths: list[str]) -> NDArray[np.float32]:
        """Return one random float32 embedding row per image file path."""
        num_images = len(filepaths)
        samples = np.random.rand(num_images, self._dimension)
        return samples.astype(np.float32)

    def embed_videos(self, filepaths: list[str]) -> NDArray[np.float32]:
        """Return one random float32 embedding row per video file path."""
        num_videos = len(filepaths)
        samples = np.random.rand(num_videos, self._dimension)
        return samples.astype(np.float32)
@@ -0,0 +1,349 @@
1
+ """Embedding manager for dataset processing."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from dataclasses import dataclass
7
+ from uuid import UUID
8
+
9
+ from sqlmodel import Session
10
+
11
+ from lightly_studio.dataset import env
12
+ from lightly_studio.dataset.embedding_generator import (
13
+ EmbeddingGenerator,
14
+ ImageEmbeddingGenerator,
15
+ VideoEmbeddingGenerator,
16
+ )
17
+ from lightly_studio.models.dataset import SampleType
18
+ from lightly_studio.models.embedding_model import EmbeddingModelTable
19
+ from lightly_studio.models.sample_embedding import SampleEmbeddingCreate
20
+ from lightly_studio.resolvers import (
21
+ dataset_resolver,
22
+ embedding_model_resolver,
23
+ image_resolver,
24
+ sample_embedding_resolver,
25
+ video_resolver,
26
+ )
27
+
28
+ logger = logging.getLogger(__name__)
29
+
30
+
31
class EmbeddingManagerProvider:
    """Holds and hands out the shared EmbeddingManager instance."""

    # Created lazily on the first call to get_embedding_manager().
    _instance: EmbeddingManager | None = None

    @classmethod
    def get_embedding_manager(cls) -> EmbeddingManager:
        """Return the shared EmbeddingManager, creating it on first use.

        Returns:
            The singleton instance of EmbeddingManager.
        """
        manager = cls._instance
        if manager is None:
            manager = EmbeddingManager()
            cls._instance = manager
        return manager
49
+
50
+
51
@dataclass
class TextEmbedQuery:
    """Input bundle for a text embedding request.

    Attributes:
        text: The text to embed.
        embedding_model_id: ID of the embedding model to use, or None when
            no specific model is requested.
    """

    text: str
    embedding_model_id: UUID | None = None
57
+
58
+
59
class EmbeddingManager:
    """Manages embedding models and handles embedding generation and storage.

    Registered generators are kept in memory, keyed by the ID of their
    database record, together with one default model ID per dataset.
    """

    def __init__(self) -> None:
        """Initialize the embedding manager with no registered models."""
        self._models: dict[UUID, EmbeddingGenerator] = {}
        self._dataset_id_to_default_model_id: dict[UUID, UUID] = {}

    def register_embedding_model(
        self,
        session: Session,
        dataset_id: UUID,
        embedding_generator: EmbeddingGenerator,
        set_as_default: bool = False,
    ) -> EmbeddingModelTable:
        """Register an embedding model in the database.

        The model is stored in an internal dictionary for later use.
        The model is set as default if requested or if the dataset has no
        default model yet.

        Args:
            session: Database session for resolver operations.
            dataset_id: The ID of the dataset to associate with the model
                and for which the default model is tracked.
            embedding_generator: The model implementation used for embeddings.
            set_as_default: Whether to set this model as the default.

        Returns:
            The database record of the embedding model.
        """
        # Get or create embedding model record in the database.
        db_model = embedding_model_resolver.get_or_create(
            session=session,
            embedding_model=embedding_generator.get_embedding_model_input(dataset_id=dataset_id),
        )
        model_id = db_model.embedding_model_id

        # Keep the generator reachable by the ID of its database record.
        self._models[model_id] = embedding_generator

        # Set as default if requested or if the dataset has no default yet.
        if set_as_default or dataset_id not in self._dataset_id_to_default_model_id:
            self._dataset_id_to_default_model_id[dataset_id] = model_id

        return db_model

    def embed_text(self, dataset_id: UUID, text_query: TextEmbedQuery) -> list[float]:
        """Generate an embedding for a text sample.

        Args:
            dataset_id: The ID of the dataset whose default model is used
                when the query does not name an embedding model.
            text_query: Text embedding query containing text and model ID.

        Returns:
            A list of floats representing the generated embedding.

        Raises:
            ValueError: If no model ID is given and the dataset has no
                default model, or if the given model ID is not registered.
        """
        model_id = self._get_default_or_validate(
            dataset_id=dataset_id, embedding_model_id=text_query.embedding_model_id
        )
        return self._models[model_id].embed_text(text_query.text)

    def embed_images(
        self,
        session: Session,
        dataset_id: UUID,
        sample_ids: list[UUID],
        embedding_model_id: UUID | None = None,
    ) -> None:
        """Generate and store embeddings for image samples.

        Args:
            session: Database session for resolver operations.
            dataset_id: The ID of the dataset whose default model is used
                when embedding_model_id is None.
            sample_ids: List of sample IDs to generate embeddings for.
            embedding_model_id: ID of the model to use. Uses default if None.

        Raises:
            ValueError: If no embedding model is registered, the provided
                model ID doesn't exist, the embedding model does not support
                images, or a sample ID has no matching image.
        """
        model_id = self._get_default_or_validate(
            dataset_id=dataset_id, embedding_model_id=embedding_model_id
        )

        model = self._models[model_id]
        if not isinstance(model, ImageEmbeddingGenerator):
            raise ValueError("Embedding model not compatible with images.")

        # Query image file paths from the database.
        sample_id_to_filepath = {
            sample.sample_id: sample.file_path_abs
            for sample in image_resolver.get_many_by_id(
                session=session,
                sample_ids=sample_ids,
            )
        }

        # Extract filepaths in the same order as sample_ids. A missing sample
        # is reported as ValueError, matching embed_videos and the documented
        # contract, instead of leaking a bare KeyError.
        try:
            filepaths = [sample_id_to_filepath[sample_id] for sample_id in sample_ids]
        except KeyError as err:
            raise ValueError("Could not fetch all image paths for the provided IDs.") from err

        # Generate embeddings for the samples.
        embeddings = model.embed_images(filepaths=filepaths)

        # Convert to SampleEmbeddingCreate objects.
        sample_embeddings = [
            SampleEmbeddingCreate(
                sample_id=sample_id,
                embedding_model_id=model_id,
                embedding=embedding,
            )
            for sample_id, embedding in zip(sample_ids, embeddings)
        ]

        # Store the embeddings in the database.
        sample_embedding_resolver.create_many(session=session, sample_embeddings=sample_embeddings)

    def embed_videos(
        self,
        session: Session,
        dataset_id: UUID,
        sample_ids: list[UUID],
        embedding_model_id: UUID | None = None,
    ) -> None:
        """Generate and store embeddings for video samples.

        Args:
            session: Database session for resolver operations.
            dataset_id: The ID of the dataset whose default model is used
                when embedding_model_id is None.
            sample_ids: List of sample IDs to generate embeddings for.
            embedding_model_id: ID of the model to use. Uses default if None.

        Raises:
            ValueError: If no embedding model is registered, the provided
                model ID doesn't exist, the embedding model does not support
                videos, or a sample ID has no matching video.
        """
        model_id = self._get_default_or_validate(
            dataset_id=dataset_id, embedding_model_id=embedding_model_id
        )

        model = self._models[model_id]
        if not isinstance(model, VideoEmbeddingGenerator):
            raise ValueError("Embedding model not compatible with videos.")

        # Resolve the file path of every sample, in the order of sample_ids.
        filepaths = []
        for sample_id in sample_ids:
            sample = video_resolver.get_by_id(session=session, sample_id=sample_id)
            if sample is None:
                raise ValueError("Could not fetch all video paths for the provided IDs.")
            filepaths.append(sample.file_path_abs)

        # Generate embeddings for the samples.
        embeddings = model.embed_videos(filepaths=filepaths)

        # Convert to SampleEmbeddingCreate objects.
        sample_embeddings = [
            SampleEmbeddingCreate(
                sample_id=sample_id,
                embedding_model_id=model_id,
                embedding=embedding,
            )
            for sample_id, embedding in zip(sample_ids, embeddings)
        ]

        # Store the embeddings in the database.
        sample_embedding_resolver.create_many(session=session, sample_embeddings=sample_embeddings)

    def load_or_get_default_model(
        self,
        session: Session,
        dataset_id: UUID,
    ) -> UUID | None:
        """Ensure a default embedding model exists and return its ID.

        Args:
            session: Database session for resolver operations.
            dataset_id: Dataset identifier the model should belong to.

        Returns:
            UUID of the default embedding model or None if the model cannot be loaded.

        Raises:
            ValueError: If the dataset does not exist.
        """
        # Return the existing default model ID if available.
        if dataset_id in self._dataset_id_to_default_model_id:
            return self._dataset_id_to_default_model_id[dataset_id]

        # Load the embedding generator matching the dataset's sample type.
        dataset = dataset_resolver.get_by_id(session=session, dataset_id=dataset_id)
        if dataset is None:
            raise ValueError("Provided dataset_id could not be found.")

        embedding_generator = _load_embedding_generator_from_env(sample_type=dataset.sample_type)
        if embedding_generator is None:
            return None

        # Register the embedding model and set it as default.
        embedding_model = self.register_embedding_model(
            session=session,
            dataset_id=dataset_id,
            embedding_generator=embedding_generator,
            set_as_default=True,
        )

        return embedding_model.embedding_model_id

    def _get_default_or_validate(self, dataset_id: UUID, embedding_model_id: UUID | None) -> UUID:
        """Get a valid model ID or raise an error if none is available.

        If embedding_model_id is not provided, returns the default model for dataset_id.
        If embedding_model_id is provided, validates that the model has been loaded and returns it.

        Raises:
            ValueError: If neither an explicit nor a default model ID is
                available, or if the explicit model ID is not registered.
        """
        if embedding_model_id is None:
            default_model_id = self._dataset_id_to_default_model_id.get(dataset_id)
            if default_model_id is None:
                raise ValueError(
                    "No embedding_model_id provided and no default embedding model registered."
                )
            return default_model_id

        if embedding_model_id not in self._models:
            raise ValueError(f"No embedding model found with ID {embedding_model_id}")
        return embedding_model_id
290
+
291
+
292
def _load_embedding_generator_from_env(sample_type: SampleType) -> EmbeddingGenerator | None:
    """Pick the embedding generator loader that matches the sample type.

    Image datasets use the image loader, video datasets use the video
    loader; any other sample type yields None.
    """
    generator: EmbeddingGenerator | None = None
    if sample_type == SampleType.IMAGE:
        generator = _load_image_embedding_generator_from_env()
    elif sample_type == SampleType.VIDEO:
        generator = _load_video_embedding_generator()
    return generator
299
+
300
+
301
+ # TODO(Michal, 09/2025): Write tests for this function.
302
def _load_image_embedding_generator_from_env() -> ImageEmbeddingGenerator | None:
    """Create the image embedding generator selected by the environment.

    The model type is read from ``env.LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE``.
    Generator modules are imported lazily so that a backend that cannot be
    imported only disables embeddings instead of failing at import time.

    Returns:
        The generator instance, or None when the model type is unsupported
        or an ImportError is raised while loading it.
    """
    model_type = env.LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE

    if model_type not in ("EDGE", "MOBILE_CLIP", "PE"):
        logger.warning(f"Unsupported model type: '{model_type}'")
        logger.warning("Embedding functionality is disabled.")
        return None

    try:
        if model_type == "EDGE":
            from lightly_studio.dataset.edge_embedding_generator import (
                EdgeSDKEmbeddingGenerator,
            )

            logger.info("Using LightlyEdge embedding generator for images.")
            return EdgeSDKEmbeddingGenerator(model_path=env.LIGHTLY_STUDIO_EDGE_MODEL_FILE_PATH)

        if model_type == "MOBILE_CLIP":
            from lightly_studio.dataset.mobileclip_embedding_generator import (
                MobileCLIPEmbeddingGenerator,
            )

            logger.info("Using MobileCLIP embedding generator for images.")
            return MobileCLIPEmbeddingGenerator()

        # Only "PE" is left at this point.
        from lightly_studio.dataset.perception_encoder_embedding_generator import (
            PerceptionEncoderEmbeddingGenerator,
        )

        logger.info("Using PerceptionEncoder embedding generator for images.")
        return PerceptionEncoderEmbeddingGenerator()
    except ImportError:
        logger.warning("Embedding functionality is disabled.")
    return None
337
+
338
+
339
def _load_video_embedding_generator() -> VideoEmbeddingGenerator | None:
    """Create the PerceptionEncoder generator used for video embeddings.

    The generator module is imported lazily; if an ImportError is raised
    while loading it, a warning is logged and None is returned.
    """
    generator: VideoEmbeddingGenerator | None = None
    try:
        from lightly_studio.dataset.perception_encoder_embedding_generator import (
            PerceptionEncoderEmbeddingGenerator,
        )

        logger.info("Using PerceptionEncoder embedding generator for videos.")
        generator = PerceptionEncoderEmbeddingGenerator()
    except ImportError:
        logger.warning("Embedding functionality is disabled.")
    return generator
@@ -0,0 +1,20 @@
1
+ """Initialize environment variables for the dataset module."""
2
+
3
+ from typing import Optional
4
+
5
+ from environs import Env
6
+
7
env = Env()
# Load variables from a .env file, if one is found, before reading values.
env.read_env()

# Embedding backend selection and the model file used by the EDGE backend.
# NOTE(review): the model path is read from "EDGE_MODEL_PATH", which lacks the
# LIGHTLY_STUDIO_ prefix used by every other variable - confirm intended.
LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE: str = env.str(
    "LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE",
    default="MOBILE_CLIP",
)
LIGHTLY_STUDIO_EDGE_MODEL_FILE_PATH: str = env.str(
    "EDGE_MODEL_PATH",
    default="./lightly_model.tar",
)

# Address settings used to build APP_URL.
LIGHTLY_STUDIO_PROTOCOL: str = env.str("LIGHTLY_STUDIO_PROTOCOL", default="http")
LIGHTLY_STUDIO_PORT: int = env.int("LIGHTLY_STUDIO_PORT", default=8001)
LIGHTLY_STUDIO_HOST: str = env.str("LIGHTLY_STUDIO_HOST", default="localhost")
LIGHTLY_STUDIO_DEBUG: bool = env.bool("LIGHTLY_STUDIO_DEBUG", default=False)

APP_URL = f"{LIGHTLY_STUDIO_PROTOCOL}://{LIGHTLY_STUDIO_HOST}:{LIGHTLY_STUDIO_PORT}"

# Optional license key; None when the variable is not set.
LIGHTLY_STUDIO_LICENSE_KEY: Optional[str] = env.str("LIGHTLY_STUDIO_LICENSE_KEY", default=None)
@@ -0,0 +1,49 @@
1
+ """File manipulation utilities."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import shutil
7
+ from pathlib import Path
8
+
9
+ import requests
10
+ import xxhash
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+
15
def download_file_if_does_not_exist(url: str, local_filename: Path) -> None:
    """Download a file from a URL if it does not already exist locally.

    The payload is streamed into a sibling ``<name>.part`` file and moved to
    ``local_filename`` only after the download has completed. An interrupted
    download therefore never leaves a truncated file that a later call would
    mistake for a finished one.

    Args:
        url: The URL to download from.
        local_filename: Destination path. Nothing happens if it already exists.

    Raises:
        requests.HTTPError: If the server answers with an error status code.
        Exception: Any other error raised while downloading or writing is
            re-raised after the partial file has been removed.
    """
    if local_filename.exists():
        return

    partial_filename = local_filename.with_name(local_filename.name + ".part")
    try:
        logger.info(f"Downloading {url} to {local_filename}")
        with requests.get(url, stream=True, timeout=30) as r:
            # Raise an error for bad status codes
            r.raise_for_status()
            # The raw stream skips Content-Encoding handling by default; ask
            # it to decode so gzip/deflate responses are stored decompressed.
            r.raw.decode_content = True
            with open(partial_filename, "wb") as f:
                shutil.copyfileobj(r.raw, f)
        # Publish the finished download under its final name.
        partial_filename.replace(local_filename)
    except Exception:
        # If download fails, remove any partial file to allow retry.
        if partial_filename.exists():
            partial_filename.unlink()
        raise
32
+
33
+
34
def get_file_xxhash(file_path: Path) -> str:
    """Compute the 64-bit xxhash digest of a file's contents.

    XXHash is a fast non-cryptographic hash function. The file is read in
    8192-byte blocks so it is never loaded into memory as a whole.

    Args:
        file_path: Path to the file.

    Returns:
        The xxhash of the file as a hexadecimal string.
    """
    digest = xxhash.xxh64()
    with file_path.open("rb") as stream:
        # iter() with a sentinel stops at the empty read that marks EOF.
        for block in iter(lambda: stream.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()