lightly-studio 0.4.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (356)
  1. lightly_studio/__init__.py +12 -0
  2. lightly_studio/api/__init__.py +0 -0
  3. lightly_studio/api/app.py +131 -0
  4. lightly_studio/api/cache.py +77 -0
  5. lightly_studio/api/db_tables.py +35 -0
  6. lightly_studio/api/features.py +5 -0
  7. lightly_studio/api/routes/api/annotation.py +305 -0
  8. lightly_studio/api/routes/api/annotation_label.py +87 -0
  9. lightly_studio/api/routes/api/annotations/__init__.py +7 -0
  10. lightly_studio/api/routes/api/annotations/create_annotation.py +52 -0
  11. lightly_studio/api/routes/api/caption.py +100 -0
  12. lightly_studio/api/routes/api/classifier.py +384 -0
  13. lightly_studio/api/routes/api/dataset.py +191 -0
  14. lightly_studio/api/routes/api/dataset_tag.py +266 -0
  15. lightly_studio/api/routes/api/embeddings2d.py +90 -0
  16. lightly_studio/api/routes/api/exceptions.py +114 -0
  17. lightly_studio/api/routes/api/export.py +114 -0
  18. lightly_studio/api/routes/api/features.py +17 -0
  19. lightly_studio/api/routes/api/frame.py +241 -0
  20. lightly_studio/api/routes/api/image.py +155 -0
  21. lightly_studio/api/routes/api/metadata.py +161 -0
  22. lightly_studio/api/routes/api/operator.py +75 -0
  23. lightly_studio/api/routes/api/sample.py +103 -0
  24. lightly_studio/api/routes/api/selection.py +87 -0
  25. lightly_studio/api/routes/api/settings.py +41 -0
  26. lightly_studio/api/routes/api/status.py +19 -0
  27. lightly_studio/api/routes/api/text_embedding.py +50 -0
  28. lightly_studio/api/routes/api/validators.py +17 -0
  29. lightly_studio/api/routes/api/video.py +133 -0
  30. lightly_studio/api/routes/healthz.py +13 -0
  31. lightly_studio/api/routes/images.py +104 -0
  32. lightly_studio/api/routes/video_frames_media.py +116 -0
  33. lightly_studio/api/routes/video_media.py +223 -0
  34. lightly_studio/api/routes/webapp.py +51 -0
  35. lightly_studio/api/server.py +94 -0
  36. lightly_studio/core/__init__.py +0 -0
  37. lightly_studio/core/add_samples.py +533 -0
  38. lightly_studio/core/add_videos.py +294 -0
  39. lightly_studio/core/dataset.py +780 -0
  40. lightly_studio/core/dataset_query/__init__.py +14 -0
  41. lightly_studio/core/dataset_query/boolean_expression.py +67 -0
  42. lightly_studio/core/dataset_query/dataset_query.py +317 -0
  43. lightly_studio/core/dataset_query/field.py +113 -0
  44. lightly_studio/core/dataset_query/field_expression.py +79 -0
  45. lightly_studio/core/dataset_query/match_expression.py +23 -0
  46. lightly_studio/core/dataset_query/order_by.py +79 -0
  47. lightly_studio/core/dataset_query/sample_field.py +37 -0
  48. lightly_studio/core/dataset_query/tags_expression.py +46 -0
  49. lightly_studio/core/image_sample.py +36 -0
  50. lightly_studio/core/loading_log.py +56 -0
  51. lightly_studio/core/sample.py +291 -0
  52. lightly_studio/core/start_gui.py +54 -0
  53. lightly_studio/core/video_sample.py +38 -0
  54. lightly_studio/dataset/__init__.py +0 -0
  55. lightly_studio/dataset/edge_embedding_generator.py +155 -0
  56. lightly_studio/dataset/embedding_generator.py +129 -0
  57. lightly_studio/dataset/embedding_manager.py +349 -0
  58. lightly_studio/dataset/env.py +20 -0
  59. lightly_studio/dataset/file_utils.py +49 -0
  60. lightly_studio/dataset/fsspec_lister.py +275 -0
  61. lightly_studio/dataset/mobileclip_embedding_generator.py +158 -0
  62. lightly_studio/dataset/perception_encoder_embedding_generator.py +260 -0
  63. lightly_studio/db_manager.py +166 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/env.js +1 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.GcXvs2l7.css +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/12.Dx6SXgAb.css +1 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/17.9X9_k6TP.css +1 -0
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/18.BxiimdIO.css +1 -0
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/2.CkOblLn7.css +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/ClassifierSamplesGrid.BJbCDlvs.css +1 -0
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/LightlyLogo.BNjCIww-.png +0 -0
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Bold.DGvYQtcs.ttf +0 -0
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Italic-VariableFont_wdth_wght.B4AZ-wl6.ttf +0 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Medium.DVUZMR_6.ttf +0 -0
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Regular.DxJTClRG.ttf +0 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-SemiBold.D3TTYgdB.ttf +0 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-VariableFont_wdth_wght.BZBpG5Iz.ttf +0 -0
  78. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.CefECEWA.css +1 -0
  79. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.D5tDcjY-.css +1 -0
  80. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_page.9X9_k6TP.css +1 -0
  81. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_page.BxiimdIO.css +1 -0
  82. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_page.Dx6SXgAb.css +1 -0
  83. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/transform._-1mPSEI.css +1 -0
  84. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/0dDyq72A.js +20 -0
  85. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/69_IOA4Y.js +1 -0
  86. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BK4An2kI.js +1 -0
  87. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BRmB-kJ9.js +1 -0
  88. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B_1cpokE.js +1 -0
  89. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BiqpDEr0.js +1 -0
  90. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BpLiSKgx.js +1 -0
  91. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BscxbINH.js +39 -0
  92. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C1FmrZbK.js +1 -0
  93. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C80h3dJx.js +1 -0
  94. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C8mfFM-u.js +2 -0
  95. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CGY1p9L4.js +517 -0
  96. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/COfLknXM.js +1 -0
  97. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CWj6FrbW.js +1 -0
  98. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CYgJF_JY.js +1 -0
  99. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CmLg0ys7.js +1 -0
  100. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvGjimpO.js +1 -0
  101. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D3RDXHoj.js +39 -0
  102. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D4y7iiT3.js +1 -0
  103. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D9SC3jBb.js +1 -0
  104. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DCuAdx1Q.js +20 -0
  105. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DDBy-_jD.js +1 -0
  106. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIeogL5L.js +1 -0
  107. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DL9a7v5o.js +1 -0
  108. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DSKECuqX.js +39 -0
  109. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D_FFv0Oe.js +1 -0
  110. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DiZ5o5vz.js +1 -0
  111. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DkbXUtyG.js +1 -0
  112. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DmK2hulV.js +1 -0
  113. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DqnHaLTj.js +1 -0
  114. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DtWZc_tl.js +1 -0
  115. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DuUalyFS.js +1 -0
  116. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DwIonDAZ.js +1 -0
  117. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Il-mSPmK.js +1 -0
  118. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KNLP4aJU.js +1 -0
  119. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KjYeVjkE.js +1 -0
  120. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/MErlcOXj.js +1 -0
  121. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/VRI4prUD.js +1 -0
  122. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/VYb2dkNs.js +1 -0
  123. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/VqWvU2yF.js +1 -0
  124. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/dHC3otuL.js +1 -0
  125. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/da7Oy_lO.js +1 -0
  126. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/eAy8rZzC.js +2 -0
  127. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/erjNR5MX.js +1 -0
  128. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/f1oG3eFE.js +1 -0
  129. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/rsLi1iKv.js +20 -0
  130. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/rwuuBP9f.js +1 -0
  131. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/xGHZQ1pe.js +3 -0
  132. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.DrTRUgT3.js +2 -0
  133. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.BK5EOJl2.js +1 -0
  134. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.CIvTuljF.js +4 -0
  135. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/1.UBvSzxdA.js +1 -0
  136. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.CQ_tiLJa.js +1 -0
  137. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/11.KqkAcaxW.js +1 -0
  138. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.DoYsmxQc.js +1 -0
  139. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/13.571n2LZA.js +1 -0
  140. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/14.DGs689M-.js +1 -0
  141. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/15.CWG1ehzT.js +1 -0
  142. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/16.Dpq6jbSh.js +1 -0
  143. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/17.B5AZbHUU.js +1 -0
  144. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/18.CBga8cnq.js +1 -0
  145. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.D2HXgz-8.js +1090 -0
  146. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/3.f4HAg-y3.js +1 -0
  147. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/4.BKF4xuKQ.js +1 -0
  148. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.BAE0Pm_f.js +39 -0
  149. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.CouWWpzA.js +1 -0
  150. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.UBHT0ktp.js +1 -0
  151. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.FiYNElcc.js +1 -0
  152. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/9.B3-UaT23.js +1 -0
  153. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/clustering.worker-DKqeLtG0.js +2 -0
  154. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/search.worker-vNSty3B0.js +1 -0
  155. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -0
  156. lightly_studio/dist_lightly_studio_view_app/apple-touch-icon-precomposed.png +0 -0
  157. lightly_studio/dist_lightly_studio_view_app/apple-touch-icon.png +0 -0
  158. lightly_studio/dist_lightly_studio_view_app/favicon.png +0 -0
  159. lightly_studio/dist_lightly_studio_view_app/index.html +45 -0
  160. lightly_studio/errors.py +5 -0
  161. lightly_studio/examples/example.py +25 -0
  162. lightly_studio/examples/example_coco.py +27 -0
  163. lightly_studio/examples/example_coco_caption.py +29 -0
  164. lightly_studio/examples/example_metadata.py +369 -0
  165. lightly_studio/examples/example_operators.py +111 -0
  166. lightly_studio/examples/example_selection.py +28 -0
  167. lightly_studio/examples/example_split_work.py +48 -0
  168. lightly_studio/examples/example_video.py +22 -0
  169. lightly_studio/examples/example_video_annotations.py +157 -0
  170. lightly_studio/examples/example_yolo.py +22 -0
  171. lightly_studio/export/coco_captions.py +69 -0
  172. lightly_studio/export/export_dataset.py +104 -0
  173. lightly_studio/export/lightly_studio_label_input.py +120 -0
  174. lightly_studio/export_schema.py +18 -0
  175. lightly_studio/export_version.py +57 -0
  176. lightly_studio/few_shot_classifier/__init__.py +0 -0
  177. lightly_studio/few_shot_classifier/classifier.py +80 -0
  178. lightly_studio/few_shot_classifier/classifier_manager.py +644 -0
  179. lightly_studio/few_shot_classifier/random_forest_classifier.py +495 -0
  180. lightly_studio/metadata/complex_metadata.py +47 -0
  181. lightly_studio/metadata/compute_similarity.py +84 -0
  182. lightly_studio/metadata/compute_typicality.py +67 -0
  183. lightly_studio/metadata/gps_coordinate.py +41 -0
  184. lightly_studio/metadata/metadata_protocol.py +17 -0
  185. lightly_studio/models/__init__.py +1 -0
  186. lightly_studio/models/annotation/__init__.py +0 -0
  187. lightly_studio/models/annotation/annotation_base.py +303 -0
  188. lightly_studio/models/annotation/instance_segmentation.py +56 -0
  189. lightly_studio/models/annotation/links.py +17 -0
  190. lightly_studio/models/annotation/object_detection.py +47 -0
  191. lightly_studio/models/annotation/semantic_segmentation.py +44 -0
  192. lightly_studio/models/annotation_label.py +47 -0
  193. lightly_studio/models/caption.py +49 -0
  194. lightly_studio/models/classifier.py +20 -0
  195. lightly_studio/models/dataset.py +70 -0
  196. lightly_studio/models/embedding_model.py +30 -0
  197. lightly_studio/models/image.py +96 -0
  198. lightly_studio/models/metadata.py +208 -0
  199. lightly_studio/models/range.py +17 -0
  200. lightly_studio/models/sample.py +154 -0
  201. lightly_studio/models/sample_embedding.py +36 -0
  202. lightly_studio/models/settings.py +69 -0
  203. lightly_studio/models/tag.py +96 -0
  204. lightly_studio/models/two_dim_embedding.py +16 -0
  205. lightly_studio/models/video.py +161 -0
  206. lightly_studio/plugins/__init__.py +0 -0
  207. lightly_studio/plugins/base_operator.py +60 -0
  208. lightly_studio/plugins/operator_registry.py +47 -0
  209. lightly_studio/plugins/parameter.py +70 -0
  210. lightly_studio/py.typed +0 -0
  211. lightly_studio/resolvers/__init__.py +0 -0
  212. lightly_studio/resolvers/annotation_label_resolver/__init__.py +22 -0
  213. lightly_studio/resolvers/annotation_label_resolver/create.py +27 -0
  214. lightly_studio/resolvers/annotation_label_resolver/delete.py +28 -0
  215. lightly_studio/resolvers/annotation_label_resolver/get_all.py +37 -0
  216. lightly_studio/resolvers/annotation_label_resolver/get_by_id.py +24 -0
  217. lightly_studio/resolvers/annotation_label_resolver/get_by_ids.py +25 -0
  218. lightly_studio/resolvers/annotation_label_resolver/get_by_label_name.py +24 -0
  219. lightly_studio/resolvers/annotation_label_resolver/names_by_ids.py +25 -0
  220. lightly_studio/resolvers/annotation_label_resolver/update.py +38 -0
  221. lightly_studio/resolvers/annotation_resolver/__init__.py +40 -0
  222. lightly_studio/resolvers/annotation_resolver/count_annotations_by_dataset.py +129 -0
  223. lightly_studio/resolvers/annotation_resolver/create_many.py +124 -0
  224. lightly_studio/resolvers/annotation_resolver/delete_annotation.py +87 -0
  225. lightly_studio/resolvers/annotation_resolver/delete_annotations.py +60 -0
  226. lightly_studio/resolvers/annotation_resolver/get_all.py +85 -0
  227. lightly_studio/resolvers/annotation_resolver/get_all_with_payload.py +179 -0
  228. lightly_studio/resolvers/annotation_resolver/get_by_id.py +34 -0
  229. lightly_studio/resolvers/annotation_resolver/get_by_id_with_payload.py +130 -0
  230. lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +142 -0
  231. lightly_studio/resolvers/annotation_resolver/update_bounding_box.py +68 -0
  232. lightly_studio/resolvers/annotations/__init__.py +1 -0
  233. lightly_studio/resolvers/annotations/annotations_filter.py +88 -0
  234. lightly_studio/resolvers/caption_resolver.py +129 -0
  235. lightly_studio/resolvers/dataset_resolver/__init__.py +55 -0
  236. lightly_studio/resolvers/dataset_resolver/check_dataset_type.py +29 -0
  237. lightly_studio/resolvers/dataset_resolver/create.py +20 -0
  238. lightly_studio/resolvers/dataset_resolver/delete.py +20 -0
  239. lightly_studio/resolvers/dataset_resolver/export.py +267 -0
  240. lightly_studio/resolvers/dataset_resolver/get_all.py +19 -0
  241. lightly_studio/resolvers/dataset_resolver/get_by_id.py +16 -0
  242. lightly_studio/resolvers/dataset_resolver/get_by_name.py +12 -0
  243. lightly_studio/resolvers/dataset_resolver/get_dataset_details.py +27 -0
  244. lightly_studio/resolvers/dataset_resolver/get_hierarchy.py +31 -0
  245. lightly_studio/resolvers/dataset_resolver/get_or_create_child_dataset.py +58 -0
  246. lightly_studio/resolvers/dataset_resolver/get_parent_dataset_by_sample_id.py +27 -0
  247. lightly_studio/resolvers/dataset_resolver/get_parent_dataset_id.py +22 -0
  248. lightly_studio/resolvers/dataset_resolver/get_root_dataset.py +61 -0
  249. lightly_studio/resolvers/dataset_resolver/get_root_datasets_overview.py +41 -0
  250. lightly_studio/resolvers/dataset_resolver/update.py +25 -0
  251. lightly_studio/resolvers/embedding_model_resolver.py +120 -0
  252. lightly_studio/resolvers/image_filter.py +50 -0
  253. lightly_studio/resolvers/image_resolver/__init__.py +21 -0
  254. lightly_studio/resolvers/image_resolver/create_many.py +52 -0
  255. lightly_studio/resolvers/image_resolver/delete.py +20 -0
  256. lightly_studio/resolvers/image_resolver/filter_new_paths.py +23 -0
  257. lightly_studio/resolvers/image_resolver/get_all_by_dataset_id.py +117 -0
  258. lightly_studio/resolvers/image_resolver/get_by_id.py +14 -0
  259. lightly_studio/resolvers/image_resolver/get_dimension_bounds.py +75 -0
  260. lightly_studio/resolvers/image_resolver/get_many_by_id.py +22 -0
  261. lightly_studio/resolvers/image_resolver/get_samples_excluding.py +43 -0
  262. lightly_studio/resolvers/metadata_resolver/__init__.py +15 -0
  263. lightly_studio/resolvers/metadata_resolver/metadata_filter.py +163 -0
  264. lightly_studio/resolvers/metadata_resolver/sample/__init__.py +21 -0
  265. lightly_studio/resolvers/metadata_resolver/sample/bulk_update_metadata.py +46 -0
  266. lightly_studio/resolvers/metadata_resolver/sample/get_by_sample_id.py +24 -0
  267. lightly_studio/resolvers/metadata_resolver/sample/get_metadata_info.py +104 -0
  268. lightly_studio/resolvers/metadata_resolver/sample/get_value_for_sample.py +27 -0
  269. lightly_studio/resolvers/metadata_resolver/sample/set_value_for_sample.py +53 -0
  270. lightly_studio/resolvers/sample_embedding_resolver.py +132 -0
  271. lightly_studio/resolvers/sample_resolver/__init__.py +17 -0
  272. lightly_studio/resolvers/sample_resolver/count_by_dataset_id.py +16 -0
  273. lightly_studio/resolvers/sample_resolver/create.py +16 -0
  274. lightly_studio/resolvers/sample_resolver/create_many.py +25 -0
  275. lightly_studio/resolvers/sample_resolver/get_by_id.py +14 -0
  276. lightly_studio/resolvers/sample_resolver/get_filtered_samples.py +56 -0
  277. lightly_studio/resolvers/sample_resolver/get_many_by_id.py +22 -0
  278. lightly_studio/resolvers/sample_resolver/sample_filter.py +74 -0
  279. lightly_studio/resolvers/settings_resolver.py +62 -0
  280. lightly_studio/resolvers/tag_resolver.py +299 -0
  281. lightly_studio/resolvers/twodim_embedding_resolver.py +119 -0
  282. lightly_studio/resolvers/video_frame_resolver/__init__.py +23 -0
  283. lightly_studio/resolvers/video_frame_resolver/count_video_frames_annotations.py +83 -0
  284. lightly_studio/resolvers/video_frame_resolver/create_many.py +57 -0
  285. lightly_studio/resolvers/video_frame_resolver/get_all_by_dataset_id.py +63 -0
  286. lightly_studio/resolvers/video_frame_resolver/get_by_id.py +13 -0
  287. lightly_studio/resolvers/video_frame_resolver/get_table_fields_bounds.py +44 -0
  288. lightly_studio/resolvers/video_frame_resolver/video_frame_annotations_counter_filter.py +47 -0
  289. lightly_studio/resolvers/video_frame_resolver/video_frame_filter.py +57 -0
  290. lightly_studio/resolvers/video_resolver/__init__.py +27 -0
  291. lightly_studio/resolvers/video_resolver/count_video_frame_annotations_by_video_dataset.py +86 -0
  292. lightly_studio/resolvers/video_resolver/create_many.py +58 -0
  293. lightly_studio/resolvers/video_resolver/filter_new_paths.py +33 -0
  294. lightly_studio/resolvers/video_resolver/get_all_by_dataset_id.py +181 -0
  295. lightly_studio/resolvers/video_resolver/get_by_id.py +22 -0
  296. lightly_studio/resolvers/video_resolver/get_table_fields_bounds.py +72 -0
  297. lightly_studio/resolvers/video_resolver/get_view_by_id.py +52 -0
  298. lightly_studio/resolvers/video_resolver/video_count_annotations_filter.py +50 -0
  299. lightly_studio/resolvers/video_resolver/video_filter.py +98 -0
  300. lightly_studio/selection/__init__.py +1 -0
  301. lightly_studio/selection/mundig.py +143 -0
  302. lightly_studio/selection/select.py +203 -0
  303. lightly_studio/selection/select_via_db.py +273 -0
  304. lightly_studio/selection/selection_config.py +49 -0
  305. lightly_studio/services/annotations_service/__init__.py +33 -0
  306. lightly_studio/services/annotations_service/create_annotation.py +64 -0
  307. lightly_studio/services/annotations_service/delete_annotation.py +22 -0
  308. lightly_studio/services/annotations_service/get_annotation_by_id.py +31 -0
  309. lightly_studio/services/annotations_service/update_annotation.py +54 -0
  310. lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
  311. lightly_studio/services/annotations_service/update_annotation_label.py +48 -0
  312. lightly_studio/services/annotations_service/update_annotations.py +29 -0
  313. lightly_studio/setup_logging.py +59 -0
  314. lightly_studio/type_definitions.py +31 -0
  315. lightly_studio/utils/__init__.py +3 -0
  316. lightly_studio/utils/download.py +94 -0
  317. lightly_studio/vendor/__init__.py +1 -0
  318. lightly_studio/vendor/mobileclip/ACKNOWLEDGEMENTS +422 -0
  319. lightly_studio/vendor/mobileclip/LICENSE +31 -0
  320. lightly_studio/vendor/mobileclip/LICENSE_weights_data +50 -0
  321. lightly_studio/vendor/mobileclip/README.md +5 -0
  322. lightly_studio/vendor/mobileclip/__init__.py +96 -0
  323. lightly_studio/vendor/mobileclip/clip.py +77 -0
  324. lightly_studio/vendor/mobileclip/configs/mobileclip_b.json +18 -0
  325. lightly_studio/vendor/mobileclip/configs/mobileclip_s0.json +18 -0
  326. lightly_studio/vendor/mobileclip/configs/mobileclip_s1.json +18 -0
  327. lightly_studio/vendor/mobileclip/configs/mobileclip_s2.json +18 -0
  328. lightly_studio/vendor/mobileclip/image_encoder.py +67 -0
  329. lightly_studio/vendor/mobileclip/logger.py +154 -0
  330. lightly_studio/vendor/mobileclip/models/__init__.py +10 -0
  331. lightly_studio/vendor/mobileclip/models/mci.py +933 -0
  332. lightly_studio/vendor/mobileclip/models/vit.py +433 -0
  333. lightly_studio/vendor/mobileclip/modules/__init__.py +4 -0
  334. lightly_studio/vendor/mobileclip/modules/common/__init__.py +4 -0
  335. lightly_studio/vendor/mobileclip/modules/common/mobileone.py +341 -0
  336. lightly_studio/vendor/mobileclip/modules/common/transformer.py +451 -0
  337. lightly_studio/vendor/mobileclip/modules/image/__init__.py +4 -0
  338. lightly_studio/vendor/mobileclip/modules/image/image_projection.py +113 -0
  339. lightly_studio/vendor/mobileclip/modules/image/replknet.py +188 -0
  340. lightly_studio/vendor/mobileclip/modules/text/__init__.py +4 -0
  341. lightly_studio/vendor/mobileclip/modules/text/repmixer.py +281 -0
  342. lightly_studio/vendor/mobileclip/modules/text/tokenizer.py +38 -0
  343. lightly_studio/vendor/mobileclip/text_encoder.py +245 -0
  344. lightly_studio/vendor/perception_encoder/LICENSE.PE +201 -0
  345. lightly_studio/vendor/perception_encoder/README.md +11 -0
  346. lightly_studio/vendor/perception_encoder/vision_encoder/__init__.py +0 -0
  347. lightly_studio/vendor/perception_encoder/vision_encoder/bpe_simple_vocab_16e6.txt.gz +0 -0
  348. lightly_studio/vendor/perception_encoder/vision_encoder/config.py +205 -0
  349. lightly_studio/vendor/perception_encoder/vision_encoder/config_src.py +264 -0
  350. lightly_studio/vendor/perception_encoder/vision_encoder/pe.py +766 -0
  351. lightly_studio/vendor/perception_encoder/vision_encoder/rope.py +352 -0
  352. lightly_studio/vendor/perception_encoder/vision_encoder/tokenizer.py +347 -0
  353. lightly_studio/vendor/perception_encoder/vision_encoder/transforms.py +36 -0
  354. lightly_studio-0.4.6.dist-info/METADATA +88 -0
  355. lightly_studio-0.4.6.dist-info/RECORD +356 -0
  356. lightly_studio-0.4.6.dist-info/WHEEL +4 -0
@@ -0,0 +1,780 @@
1
+ """LightlyStudio Dataset."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from pathlib import Path
7
+ from typing import Generic, Iterable, Iterator
8
+ from uuid import UUID
9
+
10
+ import yaml
11
+ from labelformat.formats import (
12
+ COCOInstanceSegmentationInput,
13
+ COCOObjectDetectionInput,
14
+ YOLOv8ObjectDetectionInput,
15
+ )
16
+ from labelformat.model.instance_segmentation import (
17
+ InstanceSegmentationInput,
18
+ )
19
+ from labelformat.model.object_detection import (
20
+ ObjectDetectionInput,
21
+ )
22
+ from sqlmodel import Session, select
23
+ from typing_extensions import TypeVar
24
+
25
+ from lightly_studio import db_manager
26
+ from lightly_studio.api import features
27
+ from lightly_studio.core import add_samples, add_videos
28
+ from lightly_studio.core.add_videos import VIDEO_EXTENSIONS
29
+ from lightly_studio.core.dataset_query.dataset_query import DatasetQuery
30
+ from lightly_studio.core.dataset_query.match_expression import MatchExpression
31
+ from lightly_studio.core.dataset_query.order_by import OrderByExpression
32
+ from lightly_studio.core.image_sample import ImageSample
33
+ from lightly_studio.core.sample import Sample
34
+ from lightly_studio.dataset import fsspec_lister
35
+ from lightly_studio.dataset.embedding_manager import EmbeddingManagerProvider
36
+ from lightly_studio.metadata import compute_similarity, compute_typicality
37
+ from lightly_studio.models.annotation.annotation_base import (
38
+ AnnotationType,
39
+ )
40
+ from lightly_studio.models.dataset import DatasetCreate, DatasetTable, SampleType
41
+ from lightly_studio.models.image import ImageTable
42
+ from lightly_studio.models.sample import SampleTable
43
+ from lightly_studio.resolvers import (
44
+ dataset_resolver,
45
+ embedding_model_resolver,
46
+ image_resolver,
47
+ sample_embedding_resolver,
48
+ tag_resolver,
49
+ )
50
+ from lightly_studio.type_definitions import PathLike
51
+
52
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Constants
# Name given to a dataset by Dataset.create() when the caller passes no name.
DEFAULT_DATASET_NAME = "default_dataset"
# Split names accepted for YOLO datasets.
# NOTE(review): the code that checks against this set is outside this view - confirm usage.
ALLOWED_YOLO_SPLITS = {"train", "val", "test", "minival"}

_SliceType = slice  # to avoid shadowing built-in slice in type annotations


# Type parameter of Dataset: bound to Sample, defaulting to ImageSample when
# the dataset is not explicitly parameterised.
T = TypeVar("T", default=ImageSample, bound=Sample)
62
+
63
+
64
+ class Dataset(Generic[T]):
65
+ """A LightlyStudio Dataset.
66
+
67
+ It can be created or loaded using one of the static methods:
68
+ ```python
69
+ dataset = Dataset.create()
70
+ dataset = Dataset.load()
71
+ dataset = Dataset.load_or_create()
72
+ ```
73
+
74
+ Samples can be added to the dataset using various methods:
75
+ ```python
76
+ dataset.add_images_from_path(...)
77
+ dataset.add_samples_from_yolo(...)
78
+ dataset.add_samples_from_coco(...)
79
+ dataset.add_samples_from_coco_caption(...)
80
+ dataset.add_samples_from_labelformat(...)
81
+ dataset.add_videos_from_path(...)
82
+ ```
83
+
84
+ The dataset samples can be queried directly by iterating over it or slicing it:
85
+ ```python
86
+ dataset = Dataset.load("my_dataset")
87
+ first_ten_samples = dataset[:10]
88
+ for sample in dataset:
89
+ print(sample.file_name)
90
+ sample.metadata["new_key"] = "new_value"
91
+ ```
92
+
93
+ For filtering or ordering samples first, use the query interface:
94
+ ```python
95
+ from lightly_studio.core.dataset_query.sample_field import SampleField
96
+
97
+ dataset = Dataset.load("my_dataset")
98
+ query = dataset.match(SampleField.width > 10).order_by(SampleField.file_name)
99
+ for sample in query:
100
+ ...
101
+ ```
102
+ """
103
+
104
def __init__(self, dataset: DatasetTable) -> None:
    """Wrap an existing dataset record.

    Args:
        dataset: The database record this object exposes.
    """
    self._inner = dataset
    # TODO(Michal, 09/2025): Do not store the session. Instead, use the
    # dataset object session.
    persistent = db_manager.persistent_session()
    self.session = persistent
110
+
111
@staticmethod
def create(name: str | None = None, sample_type: SampleType = SampleType.IMAGE) -> Dataset:
    """Create a new dataset and return a wrapper around it.

    Args:
        name: Name for the new dataset. When None, ``DEFAULT_DATASET_NAME`` is used.
        sample_type: The type of samples in the dataset. Defaults to SampleType.IMAGE.

    Returns:
        A Dataset wrapping the record returned by the dataset resolver.
    """
    dataset_name = DEFAULT_DATASET_NAME if name is None else name

    # Obtain the session before building the create payload, matching the
    # evaluation order of the single-call form.
    session = db_manager.persistent_session()
    create_request = DatasetCreate(name=dataset_name, sample_type=sample_type)
    created = dataset_resolver.create(session=session, dataset=create_request)
    return Dataset(dataset=created)
127
+
128
@staticmethod
def load(name: str | None = None) -> Dataset:
    """Load an existing dataset.

    Args:
        name: The name of the dataset. If None, ``DEFAULT_DATASET_NAME`` is used.

    Returns:
        A Dataset wrapping the stored record with the given name.

    Raises:
        ValueError: If no dataset with the given name is found.
    """
    if name is None:
        # Use the shared constant (as Dataset.create does) so the fallback
        # name cannot drift between create and load.
        name = DEFAULT_DATASET_NAME

    dataset = dataset_resolver.get_by_name(session=db_manager.persistent_session(), name=name)
    if dataset is None:
        raise ValueError(f"Dataset with name '{name}' not found.")
    # If we have embeddings in the database enable the FSC and embedding search features.
    _enable_embedding_features_if_available(
        session=db_manager.persistent_session(), dataset_id=dataset.dataset_id
    )
    return Dataset(dataset=dataset)
142
+
143
@staticmethod
def load_or_create(
    name: str | None = None, sample_type: SampleType = SampleType.IMAGE
) -> Dataset:
    """Create a new dataset or load an existing one.

    Args:
        name: The name of the dataset. If None, ``DEFAULT_DATASET_NAME`` is used.
        sample_type: The type of samples in the dataset. Defaults to SampleType.IMAGE.

    Returns:
        A Dataset wrapping the existing record with that name, or a newly
        created one when no such record exists.

    Raises:
        ValueError: If a dataset with that name exists but its sample type
            differs from ``sample_type``.
    """
    if name is None:
        # Use the shared constant (as Dataset.create does) so the fallback
        # name cannot drift between create and load_or_create.
        name = DEFAULT_DATASET_NAME

    dataset = dataset_resolver.get_by_name(session=db_manager.persistent_session(), name=name)
    if dataset is None:
        return Dataset.create(name=name, sample_type=sample_type)

    # Dataset exists, verify the sample type matches.
    if dataset.sample_type != sample_type:
        raise ValueError(
            f"Dataset with name '{name}' already exists with sample type "
            f"'{dataset.sample_type.value}', but '{sample_type.value}' was requested."
        )

    # If we have embeddings in the database enable the FSC and embedding search features.
    _enable_embedding_features_if_available(
        session=db_manager.persistent_session(), dataset_id=dataset.dataset_id
    )
    return Dataset(dataset=dataset)
172
+
173
# TODO(lukas 12/2025): return `Iterator[T]` instead
def __iter__(self) -> Iterator[ImageSample]:
    """Iterate over the samples of this dataset.

    Yields:
        An ImageSample wrapping each ImageTable row whose joined sample has
        this dataset's ID.
    """
    statement = (
        select(ImageTable)
        .join(ImageTable.sample)
        .where(SampleTable.dataset_id == self.dataset_id)
    )
    for row in self.session.exec(statement):
        yield ImageSample(inner=row)
182
+
183
def get_sample(self, sample_id: UUID) -> ImageSample:
    """Look up a single sample by its ID.

    Args:
        sample_id: The UUID of the sample to retrieve.

    Returns:
        An ImageSample wrapping the row found by the image resolver.

    Raises:
        IndexError: If no sample is found with the given sample_id.
    """
    found = image_resolver.get_by_id(self.session, sample_id=sample_id)
    if found is not None:
        return ImageSample(inner=found)
    raise IndexError(f"No sample found for sample_id: {sample_id}")
200
+
201
@property
def dataset_id(self) -> UUID:
    """Return the ID stored on the wrapped dataset record."""
    record = self._inner
    return record.dataset_id
205
+
206
@property
def name(self) -> str:
    """Return the name stored on the wrapped dataset record."""
    record = self._inner
    return record.name
210
+
211
def query(self) -> DatasetQuery:
    """Start a new query over this dataset.

    Returns:
        A DatasetQuery built from the wrapped dataset record and this
        dataset's session.
    """
    dataset_query = DatasetQuery(dataset=self._inner, session=self.session)
    return dataset_query
218
+
219
def match(self, match_expression: MatchExpression) -> DatasetQuery:
    """Start a query on the dataset filtered by the given expression.

    Args:
        match_expression: Defines the filter.

    Returns:
        DatasetQuery for method chaining.
    """
    fresh_query = self.query()
    return fresh_query.match(match_expression)
229
+
230
def order_by(self, *order_by: OrderByExpression) -> DatasetQuery:
    """Start a query on the dataset with the given ordering expressions.

    Args:
        order_by: One or more ordering expressions. They are applied in order.
            E.g. first ordering by sample width and then by sample file_name will
            only order the samples with the same sample width by file_name.

    Returns:
        DatasetQuery for method chaining.
    """
    fresh_query = self.query()
    return fresh_query.order_by(*order_by)
242
+
243
def slice(self, offset: int = 0, limit: int | None = None) -> DatasetQuery:
    """Start a query on the dataset restricted by offset and limit.

    Args:
        offset: Number of items to skip from beginning (default: 0).
        limit: Maximum number of items to return (None = no limit).

    Returns:
        DatasetQuery for method chaining.
    """
    fresh_query = self.query()
    return fresh_query.slice(offset, limit)
254
+
255
def __getitem__(self, key: _SliceType) -> DatasetQuery:
    """Support bracket notation by delegating to a fresh query.

    Args:
        key: A slice object (e.g., [10:20], [:50], [100:]).

    Returns:
        DatasetQuery with slice applied.

    Raises:
        TypeError: If key is not a slice object.
        ValueError: If slice contains unsupported features or conflicts with existing slice.
    """
    # Validation of the key is left to the query's own bracket handling.
    fresh_query = self.query()
    return fresh_query[key]
269
+
270
def add_videos_from_path(
    self,
    path: PathLike,
    allowed_extensions: Iterable[str] | None = None,
    num_decode_threads: int | None = None,
    embed: bool = True,
) -> None:
    """Add the videos found under a path to the dataset.

    Args:
        path: Path to the folder containing the videos to add.
        allowed_extensions: An iterable container of allowed video file
            extensions in lowercase, including the leading dot. If None
            (or empty), the default VIDEO_EXTENSIONS are used.
        num_decode_threads: Optional override for the number of FFmpeg decode threads.
            If omitted, the available CPU cores - 1 (max 16) are used.
        embed: If True, generate embeddings for the newly added videos.
    """
    # Collect video file paths; user-supplied extensions are lowercased.
    extensions = (
        {ext.lower() for ext in allowed_extensions} if allowed_extensions else VIDEO_EXTENSIONS
    )
    video_paths = list(
        fsspec_lister.iter_files_from_path(path=str(path), allowed_extensions=extensions)
    )
    logger.info(f"Found {len(video_paths)} videos in {path}.")

    # Process videos. Only the first element of the loader's result is used.
    created_sample_ids, _ = add_videos.load_into_dataset_from_paths(
        session=self.session,
        dataset_id=self.dataset_id,
        video_paths=video_paths,
        num_decode_threads=num_decode_threads,
    )

    if not embed:
        return
    _generate_embeddings_video(
        session=self.session,
        dataset_id=self.dataset_id,
        sample_ids=created_sample_ids,
    )
314
+
315
def add_images_from_path(
    self,
    path: PathLike,
    allowed_extensions: Iterable[str] | None = None,
    embed: bool = True,
    tag_depth: int = 0,
) -> None:
    """Add the images found under a path to the dataset.

    Args:
        path: Path to the folder containing the images to add.
        allowed_extensions: An iterable container of allowed image file
            extensions. They are lowercased before use; if None (or empty),
            no extension set is passed to the file lister.
        embed: If True, generate embeddings for the newly added images.
        tag_depth: Defines the tagging behavior based on directory depth.
            - `tag_depth=0` (default): No automatic tagging is performed.
            - `tag_depth=1`: Automatically creates a tag for each
              image based on its parent directory's name.

    Raises:
        NotImplementedError: If tag_depth > 1 (presumably raised by the
            tagging helper, which only runs when samples were created —
            verify against `tag_samples_by_directory`).
    """
    # Collect image file paths.
    extensions = {ext.lower() for ext in allowed_extensions} if allowed_extensions else None
    image_paths = list(
        fsspec_lister.iter_files_from_path(path=str(path), allowed_extensions=extensions)
    )
    logger.info(f"Found {len(image_paths)} images in {path}.")

    # Process images.
    created_sample_ids = add_samples.load_into_dataset_from_paths(
        session=self.session,
        dataset_id=self.dataset_id,
        image_paths=image_paths,
    )

    # Tagging is skipped entirely when nothing new was created.
    if created_sample_ids:
        add_samples.tag_samples_by_directory(
            session=self.session,
            dataset_id=self.dataset_id,
            input_path=path,
            sample_ids=created_sample_ids,
            tag_depth=tag_depth,
        )

    if not embed:
        return
    _generate_embeddings_image(
        session=self.session, dataset_id=self.dataset_id, sample_ids=created_sample_ids
    )
370
+
371
def add_samples_from_labelformat(
    self,
    input_labels: ObjectDetectionInput | InstanceSegmentationInput,
    images_path: PathLike,
    embed: bool = True,
) -> None:
    """Load a dataset from a labelformat object and store it in the database.

    Args:
        input_labels: The labelformat input object.
        images_path: Path to the folder containing the images.
        embed: If True, generate embeddings for the newly added samples.
    """
    # Normalize through Path() so any path-like value (not only str) works,
    # matching how add_samples_from_coco handles its images_path.
    images_path = Path(images_path).absolute()

    created_sample_ids = add_samples.load_into_dataset_from_labelformat(
        session=self.session,
        dataset_id=self.dataset_id,
        input_labels=input_labels,
        images_path=images_path,
    )

    if embed:
        _generate_embeddings_image(
            session=self.session, dataset_id=self.dataset_id, sample_ids=created_sample_ids
        )
399
+
400
def add_samples_from_yolo(
    self,
    data_yaml: PathLike,
    input_split: str | None = None,
    embed: bool = True,
) -> None:
    """Load a dataset in YOLO format and store it in the database.

    Args:
        data_yaml: Path to the YOLO data config file (`.yaml` or `.yml`).
        input_split: The split to load (e.g., 'train', 'val', 'test').
            If None, all available splits will be loaded and assigned a corresponding tag.
        embed: If True, generate embeddings for the newly added samples.

    Raises:
        FileNotFoundError: If data_yaml is not an existing file with a YAML suffix.
        ValueError: If the requested split is not allowed or no split is found
            in the config (raised by the split resolution helper).
    """
    # Normalize through Path() so any path-like value (not only str) works.
    data_yaml = Path(data_yaml).absolute()

    # Accept both common YAML suffixes, case-insensitively.
    if not data_yaml.is_file() or data_yaml.suffix.lower() not in {".yaml", ".yml"}:
        raise FileNotFoundError(f"YOLO data yaml file not found: '{data_yaml}'")

    # Determine which splits to process.
    splits_to_process = _resolve_yolo_splits(data_yaml=data_yaml, input_split=input_split)

    all_created_sample_ids: list[UUID] = []

    for split in splits_to_process:
        # Load the split using labelformat.
        label_input = YOLOv8ObjectDetectionInput(
            input_file=data_yaml,
            input_split=split,
        )
        # labelformat exposes the images directory only via a private helper.
        images_path = label_input._images_dir()  # noqa: SLF001

        created_sample_ids = add_samples.load_into_dataset_from_labelformat(
            session=self.session,
            dataset_id=self.dataset_id,
            input_labels=label_input,
            images_path=images_path,
        )

        # Tag samples with the split name; skipped when nothing was created.
        if created_sample_ids:
            tag = tag_resolver.get_or_create_sample_tag_by_name(
                session=self.session,
                dataset_id=self.dataset_id,
                tag_name=split,
            )
            tag_resolver.add_sample_ids_to_tag_id(
                session=self.session,
                tag_id=tag.tag_id,
                sample_ids=created_sample_ids,
            )

        all_created_sample_ids.extend(created_sample_ids)

    # Generate embeddings for all samples at once.
    if embed:
        _generate_embeddings_image(
            session=self.session, dataset_id=self.dataset_id, sample_ids=all_created_sample_ids
        )
462
+
463
def add_samples_from_coco(
    self,
    annotations_json: PathLike,
    images_path: PathLike,
    annotation_type: AnnotationType = AnnotationType.OBJECT_DETECTION,
    split: str | None = None,
    embed: bool = True,
) -> None:
    """Load a dataset in COCO format and store it in the database.

    Both object detection and instance segmentation annotations are supported,
    selected via `annotation_type`.

    Args:
        annotations_json: Path to the COCO annotations JSON file.
        images_path: Path to the folder containing the images.
        annotation_type: The type of annotation to be loaded (e.g., 'ObjectDetection',
            'InstanceSegmentation').
        split: Optional split name to tag samples (e.g., 'train', 'val').
            If provided, all samples will be tagged with this name.
        embed: If True, generate embeddings for the newly added samples.

    Raises:
        FileNotFoundError: If annotations_json is not an existing `.json` file.
        ValueError: If annotation_type is neither object detection nor
            instance segmentation.
    """
    # Normalize through Path() so any path-like value (not only str) works,
    # the same way images_path is handled below.
    annotations_json = Path(annotations_json).absolute()

    if not annotations_json.is_file() or annotations_json.suffix != ".json":
        raise FileNotFoundError(f"COCO annotations json file not found: '{annotations_json}'")

    label_input: COCOObjectDetectionInput | COCOInstanceSegmentationInput

    if annotation_type == AnnotationType.OBJECT_DETECTION:
        label_input = COCOObjectDetectionInput(
            input_file=annotations_json,
        )
    elif annotation_type == AnnotationType.INSTANCE_SEGMENTATION:
        label_input = COCOInstanceSegmentationInput(
            input_file=annotations_json,
        )
    else:
        raise ValueError(f"Invalid annotation type: {annotation_type}")

    images_path = Path(images_path).absolute()

    created_sample_ids = add_samples.load_into_dataset_from_labelformat(
        session=self.session,
        dataset_id=self.dataset_id,
        input_labels=label_input,
        images_path=images_path,
    )

    # Tag samples with split name if provided; skipped when nothing was created.
    if split is not None and created_sample_ids:
        tag = tag_resolver.get_or_create_sample_tag_by_name(
            session=self.session,
            dataset_id=self.dataset_id,
            tag_name=split,
        )
        tag_resolver.add_sample_ids_to_tag_id(
            session=self.session,
            tag_id=tag.tag_id,
            sample_ids=created_sample_ids,
        )

    if embed:
        _generate_embeddings_image(
            session=self.session, dataset_id=self.dataset_id, sample_ids=created_sample_ids
        )
528
+
529
def add_samples_from_coco_caption(
    self,
    annotations_json: PathLike,
    images_path: PathLike,
    split: str | None = None,
    embed: bool = True,
) -> None:
    """Load a dataset in COCO caption format and store it in the database.

    Args:
        annotations_json: Path to the COCO caption JSON file.
        images_path: Path to the folder containing the images.
        split: Optional split name to tag samples (e.g., 'train', 'val').
            If provided, all samples will be tagged with this name.
        embed: If True, generate embeddings for the newly added samples.

    Raises:
        FileNotFoundError: If annotations_json is not an existing `.json` file.
    """
    # Normalize through Path() so any path-like value (not only str) works,
    # matching how add_samples_from_coco handles its images_path.
    annotations_json = Path(annotations_json).absolute()

    if not annotations_json.is_file() or annotations_json.suffix != ".json":
        raise FileNotFoundError(f"COCO caption json file not found: '{annotations_json}'")

    images_path = Path(images_path).absolute()

    created_sample_ids = add_samples.load_into_dataset_from_coco_captions(
        session=self.session,
        dataset_id=self.dataset_id,
        annotations_json=annotations_json,
        images_path=images_path,
    )

    # Tag samples with split name if provided; skipped when nothing was created.
    if split is not None and created_sample_ids:
        tag = tag_resolver.get_or_create_sample_tag_by_name(
            session=self.session,
            dataset_id=self.dataset_id,
            tag_name=split,
        )
        tag_resolver.add_sample_ids_to_tag_id(
            session=self.session,
            tag_id=tag.tag_id,
            sample_ids=created_sample_ids,
        )

    if embed:
        _generate_embeddings_image(
            session=self.session, dataset_id=self.dataset_id, sample_ids=created_sample_ids
        )
580
+
581
def compute_typicality_metadata(
    self,
    embedding_model_name: str | None = None,
    metadata_name: str = "typicality",
) -> None:
    """Compute typicality from embeddings, for K nearest neighbors.

    Args:
        embedding_model_name:
            The name of the embedding model to use. If not given, the default
            embedding model is used.
        metadata_name:
            The name of the metadata to store the typicality values in. If not
            given, the default name "typicality" is used.
    """
    # Resolve the embedding model by name within this dataset.
    embedding_model = embedding_model_resolver.get_by_name(
        session=self.session,
        dataset_id=self.dataset_id,
        embedding_model_name=embedding_model_name,
    )
    model_id = embedding_model.embedding_model_id

    compute_typicality.compute_typicality_metadata(
        session=self.session,
        dataset_id=self.dataset_id,
        embedding_model_id=model_id,
        metadata_name=metadata_name,
    )
607
+
608
def compute_similarity_metadata(
    self,
    query_tag_name: str,
    embedding_model_name: str | None = None,
    metadata_name: str | None = None,
) -> str:
    """Compute similarity with respect to a query tag.

    Args:
        query_tag_name:
            The name of the tag to use for the query.
        embedding_model_name:
            The name of the embedding model to use. If not given, the default
            embedding model is used.
        metadata_name:
            The name of the metadata to store the similarity values in.
            If not given, a name is generated automatically.

    Returns:
        The name of the metadata storing the similarity values.

    Raises:
        ValueError: If no tag named `query_tag_name` exists in this dataset.
    """
    # The embedding model is resolved before the tag lookup.
    embedding_model = embedding_model_resolver.get_by_name(
        session=self.session,
        dataset_id=self.dataset_id,
        embedding_model_name=embedding_model_name,
    )
    model_id = embedding_model.embedding_model_id

    tag = tag_resolver.get_by_name(
        session=self.session, tag_name=query_tag_name, dataset_id=self.dataset_id
    )
    if tag is None:
        raise ValueError("Query tag not found")

    stored_metadata_name = compute_similarity.compute_similarity_metadata(
        session=self.session,
        key_dataset_id=self.dataset_id,
        embedding_model_id=model_id,
        query_tag_id=tag.tag_id,
        metadata_name=metadata_name,
    )
    return stored_metadata_name
646
+
647
+
648
def _generate_embeddings_video(
    session: Session,
    dataset_id: UUID,
    sample_ids: list[UUID],
) -> None:
    """Generate and store video embeddings for the given samples.

    Does nothing when `sample_ids` is empty. When no default embedding model
    is available, a warning is logged and nothing is embedded. After a
    successful run the embedding-related features are marked as enabled.

    Args:
        session: Database session for resolver operations.
        dataset_id: The ID of the dataset to associate with the embedding model.
        sample_ids: List of sample IDs to generate embeddings for.
    """
    if sample_ids:
        manager = EmbeddingManagerProvider.get_embedding_manager()
        default_model_id = manager.load_or_get_default_model(
            session=session, dataset_id=dataset_id
        )
        if default_model_id is None:
            logger.warning("No embedding model loaded. Skipping embedding generation.")
            return

        manager.embed_videos(
            session=session,
            dataset_id=dataset_id,
            sample_ids=sample_ids,
            embedding_model_id=default_model_id,
        )

        _mark_embedding_features_enabled()
677
+
678
+
679
def _generate_embeddings_image(
    session: Session,
    dataset_id: UUID,
    sample_ids: list[UUID],
) -> None:
    """Generate and store image embeddings for the given samples.

    Does nothing when `sample_ids` is empty. When no default embedding model
    is available, a warning is logged and nothing is embedded. After a
    successful run the embedding-related features are marked as enabled.

    Args:
        session: Database session for resolver operations.
        dataset_id: The ID of the dataset to associate with the embedding model.
        sample_ids: List of sample IDs to generate embeddings for.
    """
    if sample_ids:
        manager = EmbeddingManagerProvider.get_embedding_manager()
        default_model_id = manager.load_or_get_default_model(
            session=session, dataset_id=dataset_id
        )
        if default_model_id is None:
            logger.warning("No embedding model loaded. Skipping embedding generation.")
            return

        manager.embed_images(
            session=session,
            dataset_id=dataset_id,
            sample_ids=sample_ids,
            embedding_model_id=default_model_id,
        )

        _mark_embedding_features_enabled()
709
+
710
+
711
def _mark_embedding_features_enabled() -> None:
    """Add the embedding search and FSC flags to the active feature list.

    Each flag is appended only if it is not already present, so repeated
    calls do not create duplicates.
    """
    active_features = features.lightly_studio_active_features
    # Order matters for the resulting list: embedding search first, then FSC.
    for flag in ("embeddingSearchEnabled", "fewShotClassifierEnabled"):
        if flag not in active_features:
            active_features.append(flag)
718
+
719
+
720
def _resolve_yolo_splits(data_yaml: Path, input_split: str | None) -> list[str]:
    """Determine which YOLO splits to process for the given config.

    Args:
        data_yaml: Path to the YOLO config file. It is only read when no
            explicit split is requested.
        input_split: The requested split, or None to detect splits from the
            top-level keys of the config.

    Returns:
        A one-element list with `input_split`, or every top-level config key
        that is in ALLOWED_YOLO_SPLITS, in config order.

    Raises:
        ValueError: If `input_split` is not in ALLOWED_YOLO_SPLITS, or if the
            config contains none of the allowed split keys.
    """
    if input_split is None:
        with data_yaml.open() as f:
            config = yaml.safe_load(f)

        # A config that is not a mapping contributes no splits.
        if isinstance(config, dict):
            detected = [key for key in config if key in ALLOWED_YOLO_SPLITS]
        else:
            detected = []
        if not detected:
            raise ValueError(f"No splits found in config file '{data_yaml}'")
        return detected

    if input_split in ALLOWED_YOLO_SPLITS:
        return [input_split]
    raise ValueError(
        f"Split '{input_split}' not found in config file '{data_yaml}'. "
        f"Allowed splits: {sorted(ALLOWED_YOLO_SPLITS)}"
    )
738
+
739
+
740
def _are_embeddings_available(session: Session, dataset_id: UUID) -> bool:
    """Check whether the dataset has embeddings for its default model.

    Args:
        session: Database session for resolver operations.
        dataset_id: The ID of the dataset to check for embeddings.

    Returns:
        True if at least one embedding exists for the dataset under the
        default embedding model; False if there is none or if no default
        model is available.
    """
    manager = EmbeddingManagerProvider.get_embedding_manager()
    default_model_id = manager.load_or_get_default_model(
        session=session,
        dataset_id=dataset_id,
    )
    if default_model_id is None:
        # No default embedding model loaded for this dataset.
        return False

    stored_embeddings = sample_embedding_resolver.get_all_by_dataset_id(
        session=session, dataset_id=dataset_id, embedding_model_id=default_model_id
    )
    return len(stored_embeddings) > 0
767
+
768
+
769
def _enable_embedding_features_if_available(session: Session, dataset_id: UUID) -> None:
    """Enable embedding-related features if embeddings are available in the DB.

    Args:
        session: Database session for resolver operations.
        dataset_id: The ID of the dataset to check for embeddings.
    """
    if _are_embeddings_available(session=session, dataset_id=dataset_id):
        # Reuse the shared helper instead of duplicating the flag handling, so
        # the set of embedding-related feature flags is defined in one place.
        _mark_embedding_features_enabled()