lightly-studio 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lightly-studio might be problematic. Click here for more details.

Files changed (133)
  1. lightly_studio/__init__.py +4 -4
  2. lightly_studio/api/app.py +1 -1
  3. lightly_studio/api/routes/api/annotation.py +6 -16
  4. lightly_studio/api/routes/api/annotation_label.py +2 -5
  5. lightly_studio/api/routes/api/annotation_task.py +4 -5
  6. lightly_studio/api/routes/api/classifier.py +2 -5
  7. lightly_studio/api/routes/api/dataset.py +2 -3
  8. lightly_studio/api/routes/api/dataset_tag.py +2 -3
  9. lightly_studio/api/routes/api/metadata.py +2 -4
  10. lightly_studio/api/routes/api/metrics.py +2 -6
  11. lightly_studio/api/routes/api/sample.py +5 -13
  12. lightly_studio/api/routes/api/settings.py +2 -6
  13. lightly_studio/api/routes/images.py +6 -6
  14. lightly_studio/core/add_samples.py +383 -0
  15. lightly_studio/core/dataset.py +250 -362
  16. lightly_studio/core/dataset_query/__init__.py +0 -0
  17. lightly_studio/core/dataset_query/boolean_expression.py +67 -0
  18. lightly_studio/core/dataset_query/dataset_query.py +211 -0
  19. lightly_studio/core/dataset_query/field.py +113 -0
  20. lightly_studio/core/dataset_query/field_expression.py +79 -0
  21. lightly_studio/core/dataset_query/match_expression.py +23 -0
  22. lightly_studio/core/dataset_query/order_by.py +79 -0
  23. lightly_studio/core/dataset_query/sample_field.py +28 -0
  24. lightly_studio/core/dataset_query/tags_expression.py +46 -0
  25. lightly_studio/core/sample.py +159 -32
  26. lightly_studio/core/start_gui.py +35 -0
  27. lightly_studio/dataset/edge_embedding_generator.py +13 -8
  28. lightly_studio/dataset/embedding_generator.py +2 -3
  29. lightly_studio/dataset/embedding_manager.py +74 -6
  30. lightly_studio/dataset/fsspec_lister.py +275 -0
  31. lightly_studio/dataset/loader.py +49 -30
  32. lightly_studio/dataset/mobileclip_embedding_generator.py +6 -4
  33. lightly_studio/db_manager.py +145 -0
  34. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BBm0IWdq.css +1 -0
  35. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BNTuXSAe.css +1 -0
  36. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/2O287xak.js +3 -0
  37. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{O-EABkf9.js → 7YNGEs1C.js} +1 -1
  38. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BBoGk9hq.js +1 -0
  39. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BRnH9v23.js +92 -0
  40. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bg1Y5eUZ.js +1 -0
  41. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DOlTMNyt.js → BqBqV92V.js} +1 -1
  42. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C0JiMuYn.js +1 -0
  43. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DjfY96ND.js → C98Hk3r5.js} +1 -1
  44. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{r64xT6ao.js → CG0dMCJi.js} +1 -1
  45. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{C8I8rFJQ.js → Ccq4ZD0B.js} +1 -1
  46. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cpy-nab_.js +1 -0
  47. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bu7uvVrG.js → Crk-jcvV.js} +1 -1
  48. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cs31G8Qn.js +1 -0
  49. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CsKrY2zA.js +1 -0
  50. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{x9G_hzyY.js → Cur71c3O.js} +1 -1
  51. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CzgC3GFB.js +1 -0
  52. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D8GZDMNN.js +1 -0
  53. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DFRh-Spp.js +1 -0
  54. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BylOuP6i.js → DRZO-E-T.js} +1 -1
  55. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{l7KrR96u.js → DcGCxgpH.js} +1 -1
  56. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bsi3UGy5.js → Df3aMO5B.js} +1 -1
  57. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{hQVEETDE.js → DkR_EZ_B.js} +1 -1
  58. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DqUGznj_.js +1 -0
  59. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KpAtIldw.js +1 -0
  60. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/M1Q1F7bw.js +4 -0
  61. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{CDnpyLsT.js → OH7-C_mc.js} +1 -1
  62. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D6su9Aln.js → gLNdjSzu.js} +1 -1
  63. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/i0ZZ4z06.js +1 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BI-EA5gL.js +2 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.CcsRl3cZ.js +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.BbO4Zc3r.js +1 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{1.B4rNYwVp.js → 1._I9GR805.js} +1 -1
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.J2RBFrSr.js +1 -0
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.Cmqj25a-.js +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C45iKJHA.js +6 -0
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.CWHpKonm.js → 3.w9g4AcAx.js} +1 -1
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.OUWOLQeV.js → 4.BBI8KwnD.js} +1 -1
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.huHuxdiF.js +1 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.CrbkRPam.js +1 -0
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.FomEdhD6.js +1 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cb_ADSLk.js +1 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{9.CPu3CiBc.js → 9.CajIG5ce.js} +1 -1
  78. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
  79. lightly_studio/dist_lightly_studio_view_app/index.html +14 -14
  80. lightly_studio/examples/example.py +13 -12
  81. lightly_studio/examples/example_coco.py +13 -0
  82. lightly_studio/examples/example_metadata.py +83 -98
  83. lightly_studio/examples/example_selection.py +7 -19
  84. lightly_studio/examples/example_split_work.py +12 -36
  85. lightly_studio/examples/{example_v2.py → example_yolo.py} +3 -4
  86. lightly_studio/models/annotation/annotation_base.py +7 -8
  87. lightly_studio/models/annotation/instance_segmentation.py +8 -8
  88. lightly_studio/models/annotation/object_detection.py +4 -4
  89. lightly_studio/models/dataset.py +6 -2
  90. lightly_studio/models/sample.py +10 -3
  91. lightly_studio/resolvers/dataset_resolver.py +10 -0
  92. lightly_studio/resolvers/embedding_model_resolver.py +22 -0
  93. lightly_studio/resolvers/sample_resolver.py +53 -9
  94. lightly_studio/resolvers/tag_resolver.py +23 -0
  95. lightly_studio/selection/select.py +55 -46
  96. lightly_studio/selection/select_via_db.py +23 -19
  97. lightly_studio/selection/selection_config.py +6 -3
  98. lightly_studio/services/annotations_service/__init__.py +4 -0
  99. lightly_studio/services/annotations_service/update_annotation.py +21 -32
  100. lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
  101. lightly_studio-0.3.2.dist-info/METADATA +689 -0
  102. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.2.dist-info}/RECORD +104 -91
  103. lightly_studio/api/db.py +0 -133
  104. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +0 -1
  105. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +0 -1
  106. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +0 -1
  107. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +0 -1
  108. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +0 -1
  109. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +0 -1
  110. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +0 -1
  111. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +0 -93
  112. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +0 -3
  113. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D5-A_Ffd.js +0 -4
  114. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +0 -1
  115. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +0 -1
  116. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +0 -1
  117. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +0 -1
  118. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +0 -1
  119. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +0 -1
  120. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +0 -1
  121. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +0 -2
  122. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +0 -1
  123. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +0 -1
  124. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +0 -1
  125. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +0 -1
  126. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +0 -6
  127. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +0 -1
  128. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +0 -1
  129. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +0 -1
  130. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +0 -1
  131. lightly_studio-0.3.1.dist-info/METADATA +0 -520
  132. /lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{OpenSans- → OpenSans-Medium.DVUZMR_6.ttf} +0 -0
  133. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.2.dist-info}/WHEEL +0 -0
@@ -0,0 +1,275 @@
1
+ """File listing utilities using fsspec.
2
+
3
+ Handles local and remote paths, directories, and glob patterns.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ import logging
9
+ from collections.abc import Iterator
10
+ from typing import Any
11
+
12
+ import fsspec
13
+ from tqdm import tqdm
14
+
15
# Constants
# Separator between a protocol name and the rest of a path, e.g. "s3://bucket/key".
PROTOCOL_SEPARATOR = "://"
# Protocol assumed for paths that carry no explicit "<protocol>://" prefix.
DEFAULT_PROTOCOL = "file"
# Separator used when joining a walked directory with a file name.
PATH_SEPARATOR = "/"

# Glob pattern characters: a path containing any of these is treated as a glob.
GLOB_CHARS = ["*", "?", "[", "]"]

# Cloud storage protocols: directories on these are listed with fs.walk()
# instead of fs.find().
CLOUD_PROTOCOLS = ("s3", "gs", "gcs", "azure", "abfs")

# Image file extensions used when the caller supplies no extension filter.
IMAGE_EXTENSIONS = {
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".webp",
    ".bmp",
    ".tiff",
}
36
+
37
+
38
def iter_files_from_path(path: str, allowed_extensions: set[str] | None = None) -> Iterator[str]:
    """Yield every matching file reachable from a single path.

    The path may be an individual file, a directory (listed recursively),
    a glob pattern, or a remote location such as ``s3://...``.

    Args:
        path: The file, directory, glob pattern, or remote path to expand.
        allowed_extensions: File extensions to accept (e.g., {".jpg", ".png"}).
            A falsy value (None or an empty set) selects IMAGE_EXTENSIONS.

    Yields:
        File paths as they are discovered; a progress bar is advanced
        for each yielded path.
    """
    active_extensions = allowed_extensions if allowed_extensions else IMAGE_EXTENSIONS
    already_yielded: set[str] = set()
    with tqdm(desc="Discovering files", unit=" files", dynamic_ncols=True) as progress:
        normalized = str(path).strip()
        # A blank path yields nothing; the progress bar is still opened and closed.
        if normalized:
            filesystem = _get_filesystem(normalized)
            yield from _process_single_path_streaming(
                filesystem, normalized, already_yielded, progress, active_extensions
            )
61
+
62
+
63
def _process_single_path_streaming(
    fs: fsspec.AbstractFileSystem, path: str, seen: set[str], pbar: tqdm[Any], extensions: set[str]
) -> Iterator[str]:
    """Expand one path into the image files it refers to.

    Dispatches on the kind of path: glob pattern, single file, or directory.

    Args:
        fs: The filesystem instance.
        path: The path to process (file, directory, or glob pattern).
        seen: Paths already yielded; updated in place to suppress duplicates.
        pbar: Progress bar advanced once per yielded path.
        extensions: Set of allowed file extensions.

    Yields:
        File paths that match the criteria.

    Raises:
        ValueError: If a non-glob path does not exist, or if it is a file
            whose extension is not in ``extensions``.
    """
    # Glob patterns are expanded without an existence check on the pattern itself.
    if _is_glob_pattern(path):
        yield from _process_glob_pattern(fs, path, seen, pbar, extensions)
        return

    if not fs.exists(path):
        raise ValueError(f"Path does not exist: {path}")

    if fs.isfile(path):
        if not _is_image_file(path, extensions):
            raise ValueError(f"File is not an image: {path}")
        if path not in seen:
            seen.add(path)
            pbar.update(1)
            yield path
        return

    if fs.isdir(path):
        for candidate in _stream_files_from_directory(fs, path, extensions):
            if candidate in seen:
                continue
            seen.add(candidate)
            pbar.update(1)
            yield candidate
100
+
101
+
102
def _process_glob_pattern(
    fs: fsspec.AbstractFileSystem, path: str, seen: set[str], pbar: tqdm[Any], extensions: set[str]
) -> Iterator[str]:
    """Expand a glob pattern and yield the image files it matches.

    Args:
        fs: The filesystem instance.
        path: The glob pattern path.
        seen: Paths already yielded; updated in place to suppress duplicates.
        pbar: Progress bar advanced once per yielded path.
        extensions: Set of allowed file extensions.

    Yields:
        File paths that match the glob pattern and allowed extensions.
    """
    for match in fs.glob(path):
        candidate = str(match)
        # Re-attach the protocol when the glob result came back without one.
        if _needs_protocol_prefix(candidate, fs):
            candidate = f"{_get_protocol_string(fs)}{PROTOCOL_SEPARATOR}{candidate}"
        if not fs.isfile(candidate):
            continue
        if not _is_image_file(candidate, extensions):
            continue
        if candidate in seen:
            continue
        seen.add(candidate)
        pbar.update(1)
        yield candidate
127
+
128
+
129
def _stream_files_from_directory(
    fs: fsspec.AbstractFileSystem, path: str, extensions: set[str]
) -> Iterator[str]:
    """Yield the image files found under a directory.

    Filesystems whose protocol is listed in CLOUD_PROTOCOLS are traversed with
    fs.walk(). All others use fs.find() and fall back to fs.walk() if that
    raises. Any error that escapes is logged and ends the stream; it is not
    re-raised.

    Args:
        fs: The filesystem instance
        path: Directory path to list
        extensions: Set of allowed file extensions

    Yields:
        File paths as they are discovered
    """
    try:
        if _get_protocol_string(fs) in CLOUD_PROTOCOLS:
            yield from _stream_files_using_walk(fs, path, extensions)
            return

        try:
            for candidate in fs.find(path, detail=False):
                if fs.isfile(candidate) and _is_image_file(candidate, extensions):
                    yield candidate
        except Exception as e:
            logging.warning(f"fs.find() failed for {path}, trying alternative method: {e}")
            yield from _stream_files_using_walk(fs, path, extensions)
    except Exception as e:
        logging.error(f"Error streaming files from '{path}': {e}")
157
+
158
+
159
def _stream_files_using_walk(
    fs: fsspec.AbstractFileSystem, path: str, extensions: set[str]
) -> Iterator[str]:
    """Yield image files found by traversing a directory with fs.walk().

    Args:
        fs: The filesystem instance.
        path: The directory path to walk.
        extensions: Set of allowed file extensions.

    Yields:
        File paths that match the allowed extensions, prefixed with the
        filesystem protocol when the walked path lacks one and the
        filesystem is not the default (local) one.
    """
    for directory, _subdirs, filenames in fs.walk(path):
        for filename in filenames:
            # Avoid a doubled separator when the directory already ends with one.
            joiner = "" if directory.endswith(PATH_SEPARATOR) else PATH_SEPARATOR
            candidate = f"{directory}{joiner}{filename}"
            if _needs_protocol_prefix(candidate, fs):
                candidate = f"{_get_protocol_string(fs)}{PROTOCOL_SEPARATOR}{candidate}"
            if _is_image_file(candidate, extensions):
                yield candidate
188
+
189
+
190
def _get_filesystem(path: str) -> fsspec.AbstractFileSystem:
    """Get the appropriate filesystem for the given path.

    The protocol is the text before the first "://" in the path. Paths
    without that separator use DEFAULT_PROTOCOL.

    Args:
        path: The path to determine the filesystem for. Can be local or remote.

    Returns:
        The filesystem instance returned by ``fsspec.filesystem`` for the
        path's protocol.

    Raises:
        ValueError: Presumably raised by fsspec when the protocol is not
            recognised -- TODO confirm against the fsspec documentation.
    """
    # partition() always yields strings, so no list/tuple normalisation is needed.
    scheme, separator, _rest = path.partition(PROTOCOL_SEPARATOR)
    protocol = scheme if separator else DEFAULT_PROTOCOL
    return fsspec.filesystem(protocol)
209
+
210
+
211
def _is_glob_pattern(path: str) -> bool:
    """Report whether a path should be treated as a glob pattern.

    Args:
        path: The path to check for glob patterns.

    Returns:
        True if any character listed in GLOB_CHARS occurs in the path,
        False otherwise.
    """
    for glob_char in GLOB_CHARS:
        if glob_char in path:
            return True
    return False
221
+
222
+
223
def _is_image_file(path: str, extensions: set[str]) -> bool:
    """Check if a file is an image based on its extension.

    The comparison is case-insensitive on both sides, so an extension given
    as ".JPG" matches "photo.jpg" and vice versa.

    Args:
        path: The file path to check.
        extensions: Set of allowed file extensions (e.g., {'.jpg', '.png'}).

    Returns:
        True if the path ends with one of the allowed extensions, False
        otherwise (including when ``extensions`` is empty).
    """
    # str.endswith accepts a tuple of suffixes; an empty tuple never matches.
    suffixes = tuple(ext.lower() for ext in extensions)
    return path.lower().endswith(suffixes)
235
+
236
+
237
def _needs_protocol_prefix(path: str, fs: fsspec.AbstractFileSystem) -> bool:
    """Check if a path needs protocol prefix.

    Args:
        path: The path to check.
        fs: The filesystem instance.

    Returns:
        True if the path has no "://" separator and the filesystem's protocol
        differs from DEFAULT_PROTOCOL. False otherwise, including when the
        filesystem exposes no ``protocol`` attribute.
    """
    if PROTOCOL_SEPARATOR in path:
        return False
    # _get_protocol_string falls back to DEFAULT_PROTOCOL when the attribute is
    # missing and unwraps list/tuple protocols, so no separate checks are needed.
    return _get_protocol_string(fs) != DEFAULT_PROTOCOL
260
+
261
+
262
def _get_protocol_string(fs: fsspec.AbstractFileSystem) -> str:
    """Return the filesystem's protocol as a plain string.

    Args:
        fs: The filesystem instance.

    Returns:
        The protocol string (e.g., 's3', 'file', 'gcs'). When the filesystem's
        ``protocol`` attribute is a list or tuple, its first entry is used.
        When the attribute is missing, DEFAULT_PROTOCOL is returned.
    """
    raw_protocol = getattr(fs, "protocol", DEFAULT_PROTOCOL)
    first = raw_protocol[0] if isinstance(raw_protocol, (list, tuple)) else raw_protocol
    return str(first)
@@ -8,6 +8,7 @@ from pathlib import Path
8
8
  from typing import Iterable
9
9
  from uuid import UUID
10
10
 
11
+ import fsspec
11
12
  import PIL
12
13
  from labelformat.formats import (
13
14
  COCOInstanceSegmentationInput,
@@ -29,10 +30,10 @@ from labelformat.model.object_detection import (
29
30
  from sqlmodel import Session
30
31
  from tqdm import tqdm
31
32
 
32
- from lightly_studio.api.db import db_manager
33
+ from lightly_studio import db_manager
33
34
  from lightly_studio.api.features import lightly_studio_active_features
34
35
  from lightly_studio.api.server import Server
35
- from lightly_studio.dataset import env
36
+ from lightly_studio.dataset import env, fsspec_lister
36
37
  from lightly_studio.dataset.embedding_generator import EmbeddingGenerator
37
38
  from lightly_studio.dataset.embedding_manager import (
38
39
  EmbeddingManager,
@@ -324,7 +325,6 @@ class DatasetLoader:
324
325
  self,
325
326
  dataset_name: str,
326
327
  img_dir: str,
327
- recursive: bool = True,
328
328
  allowed_extensions: Iterable[str] = {
329
329
  ".png",
330
330
  ".jpg",
@@ -340,31 +340,22 @@ class DatasetLoader:
340
340
  Args:
341
341
  dataset_name: The name for the new dataset.
342
342
  img_dir: Path to the folder containing the images.
343
- recursive: If True, search for images recursively in subfolders.
344
343
  allowed_extensions: An iterable container of allowed image file
345
344
  extensions.
346
345
  """
347
- img_dir_path = Path(img_dir).absolute()
348
- if not img_dir_path.exists() or not img_dir_path.is_dir():
349
- raise ValueError(f"Input images folder is not a valid directory: {img_dir_path}")
350
-
351
346
  # Create dataset.
352
347
  dataset = dataset_resolver.create(
353
348
  session=self.session,
354
349
  dataset=DatasetCreate(
355
350
  name=dataset_name,
356
- directory=str(img_dir_path),
351
+ directory=img_dir,
357
352
  ),
358
353
  )
359
354
 
360
- # Collect image file paths.
355
+ # Collect image file paths with extension filtering.
361
356
  allowed_extensions_set = {ext.lower() for ext in allowed_extensions}
362
- image_paths = []
363
- path_iter = img_dir_path.rglob("*") if recursive else img_dir_path.glob("*")
364
- for path in path_iter:
365
- if path.is_file() and path.suffix.lower() in allowed_extensions_set:
366
- image_paths.append(path)
367
- print(f"Found {len(image_paths)} images in {img_dir_path}.")
357
+ image_paths = list(fsspec_lister.iter_files_from_path(img_dir, allowed_extensions_set))
358
+ print(f"Found {len(image_paths)} images in {img_dir}.")
368
359
 
369
360
  # Process images.
370
361
  sample_ids = _create_samples_from_paths(
@@ -383,8 +374,37 @@ class DatasetLoader:
383
374
 
384
375
  return dataset
385
376
 
377
def _validate_has_samples(self) -> None:
    """Ensure there is something to show before the GUI is started.

    Only the first dataset returned by the resolver is inspected.

    Raises:
        ValueError: If no dataset exists, or if the first dataset
            contains no samples.
    """
    # Fetching a single dataset is enough to know whether any exist.
    found = dataset_resolver.get_all(session=self.session, offset=0, limit=1)
    if not found:
        raise ValueError(
            "No datasets found. Please load a dataset using one of the loader methods "
            "(e.g., from_yolo(), from_directory(), etc.) before starting the GUI."
        )

    n_samples = sample_resolver.count_by_dataset_id(
        session=self.session, dataset_id=found[0].dataset_id
    )
    if n_samples == 0:
        raise ValueError(
            "No images have been indexed for the first dataset. "
            "Please ensure your dataset contains valid images and try loading again."
        )
403
+
386
404
  def start_gui(self) -> None:
387
405
  """Launch the web interface for the loaded dataset."""
406
+ self._validate_has_samples()
407
+
388
408
  server = Server(host=env.LIGHTLY_STUDIO_HOST, port=env.LIGHTLY_STUDIO_PORT)
389
409
 
390
410
  print(f"Open the LightlyStudio GUI under: {env.APP_URL}")
@@ -395,7 +415,7 @@ class DatasetLoader:
395
415
  def _create_samples_from_paths(
396
416
  session: Session,
397
417
  dataset_id: UUID,
398
- image_paths: Iterable[Path],
418
+ image_paths: Iterable[str],
399
419
  ) -> Iterator[UUID]:
400
420
  """Create samples from a list of image paths.
401
421
 
@@ -415,15 +435,14 @@ def _create_samples_from_paths(
415
435
  unit=" images",
416
436
  ):
417
437
  try:
418
- image = PIL.Image.open(image_path)
419
- width, height = image.size
420
- image.close()
438
+ with fsspec.open(image_path, "rb") as file, PIL.Image.open(file) as img:
439
+ width, height = img.size
421
440
  except (FileNotFoundError, PIL.UnidentifiedImageError, OSError):
422
441
  continue
423
442
 
424
443
  sample = SampleCreate(
425
- file_name=image_path.name,
426
- file_path_abs=str(image_path),
444
+ file_name=Path(image_path).name,
445
+ file_path_abs=image_path,
427
446
  width=width,
428
447
  height=height,
429
448
  dataset_id=dataset_id,
@@ -477,10 +496,10 @@ def _process_object_detection_annotations(
477
496
  sample_id=context.sample_id,
478
497
  annotation_label_id=context.label_map[obj.category.id],
479
498
  annotation_type="object_detection",
480
- x=x,
481
- y=y,
482
- width=width,
483
- height=height,
499
+ x=int(x),
500
+ y=int(y),
501
+ width=int(width),
502
+ height=int(height),
484
503
  confidence=obj.confidence,
485
504
  annotation_task_id=context.annotation_task_id,
486
505
  )
@@ -512,10 +531,10 @@ def _process_instance_segmentation_annotations(
512
531
  sample_id=context.sample_id,
513
532
  annotation_label_id=context.label_map[obj.category.id],
514
533
  annotation_type="instance_segmentation",
515
- x=x,
516
- y=y,
517
- width=width,
518
- height=height,
534
+ x=int(x),
535
+ y=int(y),
536
+ width=int(width),
537
+ height=int(height),
519
538
  segmentation_mask=segmentation_rle,
520
539
  annotation_task_id=context.annotation_task_id,
521
540
  )
@@ -7,6 +7,7 @@ from pathlib import Path
7
7
  from typing import Callable
8
8
  from uuid import UUID
9
9
 
10
+ import fsspec
10
11
  import torch
11
12
  from PIL import Image
12
13
  from torch.utils.data import DataLoader, Dataset
@@ -31,7 +32,7 @@ class _ImageFileDataset(Dataset[torch.Tensor]):
31
32
 
32
33
  def __init__(
33
34
  self,
34
- filepaths: list[Path],
35
+ filepaths: list[str],
35
36
  preprocess: Callable[[Image.Image], torch.Tensor],
36
37
  ) -> None:
37
38
  self.filepaths = filepaths
@@ -41,8 +42,9 @@ class _ImageFileDataset(Dataset[torch.Tensor]):
41
42
  return len(self.filepaths)
42
43
 
43
44
  def __getitem__(self, idx: int) -> torch.Tensor:
44
- image = Image.open(self.filepaths[idx]).convert("RGB")
45
- return self.preprocess(image)
45
+ with fsspec.open(self.filepaths[idx], "rb") as file:
46
+ image = Image.open(file).convert("RGB")
47
+ return self.preprocess(image)
46
48
 
47
49
 
48
50
  class MobileCLIPEmbeddingGenerator(EmbeddingGenerator):
@@ -103,7 +105,7 @@ class MobileCLIPEmbeddingGenerator(EmbeddingGenerator):
103
105
  embedding_list: list[float] = embedding.cpu().numpy().flatten().tolist()
104
106
  return embedding_list
105
107
 
106
- def embed_images(self, filepaths: list[Path]) -> list[list[float]]:
108
+ def embed_images(self, filepaths: list[str]) -> list[list[float]]:
107
109
  """Embed images with MobileCLIP.
108
110
 
109
111
  Args:
@@ -0,0 +1,145 @@
1
+ """Module provides functions to initialize and manage the DuckDB."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from contextlib import contextmanager
7
+ from pathlib import Path
8
+ from typing import Generator
9
+
10
+ from fastapi import Depends
11
+ from sqlalchemy.engine import Engine
12
+ from sqlalchemy.pool import Pool
13
+ from sqlmodel import Session, SQLModel, create_engine
14
+ from typing_extensions import Annotated
15
+
16
+ import lightly_studio.api.db_tables # noqa: F401, required for SQLModel to work properly
17
+
18
+
19
class DatabaseEngine:
    """Wrapper around a SQLAlchemy engine that hands out sessions."""

    _engine_url: str
    _engine: Engine
    # Created lazily by get_persistent_session().
    _persistent_session: Session | None = None

    def __init__(
        self,
        engine_url: str | None = None,
        cleanup_existing: bool = False,
        poolclass: type[Pool] | None = None,
    ) -> None:
        """Create the engine and all tables registered on SQLModel.

        Args:
            engine_url: The database engine URL. A falsy value selects the
                local DuckDB file "duckdb:///lightly_studio.db".
            cleanup_existing: If True, deletes the existing database file if it exists.
            poolclass: The SQLAlchemy pool class to use. Use StaticPool for
                in-memory databases for testing, otherwise different DB connections
                connect to different in-memory databases.
        """
        self._engine_url = engine_url or "duckdb:///lightly_studio.db"
        if cleanup_existing:
            _cleanup_database_file(engine_url=self._engine_url)
        self._engine = create_engine(url=self._engine_url, poolclass=poolclass)
        SQLModel.metadata.create_all(self._engine)

    @contextmanager
    def session(self) -> Generator[Session, None, None]:
        """Provide a short-lived session that is committed and closed on exit.

        On an exception the session is rolled back and the exception re-raised.
        """
        # Pending work on the persistent session is committed first. This
        # addresses a foreign key constraint violation issue with DuckDB.
        # See LIG-7527 for more details.
        persistent = self.get_persistent_session()
        if persistent.in_transaction():
            logging.debug("The persistent session is in transaction, committing changes.")
            persistent.commit()

        short_lived = Session(self._engine, close_resets_only=False)
        try:
            yield short_lived
            short_lived.commit()
        except Exception:
            short_lived.rollback()
            raise
        finally:
            short_lived.close()

    def get_persistent_session(self) -> Session:
        """Return the persistent session, creating it on first use."""
        if self._persistent_session is not None:
            return self._persistent_session
        self._persistent_session = Session(self._engine, close_resets_only=False)
        return self._persistent_session
71
+
72
+
73
+ # Global database engine instance instantiated lazily.
74
+ _engine: DatabaseEngine | None = None
75
+
76
+
77
def get_engine() -> DatabaseEngine:
    """Return the global database engine, creating it on first use.

    A newly created engine uses the default settings with
    ``cleanup_existing=True``, so a pre-existing database file is deleted.
    """
    global _engine  # noqa: PLW0603
    if _engine is not None:
        return _engine
    _engine = DatabaseEngine(cleanup_existing=True)
    return _engine
87
+
88
+
89
def set_engine(engine: DatabaseEngine) -> None:
    """Install the global database engine.

    Args:
        engine: The engine to use from now on.

    Raises:
        RuntimeError: If a global engine has already been set.
    """
    global _engine  # noqa: PLW0603
    if _engine is None:
        _engine = engine
        return
    raise RuntimeError("Database engine is already set and cannot be changed.")
95
+
96
+
97
def connect(db_file: str | None, cleanup_existing: bool = False) -> None:
    """Set up the database connection.

    Helper function to set up the database engine.

    Args:
        db_file: The path to the DuckDB file. If None, uses a default, see DatabaseEngine class.
        cleanup_existing: If True, deletes the pre-existing database file if a file database
            is used.

    Raises:
        RuntimeError: If a global database engine has already been set.
    """
    # Fail before constructing the engine: DatabaseEngine.__init__ may delete the
    # database file and creates tables, which must not happen for a call that
    # set_engine() is going to reject anyway.
    if _engine is not None:
        raise RuntimeError("Database engine is already set and cannot be changed.")

    engine_url = f"duckdb:///{db_file}" if db_file is not None else None
    engine = DatabaseEngine(engine_url=engine_url, cleanup_existing=cleanup_existing)
    set_engine(engine=engine)
110
+
111
+
112
@contextmanager
def session() -> Generator[Session, None, None]:
    """Provide a short-lived session from the global engine; it is autoclosed."""
    engine = get_engine()
    with engine.session() as short_lived:
        yield short_lived
117
+
118
+
119
def persistent_session() -> Session:
    """Return the persistent session of the global database engine."""
    engine = get_engine()
    return engine.get_persistent_session()
122
+
123
+
124
def _cleanup_database_file(engine_url: str) -> None:
    """Delete database file if it exists.

    Only a leading "duckdb:///" is stripped from the URL. Any later occurrence
    of that text is kept as part of the file path. URLs that do not resolve to
    an existing regular file (for example an in-memory database) are ignored.

    Args:
        engine_url: The database engine URL
    """
    scheme = "duckdb:///"
    location = engine_url[len(scheme) :] if engine_url.startswith(scheme) else engine_url
    db_file = Path(location)
    # is_file() is already False for a missing path, so no exists() check is needed.
    if db_file.is_file():
        db_file.unlink()
        logging.info(f"Deleted existing database: {db_file}")
134
+
135
+
136
def _session_dependency() -> Generator[Session, None, None]:
    """Yield a short-lived session for use as a FastAPI dependency.

    This is a plain generator wrapper around the ``session()`` context manager.
    """
    with session() as db_session:
        yield db_session
143
+
144
+
145
+ SessionDep = Annotated[Session, Depends(_session_dependency)]
@@ -0,0 +1 @@
1
+ .spinner.svelte-18zpdab{fill:hsl(var(--primary) / var(--tw-bg-opacity, 1))}[data-select-viewport],[data-combobox-viewport]{scrollbar-width:none!important;-ms-overflow-style:none!important;-webkit-overflow-scrolling:touch!important}[data-combobox-viewport]::-webkit-scrollbar{display:none!important}[data-select-viewport]::-webkit-scrollbar{display:none!important}.crop.svelte-20k33o{position:relative;overflow:hidden}.annotation-box.svelte-20k33o{position:absolute;border:1px solid rgba(0,0,0,0);box-sizing:content-box}.annotation-label.svelte-20k33o{position:absolute;transform:translate3d(-1px,-100%,0);padding:1px 6px 2px;white-space:nowrap;cursor:pointer}.annotation-selected.svelte-20k33o{outline:drop-shadow(1px 1px 1px hsl(var(--primary))) drop-shadow(1px -1px 1px hsl(var(--primary))) drop-shadow(-1px -1px 1px hsl(var(--primary))) drop-shadow(-1px 1px 1px hsl(var(--primary)))}.sample-image.svelte-1c6bz0a{width:var(--sample-width);height:var(--sample-height);-o-object-fit:var(--object-fit);object-fit:var(--object-fit)}.viewport.svelte-gzepom{overflow-y:hidden}.group.svelte-15p83yz{outline:0}
@@ -0,0 +1 @@
1
+ .spinner.svelte-18zpdab{fill:hsl(var(--primary) / var(--tw-bg-opacity, 1))}.crop.svelte-20k33o{position:relative;overflow:hidden}.annotation-box.svelte-20k33o{position:absolute;border:1px solid rgba(0,0,0,0);box-sizing:content-box}.annotation-label.svelte-20k33o{position:absolute;transform:translate3d(-1px,-100%,0);padding:1px 6px 2px;white-space:nowrap;cursor:pointer}.annotation-selected.svelte-20k33o{outline:drop-shadow(1px 1px 1px hsl(var(--primary))) drop-shadow(1px -1px 1px hsl(var(--primary))) drop-shadow(-1px -1px 1px hsl(var(--primary))) drop-shadow(-1px 1px 1px hsl(var(--primary)))}.sample-image.svelte-1c6bz0a{width:var(--sample-width);height:var(--sample-height);-o-object-fit:var(--object-fit);object-fit:var(--object-fit)}.viewport.svelte-gzepom{overflow-y:hidden}.group.svelte-15p83yz{outline:0}