lightly-studio 0.3.1-py3-none-any.whl → 0.3.3-py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release.


This version of lightly-studio might be problematic.

Files changed (169)
  1. lightly_studio/__init__.py +4 -4
  2. lightly_studio/api/app.py +7 -5
  3. lightly_studio/api/db_tables.py +0 -3
  4. lightly_studio/api/routes/api/annotation.py +32 -16
  5. lightly_studio/api/routes/api/annotation_label.py +2 -5
  6. lightly_studio/api/routes/api/annotations/__init__.py +7 -0
  7. lightly_studio/api/routes/api/annotations/create_annotation.py +52 -0
  8. lightly_studio/api/routes/api/classifier.py +2 -5
  9. lightly_studio/api/routes/api/dataset.py +5 -8
  10. lightly_studio/api/routes/api/dataset_tag.py +2 -3
  11. lightly_studio/api/routes/api/embeddings2d.py +104 -0
  12. lightly_studio/api/routes/api/export.py +73 -0
  13. lightly_studio/api/routes/api/metadata.py +2 -4
  14. lightly_studio/api/routes/api/sample.py +5 -13
  15. lightly_studio/api/routes/api/selection.py +87 -0
  16. lightly_studio/api/routes/api/settings.py +2 -6
  17. lightly_studio/api/routes/images.py +6 -6
  18. lightly_studio/core/add_samples.py +374 -0
  19. lightly_studio/core/dataset.py +272 -400
  20. lightly_studio/core/dataset_query/boolean_expression.py +67 -0
  21. lightly_studio/core/dataset_query/dataset_query.py +216 -0
  22. lightly_studio/core/dataset_query/field.py +113 -0
  23. lightly_studio/core/dataset_query/field_expression.py +79 -0
  24. lightly_studio/core/dataset_query/match_expression.py +23 -0
  25. lightly_studio/core/dataset_query/order_by.py +79 -0
  26. lightly_studio/core/dataset_query/sample_field.py +28 -0
  27. lightly_studio/core/dataset_query/tags_expression.py +46 -0
  28. lightly_studio/core/sample.py +159 -32
  29. lightly_studio/core/start_gui.py +35 -0
  30. lightly_studio/dataset/edge_embedding_generator.py +13 -8
  31. lightly_studio/dataset/embedding_generator.py +2 -3
  32. lightly_studio/dataset/embedding_manager.py +74 -6
  33. lightly_studio/dataset/env.py +4 -0
  34. lightly_studio/dataset/file_utils.py +13 -2
  35. lightly_studio/dataset/fsspec_lister.py +275 -0
  36. lightly_studio/dataset/loader.py +49 -84
  37. lightly_studio/dataset/mobileclip_embedding_generator.py +9 -6
  38. lightly_studio/db_manager.py +145 -0
  39. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.CA_CXIBb.css +1 -0
  40. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.DS78jgNY.css +1 -0
  41. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/index.BVs_sZj9.css +1 -0
  42. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/transform.D487hwJk.css +1 -0
  43. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/6t3IJ0vQ.js +1 -0
  44. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D6su9Aln.js → 8NsknIT2.js} +1 -1
  45. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{x9G_hzyY.js → BND_-4Kp.js} +1 -1
  46. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BylOuP6i.js → BdfTHw61.js} +1 -1
  47. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DOlTMNyt.js → BfHVnyNT.js} +1 -1
  48. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BjkP1AHA.js +1 -0
  49. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BuuNVL9G.js +1 -0
  50. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{O-EABkf9.js → BzKGpnl4.js} +1 -1
  51. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CCx7Ho51.js +1 -0
  52. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{l7KrR96u.js → CH6P3X75.js} +1 -1
  53. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D5-A_Ffd.js → CR2upx_Q.js} +2 -2
  54. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CWPZrTTJ.js +1 -0
  55. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{C8I8rFJQ.js → Cs1XmhiF.js} +1 -1
  56. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{CDnpyLsT.js → CwPowJfP.js} +1 -1
  57. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxFKfZ9T.js +1 -0
  58. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cxevwdid.js +1 -0
  59. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DjfY96ND.js → D4whDBUi.js} +1 -1
  60. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6r9vr07.js +1 -0
  61. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DA6bFLPR.js +1 -0
  62. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DEgUu98i.js +3 -0
  63. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DGTPl6Gk.js +1 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DKGxBSlK.js +1 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DQXoLcsF.js +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DQe_kdRt.js +92 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DcY4jgG3.js +1 -0
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bu7uvVrG.js → RmD8FzRo.js} +1 -1
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/V-MnMC1X.js +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bsi3UGy5.js → keKYsoph.js} +1 -1
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BVr6DYqP.js +2 -0
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.u7zsVvqp.js +1 -0
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.Da2agmdd.js +1 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{1.B4rNYwVp.js → 1.B11tVRJV.js} +1 -1
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.l30Zud4h.js +1 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CgKPGcAP.js +1 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C8HLK8mj.js +857 -0
  78. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.CWHpKonm.js → 3.CLvg3QcJ.js} +1 -1
  79. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.OUWOLQeV.js → 4.BQhDtXUI.js} +1 -1
  80. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.-6XqWX5G.js +1 -0
  81. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.uBV1Lhat.js +1 -0
  82. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.BXsgoQZh.js +1 -0
  83. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.BkbcnUs8.js +1 -0
  84. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{9.CPu3CiBc.js → 9.Bkrv-Vww.js} +1 -1
  85. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/clustering.worker-DKqeLtG0.js +2 -0
  86. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/search.worker-vNSty3B0.js +1 -0
  87. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
  88. lightly_studio/dist_lightly_studio_view_app/index.html +14 -14
  89. lightly_studio/examples/example.py +13 -12
  90. lightly_studio/examples/example_coco.py +13 -0
  91. lightly_studio/examples/example_metadata.py +83 -98
  92. lightly_studio/examples/example_selection.py +7 -19
  93. lightly_studio/examples/example_split_work.py +12 -36
  94. lightly_studio/examples/{example_v2.py → example_yolo.py} +3 -4
  95. lightly_studio/export/export_dataset.py +65 -0
  96. lightly_studio/export/lightly_studio_label_input.py +120 -0
  97. lightly_studio/few_shot_classifier/classifier_manager.py +5 -26
  98. lightly_studio/metadata/compute_typicality.py +67 -0
  99. lightly_studio/models/annotation/annotation_base.py +18 -20
  100. lightly_studio/models/annotation/instance_segmentation.py +8 -8
  101. lightly_studio/models/annotation/object_detection.py +4 -4
  102. lightly_studio/models/dataset.py +6 -2
  103. lightly_studio/models/sample.py +10 -3
  104. lightly_studio/resolvers/annotation_label_resolver/__init__.py +2 -1
  105. lightly_studio/resolvers/annotation_label_resolver/get_all.py +15 -0
  106. lightly_studio/resolvers/annotation_resolver/__init__.py +2 -3
  107. lightly_studio/resolvers/annotation_resolver/create_many.py +3 -3
  108. lightly_studio/resolvers/annotation_resolver/delete_annotation.py +1 -1
  109. lightly_studio/resolvers/annotation_resolver/delete_annotations.py +7 -3
  110. lightly_studio/resolvers/annotation_resolver/get_by_id.py +19 -1
  111. lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +0 -1
  112. lightly_studio/resolvers/annotations/annotations_filter.py +1 -11
  113. lightly_studio/resolvers/dataset_resolver.py +10 -0
  114. lightly_studio/resolvers/embedding_model_resolver.py +22 -0
  115. lightly_studio/resolvers/sample_resolver.py +53 -9
  116. lightly_studio/resolvers/tag_resolver.py +23 -0
  117. lightly_studio/selection/mundig.py +7 -10
  118. lightly_studio/selection/select.py +55 -46
  119. lightly_studio/selection/select_via_db.py +23 -19
  120. lightly_studio/selection/selection_config.py +10 -4
  121. lightly_studio/services/annotations_service/__init__.py +12 -0
  122. lightly_studio/services/annotations_service/create_annotation.py +63 -0
  123. lightly_studio/services/annotations_service/delete_annotation.py +22 -0
  124. lightly_studio/services/annotations_service/update_annotation.py +21 -32
  125. lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
  126. lightly_studio-0.3.3.dist-info/METADATA +814 -0
  127. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.3.dist-info}/RECORD +130 -113
  128. lightly_studio/api/db.py +0 -133
  129. lightly_studio/api/routes/api/annotation_task.py +0 -38
  130. lightly_studio/api/routes/api/metrics.py +0 -80
  131. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.DenzbfeK.css +0 -1
  132. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +0 -1
  133. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +0 -1
  134. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.T-zjSUd3.css +0 -1
  135. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +0 -1
  136. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +0 -1
  137. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +0 -1
  138. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +0 -1
  139. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +0 -1
  140. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +0 -93
  141. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +0 -3
  142. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +0 -1
  143. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +0 -1
  144. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +0 -1
  145. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +0 -1
  146. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +0 -1
  147. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/hQVEETDE.js +0 -1
  148. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +0 -1
  149. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/r64xT6ao.js +0 -1
  150. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +0 -1
  151. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +0 -2
  152. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +0 -1
  153. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +0 -1
  154. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +0 -1
  155. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +0 -1
  156. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +0 -6
  157. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +0 -1
  158. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +0 -1
  159. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +0 -1
  160. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +0 -1
  161. lightly_studio/metrics/detection/__init__.py +0 -0
  162. lightly_studio/metrics/detection/map.py +0 -268
  163. lightly_studio/models/annotation_task.py +0 -28
  164. lightly_studio/resolvers/annotation_resolver/create.py +0 -19
  165. lightly_studio/resolvers/annotation_task_resolver.py +0 -31
  166. lightly_studio-0.3.1.dist-info/METADATA +0 -520
  167. /lightly_studio/{metrics → core/dataset_query}/__init__.py +0 -0
  168. /lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{OpenSans- → OpenSans-Medium.DVUZMR_6.ttf} +0 -0
  169. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.3.dist-info}/WHEEL +0 -0
lightly_studio/dataset/fsspec_lister.py (new file)
@@ -0,0 +1,275 @@
+"""File listing utilities using fsspec.
+
+Handles local and remote paths, directories, and glob patterns.
+"""
+
+from __future__ import annotations
+
+import logging
+from collections.abc import Iterator
+from typing import Any
+
+import fsspec
+from tqdm import tqdm
+
+# Constants
+PROTOCOL_SEPARATOR = "://"
+DEFAULT_PROTOCOL = "file"
+PATH_SEPARATOR = "/"
+
+# Glob pattern characters
+GLOB_CHARS = ["*", "?", "[", "]"]
+
+# Cloud storage protocols
+CLOUD_PROTOCOLS = ("s3", "gs", "gcs", "azure", "abfs")
+
+# Image file extensions
+IMAGE_EXTENSIONS = {
+    ".png",
+    ".jpg",
+    ".jpeg",
+    ".gif",
+    ".webp",
+    ".bmp",
+    ".tiff",
+}
+
+
+def iter_files_from_path(path: str, allowed_extensions: set[str] | None = None) -> Iterator[str]:
+    """List all files from a single path, handling directories, globs, and individual files.
+
+    Args:
+        path: A single path which can be:
+            - Individual file path
+            - Directory path (will list all files recursively)
+            - Glob pattern
+            - Remote path (s3://, gcs://, etc.)
+        allowed_extensions: Optional set of allowed file extensions (e.g., {".jpg", ".png"}).
+            If None, uses default IMAGE_EXTENSIONS.
+
+    Yields:
+        File paths as they are discovered, with progress tracking
+    """
+    seen: set[str] = set()
+    extensions = allowed_extensions or IMAGE_EXTENSIONS
+    with tqdm(desc="Discovering files", unit=" files", dynamic_ncols=True) as pbar:
+        cleaned_path = str(path).strip()
+        if not cleaned_path:
+            return
+        fs = _get_filesystem(cleaned_path)
+        yield from _process_single_path_streaming(fs, cleaned_path, seen, pbar, extensions)
+
+
+def _process_single_path_streaming(
+    fs: fsspec.AbstractFileSystem, path: str, seen: set[str], pbar: tqdm[Any], extensions: set[str]
+) -> Iterator[str]:
+    """Process a single path and yield matching image files.
+
+    Handles different path types: individual files, directories, and glob patterns.
+
+    Args:
+        fs: The filesystem instance.
+        path: The path to process (file, directory, or glob pattern).
+        seen: Set of already processed paths to avoid duplicates.
+        pbar: Progress bar instance for tracking progress.
+        extensions: Set of allowed file extensions.
+
+    Yields:
+        File paths that match the criteria.
+
+    Raises:
+        ValueError: If the path doesn't exist or is not an image file when expected.
+    """
+    if _is_glob_pattern(path):
+        yield from _process_glob_pattern(fs, path, seen, pbar, extensions)
+    elif not fs.exists(path):
+        raise ValueError(f"Path does not exist: {path}")
+    elif fs.isfile(path):
+        if _is_image_file(path, extensions) and path not in seen:
+            seen.add(path)
+            pbar.update(1)
+            yield path
+        elif not _is_image_file(path, extensions):
+            raise ValueError(f"File is not an image: {path}")
+    elif fs.isdir(path):
+        for file_path in _stream_files_from_directory(fs, path, extensions):
+            if file_path not in seen:
+                seen.add(file_path)
+                pbar.update(1)
+                yield file_path
+
+
+def _process_glob_pattern(
+    fs: fsspec.AbstractFileSystem, path: str, seen: set[str], pbar: tqdm[Any], extensions: set[str]
+) -> Iterator[str]:
+    """Process glob pattern and yield matching image files.
+
+    Args:
+        fs: The filesystem instance.
+        path: The glob pattern path.
+        seen: Set of already processed paths to avoid duplicates.
+        pbar: Progress bar instance for tracking progress.
+        extensions: Set of allowed file extensions.
+
+    Yields:
+        File paths that match the glob pattern and allowed extensions.
+    """
+    matching_paths = fs.glob(path)
+    for p in matching_paths:
+        path_str = str(p)
+        if _needs_protocol_prefix(path_str, fs):
+            protocol = _get_protocol_string(fs)
+            path_str = f"{protocol}{PROTOCOL_SEPARATOR}{path_str}"
+        if fs.isfile(path_str) and _is_image_file(path_str, extensions) and path_str not in seen:
+            seen.add(path_str)
+            pbar.update(1)
+            yield path_str
+
+
+def _stream_files_from_directory(
+    fs: fsspec.AbstractFileSystem, path: str, extensions: set[str]
+) -> Iterator[str]:
+    """Stream files from a directory with progress tracking.
+
+    Args:
+        fs: The filesystem instance
+        path: Directory path to list
+        extensions: Set of allowed file extensions
+
+    Yields:
+        File paths as they are discovered
+    """
+    try:
+        protocol = _get_protocol_string(fs)
+        if protocol in CLOUD_PROTOCOLS:
+            yield from _stream_files_using_walk(fs, path, extensions)
+        else:
+            try:
+                all_paths = fs.find(path, detail=False)
+                for p in all_paths:
+                    if fs.isfile(p) and _is_image_file(p, extensions):
+                        yield p
+            except Exception as e:
+                logging.warning(f"fs.find() failed for {path}, trying alternative method: {e}")
+                yield from _stream_files_using_walk(fs, path, extensions)
+    except Exception as e:
+        logging.error(f"Error streaming files from '{path}': {e}")
+
+
+def _stream_files_using_walk(
+    fs: fsspec.AbstractFileSystem, path: str, extensions: set[str]
+) -> Iterator[str]:
+    """Stream files using fs.walk() method.
+
+    Args:
+        fs: The filesystem instance.
+        path: The directory path to walk.
+        extensions: Set of allowed file extensions.
+
+    Yields:
+        File paths that match the allowed extensions.
+    """
+
+    def add_protocol_if_needed(p: str) -> str:
+        if _needs_protocol_prefix(p, fs):
+            protocol = _get_protocol_string(fs)
+            return f"{protocol}{PROTOCOL_SEPARATOR}{p}"
+        return p
+
+    for root, _dirs, files in fs.walk(path):
+        for file in files:
+            if not root.endswith(PATH_SEPARATOR):
+                full_path = f"{root}{PATH_SEPARATOR}{file}"
+            else:
+                full_path = f"{root}{file}"
+            full_path = add_protocol_if_needed(full_path)
+            if _is_image_file(full_path, extensions):
+                yield full_path
+
+
+def _get_filesystem(path: str) -> fsspec.AbstractFileSystem:
+    """Get the appropriate filesystem for the given path.
+
+    Args:
+        path: The path to determine the filesystem for. Can be local or remote.
+
+    Returns:
+        An fsspec filesystem instance appropriate for the path's protocol.
+
+    Raises:
+        ValueError: If the protocol cannot be determined or is invalid.
+    """
+    protocol = path.split(PROTOCOL_SEPARATOR)[0] if PROTOCOL_SEPARATOR in path else DEFAULT_PROTOCOL
+
+    # Ensure protocol is a string, not a tuple
+    if isinstance(protocol, (list, tuple)):
+        protocol = protocol[0]
+
+    return fsspec.filesystem(protocol)
+
+
+def _is_glob_pattern(path: str) -> bool:
+    """Check if a path contains glob pattern characters.
+
+    Args:
+        path: The path to check for glob patterns.
+
+    Returns:
+        True if the path contains glob pattern characters (*, ?, [, ]), False otherwise.
+    """
+    return any(char in path for char in GLOB_CHARS)
+
+
+def _is_image_file(path: str, extensions: set[str]) -> bool:
+    """Check if a file is an image based on its extension.
+
+    Args:
+        path: The file path to check.
+        extensions: Set of allowed file extensions (e.g., {'.jpg', '.png'}).
+
+    Returns:
+        True if the file has an allowed image extension, False otherwise.
+    """
+    path_lower = path.lower()
+    return any(path_lower.endswith(ext) for ext in extensions)
+
+
+def _needs_protocol_prefix(path: str, fs: fsspec.AbstractFileSystem) -> bool:
+    """Check if a path needs protocol prefix.
+
+    Args:
+        path: The path to check.
+        fs: The filesystem instance.
+
+    Returns:
+        True if the path needs a protocol prefix (e.g., for cloud storage),
+        False if it is a local path.
+    """
+    if PROTOCOL_SEPARATOR in path:
+        return False
+
+    if not hasattr(fs, "protocol"):
+        return False
+
+    protocol = getattr(fs, "protocol", DEFAULT_PROTOCOL)
+    # Handle case where protocol is a tuple (common with fsspec)
+    if isinstance(protocol, (list, tuple)):
+        protocol = protocol[0]
+
+    return str(protocol) != DEFAULT_PROTOCOL
+
+
+def _get_protocol_string(fs: fsspec.AbstractFileSystem) -> str:
+    """Get the protocol string from filesystem.
+
+    Args:
+        fs: The filesystem instance.
+
+    Returns:
+        The protocol string (e.g., 's3', 'file', 'gcs').
+        Returns 'file' as default if protocol cannot be determined.
+    """
+    protocol = getattr(fs, "protocol", DEFAULT_PROTOCOL)
+    if isinstance(protocol, (list, tuple)):
+        return str(protocol[0])
+    return str(protocol)
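
For orientation, a rough usage sketch of the new helper follows; the paths below are placeholders, and remote protocols only work if the matching fsspec backend (for example s3fs for s3://) is installed:

from lightly_studio.dataset.fsspec_lister import iter_files_from_path

# Local directory: walked recursively and filtered to the default IMAGE_EXTENSIONS.
for file_path in iter_files_from_path("/data/my_images"):
    print(file_path)

# Glob pattern on cloud storage (placeholder bucket), restricted to JPEG files.
jpegs = list(
    iter_files_from_path("s3://my-bucket/raw/*.jpg", allowed_extensions={".jpg", ".jpeg"})
)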
lightly_studio/dataset/loader.py
@@ -8,6 +8,7 @@ from pathlib import Path
 from typing import Iterable
 from uuid import UUID
 
+import fsspec
 import PIL
 from labelformat.formats import (
     COCOInstanceSegmentationInput,
@@ -29,10 +30,10 @@ from labelformat.model.object_detection import (
 from sqlmodel import Session
 from tqdm import tqdm
 
-from lightly_studio.api.db import db_manager
+from lightly_studio import db_manager
 from lightly_studio.api.features import lightly_studio_active_features
 from lightly_studio.api.server import Server
-from lightly_studio.dataset import env
+from lightly_studio.dataset import env, fsspec_lister
 from lightly_studio.dataset.embedding_generator import EmbeddingGenerator
 from lightly_studio.dataset.embedding_manager import (
     EmbeddingManager,
@@ -40,16 +41,11 @@ from lightly_studio.dataset.embedding_manager import (
 )
 from lightly_studio.models.annotation.annotation_base import AnnotationCreate
 from lightly_studio.models.annotation_label import AnnotationLabelCreate
-from lightly_studio.models.annotation_task import (
-    AnnotationTaskTable,
-    AnnotationType,
-)
 from lightly_studio.models.dataset import DatasetCreate, DatasetTable
 from lightly_studio.models.sample import SampleCreate, SampleTable
 from lightly_studio.resolvers import (
     annotation_label_resolver,
     annotation_resolver,
-    annotation_task_resolver,
     dataset_resolver,
     sample_resolver,
 )
@@ -66,7 +62,6 @@ class AnnotationProcessingContext:
     dataset_id: UUID
     sample_id: UUID
     label_map: dict[int, UUID]
-    annotation_task_id: UUID
 
 
 class DatasetLoader:
@@ -82,7 +77,6 @@ class DatasetLoader:
         dataset: DatasetTable,
         input_labels: ObjectDetectionInput | InstanceSegmentationInput,
         img_dir: Path,
-        annotation_task_id: UUID,
     ) -> None:
         """Store a loaded dataset in database."""
         # Create label mapping
@@ -119,7 +113,6 @@
             samples_data=samples_image_data,
             dataset_id=dataset.dataset_id,
             label_map=label_map,
-            annotation_task_id=annotation_task_id,
             annotations_to_create=annotations_to_create,
             sample_ids=sample_ids,
         )
@@ -136,7 +129,6 @@
             samples_data=samples_image_data,
             dataset_id=dataset.dataset_id,
             label_map=label_map,
-            annotation_task_id=annotation_task_id,
             annotations_to_create=annotations_to_create,
             sample_ids=sample_ids,
         )
@@ -187,23 +179,18 @@
             input_labels=label_input,
             dataset_name=dataset_name,
             img_dir=str(img_dir),
-            is_prediction=False,
-            task_name=task_name,
         )
 
     def from_coco_object_detections(
         self,
         annotations_json_path: str,
         img_dir: str,
-        task_name: str | None = None,
     ) -> DatasetTable:
         """Load a dataset in COCO Object Detection format and store in DB.
 
         Args:
             annotations_json_path: Path to the COCO annotations JSON file.
             img_dir: Path to the folder containing the images.
-            task_name: Optional name for the annotation task. If None, a
-                default name is generated.
 
         Returns:
             DatasetTable: The created dataset table entry.
@@ -211,9 +198,6 @@ class DatasetLoader:
         annotations_json = Path(annotations_json_path)
         dataset_name = annotations_json.parent.name
 
-        if task_name is None:
-            task_name = f"Loaded from COCO Object Detection: {annotations_json.name}"
-
         label_input = COCOObjectDetectionInput(
             input_file=annotations_json,
         )
@@ -223,23 +207,18 @@
             input_labels=label_input,
             dataset_name=dataset_name,
             img_dir=str(img_dir_path),
-            is_prediction=False,
-            task_name=task_name,
        )
 
     def from_coco_instance_segmentations(
         self,
         annotations_json_path: str,
         img_dir: str,
-        task_name: str | None = None,
     ) -> DatasetTable:
         """Load a dataset in COCO Instance Segmentation format and store in DB.
 
         Args:
             annotations_json_path: Path to the COCO annotations JSON file.
             img_dir: Path to the folder containing the images.
-            task_name: Optional name for the annotation task. If None, a
-                default name is generated.
 
         Returns:
             DatasetTable: The created dataset table entry.
@@ -247,9 +226,6 @@
         annotations_json = Path(annotations_json_path)
         dataset_name = annotations_json.parent.name
 
-        if task_name is None:
-            task_name = f"Loaded from COCO Instance Segmentation: {annotations_json.name}"
-
         label_input = COCOInstanceSegmentationInput(
             input_file=annotations_json,
         )
@@ -259,8 +235,6 @@
             input_labels=label_input,
             dataset_name=dataset_name,
             img_dir=str(img_dir_path),
-            is_prediction=False,
-            task_name=task_name,
         )
 
     def from_labelformat(
@@ -268,8 +242,6 @@
         input_labels: ObjectDetectionInput | InstanceSegmentationInput,
         dataset_name: str,
         img_dir: str,
-        is_prediction: bool = True,
-        task_name: str | None = None,
     ) -> DatasetTable:
         """Load a dataset from a labelformat object and store in database.
 
@@ -277,24 +249,12 @@
             input_labels: The labelformat input object.
             dataset_name: The name for the new dataset.
             img_dir: Path to the folder containing the images.
-            is_prediction: Whether the task is for prediction or labels.
-            task_name: Optional name for the annotation task. If None, a
-                default name is generated.
 
         Returns:
             DatasetTable: The created dataset table entry.
         """
         img_dir_path = Path(img_dir).absolute()
 
-        # Determine annotation type based on input.
-        # Currently, we always create BBOX tasks, even for segmentation,
-        # as segmentation data is stored alongside bounding boxes.
-        annotation_type = AnnotationType.BBOX
-
-        # Generate a default task name if none is provided.
-        if task_name is None:
-            task_name = f"Loaded from labelformat: {dataset_name}"
-
         # Create dataset and annotation task.
         dataset = dataset_resolver.create(
             session=self.session,
@@ -303,20 +263,11 @@
                 directory=str(img_dir_path),
             ),
         )
-        new_annotation_task = annotation_task_resolver.create(
-            session=self.session,
-            annotation_task=AnnotationTaskTable(
-                name=task_name,
-                annotation_type=annotation_type,
-                is_prediction=is_prediction,
-            ),
-        )
 
         self._load_into_dataset(
             dataset=dataset,
             input_labels=input_labels,
             img_dir=img_dir_path,
-            annotation_task_id=new_annotation_task.annotation_task_id,
         )
         return dataset
 
@@ -324,7 +275,6 @@
         self,
         dataset_name: str,
         img_dir: str,
-        recursive: bool = True,
         allowed_extensions: Iterable[str] = {
             ".png",
             ".jpg",
@@ -340,31 +290,22 @@
         Args:
             dataset_name: The name for the new dataset.
             img_dir: Path to the folder containing the images.
-            recursive: If True, search for images recursively in subfolders.
             allowed_extensions: An iterable container of allowed image file
                 extensions.
         """
-        img_dir_path = Path(img_dir).absolute()
-        if not img_dir_path.exists() or not img_dir_path.is_dir():
-            raise ValueError(f"Input images folder is not a valid directory: {img_dir_path}")
-
         # Create dataset.
         dataset = dataset_resolver.create(
             session=self.session,
             dataset=DatasetCreate(
                 name=dataset_name,
-                directory=str(img_dir_path),
+                directory=img_dir,
             ),
        )
 
-        # Collect image file paths.
+        # Collect image file paths with extension filtering.
         allowed_extensions_set = {ext.lower() for ext in allowed_extensions}
-        image_paths = []
-        path_iter = img_dir_path.rglob("*") if recursive else img_dir_path.glob("*")
-        for path in path_iter:
-            if path.is_file() and path.suffix.lower() in allowed_extensions_set:
-                image_paths.append(path)
-        print(f"Found {len(image_paths)} images in {img_dir_path}.")
+        image_paths = list(fsspec_lister.iter_files_from_path(img_dir, allowed_extensions_set))
+        print(f"Found {len(image_paths)} images in {img_dir}.")
 
         # Process images.
         sample_ids = _create_samples_from_paths(
@@ -383,8 +324,37 @@
 
         return dataset
 
+    def _validate_has_samples(self) -> None:
+        """Validate that there are samples in the database before starting GUI.
+
+        Raises:
+            ValueError: If no samples are found in any dataset.
+        """
+        # Check if any datasets exist
+        datasets = dataset_resolver.get_all(session=self.session, offset=0, limit=1)
+
+        if not datasets:
+            raise ValueError(
+                "No datasets found. Please load a dataset using one of the loader methods "
+                "(e.g., from_yolo(), from_directory(), etc.) before starting the GUI."
+            )
+
+        # Check if there are any samples in the first dataset
+        first_dataset = datasets[0]
+        sample_count = sample_resolver.count_by_dataset_id(
+            session=self.session, dataset_id=first_dataset.dataset_id
+        )
+
+        if sample_count == 0:
+            raise ValueError(
+                "No images have been indexed for the first dataset. "
+                "Please ensure your dataset contains valid images and try loading again."
+            )
+
     def start_gui(self) -> None:
         """Launch the web interface for the loaded dataset."""
+        self._validate_has_samples()
+
         server = Server(host=env.LIGHTLY_STUDIO_HOST, port=env.LIGHTLY_STUDIO_PORT)
 
         print(f"Open the LightlyStudio GUI under: {env.APP_URL}")
@@ -395,7 +365,7 @@
 def _create_samples_from_paths(
     session: Session,
     dataset_id: UUID,
-    image_paths: Iterable[Path],
+    image_paths: Iterable[str],
 ) -> Iterator[UUID]:
     """Create samples from a list of image paths.
 
@@ -415,15 +385,14 @@ def _create_samples_from_paths(
         unit=" images",
     ):
        try:
-            image = PIL.Image.open(image_path)
-            width, height = image.size
-            image.close()
+            with fsspec.open(image_path, "rb") as file, PIL.Image.open(file) as img:
+                width, height = img.size
        except (FileNotFoundError, PIL.UnidentifiedImageError, OSError):
            continue
 
        sample = SampleCreate(
-            file_name=image_path.name,
-            file_path_abs=str(image_path),
+            file_name=Path(image_path).name,
+            file_path_abs=image_path,
            width=width,
            height=height,
            dataset_id=dataset_id,
@@ -477,12 +446,11 @@
                 sample_id=context.sample_id,
                 annotation_label_id=context.label_map[obj.category.id],
                 annotation_type="object_detection",
-                x=x,
-                y=y,
-                width=width,
-                height=height,
+                x=int(x),
+                y=int(y),
+                width=int(width),
+                height=int(height),
                 confidence=obj.confidence,
-                annotation_task_id=context.annotation_task_id,
             )
         )
     return new_annotations
@@ -512,12 +480,11 @@
                 sample_id=context.sample_id,
                 annotation_label_id=context.label_map[obj.category.id],
                 annotation_type="instance_segmentation",
-                x=x,
-                y=y,
-                width=width,
-                height=height,
+                x=int(x),
+                y=int(y),
+                width=int(width),
+                height=int(height),
                 segmentation_mask=segmentation_rle,
-                annotation_task_id=context.annotation_task_id,
             )
         )
     return new_annotations
@@ -529,7 +496,6 @@ def _process_batch_annotations( # noqa: PLR0913
     samples_data: list[tuple[SampleCreate, ImageInstanceSegmentation | ImageObjectDetection]],
     dataset_id: UUID,
     label_map: dict[int, UUID],
-    annotation_task_id: UUID,
     annotations_to_create: list[AnnotationCreate],
     sample_ids: list[UUID],
 ) -> None:
@@ -541,7 +507,6 @@ def _process_batch_annotations( # noqa: PLR0913
             dataset_id=dataset_id,
             sample_id=stored_sample.sample_id,
             label_map=label_map,
-            annotation_task_id=annotation_task_id,
         )
 
         if isinstance(img_data, ImageInstanceSegmentation):
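
Taken together, the loader changes mean from_directory no longer resolves paths itself: it hands img_dir straight to fsspec_lister.iter_files_from_path, so plain directories, glob patterns, and cloud URIs should all be accepted, and start_gui() now fails fast when nothing was indexed. A rough sketch of the resulting call pattern, assuming `loader` is an already-constructed DatasetLoader (this diff does not show how one is built):

# Sketch only: `loader` is assumed to be an existing DatasetLoader instance,
# and the bucket/path below are placeholders.
dataset = loader.from_directory(
    dataset_name="street-scenes",
    img_dir="s3://my-bucket/street-scenes/",  # local dirs, globs, and cloud URIs now go through fsspec
    allowed_extensions={".jpg", ".jpeg", ".png"},
)

# start_gui() now raises ValueError if no dataset or no indexed samples exist
# (see _validate_has_samples above).
loader.start_gui()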
lightly_studio/dataset/mobileclip_embedding_generator.py
@@ -7,6 +7,7 @@ from pathlib import Path
 from typing import Callable
 from uuid import UUID
 
+import fsspec
 import torch
 from PIL import Image
 from torch.utils.data import DataLoader, Dataset
@@ -23,6 +24,7 @@ MOBILECLIP_DOWNLOAD_URL = (
     f"https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/{MODEL_NAME}.pt"
 )
 MAX_BATCH_SIZE: int = 16
+EMBEDDING_DIMENSION: int = 512
 
 
 # Dataset for efficient batched image loading and preprocessing
@@ -31,7 +33,7 @@ class _ImageFileDataset(Dataset[torch.Tensor]):
 
     def __init__(
         self,
-        filepaths: list[Path],
+        filepaths: list[str],
         preprocess: Callable[[Image.Image], torch.Tensor],
     ) -> None:
         self.filepaths = filepaths
@@ -41,8 +43,9 @@ class _ImageFileDataset(Dataset[torch.Tensor]):
         return len(self.filepaths)
 
     def __getitem__(self, idx: int) -> torch.Tensor:
-        image = Image.open(self.filepaths[idx]).convert("RGB")
-        return self.preprocess(image)
+        with fsspec.open(self.filepaths[idx], "rb") as file:
+            image = Image.open(file).convert("RGB")
+            return self.preprocess(image)
 
 
 class MobileCLIPEmbeddingGenerator(EmbeddingGenerator):
@@ -83,7 +86,7 @@ class MobileCLIPEmbeddingGenerator(EmbeddingGenerator):
         return EmbeddingModelCreate(
             name=MODEL_NAME,
             embedding_model_hash=self._model_hash,
-            embedding_dimension=512,
+            embedding_dimension=EMBEDDING_DIMENSION,
             dataset_id=dataset_id,
         )
 
@@ -103,7 +106,7 @@
         embedding_list: list[float] = embedding.cpu().numpy().flatten().tolist()
         return embedding_list
 
-    def embed_images(self, filepaths: list[Path]) -> list[list[float]]:
+    def embed_images(self, filepaths: list[str]) -> list[list[float]]:
         """Embed images with MobileCLIP.
 
         Args:
@@ -136,7 +139,7 @@
 
 
 def _get_cached_mobileclip_checkpoint() -> Path:
-    file_path = Path(tempfile.gettempdir()) / "mobileclip_s0.pt"
+    file_path = Path(tempfile.gettempdir()) / f"{MODEL_NAME}.pt"
     file_utils.download_file_if_does_not_exist(
         url=MOBILECLIP_DOWNLOAD_URL,
         local_filename=file_path,
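
The embedding path follows the same pattern as the loader: image files are now opened through fsspec rather than plain filesystem paths, so embed_images takes string paths that may point at local files or object storage. A minimal stand-alone sketch of that loading pattern (the bucket and key are placeholders, and remote protocols require the matching fsspec backend such as s3fs):

import fsspec
from PIL import Image

# fsspec.open works uniformly for local paths and remote URIs such as s3:// or gs://.
with fsspec.open("s3://my-bucket/images/cat.jpg", "rb") as f:
    image = Image.open(f).convert("RGB")  # convert() forces the pixel data to load while the file is open
print(image.size)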