lightly-studio 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lightly-studio might be problematic. Click here for more details.
- lightly_studio/__init__.py +4 -4
- lightly_studio/api/app.py +1 -1
- lightly_studio/api/routes/api/annotation.py +6 -16
- lightly_studio/api/routes/api/annotation_label.py +2 -5
- lightly_studio/api/routes/api/annotation_task.py +4 -5
- lightly_studio/api/routes/api/classifier.py +2 -5
- lightly_studio/api/routes/api/dataset.py +2 -3
- lightly_studio/api/routes/api/dataset_tag.py +2 -3
- lightly_studio/api/routes/api/metadata.py +2 -4
- lightly_studio/api/routes/api/metrics.py +2 -6
- lightly_studio/api/routes/api/sample.py +5 -13
- lightly_studio/api/routes/api/settings.py +2 -6
- lightly_studio/api/routes/images.py +6 -6
- lightly_studio/core/add_samples.py +383 -0
- lightly_studio/core/dataset.py +250 -362
- lightly_studio/core/dataset_query/__init__.py +0 -0
- lightly_studio/core/dataset_query/boolean_expression.py +67 -0
- lightly_studio/core/dataset_query/dataset_query.py +211 -0
- lightly_studio/core/dataset_query/field.py +113 -0
- lightly_studio/core/dataset_query/field_expression.py +79 -0
- lightly_studio/core/dataset_query/match_expression.py +23 -0
- lightly_studio/core/dataset_query/order_by.py +79 -0
- lightly_studio/core/dataset_query/sample_field.py +28 -0
- lightly_studio/core/dataset_query/tags_expression.py +46 -0
- lightly_studio/core/sample.py +159 -32
- lightly_studio/core/start_gui.py +35 -0
- lightly_studio/dataset/edge_embedding_generator.py +13 -8
- lightly_studio/dataset/embedding_generator.py +2 -3
- lightly_studio/dataset/embedding_manager.py +74 -6
- lightly_studio/dataset/fsspec_lister.py +275 -0
- lightly_studio/dataset/loader.py +49 -30
- lightly_studio/dataset/mobileclip_embedding_generator.py +6 -4
- lightly_studio/db_manager.py +145 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BBm0IWdq.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BNTuXSAe.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/2O287xak.js +3 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{O-EABkf9.js → 7YNGEs1C.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BBoGk9hq.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BRnH9v23.js +92 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bg1Y5eUZ.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DOlTMNyt.js → BqBqV92V.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C0JiMuYn.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DjfY96ND.js → C98Hk3r5.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{r64xT6ao.js → CG0dMCJi.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{C8I8rFJQ.js → Ccq4ZD0B.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cpy-nab_.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bu7uvVrG.js → Crk-jcvV.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cs31G8Qn.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CsKrY2zA.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{x9G_hzyY.js → Cur71c3O.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CzgC3GFB.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D8GZDMNN.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DFRh-Spp.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BylOuP6i.js → DRZO-E-T.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{l7KrR96u.js → DcGCxgpH.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bsi3UGy5.js → Df3aMO5B.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{hQVEETDE.js → DkR_EZ_B.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DqUGznj_.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KpAtIldw.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/M1Q1F7bw.js +4 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{CDnpyLsT.js → OH7-C_mc.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D6su9Aln.js → gLNdjSzu.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/i0ZZ4z06.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BI-EA5gL.js +2 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.CcsRl3cZ.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.BbO4Zc3r.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{1.B4rNYwVp.js → 1._I9GR805.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.J2RBFrSr.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.Cmqj25a-.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C45iKJHA.js +6 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.CWHpKonm.js → 3.w9g4AcAx.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.OUWOLQeV.js → 4.BBI8KwnD.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.huHuxdiF.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.CrbkRPam.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.FomEdhD6.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cb_ADSLk.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{9.CPu3CiBc.js → 9.CajIG5ce.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
- lightly_studio/dist_lightly_studio_view_app/index.html +14 -14
- lightly_studio/examples/example.py +13 -12
- lightly_studio/examples/example_coco.py +13 -0
- lightly_studio/examples/example_metadata.py +83 -98
- lightly_studio/examples/example_selection.py +7 -19
- lightly_studio/examples/example_split_work.py +12 -36
- lightly_studio/examples/{example_v2.py → example_yolo.py} +3 -4
- lightly_studio/models/annotation/annotation_base.py +7 -8
- lightly_studio/models/annotation/instance_segmentation.py +8 -8
- lightly_studio/models/annotation/object_detection.py +4 -4
- lightly_studio/models/dataset.py +6 -2
- lightly_studio/models/sample.py +10 -3
- lightly_studio/resolvers/dataset_resolver.py +10 -0
- lightly_studio/resolvers/embedding_model_resolver.py +22 -0
- lightly_studio/resolvers/sample_resolver.py +53 -9
- lightly_studio/resolvers/tag_resolver.py +23 -0
- lightly_studio/selection/select.py +55 -46
- lightly_studio/selection/select_via_db.py +23 -19
- lightly_studio/selection/selection_config.py +6 -3
- lightly_studio/services/annotations_service/__init__.py +4 -0
- lightly_studio/services/annotations_service/update_annotation.py +21 -32
- lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
- lightly_studio-0.3.2.dist-info/METADATA +689 -0
- {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.2.dist-info}/RECORD +104 -91
- lightly_studio/api/db.py +0 -133
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +0 -93
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +0 -3
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D5-A_Ffd.js +0 -4
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +0 -2
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +0 -6
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +0 -1
- lightly_studio-0.3.1.dist-info/METADATA +0 -520
- /lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{OpenSans- → OpenSans-Medium.DVUZMR_6.ttf} +0 -0
- {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.2.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
"""File listing utilities using fsspec.
|
|
2
|
+
|
|
3
|
+
Handles local and remote paths, directories, and glob patterns.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from collections.abc import Iterator
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import fsspec
|
|
13
|
+
from tqdm import tqdm
|
|
14
|
+
|
|
15
|
+
# Constants
|
|
16
|
+
PROTOCOL_SEPARATOR = "://"
|
|
17
|
+
DEFAULT_PROTOCOL = "file"
|
|
18
|
+
PATH_SEPARATOR = "/"
|
|
19
|
+
|
|
20
|
+
# Glob pattern characters
|
|
21
|
+
GLOB_CHARS = ["*", "?", "[", "]"]
|
|
22
|
+
|
|
23
|
+
# Cloud storage protocols
|
|
24
|
+
CLOUD_PROTOCOLS = ("s3", "gs", "gcs", "azure", "abfs")
|
|
25
|
+
|
|
26
|
+
# Image file extensions
|
|
27
|
+
IMAGE_EXTENSIONS = {
|
|
28
|
+
".png",
|
|
29
|
+
".jpg",
|
|
30
|
+
".jpeg",
|
|
31
|
+
".gif",
|
|
32
|
+
".webp",
|
|
33
|
+
".bmp",
|
|
34
|
+
".tiff",
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def iter_files_from_path(path: str, allowed_extensions: set[str] | None = None) -> Iterator[str]:
|
|
39
|
+
"""List all files from a single path, handling directories, globs, and individual files.
|
|
40
|
+
|
|
41
|
+
Args:
|
|
42
|
+
path: A single path which can be:
|
|
43
|
+
- Individual file path
|
|
44
|
+
- Directory path (will list all files recursively)
|
|
45
|
+
- Glob pattern
|
|
46
|
+
- Remote path (s3://, gcs://, etc.)
|
|
47
|
+
allowed_extensions: Optional set of allowed file extensions (e.g., {".jpg", ".png"}).
|
|
48
|
+
If None, uses default IMAGE_EXTENSIONS.
|
|
49
|
+
|
|
50
|
+
Yields:
|
|
51
|
+
File paths as they are discovered, with progress tracking
|
|
52
|
+
"""
|
|
53
|
+
seen: set[str] = set()
|
|
54
|
+
extensions = allowed_extensions or IMAGE_EXTENSIONS
|
|
55
|
+
with tqdm(desc="Discovering files", unit=" files", dynamic_ncols=True) as pbar:
|
|
56
|
+
cleaned_path = str(path).strip()
|
|
57
|
+
if not cleaned_path:
|
|
58
|
+
return
|
|
59
|
+
fs = _get_filesystem(cleaned_path)
|
|
60
|
+
yield from _process_single_path_streaming(fs, cleaned_path, seen, pbar, extensions)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _process_single_path_streaming(
|
|
64
|
+
fs: fsspec.AbstractFileSystem, path: str, seen: set[str], pbar: tqdm[Any], extensions: set[str]
|
|
65
|
+
) -> Iterator[str]:
|
|
66
|
+
"""Process a single path and yield matching image files.
|
|
67
|
+
|
|
68
|
+
Handles different path types: individual files, directories, and glob patterns.
|
|
69
|
+
|
|
70
|
+
Args:
|
|
71
|
+
fs: The filesystem instance.
|
|
72
|
+
path: The path to process (file, directory, or glob pattern).
|
|
73
|
+
seen: Set of already processed paths to avoid duplicates.
|
|
74
|
+
pbar: Progress bar instance for tracking progress.
|
|
75
|
+
extensions: Set of allowed file extensions.
|
|
76
|
+
|
|
77
|
+
Yields:
|
|
78
|
+
File paths that match the criteria.
|
|
79
|
+
|
|
80
|
+
Raises:
|
|
81
|
+
ValueError: If the path doesn't exist or is not an image file when expected.
|
|
82
|
+
"""
|
|
83
|
+
if _is_glob_pattern(path):
|
|
84
|
+
yield from _process_glob_pattern(fs, path, seen, pbar, extensions)
|
|
85
|
+
elif not fs.exists(path):
|
|
86
|
+
raise ValueError(f"Path does not exist: {path}")
|
|
87
|
+
elif fs.isfile(path):
|
|
88
|
+
if _is_image_file(path, extensions) and path not in seen:
|
|
89
|
+
seen.add(path)
|
|
90
|
+
pbar.update(1)
|
|
91
|
+
yield path
|
|
92
|
+
elif not _is_image_file(path, extensions):
|
|
93
|
+
raise ValueError(f"File is not an image: {path}")
|
|
94
|
+
elif fs.isdir(path):
|
|
95
|
+
for file_path in _stream_files_from_directory(fs, path, extensions):
|
|
96
|
+
if file_path not in seen:
|
|
97
|
+
seen.add(file_path)
|
|
98
|
+
pbar.update(1)
|
|
99
|
+
yield file_path
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _process_glob_pattern(
|
|
103
|
+
fs: fsspec.AbstractFileSystem, path: str, seen: set[str], pbar: tqdm[Any], extensions: set[str]
|
|
104
|
+
) -> Iterator[str]:
|
|
105
|
+
"""Process glob pattern and yield matching image files.
|
|
106
|
+
|
|
107
|
+
Args:
|
|
108
|
+
fs: The filesystem instance.
|
|
109
|
+
path: The glob pattern path.
|
|
110
|
+
seen: Set of already processed paths to avoid duplicates.
|
|
111
|
+
pbar: Progress bar instance for tracking progress.
|
|
112
|
+
extensions: Set of allowed file extensions.
|
|
113
|
+
|
|
114
|
+
Yields:
|
|
115
|
+
File paths that match the glob pattern and allowed extensions.
|
|
116
|
+
"""
|
|
117
|
+
matching_paths = fs.glob(path)
|
|
118
|
+
for p in matching_paths:
|
|
119
|
+
path_str = str(p)
|
|
120
|
+
if _needs_protocol_prefix(path_str, fs):
|
|
121
|
+
protocol = _get_protocol_string(fs)
|
|
122
|
+
path_str = f"{protocol}{PROTOCOL_SEPARATOR}{path_str}"
|
|
123
|
+
if fs.isfile(path_str) and _is_image_file(path_str, extensions) and path_str not in seen:
|
|
124
|
+
seen.add(path_str)
|
|
125
|
+
pbar.update(1)
|
|
126
|
+
yield path_str
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _stream_files_from_directory(
|
|
130
|
+
fs: fsspec.AbstractFileSystem, path: str, extensions: set[str]
|
|
131
|
+
) -> Iterator[str]:
|
|
132
|
+
"""Stream files from a directory with progress tracking.
|
|
133
|
+
|
|
134
|
+
Args:
|
|
135
|
+
fs: The filesystem instance
|
|
136
|
+
path: Directory path to list
|
|
137
|
+
extensions: Set of allowed file extensions
|
|
138
|
+
|
|
139
|
+
Yields:
|
|
140
|
+
File paths as they are discovered
|
|
141
|
+
"""
|
|
142
|
+
try:
|
|
143
|
+
protocol = _get_protocol_string(fs)
|
|
144
|
+
if protocol in CLOUD_PROTOCOLS:
|
|
145
|
+
yield from _stream_files_using_walk(fs, path, extensions)
|
|
146
|
+
else:
|
|
147
|
+
try:
|
|
148
|
+
all_paths = fs.find(path, detail=False)
|
|
149
|
+
for p in all_paths:
|
|
150
|
+
if fs.isfile(p) and _is_image_file(p, extensions):
|
|
151
|
+
yield p
|
|
152
|
+
except Exception as e:
|
|
153
|
+
logging.warning(f"fs.find() failed for {path}, trying alternative method: {e}")
|
|
154
|
+
yield from _stream_files_using_walk(fs, path, extensions)
|
|
155
|
+
except Exception as e:
|
|
156
|
+
logging.error(f"Error streaming files from '{path}': {e}")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _stream_files_using_walk(
|
|
160
|
+
fs: fsspec.AbstractFileSystem, path: str, extensions: set[str]
|
|
161
|
+
) -> Iterator[str]:
|
|
162
|
+
"""Stream files using fs.walk() method.
|
|
163
|
+
|
|
164
|
+
Args:
|
|
165
|
+
fs: The filesystem instance.
|
|
166
|
+
path: The directory path to walk.
|
|
167
|
+
extensions: Set of allowed file extensions.
|
|
168
|
+
|
|
169
|
+
Yields:
|
|
170
|
+
File paths that match the allowed extensions.
|
|
171
|
+
"""
|
|
172
|
+
|
|
173
|
+
def add_protocol_if_needed(p: str) -> str:
|
|
174
|
+
if _needs_protocol_prefix(p, fs):
|
|
175
|
+
protocol = _get_protocol_string(fs)
|
|
176
|
+
return f"{protocol}{PROTOCOL_SEPARATOR}{p}"
|
|
177
|
+
return p
|
|
178
|
+
|
|
179
|
+
for root, _dirs, files in fs.walk(path):
|
|
180
|
+
for file in files:
|
|
181
|
+
if not root.endswith(PATH_SEPARATOR):
|
|
182
|
+
full_path = f"{root}{PATH_SEPARATOR}{file}"
|
|
183
|
+
else:
|
|
184
|
+
full_path = f"{root}{file}"
|
|
185
|
+
full_path = add_protocol_if_needed(full_path)
|
|
186
|
+
if _is_image_file(full_path, extensions):
|
|
187
|
+
yield full_path
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _get_filesystem(path: str) -> fsspec.AbstractFileSystem:
|
|
191
|
+
"""Get the appropriate filesystem for the given path.
|
|
192
|
+
|
|
193
|
+
Args:
|
|
194
|
+
path: The path to determine the filesystem for. Can be local or remote.
|
|
195
|
+
|
|
196
|
+
Returns:
|
|
197
|
+
An fsspec filesystem instance appropriate for the path's protocol.
|
|
198
|
+
|
|
199
|
+
Raises:
|
|
200
|
+
ValueError: If the protocol cannot be determined or is invalid.
|
|
201
|
+
"""
|
|
202
|
+
protocol = path.split(PROTOCOL_SEPARATOR)[0] if PROTOCOL_SEPARATOR in path else DEFAULT_PROTOCOL
|
|
203
|
+
|
|
204
|
+
# Ensure protocol is a string, not a tuple
|
|
205
|
+
if isinstance(protocol, (list, tuple)):
|
|
206
|
+
protocol = protocol[0]
|
|
207
|
+
|
|
208
|
+
return fsspec.filesystem(protocol)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _is_glob_pattern(path: str) -> bool:
|
|
212
|
+
"""Check if a path contains glob pattern characters.
|
|
213
|
+
|
|
214
|
+
Args:
|
|
215
|
+
path: The path to check for glob patterns.
|
|
216
|
+
|
|
217
|
+
Returns:
|
|
218
|
+
True if the path contains glob pattern characters (*, ?, [, ]), False otherwise.
|
|
219
|
+
"""
|
|
220
|
+
return any(char in path for char in GLOB_CHARS)
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _is_image_file(path: str, extensions: set[str]) -> bool:
|
|
224
|
+
"""Check if a file is an image based on its extension.
|
|
225
|
+
|
|
226
|
+
Args:
|
|
227
|
+
path: The file path to check.
|
|
228
|
+
extensions: Set of allowed file extensions (e.g., {'.jpg', '.png'}).
|
|
229
|
+
|
|
230
|
+
Returns:
|
|
231
|
+
True if the file has an allowed image extension, False otherwise.
|
|
232
|
+
"""
|
|
233
|
+
path_lower = path.lower()
|
|
234
|
+
return any(path_lower.endswith(ext) for ext in extensions)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _needs_protocol_prefix(path: str, fs: fsspec.AbstractFileSystem) -> bool:
|
|
238
|
+
"""Check if a path needs protocol prefix.
|
|
239
|
+
|
|
240
|
+
Args:
|
|
241
|
+
path: The path to check.
|
|
242
|
+
fs: The filesystem instance.
|
|
243
|
+
|
|
244
|
+
Returns:
|
|
245
|
+
True if the path needs a protocol prefix (e.g., for cloud storage),
|
|
246
|
+
False if it is a local path.
|
|
247
|
+
"""
|
|
248
|
+
if PROTOCOL_SEPARATOR in path:
|
|
249
|
+
return False
|
|
250
|
+
|
|
251
|
+
if not hasattr(fs, "protocol"):
|
|
252
|
+
return False
|
|
253
|
+
|
|
254
|
+
protocol = getattr(fs, "protocol", DEFAULT_PROTOCOL)
|
|
255
|
+
# Handle case where protocol is a tuple (common with fsspec)
|
|
256
|
+
if isinstance(protocol, (list, tuple)):
|
|
257
|
+
protocol = protocol[0]
|
|
258
|
+
|
|
259
|
+
return str(protocol) != DEFAULT_PROTOCOL
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _get_protocol_string(fs: fsspec.AbstractFileSystem) -> str:
|
|
263
|
+
"""Get the protocol string from filesystem.
|
|
264
|
+
|
|
265
|
+
Args:
|
|
266
|
+
fs: The filesystem instance.
|
|
267
|
+
|
|
268
|
+
Returns:
|
|
269
|
+
The protocol string (e.g., 's3', 'file', 'gcs').
|
|
270
|
+
Returns 'file' as default if protocol cannot be determined.
|
|
271
|
+
"""
|
|
272
|
+
protocol = getattr(fs, "protocol", DEFAULT_PROTOCOL)
|
|
273
|
+
if isinstance(protocol, (list, tuple)):
|
|
274
|
+
return str(protocol[0])
|
|
275
|
+
return str(protocol)
|
lightly_studio/dataset/loader.py
CHANGED
|
@@ -8,6 +8,7 @@ from pathlib import Path
|
|
|
8
8
|
from typing import Iterable
|
|
9
9
|
from uuid import UUID
|
|
10
10
|
|
|
11
|
+
import fsspec
|
|
11
12
|
import PIL
|
|
12
13
|
from labelformat.formats import (
|
|
13
14
|
COCOInstanceSegmentationInput,
|
|
@@ -29,10 +30,10 @@ from labelformat.model.object_detection import (
|
|
|
29
30
|
from sqlmodel import Session
|
|
30
31
|
from tqdm import tqdm
|
|
31
32
|
|
|
32
|
-
from lightly_studio
|
|
33
|
+
from lightly_studio import db_manager
|
|
33
34
|
from lightly_studio.api.features import lightly_studio_active_features
|
|
34
35
|
from lightly_studio.api.server import Server
|
|
35
|
-
from lightly_studio.dataset import env
|
|
36
|
+
from lightly_studio.dataset import env, fsspec_lister
|
|
36
37
|
from lightly_studio.dataset.embedding_generator import EmbeddingGenerator
|
|
37
38
|
from lightly_studio.dataset.embedding_manager import (
|
|
38
39
|
EmbeddingManager,
|
|
@@ -324,7 +325,6 @@ class DatasetLoader:
|
|
|
324
325
|
self,
|
|
325
326
|
dataset_name: str,
|
|
326
327
|
img_dir: str,
|
|
327
|
-
recursive: bool = True,
|
|
328
328
|
allowed_extensions: Iterable[str] = {
|
|
329
329
|
".png",
|
|
330
330
|
".jpg",
|
|
@@ -340,31 +340,22 @@ class DatasetLoader:
|
|
|
340
340
|
Args:
|
|
341
341
|
dataset_name: The name for the new dataset.
|
|
342
342
|
img_dir: Path to the folder containing the images.
|
|
343
|
-
recursive: If True, search for images recursively in subfolders.
|
|
344
343
|
allowed_extensions: An iterable container of allowed image file
|
|
345
344
|
extensions.
|
|
346
345
|
"""
|
|
347
|
-
img_dir_path = Path(img_dir).absolute()
|
|
348
|
-
if not img_dir_path.exists() or not img_dir_path.is_dir():
|
|
349
|
-
raise ValueError(f"Input images folder is not a valid directory: {img_dir_path}")
|
|
350
|
-
|
|
351
346
|
# Create dataset.
|
|
352
347
|
dataset = dataset_resolver.create(
|
|
353
348
|
session=self.session,
|
|
354
349
|
dataset=DatasetCreate(
|
|
355
350
|
name=dataset_name,
|
|
356
|
-
directory=
|
|
351
|
+
directory=img_dir,
|
|
357
352
|
),
|
|
358
353
|
)
|
|
359
354
|
|
|
360
|
-
# Collect image file paths.
|
|
355
|
+
# Collect image file paths with extension filtering.
|
|
361
356
|
allowed_extensions_set = {ext.lower() for ext in allowed_extensions}
|
|
362
|
-
image_paths =
|
|
363
|
-
|
|
364
|
-
for path in path_iter:
|
|
365
|
-
if path.is_file() and path.suffix.lower() in allowed_extensions_set:
|
|
366
|
-
image_paths.append(path)
|
|
367
|
-
print(f"Found {len(image_paths)} images in {img_dir_path}.")
|
|
357
|
+
image_paths = list(fsspec_lister.iter_files_from_path(img_dir, allowed_extensions_set))
|
|
358
|
+
print(f"Found {len(image_paths)} images in {img_dir}.")
|
|
368
359
|
|
|
369
360
|
# Process images.
|
|
370
361
|
sample_ids = _create_samples_from_paths(
|
|
@@ -383,8 +374,37 @@ class DatasetLoader:
|
|
|
383
374
|
|
|
384
375
|
return dataset
|
|
385
376
|
|
|
377
|
+
def _validate_has_samples(self) -> None:
|
|
378
|
+
"""Validate that there are samples in the database before starting GUI.
|
|
379
|
+
|
|
380
|
+
Raises:
|
|
381
|
+
ValueError: If no samples are found in any dataset.
|
|
382
|
+
"""
|
|
383
|
+
# Check if any datasets exist
|
|
384
|
+
datasets = dataset_resolver.get_all(session=self.session, offset=0, limit=1)
|
|
385
|
+
|
|
386
|
+
if not datasets:
|
|
387
|
+
raise ValueError(
|
|
388
|
+
"No datasets found. Please load a dataset using one of the loader methods "
|
|
389
|
+
"(e.g., from_yolo(), from_directory(), etc.) before starting the GUI."
|
|
390
|
+
)
|
|
391
|
+
|
|
392
|
+
# Check if there are any samples in the first dataset
|
|
393
|
+
first_dataset = datasets[0]
|
|
394
|
+
sample_count = sample_resolver.count_by_dataset_id(
|
|
395
|
+
session=self.session, dataset_id=first_dataset.dataset_id
|
|
396
|
+
)
|
|
397
|
+
|
|
398
|
+
if sample_count == 0:
|
|
399
|
+
raise ValueError(
|
|
400
|
+
"No images have been indexed for the first dataset. "
|
|
401
|
+
"Please ensure your dataset contains valid images and try loading again."
|
|
402
|
+
)
|
|
403
|
+
|
|
386
404
|
def start_gui(self) -> None:
|
|
387
405
|
"""Launch the web interface for the loaded dataset."""
|
|
406
|
+
self._validate_has_samples()
|
|
407
|
+
|
|
388
408
|
server = Server(host=env.LIGHTLY_STUDIO_HOST, port=env.LIGHTLY_STUDIO_PORT)
|
|
389
409
|
|
|
390
410
|
print(f"Open the LightlyStudio GUI under: {env.APP_URL}")
|
|
@@ -395,7 +415,7 @@ class DatasetLoader:
|
|
|
395
415
|
def _create_samples_from_paths(
|
|
396
416
|
session: Session,
|
|
397
417
|
dataset_id: UUID,
|
|
398
|
-
image_paths: Iterable[
|
|
418
|
+
image_paths: Iterable[str],
|
|
399
419
|
) -> Iterator[UUID]:
|
|
400
420
|
"""Create samples from a list of image paths.
|
|
401
421
|
|
|
@@ -415,15 +435,14 @@ def _create_samples_from_paths(
|
|
|
415
435
|
unit=" images",
|
|
416
436
|
):
|
|
417
437
|
try:
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
image.close()
|
|
438
|
+
with fsspec.open(image_path, "rb") as file, PIL.Image.open(file) as img:
|
|
439
|
+
width, height = img.size
|
|
421
440
|
except (FileNotFoundError, PIL.UnidentifiedImageError, OSError):
|
|
422
441
|
continue
|
|
423
442
|
|
|
424
443
|
sample = SampleCreate(
|
|
425
|
-
file_name=image_path.name,
|
|
426
|
-
file_path_abs=
|
|
444
|
+
file_name=Path(image_path).name,
|
|
445
|
+
file_path_abs=image_path,
|
|
427
446
|
width=width,
|
|
428
447
|
height=height,
|
|
429
448
|
dataset_id=dataset_id,
|
|
@@ -477,10 +496,10 @@ def _process_object_detection_annotations(
|
|
|
477
496
|
sample_id=context.sample_id,
|
|
478
497
|
annotation_label_id=context.label_map[obj.category.id],
|
|
479
498
|
annotation_type="object_detection",
|
|
480
|
-
x=x,
|
|
481
|
-
y=y,
|
|
482
|
-
width=width,
|
|
483
|
-
height=height,
|
|
499
|
+
x=int(x),
|
|
500
|
+
y=int(y),
|
|
501
|
+
width=int(width),
|
|
502
|
+
height=int(height),
|
|
484
503
|
confidence=obj.confidence,
|
|
485
504
|
annotation_task_id=context.annotation_task_id,
|
|
486
505
|
)
|
|
@@ -512,10 +531,10 @@ def _process_instance_segmentation_annotations(
|
|
|
512
531
|
sample_id=context.sample_id,
|
|
513
532
|
annotation_label_id=context.label_map[obj.category.id],
|
|
514
533
|
annotation_type="instance_segmentation",
|
|
515
|
-
x=x,
|
|
516
|
-
y=y,
|
|
517
|
-
width=width,
|
|
518
|
-
height=height,
|
|
534
|
+
x=int(x),
|
|
535
|
+
y=int(y),
|
|
536
|
+
width=int(width),
|
|
537
|
+
height=int(height),
|
|
519
538
|
segmentation_mask=segmentation_rle,
|
|
520
539
|
annotation_task_id=context.annotation_task_id,
|
|
521
540
|
)
|
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
from typing import Callable
|
|
8
8
|
from uuid import UUID
|
|
9
9
|
|
|
10
|
+
import fsspec
|
|
10
11
|
import torch
|
|
11
12
|
from PIL import Image
|
|
12
13
|
from torch.utils.data import DataLoader, Dataset
|
|
@@ -31,7 +32,7 @@ class _ImageFileDataset(Dataset[torch.Tensor]):
|
|
|
31
32
|
|
|
32
33
|
def __init__(
|
|
33
34
|
self,
|
|
34
|
-
filepaths: list[
|
|
35
|
+
filepaths: list[str],
|
|
35
36
|
preprocess: Callable[[Image.Image], torch.Tensor],
|
|
36
37
|
) -> None:
|
|
37
38
|
self.filepaths = filepaths
|
|
@@ -41,8 +42,9 @@ class _ImageFileDataset(Dataset[torch.Tensor]):
|
|
|
41
42
|
return len(self.filepaths)
|
|
42
43
|
|
|
43
44
|
def __getitem__(self, idx: int) -> torch.Tensor:
|
|
44
|
-
|
|
45
|
-
|
|
45
|
+
with fsspec.open(self.filepaths[idx], "rb") as file:
|
|
46
|
+
image = Image.open(file).convert("RGB")
|
|
47
|
+
return self.preprocess(image)
|
|
46
48
|
|
|
47
49
|
|
|
48
50
|
class MobileCLIPEmbeddingGenerator(EmbeddingGenerator):
|
|
@@ -103,7 +105,7 @@ class MobileCLIPEmbeddingGenerator(EmbeddingGenerator):
|
|
|
103
105
|
embedding_list: list[float] = embedding.cpu().numpy().flatten().tolist()
|
|
104
106
|
return embedding_list
|
|
105
107
|
|
|
106
|
-
def embed_images(self, filepaths: list[
|
|
108
|
+
def embed_images(self, filepaths: list[str]) -> list[list[float]]:
|
|
107
109
|
"""Embed images with MobileCLIP.
|
|
108
110
|
|
|
109
111
|
Args:
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Module provides functions to initialize and manage the DuckDB."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
from pathlib import Path
|
|
8
|
+
from typing import Generator
|
|
9
|
+
|
|
10
|
+
from fastapi import Depends
|
|
11
|
+
from sqlalchemy.engine import Engine
|
|
12
|
+
from sqlalchemy.pool import Pool
|
|
13
|
+
from sqlmodel import Session, SQLModel, create_engine
|
|
14
|
+
from typing_extensions import Annotated
|
|
15
|
+
|
|
16
|
+
import lightly_studio.api.db_tables # noqa: F401, required for SQLModel to work properly
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class DatabaseEngine:
    """Database engine wrapper."""

    _engine_url: str
    _engine: Engine
    _persistent_session: Session | None = None

    def __init__(
        self,
        engine_url: str | None = None,
        cleanup_existing: bool = False,
        poolclass: type[Pool] | None = None,
    ) -> None:
        """Create a new instance of the DatabaseEngine.

        Args:
            engine_url: The database engine URL. If None, defaults to a local DuckDB file.
            cleanup_existing: If True, deletes the existing database file if it exists.
            poolclass: The SQLAlchemy pool class to use. Use StaticPool for
                in-memory databases for testing, otherwise different DB connections
                connect to different in-memory databases.
        """
        self._engine_url = engine_url or "duckdb:///lightly_studio.db"
        if cleanup_existing:
            _cleanup_database_file(engine_url=self._engine_url)
        self._engine = create_engine(url=self._engine_url, poolclass=poolclass)
        SQLModel.metadata.create_all(self._engine)

    @contextmanager
    def session(self) -> Generator[Session, None, None]:
        """Create a short-lived database session. The session is autoclosed."""
        # The code below addresses a foreign key constraint violation issue
        # with DuckDB. See LIG-7527 for more details.
        persistent = self.get_persistent_session()
        if persistent.in_transaction():
            logging.debug("The persistent session is in transaction, committing changes.")
            persistent.commit()

        short_lived = Session(self._engine, close_resets_only=False)
        try:
            yield short_lived
            # Only reached when the caller's block raised nothing.
            short_lived.commit()
        except Exception:
            short_lived.rollback()
            raise
        finally:
            short_lived.close()

    def get_persistent_session(self) -> Session:
        """Get the persistent database session, creating it lazily on first use."""
        cached = self._persistent_session
        if cached is None:
            cached = Session(self._engine, close_resets_only=False)
            self._persistent_session = cached
        return cached
|
72
|
+
|
|
73
|
+
# Global database engine instance instantiated lazily.
# Access only through get_engine()/set_engine() below; reading it directly
# bypasses the lazy-initialization and already-set checks.
_engine: DatabaseEngine | None = None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def get_engine() -> DatabaseEngine:
    """Get the database engine.

    If the engine does not exist yet, it is newly created with the default settings.
    In that case, a pre-existing database file is deleted.
    """
    global _engine  # noqa: PLW0603
    if _engine is not None:
        return _engine
    # First access: build the default engine, wiping any stale database file.
    _engine = DatabaseEngine(cleanup_existing=True)
    return _engine
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def set_engine(engine: DatabaseEngine) -> None:
    """Set the database engine.

    Raises:
        RuntimeError: If a database engine has already been set.
    """
    global _engine  # noqa: PLW0603
    if _engine is None:
        _engine = engine
    else:
        raise RuntimeError("Database engine is already set and cannot be changed.")
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def connect(db_file: str | None, cleanup_existing: bool = False) -> None:
    """Set up the database connection.

    Helper function to set up the database engine.

    Args:
        db_file: The path to the DuckDB file. If None, uses a default, see DatabaseEngine class.
        cleanup_existing: If True, deletes the pre-existing database file if a file database
            is used.
    """
    if db_file is None:
        engine_url = None  # DatabaseEngine falls back to its default URL.
    else:
        engine_url = f"duckdb:///{db_file}"
    new_engine = DatabaseEngine(engine_url=engine_url, cleanup_existing=cleanup_existing)
    set_engine(engine=new_engine)
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
@contextmanager
def session() -> Generator[Session, None, None]:
    """Create a short-lived database session. The session is autoclosed."""
    engine = get_engine()
    with engine.session() as db_session:
        yield db_session
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def persistent_session() -> Session:
    """Create a persistent session."""
    engine = get_engine()
    return engine.get_persistent_session()
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
def _cleanup_database_file(engine_url: str) -> None:
|
|
125
|
+
"""Delete database file if it exists.
|
|
126
|
+
|
|
127
|
+
Args:
|
|
128
|
+
engine_url: The database engine URL
|
|
129
|
+
"""
|
|
130
|
+
db_file = Path(engine_url.replace("duckdb:///", ""))
|
|
131
|
+
if db_file.exists() and db_file.is_file():
|
|
132
|
+
db_file.unlink()
|
|
133
|
+
logging.info(f"Deleted existing database: {db_file}")
|
|
134
|
+
|
|
135
|
+
|
|
136
|
+
def _session_dependency() -> Generator[Session, None, None]:
    """Session dependency for FastAPI routes.

    We need to convert the context manager to a generator.
    """
    with session() as db_session:
        yield db_session
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
# FastAPI dependency alias: route handlers annotate a parameter with SessionDep
# to receive a short-lived Session from _session_dependency per request.
SessionDep = Annotated[Session, Depends(_session_dependency)]
|
lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BBm0IWdq.css
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
.spinner.svelte-18zpdab{fill:hsl(var(--primary) / var(--tw-bg-opacity, 1))}[data-select-viewport],[data-combobox-viewport]{scrollbar-width:none!important;-ms-overflow-style:none!important;-webkit-overflow-scrolling:touch!important}[data-combobox-viewport]::-webkit-scrollbar{display:none!important}[data-select-viewport]::-webkit-scrollbar{display:none!important}.crop.svelte-20k33o{position:relative;overflow:hidden}.annotation-box.svelte-20k33o{position:absolute;border:1px solid rgba(0,0,0,0);box-sizing:content-box}.annotation-label.svelte-20k33o{position:absolute;transform:translate3d(-1px,-100%,0);padding:1px 6px 2px;white-space:nowrap;cursor:pointer}.annotation-selected.svelte-20k33o{outline:drop-shadow(1px 1px 1px hsl(var(--primary))) drop-shadow(1px -1px 1px hsl(var(--primary))) drop-shadow(-1px -1px 1px hsl(var(--primary))) drop-shadow(-1px 1px 1px hsl(var(--primary)))}.sample-image.svelte-1c6bz0a{width:var(--sample-width);height:var(--sample-height);-o-object-fit:var(--object-fit);object-fit:var(--object-fit)}.viewport.svelte-gzepom{overflow-y:hidden}.group.svelte-15p83yz{outline:0}
|
lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BNTuXSAe.css
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
.spinner.svelte-18zpdab{fill:hsl(var(--primary) / var(--tw-bg-opacity, 1))}.crop.svelte-20k33o{position:relative;overflow:hidden}.annotation-box.svelte-20k33o{position:absolute;border:1px solid rgba(0,0,0,0);box-sizing:content-box}.annotation-label.svelte-20k33o{position:absolute;transform:translate3d(-1px,-100%,0);padding:1px 6px 2px;white-space:nowrap;cursor:pointer}.annotation-selected.svelte-20k33o{outline:drop-shadow(1px 1px 1px hsl(var(--primary))) drop-shadow(1px -1px 1px hsl(var(--primary))) drop-shadow(-1px -1px 1px hsl(var(--primary))) drop-shadow(-1px 1px 1px hsl(var(--primary)))}.sample-image.svelte-1c6bz0a{width:var(--sample-width);height:var(--sample-height);-o-object-fit:var(--object-fit);object-fit:var(--object-fit)}.viewport.svelte-gzepom{overflow-y:hidden}.group.svelte-15p83yz{outline:0}
|