lightly-studio 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lightly-studio might be problematic. Click here for more details.
- lightly_studio/__init__.py +4 -4
- lightly_studio/api/app.py +7 -5
- lightly_studio/api/db_tables.py +0 -3
- lightly_studio/api/routes/api/annotation.py +32 -16
- lightly_studio/api/routes/api/annotation_label.py +2 -5
- lightly_studio/api/routes/api/annotations/__init__.py +7 -0
- lightly_studio/api/routes/api/annotations/create_annotation.py +52 -0
- lightly_studio/api/routes/api/classifier.py +2 -5
- lightly_studio/api/routes/api/dataset.py +5 -8
- lightly_studio/api/routes/api/dataset_tag.py +2 -3
- lightly_studio/api/routes/api/embeddings2d.py +104 -0
- lightly_studio/api/routes/api/export.py +73 -0
- lightly_studio/api/routes/api/metadata.py +2 -4
- lightly_studio/api/routes/api/sample.py +5 -13
- lightly_studio/api/routes/api/selection.py +87 -0
- lightly_studio/api/routes/api/settings.py +2 -6
- lightly_studio/api/routes/images.py +6 -6
- lightly_studio/core/add_samples.py +374 -0
- lightly_studio/core/dataset.py +272 -400
- lightly_studio/core/dataset_query/boolean_expression.py +67 -0
- lightly_studio/core/dataset_query/dataset_query.py +216 -0
- lightly_studio/core/dataset_query/field.py +113 -0
- lightly_studio/core/dataset_query/field_expression.py +79 -0
- lightly_studio/core/dataset_query/match_expression.py +23 -0
- lightly_studio/core/dataset_query/order_by.py +79 -0
- lightly_studio/core/dataset_query/sample_field.py +28 -0
- lightly_studio/core/dataset_query/tags_expression.py +46 -0
- lightly_studio/core/sample.py +159 -32
- lightly_studio/core/start_gui.py +35 -0
- lightly_studio/dataset/edge_embedding_generator.py +13 -8
- lightly_studio/dataset/embedding_generator.py +2 -3
- lightly_studio/dataset/embedding_manager.py +74 -6
- lightly_studio/dataset/env.py +4 -0
- lightly_studio/dataset/file_utils.py +13 -2
- lightly_studio/dataset/fsspec_lister.py +275 -0
- lightly_studio/dataset/loader.py +49 -84
- lightly_studio/dataset/mobileclip_embedding_generator.py +9 -6
- lightly_studio/db_manager.py +145 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.CA_CXIBb.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.DS78jgNY.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/index.BVs_sZj9.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/transform.D487hwJk.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/6t3IJ0vQ.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D6su9Aln.js → 8NsknIT2.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{x9G_hzyY.js → BND_-4Kp.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BylOuP6i.js → BdfTHw61.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DOlTMNyt.js → BfHVnyNT.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BjkP1AHA.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BuuNVL9G.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{O-EABkf9.js → BzKGpnl4.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CCx7Ho51.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{l7KrR96u.js → CH6P3X75.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D5-A_Ffd.js → CR2upx_Q.js} +2 -2
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CWPZrTTJ.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{C8I8rFJQ.js → Cs1XmhiF.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{CDnpyLsT.js → CwPowJfP.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxFKfZ9T.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cxevwdid.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DjfY96ND.js → D4whDBUi.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6r9vr07.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DA6bFLPR.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DEgUu98i.js +3 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DGTPl6Gk.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DKGxBSlK.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DQXoLcsF.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DQe_kdRt.js +92 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DcY4jgG3.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bu7uvVrG.js → RmD8FzRo.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/V-MnMC1X.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bsi3UGy5.js → keKYsoph.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BVr6DYqP.js +2 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.u7zsVvqp.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.Da2agmdd.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{1.B4rNYwVp.js → 1.B11tVRJV.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.l30Zud4h.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CgKPGcAP.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C8HLK8mj.js +857 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.CWHpKonm.js → 3.CLvg3QcJ.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.OUWOLQeV.js → 4.BQhDtXUI.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.-6XqWX5G.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.uBV1Lhat.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.BXsgoQZh.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.BkbcnUs8.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{9.CPu3CiBc.js → 9.Bkrv-Vww.js} +1 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/clustering.worker-DKqeLtG0.js +2 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/search.worker-vNSty3B0.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
- lightly_studio/dist_lightly_studio_view_app/index.html +14 -14
- lightly_studio/examples/example.py +13 -12
- lightly_studio/examples/example_coco.py +13 -0
- lightly_studio/examples/example_metadata.py +83 -98
- lightly_studio/examples/example_selection.py +7 -19
- lightly_studio/examples/example_split_work.py +12 -36
- lightly_studio/examples/{example_v2.py → example_yolo.py} +3 -4
- lightly_studio/export/export_dataset.py +65 -0
- lightly_studio/export/lightly_studio_label_input.py +120 -0
- lightly_studio/few_shot_classifier/classifier_manager.py +5 -26
- lightly_studio/metadata/compute_typicality.py +67 -0
- lightly_studio/models/annotation/annotation_base.py +18 -20
- lightly_studio/models/annotation/instance_segmentation.py +8 -8
- lightly_studio/models/annotation/object_detection.py +4 -4
- lightly_studio/models/dataset.py +6 -2
- lightly_studio/models/sample.py +10 -3
- lightly_studio/resolvers/annotation_label_resolver/__init__.py +2 -1
- lightly_studio/resolvers/annotation_label_resolver/get_all.py +15 -0
- lightly_studio/resolvers/annotation_resolver/__init__.py +2 -3
- lightly_studio/resolvers/annotation_resolver/create_many.py +3 -3
- lightly_studio/resolvers/annotation_resolver/delete_annotation.py +1 -1
- lightly_studio/resolvers/annotation_resolver/delete_annotations.py +7 -3
- lightly_studio/resolvers/annotation_resolver/get_by_id.py +19 -1
- lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +0 -1
- lightly_studio/resolvers/annotations/annotations_filter.py +1 -11
- lightly_studio/resolvers/dataset_resolver.py +10 -0
- lightly_studio/resolvers/embedding_model_resolver.py +22 -0
- lightly_studio/resolvers/sample_resolver.py +53 -9
- lightly_studio/resolvers/tag_resolver.py +23 -0
- lightly_studio/selection/mundig.py +7 -10
- lightly_studio/selection/select.py +55 -46
- lightly_studio/selection/select_via_db.py +23 -19
- lightly_studio/selection/selection_config.py +10 -4
- lightly_studio/services/annotations_service/__init__.py +12 -0
- lightly_studio/services/annotations_service/create_annotation.py +63 -0
- lightly_studio/services/annotations_service/delete_annotation.py +22 -0
- lightly_studio/services/annotations_service/update_annotation.py +21 -32
- lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
- lightly_studio-0.3.3.dist-info/METADATA +814 -0
- {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.3.dist-info}/RECORD +130 -113
- lightly_studio/api/db.py +0 -133
- lightly_studio/api/routes/api/annotation_task.py +0 -38
- lightly_studio/api/routes/api/metrics.py +0 -80
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.DenzbfeK.css +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.T-zjSUd3.css +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +0 -93
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +0 -3
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/hQVEETDE.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/r64xT6ao.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +0 -2
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +0 -6
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +0 -1
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +0 -1
- lightly_studio/metrics/detection/__init__.py +0 -0
- lightly_studio/metrics/detection/map.py +0 -268
- lightly_studio/models/annotation_task.py +0 -28
- lightly_studio/resolvers/annotation_resolver/create.py +0 -19
- lightly_studio/resolvers/annotation_task_resolver.py +0 -31
- lightly_studio-0.3.1.dist-info/METADATA +0 -520
- /lightly_studio/{metrics → core/dataset_query}/__init__.py +0 -0
- /lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{OpenSans- → OpenSans-Medium.DVUZMR_6.ttf} +0 -0
- {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.3.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,275 @@
|
|
|
1
|
+
"""File listing utilities using fsspec.
|
|
2
|
+
|
|
3
|
+
Handles local and remote paths, directories, and glob patterns.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from __future__ import annotations
|
|
7
|
+
|
|
8
|
+
import logging
|
|
9
|
+
from collections.abc import Iterator
|
|
10
|
+
from typing import Any
|
|
11
|
+
|
|
12
|
+
import fsspec
|
|
13
|
+
from tqdm import tqdm
|
|
14
|
+
|
|
15
|
+
# Constants
# Separator between a protocol and the rest of a URL, e.g. "s3://bucket/key".
PROTOCOL_SEPARATOR = "://"
# Protocol assumed for paths that carry no explicit "://" prefix.
DEFAULT_PROTOCOL = "file"
# fsspec paths use forward slashes regardless of the host OS.
PATH_SEPARATOR = "/"

# Glob pattern characters
GLOB_CHARS = ["*", "?", "[", "]"]

# Cloud storage protocols
# For these, directory listing goes through fs.walk() rather than fs.find()
# (see _stream_files_from_directory).
CLOUD_PROTOCOLS = ("s3", "gs", "gcs", "azure", "abfs")

# Image file extensions
# Default extension filter used when the caller does not supply one.
IMAGE_EXTENSIONS = {
    ".png",
    ".jpg",
    ".jpeg",
    ".gif",
    ".webp",
    ".bmp",
    ".tiff",
}
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def iter_files_from_path(path: str, allowed_extensions: set[str] | None = None) -> Iterator[str]:
    """List all files from a single path, handling directories, globs, and individual files.

    Args:
        path: A single path which can be:
            - Individual file path
            - Directory path (will list all files recursively)
            - Glob pattern
            - Remote path (s3://, gcs://, etc.)
        allowed_extensions: Optional set of allowed file extensions (e.g., {".jpg", ".png"}).
            If None, uses default IMAGE_EXTENSIONS.

    Yields:
        File paths as they are discovered, with progress tracking
    """
    seen: set[str] = set()
    # Fall back to IMAGE_EXTENSIONS only when no set was given. An explicitly
    # empty set means "match nothing", per the documented contract; the former
    # `allowed_extensions or IMAGE_EXTENSIONS` silently overrode it.
    extensions = IMAGE_EXTENSIONS if allowed_extensions is None else allowed_extensions
    with tqdm(desc="Discovering files", unit=" files", dynamic_ncols=True) as pbar:
        cleaned_path = str(path).strip()
        if not cleaned_path:
            # Nothing to discover for a blank path; yield no files.
            return
        fs = _get_filesystem(cleaned_path)
        yield from _process_single_path_streaming(fs, cleaned_path, seen, pbar, extensions)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def _process_single_path_streaming(
    fs: fsspec.AbstractFileSystem, path: str, seen: set[str], pbar: tqdm[Any], extensions: set[str]
) -> Iterator[str]:
    """Process a single path and yield matching image files.

    Dispatches on the path kind: glob pattern, individual file, or directory.

    Args:
        fs: The filesystem instance.
        path: The path to process (file, directory, or glob pattern).
        seen: Set of already processed paths to avoid duplicates.
        pbar: Progress bar instance for tracking progress.
        extensions: Set of allowed file extensions.

    Yields:
        File paths that match the criteria.

    Raises:
        ValueError: If the path doesn't exist or is not an image file when expected.
    """
    # Glob patterns are handled first: they generally do not "exist" as
    # literal paths, so the existence check below would reject them.
    if _is_glob_pattern(path):
        yield from _process_glob_pattern(fs, path, seen, pbar, extensions)
        return
    if not fs.exists(path):
        raise ValueError(f"Path does not exist: {path}")
    if fs.isfile(path):
        if not _is_image_file(path, extensions):
            raise ValueError(f"File is not an image: {path}")
        if path in seen:
            # Duplicate single-file path: silently skip, matching prior runs.
            return
        seen.add(path)
        pbar.update(1)
        yield path
        return
    if fs.isdir(path):
        for candidate in _stream_files_from_directory(fs, path, extensions):
            if candidate in seen:
                continue
            seen.add(candidate)
            pbar.update(1)
            yield candidate
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _process_glob_pattern(
    fs: fsspec.AbstractFileSystem, path: str, seen: set[str], pbar: tqdm[Any], extensions: set[str]
) -> Iterator[str]:
    """Process glob pattern and yield matching image files.

    Args:
        fs: The filesystem instance.
        path: The glob pattern path.
        seen: Set of already processed paths to avoid duplicates.
        pbar: Progress bar instance for tracking progress.
        extensions: Set of allowed file extensions.

    Yields:
        File paths that match the glob pattern and allowed extensions.
    """
    for match in fs.glob(path):
        candidate = str(match)
        # Remote filesystems return protocol-less paths from glob();
        # re-attach the scheme so downstream consumers get full URLs.
        if _needs_protocol_prefix(candidate, fs):
            candidate = f"{_get_protocol_string(fs)}{PROTOCOL_SEPARATOR}{candidate}"
        if not fs.isfile(candidate):
            continue
        if not _is_image_file(candidate, extensions):
            continue
        if candidate in seen:
            continue
        seen.add(candidate)
        pbar.update(1)
        yield candidate
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _stream_files_from_directory(
    fs: fsspec.AbstractFileSystem, path: str, extensions: set[str]
) -> Iterator[str]:
    """Stream files from a directory with progress tracking.

    Args:
        fs: The filesystem instance
        path: Directory path to list
        extensions: Set of allowed file extensions

    Yields:
        File paths as they are discovered
    """
    try:
        protocol = _get_protocol_string(fs)
        if protocol in CLOUD_PROTOCOLS:
            # Cloud backends go straight to walk(); find() can issue large
            # up-front listing requests on object stores.
            yield from _stream_files_using_walk(fs, path, extensions)
        else:
            try:
                all_paths = fs.find(path, detail=False)
                for p in all_paths:
                    if fs.isfile(p) and _is_image_file(p, extensions):
                        yield p
            except Exception as e:
                # Lazy %-args: the message is only formatted if the record is
                # actually emitted (f-strings always paid the cost).
                logging.warning(
                    "fs.find() failed for %s, trying alternative method: %s", path, e
                )
                yield from _stream_files_using_walk(fs, path, extensions)
    except Exception as e:
        # Deliberate best-effort: a listing failure is logged, not raised,
        # so one bad directory does not abort the whole discovery run.
        logging.error("Error streaming files from '%s': %s", path, e)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def _stream_files_using_walk(
    fs: fsspec.AbstractFileSystem, path: str, extensions: set[str]
) -> Iterator[str]:
    """Stream files using fs.walk() method.

    Args:
        fs: The filesystem instance.
        path: The directory path to walk.
        extensions: Set of allowed file extensions.

    Yields:
        File paths that match the allowed extensions.
    """
    for root, _subdirs, filenames in fs.walk(path):
        # Root is invariant within the inner loop, so the separator check is
        # hoisted: append "/" only when the root does not already end with it.
        base = root if root.endswith(PATH_SEPARATOR) else root + PATH_SEPARATOR
        for name in filenames:
            candidate = base + name
            if _needs_protocol_prefix(candidate, fs):
                candidate = f"{_get_protocol_string(fs)}{PROTOCOL_SEPARATOR}{candidate}"
            if _is_image_file(candidate, extensions):
                yield candidate
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def _get_filesystem(path: str) -> fsspec.AbstractFileSystem:
    """Get the appropriate filesystem for the given path.

    Args:
        path: The path to determine the filesystem for. Can be local or remote.

    Returns:
        An fsspec filesystem instance appropriate for the path's protocol.

    Raises:
        ValueError: If the protocol cannot be determined or is invalid.
    """
    # str.split always returns a list of strings, so element [0] is a str;
    # the previous isinstance(list/tuple) normalization was unreachable.
    if PROTOCOL_SEPARATOR in path:
        protocol = path.split(PROTOCOL_SEPARATOR, 1)[0]
    else:
        protocol = DEFAULT_PROTOCOL
    return fsspec.filesystem(protocol)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
def _is_glob_pattern(path: str) -> bool:
    """Check if a path contains glob pattern characters.

    Args:
        path: The path to check for glob patterns.

    Returns:
        True if the path contains glob pattern characters (*, ?, [, ]), False otherwise.
    """
    for special in GLOB_CHARS:
        if special in path:
            return True
    return False
|
|
221
|
+
|
|
222
|
+
|
|
223
|
+
def _is_image_file(path: str, extensions: set[str]) -> bool:
|
|
224
|
+
"""Check if a file is an image based on its extension.
|
|
225
|
+
|
|
226
|
+
Args:
|
|
227
|
+
path: The file path to check.
|
|
228
|
+
extensions: Set of allowed file extensions (e.g., {'.jpg', '.png'}).
|
|
229
|
+
|
|
230
|
+
Returns:
|
|
231
|
+
True if the file has an allowed image extension, False otherwise.
|
|
232
|
+
"""
|
|
233
|
+
path_lower = path.lower()
|
|
234
|
+
return any(path_lower.endswith(ext) for ext in extensions)
|
|
235
|
+
|
|
236
|
+
|
|
237
|
+
def _needs_protocol_prefix(path: str, fs: fsspec.AbstractFileSystem) -> bool:
    """Check if a path needs protocol prefix.

    Args:
        path: The path to check.
        fs: The filesystem instance.

    Returns:
        True if the path needs a protocol prefix (e.g., for cloud storage),
        False if it is a local path or already carries a protocol.
    """
    # Paths that already carry an explicit protocol never need a prefix.
    if PROTOCOL_SEPARATOR in path:
        return False
    # Delegate the protocol extraction (including tuple/list normalization)
    # to _get_protocol_string instead of duplicating it here. When the
    # filesystem has no `protocol` attribute, the helper returns
    # DEFAULT_PROTOCOL, which yields False — same as the old explicit check.
    return _get_protocol_string(fs) != DEFAULT_PROTOCOL
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def _get_protocol_string(fs: fsspec.AbstractFileSystem) -> str:
    """Get the protocol string from filesystem.

    Args:
        fs: The filesystem instance.

    Returns:
        The protocol string (e.g., 's3', 'file', 'gcs').
        Returns 'file' as default if protocol cannot be determined.
    """
    raw = getattr(fs, "protocol", DEFAULT_PROTOCOL)
    # fsspec filesystems may advertise multiple protocol aliases as a
    # list/tuple; the first entry is the canonical name.
    if isinstance(raw, (list, tuple)):
        raw = raw[0]
    return str(raw)
|
lightly_studio/dataset/loader.py
CHANGED
|
@@ -8,6 +8,7 @@ from pathlib import Path
|
|
|
8
8
|
from typing import Iterable
|
|
9
9
|
from uuid import UUID
|
|
10
10
|
|
|
11
|
+
import fsspec
|
|
11
12
|
import PIL
|
|
12
13
|
from labelformat.formats import (
|
|
13
14
|
COCOInstanceSegmentationInput,
|
|
@@ -29,10 +30,10 @@ from labelformat.model.object_detection import (
|
|
|
29
30
|
from sqlmodel import Session
|
|
30
31
|
from tqdm import tqdm
|
|
31
32
|
|
|
32
|
-
from lightly_studio
|
|
33
|
+
from lightly_studio import db_manager
|
|
33
34
|
from lightly_studio.api.features import lightly_studio_active_features
|
|
34
35
|
from lightly_studio.api.server import Server
|
|
35
|
-
from lightly_studio.dataset import env
|
|
36
|
+
from lightly_studio.dataset import env, fsspec_lister
|
|
36
37
|
from lightly_studio.dataset.embedding_generator import EmbeddingGenerator
|
|
37
38
|
from lightly_studio.dataset.embedding_manager import (
|
|
38
39
|
EmbeddingManager,
|
|
@@ -40,16 +41,11 @@ from lightly_studio.dataset.embedding_manager import (
|
|
|
40
41
|
)
|
|
41
42
|
from lightly_studio.models.annotation.annotation_base import AnnotationCreate
|
|
42
43
|
from lightly_studio.models.annotation_label import AnnotationLabelCreate
|
|
43
|
-
from lightly_studio.models.annotation_task import (
|
|
44
|
-
AnnotationTaskTable,
|
|
45
|
-
AnnotationType,
|
|
46
|
-
)
|
|
47
44
|
from lightly_studio.models.dataset import DatasetCreate, DatasetTable
|
|
48
45
|
from lightly_studio.models.sample import SampleCreate, SampleTable
|
|
49
46
|
from lightly_studio.resolvers import (
|
|
50
47
|
annotation_label_resolver,
|
|
51
48
|
annotation_resolver,
|
|
52
|
-
annotation_task_resolver,
|
|
53
49
|
dataset_resolver,
|
|
54
50
|
sample_resolver,
|
|
55
51
|
)
|
|
@@ -66,7 +62,6 @@ class AnnotationProcessingContext:
|
|
|
66
62
|
dataset_id: UUID
|
|
67
63
|
sample_id: UUID
|
|
68
64
|
label_map: dict[int, UUID]
|
|
69
|
-
annotation_task_id: UUID
|
|
70
65
|
|
|
71
66
|
|
|
72
67
|
class DatasetLoader:
|
|
@@ -82,7 +77,6 @@ class DatasetLoader:
|
|
|
82
77
|
dataset: DatasetTable,
|
|
83
78
|
input_labels: ObjectDetectionInput | InstanceSegmentationInput,
|
|
84
79
|
img_dir: Path,
|
|
85
|
-
annotation_task_id: UUID,
|
|
86
80
|
) -> None:
|
|
87
81
|
"""Store a loaded dataset in database."""
|
|
88
82
|
# Create label mapping
|
|
@@ -119,7 +113,6 @@ class DatasetLoader:
|
|
|
119
113
|
samples_data=samples_image_data,
|
|
120
114
|
dataset_id=dataset.dataset_id,
|
|
121
115
|
label_map=label_map,
|
|
122
|
-
annotation_task_id=annotation_task_id,
|
|
123
116
|
annotations_to_create=annotations_to_create,
|
|
124
117
|
sample_ids=sample_ids,
|
|
125
118
|
)
|
|
@@ -136,7 +129,6 @@ class DatasetLoader:
|
|
|
136
129
|
samples_data=samples_image_data,
|
|
137
130
|
dataset_id=dataset.dataset_id,
|
|
138
131
|
label_map=label_map,
|
|
139
|
-
annotation_task_id=annotation_task_id,
|
|
140
132
|
annotations_to_create=annotations_to_create,
|
|
141
133
|
sample_ids=sample_ids,
|
|
142
134
|
)
|
|
@@ -187,23 +179,18 @@ class DatasetLoader:
|
|
|
187
179
|
input_labels=label_input,
|
|
188
180
|
dataset_name=dataset_name,
|
|
189
181
|
img_dir=str(img_dir),
|
|
190
|
-
is_prediction=False,
|
|
191
|
-
task_name=task_name,
|
|
192
182
|
)
|
|
193
183
|
|
|
194
184
|
def from_coco_object_detections(
|
|
195
185
|
self,
|
|
196
186
|
annotations_json_path: str,
|
|
197
187
|
img_dir: str,
|
|
198
|
-
task_name: str | None = None,
|
|
199
188
|
) -> DatasetTable:
|
|
200
189
|
"""Load a dataset in COCO Object Detection format and store in DB.
|
|
201
190
|
|
|
202
191
|
Args:
|
|
203
192
|
annotations_json_path: Path to the COCO annotations JSON file.
|
|
204
193
|
img_dir: Path to the folder containing the images.
|
|
205
|
-
task_name: Optional name for the annotation task. If None, a
|
|
206
|
-
default name is generated.
|
|
207
194
|
|
|
208
195
|
Returns:
|
|
209
196
|
DatasetTable: The created dataset table entry.
|
|
@@ -211,9 +198,6 @@ class DatasetLoader:
|
|
|
211
198
|
annotations_json = Path(annotations_json_path)
|
|
212
199
|
dataset_name = annotations_json.parent.name
|
|
213
200
|
|
|
214
|
-
if task_name is None:
|
|
215
|
-
task_name = f"Loaded from COCO Object Detection: {annotations_json.name}"
|
|
216
|
-
|
|
217
201
|
label_input = COCOObjectDetectionInput(
|
|
218
202
|
input_file=annotations_json,
|
|
219
203
|
)
|
|
@@ -223,23 +207,18 @@ class DatasetLoader:
|
|
|
223
207
|
input_labels=label_input,
|
|
224
208
|
dataset_name=dataset_name,
|
|
225
209
|
img_dir=str(img_dir_path),
|
|
226
|
-
is_prediction=False,
|
|
227
|
-
task_name=task_name,
|
|
228
210
|
)
|
|
229
211
|
|
|
230
212
|
def from_coco_instance_segmentations(
|
|
231
213
|
self,
|
|
232
214
|
annotations_json_path: str,
|
|
233
215
|
img_dir: str,
|
|
234
|
-
task_name: str | None = None,
|
|
235
216
|
) -> DatasetTable:
|
|
236
217
|
"""Load a dataset in COCO Instance Segmentation format and store in DB.
|
|
237
218
|
|
|
238
219
|
Args:
|
|
239
220
|
annotations_json_path: Path to the COCO annotations JSON file.
|
|
240
221
|
img_dir: Path to the folder containing the images.
|
|
241
|
-
task_name: Optional name for the annotation task. If None, a
|
|
242
|
-
default name is generated.
|
|
243
222
|
|
|
244
223
|
Returns:
|
|
245
224
|
DatasetTable: The created dataset table entry.
|
|
@@ -247,9 +226,6 @@ class DatasetLoader:
|
|
|
247
226
|
annotations_json = Path(annotations_json_path)
|
|
248
227
|
dataset_name = annotations_json.parent.name
|
|
249
228
|
|
|
250
|
-
if task_name is None:
|
|
251
|
-
task_name = f"Loaded from COCO Instance Segmentation: {annotations_json.name}"
|
|
252
|
-
|
|
253
229
|
label_input = COCOInstanceSegmentationInput(
|
|
254
230
|
input_file=annotations_json,
|
|
255
231
|
)
|
|
@@ -259,8 +235,6 @@ class DatasetLoader:
|
|
|
259
235
|
input_labels=label_input,
|
|
260
236
|
dataset_name=dataset_name,
|
|
261
237
|
img_dir=str(img_dir_path),
|
|
262
|
-
is_prediction=False,
|
|
263
|
-
task_name=task_name,
|
|
264
238
|
)
|
|
265
239
|
|
|
266
240
|
def from_labelformat(
|
|
@@ -268,8 +242,6 @@ class DatasetLoader:
|
|
|
268
242
|
input_labels: ObjectDetectionInput | InstanceSegmentationInput,
|
|
269
243
|
dataset_name: str,
|
|
270
244
|
img_dir: str,
|
|
271
|
-
is_prediction: bool = True,
|
|
272
|
-
task_name: str | None = None,
|
|
273
245
|
) -> DatasetTable:
|
|
274
246
|
"""Load a dataset from a labelformat object and store in database.
|
|
275
247
|
|
|
@@ -277,24 +249,12 @@ class DatasetLoader:
|
|
|
277
249
|
input_labels: The labelformat input object.
|
|
278
250
|
dataset_name: The name for the new dataset.
|
|
279
251
|
img_dir: Path to the folder containing the images.
|
|
280
|
-
is_prediction: Whether the task is for prediction or labels.
|
|
281
|
-
task_name: Optional name for the annotation task. If None, a
|
|
282
|
-
default name is generated.
|
|
283
252
|
|
|
284
253
|
Returns:
|
|
285
254
|
DatasetTable: The created dataset table entry.
|
|
286
255
|
"""
|
|
287
256
|
img_dir_path = Path(img_dir).absolute()
|
|
288
257
|
|
|
289
|
-
# Determine annotation type based on input.
|
|
290
|
-
# Currently, we always create BBOX tasks, even for segmentation,
|
|
291
|
-
# as segmentation data is stored alongside bounding boxes.
|
|
292
|
-
annotation_type = AnnotationType.BBOX
|
|
293
|
-
|
|
294
|
-
# Generate a default task name if none is provided.
|
|
295
|
-
if task_name is None:
|
|
296
|
-
task_name = f"Loaded from labelformat: {dataset_name}"
|
|
297
|
-
|
|
298
258
|
# Create dataset and annotation task.
|
|
299
259
|
dataset = dataset_resolver.create(
|
|
300
260
|
session=self.session,
|
|
@@ -303,20 +263,11 @@ class DatasetLoader:
|
|
|
303
263
|
directory=str(img_dir_path),
|
|
304
264
|
),
|
|
305
265
|
)
|
|
306
|
-
new_annotation_task = annotation_task_resolver.create(
|
|
307
|
-
session=self.session,
|
|
308
|
-
annotation_task=AnnotationTaskTable(
|
|
309
|
-
name=task_name,
|
|
310
|
-
annotation_type=annotation_type,
|
|
311
|
-
is_prediction=is_prediction,
|
|
312
|
-
),
|
|
313
|
-
)
|
|
314
266
|
|
|
315
267
|
self._load_into_dataset(
|
|
316
268
|
dataset=dataset,
|
|
317
269
|
input_labels=input_labels,
|
|
318
270
|
img_dir=img_dir_path,
|
|
319
|
-
annotation_task_id=new_annotation_task.annotation_task_id,
|
|
320
271
|
)
|
|
321
272
|
return dataset
|
|
322
273
|
|
|
@@ -324,7 +275,6 @@ class DatasetLoader:
|
|
|
324
275
|
self,
|
|
325
276
|
dataset_name: str,
|
|
326
277
|
img_dir: str,
|
|
327
|
-
recursive: bool = True,
|
|
328
278
|
allowed_extensions: Iterable[str] = {
|
|
329
279
|
".png",
|
|
330
280
|
".jpg",
|
|
@@ -340,31 +290,22 @@ class DatasetLoader:
|
|
|
340
290
|
Args:
|
|
341
291
|
dataset_name: The name for the new dataset.
|
|
342
292
|
img_dir: Path to the folder containing the images.
|
|
343
|
-
recursive: If True, search for images recursively in subfolders.
|
|
344
293
|
allowed_extensions: An iterable container of allowed image file
|
|
345
294
|
extensions.
|
|
346
295
|
"""
|
|
347
|
-
img_dir_path = Path(img_dir).absolute()
|
|
348
|
-
if not img_dir_path.exists() or not img_dir_path.is_dir():
|
|
349
|
-
raise ValueError(f"Input images folder is not a valid directory: {img_dir_path}")
|
|
350
|
-
|
|
351
296
|
# Create dataset.
|
|
352
297
|
dataset = dataset_resolver.create(
|
|
353
298
|
session=self.session,
|
|
354
299
|
dataset=DatasetCreate(
|
|
355
300
|
name=dataset_name,
|
|
356
|
-
directory=
|
|
301
|
+
directory=img_dir,
|
|
357
302
|
),
|
|
358
303
|
)
|
|
359
304
|
|
|
360
|
-
# Collect image file paths.
|
|
305
|
+
# Collect image file paths with extension filtering.
|
|
361
306
|
allowed_extensions_set = {ext.lower() for ext in allowed_extensions}
|
|
362
|
-
image_paths =
|
|
363
|
-
|
|
364
|
-
for path in path_iter:
|
|
365
|
-
if path.is_file() and path.suffix.lower() in allowed_extensions_set:
|
|
366
|
-
image_paths.append(path)
|
|
367
|
-
print(f"Found {len(image_paths)} images in {img_dir_path}.")
|
|
307
|
+
image_paths = list(fsspec_lister.iter_files_from_path(img_dir, allowed_extensions_set))
|
|
308
|
+
print(f"Found {len(image_paths)} images in {img_dir}.")
|
|
368
309
|
|
|
369
310
|
# Process images.
|
|
370
311
|
sample_ids = _create_samples_from_paths(
|
|
@@ -383,8 +324,37 @@ class DatasetLoader:
|
|
|
383
324
|
|
|
384
325
|
return dataset
|
|
385
326
|
|
|
327
|
+
def _validate_has_samples(self) -> None:
|
|
328
|
+
"""Validate that there are samples in the database before starting GUI.
|
|
329
|
+
|
|
330
|
+
Raises:
|
|
331
|
+
ValueError: If no samples are found in any dataset.
|
|
332
|
+
"""
|
|
333
|
+
# Check if any datasets exist
|
|
334
|
+
datasets = dataset_resolver.get_all(session=self.session, offset=0, limit=1)
|
|
335
|
+
|
|
336
|
+
if not datasets:
|
|
337
|
+
raise ValueError(
|
|
338
|
+
"No datasets found. Please load a dataset using one of the loader methods "
|
|
339
|
+
"(e.g., from_yolo(), from_directory(), etc.) before starting the GUI."
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
# Check if there are any samples in the first dataset
|
|
343
|
+
first_dataset = datasets[0]
|
|
344
|
+
sample_count = sample_resolver.count_by_dataset_id(
|
|
345
|
+
session=self.session, dataset_id=first_dataset.dataset_id
|
|
346
|
+
)
|
|
347
|
+
|
|
348
|
+
if sample_count == 0:
|
|
349
|
+
raise ValueError(
|
|
350
|
+
"No images have been indexed for the first dataset. "
|
|
351
|
+
"Please ensure your dataset contains valid images and try loading again."
|
|
352
|
+
)
|
|
353
|
+
|
|
386
354
|
def start_gui(self) -> None:
|
|
387
355
|
"""Launch the web interface for the loaded dataset."""
|
|
356
|
+
self._validate_has_samples()
|
|
357
|
+
|
|
388
358
|
server = Server(host=env.LIGHTLY_STUDIO_HOST, port=env.LIGHTLY_STUDIO_PORT)
|
|
389
359
|
|
|
390
360
|
print(f"Open the LightlyStudio GUI under: {env.APP_URL}")
|
|
@@ -395,7 +365,7 @@ class DatasetLoader:
|
|
|
395
365
|
def _create_samples_from_paths(
|
|
396
366
|
session: Session,
|
|
397
367
|
dataset_id: UUID,
|
|
398
|
-
image_paths: Iterable[
|
|
368
|
+
image_paths: Iterable[str],
|
|
399
369
|
) -> Iterator[UUID]:
|
|
400
370
|
"""Create samples from a list of image paths.
|
|
401
371
|
|
|
@@ -415,15 +385,14 @@ def _create_samples_from_paths(
|
|
|
415
385
|
unit=" images",
|
|
416
386
|
):
|
|
417
387
|
try:
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
image.close()
|
|
388
|
+
with fsspec.open(image_path, "rb") as file, PIL.Image.open(file) as img:
|
|
389
|
+
width, height = img.size
|
|
421
390
|
except (FileNotFoundError, PIL.UnidentifiedImageError, OSError):
|
|
422
391
|
continue
|
|
423
392
|
|
|
424
393
|
sample = SampleCreate(
|
|
425
|
-
file_name=image_path.name,
|
|
426
|
-
file_path_abs=
|
|
394
|
+
file_name=Path(image_path).name,
|
|
395
|
+
file_path_abs=image_path,
|
|
427
396
|
width=width,
|
|
428
397
|
height=height,
|
|
429
398
|
dataset_id=dataset_id,
|
|
@@ -477,12 +446,11 @@ def _process_object_detection_annotations(
|
|
|
477
446
|
sample_id=context.sample_id,
|
|
478
447
|
annotation_label_id=context.label_map[obj.category.id],
|
|
479
448
|
annotation_type="object_detection",
|
|
480
|
-
x=x,
|
|
481
|
-
y=y,
|
|
482
|
-
width=width,
|
|
483
|
-
height=height,
|
|
449
|
+
x=int(x),
|
|
450
|
+
y=int(y),
|
|
451
|
+
width=int(width),
|
|
452
|
+
height=int(height),
|
|
484
453
|
confidence=obj.confidence,
|
|
485
|
-
annotation_task_id=context.annotation_task_id,
|
|
486
454
|
)
|
|
487
455
|
)
|
|
488
456
|
return new_annotations
|
|
@@ -512,12 +480,11 @@ def _process_instance_segmentation_annotations(
|
|
|
512
480
|
sample_id=context.sample_id,
|
|
513
481
|
annotation_label_id=context.label_map[obj.category.id],
|
|
514
482
|
annotation_type="instance_segmentation",
|
|
515
|
-
x=x,
|
|
516
|
-
y=y,
|
|
517
|
-
width=width,
|
|
518
|
-
height=height,
|
|
483
|
+
x=int(x),
|
|
484
|
+
y=int(y),
|
|
485
|
+
width=int(width),
|
|
486
|
+
height=int(height),
|
|
519
487
|
segmentation_mask=segmentation_rle,
|
|
520
|
-
annotation_task_id=context.annotation_task_id,
|
|
521
488
|
)
|
|
522
489
|
)
|
|
523
490
|
return new_annotations
|
|
@@ -529,7 +496,6 @@ def _process_batch_annotations( # noqa: PLR0913
|
|
|
529
496
|
samples_data: list[tuple[SampleCreate, ImageInstanceSegmentation | ImageObjectDetection]],
|
|
530
497
|
dataset_id: UUID,
|
|
531
498
|
label_map: dict[int, UUID],
|
|
532
|
-
annotation_task_id: UUID,
|
|
533
499
|
annotations_to_create: list[AnnotationCreate],
|
|
534
500
|
sample_ids: list[UUID],
|
|
535
501
|
) -> None:
|
|
@@ -541,7 +507,6 @@ def _process_batch_annotations( # noqa: PLR0913
|
|
|
541
507
|
dataset_id=dataset_id,
|
|
542
508
|
sample_id=stored_sample.sample_id,
|
|
543
509
|
label_map=label_map,
|
|
544
|
-
annotation_task_id=annotation_task_id,
|
|
545
510
|
)
|
|
546
511
|
|
|
547
512
|
if isinstance(img_data, ImageInstanceSegmentation):
|
|
@@ -7,6 +7,7 @@ from pathlib import Path
|
|
|
7
7
|
from typing import Callable
|
|
8
8
|
from uuid import UUID
|
|
9
9
|
|
|
10
|
+
import fsspec
|
|
10
11
|
import torch
|
|
11
12
|
from PIL import Image
|
|
12
13
|
from torch.utils.data import DataLoader, Dataset
|
|
@@ -23,6 +24,7 @@ MOBILECLIP_DOWNLOAD_URL = (
|
|
|
23
24
|
f"https://docs-assets.developer.apple.com/ml-research/datasets/mobileclip/{MODEL_NAME}.pt"
|
|
24
25
|
)
|
|
25
26
|
MAX_BATCH_SIZE: int = 16
|
|
27
|
+
EMBEDDING_DIMENSION: int = 512
|
|
26
28
|
|
|
27
29
|
|
|
28
30
|
# Dataset for efficient batched image loading and preprocessing
|
|
@@ -31,7 +33,7 @@ class _ImageFileDataset(Dataset[torch.Tensor]):
|
|
|
31
33
|
|
|
32
34
|
def __init__(
|
|
33
35
|
self,
|
|
34
|
-
filepaths: list[
|
|
36
|
+
filepaths: list[str],
|
|
35
37
|
preprocess: Callable[[Image.Image], torch.Tensor],
|
|
36
38
|
) -> None:
|
|
37
39
|
self.filepaths = filepaths
|
|
@@ -41,8 +43,9 @@ class _ImageFileDataset(Dataset[torch.Tensor]):
|
|
|
41
43
|
return len(self.filepaths)
|
|
42
44
|
|
|
43
45
|
def __getitem__(self, idx: int) -> torch.Tensor:
|
|
44
|
-
|
|
45
|
-
|
|
46
|
+
with fsspec.open(self.filepaths[idx], "rb") as file:
|
|
47
|
+
image = Image.open(file).convert("RGB")
|
|
48
|
+
return self.preprocess(image)
|
|
46
49
|
|
|
47
50
|
|
|
48
51
|
class MobileCLIPEmbeddingGenerator(EmbeddingGenerator):
|
|
@@ -83,7 +86,7 @@ class MobileCLIPEmbeddingGenerator(EmbeddingGenerator):
|
|
|
83
86
|
return EmbeddingModelCreate(
|
|
84
87
|
name=MODEL_NAME,
|
|
85
88
|
embedding_model_hash=self._model_hash,
|
|
86
|
-
embedding_dimension=
|
|
89
|
+
embedding_dimension=EMBEDDING_DIMENSION,
|
|
87
90
|
dataset_id=dataset_id,
|
|
88
91
|
)
|
|
89
92
|
|
|
@@ -103,7 +106,7 @@ class MobileCLIPEmbeddingGenerator(EmbeddingGenerator):
|
|
|
103
106
|
embedding_list: list[float] = embedding.cpu().numpy().flatten().tolist()
|
|
104
107
|
return embedding_list
|
|
105
108
|
|
|
106
|
-
def embed_images(self, filepaths: list[
|
|
109
|
+
def embed_images(self, filepaths: list[str]) -> list[list[float]]:
|
|
107
110
|
"""Embed images with MobileCLIP.
|
|
108
111
|
|
|
109
112
|
Args:
|
|
@@ -136,7 +139,7 @@ class MobileCLIPEmbeddingGenerator(EmbeddingGenerator):
|
|
|
136
139
|
|
|
137
140
|
|
|
138
141
|
def _get_cached_mobileclip_checkpoint() -> Path:
|
|
139
|
-
file_path = Path(tempfile.gettempdir()) / "
|
|
142
|
+
file_path = Path(tempfile.gettempdir()) / f"{MODEL_NAME}.pt"
|
|
140
143
|
file_utils.download_file_if_does_not_exist(
|
|
141
144
|
url=MOBILECLIP_DOWNLOAD_URL,
|
|
142
145
|
local_filename=file_path,
|