lightly-studio 0.3.1__py3-none-any.whl → 0.3.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lightly-studio might be problematic; more details are linked from the original page.

Files changed (133)
  1. lightly_studio/__init__.py +4 -4
  2. lightly_studio/api/app.py +1 -1
  3. lightly_studio/api/routes/api/annotation.py +6 -16
  4. lightly_studio/api/routes/api/annotation_label.py +2 -5
  5. lightly_studio/api/routes/api/annotation_task.py +4 -5
  6. lightly_studio/api/routes/api/classifier.py +2 -5
  7. lightly_studio/api/routes/api/dataset.py +2 -3
  8. lightly_studio/api/routes/api/dataset_tag.py +2 -3
  9. lightly_studio/api/routes/api/metadata.py +2 -4
  10. lightly_studio/api/routes/api/metrics.py +2 -6
  11. lightly_studio/api/routes/api/sample.py +5 -13
  12. lightly_studio/api/routes/api/settings.py +2 -6
  13. lightly_studio/api/routes/images.py +6 -6
  14. lightly_studio/core/add_samples.py +383 -0
  15. lightly_studio/core/dataset.py +250 -362
  16. lightly_studio/core/dataset_query/__init__.py +0 -0
  17. lightly_studio/core/dataset_query/boolean_expression.py +67 -0
  18. lightly_studio/core/dataset_query/dataset_query.py +211 -0
  19. lightly_studio/core/dataset_query/field.py +113 -0
  20. lightly_studio/core/dataset_query/field_expression.py +79 -0
  21. lightly_studio/core/dataset_query/match_expression.py +23 -0
  22. lightly_studio/core/dataset_query/order_by.py +79 -0
  23. lightly_studio/core/dataset_query/sample_field.py +28 -0
  24. lightly_studio/core/dataset_query/tags_expression.py +46 -0
  25. lightly_studio/core/sample.py +159 -32
  26. lightly_studio/core/start_gui.py +35 -0
  27. lightly_studio/dataset/edge_embedding_generator.py +13 -8
  28. lightly_studio/dataset/embedding_generator.py +2 -3
  29. lightly_studio/dataset/embedding_manager.py +74 -6
  30. lightly_studio/dataset/fsspec_lister.py +275 -0
  31. lightly_studio/dataset/loader.py +49 -30
  32. lightly_studio/dataset/mobileclip_embedding_generator.py +6 -4
  33. lightly_studio/db_manager.py +145 -0
  34. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BBm0IWdq.css +1 -0
  35. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BNTuXSAe.css +1 -0
  36. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/2O287xak.js +3 -0
  37. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{O-EABkf9.js → 7YNGEs1C.js} +1 -1
  38. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BBoGk9hq.js +1 -0
  39. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BRnH9v23.js +92 -0
  40. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bg1Y5eUZ.js +1 -0
  41. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DOlTMNyt.js → BqBqV92V.js} +1 -1
  42. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C0JiMuYn.js +1 -0
  43. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DjfY96ND.js → C98Hk3r5.js} +1 -1
  44. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{r64xT6ao.js → CG0dMCJi.js} +1 -1
  45. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{C8I8rFJQ.js → Ccq4ZD0B.js} +1 -1
  46. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cpy-nab_.js +1 -0
  47. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bu7uvVrG.js → Crk-jcvV.js} +1 -1
  48. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cs31G8Qn.js +1 -0
  49. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CsKrY2zA.js +1 -0
  50. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{x9G_hzyY.js → Cur71c3O.js} +1 -1
  51. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CzgC3GFB.js +1 -0
  52. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D8GZDMNN.js +1 -0
  53. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DFRh-Spp.js +1 -0
  54. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BylOuP6i.js → DRZO-E-T.js} +1 -1
  55. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{l7KrR96u.js → DcGCxgpH.js} +1 -1
  56. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bsi3UGy5.js → Df3aMO5B.js} +1 -1
  57. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{hQVEETDE.js → DkR_EZ_B.js} +1 -1
  58. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DqUGznj_.js +1 -0
  59. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KpAtIldw.js +1 -0
  60. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/M1Q1F7bw.js +4 -0
  61. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{CDnpyLsT.js → OH7-C_mc.js} +1 -1
  62. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D6su9Aln.js → gLNdjSzu.js} +1 -1
  63. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/i0ZZ4z06.js +1 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BI-EA5gL.js +2 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.CcsRl3cZ.js +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.BbO4Zc3r.js +1 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{1.B4rNYwVp.js → 1._I9GR805.js} +1 -1
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.J2RBFrSr.js +1 -0
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.Cmqj25a-.js +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C45iKJHA.js +6 -0
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.CWHpKonm.js → 3.w9g4AcAx.js} +1 -1
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.OUWOLQeV.js → 4.BBI8KwnD.js} +1 -1
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.huHuxdiF.js +1 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.CrbkRPam.js +1 -0
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.FomEdhD6.js +1 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cb_ADSLk.js +1 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{9.CPu3CiBc.js → 9.CajIG5ce.js} +1 -1
  78. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
  79. lightly_studio/dist_lightly_studio_view_app/index.html +14 -14
  80. lightly_studio/examples/example.py +13 -12
  81. lightly_studio/examples/example_coco.py +13 -0
  82. lightly_studio/examples/example_metadata.py +83 -98
  83. lightly_studio/examples/example_selection.py +7 -19
  84. lightly_studio/examples/example_split_work.py +12 -36
  85. lightly_studio/examples/{example_v2.py → example_yolo.py} +3 -4
  86. lightly_studio/models/annotation/annotation_base.py +7 -8
  87. lightly_studio/models/annotation/instance_segmentation.py +8 -8
  88. lightly_studio/models/annotation/object_detection.py +4 -4
  89. lightly_studio/models/dataset.py +6 -2
  90. lightly_studio/models/sample.py +10 -3
  91. lightly_studio/resolvers/dataset_resolver.py +10 -0
  92. lightly_studio/resolvers/embedding_model_resolver.py +22 -0
  93. lightly_studio/resolvers/sample_resolver.py +53 -9
  94. lightly_studio/resolvers/tag_resolver.py +23 -0
  95. lightly_studio/selection/select.py +55 -46
  96. lightly_studio/selection/select_via_db.py +23 -19
  97. lightly_studio/selection/selection_config.py +6 -3
  98. lightly_studio/services/annotations_service/__init__.py +4 -0
  99. lightly_studio/services/annotations_service/update_annotation.py +21 -32
  100. lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
  101. lightly_studio-0.3.2.dist-info/METADATA +689 -0
  102. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.2.dist-info}/RECORD +104 -91
  103. lightly_studio/api/db.py +0 -133
  104. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +0 -1
  105. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +0 -1
  106. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +0 -1
  107. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +0 -1
  108. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +0 -1
  109. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +0 -1
  110. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +0 -1
  111. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +0 -93
  112. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +0 -3
  113. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D5-A_Ffd.js +0 -4
  114. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +0 -1
  115. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +0 -1
  116. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +0 -1
  117. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +0 -1
  118. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +0 -1
  119. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +0 -1
  120. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +0 -1
  121. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +0 -2
  122. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +0 -1
  123. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +0 -1
  124. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +0 -1
  125. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +0 -1
  126. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +0 -6
  127. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +0 -1
  128. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +0 -1
  129. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +0 -1
  130. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +0 -1
  131. lightly_studio-0.3.1.dist-info/METADATA +0 -520
  132. /lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{OpenSans- → OpenSans-Medium.DVUZMR_6.ttf} +0 -0
  133. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.2.dist-info}/WHEEL +0 -0
@@ -2,17 +2,22 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from typing import Any, Generic, Protocol, TypeVar
5
+ from collections.abc import Iterable
6
+ from typing import Any, Generic, Protocol, TypeVar, cast
6
7
 
7
- from sqlalchemy.orm import object_session
8
+ from sqlalchemy.orm import Mapped, object_session
9
+ from sqlmodel import Session, col
8
10
 
9
11
  from lightly_studio.models.sample import SampleTable
12
+ from lightly_studio.resolvers import metadata_resolver, tag_resolver
10
13
 
11
14
  T = TypeVar("T")
12
15
 
13
16
 
14
- class _HasInner(Protocol):
15
- _inner: Any
17
+ class _DBFieldOwner(Protocol):
18
+ inner: Any
19
+
20
+ def get_object_session(self) -> Session: ...
16
21
 
17
22
 
18
23
  class DBField(Generic[T]):
@@ -20,53 +25,42 @@ class DBField(Generic[T]):
20
25
 
21
26
  Provides interface to a SQLAlchemy model field. Setting the field
22
27
  immediately commits to the database. The owner class must implement
23
- the _inner attribute.
28
+ the inner attribute and the get_object_session() method.
24
29
  """
25
30
 
26
31
  __slots__ = ("_sqla_descriptor",)
27
32
  """Store the SQLAlchemy descriptor for accessing the field."""
28
33
 
29
- def __init__(self, sqla_descriptor: T) -> None:
30
- """Initialize the DBField with a SQLAlchemy descriptor.
31
-
32
- Note: Mypy thinks that the descriptor has type T. In reality, during
33
- runtime, it will be InstrumentedAttribute[T].
34
- """
34
+ def __init__(self, sqla_descriptor: Mapped[T]) -> None:
35
+ """Initialize the DBField with a SQLAlchemy descriptor."""
35
36
  self._sqla_descriptor = sqla_descriptor
36
37
 
37
- def __get__(self, obj: _HasInner | None, owner: type | None = None) -> T:
38
+ def __get__(self, obj: _DBFieldOwner | None, owner: type | None = None) -> T:
38
39
  """Get the value of the field from the database."""
39
40
  assert obj is not None, "DBField must be accessed via an instance, not the class"
40
41
  # Delegate to SQLAlchemy's descriptor.
41
- # Note: Mypy incorrectly thinks that the descriptor has type T. It complains
42
- # about the lack of a __get__ method.
43
- value: T = self._sqla_descriptor.__get__(obj._inner, type(obj._inner)) # type: ignore[attr-defined] # noqa: SLF001
42
+ value: T = self._sqla_descriptor.__get__(obj.inner, type(obj.inner))
44
43
  return value
45
44
 
46
- def __set__(self, obj: _HasInner, value: T) -> None:
45
+ def __set__(self, obj: _DBFieldOwner, value: T) -> None:
47
46
  """Set the value of the field in the database. Commits the session."""
48
47
  # Delegate to SQLAlchemy's descriptor.
49
- # Note: Mypy incorrectly thinks that the descriptor has type T. It complains
50
- # about the lack of a __set__ method.
51
- self._sqla_descriptor.__set__(obj._inner, value) # type: ignore[attr-defined] # noqa: SLF001
52
- sess = object_session(obj._inner) # noqa: SLF001
53
- if sess is None:
54
- raise RuntimeError("No active session found for the DBField object")
55
- sess.commit()
48
+ self._sqla_descriptor.__set__(obj.inner, value)
49
+ obj.get_object_session().commit()
56
50
 
57
51
 
58
52
  class Sample:
59
53
  """Interface to a dataset sample."""
60
54
 
61
- file_name = DBField(SampleTable.file_name)
62
- width = DBField(SampleTable.width)
63
- height = DBField(SampleTable.height)
64
- dataset_id = DBField(SampleTable.dataset_id)
65
- file_path_abs = DBField(SampleTable.file_path_abs)
55
+ file_name = DBField(col(SampleTable.file_name))
56
+ width = DBField(col(SampleTable.width))
57
+ height = DBField(col(SampleTable.height))
58
+ dataset_id = DBField(col(SampleTable.dataset_id))
59
+ file_path_abs = DBField(col(SampleTable.file_path_abs))
66
60
 
67
- sample_id = DBField(SampleTable.sample_id)
68
- created_at = DBField(SampleTable.created_at)
69
- updated_at = DBField(SampleTable.updated_at)
61
+ sample_id = DBField(col(SampleTable.sample_id))
62
+ created_at = DBField(col(SampleTable.created_at))
63
+ updated_at = DBField(col(SampleTable.updated_at))
70
64
 
71
65
  def __init__(self, inner: SampleTable) -> None:
72
66
  """Initialize the Sample.
@@ -74,4 +68,137 @@ class Sample:
74
68
  Args:
75
69
  inner: The SampleTable SQLAlchemy model instance.
76
70
  """
77
- self._inner = inner
71
+ self.inner = inner
72
+ self._metadata = SampleMetadata(self)
73
+
74
+ def get_object_session(self) -> Session:
75
+ """Get the database session for this sample.
76
+
77
+ Returns:
78
+ The SQLModel session.
79
+
80
+ Raises:
81
+ RuntimeError: If no active session is found.
82
+ """
83
+ session = object_session(self.inner)
84
+ if session is None:
85
+ raise RuntimeError("No active session found for the sample")
86
+ # Cast from SQLAlchemy Session to SQLModel Session for mypy.
87
+ return cast(Session, session)
88
+
89
+ def add_tag(self, name: str) -> None:
90
+ """Add a tag to this sample.
91
+
92
+ If the tag doesn't exist, it will be created first.
93
+
94
+ Args:
95
+ name: The name of the tag to add.
96
+ """
97
+ session = self.get_object_session()
98
+
99
+ # Get or create the tag for this dataset.
100
+ tag = tag_resolver.get_or_create_sample_tag_by_name(
101
+ session=session, dataset_id=self.dataset_id, tag_name=name
102
+ )
103
+
104
+ # Add the tag to the sample if not already associated.
105
+ if tag not in self.inner.tags:
106
+ tag_resolver.add_tag_to_sample(session=session, tag_id=tag.tag_id, sample=self.inner)
107
+
108
+ def remove_tag(self, name: str) -> None:
109
+ """Remove a tag from this sample.
110
+
111
+ Args:
112
+ name: The name of the tag to remove.
113
+ """
114
+ session = self.get_object_session()
115
+
116
+ # Find the tag by name for this dataset.
117
+ existing_tag = tag_resolver.get_by_name(
118
+ session=session, tag_name=name, dataset_id=self.dataset_id
119
+ )
120
+
121
+ # Remove the tag from the sample if it exists and is associated
122
+ if existing_tag is not None and existing_tag in self.inner.tags:
123
+ tag_resolver.remove_tag_from_sample(
124
+ session=session, tag_id=existing_tag.tag_id, sample=self.inner
125
+ )
126
+
127
+ @property
128
+ def tags(self) -> set[str]:
129
+ """Get the tag names associated with this sample.
130
+
131
+ Returns:
132
+ A set of tag names as strings.
133
+ """
134
+ return {tag.name for tag in self.inner.tags}
135
+
136
+ @tags.setter
137
+ def tags(self, tags: Iterable[str]) -> None:
138
+ """Set the tags for this sample, replacing any existing tags.
139
+
140
+ Args:
141
+ tags: Iterable of tag names to associate with this sample.
142
+ """
143
+ # Get current tag names
144
+ current_tags = self.tags
145
+ new_tags = set(tags)
146
+
147
+ # Remove tags that are no longer needed
148
+ tags_to_remove = current_tags - new_tags
149
+ for tag_name in tags_to_remove:
150
+ self.remove_tag(tag_name)
151
+
152
+ # Add new tags
153
+ tags_to_add = new_tags - current_tags
154
+ for tag_name in tags_to_add:
155
+ self.add_tag(tag_name)
156
+
157
+ @property
158
+ def metadata(self) -> SampleMetadata:
159
+ """Get dictionary-like access to sample metadata.
160
+
161
+ Returns:
162
+ A dictionary-like object for accessing metadata.
163
+ """
164
+ return self._metadata
165
+
166
+
167
+ class SampleMetadata:
168
+ """Dictionary-like interface for sample metadata."""
169
+
170
+ def __init__(self, sample: Sample) -> None:
171
+ """Initialize SampleMetadata.
172
+
173
+ Args:
174
+ sample: The Sample instance this metadata belongs to.
175
+ """
176
+ self._sample = sample
177
+
178
+ def __getitem__(self, key: str) -> Any:
179
+ """Get a metadata value by key.
180
+
181
+ Args:
182
+ key: The metadata key to access.
183
+
184
+ Returns:
185
+ The metadata value for the given key, or None if the key doesn't exist.
186
+ """
187
+ if self._sample.inner.metadata_dict is None:
188
+ return None
189
+ return self._sample.inner.metadata_dict.get_value(key)
190
+
191
+ def __setitem__(self, key: str, value: Any) -> None:
192
+ """Set a metadata key-value pair.
193
+
194
+ Args:
195
+ key: The metadata key.
196
+ value: The metadata value.
197
+ """
198
+ session = self._sample.get_object_session()
199
+ metadata_resolver.set_value_for_sample(
200
+ session=session,
201
+ sample_id=self._sample.sample_id,
202
+ key=key,
203
+ value=value,
204
+ )
@@ -2,12 +2,47 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from lightly_studio import db_manager
5
6
  from lightly_studio.api.server import Server
6
7
  from lightly_studio.dataset import env
8
+ from lightly_studio.resolvers import dataset_resolver, sample_resolver
9
+
10
+
11
+ def _validate_has_samples() -> None:
12
+ """Validate that there are samples in the database before starting GUI.
13
+
14
+ Raises:
15
+ ValueError: If no datasets are found or if no samples exist in any dataset.
16
+ """
17
+ session = db_manager.persistent_session()
18
+
19
+ # Check if any datasets exist
20
+ datasets = dataset_resolver.get_all(session=session, offset=0, limit=1)
21
+
22
+ if not datasets:
23
+ raise ValueError(
24
+ "No datasets found. Please load a dataset using Dataset class methods "
25
+ "(e.g., add_samples_from_path(), add_samples_from_yolo(), etc.) "
26
+ "before starting the GUI."
27
+ )
28
+
29
+ # Check if there are any samples in the first dataset
30
+ first_dataset = datasets[0]
31
+ sample_count = sample_resolver.count_by_dataset_id(
32
+ session=session, dataset_id=first_dataset.dataset_id
33
+ )
34
+
35
+ if sample_count == 0:
36
+ raise ValueError(
37
+ "No images have been indexed for the first dataset. "
38
+ "Please ensure your dataset contains valid images and try loading again."
39
+ )
7
40
 
8
41
 
9
42
  def start_gui() -> None:
10
43
  """Launch the web interface for the loaded dataset."""
44
+ _validate_has_samples()
45
+
11
46
  server = Server(host=env.LIGHTLY_STUDIO_HOST, port=env.LIGHTLY_STUDIO_PORT)
12
47
 
13
48
  print(f"Open the LightlyStudio GUI under: {env.APP_URL}")
@@ -3,11 +3,12 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from collections.abc import Sequence
6
- from pathlib import Path
7
6
  from typing import Tuple
8
7
  from uuid import UUID
9
8
 
10
9
  import cv2
10
+ import fsspec
11
+ import numpy as np
11
12
  from lightly_edge_sdk import (
12
13
  InferenceDeviceType,
13
14
  LightlyEdge,
@@ -29,7 +30,7 @@ class _ImageFileDatasetEdge(Dataset[Tuple[bytes, int, int]]):
29
30
 
30
31
  def __init__(
31
32
  self,
32
- filepaths: Sequence[Path],
33
+ filepaths: Sequence[str],
33
34
  ) -> None:
34
35
  self.filepaths = filepaths
35
36
 
@@ -38,11 +39,15 @@ class _ImageFileDatasetEdge(Dataset[Tuple[bytes, int, int]]):
38
39
 
39
40
  def __getitem__(self, idx: int) -> tuple[bytes, int, int]:
40
41
  # Load the image.
41
- bgr_image = cv2.imread(str(self.filepaths[idx]))
42
- rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
43
- rgb_bytes = rgb_image.tobytes()
44
- height, width, _ = rgb_image.shape
45
- return rgb_bytes, width, height
42
+ with fsspec.open(self.filepaths[idx], "rb") as file:
43
+ image_bytes = file.read()
44
+ # Decode image from bytes using OpenCV
45
+ nparr = np.frombuffer(image_bytes, np.uint8)
46
+ bgr_image = cv2.imdecode(nparr, cv2.IMREAD_COLOR)
47
+ rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
48
+ rgb_bytes = rgb_image.tobytes()
49
+ height, width, _ = rgb_image.shape
50
+ return rgb_bytes, width, height
46
51
 
47
52
 
48
53
  class EdgeSDKEmbeddingGenerator(EmbeddingGenerator):
@@ -95,7 +100,7 @@ class EdgeSDKEmbeddingGenerator(EmbeddingGenerator):
95
100
  return embeddings[0]
96
101
  return []
97
102
 
98
- def embed_images(self, filepaths: list[Path]) -> list[list[float]]:
103
+ def embed_images(self, filepaths: list[str]) -> list[list[float]]:
99
104
  """Embed images with EdgeSDK.
100
105
 
101
106
  Args:
@@ -3,7 +3,6 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import random
6
- from pathlib import Path
7
6
  from typing import Protocol, runtime_checkable
8
7
  from uuid import UUID
9
8
 
@@ -40,7 +39,7 @@ class EmbeddingGenerator(Protocol):
40
39
  """
41
40
  ...
42
41
 
43
- def embed_images(self, filepaths: list[Path]) -> list[list[float]]:
42
+ def embed_images(self, filepaths: list[str]) -> list[list[float]]:
44
43
  """Generate embeddings for multiple image samples.
45
44
 
46
45
  TODO(Michal, 04/2025): Use DatasetLoader as input instead.
@@ -86,6 +85,6 @@ class RandomEmbeddingGenerator(EmbeddingGenerator):
86
85
  """Generate a random embedding for a text sample."""
87
86
  return [random.random() for _ in range(self._dimension)]
88
87
 
89
- def embed_images(self, filepaths: list[Path]) -> list[list[float]]:
88
+ def embed_images(self, filepaths: list[str]) -> list[list[float]]:
90
89
  """Generate random embeddings for multiple image samples."""
91
90
  return [[random.random() for _ in range(self._dimension)] for _ in range(len(filepaths))]
@@ -3,11 +3,11 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from dataclasses import dataclass
6
- from pathlib import Path
7
6
  from uuid import UUID
8
7
 
9
8
  from sqlmodel import Session
10
9
 
10
+ from lightly_studio.dataset import env
11
11
  from lightly_studio.dataset.embedding_generator import EmbeddingGenerator
12
12
  from lightly_studio.models.embedding_model import EmbeddingModelTable
13
13
  from lightly_studio.models.sample_embedding import SampleEmbeddingCreate
@@ -75,12 +75,12 @@ class EmbeddingManager:
75
75
  Returns:
76
76
  The created EmbeddingModel.
77
77
  """
78
- # Create embedding model record in the database.
79
- created_model = embedding_model_resolver.create(
78
+ # Get or create embedding model record in the database.
79
+ db_model = embedding_model_resolver.get_or_create(
80
80
  session=session,
81
81
  embedding_model=embedding_generator.get_embedding_model_input(dataset_id=dataset_id),
82
82
  )
83
- model_id = created_model.embedding_model_id
83
+ model_id = db_model.embedding_model_id
84
84
 
85
85
  # Store the model in our dictionary
86
86
  self._models[model_id] = embedding_generator
@@ -89,7 +89,7 @@ class EmbeddingManager:
89
89
  if set_as_default or self._default_model_id is None:
90
90
  self._default_model_id = model_id
91
91
 
92
- return created_model
92
+ return db_model
93
93
 
94
94
  def embed_text(self, text_query: TextEmbedQuery) -> list[float]:
95
95
  """Generate an embedding for a text sample.
@@ -136,7 +136,7 @@ class EmbeddingManager:
136
136
 
137
137
  # Query image filenames from the database.
138
138
  sample_id_to_filepath = {
139
- sample.sample_id: Path(sample.file_path_abs)
139
+ sample.sample_id: sample.file_path_abs
140
140
  for sample in sample_resolver.get_many_by_id(
141
141
  session=session,
142
142
  sample_ids=sample_ids,
@@ -161,3 +161,71 @@ class EmbeddingManager:
161
161
 
162
162
  # Store the embeddings in the database.
163
163
  sample_embedding_resolver.create_many(session=session, sample_embeddings=sample_embeddings)
164
+
165
+ def load_or_get_default_model(
166
+ self,
167
+ session: Session,
168
+ dataset_id: UUID,
169
+ ) -> UUID | None:
170
+ """Ensure a default embedding model exists and return its ID.
171
+
172
+ Args:
173
+ session: Database session for resolver operations.
174
+ dataset_id: Dataset identifier the model should belong to.
175
+
176
+ Returns:
177
+ UUID of the default embedding model or None if the model cannot be loaded.
178
+ """
179
+ # Return the existing default model ID if available.
180
+ # TODO(Michal, 09/2025): We do not check if the model belongs to the dataset.
181
+ # The design of EmbeddingManager needs to change to support multiple datasets.
182
+ if self._default_model_id is not None:
183
+ return self._default_model_id
184
+
185
+ # Load the embedding generator based on configuration.
186
+ embedding_generator = _load_embedding_generator_from_env()
187
+ if embedding_generator is None:
188
+ return None
189
+
190
+ # Register the embedding model and set it as default.
191
+ embedding_model = self.register_embedding_model(
192
+ session=session,
193
+ dataset_id=dataset_id,
194
+ embedding_generator=embedding_generator,
195
+ set_as_default=True,
196
+ )
197
+
198
+ return embedding_model.embedding_model_id
199
+
200
+
201
+ # TODO(Michal, 09/2025): Write tests for this function.
202
+ def _load_embedding_generator_from_env() -> EmbeddingGenerator | None:
203
+ """Load the embedding generator based on environment variable configuration."""
204
+ if env.LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE == "EDGE":
205
+ try:
206
+ from lightly_studio.dataset.edge_embedding_generator import (
207
+ EdgeSDKEmbeddingGenerator,
208
+ )
209
+
210
+ print("Using LightlyEdge embedding generator.")
211
+ return EdgeSDKEmbeddingGenerator(model_path=env.LIGHTLY_STUDIO_EDGE_MODEL_FILE_PATH)
212
+ except ImportError:
213
+ print("Embedding functionality is disabled.")
214
+ return None
215
+ elif env.LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE == "MOBILE_CLIP":
216
+ try:
217
+ from lightly_studio.dataset.mobileclip_embedding_generator import (
218
+ MobileCLIPEmbeddingGenerator,
219
+ )
220
+
221
+ print("Using MobileCLIP embedding generator.")
222
+ return MobileCLIPEmbeddingGenerator()
223
+ except ImportError:
224
+ print("Embedding functionality is disabled.")
225
+ return None
226
+
227
+ print(
228
+ f"Unsupported model type: '{env.LIGHTLY_STUDIO_EMBEDDINGS_MODEL_TYPE}'",
229
+ )
230
+ print("Embedding functionality is disabled.")
231
+ return None