lightly-studio 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of lightly-studio might be problematic. Click here for more details.

Files changed (169)
  1. lightly_studio/__init__.py +4 -4
  2. lightly_studio/api/app.py +7 -5
  3. lightly_studio/api/db_tables.py +0 -3
  4. lightly_studio/api/routes/api/annotation.py +32 -16
  5. lightly_studio/api/routes/api/annotation_label.py +2 -5
  6. lightly_studio/api/routes/api/annotations/__init__.py +7 -0
  7. lightly_studio/api/routes/api/annotations/create_annotation.py +52 -0
  8. lightly_studio/api/routes/api/classifier.py +2 -5
  9. lightly_studio/api/routes/api/dataset.py +5 -8
  10. lightly_studio/api/routes/api/dataset_tag.py +2 -3
  11. lightly_studio/api/routes/api/embeddings2d.py +104 -0
  12. lightly_studio/api/routes/api/export.py +73 -0
  13. lightly_studio/api/routes/api/metadata.py +2 -4
  14. lightly_studio/api/routes/api/sample.py +5 -13
  15. lightly_studio/api/routes/api/selection.py +87 -0
  16. lightly_studio/api/routes/api/settings.py +2 -6
  17. lightly_studio/api/routes/images.py +6 -6
  18. lightly_studio/core/add_samples.py +374 -0
  19. lightly_studio/core/dataset.py +272 -400
  20. lightly_studio/core/dataset_query/boolean_expression.py +67 -0
  21. lightly_studio/core/dataset_query/dataset_query.py +216 -0
  22. lightly_studio/core/dataset_query/field.py +113 -0
  23. lightly_studio/core/dataset_query/field_expression.py +79 -0
  24. lightly_studio/core/dataset_query/match_expression.py +23 -0
  25. lightly_studio/core/dataset_query/order_by.py +79 -0
  26. lightly_studio/core/dataset_query/sample_field.py +28 -0
  27. lightly_studio/core/dataset_query/tags_expression.py +46 -0
  28. lightly_studio/core/sample.py +159 -32
  29. lightly_studio/core/start_gui.py +35 -0
  30. lightly_studio/dataset/edge_embedding_generator.py +13 -8
  31. lightly_studio/dataset/embedding_generator.py +2 -3
  32. lightly_studio/dataset/embedding_manager.py +74 -6
  33. lightly_studio/dataset/env.py +4 -0
  34. lightly_studio/dataset/file_utils.py +13 -2
  35. lightly_studio/dataset/fsspec_lister.py +275 -0
  36. lightly_studio/dataset/loader.py +49 -84
  37. lightly_studio/dataset/mobileclip_embedding_generator.py +9 -6
  38. lightly_studio/db_manager.py +145 -0
  39. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.CA_CXIBb.css +1 -0
  40. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.DS78jgNY.css +1 -0
  41. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/index.BVs_sZj9.css +1 -0
  42. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/transform.D487hwJk.css +1 -0
  43. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/6t3IJ0vQ.js +1 -0
  44. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D6su9Aln.js → 8NsknIT2.js} +1 -1
  45. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{x9G_hzyY.js → BND_-4Kp.js} +1 -1
  46. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BylOuP6i.js → BdfTHw61.js} +1 -1
  47. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DOlTMNyt.js → BfHVnyNT.js} +1 -1
  48. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BjkP1AHA.js +1 -0
  49. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BuuNVL9G.js +1 -0
  50. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{O-EABkf9.js → BzKGpnl4.js} +1 -1
  51. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CCx7Ho51.js +1 -0
  52. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{l7KrR96u.js → CH6P3X75.js} +1 -1
  53. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D5-A_Ffd.js → CR2upx_Q.js} +2 -2
  54. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CWPZrTTJ.js +1 -0
  55. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{C8I8rFJQ.js → Cs1XmhiF.js} +1 -1
  56. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{CDnpyLsT.js → CwPowJfP.js} +1 -1
  57. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxFKfZ9T.js +1 -0
  58. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cxevwdid.js +1 -0
  59. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DjfY96ND.js → D4whDBUi.js} +1 -1
  60. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6r9vr07.js +1 -0
  61. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DA6bFLPR.js +1 -0
  62. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DEgUu98i.js +3 -0
  63. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DGTPl6Gk.js +1 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DKGxBSlK.js +1 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DQXoLcsF.js +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DQe_kdRt.js +92 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DcY4jgG3.js +1 -0
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bu7uvVrG.js → RmD8FzRo.js} +1 -1
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/V-MnMC1X.js +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bsi3UGy5.js → keKYsoph.js} +1 -1
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BVr6DYqP.js +2 -0
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.u7zsVvqp.js +1 -0
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.Da2agmdd.js +1 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{1.B4rNYwVp.js → 1.B11tVRJV.js} +1 -1
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.l30Zud4h.js +1 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CgKPGcAP.js +1 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C8HLK8mj.js +857 -0
  78. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.CWHpKonm.js → 3.CLvg3QcJ.js} +1 -1
  79. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.OUWOLQeV.js → 4.BQhDtXUI.js} +1 -1
  80. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.-6XqWX5G.js +1 -0
  81. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.uBV1Lhat.js +1 -0
  82. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.BXsgoQZh.js +1 -0
  83. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.BkbcnUs8.js +1 -0
  84. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{9.CPu3CiBc.js → 9.Bkrv-Vww.js} +1 -1
  85. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/clustering.worker-DKqeLtG0.js +2 -0
  86. lightly_studio/dist_lightly_studio_view_app/_app/immutable/workers/search.worker-vNSty3B0.js +1 -0
  87. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
  88. lightly_studio/dist_lightly_studio_view_app/index.html +14 -14
  89. lightly_studio/examples/example.py +13 -12
  90. lightly_studio/examples/example_coco.py +13 -0
  91. lightly_studio/examples/example_metadata.py +83 -98
  92. lightly_studio/examples/example_selection.py +7 -19
  93. lightly_studio/examples/example_split_work.py +12 -36
  94. lightly_studio/examples/{example_v2.py → example_yolo.py} +3 -4
  95. lightly_studio/export/export_dataset.py +65 -0
  96. lightly_studio/export/lightly_studio_label_input.py +120 -0
  97. lightly_studio/few_shot_classifier/classifier_manager.py +5 -26
  98. lightly_studio/metadata/compute_typicality.py +67 -0
  99. lightly_studio/models/annotation/annotation_base.py +18 -20
  100. lightly_studio/models/annotation/instance_segmentation.py +8 -8
  101. lightly_studio/models/annotation/object_detection.py +4 -4
  102. lightly_studio/models/dataset.py +6 -2
  103. lightly_studio/models/sample.py +10 -3
  104. lightly_studio/resolvers/annotation_label_resolver/__init__.py +2 -1
  105. lightly_studio/resolvers/annotation_label_resolver/get_all.py +15 -0
  106. lightly_studio/resolvers/annotation_resolver/__init__.py +2 -3
  107. lightly_studio/resolvers/annotation_resolver/create_many.py +3 -3
  108. lightly_studio/resolvers/annotation_resolver/delete_annotation.py +1 -1
  109. lightly_studio/resolvers/annotation_resolver/delete_annotations.py +7 -3
  110. lightly_studio/resolvers/annotation_resolver/get_by_id.py +19 -1
  111. lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +0 -1
  112. lightly_studio/resolvers/annotations/annotations_filter.py +1 -11
  113. lightly_studio/resolvers/dataset_resolver.py +10 -0
  114. lightly_studio/resolvers/embedding_model_resolver.py +22 -0
  115. lightly_studio/resolvers/sample_resolver.py +53 -9
  116. lightly_studio/resolvers/tag_resolver.py +23 -0
  117. lightly_studio/selection/mundig.py +7 -10
  118. lightly_studio/selection/select.py +55 -46
  119. lightly_studio/selection/select_via_db.py +23 -19
  120. lightly_studio/selection/selection_config.py +10 -4
  121. lightly_studio/services/annotations_service/__init__.py +12 -0
  122. lightly_studio/services/annotations_service/create_annotation.py +63 -0
  123. lightly_studio/services/annotations_service/delete_annotation.py +22 -0
  124. lightly_studio/services/annotations_service/update_annotation.py +21 -32
  125. lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
  126. lightly_studio-0.3.3.dist-info/METADATA +814 -0
  127. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.3.dist-info}/RECORD +130 -113
  128. lightly_studio/api/db.py +0 -133
  129. lightly_studio/api/routes/api/annotation_task.py +0 -38
  130. lightly_studio/api/routes/api/metrics.py +0 -80
  131. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.DenzbfeK.css +0 -1
  132. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +0 -1
  133. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +0 -1
  134. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.T-zjSUd3.css +0 -1
  135. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +0 -1
  136. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +0 -1
  137. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +0 -1
  138. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +0 -1
  139. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +0 -1
  140. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +0 -93
  141. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +0 -3
  142. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +0 -1
  143. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +0 -1
  144. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +0 -1
  145. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +0 -1
  146. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +0 -1
  147. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/hQVEETDE.js +0 -1
  148. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +0 -1
  149. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/r64xT6ao.js +0 -1
  150. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +0 -1
  151. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +0 -2
  152. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +0 -1
  153. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +0 -1
  154. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +0 -1
  155. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +0 -1
  156. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +0 -6
  157. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +0 -1
  158. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +0 -1
  159. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +0 -1
  160. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +0 -1
  161. lightly_studio/metrics/detection/__init__.py +0 -0
  162. lightly_studio/metrics/detection/map.py +0 -268
  163. lightly_studio/models/annotation_task.py +0 -28
  164. lightly_studio/resolvers/annotation_resolver/create.py +0 -19
  165. lightly_studio/resolvers/annotation_task_resolver.py +0 -31
  166. lightly_studio-0.3.1.dist-info/METADATA +0 -520
  167. /lightly_studio/{metrics → core/dataset_query}/__init__.py +0 -0
  168. /lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{OpenSans- → OpenSans-Medium.DVUZMR_6.ttf} +0 -0
  169. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.3.dist-info}/WHEEL +0 -0
@@ -2,522 +2,394 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- from dataclasses import dataclass
6
5
  from pathlib import Path
7
- from typing import Iterable
6
+ from typing import Iterable, Iterator
8
7
  from uuid import UUID
9
8
 
10
- import PIL
11
9
  from labelformat.formats import (
12
10
  COCOInstanceSegmentationInput,
13
11
  COCOObjectDetectionInput,
14
12
  YOLOv8ObjectDetectionInput,
15
13
  )
16
- from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation
17
- from labelformat.model.bounding_box import BoundingBoxFormat
18
- from labelformat.model.image import Image
19
14
  from labelformat.model.instance_segmentation import (
20
- ImageInstanceSegmentation,
21
15
  InstanceSegmentationInput,
22
16
  )
23
- from labelformat.model.multipolygon import MultiPolygon
24
17
  from labelformat.model.object_detection import (
25
- ImageObjectDetection,
26
18
  ObjectDetectionInput,
27
19
  )
28
- from sqlmodel import Session
29
- from tqdm import tqdm
30
-
31
- from lightly_studio.api.db import db_manager
32
- from lightly_studio.models.annotation.annotation_base import AnnotationCreate
33
- from lightly_studio.models.annotation_label import AnnotationLabelCreate
34
- from lightly_studio.models.annotation_task import (
35
- AnnotationTaskTable,
20
+ from sqlmodel import Session, select
21
+
22
+ from lightly_studio import db_manager
23
+ from lightly_studio.api import features
24
+ from lightly_studio.core import add_samples
25
+ from lightly_studio.core.dataset_query.dataset_query import DatasetQuery
26
+ from lightly_studio.core.dataset_query.match_expression import MatchExpression
27
+ from lightly_studio.core.dataset_query.order_by import OrderByExpression
28
+ from lightly_studio.core.sample import Sample
29
+ from lightly_studio.dataset import fsspec_lister
30
+ from lightly_studio.dataset.embedding_manager import EmbeddingManagerProvider
31
+ from lightly_studio.metadata import compute_typicality
32
+ from lightly_studio.models.annotation.annotation_base import (
36
33
  AnnotationType,
37
34
  )
38
35
  from lightly_studio.models.dataset import DatasetCreate, DatasetTable
39
- from lightly_studio.models.sample import SampleCreate, SampleTable
36
+ from lightly_studio.models.sample import SampleTable
40
37
  from lightly_studio.resolvers import (
41
- annotation_label_resolver,
42
- annotation_resolver,
43
- annotation_task_resolver,
44
38
  dataset_resolver,
39
+ embedding_model_resolver,
45
40
  sample_resolver,
46
41
  )
47
42
  from lightly_studio.type_definitions import PathLike
48
43
 
49
44
  # Constants
50
- ANNOTATION_BATCH_SIZE = 64 # Number of annotations to process in a single batch
51
- SAMPLE_BATCH_SIZE = 32 # Number of samples to process in a single batch
45
+ DEFAULT_DATASET_NAME = "default_dataset"
52
46
 
53
-
54
- @dataclass
55
- class AnnotationProcessingContext:
56
- """Context for processing annotations for a single sample."""
57
-
58
- dataset_id: UUID
59
- sample_id: UUID
60
- label_map: dict[int, UUID]
61
- annotation_task_id: UUID
47
+ _SliceType = slice # to avoid shadowing built-in slice in type annotations
62
48
 
63
49
 
64
50
  class Dataset:
65
51
  """A LightlyStudio Dataset.
66
52
 
67
- Represents a dataset in LightlyStudio.
68
-
69
- Args:
70
- name: The name of the dataset. If None, a default name will be assigned.
53
+ Keeps a reference to the underlying DatasetTable.
71
54
  """
72
55
 
73
- def __init__(self, name: str | None = None) -> None:
56
+ def __init__(self, dataset: DatasetTable) -> None:
74
57
  """Initialize a LightlyStudio Dataset."""
58
+ self._inner = dataset
59
+ # TODO(Michal, 09/2025): Do not store the session. Instead, use the
60
+ # dataset object session.
61
+ self.session = db_manager.persistent_session()
62
+
63
+ @staticmethod
64
+ def create(name: str | None = None) -> Dataset:
65
+ """Create a new dataset."""
66
+ if name is None:
67
+ name = DEFAULT_DATASET_NAME
68
+
69
+ dataset = dataset_resolver.create(
70
+ session=db_manager.persistent_session(),
71
+ dataset=DatasetCreate(name=name, directory=""),
72
+ )
73
+ return Dataset(dataset=dataset)
74
+
75
+ @staticmethod
76
+ def load(name: str | None = None) -> Dataset:
77
+ """Load an existing dataset."""
75
78
  if name is None:
76
79
  name = "default_dataset"
77
- self.name = name
78
- self.session = db_manager.persistent_session()
79
- # Create dataset.
80
- self._dataset = dataset_resolver.create(
81
- session=self.session,
82
- dataset=DatasetCreate(
83
- name=self.name,
84
- directory="", # The directory is not used at the moment
85
- ),
80
+
81
+ dataset = dataset_resolver.get_by_name(session=db_manager.persistent_session(), name=name)
82
+ if dataset is None:
83
+ raise ValueError(f"Dataset with name '{name}' not found.")
84
+
85
+ return Dataset(dataset=dataset)
86
+
87
+ @staticmethod
88
+ def load_or_create(name: str | None = None) -> Dataset:
89
+ """Create a new dataset or load an existing one."""
90
+ if name is None:
91
+ name = "default_dataset"
92
+
93
+ dataset = dataset_resolver.get_by_name(session=db_manager.persistent_session(), name=name)
94
+ if dataset is None:
95
+ return Dataset.create(name=name)
96
+
97
+ return Dataset(dataset=dataset)
98
+
99
+ def __iter__(self) -> Iterator[Sample]:
100
+ """Iterate over samples in the dataset."""
101
+ for sample in self.session.exec(
102
+ select(SampleTable).where(SampleTable.dataset_id == self.dataset_id)
103
+ ):
104
+ yield Sample(inner=sample)
105
+
106
+ def get_sample(self, sample_id: UUID) -> Sample:
107
+ """Get a single sample from the dataset by its ID.
108
+
109
+ Args:
110
+ sample_id: The UUID of the sample to retrieve.
111
+
112
+ Returns:
113
+ A single SampleTable object.
114
+
115
+ Raises:
116
+ IndexError: If no sample is found with the given sample_id.
117
+ """
118
+ sample = sample_resolver.get_by_id(
119
+ self.session, dataset_id=self.dataset_id, sample_id=sample_id
86
120
  )
87
121
 
122
+ if sample is None:
123
+ raise IndexError(f"No sample found for sample_id: {sample_id}")
124
+ return Sample(inner=sample)
125
+
88
126
  @property
89
127
  def dataset_id(self) -> UUID:
90
128
  """Get the dataset ID."""
91
- return self._dataset.dataset_id
129
+ return self._inner.dataset_id
130
+
131
+ @property
132
+ def name(self) -> str:
133
+ """Get the dataset name."""
134
+ return self._inner.name
135
+
136
+ def query(self) -> DatasetQuery:
137
+ """Create a DatasetQuery for this dataset.
138
+
139
+ Returns:
140
+ A DatasetQuery instance for querying samples in this dataset.
141
+ """
142
+ return DatasetQuery(dataset=self._inner, session=self.session)
143
+
144
+ def match(self, match_expression: MatchExpression) -> DatasetQuery:
145
+ """Create a query on the dataset and store a field condition for filtering.
146
+
147
+ Args:
148
+ match_expression: Defines the filter.
149
+
150
+ Returns:
151
+ DatasetQuery for method chaining.
152
+ """
153
+ return self.query().match(match_expression)
154
+
155
+ def order_by(self, *order_by: OrderByExpression) -> DatasetQuery:
156
+ """Create a query on the dataset and store ordering expressions.
157
+
158
+ Args:
159
+ order_by: One or more ordering expressions. They are applied in order.
160
+ E.g. first ordering by sample width and then by sample file_name will
161
+ only order the samples with the same sample width by file_name.
162
+
163
+ Returns:
164
+ DatasetQuery for method chaining.
165
+ """
166
+ return self.query().order_by(*order_by)
167
+
168
+ def slice(self, offset: int = 0, limit: int | None = None) -> DatasetQuery:
169
+ """Create a query on the dataset and apply offset and limit to results.
170
+
171
+ Args:
172
+ offset: Number of items to skip from beginning (default: 0).
173
+ limit: Maximum number of items to return (None = no limit).
174
+
175
+ Returns:
176
+ DatasetQuery for method chaining.
177
+ """
178
+ return self.query().slice(offset, limit)
179
+
180
+ def __getitem__(self, key: _SliceType) -> DatasetQuery:
181
+ """Create a query on the dataset and enable bracket notation for slicing.
182
+
183
+ Args:
184
+ key: A slice object (e.g., [10:20], [:50], [100:]).
185
+
186
+ Returns:
187
+ DatasetQuery with slice applied.
188
+
189
+ Raises:
190
+ TypeError: If key is not a slice object.
191
+ ValueError: If slice contains unsupported features or conflicts with existing slice.
192
+ """
193
+ return self.query()[key]
92
194
 
93
195
  def add_samples_from_path(
94
196
  self,
95
197
  path: PathLike,
96
- recursive: bool = True,
97
- allowed_extensions: Iterable[str] = {
98
- ".png",
99
- ".jpg",
100
- ".jpeg",
101
- ".gif",
102
- ".webp",
103
- ".bmp",
104
- ".tiff",
105
- },
198
+ allowed_extensions: Iterable[str] | None = None,
199
+ embed: bool = True,
106
200
  ) -> None:
107
201
  """Adding samples from the specified path to the dataset.
108
202
 
109
203
  Args:
110
204
  path: Path to the folder containing the images to add.
111
- recursive: If True, search for images recursively in subfolders.
112
205
  allowed_extensions: An iterable container of allowed image file
113
206
  extensions.
207
+ embed: If True, generate embeddings for the newly added samples.
114
208
  """
115
- path = Path(path).absolute() if isinstance(path, str) else path.absolute()
116
- if not path.exists() or not path.is_dir():
117
- raise ValueError(f"Provided path is not a valid directory: {path}")
118
-
119
209
  # Collect image file paths.
120
- allowed_extensions_set = {ext.lower() for ext in allowed_extensions}
121
- image_paths = []
122
- path_iter = path.rglob("*") if recursive else path.glob("*")
123
- for path in path_iter:
124
- if path.is_file() and path.suffix.lower() in allowed_extensions_set:
125
- image_paths.append(path)
210
+ if allowed_extensions:
211
+ allowed_extensions_set = {ext.lower() for ext in allowed_extensions}
212
+ else:
213
+ allowed_extensions_set = None
214
+ image_paths = list(
215
+ fsspec_lister.iter_files_from_path(
216
+ path=str(path), allowed_extensions=allowed_extensions_set
217
+ )
218
+ )
126
219
  print(f"Found {len(image_paths)} images in {path}.")
127
220
 
128
221
  # Process images.
129
- _load_into_dataset_from_paths(
222
+ created_sample_ids = add_samples.load_into_dataset_from_paths(
130
223
  session=self.session,
131
224
  dataset_id=self.dataset_id,
132
225
  image_paths=image_paths,
133
226
  )
134
227
 
228
+ if embed:
229
+ _generate_embeddings(
230
+ session=self.session, dataset_id=self.dataset_id, sample_ids=created_sample_ids
231
+ )
232
+
135
233
  def add_samples_from_labelformat(
136
234
  self,
137
235
  input_labels: ObjectDetectionInput | InstanceSegmentationInput,
138
236
  images_path: PathLike,
139
- is_prediction: bool = True,
140
- task_name: str | None = None,
237
+ embed: bool = True,
141
238
  ) -> None:
142
239
  """Load a dataset from a labelformat object and store in database.
143
240
 
144
241
  Args:
145
242
  input_labels: The labelformat input object.
146
243
  images_path: Path to the folder containing the images.
147
- is_prediction: Whether the task is for prediction or labels.
148
- task_name: Optional name for the annotation task. If None, a
149
- default name is generated.
150
-
151
- Returns:
152
- DatasetTable: The created dataset table entry.
244
+ embed: If True, generate embeddings for the newly added samples.
153
245
  """
154
246
  if isinstance(images_path, str):
155
247
  images_path = Path(images_path)
156
248
  images_path = images_path.absolute()
157
249
 
158
- # Determine annotation type based on input.
159
- # Currently, we always create BBOX tasks, even for segmentation,
160
- # as segmentation data is stored alongside bounding boxes.
161
- annotation_type = AnnotationType.BBOX
162
-
163
- # Generate a default task name if none is provided.
164
- if task_name is None:
165
- task_name = f"Loaded from labelformat: {self.name}"
166
-
167
- # Create annotation task.
168
- new_annotation_task = annotation_task_resolver.create(
169
- session=self.session,
170
- annotation_task=AnnotationTaskTable(
171
- name=task_name,
172
- annotation_type=annotation_type,
173
- is_prediction=is_prediction,
174
- ),
175
- )
176
-
177
- _load_into_dataset(
250
+ created_sample_ids = add_samples.load_into_dataset_from_labelformat(
178
251
  session=self.session,
179
252
  dataset_id=self.dataset_id,
180
253
  input_labels=input_labels,
181
254
  images_path=images_path,
182
- annotation_task_id=new_annotation_task.annotation_task_id,
183
255
  )
184
256
 
185
- def from_yolo(
257
+ if embed:
258
+ _generate_embeddings(
259
+ session=self.session, dataset_id=self.dataset_id, sample_ids=created_sample_ids
260
+ )
261
+
262
+ def add_samples_from_yolo(
186
263
  self,
187
- data_yaml_path: str,
264
+ data_yaml: PathLike,
188
265
  input_split: str = "train",
189
- task_name: str | None = None,
190
- ) -> DatasetTable:
266
+ embed: bool = True,
267
+ ) -> None:
191
268
  """Load a dataset in YOLO format and store in DB.
192
269
 
193
270
  Args:
194
- data_yaml_path: Path to the YOLO data.yaml file.
271
+ data_yaml: Path to the YOLO data.yaml file.
195
272
  input_split: The split to load (e.g., 'train', 'val').
196
- task_name: Optional name for the annotation task. If None, a
197
- default name is generated.
198
-
199
- Returns:
200
- DatasetTable: The created dataset table entry.
273
+ embed: If True, generate embeddings for the newly added samples.
201
274
  """
202
- data_yaml = Path(data_yaml_path).absolute()
203
- dataset_name = data_yaml.parent.name
275
+ if isinstance(data_yaml, str):
276
+ data_yaml = Path(data_yaml)
277
+ data_yaml = data_yaml.absolute()
204
278
 
205
- if task_name is None:
206
- task_name = f"Loaded from YOLO: {data_yaml.name} ({input_split} split)"
279
+ if not data_yaml.is_file() or data_yaml.suffix != ".yaml":
280
+ raise FileNotFoundError(f"YOLO data yaml file not found: '{data_yaml}'")
207
281
 
208
282
  # Load the dataset using labelformat.
209
283
  label_input = YOLOv8ObjectDetectionInput(
210
284
  input_file=data_yaml,
211
285
  input_split=input_split,
212
286
  )
213
- img_dir = label_input._images_dir() # noqa: SLF001
287
+ images_path = label_input._images_dir() # noqa: SLF001
214
288
 
215
- return self.from_labelformat( # type: ignore[no-any-return,attr-defined]
289
+ self.add_samples_from_labelformat(
216
290
  input_labels=label_input,
217
- dataset_name=dataset_name,
218
- img_dir=str(img_dir),
219
- is_prediction=False,
220
- task_name=task_name,
291
+ images_path=images_path,
292
+ embed=embed,
221
293
  )
222
294
 
223
- def from_coco_object_detections(
295
+ def add_samples_from_coco(
224
296
  self,
225
- annotations_json_path: str,
226
- img_dir: str,
227
- task_name: str | None = None,
228
- ) -> DatasetTable:
297
+ annotations_json: PathLike,
298
+ images_path: PathLike,
299
+ annotation_type: AnnotationType = AnnotationType.OBJECT_DETECTION,
300
+ embed: bool = True,
301
+ ) -> None:
229
302
  """Load a dataset in COCO Object Detection format and store in DB.
230
303
 
231
304
  Args:
232
- annotations_json_path: Path to the COCO annotations JSON file.
233
- img_dir: Path to the folder containing the images.
234
- task_name: Optional name for the annotation task. If None, a
235
- default name is generated.
236
-
237
- Returns:
238
- DatasetTable: The created dataset table entry.
305
+ annotations_json: Path to the COCO annotations JSON file.
306
+ images_path: Path to the folder containing the images.
307
+ annotation_type: The type of annotation to be loaded (e.g., 'ObjectDetection',
308
+ 'InstanceSegmentation').
309
+ embed: If True, generate embeddings for the newly added samples.
239
310
  """
240
- annotations_json = Path(annotations_json_path)
241
- dataset_name = annotations_json.parent.name
242
-
243
- if task_name is None:
244
- task_name = f"Loaded from COCO Object Detection: {annotations_json.name}"
311
+ if isinstance(annotations_json, str):
312
+ annotations_json = Path(annotations_json)
313
+ annotations_json = annotations_json.absolute()
245
314
 
246
- label_input = COCOObjectDetectionInput(
247
- input_file=annotations_json,
248
- )
249
- img_dir_path = Path(img_dir).absolute()
250
-
251
- return self.from_labelformat( # type: ignore[no-any-return, attr-defined]
252
- input_labels=label_input,
253
- dataset_name=dataset_name,
254
- img_dir=str(img_dir_path),
255
- is_prediction=False,
256
- task_name=task_name,
257
- )
315
+ if not annotations_json.is_file() or annotations_json.suffix != ".json":
316
+ raise FileNotFoundError(f"COCO annotations json file not found: '{annotations_json}'")
258
317
 
259
- def from_coco_instance_segmentations(
260
- self,
261
- annotations_json_path: str,
262
- img_dir: str,
263
- task_name: str | None = None,
264
- ) -> DatasetTable:
265
- """Load a dataset in COCO Instance Segmentation format and store in DB.
266
-
267
- Args:
268
- annotations_json_path: Path to the COCO annotations JSON file.
269
- img_dir: Path to the folder containing the images.
270
- task_name: Optional name for the annotation task. If None, a
271
- default name is generated.
318
+ label_input: COCOObjectDetectionInput | COCOInstanceSegmentationInput
272
319
 
273
- Returns:
274
- DatasetTable: The created dataset table entry.
275
- """
276
- annotations_json = Path(annotations_json_path)
277
- dataset_name = annotations_json.parent.name
278
-
279
- if task_name is None:
280
- task_name = f"Loaded from COCO Instance Segmentation: {annotations_json.name}"
320
+ if annotation_type == AnnotationType.OBJECT_DETECTION:
321
+ label_input = COCOObjectDetectionInput(
322
+ input_file=annotations_json,
323
+ )
324
+ elif annotation_type == AnnotationType.INSTANCE_SEGMENTATION:
325
+ label_input = COCOInstanceSegmentationInput(
326
+ input_file=annotations_json,
327
+ )
328
+ else:
329
+ raise ValueError(f"Invalid annotation type: {annotation_type}")
281
330
 
282
- label_input = COCOInstanceSegmentationInput(
283
- input_file=annotations_json,
284
- )
285
- img_dir_path = Path(img_dir).absolute()
331
+ images_path = Path(images_path).absolute()
286
332
 
287
- return self.from_labelformat( # type: ignore[no-any-return,attr-defined]
333
+ self.add_samples_from_labelformat(
288
334
  input_labels=label_input,
289
- dataset_name=dataset_name,
290
- img_dir=str(img_dir_path),
291
- is_prediction=False,
292
- task_name=task_name,
335
+ images_path=images_path,
336
+ embed=embed,
293
337
  )
294
338
 
295
- @staticmethod
296
- def load_from_db(name: str, db_path: PathLike) -> Dataset:
297
- """Load a dataset from the database.
339
+ def compute_typicality_metadata(
340
+ self,
341
+ embedding_model_name: str | None = None,
342
+ metadata_name: str = "typicality",
343
+ ) -> None:
344
+ """Computes typicality from embeddings, for K nearest neighbors.
298
345
 
299
- Returns:
300
- Dataset: The loaded dataset.
346
+ Args:
347
+ embedding_model_name:
348
+ The name of the embedding model to use. If not given, the default
349
+ embedding model is used.
350
+ metadata_name:
351
+ The name of the metadata to store the typicality values in. If not given, the default
352
+ name "typicality" is used.
301
353
  """
302
- raise NotImplementedError
303
-
304
-
305
- def _load_into_dataset_from_paths(
306
- dataset_id: UUID,
307
- session: Session,
308
- image_paths: Iterable[Path],
309
- ) -> None:
310
- samples_to_create: list[SampleCreate] = []
311
-
312
- for image_path in tqdm(
313
- image_paths,
314
- desc="Processing images",
315
- unit=" images",
316
- ):
317
- try:
318
- image = PIL.Image.open(image_path)
319
- width, height = image.size
320
- image.close()
321
- except (FileNotFoundError, PIL.UnidentifiedImageError, OSError):
322
- continue
323
-
324
- sample = SampleCreate(
325
- file_name=image_path.name,
326
- file_path_abs=str(image_path),
327
- width=width,
328
- height=height,
329
- dataset_id=dataset_id,
330
- )
331
- samples_to_create.append(sample)
332
-
333
- # Process batch when it reaches SAMPLE_BATCH_SIZE
334
- if len(samples_to_create) >= SAMPLE_BATCH_SIZE:
335
- _ = sample_resolver.create_many(session=session, samples=samples_to_create)
336
- samples_to_create = []
337
-
338
- # Handle remaining samples
339
- if samples_to_create:
340
- _ = sample_resolver.create_many(session=session, samples=samples_to_create)
341
-
342
-
343
- def _load_into_dataset(
344
- session: Session,
345
- dataset_id: UUID,
346
- input_labels: ObjectDetectionInput | InstanceSegmentationInput,
347
- images_path: Path,
348
- annotation_task_id: UUID,
349
- ) -> None:
350
- """Store a loaded dataset in database."""
351
- # Create label mapping
352
- label_map = _create_label_map(session=session, input_labels=input_labels)
353
-
354
- annotations_to_create: list[AnnotationCreate] = []
355
- sample_ids: list[UUID] = []
356
- samples_to_create: list[SampleCreate] = []
357
- samples_image_data: list[
358
- tuple[SampleCreate, ImageInstanceSegmentation | ImageObjectDetection]
359
- ] = []
360
-
361
- for image_data in tqdm(input_labels.get_labels(), desc="Processing images", unit=" images"):
362
- image: Image = image_data.image # type: ignore[attr-defined]
363
-
364
- typed_image_data: ImageInstanceSegmentation | ImageObjectDetection = image_data # type: ignore[assignment]
365
- sample = SampleCreate(
366
- file_name=str(image.filename),
367
- file_path_abs=str(images_path / image.filename),
368
- width=image.width,
369
- height=image.height,
370
- dataset_id=dataset_id,
371
- )
372
- samples_to_create.append(sample)
373
- samples_image_data.append((sample, typed_image_data))
374
-
375
- if len(samples_to_create) >= SAMPLE_BATCH_SIZE:
376
- stored_samples = sample_resolver.create_many(session=session, samples=samples_to_create)
377
- _process_batch_annotations(
378
- session=session,
379
- stored_samples=stored_samples,
380
- samples_data=samples_image_data,
381
- dataset_id=dataset_id,
382
- label_map=label_map,
383
- annotation_task_id=annotation_task_id,
384
- annotations_to_create=annotations_to_create,
385
- sample_ids=sample_ids,
386
- )
387
- samples_to_create.clear()
388
- samples_image_data.clear()
389
-
390
- if samples_to_create:
391
- stored_samples = sample_resolver.create_many(session=session, samples=samples_to_create)
392
- _process_batch_annotations(
393
- session=session,
394
- stored_samples=stored_samples,
395
- samples_data=samples_image_data,
396
- dataset_id=dataset_id,
397
- label_map=label_map,
398
- annotation_task_id=annotation_task_id,
399
- annotations_to_create=annotations_to_create,
400
- sample_ids=sample_ids,
401
- )
402
-
403
- # Insert any remaining annotations
404
- if annotations_to_create:
405
- annotation_resolver.create_many(session=session, annotations=annotations_to_create)
406
-
407
-
408
- def _create_label_map(
409
- session: Session,
410
- input_labels: ObjectDetectionInput | InstanceSegmentationInput,
411
- ) -> dict[int, UUID]:
412
- """Create a mapping of category IDs to annotation label IDs."""
413
- label_map = {}
414
- for category in tqdm(
415
- input_labels.get_categories(),
416
- desc="Processing categories",
417
- unit=" categories",
418
- ):
419
- label = AnnotationLabelCreate(annotation_label_name=category.name)
420
- stored_label = annotation_label_resolver.create(session=session, label=label)
421
- label_map[category.id] = stored_label.annotation_label_id
422
- return label_map
423
-
424
-
425
- def _process_object_detection_annotations(
426
- context: AnnotationProcessingContext,
427
- image_data: ImageObjectDetection,
428
- ) -> list[AnnotationCreate]:
429
- """Process object detection annotations for a single image."""
430
- new_annotations = []
431
- for obj in image_data.objects:
432
- box = obj.box.to_format(BoundingBoxFormat.XYWH)
433
- x, y, width, height = box
434
-
435
- new_annotations.append(
436
- AnnotationCreate(
437
- dataset_id=context.dataset_id,
438
- sample_id=context.sample_id,
439
- annotation_label_id=context.label_map[obj.category.id],
440
- annotation_type="object_detection",
441
- x=x,
442
- y=y,
443
- width=width,
444
- height=height,
445
- confidence=obj.confidence,
446
- annotation_task_id=context.annotation_task_id,
447
- )
448
- )
449
- return new_annotations
450
-
451
-
452
- def _process_instance_segmentation_annotations(
453
- context: AnnotationProcessingContext,
454
- image_data: ImageInstanceSegmentation,
455
- ) -> list[AnnotationCreate]:
456
- """Process instance segmentation annotations for a single image."""
457
- new_annotations = []
458
- for obj in image_data.objects:
459
- segmentation_rle: None | list[int] = None
460
- if isinstance(obj.segmentation, MultiPolygon):
461
- box = obj.segmentation.bounding_box().to_format(BoundingBoxFormat.XYWH)
462
- elif isinstance(obj.segmentation, BinaryMaskSegmentation):
463
- box = obj.segmentation.bounding_box.to_format(BoundingBoxFormat.XYWH)
464
- segmentation_rle = obj.segmentation._rle_row_wise # noqa: SLF001
465
- else:
466
- raise ValueError(f"Unsupported segmentation type: {type(obj.segmentation)}")
467
-
468
- x, y, width, height = box
469
-
470
- new_annotations.append(
471
- AnnotationCreate(
472
- dataset_id=context.dataset_id,
473
- sample_id=context.sample_id,
474
- annotation_label_id=context.label_map[obj.category.id],
475
- annotation_type="instance_segmentation",
476
- x=x,
477
- y=y,
478
- width=width,
479
- height=height,
480
- segmentation_mask=segmentation_rle,
481
- annotation_task_id=context.annotation_task_id,
482
- )
483
- )
484
- return new_annotations
485
-
486
-
487
- def _process_batch_annotations( # noqa: PLR0913
488
- session: Session,
489
- stored_samples: list[SampleTable],
490
- samples_data: list[tuple[SampleCreate, ImageInstanceSegmentation | ImageObjectDetection]],
491
- dataset_id: UUID,
492
- label_map: dict[int, UUID],
493
- annotation_task_id: UUID,
494
- annotations_to_create: list[AnnotationCreate],
495
- sample_ids: list[UUID],
496
- ) -> None:
497
- """Process annotations for a batch of samples."""
498
- for stored_sample, (_, img_data) in zip(stored_samples, samples_data):
499
- sample_ids.append(stored_sample.sample_id)
500
-
501
- context = AnnotationProcessingContext(
502
- dataset_id=dataset_id,
503
- sample_id=stored_sample.sample_id,
504
- label_map=label_map,
505
- annotation_task_id=annotation_task_id,
354
+ embedding_model_id = embedding_model_resolver.get_by_name(
355
+ session=self.session,
356
+ dataset_id=self.dataset_id,
357
+ embedding_model_name=embedding_model_name,
358
+ ).embedding_model_id
359
+ compute_typicality.compute_typicality_metadata(
360
+ session=self.session,
361
+ dataset_id=self.dataset_id,
362
+ embedding_model_id=embedding_model_id,
363
+ metadata_name=metadata_name,
506
364
  )
507
365
 
508
- if isinstance(img_data, ImageInstanceSegmentation):
509
- new_annotations = _process_instance_segmentation_annotations(
510
- context=context, image_data=img_data
511
- )
512
- elif isinstance(img_data, ImageObjectDetection):
513
- new_annotations = _process_object_detection_annotations(
514
- context=context, image_data=img_data
515
- )
516
- else:
517
- raise ValueError(f"Unsupported annotation type: {type(img_data)}")
518
366
 
519
- annotations_to_create.extend(new_annotations)
367
+ def _generate_embeddings(session: Session, dataset_id: UUID, sample_ids: list[UUID]) -> None:
368
+ """Generate and store embeddings for samples.
520
369
 
521
- if len(annotations_to_create) >= ANNOTATION_BATCH_SIZE:
522
- annotation_resolver.create_many(session=session, annotations=annotations_to_create)
523
- annotations_to_create.clear()
370
+ Args:
371
+ session: Database session for resolver operations.
372
+ dataset_id: The ID of the dataset to associate with the embedding model.
373
+ sample_ids: List of sample IDs to generate embeddings for.
374
+ """
375
+ if not sample_ids:
376
+ return
377
+
378
+ embedding_manager = EmbeddingManagerProvider.get_embedding_manager()
379
+ model_id = embedding_manager.load_or_get_default_model(
380
+ session=session,
381
+ dataset_id=dataset_id,
382
+ )
383
+ if model_id is None:
384
+ print("No embedding model loaded. Skipping embedding generation.")
385
+ return
386
+
387
+ embedding_manager.embed_images(
388
+ session=session,
389
+ sample_ids=sample_ids,
390
+ embedding_model_id=model_id,
391
+ )
392
+
393
+ # Mark the embedding search feature as enabled.
394
+ if "embeddingSearchEnabled" not in features.lightly_studio_active_features:
395
+ features.lightly_studio_active_features.append("embeddingSearchEnabled")