lightly-studio 0.3.1-py3-none-any.whl → 0.3.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of lightly-studio might be problematic.

Files changed (133)
  1. lightly_studio/__init__.py +4 -4
  2. lightly_studio/api/app.py +1 -1
  3. lightly_studio/api/routes/api/annotation.py +6 -16
  4. lightly_studio/api/routes/api/annotation_label.py +2 -5
  5. lightly_studio/api/routes/api/annotation_task.py +4 -5
  6. lightly_studio/api/routes/api/classifier.py +2 -5
  7. lightly_studio/api/routes/api/dataset.py +2 -3
  8. lightly_studio/api/routes/api/dataset_tag.py +2 -3
  9. lightly_studio/api/routes/api/metadata.py +2 -4
  10. lightly_studio/api/routes/api/metrics.py +2 -6
  11. lightly_studio/api/routes/api/sample.py +5 -13
  12. lightly_studio/api/routes/api/settings.py +2 -6
  13. lightly_studio/api/routes/images.py +6 -6
  14. lightly_studio/core/add_samples.py +383 -0
  15. lightly_studio/core/dataset.py +250 -362
  16. lightly_studio/core/dataset_query/__init__.py +0 -0
  17. lightly_studio/core/dataset_query/boolean_expression.py +67 -0
  18. lightly_studio/core/dataset_query/dataset_query.py +211 -0
  19. lightly_studio/core/dataset_query/field.py +113 -0
  20. lightly_studio/core/dataset_query/field_expression.py +79 -0
  21. lightly_studio/core/dataset_query/match_expression.py +23 -0
  22. lightly_studio/core/dataset_query/order_by.py +79 -0
  23. lightly_studio/core/dataset_query/sample_field.py +28 -0
  24. lightly_studio/core/dataset_query/tags_expression.py +46 -0
  25. lightly_studio/core/sample.py +159 -32
  26. lightly_studio/core/start_gui.py +35 -0
  27. lightly_studio/dataset/edge_embedding_generator.py +13 -8
  28. lightly_studio/dataset/embedding_generator.py +2 -3
  29. lightly_studio/dataset/embedding_manager.py +74 -6
  30. lightly_studio/dataset/fsspec_lister.py +275 -0
  31. lightly_studio/dataset/loader.py +49 -30
  32. lightly_studio/dataset/mobileclip_embedding_generator.py +6 -4
  33. lightly_studio/db_manager.py +145 -0
  34. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BBm0IWdq.css +1 -0
  35. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.BNTuXSAe.css +1 -0
  36. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/2O287xak.js +3 -0
  37. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{O-EABkf9.js → 7YNGEs1C.js} +1 -1
  38. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BBoGk9hq.js +1 -0
  39. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BRnH9v23.js +92 -0
  40. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bg1Y5eUZ.js +1 -0
  41. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DOlTMNyt.js → BqBqV92V.js} +1 -1
  42. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C0JiMuYn.js +1 -0
  43. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{DjfY96ND.js → C98Hk3r5.js} +1 -1
  44. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{r64xT6ao.js → CG0dMCJi.js} +1 -1
  45. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{C8I8rFJQ.js → Ccq4ZD0B.js} +1 -1
  46. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cpy-nab_.js +1 -0
  47. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bu7uvVrG.js → Crk-jcvV.js} +1 -1
  48. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Cs31G8Qn.js +1 -0
  49. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CsKrY2zA.js +1 -0
  50. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{x9G_hzyY.js → Cur71c3O.js} +1 -1
  51. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CzgC3GFB.js +1 -0
  52. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D8GZDMNN.js +1 -0
  53. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DFRh-Spp.js +1 -0
  54. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{BylOuP6i.js → DRZO-E-T.js} +1 -1
  55. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{l7KrR96u.js → DcGCxgpH.js} +1 -1
  56. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{Bsi3UGy5.js → Df3aMO5B.js} +1 -1
  57. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{hQVEETDE.js → DkR_EZ_B.js} +1 -1
  58. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DqUGznj_.js +1 -0
  59. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/KpAtIldw.js +1 -0
  60. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/M1Q1F7bw.js +4 -0
  61. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{CDnpyLsT.js → OH7-C_mc.js} +1 -1
  62. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/{D6su9Aln.js → gLNdjSzu.js} +1 -1
  63. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/i0ZZ4z06.js +1 -0
  64. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.BI-EA5gL.js +2 -0
  65. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.CcsRl3cZ.js +1 -0
  66. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.BbO4Zc3r.js +1 -0
  67. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{1.B4rNYwVp.js → 1._I9GR805.js} +1 -1
  68. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.J2RBFrSr.js +1 -0
  69. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.Cmqj25a-.js +1 -0
  70. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.C45iKJHA.js +6 -0
  71. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{3.CWHpKonm.js → 3.w9g4AcAx.js} +1 -1
  72. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{4.OUWOLQeV.js → 4.BBI8KwnD.js} +1 -1
  73. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.huHuxdiF.js +1 -0
  74. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.CrbkRPam.js +1 -0
  75. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.FomEdhD6.js +1 -0
  76. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cb_ADSLk.js +1 -0
  77. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/{9.CPu3CiBc.js → 9.CajIG5ce.js} +1 -1
  78. lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -1
  79. lightly_studio/dist_lightly_studio_view_app/index.html +14 -14
  80. lightly_studio/examples/example.py +13 -12
  81. lightly_studio/examples/example_coco.py +13 -0
  82. lightly_studio/examples/example_metadata.py +83 -98
  83. lightly_studio/examples/example_selection.py +7 -19
  84. lightly_studio/examples/example_split_work.py +12 -36
  85. lightly_studio/examples/{example_v2.py → example_yolo.py} +3 -4
  86. lightly_studio/models/annotation/annotation_base.py +7 -8
  87. lightly_studio/models/annotation/instance_segmentation.py +8 -8
  88. lightly_studio/models/annotation/object_detection.py +4 -4
  89. lightly_studio/models/dataset.py +6 -2
  90. lightly_studio/models/sample.py +10 -3
  91. lightly_studio/resolvers/dataset_resolver.py +10 -0
  92. lightly_studio/resolvers/embedding_model_resolver.py +22 -0
  93. lightly_studio/resolvers/sample_resolver.py +53 -9
  94. lightly_studio/resolvers/tag_resolver.py +23 -0
  95. lightly_studio/selection/select.py +55 -46
  96. lightly_studio/selection/select_via_db.py +23 -19
  97. lightly_studio/selection/selection_config.py +6 -3
  98. lightly_studio/services/annotations_service/__init__.py +4 -0
  99. lightly_studio/services/annotations_service/update_annotation.py +21 -32
  100. lightly_studio/services/annotations_service/update_annotation_bounding_box.py +36 -0
  101. lightly_studio-0.3.2.dist-info/METADATA +689 -0
  102. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.2.dist-info}/RECORD +104 -91
  103. lightly_studio/api/db.py +0 -133
  104. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +0 -1
  105. lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +0 -1
  106. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +0 -1
  107. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +0 -1
  108. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +0 -1
  109. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +0 -1
  110. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +0 -1
  111. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +0 -93
  112. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +0 -3
  113. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D5-A_Ffd.js +0 -4
  114. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +0 -1
  115. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +0 -1
  116. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +0 -1
  117. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +0 -1
  118. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +0 -1
  119. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +0 -1
  120. lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +0 -1
  121. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +0 -2
  122. lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +0 -1
  123. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +0 -1
  124. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +0 -1
  125. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +0 -1
  126. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +0 -6
  127. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +0 -1
  128. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +0 -1
  129. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +0 -1
  130. lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +0 -1
  131. lightly_studio-0.3.1.dist-info/METADATA +0 -520
  132. /lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/{OpenSans- → OpenSans-Medium.DVUZMR_6.ttf} +0 -0
  133. {lightly_studio-0.3.1.dist-info → lightly_studio-0.3.2.dist-info}/WHEEL +0 -0
lightly_studio/examples/example_metadata.py
@@ -16,14 +16,11 @@ from uuid import UUID
 from environs import Env
 from sqlmodel import Session

-from lightly_studio import DatasetLoader
-from lightly_studio.api.db import db_manager
+import lightly_studio as ls
+from lightly_studio import db_manager
+from lightly_studio.core.sample import Sample
 from lightly_studio.metadata.gps_coordinate import GPSCoordinate
-from lightly_studio.models.dataset import DatasetTable
-from lightly_studio.resolvers import (
-    metadata_resolver,
-    sample_resolver,
-)
+from lightly_studio.resolvers import metadata_resolver
 from lightly_studio.resolvers.metadata_resolver.metadata_filter import Metadata
 from lightly_studio.resolvers.samples_filter import SampleFilter

@@ -34,25 +31,24 @@ dataset_path = env.path("DATASET_PATH", "/path/to/your/yolo/dataset/data.yaml")
 LIGHTLY_STUDIO_DATASET_SPLIT = env.str("LIGHTLY_STUDIO_DATASET_SPLIT", "test")


-def load_existing_dataset() -> tuple[DatasetTable, list[UUID], DatasetLoader]:
+def load_existing_dataset() -> tuple[ls.Dataset, list[Sample]]:
     """Load an existing dataset using DatasetLoader.

     Returns:
-        Tuple of (dataset, sample_ids, loader).
+        Tuple of (dataset, samples).
     """
     print(" Loading existing dataset...")

-    loader = DatasetLoader()
-    dataset = loader.from_yolo(
-        str(dataset_path),
+    dataset = ls.Dataset.create()
+    dataset.add_samples_from_yolo(
+        data_yaml=str(dataset_path),
         input_split=LIGHTLY_STUDIO_DATASET_SPLIT,
     )
-    # Get all sample IDs from the dataset
-    samples = dataset.get_samples()
-    sample_ids = [s.sample_id for s in samples]
+    # Get all samples from the dataset
+    samples = dataset.query().to_list()

-    print(f"✅ Loaded dataset with {len(sample_ids)} samples")
-    return dataset, sample_ids, loader
+    print(f"✅ Loaded dataset with {len(samples)} samples")
+    return dataset, samples


 def add_bulk_metadata(session: Session, sample_ids: list[UUID]) -> None:
@@ -93,56 +89,38 @@ def add_bulk_metadata(session: Session, sample_ids: list[UUID]) -> None:
     print(f"✅ Added metadata to {len(sample_ids)} samples in {elapsed_time:.2f}s")


-def add_individual_metadata(session: Session, sample_ids: list[UUID]) -> None:
+def add_individual_metadata(samples: list[Sample]) -> None:
     """Add metadata to individual samples."""
     print("\n Adding individual metadata to specific samples...")

     # Add metadata to first 5 samples individually
-    for i, sample_id in enumerate(sample_ids[:5]):
+    for i, sample in enumerate(samples[:5]):
+        print(f" Adding metadata to sample {sample.file_name} {sample.sample_id}...")
         # Add some specific metadata
-        metadata_resolver.set_value_for_sample(
-            session=session,
-            sample_id=sample_id,
-            key="special_metadata",
-            value=f"sample_{i + 1}_special",
+        sample.metadata["special_metadata"] = f"sample_{i + 1}_special"
+        sample.metadata["priority"] = random.randint(1, 10)
+        sample.metadata["list"] = [1, 2, 3]
+        sample.metadata["custom_gps"] = GPSCoordinate(
+            lat=40.7128 + i * 0.1,  # Slightly different coordinates
+            lon=-74.0060 + i * 0.1,
         )

-        metadata_resolver.set_value_for_sample(
-            session=session,
-            sample_id=sample_id,
-            key="priority",
-            value=random.randint(1, 10),
-        )
-
-        metadata_resolver.set_value_for_sample(
-            session=session,
-            sample_id=sample_id,
-            key="list",
-            value=[1, 2, 3],
-        )
-
-        metadata_resolver.set_value_for_sample(
-            session=session,
-            sample_id=sample_id,
-            key="custom_gps",
-            value=GPSCoordinate(
-                lat=40.7128 + i * 0.1,  # Slightly different coordinates
-                lon=-74.0060 + i * 0.1,
-            ),
-        )
+    print(f"✅ Added individual metadata to {min(5, len(samples))} samples")

-    print(f"✅ Added individual metadata to {min(5, len(sample_ids))} samples")

-
-def demonstrate_bulk_metadata_filters(dataset: DatasetTable) -> None:
+def demonstrate_bulk_metadata_filters(dataset: ls.Dataset) -> None:
     """Demonstrate filtering with bulk-added metadata."""
+    # TODO(Michal, 09/2025): Update with native metadata filtering instead of accessing
+    # `dataset._inner` when implemented.
+    dataset_table = dataset._inner  # noqa: SLF001
+
     print("\n Bulk Metadata Filters:")
     print("=" * 50)

     # Filter by temperature
     print("\n1. Filter by temperature > 25:")
     filter_temp = SampleFilter(metadata_filters=[Metadata("temperature") > 25])  # noqa PLR2004
-    samples = dataset.get_samples(filters=filter_temp)
+    samples = dataset_table.get_samples(filters=filter_temp)
     print(f" Found {len(samples)} samples with temperature > 25")
     for sample in samples[:3]:  # Show first 3
         print(f" {sample.file_name}: {sample['temperature']}")
@@ -150,7 +128,7 @@ def demonstrate_bulk_metadata_filters(dataset: DatasetTable) -> None:
     # Filter by location
     print("\n2. Filter by location == 'city':")
     filter_location = SampleFilter(metadata_filters=[Metadata("location") == "city"])
-    samples = dataset.get_samples(filters=filter_location)
+    samples = dataset_table.get_samples(filters=filter_location)
     print(f" Found {len(samples)} samples from cities")
     for sample in samples[:3]:  # Show first 3
         print(f" {sample.file_name}: {sample['location']}")
@@ -158,7 +136,7 @@
     # Filter by GPS coordinates
     print("\n3. Filter by latitude > 0° (Northern hemisphere):")
     filter_lat = SampleFilter(metadata_filters=[Metadata("gps_coordinates.lat") > 0])
-    samples = dataset.get_samples(filters=filter_lat)
+    samples = dataset_table.get_samples(filters=filter_lat)
     print(f" Found {len(samples)} samples in Northern hemisphere")
     for sample in samples[:3]:  # Show first 3
         gps = sample["gps_coordinates"]
@@ -169,14 +147,18 @@
     filter_confidence = SampleFilter(
         metadata_filters=[Metadata("confidence") > 0.9]  # noqa PLR2004
     )
-    samples = dataset.get_samples(filters=filter_confidence)
+    samples = dataset_table.get_samples(filters=filter_confidence)
     print(f" Found {len(samples)} samples with confidence > 0.9")
     for sample in samples[:3]:  # Show first 3
         print(f" 📸 {sample.file_name}: confidence={sample['confidence']:.3f}")


-def demonstrate_individual_metadata_filters(dataset: DatasetTable) -> None:
+def demonstrate_individual_metadata_filters(dataset: ls.Dataset) -> None:
     """Demonstrate filtering with individually-added metadata."""
+    # TODO(Michal, 09/2025): Update with native metadata filtering instead of accessing
+    # `dataset._inner` when implemented.
+    dataset_table = dataset._inner  # noqa: SLF001
+
     print("\n Individual Metadata Filters:")
     print("=" * 50)

@@ -185,7 +167,7 @@ def demonstrate_individual_metadata_filters(dataset: DatasetTable) -> None:
     filter_special = SampleFilter(
         metadata_filters=[Metadata("special_metadata") == "sample_1_special"]
     )
-    samples = dataset.get_samples(filters=filter_special)
+    samples = dataset_table.get_samples(filters=filter_special)
     print(f" Found {len(samples)} samples with special metadata")
     for sample in samples:
         print(f" {sample.file_name}: {sample['special_metadata']}")
@@ -193,7 +175,7 @@
     # Filter by priority
     print("\n2. Filter by high priority (> 7):")
     filter_priority = SampleFilter(metadata_filters=[Metadata("priority") > 7])  # noqa PLR2004
-    samples = dataset.get_samples(filters=filter_priority)
+    samples = dataset_table.get_samples(filters=filter_priority)
     print(f" Found {len(samples)} samples with priority > 7")
     for sample in samples:
         print(f" {sample.file_name}: priority={sample['priority']}")
@@ -203,15 +185,19 @@ def demonstrate_individual_metadata_filters(dataset: DatasetTable) -> None:
     filter_custom_gps = SampleFilter(
         metadata_filters=[Metadata("custom_gps.lat") > 40.8]  # noqa PLR2004
     )
-    samples = dataset.get_samples(filters=filter_custom_gps)
+    samples = dataset_table.get_samples(filters=filter_custom_gps)
     print(f" Found {len(samples)} samples with custom GPS lat > 40.8")
     for sample in samples:
         gps = sample["custom_gps"]
         print(f" {sample.file_name}: lat={gps.lat:.4f}, lon={gps.lon:.4f}")


-def demonstrate_combined_filters(dataset: DatasetTable) -> None:
+def demonstrate_combined_filters(dataset: ls.Dataset) -> None:
     """Demonstrate combining multiple filters."""
+    # TODO(Michal, 09/2025): Update with native metadata filtering instead of accessing
+    # `dataset._inner` when implemented.
+    dataset_table = dataset._inner  # noqa: SLF001
+
     print("\n Combined Filters:")
     print("=" * 50)

@@ -224,7 +210,7 @@ def demonstrate_combined_filters(dataset: DatasetTable) -> None:
             Metadata("temperature") > 25,  # noqa PLR2004
         ]
     )
-    samples = dataset.get_samples(filters=filter_combined)
+    samples = dataset_table.get_samples(filters=filter_combined)
     print(f" Found {len(samples)} samples matching all criteria")
     for sample in samples[:3]:
         print(
@@ -241,64 +227,64 @@ def demonstrate_combined_filters(dataset: DatasetTable) -> None:
             Metadata("location") == "city",
         ]
     )
-    samples = dataset.get_samples(filters=filter_gps_combined)
+    samples = dataset_table.get_samples(filters=filter_gps_combined)
     print(f" Found {len(samples)} samples in northern hemisphere cities with high confidence")
     for sample in samples[:3]:
         gps = sample["gps_coordinates"]
         print(f" {sample.file_name}: lat={gps.lat:.4f}, conf={sample['confidence']:.2f}")


-def demonstrate_dictionary_like_access(session: Session, sample_ids: list[UUID]) -> None:
+def demonstrate_dictionary_like_access(samples: list[Sample]) -> None:
     """Demonstrate adding metadata using dictionary-like access."""
     print("\n Dictionary-like Metadata Access:")
     print("=" * 50)

     # Get the first few samples to demonstrate
-    samples = sample_resolver.get_many_by_id(session, sample_ids[:2])
+    samples = samples[:2]

-    print("\n1. Adding metadata using sample['key'] = value syntax:")
+    print("\n1. Adding metadata using sample.metadata['key'] = value syntax:")

     # Add different types of metadata to different samples
-    samples[0]["temperature"] = 25
-    samples[0]["location"] = "city"
-    samples[0]["is_processed"] = True
-    samples[0]["confidence"] = 0.95
+    samples[0].metadata["temperature"] = 25
+    samples[0].metadata["location"] = "city"
+    samples[0].metadata["is_processed"] = True
+    samples[0].metadata["confidence"] = 0.95
     print(
-        f" {samples[0].file_name}: temp={samples[0]['temperature']}°C, "
-        f"location={samples[0]['location']},"
-        f" processed={samples[0]['is_processed']}"
+        f" {samples[0].file_name}: temp={samples[0].metadata['temperature']}°C, "
+        f"location={samples[0].metadata['location']},"
+        f" processed={samples[0].metadata['is_processed']}"
     )

-    samples[1]["temperature"] = 15
-    samples[1]["location"] = "mountain"
-    samples[1]["gps_coordinates"] = GPSCoordinate(lat=40.7128, lon=-74.0060)
-    samples[1]["tags"] = ["outdoor", "nature", "landscape"]
+    samples[1].metadata["temperature"] = 15
+    samples[1].metadata["location"] = "mountain"
+    samples[1].metadata["gps_coordinates"] = GPSCoordinate(lat=40.7128, lon=-74.0060)
+    samples[1].metadata["tags"] = ["outdoor", "nature", "landscape"]
     print(
-        f" {samples[1].file_name}: temp={samples[1]['temperature']}°C, "
-        f"location={samples[1]['location']}, tags={samples[1]['tags']}"
+        f" {samples[1].file_name}: temp={samples[1].metadata['temperature']}°C, "
+        f"location={samples[1].metadata['location']}, tags={samples[1].metadata['tags']}"
     )

     # Demonstrate reading metadata
-    print("\n2. Reading metadata using sample['key'] syntax:")
+    print("\n2. Reading metadata using sample.metadata['key'] syntax:")
     for sample in samples:
         print(f" {sample.file_name}:")
-        print(f" Temperature: {sample['temperature']}°C")
-        print(f" Location: {sample['location']}")
-        gps = sample["gps_coordinates"]
+        print(f" Temperature: {sample.metadata['temperature']}°C")
+        print(f" Location: {sample.metadata['location']}")
+        gps = sample.metadata["gps_coordinates"]
         print(f" GPS: lat={gps.lat:.4f}, lon={gps.lon:.4f}")
-        print(f" Tags: {sample['tags']}")
+        print(f" Tags: {sample.metadata['tags']}")

     # Demonstrate None return for missing keys
-    print(" Note: sample['key'] returns None for missing keys")
-    missing_value = samples[0]["nonexistent_key"]
+    print(" Note: sample.metadata['key'] returns None for missing keys")
+    missing_value = samples[0].metadata["nonexistent_key"]
     if missing_value is None:
-        print(f" sample['nonexistent_key']: {missing_value}")
+        print(f" sample.metadata['nonexistent_key']: {missing_value}")

     print(f"✅ Added metadata to {len(samples)} samples using dictionary-like access")

     # Demonstrate schema presentation
     try:
-        samples[0]["temperature"] = "string_value"  # Invalid type for demonstration
+        samples[0].metadata["temperature"] = "string_value"  # Invalid type for demonstration
         print(f" ❌ This should not print: {missing_value}")
     except ValueError:
         print(" ✅ Correctly raised ValueError for invalid type")
@@ -308,22 +294,21 @@ def main() -> None:
     """Main function to demonstrate metadata functionality."""
     try:
         # Load existing dataset
-        dataset, sample_ids, loader = load_existing_dataset()
+        dataset, samples = load_existing_dataset()

-        with db_manager.session() as session:
-            # Add bulk metadata
-            add_bulk_metadata(session, sample_ids)
+        # Add bulk metadata
+        add_bulk_metadata(db_manager.persistent_session(), [s.sample_id for s in samples])

-            # Add individual metadata
-            add_individual_metadata(session, sample_ids)
+        # Add individual metadata
+        add_individual_metadata(samples)

-            # Demonstrate different types of filtering
-            demonstrate_bulk_metadata_filters(dataset)
-            demonstrate_individual_metadata_filters(dataset)
-            demonstrate_combined_filters(dataset)
-            demonstrate_dictionary_like_access(session, sample_ids)
+        # Demonstrate different types of filtering
+        demonstrate_bulk_metadata_filters(dataset)
+        demonstrate_individual_metadata_filters(dataset)
+        demonstrate_combined_filters(dataset)
+        demonstrate_dictionary_like_access(samples)

-        loader.start_gui()
+        ls.start_gui()

     except ValueError as e:
         print(f"❌ Error: {e}")
lightly_studio/examples/example_selection.py
@@ -4,8 +4,7 @@ from pathlib import Path

 from environs import Env

-from lightly_studio import DatasetLoader
-from lightly_studio.selection.select import Selection
+import lightly_studio as ls

 # Read environment variables
 env = Env()
@@ -15,25 +14,14 @@ env.read_env()
 dataset_path = Path(env.path("DATASET_PATH", "/path/to/your/dataset"))
 dataset_path = dataset_path.parent if dataset_path.is_file() else dataset_path

-# Create a DatasetLoader from a path
-loader = DatasetLoader()
-dataset = loader.from_directory(
-    dataset_name="clothing_small_test",
-    img_dir=str(dataset_path),
-)
-
-# Create the selection interface
-# TODO(Malte, 08/2025): Replace this with using a DatasetView.
-# See the Select class for more details on the TODO.
-select = Selection(
-    dataset_id=dataset.dataset_id,
-    session=loader.session,
-)
+# Create a Dataset from a path
+dataset = ls.Dataset.create()
+dataset.add_samples_from_path(path=str(dataset_path))

-# Select a diverse subset of 10 samples.
-select.diverse(
+# Run selection via the dataset query
+dataset.query().selection().diverse(
     n_samples_to_select=10,
     selection_result_tag_name="diverse_selection",
 )

-loader.start_gui()
+ls.start_gui()
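
In other words, example_selection.py drops the standalone Selection class for a selection entry point hanging off the query object. A condensed sketch of the new call chain as this diff presents it (the directory path is a placeholder):

    import lightly_studio as ls

    dataset = ls.Dataset.create()
    dataset.add_samples_from_path(path="/path/to/images")

    # Select a diverse subset of 10 samples and tag the result.
    dataset.query().selection().diverse(
        n_samples_to_select=10,
        selection_result_tag_name="diverse_selection",
    )
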
lightly_studio/examples/example_split_work.py
@@ -4,25 +4,21 @@ import math

 from environs import Env

-from lightly_studio import DatasetLoader
-from lightly_studio.models.tag import TagCreate
-from lightly_studio.resolvers import (
-    tag_resolver,
-)
+import lightly_studio as ls

 # Read environment variables
 env = Env()
 env.read_env()

-# Create a DatasetLoader instance
-loader = DatasetLoader()
+# Create a Dataset instance
+dataset = ls.Dataset.create()

 # Define the path to the dataset (folder containing data.yaml)
 dataset_path = env.path("DATASET_PATH", "/path/to/your/yolo/dataset/data.yaml")

 # Load YOLO dataset using data.yaml path
-dataset = loader.from_yolo(
-    str(dataset_path),
+dataset.add_samples_from_yolo(
+    data_yaml=str(dataset_path),
     input_split=env.str("LIGHTLY_STUDIO_DATASET_SPLIT", "test"),
 )

@@ -32,36 +28,16 @@ dataset = loader.from_yolo(
 # to work on.
 reviewers = env.str("DATASET_REVIEWERS", "Alice, Bob, Charlie, David")

-# Get all samples from the db
-samples = dataset.get_samples()
-
 # Create a tag for each reviewer to work on
-tags = []
-for reviewer in reviewers.split(","):
-    tags.append(
-        tag_resolver.create(
-            session=loader.session,
-            tag=TagCreate(
-                dataset_id=dataset.dataset_id,
-                name=f"""{reviewer.strip()} tasks""",
-                kind="sample",
-            ),
-        )
-    )
+tags = [reviewer.strip() for reviewer in reviewers.split(",")]
+
+# Get all samples from the db
+samples = dataset.query().to_list()

 # Chunk the samples into portions equally divided among the reviewers.
 chunk_size = math.ceil(len(samples) / len(tags))
-for i, tag in enumerate(tags):
-    # allocate all samples for this tag
-    sample_ids = [sample.sample_id for sample in samples[i * chunk_size : (i + 1) * chunk_size]]
-
-    # Add sample_ids to the tag
-    tag_resolver.add_sample_ids_to_tag_id(
-        session=loader.session,
-        tag_id=tag.tag_id,
-        sample_ids=sample_ids,
-    )
-
+for i, sample in enumerate(samples):
+    sample.add_tag(tags[i // chunk_size])

 # Launch the server to load data
-loader.start_gui()
+ls.start_gui()
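
The rewritten loop assigns reviewers by integer division rather than slicing per tag. A quick check of the arithmetic, assuming 10 samples and three hypothetical reviewers: chunk_size = ceil(10 / 3) = 4, so samples 0-3 go to the first tag, 4-7 to the second, and 8-9 to the third.

    import math

    samples = list(range(10))  # stand-ins for Sample objects
    tags = ["Alice", "Bob", "Charlie"]
    chunk_size = math.ceil(len(samples) / len(tags))  # 4

    for i, sample in enumerate(samples):
        print(sample, tags[i // chunk_size])  # 0-3 -> Alice, 4-7 -> Bob, 8-9 -> Charlie
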
lightly_studio/examples/{example_v2.py → example_yolo.py}
@@ -1,4 +1,4 @@
-"""Example of how to load samples from path with the dataset class."""
+"""Example of how to add samples in yolo format to a dataset."""

 from pathlib import Path

@@ -12,10 +12,9 @@ env.read_env()

 # Define the path to the dataset directory
 dataset_path = Path(env.path("DATASET_PATH", "/path/to/your/dataset"))
-dataset_path = dataset_path.parent if dataset_path.is_file() else dataset_path

 # Create a DatasetLoader from a path
-dataset = ls.Dataset("clothing_small_test")
-dataset.add_samples_from_path(path=dataset_path)
+dataset = ls.Dataset.create()
+dataset.add_samples_from_yolo(data_yaml=dataset_path, input_split="train")

 ls.start_gui()
lightly_studio/models/annotation/annotation_base.py
@@ -4,7 +4,7 @@ from datetime import datetime, timezone
 from typing import TYPE_CHECKING, List, Optional
 from uuid import UUID, uuid4

-from pydantic import BaseModel
+from pydantic import BaseModel, ConfigDict
 from pydantic import Field as PydanticField
 from sqlalchemy.orm import Mapped
 from sqlmodel import Field, Relationship, SQLModel
@@ -107,10 +107,10 @@ class AnnotationCreate(SQLModel):
     sample_id: UUID

     """ Optional properties for object detection. """
-    x: Optional[float] = None
-    y: Optional[float] = None
-    width: Optional[float] = None
-    height: Optional[float] = None
+    x: Optional[int] = None
+    y: Optional[int] = None
+    width: Optional[int] = None
+    height: Optional[int] = None

     """ Optional properties for instance and semantic segmentation. """
     segmentation_mask: Optional[List[int]] = None
@@ -157,13 +157,12 @@ class AnnotationWithSampleView(AnnotationView):
 class AnnotationViewsWithCount(BaseModel):
     """Response model for counted annotations."""

+    model_config = ConfigDict(populate_by_name=True)
+
     annotations: List[AnnotationWithSampleView] = PydanticField(..., alias="data")
     total_count: int
     next_cursor: Optional[int] = PydanticField(..., alias="nextCursor")

-    class Config:  # noqa: D106
-        populate_by_name = True
-

 class AnnotationDetailsView(AnnotationView):
     """Representing detailed view of an annotation."""
lightly_studio/models/annotation/instance_segmentation.py
@@ -34,10 +34,10 @@ class InstanceSegmentationAnnotationTable(SQLModel, table=True):
         back_populates="instance_segmentation_details"
     )

-    x: float
-    y: float
-    width: float
-    height: float
+    x: int
+    y: int
+    width: int
+    height: int
     # TODO(Kondrat 06/2025): We need to fix logic in the loader,
     # because it shouldn't be optional.
     # lightly_studio/dataset/loader.py#L148
@@ -49,8 +49,8 @@
 class InstanceSegmentationAnnotationView(SQLModel):
     """API response model for instance segmentation annotations."""

-    x: float
-    y: float
-    width: float
-    height: float
+    x: int
+    y: int
+    width: int
+    height: int
     segmentation_mask: Optional[List[int]] = None
lightly_studio/models/annotation/object_detection.py
@@ -32,10 +32,10 @@ class ObjectDetectionAnnotationTable(SQLModel, table=True):
         back_populates="object_detection_details"
     )

-    x: float
-    y: float
-    width: float
-    height: float
+    x: int
+    y: int
+    width: int
+    height: int


 class ObjectDetectionAnnotationView(SQLModel):
lightly_studio/models/dataset.py
@@ -10,6 +10,7 @@ from uuid import UUID, uuid4
 from sqlalchemy.orm import Session as SQLAlchemySession
 from sqlmodel import Field, Session, SQLModel

+from lightly_studio.api.routes.api.validators import Paginated
 from lightly_studio.models.sample import SampleTable
 from lightly_studio.resolvers import sample_resolver
 from lightly_studio.resolvers.samples_filter import SampleFilter
@@ -73,11 +74,14 @@ class DatasetTable(DatasetBase, table=True):
         if session is None:
             raise RuntimeError("No database session found for this instance")

+        pagination = None
+        if limit is not None:
+            pagination = Paginated(offset=offset, limit=limit)
+
         return sample_resolver.get_all_by_dataset_id(
             session=session,
             dataset_id=self.dataset_id,
-            offset=offset,
-            limit=limit,
+            pagination=pagination,
             filters=filters,
             text_embedding=text_embedding,
             sample_ids=sample_ids,
lightly_studio/models/sample.py
@@ -4,6 +4,8 @@ from datetime import datetime, timezone
 from typing import TYPE_CHECKING, Any, List, Literal, Optional
 from uuid import UUID, uuid4

+from pydantic import BaseModel, ConfigDict
+from pydantic import Field as PydanticField
 from sqlalchemy.orm import Mapped, Session
 from sqlmodel import Field, Relationship, SQLModel

@@ -44,7 +46,7 @@ class SampleBase(SQLModel):
     dataset_id: UUID = Field(default=None, foreign_key="datasets.dataset_id")

     """The dataset image path."""
-    file_path_abs: str
+    file_path_abs: str = Field(default=None, unique=True)


 class SampleCreate(SampleBase):
@@ -97,6 +99,7 @@ class SampleTable(SampleBase, table=True):
     embeddings: Mapped[List["SampleEmbeddingTable"]] = Relationship(back_populates="sample")
     metadata_dict: "SampleMetadataTable" = Relationship(back_populates="sample")

+    # TODO(Michal, 9/2025): Remove this function in favour of Sample.metadata.
     def __getitem__(self, key: str) -> Any:
         """Provides dict-like access to sample metadata.

@@ -111,6 +114,7 @@
             return None
         return self.metadata_dict.get_value(key)

+    # TODO(Michal, 9/2025): Remove this function in favour of Sample.metadata.
     def __setitem__(self, key: str, value: Any) -> None:
         """Sets a metadata key-value pair for this sample.

@@ -173,8 +177,11 @@ class SampleView(SQLModel):
     height: int


-class SampleViewsWithCount(SQLModel):
+class SampleViewsWithCount(BaseModel):
     """Response model for counted samples."""

-    data: List[SampleView]
+    model_config = ConfigDict(populate_by_name=True)
+
+    samples: List[SampleView] = PydanticField(..., alias="data")
     total_count: int
+    next_cursor: Optional[int] = PydanticField(None, alias="nextCursor")
lightly_studio/resolvers/dataset_resolver.py
@@ -67,6 +67,16 @@ def get_by_id(session: Session, dataset_id: UUID) -> DatasetTable | None:
     ).one_or_none()


+def get_by_name(session: Session, name: str) -> DatasetTable | None:
+    """Retrieve a single dataset by name."""
+    datasets = session.exec(select(DatasetTable).where(DatasetTable.name == name)).all()
+    if len(datasets) == 0:
+        return None
+    if len(datasets) > 1:
+        raise ValueError(f"Cannot retrieve a dataset, found multiple with name '{name}'.")
+    return datasets[0]
+
+
 def update(session: Session, dataset_id: UUID, dataset_data: DatasetCreate) -> DatasetTable:
     """Update an existing dataset."""
     dataset = get_by_id(session=session, dataset_id=dataset_id)
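
A hedged usage sketch for the new resolver function, assuming an open SQLModel session and the `from lightly_studio.resolvers import ...` import style used elsewhere in this diff:

    from sqlmodel import Session

    from lightly_studio.resolvers import dataset_resolver

    def find_dataset(session: Session, name: str) -> None:
        # get_by_name returns None when absent and raises ValueError on duplicate names.
        dataset = dataset_resolver.get_by_name(session=session, name=name)
        if dataset is None:
            print(f"No dataset named {name!r}")
        else:
            print(f"Found dataset {dataset.dataset_id}")
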