PyPI - lightly-studio - Versions diffs - 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl - Mend

lightly-studio 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lightly-studio might be problematic. Click here for more details.

Files changed (169) hide show

lightly_studio/resolvers/annotation_label_resolver/get_all.py CHANGED Viewed

@@ -20,3 +20,18 @@ def get_all(session: Session) -> list[AnnotationLabelTable]:
         select(AnnotationLabelTable).order_by(col(AnnotationLabelTable.created_at).asc())
     ).all()
     return list(labels) if labels else []
+def get_all_sorted_alphabetically(session: Session) -> list[AnnotationLabelTable]:
+    """Retrieve all annotation labels sorted alphabetically.
+    Args:
+        session (Session): The database session.
+    Returns:
+        list[AnnotationLabelTable]: A list of annotation labels.
+    """
+    labels = session.exec(
+        select(AnnotationLabelTable).order_by(col(AnnotationLabelTable.annotation_label_name).asc())
+    ).all()
+    return list(labels) if labels else []

lightly_studio/resolvers/annotation_resolver/__init__.py CHANGED Viewed

@@ -3,7 +3,6 @@
 from lightly_studio.resolvers.annotation_resolver.count_annotations_by_dataset import (
     count_annotations_by_dataset,
 )
-from lightly_studio.resolvers.annotation_resolver.create import create
 from lightly_studio.resolvers.annotation_resolver.create_many import create_many
 from lightly_studio.resolvers.annotation_resolver.delete_annotation import (
     delete_annotation,
@@ -12,7 +11,7 @@ from lightly_studio.resolvers.annotation_resolver.delete_annotations import (
     delete_annotations,
 )
 from lightly_studio.resolvers.annotation_resolver.get_all import get_all
-from lightly_studio.resolvers.annotation_resolver.get_by_id import get_by_id
+from lightly_studio.resolvers.annotation_resolver.get_by_id import get_by_id, get_by_ids
 from lightly_studio.resolvers.annotation_resolver.update_annotation_label import (
     update_annotation_label,
 )
@@ -22,12 +21,12 @@ from lightly_studio.resolvers.annotation_resolver.update_bounding_box import (
 __all__ = [
     "count_annotations_by_dataset",
-    "create",
     "create_many",
     "delete_annotation",
     "delete_annotations",
     "get_all",
     "get_by_id",
+    "get_by_ids",
     "update_annotation_label",
     "update_bounding_box",
 ]

lightly_studio/resolvers/annotation_resolver/create_many.py CHANGED Viewed

@@ -3,6 +3,7 @@
 from __future__ import annotations
 from collections.abc import Sequence
+from uuid import UUID
 from sqlmodel import Session
@@ -24,7 +25,7 @@ from lightly_studio.models.annotation.semantic_segmentation import (
 def create_many(
     session: Session,
     annotations: list[AnnotationCreate],
-) -> Sequence[AnnotationBaseTable]:
+) -> Sequence[UUID]:
     """Create many annotations with object detection details in bulk."""
     # Step 1: Create all base annotations
     base_annotations = []
@@ -37,7 +38,6 @@ def create_many(
         db_base_annotation = AnnotationBaseTable(
             annotation_label_id=annotation_create.annotation_label_id,
             annotation_type=annotation_create.annotation_type,
-            annotation_task_id=annotation_create.annotation_task_id,
             confidence=annotation_create.confidence,
             dataset_id=annotation_create.dataset_id,
             sample_id=annotation_create.sample_id,
@@ -93,4 +93,4 @@ def create_many(
     # Commit everything
     session.commit()
-    return base_annotations
+    return [annotation.annotation_id for annotation in base_annotations]

lightly_studio/resolvers/annotation_resolver/delete_annotation.py CHANGED Viewed

@@ -26,7 +26,7 @@ def delete_annotation(
         annotation_id=annotation_id,
     )
     if not annotation:
-        return
+        raise ValueError(f"Annotation {annotation_id} not found")
     if annotation.object_detection_details:
         session.delete(annotation.object_detection_details)
     if annotation.instance_segmentation_details:

lightly_studio/resolvers/annotation_resolver/delete_annotations.py CHANGED Viewed

@@ -18,14 +18,12 @@ from lightly_studio.resolvers.annotations.annotations_filter import (
 def delete_annotations(
     session: Session,
-    annotation_task_ids: list[UUID] | None,
     annotation_label_ids: list[UUID] | None,
 ) -> None:
     """Delete all annotations and their tag links using filters.
     Args:
         session: Database session.
-        annotation_task_ids: List of annotation task IDs to filter by.
         annotation_label_ids: List of annotation label IDs to filter by.
     """
     # Find annotation_ids to delete
@@ -33,9 +31,15 @@ def delete_annotations(
         session,
         filters=AnnotationsFilter(
             annotation_label_ids=annotation_label_ids,
-            annotation_task_ids=annotation_task_ids,
         ),
     ).annotations
+    for annotation in annotations:
+        if annotation.object_detection_details:
+            session.delete(annotation.object_detection_details)
+        if annotation.instance_segmentation_details:
+            session.delete(annotation.instance_segmentation_details)
+        if annotation.semantic_segmentation_details:
+            session.delete(annotation.semantic_segmentation_details)
     annotation_ids = [annotation.annotation_id for annotation in annotations]
     # TODO(Horatiu, 06/2025): Check if there is a way to delete the links
     # automatically using SQLModel/SQLAlchemy.

lightly_studio/resolvers/annotation_resolver/get_by_id.py CHANGED Viewed

@@ -2,9 +2,10 @@
 from __future__ import annotations
+from collections.abc import Sequence
 from uuid import UUID
-from sqlmodel import Session, select
+from sqlmodel import Session, col, select
 from lightly_studio.models.annotation.annotation_base import (
     AnnotationBaseTable,
@@ -16,3 +17,20 @@ def get_by_id(session: Session, annotation_id: UUID) -> AnnotationBaseTable | No
     return session.exec(
         select(AnnotationBaseTable).where(AnnotationBaseTable.annotation_id == annotation_id)
     ).one_or_none()
+def get_by_ids(session: Session, annotation_ids: Sequence[UUID]) -> Sequence[AnnotationBaseTable]:
+    """Retrieve multiple annotations by their IDs.
+    Args:
+        session: The database session to use for the query.
+        annotation_ids: A list of annotation IDs to retrieve.
+    Returns:
+        A list of annotations matching the provided IDs.
+    """
+    return session.exec(
+        select(AnnotationBaseTable).where(
+            col(AnnotationBaseTable.annotation_id).in_(annotation_ids)
+        )
+    ).all()

lightly_studio/resolvers/annotation_resolver/update_annotation_label.py CHANGED Viewed

@@ -111,7 +111,6 @@ def update_annotation_label(
             annotation_id=annotation_copy.annotation_id,
             annotation_label_id=annotation_copy.annotation_label_id,
             annotation_type=annotation_copy.annotation_type,
-            annotation_task_id=annotation_copy.annotation_task_id,
             confidence=annotation_copy.confidence,
             created_at=annotation_copy.created_at,
             dataset_id=annotation_copy.dataset_id,

lightly_studio/resolvers/annotations/annotations_filter.py CHANGED Viewed

@@ -7,8 +7,7 @@ from uuid import UUID
 from pydantic import BaseModel, Field
 from sqlmodel import col
-from lightly_studio.models.annotation.annotation_base import AnnotationBaseTable
-from lightly_studio.models.annotation_task import AnnotationType
+from lightly_studio.models.annotation.annotation_base import AnnotationBaseTable, AnnotationType
 from lightly_studio.models.sample import SampleTable
 from lightly_studio.models.tag import TagTable
 from lightly_studio.type_definitions import QueryType
@@ -30,9 +29,6 @@ class AnnotationsFilter(BaseModel):
         default=None,
         description="List of sample tag UUIDs to filter annotations by",
     )
-    annotation_task_ids: list[UUID] | None = Field(
-        default=None, description="List of annotation task UUIDs"
-    )
     def apply(
         self,
@@ -51,12 +47,6 @@ class AnnotationsFilter(BaseModel):
         if self.dataset_ids:
             query = query.where(col(AnnotationBaseTable.dataset_id).in_(self.dataset_ids))
-        # Filter by annotation task
-        if self.annotation_task_ids:
-            query = query.where(
-                col(AnnotationBaseTable.annotation_task_id).in_(self.annotation_task_ids)
-            )
         # Filter by annotation label
         if self.annotation_label_ids:
             query = query.where(

lightly_studio/resolvers/dataset_resolver.py CHANGED Viewed

@@ -67,6 +67,16 @@ def get_by_id(session: Session, dataset_id: UUID) -> DatasetTable | None:
     ).one_or_none()
+def get_by_name(session: Session, name: str) -> DatasetTable | None:
+    """Retrieve a single dataset by name."""
+    datasets = session.exec(select(DatasetTable).where(DatasetTable.name == name)).all()
+    if len(datasets) == 0:
+        return None
+    if len(datasets) > 1:
+        raise ValueError(f"Cannot retrieve a dataset, found multiple with name '{name}'.")
+    return datasets[0]
 def update(session: Session, dataset_id: UUID, dataset_data: DatasetCreate) -> DatasetTable:
     """Update an existing dataset."""
     dataset = get_by_id(session=session, dataset_id=dataset_id)

lightly_studio/resolvers/embedding_model_resolver.py CHANGED Viewed

@@ -21,6 +21,28 @@ def create(session: Session, embedding_model: EmbeddingModelCreate) -> Embedding
     return db_embedding_model
+def get_or_create(session: Session, embedding_model: EmbeddingModelCreate) -> EmbeddingModelTable:
+    """Retrieve an existing EmbeddingModel by hash or create a new one if it does not exist."""
+    db_model = get_by_model_hash(
+        session=session, embedding_model_hash=embedding_model.embedding_model_hash
+    )
+    if db_model is None:
+        return create(session=session, embedding_model=embedding_model)
+    # Validate that the existing model matches the provided data.
+    if (
+        db_model.name != embedding_model.name
+        or db_model.parameter_count_in_mb != embedding_model.parameter_count_in_mb
+        or db_model.embedding_dimension != embedding_model.embedding_dimension
+        # TODO(Michal, 09/2025): Allow same model for different datasets.
+        or db_model.dataset_id != embedding_model.dataset_id
+    ):
+        raise ValueError(
+            "An embedding model with the same hash but different parameters already exists."
+        )
+    return db_model
 def get_all_by_dataset_id(session: Session, dataset_id: UUID) -> list[EmbeddingModelTable]:
     """Retrieve all embedding models."""
     embedding_models = session.exec(

lightly_studio/resolvers/sample_resolver.py CHANGED Viewed

@@ -7,9 +7,11 @@ from datetime import datetime, timezone
 from uuid import UUID
 from pydantic import BaseModel
+from sqlalchemy.orm import joinedload, selectinload
 from sqlmodel import Session, col, func, select
 from sqlmodel.sql.expression import Select
+from lightly_studio.api.routes.api.validators import Paginated
 from lightly_studio.models.annotation.annotation_base import AnnotationBaseTable
 from lightly_studio.models.annotation_label import AnnotationLabelTable
 from lightly_studio.models.embedding_model import EmbeddingModelTable
@@ -36,6 +38,22 @@ def create_many(session: Session, samples: list[SampleCreate]) -> list[SampleTab
     return db_samples
+def filter_new_paths(session: Session, file_paths_abs: list[str]) -> tuple[list[str], list[str]]:
+    """Return a) file_path_abs that do not already exist in the database and b) those that do."""
+    existing_file_paths_abs = set(
+        session.exec(
+            select(col(SampleTable.file_path_abs)).where(
+                col(SampleTable.file_path_abs).in_(file_paths_abs)
+            )
+        ).all()
+    )
+    file_paths_abs_set = set(file_paths_abs)
+    return (
+        list(file_paths_abs_set - existing_file_paths_abs),  # paths that are not in the DB
+        list(file_paths_abs_set & existing_file_paths_abs),  # paths that are already in the DB
+    )
 def get_by_id(session: Session, dataset_id: UUID, sample_id: UUID) -> SampleTable | None:
     """Retrieve a single sample by ID."""
     return session.exec(
@@ -45,6 +63,13 @@ def get_by_id(session: Session, dataset_id: UUID, sample_id: UUID) -> SampleTabl
     ).one_or_none()
+def count_by_dataset_id(session: Session, dataset_id: UUID) -> int:
+    """Count the number of samples in a dataset."""
+    return session.exec(
+        select(func.count()).select_from(SampleTable).where(SampleTable.dataset_id == dataset_id)
+    ).one()
 def get_many_by_id(session: Session, sample_ids: list[UUID]) -> list[SampleTable]:
     """Retrieve multiple samples by their IDs.
@@ -63,19 +88,33 @@ class GetAllSamplesByDatasetIdResult(BaseModel):
     samples: Sequence[SampleTable]
     total_count: int
+    next_cursor: int | None = None
 def get_all_by_dataset_id(  # noqa: PLR0913
     session: Session,
     dataset_id: UUID,
-    offset: int = 0,
-    limit: int | None = None,
+    pagination: Paginated | None = None,
     filters: SampleFilter | None = None,
     text_embedding: list[float] | None = None,
     sample_ids: list[UUID] | None = None,
 ) -> GetAllSamplesByDatasetIdResult:
     """Retrieve samples for a specific dataset with optional filtering."""
-    samples_query = select(SampleTable).where(SampleTable.dataset_id == dataset_id)
+    samples_query = (
+        select(SampleTable)
+        .options(
+            selectinload(SampleTable.annotations).options(
+                joinedload(AnnotationBaseTable.annotation_label),
+                joinedload(AnnotationBaseTable.object_detection_details),
+                joinedload(AnnotationBaseTable.instance_segmentation_details),
+                joinedload(AnnotationBaseTable.semantic_segmentation_details),
+            ),
+            selectinload(SampleTable.tags),
+            # Ignore type checker error below as it's a false positive caused by TYPE_CHECKING.
+            joinedload(SampleTable.metadata_dict),  # type: ignore[arg-type]
+        )
+        .where(SampleTable.dataset_id == dataset_id)
+    )
     total_count_query = (
         select(func.count()).select_from(SampleTable).where(SampleTable.dataset_id == dataset_id)
     )
@@ -120,15 +159,20 @@ def get_all_by_dataset_id(  # noqa: PLR0913
             col(SampleTable.created_at).asc(), col(SampleTable.sample_id).asc()
         )
-    # paginate query when offset or limit are set/positive
-    if offset > 0:
-        samples_query = samples_query.offset(offset)
-    if limit is not None:
-        samples_query = samples_query.limit(limit)
+    # Apply pagination if provided
+    if pagination is not None:
+        samples_query = samples_query.offset(pagination.offset).limit(pagination.limit)
+    total_count = session.exec(total_count_query).one()
+    next_cursor = None
+    if pagination and pagination.offset + pagination.limit < total_count:
+        next_cursor = pagination.offset + pagination.limit
     return GetAllSamplesByDatasetIdResult(
         samples=session.exec(samples_query).all(),
-        total_count=session.exec(total_count_query).one(),
+        total_count=total_count,
+        next_cursor=next_cursor,
     )

lightly_studio/resolvers/tag_resolver.py CHANGED Viewed

@@ -274,3 +274,26 @@ def remove_annotation_ids_from_tag_id(
     session.commit()
     session.refresh(tag)
     return tag
+def get_or_create_sample_tag_by_name(
+    session: Session,
+    dataset_id: UUID,
+    tag_name: str,
+) -> TagTable:
+    """Get an existing sample tag by name or create a new one if it doesn't exist.
+    Args:
+        session: Database session for executing queries.
+        dataset_id: The dataset ID to search/create the tag for.
+        tag_name: Name of the tag to get or create.
+    Returns:
+        The existing or newly created sample tag.
+    """
+    existing_tag = get_by_name(session=session, tag_name=tag_name, dataset_id=dataset_id)
+    if existing_tag:
+        return existing_tag
+    new_tag = TagCreate(name=tag_name, dataset_id=dataset_id, kind="sample")
+    return create(session=session, tag=new_tag)

lightly_studio/selection/mundig.py CHANGED Viewed

@@ -10,29 +10,26 @@ from typing import Iterable
 # Or remove the type ignore once typing stubs were added manually.
 import lightly_mundig  # type: ignore[import-untyped]
 import numpy as np
-from environs import Env
+from lightly_studio.dataset.env import LIGHTLY_STUDIO_LICENSE_KEY
 class Mundig:
-    """Python wrapper for the Mundig selection algorithm.
+    """Python interface for the Mundig selection algorithm.
     This class provides a Python interface to the lightly_mundig Rust library
-    for sample selection.
+    for sample selection. It allows combining different selection strategies
+    such as diversity and weighting.
     """
     def __init__(self) -> None:
         """Initialize the Mundig selection interface."""
-        # Read LIGHTLY_STUDIO_LICENSE_KEY with .env file support
-        env = Env()
-        env.read_env()
-        license_key = env.str("LIGHTLY_STUDIO_LICENSE_KEY", default=None)
-        if license_key is None:
+        if LIGHTLY_STUDIO_LICENSE_KEY is None:
             raise ValueError(
                 "LIGHTLY_STUDIO_LICENSE_KEY environment variable is not set. "
                 "Please set it to your LightlyStudio license key."
             )
-        self.mundig = lightly_mundig.Selection(token=license_key)
+        self.mundig = lightly_mundig.Selection(token=LIGHTLY_STUDIO_LICENSE_KEY)
         self.n_input_samples: int | None = None

lightly_studio/selection/select.py CHANGED Viewed

@@ -1,96 +1,105 @@
-"""Provides the user python interface to selection."""
+"""Provides the user python interface to selection bound to sample ids."""
 from __future__ import annotations
+from collections.abc import Iterable
+from typing import Final
 from uuid import UUID
 from sqlmodel import Session
-from lightly_studio.resolvers.samples_filter import SampleFilter
 from lightly_studio.selection.select_via_db import select_via_database
 from lightly_studio.selection.selection_config import (
     EmbeddingDiversityStrategy,
+    MetadataWeightingStrategy,
     SelectionConfig,
     SelectionStrategy,
 )
 class Selection:
-    """User selection interface for the dataset."""
+    """Selection interface for candidate sample ids."""
-    # TODO(Malte, 08/2025): Create this class within the DatasetView.
-    # Then the arguments can be passed directly from the DatasetView.
-    # Example:
-    # class DatasetView:
-    #     def __init__(self, dataset_id: UUID, session: Session):
-    #         self.select = Select(dataset_id, session)
-    # User interface:
-    # dataset_view = ...
-    # dataset_view.select.diverse(...)
-    #
-    # See https://docs.google.com/document/d/1ZRICdFmfJmxUBy3FFoeUWsAgsCNWDHg8CK5MJiGmX74/edit?tab=t.kbfvnrepsuf#bookmark=id.8klhhwr5q4dp
-    def __init__(self, dataset_id: UUID, session: Session):
-        """Creates the interface to run selection.
+    def __init__(
+        self,
+        dataset_id: UUID,
+        session: Session,
+        input_sample_ids: Iterable[UUID],
+    ) -> None:
+        """Create the selection interface.
         Args:
-            dataset_id: The ID of the dataset to select from.
-            session: The database session to use for selection.
+            dataset_id: Dataset in which the selection is performed.
+            session: Database session to resolve selection dependencies.
+            input_sample_ids: Candidate sample ids considered for selection.
+                The iterable is consumed immediately to capture a stable snapshot.
+        """
+        self._dataset_id: Final[UUID] = dataset_id
+        self._session: Final[Session] = session
+        self._input_sample_ids: list[UUID] = list(input_sample_ids)
+    def metadata_weighting(
+        self,
+        n_samples_to_select: int,
+        selection_result_tag_name: str,
+        metadata_key: str,
+    ) -> None:
+        """Select a subset based on numeric metadata weights.
+        Args:
+            n_samples_to_select: Number of samples to select.
+            selection_result_tag_name: Tag name for the selection result.
+            metadata_key: Metadata key used as weights (float or int values).
         """
-        self.dataset_id = dataset_id
-        self.session = session
+        strategy = MetadataWeightingStrategy(metadata_key=metadata_key)
+        self.multi_strategies(
+            n_samples_to_select=n_samples_to_select,
+            selection_result_tag_name=selection_result_tag_name,
+            selection_strategies=[strategy],
+        )
     def diverse(
         self,
         n_samples_to_select: int,
         selection_result_tag_name: str,
         embedding_model_name: str | None = None,
-        sample_filter: SampleFilter | None = None,
     ) -> None:
-        """Selects a diverse subset of the dataset.
+        """Select a diverse subset using embeddings.
         Args:
-            n_samples_to_select: The number of samples to select.
-            selection_result_tag_name: The tag name to use for the selection result.
-            embedding_model_name:
-                The name of the embedding model to use.
-                If None, assert that there is only one embedding model and uses it.
-            sample_filter: An optional filter to apply to the samples.
+            n_samples_to_select: Number of samples to select.
+            selection_result_tag_name: Tag name for the selection result.
+            embedding_model_name: Optional embedding model name. If None, uses the only
+                available model or raises if multiple exist.
         """
         strategy = EmbeddingDiversityStrategy(embedding_model_name=embedding_model_name)
-        selection_config = SelectionConfig(
-            dataset_id=self.dataset_id,
+        self.multi_strategies(
             n_samples_to_select=n_samples_to_select,
             selection_result_tag_name=selection_result_tag_name,
-            sample_filter=sample_filter,
-            strategies=[strategy],
+            selection_strategies=[strategy],
         )
-        select_via_database(session=self.session, config=selection_config)
     def multi_strategies(
         self,
         n_samples_to_select: int,
         selection_result_tag_name: str,
         selection_strategies: list[SelectionStrategy],
-        sample_filter: SampleFilter | None = None,
     ) -> None:
-        """Select a subset of the dataset based on multiple selection strategies.
+        """Select a subset based on multiple strategies.
         Args:
-            n_samples_to_select: The number of samples to select.
-            selection_result_tag_name: The tag name to use for the selection result.
-            selection_strategies:
-                Selection strategies to use for the selection. They can be created after
-                importing them from `lightly_studio.selection.selection_config`.
-            sample_filter: An optional filter to apply to the samples.
+            n_samples_to_select: Number of samples to select.
+            selection_result_tag_name: Tag name for the selection result.
+            selection_strategies: Strategies to compose for selection.
         """
         config = SelectionConfig(
-            dataset_id=self.dataset_id,
+            dataset_id=self._dataset_id,
             n_samples_to_select=n_samples_to_select,
             selection_result_tag_name=selection_result_tag_name,
-            sample_filter=sample_filter,
             strategies=selection_strategies,
         )
-        select_via_database(session=self.session, config=config)
+        select_via_database(
+            session=self._session,
+            config=config,
+            input_sample_ids=self._input_sample_ids,
+        )

lightly_studio/selection/select_via_db.py CHANGED Viewed

@@ -3,29 +3,33 @@
 from __future__ import annotations
 import datetime
+from uuid import UUID
 from sqlmodel import Session
 from lightly_studio.models.tag import TagCreate
 from lightly_studio.resolvers import (
     embedding_model_resolver,
+    metadata_resolver,
     sample_embedding_resolver,
-    sample_resolver,
     tag_resolver,
 )
 from lightly_studio.selection.mundig import Mundig
 from lightly_studio.selection.selection_config import (
     EmbeddingDiversityStrategy,
+    MetadataWeightingStrategy,
     SelectionConfig,
 )
-def select_via_database(session: Session, config: SelectionConfig) -> None:
-    """Runs selection and all database interactions of it.
+def select_via_database(
+    session: Session, config: SelectionConfig, input_sample_ids: list[UUID]
+) -> None:
+    """Run selection using the provided candidate sample ids.
-    First resolves the selection config to actual database values.
+    First resolves the selection config to concrete database values.
     Then calls Mundig to run the selection with pure values.
-    Last creates a tag for the selected set.
+    Finally creates a tag for the selected set.
     """
     # Check if the tag name is already used
     existing_tag = tag_resolver.get_by_name(
@@ -40,18 +44,7 @@ def select_via_database(session: Session, config: SelectionConfig) -> None:
         )
         raise ValueError(msg)
-    # TODO(Malte, 08/2025): Use a DatasetQuery instead of SampleFilter once
-    # the latter is implemented.
-    # See https://linear.app/lightly/issue/LIG-7292/story-python-ui-mvp1-without-datasetquery-and-sample
-    samples = sample_resolver.get_all_by_dataset_id(
-        session,
-        limit=None,
-        dataset_id=config.dataset_id,
-        filters=config.sample_filter,
-    ).samples
-    sample_ids = [s.sample_id for s in samples]
-    n_samples_to_select = min(config.n_samples_to_select, len(sample_ids))
+    n_samples_to_select = min(config.n_samples_to_select, len(input_sample_ids))
     if n_samples_to_select == 0:
         print("No samples available for selection.")
         return
@@ -66,16 +59,27 @@ def select_via_database(session: Session, config: SelectionConfig) -> None:
             ).embedding_model_id
             embedding_tables = sample_embedding_resolver.get_by_sample_ids(
                 session=session,
-                sample_ids=sample_ids,
+                sample_ids=input_sample_ids,
                 embedding_model_id=embedding_model_id,
             )
             embeddings = [e.embedding for e in embedding_tables]
             mundig.add_diversity(embeddings=embeddings, strength=strat.strength)
+        elif isinstance(strat, MetadataWeightingStrategy):
+            key = strat.metadata_key
+            weights = []
+            for sample_id in input_sample_ids:
+                weight = metadata_resolver.get_value_for_sample(session, sample_id, key)
+                if not isinstance(weight, (float, int)):
+                    raise ValueError(
+                        f"Metadata {key} is not a number, only numbers can be used as weights"
+                    )
+                weights.append(float(weight))
+            mundig.add_weighting(weights, strength=strat.strength)
         else:
             raise ValueError(f"Selection strategy of type {type(strat)} is unknown.")
     selected_indices = mundig.run(n_samples=n_samples_to_select)
-    selected_sample_ids = [sample_ids[i] for i in selected_indices]
+    selected_sample_ids = [input_sample_ids[i] for i in selected_indices]
     datetime_str = datetime.datetime.now(tz=datetime.timezone.utc).isoformat()
     tag_description = f"Selected at {datetime_str} UTC"

lightly-studio 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

Potentially problematic release.

lightly-studio 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl