PyPI - lightly-studio - Versions diffs - 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl - Mend

lightly-studio 0.3.1py3-none-any.whl → 0.3.3py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of lightly-studio might be problematic. Click here for more details.

Files changed (169) hide show

lightly_studio/api/routes/api/selection.py ADDED Viewed

@@ -0,0 +1,87 @@
+"""This module contains the API routes for managing selections."""
+from __future__ import annotations
+from typing import Union
+from fastapi import APIRouter, Depends, HTTPException
+from pydantic import BaseModel, Field
+from typing_extensions import Annotated
+from lightly_studio.api.routes.api.dataset import get_and_validate_dataset_id
+from lightly_studio.db_manager import SessionDep
+from lightly_studio.models.dataset import DatasetTable
+from lightly_studio.resolvers import sample_resolver
+from lightly_studio.selection.select_via_db import select_via_database
+from lightly_studio.selection.selection_config import (
+    EmbeddingDiversityStrategy,
+    MetadataWeightingStrategy,
+    SelectionConfig,
+)
+selection_router = APIRouter()
+Strategy = Annotated[
+    Union[EmbeddingDiversityStrategy, MetadataWeightingStrategy],
+    Field(discriminator="strategy_name"),
+]
+class SelectionRequest(BaseModel):
+    """Request model for selection."""
+    n_samples_to_select: int = Field(gt=0, description="Number of samples to select")
+    selection_result_tag_name: str = Field(min_length=1, description="Name for the result tag")
+    strategies: list[Strategy]
+@selection_router.post(
+    "/datasets/{dataset_id}/selection",
+    status_code=204,
+    response_model=None,
+)
+def create_combination_selection(
+    session: SessionDep,
+    dataset: Annotated[
+        DatasetTable,
+        Depends(get_and_validate_dataset_id),
+    ],
+    request: SelectionRequest,
+) -> None:
+    """Create a combination selection on the dataset.
+    This endpoint performs combination selection using embeddings and metadata.
+    The selected samples are tagged with the specified tag name.
+    Args:
+        session: Database session dependency.
+        dataset: Dataset to perform selection on.
+        request: Selection parameters including sample count and tag name.
+    Returns:
+        None (204 No Content on success).
+    Raises:
+        HTTPException: 400 if selection fails due to invalid parameters or other errors.
+    """
+    # Get all samples in dataset as input for selection.
+    all_samples_result = sample_resolver.get_all_by_dataset_id(
+        session=session, dataset_id=dataset.dataset_id
+    )
+    input_sample_ids = [sample.sample_id for sample in all_samples_result.samples]
+    # Validate we have enough samples to select from.
+    if len(input_sample_ids) < request.n_samples_to_select:
+        raise HTTPException(
+            status_code=400,
+            detail=f"Dataset has only {len(input_sample_ids)} samples, "
+            f"cannot select {request.n_samples_to_select}",
+        )
+    # Create SelectionConfig with diversity strategy.
+    config = SelectionConfig(
+        dataset_id=dataset.dataset_id,
+        n_samples_to_select=request.n_samples_to_select,
+        selection_result_tag_name=request.selection_result_tag_name,
+        strategies=request.strategies,
+    )
+    # Perform selection via database.
+    select_via_database(session=session, config=config, input_sample_ids=input_sample_ids)

lightly_studio/api/routes/api/settings.py CHANGED Viewed

@@ -1,17 +1,13 @@
 """This module contains the API routes for user settings."""
-from fastapi import APIRouter, Depends
-from sqlmodel import Session
-from typing_extensions import Annotated
+from fastapi import APIRouter
-from lightly_studio.api.db import get_session
+from lightly_studio.db_manager import SessionDep
 from lightly_studio.models.settings import SettingView
 from lightly_studio.resolvers import settings_resolver
 settings_router = APIRouter(tags=["settings"])
-SessionDep = Annotated[Session, Depends(get_session)]
 @settings_router.get("/settings")
 def get_settings(

lightly_studio/api/routes/images.py CHANGED Viewed

@@ -5,12 +5,12 @@ from __future__ import annotations
 import os
 from collections.abc import Generator
-from fastapi import APIRouter, Depends, HTTPException
+import fsspec
+from fastapi import APIRouter, HTTPException
 from fastapi.responses import StreamingResponse
-from sqlmodel import Session
-from lightly_studio.api import db
 from lightly_studio.api.routes.api import status
+from lightly_studio.db_manager import SessionDep
 from lightly_studio.models import sample
 app_router = APIRouter()
@@ -19,7 +19,7 @@ app_router = APIRouter()
 @app_router.get("/sample/{sample_id}")
 async def serve_image_by_sample_id(
     sample_id: str,
-    session: Session = Depends(db.get_session),  # noqa: B008
+    session: SessionDep,
 ) -> StreamingResponse:
     """Serve an image by sample ID.
@@ -45,8 +45,8 @@ async def serve_image_by_sample_id(
     try:
         # Open the file.
-        with open(file_path, "rb") as file:
-            content = file.read()
+        fs, fs_path = fsspec.core.url_to_fs(file_path)
+        content = fs.cat_file(fs_path)
         # Determine content type based on file extension.
         content_type = _get_content_type(file_path)

lightly_studio/core/add_samples.py ADDED Viewed

@@ -0,0 +1,374 @@
+"""Functions to add samples and their annotations to a dataset in the database."""
+from __future__ import annotations
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Iterable
+from uuid import UUID
+import fsspec
+import PIL
+from labelformat.model.binary_mask_segmentation import BinaryMaskSegmentation
+from labelformat.model.bounding_box import BoundingBoxFormat
+from labelformat.model.image import Image
+from labelformat.model.instance_segmentation import (
+    ImageInstanceSegmentation,
+    InstanceSegmentationInput,
+)
+from labelformat.model.multipolygon import MultiPolygon
+from labelformat.model.object_detection import (
+    ImageObjectDetection,
+    ObjectDetectionInput,
+)
+from sqlmodel import Session
+from tqdm import tqdm
+from lightly_studio.models.annotation.annotation_base import AnnotationCreate
+from lightly_studio.models.annotation_label import AnnotationLabelCreate
+from lightly_studio.models.sample import SampleCreate, SampleTable
+from lightly_studio.resolvers import (
+    annotation_label_resolver,
+    annotation_resolver,
+    sample_resolver,
+)
+# Constants
+ANNOTATION_BATCH_SIZE = 64  # Number of annotations to process in a single batch
+SAMPLE_BATCH_SIZE = 32  # Number of samples to process in a single batch
+MAX_EXAMPLE_PATHS_TO_SHOW = 5
+@dataclass
+class _AnnotationProcessingContext:
+    """Context for processing annotations for a single sample."""
+    dataset_id: UUID
+    sample_id: UUID
+    label_map: dict[int, UUID]
+@dataclass
+class _LoadingLoggingContext:
+    """Context for the logging while loading data."""
+    n_samples_before_loading: int
+    n_samples_to_be_inserted: int = 0
+    example_paths_not_inserted: list[str] = field(default_factory=list)
+    def update_example_paths(self, example_paths_not_inserted: list[str]) -> None:
+        if len(self.example_paths_not_inserted) >= MAX_EXAMPLE_PATHS_TO_SHOW:
+            return
+        upper_limit = MAX_EXAMPLE_PATHS_TO_SHOW - len(self.example_paths_not_inserted)
+        self.example_paths_not_inserted.extend(example_paths_not_inserted[:upper_limit])
+def load_into_dataset_from_paths(
+    session: Session,
+    dataset_id: UUID,
+    image_paths: Iterable[str],
+) -> list[UUID]:
+    """Load images from file paths into the dataset.
+    Args:
+        session: The database session.
+        dataset_id: The ID of the dataset to load images into.
+        image_paths: An iterable of file paths to the images to load.
+    Returns:
+        A list of UUIDs of the created samples.
+    """
+    samples_to_create: list[SampleCreate] = []
+    created_sample_ids: list[UUID] = []
+    logging_context = _LoadingLoggingContext(
+        n_samples_to_be_inserted=sum(1 for _ in image_paths),
+        n_samples_before_loading=sample_resolver.count_by_dataset_id(
+            session=session, dataset_id=dataset_id
+        ),
+    )
+    for image_path in tqdm(
+        image_paths,
+        desc="Processing images",
+        unit=" images",
+    ):
+        try:
+            with fsspec.open(image_path, "rb") as file:
+                image = PIL.Image.open(file)
+                width, height = image.size
+                image.close()
+        except (FileNotFoundError, PIL.UnidentifiedImageError, OSError):
+            continue
+        sample = SampleCreate(
+            file_name=Path(image_path).name,
+            file_path_abs=image_path,
+            width=width,
+            height=height,
+            dataset_id=dataset_id,
+        )
+        samples_to_create.append(sample)
+        # Process batch when it reaches SAMPLE_BATCH_SIZE
+        if len(samples_to_create) >= SAMPLE_BATCH_SIZE:
+            created_samples_batch, paths_not_inserted = _create_batch_samples(
+                session=session, samples=samples_to_create
+            )
+            created_sample_ids.extend(s.sample_id for s in created_samples_batch)
+            logging_context.update_example_paths(paths_not_inserted)
+            samples_to_create = []
+    # Handle remaining samples
+    if samples_to_create:
+        created_samples_batch, paths_not_inserted = _create_batch_samples(
+            session=session, samples=samples_to_create
+        )
+        created_sample_ids.extend(s.sample_id for s in created_samples_batch)
+        logging_context.update_example_paths(paths_not_inserted)
+    _log_loading_results(session=session, dataset_id=dataset_id, logging_context=logging_context)
+    return created_sample_ids
+def load_into_dataset_from_labelformat(
+    session: Session,
+    dataset_id: UUID,
+    input_labels: ObjectDetectionInput | InstanceSegmentationInput,
+    images_path: Path,
+) -> list[UUID]:
+    """Load samples and their annotations from a labelformat input into the dataset.
+    Args:
+        session: The database session.
+        dataset_id: The ID of the dataset to load samples into.
+        input_labels: The labelformat input containing images and annotations.
+        images_path: The path to the directory containing the images.
+    Returns:
+        A list of UUIDs of the created samples.
+    """
+    logging_context = _LoadingLoggingContext(
+        n_samples_to_be_inserted=sum(1 for _ in input_labels.get_labels()),
+        n_samples_before_loading=sample_resolver.count_by_dataset_id(
+            session=session, dataset_id=dataset_id
+        ),
+    )
+    # Create label mapping
+    label_map = _create_label_map(session=session, input_labels=input_labels)
+    annotations_to_create: list[AnnotationCreate] = []
+    samples_to_create: list[SampleCreate] = []
+    created_sample_ids: list[UUID] = []
+    image_path_to_anno_data: dict[str, ImageInstanceSegmentation | ImageObjectDetection] = {}
+    for image_data in tqdm(input_labels.get_labels(), desc="Processing images", unit=" images"):
+        image: Image = image_data.image  # type: ignore[attr-defined]
+        typed_image_data: ImageInstanceSegmentation | ImageObjectDetection = image_data  # type: ignore[assignment]
+        sample = SampleCreate(
+            file_name=str(image.filename),
+            file_path_abs=str(images_path / image.filename),
+            width=image.width,
+            height=image.height,
+            dataset_id=dataset_id,
+        )
+        samples_to_create.append(sample)
+        image_path_to_anno_data[sample.file_path_abs] = typed_image_data
+        if len(samples_to_create) >= SAMPLE_BATCH_SIZE:
+            created_samples_batch, paths_not_inserted = _create_batch_samples(
+                session=session, samples=samples_to_create
+            )
+            created_sample_ids.extend(s.sample_id for s in created_samples_batch)
+            logging_context.update_example_paths(paths_not_inserted)
+            _process_batch_annotations(
+                session=session,
+                stored_samples=created_samples_batch,
+                image_path_to_anno_data=image_path_to_anno_data,
+                dataset_id=dataset_id,
+                label_map=label_map,
+                annotations_to_create=annotations_to_create,
+            )
+            samples_to_create.clear()
+            image_path_to_anno_data.clear()
+    if samples_to_create:
+        created_samples_batch, paths_not_inserted = _create_batch_samples(
+            session=session, samples=samples_to_create
+        )
+        created_sample_ids.extend(s.sample_id for s in created_samples_batch)
+        logging_context.update_example_paths(paths_not_inserted)
+        _process_batch_annotations(
+            session=session,
+            stored_samples=created_samples_batch,
+            image_path_to_anno_data=image_path_to_anno_data,
+            dataset_id=dataset_id,
+            label_map=label_map,
+            annotations_to_create=annotations_to_create,
+        )
+    # Insert any remaining annotations
+    if annotations_to_create:
+        annotation_resolver.create_many(session=session, annotations=annotations_to_create)
+    _log_loading_results(session=session, dataset_id=dataset_id, logging_context=logging_context)
+    return created_sample_ids
+def _log_loading_results(
+    session: Session, dataset_id: UUID, logging_context: _LoadingLoggingContext
+) -> None:
+    n_samples_end = sample_resolver.count_by_dataset_id(session=session, dataset_id=dataset_id)
+    n_samples_inserted = n_samples_end - logging_context.n_samples_before_loading
+    print(
+        f"Added {n_samples_inserted} out of {logging_context.n_samples_to_be_inserted}"
+        " new samples to the dataset."
+    )
+    if logging_context.example_paths_not_inserted:
+        # TODO(Jonas, 09/2025): Use logging instead of print
+        print(
+            f"Examples of paths that were not added: "
+            f" {', '.join(logging_context.example_paths_not_inserted)}"
+        )
+def _create_batch_samples(
+    session: Session, samples: list[SampleCreate]
+) -> tuple[list[SampleTable], list[str]]:
+    """Create the batch samples.
+    Args:
+        session: The database session.
+        samples: The samples to create.
+    Returns:
+        created_samples: A list of created SampleTable objects,
+        existing_file_paths: A list of file paths that already existed in the database,
+    """
+    file_paths_abs_mapping = {sample.file_path_abs: sample for sample in samples}
+    file_paths_new, file_paths_exist = sample_resolver.filter_new_paths(
+        session=session, file_paths_abs=list(file_paths_abs_mapping.keys())
+    )
+    samples_to_create_filtered = [
+        file_paths_abs_mapping[file_path_new] for file_path_new in file_paths_new
+    ]
+    return (
+        sample_resolver.create_many(session=session, samples=samples_to_create_filtered),
+        file_paths_exist,
+    )
+def _create_label_map(
+    session: Session,
+    input_labels: ObjectDetectionInput | InstanceSegmentationInput,
+) -> dict[int, UUID]:
+    """Create a mapping of category IDs to annotation label IDs."""
+    label_map = {}
+    for category in tqdm(
+        input_labels.get_categories(),
+        desc="Processing categories",
+        unit=" categories",
+    ):
+        label = AnnotationLabelCreate(annotation_label_name=category.name)
+        stored_label = annotation_label_resolver.create(session=session, label=label)
+        label_map[category.id] = stored_label.annotation_label_id
+    return label_map
+def _process_object_detection_annotations(
+    context: _AnnotationProcessingContext,
+    anno_data: ImageObjectDetection,
+) -> list[AnnotationCreate]:
+    """Process object detection annotations for a single image."""
+    new_annotations = []
+    for obj in anno_data.objects:
+        box = obj.box.to_format(BoundingBoxFormat.XYWH)
+        x, y, width, height = box
+        new_annotations.append(
+            AnnotationCreate(
+                dataset_id=context.dataset_id,
+                sample_id=context.sample_id,
+                annotation_label_id=context.label_map[obj.category.id],
+                annotation_type="object_detection",
+                x=int(x),
+                y=int(y),
+                width=int(width),
+                height=int(height),
+                confidence=obj.confidence,
+            )
+        )
+    return new_annotations
+def _process_instance_segmentation_annotations(
+    context: _AnnotationProcessingContext,
+    anno_data: ImageInstanceSegmentation,
+) -> list[AnnotationCreate]:
+    """Process instance segmentation annotations for a single image."""
+    new_annotations = []
+    for obj in anno_data.objects:
+        segmentation_rle: None | list[int] = None
+        if isinstance(obj.segmentation, MultiPolygon):
+            box = obj.segmentation.bounding_box().to_format(BoundingBoxFormat.XYWH)
+        elif isinstance(obj.segmentation, BinaryMaskSegmentation):
+            box = obj.segmentation.bounding_box.to_format(BoundingBoxFormat.XYWH)
+            segmentation_rle = obj.segmentation._rle_row_wise  # noqa: SLF001
+        else:
+            raise ValueError(f"Unsupported segmentation type: {type(obj.segmentation)}")
+        x, y, width, height = box
+        new_annotations.append(
+            AnnotationCreate(
+                dataset_id=context.dataset_id,
+                sample_id=context.sample_id,
+                annotation_label_id=context.label_map[obj.category.id],
+                annotation_type="instance_segmentation",
+                x=int(x),
+                y=int(y),
+                width=int(width),
+                height=int(height),
+                segmentation_mask=segmentation_rle,
+            )
+        )
+    return new_annotations
+def _process_batch_annotations(  # noqa: PLR0913
+    session: Session,
+    stored_samples: list[SampleTable],
+    image_path_to_anno_data: dict[str, ImageInstanceSegmentation | ImageObjectDetection],
+    dataset_id: UUID,
+    label_map: dict[int, UUID],
+    annotations_to_create: list[AnnotationCreate],
+) -> None:
+    """Process annotations for a batch of samples."""
+    for stored_sample in stored_samples:
+        anno_data = image_path_to_anno_data[stored_sample.file_path_abs]
+        context = _AnnotationProcessingContext(
+            dataset_id=dataset_id,
+            sample_id=stored_sample.sample_id,
+            label_map=label_map,
+        )
+        if isinstance(anno_data, ImageInstanceSegmentation):
+            new_annotations = _process_instance_segmentation_annotations(
+                context=context, anno_data=anno_data
+            )
+        elif isinstance(anno_data, ImageObjectDetection):
+            new_annotations = _process_object_detection_annotations(
+                context=context, anno_data=anno_data
+            )
+        else:
+            raise ValueError(f"Unsupported annotation type: {type(anno_data)}")
+        annotations_to_create.extend(new_annotations)
+        if len(annotations_to_create) >= ANNOTATION_BATCH_SIZE:
+            annotation_resolver.create_many(session=session, annotations=annotations_to_create)
+            annotations_to_create.clear()

lightly-studio 0.3.1__py3-none-any.whl → 0.3.3__py3-none-any.whl

Potentially problematic release.

lightly-studio 0.3.1py3-none-any.whl → 0.3.3py3-none-any.whl