lightly-studio 0.3.1 (lightly_studio-0.3.1-py3-none-any.whl)
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of lightly-studio might be problematic.
- lightly_studio/__init__.py +11 -0
- lightly_studio/api/__init__.py +0 -0
- lightly_studio/api/app.py +110 -0
- lightly_studio/api/cache.py +77 -0
- lightly_studio/api/db.py +133 -0
- lightly_studio/api/db_tables.py +32 -0
- lightly_studio/api/features.py +7 -0
- lightly_studio/api/routes/api/annotation.py +233 -0
- lightly_studio/api/routes/api/annotation_label.py +90 -0
- lightly_studio/api/routes/api/annotation_task.py +38 -0
- lightly_studio/api/routes/api/classifier.py +387 -0
- lightly_studio/api/routes/api/dataset.py +182 -0
- lightly_studio/api/routes/api/dataset_tag.py +257 -0
- lightly_studio/api/routes/api/exceptions.py +96 -0
- lightly_studio/api/routes/api/features.py +17 -0
- lightly_studio/api/routes/api/metadata.py +37 -0
- lightly_studio/api/routes/api/metrics.py +80 -0
- lightly_studio/api/routes/api/sample.py +196 -0
- lightly_studio/api/routes/api/settings.py +45 -0
- lightly_studio/api/routes/api/status.py +19 -0
- lightly_studio/api/routes/api/text_embedding.py +48 -0
- lightly_studio/api/routes/api/validators.py +17 -0
- lightly_studio/api/routes/healthz.py +13 -0
- lightly_studio/api/routes/images.py +104 -0
- lightly_studio/api/routes/webapp.py +51 -0
- lightly_studio/api/server.py +82 -0
- lightly_studio/core/__init__.py +0 -0
- lightly_studio/core/dataset.py +523 -0
- lightly_studio/core/sample.py +77 -0
- lightly_studio/core/start_gui.py +15 -0
- lightly_studio/dataset/__init__.py +0 -0
- lightly_studio/dataset/edge_embedding_generator.py +144 -0
- lightly_studio/dataset/embedding_generator.py +91 -0
- lightly_studio/dataset/embedding_manager.py +163 -0
- lightly_studio/dataset/env.py +16 -0
- lightly_studio/dataset/file_utils.py +35 -0
- lightly_studio/dataset/loader.py +622 -0
- lightly_studio/dataset/mobileclip_embedding_generator.py +144 -0
- lightly_studio/dist_lightly_studio_view_app/_app/env.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/0.DenzbfeK.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/LightlyLogo.BNjCIww-.png +0 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans- +0 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Bold.DGvYQtcs.ttf +0 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Italic-VariableFont_wdth_wght.B4AZ-wl6.ttf +0 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-Regular.DxJTClRG.ttf +0 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-SemiBold.D3TTYgdB.ttf +0 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/OpenSans-VariableFont_wdth_wght.BZBpG5Iz.ttf +0 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.OwPEPQZu.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/SelectableSvgGroup.b653GmVf.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/_layout.T-zjSUd3.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/assets/useFeatureFlags.CV-KWLNP.css +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/69_IOA4Y.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B2FVR0s0.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B90CZVMX.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/B9zumHo5.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BJXwVxaE.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bsi3UGy5.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bu7uvVrG.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/Bx1xMsFy.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/BylOuP6i.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/C8I8rFJQ.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CDnpyLsT.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CWj6FrbW.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CYgJF_JY.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CcaPhhk3.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CvOmgdoc.js +93 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/CxtLVaYz.js +3 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D5-A_Ffd.js +4 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6RI2Zrd.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D6su9Aln.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/D98V7j6A.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIRAtgl0.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DIeogL5L.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DOlTMNyt.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjUWrjOv.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/DjfY96ND.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/H7C68rOM.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/O-EABkf9.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/XO7A28GO.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/hQVEETDE.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/l7KrR96u.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/nAHhluT7.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/r64xT6ao.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/vC4nQVEB.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/chunks/x9G_hzyY.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/app.CjnvpsmS.js +2 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/entry/start.0o1H7wM9.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/0.XRq_TUwu.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/1.B4rNYwVp.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/10.DfBwOEhN.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/11.CWG1ehzT.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/12.CwF2_8mP.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/2.CS4muRY-.js +6 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/3.CWHpKonm.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/4.OUWOLQeV.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/5.Dm6t9F5W.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/6.Bw5ck4gK.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/7.CF0EDTR6.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/8.Cw30LEcV.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/immutable/nodes/9.CPu3CiBc.js +1 -0
- lightly_studio/dist_lightly_studio_view_app/_app/version.json +1 -0
- lightly_studio/dist_lightly_studio_view_app/apple-touch-icon-precomposed.png +0 -0
- lightly_studio/dist_lightly_studio_view_app/apple-touch-icon.png +0 -0
- lightly_studio/dist_lightly_studio_view_app/favicon.png +0 -0
- lightly_studio/dist_lightly_studio_view_app/index.html +44 -0
- lightly_studio/examples/example.py +23 -0
- lightly_studio/examples/example_metadata.py +338 -0
- lightly_studio/examples/example_selection.py +39 -0
- lightly_studio/examples/example_split_work.py +67 -0
- lightly_studio/examples/example_v2.py +21 -0
- lightly_studio/export_schema.py +18 -0
- lightly_studio/few_shot_classifier/__init__.py +0 -0
- lightly_studio/few_shot_classifier/classifier.py +80 -0
- lightly_studio/few_shot_classifier/classifier_manager.py +663 -0
- lightly_studio/few_shot_classifier/random_forest_classifier.py +489 -0
- lightly_studio/metadata/complex_metadata.py +47 -0
- lightly_studio/metadata/gps_coordinate.py +41 -0
- lightly_studio/metadata/metadata_protocol.py +17 -0
- lightly_studio/metrics/__init__.py +0 -0
- lightly_studio/metrics/detection/__init__.py +0 -0
- lightly_studio/metrics/detection/map.py +268 -0
- lightly_studio/models/__init__.py +1 -0
- lightly_studio/models/annotation/__init__.py +0 -0
- lightly_studio/models/annotation/annotation_base.py +171 -0
- lightly_studio/models/annotation/instance_segmentation.py +56 -0
- lightly_studio/models/annotation/links.py +17 -0
- lightly_studio/models/annotation/object_detection.py +47 -0
- lightly_studio/models/annotation/semantic_segmentation.py +44 -0
- lightly_studio/models/annotation_label.py +47 -0
- lightly_studio/models/annotation_task.py +28 -0
- lightly_studio/models/classifier.py +20 -0
- lightly_studio/models/dataset.py +84 -0
- lightly_studio/models/embedding_model.py +30 -0
- lightly_studio/models/metadata.py +208 -0
- lightly_studio/models/sample.py +180 -0
- lightly_studio/models/sample_embedding.py +37 -0
- lightly_studio/models/settings.py +60 -0
- lightly_studio/models/tag.py +96 -0
- lightly_studio/py.typed +0 -0
- lightly_studio/resolvers/__init__.py +7 -0
- lightly_studio/resolvers/annotation_label_resolver/__init__.py +21 -0
- lightly_studio/resolvers/annotation_label_resolver/create.py +27 -0
- lightly_studio/resolvers/annotation_label_resolver/delete.py +28 -0
- lightly_studio/resolvers/annotation_label_resolver/get_all.py +22 -0
- lightly_studio/resolvers/annotation_label_resolver/get_by_id.py +24 -0
- lightly_studio/resolvers/annotation_label_resolver/get_by_ids.py +25 -0
- lightly_studio/resolvers/annotation_label_resolver/get_by_label_name.py +24 -0
- lightly_studio/resolvers/annotation_label_resolver/names_by_ids.py +25 -0
- lightly_studio/resolvers/annotation_label_resolver/update.py +38 -0
- lightly_studio/resolvers/annotation_resolver/__init__.py +33 -0
- lightly_studio/resolvers/annotation_resolver/count_annotations_by_dataset.py +120 -0
- lightly_studio/resolvers/annotation_resolver/create.py +19 -0
- lightly_studio/resolvers/annotation_resolver/create_many.py +96 -0
- lightly_studio/resolvers/annotation_resolver/delete_annotation.py +45 -0
- lightly_studio/resolvers/annotation_resolver/delete_annotations.py +56 -0
- lightly_studio/resolvers/annotation_resolver/get_all.py +74 -0
- lightly_studio/resolvers/annotation_resolver/get_by_id.py +18 -0
- lightly_studio/resolvers/annotation_resolver/update_annotation_label.py +144 -0
- lightly_studio/resolvers/annotation_resolver/update_bounding_box.py +68 -0
- lightly_studio/resolvers/annotation_task_resolver.py +31 -0
- lightly_studio/resolvers/annotations/__init__.py +1 -0
- lightly_studio/resolvers/annotations/annotations_filter.py +89 -0
- lightly_studio/resolvers/dataset_resolver.py +278 -0
- lightly_studio/resolvers/embedding_model_resolver.py +100 -0
- lightly_studio/resolvers/metadata_resolver/__init__.py +15 -0
- lightly_studio/resolvers/metadata_resolver/metadata_filter.py +163 -0
- lightly_studio/resolvers/metadata_resolver/sample/__init__.py +21 -0
- lightly_studio/resolvers/metadata_resolver/sample/bulk_set_metadata.py +48 -0
- lightly_studio/resolvers/metadata_resolver/sample/get_by_sample_id.py +24 -0
- lightly_studio/resolvers/metadata_resolver/sample/get_metadata_info.py +104 -0
- lightly_studio/resolvers/metadata_resolver/sample/get_value_for_sample.py +27 -0
- lightly_studio/resolvers/metadata_resolver/sample/set_value_for_sample.py +53 -0
- lightly_studio/resolvers/sample_embedding_resolver.py +86 -0
- lightly_studio/resolvers/sample_resolver.py +249 -0
- lightly_studio/resolvers/samples_filter.py +81 -0
- lightly_studio/resolvers/settings_resolver.py +58 -0
- lightly_studio/resolvers/tag_resolver.py +276 -0
- lightly_studio/selection/README.md +6 -0
- lightly_studio/selection/mundig.py +105 -0
- lightly_studio/selection/select.py +96 -0
- lightly_studio/selection/select_via_db.py +93 -0
- lightly_studio/selection/selection_config.py +31 -0
- lightly_studio/services/annotations_service/__init__.py +21 -0
- lightly_studio/services/annotations_service/get_annotation_by_id.py +31 -0
- lightly_studio/services/annotations_service/update_annotation.py +65 -0
- lightly_studio/services/annotations_service/update_annotation_label.py +48 -0
- lightly_studio/services/annotations_service/update_annotations.py +29 -0
- lightly_studio/setup_logging.py +19 -0
- lightly_studio/type_definitions.py +19 -0
- lightly_studio/vendor/ACKNOWLEDGEMENTS +422 -0
- lightly_studio/vendor/LICENSE +31 -0
- lightly_studio/vendor/LICENSE_weights_data +50 -0
- lightly_studio/vendor/README.md +5 -0
- lightly_studio/vendor/__init__.py +1 -0
- lightly_studio/vendor/mobileclip/__init__.py +96 -0
- lightly_studio/vendor/mobileclip/clip.py +77 -0
- lightly_studio/vendor/mobileclip/configs/mobileclip_b.json +18 -0
- lightly_studio/vendor/mobileclip/configs/mobileclip_s0.json +18 -0
- lightly_studio/vendor/mobileclip/configs/mobileclip_s1.json +18 -0
- lightly_studio/vendor/mobileclip/configs/mobileclip_s2.json +18 -0
- lightly_studio/vendor/mobileclip/image_encoder.py +67 -0
- lightly_studio/vendor/mobileclip/logger.py +154 -0
- lightly_studio/vendor/mobileclip/models/__init__.py +10 -0
- lightly_studio/vendor/mobileclip/models/mci.py +933 -0
- lightly_studio/vendor/mobileclip/models/vit.py +433 -0
- lightly_studio/vendor/mobileclip/modules/__init__.py +4 -0
- lightly_studio/vendor/mobileclip/modules/common/__init__.py +4 -0
- lightly_studio/vendor/mobileclip/modules/common/mobileone.py +341 -0
- lightly_studio/vendor/mobileclip/modules/common/transformer.py +451 -0
- lightly_studio/vendor/mobileclip/modules/image/__init__.py +4 -0
- lightly_studio/vendor/mobileclip/modules/image/image_projection.py +113 -0
- lightly_studio/vendor/mobileclip/modules/image/replknet.py +188 -0
- lightly_studio/vendor/mobileclip/modules/text/__init__.py +4 -0
- lightly_studio/vendor/mobileclip/modules/text/repmixer.py +281 -0
- lightly_studio/vendor/mobileclip/modules/text/tokenizer.py +38 -0
- lightly_studio/vendor/mobileclip/text_encoder.py +245 -0
- lightly_studio-0.3.1.dist-info/METADATA +520 -0
- lightly_studio-0.3.1.dist-info/RECORD +219 -0
- lightly_studio-0.3.1.dist-info/WHEEL +4 -0
lightly_studio/vendor/LICENSE_weights_data
@@ -0,0 +1,50 @@
+ML-MobileCLIP Model Weights and Data
+
+Copyright (C) 2024 Apple Inc. All Rights Reserved.
+
+IMPORTANT: This Apple software is supplied to you by Apple
+Inc. ("Apple") in consideration of your agreement to the following
+terms, and your use, installation, modification or redistribution of
+this Apple software constitutes acceptance of these terms. If you do
+not agree with these terms, please do not use, install, modify or
+redistribute this Apple software.
+
+In consideration of your agreement to abide by the following terms, and
+subject to these terms, Apple grants you a personal, non-exclusive
+license, under Apple's copyrights in this original Apple software (the
+"Apple Software"), to use, reproduce, modify and redistribute the Apple
+Software, with or without modifications, in source and/or binary forms;
+provided that if you redistribute the Apple Software in its entirety and
+without modifications, you must retain this notice and the following
+text and disclaimers in all such redistributions of the Apple Software.
+Neither the name, trademarks, service marks or logos of Apple Inc. may
+be used to endorse or promote products derived from the Apple Software
+without specific prior written permission from Apple. Except as
+expressly stated in this notice, no other rights or licenses, express or
+implied, are granted by Apple herein, including but not limited to any
+patent rights that may be infringed by your derivative works or by other
+works in which the Apple Software may be incorporated.
+
+The Apple Software is provided by Apple on an "AS IS" basis. APPLE
+MAKES NO WARRANTIES, EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION
+THE IMPLIED WARRANTIES OF NON-INFRINGEMENT, MERCHANTABILITY AND FITNESS
+FOR A PARTICULAR PURPOSE, REGARDING THE APPLE SOFTWARE OR ITS USE AND
+OPERATION ALONE OR IN COMBINATION WITH YOUR PRODUCTS.
+
+IN NO EVENT SHALL APPLE BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL
+OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) ARISING IN ANY WAY OUT OF THE USE, REPRODUCTION,
+MODIFICATION AND/OR DISTRIBUTION OF THE APPLE SOFTWARE, HOWEVER CAUSED
+AND WHETHER UNDER THEORY OF CONTRACT, TORT (INCLUDING NEGLIGENCE),
+STRICT LIABILITY OR OTHERWISE, EVEN IF APPLE HAS BEEN ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+-------------------------------------------------------------------------------
+SOFTWARE DISTRIBUTED WITH ML-MobileCLIP:
+
+The ML-MobileCLIP software copyright and license terms can be found in LICENSE.
+
+The ML-MobileCLIP software includes a number of subcomponents with separate
+copyright notices and license terms - please see the file ACKNOWLEDGEMENTS.
+-------------------------------------------------------------------------------
lightly_studio/vendor/__init__.py
@@ -0,0 +1 @@
+"""Vendor directory for third-party code."""
lightly_studio/vendor/mobileclip/__init__.py
@@ -0,0 +1,96 @@
+#
+# For licensing see accompanying LICENSE file.
+# Copyright (C) 2024 Apple Inc. All Rights Reserved.
+#
+import os
+import json
+from typing import Optional, Union, Tuple, Any
+
+import torch
+import torch.nn as nn
+from torchvision.transforms import (
+    CenterCrop,
+    Compose,
+    InterpolationMode,
+    Resize,
+    ToTensor,
+)
+
+from .clip import CLIP
+from .modules.text.tokenizer import (
+    ClipTokenizer,
+)
+from .modules.common.mobileone import reparameterize_model
+
+
+def create_model_and_transforms(
+    model_name: str,
+    pretrained: Optional[str] = None,
+    reparameterize: Optional[bool] = True,
+    device: Union[str, torch.device] = "cpu",
+) -> Tuple[nn.Module, Any, Any]:
+    """
+    Method to instantiate model and pre-processing transforms necessary for inference.
+
+    Args:
+        model_name: Model name. Choose from ['mobileclip_s0', 'mobileclip_s1', 'mobileclip_s2', 'mobileclip_b']
+        pretrained: Location of pretrained checkpoint.
+        reparameterize: When set to True, re-parameterizable branches get folded for faster inference.
+        device: Device identifier for model placement.
+
+    Returns:
+        Tuple of instantiated model, and preprocessing transforms for inference.
+    """
+    # Config files
+    root_dir = os.path.dirname(os.path.abspath(__file__))
+    configs_dir = os.path.join(root_dir, "configs")
+    model_cfg_file = os.path.join(configs_dir, model_name + ".json")
+
+    # Get config from yaml file
+    if not os.path.exists(model_cfg_file):
+        raise ValueError(f"Unsupported model name: {model_name}")
+    model_cfg = json.load(open(model_cfg_file, "r"))
+
+    # Build preprocessing transforms for inference
+    resolution = model_cfg["image_cfg"]["image_size"]
+    resize_size = resolution
+    centercrop_size = resolution
+    aug_list = [
+        Resize(
+            resize_size,
+            interpolation=InterpolationMode.BILINEAR,
+        ),
+        CenterCrop(centercrop_size),
+        ToTensor(),
+    ]
+    preprocess = Compose(aug_list)
+
+    # Build model
+    model = CLIP(cfg=model_cfg)
+    model.to(device)
+    model.eval()
+
+    # Load checkpoint
+    if pretrained is not None:
+        chkpt = torch.load(pretrained, weights_only=True)
+        model.load_state_dict(chkpt)
+
+    # Reparameterize model for inference (if specified)
+    if reparameterize:
+        model = reparameterize_model(model)
+
+    return model, None, preprocess
+
+
+def get_tokenizer(model_name: str) -> nn.Module:
+    # Config files
+    root_dir = os.path.dirname(os.path.abspath(__file__))
+    configs_dir = os.path.join(root_dir, "configs")
+    model_cfg_file = os.path.join(configs_dir, model_name + ".json")
+
+    # Get config from yaml file
+    model_cfg = json.load(open(model_cfg_file, "r"))
+
+    # Build tokenizer
+    text_tokenizer = ClipTokenizer(model_cfg)
+    return text_tokenizer
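The two functions above are the whole public surface of the vendored module. For orientation, a minimal usage sketch, assuming the upstream ml-mobileclip calling convention; the checkpoint path and image file below are hypothetical placeholders (the wheel does not bundle weights):

import torch
from PIL import Image

from lightly_studio.vendor import mobileclip

# "mobileclip_s0.pt" and "example.jpg" are placeholders, not files in this wheel.
model, _, preprocess = mobileclip.create_model_and_transforms(
    "mobileclip_s0", pretrained="mobileclip_s0.pt"
)
tokenizer = mobileclip.get_tokenizer("mobileclip_s0")

image = preprocess(Image.open("example.jpg").convert("RGB")).unsqueeze(0)
text = tokenizer(["a photo of a cat", "a photo of a dog"])

with torch.no_grad():
    image_features = model.encode_image(image, normalize=True)
    text_features = model.encode_text(text, normalize=True)
    # Cosine similarities between the image and each caption.
    similarity = image_features @ text_features.T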
lightly_studio/vendor/mobileclip/clip.py
@@ -0,0 +1,77 @@
+#
+# For licensing see accompanying LICENSE file.
+# Copyright (C) 2024 Apple Inc. All Rights Reserved.
+#
+""" Model schema in open_clip format for inference only. """
+import math
+from typing import Any, Optional, Dict
+
+import torch
+import torch.nn.functional as F
+from torch import nn
+
+from .text_encoder import (
+    TextTransformer,
+)
+
+from .image_encoder import MCi
+
+
+class CLIP(nn.Module):
+    """Base class for multi-modal image-text data"""
+
+    def __init__(self, cfg: Dict, output_dict: bool = False, *args, **kwargs) -> None:
+        super().__init__()
+        self.output_dict = output_dict
+        self.projection_dim = cfg["embed_dim"]
+        if self.projection_dim is None:
+            raise ValueError("Please specify `embed_dim` in model config.")
+
+        self.image_encoder = MCi(
+            model_name=cfg["image_cfg"]["model_name"],
+            projection_dim=self.projection_dim,
+        )
+        self.text_encoder = TextTransformer(
+            cfg=cfg["text_cfg"], projection_dim=self.projection_dim
+        )
+        self.logit_scale = nn.Parameter(torch.ones([]) * math.log(1.0 / 0.07))
+
+    def _exponentiate_and_clip_logits(self, max_scale: float = 100.0):
+        scale = self.logit_scale.exp()
+        scale = torch.clamp(scale, 0, max_scale)
+        return scale
+
+    def encode_image(self, image: torch.Tensor, normalize: bool = False):
+        image_encoder_out = self.image_encoder(image)
+        if isinstance(image_encoder_out, dict):
+            features = image_encoder_out["logits"]
+        else:
+            features = image_encoder_out
+        return F.normalize(features, dim=-1) if normalize else features
+
+    def encode_text(self, text: torch.Tensor, normalize: bool = False):
+        text_features = self.text_encoder(text_tokens=text, key_padding_mask=None)
+        return F.normalize(text_features, dim=-1) if normalize else text_features
+
+    def forward(
+        self,
+        image: Optional[torch.Tensor] = None,
+        text: Optional[torch.Tensor] = None,
+        *args,
+        **kwargs
+    ) -> Any:
+
+        image_embeddings = (
+            self.encode_image(image, normalize=True) if image is not None else None
+        )
+        text_embeddings = (
+            self.encode_text(text, normalize=True) if text is not None else None
+        )
+
+        if self.output_dict:
+            return {
+                "image_features": image_embeddings,
+                "text_features": text_embeddings,
+                "logit_scale": self._exponentiate_and_clip_logits(),
+            }
+        return image_embeddings, text_embeddings, self._exponentiate_and_clip_logits()
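A note on how these outputs combine downstream: forward() returns unit-normalized embeddings plus the exponentiated, clipped logit scale (logit_scale is initialized to log(1/0.07), the standard CLIP temperature), and a caller would typically form contrastive logits as a scaled dot product. A minimal sketch with stand-in tensors:

import torch

# Stand-in unit-normalized embeddings, shaped like the forward() outputs:
# (num_images, embed_dim) and (num_texts, embed_dim).
image_embeddings = torch.nn.functional.normalize(torch.randn(4, 512), dim=-1)
text_embeddings = torch.nn.functional.normalize(torch.randn(3, 512), dim=-1)
# Mirrors _exponentiate_and_clip_logits: exp(log(1/0.07)) clipped to 100.
logit_scale = torch.tensor(1.0 / 0.07).clamp(max=100.0)

# Contrastive logits: cosine similarity scaled by the learned temperature.
logits_per_image = logit_scale * image_embeddings @ text_embeddings.T
probs = logits_per_image.softmax(dim=-1)  # per-image distribution over texts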
lightly_studio/vendor/mobileclip/configs/mobileclip_b.json
@@ -0,0 +1,18 @@
+{
+    "embed_dim": 512,
+    "image_cfg": {
+        "image_size": 224,
+        "model_name": "vit_b16"
+    },
+    "text_cfg": {
+        "context_length": 77,
+        "vocab_size": 49408,
+        "dim": 512,
+        "ffn_multiplier_per_layer": 4.0,
+        "n_heads_per_layer": 8,
+        "n_transformer_layers": 12,
+        "norm_layer": "layer_norm_fp32",
+        "causal_masking": true,
+        "model_name": "base"
+    }
+}
lightly_studio/vendor/mobileclip/configs/mobileclip_s0.json
@@ -0,0 +1,18 @@
+{
+    "embed_dim": 512,
+    "image_cfg": {
+        "image_size": 256,
+        "model_name": "mci0"
+    },
+    "text_cfg": {
+        "context_length": 77,
+        "vocab_size": 49408,
+        "dim": 512,
+        "ffn_multiplier_per_layer": 4.0,
+        "n_heads_per_layer": 8,
+        "n_transformer_layers": 4,
+        "norm_layer": "layer_norm_fp32",
+        "causal_masking": false,
+        "model_name": "mct"
+    }
+}
lightly_studio/vendor/mobileclip/configs/mobileclip_s1.json
@@ -0,0 +1,18 @@
+{
+    "embed_dim": 512,
+    "image_cfg": {
+        "image_size": 256,
+        "model_name": "mci1"
+    },
+    "text_cfg": {
+        "context_length": 77,
+        "vocab_size": 49408,
+        "dim": 512,
+        "ffn_multiplier_per_layer": 4.0,
+        "n_heads_per_layer": 8,
+        "n_transformer_layers": 12,
+        "norm_layer": "layer_norm_fp32",
+        "causal_masking": false,
+        "model_name": "base"
+    }
+}
lightly_studio/vendor/mobileclip/configs/mobileclip_s2.json
@@ -0,0 +1,18 @@
+{
+    "embed_dim": 512,
+    "image_cfg": {
+        "image_size": 256,
+        "model_name": "mci2"
+    },
+    "text_cfg": {
+        "context_length": 77,
+        "vocab_size": 49408,
+        "dim": 512,
+        "ffn_multiplier_per_layer": 4.0,
+        "n_heads_per_layer": 8,
+        "n_transformer_layers": 12,
+        "norm_layer": "layer_norm_fp32",
+        "causal_masking": false,
+        "model_name": "base"
+    }
+}
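Comparing the four configs: mobileclip_b pairs a 224px ViT-B/16 image tower with a 12-layer causally masked text tower, while s0/s1/s2 use 256px MCi backbones without causal masking, and s0 additionally shrinks the text tower to 4 layers ("mct"). A small sanity-check sketch that locates a bundled config the same way create_model_and_transforms does:

import json
import os

import lightly_studio.vendor.mobileclip as mobileclip

# Resolve the configs directory relative to the installed package, mirroring
# the os.path logic in create_model_and_transforms.
configs_dir = os.path.join(
    os.path.dirname(os.path.abspath(mobileclip.__file__)), "configs"
)
with open(os.path.join(configs_dir, "mobileclip_s0.json")) as f:
    cfg = json.load(f)

print(cfg["image_cfg"]["image_size"])           # 256: Resize/CenterCrop target
print(cfg["text_cfg"]["n_transformer_layers"])  # 4: smallest text tower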
lightly_studio/vendor/mobileclip/image_encoder.py
@@ -0,0 +1,67 @@
+#
+# For licensing see accompanying LICENSE file.
+# Copyright (C) 2024 Apple Inc. All Rights Reserved.
+#
+from typing import Any
+
+import torch.nn as nn
+from timm.models import create_model
+
+from . import models  # Added to register models
+from .modules.image.image_projection import GlobalPool2D
+
+
+class MCi(nn.Module):
+    """
+    This class implements `MCi Models <https://arxiv.org/pdf/2311.17049.pdf>`_
+    """
+
+    def __init__(self, model_name: str, *args, **kwargs) -> None:
+        super().__init__()
+        self.projection_dim = None
+        if "projection_dim" in kwargs:
+            self.projection_dim = kwargs.get("projection_dim")
+
+        # Create model
+        self.model = create_model(model_name, projection_dim=self.projection_dim)
+
+        # Build out projection head.
+        if self.projection_dim is not None:
+            if hasattr(self.model, "head"):
+                self.model.head = MCi._update_image_classifier(
+                    image_classifier=self.model.head, projection_dim=self.projection_dim
+                )
+
+    def forward(self, x: Any, *args, **kwargs) -> Any:
+        """A forward function of the model."""
+        x = self.model(x)
+        return x
+
+    @staticmethod
+    def _get_in_feature_dimension(image_classifier: nn.Module) -> int:
+        """Return the input feature dimension to the image classification head."""
+        in_features = None
+        if isinstance(image_classifier, nn.Sequential):
+            # Classifier that uses nn.Sequential usually has global pooling and
+            # multiple linear layers. Find the first linear layer and get its
+            # in_features
+            for layer in image_classifier:
+                if isinstance(layer, nn.Linear):
+                    in_features = layer.in_features
+                    break
+        elif isinstance(image_classifier, nn.Linear):
+            in_features = image_classifier.in_features
+
+        if in_features is None:
+            raise NotImplementedError(
+                f"Cannot get input feature dimension of {image_classifier}."
+            )
+        return in_features
+
+    @staticmethod
+    def _update_image_classifier(
+        image_classifier: nn.Module, projection_dim: int, *args, **kwargs
+    ) -> nn.Module:
+        in_features = MCi._get_in_feature_dimension(image_classifier)
+        new_img_classifier = GlobalPool2D(in_dim=in_features, out_dim=projection_dim)
+        return new_img_classifier
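The head swap above hinges on _get_in_feature_dimension: scan the timm classifier head for its first nn.Linear to learn the feature width, then replace the head with a GlobalPool2D projection of that width. A self-contained sketch of the introspection pattern, using a hypothetical stand-in head rather than a real timm model:

import torch.nn as nn

# Hypothetical classifier head: global pooling followed by a linear layer,
# the shape MCi._get_in_feature_dimension expects from timm backbones.
head = nn.Sequential(nn.AdaptiveAvgPool2d(1), nn.Flatten(), nn.Linear(768, 1000))

# Find the first nn.Linear and read its input width, as the static method does.
in_features = next(
    layer.in_features for layer in head if isinstance(layer, nn.Linear)
)
assert in_features == 768  # this width becomes GlobalPool2D's in_dim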
lightly_studio/vendor/mobileclip/logger.py
@@ -0,0 +1,154 @@
+#
+# For licensing see accompanying LICENSE file.
+# Copyright (C) 2024 Apple Inc. All Rights Reserved.
+#
+
+import os
+import sys
+import time
+import traceback
+from typing import Optional, Union
+
+text_colors = {
+    "logs": "\033[34m",  # 033 is the escape code and 34 is the color code
+    "info": "\033[32m",
+    "warning": "\033[33m",
+    "debug": "\033[93m",
+    "error": "\033[31m",
+    "bold": "\033[1m",
+    "end_color": "\033[0m",
+    "light_red": "\033[36m",
+}
+
+
+def get_curr_time_stamp() -> str:
+    return time.strftime("%Y-%m-%d %H:%M:%S")
+
+
+def error(message: str) -> None:
+    time_stamp = get_curr_time_stamp()
+    error_str = (
+        text_colors["error"]
+        + text_colors["bold"]
+        + "ERROR "
+        + text_colors["end_color"]
+    )
+
+    # exiting with code -1 does not tell any information about the error (e.g., NaN encountered in the loss).
+    # For more descriptive error messages, we replace exit(-1) with sys.exit(ERROR_MESSAGE).
+    # This allows us to handle specific exceptions in the tests.
+
+    # print("{} - {} - {}".format(time_stamp, error_str, message), flush=True)
+    # print("{} - {} - {}".format(time_stamp, error_str, "Exiting!!!"), flush=True)
+    # exit(-1)
+
+    if sys.exc_info()[0] is None:
+        traceback.print_stack()
+    else:
+        traceback.print_exc()
+    sys.exit("{} - {} - {}. Exiting!!!".format(time_stamp, error_str, message))
+
+
+def color_text(in_text: str) -> str:
+    return text_colors["light_red"] + in_text + text_colors["end_color"]
+
+
+def log(message: str, end="\n") -> None:
+    time_stamp = get_curr_time_stamp()
+    log_str = (
+        text_colors["logs"] + text_colors["bold"] + "LOGS " + text_colors["end_color"]
+    )
+    print("{} - {} - {}".format(time_stamp, log_str, message), end=end)
+
+
+def warning(message: Union[str, Warning]) -> None:
+    if isinstance(message, Warning):
+        message = f"{type(message).__name__}({','.join(map(repr, message.args))}"
+
+    time_stamp = get_curr_time_stamp()
+    warn_str = (
+        text_colors["warning"]
+        + text_colors["bold"]
+        + "WARNING"
+        + text_colors["end_color"]
+    )
+    print("{} - {} - {}".format(time_stamp, warn_str, message))
+
+
+def ignore_exception_with_warning(message: str) -> None:
+    """
+    After catching a tolerable exception E1 (e.g. when Model.forward() fails during
+    profiling with try-catch, it'll be helpful to log the exception for future
+    investigation. But printing the error stack trace, as is, could be confusing
+    when an uncaught (non-tolerable) exception "E2" raises down the road. Then, the log
+    will contain two stack traces for E1, E2. When looking for errors in logs, users
+    should look for E2, but they may find E1.
+
+    This function appends "(WARNING)" at the end of all lines of the E1 traceback, so
+    that the user can distinguish E1 from uncaught exception E2.
+
+    Args:
+        message: Extra explanation and context for debugging. (Note: the exception obj
+        will be automatically fetched from python. No need to pass it as an argument or as
+        message)
+    """
+    warning(f"{message}:\n{traceback.format_exc()}".replace("\n", "\n(WARNING)"))
+
+
+def info(message: str, print_line: Optional[bool] = False) -> None:
+    time_stamp = get_curr_time_stamp()
+    info_str = (
+        text_colors["info"] + text_colors["bold"] + "INFO " + text_colors["end_color"]
+    )
+    print("{} - {} - {}".format(time_stamp, info_str, message))
+    if print_line:
+        double_dash_line(dashes=150)
+
+
+def debug(message: str) -> None:
+    time_stamp = get_curr_time_stamp()
+    log_str = (
+        text_colors["debug"]
+        + text_colors["bold"]
+        + "DEBUG "
+        + text_colors["end_color"]
+    )
+    print("{} - {} - {}".format(time_stamp, log_str, message))
+
+
+def double_dash_line(dashes: Optional[int] = 75) -> None:
+    print(text_colors["error"] + "=" * dashes + text_colors["end_color"])
+
+
+def singe_dash_line(dashes: Optional[int] = 67) -> None:
+    print("-" * dashes)
+
+
+def print_header(header: str) -> None:
+    double_dash_line()
+    print(
+        text_colors["info"]
+        + text_colors["bold"]
+        + "=" * 50
+        + str(header)
+        + text_colors["end_color"]
+    )
+    double_dash_line()
+
+
+def print_header_minor(header: str) -> None:
+    print(
+        text_colors["warning"]
+        + text_colors["bold"]
+        + "=" * 25
+        + str(header)
+        + text_colors["end_color"]
+    )
+
+
+def disable_printing():
+    sys.stdout = open(os.devnull, "w")
+
+
+def enable_printing():
+    sys.stdout = sys.__stdout__
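These helpers are plain print-based utilities. A short usage sketch, assuming the import path implied by this wheel's layout (logger.error() is omitted since it calls sys.exit):

from lightly_studio.vendor.mobileclip import logger

logger.info("building MobileCLIP model", print_line=True)
logger.debug("config loaded")

try:
    raise RuntimeError("profiling step failed")
except RuntimeError:
    # Tags every subsequent traceback line with "(WARNING)" so a tolerated
    # failure is not mistaken for an uncaught crash later in the log.
    logger.ignore_exception_with_warning("tolerable failure")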