PictSure 0.1.0 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Examples/example.py ADDED
@@ -0,0 +1,72 @@
+ import os
+ import random
+ from PIL import Image
+ import torch
+ from PictSure import PictSure
+
+ # CONFIG
+ ROOT_DIR = "./BrainTumor_preprocessed/"
+ NUM_CONTEXT_IMAGES = 5
+ IMAGE_SIZE = 224
+ DEVICE = torch.device("cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu")
+
+ # Load context/reference images
+ def load_reference_images(path):
+     label_map = {}
+     context_images, context_labels = [], []
+     chosen = []  # filenames used as context images, excluded from the test pool later
+
+     folders = sorted(os.listdir(path))
+     for label, folder in enumerate(folders):
+         folder_path = os.path.join(path, folder)
+         all_images = os.listdir(folder_path)
+         ref_imgs = random.sample(all_images, NUM_CONTEXT_IMAGES)
+
+         for img_name in ref_imgs:
+             img_path = os.path.join(folder_path, img_name)
+             img = Image.open(img_path).convert("RGB")
+             context_images.append(img)
+             context_labels.append(label)
+
+         chosen.extend(ref_imgs)
+         label_map[folder] = label
+
+     return context_images, context_labels, label_map, chosen
+
+ # Pick a single test image from the images that were not used as context
+ def pick_test_image(path, label_map, chosen):
+     all_images = []
+     all_labels = []
+
+     for folder, label in label_map.items():
+         folder_path = os.path.join(path, folder)
+         images = [f for f in os.listdir(folder_path) if f not in chosen]
+         for img_name in images:
+             img_path = os.path.join(folder_path, img_name)
+             all_images.append(img_path)
+             all_labels.append(label)
+
+     if all_images:
+         random_index = random.randint(0, len(all_images) - 1)
+         img_path = all_images[random_index]
+         label = all_labels[random_index]
+         img = Image.open(img_path).convert("RGB")
+         return img, label
+
+ # Pull a pre-trained model from Hugging Face
+ pictsure_model = PictSure.from_pretrained("pictsure/pictsure-vit").to(DEVICE)
+
+ results = []
+ for i in range(200):
+     # Load references and test image
+     context_imgs, context_lbls, label_map, chosen = load_reference_images(ROOT_DIR)
+     test_img, test_lbl = pick_test_image(ROOT_DIR, label_map, chosen)
+     # Predict
+     with torch.no_grad():
+         pictsure_model.set_context_images(context_imgs, context_lbls)
+         pred = pictsure_model.predict(test_img)
+
+     results.append(pred == test_lbl)
+
+ accuracy = sum(results) / len(results) * 100
+ print(f"Accuracy over {len(results)} predictions: {accuracy:.1f}%")
PictSure/__init__.py ADDED
@@ -0,0 +1,4 @@
+ from .model_PictSure import PictSure
+ from .model_embeddings import ResNetWrapper, VitNetWrapper
+
+ __all__ = ['PictSure', 'ResNetWrapper', 'VitNetWrapper']
PictSure/cli.py ADDED
@@ -0,0 +1,84 @@
+ import os
+ import click
+ import shutil
+ from .config import PRETRAINED
+ import pkg_resources
+
+ @click.group()
+ def cli():
+     """PictSure command line interface."""
+     pass
+
+ @cli.command()
+ def list_models():
+     """List all available and downloaded models."""
+     click.echo("Available PictSure Models:")
+     click.echo("=" * 50)
+
+     # Get the package directory
+     package_dir = os.path.dirname(pkg_resources.resource_filename('PictSure', '__init__.py'))
+
+     for model_name, model_info in PRETRAINED.items():
+         # Check if model is downloaded using absolute path
+         local_folder = os.path.join(package_dir, 'weights', model_info['name'])
+         weights_path = os.path.join(local_folder, 'weights.pt')
+         is_downloaded = os.path.exists(weights_path)
+
+         # Create status indicator
+         status = "✓ Downloaded" if is_downloaded else "✗ Not downloaded"
+         status_color = "green" if is_downloaded else "red"
+
+         # Print model information
+         click.echo(f"\nModel: {click.style(model_info['name'], bold=True)}")
+         click.echo(f"Status: {click.style(status, fg=status_color)}")
+         click.echo(f"Type: {model_info['embed_model']}")
+         click.echo(f"Resolution: {model_info['resolution']}")
+         click.echo(f"Number of classes: {model_info['num_classes']}")
+         click.echo(f"Transformer heads: {model_info['nheads']}")
+         click.echo(f"Transformer layers: {model_info['nlayer']}")
+         click.echo(f"Model size: {model_info['size']} Million Parameters")
+         click.echo(f"Path: {weights_path}")
+         click.echo("-" * 50)
+
+ @cli.command()
+ @click.argument('model_name', type=click.Choice([info['name'] for info in PRETRAINED.values()]))
+ @click.option('--force', '-f', is_flag=True, help='Skip confirmation prompt')
+ def remove(model_name, force):
+     """Remove the weights of a specific model."""
+     # Get the package directory
+     package_dir = os.path.dirname(pkg_resources.resource_filename('PictSure', '__init__.py'))
+
+     # Find the model info by name
+     model_info = next((info for info in PRETRAINED.values() if info['name'] == model_name), None)
+     if not model_info:
+         click.echo(click.style(f"Model {model_name} not found.", fg='red'))
+         return
+
+     # Construct paths
+     local_folder = os.path.join(package_dir, 'weights', model_info['name'])
+     weights_path = os.path.join(local_folder, 'weights.pt')
+
+     if not os.path.exists(weights_path):
+         click.echo(click.style(f"Model {model_info['name']} is not downloaded.", fg='yellow'))
+         return
+
+     if not force:
+         if not click.confirm(f"Are you sure you want to remove the weights for {click.style(model_info['name'], bold=True)}?"):
+             click.echo("Operation cancelled.")
+             return
+
+     try:
+         # Remove the weights file
+         os.remove(weights_path)
+         # Try to remove the directory if it's empty
+         try:
+             os.rmdir(local_folder)
+         except OSError:
+             pass  # Directory might not be empty, which is fine
+
+         click.echo(click.style(f"Successfully removed weights for {model_info['name']}.", fg='green'))
+     except Exception as e:
+         click.echo(click.style(f"Error removing weights: {str(e)}", fg='red'))
+
+ if __name__ == '__main__':
+     cli()
PictSure/model_PictSure.py ADDED
@@ -0,0 +1,240 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+ import numpy as np
+
+ from .model_embeddings import ResNetWrapper, VitNetWrapper, load_encoder
+ from huggingface_hub import PyTorchModelHubMixin
+
+ from PIL import Image
+
+ class PictSure(
+     nn.Module,
+     PyTorchModelHubMixin
+ ):
+     def __init__(self, embedding, num_classes=10, nheads=8, nlayer=4):
+         super(PictSure, self).__init__()
+         if isinstance(embedding, nn.Module):
+             embedding_layer = embedding
+             if not hasattr(embedding_layer, 'latent_dim'):
+                 raise ValueError("Custom embedding module must have a 'latent_dim' attribute.")
+         elif embedding == 'resnet':
+             embedding_layer = load_encoder()
+         elif embedding == 'vit':
+             embedding_layer = VitNetWrapper(path=None, num_classes=1000)
+         else:
+             raise ValueError("Unsupported embedding type. Use 'resnet', 'vit' or a custom nn.Module.")
+
+         self.x_projection = nn.Linear(embedding_layer.latent_dim, 512)
+         self.y_projection = nn.Linear(num_classes, 512)
+
+         self.transformer_layer = nn.TransformerEncoderLayer(
+             d_model=1024, nhead=nheads, dim_feedforward=2048, norm_first=True
+         )
+         self.transformer = nn.TransformerEncoder(self.transformer_layer, num_layers=nlayer)
+         self.fc = nn.Linear(1024, num_classes)
+         self._init_weights()
+
+         self.num_classes = num_classes
+
+         self.embedding = embedding_layer
+
+         self.context_images = None
+         self.context_labels = None
+
+     def to(self, device):
+         self.embedding = self.embedding.to(device)
+         self.x_projection = self.x_projection.to(device)
+         self.y_projection = self.y_projection.to(device)
+         self.transformer = self.transformer.to(device)
+         self.fc = self.fc.to(device)
+         return self
+
+     @property
+     def device(self):
+         return self.embedding.device
+
+     def _init_weights(self):
+         # Loop through all parameters in the model
+         for name, param in self.named_parameters():
+             if 'weight' in name:
+                 if param.dim() > 1:  # Apply Xavier only to 2D+ parameters
+                     nn.init.xavier_uniform_(param)
+             elif 'bias' in name:
+                 nn.init.zeros_(param)  # Bias is initialized to zero
+
+     def normalize_samples(self, x, resize=(224, 224)):
+         """
+         Normalize and resize the input images.
+         :param x: Tensor of shape (batch, num_images, 3, 224, 224)
+         :param resize: Tuple for resizing images
+         :return: Normalized and resized images
+         """
+
+         original_shape = x.shape
+         if len(original_shape) == 5:
+             # Reshape to (batch * num_images, 3, 224, 224)
+             x = x.view(-1, 3, 224, 224)
+         elif len(original_shape) == 3:
+             x = x.unsqueeze(0)  # Add batch dimension if missing
+
+         # Rescale images to the specified size
+         if resize is not None:
+             x = F.interpolate(x, size=resize, mode='bilinear', align_corners=False)
+
+         # Normalize images to [0, 1] range
+         if x.max() > 1.0:
+             x = x / 255.0
+         # Standardize with the channel mean/std used during training
+         mean = torch.tensor([0.4914, 0.4822, 0.4465], device=x.device).view(1, 3, 1, 1)
+         std = torch.tensor([0.2023, 0.1994, 0.2010], device=x.device).view(1, 3, 1, 1)
+
+         x = (x - mean) / std
+
+         # Reshape back to (batch, num_images, 3, 224, 224)
+         if len(original_shape) == 5:
+             x = x.view(original_shape[0], original_shape[1], 3, resize[0], resize[1])
+         elif len(original_shape) == 3:
+             x = x.squeeze(0)
+
+         return x
+
+     def set_context_images(self, context_images, context_labels):
+         """
+         Set the context images and labels for the model.
+         :param context_images: Tensor of shape (1, num_images, 3, 224, 224) or a list of PIL images
+         :param context_labels: Tensor of shape (1, num_images) or a list of integer labels
+         """
+         if isinstance(context_images, list) and all(isinstance(img, Image.Image) for img in context_images):
+             # Convert list of PIL images to a (num_images, 224, 224, 3) array ...
+             context_images = np.stack([np.array(img.resize((224, 224))) for img in context_images])
+             context_images = torch.tensor(context_images, dtype=torch.float32)
+             # ... and bring it into channel-first (1, num_images, 3, 224, 224) layout
+             context_images = context_images.permute(0, 3, 1, 2).contiguous().unsqueeze(0)
+         if isinstance(context_labels, list):
+             context_labels = torch.tensor(context_labels, dtype=torch.int64)
+             context_labels = context_labels.unsqueeze(0)  # Shape: (1, num_images)
+
+         if context_images.ndim == 4:
+             context_images = context_images.unsqueeze(0)
+
+         assert context_images.ndim == 5, "context_images must be of shape (1, num_images, 3, 224, 224)"
+         assert context_labels.ndim == 2, "context_labels must be of shape (1, num_images)"
+
+         context_images = self.normalize_samples(context_images, resize=(224, 224))
+
+         self.context_images = context_images
+         self.context_labels = context_labels
+
+     def predict(self, x_pred):
+         """
+         Predict the class for the given image(s).
+         :param x_pred: PIL image, list of PIL images, or tensor of shape (batch, 3, 224, 224)
+         :return: Predicted class index
+         """
+         if self.context_images is None or self.context_labels is None:
+             raise ValueError("Context images and labels must be set before prediction.")
+
+         if isinstance(x_pred, list) and all(isinstance(img, Image.Image) for img in x_pred):
+             # Convert list of PIL images to a channel-first float tensor
+             x_pred = np.stack([np.array(img.resize((224, 224))) for img in x_pred])
+             x_pred = torch.tensor(x_pred, dtype=torch.float32)
+             x_pred = x_pred.permute(0, 3, 1, 2).contiguous()  # Shape: (num_images, 3, 224, 224)
+             x_pred = x_pred / 255.0  # Normalize to [0, 1] range
+         if isinstance(x_pred, Image.Image):
+             # Convert single PIL image to a channel-first float tensor
+             x_pred = np.array(x_pred.resize((224, 224)))
+             x_pred = torch.tensor(x_pred, dtype=torch.float32).permute(2, 0, 1).unsqueeze(0)  # Shape: (1, 3, 224, 224)
+             x_pred = x_pred / 255.0  # Normalize to [0, 1] range
+
+         # Expand reference images and labels to match the batch size
+         batch_size = x_pred.size(0)
+         context_images = self.context_images.expand(batch_size, -1, -1, -1, -1)
+         context_labels = self.context_labels.expand(batch_size, -1)
+         # Concatenate context images and labels with prediction images
+         x_train = context_images.view(batch_size, -1, 3, 224, 224)  # Shape: (batch, num_context_images, 3, 224, 224)
+         y_train = context_labels.view(batch_size, -1)  # Shape: (batch, num_context_images)
+
+         x_pred = self.normalize_samples(x_pred, resize=(224, 224))  # Normalize prediction images
+
+         # Move to device
+         x_train = x_train.to(self.embedding.device)
+         y_train = y_train.to(self.embedding.device)
+         x_pred = x_pred.to(self.embedding.device)
+
+         output = self.forward(x_train, y_train, x_pred, embedd=True)
+
+         pred = torch.argmax(output, dim=1)
+
+         return pred.item()
+
+     def forward(self, x_train, y_train, x_pred, embedd=True):
+         if embedd:
+             x_embedded = self.embedding(x_train)  # Shape: (batch, seq, embedding_dim)
+             # (batch, 3, 224, 224) -> (batch, 1, 3, 224, 224) so the embedding wrapper sees a sequence of length 1
+             x_pred = x_pred.unsqueeze(1)
+             x_pred_embedded = self.embedding(x_pred)  # Shape: (batch, 1, embedding_dim)
+         else:
+             x_embedded = x_train
+             x_pred_embedded = x_pred
+
+         x_projected = self.x_projection(x_embedded)  # Shape: (batch, seq, projection_dim)
+
+         # Ensure y_train has the right dimensions
+         y_train = y_train.unsqueeze(-1) if y_train.ndim == 1 else y_train  # Ensure shape (batch, seq)
+
+         # One-hot encode y_train: (batch, seq) -> (batch, seq, num_classes)
+         y_train = F.one_hot(y_train, num_classes=self.num_classes).float()
+
+         # (batch, seq, num_classes) -> (batch * seq, num_classes)
+         y_train = y_train.view(-1, self.num_classes)
+
+         y_projected = self.y_projection(y_train)  # Shape: (batch * seq, projection_dim)
+
+         # Reshape back to (batch, seq, projection_dim)
+         y_projected = y_projected.view(x_projected.size(0), x_projected.size(1), -1)
+
+         # Concatenate x and y projections
+         combined_embedded = torch.cat([x_projected, y_projected], dim=-1)  # Shape: (batch, seq, d_model)
+
+         # Apply the same projection to the prediction
+         x_pred_projected = self.x_projection(x_pred_embedded)  # Shape: (batch, 1, projection_dim)
+
+         y_pred_projected = torch.zeros_like(x_pred_projected, device=self.device) - 1  # placeholder label embedding for the query
+
+         # Concatenate x_pred and y_pred projections
+         pred_combined_embedded = torch.cat([x_pred_projected, y_pred_projected], dim=-1)  # Shape: (batch, 1, d_model)
+
+         # Concatenate train and prediction embeddings
+         full_sequence = torch.cat([combined_embedded, pred_combined_embedded], dim=1)  # Shape: (batch, seq+1, d_model)
+
+         # (batch, seq, dim) -> (seq, batch, dim)
+         full_sequence = full_sequence.permute(1, 0, 2)
+
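+         # The mask lets every position attend to the labeled context pairs but hides the
+         # query (last position) from the context rows, so the context representations are
+         # not influenced by the unlabeled query. In nn.TransformerEncoder, a mask value of
+         # 0.0 means "may attend" and -inf means "blocked".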
+         # Create an attention mask
+         seq_length = full_sequence.size(0)
+         attention_mask = torch.ones(seq_length, seq_length, device=self.device)
+         attention_mask[:-1, -1] = 0
+         attention_mask = attention_mask.masked_fill(attention_mask == 0, float('-inf')).masked_fill(attention_mask == 1, float(0.0))
+
+         # Pass through transformer encoder
+         transformer_output = self.transformer(full_sequence, mask=attention_mask)
+
+         # Extract the prediction hidden state and compute logits
+         prediction_hidden_state = transformer_output[-1, :, :]  # Shape: (batch_size, hidden_dim)
+         # Calculate final logits
+         logits = self.fc(prediction_hidden_state)  # Shape: (batch_size, num_classes)
+
+         return logits
PictSure/model_ViT.py ADDED
@@ -0,0 +1,162 @@
+ import torch
+ import torch.nn as nn
+ import torch.nn.functional as F
+
+ class PatchEmbed(nn.Module):
+     def __init__(self, img_size=224, patch_size=16, in_chans=3, embed_dim=768):
+         super().__init__()
+         self.img_size = img_size
+         self.patch_size = patch_size
+         self.grid_size = img_size // patch_size  # e.g. 14 if 224 // 16
+         self.num_patches = self.grid_size ** 2
+         self.proj = nn.Conv2d(in_chans, embed_dim, kernel_size=patch_size, stride=patch_size)
+
+     def forward(self, x):
+         # x: [B, 3, H, W]
+         # project to embeddings with shape [B, D, #patches_row, #patches_col]
+         x = self.proj(x)  # -> [B, embed_dim, grid_size, grid_size]
+         # flatten the spatial dims
+         x = x.flatten(2)  # -> [B, embed_dim, grid_size*grid_size]
+         x = x.transpose(1, 2)  # -> [B, #patches, embed_dim]
+         return x
+
+ class Attention(nn.Module):
+     def __init__(self, dim, num_heads=8, qkv_bias=True, attn_drop=0.0, proj_drop=0.0):
+         super().__init__()
+         self.num_heads = num_heads
+         head_dim = dim // num_heads
+         self.scale = head_dim ** -0.5
+
+         self.qkv = nn.Linear(dim, dim * 3, bias=qkv_bias)
+         self.attn_drop = nn.Dropout(attn_drop)
+         self.proj = nn.Linear(dim, dim)
+         self.proj_drop = nn.Dropout(proj_drop)
+
+     def forward(self, x):
+         B, N, C = x.shape
+         qkv = self.qkv(x)  # -> [B, N, 3*C]
+         qkv = qkv.reshape(B, N, 3, self.num_heads, C // self.num_heads)
+         qkv = qkv.permute(2, 0, 3, 1, 4)  # -> [3, B, heads, N, C//heads]
+         q, k, v = qkv[0], qkv[1], qkv[2]
+
+         # scaled dot product
+         attn = (q @ k.transpose(-2, -1)) * self.scale  # [B, heads, N, N]
+         attn = attn.softmax(dim=-1)
+         attn = self.attn_drop(attn)
+
+         x = (attn @ v).transpose(1, 2).reshape(B, N, C)
+         x = self.proj(x)
+         x = self.proj_drop(x)
+         return x
+
+ class MLP(nn.Module):
+     def __init__(self, in_features, hidden_features=None, out_features=None, drop=0.0):
+         super().__init__()
+         out_features = out_features or in_features
+         hidden_features = hidden_features or in_features
+         self.fc1 = nn.Linear(in_features, hidden_features)
+         self.act = nn.GELU()
+         self.fc2 = nn.Linear(hidden_features, out_features)
+         self.drop = nn.Dropout(drop)
+
+     def forward(self, x):
+         x = self.fc1(x)
+         x = self.act(x)
+         x = self.drop(x)
+         x = self.fc2(x)
+         x = self.drop(x)
+         return x
+
+ class Block(nn.Module):
+     def __init__(self, dim, num_heads, mlp_ratio=4.0, qkv_bias=True,
+                  drop=0.0, attn_drop=0.0):
+         super().__init__()
+         self.norm1 = nn.LayerNorm(dim)
+         self.attn = Attention(
+             dim, num_heads=num_heads, qkv_bias=qkv_bias,
+             attn_drop=attn_drop, proj_drop=drop
+         )
+         self.norm2 = nn.LayerNorm(dim)
+         self.mlp = MLP(
+             in_features=dim, hidden_features=int(dim*mlp_ratio),
+             out_features=dim, drop=drop
+         )
+
+     def forward(self, x):
+         x = x + self.attn(self.norm1(x))
+         x = x + self.mlp(self.norm2(x))
+         return x
+
+ class VisionTransformer(nn.Module):
+     def __init__(
+         self,
+         img_size=224,
+         patch_size=16,
+         in_chans=3,
+         num_classes=1000,
+         embed_dim=768,
+         depth=12,
+         num_heads=12,
+         mlp_ratio=4.0,
+         qkv_bias=True,
+         drop_rate=0.0,
+         attn_drop_rate=0.0
+     ):
+         super().__init__()
+         self.num_classes = num_classes
+         self.embed_dim = embed_dim
+         self.patch_embed = PatchEmbed(img_size, patch_size, in_chans, embed_dim)
+         self.num_patches = self.patch_embed.num_patches
+
+         # CLS token
+         self.cls_token = nn.Parameter(torch.zeros(1, 1, embed_dim))
+         # 1D positional embedding
+         self.pos_embed = nn.Parameter(torch.zeros(1, self.num_patches+1, embed_dim))
+         self.pos_drop = nn.Dropout(p=drop_rate)
+
+         # Transformer blocks
+         self.blocks = nn.ModuleList([
+             Block(embed_dim, num_heads, mlp_ratio,
+                   qkv_bias, drop_rate, attn_drop_rate)
+             for _ in range(depth)
+         ])
+         self.norm = nn.LayerNorm(embed_dim)
+
+         # Classifier head
+         self.head = nn.Linear(embed_dim, num_classes)
+
+         # Weight initialization
+         self._init_weights()
+
+     def _init_weights(self):
+         # simple initialization
+         torch.nn.init.normal_(self.pos_embed, std=0.02)
+         torch.nn.init.normal_(self.cls_token, std=0.02)
+         torch.nn.init.xavier_uniform_(self.head.weight)
+         torch.nn.init.normal_(self.head.bias, std=1e-6)
+
+     def forward(self, x):
+         # x shape: [B, 3, H, W]
+         B = x.shape[0]
+         x = self.patch_embed(x)  # -> [B, N, D]
+         cls_tokens = self.cls_token.expand(B, -1, -1)  # -> [B, 1, D]
+         x = torch.cat((cls_tokens, x), dim=1)  # -> [B, N+1, D]
+
+         x = x + self.pos_embed[:, :(x.size(1)), :]
+         x = self.pos_drop(x)
+
+         for blk in self.blocks:
+             x = blk(x)
+
+         x = self.norm(x)
+         # extract CLS token
+         cls_token_final = x[:, 0]
+         # classification
+         logits = self.head(cls_token_final)
+
+         return logits, cls_token_final
PictSure/model_embeddings.py ADDED
@@ -0,0 +1,47 @@
+ import torch
+ import torch.nn as nn
+ from torchvision import models
+ from .model_ViT import VisionTransformer
+
+ def load_encoder(device="cpu"):
+     base_model = models.resnet18(pretrained=True)
+     encoder = ResNetWrapper(base_model).to(device)
+     return encoder
+
+ class ResNetWrapper(nn.Module):
+     def __init__(self, classifier):
+         super(ResNetWrapper, self).__init__()
+         self.feature_extractor = nn.Sequential(*list(classifier.children())[:-1], torch.nn.Flatten())
+         self.latent_dim = self.feature_extractor(torch.zeros(1, 3, 224, 224)).shape[-1]
+
+     def forward(self, x):
+         num_images = x.size(1)
+         batch_size = x.size(0)
+         x = x.view(-1, 3, 224, 224)
+         x = self.feature_extractor(x)
+         x = x.view(batch_size, num_images, self.latent_dim)
+         return x
+
+     @property
+     def device(self):
+         return next(self.parameters()).device
+
+ class VitNetWrapper(nn.Module):
+     def __init__(self, path, num_classes=1000):
+         super().__init__()
+         self.embedding = VisionTransformer(num_classes=num_classes)
+         if path:
+             self.embedding.load_state_dict(torch.load(path))
+         self.latent_dim = self.embedding.embed_dim
+
+     def forward(self, x):
+         num_images = x.size(1)
+         batch_size = x.size(0)
+         x = x.view(-1, 3, 224, 224)
+         x = self.embedding.forward(x)[1]
+         x = x.view(batch_size, num_images, self.latent_dim)
+         return x
+
+     @property
+     def device(self):
+         return next(self.parameters()).device
pictsure-0.1.0.dist-info/METADATA ADDED
@@ -0,0 +1,148 @@
+ Metadata-Version: 2.4
+ Name: PictSure
+ Version: 0.1.0
+ Summary: A package for generalized image classification using In-Context-Learning with PyTorch.
+ Author-email: Cornelius Wolff <cornelius.wolff@cwi.nl>, Lukas Schiesser <lukas.schiesser@dfki.de>
+ License: MIT License
+
+ Copyright (c) 2025 Cornelius Wolff; Lukas Schiesser
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
+
+ Classifier: Programming Language :: Python :: 3
+ Classifier: License :: OSI Approved :: MIT License
+ Classifier: Operating System :: OS Independent
+ Requires-Python: >=3.9
+ Description-Content-Type: text/markdown
+ License-File: LICENSE
+ Requires-Dist: torch>=2.7.0
+ Requires-Dist: torchvision>=0.22.0
+ Requires-Dist: numpy>=1.26.4
+ Requires-Dist: Pillow
+ Requires-Dist: click>=8.1.7
+ Requires-Dist: tqdm>=4.66.4
+ Requires-Dist: requests>=2.32.3
+ Requires-Dist: huggingface-hub>=0.33.1
+ Requires-Dist: safetensors>=0.5.3
+ Dynamic: license-file
+
+ # PictSure: In-Context Learning for Image Classification
+
+ PictSure is a deep learning library designed for **in-context learning** using images and labels. It allows users to provide a set of labeled reference images and then predict labels for new images based on those references. This approach eliminates the need for traditional training, making it highly adaptable for various classification tasks.
+
+ <p align="center">
+   <img src="images/Flow-Chart.png" alt="The classification process" width="90%" />
+ </p>
+
+ ## Features
+ - **In-Context Learning**: Predict labels for new images using a set of reference images without traditional model training.
+ - **Multiple Model Architectures**: Choose between ResNet and ViT-based models for your specific needs.
+ - **Pretrained Models**: Use our pretrained models or train your own.
+ - **Torch Compatibility**: Fully integrated with PyTorch, supporting CPU and GPU.
+ - **Easy-to-use CLI**: Manage models and weights through a simple command-line interface.
+
+ ## Installation
+ 1. Clone this repository
+ ```bash
+ git clone https://git.ni.dfki.de/pictsure/pictsure-library
+ ```
+ 2. Navigate into the folder
+ ```bash
+ cd pictsure-library
+ ```
+ 3. Install the pip package
+ ```bash
+ pip install .
+ ```
+
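+ Since this version is published as a wheel, installing the released package directly from PyPI should also work:
+ ```bash
+ pip install pictsure
+ ```
+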
+ ## Quick Start
+ ```python
+ from PictSure import PictSure
+ import torch
+
+ # Initialize the model (using ViT as an example)
+ model = PictSure(
+     embedding='vit',  # or 'resnet'
+     pretrained=True,  # use pretrained weights
+     device='cuda'     # or 'cpu'
+ )
+
+ # you can also pull our pre-trained models from Huggingface
+ model = PictSure.from_pretrained("pictsure/pictsure-vit")
+
+ # Set your reference images and labels
+ model.set_context_images(reference_images, reference_labels)
+
+ # Make predictions on new images
+ predictions = model.predict(new_images)
+ ```
+
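+ The `reference_images` and `reference_labels` passed to `set_context_images` are simply a list of PIL images and a matching list of integer class labels. A minimal sketch of building them from a folder-per-class dataset (the `my_dataset/` layout and the 5-shot count are only illustrative):
+ ```python
+ import os
+ from PIL import Image
+
+ reference_images, reference_labels = [], []
+ for label, folder in enumerate(sorted(os.listdir("my_dataset"))):  # one sub-folder per class
+     for name in sorted(os.listdir(os.path.join("my_dataset", folder)))[:5]:  # a few shots per class
+         img = Image.open(os.path.join("my_dataset", folder, name)).convert("RGB")
+         reference_images.append(img)
+         reference_labels.append(label)
+ ```
+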
+ ## Command Line Interface
+ PictSure comes with a command-line interface to manage models and weights:
+
+ ### List Available Models
+ ```bash
+ pictsure list-models
+ ```
+ This command shows all available models, their status (downloaded/not downloaded), and detailed information about each model.
+
+ ### Remove Model Weights
+ ```bash
+ pictsure remove <model_name> [--force]
+ ```
+ Remove the weights of a specific model. Available models are:
+ - `ViTPreAll`: ViT-based model
+ - `ResPreAll`: ResNet-based model
+
+ Use the `--force` or `-f` flag to skip the confirmation prompt.
+
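+ For example, to delete the downloaded ViT weights without a confirmation prompt:
+ ```bash
+ pictsure remove ViTPreAll --force
+ ```
+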
+ ## Examples
+ For a complete working example, check out the Jupyter notebook in the Examples directory:
+ ```bash
+ Examples/example.ipynb
+ ```
+ This notebook demonstrates:
+ - Model initialization
+ - Loading and preprocessing images
+ - Setting up reference images
+ - Making predictions
+ - Visualizing results
+
+ ## Citation
+
+ If you use this work, please cite it using the following BibTeX entry:
+
+ ```bibtex
+ @article{schiesser2025pictsure,
+   title={PictSure: Pretraining Embeddings Matters for In-Context Learning Image Classifiers},
+   author={Schiesser, Lukas and Wolff, Cornelius and Haas, Sophie and Pukrop, Simon},
+   journal={arXiv preprint arXiv:2506.14842},
+   year={2025}
+ }
+ ```
+
+ ## License
+ This project is open-source under the MIT License.
+
+ ## Contributing
+ Contributions and suggestions are welcome! Open an issue or submit a pull request.
+
+ ## Contact
+ For questions or support, open an issue on GitHub.
+
pictsure-0.1.0.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+ Examples/example.py,sha256=kJl9EOYegRYnv_Q3enzxRw3_-1JahuXrBJlMKQ1lI9c,2480
+ PictSure/__init__.py,sha256=JD4sCnhwmPdDvCv4D2Tm5ZlrTVY7Yi1jYNI91hYKR2Q,154
+ PictSure/cli.py,sha256=mchQYMF-PXr9QhYGP05_EUkZRy99KB8NYx-Cfz01IAA,3317
+ PictSure/model_PictSure.py,sha256=hAv2Wc7N1sX-OgPB1aTo9lJxc9uEu-unvqTH8tzcgXU,10727
+ PictSure/model_ViT.py,sha256=7XXgyyZrT5v_1ReTwaCvR4EJ8VjXLBrmbPElu4reDMc,5372
+ PictSure/model_embeddings.py,sha256=o8_T-JE7dOUB7QgTQHMIzjCbNSGQux3css84lZTNxTw,1531
+ pictsure-0.1.0.dist-info/licenses/LICENSE,sha256=EWEw5rrEDvPxG3Wz_TSZJoGJ3J9k1Rv6yMluaBojABc,1089
+ pictsure-0.1.0.dist-info/METADATA,sha256=RVV2j6gRZI7dfwyjYSPrvyFTf-Yoa-kFM7gOZ7TwcCg,5270
+ pictsure-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+ pictsure-0.1.0.dist-info/entry_points.txt,sha256=TROsY1gBQxYsQfyNHKCSwDF6sxsFJYWdRguAtR1O1ec,46
+ pictsure-0.1.0.dist-info/top_level.txt,sha256=4c6FfUQfr4v2hzAizS1iifVQaGVSLWweO2DICgcIbe4,18
+ pictsure-0.1.0.dist-info/RECORD,,
pictsure-0.1.0.dist-info/WHEEL ADDED
@@ -0,0 +1,5 @@
+ Wheel-Version: 1.0
+ Generator: setuptools (80.9.0)
+ Root-Is-Purelib: true
+ Tag: py3-none-any
+
pictsure-0.1.0.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ pictsure = PictSure.cli:cli
pictsure-0.1.0.dist-info/licenses/LICENSE ADDED
@@ -0,0 +1,21 @@
+ MIT License
+
+ Copyright (c) 2025 Cornelius Wolff; Lukas Schiesser
+
+ Permission is hereby granted, free of charge, to any person obtaining a copy
+ of this software and associated documentation files (the "Software"), to deal
+ in the Software without restriction, including without limitation the rights
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ copies of the Software, and to permit persons to whom the Software is
+ furnished to do so, subject to the following conditions:
+
+ The above copyright notice and this permission notice shall be included in all
+ copies or substantial portions of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ SOFTWARE.
pictsure-0.1.0.dist-info/top_level.txt ADDED
@@ -0,0 +1,2 @@
+ Examples
+ PictSure