PyPI - imageatlas - Versions diffs - 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl - Mend

imageatlas 0.1.0py3-none-any.whl → 0.1.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

imageatlas/__init__.py +1 -1
imageatlas/clustering/__init__.py +10 -0
imageatlas/clustering/base.py +33 -0
imageatlas/clustering/factory.py +24 -0
imageatlas/clustering/gmm.py +42 -1
imageatlas/clustering/hdbscan_clustering.py +28 -1
imageatlas/clustering/kmeans.py +27 -0
imageatlas/features/cache.py +1 -1
{imageatlas-0.1.0.dist-info → imageatlas-0.1.1.dist-info}/METADATA +8 -1
{imageatlas-0.1.0.dist-info → imageatlas-0.1.1.dist-info}/RECORD +13 -13
{imageatlas-0.1.0.dist-info → imageatlas-0.1.1.dist-info}/WHEEL +1 -1
{imageatlas-0.1.0.dist-info → imageatlas-0.1.1.dist-info}/licenses/LICENSE +0 -0
{imageatlas-0.1.0.dist-info → imageatlas-0.1.1.dist-info}/top_level.txt +0 -0

imageatlas/__init__.py CHANGED Viewed

@@ -2,7 +2,7 @@
 ImageAtlas: A toolkit for organizing, cleaning and analysing your image datasets.
 """
-__version__ = '0.1.0'
+__version__ = '0.1.1'
 # 1. High level API (The everything tool)

imageatlas/clustering/__init__.py CHANGED Viewed

@@ -1,7 +1,15 @@
+"""
+Clustering Algorithms module.
+This module provides various clustering algorithms with a unified interface for clustering
+on image features.
+"""
 from .base import ClusteringResult, ClusteringAlgorithm
 from .kmeans import KMeansClustering
 from .hdbscan_clustering import HDBSCANClustering
 from .gmm import GMMClustering
+from .factory import create_clustering_algorithm, get_available_algorithms
@@ -11,4 +19,6 @@ __all__ = [
     'KMeansClustering',
     'HDBSCANClustering',
     'GMMClustering',
+    'create_clustering_algorithm',
+    'get_available_algorithms'
 ]

imageatlas/clustering/base.py CHANGED Viewed

@@ -8,7 +8,13 @@ import numpy as np
 class ClusteringResult:
     """
     Container for clustering Results.
+    Attributes:
+       cluster_labels: Array of cluster assignments for each sample.
+       cluster_dict: Dictionary mapping cluster IDs to list of sample indices.
+       n_clusters: Number of clusters found.
+       metadata: Additional algorithm-specific metadata.
     """
     cluster_labels: np.ndarray
     cluster_dict: Dict[int, List[int]]
     n_clusters: int
@@ -49,11 +55,18 @@ class ClusteringResult:
 class ClusteringAlgorithm(ABC):
     """
     Abstract base class for all clustering algorithms.
+    All the clustering algorithms must implement the fit_predict method and
+    provide a consistent interface for clustering operations.
     """
     def __init__(self, random_state=42, **kwargs):
         """
         Initialize the clustering algorithm.
+        Args:
+            random_state: Random seed for reproducibility.
+            **kwargs: Additional algorithm related parameters.
         """
         self.random_state = random_state
         self.params = kwargs
@@ -64,6 +77,12 @@ class ClusteringAlgorithm(ABC):
     def fit_predict(self, features) -> ClusteringResult:
         """
         Fit the clustering algorithm and predict cluster labels.
+        Args:
+            features: Feature matrix of shape (n_samples, n_features)
+        Returns:
+            ClusteringResult object containing cluster assignments and metadata.
         """
         pass
@@ -77,7 +96,14 @@ class ClusteringAlgorithm(ABC):
     def _validate_features(self, features:np.ndarray) -> None:
         """
         Validate the input feature matrix.
+        Args:
+            features: Feature matrix of shape (n_samples, n_features) to validate.
+        Raises:
+            ValueError: If features are invalid.
         """
         if not isinstance(features, np.ndarray):
             raise ValueError(f"Feature must be a numpy array, got {type(features)}")
@@ -93,6 +119,13 @@ class ClusteringAlgorithm(ABC):
     def _create_cluster_dict(self, cluster_labels, filenames=None):
         """
         Create a dictionary mapping cluster IDs to indices or filenames.
+        Args:
+            cluster_labels: Array of cluster assignments.
+            filenames: Optional list of filenames corresponding to images.
+        Returns:
+            Dictionary mapping cluster IDs to lists of indices or filenames
         """
         cluster_dict = {}

imageatlas/clustering/factory.py CHANGED Viewed

@@ -22,6 +22,27 @@ def create_clustering_algorithm(
 ) -> ClusteringAlgorithm:
     """
     Factory function to create clustering algorithms.
+    Args:
+        method: Name of the clustering algorithm ('kmeans', 'gmm', 'hdbscan')
+        **kwargs: Algorithm specific parameters
+    Returns:
+        Instance of the requested clustering algorithm
+    Raises:
+        ValueError: If the clustering method is not supported.
+    Examples:
+        >>> # Create KMeans with 5 clusters
+        >>> clusterer = create_clustering_algorithm('kmeans', n_clusters=5)
+        >>> # Create GMM with full covariance
+        >>> clusterer = create_clustering_algorithm('gmm', n_components=8, covariance_type='full')
+        >>> # Create HDBSCAN with auto parameters
+        >>> clusterer = create_clustering_algorithm('hdbscan', auto_params=True)
     """
     method = method.lower()
@@ -39,5 +60,8 @@ def create_clustering_algorithm(
 def get_available_algorithms():
     """
     Get a list of available clustering algorithms.
+    Returns:
+        List of algorithm names.
     """
     return sorted(CLUSTERING_ALGORITHMS.keys())

imageatlas/clustering/gmm.py CHANGED Viewed

@@ -10,6 +10,14 @@ from .base import ClusteringAlgorithm, ClusteringResult
 class GMMClustering(ClusteringAlgorithm):
     """
     Gaussian Mixture Model clustering algorithm.
+    Args:
+        n_components: Number of mixture components (clusters)
+        covariance_type: Type of covariance parameters ('full', 'diag', 'tied', 'spherical')
+        max_iter: Maximum number of EM iterations
+        n_init: Number of initializations to perform
+        reg_covar: Regularization added to diagonal of covariance (prevents singular matrices)
+        random_state: Random seed for reproducibility
     """
     def __init__(
@@ -46,10 +54,16 @@ class GMMClustering(ClusteringAlgorithm):
         """
         Fit GMM and predict cluster labels.
+        Args:
+            features: Feature matrix of shape (n_samples, n_features)
+            filenames: Optional list of filenames for cluster mapping
+        Returns:
+            ClusteringResult object with cluster assignments.
         """
         self._validate_features(features)
-        print('fshape: ', features.shape)
         n_samples = features.shape[0]
@@ -110,6 +124,15 @@ class GMMClustering(ClusteringAlgorithm):
     def predict(self, features):
         """
         Predict cluster label for new samples.
+        Args:
+            features: Feature matrix of shape (n_samples, n_features)
+        Returns:
+            Array of cluster labels
+        Raises:
+            RuntimeError: If model has not been fitted yet.
         """
         if not self.is_fitted or self._model is None:
@@ -121,6 +144,15 @@ class GMMClustering(ClusteringAlgorithm):
     def predict_proba(self, features):
         """
         Predict probability of each cluster for new samples.
+        Args:
+            features: Feature matrix of shape (n_samples, n_features)
+        Returns:
+            Array of cluster membership probabilities for each sample
+        Raises:
+            RuntimeError: If model has not been fitted yet.
         """
         if not self.is_fitted or self._model is None:
@@ -132,6 +164,9 @@ class GMMClustering(ClusteringAlgorithm):
     def get_cluster_means(self):
         """
         Get cluster means (centers) if model is fitted.
+        Returns:
+            Array of cluster centers or None if not fitted.
         """
         if self.is_fitted and self._model is not None:
@@ -142,6 +177,12 @@ class GMMClustering(ClusteringAlgorithm):
     def score(self, features):
         """
         Compute the log-likelihood of the data under the model.
+        Args:
+            features: Feature matrix of shape (n_samples, n_features)
+        Returns:
+            Log-likelihood score
         """
         if not self.is_fitted or self._model is None:

imageatlas/clustering/hdbscan_clustering.py CHANGED Viewed

@@ -7,7 +7,16 @@ from .base import ClusteringAlgorithm, ClusteringResult
 class HDBSCANClustering(ClusteringAlgorithm):
     """
-    HDBSCAN algorithm.
+    HDBSCAN (Hierarchical Density-Based Spatial Clustering) Algorithm.
+    Args:
+        min_cluster_size: Minimum number of samples in a cluster
+        min_samples: Number of samples in a neighborhood for core points.
+        metric: Distance metric to use
+        cluster_selection_method: Method for selecting clusters ('eom' or 'leaf')
+        auto_params: Whether to automatically set parameters based on dataset size
+        random_state: Random seed (note: HDBSCAN is deterministic, this is for consistency)
     """
     def __init__(
@@ -37,6 +46,12 @@ class HDBSCANClustering(ClusteringAlgorithm):
     def _auto_select_params(self, n_samples):
         """
         Automatically select HDBSCAN parameters based on dataset size.
+        Args:
+            n_samples: Number of samples in the dataset.
+        Returns:
+            Tuple of (min_cluster_size, min_samples)
         """
         if n_samples < 100:
@@ -62,6 +77,10 @@ class HDBSCANClustering(ClusteringAlgorithm):
         """
         Fit HDBSCAN and predict cluster labels.
+        Args:
+            features: Feature matrix of shape (n_samples, n_features)
+            filenames: Optional list of filenames for cluster mapping.
         """
         try:
@@ -137,6 +156,11 @@ class HDBSCANClustering(ClusteringAlgorithm):
     def get_outlier_score(self):
         """
         Get outlier score for each sample.
+        Higher scores indicate more likely outliers.
+        Returns:
+            Array of outlier scores or None if model is not fitted.
         """
         if self.is_fitted and self._model is not None:
@@ -147,6 +171,9 @@ class HDBSCANClustering(ClusteringAlgorithm):
     def get_condensed_tree(self):
         """
         Get condensed cluster hierarchy tree.
+        Returns:
+            Condensed cluster hierarchy tree, or None if the model is not fitted.
         """
         if self.is_fitted and self._model is not None:

imageatlas/clustering/kmeans.py CHANGED Viewed

@@ -10,6 +10,14 @@ from typing import Optional
 class KMeansClustering(ClusteringAlgorithm):
     """
     K-Means clustering algorithm.
+    Args:
+        n_clusters: Number of clusters to form
+        n_init: Number of times to run with different centroid seeds
+        max_iter: Maximum number of iterations
+        use_minibatch: Whether to use MiniBatchKMeans for large datasets
+        batch_size: Batch size for MiniBatchKMeans
+        random_state: Random seed for reproducibility
     """
     def __init__(
@@ -42,6 +50,13 @@ class KMeansClustering(ClusteringAlgorithm):
         """
         Fit K-Means and predict cluster labels.
+        Args:
+            features: Feature matrix of shape (n_samples, n_features)
+            filenames: Optional list of filenames for cluster mapping
+        Returns:
+            ClusteringResult object with cluster assignments.
         """
         self._validate_features(features)
@@ -108,6 +123,15 @@ class KMeansClustering(ClusteringAlgorithm):
     def predict(self, features):
         """
         Predict cluster label for new samples.
+        Args:
+            features: Feature matrix of shape (n_samples, n_features)
+        Returns:
+            Array of cluster labels
+        Raises:
+            RuntimeError: If model has not yet been fitted.
         """
         if not self.is_fitted or self._model == None:
@@ -119,6 +143,9 @@ class KMeansClustering(ClusteringAlgorithm):
     def get_cluster_centers(self):
         """
         Get cluster centers if model is fitted.
+        Returns:
+            Array of cluster centers or None if not fitted.
         """
         if self.is_fitted and self._model is not None:
             return self._model.cluster_centers_

imageatlas/features/cache.py CHANGED Viewed

@@ -121,7 +121,7 @@ class HDF5Cache(FeatureCache):
             path = path + ".h5"
         if not self.exists(path):
-            raise FileNotFoundError("fCache file not found: {path}")
+            raise FileNotFoundError(f"Cache file not found: {path}")
         with h5py.File(path, 'r') as f:
             # Load filenames

{imageatlas-0.1.0.dist-info → imageatlas-0.1.1.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: imageatlas
-Version: 0.1.0
+Version: 0.1.1
 Summary: ImageAtlas: A toolkit for organizing, cleaning and analysing your image datasets.
 Author-email: Ahmad Javed <ahmadjaved97@gmail.com>
 Maintainer-email: Ahmad Javed <ahmadjaved97@gmail.com>
@@ -63,6 +63,7 @@ Requires-Dist: openpyxl; extra == "full"
 Dynamic: license-file
 # ImageAtlas
+[![PyPI Downloads](https://static.pepy.tech/personalized-badge/imageatlas?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=Downloads)](https://pepy.tech/projects/imageatlas)
 ## Overview
@@ -86,6 +87,12 @@ pip install imageatlas
 pip install imageatlas[full]
 ```
+**Note on CLIP**: If you wish to use the CLIP model, you must install it manually from GitHub using:
+```
+pip install git+https://github.com/openai/CLIP.git
+```
 **From Source**
 ```
 git clone https://github.com/ahmadjaved97/ImageAtlas.git

{imageatlas-0.1.0.dist-info → imageatlas-0.1.1.dist-info}/RECORD RENAMED Viewed

@@ -1,17 +1,17 @@
-imageatlas/__init__.py,sha256=DXFdfWA3Q9WFnWABC-WiTvAGjBVYByukgg_C0sj2Cjk,1062
-imageatlas/clustering/__init__.py,sha256=43lfR8IXne6EY1syAXLwmUaBlBLZSsxPckS0_ENlC48,311
-imageatlas/clustering/base.py,sha256=bCo2gfcAWH-K0kPVwB693dnnRLbJT-ZzEQsjoBP2p70,3915
-imageatlas/clustering/factory.py,sha256=QRvqo0vTLme_O_KH-eUu-eUe_iJMuj7I73qosswHvHA,1069
-imageatlas/clustering/gmm.py,sha256=FF7xJgoW08cxU-l9-uwV3f0Md05CqOad2d8fVBSxJSk,5064
-imageatlas/clustering/hdbscan_clustering.py,sha256=HwbBIP60YgsRU77jbTpPXg87-IQqIrbJP1fwohbNAXQ,5256
-imageatlas/clustering/kmeans.py,sha256=BGcQ9jGEOTUkBJ60Fk1rVJiwhh5Nch7m-A4jxV3us-E,4138
+imageatlas/__init__.py,sha256=86BnEYgtEqL1tli1rI71ICbgpSCR8bwaSwVghk8ZG7I,1062
+imageatlas/clustering/__init__.py,sha256=5BlL9QeyQbml08f8YfHtrSpLUfwb5tlFRc7u_VRtRsc,602
+imageatlas/clustering/base.py,sha256=rpy_JI6nUakwF7qg1vUjy40FDqr7WyXu7W8dJvRd2qU,5082
+imageatlas/clustering/factory.py,sha256=h_NRt-edgJh-5jzfDYIuv5eeXgV-mQiJCPzj29YjV_E,1822
+imageatlas/clustering/gmm.py,sha256=heADnjFuQfZHxsedOmDLwORW2o1533sHrRGVOsjRUPE,6368
+imageatlas/clustering/hdbscan_clustering.py,sha256=H8Yj74XCiCBnuVhqygGL4gHFaZrBiNwl14GiedmyQMI,6291
+imageatlas/clustering/kmeans.py,sha256=M5ibQpgC43FQbEzDwKBPj4c4fjzWT3h3SvY670b9vRs,5043
 imageatlas/core/__init__.py,sha256=FnKCmANLS0flQzoPNAwTJbIJvn0JQXTlXDQ5n6F5rWo,347
 imageatlas/core/clusterer.py,sha256=-q6wovIfOhNJWwaU9sV1A9dTeksWFkCg8hZ14QwdDXM,11661
 imageatlas/core/results.py,sha256=jekDXZG4bjcmmsob21QKwWrRBucIxekEpz90E_uEnWs,11372
 imageatlas/features/__init__.py,sha256=Zk2IzFNhULQvzQWjscz2q9-lorpPHeARseoo-TwBJwU,442
 imageatlas/features/adapter.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 imageatlas/features/batch.py,sha256=Xq8-qMV16L6JUyRXmSeAtDzcfxNCTN7T7JBVxgDGu88,4163
-imageatlas/features/cache.py,sha256=8g3g2uhVxVlwnzzBoj7NrAquFH7Mr9NzknN5du9JiL4,7273
+imageatlas/features/cache.py,sha256=_teubTZ0wUmzXgXHD5REnmM-emBJYYxzYAWFY7WijNI,7273
 imageatlas/features/loaders.py,sha256=r8srbGXGnolj06HGJZjEShjnZLseAmyK66LeRvqgSHo,4816
 imageatlas/features/metadata.py,sha256=NQLg4aE-lcGxrOzDMWThF2zroQrYB17HvvARE-uLGxw,2338
 imageatlas/features/pipeline.py,sha256=wk3xlUbK8OrrwM6h7X0f0vbeFVcg6Z4gpt8Uzbp3tsQ,9666
@@ -35,8 +35,8 @@ imageatlas/reduction/tsne.py,sha256=Ra5vq8sWfGQ_0nfLL_QcO0zg6BIe_nhn7D948Hy_LAY,
 imageatlas/reduction/umap_reducer.py,sha256=Lwu5_lDZt9CKBFmu7qpKYwyY2grChUMKwHi4129fxcQ,2951
 imageatlas/visualization/__init__.py,sha256=sWZUMQn3p3s9IYuksZ0tInifnM4QwYTQIZpvL0GrwOc,171
 imageatlas/visualization/grids.py,sha256=MITFnFo81yua7VG2tIsC8obXr7Tf24XxYw2oCNFQFnU,5456
-imageatlas-0.1.0.dist-info/licenses/LICENSE,sha256=FM0ees3eP8Mm6C2J9euHxj8RjBIPQk5EWLxk4bYfez0,1068
-imageatlas-0.1.0.dist-info/METADATA,sha256=Ba25kLzexTq_YU0beuFcAVkr-Cr1SnHP2t_gcIxlUJQ,7910
-imageatlas-0.1.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-imageatlas-0.1.0.dist-info/top_level.txt,sha256=jB6Ct7oH-wRZOSCZpFKo-yXZtkYcfq3ucb6eqI3JWig,11
-imageatlas-0.1.0.dist-info/RECORD,,
+imageatlas-0.1.1.dist-info/licenses/LICENSE,sha256=FM0ees3eP8Mm6C2J9euHxj8RjBIPQk5EWLxk4bYfez0,1068
+imageatlas-0.1.1.dist-info/METADATA,sha256=TynA0G4dzDJzPsHnJa7Xy_JwavL9tlxL5BC4saqd2T8,8282
+imageatlas-0.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+imageatlas-0.1.1.dist-info/top_level.txt,sha256=jB6Ct7oH-wRZOSCZpFKo-yXZtkYcfq3ucb6eqI3JWig,11
+imageatlas-0.1.1.dist-info/RECORD,,

{imageatlas-0.1.0.dist-info → imageatlas-0.1.1.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.10.1)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

{imageatlas-0.1.0.dist-info → imageatlas-0.1.1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{imageatlas-0.1.0.dist-info → imageatlas-0.1.1.dist-info}/top_level.txt RENAMED Viewed

File without changes

imageatlas 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

imageatlas 0.1.0py3-none-any.whl → 0.1.1py3-none-any.whl