imageatlas 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
imageatlas/__init__.py CHANGED
@@ -2,7 +2,7 @@
2
2
  ImageAtlas: A toolkit for organizing, cleaning and analysing your image datasets.
3
3
  """
4
4
 
5
- __version__ = '0.1.0'
5
+ __version__ = '0.1.1'
6
6
 
7
7
 
8
8
  # 1. High level API (The everything tool)
@@ -1,7 +1,15 @@
1
+ """
2
+ Clustering Algorithms module.
3
+
4
+ This module provides various clustering algorithms with a unified interface for clustering
5
+ on image features.
6
+
7
+ """
1
8
  from .base import ClusteringResult, ClusteringAlgorithm
2
9
  from .kmeans import KMeansClustering
3
10
  from .hdbscan_clustering import HDBSCANClustering
4
11
  from .gmm import GMMClustering
12
+ from .factory import create_clustering_algorithm, get_available_algorithms
5
13
 
6
14
 
7
15
 
@@ -11,4 +19,6 @@ __all__ = [
11
19
  'KMeansClustering',
12
20
  'HDBSCANClustering',
13
21
  'GMMClustering',
22
+ 'create_clustering_algorithm',
23
+ 'get_available_algorithms'
14
24
  ]
@@ -8,7 +8,13 @@ import numpy as np
8
8
  class ClusteringResult:
9
9
  """
10
10
  Container for clustering Results.
11
+ Attributes:
12
+ cluster_labels: Array of cluster assignments for each sample.
13
+ cluster_dict: Dictionary mapping cluster IDs to list of sample indices.
14
+ n_clusters: Number of clusters found.
15
+ metadata: Additional algorithm-specific metadata.
11
16
  """
17
+
12
18
  cluster_labels: np.ndarray
13
19
  cluster_dict: Dict[int, List[int]]
14
20
  n_clusters: int
@@ -49,11 +55,18 @@ class ClusteringResult:
49
55
  class ClusteringAlgorithm(ABC):
50
56
  """
51
57
  Abstract base class for all clustering algorithms.
58
+
59
+ All the clustering algorithms must implement the fit_predict method and
60
+ provide a consistent interface for clustering operations.
52
61
  """
53
62
 
54
63
  def __init__(self, random_state=42, **kwargs):
55
64
  """
56
65
  Initialize the clustering algorithm.
66
+
67
+ Args:
68
+ random_state: Random seed for reproducibility.
69
+ **kwargs: Additional algorithm related parameters.
57
70
  """
58
71
  self.random_state = random_state
59
72
  self.params = kwargs
@@ -64,6 +77,12 @@ class ClusteringAlgorithm(ABC):
64
77
  def fit_predict(self, features) -> ClusteringResult:
65
78
  """
66
79
  Fit the clustering algorithm and predict cluster labels.
80
+
81
+ Args:
82
+ features: Feature matrix of shape (n_samples, n_features)
83
+
84
+ Returns:
85
+ ClusteringResult object containing cluster assignments and metadata.
67
86
  """
68
87
  pass
69
88
 
@@ -77,7 +96,14 @@ class ClusteringAlgorithm(ABC):
77
96
  def _validate_features(self, features:np.ndarray) -> None:
78
97
  """
79
98
  Validate the input feature matrix.
99
+
100
+ Args:
101
+ features: Feature matrix of shape (n_samples, n_features) to validate.
102
+
103
+ Raises:
104
+ ValueError: If features are invalid.
80
105
  """
106
+
81
107
  if not isinstance(features, np.ndarray):
82
108
  raise ValueError(f"Feature must be a numpy array, got {type(features)}")
83
109
 
@@ -93,6 +119,13 @@ class ClusteringAlgorithm(ABC):
93
119
  def _create_cluster_dict(self, cluster_labels, filenames=None):
94
120
  """
95
121
  Create a dictionary mapping cluster IDs to indices or filenames
122
+
123
+ Args:
124
+ cluster_labels: Array of cluster assignments.
125
+ filenames: Optional list of filenames corresponding to images.
126
+
127
+ Returns:
128
+ Dictionary mapping cluster IDs to lists of indices or filenames
96
129
  """
97
130
 
98
131
  cluster_dict = {}
@@ -22,6 +22,27 @@ def create_clustering_algorithm(
22
22
  ) -> ClusteringAlgorithm:
23
23
  """
24
24
  Factory function to create clustering algorithms.
25
+
26
+ Args:
27
+ method: Name of the clustering algorithm ('kmeans', 'gmm', 'hdbscan')
28
+ **kwargs: Algorithm specific parameters
29
+
30
+ Returns:
31
+ Instance of the requested clustering algorithm
32
+
33
+ Raises:
34
+ ValueError: If clustering method is not supported.
35
+
36
+
37
+ Examples:
38
+ >>> # Create KMeans with 5 clusters
39
+ >>> clusterer = create_clustering_algorithm('kmeans', n_clusters=5)
40
+
41
+ >>> # Create GMM with full covariance
42
+ >>> clusterer = create_clustering_algorithm('gmm', n_components=8, covariance_type='full')
43
+
44
+ >>> # Create HDBSCAN with auto parameters
45
+ >>> clusterer = create_clustering_algorithm('hdbscan', auto_params=True)
25
46
  """
26
47
 
27
48
  method = method.lower()
@@ -39,5 +60,8 @@ def create_clustering_algorithm(
39
60
  def get_available_algorithms():
40
61
  """
41
62
  Get a list of available clustering algorithms.
63
+
64
+ Returns:
65
+ List of algorithm names.
42
66
  """
43
67
  return sorted(CLUSTERING_ALGORITHMS.keys())
@@ -10,6 +10,14 @@ from .base import ClusteringAlgorithm, ClusteringResult
10
10
  class GMMClustering(ClusteringAlgorithm):
11
11
  """
12
12
  Gaussian Mixture Model clustering algorithm.
13
+
14
+ Args:
15
+ n_components: Number of mixture components (clusters)
16
+ covariance_type: Type of covariance parameters ('full', 'diag', 'tied', 'spherical')
17
+ max_iter: Maximum number of EM iterations
18
+ n_init: Number of initializations to perform
19
+ reg_covar: Regularization added to diagonal of covariance (prevents singular matrices)
20
+ random_state: Random seed for reproducibility
13
21
  """
14
22
 
15
23
  def __init__(
@@ -46,10 +54,16 @@ class GMMClustering(ClusteringAlgorithm):
46
54
 
47
55
  """
48
56
  Fit GMM and predict cluster labels.
57
+
58
+ Args:
59
+ features: Feature matrix of shape (n_samples, n_features)
60
+ filenames: Optional list of filenames for cluster mapping
61
+
62
+ Returns:
63
+ ClusteringResult object with cluster assignments.
49
64
  """
50
65
 
51
66
  self._validate_features(features)
52
- print('fshape: ', features.shape)
53
67
 
54
68
  n_samples = features.shape[0]
55
69
 
@@ -110,6 +124,15 @@ class GMMClustering(ClusteringAlgorithm):
110
124
  def predict(self, features):
111
125
  """
112
126
  Predict cluster label for new samples.
127
+
128
+ Args:
129
+ features: Feature matrix of shape (n_samples, n_features)
130
+
131
+ Returns:
132
+ Array of cluster labels
133
+
134
+ Raises:
135
+ RuntimeError: If model has not been fitted yet.
113
136
  """
114
137
 
115
138
  if not self.is_fitted or self._model is None:
@@ -121,6 +144,15 @@ class GMMClustering(ClusteringAlgorithm):
121
144
  def predict_proba(self, features):
122
145
  """
123
146
  Predict probability of each cluster for new samples.
147
+
148
+ Args:
149
+ features: Feature matrix of shape (n_samples, n_features)
150
+
151
+ Returns:
152
+ Array of cluster membership probabilities for each sample
153
+
154
+ Raises:
155
+ RuntimeError: If model has not been fitted yet.
124
156
  """
125
157
 
126
158
  if not self.is_fitted or self._model is None:
@@ -132,6 +164,9 @@ class GMMClustering(ClusteringAlgorithm):
132
164
  def get_cluster_means(self):
133
165
  """
134
166
  Get cluster means (centers) if model is fitted.
167
+
168
+ Returns:
169
+ Array of cluster centers or None if not fitted.
135
170
  """
136
171
 
137
172
  if self.is_fitted and self._model is not None:
@@ -142,6 +177,12 @@ class GMMClustering(ClusteringAlgorithm):
142
177
  def score(self, features):
143
178
  """
144
179
  Compute the log-likelihood of the data under the model.
180
+
181
+ Args:
182
+ features: Feature matrix of shape (n_samples, n_features)
183
+
184
+ Returns:
185
+ Log-likelihood score
145
186
  """
146
187
 
147
188
  if not self.is_fitted or self._model is None:
@@ -7,7 +7,16 @@ from .base import ClusteringAlgorithm, ClusteringResult
7
7
 
8
8
  class HDBSCANClustering(ClusteringAlgorithm):
9
9
  """
10
- HDBSCAN algorithm.
10
+ HDBSCAN (Hierarchical Density-Based Spatial Clustering) Algorithm.
11
+
12
+ Args:
13
+ min_cluster_size: Minimum number of samples in a cluster
14
+ min_samples: Number of samples in a neighborhood for core points.
15
+ metric: Distance metric to use
16
+ cluster_selection_method: Method for selecting clusters ('eom' or 'leaf')
17
+ auto_params: Whether to automatically set parameters based on dataset size
18
+ random_state: Random seed (note: HDBSCAN is deterministic, this is for consistency)
19
+
11
20
  """
12
21
 
13
22
  def __init__(
@@ -37,6 +46,12 @@ class HDBSCANClustering(ClusteringAlgorithm):
37
46
  def _auto_select_params(self, n_samples):
38
47
  """
39
48
  Automatically select HDBSCAN parameters based on dataset size.
49
+
50
+ Args:
51
+ n_samples: Number of samples in the dataset.
52
+
53
+ Returns:
54
+ Tuple of (min_cluster_size, min_samples)
40
55
  """
41
56
 
42
57
  if n_samples < 100:
@@ -62,6 +77,10 @@ class HDBSCANClustering(ClusteringAlgorithm):
62
77
 
63
78
  """
64
79
  Fit HDBSCAN and predict cluster labels.
80
+
81
+ Args:
82
+ features: Feature matrix of shape (n_samples, n_features)
83
+ filenames: Optional list of filenames for cluster mapping.
65
84
  """
66
85
 
67
86
  try:
@@ -137,6 +156,11 @@ class HDBSCANClustering(ClusteringAlgorithm):
137
156
  def get_outlier_score(self):
138
157
  """
139
158
  Get outlier score for each sample.
159
+
160
+ Higher scores indicate more likely outliers.
161
+
162
+ Returns:
163
+ Array of outlier scores or None if model is not fitted.
140
164
  """
141
165
 
142
166
  if self.is_fitted and self._model is not None:
@@ -147,6 +171,9 @@ class HDBSCANClustering(ClusteringAlgorithm):
147
171
  def get_condensed_tree(self):
148
172
  """
149
173
  Get condensed cluster hierarchy tree.
174
+
175
+ Returns:
176
+ Condensed cluster hierarchy tree or None if model not fitted.
150
177
  """
151
178
 
152
179
  if self.is_fitted and self._model is not None:
@@ -10,6 +10,14 @@ from typing import Optional
10
10
  class KMeansClustering(ClusteringAlgorithm):
11
11
  """
12
12
  K-Means clustering algorithm.
13
+
14
+ Args:
15
+ n_clusters: Number of clusters to form
16
+ n_init: Number of times to run with different centroid seeds
17
+ max_iter: Maximum number of iterations
18
+ use_minibatch: Whether to use MiniBatchKMeans for large datasets
19
+ batch_size: Batch size for MiniBatchKMeans
20
+ random_state: Random seed for reproducibility
13
21
  """
14
22
 
15
23
  def __init__(
@@ -42,6 +50,13 @@ class KMeansClustering(ClusteringAlgorithm):
42
50
 
43
51
  """
44
52
  Fit K-Means and predict cluster labels.
53
+
54
+ Args:
55
+ features: Feature matrix of shape (n_samples, n_features)
56
+ filenames: Optional list of filenames for cluster mapping
57
+
58
+ Returns:
59
+ ClusteringResult object with cluster assignments.
45
60
  """
46
61
 
47
62
  self._validate_features(features)
@@ -108,6 +123,15 @@ class KMeansClustering(ClusteringAlgorithm):
108
123
  def predict(self, features):
109
124
  """
110
125
  Predict cluster label for new samples.
126
+
127
+ Args:
128
+ features: Feature matrix of shape (n_samples, n_features)
129
+
130
+ Returns:
131
+ Array of cluster labels
132
+
133
+ Raises:
134
+ RuntimeError: If model has not yet been fitted.
111
135
  """
112
136
 
113
137
  if not self.is_fitted or self._model == None:
@@ -119,6 +143,9 @@ class KMeansClustering(ClusteringAlgorithm):
119
143
  def get_cluster_centers(self):
120
144
  """
121
145
  Get cluster centers if model is fitted.
146
+
147
+ Returns:
148
+ Array of cluster centers or None if not fitted.
122
149
  """
123
150
  if self.is_fitted and self._model is not None:
124
151
  return self._model.cluster_centers_
@@ -121,7 +121,7 @@ class HDF5Cache(FeatureCache):
121
121
  path = path + ".h5"
122
122
 
123
123
  if not self.exists(path):
124
- raise FileNotFoundError("fCache file not found: {path}")
124
+ raise FileNotFoundError(f"Cache file not found: {path}")
125
125
 
126
126
  with h5py.File(path, 'r') as f:
127
127
  # Load filenames
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: imageatlas
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: ImageAtlas: A toolkit for organizing, cleaning and analysing your image datasets.
5
5
  Author-email: Ahmad Javed <ahmadjaved97@gmail.com>
6
6
  Maintainer-email: Ahmad Javed <ahmadjaved97@gmail.com>
@@ -63,6 +63,7 @@ Requires-Dist: openpyxl; extra == "full"
63
63
  Dynamic: license-file
64
64
 
65
65
  # ImageAtlas
66
+ [![PyPI Downloads](https://static.pepy.tech/personalized-badge/imageatlas?period=total&units=INTERNATIONAL_SYSTEM&left_color=BLACK&right_color=GREEN&left_text=Downloads)](https://pepy.tech/projects/imageatlas)
66
67
 
67
68
  ## Overview
68
69
 
@@ -86,6 +87,12 @@ pip install imageatlas
86
87
  pip install imageatlas[full]
87
88
  ```
88
89
 
90
+ **Note on CLIP**: If you wish to use the CLIP model, you must install it manually from GitHub using:
91
+
92
+ ```
93
+ pip install git+https://github.com/openai/CLIP.git
94
+ ```
95
+
89
96
  **From Source**
90
97
  ```
91
98
  git clone https://github.com/ahmadjaved97/ImageAtlas.git
@@ -1,17 +1,17 @@
1
- imageatlas/__init__.py,sha256=DXFdfWA3Q9WFnWABC-WiTvAGjBVYByukgg_C0sj2Cjk,1062
2
- imageatlas/clustering/__init__.py,sha256=43lfR8IXne6EY1syAXLwmUaBlBLZSsxPckS0_ENlC48,311
3
- imageatlas/clustering/base.py,sha256=bCo2gfcAWH-K0kPVwB693dnnRLbJT-ZzEQsjoBP2p70,3915
4
- imageatlas/clustering/factory.py,sha256=QRvqo0vTLme_O_KH-eUu-eUe_iJMuj7I73qosswHvHA,1069
5
- imageatlas/clustering/gmm.py,sha256=FF7xJgoW08cxU-l9-uwV3f0Md05CqOad2d8fVBSxJSk,5064
6
- imageatlas/clustering/hdbscan_clustering.py,sha256=HwbBIP60YgsRU77jbTpPXg87-IQqIrbJP1fwohbNAXQ,5256
7
- imageatlas/clustering/kmeans.py,sha256=BGcQ9jGEOTUkBJ60Fk1rVJiwhh5Nch7m-A4jxV3us-E,4138
1
+ imageatlas/__init__.py,sha256=86BnEYgtEqL1tli1rI71ICbgpSCR8bwaSwVghk8ZG7I,1062
2
+ imageatlas/clustering/__init__.py,sha256=5BlL9QeyQbml08f8YfHtrSpLUfwb5tlFRc7u_VRtRsc,602
3
+ imageatlas/clustering/base.py,sha256=rpy_JI6nUakwF7qg1vUjy40FDqr7WyXu7W8dJvRd2qU,5082
4
+ imageatlas/clustering/factory.py,sha256=h_NRt-edgJh-5jzfDYIuv5eeXgV-mQiJCPzj29YjV_E,1822
5
+ imageatlas/clustering/gmm.py,sha256=heADnjFuQfZHxsedOmDLwORW2o1533sHrRGVOsjRUPE,6368
6
+ imageatlas/clustering/hdbscan_clustering.py,sha256=H8Yj74XCiCBnuVhqygGL4gHFaZrBiNwl14GiedmyQMI,6291
7
+ imageatlas/clustering/kmeans.py,sha256=M5ibQpgC43FQbEzDwKBPj4c4fjzWT3h3SvY670b9vRs,5043
8
8
  imageatlas/core/__init__.py,sha256=FnKCmANLS0flQzoPNAwTJbIJvn0JQXTlXDQ5n6F5rWo,347
9
9
  imageatlas/core/clusterer.py,sha256=-q6wovIfOhNJWwaU9sV1A9dTeksWFkCg8hZ14QwdDXM,11661
10
10
  imageatlas/core/results.py,sha256=jekDXZG4bjcmmsob21QKwWrRBucIxekEpz90E_uEnWs,11372
11
11
  imageatlas/features/__init__.py,sha256=Zk2IzFNhULQvzQWjscz2q9-lorpPHeARseoo-TwBJwU,442
12
12
  imageatlas/features/adapter.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
13
13
  imageatlas/features/batch.py,sha256=Xq8-qMV16L6JUyRXmSeAtDzcfxNCTN7T7JBVxgDGu88,4163
14
- imageatlas/features/cache.py,sha256=8g3g2uhVxVlwnzzBoj7NrAquFH7Mr9NzknN5du9JiL4,7273
14
+ imageatlas/features/cache.py,sha256=_teubTZ0wUmzXgXHD5REnmM-emBJYYxzYAWFY7WijNI,7273
15
15
  imageatlas/features/loaders.py,sha256=r8srbGXGnolj06HGJZjEShjnZLseAmyK66LeRvqgSHo,4816
16
16
  imageatlas/features/metadata.py,sha256=NQLg4aE-lcGxrOzDMWThF2zroQrYB17HvvARE-uLGxw,2338
17
17
  imageatlas/features/pipeline.py,sha256=wk3xlUbK8OrrwM6h7X0f0vbeFVcg6Z4gpt8Uzbp3tsQ,9666
@@ -35,8 +35,8 @@ imageatlas/reduction/tsne.py,sha256=Ra5vq8sWfGQ_0nfLL_QcO0zg6BIe_nhn7D948Hy_LAY,
35
35
  imageatlas/reduction/umap_reducer.py,sha256=Lwu5_lDZt9CKBFmu7qpKYwyY2grChUMKwHi4129fxcQ,2951
36
36
  imageatlas/visualization/__init__.py,sha256=sWZUMQn3p3s9IYuksZ0tInifnM4QwYTQIZpvL0GrwOc,171
37
37
  imageatlas/visualization/grids.py,sha256=MITFnFo81yua7VG2tIsC8obXr7Tf24XxYw2oCNFQFnU,5456
38
- imageatlas-0.1.0.dist-info/licenses/LICENSE,sha256=FM0ees3eP8Mm6C2J9euHxj8RjBIPQk5EWLxk4bYfez0,1068
39
- imageatlas-0.1.0.dist-info/METADATA,sha256=Ba25kLzexTq_YU0beuFcAVkr-Cr1SnHP2t_gcIxlUJQ,7910
40
- imageatlas-0.1.0.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
41
- imageatlas-0.1.0.dist-info/top_level.txt,sha256=jB6Ct7oH-wRZOSCZpFKo-yXZtkYcfq3ucb6eqI3JWig,11
42
- imageatlas-0.1.0.dist-info/RECORD,,
38
+ imageatlas-0.1.1.dist-info/licenses/LICENSE,sha256=FM0ees3eP8Mm6C2J9euHxj8RjBIPQk5EWLxk4bYfez0,1068
39
+ imageatlas-0.1.1.dist-info/METADATA,sha256=TynA0G4dzDJzPsHnJa7Xy_JwavL9tlxL5BC4saqd2T8,8282
40
+ imageatlas-0.1.1.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
41
+ imageatlas-0.1.1.dist-info/top_level.txt,sha256=jB6Ct7oH-wRZOSCZpFKo-yXZtkYcfq3ucb6eqI3JWig,11
42
+ imageatlas-0.1.1.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5