imageatlas 0.1.1__tar.gz → 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. {imageatlas-0.1.1 → imageatlas-0.1.2}/PKG-INFO +1 -1
  2. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/__init__.py +1 -1
  3. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/batch.py +23 -1
  4. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/cache.py +38 -0
  5. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/loaders.py +52 -0
  6. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/metadata.py +3 -0
  7. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/pipeline.py +50 -0
  8. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas.egg-info/PKG-INFO +1 -1
  9. {imageatlas-0.1.1 → imageatlas-0.1.2}/CHANGELOG.md +0 -0
  10. {imageatlas-0.1.1 → imageatlas-0.1.2}/CONTRIBUTING.md +0 -0
  11. {imageatlas-0.1.1 → imageatlas-0.1.2}/LICENSE +0 -0
  12. {imageatlas-0.1.1 → imageatlas-0.1.2}/MANIFEST.in +0 -0
  13. {imageatlas-0.1.1 → imageatlas-0.1.2}/README.md +0 -0
  14. {imageatlas-0.1.1 → imageatlas-0.1.2}/examples/example_apis.ipynb +0 -0
  15. {imageatlas-0.1.1 → imageatlas-0.1.2}/examples/example_complete_workflow.py +0 -0
  16. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/clustering/__init__.py +0 -0
  17. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/clustering/base.py +0 -0
  18. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/clustering/factory.py +0 -0
  19. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/clustering/gmm.py +0 -0
  20. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/clustering/hdbscan_clustering.py +0 -0
  21. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/clustering/kmeans.py +0 -0
  22. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/core/__init__.py +0 -0
  23. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/core/clusterer.py +0 -0
  24. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/core/results.py +0 -0
  25. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/__init__.py +0 -0
  26. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/adapter.py +0 -0
  27. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/__init__.py +0 -0
  28. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/base.py +0 -0
  29. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/clip.py +0 -0
  30. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/convnext.py +0 -0
  31. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/dinov2.py +0 -0
  32. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/efficientnet.py +0 -0
  33. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/factory.py +0 -0
  34. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/mobilenet.py +0 -0
  35. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/resnet.py +0 -0
  36. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/swin.py +0 -0
  37. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/vgg.py +0 -0
  38. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/features/extractors/vit.py +0 -0
  39. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/reduction/__init__.py +0 -0
  40. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/reduction/base.py +0 -0
  41. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/reduction/factory.py +0 -0
  42. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/reduction/pca.py +0 -0
  43. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/reduction/tsne.py +0 -0
  44. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/reduction/umap_reducer.py +0 -0
  45. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/visualization/__init__.py +0 -0
  46. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas/visualization/grids.py +0 -0
  47. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas.egg-info/SOURCES.txt +0 -0
  48. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas.egg-info/dependency_links.txt +0 -0
  49. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas.egg-info/requires.txt +0 -0
  50. {imageatlas-0.1.1 → imageatlas-0.1.2}/imageatlas.egg-info/top_level.txt +0 -0
  51. {imageatlas-0.1.1 → imageatlas-0.1.2}/pyproject.toml +0 -0
  52. {imageatlas-0.1.1 → imageatlas-0.1.2}/requirements.txt +0 -0
  53. {imageatlas-0.1.1 → imageatlas-0.1.2}/setup.cfg +0 -0
  54. {imageatlas-0.1.1 → imageatlas-0.1.2}/tests/test_batch_processing.py +0 -0
  55. {imageatlas-0.1.1 → imageatlas-0.1.2}/tests/test_core_api.py +0 -0
  56. {imageatlas-0.1.1 → imageatlas-0.1.2}/tests/test_features_pipeline.py +0 -0
  57. {imageatlas-0.1.1 → imageatlas-0.1.2}/tests/test_reduction_module.py +0 -0
  58. {imageatlas-0.1.1 → imageatlas-0.1.2}/tests/test_visualization.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: imageatlas
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: ImageAtlas: A toolkit for organizing, cleaning and analysing your image datasets.
5
5
  Author-email: Ahmad Javed <ahmadjaved97@gmail.com>
6
6
  Maintainer-email: Ahmad Javed <ahmadjaved97@gmail.com>
@@ -2,7 +2,7 @@
2
2
  ImageAtlas: A toolkit for organizing, cleaning and analysing your image datasets.
3
3
  """
4
4
 
5
- __version__ = '0.1.1'
5
+ __version__ = '0.1.2'
6
6
 
7
7
 
8
8
  # 1. High level API (The everything tool)
@@ -10,6 +10,8 @@ import warnings
10
10
  class BatchProcessor:
11
11
  """
12
12
  Handles batch processing of images through feature extractors.
13
+
14
+ Manages batching, device placement and memory cleanup.
13
15
  """
14
16
 
15
17
  def __init__(
@@ -20,6 +22,11 @@ class BatchProcessor:
20
22
  ):
21
23
  """
22
24
  Initialize batch processor.
25
+
26
+ Args:
27
+ batch_size: Number of images to process at once.
28
+ device: Device to use ('cpu', 'cuda', 'cuda:0', etc.)
29
+ clear_cache: Whether to clear GPU cache after each batch
23
30
  """
24
31
 
25
32
  self.batch_size = batch_size
@@ -50,8 +57,15 @@ class BatchProcessor:
50
57
  ):
51
58
  """
52
59
  Process a batch of images through the feature extractor.
53
- TODO: use the correct batching method in the feature_extractors module.
60
+ Args:
61
+ images: List of PIL Images.
62
+ extractor: Feature extractor with extract_features method
63
+ return_numpy: Whether to return numpy array (vs torch tensor)
64
+
65
+ Returns:
66
+ Array of feature vectors, shape (batch_size, feature_dim)
54
67
  """
68
+ # TODO: use the correct batching method in the feature_extractors module.
55
69
 
56
70
  if not images:
57
71
  return np.array([])
@@ -108,6 +122,14 @@ class BatchProcessor:
108
122
  ):
109
123
  """
110
124
  Estimate memory usage for a batch.
125
+
126
+ Args:
127
+ n_images: Number of images in a batch
128
+ feature_dim: Dimensions of feature vector
129
+ dtype: Data type of features
130
+
131
+ Returns:
132
+ Estimated memory in GB
111
133
  """
112
134
 
113
135
  bytes_per_element = np.dtype(dtype).itemsize
@@ -70,6 +70,12 @@ class HDF5Cache(FeatureCache):
70
70
  ):
71
71
  """
72
72
  Save features to HDF5 file.
73
+
74
+ Args:
75
+ features: Feature array, shape (n_samples, feature_dim)
76
+ filenames: List of filenames corresponding to features
77
+ metadata: Feature metadata
78
+ path: Path to save HDF5 file
73
79
  """
74
80
 
75
81
  # Make sure path has .h5 extension
@@ -115,6 +121,13 @@ class HDF5Cache(FeatureCache):
115
121
  ):
116
122
  """
117
123
  Load features from HDF5 file.
124
+
125
+ Args:
126
+ path: Path to HDF5 File
127
+ lazy: If True, return memory-mapped array instead of loading to RAM
128
+
129
+ Returns:
130
+ Tuple of (features, filenames, metadata)
118
131
  """
119
132
 
120
133
  if not path.endswith(".h5"):
@@ -161,6 +174,14 @@ class HDF5Cache(FeatureCache):
161
174
  ):
162
175
  """
163
176
  Load a subset of features.
177
+
178
+ Args:
179
+ path: Path to HDF5 file
180
+ indices: Indices to load (if provided)
181
+ filenames: Filenames to load (if provided)
182
+
183
+ Returns:
184
+ Tuple of (features, filenames)
164
185
  """
165
186
 
166
187
  if not path.endswith(".h5"):
@@ -193,6 +214,11 @@ class HDF5Cache(FeatureCache):
193
214
  ):
194
215
  """
195
216
  Append new features to existing cache.
217
+
218
+ Args:
219
+ path: Path to the HDF5 file
220
+ new_features: New features to append
221
+ new_filenames: Corresponding filenames
196
222
  """
197
223
 
198
224
  if not path.endswith(".h5"):
@@ -223,6 +249,12 @@ class HDF5Cache(FeatureCache):
223
249
  def get_feature_dict(self, path):
224
250
  """
225
251
  Load features as dictionary (for backward compatibility)
252
+
253
+ Args:
254
+ path: Path to HDF5 file
255
+
256
+ Returns:
257
+ Dictionary mapping filenames to feature vectors
226
258
  """
227
259
  features, filenames, _ = self.load(path)
228
260
  return {fn: feat for fn, feat in zip(filenames, features)}
@@ -230,6 +262,12 @@ class HDF5Cache(FeatureCache):
230
262
  def get_info(self, path):
231
263
  """
232
264
  Get information about the cache without loading data.
265
+
266
+ Args:
267
+ path: Path to HDF5 file
268
+
269
+ Returns:
270
+ Dictionary with cache information
233
271
  """
234
272
 
235
273
  if not path.endswith(".h5"):
@@ -12,6 +12,8 @@ import warnings
12
12
  class ImageLoader:
13
13
  """
14
14
  Image loader with validation and error handling.
15
+
16
+ Handles corrupted images, format conversions, and EXIF orientations.
15
17
  """
16
18
  VALID_EXTENSIONS = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.tif', '.webp'}
17
19
 
@@ -23,6 +25,11 @@ class ImageLoader:
23
25
  ):
24
26
  """
25
27
  Initialize image loader.
28
+
29
+ Args:
30
+ max_size: Optional maximum size (width, height) for images
31
+ convert_mode: PIL mode to convert images to ('RGB', 'L', etc.)
32
+ handle_exif: Whether to handle EXIF orientation
26
33
  """
27
34
 
28
35
  self.max_size = max_size
@@ -33,6 +40,12 @@ class ImageLoader:
33
40
  def validate_path(self, path):
34
41
  """
35
42
  Check if path is valid
43
+
44
+ Args:
45
+ path: Path to image file
46
+
47
+ Returns:
48
+ True if valid, False otherwise
36
49
  """
37
50
  if not os.path.exists(path):
38
51
  return False
@@ -43,6 +56,12 @@ class ImageLoader:
43
56
  def load_image(self, path):
44
57
  """
45
58
  Load a single image.
59
+
60
+ Args:
61
+ path: Path to image file
62
+
63
+ Returns:
64
+ PIL Image or None if loading failed
46
65
  """
47
66
 
48
67
  try:
@@ -78,6 +97,12 @@ class ImageLoader:
78
97
  def load_batch(self, paths):
79
98
  """
80
99
  Load a batch of images.
100
+
101
+ Args:
102
+ paths: List of image paths
103
+
104
+ Returns:
105
+ Tuple of (loaded_images, successful_paths, failed_paths)
81
106
  """
82
107
 
83
108
  images = []
@@ -97,6 +122,12 @@ class ImageLoader:
97
122
  def _handle_orientation(self, image):
98
123
  """
99
124
  Handle EXIF orientation tag.
125
+
126
+ Args:
127
+ image: PIL Image
128
+
129
+ Returns:
130
+ Oriented image
100
131
  """
101
132
 
102
133
  try:
@@ -129,6 +160,12 @@ class ImageLoader:
129
160
  def _resize_if_needed(self, image):
130
161
  """
131
162
  Resize image if it exceeds max size.
163
+
164
+ Args:
165
+ image: PIL Image
166
+
167
+ Returns:
168
+ Resized image
132
169
  """
133
170
  if self.max_size is None:
134
171
  return image
@@ -149,6 +186,14 @@ class ImageLoader:
149
186
  ):
150
187
  """
151
188
  Find all images in a directory.
189
+
190
+ Args:
191
+ directory: Directory to search
192
+ pattern: Glob pattern for filenames
193
+ recursive: Whether to search recursively
194
+
195
+ Returns:
196
+ List of image paths
152
197
  """
153
198
  path = Path(directory)
154
199
 
@@ -180,6 +225,13 @@ class ImageLoader:
180
225
 
181
226
  """
182
227
  Create batches from a list of items.
228
+
229
+ Args:
230
+ items: List of items to batch
231
+ batch_size: Size of each batch
232
+
233
+ Yields:
234
+ Batches of items
183
235
  """
184
236
 
185
237
  for i in range(0, len(items), batch_size):
@@ -12,6 +12,9 @@ import json
12
12
  class FeatureMetadata:
13
13
  """
14
14
  Metadata for extracted features.
15
+
16
+ Tracks information about the feature extraction process including
17
+ model details, extraction parameters, and statistics.
15
18
  """
16
19
 
17
20
  # Model information
@@ -17,6 +17,18 @@ from .metadata import FeatureMetadata
17
17
  class FeaturePipeline:
18
18
  """
19
19
  Main pipeline for feature extraction.
20
+
21
+ Handles batch processing, caching, progress tracking, and error recovery.
22
+
23
+ Example:
24
+ >>> from features import FeaturePipeline
25
+ >>> from feature_extractors import create_feature_extractor
26
+ >>>
27
+ >>> extractor = create_feature_extractor('dinov2', device='cuda')
28
+ >>> pipeline = FeaturePipeline(extractor, batch_size=32)
29
+ >>>
30
+ >>> result = pipeline.extract_from_directory('./images')
31
+ >>> pipeline.save('./features/features.h5')
20
32
  """
21
33
 
22
34
  def __init__(
@@ -30,6 +42,14 @@ class FeaturePipeline:
30
42
  ):
31
43
  """
32
44
  Initialize feature extraction pipeline.
45
+
46
+ Args:
47
+ extractor: Feature extractor (from feature_extractors module)
48
+ batch_size: Number of images to process at once
49
+ device: Device for processing ('cpu', 'cuda')
50
+ cache_backend: Cache backend to use ('hdf5')
51
+ max_image_size: Optional max size for images (width, height)
52
+ verbose: Whether to show progress bars
33
53
  """
34
54
 
35
55
  self.extractor = extractor
@@ -79,6 +99,16 @@ class FeaturePipeline:
79
99
 
80
100
  """
81
101
  Extract features from all images in a directory.
102
+
103
+ Args:
104
+ directory: Directory containing images
105
+ pattern: Glob pattern for filenames
106
+ recursive: Whether to search recursively
107
+ save_every: Save checkpoint every N images (optional)
108
+ save_path: Path for checkpoint saves (required if save_every is set)
109
+
110
+ Returns:
111
+ Self for method chaining
82
112
  """
83
113
 
84
114
  # Find all images
@@ -112,6 +142,14 @@ class FeaturePipeline:
112
142
 
113
143
  """
114
144
  Extract features from a list of filepaths.
145
+
146
+ Args:
147
+ file_paths: List of image file paths
148
+ save_every: Save checkpoint every N images (optional)
149
+ save_path: Path for checkpoint saves (required if save_every is set)
150
+
151
+ Returns:
152
+ Self for method chaining
115
153
  """
116
154
 
117
155
  if save_every is not None and save_path is None:
@@ -223,6 +261,10 @@ class FeaturePipeline:
223
261
  def save(self, path, format='hdf5'):
224
262
  """
225
263
  Save extracted features to disk.
264
+
265
+ Args:
266
+ path: Path to save features
267
+ format: Format to use ('hdf5')
226
268
  """
227
269
 
228
270
  if self.features is None or self.metadata is None:
@@ -244,6 +286,11 @@ class FeaturePipeline:
244
286
  def load(self, path):
245
287
  """
246
288
  Load features from disk.
289
+
290
+ Args:
291
+ path: Path to feature cache
292
+
293
+ Returns: Self for method chaining
247
294
  """
248
295
 
249
296
  self.features, self.filenames, self.metadata = self.cache.load(path)
@@ -271,6 +318,9 @@ class FeaturePipeline:
271
318
  def get_feature_dict(self):
272
319
  """
273
320
  Get features as dictionary
321
+
322
+ Returns:
323
+ Dictionary mapping filenames to feature vectors
274
324
  """
275
325
 
276
326
  if self.features is None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: imageatlas
3
- Version: 0.1.1
3
+ Version: 0.1.2
4
4
  Summary: ImageAtlas: A toolkit for organizing, cleaning and analysing your image datasets.
5
5
  Author-email: Ahmad Javed <ahmadjaved97@gmail.com>
6
6
  Maintainer-email: Ahmad Javed <ahmadjaved97@gmail.com>
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes