objscale 0.2.3__tar.gz → 1.1.0__tar.gz

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: objscale
3
- Version: 0.2.3
3
+ Version: 1.1.0
4
4
  Summary: Object-based analysis functions for fractal dimensions and size distributions
5
5
  Author-email: Thomas DeWitt <thomas.dewitt@utah.edu>
6
6
  License: MIT
@@ -39,6 +39,9 @@ Requires-Dist: sphinx-rtd-theme; extra == "docs"
39
39
  Requires-Dist: numpydoc; extra == "docs"
40
40
  Dynamic: license-file
41
41
 
42
+ ![Correlation dimension example from DeWitt et al. (2025)](docs/images/correlation_dimension_example-1.png)
43
+ *Figure from DeWitt et al. (2025)*
44
+
42
45
  # objscale
43
46
 
44
47
  Object-based analysis functions for fractal dimensions and size distributions in atmospheric sciences and beyond. Optimized for large datasets.
@@ -52,6 +55,8 @@ The package implements methods from two main papers:
52
55
  - [DeWitt & Garrett (2024)](https://acp.copernicus.org/articles/24/8457/2024/acp-24-8457-2024.html) - finite domain effects in size distributions
53
56
  - [DeWitt et al. (2025)](https://egusphere.copernicus.org/preprints/2025/egusphere-2025-3486/) - fractal dimensions for cloud field characterization
54
57
 
58
+ See the [interactive scaling explorer](https://thomasddewitt.com/visuals-and-tools/scaling-explorer/index.html) to visualize how the correlation dimension is computed.
59
+
55
60
  ## Key Functions
56
61
 
57
62
  ### `finite_array_powerlaw_exponent`
@@ -98,8 +103,6 @@ mkdir -p ~/.codex/skills/objscale
98
103
  cp .claude/skills/objscale/SKILL.md ~/.codex/skills/objscale/
99
104
  ```
100
105
 
101
-
102
-
103
106
  ## Quick Example
104
107
 
105
108
  ```python
@@ -157,7 +160,7 @@ ind_dim, ind_error = objscale.individual_fractal_dimension(arrays)
157
160
  - `get_structure_props` - Calculate perimeter, area, width, height of structures
158
161
  - `total_perimeter` - Total perimeter of all objects
159
162
  - `total_number` - Count number of structures
160
- - `isolate_largest_structure` - Extract the largest connected structure
163
+ - `isolate_nth_largest_structure` - Extract the Nth largest connected structure
161
164
  - `remove_structures_touching_border_nan` - Remove border-touching structures
162
165
  - `remove_structure_holes` - Fill holes in structures
163
166
  - `clear_border_adjacent` - Clear structures touching array edges
@@ -1,3 +1,6 @@
1
+ ![Correlation dimension example from DeWitt et al. (2025)](docs/images/correlation_dimension_example-1.png)
2
+ *Figure from DeWitt et al. (2025)*
3
+
1
4
  # objscale
2
5
 
3
6
  Object-based analysis functions for fractal dimensions and size distributions in atmospheric sciences and beyond. Optimized for large datasets.
@@ -11,6 +14,8 @@ The package implements methods from two main papers:
11
14
  - [DeWitt & Garrett (2024)](https://acp.copernicus.org/articles/24/8457/2024/acp-24-8457-2024.html) - finite domain effects in size distributions
12
15
  - [DeWitt et al. (2025)](https://egusphere.copernicus.org/preprints/2025/egusphere-2025-3486/) - fractal dimensions for cloud field characterization
13
16
 
17
+ See the [interactive scaling explorer](https://thomasddewitt.com/visuals-and-tools/scaling-explorer/index.html) to visualize how the correlation dimension is computed.
18
+
14
19
  ## Key Functions
15
20
 
16
21
  ### `finite_array_powerlaw_exponent`
@@ -57,8 +62,6 @@ mkdir -p ~/.codex/skills/objscale
57
62
  cp .claude/skills/objscale/SKILL.md ~/.codex/skills/objscale/
58
63
  ```
59
64
 
60
-
61
-
62
65
  ## Quick Example
63
66
 
64
67
  ```python
@@ -116,7 +119,7 @@ ind_dim, ind_error = objscale.individual_fractal_dimension(arrays)
116
119
  - `get_structure_props` - Calculate perimeter, area, width, height of structures
117
120
  - `total_perimeter` - Total perimeter of all objects
118
121
  - `total_number` - Count number of structures
119
- - `isolate_largest_structure` - Extract the largest connected structure
122
+ - `isolate_nth_largest_structure` - Extract the Nth largest connected structure
120
123
  - `remove_structures_touching_border_nan` - Remove border-touching structures
121
124
  - `remove_structure_holes` - Fill holes in structures
122
125
  - `clear_border_adjacent` - Clear structures touching array edges
@@ -7,11 +7,12 @@ from ._size_distributions import (
7
7
 
8
8
  from ._fractal_dimensions import (
9
9
  individual_fractal_dimension,
10
+ individual_correlation_dimension,
10
11
  ensemble_correlation_dimension,
11
12
  ensemble_box_dimension,
12
13
  total_perimeter,
13
14
  total_number,
14
- isolate_largest_structure,
15
+ isolate_nth_largest_structure,
15
16
  coarsen_array,
16
17
  get_coords_of_boundaries,
17
18
  get_locations_from_pixel_sizes,
@@ -38,11 +39,12 @@ __all__ = [
38
39
  'finite_array_powerlaw_exponent',
39
40
  'array_size_distribution',
40
41
  'individual_fractal_dimension',
42
+ 'individual_correlation_dimension',
41
43
  'ensemble_correlation_dimension',
42
44
  'ensemble_box_dimension',
43
45
  'total_perimeter',
44
46
  'total_number',
45
- 'isolate_largest_structure',
47
+ 'isolate_nth_largest_structure',
46
48
  'coarsen_array',
47
49
  'get_coords_of_boundaries',
48
50
  'get_locations_from_pixel_sizes',
@@ -58,7 +60,7 @@ __all__ = [
58
60
  'set_num_threads',
59
61
  ]
60
62
 
61
- __version__ = "0.2.3"
63
+ __version__ = "1.1.0"
62
64
  __author__ = "Thomas DeWitt"
63
65
  __email__ = "thomas.dewitt@utah.edu"
64
66
  __description__ = "Object-based analysis functions for fractal dimensions and size distributions"
@@ -24,7 +24,8 @@ __all__ = [
24
24
  'coarsen_array',
25
25
  'total_perimeter',
26
26
  'total_number',
27
- 'isolate_largest_structure',
27
+ 'individual_correlation_dimension',
28
+ 'isolate_nth_largest_structure',
28
29
  'label_size',
29
30
  ]
30
31
 
@@ -221,6 +222,137 @@ def ensemble_correlation_dimension(
221
222
  return dimension, error
222
223
 
223
224
 
225
+ def individual_correlation_dimension(
226
+ array: NDArray,
227
+ n: int = 1,
228
+ x_sizes: NDArray | None = None,
229
+ y_sizes: NDArray | None = None,
230
+ minlength: str | float = 'auto',
231
+ maxlength: str | float = 'auto',
232
+ return_C_l: bool = False,
233
+ point_reduction_factor: float = 1,
234
+ nbins: int = 50,
235
+ ) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]:
236
+ """
237
+ Calculate the correlation dimension of the Nth largest structure in an array.
238
+
239
+ Removes border-touching structures, isolates the Nth largest remaining
240
+ structure, crops to its bounding box, and computes the correlation dimension
241
+ via :func:`ensemble_correlation_dimension`.
242
+
243
+ Parameters
244
+ ----------
245
+ array : np.ndarray
246
+ 2-D binary array. May optionally have np.nan at borders; if not, a NaN
247
+ border is added internally.
248
+ n : int, default=1
249
+ Which structure to analyze, ranked by pixel count (1 = largest).
250
+ x_sizes : np.ndarray, optional
251
+ Pixel sizes in the x direction. If None, assume all pixel dimensions are 1.
252
+ y_sizes : np.ndarray, optional
253
+ Pixel sizes in the y direction. If None, assume all pixel dimensions are 1.
254
+ minlength : str or float, default='auto'
255
+ Minimum length scale for correlation calculation. If 'auto', uses 3 times
256
+ the minimum pixel size.
257
+ maxlength : str or float, default='auto'
258
+ Maximum length scale for correlation calculation. If 'auto', uses
259
+ ``0.33 * max(bbox_height, bbox_width)`` of the isolated structure in
260
+ physical units.
261
+ return_C_l : bool, default=False
262
+ If True, return dimension, error, bins, C_l. Otherwise, return dimension,
263
+ error.
264
+ point_reduction_factor : float, default=1
265
+ Draw N/point_reduction_factor circles, where N is the total number of
266
+ available circles. Must be >= 1.
267
+ nbins : int, default=50
268
+ Number of logarithmically spaced bins for the correlation integral.
269
+
270
+ Returns
271
+ -------
272
+ dimension : float
273
+ The correlation dimension.
274
+ error : float
275
+ Error estimate for the dimension (95% confidence interval).
276
+ bins : np.ndarray, optional
277
+ The bins used for calculation. Only returned if return_C_l=True.
278
+ C_l : np.ndarray, optional
279
+ The correlation integral values. Only returned if return_C_l=True.
280
+
281
+ Raises
282
+ ------
283
+ ValueError
284
+ If array is not 2-D, n < 1, or n exceeds the number of available
285
+ structures after border removal.
286
+ """
287
+ if array.ndim != 2:
288
+ raise ValueError('array must be 2-dimensional')
289
+ if n < 1:
290
+ raise ValueError('n must be >= 1')
291
+
292
+ # Pad with NaN border if not already present
293
+ if not np.any(np.isnan(array)):
294
+ array = np.pad(array.astype(float), pad_width=1, mode='constant',
295
+ constant_values=np.nan)
296
+ if x_sizes is not None:
297
+ x_sizes = np.pad(x_sizes, pad_width=1, mode='edge')
298
+ if y_sizes is not None:
299
+ y_sizes = np.pad(y_sizes, pad_width=1, mode='edge')
300
+
301
+ # Remove border-touching structures and clean NaN
302
+ cleaned = remove_structures_touching_border_nan(array)
303
+ binary = np.nan_to_num(cleaned, nan=0.0).astype(bool)
304
+
305
+ # Isolate the Nth largest structure
306
+ isolated = isolate_nth_largest_structure(binary, n=n)
307
+
308
+ # Crop to bounding box
309
+ rows = np.any(isolated, axis=1)
310
+ cols = np.any(isolated, axis=0)
311
+ rmin, rmax = np.where(rows)[0][[0, -1]]
312
+ cmin, cmax = np.where(cols)[0][[0, -1]]
313
+ cropped = isolated[rmin:rmax + 1, cmin:cmax + 1].astype(np.float64)
314
+
315
+ # Pad with 1px of zeros to prevent toroidal wrap artifacts
316
+ padded = np.pad(cropped, pad_width=1, mode='constant', constant_values=0)
317
+
318
+ # Handle pixel sizes
319
+ if x_sizes is not None:
320
+ cropped_x = x_sizes[rmin:rmax + 1, cmin:cmax + 1]
321
+ cropped_x = np.pad(cropped_x, pad_width=1, mode='edge')
322
+ else:
323
+ cropped_x = None
324
+
325
+ if y_sizes is not None:
326
+ cropped_y = y_sizes[rmin:rmax + 1, cmin:cmax + 1]
327
+ cropped_y = np.pad(cropped_y, pad_width=1, mode='edge')
328
+ else:
329
+ cropped_y = None
330
+
331
+ # Compute maxlength from crop dimensions if auto
332
+ if maxlength == 'auto':
333
+ if cropped_x is not None and cropped_y is not None:
334
+ locs_x, locs_y = get_locations_from_pixel_sizes(cropped_x, cropped_y)
335
+ h, w = cropped_x.shape
336
+ phys_width = locs_x[h // 2, w - 1] - locs_x[h // 2, 0]
337
+ phys_height = locs_y[h - 1, w // 2] - locs_y[0, w // 2]
338
+ maxlength = 0.33 * max(phys_width, phys_height)
339
+ else:
340
+ crop_h, crop_w = cropped.shape
341
+ maxlength = 0.33 * float(max(crop_h, crop_w))
342
+
343
+ return ensemble_correlation_dimension(
344
+ [padded],
345
+ x_sizes=cropped_x,
346
+ y_sizes=cropped_y,
347
+ minlength=minlength,
348
+ maxlength=maxlength,
349
+ interior_circles_only=False,
350
+ return_C_l=return_C_l,
351
+ point_reduction_factor=point_reduction_factor,
352
+ nbins=nbins,
353
+ )
354
+
355
+
224
356
  def ensemble_box_dimension(
225
357
  binary_arrays: NDArray | list[NDArray],
226
358
  set: str = 'edge',
@@ -448,6 +580,7 @@ def individual_fractal_dimension(
448
580
  y_sizes: NDArray | list[NDArray] | None = None,
449
581
  min_a: float = 10,
450
582
  max_a: float = np.inf,
583
+ bins: int | None = 30,
451
584
  return_values: bool = False
452
585
  ) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]:
453
586
  """
@@ -473,6 +606,10 @@ def individual_fractal_dimension(
473
606
  Minimum structure area to include in calculation.
474
607
  max_a : float, default=np.inf
475
608
  Maximum structure area to include in calculation.
609
+ bins : int or None, default=30
610
+ Number of bins along log10(sqrt(area)) for averaging. The regression
611
+ is performed on the bin-averaged values. If None, fit on all individual
612
+ points without binning.
476
613
  return_values : bool, default=False
477
614
  If True, return additional data used in the calculation.
478
615
 
@@ -483,9 +620,11 @@ def individual_fractal_dimension(
483
620
  uncertainty : float
484
621
  Uncertainty estimate (95% confidence).
485
622
  log10_sqrt_a : np.ndarray, optional
486
- Log10 of sqrt(area) values. Only returned if return_values=True.
623
+ Log10 of sqrt(area) values (bin centers if bins is not None).
624
+ Only returned if return_values=True.
487
625
  log10_p : np.ndarray, optional
488
- Log10 of perimeter values. Only returned if return_values=True.
626
+ Log10 of perimeter values (bin means if bins is not None).
627
+ Only returned if return_values=True.
489
628
 
490
629
  Raises
491
630
  ------
@@ -524,10 +663,25 @@ def individual_fractal_dimension(
524
663
  areas, perimeters = np.array(areas), np.array(perimeters)
525
664
  areas, perimeters = areas[(areas > min_a) & (areas < max_a)], perimeters[(areas > min_a) & (areas < max_a)]
526
665
 
527
- (slope, _), (err, _) = linear_regression(np.log10(np.sqrt(areas)), np.log10(perimeters))
666
+ log_sqrt_a = np.log10(np.sqrt(areas))
667
+ log_p = np.log10(perimeters)
668
+
669
+ if bins is not None:
670
+ bin_edges = np.linspace(log_sqrt_a.min(), log_sqrt_a.max(), bins + 1)
671
+ bin_indices = np.digitize(log_sqrt_a, bin_edges) - 1
672
+ bin_indices = np.clip(bin_indices, 0, bins - 1)
673
+ bin_centers = 0.5 * (bin_edges[:-1] + bin_edges[1:])
674
+ bin_means = np.array([log_p[bin_indices == i].mean()
675
+ if np.any(bin_indices == i) else np.nan
676
+ for i in range(bins)])
677
+ valid = np.isfinite(bin_means)
678
+ log_sqrt_a = bin_centers[valid]
679
+ log_p = bin_means[valid]
680
+
681
+ (slope, _), (err, _) = linear_regression(log_sqrt_a, log_p)
528
682
 
529
683
  if return_values:
530
- return slope, err, np.log10(np.sqrt(areas)), np.log10(perimeters)
684
+ return slope, err, log_sqrt_a, log_p
531
685
  else:
532
686
  return slope, err
533
687
 
@@ -815,37 +969,54 @@ def total_number(
815
969
  return n_structures
816
970
 
817
971
 
818
- def isolate_largest_structure(
972
+ def isolate_nth_largest_structure(
819
973
  binary_array: NDArray,
974
+ n: int = 1,
820
975
  structure: NDArray = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
821
976
  ) -> NDArray[np.bool_]:
822
977
  """
823
- Isolate the largest connected structure in a binary array.
978
+ Isolate the Nth largest connected structure in a binary array.
824
979
 
825
980
  Parameters
826
981
  ----------
827
982
  binary_array : np.ndarray
828
983
  Binary input array.
984
+ n : int, default=1
985
+ Which structure to return, ranked by pixel count (1 = largest).
829
986
  structure : np.ndarray, default=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
830
987
  Connectivity structure.
831
988
 
832
989
  Returns
833
990
  -------
834
991
  np.ndarray
835
- Boolean array with only the largest structure set to True.
992
+ Boolean array with only the Nth largest structure set to True.
836
993
 
837
994
  Raises
838
995
  ------
839
996
  ValueError
840
- If ``binary_array`` contains no structures (no non-zero pixels).
997
+ If ``binary_array`` contains no structures or ``n`` exceeds the number
998
+ of structures.
841
999
  """
1000
+ if n < 1:
1001
+ raise ValueError('n must be >= 1')
842
1002
  labelled_array = label(binary_array, structure)[0]
843
1003
  cloud_values = labelled_array[labelled_array != 0] # remove background
844
1004
  if cloud_values.size == 0:
845
1005
  raise ValueError('binary_array contains no structures')
846
1006
  values, counts = np.unique(cloud_values, return_counts=True)
847
- most_common = values[np.argmax(counts)]
848
- return labelled_array == most_common
1007
+ sorted_indices = np.argsort(counts)[::-1]
1008
+ if n > len(values):
1009
+ raise ValueError(f'Requested n={n} but only {len(values)} structures exist')
1010
+ selected = values[sorted_indices[n - 1]]
1011
+ return labelled_array == selected
1012
+
1013
+
1014
+ def isolate_largest_structure(*args, **kwargs):
1015
+ """Removed. Use :func:`isolate_nth_largest_structure` instead."""
1016
+ raise NotImplementedError(
1017
+ 'isolate_largest_structure has been renamed to '
1018
+ 'isolate_nth_largest_structure(binary_array, n=1)'
1019
+ )
849
1020
 
850
1021
 
851
1022
  def label_size(
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: objscale
3
- Version: 0.2.3
3
+ Version: 1.1.0
4
4
  Summary: Object-based analysis functions for fractal dimensions and size distributions
5
5
  Author-email: Thomas DeWitt <thomas.dewitt@utah.edu>
6
6
  License: MIT
@@ -39,6 +39,9 @@ Requires-Dist: sphinx-rtd-theme; extra == "docs"
39
39
  Requires-Dist: numpydoc; extra == "docs"
40
40
  Dynamic: license-file
41
41
 
42
+ ![Correlation dimension example from DeWitt et al. (2025)](docs/images/correlation_dimension_example-1.png)
43
+ *Figure from DeWitt et al. (2025)*
44
+
42
45
  # objscale
43
46
 
44
47
  Object-based analysis functions for fractal dimensions and size distributions in atmospheric sciences and beyond. Optimized for large datasets.
@@ -52,6 +55,8 @@ The package implements methods from two main papers:
52
55
  - [DeWitt & Garrett (2024)](https://acp.copernicus.org/articles/24/8457/2024/acp-24-8457-2024.html) - finite domain effects in size distributions
53
56
  - [DeWitt et al. (2025)](https://egusphere.copernicus.org/preprints/2025/egusphere-2025-3486/) - fractal dimensions for cloud field characterization
54
57
 
58
+ See the [interactive scaling explorer](https://thomasddewitt.com/visuals-and-tools/scaling-explorer/index.html) to visualize how the correlation dimension is computed.
59
+
55
60
  ## Key Functions
56
61
 
57
62
  ### `finite_array_powerlaw_exponent`
@@ -98,8 +103,6 @@ mkdir -p ~/.codex/skills/objscale
98
103
  cp .claude/skills/objscale/SKILL.md ~/.codex/skills/objscale/
99
104
  ```
100
105
 
101
-
102
-
103
106
  ## Quick Example
104
107
 
105
108
  ```python
@@ -157,7 +160,7 @@ ind_dim, ind_error = objscale.individual_fractal_dimension(arrays)
157
160
  - `get_structure_props` - Calculate perimeter, area, width, height of structures
158
161
  - `total_perimeter` - Total perimeter of all objects
159
162
  - `total_number` - Count number of structures
160
- - `isolate_largest_structure` - Extract the largest connected structure
163
+ - `isolate_nth_largest_structure` - Extract the Nth largest connected structure
161
164
  - `remove_structures_touching_border_nan` - Remove border-touching structures
162
165
  - `remove_structure_holes` - Fill holes in structures
163
166
  - `clear_border_adjacent` - Clear structures touching array edges
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "objscale"
7
- version = "0.2.3"
7
+ version = "1.1.0"
8
8
  description = "Object-based analysis functions for fractal dimensions and size distributions"
9
9
  readme = "README.md"
10
10
  authors = [
File without changes
File without changes
File without changes