objscale 0.2.2__tar.gz → 1.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {objscale-0.2.2 → objscale-1.0.0}/PKG-INFO +7 -4
- {objscale-0.2.2 → objscale-1.0.0}/README.md +6 -3
- {objscale-0.2.2 → objscale-1.0.0}/objscale/__init__.py +5 -5
- {objscale-0.2.2 → objscale-1.0.0}/objscale/_fractal_dimensions.py +230 -39
- {objscale-0.2.2 → objscale-1.0.0}/objscale.egg-info/PKG-INFO +7 -4
- {objscale-0.2.2 → objscale-1.0.0}/pyproject.toml +1 -1
- {objscale-0.2.2 → objscale-1.0.0}/LICENSE +0 -0
- {objscale-0.2.2 → objscale-1.0.0}/objscale/_object_analysis.py +0 -0
- {objscale-0.2.2 → objscale-1.0.0}/objscale/_size_distributions.py +0 -0
- {objscale-0.2.2 → objscale-1.0.0}/objscale/_utils.py +0 -0
- {objscale-0.2.2 → objscale-1.0.0}/objscale.egg-info/SOURCES.txt +0 -0
- {objscale-0.2.2 → objscale-1.0.0}/objscale.egg-info/dependency_links.txt +0 -0
- {objscale-0.2.2 → objscale-1.0.0}/objscale.egg-info/requires.txt +0 -0
- {objscale-0.2.2 → objscale-1.0.0}/objscale.egg-info/top_level.txt +0 -0
- {objscale-0.2.2 → objscale-1.0.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: objscale
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Object-based analysis functions for fractal dimensions and size distributions
|
|
5
5
|
Author-email: Thomas DeWitt <thomas.dewitt@utah.edu>
|
|
6
6
|
License: MIT
|
|
@@ -39,6 +39,9 @@ Requires-Dist: sphinx-rtd-theme; extra == "docs"
|
|
|
39
39
|
Requires-Dist: numpydoc; extra == "docs"
|
|
40
40
|
Dynamic: license-file
|
|
41
41
|
|
|
42
|
+

|
|
43
|
+
*Figure from DeWitt et al. (2025)*
|
|
44
|
+
|
|
42
45
|
# objscale
|
|
43
46
|
|
|
44
47
|
Object-based analysis functions for fractal dimensions and size distributions in atmospheric sciences and beyond. Optimized for large datasets.
|
|
@@ -52,6 +55,8 @@ The package implements methods from two main papers:
|
|
|
52
55
|
- [DeWitt & Garrett (2024)](https://acp.copernicus.org/articles/24/8457/2024/acp-24-8457-2024.html) - finite domain effects in size distributions
|
|
53
56
|
- [DeWitt et al. (2025)](https://egusphere.copernicus.org/preprints/2025/egusphere-2025-3486/) - fractal dimensions for cloud field characterization
|
|
54
57
|
|
|
58
|
+
See the [interactive scaling explorer](https://thomasddewitt.com/visuals-and-tools/scaling-explorer/index.html) to visualize how the correlation dimension is computed.
|
|
59
|
+
|
|
55
60
|
## Key Functions
|
|
56
61
|
|
|
57
62
|
### `finite_array_powerlaw_exponent`
|
|
@@ -98,8 +103,6 @@ mkdir -p ~/.codex/skills/objscale
|
|
|
98
103
|
cp .claude/skills/objscale/SKILL.md ~/.codex/skills/objscale/
|
|
99
104
|
```
|
|
100
105
|
|
|
101
|
-
|
|
102
|
-
|
|
103
106
|
## Quick Example
|
|
104
107
|
|
|
105
108
|
```python
|
|
@@ -157,7 +160,7 @@ ind_dim, ind_error = objscale.individual_fractal_dimension(arrays)
|
|
|
157
160
|
- `get_structure_props` - Calculate perimeter, area, width, height of structures
|
|
158
161
|
- `total_perimeter` - Total perimeter of all objects
|
|
159
162
|
- `total_number` - Count number of structures
|
|
160
|
-
- `
|
|
163
|
+
- `isolate_nth_largest_structure` - Extract the Nth largest connected structure
|
|
161
164
|
- `remove_structures_touching_border_nan` - Remove border-touching structures
|
|
162
165
|
- `remove_structure_holes` - Fill holes in structures
|
|
163
166
|
- `clear_border_adjacent` - Clear structures touching array edges
|
|
@@ -1,3 +1,6 @@
|
|
|
1
|
+

|
|
2
|
+
*Figure from DeWitt et al. (2025)*
|
|
3
|
+
|
|
1
4
|
# objscale
|
|
2
5
|
|
|
3
6
|
Object-based analysis functions for fractal dimensions and size distributions in atmospheric sciences and beyond. Optimized for large datasets.
|
|
@@ -11,6 +14,8 @@ The package implements methods from two main papers:
|
|
|
11
14
|
- [DeWitt & Garrett (2024)](https://acp.copernicus.org/articles/24/8457/2024/acp-24-8457-2024.html) - finite domain effects in size distributions
|
|
12
15
|
- [DeWitt et al. (2025)](https://egusphere.copernicus.org/preprints/2025/egusphere-2025-3486/) - fractal dimensions for cloud field characterization
|
|
13
16
|
|
|
17
|
+
See the [interactive scaling explorer](https://thomasddewitt.com/visuals-and-tools/scaling-explorer/index.html) to visualize how the correlation dimension is computed.
|
|
18
|
+
|
|
14
19
|
## Key Functions
|
|
15
20
|
|
|
16
21
|
### `finite_array_powerlaw_exponent`
|
|
@@ -57,8 +62,6 @@ mkdir -p ~/.codex/skills/objscale
|
|
|
57
62
|
cp .claude/skills/objscale/SKILL.md ~/.codex/skills/objscale/
|
|
58
63
|
```
|
|
59
64
|
|
|
60
|
-
|
|
61
|
-
|
|
62
65
|
## Quick Example
|
|
63
66
|
|
|
64
67
|
```python
|
|
@@ -116,7 +119,7 @@ ind_dim, ind_error = objscale.individual_fractal_dimension(arrays)
|
|
|
116
119
|
- `get_structure_props` - Calculate perimeter, area, width, height of structures
|
|
117
120
|
- `total_perimeter` - Total perimeter of all objects
|
|
118
121
|
- `total_number` - Count number of structures
|
|
119
|
-
- `
|
|
122
|
+
- `isolate_nth_largest_structure` - Extract the Nth largest connected structure
|
|
120
123
|
- `remove_structures_touching_border_nan` - Remove border-touching structures
|
|
121
124
|
- `remove_structure_holes` - Fill holes in structures
|
|
122
125
|
- `clear_border_adjacent` - Clear structures touching array edges
|
|
@@ -7,15 +7,15 @@ from ._size_distributions import (
|
|
|
7
7
|
|
|
8
8
|
from ._fractal_dimensions import (
|
|
9
9
|
individual_fractal_dimension,
|
|
10
|
+
individual_correlation_dimension,
|
|
10
11
|
ensemble_correlation_dimension,
|
|
11
12
|
ensemble_box_dimension,
|
|
12
13
|
total_perimeter,
|
|
13
14
|
total_number,
|
|
14
|
-
|
|
15
|
+
isolate_nth_largest_structure,
|
|
15
16
|
coarsen_array,
|
|
16
17
|
get_coords_of_boundaries,
|
|
17
18
|
get_locations_from_pixel_sizes,
|
|
18
|
-
correlation_integral,
|
|
19
19
|
label_size,
|
|
20
20
|
)
|
|
21
21
|
|
|
@@ -39,15 +39,15 @@ __all__ = [
|
|
|
39
39
|
'finite_array_powerlaw_exponent',
|
|
40
40
|
'array_size_distribution',
|
|
41
41
|
'individual_fractal_dimension',
|
|
42
|
+
'individual_correlation_dimension',
|
|
42
43
|
'ensemble_correlation_dimension',
|
|
43
44
|
'ensemble_box_dimension',
|
|
44
45
|
'total_perimeter',
|
|
45
46
|
'total_number',
|
|
46
|
-
'isolate_largest_structure',
|
|
47
|
+
'isolate_nth_largest_structure',
|
|
47
48
|
'coarsen_array',
|
|
48
49
|
'get_coords_of_boundaries',
|
|
49
50
|
'get_locations_from_pixel_sizes',
|
|
50
|
-
'correlation_integral',
|
|
51
51
|
'label_size',
|
|
52
52
|
'get_structure_props',
|
|
53
53
|
'get_every_boundary_perimeter',
|
|
@@ -60,7 +60,7 @@ __all__ = [
|
|
|
60
60
|
'set_num_threads',
|
|
61
61
|
]
|
|
62
62
|
|
|
63
|
-
__version__ = "0.2.2"
|
|
63
|
+
__version__ = "1.0.0"
|
|
64
64
|
__author__ = "Thomas DeWitt"
|
|
65
65
|
__email__ = "thomas.dewitt@utah.edu"
|
|
66
66
|
__description__ = "Object-based analysis functions for fractal dimensions and size distributions"
|
|
@@ -24,7 +24,8 @@ __all__ = [
|
|
|
24
24
|
'coarsen_array',
|
|
25
25
|
'total_perimeter',
|
|
26
26
|
'total_number',
|
|
27
|
-
'isolate_largest_structure',
|
|
27
|
+
'individual_correlation_dimension',
|
|
28
|
+
'isolate_nth_largest_structure',
|
|
28
29
|
'label_size',
|
|
29
30
|
]
|
|
30
31
|
|
|
@@ -183,7 +184,24 @@ def ensemble_correlation_dimension(
|
|
|
183
184
|
if len(circle_centers) == 0:
|
|
184
185
|
continue
|
|
185
186
|
|
|
186
|
-
|
|
187
|
+
# Convert index coordinates to physical coordinates for bounding box
|
|
188
|
+
boundary_phys_x = locations_x[all_boundary_coordinates[:, 0], all_boundary_coordinates[:, 1]]
|
|
189
|
+
boundary_phys_y = locations_y[all_boundary_coordinates[:, 0], all_boundary_coordinates[:, 1]]
|
|
190
|
+
boundary_phys = np.column_stack([boundary_phys_x, boundary_phys_y])
|
|
191
|
+
|
|
192
|
+
center_phys_x = locations_x[circle_centers[:, 0], circle_centers[:, 1]]
|
|
193
|
+
center_phys_y = locations_y[circle_centers[:, 0], circle_centers[:, 1]]
|
|
194
|
+
center_phys = np.column_stack([center_phys_x, center_phys_y])
|
|
195
|
+
|
|
196
|
+
# Sort boundary points by physical y for binary-search bounding box
|
|
197
|
+
sort_order = np.argsort(boundary_phys[:, 1])
|
|
198
|
+
sorted_boundary_phys = boundary_phys[sort_order]
|
|
199
|
+
|
|
200
|
+
max_bin = bins[-1]
|
|
201
|
+
bins_sq = bins ** 2
|
|
202
|
+
|
|
203
|
+
C_l += correlation_integral(center_phys, sorted_boundary_phys,
|
|
204
|
+
bins_sq, max_bin)
|
|
187
205
|
|
|
188
206
|
# Perform linear regression to estimate dimension
|
|
189
207
|
x, y = np.log10(bins), np.log10(C_l)
|
|
@@ -204,6 +222,137 @@ def ensemble_correlation_dimension(
|
|
|
204
222
|
return dimension, error
|
|
205
223
|
|
|
206
224
|
|
|
225
|
+
def individual_correlation_dimension(
|
|
226
|
+
array: NDArray,
|
|
227
|
+
n: int = 1,
|
|
228
|
+
x_sizes: NDArray | None = None,
|
|
229
|
+
y_sizes: NDArray | None = None,
|
|
230
|
+
minlength: str | float = 'auto',
|
|
231
|
+
maxlength: str | float = 'auto',
|
|
232
|
+
return_C_l: bool = False,
|
|
233
|
+
point_reduction_factor: float = 1,
|
|
234
|
+
nbins: int = 50,
|
|
235
|
+
) -> tuple[float, float] | tuple[float, float, NDArray, NDArray]:
|
|
236
|
+
"""
|
|
237
|
+
Calculate the correlation dimension of the Nth largest structure in an array.
|
|
238
|
+
|
|
239
|
+
Removes border-touching structures, isolates the Nth largest remaining
|
|
240
|
+
structure, crops to its bounding box, and computes the correlation dimension
|
|
241
|
+
via :func:`ensemble_correlation_dimension`.
|
|
242
|
+
|
|
243
|
+
Parameters
|
|
244
|
+
----------
|
|
245
|
+
array : np.ndarray
|
|
246
|
+
2-D binary array. May optionally have np.nan at borders; if not, a NaN
|
|
247
|
+
border is added internally.
|
|
248
|
+
n : int, default=1
|
|
249
|
+
Which structure to analyze, ranked by pixel count (1 = largest).
|
|
250
|
+
x_sizes : np.ndarray, optional
|
|
251
|
+
Pixel sizes in the x direction. If None, assume all pixel dimensions are 1.
|
|
252
|
+
y_sizes : np.ndarray, optional
|
|
253
|
+
Pixel sizes in the y direction. If None, assume all pixel dimensions are 1.
|
|
254
|
+
minlength : str or float, default='auto'
|
|
255
|
+
Minimum length scale for correlation calculation. If 'auto', uses 3 times
|
|
256
|
+
the minimum pixel size.
|
|
257
|
+
maxlength : str or float, default='auto'
|
|
258
|
+
Maximum length scale for correlation calculation. If 'auto', uses
|
|
259
|
+
``0.33 * max(bbox_height, bbox_width)`` of the isolated structure in
|
|
260
|
+
physical units.
|
|
261
|
+
return_C_l : bool, default=False
|
|
262
|
+
If True, return dimension, error, bins, C_l. Otherwise, return dimension,
|
|
263
|
+
error.
|
|
264
|
+
point_reduction_factor : float, default=1
|
|
265
|
+
Draw N/point_reduction_factor circles, where N is the total number of
|
|
266
|
+
available circles. Must be >= 1.
|
|
267
|
+
nbins : int, default=50
|
|
268
|
+
Number of logarithmically spaced bins for the correlation integral.
|
|
269
|
+
|
|
270
|
+
Returns
|
|
271
|
+
-------
|
|
272
|
+
dimension : float
|
|
273
|
+
The correlation dimension.
|
|
274
|
+
error : float
|
|
275
|
+
Error estimate for the dimension (95% confidence interval).
|
|
276
|
+
bins : np.ndarray, optional
|
|
277
|
+
The bins used for calculation. Only returned if return_C_l=True.
|
|
278
|
+
C_l : np.ndarray, optional
|
|
279
|
+
The correlation integral values. Only returned if return_C_l=True.
|
|
280
|
+
|
|
281
|
+
Raises
|
|
282
|
+
------
|
|
283
|
+
ValueError
|
|
284
|
+
If array is not 2-D, n < 1, or n exceeds the number of available
|
|
285
|
+
structures after border removal.
|
|
286
|
+
"""
|
|
287
|
+
if array.ndim != 2:
|
|
288
|
+
raise ValueError('array must be 2-dimensional')
|
|
289
|
+
if n < 1:
|
|
290
|
+
raise ValueError('n must be >= 1')
|
|
291
|
+
|
|
292
|
+
# Pad with NaN border if not already present
|
|
293
|
+
if not np.any(np.isnan(array)):
|
|
294
|
+
array = np.pad(array.astype(float), pad_width=1, mode='constant',
|
|
295
|
+
constant_values=np.nan)
|
|
296
|
+
if x_sizes is not None:
|
|
297
|
+
x_sizes = np.pad(x_sizes, pad_width=1, mode='edge')
|
|
298
|
+
if y_sizes is not None:
|
|
299
|
+
y_sizes = np.pad(y_sizes, pad_width=1, mode='edge')
|
|
300
|
+
|
|
301
|
+
# Remove border-touching structures and clean NaN
|
|
302
|
+
cleaned = remove_structures_touching_border_nan(array)
|
|
303
|
+
binary = np.nan_to_num(cleaned, nan=0.0).astype(bool)
|
|
304
|
+
|
|
305
|
+
# Isolate the Nth largest structure
|
|
306
|
+
isolated = isolate_nth_largest_structure(binary, n=n)
|
|
307
|
+
|
|
308
|
+
# Crop to bounding box
|
|
309
|
+
rows = np.any(isolated, axis=1)
|
|
310
|
+
cols = np.any(isolated, axis=0)
|
|
311
|
+
rmin, rmax = np.where(rows)[0][[0, -1]]
|
|
312
|
+
cmin, cmax = np.where(cols)[0][[0, -1]]
|
|
313
|
+
cropped = isolated[rmin:rmax + 1, cmin:cmax + 1].astype(np.float64)
|
|
314
|
+
|
|
315
|
+
# Pad with 1px of zeros to prevent toroidal wrap artifacts
|
|
316
|
+
padded = np.pad(cropped, pad_width=1, mode='constant', constant_values=0)
|
|
317
|
+
|
|
318
|
+
# Handle pixel sizes
|
|
319
|
+
if x_sizes is not None:
|
|
320
|
+
cropped_x = x_sizes[rmin:rmax + 1, cmin:cmax + 1]
|
|
321
|
+
cropped_x = np.pad(cropped_x, pad_width=1, mode='edge')
|
|
322
|
+
else:
|
|
323
|
+
cropped_x = None
|
|
324
|
+
|
|
325
|
+
if y_sizes is not None:
|
|
326
|
+
cropped_y = y_sizes[rmin:rmax + 1, cmin:cmax + 1]
|
|
327
|
+
cropped_y = np.pad(cropped_y, pad_width=1, mode='edge')
|
|
328
|
+
else:
|
|
329
|
+
cropped_y = None
|
|
330
|
+
|
|
331
|
+
# Compute maxlength from crop dimensions if auto
|
|
332
|
+
if maxlength == 'auto':
|
|
333
|
+
if cropped_x is not None and cropped_y is not None:
|
|
334
|
+
locs_x, locs_y = get_locations_from_pixel_sizes(cropped_x, cropped_y)
|
|
335
|
+
h, w = cropped_x.shape
|
|
336
|
+
phys_width = locs_x[h // 2, w - 1] - locs_x[h // 2, 0]
|
|
337
|
+
phys_height = locs_y[h - 1, w // 2] - locs_y[0, w // 2]
|
|
338
|
+
maxlength = 0.33 * max(phys_width, phys_height)
|
|
339
|
+
else:
|
|
340
|
+
crop_h, crop_w = cropped.shape
|
|
341
|
+
maxlength = 0.33 * float(max(crop_h, crop_w))
|
|
342
|
+
|
|
343
|
+
return ensemble_correlation_dimension(
|
|
344
|
+
[padded],
|
|
345
|
+
x_sizes=cropped_x,
|
|
346
|
+
y_sizes=cropped_y,
|
|
347
|
+
minlength=minlength,
|
|
348
|
+
maxlength=maxlength,
|
|
349
|
+
interior_circles_only=False,
|
|
350
|
+
return_C_l=return_C_l,
|
|
351
|
+
point_reduction_factor=point_reduction_factor,
|
|
352
|
+
nbins=nbins,
|
|
353
|
+
)
|
|
354
|
+
|
|
355
|
+
|
|
207
356
|
def ensemble_box_dimension(
|
|
208
357
|
binary_arrays: NDArray | list[NDArray],
|
|
209
358
|
set: str = 'edge',
|
|
@@ -597,56 +746,81 @@ def get_locations_from_pixel_sizes(
|
|
|
597
746
|
|
|
598
747
|
|
|
599
748
|
@numba.njit(parallel=True)
|
|
600
|
-
def correlation_integral(
|
|
749
|
+
def correlation_integral(centers_phys, sorted_boundary_phys, bins_sq, max_bin):
|
|
601
750
|
"""
|
|
602
751
|
Calculate correlation integral for boundary coordinates.
|
|
603
752
|
|
|
604
|
-
For each
|
|
605
|
-
|
|
753
|
+
For each center, count how many boundary points are within each distance
|
|
754
|
+
threshold. Uses physical coordinates directly for correct bounding on any
|
|
755
|
+
grid geometry (including non-uniform and 2D-varying pixel sizes).
|
|
606
756
|
|
|
607
757
|
Parameters
|
|
608
758
|
----------
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
618
|
-
1-D array of distances to bin.
|
|
759
|
+
centers_phys : np.ndarray
|
|
760
|
+
Physical (x, y) coordinates of circle centers, shape (N, 2).
|
|
761
|
+
sorted_boundary_phys : np.ndarray
|
|
762
|
+
Physical (x, y) coordinates of boundary points, shape (M, 2),
|
|
763
|
+
sorted by y coordinate.
|
|
764
|
+
bins_sq : np.ndarray
|
|
765
|
+
1-D array of squared distance thresholds (bins**2), sorted ascending.
|
|
766
|
+
max_bin : float
|
|
767
|
+
Maximum bin distance (sqrt of bins_sq[-1]).
|
|
619
768
|
|
|
620
769
|
Returns
|
|
621
770
|
-------
|
|
622
771
|
C_l : np.ndarray
|
|
623
|
-
Correlation integral values, same shape as
|
|
772
|
+
Correlation integral values, same shape as bins_sq.
|
|
624
773
|
|
|
625
774
|
Notes
|
|
626
775
|
-----
|
|
627
|
-
|
|
628
|
-
|
|
629
|
-
|
|
776
|
+
Uses three optimizations over the naive approach:
|
|
777
|
+
1. Bounding box in physical space via sort + binary search on y coordinate.
|
|
778
|
+
2. Squared distances to avoid sqrt in the inner loop.
|
|
779
|
+
3. Binary-search binning with forward cumulative sum instead of linear scan.
|
|
630
780
|
"""
|
|
631
|
-
|
|
632
|
-
|
|
781
|
+
sorted_y = sorted_boundary_phys[:, 1].copy()
|
|
782
|
+
num_bins = bins_sq.shape[0]
|
|
633
783
|
|
|
634
|
-
|
|
784
|
+
# Each thread gets its own histogram to eliminate race conditions
|
|
785
|
+
hist_per_thread = np.zeros((numba.config.NUMBA_NUM_THREADS, num_bins))
|
|
786
|
+
|
|
787
|
+
for i in numba.prange(centers_phys.shape[0]):
|
|
635
788
|
thread_id = numba.get_thread_id()
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
789
|
+
cx = centers_phys[i, 0]
|
|
790
|
+
cy = centers_phys[i, 1]
|
|
791
|
+
|
|
792
|
+
# Binary search for physical y bounding box
|
|
793
|
+
lo = np.searchsorted(sorted_y, cy - max_bin, side='left')
|
|
794
|
+
hi = np.searchsorted(sorted_y, cy + max_bin, side='right')
|
|
795
|
+
|
|
796
|
+
for j in range(lo, hi):
|
|
797
|
+
bx = sorted_boundary_phys[j, 0]
|
|
798
|
+
|
|
799
|
+
# Physical x bounding box check
|
|
800
|
+
if abs(bx - cx) > max_bin:
|
|
801
|
+
continue
|
|
802
|
+
|
|
803
|
+
dx = cx - bx
|
|
804
|
+
dy = cy - sorted_boundary_phys[j, 1]
|
|
805
|
+
dist_sq = dx * dx + dy * dy
|
|
639
806
|
|
|
640
|
-
|
|
641
|
-
|
|
807
|
+
# Binary search: find first bin where bins_sq[bin_idx] > dist_sq
|
|
808
|
+
bin_idx = np.searchsorted(bins_sq, dist_sq, side='right')
|
|
809
|
+
if bin_idx < num_bins:
|
|
810
|
+
hist_per_thread[thread_id, bin_idx] += 1
|
|
642
811
|
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
if distance < bin:
|
|
646
|
-
C_l_per_thread[thread_id, bin_index] += 1
|
|
812
|
+
# Sum across threads
|
|
813
|
+
hist = np.sum(hist_per_thread, axis=0)
|
|
647
814
|
|
|
648
|
-
#
|
|
649
|
-
|
|
815
|
+
# Forward cumulative sum: convert histogram to cumulative counts
|
|
816
|
+
# hist[k] = count of pairs whose first qualifying bin index is k
|
|
817
|
+
# C_l[k] = count of pairs with dist_sq < bins_sq[k] = sum of hist[0..k]
|
|
818
|
+
C_l = np.zeros(num_bins)
|
|
819
|
+
cumsum = 0.0
|
|
820
|
+
for k in range(num_bins):
|
|
821
|
+
cumsum += hist[k]
|
|
822
|
+
C_l[k] = cumsum
|
|
823
|
+
return C_l
|
|
650
824
|
|
|
651
825
|
|
|
652
826
|
def coarsen_array(array: NDArray, factor: int) -> NDArray:
|
|
@@ -773,37 +947,54 @@ def total_number(
|
|
|
773
947
|
return n_structures
|
|
774
948
|
|
|
775
949
|
|
|
776
|
-
def isolate_largest_structure(
|
|
950
|
+
def isolate_nth_largest_structure(
|
|
777
951
|
binary_array: NDArray,
|
|
952
|
+
n: int = 1,
|
|
778
953
|
structure: NDArray = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
|
|
779
954
|
) -> NDArray[np.bool_]:
|
|
780
955
|
"""
|
|
781
|
-
Isolate the largest connected structure in a binary array.
|
|
956
|
+
Isolate the Nth largest connected structure in a binary array.
|
|
782
957
|
|
|
783
958
|
Parameters
|
|
784
959
|
----------
|
|
785
960
|
binary_array : np.ndarray
|
|
786
961
|
Binary input array.
|
|
962
|
+
n : int, default=1
|
|
963
|
+
Which structure to return, ranked by pixel count (1 = largest).
|
|
787
964
|
structure : np.ndarray, default=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
|
|
788
965
|
Connectivity structure.
|
|
789
966
|
|
|
790
967
|
Returns
|
|
791
968
|
-------
|
|
792
969
|
np.ndarray
|
|
793
|
-
Boolean array with only the largest structure set to True.
|
|
970
|
+
Boolean array with only the Nth largest structure set to True.
|
|
794
971
|
|
|
795
972
|
Raises
|
|
796
973
|
------
|
|
797
974
|
ValueError
|
|
798
|
-
If ``binary_array`` contains no structures
|
|
975
|
+
If ``binary_array`` contains no structures or ``n`` exceeds the number
|
|
976
|
+
of structures.
|
|
799
977
|
"""
|
|
978
|
+
if n < 1:
|
|
979
|
+
raise ValueError('n must be >= 1')
|
|
800
980
|
labelled_array = label(binary_array, structure)[0]
|
|
801
981
|
cloud_values = labelled_array[labelled_array != 0] # remove background
|
|
802
982
|
if cloud_values.size == 0:
|
|
803
983
|
raise ValueError('binary_array contains no structures')
|
|
804
984
|
values, counts = np.unique(cloud_values, return_counts=True)
|
|
805
|
-
|
|
806
|
-
|
|
985
|
+
sorted_indices = np.argsort(counts)[::-1]
|
|
986
|
+
if n > len(values):
|
|
987
|
+
raise ValueError(f'Requested n={n} but only {len(values)} structures exist')
|
|
988
|
+
selected = values[sorted_indices[n - 1]]
|
|
989
|
+
return labelled_array == selected
|
|
990
|
+
|
|
991
|
+
|
|
992
|
+
def isolate_largest_structure(*args, **kwargs):
|
|
993
|
+
"""Removed. Use :func:`isolate_nth_largest_structure` instead."""
|
|
994
|
+
raise NotImplementedError(
|
|
995
|
+
'isolate_largest_structure has been renamed to '
|
|
996
|
+
'isolate_nth_largest_structure(binary_array, n=1)'
|
|
997
|
+
)
|
|
807
998
|
|
|
808
999
|
|
|
809
1000
|
def label_size(
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: objscale
|
|
3
|
-
Version: 0.2.2
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Object-based analysis functions for fractal dimensions and size distributions
|
|
5
5
|
Author-email: Thomas DeWitt <thomas.dewitt@utah.edu>
|
|
6
6
|
License: MIT
|
|
@@ -39,6 +39,9 @@ Requires-Dist: sphinx-rtd-theme; extra == "docs"
|
|
|
39
39
|
Requires-Dist: numpydoc; extra == "docs"
|
|
40
40
|
Dynamic: license-file
|
|
41
41
|
|
|
42
|
+

|
|
43
|
+
*Figure from DeWitt et al. (2025)*
|
|
44
|
+
|
|
42
45
|
# objscale
|
|
43
46
|
|
|
44
47
|
Object-based analysis functions for fractal dimensions and size distributions in atmospheric sciences and beyond. Optimized for large datasets.
|
|
@@ -52,6 +55,8 @@ The package implements methods from two main papers:
|
|
|
52
55
|
- [DeWitt & Garrett (2024)](https://acp.copernicus.org/articles/24/8457/2024/acp-24-8457-2024.html) - finite domain effects in size distributions
|
|
53
56
|
- [DeWitt et al. (2025)](https://egusphere.copernicus.org/preprints/2025/egusphere-2025-3486/) - fractal dimensions for cloud field characterization
|
|
54
57
|
|
|
58
|
+
See the [interactive scaling explorer](https://thomasddewitt.com/visuals-and-tools/scaling-explorer/index.html) to visualize how the correlation dimension is computed.
|
|
59
|
+
|
|
55
60
|
## Key Functions
|
|
56
61
|
|
|
57
62
|
### `finite_array_powerlaw_exponent`
|
|
@@ -98,8 +103,6 @@ mkdir -p ~/.codex/skills/objscale
|
|
|
98
103
|
cp .claude/skills/objscale/SKILL.md ~/.codex/skills/objscale/
|
|
99
104
|
```
|
|
100
105
|
|
|
101
|
-
|
|
102
|
-
|
|
103
106
|
## Quick Example
|
|
104
107
|
|
|
105
108
|
```python
|
|
@@ -157,7 +160,7 @@ ind_dim, ind_error = objscale.individual_fractal_dimension(arrays)
|
|
|
157
160
|
- `get_structure_props` - Calculate perimeter, area, width, height of structures
|
|
158
161
|
- `total_perimeter` - Total perimeter of all objects
|
|
159
162
|
- `total_number` - Count number of structures
|
|
160
|
-
- `
|
|
163
|
+
- `isolate_nth_largest_structure` - Extract the Nth largest connected structure
|
|
161
164
|
- `remove_structures_touching_border_nan` - Remove border-touching structures
|
|
162
165
|
- `remove_structure_holes` - Fill holes in structures
|
|
163
166
|
- `clear_border_adjacent` - Clear structures touching array edges
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|