objscale 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- objscale/__init__.py +56 -0
- objscale/_fractal_dimensions.py +632 -0
- objscale/_object_analysis.py +285 -0
- objscale/_size_distributions.py +249 -0
- objscale/_utils.py +40 -0
- objscale-0.1.0.dist-info/METADATA +142 -0
- objscale-0.1.0.dist-info/RECORD +10 -0
- objscale-0.1.0.dist-info/WHEEL +5 -0
- objscale-0.1.0.dist-info/licenses/LICENSE +9 -0
- objscale-0.1.0.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,285 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from scipy.ndimage import label
|
|
3
|
+
from numba import njit
|
|
4
|
+
from numba.typed import List
|
|
5
|
+
from warnings import warn
|
|
6
|
+
from skimage.segmentation import clear_border
|
|
7
|
+
from ._utils import encase_in_value
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def get_structure_props(array, x_sizes, y_sizes, structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]), print_none=False, wrap=None):
    """
    Given an array and the sizes of each pixel in each direction, calculate
    the perimeter, area, height and width of every structure in the array.

    Input:
        array - Binary array of structures: 2-d np.ndarray, padded with 0's or np.nan's
        x_sizes - Sizes of pixels in horizontal direction, same shape as array: 2-d np.ndarray
        y_sizes - Sizes of pixels in vertical direction, same shape as array: 2-d np.ndarray
        structure - Defines connectivity (passed to scipy.ndimage.label)
        print_none - Print message if no structures found
        wrap - None, 'sides', 'both':
            if 'sides', connect structures that span the left/right edge
            if 'both', connect structures that span the left/right edge and top/bottom edge
    Output:
        perimeter, area, height, width: 1-D np.ndarrays, each element the
        perimeter/area/height/width of an individual structure.
        Four empty arrays are returned when no structure is found.
    Raises:
        ValueError if the three arrays differ in shape, if a pixel size is nan
        where array is finite, if wrap is not supported, or if any computed
        property is nan.

    Note: if x_sizes or y_sizes are not uniform, the width will be the sum of the average pixel widths of the pixels in the column and in the object.
        Similarly, the height will be the sum of the average pixel heights of the pixels in the row and in the object.
    Any perimeter between structure and nan is not counted.
    """
    if array.shape != x_sizes.shape or array.shape != y_sizes.shape:
        raise ValueError('array, x_sizes, and y_sizes must all be same shape. Currently {},{},{}'.format(array.shape, x_sizes.shape, y_sizes.shape))

    if np.count_nonzero((np.isnan(x_sizes) | np.isnan(y_sizes)) & np.isfinite(array)):
        raise ValueError('x or y sizes are nan in locations where array is not')

    nan_locations = np.isnan(array)
    no_nans = array.copy()
    no_nans[nan_locations] = 0  # so we don't consider nans structures and also so they don't connect multiple structures
    if np.count_nonzero(no_nans) == 0:
        if print_none: print('No structures found')
        return np.array([]), np.array([]), np.array([]), np.array([])

    # Every unique structure gets a unique number, 1 to n_structures; background is 0.
    labelled_array, _ = label(no_nans.astype(bool), structure, output=np.float32)

    if wrap is None:
        pass
    elif wrap == 'both' or wrap == 'sides':
        labelled_array = label_periodic_boundaries(labelled_array, wrap)
    else:
        raise ValueError(f'wrap={wrap} not supported')

    # Sort the flattened labels once; the sort order gives the pixel locations
    # grouped by label.
    flat_labels = labelled_array.flatten()
    original_locations = np.argsort(flat_labels)
    values = flat_labels[original_locations]
    indices_2d = np.array(np.unravel_index(original_locations, labelled_array.shape)).T  # convert flattened indices to 2-d

    labelled_array[nan_locations] = np.nan  # Turn this back to nan so perimeter along it is not included

    # Split wherever the sorted label value changes.
    group_starts = np.flatnonzero(np.diff(values)) + 1
    separated_structure_indices = np.split(indices_2d, group_starts)
    # The first group is the background only if a 0 label exists; when the
    # array has no background pixel at all the first group is a real structure.
    if values[0] == 0:
        separated_structure_indices = separated_structure_indices[1:]
    if len(separated_structure_indices) == 0:
        return np.array([]), np.array([]), np.array([]), np.array([])

    # must use numba.typed.List here https://numba.readthedocs.io/en/stable/reference/pysupported.html#feature-typed-list
    p, a, h, w = _get_structure_props_helper(labelled_array, List(separated_structure_indices), x_sizes, y_sizes)
    p, a, h, w = np.array(p), np.array(a), np.array(h), np.array(w)

    nanmask = np.isnan(p) | np.isnan(a) | np.isnan(h) | np.isnan(w)
    if np.count_nonzero(nanmask) > 0:
        raise ValueError('Nan values found: {} out of {}'.format(np.count_nonzero(nanmask), len(p)))
    return p, a, h, w
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
@njit()
def _get_structure_props_helper(labelled_array, separated_structure_indices, x_sizes, y_sizes):
    """
    Compute perimeter, area, height and width for each structure.

    Input:
        labelled_array - 2-d array of labels; 0 is background
        separated_structure_indices - one array of (row, column) pairs per structure
        x_sizes, y_sizes - pixel sizes, indexed like labelled_array
    Output:
        p, a, h, w: lists with one entry per structure whose area is non-zero

    A pixel edge contributes to the perimeter only when the neighbouring pixel
    equals 0; the neighbour of an edge pixel is taken from the opposite side
    of the array. Neighbours that are nan or another label are not counted.
    """
    p, a, = [],[]
    h, w = [],[]

    for indices in separated_structure_indices:
        perimeter = 0
        area = 0

        y_coords_structure = np.array([c[0] for c in indices])
        x_coords_structure = np.array([c[1] for c in indices])
        unique_y_coords = []
        unique_x_coords = []
        height = 0
        width = 0

        for (i,j) in indices:
            # Height: the first time a row is seen, add the mean pixel height
            # of the structure's pixels in that row.
            if i not in unique_y_coords:
                unique_y_coords.append(i)
                in_row = (y_coords_structure==i)
                row_sizes = []
                for loc,take in enumerate(in_row):
                    if take: row_sizes.append(y_sizes[y_coords_structure[loc],x_coords_structure[loc]])
                height += np.mean(np.array(row_sizes))
            # Width: same, per column.
            if j not in unique_x_coords:
                unique_x_coords.append(j)
                in_column = (x_coords_structure==j)
                column_sizes = []
                for loc,take in enumerate(in_column):
                    if take: column_sizes.append(x_sizes[y_coords_structure[loc],x_coords_structure[loc]])
                width += np.mean(np.array(column_sizes))

            # Perimeter:
            # neighbour below (wraps to the first row)
            if i != labelled_array.shape[0]-1 and labelled_array[i+1, j] == 0: perimeter += x_sizes[i,j]
            elif i == labelled_array.shape[0]-1 and labelled_array[0, j] == 0: perimeter += x_sizes[i,j]

            # neighbour above (wraps to the last row)
            if i != 0 and labelled_array[i-1, j] == 0: perimeter += x_sizes[i,j]
            elif i == 0 and labelled_array[labelled_array.shape[0]-1, j] == 0: perimeter += x_sizes[i,j]

            # neighbour to the right (wraps to the first column)
            if j != labelled_array.shape[1]-1 and labelled_array[i, j+1] == 0: perimeter += y_sizes[i,j]
            elif j == labelled_array.shape[1]-1 and labelled_array[i, 0] == 0: perimeter += y_sizes[i,j]

            # neighbour to the left (wraps to the last column, mirroring the
            # other three directions)
            if j != 0 and labelled_array[i, j-1] == 0: perimeter += y_sizes[i,j]
            elif j == 0 and labelled_array[i, labelled_array.shape[1]-1] == 0: perimeter += y_sizes[i,j]

            # Area:
            area += y_sizes[i,j] * x_sizes[i,j]

        if area != 0:
            p.append(perimeter)
            a.append(area)
            h.append(height)
            w.append(width)

    return p, a, h, w
|
|
132
|
+
|
|
133
|
+
|
|
134
|
+
def label_periodic_boundaries(labelled_array, wrap):
    """
    Make labelled structures that span the array edge share a single label.

    Parameters:
        labelled_array (numpy.ndarray): A 2D array where each unique non-zero element
            represents a distinct label. Should be the output of scipy.ndimage.label().
            It is modified in place.
        wrap (str): 'sides' or 'both'.
            'sides': wherever the first and last column of a row both hold a
                label, the label found in the last column is replaced, everywhere
                in the array, by the label found in the first column.
            'both': as 'sides', then the same is done between the first and last
                row, the last-row label being replaced by the first-row label.

    Returns:
        labelled_array (numpy.ndarray): The same array object, relabelled.

    Raises:
        ValueError: If 'wrap' is neither 'sides' nor 'both'.
    """
    if wrap not in ('sides', 'both'):
        raise ValueError(f'wrap = {wrap} not supported')

    last_col = labelled_array.shape[1] - 1
    for row in range(labelled_array.shape[0]):
        left = labelled_array[row, 0]
        right = labelled_array[row, last_col]
        # Both ends must be structure, and not already merged.
        if left != 0 and right != 0 and right != left:
            labelled_array[labelled_array == right] = left

    if wrap == 'both':
        last_row = labelled_array.shape[0] - 1
        for col in range(labelled_array.shape[1]):
            top = labelled_array[0, col]
            bottom = labelled_array[last_row, col]
            if top != 0 and bottom != 0 and bottom != top:
                labelled_array[labelled_array == bottom] = top

    return labelled_array
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def get_every_boundary_perimeter(array, x_sizes, y_sizes, return_nlevels=False):
    """
    Return the perimeter of every individual boundary between 0s and 1s.

    Each closed boundary gives its own value.
    Ex: a donut of 1s gives 2 values, one for the outer and one for the inner boundary.

    Input:
        array: 2-D np.ndarray that should only contain 0s and 1s
        x_sizes, y_sizes: pixel sizes, same shape as array
        return_nlevels: if True, also return the number of nesting layers processed
    Output:
        perimeters (list), or (perimeters, n_levels) if return_nlevels
    Raises:
        ValueError if more than 100 nesting layers are needed
    """
    perimeters = []
    n_levels = 0
    remaining = array
    while np.nansum(remaining) != 0:
        n_levels += 1
        if n_levels > 100:
            raise ValueError('Hole layer limit reached: 100 layers')

        # With all holes filled, the ordinary perimeter is the outer boundary only.
        filled = remove_structure_holes(remaining)
        outer_perimeters = get_structure_props(encase_in_value(filled), encase_in_value(x_sizes), encase_in_value(y_sizes))[0]
        perimeters.extend(outer_perimeters)

        # Peel one layer: what were holes become structures, and structures
        # that sat inside those holes become holes of the new structures.
        inverted = filled - remaining
        inverted[filled == 0] = 0
        remaining = inverted

    if return_nlevels:
        return perimeters, n_levels
    return perimeters
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def remove_structures_touching_border_nan(array):
    """
    Remove every structure in contact with the nan region surrounding the good data.

    Input:
        array: 2-D np.ndarray consisting of 0s, 1s, and np.nan. All values at the array edge should be np.nan
    Output:
        2-D float np.ndarray consisting of 0s, 1s, and np.nan with any structure in contact with the nan
        values around the outer edge of the good data removed.
        "in contact" is defined using adjacent connectivity, i.e. 4-connectivity
    Raises:
        ValueError if array is not 2-dimensional
    """
    if array.ndim != 2:
        raise ValueError('array not 2-dimensional')

    is_nan = np.isnan(array)
    # nan regions that survive border clearing do not touch the array edge;
    # the nan pixels that differ from that result are the edge-connected ones.
    interior_nan = clear_border_adjacent(is_nan.astype(int))
    edge_nan_mask = np.logical_xor(is_nan, interior_nan)

    # Treat the edge nans as structure so that anything touching them is
    # cleared together with them.
    with_edge = array.copy()
    with_edge[edge_nan_mask] = 1

    cleared = clear_border_adjacent(with_edge).astype(float)
    cleared[edge_nan_mask] = np.nan
    cleared[is_nan] = np.nan
    return cleared
|
|
225
|
+
|
|
226
|
+
|
|
227
|
+
def clear_border_adjacent(array, structure=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])):
    """
    Remove connected regions that touch the array edge.

    Similar to skimage.segmentation.clear_border, but the connectivity used
    to define a region is set by structure.

    Input:
        array: 2-D np.ndarray consisting of 0s and 1s
        structure: connectivity passed to scipy.ndimage.label
    Output:
        2-D boolean np.ndarray, True where a structure remains after the
        border structures are removed

    Examples:
        [[0,0,0,0],     [[0,0,0,0],     [[0,0,0,0],
         [0,1,1,0],      [0,1,1,0],      [0,1,0,0],
         [0,0,0,1],      [0,0,1,1],      [1,0,0,0],
         [0,0,0,0]]      [0,0,0,0]]      [0,0,0,0]]
        so ex 1 and 3 would still have one cloud in output but ex 2 would have 0
        for a structure of np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]).
    """
    labels, _ = label(array.astype(bool), structure)
    interior_labels = clear_border(labels)
    # Any surviving label marks a pixel that belongs to a kept structure.
    return interior_labels > 0
|
|
249
|
+
|
|
250
|
+
|
|
251
|
+
def remove_structure_holes(array, periodic=False):
    """
    Fills in all holes in all structures within array.

    Set any value of 0 that is not connected to the largest connected region
    of 0s (assumed to be the background) to 1. nans are treated as 0 while
    looking for holes and are restored to nan in the output.

    Input:
        array: 2D np.ndarray with values either 0, 1, or np.nan
        periodic: False, 'both', 'sides':
            if not False, regions of 0s that span the array edge are joined
            (see label_periodic_boundaries) before the background is chosen.
            If False, a region of 0s touching the edge is filled like any other
            hole unless it is the largest region.
    Output:
        filled array (a copy; the input is not modified). If array contains no
        0s there is nothing to fill and the copy is returned unchanged.
    Raises:
        ValueError if array is not a np.ndarray or holds a value greater than 1
    """
    if not isinstance(array, np.ndarray):
        raise ValueError('array must be a np.ndarray object')
    nan_locations = np.isnan(array)
    filled = array.copy()
    filled[nan_locations] = 0
    if np.any(filled > 1):
        raise ValueError('array can only have values 0, 1, or np.nan')

    # Invert and label: each connected region of 0s gets its own number.
    labelled, _ = label(1 - filled)
    if periodic != False:
        labelled = label_periodic_boundaries(labelled, periodic)

    # Label 0 marks the structures themselves, so leave it out when looking
    # for the background.
    region_labels, region_sizes = np.unique(labelled[labelled != 0], return_counts=True)
    if region_labels.size > 0:
        label_of_background = region_labels[region_sizes.argmax()]
        filled[(labelled != 0) & (labelled != label_of_background)] = 1

    if nan_locations.any():
        filled[nan_locations] = np.nan

    return filled
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Functions for calculating size distributions in 2-D domains while taking into account finite size effects.
|
|
3
|
+
By Thomas DeWitt (https://github.com/thomasdewitt/)
|
|
4
|
+
"""
|
|
5
|
+
import numpy as np
|
|
6
|
+
from scipy.ndimage import label
|
|
7
|
+
from numba import njit
|
|
8
|
+
from numba.typed import List
|
|
9
|
+
from warnings import warn
|
|
10
|
+
from skimage.segmentation import clear_border
|
|
11
|
+
from ._object_analysis import remove_structures_touching_border_nan, get_structure_props, get_every_boundary_perimeter, label_periodic_boundaries
|
|
12
|
+
from ._utils import linear_regression, encase_in_value
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def finite_array_size_distribution(arrays, variable, x_sizes=None, y_sizes=None, bins=100, bin_logs=True, min_threshold=10, truncation_threshold=0.5):
    """
    Calculate the size distributions for structures within a
    list of binary arrays, where 'size' is perimeter, area, height, or width.
    Returns the size distributions for truncated objects and nontruncated objects
    and the index where truncated objects begin to dominate.

    Works for binary arrays and also for binary arrays where the data boundary is
    demarcated by nans. This enables the domain boundary to be an arbitrary shape,
    rather than be rectangular (as is the case for a binary array).

    Input:
        - arrays: 2-D np.ndarray or list of 2-D np.ndarray, where objects of interest have value 1,
            the background has value 0, and no data has np.nan.
            Interior nans are treated like 0's, except the perimeter along them is not counted.
        - variable: 'area','perimeter','nested perimeter','height','width': which object attribute to bin by. See below for definitions.
        - x_sizes, y_sizes:
            pixel sizes in the x and y directions.
            If None, assume all pixel dimensions are 1.
            If np.ndarray, use these for each array in 'arrays'
            If list, assume x_sizes[i] corresponds to arrays[i], etc, for all i
        - bins: int or 1-D array:
            if int, auto calculate bin locations, make that number of bins.
                The largest bin edge is the total pixel area of the first array.
            if 1-D array: use these as bin edges or log10(bin edges). They must be uniformly
                linearly or logarithmically spaced (depending on bin_logs)
        - bin_logs: T/F: if True, bin log10(variable) into logarithmically-spaced bins. If False, bin
            variable into linearly spaced bins (if bins are explicitly passed, use these in any case)
        - min_threshold: smallest bin edge. If bin edges are passed, this arg is ignored.
        - truncation_threshold: float between 0 and 1. The first bin whose fraction of truncated objects exceeds this sets truncation_index
    Output:
        - bin_middles, nontruncated_counts, truncated_counts, truncation_index
            truncation_index is len(bin_edges) when no bin exceeds the threshold.
        Note: if bin_logs is True, bin middles is actually log10(bin_middles)

    Notes:

        'variable' definitions:
            'perimeter': Sum of pixel edge lengths between all pixels within a structure and
                    neighboring values of 0. Does not include perimeter adjacent to a nan.
                    A donut shaped structure returns a single value.
            'nested perimeter': Sum of pixel edge lengths between all pixels that are between a structure
                    and a neighboring region of 0s. Does not include perimeter adjacent to a nan.
                    A donut shaped structure returns two values: one for the inner circle and one for the outer.
            'area': Sum of individual pixel areas constituting the structure
            'height' or 'width': Overall extent of a structure in
                    the y- or x- direction.

    """
    if isinstance(arrays, np.ndarray): arrays = [arrays]
    if x_sizes is None: x_sizes = np.ones(arrays[0].shape, dtype=bool)
    if y_sizes is None: y_sizes = np.ones(arrays[0].shape, dtype=bool)

    def sizes_for(sizes, index):
        # A list holds one size array per input array; anything else is shared.
        # x_sizes and y_sizes are resolved independently so one may be a list
        # while the other is a single array.
        if isinstance(sizes, (list, tuple)):
            return sizes[index]
        return sizes

    if isinstance(bins, (int, np.integer)):
        max_value = np.nansum(sizes_for(x_sizes, 0) * sizes_for(y_sizes, 0))
        if bin_logs: bin_edges = np.linspace(np.log10(min_threshold), np.log10(max_value), bins+1)
        else: bin_edges = np.linspace(min_threshold, max_value, bins+1)
    else:
        bin_edges = np.asarray(bins)

    truncated_counts = np.zeros(bin_edges.size-1)
    nontruncated_counts = np.zeros(bin_edges.size-1)

    for i, array in enumerate(arrays):
        xs = encase_in_value(sizes_for(x_sizes, i))
        ys = encase_in_value(sizes_for(y_sizes, i))

        # Encase the array in nans to ensure objects in contact with the edge are considered truncated
        array = encase_in_value(array)

        no_truncated = remove_structures_touching_border_nan(array)
        truncated_only = array - no_truncated

        truncated_counts += array_size_distribution(truncated_only, x_sizes=xs, y_sizes=ys, variable=variable, wrap=None, bins=bin_edges, bin_logs=bin_logs)[1]
        nontruncated_counts += array_size_distribution(no_truncated, x_sizes=xs, y_sizes=ys, variable=variable, wrap=None, bins=bin_edges, bin_logs=bin_logs)[1]

    # Find index where number of edge clouds is greater than threshold times total number of clouds
    truncation_index = np.argwhere(truncated_counts > truncation_threshold*(truncated_counts+nontruncated_counts))
    if truncation_index.size == 0:  # then there is no need to truncate
        truncation_index = len(bin_edges)
    else:
        truncation_index = truncation_index[0,0]

    bin_middles = bin_edges[:-1] + 0.5*(bin_edges[1]-bin_edges[0])  # shift to center and remove value at end that shifted beyond bins

    return bin_middles, nontruncated_counts, truncated_counts, truncation_index
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def finite_array_powerlaw_exponent(arrays, variable, x_sizes=None, y_sizes=None, bins=100, min_threshold=10, truncation_threshold=0.5, min_count_threshold=30, return_counts=False):
    r"""
    Calculate the power-law exponent for size distributions of structures within a
    list of binary arrays, where 'size' phi can be perimeter, area, height, or width:

    n(phi) \propto phi^{-(1+exponent)}

    The exponent is minus the slope of a linear regression of log10(counts)
    against log10(bin middle) over logarithmically spaced bins.

    Works for binary arrays and also for binary arrays where the data boundary is
    demarcated by nans. This enables the domain boundary to be an arbitrary shape,
    rather than be rectangular (as is the case for a binary array).

    Input:
        - arrays: 2-D np.ndarray or list of 2-D np.ndarray, where objects of interest have value 1,
            the background has value 0, and no data has np.nan.
            Interior nans are treated like 0's, except the perimeter along them is not counted.
        - variable: 'area','perimeter','nested perimeter','height','width': which object attribute to bin by. See below for definitions.
        - x_sizes, y_sizes:
            pixel sizes in the x and y directions.
            If None, assume all pixel dimensions are 1.
            If np.ndarray, use these for each array in 'arrays'
            If list, assume x_sizes[i] corresponds to arrays[i], etc, for all i
        - bins: int or 1-D array:
            if int, auto calculate bin locations, make that number of bins
            if 1-D array: use these as log10(bin edges). They must be uniformly logarithmically spaced.
        - min_threshold: smallest bin edge. If bin edges are passed, this arg is ignored.
        - min_count_threshold: Omit any bin with counts fewer than this value from the linear regression.
        - truncation_threshold: float between 0 and 1. Bins from the first one with a larger fraction of truncated objects than this onwards are omitted from the regression
    Output:
        if return_counts:
            return (exponent, error), (log10(bin_middles), log10(good_counts))
            where good_counts are the total counts for values smaller than the truncation threshold;
            omitted bins are nan in both arrays
            error corresponding to 95% conf. interval
        else:
            return (exponent, error):
            error corresponding to 95% conf. interval

    A warning is issued when the bins used in the regression span fewer than
    2 orders of magnitude.

    Notes:

        'variable' definitions:
            'perimeter': Sum of pixel edge lengths between all pixels within a structure and
                    neighboring values of 0. Does not include perimeter adjacent to a nan.
                    A donut shaped structure returns a single value.
            'nested perimeter': Sum of pixel edge lengths between all pixels that are between a structure
                    and a neighboring region of 0s. Does not include perimeter adjacent to a nan.
                    A donut shaped structure returns two values: one for the inner circle and one for the outer.
            'area': Sum of individual pixel areas constituting the structure
            'height' or 'width': Overall extent of a structure in
                    the y- or x- direction.
    """

    log_bin_middles, nontruncated_counts, truncated_counts, truncation_index = finite_array_size_distribution(arrays=arrays,
                                                                                            variable=variable,
                                                                                            x_sizes=x_sizes,
                                                                                            y_sizes=y_sizes,
                                                                                            bins=bins,
                                                                                            bin_logs=True,
                                                                                            min_threshold=min_threshold,
                                                                                            truncation_threshold=truncation_threshold)

    total_good_counts = (truncated_counts+nontruncated_counts)
    total_good_counts[truncation_index:] = np.nan  # remove bins with too many truncated objects
    total_good_counts[total_good_counts<min_count_threshold] = np.nan  # remove bins with too few counts
    total_good_counts[total_good_counts==0] = np.nan  # eliminate log of 0 warning

    log_bin_middles[truncation_index:] = np.nan
    log_total_good_counts = np.log10(total_good_counts)

    # Measure the range actually used by the regression, i.e. only the bins
    # that survived the truncation and count thresholds.
    used = np.isfinite(log_bin_middles) & np.isfinite(log_total_good_counts)
    if used.any():
        orders_spanned = log_bin_middles[used].max() - log_bin_middles[used].min()
        if orders_spanned < 2:
            warn(f'Power law exponent is being estimated using data spanning only {orders_spanned:.01f} orders of magnitude')

    (slope, _), (slope_error, _) = linear_regression(log_bin_middles, log_total_good_counts)

    if return_counts:
        return (-slope, slope_error), (log_bin_middles, log_total_good_counts)
    return -slope, slope_error
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def array_size_distribution(array, variable='area', bins=30, bin_logs=True, structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]), wrap=None, x_sizes=None, y_sizes=None):
    """
    Given a single binary array, calculate contiguous object sizes and bin them by area/perimeter/height/width


    Input:
        - array: 2-D np.ndarray, where objects of interest have value 1, the background has value 0, and no data has np.nan.
            Nans are treated like 0's, except the perimeter along them is not counted.
        - variable: 'area','perimeter','nested perimeter','height','width': which object attribute to bin by. See below for definitions.
        - bins: int or 1-D array:
            if int, auto calculate bin locations between the smallest and largest value, make that number of bins
            if 1-D array: use these as bin edges
        - bin_logs: T/F: if True, bin log10(variable), else bin variable
        - structure: 3x3 2-D np.ndarray: defines object connectivity
        - wrap: None, 'sides', 'both':
            if 'sides', connect structures that span the left/right edge
            if 'both', connect structures that span the left/right edge and top/bottom edge
        - x_sizes, y_sizes: 2-D np.ndarray of shape array.shape: lengths of pixels of array. If None, assume all lengths are 1
    Output:
        - bin_middles, counts: 1-D np.ndarrays with one entry per bin. If bin_logs, bin_middles will be log10(bin value)
    Raises:
        ValueError if variable is not supported, or if bins is an int and the array holds no structure to derive bin edges from

    Notes:

        This function does not account for object truncation by the domain boundary.

        structure and wrap are not used when variable is 'nested perimeter'.

        'variable' definitions:
            'perimeter': Sum of pixel edge lengths between all pixels within a structure and
                    neighboring values of 0. Does not include perimeter adjacent to a nan.
                    A donut shaped structure returns a single value.
            'nested perimeter': Sum of pixel edge lengths between all pixels that are between a structure
                    and a neighboring region of 0s. Does not include perimeter adjacent to a nan.
                    A donut shaped structure returns two values: one for the inner circle and one for the outer.
            'area': Sum of individual pixel areas constituting the structure
            'height' or 'width': Overall extent of a structure in
                    the y- or x- direction.
    """
    if x_sizes is None: x_sizes = np.ones(array.shape, dtype=bool)
    if y_sizes is None: y_sizes = np.ones(array.shape, dtype=bool)

    if variable in ['area','perimeter','height','width']:
        p, a, h, w = get_structure_props(array, x_sizes, y_sizes, structure, wrap=wrap)
        to_bin = {'area': a, 'perimeter': p, 'height': h, 'width': w}[variable]
    elif variable == 'nested perimeter':
        to_bin = get_every_boundary_perimeter(array, x_sizes, y_sizes, False)
    else:
        raise ValueError(f'Unsupported variable: {variable}')

    # get_every_boundary_perimeter returns a list; work on an array throughout.
    to_bin = np.asarray(to_bin, dtype=float)
    if bin_logs: to_bin = np.log10(to_bin)

    if isinstance(bins, (int, np.integer)):
        if to_bin.size == 0:
            raise ValueError('Cannot auto calculate bin edges: no structures found in array')
        bin_edges = np.linspace(to_bin.min(), to_bin.max(), bins+1)
    else:
        bin_edges = np.asarray(bins)

    if np.count_nonzero(to_bin > bin_edges[-1]) > 0:
        warn(f'There exist {variable}s outside of bin edges that are being ignored')
    counts, _ = np.histogram(to_bin, bins=bin_edges)

    bin_middles = bin_edges[:-1] + 0.5*(bin_edges[1]-bin_edges[0])  # shift to center and remove value at end that shifted beyond bins

    return bin_middles, counts
|
objscale/_utils.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import numpy as np
|
|
2
|
+
from warnings import warn
|
|
3
|
+
|
|
4
|
+
|
|
5
|
+
def linear_regression(x, y):
    """
    Fit a straight line to the finite points of (x, y).

    Input:
        x, y: np.ndarray; points where either value is nan or infinite are ignored
    Output:
        (slope, y-int), (error_slope, error_y_int) for 95% conf
        All four values are nan, and a warning is issued, when fewer than
        3 usable points exist or the fit fails.
    Raises:
        TypeError if x or y is not a np.ndarray
    """
    if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
        raise TypeError('x, y, must be of type np.ndarray')
    index = np.isfinite(x) & np.isfinite(y)
    if len(x[index]) < 3:  # "the number of data points must exceed order to scale the covariance matrix"
        warn('Less than 3 points (x,y) are good (not nan), returning nans')
        return (np.nan, np.nan),(np.nan, np.nan)
    try:
        coefficients, cov = np.polyfit(x[index], y[index], 1, cov=True)
        error = np.sqrt(np.diag(cov))
    except Exception as e:
        # Best effort by design: report the failure and return nans. The
        # message must be a single string; warn()'s second positional
        # argument is the warning category, not more text.
        warn(f'Linear regression failed, error message\n {e}')
        return (np.nan, np.nan),(np.nan, np.nan)
    return coefficients, 2*error  # 95% conf interval is 2 times standard error
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def encase_in_value(array, value=np.nan, dtype=np.float32, n_deep=1):
    """
    Surround a 2-D array with a border of a constant value.

    Input:
        array: 2-D np.ndarray
        value: value to append on the edge
        dtype: dtype of the border strips that are attached; the dtype of the
            result is whatever NumPy's concatenation of strips and array gives
        n_deep: thickness of the border, in pixels
    Output:
        array: Same as input but with a layer 'n_deep' of 'value' all around the edge: 2-D np.ndarray
    """
    n_rows, n_cols = array.shape[0], array.shape[1]

    side_strip = np.full((n_rows, n_deep), value, dtype=dtype)
    # Top/bottom strips span the width after the side strips are attached.
    cap_strip = np.full((n_deep, n_cols + (2*n_deep)), value, dtype=dtype)

    widened = np.concatenate((side_strip, array, side_strip), axis=1)
    return np.concatenate((cap_strip, widened, cap_strip), axis=0)
|