objscale 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,285 @@
1
+ import numpy as np
2
+ from scipy.ndimage import label
3
+ from numba import njit
4
+ from numba.typed import List
5
+ from warnings import warn
6
+ from skimage.segmentation import clear_border
7
+ from ._utils import encase_in_value
8
+
9
+
10
def get_structure_props(array, x_sizes, y_sizes, structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]), print_none=False, wrap=None):
    """
    Calculate perimeter, area, height and width of every connected structure in a binary array.

    Input:
        array      - Binary array of structures: 2-d np.ndarray, padded with 0's or np.nan's
        x_sizes    - Sizes of pixels in horizontal direction, same shape as array: 2-d np.ndarray
        y_sizes    - Sizes of pixels in vertical direction, same shape as array: 2-d np.ndarray
        structure  - Defines connectivity (passed to scipy.ndimage.label)
        print_none - Print a message if no structures are found
        wrap       - None, 'sides', 'both':
                        if 'sides', connect structures that span the left/right edge
                        if 'both', connect structures that span the left/right edge and top/bottom edge
    Output:
        perimeter, area, height, width: 1-D np.ndarrays, each element the
        perimeter/area/height/width of an individual structure.
        Four empty arrays are returned if no structures are found.
    Raises:
        ValueError if the shapes differ, if a pixel size is nan where array is
        finite, if wrap is not supported, or if any computed property is nan.

    Note: if x_sizes or y_sizes are not uniform, the width is the sum, over the
    columns occupied by the object, of the average width of the object's pixels
    in that column. Similarly, the height is the sum, over the occupied rows, of
    the average height of the object's pixels in that row.
    Any perimeter between a structure and nan is not counted.
    """

    if array.shape != x_sizes.shape or array.shape != y_sizes.shape:
        raise ValueError('array, x_sizes, and y_sizes must all be same shape. Currently {},{},{}'.format(array.shape, x_sizes.shape, y_sizes.shape))

    if np.count_nonzero((np.isnan(x_sizes) | np.isnan(y_sizes)) & np.isfinite(array)):
        raise ValueError('x or y sizes are nan in locations where array is not')

    # Replace nans with 0 so they are not labelled as structures and do not
    # connect otherwise separate structures.
    no_nans = array.copy()
    no_nans[np.isnan(array)] = 0
    if np.count_nonzero(no_nans) == 0:
        if print_none:
            print('No structures found')
        return np.array([]), np.array([]), np.array([]), np.array([])

    # Every structure gets a unique label 1..n; float output so that nans can
    # be written back into the labelled array below.
    labelled_array, _ = label(no_nans.astype(bool), structure, output=np.float32)

    if wrap is None:
        pass
    elif wrap == 'both' or wrap == 'sides':
        labelled_array = label_periodic_boundaries(labelled_array, wrap)
    else:
        raise ValueError(f'wrap={wrap} not supported')

    # Sort the flattened labels once; the sort order gives both the sorted
    # label values and the pixel locations grouped by label.
    flat_labels = labelled_array.flatten()
    original_locations = np.argsort(flat_labels)
    values = flat_labels[original_locations]
    indices_2d = np.array(np.unravel_index(original_locations, labelled_array.shape)).T  # flattened indices -> (row, col)

    # Turn these back to nan so that perimeter along them is not counted.
    labelled_array[np.isnan(array)] = np.nan

    # Split the sorted pixel list wherever the label value changes.
    split_here = np.roll(values, shift=-1) - values
    split_here[-1] = 0  # last difference is against the rolled-over first value
    separated_structure_indices = np.split(indices_2d, np.where(split_here != 0)[0] + 1)
    separated_structure_indices = separated_structure_indices[1:]  # first group is label 0 (not structure)
    if len(separated_structure_indices) == 0:
        return np.array([]), np.array([]), np.array([]), np.array([])

    # numba needs a typed list: https://numba.readthedocs.io/en/stable/reference/pysupported.html#feature-typed-list
    p, a, h, w = _get_structure_props_helper(labelled_array, List(separated_structure_indices), x_sizes, y_sizes)
    p, a, h, w = np.array(p), np.array(a), np.array(h), np.array(w)

    nanmask = np.isnan(p) | np.isnan(a) | np.isnan(h) | np.isnan(w)
    n_bad = np.count_nonzero(nanmask)
    if n_bad > 0:
        raise ValueError('Nan values found: {} out of {}'.format(n_bad, len(p)))
    return p, a, h, w
69
+
70
+
71
@njit()
def _get_structure_props_helper(labelled_array, separated_structure_indices, x_sizes, y_sizes):
    """
    Compiled inner loop of get_structure_props.

    Input:
        labelled_array - 2-D array in which 0 is background, nan marks missing
                         data and every other value is a structure label
        separated_structure_indices - sequence of (n_pixels, 2) integer arrays,
                         one per structure, holding (row, col) of each pixel
        x_sizes, y_sizes - 2-D arrays of pixel widths / heights
    Output:
        p, a, h, w - lists with the perimeter, area, height and width of each
                     structure. Structures whose area is 0 are omitted.

    An edge contributes to the perimeter only if the neighbour across it is
    exactly 0, so edges next to nan are not counted. Neighbours of pixels on
    the array edge are looked up on the opposite side of the array.
    """

    p, a, = [],[]
    h, w = [],[]

    last_row = labelled_array.shape[0]-1
    last_col = labelled_array.shape[1]-1

    for indices in separated_structure_indices:
        perimeter = 0
        area = 0

        y_coords_structure = np.array([c[0] for c in indices])
        x_coords_structure = np.array([c[1] for c in indices])
        unique_y_coords = []
        unique_x_coords = []
        height = 0
        width = 0

        for (i,j) in indices:
            # Height: the first time a row is met, add the mean height of the
            # structure's pixels lying in that row.
            if i not in unique_y_coords:
                unique_y_coords.append(i)
                in_this_row = (y_coords_structure==i)
                y_sizes_here = []
                for loc,take in enumerate(in_this_row):
                    if take: y_sizes_here.append(y_sizes[y_coords_structure[loc],x_coords_structure[loc]])
                height += np.mean(np.array(y_sizes_here))
            # Width: same, per column.
            if j not in unique_x_coords:
                unique_x_coords.append(j)
                in_this_col = (x_coords_structure==j)
                x_sizes_here = []
                for loc,take in enumerate(in_this_col):
                    if take: x_sizes_here.append(x_sizes[y_coords_structure[loc],x_coords_structure[loc]])
                width += np.mean(np.array(x_sizes_here))

            # Perimeter: check the four neighbours, wrapping at the array edge.
            # Neighbour below
            if i != last_row and labelled_array[i+1, j] == 0: perimeter += x_sizes[i,j]
            elif i == last_row and labelled_array[0, j] == 0: perimeter += x_sizes[i,j]

            # Neighbour above
            if i != 0 and labelled_array[i-1, j] == 0: perimeter += x_sizes[i,j]
            elif i == 0 and labelled_array[last_row, j] == 0: perimeter += x_sizes[i,j]

            # Neighbour to the right
            if j != last_col and labelled_array[i, j+1] == 0: perimeter += y_sizes[i,j]
            elif j == last_col and labelled_array[i, 0] == 0: perimeter += y_sizes[i,j]

            # Neighbour to the left; in column 0 the wrapped neighbour is in
            # the LAST column (comparing against column 0 would compare the
            # pixel with itself and never count this edge).
            if j != 0 and labelled_array[i, j-1] == 0: perimeter += y_sizes[i,j]
            elif j == 0 and labelled_array[i, last_col] == 0: perimeter += y_sizes[i,j]

            # Area:
            area += y_sizes[i,j] * x_sizes[i,j]

        if area != 0:
            p.append(perimeter)
            a.append(area)
            h.append(height)
            w.append(width)

    return p, a, h, w
132
+
133
+
134
def label_periodic_boundaries(labelled_array, wrap):
    """
    Give labelled structures that continue across an array edge a single label.

    Parameters:
        labelled_array (numpy.ndarray): 2-D array where 0 is background and every
            other value is a label, e.g. the output of scipy.ndimage.label().
            It is modified in place.
        wrap (str): 'sides' or 'both'.
            'sides': for every row whose first and last column both hold a
                     label, the label found in the last column is replaced
                     everywhere by the label found in the first column.
            'both':  as 'sides', and additionally for every column the label
                     found in the last row is replaced everywhere by the label
                     found in the first row.

    Returns:
        labelled_array (numpy.ndarray): the same array object, relabelled.

    Raises:
        ValueError: if 'wrap' is neither 'sides' nor 'both'.
    """
    if wrap not in ['sides', 'both']:
        raise ValueError(f'wrap = {wrap} not supported')

    # Left/right edge: merge the right-hand label into the left-hand one.
    last_col = labelled_array.shape[1] - 1
    for row in range(labelled_array.shape[0]):
        left_label = labelled_array[row, 0]
        if left_label == 0:
            continue
        right_label = labelled_array[row, last_col]
        # skip background and labels that were already merged
        if right_label != 0 and right_label != left_label:
            labelled_array[labelled_array == right_label] = left_label

    if wrap == 'both':
        # Top/bottom edge: merge the bottom label into the top one.
        last_row = labelled_array.shape[0] - 1
        for col in range(labelled_array.shape[1]):
            top_label = labelled_array[0, col]
            if top_label == 0:
                continue
            bottom_label = labelled_array[last_row, col]
            if bottom_label != 0 and bottom_label != top_label:
                labelled_array[labelled_array == bottom_label] = top_label

    return labelled_array
175
+
176
+
177
def get_every_boundary_perimeter(array, x_sizes, y_sizes, return_nlevels=False):
    """
    Return the perimeter of every individual boundary between 0s and 1s.

    Each closed boundary contributes its own value, e.g. a donut of 1s
    contributes two values: its outer boundary and the boundary of its hole.

    Input:
        array: 2-D np.ndarray that should only contain 0s and 1s
        x_sizes, y_sizes: pixel sizes, same shape as array
        return_nlevels: if True, also return how many nesting levels were peeled
    Output:
        perimeters (list), or (perimeters, n_levels) if return_nlevels

    Raises ValueError if more than 100 nesting levels are encountered.
    """
    perimeters = []
    n_levels = 0
    current = array
    while True:
        if np.nansum(current) == 0:
            break
        n_levels += 1
        if n_levels > 100:
            raise ValueError('Hole layer limit reached: 100 layers')

        # Outer boundaries of this level: fill every hole, then measure.
        filled = remove_structure_holes(current)
        props = get_structure_props(encase_in_value(filled), encase_in_value(x_sizes), encase_in_value(y_sizes))
        perimeters.extend(props[0])

        # Peel one layer: what were holes become the structures of the next
        # level, and structures inside those holes become its holes.
        inverted = filled - current
        inverted[filled == 0] = 0
        current = inverted

    if return_nlevels:
        return perimeters, n_levels
    return perimeters
201
+
202
+
203
def remove_structures_touching_border_nan(array):
    """
    Remove every structure that is in contact with the nans surrounding the data.

    Input:
        array: 2-D np.ndarray consisting of 0s, 1s, and np.nan. All values at
               the array edge should be np.nan
    Output:
        2-D float np.ndarray consisting of 0s, 1s, and np.nan in which any
        structure in contact with the nan region connected to the array edge
        has been set to 0. Nan locations of the input stay nan.

    "In contact" uses adjacent connectivity, i.e. 4-connectivity.
    """
    if array.ndim != 2:
        raise ValueError('array not 2-dimensional')

    is_nan = np.isnan(array)
    nan_as_int = is_nan.astype(int)
    # nans that are connected to the array edge = all nans minus interior nans
    border_nans = (nan_as_int - clear_border_adjacent(nan_as_int)).astype(bool)

    # Treat the border nans as structure so that anything attached to them is
    # removed together with them.
    merged = array.copy()
    merged[border_nans] = 1
    result = clear_border_adjacent(merged).astype(float)

    # Restore every nan of the input (border nans are a subset of these).
    result[is_nan] = np.nan
    return result
225
+
226
+
227
def clear_border_adjacent(array, structure=np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])):
    """
    Remove connected regions that touch the array edge.

    Similar to skimage.segmentation.clear_border, but the connectivity used to
    decide what belongs to one region is given by 'structure'.

    Input:
        array: 2-D np.ndarray consisting of 0s and 1s
        structure: connectivity passed to scipy.ndimage.label
    Output:
        2-D boolean np.ndarray, True where a region that does not touch the
        edge is located

    Examples:
        ex 1           ex 2           ex 3
        [[0,0,0,0],    [[0,0,0,0],    [[0,0,0,0],
         [0,1,1,0],     [0,1,1,0],     [0,1,0,0],
         [0,0,0,1],     [0,0,1,1],     [1,0,0,0],
         [0,0,0,0]]     [0,0,0,0]]     [0,0,0,0]]
        For a structure of np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]),
        ex 1 and 3 still have one cloud in the output but ex 2 has none.
    """
    # Label first so that clear_border removes whole regions as defined by
    # 'structure'.
    labelled, _ = label(array.astype(bool), structure)
    interior_labels = clear_border(labelled)
    return interior_labels != 0
249
+
250
+
251
def remove_structure_holes(array, periodic=False):
    """
    Fill in all holes in all structures within array.

    The largest connected region of 0s is assumed to be the "background".
    Every 0 that is not connected to it is set to 1.

    Input:
        array: 2D np.ndarray with values either 0, 1, or np.nan
        periodic: False, 'both', 'sides':
            if not False, regions of 0s are connected across the array edges
            as done by label_periodic_boundaries. If False, a region of 0s
            touching the edge is filled like any other hole unless it is the
            background.
    Output:
        filled array (a copy; nan locations of the input stay nan)
    Raises:
        ValueError if array is not a np.ndarray or holds values larger than 1
    """
    if not isinstance(array, np.ndarray):
        raise ValueError('array must be a np.ndarray object')

    nan_locations = np.isnan(array)
    filled = array.copy()
    filled[nan_locations] = 0  # nans are treated as 0 while searching for holes
    if np.any(filled > 1):
        raise ValueError('array can only have values 0, 1, or np.nan')

    # Invert and label: every connected region of 0s gets its own label.
    labelled, _ = label(1 - filled)
    if periodic != False:  # noqa: E712 - kept as a comparison so that e.g. None is still rejected downstream
        labelled = label_periodic_boundaries(labelled, periodic)

    # Label 0 marks the structures themselves; it must not be picked as background.
    unique_values, unique_counts = np.unique(labelled, return_counts=True)
    is_zero_region = unique_values != 0
    unique_values, unique_counts = unique_values[is_zero_region], unique_counts[is_zero_region]

    # With no 0-valued pixel at all there is no background and nothing to fill
    # (argmax of an empty array would raise).
    if unique_values.size > 0:
        label_of_background = unique_values[unique_counts.argmax()]
        filled[(labelled != 0) & (labelled != label_of_background)] = 1

    if nan_locations.any():
        filled[nan_locations] = np.nan

    return filled
@@ -0,0 +1,249 @@
1
+ """
2
+ Functions for calculating size distributions in 2-D domains while taking into account finite size effects.
3
+ By Thomas DeWitt (https://github.com/thomasdewitt/)
4
+ """
5
+ import numpy as np
6
+ from scipy.ndimage import label
7
+ from numba import njit
8
+ from numba.typed import List
9
+ from warnings import warn
10
+ from skimage.segmentation import clear_border
11
+ from ._object_analysis import remove_structures_touching_border_nan, get_structure_props, get_every_boundary_perimeter, label_periodic_boundaries
12
+ from ._utils import linear_regression, encase_in_value
13
+
14
+
15
def finite_array_size_distribution(arrays, variable, x_sizes=None, y_sizes=None, bins=100, bin_logs=True, min_threshold=10, truncation_threshold=0.5):
    """
    Calculate the size distributions for structures within a
    list of binary arrays, where 'size' is perimeter, area, height, or width.
    Returns the size distributions for truncated objects and nontruncated objects
    and the index where truncated objects begin to dominate.

    Works for binary arrays and also for binary arrays where the data boundary is
    demarcated by nans. This enables the domain boundary to be an arbitrary shape,
    rather than be rectangular (as is the case for a binary array).

    Input:
        - arrays: 2-D np.ndarray or list of 2-D np.ndarray, where objects of interest have value 1,
            the background has value 0, and no data has np.nan.
            Interior nans are treated like 0's, except the perimeter along them is not counted.
        - variable: 'area', 'perimeter', 'nested perimeter', 'height', 'width': which object attribute to bin by. See below for definitions.
        - x_sizes, y_sizes:
            pixel sizes in the x and y directions.
            If None, assume all pixel dimensions are 1.
            If np.ndarray, use these for each array in 'arrays'
            If list, assume x_sizes[i] corresponds to arrays[i], etc, for all i
        - bins: int or 1-D array:
            if int, auto calculate bin locations, make that number of bins
            if 1-D array: use these as bin edges or log10(bin edges). They must be uniformly
            linearly or logarithmically spaced (depending on bin_logs)
        - bin_logs: T/F: if True, bin log10(variable) into logarithmically-spaced bins. If False, bin
            variable into linearly spaced bins (if bins are explicitly passed, use these in any case)
        - min_threshold: smallest bin edge. If bin edges are passed, this arg is ignored.
        - truncation_threshold: float between 0 and 1. The returned truncation_index is the first bin in which
            the fraction of truncated objects exceeds this value.
    Output:
        - bin_middles, nontruncated_counts, truncated_counts, truncation_index
        Note: if bin_logs is True, bin middles is actually log10(bin_middles)
        truncation_index equals len(bin_edges) if no bin exceeds the threshold.

    Notes:

        When bins is an int, the largest bin edge is the total area of the
        first array's pixels (nansum of x_sizes*y_sizes).

        'variable' definitions:
            'perimeter': Sum of pixel edge lengths between all pixels within a structure and
                neighboring values of 0. Does not include perimeter adjacent to a nan.
                A donut shaped structure returns a single value.
            'nested perimeter': Sum of pixel edge lengths between all pixels that are between a structure
                and a neighboring region of 0s. Does not include perimeter adjacent to a nan.
                A donut shaped structure returns two values: one for the inner circle and one for the outer.
            'area': Sum of individual pixel areas constituting the structure
            'height' or 'width': Extent of the structure in the y- or x- direction, as
                computed by get_structure_props.

    """
    if isinstance(arrays, np.ndarray):
        arrays = [arrays]
    if x_sizes is None:
        x_sizes = np.ones(arrays[0].shape, dtype=bool)
    if y_sizes is None:
        y_sizes = np.ones(arrays[0].shape, dtype=bool)

    x_is_sequence = isinstance(x_sizes, (list, tuple))
    y_is_sequence = isinstance(y_sizes, (list, tuple))

    if isinstance(bins, (int, np.integer)):
        # Pick the first array's pixel sizes independently for x and y, so that
        # a list for one and a single array for the other is handled correctly.
        first_xs = x_sizes[0] if x_is_sequence else x_sizes
        first_ys = y_sizes[0] if y_is_sequence else y_sizes
        max_value = np.nansum(first_xs*first_ys)
        if bin_logs:
            bin_edges = np.linspace(np.log10(min_threshold), np.log10(max_value), bins+1)
        else:
            bin_edges = np.linspace(min_threshold, max_value, bins+1)
    else:
        bin_edges = np.asarray(bins)

    truncated_counts = np.zeros(bin_edges.size-1)
    nontruncated_counts = np.zeros(bin_edges.size-1)

    for i, array in enumerate(arrays):
        xs = x_sizes[i] if x_is_sequence else x_sizes
        ys = y_sizes[i] if y_is_sequence else y_sizes

        # Encase the array in nans to ensure objects in contact with the edge are considered truncated
        array = encase_in_value(array)
        xs_encased = encase_in_value(xs)
        ys_encased = encase_in_value(ys)

        no_truncated = remove_structures_touching_border_nan(array)
        truncated_only = array-no_truncated

        truncated_counts += array_size_distribution(truncated_only, x_sizes=xs_encased, y_sizes=ys_encased, variable=variable, wrap=None, bins=bin_edges, bin_logs=bin_logs)[1]
        nontruncated_counts += array_size_distribution(no_truncated, x_sizes=xs_encased, y_sizes=ys_encased, variable=variable, wrap=None, bins=bin_edges, bin_logs=bin_logs)[1]

    # Find index where number of edge clouds is greater than threshold times total number of clouds
    dominated = np.flatnonzero(truncated_counts > truncation_threshold*(truncated_counts+nontruncated_counts))
    if dominated.size == 0:  # then there is no need to truncate
        truncation_index = len(bin_edges)
    else:
        truncation_index = dominated[0]

    bin_middles = bin_edges[:-1]+0.5*(bin_edges[1]-bin_edges[0])  # shift to center and remove value at end that shifted beyond bins

    return bin_middles, nontruncated_counts, truncated_counts, truncation_index
108
+
109
+
110
def finite_array_powerlaw_exponent(arrays, variable, x_sizes=None, y_sizes=None, bins=100, min_threshold=10, truncation_threshold=0.5, min_count_threshold=30, return_counts=False):
    r"""
    Calculate the power-law exponent for size distributions of structures within a
    list of binary arrays, where 'size' phi can be perimeter, area, height, or width:

    n(phi) \propto phi^{-(1+exponent)}

    The exponent is minus the slope of a linear regression of log10(counts)
    against log10(bin middle) using logarithmically spaced bins.

    Works for binary arrays and also for binary arrays where the data boundary is
    demarcated by nans. This enables the domain boundary to be an arbitrary shape,
    rather than be rectangular (as is the case for a binary array).

    Input:
        - arrays: 2-D np.ndarray or list of 2-D np.ndarray, where objects of interest have value 1,
            the background has value 0, and no data has np.nan.
            Interior nans are treated like 0's, except the perimeter along them is not counted.
        - variable: 'area', 'perimeter', 'nested perimeter', 'height', 'width': which object attribute to bin by. See below for definitions.
        - x_sizes, y_sizes:
            pixel sizes in the x and y directions.
            If None, assume all pixel dimensions are 1.
            If np.ndarray, use these for each array in 'arrays'
            If list, assume x_sizes[i] corresponds to arrays[i], etc, for all i
        - bins: int or 1-D array:
            if int, auto calculate bin locations, make that number of bins
            if 1-D array: use these as log10(bin edges). They must be uniformly logarithmically spaced.
        - min_threshold: smallest bin edge. If bin edges are passed, this arg is ignored.
        - min_count_threshold: Omit any bin with counts fewer than this value from the linear regression.
        - truncation_threshold: float between 0 and 1. Bins from the first one with a larger fraction of
            truncated objects than this onwards are omitted from the regression
    Output:
        if return_counts:
            return (exponent, error), (log10(bin_middles), log10(good_counts))
            where good_counts are the total counts of the bins used in the regression
            (nan for omitted bins) and error corresponds to a 95% conf. interval
        else:
            return (exponent, error):
            error corresponding to 95% conf. interval

    A warning is issued if the bins used in the regression span fewer than
    2 orders of magnitude.

    Notes:

        'variable' definitions:
            'perimeter': Sum of pixel edge lengths between all pixels within a structure and
                neighboring values of 0. Does not include perimeter adjacent to a nan.
                A donut shaped structure returns a single value.
            'nested perimeter': Sum of pixel edge lengths between all pixels that are between a structure
                and a neighboring region of 0s. Does not include perimeter adjacent to a nan.
                A donut shaped structure returns two values: one for the inner circle and one for the outer.
            'area': Sum of individual pixel areas constituting the structure
            'height' or 'width': Extent of the structure in the y- or x- direction, as
                computed by get_structure_props.
    """

    log_bin_middles, nontruncated_counts, truncated_counts, truncation_index = finite_array_size_distribution(arrays=arrays,
                                                                                                               variable=variable,
                                                                                                               x_sizes=x_sizes,
                                                                                                               y_sizes=y_sizes,
                                                                                                               bins=bins,
                                                                                                               bin_logs=True,
                                                                                                               min_threshold=min_threshold,
                                                                                                               truncation_threshold=truncation_threshold)

    total_good_counts = (truncated_counts+nontruncated_counts)
    total_good_counts[truncation_index:] = np.nan  # remove bins with too many truncated objects
    total_good_counts[total_good_counts<min_count_threshold] = np.nan  # remove bins with too few counts
    total_good_counts[total_good_counts==0] = np.nan  # eliminate log of 0 warning

    # Warn based on the bins that actually enter the regression, measured from
    # the lower edge of the first used bin to the upper edge of the last one.
    used = np.isfinite(total_good_counts)
    if np.any(used):
        bin_width = log_bin_middles[1]-log_bin_middles[0] if log_bin_middles.size > 1 else 0.0
        used_middles = log_bin_middles[used]
        span = used_middles.max()-used_middles.min()+bin_width
        if span < 2:
            warn(f'Power law exponent is being estimated using data spanning only {span:.01f} orders of magnitude')

    log_bin_middles[truncation_index:] = np.nan

    log_total_good_counts = np.log10(total_good_counts)

    (slope, _), (slope_error, _) = linear_regression(log_bin_middles, log_total_good_counts)

    if return_counts:
        return (-slope, slope_error), (log_bin_middles, log_total_good_counts)
    return -slope, slope_error
189
+
190
+
191
def array_size_distribution(array, variable='area', bins=30, bin_logs=True, structure = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]), wrap=None, x_sizes=None, y_sizes=None):
    """
    Given a single binary array, calculate contiguous object sizes and bin them by area/perimeter/height/width


    Input:
        - array: 2-D np.ndarray, where objects of interest have value 1, the background has value 0, and no data has np.nan.
            Nans are treated like 0's, except the perimeter along them is not counted.
        - variable: 'area', 'perimeter', 'nested perimeter', 'height', 'width': which object attribute to bin by. See below for definitions.
        - bins: int or 1-D array:
            if int, auto calculate bin locations, make that number of bins
            if 1-D array: use these as bin edges
        - bin_logs: T/F: if True, bin log10(variable), else bin variable
        - structure: 3x3 2-D np.ndarray: defines object connectivity
            (not used for 'nested perimeter')
        - wrap: None, 'sides', 'both' (not used for 'nested perimeter'):
            if 'sides', connect structures that span the left/right edge
            if 'both', connect structures that span the left/right edge and top/bottom edge
        - x_sizes, y_sizes: 2-D np.ndarray of shape array.shape: lengths of pixels of array. If None, assume all lengths are 1
    Output:
        - bin_middles, counts: 1-D np.ndarrays with one element per bin. If bin_logs, bin_middles will be log10(bin value)
    Raises:
        ValueError if variable is not supported, or if bins is an int and no
        structures were found (bin edges cannot be derived from no data).

    Notes:

        This function does not account for object truncation by the domain boundary.

        'variable' definitions:
            'perimeter': Sum of pixel edge lengths between all pixels within a structure and
                neighboring values of 0. Does not include perimeter adjacent to a nan.
                A donut shaped structure returns a single value.
            'nested perimeter': Sum of pixel edge lengths between all pixels that are between a structure
                and a neighboring region of 0s. Does not include perimeter adjacent to a nan.
                A donut shaped structure returns two values: one for the inner circle and one for the outer.
            'area': Sum of individual pixel areas constituting the structure
            'height' or 'width': Extent of the structure in the y- or x- direction, as
                computed by get_structure_props.
    """
    if x_sizes is None:
        x_sizes = np.ones(array.shape, dtype=bool)
    if y_sizes is None:
        y_sizes = np.ones(array.shape, dtype=bool)

    if variable in ('area', 'perimeter', 'height', 'width'):
        p, a, h, w = get_structure_props(array, x_sizes, y_sizes, structure, wrap=wrap)
        to_bin = {'area': a, 'perimeter': p, 'height': h, 'width': w}[variable]
    elif variable == 'nested perimeter':
        to_bin = get_every_boundary_perimeter(array, x_sizes, y_sizes, False)
    else:
        raise ValueError(f'Unsupported variable: {variable}')

    # 'nested perimeter' yields a plain list; make sure the comparisons below
    # always operate on an array.
    to_bin = np.asarray(to_bin, dtype=float)

    if bin_logs:
        to_bin = np.log10(to_bin)

    if isinstance(bins, (int, np.integer)):
        if to_bin.size == 0:
            raise ValueError('No structures found: cannot determine bin edges automatically, pass explicit bin edges instead')
        bin_edges = np.linspace(to_bin.min(), to_bin.max(), bins+1)
    else:
        bin_edges = np.asarray(bins)

    if np.count_nonzero(to_bin>bin_edges[-1])>0:
        warn(f'There exist {variable}s outside of bin edges that are being ignored')
    counts, _ = np.histogram(to_bin, bins=bin_edges)

    bin_middles = bin_edges[:-1]+0.5*(bin_edges[1]-bin_edges[0])  # shift to center and remove value at end that shifted beyond bins

    return bin_middles, counts
objscale/_utils.py ADDED
@@ -0,0 +1,40 @@
1
+ import numpy as np
2
+ from warnings import warn
3
+
4
+
5
def linear_regression(x, y):
    """
    Fit a straight line to the finite points of (x, y).

    Input:
        x, y: np.ndarray of equal shape. Points where x or y is nan or
              infinite are ignored.
    Output:
        (slope, y-int), (error_slope, error_y_int), where the errors are twice
        the standard errors (95% conf).
        If fewer than 3 usable points exist, or the fit fails, a warning is
        issued and (nan, nan), (nan, nan) is returned.
    Raises:
        TypeError if x or y is not a np.ndarray
    """
    if not isinstance(x, np.ndarray) or not isinstance(y, np.ndarray):
        raise TypeError('x, y, must be of type np.ndarray')

    index = np.isfinite(x) & np.isfinite(y)
    if np.count_nonzero(index) < 3:  # "the number of data points must exceed order to scale the covariance matrix"
        warn('Less than 3 points (x,y) are good (not nan), returning nans')
        return (np.nan, np.nan), (np.nan, np.nan)

    try:
        coefficients, cov = np.polyfit(x[index], y[index], 1, cov=True)
        error = np.sqrt(np.diag(cov))
    except Exception as e:
        # Best effort: report the failure and return nans. The message must be
        # a single string; the second positional argument of warn() is the
        # warning category.
        warn(f'Linear regression failed, error message\n {e}')
        return (np.nan, np.nan), (np.nan, np.nan)

    return coefficients, 2*error  # 95% conf interval is 2 times standard error
21
+
22
+
23
def encase_in_value(array, value=np.nan, dtype=np.float32, n_deep=1):
    """
    Surround a 2-D array with a frame of a constant value.

    Input:
        array: 2-D np.ndarray
        value: value to put on the edge
        dtype: dtype of the frame that is attached; the dtype of the result is
               whatever NumPy promotes 'dtype' and array.dtype to
        n_deep: thickness of the frame in pixels
    Output:
        array: Same as input but with a layer 'n_deep' of 'value' all around
               the edge: 2-D np.ndarray of shape
               (rows + 2*n_deep, cols + 2*n_deep). The input is not modified.
    """
    n_rows, n_cols = array.shape[0], array.shape[1]

    # Left/right strips are as tall as the input; top/bottom strips span the
    # already widened array.
    side_strip = np.empty((n_rows, n_deep), dtype=dtype)
    cap_strip = np.empty((n_deep, n_cols + (2*n_deep)), dtype=dtype)
    side_strip[:] = value
    cap_strip[:] = value

    widened = np.concatenate((side_strip, array, side_strip), axis=1)
    return np.concatenate((cap_strip, widened, cap_strip), axis=0)