cfdb 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cfdb/indexers.py ADDED
@@ -0,0 +1,452 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Tue Nov 14 13:21:10 2023
5
+
6
+ @author: mike
7
+ """
8
+ import numpy as np
9
+ import rechunkit
10
+
11
+ from . import utils
12
+ # import utils
13
+
14
# Module-level warning suppressor configured to filter FutureWarning.
# In the visible part of this module it is only referenced by a
# commented-out ``@sup`` decorator.
sup = np.testing.suppress_warnings()
sup.filter(category=FutureWarning)
16
+
17
+ ########################################################
18
+ ### Parameters
19
+
20
+
21
+
22
+
23
+ ########################################################
24
+ ### Helper functions
25
+
26
+
27
def loc_index_numeric(key, coord_data):
    """
    Locate the integer position of a numeric label in a coordinate array.

    Parameters
    ----------
    key : numeric scalar
        The label to look up.
    coord_data : np.ndarray
        The coordinate values. For non-float dtypes the values are expected
        to be sorted because the lookup uses np.searchsorted.

    Returns
    -------
    int
        For float coordinates, the first position where np.isclose matches
        the key. For every other dtype, the insertion position returned by
        np.searchsorted (which is not necessarily an exact match).

    Raises
    ------
    IndexError
        If coord_data is float and no value is close to the key.
    """
    if coord_data.dtype.kind != 'f':
        return int(np.searchsorted(coord_data, key))

    # Exact equality is unreliable for floats, so match within tolerance
    # and take the first hit along the first axis.
    close_mask = np.isclose(coord_data, key)
    first_axis_hits = np.nonzero(close_mask)[0]

    return int(first_axis_hits[0])
37
+
38
+
39
def loc_index_str(key, coord_data):
    """
    Locate the integer position of a string label in a coordinate array.

    Parameters
    ----------
    key : str
        The label to look up. When coord_data has a datetime64 dtype the
        string is first converted to that dtype.
    coord_data : np.ndarray
        The coordinate values, expected to be sorted because the lookup
        uses np.searchsorted.

    Returns
    -------
    int
        The insertion position returned by np.searchsorted.
    """
    is_datetime = coord_data.dtype.kind == 'M'
    lookup = np.array(key, dtype=coord_data.dtype) if is_datetime else key

    return int(np.searchsorted(coord_data, lookup))
49
+
50
+
51
def loc_index_slice(slice_obj, coord_data):
    """
    Convert a slice of coordinate labels into a slice of integer positions.

    Parameters
    ----------
    slice_obj : slice
        Slice whose start and stop are labels (str or numeric) or None.
        The step is not used.
    coord_data : np.ndarray
        The coordinate values the labels are looked up in.

    Returns
    -------
    slice
        slice(start_idx, stop_idx) where each bound is an int position, or
        None when the corresponding label was None.

    Raises
    ------
    ValueError
        If both bounds are given and the start position is greater than or
        equal to the stop position.
    """
    start = slice_obj.start
    stop = slice_obj.stop

    ## use np.searchsorted because coordinates are sorted
    if start is None:
        start_idx = None
    elif isinstance(start, str):
        start_idx = loc_index_str(start, coord_data)
    else:
        start_idx = loc_index_numeric(start, coord_data)

    ## Dispatch on the type of stop itself. Each bound must pick its own
    ## lookup, otherwise a string stop combined with a None (or numeric)
    ## start would be sent to the numeric lookup.
    ## NOTE(review): the original comment said the stop label should be
    ## included as per pandas, but the position is returned unchanged here
    ## - confirm how the consumer treats the stop bound.
    if stop is None:
        stop_idx = None
    elif isinstance(stop, str):
        stop_idx = loc_index_str(stop, coord_data)
    else:
        stop_idx = loc_index_numeric(stop, coord_data)

    if (stop_idx is not None) and (start_idx is not None):
        if start_idx >= stop_idx:
            raise ValueError(f'start index at {start_idx} is equal to or greater than the stop index at {stop_idx}.')

    return slice(start_idx, stop_idx)
81
+
82
+
83
+ # def loc_index_array(values, dim_data):
84
+ # """
85
+
86
+ # """
87
+ # values = np.asarray(values)
88
+
89
+ # val_len = len(values)
90
+ # if val_len == 0:
91
+ # raise ValueError('The array is empty...')
92
+ # elif val_len == 1:
93
+ # index = loc_index_label(values[0], dim_data)
94
+
95
+ # ## check if regular
96
+ # index = loc_index_slice(slice(values[0], values[-1]), dim_data)
97
+
98
+ # return index
99
+
100
+
101
+
102
+ # @sup
103
def loc_index_combo_one(key, coord_data):
    """
    Convert a single label-based key into a positional index for one coordinate.

    Parameters
    ----------
    key : str, slice, numeric scalar, or None
        The label selection for this coordinate.
    coord_data : np.ndarray
        The coordinate values the labels are looked up in.

    Returns
    -------
    int, slice, or None
        None for a None key, a slice of positions for a slice key, and an
        int position for a string or numeric key.
    """
    if key is None:
        return None

    if isinstance(key, str):
        return loc_index_str(key, coord_data)

    if isinstance(key, slice):
        return loc_index_slice(key, coord_data)

    # Anything else is treated as a numeric label.
    return loc_index_numeric(key, coord_data)
120
+
121
+
122
def loc_index_combo_all(key, coords):
    """
    Convert a label-based key into positional indexes across coordinates.

    Parameters
    ----------
    key : str, slice, numeric scalar, tuple, or None
        A non-tuple key is applied to the first coordinate only. A tuple
        supplies one key per coordinate, matched by position.
    coords : sequence
        Coordinate objects; each one is read through its ``data`` attribute.

    Returns
    -------
    int, slice, tuple, or None
        None for a None key or an empty tuple, a tuple of per-coordinate
        results for a non-empty tuple, otherwise the positional index for
        the first coordinate.
    """
    if key is None:
        return None

    if isinstance(key, tuple):
        if len(key) == 0:
            return None
        return tuple(
            loc_index_combo_one(sub_key, coords[axis].data)
            for axis, sub_key in enumerate(key)
        )

    # Every remaining key type targets the first coordinate.
    first_coord = coords[0].data

    if isinstance(key, str):
        return loc_index_str(key, first_coord)

    if isinstance(key, slice):
        return loc_index_slice(key, first_coord)

    return loc_index_numeric(key, first_coord)
143
+
144
+ # def pos_to_keys(var_name, shape, pos):
145
+ # """
146
+
147
+ # """
148
+ # ndims = len(shape)
149
+ # if isinstance(pos, slice):
150
+ # start = pos.start
151
+ # stop = pos.stop
152
+ # if start is None:
153
+ # start = 0
154
+ # if stop is None:
155
+
156
+
157
+ # def numpy_indexer_coord(key, coord_name, origin, data):
158
+ # """
159
+
160
+ # """
161
+ # if isinstance(key, int):
162
+
163
+
164
def slice_int(key, coord_origins, var_shape, pos):
    """
    Convert an integer position into a length-1 slice shifted by the origin.

    Parameters
    ----------
    key : int
        Zero-based position along dimension ``pos``.
    coord_origins : sequence of int
        Per-dimension offsets that are added to positions.
    var_shape : sequence of int
        Per-dimension lengths of the variable.
    pos : int
        The dimension the key applies to.

    Returns
    -------
    slice
        slice(key + origin, key + origin + 1)

    Raises
    ------
    ValueError
        If key is negative or not smaller than the length of the dimension.
    """
    size = var_shape[pos]

    # Valid positions are 0..size-1. A key equal to the length is already
    # one past the end, and a negative key would land before the origin.
    if key < 0 or key >= size:
        raise ValueError(f'key {key} is out of bounds for a coord of length {size}.')

    start = key + coord_origins[pos]

    return slice(start, start + 1)
174
+
175
+
176
def slice_slice(key, coord_origins, var_shape, pos):
    """
    Shift a positional slice by the origin of one dimension.

    Parameters
    ----------
    key : slice
        Slice of positions. A bound that is not an int instance (including
        None) is treated as open. The step is not used.
    coord_origins : sequence of int
        Per-dimension offsets that are added to positions.
    var_shape : sequence of int
        Per-dimension lengths of the variable.
    pos : int
        The dimension the slice applies to.

    Returns
    -------
    slice
        The slice with both bounds offset by the origin; an open start
        becomes the origin and an open stop becomes origin + length.

    Raises
    ------
    ValueError
        If the resulting start and stop are equal.
    """
    origin = coord_origins[pos]

    begin = key.start + origin if isinstance(key.start, int) else origin
    end = key.stop + origin if isinstance(key.stop, int) else var_shape[pos] + origin

    # TODO - Should I leave this test in here? Or should this be allowed?
    if begin == end:
        raise ValueError('The start and stop for the slice is the same, which will produce 0 output.')

    return slice(begin, end)
201
+
202
+
203
def slice_none(coord_origins, var_shape, pos):
    """
    Build the slice that covers the whole of one dimension.

    Parameters
    ----------
    coord_origins : sequence of int
        Per-dimension offsets that are added to positions.
    var_shape : sequence of int
        Per-dimension lengths of the variable.
    pos : int
        The dimension to cover.

    Returns
    -------
    slice
        slice(origin, origin + length) for dimension ``pos``.
    """
    origin = coord_origins[pos]

    return slice(origin, var_shape[pos] + origin)
215
+
216
+
217
def index_combo_one(key, coord_origins, var_shape, pos):
    """
    Convert a positional key for one dimension into an origin-shifted slice.

    Parameters
    ----------
    key : int, slice, or None
        The positional selection for dimension ``pos``.
    coord_origins : sequence of int
        Per-dimension offsets that are added to positions.
    var_shape : sequence of int
        Per-dimension lengths of the variable.
    pos : int
        The dimension the key applies to.

    Returns
    -------
    slice

    Raises
    ------
    TypeError
        If key is not an int, a slice, or None.
    """
    if isinstance(key, slice):
        return slice_slice(key, coord_origins, var_shape, pos)

    if isinstance(key, int):
        return slice_int(key, coord_origins, var_shape, pos)

    if key is None:
        return slice_none(coord_origins, var_shape, pos)

    raise TypeError('key must be an int, slice of ints, or None.')
231
+
232
+
233
def index_combo_all(key, coord_origins, var_shape):
    """
    Convert a positional key into one origin-shifted slice per dimension.

    Parameters
    ----------
    key : int, slice, tuple, or None
        An int or slice selects along the first dimension and takes every
        other dimension in full. None or an empty tuple selects everything.
        A non-empty tuple must supply one int, slice, or None per dimension.
    coord_origins : sequence of int
        Per-dimension offsets that are added to positions.
    var_shape : sequence of int
        Per-dimension lengths of the variable.

    Returns
    -------
    tuple of slice
        One slice per dimension of var_shape.

    Raises
    ------
    ValueError
        If a non-empty tuple key has a different length than var_shape.
    TypeError
        If key is not an int, slice, tuple, or None.
    """
    ndims = len(var_shape)

    if isinstance(key, tuple):
        if len(key) == 0:
            # An empty tuple means "select everything", same as None.
            key = None
        elif len(key) != ndims:
            raise ValueError('The tuple key must be the same length as the associated coordinates.')
        else:
            return tuple(index_combo_one(key1, coord_origins, var_shape, pos) for pos, key1 in enumerate(key))

    if key is None:
        return tuple(slice_none(coord_origins, var_shape, pos) for pos in range(ndims))

    if isinstance(key, int):
        first = slice_int(key, coord_origins, var_shape, 0)
    elif isinstance(key, slice):
        first = slice_slice(key, coord_origins, var_shape, 0)
    else:
        raise TypeError('key must be an int, slice of ints, or None.')

    # The remaining dimensions are taken in full. Their stop must be
    # origin + length (as slice_none computes), not the bare length,
    # otherwise the result is wrong whenever an origin is non-zero.
    rest = (slice_none(coord_origins, var_shape, pos) for pos in range(1, ndims))

    return (first, *rest)
258
+
259
+
260
def determine_final_array_shape(key, coord_origins, var_shape):
    """
    Compute the shape of the array that a positional key selects.

    Parameters
    ----------
    key : int, slice, tuple, or None
        Positional key, interpreted by index_combo_all.
    coord_origins : sequence of int
        Per-dimension offsets that are added to positions.
    var_shape : sequence of int
        Per-dimension lengths of the variable.

    Returns
    -------
    tuple of int
        stop - start of each slice produced by index_combo_all.
    """
    extents = []
    for dim_slice in index_combo_all(key, coord_origins, var_shape):
        extents.append(dim_slice.stop - dim_slice.start)

    return tuple(extents)
268
+
269
+
270
def slices_to_keys(slices, var_name, var_chunk_shape):
    """
    Yield the chunk key of every chunk touched by a selection.

    Parameters
    ----------
    slices : tuple of slice
        One slice per dimension with explicit start and stop.
    var_name : str
        Variable name passed through to utils.make_var_chunk_key.
    var_chunk_shape : tuple of int
        Chunk length per dimension.

    Yields
    ------
    The key returned by utils.make_var_chunk_key for each region produced
    by rechunkit.chunk_range, using the region start rounded down to a
    multiple of the chunk length in every dimension.
    """
    sel_starts = tuple(sl.start for sl in slices)
    sel_stops = tuple(sl.stop for sl in slices)

    # presumably chunk_range yields one tuple of slices per chunk - only
    # the .start of each element is used here.
    for chunk_region in rechunkit.chunk_range(sel_starts, sel_stops, var_chunk_shape):
        chunk_origin = tuple(
            (region.start // size) * size
            for size, region in zip(var_chunk_shape, chunk_region)
        )

        yield utils.make_var_chunk_key(var_name, chunk_origin)
282
+
283
+
284
def slices_to_chunks_keys(slices, var_name, var_chunk_shape, clip_ends=True):
    """
    Yield, per touched chunk, where its data sits in the target and in the chunk.

    Parameters
    ----------
    slices : tuple of slice
        Slices from the output of index_combo_all.
    var_name : str
        Variable name passed through to utils.make_var_chunk_key.
    var_chunk_shape : tuple of int
        Chunk length per dimension.
    clip_ends : bool
        Forwarded unchanged to rechunkit.chunk_range.

    Yields
    ------
    target_chunk : tuple of slice
        The region expressed relative to the start of the selection.
    partial_chunk : tuple of slice
        The same region expressed relative to the start of its chunk.
    key
        The key returned by utils.make_var_chunk_key for that chunk.
    """
    sel_starts = tuple(sl.start for sl in slices)
    sel_stops = tuple(sl.stop for sl in slices)

    regions = rechunkit.chunk_range(sel_starts, sel_stops, var_chunk_shape, clip_ends=clip_ends)

    for chunk_region in regions:
        # Round each region start down to a multiple of the chunk length
        # to get the start of the chunk that contains it.
        chunk_origin = tuple(
            (region.start // size) * size
            for size, region in zip(var_chunk_shape, chunk_region)
        )
        chunk_key = utils.make_var_chunk_key(var_name, chunk_origin)

        within_chunk = tuple(
            slice(region.start - origin, region.stop - origin)
            for origin, region in zip(chunk_origin, chunk_region)
        )
        within_target = tuple(
            slice(region.start - sel_start, region.stop - sel_start)
            for sel_start, region in zip(sel_starts, chunk_region)
        )

        yield within_target, within_chunk, chunk_key
302
+
303
+
304
+
305
def check_sel_input_data(sel, input_data, coord_origins, shape):
    """
    Check that input data has the shape of the selection it is assigned to.

    Parameters
    ----------
    sel : int, slice, tuple, or None
        Positional key, interpreted by index_combo_all.
    input_data : object with a ``shape`` attribute
        The data to be written.
    coord_origins : sequence of int
        Per-dimension offsets that are added to positions.
    shape : sequence of int
        Per-dimension lengths of the variable.

    Returns
    -------
    tuple of slice
        The slices produced by index_combo_all for the selection.

    Raises
    ------
    ValueError
        If input_data.shape differs from the shape of the selection.
    """
    slices = index_combo_all(sel, coord_origins, shape)

    expected_shape = tuple(
        dim_slice.stop - dim_slice.start
        for dim_slice in slices
    )
    if input_data.shape != expected_shape:
        raise ValueError('The selection shape is not equal to the input data.')

    return slices
316
+
317
+
318
+
319
+ # def indexer_to_keys(key, var_name, var_chunk_shape, coord_origins, coord_sizes):
320
+ # """
321
+
322
+ # """
323
+ # if isinstance(key, int):
324
+ # new_pos = key + origin
325
+
326
+ # new_key = utils.make_var_chunk_key(var_name, (new_pos,))
327
+
328
+ # yield new_key
329
+
330
+ # elif isinstance(key, slice):
331
+ # start = key.start
332
+ # if not isinstance(start, int):
333
+ # start = origin
334
+
335
+ # stop = key.stop
336
+ # if not isinstance(stop, int):
337
+ # stop = shape[0] + origin
338
+
339
+ # chunk_iter = rechunkit.chunk_range((start,), (stop,), chunk_shape, clip_ends=False)
340
+ # for chunk in chunk_iter:
341
+ # new_key = utils.make_var_chunk_key(var_name, (chunk[0].start,))
342
+
343
+ # yield new_key
344
+
345
+ # elif key is None:
346
+ # start = origin
347
+ # stop = shape[0] + origin
348
+
349
+ # chunk_iter = rechunkit.chunk_range((start,), (stop,), chunk_shape, clip_ends=False)
350
+ # for chunk in chunk_iter:
351
+ # new_key = utils.make_var_chunk_key(var_name, (chunk[0].start,))
352
+
353
+ # yield new_key
354
+
355
+ # # elif isinstance(key, (list, np.ndarray)):
356
+ # # key = np.asarray(key)
357
+
358
+ # # if key.dtype.kind == 'b':
359
+ # # if len(key) != shape[0]:
360
+ # # raise ValueError('If the input is a bool array, then it must be the same length as the coordinate.')
361
+ # # elif key.dtype.kind not in ('i', 'u'):
362
+ # # raise TypeError('If the input is an array, then it must be either a bool of the length of the coordinate or integers.')
363
+
364
+ # # return key
365
+ # # else:
366
+ # # idx = index_array(key, dim_data)
367
+
368
+ # # return idx
369
+ # else:
370
+ # raise TypeError('key must be an int, slice of ints, or None.')
371
+
372
+
373
+
374
+
375
+ #####################################################3
376
+ ### Classes
377
+
378
+
379
class LocationIndexer:
    """
    Label-based indexer that wraps a variable.

    Keys are translated to positional indexes with loc_index_combo_all
    using the variable's ``coords`` and then forwarded to the variable.
    """
    def __init__(self, variable):
        """
        Parameters
        ----------
        variable : object
            Must provide ``coords``, a ``get`` method, and item assignment.
        """
        self.variable = variable


    def __getitem__(self, key):
        """
        Return ``variable.get`` of the positional index for a label key.
        """
        var = self.variable

        return var.get(loc_index_combo_all(key, var.coords))


    def __setitem__(self, key, data):
        """
        Assign data to the variable at the positional index for a label key.
        """
        positions = loc_index_combo_all(key, self.variable.coords)

        self.variable[positions] = data
407
+
408
+
409
+
410
+
411
+
412
+
413
+
414
+
415
+
416
+
417
+
418
+
419
+
420
+
421
+
422
+
423
+
424
+
425
+
426
+
427
+
428
+
429
+
430
+
431
+
432
+
433
+
434
+
435
+
436
+
437
+
438
+
439
+
440
+
441
+
442
+
443
+
444
+
445
+
446
+
447
+
448
+
449
+
450
+
451
+
452
+