cfdb 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cfdb/__init__.py +6 -0
- cfdb/combine.py +501 -0
- cfdb/core.py +1232 -0
- cfdb/creation.py +345 -0
- cfdb/data_models.py +189 -0
- cfdb/indexers.py +452 -0
- cfdb/main.py +857 -0
- cfdb/support_classes.py +1187 -0
- cfdb/utils.py +2079 -0
- cfdb-0.1.0.dist-info/METADATA +57 -0
- cfdb-0.1.0.dist-info/RECORD +13 -0
- cfdb-0.1.0.dist-info/WHEEL +4 -0
- cfdb-0.1.0.dist-info/licenses/LICENSE +16 -0
cfdb/indexers.py
ADDED
@@ -0,0 +1,452 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
Created on Tue Nov 14 13:21:10 2023
|
5
|
+
|
6
|
+
@author: mike
|
7
|
+
"""
|
8
|
+
import numpy as np
|
9
|
+
import rechunkit
|
10
|
+
|
11
|
+
from . import utils
|
12
|
+
# import utils
|
13
|
+
|
14
|
+
sup = np.testing.suppress_warnings()
|
15
|
+
sup.filter(FutureWarning)
|
16
|
+
|
17
|
+
########################################################
|
18
|
+
### Parameters
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
|
23
|
+
########################################################
|
24
|
+
### Helper functions
|
25
|
+
|
26
|
+
|
27
|
+
def loc_index_numeric(key, coord_data):
    """
    Translate a numeric coordinate label into an integer position.

    Parameters
    ----------
    key :
        The label to look up.
    coord_data : np.ndarray
        The coordinate values. Only its ``dtype`` and contents are used.
        NOTE(review): the non-float branch relies on np.searchsorted, which
        presumes the values are sorted ascending - confirm against callers.

    Returns
    -------
    int
        For float coordinates, the position of the first value that
        ``np.isclose`` considers equal to ``key``. For every other dtype, the
        insertion position reported by ``np.searchsorted`` (the key is not
        checked for actually being present).

    Raises
    ------
    IndexError
        For float coordinates when no value is close to ``key``.
    """
    if coord_data.dtype.kind != 'f':
        # Non-float labels: binary search for the insertion point.
        return int(np.searchsorted(coord_data, key))

    # Float labels: exact equality is unreliable, so match with a tolerance
    # and take the first hit.
    matches = np.nonzero(np.isclose(coord_data, key))[0]
    return int(matches[0])
|
37
|
+
|
38
|
+
|
39
|
+
def loc_index_str(key, coord_data):
    """
    Translate a string coordinate label into an integer position.

    Parameters
    ----------
    key : str
        The label to look up. When the coordinate holds datetimes
        (dtype kind 'M') the string is first converted to the coordinate's
        own dtype.
    coord_data : np.ndarray
        The coordinate values.
        NOTE(review): np.searchsorted presumes the values are sorted
        ascending - confirm against callers.

    Returns
    -------
    int
        The insertion position reported by ``np.searchsorted``. The label is
        not checked for actually being present in ``coord_data``.
    """
    is_datetime = coord_data.dtype.kind == 'M'
    lookup = np.array(key, dtype=coord_data.dtype) if is_datetime else key

    return int(np.searchsorted(coord_data, lookup))
|
49
|
+
|
50
|
+
|
51
|
+
def loc_index_slice(slice_obj, coord_data):
    """
    Translate a slice of coordinate labels into a slice of integer positions.

    Each bound is resolved independently: ``None`` stays ``None``, a string
    label goes through ``loc_index_str`` and anything else goes through
    ``loc_index_numeric``. The step of ``slice_obj`` is not used.

    Parameters
    ----------
    slice_obj : slice
        Slice whose ``start``/``stop`` are coordinate labels or None.
    coord_data : np.ndarray
        The coordinate values the labels are looked up in.

    Returns
    -------
    slice
        ``slice(start_idx, stop_idx)`` with integer (or None) bounds.

    Raises
    ------
    ValueError
        If both bounds are given and the start position is not strictly
        before the stop position.
    """
    def _label_to_position(label):
        # Dispatch on the type of *this* label. Previously the stop label was
        # dispatched on the type of the start label, which sent e.g.
        # slice(None, 'label') down the numeric path.
        if label is None:
            return None
        if isinstance(label, str):
            return loc_index_str(label, coord_data)
        return loc_index_numeric(label, coord_data)

    start_idx = _label_to_position(slice_obj.start)

    # NOTE(review): the original comment here said the stop index should
    # include the stop label "as per pandas", but the resolved position is
    # used as-is (exclusive). Behaviour is left unchanged - confirm intent.
    stop_idx = _label_to_position(slice_obj.stop)

    if (start_idx is not None) and (stop_idx is not None):
        if start_idx >= stop_idx:
            raise ValueError(f'start index at {start_idx} is equal to or greater than the stop index at {stop_idx}.')

    return slice(start_idx, stop_idx)
|
81
|
+
|
82
|
+
|
83
|
+
# def loc_index_array(values, dim_data):
|
84
|
+
# """
|
85
|
+
|
86
|
+
# """
|
87
|
+
# values = np.asarray(values)
|
88
|
+
|
89
|
+
# val_len = len(values)
|
90
|
+
# if val_len == 0:
|
91
|
+
# raise ValueError('The array is empty...')
|
92
|
+
# elif val_len == 1:
|
93
|
+
# index = loc_index_label(values[0], dim_data)
|
94
|
+
|
95
|
+
# ## check if regular
|
96
|
+
# index = loc_index_slice(slice(values[0], values[-1]), dim_data)
|
97
|
+
|
98
|
+
# return index
|
99
|
+
|
100
|
+
|
101
|
+
|
102
|
+
# @sup
|
103
|
+
def loc_index_combo_one(key, coord_data):
    """
    Resolve one label-based key against a single coordinate.

    Parameters
    ----------
    key : str, slice, None or numeric label
        The label selection for this coordinate.
    coord_data : np.ndarray
        The coordinate values.

    Returns
    -------
    int, slice or None
        None is passed through unchanged; a string goes to ``loc_index_str``,
        a slice to ``loc_index_slice`` and anything else to
        ``loc_index_numeric``.
    """
    if key is None:
        return None

    if isinstance(key, str):
        return loc_index_str(key, coord_data)

    if isinstance(key, slice):
        return loc_index_slice(key, coord_data)

    # Everything else is treated as a numeric label.
    return loc_index_numeric(key, coord_data)
|
120
|
+
|
121
|
+
|
122
|
+
def loc_index_combo_all(key, coords):
    """
    Resolve a label-based key against all coordinates of a variable.

    Parameters
    ----------
    key : str, slice, None, tuple or numeric label
        A non-tuple key is applied to the first coordinate only. A tuple is
        applied element by element, the n-th element against the n-th
        coordinate.
    coords : sequence
        Coordinate objects; each must expose its values as ``.data``.

    Returns
    -------
    int, slice, None or tuple
        None for a None key or an empty tuple; a tuple of per-coordinate
        results for a non-empty tuple key; otherwise the single position or
        slice for the first coordinate.
    """
    if key is None:
        return None

    if isinstance(key, tuple):
        if len(key) == 0:
            return None
        return tuple(
            loc_index_combo_one(sub_key, coords[dim].data)
            for dim, sub_key in enumerate(key)
        )

    # Scalar-style keys only address the first coordinate.
    first_coord = coords[0].data

    if isinstance(key, str):
        return loc_index_str(key, first_coord)

    if isinstance(key, slice):
        return loc_index_slice(key, first_coord)

    return loc_index_numeric(key, first_coord)
|
143
|
+
|
144
|
+
# def pos_to_keys(var_name, shape, pos):
|
145
|
+
# """
|
146
|
+
|
147
|
+
# """
|
148
|
+
# ndims = len(shape)
|
149
|
+
# if isinstance(pos, slice):
|
150
|
+
# start = pos.start
|
151
|
+
# stop = pos.stop
|
152
|
+
# if start is None:
|
153
|
+
# start = 0
|
154
|
+
# if stop is None:
|
155
|
+
|
156
|
+
|
157
|
+
# def numpy_indexer_coord(key, coord_name, origin, data):
|
158
|
+
# """
|
159
|
+
|
160
|
+
# """
|
161
|
+
# if isinstance(key, int):
|
162
|
+
|
163
|
+
|
164
|
+
def slice_int(key, coord_origins, var_shape, pos):
    """
    Turn an integer position along one dimension into a length-1 slice that
    is shifted by that dimension's origin.

    Parameters
    ----------
    key : int
        Position along dimension ``pos``, relative to the variable.
    coord_origins : sequence of int
        Origin offset of each dimension.
    var_shape : sequence of int
        Length of each dimension.
    pos : int
        The dimension being indexed.

    Returns
    -------
    slice
        ``slice(key + origin, key + origin + 1)``.

    Raises
    ------
    ValueError
        If ``key`` is at or beyond the length of the dimension.
    """
    # Valid positions are 0 .. length - 1, so key == length is already out of
    # range (the previous ``>`` check let it through).
    # NOTE(review): negative keys are not normalised or rejected here.
    if key >= var_shape[pos]:
        raise ValueError('key is larger than the coord length.')

    shifted = key + coord_origins[pos]

    return slice(shifted, shifted + 1)
|
174
|
+
|
175
|
+
|
176
|
+
def slice_slice(key, coord_origins, var_shape, pos):
    """
    Shift a positional slice along one dimension by that dimension's origin.

    Parameters
    ----------
    key : slice
        Slice relative to the variable. A non-int ``start`` means "from the
        beginning" and a non-int ``stop`` means "to the end". The step is
        not used.
    coord_origins : sequence of int
        Origin offset of each dimension.
    var_shape : sequence of int
        Length of each dimension.
    pos : int
        The dimension being indexed.

    Returns
    -------
    slice
        The origin-shifted ``slice(start, stop)``.

    Raises
    ------
    ValueError
        If the shifted start and stop are equal (empty selection).
    """
    origin = coord_origins[pos]

    # Missing/non-int bounds default to the full extent of the dimension.
    begin = key.start + origin if isinstance(key.start, int) else origin
    end = (key.stop if isinstance(key.stop, int) else var_shape[pos]) + origin

    if begin == end:
        raise ValueError('The start and stop for the slice is the same, which will produce 0 output.')

    return slice(begin, end)
|
201
|
+
|
202
|
+
|
203
|
+
def slice_none(coord_origins, var_shape, pos):
    """
    Build the slice covering the whole of one dimension.

    Parameters
    ----------
    coord_origins : sequence of int
        Origin offset of each dimension.
    var_shape : sequence of int
        Length of each dimension.
    pos : int
        The dimension to cover.

    Returns
    -------
    slice
        ``slice(origin, length + origin)`` for dimension ``pos``.
    """
    origin = coord_origins[pos]

    return slice(origin, var_shape[pos] + origin)
|
215
|
+
|
216
|
+
|
217
|
+
def index_combo_one(key, coord_origins, var_shape, pos):
    """
    Convert one positional key for dimension ``pos`` into an origin-shifted
    slice.

    Parameters
    ----------
    key : slice, int or None
        Selection along the dimension; None selects the whole dimension.
    coord_origins : sequence of int
        Origin offset of each dimension.
    var_shape : sequence of int
        Length of each dimension.
    pos : int
        The dimension being indexed.

    Returns
    -------
    slice

    Raises
    ------
    TypeError
        If ``key`` is not a slice, an int or None.
    """
    if isinstance(key, slice):
        return slice_slice(key, coord_origins, var_shape, pos)

    if isinstance(key, int):
        return slice_int(key, coord_origins, var_shape, pos)

    if key is None:
        return slice_none(coord_origins, var_shape, pos)

    raise TypeError('key must be an int, slice of ints, or None.')
|
231
|
+
|
232
|
+
|
233
|
+
def index_combo_all(key, coord_origins, var_shape):
    """
    Convert a positional key for a whole variable into one origin-shifted
    slice per dimension.

    Parameters
    ----------
    key : int, slice, None or tuple
        An int or slice applies to the first dimension only, with every
        other dimension selected in full. None or an empty tuple selects
        everything. A non-empty tuple must hold one int/slice/None per
        dimension.
    coord_origins : sequence of int
        Origin offset of each dimension.
    var_shape : sequence of int
        Length of each dimension.

    Returns
    -------
    tuple of slice
        One slice per dimension.

    Raises
    ------
    ValueError
        If a non-empty tuple key does not have one entry per dimension.
    TypeError
        If ``key`` (or a tuple element) is of an unsupported type.
    """
    def _full_extent():
        # Whole-dimension slices run from origin to origin + length. The
        # previous int/slice branches used slice(origin, length), which is
        # only right when every origin is 0.
        return [
            slice(coord_origins[pos], size + coord_origins[pos])
            for pos, size in enumerate(var_shape)
        ]

    if isinstance(key, int):
        slices = _full_extent()
        slices[0] = slice_int(key, coord_origins, var_shape, 0)
    elif isinstance(key, slice):
        slices = _full_extent()
        slices[0] = slice_slice(key, coord_origins, var_shape, 0)
    elif key is None:
        slices = _full_extent()
    elif isinstance(key, tuple):
        if len(key) == 0:
            slices = _full_extent()
        elif len(key) != len(var_shape):
            raise ValueError('The tuple key must be the same length as the associated coordinates.')
        else:
            slices = [
                index_combo_one(sub_key, coord_origins, var_shape, pos)
                for pos, sub_key in enumerate(key)
            ]
    else:
        raise TypeError('key must be an int, slice of ints, or None.')

    return tuple(slices)
|
258
|
+
|
259
|
+
|
260
|
+
def determine_final_array_shape(key, coord_origins, var_shape):
    """
    Work out the shape of the array that a positional key selects.

    Parameters
    ----------
    key : int, slice, None or tuple
        Positional key, as accepted by ``index_combo_all``.
    coord_origins : sequence of int
        Origin offset of each dimension.
    var_shape : sequence of int
        Length of each dimension.

    Returns
    -------
    tuple of int
        ``stop - start`` of the resolved slice for every dimension. Integer
        keys therefore contribute a length of 1 rather than dropping the
        dimension.
    """
    extents = []
    for dim_slice in index_combo_all(key, coord_origins, var_shape):
        extents.append(dim_slice.stop - dim_slice.start)

    return tuple(extents)
|
268
|
+
|
269
|
+
|
270
|
+
def slices_to_keys(slices, var_name, var_chunk_shape):
    """
    Yield the storage key of every chunk touched by a selection.

    Parameters
    ----------
    slices : sequence of slice
        One slice per dimension with concrete ``start``/``stop`` values.
    var_name :
        Variable name, passed through to ``utils.make_var_chunk_key``.
    var_chunk_shape : sequence of int
        Chunk length along each dimension.

    Yields
    ------
    The key returned by ``utils.make_var_chunk_key`` for each item produced
    by ``rechunkit.chunk_range`` over the selection.
    """
    lower_bounds = tuple(dim_slice.start for dim_slice in slices)
    upper_bounds = tuple(dim_slice.stop for dim_slice in slices)

    for chunk_slices in rechunkit.chunk_range(lower_bounds, upper_bounds, var_chunk_shape):
        # Round each start down to a multiple of the chunk length to get the
        # position the chunk key is built from.
        chunk_origin = tuple(
            (part.start//size) * size
            for size, part in zip(var_chunk_shape, chunk_slices)
        )

        yield utils.make_var_chunk_key(var_name, chunk_origin)
|
282
|
+
|
283
|
+
|
284
|
+
def slices_to_chunks_keys(slices, var_name, var_chunk_shape, clip_ends=True):
    """
    Walk the chunks touched by a selection and describe, for each one, where
    its data sits in the chunk and where it belongs in the output.

    Parameters
    ----------
    slices : sequence of slice
        One slice per dimension, e.g. the output of ``index_combo_all``.
    var_name :
        Variable name, passed through to ``utils.make_var_chunk_key``.
    var_chunk_shape : sequence of int
        Chunk length along each dimension.
    clip_ends : bool, default True
        Forwarded unchanged to ``rechunkit.chunk_range``.

    Yields
    ------
    target_chunk : tuple of slice
        The piece expressed relative to the start of the selection.
    source_chunk : tuple of slice
        The same piece expressed relative to the start of its chunk.
    chunk_key :
        The key returned by ``utils.make_var_chunk_key`` for that chunk.
    """
    sel_starts = tuple(dim_slice.start for dim_slice in slices)
    sel_stops = tuple(dim_slice.stop for dim_slice in slices)

    pieces = rechunkit.chunk_range(sel_starts, sel_stops, var_chunk_shape, clip_ends=clip_ends)

    for piece in pieces:
        # Start of the chunk that holds this piece: round each start down to
        # a multiple of the chunk length.
        chunk_origin = tuple(
            (part.start//size) * size
            for size, part in zip(var_chunk_shape, piece)
        )
        chunk_key = utils.make_var_chunk_key(var_name, chunk_origin)

        source_chunk = tuple(
            slice(part.start - origin, part.stop - origin)
            for origin, part in zip(chunk_origin, piece)
        )
        target_chunk = tuple(
            slice(part.start - sel_start, part.stop - sel_start)
            for sel_start, part in zip(sel_starts, piece)
        )

        yield target_chunk, source_chunk, chunk_key
|
302
|
+
|
303
|
+
|
304
|
+
|
305
|
+
def check_sel_input_data(sel, input_data, coord_origins, shape):
    """
    Resolve a positional selection and verify that the data to be written
    has exactly the shape of that selection.

    Parameters
    ----------
    sel : int, slice, None or tuple
        Positional key, as accepted by ``index_combo_all``.
    input_data :
        Array-like exposing ``.shape`` as a tuple.
    coord_origins : sequence of int
        Origin offset of each dimension.
    shape : sequence of int
        Length of each dimension of the variable.

    Returns
    -------
    tuple of slice
        The resolved, origin-shifted slices.

    Raises
    ------
    ValueError
        If ``input_data.shape`` differs from the shape of the selection.
    """
    resolved = index_combo_all(sel, coord_origins, shape)

    expected_shape = tuple(dim_slice.stop - dim_slice.start for dim_slice in resolved)
    if input_data.shape != expected_shape:
        raise ValueError('The selection shape is not equal to the input data.')

    return resolved
|
316
|
+
|
317
|
+
|
318
|
+
|
319
|
+
# def indexer_to_keys(key, var_name, var_chunk_shape, coord_origins, coord_sizes):
|
320
|
+
# """
|
321
|
+
|
322
|
+
# """
|
323
|
+
# if isinstance(key, int):
|
324
|
+
# new_pos = key + origin
|
325
|
+
|
326
|
+
# new_key = utils.make_var_chunk_key(var_name, (new_pos,))
|
327
|
+
|
328
|
+
# yield new_key
|
329
|
+
|
330
|
+
# elif isinstance(key, slice):
|
331
|
+
# start = key.start
|
332
|
+
# if not isinstance(start, int):
|
333
|
+
# start = origin
|
334
|
+
|
335
|
+
# stop = key.stop
|
336
|
+
# if not isinstance(stop, int):
|
337
|
+
# stop = shape[0] + origin
|
338
|
+
|
339
|
+
# chunk_iter = rechunkit.chunk_range((start,), (stop,), chunk_shape, clip_ends=False)
|
340
|
+
# for chunk in chunk_iter:
|
341
|
+
# new_key = utils.make_var_chunk_key(var_name, (chunk[0].start,))
|
342
|
+
|
343
|
+
# yield new_key
|
344
|
+
|
345
|
+
# elif key is None:
|
346
|
+
# start = origin
|
347
|
+
# stop = shape[0] + origin
|
348
|
+
|
349
|
+
# chunk_iter = rechunkit.chunk_range((start,), (stop,), chunk_shape, clip_ends=False)
|
350
|
+
# for chunk in chunk_iter:
|
351
|
+
# new_key = utils.make_var_chunk_key(var_name, (chunk[0].start,))
|
352
|
+
|
353
|
+
# yield new_key
|
354
|
+
|
355
|
+
# # elif isinstance(key, (list, np.ndarray)):
|
356
|
+
# # key = np.asarray(key)
|
357
|
+
|
358
|
+
# # if key.dtype.kind == 'b':
|
359
|
+
# # if len(key) != shape[0]:
|
360
|
+
# # raise ValueError('If the input is a bool array, then it must be the same length as the coordinate.')
|
361
|
+
# # elif key.dtype.kind not in ('i', 'u'):
|
362
|
+
# # raise TypeError('If the input is an array, then it must be either a bool of the length of the coordinate or integers.')
|
363
|
+
|
364
|
+
# # return key
|
365
|
+
# # else:
|
366
|
+
# # idx = index_array(key, dim_data)
|
367
|
+
|
368
|
+
# # return idx
|
369
|
+
# else:
|
370
|
+
# raise TypeError('key must be an int, slice of ints, or None.')
|
371
|
+
|
372
|
+
|
373
|
+
|
374
|
+
|
375
|
+
#####################################################
|
376
|
+
### Classes
|
377
|
+
|
378
|
+
|
379
|
+
class LocationIndexer:
    """
    Label-based accessor for a variable.

    Keys are coordinate labels (or slices/tuples of them). They are
    translated to positions with ``loc_index_combo_all`` and then handed to
    the wrapped variable.
    """

    def __init__(self, variable):
        """
        Parameters
        ----------
        variable :
            The wrapped variable. It must provide ``coords``, a ``get``
            method and item assignment.
        """
        self.variable = variable

    def __getitem__(self, key):
        """
        Return ``variable.get(...)`` for the positions matching ``key``.
        """
        positions = loc_index_combo_all(key, self.variable.coords)
        return self.variable.get(positions)

    def __setitem__(self, key, data):
        """
        Assign ``data`` to the positions of the variable matching ``key``.
        """
        positions = loc_index_combo_all(key, self.variable.coords)
        self.variable[positions] = data
|
407
|
+
|
408
|
+
|
409
|
+
|
410
|
+
|
411
|
+
|
412
|
+
|
413
|
+
|
414
|
+
|
415
|
+
|
416
|
+
|
417
|
+
|
418
|
+
|
419
|
+
|
420
|
+
|
421
|
+
|
422
|
+
|
423
|
+
|
424
|
+
|
425
|
+
|
426
|
+
|
427
|
+
|
428
|
+
|
429
|
+
|
430
|
+
|
431
|
+
|
432
|
+
|
433
|
+
|
434
|
+
|
435
|
+
|
436
|
+
|
437
|
+
|
438
|
+
|
439
|
+
|
440
|
+
|
441
|
+
|
442
|
+
|
443
|
+
|
444
|
+
|
445
|
+
|
446
|
+
|
447
|
+
|
448
|
+
|
449
|
+
|
450
|
+
|
451
|
+
|
452
|
+
|