cfdb 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cfdb/__init__.py +6 -0
- cfdb/combine.py +501 -0
- cfdb/core.py +1232 -0
- cfdb/creation.py +345 -0
- cfdb/data_models.py +189 -0
- cfdb/indexers.py +452 -0
- cfdb/main.py +857 -0
- cfdb/support_classes.py +1187 -0
- cfdb/utils.py +2079 -0
- cfdb-0.1.0.dist-info/METADATA +57 -0
- cfdb-0.1.0.dist-info/RECORD +13 -0
- cfdb-0.1.0.dist-info/WHEEL +4 -0
- cfdb-0.1.0.dist-info/licenses/LICENSE +16 -0
cfdb/creation.py
ADDED
@@ -0,0 +1,345 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
Created on Thu Feb 13 17:08:09 2025
|
5
|
+
|
6
|
+
@author: mike
|
7
|
+
"""
|
8
|
+
import numpy as np
|
9
|
+
from typing import Set, Optional, Dict, Tuple, List, Union, Any
|
10
|
+
|
11
|
+
from . import utils, support_classes as sc
|
12
|
+
# import utils, support_classes as sc
|
13
|
+
|
14
|
+
#################################################
|
15
|
+
|
16
|
+
|
17
|
+
class Coord:
    """
    Factory for creating coordinates in a dataset.

    Every creation method registers the new variable in the dataset's system metadata, wraps it in a ``support_classes.Coordinate`` and stores that object in the dataset's variable cache.
    """
    def __init__(self, dataset):
        """
        Parameters
        ----------
        dataset:
            The dataset that new coordinates are added to. Its ``_sys_meta`` and ``_var_cache`` attributes are used by the creation methods.
        """
        self._dataset = dataset


    def generic(self, name: str, data: np.ndarray | None = None, dtype_decoded: str | np.dtype | None = None, dtype_encoded: str | np.dtype | None = None, chunk_shape: Tuple[int] | None = None, fillvalue: Union[int, float, str, None] = None, scale_factor: Union[float, int, None] = None, add_offset: Union[float, int, None] = None, step: int | float | bool = False) -> sc.Coordinate:
        """
        The generic method to create a coordinate.

        Parameters
        ----------
        name: str
            The name of the coordinate. It must be unique and follow the `CF conventions for variables names <https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#_naming_conventions>`_.
        data: np.ndarray or None
            Data to be added after creation. The length and dtype of the data will override other parameters.
        dtype_decoded: str, np.dtype, or None
            The dtype of the original data (data that the user will work with). If data is not passed, then this is a mandatory parameter.
        dtype_encoded: str, np.dtype, or None
            The dtype of the stored data. This is the dtype that the data will be stored as. Only relevant for going from a float to a smaller integer. If this is None, then dtype_encoded will be assigned the dtype_decoded.
        chunk_shape: tuple of ints or None
            The chunk shape that the data will be stored as. If None, then it will be estimated. The estimated chunk shape will be optimally estimated to make it efficient to rechunk later.
        fillvalue: int, float, str, or None
            The fill value for the dtype_encoded. If the dtype_decoded is a float, then the decoded fill values will always be np.nan when returned. The fillvalue is primarily for storage.
        scale_factor: int, float, or None
            If dtype_decoded is a float and dtype_encoded is an int, then the stored values are encoded = int(round((decoded - add_offset)/scale_factor)).
        add_offset: int, float, or None
            As described by the scale_factor.
        step: int, float, or bool
            If the coordinate data is regular (hourly for example), then assign a step to ensure the coordinate will always stay regular. Defaults to False.

        Returns
        -------
        cfdb.Coordinate

        Raises
        ------
        ValueError
            If the dataset already contains a variable called ``name``.
        """
        if name in self._dataset._sys_meta.variables:
            raise ValueError(f"Dataset already contains the variable {name}.")

        name, var = utils.parse_coord_inputs(name, data, chunk_shape, dtype_decoded, dtype_encoded, fillvalue, scale_factor, add_offset, step=step)

        ## Var init process
        # The metadata must be registered before the Coordinate is built from the name.
        self._dataset._sys_meta.variables[name] = var

        ## Init Coordinate
        coord = sc.Coordinate(name, self._dataset)

        ## Add data if it has been passed
        if isinstance(data, np.ndarray):
            coord.append(data)

        self._dataset._var_cache[name] = coord

        ## Add attributes to datetime vars
        if coord.dtype_decoded.kind == 'M':
            coord.attrs['units'] = utils.parse_cf_time_units(coord.dtype_decoded)
            coord.attrs['calendar'] = 'proleptic_gregorian'

        return coord


    def like(self, name: str, coord: Union[sc.Coordinate, sc.CoordinateView], copy_data=False) -> sc.Coordinate:
        """
        Create a Coordinate based on the parameters of another Coordinate. A new unique name must be passed.

        Parameters
        ----------
        name: str
            The name of the new coordinate.
        coord: Coordinate or CoordinateView
            The coordinate whose dtypes, chunk shape, fill value, scale factor, offset and step are reused.
        copy_data: bool
            If True, the data of ``coord`` is also passed to the new coordinate.

        Returns
        -------
        cfdb.Coordinate
        """
        data = coord.data if copy_data else None

        return self.generic(name, data, dtype_decoded=coord.dtype_decoded, dtype_encoded=coord.dtype_encoded, chunk_shape=coord.chunk_shape, fillvalue=coord.fillvalue, scale_factor=coord.scale_factor, add_offset=coord.add_offset, step=coord.step)


    def _standard(self, key: str, data: np.ndarray | None, step: int | float | bool, kwargs: dict) -> sc.Coordinate:
        """
        Create a coordinate from the parameters and default attributes that utils holds for ``key``.
        """
        name, params = utils.get_var_params(key, kwargs)

        coord = self.generic(name, data, step=step, **params)
        coord.attrs.update(utils.default_attrs[key])

        return coord


    def latitude(self, data: np.ndarray | None = None, step: int | float | bool = False, **kwargs) -> sc.Coordinate:
        """
        Create a latitude coordinate. The standard encodings and attributes will be assigned. See the generic method for all of the parameters.
        """
        return self._standard('lat', data, step, kwargs)


    def longitude(self, data: np.ndarray | None = None, step: int | float | bool = False, **kwargs) -> sc.Coordinate:
        """
        Create a longitude coordinate. The standard encodings and attributes will be assigned. See the generic method for all of the parameters.
        """
        return self._standard('lon', data, step, kwargs)


    def time(self, data: np.ndarray | None = None, step: int | float | bool = False, **kwargs) -> sc.Coordinate:
        """
        Create a time coordinate. The standard encodings and attributes will be assigned. See the generic method for all of the parameters.
        """
        return self._standard('time', data, step, kwargs)


    def height(self, data: np.ndarray | None = None, step: int | float | bool = False, **kwargs) -> sc.Coordinate:
        """
        Create a height coordinate. The standard encodings and attributes will be assigned. See the generic method for all of the parameters.
        """
        return self._standard('height', data, step, kwargs)


    def altitude(self, data: np.ndarray | None = None, step: int | float | bool = False, **kwargs) -> sc.Coordinate:
        """
        Create an altitude coordinate. The standard encodings and attributes will be assigned. See the generic method for all of the parameters.
        """
        return self._standard('altitude', data, step, kwargs)
|
172
|
+
|
173
|
+
|
174
|
+
class DataVar:
    """
    Factory for creating data variables in a dataset.

    Every creation method registers the new variable in the dataset's system metadata, wraps it in a ``support_classes.DataVariable`` and stores that object in the dataset's variable cache.
    """
    def __init__(self, dataset):
        """
        Parameters
        ----------
        dataset:
            The dataset that new data variables are added to. Its ``_sys_meta`` and ``_var_cache`` attributes are used by the creation methods.
        """
        self._dataset = dataset


    def generic(self, name: str, coords: Tuple[str, ...], dtype_decoded: str | np.dtype, dtype_encoded: str | np.dtype | None = None, chunk_shape: Tuple[int, ...] | None = None, fillvalue: Union[int, float, str, None] = None, scale_factor: Union[float, int, None] = None, add_offset: Union[float, int, None] = None) -> sc.DataVariable:
        """
        The generic method to create a Data Variable.

        Parameters
        ----------
        name: str
            The name of the data variable. It must be unique and follow the `CF conventions for variables names <https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#_naming_conventions>`_.
        coords: tuple of str
            The coordinate names in the order of the dimensions. The coordinate must already exist.
        dtype_decoded: str or np.dtype
            The dtype of the original data (data that the user will work with). This is a mandatory parameter.
        dtype_encoded: str, np.dtype, or None
            The dtype of the stored data. This is the dtype that the data will be stored as. Only relevant for going from a float to a smaller integer. If this is None, then dtype_encoded will be assigned the dtype_decoded.
        chunk_shape: tuple of ints or None
            The chunk shape that the data will be stored as. If None, then it will be estimated. The estimated chunk shape will be optimally estimated to make it efficient to rechunk later.
        fillvalue: int, float, str, or None
            The fill value for the dtype_encoded. If the dtype_decoded is a float, then the decoded fill values will always be np.nan when returned. The fillvalue is primarily for storage.
        scale_factor: int, float, or None
            If dtype_decoded is a float and dtype_encoded is an int, then the stored values are encoded = int(round((decoded - add_offset)/scale_factor)).
        add_offset: int, float, or None
            As described by the scale_factor.

        Returns
        -------
        cfdb.DataVariable
        """
        ## Check base inputs
        name, var = utils.parse_var_inputs(self._dataset._sys_meta, name, coords, dtype_decoded, dtype_encoded, chunk_shape, fillvalue, scale_factor, add_offset)

        ## Var init process
        # The metadata must be registered before the DataVariable is built from the name.
        self._dataset._sys_meta.variables[name] = var

        ## Init Data var
        data_var = sc.DataVariable(name, self._dataset)

        self._dataset._var_cache[name] = data_var

        ## Add attributes to datetime vars
        if data_var.dtype_decoded.kind == 'M':
            data_var.attrs['units'] = utils.parse_cf_time_units(data_var.dtype_decoded)
            data_var.attrs['calendar'] = 'proleptic_gregorian'

        return data_var


    def like(self, name: str, data_var: Union[sc.DataVariable, sc.DataVariableView]) -> sc.DataVariable:
        """
        Create a Data Variable based on the parameters of another Data Variable. A new unique name must be passed.

        Parameters
        ----------
        name: str
            The name of the new data variable.
        data_var: DataVariable or DataVariableView
            The data variable whose coordinate names, dtypes, chunk shape, fill value, scale factor and offset are reused. No data is copied.

        Returns
        -------
        cfdb.DataVariable
        """
        return self.generic(name, data_var.coord_names, dtype_decoded=data_var.dtype_decoded, dtype_encoded=data_var.dtype_encoded, chunk_shape=data_var.chunk_shape, fillvalue=data_var.fillvalue, scale_factor=data_var.scale_factor, add_offset=data_var.add_offset)
|
242
|
+
|
243
|
+
|
244
|
+
class Creator:
    """
    Groups the coordinate and data variable factories of a single dataset.
    """
    def __init__(self, dataset):
        """
        Parameters
        ----------
        dataset:
            The dataset handed to both the ``Coord`` and the ``DataVar`` factory.
        """
        # Coord is built first, then DataVar, both bound to the same dataset.
        self.coord, self.data_var = Coord(dataset), DataVar(dataset)
|
254
|
+
|
255
|
+
|
256
|
+
|
257
|
+
|
258
|
+
|
259
|
+
|
260
|
+
|
261
|
+
|
262
|
+
|
263
|
+
|
264
|
+
|
265
|
+
|
266
|
+
|
267
|
+
|
268
|
+
|
269
|
+
|
270
|
+
|
271
|
+
|
272
|
+
|
273
|
+
|
274
|
+
|
275
|
+
|
276
|
+
|
277
|
+
|
278
|
+
|
279
|
+
|
280
|
+
|
281
|
+
|
282
|
+
|
283
|
+
|
284
|
+
|
285
|
+
|
286
|
+
|
287
|
+
|
288
|
+
|
289
|
+
|
290
|
+
|
291
|
+
|
292
|
+
|
293
|
+
|
294
|
+
|
295
|
+
|
296
|
+
|
297
|
+
|
298
|
+
|
299
|
+
|
300
|
+
|
301
|
+
|
302
|
+
|
303
|
+
|
304
|
+
|
305
|
+
|
306
|
+
|
307
|
+
|
308
|
+
|
309
|
+
|
310
|
+
|
311
|
+
|
312
|
+
|
313
|
+
|
314
|
+
|
315
|
+
|
316
|
+
|
317
|
+
|
318
|
+
|
319
|
+
|
320
|
+
|
321
|
+
|
322
|
+
|
323
|
+
|
324
|
+
|
325
|
+
|
326
|
+
|
327
|
+
|
328
|
+
|
329
|
+
|
330
|
+
|
331
|
+
|
332
|
+
|
333
|
+
|
334
|
+
|
335
|
+
|
336
|
+
|
337
|
+
|
338
|
+
|
339
|
+
|
340
|
+
|
341
|
+
|
342
|
+
|
343
|
+
|
344
|
+
|
345
|
+
|
cfdb/data_models.py
ADDED
@@ -0,0 +1,189 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
"""
|
4
|
+
Created on Tue Feb 11 09:23:18 2025
|
5
|
+
|
6
|
+
@author: mike
|
7
|
+
"""
|
8
|
+
import msgspec
|
9
|
+
import enum
|
10
|
+
from typing import Set, Optional, Dict, Tuple, List, Union, Any
|
11
|
+
# import numpy as np
|
12
|
+
|
13
|
+
# import utils
|
14
|
+
|
15
|
+
####################################################
|
16
|
+
### Parameters
|
17
|
+
|
18
|
+
|
19
|
+
|
20
|
+
|
21
|
+
|
22
|
+
###################################################
|
23
|
+
### Models
|
24
|
+
|
25
|
+
|
26
|
+
class Type(enum.Enum):
    """
    The object types that can be recorded in the system metadata.
    """
    Dataset = 'Dataset'
|
31
|
+
|
32
|
+
|
33
|
+
class Compressor(enum.Enum):
    """
    The names of the compressors that can be recorded in the system metadata.
    """
    zstd = 'zstd'
    lz4 = 'lz4'
|
39
|
+
|
40
|
+
|
41
|
+
# class Encoding(msgspec.Struct):
|
42
|
+
# """
|
43
|
+
|
44
|
+
# """
|
45
|
+
# dtype_encoded: str
|
46
|
+
# dtype_decoded: str
|
47
|
+
# fillvalue: Union[int, None] = None
|
48
|
+
# # fillvalue_decoded: Union[int, None]
|
49
|
+
# scale_factor: Union[float, int, None] = None
|
50
|
+
# add_offset: Union[float, int, None] = None
|
51
|
+
# # units: Union[str, None] = None
|
52
|
+
# # calendar: Union[str, None] = None
|
53
|
+
|
54
|
+
# # def encode(self, values):
|
55
|
+
# # return utils.encode_data(np.asarray(values), **self._encoding)
|
56
|
+
|
57
|
+
# # def decode(self, bytes_data):
|
58
|
+
# # return utils.decode_data(bytes_data, **self._encoding)
|
59
|
+
|
60
|
+
|
61
|
+
class DataVariable(msgspec.Struct, tag='data_var'):
    """
    Metadata record of a data variable. Instances are tagged as 'data_var'.
    """
    # Required fields
    chunk_shape: Tuple[int, ...]
    coords: Tuple[str, ...]
    dtype_encoded: str
    dtype_decoded: str

    # Optional fields, all default to None
    fillvalue: Optional[int] = None
    scale_factor: Optional[Union[float, int]] = None
    add_offset: Optional[Union[float, int]] = None
|
72
|
+
|
73
|
+
|
74
|
+
class CoordinateVariable(msgspec.Struct, tag='coord'):
    """
    Metadata record of a coordinate. Instances are tagged as 'coord'.
    """
    # Required fields
    shape: Tuple[int, ...]
    chunk_shape: Tuple[int, ...]
    dtype_encoded: str
    dtype_decoded: str

    # Optional fields, all default to None
    fillvalue: Optional[int] = None
    scale_factor: Optional[Union[float, int]] = None
    add_offset: Optional[Union[float, int]] = None
    origin: Optional[int] = None
    step: Optional[Union[float, int]] = None

    # Off unless explicitly enabled
    auto_increment: bool = False
|
88
|
+
|
89
|
+
|
90
|
+
class SysMeta(msgspec.Struct):
    """
    Top-level system metadata: the object type, the compression settings and the metadata records of all variables keyed by variable name.
    """
    object_type: Type
    compression: Compressor
    compression_level: int
    # Every instance gets its own empty dict when no variables are passed.
    variables: Dict[str, Union[DataVariable, CoordinateVariable]] = msgspec.field(default_factory=dict)
|
98
|
+
|
99
|
+
# def __post_init__(self):
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
|
109
|
+
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
|
115
|
+
|
116
|
+
|
117
|
+
|
118
|
+
|
119
|
+
|
120
|
+
|
121
|
+
|
122
|
+
|
123
|
+
|
124
|
+
|
125
|
+
|
126
|
+
|
127
|
+
|
128
|
+
|
129
|
+
|
130
|
+
|
131
|
+
|
132
|
+
|
133
|
+
|
134
|
+
|
135
|
+
|
136
|
+
|
137
|
+
|
138
|
+
|
139
|
+
|
140
|
+
|
141
|
+
|
142
|
+
|
143
|
+
|
144
|
+
|
145
|
+
|
146
|
+
|
147
|
+
|
148
|
+
|
149
|
+
|
150
|
+
|
151
|
+
|
152
|
+
|
153
|
+
|
154
|
+
|
155
|
+
|
156
|
+
|
157
|
+
|
158
|
+
|
159
|
+
|
160
|
+
|
161
|
+
|
162
|
+
|
163
|
+
|
164
|
+
|
165
|
+
|
166
|
+
|
167
|
+
|
168
|
+
|
169
|
+
|
170
|
+
|
171
|
+
|
172
|
+
|
173
|
+
|
174
|
+
|
175
|
+
|
176
|
+
|
177
|
+
|
178
|
+
|
179
|
+
|
180
|
+
|
181
|
+
|
182
|
+
|
183
|
+
|
184
|
+
|
185
|
+
|
186
|
+
|
187
|
+
|
188
|
+
|
189
|
+
|