cfdb 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cfdb/creation.py ADDED
@@ -0,0 +1,345 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Thu Feb 13 17:08:09 2025
5
+
6
+ @author: mike
7
+ """
8
+ import numpy as np
9
+ from typing import Set, Optional, Dict, Tuple, List, Union, Any
10
+
11
+ from . import utils, support_classes as sc
12
+ # import utils, support_classes as sc
13
+
14
+ #################################################
15
+
16
+
17
class Coord:
    """
    Factory for creating coordinates in a dataset.

    Each creation method registers the new coordinate in the dataset's system metadata (``_sys_meta.variables``) and variable cache (``_var_cache``), then returns the coordinate object.
    """
    def __init__(self, dataset):
        """
        Parameters
        ----------
        dataset:
            The dataset that the coordinates will be created in. The creation methods read and write its ``_sys_meta`` and ``_var_cache`` attributes.
        """
        self._dataset = dataset


    def generic(self, name: str, data: np.ndarray | None = None, dtype_decoded: str | np.dtype | None = None, dtype_encoded: str | np.dtype | None = None, chunk_shape: Tuple[int, ...] | None = None, fillvalue: Union[int, float, str, None] = None, scale_factor: Union[float, int, None] = None, add_offset: Union[float, int, None] = None, step: int | float | bool = False):
        """
        The generic method to create a coordinate.

        Parameters
        ----------
        name: str
            The name of the coordinate. It must be unique and follow the `CF conventions for variables names <https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#_naming_conventions>`_.
        data: np.ndarray or None
            Data to be added after creation. The length and dtype of the data will override other parameters.
        dtype_decoded: str, np.dtype, or None
            The dtype of the original data (data that the user will work with). If data is not passed, then this is a mandatory parameter.
        dtype_encoded: str, np.dtype, or None
            The dtype of the stored data. This is the dtype that the data will be stored as. Only relevant for going from a float to a smaller integer. If this is None, then dtype_encoded will be assigned the dtype_decoded.
        chunk_shape: tuple of ints or None
            The chunk shape that the data will be stored as. If None, then it will be estimated. The estimated chunk shape will be optimally estimated to make it efficient to rechunk later.
        fillvalue: int, float, str, or None
            The fill value for the dtype_encoded. If the dtype_decoded is a float, then the decoded fill values will always be np.nan when returned. The fillvalue is primarily for storage.
        scale_factor: int, float, or None
            If dtype_decoded is a float and dtype_encoded is an int, then the stored values are encoded = int(round((decoded - add_offset)/scale_factor)).
        add_offset: int, float, or None
            As described by the scale_factor.
        step: int, float, or bool
            If the coordinate data is regular (hourly for example), then assign a step to ensure the coordinate will always stay regular. Defaults to False. The value is forwarded unchanged to utils.parse_coord_inputs.

        Returns
        -------
        cfdb.Coordinate

        Raises
        ------
        ValueError
            If the dataset already contains a variable called ``name``.
        """
        if name in self._dataset._sys_meta.variables:
            raise ValueError(f"Dataset already contains the variable {name}.")

        name, var = utils.parse_coord_inputs(name, data, chunk_shape, dtype_decoded, dtype_encoded, fillvalue, scale_factor, add_offset, step=step)

        ## Var init process
        # The metadata entry is registered before the Coordinate object is created.
        # NOTE(review): if a later step (e.g. append) raises, this entry stays registered - confirm whether a rollback is wanted.
        self._dataset._sys_meta.variables[name] = var

        ## Init Coordinate
        coord = sc.Coordinate(name, self._dataset)

        ## Add data if it has been passed
        # Only np.ndarray inputs are appended; any other type of data is not appended here.
        if isinstance(data, np.ndarray):
            coord.append(data)

        self._dataset._var_cache[name] = coord

        ## Add attributes to datetime vars
        if coord.dtype_decoded.kind == 'M':
            coord.attrs['units'] = utils.parse_cf_time_units(coord.dtype_decoded)
            coord.attrs['calendar'] = 'proleptic_gregorian'

        return coord


    def like(self, name: str, coord: Union[sc.Coordinate, sc.CoordinateView], copy_data=False):
        """
        Create a Coordinate based on the parameters of another Coordinate. A new unique name must be passed.

        Parameters
        ----------
        name: str
            The name of the new coordinate.
        coord: cfdb.Coordinate or cfdb.CoordinateView
            The coordinate whose dtypes, chunk_shape, fillvalue, scale_factor, add_offset, and step are reused.
        copy_data: bool
            If True, the data of ``coord`` is also added to the new coordinate. Defaults to False.

        Returns
        -------
        cfdb.Coordinate
        """
        data = coord.data if copy_data else None

        return self.generic(name, data, dtype_decoded=coord.dtype_decoded, dtype_encoded=coord.dtype_encoded, chunk_shape=coord.chunk_shape, fillvalue=coord.fillvalue, scale_factor=coord.scale_factor, add_offset=coord.add_offset, step=coord.step)


    def _from_defaults(self, key: str, data, step, kwargs: dict):
        """
        Shared implementation of the named coordinate methods (latitude, longitude, etc.).

        The name and creation parameters are resolved by utils.get_var_params from ``key`` and the user's ``kwargs``, the coordinate is created via the generic method, and the attributes in utils.default_attrs[key] are added to it.
        """
        name, params = utils.get_var_params(key, kwargs)

        coord = self.generic(name, data, step=step, **params)
        coord.attrs.update(utils.default_attrs[key])

        return coord


    def latitude(self, data: np.ndarray | None = None, step: int | float | bool = False, **kwargs):
        """
        Create a latitude coordinate. The standard encodings and attributes will be assigned. See the generic method for all of the parameters.
        """
        return self._from_defaults('lat', data, step, kwargs)


    def longitude(self, data: np.ndarray | None = None, step: int | float | bool = False, **kwargs):
        """
        Create a longitude coordinate. The standard encodings and attributes will be assigned. See the generic method for all of the parameters.
        """
        return self._from_defaults('lon', data, step, kwargs)


    def time(self, data: np.ndarray | None = None, step: int | float | bool = False, **kwargs):
        """
        Create a time coordinate. The standard encodings and attributes will be assigned. See the generic method for all of the parameters.
        """
        return self._from_defaults('time', data, step, kwargs)


    def height(self, data: np.ndarray | None = None, step: int | float | bool = False, **kwargs):
        """
        Create a height coordinate. The standard encodings and attributes will be assigned. See the generic method for all of the parameters.
        """
        return self._from_defaults('height', data, step, kwargs)


    def altitude(self, data: np.ndarray | None = None, step: int | float | bool = False, **kwargs):
        """
        Create an altitude coordinate. The standard encodings and attributes will be assigned. See the generic method for all of the parameters.
        """
        return self._from_defaults('altitude', data, step, kwargs)
172
+
173
+
174
class DataVar:
    """
    Factory for creating data variables in a dataset.

    Each creation method registers the new data variable in the dataset's system metadata (``_sys_meta.variables``) and variable cache (``_var_cache``), then returns the data variable object.
    """
    def __init__(self, dataset):
        """
        Parameters
        ----------
        dataset:
            The dataset that the data variables will be created in. The creation methods read and write its ``_sys_meta`` and ``_var_cache`` attributes.
        """
        self._dataset = dataset


    def generic(self, name: str, coords: Tuple[str, ...], dtype_decoded: str | np.dtype, dtype_encoded: str | np.dtype | None = None, chunk_shape: Tuple[int, ...] | None = None, fillvalue: Union[int, float, str, None] = None, scale_factor: Union[float, int, None] = None, add_offset: Union[float, int, None] = None):
        """
        The generic method to create a Data Variable.

        Parameters
        ----------
        name: str
            The name of the data variable. It must be unique and follow the `CF conventions for variables names <https://cfconventions.org/Data/cf-conventions/cf-conventions-1.12/cf-conventions.html#_naming_conventions>`_.
        coords: tuple of str
            The coordinate names in the order of the dimensions. The coordinate must already exist.
        dtype_decoded: str or np.dtype
            The dtype of the original data (data that the user will work with). This is a mandatory parameter.
        dtype_encoded: str, np.dtype, or None
            The dtype of the stored data. This is the dtype that the data will be stored as. Only relevant for going from a float to a smaller integer. If this is None, then dtype_encoded will be assigned the dtype_decoded.
        chunk_shape: tuple of ints or None
            The chunk shape that the data will be stored as. If None, then it will be estimated. The estimated chunk shape will be optimally estimated to make it efficient to rechunk later.
        fillvalue: int, float, str, or None
            The fill value for the dtype_encoded. If the dtype_decoded is a float, then the decoded fill values will always be np.nan when returned. The fillvalue is primarily for storage.
        scale_factor: int, float, or None
            If dtype_decoded is a float and dtype_encoded is an int, then the stored values are encoded = int(round((decoded - add_offset)/scale_factor)).
        add_offset: int, float, or None
            As described by the scale_factor.

        Returns
        -------
        cfdb.DataVariable
        """
        ## Check base inputs
        # NOTE(review): no name-uniqueness check is done in this method; presumably utils.parse_var_inputs does it using the system metadata - confirm.
        name, var = utils.parse_var_inputs(self._dataset._sys_meta, name, coords, dtype_decoded, dtype_encoded, chunk_shape, fillvalue, scale_factor, add_offset)

        ## Var init process
        # The metadata entry is registered before the DataVariable object is created.
        self._dataset._sys_meta.variables[name] = var

        ## Init Data var
        data_var = sc.DataVariable(name, self._dataset)

        self._dataset._var_cache[name] = data_var

        ## Add attributes to datetime vars
        if data_var.dtype_decoded.kind == 'M':
            data_var.attrs['units'] = utils.parse_cf_time_units(data_var.dtype_decoded)
            data_var.attrs['calendar'] = 'proleptic_gregorian'

        return data_var


    def like(self, name: str, data_var: Union[sc.DataVariable, sc.DataVariableView]):
        """
        Create a Data Variable based on the parameters of another Data Variable. A new unique name must be passed.

        Parameters
        ----------
        name: str
            The name of the new data variable.
        data_var: cfdb.DataVariable or cfdb.DataVariableView
            The data variable whose coord_names, dtypes, chunk_shape, fillvalue, scale_factor, and add_offset are reused. No data is copied.

        Returns
        -------
        cfdb.DataVariable
        """
        return self.generic(name, data_var.coord_names, dtype_decoded=data_var.dtype_decoded, dtype_encoded=data_var.dtype_encoded, chunk_shape=data_var.chunk_shape, fillvalue=data_var.fillvalue, scale_factor=data_var.scale_factor, add_offset=data_var.add_offset)
242
+
243
+
244
class Creator:
    """
    Groups the variable factories of a dataset.

    Attributes
    ----------
    coord: Coord
        Factory for creating coordinates.
    data_var: DataVar
        Factory for creating data variables.
    """
    def __init__(self, dataset):
        """
        Parameters
        ----------
        dataset:
            The dataset that is handed to both the Coord and the DataVar factories.
        """
        # Both factories are bound to the same dataset.
        self.coord = Coord(dataset)
        self.data_var = DataVar(dataset)
254
+
255
+
256
+
257
+
258
+
259
+
260
+
261
+
262
+
263
+
264
+
265
+
266
+
267
+
268
+
269
+
270
+
271
+
272
+
273
+
274
+
275
+
276
+
277
+
278
+
279
+
280
+
281
+
282
+
283
+
284
+
285
+
286
+
287
+
288
+
289
+
290
+
291
+
292
+
293
+
294
+
295
+
296
+
297
+
298
+
299
+
300
+
301
+
302
+
303
+
304
+
305
+
306
+
307
+
308
+
309
+
310
+
311
+
312
+
313
+
314
+
315
+
316
+
317
+
318
+
319
+
320
+
321
+
322
+
323
+
324
+
325
+
326
+
327
+
328
+
329
+
330
+
331
+
332
+
333
+
334
+
335
+
336
+
337
+
338
+
339
+
340
+
341
+
342
+
343
+
344
+
345
+
cfdb/data_models.py ADDED
@@ -0,0 +1,189 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ """
4
+ Created on Tue Feb 11 09:23:18 2025
5
+
6
+ @author: mike
7
+ """
8
+ import msgspec
9
+ import enum
10
+ from typing import Set, Optional, Dict, Tuple, List, Union, Any
11
+ # import numpy as np
12
+
13
+ # import utils
14
+
15
+ ####################################################
16
+ ### Parameters
17
+
18
+
19
+
20
+
21
+
22
+ ###################################################
23
+ ### Models
24
+
25
+
26
class Type(enum.Enum):
    """
    The kinds of object that the system metadata can describe.

    Dataset is currently the only member.
    """
    Dataset = 'Dataset'
31
+
32
+
33
class Compressor(enum.Enum):
    """
    The compressor names that the system metadata accepts.

    Each member's value is equal to its name.
    """
    zstd = 'zstd'
    lz4 = 'lz4'
39
+
40
+
41
+ # class Encoding(msgspec.Struct):
42
+ # """
43
+
44
+ # """
45
+ # dtype_encoded: str
46
+ # dtype_decoded: str
47
+ # fillvalue: Union[int, None] = None
48
+ # # fillvalue_decoded: Union[int, None]
49
+ # scale_factor: Union[float, int, None] = None
50
+ # add_offset: Union[float, int, None] = None
51
+ # # units: Union[str, None] = None
52
+ # # calendar: Union[str, None] = None
53
+
54
+ # # def encode(self, values):
55
+ # # return utils.encode_data(np.asarray(values), **self._encoding)
56
+
57
+ # # def decode(self, bytes_data):
58
+ # # return utils.decode_data(bytes_data, **self._encoding)
59
+
60
+
61
class DataVariable(msgspec.Struct, tag='data_var'):
    """
    The stored metadata of a data variable.

    The struct is tagged as 'data_var' so that it can be told apart from the other struct type in a union.
    """
    # Required fields
    chunk_shape: Tuple[int, ...]
    coords: Tuple[str, ...]
    dtype_encoded: str
    dtype_decoded: str

    # Optional encoding fields
    # NOTE(review): fillvalue only allows an int here - confirm this is intended for non-integer encoded dtypes.
    fillvalue: Optional[int] = None
    scale_factor: Optional[Union[float, int]] = None
    add_offset: Optional[Union[float, int]] = None
72
+
73
+
74
class CoordinateVariable(msgspec.Struct, tag='coord'):
    """
    The stored metadata of a coordinate.

    The struct is tagged as 'coord' so that it can be told apart from the other struct type in a union.
    """
    # Required fields
    shape: Tuple[int, ...]
    chunk_shape: Tuple[int, ...]
    dtype_encoded: str
    dtype_decoded: str

    # Optional encoding fields
    # NOTE(review): fillvalue only allows an int here - confirm this is intended for non-integer encoded dtypes.
    fillvalue: Optional[int] = None
    scale_factor: Optional[Union[float, int]] = None
    add_offset: Optional[Union[float, int]] = None

    # Optional fields for regular coordinates
    # NOTE(review): the meaning of origin and auto_increment is not visible in this module - confirm against their users.
    origin: Optional[int] = None
    step: Optional[Union[float, int]] = None
    auto_increment: bool = False
88
+
89
+
90
class SysMeta(msgspec.Struct):
    """
    The system metadata: the object type, the compression settings, and the metadata of every variable keyed by the variable name.
    """
    object_type: Type
    compression: Compressor
    compression_level: int
    # Explicit spelling of the empty-dict default: every instance gets its own new dict.
    variables: Dict[str, Union[DataVariable, CoordinateVariable]] = msgspec.field(default_factory=dict)
98
+
99
+ # def __post_init__(self):
100
+
101
+
102
+
103
+
104
+
105
+
106
+
107
+
108
+
109
+
110
+
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+
120
+
121
+
122
+
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+
131
+
132
+
133
+
134
+
135
+
136
+
137
+
138
+
139
+
140
+
141
+
142
+
143
+
144
+
145
+
146
+
147
+
148
+
149
+
150
+
151
+
152
+
153
+
154
+
155
+
156
+
157
+
158
+
159
+
160
+
161
+
162
+
163
+
164
+
165
+
166
+
167
+
168
+
169
+
170
+
171
+
172
+
173
+
174
+
175
+
176
+
177
+
178
+
179
+
180
+
181
+
182
+
183
+
184
+
185
+
186
+
187
+
188
+
189
+