sacc 1.0.2__py3-none-any.whl → 2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sacc/io.py ADDED
@@ -0,0 +1,414 @@
1
+ from .utils import numpy_to_vanilla
2
+ from io import BytesIO
3
+ import inspect
4
+ from astropy.table import Table
5
+
6
+ ONE_OBJECT_PER_TABLE = "ONE_OBJECT_PER_TABLE"
7
+ MULTIPLE_OBJECTS_PER_TABLE = "MULTIPLE_OBJECTS_PER_TABLE"
8
+ ONE_OBJECT_MULTIPLE_TABLES = "ONE_OBJECT_MULTIPLE_TABLES"
9
+
10
+
11
+ """
12
+ if storage_type == ONE_OBJECT_PER_TABLE then the class must have
13
+ - to_table returning a single astropy table from an instance
14
+ - from_table returning a single instance from an astropy table
15
+
16
+ * tables must have a unique name
17
+
18
+ if storage_type == MULTIPLE_OBJECTS_PER_TABLE then the class must have
19
+ - to_table returning one table from a list of instances
20
+ - from_table returning a dict of instances, keyed by name, from a single astropy table
21
+
22
+
23
+ if storage_type == ONE_OBJECT_MULTIPLE_TABLES then the class must have
24
+ - to_tables returning a dict of astropy tables, keyed by part name, from a single instance
25
+ - from_tables returning a single instance from a dict of astropy tables keyed by part name
26
+
27
+ * the base name can be shared between the tables
28
+
29
+ """
30
+
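To make the storage-type contract above concrete, here is a minimal illustrative sketch of a ONE_OBJECT_PER_TABLE subclass. It is not part of the package: BaseThing and ScalarThing are hypothetical names, and the sketch relies on the BaseIO class and registration machinery defined just below, plus the Table class imported at the top of this module.

class BaseThing(BaseIO):
    # Intermediate Base* classes must declare their own subclass registry.
    _sub_classes = {}

class ScalarThing(BaseThing, type_name="scalar_thing"):
    storage_type = ONE_OBJECT_PER_TABLE

    def __init__(self, value):
        self.value = value

    def to_table(self):
        # One instance -> one astropy table.
        return Table({"value": [self.value]})

    @classmethod
    def from_table(cls, table):
        # One astropy table -> one instance.
        return cls(table["value"][0])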
31
+ class BaseIO:
32
+ """
33
+ This base class represents interfaces for input/output operations
34
+ in which:
35
+ - subclasses define to_table/from_table or to_tables/from_tables, depending on their storage_type
36
+ - the to_* methods convert instances into astropy tables
37
+ - the from_* methods convert astropy tables back into instances
38
+
39
+ The reason for this design is efficiency in packing together tracer data into
40
+ a smaller number of tables. For some tracer types we want to store many tracers
41
+ in a single table, while for others we want to store each tracer in its own table.
42
+
43
+ New hierarchies of objects can be created by adding intermediate Base* subclasses, each carrying its own _sub_classes registry.
44
+ """
45
+ _base_subclasses = {}
46
+ storage_type = "NOT_DEFINED"
47
+
48
+ def __init_subclass__(cls, type_name=""):
49
+
50
+ # We can have base subclasses that do not have a type name
51
+ if cls.__name__.startswith('Base'):
52
+ BaseIO._base_subclasses[cls.__name__[4:].lower()] = cls
53
+
54
+ # Check that the class variable _sub_classes, which base classes
55
+ # use to register their subclasses, exists.
56
+ if not hasattr(cls, '_sub_classes'):
57
+ raise RuntimeError("Base subclasses of BaseIO must have a dictionary class variable _sub_classes, but"
58
+ f" {cls.__name__} does not have one defined.")
59
+
60
+ return
61
+
62
+ if type_name == "":
63
+ raise RuntimeError("Subclasses that use the table IO system like tracers must have a type_name set when defining them.")
64
+
65
+ # Check that the storage_type is defined and valid
66
+ if cls.storage_type == "NOT_DEFINED":
67
+ raise RuntimeError(f"Subclasses of BaseIO must define a class variable storage_type, but {cls.__name__} does not have one defined.")
68
+
69
+ if cls.storage_type not in (ONE_OBJECT_PER_TABLE, MULTIPLE_OBJECTS_PER_TABLE, ONE_OBJECT_MULTIPLE_TABLES):
70
+ raise RuntimeError(f"Subclasses of BaseIO must have a class variable storage_type set to one of "
71
+ f"{ONE_OBJECT_PER_TABLE}, {MULTIPLE_OBJECTS_PER_TABLE}, or {ONE_OBJECT_MULTIPLE_TABLES}, "
72
+ f"but {cls.__name__} has {cls.storage_type}.")
73
+
74
+
75
+ # We could probably be using an Abstract Base Class rather than doing this.
76
+ # Then you wouldn't get an error until instantiation. That might be good
77
+ # or bad.
78
+ if cls.storage_type == ONE_OBJECT_PER_TABLE:
79
+ check_has_standard_method(cls, 'to_table')
80
+ check_has_class_method(cls, 'from_table')
81
+
82
+ elif cls.storage_type == ONE_OBJECT_MULTIPLE_TABLES:
83
+ check_has_standard_method(cls, 'to_tables')
84
+ check_has_class_method(cls, 'from_tables')
85
+
86
+ elif cls.storage_type == MULTIPLE_OBJECTS_PER_TABLE:
87
+ check_has_class_method(cls, 'to_table')
88
+ check_has_class_method(cls, 'from_table')
89
+ else:
90
+ raise RuntimeError(f"Subclasses of BaseIO must have a class variable storage_type set to one of "
91
+ f"{ONE_OBJECT_PER_TABLE}, {MULTIPLE_OBJECTS_PER_TABLE}, or {ONE_OBJECT_MULTIPLE_TABLES}, "
92
+ f"but {cls.__name__} has {cls.storage_type}.")
93
+
94
+
95
+ if type_name.lower() in cls._sub_classes:
96
+ raise RuntimeError(f"Subclasses of BaseIO must have unique type_name, "
97
+ f"but {type_name} is already registered.")
98
+
99
+ cls._sub_classes[type_name.lower()] = cls
100
+ cls.type_name = type_name
101
+
102
+
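As a rough illustration of the checks performed in __init_subclass__, a definition that leaves storage_type unset is rejected at class-creation time. BadThing is a hypothetical name, and BaseThing is the registry-carrying base from the sketch above.

try:
    class BadThing(BaseThing, type_name="bad_thing"):
        pass  # storage_type is left as the inherited "NOT_DEFINED"
except RuntimeError as err:
    print(err)  # complains that storage_type is not defined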
103
+ def to_tables(category_dict):
104
+ """Convert a dict of objects to a list of astropy tables
105
+
106
+ This is used when saving data to a file.
107
+
108
+ This function converts a dict of objects, each of which
109
+ can be instances of any subclass of BaseIO, and turns them
110
+ into a list of astropy tables, ready to be saved to disk.
111
+
112
+ Some object types generate a single table for all of the
113
+ different instances, and others generate one table per
114
+ instance, while still others generate multiple tables
115
+ for a single instance.
116
+
117
+ The storage type of each class decides which of these it is.
118
+
119
+ Parameters
120
+ ----------
121
+ category_dict: dict[str, dict[str, BaseIO]]
122
+ Instances keyed by category (e.g. "tracer", "window", "covariance"), then by name
123
+ (e.g. "source_1")
124
+
125
+ Returns
126
+ -------
127
+ tables: list
128
+ List of astropy tables
129
+ """
130
+ from .data_types import DataPoint
131
+
132
+ # This is the list of tables that we will build up and return
133
+ tables = []
134
+
135
+ # The top-level category_dict maps general categories of data,
136
+ # each represented by a different base subclass of BaseIO, to a
137
+ # dict mapping object names to instances of that category's subclasses.
138
+ for category, instance_dict in category_dict.items():
139
+ multi_object_tables = {}
140
+
141
+ # We handle the "data" and "metadata" categories separately, since they are special cases
142
+ if category == 'data' or category == 'metadata':
143
+ continue
144
+
145
+ for name, obj in instance_dict.items():
146
+ # Get the class of the instance
147
+ cls = type(obj)
148
+
149
+ # Check if the class is a subclass of BaseIO
150
+ if not issubclass(cls, BaseIO):
151
+ raise RuntimeError(f"Instance {obj} of type {cls.__name__} does not subclass BaseIO.")
152
+
153
+ if obj.storage_type == ONE_OBJECT_PER_TABLE:
154
+ # If the storage type is ONE_OBJECT_PER_TABLE, the object
155
+ # converts to a single astropy table.
156
+ # print(f"Saving {name} of type {cls.type_name} in category {category} to a single table.")
157
+ table = obj.to_table()
158
+ table.meta['SACCTYPE'] = category
159
+ table.meta['SACCCLSS'] = cls.type_name
160
+ table.meta['SACCNAME'] = name
161
+ tables.append(table)
162
+
163
+ elif obj.storage_type == MULTIPLE_OBJECTS_PER_TABLE:
164
+ # If the storage type is MULTIPLE_OBJECTS_PER_TABLE then
165
+ # we need to collect together all the instances of this
166
+ # class and convert at the end
167
+ key = (cls, name)
168
+ if key not in multi_object_tables:
169
+ multi_object_tables[key] = []
170
+ multi_object_tables[key].append(obj)
171
+
172
+ elif obj.storage_type == ONE_OBJECT_MULTIPLE_TABLES:
173
+ # If the storage type is ONE_OBJECT_MULTIPLE_TABLES, the object
174
+ # converts to a dict of tables keyed by part name,
175
+ # with each part stored in its own table.
176
+ # print(f"Saving {name} of type {cls.type_name} in category {category} to multiple tables.")
177
+ tabs = obj.to_tables()
178
+ for part_name, table in tabs.items():
179
+ table.meta['SACCTYPE'] = category
180
+ table.meta['SACCCLSS'] = cls.type_name
181
+ table.meta["SACCNAME"] = name
182
+ table.meta['SACCPART'] = part_name
183
+ tables.append(table)
184
+ else:
185
+ raise RuntimeError(f"Storage type {cls.storage_type} for {cls.__name__} is not recognized.")
186
+
187
+ # Now process the multi-object tables for this category
188
+ for (cls, name), instance_list in multi_object_tables.items():
189
+ # Convert the list of instances to a single table
190
+ table = cls.to_table(instance_list)
191
+ table.meta['SACCTYPE'] = category
192
+ table.meta['SACCNAME'] = name
193
+ table.meta['SACCCLSS'] = cls.type_name
194
+ tables.append(table)
195
+
196
+ # Handle data points separately, since they are a special case
197
+ data = category_dict.get('data', [])
198
+ lookups = category_dict.get('window', {})
199
+
200
+ # Because lots of objects share the same window function
201
+ # we map each window object back to its identifying code
202
+ # when serializing.
203
+ lookups = {'window': {v: k for k, v in lookups.items()}}
204
+ data_tables = DataPoint.to_tables(data, lookups=lookups)
205
+
206
+ for name, table in data_tables.items():
207
+ table.meta['SACCTYPE'] = "data"
208
+ table.meta['SACCNAME'] = name
209
+ tables.append(table)
210
+
211
+ # Also handle metadata separately. Could consider a metadata class
212
+ # that subclasses BaseIO and dict?
213
+ tables.append(metadata_to_table(category_dict.get('metadata', {})))
214
+
215
+
216
+ return tables
217
+
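For reference, this is the per-table metadata convention that to_tables writes, shown by hand on a single hypothetical object (ScalarThing from the earlier sketch); the category and object names are arbitrary.

obj = ScalarThing(1.0)
table = obj.to_table()
table.meta["SACCTYPE"] = "thing"        # general category of object
table.meta["SACCCLSS"] = obj.type_name  # registered class name, "scalar_thing"
table.meta["SACCNAME"] = "a"            # the name of this particular object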
218
+ def from_tables(table_list):
219
+ """Convert a list of astropy tables into a dictionary of sacc objects.
220
+
221
+ This is used when loading data from a file.
222
+
223
+ This function takes a list of astropy tables, typically read from
224
+ a file, and converts them all into instances of BaseIO subclasses.
225
+
226
+ Parameters
227
+ ----------
228
+ table_list: list
229
+ List of astropy tables
230
+
231
+ Returns
232
+ -------
233
+ objects: dict[str, dict[str, BaseIO]]
234
+ Dict mapping category names then object names to instances of BaseIO subclasses.
235
+ """
236
+ from .data_types import DataPoint
237
+ outputs = {}
238
+ multi_tables = {}
239
+ data_point_tables = []
240
+
241
+ for table in table_list:
242
+ # what general category of object is this table, e.g.
243
+ # tracers, windows, data points.
244
+ table_category = table.meta['SACCTYPE'].lower()
245
+
246
+ # what specific subclass of that category is this table?
247
+ # e.g. N(z) tracer, top hat window, etc.
248
+ if table_category == 'data':
249
+
250
+ # This is a data table, which we treat as a special case
252
+ # because the ordering here is particularly important.
252
+ table_class_name = "datapoint"
253
+ table_class = DataPoint
254
+ data_point_tables.append(table)
255
+ continue
256
+ if table_category == 'metadata':
257
+ # This is a metadata table, which we treat as a special case.
258
+ outputs[table_category] = table_to_metadata(table)
259
+ continue
260
+
261
+ table_class_name = table.meta['SACCCLSS'].lower()
262
+ # The class that represents this specific subtype
263
+ base_class = BaseIO._base_subclasses[table_category]
264
+ table_class = base_class._sub_classes[table_class_name]
265
+ if table_category not in outputs:
266
+ outputs[table_category] = {}
267
+
268
+ # We will be doing the types where an object is split up
269
+ # over multiple tables separately, so we store them
270
+ # in a dict for later processing.
271
+ if table_class.storage_type == ONE_OBJECT_MULTIPLE_TABLES:
272
+ name = table.meta["SACCNAME"]
273
+ if "SACCPART" in table.meta:
274
+ part = table.meta["SACCPART"]
275
+ else:
276
+ # legacy tables may not have the part - in this case
277
+ # name should hopefully be kind:class:part
278
+ part = table.meta['EXTNAME'].rsplit(":", 1)[-1]
279
+ key = (table_category, table_class, name)
280
+ if key not in multi_tables:
281
+ multi_tables[key] = {}
282
+ multi_tables[key][part] = table
283
+ continue
284
+
285
+ # Convert the table into either one instance of the class or a dict of instances
286
+ if table_class.storage_type == ONE_OBJECT_PER_TABLE:
287
+ # If the storage type is ONE_OBJECT_PER_TABLE, we expect
288
+ # that the table will return a single instance of the class.
289
+ obj = table_class.from_table(table)
290
+ name = table.meta['SACCNAME']
291
+ outputs[table_category][name] = obj
292
+
293
+ elif table_class.storage_type == MULTIPLE_OBJECTS_PER_TABLE:
294
+ # If the storage type is MULTIPLE_OBJECTS_PER_TABLE, we expect
295
+ # that the table will return a dict of instances of the class,
296
+ # keyed by their names.
297
+ objs = table_class.from_table(table)
298
+ outputs[table_category].update(objs)
299
+
300
+ # Now process the multi-table objects that we collected above.
301
+ for key, m_tables in multi_tables.items():
302
+ # key is a tuple of (table_category, table_class, name)
303
+ table_category, table_class, name = key
304
+ # Convert the dict of tables into a single instance
305
+ obj = table_class.from_tables(m_tables)
306
+ outputs[table_category][name] = obj
307
+
308
+ # Now finally process the data point tables.
309
+ data_points = []
310
+ if 'window' in outputs:
311
+ lookups = {'window': outputs['window']}
312
+ else:
313
+ lookups = {}
314
+ for table in data_point_tables:
315
+ # Each data point table can contain many data points
316
+ dps = DataPoint.from_table(table, lookups=lookups)
317
+ data_points.extend(dps)
318
+
319
+ outputs['data'] = data_points
320
+ return outputs
321
+
322
+
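The dispatch that from_tables performs for each table can be sketched directly against the class registries. The ScalarThing class and the "thing"/"scalar_thing"/"a" labels are the hypothetical names used in the earlier sketches.

table = ScalarThing(3.0).to_table()
table.meta["SACCTYPE"] = "thing"
table.meta["SACCCLSS"] = "scalar_thing"
table.meta["SACCNAME"] = "a"

category = table.meta["SACCTYPE"].lower()
klass = BaseIO._base_subclasses[category]._sub_classes[table.meta["SACCCLSS"].lower()]
obj = klass.from_table(table)  # a ScalarThing instance again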
323
+ def astropy_buffered_fits_write(filename, hdu_list):
324
+ # Write out data - do it to a buffer because astropy
325
+ # metadata performance on some file systems is terrible.
326
+ buf = BytesIO()
327
+ hdu_list.writeto(buf)
328
+ # Rewind and read the binary data we just wrote
329
+ buf.seek(0)
330
+ output_data = buf.read()
331
+ # Write the binary data to the target file
332
+ with open(filename, "wb") as f:
333
+ f.write(output_data)
334
+
335
+
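A minimal usage sketch for the buffered writer; the file name and HDU contents are arbitrary.

from astropy.io import fits

hdus = fits.HDUList([fits.PrimaryHDU()])
astropy_buffered_fits_write("example.fits", hdus)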
336
+ def is_class_method(method):
337
+ return callable(method) and inspect.ismethod(method) and not inspect.isfunction(method)
338
+
339
+
340
+ def check_has_class_method(cls, method_name):
341
+ """Check if a class has a class method with the given name."""
342
+ method = getattr(cls, method_name, None)
343
+ if method is None:
344
+ raise RuntimeError(f"As a BaseIO subclass, {cls.__name__} should have a class method {method_name} defined.")
345
+
346
+ if not is_class_method(method):
347
+ raise RuntimeError(f"As a BaseIO subclass, {cls.__name__} has {method_name}, but it is not defined as a class method.")
348
+
349
+ def check_has_standard_method(cls, method_name):
350
+ """Check if a class has a regular method with the given name."""
351
+ method = getattr(cls, method_name, None)
352
+ if method is None:
353
+ raise RuntimeError(f"As a BaseIO subclass, {cls.__name__} should have a method {method_name} defined.")
354
+
355
+ if not callable(method):
356
+ raise RuntimeError(f"As a BaseIO subclass, class {cls.__name__} has {method_name}, but it is not a method.")
357
+
358
+ if is_class_method(method):
359
+ raise RuntimeError(f"As a BaseIO subclass, {cls.__name__} has {method_name}, but it is defined as a class method or something else like that")
360
+
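The distinction the two checks draw can be illustrated with a throwaway class (Demo is hypothetical):

class Demo:
    def regular(self):
        pass

    @classmethod
    def clsmeth(cls):
        pass

check_has_standard_method(Demo, "regular")  # passes
check_has_class_method(Demo, "clsmeth")     # passes
# check_has_class_method(Demo, "regular") would raise RuntimeError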
361
+ def metadata_to_table(metadata):
362
+ """
363
+ Convert a metadata dict to an astropy table.
364
+
365
+ Because astropy table columns must have a single type,
366
+ we store each item in the metadata dict as a separate column.
367
+
368
+ Parameters
369
+ ----------
370
+ metadata: dict
371
+ Dictionary of metadata items, where each key is a string,
372
+ and values are simple unstructured types (int, float, str, bool, etc.).
373
+
374
+ Returns
375
+ -------
376
+ table: astropy.table.Table
377
+ An astropy table with a single row, where each column corresponds
378
+ to a key in the metadata dict and holds the corresponding
379
+ value in its single row.
380
+ """
381
+ # For typing reasons each key is a column in the table
382
+ # and there is only one row.
383
+
384
+ keys = list(metadata.keys())
385
+ values = [numpy_to_vanilla(metadata[key]) for key in keys]
386
+ table: Table = Table(rows=[values], names=keys)
387
+ table.meta['SACCTYPE'] = "metadata"
388
+ table.meta['SACCCLSS'] = "metadata"
389
+ table.meta['SACCNAME'] = "metadata"
390
+ return table
391
+
392
+ def table_to_metadata(table):
393
+ """
394
+ Convert an astropy table to a metadata dict.
395
+
396
+ See metadata_to_table for the format expected.
397
+
398
+ Parameters
399
+ ----------
400
+ table: astropy.table.Table
401
+ An astropy table with a single row, where each column corresponds
402
+ to a key in the metadata dict and holds the corresponding
403
+ value in its single row.
404
+
405
+ Returns
406
+ -------
407
+ metadata: dict
408
+ Dictionary of metadata items, where each key is a string,
409
+ and values are simple unstructured types (int, float, str, bool, etc.).
410
+ """
411
+ metadata = {}
412
+ for key in table.colnames:
413
+ metadata[key] = numpy_to_vanilla(table[key][0])
414
+ return metadata
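A round-trip sketch for the two metadata helpers; the keys and values are arbitrary examples.

meta_in = {"creator": "example", "n_points": 100, "blinded": False}
table = metadata_to_table(meta_in)
meta_out = table_to_metadata(table)
print(meta_out)  # expected to recover the original key/value pairs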