sacc 1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sacc/data_types.py ADDED
@@ -0,0 +1,463 @@
1
+ from collections import namedtuple
2
+ from astropy.table import Table
3
+
4
+ from .utils import (Namespace, hide_null_values,
5
+ null_values, camel_case_split_and_lowercase)
6
+
7
+ # The format for a data type name looks like this:
8
+ # {sources}_{properties}_{statistic_type}[_{statistic_subtype}]
9
+ # sources: type(s) of astrophysical sources to which this applies
10
+ # properties: feature(s)/characteristic(s) of those sources/fields to
11
+ # which the statistic applies
12
+ # statistic_type: mathematical type of the statistic
13
+ # statistic_subtype: optional additional specifier
14
+
15
# Required tags for the concise (short-form) data type names.
# Each key is a data type name and each value is the list of tag names
# that must be supplied for a data point of that type.  In this table the
# "cl_*" types need 'ell', the "xi_*" types need 'theta', and the count /
# cluster types need no tags at all.
required_tags_concise = {
    "cl_00": ['ell'],
    "cl_0e": ['ell'],
    "cl_0b": ['ell'],
    "cl_e0": ['ell'],
    "cl_b0": ['ell'],
    "cl_ee": ['ell'],
    "cl_eb": ['ell'],
    "cl_be": ['ell'],
    "cl_bb": ['ell'],
    "xi_00": ['theta'],
    "xi_0e": ['theta'],
    "xi_0b": ['theta'],
    "xi_e0": ['theta'],
    "xi_b0": ['theta'],
    "xi_plus_re": ['theta'],
    "xi_plus_im": ['theta'],
    "xi_minus_re": ['theta'],
    "xi_minus_im": ['theta'],
    "count": [],
    "cluster_counts": [],
    "cluster_mean_log_mass": [],
    "cluster_shear": []
}
39
+
40
# Required tags for the verbose data type names, which follow the
# {sources}_{properties}_{statistic_type}[_{statistic_subtype}] pattern.
# Each key is a data type name and each value is the list of tag names
# that must be supplied for a data point of that type.  Entries with a
# "cl" statistic need 'ell' and entries with an "xi" statistic need
# 'theta'.
# NOTE(review): the "cosebi" entries are not uniform - the
# cmbGalaxy_polarizationShear_cosebi_* types require 'ell' while the
# cmb_polarization_cosebi_* and galaxy_shear_cosebi_* types require
# nothing.  Confirm whether that difference is intended.
required_tags_verbose = {
    "clusterGalaxy_densityConvergence_cl": ['ell'],
    "clusterGalaxy_densityConvergence_xi": ['theta'],
    "clusterGalaxy_densityShear_cl_b": ['ell'],
    "clusterGalaxy_densityShear_cl_e": ['ell'],
    "clusterGalaxy_densityShear_xi_t": ['theta'],
    "clusterGalaxy_densityShear_xi_x": ['theta'],
    "clusterGalaxy_density_cl": ['ell'],
    "clusterGalaxy_density_xi": ['theta'],
    "cluster_density_cl": ['ell'],
    "cluster_density_xi": ['theta'],
    "cluster_mass_count_wl": [],
    "cluster_mass_count_xray": [],
    "cmbCluster_polarizationDensity_cl_b": ['ell'],
    "cmbCluster_polarizationDensity_cl_e": ['ell'],
    "cmbCluster_polarizationDensity_xi_t": ['theta'],
    "cmbCluster_polarizationDensity_xi_x": ['theta'],
    "cmbCluster_temperatureDensity_cl": ['ell'],
    "cmbCluster_temperatureDensity_xi": ['theta'],
    "cmbCluster_convergenceDensity_cl": ['ell'],
    "cmbCluster_convergenceDensity_xi": ['theta'],
    "cmbGalaxy_convergenceDensity_cl": ['ell'],
    "cmbGalaxy_convergenceDensity_xi": ['theta'],
    "cmbGalaxy_convergenceShear_cl_b": ['ell'],
    "cmbGalaxy_convergenceShear_cl_e": ['ell'],
    "cmbGalaxy_convergenceShear_xi_t": ['theta'],
    "cmbGalaxy_convergenceShear_xi_x": ['theta'],
    "cmbGalaxy_convergence_cl": ['ell'],
    "cmbGalaxy_convergence_xi": ['theta'],
    "cmbGalaxy_polarizationConvergence_cl_b": ['ell'],
    "cmbGalaxy_polarizationConvergence_cl_e": ['ell'],
    "cmbGalaxy_polarizationConvergence_xi_t": ['theta'],
    "cmbGalaxy_polarizationConvergence_xi_x": ['theta'],
    "cmbGalaxy_polarizationDensity_cl_b": ['ell'],
    "cmbGalaxy_polarizationDensity_cl_e": ['ell'],
    "cmbGalaxy_polarizationDensity_xi_t": ['theta'],
    "cmbGalaxy_polarizationDensity_xi_x": ['theta'],
    "cmbGalaxy_polarizationShear_cl_bb": ['ell'],
    "cmbGalaxy_polarizationShear_cl_be": ['ell'],
    "cmbGalaxy_polarizationShear_cl_eb": ['ell'],
    "cmbGalaxy_polarizationShear_cl_ee": ['ell'],
    "cmbGalaxy_polarizationShear_xi_minus": ['theta'],
    "cmbGalaxy_polarizationShear_xi_plus": ['theta'],
    "cmbGalaxy_polarizationShear_xi_imagMinus": ['theta'],
    "cmbGalaxy_polarizationShear_xi_imagPlus": ['theta'],
    "cmbGalaxy_polarizationShear_cosebi_e": ['ell'],
    "cmbGalaxy_polarizationShear_cosebi_b": ['ell'],
    "cmbGalaxy_temperatureConvergence_cl": ['ell'],
    "cmbGalaxy_temperatureConvergence_xi": ['theta'],
    "cmbGalaxy_temperatureDensity_cl": ['ell'],
    "cmbGalaxy_temperatureDensity_xi": ['theta'],
    "cmbGalaxy_temperatureShear_cl_b": ['ell'],
    "cmbGalaxy_temperatureShear_cl_e": ['ell'],
    "cmbGalaxy_temperatureShear_xi_t": ['theta'],
    "cmbGalaxy_temperatureShear_xi_x": ['theta'],
    "cmb_convergence_cl": ['ell'],
    "cmb_convergence_xi": ['theta'],
    "cmb_polarization_cl_bb": ['ell'],
    "cmb_polarization_cl_be": ['ell'],
    "cmb_polarization_cl_eb": ['ell'],
    "cmb_polarization_cl_ee": ['ell'],
    "cmb_polarization_xi_minus": ['theta'],
    "cmb_polarization_xi_plus": ['theta'],
    "cmb_polarization_xi_imagMinus": ['theta'],
    "cmb_polarization_xi_imagPlus": ['theta'],
    "cmb_polarization_cosebi_e": [],
    "cmb_polarization_cosebi_b": [],
    "cmb_polarizationConvergence_cl_e": ['ell'],
    "cmb_polarizationConvergence_cl_b": ['ell'],
    "cmb_polarizationConvergence_xi_t": ['theta'],
    "cmb_polarizationConvergence_xi_x": ['theta'],
    "cmb_temperaturePolarization_cl_e": ['ell'],
    "cmb_temperaturePolarization_cl_b": ['ell'],
    "cmb_temperaturePolarization_xi_t": ['theta'],
    "cmb_temperaturePolarization_xi_x": ['theta'],
    "cmb_temperature_cl": ['ell'],
    "cmb_temperatureConvergence_cl": ["ell"],
    "cmb_temperatureConvergence_xi": ["theta"],
    "galaxy_convergenceDensity_cl": ['ell'],
    "galaxy_convergenceDensity_xi": ['theta'],
    "galaxy_convergence_cl": ['ell'],
    "galaxy_convergence_xi": ['theta'],
    "galaxy_density_cl": ['ell'],
    "galaxy_density_xi": ['theta'],
    "galaxy_shearConvergence_cl_b": ['ell'],
    "galaxy_shearConvergence_cl_e": ['ell'],
    "galaxy_shearConvergence_xi_t": ['theta'],
    "galaxy_shearConvergence_xi_x": ['theta'],
    "galaxy_shearDensity_cl_b": ['ell'],
    "galaxy_shearDensity_cl_e": ['ell'],
    "galaxy_shearDensity_xi_t": ['theta'],
    "galaxy_shearDensity_xi_x": ['theta'],
    "galaxy_shear_cl_bb": ['ell'],
    "galaxy_shear_cl_be": ['ell'],
    "galaxy_shear_cl_eb": ['ell'],
    "galaxy_shear_cl_ee": ['ell'],
    "galaxy_shear_cosebi_bb": [],
    "galaxy_shear_cosebi_ee": [],
    "galaxy_shear_xi_minus": ['theta'],
    "galaxy_shear_xi_plus": ['theta'],
    "galaxy_shear_xi_imagMinus": ['theta'],
    "galaxy_shear_xi_imagPlus": ['theta'],
}
143
+
144
# Single lookup covering every known data type name: start from a copy of
# the concise table, then layer the verbose table on top (a verbose entry
# would win if the same key ever appeared in both).
required_tags = dict(required_tags_concise)
required_tags.update(required_tags_verbose)
145
+
146
# Record type returned by parse_data_type_name: the four components of a
# verbose data type name, in the order they appear in the name.
parsedDataTypeName = namedtuple(
    'parsedDataTypeName',
    ['sources', 'properties', 'statistic', 'subtype'],
)
148
+
149
+
150
def parse_data_type_name(name):
    """Parse a verbose data type name into its component parts.

    Verbose data type names take the form:
    {sources}_{properties}_{statistic_type}[_{statistic_subtype}]
    where sources and properties are camel-case if there is more
    than one of them.

    Parameters
    ----------
    name: str
        A data type name

    Returns
    -------
    parsed: parsedDataTypeName
        Named tuple with the following fields, in order:

        sources: list[str]
            type(s) of astrophysical sources to which this applies

        properties: list[str]
            feature(s)/characteristic(s) of those sources/fields to
            which the statistic applies

        statistic: str
            mathematical type of the statistic

        subtype: str or None
            optional additional specifier; None when the name has
            only three parts

    Raises
    ------
    ValueError
        If the name does not split into 3 or 4 underscore-separated
        parts.
    """
    parts = name.split("_")
    n_parts = len(parts)
    if n_parts not in (3, 4):
        raise ValueError("The supplied name is not a valid "
                         "verbose data type name (must have "
                         f"3 or 4 underscore-separated parts): {name}")

    sources, properties, statistic = parts[:3]
    # The subtype is the optional fourth part of the name.
    subtype = parts[3] if n_parts == 4 else None

    # Sources and properties may each pack several words in camel case.
    sources = camel_case_split_and_lowercase(sources)
    properties = camel_case_split_and_lowercase(properties)
    return parsedDataTypeName(sources, properties, statistic, subtype)
190
+
191
+
192
def _join_camel_case(words):
    """Join a sequence of words into one camel-case string.

    The first word is used exactly as given; every later word is
    lower-cased and then capitalized before being appended.
    """
    return "".join([words[0]] + [w.lower().capitalize() for w in words[1:]])


def build_data_type_name(sources, properties, statistic, subtype=None):
    """Build a verbose data type name from its component parts.

    Parameters
    ----------
    sources: str or list[str]
        type(s) of astrophysical sources to which this applies

    properties: str or list[str]
        feature(s)/characteristic(s) of those sources/fields to
        which the statistic applies

    statistic: str
        mathematical type of the statistic

    subtype: str or None
        optional additional specifier. Default is None. Any falsy
        value (e.g. an empty string) is treated as "no subtype".

    Returns
    -------
    name: str
        Type name of the form:
        {sources}_{properties}_{statistic}[_{subtype}]

    Raises
    ------
    IndexError
        If sources or properties is an empty sequence.
    """
    # A plain string is used verbatim; any other sequence is merged
    # into a single camel-case word.
    if not isinstance(sources, str):
        sources = _join_camel_case(sources)
    if not isinstance(properties, str):
        properties = _join_camel_case(properties)

    name = f"{sources}_{properties}_{statistic}"
    if subtype:
        name = f"{name}_{subtype}"
    return name
225
+
226
+
227
# Namespace object holding every known data type name, so you can do:
#   standard_types.galaxy_shear_cl_ee == "galaxy_shear_cl_ee"
# Also, for convenience, you can do
#   standard_types.index('galaxy_shear_cl_ee')
# and
#   'galaxy_shear_cl_ee' in standard_types

# Iterating a dict yields its keys, in insertion order.
standard_types = Namespace(*required_tags)
233
+
234
+
235
class DataPoint:
    """A class for a single data point (one scalar value).

    Data points have a type, zero or more tracers, a value,
    and any arbitrary tags that are stored in a dictionary,
    and can be used to describe angular scales, window functions,
    or any arbitrary information to be attached to the data.

    Data points can be automatically created and added to a
    Sacc object, so you don't normally need to manually create them.

    Parameters
    -----------
    data_type: str
        A string, indicating the type of data point

    tracers: tuple
        Tuple of strings with the names of tracers to use

    value: float
        Mean value of this statistic

    ignore_missing_tags: bool
        Optional, default=False. If True, skip the check that the
        tags required for this data type are present.

    tags: dict
        Dictionary of further data point metadata, such as binning
        info, angles, etc.
    """

    def __init__(self, data_type, tracers, value,
                 ignore_missing_tags=False, **tags):
        """Create a new data point.

        Data points can be automatically created and added to a
        Sacc object, so you don't normally need to manually create them.

        Parameters
        ----------
        data_type: str
            A string, indicating the type of data point

        tracers: tuple
            Tuple of strings with the names of tracers to use

        value: float
            Mean value of this statistic

        ignore_missing_tags: bool
            Optional, default=False. If True, do not complain if a
            tag usually needed for this data type is not present.

        **tags: dict[str:any]
            Dictionary of further data point metadata, such as binning
            info, angles, etc.

        Raises
        ------
        ValueError
            If data_type is listed in required_tags, one of its
            required tags is missing, and ignore_missing_tags is False.
        """
        self.data_type = data_type
        self.tracers = tracers
        self.value = value
        self.tags = tags
        # Data types can have required tags which must be present.
        # Unknown data types are accepted without any check.
        if (data_type in required_tags) and (not ignore_missing_tags):
            for tag in required_tags[data_type]:
                if tag not in tags:
                    # Every interpolated fragment needs its own f prefix.
                    raise ValueError(f"Tag {tag} required for data type "
                                     f"{data_type} "
                                     "(ignore_missing_tags=False)")

    def __repr__(self):
        """Return a constructor-like description of this data point."""
        tag_text = ", ".join(f'{k}={v}' for (k, v) in self.tags.items())
        return (f"DataPoint(data_type='{self.data_type}', "
                f"tracers={self.tracers}, value={self.value}, {tag_text})")

    def get_tag(self, tag, default=None):
        """
        Get the value of the named tag, or a default if not found.

        Parameters
        ----------
        tag: str
            Tag to find on the data point

        default: any
            Value to return if the tag is not found. Default is None.

        Returns
        -------
        value: any
            Value of the tag in this data point
        """
        return self.tags.get(tag, default)

    def __getitem__(self, tag):
        """
        Get the value of the named tag, raising an
        error if it is not found

        Parameters
        ----------
        tag: str
            Tag to find on the data point

        Returns
        -------
        value: any
            Value of the tag in this data point

        Raises
        ------
        KeyError
            If the tag is not present on this data point.
        """
        return self.tags[tag]

    @staticmethod
    def _choose_fields(data):
        """
        Internal static method to generate a list of column names from a
        list of data points. Since the data points can be heterogeneous
        this is not quite trivial - we use the union of the tag names and
        tracer_0, tracer_1, etc. up to the max number of tracers.

        Returns
        -------
        tracers: list[str]
            Column names 'tracer_0', 'tracer_1', ...

        tags: list[str]
            Sorted union of the tag names over all the data points.
        """
        tag_names = set()
        ntracer = 0
        for d in data:
            ntracer = max(ntracer, len(d.tracers))
            tag_names.update(d.tags)
        # Set iteration order for strings varies between interpreter runs,
        # so sort to make the column order reproducible.
        tags = sorted(tag_names)
        tracers = [f'tracer_{i}' for i in range(ntracer)]
        return tracers, tags

    @classmethod
    def to_table(cls, data, lookups=None):
        """
        Convert a list of data points to a single homogeneous table

        Since data points can have varying tags, this method uses
        null values to represent non-present tags.

        Only the 'NTRACER' metadata entry is set here; from_table also
        reads 'SACCNAME', which the caller has to add to the table.

        Parameters
        ----------
        data: list
            A list of DataPoint objects

        lookups: dict
            A dictionary of tags->dict showing replacements to make
            in the tags. Default is empty.

        Returns
        -------
        table: astropy.table.Table
            table object containing data points
        """
        if lookups is None:
            lookups = {}
        # Get the names of the columns to generate
        tracers, tags = cls._choose_fields(data)
        names = tracers + ['value'] + tags
        # Convert each data point to a row
        rows = [d._make_row(tracers, tags, lookups) for d in data]

        # Convert to a table and fiddle slightly.
        table = Table(rows=rows, names=names)
        table.meta['NTRACER'] = len(tracers)
        hide_null_values(table)
        return table

    @classmethod
    def from_table(cls, table, lookups=None):
        """Convert a table back into a list of data points.

        This method removes null values from the tags.

        Parameters
        ----------
        table: astropy.table.Table
            A table of data containing the tracers, values, and tags.
            Its metadata must contain 'NTRACER' and 'SACCNAME', and its
            columns must be laid out as tracers, then 'value', then tags.

        lookups: dict
            A dictionary of tags->dict showing replacements to make
            in the tags. Default is empty.

        Returns
        -------
        data: list
            list of DataPoint objects
        """
        if lookups is None:
            lookups = {}
        # Get out required table metadata
        nt = table.meta['NTRACER']
        data_type = table.meta['SACCNAME']

        # Tag names - everything after the tracer columns and the value
        # column. We will remove missing tags below.
        tag_names = table.colnames[nt + 1:]
        data = []
        for row in table:
            # Get basic data elements
            tracers = tuple(row[f'tracer_{i}'] for i in range(nt))
            value = row['value']

            # Deal with tags. First just pull out all remaining columns
            tags = {name: row[name] for name in tag_names}
            # Iterate over a snapshot because entries are deleted below.
            for k, v in list(tags.items()):
                # Deal with any tags that we should replace.
                # This is mainly used for Window instances.
                if k in lookups:
                    tags[k] = lookups[k].get(v, v)
                # Now delete any null values, as indicated by the
                # sentinel for this column's dtype kind.
                if ((hasattr(tags[k], 'dtype')) and
                        (v == null_values[tags[k].dtype.kind])):
                    del tags[k]
            # Finally convert back to a data point and record.
            # NOTE(review): this runs the required-tag check with
            # ignore_missing_tags=False, so rows created with
            # ignore_missing_tags=True would raise here - confirm that
            # this is intended.
            data_point = cls(data_type, tracers, value, **tags)
            data.append(data_point)
        return data

    def _make_row(self, tracers, tags, lookups):
        """
        Turn this data point into a list with specified tracers and tags.
        If some tracers or tags are missing (heterogeneous data set) then
        use blank strings for the tracers and Nones for the tags.

        Parameters
        ----------
        tracers: list[str]
            Tracer column names; only the length is used here.

        tags: list[str]
            Tag names, in output column order.

        lookups: dict
            A dictionary of tags->dict showing replacements to make
            in the tag values.

        Returns
        -------
        row: list
            Tracer names, then the value, then one entry per tag.
        """
        # Pad with empty strings up to the requested number of tracers.
        missing = len(tracers) - len(self.tracers)
        row = list(self.tracers) + [""] * missing
        row.append(self.value)
        for t in tags:
            v = self.tags.get(t)
            lookup = lookups.get(t)
            if lookup is not None:
                # Replace the value if the lookup knows it, else keep it.
                v = lookup.get(v, v)
            row.append(v)
        return row