sacc 1.0.2__py3-none-any.whl → 2.0.1__py3-none-any.whl

This diff shows the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
sacc/sacc.py CHANGED
@@ -1,18 +1,18 @@
1
1
  import copy
2
- import warnings
3
2
  import os
4
- from io import BytesIO
3
+ import re
4
+ import warnings
5
5
 
6
- import numpy as np
7
6
  from astropy.io import fits
8
7
  from astropy.table import Table
8
+ import numpy as np
9
9
 
10
10
  from .tracers import BaseTracer
11
- from .windows import BaseWindow, BandpowerWindow
11
+ from .windows import BandpowerWindow
12
12
  from .covariance import BaseCovariance, concatenate_covariances
13
13
  from .utils import unique_list
14
14
  from .data_types import standard_types, DataPoint
15
-
15
+ from . import io
16
16
 
17
17
  class Sacc:
18
18
  """
@@ -29,6 +29,7 @@ class Sacc:
29
29
  self.tracers = {}
30
30
  self.covariance = None
31
31
  self.metadata = {}
32
+ self.tracer_uncertainties = {}
32
33
 
33
34
  def __len__(self):
34
35
  """
@@ -41,6 +42,48 @@ class Sacc:
41
42
  """
42
43
  return len(self.data)
43
44
 
45
+ def __eq__(self, other):
46
+ """
47
+ Test for equality between two Sacc instances.
48
+
49
+ Checks whether the two values are equal. This is a
50
+ complete equality check, and will check that the data points,
51
+ tracers, covariance and metadata are all the same.
52
+
53
+ Parameters
54
+ ----------
55
+ other: Sacc instance
56
+ The other data set to compare with
57
+
58
+ Returns
59
+ -------
60
+ equal: bool
61
+ True if the two data sets are the same, False otherwise.
62
+ """
63
+ if not isinstance(other, Sacc):
64
+ return False
65
+
66
+ if self.data != other.data:
67
+ return False
68
+
69
+ if len(self.tracers) != len(other.tracers):
70
+ return False
71
+ if set(self.tracers.keys()) != set(other.tracers.keys()):
72
+ return False
73
+ for k1, v1 in self.tracers.items():
74
+ v2 = other.tracers[k1]
75
+ if not v1 == v2:
76
+ return False
77
+
78
+ if self.covariance != other.covariance:
79
+ return False
80
+
81
+ if self.metadata != other.metadata:
82
+ return False
83
+
84
+ return True
85
+
86
+
44
87
  def copy(self):
45
88
  """
46
89
  Create a copy of the data set with no data shared with the original.
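
As context for the new __eq__ method added above, a minimal usage sketch (illustrative only; the file names are hypothetical):

    import sacc

    s1 = sacc.Sacc.load_fits("cls_run1.fits")
    s2 = sacc.Sacc.load_fits("cls_run2.fits")

    # Full comparison: data points, tracers, covariance and metadata
    if s1 == s2:
        print("The two data sets are identical")
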
@@ -72,10 +115,9 @@ class Sacc:
72
115
  # Otherwise just use whatever we have.
73
116
  if 'ell' in row.tags:
74
117
  return (dt, row.tracers, row.tags['ell'])
75
- elif 'theta' in row.tags:
118
+ if 'theta' in row.tags:
76
119
  return (dt, row.tracers, row.tags['theta'])
77
- else:
78
- return (dt, row.tracers, 0.0)
120
+ return (dt, row.tracers, 0.0)
79
121
  # This from
80
122
  # https://stackoverflow.com/questions/6422700/how-to-get-indices-of-a-sorted-array-in-python
81
123
  indices = [i[0] for i in sorted(enumerate(self.data),
@@ -105,14 +147,14 @@ class Sacc:
105
147
  # Builder methods for building up Sacc data from scratch in memory
106
148
  #
107
149
 
108
- def add_tracer(self, tracer_type, name,
150
+ def add_tracer(self, type_name, name,
109
151
  *args, **kwargs):
110
152
  """
111
153
  Add a new tracer
112
154
 
113
155
  Parameters
114
156
  ----------
115
- tracer_type: str
157
+ type_name: str
116
158
  A string corresponding to one of the known tracer types,
117
159
  or 'misc' to use a new tracer with no parameters.
118
160
  e.g. "NZ" for n(z) tracers
@@ -135,7 +177,7 @@ class Sacc:
135
177
  None
136
178
 
137
179
  """
138
- tracer = BaseTracer.make(tracer_type, name,
180
+ tracer = BaseTracer.make(type_name, name,
139
181
  *args, **kwargs)
140
182
  self.add_tracer_object(tracer)
141
183
 
@@ -152,6 +194,17 @@ class Sacc:
152
194
  """
153
195
  self.tracers[tracer.name] = tracer
154
196
 
197
+ def add_tracer_uncertainty_object(self, uncertainty):
198
+ """
199
+ Add a pre-constructed tracer uncertainty object to this data set.
200
+
201
+ Parameters
202
+ ----------
203
+ uncertainty: BaseTracerUncertainty instance
204
+ The uncertainty object to add to the data set
205
+ """
206
+ self.tracer_uncertainties[uncertainty.name] = uncertainty
207
+
155
208
  def add_data_point(self, data_type, tracers, value,
156
209
  tracers_later=False, **tags):
157
210
  """
@@ -347,7 +400,7 @@ class Sacc:
347
400
  # Skip things with the wrong type or tracer
348
401
  if not ((tracers is None) or (d.tracers == tracers)):
349
402
  continue
350
- if not ((data_type is None or d.data_type == data_type)):
403
+ if not (data_type is None or d.data_type == data_type):
351
404
  continue
352
405
  # Remove any objects that don't match the required tags,
353
406
  # including the fact that we can specify tag__lt and tag__gt
@@ -744,6 +797,7 @@ class Sacc:
744
797
  for tri in trs:
745
798
  if tri not in names:
746
799
  self.remove_selection(tracers=trs)
800
+ break
747
801
 
748
802
  trs_names = list(self.tracers.keys())
749
803
  for name in trs_names:
@@ -811,10 +865,48 @@ class Sacc:
811
865
  # Convert any window objects in the data set to tables,
812
866
  # and record a mapping from those objects to table references
813
867
  # This could easily be extended to other types
814
- all_windows = unique_list(d.get_tag('window') for d in self.data)
815
- window_ids = {w: id(w) for w in all_windows}
816
- tables = BaseWindow.to_tables(all_windows)
817
- return tables, window_ids
868
+ windows = []
869
+ for d in self.data:
870
+ w = d.get_tag("window")
871
+ if w is not None:
872
+ windows.append(w)
873
+
874
+ windows = unique_list(windows)
875
+ window_ids = {id(w):w for w in windows}
876
+ return window_ids
877
+
878
+ def to_tables(self):
879
+ """
880
+ Convert this data set to a collection of astropy tables.
881
+
882
+ Parameters
883
+ ----------
884
+ None
885
+
886
+ Returns
887
+ -------
888
+ tables: list of astropy Table objects
889
+ A list of tables, each corresponding to a different
890
+ type of object in the data set. The tables will have
891
+ metadata that can be used to reconstruct the data set.
892
+ """
893
+ # Get the tracers
894
+ objects = {
895
+ "tracer": self.tracers,
896
+ "data": self.data,
897
+ "window": self._make_window_tables(),
898
+ "metadata": self.metadata,
899
+ "traceruncertainty": self.tracer_uncertainties,
900
+ }
901
+
902
+ if self.has_covariance():
903
+ # For now the name will just be "cov", but in future
904
+ # we may support alternatives.
905
+ objects["covariance"] = {self.covariance.name: self.covariance}
906
+
907
+ tables = io.to_tables(objects)
908
+
909
+ return tables
818
910
 
819
911
  def save_fits(self, filename, overwrite=False):
820
912
  """
@@ -830,71 +922,30 @@ class Sacc:
830
922
  If True, overwrite the file silently.
831
923
  """
832
924
 
833
- # Since we don't want to re-order the file as a side effect
834
- # we first make a copy of ourself and re-order that.
835
- # Tables for the windows
836
- tables, window_ids = self._make_window_tables()
837
- lookup = {'window': window_ids}
925
+ if os.path.exists(filename) and not overwrite:
926
+ raise FileExistsError(f"File {filename} already exists. "
927
+ "Use overwrite=True to overwrite it.")
838
928
 
839
- # Tables for the tracers
840
- tables += BaseTracer.to_tables(self.tracers.values())
929
+ tables = self.to_tables()
841
930
 
842
- # Tables for the data sets
843
- for dt in self.get_data_types():
844
- indices = self.indices(dt)
845
- data = [self.data[i] for i in indices]
846
- table = DataPoint.to_table(data, lookup)
847
- table.add_column(indices, name='sacc_ordering')
848
- # Could move this inside to_table?
849
- table.meta['SACCTYPE'] = 'data'
850
- table.meta['SACCNAME'] = dt
851
- table.meta['EXTNAME'] = f'data:{dt}'
852
- tables.append(table)
931
+ # Add the EXTNAME metadata value to each table.
932
+ # This is used to set the HDU name in the FITS file.
933
+ for table in tables:
934
+ typ = table.meta['SACCTYPE']
935
+ name = table.meta['SACCNAME']
936
+ if typ != 'data':
937
+ cls = table.meta['SACCCLSS']
938
+ extname = f'{typ}:{cls}:{name}'
939
+ table.meta['EXTNAME'] = extname
853
940
 
854
941
  # Create the actual fits object
855
- hdr = fits.Header()
856
-
857
- # save any global metadata in the header.
858
- # We save the keys and values as separate header cards,
859
- # because otherwise the keys are all forced to upper case
860
- hdr['NMETA'] = len(self.metadata)
861
- for i, (k, v) in enumerate(self.metadata.items()):
862
- hdr[f'KEY{i}'] = k
863
- hdr[f'VAL{i}'] = v
864
- hdus = [fits.PrimaryHDU(header=hdr)] + \
865
- [fits.table_to_hdu(table) for table in tables]
866
-
867
- # Covariance, if needed.
868
- # All the other data elements become astropy tables first,
869
- # But covariances are a bit more complicated and dense, so we
870
- # allow them to convert straight to
871
- if self.covariance is not None:
872
- hdus.append(self.covariance.to_hdu())
873
-
874
- # Make and save the final FITS data
942
+ primary_header = fits.Header()
943
+ with warnings.catch_warnings():
944
+ warnings.simplefilter("ignore", category=fits.verify.VerifyWarning)
945
+ hdus = [fits.PrimaryHDU(header=primary_header)] + \
946
+ [fits.table_to_hdu(table) for table in tables]
875
947
  hdu_list = fits.HDUList(hdus)
876
-
877
- # The astropy writeto shows very poor performance
878
- # when writing lots of small metadata strings on
879
- # the NERSC Lustre file system. So we write to
880
- # a buffer first and then save that.
881
-
882
- # First we have to manually check for overwritten files
883
- # We raise the same error as astropy
884
- if os.path.exists(filename) and not overwrite:
885
- raise OSError(f"File {filename} already exists and overwrite=False")
886
-
887
- # Create the buffer and write the data to it
888
- buf = BytesIO()
889
- hdu_list.writeto(buf)
890
-
891
- # Rewind and read the binary data we just wrote
892
- buf.seek(0)
893
- output_data = buf.read()
894
-
895
- # Write the binary data to the target file
896
- with open(filename, "wb") as f:
897
- f.write(output_data)
948
+ io.astropy_buffered_fits_write(filename, hdu_list)
898
949
 
899
950
  @classmethod
900
951
  def load_fits(cls, filename):
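
With the rewrite above, trying to write over an existing file now raises FileExistsError (a subclass of the OSError raised previously). A quick sketch with a hypothetical path:

    s.save_fits("cls_v2.fits")                  # raises FileExistsError if the file exists
    s.save_fits("cls_v2.fits", overwrite=True)  # replaces the file silently
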
@@ -909,87 +960,195 @@ class Sacc:
909
960
  filename: str
910
961
  A FITS format sacc file
911
962
  """
912
- hdu_list = fits.open(filename, "readonly")
913
-
914
- # Split the HDU's into the different sacc types
915
- tracer_tables = [Table.read(hdu)
916
- for hdu in hdu_list
917
- if hdu.header.get('SACCTYPE') == 'tracer']
918
- window_tables = [Table.read(hdu)
919
- for hdu in hdu_list
920
- if hdu.header.get('SACCTYPE') == 'window']
921
- data_tables = [Table.read(hdu) for hdu in hdu_list
922
- if hdu.header.get('SACCTYPE') == 'data']
923
- cov = [hdu for hdu in hdu_list if hdu.header.get('SACCTYPE') == 'cov']
924
-
925
- # Pull out the classes for these components.
926
- tracers = BaseTracer.from_tables(tracer_tables)
927
- windows = BaseWindow.from_tables(window_tables)
928
-
929
- # The lookup table is used to convert from ID numbers to
930
- # Window objects.
931
- lookup = {'window': windows}
932
-
933
- # Check if all tables have the 'sacc_ordering' column
934
- if not all("sacc_ordering" in table.colnames for table in data_tables):
935
- warnings.warn(
936
- "The FITS format without the 'sacc_ordering' column is deprecated. "
937
- "Assuming data rows are in the correct order as it was before version 1.0."
938
- )
939
- last_index = 0
940
- for table in data_tables:
941
- # Create a sequential order assuming rows are stored contiguously
942
- order = range(last_index, last_index + len(table))
943
- # Update last_index for the next table
944
- last_index += len(table)
945
- # Add the 'sacc_ordering' column to the table
946
- table.add_column(order, name="sacc_ordering")
947
-
948
- # Collect together all the data points from the different sections
949
- data_unordered = []
950
- index = []
951
- for table in data_tables:
952
- index += table["sacc_ordering"].tolist()
953
- table.remove_column('sacc_ordering')
954
- data_unordered += DataPoint.from_table(table, lookup)
955
-
956
- # Put the data back in its original order, matching the
957
- # covariance.
958
- data = [None for i in range(len(data_unordered))]
959
- for i, d in zip(index, data_unordered):
960
- data[i] = d
961
-
962
- # Finally, take all the pieces that we have collected
963
- # and add them all into this data set.
964
- S = cls()
963
+ cov = None
964
+ metadata = None
965
+
966
+ with fits.open(filename, mode="readonly") as f:
967
+ tables = []
968
+ for hdu in f:
969
+ if hdu.name.lower() == 'primary':
970
+ # The primary table is not a data table,
971
+ # but in older files it was used to store metadata
972
+ header = hdu.header
973
+ if "NMETA" in header:
974
+ metadata = {}
975
+ # Older format metadata is kept in the primary
976
+ # header, with keys KEY0, VAL0, etc.
977
+ n_meta = header['NMETA']
978
+ for i in range(n_meta):
979
+ k = header[f'KEY{i}']
980
+ v = header[f'VAL{i}']
981
+ metadata[k] = v
982
+ elif hdu.name.lower() == 'covariance':
983
+ # Legacy covariance - HDU will just be called covariance
984
+ # instead of the full name given by BaseIO.
985
+ # Note that this will also allow us to use multiple
986
+ # covariances in future.
987
+ cov = BaseCovariance.from_hdu(hdu)
988
+ else:
989
+ tables.append(Table.read(hdu))
990
+
991
+ # add the metadata table, if we are in the legacy format
992
+ if metadata is not None:
993
+ tables.append(io.metadata_to_table(metadata))
994
+
995
+ return cls.from_tables(tables, cov=cov)
996
+
997
+ def save_hdf5(self, filename, overwrite=False, compression='gzip', compression_opts=4):
998
+ """
999
+ Save this data to a HDF5 format Sacc file.
1000
+
1001
+ Parameters
1002
+ ----------
1003
+ filename: str
1004
+ Destination HDF5 file name
1005
+ overwrite: bool
1006
+ If False (the default), raise an error if the file already exists
1007
+ If True, overwrite the file silently.
1008
+ compression: str, optional
1009
+ Compression filter to use ('gzip', 'lzf', 'szip', or None). Default is 'gzip'.
1010
+ compression_opts : int, optional
1011
+ Compression level (0-9 for gzip, where 0 is no compression and 9 is maximum).
1012
+ Default is 4 (moderate compression).
1013
+ """
1014
+ import h5py
1015
+ if os.path.exists(filename) and not overwrite:
1016
+ raise FileExistsError(f"File {filename} already exists. "
1017
+ "Use overwrite=True to overwrite it.")
1018
+ tables = self.to_tables()
1019
+
1020
+ # Add the EXTNAME metadata value to each table.
1021
+ for table in tables:
1022
+ typ = table.meta['SACCTYPE']
1023
+ name = table.meta['SACCNAME']
1024
+ if typ != 'data':
1025
+ cls = table.meta['SACCCLSS']
1026
+ extname = f'{typ}:{cls}:{name}'
1027
+ table.meta['EXTNAME'] = extname
1028
+
1029
+ with h5py.File(filename, 'w') as f:
1030
+ used_names = {}
1031
+ for table in tables:
1032
+ # Build a meaningful dataset name
1033
+ typ = table.meta.get('SACCTYPE', 'unknown')
1034
+ name = table.meta.get('SACCNAME', None)
1035
+ cls = table.meta.get('SACCCLSS', None)
1036
+ part = table.meta.get('SACCPART', None)
1037
+
1038
+ # Compose base dataset name
1039
+ if typ == 'data' and name:
1040
+ dset_name = f"data/{name}"
1041
+ elif typ == 'tracer' and name:
1042
+ dset_name = f"tracer/{name}"
1043
+ elif typ == 'traceruncertainty' and name:
1044
+ dset_name = f"traceruncertainty/{name}"
1045
+ elif typ == 'window' and name:
1046
+ dset_name = f"window/{name}"
1047
+ if part:
1048
+ dset_name += f"_{part}"
1049
+ elif typ == 'covariance' and name:
1050
+ dset_name = f"covariance_{name}"
1051
+ elif typ == 'metadata':
1052
+ dset_name = "metadata"
1053
+ elif name:
1054
+ dset_name = f"{typ}_{name}"
1055
+ else:
1056
+ dset_name = typ
1057
+
1058
+ # Ensure uniqueness by appending an index if needed
1059
+ base_name = dset_name
1060
+ idx = used_names.get(base_name, 0)
1061
+ while dset_name in f:
1062
+ idx += 1
1063
+ dset_name = f"{base_name}_{idx}"
1064
+ used_names[base_name] = idx
1065
+
1066
+ table.write(f,
1067
+ path=dset_name,
1068
+ serialize_meta=False,
1069
+ compression=compression,
1070
+ compression_opts=compression_opts
1071
+ )
1072
+
1073
+ @classmethod
1074
+ def load_hdf5(cls, filename):
1075
+ """
1076
+ Load a Sacc object from an HDF5 file.
1077
+
1078
+ Parameters
1079
+ ----------
1080
+ filename: str
1081
+ Path to the HDF5 file.
1082
+
1083
+ Returns
1084
+ -------
1085
+ sacc_obj: Sacc
1086
+ A Sacc object reconstructed from the tables in the HDF5 file.
1087
+ """
1088
+ import h5py
1089
+ recovered_tables = []
1090
+ with h5py.File(filename, 'r') as f:
1091
+ # Read all datasets (not groups) in the order they appear
1092
+ for key in f.keys():
1093
+ item = f[key]
1094
+ if isinstance(item, h5py.Dataset):
1095
+ table = Table.read(f, path=key)
1096
+ recovered_tables.append(table)
1097
+ elif isinstance(item, h5py.Group):
1098
+ for subkey in item.keys():
1099
+ subitem = item[subkey]
1100
+ if isinstance(subitem, h5py.Dataset):
1101
+ table = Table.read(item, path=f"{subkey}")
1102
+ recovered_tables.append(table)
1103
+ sacc_obj = cls.from_tables(recovered_tables)
1104
+ return sacc_obj
1105
+
1106
+ @classmethod
1107
+ def from_tables(cls, tables, cov=None):
1108
+ """
1109
+ Reassemble a Sacc object from a collection of tables.
1110
+
1111
+ Parameters
1112
+ ----------
1113
+ tables: list of astropy Table objects
1114
+ The tables to rebuild the data set from, covering some of 'tracer',
1115
+ 'data', 'window', 'covariance', 'traceruncertainty' and 'metadata'.
1116
+ cov: BaseCovariance instance, optional legacy covariance to attach.
1117
+ """
1118
+ s = cls()
1119
+
1120
+ objs = io.from_tables(tables)
1121
+
1122
+ # Add all the tracers
1123
+ tracers = objs.get('tracer', {})
965
1124
  for tracer in tracers.values():
966
- S.add_tracer_object(tracer)
1125
+ s.add_tracer_object(tracer)
967
1126
 
968
- # Add the data points manually instead of using the API, since we
969
- # have already constructed them.
1127
+ # Add the actual data points. The windows and any future
1128
+ # objects that are attached to individual data points
1129
+ # will be included in the data points themselves, there is
1130
+ # no need to add them separately.
1131
+ data = fix_data_ordering(objs.get('data', []))
970
1132
  for d in data:
971
- S.data.append(d)
1133
+ s.data.append(d)
972
1134
 
973
- # Assume there is only a single covariance extension,
974
- # if there are any
975
- if cov:
976
- S.add_covariance(BaseCovariance.from_hdu(cov[0]))
1135
+ # Add the covariance, if it is present.
1136
+ if "covariance" in objs:
1137
+ if cov is not None:
1138
+ raise ValueError("Found both a legacy covariance and a new one in the same file.")
1139
+ cov = objs["covariance"]["cov"]
977
1140
 
978
- # Load metadata from the primary heaer
979
- header = hdu_list[0].header
1141
+ # copy in metadata
1142
+ s.metadata.update(objs.get('metadata', {}))
980
1143
 
981
- # Load each key,value pair in turn.
982
- # This will work for normal scalar data types;
983
- # arrays etc. will need some thought.
984
- n_meta = header['NMETA']
985
- for i in range(n_meta):
986
- k = header[f'KEY{i}']
987
- v = header[f'VAL{i}']
988
- S.metadata[k] = v
1144
+ if cov is not None:
1145
+ s.add_covariance(cov)
989
1146
 
990
- hdu_list.close()
1147
+ for uncertainty in objs.get('traceruncertainty', {}).values():
1148
+ s.add_tracer_uncertainty_object(uncertainty)
1149
+
1150
+ return s
991
1151
 
992
- return S
993
1152
 
994
1153
  #
995
1154
  # Methods below here are helper functions for specific types of data.
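
The new HDF5 path mirrors the FITS one; a round-trip sketch using the signatures above (file names are hypothetical, and h5py must be installed):

    import sacc

    s = sacc.Sacc.load_fits("cls_v2.fits")
    s.save_hdf5("cls_v2.h5", overwrite=True,
                compression="gzip", compression_opts=4)
    s2 = sacc.Sacc.load_hdf5("cls_v2.h5")
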
@@ -1012,13 +1171,10 @@ class Sacc:
1012
1171
  cov_block = self.covariance.get_block(ind)
1013
1172
  if return_ind:
1014
1173
  return angle, mu, cov_block, ind
1015
- else:
1016
- return angle, mu, cov_block
1017
- else:
1018
- if return_ind:
1019
- return angle, mu, ind
1020
- else:
1021
- return angle, mu
1174
+ return angle, mu, cov_block
1175
+ if return_ind:
1176
+ return angle, mu, ind
1177
+ return angle, mu
1022
1178
 
1023
1179
  def get_bandpower_windows(self, indices):
1024
1180
  """
@@ -1047,10 +1203,8 @@ class Sacc:
1047
1203
  if not isinstance(ws, BandpowerWindow):
1048
1204
  warnings.warn("No bandpower windows associated with these data")
1049
1205
  return None
1050
- else:
1051
- w_inds = np.array(self._get_tags_by_index(['window_ind'],
1052
- indices)[0])
1053
- return ws.get_section(w_inds)
1206
+ w_inds = np.array(self._get_tags_by_index(['window_ind'],indices)[0])
1207
+ return ws.get_section(w_inds)
1054
1208
 
1055
1209
  def get_ell_cl(self, data_type, tracer1, tracer2,
1056
1210
  return_cov=False, return_ind=False):
@@ -1151,7 +1305,7 @@ class Sacc:
1151
1305
  tracers_later=tracers_later, **t)
1152
1306
  return
1153
1307
  # multiple ell/theta values but same bin
1154
- elif np.isscalar(tracer1):
1308
+ if np.isscalar(tracer1):
1155
1309
  n1 = len(x)
1156
1310
  n2 = len(tag_val)
1157
1311
  if tag_extra_name is None:
@@ -1159,7 +1313,7 @@ class Sacc:
1159
1313
  n3 = n1
1160
1314
  else:
1161
1315
  n3 = len(tag_extra)
1162
- if not (n1 == n2 == n3):
1316
+ if not n1 == n2 == n3:
1163
1317
  raise ValueError("Length of inputs do not match in"
1164
1318
  f"added 2pt data ({n1},{n2},{n3})")
1165
1319
  if window is None:
@@ -1184,7 +1338,7 @@ class Sacc:
1184
1338
  n5 = n1
1185
1339
  else:
1186
1340
  n5 = len(tag_extra)
1187
- if not (n1 == n2 == n3 == n4 == n5):
1341
+ if not n1 == n2 == n3 == n4 == n5:
1188
1342
  raise ValueError("Length of inputs do not match in "
1189
1343
  f"added 2pt data ({n1},{n2},{n3},{n4},{n5})")
1190
1344
  if window is None:
@@ -1213,7 +1367,7 @@ class Sacc:
1213
1367
  n6 = n1
1214
1368
  else:
1215
1369
  n6 = len(tag_extra)
1216
- if not (n1 == n2 == n3 == n4 == n5 == n6):
1370
+ if not n1 == n2 == n3 == n4 == n5 == n6:
1217
1371
  raise ValueError("Length of inputs do not match in added "
1218
1372
  f"2pt data ({n1},{n2},{n3},{n4},{n5},{n6})")
1219
1373
  if window is None:
@@ -1471,3 +1625,57 @@ def concatenate_data_sets(*data_sets, labels=None, same_tracers=None):
1471
1625
  output.metadata[key] = val
1472
1626
 
1473
1627
  return output
1628
+
1629
+
1630
+
1631
+
1632
+ def fix_data_ordering(data_points):
1633
+ """
1634
+ SACC data points have an ordering column called 'sacc_ordering'
1635
+ which is used to keep the data points in the same order as
1636
+ the covariance matrix. This function re-orders the data points
1637
+ accordingly
1638
+
1639
+ Parameters
1640
+ ----------
1641
+ data_points: list of DataPoint objects
1642
+
1643
+ Returns
1644
+ -------
1645
+ ordered_data_points: list of DataPoint objects
1646
+
1647
+ """
1648
+ # Older versions of SACC did not have this column, so we
1649
+ # check for that situation and if not then add it here, in the
1650
+ # order the data points were found in the file.
1651
+ # In the old sacc version this order automatically matched the
1652
+ # covariance matrix.
1653
+ have_ordering = ['sacc_ordering' in dp.tags for dp in data_points]
1654
+ if not all(have_ordering):
1655
+
1656
+ if any(have_ordering):
1657
+ raise ValueError(
1658
+ "Some data points have sacc ordering and some do not. "
1659
+ "Hybrid old/new version. This is very wrong. "
1660
+ "Please check your data files or ask on #desc-sacc for help."
1661
+ )
1662
+
1663
+ print("Warning: The FITS format without the 'sacc_ordering' column is deprecated")
1664
+ print("Assuming data rows are in the correct order as it was before version 1.0.")
1665
+ for i, dp in enumerate(data_points):
1666
+ dp.tags['sacc_ordering'] = i
1667
+
1668
+
1669
+
1670
+ # In either case, we now have the 'sacc_ordering' column,
1671
+ # so can re-order the data points.
1672
+ ordered_data_points = [None for i in range(len(data_points))]
1673
+ for dp in data_points:
1674
+ i = dp.tags['sacc_ordering']
1675
+ ordered_data_points[i] = dp
1676
+
1677
+ # We remove the ordering tag now, as it is not needed
1678
+ # in the main library
1679
+ del dp.tags['sacc_ordering']
1680
+
1681
+ return ordered_data_points
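
To illustrate what fix_data_ordering does, a self-contained sketch with stand-in objects (FakePoint is hypothetical; the real function receives DataPoint instances whose tags dict carries 'sacc_ordering', and it is assumed here to be importable from sacc.sacc):

    from sacc.sacc import fix_data_ordering

    class FakePoint:
        def __init__(self, value, order):
            self.value = value
            self.tags = {'sacc_ordering': order}

    points = [FakePoint('b', 1), FakePoint('a', 0), FakePoint('c', 2)]
    ordered = fix_data_ordering(points)
    print([p.value for p in ordered])   # ['a', 'b', 'c']
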