galform-analysis 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. galform_analysis/__init__.py +86 -0
  2. galform_analysis/analysis/__init__.py +47 -0
  3. galform_analysis/analysis/aggregation.py +314 -0
  4. galform_analysis/analysis/correlation/__init__.py +68 -0
  5. galform_analysis/analysis/correlation/correlation.py +508 -0
  6. galform_analysis/analysis/correlation/dm_correlation.py +681 -0
  7. galform_analysis/analysis/correlation/galaxy_bias.py +55 -0
  8. galform_analysis/analysis/correlation/n_point_bruteforce.py +119 -0
  9. galform_analysis/analysis/correlation/satellite_cross_correlation.py +255 -0
  10. galform_analysis/analysis/correlation/scope_wrapper.py +21 -0
  11. galform_analysis/analysis/correlation/subvol_weighted_correction.py +693 -0
  12. galform_analysis/analysis/correlation/three_point_bruteforce.py +92 -0
  13. galform_analysis/analysis/correlation/three_point_reference.py +95 -0
  14. galform_analysis/analysis/correlation/three_point_scope.py +156 -0
  15. galform_analysis/analysis/mass_functions/__init__.py +53 -0
  16. galform_analysis/analysis/mass_functions/hmf.py +373 -0
  17. galform_analysis/analysis/mass_functions/hod.py +570 -0
  18. galform_analysis/analysis/mass_functions/smf.py +295 -0
  19. galform_analysis/analysis/mass_functions/theoretical_hmf.py +566 -0
  20. galform_analysis/analysis/redshift_space_distortions/__init__.py +1 -0
  21. galform_analysis/analysis/redshift_space_distortions/subvol_weighted_multipoles.py +429 -0
  22. galform_analysis/config.py +230 -0
  23. galform_analysis/readers/__init__.py +17 -0
  24. galform_analysis/readers/loaders.py +289 -0
  25. galform_analysis/redshift_lists/COLIBRE-L100m6.txt +3 -0
  26. galform_analysis/redshift_lists/FLAMINGO-L1000N1800.txt +78 -0
  27. galform_analysis/redshift_lists/L800.txt +253 -0
  28. galform_analysis/redshift_lists/Mill1.txt +2 -0
  29. galform_analysis/redshift_lists/Mill2.txt +2 -0
  30. galform_analysis/sim_configs/COLIBRE.json +14 -0
  31. galform_analysis/sim_configs/FLAMINGO.json +14 -0
  32. galform_analysis/sim_configs/L800.json +14 -0
  33. galform_analysis/sim_configs/Mill1.json +14 -0
  34. galform_analysis/sim_configs/Mill2.json +14 -0
  35. galform_analysis/utils/__init__.py +17 -0
  36. galform_analysis/utils/matplotlib_config.py +115 -0
  37. galform_analysis/utils/read_galaxies.py +357 -0
  38. galform_analysis/utils/stats.py +77 -0
  39. galform_analysis-0.1.0.dist-info/METADATA +133 -0
  40. galform_analysis-0.1.0.dist-info/RECORD +43 -0
  41. galform_analysis-0.1.0.dist-info/WHEEL +5 -0
  42. galform_analysis-0.1.0.dist-info/licenses/LICENSE +21 -0
  43. galform_analysis-0.1.0.dist-info/top_level.txt +1 -0
@@ -0,0 +1,86 @@
1
"""galform_analysis - A Python library for GALFORM simulation analysis.

This library provides tools for analyzing GALFORM galaxy formation simulation
outputs, including:
- Reading HDF5 snapshot data
- Computing mass functions (stellar and halo)
- Aggregating data across subvolumes

Quick Start:
    >>> from galform_analysis.config import set_base_dir
    >>> from galform_analysis.analysis import avg_hmf_given_redshift_and_subvolumes
    >>> from galform_analysis.analysis import avg_smf_given_redshift_and_subvolumes
    >>>
    >>> # Set your GALFORM output directory
    >>> set_base_dir('/path/to/galform/output')
    >>>
    >>> # Compute stellar mass function
    >>> smf = avg_smf_given_redshift_and_subvolumes(iz_num=99, ivols=[0, 1, 2])

Configuration:
    Set the BASE_DIR for your GALFORM outputs:
    - Via Python: galform_analysis.config.set_base_dir('/path')
    - Via environment: export GALFORM_BASE_DIR=/path
    - Edit config.py directly
"""

__version__ = "0.1.0"
28
+
29
+ # Import key modules for convenience
30
+ from galform_analysis import analysis, config
31
+ from galform_analysis.analysis import (
32
+ aggregate_snapshot,
33
+ avg_hmf_given_redshift_and_subvolumes,
34
+ avg_hmf_given_redshifts_and_subvolume,
35
+ avg_smf_given_redshift_and_subvolumes,
36
+ avg_smf_given_redshifts_and_subvolume,
37
+ # HMF functions
38
+ hmf_given_redshift_and_subvolume,
39
+ hmfs_given_redshifts_and_subvolume,
40
+ # SMF functions
41
+ smf_given_redshift_and_subvolume,
42
+ smfs_given_redshifts_and_subvolume,
43
+ )
44
+
45
+ # Expose commonly used functions at package level
46
+ from galform_analysis.config import (
47
+ Cosmology,
48
+ SimulationConfig,
49
+ find_snapshot_at_redshift,
50
+ get_base_dir,
51
+ get_snapshot_redshift,
52
+ load_redshift_mapping,
53
+ load_sim_config,
54
+ set_base_dir,
55
+ )
56
+ from galform_analysis.readers import close_snapshot, read_snapshot_data
57
+
58
# Public API of the package. Every name listed here must be bound in this
# module, otherwise ``from galform_analysis import *`` raises AttributeError.
__all__ = [
    "__version__",
    # Submodules
    "config",
    # ``readers`` becomes a package attribute through the
    # ``from galform_analysis.readers import ...`` statement in this file.
    # The previous entry "io" named a submodule that does not exist.
    "readers",
    "analysis",
    # Common functions
    "set_base_dir",
    "get_base_dir",
    "Cosmology",
    "load_sim_config",
    "SimulationConfig",
    "load_redshift_mapping",
    "get_snapshot_redshift",
    "find_snapshot_at_redshift",
    "read_snapshot_data",
    "close_snapshot",
    "aggregate_snapshot",
    # HMF functions
    "hmf_given_redshift_and_subvolume",
    "hmfs_given_redshifts_and_subvolume",
    "avg_hmf_given_redshift_and_subvolumes",
    "avg_hmf_given_redshifts_and_subvolume",
    # SMF functions
    "smf_given_redshift_and_subvolume",
    "smfs_given_redshifts_and_subvolume",
    "avg_smf_given_redshift_and_subvolumes",
    "avg_smf_given_redshifts_and_subvolume",
]
@@ -0,0 +1,47 @@
1
+ """Analysis subpackage for GALFORM data processing."""
2
+
3
+ from .aggregation import aggregate_snapshot, completed_galaxies, incomplete_subvolumes
4
+ from .correlation import (
5
+ avg_correlation_given_redshift_and_subvolumes,
6
+ compute_xi_corrfunc,
7
+ correlation_given_redshift_and_subvolume,
8
+ )
9
+ from .mass_functions import (
10
+ avg_hmf_given_redshift_and_subvolumes,
11
+ avg_hmf_given_redshifts_and_subvolume,
12
+ avg_hod_given_redshift_and_subvolumes,
13
+ avg_hod_given_redshifts_and_subvolume,
14
+ avg_smf_given_redshift_and_subvolumes,
15
+ avg_smf_given_redshifts_and_subvolume,
16
+ hmf_given_redshift_and_subvolume,
17
+ hmfs_given_redshifts_and_subvolume,
18
+ hod_given_redshift_and_subvolume,
19
+ hods_given_redshifts_and_subvolume,
20
+ smf_given_redshift_and_subvolume,
21
+ smfs_given_redshifts_and_subvolume,
22
+ )
23
+
24
# Names re-exported by the analysis subpackage, grouped by the kind of
# quantity they compute.
__all__ = [
    # Snapshot aggregation and completeness scans
    "aggregate_snapshot",
    "completed_galaxies",
    "incomplete_subvolumes",
    # Halo mass function (HMF)
    "hmf_given_redshift_and_subvolume",
    "hmfs_given_redshifts_and_subvolume",
    "avg_hmf_given_redshift_and_subvolumes",
    "avg_hmf_given_redshifts_and_subvolume",
    # Stellar mass function (SMF)
    "smf_given_redshift_and_subvolume",
    "smfs_given_redshifts_and_subvolume",
    "avg_smf_given_redshift_and_subvolumes",
    "avg_smf_given_redshifts_and_subvolume",
    # Halo occupation distribution (HOD)
    "hod_given_redshift_and_subvolume",
    "hods_given_redshifts_and_subvolume",
    "avg_hod_given_redshift_and_subvolumes",
    "avg_hod_given_redshifts_and_subvolume",
    # Two-point correlation functions
    "compute_xi_corrfunc",
    "correlation_given_redshift_and_subvolume",
    "avg_correlation_given_redshift_and_subvolumes",
]
@@ -0,0 +1,314 @@
1
+ """Analysis functions for aggregating GALFORM data across subvolumes."""
2
+
3
+ import glob
4
+ import os
5
+ from pathlib import Path
6
+ from typing import Any, Dict, List, Optional
7
+
8
+ import h5py
9
+ import numpy as np
10
+ import polars as pl
11
+
12
+ from galform_analysis.config import get_base_dir
13
+ from galform_analysis.readers.loaders import close_snapshot, read_snapshot_data
14
+
15
+
16
def completed_galaxies(
    basedir: Optional[str] = None, iz_snapshots: Optional[List[int]] = None
) -> pl.DataFrame:
    """Scan the base directory and report every galaxies.hdf5 file found.

    Walks ``<basedir>/iz*/ivol*`` and records one row per subvolume directory
    that contains a ``galaxies.hdf5`` file. A file is marked completed when it
    is at least 1000 bytes long and h5py can open it for reading; the file
    contents (for example a CompletionFlag dataset) are not inspected.

    Entries are skipped silently when the ``iz``/``ivol`` suffix is not an
    integer, when ``galaxies.hdf5`` is absent, or when its size cannot be read.

    Args:
        basedir: Base directory containing iz* snapshot folders. When None
            (the default) the directory is taken from ``get_base_dir()`` at
            call time rather than being frozen when the module is imported.
        iz_snapshots: Optional list of snapshot numbers (e.g., [82, 100, 105]).
            If provided, only these snapshots will be scanned; requested
            snapshots whose directory does not exist are ignored.
            If None, all iz* directories are scanned.

    Returns:
        DataFrame sorted by (iz_num, ivol) with columns:
            - iz: Snapshot name (e.g., 'iz100')
            - iz_num: Numeric iz value (e.g., 100)
            - ivol: Subvolume number
            - path: Full path to the galaxies.hdf5 file
            - completed: Whether the file passed the size and open checks
        When nothing is found the frame is empty but still has these columns.
    """
    if basedir is None:
        basedir = get_base_dir()

    if iz_snapshots is not None:
        requested = [os.path.join(basedir, f"iz{iz}") for iz in iz_snapshots]
        iz_dirs = sorted(path for path in requested if os.path.isdir(path))
    else:
        iz_dirs = sorted(glob.glob(os.path.join(basedir, "iz*")))

    records = []
    for iz_dir in iz_dirs:
        iz_name = Path(iz_dir).name
        try:
            iz_num = int(iz_name.replace("iz", ""))
        except ValueError:
            continue

        for ivol_dir in sorted(glob.glob(os.path.join(iz_dir, "ivol*"))):
            try:
                ivol_num = int(Path(ivol_dir).name.replace("ivol", ""))
            except ValueError:
                continue

            gal_file = os.path.join(ivol_dir, "galaxies.hdf5")
            if not os.path.exists(gal_file):
                continue

            try:
                file_size = os.path.getsize(gal_file)
            except OSError:
                continue

            completed = False
            # Files under 1000 bytes are treated as incomplete without
            # attempting to open them.
            if file_size >= 1000:
                try:
                    with h5py.File(gal_file, "r", swmr=True):
                        completed = True
                except (OSError, KeyError, RuntimeError):
                    # Any failure to open is reported as "not completed".
                    completed = False

            records.append(
                {
                    "iz": iz_name,
                    "iz_num": iz_num,
                    "ivol": ivol_num,
                    "path": gal_file,
                    "completed": completed,
                }
            )

    if not records:
        # Keep the documented columns so callers can filter/select on them
        # even when the scan finds nothing.
        return pl.DataFrame(
            schema={
                "iz": pl.Utf8,
                "iz_num": pl.Int64,
                "ivol": pl.Int64,
                "path": pl.Utf8,
                "completed": pl.Boolean,
            }
        )

    return pl.DataFrame(records).sort(["iz_num", "ivol"])
130
+
131
+
132
def incomplete_subvolumes(
    basedir: Optional[str] = None, iz_snapshots: Optional[List[int]] = None
) -> pl.DataFrame:
    """Scan the base directory and report missing or unusable galaxy files.

    Walks ``<basedir>/iz*/ivol*`` and records one row for every subvolume
    directory whose ``galaxies.hdf5`` is absent, too small, unreadable, or
    cannot be opened by h5py. Subvolumes whose file opens cleanly produce no
    row. Directories whose ``iz``/``ivol`` suffix is not an integer are
    skipped.

    Args:
        basedir: Base directory containing iz* snapshot folders. When None
            (the default) the directory is taken from ``get_base_dir()`` at
            call time rather than being frozen when the module is imported.
        iz_snapshots: Optional list of snapshot numbers (e.g., [82, 100, 105]).
            If provided, only these snapshots will be scanned; requested
            snapshots whose directory does not exist are ignored.
            If None, all iz* directories are scanned.

    Returns:
        DataFrame sorted by (iz_num, ivol) with columns:
            - iz: Snapshot name (e.g., 'iz100')
            - iz_num: Numeric iz value (e.g., 100)
            - ivol: Subvolume number
            - path: Path to the expected galaxies.hdf5 file (may not exist)
            - reason: One of 'missing' (no file), 'incomplete' (smaller than
              1000 bytes), 'inaccessible' (size could not be read) or
              'corrupted' (h5py failed to open it)
        When nothing is found the frame is empty but still has these columns.
    """
    if basedir is None:
        basedir = get_base_dir()

    if iz_snapshots is not None:
        requested = [os.path.join(basedir, f"iz{iz}") for iz in iz_snapshots]
        iz_dirs = sorted(path for path in requested if os.path.isdir(path))
    else:
        iz_dirs = sorted(glob.glob(os.path.join(basedir, "iz*")))

    records = []
    for iz_dir in iz_dirs:
        iz_name = Path(iz_dir).name
        try:
            iz_num = int(iz_name.replace("iz", ""))
        except ValueError:
            continue

        for ivol_dir in sorted(glob.glob(os.path.join(iz_dir, "ivol*"))):
            try:
                ivol_num = int(Path(ivol_dir).name.replace("ivol", ""))
            except ValueError:
                continue

            gal_file = os.path.join(ivol_dir, "galaxies.hdf5")
            reason = _incomplete_reason(gal_file)
            if reason is None:
                continue

            records.append(
                {
                    "iz": iz_name,
                    "iz_num": iz_num,
                    "ivol": ivol_num,
                    "path": gal_file,
                    "reason": reason,
                }
            )

    if not records:
        # Keep the documented columns so callers can filter/select on them
        # even when every subvolume is fine.
        return pl.DataFrame(
            schema={
                "iz": pl.Utf8,
                "iz_num": pl.Int64,
                "ivol": pl.Int64,
                "path": pl.Utf8,
                "reason": pl.Utf8,
            }
        )

    return pl.DataFrame(records).sort(["iz_num", "ivol"])


def _incomplete_reason(gal_file: str) -> Optional[str]:
    """Return why *gal_file* is unusable, or None when it opens cleanly."""
    if not os.path.exists(gal_file):
        return "missing"

    try:
        file_size = os.path.getsize(gal_file)
    except OSError:
        return "inaccessible"

    # Files under 1000 bytes are treated as incomplete without opening them.
    if file_size < 1000:
        return "incomplete"

    try:
        with h5py.File(gal_file, "r", swmr=True):
            return None
    except (OSError, KeyError, RuntimeError):
        # Every open failure is reported with the same label.
        return "corrupted"
260
+
261
+
262
def aggregate_snapshot(iz_path: str) -> Optional[Dict[str, Any]]:
    """Aggregate mstar, mhalo, and volume from all ivols in a snapshot.

    Each ``ivol*`` entry under *iz_path* is read with ``read_snapshot_data``.
    Reading is best-effort: a subvolume that fails to load or process is
    skipped, and entries whose ``ivol`` suffix is not an integer are ignored.
    Every successfully opened snapshot is passed to ``close_snapshot`` even if
    processing it fails part-way.

    Args:
        iz_path: Path to the snapshot directory

    Returns:
        Dictionary with keys:
            - 'iz': name of the snapshot directory
            - 'z': the first non-None ``z`` value encountered (None if no
              subvolume supplied one)
            - 'volume': sum of the positive ``V_ivol`` values
            - 'mstar', 'mhalo': concatenated arrays (empty array if absent)
        Returns None if there are no ivol* entries or no mstar/mhalo data.
    """
    ivol_paths = sorted(glob.glob(os.path.join(iz_path, "ivol*")))
    if not ivol_paths:
        return None

    all_mstar, all_mhalo = [], []
    total_vol = 0
    z = None

    for ivol_path in ivol_paths:
        try:
            ivol = int(Path(ivol_path).name.replace("ivol", ""))
        except ValueError:
            # Not a numbered subvolume (e.g. a stray "ivol_backup" entry).
            continue

        try:
            data = read_snapshot_data(iz_path, ivol=ivol)
        except Exception:
            # Best-effort aggregation: an unreadable subvolume is skipped.
            continue

        try:
            try:
                if data.get("V_ivol") and data["V_ivol"] > 0:
                    total_vol += data["V_ivol"]
                if z is None:
                    z = data.get("z")

                mstar = data.get("mstar")
                mhalo = data.get("mhalo")
                if mstar is not None:
                    all_mstar.append(mstar)
                if mhalo is not None:
                    all_mhalo.append(mhalo)
            finally:
                # Release the snapshot even when processing above failed.
                close_snapshot(data)
        except Exception:
            continue

    if not all_mstar and not all_mhalo:
        return None

    return {
        "iz": Path(iz_path).name,
        "z": z,
        "volume": total_vol,
        "mstar": np.concatenate(all_mstar) if all_mstar else np.array([]),
        "mhalo": np.concatenate(all_mhalo) if all_mhalo else np.array([]),
    }
310
+
311
+
312
if __name__ == "__main__":
    # Script entry point: scan the configured base directory and show the
    # result (previously the DataFrame was built and then discarded).
    base_dir = get_base_dir()
    df = completed_galaxies(str(base_dir))
    print(df)
@@ -0,0 +1,68 @@
1
+ """Correlation function analysis subpackage."""
2
+
3
+ from .correlation import (
4
+ avg_correlation_given_redshift_and_subvolumes,
5
+ avg_correlation_given_subvolume_and_redshifts,
6
+ compute_xi_corrfunc,
7
+ correlation_given_redshift_and_subvolume,
8
+ correlations_given_redshifts_and_subvolume,
9
+ )
10
+
11
# The subvolume-weighted estimators are optional: if importing them fails for
# any reason the package still loads and simply does not export them.
try:
    # The redundant "name as name" aliases mark these as deliberate re-exports.
    from .subvol_weighted_correction import (
        compute_weighted_wp_for_n_list as compute_weighted_wp_for_n_list,
        compute_weighted_wp_from_catalogue as compute_weighted_wp_from_catalogue,
        compute_weighted_xi_for_n_list as compute_weighted_xi_for_n_list,
        compute_weighted_xi_from_catalogue as compute_weighted_xi_from_catalogue,
        load_subvolume_galaxies as load_subvolume_galaxies,
    )
except Exception:  # pragma: no cover - optional dependency path
    _HAS_SUBVOL_WEIGHTED = False
else:
    _HAS_SUBVOL_WEIGHTED = True
31
+
32
+ from .dm_correlation import (
33
+ avg_dm_correlation_from_tree_files,
34
+ dm_correlation_from_tree_file,
35
+ )
36
+ from .galaxy_bias import (
37
+ avg_galaxy_bias_over_subvolumes,
38
+ compute_galaxy_bias,
39
+ )
40
+ from .satellite_cross_correlation import (
41
+ compute_xi_cross_corrfunc,
42
+ satellite_central_cross_correlation,
43
+ )
44
+
45
# Names that are always exported by the correlation subpackage.
__all__ = [
    "compute_xi_corrfunc",
    "correlation_given_redshift_and_subvolume",
    "avg_correlation_given_redshift_and_subvolumes",
    "correlations_given_redshifts_and_subvolume",
    "avg_correlation_given_subvolume_and_redshifts",
    "dm_correlation_from_tree_file",
    "avg_dm_correlation_from_tree_files",
    "compute_galaxy_bias",
    "avg_galaxy_bias_over_subvolumes",
    "satellite_central_cross_correlation",
    "compute_xi_cross_corrfunc",
]

# The subvolume-weighted helpers are advertised only when their import
# succeeded.
if _HAS_SUBVOL_WEIGHTED:
    __all__ += [
        "compute_weighted_xi_for_n_list",
        "compute_weighted_xi_from_catalogue",
        "compute_weighted_wp_for_n_list",
        "compute_weighted_wp_from_catalogue",
        "load_subvolume_galaxies",
    ]