galform-analysis 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- galform_analysis-0.1.0/LICENSE +21 -0
- galform_analysis-0.1.0/PKG-INFO +133 -0
- galform_analysis-0.1.0/README.md +82 -0
- galform_analysis-0.1.0/galform_analysis/__init__.py +86 -0
- galform_analysis-0.1.0/galform_analysis/analysis/__init__.py +47 -0
- galform_analysis-0.1.0/galform_analysis/analysis/aggregation.py +314 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/__init__.py +68 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/correlation.py +508 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/dm_correlation.py +681 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/galaxy_bias.py +55 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/n_point_bruteforce.py +119 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/satellite_cross_correlation.py +255 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/scope_wrapper.py +21 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/subvol_weighted_correction.py +693 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/three_point_bruteforce.py +92 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/three_point_reference.py +95 -0
- galform_analysis-0.1.0/galform_analysis/analysis/correlation/three_point_scope.py +156 -0
- galform_analysis-0.1.0/galform_analysis/analysis/mass_functions/__init__.py +53 -0
- galform_analysis-0.1.0/galform_analysis/analysis/mass_functions/hmf.py +373 -0
- galform_analysis-0.1.0/galform_analysis/analysis/mass_functions/hod.py +570 -0
- galform_analysis-0.1.0/galform_analysis/analysis/mass_functions/smf.py +295 -0
- galform_analysis-0.1.0/galform_analysis/analysis/mass_functions/theoretical_hmf.py +566 -0
- galform_analysis-0.1.0/galform_analysis/analysis/redshift_space_distortions/__init__.py +1 -0
- galform_analysis-0.1.0/galform_analysis/analysis/redshift_space_distortions/subvol_weighted_multipoles.py +429 -0
- galform_analysis-0.1.0/galform_analysis/config.py +230 -0
- galform_analysis-0.1.0/galform_analysis/readers/__init__.py +17 -0
- galform_analysis-0.1.0/galform_analysis/readers/loaders.py +289 -0
- galform_analysis-0.1.0/galform_analysis/redshift_lists/COLIBRE-L100m6.txt +3 -0
- galform_analysis-0.1.0/galform_analysis/redshift_lists/FLAMINGO-L1000N1800.txt +78 -0
- galform_analysis-0.1.0/galform_analysis/redshift_lists/L800.txt +253 -0
- galform_analysis-0.1.0/galform_analysis/redshift_lists/Mill1.txt +2 -0
- galform_analysis-0.1.0/galform_analysis/redshift_lists/Mill2.txt +2 -0
- galform_analysis-0.1.0/galform_analysis/sim_configs/COLIBRE.json +14 -0
- galform_analysis-0.1.0/galform_analysis/sim_configs/FLAMINGO.json +14 -0
- galform_analysis-0.1.0/galform_analysis/sim_configs/L800.json +14 -0
- galform_analysis-0.1.0/galform_analysis/sim_configs/Mill1.json +14 -0
- galform_analysis-0.1.0/galform_analysis/sim_configs/Mill2.json +14 -0
- galform_analysis-0.1.0/galform_analysis/utils/__init__.py +17 -0
- galform_analysis-0.1.0/galform_analysis/utils/matplotlib_config.py +115 -0
- galform_analysis-0.1.0/galform_analysis/utils/read_galaxies.py +357 -0
- galform_analysis-0.1.0/galform_analysis/utils/stats.py +77 -0
- galform_analysis-0.1.0/galform_analysis.egg-info/PKG-INFO +133 -0
- galform_analysis-0.1.0/galform_analysis.egg-info/SOURCES.txt +46 -0
- galform_analysis-0.1.0/galform_analysis.egg-info/dependency_links.txt +1 -0
- galform_analysis-0.1.0/galform_analysis.egg-info/requires.txt +19 -0
- galform_analysis-0.1.0/galform_analysis.egg-info/top_level.txt +1 -0
- galform_analysis-0.1.0/pyproject.toml +58 -0
- galform_analysis-0.1.0/setup.cfg +4 -0
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Oscar Hickman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: galform_analysis
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A modular Python framework for reading and analyzing GALFORM HDF5 simulation outputs.
|
|
5
|
+
Author-email: Oscar Hickman <oscar.hickman17@alumni.imperial.ac.uk>
|
|
6
|
+
License: MIT License
|
|
7
|
+
|
|
8
|
+
Copyright (c) 2026 Oscar Hickman
|
|
9
|
+
|
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
12
|
+
in the Software without restriction, including without limitation the rights
|
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
15
|
+
furnished to do so, subject to the following conditions:
|
|
16
|
+
|
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
|
18
|
+
copies or substantial portions of the Software.
|
|
19
|
+
|
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
26
|
+
SOFTWARE.
|
|
27
|
+
Project-URL: Homepage, https://github.com/OscarHickman/galform_analysis
|
|
28
|
+
Project-URL: Repository, https://github.com/OscarHickman/galform_analysis
|
|
29
|
+
Project-URL: Issues, https://github.com/OscarHickman/galform_analysis/issues
|
|
30
|
+
Requires-Python: >=3.12
|
|
31
|
+
Description-Content-Type: text/markdown
|
|
32
|
+
License-File: LICENSE
|
|
33
|
+
Requires-Dist: numpy>=1.23.0
|
|
34
|
+
Requires-Dist: scipy>=1.7.0
|
|
35
|
+
Requires-Dist: matplotlib>=3.3.0
|
|
36
|
+
Requires-Dist: polars>=1.0.0
|
|
37
|
+
Requires-Dist: h5py>=3.0.0
|
|
38
|
+
Requires-Dist: seaborn>=0.11.0
|
|
39
|
+
Requires-Dist: Corrfunc>=2.3.0
|
|
40
|
+
Provides-Extra: dev
|
|
41
|
+
Requires-Dist: pytest>=8.0.0; extra == "dev"
|
|
42
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
43
|
+
Requires-Dist: build; extra == "dev"
|
|
44
|
+
Provides-Extra: science
|
|
45
|
+
Requires-Dist: astropy>=4.0; extra == "science"
|
|
46
|
+
Requires-Dist: hmf>=3.0; extra == "science"
|
|
47
|
+
Requires-Dist: packaging>=20.0; extra == "science"
|
|
48
|
+
Requires-Dist: deprecation>=2.0; extra == "science"
|
|
49
|
+
Requires-Dist: halotools>=0.7.0; extra == "science"
|
|
50
|
+
Dynamic: license-file
|
|
51
|
+
|
|
52
|
+
# galform_analysis
|
|
53
|
+
|
|
54
|
+
[CI](https://github.com/OscarHickman/galform_analysis/actions/workflows/ci.yml)
|
|
55
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
A modular Python framework designed for the efficient reading and analysis of GALFORM HDF5 simulation outputs. This library provides standardized tools for processing large-scale galaxy formation data, from low-level HDF5 I/O to high-level astronomical probes.
|
|
59
|
+
|
|
60
|
+
## Core Features
|
|
61
|
+
|
|
62
|
+
- **Standardized I/O**: Robust loaders for GALFORM `galaxies.hdf5` files with support for different output versions.
|
|
63
|
+
- **Data Aggregation**: Tools to scan simulation directories and aggregate data across subvolumes using high-performance `polars` dataframes.
|
|
64
|
+
- **Mass Functions**: Computation of Stellar Mass Functions (SMF), Halo Mass Functions (HMF), and Halo Occupation Distribution (HOD).
|
|
65
|
+
- **Correlation Functions**: Estimators for 2-point, 3-point, and N-point correlation functions (2PCF/NPCF) including subvolume-weighted corrections for convergence analysis.
|
|
66
|
+
- **Redshift-Space Distortions**: Estimators for anisotropic clustering ($\xi(s, \mu)$) and multipoles ($\xi_0, \xi_2, \xi_4$).
|
|
67
|
+
- **Simulation Management**: Built-in configurations for major N-body simulations including L800, Millennium I/II, COLIBRE, and FLAMINGO.
|
|
68
|
+
|
|
69
|
+
## Installation
|
|
70
|
+
|
|
71
|
+
Install the package in your Python environment:
|
|
72
|
+
|
|
73
|
+
```bash
|
|
74
|
+
uv pip install -e .
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### Dependencies
|
|
78
|
+
The library requires `numpy`, `scipy`, `matplotlib`, `polars`, `h5py`, `seaborn`, and `Corrfunc`. These are automatically managed during installation.
|
|
79
|
+
|
|
80
|
+
## Quick Start
|
|
81
|
+
|
|
82
|
+
The following example demonstrates how to load a simulation configuration and read galaxy data:
|
|
83
|
+
|
|
84
|
+
```python
|
|
85
|
+
from galform_analysis import SimulationConfig, config
|
|
86
|
+
from galform_analysis.readers.loaders import read_snapshot_data
|
|
87
|
+
|
|
88
|
+
# 1. Access simulation-specific constants (box size, cosmology, etc.)
|
|
89
|
+
sim = SimulationConfig('L800')
|
|
90
|
+
print(f"Simulation: {sim.name}, Box Size: {sim.box_size} Mpc/h")
|
|
91
|
+
|
|
92
|
+
# 2. Configure the data location
|
|
93
|
+
config.set_base_dir('/path/to/Galform_Out/L800/model_name')
|
|
94
|
+
|
|
95
|
+
# 3. Load snapshot data for a specific subvolume
|
|
96
|
+
data = read_snapshot_data('iz271', ivol=0)
|
|
97
|
+
mstar = data['mstar'] # Stellar masses in M_sun/h
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
## Simulation Metadata
|
|
101
|
+
|
|
102
|
+
Configurations for supported simulations are stored centrally in `galform_analysis/sim_configs/`. This allows for dynamic access to cosmological parameters and volume metadata:
|
|
103
|
+
|
|
104
|
+
```python
|
|
105
|
+
from galform_analysis import SimulationConfig
|
|
106
|
+
|
|
107
|
+
flamingo = SimulationConfig('FLAMINGO')
|
|
108
|
+
omega_m = flamingo.omega_m
|
|
109
|
+
h0 = flamingo.h0
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
## Documentation & Examples
|
|
113
|
+
|
|
114
|
+
Refer to the `examples/` directory for interactive Jupyter notebooks:
|
|
115
|
+
- `examples/readers/load_snapshot.ipynb`: Introduction to data loading.
|
|
116
|
+
- `examples/analysis/mass_functions/smf_example.ipynb`: Plotting Stellar Mass Functions.
|
|
117
|
+
- `examples/analysis/correlation/correlation_example.ipynb`: Computing clustering statistics.
|
|
118
|
+
|
|
119
|
+
## Testing & Quality Standards
|
|
120
|
+
|
|
121
|
+
The project maintains high code quality through automated linting and comprehensive testing:
|
|
122
|
+
|
|
123
|
+
```bash
|
|
124
|
+
# Run the test suite
|
|
125
|
+
pytest tests
|
|
126
|
+
|
|
127
|
+
# Check code style
|
|
128
|
+
ruff check galform_analysis
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Author
|
|
132
|
+
|
|
133
|
+
Oscar Hickman
|
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# galform_analysis
|
|
2
|
+
|
|
3
|
+
[CI](https://github.com/OscarHickman/galform_analysis/actions/workflows/ci.yml)
|
|
4
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
A modular Python framework designed for the efficient reading and analysis of GALFORM HDF5 simulation outputs. This library provides standardized tools for processing large-scale galaxy formation data, from low-level HDF5 I/O to high-level astronomical probes.
|
|
8
|
+
|
|
9
|
+
## Core Features
|
|
10
|
+
|
|
11
|
+
- **Standardized I/O**: Robust loaders for GALFORM `galaxies.hdf5` files with support for different output versions.
|
|
12
|
+
- **Data Aggregation**: Tools to scan simulation directories and aggregate data across subvolumes using high-performance `polars` dataframes.
|
|
13
|
+
- **Mass Functions**: Computation of Stellar Mass Functions (SMF), Halo Mass Functions (HMF), and Halo Occupation Distribution (HOD).
|
|
14
|
+
- **Correlation Functions**: Estimators for 2-point, 3-point, and N-point correlation functions (2PCF/NPCF) including subvolume-weighted corrections for convergence analysis.
|
|
15
|
+
- **Redshift-Space Distortions**: Estimators for anisotropic clustering ($\xi(s, \mu)$) and multipoles ($\xi_0, \xi_2, \xi_4$).
|
|
16
|
+
- **Simulation Management**: Built-in configurations for major N-body simulations including L800, Millennium I/II, COLIBRE, and FLAMINGO.
|
|
17
|
+
|
|
18
|
+
## Installation
|
|
19
|
+
|
|
20
|
+
Install the package in your Python environment:
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
uv pip install -e .
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
### Dependencies
|
|
27
|
+
The library requires `numpy`, `scipy`, `matplotlib`, `polars`, `h5py`, `seaborn`, and `Corrfunc`. These are automatically managed during installation.
|
|
28
|
+
|
|
29
|
+
## Quick Start
|
|
30
|
+
|
|
31
|
+
The following example demonstrates how to load a simulation configuration and read galaxy data:
|
|
32
|
+
|
|
33
|
+
```python
|
|
34
|
+
from galform_analysis import SimulationConfig, config
|
|
35
|
+
from galform_analysis.readers.loaders import read_snapshot_data
|
|
36
|
+
|
|
37
|
+
# 1. Access simulation-specific constants (box size, cosmology, etc.)
|
|
38
|
+
sim = SimulationConfig('L800')
|
|
39
|
+
print(f"Simulation: {sim.name}, Box Size: {sim.box_size} Mpc/h")
|
|
40
|
+
|
|
41
|
+
# 2. Configure the data location
|
|
42
|
+
config.set_base_dir('/path/to/Galform_Out/L800/model_name')
|
|
43
|
+
|
|
44
|
+
# 3. Load snapshot data for a specific subvolume
|
|
45
|
+
data = read_snapshot_data('iz271', ivol=0)
|
|
46
|
+
mstar = data['mstar'] # Stellar masses in M_sun/h
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Simulation Metadata
|
|
50
|
+
|
|
51
|
+
Configurations for supported simulations are stored centrally in `galform_analysis/sim_configs/`. This allows for dynamic access to cosmological parameters and volume metadata:
|
|
52
|
+
|
|
53
|
+
```python
|
|
54
|
+
from galform_analysis import SimulationConfig
|
|
55
|
+
|
|
56
|
+
flamingo = SimulationConfig('FLAMINGO')
|
|
57
|
+
omega_m = flamingo.omega_m
|
|
58
|
+
h0 = flamingo.h0
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
## Documentation & Examples
|
|
62
|
+
|
|
63
|
+
Refer to the `examples/` directory for interactive Jupyter notebooks:
|
|
64
|
+
- `examples/readers/load_snapshot.ipynb`: Introduction to data loading.
|
|
65
|
+
- `examples/analysis/mass_functions/smf_example.ipynb`: Plotting Stellar Mass Functions.
|
|
66
|
+
- `examples/analysis/correlation/correlation_example.ipynb`: Computing clustering statistics.
|
|
67
|
+
|
|
68
|
+
## Testing & Quality Standards
|
|
69
|
+
|
|
70
|
+
The project maintains high code quality through automated linting and comprehensive testing:
|
|
71
|
+
|
|
72
|
+
```bash
|
|
73
|
+
# Run the test suite
|
|
74
|
+
pytest tests
|
|
75
|
+
|
|
76
|
+
# Check code style
|
|
77
|
+
ruff check galform_analysis
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
## Author
|
|
81
|
+
|
|
82
|
+
Oscar Hickman
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
"""galform_analysis - A Python library for GALFORM simulation analysis.
|
|
2
|
+
|
|
3
|
+
This library provides tools for analyzing GALFORM galaxy formation simulation outputs,
|
|
4
|
+
including:
|
|
5
|
+
- Reading HDF5 snapshot data
|
|
6
|
+
- Computing mass functions (stellar and halo)
|
|
7
|
+
- Aggregating data across subvolumes
|
|
8
|
+
|
|
9
|
+
Quick Start:
|
|
10
|
+
>>> from galform_analysis import set_base_dir
|
|
11
|
+
>>> from galform_analysis import avg_hmf_given_redshift_and_subvolumes
|
|
12
|
+
>>> from galform_analysis import avg_smf_given_redshift_and_subvolumes
|
|
13
|
+
>>>
|
|
14
|
+
>>> # Set your GALFORM output directory
|
|
15
|
+
>>> set_base_dir('/path/to/galform/output')
|
|
16
|
+
>>>
|
|
17
|
+
>>> # Compute stellar mass function
|
|
18
|
+
>>> smf = avg_smf_given_redshift_and_subvolumes(iz_num=99, ivols=[0, 1, 2])
|
|
19
|
+
|
|
20
|
+
Configuration:
|
|
21
|
+
Set the BASE_DIR for your GALFORM outputs:
|
|
22
|
+
- Via Python: config.set_base_dir('/path')
|
|
23
|
+
- Via environment: export GALFORM_BASE_DIR=/path
|
|
24
|
+
- Edit config.py directly
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
__version__ = "0.1.0"
|
|
28
|
+
|
|
29
|
+
# Import key modules for convenience
|
|
30
|
+
from galform_analysis import analysis, config
|
|
31
|
+
from galform_analysis.analysis import (
|
|
32
|
+
aggregate_snapshot,
|
|
33
|
+
avg_hmf_given_redshift_and_subvolumes,
|
|
34
|
+
avg_hmf_given_redshifts_and_subvolume,
|
|
35
|
+
avg_smf_given_redshift_and_subvolumes,
|
|
36
|
+
avg_smf_given_redshifts_and_subvolume,
|
|
37
|
+
# HMF functions
|
|
38
|
+
hmf_given_redshift_and_subvolume,
|
|
39
|
+
hmfs_given_redshifts_and_subvolume,
|
|
40
|
+
# SMF functions
|
|
41
|
+
smf_given_redshift_and_subvolume,
|
|
42
|
+
smfs_given_redshifts_and_subvolume,
|
|
43
|
+
)
|
|
44
|
+
|
|
45
|
+
# Expose commonly used functions at package level
|
|
46
|
+
from galform_analysis.config import (
|
|
47
|
+
Cosmology,
|
|
48
|
+
SimulationConfig,
|
|
49
|
+
find_snapshot_at_redshift,
|
|
50
|
+
get_base_dir,
|
|
51
|
+
get_snapshot_redshift,
|
|
52
|
+
load_redshift_mapping,
|
|
53
|
+
load_sim_config,
|
|
54
|
+
set_base_dir,
|
|
55
|
+
)
|
|
56
|
+
from galform_analysis.readers import close_snapshot, read_snapshot_data
|
|
57
|
+
|
|
58
|
+
# Public API of the package. Every entry must be a name that is actually bound
# in this module, otherwise ``from galform_analysis import *`` raises
# AttributeError.
__all__ = [
    "__version__",
    # Submodules
    "config",
    # The I/O subpackage is ``readers`` (bound as a side effect of the
    # ``from galform_analysis.readers import ...`` statement in this module);
    # the previous entry "io" did not correspond to any attribute.
    "readers",
    "analysis",
    # Common functions
    "set_base_dir",
    "get_base_dir",
    "Cosmology",
    "load_sim_config",
    "SimulationConfig",
    "load_redshift_mapping",
    "get_snapshot_redshift",
    "find_snapshot_at_redshift",
    "read_snapshot_data",
    "close_snapshot",
    "aggregate_snapshot",
    # HMF functions
    "hmf_given_redshift_and_subvolume",
    "hmfs_given_redshifts_and_subvolume",
    "avg_hmf_given_redshift_and_subvolumes",
    "avg_hmf_given_redshifts_and_subvolume",
    # SMF functions
    "smf_given_redshift_and_subvolume",
    "smfs_given_redshifts_and_subvolume",
    "avg_smf_given_redshift_and_subvolumes",
    "avg_smf_given_redshifts_and_subvolume",
]
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""Analysis subpackage for GALFORM data processing."""
|
|
2
|
+
|
|
3
|
+
from .aggregation import aggregate_snapshot, completed_galaxies, incomplete_subvolumes
|
|
4
|
+
from .correlation import (
|
|
5
|
+
avg_correlation_given_redshift_and_subvolumes,
|
|
6
|
+
compute_xi_corrfunc,
|
|
7
|
+
correlation_given_redshift_and_subvolume,
|
|
8
|
+
)
|
|
9
|
+
from .mass_functions import (
|
|
10
|
+
avg_hmf_given_redshift_and_subvolumes,
|
|
11
|
+
avg_hmf_given_redshifts_and_subvolume,
|
|
12
|
+
avg_hod_given_redshift_and_subvolumes,
|
|
13
|
+
avg_hod_given_redshifts_and_subvolume,
|
|
14
|
+
avg_smf_given_redshift_and_subvolumes,
|
|
15
|
+
avg_smf_given_redshifts_and_subvolume,
|
|
16
|
+
hmf_given_redshift_and_subvolume,
|
|
17
|
+
hmfs_given_redshifts_and_subvolume,
|
|
18
|
+
hod_given_redshift_and_subvolume,
|
|
19
|
+
hods_given_redshifts_and_subvolume,
|
|
20
|
+
smf_given_redshift_and_subvolume,
|
|
21
|
+
smfs_given_redshifts_and_subvolume,
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
__all__ = [
|
|
25
|
+
"aggregate_snapshot",
|
|
26
|
+
"completed_galaxies",
|
|
27
|
+
"incomplete_subvolumes",
|
|
28
|
+
# HMF functions
|
|
29
|
+
"hmf_given_redshift_and_subvolume",
|
|
30
|
+
"hmfs_given_redshifts_and_subvolume",
|
|
31
|
+
"avg_hmf_given_redshift_and_subvolumes",
|
|
32
|
+
"avg_hmf_given_redshifts_and_subvolume",
|
|
33
|
+
# SMF functions
|
|
34
|
+
"smf_given_redshift_and_subvolume",
|
|
35
|
+
"smfs_given_redshifts_and_subvolume",
|
|
36
|
+
"avg_smf_given_redshift_and_subvolumes",
|
|
37
|
+
"avg_smf_given_redshifts_and_subvolume",
|
|
38
|
+
# HOD functions
|
|
39
|
+
"hod_given_redshift_and_subvolume",
|
|
40
|
+
"hods_given_redshifts_and_subvolume",
|
|
41
|
+
"avg_hod_given_redshift_and_subvolumes",
|
|
42
|
+
"avg_hod_given_redshifts_and_subvolume",
|
|
43
|
+
# Correlation functions
|
|
44
|
+
"compute_xi_corrfunc",
|
|
45
|
+
"correlation_given_redshift_and_subvolume",
|
|
46
|
+
"avg_correlation_given_redshift_and_subvolumes",
|
|
47
|
+
]
|
|
@@ -0,0 +1,314 @@
|
|
|
1
|
+
"""Analysis functions for aggregating GALFORM data across subvolumes."""
|
|
2
|
+
|
|
3
|
+
import glob
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import Any, Dict, List, Optional
|
|
7
|
+
|
|
8
|
+
import h5py
|
|
9
|
+
import numpy as np
|
|
10
|
+
import polars as pl
|
|
11
|
+
|
|
12
|
+
from galform_analysis.config import get_base_dir
|
|
13
|
+
from galform_analysis.readers.loaders import close_snapshot, read_snapshot_data
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def completed_galaxies(
    basedir: Optional[str] = None, iz_snapshots: Optional[List[int]] = None
) -> pl.DataFrame:
    """Scan a base directory and report every galaxies.hdf5 file found.

    Walks all ``iz*/ivol*`` directories under ``basedir``. A file counts as
    completed when it is at least 1000 bytes and ``h5py`` can open it
    read-only. The file contents (e.g. a ``CompletionFlag`` dataset) are not
    inspected.

    Directories whose name is not ``iz<int>`` / ``ivol<int>``, subvolumes
    without a ``galaxies.hdf5`` file, and files whose size cannot be read are
    left out of the result entirely.

    Args:
        basedir: Base directory containing iz* snapshot folders. When None
            (the default) the directory is taken from ``get_base_dir()`` at
            call time.
        iz_snapshots: Optional list of snapshot numbers (e.g., [82, 100, 105]).
            If provided, only these snapshots are scanned; numbers without a
            matching ``iz<N>`` directory are dropped.
            If None, all iz* directories are scanned.

    Returns:
        DataFrame sorted by (iz_num, ivol) with columns:
        - iz: Snapshot name (e.g., 'iz100')
        - iz_num: Numeric iz value (e.g., 100)
        - ivol: Subvolume number
        - path: Full path to the galaxies.hdf5 file
        - completed: Whether the file passed the size and open checks
        When nothing is found, an empty DataFrame with these columns.
    """
    if basedir is None:
        # Resolved here rather than in the signature: a default written as
        # ``get_base_dir()`` is evaluated once at import time, so a base
        # directory configured afterwards would not be picked up.
        basedir = get_base_dir()

    if iz_snapshots is not None:
        # Keep only the requested snapshots that exist on disk.
        candidates = (os.path.join(basedir, f"iz{iz}") for iz in iz_snapshots)
        iz_dirs = sorted(d for d in candidates if os.path.isdir(d))
    else:
        iz_dirs = sorted(glob.glob(os.path.join(basedir, "iz*")))

    records = []
    for iz_dir in iz_dirs:
        iz_name = Path(iz_dir).name
        try:
            iz_num = int(iz_name.replace("iz", ""))
        except ValueError:
            # Not an iz<int> directory (e.g. "iz_old"); ignore it.
            continue

        for ivol_dir in sorted(glob.glob(os.path.join(iz_dir, "ivol*"))):
            try:
                ivol_num = int(Path(ivol_dir).name.replace("ivol", ""))
            except ValueError:
                continue

            gal_file = os.path.join(ivol_dir, "galaxies.hdf5")
            if not os.path.exists(gal_file):
                continue

            try:
                too_small = os.path.getsize(gal_file) < 1000
            except OSError:
                # Size could not be read; the file is not reported at all.
                continue

            completed = False
            if not too_small:
                # Files under 1000 bytes are treated as incomplete without
                # opening them. Otherwise being able to open the file is the
                # whole test; any open failure means "not completed".
                try:
                    with h5py.File(gal_file, "r", swmr=True):
                        completed = True
                except (OSError, KeyError, RuntimeError):
                    completed = False

            records.append(
                {
                    "iz": iz_name,
                    "iz_num": iz_num,
                    "ivol": ivol_num,
                    "path": gal_file,
                    "completed": completed,
                }
            )

    if not records:
        # Keep the documented columns even when nothing was found, so callers
        # can filter/select on them without special-casing an empty scan.
        return pl.DataFrame(
            schema={
                "iz": pl.String,
                "iz_num": pl.Int64,
                "ivol": pl.Int64,
                "path": pl.String,
                "completed": pl.Boolean,
            }
        )

    return pl.DataFrame(records).sort(["iz_num", "ivol"])
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def incomplete_subvolumes(
    basedir: Optional[str] = None, iz_snapshots: Optional[List[int]] = None
) -> pl.DataFrame:
    """Scan a base directory and report missing or unusable galaxies.hdf5 files.

    Walks all ``iz*/ivol*`` directories under ``basedir`` and records every
    subvolume whose ``galaxies.hdf5`` is absent, too small, unreadable, or
    cannot be opened by ``h5py``. Only subvolume directories that exist are
    examined; a subvolume with no ``ivol<N>`` directory is not reported.
    Directories whose name is not ``iz<int>`` / ``ivol<int>`` are ignored.

    Args:
        basedir: Base directory containing iz* snapshot folders. When None
            (the default) the directory is taken from ``get_base_dir()`` at
            call time.
        iz_snapshots: Optional list of snapshot numbers (e.g., [82, 100, 105]).
            If provided, only these snapshots are scanned; numbers without a
            matching ``iz<N>`` directory are dropped.
            If None, all iz* directories are scanned.

    Returns:
        DataFrame sorted by (iz_num, ivol) with columns:
        - iz: Snapshot name (e.g., 'iz100')
        - iz_num: Numeric iz value (e.g., 100)
        - ivol: Subvolume number
        - path: Path to the expected galaxies.hdf5 file (may not exist)
        - reason: One of 'missing' (no file), 'incomplete' (file smaller
          than 1000 bytes), 'inaccessible' (file size could not be read),
          or 'corrupted' (h5py failed to open the file)
        When nothing is found, an empty DataFrame with these columns.
    """
    if basedir is None:
        # Resolved here rather than in the signature: a default written as
        # ``get_base_dir()`` is evaluated once at import time, so a base
        # directory configured afterwards would not be picked up.
        basedir = get_base_dir()

    if iz_snapshots is not None:
        # Keep only the requested snapshots that exist on disk.
        candidates = (os.path.join(basedir, f"iz{iz}") for iz in iz_snapshots)
        iz_dirs = sorted(d for d in candidates if os.path.isdir(d))
    else:
        iz_dirs = sorted(glob.glob(os.path.join(basedir, "iz*")))

    records = []
    for iz_dir in iz_dirs:
        iz_name = Path(iz_dir).name
        try:
            iz_num = int(iz_name.replace("iz", ""))
        except ValueError:
            # Not an iz<int> directory (e.g. "iz_old"); ignore it.
            continue

        for ivol_dir in sorted(glob.glob(os.path.join(iz_dir, "ivol*"))):
            try:
                ivol_num = int(Path(ivol_dir).name.replace("ivol", ""))
            except ValueError:
                continue

            gal_file = os.path.join(ivol_dir, "galaxies.hdf5")

            # Cheap checks first; the file is only opened if they all pass.
            reason = None
            if not os.path.exists(gal_file):
                reason = "missing"
            else:
                try:
                    if os.path.getsize(gal_file) < 1000:
                        reason = "incomplete"
                except OSError:
                    reason = "inaccessible"

            if reason is None:
                try:
                    with h5py.File(gal_file, "r", swmr=True):
                        pass  # Opens cleanly: nothing to report.
                except (OSError, KeyError, RuntimeError):
                    # Every open failure is reported the same way.
                    reason = "corrupted"

            if reason is None:
                continue

            records.append(
                {
                    "iz": iz_name,
                    "iz_num": iz_num,
                    "ivol": ivol_num,
                    "path": gal_file,
                    "reason": reason,
                }
            )

    if not records:
        # Keep the documented columns even when nothing was found, so callers
        # can filter/select on them without special-casing an empty scan.
        return pl.DataFrame(
            schema={
                "iz": pl.String,
                "iz_num": pl.Int64,
                "ivol": pl.Int64,
                "path": pl.String,
                "reason": pl.String,
            }
        )

    return pl.DataFrame(records).sort(["iz_num", "ivol"])
|
|
260
|
+
|
|
261
|
+
|
|
262
|
+
def aggregate_snapshot(iz_path: str) -> Optional[Dict[str, Any]]:
    """Aggregate mstar, mhalo, and volume from all ivols in a snapshot.

    Each ``ivol<N>`` directory under ``iz_path`` is read with
    ``read_snapshot_data``. Reading is best-effort: a subvolume that fails to
    load or process is skipped (the failure is logged at DEBUG level), and
    directories whose name is not ``ivol<int>`` are ignored.

    Args:
        iz_path: Path to the snapshot directory

    Returns:
        Dictionary with keys:
        - 'iz': name of the snapshot directory
        - 'z': the "z" entry of the first subvolume that provided one
        - 'volume': sum of the positive "V_ivol" entries
        - 'mstar', 'mhalo': concatenation of the per-subvolume arrays
          (an empty array when no subvolume provided that quantity)
        Returns None if there are no ivol* directories or no subvolume
        yielded mstar or mhalo data.
    """
    import logging

    log = logging.getLogger(__name__)

    ivol_paths = sorted(glob.glob(os.path.join(iz_path, "ivol*")))
    if not ivol_paths:
        return None

    all_mstar, all_mhalo = [], []
    total_vol = 0
    z = None

    for ivp in ivol_paths:
        try:
            iv = int(Path(ivp).name.replace("ivol", ""))
        except ValueError:
            # e.g. "ivol_backup": not a numbered subvolume, so skip it
            # instead of aborting the whole aggregation.
            continue

        data = None
        try:
            data = read_snapshot_data(iz_path, ivol=iv)
            if data.get("V_ivol") and data["V_ivol"] > 0:
                total_vol += data["V_ivol"]
            if z is None:
                z = data.get("z")

            mstar = data.get("mstar")
            mhalo = data.get("mhalo")
            if mstar is not None:
                all_mstar.append(mstar)
            if mhalo is not None:
                all_mhalo.append(mhalo)
        except Exception:
            # Deliberately broad: one bad subvolume must not abort the rest.
            log.debug("Skipping subvolume %s", ivp, exc_info=True)
        finally:
            # Release the snapshot even when processing failed part-way.
            if data is not None:
                try:
                    close_snapshot(data)
                except Exception:
                    log.debug("Could not close snapshot %s", ivp, exc_info=True)

    if not all_mstar and not all_mhalo:
        return None

    return {
        "iz": Path(iz_path).name,
        "z": z,
        "volume": total_vol,
        "mstar": np.concatenate(all_mstar) if all_mstar else np.array([]),
        "mhalo": np.concatenate(all_mhalo) if all_mhalo else np.array([]),
    }
|
|
310
|
+
|
|
311
|
+
|
|
312
|
+
if __name__ == "__main__":
    # Manual smoke test: scan the configured base directory and show the
    # result (previously the DataFrame was built and then discarded).
    base_dir = get_base_dir()
    df = completed_galaxies(str(base_dir))
    print(df)
|