jaxspec 0.1.3-py3-none-any.whl → 0.2.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
jaxspec/fit.py CHANGED
@@ -7,126 +7,31 @@ from functools import cached_property
  from typing import Literal
  
  import arviz as az
- import haiku as hk
  import jax
  import jax.numpy as jnp
  import matplotlib.pyplot as plt
- import numpy as np
  import numpyro
  
  from jax import random
- from jax.experimental.sparse import BCOO
  from jax.random import PRNGKey
- from jax.tree_util import tree_map
- from jax.typing import ArrayLike
  from numpyro.contrib.nested_sampling import NestedSampler
- from numpyro.distributions import Distribution, Poisson, TransformedDistribution
+ from numpyro.distributions import Poisson, TransformedDistribution
  from numpyro.infer import AIES, ESS, MCMC, NUTS, Predictive
  from numpyro.infer.inspect import get_model_relations
  from numpyro.infer.reparam import TransformReparam
  from numpyro.infer.util import log_density
  
- from .analysis._plot import _plot_poisson_data_with_error
+ from ._fit._build_model import build_prior, forward_model
+ from .analysis._plot import (
+     _error_bars_for_observed_data,
+     _plot_binned_samples_with_error,
+     _plot_poisson_data_with_error,
+ )
  from .analysis.results import FitResult
  from .data import ObsConfiguration
  from .model.abc import SpectralModel
  from .model.background import BackgroundModel
- from .util.typing import PriorDictModel, PriorDictType
- 
- 
- def build_prior(prior: PriorDictType, expand_shape: tuple = (), prefix=""):
-     """
-     Transform a dictionary of prior distributions into a dictionary of parameters sampled from the prior.
-     Must be used within a numpyro model.
-     """
-     parameters = dict(hk.data_structures.to_haiku_dict(prior))
- 
-     for i, (m, n, sample) in enumerate(hk.data_structures.traverse(prior)):
-         if isinstance(sample, Distribution):
-             parameters[m][n] = jnp.ones(expand_shape) * numpyro.sample(f"{prefix}{m}_{n}", sample)
- 
-         elif isinstance(sample, ArrayLike):
-             parameters[m][n] = jnp.ones(expand_shape) * sample
- 
-         else:
-             raise ValueError(
-                 f"Invalid prior type {type(sample)} for parameter {prefix}{m}_{n} : {sample}"
-             )
- 
-     return parameters
- 
- 
- def build_numpyro_model_for_single_obs(
-     obs: ObsConfiguration,
-     model: SpectralModel,
-     background_model: BackgroundModel,
-     name: str = "",
-     sparse: bool = False,
- ) -> Callable:
-     """
-     Build a numpyro model for a given observation and spectral model.
-     """
- 
-     def numpyro_model(prior_params, observed=True):
-         # prior_params = build_prior(prior_distributions, name=name)
-         transformed_model = hk.without_apply_rng(
-             hk.transform(lambda par: CountForwardModel(model, obs, sparse=sparse)(par))
-         )
- 
-         if (getattr(obs, "folded_background", None) is not None) and (background_model is not None):
-             bkg_countrate = background_model.numpyro_model(
-                 obs, model, name="bkg_" + name, observed=observed
-             )
-         elif (getattr(obs, "folded_background", None) is None) and (background_model is not None):
-             raise ValueError(
-                 "Trying to fit a background model but no background is linked to this observation"
-             )
- 
-         else:
-             bkg_countrate = 0.0
- 
-         obs_model = jax.jit(lambda p: transformed_model.apply(None, p))
-         countrate = obs_model(prior_params)
- 
-         # This is the case where we fit a model to a TOTAL spectrum as defined in OGIP standard
-         with numpyro.plate("obs_plate_" + name, len(obs.folded_counts)):
-             numpyro.sample(
-                 "obs_" + name,
-                 Poisson(countrate + bkg_countrate / obs.folded_backratio.data),
-                 obs=obs.folded_counts.data if observed else None,
-             )
- 
-     return numpyro_model
- 
- 
- class CountForwardModel(hk.Module):
-     """
-     A haiku module which allows to build the function that simulates the measured counts
-     """
- 
-     def __init__(self, model: SpectralModel, folding: ObsConfiguration, sparse=False):
-         super().__init__()
-         self.model = model
-         self.energies = jnp.asarray(folding.in_energies)
- 
-         if (
-             sparse
-         ):  # folding.transfer_matrix.data.density > 0.015 is a good criterion to consider sparsify
-             self.transfer_matrix = BCOO.from_scipy_sparse(
-                 folding.transfer_matrix.data.to_scipy_sparse().tocsr()
-             )
- 
-         else:
-             self.transfer_matrix = jnp.asarray(folding.transfer_matrix.data.todense())
- 
-     def __call__(self, parameters):
-         """
-         Compute the count functions for a given observation.
-         """
- 
-         expected_counts = self.transfer_matrix @ self.model.photon_flux(parameters, *self.energies)
- 
-         return jnp.clip(expected_counts, a_min=1e-6)
+ from .util.typing import PriorDictType
  
  
  class BayesianModel:
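
This hunk removes the haiku dependency: the prior builder and the count forward model (formerly `CountForwardModel`) move out of `fit.py` into the private `._fit._build_model` module as plain functions `build_prior` and `forward_model`. A minimal sketch of how the relocated helpers fit together, based only on their call sites visible in this diff (the toy prior dictionary and `toy_numpyro_model` are illustrative, not part of jaxspec):

import jax
import numpyro
import numpyro.distributions as dist

from jaxspec._fit._build_model import build_prior, forward_model

# Illustrative prior: one numpyro distribution per model parameter
prior = {"powerlaw_1": {"alpha": dist.Uniform(0, 5), "norm": dist.LogUniform(1e-8, 1e-3)}}

def toy_numpyro_model(model, observations, observed=True):
    # Sample every parameter once and broadcast to one value per observation
    params = build_prior(prior, expand_shape=(len(observations),))

    for i, (name, obs) in enumerate(observations.items()):
        # Slice out the i-th observation's parameters and fold them through the response
        params_i = jax.tree.map(lambda leaf: leaf[i], params)
        countrate = forward_model(model, params_i, obs)

        with numpyro.plate("obs_plate_" + name, len(obs.folded_counts)):
            numpyro.sample(
                "obs_" + name,
                dist.Poisson(countrate),
                obs=obs.folded_counts.data if observed else None,
            )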
@@ -157,15 +62,16 @@ class BayesianModel:
          self.model = model
          self._observations = observations
          self.background_model = background_model
-         self.pars = tree_map(lambda x: jnp.float64(x), self.model.params)
          self.sparse = sparsify_matrix
  
          if not callable(prior_distributions):
              # Validate the entry with pydantic
-             prior = PriorDictModel.from_dict(prior_distributions).nested_dict
+             # prior = PriorDictModel.from_dict(prior_distributions).
  
              def prior_distributions_func():
-                 return build_prior(prior, expand_shape=(len(self.observation_container),))
+                 return build_prior(
+                     prior_distributions, expand_shape=(len(self.observation_container),)
+                 )
  
          else:
              prior_distributions_func = prior_distributions
@@ -173,6 +79,22 @@
          self.prior_distributions_func = prior_distributions_func
          self.init_params = self.prior_samples()
  
+         # Check the priors are suited for the observations
+         split_parameters = [
+             (param, shape[-1])
+             for param, shape in jax.tree.map(lambda x: x.shape, self.init_params).items()
+             if (len(shape) > 1)
+             and not param.startswith("_")
+             and not param.startswith("bkg")  # hardcoded for subtracted background
+         ]
+ 
+         for parameter, proposed_number_of_obs in split_parameters:
+             if proposed_number_of_obs != len(self.observation_container):
+                 raise ValueError(
+                     f"Invalid splitting in the prior distribution. "
+                     f"Expected {len(self.observation_container)} but got {proposed_number_of_obs} for {parameter}"
+                 )
+ 
      @cached_property
      def observation_container(self) -> dict[str, ObsConfiguration]:
          """
@@ -197,22 +119,52 @@
          Build the numpyro model using the observed data, the prior distributions and the spectral model.
          """
  
-         def model(observed=True):
+         def numpyro_model(observed=True):
+             # Instantiate and register the parameters of the spectral model and the background
              prior_params = self.prior_distributions_func()
  
              # Iterate over all the observations in our container and build a single numpyro model for each observation
-             for i, (key, observation) in enumerate(self.observation_container.items()):
+             for i, (name, observation) in enumerate(self.observation_container.items()):
+                 # Check that we can indeed fit a background
+                 if (getattr(observation, "folded_background", None) is not None) and (
+                     self.background_model is not None
+                 ):
+                     # This call should register the parameter and observation of our background model
+                     bkg_countrate = self.background_model.numpyro_model(
+                         observation, name=name, observed=observed
+                     )
+ 
+                 elif (getattr(observation, "folded_background", None) is None) and (
+                     self.background_model is not None
+                 ):
+                     raise ValueError(
+                         "Trying to fit a background model but no background is linked to this observation"
+                     )
+ 
+                 else:
+                     bkg_countrate = 0.0
+ 
                  # We expect that prior_params contains an array of parameters for each observation
                  # They can be identical or different for each observation
-                 params = tree_map(lambda x: x[i], prior_params)
+                 params = jax.tree.map(lambda x: x[i], prior_params)
  
-                 obs_model = build_numpyro_model_for_single_obs(
-                     observation, self.model, self.background_model, name=key, sparse=self.sparse
+                 # Forward model the observation and get the associated countrate
+                 obs_model = jax.jit(
+                     lambda par: forward_model(self.model, par, observation, sparse=self.sparse)
                  )
+                 obs_countrate = obs_model(params)
  
-                 obs_model(params, observed=observed)
+                 # Register the observation as an observed site
+                 with numpyro.plate("obs_plate_" + name, len(observation.folded_counts)):
+                     numpyro.sample(
+                         "obs_" + name,
+                         Poisson(
+                             obs_countrate + bkg_countrate
+                         ),  # / observation.folded_backratio.data
+                         obs=observation.folded_counts.data if observed else None,
+                     )
  
-         return model
+         return numpyro_model
  
      @cached_property
      def transformed_numpyro_model(self) -> Callable:
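
Two details of this hunk worth noting: the deprecated `tree_map` from `jax.tree_util` (dropped from the imports above) is replaced by the `jax.tree.map` namespace, and the background count rate is no longer divided by `folded_backratio` (the division is commented out in the Poisson rate). Per-observation parameters are obtained by slicing every leaf of the stacked prior draws, for example (toy pytree, two observations):

import jax
import jax.numpy as jnp

prior_params = {"alpha": jnp.array([1.7, 2.1]), "norm": jnp.array([1e-4, 3e-4])}

# Parameters for observation i=0: every leaf is indexed along its first axis
params_obs0 = jax.tree.map(lambda x: x[0], prior_params)
# {'alpha': Array(1.7, ...), 'norm': Array(1e-04, ...)}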
@@ -352,7 +304,9 @@
          return fakeit(key, parameters)
  
      def prior_predictive_coverage(
-         self, key: PRNGKey = PRNGKey(0), num_samples: int = 1000, percentiles: tuple = (16, 84)
+         self,
+         key: PRNGKey = PRNGKey(0),
+         num_samples: int = 1000,
      ):
          """
          Check if the prior distribution include the observed data.
@@ -362,24 +316,25 @@
          posterior_observations = self.mock_observations(prior_params, key=key_posterior)
  
          for key, value in self.observation_container.items():
-             fig, axs = plt.subplots(
-                 nrows=2, ncols=1, sharex=True, figsize=(8, 8), height_ratios=[3, 1]
+             fig, ax = plt.subplots(
+                 nrows=2, ncols=1, sharex=True, figsize=(5, 6), height_ratios=[3, 1]
              )
  
-             _plot_poisson_data_with_error(
-                 axs[0],
+             y_observed, y_observed_low, y_observed_high = _error_bars_for_observed_data(
+                 value.folded_counts.values, 1.0, "ct"
+             )
+ 
+             true_data_plot = _plot_poisson_data_with_error(
+                 ax[0],
                  value.out_energies,
-                 value.folded_counts.values,
-                 percentiles=percentiles,
+                 y_observed.value,
+                 y_observed_low.value,
+                 y_observed_high.value,
+                 alpha=0.7,
              )
  
-             axs[0].stairs(
-                 np.max(posterior_observations["obs_" + key], axis=0),
-                 edges=[*list(value.out_energies[0]), value.out_energies[1][-1]],
-                 baseline=np.min(posterior_observations["obs_" + key], axis=0),
-                 alpha=0.3,
-                 fill=True,
-                 color=(0.15, 0.25, 0.45),
+             prior_plot = _plot_binned_samples_with_error(
+                 ax[0], value.out_energies, posterior_observations["obs_" + key], n_sigmas=3
              )
  
              # rank = np.vstack((posterior_observations["obs_" + key], value.folded_counts.values)).argsort(axis=0)[-1] / (num_samples) * 100
@@ -393,22 +348,24 @@
  
              rank = (less_than_obs + 0.5 * equal_to_obs) / num_samples * 100
  
-             axs[1].stairs(rank, edges=[*list(value.out_energies[0]), value.out_energies[1][-1]])
+             ax[1].stairs(rank, edges=[*list(value.out_energies[0]), value.out_energies[1][-1]])
  
-             axs[1].plot(
+             ax[1].plot(
                  (value.out_energies.min(), value.out_energies.max()),
                  (50, 50),
                  color="black",
                  linestyle="--",
              )
  
-             axs[1].set_xlabel("Energy (keV)")
-             axs[0].set_ylabel("Counts")
-             axs[1].set_ylabel("Rank (%)")
-             axs[1].set_ylim(0, 100)
-             axs[0].set_xlim(value.out_energies.min(), value.out_energies.max())
-             axs[0].loglog()
+             ax[1].set_xlabel("Energy (keV)")
+             ax[0].set_ylabel("Counts")
+             ax[1].set_ylabel("Rank (%)")
+             ax[1].set_ylim(0, 100)
+             ax[0].set_xlim(value.out_energies.min(), value.out_energies.max())
+             ax[0].loglog()
+             ax[0].legend(loc="upper right")
              plt.suptitle(f"Prior Predictive coverage for {key}")
+             plt.tight_layout()
              plt.show()
  
  
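The lower panel is a rank plot: for each energy bin, the observed count is ranked within the prior predictive draws. `less_than_obs` and `equal_to_obs` (computed outside this hunk) count the draws below and tied with the observation, with ties contributing half. If the prior covers the data, the ranks scatter around the dashed 50% line. A worked toy example of the statistic:

import numpy as np

num_samples = 1000
rng = np.random.default_rng(0)

draws = rng.poisson(10.0, size=(num_samples, 4))  # prior predictive counts in 4 bins
observed = np.array([8, 10, 13, 30])

less_than_obs = (draws < observed).sum(axis=0)
equal_to_obs = (draws == observed).sum(axis=0)
rank = (less_than_obs + 0.5 * equal_to_obs) / num_samples * 100
# Bins well inside the prior predictive land near 50; the outlier bin (30 counts)
# lands near 100, flagging data the prior does not cover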
@@ -513,7 +470,11 @@ class BayesianModelFitter(BayesianModel, ABC):
              predictive_parameters
          ]
  
-         parameters = [x for x in inference_data.posterior.keys() if not x.endswith("_base")]
+         parameters = [
+             x
+             for x in inference_data.posterior.keys()
+             if not x.endswith("_base") or x.startswith("_")
+         ]
          inference_data.posterior = inference_data.posterior[parameters]
          inference_data.prior = inference_data.prior[parameters]
  
@@ -595,7 +556,6 @@ class MCMCFitter(BayesianModelFitter):
          return FitResult(
              self,
              inference_data,
-             self.model.params,
              background_model=self.background_model,
          )
  
@@ -641,11 +601,13 @@ class NSFitter(BayesianModelFitter):
          ns = NestedSampler(
              bayesian_model,
              constructor_kwargs=dict(
-                 num_parallel_workers=1,
                  verbose=verbose,
                  difficult_model=True,
-                 max_samples=1e6,
+                 max_samples=1e5,
                  parameter_estimation=True,
+                 gradient_guided=True,
+                 devices=jax.devices(),
+                 # init_efficiency_threshold=0.01,
                  num_live_points=num_live_points,
              ),
              termination_kwargs=termination_kwargs if termination_kwargs else dict(),
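
The constructor keyword arguments are forwarded through numpyro's `NestedSampler` wrapper to the underlying jaxns sampler: this release drops `num_parallel_workers`, lowers `max_samples` from 1e6 to 1e5, and enables gradient-guided sampling across all visible devices. A hedged sketch of the equivalent direct call (`bayesian_model` stands for the numpyro model built above; whether every kwarg is accepted depends on the installed jaxns version):

import jax

from numpyro.contrib.nested_sampling import NestedSampler

ns = NestedSampler(
    bayesian_model,
    constructor_kwargs=dict(
        verbose=False,
        difficult_model=True,
        max_samples=1e5,
        parameter_estimation=True,
        gradient_guided=True,
        devices=jax.devices(),
        num_live_points=1000,
    ),
    termination_kwargs=dict(),
)
# ns.run(jax.random.PRNGKey(0)) would then launch the sampling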
@@ -664,6 +626,5 @@
          return FitResult(
              self,
              inference_data,
-             self.model.params,
              background_model=self.background_model,
          )
@@ -0,0 +1,151 @@
+ """Helper functions to deal with the graph logic within model building"""
+ 
+ import re
+ 
+ from collections.abc import Callable
+ from uuid import uuid4
+ 
+ import networkx as nx
+ 
+ 
+ def get_component_names(graph: nx.DiGraph):
+     """
+     Get the set of component names from the nodes of a graph.
+ 
+     Parameters:
+         graph: The graph to get the component names from.
+     """
+     return set(
+         data["name"] for _, data in graph.nodes(data=True) if "component" in data.get("type")
+     )
+ 
+ 
+ def increment_name(name: str, used_names: set):
+     """
+     Increment the suffix number in a name if it is formated as 'name_1'.
+ 
+     Parameters:
+         name: The name to increment.
+         used_names: The set of names that are already used.
+     """
+     # Use regex to extract base name and suffix number
+     match = re.match(r"(.*?)(?:_(\d+))?$", name)
+     base_name = match.group(1)
+     suffix = match.group(2)
+     if suffix:
+         number = int(suffix)
+     else:
+         number = 1  # Start from 1 if there is no suffix
+ 
+     new_name = name
+     while new_name in used_names:
+         number += 1
+         new_name = f"{base_name}_{number}"
+ 
+     return new_name
+ 
+ 
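`increment_name` keeps bumping the numeric suffix until the candidate is free; a name without a suffix starts from 1, so its first collision yields `_2`. For example, with the function above in scope:

increment_name("powerlaw", {"powerlaw"})      # -> 'powerlaw_2'
increment_name("powerlaw_1", {"powerlaw_1"})  # -> 'powerlaw_2'
increment_name("blackbody", {"powerlaw"})     # -> 'blackbody' (already unique)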
+ def compose_with_rename(graph_1: nx.DiGraph, graph_2: nx.DiGraph):
+     """
+     Compose two graphs by updating the 'name' attributes of nodes in graph_2,
+     and return the graph joined on the 'out' node.
+ 
+     Parameters:
+         graph_1: The first graph to compose.
+         graph_2: The second graph to compose.
+     """
+ 
+     # Initialize the set of used names with names from graph_1
+     used_names = get_component_names(graph_1)
+ 
+     # Update the 'name' attributes in graph_2 to make them unique
+     for node, data in graph_2.nodes(data=True):
+         if "component" in data.get("type"):
+             original_name = data["name"]
+             new_name = original_name
+ 
+             if new_name in used_names:
+                 new_name = increment_name(original_name, used_names)
+                 data["name"] = new_name
+                 used_names.add(new_name)
+ 
+             else:
+                 used_names.add(new_name)
+ 
+     # Now you can safely compose the graphs
+     composed_graph = nx.compose(graph_1, graph_2)
+ 
+     return composed_graph
+ 
+ 
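Only nodes whose 'type' contains "component" take part in the renaming; the shared 'out' node is merged by `nx.compose`. A minimal sketch with two single-component graphs, using node attribute conventions inferred from this file:

import networkx as nx

g1 = nx.DiGraph()
g1.add_node("a", type="additive_component", name="powerlaw_1")
g1.add_node("out", type="out")
g1.add_edge("a", "out")

g2 = nx.DiGraph()
g2.add_node("b", type="additive_component", name="powerlaw_1")
g2.add_node("out", type="out")
g2.add_edge("b", "out")

merged = compose_with_rename(g1, g2)
print(get_component_names(merged))  # {'powerlaw_1', 'powerlaw_2'}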
+ def compose(
+     graph_1: nx.DiGraph,
+     graph_2: nx.DiGraph,
+     operation: str = "",
+     operation_func: Callable = lambda x, y: None,
+ ):
+     """
+     Compose two graphs by joining the 'out' node of graph_1 and graph_2, and turning
+     it to an 'operation' node with the relevant operator and add a new 'out' node.
+ 
+     Parameters:
+         graph_1: The first graph to compose.
+         graph_2: The second graph to compose.
+         operation: The string describing the operation to perform.
+         operation_func: The callable that performs the operation.
+     """
+ 
+     combined_graph = compose_with_rename(graph_1, graph_2)
+     node_id = str(uuid4())
+     graph = nx.relabel_nodes(combined_graph, {"out": node_id})
+     nx.set_node_attributes(graph, {node_id: f"{operation}_operation"}, "type")
+     nx.set_node_attributes(graph, {node_id: operation_func}, "operator")
+ 
+     # Now add the output node and link it to the operation node
+     graph.add_node("out", type="out")
+     graph.add_edge(node_id, "out")
+ 
+     # Compute the new depth of each node
+     longest_path = nx.dag_longest_path_length(graph)
+ 
+     for node in graph.nodes:
+         nx.set_node_attributes(
+             graph,
+             {node: longest_path - nx.shortest_path_length(graph, node, "out")},
+             "depth",
+         )
+ 
+     return graph
+ 
+ 
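`compose` reuses the merged 'out' node as the operation node and appends a fresh 'out', so repeated composition builds the expression tree bottom-up; the final loop stores each node's distance from the output as a 'depth' attribute. Continuing the toy graphs from the sketch above:

summed = compose(g1, g2, operation="add", operation_func=lambda x, y: x + y)

# The former shared 'out' became an 'add_operation' node feeding the new output:
# components sit at depth 0, the operation node at depth 1, 'out' at depth 2
print(sorted((data["type"], data["depth"]) for _, data in summed.nodes(data=True)))
# [('add_operation', 1), ('additive_component', 0), ('additive_component', 0), ('out', 2)]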
+ def export_to_mermaid(graph, file=None):
+     mermaid_code = "graph LR\n"  # LR = left to right
+ 
+     # Add nodes
+     for node, attributes in graph.nodes(data=True):
+         if attributes["type"] == "out":
+             mermaid_code += f' {node}("Output")\n'
+ 
+         else:
+             operation_type, node_type = attributes["type"].split("_")
+ 
+             if node_type == "component":
+                 name, number = attributes["name"].split("_")
+                 mermaid_code += f' {node}("{name.capitalize()} ({number})")\n'
+ 
+             elif node_type == "operation":
+                 if operation_type == "add":
+                     mermaid_code += f" {node}{{**+**}}\n"
+ 
+                 elif operation_type == "mul":
+                     mermaid_code += f" {node}{{**x**}}\n"
+ 
+     # Draw connexion between nodes
+     for source, target in graph.edges():
+         mermaid_code += f" {source} --> {target}\n"
+ 
+     if file is None:
+         return mermaid_code
+     else:
+         with open(file, "w") as f:
+             f.write(mermaid_code)
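
The exporter assumes component names carry exactly one underscore (`name_number`) and recognizes only 'add' and 'mul' operation nodes. For the toy `summed` graph composed above, `export_to_mermaid(summed)` would return mermaid code along these lines (the operation node id is a random `uuid4`, shortened here; line order follows node insertion):

graph LR
 a("Powerlaw (1)")
 3f2b...{**+**}
 b("Powerlaw (2)")
 out("Output")
 a --> 3f2b...
 3f2b... --> out
 b --> 3f2b...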