gensbi-examples 0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,111 @@
+ from typing import Optional
+
+ import jax
+ from jax import numpy as jnp
+ from jax import Array
+ import numpy as np
+ from sklearn.model_selection import KFold, cross_val_score
+ from sklearn.neural_network import MLPClassifier
+
+
+ def c2st(
+     X: Array,
+     Y: Array,
+     seed: int = 1,
+     n_folds: int = 5,
+     scoring: str = "accuracy",
+     z_score: bool = True,
+     noise_scale: Optional[float] = None,
+ ) -> Array:
+     """Classifier-based two-sample test (C2ST) returning accuracy.
+
+     Trains classifiers with N-fold cross-validation [1]. A scikit-learn
+     MLPClassifier is used, with two hidden layers of 10x dim each, where dim
+     is the dimensionality of the samples X and Y.
+
+     Args:
+         X: Sample 1
+         Y: Sample 2
+         seed: Seed for sklearn
+         n_folds: Number of folds
+         scoring: sklearn scoring string, e.g. "accuracy" or "roc_auc"
+         z_score: Z-score both samples using the mean and std of X
+         noise_scale: If passed, adds Gaussian noise with std noise_scale to both samples
+
+     References:
+         [1]: https://scikit-learn.org/stable/modules/cross_validation.html
+     """
+     if z_score:
+         X_mean = jnp.mean(X, axis=0)
+         X_std = jnp.std(X, axis=0)
+         X = (X - X_mean) / X_std
+         Y = (Y - X_mean) / X_std
+
+     if noise_scale is not None:
+         # Split the key so X and Y receive independent noise draws
+         key_x, key_y = jax.random.split(jax.random.PRNGKey(seed))
+         X = X + noise_scale * jax.random.normal(key_x, X.shape)
+         Y = Y + noise_scale * jax.random.normal(key_y, Y.shape)
+
+     # Convert to numpy for sklearn
+     X = np.asarray(X)
+     Y = np.asarray(Y)
+
+     ndim = X.shape[1]
+
+     clf = MLPClassifier(
+         activation="relu",
+         hidden_layer_sizes=(10 * ndim, 10 * ndim),
+         max_iter=10000,
+         solver="adam",
+         random_state=seed,
+     )
+
+     data = np.concatenate((X, Y))
+     target = np.concatenate(
+         (
+             np.zeros((X.shape[0],)),
+             np.ones((Y.shape[0],)),
+         )
+     )
+
+     shuffle = KFold(n_splits=n_folds, shuffle=True, random_state=seed)
+     scores = cross_val_score(clf, data, target, cv=shuffle, scoring=scoring)
+
+     scores = np.asarray(np.mean(scores)).astype(np.float32)
+     return scores
+
+
+ def c2st_auc(
+     X: Array,
+     Y: Array,
+     seed: int = 1,
+     n_folds: int = 5,
+     z_score: bool = True,
+     noise_scale: Optional[float] = None,
+ ) -> Array:
+     """Classifier-based two-sample test returning the ROC AUC (area under curve).
+
+     Same as c2st, except that it returns ROC AUC rather than accuracy.
+
+     Args:
+         X: Sample 1
+         Y: Sample 2
+         seed: Seed for sklearn
+         n_folds: Number of folds
+         z_score: Z-score both samples using the mean and std of X
+         noise_scale: If passed, adds Gaussian noise with std noise_scale to both samples
+
+     Returns:
+         Mean ROC AUC over the folds
+     """
+     return c2st(
+         X,
+         Y,
+         seed=seed,
+         n_folds=n_folds,
+         scoring="roc_auc",
+         z_score=z_score,
+         noise_scale=noise_scale,
+     )
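
For orientation, a minimal usage sketch (editorial, not part of the package; the toy Gaussian data and sample sizes are illustrative). Matched distributions should score near 0.5, well-separated ones near 1.0:

import jax

key1, key2 = jax.random.split(jax.random.PRNGKey(0))
X = jax.random.normal(key1, (1000, 2))        # sample 1: N(0, I)
Y = jax.random.normal(key2, (1000, 2)) + 2.0  # sample 2: shifted to N(2, I)

print(c2st(X, Y))      # accuracy; close to 1.0 here since the samples are separable
print(c2st_auc(X, Y))  # same test, scored by ROC AUC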
@@ -0,0 +1,147 @@
+ from typing import Optional
+
+ import jax
+ import jax.numpy as jnp
+ from jax import Array
+ import numpy as np
+ from sklearn.model_selection import KFold
+ from flax import nnx
+ import optax
+
+
+ # Define the classifier MLP using flax.nnx
+ class MLP(nnx.Module):
+     def __init__(self, in_dim, hidden_dim, *, rngs):
+         # nnx.Sequential takes callables as positional arguments, not a list;
+         # plain functions such as nnx.relu can be used as activation layers
+         self.seq = nnx.Sequential(
+             nnx.Linear(in_dim, hidden_dim, rngs=rngs),
+             nnx.relu,
+             nnx.Linear(hidden_dim, hidden_dim, rngs=rngs),
+             nnx.relu,
+             nnx.Linear(hidden_dim, 2, rngs=rngs),
+         )
+
+     def __call__(self, x):
+         return self.seq(x)
+
+
+ def loss_fn(model, x, y):
+     logits = model(x)
+     labels = jax.nn.one_hot(y, 2)
+     loss = optax.softmax_cross_entropy(logits, labels).mean()
+     return loss
+
+
+ def accuracy_fn(model, x, y):
+     logits = model(x)
+     preds = jnp.argmax(logits, axis=-1)
+     return (preds == y).mean()
+
+
+ def c2st(
+     X: Array,
+     Y: Array,
+     seed: int = 1,
+     n_folds: int = 5,
+     z_score: bool = True,
+     noise_scale: Optional[float] = None,
+ ) -> Array:
+     """Classifier-based two-sample test returning accuracy (using nnx for GPU training).
+
+     Trains classifiers with N-fold cross-validation [1]. An nnx MLP is used,
+     with two hidden layers of 10x dim each.
+
+     Args:
+         X: Sample 1
+         Y: Sample 2
+         seed: Seed for random number generation
+         n_folds: Number of folds
+         z_score: Z-score both samples using the mean and std of X
+         noise_scale: If passed, adds Gaussian noise with std noise_scale to both samples
+
+     References:
+         [1]: https://scikit-learn.org/stable/modules/cross_validation.html
+     """
+     rngs = nnx.Rngs(seed)
+     if z_score:
+         X_mean = jnp.mean(X, axis=0)
+         X_std = jnp.std(X, axis=0)
+         X = (X - X_mean) / X_std
+         Y = (Y - X_mean) / X_std
+
+     if noise_scale is not None:
+         # Split the key so X and Y receive independent noise draws
+         key_x, key_y = jax.random.split(jax.random.PRNGKey(seed))
+         X = X + noise_scale * jax.random.normal(key_x, X.shape)
+         Y = Y + noise_scale * jax.random.normal(key_y, Y.shape)
+
+     X = jnp.asarray(X)
+     Y = jnp.asarray(Y)
+     ndim = X.shape[1]
+
+     # Prepare data and targets
+     data = jnp.concatenate([X, Y], axis=0)
+     target = jnp.concatenate([
+         jnp.zeros((X.shape[0],), dtype=jnp.int32),
+         jnp.ones((Y.shape[0],), dtype=jnp.int32),
+     ], axis=0)
+
+     kf = KFold(n_splits=n_folds, shuffle=True, random_state=seed)
+     scores = []
+
+     for fold, (train_idx, test_idx) in enumerate(kf.split(data)):
+         x_train, y_train = data[train_idx], target[train_idx]
+         x_test, y_test = data[test_idx], target[test_idx]
+
+         # Model and optimizer; nnx.Optimizer updates the model in place.
+         # NOTE: newer flax releases use nnx.Optimizer(model, tx, wrt=nnx.Param)
+         # and optimizer.update(model, grads).
+         key = jax.random.PRNGKey(seed + fold)
+         model = MLP(ndim, 10 * ndim, rngs=rngs)
+         optimizer = nnx.Optimizer(model, optax.adam(1e-3))
+
+         @nnx.jit
+         def train_step(model, optimizer, x, y):
+             loss, grads = nnx.value_and_grad(loss_fn)(model, x, y)
+             optimizer.update(grads)
+             return loss
+
+         # Training loop
+         n_epochs = 100
+         batch_size = min(128, x_train.shape[0])
+         n_batches = int(jnp.ceil(x_train.shape[0] / batch_size))
+         for epoch in range(n_epochs):
+             # Fold the epoch into the key so each epoch gets a fresh permutation
+             perm = jax.random.permutation(jax.random.fold_in(key, epoch), x_train.shape[0])
+             x_train_shuffled = x_train[perm]
+             y_train_shuffled = y_train[perm]
+             for i in range(n_batches):
+                 start = i * batch_size
+                 end = min((i + 1) * batch_size, x_train.shape[0])
+                 xb = x_train_shuffled[start:end]
+                 yb = y_train_shuffled[start:end]
+                 train_step(model, optimizer, xb, yb)
+
+         score = float(accuracy_fn(model, x_test, y_test))
+         scores.append(score)
+
+     return np.asarray(np.mean(scores), dtype=np.float32)
+
+
+ # def c2st_auc(
+ #     X: Array,
+ #     Y: Array,
+ #     seed: int = 1,
+ #     n_folds: int = 5,
+ #     z_score: bool = True,
+ #     noise_scale: Optional[float] = None,
+ # ) -> Array:
+ #     """Classifier-based 2-sample test returning AUC (area under curve)
+
+ #     Same as c2st, except that it returns ROC AUC rather than accuracy
+ #     """
+ #     return c2st(
+ #         X,
+ #         Y,
+ #         seed=seed,
+ #         n_folds=n_folds,
+ #         scoring="roc_auc",
+ #         z_score=z_score,
+ #         noise_scale=noise_scale,
+ #     )
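
The nnx variant above has no AUC counterpart (c2st_auc is commented out). A minimal editorial sketch of how a per-fold ROC AUC could be computed from the trained classifier, assuming the model, x_test, and y_test names from the fold loop above; sklearn's roc_auc_score supplies the metric:

import jax
import numpy as np
from sklearn.metrics import roc_auc_score

# Probability assigned to class 1 (i.e. to sample Y) by the trained fold model
probs = jax.nn.softmax(model(x_test), axis=-1)[:, 1]
fold_auc = roc_auc_score(np.asarray(y_test), np.asarray(probs))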
@@ -0,0 +1,211 @@
+
+ import jax
+ import jax.numpy as jnp
+
+ from functools import partial
+
+
+ @jax.jit
+ def find_ancestors_jax(mask, node):
+     """Find ancestors of a node in a graph.
+
+     Args:
+         mask (Array): Adjacency matrix of a directed graph, where mask[i, j]
+             is True if j is a parent of i.
+         node (int): Node of interest.
+
+     Returns:
+         Array: Boolean vector marking the ancestors of `node`.
+     """
+     num_nodes = mask.shape[0]
+     is_ancestor = jnp.zeros(num_nodes, dtype=jnp.bool_)
+     stack = jnp.empty(num_nodes, dtype=jnp.int32)
+     stack = stack.at[0].set(node)
+
+     def body_fn(carry, i):
+         is_ancestor, stack = carry
+         current_node = stack[i]
+         current_parents = mask[current_node, :]
+
+         def inner_body_fn(carry, j):
+             is_ancestor, stack = carry
+             value = current_parents[j]
+             cond = value & (j != current_node) & (~is_ancestor[j])
+
+             def true_fn(is_ancestor, stack):
+                 is_ancestor = is_ancestor.at[j].set(True)
+                 stack = stack.at[i + 1].set(j)
+                 return is_ancestor, stack
+
+             def false_fn(is_ancestor, stack):
+                 return is_ancestor, stack
+
+             is_ancestor, stack = jax.lax.cond(cond, true_fn, false_fn, is_ancestor, stack)
+             return (is_ancestor, stack), None
+
+         (is_ancestor, stack), _ = jax.lax.scan(inner_body_fn, (is_ancestor, stack), jnp.arange(num_nodes))
+         return (is_ancestor, stack), None
+
+     (is_ancestor, stack), _ = jax.lax.scan(body_fn, (is_ancestor, stack), jnp.arange(num_nodes))
+
+     return is_ancestor
+
+
+ @partial(jax.jit, static_argnums=(2,))
+ def faithfull_mask(base_mask, condition_mask, conditioned_nodes="unchanged"):
+     """Faithful mask update for conditioning."""
+     graph = base_mask.astype(jnp.bool_).copy()
+     base_mask = base_mask.astype(jnp.bool_)  # Rows are children, columns are parents
+     condition_mask = condition_mask.astype(jnp.bool_)
+     num_nodes = base_mask.shape[0]
+
+     def body_fn(carry, i):
+         base_mask, condition_mask = carry
+
+         def condition_case(base_mask, condition_mask):
+             # We need to update all ancestors of i
+             is_ancestor = find_ancestors_jax(graph, i)
+             is_ancestor = is_ancestor & (~condition_mask)
+             all_ancestors = jnp.nonzero(is_ancestor, size=num_nodes, fill_value=i)[0]
+             # They will now depend on i
+             base_mask = base_mask.at[all_ancestors, i].set(True)
+             # They will now depend on each other!
+             base_mask = base_mask | (is_ancestor[:, None] & is_ancestor[None, :])
+             # The parents of all children of i will now depend on each other
+             # (computed here but not yet used)
+             children_of_i = base_mask[:, i]
+             parents_of_children_of_i = base_mask & (children_of_i[:, None] & ~children_of_i[None, :])
+             parents_of_children_of_i = jnp.any(parents_of_children_of_i, axis=0)
+
+             return base_mask, condition_mask
+
+         def uncondition_case(base_mask, condition_mask):
+             return base_mask, condition_mask
+
+         base_mask, condition_mask = jax.lax.cond(
+             condition_mask[i], condition_case, uncondition_case, base_mask, condition_mask
+         )
+
+         return (base_mask, condition_mask), None
+
+     (base_mask, condition_mask), _ = jax.lax.scan(body_fn, (base_mask, condition_mask), jnp.arange(num_nodes))
+
+     return base_mask
+
+
+ @partial(jax.jit, static_argnums=(2, 3))
+ def min_faithfull_mask(mask, condition_mask, top_mode=0, conditioned_nodes="unchanged"):
+     """Minimally faithful mask update for conditioning."""
+     num_nodes = mask.shape[0]
+     I = moralize(mask)
+     H = jnp.zeros_like(mask, dtype=jnp.bool_)
+     # 0 is child, 1 is parent
+     UPSTREAM = top_mode
+     DOWNSTREAM = 1 - top_mode
+     num_parents_or_childs = jnp.sum(mask & (~condition_mask[None, :] & ~condition_mask[:, None]), axis=UPSTREAM)
+     S = (num_parents_or_childs == 1) & (~condition_mask)  # Frontier set
+     M = jnp.zeros((num_nodes), dtype=jnp.bool_)  # Marked nodes
+
+     def cond_fn(val):
+         S, _, _, _ = val
+         return jnp.any(S)
+
+     def body_fn(val):
+         S, M, I, H = val
+         # Find the node whose elimination adds the fewest edges
+         v = min_fill_heuristic(mask, I, S, M, top_mode)
+         # Add edges in I between unmarked neighbours of v in I
+         neighbours_v = I[v, :] & (~M)
+         I = I | (neighbours_v[:, None] & neighbours_v[None, :])
+         # Make the unmarked neighbours of v its parents in H
+         H = H.at[v, :].set(neighbours_v)
+         # Remove v from S and mark it
+         S = S.at[v].set(False)
+         M = M.at[v].set(True)
+
+         if top_mode == 1:
+             u = mask[:, v] & (~M)  # Unmarked children
+             upstream_u = mask & (u[:, None] & ~u[None, :])  # Parents of unmarked children
+             all_upstream_u_marked = ~jnp.any(upstream_u & ~M, axis=1)
+         else:
+             u = mask[v, :] & (~M)  # Unmarked parents
+             upstream_u = mask & (u[None, :] & ~u[:, None])  # Children of unmarked parents
+             all_upstream_u_marked = ~jnp.any(upstream_u & ~M, axis=1)
+
+         S = S | (u & all_upstream_u_marked)
+         S = S & (~condition_mask)
+
+         return S, M, I, H
+
+     _, _, _, H = jax.lax.while_loop(cond_fn, body_fn, (S, M, I, H))
+     H = H | jnp.eye(num_nodes, dtype=jnp.bool_)
+     H = jax.lax.cond(jnp.any(condition_mask), lambda x: x, lambda x: mask, H)
+
+     # Conditioned nodes keep the unconditional edges: each row of H where
+     # condition_mask is True should equal the corresponding row of `mask`.
+     if conditioned_nodes == "unchanged":
+         H = (H & ~condition_mask[:, None]) | (mask & condition_mask[:, None])
+     elif conditioned_nodes == "removed":
+         H = H & ~condition_mask[:, None]
+     elif conditioned_nodes == "added":
+         H = H | condition_mask[:, None]
+
+     return H
+
+
+ @partial(jax.jit, static_argnums=(4,))
+ def min_fill_heuristic(G, I, S, M, top_mode=0):
+     """Min-fill heuristic for choosing the next node to eliminate."""
+     # 0 is child, 1 is parent
+     UPSTREAM = top_mode
+     DOWNSTREAM = 1 - top_mode
+
+     # Number of edges that would be added if each node were eliminated;
+     # nodes outside S are penalised so they are never selected
+     num_edges_added = I.sum(axis=DOWNSTREAM)
+     num_edges_added = S * num_edges_added + (~S) * (I.shape[0] + 1)
+     # Pick the node adding the fewest edges; as a tie-break, prefer nodes
+     # with more marked parents (hence the negated count)
+     min_val = jnp.min(num_edges_added)
+     marked_parents = -jnp.sum(M[None, :] & G, axis=DOWNSTREAM)
+     num_parents = marked_parents * (num_edges_added == min_val) + (I.shape[0] + 1) * (num_edges_added != min_val)
+     # argmin on the reversed array selects the last minimiser rather than the first
+     reversed_array = num_parents[::-1]
+     index = jnp.argmin(reversed_array)
+     node_to_eliminate = len(reversed_array) - 1 - index
+
+     return node_to_eliminate
+
+
+ @jax.jit
+ def moralize(adj_matrix):
+     """Moralize a directed graph: make it undirected and marry co-parents."""
+     adj_matrix = adj_matrix.astype(jnp.bool_)
+
+     # Make the graph undirected
+     undirected_graph = adj_matrix | adj_matrix.T
+
+     # Add edges between parents that share a child
+     undirected_graph = undirected_graph | (adj_matrix.T @ adj_matrix)
+
+     return undirected_graph
+
+
+ def minimally_faithfull_mask(mask, condition_mask):
+     """Minimally faithful mask update for conditioning."""
+     I = moralize(mask)
+     H = jnp.zeros_like(mask, dtype=jnp.bool_)
+
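
As a small sanity check (editorial sketch, assuming the mask[i, j] = "j is a parent of i" convention used by find_ancestors_jax): moralizing the collider 0 -> 2 <- 1 should add the "marrying" edge between the co-parents 0 and 1 on top of the undirected edges 0-2 and 1-2:

import jax.numpy as jnp

# Collider 0 -> 2 <- 1; row i lists the parents of node i
mask = jnp.array([[0, 0, 0],
                  [0, 0, 0],
                  [1, 1, 0]], dtype=jnp.bool_)
print(moralize(mask).astype(jnp.int32))
# Expect edges 0-2, 1-2, the married edge 0-1, and self-edges for the co-parents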
@@ -0,0 +1,80 @@
+ import jax
+ import jax.numpy as jnp
+
+ from functools import partial
+
+
+ def sample_random_conditional_mask(
+     key, num_samples, theta_dim, x_dim, alpha=1.0, beta=4.0
+ ):
+     # A Beta(alpha, beta) prior on the conditioning probability makes it more
+     # likely that only a few nodes are conditioned on
+     key1, key2 = jax.random.split(key, 2)
+     condition_mask = jax.random.bernoulli(
+         key1,
+         jax.random.beta(key2, alpha, beta, shape=(num_samples, 1)),
+         shape=(num_samples, theta_dim + x_dim),
+     ).astype(jnp.bool_)
+     all_ones_mask = jnp.all(condition_mask, axis=-1)
+     # Conditioning on everything leaves nothing to model, so reset those rows
+     condition_mask = jnp.where(all_ones_mask[..., None], False, condition_mask)
+     return condition_mask
+
+
+ # The key argument is unused in the three fixed masks below; it is kept so
+ # that all mask functions share the same signature.
+ def joint_conditional_mask(key, num_samples, theta_dim, x_dim):
+     return jnp.array([[False] * (theta_dim + x_dim)] * num_samples)
+
+
+ def posterior_conditional_mask(key, num_samples, theta_dim, x_dim):
+     return jnp.array([[False] * theta_dim + [True] * x_dim] * num_samples)
+
+
+ def likelihood_conditional_mask(key, num_samples, theta_dim, x_dim):
+     return jnp.array([[True] * theta_dim + [False] * x_dim] * num_samples)
+
+
+ def sample_structured_conditional_mask(
+     key,
+     num_samples,
+     theta_dim,
+     x_dim,
+     p_joint=0.2,
+     p_posterior=0.2,
+     p_likelihood=0.2,
+     p_rnd1=0.2,
+     p_rnd2=0.2,
+     rnd1_prob=0.3,
+     rnd2_prob=0.7,
+ ):
+     # Mixture over joint, posterior, likelihood, and two random masks
+     key1, key2, key3 = jax.random.split(key, 3)
+     joint_mask = jnp.array([False] * (theta_dim + x_dim), dtype=jnp.bool_)
+     posterior_mask = jnp.array([False] * theta_dim + [True] * x_dim, dtype=jnp.bool_)
+     likelihood_mask = jnp.array([True] * theta_dim + [False] * x_dim, dtype=jnp.bool_)
+     random1_mask = jax.random.bernoulli(key2, rnd1_prob, shape=(theta_dim + x_dim,)).astype(jnp.bool_)
+     random2_mask = jax.random.bernoulli(key3, rnd2_prob, shape=(theta_dim + x_dim,)).astype(jnp.bool_)
+     mask_options = jnp.stack(
+         [joint_mask, posterior_mask, likelihood_mask, random1_mask, random2_mask], axis=0
+     )  # (5, theta_dim + x_dim)
+     idx = jax.random.choice(
+         key1, 5, shape=(num_samples,),
+         p=jnp.array([p_joint, p_posterior, p_likelihood, p_rnd1, p_rnd2]),
+     )
+     condition_mask = mask_options[idx]
+     all_ones_mask = jnp.all(condition_mask, axis=-1)
+     # Conditioning on everything leaves nothing to model, so reset those rows
+     condition_mask = jnp.where(all_ones_mask[..., None], False, condition_mask)
+     return condition_mask
+
+
+ def get_condition_mask_fn(name, **kwargs):
+     if name.lower() == "structured_random":
+         return partial(sample_structured_conditional_mask, **kwargs)
+     elif name.lower() == "random":
+         return partial(sample_random_conditional_mask, **kwargs)
+     elif name.lower() == "joint":
+         return partial(joint_conditional_mask, **kwargs)
+     elif name.lower() == "posterior":
+         return partial(posterior_conditional_mask, **kwargs)
+     elif name.lower() == "likelihood":
+         return partial(likelihood_conditional_mask, **kwargs)
+     else:
+         raise NotImplementedError(f"Unknown condition mask function: {name}")
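
A minimal usage sketch (editorial; the dimensions and hyperparameters are illustrative). get_condition_mask_fn returns a sampler mapping a PRNG key and batch size to a boolean conditioning mask over the concatenated (theta, x) dimensions:

import jax

mask_fn = get_condition_mask_fn("posterior")
masks = mask_fn(jax.random.PRNGKey(0), num_samples=4, theta_dim=2, x_dim=3)
print(masks.shape)  # (4, 5): theta entries False (modelled), x entries True (conditioned)

random_fn = get_condition_mask_fn("random", alpha=1.0, beta=4.0)
random_masks = random_fn(jax.random.PRNGKey(1), num_samples=4, theta_dim=2, x_dim=3)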