PyPI - mintstate - Versions diffs - 0.1.0__py3-none-any.whl - Mend

mintstate 0.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33) hide show

mintstate-0.1.0.dist-info/METADATA +371 -0
mintstate-0.1.0.dist-info/RECORD +33 -0
mintstate-0.1.0.dist-info/WHEEL +4 -0
stateMINT/__init__.py +0 -0
stateMINT/common/__init__.py +0 -0
stateMINT/common/dataclasses.py +11 -0
stateMINT/common/utils.py +74 -0
stateMINT/conf/export_config.yaml +25 -0
stateMINT/conf/sweeps/cases.yaml +46 -0
stateMINT/conf/sweeps/prevalence.yaml +46 -0
stateMINT/conf/target/cases.yaml +18 -0
stateMINT/conf/target/prevalence.yaml +18 -0
stateMINT/conf/train_config.yaml +46 -0
stateMINT/conf/viz_config.yaml +31 -0
stateMINT/data/__init__.py +21 -0
stateMINT/data/dataset.py +77 -0
stateMINT/data/features.py +104 -0
stateMINT/data/fetch.py +194 -0
stateMINT/data/preprocessing.py +504 -0
stateMINT/eval/__init__.py +0 -0
stateMINT/eval/metrics.py +180 -0
stateMINT/eval/viz_preds_truth.py +151 -0
stateMINT/filter_raw_data.py +45 -0
stateMINT/model/__init__.py +3 -0
stateMINT/model/hub.py +144 -0
stateMINT/model/mamba2.py +171 -0
stateMINT/model_export.py +90 -0
stateMINT/train.py +158 -0
stateMINT/training/__init__.py +0 -0
stateMINT/training/checkpoint.py +147 -0
stateMINT/training/loss.py +35 -0
stateMINT/training/train_step.py +167 -0
stateMINT/visualise_predictions.py +55 -0

mintstate-0.1.0.dist-info/METADATA ADDED Viewed

@@ -0,0 +1,371 @@
+Metadata-Version: 2.4
+Name: mintstate
+Version: 0.1.0
+Summary: StateMINT is a state space based neural network emulator for malariasimulation.
+Project-URL: Homepage, https://github.com/mrc-ide/stateMINT
+Project-URL: Repository, https://github.com/mrc-ide/stateMINT
+Project-URL: Weights, https://huggingface.co/dide-ic/stateMINT
+Author-email: Anmol Thapar <mr.anmolthapar@gmail.com>
+Requires-Python: >=3.12
+Requires-Dist: etils>=1.0
+Requires-Dist: flax>=0.12.7
+Requires-Dist: huggingface-hub>=1.19.0
+Requires-Dist: jax>=0.10.1
+Requires-Dist: jaxtyping>=0.3.10
+Requires-Dist: mamba2-jax>=1.0.1
+Requires-Dist: numpy>=2.4.6
+Requires-Dist: omegaconf>=2.3
+Requires-Dist: orbax-checkpoint>=0.12.0
+Requires-Dist: pandas>=3.0.3
+Provides-Extra: all
+Requires-Dist: duckdb>=1.5.3; extra == 'all'
+Requires-Dist: grain>=0.2.16; extra == 'all'
+Requires-Dist: hydra-core>=1.3.2; extra == 'all'
+Requires-Dist: jax[cuda12]>=0.10.1; extra == 'all'
+Requires-Dist: matplotlib>=3.10.9; extra == 'all'
+Requires-Dist: optax>=0.2.8; extra == 'all'
+Requires-Dist: tqdm>=4.67.3; extra == 'all'
+Requires-Dist: wandb>=0.27.0; extra == 'all'
+Provides-Extra: gpu
+Requires-Dist: jax[cuda12]>=0.10.1; extra == 'gpu'
+Provides-Extra: plot
+Requires-Dist: matplotlib>=3.10.9; extra == 'plot'
+Provides-Extra: train
+Requires-Dist: duckdb>=1.5.3; extra == 'train'
+Requires-Dist: grain>=0.2.16; extra == 'train'
+Requires-Dist: hydra-core>=1.3.2; extra == 'train'
+Requires-Dist: optax>=0.2.8; extra == 'train'
+Requires-Dist: tqdm>=4.67.3; extra == 'train'
+Requires-Dist: wandb>=0.27.0; extra == 'train'
+Description-Content-Type: text/markdown
+# StateMINT
+StateMINT is a JAX/Flax neural emulator for
+[`malariasimulation`](https://github.com/mrc-ide/malariasimulation) outputs. It
+uses a Mamba2 state-space sequence model to predict malaria trajectories from
+static scenario covariates and intervention timing features. It supersedes the
+earlier
+[`MINTelligence`](https://github.com/CosmoNaught/MINTelligence) RNN emulator.
+## What StateMINT Provides
+- Mamba2-based sequence regressors for malaria prevalence and case-count
+  trajectories.
+- Data extraction utilities for aggregating raw `malariasimulation` DuckDB
+  outputs into model-ready parquet files.
+- Preprocessing with target transforms, covariate scaling, and
+  intervention-aware feature construction.
+- Training, evaluation, visualization, checkpointing, and export workflows.
+- Hugging Face Hub loading utilities for exported inference artifacts.
+## Installation
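+StateMINT requires Python 3.12 or newer. The package is published on PyPI as
+`mintstate` and imported as `stateMINT`. The instructions below install from
+source with [`uv`](https://github.com/astral-sh/uv).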
+```bash
+git clone https://github.com/mrc-ide/stateMINT.git
+cd stateMINT
+uv sync
+```
+For development dependencies and optional extras:
+```bash
+uv sync --all-extras --dev
+```
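+Or install extras individually (the `train` extra provides the Hydra, DuckDB,
+Optax, and W&B dependencies used by the training workflow):
+```bash
+uv sync --extra plot
+uv sync --extra gpu
+uv sync --extra train
+```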
+## Quick Start: Inference
+Load an exported artifact from the Hugging Face Hub or a local directory with
+`Mamba2Regressor.from_pretrained`.
+```python
+from stateMINT.model import Mamba2Regressor
+artifact = Mamba2Regressor.from_pretrained(
+    "dide-ic/stateMINT",
+    predictor="prevalence",
+    revision="v1.0.0",
+)
+static_covars = [{
+    "eir": 50.0,
+    "dn0_use": 0.3,
+    "dn0_future": 0.4,
+    "Q0": 0.8,
+    "phi_bednets": 0.7,
+    "seasonal": 1.0,
+    "routine": 0.5,
+    "itn_use": 0.2,
+    "irs_use": 0.1,
+    "itn_future": 0.3,
+    "irs_future": 0.2,
+    "lsm": 0.0,
+}]
+predicted_prevalence = artifact.predict(static_covars)
+print(predicted_prevalence.shape)  # (batch, timesteps)
+print(predicted_prevalence[0])     # first trajectory
+```
+For cases, load the cases artifact and use the same input format:
+```python
+artifact = Mamba2Regressor.from_pretrained(
+    "dide-ic/stateMINT",
+    predictor="cases",
+    revision="v1.0.0",
+)
+predicted_cases = artifact.predict(static_covars)
+```
+By default, predictions are returned on the original target scale: prevalence as
+probabilities and cases on the scale used by the training data. Pass
+`transformed=True` to return model-space outputs.
+```python
+raw_model_space = artifact.predict(static_covars, transformed=True)
+```
+For local artifacts, pass the target artifact directory:
+```python
+artifact = Mamba2Regressor.from_pretrained(
+    "artifacts/prevalence",
+    predictor="prevalence",
+)
+```
+## Static Covariates
+Inference inputs need one dictionary per scenario with these static covariates:
+```text
+eir
+dn0_use
+dn0_future
+Q0
+phi_bednets
+seasonal
+routine
+itn_use
+irs_use
+itn_future
+irs_future
+lsm
+```
+Artifacts include the fitted static scaler, timestep grid, intervention day,
+target transform, and other preprocessing metadata needed for inference.
+## Training Workflow
+Typical workflow:
+1. Fetch and aggregate simulation data from DuckDB.
+2. Train a target-specific model.
+3. Optionally, tune hyperparameters with W&B sweeps.
+4. Evaluate or visualize test-set predictions.
+5. Export the checkpoint into a portable inference artifact.
+6. Upload the artifact to the Hugging Face Hub, if needed.
+### 1. Fetch Filtered Data
+`stateMINT.filter_raw_data` reads raw DuckDB simulation rows, filters burn-in,
+aggregates fixed windows, and writes `filtered_data_<predictor>.parquet`.
+```bash
+uv run python -m stateMINT.filter_raw_data \
+  --db-path /path/to/simulations.duckdb \
+  --table-name simulation_results \
+  --predictor prevalence \
+  --window-size 14 \
+  --output-folder data
+```
+Useful options:
+- `--predictor prevalence` or `--predictor cases`
+- `--param-limit N` to keep only the first `N` parameter indices
+- `--sim-limit N` to sample up to `N` simulations per parameter
+The raw table should include identifiers (`parameter_index`, `simulation_index`,
+`global_index`), daily timesteps, the static covariates above, and output
+columns for prevalence or cases.
+### 2. Train a Model
+Training uses Hydra; the default target is prevalence.
+```bash
+uv run python -m stateMINT.train
+```
+Train the cases model:
+```bash
+uv run python -m stateMINT.train target=cases
+```
+Common overrides:
+```bash
+uv run python -m stateMINT.train \
+  target=prevalence \
+  data_file=data/filtered_data_prevalence.parquet \
+  output_dir=train_outputs/prevalence \
+  use_wandb=false
+```
+Training writes checkpoints under `checkpoint_dir`, saves
+`static_scaler.pkl` in `output_dir`, and reuses a split assignment file for
+consistent train/validation/test splits.
+### 3. W&B Sweeps
+Sweep definitions live in `stateMINT/conf/sweeps`. Create a sweep, then run one
+or more agents with the sweep ID returned by W&B:
+```bash
+uv run wandb sweep stateMINT/conf/sweeps/prevalence.yaml
+uv run wandb agent <entity>/stateMINT-sweep/<sweep-id>
+```
+Use `stateMINT/conf/sweeps/cases.yaml` for the cases target. Sweep commands set
+`use_wandb=true` and pass Hydra overrides through `${args_no_hyphens}`.
+### 4. Visualize Predictions
+Compare predictions with test-set targets. `checkpoint_dir` is required.
+```bash
+uv run python -m stateMINT.visualise_predictions \
+  target=prevalence \
+  checkpoint_dir=train_outputs/prevalence/ckpts-YYYY-MM-DDTHH:MM:SS \
+  data_file=data/filtered_data_prevalence.parquet
+```
+The default output path is `viz_outputs/<predictor>/preds-vs-targets.pdf`.
+### 5. Export an Artifact
+Export converts a trained Orbax checkpoint and preprocessing metadata into a
+self-contained artifact.
+```bash
+uv run python -m stateMINT.model_export \
+  predictor=prevalence \
+  checkpoint_dir=train_outputs/prevalence/ckpts-YYYY-MM-DDTHH:MM:SS \
+  scaler_file=train_outputs/prevalence/static_scaler.pkl \
+  artifact_dir=artifacts/prevalence
+```
+Export config architecture values must match the checkpoint, including
+`d_model`, `d_state`, `n_layers`, `dropout`, and related Mamba2 settings.
+An exported artifact contains:
+```text
+artifact_dir/
+|-- checkpoint/
+|-- model_config.json
+`-- preprocessing_config.json
+```
+`model_config.json` stores architecture settings; `preprocessing_config.json`
+stores feature order, target transform, intervention timing, timestep
+construction, and static scaler parameters.
+### 6. Upload To Hugging Face
+Authenticate first:
+```bash
+hf auth login
+```
+Upload an artifact:
+```bash
+hf upload dide-ic/stateMINT artifacts/prevalence prevalence/ \
+  --commit-message "Add prevalence model artifact"
+```
+Create a release tag:
+```bash
+hf repos tag create dide-ic/stateMINT v1.0.0 \
+  --revision main \
+  --message "Release v1.0.0"
+```
+## Configuration
+Main Hydra configs in `stateMINT/conf`:
+- `train_config.yaml` for training.
+- `viz_config.yaml` for prediction visualizations.
+- `export_config.yaml` for artifact export.
+- `target/prevalence.yaml` and `target/cases.yaml` for target-specific defaults.
+- `sweeps/*.yaml` for Weights & Biases sweep definitions.
+Select a target with `target=prevalence` or `target=cases`; override config
+values from the command line with Hydra syntax.
+## Development
+Run the test suite:
+```bash
+uv run pytest tests/
+```
+Skip slow or local-only tests:
+```bash
+uv run pytest tests/ -m "not slow"
+uv run pytest tests/ -m "not local"
+uv run pytest tests/ -m "not slow and not local" # skip both
+```
+Run linting and formatting:
+```bash
+uv run ruff check
+uv run ruff format
+```
+## Repository Layout
+```text
+stateMINT/
+|-- common/              # shared dataclasses, transforms, and model helpers
+|-- conf/                # Hydra configs for training, export, viz, and sweeps
+|-- data/                # DuckDB fetch, preprocessing, features, and loaders
+|-- eval/                # metrics and prediction/target visualization helpers
+|-- model/               # Mamba2 regressor and artifact loading utilities
+|-- training/            # optimizer, train/eval steps, loss, and checkpointing
+|-- filter_raw_data.py   # CLI for building filtered parquet datasets
+|-- train.py             # Hydra training entry point
+|-- visualise_predictions.py
+`-- model_export.py      # Hydra artifact export entry point
+tests/                   # unit tests
+artifacts/               # exported model artifact examples/metadata
+viz_outputs/             # generated prediction visualization outputs
+```
+## Contributing
+See [CONTRIBUTING.md](CONTRIBUTING.md) for contribution guidelines and the
+development workflow.

mintstate-0.1.0.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,33 @@
+stateMINT/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+stateMINT/filter_raw_data.py,sha256=KjOC7GeZ19lOl4i4PTqkC17cCCYdMPrrizfQftlwKLM,1623
+stateMINT/model_export.py,sha256=JynbP6gTHgV7ZrSDpWWf4l_lZbHl89zd5R_ZTaTupL4,2903
+stateMINT/train.py,sha256=eI_f2gjCXe-PYXZd1Gm_cv404GezX793Cbb5g0n859E,5696
+stateMINT/visualise_predictions.py,sha256=4MrwoutGevqHilSEAWToCchuxHcykZuPkFrX5d-bBlo,1853
+stateMINT/common/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+stateMINT/common/dataclasses.py,sha256=YYVby4nOZQehS5qiYngVkdomDL8BUkZQ6xoTa5gAr98,260
+stateMINT/common/utils.py,sha256=oLmz5HWAb5QwXP26_4WmeJ_AefBCW6wAT5pzQ7n0X7w,1748
+stateMINT/conf/export_config.yaml,sha256=aPmSsxJMJvZiigfjrpwavVNidl2tR9yJVJTcdMtoJmM,426
+stateMINT/conf/train_config.yaml,sha256=lszJtBir7GEvVKjlfFUeBzgtHnKnAUiJ5V7DFyuZgQQ,911
+stateMINT/conf/viz_config.yaml,sha256=q74YLMK7QWoBm_uXJTWZW2GcAx5AaMTjI_tLMD_YyrI,517
+stateMINT/conf/sweeps/cases.yaml,sha256=P2w1a3VDwP_6h7bBVRkt7DG9rUe4EDl--cfSeAbkhe0,699
+stateMINT/conf/sweeps/prevalence.yaml,sha256=cQNg8YqldLuzYwwUERjjQvZaK3EMfiOpoWDGY92k-Ig,711
+stateMINT/conf/target/cases.yaml,sha256=9LkY6jjAoC2tD1QObJlghTO0t1xl95Q3x9ypyTfgVh4,199
+stateMINT/conf/target/prevalence.yaml,sha256=nUKOpVnqutYpCuEchz5rhbVr_bZ6tpFZK_pPbBqBIao,198
+stateMINT/data/__init__.py,sha256=FXunf0wXzF1jDJUHo2AogDqvzmbryc7bLo9Mg6VjLmQ,403
+stateMINT/data/dataset.py,sha256=idTvwz0i3ualujXp-Bh8F67ROASNT1vr5ygHqf_RbDQ,1689
+stateMINT/data/features.py,sha256=W64e3PE4ZRM7ufLyPB4sJGWDnSpmpJZxkMNgWqfuQxc,2714
+stateMINT/data/fetch.py,sha256=MZelwGuOkGwx_5b0rKWryIoUJPNF7GgxoissA2Ebyvk,6110
+stateMINT/data/preprocessing.py,sha256=KBbH-lIMUgixac3YjfENBr5YagHvRy-uA7pzFJb3OFo,17428
+stateMINT/eval/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+stateMINT/eval/metrics.py,sha256=J79ANLzGGb3s_ARFExF4AsR0YRqPULQsKwVL0MmZKRk,4335
+stateMINT/eval/viz_preds_truth.py,sha256=CTDT58MkRZgxVRaZDUCTofCW6VOWZQtVPN07Od-bSzo,6104
+stateMINT/model/__init__.py,sha256=xL_8A4TghG_bYEYuU8qNl_ov5yFtB8WcvYzkPp239vY,67
+stateMINT/model/hub.py,sha256=OHveRm0qvXpuAd6f-4QOg7MTkO-7tJ3BGfC6npWSmEg,4802
+stateMINT/model/mamba2.py,sha256=IIo3VDuSq1UbcBnDWpuOCB-s6wCJMRhL7FOe_pMIAek,5581
+stateMINT/training/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+stateMINT/training/checkpoint.py,sha256=jKQmeP28aCY_C7koHL3P8OmQHsmDhVA0QjmL77ryO6A,4372
+stateMINT/training/loss.py,sha256=ZmP62C8i-xRB_pNMTGl4-LkT5esrtgwOmpJozbbL64Q,746
+stateMINT/training/train_step.py,sha256=CMYt2kcmpKXI5qgsk2oN52AKgr-emyPwjTj9OU70sPA,4469
+mintstate-0.1.0.dist-info/METADATA,sha256=Z_ZP7Okvm8OFnrYKB2iBlb1SAQVyNzm9mlQN_YQVmEE,10137
+mintstate-0.1.0.dist-info/WHEEL,sha256=mffPy8wBnZQn2VnJUU5jE99KsxaSfiyMHV9Yt0aLVxs,87
+mintstate-0.1.0.dist-info/RECORD,,

mintstate-0.1.0.dist-info/WHEEL ADDED Viewed

@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: hatchling 1.30.1
+Root-Is-Purelib: true
+Tag: py3-none-any

stateMINT/__init__.py ADDED Viewed

File without changes

stateMINT/common/__init__.py ADDED Viewed

File without changes

stateMINT/common/dataclasses.py ADDED Viewed

@@ -0,0 +1,11 @@
+from typing import Literal, Protocol
+from flax import nnx
+from omegaconf import DictConfig
+# Closed set of target names accepted wherever a `predictor` argument is typed.
+Predictor = Literal["prevalence", "cases"]
+class ModelFactory(Protocol):
+    """Structural type for classes that expose a `from_cfg` alternate constructor."""
+    # Takes a config and an input size and returns an nnx.Module.
+    @classmethod
+    def from_cfg(cls, cfg: DictConfig, input_size: int) -> nnx.Module: ...

stateMINT/common/utils.py ADDED Viewed

@@ -0,0 +1,74 @@
+import numpy as np
+import jax
+import jax.numpy as jnp
+from .dataclasses import Predictor
+from jaxtyping import Array
+from flax import nnx
+def transform_targets_np(y: np.ndarray, predictor: Predictor, eps: float = 1e-5) -> np.ndarray:
+    """
+    Apply train-time transform to targets.
+    Prevalence targets are clipped to [eps, 1 - eps] and mapped with the logit.
+    Any other predictor has negative values floored at zero and is mapped with
+    log1p.
+    Args:
+        y: Target values.
+        predictor: Target type.
+        eps: Prevalence clipping epsilon; only read on the prevalence branch.
+    Returns:
+        Transformed target values.
+    """
+    if predictor == "prevalence":
+        # Clipping keeps y and (1 - y) away from zero so the logit below
+        # avoids log(0) and division by zero (for eps > 0).
+        y = np.clip(y, eps, 1.0 - eps)
+        return np.log(y / (1.0 - y))  # logit transform
+    else:
+        # Negative inputs are treated as zero before log1p.
+        return np.log1p(np.maximum(y, 0.0))  # log1p transform for counts/rates
+def inverse_transform_np(y: np.ndarray, predictor: Predictor) -> np.ndarray:
+    """
+    Invert transform for metrics/plots.
+    Applies the sigmoid (inverse of the logit) for prevalence and expm1
+    (inverse of log1p) for any other predictor.
+    Args:
+        y: Transformed target values.
+        predictor: Target type.
+    Returns:
+        Values in the original target scale.
+    """
+    if predictor == "prevalence":
+        # NOTE(review): np.exp(-y) can overflow for very negative y; NumPy
+        # then warns but the quotient still evaluates to 0.0 - confirm that
+        # the warning is acceptable for callers.
+        return 1.0 / (1.0 + np.exp(-y))  # sigmoid
+    else:
+        return np.expm1(y)  # inverse of log1p
+def inverse_transform_jax(y: jax.Array, predictor: Predictor) -> jax.Array:
+    """
+    Invert transformed targets with JAX.
+    Applies jax.nn.sigmoid for prevalence and jnp.expm1 for any other
+    predictor.
+    Args:
+        y: Transformed target values.
+        predictor: Target type.
+    Returns:
+        Values in the original target scale.
+    """
+    if predictor == "prevalence":
+        return jax.nn.sigmoid(y)  # inverse of the logit
+    else:
+        return jnp.expm1(y)  # inverse of log1p
+@nnx.jit
+def forward(model: nnx.Module, x: Array) -> Array:
+    """
+    Forward pass through the model.
+    The function is wrapped with nnx.jit. The model is called on the batch and
+    the last axis of its output is squeezed away.
+    Args:
+        model: Model to evaluate.
+        x: Input batch.
+    Returns:
+        Model predictions with shape (B, T).
+    """
+    # assumes the model output has a trailing axis of size 1 - TODO confirm
+    return model(x).squeeze(-1)  # (B, T, 1) -> (B, T)

stateMINT/conf/export_config.yaml ADDED Viewed

@@ -0,0 +1,25 @@
+predictor: prevalence
+# Data
+eps_prevalence: 1.0e-5
+scaler_file: train_outputs/${predictor}/static_scaler.pkl
+window_size: 14
+use_cyclical_time: true
+# Model - ensure these match the checkpointed model's parameters
+n_layers: 4
+d_conv: 4
+expand: 2
+head_dim: 64
+chunk_size: 256
+output_dim: 1
+d_model: 256
+d_state: 128
+dropout: 0.3
+# Checkpointing
+checkpoint_dir: ???
+# General
+seed: 42
+artifact_dir: "artifacts/${predictor}"

stateMINT/conf/sweeps/cases.yaml ADDED Viewed

@@ -0,0 +1,46 @@
+program: stateMINT/train.py
+project: stateMINT-sweep
+name: sweep-cases
+method: bayes
+metric:
+  goal: minimize
+  name: val/loss
+# early_terminate:
+#   type: hyperband
+#   min_iter: 10
+#   eta: 3
+parameters:
+  lr:
+    distribution: log_uniform_values
+    min: 0.001
+    max: 0.01
+  dropout:
+    distribution: uniform
+    min: 0.25
+    max: 0.6
+  batch_size:
+    values: [128]
+  # model-specific
+  d_state:
+    values: [128]
+  d_model:
+    values: [256]
+  n_layers:
+    values: [1, 2, 3, 4]
+command:
+  - ${env}
+  - python
+  - -m
+  - stateMINT.train
+  - "hydra.output_subdir=null"
+  - "hydra.run.dir=."
+  - ${args_no_hyphens}
+  - target=cases
+  - use_wandb=true
+  - num_epochs=300
+  - patience=100

stateMINT/conf/sweeps/prevalence.yaml ADDED Viewed

@@ -0,0 +1,46 @@
+program: stateMINT/train.py
+project: stateMINT-sweep
+name: sweep-prevalence
+method: bayes
+metric:
+  goal: minimize
+  name: val/loss
+# early_terminate:
+#   type: hyperband
+#   min_iter: 40
+#   eta: 3
+parameters:
+  lr:
+    distribution: log_uniform_values
+    min: 0.0001
+    max: 0.001
+  dropout:
+    distribution: uniform
+    min: 0.25
+    max: 0.6
+  batch_size:
+    values: [128]
+  # model-specific
+  d_state:
+    values: [128]
+  d_model:
+    values: [256]
+  n_layers:
+    values: [1, 2, 3, 4]
+command:
+  - ${env}
+  - python
+  - -m
+  - stateMINT.train
+  - "hydra.output_subdir=null"
+  - "hydra.run.dir=."
+  - ${args_no_hyphens}
+  - target=prevalence
+  - use_wandb=true
+  - num_epochs=300
+  - patience=100

stateMINT/conf/target/cases.yaml ADDED Viewed

@@ -0,0 +1,18 @@
+# @package _global_
+predictor: cases
+# Data
+min_cases: 0.1
+ylabel: "Cases per 1000 per day"
+# Model
+n_layers: 4
+d_model: 256
+d_state: 128
+dropout: 0.3
+# Hyperparameters
+lr: 8e-3
+batch_size: 128

stateMINT/conf/target/prevalence.yaml ADDED Viewed

@@ -0,0 +1,18 @@
+# @package _global_
+predictor: prevalence
+# Data
+min_prevalence: 0.01
+ylabel: "Prevalence"
+# Model
+n_layers: 4
+d_model: 256
+d_state: 128
+dropout: 0.4
+# Hyperparameters
+lr: 4e-4
+batch_size: 128

stateMINT/conf/train_config.yaml ADDED Viewed

@@ -0,0 +1,46 @@
+defaults:
+  - _self_ # Compose this file first; the defaults listed below then override its values
+  - target: prevalence
+# Data
+data_file: "data/filtered_data_${predictor}.parquet"
+split_file: "data/split_${predictor}.json"
+num_workers: 0
+use_existing_split: false
+eps_prevalence: 1.0e-5
+use_cyclical_time: true
+# Model
+loss_method: stateMINT.training.loss.weighted_mse
+n_layers: 4
+d_conv: 4
+expand: 2
+head_dim: 64
+chunk_size: 256
+output_dim: 1
+d_model: 256
+d_state: 128
+# Hyperparameters
+num_epochs: 200
+min_epochs: 100
+patience: 50
+diff_loss_alpha: 0.05
+lr: 1e-3
+batch_size: 128
+dropout: 0.3
+weight_decay: 1e-4
+# Checkpointing
+checkpoint_dir: "${output_dir}/ckpts-${cur_time}"
+restore_checkpoint: false
+max_checkpoints_to_keep: 1
+# General
+cur_time: ${now:%Y-%m-%dT%H:%M:%S}
+seed: 42
+use_wandb: false
+output_dir: "train_outputs/${predictor}"
+wandb:
+  project: "stateMINT-${predictor}"
+  name: "train-${predictor}-${cur_time}"