rock-physics-open 0.3.2__py3-none-any.whl
This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects changes between package versions as they appear in the public registry.
- rock_physics_open/__init__.py +0 -0
- rock_physics_open/equinor_utilities/__init__.py +0 -0
- rock_physics_open/equinor_utilities/anisotropy.py +211 -0
- rock_physics_open/equinor_utilities/classification_functions/__init__.py +17 -0
- rock_physics_open/equinor_utilities/classification_functions/class_stats.py +68 -0
- rock_physics_open/equinor_utilities/classification_functions/lin_class.py +53 -0
- rock_physics_open/equinor_utilities/classification_functions/mahal_class.py +63 -0
- rock_physics_open/equinor_utilities/classification_functions/norm_class.py +73 -0
- rock_physics_open/equinor_utilities/classification_functions/poly_class.py +45 -0
- rock_physics_open/equinor_utilities/classification_functions/post_prob.py +27 -0
- rock_physics_open/equinor_utilities/classification_functions/two_step_classification.py +60 -0
- rock_physics_open/equinor_utilities/conversions.py +10 -0
- rock_physics_open/equinor_utilities/gen_utilities/__init__.py +11 -0
- rock_physics_open/equinor_utilities/gen_utilities/dict_to_float.py +38 -0
- rock_physics_open/equinor_utilities/gen_utilities/dim_check_vector.py +113 -0
- rock_physics_open/equinor_utilities/gen_utilities/filter_input.py +131 -0
- rock_physics_open/equinor_utilities/gen_utilities/filter_output.py +88 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/__init__.py +15 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/base_pressure_model.py +170 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/dummy_vars.py +53 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/exponential_model.py +137 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/import_ml_models.py +77 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/polynomial_model.py +132 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/run_regression.py +209 -0
- rock_physics_open/equinor_utilities/machine_learning_utilities/sigmoidal_model.py +241 -0
- rock_physics_open/equinor_utilities/optimisation_utilities/__init__.py +19 -0
- rock_physics_open/equinor_utilities/optimisation_utilities/opt_subst_utilities.py +455 -0
- rock_physics_open/equinor_utilities/snapshot_test_utilities/__init__.py +10 -0
- rock_physics_open/equinor_utilities/snapshot_test_utilities/compare_snapshots.py +184 -0
- rock_physics_open/equinor_utilities/snapshot_test_utilities/snapshots.py +97 -0
- rock_physics_open/equinor_utilities/std_functions/__init__.py +43 -0
- rock_physics_open/equinor_utilities/std_functions/backus_ave.py +68 -0
- rock_physics_open/equinor_utilities/std_functions/dvorkin_nur.py +77 -0
- rock_physics_open/equinor_utilities/std_functions/gassmann.py +165 -0
- rock_physics_open/equinor_utilities/std_functions/hashin_shtrikman.py +224 -0
- rock_physics_open/equinor_utilities/std_functions/hertz_mindlin.py +51 -0
- rock_physics_open/equinor_utilities/std_functions/moduli_velocity.py +67 -0
- rock_physics_open/equinor_utilities/std_functions/reflection_eq.py +120 -0
- rock_physics_open/equinor_utilities/std_functions/rho.py +69 -0
- rock_physics_open/equinor_utilities/std_functions/voigt_reuss_hill.py +149 -0
- rock_physics_open/equinor_utilities/std_functions/walton.py +45 -0
- rock_physics_open/equinor_utilities/std_functions/wood_brie.py +94 -0
- rock_physics_open/equinor_utilities/various_utilities/Equinor_logo.gif +0 -0
- rock_physics_open/equinor_utilities/various_utilities/Equinor_logo.ico +0 -0
- rock_physics_open/equinor_utilities/various_utilities/__init__.py +24 -0
- rock_physics_open/equinor_utilities/various_utilities/display_result_statistics.py +90 -0
- rock_physics_open/equinor_utilities/various_utilities/gassmann_dry_mod.py +56 -0
- rock_physics_open/equinor_utilities/various_utilities/gassmann_mod.py +56 -0
- rock_physics_open/equinor_utilities/various_utilities/gassmann_sub_mod.py +64 -0
- rock_physics_open/equinor_utilities/various_utilities/hs_average.py +59 -0
- rock_physics_open/equinor_utilities/various_utilities/pressure.py +96 -0
- rock_physics_open/equinor_utilities/various_utilities/reflectivity.py +101 -0
- rock_physics_open/equinor_utilities/various_utilities/timeshift.py +104 -0
- rock_physics_open/equinor_utilities/various_utilities/vp_vs_rho_set_statistics.py +170 -0
- rock_physics_open/equinor_utilities/various_utilities/vrh_3_min.py +83 -0
- rock_physics_open/fluid_models/__init__.py +9 -0
- rock_physics_open/fluid_models/brine_model/__init__.py +5 -0
- rock_physics_open/fluid_models/brine_model/brine_properties.py +178 -0
- rock_physics_open/fluid_models/gas_model/__init__.py +5 -0
- rock_physics_open/fluid_models/gas_model/gas_properties.py +319 -0
- rock_physics_open/fluid_models/oil_model/__init__.py +5 -0
- rock_physics_open/fluid_models/oil_model/dead_oil_density.py +65 -0
- rock_physics_open/fluid_models/oil_model/dead_oil_velocity.py +30 -0
- rock_physics_open/fluid_models/oil_model/live_oil_density.py +82 -0
- rock_physics_open/fluid_models/oil_model/live_oil_velocity.py +24 -0
- rock_physics_open/fluid_models/oil_model/oil_bubble_point.py +69 -0
- rock_physics_open/fluid_models/oil_model/oil_properties.py +146 -0
- rock_physics_open/sandstone_models/__init__.py +59 -0
- rock_physics_open/sandstone_models/cemented_shalysand_sandyshale_models.py +304 -0
- rock_physics_open/sandstone_models/constant_cement_models.py +204 -0
- rock_physics_open/sandstone_models/constant_cement_optimisation.py +125 -0
- rock_physics_open/sandstone_models/contact_cement_model.py +138 -0
- rock_physics_open/sandstone_models/curvefit_sandstone_models.py +143 -0
- rock_physics_open/sandstone_models/friable_models.py +177 -0
- rock_physics_open/sandstone_models/friable_optimisation.py +115 -0
- rock_physics_open/sandstone_models/friable_shalysand_sandyshale_models.py +235 -0
- rock_physics_open/sandstone_models/patchy_cement_fluid_substitution_model.py +477 -0
- rock_physics_open/sandstone_models/patchy_cement_model.py +384 -0
- rock_physics_open/sandstone_models/patchy_cement_optimisation.py +254 -0
- rock_physics_open/sandstone_models/unresolved_cemented_sandshale_models.py +134 -0
- rock_physics_open/sandstone_models/unresolved_friable_sandshale_models.py +126 -0
- rock_physics_open/shale_models/__init__.py +19 -0
- rock_physics_open/shale_models/dem.py +174 -0
- rock_physics_open/shale_models/dem_dual_por.py +61 -0
- rock_physics_open/shale_models/kus_tok.py +59 -0
- rock_physics_open/shale_models/multi_sca.py +133 -0
- rock_physics_open/shale_models/pq.py +102 -0
- rock_physics_open/shale_models/sca.py +90 -0
- rock_physics_open/shale_models/shale4_mineral.py +147 -0
- rock_physics_open/shale_models/shale4_mineral_dem_overlay.py +92 -0
- rock_physics_open/span_wagner/__init__.py +5 -0
- rock_physics_open/span_wagner/co2_properties.py +444 -0
- rock_physics_open/span_wagner/coefficients.py +165 -0
- rock_physics_open/span_wagner/equations.py +104 -0
- rock_physics_open/span_wagner/tables/__init__.py +0 -0
- rock_physics_open/span_wagner/tables/carbon_dioxide_density.npz +0 -0
- rock_physics_open/span_wagner/tables/lookup_table.py +33 -0
- rock_physics_open/t_matrix_models/Equinor_logo.ico +0 -0
- rock_physics_open/t_matrix_models/__init__.py +35 -0
- rock_physics_open/t_matrix_models/carbonate_pressure_substitution.py +124 -0
- rock_physics_open/t_matrix_models/curvefit_t_matrix_exp.py +123 -0
- rock_physics_open/t_matrix_models/curvefit_t_matrix_min.py +86 -0
- rock_physics_open/t_matrix_models/parse_t_matrix_inputs.py +297 -0
- rock_physics_open/t_matrix_models/run_t_matrix.py +243 -0
- rock_physics_open/t_matrix_models/t_matrix_C.py +210 -0
- rock_physics_open/t_matrix_models/t_matrix_opt_fluid_sub_exp.py +137 -0
- rock_physics_open/t_matrix_models/t_matrix_opt_fluid_sub_petec.py +167 -0
- rock_physics_open/t_matrix_models/t_matrix_opt_forward_model_exp.py +76 -0
- rock_physics_open/t_matrix_models/t_matrix_opt_forward_model_min.py +89 -0
- rock_physics_open/t_matrix_models/t_matrix_parameter_optimisation_exp.py +176 -0
- rock_physics_open/t_matrix_models/t_matrix_parameter_optimisation_min.py +162 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/__init__.py +12 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/array_functions.py +75 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_c_eff.py +163 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_isolated.py +95 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_kd.py +40 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_kd_eff.py +116 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_kd_uuv.py +18 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_pressure.py +140 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_t.py +71 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_td.py +42 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_theta.py +43 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_x.py +33 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/calc_z.py +50 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/check_and_tile.py +43 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/g_tensor.py +140 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/iso_av.py +60 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/iso_ave_all.py +55 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/pressure_input.py +44 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/t_matrix_vec.py +278 -0
- rock_physics_open/t_matrix_models/t_matrix_vector/velocity_vti_angles.py +81 -0
- rock_physics_open/t_matrix_models/tmatrix_python.dll +0 -0
- rock_physics_open/t_matrix_models/tmatrix_python.so +0 -0
- rock_physics_open/ternary_plots/__init__.py +3 -0
- rock_physics_open/ternary_plots/gen_ternary_plot.py +73 -0
- rock_physics_open/ternary_plots/shale_prop_ternary.py +337 -0
- rock_physics_open/ternary_plots/ternary_patches.py +277 -0
- rock_physics_open/ternary_plots/ternary_plot_utilities.py +197 -0
- rock_physics_open/ternary_plots/unconventionals_ternary.py +75 -0
- rock_physics_open/version.py +34 -0
- rock_physics_open-0.3.2.dist-info/METADATA +90 -0
- rock_physics_open-0.3.2.dist-info/RECORD +145 -0
- rock_physics_open-0.3.2.dist-info/WHEEL +5 -0
- rock_physics_open-0.3.2.dist-info/licenses/LICENSE +165 -0
- rock_physics_open-0.3.2.dist-info/top_level.txt +1 -0
rock_physics_open/equinor_utilities/gen_utilities/dim_check_vector.py
@@ -0,0 +1,113 @@
+from typing import Any, overload
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+
+
+@overload
+def dim_check_vector(
+    args: list[Any] | tuple[Any, ...],
+    force_type: np.dtype | None = ...,
+) -> list[npt.NDArray[Any] | pd.DataFrame]:
+    """Overload for when the input is a list or tuple."""
+
+
+@overload
+def dim_check_vector(
+    args: pd.DataFrame,
+    force_type: np.dtype | None = ...,
+) -> pd.DataFrame:
+    """Overload for when the input is a pandas DataFrame."""
+
+
+@overload
+def dim_check_vector(
+    args: npt.NDArray[Any],
+    force_type: np.dtype | None = ...,
+) -> npt.NDArray[Any]:
+    """Overload for when the input is a numpy array."""
+
+
+def dim_check_vector(
+    args: list[Any] | tuple[Any, ...] | npt.NDArray[Any] | pd.DataFrame,
+    force_type: np.dtype | None = None,
+) -> npt.NDArray[Any] | pd.DataFrame | list[npt.NDArray[Any] | pd.DataFrame]:
+    """
+    Check that all inputs are of the same (one-dimensional) size. Raise ValueError in case there are several lengths
+    present in the inputs. All inputs will be checked and possibly expanded to common length. Only the first dimension
+    is harmonised.
+
+    Parameters
+    ----------
+    args : list or tuple
+        Input list or tuple of scalars, numpy arrays or pandas data frames of numerical or boolean type.
+    force_type : np.dtype
+        Force all outputs to be of a specific dtype.
+
+    Returns
+    -------
+    output_args : list
+        List of inputs where all are of the same length.
+    """
+    single_types = (np.ndarray, pd.DataFrame)
+    iterable_types = (list, tuple)
+    allowed_types = single_types + iterable_types
+    if not isinstance(args, allowed_types):  # pyright: ignore[reportUnnecessaryIsInstance] | Kept for backward compatibility
+        raise ValueError("dim_check_vector: unknown input type: {}".format(type(args)))  # pyright: ignore[reportUnreachable] | Kept for backward compatibility
+
+    # Single array or dataframe is just returned
+    if isinstance(args, single_types):
+        if force_type is not None:
+            try:
+                args = args.astype(force_type)
+            except ValueError:
+                raise ValueError(
+                    "dim_check_vector: not possible to force dtype to {}".format(
+                        force_type
+                    )
+                )
+        return args
+
+    # If any input is a scalar, make it into an array
+    if force_type is not None:
+        try:
+            args = [
+                np.array(item, ndmin=1, dtype=force_type)
+                if np.isscalar(item)
+                else item.astype(force_type)
+                for item in args
+            ]
+        except ValueError:
+            raise ValueError(
+                "dim_check_vector: not possible to force dtype to {}".format(force_type)
+            )
+    else:
+        args = [np.array(item, ndmin=1) if np.isscalar(item) else item for item in args]
+
+    # Can now test for length - must either be a scalar or have the same length
+    max_length: int = np.max([item.shape[0] for item in args])
+    if not np.all([item.shape[0] == max_length or item.shape[0] == 1 for item in args]):
+        raise ValueError(
+            "dim_check_vector: Unequal array lengths in input to dim_check_vector"
+        )
+
+    output_arg: list[npt.NDArray[Any] | pd.DataFrame] = []
+    for item in args:
+        if item.shape[0] == max_length:
+            output_arg.append(item)
+        else:
+            item_dim = item.ndim
+            repeat_tuple = tuple([max_length] + [1] * (item_dim - 1))
+            if isinstance(item, pd.DataFrame):
+                output_arg.append(
+                    pd.DataFrame(
+                        np.tile(np.array(item), repeat_tuple),
+                        columns=item.columns,
+                        index=np.arange(max_length),
+                    )
+                )
+            else:
+                output_arg.append(np.tile(item, repeat_tuple))
+
+    return output_arg
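A minimal usage sketch of the harmonisation contract above (illustrative only; the import path assumes gen_utilities re-exports dim_check_vector from its __init__.py, otherwise import the module directly):

    import numpy as np
    import pandas as pd

    from rock_physics_open.equinor_utilities.gen_utilities import dim_check_vector

    vp = np.array([2000.0, 2100.0, 2200.0])  # length-3 log
    rho = 2.65                               # scalar, promoted and tiled to length 3
    phi = pd.DataFrame({"phi": [0.2]})       # single-row frame, tiled to 3 rows

    vp_out, rho_out, phi_out = dim_check_vector([vp, rho, phi], force_type=np.dtype(float))
    assert rho_out.shape == (3,) and len(phi_out) == 3

    # An input of length 2 alongside length 3 would raise
    # "dim_check_vector: Unequal array lengths in input to dim_check_vector"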
rock_physics_open/equinor_utilities/gen_utilities/filter_input.py
@@ -0,0 +1,131 @@
+from sys import byteorder
+from typing import Any
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+
+WRONG_BYTEORDER = ">" if byteorder == "little" else "<"
+
+
+def filter_input_log(
+    args: list[Any] | tuple[Any, ...] | npt.NDArray[Any] | pd.DataFrame,
+    working_int: npt.NDArray[Any] | None = None,
+    negative: bool = False,
+    no_zero: bool = False,
+    positive: bool = True,
+) -> tuple[npt.NDArray[np.bool_], list[npt.NDArray[Any] | pd.DataFrame]]:
+    """
+    Check for valid input values in numpy arrays or pandas data frames. Default behaviour is to
+    identify missing values - assumed to be NaN and Inf. Other conditions
+    can be stated in the keyword arguments. Unknown conditions are ignored and a warning
+    is issued. Run dim_check_vector to make sure that all inputs have the same length.
+    Erroneous values in a sample in one log will remove the sample from all the logs.
+    All inputs must have the same array length (data frames the same number of indices).
+
+    Parameters
+    ----------
+    args : list or tuple or np.ndarray or pd.DataFrame
+        Inputs to be filtered, single array or dataframe or lists of arrays or data frames.
+    working_int : np.ndarray
+        Valid positions are shown as values > 0.
+    negative : bool
+        Positive values are excluded (zero values are retained).
+    no_zero : bool
+        Zero values are excluded.
+    positive : bool
+        Negative values are excluded.
+
+    Returns
+    -------
+    tuple
+        idx, output_args : (np.ndarray, list)
+        indices of valid values [bool],
+        list of input arrays at valid indices.
+    """
+    type_error = "filter_input_log: unknown input data type: {}".format(type(args))
+    size_error = "filter_input_log: inputs of different length"
+
+    if not isinstance(args, (list, tuple, np.ndarray, pd.DataFrame)):  # pyright: ignore[reportUnnecessaryIsInstance] | Kept for backward compatibility
+        raise ValueError(type_error)  # pyright: ignore[reportUnreachable] | Kept for backward compatibility
+    # Make sure that 'args' is iterable
+    if isinstance(args, (np.ndarray, pd.DataFrame)):
+        args = [args]
+
+    # Input tuple
+    if isinstance(args, tuple):
+        args = list(args)
+
+    # Need to preserve original inputs
+    input_args = args.copy()
+
+    # Test that inputs are of the right types and the same length
+    if not np.all([isinstance(log, (np.ndarray, pd.DataFrame)) for log in args]):
+        raise ValueError(type_error)
+    if not np.all([log.shape[0] == args[0].shape[0] for log in args]):
+        raise ValueError(size_error)
+
+    # Generate pandas series from numpy arrays
+    args = [pd.Series(log) if isinstance(log, np.ndarray) else log for log in args]
+    # Merge into a data frame
+    logs = pd.concat(args, axis=1)
+
+    # If any of the input logs are of type boolean, False means that they should not be included,
+    # regardless of filter flags
+    # https://github.com/pandas-dev/pandas/issues/32432
+    # idx = ~logs.any(bool_only=True, axis=1)
+    # Need to do it the cumbersome way for the time being
+    bool_col = logs.dtypes == "bool"
+    if any(bool_col):
+        idx = ~logs.loc[:, logs.columns[bool_col]].any(axis=1)
+        logs.drop(columns=logs.columns[bool_col], inplace=True)
+    else:
+        idx = pd.Series(index=logs.index, data=np.zeros_like(logs.index).astype(bool))
+
+    # Standard checks: NaN and Inf
+    idx = np.logical_or(idx, logs.isna().any(axis=1))
+    idx = np.logical_or(idx, logs.isin([np.inf, -np.inf]).any(axis=1))
+
+    # Remove columns with dtype that is not numeric
+    obj_col = [dd.kind not in ["i", "u", "f", "c"] for dd in logs.dtypes]
+    logs.drop(columns=logs.columns[obj_col], inplace=True)
+
+    # Checks according to the input options input_dict
+    # Only consider working interval if it is included or set to some value
+    if working_int is not None and not np.all(working_int == 0):
+        idx = np.logical_or(idx, working_int == 0)
+    if negative:
+        # noinspection PyTypeChecker
+        idx = np.logical_or(idx, (logs >= 0.0).all(axis=1))
+        # idx = np.logical_or(idx, logs.loc[logs > 0.0]).any(axis=1)
+    if no_zero:
+        idx = np.logical_or(idx, (logs == 0.0).any(axis=1))
+    if positive:
+        # noinspection PyTypeChecker
+        idx = np.logical_or(idx, (logs < 0.0).any(axis=1))
+
+    # Negate idx to identify samples to retain
+    idx = np.logical_not(idx)
+    num_valid_samples = idx.sum()
+    if num_valid_samples == 0:
+        raise ValueError("No acceptable input values")
+    for i in range(len(input_args)):
+        if isinstance(input_args[i], np.ndarray):
+            input_args[i] = input_args[i][idx]
+        else:  # data frame
+            # https://pandas.pydata.org/pandas-docs/stable/user_guide/gotchas.html#byte-ordering-issues
+
+            check_type = (
+                np.array([col_type.byteorder for col_type in input_args[i].dtypes])
+                == WRONG_BYTEORDER
+            )
+            if np.any(check_type):
+                tmp_array = (
+                    input_args[i].to_numpy().byteswap().newbyteorder().astype(float)
+                )
+                cols = input_args[i].columns
+                for j in range(check_type.shape[0]):
+                    if check_type[j]:
+                        input_args[i][cols[j]] = tmp_array[:, j]
+            input_args[i] = input_args[i].loc[idx]
+    return np.array(idx), input_args
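A short sketch of the joint filtering described in the docstring (illustrative only; the import path assumes gen_utilities re-exports filter_input_log). With the default positive=True, a bad value in any one log removes that sample from all logs:

    import numpy as np

    from rock_physics_open.equinor_utilities.gen_utilities import filter_input_log

    vp = np.array([2000.0, np.nan, 2200.0, 2300.0])
    vs = np.array([1000.0, 1100.0, -1.0, 1300.0])

    idx, (vp_f, vs_f) = filter_input_log([vp, vs])
    # idx == [True, False, False, True]: the NaN in vp removes sample 1 and the
    # negative value in vs removes sample 2 from both logs.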
rock_physics_open/equinor_utilities/gen_utilities/filter_output.py
@@ -0,0 +1,88 @@
+from typing import Any
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+
+
+def filter_output(
+    idx_inp: npt.NDArray[np.bool_],
+    inp_log: list[npt.NDArray[Any] | pd.DataFrame]
+    | tuple[npt.NDArray[Any] | pd.DataFrame, ...]
+    | npt.NDArray[Any]
+    | pd.DataFrame,
+) -> list[npt.NDArray[Any] | pd.DataFrame]:
+    """
+    Function to restore outputs from a plugin to original length and
+    with values at correct positions. The logs are assumed to have gone through
+    matching input filtering done by gen_utilities.filter_input_log earlier.
+
+    Parameters
+    ----------
+    idx_inp: np.ndarray
+        boolean array which is True at locations to be filled; the length of idx_inp is the
+        returned length of the arrays or data frames.
+    inp_log: tuple or list or np.ndarray or pd.DataFrame
+        input numpy array(s) or pandas data frame(s), in list or tuple, that are to be expanded to original
+        length.
+
+    Returns
+    -------
+    return_logs : list
+        Expanded inputs.
+    """
+
+    def _expand_array(
+        idx: npt.NDArray[np.bool_], inp_single_log: npt.NDArray[Any]
+    ) -> npt.NDArray[Any]:
+        logs = np.ones(idx.shape, dtype=float) * np.nan
+        try:
+            logs[idx] = inp_single_log.flatten()
+        except ValueError:
+            # Assume that the dtype of the input log is not fit for casting to float, set to object and retry
+            logs = logs.astype(object)
+            logs[idx] = inp_single_log
+        return logs.reshape(idx.shape)
+
+    def _expand_df(idx: npt.NDArray[np.bool_], inp_df: pd.DataFrame) -> pd.DataFrame:
+        logs = pd.DataFrame(
+            columns=inp_df.columns, index=np.arange(idx.shape[0], dtype=np.intp)
+        )
+        logs.loc[idx] = inp_df
+        return logs
+
+    if not isinstance(inp_log, (list, tuple, np.ndarray, pd.DataFrame)):  # pyright: ignore[reportUnnecessaryIsInstance] | Kept for backward compatibility
+        raise ValueError(  # pyright: ignore[reportUnreachable] | Kept for backward compatibility
+            "filter_output: unknown input data type: {}".format(type(inp_log))
+        )
+    if not isinstance(idx_inp, (list, np.ndarray)):  # pyright: ignore[reportUnnecessaryIsInstance] | Kept for backward compatibility
+        raise ValueError(  # pyright: ignore[reportUnreachable] | Kept for backward compatibility
+            "filter_output: unknown filter array data type: {}".format(type(idx_inp))
+        )
+
+    # Make iterable in case of single input
+    if isinstance(inp_log, (np.ndarray, pd.DataFrame)):
+        inp_log = [inp_log]
+
+    if isinstance(idx_inp, np.ndarray):  # pyright: ignore[reportUnnecessaryIsInstance] | Kept for backward compatibility
+        idx_inp_ = [idx_inp]
+
+    # Possible to simplify?
+    if len(idx_inp_) != len(inp_log):
+        if len(idx_inp_) == 1:
+            idx_inp_ = idx_inp_ * len(inp_log)
+        else:
+            raise ValueError(
+                "filter_output: mismatch between length of filter arrays and inputs: {} and {}".format(
+                    len(idx_inp), len(inp_log)
+                )
+            )
+
+    return_logs: list[npt.NDArray[Any] | pd.DataFrame] = []
+    for this_idx, this_log in zip(idx_inp_, inp_log):
+        if isinstance(this_log, np.ndarray):
+            return_logs.append(_expand_array(this_idx, this_log))
+        elif isinstance(this_log, pd.DataFrame):  # pyright: ignore[reportUnnecessaryIsInstance] | Kept for backward compatibility
+            return_logs.append(_expand_df(this_idx, this_log))
+
+    return return_logs
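filter_output is the inverse step of filter_input_log: results computed on the filtered samples are written back at their original positions, with NaN in the rejected slots. A round-trip sketch (illustrative only, same import-path assumption as above):

    import numpy as np

    from rock_physics_open.equinor_utilities.gen_utilities import (
        filter_input_log,
        filter_output,
    )

    vp = np.array([2000.0, np.nan, 2200.0])
    idx, (vp_valid,) = filter_input_log(vp)

    ai = vp_valid * 2.65  # some computation on the valid samples only
    (ai_full,) = filter_output(idx, ai)
    # ai_full has the original length 3, with NaN at position 1.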
rock_physics_open/equinor_utilities/machine_learning_utilities/__init__.py
@@ -0,0 +1,15 @@
+from .dummy_vars import generate_dummy_vars
+from .exponential_model import ExponentialPressureModel
+from .import_ml_models import import_model
+from .polynomial_model import PolynomialPressureModel
+from .run_regression import run_regression
+from .sigmoidal_model import SigmoidalPressureModel
+
+__all__ = [
+    "generate_dummy_vars",
+    "import_model",
+    "run_regression",
+    "ExponentialPressureModel",
+    "PolynomialPressureModel",
+    "SigmoidalPressureModel",
+]
rock_physics_open/equinor_utilities/machine_learning_utilities/base_pressure_model.py
@@ -0,0 +1,170 @@
+import pickle
+from abc import ABC, abstractmethod
+from typing import Any, Self
+
+import numpy as np
+
+
+class BasePressureModel(ABC):
+    """
+    Abstract base class for pressure sensitivity models.
+
+    All pressure models follow the convention:
+    - predict(): returns differential change (depleted - in_situ)
+    - predict_abs(): returns absolute values for specified case
+    - predict_max(): uses model_max_pressure instead of depleted pressure
+
+    Input validation is delegated to concrete implementations since
+    each model has different column requirements.
+    """
+
+    def __init__(self, model_max_pressure: float | None = None, description: str = ""):
+        """
+        Initialize base pressure model.
+
+        Parameters
+        ----------
+        model_max_pressure : float | None
+            Maximum pressure for predict_max method. Required for predict_max to work.
+        description : str
+            Human-readable description of the model instance.
+        """
+        self._model_max_pressure: float | None = model_max_pressure
+        self._description: str = description
+
+    @property
+    def max_pressure(self) -> float | None:
+        """Maximum pressure setting for predict_max method."""
+        return self._model_max_pressure
+
+    @property
+    def description(self) -> str:
+        """Model description."""
+        return self._description
+
+    def predict(self, inp_arr: np.ndarray) -> np.ndarray:
+        """
+        Predict differential change: result(depleted) - result(in_situ).
+
+        Parameters
+        ----------
+        inp_arr : np.ndarray
+            Input array with pressure columns and other model-specific parameters.
+
+        Returns
+        -------
+        np.ndarray
+            Differential change values.
+        """
+        arr = self.validate_input(inp_arr)
+        return self.predict_abs(arr, case="depleted") - self.predict_abs(
+            arr, case="in_situ"
+        )
+
+    def predict_max(self, inp_arr: np.ndarray) -> np.ndarray:
+        """
+        Predict using model_max_pressure instead of depleted pressure.
+
+        Parameters
+        ----------
+        inp_arr : np.ndarray
+            Input array where last column (depleted pressure) will be replaced.
+
+        Returns
+        -------
+        np.ndarray
+            Values at model_max_pressure minus values at in_situ pressure.
+
+        Raises
+        ------
+        ValueError
+            If model_max_pressure is not set.
+        """
+        if self._model_max_pressure is None:
+            raise ValueError('Field "model_max_pressure" is not set')
+
+        arr = self.validate_input(inp_arr).copy()
+        # Replace last column (assumed to be depleted pressure) with max pressure
+        arr[:, -1] = self._model_max_pressure
+        return self.predict_abs(arr, case="depleted") - self.predict_abs(
+            arr, case="in_situ"
+        )
+
+    @abstractmethod
+    def validate_input(self, inp_arr: np.ndarray) -> np.ndarray:
+        """
+        Validate input array format for this specific model.
+
+        Parameters
+        ----------
+        inp_arr : np.ndarray
+            Input array to validate.
+
+        Returns
+        -------
+        np.ndarray
+            Validated input array.
+
+        Raises
+        ------
+        ValueError
+            If input format is invalid for this model.
+        """
+
+    @abstractmethod
+    def predict_abs(self, inp_arr: np.ndarray, case: str = "in_situ") -> np.ndarray:
+        """
+        Predict absolute values for specified pressure case.
+
+        Parameters
+        ----------
+        inp_arr : np.ndarray
+            Validated input array.
+        case : str
+            Either "in_situ" or "depleted" to specify which pressure to use.
+
+        Returns
+        -------
+        np.ndarray
+            Absolute predicted values.
+        """
+
+    @abstractmethod
+    def todict(self) -> dict[str, Any]:
+        """
+        Convert model to dictionary for serialization.
+
+        Returns
+        -------
+        dict[str, Any]
+            Dictionary containing all model parameters.
+        """
+
+    def save(self, file: str | bytes) -> None:
+        """
+        Save model to pickle file.
+
+        Parameters
+        ----------
+        file : str | bytes
+            File path for saving.
+        """
+        with open(file, "wb") as f_out:
+            pickle.dump(self.todict(), f_out)
+
+    @classmethod
+    @abstractmethod
+    def load(cls, file: str | bytes) -> Self:
+        """
+        Load model from pickle file.
+
+        Parameters
+        ----------
+        file : str | bytes
+            File path for loading.
+
+        Returns
+        -------
+        BasePressureModel
+            Loaded model instance.
+        """
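The contract the abstract methods impose is easiest to see in a toy subclass. The linear model and its column layout below are hypothetical, chosen only for illustration; the shipped concrete models are ExponentialPressureModel, PolynomialPressureModel and SigmoidalPressureModel.

    import numpy as np

    from rock_physics_open.equinor_utilities.machine_learning_utilities.base_pressure_model import (
        BasePressureModel,
    )

    class LinearPressureModel(BasePressureModel):
        # Hypothetical layout: column 0 is in-situ pressure and the last
        # column is depleted pressure, matching the predict_max convention.
        def __init__(self, slope: float, **kwargs):
            super().__init__(**kwargs)
            self._slope = slope

        def validate_input(self, inp_arr: np.ndarray) -> np.ndarray:
            if inp_arr.ndim != 2 or inp_arr.shape[1] < 2:
                raise ValueError("expected a 2D array with at least two pressure columns")
            return inp_arr

        def predict_abs(self, inp_arr: np.ndarray, case: str = "in_situ") -> np.ndarray:
            pressure = inp_arr[:, 0] if case == "in_situ" else inp_arr[:, -1]
            return self._slope * pressure

        def todict(self):
            return {"slope": self._slope, "model_max_pressure": self._model_max_pressure}

        @classmethod
        def load(cls, file):
            import pickle

            with open(file, "rb") as f_in:
                return cls(**pickle.load(f_in))

    model = LinearPressureModel(slope=0.1, model_max_pressure=50.0)
    arr = np.array([[20.0, 30.0]])
    model.predict(arr)      # 0.1 * (30 - 20) -> array([1.])
    model.predict_max(arr)  # 0.1 * (50 - 20) -> array([3.])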
rock_physics_open/equinor_utilities/machine_learning_utilities/dummy_vars.py
@@ -0,0 +1,53 @@
+from typing import cast
+
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+from pandas.api.types import is_numeric_dtype
+from sklearn.preprocessing import OneHotEncoder
+
+
+def generate_dummy_vars(
+    inp_frame: pd.DataFrame,
+    class_var: str,
+    ohe: OneHotEncoder | None = None,
+) -> tuple[npt.NDArray[np.float64], int, npt.NDArray[np.str_]]:
+    """
+    From categorical variables generate a one-hot-encoder, i.e. each value in the categorical variable becomes a binary
+    variable. See sklearn.preprocessing.OneHotEncoder.
+
+    Parameters
+    ----------
+    inp_frame : pd.DataFrame
+        Input data containing categorical variables.
+    class_var : str
+        Name of categorical variable.
+    ohe : preprocessing.OneHotEncoder
+        One-hot-encoder object.
+
+    Returns
+    -------
+    dum_features, no_dummy_cols, dum_var_names : (np.ndarray, int, np.ndarray)
+        dum_features: 2D array with transformed dummy variables, no_dummy_cols: number of columns in returned array,
+        dum_var_names: automatically generated feature names.
+    """
+
+    if is_numeric_dtype(inp_frame[class_var]):
+        # Make sure that the chosen indicator variable contains discrete values
+        inp_frame = inp_frame.astype({class_var: "int32"})
+
+    features_in = np.array(inp_frame[class_var]).reshape(-1, 1)
+
+    if ohe is None:
+        classes = features_in
+        ohe = OneHotEncoder(categories="auto", sparse_output=False)
+        _ = ohe.fit(classes)
+
+    dum_features = cast(  # Casting since scikit-learn is not yet fully typed. `.transform` returns sparse matrix only if `sparse_output=True`.
+        npt.NDArray[np.float64],
+        ohe.transform(features_in),
+    )
+    no_dummy_cols = dum_features.shape[1]
+    dum_var_names = ohe.get_feature_names_out()
+
+    return dum_features, no_dummy_cols, dum_var_names
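Usage sketch (illustrative only; the import relies on the re-export in machine_learning_utilities/__init__.py shown above):

    import pandas as pd

    from rock_physics_open.equinor_utilities.machine_learning_utilities import (
        generate_dummy_vars,
    )

    df = pd.DataFrame({"facies": [1, 2, 1, 3]})
    dum, n_cols, names = generate_dummy_vars(df, "facies")
    # dum.shape == (4, 3); n_cols == 3; names come from
    # OneHotEncoder.get_feature_names_out(), e.g. ['x0_1', 'x0_2', 'x0_3']

Since the fitted encoder is not returned, pass a pre-fitted OneHotEncoder through the ohe argument when the same category mapping must be applied consistently across several datasets.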