PyPI - onekit - Versions diffs - 1.2.0__tar.gz → 1.4.0__tar.gz - Mend

onekit 1.2.0.tar.gz → 1.4.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{onekit-1.2.0 → onekit-1.4.0}/PKG-INFO RENAMED Viewed

@@ -1,13 +1,13 @@
 Metadata-Version: 2.1
 Name: onekit
-Version: 1.2.0
+Version: 1.4.0
 Summary: All-in-One Python Kit.
 Home-page: https://github.com/estripling/onekit
 License: BSD 3-Clause
 Keywords: onekit
 Author: Eugen Stripling
 Author-email: estripling042@gmail.com
-Requires-Python: >=3.8.1
+Requires-Python: >=3.9
 Classifier: License :: Other/Proprietary License
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.9
@@ -15,7 +15,6 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3 :: Only
-Classifier: Programming Language :: Python :: 3.8
 Requires-Dist: pytz (>=2024.1,<2025.0)
 Requires-Dist: toolz (>=0.12.0,<0.13.0)
 Project-URL: Documentation, https://onekit.readthedocs.io/en/stable/
@@ -46,7 +45,7 @@ All-in-One Python Kit:
 ## Installation
-`onekit` is available on [PyPI](https://pypi.org/project/onekit/) for Python 3.8+:
+`onekit` is available on [PyPI](https://pypi.org/project/onekit/) for Python 3.9+:
 ```console
 pip install onekit

{onekit-1.2.0 → onekit-1.4.0}/README.md RENAMED Viewed

@@ -22,7 +22,7 @@ All-in-One Python Kit:
 ## Installation
-`onekit` is available on [PyPI](https://pypi.org/project/onekit/) for Python 3.8+:
+`onekit` is available on [PyPI](https://pypi.org/project/onekit/) for Python 3.9+:
 ```console
 pip install onekit

{onekit-1.2.0 → onekit-1.4.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "onekit"
-version = "1.2.0"
+version = "1.4.0"
 description = "All-in-One Python Kit."
 authors = ["Eugen Stripling <estripling042@gmail.com>"]
 license = "BSD 3-Clause"
@@ -10,7 +10,6 @@ documentation = "https://onekit.readthedocs.io/en/stable/"
 keywords = ["onekit"]
 classifiers = [
     "Programming Language :: Python :: 3 :: Only",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
@@ -18,7 +17,7 @@ classifiers = [
 ]
 [tool.poetry.dependencies]
-python = ">=3.8.1"
+python = ">=3.9"
 toolz = "^0.12.0"
 pytz = "^2024.1"
@@ -50,6 +49,9 @@ python-semantic-release = "^8.3.0"
 [tool.poetry.group.pandaskit.dependencies]
 pandas = ">=0.23.2"
+[tool.poetry.group.sklearnkit.dependencies]
+scikit-learn = ">=1.3"
 [tool.poetry.group.sparkkit.dependencies]
 pyspark = "3.1.1"

{onekit-1.2.0 → onekit-1.4.0}/src/onekit/mathkit.py RENAMED Viewed

@@ -82,7 +82,8 @@ def collatz(n: int, /) -> Generator:
         n = n // 2 if iseven(n) else 3 * n + 1
-def digitscale(x: Union[int, float], /) -> float:
+@toolz.curry
+def digitscale(x: Union[int, float], /, *, kind: str = "log") -> Union[int, float]:
     """Scale :math:`x` such that its mapped integer part is its number of digits.
     Given a number :math:`x \\in \\mathbb{R}`, the following function
@@ -102,8 +103,24 @@ def digitscale(x: Union[int, float], /) -> float:
     -----
     - :math:`\\lfloor \\cdot \\rfloor`: floor function
     - :math:`\\left[ \\, \\cdot \\, \\right]`: truncation function
-    - For any positive integer :math:`n`, the number of digits in :math:`n` is
-      :math:`1 + \\lfloor \\log_{10} n \\rfloor`
+    - For any positive integer :math:`k`, the number of digits in :math:`k` is
+      :math:`1 + \\lfloor \\log_{10} k \\rfloor`
+    - If `kind="int"`, returns :math:`\\lfloor f(x) \\rfloor`
+    - If `kind="linear"`, linear interpolation is performed:
+    .. math::
+        f_{linear}(x) =
+        \\begin{cases}
+            \\frac{y_{0} (x_{1} - x) + y_{1} (x - x_{0})}{x_{1} - x_{0}}
+              & \\text{ if } |x| \\ge 0.1 \\\\[6pt]
+            0 & \\text{ otherwise }
+        \\end{cases}
+        \\\\[6pt]
+        \\text{ with } n = \\lfloor f(x) \\rfloor, y_{0} = n, y_{1} = n + 1,
+        x_{0} = 10^{n - 1}, \\text{ and } x_{1} = 10^{n}
     See Also
     --------
@@ -121,8 +138,37 @@ def digitscale(x: Union[int, float], /) -> float:
     >>> list(map(mk.digitscale, [-0.5, -5, -50, -500]))
     [0.6989700043360187, 1.6989700043360187, 2.6989700043360187, 3.6989700043360187]
+    >>> # function is curried
+    >>> list(map(mk.digitscale(kind="int"), [-0.5, -5, -50, -500]))
+    [0, 1, 2, 3]
+    >>> list(map(mk.digitscale(kind="linear"), [0.1, 1, 10, 100, 1_000, 10_000]))
+    [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+    >>> list(map(mk.digitscale(kind="linear"), [0.2, 2, 20, 200]))
+    [0.11111111111111112, 1.1111111111111112, 2.111111111111111, 3.111111111111111]
+    >>> list(map(mk.digitscale(kind="linear"), [-0.5, -5, -50, -500]))
+    [0.4444444444444445, 1.4444444444444444, 2.4444444444444446, 3.4444444444444446]
     """
-    return 1 + math.log10(abs(x)) if abs(x) >= 0.1 else 0.0
+    valid_kind = ["log", "int", "linear"]
+    x = abs(x)
+    fx = 1 + math.log10(x) if x >= 0.1 else 0.0
+    if kind == "log":
+        return fx
+    elif kind == "int":
+        return math.floor(fx)
+    elif kind == "linear":
+        n = math.floor(fx)
+        y0, y1 = n, n + 1
+        x0, x1 = 10 ** (n - 1), 10**n
+        return (y0 * (x1 - x) + y1 * (x - x0)) / (x1 - x0) if x >= 0.1 else 0.0
+    else:
+        raise ValueError(f"{kind=} - must be a valid value: {valid_kind}")
 def fibonacci() -> Generator:

{onekit-1.2.0 → onekit-1.4.0}/src/onekit/numpykit.py RENAMED Viewed

@@ -52,7 +52,7 @@ def check_vector(x: ArrayLike, /, *, n_min: int = 1, n_max: int = np.inf) -> Vec
     return x
-def digitscale(x: ArrayLike, /) -> np.ndarray:
+def digitscale(x: ArrayLike, /, *, kind: str = "log") -> np.ndarray:
     """NumPy version of digitscale.
     See Also
@@ -63,10 +63,17 @@ def digitscale(x: ArrayLike, /) -> np.ndarray:
     Examples
     --------
     >>> import onekit.numpykit as npk
-    >>> npk.digitscale([0.1, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000])
-    array([0., 1., 2., 3., 4., 5., 6., 7.])
+    >>> npk.digitscale([0.1, 1, 10, 100, 1_000, 10_000, 2_000_000])
+    array([0.     , 1.     , 2.     , 3.     , 4.     , 5.     , 7.30103])
+    >>> npk.digitscale([0.1, 1, 10, 100, 1_000, 10_000, 100_000, 2_000_000], kind="int")
+    array([0, 1, 2, 3, 4, 5, 6, 7])
+    >>> npk.digitscale([0.2, 2, 20], kind="linear")
+    array([0.11111111, 1.11111111, 2.11111111])
     """
-    return np.vectorize(mk.digitscale, otypes=[float])(x)
+    otypes = [int] if kind == "int" else [float]
+    return np.vectorize(mk.digitscale(kind=kind), otypes=otypes)(x)
 def stderr(x: ArrayLike, /) -> float:

{onekit-1.2.0 → onekit-1.4.0}/src/onekit/pythonkit.py RENAMED Viewed

@@ -1,6 +1,5 @@
 import calendar
 import datetime as dt
-import distutils
 import functools
 import inspect
 import itertools
@@ -37,7 +36,6 @@ __all__ = (
     "coinflip",
     "concat_strings",
     "contrast_sets",
-    "create_path",
     "date_ago",
     "date_ahead",
     "date_count_backward",
@@ -390,21 +388,6 @@ def contrast_sets(x: set, y: set, /, *, n: int = 3) -> dict:
     return output
-def create_path(*strings: str) -> str:
-    """Create path by concatenating strings.
-    Examples
-    --------
-    >>> import onekit.pythonkit as pk
-    >>> pk.create_path("path", "to", "file")
-    'path/to/file'
-    >>> pk.create_path(["hdfs://", "path", "to", "file"])
-    'hdfs://path/to/file'
-    """
-    return functools.reduce(os.path.join, flatten(strings))
 @toolz.curry
 def date_ago(d0: dt.date, /, n: int) -> dt.date:
     """Compute date that is :math:`n \\in \\mathbb{N}_{0}` days ago.
@@ -683,13 +666,13 @@ def highlight_string_differences(lft_str: str, rgt_str: str, /) -> str:
     Examples
     --------
     >>> import onekit.pythonkit as pk
-    >>> print(pk.highlight_string_differences("hello", "hall"))
+    >>> print(pk.highlight_string_differences("hello", "hall"))  # doctest: +SKIP
     hello
      |  |
     hall
     >>> # no differences when there is no '|' character
-    >>> print(pk.highlight_string_differences("hello", "hello"))
+    >>> print(pk.highlight_string_differences("hello", "hello"))  # doctest: +SKIP
     hello
     <BLANKLINE>
     hello
@@ -699,7 +682,7 @@ def highlight_string_differences(lft_str: str, rgt_str: str, /) -> str:
         lft_str,
         concat_strings(
             "",
-            (
+            *(
                 " " if x == y else "|"
                 for x, y in itertools.zip_longest(lft_str, rgt_str, fillvalue="")
             ),
@@ -936,11 +919,31 @@ def prompt_yes_no(question: str, /, *, default: Optional[str] = None) -> bool:
     answer = input(f"{question} {prompt} ").lower()
+    def strtobool(value: str) -> bool:
+        """Convert a string representation of truth to true (1) or false (0).
+        True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
+        are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
+        'value' is anything else.
+        Notes
+        -----
+        - Shamelessly copied and modified from: distutils.util.strtobool
+        - distutils is not available with Python>=3.12
+        """
+        value = value.lower()
+        if value in ("y", "yes", "t", "true", "on", "1"):
+            return True
+        elif value in ("n", "no", "f", "false", "off", "0"):
+            return False
+        else:
+            raise ValueError("invalid truth value {!r}".format(value))
     while True:
         try:
             if answer == "" and default in ["yes", "no"]:
-                return bool(distutils.util.strtobool(default))
-            return bool(distutils.util.strtobool(answer))
+                return bool(strtobool(default))
+            return bool(strtobool(answer))
         except ValueError:
             response_text = "{} Please respond with 'yes' [{}] or 'no' [{}] ".format(

onekit-1.4.0/src/onekit/sklearnkit.py ADDED Viewed

@@ -0,0 +1,153 @@
+from typing import (
+    Optional,
+    Union,
+)
+import numpy as np
+import numpy.typing as npt
+import pandas as pd
+from pandas import DataFrame as PandasDF
+from sklearn import metrics
+from sklearn.utils import validation
+__all__ = (
+    "precision_given_recall_score",
+    "threshold_summary",
+)
+ArrayLike = npt.ArrayLike
+def precision_given_recall_score(
+    y_true: ArrayLike,
+    y_score: ArrayLike,
+    *,
+    min_recall: float,
+    pos_label: Optional[Union[int, str]] = None,
+) -> float:
+    """Compute precision given a desired minimum recall level.
+    Examples
+    --------
+    >>> import onekit.sklearnkit as slk
+    >>> y_true = [0, 1, 1, 1, 0, 0, 0, 1]
+    >>> y_score = [0.1, 0.4, 0.35, 0.8, 0.5, 0.2, 0.75, 0.5]
+    >>> slk.precision_given_recall_score(y_true, y_score, min_recall=0.7)
+    0.6
+    """
+    if not (0 < min_recall <= 1):
+        raise ValueError(f"{min_recall=} - must be a float in (0, 1]")
+    df = (
+        threshold_summary(y_true, y_score, pos_label=pos_label)
+        .filter(items=["precision", "recall"])
+        .query(f"recall >= {min_recall}")
+    )
+    min_empirical_recall = df["recall"].min()
+    return float(
+        0
+        if df.empty
+        else df.query(f"recall == {min_empirical_recall}")["precision"].max()
+    )
+def threshold_summary(
+    y_true: ArrayLike,
+    y_score: ArrayLike,
+    *,
+    pos_label: Optional[Union[int, str]] = None,
+) -> PandasDF:
+    """Threshold summary.
+    Notes
+    -----
+    - Support for binary classification only
+    - Assumption: classifier returns scores
+    - First values correspond to the edge case where everything is predicted positive
+    - Last values correspond to the edge case where everything is predicted negative
+    Examples
+    --------
+    >>> import onekit.sklearnkit as slk
+    >>> y_true = [0, 1, 1, 1, 0, 0, 0, 1]
+    >>> y_score = [0.1, 0.4, 0.35, 0.8, 0.5, 0.2, 0.75, 0.5]
+    >>> with pd.option_context("display.float_format", "{:.2f}".format):
+    ...     slk.threshold_summary(y_true, y_score).T
+                          0    1    2    3    4    5    6    7
+    threshold          0.10 0.20 0.35 0.40 0.50 0.75 0.80  inf
+    predicted_positive 8.00 7.00 6.00 5.00 4.00 2.00 1.00 0.00
+    true_positive      4.00 4.00 4.00 3.00 2.00 1.00 1.00 0.00
+    false_positive     4.00 3.00 2.00 2.00 2.00 1.00 0.00 0.00
+    false_negative     0.00 0.00 0.00 1.00 2.00 3.00 3.00 4.00
+    true_negative      0.00 1.00 2.00 2.00 2.00 3.00 4.00 4.00
+    precision          0.50 0.57 0.67 0.60 0.50 0.50 1.00 1.00
+    recall             1.00 1.00 1.00 0.75 0.50 0.25 0.25 0.00
+    f1                 0.67 0.73 0.80 0.67 0.50 0.33 0.40 0.00
+    accuracy           0.50 0.62 0.75 0.62 0.50 0.50 0.62 0.50
+    balanced_accuracy  0.50 0.62 0.75 0.62 0.50 0.50 0.62 0.50
+    matthews_corrcoef   NaN 0.38 0.58 0.26 0.00 0.00 0.38  NaN
+    """
+    y = validation.column_or_1d(y_true)
+    s = validation.column_or_1d(y_score)
+    validation.check_consistent_length(y, s)
+    validation.assert_all_finite(y)
+    validation.assert_all_finite(s)
+    pos_label = validation._check_pos_label_consistency(pos_label, y)
+    precision, recall, thresholds = metrics.precision_recall_curve(
+        y_true=y,
+        y_score=s,
+        pos_label=pos_label,
+        sample_weight=None,
+        drop_intermediate=False,
+    )
+    is_true_pos = y == pos_label
+    is_true_neg = y != pos_label
+    def is_pred_pos(t: float) -> np.ndarray:
+        return s >= t
+    def is_pred_neg(t: float) -> np.ndarray:
+        return s < t
+    return (
+        pd.DataFrame(np.append(thresholds, np.inf), columns=["t"])
+        .assign(
+            pp=lambda df: df.t.map(lambda t: is_pred_pos(t).sum()),
+            tp=lambda df: df.t.map(lambda t: (is_pred_pos(t) & is_true_pos).sum()),
+            fp=lambda df: df.t.map(lambda t: (is_pred_pos(t) & is_true_neg).sum()),
+            fn=lambda df: df.t.map(lambda t: (is_pred_neg(t) & is_true_pos).sum()),
+            tn=lambda df: df.t.map(lambda t: (is_pred_neg(t) & is_true_neg).sum()),
+            precision=precision,
+            recall=recall,
+            f1=2 * (precision * recall) / (precision + recall),
+            acc=lambda df: (df.tp + df.tn) / (df.tp + df.tn + df.fp + df.fn),
+            bacc=lambda df: 0.5 * (df.tp / (df.tp + df.fn) + df.tn / (df.tn + df.fp)),
+            mcc=lambda df: np.true_divide(
+                (df.tp * df.tn - df.fp * df.fn),
+                np.sqrt(
+                    (df.tp + df.fp)
+                    * (df.tp + df.fn)
+                    * (df.tn + df.fp)
+                    * (df.tn + df.fn)
+                ),
+            ),
+        )
+        .rename(
+            columns={
+                "t": "threshold",
+                "pp": "predicted_positive",
+                "tp": "true_positive",
+                "fp": "false_positive",
+                "fn": "false_negative",
+                "tn": "true_negative",
+                "acc": "accuracy",
+                "bacc": "balanced_accuracy",
+                "mcc": "matthews_corrcoef",
+            },
+        )
+    )

{onekit-1.2.0 → onekit-1.4.0}/src/onekit/sparkkit.py RENAMED Viewed

@@ -34,6 +34,7 @@ __all__ = (
     "assert_row_equal",
     "assert_schema_equal",
     "bool_to_int",
+    "bool_to_str",
     "check_column_present",
     "count_nulls",
     "cvf",
@@ -46,6 +47,7 @@ __all__ = (
     "is_schema_equal",
     "join",
     "peek",
+    "select_col_types",
     "str_to_col",
     "union",
     "with_date_diff_ago",
@@ -475,12 +477,53 @@ def bool_to_int(df: SparkDF, /, *, subset=None) -> SparkDF:
     <BLANKLINE>
     """
     cols = subset or df.columns
-    bool_cols = [c for c in cols if isinstance(df.schema[c].dataType, T.BooleanType)]
+    bool_cols = [c for c in select_col_types(df, T.BooleanType) if c in cols]
     for bool_col in bool_cols:
         df = df.withColumn(bool_col, F.col(bool_col).cast(T.IntegerType()))
     return df
+@toolz.curry
+def bool_to_str(df: SparkDF, /, *, subset=None) -> SparkDF:
+    """Cast values of Boolean columns to string values.
+    Examples
+    --------
+    >>> from pyspark.sql import SparkSession
+    >>> import onekit.sparkkit as sk
+    >>> spark = SparkSession.builder.getOrCreate()
+    >>> df = spark.createDataFrame(
+    ...     [
+    ...         dict(x=True, y=False, z=None),
+    ...         dict(x=False, y=None, z=True),
+    ...         dict(x=True, y=None, z=None),
+    ...     ]
+    ... )
+    >>> sk.bool_to_str(df).show()
+    +-----+-----+----+
+    |    x|    y|   z|
+    +-----+-----+----+
+    | true|false|null|
+    |false| null|true|
+    | true| null|null|
+    +-----+-----+----+
+    <BLANKLINE>
+    >>> # function is curried
+    >>> df.transform(sk.bool_to_str(subset=["y", "z"])).printSchema()
+    root
+     |-- x: boolean (nullable = true)
+     |-- y: string (nullable = true)
+     |-- z: string (nullable = true)
+    <BLANKLINE>
+    """
+    cols = subset or df.columns
+    bool_cols = [c for c in select_col_types(df, T.BooleanType) if c in cols]
+    for bool_col in bool_cols:
+        df = df.withColumn(bool_col, F.col(bool_col).cast(T.StringType()))
+    return df
 def check_column_present(*cols: str) -> SparkDFTransformFunc:
     """Check if columns are present in dataframe.
@@ -1004,6 +1047,42 @@ def peek(
     return inner
+def select_col_types(df: SparkDF, /, *col_types: T.DataType) -> List[str]:
+    """Identify columns of specified data type.
+    Examples
+    --------
+    >>> from pyspark.sql import SparkSession
+    >>> from pyspark.sql import types as T
+    >>> import onekit.sparkkit as sk
+    >>> spark = SparkSession.builder.getOrCreate()
+    >>> df = spark.createDataFrame(
+    ...     [dict(bool=True, double=1.0, float=2.0, int=3, long=4, str="string")],
+    ...     schema=T.StructType(
+    ...         [
+    ...             T.StructField("bool", T.BooleanType(), nullable=True),
+    ...             T.StructField("double", T.DoubleType(), nullable=True),
+    ...             T.StructField("float", T.FloatType(), nullable=True),
+    ...             T.StructField("int", T.IntegerType(), nullable=True),
+    ...             T.StructField("long", T.LongType(), nullable=True),
+    ...             T.StructField("str", T.StringType(), nullable=True),
+    ...         ]
+    ...     ),
+    ... )
+    >>> sk.select_col_types(df, T.BooleanType)
+    ['bool']
+    >>> sk.select_col_types(df, T.IntegerType, T.LongType)
+    ['int', 'long']
+    """
+    valid_types = {v.typeName() for k, v in T.__dict__.items() if k.endswith("Type")}
+    col_types = tuple(pk.flatten(col_types))
+    for col_type in col_types:
+        if not hasattr(col_type, "typeName") or col_type.typeName() not in valid_types:
+            raise TypeError(f"{col_type=} - must be a valid data type: {valid_types}")
+    return [c for c in df.columns if isinstance(df.schema[c].dataType, col_types)]
 def str_to_col(x: str, /) -> SparkCol:
     """Cast string ``x`` to Spark column else return ``x``.
@@ -1145,7 +1224,13 @@ def with_date_diff_ahead(
     return inner
-def with_digitscale(num_col: str, new_col: str) -> SparkDFTransformFunc:
+def with_digitscale(
+    num_col: str,
+    new_col: str,
+    /,
+    *,
+    kind: str = "log",
+) -> SparkDFTransformFunc:
     """PySpark version of digitscale.
     See Also
@@ -1168,33 +1253,95 @@ def with_digitscale(num_col: str, new_col: str) -> SparkDFTransformFunc:
     ...         dict(x=10_000.0),
     ...         dict(x=100_000.0),
     ...         dict(x=1_000_000.0),
+    ...         dict(x=2_000_000.0),
     ...         dict(x=None),
     ...     ],
     ... )
     >>> df.transform(sk.with_digitscale("x", "fx")).show()
+    +---------+-----------------+
+    |        x|               fx|
+    +---------+-----------------+
+    |      0.1|              0.0|
+    |      1.0|              1.0|
+    |     10.0|              2.0|
+    |    100.0|              3.0|
+    |   1000.0|              4.0|
+    |  10000.0|              5.0|
+    | 100000.0|              6.0|
+    |1000000.0|              7.0|
+    |2000000.0|7.301029995663981|
+    |     null|             null|
+    +---------+-----------------+
+    <BLANKLINE>
+    >>> df.transform(sk.with_digitscale("x", "fx", kind="int")).show()
     +---------+----+
     |        x|  fx|
     +---------+----+
-    |      0.1| 0.0|
-    |      1.0| 1.0|
-    |     10.0| 2.0|
-    |    100.0| 3.0|
-    |   1000.0| 4.0|
-    |  10000.0| 5.0|
-    | 100000.0| 6.0|
-    |1000000.0| 7.0|
+    |      0.1|   0|
+    |      1.0|   1|
+    |     10.0|   2|
+    |    100.0|   3|
+    |   1000.0|   4|
+    |  10000.0|   5|
+    | 100000.0|   6|
+    |1000000.0|   7|
+    |2000000.0|   7|
     |     null|null|
     +---------+----+
     <BLANKLINE>
+    >>> df.transform(sk.with_digitscale("x", "fx", kind="linear")).show()
+    +---------+-----------------+
+    |        x|               fx|
+    +---------+-----------------+
+    |      0.1|              0.0|
+    |      1.0|              1.0|
+    |     10.0|              2.0|
+    |    100.0|              3.0|
+    |   1000.0|              4.0|
+    |  10000.0|              5.0|
+    | 100000.0|              6.0|
+    |1000000.0|              7.0|
+    |2000000.0|7.111111111111111|
+    |     null|             null|
+    +---------+-----------------+
+    <BLANKLINE>
     """
+    valid_kind = ["log", "int", "linear"]
+    if kind not in valid_kind:
+        raise ValueError(f"{kind=} - must be a valid value: {valid_kind}")
     def inner(df: SparkDF, /) -> SparkDF:
         x = F.abs(num_col)
-        return df.withColumn(
+        df = df.withColumn(
             new_col,
             F.when(x.isNull(), None).when(x >= 0.1, 1 + F.log10(x)).otherwise(0.0),
         )
+        if kind == "int":
+            df = df.withColumn(new_col, F.floor(new_col).cast(T.IntegerType()))
+        if kind == "linear":
+            n = "_n_"
+            y0 = F.col(n)
+            y1 = F.col(n) + 1
+            x0 = 10 ** (F.col(n) - 1)
+            x1 = 10 ** F.col(n)
+            df = (
+                df.withColumn(n, F.floor(new_col).cast(T.IntegerType()))
+                .withColumn(
+                    new_col,
+                    F.when(x.isNull(), None)
+                    .when(x >= 0.1, (y0 * (x1 - x) + y1 * (x - x0)) / (x1 - x0))
+                    .otherwise(0.0),
+                )
+                .drop(n)
+            )
+        return df
     return inner

{onekit-1.2.0 → onekit-1.4.0}/LICENSE RENAMED Viewed

File without changes

{onekit-1.2.0 → onekit-1.4.0}/src/onekit/__init__.py RENAMED Viewed

File without changes

{onekit-1.2.0 → onekit-1.4.0}/src/onekit/optfunckit.py RENAMED Viewed

File without changes

{onekit-1.2.0 → onekit-1.4.0}/src/onekit/pandaskit.py RENAMED Viewed

File without changes

{onekit-1.2.0 → onekit-1.4.0}/src/onekit/vizkit.py RENAMED Viewed

File without changes

onekit 1.2.0__tar.gz → 1.4.0__tar.gz

onekit 1.2.0.tar.gz → 1.4.0.tar.gz