PyPI - onekit - Versions diffs - 1.2.0__tar.gz → 1.3.0__tar.gz - Mend

onekit 1.2.0.tar.gz → 1.3.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (12) hide show

{onekit-1.2.0 → onekit-1.3.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: onekit
-Version: 1.2.0
+Version: 1.3.0
 Summary: All-in-One Python Kit.
 Home-page: https://github.com/estripling/onekit
 License: BSD 3-Clause

{onekit-1.2.0 → onekit-1.3.0}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "onekit"
-version = "1.2.0"
+version = "1.3.0"
 description = "All-in-One Python Kit."
 authors = ["Eugen Stripling <estripling042@gmail.com>"]
 license = "BSD 3-Clause"

{onekit-1.2.0 → onekit-1.3.0}/src/onekit/mathkit.py RENAMED Viewed

@@ -82,7 +82,8 @@ def collatz(n: int, /) -> Generator:
         n = n // 2 if iseven(n) else 3 * n + 1
-def digitscale(x: Union[int, float], /) -> float:
+@toolz.curry
+def digitscale(x: Union[int, float], /, *, kind: str = "log") -> Union[int, float]:
     """Scale :math:`x` such that its mapped integer part is its number of digits.
     Given a number :math:`x \\in \\mathbb{R}`, the following function
@@ -102,8 +103,24 @@ def digitscale(x: Union[int, float], /) -> float:
     -----
     - :math:`\\lfloor \\cdot \\rfloor`: floor function
     - :math:`\\left[ \\, \\cdot \\, \\right]`: truncation function
-    - For any positive integer :math:`n`, the number of digits in :math:`n` is
-      :math:`1 + \\lfloor \\log_{10} n \\rfloor`
+    - For any positive integer :math:`k`, the number of digits in :math:`k` is
+      :math:`1 + \\lfloor \\log_{10} k \\rfloor`
+    - If `kind="int"`, returns :math:`\\lfloor f(x) \\rfloor`
+    - If `kind="linear"`, linear interpolation is performed:
+    .. math::
+        f_{linear}(x) =
+        \\begin{cases}
+            \\frac{y_{0} (x_{1} - x) + y_{1} (x - x_{0})}{x_{1} - x_{0}}
+              & \\text{ if } |x| \\ge 0.1 \\\\[6pt]
+            0 & \\text{ otherwise }
+        \\end{cases}
+        \\\\[6pt]
+        \\text{ with } n = \\lfloor f(x) \\rfloor, y_{0} = n, y_{1} = n + 1,
+        x_{0} = 10^{n - 1}, \\text{ and } x_{1} = 10^{n}
     See Also
     --------
@@ -121,8 +138,37 @@ def digitscale(x: Union[int, float], /) -> float:
     >>> list(map(mk.digitscale, [-0.5, -5, -50, -500]))
     [0.6989700043360187, 1.6989700043360187, 2.6989700043360187, 3.6989700043360187]
+    >>> # function is curried
+    >>> list(map(mk.digitscale(kind="int"), [-0.5, -5, -50, -500]))
+    [0, 1, 2, 3]
+    >>> list(map(mk.digitscale(kind="linear"), [0.1, 1, 10, 100, 1_000, 10_000]))
+    [0.0, 1.0, 2.0, 3.0, 4.0, 5.0]
+    >>> list(map(mk.digitscale(kind="linear"), [0.2, 2, 20, 200]))
+    [0.11111111111111112, 1.1111111111111112, 2.111111111111111, 3.111111111111111]
+    >>> list(map(mk.digitscale(kind="linear"), [-0.5, -5, -50, -500]))
+    [0.4444444444444445, 1.4444444444444444, 2.4444444444444446, 3.4444444444444446]
     """
-    return 1 + math.log10(abs(x)) if abs(x) >= 0.1 else 0.0
+    valid_kind = ["log", "int", "linear"]
+    x = abs(x)
+    fx = 1 + math.log10(x) if x >= 0.1 else 0.0
+    if kind == "log":
+        return fx
+    elif kind == "int":
+        return math.floor(fx)
+    elif kind == "linear":
+        n = math.floor(fx)
+        y0, y1 = n, n + 1
+        x0, x1 = 10 ** (n - 1), 10**n
+        return (y0 * (x1 - x) + y1 * (x - x0)) / (x1 - x0) if x >= 0.1 else 0.0
+    else:
+        raise ValueError(f"{kind=} - must be a valid value: {valid_kind}")
 def fibonacci() -> Generator:

{onekit-1.2.0 → onekit-1.3.0}/src/onekit/numpykit.py RENAMED Viewed

@@ -52,7 +52,7 @@ def check_vector(x: ArrayLike, /, *, n_min: int = 1, n_max: int = np.inf) -> Vec
     return x
-def digitscale(x: ArrayLike, /) -> np.ndarray:
+def digitscale(x: ArrayLike, /, *, kind: str = "log") -> np.ndarray:
     """NumPy version of digitscale.
     See Also
@@ -63,10 +63,17 @@ def digitscale(x: ArrayLike, /) -> np.ndarray:
     Examples
     --------
     >>> import onekit.numpykit as npk
-    >>> npk.digitscale([0.1, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000])
-    array([0., 1., 2., 3., 4., 5., 6., 7.])
+    >>> npk.digitscale([0.1, 1, 10, 100, 1_000, 10_000, 2_000_000])
+    array([0.     , 1.     , 2.     , 3.     , 4.     , 5.     , 7.30103])
+    >>> npk.digitscale([0.1, 1, 10, 100, 1_000, 10_000, 100_000, 2_000_000], kind="int")
+    array([0, 1, 2, 3, 4, 5, 6, 7])
+    >>> npk.digitscale([0.2, 2, 20], kind="linear")
+    array([0.11111111, 1.11111111, 2.11111111])
     """
-    return np.vectorize(mk.digitscale, otypes=[float])(x)
+    otypes = [int] if kind == "int" else [float]
+    return np.vectorize(mk.digitscale(kind=kind), otypes=otypes)(x)
 def stderr(x: ArrayLike, /) -> float:

{onekit-1.2.0 → onekit-1.3.0}/src/onekit/pythonkit.py RENAMED Viewed

@@ -1,6 +1,5 @@
 import calendar
 import datetime as dt
-import distutils
 import functools
 import inspect
 import itertools
@@ -37,7 +36,6 @@ __all__ = (
     "coinflip",
     "concat_strings",
     "contrast_sets",
-    "create_path",
     "date_ago",
     "date_ahead",
     "date_count_backward",
@@ -390,21 +388,6 @@ def contrast_sets(x: set, y: set, /, *, n: int = 3) -> dict:
     return output
-def create_path(*strings: str) -> str:
-    """Create path by concatenating strings.
-    Examples
-    --------
-    >>> import onekit.pythonkit as pk
-    >>> pk.create_path("path", "to", "file")
-    'path/to/file'
-    >>> pk.create_path(["hdfs://", "path", "to", "file"])
-    'hdfs://path/to/file'
-    """
-    return functools.reduce(os.path.join, flatten(strings))
 @toolz.curry
 def date_ago(d0: dt.date, /, n: int) -> dt.date:
     """Compute date that is :math:`n \\in \\mathbb{N}_{0}` days ago.
@@ -683,13 +666,13 @@ def highlight_string_differences(lft_str: str, rgt_str: str, /) -> str:
     Examples
     --------
     >>> import onekit.pythonkit as pk
-    >>> print(pk.highlight_string_differences("hello", "hall"))
+    >>> print(pk.highlight_string_differences("hello", "hall"))  # doctest: +SKIP
     hello
      |  |
     hall
     >>> # no differences when there is no '|' character
-    >>> print(pk.highlight_string_differences("hello", "hello"))
+    >>> print(pk.highlight_string_differences("hello", "hello"))  # doctest: +SKIP
     hello
     <BLANKLINE>
     hello
@@ -699,7 +682,7 @@ def highlight_string_differences(lft_str: str, rgt_str: str, /) -> str:
         lft_str,
         concat_strings(
             "",
-            (
+            *(
                 " " if x == y else "|"
                 for x, y in itertools.zip_longest(lft_str, rgt_str, fillvalue="")
             ),
@@ -936,11 +919,31 @@ def prompt_yes_no(question: str, /, *, default: Optional[str] = None) -> bool:
     answer = input(f"{question} {prompt} ").lower()
+    def strtobool(value: str) -> bool:
+        """Convert a string representation of truth to true (1) or false (0).
+        True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values
+        are 'n', 'no', 'f', 'false', 'off', and '0'.  Raises ValueError if
+        'val' is anything else.
+        Notes
+        -----
+        - Shamelessly copied and modified from: distutils.util.strtobool
+        - distutils is not available with Python>=3.12
+        """
+        value = value.lower()
+        if value in ("y", "yes", "t", "true", "on", "1"):
+            return True
+        elif value in ("n", "no", "f", "false", "off", "0"):
+            return False
+        else:
+            raise ValueError("invalid truth value {!r}".format(value))
     while True:
         try:
             if answer == "" and default in ["yes", "no"]:
-                return bool(distutils.util.strtobool(default))
-            return bool(distutils.util.strtobool(answer))
+                return bool(strtobool(default))
+            return bool(strtobool(answer))
         except ValueError:
             response_text = "{} Please respond with 'yes' [{}] or 'no' [{}] ".format(

{onekit-1.2.0 → onekit-1.3.0}/src/onekit/sparkkit.py RENAMED Viewed

@@ -34,6 +34,7 @@ __all__ = (
     "assert_row_equal",
     "assert_schema_equal",
     "bool_to_int",
+    "bool_to_str",
     "check_column_present",
     "count_nulls",
     "cvf",
@@ -46,6 +47,7 @@ __all__ = (
     "is_schema_equal",
     "join",
     "peek",
+    "select_col_types",
     "str_to_col",
     "union",
     "with_date_diff_ago",
@@ -475,12 +477,53 @@ def bool_to_int(df: SparkDF, /, *, subset=None) -> SparkDF:
     <BLANKLINE>
     """
     cols = subset or df.columns
-    bool_cols = [c for c in cols if isinstance(df.schema[c].dataType, T.BooleanType)]
+    bool_cols = [c for c in select_col_types(df, T.BooleanType) if c in cols]
     for bool_col in bool_cols:
         df = df.withColumn(bool_col, F.col(bool_col).cast(T.IntegerType()))
     return df
+@toolz.curry
+def bool_to_str(df: SparkDF, /, *, subset=None) -> SparkDF:
+    """Cast values of Boolean columns to string values.
+    Examples
+    --------
+    >>> from pyspark.sql import SparkSession
+    >>> import onekit.sparkkit as sk
+    >>> spark = SparkSession.builder.getOrCreate()
+    >>> df = spark.createDataFrame(
+    ...     [
+    ...         dict(x=True, y=False, z=None),
+    ...         dict(x=False, y=None, z=True),
+    ...         dict(x=True, y=None, z=None),
+    ...     ]
+    ... )
+    >>> sk.bool_to_str(df).show()
+    +-----+-----+----+
+    |    x|    y|   z|
+    +-----+-----+----+
+    | true|false|null|
+    |false| null|true|
+    | true| null|null|
+    +-----+-----+----+
+    <BLANKLINE>
+    >>> # function is curried
+    >>> df.transform(sk.bool_to_str(subset=["y", "z"])).printSchema()
+    root
+     |-- x: boolean (nullable = true)
+     |-- y: string (nullable = true)
+     |-- z: string (nullable = true)
+    <BLANKLINE>
+    """
+    cols = subset or df.columns
+    bool_cols = [c for c in select_col_types(df, T.BooleanType) if c in cols]
+    for bool_col in bool_cols:
+        df = df.withColumn(bool_col, F.col(bool_col).cast(T.StringType()))
+    return df
 def check_column_present(*cols: str) -> SparkDFTransformFunc:
     """Check if columns are present in dataframe.
@@ -1004,6 +1047,40 @@ def peek(
     return inner
+def select_col_types(df: SparkDF, /, *col_types: T.DataType) -> List[str]:
+    """Identify columns of specified data type.
+    Examples
+    --------
+    >>> from pyspark.sql import SparkSession
+    >>> from pyspark.sql import types as T
+    >>> import onekit.sparkkit as sk
+    >>> spark = SparkSession.builder.getOrCreate()
+    >>> df = spark.createDataFrame(
+    ...     [dict(bool=True, double=1.0, float=2.0, int=3, long=4, str="string")],
+    ...     schema=T.StructType(
+    ...         [
+    ...             T.StructField("bool", T.BooleanType(), nullable=True),
+    ...             T.StructField("double", T.DoubleType(), nullable=True),
+    ...             T.StructField("float", T.FloatType(), nullable=True),
+    ...             T.StructField("int", T.IntegerType(), nullable=True),
+    ...             T.StructField("long", T.LongType(), nullable=True),
+    ...             T.StructField("str", T.StringType(), nullable=True),
+    ...         ]
+    ...     ),
+    ... )
+    >>> sk.select_col_types(df, T.BooleanType)
+    ['bool']
+    >>> sk.select_col_types(df, T.IntegerType, T.LongType)
+    ['int', 'long']
+    """
+    col_types = tuple(pk.flatten(col_types))
+    if not all(isinstance(col_type, T.DataTypeSingleton) for col_type in col_types):
+        raise TypeError(f"{col_types=} - must be a data type of pyspark.sql.types")
+    return [c for c in df.columns if isinstance(df.schema[c].dataType, col_types)]
 def str_to_col(x: str, /) -> SparkCol:
     """Cast string ``x`` to Spark column else return ``x``.
@@ -1145,7 +1222,13 @@ def with_date_diff_ahead(
     return inner
-def with_digitscale(num_col: str, new_col: str) -> SparkDFTransformFunc:
+def with_digitscale(
+    num_col: str,
+    new_col: str,
+    /,
+    *,
+    kind: str = "log",
+) -> SparkDFTransformFunc:
     """PySpark version of digitscale.
     See Also
@@ -1168,33 +1251,95 @@ def with_digitscale(num_col: str, new_col: str) -> SparkDFTransformFunc:
     ...         dict(x=10_000.0),
     ...         dict(x=100_000.0),
     ...         dict(x=1_000_000.0),
+    ...         dict(x=2_000_000.0),
     ...         dict(x=None),
     ...     ],
     ... )
     >>> df.transform(sk.with_digitscale("x", "fx")).show()
+    +---------+-----------------+
+    |        x|               fx|
+    +---------+-----------------+
+    |      0.1|              0.0|
+    |      1.0|              1.0|
+    |     10.0|              2.0|
+    |    100.0|              3.0|
+    |   1000.0|              4.0|
+    |  10000.0|              5.0|
+    | 100000.0|              6.0|
+    |1000000.0|              7.0|
+    |2000000.0|7.301029995663981|
+    |     null|             null|
+    +---------+-----------------+
+    <BLANKLINE>
+    >>> df.transform(sk.with_digitscale("x", "fx", kind="int")).show()
     +---------+----+
     |        x|  fx|
     +---------+----+
-    |      0.1| 0.0|
-    |      1.0| 1.0|
-    |     10.0| 2.0|
-    |    100.0| 3.0|
-    |   1000.0| 4.0|
-    |  10000.0| 5.0|
-    | 100000.0| 6.0|
-    |1000000.0| 7.0|
+    |      0.1|   0|
+    |      1.0|   1|
+    |     10.0|   2|
+    |    100.0|   3|
+    |   1000.0|   4|
+    |  10000.0|   5|
+    | 100000.0|   6|
+    |1000000.0|   7|
+    |2000000.0|   7|
     |     null|null|
     +---------+----+
     <BLANKLINE>
+    >>> df.transform(sk.with_digitscale("x", "fx", kind="linear")).show()
+    +---------+-----------------+
+    |        x|               fx|
+    +---------+-----------------+
+    |      0.1|              0.0|
+    |      1.0|              1.0|
+    |     10.0|              2.0|
+    |    100.0|              3.0|
+    |   1000.0|              4.0|
+    |  10000.0|              5.0|
+    | 100000.0|              6.0|
+    |1000000.0|              7.0|
+    |2000000.0|7.111111111111111|
+    |     null|             null|
+    +---------+-----------------+
+    <BLANKLINE>
     """
+    valid_kind = ["log", "int", "linear"]
+    if kind not in valid_kind:
+        raise ValueError(f"{kind=} - must be a valid value: {valid_kind}")
     def inner(df: SparkDF, /) -> SparkDF:
         x = F.abs(num_col)
-        return df.withColumn(
+        df = df.withColumn(
             new_col,
             F.when(x.isNull(), None).when(x >= 0.1, 1 + F.log10(x)).otherwise(0.0),
         )
+        if kind == "int":
+            df = df.withColumn(new_col, F.floor(new_col).cast(T.IntegerType()))
+        if kind == "linear":
+            n = "_n_"
+            y0 = F.col(n)
+            y1 = F.col(n) + 1
+            x0 = 10 ** (F.col(n) - 1)
+            x1 = 10 ** F.col(n)
+            df = (
+                df.withColumn(n, F.floor(new_col).cast(T.IntegerType()))
+                .withColumn(
+                    new_col,
+                    F.when(x.isNull(), None)
+                    .when(x >= 0.1, (y0 * (x1 - x) + y1 * (x - x0)) / (x1 - x0))
+                    .otherwise(0.0),
+                )
+                .drop(n)
+            )
+        return df
     return inner

{onekit-1.2.0 → onekit-1.3.0}/LICENSE RENAMED Viewed

File without changes

{onekit-1.2.0 → onekit-1.3.0}/README.md RENAMED Viewed

File without changes

{onekit-1.2.0 → onekit-1.3.0}/src/onekit/__init__.py RENAMED Viewed

File without changes

{onekit-1.2.0 → onekit-1.3.0}/src/onekit/optfunckit.py RENAMED Viewed

File without changes

{onekit-1.2.0 → onekit-1.3.0}/src/onekit/pandaskit.py RENAMED Viewed

File without changes

{onekit-1.2.0 → onekit-1.3.0}/src/onekit/vizkit.py RENAMED Viewed

File without changes

onekit 1.2.0__tar.gz → 1.3.0__tar.gz

onekit 1.2.0.tar.gz → 1.3.0.tar.gz