PyPI - onekit - Versions diffs - 2.2.2__tar.gz → 3.0.1__tar.gz - Mend

onekit 2.2.2 (tar.gz) → 3.0.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

{onekit-2.2.2 → onekit-3.0.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: onekit
-Version: 2.2.2
+Version: 3.0.1
 Summary: All-in-One Python Kit.
 License: BSD 3-Clause
 Keywords: onekit

{onekit-2.2.2 → onekit-3.0.1}/pyproject.toml RENAMED Viewed

@@ -22,7 +22,7 @@ requires-python = ">=3.11"
 dependencies = []
 [tool.poetry]
-version = "2.2.2"
+version = "3.0.1"
 [project.optional-dependencies]
 base = [

onekit-3.0.1/src/onekit/exception.py ADDED Viewed

@@ -0,0 +1,142 @@
+import os
+from typing import (
+    Any,
+    Iterable,
+)
+from pyspark.sql import DataFrame as SparkDF
+from onekit import pythonkit as pk
+__all__ = (
+    "ColumnNotFoundError",
+    "InvalidChoiceError",
+    "OnekitError",
+    "RowCountMismatchError",
+    "RowValueMismatchError",
+    "SchemaMismatchError",
+)
+class OnekitError(Exception):
+    """A base class for onekit exceptions."""
+class ColumnNotFoundError(OnekitError):
+    """Exception for missing columns in dataframe.
+    See Also
+    --------
+    check_column_present : Validate column presence.
+    has_column : Evaluate column presence.
+    Examples
+    --------
+    >>> from onekit.exception import ColumnNotFoundError
+    >>> error = ColumnNotFoundError(missing_cols=["a", "b", "c"])
+    >>> error.message
+    "following columns not found: ['a', 'b', 'c']"
+    """
+    def __init__(self, missing_cols: Iterable[str]):
+        self.missing_cols = missing_cols
+        self.message = f"following columns not found: {missing_cols}"
+        super().__init__(self.message)
+class InvalidChoiceError(OnekitError):
+    """Exception for invalid choice error.
+    Examples
+    --------
+    >>> from onekit.exception import InvalidChoiceError
+    >>> x = 0
+    >>> error = InvalidChoiceError(value=x, choices=[1, 2, 3])
+    >>> error.message
+    'x=0 invalid choice - choose from [1, 2, 3]'
+    """
+    def __init__(self, value: Any, choices: Iterable[Any] | None = None):
+        self.value = value
+        self.choices = choices
+        msg = f"{pk.parent_varname(value)}={value} invalid choice"
+        if choices is not None:
+            msg += f" - choose from {choices}"
+        self.message = msg
+        super().__init__(self.message)
+class RowCountMismatchError(OnekitError):
+    """Exception for mismatch of row counts.
+    See Also
+    --------
+    assert_row_count_equal : Validate row counts.
+    is_row_count_equal : Evaluate row counts.
+    Examples
+    --------
+    >>> from onekit.exception import RowCountMismatchError
+    >>> error = RowCountMismatchError(num_lft=10000, num_rgt=12000)
+    >>> error.message
+    'num_lft=10_000, num_rgt=12_000, num_diff=2_000'
+    """
+    def __init__(self, num_lft: int, num_rgt: int):
+        num_diff = abs(num_lft - num_rgt)
+        self.num_lft = num_lft
+        self.num_rgt = num_rgt
+        self.num_diff = num_diff
+        self.message = pk.concat_strings(
+            ", ",
+            f"num_lft={pk.num_to_str(num_lft)}",
+            f"num_rgt={pk.num_to_str(num_rgt)}",
+            f"num_diff={pk.num_to_str(num_diff)}",
+        )
+        super().__init__(self.message)
+class RowValueMismatchError(OnekitError):
+    """Exception for mismatch of row values.
+    See Also
+    --------
+    assert_row_value_equal : Validate row values.
+    is_row_value_equal : Evaluate row values.
+    """
+    def __init__(
+        self,
+        lft_rows: SparkDF,
+        rgt_rows: SparkDF,
+        num_lft: int,
+        num_rgt: int,
+    ):
+        self.lft_rows = lft_rows
+        self.rgt_rows = rgt_rows
+        self.num_lft = num_lft
+        self.num_rgt = num_rgt
+        self.message = pk.concat_strings(
+            ", ",
+            f"num_lft={pk.num_to_str(num_lft)}",
+            f"num_rgt={pk.num_to_str(num_rgt)}",
+        )
+        super().__init__(self.message)
+class SchemaMismatchError(OnekitError):
+    """Exception for mismatch of schemas.
+    See Also
+    --------
+    assert_schema_equal : Validate schemas.
+    is_schema_equal : Evaluate schemas.
+    """
+    def __init__(self, lft_schema: str, rgt_schema: str):
+        self.lft_schema = lft_schema
+        self.rgt_schema = rgt_schema
+        msg = pk.highlight_string_differences(lft_schema, rgt_schema)
+        num_diff = sum(c == "|" for c in msg.splitlines()[1])
+        self.message = pk.concat_strings(os.linesep, f"{num_diff=}", msg)
+        super().__init__(self.message)

{onekit-2.2.2 → onekit-3.0.1}/src/onekit/pandaskit.py RENAMED Viewed

@@ -209,7 +209,7 @@ def profile(df: PandasDF, /, *, q: list[int] | None = None) -> PandasDF:
     q95               NaN        1.0     NaN
     max               NaN        1.0     NaN
     """
-    n_rows, _ = df.shape
+    num_rows, _ = df.shape
     quantiles = q or (5, 25, 50, 75, 95)
     basic_info_df = pd.concat(
@@ -220,12 +220,12 @@ def profile(df: PandasDF, /, *, q: list[int] | None = None) -> PandasDF:
                 df.isnull()
                 .sum()
                 .to_frame("isnull")
-                .assign(isnull_pct=lambda df: 100 * df["isnull"] / n_rows)
+                .assign(isnull_pct=lambda df: 100 * df["isnull"] / num_rows)
             ),
             (
                 df.nunique()
                 .to_frame("unique")
-                .assign(unique_pct=lambda df: 100 * df["unique"] / n_rows)
+                .assign(unique_pct=lambda df: 100 * df["unique"] / num_rows)
             ),
         ],
         axis=1,

{onekit-2.2.2 → onekit-3.0.1}/src/onekit/pythonkit.py RENAMED Viewed

@@ -60,6 +60,7 @@ __all__ = (
     "num_days",
     "num_to_str",
     "op",
+    "parent_varname",
     "prompt_yes_no",
     "reduce_sets",
     "remove_punctuation",
@@ -958,6 +959,23 @@ def op(func: Callable, const: Any, /) -> Callable[[Any], Any]:
     return inner
+def parent_varname(x: Any, /) -> str:
+    """Returns the name of the parent variable of :math:`x`.
+    Examples
+    --------
+    >>> from onekit import pythonkit as pk
+    >>> my_var = "my_string_value"
+    >>> def f(x) -> str:
+    ...     return pk.parent_varname(x)
+    ...
+    >>> f(my_var)
+    'my_var'
+    """
+    variables = inspect.currentframe().f_back.f_back.f_locals.items()
+    return [name for name, value in variables if value is x][0]
 def prompt_yes_no(question: str, /, *, default: str | None = None) -> bool:
     """Prompt yes-no question.

{onekit-2.2.2 → onekit-3.0.1}/src/onekit/sparkkit.py RENAMED Viewed

@@ -1,7 +1,6 @@
 import datetime as dt
 import functools
 import math
-import os
 from typing import (
     Any,
     Iterable,
@@ -26,7 +25,7 @@ __all__ = (
     "any_col",
     "assert_dataframe_equal",
     "assert_row_count_equal",
-    "assert_row_equal",
+    "assert_row_value_equal",
     "assert_schema_equal",
     "bool_to_int",
     "bool_to_str",
@@ -38,7 +37,7 @@ __all__ = (
     "has_column",
     "is_dataframe_equal",
     "is_row_count_equal",
-    "is_row_equal",
+    "is_row_value_equal",
     "is_schema_equal",
     "join",
     "peek",
@@ -55,54 +54,13 @@ __all__ = (
     "with_weekday",
 )
-class SparkkitError(Exception):
-    """A base class for sparkkit exceptions."""
-class ColumnNotFoundError(SparkkitError):
-    """Exception if columns are not found in dataframe."""
-    def __init__(self, missing_cols: list[str]):
-        self.missing_cols = missing_cols
-        self.message = f"following columns not found: {missing_cols}"
-        super().__init__(self.message)
-class RowCountMismatchError(SparkkitError):
-    """Exception if row counts mismatch."""
-    def __init__(self, n_lft: int, n_rgt: int):
-        n_diff = abs(n_lft - n_rgt)
-        self.n_lft = n_lft
-        self.n_rgt = n_rgt
-        self.n_diff = n_diff
-        self.message = f"{n_lft=:_}, {n_rgt=:_}, {n_diff=:_}"
-        super().__init__(self.message)
-class RowMismatchError(SparkkitError):
-    """Exception if rows mismatch."""
-    def __init__(self, lft_rows: SparkDF, rgt_rows: SparkDF, n_lft: int, n_rgt: int):
-        self.lft_rows = lft_rows
-        self.rgt_rows = rgt_rows
-        self.n_lft = n_lft
-        self.n_rgt = n_rgt
-        self.message = f"{n_lft=:_}, {n_rgt=:_}"
-        super().__init__(self.message)
-class SchemaMismatchError(SparkkitError):
-    """Exception if schemas mismatch."""
-    def __init__(self, lft_schema: str, rgt_schema: str):
-        self.lft_schema = lft_schema
-        self.rgt_schema = rgt_schema
-        msg = pk.highlight_string_differences(lft_schema, rgt_schema)
-        n_diff = sum(c == "|" for c in msg.splitlines()[1])
-        self.message = pk.concat_strings(os.linesep, f"{n_diff=}", msg)
-        super().__init__(self.message)
+from onekit.exception import (
+    ColumnNotFoundError,
+    OnekitError,
+    RowCountMismatchError,
+    RowValueMismatchError,
+    SchemaMismatchError,
+)
 def add_prefix(df: SparkDF, prefix: str, subset: list[str] | None = None) -> SparkDF:
@@ -246,19 +204,24 @@ def assert_dataframe_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
         If schemas are not equal.
     RowCountMismatchError
         If row counts are not equal.
-    RowMismatchError
-        If rows are not equal.
+    RowValueMismatchError
+        If row values are not equal.
     See Also
     --------
     assert_schema_equal : Validate schemas.
     assert_row_count_equal : Validate row counts.
-    assert_row_equal : Validate rows.
+    assert_row_value_equal : Validate row values.
     Examples
     --------
     >>> from pyspark.sql import Row, SparkSession
     >>> from onekit import sparkkit as sk
+    >>> from onekit.exception import (
+    ...     SchemaMismatchError,
+    ...     RowCountMismatchError,
+    ...     RowValueMismatchError,
+    ... )
     >>> spark = SparkSession.builder.getOrCreate()
     >>> lft_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
     >>> rgt_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
@@ -269,10 +232,10 @@ def assert_dataframe_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
     >>> rgt_df = spark.createDataFrame([Row(z=1, y="a", x=9), Row(z=3, y="b", x=8)])
     >>> try:
     ...     sk.assert_dataframe_equal(lft_df, rgt_df)
-    ... except sk.SchemaMismatchError as error:
+    ... except SchemaMismatchError as error:
     ...     print(error)
     ...
-    n_diff=15
+    num_diff=15
     struct<x:bigint,y:bigint>
            |          |||  |||||||||||
     struct<z:bigint,y:string,x:bigint>
@@ -281,23 +244,23 @@ def assert_dataframe_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
     >>> rgt_df = spark.createDataFrame([Row(x=3, y=4), Row(x=5, y=6)])
     >>> try:
     ...     sk.assert_dataframe_equal(lft_df, rgt_df)
-    ... except sk.RowCountMismatchError as error:
+    ... except RowCountMismatchError as error:
     ...     print(error)
     ...
-    n_lft=1, n_rgt=2, n_diff=1
+    num_lft=1, num_rgt=2, num_diff=1
     >>> lft_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4), Row(x=5, y=6)])
     >>> rgt_df = spark.createDataFrame([Row(x=3, y=4), Row(x=5, y=9), Row(x=7, y=8)])
     >>> try:
     ...     sk.assert_dataframe_equal(lft_df, rgt_df)
-    ... except sk.RowMismatchError as error:
+    ... except RowValueMismatchError as error:
     ...     print(error)
     ...
-    n_lft=2, n_rgt=2
+    num_lft=2, num_rgt=2
     """
     assert_schema_equal(lft_df, rgt_df)
     assert_row_count_equal(lft_df, rgt_df)
-    assert_row_equal(lft_df, rgt_df)
+    assert_row_value_equal(lft_df, rgt_df)
 def assert_row_count_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
@@ -316,6 +279,7 @@ def assert_row_count_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
     --------
     >>> from pyspark.sql import Row, SparkSession
     >>> from onekit import sparkkit as sk
+    >>> from onekit.exception import RowCountMismatchError
     >>> spark = SparkSession.builder.getOrCreate()
     >>> lft_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
     >>> rgt_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
@@ -326,25 +290,25 @@ def assert_row_count_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
     >>> rgt_df = spark.createDataFrame([Row(x=1)])
     >>> try:
     ...     sk.assert_row_count_equal(lft_df, rgt_df)
-    ... except sk.RowCountMismatchError as error:
+    ... except RowCountMismatchError as error:
     ...     print(error)
     ...
-    n_lft=2, n_rgt=1, n_diff=1
+    num_lft=2, num_rgt=1, num_diff=1
     """
-    n_lft = lft_df.count()
-    n_rgt = rgt_df.count()
+    num_lft = lft_df.count()
+    num_rgt = rgt_df.count()
-    if n_lft != n_rgt:
-        raise RowCountMismatchError(n_lft, n_rgt)
+    if num_lft != num_rgt:
+        raise RowCountMismatchError(num_lft, num_rgt)
-def assert_row_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
-    """Validate rows of both dataframes are equal.
+def assert_row_value_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
+    """Validate row values of both dataframes are equal.
     Raises
     ------
-    RowMismatchError
-        If rows are not equal.
+    RowValueMismatchError
+        If row values are not equal.
     See Also
     --------
@@ -354,31 +318,32 @@ def assert_row_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
     --------
     >>> from pyspark.sql import Row, SparkSession
     >>> from onekit import sparkkit as sk
+    >>> from onekit.exception import RowValueMismatchError
     >>> spark = SparkSession.builder.getOrCreate()
     >>> lft_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
     >>> rgt_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
-    >>> sk.assert_row_equal(lft_df, rgt_df) is None
+    >>> sk.assert_row_value_equal(lft_df, rgt_df) is None
     True
     >>> lft_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
     >>> rgt_df = spark.createDataFrame([Row(x=3, y=4), Row(x=5, y=6), Row(x=7, y=8)])
     >>> try:
-    ...     sk.assert_row_equal(lft_df, rgt_df)
-    ... except sk.RowMismatchError as error:
+    ...     sk.assert_row_value_equal(lft_df, rgt_df)
+    ... except RowValueMismatchError as error:
     ...     print(error)
     ...
-    n_lft=1, n_rgt=2
+    num_lft=1, num_rgt=2
     """
     lft_rows = lft_df.subtract(rgt_df)
     rgt_rows = rgt_df.subtract(lft_df)
-    n_lft = lft_rows.count()
-    n_rgt = rgt_rows.count()
+    num_lft = lft_rows.count()
+    num_rgt = rgt_rows.count()
-    is_equal = (n_lft == 0) and (n_rgt == 0)
+    is_equal = (num_lft == 0) and (num_rgt == 0)
     if not is_equal:
-        raise RowMismatchError(lft_rows, rgt_rows, n_lft, n_rgt)
+        raise RowValueMismatchError(lft_rows, rgt_rows, num_lft, num_rgt)
 def assert_schema_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
@@ -386,7 +351,7 @@ def assert_schema_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
     Raises
     ------
-    sk.SchemaMismatchError
+    SchemaMismatchError
         If schemas are not equal.
     See Also
@@ -397,6 +362,7 @@ def assert_schema_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
     --------
     >>> from pyspark.sql import Row, SparkSession
     >>> from onekit import sparkkit as sk
+    >>> from onekit.exception import SchemaMismatchError
     >>> spark = SparkSession.builder.getOrCreate()
     >>> lft_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
     >>> rgt_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
@@ -407,10 +373,10 @@ def assert_schema_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> None:
     >>> rgt_df = spark.createDataFrame([Row(x=1), Row(x=3)])
     >>> try:
     ...     sk.assert_schema_equal(lft_df, rgt_df)
-    ... except sk.SchemaMismatchError as error:
+    ... except SchemaMismatchError as error:
     ...     print(error)
     ...
-    n_diff=10
+    num_diff=10
     struct<x:bigint,y:bigint>
                    ||||||||||
     struct<x:bigint>
@@ -509,13 +475,14 @@ def check_column_present(df: SparkDF, *cols: str | Iterable[str]) -> SparkDF:
     Raises
     ------
-    sk.ColumnNotFoundError
+    ColumnNotFoundError
         If columns are not found in dataframe.
     Examples
     --------
     >>> from pyspark.sql import Row, SparkSession
     >>> from onekit import sparkkit as sk
+    >>> from onekit.exception import ColumnNotFoundError
     >>> spark = SparkSession.builder.getOrCreate()
     >>> df = spark.createDataFrame([Row(x=1), Row(x=2), Row(x=3)])
     >>> sk.check_column_present(df, "x").show()
@@ -530,7 +497,7 @@ def check_column_present(df: SparkDF, *cols: str | Iterable[str]) -> SparkDF:
     >>> try:
     ...     sk.check_column_present(df, "y").show()
-    ... except sk.ColumnNotFoundError as error:
+    ... except ColumnNotFoundError as error:
     ...     print(error)
     ...
     following columns not found: ['y']
@@ -773,7 +740,7 @@ def is_dataframe_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> bool:
     --------
     is_schema_equal : Evaluate schemas.
     is_row_count_equal : Evaluate row counts.
-    is_row_equal : Evaluate rows.
+    is_row_value_equal : Evaluate row values.
     Examples
     --------
@@ -803,9 +770,9 @@ def is_dataframe_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> bool:
     try:
         assert_schema_equal(lft_df, rgt_df)
         assert_row_count_equal(lft_df, rgt_df)
-        assert_row_equal(lft_df, rgt_df)
+        assert_row_value_equal(lft_df, rgt_df)
         return True
-    except SparkkitError:
+    except OnekitError:
         return False
@@ -838,7 +805,7 @@ def is_row_count_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> bool:
         return False
-def is_row_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> bool:
+def is_row_value_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> bool:
     """Evaluate if rows of both dataframes are equal.
     See Also
@@ -852,18 +819,18 @@ def is_row_equal(lft_df: SparkDF, rgt_df: SparkDF, /) -> bool:
     >>> spark = SparkSession.builder.getOrCreate()
     >>> lft_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
     >>> rgt_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
-    >>> sk.is_row_equal(lft_df, rgt_df)
+    >>> sk.is_row_value_equal(lft_df, rgt_df)
     True
     >>> lft_df = spark.createDataFrame([Row(x=1, y=2), Row(x=3, y=4)])
     >>> rgt_df = spark.createDataFrame([Row(x=3, y=4), Row(x=5, y=6), Row(x=7, y=8)])
-    >>> sk.is_row_equal(lft_df, rgt_df)
+    >>> sk.is_row_value_equal(lft_df, rgt_df)
     False
     """
     try:
-        assert_row_equal(lft_df, rgt_df)
+        assert_row_value_equal(lft_df, rgt_df)
         return True
-    except RowMismatchError:
+    except RowValueMismatchError:
         return False