onekit 3.0.1__tar.gz → 4.0.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onekit-3.0.1 → onekit-4.0.0}/PKG-INFO +3 -2
- {onekit-3.0.1 → onekit-4.0.0}/README.md +1 -1
- {onekit-3.0.1 → onekit-4.0.0}/pyproject.toml +4 -3
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/exception.py +13 -4
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/numpykit.py +20 -0
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/pythonkit.py +13 -627
- onekit-4.0.0/src/onekit/scipykit.py +126 -0
- onekit-4.0.0/src/onekit/timekit.py +812 -0
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/vizkit.py +127 -0
- {onekit-3.0.1 → onekit-4.0.0}/LICENSE +0 -0
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/__init__.py +0 -0
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/dekit.py +0 -0
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/mathkit.py +0 -0
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/optfunckit.py +0 -0
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/pandaskit.py +0 -0
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/sklearnkit.py +0 -0
- {onekit-3.0.1 → onekit-4.0.0}/src/onekit/sparkkit.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: onekit
|
|
3
|
-
Version: 3.0.1
|
|
3
|
+
Version: 4.0.0
|
|
4
4
|
Summary: All-in-One Python Kit.
|
|
5
5
|
License: BSD 3-Clause
|
|
6
6
|
Keywords: onekit
|
|
@@ -15,6 +15,7 @@ Provides-Extra: base
|
|
|
15
15
|
Provides-Extra: pyspark
|
|
16
16
|
Requires-Dist: pandas[compression,computation,excel,output-formatting,parquet,performance,plot] (>=2.2.3,<3.0.0) ; extra == "analytics"
|
|
17
17
|
Requires-Dist: pyspark (==3.5.3) ; extra == "pyspark"
|
|
18
|
+
Requires-Dist: python-dateutil (>=2.9.0.post0,<3.0.0) ; extra == "base"
|
|
18
19
|
Requires-Dist: pytz (>=2025.2,<2026.0) ; extra == "base"
|
|
19
20
|
Requires-Dist: scikit-learn (>=1.6.1,<2.0.0) ; extra == "analytics"
|
|
20
21
|
Requires-Dist: toolz (>=1.0.0,<2.0.0) ; extra == "base"
|
|
@@ -40,7 +41,7 @@ All-in-One Python Kit:
|
|
|
40
41
|
|
|
41
42
|
- [Examples](https://onekit.readthedocs.io/en/stable/examples.html)
|
|
42
43
|
- [Documentation](https://onekit.readthedocs.io/en/stable/index.html)
|
|
43
|
-
- [
|
|
44
|
+
- [Dev Guide](https://onekit.readthedocs.io/en/stable/devguide.html)
|
|
44
45
|
- [API Reference](https://onekit.readthedocs.io/en/stable/autoapi/index.html)
|
|
45
46
|
|
|
46
47
|
## Installation
|
|
@@ -17,7 +17,7 @@ All-in-One Python Kit:
|
|
|
17
17
|
|
|
18
18
|
- [Examples](https://onekit.readthedocs.io/en/stable/examples.html)
|
|
19
19
|
- [Documentation](https://onekit.readthedocs.io/en/stable/index.html)
|
|
20
|
-
- [
|
|
20
|
+
- [Dev Guide](https://onekit.readthedocs.io/en/stable/devguide.html)
|
|
21
21
|
- [API Reference](https://onekit.readthedocs.io/en/stable/autoapi/index.html)
|
|
22
22
|
|
|
23
23
|
## Installation
|
|
@@ -22,12 +22,13 @@ requires-python = ">=3.11"
|
|
|
22
22
|
dependencies = []
|
|
23
23
|
|
|
24
24
|
[tool.poetry]
|
|
25
|
-
version = "3.0.1"
|
|
25
|
+
version = "4.0.0"
|
|
26
26
|
|
|
27
27
|
[project.optional-dependencies]
|
|
28
28
|
base = [
|
|
29
|
-
"toolz (>=1.0.0,<2.0.0)",
|
|
29
|
+
"python-dateutil (>=2.9.0.post0,<3.0.0)",
|
|
30
30
|
"pytz (>=2025.2,<2026.0)",
|
|
31
|
+
"toolz (>=1.0.0,<2.0.0)",
|
|
31
32
|
]
|
|
32
33
|
analytics = [
|
|
33
34
|
"pandas[compression,computation,excel,output-formatting,parquet,performance,plot] (>=2.2.3,<3.0.0)",
|
|
@@ -58,7 +59,7 @@ sphinx-copybutton = "^0.5.2"
|
|
|
58
59
|
time-machine = "^2.16.0"
|
|
59
60
|
|
|
60
61
|
[tool.poetry.group.packaging.dependencies]
|
|
61
|
-
python-semantic-release = "
|
|
62
|
+
python-semantic-release = "8.3.0"
|
|
62
63
|
|
|
63
64
|
[tool.black]
|
|
64
65
|
line-length = 88
|
|
@@ -4,14 +4,14 @@ from typing import (
|
|
|
4
4
|
Iterable,
|
|
5
5
|
)
|
|
6
6
|
|
|
7
|
-
from pyspark.sql import DataFrame as SparkDF
|
|
8
|
-
|
|
9
7
|
from onekit import pythonkit as pk
|
|
10
8
|
|
|
11
9
|
__all__ = (
|
|
12
10
|
"ColumnNotFoundError",
|
|
13
11
|
"InvalidChoiceError",
|
|
12
|
+
"InvalidDateRangeWarning",
|
|
14
13
|
"OnekitError",
|
|
14
|
+
"OnekitWarning",
|
|
15
15
|
"RowCountMismatchError",
|
|
16
16
|
"RowValueMismatchError",
|
|
17
17
|
"SchemaMismatchError",
|
|
@@ -96,6 +96,7 @@ class RowCountMismatchError(OnekitError):
|
|
|
96
96
|
super().__init__(self.message)
|
|
97
97
|
|
|
98
98
|
|
|
99
|
+
# noinspection PyUnresolvedReferences
|
|
99
100
|
class RowValueMismatchError(OnekitError):
|
|
100
101
|
"""Exception for mismatch of row values.
|
|
101
102
|
|
|
@@ -107,8 +108,8 @@ class RowValueMismatchError(OnekitError):
|
|
|
107
108
|
|
|
108
109
|
def __init__(
|
|
109
110
|
self,
|
|
110
|
-
lft_rows: SparkDF,
|
|
111
|
-
rgt_rows: SparkDF,
|
|
111
|
+
lft_rows: "SparkDF", # noqa: F821
|
|
112
|
+
rgt_rows: "SparkDF", # noqa: F821
|
|
112
113
|
num_lft: int,
|
|
113
114
|
num_rgt: int,
|
|
114
115
|
):
|
|
@@ -140,3 +141,11 @@ class SchemaMismatchError(OnekitError):
|
|
|
140
141
|
num_diff = sum(c == "|" for c in msg.splitlines()[1])
|
|
141
142
|
self.message = pk.concat_strings(os.linesep, f"{num_diff=}", msg)
|
|
142
143
|
super().__init__(self.message)
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class OnekitWarning(UserWarning):
|
|
147
|
+
"""A base class for onekit warnings."""
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class InvalidDateRangeWarning(OnekitWarning):
|
|
151
|
+
"""Warning for when a date range is provided in reverse order but is corrected."""
|
|
@@ -8,6 +8,7 @@ from onekit import mathkit as mk
|
|
|
8
8
|
__all__ = (
|
|
9
9
|
"check_random_state",
|
|
10
10
|
"check_vector",
|
|
11
|
+
"create_boolean_array",
|
|
11
12
|
"digitscale",
|
|
12
13
|
"stderr",
|
|
13
14
|
)
|
|
@@ -91,6 +92,25 @@ def check_vector(x: ArrayLike, /, *, n_min: int = 1, n_max: int = np.inf) -> Vec
|
|
|
91
92
|
return x
|
|
92
93
|
|
|
93
94
|
|
|
95
|
+
# noinspection PyTypeChecker
|
|
96
|
+
def create_boolean_array(data: ArrayLike, pos_label: int | str) -> np.ndarray:
|
|
97
|
+
"""Returns a boolean array indicating positions of pos_label in input data.
|
|
98
|
+
|
|
99
|
+
Examples
|
|
100
|
+
--------
|
|
101
|
+
>>> from onekit import numpykit as npk
|
|
102
|
+
>>> data = [0, 1, 2, 1, 0, 1]
|
|
103
|
+
>>> npk.create_boolean_array(data, pos_label=1)
|
|
104
|
+
array([False, True, False, True, False, True])
|
|
105
|
+
|
|
106
|
+
>>> data = ["cat", "dog", "cat", "bird", "cat", "dog"]
|
|
107
|
+
>>> npk.create_boolean_array(data, pos_label="dog")
|
|
108
|
+
array([False, True, False, False, False, True])
|
|
109
|
+
"""
|
|
110
|
+
data_array = np.asarray(data)
|
|
111
|
+
return data_array == pos_label
|
|
112
|
+
|
|
113
|
+
|
|
94
114
|
def digitscale(x: ArrayLike, /, *, kind: str = "log") -> np.ndarray:
|
|
95
115
|
"""NumPy version of digitscale.
|
|
96
116
|
|