onekit 1.1.0__tar.gz → 1.2.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {onekit-1.1.0 → onekit-1.2.0}/PKG-INFO +3 -2
- {onekit-1.1.0 → onekit-1.2.0}/pyproject.toml +9 -5
- {onekit-1.1.0 → onekit-1.2.0}/src/onekit/mathkit.py +73 -0
- {onekit-1.1.0 → onekit-1.2.0}/src/onekit/numpykit.py +20 -0
- {onekit-1.1.0 → onekit-1.2.0}/src/onekit/sparkkit.py +82 -2
- {onekit-1.1.0 → onekit-1.2.0}/LICENSE +0 -0
- {onekit-1.1.0 → onekit-1.2.0}/README.md +0 -0
- {onekit-1.1.0 → onekit-1.2.0}/src/onekit/__init__.py +0 -0
- {onekit-1.1.0 → onekit-1.2.0}/src/onekit/optfunckit.py +0 -0
- {onekit-1.1.0 → onekit-1.2.0}/src/onekit/pandaskit.py +0 -0
- {onekit-1.1.0 → onekit-1.2.0}/src/onekit/pythonkit.py +0 -0
- {onekit-1.1.0 → onekit-1.2.0}/src/onekit/vizkit.py +0 -0
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: onekit
|
|
3
|
-
Version: 1.1.0
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: All-in-One Python Kit.
|
|
5
5
|
Home-page: https://github.com/estripling/onekit
|
|
6
6
|
License: BSD 3-Clause
|
|
7
7
|
Keywords: onekit
|
|
8
8
|
Author: Eugen Stripling
|
|
9
9
|
Author-email: estripling042@gmail.com
|
|
10
|
-
Requires-Python: >=3.8.1
|
|
10
|
+
Requires-Python: >=3.8.1
|
|
11
11
|
Classifier: License :: Other/Proprietary License
|
|
12
12
|
Classifier: Programming Language :: Python :: 3
|
|
13
13
|
Classifier: Programming Language :: Python :: 3.9
|
|
@@ -16,6 +16,7 @@ Classifier: Programming Language :: Python :: 3.11
|
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
17
|
Classifier: Programming Language :: Python :: 3 :: Only
|
|
18
18
|
Classifier: Programming Language :: Python :: 3.8
|
|
19
|
+
Requires-Dist: pytz (>=2024.1,<2025.0)
|
|
19
20
|
Requires-Dist: toolz (>=0.12.0,<0.13.0)
|
|
20
21
|
Project-URL: Documentation, https://onekit.readthedocs.io/en/stable/
|
|
21
22
|
Project-URL: Repository, https://github.com/estripling/onekit
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[tool.poetry]
|
|
2
2
|
name = "onekit"
|
|
3
|
-
version = "1.1.0"
|
|
3
|
+
version = "1.2.0"
|
|
4
4
|
description = "All-in-One Python Kit."
|
|
5
5
|
authors = ["Eugen Stripling <estripling042@gmail.com>"]
|
|
6
6
|
license = "BSD 3-Clause"
|
|
@@ -18,14 +18,15 @@ classifiers = [
|
|
|
18
18
|
]
|
|
19
19
|
|
|
20
20
|
[tool.poetry.dependencies]
|
|
21
|
-
python = ">=3.8.1
|
|
21
|
+
python = ">=3.8.1"
|
|
22
22
|
toolz = "^0.12.0"
|
|
23
|
+
pytz = "^2024.1"
|
|
23
24
|
|
|
24
25
|
[tool.poetry.group.precommit.dependencies]
|
|
25
26
|
autoflake = "^2.2.1"
|
|
26
27
|
black = {extras = ["jupyter"], version = "^23.11.0"}
|
|
27
28
|
isort = "^5.12.0"
|
|
28
|
-
flake8 = "
|
|
29
|
+
flake8 = ">=5.0.4"
|
|
29
30
|
pre-commit = "^3.5.0"
|
|
30
31
|
pre-commit-hooks = "^4.5.0"
|
|
31
32
|
|
|
@@ -33,6 +34,7 @@ pre-commit-hooks = "^4.5.0"
|
|
|
33
34
|
pytest = "^7.4.3"
|
|
34
35
|
pytest-cov = "^4.1.0"
|
|
35
36
|
pytest-skip-slow = "^0.0.5"
|
|
37
|
+
time-machine = "^2.13.0"
|
|
36
38
|
|
|
37
39
|
[tool.poetry.group.docs.dependencies]
|
|
38
40
|
furo = "^2023.9.10"
|
|
@@ -41,7 +43,6 @@ myst-parser = "^2.0.0"
|
|
|
41
43
|
nbsphinx = "^0.9.3"
|
|
42
44
|
sphinx-autoapi = "^3.0.0"
|
|
43
45
|
sphinx-copybutton = "^0.5.2"
|
|
44
|
-
time-machine = "^2.13.0"
|
|
45
46
|
|
|
46
47
|
[tool.poetry.group.packaging.dependencies]
|
|
47
48
|
python-semantic-release = "^8.3.0"
|
|
@@ -73,7 +74,10 @@ markers = [
|
|
|
73
74
|
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
|
74
75
|
"serial",
|
|
75
76
|
]
|
|
76
|
-
filterwarnings = [
|
|
77
|
+
filterwarnings = [
|
|
78
|
+
"ignore::DeprecationWarning",
|
|
79
|
+
"ignore::RuntimeWarning",
|
|
80
|
+
]
|
|
77
81
|
|
|
78
82
|
[tool.semantic_release]
|
|
79
83
|
branch = "main"
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import math
|
|
1
2
|
from typing import (
|
|
2
3
|
Generator,
|
|
3
4
|
Union,
|
|
@@ -7,10 +8,12 @@ import toolz
|
|
|
7
8
|
|
|
8
9
|
__all__ = (
|
|
9
10
|
"collatz",
|
|
11
|
+
"digitscale",
|
|
10
12
|
"fibonacci",
|
|
11
13
|
"isdivisible",
|
|
12
14
|
"iseven",
|
|
13
15
|
"isodd",
|
|
16
|
+
"sign",
|
|
14
17
|
)
|
|
15
18
|
|
|
16
19
|
|
|
@@ -79,6 +82,49 @@ def collatz(n: int, /) -> Generator:
|
|
|
79
82
|
n = n // 2 if iseven(n) else 3 * n + 1
|
|
80
83
|
|
|
81
84
|
|
|
85
|
+
def digitscale(x: Union[int, float], /) -> float:
|
|
86
|
+
"""Scale :math:`x` such that its mapped integer part is its number of digits.
|
|
87
|
+
|
|
88
|
+
Given a number :math:`x \\in \\mathbb{R}`, the following function
|
|
89
|
+
:math:`f \\colon \\mathbb{R} \\rightarrow \\mathbb{R}_{\\ge 0}` scales it such that
|
|
90
|
+
its mapped integer part :math:`\\lfloor f(x) \\rfloor \\in \\mathbb{N}_{0}`
|
|
91
|
+
is the number of digits in :math:`[x]`:
|
|
92
|
+
|
|
93
|
+
.. math::
|
|
94
|
+
|
|
95
|
+
f(x) =
|
|
96
|
+
\\begin{cases}
|
|
97
|
+
1 + \\log_{10}|x| & \\text{ if } |x| \\ge 0.1 \\\\[6pt]
|
|
98
|
+
0 & \\text{ otherwise }
|
|
99
|
+
\\end{cases}
|
|
100
|
+
|
|
101
|
+
Notes
|
|
102
|
+
-----
|
|
103
|
+
- :math:`\\lfloor \\cdot \\rfloor`: floor function
|
|
104
|
+
- :math:`\\left[ \\, \\cdot \\, \\right]`: truncation function
|
|
105
|
+
- For any positive integer :math:`n`, the number of digits in :math:`n` is
|
|
106
|
+
:math:`1 + \\lfloor \\log_{10} n \\rfloor`
|
|
107
|
+
|
|
108
|
+
See Also
|
|
109
|
+
--------
|
|
110
|
+
onekit.numpykit.digitscale : NumPy version
|
|
111
|
+
onekit.sparkkit.with_digitscale : PySpark version
|
|
112
|
+
|
|
113
|
+
Examples
|
|
114
|
+
--------
|
|
115
|
+
>>> import onekit.mathkit as mk
|
|
116
|
+
>>> list(map(mk.digitscale, [0.1, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000]))
|
|
117
|
+
[0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
|
|
118
|
+
|
|
119
|
+
>>> list(map(mk.digitscale, [0.2, 2, 20, 200]))
|
|
120
|
+
[0.30102999566398125, 1.3010299956639813, 2.3010299956639813, 3.3010299956639813]
|
|
121
|
+
|
|
122
|
+
>>> list(map(mk.digitscale, [-0.5, -5, -50, -500]))
|
|
123
|
+
[0.6989700043360187, 1.6989700043360187, 2.6989700043360187, 3.6989700043360187]
|
|
124
|
+
"""
|
|
125
|
+
return 1 + math.log10(abs(x)) if abs(x) >= 0.1 else 0.0
|
|
126
|
+
|
|
127
|
+
|
|
82
128
|
def fibonacci() -> Generator:
|
|
83
129
|
"""Generate the Fibonacci sequence.
|
|
84
130
|
|
|
@@ -182,3 +228,30 @@ def isodd(x: Union[int, float], /) -> bool:
|
|
|
182
228
|
False
|
|
183
229
|
"""
|
|
184
230
|
return toolz.complement(iseven)(x)
|
|
231
|
+
|
|
232
|
+
|
|
233
|
+
def sign(x: Union[int, float], /) -> int:
|
|
234
|
+
"""Sign function.
|
|
235
|
+
|
|
236
|
+
.. math::
|
|
237
|
+
|
|
238
|
+
f(x) =
|
|
239
|
+
\\begin{cases}
|
|
240
|
+
-1 & \\text{ if } x < 0 \\\\[6pt]
|
|
241
|
+
0 & \\text{ if } x = 0 \\\\[6pt]
|
|
242
|
+
1 & \\text{ if } x > 0
|
|
243
|
+
\\end{cases}
|
|
244
|
+
|
|
245
|
+
Examples
|
|
246
|
+
--------
|
|
247
|
+
>>> import onekit.mathkit as mk
|
|
248
|
+
>>> mk.sign(0)
|
|
249
|
+
0
|
|
250
|
+
|
|
251
|
+
>>> mk.sign(3.14)
|
|
252
|
+
1
|
|
253
|
+
|
|
254
|
+
>>> mk.sign(-10)
|
|
255
|
+
-1
|
|
256
|
+
"""
|
|
257
|
+
return int(0 if math.isclose(x, 0) else math.copysign(1, x))
|
|
@@ -1,8 +1,11 @@
|
|
|
1
1
|
import numpy as np
|
|
2
2
|
import numpy.typing as npt
|
|
3
3
|
|
|
4
|
+
import onekit.mathkit as mk
|
|
5
|
+
|
|
4
6
|
__all__ = (
|
|
5
7
|
"check_vector",
|
|
8
|
+
"digitscale",
|
|
6
9
|
"stderr",
|
|
7
10
|
)
|
|
8
11
|
|
|
@@ -49,6 +52,23 @@ def check_vector(x: ArrayLike, /, *, n_min: int = 1, n_max: int = np.inf) -> Vec
|
|
|
49
52
|
return x
|
|
50
53
|
|
|
51
54
|
|
|
55
|
+
def digitscale(x: ArrayLike, /) -> np.ndarray:
|
|
56
|
+
"""NumPy version of digitscale.
|
|
57
|
+
|
|
58
|
+
See Also
|
|
59
|
+
--------
|
|
60
|
+
onekit.mathkit.digitscale : Python version
|
|
61
|
+
onekit.sparkkit.with_digitscale : PySpark version
|
|
62
|
+
|
|
63
|
+
Examples
|
|
64
|
+
--------
|
|
65
|
+
>>> import onekit.numpykit as npk
|
|
66
|
+
>>> npk.digitscale([0.1, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000])
|
|
67
|
+
array([0., 1., 2., 3., 4., 5., 6., 7.])
|
|
68
|
+
"""
|
|
69
|
+
return np.vectorize(mk.digitscale, otypes=[float])(x)
|
|
70
|
+
|
|
71
|
+
|
|
52
72
|
def stderr(x: ArrayLike, /) -> float:
|
|
53
73
|
"""Compute standard error of the mean.
|
|
54
74
|
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import datetime as dt
|
|
2
2
|
import functools
|
|
3
|
+
import math
|
|
3
4
|
import os
|
|
4
5
|
from typing import (
|
|
5
6
|
Callable,
|
|
@@ -49,6 +50,7 @@ __all__ = (
|
|
|
49
50
|
"union",
|
|
50
51
|
"with_date_diff_ago",
|
|
51
52
|
"with_date_diff_ahead",
|
|
53
|
+
"with_digitscale",
|
|
52
54
|
"with_endofweek_date",
|
|
53
55
|
"with_increasing_id",
|
|
54
56
|
"with_index",
|
|
@@ -667,7 +669,11 @@ def date_range(
|
|
|
667
669
|
)
|
|
668
670
|
|
|
669
671
|
|
|
670
|
-
def filter_date(date_col: str, d0: Union[str, dt.date], n: int) -> SparkDFTransformFunc:
|
|
672
|
+
def filter_date(
|
|
673
|
+
date_col: str,
|
|
674
|
+
d0: Union[str, dt.date],
|
|
675
|
+
n: Union[int, float],
|
|
676
|
+
) -> SparkDFTransformFunc:
|
|
671
677
|
"""Returns dataframe with rows such that date is in :math:`(d_{-n}, d_{0}]`.
|
|
672
678
|
|
|
673
679
|
Notes
|
|
@@ -675,6 +681,7 @@ def filter_date(date_col: str, d0: Union[str, dt.date], n: int) -> SparkDFTransf
|
|
|
675
681
|
- :math:`d_{0}`: reference date (inclusive)
|
|
676
682
|
- :math:`d_{-n} < d_{0}`: relative date (exclusive)
|
|
677
683
|
- :math:`n > 0`: number of dates from :math:`d_{-n}` to :math:`d_{0}`
|
|
684
|
+
- If `n=float("inf")`, returned dates are in :math:`(d_{-\\infty}, d_{0}]`
|
|
678
685
|
|
|
679
686
|
Examples
|
|
680
687
|
--------
|
|
@@ -702,10 +709,30 @@ def filter_date(date_col: str, d0: Union[str, dt.date], n: int) -> SparkDFTransf
|
|
|
702
709
|
|2024-01-07|
|
|
703
710
|
+----------+
|
|
704
711
|
<BLANKLINE>
|
|
712
|
+
|
|
713
|
+
>>> df.transform(sk.filter_date("d", d0="2024-01-07", n=float("inf"))).show()
|
|
714
|
+
+----------+
|
|
715
|
+
| d|
|
|
716
|
+
+----------+
|
|
717
|
+
|2024-01-01|
|
|
718
|
+
|2024-01-02|
|
|
719
|
+
|2024-01-03|
|
|
720
|
+
|2024-01-04|
|
|
721
|
+
|2024-01-05|
|
|
722
|
+
|2024-01-06|
|
|
723
|
+
|2024-01-07|
|
|
724
|
+
+----------+
|
|
725
|
+
<BLANKLINE>
|
|
705
726
|
"""
|
|
706
|
-
if not isinstance(n, int) or n < 1:
|
|
727
|
+
if not isinstance(n, (int, float)):
|
|
728
|
+
raise TypeError(f"{type(n)=} - must be an int or float")
|
|
729
|
+
|
|
730
|
+
if isinstance(n, int) and n < 1:
|
|
707
731
|
raise ValueError(f"{n=} - must be a positive integer")
|
|
708
732
|
|
|
733
|
+
if isinstance(n, float) and not math.isinf(n):
|
|
734
|
+
raise ValueError(f'{n=} - only valid float value: float("inf")')
|
|
735
|
+
|
|
709
736
|
def inner(df: SparkDF, /) -> SparkDF:
|
|
710
737
|
date_diff_ago = "_date_diff_ago_"
|
|
711
738
|
return (
|
|
@@ -1118,6 +1145,59 @@ def with_date_diff_ahead(
|
|
|
1118
1145
|
return inner
|
|
1119
1146
|
|
|
1120
1147
|
|
|
1148
|
+
def with_digitscale(num_col: str, new_col: str) -> SparkDFTransformFunc:
|
|
1149
|
+
"""PySpark version of digitscale.
|
|
1150
|
+
|
|
1151
|
+
See Also
|
|
1152
|
+
--------
|
|
1153
|
+
onekit.mathkit.digitscale : Python version
|
|
1154
|
+
onekit.numpykit.digitscale : NumPy version
|
|
1155
|
+
|
|
1156
|
+
Examples
|
|
1157
|
+
--------
|
|
1158
|
+
>>> from pyspark.sql import SparkSession
|
|
1159
|
+
>>> import onekit.sparkkit as sk
|
|
1160
|
+
>>> spark = SparkSession.builder.getOrCreate()
|
|
1161
|
+
>>> df = spark.createDataFrame(
|
|
1162
|
+
... [
|
|
1163
|
+
... dict(x=0.1),
|
|
1164
|
+
... dict(x=1.0),
|
|
1165
|
+
... dict(x=10.0),
|
|
1166
|
+
... dict(x=100.0),
|
|
1167
|
+
... dict(x=1_000.0),
|
|
1168
|
+
... dict(x=10_000.0),
|
|
1169
|
+
... dict(x=100_000.0),
|
|
1170
|
+
... dict(x=1_000_000.0),
|
|
1171
|
+
... dict(x=None),
|
|
1172
|
+
... ],
|
|
1173
|
+
... )
|
|
1174
|
+
>>> df.transform(sk.with_digitscale("x", "fx")).show()
|
|
1175
|
+
+---------+----+
|
|
1176
|
+
| x| fx|
|
|
1177
|
+
+---------+----+
|
|
1178
|
+
| 0.1| 0.0|
|
|
1179
|
+
| 1.0| 1.0|
|
|
1180
|
+
| 10.0| 2.0|
|
|
1181
|
+
| 100.0| 3.0|
|
|
1182
|
+
| 1000.0| 4.0|
|
|
1183
|
+
| 10000.0| 5.0|
|
|
1184
|
+
| 100000.0| 6.0|
|
|
1185
|
+
|1000000.0| 7.0|
|
|
1186
|
+
| null|null|
|
|
1187
|
+
+---------+----+
|
|
1188
|
+
<BLANKLINE>
|
|
1189
|
+
"""
|
|
1190
|
+
|
|
1191
|
+
def inner(df: SparkDF, /) -> SparkDF:
|
|
1192
|
+
x = F.abs(num_col)
|
|
1193
|
+
return df.withColumn(
|
|
1194
|
+
new_col,
|
|
1195
|
+
F.when(x.isNull(), None).when(x >= 0.1, 1 + F.log10(x)).otherwise(0.0),
|
|
1196
|
+
)
|
|
1197
|
+
|
|
1198
|
+
return inner
|
|
1199
|
+
|
|
1200
|
+
|
|
1121
1201
|
def with_endofweek_date(
|
|
1122
1202
|
date_col: str,
|
|
1123
1203
|
new_col: str,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|