onekit 1.1.0__tar.gz → 1.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,13 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: onekit
3
- Version: 1.1.0
3
+ Version: 1.2.0
4
4
  Summary: All-in-One Python Kit.
5
5
  Home-page: https://github.com/estripling/onekit
6
6
  License: BSD 3-Clause
7
7
  Keywords: onekit
8
8
  Author: Eugen Stripling
9
9
  Author-email: estripling042@gmail.com
10
- Requires-Python: >=3.8.1,<4.0
10
+ Requires-Python: >=3.8.1
11
11
  Classifier: License :: Other/Proprietary License
12
12
  Classifier: Programming Language :: Python :: 3
13
13
  Classifier: Programming Language :: Python :: 3.9
@@ -16,6 +16,7 @@ Classifier: Programming Language :: Python :: 3.11
16
16
  Classifier: Programming Language :: Python :: 3.12
17
17
  Classifier: Programming Language :: Python :: 3 :: Only
18
18
  Classifier: Programming Language :: Python :: 3.8
19
+ Requires-Dist: pytz (>=2024.1,<2025.0)
19
20
  Requires-Dist: toolz (>=0.12.0,<0.13.0)
20
21
  Project-URL: Documentation, https://onekit.readthedocs.io/en/stable/
21
22
  Project-URL: Repository, https://github.com/estripling/onekit
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "onekit"
3
- version = "1.1.0"
3
+ version = "1.2.0"
4
4
  description = "All-in-One Python Kit."
5
5
  authors = ["Eugen Stripling <estripling042@gmail.com>"]
6
6
  license = "BSD 3-Clause"
@@ -18,14 +18,15 @@ classifiers = [
18
18
  ]
19
19
 
20
20
  [tool.poetry.dependencies]
21
- python = ">=3.8.1,<4.0"
21
+ python = ">=3.8.1"
22
22
  toolz = "^0.12.0"
23
+ pytz = "^2024.1"
23
24
 
24
25
  [tool.poetry.group.precommit.dependencies]
25
26
  autoflake = "^2.2.1"
26
27
  black = {extras = ["jupyter"], version = "^23.11.0"}
27
28
  isort = "^5.12.0"
28
- flake8 = "^6.1.0"
29
+ flake8 = ">=5.0.4"
29
30
  pre-commit = "^3.5.0"
30
31
  pre-commit-hooks = "^4.5.0"
31
32
 
@@ -33,6 +34,7 @@ pre-commit-hooks = "^4.5.0"
33
34
  pytest = "^7.4.3"
34
35
  pytest-cov = "^4.1.0"
35
36
  pytest-skip-slow = "^0.0.5"
37
+ time-machine = "^2.13.0"
36
38
 
37
39
  [tool.poetry.group.docs.dependencies]
38
40
  furo = "^2023.9.10"
@@ -41,7 +43,6 @@ myst-parser = "^2.0.0"
41
43
  nbsphinx = "^0.9.3"
42
44
  sphinx-autoapi = "^3.0.0"
43
45
  sphinx-copybutton = "^0.5.2"
44
- time-machine = "^2.13.0"
45
46
 
46
47
  [tool.poetry.group.packaging.dependencies]
47
48
  python-semantic-release = "^8.3.0"
@@ -73,7 +74,10 @@ markers = [
73
74
  "slow: marks tests as slow (deselect with '-m \"not slow\"')",
74
75
  "serial",
75
76
  ]
76
- filterwarnings = ["ignore::DeprecationWarning"]
77
+ filterwarnings = [
78
+ "ignore::DeprecationWarning",
79
+ "ignore::RuntimeWarning",
80
+ ]
77
81
 
78
82
  [tool.semantic_release]
79
83
  branch = "main"
@@ -1,3 +1,4 @@
1
+ import math
1
2
  from typing import (
2
3
  Generator,
3
4
  Union,
@@ -7,10 +8,12 @@ import toolz
7
8
 
8
9
  __all__ = (
9
10
  "collatz",
11
+ "digitscale",
10
12
  "fibonacci",
11
13
  "isdivisible",
12
14
  "iseven",
13
15
  "isodd",
16
+ "sign",
14
17
  )
15
18
 
16
19
 
@@ -79,6 +82,49 @@ def collatz(n: int, /) -> Generator:
79
82
  n = n // 2 if iseven(n) else 3 * n + 1
80
83
 
81
84
 
85
+ def digitscale(x: Union[int, float], /) -> float:
86
+ """Scale :math:`x` such that its mapped integer part is its number of digits.
87
+
88
+ Given a number :math:`x \\in \\mathbb{R}`, the following function
89
+ :math:`f \\colon \\mathbb{R} \\rightarrow \\mathbb{R}_{\\ge 0}` scales it such that
90
+ its mapped integer part :math:`\\lfloor f(x) \\rfloor \\in \\mathbb{N}_{0}`
91
+ is the number of digits in :math:`[x]`:
92
+
93
+ .. math::
94
+
95
+ f(x) =
96
+ \\begin{cases}
97
+ 1 + \\log_{10}|x| & \\text{ if } |x| \\ge 0.1 \\\\[6pt]
98
+ 0 & \\text{ otherwise }
99
+ \\end{cases}
100
+
101
+ Notes
102
+ -----
103
+ - :math:`\\lfloor \\cdot \\rfloor`: floor function
104
+ - :math:`\\left[ \\, \\cdot \\, \\right]`: truncation function
105
+ - For any positive integer :math:`n`, the number of digits in :math:`n` is
106
+ :math:`1 + \\lfloor \\log_{10} n \\rfloor`
107
+
108
+ See Also
109
+ --------
110
+ onekit.numpykit.digitscale : NumPy version
111
+ onekit.sparkkit.with_digitscale : PySpark version
112
+
113
+ Examples
114
+ --------
115
+ >>> import onekit.mathkit as mk
116
+ >>> list(map(mk.digitscale, [0.1, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000]))
117
+ [0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0]
118
+
119
+ >>> list(map(mk.digitscale, [0.2, 2, 20, 200]))
120
+ [0.30102999566398125, 1.3010299956639813, 2.3010299956639813, 3.3010299956639813]
121
+
122
+ >>> list(map(mk.digitscale, [-0.5, -5, -50, -500]))
123
+ [0.6989700043360187, 1.6989700043360187, 2.6989700043360187, 3.6989700043360187]
124
+ """
125
+ return 1 + math.log10(abs(x)) if abs(x) >= 0.1 else 0.0
126
+
127
+
82
128
  def fibonacci() -> Generator:
83
129
  """Generate the Fibonacci sequence.
84
130
 
@@ -182,3 +228,30 @@ def isodd(x: Union[int, float], /) -> bool:
182
228
  False
183
229
  """
184
230
  return toolz.complement(iseven)(x)
231
+
232
+
233
+ def sign(x: Union[int, float], /) -> int:
234
+ """Sign function.
235
+
236
+ .. math::
237
+
238
+ f(x) =
239
+ \\begin{cases}
240
+ -1 & \\text{ if } x < 0 \\\\[6pt]
241
+ 0 & \\text{ if } x = 0 \\\\[6pt]
242
+ 1 & \\text{ if } x > 0
243
+ \\end{cases}
244
+
245
+ Examples
246
+ --------
247
+ >>> import onekit.mathkit as mk
248
+ >>> mk.sign(0)
249
+ 0
250
+
251
+ >>> mk.sign(3.14)
252
+ 1
253
+
254
+ >>> mk.sign(-10)
255
+ -1
256
+ """
257
+ return int(0 if math.isclose(x, 0) else math.copysign(1, x))
@@ -1,8 +1,11 @@
1
1
  import numpy as np
2
2
  import numpy.typing as npt
3
3
 
4
+ import onekit.mathkit as mk
5
+
4
6
  __all__ = (
5
7
  "check_vector",
8
+ "digitscale",
6
9
  "stderr",
7
10
  )
8
11
 
@@ -49,6 +52,23 @@ def check_vector(x: ArrayLike, /, *, n_min: int = 1, n_max: int = np.inf) -> Vec
49
52
  return x
50
53
 
51
54
 
55
+ def digitscale(x: ArrayLike, /) -> np.ndarray:
56
+ """NumPy version of digitscale.
57
+
58
+ See Also
59
+ --------
60
+ onekit.mathkit.digitscale : Python version
61
+ onekit.sparkkit.with_digitscale : PySpark version
62
+
63
+ Examples
64
+ --------
65
+ >>> import onekit.numpykit as npk
66
+ >>> npk.digitscale([0.1, 1, 10, 100, 1_000, 10_000, 100_000, 1_000_000])
67
+ array([0., 1., 2., 3., 4., 5., 6., 7.])
68
+ """
69
+ return np.vectorize(mk.digitscale, otypes=[float])(x)
70
+
71
+
52
72
  def stderr(x: ArrayLike, /) -> float:
53
73
  """Compute standard error of the mean.
54
74
 
@@ -1,5 +1,6 @@
1
1
  import datetime as dt
2
2
  import functools
3
+ import math
3
4
  import os
4
5
  from typing import (
5
6
  Callable,
@@ -49,6 +50,7 @@ __all__ = (
49
50
  "union",
50
51
  "with_date_diff_ago",
51
52
  "with_date_diff_ahead",
53
+ "with_digitscale",
52
54
  "with_endofweek_date",
53
55
  "with_increasing_id",
54
56
  "with_index",
@@ -667,7 +669,11 @@ def date_range(
667
669
  )
668
670
 
669
671
 
670
- def filter_date(date_col: str, d0: Union[str, dt.date], n: int) -> SparkDFTransformFunc:
672
+ def filter_date(
673
+ date_col: str,
674
+ d0: Union[str, dt.date],
675
+ n: Union[int, float],
676
+ ) -> SparkDFTransformFunc:
671
677
  """Returns dataframe with rows such that date is in :math:`(d_{-n}, d_{0}]`.
672
678
 
673
679
  Notes
@@ -675,6 +681,7 @@ def filter_date(date_col: str, d0: Union[str, dt.date], n: int) -> SparkDFTransf
675
681
  - :math:`d_{0}`: reference date (inclusive)
676
682
  - :math:`d_{-n} < d_{0}`: relative date (exclusive)
677
683
  - :math:`n > 0`: number of dates from :math:`d_{-n}` to :math:`d_{0}`
684
+ - If `n=float("inf")`, returned dates are in :math:`(d_{-\\infty}, d_{0}]`
678
685
 
679
686
  Examples
680
687
  --------
@@ -702,10 +709,30 @@ def filter_date(date_col: str, d0: Union[str, dt.date], n: int) -> SparkDFTransf
702
709
  |2024-01-07|
703
710
  +----------+
704
711
  <BLANKLINE>
712
+
713
+ >>> df.transform(sk.filter_date("d", d0="2024-01-07", n=float("inf"))).show()
714
+ +----------+
715
+ | d|
716
+ +----------+
717
+ |2024-01-01|
718
+ |2024-01-02|
719
+ |2024-01-03|
720
+ |2024-01-04|
721
+ |2024-01-05|
722
+ |2024-01-06|
723
+ |2024-01-07|
724
+ +----------+
725
+ <BLANKLINE>
705
726
  """
706
- if not isinstance(n, int) or n < 1:
727
+ if not isinstance(n, (int, float)):
728
+ raise TypeError(f"{type(n)=} - must be an int or float")
729
+
730
+ if isinstance(n, int) and n < 1:
707
731
  raise ValueError(f"{n=} - must be a positive integer")
708
732
 
733
+ if isinstance(n, float) and not math.isinf(n):
734
+ raise ValueError(f'{n=} - only valid float value: float("inf")')
735
+
709
736
  def inner(df: SparkDF, /) -> SparkDF:
710
737
  date_diff_ago = "_date_diff_ago_"
711
738
  return (
@@ -1118,6 +1145,59 @@ def with_date_diff_ahead(
1118
1145
  return inner
1119
1146
 
1120
1147
 
1148
+ def with_digitscale(num_col: str, new_col: str) -> SparkDFTransformFunc:
1149
+ """PySpark version of digitscale.
1150
+
1151
+ See Also
1152
+ --------
1153
+ onekit.mathkit.digitscale : Python version
1154
+ onekit.numpykit.digitscale : NumPy version
1155
+
1156
+ Examples
1157
+ --------
1158
+ >>> from pyspark.sql import SparkSession
1159
+ >>> import onekit.sparkkit as sk
1160
+ >>> spark = SparkSession.builder.getOrCreate()
1161
+ >>> df = spark.createDataFrame(
1162
+ ... [
1163
+ ... dict(x=0.1),
1164
+ ... dict(x=1.0),
1165
+ ... dict(x=10.0),
1166
+ ... dict(x=100.0),
1167
+ ... dict(x=1_000.0),
1168
+ ... dict(x=10_000.0),
1169
+ ... dict(x=100_000.0),
1170
+ ... dict(x=1_000_000.0),
1171
+ ... dict(x=None),
1172
+ ... ],
1173
+ ... )
1174
+ >>> df.transform(sk.with_digitscale("x", "fx")).show()
1175
+ +---------+----+
1176
+ | x| fx|
1177
+ +---------+----+
1178
+ | 0.1| 0.0|
1179
+ | 1.0| 1.0|
1180
+ | 10.0| 2.0|
1181
+ | 100.0| 3.0|
1182
+ | 1000.0| 4.0|
1183
+ | 10000.0| 5.0|
1184
+ | 100000.0| 6.0|
1185
+ |1000000.0| 7.0|
1186
+ | null|null|
1187
+ +---------+----+
1188
+ <BLANKLINE>
1189
+ """
1190
+
1191
+ def inner(df: SparkDF, /) -> SparkDF:
1192
+ x = F.abs(num_col)
1193
+ return df.withColumn(
1194
+ new_col,
1195
+ F.when(x.isNull(), None).when(x >= 0.1, 1 + F.log10(x)).otherwise(0.0),
1196
+ )
1197
+
1198
+ return inner
1199
+
1200
+
1121
1201
  def with_endofweek_date(
1122
1202
  date_col: str,
1123
1203
  new_col: str,
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes