absfuyu 5.6.1__py3-none-any.whl → 6.1.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of absfuyu might be problematic. Click here for more details.
- absfuyu/__init__.py +5 -3
- absfuyu/__main__.py +2 -2
- absfuyu/cli/__init__.py +13 -2
- absfuyu/cli/audio_group.py +98 -0
- absfuyu/cli/color.py +2 -2
- absfuyu/cli/config_group.py +2 -2
- absfuyu/cli/do_group.py +2 -2
- absfuyu/cli/game_group.py +20 -2
- absfuyu/cli/tool_group.py +68 -4
- absfuyu/config/__init__.py +3 -3
- absfuyu/core/__init__.py +10 -6
- absfuyu/core/baseclass.py +104 -34
- absfuyu/core/baseclass2.py +43 -2
- absfuyu/core/decorator.py +2 -2
- absfuyu/core/docstring.py +4 -2
- absfuyu/core/dummy_cli.py +3 -3
- absfuyu/core/dummy_func.py +2 -2
- absfuyu/dxt/__init__.py +2 -2
- absfuyu/dxt/base_type.py +93 -0
- absfuyu/dxt/dictext.py +188 -6
- absfuyu/dxt/dxt_support.py +2 -2
- absfuyu/dxt/intext.py +72 -4
- absfuyu/dxt/listext.py +495 -23
- absfuyu/dxt/strext.py +2 -2
- absfuyu/extra/__init__.py +2 -2
- absfuyu/extra/audio/__init__.py +8 -0
- absfuyu/extra/audio/_util.py +57 -0
- absfuyu/extra/audio/convert.py +192 -0
- absfuyu/extra/audio/lossless.py +281 -0
- absfuyu/extra/beautiful.py +2 -2
- absfuyu/extra/da/__init__.py +39 -3
- absfuyu/extra/da/dadf.py +458 -29
- absfuyu/extra/da/dadf_base.py +2 -2
- absfuyu/extra/da/df_func.py +89 -5
- absfuyu/extra/da/mplt.py +2 -2
- absfuyu/extra/ggapi/__init__.py +8 -0
- absfuyu/extra/ggapi/gdrive.py +223 -0
- absfuyu/extra/ggapi/glicense.py +148 -0
- absfuyu/extra/ggapi/glicense_df.py +186 -0
- absfuyu/extra/ggapi/gsheet.py +88 -0
- absfuyu/extra/img/__init__.py +30 -0
- absfuyu/extra/img/converter.py +402 -0
- absfuyu/extra/img/dup_check.py +291 -0
- absfuyu/extra/pdf.py +4 -6
- absfuyu/extra/rclone.py +253 -0
- absfuyu/extra/xml.py +90 -0
- absfuyu/fun/__init__.py +2 -20
- absfuyu/fun/rubik.py +2 -2
- absfuyu/fun/tarot.py +2 -2
- absfuyu/game/__init__.py +2 -2
- absfuyu/game/game_stat.py +2 -2
- absfuyu/game/schulte.py +78 -0
- absfuyu/game/sudoku.py +2 -2
- absfuyu/game/tictactoe.py +2 -2
- absfuyu/game/wordle.py +6 -4
- absfuyu/general/__init__.py +2 -2
- absfuyu/general/content.py +2 -2
- absfuyu/general/human.py +2 -2
- absfuyu/general/resrel.py +213 -0
- absfuyu/general/shape.py +3 -8
- absfuyu/general/tax.py +344 -0
- absfuyu/logger.py +806 -59
- absfuyu/numbers/__init__.py +13 -0
- absfuyu/numbers/number_to_word.py +321 -0
- absfuyu/numbers/shorten_number.py +303 -0
- absfuyu/numbers/time_duration.py +217 -0
- absfuyu/pkg_data/__init__.py +2 -2
- absfuyu/pkg_data/deprecated.py +2 -2
- absfuyu/pkg_data/logo.py +1462 -0
- absfuyu/sort.py +4 -4
- absfuyu/tools/__init__.py +2 -2
- absfuyu/tools/checksum.py +119 -4
- absfuyu/tools/converter.py +2 -2
- absfuyu/tools/generator.py +24 -7
- absfuyu/tools/inspector.py +2 -2
- absfuyu/tools/keygen.py +2 -2
- absfuyu/tools/obfuscator.py +2 -2
- absfuyu/tools/passwordlib.py +2 -2
- absfuyu/tools/shutdownizer.py +3 -8
- absfuyu/tools/sw.py +213 -10
- absfuyu/tools/web.py +10 -13
- absfuyu/typings.py +5 -8
- absfuyu/util/__init__.py +31 -2
- absfuyu/util/api.py +7 -4
- absfuyu/util/cli.py +119 -0
- absfuyu/util/gui.py +91 -0
- absfuyu/util/json_method.py +2 -2
- absfuyu/util/lunar.py +2 -2
- absfuyu/util/package.py +124 -0
- absfuyu/util/path.py +313 -4
- absfuyu/util/performance.py +2 -2
- absfuyu/util/shorten_number.py +206 -13
- absfuyu/util/text_table.py +2 -2
- absfuyu/util/zipped.py +2 -2
- absfuyu/version.py +22 -19
- {absfuyu-5.6.1.dist-info → absfuyu-6.1.3.dist-info}/METADATA +37 -8
- absfuyu-6.1.3.dist-info/RECORD +105 -0
- {absfuyu-5.6.1.dist-info → absfuyu-6.1.3.dist-info}/WHEEL +1 -1
- absfuyu/extra/data_analysis.py +0 -21
- absfuyu-5.6.1.dist-info/RECORD +0 -79
- {absfuyu-5.6.1.dist-info → absfuyu-6.1.3.dist-info}/entry_points.txt +0 -0
- {absfuyu-5.6.1.dist-info → absfuyu-6.1.3.dist-info}/licenses/LICENSE +0 -0
absfuyu/extra/da/dadf.py
CHANGED
|
@@ -3,8 +3,8 @@ Absfuyu: Data Analysis
|
|
|
3
3
|
----------------------
|
|
4
4
|
Data Analyst DataFrame
|
|
5
5
|
|
|
6
|
-
Version:
|
|
7
|
-
Date updated: 12/
|
|
6
|
+
Version: 6.1.2
|
|
7
|
+
Date updated: 30/12/2025 (dd/mm/yyyy)
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
# Module level
|
|
@@ -17,6 +17,7 @@ __all__ = [
|
|
|
17
17
|
"DataAnalystDataFrameNAMixin",
|
|
18
18
|
"DataAnalystDataFrameOtherMixin",
|
|
19
19
|
"DataAnalystDataFrameDateMixin",
|
|
20
|
+
"DataAnalystDataFrameExportMixin",
|
|
20
21
|
"DataAnalystDataFrameCityMixin",
|
|
21
22
|
]
|
|
22
23
|
|
|
@@ -25,24 +26,21 @@ __all__ = [
|
|
|
25
26
|
# ---------------------------------------------------------------------------
|
|
26
27
|
import random
|
|
27
28
|
import string
|
|
28
|
-
from collections.abc import Callable, Sequence
|
|
29
|
+
from collections.abc import Callable, Iterable, Mapping, Sequence
|
|
29
30
|
from datetime import datetime, timedelta
|
|
30
|
-
from typing import Any, Literal, Self
|
|
31
|
+
from typing import Any, Literal, Self, cast, override
|
|
31
32
|
|
|
32
33
|
import numpy as np
|
|
33
34
|
import pandas as pd
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
from typing import override # type: ignore
|
|
37
|
-
except ImportError:
|
|
38
|
-
from absfuyu.core.decorator import dummy_decorator as override
|
|
35
|
+
from xlsxwriter import Workbook
|
|
36
|
+
from xlsxwriter.worksheet import Worksheet
|
|
39
37
|
|
|
40
38
|
from absfuyu.core.baseclass import GetClassMembersMixin
|
|
41
|
-
from absfuyu.core.docstring import deprecated, versionadded
|
|
39
|
+
from absfuyu.core.docstring import deprecated, versionadded, versionchanged
|
|
40
|
+
from absfuyu.core.dummy_func import unidecode
|
|
42
41
|
from absfuyu.extra.da.dadf_base import CityData
|
|
43
42
|
from absfuyu.extra.da.dadf_base import DataAnalystDataFrameBase as DFBase
|
|
44
43
|
from absfuyu.extra.da.dadf_base import SplittedDF
|
|
45
|
-
from absfuyu.logger import logger
|
|
46
44
|
from absfuyu.typings import R as _R
|
|
47
45
|
from absfuyu.typings import T as _T
|
|
48
46
|
from absfuyu.util import set_min_max
|
|
@@ -59,6 +57,8 @@ class DataAnalystDataFrameColumnMethodMixin(DFBase):
|
|
|
59
57
|
- Drop rightmost column
|
|
60
58
|
- Add blank column
|
|
61
59
|
- Split str column
|
|
60
|
+
- Get column name unidecoded
|
|
61
|
+
- Get column unidecoded
|
|
62
62
|
"""
|
|
63
63
|
|
|
64
64
|
def rearrange_rightmost_column(
|
|
@@ -135,8 +135,8 @@ class DataAnalystDataFrameColumnMethodMixin(DFBase):
|
|
|
135
135
|
try:
|
|
136
136
|
self.drop(columns=[column], inplace=True)
|
|
137
137
|
except KeyError:
|
|
138
|
-
logger.debug(f"{column} column does not exist")
|
|
139
|
-
|
|
138
|
+
# logger.debug(f"{column} column does not exist")
|
|
139
|
+
pass
|
|
140
140
|
return self
|
|
141
141
|
|
|
142
142
|
def drop_rightmost(self, num_of_cols: int = 1) -> Self:
|
|
@@ -183,7 +183,9 @@ class DataAnalystDataFrameColumnMethodMixin(DFBase):
|
|
|
183
183
|
@deprecated("5.1.0", reason="Use pd.DataFrame.assign(...) method instead")
|
|
184
184
|
def add_blank_column(self, column_name: str, fill: Any = np.nan, /) -> Self:
|
|
185
185
|
"""
|
|
186
|
-
Add a blank column
|
|
186
|
+
[DEPRECATED] Add a blank column.
|
|
187
|
+
|
|
188
|
+
E.g: Use `pd.DataFrame.assign(new_col=lambda x: x['old_col'])` instead
|
|
187
189
|
|
|
188
190
|
Parameters
|
|
189
191
|
----------
|
|
@@ -246,14 +248,75 @@ class DataAnalystDataFrameColumnMethodMixin(DFBase):
|
|
|
246
248
|
"""
|
|
247
249
|
if n is None:
|
|
248
250
|
pass
|
|
249
|
-
splited_data: pd.DataFrame = self[col].str.split(
|
|
250
|
-
pat=pattern, n=n, expand=True, regex=regex
|
|
251
|
-
)
|
|
251
|
+
splited_data: pd.DataFrame = self[col].str.split(pat=pattern, n=n, expand=True, regex=regex) # type: ignore
|
|
252
252
|
num_of_splitted_cols = splited_data.shape[1]
|
|
253
253
|
new_col_names = [f"{col}_{x}" for x in range(num_of_splitted_cols)]
|
|
254
254
|
self[new_col_names] = splited_data
|
|
255
255
|
return self
|
|
256
256
|
|
|
257
|
+
@versionadded("5.12.0") # No test cases
|
|
258
|
+
def get_column_name_unidecoded(self, col_name: str, /, *, mode: Literal["start", "end", "in"] = "start") -> str:
|
|
259
|
+
"""
|
|
260
|
+
Get column name from lowercase unidecode'd version name
|
|
261
|
+
|
|
262
|
+
Parameters
|
|
263
|
+
----------
|
|
264
|
+
col_name : str
|
|
265
|
+
Column name to find
|
|
266
|
+
|
|
267
|
+
mode : Literal["start", "end", "in"], optional
|
|
268
|
+
Which mode to find, by default "start"
|
|
269
|
+
- "start": str.startswith()
|
|
270
|
+
- "end": str.endswith()
|
|
271
|
+
- "in": if x in y
|
|
272
|
+
|
|
273
|
+
Returns
|
|
274
|
+
-------
|
|
275
|
+
str
|
|
276
|
+
Column name
|
|
277
|
+
|
|
278
|
+
Raises
|
|
279
|
+
------
|
|
280
|
+
ValueError
|
|
281
|
+
Column not found
|
|
282
|
+
"""
|
|
283
|
+
for x in self.columns.to_list():
|
|
284
|
+
col_name_mod = cast(str, unidecode(x.strip().lower()))
|
|
285
|
+
if mode == "start":
|
|
286
|
+
if col_name_mod.startswith(col_name):
|
|
287
|
+
return x
|
|
288
|
+
elif mode == "end":
|
|
289
|
+
if col_name_mod.endswith(col_name):
|
|
290
|
+
return x
|
|
291
|
+
elif mode == "in":
|
|
292
|
+
if col_name_mod in col_name:
|
|
293
|
+
return x
|
|
294
|
+
|
|
295
|
+
raise ValueError(f"Column not found: {col_name}")
|
|
296
|
+
|
|
297
|
+
@versionadded("5.12.0") # No test cases
|
|
298
|
+
def get_column_unidecoded(self, col_name: str, /, *, mode: Literal["start", "end", "in"] = "start") -> pd.Series:
|
|
299
|
+
"""
|
|
300
|
+
Get column from lowercase unidecode'd version column name
|
|
301
|
+
|
|
302
|
+
Parameters
|
|
303
|
+
----------
|
|
304
|
+
col_name : str
|
|
305
|
+
Column name to find
|
|
306
|
+
|
|
307
|
+
mode : Literal["start", "end", "in"], optional
|
|
308
|
+
Which mode to find, by default "start"
|
|
309
|
+
- "start": str.startswith()
|
|
310
|
+
- "end": str.endswith()
|
|
311
|
+
- "in": if x in y
|
|
312
|
+
|
|
313
|
+
Returns
|
|
314
|
+
-------
|
|
315
|
+
Series
|
|
316
|
+
Column data
|
|
317
|
+
"""
|
|
318
|
+
return self[self.get_column_name_unidecoded(col_name, mode=mode)]
|
|
319
|
+
|
|
257
320
|
|
|
258
321
|
# Row method
|
|
259
322
|
# ---------------------------------------------------------------------------
|
|
@@ -262,6 +325,7 @@ class DataAnalystDataFrameRowMethodMixin(DFBase):
|
|
|
262
325
|
Data Analyst ``pd.DataFrame`` - Row method
|
|
263
326
|
|
|
264
327
|
- Get different rows
|
|
328
|
+
- Add blank row
|
|
265
329
|
"""
|
|
266
330
|
|
|
267
331
|
@versionadded("4.0.0")
|
|
@@ -297,6 +361,190 @@ class DataAnalystDataFrameRowMethodMixin(DFBase):
|
|
|
297
361
|
)
|
|
298
362
|
return self.__class__(out)
|
|
299
363
|
|
|
364
|
+
@versionchanged("6.1.3", reason="Reverted back to original logic")
|
|
365
|
+
@versionchanged("6.0.0", reason="Improved logic")
|
|
366
|
+
@versionadded("5.7.0")
|
|
367
|
+
def add_blank_row(self, fill: Any = np.nan, /) -> Self:
|
|
368
|
+
"""
|
|
369
|
+
Add a new row to the end of a DataFrame.
|
|
370
|
+
|
|
371
|
+
Parameters
|
|
372
|
+
----------
|
|
373
|
+
fill : Any, default np.nan
|
|
374
|
+
Value to fill in the new row (e.g., np.nan, None, "", 0).
|
|
375
|
+
|
|
376
|
+
Returns
|
|
377
|
+
-------
|
|
378
|
+
Self
|
|
379
|
+
DataFrame with the new row appended.
|
|
380
|
+
"""
|
|
381
|
+
# Create a dict with all columns filled with fill
|
|
382
|
+
new_row = {col: fill for col in self.columns}
|
|
383
|
+
self.loc[len(self)] = new_row # type: ignore
|
|
384
|
+
return self
|
|
385
|
+
|
|
386
|
+
@versionadded("6.1.3")
|
|
387
|
+
def add_blank_row2(self, fill: Any = np.nan, /) -> Self:
|
|
388
|
+
"""
|
|
389
|
+
Add a new row to the end of a DataFrame.
|
|
390
|
+
(Improved version - but not working right now)
|
|
391
|
+
|
|
392
|
+
Parameters
|
|
393
|
+
----------
|
|
394
|
+
fill : Any, default np.nan
|
|
395
|
+
Value to fill in the new row (e.g., np.nan, None, "", 0).
|
|
396
|
+
|
|
397
|
+
Returns
|
|
398
|
+
-------
|
|
399
|
+
Self
|
|
400
|
+
DataFrame with the new row appended.
|
|
401
|
+
"""
|
|
402
|
+
# Create a dict with all columns filled with fill
|
|
403
|
+
new_row = {col: fill for col in self.columns}
|
|
404
|
+
safe_types = self._safe_dtypes(self.dtypes)
|
|
405
|
+
blank_row_df = pd.DataFrame([new_row], columns=self.columns).astype(safe_types)
|
|
406
|
+
|
|
407
|
+
# self.loc[len(self)] = new_row # type: ignore
|
|
408
|
+
# return self
|
|
409
|
+
out = cast(pd.DataFrame, pd.concat([self, blank_row_df], ignore_index=True))
|
|
410
|
+
return self.__class__(out)
|
|
411
|
+
|
|
412
|
+
@versionadded("6.0.0") # Support
|
|
413
|
+
def _safe_dtypes(self, dtypes: pd.Series) -> dict[str, Any]:
|
|
414
|
+
"""
|
|
415
|
+
Convert DataFrame dtypes into a safe mapping for operations involving
|
|
416
|
+
missing values (NA), especially during row insertion or concatenation.
|
|
417
|
+
|
|
418
|
+
This function is primarily used to prevent pandas errors when inserting
|
|
419
|
+
rows containing missing values (``NaN``) into columns with non-nullable
|
|
420
|
+
integer dtypes (e.g. ``int64``). Since standard NumPy integer dtypes do not
|
|
421
|
+
support missing values, they are converted to pandas' nullable integer
|
|
422
|
+
dtype (``Int64``).
|
|
423
|
+
|
|
424
|
+
All non-integer dtypes are preserved without modification.
|
|
425
|
+
|
|
426
|
+
- Pandas nullable integer dtypes (``Int64``, ``Int32``, etc.) allow missing
|
|
427
|
+
values via ``pd.NA``, unlike NumPy integer dtypes.
|
|
428
|
+
- This function is commonly used before calling ``DataFrame.astype`` to
|
|
429
|
+
avoid ``IntCastingNaNError`` when NA values are present.
|
|
430
|
+
- The function does **not** modify floating-point, boolean, datetime,
|
|
431
|
+
categorical, or object dtypes.
|
|
432
|
+
|
|
433
|
+
Parameters
|
|
434
|
+
----------
|
|
435
|
+
dtypes : Series
|
|
436
|
+
A Series mapping column names to their pandas dtypes, typically obtained
|
|
437
|
+
from ``DataFrame.dtypes``.
|
|
438
|
+
|
|
439
|
+
Returns
|
|
440
|
+
-------
|
|
441
|
+
dict
|
|
442
|
+
A dictionary mapping column names to safe dtypes. Integer dtypes are
|
|
443
|
+
converted to pandas nullable integer dtype (``"Int64"``), while all
|
|
444
|
+
other dtypes remain unchanged.
|
|
445
|
+
|
|
446
|
+
|
|
447
|
+
Example:
|
|
448
|
+
--------
|
|
449
|
+
Basic usage with a DataFrame::
|
|
450
|
+
|
|
451
|
+
>>> df.dtypes
|
|
452
|
+
id int64
|
|
453
|
+
name object
|
|
454
|
+
amount float64
|
|
455
|
+
dtype: object
|
|
456
|
+
|
|
457
|
+
>>> _safe_dtypes(df.dtypes)
|
|
458
|
+
{
|
|
459
|
+
"id": "Int64",
|
|
460
|
+
"name": dtype("O"),
|
|
461
|
+
"amount": dtype("float64"),
|
|
462
|
+
}
|
|
463
|
+
|
|
464
|
+
Typical integration with ``astype``::
|
|
465
|
+
|
|
466
|
+
>>> safe_types = _safe_dtypes(df.dtypes)
|
|
467
|
+
>>> new_df = df.astype(safe_types)
|
|
468
|
+
|
|
469
|
+
This is especially useful when inserting rows with missing values::
|
|
470
|
+
|
|
471
|
+
>>> sep_row = {"id": pd.NA, "name": "---", "amount": pd.NA}
|
|
472
|
+
>>> sep_df = pd.DataFrame([sep_row]).astype(_safe_dtypes(df.dtypes))
|
|
473
|
+
"""
|
|
474
|
+
out = {}
|
|
475
|
+
for col, dt in dtypes.items():
|
|
476
|
+
if pd.api.types.is_integer_dtype(dt):
|
|
477
|
+
out[col] = "Int64" # nullable integer
|
|
478
|
+
else:
|
|
479
|
+
out[col] = dt
|
|
480
|
+
return out
|
|
481
|
+
|
|
482
|
+
@versionadded("6.0.0") # Better version of add_blank_row()
|
|
483
|
+
def add_separator_row(
|
|
484
|
+
self,
|
|
485
|
+
group_cols: str | Iterable[str],
|
|
486
|
+
*,
|
|
487
|
+
separator: Mapping[str, object] | None = None,
|
|
488
|
+
drop_last: bool = True,
|
|
489
|
+
) -> Self:
|
|
490
|
+
"""
|
|
491
|
+
Insert a separator row after each group in a DataFrame.
|
|
492
|
+
|
|
493
|
+
Parameters
|
|
494
|
+
----------
|
|
495
|
+
df : pandas.DataFrame
|
|
496
|
+
Input DataFrame (must be pre-sorted by ``group_cols``).
|
|
497
|
+
|
|
498
|
+
group_cols : str | Iterable[str]
|
|
499
|
+
Column(s) used to define grouping boundaries.
|
|
500
|
+
|
|
501
|
+
separator : Mapping[str, object] | None, optional
|
|
502
|
+
Custom separator row values (e.g. {"col": "---"}).
|
|
503
|
+
Columns not provided will be filled with NaN.
|
|
504
|
+
If None, a fully blank row is inserted.
|
|
505
|
+
|
|
506
|
+
drop_last : bool, optional
|
|
507
|
+
If True, do not insert a separator after the last group.
|
|
508
|
+
|
|
509
|
+
Returns
|
|
510
|
+
-------
|
|
511
|
+
Self
|
|
512
|
+
DataFrame with separator rows inserted.
|
|
513
|
+
"""
|
|
514
|
+
df = self.copy()
|
|
515
|
+
|
|
516
|
+
if isinstance(group_cols, str):
|
|
517
|
+
group_cols = [group_cols]
|
|
518
|
+
|
|
519
|
+
# Validate columns
|
|
520
|
+
missing = set(group_cols) - set(df.columns)
|
|
521
|
+
if missing:
|
|
522
|
+
raise KeyError(f"Missing columns: {missing}")
|
|
523
|
+
|
|
524
|
+
# Build separator row template
|
|
525
|
+
if separator is None:
|
|
526
|
+
sep_row = {c: np.nan for c in df.columns}
|
|
527
|
+
else:
|
|
528
|
+
sep_row = {c: separator.get(c, np.nan) for c in df.columns}
|
|
529
|
+
|
|
530
|
+
rows = []
|
|
531
|
+
|
|
532
|
+
safe_types = self._safe_dtypes(df.dtypes)
|
|
533
|
+
|
|
534
|
+
# Group while preserving order
|
|
535
|
+
for _, g in df.groupby(group_cols, sort=False):
|
|
536
|
+
rows.append(g)
|
|
537
|
+
|
|
538
|
+
sep_df = pd.DataFrame([sep_row], columns=df.columns).astype(safe_types)
|
|
539
|
+
rows.append(sep_df)
|
|
540
|
+
|
|
541
|
+
out = cast(pd.DataFrame, pd.concat(rows, ignore_index=True))
|
|
542
|
+
|
|
543
|
+
if drop_last:
|
|
544
|
+
out = out.iloc[:-1].reset_index(drop=True)
|
|
545
|
+
|
|
546
|
+
return self.__class__(out)
|
|
547
|
+
|
|
300
548
|
|
|
301
549
|
# Info
|
|
302
550
|
# ---------------------------------------------------------------------------
|
|
@@ -342,7 +590,7 @@ class DataAnalystDataFrameInfoMixin(DFBase):
|
|
|
342
590
|
return info
|
|
343
591
|
|
|
344
592
|
@override
|
|
345
|
-
def describe(self, percentiles=None, include=None, exclude=None) -> Self:
|
|
593
|
+
def describe(self, percentiles=None, include=None, exclude=None) -> Self: # type: ignore
|
|
346
594
|
"""pd.DataFrame.describe() override"""
|
|
347
595
|
return self.__class__(super().describe(percentiles, include, exclude)) # type: ignore [no-any-return]
|
|
348
596
|
|
|
@@ -486,7 +734,7 @@ class DataAnalystDataFrameInfoMixin(DFBase):
|
|
|
486
734
|
if top is not None:
|
|
487
735
|
list_of_keep: list = (
|
|
488
736
|
col_df[destination_column]
|
|
489
|
-
.head(set_min_max(top - 1, min_value=1, max_value=col_df.shape[0]))
|
|
737
|
+
.head(set_min_max(top - 1, min_value=1, max_value=col_df.shape[0])) # type: ignore
|
|
490
738
|
.to_list()
|
|
491
739
|
)
|
|
492
740
|
# logger.debug(list_of_keep)
|
|
@@ -561,7 +809,7 @@ class DataAnalystDataFrameNAMixin(DFBase):
|
|
|
561
809
|
except KeyError:
|
|
562
810
|
if getattr(self, "add_blank_column", None) is not None:
|
|
563
811
|
# Compatible with DataAnalystDataFrameColumnMethodMixin
|
|
564
|
-
self.add_blank_column(column_name, fill_when_not_exist)
|
|
812
|
+
self.add_blank_column(column_name, fill_when_not_exist) # type: ignore
|
|
565
813
|
return self
|
|
566
814
|
|
|
567
815
|
def get_missing_values(
|
|
@@ -679,7 +927,7 @@ class DataAnalystDataFrameNAMixin(DFBase):
|
|
|
679
927
|
3 -1.435079 400 400 REPLACED ywahcasi 2024-05-20
|
|
680
928
|
4 0.118993 861 800 REPLACED saoupuby 2019-04-28
|
|
681
929
|
"""
|
|
682
|
-
self[col] = self[col].apply(lambda x: callable(x) if pd.notnull(x) else x)
|
|
930
|
+
self[col] = self[col].apply(lambda x: callable(x) if pd.notnull(x) else x) # type: ignore
|
|
683
931
|
return self
|
|
684
932
|
|
|
685
933
|
@versionadded("5.1.0") # type: ignore
|
|
@@ -750,7 +998,7 @@ class DataAnalystDataFrameNAMixin(DFBase):
|
|
|
750
998
|
|
|
751
999
|
# Column name
|
|
752
1000
|
cname = "applied_row_null" if col_name is None else col_name
|
|
753
|
-
self[cname] = self.apply(apply_func, axis=1)
|
|
1001
|
+
self[cname] = self.apply(apply_func, axis=1) # type: ignore
|
|
754
1002
|
|
|
755
1003
|
return self
|
|
756
1004
|
|
|
@@ -825,7 +1073,7 @@ class DataAnalystDataFrameOtherMixin(DFBase):
|
|
|
825
1073
|
|
|
826
1074
|
if getattr(self, "drop_columns", None) is not None:
|
|
827
1075
|
# Compatible with DataAnalystDataFrameColumnMethodMixin
|
|
828
|
-
self.drop_columns(cols)
|
|
1076
|
+
self.drop_columns(cols) # type: ignore
|
|
829
1077
|
|
|
830
1078
|
out = self.merge(other, how="left", on=on)
|
|
831
1079
|
return self.__class__(out)
|
|
@@ -935,19 +1183,19 @@ class DataAnalystDataFrameDateMixin(DFBase):
|
|
|
935
1183
|
col_counter = 0
|
|
936
1184
|
# self["weekday"] = self["day"].dt.isocalendar().day # Weekday
|
|
937
1185
|
if mode.find("d") != -1:
|
|
938
|
-
logger.debug("Mode: 'day'")
|
|
1186
|
+
# logger.debug("Mode: 'day'")
|
|
939
1187
|
self["day"] = self["date"].dt.day
|
|
940
1188
|
col_counter += 1
|
|
941
1189
|
if mode.find("w") != -1:
|
|
942
|
-
logger.debug("Mode: 'weekday'")
|
|
1190
|
+
# logger.debug("Mode: 'weekday'")
|
|
943
1191
|
self["week"] = self["date"].dt.isocalendar().week
|
|
944
1192
|
col_counter += 1
|
|
945
1193
|
if mode.find("m") != -1:
|
|
946
|
-
logger.debug("Mode: 'month'")
|
|
1194
|
+
# logger.debug("Mode: 'month'")
|
|
947
1195
|
self["month"] = self["date"].dt.month
|
|
948
1196
|
col_counter += 1
|
|
949
1197
|
if mode.find("y") != -1:
|
|
950
|
-
logger.debug("Mode: 'year'")
|
|
1198
|
+
# logger.debug("Mode: 'year'")
|
|
951
1199
|
self["year"] = self["date"].dt.year
|
|
952
1200
|
col_counter += 1
|
|
953
1201
|
|
|
@@ -1017,6 +1265,172 @@ class DataAnalystDataFrameDateMixin(DFBase):
|
|
|
1017
1265
|
)
|
|
1018
1266
|
return self
|
|
1019
1267
|
|
|
1268
|
+
@versionadded("6.0.0")
|
|
1269
|
+
def normalize_datetime_column(
|
|
1270
|
+
self,
|
|
1271
|
+
col: str,
|
|
1272
|
+
*,
|
|
1273
|
+
inplace: bool = False,
|
|
1274
|
+
) -> Self:
|
|
1275
|
+
"""
|
|
1276
|
+
Normalize a datetime column by removing the time component.
|
|
1277
|
+
|
|
1278
|
+
This function converts the specified column to pandas datetime (``datetime64[ns]``)
|
|
1279
|
+
(if not already), then normalizes all values so that the time
|
|
1280
|
+
component is set to ``00:00:00``. The date component is preserved.
|
|
1281
|
+
|
|
1282
|
+
The function safely handles missing or invalid values by coercing
|
|
1283
|
+
them to ``NaT``.
|
|
1284
|
+
|
|
1285
|
+
Parameters
|
|
1286
|
+
----------
|
|
1287
|
+
col : str
|
|
1288
|
+
Name of the column to normalize. The column may contain
|
|
1289
|
+
datetime-like values, strings, or mixed types.
|
|
1290
|
+
|
|
1291
|
+
inplace : bool, default False
|
|
1292
|
+
| If ``True``, modify the input DataFrame in place.
|
|
1293
|
+
| If ``False``, operate on a copy and return the modified DataFrame.
|
|
1294
|
+
|
|
1295
|
+
Returns
|
|
1296
|
+
-------
|
|
1297
|
+
Self
|
|
1298
|
+
DataFrame with the normalized datetime column.
|
|
1299
|
+
|
|
1300
|
+
|
|
1301
|
+
Example:
|
|
1302
|
+
--------
|
|
1303
|
+
Basic usage::
|
|
1304
|
+
|
|
1305
|
+
>>> df = DADF({
|
|
1306
|
+
... "created_at": ["2024-01-01 10:15:30", "2024-01-02 23:59:59"]
|
|
1307
|
+
... })
|
|
1308
|
+
>>> normalize_datetime_column(df, "created_at")
|
|
1309
|
+
created_at
|
|
1310
|
+
0 2024-01-01 00:00:00
|
|
1311
|
+
1 2024-01-02 00:00:00
|
|
1312
|
+
|
|
1313
|
+
In-place modification::
|
|
1314
|
+
|
|
1315
|
+
>>> normalize_datetime_column(df, "created_at", inplace=True)
|
|
1316
|
+
|
|
1317
|
+
Handling invalid values::
|
|
1318
|
+
|
|
1319
|
+
>>> df = DADF({"dt": ["2024-01-01 10:00", "invalid"]})
|
|
1320
|
+
>>> normalize_datetime_column(df, "dt")
|
|
1321
|
+
dt
|
|
1322
|
+
0 2024-01-01 00:00:00
|
|
1323
|
+
1 NaT
|
|
1324
|
+
|
|
1325
|
+
"""
|
|
1326
|
+
if not inplace:
|
|
1327
|
+
df = self.copy()
|
|
1328
|
+
else:
|
|
1329
|
+
df = self
|
|
1330
|
+
|
|
1331
|
+
# Using ``df.loc[:, col]`` avoids ``SettingWithCopyWarning`` when the input DataFrame is a slice.
|
|
1332
|
+
df.loc[:, col] = pd.to_datetime(df[col], errors="coerce").dt.normalize()
|
|
1333
|
+
return df
|
|
1334
|
+
|
|
1335
|
+
|
|
1336
|
+
# Export
|
|
1337
|
+
# ---------------------------------------------------------------------------
|
|
1338
|
+
class DataAnalystDataFrameExportMixin(DFBase):
|
|
1339
|
+
"""
|
|
1340
|
+
Data Analyst ``pd.DataFrame`` - Export method
|
|
1341
|
+
|
|
1342
|
+
- da_export
|
|
1343
|
+
"""
|
|
1344
|
+
|
|
1345
|
+
@versionchanged("5.8.0", "New parameter")
|
|
1346
|
+
def da_export(
|
|
1347
|
+
self,
|
|
1348
|
+
path: str,
|
|
1349
|
+
sheet_name: str = "Sheet1",
|
|
1350
|
+
*,
|
|
1351
|
+
auto_width: bool = True,
|
|
1352
|
+
cols_contain_centered_text: list[str] | None = None,
|
|
1353
|
+
cols_contain_number: list[str] | None = None,
|
|
1354
|
+
cols_contain_percentage: list[str] | None = None,
|
|
1355
|
+
) -> None:
|
|
1356
|
+
"""
|
|
1357
|
+
Export DataFrame with `xlsxwriter` engine
|
|
1358
|
+
|
|
1359
|
+
Parameters
|
|
1360
|
+
----------
|
|
1361
|
+
path : Path | str
|
|
1362
|
+
Path to export
|
|
1363
|
+
|
|
1364
|
+
sheet_name : str, optional
|
|
1365
|
+
Sheet name, by default "Sheet1"
|
|
1366
|
+
|
|
1367
|
+
auto_width : bool, optional
|
|
1368
|
+
Auto resize column width, by default ``True``
|
|
1369
|
+
|
|
1370
|
+
cols_contain_centered_text : list[str] | None, optional
|
|
1371
|
+
Columns that contain centered text (Align center), by default None
|
|
1372
|
+
|
|
1373
|
+
cols_contain_number : list[str] | None, optional
|
|
1374
|
+
Columns that contain number value (to format as number - int), by default None
|
|
1375
|
+
|
|
1376
|
+
cols_contain_percentage : list[str] | None, optional
|
|
1377
|
+
Columns that contain percentage value (to format as percentage), by default None
|
|
1378
|
+
"""
|
|
1379
|
+
|
|
1380
|
+
# Using xlsxwriter engine
|
|
1381
|
+
with pd.ExcelWriter(path, engine="xlsxwriter") as writer:
|
|
1382
|
+
self.to_excel(writer, sheet_name=sheet_name, index=False, float_format="%.2f", na_rep="")
|
|
1383
|
+
|
|
1384
|
+
# Format style
|
|
1385
|
+
workbook: Workbook = writer.book # type: ignore
|
|
1386
|
+
header_fmt = workbook.add_format(
|
|
1387
|
+
{
|
|
1388
|
+
"bold": True,
|
|
1389
|
+
"text_wrap": True,
|
|
1390
|
+
"border": 1,
|
|
1391
|
+
"align": "center",
|
|
1392
|
+
"valign": "vcenter",
|
|
1393
|
+
# "bg_color": "#A0BEFD",
|
|
1394
|
+
}
|
|
1395
|
+
)
|
|
1396
|
+
number_fmt = workbook.add_format(
|
|
1397
|
+
{"num_format": "#,##0", "align": "center", "valign": "vcenter"}
|
|
1398
|
+
) # 1,000,000
|
|
1399
|
+
percent_fmt = workbook.add_format({"num_format": "0.00%", "align": "center", "valign": "vcenter"}) # 1.00%
|
|
1400
|
+
text_fmt = workbook.add_format({"valign": "vcenter"})
|
|
1401
|
+
text_center_fmt = workbook.add_format({"align": "center", "valign": "vcenter"})
|
|
1402
|
+
|
|
1403
|
+
# Format sheet
|
|
1404
|
+
worksheet: Worksheet = writer.sheets[sheet_name]
|
|
1405
|
+
|
|
1406
|
+
# Format header - First row
|
|
1407
|
+
for col_num, value in enumerate(self.columns.values):
|
|
1408
|
+
worksheet.write(0, col_num, value, header_fmt)
|
|
1409
|
+
|
|
1410
|
+
rules = [
|
|
1411
|
+
(cols_contain_number, number_fmt),
|
|
1412
|
+
(cols_contain_percentage, percent_fmt),
|
|
1413
|
+
(cols_contain_centered_text, text_center_fmt),
|
|
1414
|
+
]
|
|
1415
|
+
|
|
1416
|
+
# Auto width + col format
|
|
1417
|
+
for i, col in enumerate(self.columns):
|
|
1418
|
+
# Max str len of each column
|
|
1419
|
+
max_len = None if auto_width is None else max(self[col].astype(str).map(len).max(), len(col)) + 2
|
|
1420
|
+
worksheet.set_column(i, i, max_len) # Set width
|
|
1421
|
+
|
|
1422
|
+
# Format style
|
|
1423
|
+
fmt = text_fmt # default
|
|
1424
|
+
for cols, f in rules:
|
|
1425
|
+
if cols is not None and col in cols:
|
|
1426
|
+
fmt = f
|
|
1427
|
+
break
|
|
1428
|
+
worksheet.set_column(i, i, max_len, fmt)
|
|
1429
|
+
|
|
1430
|
+
# if cols_contain_number is not None:
|
|
1431
|
+
# for x in cols_contain_number:
|
|
1432
|
+
# self[x] = pd.to_numeric(self[x], errors="coerce")
|
|
1433
|
+
|
|
1020
1434
|
|
|
1021
1435
|
# City
|
|
1022
1436
|
# ---------------------------------------------------------------------------
|
|
@@ -1068,13 +1482,13 @@ class DataAnalystDataFrameCityMixin(DFBase):
|
|
|
1068
1482
|
# Convert
|
|
1069
1483
|
col_counter = 0
|
|
1070
1484
|
if mode.find("r") != -1:
|
|
1071
|
-
logger.debug("Mode: 'region'")
|
|
1485
|
+
# logger.debug("Mode: 'region'")
|
|
1072
1486
|
self["region"] = self[city_column].apply(
|
|
1073
1487
|
lambda x: _convert_city_support(x).region
|
|
1074
1488
|
)
|
|
1075
1489
|
col_counter += 1
|
|
1076
1490
|
if mode.find("a") != -1:
|
|
1077
|
-
logger.debug("Mode: 'area'")
|
|
1491
|
+
# logger.debug("Mode: 'area'")
|
|
1078
1492
|
self["area"] = self[city_column].apply(
|
|
1079
1493
|
lambda x: _convert_city_support(x).area
|
|
1080
1494
|
)
|
|
@@ -1091,6 +1505,7 @@ class DataAnalystDataFrameCityMixin(DFBase):
|
|
|
1091
1505
|
class DADF(
|
|
1092
1506
|
GetClassMembersMixin,
|
|
1093
1507
|
DataAnalystDataFrameCityMixin,
|
|
1508
|
+
DataAnalystDataFrameExportMixin,
|
|
1094
1509
|
DataAnalystDataFrameDateMixin,
|
|
1095
1510
|
DataAnalystDataFrameOtherMixin,
|
|
1096
1511
|
DataAnalystDataFrameNAMixin,
|
|
@@ -1191,3 +1606,17 @@ class DADF_WIP(DADF):
|
|
|
1191
1606
|
"""
|
|
1192
1607
|
|
|
1193
1608
|
pass
|
|
1609
|
+
|
|
1610
|
+
if __name__ == "__main__":
|
|
1611
|
+
from pathlib import Path
|
|
1612
|
+
|
|
1613
|
+
# t = DADF.sample_df().show_distribution("number_range", show_percentage=False)
|
|
1614
|
+
# t.da_export(
|
|
1615
|
+
# Path(__file__).parent.joinpath("a.xlsx").resolve().__str__(),
|
|
1616
|
+
# cols_contain_number=["number_range"],
|
|
1617
|
+
# cols_contain_percentage=["percentage"],
|
|
1618
|
+
# )
|
|
1619
|
+
# print(t)
|
|
1620
|
+
|
|
1621
|
+
df = DADF.sample_df(10)
|
|
1622
|
+
print(df.add_blank_row())
|
absfuyu/extra/da/dadf_base.py
CHANGED