absfuyu 5.6.1__py3-none-any.whl → 6.1.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of absfuyu might be problematic. Click here for more details.
- absfuyu/__init__.py +5 -3
- absfuyu/__main__.py +2 -2
- absfuyu/cli/__init__.py +13 -2
- absfuyu/cli/audio_group.py +98 -0
- absfuyu/cli/color.py +2 -2
- absfuyu/cli/config_group.py +2 -2
- absfuyu/cli/do_group.py +2 -2
- absfuyu/cli/game_group.py +20 -2
- absfuyu/cli/tool_group.py +68 -4
- absfuyu/config/__init__.py +3 -3
- absfuyu/core/__init__.py +10 -6
- absfuyu/core/baseclass.py +104 -34
- absfuyu/core/baseclass2.py +43 -2
- absfuyu/core/decorator.py +2 -2
- absfuyu/core/docstring.py +4 -2
- absfuyu/core/dummy_cli.py +3 -3
- absfuyu/core/dummy_func.py +2 -2
- absfuyu/dxt/__init__.py +2 -2
- absfuyu/dxt/base_type.py +93 -0
- absfuyu/dxt/dictext.py +188 -6
- absfuyu/dxt/dxt_support.py +2 -2
- absfuyu/dxt/intext.py +72 -4
- absfuyu/dxt/listext.py +495 -23
- absfuyu/dxt/strext.py +2 -2
- absfuyu/extra/__init__.py +2 -2
- absfuyu/extra/audio/__init__.py +8 -0
- absfuyu/extra/audio/_util.py +57 -0
- absfuyu/extra/audio/convert.py +192 -0
- absfuyu/extra/audio/lossless.py +281 -0
- absfuyu/extra/beautiful.py +2 -2
- absfuyu/extra/da/__init__.py +39 -3
- absfuyu/extra/da/dadf.py +436 -29
- absfuyu/extra/da/dadf_base.py +2 -2
- absfuyu/extra/da/df_func.py +89 -5
- absfuyu/extra/da/mplt.py +2 -2
- absfuyu/extra/ggapi/__init__.py +8 -0
- absfuyu/extra/ggapi/gdrive.py +223 -0
- absfuyu/extra/ggapi/glicense.py +148 -0
- absfuyu/extra/ggapi/glicense_df.py +186 -0
- absfuyu/extra/ggapi/gsheet.py +88 -0
- absfuyu/extra/img/__init__.py +30 -0
- absfuyu/extra/img/converter.py +402 -0
- absfuyu/extra/img/dup_check.py +291 -0
- absfuyu/extra/pdf.py +4 -6
- absfuyu/extra/rclone.py +253 -0
- absfuyu/extra/xml.py +90 -0
- absfuyu/fun/__init__.py +2 -20
- absfuyu/fun/rubik.py +2 -2
- absfuyu/fun/tarot.py +2 -2
- absfuyu/game/__init__.py +2 -2
- absfuyu/game/game_stat.py +2 -2
- absfuyu/game/schulte.py +78 -0
- absfuyu/game/sudoku.py +2 -2
- absfuyu/game/tictactoe.py +2 -2
- absfuyu/game/wordle.py +6 -4
- absfuyu/general/__init__.py +2 -2
- absfuyu/general/content.py +2 -2
- absfuyu/general/human.py +2 -2
- absfuyu/general/resrel.py +213 -0
- absfuyu/general/shape.py +3 -8
- absfuyu/general/tax.py +344 -0
- absfuyu/logger.py +806 -59
- absfuyu/numbers/__init__.py +13 -0
- absfuyu/numbers/number_to_word.py +321 -0
- absfuyu/numbers/shorten_number.py +303 -0
- absfuyu/numbers/time_duration.py +217 -0
- absfuyu/pkg_data/__init__.py +2 -2
- absfuyu/pkg_data/deprecated.py +2 -2
- absfuyu/pkg_data/logo.py +1462 -0
- absfuyu/sort.py +4 -4
- absfuyu/tools/__init__.py +2 -2
- absfuyu/tools/checksum.py +119 -4
- absfuyu/tools/converter.py +2 -2
- absfuyu/tools/generator.py +24 -7
- absfuyu/tools/inspector.py +2 -2
- absfuyu/tools/keygen.py +2 -2
- absfuyu/tools/obfuscator.py +2 -2
- absfuyu/tools/passwordlib.py +2 -2
- absfuyu/tools/shutdownizer.py +3 -8
- absfuyu/tools/sw.py +213 -10
- absfuyu/tools/web.py +10 -13
- absfuyu/typings.py +5 -8
- absfuyu/util/__init__.py +31 -2
- absfuyu/util/api.py +7 -4
- absfuyu/util/cli.py +119 -0
- absfuyu/util/gui.py +91 -0
- absfuyu/util/json_method.py +2 -2
- absfuyu/util/lunar.py +2 -2
- absfuyu/util/package.py +124 -0
- absfuyu/util/path.py +313 -4
- absfuyu/util/performance.py +2 -2
- absfuyu/util/shorten_number.py +206 -13
- absfuyu/util/text_table.py +2 -2
- absfuyu/util/zipped.py +2 -2
- absfuyu/version.py +22 -19
- {absfuyu-5.6.1.dist-info → absfuyu-6.1.2.dist-info}/METADATA +37 -8
- absfuyu-6.1.2.dist-info/RECORD +105 -0
- {absfuyu-5.6.1.dist-info → absfuyu-6.1.2.dist-info}/WHEEL +1 -1
- absfuyu/extra/data_analysis.py +0 -21
- absfuyu-5.6.1.dist-info/RECORD +0 -79
- {absfuyu-5.6.1.dist-info → absfuyu-6.1.2.dist-info}/entry_points.txt +0 -0
- {absfuyu-5.6.1.dist-info → absfuyu-6.1.2.dist-info}/licenses/LICENSE +0 -0
absfuyu/extra/da/dadf.py
CHANGED
|
@@ -3,8 +3,8 @@ Absfuyu: Data Analysis
|
|
|
3
3
|
----------------------
|
|
4
4
|
Data Analyst DataFrame
|
|
5
5
|
|
|
6
|
-
Version:
|
|
7
|
-
Date updated: 12/
|
|
6
|
+
Version: 6.1.1
|
|
7
|
+
Date updated: 30/12/2025 (dd/mm/yyyy)
|
|
8
8
|
"""
|
|
9
9
|
|
|
10
10
|
# Module level
|
|
@@ -17,6 +17,7 @@ __all__ = [
|
|
|
17
17
|
"DataAnalystDataFrameNAMixin",
|
|
18
18
|
"DataAnalystDataFrameOtherMixin",
|
|
19
19
|
"DataAnalystDataFrameDateMixin",
|
|
20
|
+
"DataAnalystDataFrameExportMixin",
|
|
20
21
|
"DataAnalystDataFrameCityMixin",
|
|
21
22
|
]
|
|
22
23
|
|
|
@@ -25,24 +26,21 @@ __all__ = [
|
|
|
25
26
|
# ---------------------------------------------------------------------------
|
|
26
27
|
import random
|
|
27
28
|
import string
|
|
28
|
-
from collections.abc import Callable, Sequence
|
|
29
|
+
from collections.abc import Callable, Iterable, Mapping, Sequence
|
|
29
30
|
from datetime import datetime, timedelta
|
|
30
|
-
from typing import Any, Literal, Self
|
|
31
|
+
from typing import Any, Literal, Self, cast, override
|
|
31
32
|
|
|
32
33
|
import numpy as np
|
|
33
34
|
import pandas as pd
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
from typing import override # type: ignore
|
|
37
|
-
except ImportError:
|
|
38
|
-
from absfuyu.core.decorator import dummy_decorator as override
|
|
35
|
+
from xlsxwriter import Workbook
|
|
36
|
+
from xlsxwriter.worksheet import Worksheet
|
|
39
37
|
|
|
40
38
|
from absfuyu.core.baseclass import GetClassMembersMixin
|
|
41
|
-
from absfuyu.core.docstring import deprecated, versionadded
|
|
39
|
+
from absfuyu.core.docstring import deprecated, versionadded, versionchanged
|
|
40
|
+
from absfuyu.core.dummy_func import unidecode
|
|
42
41
|
from absfuyu.extra.da.dadf_base import CityData
|
|
43
42
|
from absfuyu.extra.da.dadf_base import DataAnalystDataFrameBase as DFBase
|
|
44
43
|
from absfuyu.extra.da.dadf_base import SplittedDF
|
|
45
|
-
from absfuyu.logger import logger
|
|
46
44
|
from absfuyu.typings import R as _R
|
|
47
45
|
from absfuyu.typings import T as _T
|
|
48
46
|
from absfuyu.util import set_min_max
|
|
@@ -59,6 +57,8 @@ class DataAnalystDataFrameColumnMethodMixin(DFBase):
|
|
|
59
57
|
- Drop rightmost column
|
|
60
58
|
- Add blank column
|
|
61
59
|
- Split str column
|
|
60
|
+
- Get column name unidecoded
|
|
61
|
+
- Get column unidecoded
|
|
62
62
|
"""
|
|
63
63
|
|
|
64
64
|
def rearrange_rightmost_column(
|
|
@@ -135,8 +135,8 @@ class DataAnalystDataFrameColumnMethodMixin(DFBase):
|
|
|
135
135
|
try:
|
|
136
136
|
self.drop(columns=[column], inplace=True)
|
|
137
137
|
except KeyError:
|
|
138
|
-
logger.debug(f"{column} column does not exist")
|
|
139
|
-
|
|
138
|
+
# logger.debug(f"{column} column does not exist")
|
|
139
|
+
pass
|
|
140
140
|
return self
|
|
141
141
|
|
|
142
142
|
def drop_rightmost(self, num_of_cols: int = 1) -> Self:
|
|
@@ -183,7 +183,9 @@ class DataAnalystDataFrameColumnMethodMixin(DFBase):
|
|
|
183
183
|
@deprecated("5.1.0", reason="Use pd.DataFrame.assign(...) method instead")
|
|
184
184
|
def add_blank_column(self, column_name: str, fill: Any = np.nan, /) -> Self:
|
|
185
185
|
"""
|
|
186
|
-
Add a blank column
|
|
186
|
+
[DEPRECATED] Add a blank column.
|
|
187
|
+
|
|
188
|
+
E.g: Use `pd.DataFrame.assign(new_col=lambda x: x['old_col'])` instead
|
|
187
189
|
|
|
188
190
|
Parameters
|
|
189
191
|
----------
|
|
@@ -246,14 +248,75 @@ class DataAnalystDataFrameColumnMethodMixin(DFBase):
|
|
|
246
248
|
"""
|
|
247
249
|
if n is None:
|
|
248
250
|
pass
|
|
249
|
-
splited_data: pd.DataFrame = self[col].str.split(
|
|
250
|
-
pat=pattern, n=n, expand=True, regex=regex
|
|
251
|
-
)
|
|
251
|
+
splited_data: pd.DataFrame = self[col].str.split(pat=pattern, n=n, expand=True, regex=regex) # type: ignore
|
|
252
252
|
num_of_splitted_cols = splited_data.shape[1]
|
|
253
253
|
new_col_names = [f"{col}_{x}" for x in range(num_of_splitted_cols)]
|
|
254
254
|
self[new_col_names] = splited_data
|
|
255
255
|
return self
|
|
256
256
|
|
|
257
|
+
@versionadded("5.12.0")  # No test cases
def get_column_name_unidecoded(self, col_name: str, /, *, mode: Literal["start", "end", "in"] = "start") -> str:
    """
    Get the real column name from its lowercase unidecode'd form

    Every column name is stripped, lowercased and passed through
    ``unidecode`` before it is compared with ``col_name``. The first
    column (in column order) that matches is returned exactly as it
    appears in the DataFrame.

    Parameters
    ----------
    col_name : str
        Search term, compared against the normalized column names

    mode : Literal["start", "end", "in"], optional
        How to compare, by default ``"start"``
        - ``"start"``: normalized column name starts with ``col_name``
        - ``"end"``: normalized column name ends with ``col_name``
        - ``"in"``: ``col_name`` occurs anywhere in the normalized column name

    Returns
    -------
    str
        Original (un-normalized) column name

    Raises
    ------
    ValueError
        No column matches (this is also the outcome for an unsupported ``mode``)
    """
    for x in self.columns.to_list():
        col_name_mod = cast(str, unidecode(x.strip().lower()))

        if mode == "start":
            matched = col_name_mod.startswith(col_name)
        elif mode == "end":
            matched = col_name_mod.endswith(col_name)
        elif mode == "in":
            # The search term is the needle and the column name is the
            # haystack, same direction as the "start" and "end" modes.
            matched = col_name in col_name_mod
        else:
            matched = False

        if matched:
            return x

    raise ValueError(f"Column not found: {col_name}")
|
|
296
|
+
|
|
297
|
+
@versionadded("5.12.0")  # No test cases
def get_column_unidecoded(self, col_name: str, /, *, mode: Literal["start", "end", "in"] = "start") -> pd.Series:
    """
    Fetch a column by the lowercase unidecode'd form of its name

    The lookup of the real column name is delegated to
    ``get_column_name_unidecoded``; any error it raises propagates.

    Parameters
    ----------
    col_name : str
        Search term for the column name

    mode : Literal["start", "end", "in"], optional
        Matching mode forwarded to ``get_column_name_unidecoded``,
        by default ``"start"``

    Returns
    -------
    Series
        Data of the matched column
    """
    resolved_name = self.get_column_name_unidecoded(col_name, mode=mode)
    return self[resolved_name]
|
|
319
|
+
|
|
257
320
|
|
|
258
321
|
# Row method
|
|
259
322
|
# ---------------------------------------------------------------------------
|
|
@@ -262,6 +325,7 @@ class DataAnalystDataFrameRowMethodMixin(DFBase):
|
|
|
262
325
|
Data Analyst ``pd.DataFrame`` - Row method
|
|
263
326
|
|
|
264
327
|
- Get different rows
|
|
328
|
+
- Add blank row
|
|
265
329
|
"""
|
|
266
330
|
|
|
267
331
|
@versionadded("4.0.0")
|
|
@@ -297,6 +361,168 @@ class DataAnalystDataFrameRowMethodMixin(DFBase):
|
|
|
297
361
|
)
|
|
298
362
|
return self.__class__(out)
|
|
299
363
|
|
|
364
|
+
@versionchanged("6.0.0", reason="Improved logic")
@versionadded("5.7.0")
def add_blank_row(self, fill: Any = np.nan, /) -> Self:
    """
    Append one extra row, filled with a single value, to the DataFrame.

    The current DataFrame is not modified; a new instance of the same
    class is returned with a fresh ``0..n`` index.

    Parameters
    ----------
    fill : Any, default np.nan
        Value placed in every column of the new row
        (e.g., np.nan, None, "", 0).

    Returns
    -------
    Self
        New DataFrame with the extra row at the end.
    """
    # Integer columns are switched to nullable "Int64" by ``_safe_dtypes``
    # so the one-row frame can be cast to the column dtypes.
    dtype_map = self._safe_dtypes(self.dtypes)

    # One row, same value under every column label
    row_values = dict.fromkeys(self.columns, fill)
    extra_row = pd.DataFrame([row_values], columns=self.columns)
    extra_row = extra_row.astype(dtype_map)

    combined = pd.concat([self, extra_row], ignore_index=True)
    return self.__class__(cast(pd.DataFrame, combined))
|
|
389
|
+
|
|
390
|
+
@versionadded("6.0.0")  # Support
def _safe_dtypes(self, dtypes: pd.Series) -> dict[str, Any]:
    """
    Build a ``{column: dtype}`` mapping in which integer dtypes are
    replaced by pandas' nullable integer dtype.

    Every entry of ``dtypes`` for which ``pd.api.types.is_integer_dtype``
    is true is mapped to the string ``"Int64"``; every other entry keeps
    the dtype it already has. The mapping is meant to be handed to
    ``DataFrame.astype`` before rows holding missing values are
    inserted or concatenated.

    Parameters
    ----------
    dtypes : Series
        Column name -> dtype, typically ``DataFrame.dtypes``.

    Returns
    -------
    dict
        Column name -> dtype, with integer dtypes swapped for ``"Int64"``.


    Example:
    --------
    >>> df.dtypes
    id          int64
    name       object
    amount    float64
    dtype: object

    >>> df._safe_dtypes(df.dtypes)
    {
        "id": "Int64",
        "name": dtype("O"),
        "amount": dtype("float64"),
    }
    """
    is_int = pd.api.types.is_integer_dtype
    return {
        column: ("Int64" if is_int(dtype) else dtype)  # nullable integer
        for column, dtype in dtypes.items()
    }
|
|
459
|
+
|
|
460
|
+
@versionadded("6.0.0")  # Better version of add_blank_row()
def add_separator_row(
    self,
    group_cols: str | Iterable[str],
    *,
    separator: Mapping[str, object] | None = None,
    drop_last: bool = True,
) -> Self:
    """
    Insert a separator row after each group in the DataFrame.

    Rows are grouped by ``group_cols`` in order of first appearance
    (``sort=False``). Rows whose group key is missing are kept as a
    group of their own instead of being dropped. The current DataFrame
    is not modified; a new instance with a fresh ``0..n`` index is
    returned.

    Parameters
    ----------
    group_cols : str | Iterable[str]
        Column(s) used to define grouping boundaries.

    separator : Mapping[str, object] | None, optional
        Custom separator row values (e.g. {"col": "---"}).
        Columns not provided will be filled with NaN.
        If None, a fully blank row is inserted.

    drop_last : bool, optional
        If True, do not insert a separator after the last group.

    Returns
    -------
    Self
        DataFrame with separator rows inserted. An empty DataFrame is
        returned as an unchanged copy.

    Raises
    ------
    KeyError
        One or more of ``group_cols`` is not a column of the DataFrame.
    """
    df = self.copy()

    # Materialize once: a one-shot iterable (e.g. a generator) would be
    # exhausted by the validation below and reach ``groupby`` empty.
    if isinstance(group_cols, str):
        group_cols = [group_cols]
    else:
        group_cols = list(group_cols)

    # Validate columns
    missing = set(group_cols) - set(df.columns)
    if missing:
        raise KeyError(f"Missing columns: {missing}")

    # Build separator row template
    if separator is None:
        sep_row = {c: np.nan for c in df.columns}
    else:
        sep_row = {c: separator.get(c, np.nan) for c in df.columns}

    # The separator frame is identical for every group, so build it once
    safe_types = self._safe_dtypes(df.dtypes)
    sep_df = pd.DataFrame([sep_row], columns=df.columns).astype(safe_types)

    # Group while preserving order; dropna=False keeps rows whose key is NaN
    rows = []
    for _, g in df.groupby(group_cols, sort=False, dropna=False):
        rows.append(g)
        rows.append(sep_df)

    # No groups (empty DataFrame): nothing to separate, and
    # ``pd.concat`` would raise on an empty list.
    if not rows:
        return self.__class__(df)

    out = cast(pd.DataFrame, pd.concat(rows, ignore_index=True))

    if drop_last:
        # The final row is always the separator that follows the last group
        out = out.iloc[:-1].reset_index(drop=True)

    return self.__class__(out)
|
|
525
|
+
|
|
300
526
|
|
|
301
527
|
# Info
|
|
302
528
|
# ---------------------------------------------------------------------------
|
|
@@ -342,7 +568,7 @@ class DataAnalystDataFrameInfoMixin(DFBase):
|
|
|
342
568
|
return info
|
|
343
569
|
|
|
344
570
|
@override
|
|
345
|
-
def describe(self, percentiles=None, include=None, exclude=None) -> Self:
|
|
571
|
+
def describe(self, percentiles=None, include=None, exclude=None) -> Self: # type: ignore
|
|
346
572
|
"""pd.DataFrame.describe() override"""
|
|
347
573
|
return self.__class__(super().describe(percentiles, include, exclude)) # type: ignore [no-any-return]
|
|
348
574
|
|
|
@@ -486,7 +712,7 @@ class DataAnalystDataFrameInfoMixin(DFBase):
|
|
|
486
712
|
if top is not None:
|
|
487
713
|
list_of_keep: list = (
|
|
488
714
|
col_df[destination_column]
|
|
489
|
-
.head(set_min_max(top - 1, min_value=1, max_value=col_df.shape[0]))
|
|
715
|
+
.head(set_min_max(top - 1, min_value=1, max_value=col_df.shape[0])) # type: ignore
|
|
490
716
|
.to_list()
|
|
491
717
|
)
|
|
492
718
|
# logger.debug(list_of_keep)
|
|
@@ -561,7 +787,7 @@ class DataAnalystDataFrameNAMixin(DFBase):
|
|
|
561
787
|
except KeyError:
|
|
562
788
|
if getattr(self, "add_blank_column", None) is not None:
|
|
563
789
|
# Compatible with DataAnalystDataFrameColumnMethodMixin
|
|
564
|
-
self.add_blank_column(column_name, fill_when_not_exist)
|
|
790
|
+
self.add_blank_column(column_name, fill_when_not_exist) # type: ignore
|
|
565
791
|
return self
|
|
566
792
|
|
|
567
793
|
def get_missing_values(
|
|
@@ -679,7 +905,7 @@ class DataAnalystDataFrameNAMixin(DFBase):
|
|
|
679
905
|
3 -1.435079 400 400 REPLACED ywahcasi 2024-05-20
|
|
680
906
|
4 0.118993 861 800 REPLACED saoupuby 2019-04-28
|
|
681
907
|
"""
|
|
682
|
-
self[col] = self[col].apply(lambda x: callable(x) if pd.notnull(x) else x)
|
|
908
|
+
self[col] = self[col].apply(lambda x: callable(x) if pd.notnull(x) else x) # type: ignore
|
|
683
909
|
return self
|
|
684
910
|
|
|
685
911
|
@versionadded("5.1.0") # type: ignore
|
|
@@ -750,7 +976,7 @@ class DataAnalystDataFrameNAMixin(DFBase):
|
|
|
750
976
|
|
|
751
977
|
# Column name
|
|
752
978
|
cname = "applied_row_null" if col_name is None else col_name
|
|
753
|
-
self[cname] = self.apply(apply_func, axis=1)
|
|
979
|
+
self[cname] = self.apply(apply_func, axis=1) # type: ignore
|
|
754
980
|
|
|
755
981
|
return self
|
|
756
982
|
|
|
@@ -825,7 +1051,7 @@ class DataAnalystDataFrameOtherMixin(DFBase):
|
|
|
825
1051
|
|
|
826
1052
|
if getattr(self, "drop_columns", None) is not None:
|
|
827
1053
|
# Compatible with DataAnalystDataFrameColumnMethodMixin
|
|
828
|
-
self.drop_columns(cols)
|
|
1054
|
+
self.drop_columns(cols) # type: ignore
|
|
829
1055
|
|
|
830
1056
|
out = self.merge(other, how="left", on=on)
|
|
831
1057
|
return self.__class__(out)
|
|
@@ -935,19 +1161,19 @@ class DataAnalystDataFrameDateMixin(DFBase):
|
|
|
935
1161
|
col_counter = 0
|
|
936
1162
|
# self["weekday"] = self["day"].dt.isocalendar().day # Weekday
|
|
937
1163
|
if mode.find("d") != -1:
|
|
938
|
-
logger.debug("Mode: 'day'")
|
|
1164
|
+
# logger.debug("Mode: 'day'")
|
|
939
1165
|
self["day"] = self["date"].dt.day
|
|
940
1166
|
col_counter += 1
|
|
941
1167
|
if mode.find("w") != -1:
|
|
942
|
-
logger.debug("Mode: 'weekday'")
|
|
1168
|
+
# logger.debug("Mode: 'weekday'")
|
|
943
1169
|
self["week"] = self["date"].dt.isocalendar().week
|
|
944
1170
|
col_counter += 1
|
|
945
1171
|
if mode.find("m") != -1:
|
|
946
|
-
logger.debug("Mode: 'month'")
|
|
1172
|
+
# logger.debug("Mode: 'month'")
|
|
947
1173
|
self["month"] = self["date"].dt.month
|
|
948
1174
|
col_counter += 1
|
|
949
1175
|
if mode.find("y") != -1:
|
|
950
|
-
logger.debug("Mode: 'year'")
|
|
1176
|
+
# logger.debug("Mode: 'year'")
|
|
951
1177
|
self["year"] = self["date"].dt.year
|
|
952
1178
|
col_counter += 1
|
|
953
1179
|
|
|
@@ -1017,6 +1243,172 @@ class DataAnalystDataFrameDateMixin(DFBase):
|
|
|
1017
1243
|
)
|
|
1018
1244
|
return self
|
|
1019
1245
|
|
|
1246
|
+
@versionadded("6.0.0")
def normalize_datetime_column(
    self,
    col: str,
    *,
    inplace: bool = False,
) -> Self:
    """
    Normalize a datetime column by removing the time component.

    The column is parsed with ``pd.to_datetime(..., errors="coerce")``,
    so values that cannot be parsed become ``NaT``, and then normalized
    so that the time component is ``00:00:00``. The date component is
    preserved.

    Parameters
    ----------
    col : str
        Name of the column to normalize. The column may contain
        datetime-like values, strings, or mixed types.

    inplace : bool, default False
        | If ``True``, modify this DataFrame and return it.
        | If ``False``, operate on a copy and return the copy.

    Returns
    -------
    Self
        DataFrame with the normalized datetime column.


    Example:
    --------
    Basic usage::

        >>> df = DADF({
        ...     "created_at": ["2024-01-01 10:15:30", "2024-01-02 23:59:59"]
        ... })
        >>> df.normalize_datetime_column("created_at")
        created_at
        0 2024-01-01
        1 2024-01-02

    In-place modification::

        >>> df.normalize_datetime_column("created_at", inplace=True)

    Handling invalid values::

        >>> df = DADF({"dt": ["2024-01-01 10:00", "invalid"]})
        >>> df.normalize_datetime_column("dt")
        dt
        0 2024-01-01
        1 NaT
    """
    df = self if inplace else self.copy()

    normalized = pd.to_datetime(df[col], errors="coerce").dt.normalize()

    # Replace the whole column rather than writing through
    # ``df.loc[:, col]``: the ``.loc`` form sets values into the existing
    # column in place where it can, which leaves an object/string column
    # as ``object`` dtype instead of a datetime dtype.
    df[col] = normalized
    return df
|
|
1312
|
+
|
|
1313
|
+
|
|
1314
|
+
# Export
|
|
1315
|
+
# ---------------------------------------------------------------------------
|
|
1316
|
+
class DataAnalystDataFrameExportMixin(DFBase):
    """
    Data Analyst ``pd.DataFrame`` - Export method

    - da_export
    """

    @versionchanged("5.8.0", "New parameter")
    def da_export(
        self,
        path: str,
        sheet_name: str = "Sheet1",
        *,
        auto_width: bool = True,
        cols_contain_centered_text: list[str] | None = None,
        cols_contain_number: list[str] | None = None,
        cols_contain_percentage: list[str] | None = None,
    ) -> None:
        """
        Export DataFrame with `xlsxwriter` engine

        The data is written without the index, floats are written with
        two decimals and missing values as empty cells. The header row is
        then rewritten in a bold, bordered, centered style and each column
        receives one cell format.

        Parameters
        ----------
        path : str
            Path to export

        sheet_name : str, optional
            Sheet name, by default "Sheet1"

        auto_width : bool, optional
            Auto resize column width, by default ``True``.
            When ``False`` no width is passed and columns keep the
            worksheet's default width.

        cols_contain_centered_text : list[str] | None, optional
            Columns that contain centered text (Align center), by default None

        cols_contain_number : list[str] | None, optional
            Columns that contain number value (to format as number - int), by default None

        cols_contain_percentage : list[str] | None, optional
            Columns that contain percentage value (to format as percentage), by default None

        Notes
        -----
        A column listed in more than one ``cols_contain_*`` argument gets
        the first matching format in this order: number, percentage,
        centered text. Columns listed in none get a vertically-centered
        text format.
        """

        # Using xlsxwriter engine
        with pd.ExcelWriter(path, engine="xlsxwriter") as writer:
            self.to_excel(writer, sheet_name=sheet_name, index=False, float_format="%.2f", na_rep="")

            # Format style
            workbook: Workbook = writer.book  # type: ignore
            header_fmt = workbook.add_format(
                {
                    "bold": True,
                    "text_wrap": True,
                    "border": 1,
                    "align": "center",
                    "valign": "vcenter",
                    # "bg_color": "#A0BEFD",
                }
            )
            number_fmt = workbook.add_format(
                {"num_format": "#,##0", "align": "center", "valign": "vcenter"}
            )  # 1,000,000
            percent_fmt = workbook.add_format({"num_format": "0.00%", "align": "center", "valign": "vcenter"})  # 1.00%
            text_fmt = workbook.add_format({"valign": "vcenter"})
            text_center_fmt = workbook.add_format({"align": "center", "valign": "vcenter"})

            # Format sheet
            worksheet: Worksheet = writer.sheets[sheet_name]

            # Format header - First row
            for col_num, value in enumerate(self.columns.values):
                worksheet.write(0, col_num, value, header_fmt)

            # Checked in order; the first rule that lists the column wins
            rules = [
                (cols_contain_number, number_fmt),
                (cols_contain_percentage, percent_fmt),
                (cols_contain_centered_text, text_center_fmt),
            ]

            # Auto width + col format
            for i, col in enumerate(self.columns):
                width = None
                if auto_width:
                    # Longest rendered cell of this column; an empty frame has
                    # no cells, and ``.max()`` of nothing would be NaN.
                    # Positional access keeps this a single column even when
                    # column labels are duplicated.
                    longest_cell = int(self.iloc[:, i].astype(str).map(len).max()) if len(self) else 0
                    # ``str(col)`` so that non-string labels are measurable too
                    width = max(longest_cell, len(str(col))) + 2

                # Format style (plain text unless a rule lists the column)
                fmt = next(
                    (f for cols, f in rules if cols is not None and col in cols),
                    text_fmt,
                )

                # Width and format are applied in one call
                worksheet.set_column(i, i, width, fmt)
|
|
1407
|
+
|
|
1408
|
+
# if cols_contain_number is not None:
|
|
1409
|
+
# for x in cols_contain_number:
|
|
1410
|
+
# self[x] = pd.to_numeric(self[x], errors="coerce")
|
|
1411
|
+
|
|
1020
1412
|
|
|
1021
1413
|
# City
|
|
1022
1414
|
# ---------------------------------------------------------------------------
|
|
@@ -1068,13 +1460,13 @@ class DataAnalystDataFrameCityMixin(DFBase):
|
|
|
1068
1460
|
# Convert
|
|
1069
1461
|
col_counter = 0
|
|
1070
1462
|
if mode.find("r") != -1:
|
|
1071
|
-
logger.debug("Mode: 'region'")
|
|
1463
|
+
# logger.debug("Mode: 'region'")
|
|
1072
1464
|
self["region"] = self[city_column].apply(
|
|
1073
1465
|
lambda x: _convert_city_support(x).region
|
|
1074
1466
|
)
|
|
1075
1467
|
col_counter += 1
|
|
1076
1468
|
if mode.find("a") != -1:
|
|
1077
|
-
logger.debug("Mode: 'area'")
|
|
1469
|
+
# logger.debug("Mode: 'area'")
|
|
1078
1470
|
self["area"] = self[city_column].apply(
|
|
1079
1471
|
lambda x: _convert_city_support(x).area
|
|
1080
1472
|
)
|
|
@@ -1091,6 +1483,7 @@ class DataAnalystDataFrameCityMixin(DFBase):
|
|
|
1091
1483
|
class DADF(
|
|
1092
1484
|
GetClassMembersMixin,
|
|
1093
1485
|
DataAnalystDataFrameCityMixin,
|
|
1486
|
+
DataAnalystDataFrameExportMixin,
|
|
1094
1487
|
DataAnalystDataFrameDateMixin,
|
|
1095
1488
|
DataAnalystDataFrameOtherMixin,
|
|
1096
1489
|
DataAnalystDataFrameNAMixin,
|
|
@@ -1191,3 +1584,17 @@ class DADF_WIP(DADF):
|
|
|
1191
1584
|
"""
|
|
1192
1585
|
|
|
1193
1586
|
pass
|
|
1587
|
+
|
|
1588
|
+
if __name__ == "__main__":
    from pathlib import Path

    # Manual check of the Excel export (kept disabled):
    #
    #   t = DADF.sample_df().show_distribution("number_range", show_percentage=False)
    #   t.da_export(
    #       Path(__file__).parent.joinpath("a.xlsx").resolve().__str__(),
    #       cols_contain_number=["number_range"],
    #       cols_contain_percentage=["percentage"],
    #   )
    #   print(t)

    # Manual check of add_blank_row() on a 10-row sample frame
    df = DADF.sample_df(10)
    with_blank_row = df.add_blank_row()
    print(with_blank_row)
|
absfuyu/extra/da/dadf_base.py
CHANGED